
Commit 597d817

deps: move bigtable and pubsub to extras (#1696)

* deps: move bigtable and pubsub to extras
* 2025
* fix mypy

1 parent 24b37ae · commit 597d817
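
Because this change drops google-cloud-bigtable and google-cloud-pubsub from the core requirements, downstream code can no longer assume they are importable. A minimal sketch of the resulting defensive-import pattern, assuming the package and extra names from the setup.py diff below; the error text is illustrative and not part of the commit:

try:
    from google.cloud import bigtable  # now installed only via the "tests" extra
except ImportError as exc:
    # Hypothetical guard; the commit itself adds no such message.
    raise ImportError(
        "google-cloud-bigtable is optional; install it with the extra, "
        "e.g. pip install 'bigframes[tests]'"
    ) from exc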

File tree: 3 files changed, +115 −85 lines

setup.py

Lines changed: 6 additions & 3 deletions

@@ -39,8 +39,6 @@
     "gcsfs >=2023.3.0",
     "geopandas >=0.12.2",
     "google-auth >=2.15.0,<3.0",
-    "google-cloud-bigtable >=2.24.0",
-    "google-cloud-pubsub >=2.21.4",
     "google-cloud-bigquery[bqstorage,pandas] >=3.31.0",
     # 2.30 needed for arrow support.
     "google-cloud-bigquery-storage >= 2.30.0, < 3.0.0",
@@ -72,7 +70,12 @@
 ]
 extras = {
     # Optional test dependencies packages. If they're missed, may skip some tests.
-    "tests": ["freezegun", "pytest-snapshot"],
+    "tests": [
+        "freezegun",
+        "pytest-snapshot",
+        "google-cloud-bigtable >=2.24.0",
+        "google-cloud-pubsub >=2.21.4",
+    ],
     # used for local engine, which is only needed for unit tests at present.
     "polars": ["polars >= 1.7.0"],
     "scikit-learn": ["scikit-learn>=1.2.2"],
tests/system/large/streaming/test_bigtable.py (new)

Lines changed: 104 additions & 0 deletions

@@ -0,0 +1,104 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import time
+from typing import Generator
+import uuid
+
+import pytest
+
+import bigframes
+
+pytest.importorskip("google.cloud.bigtable")
+
+from google.cloud import bigtable  # noqa
+from google.cloud.bigtable import column_family, instance, table  # noqa
+
+
+@pytest.fixture(scope="session")
+def bigtable_instance(session_load: bigframes.Session) -> instance.Instance:
+    client = bigtable.Client(project=session_load._project, admin=True)
+
+    instance_name = "streaming-testing-instance"
+    bt_instance = instance.Instance(
+        instance_name,
+        client,
+    )
+
+    if not bt_instance.exists():
+        cluster_id = "streaming-testing-instance-c1"
+        cluster = bt_instance.cluster(
+            cluster_id,
+            location_id="us-west1-a",
+            serve_nodes=1,
+        )
+        operation = bt_instance.create(
+            clusters=[cluster],
+        )
+        operation.result(timeout=480)
+    return bt_instance
+
+
+@pytest.fixture(scope="function")
+def bigtable_table(
+    bigtable_instance: instance.Instance,
+) -> Generator[table.Table, None, None]:
+    table_id = "bigframes_test_" + uuid.uuid4().hex
+    bt_table = table.Table(
+        table_id,
+        bigtable_instance,
+    )
+    max_versions_rule = column_family.MaxVersionsGCRule(1)
+    column_family_id = "body_mass_g"
+    column_families = {column_family_id: max_versions_rule}
+    bt_table.create(column_families=column_families)
+    yield bt_table
+    bt_table.delete()
+
+
+@pytest.mark.flaky(retries=3, delay=10)
+def test_streaming_df_to_bigtable(
+    session_load: bigframes.Session, bigtable_table: table.Table
+):
+    # launch a continuous query
+    job_id_prefix = "test_streaming_"
+    sdf = session_load.read_gbq_table_streaming("birds.penguins_bigtable_streaming")
+
+    sdf = sdf[["species", "island", "body_mass_g"]]
+    sdf = sdf[sdf["body_mass_g"] < 4000]
+    sdf = sdf.rename(columns={"island": "rowkey"})
+
+    try:
+        query_job = sdf.to_bigtable(
+            instance="streaming-testing-instance",
+            table=bigtable_table.table_id,
+            service_account_email="streaming-testing-admin@bigframes-load-testing.iam.gserviceaccount.com",
+            app_profile=None,
+            truncate=True,
+            overwrite=True,
+            auto_create_column_families=True,
+            bigtable_options={},
+            job_id=None,
+            job_id_prefix=job_id_prefix,
+        )
+
+        # wait 100 seconds in order to ensure the query doesn't stop
+        # (i.e. it is continuous)
+        time.sleep(100)
+        assert query_job.running()
+        assert query_job.error_result is None
+        assert str(query_job.job_id).startswith(job_id_prefix)
+        assert len(list(bigtable_table.read_rows())) > 0
+    finally:
+        query_job.cancel()
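
As a hypothetical follow-up to the final assertion above, the rows written by the streaming job could be inspected with the same legacy Bigtable client objects; the helper name is an assumption, and the cell layout follows the body_mass_g column family created in the fixture:

from google.cloud.bigtable import table


def dump_rows(bt_table: table.Table) -> None:
    # read_rows() yields PartialRowData objects, keyed first by column
    # family and then by column qualifier.
    for row in bt_table.read_rows():
        for qualifier, cells in row.cells.get("body_mass_g", {}).items():
            # MaxVersionsGCRule(1) in the fixture keeps at most one
            # version per cell, so cells[0] is the latest value.
            print(row.row_key, qualifier, cells[0].value)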

tests/system/large/test_streaming.py renamed to tests/system/large/streaming/test_pubsub.py

Lines changed: 5 additions & 82 deletions

@@ -1,4 +1,4 @@
-# Copyright 2024 Google LLC
+# Copyright 2025 Google LLC
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -13,62 +13,22 @@
 # limitations under the License.
 
 from concurrent import futures
-import time
 from typing import Generator
 import uuid
 
-from google.cloud import bigtable, pubsub  # type: ignore
-from google.cloud.bigtable import column_family, instance, table
 import pytest
 
 import bigframes
 
+pytest.importorskip("google.cloud.pubsub")
+from google.cloud import pubsub  # type: ignore # noqa
+
 
 def resource_name_full(project_id: str, resource_type: str, resource_id: str):
+    """Used for bigtable or pubsub resources."""
     return f"projects/{project_id}/{resource_type}/{resource_id}"
 
 
-@pytest.fixture(scope="session")
-def bigtable_instance(session_load: bigframes.Session) -> instance.Instance:
-    client = bigtable.Client(project=session_load._project, admin=True)
-
-    instance_name = "streaming-testing-instance"
-    bt_instance = instance.Instance(
-        instance_name,
-        client,
-    )
-
-    if not bt_instance.exists():
-        cluster_id = "streaming-testing-instance-c1"
-        cluster = bt_instance.cluster(
-            cluster_id,
-            location_id="us-west1-a",
-            serve_nodes=1,
-        )
-        operation = bt_instance.create(
-            clusters=[cluster],
-        )
-        operation.result(timeout=480)
-    return bt_instance
-
-
-@pytest.fixture(scope="function")
-def bigtable_table(
-    bigtable_instance: instance.Instance,
-) -> Generator[table.Table, None, None]:
-    table_id = "bigframes_test_" + uuid.uuid4().hex
-    bt_table = table.Table(
-        table_id,
-        bigtable_instance,
-    )
-    max_versions_rule = column_family.MaxVersionsGCRule(1)
-    column_family_id = "body_mass_g"
-    column_families = {column_family_id: max_versions_rule}
-    bt_table.create(column_families=column_families)
-    yield bt_table
-    bt_table.delete()
-
-
 @pytest.fixture(scope="function")
 def pubsub_topic_id(session_load: bigframes.Session) -> Generator[str, None, None]:
     publisher = pubsub.PublisherClient()
@@ -98,43 +58,6 @@ def pubsub_topic_subscription_ids(
     subscriber.delete_subscription(subscription=subscription_name)
 
 
-@pytest.mark.flaky(retries=3, delay=10)
-def test_streaming_df_to_bigtable(
-    session_load: bigframes.Session, bigtable_table: table.Table
-):
-    # launch a continuous query
-    job_id_prefix = "test_streaming_"
-    sdf = session_load.read_gbq_table_streaming("birds.penguins_bigtable_streaming")
-
-    sdf = sdf[["species", "island", "body_mass_g"]]
-    sdf = sdf[sdf["body_mass_g"] < 4000]
-    sdf = sdf.rename(columns={"island": "rowkey"})
-
-    try:
-        query_job = sdf.to_bigtable(
-            instance="streaming-testing-instance",
-            table=bigtable_table.table_id,
-            service_account_email="streaming-testing-admin@bigframes-load-testing.iam.gserviceaccount.com",
-            app_profile=None,
-            truncate=True,
-            overwrite=True,
-            auto_create_column_families=True,
-            bigtable_options={},
-            job_id=None,
-            job_id_prefix=job_id_prefix,
-        )
-
-        # wait 100 seconds in order to ensure the query doesn't stop
-        # (i.e. it is continuous)
-        time.sleep(100)
-        assert query_job.running()
-        assert query_job.error_result is None
-        assert str(query_job.job_id).startswith(job_id_prefix)
-        assert len(list(bigtable_table.read_rows())) > 0
-    finally:
-        query_job.cancel()
-
-
 @pytest.mark.flaky(retries=3, delay=10)
 def test_streaming_df_to_pubsub(
     session_load: bigframes.Session, pubsub_topic_subscription_ids: tuple[str, str]
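
The body of test_streaming_df_to_pubsub lies outside the hunks shown in this view. A hedged sketch of how such a test might confirm delivery via Pub/Sub's streaming-pull API, which would account for the retained `from concurrent import futures` import; every name other than the pubsub client calls is an assumption:

from concurrent import futures

from google.cloud import pubsub


def wait_for_first_message(subscription_name: str, timeout: float = 300.0):
    """Block until one message arrives on the subscription, then ack it."""
    subscriber = pubsub.SubscriberClient()
    received: futures.Future = futures.Future()

    def callback(message) -> None:
        message.ack()
        if not received.done():
            received.set_result(message)

    # subscribe() returns a StreamingPullFuture that delivers messages on a
    # background thread until cancelled.
    streaming_pull = subscriber.subscribe(subscription_name, callback=callback)
    try:
        return received.result(timeout=timeout)
    finally:
        streaming_pull.cancel()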
