Skip to content

feat: allow functions returned from bpd.read_gbq_function to execute outside of apply #706

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 20 commits into from
Jun 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
e24944c
feat: allow functions decorated with `@bpd.remote_function` to execut…
tswast May 17, 2024
0475c39
fix read_gbq_function
tswast May 17, 2024
47b49a9
feat: allow functions returned from `bpd.read_gbq_function` to execute outside of apply
tswast May 17, 2024
9a3170c
Merge remote-tracking branch 'origin/main' into tswast-remote-functio…
tswast May 20, 2024
cc11de5
fix for rare case where re-deploy exact same function object
tswast May 20, 2024
05724b3
Merge branch 'main' into tswast-remote-function-local-testing
tswast May 22, 2024
395b9ee
feat: support type annotations to supply input and output types to `@…
tswast May 22, 2024
41f10a0
Merge branch 'main' into b336023152-remote_function-type-annotations
tswast May 22, 2024
c31d2d4
Merge branch 'main' into tswast-remote-function-local-testing
tswast May 24, 2024
1869aba
Merge remote-tracking branch 'origin/main' into b336023152-remote_fun…
tswast May 24, 2024
1cf934e
make tests robust to cloud function listing failures too
tswast May 24, 2024
59da6f4
Merge remote-tracking branch 'origin/b336023152-remote_function-type-…
tswast May 24, 2024
740c7a0
Merge remote-tracking branch 'origin/main' into b336023152-remote_fun…
tswast May 24, 2024
682afed
Merge branch 'b336023152-remote_function-type-annotations' into tswas…
tswast May 24, 2024
2292fb8
Merge remote-tracking branch 'origin/main' into tswast-remote-functio…
tswast May 25, 2024
310dfbc
Merge branch 'tswast-remote-function-local-testing' into tswast-read_…
tswast May 25, 2024
98e5f2f
Merge remote-tracking branch 'origin/main' into tswast-read_gbq_funct…
tswast May 30, 2024
b3d8223
remove unused bigquery_client argument
tswast May 30, 2024
b1ae0b9
add test that function can be called directly
tswast May 30, 2024
1366688
Merge branch 'main' into tswast-read_gbq_function-local-testing
tswast Jun 4, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 8 additions & 13 deletions bigframes/functions/remote_function.py
Original file line number Diff line number Diff line change
Expand Up @@ -1146,21 +1146,14 @@ def try_delattr(attr):

def read_gbq_function(
function_name: str,
session: Optional[Session] = None,
bigquery_client: Optional[bigquery.Client] = None,
*,
session: Session,
):
"""
Read an existing BigQuery function and prepare it for use in future queries.
"""

# A BigQuery client is required to perform BQ operations
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Outside of system tests, no code path passed in `bigquery_client` without also passing `session`, so I'm removing this unnecessary fallback logic.

if not bigquery_client and session:
bigquery_client = session.bqclient
if not bigquery_client:
raise ValueError(
"A bigquery client must be provided, either directly or via session. "
f"{constants.FEEDBACK_LINK}"
)
bigquery_client = session.bqclient
ibis_client = session.ibis_client

try:
routine_ref = get_routine_reference(function_name, bigquery_client, session)
Expand Down Expand Up @@ -1192,8 +1185,10 @@ def read_gbq_function(
# non-standard names for the arguments here.
def func(*ignored_args, **ignored_kwargs):
f"""Remote function {str(routine_ref)}."""
# TODO(swast): Construct an ibis client from bigquery_client and
# execute node via a query.
nonlocal node # type: ignore

expr = node(*ignored_args, **ignored_kwargs) # type: ignore
return ibis_client.execute(expr)

# TODO: Move ibis logic to compiler step
func.__name__ = routine_ref.routine_id
Expand Down
35 changes: 25 additions & 10 deletions tests/system/small/test_remote_function.py
Original file line number Diff line number Diff line change
Expand Up @@ -537,19 +537,20 @@ def add_one(x):


@pytest.mark.flaky(retries=2, delay=120)
def test_read_gbq_function_detects_invalid_function(bigquery_client, dataset_id):
def test_read_gbq_function_detects_invalid_function(session, dataset_id):
dataset_ref = bigquery.DatasetReference.from_string(dataset_id)
with pytest.raises(ValueError) as e:
rf.read_gbq_function(
str(dataset_ref.routine("not_a_function")),
bigquery_client=bigquery_client,
session=session,
)

assert "Unknown function" in str(e.value)


@pytest.mark.flaky(retries=2, delay=120)
def test_read_gbq_function_like_original(
session,
bigquery_client,
bigqueryconnection_client,
cloudfunctions_client,
Expand Down Expand Up @@ -577,7 +578,7 @@ def square1(x):

square2 = rf.read_gbq_function(
function_name=square1.bigframes_remote_function,
bigquery_client=bigquery_client,
session=session,
)

# The newly-created function (square1) should have a remote function AND a
Expand Down Expand Up @@ -607,7 +608,14 @@ def square1(x):


@pytest.mark.flaky(retries=2, delay=120)
def test_read_gbq_function_reads_udfs(bigquery_client, dataset_id):
def test_read_gbq_function_runs_existing_udf(session, bigquery_client, dataset_id):
    """A function loaded with ``session.read_gbq_function`` can be called directly.

    Loads ``bqutil.fn.cw_lower_case_ascii_only`` and calls it on a plain
    Python string instead of going through ``apply``. The expected result
    has the ASCII letters lowercased and the non-ASCII ``É`` unchanged.
    """
    lower_ascii_only = session.read_gbq_function("bqutil.fn.cw_lower_case_ascii_only")
    actual = lower_ascii_only("AURÉLIE")
    expected = "aurÉlie"
    assert actual == expected


@pytest.mark.flaky(retries=2, delay=120)
def test_read_gbq_function_reads_udfs(session, bigquery_client, dataset_id):
dataset_ref = bigquery.DatasetReference.from_string(dataset_id)
arg = bigquery.RoutineArgument(
name="x",
Expand All @@ -633,7 +641,8 @@ def test_read_gbq_function_reads_udfs(bigquery_client, dataset_id):
# Create the routine in BigQuery and read it back using read_gbq_function.
bigquery_client.create_routine(routine, exists_ok=True)
square = rf.read_gbq_function(
str(routine.reference), bigquery_client=bigquery_client
str(routine.reference),
session=session,
)

# It should point to the named routine and yield the expected results.
Expand All @@ -658,7 +667,9 @@ def test_read_gbq_function_reads_udfs(bigquery_client, dataset_id):


@pytest.mark.flaky(retries=2, delay=120)
def test_read_gbq_function_enforces_explicit_types(bigquery_client, dataset_id):
def test_read_gbq_function_enforces_explicit_types(
session, bigquery_client, dataset_id
):
dataset_ref = bigquery.DatasetReference.from_string(dataset_id)
typed_arg = bigquery.RoutineArgument(
name="x",
Expand Down Expand Up @@ -702,18 +713,22 @@ def test_read_gbq_function_enforces_explicit_types(bigquery_client, dataset_id):
bigquery_client.create_routine(neither_type_specified, exists_ok=True)

rf.read_gbq_function(
str(both_types_specified.reference), bigquery_client=bigquery_client
str(both_types_specified.reference),
session=session,
)
rf.read_gbq_function(
str(only_return_type_specified.reference), bigquery_client=bigquery_client
str(only_return_type_specified.reference),
session=session,
)
with pytest.raises(ValueError):
rf.read_gbq_function(
str(only_arg_type_specified.reference), bigquery_client=bigquery_client
str(only_arg_type_specified.reference),
session=session,
)
with pytest.raises(ValueError):
rf.read_gbq_function(
str(neither_type_specified.reference), bigquery_client=bigquery_client
str(neither_type_specified.reference),
session=session,
)


Expand Down