Skip to content

feat: merge only generates a default index if both inputs already have an index #733

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
May 31, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions bigframes/_config/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,10 +61,12 @@ def _init_bigquery_thread_local(self):
@property
def bigquery(self) -> bigquery_options.BigQueryOptions:
"""Options to use with the BigQuery engine."""
if self._local.bigquery_options is not None:
if (
bigquery_options := getattr(self._local, "bigquery_options", None)
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not 100% sure how this was failing, but for some reason self._local.bigquery_options wasn't being initialized when I ran from a notebook in VS Code.

I think it's because the original Options.__init__ from the import ran in a different thread from the one where the actual cell code was running.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed by #741

) is not None:
# The only way we can get here is if someone called
# _init_bigquery_thread_local.
return self._local.bigquery_options
return bigquery_options

return self._bigquery_options

Expand Down
22 changes: 17 additions & 5 deletions bigframes/core/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ def __init__(
if len(index_columns) == 0:
warnings.warn(
"Creating object with Null Index. Null Index is a preview feature.",
category=bigframes.exceptions.PreviewWarning,
category=bigframes.exceptions.NullIndexPreviewWarning,
)
self._index_columns = tuple(index_columns)
# Index labels don't need complicated hierarchical access so can store as tuple
Expand Down Expand Up @@ -1930,10 +1930,22 @@ def merge(
coalesce_labels=matching_join_labels,
suffixes=suffixes,
)
# Constructs default index
offset_index_id = guid.generate_guid()
expr = joined_expr.promote_offsets(offset_index_id)
return Block(expr, index_columns=[offset_index_id], column_labels=labels)

# Construct a default index only if this object and the other both have
# indexes. In other words, joining anything to a NULL index object
# keeps everything as a NULL index.
#
# This keeps us from generating an index if the user joins a large
# BigQuery table against small local data, for example.
if len(self._index_columns) > 0 and len(other._index_columns) > 0:
offset_index_id = guid.generate_guid()
expr = joined_expr.promote_offsets(offset_index_id)
index_columns = [offset_index_id]
else:
expr = joined_expr
index_columns = []

return Block(expr, index_columns=index_columns, column_labels=labels)

def join(
self,
Expand Down
4 changes: 4 additions & 0 deletions bigframes/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,10 @@ class PreviewWarning(Warning):
"""The feature is in preview."""


class NullIndexPreviewWarning(PreviewWarning):
"""Null index feature is in preview."""


class NullIndexError(ValueError):
"""Object has no index."""

Expand Down
Loading