Skip to content

Commit 7666976

Browse files
committed
Merge remote-tracking branch 'origin/main' into b329460931-rename-inplace
2 parents 45f9232 + bb45db8 commit 7666976

File tree

78 files changed

+1993
-662
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

78 files changed

+1993
-662
lines changed

bigframes/_config/__init__.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,12 +56,21 @@ class Options:
5656
"""Global options affecting BigQuery DataFrames behavior."""
5757

5858
def __init__(self):
59+
self.reset()
60+
61+
def reset(self) -> Options:
62+
"""Reset the option settings to defaults.
63+
64+
Returns:
65+
bigframes._config.Options: Options object with default values.
66+
"""
5967
self._local = ThreadLocalConfig()
6068

6169
# BigQuery options are special because they can only be set once per
6270
# session, so we need an indicator as to whether we are using the
6371
# thread-local session or the global session.
6472
self._bigquery_options = bigquery_options.BigQueryOptions()
73+
return self
6574

6675
def _init_bigquery_thread_local(self):
6776
"""Initialize thread-local options, based on current global options."""

bigframes/_config/bigquery_options.py

Lines changed: 46 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,10 +16,11 @@
1616

1717
from __future__ import annotations
1818

19-
from typing import Literal, Optional
19+
from typing import Literal, Optional, Sequence, Tuple
2020
import warnings
2121

2222
import google.auth.credentials
23+
import requests.adapters
2324

2425
import bigframes.enums
2526
import bigframes.exceptions as bfe
@@ -90,6 +91,9 @@ def __init__(
9091
allow_large_results: bool = False,
9192
ordering_mode: Literal["strict", "partial"] = "strict",
9293
client_endpoints_override: Optional[dict] = None,
94+
requests_transport_adapters: Sequence[
95+
Tuple[str, requests.adapters.BaseAdapter]
96+
] = (),
9397
):
9498
self._credentials = credentials
9599
self._project = project
@@ -100,6 +104,7 @@ def __init__(
100104
self._kms_key_name = kms_key_name
101105
self._skip_bq_connection_check = skip_bq_connection_check
102106
self._allow_large_results = allow_large_results
107+
self._requests_transport_adapters = requests_transport_adapters
103108
self._session_started = False
104109
# Determines the ordering strictness for the session.
105110
self._ordering_mode = _validate_ordering_mode(ordering_mode)
@@ -379,3 +384,43 @@ def client_endpoints_override(self, value: dict):
379384
)
380385

381386
self._client_endpoints_override = value
387+
388+
@property
389+
def requests_transport_adapters(
390+
self,
391+
) -> Sequence[Tuple[str, requests.adapters.BaseAdapter]]:
392+
"""Transport adapters for requests-based REST clients such as the
393+
google-cloud-bigquery package.
394+
395+
For more details, see the explanation in `requests guide to transport
396+
adapters
397+
<https://requests.readthedocs.io/en/latest/user/advanced/#transport-adapters>`_.
398+
399+
**Examples:**
400+
401+
Increase the connection pool size using the requests `HTTPAdapter
402+
<https://requests.readthedocs.io/en/latest/api/#requests.adapters.HTTPAdapter>`_.
403+
404+
>>> import bigframes.pandas as bpd
405+
>>> bpd.options.bigquery.requests_transport_adapters = (
406+
... ("http://", requests.adapters.HTTPAdapter(pool_maxsize=100)),
407+
... ("https://", requests.adapters.HTTPAdapter(pool_maxsize=100)),
408+
... ) # doctest: +SKIP
409+
410+
Returns:
411+
Sequence[Tuple[str, requests.adapters.BaseAdapter]]:
412+
Prefixes and corresponding transport adapters to `mount
413+
<https://requests.readthedocs.io/en/latest/api/#requests.Session.mount>`_
414+
in requests-based REST clients.
415+
"""
416+
return self._requests_transport_adapters
417+
418+
@requests_transport_adapters.setter
419+
def requests_transport_adapters(
420+
self, value: Sequence[Tuple[str, requests.adapters.BaseAdapter]]
421+
) -> None:
422+
if self._session_started and self._requests_transport_adapters != value:
423+
raise ValueError(
424+
SESSION_STARTED_MESSAGE.format(attribute="requests_transport_adapters")
425+
)
426+
self._requests_transport_adapters = value

bigframes/bigquery/__init__.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@
3737
json_extract,
3838
json_extract_array,
3939
json_extract_string_array,
40+
json_query,
4041
json_set,
4142
json_value,
4243
parse_json,
@@ -58,10 +59,11 @@
5859
"st_distance",
5960
"st_intersection",
6061
# json ops
61-
"json_set",
6262
"json_extract",
6363
"json_extract_array",
6464
"json_extract_string_array",
65+
"json_query",
66+
"json_set",
6567
"json_value",
6668
"parse_json",
6769
# search ops

bigframes/bigquery/_operations/json.py

Lines changed: 45 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,9 +22,11 @@
2222
from __future__ import annotations
2323

2424
from typing import Any, cast, Optional, Sequence, Tuple, Union
25+
import warnings
2526

2627
import bigframes.core.utils as utils
2728
import bigframes.dtypes
29+
import bigframes.exceptions as bfe
2830
import bigframes.operations as ops
2931
import bigframes.series as series
3032

@@ -87,9 +89,13 @@ def json_extract(
8789
input: series.Series,
8890
json_path: str,
8991
) -> series.Series:
90-
"""Extracts a JSON value and converts it to a SQL JSON-formatted `STRING` or `JSON`
91-
value. This function uses single quotes and brackets to escape invalid JSONPath
92-
characters in JSON keys.
92+
"""Extracts a JSON value and converts it to a SQL JSON-formatted ``STRING`` or
93+
``JSON`` value. This function uses single quotes and brackets to escape invalid
94+
JSONPath characters in JSON keys.
95+
96+
.. deprecated:: 2.5.0
97+
The ``json_extract`` function is deprecated and will be removed in a future version.
98+
Use ``json_query`` instead.
9399
94100
**Examples:**
95101
@@ -111,6 +117,11 @@ def json_extract(
111117
Returns:
112118
bigframes.series.Series: A new Series with the JSON or JSON-formatted STRING.
113119
"""
120+
msg = (
121+
"The `json_extract` function is deprecated and will be removed in a future version. "
122+
"Use `json_query` instead."
123+
)
124+
warnings.warn(bfe.format_message(msg), category=UserWarning)
114125
return input._apply_unary_op(ops.JSONExtract(json_path=json_path))
115126

116127

@@ -231,6 +242,37 @@ def json_extract_string_array(
231242
return array_series
232243

233244

245+
def json_query(
246+
input: series.Series,
247+
json_path: str,
248+
) -> series.Series:
249+
"""Extracts a JSON value and converts it to a SQL JSON-formatted ``STRING``
250+
or ``JSON`` value. This function uses double quotes to escape invalid JSONPath
251+
characters in JSON keys. For example: ``"a.b"``.
252+
253+
**Examples:**
254+
255+
>>> import bigframes.pandas as bpd
256+
>>> import bigframes.bigquery as bbq
257+
>>> bpd.options.display.progress_bar = None
258+
259+
>>> s = bpd.Series(['{"class": {"students": [{"id": 5}, {"id": 12}]}}'])
260+
>>> bbq.json_query(s, json_path="$.class")
261+
0 {"students":[{"id":5},{"id":12}]}
262+
dtype: string
263+
264+
Args:
265+
input (bigframes.series.Series):
266+
The Series containing JSON data (as native JSON objects or JSON-formatted strings).
267+
json_path (str):
268+
The JSON path identifying the data that you want to obtain from the input.
269+
270+
Returns:
271+
bigframes.series.Series: A new Series with the JSON or JSON-formatted STRING.
272+
"""
273+
return input._apply_unary_op(ops.JSONQuery(json_path=json_path))
274+
275+
234276
def json_value(
235277
input: series.Series,
236278
json_path: str,

bigframes/bigquery/_operations/sql.py

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020

2121
import google.cloud.bigquery
2222

23+
import bigframes.core.compile.sqlglot.sqlglot_ir as sqlglot_ir
2324
import bigframes.core.sql
2425
import bigframes.dataframe
2526
import bigframes.dtypes
@@ -72,16 +73,16 @@ def sql_scalar(
7273
# Another benefit of this is that if there is a syntax error in the SQL
7374
# template, then this will fail with an error earlier in the process,
7475
# aiding users in debugging.
75-
base_series = columns[0]
76-
literals = [
77-
bigframes.dtypes.bigframes_dtype_to_literal(column.dtype) for column in columns
76+
literals_sql = [
77+
sqlglot_ir._literal(None, column.dtype).sql(dialect="bigquery")
78+
for column in columns
7879
]
79-
literals_sql = [bigframes.core.sql.simple_literal(literal) for literal in literals]
80+
select_sql = sql_template.format(*literals_sql)
81+
dry_run_sql = f"SELECT {select_sql}"
8082

8183
# Use the executor directly, because we want the original column IDs, not
8284
# the user-friendly column names that block.to_sql_query() would produce.
83-
select_sql = sql_template.format(*literals_sql)
84-
dry_run_sql = f"SELECT {select_sql}"
85+
base_series = columns[0]
8586
bqclient = base_series._session.bqclient
8687
job = bqclient.query(
8788
dry_run_sql, job_config=google.cloud.bigquery.QueryJobConfig(dry_run=True)

bigframes/constants.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,4 +128,8 @@
128128
# BigQuery default is 10000, leave 100 for overhead
129129
MAX_COLUMNS = 9900
130130

131+
# BigQuery has 1 MB query size limit. Don't want to take up more than a few % of that inlining a table.
132+
# Also must assume that text encoding as literals is much less efficient than in-memory representation.
133+
MAX_INLINE_BYTES = 5000
134+
131135
SUGGEST_PEEK_PREVIEW = "Use .peek(n) to preview n arbitrary rows."

bigframes/core/array_value.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -133,8 +133,17 @@ def from_table(
133133
ordering=ordering,
134134
n_rows=n_rows,
135135
)
136+
return cls.from_bq_data_source(source_def, scan_list, session)
137+
138+
@classmethod
139+
def from_bq_data_source(
140+
cls,
141+
source: nodes.BigqueryDataSource,
142+
scan_list: nodes.ScanList,
143+
session: Session,
144+
):
136145
node = nodes.ReadTableNode(
137-
source=source_def,
146+
source=source,
138147
scan_list=scan_list,
139148
table_session=session,
140149
)

bigframes/core/bigframe_node.py

Lines changed: 3 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
import typing
2323
from typing import Callable, Dict, Generator, Iterable, Mapping, Sequence, Set, Tuple
2424

25-
from bigframes.core import identifiers
25+
from bigframes.core import field, identifiers
2626
import bigframes.core.schema as schemata
2727
import bigframes.dtypes
2828

@@ -34,23 +34,6 @@
3434
T = typing.TypeVar("T")
3535

3636

37-
@dataclasses.dataclass(frozen=True)
38-
class Field:
39-
id: identifiers.ColumnId
40-
dtype: bigframes.dtypes.Dtype
41-
# Best effort, nullable=True if not certain
42-
nullable: bool = True
43-
44-
def with_nullable(self) -> Field:
45-
return Field(self.id, self.dtype, nullable=True)
46-
47-
def with_nonnull(self) -> Field:
48-
return Field(self.id, self.dtype, nullable=False)
49-
50-
def with_id(self, id: identifiers.ColumnId) -> Field:
51-
return Field(id, self.dtype, nullable=self.nullable)
52-
53-
5437
@dataclasses.dataclass(eq=False, frozen=True)
5538
class BigFrameNode:
5639
"""
@@ -162,7 +145,7 @@ def roots(self) -> typing.Set[BigFrameNode]:
162145
# TODO: Store some local data lazily for select, aggregate nodes.
163146
@property
164147
@abc.abstractmethod
165-
def fields(self) -> Sequence[Field]:
148+
def fields(self) -> Sequence[field.Field]:
166149
...
167150

168151
@property
@@ -292,7 +275,7 @@ def _dtype_lookup(self) -> dict[identifiers.ColumnId, bigframes.dtypes.Dtype]:
292275
return {field.id: field.dtype for field in self.fields}
293276

294277
@functools.cached_property
295-
def field_by_id(self) -> Mapping[identifiers.ColumnId, Field]:
278+
def field_by_id(self) -> Mapping[identifiers.ColumnId, field.Field]:
296279
return {field.id: field for field in self.fields}
297280

298281
# Plan algorithms

bigframes/core/blocks.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2166,7 +2166,7 @@ def merge(
21662166
result_columns.append(get_column_left[col_id])
21672167
for col_id in other.value_columns:
21682168
if col_id in right_join_ids:
2169-
if other.col_id_to_label[matching_right_id] in matching_join_labels:
2169+
if other.col_id_to_label[col_id] in matching_join_labels:
21702170
pass
21712171
else:
21722172
result_columns.append(get_column_right[col_id])

bigframes/core/compile/scalar_op_compiler.py

Lines changed: 16 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1356,6 +1356,19 @@ def json_extract_string_array_op_impl(
13561356
return json_extract_string_array(json_obj=x, json_path=op.json_path)
13571357

13581358

1359+
@scalar_op_compiler.register_unary_op(ops.JSONQuery, pass_op=True)
1360+
def json_query_op_impl(x: ibis_types.Value, op: ops.JSONQuery):
1361+
# Define a user-defined function whose returned type is dynamically matching the input.
1362+
def json_query(json_or_json_string, json_path: ibis_dtypes.str): # type: ignore
1363+
"""Extracts a JSON value and converts it to a SQL JSON-formatted STRING or JSON value."""
1364+
...
1365+
1366+
return_type = x.type()
1367+
json_query.__annotations__["return"] = return_type
1368+
json_query_op = ibis_udf.scalar.builtin(json_query)
1369+
return json_query_op(json_or_json_string=x, json_path=op.json_path)
1370+
1371+
13591372
@scalar_op_compiler.register_unary_op(ops.ParseJSON, pass_op=True)
13601373
def parse_json_op_impl(x: ibis_types.Value, op: ops.ParseJSON):
13611374
return parse_json(json_str=x)
@@ -1935,34 +1948,18 @@ def clip_op(
19351948
if isinstance(lower, ibis_types.NullScalar) and (
19361949
not isinstance(upper, ibis_types.NullScalar)
19371950
):
1938-
return (
1939-
ibis_api.case() # type: ignore
1940-
.when(upper.isnull() | (original > upper), upper)
1941-
.else_(original)
1942-
.end()
1943-
)
1951+
return ibis_api.least(original, upper)
19441952
elif (not isinstance(lower, ibis_types.NullScalar)) and isinstance(
19451953
upper, ibis_types.NullScalar
19461954
):
1947-
return (
1948-
ibis_api.case() # type: ignore
1949-
.when(lower.isnull() | (original < lower), lower)
1950-
.else_(original)
1951-
.end()
1952-
)
1955+
return ibis_api.greatest(original, lower)
19531956
elif isinstance(lower, ibis_types.NullScalar) and (
19541957
isinstance(upper, ibis_types.NullScalar)
19551958
):
19561959
return original
19571960
else:
19581961
# Note: Pandas has unchanged behavior when upper bound and lower bound are flipped. This implementation requires that lower_bound < upper_bound
1959-
return (
1960-
ibis_api.case() # type: ignore
1961-
.when(lower.isnull() | (original < lower), lower)
1962-
.when(upper.isnull() | (original > upper), upper)
1963-
.else_(original)
1964-
.end()
1965-
)
1962+
return ibis_api.greatest(ibis_api.least(original, upper), lower)
19661963

19671964

19681965
# N-ary Operations

0 commit comments

Comments
 (0)