
feat: add deprecation warning to Gemini-1.5-X, text-embedding-004, and remove legacy models in notebooks and docs #1723

Merged · 9 commits · May 16, 2025
Changes from all commits
28 changes: 28 additions & 0 deletions bigframes/ml/llm.py
@@ -112,11 +112,18 @@
"If you proceed with '{model_name}', it might not work as expected or could lead to errors with multimodal inputs."
)

_MODEL_DEPRECATE_WARNING = (
"'{model_name}' is going to be deprecated. Use '{new_model_name}' ({link}) instead."
)


@log_adapter.class_logger
class TextEmbeddingGenerator(base.RetriableRemotePredictor):
"""Text embedding generator LLM model.

.. note::
text-embedding-004 is going to be deprecated. Use text-embedding-005 (https://cloud.google.com/python/docs/reference/bigframes/latest/bigframes.ml.llm.TextEmbeddingGenerator) instead.

Args:
model_name (str, Defaults to "text-embedding-004"):
The model for text embedding. Possible values are "text-embedding-005", "text-embedding-004"
@@ -169,6 +176,15 @@ def _create_bqml_model(self):
)
)
warnings.warn(msg)
if self.model_name == "text-embedding-004":
msg = exceptions.format_message(
_MODEL_DEPRECATE_WARNING.format(
model_name=self.model_name,
new_model_name="text-embedding-005",
link="https://cloud.google.com/python/docs/reference/bigframes/latest/bigframes.ml.llm.TextEmbeddingGenerator",
)
)
warnings.warn(msg)

options = {
"endpoint": self.model_name,
@@ -416,6 +432,7 @@ class GeminiTextGenerator(base.RetriableRemotePredictor):
default and a warning will be issued.

.. note::
"gemini-1.5-X" is going to be deprecated. Please use gemini-2.0-X instead. For example, "gemini-2.0-flash-001".
"gemini-2.0-flash-exp", "gemini-1.5-pro-preview-0514" and "gemini-1.5-flash-preview-0514" is subject to the "Pre-GA Offerings Terms" in the General Service Terms section of the
Service Specific Terms(https://cloud.google.com/terms/service-terms#1). Pre-GA products and features are available "as is"
and might have limited support. For more information, see the launch stage descriptions
@@ -461,10 +478,12 @@ def __init__(
"(https://cloud.google.com/products#product-launch-stages)."
)
warnings.warn(msg, category=exceptions.PreviewWarning)

if model_name is None:
model_name = "gemini-2.0-flash-001"
msg = exceptions.format_message(_REMOVE_DEFAULT_MODEL_WARNING)
warnings.warn(msg, category=FutureWarning, stacklevel=2)

self.model_name = model_name
self.session = session or global_session.get_global_session()
self.max_iterations = max_iterations
@@ -487,6 +506,15 @@ def _create_bqml_model(self):
)
)
warnings.warn(msg)
if self.model_name.startswith("gemini-1.5"):
msg = exceptions.format_message(
_MODEL_DEPRECATE_WARNING.format(
model_name=self.model_name,
new_model_name="gemini-2.0-X",
link="https://cloud.google.com/python/docs/reference/bigframes/latest/bigframes.ml.llm.GeminiTextGenerator",
)
)
warnings.warn(msg)

options = {"endpoint": self.model_name}

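To make the review easier, here is a minimal, self-contained sketch of the warning path this file now takes. The helper name `warn_if_deprecated` and the bare `warnings.warn` calls are illustrative only; in bigframes the checks live inside `_create_bqml_model` and the message is routed through `exceptions.format_message`.

```python
import warnings

# Illustrative stand-ins for the pieces added in bigframes/ml/llm.py.
_MODEL_DEPRECATE_WARNING = (
    "'{model_name}' is going to be deprecated. Use '{new_model_name}' ({link}) instead."
)

_EMBEDDING_DOC = "https://cloud.google.com/python/docs/reference/bigframes/latest/bigframes.ml.llm.TextEmbeddingGenerator"
_GEMINI_DOC = "https://cloud.google.com/python/docs/reference/bigframes/latest/bigframes.ml.llm.GeminiTextGenerator"


def warn_if_deprecated(model_name: str) -> None:
    """Mirror the two checks added to _create_bqml_model."""
    if model_name == "text-embedding-004":
        warnings.warn(
            _MODEL_DEPRECATE_WARNING.format(
                model_name=model_name,
                new_model_name="text-embedding-005",
                link=_EMBEDDING_DOC,
            )
        )
    elif model_name.startswith("gemini-1.5"):
        warnings.warn(
            _MODEL_DEPRECATE_WARNING.format(
                model_name=model_name,
                new_model_name="gemini-2.0-X",
                link=_GEMINI_DOC,
            )
        )


warn_if_deprecated("gemini-1.5-flash-002")  # emits the deprecation message
warn_if_deprecated("gemini-2.0-flash-001")  # stays silent
```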
2 changes: 1 addition & 1 deletion bigframes/operations/semantics.py
@@ -252,7 +252,7 @@ def cluster_by(
>>> bpd.options.compute.semantic_ops_confirmation_threshold = 25

>>> import bigframes.ml.llm as llm
>>> model = llm.TextEmbeddingGenerator()
>>> model = llm.TextEmbeddingGenerator(model_name="text-embedding-005")

>>> df = bpd.DataFrame({
... "Product": ["Smartphone", "Laptop", "T-shirt", "Jeans"],
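The rest of the PR updates docs, notebooks, samples, and tests to pin the replacement models explicitly, as in the `cluster_by` docstring above. A migration sketch follows; it assumes a bigframes session and BigQuery connection are already configured, and the `predict` calls are commented out because they run real BigQuery jobs (`df` is a hypothetical DataFrame).

```python
from bigframes.ml import llm

# Pin the non-deprecated models explicitly instead of relying on defaults.
embedder = llm.TextEmbeddingGenerator(model_name="text-embedding-005")
gemini = llm.GeminiTextGenerator(model_name="gemini-2.0-flash-001")

# embeddings = embedder.predict(df["Product"])  # df is a hypothetical DataFrame
# answers = gemini.predict(df["prompt"])
```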
2 changes: 1 addition & 1 deletion notebooks/apps/synthetic_data_generation.ipynb
@@ -111,7 +111,7 @@
"source": [
"from bigframes.ml.llm import GeminiTextGenerator\n",
"\n",
"model = GeminiTextGenerator(model_name=\"gemini-1.5-flash-002\")"
"model = GeminiTextGenerator(model_name=\"gemini-2.0-flash-001\")"
]
},
{
@@ -430,7 +430,7 @@
"source": [
"from bigframes.ml.llm import GeminiTextGenerator\n",
"\n",
"model = GeminiTextGenerator(model_name=\"gemini-1.5-flash-002\")"
"model = GeminiTextGenerator(model_name=\"gemini-2.0-flash-001\")"
]
},
{
2 changes: 1 addition & 1 deletion notebooks/generative_ai/bq_dataframes_llm_kmeans.ipynb
@@ -1614,7 +1614,7 @@
"source": [
"from bigframes.ml.llm import GeminiTextGenerator\n",
"\n",
"q_a_model = GeminiTextGenerator(model_name=\"gemini-1.5-flash-002\")"
"q_a_model = GeminiTextGenerator(model_name=\"gemini-2.0-flash-001\")"
]
},
{
@@ -1816,7 +1816,7 @@
"source": [
"## gemini model\n",
"\n",
"llm_model = bf_llm.GeminiTextGenerator(model_name = \"gemini-1.5-flash-002\") ## replace with other model as needed"
"llm_model = bf_llm.GeminiTextGenerator(model_name = \"gemini-2.0-flash-001\") ## replace with other model as needed"
]
},
{
@@ -581,7 +581,7 @@
],
"source": [
"# Define the model\n",
"model = GeminiTextGenerator(model_name=\"gemini-1.5-flash-002\")\n",
"model = GeminiTextGenerator(model_name=\"gemini-2.0-flash-001\")\n",
"\n",
"# Invoke LLM with prompt\n",
"response = predict(zero_shot_prompt, temperature = TEMPERATURE)\n",
16 changes: 8 additions & 8 deletions notebooks/generative_ai/large_language_models.ipynb
@@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
@@ -21,23 +21,23 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_604997/3896046356.py:1: ApiDeprecationWarning: gemini-pro and gemini-1.5-X are going to be deprecated. Use gemini-2.0-X (https://cloud.google.com/python/docs/reference/bigframes/latest/bigframes.ml.llm.GeminiTextGenerator) instead. \n",
" model = GeminiTextGenerator(model_name=\"gemini-1.5-flash-002\")\n",
"/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/ml/llm.py:981: DefaultLocationWarning: No explicit location is set, so using location US for the session.\n",
"/tmp/ipykernel_176683/987800245.py:1: ApiDeprecationWarning: gemini-1.5-X are going to be deprecated. Use gemini-2.0-X (https://cloud.google.com/python/docs/reference/bigframes/latest/bigframes.ml.llm.GeminiTextGenerator) instead. \n",
" model = GeminiTextGenerator(model_name=\"gemini-2.0-flash-001\")\n",
"/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/ml/llm.py:486: DefaultLocationWarning: No explicit location is set, so using location US for the session.\n",
" self.session = session or global_session.get_global_session()\n"
]
},
{
"data": {
"text/html": [
"Query job dd2da3cc-27c3-4c6f-9936-4f7769c85090 is DONE. 0 Bytes processed. <a target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-dev&j=bq:US:dd2da3cc-27c3-4c6f-9936-4f7769c85090&page=queryresults\">Open Job</a>"
"Query job 6fa5121a-6da4-4c75-92ec-936799da4513 is DONE. 0 Bytes processed. <a target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-dev&j=bq:US:6fa5121a-6da4-4c75-92ec-936799da4513&page=queryresults\">Open Job</a>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
@@ -49,7 +49,7 @@
{
"data": {
"text/html": [
"Query job 00947011-4d7c-42fa-ae19-3b684976cec6 is DONE. 0 Bytes processed. <a target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-dev&j=bq:US:00947011-4d7c-42fa-ae19-3b684976cec6&page=queryresults\">Open Job</a>"
"Query job 74460ae9-3e89-49e7-93ad-bafbb6197a86 is DONE. 0 Bytes processed. <a target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-dev&j=bq:US:74460ae9-3e89-49e7-93ad-bafbb6197a86&page=queryresults\">Open Job</a>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
@@ -60,7 +60,7 @@
}
],
"source": [
"model = GeminiTextGenerator(model_name=\"gemini-1.5-flash-002\")"
"model = GeminiTextGenerator(model_name=\"gemini-2.0-flash-001\")"
]
},
{
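If a notebook still needs to construct one of the deprecated models while migrating, the new warning can be silenced with the standard library alone; this is plain Python, not a bigframes API, and the regex simply matches the `_MODEL_DEPRECATE_WARNING` template shown above.

```python
import warnings

with warnings.catch_warnings():
    # Ignore only the deprecation message added in this PR; other warnings still surface.
    warnings.filterwarnings("ignore", message=".*going to be deprecated.*")
    # model = GeminiTextGenerator(model_name="gemini-1.5-flash-002")
```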
2 changes: 1 addition & 1 deletion notebooks/getting_started/bq_dataframes_template.ipynb
@@ -1419,7 +1419,7 @@
"source": [
"# from bigframes.ml.llm import GeminiTextGenerator\n",
"\n",
"# model = GeminiTextGenerator(model_name=\"gemini-1.5-flash-002\")\n",
"# model = GeminiTextGenerator(model_name=\"gemini-2.0-flash-001\")\n",
"\n",
"# pred = model.predict(df)\n",
"# pred"
2 changes: 1 addition & 1 deletion samples/snippets/gemini_model_test.py
@@ -30,7 +30,7 @@ def test_gemini_text_generator_model() -> None:
session = bpd.get_global_session()
connection = f"{PROJECT_ID}.{REGION}.{CONN_NAME}"
model = GeminiTextGenerator(
session=session, connection_name=connection, model_name="gemini-1.5-flash-002"
session=session, connection_name=connection, model_name="gemini-2.0-flash-001"
)

df_api = bpd.read_csv("gs://cloud-samples-data/vertex-ai/bigframe/df.csv")
2 changes: 1 addition & 1 deletion samples/snippets/multimodal_test.py
@@ -78,7 +78,7 @@ def test_multimodal_dataframe(gcs_dst_bucket: str) -> None:
# [START bigquery_dataframes_multimodal_dataframe_ml_text]
from bigframes.ml import llm

gemini = llm.GeminiTextGenerator(model_name="gemini-1.5-flash-002")
gemini = llm.GeminiTextGenerator(model_name="gemini-2.0-flash-001")

# Deal with first 2 images as example
df_image = df_image.head(2)
2 changes: 1 addition & 1 deletion tests/system/large/operations/conftest.py
@@ -22,7 +22,7 @@ def gemini_flash_model(session, bq_connection) -> llm.GeminiTextGenerator:
return llm.GeminiTextGenerator(
session=session,
connection_name=bq_connection,
model_name="gemini-1.5-flash-001",
model_name="gemini-2.0-flash-001",
)


6 changes: 3 additions & 3 deletions tests/system/large/operations/test_ai.py
@@ -434,7 +434,7 @@ def test_join_with_confirmation(session, gemini_flash_model, reply, monkeypatch)
def test_self_join(session, gemini_flash_model):
animals = dataframe.DataFrame(
data={
"animal": ["spider", "capybara"],
"animal": ["ant", "elephant"],
},
session=session,
)
@@ -453,8 +453,8 @@ def test_self_join(session, gemini_flash_model):

expected_df = pd.DataFrame(
{
"animal_left": ["capybara"],
"animal_right": ["spider"],
"animal_left": ["elephant"],
"animal_right": ["ant"],
}
)
pandas.testing.assert_frame_equal(
15 changes: 8 additions & 7 deletions tests/system/large/operations/test_semantics.py
@@ -86,7 +86,7 @@ def test_agg(session, gemini_flash_model, max_agg_rows, cluster_column):
cluster_column=cluster_column,
).to_pandas()

expected_s = pd.Series(["Leonardo \n"], dtype=dtypes.STRING_DTYPE)
expected_s = pd.Series(["Leonardo\n"], dtype=dtypes.STRING_DTYPE)
expected_s.name = "Movies"
pandas.testing.assert_series_equal(actual_s, expected_s, check_index_type=False)

@@ -137,12 +137,13 @@ def test_agg_w_int_column(session, gemini_flash_model):
"Movies": [
"Killers of the Flower Moon",
"The Great Gatsby",
"The Wolf of Wall Street",
],
"Years": [2023, 2013],
"Years": [2023, 2013, 2013],
},
session=session,
)
instruction = "Find the {Years} Leonardo DiCaprio acted in the most movies. Answer with the year only."
instruction = "Find the {Years} Leonardo DiCaprio acted in the most movies. Your answer should be the four-digit year, returned as a string."

with bigframes.option_context(
SEM_OP_EXP_OPTION,
Expand All @@ -155,7 +156,7 @@ def test_agg_w_int_column(session, gemini_flash_model):
model=gemini_flash_model,
).to_pandas()

expected_s = pd.Series(["2013 \n"], dtype=dtypes.STRING_DTYPE)
expected_s = pd.Series(["2013\n"], dtype=dtypes.STRING_DTYPE)
expected_s.name = "Years"
pandas.testing.assert_series_equal(actual_s, expected_s, check_index_type=False)

@@ -764,7 +765,7 @@ def test_join_with_confirmation(session, gemini_flash_model, reply, monkeypatch)
def test_self_join(session, gemini_flash_model):
animals = dataframe.DataFrame(
data={
"animal": ["spider", "capybara"],
"animal": ["ant", "elephant"],
},
session=session,
)
@@ -783,8 +784,8 @@

expected_df = pd.DataFrame(
{
"animal_left": ["capybara"],
"animal_right": ["spider"],
"animal_left": ["elephant"],
"animal_right": ["ant"],
}
)
pandas.testing.assert_frame_equal(
2 changes: 1 addition & 1 deletion tests/system/load/test_llm.py
@@ -81,7 +81,7 @@ def test_llm_gemini_configure_fit(

@pytest.mark.flaky(retries=2)
def test_llm_gemini_w_ground_with_google_search(llm_remote_text_df):
model = llm.GeminiTextGenerator(model_name="gemini-1.5-flash-002", max_iterations=1)
model = llm.GeminiTextGenerator(model_name="gemini-2.0-flash-001", max_iterations=1)
df = model.predict(
llm_remote_text_df["prompt"],
ground_with_google_search=True,