Commit 80aad9a
feat: add deprecation warning to Gemini-1.5-X, text-embedding-004, and remove legacy models in notebooks and docs (#1723)
* change all model_name from gemini-1.5-pro to gemini-2.0 due to model deprecation
* add warning for deprecated models
* add space for style
* rewording note
* test change
* fix failed test
* add comment back
* use warning instead
* remove replicated notes
1 parent 1df8ca6 commit 80aad9a

15 files changed: +58 -29 lines

bigframes/ml/llm.py

Lines changed: 28 additions & 0 deletions
@@ -112,11 +112,18 @@
     "If you proceed with '{model_name}', it might not work as expected or could lead to errors with multimodal inputs."
 )

+_MODEL_DEPRECATE_WARNING = (
+    "'{model_name}' is going to be deprecated. Use '{new_model_name}' ({link}) instead."
+)
+

 @log_adapter.class_logger
 class TextEmbeddingGenerator(base.RetriableRemotePredictor):
     """Text embedding generator LLM model.

+    .. note::
+        text-embedding-004 is going to be deprecated. Use text-embedding-005(https://cloud.google.com/python/docs/reference/bigframes/latest/bigframes.ml.llm.TextEmbeddingGenerator) instead.
+
     Args:
         model_name (str, Default to "text-embedding-004"):
             The model for text embedding. Possible values are "text-embedding-005", "text-embedding-004"
@@ -169,6 +176,15 @@ def _create_bqml_model(self):
                 )
             )
             warnings.warn(msg)
+        if self.model_name == "text-embedding-004":
+            msg = exceptions.format_message(
+                _MODEL_DEPRECATE_WARNING.format(
+                    model_name=self.model_name,
+                    new_model_name="text-embedding-005",
+                    link="https://cloud.google.com/python/docs/reference/bigframes/latest/bigframes.ml.llm.TextEmbeddingGenerator",
+                )
+            )
+            warnings.warn(msg)

         options = {
             "endpoint": self.model_name,
@@ -416,6 +432,7 @@ class GeminiTextGenerator(base.RetriableRemotePredictor):
     default and a warning will be issued.

     .. note::
+        "gemini-1.5-X" is going to be deprecated. Please use gemini-2.0-X instead. For example, "gemini-2.0-flash-001".
         "gemini-2.0-flash-exp", "gemini-1.5-pro-preview-0514" and "gemini-1.5-flash-preview-0514" is subject to the "Pre-GA Offerings Terms" in the General Service Terms section of the
         Service Specific Terms(https://cloud.google.com/terms/service-terms#1). Pre-GA products and features are available "as is"
         and might have limited support. For more information, see the launch stage descriptions
@@ -461,10 +478,12 @@ def __init__(
                 "(https://cloud.google.com/products#product-launch-stages)."
             )
             warnings.warn(msg, category=exceptions.PreviewWarning)
+
         if model_name is None:
             model_name = "gemini-2.0-flash-001"
             msg = exceptions.format_message(_REMOVE_DEFAULT_MODEL_WARNING)
             warnings.warn(msg, category=FutureWarning, stacklevel=2)
+
         self.model_name = model_name
         self.session = session or global_session.get_global_session()
         self.max_iterations = max_iterations
@@ -487,6 +506,15 @@ def _create_bqml_model(self):
                 )
             )
             warnings.warn(msg)
+        if self.model_name.startswith("gemini-1.5"):
+            msg = exceptions.format_message(
+                _MODEL_DEPRECATE_WARNING.format(
+                    model_name=self.model_name,
+                    new_model_name="gemini-2.0-X",
+                    link="https://cloud.google.com/python/docs/reference/bigframes/latest/bigframes.ml.llm.GeminiTextGenerator",
+                )
+            )
+            warnings.warn(msg)

         options = {"endpoint": self.model_name}
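The warning itself is plain string formatting plus warnings.warn, so its behavior can be previewed without a BigQuery session. Below is a minimal sketch with the template copied from the diff above; exceptions.format_message is omitted for brevity, and check_deprecated is a hypothetical helper, not part of bigframes.

import warnings

# Template copied from the diff above; in bigframes it lives in bigframes/ml/llm.py.
_MODEL_DEPRECATE_WARNING = (
    "'{model_name}' is going to be deprecated. Use '{new_model_name}' ({link}) instead."
)


def check_deprecated(model_name: str) -> None:
    # Mirrors the checks this commit adds to _create_bqml_model.
    if model_name.startswith("gemini-1.5"):
        warnings.warn(
            _MODEL_DEPRECATE_WARNING.format(
                model_name=model_name,
                new_model_name="gemini-2.0-X",
                link="https://cloud.google.com/python/docs/reference/bigframes/latest/bigframes.ml.llm.GeminiTextGenerator",
            )
        )
    elif model_name == "text-embedding-004":
        warnings.warn(
            _MODEL_DEPRECATE_WARNING.format(
                model_name=model_name,
                new_model_name="text-embedding-005",
                link="https://cloud.google.com/python/docs/reference/bigframes/latest/bigframes.ml.llm.TextEmbeddingGenerator",
            )
        )


check_deprecated("gemini-1.5-flash-002")   # emits the deprecation warning
check_deprecated("gemini-2.0-flash-001")   # no warning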
bigframes/operations/semantics.py

Lines changed: 1 addition & 1 deletion
@@ -252,7 +252,7 @@ def cluster_by(
         >>> bpd.options.compute.semantic_ops_confirmation_threshold = 25

         >>> import bigframes.ml.llm as llm
-        >>> model = llm.TextEmbeddingGenerator()
+        >>> model = llm.TextEmbeddingGenerator(model_name="text-embedding-005")

         >>> df = bpd.DataFrame({
         ...     "Product": ["Smartphone", "Laptop", "T-shirt", "Jeans"],

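The updated doctest pins the embedding model explicitly instead of relying on the soon-to-be-deprecated default. A hedged end-to-end sketch of the same pattern, assuming an authenticated BigQuery DataFrames session and the semantic-operator options shown in the surrounding docstring; the cluster_by call is indicative only, so consult the full docstring for the exact signature:

import bigframes.pandas as bpd
import bigframes.ml.llm as llm

# Both options mirror the cluster_by docstring; semantic operators are experimental.
bpd.options.experiments.semantic_operators = True
bpd.options.compute.semantic_ops_confirmation_threshold = 25

# Pinning model_name avoids the new text-embedding-004 deprecation warning.
model = llm.TextEmbeddingGenerator(model_name="text-embedding-005")

df = bpd.DataFrame({
    "Product": ["Smartphone", "Laptop", "T-shirt", "Jeans"],
})
# Group the products into two clusters using the embedding model.
df.semantics.cluster_by("Product", "Cluster ID", model, n_clusters=2)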
notebooks/apps/synthetic_data_generation.ipynb

Lines changed: 1 addition & 1 deletion
@@ -111,7 +111,7 @@
    "source": [
     "from bigframes.ml.llm import GeminiTextGenerator\n",
     "\n",
-    "model = GeminiTextGenerator(model_name=\"gemini-1.5-flash-002\")"
+    "model = GeminiTextGenerator(model_name=\"gemini-2.0-flash-001\")"
    ]
   },
   {

notebooks/generative_ai/bq_dataframes_llm_code_generation.ipynb

Lines changed: 1 addition & 1 deletion
@@ -430,7 +430,7 @@
    "source": [
     "from bigframes.ml.llm import GeminiTextGenerator\n",
     "\n",
-    "model = GeminiTextGenerator(model_name=\"gemini-1.5-flash-002\")"
+    "model = GeminiTextGenerator(model_name=\"gemini-2.0-flash-001\")"
    ]
   },
   {

notebooks/generative_ai/bq_dataframes_llm_kmeans.ipynb

Lines changed: 1 addition & 1 deletion
@@ -1614,7 +1614,7 @@
    "source": [
     "from bigframes.ml.llm import GeminiTextGenerator\n",
     "\n",
-    "q_a_model = GeminiTextGenerator(model_name=\"gemini-1.5-flash-002\")"
+    "q_a_model = GeminiTextGenerator(model_name=\"gemini-2.0-flash-001\")"
    ]
   },
   {

notebooks/generative_ai/bq_dataframes_llm_vector_search.ipynb

Lines changed: 1 addition & 1 deletion
@@ -1816,7 +1816,7 @@
    "source": [
     "## gemini model\n",
     "\n",
-    "llm_model = bf_llm.GeminiTextGenerator(model_name = \"gemini-1.5-flash-002\") ## replace with other model as needed"
+    "llm_model = bf_llm.GeminiTextGenerator(model_name = \"gemini-2.0-flash-001\") ## replace with other model as needed"
    ]
   },
   {

notebooks/generative_ai/bq_dataframes_ml_drug_name_generation.ipynb

Lines changed: 1 addition & 1 deletion
@@ -581,7 +581,7 @@
    ],
    "source": [
     "# Define the model\n",
-    "model = GeminiTextGenerator(model_name=\"gemini-1.5-flash-002\")\n",
+    "model = GeminiTextGenerator(model_name=\"gemini-2.0-flash-001\")\n",
     "\n",
     "# Invoke LLM with prompt\n",
     "response = predict(zero_shot_prompt, temperature = TEMPERATURE)\n",

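Every notebook edit above (and the commented-out cell in bq_dataframes_template.ipynb further down) is the same one-line swap. The before/after pattern, with model names taken directly from the diff:

from bigframes.ml.llm import GeminiTextGenerator

# Before: now triggers the deprecation warning added in bigframes/ml/llm.py.
# model = GeminiTextGenerator(model_name="gemini-1.5-flash-002")

# After: the replacement model used throughout the updated notebooks.
model = GeminiTextGenerator(model_name="gemini-2.0-flash-001")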
notebooks/generative_ai/large_language_models.ipynb

Lines changed: 8 additions & 8 deletions
@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 2,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -21,23 +21,23 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 3,
    "metadata": {},
    "outputs": [
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "/tmp/ipykernel_604997/3896046356.py:1: ApiDeprecationWarning: gemini-pro and gemini-1.5-X are going to be deprecated. Use gemini-2.0-X (https://cloud.google.com/python/docs/reference/bigframes/latest/bigframes.ml.llm.GeminiTextGenerator) instead. \n",
-      " model = GeminiTextGenerator(model_name=\"gemini-1.5-flash-002\")\n",
-      "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/ml/llm.py:981: DefaultLocationWarning: No explicit location is set, so using location US for the session.\n",
+      "/tmp/ipykernel_176683/987800245.py:1: ApiDeprecationWarning: gemini-1.5-X are going to be deprecated. Use gemini-2.0-X (https://cloud.google.com/python/docs/reference/bigframes/latest/bigframes.ml.llm.GeminiTextGenerator) instead. \n",
+      " model = GeminiTextGenerator(model_name=\"gemini-2.0-flash-001\")\n",
+      "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/ml/llm.py:486: DefaultLocationWarning: No explicit location is set, so using location US for the session.\n",
      " self.session = session or global_session.get_global_session()\n"
     ]
    },
    {
     "data": {
      "text/html": [
-      "Query job dd2da3cc-27c3-4c6f-9936-4f7769c85090 is DONE. 0 Bytes processed. <a target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-dev&j=bq:US:dd2da3cc-27c3-4c6f-9936-4f7769c85090&page=queryresults\">Open Job</a>"
+      "Query job 6fa5121a-6da4-4c75-92ec-936799da4513 is DONE. 0 Bytes processed. <a target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-dev&j=bq:US:6fa5121a-6da4-4c75-92ec-936799da4513&page=queryresults\">Open Job</a>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
@@ -49,7 +49,7 @@
    {
     "data": {
      "text/html": [
-      "Query job 00947011-4d7c-42fa-ae19-3b684976cec6 is DONE. 0 Bytes processed. <a target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-dev&j=bq:US:00947011-4d7c-42fa-ae19-3b684976cec6&page=queryresults\">Open Job</a>"
+      "Query job 74460ae9-3e89-49e7-93ad-bafbb6197a86 is DONE. 0 Bytes processed. <a target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-dev&j=bq:US:74460ae9-3e89-49e7-93ad-bafbb6197a86&page=queryresults\">Open Job</a>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
@@ -60,7 +60,7 @@
    }
   ],
   "source": [
-   "model = GeminiTextGenerator(model_name=\"gemini-1.5-flash-002\")"
+   "model = GeminiTextGenerator(model_name=\"gemini-2.0-flash-001\")"
   ]
  },
  {

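The stderr cell above shows how the deprecation message surfaces in a notebook. If a project deliberately stays on an older model for now, the message can be silenced with the standard library; this sketch filters on the message text rather than the warning class (ApiDeprecationWarning in the output above is a bigframes-internal name, so matching on text is the more portable assumption):

import warnings

# Ignore only the model-deprecation message; other bigframes warnings still surface.
warnings.filterwarnings("ignore", message=".*is going to be deprecated.*")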
notebooks/getting_started/bq_dataframes_template.ipynb

Lines changed: 1 addition & 1 deletion
@@ -1419,7 +1419,7 @@
    "source": [
     "# from bigframes.ml.llm import GeminiTextGenerator\n",
     "\n",
-    "# model = GeminiTextGenerator(model_name=\"gemini-1.5-flash-002\")\n",
+    "# model = GeminiTextGenerator(model_name=\"gemini-2.0-flash-001\")\n",
     "\n",
     "# pred = model.predict(df)\n",
     "# pred"

samples/snippets/gemini_model_test.py

Lines changed: 1 addition & 1 deletion
@@ -30,7 +30,7 @@ def test_gemini_text_generator_model() -> None:
     session = bpd.get_global_session()
     connection = f"{PROJECT_ID}.{REGION}.{CONN_NAME}"
     model = GeminiTextGenerator(
-        session=session, connection_name=connection, model_name="gemini-1.5-flash-002"
+        session=session, connection_name=connection, model_name="gemini-2.0-flash-001"
     )

     df_api = bpd.read_csv("gs://cloud-samples-data/vertex-ai/bigframe/df.csv")

samples/snippets/multimodal_test.py

Lines changed: 1 addition & 1 deletion
@@ -78,7 +78,7 @@ def test_multimodal_dataframe(gcs_dst_bucket: str) -> None:
     # [START bigquery_dataframes_multimodal_dataframe_ml_text]
     from bigframes.ml import llm

-    gemini = llm.GeminiTextGenerator(model_name="gemini-1.5-flash-002")
+    gemini = llm.GeminiTextGenerator(model_name="gemini-2.0-flash-001")

     # Deal with first 2 images as example
     df_image = df_image.head(2)

tests/system/large/operations/conftest.py

Lines changed: 1 addition & 1 deletion
@@ -22,7 +22,7 @@ def gemini_flash_model(session, bq_connection) -> llm.GeminiTextGenerator:
     return llm.GeminiTextGenerator(
         session=session,
         connection_name=bq_connection,
-        model_name="gemini-1.5-flash-001",
+        model_name="gemini-2.0-flash-001",
     )

tests/system/large/operations/test_ai.py

Lines changed: 3 additions & 3 deletions
@@ -434,7 +434,7 @@ def test_join_with_confirmation(session, gemini_flash_model, reply, monkeypatch)
 def test_self_join(session, gemini_flash_model):
     animals = dataframe.DataFrame(
         data={
-            "animal": ["spider", "capybara"],
+            "animal": ["ant", "elephant"],
         },
         session=session,
     )
@@ -453,8 +453,8 @@ def test_self_join(session, gemini_flash_model):

     expected_df = pd.DataFrame(
         {
-            "animal_left": ["capybara"],
-            "animal_right": ["spider"],
+            "animal_left": ["elephant"],
+            "animal_right": ["ant"],
         }
     )
     pandas.testing.assert_frame_equal(

tests/system/large/operations/test_semantics.py

Lines changed: 8 additions & 7 deletions
@@ -86,7 +86,7 @@ def test_agg(session, gemini_flash_model, max_agg_rows, cluster_column):
         cluster_column=cluster_column,
     ).to_pandas()

-    expected_s = pd.Series(["Leonardo \n"], dtype=dtypes.STRING_DTYPE)
+    expected_s = pd.Series(["Leonardo\n"], dtype=dtypes.STRING_DTYPE)
     expected_s.name = "Movies"
     pandas.testing.assert_series_equal(actual_s, expected_s, check_index_type=False)

@@ -137,12 +137,13 @@ def test_agg_w_int_column(session, gemini_flash_model):
             "Movies": [
                 "Killers of the Flower Moon",
                 "The Great Gatsby",
+                "The Wolf of Wall Street",
             ],
-            "Years": [2023, 2013],
+            "Years": [2023, 2013, 2013],
         },
         session=session,
     )
-    instruction = "Find the {Years} Leonardo DiCaprio acted in the most movies. Answer with the year only."
+    instruction = "Find the {Years} Leonardo DiCaprio acted in the most movies. Your answer should be the four-digit year, returned as a string."

     with bigframes.option_context(
         SEM_OP_EXP_OPTION,
@@ -155,7 +156,7 @@ def test_agg_w_int_column(session, gemini_flash_model):
         model=gemini_flash_model,
     ).to_pandas()

-    expected_s = pd.Series(["2013 \n"], dtype=dtypes.STRING_DTYPE)
+    expected_s = pd.Series(["2013\n"], dtype=dtypes.STRING_DTYPE)
     expected_s.name = "Years"
     pandas.testing.assert_series_equal(actual_s, expected_s, check_index_type=False)

@@ -764,7 +765,7 @@ def test_join_with_confirmation(session, gemini_flash_model, reply, monkeypatch)
 def test_self_join(session, gemini_flash_model):
     animals = dataframe.DataFrame(
         data={
-            "animal": ["spider", "capybara"],
+            "animal": ["ant", "elephant"],
         },
         session=session,
     )
@@ -783,8 +784,8 @@ def test_self_join(session, gemini_flash_model):

     expected_df = pd.DataFrame(
         {
-            "animal_left": ["capybara"],
-            "animal_right": ["spider"],
+            "animal_left": ["elephant"],
+            "animal_right": ["ant"],
         }
     )
     pandas.testing.assert_frame_equal(

tests/system/load/test_llm.py

Lines changed: 1 addition & 1 deletion
@@ -81,7 +81,7 @@ def test_llm_gemini_configure_fit(

 @pytest.mark.flaky(retries=2)
 def test_llm_gemini_w_ground_with_google_search(llm_remote_text_df):
-    model = llm.GeminiTextGenerator(model_name="gemini-1.5-flash-002", max_iterations=1)
+    model = llm.GeminiTextGenerator(model_name="gemini-2.0-flash-001", max_iterations=1)
     df = model.predict(
         llm_remote_text_df["prompt"],
         ground_with_google_search=True,

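None of the updated tests assert on the new warning itself. A sketch of how that could be covered, reusing the session and bq_connection fixtures from tests/system/large/operations/conftest.py and assuming the warning is emitted during model construction, as the notebook output above suggests (the test name and match pattern are illustrative, not part of this commit):

import pytest

from bigframes.ml import llm


def test_gemini_15_model_emits_deprecation_warning(session, bq_connection):
    # The message text comes from _MODEL_DEPRECATE_WARNING added in this commit.
    with pytest.warns(Warning, match="is going to be deprecated"):
        llm.GeminiTextGenerator(
            session=session,
            connection_name=bq_connection,
            model_name="gemini-1.5-flash-002",
        )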