diff --git a/bigframes/ml/llm.py b/bigframes/ml/llm.py
index 591d18e3b5..11861c786e 100644
--- a/bigframes/ml/llm.py
+++ b/bigframes/ml/llm.py
@@ -112,11 +112,18 @@
"If you proceed with '{model_name}', it might not work as expected or could lead to errors with multimodal inputs."
)
+_MODEL_DEPRECATE_WARNING = (
+ "'{model_name}' is going to be deprecated. Use '{new_model_name}' ({link}) instead."
+)
+
@log_adapter.class_logger
class TextEmbeddingGenerator(base.RetriableRemotePredictor):
"""Text embedding generator LLM model.
+ .. note::
+    text-embedding-004 is going to be deprecated. Use text-embedding-005 (https://cloud.google.com/python/docs/reference/bigframes/latest/bigframes.ml.llm.TextEmbeddingGenerator) instead.
+
Args:
model_name (str, Default to "text-embedding-004"):
The model for text embedding. Possible values are "text-embedding-005", "text-embedding-004"
@@ -169,6 +176,15 @@ def _create_bqml_model(self):
)
)
warnings.warn(msg)
+ if self.model_name == "text-embedding-004":
+ msg = exceptions.format_message(
+ _MODEL_DEPRECATE_WARNING.format(
+ model_name=self.model_name,
+ new_model_name="text-embedding-005",
+ link="https://cloud.google.com/python/docs/reference/bigframes/latest/bigframes.ml.llm.TextEmbeddingGenerator",
+ )
+ )
+ warnings.warn(msg)
options = {
"endpoint": self.model_name,
@@ -416,6 +432,7 @@ class GeminiTextGenerator(base.RetriableRemotePredictor):
default and a warning will be issued.
.. note::
+        "gemini-1.5-X" is going to be deprecated. Please use "gemini-2.0-X" instead. For example, "gemini-2.0-flash-001".
"gemini-2.0-flash-exp", "gemini-1.5-pro-preview-0514" and "gemini-1.5-flash-preview-0514" is subject to the "Pre-GA Offerings Terms" in the General Service Terms section of the
Service Specific Terms(https://cloud.google.com/terms/service-terms#1). Pre-GA products and features are available "as is"
and might have limited support. For more information, see the launch stage descriptions
@@ -461,10 +478,12 @@ def __init__(
"(https://cloud.google.com/products#product-launch-stages)."
)
warnings.warn(msg, category=exceptions.PreviewWarning)
+
if model_name is None:
model_name = "gemini-2.0-flash-001"
msg = exceptions.format_message(_REMOVE_DEFAULT_MODEL_WARNING)
warnings.warn(msg, category=FutureWarning, stacklevel=2)
+
self.model_name = model_name
self.session = session or global_session.get_global_session()
self.max_iterations = max_iterations
@@ -487,6 +506,15 @@ def _create_bqml_model(self):
)
)
warnings.warn(msg)
+ if self.model_name.startswith("gemini-1.5"):
+ msg = exceptions.format_message(
+ _MODEL_DEPRECATE_WARNING.format(
+ model_name=self.model_name,
+ new_model_name="gemini-2.0-X",
+ link="https://cloud.google.com/python/docs/reference/bigframes/latest/bigframes.ml.llm.GeminiTextGenerator",
+ )
+ )
+ warnings.warn(msg)
options = {"endpoint": self.model_name}
diff --git a/bigframes/operations/semantics.py b/bigframes/operations/semantics.py
index 60d619992a..9fa5450748 100644
--- a/bigframes/operations/semantics.py
+++ b/bigframes/operations/semantics.py
@@ -252,7 +252,7 @@ def cluster_by(
>>> bpd.options.compute.semantic_ops_confirmation_threshold = 25
>>> import bigframes.ml.llm as llm
- >>> model = llm.TextEmbeddingGenerator()
+ >>> model = llm.TextEmbeddingGenerator(model_name="text-embedding-005")
>>> df = bpd.DataFrame({
... "Product": ["Smartphone", "Laptop", "T-shirt", "Jeans"],
diff --git a/notebooks/apps/synthetic_data_generation.ipynb b/notebooks/apps/synthetic_data_generation.ipynb
index f830e35c16..b59777a5da 100644
--- a/notebooks/apps/synthetic_data_generation.ipynb
+++ b/notebooks/apps/synthetic_data_generation.ipynb
@@ -111,7 +111,7 @@
"source": [
"from bigframes.ml.llm import GeminiTextGenerator\n",
"\n",
- "model = GeminiTextGenerator(model_name=\"gemini-1.5-flash-002\")"
+ "model = GeminiTextGenerator(model_name=\"gemini-2.0-flash-001\")"
]
},
{
diff --git a/notebooks/generative_ai/bq_dataframes_llm_code_generation.ipynb b/notebooks/generative_ai/bq_dataframes_llm_code_generation.ipynb
index 788111cfe6..edb864613c 100644
--- a/notebooks/generative_ai/bq_dataframes_llm_code_generation.ipynb
+++ b/notebooks/generative_ai/bq_dataframes_llm_code_generation.ipynb
@@ -430,7 +430,7 @@
"source": [
"from bigframes.ml.llm import GeminiTextGenerator\n",
"\n",
- "model = GeminiTextGenerator(model_name=\"gemini-1.5-flash-002\")"
+ "model = GeminiTextGenerator(model_name=\"gemini-2.0-flash-001\")"
]
},
{
diff --git a/notebooks/generative_ai/bq_dataframes_llm_kmeans.ipynb b/notebooks/generative_ai/bq_dataframes_llm_kmeans.ipynb
index 31a47ea424..9b05e1ab02 100644
--- a/notebooks/generative_ai/bq_dataframes_llm_kmeans.ipynb
+++ b/notebooks/generative_ai/bq_dataframes_llm_kmeans.ipynb
@@ -1614,7 +1614,7 @@
"source": [
"from bigframes.ml.llm import GeminiTextGenerator\n",
"\n",
- "q_a_model = GeminiTextGenerator(model_name=\"gemini-1.5-flash-002\")"
+ "q_a_model = GeminiTextGenerator(model_name=\"gemini-2.0-flash-001\")"
]
},
{
diff --git a/notebooks/generative_ai/bq_dataframes_llm_vector_search.ipynb b/notebooks/generative_ai/bq_dataframes_llm_vector_search.ipynb
index a15209aae4..15929fd666 100644
--- a/notebooks/generative_ai/bq_dataframes_llm_vector_search.ipynb
+++ b/notebooks/generative_ai/bq_dataframes_llm_vector_search.ipynb
@@ -1816,7 +1816,7 @@
"source": [
"## gemini model\n",
"\n",
- "llm_model = bf_llm.GeminiTextGenerator(model_name = \"gemini-1.5-flash-002\") ## replace with other model as needed"
+ "llm_model = bf_llm.GeminiTextGenerator(model_name = \"gemini-2.0-flash-001\") ## replace with other model as needed"
]
},
{
diff --git a/notebooks/generative_ai/bq_dataframes_ml_drug_name_generation.ipynb b/notebooks/generative_ai/bq_dataframes_ml_drug_name_generation.ipynb
index c65a0f0854..413e473c2f 100644
--- a/notebooks/generative_ai/bq_dataframes_ml_drug_name_generation.ipynb
+++ b/notebooks/generative_ai/bq_dataframes_ml_drug_name_generation.ipynb
@@ -581,7 +581,7 @@
],
"source": [
"# Define the model\n",
- "model = GeminiTextGenerator(model_name=\"gemini-1.5-flash-002\")\n",
+ "model = GeminiTextGenerator(model_name=\"gemini-2.0-flash-001\")\n",
"\n",
"# Invoke LLM with prompt\n",
"response = predict(zero_shot_prompt, temperature = TEMPERATURE)\n",
diff --git a/notebooks/generative_ai/large_language_models.ipynb b/notebooks/generative_ai/large_language_models.ipynb
index 4a0d2f2b3c..1d7bc7f6ef 100644
--- a/notebooks/generative_ai/large_language_models.ipynb
+++ b/notebooks/generative_ai/large_language_models.ipynb
@@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
- "execution_count": 1,
+ "execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
@@ -21,23 +21,23 @@
},
{
"cell_type": "code",
- "execution_count": 2,
+ "execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
- "/tmp/ipykernel_604997/3896046356.py:1: ApiDeprecationWarning: gemini-pro and gemini-1.5-X are going to be deprecated. Use gemini-2.0-X (https://cloud.google.com/python/docs/reference/bigframes/latest/bigframes.ml.llm.GeminiTextGenerator) instead. \n",
- " model = GeminiTextGenerator(model_name=\"gemini-1.5-flash-002\")\n",
- "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/ml/llm.py:981: DefaultLocationWarning: No explicit location is set, so using location US for the session.\n",
+ "/tmp/ipykernel_176683/987800245.py:1: ApiDeprecationWarning: gemini-1.5-X are going to be deprecated. Use gemini-2.0-X (https://cloud.google.com/python/docs/reference/bigframes/latest/bigframes.ml.llm.GeminiTextGenerator) instead. \n",
+ " model = GeminiTextGenerator(model_name=\"gemini-2.0-flash-001\")\n",
+ "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/ml/llm.py:486: DefaultLocationWarning: No explicit location is set, so using location US for the session.\n",
" self.session = session or global_session.get_global_session()\n"
]
},
{
"data": {
"text/html": [
- "Query job dd2da3cc-27c3-4c6f-9936-4f7769c85090 is DONE. 0 Bytes processed. Open Job"
+ "Query job 6fa5121a-6da4-4c75-92ec-936799da4513 is DONE. 0 Bytes processed. Open Job"
],
"text/plain": [
""
@@ -49,7 +49,7 @@
{
"data": {
"text/html": [
- "Query job 00947011-4d7c-42fa-ae19-3b684976cec6 is DONE. 0 Bytes processed. Open Job"
+ "Query job 74460ae9-3e89-49e7-93ad-bafbb6197a86 is DONE. 0 Bytes processed. Open Job"
],
"text/plain": [
""
@@ -60,7 +60,7 @@
}
],
"source": [
- "model = GeminiTextGenerator(model_name=\"gemini-1.5-flash-002\")"
+ "model = GeminiTextGenerator(model_name=\"gemini-2.0-flash-001\")"
]
},
{
diff --git a/notebooks/getting_started/bq_dataframes_template.ipynb b/notebooks/getting_started/bq_dataframes_template.ipynb
index 12847483ac..68c5e9f74d 100644
--- a/notebooks/getting_started/bq_dataframes_template.ipynb
+++ b/notebooks/getting_started/bq_dataframes_template.ipynb
@@ -1419,7 +1419,7 @@
"source": [
"# from bigframes.ml.llm import GeminiTextGenerator\n",
"\n",
- "# model = GeminiTextGenerator(model_name=\"gemini-1.5-flash-002\")\n",
+ "# model = GeminiTextGenerator(model_name=\"gemini-2.0-flash-001\")\n",
"\n",
"# pred = model.predict(df)\n",
"# pred"
diff --git a/samples/snippets/gemini_model_test.py b/samples/snippets/gemini_model_test.py
index cf809ebb3a..fe5d7d5b1e 100644
--- a/samples/snippets/gemini_model_test.py
+++ b/samples/snippets/gemini_model_test.py
@@ -30,7 +30,7 @@ def test_gemini_text_generator_model() -> None:
session = bpd.get_global_session()
connection = f"{PROJECT_ID}.{REGION}.{CONN_NAME}"
model = GeminiTextGenerator(
- session=session, connection_name=connection, model_name="gemini-1.5-flash-002"
+ session=session, connection_name=connection, model_name="gemini-2.0-flash-001"
)
df_api = bpd.read_csv("gs://cloud-samples-data/vertex-ai/bigframe/df.csv")
diff --git a/samples/snippets/multimodal_test.py b/samples/snippets/multimodal_test.py
index 368f82d849..7f8e13cd7b 100644
--- a/samples/snippets/multimodal_test.py
+++ b/samples/snippets/multimodal_test.py
@@ -78,7 +78,7 @@ def test_multimodal_dataframe(gcs_dst_bucket: str) -> None:
# [START bigquery_dataframes_multimodal_dataframe_ml_text]
from bigframes.ml import llm
- gemini = llm.GeminiTextGenerator(model_name="gemini-1.5-flash-002")
+ gemini = llm.GeminiTextGenerator(model_name="gemini-2.0-flash-001")
# Deal with first 2 images as example
df_image = df_image.head(2)
diff --git a/tests/system/large/operations/conftest.py b/tests/system/large/operations/conftest.py
index 4f6e2d1704..6f64c7552f 100644
--- a/tests/system/large/operations/conftest.py
+++ b/tests/system/large/operations/conftest.py
@@ -22,7 +22,7 @@ def gemini_flash_model(session, bq_connection) -> llm.GeminiTextGenerator:
return llm.GeminiTextGenerator(
session=session,
connection_name=bq_connection,
- model_name="gemini-1.5-flash-001",
+ model_name="gemini-2.0-flash-001",
)
diff --git a/tests/system/large/operations/test_ai.py b/tests/system/large/operations/test_ai.py
index 04074a2ea6..c2797e39ee 100644
--- a/tests/system/large/operations/test_ai.py
+++ b/tests/system/large/operations/test_ai.py
@@ -434,7 +434,7 @@ def test_join_with_confirmation(session, gemini_flash_model, reply, monkeypatch)
def test_self_join(session, gemini_flash_model):
animals = dataframe.DataFrame(
data={
- "animal": ["spider", "capybara"],
+ "animal": ["ant", "elephant"],
},
session=session,
)
@@ -453,8 +453,8 @@ def test_self_join(session, gemini_flash_model):
expected_df = pd.DataFrame(
{
- "animal_left": ["capybara"],
- "animal_right": ["spider"],
+ "animal_left": ["elephant"],
+ "animal_right": ["ant"],
}
)
pandas.testing.assert_frame_equal(
diff --git a/tests/system/large/operations/test_semantics.py b/tests/system/large/operations/test_semantics.py
index 3517b1adbc..7ae78a5c53 100644
--- a/tests/system/large/operations/test_semantics.py
+++ b/tests/system/large/operations/test_semantics.py
@@ -86,7 +86,7 @@ def test_agg(session, gemini_flash_model, max_agg_rows, cluster_column):
cluster_column=cluster_column,
).to_pandas()
- expected_s = pd.Series(["Leonardo \n"], dtype=dtypes.STRING_DTYPE)
+ expected_s = pd.Series(["Leonardo\n"], dtype=dtypes.STRING_DTYPE)
expected_s.name = "Movies"
pandas.testing.assert_series_equal(actual_s, expected_s, check_index_type=False)
@@ -137,12 +137,13 @@ def test_agg_w_int_column(session, gemini_flash_model):
"Movies": [
"Killers of the Flower Moon",
"The Great Gatsby",
+ "The Wolf of Wall Street",
],
- "Years": [2023, 2013],
+ "Years": [2023, 2013, 2013],
},
session=session,
)
- instruction = "Find the {Years} Leonardo DiCaprio acted in the most movies. Answer with the year only."
+ instruction = "Find the {Years} Leonardo DiCaprio acted in the most movies. Your answer should be the four-digit year, returned as a string."
with bigframes.option_context(
SEM_OP_EXP_OPTION,
@@ -155,7 +156,7 @@ def test_agg_w_int_column(session, gemini_flash_model):
model=gemini_flash_model,
).to_pandas()
- expected_s = pd.Series(["2013 \n"], dtype=dtypes.STRING_DTYPE)
+ expected_s = pd.Series(["2013\n"], dtype=dtypes.STRING_DTYPE)
expected_s.name = "Years"
pandas.testing.assert_series_equal(actual_s, expected_s, check_index_type=False)
@@ -764,7 +765,7 @@ def test_join_with_confirmation(session, gemini_flash_model, reply, monkeypatch)
def test_self_join(session, gemini_flash_model):
animals = dataframe.DataFrame(
data={
- "animal": ["spider", "capybara"],
+ "animal": ["ant", "elephant"],
},
session=session,
)
@@ -783,8 +784,8 @@ def test_self_join(session, gemini_flash_model):
expected_df = pd.DataFrame(
{
- "animal_left": ["capybara"],
- "animal_right": ["spider"],
+ "animal_left": ["elephant"],
+ "animal_right": ["ant"],
}
)
pandas.testing.assert_frame_equal(
diff --git a/tests/system/load/test_llm.py b/tests/system/load/test_llm.py
index 49f79d9d44..354aebcac5 100644
--- a/tests/system/load/test_llm.py
+++ b/tests/system/load/test_llm.py
@@ -81,7 +81,7 @@ def test_llm_gemini_configure_fit(
@pytest.mark.flaky(retries=2)
def test_llm_gemini_w_ground_with_google_search(llm_remote_text_df):
- model = llm.GeminiTextGenerator(model_name="gemini-1.5-flash-002", max_iterations=1)
+ model = llm.GeminiTextGenerator(model_name="gemini-2.0-flash-001", max_iterations=1)
df = model.predict(
llm_remote_text_df["prompt"],
ground_with_google_search=True,