
feat: add deprecation warning to Gemini-1.5-X, text-embedding-004, and remove legacy models in notebooks and docs #1723

Merged · 9 commits · May 16, 2025
Changes from all commits
28 changes: 28 additions & 0 deletions bigframes/ml/llm.py
@@ -112,11 +112,18 @@
"If you proceed with '{model_name}', it might not work as expected or could lead to errors with multimodal inputs."
)

_MODEL_DEPRECATE_WARNING = (
"'{model_name}' is going to be deprecated. Use '{new_model_name}' ({link}) instead."
)


@log_adapter.class_logger
class TextEmbeddingGenerator(base.RetriableRemotePredictor):
"""Text embedding generator LLM model.

.. note::
text-embedding-004 is going to be deprecated. Use text-embedding-005 (https://cloud.google.com/python/docs/reference/bigframes/latest/bigframes.ml.llm.TextEmbeddingGenerator) instead.

Args:
model_name (str, Defaults to "text-embedding-004"):
The model for text embedding. Possible values are "text-embedding-005", "text-embedding-004"
@@ -169,6 +176,15 @@ def _create_bqml_model(self):
)
)
warnings.warn(msg)
if self.model_name == "text-embedding-004":
msg = exceptions.format_message(
_MODEL_DEPRECATE_WARNING.format(
model_name=self.model_name,
new_model_name="text-embedding-005",
link="https://cloud.google.com/python/docs/reference/bigframes/latest/bigframes.ml.llm.TextEmbeddingGenerator",
)
)
warnings.warn(msg)

options = {
"endpoint": self.model_name,
@@ -416,6 +432,7 @@ class GeminiTextGenerator(base.RetriableRemotePredictor):
default and a warning will be issued.

.. note::
"gemini-1.5-X" is going to be deprecated. Please use gemini-2.0-X instead. For example, "gemini-2.0-flash-001".
"gemini-2.0-flash-exp", "gemini-1.5-pro-preview-0514" and "gemini-1.5-flash-preview-0514" is subject to the "Pre-GA Offerings Terms" in the General Service Terms section of the
Service Specific Terms(https://cloud.google.com/terms/service-terms#1). Pre-GA products and features are available "as is"
and might have limited support. For more information, see the launch stage descriptions
@@ -461,10 +478,12 @@ def __init__(
"(https://cloud.google.com/products#product-launch-stages)."
)
warnings.warn(msg, category=exceptions.PreviewWarning)

if model_name is None:
model_name = "gemini-2.0-flash-001"
msg = exceptions.format_message(_REMOVE_DEFAULT_MODEL_WARNING)
warnings.warn(msg, category=FutureWarning, stacklevel=2)

self.model_name = model_name
self.session = session or global_session.get_global_session()
self.max_iterations = max_iterations
@@ -487,6 +506,15 @@ def _create_bqml_model(self):
)
)
warnings.warn(msg)
if self.model_name.startswith("gemini-1.5"):
msg = exceptions.format_message(
_MODEL_DEPRECATE_WARNING.format(
model_name=self.model_name,
new_model_name="gemini-2.0-X",
link="https://cloud.google.com/python/docs/reference/bigframes/latest/bigframes.ml.llm.GeminiTextGenerator",
)
)
warnings.warn(msg)

options = {"endpoint": self.model_name}

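To make the review easier, here is a minimal, self-contained sketch of the warning path this file now takes. The helper name `warn_if_deprecated` and the bare `warnings.warn` calls are illustrative only; in bigframes the checks live inside `_create_bqml_model` and the message is routed through `exceptions.format_message`.

```python
import warnings

# Illustrative stand-ins for the pieces added in bigframes/ml/llm.py.
_MODEL_DEPRECATE_WARNING = (
    "'{model_name}' is going to be deprecated. Use '{new_model_name}' ({link}) instead."
)

_EMBEDDING_DOC = "https://cloud.google.com/python/docs/reference/bigframes/latest/bigframes.ml.llm.TextEmbeddingGenerator"
_GEMINI_DOC = "https://cloud.google.com/python/docs/reference/bigframes/latest/bigframes.ml.llm.GeminiTextGenerator"


def warn_if_deprecated(model_name: str) -> None:
    """Mirror the two checks added to _create_bqml_model."""
    if model_name == "text-embedding-004":
        warnings.warn(
            _MODEL_DEPRECATE_WARNING.format(
                model_name=model_name,
                new_model_name="text-embedding-005",
                link=_EMBEDDING_DOC,
            )
        )
    elif model_name.startswith("gemini-1.5"):
        warnings.warn(
            _MODEL_DEPRECATE_WARNING.format(
                model_name=model_name,
                new_model_name="gemini-2.0-X",
                link=_GEMINI_DOC,
            )
        )


warn_if_deprecated("gemini-1.5-flash-002")  # emits the deprecation message
warn_if_deprecated("gemini-2.0-flash-001")  # stays silent
```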
2 changes: 1 addition & 1 deletion bigframes/operations/semantics.py
@@ -252,7 +252,7 @@ def cluster_by(
>>> bpd.options.compute.semantic_ops_confirmation_threshold = 25

>>> import bigframes.ml.llm as llm
>>> model = llm.TextEmbeddingGenerator()
>>> model = llm.TextEmbeddingGenerator(model_name="text-embedding-005")

>>> df = bpd.DataFrame({
... "Product": ["Smartphone", "Laptop", "T-shirt", "Jeans"],
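The rest of the PR updates docs, notebooks, samples, and tests to pin the replacement models explicitly, as in the `cluster_by` docstring above. A migration sketch follows; it assumes a bigframes session and BigQuery connection are already configured, and the `predict` calls are commented out because they run real BigQuery jobs (`df` is a hypothetical DataFrame).

```python
from bigframes.ml import llm

# Pin the non-deprecated models explicitly instead of relying on defaults.
embedder = llm.TextEmbeddingGenerator(model_name="text-embedding-005")
gemini = llm.GeminiTextGenerator(model_name="gemini-2.0-flash-001")

# embeddings = embedder.predict(df["Product"])  # df is a hypothetical DataFrame
# answers = gemini.predict(df["prompt"])
```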
2 changes: 1 addition & 1 deletion notebooks/apps/synthetic_data_generation.ipynb
@@ -111,7 +111,7 @@
"source": [
"from bigframes.ml.llm import GeminiTextGenerator\n",
"\n",
"model = GeminiTextGenerator(model_name=\"gemini-1.5-flash-002\")"
"model = GeminiTextGenerator(model_name=\"gemini-2.0-flash-001\")"
]
},
{
@@ -430,7 +430,7 @@
"source": [
"from bigframes.ml.llm import GeminiTextGenerator\n",
"\n",
"model = GeminiTextGenerator(model_name=\"gemini-1.5-flash-002\")"
"model = GeminiTextGenerator(model_name=\"gemini-2.0-flash-001\")"
]
},
{
2 changes: 1 addition & 1 deletion notebooks/generative_ai/bq_dataframes_llm_kmeans.ipynb
@@ -1614,7 +1614,7 @@
"source": [
"from bigframes.ml.llm import GeminiTextGenerator\n",
"\n",
"q_a_model = GeminiTextGenerator(model_name=\"gemini-1.5-flash-002\")"
"q_a_model = GeminiTextGenerator(model_name=\"gemini-2.0-flash-001\")"
]
},
{
@@ -1816,7 +1816,7 @@
"source": [
"## gemini model\n",
"\n",
"llm_model = bf_llm.GeminiTextGenerator(model_name = \"gemini-1.5-flash-002\") ## replace with other model as needed"
"llm_model = bf_llm.GeminiTextGenerator(model_name = \"gemini-2.0-flash-001\") ## replace with other model as needed"
]
},
{
@@ -581,7 +581,7 @@
],
"source": [
"# Define the model\n",
"model = GeminiTextGenerator(model_name=\"gemini-1.5-flash-002\")\n",
"model = GeminiTextGenerator(model_name=\"gemini-2.0-flash-001\")\n",
"\n",
"# Invoke LLM with prompt\n",
"response = predict(zero_shot_prompt, temperature = TEMPERATURE)\n",
16 changes: 8 additions & 8 deletions notebooks/generative_ai/large_language_models.ipynb
@@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
@@ -21,23 +21,23 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_604997/3896046356.py:1: ApiDeprecationWarning: gemini-pro and gemini-1.5-X are going to be deprecated. Use gemini-2.0-X (https://cloud.google.com/python/docs/reference/bigframes/latest/bigframes.ml.llm.GeminiTextGenerator) instead. \n",
" model = GeminiTextGenerator(model_name=\"gemini-1.5-flash-002\")\n",
"/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/ml/llm.py:981: DefaultLocationWarning: No explicit location is set, so using location US for the session.\n",
"/tmp/ipykernel_176683/987800245.py:1: ApiDeprecationWarning: gemini-1.5-X are going to be deprecated. Use gemini-2.0-X (https://cloud.google.com/python/docs/reference/bigframes/latest/bigframes.ml.llm.GeminiTextGenerator) instead. \n",
" model = GeminiTextGenerator(model_name=\"gemini-2.0-flash-001\")\n",
"/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/ml/llm.py:486: DefaultLocationWarning: No explicit location is set, so using location US for the session.\n",
" self.session = session or global_session.get_global_session()\n"
]
},
{
"data": {
"text/html": [
"Query job dd2da3cc-27c3-4c6f-9936-4f7769c85090 is DONE. 0 Bytes processed. <a target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-dev&j=bq:US:dd2da3cc-27c3-4c6f-9936-4f7769c85090&page=queryresults\">Open Job</a>"
"Query job 6fa5121a-6da4-4c75-92ec-936799da4513 is DONE. 0 Bytes processed. <a target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-dev&j=bq:US:6fa5121a-6da4-4c75-92ec-936799da4513&page=queryresults\">Open Job</a>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
@@ -49,7 +49,7 @@
{
"data": {
"text/html": [
"Query job 00947011-4d7c-42fa-ae19-3b684976cec6 is DONE. 0 Bytes processed. <a target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-dev&j=bq:US:00947011-4d7c-42fa-ae19-3b684976cec6&page=queryresults\">Open Job</a>"
"Query job 74460ae9-3e89-49e7-93ad-bafbb6197a86 is DONE. 0 Bytes processed. <a target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-dev&j=bq:US:74460ae9-3e89-49e7-93ad-bafbb6197a86&page=queryresults\">Open Job</a>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
@@ -60,7 +60,7 @@
}
],
"source": [
"model = GeminiTextGenerator(model_name=\"gemini-1.5-flash-002\")"
"model = GeminiTextGenerator(model_name=\"gemini-2.0-flash-001\")"
]
},
{
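If a notebook still needs to construct one of the deprecated models while migrating, the new warning can be silenced with the standard library alone; this is plain Python, not a bigframes API, and the regex simply matches the `_MODEL_DEPRECATE_WARNING` template shown above.

```python
import warnings

with warnings.catch_warnings():
    # Ignore only the deprecation message added in this PR; other warnings still surface.
    warnings.filterwarnings("ignore", message=".*going to be deprecated.*")
    # model = GeminiTextGenerator(model_name="gemini-1.5-flash-002")
```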
2 changes: 1 addition & 1 deletion notebooks/getting_started/bq_dataframes_template.ipynb
@@ -1419,7 +1419,7 @@
"source": [
"# from bigframes.ml.llm import GeminiTextGenerator\n",
"\n",
"# model = GeminiTextGenerator(model_name=\"gemini-1.5-flash-002\")\n",
"# model = GeminiTextGenerator(model_name=\"gemini-2.0-flash-001\")\n",
"\n",
"# pred = model.predict(df)\n",
"# pred"
2 changes: 1 addition & 1 deletion samples/snippets/gemini_model_test.py
@@ -30,7 +30,7 @@ def test_gemini_text_generator_model() -> None:
session = bpd.get_global_session()
connection = f"{PROJECT_ID}.{REGION}.{CONN_NAME}"
model = GeminiTextGenerator(
session=session, connection_name=connection, model_name="gemini-1.5-flash-002"
session=session, connection_name=connection, model_name="gemini-2.0-flash-001"
)

df_api = bpd.read_csv("gs://cloud-samples-data/vertex-ai/bigframe/df.csv")
2 changes: 1 addition & 1 deletion samples/snippets/multimodal_test.py
@@ -78,7 +78,7 @@ def test_multimodal_dataframe(gcs_dst_bucket: str) -> None:
# [START bigquery_dataframes_multimodal_dataframe_ml_text]
from bigframes.ml import llm

gemini = llm.GeminiTextGenerator(model_name="gemini-1.5-flash-002")
gemini = llm.GeminiTextGenerator(model_name="gemini-2.0-flash-001")

# Deal with first 2 images as example
df_image = df_image.head(2)
2 changes: 1 addition & 1 deletion tests/system/large/operations/conftest.py
@@ -22,7 +22,7 @@ def gemini_flash_model(session, bq_connection) -> llm.GeminiTextGenerator:
return llm.GeminiTextGenerator(
session=session,
connection_name=bq_connection,
model_name="gemini-1.5-flash-001",
model_name="gemini-2.0-flash-001",
)


6 changes: 3 additions & 3 deletions tests/system/large/operations/test_ai.py
@@ -434,7 +434,7 @@ def test_join_with_confirmation(session, gemini_flash_model, reply, monkeypatch)
def test_self_join(session, gemini_flash_model):
animals = dataframe.DataFrame(
data={
"animal": ["spider", "capybara"],
"animal": ["ant", "elephant"],
},
session=session,
)
@@ -453,8 +453,8 @@ def test_self_join(session, gemini_flash_model):

expected_df = pd.DataFrame(
{
"animal_left": ["capybara"],
"animal_right": ["spider"],
"animal_left": ["elephant"],
"animal_right": ["ant"],
}
)
pandas.testing.assert_frame_equal(
15 changes: 8 additions & 7 deletions tests/system/large/operations/test_semantics.py
@@ -86,7 +86,7 @@ def test_agg(session, gemini_flash_model, max_agg_rows, cluster_column):
cluster_column=cluster_column,
).to_pandas()

expected_s = pd.Series(["Leonardo \n"], dtype=dtypes.STRING_DTYPE)
expected_s = pd.Series(["Leonardo\n"], dtype=dtypes.STRING_DTYPE)
expected_s.name = "Movies"
pandas.testing.assert_series_equal(actual_s, expected_s, check_index_type=False)

@@ -137,12 +137,13 @@ def test_agg_w_int_column(session, gemini_flash_model):
"Movies": [
"Killers of the Flower Moon",
"The Great Gatsby",
"The Wolf of Wall Street",
],
"Years": [2023, 2013],
"Years": [2023, 2013, 2013],
},
session=session,
)
instruction = "Find the {Years} Leonardo DiCaprio acted in the most movies. Answer with the year only."
instruction = "Find the {Years} Leonardo DiCaprio acted in the most movies. Your answer should be the four-digit year, returned as a string."

with bigframes.option_context(
SEM_OP_EXP_OPTION,
Expand All @@ -155,7 +156,7 @@ def test_agg_w_int_column(session, gemini_flash_model):
model=gemini_flash_model,
).to_pandas()

expected_s = pd.Series(["2013 \n"], dtype=dtypes.STRING_DTYPE)
expected_s = pd.Series(["2013\n"], dtype=dtypes.STRING_DTYPE)
expected_s.name = "Years"
pandas.testing.assert_series_equal(actual_s, expected_s, check_index_type=False)

@@ -764,7 +765,7 @@ def test_join_with_confirmation(session, gemini_flash_model, reply, monkeypatch)
def test_self_join(session, gemini_flash_model):
animals = dataframe.DataFrame(
data={
"animal": ["spider", "capybara"],
"animal": ["ant", "elephant"],
},
session=session,
)
@@ -783,8 +784,8 @@

expected_df = pd.DataFrame(
{
"animal_left": ["capybara"],
"animal_right": ["spider"],
"animal_left": ["elephant"],
"animal_right": ["ant"],
}
)
pandas.testing.assert_frame_equal(
2 changes: 1 addition & 1 deletion tests/system/load/test_llm.py
@@ -81,7 +81,7 @@ def test_llm_gemini_configure_fit(

@pytest.mark.flaky(retries=2)
def test_llm_gemini_w_ground_with_google_search(llm_remote_text_df):
model = llm.GeminiTextGenerator(model_name="gemini-1.5-flash-002", max_iterations=1)
model = llm.GeminiTextGenerator(model_name="gemini-2.0-flash-001", max_iterations=1)
df = model.predict(
llm_remote_text_df["prompt"],
ground_with_google_search=True,