Commit 80aad9a
feat: add deprecation warning to Gemini-1.5-X, text-embedding-004, and remove legacy models in notebooks and docs (#1723)
* change all model_name from gemini-1.5-pro to gemini-2.0 due to model deprecation
* add warning for deprecated models
* add space for style
* rewording note
* test change
* fix failed test
* add comment back
* use warning instead
* remove replicated notes
1 parent 1df8ca6 commit 80aad9a

15 files changed: +58 -29 lines

bigframes/ml/llm.py

Lines changed: 28 additions & 0 deletions
@@ -112,11 +112,18 @@
     "If you proceed with '{model_name}', it might not work as expected or could lead to errors with multimodal inputs."
 )

+_MODEL_DEPRECATE_WARNING = (
+    "'{model_name}' is going to be deprecated. Use '{new_model_name}' ({link}) instead."
+)
+

 @log_adapter.class_logger
 class TextEmbeddingGenerator(base.RetriableRemotePredictor):
     """Text embedding generator LLM model.

+    .. note::
+        text-embedding-004 is going to be deprecated. Use text-embedding-005(https://cloud.google.com/python/docs/reference/bigframes/latest/bigframes.ml.llm.TextEmbeddingGenerator) instead.
+
     Args:
         model_name (str, Default to "text-embedding-004"):
             The model for text embedding. Possible values are "text-embedding-005", "text-embedding-004"
@@ -169,6 +176,15 @@ def _create_bqml_model(self):
                 )
             )
             warnings.warn(msg)
+        if self.model_name == "text-embedding-004":
+            msg = exceptions.format_message(
+                _MODEL_DEPRECATE_WARNING.format(
+                    model_name=self.model_name,
+                    new_model_name="text-embedding-005",
+                    link="https://cloud.google.com/python/docs/reference/bigframes/latest/bigframes.ml.llm.TextEmbeddingGenerator",
+                )
+            )
+            warnings.warn(msg)

         options = {
             "endpoint": self.model_name,
@@ -416,6 +432,7 @@ class GeminiTextGenerator(base.RetriableRemotePredictor):
     default and a warning will be issued.

     .. note::
+        "gemini-1.5-X" is going to be deprecated. Please use gemini-2.0-X instead. For example, "gemini-2.0-flash-001".
         "gemini-2.0-flash-exp", "gemini-1.5-pro-preview-0514" and "gemini-1.5-flash-preview-0514" is subject to the "Pre-GA Offerings Terms" in the General Service Terms section of the
         Service Specific Terms(https://cloud.google.com/terms/service-terms#1). Pre-GA products and features are available "as is"
         and might have limited support. For more information, see the launch stage descriptions
@@ -461,10 +478,12 @@ def __init__(
                 "(https://cloud.google.com/products#product-launch-stages)."
             )
             warnings.warn(msg, category=exceptions.PreviewWarning)
+
         if model_name is None:
             model_name = "gemini-2.0-flash-001"
             msg = exceptions.format_message(_REMOVE_DEFAULT_MODEL_WARNING)
             warnings.warn(msg, category=FutureWarning, stacklevel=2)
+
         self.model_name = model_name
         self.session = session or global_session.get_global_session()
         self.max_iterations = max_iterations
@@ -487,6 +506,15 @@ def _create_bqml_model(self):
                 )
             )
             warnings.warn(msg)
+        if self.model_name.startswith("gemini-1.5"):
+            msg = exceptions.format_message(
+                _MODEL_DEPRECATE_WARNING.format(
+                    model_name=self.model_name,
+                    new_model_name="gemini-2.0-X",
+                    link="https://cloud.google.com/python/docs/reference/bigframes/latest/bigframes.ml.llm.GeminiTextGenerator",
+                )
+            )
+            warnings.warn(msg)

         options = {"endpoint": self.model_name}
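The warning itself is plain string formatting plus warnings.warn, so its behavior can be previewed without a BigQuery session. Below is a minimal sketch with the template copied from the diff above; exceptions.format_message is omitted for brevity, and check_deprecated is a hypothetical helper, not part of bigframes.

import warnings

# Template copied from the diff above; in bigframes it lives in bigframes/ml/llm.py.
_MODEL_DEPRECATE_WARNING = (
    "'{model_name}' is going to be deprecated. Use '{new_model_name}' ({link}) instead."
)


def check_deprecated(model_name: str) -> None:
    # Mirrors the checks this commit adds to _create_bqml_model.
    if model_name.startswith("gemini-1.5"):
        warnings.warn(
            _MODEL_DEPRECATE_WARNING.format(
                model_name=model_name,
                new_model_name="gemini-2.0-X",
                link="https://cloud.google.com/python/docs/reference/bigframes/latest/bigframes.ml.llm.GeminiTextGenerator",
            )
        )
    elif model_name == "text-embedding-004":
        warnings.warn(
            _MODEL_DEPRECATE_WARNING.format(
                model_name=model_name,
                new_model_name="text-embedding-005",
                link="https://cloud.google.com/python/docs/reference/bigframes/latest/bigframes.ml.llm.TextEmbeddingGenerator",
            )
        )


check_deprecated("gemini-1.5-flash-002")   # emits the deprecation warning
check_deprecated("gemini-2.0-flash-001")   # no warning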
bigframes/operations/semantics.py

Lines changed: 1 addition & 1 deletion
@@ -252,7 +252,7 @@ def cluster_by(
         >>> bpd.options.compute.semantic_ops_confirmation_threshold = 25

         >>> import bigframes.ml.llm as llm
-        >>> model = llm.TextEmbeddingGenerator()
+        >>> model = llm.TextEmbeddingGenerator(model_name="text-embedding-005")

         >>> df = bpd.DataFrame({
         ...     "Product": ["Smartphone", "Laptop", "T-shirt", "Jeans"],

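The updated doctest pins the embedding model explicitly instead of relying on the soon-to-be-deprecated default. A hedged end-to-end sketch of the same pattern, assuming an authenticated BigQuery DataFrames session and the semantic-operator options shown in the surrounding docstring; the cluster_by call is indicative only, so consult the full docstring for the exact signature:

import bigframes.pandas as bpd
import bigframes.ml.llm as llm

# Both options mirror the cluster_by docstring; semantic operators are experimental.
bpd.options.experiments.semantic_operators = True
bpd.options.compute.semantic_ops_confirmation_threshold = 25

# Pinning model_name avoids the new text-embedding-004 deprecation warning.
model = llm.TextEmbeddingGenerator(model_name="text-embedding-005")

df = bpd.DataFrame({
    "Product": ["Smartphone", "Laptop", "T-shirt", "Jeans"],
})
# Group the products into two clusters using the embedding model.
df.semantics.cluster_by("Product", "Cluster ID", model, n_clusters=2)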
notebooks/apps/synthetic_data_generation.ipynb

Lines changed: 1 addition & 1 deletion
@@ -111,7 +111,7 @@
    "source": [
     "from bigframes.ml.llm import GeminiTextGenerator\n",
     "\n",
-    "model = GeminiTextGenerator(model_name=\"gemini-1.5-flash-002\")"
+    "model = GeminiTextGenerator(model_name=\"gemini-2.0-flash-001\")"
    ]
   },
   {

notebooks/generative_ai/bq_dataframes_llm_code_generation.ipynb

Lines changed: 1 addition & 1 deletion
@@ -430,7 +430,7 @@
    "source": [
     "from bigframes.ml.llm import GeminiTextGenerator\n",
     "\n",
-    "model = GeminiTextGenerator(model_name=\"gemini-1.5-flash-002\")"
+    "model = GeminiTextGenerator(model_name=\"gemini-2.0-flash-001\")"
    ]
   },
   {

notebooks/generative_ai/bq_dataframes_llm_kmeans.ipynb

Lines changed: 1 addition & 1 deletion
@@ -1614,7 +1614,7 @@
    "source": [
     "from bigframes.ml.llm import GeminiTextGenerator\n",
     "\n",
-    "q_a_model = GeminiTextGenerator(model_name=\"gemini-1.5-flash-002\")"
+    "q_a_model = GeminiTextGenerator(model_name=\"gemini-2.0-flash-001\")"
    ]
   },
   {

notebooks/generative_ai/bq_dataframes_llm_vector_search.ipynb

Lines changed: 1 addition & 1 deletion
@@ -1816,7 +1816,7 @@
    "source": [
     "## gemini model\n",
     "\n",
-    "llm_model = bf_llm.GeminiTextGenerator(model_name = \"gemini-1.5-flash-002\") ## replace with other model as needed"
+    "llm_model = bf_llm.GeminiTextGenerator(model_name = \"gemini-2.0-flash-001\") ## replace with other model as needed"
    ]
   },
   {

notebooks/generative_ai/bq_dataframes_ml_drug_name_generation.ipynb

Lines changed: 1 addition & 1 deletion
@@ -581,7 +581,7 @@
    ],
    "source": [
     "# Define the model\n",
-    "model = GeminiTextGenerator(model_name=\"gemini-1.5-flash-002\")\n",
+    "model = GeminiTextGenerator(model_name=\"gemini-2.0-flash-001\")\n",
     "\n",
     "# Invoke LLM with prompt\n",
     "response = predict(zero_shot_prompt, temperature = TEMPERATURE)\n",

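Every notebook edit above (and the commented-out cell in bq_dataframes_template.ipynb further down) is the same one-line swap. The before/after pattern, with model names taken directly from the diff:

from bigframes.ml.llm import GeminiTextGenerator

# Before: now triggers the deprecation warning added in bigframes/ml/llm.py.
# model = GeminiTextGenerator(model_name="gemini-1.5-flash-002")

# After: the replacement model used throughout the updated notebooks.
model = GeminiTextGenerator(model_name="gemini-2.0-flash-001")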
notebooks/generative_ai/large_language_models.ipynb

Lines changed: 8 additions & 8 deletions
@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 2,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -21,23 +21,23 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 3,
    "metadata": {},
    "outputs": [
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "/tmp/ipykernel_604997/3896046356.py:1: ApiDeprecationWarning: gemini-pro and gemini-1.5-X are going to be deprecated. Use gemini-2.0-X (https://cloud.google.com/python/docs/reference/bigframes/latest/bigframes.ml.llm.GeminiTextGenerator) instead. \n",
-      " model = GeminiTextGenerator(model_name=\"gemini-1.5-flash-002\")\n",
-      "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/ml/llm.py:981: DefaultLocationWarning: No explicit location is set, so using location US for the session.\n",
+      "/tmp/ipykernel_176683/987800245.py:1: ApiDeprecationWarning: gemini-1.5-X are going to be deprecated. Use gemini-2.0-X (https://cloud.google.com/python/docs/reference/bigframes/latest/bigframes.ml.llm.GeminiTextGenerator) instead. \n",
+      " model = GeminiTextGenerator(model_name=\"gemini-2.0-flash-001\")\n",
+      "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/ml/llm.py:486: DefaultLocationWarning: No explicit location is set, so using location US for the session.\n",
      " self.session = session or global_session.get_global_session()\n"
     ]
    },
    {
     "data": {
      "text/html": [
-      "Query job dd2da3cc-27c3-4c6f-9936-4f7769c85090 is DONE. 0 Bytes processed. <a target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-dev&j=bq:US:dd2da3cc-27c3-4c6f-9936-4f7769c85090&page=queryresults\">Open Job</a>"
+      "Query job 6fa5121a-6da4-4c75-92ec-936799da4513 is DONE. 0 Bytes processed. <a target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-dev&j=bq:US:6fa5121a-6da4-4c75-92ec-936799da4513&page=queryresults\">Open Job</a>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
@@ -49,7 +49,7 @@
    {
     "data": {
      "text/html": [
-      "Query job 00947011-4d7c-42fa-ae19-3b684976cec6 is DONE. 0 Bytes processed. <a target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-dev&j=bq:US:00947011-4d7c-42fa-ae19-3b684976cec6&page=queryresults\">Open Job</a>"
+      "Query job 74460ae9-3e89-49e7-93ad-bafbb6197a86 is DONE. 0 Bytes processed. <a target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-dev&j=bq:US:74460ae9-3e89-49e7-93ad-bafbb6197a86&page=queryresults\">Open Job</a>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
@@ -60,7 +60,7 @@
    }
   ],
   "source": [
-   "model = GeminiTextGenerator(model_name=\"gemini-1.5-flash-002\")"
+   "model = GeminiTextGenerator(model_name=\"gemini-2.0-flash-001\")"
   ]
  },
  {

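The stderr cell above shows how the deprecation message surfaces in a notebook. If a project deliberately stays on an older model for now, the message can be silenced with the standard library; this sketch filters on the message text rather than the warning class (ApiDeprecationWarning in the output above is a bigframes-internal name, so matching on text is the more portable assumption):

import warnings

# Ignore only the model-deprecation message; other bigframes warnings still surface.
warnings.filterwarnings("ignore", message=".*is going to be deprecated.*")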
notebooks/getting_started/bq_dataframes_template.ipynb

Lines changed: 1 addition & 1 deletion
@@ -1419,7 +1419,7 @@
    "source": [
     "# from bigframes.ml.llm import GeminiTextGenerator\n",
     "\n",
-    "# model = GeminiTextGenerator(model_name=\"gemini-1.5-flash-002\")\n",
+    "# model = GeminiTextGenerator(model_name=\"gemini-2.0-flash-001\")\n",
     "\n",
     "# pred = model.predict(df)\n",
     "# pred"

samples/snippets/gemini_model_test.py

Lines changed: 1 addition & 1 deletion
@@ -30,7 +30,7 @@ def test_gemini_text_generator_model() -> None:
     session = bpd.get_global_session()
     connection = f"{PROJECT_ID}.{REGION}.{CONN_NAME}"
     model = GeminiTextGenerator(
-        session=session, connection_name=connection, model_name="gemini-1.5-flash-002"
+        session=session, connection_name=connection, model_name="gemini-2.0-flash-001"
     )

     df_api = bpd.read_csv("gs://cloud-samples-data/vertex-ai/bigframe/df.csv")

samples/snippets/multimodal_test.py

Lines changed: 1 addition & 1 deletion
@@ -78,7 +78,7 @@ def test_multimodal_dataframe(gcs_dst_bucket: str) -> None:
     # [START bigquery_dataframes_multimodal_dataframe_ml_text]
     from bigframes.ml import llm

-    gemini = llm.GeminiTextGenerator(model_name="gemini-1.5-flash-002")
+    gemini = llm.GeminiTextGenerator(model_name="gemini-2.0-flash-001")

     # Deal with first 2 images as example
     df_image = df_image.head(2)

tests/system/large/operations/conftest.py

Lines changed: 1 addition & 1 deletion
@@ -22,7 +22,7 @@ def gemini_flash_model(session, bq_connection) -> llm.GeminiTextGenerator:
     return llm.GeminiTextGenerator(
         session=session,
         connection_name=bq_connection,
-        model_name="gemini-1.5-flash-001",
+        model_name="gemini-2.0-flash-001",
     )

tests/system/large/operations/test_ai.py

Lines changed: 3 additions & 3 deletions
@@ -434,7 +434,7 @@ def test_join_with_confirmation(session, gemini_flash_model, reply, monkeypatch)
 def test_self_join(session, gemini_flash_model):
     animals = dataframe.DataFrame(
         data={
-            "animal": ["spider", "capybara"],
+            "animal": ["ant", "elephant"],
         },
         session=session,
     )
@@ -453,8 +453,8 @@ def test_self_join(session, gemini_flash_model):

     expected_df = pd.DataFrame(
         {
-            "animal_left": ["capybara"],
-            "animal_right": ["spider"],
+            "animal_left": ["elephant"],
+            "animal_right": ["ant"],
         }
     )
     pandas.testing.assert_frame_equal(

tests/system/large/operations/test_semantics.py

Lines changed: 8 additions & 7 deletions
@@ -86,7 +86,7 @@ def test_agg(session, gemini_flash_model, max_agg_rows, cluster_column):
         cluster_column=cluster_column,
     ).to_pandas()

-    expected_s = pd.Series(["Leonardo \n"], dtype=dtypes.STRING_DTYPE)
+    expected_s = pd.Series(["Leonardo\n"], dtype=dtypes.STRING_DTYPE)
     expected_s.name = "Movies"
     pandas.testing.assert_series_equal(actual_s, expected_s, check_index_type=False)

@@ -137,12 +137,13 @@ def test_agg_w_int_column(session, gemini_flash_model):
             "Movies": [
                 "Killers of the Flower Moon",
                 "The Great Gatsby",
+                "The Wolf of Wall Street",
             ],
-            "Years": [2023, 2013],
+            "Years": [2023, 2013, 2013],
         },
         session=session,
     )
-    instruction = "Find the {Years} Leonardo DiCaprio acted in the most movies. Answer with the year only."
+    instruction = "Find the {Years} Leonardo DiCaprio acted in the most movies. Your answer should be the four-digit year, returned as a string."

     with bigframes.option_context(
         SEM_OP_EXP_OPTION,
@@ -155,7 +156,7 @@ def test_agg_w_int_column(session, gemini_flash_model):
         model=gemini_flash_model,
     ).to_pandas()

-    expected_s = pd.Series(["2013 \n"], dtype=dtypes.STRING_DTYPE)
+    expected_s = pd.Series(["2013\n"], dtype=dtypes.STRING_DTYPE)
     expected_s.name = "Years"
     pandas.testing.assert_series_equal(actual_s, expected_s, check_index_type=False)

@@ -764,7 +765,7 @@ def test_join_with_confirmation(session, gemini_flash_model, reply, monkeypatch)
 def test_self_join(session, gemini_flash_model):
     animals = dataframe.DataFrame(
         data={
-            "animal": ["spider", "capybara"],
+            "animal": ["ant", "elephant"],
         },
         session=session,
     )
@@ -783,8 +784,8 @@ def test_self_join(session, gemini_flash_model):

     expected_df = pd.DataFrame(
         {
-            "animal_left": ["capybara"],
-            "animal_right": ["spider"],
+            "animal_left": ["elephant"],
+            "animal_right": ["ant"],
         }
     )
     pandas.testing.assert_frame_equal(

tests/system/load/test_llm.py

Lines changed: 1 addition & 1 deletion
@@ -81,7 +81,7 @@ def test_llm_gemini_configure_fit(

 @pytest.mark.flaky(retries=2)
 def test_llm_gemini_w_ground_with_google_search(llm_remote_text_df):
-    model = llm.GeminiTextGenerator(model_name="gemini-1.5-flash-002", max_iterations=1)
+    model = llm.GeminiTextGenerator(model_name="gemini-2.0-flash-001", max_iterations=1)
     df = model.predict(
         llm_remote_text_df["prompt"],
         ground_with_google_search=True,

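None of the updated tests assert on the new warning itself. A sketch of how that could be covered, reusing the session and bq_connection fixtures from tests/system/large/operations/conftest.py and assuming the warning is emitted during model construction, as the notebook output above suggests (the test name and match pattern are illustrative, not part of this commit):

import pytest

from bigframes.ml import llm


def test_gemini_15_model_emits_deprecation_warning(session, bq_connection):
    # The message text comes from _MODEL_DEPRECATE_WARNING added in this commit.
    with pytest.warns(Warning, match="is going to be deprecated"):
        llm.GeminiTextGenerator(
            session=session,
            connection_name=bq_connection,
            model_name="gemini-1.5-flash-002",
        )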