From 8703eb4fe184b1987a3513fe87dd3316cc052117 Mon Sep 17 00:00:00 2001
From: Daniela <drespana@google.com>
Date: Tue, 1 Apr 2025 22:17:59 +0000
Subject: [PATCH 01/15] docs: add matrix_factorization snippets

---
 samples/snippets/mf_explicit_model_test.py | 17 +++++++++++++++++
 samples/snippets/mf_implicit_model_test.py | 17 +++++++++++++++++
 2 files changed, 34 insertions(+)
 create mode 100644 samples/snippets/mf_explicit_model_test.py
 create mode 100644 samples/snippets/mf_implicit_model_test.py

diff --git a/samples/snippets/mf_explicit_model_test.py b/samples/snippets/mf_explicit_model_test.py
new file mode 100644
index 0000000000..cac9dfb3a2
--- /dev/null
+++ b/samples/snippets/mf_explicit_model_test.py
@@ -0,0 +1,17 @@
+# Copyright 2024 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+def test_explicit_matrix_factorization(random_model_id: str) -> None:
+    pass
diff --git a/samples/snippets/mf_implicit_model_test.py b/samples/snippets/mf_implicit_model_test.py
new file mode 100644
index 0000000000..cac9dfb3a2
--- /dev/null
+++ b/samples/snippets/mf_implicit_model_test.py
@@ -0,0 +1,17 @@
+# Copyright 2024 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (t
+# you may not use this file except in compliance wi
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in
+# distributed under the License is distributed on a
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, eit
+# See the License for the specific language governi
+# limitations under the License.
+
+
+def test_explicit_matrix_factorization(random_model_id: str) -> None:
+    pass

From 5b71583f9d89fb586a97f83b35875c3ce3bf5a2b Mon Sep 17 00:00:00 2001
From: Daniela <drespana@google.com>
Date: Wed, 2 Apr 2025 18:15:24 +0000
Subject: [PATCH 02/15] incomplete mf snippets

---
 samples/snippets/mf_explicit_model_test.py | 35 ++++++++++++++++++++++
 1 file changed, 35 insertions(+)

diff --git a/samples/snippets/mf_explicit_model_test.py b/samples/snippets/mf_explicit_model_test.py
index cac9dfb3a2..aa4f3b73b3 100644
--- a/samples/snippets/mf_explicit_model_test.py
+++ b/samples/snippets/mf_explicit_model_test.py
@@ -14,4 +14,39 @@
 
 
 def test_explicit_matrix_factorization(random_model_id: str) -> None:
+    your_model_id = random_model_id
+    # [START bigframes_dataframes_bqml_mf_create]
+    from bigframes.ml import decomposition
+    import bigframes.pandas as bpd
+
+    # Load data from BigQuery
+    bq_df = bpd.read_gbq(
+        "bqml_tutorial.ratings", columns=("user_id", "item_id", "rating")
+    )
+
+    # Create the Matrix Factorization model
+    model = decomposition.MatrixFactorization(
+        num_factors=34,
+        feedback_type="explicit",
+        user_col="user_id",
+        item_col="item_id",
+        rating_col="rating",
+        l2_reg=9.83,
+    )
+    model.fit(bq_df)
+    model.to_gbq(
+        your_model_id, replace=True  # For example: "bqml_tutorial.mf_explicit"
+    )
+    # [END bigframes_dataframes_bqml_mf_create]
+    # [START bigframes_dataframe_bqml_mf_evaluate]
+    import bigframes.pandas as bpd
+
+    model.score(bq_df)
+    # [END bigframes_dataframe_bqml_mf_evaluate]
+    # [START bigframes_dataframe_bqml_mf_predict]
+
+    # [END bigframes_dataframe_bqml_mf_predict]
+    # [START bigframes_dataframe_bqml_mf_recommend]
+    model.predict(bq_df)
+    # [END bigframes_dataframe_bqml_mf_recommend]
     pass

From edd4cd568f6de26686aa9f48dd53c4271f5863c4 Mon Sep 17 00:00:00 2001
From: Daniela <drespana@google.com>
Date: Wed, 2 Apr 2025 19:41:06 +0000
Subject: [PATCH 03/15] prep implicit

---
 samples/snippets/mf_explicit_model_test.py | 16 +++++-----
 samples/snippets/mf_implicit_model_test.py | 34 +++++++++++++++++++++-
 2 files changed, 41 insertions(+), 9 deletions(-)

diff --git a/samples/snippets/mf_explicit_model_test.py b/samples/snippets/mf_explicit_model_test.py
index aa4f3b73b3..3ecaefe9a2 100644
--- a/samples/snippets/mf_explicit_model_test.py
+++ b/samples/snippets/mf_explicit_model_test.py
@@ -15,7 +15,7 @@
 
 def test_explicit_matrix_factorization(random_model_id: str) -> None:
     your_model_id = random_model_id
-    # [START bigframes_dataframes_bqml_mf_create]
+    # [START bigframes_dataframes_bqml_mf_explicit_create]
     from bigframes.ml import decomposition
     import bigframes.pandas as bpd
 
@@ -37,16 +37,16 @@ def test_explicit_matrix_factorization(random_model_id: str) -> None:
     model.to_gbq(
         your_model_id, replace=True  # For example: "bqml_tutorial.mf_explicit"
     )
-    # [END bigframes_dataframes_bqml_mf_create]
-    # [START bigframes_dataframe_bqml_mf_evaluate]
+    # [END bigframes_dataframes_bqml_mf_explicit_create]
+    # [START bigframes_dataframe_bqml_mf_explicit_evaluate]
     import bigframes.pandas as bpd
 
     model.score(bq_df)
-    # [END bigframes_dataframe_bqml_mf_evaluate]
-    # [START bigframes_dataframe_bqml_mf_predict]
+    # [END bigframes_dataframe_bqml_mf_explicit_evaluate]
+    # [START bigframes_dataframe_bqml_mf_explicit_predict]
 
-    # [END bigframes_dataframe_bqml_mf_predict]
-    # [START bigframes_dataframe_bqml_mf_recommend]
+    # [END bigframes_dataframe_bqml_mf_explicit_predict]
+    # [START bigframes_dataframe_bqml_mf_explicit_recommend]
     model.predict(bq_df)
-    # [END bigframes_dataframe_bqml_mf_recommend]
+    # [END bigframes_dataframe_bqml_mf_explicit_recommend]
     pass
diff --git a/samples/snippets/mf_implicit_model_test.py b/samples/snippets/mf_implicit_model_test.py
index cac9dfb3a2..54dc66c645 100644
--- a/samples/snippets/mf_implicit_model_test.py
+++ b/samples/snippets/mf_implicit_model_test.py
@@ -13,5 +13,37 @@
 # limitations under the License.
 
 
-def test_explicit_matrix_factorization(random_model_id: str) -> None:
+def test_implicit_matrix_factorization(random_model_id: str) -> None:
+    # [START bigframes_dataframe_mf_implicit_data]
+    from bigframes.ml import decomposition
+    import bigframes.pandas as bpd
+
+    # sample data must be created from joined data and then grouped and ordered
+    bq_df = bpd.read_gbq("bqml_tutorial.analytics_session_data")
+    print(bq_df.peek(5))
+    # Expected output:
+    #
+    # [END bigframes_dataframe_mf_implicit_data]
+    # [START bigframes_dataframe_mf_implicit_model]
+    rating = 0.3 * (1 + (bq_df["session_duration"] - 57937) / 57937)
+    model = decomposition.MatrixFactorization(
+        num_factors=15,
+        feedback_type="implicit",
+        user_col="visitorId",
+        item_col="contentId",
+        rating_col=rating,
+        l2_reg=30,
+    )
+    # condition of rating < 1 required before fitting model
+
+    # [END bigframes_dataframe_mf_implicit_model]
+    # [START bigframes_dataframe_mf_implicit_evaluate]
+    model.fit(bq_df)
+    # [END bigframes_dataframe_mf_implicit_evaluate]
+    # [START bigframes_dataframe_mf_implicit_subset]
+
+    # [END bigframes_dataframe_mf_implicit_subset]
+    # [START bigframes_dataframe_mf_implicit_recommend]
+    model.score()
+    # [END bigframes_dataframe_mf_implicit_recommend]
     pass

From b898a76f5c2e0fe3e40ffc253d1f89b08d2017a2 Mon Sep 17 00:00:00 2001
From: Daniela <drespana@google.com>
Date: Thu, 1 May 2025 18:44:01 +0000
Subject: [PATCH 04/15] near complete tutorial

---
 samples/snippets/mf_explicit_model_test.py | 44 ++++++++++++++++++----
 samples/snippets/mf_implicit_model_test.py |  2 -
 2 files changed, 36 insertions(+), 10 deletions(-)

diff --git a/samples/snippets/mf_explicit_model_test.py b/samples/snippets/mf_explicit_model_test.py
index 3ecaefe9a2..71c20db54f 100644
--- a/samples/snippets/mf_explicit_model_test.py
+++ b/samples/snippets/mf_explicit_model_test.py
@@ -39,14 +39,42 @@ def test_explicit_matrix_factorization(random_model_id: str) -> None:
     )
     # [END bigframes_dataframes_bqml_mf_explicit_create]
     # [START bigframes_dataframe_bqml_mf_explicit_evaluate]
-    import bigframes.pandas as bpd
-
-    model.score(bq_df)
+    # Evaluate the model using the score() function
+    model.score()
+    # Output:
+    # mean_absolute_error	mean_squared_error	mean_squared_log_error	median_absolute_error	r2_score	explained_variance
+    # 0.485403	                0.395052	        0.025515	            0.390573	        0.68343	        0.68343
     # [END bigframes_dataframe_bqml_mf_explicit_evaluate]
-    # [START bigframes_dataframe_bqml_mf_explicit_predict]
+    # [START bigframes_dataframe_bqml_mf_recommend_df]
+    subset = bq_df.head(6)
+    predicted = model.predict(subset)
+    print(predicted)
+    # Output:
+    #   predicted_rating	user_id	 item_id	rating
+    # 0	    4.206146	     4354	  968	     4.0
+    # 1	    4.853099	     3622	  3521	     5.0
+    # 2	    2.679067	     5543	  920	     2.0
+    # 3	    4.323458	     445	  3175	     5.0
+    # 4	    3.476911	     5535	  235	     4.0
+    # [END bigframes_dataframe_bqml_mf_explicit_recommend_df]
+    # [START bigframes_dataframe_bqml_mf_explicit_recommend_model]
+    # import bigframes.bigquery as bbq
 
-    # [END bigframes_dataframe_bqml_mf_explicit_predict]
-    # [START bigframes_dataframe_bqml_mf_explicit_recommend]
-    model.predict(bq_df)
-    # [END bigframes_dataframe_bqml_mf_explicit_recommend]
+    # TODO: implement right_index parameter for DataFrame.merge()
+    # # Load movie data from BigQuery
+    # movies = bpd.read_gbq("bqml_tutorial.movies")
+    # # Merge movie data with rating data
+    # merged_df = bpd.merge(predicted, movies, left_on='item_id', right_on='movie_id')
+    # # separate users from data to call struct on data
+    # users = merged_df[['user_id', 'item_id']]
+    # user_data = merged_df[['movie_title', 'genre', 'predicted_rating', 'movie_id']].set_index('movie_id')
+    # struct_data = bbq.struct(user_data).to_frame()
+    # # Merge data to groupby predicted_rating and sort
+    # merged_user = bpd.merge(users, struct_data, left_on='item_id', right_index=True).drop('item_id', axis=1)
+    # desc_pred = merged_user.sort_values(by='predicted_rating', ascending=False)
+    # grouped = desc_pred.groupby('predicted_rating')
+    # result = bbq.array_agg(grouped)
+    # result.head(5)
+    # Output:
+    # [END bigframes_dataframe_bqml_mf_explicit_recommend_model]
     pass
diff --git a/samples/snippets/mf_implicit_model_test.py b/samples/snippets/mf_implicit_model_test.py
index 54dc66c645..dc658ad417 100644
--- a/samples/snippets/mf_implicit_model_test.py
+++ b/samples/snippets/mf_implicit_model_test.py
@@ -41,9 +41,7 @@ def test_implicit_matrix_factorization(random_model_id: str) -> None:
     model.fit(bq_df)
     # [END bigframes_dataframe_mf_implicit_evaluate]
     # [START bigframes_dataframe_mf_implicit_subset]
-
     # [END bigframes_dataframe_mf_implicit_subset]
     # [START bigframes_dataframe_mf_implicit_recommend]
-    model.score()
     # [END bigframes_dataframe_mf_implicit_recommend]
     pass

From 33f446a1cf70ed6440538b20776bf5f972bb5437 Mon Sep 17 00:00:00 2001
From: Daniela <drespana@google.com>
Date: Thu, 1 May 2025 19:06:36 +0000
Subject: [PATCH 05/15] implicit create

---
 samples/snippets/mf_implicit_model_test.py | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/samples/snippets/mf_implicit_model_test.py b/samples/snippets/mf_implicit_model_test.py
index dc658ad417..7203d5e591 100644
--- a/samples/snippets/mf_implicit_model_test.py
+++ b/samples/snippets/mf_implicit_model_test.py
@@ -25,20 +25,23 @@ def test_implicit_matrix_factorization(random_model_id: str) -> None:
     #
     # [END bigframes_dataframe_mf_implicit_data]
     # [START bigframes_dataframe_mf_implicit_model]
-    rating = 0.3 * (1 + (bq_df["session_duration"] - 57937) / 57937)
+    rating_calculation = 0.3 * (1 + (bq_df["session_duration"] - 57937) / 57937)
+    filtered_bq_df = bq_df[rating_calculation < 1].assign(
+        rating=rating_calculation[rating_calculation < 1]
+    )
     model = decomposition.MatrixFactorization(
         num_factors=15,
         feedback_type="implicit",
         user_col="visitorId",
         item_col="contentId",
-        rating_col=rating,
+        rating_col="rating",
         l2_reg=30,
     )
-    # condition of rating < 1 required before fitting model
-
+    model.fit(filtered_bq_df)
     # [END bigframes_dataframe_mf_implicit_model]
     # [START bigframes_dataframe_mf_implicit_evaluate]
-    model.fit(bq_df)
+    model.score()
+    # Output:
     # [END bigframes_dataframe_mf_implicit_evaluate]
     # [START bigframes_dataframe_mf_implicit_subset]
     # [END bigframes_dataframe_mf_implicit_subset]

From 431e9eb30187e938cc2fe0d0e81705f116907c31 Mon Sep 17 00:00:00 2001
From: Daniela <drespana@google.com>
Date: Thu, 1 May 2025 19:11:10 +0000
Subject: [PATCH 06/15] add doc note

---
 samples/snippets/mf_explicit_model_test.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/samples/snippets/mf_explicit_model_test.py b/samples/snippets/mf_explicit_model_test.py
index 71c20db54f..dec8f0ad01 100644
--- a/samples/snippets/mf_explicit_model_test.py
+++ b/samples/snippets/mf_explicit_model_test.py
@@ -46,7 +46,8 @@ def test_explicit_matrix_factorization(random_model_id: str) -> None:
     # 0.485403	                0.395052	        0.025515	            0.390573	        0.68343	        0.68343
     # [END bigframes_dataframe_bqml_mf_explicit_evaluate]
     # [START bigframes_dataframe_bqml_mf_recommend_df]
-    subset = bq_df.head(6)
+    # Use predict() to get the predicted rating for each movie for 5 users
+    subset = bq_df.head(5)
     predicted = model.predict(subset)
     print(predicted)
     # Output:

From 7380f6fac0b509644cf45d6a6a03dcd98e158cc6 Mon Sep 17 00:00:00 2001
From: Daniela <drespana@google.com>
Date: Mon, 5 May 2025 14:44:19 +0000
Subject: [PATCH 07/15] complete explicit tutorial

---
 samples/snippets/mf_explicit_model_test.py | 49 +++++++++++++++-------
 1 file changed, 34 insertions(+), 15 deletions(-)

diff --git a/samples/snippets/mf_explicit_model_test.py b/samples/snippets/mf_explicit_model_test.py
index dec8f0ad01..1493a7e9a8 100644
--- a/samples/snippets/mf_explicit_model_test.py
+++ b/samples/snippets/mf_explicit_model_test.py
@@ -61,21 +61,40 @@ def test_explicit_matrix_factorization(random_model_id: str) -> None:
     # [START bigframes_dataframe_bqml_mf_explicit_recommend_model]
     # import bigframes.bigquery as bbq
 
-    # TODO: implement right_index parameter for DataFrame.merge()
-    # # Load movie data from BigQuery
-    # movies = bpd.read_gbq("bqml_tutorial.movies")
-    # # Merge movie data with rating data
-    # merged_df = bpd.merge(predicted, movies, left_on='item_id', right_on='movie_id')
-    # # separate users from data to call struct on data
-    # users = merged_df[['user_id', 'item_id']]
-    # user_data = merged_df[['movie_title', 'genre', 'predicted_rating', 'movie_id']].set_index('movie_id')
-    # struct_data = bbq.struct(user_data).to_frame()
-    # # Merge data to groupby predicted_rating and sort
-    # merged_user = bpd.merge(users, struct_data, left_on='item_id', right_index=True).drop('item_id', axis=1)
-    # desc_pred = merged_user.sort_values(by='predicted_rating', ascending=False)
-    # grouped = desc_pred.groupby('predicted_rating')
-    # result = bbq.array_agg(grouped)
-    # result.head(5)
+    # Load movies
+    movies = bpd.read_gbq("bqml_tutorial.movies")
+
+    # Merge the movies df with the previously created predicted df
+    merged_df = bpd.merge(predicted, movies, left_on="item_id", right_on="movie_id")
+
+    # Separate users and predicted data, setting the index to 'movie_id'
+    users = merged_df[["user_id", "movie_id"]].set_index("movie_id")
+
+    # Take the predicted data and sort it in descending order by 'predicted_rating', setting the index to 'movie_id'
+    sort_data = (
+        merged_df[["movie_title", "genre", "predicted_rating", "movie_id"]]
+        .sort_values(by="predicted_rating", ascending=False)
+        .set_index("movie_id")
+    )
+
+    # re-merge the separated dfs by index
+    merged_user = sort_data.join(users, how="outer")
+
+    # Keep the first 5 rows per user and index them by user_id (result must be assigned)
+    merged_user = merged_user.groupby("user_id").head(5).set_index("user_id")
+    print(merged_user.sort_index())
     # Output:
+    # 	            movie_title	                genre	        predicted_rating
+    # user_id
+    #   1	    Saving Private Ryan (1998)	Action|Drama|War	    5.19326
+    #   1	        Fargo (1996)	       Crime|Drama|Thriller	    4.996954
+    #   1	    Driving Miss Daisy (1989)	    Drama	            4.983671
+    #   1	        Ben-Hur (1959)	       Action|Adventure|Drama	4.877622
+    #   1	     Schindler's List (1993)	   Drama|War	        4.802336
+    #   2	    Saving Private Ryan (1998)	Action|Drama|War	    5.19326
+    #   2	        Braveheart (1995)	    Action|Drama|War	    5.174145
+    #   2	        Gladiator (2000)	      Action|Drama	        5.066372
+    #   2	        On Golden Pond (1981)	     Drama	            5.01198
+    #   2	    Driving Miss Daisy (1989)	     Drama	            4.983671
     # [END bigframes_dataframe_bqml_mf_explicit_recommend_model]
     pass

From 74c0d853b8616b154bfe014433eb56fc6d9ebf1b Mon Sep 17 00:00:00 2001
From: Daniela <drespana@google.com>
Date: Mon, 5 May 2025 15:00:31 +0000
Subject: [PATCH 08/15] remove implicit snippets

---
 samples/snippets/mf_implicit_model_test.py | 50 ----------------------
 1 file changed, 50 deletions(-)
 delete mode 100644 samples/snippets/mf_implicit_model_test.py

diff --git a/samples/snippets/mf_implicit_model_test.py b/samples/snippets/mf_implicit_model_test.py
deleted file mode 100644
index 7203d5e591..0000000000
--- a/samples/snippets/mf_implicit_model_test.py
+++ /dev/null
@@ -1,50 +0,0 @@
-# Copyright 2024 Google LLC
-#
-# Licensed under the Apache License, Version 2.0 (t
-# you may not use this file except in compliance wi
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in
-# distributed under the License is distributed on a
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, eit
-# See the License for the specific language governi
-# limitations under the License.
-
-
-def test_implicit_matrix_factorization(random_model_id: str) -> None:
-    # [START bigframes_dataframe_mf_implicit_data]
-    from bigframes.ml import decomposition
-    import bigframes.pandas as bpd
-
-    # sample data must be created from joined data and then grouped and ordered
-    bq_df = bpd.read_gbq("bqml_tutorial.analytics_session_data")
-    print(bq_df.peek(5))
-    # Expected output:
-    #
-    # [END bigframes_dataframe_mf_implicit_data]
-    # [START bigframes_dataframe_mf_implicit_model]
-    rating_calculation = 0.3 * (1 + (bq_df["session_duration"] - 57937) / 57937)
-    filtered_bq_df = bq_df[rating_calculation < 1].assign(
-        rating=rating_calculation[rating_calculation < 1]
-    )
-    model = decomposition.MatrixFactorization(
-        num_factors=15,
-        feedback_type="implicit",
-        user_col="visitorId",
-        item_col="contentId",
-        rating_col="rating",
-        l2_reg=30,
-    )
-    model.fit(filtered_bq_df)
-    # [END bigframes_dataframe_mf_implicit_model]
-    # [START bigframes_dataframe_mf_implicit_evaluate]
-    model.score()
-    # Output:
-    # [END bigframes_dataframe_mf_implicit_evaluate]
-    # [START bigframes_dataframe_mf_implicit_subset]
-    # [END bigframes_dataframe_mf_implicit_subset]
-    # [START bigframes_dataframe_mf_implicit_recommend]
-    # [END bigframes_dataframe_mf_implicit_recommend]
-    pass

From 862e11815e5995ebc8cf8f1d5e8337ae2f1f656d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tim=20Swe=C3=B1a=20=28Swast=29?= <swast@google.com>
Date: Wed, 7 May 2025 09:40:06 -0500
Subject: [PATCH 09/15] Update samples/snippets/mf_explicit_model_test.py

---
 samples/snippets/mf_explicit_model_test.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/samples/snippets/mf_explicit_model_test.py b/samples/snippets/mf_explicit_model_test.py
index 1493a7e9a8..d67c7b9199 100644
--- a/samples/snippets/mf_explicit_model_test.py
+++ b/samples/snippets/mf_explicit_model_test.py
@@ -45,7 +45,7 @@ def test_explicit_matrix_factorization(random_model_id: str) -> None:
     # mean_absolute_error	mean_squared_error	mean_squared_log_error	median_absolute_error	r2_score	explained_variance
     # 0.485403	                0.395052	        0.025515	            0.390573	        0.68343	        0.68343
     # [END bigframes_dataframe_bqml_mf_explicit_evaluate]
-    # [START bigframes_dataframe_bqml_mf_recommend_df]
+    # [START bigframes_dataframe_bqml_mf_explicit_recommend_df]
     # Use predict() to get the predicted rating for each movie for 5 users
     subset = bq_df.head(5)
     predicted = model.predict(subset)

From 7f2d7f672c8ec085dcd384ee4eb03ce88e5ba98a Mon Sep 17 00:00:00 2001
From: Tim Swena <swast@google.com>
Date: Wed, 7 May 2025 10:24:07 -0500
Subject: [PATCH 10/15] add snippets to create dataset and movielens tables

---
 samples/snippets/mf_explicit_model_test.py | 63 ++++++++++++++++++++++
 1 file changed, 63 insertions(+)

diff --git a/samples/snippets/mf_explicit_model_test.py b/samples/snippets/mf_explicit_model_test.py
index d67c7b9199..b4fb2cad82 100644
--- a/samples/snippets/mf_explicit_model_test.py
+++ b/samples/snippets/mf_explicit_model_test.py
@@ -15,6 +15,69 @@
 
 def test_explicit_matrix_factorization(random_model_id: str) -> None:
     your_model_id = random_model_id
+
+    # [START bigframes_dataframes_bqml_mf_explicit_create_dataset]
+    import google.cloud.bigquery
+
+    bqclient = google.cloud.bigquery.Client()
+    bqclient.create_dataset("bqml_tutorial", exists_ok=True)
+    # [END bigframes_dataframes_bqml_mf_explicit_create_dataset]
+
+    # [START bigframes_dataframes_bqml_mf_explicit_upload_movielens]
+    import io
+    import zipfile
+
+    import google.api_core.exceptions
+    import requests
+
+    try:
+        # Check if you've already created the Movielens tables to avoid downloading
+        # and uploading the dataset unnecessarily.
+        bqclient.get_table("bqml_tutorial.ratings")
+        bqclient.get_table("bqml_tutorial.movies")
+    except google.api_core.exceptions.NotFound:
+        # Download the https://grouplens.org/datasets/movielens/1m/ dataset.
+        ml1m = requests.get("https://files.grouplens.org/datasets/movielens/ml-1m.zip")
+        ml1m.raise_for_status()  # Fail fast instead of unzipping an HTTP error page.
+        ml1m_zip = zipfile.ZipFile(io.BytesIO(ml1m.content))
+
+        # Upload the ratings data into the ratings table.
+        with ml1m_zip.open("ml-1m/ratings.dat") as ratings_file:
+            ratings_content = ratings_file.read()
+
+        ratings_csv = io.BytesIO(ratings_content.replace(b"::", b","))
+        ratings_config = google.cloud.bigquery.LoadJobConfig()
+        ratings_config.source_format = "CSV"
+        ratings_config.write_disposition = "WRITE_TRUNCATE"
+        ratings_config.schema = [
+            google.cloud.bigquery.SchemaField("user_id", "INT64"),
+            google.cloud.bigquery.SchemaField("item_id", "INT64"),
+            google.cloud.bigquery.SchemaField("rating", "FLOAT64"),
+            google.cloud.bigquery.SchemaField("timestamp", "TIMESTAMP"),
+        ]
+        bqclient.load_table_from_file(
+            ratings_csv, "bqml_tutorial.ratings", job_config=ratings_config
+        ).result()
+
+        # Upload the movie data into the movies table.
+        with ml1m_zip.open("ml-1m/movies.dat") as movies_file:
+            movies_content = movies_file.read()
+
+        movies_csv = io.BytesIO(movies_content.replace(b"::", b"@"))
+        movies_config = google.cloud.bigquery.LoadJobConfig()
+        movies_config.source_format = "CSV"
+        movies_config.field_delimiter = "@"
+        movies_config.write_disposition = "WRITE_TRUNCATE"
+        movies_config.schema = [
+            google.cloud.bigquery.SchemaField("movie_id", "INT64"),
+            google.cloud.bigquery.SchemaField("movie_title", "STRING"),
+            google.cloud.bigquery.SchemaField("genre", "STRING"),
+        ]
+        bqclient.load_table_from_file(
+            movies_csv, "bqml_tutorial.movies", job_config=movies_config
+        ).result()
+    # [END bigframes_dataframes_bqml_mf_explicit_upload_movielens]
+
     # [START bigframes_dataframes_bqml_mf_explicit_create]
     from bigframes.ml import decomposition
     import bigframes.pandas as bpd

From 6bdfbfb29bdef9e2b44d75071865bbec6062871e Mon Sep 17 00:00:00 2001
From: Tim Swena <swast@google.com>
Date: Wed, 7 May 2025 10:27:37 -0500
Subject: [PATCH 11/15] correct the region tags

---
 samples/snippets/mf_explicit_model_test.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/samples/snippets/mf_explicit_model_test.py b/samples/snippets/mf_explicit_model_test.py
index b4fb2cad82..9189147a67 100644
--- a/samples/snippets/mf_explicit_model_test.py
+++ b/samples/snippets/mf_explicit_model_test.py
@@ -16,14 +16,14 @@
 def test_explicit_matrix_factorization(random_model_id: str) -> None:
     your_model_id = random_model_id
 
-    # [START bigframes_dataframes_bqml_mf_explicit_create_dataset]
+    # [START bigquery_dataframes_bqml_mf_explicit_create_dataset]
     import google.cloud.bigquery
 
     bqclient = google.cloud.bigquery.Client()
     bqclient.create_dataset("bqml_tutorial", exists_ok=True)
-    # [END bigframes_dataframes_bqml_mf_explicit_create_dataset]
+    # [END bigquery_dataframes_bqml_mf_explicit_create_dataset]
 
-    # [START bigframes_dataframes_bqml_mf_explicit_upload_movielens]
+    # [START bigquery_dataframes_bqml_mf_explicit_upload_movielens]
     import io
     import zipfile
 
@@ -76,9 +76,9 @@ def test_explicit_matrix_factorization(random_model_id: str) -> None:
         bqclient.load_table_from_file(
             movies_csv, "bqml_tutorial.movies", job_config=movies_config
         ).result()
-    # [END bigframes_dataframes_bqml_mf_explicit_upload_movielens]
+    # [END bigquery_dataframes_bqml_mf_explicit_upload_movielens]
 
-    # [START bigframes_dataframes_bqml_mf_explicit_create]
+    # [START bigquery_dataframes_bqml_mf_explicit_create]
     from bigframes.ml import decomposition
     import bigframes.pandas as bpd
 
@@ -100,7 +100,7 @@ def test_explicit_matrix_factorization(random_model_id: str) -> None:
     model.to_gbq(
         your_model_id, replace=True  # For example: "bqml_tutorial.mf_explicit"
     )
-    # [END bigframes_dataframes_bqml_mf_explicit_create]
+    # [END bigquery_dataframes_bqml_mf_explicit_create]
     # [START bigframes_dataframe_bqml_mf_explicit_evaluate]
     # Evaluate the model using the score() function
     model.score()

From 1847d61dbca675f159f68cc15642c2dc267fc889 Mon Sep 17 00:00:00 2001
From: Tim Swena <swast@google.com>
Date: Wed, 7 May 2025 10:28:37 -0500
Subject: [PATCH 12/15] correct more region tags

---
 samples/snippets/mf_explicit_model_test.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/samples/snippets/mf_explicit_model_test.py b/samples/snippets/mf_explicit_model_test.py
index 9189147a67..a13b77263d 100644
--- a/samples/snippets/mf_explicit_model_test.py
+++ b/samples/snippets/mf_explicit_model_test.py
@@ -101,14 +101,14 @@ def test_explicit_matrix_factorization(random_model_id: str) -> None:
         your_model_id, replace=True  # For example: "bqml_tutorial.mf_explicit"
     )
     # [END bigquery_dataframes_bqml_mf_explicit_create]
-    # [START bigframes_dataframe_bqml_mf_explicit_evaluate]
+    # [START bigquery_dataframes_bqml_mf_explicit_evaluate]
     # Evaluate the model using the score() function
     model.score()
     # Output:
     # mean_absolute_error	mean_squared_error	mean_squared_log_error	median_absolute_error	r2_score	explained_variance
     # 0.485403	                0.395052	        0.025515	            0.390573	        0.68343	        0.68343
-    # [END bigframes_dataframe_bqml_mf_explicit_evaluate]
-    # [START bigframes_dataframe_bqml_mf_explicit_recommend_df]
+    # [END bigquery_dataframes_bqml_mf_explicit_evaluate]
+    # [START bigquery_dataframes_bqml_mf_explicit_recommend_df]
     # Use predict() to get the predicted rating for each movie for 5 users
     subset = bq_df.head(5)
     predicted = model.predict(subset)
@@ -120,8 +120,8 @@ def test_explicit_matrix_factorization(random_model_id: str) -> None:
     # 2	    2.679067	     5543	  920	     2.0
     # 3	    4.323458	     445	  3175	     5.0
     # 4	    3.476911	     5535	  235	     4.0
-    # [END bigframes_dataframe_bqml_mf_explicit_recommend_df]
-    # [START bigframes_dataframe_bqml_mf_explicit_recommend_model]
+    # [END bigquery_dataframes_bqml_mf_explicit_recommend_df]
+    # [START bigquery_dataframes_bqml_mf_explicit_recommend_model]
     # import bigframes.bigquery as bbq
 
     # Load movies
@@ -159,5 +159,5 @@ def test_explicit_matrix_factorization(random_model_id: str) -> None:
     #   2	        Gladiator (2000)	      Action|Drama	        5.066372
     #   2	        On Golden Pond (1981)	     Drama	            5.01198
     #   2	    Driving Miss Daisy (1989)	     Drama	            4.983671
-    # [END bigframes_dataframe_bqml_mf_explicit_recommend_model]
+    # [END bigquery_dataframes_bqml_mf_explicit_recommend_model]
     pass

From 0ad94b2e09d20070ce89111a3edc8c0cfafac0e4 Mon Sep 17 00:00:00 2001
From: rey-esp <drespana@google.com>
Date: Wed, 7 May 2025 14:14:54 -0500
Subject: [PATCH 13/15] Update samples/snippets/mf_explicit_model_test.py
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-authored-by: Tim Sweña (Swast) <swast@google.com>
---
 samples/snippets/mf_explicit_model_test.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/samples/snippets/mf_explicit_model_test.py b/samples/snippets/mf_explicit_model_test.py
index a13b77263d..ba5e1fa026 100644
--- a/samples/snippets/mf_explicit_model_test.py
+++ b/samples/snippets/mf_explicit_model_test.py
@@ -160,4 +160,3 @@ def test_explicit_matrix_factorization(random_model_id: str) -> None:
     #   2	        On Golden Pond (1981)	     Drama	            5.01198
     #   2	    Driving Miss Daisy (1989)	     Drama	            4.983671
     # [END bigquery_dataframes_bqml_mf_explicit_recommend_model]
-    pass

From 1a81d6a452ff2dda9c503a0fe668e27669abdcb6 Mon Sep 17 00:00:00 2001
From: rey-esp <drespana@google.com>
Date: Wed, 7 May 2025 14:15:25 -0500
Subject: [PATCH 14/15] Update samples/snippets/mf_explicit_model_test.py
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-authored-by: Tim Sweña (Swast) <swast@google.com>
---
 samples/snippets/mf_explicit_model_test.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/samples/snippets/mf_explicit_model_test.py b/samples/snippets/mf_explicit_model_test.py
index ba5e1fa026..08506991c5 100644
--- a/samples/snippets/mf_explicit_model_test.py
+++ b/samples/snippets/mf_explicit_model_test.py
@@ -110,7 +110,7 @@ def test_explicit_matrix_factorization(random_model_id: str) -> None:
     # [END bigquery_dataframes_bqml_mf_explicit_evaluate]
     # [START bigquery_dataframes_bqml_mf_explicit_recommend_df]
     # Use predict() to get the predicted rating for each movie for 5 users
-    subset = bq_df.head(5)
+    subset = bq_df[["user_id"]].head(5)
     predicted = model.predict(subset)
     print(predicted)
     # Output:

From dbfadbd2536e6522e210ce2dbd1541eab6b4de3c Mon Sep 17 00:00:00 2001
From: rey-esp <drespana@google.com>
Date: Wed, 7 May 2025 15:50:37 -0500
Subject: [PATCH 15/15] update evaluate section

---
 samples/snippets/mf_explicit_model_test.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/samples/snippets/mf_explicit_model_test.py b/samples/snippets/mf_explicit_model_test.py
index 08506991c5..fb54b7271c 100644
--- a/samples/snippets/mf_explicit_model_test.py
+++ b/samples/snippets/mf_explicit_model_test.py
@@ -103,7 +103,7 @@ def test_explicit_matrix_factorization(random_model_id: str) -> None:
     # [END bigquery_dataframes_bqml_mf_explicit_create]
     # [START bigquery_dataframes_bqml_mf_explicit_evaluate]
     # Evaluate the model using the score() function
-    model.score()
+    model.score(bq_df)
     # Output:
     # mean_absolute_error	mean_squared_error	mean_squared_log_error	median_absolute_error	r2_score	explained_variance
     # 0.485403	                0.395052	        0.025515	            0.390573	        0.68343	        0.68343