diff --git a/bigframes/ml/decomposition.py b/bigframes/ml/decomposition.py index ece950a5a2..3ff32d2433 100644 --- a/bigframes/ml/decomposition.py +++ b/bigframes/ml/decomposition.py @@ -360,5 +360,12 @@ def score( if not self._bqml_model: raise RuntimeError("A model must be fitted before score") - # TODO(b/291973741): X param is ignored. Update BQML supports input in ML.EVALUATE. - return self._bqml_model.evaluate() + if X is not None and y is not None: + X, y = utils.batch_convert_to_dataframe( + X, y, session=self._bqml_model.session + ) + input_data = X.join(y, how="outer") + else: + input_data = X + + return self._bqml_model.evaluate(input_data) diff --git a/tests/system/large/ml/test_decomposition.py b/tests/system/large/ml/test_decomposition.py index d1a5f9f2aa..e0e4b79c6f 100644 --- a/tests/system/large/ml/test_decomposition.py +++ b/tests/system/large/ml/test_decomposition.py @@ -13,6 +13,7 @@ # limitations under the License. import pandas as pd +import pandas.testing from bigframes.ml import decomposition from tests.system import utils @@ -193,7 +194,16 @@ def test_decomposition_mf_configure_fit_load( ) ) - reloaded_model.score(new_ratings) + # Make sure the input to score is not ignored. + scores_training_data = reloaded_model.score().to_pandas() + scores_new_ratings = reloaded_model.score(new_ratings).to_pandas() + pandas.testing.assert_index_equal( + scores_training_data.columns, scores_new_ratings.columns + ) + assert ( + scores_training_data["mean_squared_error"].iloc[0] + != scores_new_ratings["mean_squared_error"].iloc[0] + ) result = reloaded_model.predict(new_ratings).to_pandas() diff --git a/tests/unit/ml/test_golden_sql.py b/tests/unit/ml/test_golden_sql.py index 62cfe09704..10fefcc457 100644 --- a/tests/unit/ml/test_golden_sql.py +++ b/tests/unit/ml/test_golden_sql.py @@ -81,6 +81,7 @@ def mock_X(mock_y, mock_session): ["index_column_id"], ["index_column_label"], ) + type(mock_X).sql = mock.PropertyMock(return_value="input_X_sql_property") mock_X.reset_index(drop=True).cache().sql = "input_X_no_index_sql" mock_X.join(mock_y).sql = "input_X_y_sql" mock_X.join(mock_y).cache.return_value = mock_X.join(mock_y) @@ -248,7 +249,7 @@ def test_decomposition_mf_predict(mock_session, bqml_model, mock_X): ) -def test_decomposition_mf_score(mock_session, bqml_model, mock_X): +def test_decomposition_mf_score(mock_session, bqml_model): model = decomposition.MatrixFactorization( num_factors=34, feedback_type="explicit", @@ -258,8 +259,23 @@ def test_decomposition_mf_score(mock_session, bqml_model, mock_X): l2_reg=9.83, ) model._bqml_model = bqml_model - model.score(mock_X) - + model.score() mock_session.read_gbq.assert_called_once_with( "SELECT * FROM ML.EVALUATE(MODEL `model_project`.`model_dataset`.`model_id`)" ) + + +def test_decomposition_mf_score_with_x(mock_session, bqml_model, mock_X): + model = decomposition.MatrixFactorization( + num_factors=34, + feedback_type="explicit", + user_col="user_id", + item_col="item_col", + rating_col="rating_col", + l2_reg=9.83, + ) + model._bqml_model = bqml_model + model.score(mock_X) + mock_session.read_gbq.assert_called_once_with( + "SELECT * FROM ML.EVALUATE(MODEL `model_project`.`model_dataset`.`model_id`,\n (input_X_sql_property))" + ) diff --git a/third_party/bigframes_vendored/sklearn/decomposition/_mf.py b/third_party/bigframes_vendored/sklearn/decomposition/_mf.py index fb29cc8984..c3c3a77b71 100644 --- a/third_party/bigframes_vendored/sklearn/decomposition/_mf.py +++ b/third_party/bigframes_vendored/sklearn/decomposition/_mf.py @@ -73,11 +73,13 @@ def score(self, X=None, y=None): for the outputs relevant to this model type. Args: - X (default None): - Ignored. + X (bigframes.dataframe.DataFrame | bigframes.series.Series | None): + DataFrame of shape (n_samples, n_features). Test samples. + + y (bigframes.dataframe.DataFrame | bigframes.series.Series | None): + DataFrame of shape (n_samples,) or (n_samples, n_outputs). True + labels for `X`. - y (default None): - Ignored. Returns: bigframes.dataframe.DataFrame: DataFrame that represents model metrics. """