From 10cc6a819933e36548892e30d4832538cf8a20d5 Mon Sep 17 00:00:00 2001
From: Garrett Wu
Date: Tue, 15 Jul 2025 20:23:32 +0000
Subject: [PATCH] feat: add ml.metrics.mean_absolute_error method

Add `mean_absolute_error` to `bigframes.ml.metrics` and export it from the
package `__init__`.

The inputs are converted with `utils.batch_convert_to_series`, and the
result is the sum of `|y_pred - y_true|` divided by `len(y_true)`. The
public docstring is attached from the vendored sklearn stub via
`inspect.getdoc`, the same way `mean_squared_error` does it.

A system test covers the new function.
---
 bigframes/ml/metrics/__init__.py              |  2 ++
 bigframes/ml/metrics/_metrics.py              | 14 ++++++++++
 tests/system/small/ml/test_metrics.py         |  7 +++++
 .../sklearn/metrics/_regression.py            | 27 +++++++++++++++++++
 4 files changed, 50 insertions(+)

diff --git a/bigframes/ml/metrics/__init__.py b/bigframes/ml/metrics/__init__.py
index e79b46877b..f6c7d5e52f 100644
--- a/bigframes/ml/metrics/__init__.py
+++ b/bigframes/ml/metrics/__init__.py
@@ -18,6 +18,7 @@
     auc,
     confusion_matrix,
     f1_score,
+    mean_absolute_error,
     mean_squared_error,
     precision_score,
     r2_score,
@@ -36,6 +37,7 @@
     "confusion_matrix",
     "precision_score",
     "f1_score",
+    "mean_absolute_error",
     "mean_squared_error",
     "pairwise",
 ]
diff --git a/bigframes/ml/metrics/_metrics.py b/bigframes/ml/metrics/_metrics.py
index d7591ef011..c9639f4b16 100644
--- a/bigframes/ml/metrics/_metrics.py
+++ b/bigframes/ml/metrics/_metrics.py
@@ -344,3 +344,17 @@ def mean_squared_error(
 mean_squared_error.__doc__ = inspect.getdoc(
     vendored_metrics_regression.mean_squared_error
 )
+
+
+def mean_absolute_error(
+    y_true: Union[bpd.DataFrame, bpd.Series],
+    y_pred: Union[bpd.DataFrame, bpd.Series],
+) -> float:
+    y_true_series, y_pred_series = utils.batch_convert_to_series(y_true, y_pred)
+
+    return (y_pred_series - y_true_series).abs().sum() / len(y_true_series)
+
+
+mean_absolute_error.__doc__ = inspect.getdoc(
+    vendored_metrics_regression.mean_absolute_error
+)
diff --git a/tests/system/small/ml/test_metrics.py b/tests/system/small/ml/test_metrics.py
index b80202bdbe..fd5dbef2e3 100644
--- a/tests/system/small/ml/test_metrics.py
+++ b/tests/system/small/ml/test_metrics.py
@@ -818,3 +818,10 @@ def test_mean_squared_error(session: bigframes.Session):
     df = session.read_pandas(pd_df)
     mse = metrics.mean_squared_error(df["y_true"], df["y_pred"])
     assert mse == 0.375
+
+
+def test_mean_absolute_error(session: bigframes.Session):
+    pd_df = pd.DataFrame({"y_true": [3, -0.5, 2, 7], "y_pred": [2.5, 0.0, 2, 8]})
+    df = session.read_pandas(pd_df)
+    mae = metrics.mean_absolute_error(df["y_true"], df["y_pred"])
+    assert mae == 0.5
diff --git a/third_party/bigframes_vendored/sklearn/metrics/_regression.py b/third_party/bigframes_vendored/sklearn/metrics/_regression.py
index 56f78c6d0b..1c14e8068b 100644
--- a/third_party/bigframes_vendored/sklearn/metrics/_regression.py
+++ b/third_party/bigframes_vendored/sklearn/metrics/_regression.py
@@ -91,3 +91,30 @@ def mean_squared_error(y_true, y_pred) -> float:
         float: Mean squared error.
     """
     raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
+
+
+def mean_absolute_error(y_true, y_pred) -> float:
+    """Mean absolute error regression loss.
+
+    **Examples:**
+
+        >>> import bigframes.pandas as bpd
+        >>> import bigframes.ml.metrics
+        >>> bpd.options.display.progress_bar = None
+
+        >>> y_true = bpd.DataFrame([3, -0.5, 2, 7])
+        >>> y_pred = bpd.DataFrame([2.5, 0.0, 2, 8])
+        >>> mae = bigframes.ml.metrics.mean_absolute_error(y_true, y_pred)
+        >>> mae
+        np.float64(0.5)
+
+    Args:
+        y_true (Series or DataFrame of shape (n_samples,)):
+            Ground truth (correct) target values.
+        y_pred (Series or DataFrame of shape (n_samples,)):
+            Estimated target values.
+
+    Returns:
+        float: Mean absolute error.
+    """
+    raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)