Skip to content

Commit b23f7fa

Browse files
committed
feat: Add ml.metrics.pairwise.manhattan_distance
1 parent a0490a4 commit b23f7fa

File tree

3 files changed

+45
-0
lines changed

3 files changed

+45
-0
lines changed

bigframes/ml/metrics/pairwise.py

Lines changed: 16 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -32,3 +32,19 @@ def cosine_similarity(
3232

3333

3434
cosine_similarity.__doc__ = inspect.getdoc(vendored_metrics_pairwise.cosine_similarity)
35+
36+
37+
def manhattan_distance(
    X: Union[bpd.DataFrame, bpd.Series], Y: Union[bpd.DataFrame, bpd.Series]
) -> bpd.DataFrame:
    # The user-facing docstring is attached right after this definition,
    # copied from the vendored stub of the same name.
    X, Y = utils.convert_to_dataframe(X, Y)

    # Only single-column inputs are accepted on either side.
    single_column_inputs = len(X.columns) == 1 and len(Y.columns) == 1
    if not single_column_inputs:
        raise ValueError("Inputs X and Y can only contain 1 column.")

    # Delegate the computation to the BQML helper bound to X's session.
    return core.BaseBqml(session=X._session).distance(
        X, Y, type="MANHATTAN", name="manhattan_distance"
    )


# Reuse the vendored stub's documentation as this function's docstring.
manhattan_distance.__doc__ = inspect.getdoc(
    vendored_metrics_pairwise.manhattan_distance
)

tests/system/small/ml/test_metrics_pairwise.py

Lines changed: 14 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -33,3 +33,17 @@ def test_cosine_similarity():
3333
pd.testing.assert_frame_equal(
3434
result.to_pandas(), expected_pd_df, check_dtype=False, check_index_type=False
3535
)
36+
37+
38+
def test_manhattan_distance():
    """Check manhattan_distance on one pair of 3-element vectors."""
    # |4.1 - 3.0| + |0.5 - 0.0| + |1.0 - 2.5| = 1.1 + 0.5 + 1.5 = 3.1
    x_vectors = [np.array([4.1, 0.5, 1.0])]
    y_vectors = [np.array([3.0, 0.0, 2.5])]
    X = bpd.read_pandas(pd.DataFrame({"X": x_vectors}))
    Y = bpd.read_pandas(pd.DataFrame({"Y": y_vectors}))

    actual_pd_df = metrics.pairwise.manhattan_distance(X, Y).to_pandas()

    # The result is expected to carry both inputs plus the distance column.
    expected_pd_df = pd.DataFrame(
        {"X": x_vectors, "Y": y_vectors, "manhattan_distance": [3.1]}
    )
    pd.testing.assert_frame_equal(
        actual_pd_df, expected_pd_df, check_dtype=False, check_index_type=False
    )

third_party/bigframes_vendored/sklearn/metrics/pairwise.py

Lines changed: 15 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -29,3 +29,18 @@ def cosine_similarity(X, Y) -> bpd.DataFrame:
2929
bigframes.dataframe.DataFrame: DataFrame with columns of X, Y and cosine_similarity
3030
"""
3131
raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
32+
33+
34+
def manhattan_distance(X, Y) -> bpd.DataFrame:
    """Compute the Manhattan (L1) distance between the vectors in X and Y.

    Args:
        X (Series or single column DataFrame of array of numeric type):
            Input data.
        Y (Series or single column DataFrame of array of numeric type):
            Input data. Rows of X and Y are matched by index, so both
            inputs must have the same index.

    Returns:
        bigframes.dataframe.DataFrame: DataFrame with the columns of X and Y
            plus a manhattan_distance column.
    """
    raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)

0 commit comments

Comments
 (0)