Skip to content

Commit 81ece46

Browse files
authored
fix!: rename cosine_similarity to paired_cosine_distances (#393)
Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly:

- [ ] Make sure to open an issue as a [bug/issue](https://togithub.com/googleapis/python-bigquery-dataframes/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea
- [ ] Ensure the tests and linter pass
- [ ] Code coverage does not decrease (if any source code was changed)
- [ ] Appropriate docs were updated (if necessary)

Fixes #<issue_number_goes_here> 🦕
1 parent a0490a4 commit 81ece46

File tree

3 files changed

+11
-14
lines changed

3 files changed

+11
-14
lines changed

bigframes/ml/metrics/pairwise.py

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -20,15 +20,17 @@
2020
import third_party.bigframes_vendored.sklearn.metrics.pairwise as vendored_metrics_pairwise
2121

2222

23-
def cosine_similarity(
23+
def paired_cosine_distances(
2424
X: Union[bpd.DataFrame, bpd.Series], Y: Union[bpd.DataFrame, bpd.Series]
2525
) -> bpd.DataFrame:
2626
X, Y = utils.convert_to_dataframe(X, Y)
2727
if len(X.columns) != 1 or len(Y.columns) != 1:
2828
raise ValueError("Inputs X and Y can only contain 1 column.")
2929

3030
base_bqml = core.BaseBqml(session=X._session)
31-
return base_bqml.distance(X, Y, type="COSINE", name="cosine_similarity")
31+
return base_bqml.distance(X, Y, type="COSINE", name="cosine_distance")
3232

3333

34-
cosine_similarity.__doc__ = inspect.getdoc(vendored_metrics_pairwise.cosine_similarity)
34+
paired_cosine_distances.__doc__ = inspect.getdoc(
35+
vendored_metrics_pairwise.paired_cosine_distances
36+
)

tests/system/small/ml/test_metrics_pairwise.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -19,15 +19,15 @@
1919
import bigframes.pandas as bpd
2020

2121

22-
def test_cosine_similarity():
22+
def test_paired_cosine_distances():
2323
x_col = [np.array([4.1, 0.5, 1.0])]
2424
y_col = [np.array([3.0, 0.0, 2.5])]
2525
X = bpd.read_pandas(pd.DataFrame({"X": x_col}))
2626
Y = bpd.read_pandas(pd.DataFrame({"Y": y_col}))
2727

28-
result = metrics.pairwise.cosine_similarity(X, Y)
28+
result = metrics.pairwise.paired_cosine_distances(X, Y)
2929
expected_pd_df = pd.DataFrame(
30-
{"X": x_col, "Y": y_col, "cosine_similarity": [0.108199]}
30+
{"X": x_col, "Y": y_col, "cosine_distance": [0.108199]}
3131
)
3232

3333
pd.testing.assert_frame_equal(

third_party/bigframes_vendored/sklearn/metrics/pairwise.py

Lines changed: 3 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -11,13 +11,8 @@
1111
import bigframes.pandas as bpd
1212

1313

14-
def cosine_similarity(X, Y) -> bpd.DataFrame:
15-
"""Compute cosine similarity between samples in X and Y.
16-
17-
Cosine similarity, or the cosine kernel, computes similarity as the
18-
normalized dot product of X and Y:
19-
20-
K(X, Y) = <X, Y> / (||X||*||Y||)
14+
def paired_cosine_distances(X, Y) -> bpd.DataFrame:
15+
"""Compute the paired cosine distances between X and Y.
2116
2217
Args:
2318
X (Series or single column DataFrame of array of numeric type):
@@ -26,6 +21,6 @@ def cosine_similarity(X, Y) -> bpd.DataFrame:
2621
Input data. X and Y are mapped by indexes, must have the same index.
2722
2823
Returns:
29-
bigframes.dataframe.DataFrame: DataFrame with columns of X, Y and cosine_similarity
24+
bigframes.dataframe.DataFrame: DataFrame with columns of X, Y and cosine_distance
3025
"""
3126
raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)

0 commit comments

Comments
 (0)