Skip to content

Commit c36ba63

Browse files
committed
minor fix
1 parent c36909e commit c36ba63

File tree

3 files changed

+88
-73
lines changed

3 files changed

+88
-73
lines changed

bigframes/bigquery/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -178,7 +178,7 @@ def vector_search(
178178
... column_to_search="my_embedding",
179179
... query=search_query,
180180
... top_k=2)
181-
query_id embedding id my_embedding distance
181+
query_id embedding id my_embedding distance
182182
1 cat [3. 5.2] 5 [5. 5.4] 2.009975
183183
0 dog [1. 2.] 1 [1. 2.] 0.0
184184
0 dog [1. 2.] 4 [1. 3.2] 1.2
Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
# Copyright 2024 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
import numpy as np
16+
import pandas as pd
17+
18+
import bigframes.bigquery as bbq
19+
import bigframes.pandas as bpd
20+
21+
22+
def test_apply_sql_df_query():
23+
query = bpd.DataFrame(
24+
{
25+
"query_id": ["dog", "cat"],
26+
"embedding": [[1.0, 2.0], [3.0, 5.2]],
27+
}
28+
)
29+
options = {
30+
"base_table": "bigframes-dev.bigframes_tests_sys.base_table",
31+
"column_to_search": "my_embedding",
32+
"distance_type": "cosine",
33+
"top_k": 2,
34+
}
35+
result = bbq.utils.apply_sql(query, options).to_pandas() # type:ignore
36+
expected = pd.DataFrame(
37+
{
38+
"query_id": ["cat", "dog", "dog", "cat"],
39+
"embedding": [
40+
np.array([3.0, 5.2]),
41+
np.array([1.0, 2.0]),
42+
np.array([1.0, 2.0]),
43+
np.array([3.0, 5.2]),
44+
],
45+
"id": [1, 2, 1, 2],
46+
"my_embedding": [
47+
np.array([1.0, 2.0]),
48+
np.array([2.0, 4.0]),
49+
np.array([1.0, 2.0]),
50+
np.array([2.0, 4.0]),
51+
],
52+
"distance": [0.001777, 0.0, 0.0, 0.001777],
53+
},
54+
index=pd.Index([1, 0, 0, 1], dtype="Int64"),
55+
)
56+
pd.testing.assert_frame_equal(result, expected, check_dtype=False, rtol=0.1)
57+
58+
59+
def test_apply_sql_series_query():
60+
query = bpd.Series([[1.0, 2.0], [3.0, 5.2]])
61+
options = {
62+
"base_table": "bigframes-dev.bigframes_tests_sys.base_table",
63+
"column_to_search": "my_embedding",
64+
"distance_type": "euclidean",
65+
"top_k": 2,
66+
}
67+
result = bbq.utils.apply_sql(query, options).to_pandas() # type:ignore
68+
expected = pd.DataFrame(
69+
{
70+
"0": [
71+
np.array([3.0, 5.2]),
72+
np.array([1.0, 2.0]),
73+
np.array([3.0, 5.2]),
74+
np.array([1.0, 2.0]),
75+
],
76+
"id": [2, 4, 5, 1],
77+
"my_embedding": [
78+
np.array([2.0, 4.0]),
79+
np.array([1.0, 3.2]),
80+
np.array([5.0, 5.4]),
81+
np.array([1.0, 2.0]),
82+
],
83+
"distance": [1.562049935181331, 1.2000000000000002, 2.009975124224178, 0.0],
84+
},
85+
index=pd.Index([1, 0, 1, 0], dtype="Int64"),
86+
)
87+
pd.testing.assert_frame_equal(result, expected, check_dtype=False, rtol=0.1)

tests/unit/bigquery/test_utils.py

Lines changed: 0 additions & 72 deletions
Original file line numberDiff line numberDiff line change
@@ -12,11 +12,7 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15-
import numpy as np
16-
import pandas as pd
17-
1815
import bigframes.bigquery as bbq
19-
import bigframes.pandas as bpd
2016

2117

2218
def test_create_vector_search_sql_simple():
@@ -79,71 +75,3 @@ def test_create_vector_search_sql_query_column_to_search():
7975
sql_string, options # type:ignore
8076
)
8177
assert result_query == expected_query
82-
83-
84-
def test_apply_sql_df_query():
85-
query = bpd.DataFrame(
86-
{
87-
"query_id": ["dog", "cat"],
88-
"embedding": [[1.0, 2.0], [3.0, 5.2]],
89-
}
90-
)
91-
options = {
92-
"base_table": "bigframes-dev.bigframes_tests_sys.base_table",
93-
"column_to_search": "my_embedding",
94-
"distance_type": "cosine",
95-
"top_k": 2,
96-
}
97-
result = bbq.utils.apply_sql(query, options).to_pandas() # type:ignore
98-
expected = pd.DataFrame(
99-
{
100-
"query_id": ["cat", "dog", "dog", "cat"],
101-
"embedding": [
102-
np.array([3.0, 5.2]),
103-
np.array([1.0, 2.0]),
104-
np.array([1.0, 2.0]),
105-
np.array([3.0, 5.2]),
106-
],
107-
"id": [1, 2, 1, 2],
108-
"my_embedding": [
109-
np.array([1.0, 2.0]),
110-
np.array([2.0, 4.0]),
111-
np.array([1.0, 2.0]),
112-
np.array([2.0, 4.0]),
113-
],
114-
"distance": [0.001777, 0.0, 0.0, 0.001777],
115-
},
116-
index=pd.Index([1, 0, 0, 1], dtype="Int64"),
117-
)
118-
pd.testing.assert_frame_equal(result, expected, check_dtype=False, rtol=0.1)
119-
120-
121-
def test_apply_sql_series_query():
122-
query = bpd.Series([[1.0, 2.0], [3.0, 5.2]])
123-
options = {
124-
"base_table": "bigframes-dev.bigframes_tests_sys.base_table",
125-
"column_to_search": "my_embedding",
126-
"distance_type": "euclidean",
127-
"top_k": 2,
128-
}
129-
result = bbq.utils.apply_sql(query, options).to_pandas() # type:ignore
130-
expected = pd.DataFrame(
131-
{
132-
"0": [
133-
np.array([3.0, 5.2]),
134-
np.array([1.0, 2.0]),
135-
np.array([3.0, 5.2]),
136-
np.array([1.0, 2.0]),
137-
],
138-
"id": [2, 4, 5, 1],
139-
"my_embedding": [
140-
np.array([2.0, 4.0]),
141-
np.array([1.0, 3.2]),
142-
np.array([5.0, 5.4]),
143-
np.array([1.0, 2.0]),
144-
],
145-
"distance": [1.562049935181331, 1.2000000000000002, 2.009975124224178, 0.0],
146-
},
147-
index=pd.Index([1, 0, 1, 0], dtype="Int64"),
148-
)
149-
pd.testing.assert_frame_equal(result, expected, check_dtype=False, rtol=0.1)

0 commit comments

Comments
 (0)