Skip to content

Commit 74d4479

Browse files
milkshakeiii and tswast authored
tests: add benchmark and split nox sessions into multiple kokoro jobs (#734)
* tests: add benchmark nox session and split nox sessions into multiple kokoro jobs
* remove accidentally added file
* Update dataframe.ipynb
* update noxfile
* revert noxfile
* remove benchmark notebook
* use regular python files for benchmark scripts
* add benchmark script
* remove accidentally added files
* remove stray line
* add to comment
* correct filepath
* fix filenames

---------

Co-authored-by: Tim Sweña (Swast) <[email protected]>
1 parent d93dbaf commit 74d4479

File tree

9 files changed

+164
-13
lines changed

9 files changed

+164
-13
lines changed

.kokoro/continuous/doctest.cfg

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
# Format: //devtools/kokoro/config/proto/build.proto
2+
3+
# Only run this nox session.
4+
env_vars: {
5+
key: "NOX_SESSION"
6+
value: "doctest"
7+
}
8+
9+
env_vars: {
10+
key: "GOOGLE_CLOUD_PROJECT"
11+
value: "bigframes-load-testing"
12+
}
13+
14+
env_vars: {
15+
key: "BIGFRAMES_TEST_MODEL_VERTEX_ENDPOINT"
16+
value: "https://us-central1-aiplatform.googleapis.com/v1/projects/272725758477/locations/us-central1/endpoints/590545496255234048"
17+
}

.kokoro/continuous/e2e.cfg

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
# Only run this nox session.
44
env_vars: {
55
key: "NOX_SESSION"
6-
value: "e2e doctest notebook unit_prerelease system_prerelease system_noextras"
6+
value: "e2e unit_prerelease system_prerelease system_noextras"
77
}
88

99
env_vars: {

.kokoro/continuous/notebook.cfg

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
# Format: //devtools/kokoro/config/proto/build.proto
2+
3+
# Only run this nox session.
4+
env_vars: {
5+
key: "NOX_SESSION"
6+
value: "notebook"
7+
}
8+
9+
env_vars: {
10+
key: "GOOGLE_CLOUD_PROJECT"
11+
value: "bigframes-load-testing"
12+
}
13+
14+
env_vars: {
15+
key: "BIGFRAMES_TEST_MODEL_VERTEX_ENDPOINT"
16+
value: "https://us-central1-aiplatform.googleapis.com/v1/projects/272725758477/locations/us-central1/endpoints/590545496255234048"
17+
}

.kokoro/load/benchmark.cfg

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
# Format: //devtools/kokoro/config/proto/build.proto
2+
3+
# Only run this nox session.
4+
env_vars: {
5+
key: "NOX_SESSION"
6+
value: "benchmark"
7+
}
8+
9+
env_vars: {
10+
key: "GOOGLE_CLOUD_PROJECT"
11+
value: "bigframes-load-testing"
12+
}
13+
14+
env_vars: {
15+
key: "BIGFRAMES_TEST_MODEL_VERTEX_ENDPOINT"
16+
value: "https://us-central1-aiplatform.googleapis.com/v1/projects/272725758477/locations/us-central1/endpoints/590545496255234048"
17+
}

.kokoro/presubmit/doctest.cfg

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
# Format: //devtools/kokoro/config/proto/build.proto
2+
3+
# Only run this nox session.
4+
env_vars: {
5+
key: "NOX_SESSION"
6+
value: "doctest"
7+
}
8+
9+
env_vars: {
10+
key: "GOOGLE_CLOUD_PROJECT"
11+
value: "bigframes-load-testing"
12+
}
13+
14+
env_vars: {
15+
key: "BIGFRAMES_TEST_MODEL_VERTEX_ENDPOINT"
16+
value: "https://us-central1-aiplatform.googleapis.com/v1/projects/272725758477/locations/us-central1/endpoints/590545496255234048"
17+
}

.kokoro/presubmit/e2e.cfg

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
# Only run this nox session.
44
env_vars: {
55
key: "NOX_SESSION"
6-
value: "e2e doctest notebook unit_prerelease system_prerelease system_noextras"
6+
value: "e2e unit_prerelease system_prerelease system_noextras"
77
}
88

99
env_vars: {

.kokoro/presubmit/notebook.cfg

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
# Format: //devtools/kokoro/config/proto/build.proto
2+
3+
# Only run this nox session.
4+
env_vars: {
5+
key: "NOX_SESSION"
6+
value: "notebook"
7+
}
8+
9+
env_vars: {
10+
key: "GOOGLE_CLOUD_PROJECT"
11+
value: "bigframes-load-testing"
12+
}
13+
14+
env_vars: {
15+
key: "BIGFRAMES_TEST_MODEL_VERTEX_ENDPOINT"
16+
value: "https://us-central1-aiplatform.googleapis.com/v1/projects/272725758477/locations/us-central1/endpoints/590545496255234048"
17+
}

noxfile.py

Lines changed: 50 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,8 @@
7676
SYSTEM_TEST_EXTRAS: List[str] = ["tests"]
7777
SYSTEM_TEST_EXTRAS_BY_PYTHON: Dict[str, List[str]] = {}
7878

79+
LOGGING_NAME_ENV_VAR = "BIGFRAMES_PERFORMANCE_LOG_NAME"
80+
7981
CURRENT_DIRECTORY = pathlib.Path(__file__).parent.absolute()
8082

8183
# Sessions are executed in the order so putting the smaller sessions
@@ -748,8 +750,6 @@ def notebook(session: nox.Session):
748750
"--nbmake-timeout=900", # 15 minutes
749751
]
750752

751-
logging_name_env_var = "BIGFRAMES_PERFORMANCE_LOG_NAME"
752-
753753
try:
754754
# Populate notebook parameters and make a backup so that the notebooks
755755
# are runnable.
@@ -763,10 +763,10 @@ def notebook(session: nox.Session):
763763
# takes an environment variable for performance logging
764764
processes = []
765765
for notebook in notebooks:
766-
session.env[logging_name_env_var] = os.path.basename(notebook)
767766
process = Process(
768767
target=session.run,
769768
args=(*pytest_command, notebook),
769+
kwargs={"env": {LOGGING_NAME_ENV_VAR: os.path.basename(notebook)}},
770770
)
771771
process.start()
772772
processes.append(process)
@@ -788,11 +788,15 @@ def notebook(session: nox.Session):
788788
processes = []
789789
for notebook, regions in notebooks_reg.items():
790790
for region in regions:
791-
session.env[logging_name_env_var] = os.path.basename(notebook)
792791
process = Process(
793792
target=session.run,
794793
args=(*pytest_command, notebook),
795-
kwargs={"env": {"BIGQUERY_LOCATION": region}},
794+
kwargs={
795+
"env": {
796+
"BIGQUERY_LOCATION": region,
797+
LOGGING_NAME_ENV_VAR: os.path.basename(notebook),
798+
}
799+
},
796800
)
797801
process.start()
798802
processes.append(process)
@@ -803,24 +807,59 @@ def notebook(session: nox.Session):
803807
# when the environment variable is set as it is above,
804808
# notebooks output a .bytesprocessed and .slotmillis report
805809
# collect those reports and print a summary
806-
_print_performance_report()
810+
_print_performance_report("notebooks/")
811+
812+
813+
@nox.session(python=SYSTEM_TEST_PYTHON_VERSIONS)
814+
def benchmark(session: nox.Session):
815+
session.install("-e", ".[all]")
816+
817+
benchmark_script_list = list(Path("scripts/benchmark/").glob("*.py"))
818+
819+
# Run benchmarks in parallel session.run's, since each benchmark
820+
# takes an environment variable for performance logging
821+
processes = []
822+
for benchmark in benchmark_script_list:
823+
process = Process(
824+
target=session.run,
825+
args=("python", benchmark),
826+
kwargs={
827+
"env": {
828+
LOGGING_NAME_ENV_VAR: "scripts/benchmark/"
829+
+ os.path.basename(benchmark)
830+
}
831+
},
832+
)
833+
process.start()
834+
processes.append(process)
835+
836+
for process in processes:
837+
process.join()
838+
839+
# when the environment variable is set as it is above,
840+
# benchmarks output a .bytesprocessed and .slotmillis report
841+
# collect those reports and print a summary
842+
_print_performance_report("scripts/")
807843

808844

809-
def _print_performance_report():
845+
def _print_performance_report(path: str):
810846
"""Add an informational report about http queries, bytes
811847
processed, and slot time to the testlog output for purposes
812848
of measuring bigquery-related performance changes.
849+
850+
Looks specifically for output files in subfolders of the
851+
passed path. (*/*.bytesprocessed and */*.slotmillis)
813852
"""
814853
print("---BIGQUERY USAGE REPORT---")
815854
results_dict = {}
816-
for bytes_report in Path("notebooks/").glob("*/*.bytesprocessed"):
855+
for bytes_report in Path(path).glob("*/*.bytesprocessed"):
817856
with open(bytes_report, "r") as bytes_file:
818857
filename = bytes_report.stem
819858
lines = bytes_file.read().splitlines()
820859
query_count = len(lines)
821860
total_bytes = sum([int(line) for line in lines])
822861
results_dict[filename] = [query_count, total_bytes]
823-
for millis_report in Path("notebooks/").glob("*/*.slotmillis"):
862+
for millis_report in Path(path).glob("*/*.slotmillis"):
824863
with open(millis_report, "r") as millis_file:
825864
filename = millis_report.stem
826865
lines = millis_file.read().splitlines()
@@ -830,7 +869,7 @@ def _print_performance_report():
830869
cumulative_queries = 0
831870
cumulative_bytes = 0
832871
cumulative_slot_millis = 0
833-
for results in results_dict.values():
872+
for name, results in results_dict.items():
834873
if len(results) != 3:
835874
raise IOError(
836875
"Mismatch in performance logging output. "
@@ -842,7 +881,7 @@ def _print_performance_report():
842881
cumulative_bytes += total_bytes
843882
cumulative_slot_millis += total_slot_millis
844883
print(
845-
f"{filename} - query count: {query_count},"
884+
f"{name} - query count: {query_count},"
846885
f" bytes processed sum: {total_bytes},"
847886
f" slot millis sum: {total_slot_millis}"
848887
)

scripts/benchmark/simple_benchmark.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
# Copyright 2024 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# https://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
import bigframes.pandas as bpd
16+
17+
# This is a placeholder benchmark.
18+
# TODO(340278185): Add more data analysis tasks and benchmark files
19+
# like this one.
20+
21+
print("Performing simple benchmark.")
22+
df = bpd.DataFrame()
23+
df["column_1"] = bpd.Series([i for i in range(100000)])
24+
df["column_2"] = bpd.Series([i * 2 for i in range(100000)])
25+
df["column_3"] = df["column_1"] + df["column_2"]
26+
df.__repr__()
27+
bpd.reset_session()

0 commit comments

Comments
 (0)