@@ -95,55 +95,62 @@ def collect_benchmark_result(
95
95
if not (
96
96
len(bytes_files)
97
97
== len(millis_files)
98
- == len(local_seconds_files)
99
98
== len(bq_seconds_files)
100
- == len(query_char_count_files)
99
+ <= len(query_char_count_files)
100
+ == len(local_seconds_files)
101
101
):
102
102
raise ValueError(
103
103
"Mismatch in the number of report files for bytes, millis, seconds and query char count."
104
104
)
105
105
106
- for idx in range(len(bytes_files)):
107
- bytes_file = bytes_files[idx]
108
- millis_file = millis_files[idx]
109
- bq_seconds_file = bq_seconds_files[idx]
110
- query_char_count_file = query_char_count_files[idx]
111
-
112
- filename = bytes_file.relative_to(path).with_suffix("")
113
-
114
- if filename != millis_file.relative_to(path).with_suffix(
115
- ""
116
- ) or filename != bq_seconds_file.relative_to(path).with_suffix(""):
117
- raise ValueError(
118
- "File name mismatch among bytes, millis, and seconds reports."
119
- )
106
+ has_full_metrics = len(bq_seconds_files) == len(local_seconds_files)
120
107
108
+ for idx in range(len(local_seconds_files)):
109
+ query_char_count_file = query_char_count_files[idx]
121
110
local_seconds_file = local_seconds_files[idx]
111
+ filename = query_char_count_file.relative_to(path).with_suffix("")
122
112
if filename != local_seconds_file.relative_to(path).with_suffix(""):
123
113
raise ValueError(
124
- "File name mismatch among bytes, millis, and seconds reports."
114
+ "File name mismatch between query_char_count and seconds reports."
125
115
)
126
116
127
- with open(bytes_file , "r") as file:
117
+ with open(query_char_count_file , "r") as file:
128
118
lines = file.read().splitlines()
119
+ query_char_count = sum(int(line) for line in lines) / iterations
129
120
query_count = len(lines) / iterations
130
- total_bytes = sum(int(line) for line in lines) / iterations
131
-
132
- with open(millis_file, "r") as file:
133
- lines = file.read().splitlines()
134
- total_slot_millis = sum(int(line) for line in lines) / iterations
135
121
136
122
with open(local_seconds_file, "r") as file:
137
123
lines = file.read().splitlines()
138
124
local_seconds = sum(float(line) for line in lines) / iterations
139
125
140
- with open(bq_seconds_file, "r") as file:
141
- lines = file.read().splitlines()
142
- bq_seconds = sum(float(line) for line in lines) / iterations
126
+ if not has_full_metrics:
127
+ total_bytes = None
128
+ total_slot_millis = None
129
+ bq_seconds = None
130
+ else:
131
+ bytes_file = bytes_files[idx]
132
+ millis_file = millis_files[idx]
133
+ bq_seconds_file = bq_seconds_files[idx]
134
+ if (
135
+ filename != bytes_file.relative_to(path).with_suffix("")
136
+ or filename != millis_file.relative_to(path).with_suffix("")
137
+ or filename != bq_seconds_file.relative_to(path).with_suffix("")
138
+ ):
139
+ raise ValueError(
140
+ "File name mismatch among query_char_count, bytes, millis, and seconds reports."
141
+ )
143
142
144
- with open(query_char_count_file, "r") as file:
145
- lines = file.read().splitlines()
146
- query_char_count = sum(int(line) for line in lines) / iterations
143
+ with open(bytes_file, "r") as file:
144
+ lines = file.read().splitlines()
145
+ total_bytes = sum(int(line) for line in lines) / iterations
146
+
147
+ with open(millis_file, "r") as file:
148
+ lines = file.read().splitlines()
149
+ total_slot_millis = sum(int(line) for line in lines) / iterations
150
+
151
+ with open(bq_seconds_file, "r") as file:
152
+ lines = file.read().splitlines()
153
+ bq_seconds = sum(float(line) for line in lines) / iterations
147
154
148
155
results_dict[str(filename)] = [
149
156
query_count,
@@ -194,11 +201,19 @@ def collect_benchmark_result(
194
201
)
195
202
print(
196
203
f"{index} - query count: {row['Query_Count']},"
197
- f" query char count: {row['Query_Char_Count']},",
198
- f" bytes processed sum: {row['Bytes_Processed']},"
199
- f" slot millis sum: {row['Slot_Millis']},"
200
- f" local execution time: {formatted_local_exec_time} seconds,"
201
- f" bigquery execution time: {round(row['BigQuery_Execution_Time_Sec'], 1)} seconds",
204
+ + f" query char count: {row['Query_Char_Count']},"
205
+ + (
206
+ f" bytes processed sum: {row['Bytes_Processed']},"
207
+ if has_full_metrics
208
+ else ""
209
+ )
210
+ + (f" slot millis sum: {row['Slot_Millis']}," if has_full_metrics else "")
211
+ + f" local execution time: {formatted_local_exec_time} seconds"
212
+ + (
213
+ f", bigquery execution time: {round(row['BigQuery_Execution_Time_Sec'], 1)} seconds"
214
+ if has_full_metrics
215
+ else ""
216
+ )
202
217
)
203
218
204
219
geometric_mean_queries = geometric_mean_excluding_zeros(
@@ -221,12 +236,24 @@ def collect_benchmark_result(
221
236
)
222
237
223
238
print(
224
- f"---Geometric mean of queries: {geometric_mean_queries}, "
225
- f"Geometric mean of queries char counts: {geometric_mean_query_char_count}, "
226
- f"Geometric mean of bytes processed: {geometric_mean_bytes}, "
227
- f"Geometric mean of slot millis: {geometric_mean_slot_millis}, "
228
- f"Geometric mean of local execution time: {geometric_mean_local_seconds} seconds, "
229
- f"Geometric mean of BigQuery execution time: {geometric_mean_bq_seconds} seconds---"
239
+ f"---Geometric mean of queries: {geometric_mean_queries},"
240
+ + f" Geometric mean of queries char counts: {geometric_mean_query_char_count},"
241
+ + (
242
+ f" Geometric mean of bytes processed: {geometric_mean_bytes},"
243
+ if has_full_metrics
244
+ else ""
245
+ )
246
+ + (
247
+ f" Geometric mean of slot millis: {geometric_mean_slot_millis},"
248
+ if has_full_metrics
249
+ else ""
250
+ )
251
+ + f" Geometric mean of local execution time: {geometric_mean_local_seconds} seconds"
252
+ + (
253
+ f", Geometric mean of BigQuery execution time: {geometric_mean_bq_seconds} seconds---"
254
+ if has_full_metrics
255
+ else ""
256
+ )
230
257
)
231
258
232
259
error_message = (
0 commit comments