Skip to content

Commit 11f2bb8

Browse files
authored
Fix comparison kernel benchmarks (#6147)
* fix comparison kernel benchmarks
* add comment as suggested by @alamb
1 parent 80ed712 commit 11f2bb8

File tree

2 files changed

+36
-13
lines changed

2 files changed

+36
-13
lines changed

arrow/benches/comparison_kernels.rs

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -215,11 +215,11 @@ fn add_benchmark(c: &mut Criterion) {
215215
});
216216

217217
c.bench_function("like_utf8 scalar ends with", |b| {
218-
b.iter(|| bench_like_utf8_scalar(&arr_string, "xxxx%"))
218+
b.iter(|| bench_like_utf8_scalar(&arr_string, "%xxxx"))
219219
});
220220

221221
c.bench_function("like_utf8 scalar starts with", |b| {
222-
b.iter(|| bench_like_utf8_scalar(&arr_string, "%xxxx"))
222+
b.iter(|| bench_like_utf8_scalar(&arr_string, "xxxx%"))
223223
});
224224

225225
c.bench_function("like_utf8 scalar complex", |b| {
@@ -237,11 +237,11 @@ fn add_benchmark(c: &mut Criterion) {
237237
});
238238

239239
c.bench_function("like_utf8view scalar ends with", |b| {
240-
b.iter(|| bench_like_utf8view_scalar(&string_view_left, "xxxx%"))
240+
b.iter(|| bench_like_utf8view_scalar(&string_view_left, "%xxxx"))
241241
});
242242

243243
c.bench_function("like_utf8view scalar starts with", |b| {
244-
b.iter(|| bench_like_utf8view_scalar(&string_view_left, "%xxxx"))
244+
b.iter(|| bench_like_utf8view_scalar(&string_view_left, "xxxx%"))
245245
});
246246

247247
c.bench_function("like_utf8view scalar complex", |b| {
@@ -259,11 +259,11 @@ fn add_benchmark(c: &mut Criterion) {
259259
});
260260

261261
c.bench_function("nlike_utf8 scalar ends with", |b| {
262-
b.iter(|| bench_nlike_utf8_scalar(&arr_string, "xxxx%"))
262+
b.iter(|| bench_nlike_utf8_scalar(&arr_string, "%xxxx"))
263263
});
264264

265265
c.bench_function("nlike_utf8 scalar starts with", |b| {
266-
b.iter(|| bench_nlike_utf8_scalar(&arr_string, "%xxxx"))
266+
b.iter(|| bench_nlike_utf8_scalar(&arr_string, "xxxx%"))
267267
});
268268

269269
c.bench_function("nlike_utf8 scalar complex", |b| {
@@ -281,11 +281,11 @@ fn add_benchmark(c: &mut Criterion) {
281281
});
282282

283283
c.bench_function("ilike_utf8 scalar ends with", |b| {
284-
b.iter(|| bench_ilike_utf8_scalar(&arr_string, "xXXx%"))
284+
b.iter(|| bench_ilike_utf8_scalar(&arr_string, "%xXXx"))
285285
});
286286

287287
c.bench_function("ilike_utf8 scalar starts with", |b| {
288-
b.iter(|| bench_ilike_utf8_scalar(&arr_string, "%XXXx"))
288+
b.iter(|| bench_ilike_utf8_scalar(&arr_string, "XXXx%"))
289289
});
290290

291291
c.bench_function("ilike_utf8 scalar complex", |b| {
@@ -303,11 +303,11 @@ fn add_benchmark(c: &mut Criterion) {
303303
});
304304

305305
c.bench_function("nilike_utf8 scalar ends with", |b| {
306-
b.iter(|| bench_nilike_utf8_scalar(&arr_string, "xXXx%"))
306+
b.iter(|| bench_nilike_utf8_scalar(&arr_string, "%xXXx"))
307307
});
308308

309309
c.bench_function("nilike_utf8 scalar starts with", |b| {
310-
b.iter(|| bench_nilike_utf8_scalar(&arr_string, "%XXXx"))
310+
b.iter(|| bench_nilike_utf8_scalar(&arr_string, "XXXx%"))
311311
});
312312

313313
c.bench_function("nilike_utf8 scalar complex", |b| {

arrow/src/util/bench_util.rs

Lines changed: 26 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,7 @@ pub fn create_month_day_nano_array_with_seed(
9090
.collect()
9191
}
9292

93-
/// Creates an random (but fixed-seeded) array of a given size and null density
93+
/// Creates a random (but fixed-seeded) array of a given size and null density
9494
pub fn create_boolean_array(size: usize, null_density: f32, true_density: f32) -> BooleanArray
9595
where
9696
Standard: Distribution<bool>,
@@ -108,12 +108,35 @@ where
108108
.collect()
109109
}
110110

111-
/// Creates an random (but fixed-seeded) array of a given size and null density
111+
/// Creates a random (but fixed-seeded) string array of a given size and null density. Each string has a random length
112+
/// of 0 to 399 alphanumeric characters (the half-open range `0..400`), chosen to cover a wide range of common string
113+
/// lengths, which have a dramatic impact on the performance of some queries, e.g. LIKE/ILIKE/regex.
112114
pub fn create_string_array<Offset: OffsetSizeTrait>(
113115
size: usize,
114116
null_density: f32,
115117
) -> GenericStringArray<Offset> {
116-
create_string_array_with_len(size, null_density, 4)
118+
create_string_array_with_max_len(size, null_density, 400)
119+
}
120+
121+
/// Creates a random (but fixed-seeded) string array of a given size and null density, where each string has a random length in `0..max_str_len`
122+
fn create_string_array_with_max_len<Offset: OffsetSizeTrait>(
123+
size: usize,
124+
null_density: f32,
125+
max_str_len: usize,
126+
) -> GenericStringArray<Offset> {
127+
let rng = &mut seedable_rng();
128+
(0..size)
129+
.map(|_| {
130+
if rng.gen::<f32>() < null_density {
131+
None
132+
} else {
133+
let str_len = rng.gen_range(0..max_str_len);
134+
let value = rng.sample_iter(&Alphanumeric).take(str_len).collect();
135+
let value = String::from_utf8(value).unwrap();
136+
Some(value)
137+
}
138+
})
139+
.collect()
117140
}
118141

119142
/// Creates a random (but fixed-seeded) array of a given size, null density and length

0 commit comments

Comments
 (0)