Commit bfe5e8c
committed
Auto merge of #128204 - GuillaumeGomez:integers-opti, r=workingjubilee
Small optimization for integers Display implementation
This is a first pass at speeding up the `Display` implementation for integers. The idea is to reduce the stack usage of the buffer that stores the output (which normally shouldn't be visible in the benchmarks) and to add some small specializations that mostly benefit the smaller integer types such as `u8` and `i8`.
Here are the results of the benchmarks:
| bench name | current std | with this PR |
|-|-|-|
| bench_std_fmt::bench_i16_0 | 16.45 ns/iter (+/- 0.25) | 16.50 ns/iter (+/- 0.15) |
| bench_std_fmt::bench_i16_max | 17.83 ns/iter (+/- 0.66) | 17.58 ns/iter (+/- 0.10) |
| bench_std_fmt::bench_i16_min | 20.97 ns/iter (+/- 0.49) | 20.50 ns/iter (+/- 0.28) |
| bench_std_fmt::bench_i32_0 | 16.63 ns/iter (+/- 0.06) | 16.62 ns/iter (+/- 0.07) |
| bench_std_fmt::bench_i32_max | 19.79 ns/iter (+/- 0.43) | 19.55 ns/iter (+/- 0.14) |
| bench_std_fmt::bench_i32_min | 22.97 ns/iter (+/- 0.50) | 22.08 ns/iter (+/- 0.08) |
| bench_std_fmt::bench_i64_0 | 16.63 ns/iter (+/- 0.39) | 16.69 ns/iter (+/- 0.44) |
| bench_std_fmt::bench_i64_half | 19.60 ns/iter (+/- 0.05) | 19.10 ns/iter (+/- 0.05) |
| bench_std_fmt::bench_i64_max | 25.22 ns/iter (+/- 0.34) | 24.43 ns/iter (+/- 0.02) |
| bench_std_fmt::bench_i8_0 | 16.27 ns/iter (+/- 0.32) | 15.80 ns/iter (+/- 0.17) |
| bench_std_fmt::bench_i8_max | 16.71 ns/iter (+/- 0.09) | 16.25 ns/iter (+/- 0.01) |
| bench_std_fmt::bench_i8_min | 20.07 ns/iter (+/- 0.22) | 19.80 ns/iter (+/- 0.30) |
| bench_std_fmt::bench_u128_0 | 21.37 ns/iter (+/- 0.24) | 21.35 ns/iter (+/- 0.35) |
| bench_std_fmt::bench_u128_max | 48.13 ns/iter (+/- 0.20) | 48.78 ns/iter (+/- 0.29) |
| bench_std_fmt::bench_u16_0 | 16.48 ns/iter (+/- 0.46) | 16.03 ns/iter (+/- 0.39) |
| bench_std_fmt::bench_u16_max | 17.31 ns/iter (+/- 0.32) | 17.41 ns/iter (+/- 0.32) |
| bench_std_fmt::bench_u16_min | 16.40 ns/iter (+/- 0.45) | 16.02 ns/iter (+/- 0.39) |
| bench_std_fmt::bench_u32_0 | 16.17 ns/iter (+/- 0.04) | 16.29 ns/iter (+/- 0.16) |
| bench_std_fmt::bench_u32_max | 19.00 ns/iter (+/- 0.10) | 19.16 ns/iter (+/- 0.28) |
| bench_std_fmt::bench_u32_min | 16.16 ns/iter (+/- 0.09) | 16.28 ns/iter (+/- 0.11) |
| bench_std_fmt::bench_u64_0 | 16.22 ns/iter (+/- 0.22) | 16.14 ns/iter (+/- 0.18) |
| bench_std_fmt::bench_u64_half | 19.25 ns/iter (+/- 0.07) | 18.95 ns/iter (+/- 0.05) |
| bench_std_fmt::bench_u64_max | 24.31 ns/iter (+/- 0.08) | 24.18 ns/iter (+/- 0.08) |
| bench_std_fmt::bench_u8_0 | 15.76 ns/iter (+/- 0.08) | 15.66 ns/iter (+/- 0.08) |
| bench_std_fmt::bench_u8_max | 16.53 ns/iter (+/- 0.03) | 16.29 ns/iter (+/- 0.02) |
| bench_std_fmt::bench_u8_min | 15.77 ns/iter (+/- 0.06) | 15.67 ns/iter (+/- 0.02) |
The source code is:
<details>
<summary>source code</summary>
```rust
#![feature(test)]
#![allow(non_snake_case)]
#![allow(clippy::cast_lossless)]
extern crate test;
// Expands a list of `name(value)` pairs into a `bench_std_fmt` module that
// contains one `#[bench]` function per pair. Each generated benchmark times
// formatting `value` with `{}` into a byte buffer that is reused across
// iterations.
macro_rules! benches {
    ($($bench_fn:ident($input:expr))*) => {
        mod bench_std_fmt {
            use std::io::Write;
            use test::{black_box, Bencher};

            $(
                #[bench]
                fn $bench_fn(bencher: &mut Bencher) {
                    // Created once, outside the timed closure, and only
                    // cleared between iterations.
                    let mut out = Vec::with_capacity(40);
                    bencher.iter(|| {
                        out.clear();
                        // The input goes through `black_box` so the formatted
                        // value is opaque to the optimizer.
                        write!(&mut out, "{}", black_box($input)).unwrap();
                        // Mark the output as used so the write is not elided.
                        black_box(&out);
                    });
                }
            )*
        }
    }
}
// Benchmark cases handed to `benches!`: for each integer type, zero plus the
// type's extreme values (and a 32-bit-sized value for the 64-bit types).
// The `MIN`/`MAX` associated constants replace the legacy
// `min_value()`/`max_value()` functions, and `From` replaces the widening
// `as` casts; the benchmarked values are unchanged.
benches! {
    bench_u64_0(0u64)
    bench_u64_half(u64::from(u32::MAX))
    bench_u64_max(u64::MAX)
    bench_i64_0(0i64)
    bench_i64_half(i64::from(i32::MAX))
    bench_i64_max(i64::MAX)
    bench_u16_0(0u16)
    bench_u16_min(u16::MIN)
    bench_u16_max(u16::MAX)
    bench_i16_0(0i16)
    bench_i16_min(i16::MIN)
    bench_i16_max(i16::MAX)
    bench_u128_0(0u128)
    bench_u128_max(u128::MAX)
    bench_i8_0(0i8)
    bench_i8_min(i8::MIN)
    bench_i8_max(i8::MAX)
    bench_u8_0(0u8)
    bench_u8_min(u8::MIN)
    bench_u8_max(u8::MAX)
    bench_u32_0(0u32)
    bench_u32_min(u32::MIN)
    bench_u32_max(u32::MAX)
    bench_i32_0(0i32)
    bench_i32_min(i32::MIN)
    bench_i32_max(i32::MAX)
}
```
</details>
I then ran the equivalent code (source code below) under callgrind and used [callgrind_differ](https://github.com/Ethiraric/callgrind_differ) to generate a readable diff. Here is the result:
```
core::fmt::num::imp::<impl core::fmt::Display for i16>::fmt | 1300000 | - 70000 - 5.385% 1230000
core::fmt::num::imp::<impl core::fmt::Display for i32>::fmt | 1910000 | - 100000 - 5.236% 1810000
core::fmt::num::imp::<impl core::fmt::Display for i64>::fmt | 2430000 | - 110000 - 4.527% 2320000
core::fmt::num::imp::<impl core::fmt::Display for i8>::fmt | 1080000 | - 170000 - 15.741% 910000
core::fmt::num::imp::<impl core::fmt::Display for u16>::fmt | 960000 | + 10000 + 1.042% 970000
core::fmt::num::imp::<impl core::fmt::Display for u32>::fmt | 1300000 | + 30000 + 2.308% 1330000
core::fmt::num::imp::<impl core::fmt::Display for u8>::fmt | 820000 | - 30000 - 3.659% 790000
```
<details>
<summary>Source code</summary>
```rust
#![feature(test)]
extern crate test;
use std::io::{stdout, Write};
use std::io::StdoutLock;
use test::black_box;
// Callgrind driver macro. Takes the identifiers of a locked stdout handle and
// a reusable byte buffer, followed by a list of `name(value)` pairs. For each
// pair it defines a function `name` and immediately calls it with the handle
// and buffer. The function formats `value` with `{}` into the buffer and
// writes the buffer to the handle, 10000 times.
macro_rules! benches {
    ($handle:ident, $buf:ident, $($name:ident($value:expr))*) => {
        $(
            fn $name(handle: &mut StdoutLock, buf: &mut Vec<u8>) {
                for _ in 0..10000 {
                    buf.clear();
                    // `black_box` keeps the formatted value opaque to the optimizer.
                    write!(buf, "{}", black_box($value)).unwrap();
                    // The `io::Result` used to be dropped silently; fail loudly
                    // instead, so a broken stdout cannot go unnoticed.
                    handle
                        .write_all(buf)
                        .expect("failed to write benchmark output to stdout");
                }
            }
            $name(&mut $handle, &mut $buf);
        )*
    }
}
// Entry point of the callgrind driver: locks stdout once, allocates one
// reusable 40-byte-capacity buffer, and runs every benchmark case through
// `benches!`.
fn main() {
    let mut handle = stdout().lock();
    let mut buf = Vec::with_capacity(40);
    // The `MIN`/`MAX` associated constants replace the legacy
    // `min_value()`/`max_value()` functions, and `From` replaces the widening
    // `as` casts; the values are unchanged.
    benches! {
        handle, buf,
        bench_u64_0(0u64)
        bench_u64_half(u64::from(u32::MAX))
        bench_u64_max(u64::MAX)
        bench_i64_0(0i64)
        bench_i64_half(i64::from(i32::MAX))
        bench_i64_max(i64::MAX)
        bench_u16_0(0u16)
        bench_u16_min(u16::MIN)
        bench_u16_max(u16::MAX)
        bench_i16_0(0i16)
        bench_i16_min(i16::MIN)
        bench_i16_max(i16::MAX)
        bench_u128_0(0u128)
        bench_u128_max(u128::MAX)
        bench_i8_0(0i8)
        bench_i8_min(i8::MIN)
        bench_i8_max(i8::MAX)
        bench_u8_0(0u8)
        bench_u8_min(u8::MIN)
        bench_u8_max(u8::MAX)
        bench_i32_0(0i32)
        bench_i32_min(i32::MIN)
        bench_i32_max(i32::MAX)
        bench_u32_0(0u32)
        bench_u32_min(u32::MIN)
        bench_u32_max(u32::MAX)
    }
}
```
</details>
The next step would be to specialize the `ToString` implementation so it doesn't go through the `Display` trait. I'm not sure if it will improve anything but I think it's worth a try.
r? `@Amanieu`

1 file changed
+131
-81
lines changed

| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
208 | 208 | | |
209 | 209 | | |
210 | 210 | | |
211 | | - | |
212 | | - | |
213 | | - | |
214 | | - | |
215 | | - | |
216 | | - | |
217 | | - | |
218 | | - | |
| 211 | + | |
219 | 212 | | |
220 | | - | |
221 | | - | |
222 | | - | |
223 | | - | |
224 | | - | |
225 | | - | |
226 | | - | |
227 | | - | |
228 | | - | |
229 | | - | |
| 213 | + | |
| 214 | + | |
| 215 | + | |
| 216 | + | |
| 217 | + | |
| 218 | + | |
| 219 | + | |
230 | 220 | | |
231 | | - | |
232 | | - | |
233 | | - | |
234 | | - | |
| 221 | + | |
| 222 | + | |
| 223 | + | |
| 224 | + | |
| 225 | + | |
| 226 | + | |
| 227 | + | |
| 228 | + | |
| 229 | + | |
| 230 | + | |
| 231 | + | |
| 232 | + | |
| 233 | + | |
| 234 | + | |
| 235 | + | |
| 236 | + | |
| 237 | + | |
| 238 | + | |
| 239 | + | |
| 240 | + | |
| 241 | + | |
| 242 | + | |
| 243 | + | |
| 244 | + | |
| 245 | + | |
| 246 | + | |
235 | 247 | | |
236 | | - | |
237 | | - | |
238 | | - | |
| 248 | + | |
| 249 | + | |
| 250 | + | |
| 251 | + | |
| 252 | + | |
| 253 | + | |
| 254 | + | |
| 255 | + | |
| 256 | + | |
| 257 | + | |
| 258 | + | |
| 259 | + | |
| 260 | + | |
| 261 | + | |
| 262 | + | |
| 263 | + | |
| 264 | + | |
| 265 | + | |
| 266 | + | |
| 267 | + | |
| 268 | + | |
| 269 | + | |
| 270 | + | |
| 271 | + | |
| 272 | + | |
| 273 | + | |
| 274 | + | |
| 275 | + | |
| 276 | + | |
| 277 | + | |
| 278 | + | |
| 279 | + | |
| 280 | + | |
| 281 | + | |
| 282 | + | |
| 283 | + | |
| 284 | + | |
| 285 | + | |
| 286 | + | |
239 | 287 | | |
240 | | - | |
241 | | - | |
242 | | - | |
243 | | - | |
244 | | - | |
245 | | - | |
| 288 | + | |
| 289 | + | |
246 | 290 | | |
247 | | - | |
248 | | - | |
| 291 | + | |
| 292 | + | |
| 293 | + | |
| 294 | + | |
| 295 | + | |
| 296 | + | |
| 297 | + | |
249 | 298 | | |
250 | | - | |
251 | | - | |
252 | | - | |
253 | | - | |
254 | | - | |
255 | | - | |
| 299 | + | |
| 300 | + | |
| 301 | + | |
| 302 | + | |
| 303 | + | |
| 304 | + | |
| 305 | + | |
| 306 | + | |
| 307 | + | |
| 308 | + | |
| 309 | + | |
256 | 310 | | |
257 | 311 | | |
258 | | - | |
259 | | - | |
260 | | - | |
261 | | - | |
262 | | - | |
263 | | - | |
264 | | - | |
265 | | - | |
266 | | - | |
| 312 | + | |
| 313 | + | |
| 314 | + | |
| 315 | + | |
| 316 | + | |
| 317 | + | |
| 318 | + | |
267 | 319 | | |
268 | | - | |
269 | | - | |
270 | | - | |
271 | | - | |
272 | | - | |
273 | | - | |
274 | | - | |
275 | | - | |
276 | | - | |
| 320 | + | |
277 | 321 | | |
278 | 322 | | |
279 | | - | |
| 323 | + | |
280 | 324 | | |
281 | 325 | | |
282 | 326 | | |
| |||
306 | 350 | | |
307 | 351 | | |
308 | 352 | | |
309 | | - | |
310 | | - | |
311 | | - | |
312 | | - | |
313 | | - | |
314 | | - | |
315 | | - | |
316 | | - | |
317 | | - | |
318 | | - | |
319 | | - | |
320 | | - | |
321 | | - | |
322 | | - | |
323 | | - | |
324 | 353 | | |
325 | 354 | | |
326 | 355 | | |
| |||
374 | 403 | | |
375 | 404 | | |
376 | 405 | | |
377 | | - | |
378 | 406 | | |
379 | 407 | | |
380 | 408 | | |
| |||
469 | 497 | | |
470 | 498 | | |
471 | 499 | | |
472 | | - | |
| 500 | + | |
473 | 501 | | |
474 | 502 | | |
475 | 503 | | |
| |||
484 | 512 | | |
485 | 513 | | |
486 | 514 | | |
487 | | - | |
| 515 | + | |
488 | 516 | | |
489 | 517 | | |
490 | 518 | | |
| |||
499 | 527 | | |
500 | 528 | | |
501 | 529 | | |
502 | | - | |
503 | | - | |
| 530 | + | |
| 531 | + | |
| 532 | + | |
| 533 | + | |
| 534 | + | |
| 535 | + | |
| 536 | + | |
| 537 | + | |
| 538 | + | |
| 539 | + | |
| 540 | + | |
504 | 541 | | |
505 | 542 | | |
506 | 543 | | |
| |||
511 | 548 | | |
512 | 549 | | |
513 | 550 | | |
514 | | - | |
515 | | - | |
| 551 | + | |
| 552 | + | |
| 553 | + | |
| 554 | + | |
| 555 | + | |
| 556 | + | |
| 557 | + | |
| 558 | + | |
| 559 | + | |
| 560 | + | |
| 561 | + | |
| 562 | + | |
| 563 | + | |
| 564 | + | |
| 565 | + | |
516 | 566 | | |
517 | 567 | | |
518 | 568 | | |
| |||
619 | 669 | | |
620 | 670 | | |
621 | 671 | | |
622 | | - | |
| 672 | + | |
623 | 673 | | |
624 | 674 | | |
625 | 675 | | |
| |||
0 commit comments