Skip to content

Commit 0d8f690

Browse files
Pre-process start/end points for commit graph
This is a 5x improvement locally, because we no longer re-scan the data for nearly every missing run and commit. As far as I can tell, the output is unchanged.
1 parent 2af67e3 commit 0d8f690

File tree

1 file changed

+112
-40
lines changed

1 file changed

+112
-40
lines changed

site/src/load.rs

Lines changed: 112 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -283,6 +283,7 @@ impl InputData {
283283
}
284284

285285
eprintln!("Starting interpolation...");
286+
let mut latest_section_start = ::std::time::Instant::now();
286287
let start = ::std::time::Instant::now();
287288
let data_real = data.clone();
288289
let mut interpolated = HashMap::new();
@@ -308,11 +309,78 @@ impl InputData {
308309
}
309310
}
310311
}
311-
trace!("computed current benchmarks and runs, in {:?}", start.elapsed());
312+
trace!("computed current benchmarks and runs, in {:?}", latest_section_start.elapsed());
313+
latest_section_start = ::std::time::Instant::now();
314+
315+
let mut last_commit = Vec::with_capacity(data_next.len());
316+
let mut next_commit = Vec::with_capacity(data_next.len());
317+
318+
let mut last_seen = HashMap::new();
319+
for needle in data_commits.iter() {
320+
for (name, value) in &data_real[needle].benchmarks {
321+
if value.is_ok() {
322+
last_seen.insert(name.clone(), needle.clone());
323+
}
324+
}
325+
last_commit.push(last_seen.clone());
326+
}
327+
last_seen.clear();
328+
for needle in data_commits.iter().rev() {
329+
for (name, value) in &data_real[needle].benchmarks {
330+
if value.is_ok() {
331+
last_seen.insert(name.clone(), needle.clone());
332+
}
333+
}
334+
next_commit.push(last_seen.clone());
335+
}
336+
next_commit.reverse();
337+
338+
trace!("computed start/ends of benchmark holes in {:?}", latest_section_start.elapsed());
339+
latest_section_start = ::std::time::Instant::now();
340+
341+
// Find the earliest and latest (scanning from left and from right) runs for every
342+
// benchmark
343+
344+
let mut last_run = Vec::with_capacity(data_next.len());
345+
let mut next_run = Vec::with_capacity(data_next.len());
346+
347+
let mut last_seen = HashMap::new();
348+
for (idx, needle) in data_commits.iter().enumerate() {
349+
for (name, value) in &data_real[needle].benchmarks {
350+
if let Ok(bench) = value {
351+
let mut e = last_seen
352+
.entry(name.clone())
353+
.or_insert_with(HashMap::new);
354+
for run in bench.runs.iter() {
355+
e.insert(run.id(), (idx, run.clone()));
356+
}
357+
}
358+
}
359+
last_run.push(last_seen.clone());
360+
}
361+
last_seen.clear();
362+
for (idx, needle) in data_commits.iter().enumerate().rev() {
363+
for (name, value) in &data_real[needle].benchmarks {
364+
if let Ok(bench) = value {
365+
let mut e = last_seen
366+
.entry(name.clone())
367+
.or_insert_with(HashMap::new);
368+
for run in bench.runs.iter() {
369+
e.insert(run.id(), (idx, run.clone()));
370+
}
371+
}
372+
}
373+
next_run.push(last_seen.clone());
374+
}
375+
next_run.reverse();
376+
377+
trace!("computed start/ends of run holes in {:?}", latest_section_start.elapsed());
378+
latest_section_start = ::std::time::Instant::now();
312379

313380
// The data holds this tree:
314381
// [commit] -> [benchmark] -> [run] -> [stat]
315382

383+
let mut dur = ::std::time::Duration::new(0, 0);
316384
for (commit, cd) in &mut data_next {
317385
for benchmark_name in &current_benchmarks {
318386
// We do not interpolate try commits today
@@ -328,6 +396,11 @@ impl InputData {
328396
commits: &data_commits,
329397
commit_map: &commit_map,
330398
interpolated: &mut interpolated,
399+
last_seen_commit: &last_commit,
400+
next_seen_commit: &next_commit,
401+
last_seen_run: &last_run,
402+
next_seen_run: &next_run,
403+
dur: &mut dur,
331404
};
332405

333406
let entry = cd.benchmarks.entry(benchmark_name.to_owned())
@@ -360,11 +433,13 @@ impl InputData {
360433
}
361434
}
362435
}
436+
trace!("total time finding runs: {:?}", dur);
363437

364438
let interpolated = interpolated.into_iter()
365439
.filter(|(_, v)| !v.is_empty())
366440
.collect::<HashMap<_, _>>();
367441

442+
trace!("finished primary interpolation in {:?}", latest_section_start.elapsed());
368443
eprintln!("Interpolation of {} commits complete in {:?}", interpolated.len(), start.elapsed());
369444
let data = data_next;
370445

@@ -426,42 +501,41 @@ struct AssociatedData<'a> {
426501
commits: &'a [Commit],
427502
commit_map: &'a HashMap<Commit, usize>,
428503
interpolated: &'a mut HashMap<String, Vec<Interpolation>>,
504+
505+
// By benchmark name
506+
last_seen_commit: &'a [HashMap<String, Commit>],
507+
next_seen_commit: &'a [HashMap<String, Commit>],
508+
509+
last_seen_run: &'a [HashMap<String, HashMap<RunId, (usize, Run)>>],
510+
next_seen_run: &'a [HashMap<String, HashMap<RunId, (usize, Run)>>],
511+
512+
dur: &'a mut ::std::time::Duration,
429513
}
430514

431515
// This function can assume that the benchmark exists and is restricted to filling in runs within
432516
// the benchmark.
433517
fn fill_benchmark_runs(benchmark: &mut Benchmark, missing_runs: Vec<&RunId>, data: &mut AssociatedData) {
434-
fn find_run<'a, I>(benchmark: &str, needle_run: &RunId, commits: I, data: &AssociatedData) -> Option<(usize, Run)>
435-
where I: Iterator<Item=&'a Commit>,
436-
{
437-
for (idx, needle) in commits.enumerate() {
438-
let bench = data.data[needle].benchmarks.get(benchmark);
439-
if let Some(Ok(bench)) = bench {
440-
if let Some(run) = bench.runs.iter().find(|run| **run == *needle_run) {
441-
return Some((idx, run.clone()));
442-
}
443-
}
444-
}
445-
None
446-
}
447-
448518
let commit_idx = data.commit_map[data.commit];
449519
for missing_run in missing_runs {
450-
let start = find_run(&benchmark.name, &missing_run, data.commits[..commit_idx].iter().rev(), &*data);
451-
let end = find_run(&benchmark.name, &missing_run, data.commits[commit_idx + 1..].iter(), &*data);
452-
let start_commit = start.as_ref().map(|(idx, _)| data.commits[commit_idx - 1 - idx].clone());
453-
let end_commit = end.as_ref().map(|(idx, _)| data.commits[commit_idx + 1 + idx].clone());
520+
let time_start = ::std::time::Instant::now();
521+
let start = data.last_seen_run[commit_idx]
522+
.get(&benchmark.name).and_then(|b| b.get(missing_run));
523+
let end = data.next_seen_run[commit_idx]
524+
.get(&benchmark.name).and_then(|b| b.get(missing_run));
525+
let start_commit = start.map(|(idx, _)| data.commits[*idx].clone());
526+
let end_commit = end.map(|(idx, _)| data.commits[*idx].clone());
527+
*data.dur += time_start.elapsed();
454528

455529
assert_ne!(start_commit.as_ref(), Some(data.commit));
456530
assert_ne!(end_commit.as_ref(), Some(data.commit));
457531

458532
let mut interpolations = data.interpolated.entry(data.commit.sha.clone()).or_insert_with(Vec::new);
459533
let run = match (start, end) {
460534
(Some(srun), Some(erun)) => {
461-
let distance = srun.0 + erun.0;
462-
let from_start = srun.0;
535+
let distance = (commit_idx - srun.0 - 1) + (erun.0 - commit_idx - 1);
536+
let from_start = commit_idx - srun.0 - 1;
463537
let interpolated_stats = interpolate_stats(&srun.1, &erun.1, distance, from_start);
464-
let mut interpolated_run = srun.1;
538+
let mut interpolated_run = srun.1.clone();
465539
interpolated_run.stats = interpolated_stats;
466540
interpolations.push(Interpolation {
467541
benchmark: benchmark.name.clone(),
@@ -481,7 +555,7 @@ fn fill_benchmark_runs(benchmark: &mut Benchmark, missing_runs: Vec<&RunId>, dat
481555
start_commit.unwrap(),
482556
),
483557
});
484-
srun.1
558+
srun.1.clone()
485559
}
486560
(None, Some(erun)) => {
487561
interpolations.push(Interpolation {
@@ -491,7 +565,7 @@ fn fill_benchmark_runs(benchmark: &mut Benchmark, missing_runs: Vec<&RunId>, dat
491565
end_commit.unwrap(),
492566
),
493567
});
494-
erun.1
568+
erun.1.clone()
495569
}
496570
(None, None) => {
497571
unreachable!("{} run in benchmark {} has no entries, but it's missing!",
@@ -507,22 +581,20 @@ fn fill_benchmark_data(benchmark_name: &str, data: &mut AssociatedData) -> Optio
507581
let interpolation_entry =
508582
data.interpolated.entry(data.commit.sha.clone()).or_insert_with(Vec::new);
509583

510-
let mut start = None;
511-
let mut end = None;
512-
for needle_commit in data.commits[..commit_idx].iter().rev() {
513-
let bench = data.data[needle_commit].benchmarks.get(benchmark_name);
514-
if let Some(Ok(bench)) = bench {
515-
start = Some((needle_commit.clone(), bench.clone()));
516-
break;
517-
}
518-
}
519-
for needle_commit in data.commits[commit_idx + 1..].iter() {
520-
let bench = data.data[needle_commit].benchmarks.get(benchmark_name);
521-
if let Some(Ok(bench)) = bench {
522-
end = Some((needle_commit.clone(), bench.clone()));
523-
break;
524-
}
525-
}
584+
let start = if let Some(needle) = data.last_seen_commit[commit_idx].get(benchmark_name) {
585+
let commit = needle.clone();
586+
let bench = data.data[&commit].benchmarks[benchmark_name].as_ref().unwrap().clone();
587+
Some((commit, bench))
588+
} else {
589+
None
590+
};
591+
let end = if let Some(needle) = data.next_seen_commit[commit_idx].get(benchmark_name) {
592+
let commit = needle.clone();
593+
let bench = data.data[&commit].benchmarks[benchmark_name].as_ref().unwrap().clone();
594+
Some((commit, bench))
595+
} else {
596+
None
597+
};
526598

527599
match (start, end) {
528600
// This hole is bounded on both left and

0 commit comments

Comments
 (0)