From 4ac3542c070b3477454672946a68cf01b717d5b7 Mon Sep 17 00:00:00 2001 From: Wesley Wiser Date: Thu, 4 Apr 2019 19:49:25 -0400 Subject: [PATCH 01/12] Implement a basic summarization tool for profile traces Output is currently just: {event}- {total time} --- Cargo.toml | 1 + summarize/Cargo.toml | 10 ++++++ summarize/src/main.rs | 71 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 82 insertions(+) create mode 100644 summarize/Cargo.toml create mode 100644 summarize/src/main.rs diff --git a/Cargo.toml b/Cargo.toml index d30c453..803aef9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -3,4 +3,5 @@ members = [ "measureme", "mmview", + "summarize", ] diff --git a/summarize/Cargo.toml b/summarize/Cargo.toml new file mode 100644 index 0000000..e0b9453 --- /dev/null +++ b/summarize/Cargo.toml @@ -0,0 +1,10 @@ +[package] +name = "summarize" +version = "0.1.0" +authors = ["Wesley Wiser ", "Michael Woerister "] +edition = "2018" +license = "MIT OR Apache-2.0" + +[dependencies] +measureme = { path = "../measureme" } +structopt = "0.2" diff --git a/summarize/src/main.rs b/summarize/src/main.rs new file mode 100644 index 0000000..6d4c206 --- /dev/null +++ b/summarize/src/main.rs @@ -0,0 +1,71 @@ +use std::collections::HashMap; +use std::path::PathBuf; +use std::time::Duration; +use measureme::{ProfilingData, TimestampKind, Event}; + +use structopt::StructOpt; + +#[derive(StructOpt, Debug)] +struct Opt { + file_prefix: PathBuf, +} + +fn main() { + let opt = Opt::from_args(); + + let data = ProfilingData::new(&opt.file_prefix); + + let mut cumulative_time = HashMap::<_, Duration>::new(); + let mut threads = HashMap::<_, Vec>::new(); + + for event in data.iter() { + match event.timestamp_kind { + TimestampKind::Start => { + let thread_stack = threads.entry(event.thread_id).or_default(); + + if let Some(prev_event) = thread_stack.last() { + //count the time run so far for this event + let duration = + event.timestamp.duration_since(prev_event.timestamp) + 
.unwrap_or(Duration::from_nanos(0)); + *cumulative_time.entry(prev_event.label.clone()).or_default() += duration; + } + + thread_stack.push(event); + }, + TimestampKind::Instant => { }, + TimestampKind::End => { + let thread_stack = threads.get_mut(&event.thread_id).unwrap(); + let start_event = thread_stack.pop().unwrap(); + assert_eq!(start_event.event_kind, event.event_kind); + assert_eq!(start_event.label, event.label); + assert_eq!(start_event.timestamp_kind, TimestampKind::Start); + + //track the time for this event + let duration = + event.timestamp + .duration_since(start_event.timestamp) + .unwrap_or(Duration::from_nanos(0)); + *cumulative_time.entry(start_event.label.clone()).or_default() += duration; + + //now adjust the previous event's start time so that it "started" right now + if let Some(previous_event) = thread_stack.last_mut() { + assert_eq!(TimestampKind::Start, previous_event.timestamp_kind); + previous_event.timestamp = event.timestamp; + } + } + } + } + + let mut total_time = Duration::from_nanos(0); + + let mut times: Vec<_> = cumulative_time.iter().collect(); + times.sort_by_key(|(_, v)| *v); + times.reverse(); + for (k, v) in times { + total_time += *v; + println!("{}- {:?}", k, v); + } + + println!("Total cpu time: {:?}", total_time); +} From 77958c59f47870ebd95726f8944795838acf6fce Mon Sep 17 00:00:00 2001 From: Wesley Wiser Date: Mon, 8 Apr 2019 21:55:11 -0400 Subject: [PATCH 02/12] Add helper functions --- measureme/src/lib.rs | 2 +- measureme/src/profiling_data.rs | 79 ++++++++++++++++++++++++++++++--- 2 files changed, 74 insertions(+), 7 deletions(-) diff --git a/measureme/src/lib.rs b/measureme/src/lib.rs index a709d97..f252233 100644 --- a/measureme/src/lib.rs +++ b/measureme/src/lib.rs @@ -13,7 +13,7 @@ pub use crate::event::Event; pub use crate::file_serialization_sink::FileSerializationSink; pub use crate::mmap_serialization_sink::MmapSerializationSink; pub use crate::profiler::{Profiler, ProfilerFiles}; -pub use 
crate::profiling_data::ProfilingData; +pub use crate::profiling_data::{ProfilingData, MatchingEvent}; pub use crate::raw_event::{RawEvent, TimestampKind}; pub use crate::serialization::{Addr, SerializationSink}; pub use crate::stringtable::{ diff --git a/measureme/src/profiling_data.rs b/measureme/src/profiling_data.rs index c7b8a5c..2df29a9 100644 --- a/measureme/src/profiling_data.rs +++ b/measureme/src/profiling_data.rs @@ -1,5 +1,5 @@ use crate::event::Event; -use crate::{ProfilerFiles, RawEvent, StringTable}; +use crate::{ProfilerFiles, RawEvent, StringTable, TimestampKind}; use std::fs; use std::mem; use std::path::Path; @@ -27,10 +27,11 @@ impl ProfilingData { } pub fn iter(&self) -> impl Iterator> { - ProfilerEventIterator { - data: self, - curr_event_idx: 0, - } + ProfilerEventIterator::new(&self) + } + + pub fn iter_matching_events(&self) -> impl Iterator> { + MatchingEventsIterator::new(ProfilerEventIterator::new(&self)) } } @@ -39,6 +40,15 @@ struct ProfilerEventIterator<'a> { curr_event_idx: usize, } +impl<'a> ProfilerEventIterator<'a> { + pub fn new(data: &'a ProfilingData) -> ProfilerEventIterator<'a> { + ProfilerEventIterator { + data, + curr_event_idx: 0, + } + } +} + impl<'a> Iterator for ProfilerEventIterator<'a> { type Item = Event<'a>; @@ -76,4 +86,61 @@ impl<'a> Iterator for ProfilerEventIterator<'a> { thread_id: raw_event.thread_id, }) } -} \ No newline at end of file +} + +#[derive(Clone, Debug, PartialEq, Eq, Hash)] +pub enum MatchingEvent<'a> { + StartStop(Event<'a>, Event<'a>), + Instant(Event<'a>), +} + +struct MatchingEventsIterator<'a> { + events: ProfilerEventIterator<'a>, + thread_stacks: Vec>>, +} + +impl<'a> MatchingEventsIterator<'a> { + pub fn new(events: ProfilerEventIterator<'a>) -> MatchingEventsIterator<'a> { + MatchingEventsIterator { + events, + thread_stacks: vec![], + } + } +} + +impl<'a> Iterator for MatchingEventsIterator<'a> { + type Item = MatchingEvent<'a>; + + fn next(&mut self) -> Option> { + while let Some(event) 
= self.events.next() { + match event.timestamp_kind { + TimestampKind::Start => { + let thread_id = event.thread_id as usize; + if thread_id >= self.thread_stacks.len() { + let growth_size = (thread_id + 1) - self.thread_stacks.len(); + self.thread_stacks.append( + &mut vec![vec![]; growth_size] + ) + } + + self.thread_stacks[thread_id].push(event); + }, + TimestampKind::Instant => { + return Some(MatchingEvent::Instant(event)); + }, + TimestampKind::End => { + let thread_id = event.thread_id as usize; + let previous_event = self.thread_stacks[thread_id].pop().expect("no previous event"); + if previous_event.event_kind != event.event_kind || + previous_event.label != event.label { + panic!("previous event on thread wasn't the start event"); + } + + return Some(MatchingEvent::StartStop(previous_event, event)); + } + } + } + + None + } +} From 1334160a6aca558346317f2079c4c289f1295bdd Mon Sep 17 00:00:00 2001 From: Wesley Wiser Date: Sun, 14 Apr 2019 20:14:46 -0400 Subject: [PATCH 03/12] Move code out of main --- summarize/src/analysis.rs | 108 ++++++++++++++++++++++++++++++++++++++ summarize/src/main.rs | 67 ++++++----------------- 2 files changed, 124 insertions(+), 51 deletions(-) create mode 100644 summarize/src/analysis.rs diff --git a/summarize/src/analysis.rs b/summarize/src/analysis.rs new file mode 100644 index 0000000..0e0926e --- /dev/null +++ b/summarize/src/analysis.rs @@ -0,0 +1,108 @@ +use std::collections::HashMap; +use std::time::Duration; +use measureme::{ProfilingData, TimestampKind, Event}; + +pub struct QueryData { + pub label: String, + pub self_time: Duration, + pub number_of_cache_misses: usize, + pub number_of_cache_hits: usize, +} + +impl QueryData { + fn new(label: String) -> QueryData { + QueryData { + label, + self_time: Duration::from_nanos(0), + number_of_cache_misses: 0, + number_of_cache_hits: 0, + } + } +} + +pub struct Results { + pub query_data: Vec, + pub total_time: Duration, +} + +pub fn perform_analysis(data: ProfilingData) -> 
Results { + let mut query_data = HashMap::::new(); + let mut threads = HashMap::<_, Vec>::new(); + let mut total_time = Duration::from_nanos(0); + + for event in data.iter() { + match event.timestamp_kind { + TimestampKind::Start => { + let thread_stack = threads.entry(event.thread_id).or_default(); + + if let Some(prev_event) = thread_stack.last() { + //count the time run so far for this event + let duration = + event.timestamp.duration_since(prev_event.timestamp) + .unwrap_or(Duration::from_nanos(0)); + + if let Some(data) = query_data.get_mut(&prev_event.label[..]) { + data.self_time += duration; + } else { + let mut data = QueryData::new(prev_event.label.clone().into_owned()); + data.self_time = duration; + query_data.insert(prev_event.label.clone().into_owned(), data); + } + + //record the total time + total_time += duration; + } + + thread_stack.push(event); + }, + TimestampKind::Instant => { + if &event.event_kind[..] == "QueryCacheHit" { + if let Some(data) = query_data.get_mut(&event.label[..]) { + data.number_of_cache_hits += 1; + } else { + let mut data = QueryData::new(event.label.clone().into_owned()); + data.number_of_cache_hits = 1; + query_data.insert(event.label.clone().into_owned(), data); + } + } + }, + TimestampKind::End => { + let thread_stack = threads.get_mut(&event.thread_id).unwrap(); + let start_event = thread_stack.pop().unwrap(); + assert_eq!(start_event.event_kind, event.event_kind); + assert_eq!(start_event.label, event.label); + assert_eq!(start_event.timestamp_kind, TimestampKind::Start); + + //track the time for this event + let duration = + event.timestamp + .duration_since(start_event.timestamp) + .unwrap_or(Duration::from_nanos(0)); + + if let Some(data) = query_data.get_mut(&start_event.label[..]) { + data.self_time += duration; + data.number_of_cache_misses += 1; + } else { + let mut data = QueryData::new(start_event.label.clone().into_owned()); + data.self_time = duration; + data.number_of_cache_misses = 1; + 
query_data.insert(start_event.label.into_owned(), data); + } + + //now adjust the previous event's start time so that it "started" right now + if let Some(previous_event) = thread_stack.last_mut() { + assert_eq!(TimestampKind::Start, previous_event.timestamp_kind); + previous_event.timestamp = event.timestamp; + } + + //record the total time + total_time += duration; + } + } + } + + Results { + query_data: query_data.drain().map(|(_, value)| value).collect(), + total_time, + } +} diff --git a/summarize/src/main.rs b/summarize/src/main.rs index 6d4c206..4383391 100644 --- a/summarize/src/main.rs +++ b/summarize/src/main.rs @@ -1,10 +1,10 @@ -use std::collections::HashMap; use std::path::PathBuf; -use std::time::Duration; -use measureme::{ProfilingData, TimestampKind, Event}; +use measureme::ProfilingData; use structopt::StructOpt; +mod analysis; + #[derive(StructOpt, Debug)] struct Opt { file_prefix: PathBuf, @@ -15,57 +15,22 @@ fn main() { let data = ProfilingData::new(&opt.file_prefix); - let mut cumulative_time = HashMap::<_, Duration>::new(); - let mut threads = HashMap::<_, Vec>::new(); - - for event in data.iter() { - match event.timestamp_kind { - TimestampKind::Start => { - let thread_stack = threads.entry(event.thread_id).or_default(); - - if let Some(prev_event) = thread_stack.last() { - //count the time run so far for this event - let duration = - event.timestamp.duration_since(prev_event.timestamp) - .unwrap_or(Duration::from_nanos(0)); - *cumulative_time.entry(prev_event.label.clone()).or_default() += duration; - } + let mut results = analysis::perform_analysis(data); - thread_stack.push(event); - }, - TimestampKind::Instant => { }, - TimestampKind::End => { - let thread_stack = threads.get_mut(&event.thread_id).unwrap(); - let start_event = thread_stack.pop().unwrap(); - assert_eq!(start_event.event_kind, event.event_kind); - assert_eq!(start_event.label, event.label); - assert_eq!(start_event.timestamp_kind, TimestampKind::Start); - - //track the time 
for this event - let duration = - event.timestamp - .duration_since(start_event.timestamp) - .unwrap_or(Duration::from_nanos(0)); - *cumulative_time.entry(start_event.label.clone()).or_default() += duration; - - //now adjust the previous event's start time so that it "started" right now - if let Some(previous_event) = thread_stack.last_mut() { - assert_eq!(TimestampKind::Start, previous_event.timestamp_kind); - previous_event.timestamp = event.timestamp; - } - } - } - } + //order the results by descending self time + results.query_data.sort_by(|l, r| r.self_time.cmp(&l.self_time)); - let mut total_time = Duration::from_nanos(0); + println!("| Item | Self Time | % of total time | Number of invocations | Cache hits |"); - let mut times: Vec<_> = cumulative_time.iter().collect(); - times.sort_by_key(|(_, v)| *v); - times.reverse(); - for (k, v) in times { - total_time += *v; - println!("{}- {:?}", k, v); + for query_data in results.query_data { + println!( + "{} | {:?} | {} | {} |", + query_data.label, + query_data.self_time, + query_data.number_of_cache_hits + query_data.number_of_cache_misses, + query_data.number_of_cache_hits, + ); } - println!("Total cpu time: {:?}", total_time); + println!("Total cpu time: {:?}", results.total_time); } From 4118fcd5e7f0d2a54cf3784b484f13b6acea6b17 Mon Sep 17 00:00:00 2001 From: Wesley Wiser Date: Tue, 16 Apr 2019 20:51:22 -0400 Subject: [PATCH 04/12] Calculate time blocked --- summarize/src/analysis.rs | 80 ++++++++++++++++++++++++--------------- summarize/src/main.rs | 6 ++- 2 files changed, 53 insertions(+), 33 deletions(-) diff --git a/summarize/src/analysis.rs b/summarize/src/analysis.rs index 0e0926e..8f70040 100644 --- a/summarize/src/analysis.rs +++ b/summarize/src/analysis.rs @@ -7,6 +7,7 @@ pub struct QueryData { pub self_time: Duration, pub number_of_cache_misses: usize, pub number_of_cache_hits: usize, + pub blocked_time: Duration, } impl QueryData { @@ -16,6 +17,7 @@ impl QueryData { self_time: 
Duration::from_nanos(0), number_of_cache_misses: 0, number_of_cache_hits: 0, + blocked_time: Duration::from_nanos(0), } } } @@ -35,25 +37,30 @@ pub fn perform_analysis(data: ProfilingData) -> Results { TimestampKind::Start => { let thread_stack = threads.entry(event.thread_id).or_default(); - if let Some(prev_event) = thread_stack.last() { - //count the time run so far for this event - let duration = - event.timestamp.duration_since(prev_event.timestamp) - .unwrap_or(Duration::from_nanos(0)); + if &event.event_kind[..] == "Query" || &event.event_kind[..] == "GenericActivity" { + if let Some(prev_event) = thread_stack.last() { + //count the time run so far for this event + let duration = + event.timestamp.duration_since(prev_event.timestamp) + .unwrap_or(Duration::from_nanos(0)); - if let Some(data) = query_data.get_mut(&prev_event.label[..]) { - data.self_time += duration; - } else { - let mut data = QueryData::new(prev_event.label.clone().into_owned()); - data.self_time = duration; - query_data.insert(prev_event.label.clone().into_owned(), data); + if let Some(data) = query_data.get_mut(&prev_event.label[..]) { + data.self_time += duration; + } else { + let mut data = QueryData::new(prev_event.label.clone().into_owned()); + data.self_time = duration; + query_data.insert(prev_event.label.clone().into_owned(), data); + } + + //record the total time + total_time += duration; } - //record the total time - total_time += duration; + thread_stack.push(event); + } else if &event.event_kind[..] == "QueryBlocked" || + &event.event_kind[..] == "IncrementalLoadResult" { + thread_stack.push(event); } - - thread_stack.push(event); }, TimestampKind::Instant => { if &event.event_kind[..] 
== "QueryCacheHit" { @@ -69,6 +76,7 @@ pub fn perform_analysis(data: ProfilingData) -> Results { TimestampKind::End => { let thread_stack = threads.get_mut(&event.thread_id).unwrap(); let start_event = thread_stack.pop().unwrap(); + assert_eq!(start_event.event_kind, event.event_kind); assert_eq!(start_event.label, event.label); assert_eq!(start_event.timestamp_kind, TimestampKind::Start); @@ -79,24 +87,34 @@ pub fn perform_analysis(data: ProfilingData) -> Results { .duration_since(start_event.timestamp) .unwrap_or(Duration::from_nanos(0)); - if let Some(data) = query_data.get_mut(&start_event.label[..]) { - data.self_time += duration; - data.number_of_cache_misses += 1; - } else { - let mut data = QueryData::new(start_event.label.clone().into_owned()); - data.self_time = duration; - data.number_of_cache_misses = 1; - query_data.insert(start_event.label.into_owned(), data); - } + if &event.event_kind[..] == "Query" || &event.event_kind[..] == "GenericActivity" { + if let Some(data) = query_data.get_mut(&start_event.label[..]) { + data.self_time += duration; + data.number_of_cache_misses += 1; + } else { + let mut data = QueryData::new(start_event.label.clone().into_owned()); + data.self_time = duration; + data.number_of_cache_misses = 1; + query_data.insert(start_event.label.clone().into_owned(), data); + } - //now adjust the previous event's start time so that it "started" right now - if let Some(previous_event) = thread_stack.last_mut() { - assert_eq!(TimestampKind::Start, previous_event.timestamp_kind); - previous_event.timestamp = event.timestamp; - } + //now adjust the previous event's start time so that it "started" right now + if let Some(previous_event) = thread_stack.last_mut() { + assert_eq!(TimestampKind::Start, previous_event.timestamp_kind); + previous_event.timestamp = event.timestamp; + } - //record the total time - total_time += duration; + //record the total time + total_time += duration; + } else if &event.event_kind[..] 
== "QueryBlocked" { + if let Some(data) = query_data.get_mut(&start_event.label[..]) { + data.blocked_time += duration; + } else { + let mut data = QueryData::new(start_event.label.clone().into_owned()); + data.blocked_time = duration; + query_data.insert(start_event.label.clone().into_owned(), data); + } + } } } } diff --git a/summarize/src/main.rs b/summarize/src/main.rs index 4383391..ba9b766 100644 --- a/summarize/src/main.rs +++ b/summarize/src/main.rs @@ -20,15 +20,17 @@ fn main() { //order the results by descending self time results.query_data.sort_by(|l, r| r.self_time.cmp(&l.self_time)); - println!("| Item | Self Time | % of total time | Number of invocations | Cache hits |"); + println!("| Item | Self Time | % of total time | Number of invocations \ + | Cache hits | Blocked time |"); for query_data in results.query_data { println!( - "{} | {:?} | {} | {} |", + "{} | {:?} | {} | {} | {:?} |", query_data.label, query_data.self_time, query_data.number_of_cache_hits + query_data.number_of_cache_misses, query_data.number_of_cache_hits, + query_data.blocked_time, ); } From eebb68ddb0890404e4b7abde02ea8c9c27597518 Mon Sep 17 00:00:00 2001 From: Wesley Wiser Date: Tue, 16 Apr 2019 21:25:26 -0400 Subject: [PATCH 05/12] Refactor the analysis code --- summarize/src/analysis.rs | 44 +++++++++++++++++---------------------- 1 file changed, 19 insertions(+), 25 deletions(-) diff --git a/summarize/src/analysis.rs b/summarize/src/analysis.rs index 8f70040..8e7f2d3 100644 --- a/summarize/src/analysis.rs +++ b/summarize/src/analysis.rs @@ -1,3 +1,4 @@ +use std::borrow::Cow; use std::collections::HashMap; use std::time::Duration; use measureme::{ProfilingData, TimestampKind, Event}; @@ -32,6 +33,16 @@ pub fn perform_analysis(data: ProfilingData) -> Results { let mut threads = HashMap::<_, Vec>::new(); let mut total_time = Duration::from_nanos(0); + let mut record_event_data = |label: &Cow<'_, str>, f: &Fn(&mut QueryData)| { + if let Some(data) = 
query_data.get_mut(&label[..]) { + f(data); + } else { + let mut data = QueryData::new(label.clone().into_owned()); + f(&mut data); + query_data.insert(label.clone().into_owned(), data); + } + }; + for event in data.iter() { match event.timestamp_kind { TimestampKind::Start => { @@ -44,13 +55,9 @@ pub fn perform_analysis(data: ProfilingData) -> Results { event.timestamp.duration_since(prev_event.timestamp) .unwrap_or(Duration::from_nanos(0)); - if let Some(data) = query_data.get_mut(&prev_event.label[..]) { + record_event_data(&prev_event.label, &|data| { data.self_time += duration; - } else { - let mut data = QueryData::new(prev_event.label.clone().into_owned()); - data.self_time = duration; - query_data.insert(prev_event.label.clone().into_owned(), data); - } + }); //record the total time total_time += duration; @@ -64,13 +71,9 @@ pub fn perform_analysis(data: ProfilingData) -> Results { }, TimestampKind::Instant => { if &event.event_kind[..] == "QueryCacheHit" { - if let Some(data) = query_data.get_mut(&event.label[..]) { + record_event_data(&event.label, &|data| { data.number_of_cache_hits += 1; - } else { - let mut data = QueryData::new(event.label.clone().into_owned()); - data.number_of_cache_hits = 1; - query_data.insert(event.label.clone().into_owned(), data); - } + }); } }, TimestampKind::End => { @@ -88,15 +91,10 @@ pub fn perform_analysis(data: ProfilingData) -> Results { .unwrap_or(Duration::from_nanos(0)); if &event.event_kind[..] == "Query" || &event.event_kind[..] 
== "GenericActivity" { - if let Some(data) = query_data.get_mut(&start_event.label[..]) { + record_event_data(&event.label, &|data| { data.self_time += duration; data.number_of_cache_misses += 1; - } else { - let mut data = QueryData::new(start_event.label.clone().into_owned()); - data.self_time = duration; - data.number_of_cache_misses = 1; - query_data.insert(start_event.label.clone().into_owned(), data); - } + }); //now adjust the previous event's start time so that it "started" right now if let Some(previous_event) = thread_stack.last_mut() { @@ -107,13 +105,9 @@ pub fn perform_analysis(data: ProfilingData) -> Results { //record the total time total_time += duration; } else if &event.event_kind[..] == "QueryBlocked" { - if let Some(data) = query_data.get_mut(&start_event.label[..]) { + record_event_data(&event.label, &|data| { data.blocked_time += duration; - } else { - let mut data = QueryData::new(start_event.label.clone().into_owned()); - data.blocked_time = duration; - query_data.insert(start_event.label.clone().into_owned(), data); - } + }); } } } From 12fad82bd6d2d93c7e30f87bbd0b5f411465362a Mon Sep 17 00:00:00 2001 From: Wesley Wiser Date: Tue, 16 Apr 2019 21:38:17 -0400 Subject: [PATCH 06/12] Add incremental load time data --- summarize/src/analysis.rs | 6 ++++++ summarize/src/main.rs | 5 +++-- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/summarize/src/analysis.rs b/summarize/src/analysis.rs index 8e7f2d3..7745f8e 100644 --- a/summarize/src/analysis.rs +++ b/summarize/src/analysis.rs @@ -9,6 +9,7 @@ pub struct QueryData { pub number_of_cache_misses: usize, pub number_of_cache_hits: usize, pub blocked_time: Duration, + pub incremental_load_time: Duration, } impl QueryData { @@ -19,6 +20,7 @@ impl QueryData { number_of_cache_misses: 0, number_of_cache_hits: 0, blocked_time: Duration::from_nanos(0), + incremental_load_time: Duration::from_nanos(0), } } } @@ -108,6 +110,10 @@ pub fn perform_analysis(data: ProfilingData) -> Results { 
record_event_data(&event.label, &|data| { data.blocked_time += duration; }); + } else if &event.event_kind[..] == "IncrementalLoadResult" { + record_event_data(&event.label, &|data| { + data.incremental_load_time += duration; + }); } } } diff --git a/summarize/src/main.rs b/summarize/src/main.rs index ba9b766..e0bbec1 100644 --- a/summarize/src/main.rs +++ b/summarize/src/main.rs @@ -21,16 +21,17 @@ fn main() { results.query_data.sort_by(|l, r| r.self_time.cmp(&l.self_time)); println!("| Item | Self Time | % of total time | Number of invocations \ - | Cache hits | Blocked time |"); + | Cache hits | Blocked time | Incremental load time |"); for query_data in results.query_data { println!( - "{} | {:?} | {} | {} | {:?} |", + "| {} | {:?} | {} | {} | {:?} | {:?} |", query_data.label, query_data.self_time, query_data.number_of_cache_hits + query_data.number_of_cache_misses, query_data.number_of_cache_hits, query_data.blocked_time, + query_data.incremental_load_time, ); } From 025898261ece62c604cf46ca9dc4f7cca2847180 Mon Sep 17 00:00:00 2001 From: Wesley Wiser Date: Wed, 17 Apr 2019 07:49:03 -0400 Subject: [PATCH 07/12] Use prettytable for formatting --- summarize/Cargo.toml | 1 + summarize/src/main.rs | 37 +++++++++++++++++++++++++++---------- 2 files changed, 28 insertions(+), 10 deletions(-) diff --git a/summarize/Cargo.toml b/summarize/Cargo.toml index e0b9453..98b3c0f 100644 --- a/summarize/Cargo.toml +++ b/summarize/Cargo.toml @@ -7,4 +7,5 @@ license = "MIT OR Apache-2.0" [dependencies] measureme = { path = "../measureme" } +prettytable-rs = "0.8" structopt = "0.2" diff --git a/summarize/src/main.rs b/summarize/src/main.rs index e0bbec1..c4be9ea 100644 --- a/summarize/src/main.rs +++ b/summarize/src/main.rs @@ -1,6 +1,10 @@ +#[macro_use] +extern crate prettytable; + use std::path::PathBuf; use measureme::ProfilingData; +use prettytable::{Table}; use structopt::StructOpt; mod analysis; @@ -20,20 +24,33 @@ fn main() { //order the results by descending self time 
results.query_data.sort_by(|l, r| r.self_time.cmp(&l.self_time)); - println!("| Item | Self Time | % of total time | Number of invocations \ - | Cache hits | Blocked time | Incremental load time |"); + let mut table = Table::new(); + + table.add_row(row![ + "Item", + "Self time", + "% of total time", + "Item count", + "Cache hits", + "Blocked time", + "Incremental load time", + ]); + + let total_time = results.total_time.as_nanos() as f64; for query_data in results.query_data { - println!( - "| {} | {:?} | {} | {} | {:?} | {:?} |", + table.add_row(row![ query_data.label, - query_data.self_time, - query_data.number_of_cache_hits + query_data.number_of_cache_misses, - query_data.number_of_cache_hits, - query_data.blocked_time, - query_data.incremental_load_time, - ); + format!("{:.2?}", query_data.self_time), + format!("{:.3}", ((query_data.self_time.as_nanos() as f64) / total_time) * 100.0), + format!("{}", query_data.number_of_cache_hits + query_data.number_of_cache_misses), + format!("{}", query_data.number_of_cache_hits), + format!("{:.2?}", query_data.blocked_time), + format!("{:.2?}", query_data.incremental_load_time), + ]); } + table.printstd(); + println!("Total cpu time: {:?}", results.total_time); } From 7d280dc847d6f7f1e52a70c3a0417894732bcc47 Mon Sep 17 00:00:00 2001 From: Wesley Wiser Date: Wed, 17 Apr 2019 20:57:12 -0400 Subject: [PATCH 08/12] Add json export mode --- summarize/Cargo.toml | 2 ++ summarize/src/analysis.rs | 4 ++++ summarize/src/main.rs | 17 ++++++++++++++++- 3 files changed, 22 insertions(+), 1 deletion(-) diff --git a/summarize/Cargo.toml b/summarize/Cargo.toml index 98b3c0f..dfd419f 100644 --- a/summarize/Cargo.toml +++ b/summarize/Cargo.toml @@ -8,4 +8,6 @@ license = "MIT OR Apache-2.0" [dependencies] measureme = { path = "../measureme" } prettytable-rs = "0.8" +serde = { version = "1.0", features = [ "derive" ] } +serde_json = "1.0" structopt = "0.2" diff --git a/summarize/src/analysis.rs b/summarize/src/analysis.rs index 
7745f8e..a3c6eec 100644 --- a/summarize/src/analysis.rs +++ b/summarize/src/analysis.rs @@ -3,6 +3,9 @@ use std::collections::HashMap; use std::time::Duration; use measureme::{ProfilingData, TimestampKind, Event}; +use serde::{Serialize}; + +#[derive(Serialize)] pub struct QueryData { pub label: String, pub self_time: Duration, @@ -25,6 +28,7 @@ impl QueryData { } } +#[derive(Serialize)] pub struct Results { pub query_data: Vec, pub total_time: Duration, diff --git a/summarize/src/main.rs b/summarize/src/main.rs index c4be9ea..dbfe167 100644 --- a/summarize/src/main.rs +++ b/summarize/src/main.rs @@ -1,6 +1,8 @@ #[macro_use] extern crate prettytable; +use std::fs::File; +use std::io::BufWriter; use std::path::PathBuf; use measureme::ProfilingData; @@ -12,15 +14,26 @@ mod analysis; #[derive(StructOpt, Debug)] struct Opt { file_prefix: PathBuf, + + /// Writes the analysis to a json file next to instead of stdout + #[structopt(long = "json")] + json: bool, } -fn main() { +fn main() -> Result<(), Box> { let opt = Opt::from_args(); let data = ProfilingData::new(&opt.file_prefix); let mut results = analysis::perform_analysis(data); + //just output the results into a json file + if opt.json { + let file = BufWriter::new(File::create(opt.file_prefix.with_extension("json"))?); + serde_json::to_writer(file, &results)?; + return Ok(()); + } + //order the results by descending self time results.query_data.sort_by(|l, r| r.self_time.cmp(&l.self_time)); @@ -53,4 +66,6 @@ fn main() { table.printstd(); println!("Total cpu time: {:?}", results.total_time); + + Ok(()) } From 7b0166d0f3832b8d71603016b427b0fe6933845b Mon Sep 17 00:00:00 2001 From: Wesley Wiser Date: Wed, 17 Apr 2019 21:34:20 -0400 Subject: [PATCH 09/12] Include invocation count in json output --- summarize/src/analysis.rs | 4 ++++ summarize/src/main.rs | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/summarize/src/analysis.rs b/summarize/src/analysis.rs index a3c6eec..2e9f1bd 100644 --- 
a/summarize/src/analysis.rs +++ b/summarize/src/analysis.rs @@ -11,6 +11,7 @@ pub struct QueryData { pub self_time: Duration, pub number_of_cache_misses: usize, pub number_of_cache_hits: usize, + pub invocation_count: usize, pub blocked_time: Duration, pub incremental_load_time: Duration, } @@ -22,6 +23,7 @@ impl QueryData { self_time: Duration::from_nanos(0), number_of_cache_misses: 0, number_of_cache_hits: 0, + invocation_count: 0, blocked_time: Duration::from_nanos(0), incremental_load_time: Duration::from_nanos(0), } @@ -79,6 +81,7 @@ pub fn perform_analysis(data: ProfilingData) -> Results { if &event.event_kind[..] == "QueryCacheHit" { record_event_data(&event.label, &|data| { data.number_of_cache_hits += 1; + data.invocation_count += 1; }); } }, @@ -100,6 +103,7 @@ pub fn perform_analysis(data: ProfilingData) -> Results { record_event_data(&event.label, &|data| { data.self_time += duration; data.number_of_cache_misses += 1; + data.invocation_count += 1; }); //now adjust the previous event's start time so that it "started" right now diff --git a/summarize/src/main.rs b/summarize/src/main.rs index dbfe167..ab5768a 100644 --- a/summarize/src/main.rs +++ b/summarize/src/main.rs @@ -56,7 +56,7 @@ fn main() -> Result<(), Box> { query_data.label, format!("{:.2?}", query_data.self_time), format!("{:.3}", ((query_data.self_time.as_nanos() as f64) / total_time) * 100.0), - format!("{}", query_data.number_of_cache_hits + query_data.number_of_cache_misses), + format!("{}", query_data.invocation_count), format!("{}", query_data.number_of_cache_hits), format!("{:.2?}", query_data.blocked_time), format!("{:.2?}", query_data.incremental_load_time), From 72e0cad153efe7611fa1d998861b78d4c2c8798a Mon Sep 17 00:00:00 2001 From: Wesley Wiser Date: Fri, 19 Apr 2019 17:04:35 -0400 Subject: [PATCH 10/12] Add a readme for summarize --- summarize/Readme.md | 46 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) create mode 100644 summarize/Readme.md diff 
--git a/summarize/Readme.md b/summarize/Readme.md new file mode 100644 index 0000000..42acb69 --- /dev/null +++ b/summarize/Readme.md @@ -0,0 +1,46 @@ +# summarize + +Summarize is a tool to produce a human readable summary of `measureme` profiling data. + +## Example + +```bash +$ git clone https://github.com/rust-lang/regex.git + +$ cd regex + +$ cargo rustc -- -Z self-profile + +$ summarize pid-{pid} + ++------------------------+-----------+-----------------+------------+------------+--------------+-----------------------+ +| Item | Self time | % of total time | Item count | Cache hits | Blocked time | Incremental load time | ++------------------------+-----------+-----------------+------------+------------+--------------+-----------------------+ +| LLVM_emit_obj | 4.51s | 41.432 | 141 | 0 | 0.00ns | 0.00ns | ++------------------------+-----------+-----------------+------------+------------+--------------+-----------------------+ +| LLVM_module_passes | 1.05s | 9.626 | 140 | 0 | 0.00ns | 0.00ns | ++------------------------+-----------+-----------------+------------+------------+--------------+-----------------------+ +| LLVM_make_bitcode | 712.94ms | 6.543 | 140 | 0 | 0.00ns | 0.00ns | ++------------------------+-----------+-----------------+------------+------------+--------------+-----------------------+ +| typeck_tables_of | 542.23ms | 4.976 | 17470 | 16520 | 0.00ns | 0.00ns | ++------------------------+-----------+-----------------+------------+------------+--------------+-----------------------+ +| codegen | 366.82ms | 3.366 | 141 | 0 | 0.00ns | 0.00ns | ++------------------------+-----------+-----------------+------------+------------+--------------+-----------------------+ +| optimized_mir | 188.22ms | 1.727 | 11668 | 9114 | 0.00ns | 0.00ns | ++------------------------+-----------+-----------------+------------+------------+--------------+-----------------------+ +| mir_built | 156.30ms | 1.434 | 2040 | 1020 | 0.00ns | 0.00ns | 
++------------------------+-----------+-----------------+------------+------------+--------------+-----------------------+ +| evaluate_obligation | 151.95ms | 1.394 | 33134 | 23817 | 0.00ns | 0.00ns | ++------------------------+-----------+-----------------+------------+------------+--------------+-----------------------+ +| LLVM_compress_bitcode | 126.55ms | 1.161 | 140 | 0 | 0.00ns | 0.00ns | ++------------------------+-----------+-----------------+------------+------------+--------------+-----------------------+ +| codegen crate | 119.08ms | 1.093 | 1 | 0 | 0.00ns | 0.00ns | ++------------------------+-----------+-----------------+------------+------------+--------------+-----------------------+ +| mir_const | 117.82ms | 1.081 | 1050 | 30 | 0.00ns | 0.00ns | ++------------------------+-----------+-----------------+------------+------------+--------------+-----------------------+ + +(many more rows elided) + +Total cpu time: 10.896488447s + +``` From e7fd3d8126e10d7dee01c82d8f66a1fbe4e3f38c Mon Sep 17 00:00:00 2001 From: Wesley Wiser Date: Wed, 24 Apr 2019 18:15:15 -0400 Subject: [PATCH 11/12] Add constants for rustc events --- measureme/src/lib.rs | 1 + measureme/src/rustc.rs | 11 +++++++++++ summarize/src/analysis.rs | 17 ++++++++++------- 3 files changed, 22 insertions(+), 7 deletions(-) create mode 100644 measureme/src/rustc.rs diff --git a/measureme/src/lib.rs b/measureme/src/lib.rs index f252233..775ac2c 100644 --- a/measureme/src/lib.rs +++ b/measureme/src/lib.rs @@ -7,6 +7,7 @@ mod raw_event; mod serialization; mod stringtable; +pub mod rustc; pub mod testing_common; pub use crate::event::Event; diff --git a/measureme/src/rustc.rs b/measureme/src/rustc.rs new file mode 100644 index 0000000..033c146 --- /dev/null +++ b/measureme/src/rustc.rs @@ -0,0 +1,11 @@ +//! 
This module contains functionality specific to the measureme integration with rustc + +pub const QUERY_EVENT_KIND: &str = "Query"; + +pub const GENERIC_ACTIVITY_EVENT_KIND: &str = "GenericActivity"; + +pub const INCREMENTAL_LOAD_RESULT_EVENT_KIND: &str = "IncrementalLoadResult"; + +pub const QUERY_BLOCKED_EVENT_KIND: &str = "QueryBlocked"; + +pub const QUERY_CACHE_HIT_EVENT_KIND: &str = "QueryCacheHit"; diff --git a/summarize/src/analysis.rs b/summarize/src/analysis.rs index 2e9f1bd..c9342c2 100644 --- a/summarize/src/analysis.rs +++ b/summarize/src/analysis.rs @@ -2,6 +2,7 @@ use std::borrow::Cow; use std::collections::HashMap; use std::time::Duration; use measureme::{ProfilingData, TimestampKind, Event}; +use measureme::rustc::*; use serde::{Serialize}; @@ -56,7 +57,8 @@ pub fn perform_analysis(data: ProfilingData) -> Results { TimestampKind::Start => { let thread_stack = threads.entry(event.thread_id).or_default(); - if &event.event_kind[..] == "Query" || &event.event_kind[..] == "GenericActivity" { + if &event.event_kind[..] == QUERY_EVENT_KIND || + &event.event_kind[..] == GENERIC_ACTIVITY_EVENT_KIND { if let Some(prev_event) = thread_stack.last() { //count the time run so far for this event let duration = @@ -72,13 +74,13 @@ pub fn perform_analysis(data: ProfilingData) -> Results { } thread_stack.push(event); - } else if &event.event_kind[..] == "QueryBlocked" || - &event.event_kind[..] == "IncrementalLoadResult" { + } else if &event.event_kind[..] == QUERY_BLOCKED_EVENT_KIND || + &event.event_kind[..] == INCREMENTAL_LOAD_RESULT_EVENT_KIND { thread_stack.push(event); } }, TimestampKind::Instant => { - if &event.event_kind[..] == "QueryCacheHit" { + if &event.event_kind[..] 
== QUERY_CACHE_HIT_EVENT_KIND { record_event_data(&event.label, &|data| { data.number_of_cache_hits += 1; data.invocation_count += 1; @@ -99,7 +101,8 @@ pub fn perform_analysis(data: ProfilingData) -> Results { .duration_since(start_event.timestamp) .unwrap_or(Duration::from_nanos(0)); - if &event.event_kind[..] == "Query" || &event.event_kind[..] == "GenericActivity" { + if &event.event_kind[..] == QUERY_EVENT_KIND || + &event.event_kind[..] == GENERIC_ACTIVITY_EVENT_KIND { record_event_data(&event.label, &|data| { data.self_time += duration; data.number_of_cache_misses += 1; @@ -114,11 +117,11 @@ pub fn perform_analysis(data: ProfilingData) -> Results { //record the total time total_time += duration; - } else if &event.event_kind[..] == "QueryBlocked" { + } else if &event.event_kind[..] == QUERY_BLOCKED_EVENT_KIND { record_event_data(&event.label, &|data| { data.blocked_time += duration; }); - } else if &event.event_kind[..] == "IncrementalLoadResult" { + } else if &event.event_kind[..] == INCREMENTAL_LOAD_RESULT_EVENT_KIND { record_event_data(&event.label, &|data| { data.incremental_load_time += duration; }); From 583254f6dea4bee641d171e5dd1845731dd8de40 Mon Sep 17 00:00:00 2001 From: Wesley Wiser Date: Wed, 24 Apr 2019 18:22:40 -0400 Subject: [PATCH 12/12] Add more comments about how self-time works --- summarize/src/analysis.rs | 59 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 58 insertions(+), 1 deletion(-) diff --git a/summarize/src/analysis.rs b/summarize/src/analysis.rs index c9342c2..3363d35 100644 --- a/summarize/src/analysis.rs +++ b/summarize/src/analysis.rs @@ -52,6 +52,62 @@ pub fn perform_analysis(data: ProfilingData) -> Results { } }; + /* + The basic idea is to iterate over all of the events in the profile data file, with some + special handling for Start and Stop events. + + When calculating timing data, the core thing we're interested in is self-time. 
+ In order to calculate that correctly, we need to track when an event is running and when + it has been interrupted by another event. + + Let's look at a simple example with two events: + + Event 1: + - Started at 0ms + - Ended at 10ms + + Event 2: + - Started at 4ms + - Ended at 6ms + + 0 1 2 3 4 5 6 7 8 9 10 + ================================ + 1 |------------------------------| + 2 |-----| + + When processing this, we see the events like this: + + - Start Event 1 + - Start Event 2 + - End Event 2 + - End Event 1 + + Now, I'll add some annotation to these events to show what's happening in the code: + + - Start Event 1 + - Since there is no other event running, there is no additional bookkeeping to do + - We push Event 1 onto the thread stack. + - Start Event 2 + - Since there is another event on the stack running, record the time from that event's + start time to this event's start time. (In this case, that's the time from 0ms - 4ms) + - We push Event 2 onto the thread stack. + - End Event 2 + - We pop Event 2's start event from the thread stack and record the time from its start + time to the current time (In this case, that's 4ms - 6ms) + - Since there's another event on the stack, we mutate its start time to be the current + time. This effectively "restarts" that event's timer. 
+ - End Event 1 + - We pop Event 1's start event from the thread stack and record the time from its start + time to the current time (In this case, that's 6ms - 10ms because we mutated the start + time when we processed End Event 2) + - Since there are no other events on the stack, there is no additional bookkeeping to do + + As a result: + Event 1's self-time is `(4-0)ms + (10-6)ms = 8ms` + + Event 2's self-time is `(6-4)ms = 2ms` + */ + for event in data.iter() { match event.timestamp_kind { TimestampKind::Start => { @@ -109,7 +165,8 @@ pub fn perform_analysis(data: ProfilingData) -> Results { data.invocation_count += 1; }); - //now adjust the previous event's start time so that it "started" right now + //this is the critical bit for correctly calculating self-time: + //adjust the previous event's start time so that it "started" right now if let Some(previous_event) = thread_stack.last_mut() { assert_eq!(TimestampKind::Start, previous_event.timestamp_kind); previous_event.timestamp = event.timestamp;