diff --git a/CHANGELOG.md b/CHANGELOG.md index af0af8d..f4c999a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,8 @@ ## Unreleased ### Added - `flamegraph`: new tool that uses the `inferno` crate to generate flamegraph svg files ([GH-73]) +- `crox`: Added the `--dir` parameter to merge all events files in dir into one trace file ([GH-84]) +- `crox`: Added possibility to add multiple `file_prefix` parameters to merge them all into one trace file ([GH-84]) ### Changed - `measureme`: Events are recorded in a more compact format ([GH-76]) @@ -47,3 +49,4 @@ [GH-70]: https://github.com/rust-lang/measureme/pull/70 [GH-73]: https://github.com/rust-lang/measureme/pull/73 [GH-76]: https://github.com/rust-lang/measureme/pull/76 +[GH-84]: https://github.com/rust-lang/measureme/pull/84 diff --git a/analyzeme/Cargo.toml b/analyzeme/Cargo.toml index c76ea60..83072ae 100644 --- a/analyzeme/Cargo.toml +++ b/analyzeme/Cargo.toml @@ -8,3 +8,5 @@ license = "MIT OR Apache-2.0" [dependencies] measureme = { path = "../measureme" } rustc-hash = "1.0.1" +serde = { version = "1.0", features = [ "derive" ] } +serde_json = "1.0" diff --git a/analyzeme/src/profiling_data.rs b/analyzeme/src/profiling_data.rs index 0328c96..b46185d 100644 --- a/analyzeme/src/profiling_data.rs +++ b/analyzeme/src/profiling_data.rs @@ -5,18 +5,38 @@ use measureme::file_header::{ }; use measureme::ByteVecSink; use measureme::{ProfilerFiles, RawEvent, SerializationSink, StringTable, StringTableBuilder}; +use serde::{Deserialize, Deserializer}; use std::error::Error; use std::fs; use std::mem; use std::path::Path; use std::sync::Arc; -use std::time::SystemTime; +use std::time::{Duration, SystemTime, UNIX_EPOCH}; const RAW_EVENT_SIZE: usize = mem::size_of::(); +fn system_time_from_nanos<'de, D>(deserializer: D) -> Result +where + D: Deserializer<'de>, +{ + let duration_from_epoch = Duration::from_nanos(u64::deserialize(deserializer)?); + Ok(UNIX_EPOCH + .checked_add(duration_from_epoch) + 
.expect("a time that can be represented as SystemTime")) +} + +#[derive(Deserialize)] +pub struct Metadata { + #[serde(deserialize_with = "system_time_from_nanos")] + pub start_time: SystemTime, + pub process_id: u32, + pub cmd: String, +} + pub struct ProfilingData { event_data: Vec, string_table: StringTable, + pub metadata: Metadata, } impl ProfilingData { @@ -39,9 +59,13 @@ impl ProfilingData { let string_table = StringTable::new(string_data, index_data)?; + let metadata = string_table.get_metadata().to_string(); + let metadata: Metadata = serde_json::from_str(&metadata)?; + Ok(ProfilingData { string_table, event_data, + metadata, }) } @@ -88,8 +112,7 @@ impl<'a> ProfilerEventIterator<'a> { let string_table = &self.data.string_table; - // FIXME: Lots of Rust code compiled in the seventies apparently. - let timestamp = Timestamp::from_raw_event(&raw_event, SystemTime::UNIX_EPOCH); + let timestamp = Timestamp::from_raw_event(&raw_event, self.data.metadata.start_time); Event { event_kind: string_table.get(raw_event.event_kind).to_string(), @@ -240,10 +263,16 @@ impl ProfilingDataBuilder { CURRENT_FILE_FORMAT_VERSION ); let string_table = StringTable::new(data_bytes, index_bytes).unwrap(); + let metadata = Metadata { + start_time: UNIX_EPOCH, + process_id: 0, + cmd: "test cmd".to_string(), + }; ProfilingData { event_data, string_table, + metadata, } } diff --git a/crox/src/main.rs b/crox/src/main.rs index 2acda0e..04cd149 100644 --- a/crox/src/main.rs +++ b/crox/src/main.rs @@ -2,12 +2,13 @@ use rustc_hash::FxHashMap; use std::fs; use std::io::BufWriter; use std::path::PathBuf; -use std::time::Duration; -use std::time::SystemTime; +use std::time::{Duration, SystemTime, UNIX_EPOCH}; use analyzeme::{ProfilingData, Timestamp}; +use serde::ser::SerializeSeq; use serde::{Serialize, Serializer}; +use serde_json::json; use std::cmp; use structopt::StructOpt; @@ -43,7 +44,11 @@ struct Event { #[derive(StructOpt, Debug)] struct Opt { - file_prefix: PathBuf, + 
#[structopt(required_unless = "dir")] + file_prefix: Vec, + /// all event trace files in dir will be merged to one chrome_profiler.json file + #[structopt(long = "dir")] + dir: Option, /// collapse threads without overlapping events #[structopt(long = "collapse-threads")] collapse_threads: bool, @@ -114,53 +119,98 @@ fn generate_thread_to_collapsed_thread_mapping( fn main() -> Result<(), Box> { let opt = Opt::from_args(); - let data = ProfilingData::new(&opt.file_prefix)?; - let chrome_file = BufWriter::new(fs::File::create("chrome_profiler.json")?); + let mut serializer = serde_json::Serializer::new(chrome_file); - //find the earlier timestamp (it should be the first event) - //subtract one tick so that the start of the event shows in Chrome - let first_event_timestamp = make_start_timestamp(&data); + let mut seq = serializer.serialize_seq(None)?; - let mut serializer = serde_json::Serializer::new(chrome_file); - let thread_to_collapsed_thread = generate_thread_to_collapsed_thread_mapping(&opt, &data); - let mut event_iterator = data.iter(); - - //create an iterator so we can avoid allocating a Vec with every Event for serialization - let json_event_iterator = std::iter::from_fn(|| { - while let Some(event) = event_iterator.next() { - // Chrome does not seem to like how many QueryCacheHit events we generate - // only handle startStop events for now - if let Timestamp::Interval { start, end } = event.timestamp { - let duration = end.duration_since(start).unwrap(); - if let Some(minimum_duration) = opt.minimum_duration { - if duration.as_micros() < minimum_duration { - continue; - } + let dir_paths = file_prefixes_in_dir(&opt)?; + + for file_prefix in opt.file_prefix.iter().chain(dir_paths.iter()) { + let data = ProfilingData::new(&file_prefix)?; + + let thread_to_collapsed_thread = generate_thread_to_collapsed_thread_mapping(&opt, &data); + + // Chrome does not seem to like how many QueryCacheHit events we generate + // only handle Interval events for now + for 
event in data.iter().filter(|e| !e.timestamp.is_instant()) { + let duration = event.duration().unwrap(); + if let Some(minimum_duration) = opt.minimum_duration { + if duration.as_micros() < minimum_duration { + continue; } - return Some(Event { - name: event.label.clone().into_owned(), - category: event.event_kind.clone().into_owned(), - event_type: EventType::Complete, - timestamp: start.duration_since(first_event_timestamp).unwrap(), - duration, - process_id: 0, - thread_id: *thread_to_collapsed_thread - .get(&event.thread_id) - .unwrap_or(&event.thread_id), - args: None, - }); } + let crox_event = Event { + name: event.label.clone().into_owned(), + category: event.event_kind.clone().into_owned(), + event_type: EventType::Complete, + timestamp: event.timestamp.start().duration_since(UNIX_EPOCH).unwrap(), + duration, + process_id: data.metadata.process_id, + thread_id: *thread_to_collapsed_thread + .get(&event.thread_id) + .unwrap_or(&event.thread_id), + args: None, + }; + seq.serialize_element(&crox_event)?; } + // add crate name for the process_id + let index_of_crate_name = data + .metadata + .cmd + .find(" --crate-name ") + .map(|index| index + 14); + if let Some(index) = index_of_crate_name { + let (_, last) = data.metadata.cmd.split_at(index); + let (crate_name, _) = last.split_at(last.find(" ").unwrap_or(last.len())); + + let process_name = json!({ + "name": "process_name", + "ph" : "M", + "ts" : 0, + "tid" : 0, + "cat" : "", + "pid" : data.metadata.process_id, + "args": { + "name" : crate_name + } + }); + seq.serialize_element(&process_name)?; + } + // sort the processes after start time + let process_name = json!({ + "name": "process_sort_index", + "ph" : "M", + "ts" : 0, + "tid" : 0, + "cat" : "", + "pid" : data.metadata.process_id, + "args": { + "sort_index" : data.metadata.start_time.duration_since(UNIX_EPOCH).unwrap().as_micros() as u64 + } + }); + seq.serialize_element(&process_name)?; + } - None - }); - - 
serializer.collect_seq(json_event_iterator)?; + seq.end()?; Ok(()) } +fn file_prefixes_in_dir(opt: &Opt) -> Result, std::io::Error> { + let mut result = Vec::new(); + if let Some(dir_path) = &opt.dir { + for entry in fs::read_dir(dir_path)? { + let entry = entry?; + let path = entry.path(); + if path.extension().filter(|e| *e == "events").is_some() { + result.push(path) + } + } + } + Ok(result) +} + fn timestamp_to_min_max(timestamp: Timestamp) -> (SystemTime, SystemTime) { match timestamp { Timestamp::Instant(t) => (t, t), @@ -171,22 +221,3 @@ fn timestamp_to_min_max(timestamp: Timestamp) -> (SystemTime, SystemTime) { } } } - -// FIXME: Move this to `ProfilingData` and base it on the `start_time` field -// from metadata. -fn make_start_timestamp(data: &ProfilingData) -> SystemTime { - // We cannot assume the first event in the stream actually is the first - // event because interval events are emitted at their end. So in theory it - // is possible that the event with the earliest starting time is the last - // event in the stream (i.e. if there is an interval event that spans the - // entire execution of the profile). - // - // Let's be on the safe side and iterate the whole stream. - let min = data - .iter() - .map(|e| timestamp_to_min_max(e.timestamp).0) - .min() - .unwrap(); - - min - Duration::from_micros(1) -} diff --git a/measureme/src/stringtable.rs b/measureme/src/stringtable.rs index 26797e8..a41c15c 100644 --- a/measureme/src/stringtable.rs +++ b/measureme/src/stringtable.rs @@ -295,6 +295,10 @@ impl StringTable { pub fn get<'a>(&'a self, id: StringId) -> StringRef<'a> { StringRef { id, table: self } } + pub fn get_metadata<'a>(&'a self) -> StringRef<'a> { + let id = StringId(METADATA_STRING_ID); + self.get(id) + } } #[cfg(test)]