Skip to content

Commit ee0f127

Browse files
authored
Add NotebookIndex to the cache (#6863)
## Summary This PR updates the `FileCache` to include an optional `NotebookIndex` to support caching for Jupyter Notebooks. We only require the index to compute the diagnostics and thus we don't really need to store the entire `Notebook` on the `Diagnostics` struct. This means we only need the index to be stored in the cache to reconstruct the `Diagnostics`. ## Test Plan Update an existing test case to run over the fixtures under `ruff_notebook` crate where there are multiple Jupyter Notebook. Locally, the following commands were run in order: 1. Remove the cache: `rm -rf .ruff_cache` 2. Run without cache: `cargo run --bin ruff -- check --isolated crates/ruff_notebook/resources/test/fixtures/jupyter/unused_variable.ipynb --no-cache` 3. Run with cache: `cargo run --bin ruff -- check --isolated crates/ruff_notebook/resources/test/fixtures/jupyter/unused_variable.ipynb` 4. Check whether the `.ruff_cache` directory was created or not 5. Run with cache again and verify: `cargo run --bin ruff -- check --isolated crates/ruff_notebook/resources/test/fixtures/jupyter/unused_variable.ipynb` ## Benchmarks #6863 (comment) fixes: #6671
1 parent e7b7e4a commit ee0f127

File tree

11 files changed

+84
-58
lines changed

11 files changed

+84
-58
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

crates/ruff/src/message/grouped.rs

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ use std::num::NonZeroUsize;
44

55
use colored::Colorize;
66

7-
use ruff_notebook::{Notebook, NotebookIndex};
7+
use ruff_notebook::NotebookIndex;
88
use ruff_source_file::OneIndexed;
99

1010
use crate::fs::relativize_path;
@@ -65,7 +65,7 @@ impl Emitter for GroupedEmitter {
6565
writer,
6666
"{}",
6767
DisplayGroupedMessage {
68-
jupyter_index: context.notebook(message.filename()).map(Notebook::index),
68+
notebook_index: context.notebook_index(message.filename()),
6969
message,
7070
show_fix_status: self.show_fix_status,
7171
show_source: self.show_source,
@@ -92,7 +92,7 @@ struct DisplayGroupedMessage<'a> {
9292
show_source: bool,
9393
row_length: NonZeroUsize,
9494
column_length: NonZeroUsize,
95-
jupyter_index: Option<&'a NotebookIndex>,
95+
notebook_index: Option<&'a NotebookIndex>,
9696
}
9797

9898
impl Display for DisplayGroupedMessage<'_> {
@@ -110,7 +110,7 @@ impl Display for DisplayGroupedMessage<'_> {
110110
)?;
111111

112112
// Check if we're working on a jupyter notebook and translate positions with cell accordingly
113-
let (row, col) = if let Some(jupyter_index) = self.jupyter_index {
113+
let (row, col) = if let Some(jupyter_index) = self.notebook_index {
114114
write!(
115115
f,
116116
"cell {cell}{sep}",
@@ -150,7 +150,7 @@ impl Display for DisplayGroupedMessage<'_> {
150150
"{}",
151151
MessageCodeFrame {
152152
message,
153-
jupyter_index: self.jupyter_index
153+
notebook_index: self.notebook_index
154154
}
155155
)?;
156156
}

crates/ruff/src/message/mod.rs

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ pub use json_lines::JsonLinesEmitter;
1414
pub use junit::JunitEmitter;
1515
pub use pylint::PylintEmitter;
1616
use ruff_diagnostics::{Diagnostic, DiagnosticKind, Fix};
17-
use ruff_notebook::Notebook;
17+
use ruff_notebook::NotebookIndex;
1818
use ruff_source_file::{SourceFile, SourceLocation};
1919
use ruff_text_size::{Ranged, TextRange, TextSize};
2020
pub use text::TextEmitter;
@@ -127,21 +127,21 @@ pub trait Emitter {
127127

128128
/// Context passed to [`Emitter`].
129129
pub struct EmitterContext<'a> {
130-
notebooks: &'a FxHashMap<String, Notebook>,
130+
notebook_indexes: &'a FxHashMap<String, NotebookIndex>,
131131
}
132132

133133
impl<'a> EmitterContext<'a> {
134-
pub fn new(notebooks: &'a FxHashMap<String, Notebook>) -> Self {
135-
Self { notebooks }
134+
pub fn new(notebook_indexes: &'a FxHashMap<String, NotebookIndex>) -> Self {
135+
Self { notebook_indexes }
136136
}
137137

138138
/// Tests if the file with `name` is a jupyter notebook.
139139
pub fn is_notebook(&self, name: &str) -> bool {
140-
self.notebooks.contains_key(name)
140+
self.notebook_indexes.contains_key(name)
141141
}
142142

143-
pub fn notebook(&self, name: &str) -> Option<&Notebook> {
144-
self.notebooks.get(name)
143+
pub fn notebook_index(&self, name: &str) -> Option<&NotebookIndex> {
144+
self.notebook_indexes.get(name)
145145
}
146146
}
147147

@@ -225,8 +225,8 @@ def fibonacci(n):
225225
emitter: &mut dyn Emitter,
226226
messages: &[Message],
227227
) -> String {
228-
let source_kinds = FxHashMap::default();
229-
let context = EmitterContext::new(&source_kinds);
228+
let notebook_indexes = FxHashMap::default();
229+
let context = EmitterContext::new(&notebook_indexes);
230230
let mut output: Vec<u8> = Vec::new();
231231
emitter.emit(&mut output, messages, &context).unwrap();
232232

crates/ruff/src/message/text.rs

Lines changed: 18 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ use annotate_snippets::snippet::{Annotation, AnnotationType, Slice, Snippet, Sou
77
use bitflags::bitflags;
88
use colored::Colorize;
99

10-
use ruff_notebook::{Notebook, NotebookIndex};
10+
use ruff_notebook::NotebookIndex;
1111
use ruff_source_file::{OneIndexed, SourceLocation};
1212
use ruff_text_size::{Ranged, TextRange, TextSize};
1313

@@ -71,22 +71,22 @@ impl Emitter for TextEmitter {
7171
)?;
7272

7373
let start_location = message.compute_start_location();
74-
let jupyter_index = context.notebook(message.filename()).map(Notebook::index);
74+
let notebook_index = context.notebook_index(message.filename());
7575

7676
// Check if we're working on a jupyter notebook and translate positions with cell accordingly
77-
let diagnostic_location = if let Some(jupyter_index) = jupyter_index {
77+
let diagnostic_location = if let Some(notebook_index) = notebook_index {
7878
write!(
7979
writer,
8080
"cell {cell}{sep}",
81-
cell = jupyter_index
81+
cell = notebook_index
8282
.cell(start_location.row.get())
8383
.unwrap_or_default(),
8484
sep = ":".cyan(),
8585
)?;
8686

8787
SourceLocation {
8888
row: OneIndexed::new(
89-
jupyter_index
89+
notebook_index
9090
.cell_row(start_location.row.get())
9191
.unwrap_or(1) as usize,
9292
)
@@ -115,7 +115,7 @@ impl Emitter for TextEmitter {
115115
"{}",
116116
MessageCodeFrame {
117117
message,
118-
jupyter_index
118+
notebook_index
119119
}
120120
)?;
121121
}
@@ -161,7 +161,7 @@ impl Display for RuleCodeAndBody<'_> {
161161

162162
pub(super) struct MessageCodeFrame<'a> {
163163
pub(crate) message: &'a Message,
164-
pub(crate) jupyter_index: Option<&'a NotebookIndex>,
164+
pub(crate) notebook_index: Option<&'a NotebookIndex>,
165165
}
166166

167167
impl Display for MessageCodeFrame<'_> {
@@ -186,14 +186,12 @@ impl Display for MessageCodeFrame<'_> {
186186
let content_start_index = source_code.line_index(range.start());
187187
let mut start_index = content_start_index.saturating_sub(2);
188188

189-
// If we're working on a jupyter notebook, skip the lines which are
189+
// If we're working with a Jupyter Notebook, skip the lines which are
190190
// outside of the cell containing the diagnostic.
191-
if let Some(jupyter_index) = self.jupyter_index {
192-
let content_start_cell = jupyter_index
193-
.cell(content_start_index.get())
194-
.unwrap_or_default();
191+
if let Some(index) = self.notebook_index {
192+
let content_start_cell = index.cell(content_start_index.get()).unwrap_or_default();
195193
while start_index < content_start_index {
196-
if jupyter_index.cell(start_index.get()).unwrap_or_default() == content_start_cell {
194+
if index.cell(start_index.get()).unwrap_or_default() == content_start_cell {
197195
break;
198196
}
199197
start_index = start_index.saturating_add(1);
@@ -213,14 +211,12 @@ impl Display for MessageCodeFrame<'_> {
213211
.saturating_add(2)
214212
.min(OneIndexed::from_zero_indexed(source_code.line_count()));
215213

216-
// If we're working on a jupyter notebook, skip the lines which are
214+
// If we're working with a Jupyter Notebook, skip the lines which are
217215
// outside of the cell containing the diagnostic.
218-
if let Some(jupyter_index) = self.jupyter_index {
219-
let content_end_cell = jupyter_index
220-
.cell(content_end_index.get())
221-
.unwrap_or_default();
216+
if let Some(index) = self.notebook_index {
217+
let content_end_cell = index.cell(content_end_index.get()).unwrap_or_default();
222218
while end_index > content_end_index {
223-
if jupyter_index.cell(end_index.get()).unwrap_or_default() == content_end_cell {
219+
if index.cell(end_index.get()).unwrap_or_default() == content_end_cell {
224220
break;
225221
}
226222
end_index = end_index.saturating_sub(1);
@@ -256,10 +252,10 @@ impl Display for MessageCodeFrame<'_> {
256252
title: None,
257253
slices: vec![Slice {
258254
source: &source.text,
259-
line_start: self.jupyter_index.map_or_else(
255+
line_start: self.notebook_index.map_or_else(
260256
|| start_index.get(),
261-
|jupyter_index| {
262-
jupyter_index
257+
|notebook_index| {
258+
notebook_index
263259
.cell_row(start_index.get())
264260
.unwrap_or_default() as usize
265261
},

crates/ruff/src/test.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -297,7 +297,7 @@ pub(crate) fn print_jupyter_messages(
297297
messages,
298298
&EmitterContext::new(&FxHashMap::from_iter([(
299299
path.file_name().unwrap().to_string_lossy().to_string(),
300-
notebook.clone(),
300+
notebook.index().clone(),
301301
)])),
302302
)
303303
.unwrap();

crates/ruff_cli/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,7 @@ colored = { workspace = true, features = ["no-color"]}
7575
insta = { workspace = true, features = ["filters"] }
7676
insta-cmd = { version = "0.4.0" }
7777
tempfile = "3.6.0"
78+
test-case = { workspace = true }
7879
ureq = { version = "2.6.2", features = [] }
7980

8081
[target.'cfg(target_os = "windows")'.dependencies]

crates/ruff_cli/src/cache.rs

Lines changed: 20 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -8,13 +8,15 @@ use std::sync::Mutex;
88
use std::time::{Duration, SystemTime};
99

1010
use anyhow::{Context, Result};
11+
use rustc_hash::FxHashMap;
1112
use serde::{Deserialize, Serialize};
1213

1314
use ruff::message::Message;
1415
use ruff::settings::Settings;
1516
use ruff::warn_user;
1617
use ruff_cache::{CacheKey, CacheKeyHasher};
1718
use ruff_diagnostics::{DiagnosticKind, Fix};
19+
use ruff_notebook::NotebookIndex;
1820
use ruff_python_ast::imports::ImportMap;
1921
use ruff_source_file::SourceFileBuilder;
2022
use ruff_text_size::{TextRange, TextSize};
@@ -193,6 +195,7 @@ impl Cache {
193195
key: T,
194196
messages: &[Message],
195197
imports: &ImportMap,
198+
notebook_index: Option<&NotebookIndex>,
196199
) {
197200
let source = if let Some(msg) = messages.first() {
198201
msg.file.source_text().to_owned()
@@ -226,6 +229,7 @@ impl Cache {
226229
imports: imports.clone(),
227230
messages,
228231
source,
232+
notebook_index: notebook_index.cloned(),
229233
};
230234
self.new_files.lock().unwrap().insert(path, file);
231235
}
@@ -263,6 +267,8 @@ pub(crate) struct FileCache {
263267
///
264268
/// This will be empty if `messages` is empty.
265269
source: String,
270+
/// Notebook index if this file is a Jupyter Notebook.
271+
notebook_index: Option<NotebookIndex>,
266272
}
267273

268274
impl FileCache {
@@ -283,7 +289,12 @@ impl FileCache {
283289
})
284290
.collect()
285291
};
286-
Diagnostics::new(messages, self.imports.clone())
292+
let notebook_indexes = if let Some(notebook_index) = self.notebook_index.as_ref() {
293+
FxHashMap::from_iter([(path.to_string_lossy().to_string(), notebook_index.clone())])
294+
} else {
295+
FxHashMap::default()
296+
};
297+
Diagnostics::new(messages, self.imports.clone(), notebook_indexes)
287298
}
288299
}
289300

@@ -350,16 +361,19 @@ mod tests {
350361
use anyhow::Result;
351362
use ruff_python_ast::imports::ImportMap;
352363

353-
#[test]
354-
fn same_results() {
364+
use test_case::test_case;
365+
366+
#[test_case("../ruff/resources/test/fixtures", "ruff_tests/cache_same_results_ruff"; "ruff_fixtures")]
367+
#[test_case("../ruff_notebook/resources/test/fixtures", "ruff_tests/cache_same_results_ruff_notebook"; "ruff_notebook_fixtures")]
368+
fn same_results(package_root: &str, cache_dir_path: &str) {
355369
let mut cache_dir = temp_dir();
356-
cache_dir.push("ruff_tests/cache_same_results");
370+
cache_dir.push(cache_dir_path);
357371
let _ = fs::remove_dir_all(&cache_dir);
358372
cache::init(&cache_dir).unwrap();
359373

360374
let settings = AllSettings::default();
361375

362-
let package_root = fs::canonicalize("../ruff/resources/test/fixtures").unwrap();
376+
let package_root = fs::canonicalize(package_root).unwrap();
363377
let cache = Cache::open(&cache_dir, package_root.clone(), &settings.lib);
364378
assert_eq!(cache.new_files.lock().unwrap().len(), 0);
365379

@@ -444,9 +458,6 @@ mod tests {
444458
.unwrap();
445459
}
446460

447-
// Not stored in the cache.
448-
expected_diagnostics.notebooks.clear();
449-
got_diagnostics.notebooks.clear();
450461
assert_eq!(expected_diagnostics, got_diagnostics);
451462
}
452463

@@ -614,6 +625,7 @@ mod tests {
614625
imports: ImportMap::new(),
615626
messages: Vec::new(),
616627
source: String::new(),
628+
notebook_index: None,
617629
},
618630
);
619631

crates/ruff_cli/src/commands/check.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ use itertools::Itertools;
1111
use log::{debug, error, warn};
1212
#[cfg(not(target_family = "wasm"))]
1313
use rayon::prelude::*;
14+
use rustc_hash::FxHashMap;
1415

1516
use ruff::message::Message;
1617
use ruff::registry::Rule;
@@ -156,6 +157,7 @@ pub(crate) fn check(
156157
TextSize::default(),
157158
)],
158159
ImportMap::default(),
160+
FxHashMap::default(),
159161
)
160162
} else {
161163
warn!(

0 commit comments

Comments
 (0)