From 47434d74a3df5c9fcfe0dd37696822925348c678 Mon Sep 17 00:00:00 2001 From: Julian Date: Sun, 30 Mar 2025 14:03:40 +0200 Subject: [PATCH 1/2] use internal adapters --- Cargo.lock | 1 - crates/pgt_lsp/Cargo.toml | 1 - crates/pgt_lsp/src/adapters/from_lsp.rs | 41 ++++ crates/pgt_lsp/src/adapters/line_index.rs | 144 ++++++++++++++ crates/pgt_lsp/src/adapters/mod.rs | 195 +++++++++++++++++++ crates/pgt_lsp/src/adapters/to_lsp.rs | 39 ++++ crates/pgt_lsp/src/capabilities.rs | 9 +- crates/pgt_lsp/src/documents.rs | 2 +- crates/pgt_lsp/src/handlers/helper.rs | 9 +- crates/pgt_lsp/src/handlers/text_document.rs | 4 +- crates/pgt_lsp/src/lib.rs | 1 + crates/pgt_lsp/src/session.rs | 2 +- crates/pgt_lsp/src/utils.rs | 21 +- 13 files changed, 444 insertions(+), 25 deletions(-) create mode 100644 crates/pgt_lsp/src/adapters/from_lsp.rs create mode 100644 crates/pgt_lsp/src/adapters/line_index.rs create mode 100644 crates/pgt_lsp/src/adapters/mod.rs create mode 100644 crates/pgt_lsp/src/adapters/to_lsp.rs diff --git a/Cargo.lock b/Cargo.lock index 032c60af..35bdd186 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2559,7 +2559,6 @@ dependencies = [ "pgt_console", "pgt_diagnostics", "pgt_fs", - "pgt_lsp_converters", "pgt_test_utils", "pgt_text_edit", "pgt_text_size", diff --git a/crates/pgt_lsp/Cargo.toml b/crates/pgt_lsp/Cargo.toml index 4fafc27f..ff3d00dc 100644 --- a/crates/pgt_lsp/Cargo.toml +++ b/crates/pgt_lsp/Cargo.toml @@ -21,7 +21,6 @@ pgt_configuration = { workspace = true } pgt_console = { workspace = true } pgt_diagnostics = { workspace = true } pgt_fs = { workspace = true } -pgt_lsp_converters = { workspace = true } pgt_text_edit = { workspace = true } pgt_text_size.workspace = true pgt_workspace = { workspace = true } diff --git a/crates/pgt_lsp/src/adapters/from_lsp.rs b/crates/pgt_lsp/src/adapters/from_lsp.rs new file mode 100644 index 00000000..ce764a9e --- /dev/null +++ b/crates/pgt_lsp/src/adapters/from_lsp.rs @@ -0,0 +1,41 @@ +use 
crate::adapters::line_index::LineIndex; +use crate::adapters::{LineCol, PositionEncoding, WideLineCol}; +use anyhow::{Context, Result}; +use pgt_text_size::{TextRange, TextSize}; +use tower_lsp::lsp_types; + +/// The function is used to convert a LSP position to TextSize. +pub fn offset( + line_index: &LineIndex, + position: lsp_types::Position, + position_encoding: PositionEncoding, +) -> Result { + let line_col = match position_encoding { + PositionEncoding::Utf8 => LineCol { + line: position.line, + col: position.character, + }, + PositionEncoding::Wide(enc) => { + let line_col = WideLineCol { + line: position.line, + col: position.character, + }; + line_index.to_utf8(enc, line_col) + } + }; + + line_index + .offset(line_col) + .with_context(|| format!("position {position:?} is out of range")) +} + +/// The function is used to convert a LSP range to TextRange. +pub fn text_range( + line_index: &LineIndex, + range: lsp_types::Range, + position_encoding: PositionEncoding, +) -> Result { + let start = offset(line_index, range.start, position_encoding)?; + let end = offset(line_index, range.end, position_encoding)?; + Ok(TextRange::new(start, end)) +} diff --git a/crates/pgt_lsp/src/adapters/line_index.rs b/crates/pgt_lsp/src/adapters/line_index.rs new file mode 100644 index 00000000..88b1351a --- /dev/null +++ b/crates/pgt_lsp/src/adapters/line_index.rs @@ -0,0 +1,144 @@ +//! `LineIndex` maps flat `TextSize` offsets into `(Line, Column)` +//! representation. + +use std::mem; + +use pgt_text_size::TextSize; +use rustc_hash::FxHashMap; + +use crate::adapters::{LineCol, WideChar, WideEncoding, WideLineCol}; + +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct LineIndex { + /// Offset of the beginning of each line, zero-based. + pub newlines: Vec, + /// List of non-ASCII characters on each line. 
+ pub line_wide_chars: FxHashMap>, +} + +impl LineIndex { + pub fn new(text: &str) -> LineIndex { + let mut line_wide_chars = FxHashMap::default(); + let mut wide_chars = Vec::new(); + + let mut newlines = vec![TextSize::from(0)]; + + let mut current_col = TextSize::from(0); + + let mut line = 0; + for (offset, char) in text.char_indices() { + let char_size = TextSize::of(char); + + if char == '\n' { + // SAFETY: the conversion from `usize` to `TextSize` can fail if `offset` + // is larger than 2^32. We don't support such large files. + let char_offset = TextSize::try_from(offset).expect("TextSize overflow"); + newlines.push(char_offset + char_size); + + // Save any utf-16 characters seen in the previous line + if !wide_chars.is_empty() { + line_wide_chars.insert(line, mem::take(&mut wide_chars)); + } + + // Prepare for processing the next line + current_col = TextSize::from(0); + line += 1; + continue; + } + + if !char.is_ascii() { + wide_chars.push(WideChar { + start: current_col, + end: current_col + char_size, + }); + } + + current_col += char_size; + } + + // Save any utf-16 characters seen in the last line + if !wide_chars.is_empty() { + line_wide_chars.insert(line, wide_chars); + } + + LineIndex { + newlines, + line_wide_chars, + } + } + + /// Return the number of lines in the index, clamped to [u32::MAX] + pub fn len(&self) -> u32 { + self.newlines.len().try_into().unwrap_or(u32::MAX) + } + + /// Return `true` if the index contains no lines. 
+ pub fn is_empty(&self) -> bool { + self.newlines.is_empty() + } + + pub fn line_col(&self, offset: TextSize) -> Option { + let line = self.newlines.partition_point(|&it| it <= offset) - 1; + let line_start_offset = self.newlines.get(line)?; + let col = offset - line_start_offset; + + Some(LineCol { + line: u32::try_from(line).ok()?, + col: col.into(), + }) + } + + pub fn offset(&self, line_col: LineCol) -> Option { + self.newlines + .get(line_col.line as usize) + .map(|offset| offset + TextSize::from(line_col.col)) + } + + pub fn to_wide(&self, enc: WideEncoding, line_col: LineCol) -> Option { + let col = self.utf8_to_wide_col(enc, line_col.line, line_col.col.into()); + Some(WideLineCol { + line: line_col.line, + col: u32::try_from(col).ok()?, + }) + } + + pub fn to_utf8(&self, enc: WideEncoding, line_col: WideLineCol) -> LineCol { + let col = self.wide_to_utf8_col(enc, line_col.line, line_col.col); + LineCol { + line: line_col.line, + col: col.into(), + } + } + + fn utf8_to_wide_col(&self, enc: WideEncoding, line: u32, col: TextSize) -> usize { + let mut res: usize = col.into(); + if let Some(wide_chars) = self.line_wide_chars.get(&line) { + for c in wide_chars { + if c.end <= col { + res -= usize::from(c.len()) - c.wide_len(enc); + } else { + // From here on, all utf16 characters come *after* the character we are mapping, + // so we don't need to take them into account + break; + } + } + } + res + } + + fn wide_to_utf8_col(&self, enc: WideEncoding, line: u32, mut col: u32) -> TextSize { + if let Some(wide_chars) = self.line_wide_chars.get(&line) { + for c in wide_chars { + if col > u32::from(c.start) { + col += u32::from(c.len()) - c.wide_len(enc) as u32; + } else { + // From here on, all utf16 characters come *after* the character we are mapping, + // so we don't need to take them into account + break; + } + } + } + + col.into() + } +} diff --git a/crates/pgt_lsp/src/adapters/mod.rs b/crates/pgt_lsp/src/adapters/mod.rs new file mode 100644 index 
00000000..af1f10c2 --- /dev/null +++ b/crates/pgt_lsp/src/adapters/mod.rs @@ -0,0 +1,195 @@ +//! This module contains a set of converters to translate between `lsp-types` and `text_size` (and vice versa) types. + +use pgt_text_size::TextSize; +use tower_lsp::lsp_types::{ClientCapabilities, PositionEncodingKind}; + +pub mod from_lsp; +pub mod line_index; +pub mod to_lsp; + +pub fn negotiated_encoding(capabilities: &ClientCapabilities) -> PositionEncoding { + let client_encodings = match &capabilities.general { + Some(general) => general.position_encodings.as_deref().unwrap_or_default(), + None => &[], + }; + + for enc in client_encodings { + if enc == &PositionEncodingKind::UTF8 { + return PositionEncoding::Utf8; + } else if enc == &PositionEncodingKind::UTF32 { + return PositionEncoding::Wide(WideEncoding::Utf32); + } + // NB: intentionally prefer just about anything else to utf-16. + } + + PositionEncoding::Wide(WideEncoding::Utf16) +} + +#[derive(Clone, Copy, Debug)] +pub enum PositionEncoding { + Utf8, + Wide(WideEncoding), +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] +pub enum WideEncoding { + Utf16, + Utf32, +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] +pub struct LineCol { + /// Zero-based + pub line: u32, + /// Zero-based utf8 offset + pub col: u32, +} + +/// Deliberately not a generic type and different from `LineCol`. +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] +pub struct WideLineCol { + /// Zero-based + pub line: u32, + /// Zero-based + pub col: u32, +} + +#[derive(Clone, Debug, Hash, PartialEq, Eq)] +pub struct WideChar { + /// Start offset of a character inside a line, zero-based + pub start: TextSize, + /// End offset of a character inside a line, zero-based + pub end: TextSize, +} + +impl WideChar { + /// Returns the length in 8-bit UTF-8 code units. + fn len(&self) -> TextSize { + self.end - self.start + } + + /// Returns the length in UTF-16 or UTF-32 code units. 
+ fn wide_len(&self, enc: WideEncoding) -> usize { + match enc { + WideEncoding::Utf16 => { + if self.len() == TextSize::from(4) { + 2 + } else { + 1 + } + } + + WideEncoding::Utf32 => 1, + } + } +} + +#[cfg(test)] +mod tests { + use crate::adapters::WideEncoding::{Utf16, Utf32}; + use crate::adapters::from_lsp::offset; + use crate::adapters::line_index::LineIndex; + use crate::adapters::to_lsp::position; + use crate::adapters::{LineCol, PositionEncoding, WideEncoding}; + use pgt_text_size::TextSize; + use tower_lsp::lsp_types::Position; + + macro_rules! check_conversion { + ($line_index:ident : $position:expr_2021 => $text_size:expr_2021 ) => { + let position_encoding = PositionEncoding::Wide(WideEncoding::Utf16); + + let offset = offset(&$line_index, $position, position_encoding).ok(); + assert_eq!(offset, Some($text_size)); + + let position = position(&$line_index, offset.unwrap(), position_encoding).ok(); + + assert_eq!(position, Some($position)); + }; + } + + #[test] + fn empty_string() { + let line_index = LineIndex::new(""); + check_conversion!(line_index: Position { line: 0, character: 0 } => TextSize::from(0)); + } + + #[test] + fn empty_line() { + let line_index = LineIndex::new("\n\n"); + check_conversion!(line_index: Position { line: 1, character: 0 } => TextSize::from(1)); + } + + #[test] + fn line_end() { + let line_index = LineIndex::new("abc\ndef\nghi"); + check_conversion!(line_index: Position { line: 1, character: 3 } => TextSize::from(7)); + } + + #[test] + fn out_of_bounds_line() { + let line_index = LineIndex::new("abcde\nfghij\n"); + + let offset = line_index.offset(LineCol { line: 5, col: 0 }); + assert!(offset.is_none()); + } + + #[test] + fn unicode() { + let line_index = LineIndex::new("'Jan 1, 2018 – Jan 1, 2019'"); + + check_conversion!(line_index: Position { line: 0, character: 0 } => TextSize::from(0)); + check_conversion!(line_index: Position { line: 0, character: 1 } => TextSize::from(1)); + check_conversion!(line_index: Position { 
line: 0, character: 12 } => TextSize::from(12)); + check_conversion!(line_index: Position { line: 0, character: 13 } => TextSize::from(15)); + check_conversion!(line_index: Position { line: 0, character: 14 } => TextSize::from(18)); + check_conversion!(line_index: Position { line: 0, character: 15 } => TextSize::from(21)); + check_conversion!(line_index: Position { line: 0, character: 26 } => TextSize::from(32)); + check_conversion!(line_index: Position { line: 0, character: 27 } => TextSize::from(33)); + } + + #[ignore] + #[test] + fn test_every_chars() { + let text: String = { + let mut chars: Vec = ((0 as char)..char::MAX).collect(); + chars.extend("\n".repeat(chars.len() / 16).chars()); + chars.into_iter().collect() + }; + + let line_index = LineIndex::new(&text); + + let mut lin_col = LineCol { line: 0, col: 0 }; + let mut col_utf16 = 0; + let mut col_utf32 = 0; + for (offset, char) in text.char_indices() { + let got_offset = line_index.offset(lin_col).unwrap(); + assert_eq!(usize::from(got_offset), offset); + + let got_lin_col = line_index.line_col(got_offset).unwrap(); + assert_eq!(got_lin_col, lin_col); + + for enc in [Utf16, Utf32] { + let wide_lin_col = line_index.to_wide(enc, lin_col).unwrap(); + let got_lin_col = line_index.to_utf8(enc, wide_lin_col); + assert_eq!(got_lin_col, lin_col); + + let want_col = match enc { + Utf16 => col_utf16, + Utf32 => col_utf32, + }; + assert_eq!(wide_lin_col.col, want_col) + } + + if char == '\n' { + lin_col.line += 1; + lin_col.col = 0; + col_utf16 = 0; + col_utf32 = 0; + } else { + lin_col.col += char.len_utf8() as u32; + col_utf16 += char.len_utf16() as u32; + col_utf32 += 1; + } + } + } +} diff --git a/crates/pgt_lsp/src/adapters/to_lsp.rs b/crates/pgt_lsp/src/adapters/to_lsp.rs new file mode 100644 index 00000000..71a6b3c4 --- /dev/null +++ b/crates/pgt_lsp/src/adapters/to_lsp.rs @@ -0,0 +1,39 @@ +use crate::adapters::PositionEncoding; +use crate::adapters::line_index::LineIndex; +use anyhow::{Context, Result}; +use 
pgt_text_size::{TextRange, TextSize}; +use tower_lsp::lsp_types; + +/// The function is used to convert TextSize to a LSP position. +pub fn position( + line_index: &LineIndex, + offset: TextSize, + position_encoding: PositionEncoding, +) -> Result { + let line_col = line_index + .line_col(offset) + .with_context(|| format!("could not convert offset {offset:?} into a line-column index"))?; + + let position = match position_encoding { + PositionEncoding::Utf8 => lsp_types::Position::new(line_col.line, line_col.col), + PositionEncoding::Wide(enc) => { + let line_col = line_index + .to_wide(enc, line_col) + .with_context(|| format!("could not convert {line_col:?} into wide line column"))?; + lsp_types::Position::new(line_col.line, line_col.col) + } + }; + + Ok(position) +} + +/// The function is used to convert TextRange to a LSP range. +pub fn range( + line_index: &LineIndex, + range: TextRange, + position_encoding: PositionEncoding, +) -> Result { + let start = position(line_index, range.start(), position_encoding)?; + let end = position(line_index, range.end(), position_encoding)?; + Ok(lsp_types::Range::new(start, end)) +} diff --git a/crates/pgt_lsp/src/capabilities.rs b/crates/pgt_lsp/src/capabilities.rs index 6bc81084..0b64788e 100644 --- a/crates/pgt_lsp/src/capabilities.rs +++ b/crates/pgt_lsp/src/capabilities.rs @@ -1,11 +1,10 @@ -use pgt_lsp_converters::{PositionEncoding, WideEncoding, negotiated_encoding}; +use crate::adapters::{PositionEncoding, WideEncoding, negotiated_encoding}; use pgt_workspace::code_actions::{CommandActionCategory, CommandActionCategoryIter}; use strum::{EnumIter, IntoEnumIterator}; use tower_lsp::lsp_types::{ - ClientCapabilities, CodeActionOptions, CompletionOptions, ExecuteCommandOptions, - PositionEncodingKind, SaveOptions, ServerCapabilities, TextDocumentSyncCapability, - TextDocumentSyncKind, TextDocumentSyncOptions, TextDocumentSyncSaveOptions, - WorkDoneProgressOptions, + ClientCapabilities, CompletionOptions, 
ExecuteCommandOptions, PositionEncodingKind, + SaveOptions, ServerCapabilities, TextDocumentSyncCapability, TextDocumentSyncKind, + TextDocumentSyncOptions, TextDocumentSyncSaveOptions, WorkDoneProgressOptions, }; use crate::handlers::code_actions::command_id; diff --git a/crates/pgt_lsp/src/documents.rs b/crates/pgt_lsp/src/documents.rs index f5a2c72c..c0cf85f9 100644 --- a/crates/pgt_lsp/src/documents.rs +++ b/crates/pgt_lsp/src/documents.rs @@ -1,4 +1,4 @@ -use pgt_lsp_converters::line_index::LineIndex; +use crate::adapters::line_index::LineIndex; /// Represents an open [`textDocument`]. Can be cheaply cloned. /// diff --git a/crates/pgt_lsp/src/handlers/helper.rs b/crates/pgt_lsp/src/handlers/helper.rs index 99de22f1..1fe01036 100644 --- a/crates/pgt_lsp/src/handlers/helper.rs +++ b/crates/pgt_lsp/src/handlers/helper.rs @@ -1,4 +1,7 @@ -use crate::session::Session; +use crate::{ + adapters::{self, from_lsp}, + session::Session, +}; use pgt_text_size::TextSize; use tower_lsp::lsp_types; @@ -16,10 +19,10 @@ pub fn get_cursor_position( .map(|doc| doc.line_index) .map_err(|_| anyhow::anyhow!("Document not found."))?; - let cursor_pos = pgt_lsp_converters::from_proto::offset( + let cursor_pos = from_lsp::offset( &line_index, position, - pgt_lsp_converters::negotiated_encoding(client_capabilities), + adapters::negotiated_encoding(client_capabilities), )?; Ok(cursor_pos) diff --git a/crates/pgt_lsp/src/handlers/text_document.rs b/crates/pgt_lsp/src/handlers/text_document.rs index 2dadcf02..63250ef5 100644 --- a/crates/pgt_lsp/src/handlers/text_document.rs +++ b/crates/pgt_lsp/src/handlers/text_document.rs @@ -1,8 +1,8 @@ +use crate::adapters::from_lsp; use crate::{ diagnostics::LspError, documents::Document, session::Session, utils::apply_document_changes, }; use anyhow::Result; -use pgt_lsp_converters::from_proto::text_range; use pgt_workspace::workspace::{ ChangeFileParams, ChangeParams, CloseFileParams, GetFileContentParams, OpenFileParams, }; @@ -73,7 +73,7 @@ 
pub(crate) async fn did_change( .iter() .map(|c| ChangeParams { range: c.range.and_then(|r| { - text_range(&old_doc.line_index, r, session.position_encoding()).ok() + from_lsp::text_range(&old_doc.line_index, r, session.position_encoding()).ok() }), text: c.text.clone(), }) diff --git a/crates/pgt_lsp/src/lib.rs b/crates/pgt_lsp/src/lib.rs index 99db526f..cc014313 100644 --- a/crates/pgt_lsp/src/lib.rs +++ b/crates/pgt_lsp/src/lib.rs @@ -1,3 +1,4 @@ +mod adapters; mod capabilities; mod diagnostics; mod documents; diff --git a/crates/pgt_lsp/src/session.rs b/crates/pgt_lsp/src/session.rs index 70f9ac88..add71c92 100644 --- a/crates/pgt_lsp/src/session.rs +++ b/crates/pgt_lsp/src/session.rs @@ -1,3 +1,4 @@ +use crate::adapters::{PositionEncoding, WideEncoding, negotiated_encoding}; use crate::diagnostics::LspError; use crate::documents::Document; use crate::utils; @@ -8,7 +9,6 @@ use pgt_analyse::RuleCategoriesBuilder; use pgt_configuration::ConfigurationPathHint; use pgt_diagnostics::{DiagnosticExt, Error}; use pgt_fs::{FileSystem, PgTPath}; -use pgt_lsp_converters::{PositionEncoding, WideEncoding, negotiated_encoding}; use pgt_workspace::Workspace; use pgt_workspace::configuration::{LoadedConfiguration, load_configuration}; use pgt_workspace::settings::PartialConfigurationExt; diff --git a/crates/pgt_lsp/src/utils.rs b/crates/pgt_lsp/src/utils.rs index 638b4d57..a27dfecb 100644 --- a/crates/pgt_lsp/src/utils.rs +++ b/crates/pgt_lsp/src/utils.rs @@ -1,11 +1,11 @@ +use crate::adapters::line_index::LineIndex; +use crate::adapters::{PositionEncoding, from_lsp, to_lsp}; use anyhow::{Context, Result, ensure}; use pgt_console::MarkupBuf; use pgt_console::fmt::Termcolor; use pgt_console::fmt::{self, Formatter}; use pgt_diagnostics::termcolor::NoColor; use pgt_diagnostics::{Diagnostic, DiagnosticTags, Location, PrintDescription, Severity, Visit}; -use pgt_lsp_converters::line_index::LineIndex; -use pgt_lsp_converters::{PositionEncoding, from_proto, to_proto}; use 
pgt_text_edit::{CompressedOp, DiffOp, TextEdit}; use pgt_text_size::{TextRange, TextSize}; use std::any::Any; @@ -37,7 +37,7 @@ pub(crate) fn text_edit( offset += range.len(); } CompressedOp::DiffOp(DiffOp::Insert { range }) => { - let start = to_proto::position(line_index, offset, position_encoding)?; + let start = to_lsp::position(line_index, offset, position_encoding)?; // Merge with a previous delete operation if possible let last_edit = result.last_mut().filter(|text_edit| { @@ -54,9 +54,9 @@ pub(crate) fn text_edit( } } CompressedOp::DiffOp(DiffOp::Delete { range }) => { - let start = to_proto::position(line_index, offset, position_encoding)?; + let start = to_lsp::position(line_index, offset, position_encoding)?; offset += range.len(); - let end = to_proto::position(line_index, offset, position_encoding)?; + let end = to_lsp::position(line_index, offset, position_encoding)?; result.push(lsp::TextEdit { range: lsp::Range::new(start, end), @@ -108,7 +108,7 @@ pub(crate) fn diagnostic_to_lsp( } else { span }; - let span = to_proto::range(line_index, span, position_encoding) + let span = to_lsp::range(line_index, span, position_encoding) .context("failed to convert diagnostic span to LSP range")?; let severity = match diagnostic.severity() { @@ -189,7 +189,7 @@ impl Visit for RelatedInformationVisitor<'_> { None => return Ok(()), }; - let range = match to_proto::range(self.line_index, span, self.position_encoding) { + let range = match to_lsp::range(self.line_index, span, self.position_encoding) { Ok(range) => range, Err(_) => return Ok(()), }; @@ -293,7 +293,7 @@ pub(crate) fn apply_document_changes( line_index = LineIndex::new(&text); } index_valid = range.start.line; - if let Ok(range) = from_proto::text_range(&line_index, range, position_encoding) { + if let Ok(range) = from_lsp::text_range(&line_index, range, position_encoding) { text.replace_range(Range::::from(range), &change.text); } } @@ -304,9 +304,8 @@ pub(crate) fn apply_document_changes( 
#[cfg(test)] mod tests { - - use pgt_lsp_converters::PositionEncoding; - use pgt_lsp_converters::line_index::LineIndex; + use crate::adapters::PositionEncoding; + use crate::adapters::line_index::LineIndex; use pgt_text_edit::TextEdit; use tower_lsp::lsp_types as lsp; From 13a3f6bc4bafc39e9037b4ad354c4832541c6564 Mon Sep 17 00:00:00 2001 From: Julian Date: Sun, 30 Mar 2025 14:05:40 +0200 Subject: [PATCH 2/2] remove crate --- Cargo.lock | 10 - crates/pgt_lsp_converters/Cargo.toml | 23 --- crates/pgt_lsp_converters/src/from_proto.rs | 41 ---- crates/pgt_lsp_converters/src/lib.rs | 195 -------------------- crates/pgt_lsp_converters/src/line_index.rs | 144 --------------- crates/pgt_lsp_converters/src/to_proto.rs | 39 ---- 6 files changed, 452 deletions(-) delete mode 100644 crates/pgt_lsp_converters/Cargo.toml delete mode 100644 crates/pgt_lsp_converters/src/from_proto.rs delete mode 100644 crates/pgt_lsp_converters/src/lib.rs delete mode 100644 crates/pgt_lsp_converters/src/line_index.rs delete mode 100644 crates/pgt_lsp_converters/src/to_proto.rs diff --git a/Cargo.lock b/Cargo.lock index 35bdd186..40e1549b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2574,16 +2574,6 @@ dependencies = [ "tracing", ] -[[package]] -name = "pgt_lsp_converters" -version = "0.0.0" -dependencies = [ - "anyhow", - "pgt_text_size", - "rustc-hash 2.1.0", - "tower-lsp", -] - [[package]] name = "pgt_markup" version = "0.0.0" diff --git a/crates/pgt_lsp_converters/Cargo.toml b/crates/pgt_lsp_converters/Cargo.toml deleted file mode 100644 index ce3415f3..00000000 --- a/crates/pgt_lsp_converters/Cargo.toml +++ /dev/null @@ -1,23 +0,0 @@ -[package] -authors.workspace = true -categories.workspace = true -description = "" -edition.workspace = true -homepage.workspace = true -keywords.workspace = true -license.workspace = true -name = "pgt_lsp_converters" -repository.workspace = true -version = "0.0.0" - - -[dependencies] -anyhow = { workspace = true } -pgt_text_size.workspace = true -rustc-hash = { 
workspace = true } -tower-lsp = { version = "0.20.0" } - -[dev-dependencies] - -[lib] -doctest = false diff --git a/crates/pgt_lsp_converters/src/from_proto.rs b/crates/pgt_lsp_converters/src/from_proto.rs deleted file mode 100644 index 3968073d..00000000 --- a/crates/pgt_lsp_converters/src/from_proto.rs +++ /dev/null @@ -1,41 +0,0 @@ -use crate::line_index::LineIndex; -use crate::{LineCol, PositionEncoding, WideLineCol}; -use anyhow::{Context, Result}; -use pgt_text_size::{TextRange, TextSize}; -use tower_lsp::lsp_types; - -/// The function is used to convert a LSP position to TextSize. -pub fn offset( - line_index: &LineIndex, - position: lsp_types::Position, - position_encoding: PositionEncoding, -) -> Result { - let line_col = match position_encoding { - PositionEncoding::Utf8 => LineCol { - line: position.line, - col: position.character, - }, - PositionEncoding::Wide(enc) => { - let line_col = WideLineCol { - line: position.line, - col: position.character, - }; - line_index.to_utf8(enc, line_col) - } - }; - - line_index - .offset(line_col) - .with_context(|| format!("position {position:?} is out of range")) -} - -/// The function is used to convert a LSP range to TextRange. -pub fn text_range( - line_index: &LineIndex, - range: lsp_types::Range, - position_encoding: PositionEncoding, -) -> Result { - let start = offset(line_index, range.start, position_encoding)?; - let end = offset(line_index, range.end, position_encoding)?; - Ok(TextRange::new(start, end)) -} diff --git a/crates/pgt_lsp_converters/src/lib.rs b/crates/pgt_lsp_converters/src/lib.rs deleted file mode 100644 index fe0495ae..00000000 --- a/crates/pgt_lsp_converters/src/lib.rs +++ /dev/null @@ -1,195 +0,0 @@ -//! The crate contains a set of converters to translate between `lsp-types` and `text_size` (and vice versa) types. 
- -use pgt_text_size::TextSize; -use tower_lsp::lsp_types::{ClientCapabilities, PositionEncodingKind}; - -pub mod from_proto; -pub mod line_index; -pub mod to_proto; - -pub fn negotiated_encoding(capabilities: &ClientCapabilities) -> PositionEncoding { - let client_encodings = match &capabilities.general { - Some(general) => general.position_encodings.as_deref().unwrap_or_default(), - None => &[], - }; - - for enc in client_encodings { - if enc == &PositionEncodingKind::UTF8 { - return PositionEncoding::Utf8; - } else if enc == &PositionEncodingKind::UTF32 { - return PositionEncoding::Wide(WideEncoding::Utf32); - } - // NB: intentionally prefer just about anything else to utf-16. - } - - PositionEncoding::Wide(WideEncoding::Utf16) -} - -#[derive(Clone, Copy, Debug)] -pub enum PositionEncoding { - Utf8, - Wide(WideEncoding), -} - -#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] -pub enum WideEncoding { - Utf16, - Utf32, -} - -#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] -pub struct LineCol { - /// Zero-based - pub line: u32, - /// Zero-based utf8 offset - pub col: u32, -} - -/// Deliberately not a generic type and different from `LineCol`. -#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] -pub struct WideLineCol { - /// Zero-based - pub line: u32, - /// Zero-based - pub col: u32, -} - -#[derive(Clone, Debug, Hash, PartialEq, Eq)] -pub struct WideChar { - /// Start offset of a character inside a line, zero-based - pub start: TextSize, - /// End offset of a character inside a line, zero-based - pub end: TextSize, -} - -impl WideChar { - /// Returns the length in 8-bit UTF-8 code units. - fn len(&self) -> TextSize { - self.end - self.start - } - - /// Returns the length in UTF-16 or UTF-32 code units. 
- fn wide_len(&self, enc: WideEncoding) -> usize { - match enc { - WideEncoding::Utf16 => { - if self.len() == TextSize::from(4) { - 2 - } else { - 1 - } - } - - WideEncoding::Utf32 => 1, - } - } -} - -#[cfg(test)] -mod tests { - use crate::WideEncoding::{Utf16, Utf32}; - use crate::from_proto::offset; - use crate::line_index::LineIndex; - use crate::to_proto::position; - use crate::{LineCol, PositionEncoding, WideEncoding}; - use pgt_text_size::TextSize; - use tower_lsp::lsp_types::Position; - - macro_rules! check_conversion { - ($line_index:ident : $position:expr_2021 => $text_size:expr_2021 ) => { - let position_encoding = PositionEncoding::Wide(WideEncoding::Utf16); - - let offset = offset(&$line_index, $position, position_encoding).ok(); - assert_eq!(offset, Some($text_size)); - - let position = position(&$line_index, offset.unwrap(), position_encoding).ok(); - - assert_eq!(position, Some($position)); - }; - } - - #[test] - fn empty_string() { - let line_index = LineIndex::new(""); - check_conversion!(line_index: Position { line: 0, character: 0 } => TextSize::from(0)); - } - - #[test] - fn empty_line() { - let line_index = LineIndex::new("\n\n"); - check_conversion!(line_index: Position { line: 1, character: 0 } => TextSize::from(1)); - } - - #[test] - fn line_end() { - let line_index = LineIndex::new("abc\ndef\nghi"); - check_conversion!(line_index: Position { line: 1, character: 3 } => TextSize::from(7)); - } - - #[test] - fn out_of_bounds_line() { - let line_index = LineIndex::new("abcde\nfghij\n"); - - let offset = line_index.offset(LineCol { line: 5, col: 0 }); - assert!(offset.is_none()); - } - - #[test] - fn unicode() { - let line_index = LineIndex::new("'Jan 1, 2018 – Jan 1, 2019'"); - - check_conversion!(line_index: Position { line: 0, character: 0 } => TextSize::from(0)); - check_conversion!(line_index: Position { line: 0, character: 1 } => TextSize::from(1)); - check_conversion!(line_index: Position { line: 0, character: 12 } => 
TextSize::from(12)); - check_conversion!(line_index: Position { line: 0, character: 13 } => TextSize::from(15)); - check_conversion!(line_index: Position { line: 0, character: 14 } => TextSize::from(18)); - check_conversion!(line_index: Position { line: 0, character: 15 } => TextSize::from(21)); - check_conversion!(line_index: Position { line: 0, character: 26 } => TextSize::from(32)); - check_conversion!(line_index: Position { line: 0, character: 27 } => TextSize::from(33)); - } - - #[ignore] - #[test] - fn test_every_chars() { - let text: String = { - let mut chars: Vec = ((0 as char)..char::MAX).collect(); - chars.extend("\n".repeat(chars.len() / 16).chars()); - chars.into_iter().collect() - }; - - let line_index = LineIndex::new(&text); - - let mut lin_col = LineCol { line: 0, col: 0 }; - let mut col_utf16 = 0; - let mut col_utf32 = 0; - for (offset, char) in text.char_indices() { - let got_offset = line_index.offset(lin_col).unwrap(); - assert_eq!(usize::from(got_offset), offset); - - let got_lin_col = line_index.line_col(got_offset).unwrap(); - assert_eq!(got_lin_col, lin_col); - - for enc in [Utf16, Utf32] { - let wide_lin_col = line_index.to_wide(enc, lin_col).unwrap(); - let got_lin_col = line_index.to_utf8(enc, wide_lin_col); - assert_eq!(got_lin_col, lin_col); - - let want_col = match enc { - Utf16 => col_utf16, - Utf32 => col_utf32, - }; - assert_eq!(wide_lin_col.col, want_col) - } - - if char == '\n' { - lin_col.line += 1; - lin_col.col = 0; - col_utf16 = 0; - col_utf32 = 0; - } else { - lin_col.col += char.len_utf8() as u32; - col_utf16 += char.len_utf16() as u32; - col_utf32 += 1; - } - } - } -} diff --git a/crates/pgt_lsp_converters/src/line_index.rs b/crates/pgt_lsp_converters/src/line_index.rs deleted file mode 100644 index d1ae6289..00000000 --- a/crates/pgt_lsp_converters/src/line_index.rs +++ /dev/null @@ -1,144 +0,0 @@ -//! `LineIndex` maps flat `TextSize` offsets into `(Line, Column)` -//! representation. 
- -use std::mem; - -use pgt_text_size::TextSize; -use rustc_hash::FxHashMap; - -use crate::{LineCol, WideChar, WideEncoding, WideLineCol}; - -#[derive(Clone, Debug, PartialEq, Eq)] -pub struct LineIndex { - /// Offset the beginning of each line, zero-based. - pub newlines: Vec, - /// List of non-ASCII characters on each line. - pub line_wide_chars: FxHashMap>, -} - -impl LineIndex { - pub fn new(text: &str) -> LineIndex { - let mut line_wide_chars = FxHashMap::default(); - let mut wide_chars = Vec::new(); - - let mut newlines = vec![TextSize::from(0)]; - - let mut current_col = TextSize::from(0); - - let mut line = 0; - for (offset, char) in text.char_indices() { - let char_size = TextSize::of(char); - - if char == '\n' { - // SAFETY: the conversion from `usize` to `TextSize` can fail if `offset` - // is larger than 2^32. We don't support such large files. - let char_offset = TextSize::try_from(offset).expect("TextSize overflow"); - newlines.push(char_offset + char_size); - - // Save any utf-16 characters seen in the previous line - if !wide_chars.is_empty() { - line_wide_chars.insert(line, mem::take(&mut wide_chars)); - } - - // Prepare for processing the next line - current_col = TextSize::from(0); - line += 1; - continue; - } - - if !char.is_ascii() { - wide_chars.push(WideChar { - start: current_col, - end: current_col + char_size, - }); - } - - current_col += char_size; - } - - // Save any utf-16 characters seen in the last line - if !wide_chars.is_empty() { - line_wide_chars.insert(line, wide_chars); - } - - LineIndex { - newlines, - line_wide_chars, - } - } - - /// Return the number of lines in the index, clamped to [u32::MAX] - pub fn len(&self) -> u32 { - self.newlines.len().try_into().unwrap_or(u32::MAX) - } - - /// Return `true` if the index contains no lines. 
- pub fn is_empty(&self) -> bool { - self.newlines.is_empty() - } - - pub fn line_col(&self, offset: TextSize) -> Option { - let line = self.newlines.partition_point(|&it| it <= offset) - 1; - let line_start_offset = self.newlines.get(line)?; - let col = offset - line_start_offset; - - Some(LineCol { - line: u32::try_from(line).ok()?, - col: col.into(), - }) - } - - pub fn offset(&self, line_col: LineCol) -> Option { - self.newlines - .get(line_col.line as usize) - .map(|offset| offset + TextSize::from(line_col.col)) - } - - pub fn to_wide(&self, enc: WideEncoding, line_col: LineCol) -> Option { - let col = self.utf8_to_wide_col(enc, line_col.line, line_col.col.into()); - Some(WideLineCol { - line: line_col.line, - col: u32::try_from(col).ok()?, - }) - } - - pub fn to_utf8(&self, enc: WideEncoding, line_col: WideLineCol) -> LineCol { - let col = self.wide_to_utf8_col(enc, line_col.line, line_col.col); - LineCol { - line: line_col.line, - col: col.into(), - } - } - - fn utf8_to_wide_col(&self, enc: WideEncoding, line: u32, col: TextSize) -> usize { - let mut res: usize = col.into(); - if let Some(wide_chars) = self.line_wide_chars.get(&line) { - for c in wide_chars { - if c.end <= col { - res -= usize::from(c.len()) - c.wide_len(enc); - } else { - // From here on, all utf16 characters come *after* the character we are mapping, - // so we don't need to take them into account - break; - } - } - } - res - } - - fn wide_to_utf8_col(&self, enc: WideEncoding, line: u32, mut col: u32) -> TextSize { - if let Some(wide_chars) = self.line_wide_chars.get(&line) { - for c in wide_chars { - if col > u32::from(c.start) { - col += u32::from(c.len()) - c.wide_len(enc) as u32; - } else { - // From here on, all utf16 characters come *after* the character we are mapping, - // so we don't need to take them into account - break; - } - } - } - - col.into() - } -} diff --git a/crates/pgt_lsp_converters/src/to_proto.rs b/crates/pgt_lsp_converters/src/to_proto.rs deleted file mode 100644 
index ea5f5869..00000000 --- a/crates/pgt_lsp_converters/src/to_proto.rs +++ /dev/null @@ -1,39 +0,0 @@ -use crate::PositionEncoding; -use crate::line_index::LineIndex; -use anyhow::{Context, Result}; -use pgt_text_size::{TextRange, TextSize}; -use tower_lsp::lsp_types; - -/// The function is used to convert TextSize to a LSP position. -pub fn position( - line_index: &LineIndex, - offset: TextSize, - position_encoding: PositionEncoding, -) -> Result { - let line_col = line_index - .line_col(offset) - .with_context(|| format!("could not convert offset {offset:?} into a line-column index"))?; - - let position = match position_encoding { - PositionEncoding::Utf8 => lsp_types::Position::new(line_col.line, line_col.col), - PositionEncoding::Wide(enc) => { - let line_col = line_index - .to_wide(enc, line_col) - .with_context(|| format!("could not convert {line_col:?} into wide line column"))?; - lsp_types::Position::new(line_col.line, line_col.col) - } - }; - - Ok(position) -} - -/// The function is used to convert TextRange to a LSP range. -pub fn range( - line_index: &LineIndex, - range: TextRange, - position_encoding: PositionEncoding, -) -> Result { - let start = position(line_index, range.start(), position_encoding)?; - let end = position(line_index, range.end(), position_encoding)?; - Ok(lsp_types::Range::new(start, end)) -}