diff --git a/Cargo.lock b/Cargo.lock index 02ead9b8cf75c..dbb4f2da5664f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2235,7 +2235,6 @@ dependencies = [ "drop_bomb", "insta", "ruff_text_size", - "rustc-hash", "schemars", "serde", "static_assertions", diff --git a/crates/ruff_formatter/Cargo.toml b/crates/ruff_formatter/Cargo.toml index 88f5bbf892df4..e1adb79752420 100644 --- a/crates/ruff_formatter/Cargo.toml +++ b/crates/ruff_formatter/Cargo.toml @@ -14,7 +14,6 @@ license = { workspace = true } ruff_text_size = { path = "../ruff_text_size" } drop_bomb = { version = "0.1.5" } -rustc-hash = { workspace = true } schemars = { workspace = true, optional = true } serde = { workspace = true, optional = true } static_assertions = { workspace = true } diff --git a/crates/ruff_formatter/src/buffer.rs b/crates/ruff_formatter/src/buffer.rs index 80ba8f15e7e36..024e3335f8c42 100644 --- a/crates/ruff_formatter/src/buffer.rs +++ b/crates/ruff_formatter/src/buffer.rs @@ -1,12 +1,12 @@ -use super::{write, Arguments, FormatElement}; -use crate::format_element::Interned; -use crate::prelude::LineMode; -use crate::{FormatResult, FormatState}; -use rustc_hash::FxHashMap; use std::any::{Any, TypeId}; use std::fmt::Debug; use std::ops::{Deref, DerefMut}; +use crate::prelude::LineMode; +use crate::{FormatResult, FormatState}; + +use super::{write, Arguments, FormatElement}; + /// A trait for writing or formatting into [`FormatElement`]-accepting buffers or streams. pub trait Buffer { /// The context used during formatting @@ -341,89 +341,12 @@ where /// ``` pub struct RemoveSoftLinesBuffer<'a, Context> { inner: &'a mut dyn Buffer, - - /// Caches the interned elements after the soft line breaks have been removed. - /// - /// The `key` is the [Interned] element as it has been passed to [Self::write_element] or the child of another - /// [Interned] element. The `value` is the matching document of the key where all soft line breaks have been removed. 
- /// - /// It's fine to not snapshot the cache. The worst that can happen is that it holds on interned elements - /// that are now unused. But there's little harm in that and the cache is cleaned when dropping the buffer. - interned_cache: FxHashMap, } impl<'a, Context> RemoveSoftLinesBuffer<'a, Context> { /// Creates a new buffer that removes the soft line breaks before writing them into `buffer`. pub fn new(inner: &'a mut dyn Buffer) -> Self { - Self { - inner, - interned_cache: FxHashMap::default(), - } - } - - /// Removes the soft line breaks from an interned element. - fn clean_interned(&mut self, interned: &Interned) -> Interned { - clean_interned(interned, &mut self.interned_cache) - } -} - -// Extracted to function to avoid monomorphization -fn clean_interned( - interned: &Interned, - interned_cache: &mut FxHashMap, -) -> Interned { - if let Some(cleaned) = interned_cache.get(interned) { - cleaned.clone() - } else { - // Find the first soft line break element or interned element that must be changed - let result = interned - .iter() - .enumerate() - .find_map(|(index, element)| match element { - FormatElement::Line(LineMode::Soft | LineMode::SoftOrSpace) => { - let mut cleaned = Vec::new(); - cleaned.extend_from_slice(&interned[..index]); - Some((cleaned, &interned[index..])) - } - FormatElement::Interned(inner) => { - let cleaned_inner = clean_interned(inner, interned_cache); - - if &cleaned_inner == inner { - None - } else { - let mut cleaned = Vec::with_capacity(interned.len()); - cleaned.extend_from_slice(&interned[..index]); - cleaned.push(FormatElement::Interned(cleaned_inner)); - Some((cleaned, &interned[index + 1..])) - } - } - - _ => None, - }); - - let result = match result { - // Copy the whole interned buffer so that becomes possible to change the necessary elements. 
- Some((mut cleaned, rest)) => { - for element in rest { - let element = match element { - FormatElement::Line(LineMode::Soft) => continue, - FormatElement::Line(LineMode::SoftOrSpace) => FormatElement::Space, - FormatElement::Interned(interned) => { - FormatElement::Interned(clean_interned(interned, interned_cache)) - } - element => element.clone(), - }; - cleaned.push(element); - } - - Interned::new(cleaned) - } - // No change necessary, return existing interned element - None => interned.clone(), - }; - - interned_cache.insert(interned.clone(), result.clone()); - result + Self { inner } } } @@ -434,9 +357,6 @@ impl Buffer for RemoveSoftLinesBuffer<'_, Context> { let element = match element { FormatElement::Line(LineMode::Soft) => return, FormatElement::Line(LineMode::SoftOrSpace) => FormatElement::Space, - FormatElement::Interned(interned) => { - FormatElement::Interned(self.clean_interned(&interned)) - } element => element, }; @@ -574,10 +494,16 @@ where #[derive(Debug, Copy, Clone)] pub struct Recorded<'a>(&'a [FormatElement]); +impl<'a> Recorded<'a> { + pub fn as_slice(self) -> &'a [FormatElement] { + self.0 + } +} + impl Deref for Recorded<'_> { type Target = [FormatElement]; fn deref(&self) -> &Self::Target { - self.0 + self.as_slice() } } diff --git a/crates/ruff_formatter/src/diagnostics.rs b/crates/ruff_formatter/src/diagnostics.rs index 7c9944a562d39..1213549049e49 100644 --- a/crates/ruff_formatter/src/diagnostics.rs +++ b/crates/ruff_formatter/src/diagnostics.rs @@ -1,3 +1,4 @@ +use crate::interned_id::InternedId; use crate::prelude::TagKind; use crate::GroupId; use ruff_text_size::TextRange; @@ -101,7 +102,11 @@ pub enum InvalidDocumentError { }, UnknownGroupId { - group_id: GroupId, + id: GroupId, + }, + + UnknownInternedId { + id: InternedId, }, } @@ -155,8 +160,11 @@ impl std::fmt::Display for InvalidDocumentError { } } } - InvalidDocumentError::UnknownGroupId { group_id } => { - std::write!(f, "Encountered unknown group id {group_id:?}. 
Ensure that the group with the id {group_id:?} exists and that the group is a parent of or comes before the element referring to it.") + InvalidDocumentError::UnknownGroupId { id } => { + std::write!(f, "Encountered unknown group id {id:?}. Ensure that the group with the id {id:?} exists and that the group is a parent of or comes before the element referring to it.") + } + InvalidDocumentError::UnknownInternedId { id } => { + std::write!(f, "Encountered unknown interned id {id:?}. Ensure that the interned element with the id {id:?} exists and that appears before the element referring to it and isn't recursive.") } } } diff --git a/crates/ruff_formatter/src/format_element.rs b/crates/ruff_formatter/src/format_element.rs index f9fe281df3fde..70eaf049749f9 100644 --- a/crates/ruff_formatter/src/format_element.rs +++ b/crates/ruff_formatter/src/format_element.rs @@ -1,18 +1,19 @@ -pub mod document; -pub mod tag; - use std::borrow::Cow; -use std::hash::{Hash, Hasher}; use std::iter::FusedIterator; use std::num::NonZeroU32; use std::ops::Deref; -use std::rc::Rc; + use unicode_width::UnicodeWidthChar; +use ruff_text_size::TextSize; + use crate::format_element::tag::{GroupMode, LabelId, Tag}; +use crate::interned_id::InternedId; use crate::source_code::SourceCodeSlice; use crate::{IndentWidth, TagKind}; -use ruff_text_size::TextSize; + +pub mod document; +pub mod tag; /// Language agnostic IR for formatting source code. /// @@ -34,7 +35,9 @@ pub enum FormatElement { SourcePosition(TextSize), /// A ASCII only Token that contains no line breaks or tab characters. - Token { text: &'static str }, + Token { + text: &'static str, + }, /// An arbitrary text that can contain tabs, newlines, and unicode characters. Text { @@ -53,9 +56,7 @@ pub enum FormatElement { /// line suffixes, potentially by inserting a hard line break. LineSuffixBoundary, - /// An interned format element. 
Useful when the same content must be emitted multiple times to avoid - /// deep cloning the IR when using the `best_fitting!` macro or `if_group_fits_on_line` and `if_group_breaks`. - Interned(Interned), + Reference(InternedId), /// A list of different variants representing the same content. The printer picks the best fitting content. /// Line breaks inside of a best fitting don't propagate to parent groups. @@ -68,16 +69,6 @@ pub enum FormatElement { Tag(Tag), } -impl FormatElement { - pub fn tag_kind(&self) -> Option { - if let FormatElement::Tag(tag) = self { - Some(tag.kind()) - } else { - None - } - } -} - impl std::fmt::Debug for FormatElement { fn fmt(&self, fmt: &mut std::fmt::Formatter) -> std::fmt::Result { match self { @@ -97,7 +88,8 @@ impl std::fmt::Debug for FormatElement { .field("variants", variants) .field("mode", &mode) .finish(), - FormatElement::Interned(interned) => fmt.debug_list().entries(&**interned).finish(), + FormatElement::Reference(id) => fmt.debug_tuple("Reference").field(id).finish(), + FormatElement::Tag(tag) => fmt.debug_tuple("Tag").field(tag).finish(), FormatElement::SourcePosition(position) => { fmt.debug_tuple("SourcePosition").field(position).finish() @@ -151,46 +143,6 @@ impl From for PrintMode { } } -#[derive(Clone)] -pub struct Interned(Rc<[FormatElement]>); - -impl Interned { - pub(super) fn new(content: Vec) -> Self { - Self(content.into()) - } -} - -impl PartialEq for Interned { - fn eq(&self, other: &Interned) -> bool { - Rc::ptr_eq(&self.0, &other.0) - } -} - -impl Eq for Interned {} - -impl Hash for Interned { - fn hash(&self, hasher: &mut H) - where - H: Hasher, - { - Rc::as_ptr(&self.0).hash(hasher); - } -} - -impl std::fmt::Debug for Interned { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - self.0.fmt(f) - } -} - -impl Deref for Interned { - type Target = [FormatElement]; - - fn deref(&self) -> &Self::Target { - &self.0 - } -} - const LINE_SEPARATOR: char = '\u{2028}'; const 
PARAGRAPH_SEPARATOR: char = '\u{2029}'; pub const LINE_TERMINATORS: [char; 3] = ['\r', LINE_SEPARATOR, PARAGRAPH_SEPARATOR]; @@ -257,18 +209,22 @@ impl FormatElement { pub const fn is_space(&self) -> bool { matches!(self, FormatElement::Space) } -} -impl FormatElements for FormatElement { - fn will_break(&self) -> bool { + pub const fn tag_kind(&self) -> Option { + if let FormatElement::Tag(tag) = self { + Some(tag.kind()) + } else { + None + } + } + + pub fn will_break(&self) -> bool { match self { FormatElement::ExpandParent => true, FormatElement::Tag(Tag::StartGroup(group)) => !group.mode().is_flat(), FormatElement::Line(line_mode) => matches!(line_mode, LineMode::Hard | LineMode::Empty), FormatElement::Text { text_width, .. } => text_width.is_multiline(), FormatElement::SourceCodeSlice { text_width, .. } => text_width.is_multiline(), - FormatElement::Interned(interned) => interned.will_break(), - // Traverse into the most flat version because the content is guaranteed to expand when even // the most flat version contains some content that forces a break. FormatElement::BestFitting { variants: best_fitting, @@ -278,23 +234,19 @@ impl FormatElements for FormatElement { | FormatElement::Space | FormatElement::Tag(_) | FormatElement::Token { .. } + | FormatElement::Reference(..) 
| FormatElement::SourcePosition(_) => false, } } - fn has_label(&self, label_id: LabelId) -> bool { + pub fn has_label(&self, label_id: LabelId) -> bool { match self { FormatElement::Tag(Tag::StartLabelled(actual)) => *actual == label_id, - FormatElement::Interned(interned) => interned.deref().has_label(label_id), _ => false, } } - fn start_tag(&self, _: TagKind) -> Option<&Tag> { - None - } - - fn end_tag(&self, kind: TagKind) -> Option<&Tag> { + pub fn end_tag(&self, kind: TagKind) -> Option<&Tag> { match self { FormatElement::Tag(tag) if tag.kind() == kind && tag.is_end() => Some(tag), _ => None, @@ -522,7 +474,6 @@ impl TextWidth { #[cfg(test)] mod tests { - use crate::format_element::{normalize_newlines, LINE_TERMINATORS}; #[test] @@ -547,13 +498,13 @@ mod sizes { // be recomputed at a later point in time? // You reduced the size of a format element? Excellent work! - use super::{BestFittingVariants, Interned, TextWidth}; use static_assertions::assert_eq_size; + use super::{BestFittingVariants, TextWidth}; + assert_eq_size!(ruff_text_size::TextRange, [u8; 8]); assert_eq_size!(TextWidth, [u8; 4]); assert_eq_size!(super::tag::VerbatimKind, [u8; 8]); - assert_eq_size!(Interned, [u8; 16]); assert_eq_size!(BestFittingVariants, [u8; 16]); #[cfg(not(debug_assertions))] diff --git a/crates/ruff_formatter/src/format_element/document.rs b/crates/ruff_formatter/src/format_element/document.rs index 87d8b9a1caa7c..166ac7de379c3 100644 --- a/crates/ruff_formatter/src/format_element/document.rs +++ b/crates/ruff_formatter/src/format_element/document.rs @@ -1,9 +1,7 @@ -use std::collections::HashMap; use std::ops::Deref; -use rustc_hash::FxHashMap; - use crate::format_element::tag::{Condition, DedentMode}; +use crate::interned_id::{InternedId, InternedIndex}; use crate::prelude::tag::GroupMode; use crate::prelude::*; use crate::source_code::SourceCode; @@ -39,6 +37,10 @@ impl Document { tag: &'a tag::FitsExpanded, expands_before: bool, }, + Interned { + id: InternedId, + 
prev_expanded: bool, + }, BestFitting, } @@ -54,7 +56,7 @@ impl Document { fn propagate_expands<'a>( elements: &'a [FormatElement], enclosing: &mut Vec>, - checked_interned: &mut FxHashMap<&'a Interned, bool>, + interned: &mut InternedIndex, ) -> bool { let mut expands = false; for element in elements { @@ -75,20 +77,11 @@ impl Document { Some(Enclosing::ConditionalGroup(group)) => !group.mode().is_flat(), _ => false, }, - FormatElement::Interned(interned) => { - if let Some(interned_expands) = checked_interned.get(interned) { - *interned_expands - } else { - let interned_expands = - propagate_expands(interned, enclosing, checked_interned); - checked_interned.insert(interned, interned_expands); - interned_expands - } - } + FormatElement::Reference(id) => interned.get(*id).copied().unwrap_or_default(), FormatElement::BestFitting { variants, mode: _ } => { enclosing.push(Enclosing::BestFitting); - propagate_expands(variants, enclosing, checked_interned); + propagate_expands(variants, enclosing, interned); enclosing.pop(); continue; } @@ -108,6 +101,23 @@ impl Document { continue; } + FormatElement::Tag(Tag::StartInterned { id }) => { + enclosing.push(Enclosing::Interned { + id: *id, + prev_expanded: expands, + }); + expands = false; + continue; + } + + FormatElement::Tag(Tag::EndInterned) => { + if let Some(Enclosing::Interned { id, prev_expanded }) = enclosing.pop() { + interned.insert(id, expands); + expands = prev_expanded; + } + + continue; + } FormatElement::Text { text: _, text_width, @@ -115,6 +125,7 @@ impl Document { FormatElement::SourceCodeSlice { text_width, .. 
} => text_width.is_multiline(), FormatElement::ExpandParent | FormatElement::Line(LineMode::Hard | LineMode::Empty) => true, + _ => false, }; @@ -132,7 +143,7 @@ impl Document { } else { self.len().ilog2() as usize }); - let mut interned = FxHashMap::default(); + let mut interned = InternedIndex::new(); propagate_expands(self, &mut enclosing, &mut interned); } @@ -185,18 +196,12 @@ impl std::fmt::Debug for DisplayDocument<'_> { #[derive(Clone, Debug)] struct IrFormatContext<'a> { - /// The interned elements that have been printed to this point - printed_interned_elements: HashMap, - source_code: SourceCode<'a>, } impl<'a> IrFormatContext<'a> { fn new(source_code: SourceCode<'a>) -> Self { - Self { - source_code, - printed_interned_elements: HashMap::new(), - } + Self { source_code } } } @@ -366,30 +371,8 @@ impl Format> for &[FormatElement] { write!(f, [token("])")])?; } - FormatElement::Interned(interned) => { - let interned_elements = &mut f.context_mut().printed_interned_elements; - - match interned_elements.get(interned).copied() { - None => { - let index = interned_elements.len(); - interned_elements.insert(interned.clone(), index); - - write!( - f, - [ - text(&std::format!(""), None), - space(), - &&**interned, - ] - )?; - } - Some(reference) => { - write!( - f, - [text(&std::format!(""), None)] - )?; - } - } + FormatElement::Reference(id) => { + write!(f, [text(&std::format!(""), None)])?; } FormatElement::Tag(tag) => { @@ -597,6 +580,9 @@ impl Format> for &[FormatElement] { )?; } } + StartInterned { id } => { + write!(f, [text(&std::format!(""), None), space()])?; + } StartEntry | StartBestFittingEntry { .. } => { // handled after the match for all start tags @@ -614,6 +600,7 @@ impl Format> for &[FormatElement] { | EndLineSuffix | EndDedent | EndFitsExpanded + | EndInterned { .. 
} | EndVerbatim => { write!(f, [ContentArrayEnd, token(")")])?; } @@ -691,11 +678,6 @@ impl FormatElements for [FormatElement] { FormatElement::Tag(Tag::EndLineSuffix | Tag::EndFitsExpanded) => { ignore_depth = ignore_depth.saturating_sub(1); } - FormatElement::Interned(interned) if ignore_depth == 0 => { - if interned.will_break() { - return true; - } - } element if ignore_depth == 0 && element.will_break() => { return true; @@ -735,20 +717,7 @@ impl FormatElements for [FormatElement] { *depth += 1; } } - FormatElement::Interned(interned) => { - match traverse_slice(interned, kind, depth) { - Some(start) => { - return Some(start); - } - // Reached end or invalid document - None if *depth == 0 => { - return None; - } - _ => { - // continue with other elements - } - } - } + _ => {} } } diff --git a/crates/ruff_formatter/src/format_element/tag.rs b/crates/ruff_formatter/src/format_element/tag.rs index fd29152961994..57714004547a4 100644 --- a/crates/ruff_formatter/src/format_element/tag.rs +++ b/crates/ruff_formatter/src/format_element/tag.rs @@ -1,8 +1,10 @@ -use crate::format_element::PrintMode; -use crate::{GroupId, TextSize}; use std::cell::Cell; use std::num::NonZeroU8; +use crate::format_element::PrintMode; +use crate::interned_id::InternedId; +use crate::{GroupId, TextSize}; + /// A Tag marking the start and end of some content to which some special formatting should be applied. /// /// Tags always come in pairs of a start and an end tag and the styling defined by this tag @@ -86,6 +88,11 @@ pub enum Tag { StartBestFittingEntry, EndBestFittingEntry, + + StartInterned { + id: InternedId, + }, + EndInterned, } impl Tag { @@ -106,7 +113,8 @@ impl Tag { | Tag::StartVerbatim(_) | Tag::StartLabelled(_) | Tag::StartFitsExpanded(_) - | Tag::StartBestFittingEntry, + | Tag::StartBestFittingEntry + | Tag::StartInterned { .. 
} ) } @@ -133,6 +141,7 @@ impl Tag { StartVerbatim(_) | EndVerbatim => TagKind::Verbatim, StartLabelled(_) | EndLabelled => TagKind::Labelled, StartFitsExpanded { .. } | EndFitsExpanded => TagKind::FitsExpanded, + StartInterned { .. } | EndInterned { .. } => TagKind::Interned, StartBestFittingEntry { .. } | EndBestFittingEntry => TagKind::BestFittingEntry, } } @@ -158,6 +167,7 @@ pub enum TagKind { Labelled, FitsExpanded, BestFittingEntry, + Interned, } #[derive(Debug, Copy, Default, Clone, Eq, PartialEq)] diff --git a/crates/ruff_formatter/src/format_extensions.rs b/crates/ruff_formatter/src/format_extensions.rs deleted file mode 100644 index 6c2aa85e199b9..0000000000000 --- a/crates/ruff_formatter/src/format_extensions.rs +++ /dev/null @@ -1,177 +0,0 @@ -#![allow(dead_code)] - -use crate::prelude::*; -use std::cell::RefCell; -use std::marker::PhantomData; - -use crate::Buffer; - -/// Utility trait that allows memorizing the output of a [`Format`]. -/// Useful to avoid re-formatting the same object twice. -pub trait MemoizeFormat { - /// Returns a formattable object that memoizes the result of `Format` by cloning. - /// Mainly useful if the same sub-tree can appear twice in the formatted output because it's - /// used inside of `if_group_breaks` or `if_group_fits_single_line`. 
- /// - /// ``` - /// use std::cell::Cell; - /// use ruff_formatter::{format, write}; - /// use ruff_formatter::prelude::*; - /// use ruff_text_size::{Ranged, TextSize}; - /// - /// struct MyFormat { - /// value: Cell - /// } - /// - /// impl MyFormat { - /// pub fn new() -> Self { - /// Self { value: Cell::new(1) } - /// } - /// } - /// - /// impl Format for MyFormat { - /// fn fmt(&self, f: &mut Formatter) -> FormatResult<()> { - /// let value = self.value.get(); - /// self.value.set(value + 1); - /// - /// write!(f, [text(&std::format!("Formatted {value} times."), None)]) - /// } - /// } - /// - /// # fn main() -> FormatResult<()> { - /// let normal = MyFormat::new(); - /// - /// // Calls `format` every time the object gets formatted - /// assert_eq!( - /// "Formatted 1 times. Formatted 2 times.", - /// format!(SimpleFormatContext::default(), [normal, space(), normal])?.print()?.as_code() - /// ); - /// - /// // Memoized memoizes the result and calls `format` only once. - /// let memoized = normal.memoized(); - /// assert_eq!( - /// "Formatted 3 times. Formatted 3 times.", - /// format![SimpleFormatContext::default(), [memoized, space(), memoized]]?.print()?.as_code() - /// ); - /// # Ok(()) - /// # } - /// ``` - fn memoized(self) -> Memoized - where - Self: Sized + Format, - { - Memoized::new(self) - } -} - -impl MemoizeFormat for T where T: Format {} - -/// Memoizes the output of its inner [`Format`] to avoid re-formatting a potential expensive object. -#[derive(Debug)] -pub struct Memoized { - inner: F, - memory: RefCell>>>, - options: PhantomData, -} - -impl Memoized -where - F: Format, -{ - fn new(inner: F) -> Self { - Self { - inner, - memory: RefCell::new(None), - options: PhantomData, - } - } - - /// Gives access to the memoized content. - /// - /// Performs the formatting if the content hasn't been formatted at this point. - /// - /// # Example - /// - /// Inspect if some memoized content breaks. 
- /// - /// ```rust - /// use std::cell::Cell; - /// use ruff_formatter::{format, write}; - /// use ruff_formatter::prelude::*; - /// use ruff_text_size::{Ranged, TextSize}; - /// - /// #[derive(Default)] - /// struct Counter { - /// value: Cell - /// } - /// - /// impl Format for Counter { - /// fn fmt(&self, f: &mut Formatter) -> FormatResult<()> { - /// let current = self.value.get(); - /// - /// write!(f, [ - /// token("Count:"), - /// space(), - /// text(&std::format!("{current}"), None), - /// hard_line_break() - /// ])?; - /// - /// self.value.set(current + 1); - /// Ok(()) - /// } - /// } - /// - /// # fn main() -> FormatResult<()> { - /// let content = format_with(|f| { - /// let mut counter = Counter::default().memoized(); - /// let counter_content = counter.inspect(f)?; - /// - /// if counter_content.will_break() { - /// write!(f, [token("Counter:"), block_indent(&counter)]) - /// } else { - /// write!(f, [token("Counter:"), counter]) - /// }?; - /// - /// write!(f, [counter]) - /// }); - /// - /// - /// let formatted = format!(SimpleFormatContext::default(), [content])?; - /// assert_eq!("Counter:\n\tCount: 0\nCount: 0\n", formatted.print()?.as_code()); - /// # Ok(()) - /// # } - /// ``` - pub fn inspect(&mut self, f: &mut Formatter) -> FormatResult<&[FormatElement]> { - let result = self - .memory - .get_mut() - .get_or_insert_with(|| f.intern(&self.inner)); - - match result.as_ref() { - Ok(Some(FormatElement::Interned(interned))) => Ok(&**interned), - Ok(Some(other)) => Ok(std::slice::from_ref(other)), - Ok(None) => Ok(&[]), - Err(error) => Err(*error), - } - } -} - -impl Format for Memoized -where - F: Format, -{ - fn fmt(&self, f: &mut Formatter) -> FormatResult<()> { - let mut memory = self.memory.borrow_mut(); - let result = memory.get_or_insert_with(|| f.intern(&self.inner)); - - match result { - Ok(Some(elements)) => { - f.write_element(elements.clone()); - - Ok(()) - } - Ok(None) => Ok(()), - Err(err) => Err(*err), - } - } -} diff --git 
a/crates/ruff_formatter/src/formatter.rs b/crates/ruff_formatter/src/formatter.rs index 8274485bc553b..38ddc2a41de27 100644 --- a/crates/ruff_formatter/src/formatter.rs +++ b/crates/ruff_formatter/src/formatter.rs @@ -1,7 +1,9 @@ use crate::buffer::BufferSnapshot; use crate::builders::{FillBuilder, JoinBuilder}; +use crate::interned_id::InternedId; use crate::prelude::*; -use crate::{Arguments, Buffer, FormatContext, FormatState, GroupId, VecBuffer}; +use crate::{Arguments, Buffer, FormatContext, FormatState, GroupId}; +use std::ops::Deref; /// Handles the formatting of a CST and stores the context how the CST should be formatted (user preferences). /// The formatter is passed to the [Format] implementation of every node in the CST so that they @@ -164,22 +166,34 @@ impl<'buf, Context> Formatter<'buf, Context> { } /// Formats `content` into an interned element without writing it to the formatter's buffer. - pub fn intern(&mut self, content: &dyn Format) -> FormatResult> { - let mut buffer = VecBuffer::new(self.state_mut()); - crate::write!(&mut buffer, [content])?; - let elements = buffer.into_vec(); - - Ok(self.intern_vec(elements)) + pub fn intern( + &mut self, + content: &dyn Format, + debug_name: &'static str, + ) -> FormatResult { + Ok(self.intern_inspect(content, debug_name)?.into_interned()) } - pub fn intern_vec(&mut self, mut elements: Vec) -> Option { - match elements.len() { - 0 => None, - // Doesn't get cheaper than calling clone, use the element directly - // SAFETY: Safe because of the `len == 1` check in the match arm. - 1 => Some(elements.pop().unwrap()), - _ => Some(FormatElement::Interned(Interned::new(elements))), - } + /// Formats `content` into an interned element without writing it to the formatter's buffer. 
+ pub fn intern_inspect( + &mut self, + content: &dyn Format, + debug_name: &'static str, + ) -> FormatResult { + let id = self.state_mut().interned_id(debug_name); + + let recorded = { + let mut f = self.start_recording(); + f.write_element(FormatElement::Tag(tag::Tag::StartInterned { id })); + crate::write!(f, [content])?; + f.write_element(FormatElement::Tag(tag::Tag::EndInterned)); + f.stop() + }; + + Ok(InternedInspect { + id, + elements: recorded.as_slice(), + }) } } @@ -248,3 +262,33 @@ impl Buffer for Formatter<'_, Context> { pub struct FormatterSnapshot { buffer: BufferSnapshot, } + +pub struct Interned { + id: InternedId, +} + +impl Format for Interned { + fn fmt(&self, f: &mut Formatter) -> FormatResult<()> { + f.write_element(FormatElement::Reference(self.id)); + Ok(()) + } +} + +pub struct InternedInspect<'buf> { + id: InternedId, + elements: &'buf [FormatElement], +} + +impl InternedInspect<'_> { + pub fn into_interned(self) -> Interned { + Interned { id: self.id } + } +} + +impl Deref for InternedInspect<'_> { + type Target = [FormatElement]; + + fn deref(&self) -> &Self::Target { + self.elements + } +} diff --git a/crates/ruff_formatter/src/group_id.rs b/crates/ruff_formatter/src/group_id.rs index 94b36fa071c37..38ba0b79a39f4 100644 --- a/crates/ruff_formatter/src/group_id.rs +++ b/crates/ruff_formatter/src/group_id.rs @@ -67,7 +67,7 @@ pub(super) struct UniqueGroupIdBuilder { impl UniqueGroupIdBuilder { /// Creates a new unique group id with the given debug name. 
- pub(crate) fn group_id(&self, debug_name: &'static str) -> GroupId { + pub(crate) fn new_id(&self, debug_name: &'static str) -> GroupId { let id = self.next_id.fetch_add(1, Ordering::Relaxed); let id = NonZeroU32::new(id).unwrap_or_else(|| panic!("Group ID counter overflowed")); diff --git a/crates/ruff_formatter/src/interned_id.rs b/crates/ruff_formatter/src/interned_id.rs new file mode 100644 index 0000000000000..836f231dc0429 --- /dev/null +++ b/crates/ruff_formatter/src/interned_id.rs @@ -0,0 +1,119 @@ +use std::sync::atomic::{AtomicU32, Ordering}; + +#[allow(unreachable_pub)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +#[derive(Clone, Copy, Eq, PartialEq, Hash)] +pub struct DebugInternedId { + value: u32, + #[cfg_attr(feature = "serde", serde(skip))] + name: &'static str, +} + +impl DebugInternedId { + #[allow(unused)] + fn new(value: u32, debug_name: &'static str) -> Self { + Self { + value, + name: debug_name, + } + } +} + +impl std::fmt::Debug for DebugInternedId { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "#{}-{}", self.name, self.value) + } +} + +/// ID uniquely identifying a range in a document. +/// +/// See [`crate::Formatter::intern`] on how to intern content. 
+#[repr(transparent)] +#[derive(Clone, Copy, Eq, PartialEq, Hash)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +#[allow(unreachable_pub)] +pub struct ReleaseInternedId { + value: u32, +} + +impl ReleaseInternedId { + #[allow(unused)] + fn new(value: u32, _: &'static str) -> Self { + Self { value } + } +} + +impl std::fmt::Debug for ReleaseInternedId { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "#{}", self.value) + } +} + +#[cfg(not(debug_assertions))] +#[allow(unreachable_pub)] +pub type InternedId = ReleaseInternedId; +#[cfg(debug_assertions)] +#[allow(unreachable_pub)] +pub type InternedId = DebugInternedId; + +impl From for u32 { + fn from(id: InternedId) -> Self { + id.value + } +} + +/// Builder to construct [`InternedId`]s that are unique if created with the same builder. +pub(super) struct UniqueInternedIdBuilder { + next_id: AtomicU32, +} + +impl UniqueInternedIdBuilder { + /// Creates a new unique [`InternedId`] with the given debug name. + pub(crate) fn new_id(&self, debug_name: &'static str) -> InternedId { + let id = self.next_id.fetch_add(1, Ordering::Relaxed); + + if id == u32::MAX { + panic!("Interned ID counter overflowed"); + } + + InternedId::new(id, debug_name) + } +} + +impl Default for UniqueInternedIdBuilder { + fn default() -> Self { + UniqueInternedIdBuilder { + next_id: AtomicU32::new(0), + } + } +} + +/// Map indexed by [`InternedId`]. Uses a [`Vec`] internally, making use of the fact that +/// [`InternedId`]s are monotonic. 
+#[derive(Clone, Debug, Eq, PartialEq, Default)] +pub struct InternedIndex(Vec>); + +impl InternedIndex { + pub fn new() -> Self { + Self(Vec::new()) + } + + pub fn get(&self, id: InternedId) -> Option<&T> { + let index = u32::from(id) as usize; + + match self.0.get(index) { + Some(Some(value)) => Some(value), + Some(None) | None => None, + } + } + + pub fn insert(&mut self, id: InternedId, value: T) { + let index = u32::from(id) as usize; + + if self.0.len() <= index { + self.0.resize_with(index + 1, || None); + } + + self.0[index] = Some(value); + } +} diff --git a/crates/ruff_formatter/src/lib.rs b/crates/ruff_formatter/src/lib.rs index 11fee92fabfc4..4e41ca881976a 100644 --- a/crates/ruff_formatter/src/lib.rs +++ b/crates/ruff_formatter/src/lib.rs @@ -19,14 +19,16 @@ //! * [`format_args!`]: Concatenates a sequence of Format objects. //! * [`write!`]: Writes a sequence of formatable objects into an output buffer. +extern crate core; + mod arguments; mod buffer; mod builders; pub mod diagnostics; pub mod format_element; -mod format_extensions; pub mod formatter; pub mod group_id; +pub mod interned_id; pub mod macros; pub mod prelude; pub mod printer; @@ -49,6 +51,7 @@ pub use builders::BestFitting; pub use source_code::{SourceCode, SourceCodeSlice}; pub use crate::diagnostics::{ActualStart, FormatError, InvalidDocumentError, PrintError}; +use crate::interned_id::{InternedId, UniqueInternedIdBuilder}; pub use format_element::{normalize_newlines, FormatElement, LINE_TERMINATORS}; pub use group_id::GroupId; use ruff_text_size::{TextRange, TextSize}; @@ -779,6 +782,7 @@ pub struct FormatState { context: Context, group_id_builder: UniqueGroupIdBuilder, + interned_id_builder: UniqueInternedIdBuilder, } #[allow(clippy::missing_fields_in_debug)] @@ -799,6 +803,7 @@ impl FormatState { Self { context, group_id_builder: UniqueGroupIdBuilder::default(), + interned_id_builder: UniqueInternedIdBuilder::default(), } } @@ -816,10 +821,17 @@ impl FormatState { &mut self.context } 
- /// Creates a new group id that is unique to this document. The passed debug name is used in the + /// Creates a [`GroupId`] that is unique to this document. The passed debug name is used in the /// [`std::fmt::Debug`] of the document if this is a debug build. - /// The name is unused for production builds and has no meaning on the equality of two group ids. + /// The name is unused for production builds and has no meaning on the equality of two [`GroupId`]s. pub fn group_id(&self, debug_name: &'static str) -> GroupId { - self.group_id_builder.group_id(debug_name) + self.group_id_builder.new_id(debug_name) + } + + /// Creates a new [`InternedId`] that is unique to this document. The passed debug name is used in the + /// [`std::fmt::Debug`] of the document if this is a debug build. + /// The name is unused for production builds and has no meaning on the equality of [`InternedId`]s. + pub fn interned_id(&self, debug_name: &'static str) -> InternedId { + self.interned_id_builder.new_id(debug_name) } } diff --git a/crates/ruff_formatter/src/prelude.rs b/crates/ruff_formatter/src/prelude.rs index fb8b58d6e3930..f315254b82e63 100644 --- a/crates/ruff_formatter/src/prelude.rs +++ b/crates/ruff_formatter/src/prelude.rs @@ -2,7 +2,6 @@ pub use crate::builders::*; pub use crate::format_element::document::Document; pub use crate::format_element::tag::{LabelId, Tag, TagKind}; pub use crate::format_element::*; -pub use crate::format_extensions::{MemoizeFormat, Memoized}; pub use crate::formatter::Formatter; pub use crate::printer::PrinterOptions; diff --git a/crates/ruff_formatter/src/printer/mod.rs b/crates/ruff_formatter/src/printer/mod.rs index 951ce5633c127..0ab3d750514f5 100644 --- a/crates/ruff_formatter/src/printer/mod.rs +++ b/crates/ruff_formatter/src/printer/mod.rs @@ -9,6 +9,7 @@ use ruff_text_size::{Ranged, TextLen, TextSize}; use crate::format_element::document::Document; use crate::format_element::tag::{Condition, GroupMode}; use 
crate::format_element::{BestFittingMode, BestFittingVariants, LineMode, PrintMode}; +use crate::interned_id::{InternedId, InternedIndex}; use crate::prelude::tag::{DedentMode, Tag, TagKind, VerbatimKind}; use crate::prelude::{tag, TextWidth}; use crate::printer::call_stack::{ @@ -155,9 +156,7 @@ impl<'a> Printer<'a> { self.print_best_fitting(variants, *mode, queue, stack)?; } - FormatElement::Interned(content) => { - queue.extend_back(content); - } + FormatElement::Reference(id) => queue.extend_back(self.state.interned.get(*id)?), FormatElement::Tag(StartGroup(group)) => { let print_mode = match group.mode() { @@ -288,6 +287,17 @@ impl<'a> Printer<'a> { stack.push(TagKind::FitsExpanded, args); } + FormatElement::Tag(Tag::StartInterned { id }) => { + let slice = queue.top_slice().unwrap(); + let length = queue.iter_content(TagKind::Interned).count(); + + self.state.interned.insert(*id, &slice[..length]); + } + + FormatElement::Tag(EndInterned) => { + // Handled in `StartInterned` + } + FormatElement::Tag( tag @ (StartLabelled(_) | StartEntry | StartBestFittingEntry { .. }), ) => { @@ -825,6 +835,7 @@ struct PrinterState<'a> { line_suffixes: LineSuffixes<'a>, verbatim_markers: Vec, group_modes: GroupModes, + interned: Interned<'a>, // Re-used queue to measure if a group fits. 
Optimisation to avoid re-allocating a new // vec every time a group gets measured fits_stack: Vec, @@ -856,18 +867,32 @@ impl GroupModes { self.0[index] = Some(mode); } - fn get_print_mode(&self, group_id: GroupId) -> PrintResult { - let index = u32::from(group_id) as usize; + fn get_print_mode(&self, id: GroupId) -> PrintResult { + let index = u32::from(id) as usize; match self.0.get(index) { Some(Some(print_mode)) => Ok(*print_mode), None | Some(None) => Err(PrintError::InvalidDocument( - InvalidDocumentError::UnknownGroupId { group_id }, + InvalidDocumentError::UnknownGroupId { id }, )), } } } +#[derive(Debug, Default)] +struct Interned<'a>(InternedIndex<&'a [FormatElement]>); + +impl<'a> Interned<'a> { + fn insert(&mut self, id: InternedId, slice: &'a [FormatElement]) { + self.0.insert(id, slice); + } + + fn get(&self, id: InternedId) -> PrintResult<&'a [FormatElement]> { + self.0.get(id).copied().ok_or(PrintError::InvalidDocument( + InvalidDocumentError::UnknownInternedId { id }, + )) + } +} #[derive(Copy, Clone, Eq, PartialEq, Debug)] enum Indention { /// Indent the content by `count` levels by using the indention sequence specified by the printer options. 
@@ -1178,7 +1203,9 @@ impl<'a, 'print> FitsMeasurer<'a, 'print> { self.queue.extend_back(&slice[1..]); } - FormatElement::Interned(content) => self.queue.extend_back(content), + FormatElement::Reference(id) => self + .queue + .extend_back(self.printer.state.interned.get(*id)?), FormatElement::Tag(StartIndent) => { self.stack.push( @@ -1298,6 +1325,17 @@ impl<'a, 'print> FitsMeasurer<'a, 'print> { } } + FormatElement::Tag(StartInterned { id }) => { + let slice = self.queue.top_slice().unwrap(); + let length = self.queue.iter_content(TagKind::Interned).count(); + + self.printer.state.interned.insert(*id, &slice[..length]); + } + + FormatElement::Tag(EndInterned) => { + // Handled in `StartInterned` + } + FormatElement::Tag( tag @ (StartFill | StartVerbatim(_) diff --git a/crates/ruff_formatter/src/printer/queue.rs b/crates/ruff_formatter/src/printer/queue.rs index 07b83a03fa74f..bc12af1af9e14 100644 --- a/crates/ruff_formatter/src/printer/queue.rs +++ b/crates/ruff_formatter/src/printer/queue.rs @@ -11,25 +11,16 @@ pub(super) trait Queue<'a> { /// Pops the element at the end of the queue. fn pop(&mut self) -> Option<&'a FormatElement>; - /// Returns the next element, not traversing into [`FormatElement::Interned`]. - fn top_with_interned(&self) -> Option<&'a FormatElement>; - - /// Returns the next element, recursively resolving the first element of [`FormatElement::Interned`]. - fn top(&self) -> Option<&'a FormatElement> { - let mut top = self.top_with_interned(); - - while let Some(FormatElement::Interned(interned)) = top { - top = interned.first(); - } - - top - } + /// Returns the next element, not traversing into [`FormatElement::Reference`]. + fn top(&self) -> Option<&'a FormatElement>; /// Queues a single element to process before the other elements in this queue.
fn push(&mut self, element: &'a FormatElement) { self.extend_back(std::slice::from_ref(element)); } + fn top_slice(&self) -> Option<&'a [FormatElement]>; + /// Queues a slice of elements to process before the other elements in this queue. fn extend_back(&mut self, elements: &'a [FormatElement]); @@ -85,7 +76,7 @@ impl<'a> Queue<'a> for PrintQueue<'a> { }) } - fn top_with_interned(&self) -> Option<&'a FormatElement> { + fn top(&self) -> Option<&'a FormatElement> { let mut slices = self.element_slices.iter().rev(); let slice = slices.next()?; @@ -101,6 +92,11 @@ impl<'a> Queue<'a> for PrintQueue<'a> { } } + fn top_slice(&self) -> Option<&'a [FormatElement]> { + let mut slice_iters = self.element_slices.iter().rev(); + Some(slice_iters.next()?.as_slice()) + } + fn extend_back(&mut self, elements: &'a [FormatElement]) { if !elements.is_empty() { self.element_slices.push(elements.iter()); @@ -156,8 +152,8 @@ impl<'a, 'print> Queue<'a> for FitsQueue<'a, 'print> { }) } - fn top_with_interned(&self) -> Option<&'a FormatElement> { - self.queue.top_with_interned().or_else(|| { + fn top(&self) -> Option<&'a FormatElement> { + self.queue.top().or_else(|| { if let Some(next_elements) = self.rest_elements.as_slice().last() { next_elements.as_slice().first() } else { @@ -166,6 +162,15 @@ impl<'a, 'print> Queue<'a> for FitsQueue<'a, 'print> { }) } + fn top_slice(&self) -> Option<&'a [FormatElement]> { + self.queue.top_slice().or_else(|| { + self.rest_elements + .as_slice() + .last() + .map(std::slice::Iter::as_slice) + }) + } + fn extend_back(&mut self, elements: &'a [FormatElement]) { if !elements.is_empty() { self.queue.extend_back(elements); @@ -184,7 +189,7 @@ impl<'a, 'print> Queue<'a> for FitsQueue<'a, 'print> { /// Iterator that calls [`Queue::pop`] until it reaches the end of the document. /// -/// The iterator traverses into the content of any [`FormatElement::Interned`]. +/// The iterator traverses into the content of any [`FormatElement::Reference`]. 
pub(super) struct QueueIterator<'a, 'q, Q: Queue<'a>> { queue: &'q mut Q, lifetime: PhantomData<&'a ()>, @@ -234,12 +239,7 @@ where if self.depth == 0 { None } else { - let mut top = self.queue.pop(); - - while let Some(FormatElement::Interned(interned)) = top { - self.queue.extend_back(interned); - top = self.queue.pop(); - } + let top = self.queue.pop(); match top.expect("Missing end signal.") { element @ FormatElement::Tag(tag) if tag.kind() == self.kind => { @@ -325,7 +325,7 @@ impl FitsEndPredicate for SingleEntryPredicate { is_end } - FormatElement::Interned(_) => false, + FormatElement::Reference(_) => false, element if *depth == 0 => { return invalid_start_tag(TagKind::Entry, Some(element)); } diff --git a/crates/ruff_python_formatter/src/expression/mod.rs b/crates/ruff_python_formatter/src/expression/mod.rs index dd20097a7e3b1..ce8e06bd9bd75 100644 --- a/crates/ruff_python_formatter/src/expression/mod.rs +++ b/crates/ruff_python_formatter/src/expression/mod.rs @@ -247,13 +247,17 @@ impl Format> for MaybeParenthesizeExpression<'_> { Parenthesize::IfBreaks => { let group_id = f.group_id("optional_parentheses"); let f = &mut WithNodeLevel::new(NodeLevel::Expression(Some(group_id)), f); - let mut format_expression = expression - .format() - .with_options(Parentheses::Never) - .memoized(); + + let interned = f.intern_inspect( + &expression.format().with_options(Parentheses::Never), + "expression", + )?; + + let breaks = interned.will_break(); + let format_expression = interned.into_interned(); // Don't use best fitting if it is known that the expression can never fit - if format_expression.inspect(f)?.will_break() { + if breaks { // The group here is necessary because `format_expression` may contain IR elements // that refer to the group id group(&format_args![