Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 0 additions & 1 deletion crates/ruff_formatter/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@ license = { workspace = true }
ruff_text_size = { path = "../ruff_text_size" }

drop_bomb = { version = "0.1.5" }
rustc-hash = { workspace = true }
schemars = { workspace = true, optional = true }
serde = { workspace = true, optional = true }
static_assertions = { workspace = true }
Expand Down
100 changes: 13 additions & 87 deletions crates/ruff_formatter/src/buffer.rs
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
use super::{write, Arguments, FormatElement};
use crate::format_element::Interned;
use crate::prelude::LineMode;
use crate::{FormatResult, FormatState};
use rustc_hash::FxHashMap;
use std::any::{Any, TypeId};
use std::fmt::Debug;
use std::ops::{Deref, DerefMut};

use crate::prelude::LineMode;
use crate::{FormatResult, FormatState};

use super::{write, Arguments, FormatElement};

/// A trait for writing or formatting into [`FormatElement`]-accepting buffers or streams.
pub trait Buffer {
/// The context used during formatting
Expand Down Expand Up @@ -341,89 +341,12 @@ where
/// ```
pub struct RemoveSoftLinesBuffer<'a, Context> {
inner: &'a mut dyn Buffer<Context = Context>,

/// Caches the interned elements after the soft line breaks have been removed.
///
/// The `key` is the [Interned] element as it has been passed to [Self::write_element] or the child of another
/// [Interned] element. The `value` is the matching document of the key where all soft line breaks have been removed.
///
/// It's fine to not snapshot the cache. The worst that can happen is that it holds on to interned elements
/// that are now unused. But there's little harm in that and the cache is cleaned when dropping the buffer.
interned_cache: FxHashMap<Interned, Interned>,
}

impl<'a, Context> RemoveSoftLinesBuffer<'a, Context> {
/// Creates a new buffer that removes the soft line breaks before writing them into `buffer`.
pub fn new(inner: &'a mut dyn Buffer<Context = Context>) -> Self {
Self {
inner,
interned_cache: FxHashMap::default(),
}
}

/// Removes the soft line breaks from an interned element.
fn clean_interned(&mut self, interned: &Interned) -> Interned {
clean_interned(interned, &mut self.interned_cache)
}
}

// Extracted to function to avoid monomorphization
fn clean_interned(
interned: &Interned,
interned_cache: &mut FxHashMap<Interned, Interned>,
) -> Interned {
if let Some(cleaned) = interned_cache.get(interned) {
cleaned.clone()
} else {
// Find the first soft line break element or interned element that must be changed
let result = interned
.iter()
.enumerate()
.find_map(|(index, element)| match element {
FormatElement::Line(LineMode::Soft | LineMode::SoftOrSpace) => {
let mut cleaned = Vec::new();
cleaned.extend_from_slice(&interned[..index]);
Some((cleaned, &interned[index..]))
}
FormatElement::Interned(inner) => {
let cleaned_inner = clean_interned(inner, interned_cache);

if &cleaned_inner == inner {
None
} else {
let mut cleaned = Vec::with_capacity(interned.len());
cleaned.extend_from_slice(&interned[..index]);
cleaned.push(FormatElement::Interned(cleaned_inner));
Some((cleaned, &interned[index + 1..]))
}
}

_ => None,
});

let result = match result {
// Copy the whole interned buffer so that it becomes possible to change the necessary elements.
Some((mut cleaned, rest)) => {
for element in rest {
let element = match element {
FormatElement::Line(LineMode::Soft) => continue,
FormatElement::Line(LineMode::SoftOrSpace) => FormatElement::Space,
FormatElement::Interned(interned) => {
FormatElement::Interned(clean_interned(interned, interned_cache))
}
element => element.clone(),
};
cleaned.push(element);
}

Interned::new(cleaned)
}
// No change necessary, return existing interned element
None => interned.clone(),
};

interned_cache.insert(interned.clone(), result.clone());
result
Self { inner }
}
}

Expand All @@ -434,9 +357,6 @@ impl<Context> Buffer for RemoveSoftLinesBuffer<'_, Context> {
let element = match element {
FormatElement::Line(LineMode::Soft) => return,
FormatElement::Line(LineMode::SoftOrSpace) => FormatElement::Space,
FormatElement::Interned(interned) => {
FormatElement::Interned(self.clean_interned(&interned))
}
element => element,
};

Expand Down Expand Up @@ -574,10 +494,16 @@ where
#[derive(Debug, Copy, Clone)]
pub struct Recorded<'a>(&'a [FormatElement]);

impl<'a> Recorded<'a> {
/// Returns the wrapped elements as a slice.
///
/// Takes `self` by value and hands back the inner reference directly, so the returned slice
/// carries the full `'a` lifetime instead of being tied to a borrow of `self`.
pub fn as_slice(self) -> &'a [FormatElement] {
self.0
}
}

impl Deref for Recorded<'_> {
type Target = [FormatElement];

fn deref(&self) -> &Self::Target {
self.0
self.as_slice()
}
}
14 changes: 11 additions & 3 deletions crates/ruff_formatter/src/diagnostics.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
use crate::interned_id::InternedId;
use crate::prelude::TagKind;
use crate::GroupId;
use ruff_text_size::TextRange;
Expand Down Expand Up @@ -101,7 +102,11 @@ pub enum InvalidDocumentError {
},

UnknownGroupId {
group_id: GroupId,
id: GroupId,
},

UnknownInternedId {
id: InternedId,
},
}

Expand Down Expand Up @@ -155,8 +160,11 @@ impl std::fmt::Display for InvalidDocumentError {
}
}
}
InvalidDocumentError::UnknownGroupId { group_id } => {
std::write!(f, "Encountered unknown group id {group_id:?}. Ensure that the group with the id {group_id:?} exists and that the group is a parent of or comes before the element referring to it.")
InvalidDocumentError::UnknownGroupId { id } => {
std::write!(f, "Encountered unknown group id {id:?}. Ensure that the group with the id {id:?} exists and that the group is a parent of or comes before the element referring to it.")
}
InvalidDocumentError::UnknownInternedId { id } => {
std::write!(f, "Encountered unknown interned id {id:?}. Ensure that the interned element with the id {id:?} exists and that appears before the element referring to it and isn't recursive.")
}
}
}
Expand Down
103 changes: 27 additions & 76 deletions crates/ruff_formatter/src/format_element.rs
Original file line number Diff line number Diff line change
@@ -1,18 +1,19 @@
pub mod document;
pub mod tag;

use std::borrow::Cow;
use std::hash::{Hash, Hasher};
use std::iter::FusedIterator;
use std::num::NonZeroU32;
use std::ops::Deref;
use std::rc::Rc;

use unicode_width::UnicodeWidthChar;

use ruff_text_size::TextSize;

use crate::format_element::tag::{GroupMode, LabelId, Tag};
use crate::interned_id::InternedId;
use crate::source_code::SourceCodeSlice;
use crate::{IndentWidth, TagKind};
use ruff_text_size::TextSize;

pub mod document;
pub mod tag;

/// Language agnostic IR for formatting source code.
///
Expand All @@ -34,7 +35,9 @@ pub enum FormatElement {
SourcePosition(TextSize),

/// An ASCII-only token that contains no line breaks or tab characters.
Token { text: &'static str },
Token {
text: &'static str,
},

/// An arbitrary text that can contain tabs, newlines, and unicode characters.
Text {
Expand All @@ -53,9 +56,7 @@ pub enum FormatElement {
/// line suffixes, potentially by inserting a hard line break.
LineSuffixBoundary,

/// An interned format element. Useful when the same content must be emitted multiple times to avoid
/// deep cloning the IR when using the `best_fitting!` macro or `if_group_fits_on_line` and `if_group_breaks`.
Interned(Interned),
Reference(InternedId),

/// A list of different variants representing the same content. The printer picks the best fitting content.
/// Line breaks inside of a best fitting don't propagate to parent groups.
Expand All @@ -68,16 +69,6 @@ pub enum FormatElement {
Tag(Tag),
}

impl FormatElement {
pub fn tag_kind(&self) -> Option<TagKind> {
if let FormatElement::Tag(tag) = self {
Some(tag.kind())
} else {
None
}
}
}

impl std::fmt::Debug for FormatElement {
fn fmt(&self, fmt: &mut std::fmt::Formatter) -> std::fmt::Result {
match self {
Expand All @@ -97,7 +88,8 @@ impl std::fmt::Debug for FormatElement {
.field("variants", variants)
.field("mode", &mode)
.finish(),
FormatElement::Interned(interned) => fmt.debug_list().entries(&**interned).finish(),
FormatElement::Reference(id) => fmt.debug_tuple("Reference").field(id).finish(),

FormatElement::Tag(tag) => fmt.debug_tuple("Tag").field(tag).finish(),
FormatElement::SourcePosition(position) => {
fmt.debug_tuple("SourcePosition").field(position).finish()
Expand Down Expand Up @@ -151,46 +143,6 @@ impl From<GroupMode> for PrintMode {
}
}

/// A reference-counted slice of [`FormatElement`]s.
///
/// The derived `Clone` clones the inner [`Rc`], so clones share the same allocation.
#[derive(Clone)]
pub struct Interned(Rc<[FormatElement]>);

impl Interned {
/// Creates an interned element by converting `content` into an `Rc<[FormatElement]>`.
pub(super) fn new(content: Vec<FormatElement>) -> Self {
Self(content.into())
}
}

/// Identity-based equality: two `Interned` values are equal only when both point to the same
/// allocation. The elements themselves are never compared, so two separately created values
/// with equal content are *not* considered equal.
impl PartialEq for Interned {
fn eq(&self, other: &Interned) -> bool {
Rc::ptr_eq(&self.0, &other.0)
}
}

impl Eq for Interned {}

/// Hashes the pointer to the shared allocation rather than the elements, which keeps the hash
/// in line with the pointer-based `PartialEq` implementation of this type.
impl Hash for Interned {
fn hash<H>(&self, hasher: &mut H)
where
H: Hasher,
{
Rc::as_ptr(&self.0).hash(hasher);
}
}

/// Formats the value by delegating to the `Debug` implementation of the wrapped
/// `Rc<[FormatElement]>`; no `Interned` wrapper name is added by this impl.
impl std::fmt::Debug for Interned {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
self.0.fmt(f)
}
}

/// Dereferences to the wrapped element slice, so slice operations (iteration, indexing, `len`)
/// can be used on an `Interned` directly.
impl Deref for Interned {
type Target = [FormatElement];

fn deref(&self) -> &Self::Target {
&self.0
}
}

const LINE_SEPARATOR: char = '\u{2028}';
const PARAGRAPH_SEPARATOR: char = '\u{2029}';
pub const LINE_TERMINATORS: [char; 3] = ['\r', LINE_SEPARATOR, PARAGRAPH_SEPARATOR];
Expand Down Expand Up @@ -257,18 +209,22 @@ impl FormatElement {
pub const fn is_space(&self) -> bool {
matches!(self, FormatElement::Space)
}
}

impl FormatElements for FormatElement {
fn will_break(&self) -> bool {
pub const fn tag_kind(&self) -> Option<TagKind> {
if let FormatElement::Tag(tag) = self {
Some(tag.kind())
} else {
None
}
}

pub fn will_break(&self) -> bool {
match self {
FormatElement::ExpandParent => true,
FormatElement::Tag(Tag::StartGroup(group)) => !group.mode().is_flat(),
FormatElement::Line(line_mode) => matches!(line_mode, LineMode::Hard | LineMode::Empty),
FormatElement::Text { text_width, .. } => text_width.is_multiline(),
FormatElement::SourceCodeSlice { text_width, .. } => text_width.is_multiline(),
FormatElement::Interned(interned) => interned.will_break(),
// Traverse into the most flat version because the content is guaranteed to expand when even
// the most flat version contains some content that forces a break.
FormatElement::BestFitting {
variants: best_fitting,
Expand All @@ -278,23 +234,19 @@ impl FormatElements for FormatElement {
| FormatElement::Space
| FormatElement::Tag(_)
| FormatElement::Token { .. }
| FormatElement::Reference(..)
| FormatElement::SourcePosition(_) => false,
}
}

fn has_label(&self, label_id: LabelId) -> bool {
pub fn has_label(&self, label_id: LabelId) -> bool {
match self {
FormatElement::Tag(Tag::StartLabelled(actual)) => *actual == label_id,
FormatElement::Interned(interned) => interned.deref().has_label(label_id),
_ => false,
}
}

fn start_tag(&self, _: TagKind) -> Option<&Tag> {
None
}

fn end_tag(&self, kind: TagKind) -> Option<&Tag> {
pub fn end_tag(&self, kind: TagKind) -> Option<&Tag> {
match self {
FormatElement::Tag(tag) if tag.kind() == kind && tag.is_end() => Some(tag),
_ => None,
Expand Down Expand Up @@ -522,7 +474,6 @@ impl TextWidth {

#[cfg(test)]
mod tests {

use crate::format_element::{normalize_newlines, LINE_TERMINATORS};

#[test]
Expand All @@ -547,13 +498,13 @@ mod sizes {
// be recomputed at a later point in time?
// You reduced the size of a format element? Excellent work!

use super::{BestFittingVariants, Interned, TextWidth};
use static_assertions::assert_eq_size;

use super::{BestFittingVariants, TextWidth};

assert_eq_size!(ruff_text_size::TextRange, [u8; 8]);
assert_eq_size!(TextWidth, [u8; 4]);
assert_eq_size!(super::tag::VerbatimKind, [u8; 8]);
assert_eq_size!(Interned, [u8; 16]);
assert_eq_size!(BestFittingVariants, [u8; 16]);

#[cfg(not(debug_assertions))]
Expand Down
Loading