Skip to content

Commit

Permalink
Implement support for Jupyter Notebooks in ruff server
Browse files Browse the repository at this point in the history
  • Loading branch information
snowsignal committed May 14, 2024
1 parent a347a1b commit a8e00a6
Show file tree
Hide file tree
Showing 38 changed files with 1,351 additions and 614 deletions.
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion crates/ruff_notebook/src/cell.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ impl fmt::Display for SourceValue {

impl Cell {
/// Return the [`SourceValue`] of the cell.
pub(crate) fn source(&self) -> &SourceValue {
pub fn source(&self) -> &SourceValue {
match self {
Cell::Code(cell) => &cell.source,
Cell::Markdown(cell) => &cell.source,
Expand Down
25 changes: 24 additions & 1 deletion crates/ruff_notebook/src/notebook.rs
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,23 @@ impl Notebook {
Self::from_reader(Cursor::new(source_code))
}

/// Builds a stand-in [`Notebook`] directly from in-memory cells and metadata,
/// for use by the language server when linting.
///
/// The result is not derived from the raw JSON of a notebook file, so it should
/// not be written back to the file system. The notebook is always tagged as
/// format version 4.5 and is created without a trailing newline.
///
/// # Errors
///
/// Propagates any [`NotebookError`] produced by `from_raw`.
pub fn from_cells(
    cells: Vec<Cell>,
    metadata: crate::RawNotebookMetadata,
) -> Result<Self, NotebookError> {
    Self::from_raw(
        RawNotebook {
            cells,
            metadata,
            nbformat: 4,
            nbformat_minor: 5,
        },
        // There is no source file, hence no trailing newline to preserve.
        false,
    )
}

/// Read a Jupyter Notebook from a [`Read`] implementer.
///
/// See also the black implementation
Expand All @@ -98,7 +115,7 @@ impl Notebook {
reader.read_exact(&mut buf).is_ok_and(|()| buf[0] == b'\n')
});
reader.rewind()?;
let mut raw_notebook: RawNotebook = match serde_json::from_reader(reader.by_ref()) {
let raw_notebook: RawNotebook = match serde_json::from_reader(reader.by_ref()) {
Ok(notebook) => notebook,
Err(err) => {
// Translate the error into a diagnostic
Expand All @@ -113,7 +130,13 @@ impl Notebook {
});
}
};
Self::from_raw(raw_notebook, trailing_newline)
}

fn from_raw(
mut raw_notebook: RawNotebook,
trailing_newline: bool,
) -> Result<Self, NotebookError> {
// v4 is what everybody uses
if raw_notebook.nbformat != 4 {
// bail because we should have already failed at the json schema stage
Expand Down
1 change: 1 addition & 0 deletions crates/ruff_server/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ ruff_python_codegen = { workspace = true }
ruff_python_formatter = { workspace = true }
ruff_python_index = { workspace = true }
ruff_python_parser = { workspace = true }
ruff_notebook = { path = "../ruff_notebook" }
ruff_source_file = { workspace = true }
ruff_text_size = { workspace = true }
ruff_workspace = { workspace = true }
Expand Down
71 changes: 68 additions & 3 deletions crates/ruff_server/src/edit.rs
Original file line number Diff line number Diff line change
@@ -1,18 +1,20 @@
//! Types and utilities for working with text, modifying source files, and `Ruff <-> LSP` type conversion.

mod document;
mod notebook;
mod range;
mod replacement;

use std::collections::HashMap;
use std::{collections::HashMap, ffi::OsStr, path::PathBuf};

pub use document::Document;
pub(crate) use document::DocumentVersion;
pub use document::TextDocument;
use lsp_types::PositionEncodingKind;
pub(crate) use notebook::NotebookDocument;
pub(crate) use range::{RangeExt, ToRangeExt};
pub(crate) use replacement::Replacement;

use crate::session::ResolvedClientCapabilities;
use crate::{fix::Fixes, session::ResolvedClientCapabilities};

/// A convenient enumeration for supported text encodings. Can be converted to [`lsp_types::PositionEncodingKind`].
// Please maintain the order from least to greatest priority for the derived `Ord` impl.
Expand All @@ -29,6 +31,57 @@ pub enum PositionEncoding {
UTF8,
}

/// A unique document ID, derived from a URL passed as part of an LSP request.
/// This document ID can point to a standalone Python file, a full notebook, or a cell within a notebook.
#[derive(Clone, Debug)]
pub(crate) enum DocumentKey {
/// A notebook, identified by the file path of a `file` URL whose extension is `ipynb`.
Notebook(PathBuf),
/// A notebook cell, identified by its full URL. `from_url` produces this for any URL
/// that does not use the `file` scheme or cannot be converted to a file path.
NotebookCell(lsp_types::Url),
/// A text document, identified by the file path of a `file` URL that is not a notebook.
Text(PathBuf),
}

impl DocumentKey {
    /// Creates a document key from a URL provided in an LSP request.
    ///
    /// URLs that do not use the `file` scheme, or that cannot be turned into a
    /// file path, are treated as notebook cells. File paths ending in `.ipynb`
    /// become notebooks; every other file path becomes a text document.
    pub(crate) fn from_url(url: &lsp_types::Url) -> Self {
        // Only `file` URLs are ever asked for a local path.
        let local_path = (url.scheme() == "file")
            .then(|| url.to_file_path().ok())
            .flatten();
        let Some(path) = local_path else {
            return Self::NotebookCell(url.clone());
        };

        // figure out whether this is a notebook or a text document
        let is_notebook = path.extension() == Some(OsStr::new("ipynb"));
        if is_notebook {
            return Self::Notebook(path);
        }

        // Until we support additional document types, we need to confirm
        // that any non-notebook file is a Python file
        debug_assert_eq!(path.extension(), Some(OsStr::new("py")));
        Self::Text(path)
    }

    /// Converts the key back into its original URL.
    ///
    /// # Panics
    ///
    /// Panics if a notebook or text path cannot be converted back into a file URL.
    pub(crate) fn into_url(self) -> lsp_types::Url {
        let path = match self {
            // Cell keys carry their URL verbatim.
            DocumentKey::NotebookCell(url) => return url,
            DocumentKey::Notebook(path) | DocumentKey::Text(path) => path,
        };
        lsp_types::Url::from_file_path(path)
            .expect("file path originally from URL should convert back to URL")
    }
}

impl std::fmt::Display for DocumentKey {
    /// Formats path-based keys using the path's display form and cell keys
    /// using the URL's own formatting, forwarding the caller's formatter.
    fn fmt(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::Notebook(path) | Self::Text(path) => path.display().fmt(formatter),
            Self::NotebookCell(url) => url.fmt(formatter),
        }
    }
}

/// Tracks multi-document edits to eventually merge into a `WorkspaceEdit`.
/// Compatible with clients that don't support `workspace.workspaceEdit.documentChanges`.
#[derive(Debug)]
Expand Down Expand Up @@ -72,6 +125,18 @@ impl WorkspaceEditTracker {
}
}

/// Registers every entry of `fixes` (one set of edits per document URI) for a
/// text or notebook document, all under the same `version`.
///
/// Stops at the first entry that `set_edits_for_document` rejects and returns
/// that error; entries registered before the failure stay registered.
pub(crate) fn set_fixes_for_document(
    &mut self,
    fixes: Fixes,
    version: DocumentVersion,
) -> crate::Result<()> {
    fixes
        .into_iter()
        .try_for_each(|(uri, edits)| self.set_edits_for_document(uri, version, edits))
}

/// Sets the edits made to a specific document. This should only be called
/// once for each document `uri`, and will fail if this is called for the same `uri`
/// multiple times.
Expand Down
6 changes: 3 additions & 3 deletions crates/ruff_server/src/edit/document.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,10 @@ use super::RangeExt;

pub(crate) type DocumentVersion = i32;

/// The state for an individual document in the server. Stays up-to-date
/// The state of an individual document in the server. Stays up-to-date
/// with changes made by the user, including unsaved changes.
#[derive(Debug, Clone)]
pub struct Document {
pub struct TextDocument {
/// The string contents of the document.
contents: String,
/// A computed line index for the document. This should always reflect
Expand All @@ -22,7 +22,7 @@ pub struct Document {
version: DocumentVersion,
}

impl Document {
impl TextDocument {
pub fn new(contents: String, version: DocumentVersion) -> Self {
let index = LineIndex::from_source_text(&contents);
Self {
Expand Down
189 changes: 189 additions & 0 deletions crates/ruff_server/src/edit/notebook.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,189 @@
use std::{collections::HashMap, hash::BuildHasherDefault};

use anyhow::Ok;
use lsp_types::{NotebookCellKind, Url};
use rustc_hash::FxHashMap;

use crate::{PositionEncoding, TextDocument};

use super::DocumentVersion;

/// The state of a notebook document in the server. Contains an array of cells whose
/// contents are internally represented by [`TextDocument`]s.
#[derive(Clone, Debug)]
pub(crate) struct NotebookDocument {
/// The notebook's cells. Positions in this vector are what `cell_index` refers to.
cells: Vec<NotebookCell>,
/// Notebook-level metadata, handed to `ruff_notebook::Notebook::from_cells`
/// by `make_ruff_notebook`.
metadata: ruff_notebook::RawNotebookMetadata,
/// The version most recently passed to `new` or `update`.
version: DocumentVersion,
/// Used to quickly find the index of a cell for a given URL.
/// Must be rebuilt whenever the layout of `cells` changes.
cell_index: FxHashMap<lsp_types::Url, usize>,
}

/// A single cell of a [`NotebookDocument`].
#[derive(Clone, Debug)]
struct NotebookCell {
/// The URL identifying this cell; used as the key of the notebook's cell index.
url: Url,
/// Whether the cell is a code or a markup cell.
kind: NotebookCellKind,
/// The text contents of the cell.
document: TextDocument,
}

impl NotebookDocument {
pub(crate) fn new(
version: DocumentVersion,
cells: Vec<lsp_types::NotebookCell>,
metadata: serde_json::Map<String, serde_json::Value>,
cell_documents: Vec<lsp_types::TextDocumentItem>,
) -> crate::Result<Self> {
let mut cell_contents: FxHashMap<_, _> = cell_documents
.into_iter()
.map(|document| (document.uri, document.text))
.collect();

let cells: Vec<_> = cells
.into_iter()
.map(|cell| {
let contents = cell_contents.remove(&cell.document).unwrap_or_default();
NotebookCell::new(cell, contents, version)
})
.collect();

Ok(Self {
version,
cell_index: Self::make_cell_index(cells.as_slice()),
metadata: serde_json::from_value(serde_json::Value::Object(metadata))?,
cells,
})
}

/// Generates a pseudo-representation of a notebook that lacks per-cell metadata and contextual information
/// but should still work with Ruff's linter.
pub(crate) fn make_ruff_notebook(&self) -> ruff_notebook::Notebook {
let cells = self
.cells
.iter()
.map(|cell| match cell.kind {
NotebookCellKind::Code => ruff_notebook::Cell::Code(ruff_notebook::CodeCell {
execution_count: None,
id: None,
metadata: serde_json::Value::Null,
outputs: vec![],
source: ruff_notebook::SourceValue::String(
cell.document.contents().to_string(),
),
}),
NotebookCellKind::Markup => {
ruff_notebook::Cell::Markdown(ruff_notebook::MarkdownCell {
attachments: None,
id: None,
metadata: serde_json::Value::Null,
source: ruff_notebook::SourceValue::String(
cell.document.contents().to_string(),
),
})
}
})
.collect();

ruff_notebook::Notebook::from_cells(cells, self.metadata.clone())
.expect("notebook should convert successfully")
}

pub(crate) fn update(
&mut self,
cells: Option<lsp_types::NotebookDocumentCellChange>,
metadata_change: Option<serde_json::Map<String, serde_json::Value>>,
version: DocumentVersion,
encoding: PositionEncoding,
) -> crate::Result<()> {
self.version = version;

if let Some(lsp_types::NotebookDocumentCellChange {
structure,
data,
text_content,
}) = cells
{
if let Some(structure) = structure {
let start = usize::try_from(structure.array.start).unwrap();
let delete = usize::try_from(structure.array.delete_count).unwrap();
if delete > 0 {
self.cells.drain(start..start + delete);
}
for cell in structure.array.cells.into_iter().flatten().rev() {
self.cells
.insert(start, NotebookCell::new(cell, String::new(), version));
}

// the array has been updated - rebuild the cell index
self.rebuild_cell_index();
}
if let Some(cell_data) = data {
for cell in cell_data {
if let Some(existing_cell) = self.cell_by_uri_mut(&cell.document) {
existing_cell.kind = cell.kind;
}
}
}
if let Some(content_changes) = text_content {
for content_change in content_changes {
if let Some(cell) = self.cell_by_uri_mut(&content_change.document.uri) {
cell.document
.apply_changes(content_change.changes, version, encoding);
}
}
}
}
if let Some(metadata_change) = metadata_change {
self.metadata = serde_json::from_value(serde_json::Value::Object(metadata_change))?;
}
Ok(())
}

pub(crate) fn version(&self) -> DocumentVersion {
self.version
}

pub(crate) fn cell_uri_by_index(&self, index: usize) -> Option<&lsp_types::Url> {
self.cells.get(index).map(|cell| &cell.url)
}

pub(crate) fn cell_document_by_uri(&self, uri: &lsp_types::Url) -> Option<&TextDocument> {
self.cells
.get(*self.cell_index.get(uri)?)
.map(|cell| &cell.document)
}

pub(crate) fn urls(&self) -> impl Iterator<Item = &lsp_types::Url> {
self.cells.iter().map(|cell| &cell.url)
}

fn cell_by_uri_mut(&mut self, uri: &lsp_types::Url) -> Option<&mut NotebookCell> {
self.cells.get_mut(*self.cell_index.get(uri)?)
}

fn rebuild_cell_index(&mut self) {
self.cell_index = Self::make_cell_index(&self.cells);
}

fn make_cell_index(cells: &[NotebookCell]) -> FxHashMap<lsp_types::Url, usize> {
let mut index =
HashMap::with_capacity_and_hasher(cells.len(), BuildHasherDefault::default());
for (i, cell) in cells.iter().enumerate() {
index.insert(cell.url.clone(), i);
}
index
}
}

impl NotebookCell {
    /// Creates a cell from its LSP description, wrapping `contents` in a fresh
    /// [`TextDocument`] at the given `version`. Only the cell's document URL and
    /// kind are taken from `cell`.
    pub(crate) fn new(
        cell: lsp_types::NotebookCell,
        contents: String,
        version: DocumentVersion,
    ) -> Self {
        let lsp_types::NotebookCell {
            document: url,
            kind,
            ..
        } = cell;
        let document = TextDocument::new(contents, version);
        Self {
            url,
            kind,
            document,
        }
    }
}
Loading

0 comments on commit a8e00a6

Please sign in to comment.