From 7aca63a659fecc84fab83233ba760324aa731d5c Mon Sep 17 00:00:00 2001 From: Dhruv Manilawala Date: Tue, 26 Sep 2023 11:47:28 +0530 Subject: [PATCH] Add `cell` field to JSON output format --- crates/ruff_cli/tests/format.rs | 1 + .../integration_test__stdin_json.snap | 1 + crates/ruff_linter/src/message/json.rs | 100 +++++++++++++++-- crates/ruff_linter/src/message/json_lines.rs | 18 ++- ...message__json__tests__notebook_output.snap | 105 ++++++++++++++++++ ..._linter__message__json__tests__output.snap | 3 + ...e__json_lines__tests__notebook_output.snap | 8 ++ ...r__message__json_lines__tests__output.snap | 6 +- crates/ruff_notebook/src/index.rs | 15 ++- crates/ruff_source_file/src/lib.rs | 4 + crates/ruff_source_file/src/line_index.rs | 14 +++ 11 files changed, 256 insertions(+), 19 deletions(-) create mode 100644 crates/ruff_linter/src/message/snapshots/ruff_linter__message__json__tests__notebook_output.snap create mode 100644 crates/ruff_linter/src/message/snapshots/ruff_linter__message__json_lines__tests__notebook_output.snap diff --git a/crates/ruff_cli/tests/format.rs b/crates/ruff_cli/tests/format.rs index 8f90feb6148be..da62bd7781ebe 100644 --- a/crates/ruff_cli/tests/format.rs +++ b/crates/ruff_cli/tests/format.rs @@ -168,6 +168,7 @@ import os ----- stdout ----- [ { + "cell": null, "code": "F401", "end_location": { "column": 10, diff --git a/crates/ruff_cli/tests/snapshots/integration_test__stdin_json.snap b/crates/ruff_cli/tests/snapshots/integration_test__stdin_json.snap index e06427e515b14..c374117787954 100644 --- a/crates/ruff_cli/tests/snapshots/integration_test__stdin_json.snap +++ b/crates/ruff_cli/tests/snapshots/integration_test__stdin_json.snap @@ -17,6 +17,7 @@ exit_code: 1 ----- stdout ----- [ { + "cell": null, "code": "F401", "end_location": { "column": 10, diff --git a/crates/ruff_linter/src/message/json.rs b/crates/ruff_linter/src/message/json.rs index e1a2e3e36df98..7c3b9764f3831 100644 --- a/crates/ruff_linter/src/message/json.rs +++ b/crates/ruff_linter/src/message/json.rs @@ -5,7 +5,8 @@ use serde::{Serialize, Serializer}; use serde_json::{json, Value}; use ruff_diagnostics::Edit; -use ruff_source_file::SourceCode; +use ruff_notebook::NotebookIndex; +use ruff_source_file::{OneIndexed, SourceCode, SourceLocation}; use ruff_text_size::Ranged; use crate::message::{Emitter, EmitterContext, Message}; @@ -19,9 +20,9 @@ impl Emitter for JsonEmitter { &mut self, writer: &mut dyn Write, messages: &[Message], - _context: &EmitterContext, + context: &EmitterContext, ) -> anyhow::Result<()> { - serde_json::to_writer_pretty(writer, &ExpandedMessages { messages })?; + serde_json::to_writer_pretty(writer, &ExpandedMessages { messages, context })?; Ok(()) } @@ -29,6 +30,7 @@ impl Emitter for JsonEmitter { struct ExpandedMessages<'a> { messages: &'a [Message], + context: &'a EmitterContext<'a>, } impl Serialize for ExpandedMessages<'_> { @@ -39,7 +41,7 @@ impl Serialize for ExpandedMessages<'_> { let mut s = serializer.serialize_seq(Some(self.messages.len()))?; for message in self.messages { - let value = message_to_json_value(message); + let value = message_to_json_value(message, self.context); s.serialize_element(&value)?; } @@ -47,26 +49,40 @@ impl Serialize for ExpandedMessages<'_> { } } -pub(crate) fn message_to_json_value(message: &Message) -> Value { +pub(crate) fn message_to_json_value(message: &Message, context: &EmitterContext) -> Value { let source_code = message.file.to_source_code(); + let notebook_index = context.notebook_index(message.filename()); let fix = message.fix.as_ref().map(|fix| { json!({ "applicability": fix.applicability(), "message": message.kind.suggestion.as_deref(), - "edits": &ExpandedEdits { edits: fix.edits(), source_code: &source_code }, + "edits": &ExpandedEdits { edits: fix.edits(), source_code: &source_code, notebook_index }, }) }); - let start_location = source_code.source_location(message.start()); - let end_location = source_code.source_location(message.end()); - let noqa_location = source_code.source_location(message.noqa_offset); + let mut start_location = source_code.source_location(message.start()); + let mut end_location = source_code.source_location(message.end()); + let mut noqa_location = source_code.source_location(message.noqa_offset); + let mut notebook_cell_index = None; + + if let Some(notebook_index) = notebook_index { + notebook_cell_index = Some( + notebook_index + .cell(start_location.row) + .unwrap_or(OneIndexed::MIN), + ); + start_location = notebook_index.translate_location(&start_location); + end_location = notebook_index.translate_location(&end_location); + noqa_location = notebook_index.translate_location(&noqa_location); + } json!({ "code": message.kind.rule().noqa_code().to_string(), "url": message.kind.rule().url(), "message": message.kind.body, "fix": fix, + "cell": notebook_cell_index, "location": start_location, "end_location": end_location, "filename": message.filename(), @@ -77,6 +93,7 @@ pub(crate) fn message_to_json_value(message: &Message) -> Value { struct ExpandedEdits<'a> { edits: &'a [Edit], source_code: &'a SourceCode<'a, 'a>, + notebook_index: Option<&'a NotebookIndex>, } impl Serialize for ExpandedEdits<'_> { @@ -87,10 +104,57 @@ impl Serialize for ExpandedEdits<'_> { let mut s = serializer.serialize_seq(Some(self.edits.len()))?; for edit in self.edits { + let mut location = self.source_code.source_location(edit.start()); + let mut end_location = self.source_code.source_location(edit.end()); + + if let Some(notebook_index) = self.notebook_index { + // There exists a newline between each cell's source code in the + // concatenated source code in Ruff. This newline doesn't actually + // exists in the JSON source field. + // + // Now, certain edits may try to remove this newline, which means + // the edit will spill over to the first character of the next cell. + // If it does, we need to translate the end location to the last + // character of the previous cell. + match ( + notebook_index.cell(location.row), + notebook_index.cell(end_location.row), + ) { + (Some(start_cell), Some(end_cell)) if start_cell != end_cell => { + debug_assert_eq!(end_location.column.get(), 1); + + let prev_row = end_location.row.saturating_sub(1); + end_location = SourceLocation { + row: notebook_index.cell_row(prev_row).unwrap_or(OneIndexed::MIN), + column: self + .source_code + .source_location(self.source_code.line_end_exclusive(prev_row)) + .column, + }; + } + (Some(_), None) => { + debug_assert_eq!(end_location.column.get(), 1); + + let prev_row = end_location.row.saturating_sub(1); + end_location = SourceLocation { + row: notebook_index.cell_row(prev_row).unwrap_or(OneIndexed::MIN), + column: self + .source_code + .source_location(self.source_code.line_end_exclusive(prev_row)) + .column, + }; + } + _ => { + end_location = notebook_index.translate_location(&end_location); + } + } + location = notebook_index.translate_location(&location); + } + let value = json!({ "content": edit.content().unwrap_or_default(), - "location": self.source_code.source_location(edit.start()), - "end_location": self.source_code.source_location(edit.end()) + "location": location, + "end_location": end_location }); s.serialize_element(&value)?; @@ -104,7 +168,10 @@ impl Serialize for ExpandedEdits<'_> { mod tests { use insta::assert_snapshot; - use crate::message::tests::{capture_emitter_output, create_messages}; + use crate::message::tests::{ + capture_emitter_notebook_output, capture_emitter_output, create_messages, + create_notebook_messages, + }; use crate::message::JsonEmitter; #[test] @@ -114,4 +181,13 @@ mod tests { assert_snapshot!(content); } + + #[test] + fn notebook_output() { + let mut emitter = JsonEmitter; + let (messages, notebook_indexes) = create_notebook_messages(); + let content = capture_emitter_notebook_output(&mut emitter, &messages, ¬ebook_indexes); + + assert_snapshot!(content); + } } diff --git a/crates/ruff_linter/src/message/json_lines.rs b/crates/ruff_linter/src/message/json_lines.rs index 360e7ec6a71a7..25b8cc1d5b32b 100644 --- a/crates/ruff_linter/src/message/json_lines.rs +++ b/crates/ruff_linter/src/message/json_lines.rs @@ -11,11 +11,11 @@ impl Emitter for JsonLinesEmitter { &mut self, writer: &mut dyn Write, messages: &[Message], - _context: &EmitterContext, + context: &EmitterContext, ) -> anyhow::Result<()> { let mut w = writer; for message in messages { - serde_json::to_writer(&mut w, &message_to_json_value(message))?; + serde_json::to_writer(&mut w, &message_to_json_value(message, context))?; w.write_all(b"\n")?; } Ok(()) @@ -27,7 +27,10 @@ mod tests { use insta::assert_snapshot; use crate::message::json_lines::JsonLinesEmitter; - use crate::message::tests::{capture_emitter_output, create_messages}; + use crate::message::tests::{ + capture_emitter_notebook_output, capture_emitter_output, create_messages, + create_notebook_messages, + }; #[test] fn output() { @@ -36,4 +39,13 @@ mod tests { assert_snapshot!(content); } + + #[test] + fn notebook_output() { + let mut emitter = JsonLinesEmitter; + let (messages, notebook_indexes) = create_notebook_messages(); + let content = capture_emitter_notebook_output(&mut emitter, &messages, ¬ebook_indexes); + + assert_snapshot!(content); + } } diff --git a/crates/ruff_linter/src/message/snapshots/ruff_linter__message__json__tests__notebook_output.snap b/crates/ruff_linter/src/message/snapshots/ruff_linter__message__json__tests__notebook_output.snap new file mode 100644 index 0000000000000..7403245cc13aa --- /dev/null +++ b/crates/ruff_linter/src/message/snapshots/ruff_linter__message__json__tests__notebook_output.snap @@ -0,0 +1,105 @@ +--- +source: crates/ruff_linter/src/message/json.rs +expression: content +--- +[ + { + "cell": 1, + "code": "F401", + "end_location": { + "column": 10, + "row": 2 + }, + "filename": "notebook.ipynb", + "fix": { + "applicability": "safe", + "edits": [ + { + "content": "", + "end_location": { + "column": 10, + "row": 2 + }, + "location": { + "column": 1, + "row": 2 + } + } + ], + "message": "Remove unused import: `os`" + }, + "location": { + "column": 8, + "row": 2 + }, + "message": "`os` imported but unused", + "noqa_row": 2, + "url": "https://docs.astral.sh/ruff/rules/unused-import" + }, + { + "cell": 2, + "code": "F401", + "end_location": { + "column": 12, + "row": 2 + }, + "filename": "notebook.ipynb", + "fix": { + "applicability": "safe", + "edits": [ + { + "content": "", + "end_location": { + "column": 1, + "row": 3 + }, + "location": { + "column": 1, + "row": 2 + } + } + ], + "message": "Remove unused import: `math`" + }, + "location": { + "column": 8, + "row": 2 + }, + "message": "`math` imported but unused", + "noqa_row": 2, + "url": "https://docs.astral.sh/ruff/rules/unused-import" + }, + { + "cell": 3, + "code": "F841", + "end_location": { + "column": 6, + "row": 4 + }, + "filename": "notebook.ipynb", + "fix": { + "applicability": "unsafe", + "edits": [ + { + "content": "", + "end_location": { + "column": 10, + "row": 4 + }, + "location": { + "column": 1, + "row": 4 + } + } + ], + "message": "Remove assignment to unused variable `x`" + }, + "location": { + "column": 5, + "row": 4 + }, + "message": "Local variable `x` is assigned to but never used", + "noqa_row": 4, + "url": "https://docs.astral.sh/ruff/rules/unused-variable" + } +] diff --git a/crates/ruff_linter/src/message/snapshots/ruff_linter__message__json__tests__output.snap b/crates/ruff_linter/src/message/snapshots/ruff_linter__message__json__tests__output.snap index a77054a249084..fed85f04281fe 100644 --- a/crates/ruff_linter/src/message/snapshots/ruff_linter__message__json__tests__output.snap +++ b/crates/ruff_linter/src/message/snapshots/ruff_linter__message__json__tests__output.snap @@ -4,6 +4,7 @@ expression: content --- [ { + "cell": null, "code": "F401", "end_location": { "column": 10, @@ -36,6 +37,7 @@ expression: content "url": "https://docs.astral.sh/ruff/rules/unused-import" }, { + "cell": null, "code": "F841", "end_location": { "column": 6, @@ -68,6 +70,7 @@ expression: content "url": "https://docs.astral.sh/ruff/rules/unused-variable" }, { + "cell": null, "code": "F821", "end_location": { "column": 5, diff --git a/crates/ruff_linter/src/message/snapshots/ruff_linter__message__json_lines__tests__notebook_output.snap b/crates/ruff_linter/src/message/snapshots/ruff_linter__message__json_lines__tests__notebook_output.snap new file mode 100644 index 0000000000000..e50520b8b9df9 --- /dev/null +++ b/crates/ruff_linter/src/message/snapshots/ruff_linter__message__json_lines__tests__notebook_output.snap @@ -0,0 +1,8 @@ +--- +source: crates/ruff_linter/src/message/json_lines.rs +expression: content +--- +{"cell":1,"code":"F401","end_location":{"column":10,"row":2},"filename":"notebook.ipynb","fix":{"applicability":"safe","edits":[{"content":"","end_location":{"column":10,"row":2},"location":{"column":1,"row":2}}],"message":"Remove unused import: `os`"},"location":{"column":8,"row":2},"message":"`os` imported but unused","noqa_row":2,"url":"https://docs.astral.sh/ruff/rules/unused-import"} +{"cell":2,"code":"F401","end_location":{"column":12,"row":2},"filename":"notebook.ipynb","fix":{"applicability":"safe","edits":[{"content":"","end_location":{"column":1,"row":3},"location":{"column":1,"row":2}}],"message":"Remove unused import: `math`"},"location":{"column":8,"row":2},"message":"`math` imported but unused","noqa_row":2,"url":"https://docs.astral.sh/ruff/rules/unused-import"} +{"cell":3,"code":"F841","end_location":{"column":6,"row":4},"filename":"notebook.ipynb","fix":{"applicability":"unsafe","edits":[{"content":"","end_location":{"column":10,"row":4},"location":{"column":1,"row":4}}],"message":"Remove assignment to unused variable `x`"},"location":{"column":5,"row":4},"message":"Local variable `x` is assigned to but never used","noqa_row":4,"url":"https://docs.astral.sh/ruff/rules/unused-variable"} + diff --git a/crates/ruff_linter/src/message/snapshots/ruff_linter__message__json_lines__tests__output.snap b/crates/ruff_linter/src/message/snapshots/ruff_linter__message__json_lines__tests__output.snap index 12d27cd21dec3..57cd47822ceea 100644 --- a/crates/ruff_linter/src/message/snapshots/ruff_linter__message__json_lines__tests__output.snap +++ b/crates/ruff_linter/src/message/snapshots/ruff_linter__message__json_lines__tests__output.snap @@ -2,7 +2,7 @@ source: crates/ruff_linter/src/message/json_lines.rs expression: content --- -{"code":"F401","end_location":{"column":10,"row":1},"filename":"fib.py","fix":{"applicability":"unsafe","edits":[{"content":"","end_location":{"column":1,"row":2},"location":{"column":1,"row":1}}],"message":"Remove unused import: `os`"},"location":{"column":8,"row":1},"message":"`os` imported but unused","noqa_row":1,"url":"https://docs.astral.sh/ruff/rules/unused-import"} -{"code":"F841","end_location":{"column":6,"row":6},"filename":"fib.py","fix":{"applicability":"unsafe","edits":[{"content":"","end_location":{"column":10,"row":6},"location":{"column":5,"row":6}}],"message":"Remove assignment to unused variable `x`"},"location":{"column":5,"row":6},"message":"Local variable `x` is assigned to but never used","noqa_row":6,"url":"https://docs.astral.sh/ruff/rules/unused-variable"} -{"code":"F821","end_location":{"column":5,"row":1},"filename":"undef.py","fix":null,"location":{"column":4,"row":1},"message":"Undefined name `a`","noqa_row":1,"url":"https://docs.astral.sh/ruff/rules/undefined-name"} +{"cell":null,"code":"F401","end_location":{"column":10,"row":1},"filename":"fib.py","fix":{"applicability":"unsafe","edits":[{"content":"","end_location":{"column":1,"row":2},"location":{"column":1,"row":1}}],"message":"Remove unused import: `os`"},"location":{"column":8,"row":1},"message":"`os` imported but unused","noqa_row":1,"url":"https://docs.astral.sh/ruff/rules/unused-import"} +{"cell":null,"code":"F841","end_location":{"column":6,"row":6},"filename":"fib.py","fix":{"applicability":"unsafe","edits":[{"content":"","end_location":{"column":10,"row":6},"location":{"column":5,"row":6}}],"message":"Remove assignment to unused variable `x`"},"location":{"column":5,"row":6},"message":"Local variable `x` is assigned to but never used","noqa_row":6,"url":"https://docs.astral.sh/ruff/rules/unused-variable"} +{"cell":null,"code":"F821","end_location":{"column":5,"row":1},"filename":"undef.py","fix":null,"location":{"column":4,"row":1},"message":"Undefined name `a`","noqa_row":1,"url":"https://docs.astral.sh/ruff/rules/undefined-name"} diff --git a/crates/ruff_notebook/src/index.rs b/crates/ruff_notebook/src/index.rs index a001df375fd5d..9912b94e6225e 100644 --- a/crates/ruff_notebook/src/index.rs +++ b/crates/ruff_notebook/src/index.rs @@ -1,6 +1,6 @@ use serde::{Deserialize, Serialize}; -use ruff_source_file::OneIndexed; +use ruff_source_file::{OneIndexed, SourceLocation}; /// Jupyter Notebook indexing table /// @@ -32,4 +32,17 @@ impl NotebookIndex { pub fn cell_row(&self, row: OneIndexed) -> Option { self.row_to_row_in_cell.get(row.to_zero_indexed()).copied() } + + /// Translates the given source location based on the indexing table. + /// + /// This will translate the row/column in the concatenated source code + /// to the row/column in the Jupyter Notebook. + pub fn translate_location(&self, source_location: &SourceLocation) -> SourceLocation { + SourceLocation { + row: self + .cell_row(source_location.row) + .unwrap_or(OneIndexed::MIN), + column: source_location.column, + } + } } diff --git a/crates/ruff_source_file/src/lib.rs b/crates/ruff_source_file/src/lib.rs index dd375f7e242dc..fc2a10108de69 100644 --- a/crates/ruff_source_file/src/lib.rs +++ b/crates/ruff_source_file/src/lib.rs @@ -68,6 +68,10 @@ impl<'src, 'index> SourceCode<'src, 'index> { self.index.line_end(line, self.text) } + pub fn line_end_exclusive(&self, line: OneIndexed) -> TextSize { + self.index.line_end_exclusive(line, self.text) + } + pub fn line_range(&self, line: OneIndexed) -> TextRange { self.index.line_range(line, self.text) } diff --git a/crates/ruff_source_file/src/line_index.rs b/crates/ruff_source_file/src/line_index.rs index 9bf7b20985558..279e68dfb84d0 100644 --- a/crates/ruff_source_file/src/line_index.rs +++ b/crates/ruff_source_file/src/line_index.rs @@ -184,6 +184,20 @@ impl LineIndex { } } + /// Returns the [byte offset](TextSize) of the `line`'s end. + /// The offset is the end of the line, excluding the newline character ending the line (if any). + pub fn line_end_exclusive(&self, line: OneIndexed, contents: &str) -> TextSize { + let row_index = line.to_zero_indexed(); + let starts = self.line_starts(); + + // If start-of-line position after last line + if row_index.saturating_add(1) >= starts.len() { + contents.text_len() + } else { + starts[row_index + 1] - TextSize::new(1) + } + } + /// Returns the [`TextRange`] of the `line` with the given index. /// The start points to the first character's [byte offset](TextSize), the end up to, and including /// the newline character ending the line (if any).