Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Per-document indentation style, and indent-style auto-detection. #245

Merged
merged 8 commits into from
Jun 15, 2021
41 changes: 41 additions & 0 deletions helix-core/src/chars.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
/// Returns `true` when `c` is one of the characters this crate treats as a
/// line break.
pub fn char_is_linebreak(c: char) -> bool {
    // Every code point counted as a line break, in ascending order.
    const LINE_BREAKS: [char; 7] = [
        '\u{000A}', // LineFeed
        '\u{000B}', // VerticalTab
        '\u{000C}', // FormFeed
        '\u{000D}', // CarriageReturn
        '\u{0085}', // NextLine
        '\u{2028}', // Line Separator
        '\u{2029}', // ParagraphSeparator
    ];

    LINE_BREAKS.contains(&c)
}

/// Returns `true` when `c` counts as whitespace that is not a line break.
pub fn char_is_whitespace(c: char) -> bool {
    // TODO: this is a naive binary categorization of whitespace
    // characters. For display, word wrapping, etc. we'll need a better
    // categorization based on e.g. breaking vs non-breaking spaces
    // and whether they're zero-width or not.
    //
    // U+1680 (Ogham Space Mark) is deliberately left out: it is usually
    // displayed as a dash, not as whitespace.
    matches!(
        c,
        '\u{0009}' // Character Tabulation
            | '\u{0020}' // Space
            | '\u{00A0}' // No-break Space
            | '\u{180E}' // Mongolian Vowel Separator
            // En Quad, Em Quad, En Space, Em Space, Three-per-em Space,
            // Four-per-em Space, Six-per-em Space, Figure Space,
            // Punctuation Space, Thin Space, Hair Space, Zero Width Space.
            | '\u{2000}'..='\u{200B}'
            | '\u{202F}' // Narrow No-break Space
            | '\u{205F}' // Medium Mathematical Space
            | '\u{3000}' // Ideographic Space
            | '\u{FEFF}' // Zero Width No-break Space
    )
}
1 change: 1 addition & 0 deletions helix-core/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#![allow(unused)]
pub mod auto_pairs;
pub mod chars;
pub mod comment;
pub mod diagnostic;
pub mod graphemes;
Expand Down
29 changes: 28 additions & 1 deletion helix-term/src/commands.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ use helix_core::{
};

use helix_view::{
document::Mode,
document::{IndentStyle, Mode},
view::{View, PADDING},
Document, DocumentId, Editor, ViewId,
};
Expand Down Expand Up @@ -979,6 +979,26 @@ mod cmd {
doc.format(view.id)
}

/// Handler for the `indent-style` command: changes the indentation style of
/// the document returned by `editor.current()`.
///
/// Only the first argument is inspected:
/// - any case-insensitive prefix of `"tabs"`, or the literal `"0"`, selects tabs;
/// - a number from 1 through 8 selects that many spaces.
///
/// With a missing or unrecognized argument the document is left untouched.
fn set_indent_style(editor: &mut Editor, args: &[&str], event: PromptEvent) {
    use IndentStyle::*;

    let requested = args.get(0).and_then(|arg| {
        let wants_tabs = "tabs".starts_with(&arg.to_lowercase()) || *arg == "0";
        if wants_tabs {
            Some(Tabs)
        } else {
            // Anything else must be a space count within the supported range.
            arg.parse::<u8>()
                .ok()
                .filter(|width| (1..=8).contains(width))
                .map(Spaces)
        }
    });

    let new_style = match requested {
        Some(style) => style,
        None => return,
    };

    let (_, doc) = editor.current();
    doc.indent_style = new_style;
}

fn earlier(editor: &mut Editor, args: &[&str], event: PromptEvent) {
let uk = match args.join(" ").parse::<helix_core::history::UndoKind>() {
Ok(uk) => uk,
Expand Down Expand Up @@ -1143,6 +1163,13 @@ mod cmd {
fun: format,
completer: None,
},
Command {
name: "indent-style",
alias: None,
doc: "Set the indentation style for editing. ('t' for tabs or 1-8 for number of spaces.)",
fun: set_indent_style,
completer: None,
},
Command {
name: "earlier",
alias: Some("ear"),
Expand Down
56 changes: 40 additions & 16 deletions helix-term/src/ui/editor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,10 @@ use helix_core::{
syntax::{self, HighlightEvent},
Position, Range,
};
use helix_view::{document::Mode, Document, Editor, Theme, View};
use helix_view::{
document::{IndentStyle, Mode},
Document, Editor, Theme, View,
};
use std::borrow::Cow;

use crossterm::{
Expand Down Expand Up @@ -455,6 +458,10 @@ impl EditorView {
theme: &Theme,
is_focused: bool,
) {
//-------------------------------
// Left side of the status line.
//-------------------------------

let mode = match doc.mode() {
Mode::Insert => "INS",
Mode::Select => "SEL",
Expand Down Expand Up @@ -487,24 +494,41 @@ impl EditorView {
);
}

surface.set_stringn(
viewport.x + viewport.width.saturating_sub(15),
viewport.y,
format!("{}", doc.diagnostics().len()),
4,
text_color,
);

// render line:col
let pos = coords_at_pos(doc.text().slice(..), doc.selection(view.id).cursor());

let text = format!("{}:{}", pos.row + 1, pos.col + 1); // convert to 1-indexing
let len = text.len();
//-------------------------------
// Right side of the status line.
//-------------------------------

// Compute the individual info strings.
let diag_count = format!("{}", doc.diagnostics().len());
// let indent_info = match doc.indent_style {
// IndentStyle::Tabs => "tabs",
// IndentStyle::Spaces(1) => "spaces:1",
// IndentStyle::Spaces(2) => "spaces:2",
// IndentStyle::Spaces(3) => "spaces:3",
// IndentStyle::Spaces(4) => "spaces:4",
// IndentStyle::Spaces(5) => "spaces:5",
// IndentStyle::Spaces(6) => "spaces:6",
// IndentStyle::Spaces(7) => "spaces:7",
// IndentStyle::Spaces(8) => "spaces:8",
// _ => "indent:ERROR",
// };
let position_info = {
let pos = coords_at_pos(doc.text().slice(..), doc.selection(view.id).cursor());
format!("{}:{}", pos.row + 1, pos.col + 1) // convert to 1-indexing
};

// Render them to the status line together.
let right_side_text = format!(
"{} {} ",
&diag_count[..diag_count.len().min(4)],
// indent_info,
position_info
);
let text_len = right_side_text.len() as u16;
surface.set_string(
viewport.x + viewport.width.saturating_sub(len as u16 + 1),
viewport.x + viewport.width.saturating_sub(text_len),
viewport.y,
text,
right_side_text,
text_color,
);
}
Expand Down
164 changes: 157 additions & 7 deletions helix-view/src/document.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ use std::path::{Component, Path, PathBuf};
use std::sync::Arc;

use helix_core::{
chars::{char_is_linebreak, char_is_whitespace},
history::History,
syntax::{LanguageConfiguration, LOADER},
ChangeSet, Diagnostic, Rope, Selection, State, Syntax, Transaction,
Expand All @@ -21,6 +22,12 @@ pub enum Mode {
Insert,
}

/// The indentation style used when editing a document.
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
pub enum IndentStyle {
    /// Indent with tab characters.
    Tabs,
    /// Indent with the given number of spaces per level.
    Spaces(u8),
}

pub struct Document {
// rope + selection
pub(crate) id: DocumentId,
Expand All @@ -33,6 +40,9 @@ pub struct Document {
pub mode: Mode,
pub restore_cursor: bool,

/// Current indent style.
pub indent_style: IndentStyle,

syntax: Option<Syntax>,
// /// Corresponding language scope name. Usually `source.<lang>`.
pub(crate) language: Option<Arc<LanguageConfiguration>>,
Expand Down Expand Up @@ -149,6 +159,7 @@ impl Document {
path: None,
text,
selections: HashMap::default(),
indent_style: IndentStyle::Spaces(4),
mode: Mode::Normal,
restore_cursor: false,
syntax: None,
Expand Down Expand Up @@ -182,6 +193,7 @@ impl Document {
let mut doc = Self::new(doc);
// set the path and try detecting the language
doc.set_path(&path)?;
doc.detect_indent_style();

Ok(doc)
}
Expand Down Expand Up @@ -265,6 +277,132 @@ impl Document {
}
}

/// Guesses the document's indentation style from its text and stores the
/// result in `self.indent_style`.
///
/// At most the first 1000 lines are examined. The increases in leading
/// tabs/spaces between consecutive non-blank lines are tallied, and the
/// most frequent increase wins if it is clearly ahead of the runner-up.
/// Otherwise the indent unit from the language configuration is used, and
/// when there is none the style falls back to two spaces.
fn detect_indent_style(&mut self) {
    // Build a histogram of the indentation *increases* between
    // subsequent lines, ignoring lines that are all whitespace.
    //
    // Index 0 is for tabs, the rest are 1-8 spaces.
    let histogram: [usize; 9] = {
        let mut histogram = [0; 9];
        let mut prev_line_is_tabs = false;
        let mut prev_line_leading_count = 0usize;

        // Loop through the lines, checking for and recording indentation
        // increases as we go.
        'outer: for line in self.text.lines().take(1000) {
            let mut c_iter = line.chars();

            // Is first character a tab or space?
            let is_tabs = match c_iter.next() {
                Some('\t') => true,
                Some(' ') => false,

                // Ignore blank lines.
                Some(c) if char_is_linebreak(c) => continue,

                // Unindented (or empty) line: the next indented line is
                // measured against zero.
                _ => {
                    prev_line_is_tabs = false;
                    prev_line_leading_count = 0;
                    continue;
                }
            };

            // Count the line's total leading tab/space characters.
            let mut leading_count = 1;
            let mut count_is_done = false;
            for c in c_iter {
                match c {
                    '\t' if is_tabs && !count_is_done => leading_count += 1,
                    ' ' if !is_tabs && !count_is_done => leading_count += 1,

                    // We stop counting if we hit whitespace that doesn't
                    // qualify as indent or doesn't match the leading
                    // whitespace, but we don't exit the loop yet because
                    // we still want to determine if the line is blank.
                    c if char_is_whitespace(c) => count_is_done = true,

                    // Ignore blank lines.
                    c if char_is_linebreak(c) => continue 'outer,

                    _ => break,
                }

                // Bound the worst-case execution time for weird text files.
                if leading_count > 256 {
                    continue 'outer;
                }
            }

            // If there was an increase in indentation over the previous
            // line, update the histogram with that increase.
            if (prev_line_is_tabs == is_tabs || prev_line_leading_count == 0)
                && prev_line_leading_count < leading_count
            {
                if is_tabs {
                    histogram[0] += 1;
                } else {
                    let amount = leading_count - prev_line_leading_count;
                    // Increases wider than 8 spaces have no histogram slot
                    // and are dropped.
                    if amount <= 8 {
                        histogram[amount] += 1;
                    }
                }
            }

            // Store this line's leading whitespace info for use with
            // the next line.
            prev_line_is_tabs = is_tabs;
            prev_line_leading_count = leading_count;
        }

        // Give more weight to tabs, because their presence is a very
        // strong indicator.
        histogram[0] *= 2;

        histogram
    };

    // Find the most frequent indent, its frequency, and the frequency of
    // the next-most frequent indent. The histogram has a fixed, non-zero
    // length, so neither lookup can come back empty.
    let indent = histogram
        .iter()
        .enumerate()
        .max_by_key(|kv| kv.1)
        .expect("histogram is never empty")
        .0;
    let indent_freq = histogram[indent];
    let indent_freq_2 = *histogram
        .iter()
        .enumerate()
        .filter(|kv| kv.0 != indent)
        .map(|kv| kv.1)
        .max()
        .expect("histogram has more than one slot");

    // Use the auto-detected result if we're confident enough in its
    // accuracy, based on some heuristics. Otherwise fall back to
    // the language-based setting.
    if indent_freq >= 1 && (indent_freq_2 as f64 / indent_freq as f64) < 0.66 {
        // Use the auto-detected setting.
        self.indent_style = match indent {
            0 => IndentStyle::Tabs,
            _ => IndentStyle::Spaces(indent as u8),
        };
    } else {
        // Fall back to language-based setting.
        let indent = self
            .language
            .as_ref()
            .and_then(|config| config.indent.as_ref())
            .map_or("  ", |config| config.unit.as_str()); // fallback to 2 spaces

        // A unit that starts with a space is read as "this many spaces";
        // anything else is treated as tabs.
        self.indent_style = if indent.starts_with(' ') {
            IndentStyle::Spaces(indent.len() as u8)
        } else {
            IndentStyle::Tabs
        };
    }
}

pub fn set_path(&mut self, path: &Path) -> Result<(), std::io::Error> {
let path = canonicalize_path(path)?;

Expand Down Expand Up @@ -507,13 +645,25 @@ impl Document {
}

/// Returns a string containing a single level of indentation.
pub fn indent_unit(&self) -> &str {
self.language
.as_ref()
.and_then(|config| config.indent.as_ref())
.map_or(" ", |config| config.unit.as_str()) // fallback to 2 spaces

// " ".repeat(TAB_WIDTH)
/// Returns a string containing a single level of indentation for the
/// document's current `indent_style`.
///
/// `Tabs` yields `"\t"` and `Spaces(n)` with `n` from 1 through 8 yields
/// exactly `n` spaces. Any other width is unsupported and yields two
/// spaces.
///
/// TODO: we might not need this function anymore, since the information
/// is conveniently available in `Document::indent_style` now.
pub fn indent_unit(&self) -> &'static str {
    match self.indent_style {
        IndentStyle::Tabs => "\t",
        IndentStyle::Spaces(1) => " ",
        IndentStyle::Spaces(2) => "  ",
        IndentStyle::Spaces(3) => "   ",
        IndentStyle::Spaces(4) => "    ",
        IndentStyle::Spaces(5) => "     ",
        IndentStyle::Spaces(6) => "      ",
        IndentStyle::Spaces(7) => "       ",
        IndentStyle::Spaces(8) => "        ",

        // Unsupported indentation style. This should never happen,
        // but just in case fall back to two spaces.
        IndentStyle::Spaces(_) => "  ",
    }
}

#[inline]
Expand Down