helix-editor · cessen · Jun 15, 2021 · Jun 13, 2021 · Jun 13, 2021 · Jun 13, 2021
diff --git a/helix-core/src/chars.rs b/helix-core/src/chars.rs
@@ -0,0 +1,41 @@
+/// Returns `true` if `c` is one of the seven line-break code points listed below, else `false`.
+pub fn char_is_linebreak(c: char) -> bool {
+    matches!(
+        c, // a single `char`, so a CR LF pair is seen as two separate line breaks
+        '\u{000A}' | // Line Feed (`\n`)
+        '\u{000B}' | // Vertical Tab
+        '\u{000C}' | // Form Feed
+        '\u{000D}' | // Carriage Return (`\r`)
+        '\u{0085}' | // Next Line
+        '\u{2028}' | // Line Separator
+        '\u{2029}' // Paragraph Separator
+    )
+}
+
+/// Returns `true` if `c` is one of the (non-line-break) whitespace code points
+/// matched below; line-break characters and every other character give `false`.
+pub fn char_is_whitespace(c: char) -> bool {
+    // TODO: this is a naive binary categorization of whitespace
+    // characters.  For display, word wrapping, etc. we'll need a better
+    // categorization based on e.g. breaking vs non-breaking spaces
+    // and whether they're zero-width or not.
+    match c {
+        //'\u{1680}' | // Ogham Space Mark (deliberately left out: usually displayed as a dash, not as whitespace)
+        '\u{0009}' | // Character Tabulation (`\t`)
+        '\u{0020}' | // Space
+        '\u{00A0}' | // No-break Space
+        '\u{180E}' | // Mongolian Vowel Separator
+        '\u{202F}' | // Narrow No-break Space
+        '\u{205F}' | // Medium Mathematical Space
+        '\u{3000}' | // Ideographic Space
+        '\u{FEFF}'   // Zero Width No-break Space
+        => true,
+
+        // U+2000 through U+200B, in order: En Quad, Em Quad, En Space, Em Space,
+        // Three-per-em Space, Four-per-em Space, Six-per-em Space, Figure Space,
+        // Punctuation Space, Thin Space, Hair Space, Zero Width Space.
+        c if ('\u{2000}' ..= '\u{200B}').contains(&c) => true,
+
+        _ => false, // not in the set above (this includes every line-break character)
+    }
+}
diff --git a/helix-core/src/lib.rs b/helix-core/src/lib.rs
@@ -1,5 +1,6 @@
 #![allow(unused)]
 pub mod auto_pairs;
+pub mod chars;
 pub mod comment;
 pub mod diagnostic;
 pub mod graphemes;

diff --git a/helix-term/src/commands.rs b/helix-term/src/commands.rs
@@ -9,7 +9,7 @@ use helix_core::{
 };
 
 use helix_view::{
-    document::Mode,
+    document::{IndentStyle, Mode},
     view::{View, PADDING},
     Document, DocumentId, Editor, ViewId,
 };
@@ -979,6 +979,26 @@ mod cmd {
         doc.format(view.id)
     }
 
+    fn set_indent_style(editor: &mut Editor, args: &[&str], event: PromptEvent) {
+        use IndentStyle::*;
+        // Sets the current document's indent style from the first argument; other arguments and `event` are unused here.
+        let style = match args.get(0) {
+            Some(arg) if "tabs".starts_with(&arg.to_lowercase()) => Some(Tabs), // any case-insensitive prefix of `tabs` (`t`, `TA`, ...); NOTE(review): an empty argument would match too
+            Some(&"0") => Some(Tabs), // `0` also selects tabs
+            Some(arg) => arg
+                .parse::<u8>()
+                .ok() // not a number that fits in `u8` -> None
+                .filter(|n| (1..=8).contains(n)) // only 1 through 8 spaces are accepted
+                .map(Spaces),
+            _ => None, // no argument given
+        };
+        // An unrecognized or missing argument leaves the document's style unchanged; nothing is reported.
+        if let Some(s) = style {
+            let (_, doc) = editor.current();
+            doc.indent_style = s;
+        }
+    }
+
     fn earlier(editor: &mut Editor, args: &[&str], event: PromptEvent) {
         let uk = match args.join(" ").parse::<helix_core::history::UndoKind>() {
             Ok(uk) => uk,
@@ -1143,6 +1163,13 @@ mod cmd {
             fun: format,
             completer: None,
         },
+        Command {
+            name: "indent-style",
+            alias: None,
+            doc: "Set the indentation style for editing. ('t' for tabs or 1-8 for number of spaces.)",
+            fun: set_indent_style,
+            completer: None,
+        },
         Command {
             name: "earlier",
             alias: Some("ear"),

diff --git a/helix-term/src/ui/editor.rs b/helix-term/src/ui/editor.rs
@@ -11,7 +11,10 @@ use helix_core::{
     syntax::{self, HighlightEvent},
     Position, Range,
 };
-use helix_view::{document::Mode, Document, Editor, Theme, View};
+use helix_view::{
+    document::{IndentStyle, Mode},
+    Document, Editor, Theme, View,
+};
 use std::borrow::Cow;
 
 use crossterm::{
@@ -455,6 +458,10 @@ impl EditorView {
         theme: &Theme,
         is_focused: bool,
     ) {
+        //-------------------------------
+        // Left side of the status line.
+        //-------------------------------
+
         let mode = match doc.mode() {
             Mode::Insert => "INS",
             Mode::Select => "SEL",
@@ -487,24 +494,41 @@ impl EditorView {
             );
         }
 
-        surface.set_stringn(
-            viewport.x + viewport.width.saturating_sub(15),
-            viewport.y,
-            format!("{}", doc.diagnostics().len()),
-            4,
-            text_color,
-        );
-
-        // render line:col
-        let pos = coords_at_pos(doc.text().slice(..), doc.selection(view.id).cursor());
-
-        let text = format!("{}:{}", pos.row + 1, pos.col + 1); // convert to 1-indexing
-        let len = text.len();
+        //-------------------------------
+        // Right side of the status line.
+        //-------------------------------
+
+        // Compute the individual info strings.
+        let diag_count = format!("{}", doc.diagnostics().len());
+        // let indent_info = match doc.indent_style {
+        //     IndentStyle::Tabs => "tabs",
+        //     IndentStyle::Spaces(1) => "spaces:1",
+        //     IndentStyle::Spaces(2) => "spaces:2",
+        //     IndentStyle::Spaces(3) => "spaces:3",
+        //     IndentStyle::Spaces(4) => "spaces:4",
+        //     IndentStyle::Spaces(5) => "spaces:5",
+        //     IndentStyle::Spaces(6) => "spaces:6",
+        //     IndentStyle::Spaces(7) => "spaces:7",
+        //     IndentStyle::Spaces(8) => "spaces:8",
+        //     _ => "indent:ERROR",
+        // };
+        let position_info = {
+            let pos = coords_at_pos(doc.text().slice(..), doc.selection(view.id).cursor());
+            format!("{}:{}", pos.row + 1, pos.col + 1) // convert to 1-indexing
+        };
 
+        // Render them to the status line together.
+        let right_side_text = format!(
+            "{}    {} ",
+            &diag_count[..diag_count.len().min(4)],
+            // indent_info,
+            position_info
+        );
+        let text_len = right_side_text.len() as u16;
         surface.set_string(
-            viewport.x + viewport.width.saturating_sub(len as u16 + 1),
+            viewport.x + viewport.width.saturating_sub(text_len),
             viewport.y,
-            text,
+            right_side_text,
             text_color,
         );
     }

diff --git a/helix-view/src/document.rs b/helix-view/src/document.rs
@@ -5,6 +5,7 @@ use std::path::{Component, Path, PathBuf};
 use std::sync::Arc;
 
 use helix_core::{
+    chars::{char_is_linebreak, char_is_whitespace},
     history::History,
     syntax::{LanguageConfiguration, LOADER},
     ChangeSet, Diagnostic, Rope, Selection, State, Syntax, Transaction,
@@ -21,6 +22,12 @@ pub enum Mode {
     Insert,
 }
 
+#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
+pub enum IndentStyle { // what one level of indentation consists of
+    Tabs, // one tab character (`Document::indent_unit` returns `\t`)
+    Spaces(u8), // this many spaces; `Document::indent_unit` only has units for 1 through 8
+}
+
 pub struct Document {
     // rope + selection
     pub(crate) id: DocumentId,
@@ -33,6 +40,9 @@ pub struct Document {
     pub mode: Mode,
     pub restore_cursor: bool,
 
+    /// Current indent style.
+    pub indent_style: IndentStyle,
+
     syntax: Option<Syntax>,
     // /// Corresponding language scope name. Usually `source.<lang>`.
     pub(crate) language: Option<Arc<LanguageConfiguration>>,
@@ -149,6 +159,7 @@ impl Document {
             path: None,
             text,
             selections: HashMap::default(),
+            indent_style: IndentStyle::Spaces(4),
             mode: Mode::Normal,
             restore_cursor: false,
             syntax: None,
@@ -182,6 +193,7 @@ impl Document {
         let mut doc = Self::new(doc);
         // set the path and try detecting the language
         doc.set_path(&path)?;
+        doc.detect_indent_style();
 
         Ok(doc)
     }
@@ -265,6 +277,132 @@ impl Document {
         }
     }
 
+    fn detect_indent_style(&mut self) {
+        // Guess the document's indent style from its text and store it in
+        // `self.indent_style`.  Step one: build a histogram of the indentation
+        // *increases* between subsequent lines, ignoring lines that are all whitespace.
+        // Index 0 is for tabs, the rest are 1-8 spaces.
+        let histogram: [usize; 9] = {
+            let mut histogram = [0; 9];
+            let mut prev_line_is_tabs = false;
+            let mut prev_line_leading_count = 0usize;
+
+            // Loop through the lines, checking for and recording indentation
+            // increases as we go.  Only the first 1000 lines are examined.
+            'outer: for line in self.text.lines().take(1000) {
+                let mut c_iter = line.chars();
+
+                // Is first character a tab or space?
+                let is_tabs = match c_iter.next() {
+                    Some('\t') => true,
+                    Some(' ') => false,
+
+                    // Ignore blank lines (the previous-line info is left as it was).
+                    Some(c) if char_is_linebreak(c) => continue,
+
+                    _ => { // any other first character, or no characters at all: the line is not indented
+                        prev_line_is_tabs = false;
+                        prev_line_leading_count = 0;
+                        continue;
+                    }
+                };
+
+                // Count the line's total leading tab/space characters.
+                let mut leading_count = 1; // the first character was already consumed above
+                let mut count_is_done = false;
+                for c in c_iter {
+                    match c {
+                        '\t' if is_tabs && !count_is_done => leading_count += 1,
+                        ' ' if !is_tabs && !count_is_done => leading_count += 1,
+
+                        // We stop counting if we hit whitespace that doesn't
+                        // qualify as indent or doesn't match the leading
+                        // whitespace, but we don't exit the loop yet because
+                        // we still want to determine if the line is blank.
+                        c if char_is_whitespace(c) => count_is_done = true,
+
+                        // Ignore blank (whitespace-only) lines, keeping the previous-line info.
+                        c if char_is_linebreak(c) => continue 'outer,
+
+                        _ => break, // first non-whitespace character: the line has content, counting is finished
+                    }
+
+                    // Bound the worst-case execution time for weird text files by
+                    // skipping any line with more than 256 leading indent characters.
+                    if leading_count > 256 {
+                        continue 'outer;
+                    }
+                }
+
+                // If there was an increase in indentation over the previous line, update the
+                // histogram.  Both lines must use the same indent character unless the previous one had none.
+                if (prev_line_is_tabs == is_tabs || prev_line_leading_count == 0)
+                    && prev_line_leading_count < leading_count
+                {
+                    if is_tabs {
+                        histogram[0] += 1; // one count per increase, however many tabs were added
+                    } else {
+                        let amount = leading_count - prev_line_leading_count;
+                        if amount <= 8 { // increases of more than 8 spaces are not recorded
+                            histogram[amount] += 1;
+                        }
+                    }
+                }
+
+                // Store this line's leading whitespace info for use with
+                // the next line.
+                prev_line_is_tabs = is_tabs;
+                prev_line_leading_count = leading_count;
+            }
+
+            // Give more weight to tabs, because their presence is a very
+            // strong indicator.
+            histogram[0] *= 2;
+
+            histogram
+        };
+
+        // Find the most frequent indent, its frequency, and the frequency of
+        // the next-most frequent indent.
+        let indent = histogram
+            .iter()
+            .enumerate()
+            .max_by_key(|kv| kv.1) // on ties `max_by_key` returns the last maximum, i.e. the highest index
+            .unwrap() // never `None`: the array has 9 entries
+            .0;
+        let indent_freq = histogram[indent];
+        let indent_freq_2 = *histogram
+            .iter()
+            .enumerate()
+            .filter(|kv| kv.0 != indent)
+            .map(|kv| kv.1)
+            .max()
+            .unwrap(); // never `None`: 8 entries remain after the filter
+
+        // Use the auto-detected result if we're confident enough in its
+        // accuracy: at least one sample, and a runner-up below 0.66 times the winner's count.
+        // Otherwise fall back to the language-based setting.
+        if indent_freq >= 1 && (indent_freq_2 as f64 / indent_freq as f64) < 0.66 {
+            // Use the auto-detected setting.
+            self.indent_style = match indent {
+                0 => IndentStyle::Tabs,
+                _ => IndentStyle::Spaces(indent as u8),
+            };
+        } else {
+            // Fall back to language-based setting.
+            let indent = self
+                .language
+                .as_ref()
+                .and_then(|config| config.indent.as_ref())
+                .map_or("  ", |config| config.unit.as_str()); // fallback to 2 spaces
+
+            self.indent_style = if indent.starts_with(' ') {
+                IndentStyle::Spaces(indent.len() as u8) // byte length of the unit; NOTE(review): not limited to the 1-8 that `indent_unit` handles
+            } else {
+                IndentStyle::Tabs // any unit that does not start with a space (an empty one included)
+            };
+        }
+    }
+
     pub fn set_path(&mut self, path: &Path) -> Result<(), std::io::Error> {
         let path = canonicalize_path(path)?;
 
@@ -507,13 +645,25 @@ impl Document {
     }
 
     /// Returns a string containing a single level of indentation.
-    pub fn indent_unit(&self) -> &str {
-        self.language
-            .as_ref()
-            .and_then(|config| config.indent.as_ref())
-            .map_or("  ", |config| config.unit.as_str()) // fallback to 2 spaces
-
-        // " ".repeat(TAB_WIDTH)
+    ///
+    /// TODO: we might not need this function anymore, since the information
+    /// is conveniently available in `Document::indent_style` now.
+    pub fn indent_unit(&self) -> &'static str {
+        match self.indent_style {
+            IndentStyle::Tabs => "\t",
+            IndentStyle::Spaces(1) => " ",
+            IndentStyle::Spaces(2) => "  ",
+            IndentStyle::Spaces(3) => "   ",
+            IndentStyle::Spaces(4) => "    ",
+            IndentStyle::Spaces(5) => "     ",
+            IndentStyle::Spaces(6) => "      ",
+            IndentStyle::Spaces(7) => "       ",
+            IndentStyle::Spaces(8) => "        ",
+
+            // Unsupported indentation style.  This should never happen,
+            // but just in case fall back to two spaces.
+            _ => "  ",
+        }
     }
 
     #[inline]