From 01684cf8e5a47c0ba2a067055785878c4413e90a Mon Sep 17 00:00:00 2001 From: pagedmov Date: Wed, 18 Feb 2026 16:29:40 -0500 Subject: [PATCH] more highlighter tune-ups --- src/prompt/readline/highlight.rs | 65 +++++++++++++++- src/prompt/readline/mod.rs | 125 ++++++++++++++++++++++++++++--- 2 files changed, 176 insertions(+), 14 deletions(-) diff --git a/src/prompt/readline/highlight.rs b/src/prompt/readline/highlight.rs index efce4b9..7d57835 100644 --- a/src/prompt/readline/highlight.rs +++ b/src/prompt/readline/highlight.rs @@ -2,6 +2,12 @@ use std::{env, path::{Path, PathBuf}}; use crate::{libsh::term::{Style, StyleSet, Styled}, prompt::readline::{annotate_input, markers}, state::read_logic}; +/// Syntax highlighter for shell input using Unicode marker-based annotation +/// +/// The highlighter processes annotated input strings containing invisible Unicode markers +/// (U+FDD0-U+FDEF range) that indicate syntax elements. It generates ANSI escape codes +/// for terminal display while maintaining a style stack for proper color restoration +/// in nested constructs (e.g., variables inside strings inside command substitutions). pub struct Highlighter { input: String, output: String, @@ -10,6 +16,7 @@ pub struct Highlighter { } impl Highlighter { + /// Creates a new highlighter with empty buffers and reset state pub fn new() -> Self { Self { input: String::new(), @@ -19,11 +26,20 @@ impl Highlighter { } } + /// Loads raw input text and annotates it with syntax markers + /// + /// The input is passed through the annotator which inserts Unicode markers + /// indicating token types and sub-token constructs (strings, variables, etc.) pub fn load_input(&mut self, input: &str) { let input = annotate_input(input); self.input = input; } + /// Processes the annotated input and generates ANSI-styled output + /// + /// Walks through the input character by character, interpreting markers and + /// applying appropriate styles. Nested constructs (command substitutions, + /// subshells, strings) are handled recursively with proper style restoration. pub fn highlight(&mut self) { let input = self.input.clone(); let mut input_chars = input.chars().peekable(); @@ -156,6 +172,10 @@ impl Highlighter { if *ch == markers::VAR_SUB_END { input_chars.next(); // consume the end marker break; + } else if markers::is_marker(*ch) { + log::warn!("Unhandled marker character in variable substitution: U+{:04X}", *ch as u32); + input_chars.next(); // skip the marker + continue; } var_sub.push(*ch); input_chars.next(); @@ -166,13 +186,21 @@ impl Highlighter { self.pop_style(); } _ => { - self.output.push(ch); - self.last_was_reset = false; + if markers::is_marker(ch) { + log::warn!("Unhandled marker character in highlighter: U+{:04X}", ch as u32); + } else { + self.output.push(ch); + self.last_was_reset = false; + } } } } } + /// Extracts the highlighted output and resets the highlighter state + /// + /// Clears the input buffer, style stack, and returns the generated output + /// containing ANSI escape codes. The highlighter is ready for reuse after this. pub fn take(&mut self) -> String { log::info!("Highlighting result: {:?}", self.output); self.input.clear(); @@ -180,6 +208,12 @@ impl Highlighter { std::mem::take(&mut self.output) } + /// Checks if a command name is valid (exists in PATH, is a function, or is an alias) + /// + /// Searches: + /// 1. Current directory if command is a path + /// 2. All directories in PATH environment variable + /// 3. Shell functions and aliases in the current shell state fn is_valid(command: &str) -> bool { let path = env::var("PATH").unwrap_or_default(); let paths = path.split(':'); @@ -202,6 +236,10 @@ impl Highlighter { false } + /// Emits a reset ANSI code to the output, with deduplication + /// + /// Only emits the reset if the last emitted code was not already a reset, + /// preventing redundant `\x1b[0m` sequences in the output. fn emit_reset(&mut self) { if !self.last_was_reset { self.output.push_str(&Style::Reset.to_string()); @@ -209,17 +247,31 @@ impl Highlighter { } } + /// Emits a style ANSI code to the output + /// + /// Unconditionally appends the ANSI escape sequence for the given style + /// and marks that we're no longer in a reset state. fn emit_style(&mut self, style: &StyleSet) { self.output.push_str(&style.to_string()); self.last_was_reset = false; } + /// Pushes a new style onto the stack and emits its ANSI code + /// + /// Used when entering a new syntax context (string, variable, command, etc.). + /// The style stack allows proper restoration when exiting nested constructs. pub fn push_style(&mut self, style: impl Into) { let set: StyleSet = style.into(); self.style_stack.push(set.clone()); self.emit_style(&set); } + /// Pops a style from the stack and restores the previous style + /// + /// Used when exiting a syntax context. If there's a parent style on the stack, + /// it's re-emitted to restore the previous color. Otherwise, emits a reset. + /// This ensures colors are properly restored in nested constructs like + /// `"string with $VAR"` where the string color resumes after the variable. pub fn pop_style(&mut self) { self.style_stack.pop(); if let Some(style) = self.style_stack.last().cloned() { @@ -229,11 +281,20 @@ impl Highlighter { } } + /// Clears all styles from the stack and emits a reset + /// + /// Used at command separators and explicit reset markers to return to + /// the default terminal color between independent commands. pub fn clear_styles(&mut self) { self.style_stack.clear(); self.emit_reset(); } + /// Simple marker-to-ANSI replacement (unused in favor of stack-based highlighting) + /// + /// Performs direct string replacement of markers with ANSI codes, without + /// handling nesting or proper color restoration. Kept for reference but not + /// used in the current implementation. pub fn trivial_replace(&mut self) { self.input = self.input .replace([markers::RESET, markers::ARG], "\x1b[0m") diff --git a/src/prompt/readline/mod.rs b/src/prompt/readline/mod.rs index 91d4aa5..adb550c 100644 --- a/src/prompt/readline/mod.rs +++ b/src/prompt/readline/mod.rs @@ -62,6 +62,30 @@ pub mod markers { SUBSH_END, RESET ]; + pub const TOKEN_LEVEL: [char;10] = [ + SUBSH, + COMMAND, + BUILTIN, + ARG, + KEYWORD, + OPERATOR, + REDIRECT, + CMD_SEP, + CASE_PAT, + ASSIGNMENT, + ]; + pub const SUB_TOKEN: [char;6] = [ + VAR_SUB, + CMD_SUB, + PROC_SUB, + STRING_DQ, + STRING_SQ, + GLOB, + ]; + + pub fn is_marker(c: char) -> bool { + TOKEN_LEVEL.contains(&c) || SUB_TOKEN.contains(&c) || END_MARKERS.contains(&c) + } } /// Non-blocking readline result @@ -219,7 +243,7 @@ impl FernVi { log::debug!("{line:?}"); let to_cursor = self.editor.slice_to_cursor().unwrap_or_default(); let (cols, _) = get_win_size(STDIN_FILENO); - Layout::from_parts(/* tab_stop: */ 8, cols, &self.prompt, to_cursor, &line) + Layout::from_parts(/* tab_stop: */ 8, cols, &self.prompt, to_cursor, line) } pub fn scroll_history(&mut self, cmd: ViCmd) { log::debug!("scrolling"); @@ -305,12 +329,16 @@ impl FernVi { } pub fn line_text(&mut self) -> String { + let start = Instant::now(); let line = self.editor.to_string(); self.highlighter.load_input(&line); self.highlighter.highlight(); let highlighted = self.highlighter.take(); let hint = self.editor.get_hint_text(); - format!("{highlighted}{hint}") + let complete = format!("{highlighted}{hint}"); + let end = start.elapsed(); + log::info!("Line styling done in: {:.2?}", end); + complete } pub fn print_line(&mut self) -> ShResult<()> { @@ -486,8 +514,25 @@ impl FernVi { } } -/// Annotate a given input with helpful markers that give quick contextual syntax information -/// Useful for syntax highlighting and completion +/// Annotates shell input with invisible Unicode markers for syntax highlighting +/// +/// Takes raw shell input and inserts non-character markers (U+FDD0-U+FDEF range) +/// around syntax elements. These markers indicate: +/// - Token-level context (commands, arguments, operators, keywords) +/// - Sub-token constructs (strings, variables, command substitutions, globs) +/// +/// The annotated string is suitable for processing by the highlighter, which +/// interprets the markers and generates ANSI escape codes. +/// +/// # Strategy +/// Tokens are processed in reverse order so that later insertions don't +/// invalidate earlier positions. Each token is annotated independently. +/// +/// # Example +/// ```text +/// "echo $USER" -> "COMMAND echo RESET ARG VAR_SUB $USER VAR_SUB_END RESET" +/// ``` +/// (where COMMAND, RESET, etc. are invisible Unicode markers) pub fn annotate_input(input: &str) -> String { let mut annotated = input.to_string(); let input = Arc::new(input.to_string()); @@ -502,6 +547,16 @@ pub fn annotate_input(input: &str) -> String { annotated } +/// Maps token class to its corresponding marker character +/// +/// Returns the appropriate Unicode marker for token-level syntax elements. +/// Token-level markers are derived directly from the lexer's token classification +/// and represent complete tokens (operators, separators, etc.). +/// +/// Returns `None` for: +/// - String tokens (which need sub-token scanning for variables, quotes, etc.) +/// - Structural markers (SOI, EOI, Null) +/// - Unimplemented features (comments, brace groups) pub fn marker_for(class: &TkRule) -> Option { match class { TkRule::Pipe | @@ -523,7 +578,48 @@ pub fn marker_for(class: &TkRule) -> Option { } } +/// Annotates a single token with markers for both token-level and sub-token constructs +/// +/// This is the core annotation function that handles the complexity of shell syntax. +/// It uses a two-phase approach: +/// +/// # Phase 1: Analysis (Delayed Insertion) +/// Scans through the token character by character, recording marker insertions +/// as `(position, marker)` pairs in a list. This avoids borrowing issues and +/// allows context queries during the scan. +/// +/// The analysis phase handles: +/// - **Strings**: Single/double quoted regions (with escaping rules) +/// - **Variables**: `$VAR` and `${VAR}` expansions +/// - **Command substitutions**: `$(...)` with depth tracking +/// - **Process substitutions**: `<(...)` and `>(...)` +/// - **Globs**: `*`, `?`, `[...]` patterns (context-aware) +/// - **Escapes**: Backslash escaping +/// +/// # Phase 2: Application (Sorted Insertion) +/// Markers are sorted by position (descending) to avoid index invalidation when +/// inserting into the string. At the same position, markers are ordered: +/// 1. RESET (rightmost) +/// 2. Regular markers (middle) +/// 3. END markers (leftmost) +/// +/// This produces the pattern: `[END][TOGGLE][RESET]` at boundaries. +/// +/// # Context Tracking +/// The `in_context` closure queries the insertion list to determine the active +/// syntax context at the current position. This enables context-aware decisions +/// like "only highlight globs in arguments, not in command names". +/// +/// # Depth Tracking +/// Nested constructs like `$(echo $(date))` are tracked with depth counters. +/// Only the outermost construct is marked; inner content is handled recursively +/// by the highlighter. pub fn annotate_token(input: &mut String, token: Tk) { + // Sort by position descending, with priority ordering at same position: + // - RESET first (inserted first, ends up rightmost) + // - Regular markers middle + // - END markers last (inserted last, ends up leftmost) + // Result: [END][TOGGLE][RESET] let sort_insertions = |insertions: &mut Vec<(usize, char)>| { insertions.sort_by(|a, b| { match b.0.cmp(&a.0) { @@ -531,12 +627,18 @@ pub fn annotate_token(input: &mut String, token: Tk) { let priority = |m: char| -> u8 { match m { markers::RESET => 0, + markers::VAR_SUB | markers::VAR_SUB_END | + markers::CMD_SUB | markers::CMD_SUB_END | + markers::PROC_SUB | markers::PROC_SUB_END | + markers::STRING_DQ | markers::STRING_DQ_END | + markers::STRING_SQ | markers::STRING_SQ_END | markers::SUBSH_END => 2, + markers::ARG => 3, _ => 1, } }; @@ -555,12 +657,18 @@ pub fn annotate_token(input: &mut String, token: Tk) { let priority = |m: char| -> u8 { match m { markers::RESET => 0, + markers::VAR_SUB | markers::VAR_SUB_END | + markers::CMD_SUB | markers::CMD_SUB_END | + markers::PROC_SUB | markers::PROC_SUB_END | + markers::STRING_DQ | markers::STRING_DQ_END | + markers::STRING_SQ | markers::STRING_SQ_END | markers::SUBSH_END => 2, + markers::ARG => 3, // Lowest priority - processed first, overridden by sub-tokens _ => 1, } }; @@ -571,12 +679,9 @@ pub fn annotate_token(input: &mut String, token: Tk) { }); stack.retain(|(i, m)| *i <= token.span.start && !markers::END_MARKERS.contains(m)); - log::error!("Checking context for token '{}', looking for '{}'", token.span.as_str(), c); let Some(ctx) = stack.last() else { return false; }; - log::error!("Context stack for token '{}': {:?}", token.span.as_str(), stack); - log::error!("Found context marker '{}' at position {}", ctx.1, ctx.0); ctx.1 == c }; @@ -788,14 +893,10 @@ pub fn annotate_token(input: &mut String, token: Tk) { } } - // Sort by position descending, with priority ordering at same position: - // - RESET first (inserted first, ends up rightmost) - // - Regular markers middle - // - END markers last (inserted last, ends up leftmost) - // Result: [END][TOGGLE][RESET] sort_insertions(&mut insertions); for (pos, marker) in insertions { + log::info!("Inserting marker {marker:?} at position {pos}"); let pos = pos.max(0).min(input.len()); input.insert(pos, marker); }