more highlighter tune-ups
This commit is contained in:
@@ -2,6 +2,12 @@ use std::{env, path::{Path, PathBuf}};
|
||||
|
||||
use crate::{libsh::term::{Style, StyleSet, Styled}, prompt::readline::{annotate_input, markers}, state::read_logic};
|
||||
|
||||
/// Syntax highlighter for shell input using Unicode marker-based annotation
|
||||
///
|
||||
/// The highlighter processes annotated input strings containing invisible Unicode markers
|
||||
/// (U+FDD0-U+FDEF range) that indicate syntax elements. It generates ANSI escape codes
|
||||
/// for terminal display while maintaining a style stack for proper color restoration
|
||||
/// in nested constructs (e.g., variables inside strings inside command substitutions).
|
||||
pub struct Highlighter {
|
||||
input: String,
|
||||
output: String,
|
||||
@@ -10,6 +16,7 @@ pub struct Highlighter {
|
||||
}
|
||||
|
||||
impl Highlighter {
|
||||
/// Creates a new highlighter with empty buffers and reset state
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
input: String::new(),
|
||||
@@ -19,11 +26,20 @@ impl Highlighter {
|
||||
}
|
||||
}
|
||||
|
||||
/// Loads raw input text and annotates it with syntax markers
|
||||
///
|
||||
/// The input is passed through the annotator which inserts Unicode markers
|
||||
/// indicating token types and sub-token constructs (strings, variables, etc.)
|
||||
pub fn load_input(&mut self, input: &str) {
|
||||
let input = annotate_input(input);
|
||||
self.input = input;
|
||||
}
|
||||
|
||||
/// Processes the annotated input and generates ANSI-styled output
|
||||
///
|
||||
/// Walks through the input character by character, interpreting markers and
|
||||
/// applying appropriate styles. Nested constructs (command substitutions,
|
||||
/// subshells, strings) are handled recursively with proper style restoration.
|
||||
pub fn highlight(&mut self) {
|
||||
let input = self.input.clone();
|
||||
let mut input_chars = input.chars().peekable();
|
||||
@@ -156,6 +172,10 @@ impl Highlighter {
|
||||
if *ch == markers::VAR_SUB_END {
|
||||
input_chars.next(); // consume the end marker
|
||||
break;
|
||||
} else if markers::is_marker(*ch) {
|
||||
log::warn!("Unhandled marker character in variable substitution: U+{:04X}", *ch as u32);
|
||||
input_chars.next(); // skip the marker
|
||||
continue;
|
||||
}
|
||||
var_sub.push(*ch);
|
||||
input_chars.next();
|
||||
@@ -166,13 +186,21 @@ impl Highlighter {
|
||||
self.pop_style();
|
||||
}
|
||||
_ => {
|
||||
self.output.push(ch);
|
||||
self.last_was_reset = false;
|
||||
if markers::is_marker(ch) {
|
||||
log::warn!("Unhandled marker character in highlighter: U+{:04X}", ch as u32);
|
||||
} else {
|
||||
self.output.push(ch);
|
||||
self.last_was_reset = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Extracts the highlighted output and resets the highlighter state
|
||||
///
|
||||
/// Clears the input buffer, style stack, and returns the generated output
|
||||
/// containing ANSI escape codes. The highlighter is ready for reuse after this.
|
||||
pub fn take(&mut self) -> String {
|
||||
log::info!("Highlighting result: {:?}", self.output);
|
||||
self.input.clear();
|
||||
@@ -180,6 +208,12 @@ impl Highlighter {
|
||||
std::mem::take(&mut self.output)
|
||||
}
|
||||
|
||||
/// Checks if a command name is valid (exists in PATH, is a function, or is an alias)
|
||||
///
|
||||
/// Searches:
|
||||
/// 1. Current directory if command is a path
|
||||
/// 2. All directories in PATH environment variable
|
||||
/// 3. Shell functions and aliases in the current shell state
|
||||
fn is_valid(command: &str) -> bool {
|
||||
let path = env::var("PATH").unwrap_or_default();
|
||||
let paths = path.split(':');
|
||||
@@ -202,6 +236,10 @@ impl Highlighter {
|
||||
false
|
||||
}
|
||||
|
||||
/// Emits a reset ANSI code to the output, with deduplication
|
||||
///
|
||||
/// Only emits the reset if the last emitted code was not already a reset,
|
||||
/// preventing redundant `\x1b[0m` sequences in the output.
|
||||
fn emit_reset(&mut self) {
|
||||
if !self.last_was_reset {
|
||||
self.output.push_str(&Style::Reset.to_string());
|
||||
@@ -209,17 +247,31 @@ impl Highlighter {
|
||||
}
|
||||
}
|
||||
|
||||
/// Emits a style ANSI code to the output
|
||||
///
|
||||
/// Unconditionally appends the ANSI escape sequence for the given style
|
||||
/// and marks that we're no longer in a reset state.
|
||||
fn emit_style(&mut self, style: &StyleSet) {
|
||||
self.output.push_str(&style.to_string());
|
||||
self.last_was_reset = false;
|
||||
}
|
||||
|
||||
/// Pushes a new style onto the stack and emits its ANSI code
|
||||
///
|
||||
/// Used when entering a new syntax context (string, variable, command, etc.).
|
||||
/// The style stack allows proper restoration when exiting nested constructs.
|
||||
pub fn push_style(&mut self, style: impl Into<StyleSet>) {
|
||||
let set: StyleSet = style.into();
|
||||
self.style_stack.push(set.clone());
|
||||
self.emit_style(&set);
|
||||
}
|
||||
|
||||
/// Pops a style from the stack and restores the previous style
|
||||
///
|
||||
/// Used when exiting a syntax context. If there's a parent style on the stack,
|
||||
/// it's re-emitted to restore the previous color. Otherwise, emits a reset.
|
||||
/// This ensures colors are properly restored in nested constructs like
|
||||
/// `"string with $VAR"` where the string color resumes after the variable.
|
||||
pub fn pop_style(&mut self) {
|
||||
self.style_stack.pop();
|
||||
if let Some(style) = self.style_stack.last().cloned() {
|
||||
@@ -229,11 +281,20 @@ impl Highlighter {
|
||||
}
|
||||
}
|
||||
|
||||
/// Clears all styles from the stack and emits a reset
|
||||
///
|
||||
/// Used at command separators and explicit reset markers to return to
|
||||
/// the default terminal color between independent commands.
|
||||
pub fn clear_styles(&mut self) {
|
||||
self.style_stack.clear();
|
||||
self.emit_reset();
|
||||
}
|
||||
|
||||
/// Simple marker-to-ANSI replacement (unused in favor of stack-based highlighting)
|
||||
///
|
||||
/// Performs direct string replacement of markers with ANSI codes, without
|
||||
/// handling nesting or proper color restoration. Kept for reference but not
|
||||
/// used in the current implementation.
|
||||
pub fn trivial_replace(&mut self) {
|
||||
self.input = self.input
|
||||
.replace([markers::RESET, markers::ARG], "\x1b[0m")
|
||||
|
||||
@@ -62,6 +62,30 @@ pub mod markers {
|
||||
SUBSH_END,
|
||||
RESET
|
||||
];
|
||||
pub const TOKEN_LEVEL: [char;10] = [
|
||||
SUBSH,
|
||||
COMMAND,
|
||||
BUILTIN,
|
||||
ARG,
|
||||
KEYWORD,
|
||||
OPERATOR,
|
||||
REDIRECT,
|
||||
CMD_SEP,
|
||||
CASE_PAT,
|
||||
ASSIGNMENT,
|
||||
];
|
||||
pub const SUB_TOKEN: [char;6] = [
|
||||
VAR_SUB,
|
||||
CMD_SUB,
|
||||
PROC_SUB,
|
||||
STRING_DQ,
|
||||
STRING_SQ,
|
||||
GLOB,
|
||||
];
|
||||
|
||||
pub fn is_marker(c: char) -> bool {
|
||||
TOKEN_LEVEL.contains(&c) || SUB_TOKEN.contains(&c) || END_MARKERS.contains(&c)
|
||||
}
|
||||
}
|
||||
|
||||
/// Non-blocking readline result
|
||||
@@ -219,7 +243,7 @@ impl FernVi {
|
||||
log::debug!("{line:?}");
|
||||
let to_cursor = self.editor.slice_to_cursor().unwrap_or_default();
|
||||
let (cols, _) = get_win_size(STDIN_FILENO);
|
||||
Layout::from_parts(/* tab_stop: */ 8, cols, &self.prompt, to_cursor, &line)
|
||||
Layout::from_parts(/* tab_stop: */ 8, cols, &self.prompt, to_cursor, line)
|
||||
}
|
||||
pub fn scroll_history(&mut self, cmd: ViCmd) {
|
||||
log::debug!("scrolling");
|
||||
@@ -305,12 +329,16 @@ impl FernVi {
|
||||
}
|
||||
|
||||
pub fn line_text(&mut self) -> String {
|
||||
let start = Instant::now();
|
||||
let line = self.editor.to_string();
|
||||
self.highlighter.load_input(&line);
|
||||
self.highlighter.highlight();
|
||||
let highlighted = self.highlighter.take();
|
||||
let hint = self.editor.get_hint_text();
|
||||
format!("{highlighted}{hint}")
|
||||
let complete = format!("{highlighted}{hint}");
|
||||
let end = start.elapsed();
|
||||
log::info!("Line styling done in: {:.2?}", end);
|
||||
complete
|
||||
}
|
||||
|
||||
pub fn print_line(&mut self) -> ShResult<()> {
|
||||
@@ -486,8 +514,25 @@ impl FernVi {
|
||||
}
|
||||
}
|
||||
|
||||
/// Annotates shell input with invisible Unicode markers that give quick contextual
/// syntax information, useful for syntax highlighting and completion
|
||||
///
|
||||
/// Takes raw shell input and inserts non-character markers (U+FDD0-U+FDEF range)
|
||||
/// around syntax elements. These markers indicate:
|
||||
/// - Token-level context (commands, arguments, operators, keywords)
|
||||
/// - Sub-token constructs (strings, variables, command substitutions, globs)
|
||||
///
|
||||
/// The annotated string is suitable for processing by the highlighter, which
|
||||
/// interprets the markers and generates ANSI escape codes.
|
||||
///
|
||||
/// # Strategy
|
||||
/// Tokens are processed in reverse order so that later insertions don't
|
||||
/// invalidate earlier positions. Each token is annotated independently.
|
||||
///
|
||||
/// # Example
|
||||
/// ```text
|
||||
/// "echo $USER" -> "COMMAND echo RESET ARG VAR_SUB $USER VAR_SUB_END RESET"
|
||||
/// ```
|
||||
/// (where COMMAND, RESET, etc. are invisible Unicode markers)
|
||||
pub fn annotate_input(input: &str) -> String {
|
||||
let mut annotated = input.to_string();
|
||||
let input = Arc::new(input.to_string());
|
||||
@@ -502,6 +547,16 @@ pub fn annotate_input(input: &str) -> String {
|
||||
annotated
|
||||
}
|
||||
|
||||
/// Maps token class to its corresponding marker character
|
||||
///
|
||||
/// Returns the appropriate Unicode marker for token-level syntax elements.
|
||||
/// Token-level markers are derived directly from the lexer's token classification
|
||||
/// and represent complete tokens (operators, separators, etc.).
|
||||
///
|
||||
/// Returns `None` for:
|
||||
/// - String tokens (which need sub-token scanning for variables, quotes, etc.)
|
||||
/// - Structural markers (SOI, EOI, Null)
|
||||
/// - Unimplemented features (comments, brace groups)
|
||||
pub fn marker_for(class: &TkRule) -> Option<char> {
|
||||
match class {
|
||||
TkRule::Pipe |
|
||||
@@ -523,7 +578,48 @@ pub fn marker_for(class: &TkRule) -> Option<char> {
|
||||
}
|
||||
}
|
||||
|
||||
/// Annotates a single token with markers for both token-level and sub-token constructs
|
||||
///
|
||||
/// This is the core annotation function that handles the complexity of shell syntax.
|
||||
/// It uses a two-phase approach:
|
||||
///
|
||||
/// # Phase 1: Analysis (Delayed Insertion)
|
||||
/// Scans through the token character by character, recording marker insertions
|
||||
/// as `(position, marker)` pairs in a list. This avoids borrowing issues and
|
||||
/// allows context queries during the scan.
|
||||
///
|
||||
/// The analysis phase handles:
|
||||
/// - **Strings**: Single/double quoted regions (with escaping rules)
|
||||
/// - **Variables**: `$VAR` and `${VAR}` expansions
|
||||
/// - **Command substitutions**: `$(...)` with depth tracking
|
||||
/// - **Process substitutions**: `<(...)` and `>(...)`
|
||||
/// - **Globs**: `*`, `?`, `[...]` patterns (context-aware)
|
||||
/// - **Escapes**: Backslash escaping
|
||||
///
|
||||
/// # Phase 2: Application (Sorted Insertion)
|
||||
/// Markers are sorted by position (descending) to avoid index invalidation when
|
||||
/// inserting into the string. At the same position, markers are ordered:
|
||||
/// 1. RESET (rightmost)
|
||||
/// 2. Regular markers (middle)
|
||||
/// 3. END markers (leftmost)
|
||||
///
|
||||
/// This produces the pattern: `[END][TOGGLE][RESET]` at boundaries.
|
||||
///
|
||||
/// # Context Tracking
|
||||
/// The `in_context` closure queries the insertion list to determine the active
|
||||
/// syntax context at the current position. This enables context-aware decisions
|
||||
/// like "only highlight globs in arguments, not in command names".
|
||||
///
|
||||
/// # Depth Tracking
|
||||
/// Nested constructs like `$(echo $(date))` are tracked with depth counters.
|
||||
/// Only the outermost construct is marked; inner content is handled recursively
|
||||
/// by the highlighter.
|
||||
pub fn annotate_token(input: &mut String, token: Tk) {
|
||||
// Sort by position descending, with priority ordering at same position:
|
||||
// - RESET first (inserted first, ends up rightmost)
|
||||
// - Regular markers middle
|
||||
// - END markers last (inserted last, ends up leftmost)
|
||||
// Result: [END][TOGGLE][RESET]
|
||||
let sort_insertions = |insertions: &mut Vec<(usize, char)>| {
|
||||
insertions.sort_by(|a, b| {
|
||||
match b.0.cmp(&a.0) {
|
||||
@@ -531,12 +627,18 @@ pub fn annotate_token(input: &mut String, token: Tk) {
|
||||
let priority = |m: char| -> u8 {
|
||||
match m {
|
||||
markers::RESET => 0,
|
||||
markers::VAR_SUB |
|
||||
markers::VAR_SUB_END |
|
||||
markers::CMD_SUB |
|
||||
markers::CMD_SUB_END |
|
||||
markers::PROC_SUB |
|
||||
markers::PROC_SUB_END |
|
||||
markers::STRING_DQ |
|
||||
markers::STRING_DQ_END |
|
||||
markers::STRING_SQ |
|
||||
markers::STRING_SQ_END |
|
||||
markers::SUBSH_END => 2,
|
||||
markers::ARG => 3,
|
||||
_ => 1,
|
||||
}
|
||||
};
|
||||
@@ -555,12 +657,18 @@ pub fn annotate_token(input: &mut String, token: Tk) {
|
||||
let priority = |m: char| -> u8 {
|
||||
match m {
|
||||
markers::RESET => 0,
|
||||
markers::VAR_SUB |
|
||||
markers::VAR_SUB_END |
|
||||
markers::CMD_SUB |
|
||||
markers::CMD_SUB_END |
|
||||
markers::PROC_SUB |
|
||||
markers::PROC_SUB_END |
|
||||
markers::STRING_DQ |
|
||||
markers::STRING_DQ_END |
|
||||
markers::STRING_SQ |
|
||||
markers::STRING_SQ_END |
|
||||
markers::SUBSH_END => 2,
|
||||
markers::ARG => 3, // Lowest priority - processed first, overridden by sub-tokens
|
||||
_ => 1,
|
||||
}
|
||||
};
|
||||
@@ -571,12 +679,9 @@ pub fn annotate_token(input: &mut String, token: Tk) {
|
||||
});
|
||||
stack.retain(|(i, m)| *i <= token.span.start && !markers::END_MARKERS.contains(m));
|
||||
|
||||
log::error!("Checking context for token '{}', looking for '{}'", token.span.as_str(), c);
|
||||
let Some(ctx) = stack.last() else {
|
||||
return false;
|
||||
};
|
||||
log::error!("Context stack for token '{}': {:?}", token.span.as_str(), stack);
|
||||
log::error!("Found context marker '{}' at position {}", ctx.1, ctx.0);
|
||||
|
||||
ctx.1 == c
|
||||
};
|
||||
@@ -788,14 +893,10 @@ pub fn annotate_token(input: &mut String, token: Tk) {
|
||||
}
|
||||
}
|
||||
|
||||
// Sort by position descending, with priority ordering at same position:
|
||||
// - RESET first (inserted first, ends up rightmost)
|
||||
// - Regular markers middle
|
||||
// - END markers last (inserted last, ends up leftmost)
|
||||
// Result: [END][TOGGLE][RESET]
|
||||
sort_insertions(&mut insertions);
|
||||
|
||||
for (pos, marker) in insertions {
|
||||
log::info!("Inserting marker {marker:?} at position {pos}");
|
||||
let pos = pos.max(0).min(input.len());
|
||||
input.insert(pos, marker);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user