more highlighter tune-ups
This commit is contained in:
@@ -2,6 +2,12 @@ use std::{env, path::{Path, PathBuf}};
|
|||||||
|
|
||||||
use crate::{libsh::term::{Style, StyleSet, Styled}, prompt::readline::{annotate_input, markers}, state::read_logic};
|
use crate::{libsh::term::{Style, StyleSet, Styled}, prompt::readline::{annotate_input, markers}, state::read_logic};
|
||||||
|
|
||||||
|
/// Syntax highlighter for shell input using Unicode marker-based annotation
|
||||||
|
///
|
||||||
|
/// The highlighter processes annotated input strings containing invisible Unicode markers
|
||||||
|
/// (U+FDD0-U+FDEF range) that indicate syntax elements. It generates ANSI escape codes
|
||||||
|
/// for terminal display while maintaining a style stack for proper color restoration
|
||||||
|
/// in nested constructs (e.g., variables inside strings inside command substitutions).
|
||||||
pub struct Highlighter {
|
pub struct Highlighter {
|
||||||
input: String,
|
input: String,
|
||||||
output: String,
|
output: String,
|
||||||
@@ -10,6 +16,7 @@ pub struct Highlighter {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl Highlighter {
|
impl Highlighter {
|
||||||
|
/// Creates a new highlighter with empty buffers and reset state
|
||||||
pub fn new() -> Self {
|
pub fn new() -> Self {
|
||||||
Self {
|
Self {
|
||||||
input: String::new(),
|
input: String::new(),
|
||||||
@@ -19,11 +26,20 @@ impl Highlighter {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Loads raw input text and annotates it with syntax markers
|
||||||
|
///
|
||||||
|
/// The input is passed through the annotator which inserts Unicode markers
|
||||||
|
/// indicating token types and sub-token constructs (strings, variables, etc.)
|
||||||
pub fn load_input(&mut self, input: &str) {
|
pub fn load_input(&mut self, input: &str) {
|
||||||
let input = annotate_input(input);
|
let input = annotate_input(input);
|
||||||
self.input = input;
|
self.input = input;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Processes the annotated input and generates ANSI-styled output
|
||||||
|
///
|
||||||
|
/// Walks through the input character by character, interpreting markers and
|
||||||
|
/// applying appropriate styles. Nested constructs (command substitutions,
|
||||||
|
/// subshells, strings) are handled recursively with proper style restoration.
|
||||||
pub fn highlight(&mut self) {
|
pub fn highlight(&mut self) {
|
||||||
let input = self.input.clone();
|
let input = self.input.clone();
|
||||||
let mut input_chars = input.chars().peekable();
|
let mut input_chars = input.chars().peekable();
|
||||||
@@ -156,6 +172,10 @@ impl Highlighter {
|
|||||||
if *ch == markers::VAR_SUB_END {
|
if *ch == markers::VAR_SUB_END {
|
||||||
input_chars.next(); // consume the end marker
|
input_chars.next(); // consume the end marker
|
||||||
break;
|
break;
|
||||||
|
} else if markers::is_marker(*ch) {
|
||||||
|
log::warn!("Unhandled marker character in variable substitution: U+{:04X}", *ch as u32);
|
||||||
|
input_chars.next(); // skip the marker
|
||||||
|
continue;
|
||||||
}
|
}
|
||||||
var_sub.push(*ch);
|
var_sub.push(*ch);
|
||||||
input_chars.next();
|
input_chars.next();
|
||||||
@@ -166,13 +186,21 @@ impl Highlighter {
|
|||||||
self.pop_style();
|
self.pop_style();
|
||||||
}
|
}
|
||||||
_ => {
|
_ => {
|
||||||
self.output.push(ch);
|
if markers::is_marker(ch) {
|
||||||
self.last_was_reset = false;
|
log::warn!("Unhandled marker character in highlighter: U+{:04X}", ch as u32);
|
||||||
|
} else {
|
||||||
|
self.output.push(ch);
|
||||||
|
self.last_was_reset = false;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Extracts the highlighted output and resets the highlighter state
|
||||||
|
///
|
||||||
|
/// Clears the input buffer, style stack, and returns the generated output
|
||||||
|
/// containing ANSI escape codes. The highlighter is ready for reuse after this.
|
||||||
pub fn take(&mut self) -> String {
|
pub fn take(&mut self) -> String {
|
||||||
log::info!("Highlighting result: {:?}", self.output);
|
log::info!("Highlighting result: {:?}", self.output);
|
||||||
self.input.clear();
|
self.input.clear();
|
||||||
@@ -180,6 +208,12 @@ impl Highlighter {
|
|||||||
std::mem::take(&mut self.output)
|
std::mem::take(&mut self.output)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Checks if a command name is valid (exists in PATH, is a function, or is an alias)
|
||||||
|
///
|
||||||
|
/// Searches:
|
||||||
|
/// 1. Current directory if command is a path
|
||||||
|
/// 2. All directories in PATH environment variable
|
||||||
|
/// 3. Shell functions and aliases in the current shell state
|
||||||
fn is_valid(command: &str) -> bool {
|
fn is_valid(command: &str) -> bool {
|
||||||
let path = env::var("PATH").unwrap_or_default();
|
let path = env::var("PATH").unwrap_or_default();
|
||||||
let paths = path.split(':');
|
let paths = path.split(':');
|
||||||
@@ -202,6 +236,10 @@ impl Highlighter {
|
|||||||
false
|
false
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Emits a reset ANSI code to the output, with deduplication
|
||||||
|
///
|
||||||
|
/// Only emits the reset if the last emitted code was not already a reset,
|
||||||
|
/// preventing redundant `\x1b[0m` sequences in the output.
|
||||||
fn emit_reset(&mut self) {
|
fn emit_reset(&mut self) {
|
||||||
if !self.last_was_reset {
|
if !self.last_was_reset {
|
||||||
self.output.push_str(&Style::Reset.to_string());
|
self.output.push_str(&Style::Reset.to_string());
|
||||||
@@ -209,17 +247,31 @@ impl Highlighter {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Emits a style ANSI code to the output
|
||||||
|
///
|
||||||
|
/// Unconditionally appends the ANSI escape sequence for the given style
|
||||||
|
/// and marks that we're no longer in a reset state.
|
||||||
fn emit_style(&mut self, style: &StyleSet) {
|
fn emit_style(&mut self, style: &StyleSet) {
|
||||||
self.output.push_str(&style.to_string());
|
self.output.push_str(&style.to_string());
|
||||||
self.last_was_reset = false;
|
self.last_was_reset = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Pushes a new style onto the stack and emits its ANSI code
|
||||||
|
///
|
||||||
|
/// Used when entering a new syntax context (string, variable, command, etc.).
|
||||||
|
/// The style stack allows proper restoration when exiting nested constructs.
|
||||||
pub fn push_style(&mut self, style: impl Into<StyleSet>) {
|
pub fn push_style(&mut self, style: impl Into<StyleSet>) {
|
||||||
let set: StyleSet = style.into();
|
let set: StyleSet = style.into();
|
||||||
self.style_stack.push(set.clone());
|
self.style_stack.push(set.clone());
|
||||||
self.emit_style(&set);
|
self.emit_style(&set);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Pops a style from the stack and restores the previous style
|
||||||
|
///
|
||||||
|
/// Used when exiting a syntax context. If there's a parent style on the stack,
|
||||||
|
/// it's re-emitted to restore the previous color. Otherwise, emits a reset.
|
||||||
|
/// This ensures colors are properly restored in nested constructs like
|
||||||
|
/// `"string with $VAR"` where the string color resumes after the variable.
|
||||||
pub fn pop_style(&mut self) {
|
pub fn pop_style(&mut self) {
|
||||||
self.style_stack.pop();
|
self.style_stack.pop();
|
||||||
if let Some(style) = self.style_stack.last().cloned() {
|
if let Some(style) = self.style_stack.last().cloned() {
|
||||||
@@ -229,11 +281,20 @@ impl Highlighter {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Clears all styles from the stack and emits a reset
|
||||||
|
///
|
||||||
|
/// Used at command separators and explicit reset markers to return to
|
||||||
|
/// the default terminal color between independent commands.
|
||||||
pub fn clear_styles(&mut self) {
|
pub fn clear_styles(&mut self) {
|
||||||
self.style_stack.clear();
|
self.style_stack.clear();
|
||||||
self.emit_reset();
|
self.emit_reset();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Simple marker-to-ANSI replacement (unused in favor of stack-based highlighting)
|
||||||
|
///
|
||||||
|
/// Performs direct string replacement of markers with ANSI codes, without
|
||||||
|
/// handling nesting or proper color restoration. Kept for reference but not
|
||||||
|
/// used in the current implementation.
|
||||||
pub fn trivial_replace(&mut self) {
|
pub fn trivial_replace(&mut self) {
|
||||||
self.input = self.input
|
self.input = self.input
|
||||||
.replace([markers::RESET, markers::ARG], "\x1b[0m")
|
.replace([markers::RESET, markers::ARG], "\x1b[0m")
|
||||||
|
|||||||
@@ -62,6 +62,30 @@ pub mod markers {
|
|||||||
SUBSH_END,
|
SUBSH_END,
|
||||||
RESET
|
RESET
|
||||||
];
|
];
|
||||||
|
pub const TOKEN_LEVEL: [char;10] = [
|
||||||
|
SUBSH,
|
||||||
|
COMMAND,
|
||||||
|
BUILTIN,
|
||||||
|
ARG,
|
||||||
|
KEYWORD,
|
||||||
|
OPERATOR,
|
||||||
|
REDIRECT,
|
||||||
|
CMD_SEP,
|
||||||
|
CASE_PAT,
|
||||||
|
ASSIGNMENT,
|
||||||
|
];
|
||||||
|
pub const SUB_TOKEN: [char;6] = [
|
||||||
|
VAR_SUB,
|
||||||
|
CMD_SUB,
|
||||||
|
PROC_SUB,
|
||||||
|
STRING_DQ,
|
||||||
|
STRING_SQ,
|
||||||
|
GLOB,
|
||||||
|
];
|
||||||
|
|
||||||
|
pub fn is_marker(c: char) -> bool {
|
||||||
|
TOKEN_LEVEL.contains(&c) || SUB_TOKEN.contains(&c) || END_MARKERS.contains(&c)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Non-blocking readline result
|
/// Non-blocking readline result
|
||||||
@@ -219,7 +243,7 @@ impl FernVi {
|
|||||||
log::debug!("{line:?}");
|
log::debug!("{line:?}");
|
||||||
let to_cursor = self.editor.slice_to_cursor().unwrap_or_default();
|
let to_cursor = self.editor.slice_to_cursor().unwrap_or_default();
|
||||||
let (cols, _) = get_win_size(STDIN_FILENO);
|
let (cols, _) = get_win_size(STDIN_FILENO);
|
||||||
Layout::from_parts(/* tab_stop: */ 8, cols, &self.prompt, to_cursor, &line)
|
Layout::from_parts(/* tab_stop: */ 8, cols, &self.prompt, to_cursor, line)
|
||||||
}
|
}
|
||||||
pub fn scroll_history(&mut self, cmd: ViCmd) {
|
pub fn scroll_history(&mut self, cmd: ViCmd) {
|
||||||
log::debug!("scrolling");
|
log::debug!("scrolling");
|
||||||
@@ -305,12 +329,16 @@ impl FernVi {
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub fn line_text(&mut self) -> String {
|
pub fn line_text(&mut self) -> String {
|
||||||
|
let start = Instant::now();
|
||||||
let line = self.editor.to_string();
|
let line = self.editor.to_string();
|
||||||
self.highlighter.load_input(&line);
|
self.highlighter.load_input(&line);
|
||||||
self.highlighter.highlight();
|
self.highlighter.highlight();
|
||||||
let highlighted = self.highlighter.take();
|
let highlighted = self.highlighter.take();
|
||||||
let hint = self.editor.get_hint_text();
|
let hint = self.editor.get_hint_text();
|
||||||
format!("{highlighted}{hint}")
|
let complete = format!("{highlighted}{hint}");
|
||||||
|
let end = start.elapsed();
|
||||||
|
log::info!("Line styling done in: {:.2?}", end);
|
||||||
|
complete
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn print_line(&mut self) -> ShResult<()> {
|
pub fn print_line(&mut self) -> ShResult<()> {
|
||||||
@@ -486,8 +514,25 @@ impl FernVi {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Annotate a given input with helpful markers that give quick contextual syntax information
|
/// Annotates shell input with invisible Unicode markers for syntax highlighting
|
||||||
/// Useful for syntax highlighting and completion
|
///
|
||||||
|
/// Takes raw shell input and inserts non-character markers (U+FDD0-U+FDEF range)
|
||||||
|
/// around syntax elements. These markers indicate:
|
||||||
|
/// - Token-level context (commands, arguments, operators, keywords)
|
||||||
|
/// - Sub-token constructs (strings, variables, command substitutions, globs)
|
||||||
|
///
|
||||||
|
/// The annotated string is suitable for processing by the highlighter, which
|
||||||
|
/// interprets the markers and generates ANSI escape codes.
|
||||||
|
///
|
||||||
|
/// # Strategy
|
||||||
|
/// Tokens are processed in reverse order so that later insertions don't
|
||||||
|
/// invalidate earlier positions. Each token is annotated independently.
|
||||||
|
///
|
||||||
|
/// # Example
|
||||||
|
/// ```text
|
||||||
|
/// "echo $USER" -> "COMMAND echo RESET ARG VAR_SUB $USER VAR_SUB_END RESET"
|
||||||
|
/// ```
|
||||||
|
/// (where COMMAND, RESET, etc. are invisible Unicode markers)
|
||||||
pub fn annotate_input(input: &str) -> String {
|
pub fn annotate_input(input: &str) -> String {
|
||||||
let mut annotated = input.to_string();
|
let mut annotated = input.to_string();
|
||||||
let input = Arc::new(input.to_string());
|
let input = Arc::new(input.to_string());
|
||||||
@@ -502,6 +547,16 @@ pub fn annotate_input(input: &str) -> String {
|
|||||||
annotated
|
annotated
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Maps token class to its corresponding marker character
|
||||||
|
///
|
||||||
|
/// Returns the appropriate Unicode marker for token-level syntax elements.
|
||||||
|
/// Token-level markers are derived directly from the lexer's token classification
|
||||||
|
/// and represent complete tokens (operators, separators, etc.).
|
||||||
|
///
|
||||||
|
/// Returns `None` for:
|
||||||
|
/// - String tokens (which need sub-token scanning for variables, quotes, etc.)
|
||||||
|
/// - Structural markers (SOI, EOI, Null)
|
||||||
|
/// - Unimplemented features (comments, brace groups)
|
||||||
pub fn marker_for(class: &TkRule) -> Option<char> {
|
pub fn marker_for(class: &TkRule) -> Option<char> {
|
||||||
match class {
|
match class {
|
||||||
TkRule::Pipe |
|
TkRule::Pipe |
|
||||||
@@ -523,7 +578,48 @@ pub fn marker_for(class: &TkRule) -> Option<char> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Annotates a single token with markers for both token-level and sub-token constructs
|
||||||
|
///
|
||||||
|
/// This is the core annotation function that handles the complexity of shell syntax.
|
||||||
|
/// It uses a two-phase approach:
|
||||||
|
///
|
||||||
|
/// # Phase 1: Analysis (Delayed Insertion)
|
||||||
|
/// Scans through the token character by character, recording marker insertions
|
||||||
|
/// as `(position, marker)` pairs in a list. This avoids borrowing issues and
|
||||||
|
/// allows context queries during the scan.
|
||||||
|
///
|
||||||
|
/// The analysis phase handles:
|
||||||
|
/// - **Strings**: Single/double quoted regions (with escaping rules)
|
||||||
|
/// - **Variables**: `$VAR` and `${VAR}` expansions
|
||||||
|
/// - **Command substitutions**: `$(...)` with depth tracking
|
||||||
|
/// - **Process substitutions**: `<(...)` and `>(...)`
|
||||||
|
/// - **Globs**: `*`, `?`, `[...]` patterns (context-aware)
|
||||||
|
/// - **Escapes**: Backslash escaping
|
||||||
|
///
|
||||||
|
/// # Phase 2: Application (Sorted Insertion)
|
||||||
|
/// Markers are sorted by position (descending) to avoid index invalidation when
|
||||||
|
/// inserting into the string. At the same position, markers are ordered:
|
||||||
|
/// 1. RESET (rightmost)
|
||||||
|
/// 2. Regular markers (middle)
|
||||||
|
/// 3. END markers (leftmost)
|
||||||
|
///
|
||||||
|
/// This produces the pattern: `[END][TOGGLE][RESET]` at boundaries.
|
||||||
|
///
|
||||||
|
/// # Context Tracking
|
||||||
|
/// The `in_context` closure queries the insertion list to determine the active
|
||||||
|
/// syntax context at the current position. This enables context-aware decisions
|
||||||
|
/// like "only highlight globs in arguments, not in command names".
|
||||||
|
///
|
||||||
|
/// # Depth Tracking
|
||||||
|
/// Nested constructs like `$(echo $(date))` are tracked with depth counters.
|
||||||
|
/// Only the outermost construct is marked; inner content is handled recursively
|
||||||
|
/// by the highlighter.
|
||||||
pub fn annotate_token(input: &mut String, token: Tk) {
|
pub fn annotate_token(input: &mut String, token: Tk) {
|
||||||
|
// Sort by position descending, with priority ordering at same position:
|
||||||
|
// - RESET first (inserted first, ends up rightmost)
|
||||||
|
// - Regular markers middle
|
||||||
|
// - END markers last (inserted last, ends up leftmost)
|
||||||
|
// Result: [END][TOGGLE][RESET]
|
||||||
let sort_insertions = |insertions: &mut Vec<(usize, char)>| {
|
let sort_insertions = |insertions: &mut Vec<(usize, char)>| {
|
||||||
insertions.sort_by(|a, b| {
|
insertions.sort_by(|a, b| {
|
||||||
match b.0.cmp(&a.0) {
|
match b.0.cmp(&a.0) {
|
||||||
@@ -531,12 +627,18 @@ pub fn annotate_token(input: &mut String, token: Tk) {
|
|||||||
let priority = |m: char| -> u8 {
|
let priority = |m: char| -> u8 {
|
||||||
match m {
|
match m {
|
||||||
markers::RESET => 0,
|
markers::RESET => 0,
|
||||||
|
markers::VAR_SUB |
|
||||||
markers::VAR_SUB_END |
|
markers::VAR_SUB_END |
|
||||||
|
markers::CMD_SUB |
|
||||||
markers::CMD_SUB_END |
|
markers::CMD_SUB_END |
|
||||||
|
markers::PROC_SUB |
|
||||||
markers::PROC_SUB_END |
|
markers::PROC_SUB_END |
|
||||||
|
markers::STRING_DQ |
|
||||||
markers::STRING_DQ_END |
|
markers::STRING_DQ_END |
|
||||||
|
markers::STRING_SQ |
|
||||||
markers::STRING_SQ_END |
|
markers::STRING_SQ_END |
|
||||||
markers::SUBSH_END => 2,
|
markers::SUBSH_END => 2,
|
||||||
|
markers::ARG => 3,
|
||||||
_ => 1,
|
_ => 1,
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
@@ -555,12 +657,18 @@ pub fn annotate_token(input: &mut String, token: Tk) {
|
|||||||
let priority = |m: char| -> u8 {
|
let priority = |m: char| -> u8 {
|
||||||
match m {
|
match m {
|
||||||
markers::RESET => 0,
|
markers::RESET => 0,
|
||||||
|
markers::VAR_SUB |
|
||||||
markers::VAR_SUB_END |
|
markers::VAR_SUB_END |
|
||||||
|
markers::CMD_SUB |
|
||||||
markers::CMD_SUB_END |
|
markers::CMD_SUB_END |
|
||||||
|
markers::PROC_SUB |
|
||||||
markers::PROC_SUB_END |
|
markers::PROC_SUB_END |
|
||||||
|
markers::STRING_DQ |
|
||||||
markers::STRING_DQ_END |
|
markers::STRING_DQ_END |
|
||||||
|
markers::STRING_SQ |
|
||||||
markers::STRING_SQ_END |
|
markers::STRING_SQ_END |
|
||||||
markers::SUBSH_END => 2,
|
markers::SUBSH_END => 2,
|
||||||
|
markers::ARG => 3, // Lowest priority - processed first, overridden by sub-tokens
|
||||||
_ => 1,
|
_ => 1,
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
@@ -571,12 +679,9 @@ pub fn annotate_token(input: &mut String, token: Tk) {
|
|||||||
});
|
});
|
||||||
stack.retain(|(i, m)| *i <= token.span.start && !markers::END_MARKERS.contains(m));
|
stack.retain(|(i, m)| *i <= token.span.start && !markers::END_MARKERS.contains(m));
|
||||||
|
|
||||||
log::error!("Checking context for token '{}', looking for '{}'", token.span.as_str(), c);
|
|
||||||
let Some(ctx) = stack.last() else {
|
let Some(ctx) = stack.last() else {
|
||||||
return false;
|
return false;
|
||||||
};
|
};
|
||||||
log::error!("Context stack for token '{}': {:?}", token.span.as_str(), stack);
|
|
||||||
log::error!("Found context marker '{}' at position {}", ctx.1, ctx.0);
|
|
||||||
|
|
||||||
ctx.1 == c
|
ctx.1 == c
|
||||||
};
|
};
|
||||||
@@ -788,14 +893,10 @@ pub fn annotate_token(input: &mut String, token: Tk) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Sort by position descending, with priority ordering at same position:
|
|
||||||
// - RESET first (inserted first, ends up rightmost)
|
|
||||||
// - Regular markers middle
|
|
||||||
// - END markers last (inserted last, ends up leftmost)
|
|
||||||
// Result: [END][TOGGLE][RESET]
|
|
||||||
sort_insertions(&mut insertions);
|
sort_insertions(&mut insertions);
|
||||||
|
|
||||||
for (pos, marker) in insertions {
|
for (pos, marker) in insertions {
|
||||||
|
log::info!("Inserting marker {marker:?} at position {pos}");
|
||||||
let pos = pos.max(0).min(input.len());
|
let pos = pos.max(0).min(input.len());
|
||||||
input.insert(pos, marker);
|
input.insert(pos, marker);
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user