From 37e746cb90620818d513184fb04367c4b9207a33 Mon Sep 17 00:00:00 2001 From: Kyler Clay Date: Mon, 21 Apr 2025 01:56:05 -0400 Subject: [PATCH] Early implementation of syntax highlighting Various bug fixes related to command substitution --- src/expand.rs | 25 +++---- src/parse/execute.rs | 4 -- src/parse/lex.rs | 79 ++++++++++++---------- src/prompt/highlight.rs | 144 ++++++++++++++++++++++++++++++++++++++++ src/prompt/mod.rs | 1 + src/prompt/readline.rs | 11 +-- 6 files changed, 201 insertions(+), 63 deletions(-) create mode 100644 src/prompt/highlight.rs diff --git a/src/expand.rs b/src/expand.rs index 4bc6a2a..9f74cc6 100644 --- a/src/expand.rs +++ b/src/expand.rs @@ -93,6 +93,7 @@ impl Expander { let mut result = String::new(); let mut var_name = String::new(); let mut in_brace = false; + flog!(DEBUG, self.raw); while let Some(ch) = chars.next() { match ch { @@ -103,28 +104,19 @@ impl Expander { VAR_SUB => { while let Some(ch) = chars.next() { match ch { - '(' if var_name.is_empty() => { - let mut paren_stack = vec!['(']; + SUBSH if var_name.is_empty() => { let mut subsh_body = String::new(); while let Some(ch) = chars.next() { - flog!(DEBUG, "looping"); - flog!(DEBUG, subsh_body); match ch { - '(' => { - paren_stack.push(ch); - subsh_body.push(ch); - } - ')' => { - paren_stack.pop(); - if paren_stack.is_empty() { break }; - subsh_body.push(ch); + SUBSH => { + break } _ => subsh_body.push(ch) } } result.push_str(&expand_cmd_sub(&subsh_body)?); } - '{' => in_brace = true, + '{' if var_name.is_empty() => in_brace = true, '}' if in_brace => { let var_val = read_vars(|v| v.get_var(&var_name)); result.push_str(&var_val); @@ -208,12 +200,13 @@ pub fn expand_cmd_sub(raw: &str) -> ShResult { /// /// Clean up a single layer of escape characters, and then replace control characters like '$' with a non-character unicode representation that is unmistakable by the rest of the code pub fn unescape_str(raw: &str) -> String { - let mut chars = raw.chars(); + let mut chars = raw.chars().peekable(); let mut result = String::new(); let mut first_char = true; while let Some(ch) = chars.next() { + flog!(DEBUG,result); match ch { '~' if first_char => { result.push(TILDE_SUB) @@ -234,7 +227,7 @@ pub fn unescape_str(raw: &str) -> String { result.push(next_ch) } } - '$' => result.push(VAR_SUB), + '$' if chars.peek() != Some(&'(') => result.push(VAR_SUB), '(' => { paren_count += 1; result.push(subsh_ch) @@ -243,10 +236,10 @@ pub fn unescape_str(raw: &str) -> String { paren_count -= 1; if paren_count == 0 { result.push(SUBSH); + break } else { result.push(subsh_ch) } - break } _ => result.push(subsh_ch) } diff --git a/src/parse/execute.rs b/src/parse/execute.rs index 2a702e6..075ef4c 100644 --- a/src/parse/execute.rs +++ b/src/parse/execute.rs @@ -76,7 +76,6 @@ impl Dispatcher { Ok(()) } pub fn dispatch_node(&mut self, node: Node) -> ShResult<()> { - flog!(DEBUG, node.class); match node.class { NdRule::Conjunction {..} => self.exec_conjunction(node)?, NdRule::Pipeline {..} => self.exec_pipeline(node)?, @@ -166,7 +165,6 @@ impl Dispatcher { let subsh = argv.remove(0); let subsh_body = subsh.0.to_string(); - flog!(DEBUG, subsh_body); let snapshot = get_snapshots(); if let Err(e) = exec_input(subsh_body) { @@ -248,14 +246,12 @@ impl Dispatcher { self.io_stack.append_to_frame(case_stmt.redirs); - flog!(DEBUG,pattern.span.as_str()); let exp_pattern = pattern.clone().expand()?; let pattern_raw = exp_pattern .get_words() .first() .map(|s| s.to_string()) .unwrap_or_default(); - flog!(DEBUG,exp_pattern); for block in case_blocks { let CaseNode { pattern, body } = block; diff --git a/src/parse/lex.rs b/src/parse/lex.rs index fcad1ec..c17ccbb 100644 --- a/src/parse/lex.rs +++ b/src/parse/lex.rs @@ -109,6 +109,9 @@ impl Tk { pub fn source(&self) -> Arc { self.span.source.clone() } + pub fn mark(&mut self, flag: TkFlags) { + self.flags |= flag; + } /// Used to see if a separator is ';;' for case statements pub fn has_double_semi(&self) -> bool { let TkRule::Sep = self.class else { @@ -131,14 +134,15 @@ impl Display for Tk { bitflags! { #[derive(Debug,Clone,Copy,PartialEq,Default)] pub struct TkFlags: u32 { - const KEYWORD = 0b0000000000000001; - /// This is a keyword that opens a new block statement, like 'if' and 'while' - const OPENER = 0b0000000000000010; - const IS_CMD = 0b0000000000000100; - const IS_SUBSH = 0b0000000000001000; - const IS_OP = 0b0000000000010000; - const ASSIGN = 0b0000000000100000; - const BUILTIN = 0b0000000001000000; + const KEYWORD = 0b0000000000000001; + /// This is a keyword that opens a new block statement, like 'if' and 'while' + const OPENER = 0b0000000000000010; + const IS_CMD = 0b0000000000000100; + const IS_SUBSH = 0b0000000000001000; + const IS_CMDSUB = 0b0000000000010000; + const IS_OP = 0b0000000000100000; + const ASSIGN = 0b0000000001000000; + const BUILTIN = 0b0000000010000000; } } @@ -360,7 +364,7 @@ impl LexStream { _ => pos += ch.len_utf8() } } - if !paren_stack.is_empty() { + if !paren_stack.is_empty() && !self.flags.contains(LexFlags::LEX_UNFINISHED) { return Err( ShErr::full( ShErrKind::ParseErr, @@ -395,7 +399,7 @@ impl LexStream { _ => continue } } - if !paren_stack.is_empty() { + if !paren_stack.is_empty() && !self.flags.contains(LexFlags::LEX_UNFINISHED) { self.cursor = pos; return Err( ShErr::full( @@ -469,37 +473,40 @@ impl LexStream { ) ); } - // TODO: clean up this mess + + let text = new_tk.span.as_str(); if self.flags.contains(LexFlags::NEXT_IS_CMD) { - if is_keyword(new_tk.span.as_str()) { - if matches!(new_tk.span.as_str(), "case" | "select" | "for") { + match text { + "case" | "select" | "for" => { + new_tk.mark(TkFlags::KEYWORD); self.flags |= LexFlags::EXPECTING_IN; - new_tk.flags |= TkFlags::KEYWORD; - self.set_next_is_cmd(false); - } else { - new_tk.flags |= TkFlags::KEYWORD; } - } else if is_assignment(new_tk.span.as_str()) { - new_tk.flags |= TkFlags::ASSIGN; - } else { - if self.flags.contains(LexFlags::EXPECTING_IN) { - if new_tk.span.as_str() != "in" { - new_tk.flags |= TkFlags::IS_CMD; - } else { - new_tk.flags |= TkFlags::KEYWORD; - self.flags &= !LexFlags::EXPECTING_IN; - } - } else { + "in" if self.flags.contains(LexFlags::EXPECTING_IN) => { + new_tk.mark(TkFlags::KEYWORD); + self.flags &= !LexFlags::EXPECTING_IN; + } + _ if is_keyword(text) => { + new_tk.mark(TkFlags::KEYWORD); + } + _ if is_assignment(text) => { + new_tk.mark(TkFlags::ASSIGN); + } + _ if is_cmd_sub(text) => { + new_tk.mark(TkFlags::IS_CMDSUB) + } + _ => { new_tk.flags |= TkFlags::IS_CMD; + if BUILTINS.contains(&text) { + new_tk.mark(TkFlags::BUILTIN); + } } - if BUILTINS.contains(&new_tk.span.as_str()) { - new_tk.flags |= TkFlags::BUILTIN; - } - self.set_next_is_cmd(false); } - } else if self.flags.contains(LexFlags::EXPECTING_IN) && new_tk.span.as_str() == "in" { - new_tk.flags |= TkFlags::KEYWORD; + self.set_next_is_cmd(false); + } else if self.flags.contains(LexFlags::EXPECTING_IN) && text == "in" { + new_tk.mark(TkFlags::KEYWORD); self.flags &= !LexFlags::EXPECTING_IN; + } else if is_cmd_sub(text) { + new_tk.mark(TkFlags::IS_CMDSUB) } self.cursor = pos; Ok(new_tk) @@ -669,6 +676,10 @@ pub fn is_keyword(slice: &str) -> bool { (slice.ends_with("()") && !slice.ends_with("\\()")) } +pub fn is_cmd_sub(slice: &str) -> bool { + (slice.starts_with("$(") && slice.ends_with(')')) && !slice.ends_with("\\)") +} + pub fn lookahead(pat: &str, mut chars: Chars) -> Option { let mut pos = 0; let mut char_deque = VecDeque::new(); diff --git a/src/prompt/highlight.rs b/src/prompt/highlight.rs new file mode 100644 index 0000000..047b4d3 --- /dev/null +++ b/src/prompt/highlight.rs @@ -0,0 +1,144 @@ +use std::{env, os::unix::fs::PermissionsExt, path::{Path, PathBuf}, sync::Arc}; +use crate::prelude::*; + +use rustyline::highlight::Highlighter; +use crate::{libsh::term::{Style, StyleSet, Styled}, parse::lex::{LexFlags, LexStream, Tk, TkFlags, TkRule}, state::read_logic}; + +use super::readline::FernReadline; + +fn is_executable(path: &Path) -> bool { + path.metadata() + .map(|m| m.permissions().mode() & 0o111 != 0) + .unwrap_or(false) +} + +#[derive(Default,Debug)] +pub struct FernHighlighter { + input: String, +} + +impl FernHighlighter { + pub fn new(input: String) -> Self { + Self { + input, + } + } + pub fn highlight_subsh(&self, token: Tk) -> String { + if token.flags.contains(TkFlags::IS_SUBSH) { + let raw = token.as_str(); + let body = &raw[1..raw.len() - 1]; + let sub_hl = FernHighlighter::new(body.to_string()); + let body_highlighted = sub_hl.hl_input(); + let open_paren = "(".styled(Style::BrightBlue); + let close_paren = ")".styled(Style::BrightBlue); + format!("{open_paren}{body_highlighted}{close_paren}") + } else if token.flags.contains(TkFlags::IS_CMDSUB) { + let raw = token.as_str(); + let body = &raw[2..raw.len() - 1]; + let sub_hl = FernHighlighter::new(body.to_string()); + let body_highlighted = sub_hl.hl_input(); + let dollar_paren = "$(".styled(Style::BrightBlue); + let close_paren = ")".styled(Style::BrightBlue); + format!("{dollar_paren}{body_highlighted}{close_paren}") + } else { + unreachable!() + } + } + pub fn hl_command(&self, token: Tk) -> String { + let raw = token.as_str(); + let paths = env::var("PATH") + .unwrap_or_default(); + let mut paths = paths.split(':'); + + let is_in_path = { + loop { + let Some(path) = paths.next() else { + break false + }; + + let mut path = PathBuf::from(path); + path.push(PathBuf::from(raw)); + + if path.is_file() && is_executable(&path) { + break true + }; + } + }; + // TODO: zsh is capable of highlighting an alias red even if it exists, if the command it refers to is not found + // Implement some way to find out if the content of the alias is valid as well + let is_alias_or_function = read_logic(|l| { + l.get_func(raw).is_some() || l.get_alias(raw).is_some() + }); + + if is_alias_or_function || is_in_path { + raw.styled(Style::Green) + } else { + raw.styled(Style::Bold | Style::Red) + } + } + pub fn hl_input(&self) -> String { + let mut output = self.input.clone(); + + // TODO: properly implement highlighting for unfinished input + let lex_results = LexStream::new(Arc::new(output.clone()), LexFlags::empty()); + let mut tokens = vec![]; + + for result in lex_results { + let Ok(token) = result else { + return self.input.clone(); + }; + tokens.push(token) + } + + // Reverse the tokens, because we want to highlight from right to left + // Doing it this way allows us to trust the spans in the tokens throughout the entire process + let tokens = tokens.into_iter() + .rev() + .collect::>(); + for token in tokens { + flog!(DEBUG, token.flags); + match token.class { + _ if token.flags.intersects(TkFlags::IS_CMDSUB | TkFlags::IS_SUBSH) => { + let styled = self.highlight_subsh(token.clone()); + output.replace_range(token.span.start..token.span.end, &styled); + } + TkRule::Str => { + if token.flags.contains(TkFlags::IS_CMD) { + let styled = self.hl_command(token.clone()); + output.replace_range(token.span.start..token.span.end, &styled); + } else { + output.replace_range(token.span.start..token.span.end, &token.to_string()); + } + } + TkRule::Pipe | + TkRule::ErrPipe | + TkRule::And | + TkRule::Or | + TkRule::Bg | + TkRule::Sep | + TkRule::Redir => self.style_with_token(&token,&mut output,Style::Cyan.into()), + TkRule::CasePattern => self.style_with_token(&token,&mut output,Style::Blue.into()), + TkRule::BraceGrpStart | + TkRule::BraceGrpEnd => self.style_with_token(&token,&mut output,Style::Cyan.into()), + TkRule::Comment => self.style_with_token(&token,&mut output,Style::BrightBlack.into()), + _ => { output.replace_range(token.span.start..token.span.end, &token.to_string()); } + } + } + + output + } + fn style_with_token(&self, token: &Tk, highlighted: &mut String, style: StyleSet) { + let styled = token.to_string().styled(style); + highlighted.replace_range(token.span.start..token.span.end, &styled); + } +} + +impl Highlighter for FernReadline { + fn highlight<'l>(&self, line: &'l str, _pos: usize) -> std::borrow::Cow<'l, str> { + let highlighter = FernHighlighter::new(line.to_string()); + std::borrow::Cow::Owned(highlighter.hl_input()) + } + fn highlight_char(&self, _line: &str, _pos: usize, _kind: rustyline::highlight::CmdKind) -> bool { + true + } +} diff --git a/src/prompt/mod.rs b/src/prompt/mod.rs index 5fcfd91..ed6f367 100644 --- a/src/prompt/mod.rs +++ b/src/prompt/mod.rs @@ -1,4 +1,5 @@ pub mod readline; +pub mod highlight; use std::path::Path; diff --git a/src/prompt/readline.rs b/src/prompt/readline.rs index 56ea4d6..017afc2 100644 --- a/src/prompt/readline.rs +++ b/src/prompt/readline.rs @@ -1,13 +1,12 @@ use std::borrow::Cow; -use rustyline::{completion::Completer, highlight::Highlighter, hint::{Hint, Hinter}, validate::{ValidationResult, Validator}, Helper}; +use rustyline::{completion::Completer, hint::{Hint, Hinter}, validate::{ValidationResult, Validator}, Helper}; use crate::{libsh::term::{Style, Styled}, parse::{lex::{LexFlags, LexStream}, ParseStream}}; use crate::prelude::*; #[derive(Default,Debug)] -pub struct FernReadline { -} +pub struct FernReadline; impl FernReadline { pub fn new() -> Self { @@ -59,12 +58,6 @@ impl Hinter for FernReadline { } } -impl Highlighter for FernReadline { - fn highlight<'l>(&self, line: &'l str, _pos: usize) -> std::borrow::Cow<'l, str> { - Cow::Owned(line.to_string()) - } -} - impl Validator for FernReadline { fn validate(&self, ctx: &mut rustyline::validate::ValidationContext) -> rustyline::Result { let mut tokens = vec![];