From 05212749e37aa8f5853ba1493ca279a7b09182b9 Mon Sep 17 00:00:00 2001 From: pagedmov Date: Sat, 15 Mar 2025 23:52:58 -0400 Subject: [PATCH] Properly implemented word splitting --- src/expand.rs | 41 ++++++++++++++++++++++++++++++++++------- src/parse/lex.rs | 26 +++++++++++++++++--------- src/prompt/readline.rs | 16 +++++++++++++++- 3 files changed, 66 insertions(+), 17 deletions(-) diff --git a/src/expand.rs b/src/expand.rs index 56bb0e8..56b7473 100644 --- a/src/expand.rs +++ b/src/expand.rs @@ -1,7 +1,11 @@ -use crate::{parse::lex::{is_hard_sep, LexFlags, LexStream, Tk, Span, TkFlags, TkRule}, state::read_vars}; +use crate::{prelude::*, parse::lex::{is_field_sep, is_hard_sep, LexFlags, LexStream, Span, Tk, TkFlags, TkRule}, state::read_vars}; /// Variable substitution marker pub const VAR_SUB: char = '\u{fdd0}'; +/// Double quote '"' marker +pub const DUB_QUOTE: char = '\u{fdd1}'; +/// Single quote '\\'' marker +pub const SNG_QUOTE: char = '\u{fdd2}'; impl<'t> Tk<'t> { /// Create a new expanded token @@ -34,12 +38,33 @@ impl<'t> Expander { } pub fn expand(&'t mut self) -> Vec { self.raw = self.expand_raw(); - // Unwrap here is safe because LexFlags::RAW has no error states - let tokens: Vec<_> = LexStream::new(&self.raw, LexFlags::RAW) - .filter(|tk| !matches!(tk.as_ref().unwrap().class, TkRule::EOI | TkRule::SOI)) - .map(|tk| tk.unwrap().to_string()) - .collect(); - tokens + self.split_words() + } + pub fn split_words(&mut self) -> Vec { + let mut words = vec![]; + let mut chars = self.raw.chars(); + let mut cur_word = String::new(); + + 'outer: while let Some(ch) = chars.next() { + match ch { + DUB_QUOTE | SNG_QUOTE => { + while let Some(q_ch) = chars.next() { + match q_ch { + _ if q_ch == ch => continue 'outer, // Isn't rust cool + _ => cur_word.push(q_ch) + } + } + } + _ if is_field_sep(ch) => { + words.push(mem::take(&mut cur_word)); + } + _ => cur_word.push(ch) + } + } + if !cur_word.is_empty() { + words.push(cur_word); + } + words } pub fn expand_raw(&self) -> String { let mut chars = self.raw.chars(); @@ -97,6 +122,8 @@ pub fn unescape_str(raw: &str) -> String { result.push(next_ch) } } + '"' => result.push(DUB_QUOTE), + '\'' => result.push(SNG_QUOTE), '$' => result.push(VAR_SUB), _ => result.push(ch) } diff --git a/src/parse/lex.rs b/src/parse/lex.rs index a118765..90c2323 100644 --- a/src/parse/lex.rs +++ b/src/parse/lex.rs @@ -286,6 +286,19 @@ impl<'t> LexStream<'t> { let mut quote_pos = None; while let Some(ch) = chars.next() { match ch { + _ if self.flags.contains(LexFlags::RAW) => { + if ch.is_whitespace() { + break; + } else { + pos += ch.len_utf8() + } + } + '\\' => { + pos += 1; + if chars.next().is_some() { + pos += 1; + } + } '"' | '\'' => { self.in_quote = true; quote_pos = Some(pos); @@ -293,8 +306,10 @@ impl<'t> LexStream<'t> { while let Some(q_ch) = chars.next() { match q_ch { '\\' => { - pos += 2; - chars.next(); + pos += 1; + if chars.next().is_some() { + pos += 1; + } } _ if q_ch == ch => { pos += 1; @@ -309,13 +324,6 @@ impl<'t> LexStream<'t> { } } } - _ if self.flags.contains(LexFlags::RAW) => { - if ch.is_whitespace() { - break; - } else { - pos += ch.len_utf8() - } - } _ if !self.in_quote && is_op(ch) => break, _ if is_hard_sep(ch) => break, _ => pos += ch.len_utf8() diff --git a/src/prompt/readline.rs b/src/prompt/readline.rs index 4db8b36..eab13c8 100644 --- a/src/prompt/readline.rs +++ b/src/prompt/readline.rs @@ -2,7 +2,7 @@ use std::borrow::Cow; use rustyline::{completion::Completer, highlight::Highlighter, hint::{Hint, Hinter}, validate::{ValidationResult, Validator}, Helper}; -use crate::libsh::term::{Style, Styled}; +use crate::{libsh::term::{Style, Styled}, parse::{lex::{LexFlags, LexStream}, ParseStream}}; pub struct FernReadline { } @@ -65,6 +65,20 @@ impl Highlighter for FernReadline { impl Validator for FernReadline { fn validate(&self, ctx: &mut rustyline::validate::ValidationContext) -> rustyline::Result { + let mut tokens = vec![]; + let tk_stream = LexStream::new(ctx.input(), LexFlags::empty()); + for tk in tk_stream { + if tk.is_err() { + return Ok(ValidationResult::Incomplete) + } + tokens.push(tk.unwrap()); + } + let nd_stream = ParseStream::new(tokens); + for nd in nd_stream { + if nd.is_err() { + return Ok(ValidationResult::Incomplete) + } + } Ok(ValidationResult::Valid(None)) } }