Properly implemented word splitting

This commit is contained in:
2025-03-15 23:52:58 -04:00
parent 98739ba63a
commit ab0b100354
3 changed files with 66 additions and 17 deletions

View File

@@ -1,7 +1,11 @@
use crate::{parse::lex::{is_hard_sep, LexFlags, LexStream, Tk, Span, TkFlags, TkRule}, state::read_vars};
use crate::{prelude::*, parse::lex::{is_field_sep, is_hard_sep, LexFlags, LexStream, Span, Tk, TkFlags, TkRule}, state::read_vars};
/// Variable substitution marker.
///
/// `unescape_str` pushes this character into its result in place of a `$`.
/// NOTE(review): U+FDD0..U+FDD2 fall in Unicode's noncharacter range, presumably
/// chosen so the markers cannot collide with ordinary input text — confirm.
pub const VAR_SUB: char = '\u{fdd0}';
/// Double quote (`"`) marker.
///
/// `unescape_str` pushes this character into its result in place of a `"`.
pub const DUB_QUOTE: char = '\u{fdd1}';
/// Single quote (`'`) marker.
///
/// `unescape_str` pushes this character into its result in place of a `'`.
pub const SNG_QUOTE: char = '\u{fdd2}';
impl<'t> Tk<'t> {
/// Create a new expanded token
@@ -34,12 +38,33 @@ impl<'t> Expander {
}
/// Expand this expander's raw text and return the resulting words.
///
/// Overwrites `self.raw` with the output of `expand_raw` before the words are
/// produced, so the expander holds the expanded text afterwards.
pub fn expand(&'t mut self) -> Vec<String> {
self.raw = self.expand_raw();
// Unwrap here is safe because LexFlags::RAW has no error states
let tokens: Vec<_> = LexStream::new(&self.raw, LexFlags::RAW)
.filter(|tk| !matches!(tk.as_ref().unwrap().class, TkRule::EOI | TkRule::SOI))
.map(|tk| tk.unwrap().to_string())
.collect();
tokens
self.split_words()
}
/// Split the text in `self.raw` into words (fields).
///
/// Text between a pair of matching `DUB_QUOTE` or `SNG_QUOTE` markers is copied
/// into the current word verbatim, so field separators inside quotes do not
/// split the word. The markers themselves are dropped. An unterminated quote
/// runs to the end of the text. Outside quotes, any character for which
/// `is_field_sep` returns true ends the current word.
///
/// Leading, trailing and consecutive separators do not produce empty words.
/// An empty word is emitted only when it was explicitly quoted, and this holds
/// regardless of where it appears in the text.
///
/// Returns the words in the order they occur.
pub fn split_words(&mut self) -> Vec<String> {
    let mut words = vec![];
    let mut chars = self.raw.chars();
    let mut cur_word = String::new();
    // Set once the word being built has seen a quote marker; this lets an
    // explicitly quoted empty word survive while bare separator runs collapse.
    let mut quoted = false;

    'outer: while let Some(ch) = chars.next() {
        match ch {
            DUB_QUOTE | SNG_QUOTE => {
                quoted = true;
                // Copy everything up to the matching marker into the word.
                for q_ch in chars.by_ref() {
                    if q_ch == ch {
                        continue 'outer;
                    }
                    cur_word.push(q_ch);
                }
            }
            _ if is_field_sep(ch) => {
                // Only finish a word if there is one; otherwise this separator
                // is part of a run (or leading) and must not yield an empty word.
                if quoted || !cur_word.is_empty() {
                    words.push(mem::take(&mut cur_word));
                }
                quoted = false;
            }
            _ => cur_word.push(ch),
        }
    }

    // Flush the final word, including a trailing quoted-but-empty one.
    if quoted || !cur_word.is_empty() {
        words.push(cur_word);
    }
    words
}
pub fn expand_raw(&self) -> String {
let mut chars = self.raw.chars();
@@ -97,6 +122,8 @@ pub fn unescape_str(raw: &str) -> String {
result.push(next_ch)
}
}
'"' => result.push(DUB_QUOTE),
'\'' => result.push(SNG_QUOTE),
'$' => result.push(VAR_SUB),
_ => result.push(ch)
}

View File

@@ -286,6 +286,19 @@ impl<'t> LexStream<'t> {
let mut quote_pos = None;
while let Some(ch) = chars.next() {
match ch {
_ if self.flags.contains(LexFlags::RAW) => {
if ch.is_whitespace() {
break;
} else {
pos += ch.len_utf8()
}
}
'\\' => {
pos += 1;
if chars.next().is_some() {
pos += 1;
}
}
'"' | '\'' => {
self.in_quote = true;
quote_pos = Some(pos);
@@ -293,8 +306,10 @@ impl<'t> LexStream<'t> {
while let Some(q_ch) = chars.next() {
match q_ch {
'\\' => {
pos += 2;
chars.next();
pos += 1;
if chars.next().is_some() {
pos += 1;
}
}
_ if q_ch == ch => {
pos += 1;
@@ -309,13 +324,6 @@ impl<'t> LexStream<'t> {
}
}
}
_ if self.flags.contains(LexFlags::RAW) => {
if ch.is_whitespace() {
break;
} else {
pos += ch.len_utf8()
}
}
_ if !self.in_quote && is_op(ch) => break,
_ if is_hard_sep(ch) => break,
_ => pos += ch.len_utf8()

View File

@@ -2,7 +2,7 @@ use std::borrow::Cow;
use rustyline::{completion::Completer, highlight::Highlighter, hint::{Hint, Hinter}, validate::{ValidationResult, Validator}, Helper};
use crate::libsh::term::{Style, Styled};
use crate::{libsh::term::{Style, Styled}, parse::{lex::{LexFlags, LexStream}, ParseStream}};
/// Line-editor helper type for the shell prompt.
///
/// Carries no state of its own; it exists as the type the rustyline traits in
/// this file (`Highlighter`, `Validator`, ...) are implemented on.
pub struct FernReadline {
}
@@ -65,6 +65,20 @@ impl Highlighter for FernReadline {
impl Validator for FernReadline {
    /// Decide whether the current input line can be submitted.
    ///
    /// The input is lexed with `LexFlags::empty()` and the resulting tokens are
    /// run through `ParseStream`. Any lexer or parser error makes the input
    /// `ValidationResult::Incomplete`; otherwise it is `ValidationResult::Valid(None)`.
    /// This function itself never returns `Err`.
    fn validate(&self, ctx: &mut rustyline::validate::ValidationContext) -> rustyline::Result<rustyline::validate::ValidationResult> {
        // Lexing stage: gather tokens, stopping at the first lexer error.
        let lexed: Result<Vec<_>, _> = LexStream::new(ctx.input(), LexFlags::empty()).collect();
        let tokens = match lexed {
            Ok(tokens) => tokens,
            Err(_) => return Ok(ValidationResult::Incomplete),
        };

        // Parsing stage: the nodes themselves are discarded, only errors matter.
        for node in ParseStream::new(tokens) {
            if node.is_err() {
                return Ok(ValidationResult::Incomplete);
            }
        }

        Ok(ValidationResult::Valid(None))
    }
}