From bbb8162201b495852e63876b55f396b188d1d65c Mon Sep 17 00:00:00 2001 From: Kyler Clay Date: Mon, 12 May 2025 16:14:06 -0400 Subject: [PATCH] implemented most variable parameter expansion builtins --- Cargo.lock | 39 +++++++++++++ Cargo.toml | 1 + src/expand.rs | 139 ++++++++++++++++++++++++++++++++++++++++++++--- src/parse/lex.rs | 27 +++++++++ 4 files changed, 198 insertions(+), 8 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index becdcff..b8c9a31 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,6 +2,15 @@ # It is not intended for manual editing. version = 4 +[[package]] +name = "aho-corasick" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" +dependencies = [ + "memchr", +] + [[package]] name = "bitflags" version = "2.8.0" @@ -95,6 +104,7 @@ dependencies = [ "insta", "nix", "pretty_assertions", + "regex", "rustyline", ] @@ -241,6 +251,35 @@ dependencies = [ "nibble_vec", ] +[[package]] +name = "regex" +version = "1.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" + [[package]] name = "rustix" version = "0.38.44" diff --git a/Cargo.toml b/Cargo.toml index f93761d..8fbc34a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -15,6 +15,7 @@ glob = "0.3.2" insta = "1.42.2" nix = { version = "0.29.0", 
features = ["uio", "term", "user", "hostname", "fs", "default", "signal", "process", "event", "ioctl"] } pretty_assertions = "1.4.1" +regex = "1.11.1" rustyline = { version = "15.0.0", features = [ "derive" ] } [[bin]] diff --git a/src/expand.rs b/src/expand.rs index 2d41789..c195727 100644 --- a/src/expand.rs +++ b/src/expand.rs @@ -2,6 +2,7 @@ use std::collections::HashSet; use std::str::FromStr; use glob::Pattern; +use regex::Regex; use crate::state::{read_vars, write_meta, write_vars, LogTab}; use crate::procio::{IoBuf, IoFrame, IoMode}; @@ -125,11 +126,15 @@ impl Expander { } '{' if var_name.is_empty() => in_brace = true, '}' if in_brace => { + flog!(DEBUG, var_name); let var_val = perform_param_expansion(&var_name)?; result.push_str(&var_val); var_name.clear(); break } + _ if in_brace => { + var_name.push(ch) + } _ if is_hard_sep(ch) || ch == DUB_QUOTE || ch == SUBSH || ch == '/' => { let var_val = read_vars(|v| v.get_var(&var_name)); result.push_str(&var_val); @@ -324,6 +329,7 @@ pub fn unescape_str(raw: &str) -> String { result } +#[derive(Debug)] pub enum ParamExp { Len, // #var_name DefaultUnsetOrNull(String), // :- @@ -459,14 +465,17 @@ pub fn perform_param_expansion(raw: &str) -> ShResult { '+' | '=' | '?' 
=> { - rest = chars.collect(); + rest.push(ch); + rest.push_str(&chars.collect::<String>()); break } _ => var_name.push(ch) } } + flog!(DEBUG,rest); if let Ok(expansion) = rest.parse::<ParamExp>() { + flog!(DEBUG,expansion); match expansion { ParamExp::Len => unreachable!(), ParamExp::DefaultUnsetOrNull(default) => { @@ -559,13 +568,90 @@ pub fn perform_param_expansion(raw: &str) -> ShResult { } Ok(value) } - ParamExp::RemLongestPrefix(prefix) => todo!(), - ParamExp::RemShortestSuffix(suffix) => todo!(), - ParamExp::RemLongestSuffix(suffix) => todo!(), - ParamExp::ReplaceFirstMatch(search, replace) => todo!(), - ParamExp::ReplaceAllMatches(search, replace) => todo!(), - ParamExp::ReplacePrefix(search, replace) => todo!(), - ParamExp::ReplaceSuffix(search, replace) => todo!(), + ParamExp::RemLongestPrefix(prefix) => { + let value = vars.get_var(&var_name); + let pattern = Pattern::new(&prefix).unwrap(); + for i in (0..=value.len()).rev() { + let sliced = &value[..i]; + if pattern.matches(sliced) { + return Ok(value[i..].to_string()); + } + } + Ok(value) // no match + } + ParamExp::RemShortestSuffix(suffix) => { + let value = vars.get_var(&var_name); + let pattern = Pattern::new(&suffix).unwrap(); + for i in 0..=value.len() { + let sliced = &value[i..]; + if pattern.matches(sliced) { + return Ok(value[..i].to_string()); + } + } + Ok(value) + } + ParamExp::RemLongestSuffix(suffix) => { + let value = vars.get_var(&var_name); + let pattern = Pattern::new(&suffix).unwrap(); + for i in (0..=value.len()).rev() { + let sliced = &value[i..]; + if pattern.matches(sliced) { + return Ok(value[..i].to_string()); + } + } + Ok(value) + } + ParamExp::ReplaceFirstMatch(search, replace) => { + let value = vars.get_var(&var_name); + let regex = glob_to_regex(&search, false); // unanchored pattern + + if let Some(mat) = regex.find(&value) { + let before = &value[..mat.start()]; + let after = &value[mat.end()..]; + let result = format!("{}{}{}", before, replace, after); + Ok(result) + } else { +
Ok(value) + } + } + ParamExp::ReplaceAllMatches(search, replace) => { + let value = vars.get_var(&var_name); + let regex = glob_to_regex(&search, false); + let mut result = String::new(); + let mut last_match_end = 0; + + for mat in regex.find_iter(&value) { + result.push_str(&value[last_match_end..mat.start()]); + result.push_str(&replace); + last_match_end = mat.end(); + } + + // Append the rest of the string + result.push_str(&value[last_match_end..]); + Ok(result) + } + ParamExp::ReplacePrefix(search, replace) => { + let value = vars.get_var(&var_name); + let pattern = Pattern::new(&search).unwrap(); + for i in (0..=value.len()).rev() { + let sliced = &value[..i]; + if pattern.matches(sliced) { + return Ok(format!("{}{}",replace,&value[i..])) + } + } + Ok(value) + } + ParamExp::ReplaceSuffix(search, replace) => { + let value = vars.get_var(&var_name); + let pattern = Pattern::new(&search).unwrap(); + for i in (0..=value.len()).rev() { + let sliced = &value[i..]; + if pattern.matches(sliced) { + return Ok(format!("{}{}",&value[..i],replace)) + } + } + Ok(value) + } ParamExp::VarNamesWithSuffix(suffix) => todo!(), ParamExp::ExpandInnerVar(var_name) => todo!(), } @@ -574,6 +660,43 @@ pub fn perform_param_expansion(raw: &str) -> ShResult { } } +fn glob_to_regex(glob: &str, anchored: bool) -> Regex { + let mut regex = String::new(); + if anchored { + regex.push('^'); + } + for ch in glob.chars() { + match ch { + '*' => regex.push_str(".*"), + '?' => regex.push('.'), + '.' | '+' | '(' | ')' | '|' | '^' | '$' | '[' | ']' | '{' | '}' | '\\' => { + regex.push('\\'); + regex.push(ch); + } + _ => regex.push(ch), + } + } + if anchored { + regex.push('$'); + } + Regex::new(&regex).unwrap() +} +fn glob_to_regex_unanchored(glob: &str) -> Regex { + let mut regex = String::new(); + for ch in glob.chars() { + match ch { + '*' => regex.push_str(".*"), + '?' => regex.push('.'), + '.'
| '+' | '(' | ')' | '|' | '^' | '$' | '[' | ']' | '{' | '}' | '\\' => { + regex.push('\\'); + regex.push(ch); + } + _ => regex.push(ch), + } + } + Regex::new(&regex).unwrap() +} + #[derive(Debug)] pub enum PromptTk { AsciiOct(i32), diff --git a/src/parse/lex.rs b/src/parse/lex.rs index d27ed7b..143bda3 100644 --- a/src/parse/lex.rs +++ b/src/parse/lex.rs @@ -339,6 +339,33 @@ impl LexStream { pos += ch.len_utf8(); } } + '$' if chars.peek() == Some(&'{') => { + pos += 2; + chars.next(); + let mut brace_count = 0; + while let Some(brc_ch) = chars.next() { + match brc_ch { + '\\' => { + pos += 1; + if let Some(next_ch) = chars.next() { + pos += next_ch.len_utf8() + } + } + '{' => { + pos += 1; + brace_count += 1; + } + '}' => { + pos += 1; + brace_count -= 1; + if brace_count == 0 { + break + } + } + _ => pos += ch.len_utf8() + } + } + } '$' if chars.peek() == Some(&'(') => { pos += 2; chars.next();