From 8c6de4f4ecbe6096407532dd21e623378102a92d Mon Sep 17 00:00:00 2001 From: pagedmov Date: Sun, 15 Mar 2026 00:01:33 -0400 Subject: [PATCH] fixed heredocs using the same expansion pathway as regular strings implemented backtick command subs deferred heredoc expansion until redir time instead of parse time implemented "$*" expansions function defs like 'func () { }' now parse correctly fixed conjunctions short circuiting instead of skipping --- src/expand.rs | 134 ++++++++++++++++++++++++++++++++++++++++++- src/getopt.rs | 8 ++- src/parse/execute.rs | 23 +++----- src/parse/lex.rs | 10 ++++ src/parse/mod.rs | 63 ++++++++------------ src/procio.rs | 98 +++++++++++++++++++++++-------- src/state.rs | 18 ++++++ 7 files changed, 271 insertions(+), 83 deletions(-) diff --git a/src/expand.rs b/src/expand.rs index b9cc949..8f9f14a 100644 --- a/src/expand.rs +++ b/src/expand.rs @@ -51,7 +51,11 @@ impl Expander { } pub fn from_raw(raw: &str, flags: TkFlags) -> ShResult { let raw = expand_braces_full(raw)?.join(" "); - let unescaped = unescape_str(&raw); + let unescaped = if flags.contains(TkFlags::IS_HEREDOC) { + unescape_heredoc(&raw) + } else { + unescape_str(&raw) + }; Ok(Self { raw: unescaped, flags }) } pub fn expand(&mut self) -> ShResult> { @@ -1159,6 +1163,25 @@ pub fn unescape_str(raw: &str) -> String { } } } + '`' => { + result.push(markers::VAR_SUB); + result.push(markers::SUBSH); + while let Some(bt_ch) = chars.next() { + match bt_ch { + '\\' => { + result.push(bt_ch); + if let Some(next_ch) = chars.next() { + result.push(next_ch); + } + } + '`' => { + result.push(markers::SUBSH); + break; + } + _ => result.push(bt_ch), + } + } + } '"' => { result.push(markers::DUB_QUOTE); break; @@ -1323,6 +1346,25 @@ pub fn unescape_str(raw: &str) -> String { result.push('$'); } } + '`' => { + result.push(markers::VAR_SUB); + result.push(markers::SUBSH); + while let Some(bt_ch) = chars.next() { + match bt_ch { + '\\' => { + result.push(bt_ch); + if let Some(next_ch) = 
chars.next() { + result.push(next_ch); + } + } + '`' => { + result.push(markers::SUBSH); + break; + } + _ => result.push(bt_ch), + } + } + } _ => result.push(ch), } first_char = false; @@ -1331,6 +1373,96 @@ pub fn unescape_str(raw: &str) -> String { result } +/// Like unescape_str but for heredoc bodies. Only processes: +/// - $var / ${var} / $(cmd) substitution markers +/// - Backslash escapes (only before $, `, \, and newline) +/// Everything else (quotes, tildes, globs, process subs, etc.) is literal. +pub fn unescape_heredoc(raw: &str) -> String { + let mut chars = raw.chars().peekable(); + let mut result = String::new(); + + while let Some(ch) = chars.next() { + match ch { + '\\' => { + match chars.peek() { + Some('$') | Some('`') | Some('\\') | Some('\n') => { + let next_ch = chars.next().unwrap(); + if next_ch == '\n' { + // line continuation — discard both backslash and newline + continue; + } + result.push(markers::ESCAPE); + result.push(next_ch); + } + _ => { + // backslash is literal + result.push('\\'); + } + } + } + '$' if chars.peek() == Some(&'(') => { + result.push(markers::VAR_SUB); + chars.next(); // consume '(' + result.push(markers::SUBSH); + let mut paren_count = 1; + while let Some(subsh_ch) = chars.next() { + match subsh_ch { + '\\' => { + result.push(subsh_ch); + if let Some(next_ch) = chars.next() { + result.push(next_ch); + } + } + '(' => { + paren_count += 1; + result.push(subsh_ch); + } + ')' => { + paren_count -= 1; + if paren_count == 0 { + result.push(markers::SUBSH); + break; + } else { + result.push(subsh_ch); + } + } + _ => result.push(subsh_ch), + } + } + } + '$' => { + result.push(markers::VAR_SUB); + if chars.peek() == Some(&'$') { + chars.next(); + result.push('$'); + } + } + '`' => { + result.push(markers::VAR_SUB); + result.push(markers::SUBSH); + while let Some(bt_ch) = chars.next() { + match bt_ch { + '\\' => { + result.push(bt_ch); + if let Some(next_ch) = chars.next() { + result.push(next_ch); + } + } + '`' => { + 
result.push(markers::SUBSH); + break; + } + _ => result.push(bt_ch), + } + } + } + _ => result.push(ch), + } + } + + result +} + /// Opposite of unescape_str - escapes a string to be executed as literal text /// Used for completion results, and glob filename matches. pub fn escape_str(raw: &str, use_marker: bool) -> String { diff --git a/src/getopt.rs b/src/getopt.rs index aa56569..3f98b2b 100644 --- a/src/getopt.rs +++ b/src/getopt.rs @@ -95,14 +95,16 @@ pub fn sort_tks( .into_iter() .map(|t| t.expand()) .collect::>>()? - .into_iter(); + .into_iter() + .peekable(); let mut opts = vec![]; let mut non_opts = vec![]; while let Some(token) = tokens_iter.next() { if &token.to_string() == "--" { - non_opts.extend(tokens_iter); - break; + non_opts.push(token); + non_opts.extend(tokens_iter); + break; } let parsed_opts = Opt::parse(&token.to_string()); diff --git a/src/parse/execute.rs b/src/parse/execute.rs index 3e47f3f..fc306ae 100644 --- a/src/parse/execute.rs +++ b/src/parse/execute.rs @@ -319,24 +319,19 @@ impl Dispatcher { }; let mut elem_iter = elements.into_iter(); + let mut skip = false; while let Some(element) = elem_iter.next() { let ConjunctNode { cmd, operator } = element; - self.dispatch_node(*cmd)?; + if !skip { + self.dispatch_node(*cmd)?; + } let status = state::get_status(); - match operator { - ConjunctOp::And => { - if status != 0 { - break; - } - } - ConjunctOp::Or => { - if status == 0 { - break; - } - } + skip = match operator { + ConjunctOp::And => status != 0, + ConjunctOp::Or => status == 0, ConjunctOp::Null => break, - } + }; } Ok(()) } @@ -356,7 +351,7 @@ impl Dispatcher { }; let body_span = body.get_span(); let body = body_span.as_str().to_string(); - let name = name.span.as_str().strip_suffix("()").unwrap(); + let name = name.span.as_str().strip_suffix("()").unwrap_or(name.span.as_str()); if KEYWORDS.contains(&name) { return Err(ShErr::at( diff --git a/src/parse/lex.rs b/src/parse/lex.rs index 876b12f..5c9c642 100644 --- a/src/parse/lex.rs 
+++ b/src/parse/lex.rs @@ -875,6 +875,16 @@ impl LexStream { )); } } + '(' if can_be_subshell && chars.peek() == Some(&')') => { + // standalone "()" — function definition marker + pos += 2; + chars.next(); + let mut tk = self.get_token(self.cursor..pos, TkRule::Str); + tk.mark(TkFlags::KEYWORD); + self.cursor = pos; + self.set_next_is_cmd(true); + return Ok(tk); + } '(' if self.next_is_cmd() && can_be_subshell => { pos += 1; let mut paren_count = 1; diff --git a/src/parse/mod.rs b/src/parse/mod.rs index 02bc519..d99f1ca 100644 --- a/src/parse/mod.rs +++ b/src/parse/mod.rs @@ -444,45 +444,9 @@ impl TryFrom for RedirBldr { let span = tk.span.clone(); if tk.flags.contains(TkFlags::IS_HEREDOC) { let flags = tk.flags; - let mut heredoc_body = if flags.contains(TkFlags::LIT_HEREDOC) { - tk.as_str().to_string() - } else { - tk.expand()?.get_words().first().map(|s| s.as_str()).unwrap_or_default().to_string() - }; - - if flags.contains(TkFlags::TAB_HEREDOC) { - let lines = heredoc_body.lines(); - let mut min_tabs = usize::MAX; - for line in lines { - if line.is_empty() { continue; } - let line_len = line.len(); - let after_strip = line.trim_start_matches('\t').len(); - let delta = line_len - after_strip; - min_tabs = min_tabs.min(delta); - } - if min_tabs == usize::MAX { - // let's avoid possibly allocating a string with 18 quintillion tabs - min_tabs = 0; - } - - if min_tabs > 0 { - let stripped = heredoc_body.lines() - .fold(vec![], |mut acc, ln| { - if ln.is_empty() { - acc.push(""); - return acc; - } - let stripped_ln = ln.strip_prefix(&"\t".repeat(min_tabs)).unwrap(); - acc.push(stripped_ln); - acc - }) - .join("\n"); - heredoc_body = stripped + "\n"; - } - } Ok(RedirBldr { - io_mode: Some(IoMode::loaded_pipe(0, heredoc_body.as_bytes())?), + io_mode: Some(IoMode::buffer(0, tk.to_string(), flags)?), class: Some(RedirType::HereDoc), tgt_fd: Some(0), span: Some(span) @@ -921,13 +885,26 @@ impl ParseStream { let mut node_tks: Vec = vec![]; let body; - if 
!is_func_name(self.peek_tk()) { + // Two forms: "name()" as one token, or "name" followed by "()" as separate tokens + let spaced_form = !is_func_name(self.peek_tk()) + && self.peek_tk().is_some_and(|tk| tk.flags.contains(TkFlags::IS_CMD)) + && is_func_parens(self.tokens.get(1)); + + if !is_func_name(self.peek_tk()) && !spaced_form { return Ok(None); } + let name_tk = self.next_tk().unwrap(); node_tks.push(name_tk.clone()); let name = name_tk.clone(); - let name_raw = name.to_string(); + let name_raw = if spaced_form { + // Consume the "()" token + let parens_tk = self.next_tk().unwrap(); + node_tks.push(parens_tk); + name.to_string() + } else { + name.to_string() + }; let mut src = name_tk.span.span_source().clone(); src.rename(name_raw.clone()); let color = next_color(); @@ -1155,7 +1132,7 @@ impl ParseStream { .get_words() .join(" "); string.push('\n'); - let io_mode = IoMode::loaded_pipe(redir_bldr.tgt_fd.unwrap_or(0), string.as_bytes())?; + let io_mode = IoMode::buffer(redir_bldr.tgt_fd.unwrap_or(0), string, redir_tk.flags)?; Ok(redir_bldr.with_io_mode(io_mode).build()) } _ => { @@ -1958,6 +1935,12 @@ fn is_func_name(tk: Option<&Tk>) -> bool { }) } +fn is_func_parens(tk: Option<&Tk>) -> bool { + tk.is_some_and(|tk| { + tk.flags.contains(TkFlags::KEYWORD) && tk.span.as_str() == "()" + }) +} + /// Perform an operation on the child nodes of a given node /// /// # Parameters diff --git a/src/procio.rs b/src/procio.rs index 99a68b0..9192f8c 100644 --- a/src/procio.rs +++ b/src/procio.rs @@ -12,7 +12,7 @@ use crate::{ utils::RedirVecUtils, }, parse::{Redir, RedirType, get_redir_file, lex::TkFlags}, - prelude::*, + prelude::*, state, }; // Credit to fish-shell for many of the implementation ideas present in this @@ -48,8 +48,9 @@ pub enum IoMode { pipe: Arc, }, Buffer { + tgt_fd: RawFd, buf: String, - pipe: Arc, + flags: TkFlags, // so we can see if its a heredoc or not }, Close { tgt_fd: RawFd, @@ -109,10 +110,8 @@ impl IoMode { } Ok(self) } - pub fn 
loaded_pipe(tgt_fd: RawFd, buf: &[u8]) -> ShResult { - let (rpipe, wpipe) = nix::unistd::pipe2(OFlag::O_CLOEXEC).unwrap(); - write(wpipe, buf)?; - Ok(Self::Pipe { tgt_fd, pipe: rpipe.into() }) + pub fn buffer(tgt_fd: RawFd, buf: String, flags: TkFlags) -> ShResult { + Ok(Self::Buffer { tgt_fd, buf, flags }) } pub fn get_pipes() -> (Self, Self) { let (rpipe, wpipe) = nix::unistd::pipe2(OFlag::O_CLOEXEC).unwrap(); @@ -245,25 +244,74 @@ impl<'e> IoFrame { fn apply_redirs(&mut self) -> ShResult<()> { for redir in &mut self.redirs { let io_mode = &mut redir.io_mode; - if let IoMode::Close { tgt_fd } = io_mode { - if *tgt_fd == *TTY_FILENO { - // Don't let user close the shell's tty fd. - continue; - } - close(*tgt_fd).ok(); - continue; - } - if let IoMode::File { .. } = io_mode { - match io_mode.clone().open_file() { - Ok(file) => *io_mode = file, - Err(e) => { - if let Some(span) = redir.span.as_ref() { - return Err(e.promote(span.clone())); - } - return Err(e) - } - } - }; + match io_mode { + IoMode::Close { tgt_fd } => { + if *tgt_fd == *TTY_FILENO { + // Don't let user close the shell's tty fd. + continue; + } + close(*tgt_fd).ok(); + continue; + } + IoMode::File { .. 
} => { + match io_mode.clone().open_file() { + Ok(file) => *io_mode = file, + Err(e) => { + if let Some(span) = redir.span.as_ref() { + return Err(e.promote(span.clone())); + } + return Err(e) + } + } + } + IoMode::Buffer { tgt_fd, buf, flags } => { + let (rpipe, wpipe) = nix::unistd::pipe()?; + let mut text = if flags.contains(TkFlags::LIT_HEREDOC) { + buf.clone() + } else { + let words = Expander::from_raw(buf, *flags)?.expand()?; + if flags.contains(TkFlags::IS_HEREDOC) { + words.into_iter().next().unwrap_or_default() + } else { + let ifs = state::get_separator(); + words.join(&ifs).trim().to_string() + "\n" + } + }; + if flags.contains(TkFlags::TAB_HEREDOC) { + let lines = text.lines(); + let mut min_tabs = usize::MAX; + for line in lines { + if line.is_empty() { continue; } + let line_len = line.len(); + let after_strip = line.trim_start_matches('\t').len(); + let delta = line_len - after_strip; + min_tabs = min_tabs.min(delta); + } + if min_tabs == usize::MAX { + // let's avoid possibly allocating a string with 18 quintillion tabs + min_tabs = 0; + } + + if min_tabs > 0 { + let stripped = text.lines() + .fold(vec![], |mut acc, ln| { + if ln.is_empty() { + acc.push(""); + return acc; + } + let stripped_ln = ln.strip_prefix(&"\t".repeat(min_tabs)).unwrap(); + acc.push(stripped_ln); + acc + }) + .join("\n"); + text = stripped + "\n"; + } + } + write(wpipe, text.as_bytes())?; + *io_mode = IoMode::Pipe { tgt_fd: *tgt_fd, pipe: rpipe.into() }; + } + _ => {} + } let tgt_fd = io_mode.tgt_fd(); let src_fd = io_mode.src_fd(); if let Err(e) = dup2(src_fd, tgt_fd) { diff --git a/src/state.rs b/src/state.rs index 1ecfab7..630881a 100644 --- a/src/state.rs +++ b/src/state.rs @@ -1330,6 +1330,15 @@ impl VarTab { .get(&ShellParam::Status) .map(|s| s.to_string()) .unwrap_or("0".into()), + ShellParam::AllArgsStr => { + let ifs = get_separator(); + self + .params + .get(&ShellParam::AllArgs) + .map(|s| s.replace(markers::ARG_SEP, &ifs).to_string()) + .unwrap_or_default() + } + 
_ => self .params .get(&param) @@ -1842,6 +1851,15 @@ pub fn change_dir>(dir: P) -> ShResult<()> { Ok(()) } +pub fn get_separator() -> String { + env::var("IFS") + .unwrap_or(String::from(" ")) + .chars() + .next() + .unwrap() + .to_string() +} + pub fn get_status() -> i32 { read_vars(|v| v.get_param(ShellParam::Status)) .parse::()