fixed heredocs using the same expansion pathway as regular strings

implemented backtick command subs

deferred heredoc expansion until redir time instead of parse time

implemented "$*" expansions

function defs like 'func   ()  { }' now parse correctly

fixed conjunctions short circuiting instead of skipping
This commit is contained in:
2026-03-15 00:01:33 -04:00
parent 9bd9c66b92
commit 8c6de4f4ec
7 changed files with 271 additions and 83 deletions

View File

@@ -51,7 +51,11 @@ impl Expander {
} }
pub fn from_raw(raw: &str, flags: TkFlags) -> ShResult<Self> { pub fn from_raw(raw: &str, flags: TkFlags) -> ShResult<Self> {
let raw = expand_braces_full(raw)?.join(" "); let raw = expand_braces_full(raw)?.join(" ");
let unescaped = unescape_str(&raw); let unescaped = if flags.contains(TkFlags::IS_HEREDOC) {
unescape_heredoc(&raw)
} else {
unescape_str(&raw)
};
Ok(Self { raw: unescaped, flags }) Ok(Self { raw: unescaped, flags })
} }
pub fn expand(&mut self) -> ShResult<Vec<String>> { pub fn expand(&mut self) -> ShResult<Vec<String>> {
@@ -1159,6 +1163,25 @@ pub fn unescape_str(raw: &str) -> String {
} }
} }
} }
'`' => {
result.push(markers::VAR_SUB);
result.push(markers::SUBSH);
while let Some(bt_ch) = chars.next() {
match bt_ch {
'\\' => {
result.push(bt_ch);
if let Some(next_ch) = chars.next() {
result.push(next_ch);
}
}
'`' => {
result.push(markers::SUBSH);
break;
}
_ => result.push(bt_ch),
}
}
}
'"' => { '"' => {
result.push(markers::DUB_QUOTE); result.push(markers::DUB_QUOTE);
break; break;
@@ -1323,6 +1346,25 @@ pub fn unescape_str(raw: &str) -> String {
result.push('$'); result.push('$');
} }
} }
'`' => {
result.push(markers::VAR_SUB);
result.push(markers::SUBSH);
while let Some(bt_ch) = chars.next() {
match bt_ch {
'\\' => {
result.push(bt_ch);
if let Some(next_ch) = chars.next() {
result.push(next_ch);
}
}
'`' => {
result.push(markers::SUBSH);
break;
}
_ => result.push(bt_ch),
}
}
}
_ => result.push(ch), _ => result.push(ch),
} }
first_char = false; first_char = false;
@@ -1331,6 +1373,96 @@ pub fn unescape_str(raw: &str) -> String {
result result
} }
/// Heredoc-body counterpart of `unescape_str`.
///
/// Walks `raw` once and rewrites only the constructs that stay active inside
/// a heredoc body:
/// - `\` directly before `$`, `` ` `` or `\` becomes `markers::ESCAPE` followed
///   by that character; `\` directly before a newline is a line continuation
///   and both characters are dropped. Any other backslash is kept literally.
/// - `$( ... )` becomes `markers::VAR_SUB`, `markers::SUBSH`, the body copied
///   verbatim (nested parentheses are balanced), then `markers::SUBSH`.
/// - `` ` ... ` `` is rewritten the same way as `$( ... )`.
/// - Any other `$` becomes `markers::VAR_SUB`; a directly following `$` is
///   consumed and kept as a literal `$`.
///
/// Every other character (quotes, tildes, globs, ...) is copied unchanged.
/// If a `$(` or backtick body is never closed, the rest of the input is copied
/// and no closing `markers::SUBSH` is emitted.
pub fn unescape_heredoc(raw: &str) -> String {
    // Copies a backtick body up to the closing backtick, which is replaced by
    // the SUBSH marker. A backslash and the character after it are copied
    // as-is, so an escaped backtick does not terminate the body.
    fn copy_backtick_body<I: Iterator<Item = char>>(src: &mut I, out: &mut String) {
        loop {
            match src.next() {
                None => return,
                Some('`') => {
                    out.push(markers::SUBSH);
                    return;
                }
                Some('\\') => {
                    out.push('\\');
                    if let Some(escaped) = src.next() {
                        out.push(escaped);
                    }
                }
                Some(other) => out.push(other),
            }
        }
    }

    // Copies a `$(` body up to its matching `)`, which is replaced by the
    // SUBSH marker. The opening parenthesis has already been consumed, so the
    // nesting depth starts at one.
    fn copy_paren_body<I: Iterator<Item = char>>(src: &mut I, out: &mut String) {
        let mut open_parens = 1;
        loop {
            let current = match src.next() {
                Some(c) => c,
                None => return,
            };
            match current {
                '\\' => {
                    out.push('\\');
                    if let Some(escaped) = src.next() {
                        out.push(escaped);
                    }
                }
                '(' => {
                    open_parens += 1;
                    out.push('(');
                }
                ')' if open_parens == 1 => {
                    out.push(markers::SUBSH);
                    return;
                }
                ')' => {
                    open_parens -= 1;
                    out.push(')');
                }
                other => out.push(other),
            }
        }
    }

    let mut input = raw.chars().peekable();
    let mut out = String::new();

    while let Some(current) = input.next() {
        let upcoming = input.peek().copied();
        match (current, upcoming) {
            // line continuation: drop both the backslash and the newline
            ('\\', Some('\n')) => {
                input.next();
            }
            ('\\', Some(escaped)) if matches!(escaped, '$' | '`' | '\\') => {
                input.next();
                out.push(markers::ESCAPE);
                out.push(escaped);
            }
            // a backslash before anything else (or at end of input) is literal
            ('\\', _) => out.push('\\'),
            ('$', Some('(')) => {
                input.next(); // consume '('
                out.push(markers::VAR_SUB);
                out.push(markers::SUBSH);
                copy_paren_body(&mut input, &mut out);
            }
            ('$', following) => {
                out.push(markers::VAR_SUB);
                if following == Some('$') {
                    input.next();
                    out.push('$');
                }
            }
            ('`', _) => {
                out.push(markers::VAR_SUB);
                out.push(markers::SUBSH);
                copy_backtick_body(&mut input, &mut out);
            }
            (plain, _) => out.push(plain),
        }
    }

    out
}
/// Opposite of unescape_str - escapes a string to be executed as literal text /// Opposite of unescape_str - escapes a string to be executed as literal text
/// Used for completion results, and glob filename matches. /// Used for completion results, and glob filename matches.
pub fn escape_str(raw: &str, use_marker: bool) -> String { pub fn escape_str(raw: &str, use_marker: bool) -> String {

View File

@@ -95,14 +95,16 @@ pub fn sort_tks(
.into_iter() .into_iter()
.map(|t| t.expand()) .map(|t| t.expand())
.collect::<ShResult<Vec<_>>>()? .collect::<ShResult<Vec<_>>>()?
.into_iter(); .into_iter()
.peekable();
let mut opts = vec![]; let mut opts = vec![];
let mut non_opts = vec![]; let mut non_opts = vec![];
while let Some(token) = tokens_iter.next() { while let Some(token) = tokens_iter.next() {
if &token.to_string() == "--" { if &token.to_string() == "--" {
non_opts.extend(tokens_iter); non_opts.push(token);
break; non_opts.extend(tokens_iter);
break;
} }
let parsed_opts = Opt::parse(&token.to_string()); let parsed_opts = Opt::parse(&token.to_string());

View File

@@ -319,24 +319,19 @@ impl Dispatcher {
}; };
let mut elem_iter = elements.into_iter(); let mut elem_iter = elements.into_iter();
let mut skip = false;
while let Some(element) = elem_iter.next() { while let Some(element) = elem_iter.next() {
let ConjunctNode { cmd, operator } = element; let ConjunctNode { cmd, operator } = element;
self.dispatch_node(*cmd)?; if !skip {
self.dispatch_node(*cmd)?;
}
let status = state::get_status(); let status = state::get_status();
match operator { skip = match operator {
ConjunctOp::And => { ConjunctOp::And => status != 0,
if status != 0 { ConjunctOp::Or => status == 0,
break;
}
}
ConjunctOp::Or => {
if status == 0 {
break;
}
}
ConjunctOp::Null => break, ConjunctOp::Null => break,
} };
} }
Ok(()) Ok(())
} }
@@ -356,7 +351,7 @@ impl Dispatcher {
}; };
let body_span = body.get_span(); let body_span = body.get_span();
let body = body_span.as_str().to_string(); let body = body_span.as_str().to_string();
let name = name.span.as_str().strip_suffix("()").unwrap(); let name = name.span.as_str().strip_suffix("()").unwrap_or(name.span.as_str());
if KEYWORDS.contains(&name) { if KEYWORDS.contains(&name) {
return Err(ShErr::at( return Err(ShErr::at(

View File

@@ -875,6 +875,16 @@ impl LexStream {
)); ));
} }
} }
'(' if can_be_subshell && chars.peek() == Some(&')') => {
// standalone "()" — function definition marker
pos += 2;
chars.next();
let mut tk = self.get_token(self.cursor..pos, TkRule::Str);
tk.mark(TkFlags::KEYWORD);
self.cursor = pos;
self.set_next_is_cmd(true);
return Ok(tk);
}
'(' if self.next_is_cmd() && can_be_subshell => { '(' if self.next_is_cmd() && can_be_subshell => {
pos += 1; pos += 1;
let mut paren_count = 1; let mut paren_count = 1;

View File

@@ -444,45 +444,9 @@ impl TryFrom<Tk> for RedirBldr {
let span = tk.span.clone(); let span = tk.span.clone();
if tk.flags.contains(TkFlags::IS_HEREDOC) { if tk.flags.contains(TkFlags::IS_HEREDOC) {
let flags = tk.flags; let flags = tk.flags;
let mut heredoc_body = if flags.contains(TkFlags::LIT_HEREDOC) {
tk.as_str().to_string()
} else {
tk.expand()?.get_words().first().map(|s| s.as_str()).unwrap_or_default().to_string()
};
if flags.contains(TkFlags::TAB_HEREDOC) {
let lines = heredoc_body.lines();
let mut min_tabs = usize::MAX;
for line in lines {
if line.is_empty() { continue; }
let line_len = line.len();
let after_strip = line.trim_start_matches('\t').len();
let delta = line_len - after_strip;
min_tabs = min_tabs.min(delta);
}
if min_tabs == usize::MAX {
// let's avoid possibly allocating a string with 18 quintillion tabs
min_tabs = 0;
}
if min_tabs > 0 {
let stripped = heredoc_body.lines()
.fold(vec![], |mut acc, ln| {
if ln.is_empty() {
acc.push("");
return acc;
}
let stripped_ln = ln.strip_prefix(&"\t".repeat(min_tabs)).unwrap();
acc.push(stripped_ln);
acc
})
.join("\n");
heredoc_body = stripped + "\n";
}
}
Ok(RedirBldr { Ok(RedirBldr {
io_mode: Some(IoMode::loaded_pipe(0, heredoc_body.as_bytes())?), io_mode: Some(IoMode::buffer(0, tk.to_string(), flags)?),
class: Some(RedirType::HereDoc), class: Some(RedirType::HereDoc),
tgt_fd: Some(0), tgt_fd: Some(0),
span: Some(span) span: Some(span)
@@ -921,13 +885,26 @@ impl ParseStream {
let mut node_tks: Vec<Tk> = vec![]; let mut node_tks: Vec<Tk> = vec![];
let body; let body;
if !is_func_name(self.peek_tk()) { // Two forms: "name()" as one token, or "name" followed by "()" as separate tokens
let spaced_form = !is_func_name(self.peek_tk())
&& self.peek_tk().is_some_and(|tk| tk.flags.contains(TkFlags::IS_CMD))
&& is_func_parens(self.tokens.get(1));
if !is_func_name(self.peek_tk()) && !spaced_form {
return Ok(None); return Ok(None);
} }
let name_tk = self.next_tk().unwrap(); let name_tk = self.next_tk().unwrap();
node_tks.push(name_tk.clone()); node_tks.push(name_tk.clone());
let name = name_tk.clone(); let name = name_tk.clone();
let name_raw = name.to_string(); let name_raw = if spaced_form {
// Consume the "()" token
let parens_tk = self.next_tk().unwrap();
node_tks.push(parens_tk);
name.to_string()
} else {
name.to_string()
};
let mut src = name_tk.span.span_source().clone(); let mut src = name_tk.span.span_source().clone();
src.rename(name_raw.clone()); src.rename(name_raw.clone());
let color = next_color(); let color = next_color();
@@ -1155,7 +1132,7 @@ impl ParseStream {
.get_words() .get_words()
.join(" "); .join(" ");
string.push('\n'); string.push('\n');
let io_mode = IoMode::loaded_pipe(redir_bldr.tgt_fd.unwrap_or(0), string.as_bytes())?; let io_mode = IoMode::buffer(redir_bldr.tgt_fd.unwrap_or(0), string, redir_tk.flags)?;
Ok(redir_bldr.with_io_mode(io_mode).build()) Ok(redir_bldr.with_io_mode(io_mode).build())
} }
_ => { _ => {
@@ -1958,6 +1935,12 @@ fn is_func_name(tk: Option<&Tk>) -> bool {
}) })
} }
/// Returns `true` when `tk` is present, carries the `KEYWORD` flag, and its
/// span text is exactly `"()"`. A missing token yields `false`.
fn is_func_parens(tk: Option<&Tk>) -> bool {
    match tk {
        Some(candidate) => {
            let flagged = candidate.flags.contains(TkFlags::KEYWORD);
            flagged && candidate.span.as_str() == "()"
        }
        None => false,
    }
}
/// Perform an operation on the child nodes of a given node /// Perform an operation on the child nodes of a given node
/// ///
/// # Parameters /// # Parameters

View File

@@ -12,7 +12,7 @@ use crate::{
utils::RedirVecUtils, utils::RedirVecUtils,
}, },
parse::{Redir, RedirType, get_redir_file, lex::TkFlags}, parse::{Redir, RedirType, get_redir_file, lex::TkFlags},
prelude::*, prelude::*, state,
}; };
// Credit to fish-shell for many of the implementation ideas present in this // Credit to fish-shell for many of the implementation ideas present in this
@@ -48,8 +48,9 @@ pub enum IoMode {
pipe: Arc<OwnedFd>, pipe: Arc<OwnedFd>,
}, },
Buffer { Buffer {
tgt_fd: RawFd,
buf: String, buf: String,
pipe: Arc<OwnedFd>, flags: TkFlags, // so we can see if its a heredoc or not
}, },
Close { Close {
tgt_fd: RawFd, tgt_fd: RawFd,
@@ -109,10 +110,8 @@ impl IoMode {
} }
Ok(self) Ok(self)
} }
pub fn loaded_pipe(tgt_fd: RawFd, buf: &[u8]) -> ShResult<Self> { pub fn buffer(tgt_fd: RawFd, buf: String, flags: TkFlags) -> ShResult<Self> {
let (rpipe, wpipe) = nix::unistd::pipe2(OFlag::O_CLOEXEC).unwrap(); Ok(Self::Buffer { tgt_fd, buf, flags })
write(wpipe, buf)?;
Ok(Self::Pipe { tgt_fd, pipe: rpipe.into() })
} }
pub fn get_pipes() -> (Self, Self) { pub fn get_pipes() -> (Self, Self) {
let (rpipe, wpipe) = nix::unistd::pipe2(OFlag::O_CLOEXEC).unwrap(); let (rpipe, wpipe) = nix::unistd::pipe2(OFlag::O_CLOEXEC).unwrap();
@@ -245,25 +244,74 @@ impl<'e> IoFrame {
fn apply_redirs(&mut self) -> ShResult<()> { fn apply_redirs(&mut self) -> ShResult<()> {
for redir in &mut self.redirs { for redir in &mut self.redirs {
let io_mode = &mut redir.io_mode; let io_mode = &mut redir.io_mode;
if let IoMode::Close { tgt_fd } = io_mode { match io_mode {
if *tgt_fd == *TTY_FILENO { IoMode::Close { tgt_fd } => {
// Don't let user close the shell's tty fd. if *tgt_fd == *TTY_FILENO {
continue; // Don't let user close the shell's tty fd.
} continue;
close(*tgt_fd).ok(); }
continue; close(*tgt_fd).ok();
} continue;
if let IoMode::File { .. } = io_mode { }
match io_mode.clone().open_file() { IoMode::File { .. } => {
Ok(file) => *io_mode = file, match io_mode.clone().open_file() {
Err(e) => { Ok(file) => *io_mode = file,
if let Some(span) = redir.span.as_ref() { Err(e) => {
return Err(e.promote(span.clone())); if let Some(span) = redir.span.as_ref() {
} return Err(e.promote(span.clone()));
return Err(e) }
} return Err(e)
} }
}; }
}
IoMode::Buffer { tgt_fd, buf, flags } => {
let (rpipe, wpipe) = nix::unistd::pipe()?;
let mut text = if flags.contains(TkFlags::LIT_HEREDOC) {
buf.clone()
} else {
let words = Expander::from_raw(buf, *flags)?.expand()?;
if flags.contains(TkFlags::IS_HEREDOC) {
words.into_iter().next().unwrap_or_default()
} else {
let ifs = state::get_separator();
words.join(&ifs).trim().to_string() + "\n"
}
};
if flags.contains(TkFlags::TAB_HEREDOC) {
let lines = text.lines();
let mut min_tabs = usize::MAX;
for line in lines {
if line.is_empty() { continue; }
let line_len = line.len();
let after_strip = line.trim_start_matches('\t').len();
let delta = line_len - after_strip;
min_tabs = min_tabs.min(delta);
}
if min_tabs == usize::MAX {
// let's avoid possibly allocating a string with 18 quintillion tabs
min_tabs = 0;
}
if min_tabs > 0 {
let stripped = text.lines()
.fold(vec![], |mut acc, ln| {
if ln.is_empty() {
acc.push("");
return acc;
}
let stripped_ln = ln.strip_prefix(&"\t".repeat(min_tabs)).unwrap();
acc.push(stripped_ln);
acc
})
.join("\n");
text = stripped + "\n";
}
}
write(wpipe, text.as_bytes())?;
*io_mode = IoMode::Pipe { tgt_fd: *tgt_fd, pipe: rpipe.into() };
}
_ => {}
}
let tgt_fd = io_mode.tgt_fd(); let tgt_fd = io_mode.tgt_fd();
let src_fd = io_mode.src_fd(); let src_fd = io_mode.src_fd();
if let Err(e) = dup2(src_fd, tgt_fd) { if let Err(e) = dup2(src_fd, tgt_fd) {

View File

@@ -1330,6 +1330,15 @@ impl VarTab {
.get(&ShellParam::Status) .get(&ShellParam::Status)
.map(|s| s.to_string()) .map(|s| s.to_string())
.unwrap_or("0".into()), .unwrap_or("0".into()),
ShellParam::AllArgsStr => {
let ifs = get_separator();
self
.params
.get(&ShellParam::AllArgs)
.map(|s| s.replace(markers::ARG_SEP, &ifs).to_string())
.unwrap_or_default()
}
_ => self _ => self
.params .params
.get(&param) .get(&param)
@@ -1842,6 +1851,15 @@ pub fn change_dir<P: AsRef<Path>>(dir: P) -> ShResult<()> {
Ok(()) Ok(())
} }
/// Returns the field separator used when joining words into one string:
/// the first character of the `IFS` environment variable.
///
/// - `IFS` unset (or not valid Unicode): a single space.
/// - `IFS` set but empty: an empty string, so callers join fields with
///   nothing in between instead of panicking on the missing first character.
pub fn get_separator() -> String {
    match env::var("IFS") {
        // An empty IFS has no first character; fall back to "no separator".
        Ok(ifs) => ifs
            .chars()
            .next()
            .map(|first| first.to_string())
            .unwrap_or_default(),
        Err(_) => String::from(" "),
    }
}
pub fn get_status() -> i32 { pub fn get_status() -> i32 {
read_vars(|v| v.get_param(ShellParam::Status)) read_vars(|v| v.get_param(ShellParam::Status))
.parse::<i32>() .parse::<i32>()