From b6a9bb880d91ef55eb8258342329fd8e76406011 Mon Sep 17 00:00:00 2001 From: pagedmov Date: Wed, 19 Mar 2025 16:46:45 -0400 Subject: [PATCH] Implemented command substitution --- src/builtin/flowctl.rs | 2 +- src/builtin/mod.rs | 2 +- src/expand.rs | 79 ++++++++++++++++++++++---- src/libsh/utils.rs | 42 ++++++++++++++ src/parse/execute.rs | 16 +++--- src/parse/lex.rs | 124 ++++++++++++++++++++++++++++++++++++----- src/parse/mod.rs | 1 + src/procio.rs | 56 ++++++++++++++++++- 8 files changed, 285 insertions(+), 37 deletions(-) diff --git a/src/builtin/flowctl.rs b/src/builtin/flowctl.rs index 3afc3db..3ae295c 100644 --- a/src/builtin/flowctl.rs +++ b/src/builtin/flowctl.rs @@ -7,7 +7,7 @@ pub fn flowctl(node: Node, kind: ShErrKind) -> ShResult<()> { }; let mut code = 0; - let mut argv = prepare_argv(argv); + let mut argv = prepare_argv(argv)?; let cmd = argv.remove(0).0; if !argv.is_empty() { diff --git a/src/builtin/mod.rs b/src/builtin/mod.rs index 7705081..274acd9 100644 --- a/src/builtin/mod.rs +++ b/src/builtin/mod.rs @@ -56,7 +56,7 @@ pub fn setup_builtin( job: &mut JobBldr, io_mode: Option<(&mut IoStack,Vec)>, ) -> ShResult<(Vec<(String,Span)>, Option)> { - let mut argv: Vec<(String,Span)> = prepare_argv(argv); + let mut argv: Vec<(String,Span)> = prepare_argv(argv)?; let child_pgid = if let Some(pgid) = job.pgid() { pgid diff --git a/src/expand.rs b/src/expand.rs index cf42bf5..5ce6aa0 100644 --- a/src/expand.rs +++ b/src/expand.rs @@ -1,6 +1,6 @@ use std::collections::{HashSet, VecDeque}; -use crate::{libsh::error::ShResult, parse::lex::{is_field_sep, is_hard_sep, is_keyword, LexFlags, LexStream, Span, Tk, TkFlags, TkRule}, prelude::*, state::{read_logic, read_meta, read_vars, write_meta}}; +use crate::{exec_input, libsh::error::{ShErr, ShErrKind, ShResult}, parse::{lex::{is_field_sep, is_hard_sep, is_keyword, LexFlags, LexStream, Span, Tk, TkFlags, TkRule}, Redir, RedirType}, prelude::*, procio::{IoBuf, IoFrame, IoMode}, state::{read_logic, read_meta, read_vars, write_meta}}; /// Variable substitution marker pub const VAR_SUB: char = '\u{fdd0}'; @@ -16,10 +16,10 @@ impl Tk { /// tokens: A vector of raw tokens lexed from the expansion result /// span: The span of the original token that is being expanded /// flags: some TkFlags - pub fn expand(self, span: Span, flags: TkFlags) -> Self { - let exp = Expander::new(self).expand(); + pub fn expand(self, span: Span, flags: TkFlags) -> ShResult { + let exp = Expander::new(self).expand()?; let class = TkRule::Expanded { exp }; - Self { class, span, flags, } + Ok(Self { class, span, flags, }) } pub fn get_words(&self) -> Vec { match &self.class { @@ -38,9 +38,9 @@ impl Expander { let unescaped = unescape_str(raw.span.as_str()); Self { raw: unescaped } } - pub fn expand(&mut self) -> Vec { - self.raw = self.expand_raw(); - self.split_words() + pub fn expand(&mut self) -> ShResult> { + self.raw = self.expand_raw()?; + Ok(self.split_words()) } pub fn split_words(&mut self) -> Vec { let mut words = vec![]; @@ -68,18 +68,38 @@ impl Expander { } words } - pub fn expand_raw(&self) -> String { - let mut chars = self.raw.chars(); + pub fn expand_raw(&self) -> ShResult { + let mut chars = self.raw.chars().peekable(); let mut result = String::new(); let mut var_name = String::new(); let mut in_brace = false; - // TODO: implement error handling for unclosed braces while let Some(ch) = chars.next() { match ch { VAR_SUB => { while let Some(ch) = chars.next() { match ch { + '(' if var_name.is_empty() => { + let mut paren_stack = vec!['(']; + let mut subsh_body = String::new(); + while let Some(ch) = chars.next() { + flog!(DEBUG, "looping"); + flog!(DEBUG, subsh_body); + match ch { + '(' => { + paren_stack.push(ch); + subsh_body.push(ch); + } + ')' => { + paren_stack.pop(); + if paren_stack.is_empty() { break }; + subsh_body.push(ch); + } + _ => subsh_body.push(ch) + } + } + result.push_str(&expand_cmd_sub(&subsh_body)?); + } '{' => in_brace = true, '}' if in_brace => { let var_val = read_vars(|v| v.get_var(&var_name)); @@ -106,7 +126,44 @@ impl Expander { _ => result.push(ch) } } - result + Ok(result) + } +} + +/// Get the command output of a given command input as a String +pub fn expand_cmd_sub(raw: &str) -> ShResult { + flog!(DEBUG, "in expand_cmd_sub"); + let (rpipe,wpipe) = IoMode::get_pipes(); + let cmd_sub_redir = Redir::new(wpipe, RedirType::Output); + let mut cmd_sub_io_frame = IoFrame::from_redir(cmd_sub_redir); + let mut io_buf = IoBuf::new(rpipe); + + match unsafe { fork()? } { + ForkResult::Child => { + if let Err(e) = cmd_sub_io_frame.redirect() { + eprintln!("{e}"); + exit(1); + } + + if let Err(e) = exec_input(raw.to_string()) { + eprintln!("{e}"); + exit(1); + } + exit(0); + } + ForkResult::Parent { child } => { + std::mem::drop(cmd_sub_io_frame); // Closes the write pipe + let status = waitpid(child, Some(WtFlag::WSTOPPED))?; + match status { + WtStat::Exited(_, _) => { + flog!(DEBUG, "filling buffer"); + io_buf.fill_buffer()?; + flog!(DEBUG, "done"); + Ok(io_buf.as_str()?.trim().to_string()) + } + _ => return Err(ShErr::simple(ShErrKind::InternalErr, "Command sub failed")) + } + } } } diff --git a/src/libsh/utils.rs b/src/libsh/utils.rs index 6f28eeb..c038290 100644 --- a/src/libsh/utils.rs +++ b/src/libsh/utils.rs @@ -8,6 +8,12 @@ pub trait VecDequeExt { fn to_vec(self) -> Vec; } +pub trait CharDequeUtils { + fn to_string(self) -> String; + fn ends_with(&self, pat: &str) -> bool; + fn starts_with(&self, pat: &str) -> bool; +} + pub trait TkVecUtils { fn get_span(&self) -> Option; fn debug_tokens(&self); @@ -26,6 +32,42 @@ impl VecDequeExt for VecDeque { } } +impl CharDequeUtils for VecDeque { + fn to_string(mut self) -> String { + let mut result = String::with_capacity(self.len()); + while let Some(ch) = self.pop_front() { + result.push(ch); + } + result + } + + fn ends_with(&self, pat: &str) -> bool { + let pat_chars = pat.chars(); + let self_len = self.len(); + + // If pattern is longer than self, return false + if pat_chars.clone().count() > self_len { + return false; + } + + // Compare from the back + self.iter().rev().zip(pat_chars.rev()).all(|(c1, c2)| c1 == &c2) + } + + fn starts_with(&self, pat: &str) -> bool { + let pat_chars = pat.chars(); + let self_len = self.len(); + + // If pattern is longer than self, return false + if pat_chars.clone().count() > self_len { + return false; + } + + // Compare from the front + self.iter().zip(pat_chars).all(|(c1, c2)| c1 == &c2) + } +} + impl TkVecUtils for Vec { fn get_span(&self) -> Option { if let Some(first_tk) = self.first() { diff --git a/src/parse/execute.rs b/src/parse/execute.rs index 513b1c2..d4feab1 100644 --- a/src/parse/execute.rs +++ b/src/parse/execute.rs @@ -18,13 +18,13 @@ pub struct ExecArgs { } impl ExecArgs { - pub fn new(argv: Vec) -> Self { + pub fn new(argv: Vec) -> ShResult { assert!(!argv.is_empty()); - let argv = prepare_argv(argv); + let argv = prepare_argv(argv)?; let cmd = Self::get_cmd(&argv); let argv = Self::get_argv(argv); let envp = Self::get_envp(); - Self { cmd, argv, envp } + Ok(Self { cmd, argv, envp }) } pub fn get_cmd(argv: &[(String,Span)]) -> (CString,Span) { (CString::new(argv[0].0.as_str()).unwrap(),argv[0].1.clone()) @@ -197,7 +197,7 @@ impl Dispatcher { self.io_stack.append_to_frame(case_stmt.redirs); flog!(DEBUG,pattern.span.as_str()); - let exp_pattern = pattern.clone().expand(pattern.span.clone(), pattern.flags.clone()); + let exp_pattern = pattern.clone().expand(pattern.span.clone(), pattern.flags.clone())?; let pattern_raw = exp_pattern .get_words() .first() @@ -396,7 +396,7 @@ impl Dispatcher { self.io_stack.append_to_frame(cmd.redirs); - let exec_args = ExecArgs::new(argv); + let exec_args = ExecArgs::new(argv)?; let io_frame = self.io_stack.pop_frame(); run_fork( io_frame, @@ -453,18 +453,18 @@ impl Dispatcher { } } -pub fn prepare_argv(argv: Vec) -> Vec<(String,Span)> { +pub fn prepare_argv(argv: Vec) -> ShResult> { let mut args = vec![]; for arg in argv { let flags = arg.flags; let span = arg.span.clone(); - let expanded = arg.expand(span.clone(), flags); + let expanded = arg.expand(span.clone(), flags)?; for exp in expanded.get_words() { args.push((exp,span.clone())) } } - args + Ok(args) } pub fn run_fork<'t,C,P>( diff --git a/src/parse/lex.rs b/src/parse/lex.rs index aac8ebe..3fb052a 100644 --- a/src/parse/lex.rs +++ b/src/parse/lex.rs @@ -1,8 +1,8 @@ -use std::{fmt::Display, ops::{Bound, Deref, Range, RangeBounds}, str::Chars}; +use std::{collections::VecDeque, fmt::Display, iter::Peekable, ops::{Bound, Deref, Range, RangeBounds}, str::Chars}; use bitflags::bitflags; -use crate::{builtin::BUILTINS, libsh::error::{ShErr, ShErrKind, ShResult}, prelude::*}; +use crate::{builtin::BUILTINS, libsh::{error::{ShErr, ShErrKind, ShResult}, utils::CharDequeUtils}, prelude::*}; pub const KEYWORDS: [&'static str;14] = [ "if", @@ -133,13 +133,14 @@ impl Display for Tk { bitflags! { #[derive(Debug,Clone,Copy,PartialEq,Default)] pub struct TkFlags: u32 { - const KEYWORD = 0b0000000000000001; - /// This is a keyword that opens a new block statement, like 'if' and 'while' - const OPENER = 0b0000000000000010; - const IS_CMD = 0b0000000000000100; - const IS_OP = 0b0000000000001000; - const ASSIGN = 0b0000000000010000; - const BUILTIN = 0b0000000000100000; + const KEYWORD = 0b0000000000000001; + /// This is a keyword that opens a new block statement, like 'if' and 'while' + const OPENER = 0b0000000000000010; + const IS_CMD = 0b0000000000000100; + const IS_SUBSH = 0b0000000000001000; + const IS_OP = 0b0000000000010000; + const ASSIGN = 0b0000000000100000; + const BUILTIN = 0b0000000001000000; } } @@ -309,7 +310,7 @@ impl LexStream { assert!(self.cursor <= self.source.len()); let slice = self.slice_from_cursor().unwrap().to_string(); let mut pos = self.cursor; - let mut chars = slice.chars(); + let mut chars = slice.chars().peekable(); let mut quote_pos = None; if let Some(count) = case_pat_lookahead(chars.clone()) { @@ -331,10 +332,89 @@ impl LexStream { } '\\' => { pos += 1; - if chars.next().is_some() { - pos += 1; + if let Some(ch) = chars.next() { + pos += ch.len_utf8(); } } + '$' if chars.peek() == Some(&'(') => { + pos += 2; + chars.next(); + let mut paren_stack = vec!['(']; + let paren_pos = pos; + while let Some(ch) = chars.next() { + match ch { + '\\' => { + pos += 1; + if let Some(next_ch) = chars.next() { + pos += next_ch.len_utf8(); + } + } + '(' => { + pos += 1; + paren_stack.push(ch); + } + ')' => { + pos += 1; + paren_stack.pop(); + if paren_stack.is_empty() { + break + } + } + _ => pos += ch.len_utf8() + } + } + if !paren_stack.is_empty() { + return Err( + ShErr::full( + ShErrKind::ParseErr, + "Unclosed subshell", + Span::new(paren_pos..paren_pos + 1, self.source.clone()) + ) + ) + } + } + '(' if self.next_is_cmd() => { + let mut paren_stack = vec!['(']; + let paren_pos = pos; + while let Some(ch) = chars.next() { + pos += ch.len_utf8(); + match ch { + '\\' => { + if let Some(next_ch) = chars.next() { + pos += next_ch.len_utf8(); + } + } + '(' => { + pos += 1; + paren_stack.push(ch); + } + ')' => { + pos += 1; + paren_stack.pop(); + if paren_stack.is_empty() { + break + } + } + _ => continue + } + } + if !paren_stack.is_empty() { + return Err( + ShErr::full( + ShErrKind::ParseErr, + "Unclosed subshell", + Span::new(paren_pos..paren_pos + 1, self.source.clone()) + ) + ) + } + let mut subsh_tk = self.get_token(self.cursor..pos, TkRule::Str); + subsh_tk.flags |= TkFlags::IS_CMD; + subsh_tk.flags |= TkFlags::IS_SUBSH; + self.cursor = pos; + self.set_next_is_cmd(true); + flog!(DEBUG, subsh_tk); + return Ok(subsh_tk) + } '{' if pos == self.cursor && self.next_is_cmd() => { pos += 1; let mut tk = self.get_token(self.cursor..pos, TkRule::BraceGrpStart); @@ -384,6 +464,7 @@ impl LexStream { } } let mut new_tk = self.get_token(self.cursor..pos, TkRule::Str); + flog!(DEBUG,new_tk); if self.in_quote && !self.flags.contains(LexFlags::LEX_UNFINISHED) { return Err( ShErr::full( @@ -428,6 +509,7 @@ impl LexStream { } } self.cursor = pos; + flog!(DEBUG, self.slice_from_cursor()); Ok(new_tk) } pub fn get_token(&self, range: Range, class: TkRule) -> Tk { @@ -595,7 +677,23 @@ pub fn is_keyword(slice: &str) -> bool { (slice.ends_with("()") && !slice.ends_with("\\()")) } -pub fn case_pat_lookahead(mut chars: Chars) -> Option { +pub fn lookahead(pat: &str, mut chars: Chars) -> Option { + let mut pos = 0; + let mut char_deque = VecDeque::new(); + while let Some(ch) = chars.next() { + char_deque.push_back(ch); + if char_deque.len() > pat.len() { + char_deque.pop_front(); + } + if char_deque.starts_with(pat) { + return Some(pos) + } + pos += 1; + } + None +} + +pub fn case_pat_lookahead(mut chars: Peekable) -> Option { let mut pos = 0; while let Some(ch) = chars.next() { pos += 1; diff --git a/src/parse/mod.rs b/src/parse/mod.rs index d38e4b5..896f109 100644 --- a/src/parse/mod.rs +++ b/src/parse/mod.rs @@ -42,6 +42,7 @@ impl ParsedSrc { for token in LexStream::new(self.src.clone(), LexFlags::empty()) { tokens.push(token?); } + flog!(DEBUG,tokens); let mut nodes = vec![]; for result in ParseStream::new(tokens) { diff --git a/src/procio.rs b/src/procio.rs index 7fc0123..9251239 100644 --- a/src/procio.rs +++ b/src/procio.rs @@ -1,6 +1,6 @@ use std::{fmt::Debug, ops::{Deref, DerefMut}}; -use crate::{libsh::{error::ShResult, utils::RedirVecUtils}, parse::{Redir, RedirType}, prelude::*}; +use crate::{libsh::{error::{ShErr, ShErrKind, ShResult}, utils::RedirVecUtils}, parse::{Redir, RedirType}, prelude::*}; // Credit to fish-shell for many of the implementation ideas present in this module // https://fishshell.com/ @@ -10,6 +10,7 @@ pub enum IoMode { Fd { tgt_fd: RawFd, src_fd: Rc }, File { tgt_fd: RawFd, file: Rc }, Pipe { tgt_fd: RawFd, pipe: Rc }, + Buffer { buf: String, pipe: Rc } } impl IoMode { @@ -29,14 +30,16 @@ impl IoMode { match self { IoMode::Fd { tgt_fd, src_fd: _ } | IoMode::File { tgt_fd, file: _ } | - IoMode::Pipe { tgt_fd, pipe: _ } => *tgt_fd + IoMode::Pipe { tgt_fd, pipe: _ } => *tgt_fd, + _ => panic!() } } pub fn src_fd(&self) -> RawFd { match self { IoMode::Fd { tgt_fd: _, src_fd } => src_fd.as_raw_fd(), IoMode::File { tgt_fd: _, file } => file.as_raw_fd(), - IoMode::Pipe { tgt_fd: _, pipe } => pipe.as_raw_fd() + IoMode::Pipe { tgt_fd: _, pipe } => pipe.as_raw_fd(), + _ => panic!() } } pub fn get_pipes() -> (Self,Self) { @@ -55,6 +58,50 @@ impl Read for IoMode { } } +pub struct IoBuf { + buf: Vec, + reader: R, +} + +impl IoBuf { + pub fn new(reader: R) -> Self { + Self { + buf: Vec::new(), + reader, + } + } + + /// Reads exactly `size` bytes (or fewer if EOF) into the buffer + pub fn read_buffer(&mut self, size: usize) -> io::Result<()> { + let mut temp_buf = vec![0; size]; // Temporary buffer + let bytes_read = self.reader.read(&mut temp_buf)?; + self.buf.extend_from_slice(&temp_buf[..bytes_read]); // Append only what was read + Ok(()) + } + + /// Continuously reads until EOF + pub fn fill_buffer(&mut self) -> io::Result<()> { + let mut temp_buf = vec![0; 1024]; // Read in chunks + loop { + flog!(DEBUG, "reading bytes"); + let bytes_read = self.reader.read(&mut temp_buf)?; + flog!(DEBUG, bytes_read); + if bytes_read == 0 { + break; // EOF reached + } + self.buf.extend_from_slice(&temp_buf[..bytes_read]); + } + Ok(()) + } + + /// Get current buffer contents as a string (if valid UTF-8) + pub fn as_str(&self) -> ShResult<&str> { + std::str::from_utf8(&self.buf).map_err(|_| { + ShErr::simple(ShErrKind::InternalErr, "Invalid utf-8 in IoBuf") + }) + } +} + /// A struct wrapping three fildescs representing `stdin`, `stdout`, and `stderr` respectively #[derive(Debug,Clone)] pub struct IoGroup(RawFd,RawFd,RawFd); @@ -74,6 +121,9 @@ impl<'e> IoFrame { pub fn from_redirs(redirs: Vec) -> Self { Self { redirs, saved_io: None } } + pub fn from_redir(redir: Redir) -> Self { + Self { redirs: vec![redir], saved_io: None } + } /// Splits the frame into two frames ///