From ec179a04beb9c8a79b824541d0b6e76662d7f2ba Mon Sep 17 00:00:00 2001 From: Kyler Clay Date: Tue, 13 May 2025 00:48:19 -0400 Subject: [PATCH] Added regex to [[ ]] tests --- src/builtin/mod.rs | 3 +- src/builtin/test.rs | 328 +++++++++++++++++++++++++++++++++++++++++++ src/expand.rs | 19 ++- src/parse/execute.rs | 15 +- src/parse/lex.rs | 4 +- src/parse/mod.rs | 197 ++++++++++++++++++++++++++ 6 files changed, 557 insertions(+), 9 deletions(-) create mode 100644 src/builtin/test.rs diff --git a/src/builtin/mod.rs b/src/builtin/mod.rs index d90d16a..f4061a2 100644 --- a/src/builtin/mod.rs +++ b/src/builtin/mod.rs @@ -13,6 +13,7 @@ pub mod alias; pub mod flowctl; pub mod zoltraak; pub mod shopt; +pub mod test; // [[ ]] thing pub const BUILTINS: [&str;19] = [ "echo", @@ -33,7 +34,7 @@ pub const BUILTINS: [&str;19] = [ "zoltraak", "shopt", "builtin", - "command" + "command", ]; /// Sets up a builtin command diff --git a/src/builtin/test.rs b/src/builtin/test.rs new file mode 100644 index 0000000..8d2407b --- /dev/null +++ b/src/builtin/test.rs @@ -0,0 +1,328 @@ +use std::{fs::metadata, path::PathBuf, str::FromStr}; + +use nix::{sys::stat::{self, SFlag}, unistd::AccessFlags}; +use regex::Regex; + +use crate::{libsh::error::{ShErr, ShErrKind, ShResult},prelude::*, parse::{ConjunctOp, NdRule, Node, TestCase, TEST_UNARY_OPS}}; + +#[derive(Debug, Clone)] +pub enum UnaryOp { + Exists, // -e + Directory, // -d + File, // -f + Symlink, // -h or -L + Readable, // -r + Writable, // -w + Executable, // -x + NonEmpty, // -s + NamedPipe, // -p + Socket, // -S + BlockSpecial, // -b + CharSpecial, // -c + Sticky, // -k + UIDOwner, // -O + GIDOwner, // -G + ModifiedSinceStatusChange, // -N + SetUID, // -u + SetGID, // -g + Terminal, // -t + NonNull, // -n + Null, // -z +} + +impl FromStr for UnaryOp { + type Err = ShErr; + fn from_str(s: &str) -> Result { + match s { + "-e" => Ok(Self::Exists), + "-d" => Ok(Self::Directory), + "-f" => Ok(Self::File), + "-h" | "-L" => Ok(Self::Symlink), // -h or -L + "-r" => Ok(Self::Readable), + "-w" => Ok(Self::Writable), + "-x" => Ok(Self::Executable), + "-s" => Ok(Self::NonEmpty), + "-p" => Ok(Self::NamedPipe), + "-S" => Ok(Self::Socket), + "-b" => Ok(Self::BlockSpecial), + "-c" => Ok(Self::CharSpecial), + "-k" => Ok(Self::Sticky), + "-O" => Ok(Self::UIDOwner), + "-G" => Ok(Self::GIDOwner), + "-N" => Ok(Self::ModifiedSinceStatusChange), + "-u" => Ok(Self::SetUID), + "-g" => Ok(Self::SetGID), + "-t" => Ok(Self::Terminal), + "-n" => Ok(Self::NonNull), + "-z" => Ok(Self::Null), + _ => Err(ShErr::Simple { kind: ShErrKind::SyntaxErr, msg: "Invalid test operator".into(), notes: vec![] }) + } + } +} + +#[derive(Debug, Clone)] +pub enum TestOp { + Unary(UnaryOp), + StringEq, // == + StringNeq, // != + IntEq, // -eq + IntNeq, // -ne + IntGt, // -gt + IntLt, // -lt + IntGe, // -ge + IntLe, // -le + RegexMatch, // =~ +} + +impl FromStr for TestOp { + type Err = ShErr; + fn from_str(s: &str) -> Result { + match s { + "==" => Ok(Self::StringEq), + "!=" => Ok(Self::StringNeq), + "=~" => Ok(Self::RegexMatch), + "-eq" => Ok(Self::IntEq), + "-ne" => Ok(Self::IntNeq), + "-gt" => Ok(Self::IntGt), + "-lt" => Ok(Self::IntLt), + "-ge" => Ok(Self::IntGe), + "-le" => Ok(Self::IntLe), + _ if TEST_UNARY_OPS.contains(&s) => { + Ok(Self::Unary(s.parse::()?)) + } + _ => Err(ShErr::Simple { kind: ShErrKind::SyntaxErr, msg: "Invalid test operator".into(), notes: vec![] }) + } + } +} + +fn replace_posix_classes(pat: &str) -> String { + pat.replace("[[:alnum:]]", r"[A-Za-z0-9]") + .replace("[[:alpha:]]", r"[A-Za-z]") + .replace("[[:blank:]]", r"[ \t]") + .replace("[[:cntrl:]]", r"[\x00-\x1F\x7F]") + .replace("[[:digit:]]", r"[0-9]") + .replace("[[:graph:]]", r"[!-~]") + .replace("[[:lower:]]", r"[a-z]") + .replace("[[:print:]]", r"[\x20-\x7E]") + .replace("[[:space:]]", r"[ \t\r\n\x0B\x0C]") // vertical tab (\x0B), form feed (\x0C) + .replace("[[:upper:]]", r"[A-Z]") + .replace("[[:xdigit:]]", r"[0-9A-Fa-f]") +} + +pub fn double_bracket_test(node: Node) -> ShResult { + let err_span = node.get_span(); + let NdRule::Test { cases } = node.class else { + unreachable!() + }; + let mut last_result = false; + let mut conjunct_op: Option; + + for case in cases { + let result = match case { + TestCase::Unary { operator, operand, conjunct } => { + let operand = operand.expand()?.get_words().join(" "); + conjunct_op = conjunct; + let TestOp::Unary(op) = TestOp::from_str(operator.as_str())? else { + return Err( + ShErr::Full { kind: ShErrKind::SyntaxErr, msg: "Invalid unary operator".into(), notes: vec![], span: err_span } + ) + }; + match op { + UnaryOp::Exists => { + let path = PathBuf::from(operand.as_str()); + path.exists() + } + UnaryOp::Directory => { + let path = PathBuf::from(operand.as_str()); + if path.exists() { + path.metadata() + .unwrap() + .is_dir() + } else { + false + } + } + UnaryOp::File => { + let path = PathBuf::from(operand.as_str()); + if path.exists() { + path.metadata() + .unwrap() + .is_file() + } else { + false + } + } + UnaryOp::Symlink => { + let path = PathBuf::from(operand.as_str()); + if path.exists() { + path.metadata() + .unwrap() + .file_type() + .is_symlink() + } else { + false + } + } + UnaryOp::Readable => nix::unistd::access(operand.as_str(), AccessFlags::R_OK).is_ok(), + UnaryOp::Writable => nix::unistd::access(operand.as_str(), AccessFlags::W_OK).is_ok(), + UnaryOp::Executable => nix::unistd::access(operand.as_str(), AccessFlags::X_OK).is_ok(), + UnaryOp::NonEmpty => { + match metadata(operand.as_str()) { + Ok(meta) => meta.len() > 0, + Err(_) => false + } + } + UnaryOp::NamedPipe => { + match stat::stat(operand.as_str()) { + Ok(stat) => SFlag::from_bits_truncate(stat.st_mode).contains(SFlag::S_IFIFO), + Err(_) => false, + } + } + UnaryOp::Socket => { + match stat::stat(operand.as_str()) { + Ok(stat) => SFlag::from_bits_truncate(stat.st_mode).contains(SFlag::S_IFSOCK), + Err(_) => false, + } + } + UnaryOp::BlockSpecial => { + match stat::stat(operand.as_str()) { + Ok(stat) => SFlag::from_bits_truncate(stat.st_mode).contains(SFlag::S_IFBLK), + Err(_) => false, + } + } + UnaryOp::CharSpecial => { + match stat::stat(operand.as_str()) { + Ok(stat) => SFlag::from_bits_truncate(stat.st_mode).contains(SFlag::S_IFCHR), + Err(_) => false, + } + } + UnaryOp::Sticky => { + match stat::stat(operand.as_str()) { + Ok(stat) => stat.st_mode & nix::libc::S_ISVTX != 0, + Err(_) => false, + } + } + UnaryOp::UIDOwner => { + match stat::stat(operand.as_str()) { + Ok(stat) => stat.st_uid == nix::unistd::geteuid().as_raw(), + Err(_) => false, + } + } + + UnaryOp::GIDOwner => { + match stat::stat(operand.as_str()) { + Ok(stat) => stat.st_gid == nix::unistd::getegid().as_raw(), + Err(_) => false, + } + } + + UnaryOp::ModifiedSinceStatusChange => { + match stat::stat(operand.as_str()) { + Ok(stat) => stat.st_mtime > stat.st_ctime, + Err(_) => false, + } + } + + UnaryOp::SetUID => { + match stat::stat(operand.as_str()) { + Ok(stat) => stat.st_mode & nix::libc::S_ISUID != 0, + Err(_) => false, + } + } + + UnaryOp::SetGID => { + match stat::stat(operand.as_str()) { + Ok(stat) => stat.st_mode & nix::libc::S_ISGID != 0, + Err(_) => false, + } + } + + UnaryOp::Terminal => { + match operand.as_str().parse::() { + Ok(fd) => unsafe { nix::libc::isatty(fd) == 1 }, + Err(_) => false, + } + } + UnaryOp::NonNull => !operand.is_empty(), + UnaryOp::Null => operand.is_empty(), + } + } + TestCase::Binary { lhs, operator, rhs, conjunct } => { + let lhs = lhs.expand()?.get_words().join(" "); + let rhs = rhs.expand()?.get_words().join(" "); + conjunct_op = conjunct; + let test_op = operator.as_str().parse::()?; + flog!(DEBUG, lhs); + flog!(DEBUG, rhs); + flog!(DEBUG, test_op); + match test_op { + TestOp::Unary(_) => { + return Err( + ShErr::Full { + kind: ShErrKind::SyntaxErr, + msg: "Expected a binary operator in this test call; found a unary operator".into(), + notes: vec![], + span: err_span + } + ) + } + TestOp::StringEq => rhs.trim() == lhs.trim(), + TestOp::StringNeq => rhs.trim() != lhs.trim(), + TestOp::IntNeq | + TestOp::IntGt | + TestOp::IntLt | + TestOp::IntGe | + TestOp::IntLe | + TestOp::IntEq => { + let err = ShErr::Full { + kind: ShErrKind::SyntaxErr, + msg: format!("Expected an integer with '{}' operator", operator.as_str()), + notes: vec![], + span: err_span.clone() + }; + let Ok(lhs) = lhs.trim().parse::() else { + return Err(err) + }; + let Ok(rhs) = rhs.trim().parse::() else { + return Err(err) + }; + match test_op { + TestOp::IntNeq => lhs != rhs, + TestOp::IntGt => lhs > rhs, + TestOp::IntLt => lhs < rhs, + TestOp::IntGe => lhs >= rhs, + TestOp::IntLe => lhs <= rhs, + TestOp::IntEq => lhs == rhs, + _ => unreachable!() + } + } + TestOp::RegexMatch => { + // FIXME: Imagine doing all of this in every single iteration of a loop + let cleaned = replace_posix_classes(&rhs); + let regex = Regex::new(&cleaned).unwrap(); + regex.is_match(&lhs) + } + } + } + }; + flog!(DEBUG, last_result); + + if let Some(op) = conjunct_op { + match op { + ConjunctOp::And if !last_result => { + last_result = result; + break + } + ConjunctOp::Or if last_result => { + last_result = result; + break + } + _ => {} + } + } else { + last_result = result; + } + } + flog!(DEBUG, last_result); + Ok(last_result) +} diff --git a/src/expand.rs b/src/expand.rs index 9251c9c..6ebf9e6 100644 --- a/src/expand.rs +++ b/src/expand.rs @@ -350,7 +350,7 @@ pub enum ParamExp { ReplaceAllMatches(String,String), // //search/replace ReplacePrefix(String,String), // #search/replace ReplaceSuffix(String,String), // %search/replace - VarNamesWithSuffix(String), // !prefix@ || !prefix* + VarNamesWithPrefix(String), // !prefix@ || !prefix* ExpandInnerVar(String), // !var } @@ -369,7 +369,7 @@ impl FromStr for ParamExp { // Handle indirect var expansion: ${!var} if let Some(var) = s.strip_prefix('!') { if var.ends_with('*') || var.ends_with('@') { - return Ok(VarNamesWithSuffix(var.to_string())); + return Ok(VarNamesWithPrefix(var.to_string())); } return Ok(ExpandInnerVar(var.to_string())); } @@ -665,8 +665,19 @@ pub fn perform_param_expansion(raw: &str) -> ShResult { } Ok(value) } - ParamExp::VarNamesWithSuffix(suffix) => todo!(), - ParamExp::ExpandInnerVar(var_name) => todo!(), + ParamExp::VarNamesWithPrefix(prefix) => { + let mut match_vars = vec![]; + for var in vars.vars().keys() { + if var.starts_with(&prefix) { + match_vars.push(var.clone()) + } + } + Ok(match_vars.join(" ")) + } + ParamExp::ExpandInnerVar(var_name) => { + let value = vars.get_var(&var_name); + Ok(vars.get_var(&value)) + } } } else { Ok(vars.get_var(&var_name)) diff --git a/src/parse/execute.rs b/src/parse/execute.rs index 18b2eda..6152e11 100644 --- a/src/parse/execute.rs +++ b/src/parse/execute.rs @@ -1,7 +1,7 @@ use std::collections::{HashSet, VecDeque}; -use crate::{builtin::{alias::{alias, unalias}, cd::cd, echo::echo, export::export, flowctl::flowctl, jobctl::{continue_job, jobs, JobBehavior}, pwd::pwd, shift::shift, shopt::shopt, source::source, zoltraak::zoltraak}, expand::expand_aliases, jobs::{dispatch_job, ChildProc, JobBldr, JobStack}, libsh::{error::{ShErr, ShErrKind, ShResult, ShResultExt}, utils::RedirVecUtils}, prelude::*, procio::{IoFrame, IoMode, IoStack}, state::{self, get_snapshots, read_logic, restore_snapshot, write_logic, write_meta, write_vars, ShFunc, VarTab, LOGIC_TABLE}}; +use crate::{builtin::{alias::{alias, unalias}, cd::cd, echo::echo, export::export, flowctl::flowctl, jobctl::{continue_job, jobs, JobBehavior}, pwd::pwd, shift::shift, shopt::shopt, source::source, test::double_bracket_test, zoltraak::zoltraak}, expand::expand_aliases, jobs::{dispatch_job, ChildProc, JobBldr, JobStack}, libsh::{error::{ShErr, ShErrKind, ShResult, ShResultExt}, utils::RedirVecUtils}, prelude::*, procio::{IoFrame, IoMode, IoStack}, state::{self, get_snapshots, read_logic, restore_snapshot, write_logic, write_meta, write_vars, ShFunc, VarTab, LOGIC_TABLE}}; use super::{lex::{Span, Tk, TkFlags, KEYWORDS}, AssignKind, CaseNode, CondNode, ConjunctNode, ConjunctOp, LoopKind, NdFlags, NdRule, Node, ParsedSrc, Redir, RedirType}; @@ -86,6 +86,7 @@ impl Dispatcher { NdRule::BraceGrp {..} => self.exec_brc_grp(node)?, NdRule::FuncDef {..} => self.exec_func_def(node)?, NdRule::Command {..} => self.dispatch_cmd(node)?, + NdRule::Test {..} => self.exec_test(node)?, _ => unreachable!() } Ok(()) @@ -123,6 +124,14 @@ impl Dispatcher { } Ok(()) } + pub fn exec_test(&mut self, node: Node) -> ShResult<()> { + let test_result = double_bracket_test(node)?; + match test_result { + true => state::set_status(0), + false => state::set_status(1), + } + Ok(()) + } pub fn exec_func_def(&mut self, func_def: Node) -> ShResult<()> { let blame = func_def.get_span(); let NdRule::FuncDef { name, body } = func_def.class else { @@ -159,7 +168,7 @@ impl Dispatcher { unreachable!() }; - self.set_assignments(assignments, AssignBehavior::Export); + self.set_assignments(assignments, AssignBehavior::Export)?; self.io_stack.append_to_frame(subsh.redirs); let mut argv = prepare_argv(argv)?; @@ -181,7 +190,7 @@ impl Dispatcher { unreachable!() }; - self.set_assignments(assignments, AssignBehavior::Export); + self.set_assignments(assignments, AssignBehavior::Export)?; self.io_stack.append_to_frame(func.redirs); diff --git a/src/parse/lex.rs b/src/parse/lex.rs index 143bda3..0d85b51 100644 --- a/src/parse/lex.rs +++ b/src/parse/lex.rs @@ -4,7 +4,7 @@ use bitflags::bitflags; use crate::{builtin::BUILTINS, libsh::{error::{ShErr, ShErrKind, ShResult}, utils::CharDequeUtils}, prelude::*}; -pub const KEYWORDS: [&str;14] = [ +pub const KEYWORDS: [&str;16] = [ "if", "then", "elif", @@ -19,6 +19,8 @@ pub const KEYWORDS: [&str;14] = [ "done", "case", "esac", + "[[", + "]]" ]; pub const OPENERS: [&str;6] = [ diff --git a/src/parse/mod.rs b/src/parse/mod.rs index d83ad63..af069a2 100644 --- a/src/parse/mod.rs +++ b/src/parse/mod.rs @@ -9,6 +9,30 @@ use crate::{libsh::{error::{Note, ShErr, ShErrKind, ShResult}, utils::TkVecUtils pub mod lex; pub mod execute; +pub const TEST_UNARY_OPS: [&str; 21] = [ + "-a", + "-b", + "-c", + "-d", + "-e", + "-f", + "-g", + "-h", + "-L", + "-k", + "-p", + "-r", + "-s", + "-S", + "-t", + "-u", + "-w", + "-x", + "-O", + "-G", + "-N", +]; + /// Try to match a specific parsing rule /// /// # Notes @@ -277,6 +301,94 @@ pub enum LoopKind { Until } +#[derive(Clone,Debug)] +pub enum TestCase { + Unary { + operator: Tk, + operand: Tk, + conjunct: Option + }, + Binary { + lhs: Tk, + operator: Tk, + rhs: Tk, + conjunct: Option + } +} + +#[derive(Default,Clone,Debug)] +pub struct TestCaseBuilder { + lhs: Option, + operator: Option, + rhs: Option, + conjunct: Option +} + +impl TestCaseBuilder { + pub fn new() -> Self { + Self::default() + } + pub fn is_empty(&self) -> bool { + self.lhs.is_none() && + self.operator.is_none() && + self.rhs.is_none() && + self.conjunct.is_none() + } + pub fn with_lhs(self, lhs: Tk) -> Self { + let Self { lhs: _, operator, rhs, conjunct } = self; + Self { lhs: Some(lhs), operator, rhs, conjunct } + } + pub fn with_rhs(self, rhs: Tk) -> Self { + let Self { lhs, operator, rhs: _, conjunct } = self; + Self { lhs, operator, rhs: Some(rhs), conjunct } + } + pub fn with_operator(self, operator: Tk) -> Self { + let Self { lhs, operator: _, rhs, conjunct } = self; + Self { lhs, operator: Some(operator), rhs, conjunct } + } + pub fn with_conjunction(self, conjunction: ConjunctOp) -> Self { + let Self { lhs, operator, rhs, conjunct: _ } = self; + Self { lhs, operator, rhs, conjunct: Some(conjunction) } + } + pub fn can_build(&self) -> bool { + self.operator.is_some() && + self.rhs.is_some() + } + pub fn build(self) -> TestCase { + let Self { lhs, operator, rhs, conjunct } = self; + if let Some(lhs) = lhs { + TestCase::Binary { + lhs, + operator: operator.unwrap(), + rhs: rhs.unwrap(), + conjunct + } + } else { + TestCase::Unary { + operator: operator.unwrap(), + operand: rhs.unwrap(), + conjunct + } + } + } + pub fn build_and_take(&mut self) -> TestCase { + if self.lhs.is_some() { + TestCase::Binary { + lhs: self.lhs.take().unwrap(), + operator: self.operator.take().unwrap(), + rhs: self.rhs.take().unwrap(), + conjunct: self.conjunct.take(), + } + } else { + TestCase::Unary { + operator: self.operator.take().unwrap(), + operand: self.rhs.take().unwrap(), + conjunct: self.conjunct.take(), + } + } + } +} + impl FromStr for LoopKind { type Err = ShErr; fn from_str(s: &str) -> Result { @@ -317,6 +429,7 @@ pub enum NdRule { Conjunction { elements: Vec }, Assignment { kind: AssignKind, var: Tk, val: Tk }, BraceGrp { body: Vec }, + Test { cases: Vec }, FuncDef { name: Tk, body: Box } } @@ -423,6 +536,7 @@ impl ParseStream { try_match!(self.parse_loop()?); try_match!(self.parse_for()?); try_match!(self.parse_if()?); + try_match!(self.parse_test()?); if check_pipelines { try_match!(self.parse_pipeln()?); } else { @@ -501,6 +615,88 @@ impl ParseStream { } } } + fn parse_test(&mut self) -> ShResult> { + let mut node_tks: Vec = vec![]; + let mut cases: Vec = vec![]; + flog!(INFO, self.check_keyword("[[")); + if !self.check_keyword("[[") || !self.next_tk_is_some() { + return Ok(None) + } + node_tks.push(self.next_tk().unwrap()); + let mut case_builder = TestCaseBuilder::new(); + while let Some(tk) = self.next_tk() { + flog!(DEBUG, case_builder); + flog!(DEBUG, tk.as_str()); + node_tks.push(tk.clone()); + if tk.as_str() == "]]" { + if case_builder.can_build() { + let case = case_builder.build_and_take(); + cases.push(case); + break + } else if cases.is_empty() { + return Err( + parse_err_full("Malformed test call", &node_tks.get_span().unwrap()) + ) + } else { + break + } + } + if case_builder.is_empty() { + flog!(DEBUG, "case builder is empty"); + match tk.as_str() { + _ if TEST_UNARY_OPS.contains(&tk.as_str()) => case_builder = case_builder.with_operator(tk.clone()), + _ => case_builder = case_builder.with_lhs(tk.clone()) + } + continue + } else if case_builder.operator.is_some() && case_builder.rhs.is_none() { + flog!(DEBUG, "op is some, rhs is none"); + case_builder = case_builder.with_rhs(tk.clone()); + continue + } else if case_builder.lhs.is_some() && case_builder.operator.is_none() { + flog!(DEBUG, "lhs is some, op is none"); + // we got lhs, then rhs → treat it as operator maybe? + case_builder = case_builder.with_operator(tk.clone()); + continue + } else if let TkRule::And | TkRule::Or = tk.class { + flog!(DEBUG, "found conjunction"); + flog!(DEBUG, tk.class); + if case_builder.can_build() { + if case_builder.conjunct.is_some() { + return Err( + parse_err_full("Invalid placement for logical operator in test", &node_tks.get_span().unwrap()) + ) + } + let op = match tk.class { + TkRule::And => ConjunctOp::And, + TkRule::Or => ConjunctOp::Or, + _ => unreachable!() + }; + case_builder = case_builder.with_conjunction(op); + let case = case_builder.build_and_take(); + cases.push(case); + flog!(DEBUG, case_builder); + continue + } else { + return Err( + parse_err_full("Invalid placement for logical operator in test", &node_tks.get_span().unwrap()) + ) + } + } + if case_builder.can_build() { + let case = case_builder.build_and_take(); + cases.push(case); + } + } + self.catch_separator(&mut node_tks); + let node: Node = Node { + class: NdRule::Test { cases }, + flags: NdFlags::empty(), + redirs: vec![], + tokens: node_tks + }; + flog!(DEBUG, node); + Ok(Some(node)) + } fn parse_brc_grp(&mut self, from_func_def: bool) -> ShResult> { let mut node_tks: Vec = vec![]; let mut body: Vec = vec![]; @@ -1364,5 +1560,6 @@ pub fn node_operation(node: &mut Node, filter: &F1, operation: &mut F2) NdRule::FuncDef { name: _, ref mut body } => { check_node(body,filter,operation) } + NdRule::Test { cases: _ } => (), } }