heredocs and herestrings implemented

added more tests to the test suite
This commit is contained in:
2026-03-14 13:40:00 -04:00
parent 1f9c96f24e
commit 5173e1908d
6 changed files with 811 additions and 90 deletions

View File

@@ -40,18 +40,19 @@ impl Tk {
} }
pub struct Expander { pub struct Expander {
flags: TkFlags,
raw: String, raw: String,
} }
impl Expander { impl Expander {
pub fn new(raw: Tk) -> ShResult<Self> { pub fn new(raw: Tk) -> ShResult<Self> {
let raw = raw.span.as_str(); let tk_raw = raw.span.as_str();
Self::from_raw(raw) Self::from_raw(tk_raw, raw.flags)
} }
pub fn from_raw(raw: &str) -> ShResult<Self> { pub fn from_raw(raw: &str, flags: TkFlags) -> ShResult<Self> {
let raw = expand_braces_full(raw)?.join(" "); let raw = expand_braces_full(raw)?.join(" ");
let unescaped = unescape_str(&raw); let unescaped = unescape_str(&raw);
Ok(Self { raw: unescaped }) Ok(Self { raw: unescaped, flags })
} }
pub fn expand(&mut self) -> ShResult<Vec<String>> { pub fn expand(&mut self) -> ShResult<Vec<String>> {
let mut chars = self.raw.chars().peekable(); let mut chars = self.raw.chars().peekable();
@@ -75,8 +76,12 @@ impl Expander {
self.raw.insert_str(0, "./"); self.raw.insert_str(0, "./");
} }
if self.flags.contains(TkFlags::IS_HEREDOC) {
Ok(vec![self.raw.clone()])
} else {
Ok(self.split_words()) Ok(self.split_words())
} }
}
pub fn split_words(&mut self) -> Vec<String> { pub fn split_words(&mut self) -> Vec<String> {
let mut words = vec![]; let mut words = vec![];
let mut chars = self.raw.chars(); let mut chars = self.raw.chars();
@@ -3532,6 +3537,7 @@ mod tests {
let mut exp = Expander { let mut exp = Expander {
raw: "hello world\tfoo".to_string(), raw: "hello world\tfoo".to_string(),
flags: TkFlags::empty()
}; };
let words = exp.split_words(); let words = exp.split_words();
assert_eq!(words, vec!["hello", "world", "foo"]); assert_eq!(words, vec!["hello", "world", "foo"]);
@@ -3546,6 +3552,7 @@ mod tests {
let mut exp = Expander { let mut exp = Expander {
raw: "a:b:c".to_string(), raw: "a:b:c".to_string(),
flags: TkFlags::empty()
}; };
let words = exp.split_words(); let words = exp.split_words();
assert_eq!(words, vec!["a", "b", "c"]); assert_eq!(words, vec!["a", "b", "c"]);
@@ -3560,6 +3567,7 @@ mod tests {
let mut exp = Expander { let mut exp = Expander {
raw: "hello world".to_string(), raw: "hello world".to_string(),
flags: TkFlags::empty()
}; };
let words = exp.split_words(); let words = exp.split_words();
assert_eq!(words, vec!["hello world"]); assert_eq!(words, vec!["hello world"]);
@@ -3570,7 +3578,10 @@ mod tests {
let _guard = TestGuard::new(); let _guard = TestGuard::new();
let raw = format!("{}hello world{}", markers::DUB_QUOTE, markers::DUB_QUOTE); let raw = format!("{}hello world{}", markers::DUB_QUOTE, markers::DUB_QUOTE);
let mut exp = Expander { raw }; let mut exp = Expander {
raw,
flags: TkFlags::empty()
};
let words = exp.split_words(); let words = exp.split_words();
assert_eq!(words, vec!["hello world"]); assert_eq!(words, vec!["hello world"]);
} }
@@ -3582,7 +3593,10 @@ mod tests {
let _guard = TestGuard::new(); let _guard = TestGuard::new();
let raw = format!("hello{}world", unescape_str("\\ ")); let raw = format!("hello{}world", unescape_str("\\ "));
let mut exp = Expander { raw }; let mut exp = Expander {
raw,
flags: TkFlags::empty()
};
let words = exp.split_words(); let words = exp.split_words();
assert_eq!(words, vec!["hello world"]); assert_eq!(words, vec!["hello world"]);
} }
@@ -3592,7 +3606,10 @@ mod tests {
let _guard = TestGuard::new(); let _guard = TestGuard::new();
let raw = format!("hello{}world", unescape_str("\\\t")); let raw = format!("hello{}world", unescape_str("\\\t"));
let mut exp = Expander { raw }; let mut exp = Expander {
raw,
flags: TkFlags::empty()
};
let words = exp.split_words(); let words = exp.split_words();
assert_eq!(words, vec!["hello\tworld"]); assert_eq!(words, vec!["hello\tworld"]);
} }
@@ -3605,7 +3622,10 @@ mod tests {
} }
let raw = format!("a{}b:c", unescape_str("\\:")); let raw = format!("a{}b:c", unescape_str("\\:"));
let mut exp = Expander { raw }; let mut exp = Expander {
raw,
flags: TkFlags::empty()
};
let words = exp.split_words(); let words = exp.split_words();
assert_eq!(words, vec!["a:b", "c"]); assert_eq!(words, vec!["a:b", "c"]);
} }

View File

@@ -267,20 +267,12 @@ bitflags! {
const ASSIGN = 0b0000000001000000; const ASSIGN = 0b0000000001000000;
const BUILTIN = 0b0000000010000000; const BUILTIN = 0b0000000010000000;
const IS_PROCSUB = 0b0000000100000000; const IS_PROCSUB = 0b0000000100000000;
const IS_HEREDOC = 0b0000001000000000;
const LIT_HEREDOC = 0b0000010000000000;
const TAB_HEREDOC = 0b0000100000000000;
} }
} }
pub struct LexStream {
source: Arc<String>,
pub cursor: usize,
pub name: String,
quote_state: QuoteState,
brc_grp_depth: usize,
brc_grp_start: Option<usize>,
case_depth: usize,
flags: LexFlags,
}
bitflags! { bitflags! {
#[derive(Debug, Clone, Copy)] #[derive(Debug, Clone, Copy)]
pub struct LexFlags: u32 { pub struct LexFlags: u32 {
@@ -322,6 +314,19 @@ pub fn clean_input(input: &str) -> String {
output output
} }
/// Lexer state for a single pass over one piece of shell source text.
pub struct LexStream {
// Shared handle to the full text being lexed.
source: Arc<String>,
// Byte offset into `source` where the next token starts.
pub cursor: usize,
pub name: String,
// Quote tracking; `read_heredoc` toggles this while parsing a quoted delimiter word.
quote_state: QuoteState,
brc_grp_depth: usize,
brc_grp_start: Option<usize>,
case_depth: usize,
// Byte offset just past the terminator line of the most recently lexed heredoc
// body. It is set by `read_heredoc` and taken (reset to `None`) when the next
// newline token is lexed, at which point the cursor jumps to it so the body is
// not lexed as commands.
heredoc_skip: Option<usize>,
flags: LexFlags,
}
impl LexStream { impl LexStream {
pub fn new(source: Arc<String>, flags: LexFlags) -> Self { pub fn new(source: Arc<String>, flags: LexFlags) -> Self {
let flags = flags | LexFlags::FRESH | LexFlags::NEXT_IS_CMD; let flags = flags | LexFlags::FRESH | LexFlags::NEXT_IS_CMD;
@@ -333,6 +338,7 @@ impl LexStream {
quote_state: QuoteState::default(), quote_state: QuoteState::default(),
brc_grp_depth: 0, brc_grp_depth: 0,
brc_grp_start: None, brc_grp_start: None,
heredoc_skip: None,
case_depth: 0, case_depth: 0,
} }
} }
@@ -393,7 +399,7 @@ impl LexStream {
} }
pub fn read_redir(&mut self) -> Option<ShResult<Tk>> { pub fn read_redir(&mut self) -> Option<ShResult<Tk>> {
assert!(self.cursor <= self.source.len()); assert!(self.cursor <= self.source.len());
let slice = self.slice(self.cursor..)?; let slice = self.slice(self.cursor..)?.to_string();
let mut pos = self.cursor; let mut pos = self.cursor;
let mut chars = slice.chars().peekable(); let mut chars = slice.chars().peekable();
let mut tk = Tk::default(); let mut tk = Tk::default();
@@ -443,14 +449,55 @@ impl LexStream {
} }
pos += 1; pos += 1;
for _ in 0..2 {
if let Some('<') = chars.peek() { if let Some('<') = chars.peek() {
chars.next(); chars.next();
pos += 1; pos += 1;
match chars.peek() {
Some('<') => {
chars.next();
pos += 1;
}
Some(ch) => {
let mut ch = *ch;
while is_field_sep(ch) {
let Some(next_ch) = chars.next() else {
// Incomplete input — fall through to emit << as Redir
break;
};
pos += next_ch.len_utf8();
ch = next_ch;
}
if is_field_sep(ch) {
// Ran out of input while skipping whitespace — fall through
} else { } else {
let saved_cursor = self.cursor;
match self.read_heredoc(pos) {
Ok(Some(heredoc_tk)) => {
// cursor is set to after the delimiter word;
// heredoc_skip is set to after the body
pos = self.cursor;
self.cursor = saved_cursor;
tk = heredoc_tk;
break; break;
} }
Ok(None) => {
// Incomplete heredoc — restore cursor and fall through
self.cursor = saved_cursor;
} }
Err(e) => return Some(Err(e)),
}
}
}
_ => {
// No delimiter yet — input is incomplete
// Fall through to emit the << as a Redir token
}
}
}
tk = self.get_token(self.cursor..pos, TkRule::Redir); tk = self.get_token(self.cursor..pos, TkRule::Redir);
break; break;
} }
@@ -474,6 +521,130 @@ impl LexStream {
self.cursor = pos; self.cursor = pos;
Some(Ok(tk)) Some(Ok(tk))
} }
/// Lexes a here-document whose delimiter word starts at byte offset `pos`.
///
/// The delimiter word is read up to the first unquoted hard separator. A
/// leading `-` sets `TkFlags::TAB_HEREDOC`; any single or double quote in the
/// word sets `TkFlags::LIT_HEREDOC` and is dropped from the delimiter text.
/// The body starts after the end of the current line, or at `heredoc_skip`
/// when an earlier heredoc on the same line has already claimed its body, and
/// runs until a line that equals the delimiter (trailing `\r` ignored).
///
/// On success the returned token spans the body only (terminator line
/// excluded) and carries `IS_HEREDOC` plus the flags above. `self.cursor` is
/// moved to just after the delimiter word so the rest of the line is lexed
/// normally, and `self.heredoc_skip` is set to the offset after the
/// terminator line.
///
/// Returns `Ok(None)` when the heredoc is incomplete and
/// `LexFlags::LEX_UNFINISHED` is set.
///
/// # Errors
/// Returns a `ParseErr` when the heredoc is incomplete and
/// `LexFlags::LEX_UNFINISHED` is not set.
// NOTE(review): `pos` must point at the first byte of the delimiter word itself;
// confirm the caller does not advance past that character while skipping blanks.
// NOTE(review): an unbalanced quote in the delimiter word leaves `quote_state`
// toggled when this function returns.
pub fn read_heredoc(&mut self, mut pos: usize) -> ShResult<Option<Tk>> {
    let word_src = self.slice(pos..).unwrap_or_default().to_string();
    let mut delim = String::new();
    let mut flags = TkFlags::empty();

    // Parse the delimiter word, stripping quotes.
    for (idx, ch) in word_src.chars().enumerate() {
        match ch {
            '-' if idx == 0 => {
                pos += 1;
                flags |= TkFlags::TAB_HEREDOC;
            }
            '"' => {
                pos += 1;
                self.quote_state.toggle_double();
                flags |= TkFlags::LIT_HEREDOC;
            }
            '\'' => {
                pos += 1;
                self.quote_state.toggle_single();
                flags |= TkFlags::LIT_HEREDOC;
            }
            // An unquoted hard separator ends the word and is not consumed.
            _ if !self.quote_state.in_quote() && is_hard_sep(ch) => break,
            _ => {
                pos += ch.len_utf8();
                delim.push(ch);
            }
        }
    }

    // The cursor returns here so the remainder of the line still gets lexed.
    let cursor_after_delim = pos;

    let body_start = match self.heredoc_skip {
        // A previous heredoc on this line already consumed its body; ours
        // starts where that one ended. `skip` is a byte offset, so it is
        // used to slice directly instead of being counted out in chars.
        Some(skip) => skip,
        None => {
            let rest_of_line = self
                .slice(cursor_after_delim..)
                .unwrap_or_default()
                .to_string();
            match rest_of_line.find('\n') {
                Some(newline) => cursor_after_delim + newline + 1,
                None if self.flags.contains(LexFlags::LEX_UNFINISHED) => return Ok(None),
                None => {
                    return Err(ShErr::at(
                        ShErrKind::ParseErr,
                        Span::new(cursor_after_delim..cursor_after_delim, self.source.clone()),
                        "Heredoc delimiter not found",
                    ));
                }
            }
        }
    };

    let body = self.slice(body_start..).unwrap_or_default().to_string();
    let body_end = body_start + body.len();

    // Find the terminator as (start of terminator line, offset just past it).
    let mut line_start = body_start;
    let mut terminator: Option<(usize, usize)> = None;
    let mut ends_unterminated = false;
    for piece in body.split_inclusive('\n') {
        let line = piece.strip_suffix('\n').unwrap_or(piece);
        ends_unterminated = !piece.ends_with('\n');
        let next_line_start = line_start + piece.len();
        if line.trim_end_matches('\r') == delim {
            terminator = Some((line_start, next_line_start));
            break;
        }
        line_start = next_line_start;
    }
    // The empty text after a final newline (or an empty body) counts as a
    // last line too; it can only match an empty delimiter.
    if terminator.is_none() && !ends_unterminated && delim.is_empty() {
        terminator = Some((body_end, body_end));
    }

    match terminator {
        Some((terminator_start, after_terminator)) => {
            let mut tk = self.get_token(body_start..terminator_start, TkRule::Redir);
            tk.flags |= TkFlags::IS_HEREDOC | flags;
            self.heredoc_skip = Some(after_terminator);
            self.cursor = cursor_after_delim;
            Ok(Some(tk))
        }
        None if self.flags.contains(LexFlags::LEX_UNFINISHED) => Ok(None),
        None => Err(ShErr::at(
            ShErrKind::ParseErr,
            Span::new(body_start..body_end, self.source.clone()),
            format!("Heredoc delimiter '{}' not found", delim),
        )),
    }
}
pub fn read_string(&mut self) -> ShResult<Tk> { pub fn read_string(&mut self) -> ShResult<Tk> {
assert!(self.cursor <= self.source.len()); assert!(self.cursor <= self.source.len());
let slice = self.slice_from_cursor().unwrap().to_string(); let slice = self.slice_from_cursor().unwrap().to_string();
@@ -871,10 +1042,19 @@ impl Iterator for LexStream {
let token = match get_char(&self.source, self.cursor).unwrap() { let token = match get_char(&self.source, self.cursor).unwrap() {
'\r' | '\n' | ';' => { '\r' | '\n' | ';' => {
let ch = get_char(&self.source, self.cursor).unwrap();
let ch_idx = self.cursor; let ch_idx = self.cursor;
self.cursor += 1; self.cursor += 1;
self.set_next_is_cmd(true); self.set_next_is_cmd(true);
// If a heredoc was parsed on this line, skip past the body
// Only on newline — ';' is a command separator within the same line
if ch == '\n' || ch == '\r' {
if let Some(skip) = self.heredoc_skip.take() {
self.cursor = skip;
}
}
while let Some(ch) = get_char(&self.source, self.cursor) { while let Some(ch) = get_char(&self.source, self.cursor) {
match ch { match ch {
'\\' if get_char(&self.source, self.cursor + 1) == Some('\n') => { '\\' if get_char(&self.source, self.cursor + 1) == Some('\n') => {

View File

@@ -341,7 +341,7 @@ impl RedirBldr {
} }
impl FromStr for RedirBldr { impl FromStr for RedirBldr {
type Err = (); type Err = ShErr;
fn from_str(s: &str) -> Result<Self, Self::Err> { fn from_str(s: &str) -> Result<Self, Self::Err> {
let mut chars = s.chars().peekable(); let mut chars = s.chars().peekable();
let mut src_fd = String::new(); let mut src_fd = String::new();
@@ -381,7 +381,10 @@ impl FromStr for RedirBldr {
} }
} }
if src_fd.is_empty() { if src_fd.is_empty() {
return Err(()); return Err(ShErr::simple(
ShErrKind::ParseErr,
format!("Invalid character '{}' in redirection operator", ch),
));
} }
} }
_ if ch.is_ascii_digit() && tgt_fd.is_empty() => { _ if ch.is_ascii_digit() && tgt_fd.is_empty() => {
@@ -395,7 +398,10 @@ impl FromStr for RedirBldr {
} }
} }
} }
_ => return Err(()), _ => return Err(ShErr::simple(
ShErrKind::ParseErr,
format!("Invalid character '{}' in redirection operator", ch),
)),
} }
} }
@@ -415,6 +421,59 @@ impl FromStr for RedirBldr {
} }
} }
/// Builds a redirection from a lexed `Redir` token.
///
/// A token flagged `IS_HEREDOC` already holds the heredoc body as its text, so
/// it is turned directly into a pipe preloaded with that body and targeting
/// fd 0. Any other token is parsed as a redirection operator via `FromStr`.
impl TryFrom<Tk> for RedirBldr {
    type Error = ShErr;

    /// Converts `tk` into a builder.
    ///
    /// For heredocs, the body is taken verbatim when `LIT_HEREDOC` is set and
    /// expanded otherwise. With `TAB_HEREDOC`, the smallest run of leading
    /// tabs shared by all non-empty lines is removed from every line.
    ///
    /// # Errors
    /// Propagates failures from expansion, from creating the preloaded pipe,
    /// and from parsing the operator text.
    fn try_from(tk: Tk) -> Result<Self, Self::Error> {
        /// Removes the leading-tab run common to all non-empty lines of `body`.
        /// Returns `None` when there is nothing to strip.
        fn strip_common_tabs(body: &str) -> Option<String> {
            let common = body
                .lines()
                .filter(|ln| !ln.is_empty())
                .map(|ln| ln.len() - ln.trim_start_matches('\t').len())
                .min()
                .unwrap_or(0);
            if common == 0 {
                return None;
            }

            let mut stripped = String::with_capacity(body.len());
            for ln in body.lines() {
                // A tab is one byte and every non-empty line starts with at
                // least `common` of them, so this slice always exists.
                let kept = if ln.is_empty() {
                    ln
                } else {
                    ln.get(common..).unwrap_or(ln)
                };
                stripped.push_str(kept);
                stripped.push('\n');
            }
            Some(stripped)
        }

        if !tk.flags.contains(TkFlags::IS_HEREDOC) {
            return Self::from_str(tk.as_str());
        }

        let flags = tk.flags;
        let mut heredoc_body = if flags.contains(TkFlags::LIT_HEREDOC) {
            tk.as_str().to_string()
        } else {
            tk.expand()?
                .get_words()
                .first()
                .map(|s| s.as_str())
                .unwrap_or_default()
                .to_string()
        };

        if flags.contains(TkFlags::TAB_HEREDOC) {
            if let Some(stripped) = strip_common_tabs(&heredoc_body) {
                heredoc_body = stripped;
            }
        }

        Ok(RedirBldr {
            io_mode: Some(IoMode::loaded_pipe(0, heredoc_body.as_bytes())?),
            class: Some(RedirType::HereDoc),
            tgt_fd: Some(0),
        })
    }
}
#[derive(PartialEq, Clone, Copy, Debug)] #[derive(PartialEq, Clone, Copy, Debug)]
pub enum RedirType { pub enum RedirType {
Null, // Default Null, // Default
@@ -424,6 +483,7 @@ pub enum RedirType {
Output, // > Output, // >
Append, // >> Append, // >>
HereDoc, // << HereDoc, // <<
IndentHereDoc, // <<-, strips leading tabs
HereString, // <<< HereString, // <<<
} }
@@ -1038,36 +1098,65 @@ impl ParseStream {
}; };
Ok(Some(node)) Ok(Some(node))
} }
/// Builds a `Redir` from a redirection token, pulling its operand from `next`
/// when one is required.
///
/// * `redir_tk` - the redirection operator (or heredoc) token.
/// * `next` - yields the following token; called only when the operator did
///   not already determine its I/O mode.
/// * `node_tks` - receives the filename token of file redirections.
/// * `context` - label context attached to the "malformed operator" error.
///
/// # Errors
/// Returns an error when the token cannot be converted into a builder, when
/// the operator has no redirection class, when the operand is missing or is
/// the end-of-input token, or when expanding a here-string or creating its
/// pipe fails.
fn build_redir<F: FnMut() -> Option<Tk>>(
    redir_tk: &Tk,
    mut next: F,
    node_tks: &mut Vec<Tk>,
    context: LabelCtx,
) -> ShResult<Redir> {
    // Conversion can fail on heredoc expansion or pipe creation; report that
    // as a normal shell error instead of panicking.
    let redir_bldr = RedirBldr::try_from(redir_tk.clone())?;

    // The I/O mode is already known (e.g. `2>&1` or a heredoc): no operand.
    if redir_bldr.io_mode.is_some() {
        return Ok(redir_bldr.build());
    }
    let next_tk = next();

    let Some(redir_type) = redir_bldr.class else {
        return Err(parse_err_full(
            "Malformed redirection operator",
            &redir_tk.span,
            context,
        ));
    };

    match redir_type {
        RedirType::HereString => {
            let word_tk = match next_tk {
                Some(tk) if tk.class != TkRule::EOI => tk,
                missing => {
                    // Point at the end-of-input token when there is one,
                    // otherwise at the operator itself.
                    let span = missing.map_or_else(|| redir_tk.span.clone(), |tk| tk.span.clone());
                    return Err(ShErr::at(
                        ShErrKind::ParseErr,
                        span,
                        "Expected a string after this redirection",
                    ));
                }
            };
            // NOTE(review): unlike the filename case below, the operand token is
            // not pushed onto `node_tks` — confirm that is intended.
            let mut string = word_tk.expand()?.get_words().join(" ");
            string.push('\n');
            let io_mode = IoMode::loaded_pipe(redir_bldr.tgt_fd.unwrap_or(0), string.as_bytes())?;
            Ok(redir_bldr.with_io_mode(io_mode).build())
        }
        _ => {
            let path_tk = match next_tk {
                Some(tk) if tk.class != TkRule::EOI => tk,
                _ => {
                    return Err(ShErr::at(
                        ShErrKind::ParseErr,
                        redir_tk.span.clone(),
                        "Expected a filename after this redirection",
                    ));
                }
            };
            node_tks.push(path_tk.clone());
            let pathbuf = PathBuf::from(path_tk.span.as_str());
            // NOTE(review): assumes the operator parser always sets a target fd
            // for file redirections — confirm against `FromStr for RedirBldr`.
            let io_mode = IoMode::file(redir_bldr.tgt_fd.unwrap(), pathbuf, redir_type);
            Ok(redir_bldr.with_io_mode(io_mode).build())
        }
    }
}
fn parse_redir(&mut self, redirs: &mut Vec<Redir>, node_tks: &mut Vec<Tk>) -> ShResult<()> { fn parse_redir(&mut self, redirs: &mut Vec<Redir>, node_tks: &mut Vec<Tk>) -> ShResult<()> {
while self.check_redir() { while self.check_redir() {
let tk = self.next_tk().unwrap(); let tk = self.next_tk().unwrap();
node_tks.push(tk.clone()); node_tks.push(tk.clone());
let redir_bldr = tk.span.as_str().parse::<RedirBldr>().unwrap(); let ctx = self.context.clone();
if redir_bldr.io_mode.is_none() { let redir = Self::build_redir(&tk, || self.next_tk(), node_tks, ctx)?;
let path_tk = self.next_tk();
if path_tk.clone().is_none_or(|tk| tk.class == TkRule::EOI) {
return Err(ShErr::at(
ShErrKind::ParseErr,
tk.span.clone(),
"Expected a filename after this redirection",
));
};
let path_tk = path_tk.unwrap();
node_tks.push(path_tk.clone());
let redir_class = redir_bldr.class.unwrap();
let pathbuf = PathBuf::from(path_tk.span.as_str());
let io_mode = IoMode::file(redir_bldr.tgt_fd.unwrap(), pathbuf, redir_class);
let redir_bldr = redir_bldr.with_io_mode(io_mode);
let redir = redir_bldr.build();
redirs.push(redir); redirs.push(redir);
} else {
// io_mode is already set (e.g., for fd redirections like 2>&1)
let redir = redir_bldr.build();
redirs.push(redir);
}
} }
Ok(()) Ok(())
} }
@@ -1631,33 +1720,9 @@ impl ParseStream {
} }
TkRule::Redir => { TkRule::Redir => {
node_tks.push(tk.clone()); node_tks.push(tk.clone());
let redir_bldr = tk.span.as_str().parse::<RedirBldr>().unwrap(); let ctx = self.context.clone();
if redir_bldr.io_mode.is_none() { let redir = Self::build_redir(tk, || tk_iter.next().cloned(), &mut node_tks, ctx)?;
let path_tk = tk_iter.next();
if path_tk.is_none_or(|tk| tk.class == TkRule::EOI) {
self.panic_mode(&mut node_tks);
return Err(ShErr::at(
ShErrKind::ParseErr,
tk.span.clone(),
"Expected a filename after this redirection",
));
};
let path_tk = path_tk.unwrap();
node_tks.push(path_tk.clone());
let redir_class = redir_bldr.class.unwrap();
let pathbuf = PathBuf::from(path_tk.span.as_str());
let io_mode = IoMode::file(redir_bldr.tgt_fd.unwrap(), pathbuf, redir_class);
let redir_bldr = redir_bldr.with_io_mode(io_mode);
let redir = redir_bldr.build();
redirs.push(redir); redirs.push(redir);
} else {
// io_mode is already set (e.g., for fd redirections like 2>&1)
let redir = redir_bldr.build();
redirs.push(redir);
}
} }
_ => unimplemented!("Unexpected token rule `{:?}` in parse_cmd()", tk.class), _ => unimplemented!("Unexpected token rule `{:?}` in parse_cmd()", tk.class),
} }
@@ -1822,7 +1887,7 @@ pub fn get_redir_file<P: AsRef<Path>>(class: RedirType, path: P) -> ShResult<Fil
.truncate(true) .truncate(true)
.open(path), .open(path),
RedirType::Append => OpenOptions::new().create(true).append(true).open(path), RedirType::Append => OpenOptions::new().create(true).append(true).open(path),
_ => unimplemented!(), _ => unimplemented!("Unimplemented redir type: {:?}", class),
}; };
Ok(result?) Ok(result?)
} }
@@ -2594,4 +2659,247 @@ pub mod tests {
let input = "{ echo bar case foo in bar) echo fizz ;; buzz) echo buzz ;; esac }"; let input = "{ echo bar case foo in bar) echo fizz ;; buzz) echo buzz ;; esac }";
assert!(get_ast(input).is_err()); assert!(get_ast(input).is_err());
} }
// ===================== Heredocs =====================
#[test]
fn parse_basic_heredoc() {
let input = "cat <<EOF\nhello world\nEOF";
let expected = &mut [NdKind::Conjunction, NdKind::Pipeline, NdKind::Command].into_iter();
let ast = get_ast(input).unwrap();
let mut node = ast[0].clone();
if let Err(e) = node.assert_structure(expected) {
panic!("{}", e);
}
}
#[test]
fn parse_heredoc_with_tab_strip() {
let input = "cat <<-EOF\n\t\thello\n\t\tworld\nEOF";
let expected = &mut [NdKind::Conjunction, NdKind::Pipeline, NdKind::Command].into_iter();
let ast = get_ast(input).unwrap();
let mut node = ast[0].clone();
if let Err(e) = node.assert_structure(expected) {
panic!("{}", e);
}
}
#[test]
fn parse_literal_heredoc() {
let input = "cat <<'EOF'\nhello $world\nEOF";
let expected = &mut [NdKind::Conjunction, NdKind::Pipeline, NdKind::Command].into_iter();
let ast = get_ast(input).unwrap();
let mut node = ast[0].clone();
if let Err(e) = node.assert_structure(expected) {
panic!("{}", e);
}
}
#[test]
fn parse_herestring() {
let input = "cat <<< \"hello world\"";
let expected = &mut [NdKind::Conjunction, NdKind::Pipeline, NdKind::Command].into_iter();
let ast = get_ast(input).unwrap();
let mut node = ast[0].clone();
if let Err(e) = node.assert_structure(expected) {
panic!("{}", e);
}
}
#[test]
fn parse_heredoc_in_pipeline() {
let input = "cat <<EOF | grep hello\nhello world\ngoodbye world\nEOF";
let expected = &mut [
NdKind::Conjunction,
NdKind::Pipeline,
NdKind::Command,
NdKind::Command,
]
.into_iter();
let ast = get_ast(input).unwrap();
let mut node = ast[0].clone();
if let Err(e) = node.assert_structure(expected) {
panic!("{}", e);
}
}
#[test]
fn parse_heredoc_in_conjunction() {
let input = "cat <<EOF && echo done\nhello\nEOF";
let expected = &mut [
NdKind::Conjunction,
NdKind::Pipeline,
NdKind::Command,
NdKind::Pipeline,
NdKind::Command,
]
.into_iter();
let ast = get_ast(input).unwrap();
let mut node = ast[0].clone();
if let Err(e) = node.assert_structure(expected) {
panic!("{}", e);
}
}
#[test]
fn parse_heredoc_double_quoted_delimiter() {
let input = "cat <<\"EOF\"\nhello $world\nEOF";
let expected = &mut [NdKind::Conjunction, NdKind::Pipeline, NdKind::Command].into_iter();
let ast = get_ast(input).unwrap();
let mut node = ast[0].clone();
if let Err(e) = node.assert_structure(expected) {
panic!("{}", e);
}
}
#[test]
fn parse_heredoc_empty_body() {
let input = "cat <<EOF\nEOF";
let expected = &mut [NdKind::Conjunction, NdKind::Pipeline, NdKind::Command].into_iter();
let ast = get_ast(input).unwrap();
let mut node = ast[0].clone();
if let Err(e) = node.assert_structure(expected) {
panic!("{}", e);
}
}
#[test]
fn parse_heredoc_multiword_delimiter() {
// Intent: only the first word after `<<` should be taken as the delimiter.
// NOTE(review): the input below uses the single-word delimiter `DELIM` with
// nothing after it on the operator line, so the multi-word case named by this
// test is not actually exercised — consider an input with extra words after it.
let input = "cat <<DELIM\nsome content\nDELIM";
let expected = &mut [NdKind::Conjunction, NdKind::Pipeline, NdKind::Command].into_iter();
let ast = get_ast(input).unwrap();
let mut node = ast[0].clone();
if let Err(e) = node.assert_structure(expected) {
panic!("{}", e);
}
}
#[test]
fn parse_two_heredocs_on_one_line() {
let input = "cat <<A; cat <<B\nfoo\nA\nbar\nB";
let ast = get_ast(input).unwrap();
assert_eq!(ast.len(), 2);
}
// ===================== Heredoc Execution =====================
use crate::testutil::{TestGuard, test_input};
use crate::state::{VarFlags, VarKind, write_vars};
#[test]
fn heredoc_basic_output() {
let guard = TestGuard::new();
test_input("cat <<EOF\nhello world\nEOF".to_string()).unwrap();
let out = guard.read_output();
assert_eq!(out, "hello world\n");
}
#[test]
fn heredoc_multiline_output() {
let guard = TestGuard::new();
test_input("cat <<EOF\nline one\nline two\nline three\nEOF".to_string()).unwrap();
let out = guard.read_output();
assert_eq!(out, "line one\nline two\nline three\n");
}
#[test]
fn heredoc_variable_expansion() {
let guard = TestGuard::new();
write_vars(|v| v.set_var("NAME", VarKind::Str("world".into()), VarFlags::NONE)).unwrap();
test_input("cat <<EOF\nhello $NAME\nEOF".to_string()).unwrap();
let out = guard.read_output();
assert_eq!(out, "hello world\n");
}
#[test]
fn heredoc_literal_no_expansion() {
let guard = TestGuard::new();
write_vars(|v| v.set_var("NAME", VarKind::Str("world".into()), VarFlags::NONE)).unwrap();
test_input("cat <<'EOF'\nhello $NAME\nEOF".to_string()).unwrap();
let out = guard.read_output();
assert_eq!(out, "hello $NAME\n");
}
#[test]
fn heredoc_tab_stripping() {
let guard = TestGuard::new();
test_input("cat <<-EOF\n\t\thello\n\t\tworld\nEOF".to_string()).unwrap();
let out = guard.read_output();
assert_eq!(out, "hello\nworld\n");
}
#[test]
fn heredoc_tab_stripping_uneven() {
// Pins the `<<-` behaviour for unevenly indented bodies: only the smallest
// leading-tab run shared by the lines (one tab here) is removed, so the
// three-tab line keeps two tabs.
// NOTE(review): POSIX `<<-` strips all leading tabs from every line; confirm
// that this common-prefix behaviour is the intended deviation.
let guard = TestGuard::new();
test_input("cat <<-EOF\n\t\t\thello\n\tworld\nEOF".to_string()).unwrap();
let out = guard.read_output();
assert_eq!(out, "\t\thello\nworld\n");
}
#[test]
fn heredoc_empty_body() {
let guard = TestGuard::new();
test_input("cat <<EOF\nEOF".to_string()).unwrap();
let out = guard.read_output();
assert_eq!(out, "");
}
#[test]
fn heredoc_in_pipeline() {
let guard = TestGuard::new();
test_input("cat <<EOF | grep hello\nhello world\ngoodbye world\nEOF".to_string()).unwrap();
let out = guard.read_output();
assert_eq!(out, "hello world\n");
}
#[test]
fn herestring_basic() {
let guard = TestGuard::new();
test_input("cat <<< \"hello world\"".to_string()).unwrap();
let out = guard.read_output();
assert_eq!(out, "hello world\n");
}
#[test]
fn herestring_variable_expansion() {
let guard = TestGuard::new();
write_vars(|v| v.set_var("MSG", VarKind::Str("hi there".into()), VarFlags::NONE)).unwrap();
test_input("cat <<< $MSG".to_string()).unwrap();
let out = guard.read_output();
assert_eq!(out, "hi there\n");
}
#[test]
fn heredoc_double_quoted_delimiter_is_literal() {
let guard = TestGuard::new();
write_vars(|v| v.set_var("X", VarKind::Str("val".into()), VarFlags::NONE)).unwrap();
test_input("cat <<\"EOF\"\nhello $X\nEOF".to_string()).unwrap();
let out = guard.read_output();
assert_eq!(out, "hello $X\n");
}
#[test]
fn heredoc_preserves_blank_lines() {
let guard = TestGuard::new();
test_input("cat <<EOF\nfirst\n\nsecond\nEOF".to_string()).unwrap();
let out = guard.read_output();
assert_eq!(out, "first\n\nsecond\n");
}
#[test]
fn heredoc_tab_strip_preserves_empty_lines() {
let guard = TestGuard::new();
test_input("cat <<-EOF\n\thello\n\n\tworld\nEOF".to_string()).unwrap();
let out = guard.read_output();
assert_eq!(out, "hello\n\nworld\n");
}
#[test]
fn heredoc_two_on_one_line() {
let guard = TestGuard::new();
test_input("cat <<A; cat <<B\nfoo\nA\nbar\nB".to_string()).unwrap();
let out = guard.read_output();
assert_eq!(out, "foo\nbar\n");
}
} }

View File

@@ -10,7 +10,7 @@ use crate::{
error::{ShErr, ShErrKind, ShResult}, error::{ShErr, ShErrKind, ShResult},
utils::RedirVecUtils, utils::RedirVecUtils,
}, },
parse::{Redir, RedirType, get_redir_file}, parse::{Redir, RedirType, get_redir_file, lex::TkFlags},
prelude::*, prelude::*,
}; };
@@ -79,8 +79,7 @@ impl IoMode {
if let IoMode::File { tgt_fd, path, mode } = self { if let IoMode::File { tgt_fd, path, mode } = self {
let path_raw = path.as_os_str().to_str().unwrap_or_default().to_string(); let path_raw = path.as_os_str().to_str().unwrap_or_default().to_string();
let expanded_path = Expander::from_raw(&path_raw)?.expand()?.join(" "); // should just be one string, will have to find some way to handle a return of let expanded_path = Expander::from_raw(&path_raw, TkFlags::empty())?.expand()?.join(" "); // should just be one string, will have to find some way to handle a return of multiple paths
// multiple
let expanded_pathbuf = PathBuf::from(expanded_path); let expanded_pathbuf = PathBuf::from(expanded_path);
@@ -92,6 +91,11 @@ impl IoMode {
} }
Ok(self) Ok(self)
} }
/// Creates a pipe, writes `buf` into it, and returns a `Pipe` I/O mode whose
/// read end will be attached to `tgt_fd`.
///
/// Both pipe ends are opened with `O_CLOEXEC`.
///
/// # Errors
/// Returns an error when the pipe cannot be created or the write fails.
// NOTE(review): the whole buffer is written before any reader exists, and the
// byte count returned by `write` is ignored. A body larger than the kernel's
// pipe capacity would block here indefinitely — confirm callers only pass
// small buffers, or feed large ones from a separate writer.
pub fn loaded_pipe(tgt_fd: RawFd, buf: &[u8]) -> ShResult<Self> {
    // Propagate creation failure (e.g. fd exhaustion) instead of panicking.
    let (rpipe, wpipe) = nix::unistd::pipe2(OFlag::O_CLOEXEC)?;
    write(wpipe, buf)?;
    Ok(Self::Pipe { tgt_fd, pipe: rpipe.into() })
}
pub fn get_pipes() -> (Self, Self) { pub fn get_pipes() -> (Self, Self) {
let (rpipe, wpipe) = nix::unistd::pipe2(OFlag::O_CLOEXEC).unwrap(); let (rpipe, wpipe) = nix::unistd::pipe2(OFlag::O_CLOEXEC).unwrap();
( (

View File

@@ -1430,6 +1430,8 @@ pub fn annotate_input(input: &str) -> String {
for tk in tokens.into_iter().rev() { for tk in tokens.into_iter().rev() {
let insertions = annotate_token(tk); let insertions = annotate_token(tk);
for (pos, marker) in insertions { for (pos, marker) in insertions {
log::info!("pos: {pos}, marker: {marker:?}");
log::info!("before: {annotated:?}");
let pos = pos.max(0).min(annotated.len()); let pos = pos.max(0).min(annotated.len());
annotated.insert(pos, marker); annotated.insert(pos, marker);
} }
@@ -1611,6 +1613,12 @@ pub fn annotate_token(token: Tk) -> Vec<(usize, Marker)> {
let mut insertions: Vec<(usize, Marker)> = vec![]; let mut insertions: Vec<(usize, Marker)> = vec![];
// Heredoc tokens have spans covering the body content far from the <<
// operator, which breaks position tracking after marker insertions
if token.flags.contains(TkFlags::IS_HEREDOC) {
return insertions;
}
if token.class != TkRule::Str if token.class != TkRule::Str
&& let Some(marker) = marker_for(&token.class) && let Some(marker) = marker_for(&token.class)
{ {

View File

@@ -2,10 +2,15 @@
use std::os::fd::AsRawFd; use std::os::fd::AsRawFd;
use crate::{ use crate::{
readline::{Prompt, ShedVi}, readline::{Prompt, ShedVi, annotate_input},
testutil::TestGuard, testutil::TestGuard,
}; };
/// Test helper: annotates `input` and asserts the result equals `expected`,
/// echoing the input in the failure message.
fn assert_annotated(input: &str, expected: &str) {
    let annotated = annotate_input(input);
    assert_eq!(annotated, expected, "\nInput: {input:?}");
}
/// Tests for our vim logic emulation. Each test consists of an initial text, a sequence of keys to feed, and the expected final text and cursor position. /// Tests for our vim logic emulation. Each test consists of an initial text, a sequence of keys to feed, and the expected final text and cursor position.
macro_rules! vi_test { macro_rules! vi_test {
{ $($name:ident: $input:expr => $op:expr => $expected_text:expr,$expected_cursor:expr);* } => { { $($name:ident: $input:expr => $op:expr => $expected_text:expr,$expected_cursor:expr);* } => {
@@ -26,6 +31,202 @@ macro_rules! vi_test {
}; };
} }
// ===================== Annotation Tests =====================
#[test]
fn annotate_simple_command() {
assert_annotated("echo hello",
"\u{e101}echo\u{e11a} \u{e102}hello\u{e11a}");
}
#[test]
fn annotate_pipeline() {
assert_annotated("ls | grep foo",
"\u{e100}ls\u{e11a} \u{e104}|\u{e11a} \u{e100}grep\u{e11a} \u{e102}foo\u{e11a}");
}
#[test]
fn annotate_conjunction() {
assert_annotated("echo foo && echo bar",
"\u{e101}echo\u{e11a} \u{e102}foo\u{e11a} \u{e104}&&\u{e11a} \u{e101}echo\u{e11a} \u{e102}bar\u{e11a}");
}
#[test]
fn annotate_redirect_output() {
assert_annotated("echo hello > file.txt",
"\u{e101}echo\u{e11a} \u{e102}hello\u{e11a} \u{e105}>\u{e11a} \u{e102}file.txt\u{e11a}");
}
#[test]
fn annotate_redirect_append() {
assert_annotated("echo hello >> file.txt",
"\u{e101}echo\u{e11a} \u{e102}hello\u{e11a} \u{e105}>>\u{e11a} \u{e102}file.txt\u{e11a}");
}
#[test]
fn annotate_redirect_input() {
assert_annotated("cat < file.txt",
"\u{e100}cat\u{e11a} \u{e105}<\u{e11a} \u{e102}file.txt\u{e11a}");
}
#[test]
fn annotate_fd_redirect() {
assert_annotated("cmd 2>&1",
"\u{e100}cmd\u{e11a} \u{e105}2>&1\u{e11a}");
}
#[test]
fn annotate_variable_sub() {
assert_annotated("echo $HOME",
"\u{e101}echo\u{e11a} \u{e102}\u{e10c}$HOME\u{e10d}\u{e11a}");
}
#[test]
fn annotate_variable_brace_sub() {
assert_annotated("echo ${HOME}",
"\u{e101}echo\u{e11a} \u{e102}\u{e10c}${HOME}\u{e10d}\u{e11a}");
}
#[test]
fn annotate_command_sub() {
assert_annotated("echo $(ls)",
"\u{e101}echo\u{e11a} \u{e102}\u{e10e}$(ls)\u{e10f}\u{e11a}");
}
#[test]
fn annotate_single_quoted_string() {
assert_annotated("echo 'hello world'",
"\u{e101}echo\u{e11a} \u{e102}\u{e114}'hello world'\u{e115}\u{e11a}");
}
#[test]
fn annotate_double_quoted_string() {
assert_annotated("echo \"hello world\"",
"\u{e101}echo\u{e11a} \u{e102}\u{e112}\"hello world\"\u{e113}\u{e11a}");
}
#[test]
fn annotate_assignment() {
assert_annotated("FOO=bar",
"\u{e107}FOO=bar\u{e11a}");
}
#[test]
fn annotate_assignment_with_command() {
assert_annotated("FOO=bar echo hello",
"\u{e107}FOO=bar\u{e11a} \u{e101}echo\u{e11a} \u{e102}hello\u{e11a}");
}
#[test]
fn annotate_if_statement() {
assert_annotated("if true; then echo yes; fi",
"\u{e103}if\u{e11a} \u{e101}true\u{e11a}\u{e108}; \u{e11a}\u{e103}then\u{e11a} \u{e101}echo\u{e11a} \u{e102}yes\u{e11a}\u{e108}; \u{e11a}\u{e103}fi\u{e11a}");
}
#[test]
fn annotate_for_loop() {
assert_annotated("for i in a b c; do echo $i; done",
"\u{e103}for\u{e11a} \u{e102}i\u{e11a} \u{e103}in\u{e11a} \u{e102}a\u{e11a} \u{e102}b\u{e11a} \u{e102}c\u{e11a}\u{e108}; \u{e11a}\u{e103}do\u{e11a} \u{e101}echo\u{e11a} \u{e102}\u{e10c}$i\u{e10d}\u{e11a}\u{e108}; \u{e11a}\u{e103}done\u{e11a}");
}
#[test]
fn annotate_while_loop() {
assert_annotated("while true; do echo hello; done",
"\u{e103}while\u{e11a} \u{e101}true\u{e11a}\u{e108}; \u{e11a}\u{e103}do\u{e11a} \u{e101}echo\u{e11a} \u{e102}hello\u{e11a}\u{e108}; \u{e11a}\u{e103}done\u{e11a}");
}
#[test]
fn annotate_case_statement() {
assert_annotated("case foo in bar) echo bar;; esac",
"\u{e103}case\u{e11a} \u{e102}foo\u{e11a} \u{e103}in\u{e11a} \u{e104}bar\u{e109})\u{e11a} \u{e101}echo\u{e11a} \u{e102}bar\u{e11a}\u{e108};; \u{e11a}\u{e103}esac\u{e11a}");
}
#[test]
fn annotate_brace_group() {
assert_annotated("{ echo hello; }",
"\u{e104}{\u{e11a} \u{e101}echo\u{e11a} \u{e102}hello\u{e11a}\u{e108}; \u{e11a}\u{e104}}\u{e11a}");
}
#[test]
fn annotate_comment() {
assert_annotated("echo hello # this is a comment",
"\u{e101}echo\u{e11a} \u{e102}hello\u{e11a} \u{e106}# this is a comment\u{e11a}");
}
#[test]
fn annotate_semicolon_sep() {
assert_annotated("echo foo; echo bar",
"\u{e101}echo\u{e11a} \u{e102}foo\u{e11a}\u{e108}; \u{e11a}\u{e101}echo\u{e11a} \u{e102}bar\u{e11a}");
}
#[test]
fn annotate_escaped_char() {
assert_annotated("echo hello\\ world",
"\u{e101}echo\u{e11a} \u{e102}hello\\ world\u{e11a}");
}
#[test]
fn annotate_glob() {
assert_annotated("ls *.txt",
"\u{e100}ls\u{e11a} \u{e102}\u{e117}*\u{e11a}.txt\u{e11a}");
}
#[test]
fn annotate_heredoc_operator() {
assert_annotated("cat <<EOF",
"\u{e100}cat\u{e11a} \u{e105}<<\u{e11a}\u{e102}EOF\u{e11a}");
}
#[test]
fn annotate_herestring_operator() {
assert_annotated("cat <<< hello",
"\u{e100}cat\u{e11a} \u{e105}<<<\u{e11a} \u{e102}hello\u{e11a}");
}
#[test]
fn annotate_nested_command_sub() {
assert_annotated("echo $(echo $(ls))",
"\u{e101}echo\u{e11a} \u{e102}\u{e10e}$(echo $(ls))\u{e10f}\u{e11a}");
}
#[test]
fn annotate_var_in_double_quotes() {
assert_annotated("echo \"hello $USER\"",
"\u{e101}echo\u{e11a} \u{e102}\u{e112}\"hello \u{e10c}$USER\u{e10d}\"\u{e113}\u{e11a}");
}
#[test]
fn annotate_func_def() {
assert_annotated("foo() { echo hello; }",
"\u{e103}foo()\u{e11a} \u{e104}{\u{e11a} \u{e101}echo\u{e11a} \u{e102}hello\u{e11a}\u{e108}; \u{e11a}\u{e104}}\u{e11a}");
}
#[test]
fn annotate_negate() {
assert_annotated("! echo hello",
"\u{e104}!\u{e11a} \u{e101}echo\u{e11a} \u{e102}hello\u{e11a}");
}
#[test]
fn annotate_or_conjunction() {
assert_annotated("false || echo fallback",
"\u{e101}false\u{e11a} \u{e104}||\u{e11a} \u{e101}echo\u{e11a} \u{e102}fallback\u{e11a}");
}
#[test]
fn annotate_complex_pipeline() {
assert_annotated("cat file.txt | grep pattern | wc -l",
"\u{e100}cat\u{e11a} \u{e102}file.txt\u{e11a} \u{e104}|\u{e11a} \u{e100}grep\u{e11a} \u{e102}pattern\u{e11a} \u{e104}|\u{e11a} \u{e100}wc\u{e11a} \u{e102}-l\u{e11a}");
}
#[test]
fn annotate_multiple_redirects() {
assert_annotated("cmd > out.txt 2> err.txt",
"\u{e100}cmd\u{e11a} \u{e105}>\u{e11a} \u{e102}out.txt\u{e11a} \u{e105}2>\u{e11a} \u{e102}err.txt\u{e11a}");
}
// ===================== Vi Tests =====================
fn test_vi(initial: &str) -> (ShedVi, TestGuard) { fn test_vi(initial: &str) -> (ShedVi, TestGuard) {
let g = TestGuard::new(); let g = TestGuard::new();
let prompt = Prompt::default(); let prompt = Prompt::default();