Add array support for local/export/readonly builtins

Add array length syntax ${arr[#]}

Map read path now expands variables before splitting on ., fixing map "$node" with dotted paths

Map assignment path uses quote-aware token splitting, enabling quoted keys like "--type="

Completion errors now display above prompt instead of being overwritten

Fix nested if/fi parser bug when closing keywords appear on separate lines

Add QuoteState enum, replacing ad-hoc quote tracking booleans across lexer, highlighter, and expansion

Add split_tk_at/split_tk for quote-aware token splitting with span preservation

Refactor setup_builtin to accept optional argv for deferred expansion

Add ariadne dependency (not yet wired up)
This commit is contained in:
2026-02-28 15:51:09 -05:00
parent ab5f42b281
commit 1b63eff783
26 changed files with 375 additions and 281 deletions

View File

@@ -24,6 +24,46 @@ pub const KEYWORDS: [&str; 16] = [
pub const OPENERS: [&str; 6] = ["if", "while", "until", "for", "select", "case"];
/// Used to track whether the lexer is currently inside a quote, and if so, which type
#[derive(Default,Debug)]
pub enum QuoteState {
#[default]
Outside,
Single,
Double
}
impl QuoteState {
pub fn outside(&self) -> bool {
matches!(self, QuoteState::Outside)
}
pub fn in_single(&self) -> bool {
matches!(self, QuoteState::Single)
}
pub fn in_double(&self) -> bool {
matches!(self, QuoteState::Double)
}
pub fn in_quote(&self) -> bool {
!self.outside()
}
/// Toggles whether we are in a double quote. If self = QuoteState::Single, this does nothing, since double quotes inside single quotes are just literal characters
pub fn toggle_double(&mut self) {
match self {
QuoteState::Outside => *self = QuoteState::Double,
QuoteState::Double => *self = QuoteState::Outside,
_ => {}
}
}
/// Toggles whether we are in a single quote. If self == QuoteState::Double, this does nothing, since single quotes are not interpreted inside double quotes
pub fn toggle_single(&mut self) {
match self {
QuoteState::Outside => *self = QuoteState::Single,
QuoteState::Single => *self = QuoteState::Outside,
_ => {}
}
}
}
/// Span::new(10..20)
#[derive(Clone, PartialEq, Default, Debug)]
pub struct Span {
@@ -150,7 +190,7 @@ bitflags! {
pub struct LexStream {
source: Arc<String>,
pub cursor: usize,
in_quote: bool,
quote_state: QuoteState,
brc_grp_start: Option<usize>,
flags: LexFlags,
}
@@ -183,11 +223,11 @@ impl LexStream {
pub fn new(source: Arc<String>, flags: LexFlags) -> Self {
let flags = flags | LexFlags::FRESH | LexFlags::NEXT_IS_CMD;
Self {
flags,
source,
cursor: 0,
in_quote: false,
quote_state: QuoteState::default(),
brc_grp_start: None,
flags,
}
}
/// Returns a slice of the source input using the given range
@@ -352,6 +392,47 @@ impl LexStream {
if let Some(ch) = chars.next() {
pos += ch.len_utf8();
}
}
'\'' => {
pos += 1;
self.quote_state.toggle_single();
}
_ if self.quote_state.in_single() => pos += ch.len_utf8(),
'$' if chars.peek() == Some(&'(') => {
pos += 2;
chars.next();
let mut paren_count = 1;
let paren_pos = pos;
while let Some(ch) = chars.next() {
match ch {
'\\' => {
pos += 1;
if let Some(next_ch) = chars.next() {
pos += next_ch.len_utf8();
}
}
'(' => {
pos += 1;
paren_count += 1;
}
')' => {
pos += 1;
paren_count -= 1;
if paren_count <= 0 {
break;
}
}
_ => pos += ch.len_utf8(),
}
}
if !paren_count == 0 && !self.flags.contains(LexFlags::LEX_UNFINISHED) {
self.cursor = pos;
return Err(ShErr::full(
ShErrKind::ParseErr,
"Unclosed subshell",
Span::new(paren_pos..paren_pos + 1, self.source.clone()),
));
}
}
'$' if chars.peek() == Some(&'{') => {
pos += 2;
@@ -380,6 +461,11 @@ impl LexStream {
}
}
}
'"' => {
pos += 1;
self.quote_state.toggle_double();
}
_ if self.quote_state.in_double() => pos += ch.len_utf8(),
'<' if chars.peek() == Some(&'(') => {
pos += 2;
chars.next();
@@ -452,42 +538,6 @@ impl LexStream {
));
}
}
'$' if chars.peek() == Some(&'(') => {
pos += 2;
chars.next();
let mut paren_count = 1;
let paren_pos = pos;
while let Some(ch) = chars.next() {
match ch {
'\\' => {
pos += 1;
if let Some(next_ch) = chars.next() {
pos += next_ch.len_utf8();
}
}
'(' => {
pos += 1;
paren_count += 1;
}
')' => {
pos += 1;
paren_count -= 1;
if paren_count <= 0 {
break;
}
}
_ => pos += ch.len_utf8(),
}
}
if !paren_count == 0 && !self.flags.contains(LexFlags::LEX_UNFINISHED) {
self.cursor = pos;
return Err(ShErr::full(
ShErrKind::ParseErr,
"Unclosed subshell",
Span::new(paren_pos..paren_pos + 1, self.source.clone()),
));
}
}
'(' if self.next_is_cmd() && can_be_subshell => {
pos += 1;
let mut paren_count = 1;
@@ -547,82 +597,6 @@ impl LexStream {
self.cursor = pos;
return Ok(tk);
}
'\'' => {
self.in_quote = true;
pos += 1;
while let Some(q_ch) = chars.next() {
match q_ch {
'\\' => {
pos += 1;
if chars.next().is_some() {
pos += 1;
}
}
_ if q_ch == '\'' => {
pos += 1;
self.in_quote = false;
break;
}
// Any time an ambiguous character is found
// we must push the cursor by the length of the character
// instead of just assuming a length of 1.
// Allows spans to work for wide characters
_ => pos += q_ch.len_utf8(),
}
}
}
'"' => {
self.in_quote = true;
pos += 1;
while let Some(q_ch) = chars.next() {
match q_ch {
'\\' => {
pos += 1;
if chars.next().is_some() {
pos += 1;
}
}
'$' if chars.peek() == Some(&'(') => {
pos += 2;
chars.next();
let mut cmdsub_count = 1;
while let Some(cmdsub_ch) = chars.next() {
match cmdsub_ch {
'\\' => {
pos += 1;
if chars.next().is_some() {
pos += 1;
}
}
'$' if chars.peek() == Some(&'(') => {
cmdsub_count += 1;
pos += 2;
chars.next();
}
')' => {
cmdsub_count -= 1;
pos += 1;
if cmdsub_count <= 0 {
break;
}
}
_ => pos += cmdsub_ch.len_utf8(),
}
}
}
_ if q_ch == '"' => {
pos += 1;
self.in_quote = false;
break;
}
// Any time an ambiguous character is found
// we must push the cursor by the length of the character
// instead of just assuming a length of 1.
// Allows spans to work for wide characters
_ => pos += q_ch.len_utf8(),
}
}
}
'=' if chars.peek() == Some(&'(') => {
pos += 1; // '='
let mut depth = 1;
@@ -652,13 +626,12 @@ impl LexStream {
}
}
}
_ if !self.in_quote && is_op(ch) => break,
_ if is_hard_sep(ch) => break,
_ => pos += ch.len_utf8(),
}
}
let mut new_tk = self.get_token(self.cursor..pos, TkRule::Str);
if self.in_quote && !self.flags.contains(LexFlags::LEX_UNFINISHED) {
if self.quote_state.in_quote() && !self.flags.contains(LexFlags::LEX_UNFINISHED) {
self.cursor = pos;
return Err(ShErr::full(
ShErrKind::ParseErr,
@@ -912,6 +885,8 @@ pub fn ends_with_unescaped(slice: &str, pat: &str) -> bool {
slice.ends_with(pat) && !pos_is_escaped(slice, slice.len() - pat.len())
}
/// Splits a string by a pattern, but only if the pattern is not escaped by a backslash
/// and not in quotes.
pub fn split_all_unescaped(slice: &str, pat: &str) -> Vec<String> {
let mut cursor = 0;
let mut splits = vec![];
@@ -925,19 +900,71 @@ pub fn split_all_unescaped(slice: &str, pat: &str) -> Vec<String> {
splits
}
/// Splits a string at the first occurrence of a pattern, but only if the pattern is not escaped by a backslash
/// and not in quotes. Returns None if the pattern is not found or only found escaped.
pub fn split_at_unescaped(slice: &str, pat: &str) -> Option<(String,String)> {
let mut window_start = 0;
let mut window_end = pat.len();
if window_end > slice.len() {
return None;
}
while window_end <= slice.len() {
if &slice[window_start..window_end] == pat && !pos_is_escaped(slice, window_start) {
return Some((slice[..window_start].to_string(), slice[window_end..].to_string()));
let mut chars = slice.char_indices().peekable();
let mut qt_state = QuoteState::default();
while let Some((i, ch)) = chars.next() {
match ch {
'\\' => { chars.next(); continue; }
'\'' => qt_state.toggle_single(),
'"' => qt_state.toggle_double(),
_ if qt_state.in_quote() => continue,
_ => {}
}
if slice[i..].starts_with(pat) {
let before = slice[..i].to_string();
let after = slice[i + pat.len()..].to_string();
return Some((before, after));
}
window_start += 1;
window_end += 1;
}
None
}
pub fn split_tk(tk: &Tk, pat: &str) -> Vec<Tk> {
let slice = tk.as_str();
let mut cursor = 0;
let mut splits = vec![];
while let Some(split) = split_at_unescaped(&slice[cursor..], pat) {
let before_span = Span::new(tk.span.start + cursor..tk.span.start + cursor + split.0.len(), tk.source().clone());
splits.push(Tk::new(tk.class.clone(), before_span));
cursor += split.0.len() + pat.len();
}
if slice.get(cursor..).is_some_and(|s| !s.is_empty()) {
let remaining_span = Span::new(tk.span.start + cursor..tk.span.end, tk.source().clone());
splits.push(Tk::new(tk.class.clone(), remaining_span));
}
splits
}
pub fn split_tk_at(tk: &Tk, pat: &str) -> Option<(Tk, Tk)> {
let slice = tk.as_str();
let mut chars = slice.char_indices().peekable();
let mut qt_state = QuoteState::default();
while let Some((i, ch)) = chars.next() {
match ch {
'\\' => { chars.next(); continue; }
'\'' => qt_state.toggle_single(),
'"' => qt_state.toggle_double(),
_ if qt_state.in_quote() => continue,
_ => {}
}
if slice[i..].starts_with(pat) {
let before_span = Span::new(tk.span.start..tk.span.start + i, tk.source().clone());
let after_span = Span::new(tk.span.start + i + pat.len()..tk.span.end, tk.source().clone());
let before_tk = Tk::new(tk.class.clone(), before_span);
let after_tk = Tk::new(tk.class.clone(), after_span);
return Some((before_tk, after_tk));
}
}
None
}