use std::{ collections::VecDeque, fmt::Display, iter::Peekable, ops::{Bound, Deref, Range, RangeBounds}, str::Chars, sync::Arc, }; use bitflags::bitflags; use crate::{ builtin::BUILTINS, libsh::{ error::{ShErr, ShErrKind, ShResult}, utils::CharDequeUtils, }, }; pub const KEYWORDS: [&str; 16] = [ "if", "then", "elif", "else", "fi", "while", "until", "select", "for", "in", "do", "done", "case", "esac", "[[", "]]", ]; pub const OPENERS: [&str; 6] = ["if", "while", "until", "for", "select", "case"]; /// Used to track whether the lexer is currently inside a quote, and if so, which type #[derive(Default,Debug)] pub enum QuoteState { #[default] Outside, Single, Double } impl QuoteState { pub fn outside(&self) -> bool { matches!(self, QuoteState::Outside) } pub fn in_single(&self) -> bool { matches!(self, QuoteState::Single) } pub fn in_double(&self) -> bool { matches!(self, QuoteState::Double) } pub fn in_quote(&self) -> bool { !self.outside() } /// Toggles whether we are in a double quote. If self = QuoteState::Single, this does nothing, since double quotes inside single quotes are just literal characters pub fn toggle_double(&mut self) { match self { QuoteState::Outside => *self = QuoteState::Double, QuoteState::Double => *self = QuoteState::Outside, _ => {} } } /// Toggles whether we are in a single quote. If self == QuoteState::Double, this does nothing, since single quotes are not interpreted inside double quotes pub fn toggle_single(&mut self) { match self { QuoteState::Outside => *self = QuoteState::Single, QuoteState::Single => *self = QuoteState::Outside, _ => {} } } } #[derive(Clone, PartialEq, Default, Debug, Eq, Hash)] pub struct SpanSource { name: String, content: Arc } impl SpanSource { pub fn name(&self) -> &str { &self.name } pub fn content(&self) -> Arc { self.content.clone() } pub fn rename(&mut self, name: String) { self.name = name; } } impl Display for SpanSource { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "{}", self.name) } } /// Span::new(10..20) #[derive(Clone, PartialEq, Default, Debug)] pub struct Span { range: Range, source: SpanSource } impl Span { /// New `Span`. Wraps a range and a string slice that it refers to. pub fn new(range: Range, source: Arc) -> Self { let source = SpanSource { name: "".into(), content: source }; Span { range, source } } pub fn rename(&mut self, name: String) { self.source.name = name; } pub fn with_name(mut self, name: String) -> Self { self.source.name = name; self } /// Slice the source string at the wrapped range pub fn as_str(&self) -> &str { &self.source.content[self.range().start..self.range().end] } pub fn get_source(&self) -> Arc { self.source.content.clone() } pub fn span_source(&self) -> &SpanSource { &self.source } pub fn range(&self) -> Range { self.range.clone() } /// With great power comes great responsibility /// Only use this in the most dire of circumstances pub fn set_range(&mut self, range: Range) { self.range = range; } } impl ariadne::Span for Span { type SourceId = SpanSource; fn source(&self) -> &Self::SourceId { &self.source } fn start(&self) -> usize { self.range.start } fn end(&self) -> usize { self.range.end } } /// Allows simple access to the underlying range wrapped by the span #[derive(Clone, PartialEq, Debug)] pub enum TkRule { Null, SOI, // Start-of-Input Str, Pipe, ErrPipe, And, Or, Bg, Sep, Redir, CasePattern, BraceGrpStart, BraceGrpEnd, Expanded { exp: Vec }, Comment, EOI, // End-of-Input } impl Default for TkRule { fn default() -> Self { TkRule::Null } } #[derive(Clone, Debug, PartialEq, Default)] pub struct Tk { pub class: TkRule, pub span: Span, pub flags: TkFlags, } // There's one impl here and then another in expand.rs which has the expansion // logic impl Tk { pub fn new(class: TkRule, span: Span) -> Self { Self { class, span, flags: TkFlags::empty(), } } pub fn as_str(&self) -> &str { self.span.as_str() } pub fn source(&self) -> Arc { self.span.source.content.clone() } pub fn mark(&mut self, flag: TkFlags) { self.flags |= flag; } /// Used to see if a separator is ';;' for case statements pub fn has_double_semi(&self) -> bool { let TkRule::Sep = self.class else { return false; }; self.span.as_str().trim() == ";;" } } impl Display for Tk { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match &self.class { TkRule::Expanded { exp } => write!(f, "{}", exp.join(" ")), _ => write!(f, "{}", self.span.as_str()), } } } bitflags! { #[derive(Debug,Clone,Copy,PartialEq,Default)] pub struct TkFlags: u32 { const KEYWORD = 0b0000000000000001; /// This is a keyword that opens a new block statement, like 'if' and 'while' const OPENER = 0b0000000000000010; const IS_CMD = 0b0000000000000100; const IS_SUBSH = 0b0000000000001000; const IS_CMDSUB = 0b0000000000010000; const IS_OP = 0b0000000000100000; const ASSIGN = 0b0000000001000000; const BUILTIN = 0b0000000010000000; const IS_PROCSUB = 0b0000000100000000; } } pub struct LexStream { source: Arc, pub cursor: usize, quote_state: QuoteState, brc_grp_start: Option, flags: LexFlags, } bitflags! { #[derive(Debug, Clone, Copy)] pub struct LexFlags: u32 { /// The lexer is operating in interactive mode const INTERACTIVE = 0b000000001; /// Allow unfinished input const LEX_UNFINISHED = 0b000000010; /// The next string-type token is a command name const NEXT_IS_CMD = 0b000000100; /// We are in a quotation, so quoting rules apply const IN_QUOTE = 0b000001000; /// Only lex strings; used in expansions const RAW = 0b000010000; /// The lexer has not produced any tokens yet const FRESH = 0b000010000; /// The lexer has no more tokens to produce const STALE = 0b000100000; /// The lexer's cursor is in a brace group const IN_BRC_GRP = 0b001000000; const EXPECTING_IN = 0b010000000; const IN_CASE = 0b100000000; } } impl LexStream { pub fn new(source: Arc, flags: LexFlags) -> Self { let flags = flags | LexFlags::FRESH | LexFlags::NEXT_IS_CMD; Self { flags, source, cursor: 0, quote_state: QuoteState::default(), brc_grp_start: None, } } /// Returns a slice of the source input using the given range /// Returns None if the range is out of the bounds of the string slice /// /// Works with any kind of range /// examples: /// `LexStream.slice(1..10)` /// `LexStream.slice(1..=10)` /// `LexStream.slice(..10)` /// `LexStream.slice(1..)` pub fn slice>(&self, range: R) -> Option<&str> { let start = match range.start_bound() { Bound::Included(&start) => start, Bound::Excluded(&start) => start + 1, Bound::Unbounded => 0, }; let end = match range.end_bound() { Bound::Included(&end) => end, Bound::Excluded(&end) => end + 1, Bound::Unbounded => self.source.len(), }; self.source.get(start..end) } pub fn slice_from_cursor(&self) -> Option<&str> { self.slice(self.cursor..) } pub fn in_brc_grp(&self) -> bool { self.flags.contains(LexFlags::IN_BRC_GRP) } pub fn set_in_brc_grp(&mut self, is: bool) { if is { self.flags |= LexFlags::IN_BRC_GRP; self.brc_grp_start = Some(self.cursor); } else { self.flags &= !LexFlags::IN_BRC_GRP; self.brc_grp_start = None; } } pub fn next_is_cmd(&self) -> bool { self.flags.contains(LexFlags::NEXT_IS_CMD) } /// Set whether the next string token is a command name pub fn set_next_is_cmd(&mut self, is: bool) { if is { self.flags |= LexFlags::NEXT_IS_CMD; } else { self.flags &= !LexFlags::NEXT_IS_CMD; } } pub fn read_redir(&mut self) -> Option> { assert!(self.cursor <= self.source.len()); let slice = self.slice(self.cursor..)?; let mut pos = self.cursor; let mut chars = slice.chars().peekable(); let mut tk = Tk::default(); while let Some(ch) = chars.next() { match ch { '>' => { if chars.peek() == Some(&'(') { return None; // It's a process sub } pos += 1; if let Some('>') = chars.peek() { chars.next(); pos += 1; } if let Some('&') = chars.peek() { chars.next(); pos += 1; let mut found_fd = false; while chars.peek().is_some_and(|ch| ch.is_ascii_digit()) { chars.next(); found_fd = true; pos += 1; } if !found_fd && !self.flags.contains(LexFlags::LEX_UNFINISHED) { let span_start = self.cursor; self.cursor = pos; return Some(Err(ShErr::at( ShErrKind::ParseErr, Span::new(span_start..pos, self.source.clone()), "Invalid redirection", ))); } else { tk = self.get_token(self.cursor..pos, TkRule::Redir); break; } } else { tk = self.get_token(self.cursor..pos, TkRule::Redir); break; } } '<' => { if chars.peek() == Some(&'(') { return None; // It's a process sub } pos += 1; for _ in 0..2 { if let Some('<') = chars.peek() { chars.next(); pos += 1; } else { break; } } tk = self.get_token(self.cursor..pos, TkRule::Redir); break; } '0'..='9' => { pos += 1; while chars.peek().is_some_and(|ch| ch.is_ascii_digit()) { chars.next(); pos += 1; } } _ => { return None; } } } if tk == Tk::default() { return None; } self.cursor = pos; Some(Ok(tk)) } pub fn read_string(&mut self) -> ShResult { assert!(self.cursor <= self.source.len()); let slice = self.slice_from_cursor().unwrap().to_string(); let mut pos = self.cursor; let mut chars = slice.chars().peekable(); let can_be_subshell = chars.peek() == Some(&'('); if self.flags.contains(LexFlags::IN_CASE) && let Some(count) = case_pat_lookahead(chars.clone()) { pos += count; let casepat_tk = self.get_token(self.cursor..pos, TkRule::CasePattern); self.cursor = pos; self.set_next_is_cmd(true); return Ok(casepat_tk); } while let Some(ch) = chars.next() { match ch { _ if self.flags.contains(LexFlags::RAW) => { if ch.is_whitespace() { break; } else { pos += ch.len_utf8() } } '\\' => { pos += 1; if let Some(ch) = chars.next() { pos += ch.len_utf8(); } } '\'' => { pos += 1; self.quote_state.toggle_single(); } _ if self.quote_state.in_single() => pos += ch.len_utf8(), '$' if chars.peek() == Some(&'(') => { pos += 2; chars.next(); let mut paren_count = 1; let paren_pos = pos; while let Some(ch) = chars.next() { match ch { '\\' => { pos += 1; if let Some(next_ch) = chars.next() { pos += next_ch.len_utf8(); } } '(' => { pos += 1; paren_count += 1; } ')' => { pos += 1; paren_count -= 1; if paren_count <= 0 { break; } } _ => pos += ch.len_utf8(), } } if !paren_count == 0 && !self.flags.contains(LexFlags::LEX_UNFINISHED) { self.cursor = pos; return Err(ShErr::at( ShErrKind::ParseErr, Span::new(paren_pos..paren_pos + 1, self.source.clone()), "Unclosed subshell", )); } } '$' if chars.peek() == Some(&'{') => { pos += 2; chars.next(); let mut brace_count = 1; while let Some(brc_ch) = chars.next() { match brc_ch { '\\' => { pos += 1; if let Some(next_ch) = chars.next() { pos += next_ch.len_utf8() } } '{' => { pos += 1; brace_count += 1; } '}' => { pos += 1; brace_count -= 1; if brace_count == 0 { break; } } _ => pos += ch.len_utf8(), } } } '"' => { pos += 1; self.quote_state.toggle_double(); } _ if self.quote_state.in_double() => pos += ch.len_utf8(), '<' if chars.peek() == Some(&'(') => { pos += 2; chars.next(); let mut paren_count = 1; let paren_pos = pos; while let Some(ch) = chars.next() { match ch { '\\' => { pos += 1; if let Some(next_ch) = chars.next() { pos += next_ch.len_utf8(); } } '(' => { pos += 1; paren_count += 1; } ')' => { pos += 1; paren_count -= 1; if paren_count <= 0 { break; } } _ => pos += ch.len_utf8(), } } if !paren_count == 0 && !self.flags.contains(LexFlags::LEX_UNFINISHED) { self.cursor = pos; return Err(ShErr::at( ShErrKind::ParseErr, Span::new(paren_pos..paren_pos + 1, self.source.clone()), "Unclosed subshell", )); } } '>' if chars.peek() == Some(&'(') => { pos += 2; chars.next(); let mut paren_count = 1; let paren_pos = pos; while let Some(ch) = chars.next() { match ch { '\\' => { pos += 1; if let Some(next_ch) = chars.next() { pos += next_ch.len_utf8(); } } '(' => { pos += 1; paren_count += 1; } ')' => { pos += 1; paren_count -= 1; if paren_count <= 0 { break; } } _ => pos += ch.len_utf8(), } } if !paren_count == 0 && !self.flags.contains(LexFlags::LEX_UNFINISHED) { self.cursor = pos; return Err(ShErr::at( ShErrKind::ParseErr, Span::new(paren_pos..paren_pos + 1, self.source.clone()), "Unclosed subshell", )); } } '(' if self.next_is_cmd() && can_be_subshell => { pos += 1; let mut paren_count = 1; let paren_pos = pos; while let Some(ch) = chars.next() { match ch { '\\' => { pos += 1; if let Some(next_ch) = chars.next() { pos += next_ch.len_utf8(); } } '(' => { pos += 1; paren_count += 1; } ')' => { pos += 1; paren_count -= 1; if paren_count <= 0 { break; } } _ => pos += ch.len_utf8(), } } if paren_count != 0 && !self.flags.contains(LexFlags::LEX_UNFINISHED) { self.cursor = pos; return Err(ShErr::at( ShErrKind::ParseErr, Span::new(paren_pos..paren_pos + 1, self.source.clone()), "Unclosed subshell", )); } let mut subsh_tk = self.get_token(self.cursor..pos, TkRule::Str); subsh_tk.flags |= TkFlags::IS_CMD; subsh_tk.flags |= TkFlags::IS_SUBSH; self.cursor = pos; self.set_next_is_cmd(true); return Ok(subsh_tk); } '{' if pos == self.cursor && self.next_is_cmd() => { pos += 1; let mut tk = self.get_token(self.cursor..pos, TkRule::BraceGrpStart); tk.flags |= TkFlags::IS_CMD; self.set_in_brc_grp(true); self.set_next_is_cmd(true); self.cursor = pos; return Ok(tk); } '}' if pos == self.cursor && self.in_brc_grp() => { pos += 1; let tk = self.get_token(self.cursor..pos, TkRule::BraceGrpEnd); self.set_in_brc_grp(false); self.set_next_is_cmd(true); self.cursor = pos; return Ok(tk); } '=' if chars.peek() == Some(&'(') => { pos += 1; // '=' let mut depth = 1; chars.next(); pos += 1; // '(' // looks like an array while let Some(arr_ch) = chars.next() { match arr_ch { '\\' => { pos += 1; if let Some(next_ch) = chars.next() { pos += next_ch.len_utf8(); } } '(' => { depth += 1; pos += 1; } ')' => { depth -= 1; pos += 1; if depth == 0 { break; } } _ => pos += arr_ch.len_utf8(), } } } _ if is_hard_sep(ch) => break, _ => pos += ch.len_utf8(), } } let mut new_tk = self.get_token(self.cursor..pos, TkRule::Str); if self.quote_state.in_quote() && !self.flags.contains(LexFlags::LEX_UNFINISHED) { self.cursor = pos; return Err(ShErr::at( ShErrKind::ParseErr, new_tk.span, "Unterminated quote", )); } let text = new_tk.span.as_str(); if self.flags.contains(LexFlags::NEXT_IS_CMD) { match text { "case" | "select" | "for" => { new_tk.mark(TkFlags::KEYWORD); self.flags |= LexFlags::EXPECTING_IN; self.flags |= LexFlags::IN_CASE; self.set_next_is_cmd(false); } "in" if self.flags.contains(LexFlags::EXPECTING_IN) => { new_tk.mark(TkFlags::KEYWORD); self.flags &= !LexFlags::EXPECTING_IN; } _ if is_keyword(text) => { if text == "esac" && self.flags.contains(LexFlags::IN_CASE) { self.flags &= !LexFlags::IN_CASE; } new_tk.mark(TkFlags::KEYWORD); } _ if is_assignment(text) => { new_tk.mark(TkFlags::ASSIGN); } _ if is_cmd_sub(text) => { new_tk.mark(TkFlags::IS_CMDSUB); if self.next_is_cmd() { new_tk.mark(TkFlags::IS_CMD); } self.set_next_is_cmd(false); } _ => { new_tk.flags |= TkFlags::IS_CMD; if BUILTINS.contains(&text) { new_tk.mark(TkFlags::BUILTIN); } self.set_next_is_cmd(false); } } } else if self.flags.contains(LexFlags::EXPECTING_IN) && text == "in" { new_tk.mark(TkFlags::KEYWORD); self.flags &= !LexFlags::EXPECTING_IN; } else if is_cmd_sub(text) { new_tk.mark(TkFlags::IS_CMDSUB) } self.cursor = pos; Ok(new_tk) } pub fn get_token(&self, range: Range, class: TkRule) -> Tk { let span = Span::new(range, self.source.clone()); Tk::new(class, span) } } impl Iterator for LexStream { type Item = ShResult; fn next(&mut self) -> Option { assert!(self.cursor <= self.source.len()); // We are at the end of the input if self.cursor == self.source.len() { if self.flags.contains(LexFlags::STALE) { // We've already returned an EOI token, nothing left to do return None; } else { // Return the EOI token if self.in_brc_grp() && !self.flags.contains(LexFlags::LEX_UNFINISHED) { let start = self.brc_grp_start.unwrap_or(self.cursor.saturating_sub(1)); self.flags |= LexFlags::STALE; return Err(ShErr::at( ShErrKind::ParseErr, Span::new(start..self.cursor, self.source.clone()), "Unclosed brace group", )) .into(); } let token = self.get_token(self.cursor..self.cursor, TkRule::EOI); self.flags |= LexFlags::STALE; return Some(Ok(token)); } } // Return the SOI token if self.flags.contains(LexFlags::FRESH) { self.flags &= !LexFlags::FRESH; let token = self.get_token(self.cursor..self.cursor, TkRule::SOI); return Some(Ok(token)); } // If we are just reading raw words, short circuit here // Used for word splitting variable values if self.flags.contains(LexFlags::RAW) { return Some(self.read_string()); } loop { let pos = self.cursor; if self.slice(pos..pos + 2) == Some("\\\n") { self.cursor += 2; } else if pos < self.source.len() && is_field_sep(get_char(&self.source, pos).unwrap()) { self.cursor += 1; } else { break; } } if self.cursor == self.source.len() { if self.in_brc_grp() && !self.flags.contains(LexFlags::LEX_UNFINISHED) { let start = self.brc_grp_start.unwrap_or(self.cursor.saturating_sub(1)); return Err(ShErr::at( ShErrKind::ParseErr, Span::new(start..self.cursor, self.source.clone()), "Unclosed brace group", )) .into(); } return None; } let token = match get_char(&self.source, self.cursor).unwrap() { '\r' | '\n' | ';' => { let ch_idx = self.cursor; self.cursor += 1; self.set_next_is_cmd(true); while let Some(ch) = get_char(&self.source, self.cursor) { if is_hard_sep(ch) { // Combine consecutive separators into one, including whitespace self.cursor += 1; } else { break; } } self.get_token(ch_idx..self.cursor, TkRule::Sep) } '#' if !self.flags.contains(LexFlags::INTERACTIVE) || crate::state::read_shopts(|s| s.core.interactive_comments) => { let ch_idx = self.cursor; self.cursor += 1; while let Some(ch) = get_char(&self.source, self.cursor) { self.cursor += 1; if ch == '\n' { break; } } self.get_token(ch_idx..self.cursor, TkRule::Comment) } '|' => { let ch_idx = self.cursor; self.cursor += 1; self.set_next_is_cmd(true); let tk_type = if let Some('|') = get_char(&self.source, self.cursor) { self.cursor += 1; TkRule::Or } else if let Some('&') = get_char(&self.source, self.cursor) { self.cursor += 1; TkRule::ErrPipe } else { TkRule::Pipe }; self.get_token(ch_idx..self.cursor, tk_type) } '&' => { let ch_idx = self.cursor; self.cursor += 1; self.set_next_is_cmd(true); let tk_type = if let Some('&') = get_char(&self.source, self.cursor) { self.cursor += 1; TkRule::And } else { TkRule::Bg }; self.get_token(ch_idx..self.cursor, tk_type) } _ => { if let Some(tk) = self.read_redir() { self.set_next_is_cmd(false); match tk { Ok(tk) => tk, Err(e) => return Some(Err(e)), } } else { match self.read_string() { Ok(tk) => tk, Err(e) => { return Some(Err(e)); } } } } }; Some(Ok(token)) } } pub fn get_char(src: &str, idx: usize) -> Option { src.get(idx..)?.chars().next() } pub fn is_assignment(text: &str) -> bool { let mut chars = text.chars(); while let Some(ch) = chars.next() { match ch { '\\' => { chars.next(); } '=' => return true, _ => continue, } } false } /// Is '|', '&', '>', or '<' pub fn is_op(ch: char) -> bool { matches!(ch, '|' | '&' | '>' | '<') } /// Is whitespace or a semicolon pub fn is_hard_sep(ch: char) -> bool { matches!(ch, ' ' | '\t' | '\n' | ';') } /// Is whitespace, but not a newline pub fn is_field_sep(ch: char) -> bool { matches!(ch, ' ' | '\t') } pub fn is_keyword(slice: &str) -> bool { KEYWORDS.contains(&slice) || (ends_with_unescaped(slice, "()") && !ends_with_unescaped(slice, "=()")) } pub fn is_cmd_sub(slice: &str) -> bool { slice.starts_with("$(") && ends_with_unescaped(slice, ")") } pub fn ends_with_unescaped(slice: &str, pat: &str) -> bool { slice.ends_with(pat) && !pos_is_escaped(slice, slice.len() - pat.len()) } /// Splits a string by a pattern, but only if the pattern is not escaped by a backslash /// and not in quotes. pub fn split_all_unescaped(slice: &str, pat: &str) -> Vec { let mut cursor = 0; let mut splits = vec![]; while let Some(split) = split_at_unescaped(&slice[cursor..], pat) { cursor += split.0.len() + pat.len(); splits.push(split.0); } if let Some(remaining) = slice.get(cursor..) { splits.push(remaining.to_string()); } splits } /// Splits a string at the first occurrence of a pattern, but only if the pattern is not escaped by a backslash /// and not in quotes. Returns None if the pattern is not found or only found escaped. pub fn split_at_unescaped(slice: &str, pat: &str) -> Option<(String,String)> { let mut chars = slice.char_indices().peekable(); let mut qt_state = QuoteState::default(); while let Some((i, ch)) = chars.next() { match ch { '\\' => { chars.next(); continue; } '\'' => qt_state.toggle_single(), '"' => qt_state.toggle_double(), _ if qt_state.in_quote() => continue, _ => {} } if slice[i..].starts_with(pat) { let before = slice[..i].to_string(); let after = slice[i + pat.len()..].to_string(); return Some((before, after)); } } None } pub fn split_tk(tk: &Tk, pat: &str) -> Vec { let slice = tk.as_str(); let mut cursor = 0; let mut splits = vec![]; while let Some(split) = split_at_unescaped(&slice[cursor..], pat) { let before_span = Span::new(tk.span.range().start + cursor..tk.span.range().start + cursor + split.0.len(), tk.source().clone()); splits.push(Tk::new(tk.class.clone(), before_span)); cursor += split.0.len() + pat.len(); } if slice.get(cursor..).is_some_and(|s| !s.is_empty()) { let remaining_span = Span::new(tk.span.range().start + cursor..tk.span.range().end, tk.source().clone()); splits.push(Tk::new(tk.class.clone(), remaining_span)); } splits } pub fn split_tk_at(tk: &Tk, pat: &str) -> Option<(Tk, Tk)> { let slice = tk.as_str(); let mut chars = slice.char_indices().peekable(); let mut qt_state = QuoteState::default(); while let Some((i, ch)) = chars.next() { match ch { '\\' => { chars.next(); continue; } '\'' => qt_state.toggle_single(), '"' => qt_state.toggle_double(), _ if qt_state.in_quote() => continue, _ => {} } if slice[i..].starts_with(pat) { let before_span = Span::new(tk.span.range().start..tk.span.range().start + i, tk.source().clone()); let after_span = Span::new(tk.span.range().start + i + pat.len()..tk.span.range().end, tk.source().clone()); let before_tk = Tk::new(tk.class.clone(), before_span); let after_tk = Tk::new(tk.class.clone(), after_span); return Some((before_tk, after_tk)); } } None } pub fn pos_is_escaped(slice: &str, pos: usize) -> bool { let bytes = slice.as_bytes(); let mut escaped = false; let mut i = pos; while i > 0 && bytes[i - 1] == b'\\' { escaped = !escaped; i -= 1; } escaped } pub fn lookahead(pat: &str, mut chars: Chars) -> Option { let mut pos = 0; let mut char_deque = VecDeque::new(); while let Some(ch) = chars.next() { char_deque.push_back(ch); if char_deque.len() > pat.len() { char_deque.pop_front(); } if char_deque.starts_with(pat) { return Some(pos); } pos += 1; } None } pub fn case_pat_lookahead(mut chars: Peekable) -> Option { let mut pos = 0; while let Some(ch) = chars.next() { pos += 1; match ch { _ if is_hard_sep(ch) => return None, '\\' => { chars.next(); } ')' => return Some(pos), '(' => return None, _ => { /* continue */ } } } None }