leo_parser/parser/
context.rs

1// Copyright (C) 2019-2025 Provable Inc.
2// This file is part of the Leo library.
3
4// The Leo library is free software: you can redistribute it and/or modify
5// it under the terms of the GNU General Public License as published by
6// the Free Software Foundation, either version 3 of the License, or
7// (at your option) any later version.
8
9// The Leo library is distributed in the hope that it will be useful,
10// but WITHOUT ANY WARRANTY; without even the implied warranty of
11// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12// GNU General Public License for more details.
13
14// You should have received a copy of the GNU General Public License
15// along with the Leo library. If not, see <https://www.gnu.org/licenses/>.
16
17use crate::{Token, tokenizer::*};
18
19use leo_ast::*;
20use leo_errors::{Handler, ParserError, Result};
21use leo_span::{Span, Symbol, with_session_globals};
22
23use snarkvm::prelude::{Field, Network};
24
25use std::{fmt::Display, marker::PhantomData, mem};
26
27/// Stores a program in tokenized format plus additional context.
28/// May be converted into a [`Program`] AST by parsing all tokens.
29pub(crate) struct ParserContext<'a, N: Network> {
30    /// Handler used to side-channel emit errors from the parser.
31    pub(crate) handler: Handler,
32    /// Counter used to generate unique node ids.
33    pub(crate) node_builder: &'a NodeBuilder,
34    /// All un-bumped tokens.
35    tokens: Vec<SpannedToken>,
36    /// The current token, i.e., if `p.tokens = ['3', *, '4']`,
37    /// then after a `p.bump()`, we'll have `p.token = '3'`.
38    pub(crate) token: SpannedToken,
39    /// The previous token, i.e., if `p.tokens = ['3', *, '4']`,
40    /// then after two `p.bump()`s, we'll have `p.token = '*'` and `p.prev_token = '3'`.
41    pub(crate) prev_token: SpannedToken,
42    /// True if parsing an expression for if and loop statements -- means struct inits are not legal.
43    pub(crate) disallow_struct_construction: bool,
44    /// The name of the program being parsed.
45    pub(crate) program_name: Option<Symbol>,
46    // Allows the parser to be generic over the network.
47    phantom: PhantomData<N>,
48}
49
50/// Dummy span used to appease borrow checker.
51const DUMMY_EOF: SpannedToken = SpannedToken { token: Token::Eof, span: Span::dummy() };
52
53impl<'a, N: Network> ParserContext<'a, N> {
54    /// Returns a new [`ParserContext`] type given a vector of tokens.
55    pub fn new(handler: Handler, node_builder: &'a NodeBuilder, mut tokens: Vec<SpannedToken>) -> Self {
56        // Strip out comments.
57        tokens.retain(|x| !matches!(x.token, Token::CommentLine(_) | Token::CommentBlock(_)));
58        // For performance we reverse so that we get cheap `.pop()`s.
59        tokens.reverse();
60
61        let token = SpannedToken::dummy();
62        let mut p = Self {
63            handler,
64            node_builder,
65            disallow_struct_construction: false,
66            prev_token: token.clone(),
67            token,
68            tokens,
69            program_name: None,
70            phantom: Default::default(),
71        };
72        p.bump();
73        p
74    }
75
76    /// Advances the parser cursor by one token.
77    ///
78    /// So e.g., if we had `previous = A`, `current = B`, and `tokens = [C, D, E]`,
79    /// then after `p.bump()`, the state will be `previous = B`, `current = C`, and `tokens = [D, E]`.
80    pub(crate) fn bump(&mut self) {
81        // Probably a bug (infinite loop), as the previous token was already EOF.
82        if let Token::Eof = self.prev_token.token {
83            panic!("attempted to bump the parser past EOF (may be stuck in a loop)");
84        }
85
86        // Extract next token, or `Eof` if there was none.
87        let next_token = self.tokens.pop().unwrap_or(SpannedToken { token: Token::Eof, span: self.token.span });
88
89        // Set the new token.
90        self.prev_token = mem::replace(&mut self.token, next_token);
91    }
92
93    /// Checks whether the current token is `tok`.
94    pub(super) fn check(&self, tok: &Token) -> bool {
95        &self.token.token == tok
96    }
97
98    /// Checks whether the current token is a `Token::Integer(_)`.
99    pub(super) fn check_int(&self) -> bool {
100        matches!(&self.token.token, Token::Integer(_))
101    }
102
103    /// Returns `true` if the next token is equal to the given token.
104    /// Advances the parser to the next token.
105    pub(super) fn eat(&mut self, token: &Token) -> bool {
106        self.check(token).then(|| self.bump()).is_some()
107    }
108
109    /// Look-ahead `dist` tokens of `self.token` and get access to that token there.
110    /// When `dist == 0` then the current token is looked at.
111    pub(super) fn look_ahead<'s, R>(&'s self, dist: usize, looker: impl FnOnce(&'s SpannedToken) -> R) -> R {
112        if dist == 0 {
113            return looker(&self.token);
114        }
115
116        let idx = match self.tokens.len().checked_sub(dist) {
117            None => return looker(&DUMMY_EOF),
118            Some(idx) => idx,
119        };
120
121        looker(self.tokens.get(idx).unwrap_or(&DUMMY_EOF))
122    }
123
124    /// Emit the error `err`.
125    pub(super) fn emit_err(&mut self, err: ParserError) {
126        self.handler.emit_err(err);
127    }
128
129    /// Returns true if the next token exists.
130    pub(crate) fn has_next(&self) -> bool {
131        !matches!(self.token.token, Token::Eof)
132    }
133
134    /// At the previous token, return and make an identifier with `name`.
135    fn mk_ident_prev(&self, name: Symbol) -> Identifier {
136        let span = self.prev_token.span;
137        Identifier { name, span, id: self.node_builder.next_id() }
138    }
139
140    /// Eats the next token if it is an identifier and returns it.
141    pub(super) fn eat_identifier(&mut self) -> Option<Identifier> {
142        if let Token::Identifier(name) = self.token.token {
143            self.bump();
144            let identifier = self.mk_ident_prev(name);
145            self.check_identifier(&identifier);
146            return Some(identifier);
147        }
148
149        None
150    }
151
152    /// Expects an [`Identifier`], or errors.
153    pub(super) fn expect_identifier(&mut self) -> Result<Identifier> {
154        self.eat_identifier()
155            .ok_or_else(|| ParserError::unexpected_str(&self.token.token, "identifier", self.token.span).into())
156    }
157
158    ///
159    /// Removes the next token if it is a [`Token::Integer(_)`] and returns it, or [None] if
160    /// the next token is not a [`Token::Integer(_)`] or if the next token does not exist.
161    ///
162    pub fn eat_whole_number(&mut self) -> Result<(NonNegativeNumber, Span)> {
163        if let Token::Integer(value) = &self.token.token {
164            let value = value.clone();
165            self.bump();
166            // Reject value if the length is over 2 and the first character is 0
167            if (value.len() > 1 && value.starts_with('0')) || value.contains('_') {
168                return Err(ParserError::tuple_index_must_be_whole_number(&self.token.token, self.token.span).into());
169            }
170
171            Ok((NonNegativeNumber::from(value), self.prev_token.span))
172        } else {
173            Err(ParserError::unexpected(&self.token.token, "integer literal", self.token.span).into())
174        }
175    }
176
177    /// Eats any of the given `tokens`, returning `true` if anything was eaten.
178    pub(super) fn eat_any(&mut self, tokens: &[Token]) -> bool {
179        tokens.iter().any(|x| self.check(x)).then(|| self.bump()).is_some()
180    }
181
182    /// Returns an unexpected error at the current token.
183    pub(super) fn unexpected<T>(&self, expected: impl Display) -> Result<T> {
184        Err(ParserError::unexpected(&self.token.token, expected, self.token.span).into())
185    }
186
187    /// Eats the expected `token`, or errors.
188    pub(super) fn expect(&mut self, token: &Token) -> Result<Span> {
189        if self.eat(token) { Ok(self.prev_token.span) } else { self.unexpected(token) }
190    }
191
192    /// Eats one of the expected `tokens`, or errors.
193    pub(super) fn expect_any(&mut self, tokens: &[Token]) -> Result<Span> {
194        if self.eat_any(tokens) {
195            Ok(self.prev_token.span)
196        } else {
197            self.unexpected(tokens.iter().map(|x| format!("'{x}'")).collect::<Vec<_>>().join(", "))
198        }
199    }
200
201    /// Parses a list of `T`s using `inner`
202    /// The opening and closing delimiters are specified in `delimiter`,
203    /// and elements in the list are optionally separated by `sep`.
204    /// When `(list, true)` is returned, `sep` was a terminator.
205    pub(super) fn parse_list<T>(
206        &mut self,
207        delimiter: Delimiter,
208        sep: Option<Token>,
209        mut inner: impl FnMut(&mut Self) -> Result<Option<T>>,
210    ) -> Result<(Vec<T>, bool, Span)> {
211        let (open, close) = delimiter.open_close_pair();
212        let mut list = Vec::new();
213        let mut trailing = false;
214
215        // Parse opening delimiter.
216        let open_span = self.expect(&open)?;
217
218        while !self.check(&close) {
219            // Parse the element. We allow inner parser recovery through the `Option`.
220            if let Some(elem) = inner(self)? {
221                list.push(elem);
222            }
223            // Parse the separator, if any.
224            if sep.as_ref().filter(|sep| !self.eat(sep)).is_some() {
225                trailing = false;
226                break;
227            }
228
229            trailing = true;
230        }
231
232        // Parse closing delimiter.
233        let span = open_span + self.expect(&close)?;
234
235        Ok((list, trailing, span))
236    }
237
238    /// Parse a list separated by `,` and delimited by parens.
239    pub(super) fn parse_paren_comma_list<T>(
240        &mut self,
241        f: impl FnMut(&mut Self) -> Result<Option<T>>,
242    ) -> Result<(Vec<T>, bool, Span)> {
243        self.parse_list(Delimiter::Parenthesis, Some(Token::Comma), f)
244    }
245
246    /// Parse a list separated by `,` and delimited by brackets.
247    pub(super) fn parse_bracket_comma_list<T>(
248        &mut self,
249        f: impl FnMut(&mut Self) -> Result<Option<T>>,
250    ) -> Result<(Vec<T>, bool, Span)> {
251        self.parse_list(Delimiter::Bracket, Some(Token::Comma), f)
252    }
253
254    /// Returns true if the current token is `(`.
255    pub(super) fn peek_is_left_par(&self) -> bool {
256        matches!(self.token.token, Token::LeftParen)
257    }
258
259    /// Error on identifiers that are longer than SnarkVM allows.
260    pub(crate) fn check_identifier(&mut self, identifier: &Identifier) {
261        let field_capacity_bytes: usize = Field::<N>::SIZE_IN_DATA_BITS / 8;
262        let len = with_session_globals(|sg| identifier.name.as_str(sg, |s| s.len()));
263        if len > field_capacity_bytes {
264            self.emit_err(ParserError::identifier_too_long(
265                identifier.name,
266                len,
267                field_capacity_bytes,
268                identifier.span,
269            ));
270        }
271    }
272}