leo_parser/parser/
context.rs

1// Copyright (C) 2019-2025 Provable Inc.
2// This file is part of the Leo library.
3
4// The Leo library is free software: you can redistribute it and/or modify
5// it under the terms of the GNU General Public License as published by
6// the Free Software Foundation, either version 3 of the License, or
7// (at your option) any later version.
8
9// The Leo library is distributed in the hope that it will be useful,
10// but WITHOUT ANY WARRANTY; without even the implied warranty of
11// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12// GNU General Public License for more details.
13
14// You should have received a copy of the GNU General Public License
15// along with the Leo library. If not, see <https://www.gnu.org/licenses/>.
16
17use crate::{Token, tokenizer::*};
18
19use leo_ast::*;
20use leo_errors::{Handler, ParserError, Result};
21use leo_span::{Span, Symbol, with_session_globals};
22
23use snarkvm::prelude::{CanaryV0, Field, MainnetV0, TestnetV0};
24
25use std::{fmt::Display, mem};
26
27/// Stores a program in tokenized format plus additional context.
28/// May be converted into a [`Program`] AST by parsing all tokens.
29pub(crate) struct ParserContext<'a> {
30    /// Handler used to side-channel emit errors from the parser.
31    pub(crate) handler: &'a Handler,
32    /// Counter used to generate unique node ids.
33    pub(crate) node_builder: &'a NodeBuilder,
34    /// All un-bumped tokens.
35    tokens: Vec<SpannedToken>,
36    /// The current token, i.e., if `p.tokens = ['3', *, '4']`,
37    /// then after a `p.bump()`, we'll have `p.token = '3'`.
38    pub(crate) token: SpannedToken,
39    /// The previous token, i.e., if `p.tokens = ['3', *, '4']`,
40    /// then after two `p.bump()`s, we'll have `p.token = '*'` and `p.prev_token = '3'`.
41    pub(crate) prev_token: SpannedToken,
42    /// True if parsing an expression for if and loop statements -- means struct inits are not legal.
43    pub(crate) disallow_struct_construction: bool,
44    /// The name of the program being parsed.
45    pub(crate) program_name: Option<Symbol>,
46    /// The network.
47    pub(crate) network: NetworkName,
48    /// The accumulated annotations.
49    pub(crate) annotations: Vec<Annotation>,
50}
51
52/// Dummy span used to appease borrow checker.
53const DUMMY_EOF: SpannedToken = SpannedToken { token: Token::Eof, span: Span::dummy() };
54
55impl<'a> ParserContext<'a> {
56    /// Returns a new [`ParserContext`] type given a vector of tokens.
57    pub fn new(
58        handler: &'a Handler,
59        node_builder: &'a NodeBuilder,
60        mut tokens: Vec<SpannedToken>,
61        program_name: Option<Symbol>,
62        network: NetworkName,
63    ) -> Self {
64        // Strip out comments.
65        tokens.retain(|x| !matches!(x.token, Token::CommentLine(_) | Token::CommentBlock(_)));
66        // For performance we reverse so that we get cheap `.pop()`s.
67        tokens.reverse();
68
69        let token = SpannedToken::dummy();
70        let mut p = Self {
71            handler,
72            node_builder,
73            disallow_struct_construction: false,
74            prev_token: token.clone(),
75            token,
76            tokens,
77            program_name,
78            network,
79            annotations: Vec::new(),
80        };
81        p.bump();
82        p
83    }
84
85    /// Advances the parser cursor by one token.
86    ///
87    /// So e.g., if we had `previous = A`, `current = B`, and `tokens = [C, D, E]`,
88    /// then after `p.bump()`, the state will be `previous = B`, `current = C`, and `tokens = [D, E]`.
89    pub(crate) fn bump(&mut self) {
90        // Probably a bug (infinite loop), as the previous token was already EOF.
91        if let Token::Eof = self.prev_token.token {
92            panic!("attempted to bump the parser past EOF (may be stuck in a loop)");
93        }
94
95        // Extract next token, or `Eof` if there was none.
96        let next_token = self.tokens.pop().unwrap_or(SpannedToken { token: Token::Eof, span: self.token.span });
97
98        // Set the new token.
99        self.prev_token = mem::replace(&mut self.token, next_token);
100    }
101
102    /// Checks whether the current token is `tok`.
103    pub(super) fn check(&self, tok: &Token) -> bool {
104        &self.token.token == tok
105    }
106
107    /// Checks whether the current token is a `Token::Integer(_)`.
108    pub(super) fn check_int(&self) -> bool {
109        matches!(&self.token.token, Token::Integer(_))
110    }
111
112    /// Returns `true` if the next token is equal to the given token.
113    /// Advances the parser to the next token.
114    pub(super) fn eat(&mut self, token: &Token) -> bool {
115        self.check(token).then(|| self.bump()).is_some()
116    }
117
118    /// Look-ahead `dist` tokens of `self.token` and get access to that token there.
119    /// When `dist == 0` then the current token is looked at.
120    pub(super) fn look_ahead<'s, R>(&'s self, dist: usize, looker: impl FnOnce(&'s SpannedToken) -> R) -> R {
121        if dist == 0 {
122            return looker(&self.token);
123        }
124
125        let idx = match self.tokens.len().checked_sub(dist) {
126            None => return looker(&DUMMY_EOF),
127            Some(idx) => idx,
128        };
129
130        looker(self.tokens.get(idx).unwrap_or(&DUMMY_EOF))
131    }
132
133    /// Emit the error `err`.
134    pub(super) fn emit_err(&mut self, err: ParserError) {
135        self.handler.emit_err(err);
136    }
137
138    /// Returns true if the next token exists.
139    pub(crate) fn has_next(&self) -> bool {
140        !matches!(self.token.token, Token::Eof)
141    }
142
143    /// At the previous token, return and make an identifier with `name`.
144    fn mk_ident_prev(&self, name: Symbol) -> Identifier {
145        let span = self.prev_token.span;
146        Identifier { name, span, id: self.node_builder.next_id() }
147    }
148
149    /// Eats the next token if it is an identifier and returns it.
150    pub(super) fn eat_identifier(&mut self) -> Option<Identifier> {
151        if let Token::Identifier(name) = self.token.token {
152            self.bump();
153            let identifier = self.mk_ident_prev(name);
154            self.check_identifier(&identifier);
155            return Some(identifier);
156        }
157
158        None
159    }
160
161    /// Expects an [`Identifier`], or errors.
162    pub(super) fn expect_identifier(&mut self) -> Result<Identifier> {
163        self.eat_identifier()
164            .ok_or_else(|| ParserError::unexpected_str(&self.token.token, "identifier", self.token.span).into())
165    }
166
167    ///
168    /// Removes the next token if it is a [`Token::Integer(_)`] and returns it, or [None] if
169    /// the next token is not a [`Token::Integer(_)`] or if the next token does not exist.
170    ///
171    pub fn eat_whole_number(&mut self) -> Result<(NonNegativeNumber, Span)> {
172        if let Token::Integer(value) = &self.token.token {
173            let value = value.clone();
174            self.bump();
175            // Reject value if the length is over 2 and the first character is 0
176            if (value.len() > 1 && value.starts_with('0')) || value.contains('_') {
177                return Err(ParserError::tuple_index_must_be_whole_number(&self.token.token, self.token.span).into());
178            }
179
180            Ok((NonNegativeNumber::from(value), self.prev_token.span))
181        } else {
182            Err(ParserError::unexpected(&self.token.token, "integer literal", self.token.span).into())
183        }
184    }
185
186    /// Eats any of the given `tokens`, returning `true` if anything was eaten.
187    pub(super) fn eat_any(&mut self, tokens: &[Token]) -> bool {
188        tokens.iter().any(|x| self.check(x)).then(|| self.bump()).is_some()
189    }
190
191    /// Returns an unexpected error at the current token.
192    pub(super) fn unexpected<T>(&self, expected: impl Display) -> Result<T> {
193        Err(ParserError::unexpected(&self.token.token, expected, self.token.span).into())
194    }
195
196    /// Eats the expected `token`, or errors.
197    pub(super) fn expect(&mut self, token: &Token) -> Result<Span> {
198        if self.eat(token) { Ok(self.prev_token.span) } else { self.unexpected(token) }
199    }
200
201    /// Eats one of the expected `tokens`, or errors.
202    pub(super) fn expect_any(&mut self, tokens: &[Token]) -> Result<Span> {
203        if self.eat_any(tokens) {
204            Ok(self.prev_token.span)
205        } else {
206            self.unexpected(tokens.iter().map(|x| format!("'{x}'")).collect::<Vec<_>>().join(", "))
207        }
208    }
209
210    /// Parses a list of `T`s using `inner`
211    /// The opening and closing delimiters are specified in `delimiter`,
212    /// and elements in the list are optionally separated by `sep`.
213    /// When `(list, true)` is returned, `sep` was a terminator.
214    pub(super) fn parse_list<T>(
215        &mut self,
216        delimiter: Delimiter,
217        sep: Option<Token>,
218        mut inner: impl FnMut(&mut Self) -> Result<Option<T>>,
219    ) -> Result<(Vec<T>, bool, Span)> {
220        let (open, close) = delimiter.open_close_pair();
221        let mut list = Vec::new();
222        let mut trailing = false;
223
224        // Parse opening delimiter.
225        let open_span = self.expect(&open)?;
226
227        while !self.check(&close) {
228            // Parse the element. We allow inner parser recovery through the `Option`.
229            if let Some(elem) = inner(self)? {
230                list.push(elem);
231            }
232            // Parse the separator, if any.
233            if sep.as_ref().filter(|sep| !self.eat(sep)).is_some() {
234                trailing = false;
235                break;
236            }
237
238            trailing = true;
239        }
240
241        // Parse closing delimiter.
242        let span = open_span + self.expect(&close)?;
243
244        Ok((list, trailing, span))
245    }
246
247    /// Parse a list separated by `,` and delimited by parens.
248    pub(super) fn parse_paren_comma_list<T>(
249        &mut self,
250        f: impl FnMut(&mut Self) -> Result<Option<T>>,
251    ) -> Result<(Vec<T>, bool, Span)> {
252        self.parse_list(Delimiter::Parenthesis, Some(Token::Comma), f)
253    }
254
255    /// Parse a list separated by `,` and delimited by brackets.
256    pub(super) fn parse_bracket_comma_list<T>(
257        &mut self,
258        f: impl FnMut(&mut Self) -> Result<Option<T>>,
259    ) -> Result<(Vec<T>, bool, Span)> {
260        self.parse_list(Delimiter::Bracket, Some(Token::Comma), f)
261    }
262
263    /// Returns true if the current token is `(`.
264    pub(super) fn peek_is_left_par(&self) -> bool {
265        matches!(self.token.token, Token::LeftParen)
266    }
267
268    /// Error on identifiers that are longer than SnarkVM allows.
269    pub(crate) fn check_identifier(&mut self, identifier: &Identifier) {
270        let field_capacity_bytes = match self.network {
271            NetworkName::MainnetV0 => Field::<MainnetV0>::SIZE_IN_DATA_BITS / 8,
272            NetworkName::TestnetV0 => Field::<TestnetV0>::SIZE_IN_DATA_BITS / 8,
273            NetworkName::CanaryV0 => Field::<CanaryV0>::SIZE_IN_DATA_BITS / 8,
274        };
275        with_session_globals(|sg| {
276            identifier.name.as_str(sg, |s| {
277                if s.len() > field_capacity_bytes {
278                    self.emit_err(ParserError::identifier_too_long(
279                        identifier.name,
280                        s.len(),
281                        field_capacity_bytes,
282                        identifier.span,
283                    ));
284                }
285                // These are reserved for compiler-generated names.
286                if s.contains("__") {
287                    self.emit_err(ParserError::identifier_cannot_contain_double_underscore(
288                        identifier.name,
289                        identifier.span,
290                    ));
291                }
292            })
293        });
294    }
295
296    /// Returns a [`ConstParameter`] AST node if the next tokens represent a generic const parameter.
297    pub(crate) fn parse_const_parameter(&mut self) -> Result<ConstParameter> {
298        let name = self.expect_identifier()?;
299        self.expect(&Token::Colon)?;
300
301        let (type_, type_span) = self.parse_type()?;
302        let span = name.span() + type_span;
303
304        Ok(ConstParameter { identifier: name, type_, span, id: self.node_builder.next_id() })
305    }
306}