leo_parser_lossless/
lib.rs

1// Copyright (C) 2019-2025 Provable Inc.
2// This file is part of the Leo library.
3
4// The Leo library is free software: you can redistribute it and/or modify
5// it under the terms of the GNU General Public License as published by
6// the Free Software Foundation, either version 3 of the License, or
7// (at your option) any later version.
8
9// The Leo library is distributed in the hope that it will be useful,
10// but WITHOUT ANY WARRANTY; without even the implied warranty of
11// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12// GNU General Public License for more details.
13
14// You should have received a copy of the GNU General Public License
15// along with the Leo library. If not, see <https://www.gnu.org/licenses/>.
16
17//! The lossless syntax tree and parser for Leo.
18
19use itertools::Itertools as _;
20use leo_errors::{Handler, LeoError, ParserError, Result};
21use leo_span::Span;
22
23// Comment me when running `cargo publish`.
24use lalrpop_util::lalrpop_mod;
25lalrpop_mod!(pub grammar);
26// Uncomment me when running `cargo publish` and be sure to generate and copy `grammar.rs` from `target/` into `src/`.
27//pub mod grammar;
28
29pub mod tokens;
30
31use tokens::*;
32
/// A tag indicating the nature of a syntax node.
///
/// Expression, statement, and type nodes carry a nested kind
/// ([`ExpressionKind`], [`StatementKind`], [`TypeKind`]) that identifies the
/// specific construct; every other variant is a plain tag.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum SyntaxKind {
    // Whitespace, line breaks, and comments.
    Whitespace,
    Linebreak,
    CommentLine,
    CommentBlock,

    /// An expression; the payload names the specific kind of expression.
    Expression(ExpressionKind),
    StructMemberInitializer,

    /// A statement; the payload names the specific kind of statement.
    Statement(StatementKind),
    /// A type; the payload names the specific kind of type.
    Type(TypeKind),
    Token,

    // Annotations.
    Annotation,
    AnnotationMember,
    AnnotationList,

    // Functions, constructors, and their parameters and outputs.
    Parameter,
    ParameterList,
    FunctionOutput,
    FunctionOutputs,
    Function,
    Constructor,

    // Const parameters and const arguments.
    ConstParameter,
    ConstParameterList,
    ConstArgumentList,

    // Struct declarations and their members.
    StructDeclaration,
    StructMemberDeclaration,
    StructMemberDeclarationList,

    Mapping,
    Storage,

    GlobalConst,

    // Imports and the top-level containers.
    Import,
    MainContents,
    ModuleContents,
    ProgramDeclaration,
}
77
/// The integer type of an integer literal, carried by [`LiteralKind::Integer`].
///
/// `SyntaxNode::suffixed_literal` selects the variant from the literal's
/// suffix token (`Token::U8` gives `U8`, and so on).
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum IntegerLiteralKind {
    // Variants starting with `U`.
    U8,
    U16,
    U32,
    U64,
    U128,

    // Variants starting with `I`.
    I8,
    I16,
    I32,
    I64,
    I128,
}
92
/// The specific integer type of a type node, carried by [`TypeKind::Integer`].
///
/// The variants mirror those of [`IntegerLiteralKind`].
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum IntegerTypeKind {
    // Variants starting with `U`.
    U8,
    U16,
    U32,
    U64,
    U128,

    // Variants starting with `I`.
    I8,
    I16,
    I32,
    I64,
    I128,
}
107
/// The specific kind of a type node, carried by [`SyntaxKind::Type`].
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum TypeKind {
    Address,
    Array,
    Boolean,
    Composite,
    Field,
    Future,
    Group,
    Identifier,
    /// An integer type; the payload names which one.
    Integer(IntegerTypeKind),
    Mapping,
    Optional,
    Scalar,
    Signature,
    String,
    Tuple,
    Vector,
    Numeric,
    Unit,
}
129
130impl From<TypeKind> for SyntaxKind {
131    fn from(value: TypeKind) -> Self {
132        SyntaxKind::Type(value)
133    }
134}
135
136impl From<IntegerTypeKind> for TypeKind {
137    fn from(value: IntegerTypeKind) -> Self {
138        TypeKind::Integer(value)
139    }
140}
141
142impl From<IntegerTypeKind> for SyntaxKind {
143    fn from(value: IntegerTypeKind) -> Self {
144        SyntaxKind::Type(TypeKind::Integer(value))
145    }
146}
147
/// The specific kind of an expression node, carried by [`SyntaxKind::Expression`].
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum ExpressionKind {
    ArrayAccess,
    AssociatedConstant,
    AssociatedFunctionCall,
    Async,
    Array,
    Binary,
    Call,
    Cast,
    Path,
    /// A literal; the payload names the specific kind of literal.
    Literal(LiteralKind),
    Locator,
    MemberAccess,
    MethodCall,
    Parenthesized,
    Repeat,
    Intrinsic,
    SpecialAccess, // TODO: fold into Intrinsic
    Struct,
    Ternary,
    Tuple,
    TupleAccess,
    Unary,
    Unit,
}
174
/// The specific kind of a literal expression, carried by [`ExpressionKind::Literal`].
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum LiteralKind {
    Address,
    Boolean,
    Field,
    Group,
    /// An integer literal; the payload names its integer type.
    Integer(IntegerLiteralKind),
    // NOTE: this is a literal kind named `None`, not `Option::None`.
    None,
    Scalar,
    // NOTE(review): presumably a numeric literal written without a type suffix — confirm against the grammar.
    Unsuffixed,
    String,
}
187
188impl From<ExpressionKind> for SyntaxKind {
189    fn from(value: ExpressionKind) -> Self {
190        SyntaxKind::Expression(value)
191    }
192}
193
194impl From<LiteralKind> for ExpressionKind {
195    fn from(value: LiteralKind) -> Self {
196        ExpressionKind::Literal(value)
197    }
198}
199
200impl From<LiteralKind> for SyntaxKind {
201    fn from(value: LiteralKind) -> Self {
202        SyntaxKind::Expression(ExpressionKind::Literal(value))
203    }
204}
205
206impl From<IntegerLiteralKind> for LiteralKind {
207    fn from(value: IntegerLiteralKind) -> Self {
208        LiteralKind::Integer(value)
209    }
210}
211
212impl From<IntegerLiteralKind> for ExpressionKind {
213    fn from(value: IntegerLiteralKind) -> Self {
214        ExpressionKind::Literal(LiteralKind::Integer(value))
215    }
216}
217
218impl From<IntegerLiteralKind> for SyntaxKind {
219    fn from(value: IntegerLiteralKind) -> Self {
220        SyntaxKind::Expression(ExpressionKind::Literal(LiteralKind::Integer(value)))
221    }
222}
223
/// The specific kind of a statement node, carried by [`SyntaxKind::Statement`].
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum StatementKind {
    Assert,
    AssertEq,
    AssertNeq,
    Assign,
    Block,
    Conditional,
    Const,
    Definition,
    Expression,
    Iteration,
    Return,
}
238
239impl From<StatementKind> for SyntaxKind {
240    fn from(value: StatementKind) -> Self {
241        SyntaxKind::Statement(value)
242    }
243}
244
/// An untyped node in the lossless syntax tree.
#[derive(Debug, Clone)]
pub struct SyntaxNode<'a> {
    /// A tag indicating the nature of the node.
    pub kind: SyntaxKind,
    /// The text from the source if applicable.
    ///
    /// Nodes assembled purely from child nodes use the empty string here.
    pub text: &'a str,
    /// The source span associated with this node.
    pub span: leo_span::Span,
    /// The child nodes, in the order they were supplied at construction.
    pub children: Vec<SyntaxNode<'a>>,
}
255
256impl<'a> SyntaxNode<'a> {
257    fn new_token(kind: SyntaxKind, token: LalrToken<'a>, children: Vec<Self>) -> Self {
258        Self { kind, text: token.text, span: token.span, children }
259    }
260
261    fn new(kind: impl Into<SyntaxKind>, children: impl IntoIterator<Item = Self>) -> Self {
262        let children: Vec<Self> = children.into_iter().collect();
263        let lo = children.first().unwrap().span.lo;
264        let hi = children.last().unwrap().span.hi;
265        let span = leo_span::Span { lo, hi };
266        Self { kind: kind.into(), text: "", span, children }
267    }
268
269    fn suffixed_literal(integer: LalrToken<'a>, suffix: LalrToken<'a>, children: Vec<Self>) -> Self {
270        let kind: SyntaxKind = match suffix.token {
271            Token::Field => LiteralKind::Field.into(),
272            Token::Group => LiteralKind::Group.into(),
273            Token::Scalar => LiteralKind::Scalar.into(),
274            Token::I8 => IntegerLiteralKind::I8.into(),
275            Token::I16 => IntegerLiteralKind::I16.into(),
276            Token::I32 => IntegerLiteralKind::I32.into(),
277            Token::I64 => IntegerLiteralKind::I64.into(),
278            Token::I128 => IntegerLiteralKind::I128.into(),
279            Token::U8 => IntegerLiteralKind::U8.into(),
280            Token::U16 => IntegerLiteralKind::U16.into(),
281            Token::U32 => IntegerLiteralKind::U32.into(),
282            Token::U64 => IntegerLiteralKind::U64.into(),
283            Token::U128 => IntegerLiteralKind::U128.into(),
284            x => panic!("Error in grammar.lalrpop: {x:?}"),
285        };
286
287        let lo = integer.span.lo;
288        let hi = suffix.span.hi;
289        let span = leo_span::Span { lo, hi };
290
291        Self { kind, text: integer.text, span, children }
292    }
293
294    fn binary_expression(lhs: Self, op: Self, rhs: Self) -> Self {
295        let span = leo_span::Span { lo: lhs.span.lo, hi: rhs.span.hi };
296        let children = vec![lhs, op, rhs];
297        SyntaxNode { kind: ExpressionKind::Binary.into(), text: "", span, children }
298    }
299}
300
/// Returns the first two `::`-separated components of `text`.
///
/// Returns `None` when `text` contains no `::` separator. Any components
/// after the second are ignored.
fn two_path_components(text: &str) -> Option<(&str, &str)> {
    let mut components = text.split("::");
    let first = components.next()?;
    let second = components.next()?;
    Some((first, second))
}
309
310pub fn parse_expression<'a>(handler: Handler, source: &'a str, start_pos: u32) -> Result<SyntaxNode<'a>> {
311    let parser = grammar::ExprParser::new();
312    parse_general(handler.clone(), source, start_pos, |lexer| parser.parse(&handler, lexer))
313}
314
315pub fn parse_statement<'a>(handler: Handler, source: &'a str, start_pos: u32) -> Result<SyntaxNode<'a>> {
316    let parser = grammar::StatementParser::new();
317    parse_general(handler.clone(), source, start_pos, |lexer| parser.parse(&handler, lexer))
318}
319
320pub fn parse_module<'a>(handler: Handler, source: &'a str, start_pos: u32) -> Result<SyntaxNode<'a>> {
321    let parser = grammar::ModuleContentsParser::new();
322    parse_general(handler.clone(), source, start_pos, |lexer| parser.parse(&handler, lexer))
323}
324
325pub fn parse_main<'a>(handler: Handler, source: &'a str, start_pos: u32) -> Result<SyntaxNode<'a>> {
326    let parser = grammar::MainContentsParser::new();
327    parse_general(handler.clone(), source, start_pos, |lexer| parser.parse(&handler, lexer))
328}
329
330fn check_identifier(token: &LalrToken<'_>, handler: &Handler) {
331    const MAX_IDENTIFIER_LEN: usize = 31usize;
332    if token.token == Token::IdVariants(IdVariants::Identifier) {
333        if token.text.len() > MAX_IDENTIFIER_LEN {
334            handler.emit_err(leo_errors::ParserError::identifier_too_long(
335                token.text,
336                token.text.len(),
337                MAX_IDENTIFIER_LEN,
338                token.span,
339            ));
340        }
341        // These are reserved for compiler-generated names.
342        if token.text.contains("__") {
343            handler.emit_err(ParserError::identifier_cannot_contain_double_underscore(token.text, token.span));
344        }
345    }
346}
347
348fn parse_general<'a>(
349    handler: Handler,
350    source: &'a str,
351    start_pos: u32,
352    parse: impl FnOnce(
353        &mut Lexer<'a>,
354    ) -> Result<SyntaxNode<'a>, lalrpop_util::ParseError<usize, LalrToken<'a>, &'static str>>,
355) -> Result<SyntaxNode<'a>> {
356    let mut lexer = tokens::Lexer::new(source, start_pos, handler.clone());
357    match parse(&mut lexer) {
358        Ok(val) => {
359            handler.last_err()?;
360            Ok(val)
361        }
362        Err(e) => {
363            if matches!(e, lalrpop_util::ParseError::UnrecognizedEof { .. }) {
364                // We don't want to redundantly report the EOF error, when the meaningfull
365                // errors are recorded in the handler.
366                handler.last_err()?;
367            }
368            Err(convert(e, source, start_pos))
369        }
370    }
371}
372
373// We can't implement From<lalrpop_util::ParseError> since both that
374// trait and leo_errors::Error are defined in other crates.
375fn convert(
376    error: lalrpop_util::ParseError<usize, LalrToken<'_>, &'static str>,
377    source: &str,
378    start_pos: u32,
379) -> LeoError {
380    match error {
381        lalrpop_util::ParseError::UnrecognizedToken { token, expected } => {
382            let expected = expected.iter().flat_map(|s| tokens::Token::str_user(s)).format(", ");
383            ParserError::unexpected(token.1.text, expected, token.1.span).into()
384        }
385        lalrpop_util::ParseError::UnrecognizedEof { location, .. } => {
386            let (lo, hi) = if source.is_empty() {
387                (start_pos, start_pos)
388            } else if location >= source.len() + start_pos as usize {
389                // Generally lalrpop reports the `location` for this error as
390                // one character past the end of the source. So let's
391                // back up one character.
392                // Can't just subtract 1 as we may not be on a character boundary.
393                let lo = source.char_indices().last().unwrap().0 as u32 + start_pos;
394                (lo, lo + 1)
395            } else {
396                (location as u32, location as u32 + 1)
397            };
398            ParserError::unexpected_eof(Span { lo, hi }).into()
399        }
400        x => panic!("ERR: {x:?}"),
401    }
402}