leo_parser/tokenizer/
token.rs

1// Copyright (C) 2019-2025 Provable Inc.
2// This file is part of the Leo library.
3
4// The Leo library is free software: you can redistribute it and/or modify
5// it under the terms of the GNU General Public License as published by
6// the Free Software Foundation, either version 3 of the License, or
7// (at your option) any later version.
8
9// The Leo library is distributed in the hope that it will be useful,
10// but WITHOUT ANY WARRANTY; without even the implied warranty of
11// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12// GNU General Public License for more details.
13
14// You should have received a copy of the GNU General Public License
15// along with the Leo library. If not, see <https://www.gnu.org/licenses/>.
16
17use std::fmt;
18
19use serde::{Deserialize, Serialize};
20
21use leo_span::{Symbol, sym};
22
/// Represents all valid Leo syntax tokens.
///
/// The notion of 'token' here is a bit more general than in the ABNF grammar:
/// since it includes comments and whitespace,
/// it corresponds to the notion of 'lexeme' in the ABNF grammar.
/// There are also a few other differences, noted in comments below.
///
/// Variants are grouped, in order, into comments, whitespace, literals, identifiers,
/// symbols, type keywords, other keywords, and meta tokens.
/// The text printed for each variant is defined by the `Display` implementation of `Token`.
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub enum Token {
    // Comments (`Display` prints the stored string unchanged)
    CommentLine(String),  // the string includes the starting '//' and the ending line feed
    CommentBlock(String), // the string includes the starting '/*' and the ending '*/'

    // Whitespace (we do not distinguish among different kinds here)
    // `Display` prints the word `whitespace` for this variant, not the whitespace itself.
    WhiteSpace,

    // Literals (= atomic literals and numerals in the ABNF grammar)
    // The string in Integer(String) consists of digits
    // The string in AddressLit(String) has the form `aleo1...`.
    // The string in StaticString(String) is printed by `Display` wrapped in double quotes.
    True,
    False,
    Integer(String), // = numeral (including tuple index) in the ABNF grammar
    AddressLit(String),
    StaticString(String),
    // The numeric literals in the ABNF grammar, which consist of numerals followed by types,
    // are represented not as single tokens here,
    // but as two separate tokens (one for the numeral and one for the type),
    // enforcing, during parsing, the absence of whitespace or comments between those two tokens
    // (see the parse_primary_expression function).

    // Identifiers
    Identifier(Symbol),

    // Symbols (the comment after each variant shows the text printed by `Display`)
    Not, // !
    And, // &&
    AndAssign, // &&=
    Or, // ||
    OrAssign, // ||=
    BitAnd, // &
    BitAndAssign, // &=
    BitOr, // |
    BitOrAssign, // |=
    BitXor, // ^
    BitXorAssign, // ^=
    Eq, // ==
    NotEq, // !=
    Lt, // <
    LtEq, // <=
    Gt, // >
    GtEq, // >=
    Add, // +
    AddAssign, // +=
    Sub, // -
    SubAssign, // -=
    Mul, // *
    MulAssign, // *=
    Div, // /
    DivAssign, // /=
    Pow, // **
    PowAssign, // **=
    Rem, // %
    RemAssign, // %=
    Shl, // <<
    ShlAssign, // <<=
    Shr, // >>
    ShrAssign, // >>=
    Assign, // =
    LeftParen, // (
    RightParen, // )
    LeftSquare, // [
    RightSquare, // ]
    LeftCurly, // {
    RightCurly, // }
    Comma, // ,
    Dot, // .
    DotDot, // ..
    Semicolon, // ;
    Colon, // :
    DoubleColon, // ::
    Question, // ?
    Arrow, // ->
    BigArrow, // =>
    Underscore, // _
    At, // @ is not a symbol token in the ABNF grammar (see explanation about annotations below)

    // The ABNF grammar has annotations as tokens,
    // defined as @ immediately followed by an identifier.
    // Here instead we regard the @ sign alone as a token (see `At` above),
    // and we lex it separately from the identifier that is supposed to follow it in an annotation.
    // When parsing annotations, we check that there is no whitespace or comments
    // between the @ and the identifier, thus eventually complying with the ABNF grammar.
    // See the parse_annotation function.

    // Type keywords (`Display` prints each one as the lowercase form of the variant name)
    Address,
    Bool,
    Field,
    Group,
    I8,
    I16,
    I32,
    I64,
    I128,
    Record,
    Scalar,
    Signature,
    String,
    Struct,
    U8,
    U16,
    U32,
    U64,
    U128,

    // Other keywords
    // Most are printed by `Display` as the lowercase variant name;
    // the ones that differ are annotated below.
    Aleo,
    As,
    Assert,
    AssertEq, // printed as `assert_eq`
    AssertNeq, // printed as `assert_neq`
    Async,
    Block,
    Const,
    Constant,
    Constructor,
    Else,
    Fn, // printed capitalized, as `Fn`
    For,
    Function,
    Future, // printed capitalized, as `Future`
    If,
    Import,
    In,
    Inline,
    Let,
    Mapping,
    Network,
    Private,
    Program,
    Public,
    Return,
    Script,
    SelfLower, // printed as `self`
    Transition,

    // Meta tokens
    Eof, // used to signal end-of-file, not an actual token of the language; printed as `<eof>`
    Leo, // only used for error messages, not an actual keyword
}
172
/// Represents all valid Leo keyword tokens.
/// This also includes the boolean literals `true` and `false`,
/// unlike the ABNF grammar, which classifies them as literals and not keywords.
/// But for the purposes of our lexer implementation,
/// it is fine to include the boolean literals in this list.
///
/// This is the list consulted by `Token::is_keyword`:
/// a token is reported as a keyword exactly when it is equal to one of these entries.
///
/// NOTE(review): `Token::Block` is grouped under "Other keywords" in the `Token` enum
/// and is handled by `Token::keyword_to_symbol`, but it is not listed here,
/// so `Token::is_keyword` returns `false` for it. Confirm whether this omission is intended.
pub const KEYWORD_TOKENS: &[Token] = &[
    Token::Address,
    Token::Aleo,
    Token::As,
    Token::Assert,
    Token::AssertEq,
    Token::AssertNeq,
    Token::Async,
    Token::Bool,
    Token::Const,
    Token::Constant,
    Token::Constructor,
    Token::Else,
    Token::False,
    Token::Field,
    Token::Fn,
    Token::For,
    Token::Function,
    Token::Future,
    Token::Group,
    Token::I8,
    Token::I16,
    Token::I32,
    Token::I64,
    Token::I128,
    Token::If,
    Token::Import,
    Token::In,
    Token::Inline,
    Token::Let,
    Token::Mapping,
    Token::Network,
    Token::Private,
    Token::Program,
    Token::Public,
    Token::Record,
    Token::Return,
    Token::Scalar,
    Token::Script,
    Token::SelfLower,
    Token::Signature,
    Token::String,
    Token::Struct,
    Token::Transition,
    Token::True,
    Token::U8,
    Token::U16,
    Token::U32,
    Token::U64,
    Token::U128,
];
229
230impl Token {
231    /// Returns `true` if the `self` token equals a Leo keyword.
232    pub fn is_keyword(&self) -> bool {
233        KEYWORD_TOKENS.contains(self)
234    }
235
236    /// Converts `self` to the corresponding `Symbol` if it `is_keyword`.
237    pub fn keyword_to_symbol(&self) -> Option<Symbol> {
238        Some(match self {
239            Token::Address => sym::address,
240            Token::Aleo => sym::aleo,
241            Token::As => sym::As,
242            Token::Assert => sym::assert,
243            Token::AssertEq => sym::assert_eq,
244            Token::AssertNeq => sym::assert_neq,
245            Token::Block => sym::block,
246            Token::Bool => sym::bool,
247            Token::Const => sym::Const,
248            Token::Constant => sym::constant,
249            Token::Constructor => sym::constructor,
250            Token::Else => sym::Else,
251            Token::False => sym::False,
252            Token::Field => sym::field,
253            Token::For => sym::For,
254            Token::Function => sym::function,
255            Token::Group => sym::group,
256            Token::I8 => sym::i8,
257            Token::I16 => sym::i16,
258            Token::I32 => sym::i32,
259            Token::I64 => sym::i64,
260            Token::I128 => sym::i128,
261            Token::If => sym::If,
262            Token::Import => sym::import,
263            Token::In => sym::In,
264            Token::Inline => sym::inline,
265            Token::Let => sym::Let,
266            Token::Leo => sym::leo,
267            Token::Mapping => sym::mapping,
268            Token::Network => sym::network,
269            Token::Private => sym::private,
270            Token::Program => sym::program,
271            Token::Public => sym::public,
272            Token::Record => sym::record,
273            Token::Return => sym::Return,
274            Token::Scalar => sym::scalar,
275            Token::Script => sym::script,
276            Token::Signature => sym::signature,
277            Token::SelfLower => sym::SelfLower,
278            Token::String => sym::string,
279            Token::Struct => sym::Struct,
280            Token::Transition => sym::transition,
281            Token::True => sym::True,
282            Token::U8 => sym::u8,
283            Token::U16 => sym::u16,
284            Token::U32 => sym::u32,
285            Token::U64 => sym::u64,
286            Token::U128 => sym::u128,
287            _ => return None,
288        })
289    }
290}
291
292impl fmt::Display for Token {
293    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
294        use Token::*;
295        match self {
296            CommentLine(s) => write!(f, "{s}"),
297            CommentBlock(s) => write!(f, "{s}"),
298
299            WhiteSpace => write!(f, "whitespace"),
300
301            True => write!(f, "true"),
302            False => write!(f, "false"),
303            Integer(s) => write!(f, "{s}"),
304            AddressLit(s) => write!(f, "{s}"),
305            StaticString(s) => write!(f, "\"{s}\""),
306
307            Identifier(s) => write!(f, "{s}"),
308
309            Not => write!(f, "!"),
310            And => write!(f, "&&"),
311            AndAssign => write!(f, "&&="),
312            Or => write!(f, "||"),
313            OrAssign => write!(f, "||="),
314            BitAnd => write!(f, "&"),
315            BitAndAssign => write!(f, "&="),
316            BitOr => write!(f, "|"),
317            BitOrAssign => write!(f, "|="),
318            BitXor => write!(f, "^"),
319            BitXorAssign => write!(f, "^="),
320            Eq => write!(f, "=="),
321            NotEq => write!(f, "!="),
322            Lt => write!(f, "<"),
323            LtEq => write!(f, "<="),
324            Gt => write!(f, ">"),
325            GtEq => write!(f, ">="),
326            Add => write!(f, "+"),
327            AddAssign => write!(f, "+="),
328            Sub => write!(f, "-"),
329            SubAssign => write!(f, "-="),
330            Mul => write!(f, "*"),
331            MulAssign => write!(f, "*="),
332            Div => write!(f, "/"),
333            DivAssign => write!(f, "/="),
334            Pow => write!(f, "**"),
335            PowAssign => write!(f, "**="),
336            Rem => write!(f, "%"),
337            RemAssign => write!(f, "%="),
338            Shl => write!(f, "<<"),
339            ShlAssign => write!(f, "<<="),
340            Shr => write!(f, ">>"),
341            ShrAssign => write!(f, ">>="),
342            Assign => write!(f, "="),
343            LeftParen => write!(f, "("),
344            RightParen => write!(f, ")"),
345            LeftSquare => write!(f, "["),
346            RightSquare => write!(f, "]"),
347            LeftCurly => write!(f, "{{"),
348            RightCurly => write!(f, "}}"),
349            Comma => write!(f, ","),
350            Dot => write!(f, "."),
351            DotDot => write!(f, ".."),
352            Semicolon => write!(f, ";"),
353            Colon => write!(f, ":"),
354            DoubleColon => write!(f, "::"),
355            Question => write!(f, "?"),
356            Arrow => write!(f, "->"),
357            BigArrow => write!(f, "=>"),
358            Underscore => write!(f, "_"),
359            At => write!(f, "@"),
360
361            Address => write!(f, "address"),
362            Bool => write!(f, "bool"),
363            Field => write!(f, "field"),
364            Group => write!(f, "group"),
365            I8 => write!(f, "i8"),
366            I16 => write!(f, "i16"),
367            I32 => write!(f, "i32"),
368            I64 => write!(f, "i64"),
369            I128 => write!(f, "i128"),
370            Record => write!(f, "record"),
371            Scalar => write!(f, "scalar"),
372            Signature => write!(f, "signature"),
373            String => write!(f, "string"),
374            Struct => write!(f, "struct"),
375            U8 => write!(f, "u8"),
376            U16 => write!(f, "u16"),
377            U32 => write!(f, "u32"),
378            U64 => write!(f, "u64"),
379            U128 => write!(f, "u128"),
380
381            Aleo => write!(f, "aleo"),
382            As => write!(f, "as"),
383            Assert => write!(f, "assert"),
384            AssertEq => write!(f, "assert_eq"),
385            AssertNeq => write!(f, "assert_neq"),
386            Async => write!(f, "async"),
387            Block => write!(f, "block"),
388            Const => write!(f, "const"),
389            Constant => write!(f, "constant"),
390            Constructor => write!(f, "constructor"),
391            Else => write!(f, "else"),
392            Fn => write!(f, "Fn"),
393            For => write!(f, "for"),
394            Function => write!(f, "function"),
395            Future => write!(f, "Future"),
396            If => write!(f, "if"),
397            Import => write!(f, "import"),
398            In => write!(f, "in"),
399            Inline => write!(f, "inline"),
400            Let => write!(f, "let"),
401            Mapping => write!(f, "mapping"),
402            Network => write!(f, "network"),
403            Private => write!(f, "private"),
404            Program => write!(f, "program"),
405            Public => write!(f, "public"),
406            Return => write!(f, "return"),
407            Script => write!(f, "script"),
408            SelfLower => write!(f, "self"),
409            Transition => write!(f, "transition"),
410
411            Eof => write!(f, "<eof>"),
412            Leo => write!(f, "leo"),
413        }
414    }
415}
416
/// Describes delimiters of a token sequence.
///
/// Each variant names one kind of bracketing pair; the concrete opening and closing
/// tokens are given by `Delimiter::open_close_pair`.
///
/// Besides `Copy`/`Clone`, the type derives `Debug`, `PartialEq` and `Eq`
/// (the same comparison and debugging traits that `Token` derives),
/// so that it can be printed in diagnostics and compared in assertions.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum Delimiter {
    /// `( ... )`
    Parenthesis,
    /// `{ ... }`
    Brace,
    /// `[ ... ]`
    Bracket,
}
427
428impl Delimiter {
429    /// Returns the open/close tokens that the delimiter corresponds to.
430    pub fn open_close_pair(self) -> (Token, Token) {
431        match self {
432            Self::Parenthesis => (Token::LeftParen, Token::RightParen),
433            Self::Brace => (Token::LeftCurly, Token::RightCurly),
434            Self::Bracket => (Token::LeftSquare, Token::RightSquare),
435        }
436    }
437}