leo_parser/tokenizer/
token.rs

1// Copyright (C) 2019-2025 Provable Inc.
2// This file is part of the Leo library.
3
4// The Leo library is free software: you can redistribute it and/or modify
5// it under the terms of the GNU General Public License as published by
6// the Free Software Foundation, either version 3 of the License, or
7// (at your option) any later version.
8
9// The Leo library is distributed in the hope that it will be useful,
10// but WITHOUT ANY WARRANTY; without even the implied warranty of
11// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12// GNU General Public License for more details.
13
14// You should have received a copy of the GNU General Public License
15// along with the Leo library. If not, see <https://www.gnu.org/licenses/>.
16
17use std::fmt;
18
19use serde::{Deserialize, Serialize};
20
21use leo_span::{Symbol, sym};
22
23/// Represents all valid Leo syntax tokens.
24///
25/// The notion of 'token' here is a bit more general than in the ABNF grammar:
26/// since it includes comments and whitespace,
27/// it corresponds to the notion of 'lexeme' in the ABNF grammar.
28/// There are also a few other differences, noted in comments below.
29#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
30pub enum Token {
31    // Comments
32    CommentLine(String),  // the string includes the starting '//' and the ending line feed
33    CommentBlock(String), // the string includes the starting '/*' and the ending '*/'
34
35    // Whitespace (we do not distinguish among different kinds here)
36    WhiteSpace,
37
38    // Literals (= atomic literals and numerals in the ABNF grammar)
39    // The string in Integer(String) consists of digits
40    // The string in AddressLit(String) has the form `aleo1...`.
41    True,
42    False,
43    Integer(String), // = numeral (including tuple index) in the ABNF grammar
44    AddressLit(String),
45    StaticString(String),
46    // The numeric literals in the ABNF grammar, which consist of numerals followed by types,
47    // are represented not as single tokens here,
48    // but as two separate tokens (one for the numeral and one for the type),
49    // enforcing, during parsing, the absence of whitespace or comments between those two tokens
50    // (see the parse_primary_expression function).
51
52    // Identifiers
53    Identifier(Symbol),
54
55    // Symbols
56    Not,
57    And,
58    AndAssign,
59    Or,
60    OrAssign,
61    BitAnd,
62    BitAndAssign,
63    BitOr,
64    BitOrAssign,
65    BitXor,
66    BitXorAssign,
67    Eq,
68    NotEq,
69    Lt,
70    LtEq,
71    Gt,
72    GtEq,
73    Add,
74    AddAssign,
75    Sub,
76    SubAssign,
77    Mul,
78    MulAssign,
79    Div,
80    DivAssign,
81    Pow,
82    PowAssign,
83    Rem,
84    RemAssign,
85    Shl,
86    ShlAssign,
87    Shr,
88    ShrAssign,
89    Assign,
90    LeftParen,
91    RightParen,
92    LeftSquare,
93    RightSquare,
94    LeftCurly,
95    RightCurly,
96    Comma,
97    Dot,
98    DotDot,
99    Semicolon,
100    Colon,
101    DoubleColon,
102    Question,
103    Arrow,
104    BigArrow,
105    Underscore,
106    At, // @ is not a symbol token in the ABNF grammar (see explanation about annotations below)
107
108    // The ABNF grammar has annotations as tokens,
109    // defined as @ immediately followed by an identifier.
110    // Here instead we regard the @ sign alone as a token (see `At` above),
111    // and we lex it separately from the identifier that is supposed to follow it in an annotation.
112    // When parsing annotations, we check that there is no whitespace or comments
113    // between the @ and the identifier, thus eventually complying to the ABNF grammar.
114    // See the parse_annotation function.
115
116    // Type keywords
117    Address,
118    Bool,
119    Field,
120    Group,
121    I8,
122    I16,
123    I32,
124    I64,
125    I128,
126    Record,
127    Scalar,
128    Signature,
129    String,
130    Struct,
131    U8,
132    U16,
133    U32,
134    U64,
135    U128,
136
137    // Other keywords
138    Aleo,
139    As,
140    Assert,
141    AssertEq,
142    AssertNeq,
143    Async,
144    Block,
145    Const,
146    Constant,
147    Constructor,
148    Else,
149    Fn,
150    For,
151    Function,
152    Future,
153    If,
154    Import,
155    In,
156    Inline,
157    Let,
158    Mapping,
159    Network,
160    Private,
161    Program,
162    Public,
163    Return,
164    Script,
165    SelfLower,
166    Transition,
167
168    // Meta tokens
169    Eof, // used to signal end-of-file, not an actual token of the language
170    Leo, // only used for error messages, not an actual keyword
171}
172
/// Generates the keyword tables for `Token` from a list of
/// `Variant => symbol` pairs.
///
/// Each pair names a unit variant of `Token` and the constant of the `sym`
/// module that belongs to it. The expansion defines:
///
/// * `KEYWORD_TOKENS` — a slice with every listed token, in listing order;
/// * `Token::is_keyword` — whether a token is one of the listed variants;
/// * `Token::keyword_to_symbol` — the symbol paired with a listed token;
/// * `Token::symbol_to_keyword` — the token paired with a listed symbol.
macro_rules! keyword_map {
    ($($variant:ident => $name:ident),* $(,)?) => {
        /// Every keyword token, in the order in which it was listed.
        pub const KEYWORD_TOKENS: &[Token] = &[$(Token::$variant),*];

        impl Token {
            /// Returns `true` if this token is one of the keyword tokens.
            pub fn is_keyword(&self) -> bool {
                // A token is a keyword exactly when it has a symbol in the table.
                self.keyword_to_symbol().is_some()
            }

            /// Returns the symbol paired with this token, or `None` if the
            /// token is not a keyword.
            pub fn keyword_to_symbol(&self) -> Option<Symbol> {
                match self {
                    $(Self::$variant => Some(sym::$name),)*
                    _ => None,
                }
            }

            /// Returns the keyword token paired with `symbol`, or `None` if
            /// the symbol does not belong to any keyword.
            pub fn symbol_to_keyword(symbol: Symbol) -> Option<Self> {
                match symbol {
                    $(sym::$name => Some(Self::$variant),)*
                    _ => None,
                }
            }
        }
    };
}
200
201// Represents all valid Leo keyword tokens.
202// This also includes the boolean literals `true` and `false`,
203// unlike the ABNF grammar, which classifies them as literals and not keywords.
204// But for the purposes of our lexer implementation,
205// it is fine to include the boolean literals in this list.
206keyword_map! {
207    Address    => address,
208    Aleo       => aleo,
209    As         => As,
210    Assert     => assert,
211    AssertEq   => assert_eq,
212    AssertNeq  => assert_neq,
213    Async      => Async,   // if you need it
214    Block      => block,
215    Bool       => bool,
216    Const      => Const,
217    Constant   => constant,
218    Constructor => constructor,
219    Else       => Else,
220    False      => False,
221    Field      => field,
222    Fn         => Fn,
223    For        => For,
224    Function   => function,
225    Future     => Future,
226    Group      => group,
227    I8         => i8,
228    I16        => i16,
229    I32        => i32,
230    I64        => i64,
231    I128       => i128,
232    If         => If,
233    Import     => import,
234    In         => In,
235    Inline     => inline,
236    Let        => Let,
237    Leo        => leo,
238    Mapping    => mapping,
239    Network    => network,
240    Private    => private,
241    Program    => program,
242    Public     => public,
243    Record     => record,
244    Return     => Return,
245    Scalar     => scalar,
246    Script     => script,
247    SelfLower  => SelfLower,
248    Signature  => signature,
249    String     => string,
250    Struct     => Struct,
251    Transition => transition,
252    True       => True,
253    U8         => u8,
254    U16        => u16,
255    U32        => u32,
256    U64        => u64,
257    U128       => u128,
258}
259
260impl fmt::Display for Token {
261    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
262        use Token::*;
263        match self {
264            CommentLine(s) => write!(f, "{s}"),
265            CommentBlock(s) => write!(f, "{s}"),
266
267            WhiteSpace => write!(f, "whitespace"),
268
269            True => write!(f, "true"),
270            False => write!(f, "false"),
271            Integer(s) => write!(f, "{s}"),
272            AddressLit(s) => write!(f, "{s}"),
273            StaticString(s) => write!(f, "\"{s}\""),
274
275            Identifier(s) => write!(f, "{s}"),
276
277            Not => write!(f, "!"),
278            And => write!(f, "&&"),
279            AndAssign => write!(f, "&&="),
280            Or => write!(f, "||"),
281            OrAssign => write!(f, "||="),
282            BitAnd => write!(f, "&"),
283            BitAndAssign => write!(f, "&="),
284            BitOr => write!(f, "|"),
285            BitOrAssign => write!(f, "|="),
286            BitXor => write!(f, "^"),
287            BitXorAssign => write!(f, "^="),
288            Eq => write!(f, "=="),
289            NotEq => write!(f, "!="),
290            Lt => write!(f, "<"),
291            LtEq => write!(f, "<="),
292            Gt => write!(f, ">"),
293            GtEq => write!(f, ">="),
294            Add => write!(f, "+"),
295            AddAssign => write!(f, "+="),
296            Sub => write!(f, "-"),
297            SubAssign => write!(f, "-="),
298            Mul => write!(f, "*"),
299            MulAssign => write!(f, "*="),
300            Div => write!(f, "/"),
301            DivAssign => write!(f, "/="),
302            Pow => write!(f, "**"),
303            PowAssign => write!(f, "**="),
304            Rem => write!(f, "%"),
305            RemAssign => write!(f, "%="),
306            Shl => write!(f, "<<"),
307            ShlAssign => write!(f, "<<="),
308            Shr => write!(f, ">>"),
309            ShrAssign => write!(f, ">>="),
310            Assign => write!(f, "="),
311            LeftParen => write!(f, "("),
312            RightParen => write!(f, ")"),
313            LeftSquare => write!(f, "["),
314            RightSquare => write!(f, "]"),
315            LeftCurly => write!(f, "{{"),
316            RightCurly => write!(f, "}}"),
317            Comma => write!(f, ","),
318            Dot => write!(f, "."),
319            DotDot => write!(f, ".."),
320            Semicolon => write!(f, ";"),
321            Colon => write!(f, ":"),
322            DoubleColon => write!(f, "::"),
323            Question => write!(f, "?"),
324            Arrow => write!(f, "->"),
325            BigArrow => write!(f, "=>"),
326            Underscore => write!(f, "_"),
327            At => write!(f, "@"),
328
329            Address => write!(f, "address"),
330            Bool => write!(f, "bool"),
331            Field => write!(f, "field"),
332            Group => write!(f, "group"),
333            I8 => write!(f, "i8"),
334            I16 => write!(f, "i16"),
335            I32 => write!(f, "i32"),
336            I64 => write!(f, "i64"),
337            I128 => write!(f, "i128"),
338            Record => write!(f, "record"),
339            Scalar => write!(f, "scalar"),
340            Signature => write!(f, "signature"),
341            String => write!(f, "string"),
342            Struct => write!(f, "struct"),
343            U8 => write!(f, "u8"),
344            U16 => write!(f, "u16"),
345            U32 => write!(f, "u32"),
346            U64 => write!(f, "u64"),
347            U128 => write!(f, "u128"),
348
349            Aleo => write!(f, "aleo"),
350            As => write!(f, "as"),
351            Assert => write!(f, "assert"),
352            AssertEq => write!(f, "assert_eq"),
353            AssertNeq => write!(f, "assert_neq"),
354            Async => write!(f, "async"),
355            Block => write!(f, "block"),
356            Const => write!(f, "const"),
357            Constant => write!(f, "constant"),
358            Constructor => write!(f, "constructor"),
359            Else => write!(f, "else"),
360            Fn => write!(f, "Fn"),
361            For => write!(f, "for"),
362            Function => write!(f, "function"),
363            Future => write!(f, "Future"),
364            If => write!(f, "if"),
365            Import => write!(f, "import"),
366            In => write!(f, "in"),
367            Inline => write!(f, "inline"),
368            Let => write!(f, "let"),
369            Mapping => write!(f, "mapping"),
370            Network => write!(f, "network"),
371            Private => write!(f, "private"),
372            Program => write!(f, "program"),
373            Public => write!(f, "public"),
374            Return => write!(f, "return"),
375            Script => write!(f, "script"),
376            SelfLower => write!(f, "self"),
377            Transition => write!(f, "transition"),
378
379            Eof => write!(f, "<eof>"),
380            Leo => write!(f, "leo"),
381        }
382    }
383}
384
/// The kinds of bracketing delimiters that can enclose a token sequence.
///
/// `Debug`, `PartialEq`, and `Eq` are derived alongside `Copy` and `Clone` so
/// that values can be printed in diagnostics and compared directly.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum Delimiter {
    /// Round brackets: `( ... )`.
    Parenthesis,
    /// Curly brackets: `{ ... }`.
    Brace,
    /// Square brackets: `[ ... ]`.
    Bracket,
}
395
396impl Delimiter {
397    /// Returns the open/close tokens that the delimiter corresponds to.
398    pub fn open_close_pair(self) -> (Token, Token) {
399        match self {
400            Self::Parenthesis => (Token::LeftParen, Token::RightParen),
401            Self::Brace => (Token::LeftCurly, Token::RightCurly),
402            Self::Bracket => (Token::LeftSquare, Token::RightSquare),
403        }
404    }
405}