// leo_parser/tokenizer/mod.rs

// Copyright (C) 2019-2025 Provable Inc.
// This file is part of the Leo library.

// The Leo library is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.

// The Leo library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.

// You should have received a copy of the GNU General Public License
// along with the Leo library. If not, see <https://www.gnu.org/licenses/>.

//! The tokenizer to convert Leo code text into tokens.
//!
//! This module contains the [`tokenize()`] function, which breaks down string text into tokens,
//! optionally separated by whitespace.
22pub(crate) mod token;
23
24pub use self::token::KEYWORD_TOKENS;
25pub(crate) use self::token::*;
26
27pub(crate) mod lexer;
28pub(crate) use self::lexer::*;
29
30use leo_errors::Result;
31use leo_span::Span;
32use std::iter;
33
34/// Creates a new vector of spanned tokens from a given file path and source code text.
35pub(crate) fn tokenize(input: &str, start_pos: u32) -> Result<Vec<SpannedToken>> {
36    tokenize_iter(input, start_pos).collect()
37}
38
39/// Yields spanned tokens from the given source code text.
40///
41/// The `lo` byte position determines where spans will start.
42pub(crate) fn tokenize_iter(mut input: &str, mut lo: u32) -> impl '_ + Iterator<Item = Result<SpannedToken>> {
43    iter::from_fn(move || {
44        while !input.is_empty() {
45            let (token_len, token) = match Token::eat(input) {
46                Err(e) => return Some(Err(e)),
47                Ok(t) => t,
48            };
49            input = &input[token_len..];
50
51            let span = Span::new(lo, lo + token_len as u32);
52            lo = span.hi;
53
54            match token {
55                Token::WhiteSpace => continue,
56                _ => return Some(Ok(SpannedToken { token, span })),
57            }
58        }
59
60        None
61    })
62}
63
#[cfg(test)]
mod tests {
    use super::*;
    use leo_span::{create_session_if_not_set_then, source_map::FileName};
    use std::fmt::Write;

    /// Tokenizes a snippet covering every token kind (string literals with
    /// braces, identifiers, integers, keywords, operators, and comments) and
    /// checks the rendered token stream against the expected output.
    #[test]
    fn test_tokenizer() {
        create_session_if_not_set_then(|s| {
            let raw = r#"
    "test"
    "test{}test"
    "test{}"
    "{}test"
    "test{"
    "test}"
    "test{test"
    "test}test"
    "te{{}}"
    test_ident
    12345
    address
    as
    assert
    assert_eq
    assert_neq
    async
    bool
    const
    else
    false
    field
    for
    function
    Future
    group
    i128
    i64
    i32
    i16
    i8
    if
    in
    inline
    input
    let
    mut
    private
    program
    public
    return
    scalar
    script
    self
    signature
    string
    struct
    test
    transition
    true
    u128
    u64
    u32
    u16
    u8
    !
    !=
    &&
    (
    )
    *
    **
    +
    ,
    -
    ->
    =>
    _
    .
    ..
    /
    :
    ;
    <
    <=
    =
    ==
    >
    >=
    [
    ]
    {{
    }}
    ||
    ?
    @
    // test
    /* test */
    //"#;
            let sf = s.source_map.new_source(raw, FileName::Custom("test".into()));
            let tokens = tokenize(&sf.src, sf.absolute_start).unwrap();
            // Render each token via Display, separated by single spaces.
            let mut output = String::new();
            for SpannedToken { token, .. } in tokens.iter() {
                write!(output, "{token} ").expect("failed to write string");
            }

            assert_eq!(
                output,
                r#""test" "test{}test" "test{}" "{}test" "test{" "test}" "test{test" "test}test" "te{{}}" test_ident 12345 address as assert assert_eq assert_neq async bool const else false field for function Future group i128 i64 i32 i16 i8 if in inline input let mut private program public return scalar script self signature string struct test transition true u128 u64 u32 u16 u8 ! != && ( ) * ** + , - -> => _ . .. / : ; < <= = == > >= [ ] { { } } || ? @ // test
 /* test */ // "#
            );
        });
    }

    /// Checks that every token's recorded span maps back to exactly the
    /// source text it was lexed from, across line and block comments
    /// (including a multi-line block comment).
    #[test]
    fn test_spans() {
        create_session_if_not_set_then(|s| {
            let raw = r#"
ppp            test
            // test
            test
            /* test */
            test
            /* test
            test */
            test
            "#;

            let sm = &s.source_map;
            let sf = sm.new_source(raw, FileName::Custom("test".into()));
            let tokens = tokenize(&sf.src, sf.absolute_start).unwrap();
            // Each token's span should round-trip through the source map to
            // the exact text that produced it.
            for token in tokens.iter() {
                assert_eq!(token.token.to_string(), sm.contents_of_span(token.span).unwrap());
            }
        })
    }
}