// leo_parser/tokenizer/mod.rs

1// Copyright (C) 2019-2025 Provable Inc.
2// This file is part of the Leo library.
3
4// The Leo library is free software: you can redistribute it and/or modify
5// it under the terms of the GNU General Public License as published by
6// the Free Software Foundation, either version 3 of the License, or
7// (at your option) any later version.
8
9// The Leo library is distributed in the hope that it will be useful,
10// but WITHOUT ANY WARRANTY; without even the implied warranty of
11// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12// GNU General Public License for more details.
13
14// You should have received a copy of the GNU General Public License
15// along with the Leo library. If not, see <https://www.gnu.org/licenses/>.
16
17//! The tokenizer to convert Leo code text into tokens.
18//!
19//! This module contains the [`tokenize()`] function, which breaks down string text into tokens,
20//! optionally separated by whitespace.
21
22pub(crate) mod token;
23
24pub use self::token::KEYWORD_TOKENS;
25pub(crate) use self::token::*;
26
27pub(crate) mod lexer;
28pub(crate) use self::lexer::*;
29
30use leo_errors::Result;
31use leo_span::Span;
32use std::iter;
33
34/// Creates a new vector of spanned tokens from a given file path and source code text.
35pub(crate) fn tokenize(input: &str, start_pos: u32) -> Result<Vec<SpannedToken>> {
36    tokenize_iter(input, start_pos).collect()
37}
38
39/// Yields spanned tokens from the given source code text.
40///
41/// The `lo` byte position determines where spans will start.
42pub(crate) fn tokenize_iter(mut input: &str, mut lo: u32) -> impl '_ + Iterator<Item = Result<SpannedToken>> {
43    iter::from_fn(move || {
44        while !input.is_empty() {
45            let (token_len, token) = match Token::eat(input) {
46                Err(e) => return Some(Err(e)),
47                Ok(t) => t,
48            };
49            input = &input[token_len..];
50
51            let span = Span::new(lo, lo + token_len as u32);
52            lo = span.hi;
53
54            match token {
55                Token::WhiteSpace => continue,
56                _ => return Some(Ok(SpannedToken { token, span })),
57            }
58        }
59
60        None
61    })
62}
63
64#[cfg(test)]
65mod tests {
66    use super::*;
67    use leo_span::{create_session_if_not_set_then, source_map::FileName};
68    use std::fmt::Write;
69
    // Lexes a source containing one token (or comment) per line — string
    // literals (including `{}` markers and `{{`/`}}` escapes inside strings),
    // an identifier, an integer literal, every keyword, every operator and
    // punctuation token, plus line and block comments — then checks the
    // Display rendering of the resulting token stream against an expected
    // string. The leading newline and 4-space indents exercise whitespace
    // skipping in the tokenizer.
    #[test]
    fn test_tokenizer() {
        create_session_if_not_set_then(|s| {
            let raw = r#"
    "test"
    "test{}test"
    "test{}"
    "{}test"
    "test{"
    "test}"
    "test{test"
    "test}test"
    "te{{}}"
    test_ident
    12345
    address
    as
    assert
    assert_eq
    assert_neq
    async
    bool
    const
    constructor
    else
    false
    field
    for
    function
    Future
    group
    i128
    i64
    i32
    i16
    i8
    if
    in
    inline
    input
    let
    mut
    private
    program
    public
    return
    scalar
    script
    self
    signature
    string
    struct
    test
    transition
    true
    u128
    u64
    u32
    u16
    u8
    !
    !=
    &&
    (
    )
    *
    **
    +
    ,
    -
    ->
    =>
    _
    .
    ..
    /
    :
    ;
    <
    <=
    =
    ==
    >
    >=
    [
    ]
    {{
    }}
    ||
    ?
    @
    // test
    /* test */
    //"#;
            // Register the text with the session's source map so tokenize()
            // receives real absolute byte positions.
            let sf = s.source_map.new_source(raw, FileName::Custom("test".into()));
            let tokens = tokenize(&sf.src, sf.absolute_start).unwrap();
            // Render each token via Display, each followed by a single space.
            let mut output = String::new();
            for SpannedToken { token, .. } in tokens.iter() {
                write!(output, "{token} ").expect("failed to write string");
            }

            // Note (per expected output): outside string literals, `{{` and
            // `}}` lex as two separate brace tokens, and comment tokens keep
            // their text (the line comment retains its trailing newline).
            assert_eq!(
                output,
                r#""test" "test{}test" "test{}" "{}test" "test{" "test}" "test{test" "test}test" "te{{}}" test_ident 12345 address as assert assert_eq assert_neq async bool const constructor else false field for function Future group i128 i64 i32 i16 i8 if in inline input let mut private program public return scalar script self signature string struct test transition true u128 u64 u32 u16 u8 ! != && ( ) * ** + , - -> => _ . .. / : ; < <= = == > >= [ ] { { } } || ? @ // test
 /* test */ // "#
            );
        });
    }
178
179    #[test]
180    fn test_spans() {
181        create_session_if_not_set_then(|s| {
182            let raw = r#"
183ppp            test
184            // test
185            test
186            /* test */
187            test
188            /* test
189            test */
190            test
191            "#;
192
193            let sm = &s.source_map;
194            let sf = sm.new_source(raw, FileName::Custom("test".into()));
195            let tokens = tokenize(&sf.src, sf.absolute_start).unwrap();
196            let mut line_indices = vec![0];
197            for (i, c) in raw.chars().enumerate() {
198                if c == '\n' {
199                    line_indices.push(i + 1);
200                }
201            }
202            for token in tokens.iter() {
203                assert_eq!(token.token.to_string(), sm.contents_of_span(token.span).unwrap());
204            }
205        })
206    }
207}