1use leo_errors::{Handler, ParserError};
18use logos::Logos;
19use std::sync::LazyLock;
20
/// Classification of an identifier-like lexeme.
///
/// Carried as the payload of `Token::IdVariants`; the variant is chosen by the
/// lexer callbacks attached to that token (see `id_variant`).
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum IdVariants {
    /// A plain identifier that is not followed by any of the suffixes below.
    Identifier,
    /// A name introduced by a leading underscore, e.g. `_name`.
    Intrinsic,
    /// An identifier followed by one or more `::segment` parts, e.g. `a::b::c`.
    Path,
    /// An identifier followed by `.aleo`, e.g. `name.aleo`.
    ProgramId,
    /// An identifier followed by `.aleo/` and a further name, e.g. `name.aleo/item`.
    Locator,
}
29
30fn id_variant(lex: &mut logos::Lexer<Token>) -> IdVariants {
31 static REGEX_LOCATOR: LazyLock<regex::Regex> =
33 LazyLock::new(|| regex::Regex::new(r"^\.aleo/[a-zA-Z][a-zA-Z0-9_]*").unwrap());
34 static REGEX_PROGRAM_ID: LazyLock<regex::Regex> = LazyLock::new(|| regex::Regex::new(r"^\.aleo\b").unwrap());
35 static REGEX_PATH: LazyLock<regex::Regex> =
36 LazyLock::new(|| regex::Regex::new(r"^(?:::[a-zA-Z][a-zA-Z0-9_]*)+").unwrap());
37
38 if let Some(found) = REGEX_LOCATOR.find(lex.remainder()) {
39 lex.bump(found.len());
40 IdVariants::Locator
41 } else if let Some(found) = REGEX_PROGRAM_ID.find(lex.remainder()) {
42 lex.bump(found.len());
43 IdVariants::ProgramId
44 } else if let Some(found) = REGEX_PATH.find(lex.remainder()) {
45 lex.bump(found.len());
46 IdVariants::Path
47 } else if lex.remainder().starts_with("_") {
48 IdVariants::Intrinsic
49 } else {
50 IdVariants::Identifier
51 }
52}
53
54fn comment_block(lex: &mut logos::Lexer<Token>) -> bool {
55 let mut last_asterisk = false;
56 for (index, c) in lex.remainder().char_indices() {
57 if c == '*' {
58 last_asterisk = true;
59 } else if c == '/' && last_asterisk {
60 lex.bump(index + 1);
61 return true;
62 } else if matches!(c,
63 '\u{202A}'..='\u{202E}' |
64 '\u{2066}'..='\u{2069}'
65 ) {
66 lex.bump(index);
69 return true;
70 } else {
71 last_asterisk = false;
72 }
73 }
74 false
75}
76
/// Every kind of token the lexer can produce.
///
/// The `Logos` derive generates the lexer from the `#[token]` (literal text)
/// and `#[regex]` (pattern) attributes attached to each variant.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Logos)]
pub enum Token {
    // Horizontal whitespace: spaces, tabs and form feeds.
    #[regex(r"[ \t\f]+")]
    Whitespace,

    // A single line ending, with or without a carriage return.
    #[regex(r"\r?\n")]
    Linebreak,

    // `//` up to the end of the line. The character class also excludes the
    // bidi control characters, so the comment stops right before one and the
    // character is then matched by the `Bidi` pattern below.
    #[regex(r"//[^\r\n\u{202A}-\u{202E}\u{2066}-\u{2069}]*")]
    CommentLine,

    // `/*` opener; the rest of the comment is consumed by `comment_block`.
    #[token(r"/*", comment_block)]
    CommentBlock,

    // Identifier-like lexemes. The payload records which kind was seen:
    // a leading underscore yields `Intrinsic`, a plain identifier is
    // classified by `id_variant`, and the last three patterns yield `Path`
    // for paths whose first segment is `group`, `signature` or `Future`.
    // NOTE(review): those three are also keyword tokens below, which is
    // presumably why they need dedicated path patterns — confirm.
    #[regex(r"_[a-zA-Z][a-zA-Z0-9_]*", |_| IdVariants::Intrinsic)]
    #[regex(r"[a-zA-Z][a-zA-Z0-9_]*", id_variant)]
    #[regex(r"group::[a-zA-Z][a-zA-Z0-9_]*", |_| IdVariants::Path)]
    #[regex(r"signature::[a-zA-Z][a-zA-Z0-9_]*", |_| IdVariants::Path)]
    #[regex(r"Future::[a-zA-Z][a-zA-Z0-9_]*", |_| IdVariants::Path)]
    IdVariants(IdVariants),

    // `aleo1` followed by lowercase letters and digits.
    #[regex(r"aleo1[a-z0-9]*")]
    AddressLiteral,

    // Integer literals in hexadecimal, octal, binary and decimal form. The
    // patterns accept any of `0-9`, `A-Z` and `_` after the prefix; whether
    // each digit is valid for the radix is checked in `Lexer::next`.
    #[regex(r"0x[0-9A-Z_]+")]
    #[regex(r"0o[0-9A-Z_]+")]
    #[regex(r"0b[0-9A-Z_]+")]
    #[regex(r"[0-9][0-9A-Z_]*")]
    Integer,

    // A double-quoted string containing no `"` characters.
    #[regex(r#""[^"]*""#)]
    StaticString,

    // Operators and punctuation.
    #[token("=")]
    Assign,
    #[token("!")]
    Not,
    #[token("&&")]
    And,
    #[token("&&=")]
    AndAssign,
    #[token("||")]
    Or,
    #[token("||=")]
    OrAssign,
    #[token("&")]
    BitAnd,
    #[token("&=")]
    BitAndAssign,
    #[token("|")]
    BitOr,
    #[token("|=")]
    BitOrAssign,
    #[token("^")]
    BitXor,
    #[token("^=")]
    BitXorAssign,
    #[token("==")]
    Eq,
    #[token("!=")]
    NotEq,
    #[token("<")]
    Lt,
    #[token("<=")]
    LtEq,
    #[token(">")]
    Gt,
    #[token(">=")]
    GtEq,
    #[token("+")]
    Add,
    #[token("+=")]
    AddAssign,
    #[token("-")]
    Sub,
    #[token("-=")]
    SubAssign,
    #[token("*")]
    Mul,
    #[token("*=")]
    MulAssign,
    #[token("/")]
    Div,
    #[token("/=")]
    DivAssign,
    #[token("**")]
    Pow,
    #[token("**=")]
    PowAssign,
    #[token("%")]
    Rem,
    #[token("%=")]
    RemAssign,
    #[token("<<")]
    Shl,
    #[token("<<=")]
    ShlAssign,
    #[token(">>")]
    Shr,
    #[token(">>=")]
    ShrAssign,
    #[token("(")]
    LeftParen,
    #[token(")")]
    RightParen,
    #[token("[")]
    LeftSquare,
    #[token("]")]
    RightSquare,
    #[token("{")]
    LeftCurly,
    #[token("}")]
    RightCurly,
    #[token(",")]
    Comma,
    #[token(".")]
    Dot,
    #[token("..")]
    DotDot,
    #[token(";")]
    Semicolon,
    #[token(":")]
    Colon,
    #[token("::")]
    DoubleColon,
    #[token("?")]
    Question,
    #[token("->")]
    Arrow,
    #[token("=>")]
    BigArrow,
    #[token("_")]
    Underscore,
    #[token("@")]
    At,

    // Literal keywords and type keywords.
    #[token("true")]
    True,
    #[token("false")]
    False,
    #[token("none")]
    None,
    #[token("address")]
    Address,
    #[token("bool")]
    Bool,
    #[token("field")]
    Field,
    #[token("group")]
    Group,
    #[token("i8")]
    I8,
    #[token("i16")]
    I16,
    #[token("i32")]
    I32,
    #[token("i64")]
    I64,
    #[token("i128")]
    I128,
    #[token("record")]
    Record,
    #[token("scalar")]
    Scalar,
    #[token("signature")]
    Signature,
    #[token("string")]
    String,
    #[token("struct")]
    Struct,
    #[token("u8")]
    U8,
    #[token("u16")]
    U16,
    #[token("u32")]
    U32,
    #[token("u64")]
    U64,
    #[token("u128")]
    U128,

    // Remaining keywords.
    #[token("aleo")]
    Aleo,
    #[token("as")]
    As,
    #[token("assert")]
    Assert,
    #[token("assert_eq")]
    AssertEq,
    #[token("assert_neq")]
    AssertNeq,
    #[token("async")]
    Async,
    #[token("block")]
    Block,
    #[token("const")]
    Const,
    #[token("constant")]
    Constant,
    #[token("constructor")]
    Constructor,
    #[token("else")]
    Else,
    #[token("Fn")]
    Fn,
    #[token("for")]
    For,
    #[token("function")]
    Function,
    #[token("Future")]
    Future,
    #[token("if")]
    If,
    #[token("import")]
    Import,
    #[token("in")]
    In,
    #[token("inline")]
    Inline,
    #[token("let")]
    Let,
    #[token("mapping")]
    Mapping,
    #[token("storage")]
    Storage,
    #[token("network")]
    Network,
    #[token("private")]
    Private,
    #[token("program")]
    Program,
    #[token("public")]
    Public,
    #[token("return")]
    Return,
    #[token("script")]
    Script,
    #[token("self")]
    SelfLower,
    #[token("transition")]
    Transition,

    // A single bidirectional control character. `Lexer::next` reports an
    // error and stops when it sees this token.
    #[regex(r"[\u{202A}-\u{202E}\u{2066}-\u{2069}]")]
    Bidi,

    // No lexer attribute is attached, so no pattern yields this variant.
    // NOTE(review): presumably a placeholder the grammar can refer to — confirm.
    Never,
}
353
354impl Token {
355 pub fn str_user(token_s: &str) -> Option<&'static str> {
359 let v = match token_s {
360 "Identifier" => "an identifier",
366 "AddressLiteral" => "an address literal",
367 "ProgramId" => "a program id",
368
369 "Integer" => "an integer literal",
370
371 "StaticString" => "a static string",
372
373 "Assign" => "'='",
375 "Not" => "'!'",
376 "And" => "'&&'",
377 "AndAssign" => "'&&='",
378 "Or" => "'||'",
379 "OrAssign" => "'||='",
380 "BitAnd" => "'&'",
381 "BitAndAssign" => "'&='",
382 "BitOr" => "'|'",
383 "BitOrAssign" => "'|='",
384 "BitXor" => "'^'",
385 "BitXorAssign" => "'&='",
386 "Eq" => "'=='",
387 "NotEq" => "'!='",
388 "Lt" => "'<'",
389 "LtEq" => "'<='",
390 "Gt" => "'>'",
391 "GtEq" => "'>='",
392 "Add" => "'+'",
393 "AddAssign" => "'+='",
394 "Sub" => "'-'",
395 "SubAssign" => "'-='",
396 "Mul" => "'*'",
397 "MulAssign" => "'*='",
398 "Div" => "'/'",
399 "DivAssign" => "'/='",
400 "Pow" => "'**'",
401 "PowAssign" => "'**='",
402 "Rem" => "'%'",
403 "RemAssign" => "'%='",
404 "Shl" => "'<<'",
405 "ShlAssign" => "'<<='",
406 "Shr" => "'>>'",
407 "ShrAssign" => "'>>='",
408 "LeftParen" => "'('",
409 "RightParen" => "')'",
410 "LeftSquare" => "'['",
411 "RightSquare" => "']'",
412 "LeftCurly" => "'{'",
413 "RightCurly" => "'}'",
414 "Comma" => "','",
415 "Dot" => "'.'",
416 "DotDot" => "'..'",
417 "Semicolon" => "';'",
418 "Colon" => "':'",
419 "DoubleColon" => "'::'",
420 "Question" => "'?'",
421 "Arrow" => "'->'",
422 "BigArrow" => "'=>'",
423 "Underscore" => "'_'",
424 "At" => "'@'",
425
426 "True" => "'true'",
428 "False" => "'false'",
429 "Address" => "'address",
430 "Bool" => "'bool'",
431 "Field" => "'field'",
432 "Group" => "'group'",
433 "I8" => "'i8'",
434 "I16" => "'i16'",
435 "I32" => "'i32'",
436 "I64" => "'i64'",
437 "I128" => "'i128'",
438 "Record" => "'record'",
439 "Scalar" => "'scalar'",
440 "Signature" => "'signature'",
441 "String" => "a string",
442 "Struct" => "'struct'",
443 "U8" => "'u8'",
444 "U16" => "'u16'",
445 "U32" => "'u32'",
446 "U64" => "'u64'",
447 "U128" => "'u128'",
448
449 "Aleo" => "'aleo'",
450 "As" => "'as'",
451 "Assert" => "'assert'",
452 "AssertEq" => "'assert_eq'",
453 "AssertNeq" => "'assert_neq'",
454 "Async" => "'async'",
455 "Block" => "'block'",
456 "Const" => "'const'",
457 "Constant" => "'constant'",
458 "Constructor" => "'constructor'",
459 "Else" => "'else'",
460 "Fn" => "'Fn'",
461 "For" => "'for'",
462 "Function" => "'function'",
463 "Future" => "'future'",
464 "If" => "'if'",
465 "Import" => "'import'",
466 "In" => "'in'",
467 "Inline" => "'inline'",
468 "Let" => "'let'",
469 "Mapping" => "'mapping'",
470 "Storage" => "'storage'",
471 "Network" => "'network'",
472 "Private" => "'private'",
473 "Program" => "'program'",
474 "Public" => "'public'",
475 "Return" => "'return'",
476 "Script" => "'script'",
477 "SelfLower" => "'self'",
478 "Transition" => "'transition'",
479
480 "Never" => return None,
481
482 _ => return None,
483 };
484 Some(v)
485 }
486}
487
/// A lexed token together with its source text and location.
///
/// This is the token type yielded by the `Iterator` implementation of `Lexer`.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct LalrToken<'a> {
    /// The kind of token that was matched.
    pub token: Token,
    /// The slice of the input text that the token covers.
    pub text: &'a str,
    /// Location of the token; offsets include the lexer's `start_pos`.
    pub span: leo_span::Span,
}
495
/// Iterator over the tokens of a source text, built on the generated logos lexer.
pub struct Lexer<'a> {
    /// The underlying logos lexer for `Token`.
    logos_lexer: logos::Lexer<'a, Token>,
    /// Offset added to every position reported by `logos_lexer` when building spans.
    start_pos: u32,
    /// Receives the errors emitted while lexing.
    handler: Handler,
}
502
503impl<'a> Lexer<'a> {
504 pub fn new(text: &'a str, start_pos: u32, handler: Handler) -> Self {
505 Self { logos_lexer: Token::lexer(text), start_pos, handler }
506 }
507}
508
509impl<'a> Iterator for Lexer<'a> {
510 type Item = (usize, LalrToken<'a>, usize);
511
512 fn next(&mut self) -> Option<Self::Item> {
513 let next = self.logos_lexer.next()?;
514 let logos_span = self.logos_lexer.span();
515 let span =
516 leo_span::Span { lo: self.start_pos + logos_span.start as u32, hi: self.start_pos + logos_span.end as u32 };
517
518 let text = self.logos_lexer.slice();
519
520 let Ok(token) = next else {
521 self.handler.emit_err(ParserError::could_not_lex_span(text.trim(), span));
522 return None;
523 };
524
525 if matches!(token, Token::Bidi) {
526 self.handler.emit_err(ParserError::lexer_bidi_override_span(span));
527 return None;
528 } else if matches!(token, Token::Integer) {
529 let (s, radix) = if let Some(s) = text.strip_prefix("0x") {
530 (s, 16)
531 } else if let Some(s) = text.strip_prefix("0o") {
532 (s, 8)
533 } else if let Some(s) = text.strip_prefix("0b") {
534 (s, 2)
535 } else {
536 (text, 10)
537 };
538
539 if let Some(c) = s.chars().find(|&c| c != '_' && !c.is_digit(radix)) {
540 self.handler.emit_err(ParserError::wrong_digit_for_radix_span(c, radix, text, span));
541 }
542 }
543
544 let lalr_token = LalrToken { token, text, span };
545
546 Some((span.lo as usize, lalr_token, span.hi as usize))
547 }
548}