fork download
  1. import re
  2.  
  3. # Token specification
  4. token_specification = [
  5. ('KEYWORD', r'\b(if|else|while|int|float|return)\b'),
  6. ('IDENTIFIER', r'\b[a-zA-Z_][a-zA-Z0-9_]*\b'),
  7. ('NUMBER', r'\b\d+(\.\d+)?\b'),
  8. ('OPERATOR', r'[+\-*/=]'),
  9. ('SEPARATOR', r'[(),;]'),
  10. ('WHITESPACE', r'\s+'),
  11. ('MISMATCH', r'.')
  12. ]
  13.  
  14. # Compile regex
  15. token_regex = '|'.join(f'(?P<{name}>{pattern})' for name, pattern in token_specification)
  16.  
  17. def lexical_analyzer(code):
  18. tokens = []
  19. for match in re.finditer(token_regex, code):
  20. token_type = match.lastgroup
  21. token_value = match.group()
  22.  
  23. if token_type == 'WHITESPACE':
  24. continue
  25. elif token_type == 'MISMATCH':
  26. raise RuntimeError(f'Unexpected character: {token_value}')
  27. else:
  28. tokens.append((token_type, token_value))
  29. return tokens
  30.  
  31. # Example input
  32. source_code = "int sum = a + 10;"
  33.  
  34. # Run lexer
  35. result = lexical_analyzer(source_code)
  36.  
  37. # Print tokens
  38. for token in result:
  39. print(token)
  40.  
Success #stdin #stdout #stderr 0.03s 6932KB
stdin
Standard input is empty
stdout
Standard output is empty
stderr
ERROR: /home/f1h0tC/prog:39:16: Syntax error: Unexpected end of file
ERROR: '$runtoplevel'/0: Undefined procedure: program/0
   Exception: (3) program ? EOF: exit