import re
# Lexer token table: (token name, regex) pairs, tried in the order listed.
# KEYWORD sits ahead of IDENTIFIER so reserved words win the alternation, and
# MISMATCH comes last as the single-character catch-all.
token_specification = [
    ('KEYWORD', r'\b(if|else|while|int|float|return)\b'),
    ('IDENTIFIER', r'\b[a-zA-Z_][a-zA-Z0-9_]*\b'),
    ('NUMBER', r'\b\d+(\.\d+)?\b'),
    ('OPERATOR', r'[+\-*/=]'),
    ('SEPARATOR', r'[(),;]'),
    ('WHITESPACE', r'\s+'),
    ('MISMATCH', r'.'),
]

# Master pattern: every entry becomes a named group, and the groups are OR-ed
# together so a match's `lastgroup` reports which token kind fired.
token_regex = '|'.join(
    '(?P<%s>%s)' % entry for entry in token_specification
)
def lexical_analyzer(code):
    """Split *code* into a list of ``(token_type, token_value)`` tuples.

    The input is scanned left to right with the module-level ``token_regex``;
    the name of the group that matched becomes the token type.

    Args:
        code: Source text to tokenize.

    Returns:
        The recognized tokens in order of appearance. Whitespace runs are
        dropped and never appear in the result.

    Raises:
        RuntimeError: At the first character that only the ``MISMATCH``
            catch-all accepts.
    """
    collected = []
    for found in re.finditer(token_regex, code):
        kind = found.lastgroup
        # Whitespace only separates tokens; it is not a token itself.
        if kind == 'WHITESPACE':
            continue
        text = found.group()
        # The catch-all fired, so no real token pattern fits this character.
        if kind == 'MISMATCH':
            raise RuntimeError(f'Unexpected character: {text}')
        collected.append((kind, text))
    return collected
# Demonstration: tokenize one sample statement and show the outcome.
source_code = "int sum = a + 10;"
result = lexical_analyzer(source_code)

# One token tuple per output line; nothing is printed for an empty result.
if result:
    print(*result, sep="\n")
aW1wb3J0IHJlCgojIFRva2VuIHNwZWNpZmljYXRpb24KdG9rZW5fc3BlY2lmaWNhdGlvbiA9IFsKICAgICgnS0VZV09SRCcsICAgIHInXGIoaWZ8ZWxzZXx3aGlsZXxpbnR8ZmxvYXR8cmV0dXJuKVxiJyksCiAgICAoJ0lERU5USUZJRVInLCByJ1xiW2EtekEtWl9dW2EtekEtWjAtOV9dKlxiJyksCiAgICAoJ05VTUJFUicsICAgICByJ1xiXGQrKFwuXGQrKT9cYicpLAogICAgKCdPUEVSQVRPUicsICAgcidbK1wtKi89XScpLAogICAgKCdTRVBBUkFUT1InLCAgcidbKCksO10nKSwKICAgICgnV0hJVEVTUEFDRScsIHInXHMrJyksCiAgICAoJ01JU01BVENIJywgICByJy4nKQpdCgojIENvbXBpbGUgcmVnZXgKdG9rZW5fcmVnZXggPSAnfCcuam9pbihmJyg/UDx7bmFtZX0+e3BhdHRlcm59KScgZm9yIG5hbWUsIHBhdHRlcm4gaW4gdG9rZW5fc3BlY2lmaWNhdGlvbikKCmRlZiBsZXhpY2FsX2FuYWx5emVyKGNvZGUpOgogICAgdG9rZW5zID0gW10KICAgIGZvciBtYXRjaCBpbiByZS5maW5kaXRlcih0b2tlbl9yZWdleCwgY29kZSk6CiAgICAgICAgdG9rZW5fdHlwZSA9IG1hdGNoLmxhc3Rncm91cAogICAgICAgIHRva2VuX3ZhbHVlID0gbWF0Y2guZ3JvdXAoKQoKICAgICAgICBpZiB0b2tlbl90eXBlID09ICdXSElURVNQQUNFJzoKICAgICAgICAgICAgY29udGludWUKICAgICAgICBlbGlmIHRva2VuX3R5cGUgPT0gJ01JU01BVENIJzoKICAgICAgICAgICAgcmFpc2UgUnVudGltZUVycm9yKGYnVW5leHBlY3RlZCBjaGFyYWN0ZXI6IHt0b2tlbl92YWx1ZX0nKQogICAgICAgIGVsc2U6CiAgICAgICAgICAgIHRva2Vucy5hcHBlbmQoKHRva2VuX3R5cGUsIHRva2VuX3ZhbHVlKSkKICAgIHJldHVybiB0b2tlbnMKCiMgRXhhbXBsZSBpbnB1dApzb3VyY2VfY29kZSA9ICJpbnQgc3VtID0gYSArIDEwOyIKCiMgUnVuIGxleGVyCnJlc3VsdCA9IGxleGljYWxfYW5hbHl6ZXIoc291cmNlX2NvZGUpCgojIFByaW50IHRva2Vucwpmb3IgdG9rZW4gaW4gcmVzdWx0OgogICAgcHJpbnQodG9rZW4pCg==