-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathparser.py
More file actions
executable file
·128 lines (104 loc) · 3.24 KB
/
parser.py
File metadata and controls
executable file
·128 lines (104 loc) · 3.24 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
"""
Simple S-expression parser for Eva
"""
import re
# Matches, tried in this order at each position: a string literal (closed, or
# running to the end of input when the closing quote is missing), a ';' line
# comment, a '//' line comment, or a closed '/* ... */' block comment.
# Strings come first so that comment markers inside them are left alone.
_STRING_OR_COMMENT_RE = re.compile(r'"[^"]*"?|;.*|//.*|/\*[\s\S]*?\*/')

# Characters that may start a symbol, and those allowed after the first one.
_SYMBOL_START = frozenset(
    'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_-+*=!<>/'
)
_SYMBOL_CHARS = _SYMBOL_START | frozenset('0123456789')


def tokenize(program):
    """Convert a program string into a flat list of token strings.

    Token kinds produced:
      * '(' and ')'
      * string literals, returned with their surrounding double quotes
      * runs of digits
      * symbols: letters, '_' and the operator characters -+*=!<>/,
        optionally followed by digits

    Comments (';' and '//' to end of line, '/* ... */' blocks) are removed
    before scanning, except where the marker appears inside a string
    literal. An unclosed '/*' is not a comment and is scanned as ordinary
    symbol characters. Any other character is skipped silently.

    Raises:
        Exception: if a string literal has no closing double quote.
    """
    def keep_strings_only(match):
        text = match.group(0)
        # Strings are preserved verbatim; comments are dropped.
        return text if text.startswith('"') else ''

    program = _STRING_OR_COMMENT_RE.sub(keep_strings_only, program)

    tokens = []
    length = len(program)
    i = 0
    while i < length:
        ch = program[i]

        # Whitespace only separates tokens.
        if ch.isspace():
            i += 1
            continue

        # Parentheses are single-character tokens.
        if ch in '()':
            tokens.append(ch)
            i += 1
            continue

        # String literal: everything up to the next double quote (no escapes).
        if ch == '"':
            closing = program.find('"', i + 1)
            if closing == -1:
                # Emitting a quote-less token would make the parser chop a
                # real character off the string, so fail loudly instead.
                raise Exception("Unterminated string literal")
            tokens.append(program[i:closing + 1])
            i = closing + 1
            continue

        # Number: a maximal run of digits.
        if ch.isdigit():
            j = i + 1
            while j < length and program[j].isdigit():
                j += 1
            tokens.append(program[i:j])
            i = j
            continue

        # Symbol (identifiers and operators).
        if ch in _SYMBOL_START:
            j = i + 1
            while j < length and program[j] in _SYMBOL_CHARS:
                j += 1
            tokens.append(program[i:j])
            i = j
            continue

        # Unsupported character: skipped.
        i += 1
    return tokens
def parse_tokens(tokens):
    """Parse one expression from the front of *tokens* and return its AST.

    The AST is an int for an all-digit token, a str for a string literal
    (surrounding quotes removed) or a symbol, and a list of nested ASTs for
    a parenthesised form. On success the tokens making up the parsed
    expression are removed from the front of *tokens*; anything after them
    is left in place. On error *tokens* is left unmodified.

    The parser is iterative, so nesting depth is not bounded by Python's
    recursion limit, and it walks the list by index instead of popping from
    the front, so it runs in time linear in the number of tokens.

    Raises:
        Exception: on empty input, a missing ')' or a stray ')'.
    """
    if not tokens:
        raise Exception("Unexpected end of input")

    open_lists = []  # lists whose closing ')' has not been seen yet
    pos = 0
    total = len(tokens)
    while True:
        if pos >= total:
            # Only reachable while at least one '(' is still open.
            raise Exception("Expected ')' but got end of input")
        token = tokens[pos]
        pos += 1

        if token == '(':
            open_lists.append([])
            continue

        if token == ')':
            if not open_lists:
                raise Exception("Unexpected ')'")
            node = open_lists.pop()
        elif token.isdigit():
            node = int(token)
        elif token.startswith('"'):
            # Drop the closing quote only when it is really there, so a
            # token without one does not lose its last character.
            if len(token) > 1 and token.endswith('"'):
                node = token[1:-1]
            else:
                node = token[1:]
        else:
            node = token  # symbol

        if not open_lists:
            # A complete top-level expression: consume exactly its tokens.
            del tokens[:pos]
            return node
        open_lists[-1].append(node)
def parse(program):
    """Turn Eva source text into an AST.

    The text is tokenized and the first expression is parsed from the
    resulting tokens. Source that yields no tokens gives an empty list.
    """
    token_list = tokenize(program)
    return parse_tokens(token_list) if token_list else []
# AST Type Checkers
def is_number_ast(exp):
    """Return True when *exp* is an int (bool included, as an int subclass)."""
    number_types = (int,)
    return isinstance(exp, number_types)
def is_string_ast(exp):
    """Return True when *exp* is a str that is_symbol_ast does not accept.

    NOTE(review): is_symbol_ast currently accepts every str, so this
    predicate evaluates to False for all inputs. Confirm how string
    literals are meant to be told apart from symbols in the AST.
    """
    if not isinstance(exp, str):
        return False
    return not is_symbol_ast(exp)
def is_symbol_ast(exp):
    """Return True when *exp* is a str; any str counts as a symbol here."""
    symbol_types = (str,)
    return isinstance(exp, symbol_types)
def is_list_ast(exp):
    """Return True when *exp* is a list (including an empty one)."""
    list_types = (list,)
    return isinstance(exp, list_types)
def is_tagged_list(exp, tag):
    """Return whether *exp* is a non-empty list whose first element equals *tag*."""
    if not is_list_ast(exp):
        return False
    if len(exp) == 0:
        return False
    return exp[0] == tag