-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathscanner.py
More file actions
121 lines (97 loc) · 3.87 KB
/
scanner.py
File metadata and controls
121 lines (97 loc) · 3.87 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
import sys
class Scanner:
    """Line-oriented lexical scanner for the file-management script language.

    The scanner walks the program one line at a time and classifies each
    lexeme as a KEYWORD, IDENTIFIER, STRING, SEPARATOR or OPERATOR token.
    Anything it cannot classify is recorded as an error message instead of
    raising, so a single pass reports every problem it finds.
    """

    # Reserved words. Lookup is done on the lower-cased lexeme, so keywords
    # are recognised case-insensitively.
    KEYWORDS = frozenset({
        'path', 'list', 'define', 'call', 'in', 'to', 'string',
        'bulk_rename_files', 'create_directory', 'copy_files', 'move_files',
        'sync_files', 'display_files', 'ends_with', 'not', 'where',
        'append', 'get_files', 'foreach', 'create_new_file', 'add_content',
    })
    # Single-character separators and operators.
    SEPARATORS = "();,{}[]"
    OPERATORS = "+="

    def __init__(self):
        self.cursor = 0    # index of the next character in the current line
        self.tokens = []   # (token_type, token_value) pairs, in source order
        self.errors = []   # human-readable error messages, in source order

    def clear(self):
        """Discard every token and error collected so far."""
        self.tokens = []
        self.errors = []

    def advance(self):
        """Move the cursor one character forward."""
        self.cursor += 1

    def reset_cursor(self):
        """Move the cursor back to the start of the line."""
        self.cursor = 0

    def add_tokens(self, token_type, token_value):
        """Append one ``(token_type, token_value)`` pair to the token list."""
        self.tokens.append((token_type, token_value))

    def scan(self, program):
        """Tokenize *program* and return ``(tokens, errors)``.

        Args:
            program: Full source text; lines are separated by ``"\\n"``.

        Returns:
            A pair ``(output, errors)`` where ``output`` is a list of strings
            formatted as ``<TYPE, 'lexeme'>`` and ``errors`` is the list of
            error messages. Line numbers in messages are 1-based; positions
            are 0-based character indexes within the line.
        """
        # Start from a clean slate so results of an earlier scan() on the
        # same instance do not leak into this one.
        self.clear()

        for line_number, line in enumerate(program.split("\n"), start=1):
            self.reset_cursor()
            line_length = len(line)
            while self.cursor < line_length:
                c = line[self.cursor]
                if c.isspace():
                    self.advance()
                elif c.isalnum() or c == '_':
                    self._scan_word(line, line_number)
                elif c == '"':
                    self._scan_string(line)
                elif c in self.SEPARATORS:
                    self.add_tokens('SEPARATOR', c)
                    self.advance()
                elif c in self.OPERATORS:
                    self.add_tokens('OPERATOR', c)
                    self.advance()
                else:
                    self.errors.append(
                        f"Lexical error at position {self.cursor}: invalid character '{c}'")
                    self.advance()

        output = [
            f"<{token_type}, '{token_value}'>"
            for token_type, token_value in self.tokens
        ]
        return output, self.errors

    def _scan_word(self, line, line_number):
        """Consume a run of alphanumerics/underscores starting at the cursor."""
        token_start = self.cursor
        line_length = len(line)
        while self.cursor < line_length and (
                line[self.cursor].isalnum() or line[self.cursor] == '_'):
            self.advance()
        lexeme = line[token_start:self.cursor]

        # Keywords are checked first; the token keeps the original spelling.
        if lexeme.lower() in self.KEYWORDS:
            self.add_tokens('KEYWORD', lexeme)
        elif line[token_start].isdigit():
            # No token is emitted for a word that begins with a digit.
            self.errors.append(
                f"Error: Identifiers cannot start with a number at line {line_number}, position {token_start}")
        else:
            self.add_tokens('IDENTIFIER', lexeme)

    def _scan_string(self, line):
        """Consume a double-quoted string that must close on the same line."""
        token_start = self.cursor
        line_length = len(line)
        self.advance()  # step over the opening quote
        while self.cursor < line_length and line[self.cursor] != '"':
            self.advance()

        if self.cursor < line_length:
            self.advance()  # step over the closing quote
            # The token value keeps its surrounding quotes.
            self.add_tokens('STRING', line[token_start:self.cursor])
        else:
            # Reached end of line without a closing quote.
            self.errors.append(f"Lexical error: unclosed string at position {token_start}")
if __name__ == "__main__":
    # Command-line entry point: scan the file named by the single argument
    # and print every token, followed by every error, one per line.
    arg_count = len(sys.argv)
    if arg_count != 2:
        print("Argument missing: python3 scanner.py <input_file>")
        sys.exit(1)

    source_path = sys.argv[1]
    try:
        with open(source_path, 'r') as handle:
            source_text = handle.read()
    except FileNotFoundError:
        print(f"Error: File '{source_path}' not found.")
        sys.exit(1)

    # Tokens are reported first, then the errors.
    for report in Scanner().scan(source_text):
        for entry in report:
            print(entry)