-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathinfo_retrieve_v2.py
More file actions
executable file
·241 lines (183 loc) · 5.66 KB
/
info_retrieve_v2.py
File metadata and controls
executable file
·241 lines (183 loc) · 5.66 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
#!/usr/bin/python
import mechanize
import sys
import time
from HTMLParser import HTMLParser
class MLStripper(HTMLParser):
    """HTML parser that throws away the markup and keeps only the text.

    Feed it HTML with feed(); every run of character data found between tags
    is collected and can be read back with get_data().
    """

    def __init__(self):
        # Initialise through the base class rather than calling reset()
        # directly, so that any state the base initializer sets up is in
        # place before feed() is used.
        HTMLParser.__init__(self)
        self.fed = []  # text fragments, in document order

    def handle_data(self, d):
        """Parser callback: remember one fragment of text."""
        self.fed.append(d)

    def get_data(self):
        """Return all text collected so far as a single string."""
        return ''.join(self.fed)
def strip_tags(html):
    """Return the text of *html* with all tags removed.

    The markup is pushed through an MLStripper, which keeps only the
    character data it encounters.
    """
    stripper = MLStripper()
    stripper.feed(html)
    text = stripper.get_data()
    return text
def getAttr(input):
    """Pull table-cell values out of an HTML page and group them into rows.

    Only lines of *input* that contain both "dddefault" and "/TD", and that
    are either at most 70 characters long or mention "email", are kept.
    Their markup is stripped and the remaining lines are grouped seven at a
    time.

    Returns a list of 7-item lists. A trailing group with fewer than seven
    items is dropped.
    """
    group_size = 7  # number of values that make up one row

    def is_cell(line):
        # a table cell of interest: short, or one carrying an email link
        if "dddefault" not in line or "/TD" not in line:
            return False
        return len(line) <= 70 or "email" in line

    kept = "".join(line + "\n" for line in input.split('\n') if is_cell(line))
    fields = strip_tags(kept).split('\n')

    # only complete groups are returned
    full_groups = len(fields) // group_size
    return [fields[k * group_size:(k + 1) * group_size]
            for k in range(full_groups)]
def parseInput(input):
    """
    Classify a user query and normalise it for the schedule lookup.

    Spaces and capitalisation are ignored. Two kinds of input are recognised:
        1: CRN
            A plain string of digits. Recognised, but looking a course up by
            CRN is not supported, so None is returned.
        2: Course number
            e.g. cs252 or CS 25200. The number must have 3 or 5 digits; a
            3-digit number is padded with "00" to the 5-digit form.

    Returns (0, department, course number) for a valid course number, e.g.
    (0, "CS", "25200"), and None for a CRN or any input that cannot be
    understood.
    """
    # normalise: upper case, no spaces
    input = input.upper().replace(' ', '')

    # An all-digit query is a CRN, which has no department to search by.
    # (`not` is required here: `~` on a bool is bitwise and always truthy.)
    if input.isdigit():
        return None

    # find the division between department and number: CS|252
    index = 0
    for char in input:
        if char.isdigit():
            break
        index += 1
    department = input[:index]
    number = input[index:]

    # the course number must be exactly 3 or 5 digits
    if len(number) not in (3, 5) or not number.isdigit():
        return None
    if len(number) == 3:  # short form: add 00 at the end
        number += "00"
    return (0, department, number)
def getMypurdue(input, term='201410'):
    """
    Look a course up on the myPurdue schedule and report its sections.

    input is the value returned by parseInput: None, or a tuple
    (type, department, course number).
    term is the value selected in the schedule's term form; the default
    201410 is fall 2013.

    Returns None when input is None, when the department/number cannot be
    entered into the search form, or when no matching section link is found.
    Otherwise returns a list with one entry per section link:
        [course description, time, type of section, seats]
    time and type of section are "N/A" when the number of rows parsed from
    the result page differs from the number of section links (multiple
    sections are not handled yet). seats is "N/A" when no seat row could be
    parsed from the section's page.
    """
    if input is None:
        return None
    # common mistake 1: people write bio instead of biol
    if input[1] == "BIO":
        input = (input[0], "BIOL", input[2])

    br = mechanize.Browser()
    # set some headers
    br.set_handle_robots(False)
    br.addheaders = [('User-agent', 'Firefox')]
    # open schedule
    br.open("http://wl.mypurdue.purdue.edu/schedule")
    # first form: pick the term and submit
    br.form = list(br.forms())[0]
    br['p_term'] = [term]
    br.submit()

    # now we are in course selection mode
    br.form = list(br.forms())[0]
    try:
        for control in br.form.controls:
            if control.name == 'sel_subj' and control.type == 'select':
                control.value = [input[1]]
            if control.name == 'sel_crse' and control.type == 'text':
                control.value = input[2]
    except Exception:
        # The form rejected the value (presumably an unknown department):
        # treat it as bad input. Unlike a bare except, this lets
        # KeyboardInterrupt and SystemExit through.
        return None
    br.submit()
    attr_all = getAttr(br.response().read())

    # find the links to the course's sections
    links = [link for link in br.links()
             if link.text is not None
             and input[1] in link.text
             and input[2] in link.text]
    if not links:
        # course not found
        return None

    # visit each link and collect [course description, seats]
    vals_clean = []
    for link in links:
        br.follow_link(link)
        seat_rows = getAttr(br.response().read())
        # build the course description from the link text
        parts = link.text.split(" -")
        course = parts[2] + ": " + parts[0] + " (section: " + parts[3] + ")"
        # The description is kept apart from the parsed rows, so it is right
        # however many rows the page yields; seats is the third value of the
        # first row.
        seats = seat_rows[0][2] if seat_rows else "N/A"
        vals_clean.append([course, seats])
        br.back()

    # merge the result-page attributes with the per-section values; they can
    # only be paired up when there is exactly one attribute row per link
    have_attrs = len(vals_clean) == len(attr_all)
    ret_fin = []
    for i, (course, seats) in enumerate(vals_clean):
        if have_attrs:
            when, kind = attr_all[i][1], attr_all[i][5]
        else:
            when, kind = "N/A", "N/A"
        ret_fin.append([course, when, kind, seats])
    return ret_fin
def getCourse(arg):
    """
    Look up the sections for a user-supplied course query.

    arg is the raw query string, e.g. "cs252". It is parsed with parseInput
    and the result is passed to getMypurdue.

    Returns what getMypurdue returns (a list with one entry per section), or
    None when the query is invalid, the course is not found, or the result
    is empty.
    """
    ret = getMypurdue(parseInput(arg))
    # `not ret` covers both None and an empty list, so ret[0] is safe to read
    if not ret or ret[0] is None:
        return None
    return ret
if __name__ == '__main__':
    # Script entry point: look up the course named on the command line and
    # report how long the lookup took.
    if len(sys.argv) < 2:
        sys.stderr.write("usage: " + sys.argv[0] + " <course>\n")
        sys.exit(1)
    # Wall-clock time: the lookup is spent almost entirely waiting on the
    # network, which a CPU-time clock would not count.
    start = time.time()
    getCourse(sys.argv[1])
    print("global: " + str(time.time() - start))