-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathscrape.py
More file actions
47 lines (38 loc) · 1.23 KB
/
scrape.py
File metadata and controls
47 lines (38 loc) · 1.23 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
import scholar_network
import csv
import time
from tqdm import tqdm
def load_scholar_names(
    paths: tuple[str, ...] = ("data/IPOP-Scholars.csv", "data/COPScholars.csv"),
) -> tuple[list[str], list[str]]:
    """Read scholar names and IDs from one or more CSV roster files.

    Every file is parsed with ``csv.DictReader`` and the ``Name`` and ``ID``
    columns of each row are collected, file by file and row by row.

    Args:
        paths: CSV files to read, in order. Defaults to the two roster files
            this function has always read, so calling it with no arguments
            behaves as before.

    Returns:
        An ``(authors, ids)`` pair of parallel lists: ``authors[i]`` and
        ``ids[i]`` come from the same CSV row. If a file has no ``Name`` or
        no ``ID`` column, ``None`` is stored in that position for its rows.

    Raises:
        OSError: If one of the files cannot be opened.
    """
    authors = []
    ids = []
    for path in paths:
        # "utf-8-sig" drops a leading byte-order mark, so the first header
        # name is not prefixed with "\ufeff". newline="" is what the csv
        # module asks for so line breaks inside quoted fields survive intact.
        with open(path, "r", encoding="utf-8-sig", newline="") as f:
            for row in csv.DictReader(f):
                authors.append(row.get("Name"))
                ids.append(row.get("ID"))
    return authors, ids
# Script body: scrape every scholar returned by load_scholar_names(), then
# make one more attempt for each scholar whose first attempt raised.
# NOTE(review): leading indentation was lost in this copy of the file, so the
# nesting of the statements below cannot be recovered from the text alone.
# Confirm it against the repository version before running.
authors, ids = load_scholar_names()
# Pair the two parallel lists into one {"name": ..., "id": ...} dict per scholar.
info = [{"name": x, "id": y} for x, y in zip(authors, ids)]
# Scholars whose first scrape attempt raised; consumed by the second loop.
failed = list()
# First pass: iterate over every scholar, wrapped in tqdm for progress output.
for person in tqdm(info):
try:
# Arguments are passed positionally as (id, name).
# NOTE(review): scrape_single_author is defined in scholar_network, not here;
# its return value is discarded, so it is presumably called for side effects.
scholar_network.scrape_single_author(
person.get("id", ""), person.get("name", "")
)
# Any Exception is caught: the scholar's name is printed and the record is
# queued in `failed` for the second pass.
except Exception:
print(f"{person.get('name')} failed")
failed.append(person)
# NOTE(review): pauses for 5 seconds, presumably to throttle requests. With
# indentation lost it is unclear whether this runs only after a failure, after
# every scholar, or once between the two passes. Verify before relying on it.
time.sleep(5)
# Second pass: retry only the scholars collected in `failed`.
for person in tqdm(failed):
try:
scholar_network.scrape_single_author(
person.get("id", ""), person.get("name", "")
)
# A second failure prints the exception and the scholar's name; there is no
# further retry and the record is not stored anywhere.
except Exception as e:
print(e)
print(f"{person.get('name')} failed AGAIN!!")