-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcsvReader.py
More file actions
46 lines (34 loc) · 1.26 KB
/
csvReader.py
File metadata and controls
46 lines (34 loc) · 1.26 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
# python 3.6
import sys as S
import csv as CS
import collections as C
import xml.etree.ElementTree as ET
import os # dir paths
def read_csv_list(list_path):
    """Return the CSV paths listed one per line in *list_path*.

    Surrounding whitespace is stripped from every line and blank lines are
    skipped, so a trailing empty line does not turn into an empty path.
    """
    with open(list_path, 'r') as list_file:
        stripped = (line.strip() for line in list_file)
        return [path for path in stripped if path]


def _find_header(rows, path):
    """Advance *rows* to the first row containing 'sentence id'; return it.

    *rows* is consumed up to and including the header, so the caller can keep
    iterating it to get the data rows. Raises ValueError if no row matches.
    """
    for row in rows:
        if 'sentence id' in row:
            return row
    raise ValueError("no header row containing 'sentence id' in " + path)


def _locate_columns(header):
    """Return ``(id_col, anno_cols)`` derived from the *header* cells.

    id_col is the index of the last cell ending in ' id' (None if there is
    none); anno_cols maps the index of every cell ending in ' class' to the
    cell text with that suffix removed.
    """
    suffix = ' class'
    id_col = None
    anno_cols = {}
    for index, cell in enumerate(header):
        if cell.endswith(' id'):
            print('ID column found!')
            id_col = index
        if cell.endswith(suffix):
            anno_cols[index] = cell[:-len(suffix)]
    return id_col, anno_cols


def build_annotation_dict(csv_paths):
    """Collect the annotations of every CSV file in *csv_paths*.

    Each file is parsed with ';' as delimiter and '|' as quote character.
    Rows before the header (the first row containing 'sentence id') are
    ignored. For every following row, each non-blank ' class' cell is
    appended as ``(annotation name, cell value)`` to the list stored under
    the row's id cell.

    Returns a ``defaultdict(list)`` keyed by id. Raises ValueError when a
    file has no header row.
    """
    anno_dict = C.defaultdict(list)
    for path in csv_paths:
        print(path)
        # newline='' lets the csv module do its own newline handling.
        with open(path, 'r', newline='') as csv_file:
            reader = CS.reader(csv_file, delimiter=';', quotechar='|')
            print('opened ' + path)
            header = _find_header(reader, path)
            # The header contains 'sentence id', which itself ends in ' id',
            # so id_col is never None at this point.
            id_col, anno_cols = _locate_columns(header)
            for row in reader:
                if len(row) <= id_col:
                    continue  # blank or truncated row: no id cell to key on
                annotation_id = row[id_col]
                for col_index, label in anno_cols.items():
                    if col_index < len(row) and row[col_index].strip():
                        anno_dict[annotation_id].append(
                            (label, row[col_index]))
    return anno_dict


if __name__ == '__main__':
    if len(S.argv) < 2:
        S.exit('usage: csvReader.py LIST_FILE')
    infile = S.argv[1]
    csvlist = read_csv_list(infile)
    AnnoDict = build_annotation_dict(csvlist)
    print(AnnoDict)  # print the resulting annotation dictionary