-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtweet_collector.py
More file actions
90 lines (73 loc) · 2.2 KB
/
tweet_collector.py
File metadata and controls
90 lines (73 loc) · 2.2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
# Lawrence Wang
import tweepy
from tweepy import OAuthHandler
from tweepy import Stream
from tweepy.streaming import StreamListener
import keys
import json
import time
import datetime
import sys
import os
# Terms handed to the streaming filter's ``track`` argument; a tweet is
# collected when it matches any of them. Order is kept as originally written.
key_words = [
    'hillary', 'clinton', 'donald', 'trump', 'kaine', 'pence',
    'liberal', 'conservative', 'election2016',
    'democrat', 'republican', 'gop',
    'imwithher', 'crookedhillary', 'maga', 'lockherup',
]
def parse_user(author):
    """Pick out the user fields that are kept for each collected tweet.

    Args:
        author: Mapping describing the tweet's user. It must provide the
            keys ``id_str``, ``location``, ``geo_enabled``, ``screen_name``
            and ``name``.

    Returns:
        A new dict with the keys ``id``, ``location``, ``geo_enabled``,
        ``handle`` and ``name`` holding the corresponding source values.

    Raises:
        KeyError: If ``author`` lacks one of the required keys.
    """
    # (output key, source key) pairs, in the order they appear in the result.
    field_map = (
        ('id', 'id_str'),
        ('location', 'location'),
        ('geo_enabled', 'geo_enabled'),
        ('handle', 'screen_name'),
        ('name', 'name'),
    )
    return {out_key: author[src_key] for out_key, src_key in field_map}
def parse_tweet(status):
    """Reduce a decoded tweet payload to a compact JSON record.

    Args:
        status: Mapping holding the tweet data (the caller in this file
            passes ``status._json``). It must provide ``user``, ``text``,
            ``id_str``, ``retweeted``, ``retweet_count``,
            ``favorite_count``, ``created_at``, ``coordinates`` and
            ``entities['hashtags']``.

    Returns:
        A JSON string with the keys ``author``, ``text``, ``id``,
        ``retweeted``, ``retweet_count``, ``favorite_count``, ``date``,
        ``coordinates`` and ``hashtags``; or ``None`` when the author's
        ``location`` is ``None``.

    Raises:
        KeyError: If a required key is missing from ``status``.
    """
    author = parse_user(status['user'])
    # Tweets whose author has no location set are skipped entirely.
    if author['location'] is None:
        return None

    tweet = {
        'author': author,
        'text': status['text'],
        'id': status['id_str'],
        'retweeted': status['retweeted'],
        'retweet_count': status['retweet_count'],
        'favorite_count': status['favorite_count'],
        'date': status['created_at'],
        'coordinates': status['coordinates'],
        # Keep only the hashtag text, dropping the other entity fields.
        'hashtags': [ht['text'] for ht in status['entities']['hashtags']],
    }
    return json.dumps(tweet)
class MyListener(StreamListener):
    """Stream listener that appends parsed tweets to a per-day file.

    Each accepted status is converted with ``parse_tweet`` and appended to
    ``data/raw_tweets/<date>-tweets<num>.json``, where ``<date>`` is the
    local date at write time.

    Attributes:
        num: Integer used as the numeric suffix of the output file name.
    """

    def __init__(self, num):
        """Initialise the base listener and remember the file suffix.

        Args:
            num: Integer inserted into the output file name via ``%d``.
        """
        super(MyListener, self).__init__()
        self.num = num

    def on_connect(self):
        """Report that the stream connection has been established."""
        print("Connected")

    def on_status(self, status):
        """Parse one incoming status and append it to today's output file.

        Args:
            status: Status object from the stream; its ``_json`` attribute
                is passed to ``parse_tweet``.

        Returns:
            ``True`` when the status was written or deliberately skipped,
            ``False`` when processing raised an error. (Under tweepy 3.x a
            ``False`` return is understood to stop the stream; confirm
            against the installed tweepy version.)
        """
        try:
            tweet = parse_tweet(status._json)
            if tweet is None:
                # parse_tweet rejected this status; keep streaming.
                return True
            path = 'data/raw_tweets/%s-tweets%d.json' % (
                str(datetime.datetime.now().date()), self.num)
            # The context manager closes the file even if the write fails.
            # NOTE(review): records are appended with no separator between
            # them; left as-is so existing readers of these files still work.
            with open(path, 'a') as fout:
                fout.write(tweet)
            return True
        except Exception as e:
            # Exception rather than BaseException, so KeyboardInterrupt and
            # SystemExit are not swallowed here.
            print("Error on_status: %s" % str(e))
            return False

    def on_error(self, status):
        """Log an error status reported by the stream, then pause briefly.

        Args:
            status: Error value supplied by the stream; printed as-is.

        Returns:
            ``True`` after sleeping for five seconds.
        """
        print("Error #: %s" % str(status))
        time.sleep(5)
        return True
if __name__ == "__main__":
    # Usage: python tweet_collector.py <key_number>
    # <key_number> is the 1-based position of the credential set to use in
    # keys.key_list; it is also used as the listener's output-file suffix.
    if len(sys.argv) < 2:
        sys.exit("usage: %s <key_number>" % sys.argv[0])
    key_number = int(sys.argv[1])
    # Reject 0 and negatives explicitly: a negative list index would
    # otherwise silently pick a credential set from the end of the list.
    if not 1 <= key_number <= len(keys.key_list):
        sys.exit("key_number must be between 1 and %d" % len(keys.key_list))

    i = key_number - 1
    key = keys.key_list[i]
    auth = OAuthHandler(key['API_KEY'], key['API_SECRET'])
    auth.set_access_token(key['ACCESS_TOKEN'], key['ACCESS_TOKEN_SECRET'])
    twitterStream = Stream(auth, MyListener(num=i+1))

    # Reconnect indefinitely; after a failure wait a minute before retrying.
    while True:
        try:
            print('connecting ...')
            twitterStream.filter(track=key_words, languages=['en'])
        except KeyboardInterrupt:
            # Let Ctrl-C end the collector instead of triggering a reconnect.
            print('interrupted, exiting')
            break
        except Exception as e:
            print("Error: %s" % str(e))
            time.sleep(60)