# Anger, Disgust, Fear, Joy, Sadness — streams Calgary tweets from Twitter,
# scores them with IBM Watson Tone Analyzer, and serves the results through
# a small tornado web app.
import json
import time
import _thread

import tornado.ioloop
import tornado.web
from tweepy import OAuthHandler
from tweepy import Stream
from tweepy import StreamListener
from watson_developer_cloud import ToneAnalyzerV3
|
|
|
# Load all API credentials from the local secrets file.
with open('secrets.json') as secrets_file:
    secrets = json.load(secrets_file)

# Watson Tone Analyzer client used to score each batch of tweets.
tone_analyzer = ToneAnalyzerV3(
    username=secrets['watson_username'],
    password=secrets['watson_password'],
    version='2016-05-19')
|
|
|
# Variables that contain the user credentials to access the Twitter API.
access_token = secrets['twitter_access_token']
access_token_secret = secrets['twitter_access_token_secret']
consumer_key = secrets['twitter_consumer_key']
consumer_secret = secrets['twitter_consumer_secret']

# Shared state between the stream listener, the Watson worker thread, and
# the web handlers.
# NOTE(review): these are read/written from several threads with no lock;
# this appears to rely on CPython's atomic list append/pop — confirm.
tweets = []            # raw tweet texts not yet analyzed
analyzed_data = {}     # last raw Watson response
analyzed_tweets = {}   # per-sentence emotion scores from the last batch
analyzed_whole = {}    # document-level emotion scores from the last batch
|
|
|
class handle_root(tornado.web.RequestHandler):
    """Index page: a short description plus links to every endpoint."""

    def get(self):
        lines = (
            "<u>Anger, Disgust, Fear, Joy, Sadness</u><br>",
            "A fashion tech project by <a href='http://tannercollin.com'>Tanner Collin</a>.<br>",
            "<a href='/tweets'>/tweets</a> - tweets not yet analyzed<br>",
            "<a href='/data'>/data</a> - raw json data of analysis<br>",
            "<a href='/each'>/each</a> - analysis of each tweet<br>",
            "<a href='/all'>/all</a> - analysis of the whole batch of tweets<br>",
        )
        for line in lines:
            self.write(line)
|
|
|
class handle_tweets(tornado.web.RequestHandler):
    """List the tweets that are still waiting to be analyzed."""

    def get(self):
        for tweet in tweets:
            self.write(tweet + "<br>")
|
|
|
class handle_data(tornado.web.RequestHandler):
    """Serve the raw JSON returned by the last Watson analysis call."""

    def get(self):
        self.set_header('Content-Type', 'text/plain; charset="utf-8"')
        body = json.dumps(analyzed_data, indent=2)
        self.write(body)
|
|
|
class handle_each(tornado.web.RequestHandler):
    """Serve the per-tweet (per-sentence) emotion scores as JSON."""

    def get(self):
        self.set_header('Content-Type', 'text/plain; charset="utf-8"')
        body = json.dumps(analyzed_tweets, indent=2)
        self.write(body)
|
|
|
class handle_all(tornado.web.RequestHandler):
    """Serve the whole-batch (document-level) emotion scores as JSON."""

    def get(self):
        self.set_header('Content-Type', 'text/plain; charset="utf-8"')
        body = json.dumps(analyzed_whole, indent=2)
        self.write(body)
|
|
|
def make_app():
    """Build the tornado application with every route wired up."""
    routes = [
        (r"/", handle_root),
        (r"/tweets", handle_tweets),
        (r"/data", handle_data),
        (r"/each", handle_each),
        (r"/all", handle_all),
    ]
    return tornado.web.Application(routes)
|
|
|
# A lot of japanese tweets contain 'YYC' for some reason, so filter them out
def check_if_japanese(text):
    """Return True if *text* contains any character from an empirically
    collected set of Japanese spam characters.

    This is not a general Japanese detector — the set was gathered from
    observed spam tweets in the stream.
    """
    # These chars should find 99% of them (I'm unsure what they mean)
    chars = set('のマッチングサイトでをつけようでをつけようでにめれるのでトラしてみよやかしとかしないでからりったおきでいませんかス人今付会冷切割合始度彼恋料方氏気無見軽')
    # any() already yields a bool; the original if/else returning
    # True/False was redundant.
    return any(c in chars for c in text)
|
|
|
# This is a basic listener that collects incoming tweet text into the
# shared `tweets` queue for the Watson worker thread.
class StdOutListener(StreamListener):
    """Tweepy stream listener that cleans tweet text and queues it."""

    def on_data(self, data):
        """Handle one raw JSON message from the Twitter stream."""
        j = json.loads(data)

        # For retweets, analyze the original tweet instead.
        if 'retweeted_status' in j:
            j = j['retweeted_status']

        # Stream control messages (e.g. delete notices) carry no 'text';
        # skip them instead of crashing the stream thread with a KeyError.
        if 'text' not in j:
            return True

        # Truncated tweets hide their full text under extended_tweet.
        if j.get('truncated'):
            text = j['extended_tweet']['full_text']
        else:
            text = j['text']

        # remove all .!? + newlines so we can form our own sentences
        # (one C-level pass instead of three chained .replace calls)
        text = text.translate(str.maketrans('', '', '.!?'))
        text = text.replace("\n", " ")

        tweets.append(text)
        # Returning True tells tweepy to keep the stream alive.
        return True

    def on_error(self, status):
        """Print stream errors (e.g. rate-limit status codes) to stdout."""
        print(status)
|
|
|
def webserver_thread():
    """Start the tornado server on port 8888 (blocks; run in a thread)."""
    make_app().listen(8888)
    tornado.ioloop.IOLoop.current().start()
|
|
|
def watson_thread():
    """Worker loop: batch queued tweets, analyze them with Watson, and
    publish the scores into the module-level globals the web app serves.
    """
    global tweets
    global analyzed_data
    global analyzed_tweets
    global analyzed_whole

    while True:
        time.sleep(0.1)
        if len(tweets) <= 10:
            continue

        # Collect up to 10 usable tweets.  The original code did
        # `x -= 1` on a `for x in range(10)` variable to "retry" after
        # dropping a Japanese spam tweet — that has no effect in Python,
        # so filtered tweets silently shrank the batch.  This loop keeps
        # popping until 10 valid tweets are gathered or the queue empties.
        batch = []
        while tweets and len(batch) < 10:
            tweet = tweets.pop(0)
            if not check_if_japanese(tweet):
                batch.append(tweet)
        if not batch:
            continue
        # One tweet per line; Watson splits sentences on these newlines.
        data = "".join(t + "\n" for t in batch)

        analyzed_data = tone_analyzer.tone(text=data, tones='emotion')
        print(data)
        print()

        # Document-level scores: one value per emotion for the whole batch.
        result_doc = analyzed_data["document_tone"]["tone_categories"][0]["tones"]
        analyzed_whole = [x["score"] for x in result_doc]
        print(analyzed_whole)
        print()

        # Per-sentence scores: one list of emotion values per tweet.
        result_sen = analyzed_data["sentences_tone"]
        each_sentence = [x["tone_categories"][0]["tones"] for x in result_sen]
        analyzed_tweets = [[y["score"] for y in x] for x in each_sentence]
        print(analyzed_tweets)
        print()
|
|
|
if __name__ == '__main__':
    # This handles Twitter authentication and the connection to the
    # Twitter Streaming API.
    listener = StdOutListener()
    auth = OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_token, access_token_secret)
    stream = Stream(auth, listener)

    # Filter the stream to Calgary-related keywords.  `async` became a
    # reserved word in Python 3.7, making the old `async=True` spelling a
    # SyntaxError; tweepy >= 3.7 renamed the keyword argument to
    # `is_async`.
    stream.filter(track=['calgary', 'yyc'], is_async=True)

    _thread.start_new_thread(webserver_thread, ())
    _thread.start_new_thread(watson_thread, ())
    # NOTE(review): the main thread falls through here; this appears to
    # rely on the non-daemon tweepy stream thread keeping the process
    # alive — confirm, otherwise the _thread workers die immediately.
|
|
|