You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

150 lines
5.0 KiB

from tweepy import StreamListener
from tweepy import OAuthHandler
from tweepy import Stream
from watson_developer_cloud import ToneAnalyzerV3
import json
import _thread
import time
import tornado.ioloop
import tornado.web
# API credentials are kept out of the source: they are read from a JSON file
# in the current working directory when the script starts.
with open('secrets.json') as f:
    secrets = json.load(f)

# Watson Tone Analyzer client, pinned to the 2016-05-19 API version.
tone_analyzer = ToneAnalyzerV3(
    username=secrets['watson_username'],
    password=secrets['watson_password'],
    version='2016-05-19',
)

# User credentials used to access the Twitter API.
consumer_key = secrets['twitter_consumer_key']
consumer_secret = secrets['twitter_consumer_secret']
access_token = secrets['twitter_access_token']
access_token_secret = secrets['twitter_access_token_secret']

# State shared by the stream listener, the Watson worker and the web handlers.
tweets = []           # cleaned tweet texts waiting to be analyzed
analyzed_data = {}    # latest raw response returned by the tone analyzer
analyzed_tweets = {}  # rebound by the worker to per-sentence score lists
analyzed_whole = {}   # rebound by the worker to the document-level scores
class handle_root(tornado.web.RequestHandler):
    """Landing page: tone legend, credit line and links to the other endpoints."""

    def get(self):
        # Each fragment is written separately, in this order.
        fragments = (
            "<u>Anger, Disgust, Fear, Joy, Sadness</u><br>",
            "A fashion tech project by <a href='http://tannercollin.com'>Tanner Collin</a>.<br>",
            "<a href='/tweets'>/tweets</a> - tweets not yet analyzed<br>",
            "<a href='/data'>/data</a> - raw json data of analysis<br>",
            "<a href='/each'>/each</a> - analysis of each tweet<br>",
            "<a href='/all'>/all</a> - analysis of the whole batch of tweets<br>",
        )
        for fragment in fragments:
            self.write(fragment)
class handle_tweets(tornado.web.RequestHandler):
    """List the queued tweets that have not been analyzed yet, one per line."""

    def get(self):
        # Imported locally so this handler does not depend on the file's
        # top-level import block.
        import html

        # Iterate over a snapshot: other threads append to and pop from the
        # shared list while this response is being built.
        for t in list(tweets):
            # Tweet text is untrusted input and the response is rendered as
            # HTML, so escape it before writing.
            self.write(html.escape(t) + "<br>")
class handle_data(tornado.web.RequestHandler):
    """Serve `analyzed_data` as indented JSON in a plain-text response."""

    def get(self):
        self.set_header('Content-Type', 'text/plain; charset="utf-8"')
        body = json.dumps(analyzed_data, indent=2)
        self.write(body)
class handle_each(tornado.web.RequestHandler):
    """Serve `analyzed_tweets` as indented JSON in a plain-text response."""

    def get(self):
        self.set_header('Content-Type', 'text/plain; charset="utf-8"')
        body = json.dumps(analyzed_tweets, indent=2)
        self.write(body)
class handle_all(tornado.web.RequestHandler):
    """Serve `analyzed_whole` as indented JSON in a plain-text response."""

    def get(self):
        self.set_header('Content-Type', 'text/plain; charset="utf-8"')
        body = json.dumps(analyzed_whole, indent=2)
        self.write(body)
def make_app():
    """Build the Tornado application, mapping each URL to its handler class."""
    routes = [
        (r"/", handle_root),
        (r"/tweets", handle_tweets),
        (r"/data", handle_data),
        (r"/each", handle_each),
        (r"/all", handle_all),
    ]
    return tornado.web.Application(routes)
# A lot of japanese tweets contain 'YYC' for some reason, so filter them out.
# These chars should find 99% of them (the original author was unsure what
# they mean). Built once at import time instead of on every call.
_JAPANESE_MARKER_CHARS = frozenset('のマッチングサイトでをつけようでをつけようでにめれるのでトラしてみよやかしとかしないでからりったおきでいませんかス人今付会冷切割合始度彼恋料方氏気無見軽')


def check_if_japanese(text):
    """Return True if `text` contains at least one of the marker characters.

    `text` is any string (or iterable of characters); an empty string
    returns False.
    """
    return not _JAPANESE_MARKER_CHARS.isdisjoint(text)
# Listener that cleans up the text of each received tweet and queues it in
# the shared `tweets` list (despite the class name, tweets are not printed).
class StdOutListener(StreamListener):
    """Stream listener that queues tweet text for later tone analysis."""

    # Remove all .!? and turn newlines into spaces so we can form our own
    # sentences; a single translate() pass replaces four chained replace()s.
    _CLEANUP_TABLE = str.maketrans({".": None, "!": None, "?": None, "\n": " "})

    def on_data(self, data):
        """Parse one raw stream payload and queue its cleaned-up text.

        `data` is a JSON string. For retweets the text of the original tweet
        is used. Payloads that carry no tweet text are skipped.
        """
        status = json.loads(data)
        # For a retweet, work on the embedded original tweet.
        status = status.get('retweeted_status', status)

        # Truncated tweets carry their complete text in 'extended_tweet'.
        if status.get('truncated') and 'extended_tweet' in status:
            text = status['extended_tweet']['full_text']
        else:
            text = status.get('text')

        if text is None:
            # NOTE(review): presumably a non-tweet stream message (e.g. a
            # limit or delete notice) -- confirm against the Twitter docs.
            # Skipping it avoids a KeyError inside the stream callback.
            return

        tweets.append(text.translate(self._CLEANUP_TABLE))

    def on_error(self, status):
        """Print the error status reported by the stream."""
        print(status)
def webserver_thread():
    """Thread entry point: serve the web application on port 8888."""
    application = make_app()
    application.listen(8888)
    # start() runs the IO loop and does not return while the loop is running.
    io_loop = tornado.ioloop.IOLoop.current()
    io_loop.start()
def watson_thread():
    """Thread entry point: analyze queued tweets in batches, forever.

    Every 0.1 s the queue is checked. Once it holds more than 10 tweets, up
    to 10 non-Japanese tweets are taken from the front, joined one per line
    and sent to the tone analyzer. The results are published through the
    module-level `analyzed_data`, `analyzed_whole` and `analyzed_tweets`.
    """
    global analyzed_data
    global analyzed_tweets
    global analyzed_whole

    batch_size = 10

    while True:
        time.sleep(0.1)
        if len(tweets) <= batch_size:
            continue

        # Keep popping until the batch is full or the queue runs dry. The
        # original decremented the `for` loop variable to "retry" after a
        # filtered tweet, which has no effect on a range() loop.
        batch = []
        while tweets and len(batch) < batch_size:
            tweet = tweets.pop(0)
            if not check_if_japanese(tweet):
                batch.append(tweet)

        if not batch:
            # Everything was filtered out; do not send empty text.
            continue

        # One tweet per line; sentence punctuation was already stripped by
        # the listener so we can form our own sentences.
        data = "".join(tweet + "\n" for tweet in batch)

        analyzed_data = tone_analyzer.tone(text=data, tones='emotion')
        print(data)
        print()

        # Scores for the batch as a whole.
        result_doc = analyzed_data["document_tone"]["tone_categories"][0]["tones"]
        analyzed_whole = [tone["score"] for tone in result_doc]
        print(analyzed_whole)
        print()

        # Scores for each sentence.
        # NOTE(review): the service presumably omits "sentences_tone" when the
        # text is a single sentence -- confirm; .get() avoids a KeyError then.
        result_sen = analyzed_data.get("sentences_tone", [])
        each_sentence = [sentence["tone_categories"][0]["tones"] for sentence in result_sen]
        analyzed_tweets = [[tone["score"] for tone in tones] for tones in each_sentence]
        print(analyzed_tweets)
        print()
if __name__ == '__main__':
    # This handles Twitter authentication and the connection to the Twitter
    # Streaming API.
    listener = StdOutListener()
    auth = OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_token, access_token_secret)
    stream = Stream(auth, listener)

    # Start the background workers before the stream blocks the main thread.
    _thread.start_new_thread(webserver_thread, ())
    _thread.start_new_thread(watson_thread, ())

    # Filter the Twitter stream by keyword on the main thread. The original
    # passed `async=True`, which is a syntax error on Python 3.7+ because
    # `async` is a reserved keyword. The blocking call needs no
    # version-specific flag and keeps the process alive for the workers.
    stream.filter(track=['calgary', 'yyc'])