from tweepy import StreamListener
from tweepy import OAuthHandler
from tweepy import Stream
from watson_developer_cloud import ToneAnalyzerV3
import json
import _thread
import time
import tornado.ioloop
import tornado.web

with open('secrets.json') as f:
    secrets = json.load(f)

tone_analyzer = ToneAnalyzerV3(
    username=secrets['watson_username'],
    password=secrets['watson_password'],
    version='2016-05-19')

# Variables that contain the user credentials to access the Twitter API
access_token = secrets['twitter_access_token']
access_token_secret = secrets['twitter_access_token_secret']
consumer_key = secrets['twitter_consumer_key']
consumer_secret = secrets['twitter_consumer_secret']

tweets = []
analyzed_data = {}
analyzed_tweets = {}
analyzed_whole = {}

class handle_root(tornado.web.RequestHandler):
    def get(self):
        self.write("Anger, Disgust, Fear, Joy, Sadness\n")
        self.write("A fashion tech project by Tanner Collin.\n")
        self.write("/tweets - tweets not yet analyzed\n")
        self.write("/data - raw json data of analysis\n")
        self.write("/each - analysis of each tweet\n")
        self.write("/all - analysis of the whole batch of tweets\n")
") class handle_tweets(tornado.web.RequestHandler): def get(self): for t in tweets: self.write(t + "
") class handle_data(tornado.web.RequestHandler): def get(self): self.set_header('Content-Type', 'text/plain; charset="utf-8"') self.write(json.dumps(analyzed_data, indent=2)) class handle_each(tornado.web.RequestHandler): def get(self): self.set_header('Content-Type', 'text/plain; charset="utf-8"') self.write(json.dumps(analyzed_tweets, indent=2)) class handle_all(tornado.web.RequestHandler): def get(self): self.set_header('Content-Type', 'text/plain; charset="utf-8"') self.write(json.dumps(analyzed_whole, indent=2)) def make_app(): return tornado.web.Application([ (r"/", handle_root), (r"/tweets", handle_tweets), (r"/data", handle_data), (r"/each", handle_each), (r"/all", handle_all), ]) # A lot of japanese tweets contain 'YYC' for some reason, so filter them out def check_if_japanese(text): # These chars should find 99% of them (I'm unsure what they mean) chars = set('のマッチングサイトでをつけようでをつけようでにめれるのでトラしてみよやかしとかしないでからりったおきでいませんかス人今付会冷切割合始度彼恋料方氏気無見軽') if any((c in chars) for c in text): return True else: return False #This is a basic listener that just prints received tweets to stdout. class StdOutListener(StreamListener): def on_data(self, data): j = json.loads(data) test = "" if 'retweeted_status' in j: j = j['retweeted_status'] if j['truncated'] == True: text = j['extended_tweet']['full_text'] else: text = j['text'] # remove all .!? + newlines so we can form our own sentences text = text.replace(".", "") text = text.replace("!", "") text = text.replace("?", "") text = text.replace("\n", " ") tweets.append(text) def on_error(self, status): print(status) def webserver_thread(): app = make_app() app.listen(8888) tornado.ioloop.IOLoop.current().start() def watson_thread(): global tweets global analyzed_data global analyzed_tweets global analyzed_whole while True: time.sleep(0.1) if len(tweets) > 10: data = "" for x in range(10): tweet = tweets.pop(0) if check_if_japanese(tweet): x -= 1 else: data += tweet + "\n" analyzed_data = tone_analyzer.tone(text=data, tones='emotion') print(data) print() result_doc = analyzed_data["document_tone"]["tone_categories"][0]["tones"] analyzed_whole = [x["score"] for x in result_doc] print(analyzed_whole) print() result_sen = analyzed_data["sentences_tone"] each_sentence = [x["tone_categories"][0]["tones"] for x in result_sen] analyzed_tweets = [[y["score"] for y in x] for x in each_sentence] print(analyzed_tweets) print() if __name__ == '__main__': #This handles Twitter authetification and the connection to Twitter Streaming API l = StdOutListener() auth = OAuthHandler(consumer_key, consumer_secret) auth.set_access_token(access_token, access_token_secret) stream = Stream(auth, l) #This line filter Twitter Streams to capture data by the keywords stream.filter(track=['calgary', 'yyc'], async=True) _thread.start_new_thread(webserver_thread, ()) _thread.start_new_thread(watson_thread, ())