# Anger, Disgust, Fear, Joy, Sadness — streams Calgary tweets from Twitter,
# scores them with IBM Watson Tone Analyzer, and serves the results through
# a small tornado web app.
import json
import time
import _thread

import tornado.ioloop
import tornado.web
from tweepy import OAuthHandler
from tweepy import Stream
from tweepy import StreamListener
from watson_developer_cloud import ToneAnalyzerV3
|
|
|
# Load all API credentials from the local secrets file.
with open('secrets.json') as secrets_file:
    secrets = json.load(secrets_file)

# Watson Tone Analyzer client used to score each batch of tweets.
tone_analyzer = ToneAnalyzerV3(
    username=secrets['watson_username'],
    password=secrets['watson_password'],
    version='2016-05-19')
|
|
|
# Variables that contain the user credentials to access the Twitter API.
access_token = secrets['twitter_access_token']
access_token_secret = secrets['twitter_access_token_secret']
consumer_key = secrets['twitter_consumer_key']
consumer_secret = secrets['twitter_consumer_secret']

# Shared state between the stream listener, the Watson worker thread, and
# the web handlers.
# NOTE(review): these are read/written from several threads with no lock;
# this appears to rely on CPython's atomic list append/pop — confirm.
tweets = []            # raw tweet texts not yet analyzed
analyzed_data = {}     # last raw Watson response
analyzed_tweets = {}   # per-sentence emotion scores from the last batch
analyzed_whole = {}    # document-level emotion scores from the last batch
|
|
|
class handle_root(tornado.web.RequestHandler):
    """Index page: a short description plus links to every endpoint."""

    def get(self):
        lines = (
            "<u>Anger, Disgust, Fear, Joy, Sadness</u><br>",
            "A fashion tech project by <a href='http://tannercollin.com'>Tanner Collin</a>.<br>",
            "<a href='/tweets'>/tweets</a> - tweets not yet analyzed<br>",
            "<a href='/data'>/data</a> - raw json data of analysis<br>",
            "<a href='/each'>/each</a> - analysis of each tweet<br>",
            "<a href='/all'>/all</a> - analysis of the whole batch of tweets<br>",
        )
        for line in lines:
            self.write(line)
|
|
|
class handle_tweets(tornado.web.RequestHandler):
    """List the tweets that are still waiting to be analyzed."""

    def get(self):
        for tweet in tweets:
            self.write(tweet + "<br>")
|
|
|
class handle_data(tornado.web.RequestHandler):
    """Serve the raw JSON returned by the last Watson analysis call."""

    def get(self):
        self.set_header('Content-Type', 'text/plain; charset="utf-8"')
        body = json.dumps(analyzed_data, indent=2)
        self.write(body)
|
|
|
class handle_each(tornado.web.RequestHandler):
    """Serve the per-tweet (per-sentence) emotion scores as JSON."""

    def get(self):
        self.set_header('Content-Type', 'text/plain; charset="utf-8"')
        body = json.dumps(analyzed_tweets, indent=2)
        self.write(body)
|
|
|
class handle_all(tornado.web.RequestHandler):
    """Serve the whole-batch (document-level) emotion scores as JSON."""

    def get(self):
        self.set_header('Content-Type', 'text/plain; charset="utf-8"')
        body = json.dumps(analyzed_whole, indent=2)
        self.write(body)
|
|
|
def make_app():
    """Build the tornado application with every route wired up."""
    routes = [
        (r"/", handle_root),
        (r"/tweets", handle_tweets),
        (r"/data", handle_data),
        (r"/each", handle_each),
        (r"/all", handle_all),
    ]
    return tornado.web.Application(routes)
|
|
|
# A lot of japanese tweets contain 'YYC' for some reason, so filter them out
def check_if_japanese(text):
    """Return True if *text* contains any character from an empirically
    collected set of Japanese spam characters.

    This is not a general Japanese detector — the set was gathered from
    observed spam tweets in the stream.
    """
    # These chars should find 99% of them (I'm unsure what they mean)
    chars = set('のマッチングサイトでをつけようでをつけようでにめれるのでトラしてみよやかしとかしないでからりったおきでいませんかス人今付会冷切割合始度彼恋料方氏気無見軽')
    # any() already yields a bool; the original if/else returning
    # True/False was redundant.
    return any(c in chars for c in text)
|
|
|
# This is a basic listener that collects incoming tweet text into the
# shared `tweets` queue for the Watson worker thread.
class StdOutListener(StreamListener):
    """Tweepy stream listener that cleans tweet text and queues it."""

    def on_data(self, data):
        """Handle one raw JSON message from the Twitter stream."""
        j = json.loads(data)

        # For retweets, analyze the original tweet instead.
        if 'retweeted_status' in j:
            j = j['retweeted_status']

        # Stream control messages (e.g. delete notices) carry no 'text';
        # skip them instead of crashing the stream thread with a KeyError.
        if 'text' not in j:
            return True

        # Truncated tweets hide their full text under extended_tweet.
        if j.get('truncated'):
            text = j['extended_tweet']['full_text']
        else:
            text = j['text']

        # remove all .!? + newlines so we can form our own sentences
        # (one C-level pass instead of three chained .replace calls)
        text = text.translate(str.maketrans('', '', '.!?'))
        text = text.replace("\n", " ")

        tweets.append(text)
        # Returning True tells tweepy to keep the stream alive.
        return True

    def on_error(self, status):
        """Print stream errors (e.g. rate-limit status codes) to stdout."""
        print(status)
|
|
|
def webserver_thread():
    """Start the tornado server on port 8888 (blocks; run in a thread)."""
    make_app().listen(8888)
    tornado.ioloop.IOLoop.current().start()
|
|
|
def watson_thread():
    """Worker loop: batch queued tweets, analyze them with Watson, and
    publish the scores into the module-level globals the web app serves.
    """
    global tweets
    global analyzed_data
    global analyzed_tweets
    global analyzed_whole

    while True:
        time.sleep(0.1)
        if len(tweets) <= 10:
            continue

        # Collect up to 10 usable tweets.  The original code did
        # `x -= 1` on a `for x in range(10)` variable to "retry" after
        # dropping a Japanese spam tweet — that has no effect in Python,
        # so filtered tweets silently shrank the batch.  This loop keeps
        # popping until 10 valid tweets are gathered or the queue empties.
        batch = []
        while tweets and len(batch) < 10:
            tweet = tweets.pop(0)
            if not check_if_japanese(tweet):
                batch.append(tweet)
        if not batch:
            continue
        # One tweet per line; Watson splits sentences on these newlines.
        data = "".join(t + "\n" for t in batch)

        analyzed_data = tone_analyzer.tone(text=data, tones='emotion')
        print(data)
        print()

        # Document-level scores: one value per emotion for the whole batch.
        result_doc = analyzed_data["document_tone"]["tone_categories"][0]["tones"]
        analyzed_whole = [x["score"] for x in result_doc]
        print(analyzed_whole)
        print()

        # Per-sentence scores: one list of emotion values per tweet.
        result_sen = analyzed_data["sentences_tone"]
        each_sentence = [x["tone_categories"][0]["tones"] for x in result_sen]
        analyzed_tweets = [[y["score"] for y in x] for x in each_sentence]
        print(analyzed_tweets)
        print()
|
|
|
if __name__ == '__main__':
    # This handles Twitter authentication and the connection to the
    # Twitter Streaming API.
    listener = StdOutListener()
    auth = OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_token, access_token_secret)
    stream = Stream(auth, listener)

    # Filter the stream to Calgary-related keywords.  `async` became a
    # reserved word in Python 3.7, making the old `async=True` spelling a
    # SyntaxError; tweepy >= 3.7 renamed the keyword argument to
    # `is_async`.
    stream.filter(track=['calgary', 'yyc'], is_async=True)

    _thread.start_new_thread(webserver_thread, ())
    _thread.start_new_thread(watson_thread, ())
    # NOTE(review): the main thread falls through here; this appears to
    # rely on the non-daemon tweepy stream thread keeping the process
    # alive — confirm, otherwise the _thread workers die immediately.
|
|
|