commit 5846777293e3c8277fa576d8a072c2d297f6a6b0 Author: Tanner Collin Date: Fri Nov 3 18:20:53 2017 -0600 Initial commit diff --git a/server/.gitignore b/server/.gitignore new file mode 100644 index 0000000..735c154 --- /dev/null +++ b/server/.gitignore @@ -0,0 +1,109 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +.hypothesis/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# pyenv +.python-version + +# celery beat schedule file +celerybeat-schedule + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ + +# Editor +*.swp +*.swo + +# DB +db.sqlite3 + +# passwords an api keys +secrets.json diff --git a/server/requirements.txt b/server/requirements.txt new file mode 100644 index 0000000..b8c4eb5 --- /dev/null +++ b/server/requirements.txt @@ -0,0 +1,22 @@ +asn1crypto==0.22.0 +certifi==2017.7.27.1 +cffi==1.10.0 +chardet==3.0.4 +click==6.7 +cryptography==2.0.3 +idna==2.6 +itsdangerous==0.24 +Jinja2==2.9.6 +MarkupSafe==1.0 +oauthlib==2.0.2 +pycparser==2.18 +pyOpenSSL==17.2.0 
# ---------------------------------------------------------------------------
# Remaining non-Python content of the "Initial commit" patch, kept here as
# comments so that reformatting this span does not drop it.
#
# server/requirements.txt (continued):
#   pysolr==3.6.0
#   requests==2.18.4
#   requests-oauthlib==0.8.0
#   six==1.10.0
#   tornado==4.5.2
#   tweepy==3.5.0
#   urllib3==1.22
#   watson-developer-cloud==0.26.1
#   Werkzeug==0.12.2
#
# server/secrets.json.example:
#   {
#     "watson_password": "",
#     "watson_username": "",
#     "twitter_access_token": "",
#     "twitter_access_token_secret": "",
#     "twitter_consumer_key": "",
#     "twitter_consumer_secret": ""
#   }
#
# server/server.py follows.
# ---------------------------------------------------------------------------
"""Tweet tone server.

Collects tweets that match the keywords 'calgary' / 'yyc', sends them in
batches to the Watson Tone Analyzer (emotion tones) and exposes the pending
tweets and the latest analysis over HTTP on port 8888.
"""
import _thread
import json
import time

import tornado.ioloop
import tornado.web
from tweepy import OAuthHandler
from tweepy import Stream
from tweepy import StreamListener
from watson_developer_cloud import ToneAnalyzerV3

with open('secrets.json') as f:
    secrets = json.load(f)

tone_analyzer = ToneAnalyzerV3(
    username=secrets['watson_username'],
    password=secrets['watson_password'],
    version='2016-05-19')

# Variables that contain the user credentials to access the Twitter API
access_token = secrets['twitter_access_token']
access_token_secret = secrets['twitter_access_token_secret']
consumer_key = secrets['twitter_consumer_key']
consumer_secret = secrets['twitter_consumer_secret']

# Shared state: the stream listener appends to `tweets`, the analysis loop
# pops from it and rebinds the three `analyzed_*` names, and the HTTP
# handlers read all four.
tweets = []
analyzed_data = {}
analyzed_tweets = {}
analyzed_whole = {}

# Content type used by every endpoint.
PLAIN_TEXT = 'text/plain; charset="utf-8"'

# Number of tweets sent to the tone analyzer per request.
BATCH_SIZE = 10

# Seconds the analysis loop sleeps between checks of the tweet queue.
POLL_INTERVAL = 0.1

# Characters whose presence marks a tweet as Japanese. These should find 99%
# of them (the original author was unsure what they mean).
JAPANESE_CHARS = frozenset(
    'のマッチングサイトでをつけようでをつけようでにめれるのでトラしてみよやかしとかしないでからりったおきでいませんかス人今付会冷切割合始度彼恋料方氏気無見軽')

# Drops '.', '!' and '?' and turns newlines into spaces in a single pass.
_SENTENCE_BREAKS = str.maketrans({'.': None, '!': None, '?': None, '\n': ' '})


class handle_root(tornado.web.RequestHandler):
    """Index page: describes the project and lists the other endpoints."""

    def get(self):
        # Plain text so the newline separators are honoured by the client.
        self.set_header('Content-Type', PLAIN_TEXT)
        self.write("Anger, Disgust, Fear, Joy, Sadness\n")
        self.write("A fashion tech project by Tanner Collin.\n")
        self.write("/tweets - tweets not yet analyzed\n")
        self.write("/data - raw json data of analysis\n")
        self.write("/each - analysis of each tweet\n")
        self.write("/all - analysis of the whole batch of tweets\n")


class handle_tweets(tornado.web.RequestHandler):
    """Lists the tweets that are queued but not yet analyzed, one per line."""

    def get(self):
        # Tweet text is untrusted input: serve it as plain text so that it
        # is never interpreted as markup.
        self.set_header('Content-Type', PLAIN_TEXT)
        # Iterate over a snapshot; other threads mutate `tweets` concurrently.
        pending = list(tweets)
        self.write("".join(tweet + "\n" for tweet in pending))


class handle_data(tornado.web.RequestHandler):
    """Returns the raw tone analyzer response of the latest batch."""

    def get(self):
        self.set_header('Content-Type', PLAIN_TEXT)
        self.write(json.dumps(analyzed_data, indent=2))


class handle_each(tornado.web.RequestHandler):
    """Returns the per-sentence tone scores of the latest batch."""

    def get(self):
        self.set_header('Content-Type', PLAIN_TEXT)
        self.write(json.dumps(analyzed_tweets, indent=2))


class handle_all(tornado.web.RequestHandler):
    """Returns the document-level tone scores of the latest batch."""

    def get(self):
        self.set_header('Content-Type', PLAIN_TEXT)
        self.write(json.dumps(analyzed_whole, indent=2))


def make_app():
    """Build the tornado application with all five routes."""
    return tornado.web.Application([
        (r"/", handle_root),
        (r"/tweets", handle_tweets),
        (r"/data", handle_data),
        (r"/each", handle_each),
        (r"/all", handle_all),
    ])


# A lot of japanese tweets contain 'YYC' for some reason, so filter them out
def check_if_japanese(text):
    """Return True if `text` contains any character of JAPANESE_CHARS."""
    return any(c in JAPANESE_CHARS for c in text)


def clean_tweet_text(text):
    """Remove all .!? and replace newlines with spaces.

    This lets the analysis loop form its own sentences by joining the
    cleaned tweets with newlines.
    """
    return text.translate(_SENTENCE_BREAKS)


class StdOutListener(StreamListener):
    """Stream listener that queues the cleaned text of every received tweet."""

    def on_data(self, data):
        """Parse one raw stream payload and append its text to `tweets`.

        For retweets the text of the retweeted status is used. Payloads that
        carry no tweet text are skipped instead of raising KeyError.
        """
        j = json.loads(data)

        if 'retweeted_status' in j:
            j = j['retweeted_status']

        if j.get('truncated') and 'extended_tweet' in j:
            text = j['extended_tweet']['full_text']
        elif 'text' in j:
            text = j['text']
        else:
            return

        tweets.append(clean_tweet_text(text))

    def on_error(self, status):
        """Print the status passed to the error callback."""
        print(status)


def webserver_thread():
    """Serve the HTTP endpoints on port 8888; blocks in the IO loop."""
    app = make_app()
    app.listen(8888)
    tornado.ioloop.IOLoop.current().start()


def _take_batch(size):
    """Pop queued tweets until `size` non-Japanese ones were collected.

    Stops early when the queue runs empty, so the returned list may hold
    fewer than `size` tweets (possibly none).
    """
    batch = []
    while len(batch) < size and tweets:
        tweet = tweets.pop(0)
        if not check_if_japanese(tweet):
            batch.append(tweet)
    return batch


def _extract_scores(response):
    """Return (document scores, per-sentence scores) of a tone response.

    Both are taken from the first tone category. A response without
    'sentences_tone' yields an empty per-sentence list.
    """
    document_tones = response["document_tone"]["tone_categories"][0]["tones"]
    whole = [tone["score"] for tone in document_tones]
    each = [
        [tone["score"] for tone in sentence["tone_categories"][0]["tones"]]
        for sentence in response.get("sentences_tone", [])
    ]
    return whole, each


def watson_thread():
    """Analyze queued tweets in batches, forever.

    Whenever more than BATCH_SIZE tweets are queued, a batch of non-Japanese
    tweets is sent to the tone analyzer and the three `analyzed_*` globals
    are replaced with the new results.
    """
    global analyzed_data
    global analyzed_tweets
    global analyzed_whole

    while True:
        time.sleep(POLL_INTERVAL)
        if len(tweets) <= BATCH_SIZE:
            continue

        batch = _take_batch(BATCH_SIZE)
        if not batch:
            # Every popped tweet was filtered out; nothing to analyze.
            continue

        data = "".join(tweet + "\n" for tweet in batch)
        try:
            response = tone_analyzer.tone(text=data, tones='emotion')
            whole, each = _extract_scores(response)
        except Exception as exc:
            # Thread boundary: one failed request or unexpected response
            # must not end the analysis loop for good.
            print("tone analysis failed:", exc)
            continue

        analyzed_data = response
        print(data)
        print()

        analyzed_whole = whole
        print(analyzed_whole)
        print()

        analyzed_tweets = each
        print(analyzed_tweets)
        print()


if __name__ == '__main__':

    # This handles Twitter authentication and the connection to the Twitter
    # Streaming API
    listener = StdOutListener()
    auth = OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_token, access_token_secret)
    stream = Stream(auth, listener)

    _thread.start_new_thread(webserver_thread, ())
    _thread.start_new_thread(watson_thread, ())

    # This line filters the Twitter stream to capture data by the keywords.
    # It is called without the `async` keyword argument: `async` is a
    # reserved word from Python 3.7 on, so `async=True` no longer parses.
    # Blocking in the main thread instead keeps the process alive while the
    # two worker threads run.
    stream.filter(track=['calgary', 'yyc'])