commit
5846777293
4 changed files with 289 additions and 0 deletions
@ -0,0 +1,109 @@ |
|||||||
|
# Byte-compiled / optimized / DLL files |
||||||
|
__pycache__/ |
||||||
|
*.py[cod] |
||||||
|
*$py.class |
||||||
|
|
||||||
|
# C extensions |
||||||
|
*.so |
||||||
|
|
||||||
|
# Distribution / packaging |
||||||
|
.Python |
||||||
|
build/ |
||||||
|
develop-eggs/ |
||||||
|
dist/ |
||||||
|
downloads/ |
||||||
|
eggs/ |
||||||
|
.eggs/ |
||||||
|
lib/ |
||||||
|
lib64/ |
||||||
|
parts/ |
||||||
|
sdist/ |
||||||
|
var/ |
||||||
|
wheels/ |
||||||
|
*.egg-info/ |
||||||
|
.installed.cfg |
||||||
|
*.egg |
||||||
|
|
||||||
|
# PyInstaller |
||||||
|
# Usually these files are written by a python script from a template |
||||||
|
# before PyInstaller builds the exe, so as to inject date/other infos into it. |
||||||
|
*.manifest |
||||||
|
*.spec |
||||||
|
|
||||||
|
# Installer logs |
||||||
|
pip-log.txt |
||||||
|
pip-delete-this-directory.txt |
||||||
|
|
||||||
|
# Unit test / coverage reports |
||||||
|
htmlcov/ |
||||||
|
.tox/ |
||||||
|
.coverage |
||||||
|
.coverage.* |
||||||
|
.cache |
||||||
|
nosetests.xml |
||||||
|
coverage.xml |
||||||
|
*.cover |
||||||
|
.hypothesis/ |
||||||
|
|
||||||
|
# Translations |
||||||
|
*.mo |
||||||
|
*.pot |
||||||
|
|
||||||
|
# Django stuff: |
||||||
|
*.log |
||||||
|
local_settings.py |
||||||
|
|
||||||
|
# Flask stuff: |
||||||
|
instance/ |
||||||
|
.webassets-cache |
||||||
|
|
||||||
|
# Scrapy stuff: |
||||||
|
.scrapy |
||||||
|
|
||||||
|
# Sphinx documentation |
||||||
|
docs/_build/ |
||||||
|
|
||||||
|
# PyBuilder |
||||||
|
target/ |
||||||
|
|
||||||
|
# Jupyter Notebook |
||||||
|
.ipynb_checkpoints |
||||||
|
|
||||||
|
# pyenv |
||||||
|
.python-version |
||||||
|
|
||||||
|
# celery beat schedule file |
||||||
|
celerybeat-schedule |
||||||
|
|
||||||
|
# SageMath parsed files |
||||||
|
*.sage.py |
||||||
|
|
||||||
|
# Environments |
||||||
|
.env |
||||||
|
.venv |
||||||
|
env/ |
||||||
|
venv/ |
||||||
|
ENV/ |
||||||
|
|
||||||
|
# Spyder project settings |
||||||
|
.spyderproject |
||||||
|
.spyproject |
||||||
|
|
||||||
|
# Rope project settings |
||||||
|
.ropeproject |
||||||
|
|
||||||
|
# mkdocs documentation |
||||||
|
/site |
||||||
|
|
||||||
|
# mypy |
||||||
|
.mypy_cache/ |
||||||
|
|
||||||
|
# Editor |
||||||
|
*.swp |
||||||
|
*.swo |
||||||
|
|
||||||
|
# DB |
||||||
|
db.sqlite3 |
||||||
|
|
||||||
|
# passwords and api keys |
||||||
|
secrets.json |
@ -0,0 +1,22 @@ |
|||||||
|
asn1crypto==0.22.0 |
||||||
|
certifi==2017.7.27.1 |
||||||
|
cffi==1.10.0 |
||||||
|
chardet==3.0.4 |
||||||
|
click==6.7 |
||||||
|
cryptography==2.0.3 |
||||||
|
idna==2.6 |
||||||
|
itsdangerous==0.24 |
||||||
|
Jinja2==2.9.6 |
||||||
|
MarkupSafe==1.0 |
||||||
|
oauthlib==2.0.2 |
||||||
|
pycparser==2.18 |
||||||
|
pyOpenSSL==17.2.0 |
||||||
|
pysolr==3.6.0 |
||||||
|
requests==2.18.4 |
||||||
|
requests-oauthlib==0.8.0 |
||||||
|
six==1.10.0 |
||||||
|
tornado==4.5.2 |
||||||
|
tweepy==3.5.0 |
||||||
|
urllib3==1.22 |
||||||
|
watson-developer-cloud==0.26.1 |
||||||
|
Werkzeug==0.12.2 |
@ -0,0 +1,8 @@ |
|||||||
|
{ |
||||||
|
"watson_password": "", |
||||||
|
"watson_username": "", |
||||||
|
"twitter_access_token": "", |
||||||
|
"twitter_access_token_secret": "", |
||||||
|
"twitter_consumer_key": "", |
||||||
|
"twitter_consumer_secret": "" |
||||||
|
} |
@ -0,0 +1,150 @@ |
|||||||
|
from tweepy import StreamListener |
||||||
|
from tweepy import OAuthHandler |
||||||
|
from tweepy import Stream |
||||||
|
from watson_developer_cloud import ToneAnalyzerV3 |
||||||
|
import json |
||||||
|
import _thread |
||||||
|
import time |
||||||
|
import tornado.ioloop |
||||||
|
import tornado.web |
||||||
|
|
||||||
|
# Read the API credentials from secrets.json in the working directory.
with open('secrets.json') as f:
    secrets = json.load(f)

# Watson Tone Analyzer client, pinned to the 2016-05-19 API version.
tone_analyzer = ToneAnalyzerV3(
    username=secrets['watson_username'],
    password=secrets['watson_password'],
    version='2016-05-19',
)

# User credentials for the Twitter API.
access_token = secrets['twitter_access_token']
access_token_secret = secrets['twitter_access_token_secret']
consumer_key = secrets['twitter_consumer_key']
consumer_secret = secrets['twitter_consumer_secret']
||||||
|
|
||||||
|
# Shared module state: filled by the stream listener and the Watson worker,
# read by the web handlers.
tweets = []           # cleaned tweet texts waiting to be analyzed
analyzed_data = {}    # raw response of the most recent tone analysis
# The two results below are assigned lists by watson_thread, so they start as
# empty lists to keep the JSON type served by /each and /all stable.
analyzed_tweets = []  # one list of scores per analyzed sentence
analyzed_whole = []   # scores for the whole batch
||||||
|
|
||||||
|
class handle_root(tornado.web.RequestHandler):
    """Landing page: a short description and links to the other endpoints."""

    def get(self):
        """Write the index page as a series of HTML fragments."""
        fragments = (
            "<u>Anger, Disgust, Fear, Joy, Sadness</u><br>",
            "A fashion tech project by <a href='http://tannercollin.com'>Tanner Collin</a>.<br>",
            "<a href='/tweets'>/tweets</a> - tweets not yet analyzed<br>",
            "<a href='/data'>/data</a> - raw json data of analysis<br>",
            "<a href='/each'>/each</a> - analysis of each tweet<br>",
            "<a href='/all'>/all</a> - analysis of the whole batch of tweets<br>",
        )
        for fragment in fragments:
            self.write(fragment)
||||||
|
|
||||||
|
class handle_tweets(tornado.web.RequestHandler):
    """Show the tweets that are queued but not yet analyzed."""

    def get(self):
        """Write each pending tweet followed by a line break.

        Tweet text comes from arbitrary Twitter users, so it is HTML-escaped
        before being embedded in the page.
        """
        # Imported locally so this handler does not depend on the file's
        # top-level import list.
        import html

        for t in tweets:
            self.write(html.escape(t) + "<br>")
||||||
|
|
||||||
|
class handle_data(tornado.web.RequestHandler):
    """Serve the raw result of the most recent tone analysis."""

    def get(self):
        """Respond with ``analyzed_data`` as indented JSON in plain text."""
        payload = json.dumps(analyzed_data, indent=2)
        self.set_header('Content-Type', 'text/plain; charset="utf-8"')
        self.write(payload)
||||||
|
|
||||||
|
class handle_each(tornado.web.RequestHandler):
    """Serve the per-sentence scores of the most recent analysis."""

    def get(self):
        """Respond with ``analyzed_tweets`` as indented JSON in plain text."""
        payload = json.dumps(analyzed_tweets, indent=2)
        self.set_header('Content-Type', 'text/plain; charset="utf-8"')
        self.write(payload)
||||||
|
|
||||||
|
class handle_all(tornado.web.RequestHandler):
    """Serve the whole-batch scores of the most recent analysis."""

    def get(self):
        """Respond with ``analyzed_whole`` as indented JSON in plain text."""
        payload = json.dumps(analyzed_whole, indent=2)
        self.set_header('Content-Type', 'text/plain; charset="utf-8"')
        self.write(payload)
||||||
|
|
||||||
|
def make_app():
    """Build the Tornado application, mapping each URL to its handler."""
    routes = [
        (r"/", handle_root),
        (r"/tweets", handle_tweets),
        (r"/data", handle_data),
        (r"/each", handle_each),
        (r"/all", handle_all),
    ]
    return tornado.web.Application(routes)
||||||
|
|
||||||
|
# A lot of Japanese tweets contain 'YYC' for some reason, so filter them out.
# These characters should find 99% of them (the original author was unsure
# what they mean). The set is built once at import time rather than on every
# call.
_JAPANESE_CHARS = frozenset('のマッチングサイトでをつけようでをつけようでにめれるのでトラしてみよやかしとかしないでからりったおきでいませんかス人今付会冷切割合始度彼恋料方氏気無見軽')


def check_if_japanese(text):
    """Return True if ``text`` contains any of the marker characters.

    Args:
        text: The tweet text to inspect.

    Returns:
        True when at least one character of ``text`` is in the marker set,
        False otherwise (including for an empty string).
    """
    return any(c in _JAPANESE_CHARS for c in text)
||||||
|
|
||||||
|
# Stream listener that cleans up each received tweet and queues it in `tweets`.
class StdOutListener(StreamListener):
    """Collect the text of streamed tweets for later tone analysis."""

    def on_data(self, data):
        """Parse one raw stream message and queue its cleaned-up text.

        Args:
            data: Raw JSON string of a single stream message.
        """
        j = json.loads(data)

        # For a retweet, work on the embedded original tweet.
        if 'retweeted_status' in j:
            j = j['retweeted_status']

        # Messages without a 'text' field are not tweets (the stream can also
        # carry notices such as limit/delete messages); skip them instead of
        # failing with a KeyError below.
        if 'text' not in j:
            return

        # Truncated tweets carry their full text in 'extended_tweet'; fall
        # back to the short text if that object is missing.
        if j.get('truncated') and 'extended_tweet' in j:
            text = j['extended_tweet']['full_text']
        else:
            text = j['text']

        # remove all .!? + newlines so we can form our own sentences
        text = text.replace(".", "")
        text = text.replace("!", "")
        text = text.replace("?", "")
        text = text.replace("\n", " ")

        tweets.append(text)

    def on_error(self, status):
        """Print the error status reported by the stream."""
        print(status)
||||||
|
|
||||||
|
def webserver_thread():
    """Thread entry point: serve the web app on port 8888.

    Blocks in the IOLoop for as long as the loop runs.
    """
    make_app().listen(8888)
    io_loop = tornado.ioloop.IOLoop.current()
    io_loop.start()
||||||
|
|
||||||
|
def watson_thread():
    """Worker loop: batch queued tweets, analyze them and publish the scores.

    Polls ``tweets`` every 0.1 seconds. Once more than 10 tweets are queued,
    it takes up to 10 tweets that pass ``check_if_japanese``, sends them
    newline-separated to the tone analyzer and stores the results in the
    module-level ``analyzed_data``, ``analyzed_whole`` and
    ``analyzed_tweets``. Never returns.
    """
    global analyzed_data
    global analyzed_tweets
    global analyzed_whole

    batch_size = 10

    while True:
        time.sleep(0.1)
        if len(tweets) <= batch_size:
            continue

        # Filtered tweets must not count towards the batch. The previous
        # `x -= 1` inside `for x in range(10)` had no effect because the loop
        # variable is reassigned on every iteration.
        batch = []
        while tweets and len(batch) < batch_size:
            tweet = tweets.pop(0)
            if not check_if_japanese(tweet):
                batch.append(tweet)

        # Everything queued was filtered out: there is no text to analyze.
        if not batch:
            continue

        # One tweet per line, so that each tweet forms its own sentence.
        data = "".join(tweet + "\n" for tweet in batch)

        analyzed_data = tone_analyzer.tone(text=data, tones='emotion')
        print(data)
        print()

        result_doc = analyzed_data["document_tone"]["tone_categories"][0]["tones"]
        analyzed_whole = [tone["score"] for tone in result_doc]
        print(analyzed_whole)
        print()

        # NOTE(review): the service appears to omit "sentences_tone" when the
        # input is a single sentence - confirm against the API reference.
        # Treat a missing key as "no per-sentence results".
        result_sen = analyzed_data.get("sentences_tone", [])
        each_sentence = [s["tone_categories"][0]["tones"] for s in result_sen]
        analyzed_tweets = [[tone["score"] for tone in tones] for tones in each_sentence]
        print(analyzed_tweets)
        print()
||||||
|
|
||||||
|
if __name__ == '__main__':

    # Authenticate with Twitter and set up the Streaming API connection.
    listener = StdOutListener()
    auth = OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_token, access_token_secret)
    stream = Stream(auth, listener)

    # Filter the Twitter stream by keyword, without blocking this thread.
    # `async` is a reserved word from Python 3.7 on, so spelling it as a
    # literal keyword argument makes the whole file a SyntaxError there.
    # Passing it through a dict makes the identical call.
    # NOTE(review): newer tweepy releases rename this parameter to
    # `is_async`; switch to that if the pinned tweepy version is upgraded.
    stream.filter(track=['calgary', 'yyc'], **{'async': True})

    _thread.start_new_thread(webserver_thread, ())
    _thread.start_new_thread(watson_thread, ())
Loading…
Reference in new issue