Finish PoC that messages the result over Telegram
This commit is contained in:
parent
4b35ec7700
commit
ae416242e2
344
main.py
344
main.py
|
@ -0,0 +1,344 @@
|
|||
import os, logging
|
||||
# Any non-empty DEBUG environment variable switches logging to DEBUG level;
# otherwise INFO is used.
DEBUG = os.environ.get('DEBUG')
# Logging is configured here, ahead of the remaining imports below.
logging.basicConfig(
    #filename='protogram.log',# encoding='utf-8',
    format='[%(asctime)s] %(levelname)s %(module)s/%(funcName)s - %(message)s',
    level=logging.DEBUG if DEBUG else logging.INFO)
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import requests
|
||||
import time
|
||||
import pytz
|
||||
from datetime import datetime
|
||||
from bs4 import BeautifulSoup
|
||||
from telethon import TelegramClient, events
|
||||
|
||||
import secrets
|
||||
|
||||
# Time zone used by process_topics() to compute the current local time.
TIMEZONE_CALGARY = pytz.timezone('America/Edmonton')

# Telegram bot client, created and started at import time. The session is
# named 'data/bot'.
# NOTE(review): `secrets` presumably resolves to a project-local secrets.py
# (it must provide API_ID, API_HASH and API_TOKEN), not the stdlib module -
# confirm.
bot = TelegramClient('data/bot', secrets.API_ID, secrets.API_HASH).start(bot_token=secrets.API_TOKEN)
|
||||
|
||||
CHATGPT_TEMPLATE = '''Turn this forum post into an two paragraph instagram
|
||||
caption that tells about what a member of our makerspace has made. Add the
|
||||
hashtags: #makerspace #yyc #maker #diy #calgary and several relevant to the post
|
||||
at the end. Include a sentence explaining that this was made at Calgary
|
||||
Protospace, a makerspace that's non-profit and community ran. Only say the
|
||||
member's name once. Use no more than 1000 characters. Write in third person.
|
||||
|
||||
Title: {}
|
||||
Member: {}
|
||||
Post Body:
|
||||
```
|
||||
{}
|
||||
```'''
|
||||
|
||||
# Load persisted state from data/data.json into the module-level `data` dict.
# `data['topics']` caches raw topic JSON keyed by topic ID and
# `data['states']` holds per-topic processing state; both are created if
# absent.
try:
    # `with` guarantees the file handle is closed after parsing.
    with open('data/data.json') as f:
        data = json.load(f)
except FileNotFoundError:
    logging.info('data.json missing, initializing data.')
    data = {}
except (OSError, ValueError) as e:
    # Unreadable or corrupt file (json.JSONDecodeError is a ValueError).
    # Still start from empty state as before, but say what actually happened
    # instead of reporting the file as missing.
    logging.error('Problem loading data.json, initializing data: {} - {}'.format(e.__class__.__name__, str(e)))
    data = {}
data.setdefault('topics', {})
data.setdefault('states', {})
|
||||
|
||||
def store_data():
    """Persist the module-level ``data`` dict to data/data.json (4-space indented JSON)."""
    with open('data/data.json', 'w') as out_file:
        json.dump(data, out_file, indent=4)
|
||||
|
||||
|
||||
def get_sorted_category_topic_ids():
    """Collect the IDs of all forum topics tagged ``protouse-consent``.

    Pages of the tag listing are requested one after another until a page
    comes back with no topics (or 100 pages have been read).

    Returns:
        A list of topic IDs as strings, highest numeric ID first, or
        ``False`` if any request or response parsing failed.
    """
    API_TAG_URL = 'https://forum.protospace.ca/tags/c/18/protouse-consent.json?match_all_tags=true&page={}&tags[]=protouse-consent'
    MAX_PAGES = 100
    REQUEST_TIMEOUT = 10  # seconds; without it a stalled connection hangs forever

    topic_ids = []

    try:
        for page in range(MAX_PAGES):
            r = requests.get(API_TAG_URL.format(page), timeout=REQUEST_TIMEOUT)
            r.raise_for_status()
            r = r.json()

            topics = r['topic_list']['topics']
            ids = [str(t['id']) for t in topics]
            topic_ids.extend(ids)

            logging.info('Got {} topic IDs from page {}.'.format(len(ids), page))

            if not ids:
                break

    # Exception (not BaseException) so Ctrl-C / SystemExit still propagate.
    except Exception as e:
        logging.error('Problem getting topic IDs: {} - {}'.format(e.__class__.__name__, str(e)))
        return False

    # IDs are kept as strings (they are used as JSON keys), so sort by their
    # numeric value: a plain string sort would rank '999' above '10000'.
    return sorted(topic_ids, key=int, reverse=True)
|
||||
|
||||
def get_topic_details(topic_id):
    """Fetch the JSON document for a single forum topic.

    Args:
        topic_id: Topic ID substituted into the ``/t/{}.json`` URL.

    Returns:
        The decoded JSON response, or ``False`` if the request or decoding
        failed.
    """
    API_TOPIC_URL = 'https://forum.protospace.ca/t/{}.json'
    REQUEST_TIMEOUT = 10  # seconds; without it a stalled connection hangs forever

    try:
        r = requests.get(API_TOPIC_URL.format(topic_id), timeout=REQUEST_TIMEOUT)
        r.raise_for_status()
        r = r.json()

        logging.info('Got topic ID: {}.'.format(topic_id))

    # Exception (not BaseException) so Ctrl-C / SystemExit still propagate.
    except Exception as e:
        logging.error('Problem getting topic details: {} - {}'.format(e.__class__.__name__, str(e)))
        return False

    return r
|
||||
|
||||
def fetch_missing_topics(topic_ids):
    """Download and cache every topic in ``topic_ids`` not yet in ``data['topics']``.

    Waits one second before each request. Topics whose download fails are
    skipped; each successful download is stored and persisted right away.
    """
    # Lazy filter: membership is re-checked as the cache grows during the loop.
    pending = (tid for tid in topic_ids if tid not in data['topics'])

    for topic_id in pending:
        time.sleep(1)

        details = get_topic_details(topic_id)

        if details:
            data['topics'][topic_id] = details
            store_data()

            logging.info('Fetched topic {}: {}'.format(topic_id, details['title']))
|
||||
|
||||
def api_chatgpt(prompt):
    """Send ``prompt`` to the OpenAI chat completions endpoint and return the reply.

    Args:
        prompt: Text sent as the single user message.

    Returns:
        The content of the first choice's message, or ``False`` if the
        request failed or the response did not have the expected shape.
    """
    thread = [
        dict(role='system', content='You are ChatGPT, a large language model trained by OpenAI. Answer as concisely as possible. Be terse.'),
        dict(role='user', content=prompt),
    ]

    # Named `payload` rather than `data` so it does not shadow the
    # module-level `data` state dict.
    payload = dict(
        messages=thread,
        model='gpt-4-1106-preview',
        temperature=0.5,
        user='protogram',
        max_tokens=1000,
    )
    headers = {'Authorization': 'Bearer ' + secrets.OPENAI_KEY}

    start = time.time()

    try:
        r = requests.post('https://api.openai.com/v1/chat/completions', json=payload, headers=headers, timeout=40)
        r.raise_for_status()
        r = r.json()

        gpt_reply = r['choices'][0]['message']['content']
    # Exception (not BaseException) so Ctrl-C / SystemExit still propagate.
    except Exception as e:
        logging.error('Problem with chatgpt: {} - {}'.format(e.__class__.__name__, str(e)))
        return False

    end = time.time()

    logging.info('Got ChatGPT response in {}s:\n{}'.format(str(end - start), gpt_reply))
    return gpt_reply
|
||||
|
||||
def get_portal_name_from_discourse(username):
    """Look up a member's preferred name from their Discourse username.

    Args:
        username: Discourse username, sent as the ``discourse_username``
            query parameter.

    Returns:
        ``member.preferred_name`` from the API response, or ``False`` if the
        request failed or the response did not have the expected shape.
    """
    try:
        params = dict(discourse_username=username)
        headers = {'Authorization': 'Bearer ' + secrets.PROTOGRAM_API_KEY}
        r = requests.get('https://api.my.protospace.ca/search/discourse/', params=params, headers=headers, timeout=5)
        r.raise_for_status()
        r = r.json()

        return r['member']['preferred_name']
    # Exception (not BaseException) so Ctrl-C / SystemExit still propagate.
    except Exception as e:
        logging.error('Problem with getting member name: {} - {}'.format(e.__class__.__name__, str(e)))
        return False
|
||||
|
||||
def generate_caption(topic):
    """Build the caption prompt for ``topic`` and return the model's reply.

    The title, the author's preferred name and the plain text of the first
    post are substituted into ``CHATGPT_TEMPLATE``.

    Returns:
        Whatever ``api_chatgpt`` returns, or ``False`` when the author's
        name could not be resolved.
    """
    title = topic['title']
    first_post = topic['post_stream']['posts'][0]

    username = first_post['username']
    member = get_portal_name_from_discourse(username)
    if not member:
        return False
    logging.info('Converted discourse username {} -> {}'.format(username, member))

    # Strip the HTML, then drop every line that mentions a KB/MB size.
    text = BeautifulSoup(first_post['cooked'], 'html.parser').get_text()
    kept = []
    for line in text.split('\n'):
        if 'KB' in line or 'MB' in line:
            continue
        kept.append(line)

    # Single pass collapsing triple newlines into double newlines.
    body = '\n'.join(kept).replace('\n\n\n', '\n\n')

    logging.info('Generating caption for: {}\n{}'.format(title, body))

    return api_chatgpt(CHATGPT_TEMPLATE.format(title, member, body))
|
||||
|
||||
def test_generate_topic_ids(topic_ids):
    """Manual check: generate a caption for each cached topic in ``topic_ids``.

    Every ID must already be present in ``data['topics']``. Results are only
    logged, not returned.
    """
    banner = 'Test generating topic IDs {} with template:\n{}\n\n'.format(str(topic_ids), CHATGPT_TEMPLATE)
    logging.info(banner)

    for tid in topic_ids:
        cached_topic = data['topics'][tid]
        generate_caption(cached_topic)
        logging.info('Finished topic ID {}\n\n\n'.format(tid))

    logging.info('Done.')
|
||||
|
||||
def save_images_from_topic(topic_id, topic, state):
    """Download the ``.jpeg`` links of a topic's first post into data/photos/.

    Args:
        topic_id: Used to build the file names ``data/photos/<topic_id>_<n>.jpg``.
        topic: Topic JSON; links are read from the first post's ``link_counts``.
        state: Per-topic state dict; each saved path is recorded in
            ``state['photos']`` and persisted via ``store_data()``.

    Returns:
        ``False`` when the post has no ``.jpeg`` links, otherwise the number
        of photos saved successfully (possibly 0).
    """
    first_post = topic['post_stream']['posts'][0]
    # A post may carry no `link_counts` entry; treat that as "no links"
    # instead of raising KeyError.
    link_counts = first_post.get('link_counts') or []

    urls = [x['url'] for x in link_counts if x['url'].endswith('.jpeg')]  # skip png for now

    if not urls:
        logging.info('No photos found.')
        return False

    logging.info('Downloading {} photos...'.format(len(urls)))

    count = 0
    for url in urls:
        logging.info('Downloading photo: {}'.format(url))

        try:
            r = requests.get(url, timeout=10)
            r.raise_for_status()
            filename = 'data/photos/{}_{}.jpg'.format(topic_id, count)

            with open(filename, 'wb') as f:
                f.write(r.content)

            # File names are deterministic, so a second run for the same
            # topic overwrites the files; don't list the same path twice.
            if filename not in state['photos']:
                state['photos'].append(filename)
            store_data()

            count += 1
        # Exception (not BaseException) so Ctrl-C / SystemExit still propagate.
        except Exception as e:
            logging.error('Problem downloading photo: {} - {}'.format(e.__class__.__name__, str(e)))
            continue

    return count
|
||||
|
||||
def find_next_valid_topic_id(topic_ids):
    """Return the first topic ID that still needs processing.

    A topic needs processing when it has no entry in ``data['states']`` or
    its status is anything other than 'POSTED' or 'ERROR'.

    Args:
        topic_ids: Candidate IDs in priority order. A falsy value (e.g. the
            ``False`` returned by a failed listing) is treated as empty.

    Returns:
        The first matching topic ID, or ``False`` if there is none.
    """
    finished = ('POSTED', 'ERROR')

    for topic_id in topic_ids or []:
        state = data['states'].get(topic_id)
        # Previously a topic with an existing, unfinished state (e.g. 'NEW'
        # left behind by an interrupted run) was silently skipped forever.
        if state is None or state.get('status') not in finished:
            return topic_id

    return False
|
||||
|
||||
async def send_data_to_admin(state):
    """Send a topic's caption and photos to the admin over Telegram.

    Args:
        state: Per-topic state dict; ``state['caption']`` is the message
            text and ``state['photos']`` the list of files to attach.

    Returns:
        ``True`` if the message was sent, ``False`` if sending raised.
    """
    try:
        await bot.send_message(
            secrets.ADMIN_TELEGRAM_ID,
            state['caption'],
            file=state['photos'],
        )
        return True
    # Exception (not BaseException): asyncio.CancelledError must propagate
    # so the task can actually be cancelled.
    except Exception as e:
        logging.error('Problem sending to admin: {} - {}'.format(e.__class__.__name__, str(e)))
        return False
|
||||
|
||||
|
||||
async def process_topics():
    """Background task: pick the next unprocessed topic and send it to the admin.

    Each pass refreshes the tagged topic list, caches any topics not seen
    before, selects the first topic that is neither POSTED nor ERROR,
    downloads its photos, generates a caption and messages both to the admin.
    The topic's state is persisted as 'POSTED' or 'ERROR'.

    In this proof-of-concept the task idles forever after the first send
    attempt; the weekly schedule check is still commented out.
    """
    FRIDAY = 4
    RETRY_DELAY = 60  # seconds to wait before retrying after a failed pass

    while True:
        #await asyncio.sleep(60)

        now = datetime.now(TIMEZONE_CALGARY)

        #if not (now.weekday() == FRIDAY and now.hour == 18 and now.minute == 15):
        #    continue

        logging.info('Processing topics...')

        topic_ids = get_sorted_category_topic_ids()

        if topic_ids is False:
            # Listing failed (already logged). Iterating over False would
            # raise TypeError and kill the task, so wait and retry instead.
            await asyncio.sleep(RETRY_DELAY)
            continue

        fetch_missing_topics(topic_ids)

        topic_id = find_next_valid_topic_id(topic_ids)

        if not topic_id:
            logging.info('No next valid topic ID found.')
            # Sleep rather than spin: a bare `continue` would hammer the
            # forum and never yield to the event loop.
            await asyncio.sleep(RETRY_DELAY)
            continue

        logging.info('Next valid topic ID: {}'.format(topic_id))

        if topic_id not in data['topics']:
            # Its details could not be fetched this pass; try again later.
            logging.error('Topic {} has not been fetched yet.'.format(topic_id))
            await asyncio.sleep(RETRY_DELAY)
            continue

        if topic_id not in data['states']:
            data['states'][topic_id] = dict(
                status='NEW',
                photos=[],
                caption=None
            )

        topic = data['topics'][topic_id]
        state = data['states'][topic_id]

        count = save_images_from_topic(topic_id, topic, state)

        if not count:
            state['status'] = 'ERROR'
            store_data()
            await asyncio.sleep(0)  # let other event-loop work run before the next pass
            continue

        caption = generate_caption(topic)
        state['caption'] = caption

        if not caption:
            # Don't message the admin with a missing caption and then record
            # the topic as POSTED.
            state['status'] = 'ERROR'
            store_data()
            await asyncio.sleep(0)
            continue

        result = await send_data_to_admin(state)

        if result:
            state['status'] = 'POSTED'
        else:
            state['status'] = 'ERROR'
        store_data()

        print('done')
        # PoC behaviour: stop after one topic. Idle with an awaitable sleep
        # instead of `while True: pass`, which blocked the event loop and
        # kept the Telegram handlers from ever running.
        while True:
            await asyncio.sleep(3600)
|
||||
|
||||
|
||||
|
||||
@bot.on(events.NewMessage(pattern='/start'))
async def start(event):
    """Answer /start with a greeting, then stop other handlers from seeing the event."""
    greeting = 'Hello world'
    await event.respond(greeting)
    raise events.StopPropagation
|
||||
|
||||
@bot.on(events.NewMessage)
async def new_message(event):
    """Echo text messages back to the sender, but only for the admin.

    Messages without text are ignored. Every text message is logged;
    messages from anyone other than ``secrets.ADMIN_TELEGRAM_ID`` get no
    reply.
    """
    if not event.raw_text:
        logging.info('No text found')
        return

    logging.info('Message: ' + event.raw_text)

    # Compare `sender_id` rather than `sender.id`: `event.sender` can be None
    # when the sender entity is not cached, which would raise AttributeError.
    if event.sender_id != secrets.ADMIN_TELEGRAM_ID:
        logging.info('Message not from Admin')
        return

    await event.respond(event.text)
|
||||
|
||||
|
||||
def task_died(future):
    """Done-callback for the background task: report why it ended.

    Args:
        future: The finished task/future this callback was attached to.

    When the SHELL environment variable is set (presumably an interactive
    run - confirm), the failure is only logged. Otherwise the process waits
    60 seconds and exits.
    """
    # Retrieve and log the outcome; otherwise the cause of death is lost
    # (asyncio only reports unretrieved exceptions when the task is
    # garbage-collected).
    if future.cancelled():
        logging.error('Protogram task was cancelled.')
    else:
        exc = future.exception()
        if exc is not None:
            logging.error('Protogram task raised: {} - {}'.format(exc.__class__.__name__, str(exc)))

    if os.environ.get('SHELL'):
        logging.error('Protogram task died!')
    else:
        logging.error('Protogram task died! Waiting 60s and exiting...')
        # TODO: notify a controller here; it must never prevent the exit below.
        #controller_message('Protogram task died! Waiting 60s and exiting...')
        time.sleep(60)
        # Same effect as exit(), without depending on the site-provided builtin.
        raise SystemExit
|
||||
|
||||
if __name__ == '__main__':
    logging.info('===== BOOT UP =====')

    # Manual debugging helpers, kept for development:
    #topic_ids = get_sorted_category_topic_ids()
    #print(topic_ids)
    #fetch_missing_topics(topic_ids)
    #print('next valid:', find_next_valid_topic_id(topic_ids))
    #generate_caption(data['topics']['5363'])
    #test_generate_topic_ids(['5174', '5381', '5363', '5205', '5273'])

    loop = asyncio.get_event_loop()
    # Keep a strong reference to the task: the event loop only holds weak
    # references, and add_done_callback() returns None, so chaining the two
    # calls left nothing pointing at the task.
    task = loop.create_task(process_topics())
    task.add_done_callback(task_died)
    loop.run_forever()
|
Loading…
Reference in New Issue
Block a user