You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 

344 lines
9.8 KiB

import os, logging
# A non-empty DEBUG environment variable switches logging to debug level.
DEBUG = os.environ.get('DEBUG')
_LOG_FORMAT = '[%(asctime)s] %(levelname)s %(module)s/%(funcName)s - %(message)s'
_LOG_LEVEL = logging.INFO
if DEBUG:
    _LOG_LEVEL = logging.DEBUG
logging.basicConfig(
    #filename='protogram.log',# encoding='utf-8',
    format=_LOG_FORMAT,
    level=_LOG_LEVEL,
)
import asyncio
import json
import requests
import time
import pytz
from datetime import datetime
from bs4 import BeautifulSoup
from telethon import TelegramClient, events
import secrets
# Timezone object for 'America/Edmonton', used as the Calgary-local zone.
TIMEZONE_CALGARY = pytz.timezone('America/Edmonton')
# Telegram client, created and started with the bot token as soon as this
# module is imported.
# NOTE(review): `secrets` is read here for API_ID / API_HASH / API_TOKEN, which
# the standard-library module of that name does not define -- presumably a
# project-local secrets.py shadows it; confirm.
bot = TelegramClient('data/bot', secrets.API_ID, secrets.API_HASH).start(bot_token=secrets.API_TOKEN)
# Prompt template used to turn a forum post into an Instagram caption.
# The three `{}` placeholders are filled positionally with str.format();
# generate_caption() passes (topic title, member name, post body text).
CHATGPT_TEMPLATE = '''Turn this forum post into an two paragraph instagram
caption that tells about what a member of our makerspace has made. Add the
hashtags: #makerspace #yyc #maker #diy #calgary and several relevant to the post
at the end. Include a sentence explaining that this was made at Calgary
Protospace, a makerspace that's non-profit and community ran. Only say the
member's name once. Use no more than 1000 characters. Write in third person.
Title: {}
Member: {}
Post Body:
```
{}
```'''
# Load the persisted bot state. `data` holds two mappings keyed by topic ID
# (as a string): 'topics' (cached forum topic JSON) and 'states' (per-topic
# processing status, photos and caption).
try:
    with open('data/data.json') as f:
        data = json.load(f)
except FileNotFoundError:
    logging.info('data.json missing, initializing data.')
    data = {}
except (OSError, ValueError) as e:
    # json.JSONDecodeError is a ValueError. Starting empty keeps the old
    # best-effort behaviour, but the reason is now logged instead of hidden.
    logging.error('data.json unreadable, initializing data: {} - {}'.format(e.__class__.__name__, str(e)))
    data = {}
if not isinstance(data, dict):
    # A non-dict payload would break the key initialisation below.
    logging.error('data.json does not contain an object, initializing data.')
    data = {}
data.setdefault('topics', {})
data.setdefault('states', {})
def store_data():
    """Persist the module-level `data` dict to data/data.json.

    The JSON text is built before any file is opened and written to a
    temporary file that is then swapped into place. A serialization error or
    a crash mid-write therefore leaves the previous data.json intact instead
    of a truncated file.
    """
    path = 'data/data.json'
    tmp_path = path + '.tmp'
    serialized = json.dumps(data, indent=4)
    with open(tmp_path, 'w') as f:
        f.write(serialized)
    # os.replace overwrites the destination in one step.
    os.replace(tmp_path, path)
def get_sorted_category_topic_ids():
    """Collect the IDs of all topics tagged 'protouse-consent' from the forum.

    Pages are requested one after another (at most 100) until a page comes
    back with no topics.

    Returns:
        A list of topic IDs as strings, highest ID first, or False if any
        request or response parsing step fails.
    """
    API_TAG_URL = 'https://forum.protospace.ca/tags/c/18/protouse-consent.json?match_all_tags=true&page={}&tags[]=protouse-consent'

    topic_ids = []

    try:
        for page in range(100):
            # A timeout keeps a stalled connection from hanging the caller.
            r = requests.get(API_TAG_URL.format(page), timeout=10)
            r.raise_for_status()
            topics = r.json()['topic_list']['topics']

            ids = [str(t['id']) for t in topics]
            topic_ids.extend(ids)

            logging.info('Got {} topic IDs from page {}.'.format(len(ids), page))

            if not ids:
                break
    except Exception as e:
        # Exception, not BaseException, so Ctrl-C / SystemExit still propagate.
        logging.error('Problem getting topic IDs: {} - {}'.format(e.__class__.__name__, str(e)))
        return False

    # Compare numerically: as plain strings '999' would sort above '10000'.
    return sorted(topic_ids, key=int, reverse=True)
def get_topic_details(topic_id):
    """Fetch the JSON document of a single forum topic.

    Args:
        topic_id: ID of the topic, substituted into the topic URL.

    Returns:
        The decoded JSON response, or False if the request or decoding fails.
    """
    API_TOPIC_URL = 'https://forum.protospace.ca/t/{}.json'

    try:
        # A timeout keeps a stalled connection from hanging the caller.
        r = requests.get(API_TOPIC_URL.format(topic_id), timeout=10)
        r.raise_for_status()
        topic = r.json()
        logging.info('Got topic ID: {}.'.format(topic_id))
    except Exception as e:
        # Exception, not BaseException, so Ctrl-C / SystemExit still propagate.
        logging.error('Problem getting topic details: {} - {}'.format(e.__class__.__name__, str(e)))
        return False

    return topic
def fetch_missing_topics(topic_ids):
    """Download and cache every topic in `topic_ids` not yet in data['topics'].

    Each newly fetched topic is stored under its ID and the data file is
    saved straight away. Topics whose download fails are skipped.

    Args:
        topic_ids: Iterable of topic ID strings. A falsy value (such as the
            False returned by get_sorted_category_topic_ids() on failure) is
            treated as "nothing to fetch".
    """
    if not topic_ids:
        return

    for topic_id in topic_ids:
        if topic_id in data['topics']:
            continue

        # Pause before each request so the forum is not hit back-to-back.
        time.sleep(1)

        topic = get_topic_details(topic_id)
        if not topic:
            continue

        data['topics'][topic_id] = topic
        store_data()

        logging.info('Fetched topic {}: {}'.format(topic_id, topic['title']))
def api_chatgpt(prompt):
    """Send `prompt` to the OpenAI chat completions endpoint.

    Args:
        prompt: Text sent as the single user message.

    Returns:
        The content of the first returned choice, or False if the request
        fails or the response does not have the expected structure.
    """
    thread = [
        dict(role='system', content='You are ChatGPT, a large language model trained by OpenAI. Answer as concisely as possible. Be terse.'),
        dict(role='user', content=prompt),
    ]

    # Named `payload` so it does not shadow the module-level `data` store.
    payload = dict(
        messages=thread,
        model='gpt-4-1106-preview',
        temperature=0.5,
        user='protogram',
        max_tokens=1000,
    )

    headers = {'Authorization': 'Bearer ' + secrets.OPENAI_KEY}

    start = time.time()

    try:
        r = requests.post('https://api.openai.com/v1/chat/completions', json=payload, headers=headers, timeout=40)
        r.raise_for_status()
        gpt_reply = r.json()['choices'][0]['message']['content']
    except Exception as e:
        # Exception, not BaseException, so Ctrl-C / SystemExit still propagate.
        logging.error('Problem with chatgpt: {} - {}'.format(e.__class__.__name__, str(e)))
        return False

    end = time.time()

    logging.info('Got ChatGPT response in {}s:\n{}'.format(str(end - start), gpt_reply))

    return gpt_reply
def get_portal_name_from_discourse(username):
    """Look up a member's preferred name from their Discourse username.

    Args:
        username: Discourse username, sent as the `discourse_username` query
            parameter.

    Returns:
        The `preferred_name` of the `member` object in the response, or
        False if the lookup fails for any reason.
    """
    try:
        params = dict(discourse_username=username)
        headers = {'Authorization': 'Bearer ' + secrets.PROTOGRAM_API_KEY}
        r = requests.get('https://api.my.protospace.ca/search/discourse/', params=params, headers=headers, timeout=5)
        r.raise_for_status()
        return r.json()['member']['preferred_name']
    except Exception as e:
        # Exception, not BaseException, so Ctrl-C / SystemExit still propagate.
        logging.error('Problem with getting member name: {} - {}'.format(e.__class__.__name__, str(e)))
        return False
def generate_caption(topic):
    """Build the caption prompt for a topic and return the model's reply.

    The author of the first post is resolved to a member name; the post HTML
    is reduced to plain text with every line mentioning 'KB' or 'MB' dropped.

    Args:
        topic: Topic dict with 'title' and 'post_stream' -> 'posts'.

    Returns:
        Whatever api_chatgpt() returns, or False when the member name cannot
        be resolved.
    """
    title = topic['title']
    first_post = topic['post_stream']['posts'][0]
    username = first_post['username']

    member = get_portal_name_from_discourse(username)
    if not member:
        return False

    logging.info('Converted discourse username {} -> {}'.format(username, member))

    text = BeautifulSoup(first_post['cooked'], 'html.parser').get_text()

    kept = []
    for text_line in text.split('\n'):
        if 'KB' in text_line or 'MB' in text_line:
            continue
        kept.append(text_line)

    body = '\n'.join(kept)
    body = body.replace('\n\n\n', '\n\n')

    logging.info('Generating caption for: {}\n{}'.format(title, body))

    return api_chatgpt(CHATGPT_TEMPLATE.format(title, member, body))
def test_generate_topic_ids(topic_ids):
    """Run caption generation for each cached topic in `topic_ids`.

    Manual helper: the captions are not returned, only logged by the
    functions it calls.
    """
    banner = 'Test generating topic IDs {} with template:\n{}\n\n'
    logging.info(banner.format(str(topic_ids), CHATGPT_TEMPLATE))

    for current_id in topic_ids:
        cached_topic = data['topics'][current_id]
        generate_caption(cached_topic)
        logging.info('Finished topic ID {}\n\n\n'.format(current_id))

    logging.info('Done.')
def save_images_from_topic(topic_id, topic, state):
    """Download the .jpeg links of a topic's first post into data/photos/.

    Each saved file is named '<topic_id>_<n>.jpg', recorded in
    state['photos'], and the data file is saved after every photo.

    Args:
        topic_id: Topic ID used in the filenames.
        topic: Topic dict with 'post_stream' -> 'posts'.
        state: Per-topic state dict; its 'photos' list is appended to.

    Returns:
        False if the post has no .jpeg links, otherwise the number of photos
        saved (0 if every download failed).
    """
    first_post = topic['post_stream']['posts'][0]
    # Use .get(): a post may come without a 'link_counts' key at all
    # (presumably when it has no links -- verify against the forum API).
    link_counts = first_post.get('link_counts') or []
    urls = [x['url'] for x in link_counts if x['url'].endswith('.jpeg')]  # skip png for now

    if not urls:
        logging.info('No photos found.')
        return False

    logging.info('Downloading {} photos...'.format(len(urls)))

    os.makedirs('data/photos', exist_ok=True)

    count = 0

    for url in urls:
        logging.info('Downloading photo: {}'.format(url))

        try:
            r = requests.get(url, timeout=10)
            r.raise_for_status()

            filename = 'data/photos/{}_{}.jpg'.format(topic_id, count)
            with open(filename, 'wb') as f:
                f.write(r.content)

            # Filenames repeat when a topic is processed again; list each once.
            if filename not in state['photos']:
                state['photos'].append(filename)
            store_data()

            count += 1
        except Exception as e:
            # Exception, not BaseException, so Ctrl-C / SystemExit propagate.
            logging.error('Problem downloading photo: {} - {}'.format(e.__class__.__name__, str(e)))
            continue

    return count
def find_next_valid_topic_id(topic_ids):
    """Return the first topic ID that still needs processing.

    A topic needs processing when it has no entry in data['states'], or when
    its status is anything other than 'POSTED' or 'ERROR'. Previously a topic
    with another status (e.g. 'NEW') fell through to the next iteration and
    was never returned.

    Args:
        topic_ids: Iterable of topic ID strings in priority order. A falsy
            value is treated as empty.

    Returns:
        The matching topic ID, or False if there is none.
    """
    finished = ('POSTED', 'ERROR')
    states = data.get('states', {})

    for topic_id in topic_ids or []:
        state = states.get(topic_id)
        if state is None or state.get('status') not in finished:
            return topic_id

    return False
async def send_data_to_admin(state):
    """Send a topic's caption and photos to the admin over Telegram.

    Args:
        state: Per-topic state dict; 'caption' is the message text and
            'photos' is passed as the attached files.

    Returns:
        True if the message was sent, False if sending raised an error.
    """
    try:
        await bot.send_message(
            secrets.ADMIN_TELEGRAM_ID,
            state['caption'],
            file=state['photos'],
        )
        return True
    except Exception as e:
        # Exception, not BaseException: asyncio.CancelledError derives from
        # BaseException and must propagate so the task can be cancelled.
        logging.error('Problem sending to admin: {} - {}'.format(e.__class__.__name__, str(e)))
        return False
async def process_topics():
    """Main loop: find the next unposted topic and send it to the admin.

    Each pass refreshes the topic list, caches missing topics, picks the next
    topic that still needs processing, downloads its photos, generates a
    caption and sends both to the admin. The topic's state ends as 'POSTED'
    or 'ERROR' and is saved.

    After the first send attempt the coroutine idles forever, as the original
    did, but it now sleeps instead of spinning so other handlers on the event
    loop keep running.

    NOTE: the helpers called here use blocking network calls, so the event
    loop is still stalled while they run.
    """
    idle_delay = 60  # seconds to wait when there is nothing to do right now
    skip_delay = 1   # short yield before moving on to the next candidate

    while True:
        # Weekly schedule gate, currently disabled:
        #await asyncio.sleep(60)
        #FRIDAY = 4
        #now = datetime.now(TIMEZONE_CALGARY)
        #if not (now.weekday() == FRIDAY and now.hour == 18 and now.minute == 15):
        #    continue

        logging.info('Processing topics...')

        topic_ids = get_sorted_category_topic_ids()
        if topic_ids is False:
            # The fetch failed and was already logged. Iterating over False
            # below would raise, so retry later instead.
            await asyncio.sleep(idle_delay)
            continue

        fetch_missing_topics(topic_ids)

        topic_id = find_next_valid_topic_id(topic_ids)
        if not topic_id:
            logging.info('No next valid topic ID found.')
            # Sleep rather than re-polling the forum in a tight loop.
            await asyncio.sleep(idle_delay)
            continue

        logging.info('Next valid topic ID: {}'.format(topic_id))

        if topic_id not in data['topics']:
            # The topic's details could not be downloaded on this pass.
            logging.error('Topic {} has not been fetched, retrying later.'.format(topic_id))
            await asyncio.sleep(idle_delay)
            continue

        if topic_id not in data['states']:
            data['states'][topic_id] = dict(
                status='NEW',
                photos=[],
                caption=None,
            )

        topic = data['topics'][topic_id]
        state = data['states'][topic_id]

        count = save_images_from_topic(topic_id, topic, state)
        if not count:
            state['status'] = 'ERROR'
            store_data()
            await asyncio.sleep(skip_delay)
            continue

        caption = generate_caption(topic)
        state['caption'] = caption
        if not caption:
            # Do not send photos without a caption and record them as posted.
            logging.error('No caption generated for topic {}.'.format(topic_id))
            state['status'] = 'ERROR'
            store_data()
            await asyncio.sleep(skip_delay)
            continue

        result = await send_data_to_admin(state)
        state['status'] = 'POSTED' if result else 'ERROR'
        store_data()

        print('done')

        # Halt after one topic without blocking the event loop.
        while True:
            await asyncio.sleep(3600)
@bot.on(events.NewMessage(pattern='/start'))
async def start(event):
    """Answer a /start message with a greeting, then raise StopPropagation."""
    greeting = 'Hello world'
    await event.respond(greeting)
    raise events.StopPropagation
@bot.on(events.NewMessage)
async def new_message(event):
    """Echo text messages from the admin back to the chat.

    Messages without text and messages from any other sender are logged and
    ignored.
    """
    if not event.raw_text:
        logging.info('No text found')
        return

    logging.info('Message: ' + event.raw_text)

    # sender_id is used because event.sender can be None when Telethon has
    # not resolved the sender entity, which would raise AttributeError.
    if event.sender_id != secrets.ADMIN_TELEGRAM_ID:
        logging.info('Message not from Admin')
        return

    await event.respond(event.text)
def task_died(future):
    """Done-callback for the main task: log why it ended, then exit.

    When the SHELL environment variable is set the process exits at once;
    otherwise it waits 60 seconds first. (Presumably this separates an
    interactive run from a supervised service that restarts the process --
    confirm.)

    Args:
        future: The finished task or future this callback was attached to.

    Raises:
        SystemExit: always.
    """
    # Retrieve the failure so the traceback ends up in the log.
    # exception() raises on a cancelled future, so check that first.
    error = None
    if not future.cancelled():
        error = future.exception()

    if os.environ.get('SHELL'):
        logging.error('Protogram task died!', exc_info=error)
    else:
        logging.error('Protogram task died! Waiting 60s and exiting...', exc_info=error)
        # Disabled notification hook; wrap it so it cannot block the exit:
        #controller_message('Protogram task died! Waiting 60s and exiting...')
        time.sleep(60)

    # Same effect as exit(), without relying on the site-provided builtin.
    raise SystemExit
if __name__ == '__main__':
    logging.info('===== BOOT UP =====')

    # Manual debugging entry points:
    #topic_ids = get_sorted_category_topic_ids()
    #print(topic_ids)
    #fetch_missing_topics(topic_ids)
    #print('next valid:', find_next_valid_topic_id(topic_ids))
    #generate_caption(data['topics']['5363'])
    #test_generate_topic_ids(['5174', '5381', '5363', '5205', '5273'])

    loop = asyncio.get_event_loop()
    # Keep a reference to the task: add_done_callback() returns None, and the
    # event loop only holds weak references to tasks.
    process_task = loop.create_task(process_topics())
    process_task.add_done_callback(task_died)
    loop.run_forever()