parent
4b35ec7700
commit
ae416242e2
1 changed files with 344 additions and 0 deletions
@ -0,0 +1,344 @@ |
||||
import os, logging |
||||
# Any non-empty DEBUG environment variable switches logging to DEBUG level;
# otherwise INFO is used.
DEBUG = os.getenv('DEBUG')

# No filename is passed, so basicConfig installs its default stream handler.
# File logging is left disabled; the previously considered settings were:
#   filename='protogram.log', encoding='utf-8'
logging.basicConfig(
    level=logging.INFO if not DEBUG else logging.DEBUG,
    format='[%(asctime)s] %(levelname)s %(module)s/%(funcName)s - %(message)s',
)
||||
|
||||
import asyncio |
||||
import json |
||||
import requests |
||||
import time |
||||
import pytz |
||||
from datetime import datetime |
||||
from bs4 import BeautifulSoup |
||||
from telethon import TelegramClient, events |
||||
|
||||
import secrets |
||||
|
||||
# Calgary local time is described by the America/Edmonton tz database zone.
TIMEZONE_CALGARY = pytz.timezone('America/Edmonton')

# Telegram client, created and started with the bot token at import time.
# 'data/bot' is the session argument handed to Telethon.
bot = TelegramClient(
    'data/bot',
    secrets.API_ID,
    secrets.API_HASH,
).start(bot_token=secrets.API_TOKEN)
||||
|
||||
# Prompt sent to ChatGPT to turn a forum post into an Instagram caption.
# It has three positional str.format() fields, in order: topic title, member
# name, and post body (the body is placed inside a ``` fence).
# The wording matters: phrases from the prompt can be echoed into the public
# caption, so the prompt itself is kept grammatical.
CHATGPT_TEMPLATE = '''Turn this forum post into a two-paragraph instagram
caption that tells about what a member of our makerspace has made. Add the
hashtags: #makerspace #yyc #maker #diy #calgary and several relevant to the post
at the end. Include a sentence explaining that this was made at Calgary
Protospace, a makerspace that's non-profit and community run. Only say the
member's name once. Use no more than 1000 characters. Write in third person.

Title: {}
Member: {}
Post Body:
```
{}
```'''
||||
|
||||
# Load the persisted bot state from data/data.json into the module-level
# `data` dict. The dict always ends up with two top-level keys:
#   'topics' - topic ID -> topic JSON as fetched from the forum
#   'states' - topic ID -> processing state (status / photos / caption)
try:
    # Context manager so the file handle is closed promptly.
    with open('data/data.json') as f:
        data = json.load(f)
except FileNotFoundError:
    logging.info('data.json missing, initializing data.')
    data = {}
except (OSError, ValueError) as e:
    # ValueError covers invalid JSON (json.JSONDecodeError subclasses it).
    # Starting fresh is kept as the best-effort behaviour, but the reason is
    # logged loudly instead of being reported as a missing file.
    logging.error('data.json unreadable ({} - {}), initializing data.'.format(e.__class__.__name__, str(e)))
    data = {}

data.setdefault('topics', {})
data.setdefault('states', {})
||||
|
||||
def store_data():
    """Persist the module-level ``data`` dict to ``data/data.json``.

    The JSON is written to a temporary sibling file first and then moved over
    the real file with ``os.replace``. An interrupted or failed write therefore
    never leaves a truncated ``data.json`` behind, which would otherwise make
    the next start-up discard all recorded state.
    """
    path = 'data/data.json'
    tmp_path = path + '.tmp'

    with open(tmp_path, 'w') as f:
        json.dump(data, f, indent=4)

    # Same directory, so the rename stays on one filesystem.
    os.replace(tmp_path, path)
||||
|
||||
|
||||
def get_sorted_category_topic_ids():
    """Collect the IDs of all forum topics tagged ``protouse-consent``.

    Pages of the tag listing are requested one after another (at most 100)
    until a page comes back with no topics.

    Returns:
        A list of topic IDs as strings, sorted by numeric value in descending
        order (presumably newest first, assuming forum IDs grow over time),
        or ``False`` if any request or response parsing fails.
    """
    API_TAG_URL = 'https://forum.protospace.ca/tags/c/18/protouse-consent.json?match_all_tags=true&page={}&tags[]=protouse-consent'

    topic_ids = []

    try:
        for page in range(100):
            # Timeout so a stalled connection cannot hang the bot forever.
            r = requests.get(API_TAG_URL.format(page), timeout=10)
            r.raise_for_status()
            r = r.json()

            topics = r['topic_list']['topics']
            ids = [str(t['id']) for t in topics]
            topic_ids.extend(ids)

            logging.info('Got {} topic IDs from page {}.'.format(len(ids), page))

            if not ids:
                break

    # Exception rather than BaseException: KeyboardInterrupt / SystemExit must
    # still be able to stop the program.
    except Exception as e:
        logging.error('Problem getting topic IDs: {} - {}'.format(e.__class__.__name__, str(e)))
        return False

    # IDs are strings; compare them as integers so that e.g. '10000' sorts
    # above '999' (plain string comparison would order them the other way).
    return sorted(topic_ids, key=int, reverse=True)
||||
|
||||
def get_topic_details(topic_id):
    """Fetch the JSON description of a single forum topic.

    Args:
        topic_id: Forum topic ID, substituted into the ``/t/{}.json`` URL.

    Returns:
        The decoded JSON response, or ``False`` if the request fails, returns
        an HTTP error status, or the body is not valid JSON.
    """
    API_TOPIC_URL = 'https://forum.protospace.ca/t/{}.json'

    try:
        # Timeout so a stalled connection cannot hang the bot forever.
        r = requests.get(API_TOPIC_URL.format(topic_id), timeout=10)
        r.raise_for_status()
        r = r.json()

        logging.info('Got topic ID: {}.'.format(topic_id))

    # Exception rather than BaseException: KeyboardInterrupt / SystemExit must
    # still be able to stop the program.
    except Exception as e:
        logging.error('Problem getting topic details: {} - {}'.format(e.__class__.__name__, str(e)))
        return False

    return r
||||
|
||||
def fetch_missing_topics(topic_ids):
    """Download and persist details for every topic not yet in ``data['topics']``.

    Args:
        topic_ids: Iterable of topic ID strings. A falsy value (including the
            ``False`` that the ID lookup returns on failure) is treated as
            "nothing to fetch" instead of raising ``TypeError``.

    Topics that fail to download are skipped; they are still missing on the
    next call and will be tried again then. State is saved after each
    successful fetch so progress survives an interruption.
    """
    if not topic_ids:
        return

    for topic_id in topic_ids:
        if topic_id in data['topics']:
            continue

        # Pause before each request to stay gentle on the forum.
        time.sleep(1)

        topic = get_topic_details(topic_id)

        if not topic:
            continue

        data['topics'][topic_id] = topic
        store_data()

        # .get(): a response without a title should not abort the whole run
        # after the topic has already been stored.
        logging.info('Fetched topic {}: {}'.format(topic_id, topic.get('title')))
||||
|
||||
def api_chatgpt(prompt):
    """Send ``prompt`` to the OpenAI chat completions API and return the reply.

    Args:
        prompt: Text sent as the single user message, after a fixed system
            message.

    Returns:
        The content of the first returned choice, or ``False`` if the request
        fails, times out (40 s), or the response lacks the expected fields.
    """
    thread = [
        dict(role='system', content='You are ChatGPT, a large language model trained by OpenAI. Answer as concisely as possible. Be terse.'),
        dict(role='user', content=prompt),
    ]

    # Named `payload` so it does not shadow the module-level `data` state dict.
    payload = dict(
        messages=thread,
        model='gpt-4-1106-preview',
        temperature=0.5,
        user='protogram',
        max_tokens=1000,
    )
    headers = {'Authorization': 'Bearer ' + secrets.OPENAI_KEY}

    # Monotonic clock: the measured duration is unaffected by system clock
    # adjustments.
    start = time.monotonic()

    try:
        r = requests.post('https://api.openai.com/v1/chat/completions', json=payload, headers=headers, timeout=40)
        r.raise_for_status()
        r = r.json()

        gpt_reply = r['choices'][0]['message']['content']
    # Exception rather than BaseException: KeyboardInterrupt / SystemExit must
    # still be able to stop the program.
    except Exception as e:
        logging.error('Problem with chatgpt: {} - {}'.format(e.__class__.__name__, str(e)))
        return False

    elapsed = time.monotonic() - start

    logging.info('Got ChatGPT response in {}s:\n{}'.format(str(elapsed), gpt_reply))
    return gpt_reply
||||
|
||||
def get_portal_name_from_discourse(username):
    """Look up a member's preferred name from their forum (Discourse) username.

    Args:
        username: Discourse username, sent as the ``discourse_username`` query
            parameter to the portal search endpoint.

    Returns:
        ``member.preferred_name`` from the JSON response, or ``False`` if the
        request fails, times out (5 s), or the response lacks that field.
    """
    try:
        params = dict(discourse_username=username)
        headers = {'Authorization': 'Bearer ' + secrets.PROTOGRAM_API_KEY}
        r = requests.get('https://api.my.protospace.ca/search/discourse/', params=params, headers=headers, timeout=5)
        r.raise_for_status()
        r = r.json()

        return r['member']['preferred_name']
    # Exception rather than BaseException: KeyboardInterrupt / SystemExit must
    # still be able to stop the program.
    except Exception as e:
        logging.error('Problem with getting member name: {} - {}'.format(e.__class__.__name__, str(e)))
        return False
||||
|
||||
def generate_caption(topic):
    """Build the ChatGPT prompt for a forum topic and return the generated caption.

    Args:
        topic: Topic JSON dict; reads ``title`` and, from the first post in
            ``post_stream.posts``, ``username`` and ``cooked`` (HTML body).

    Returns:
        The caption text from ``api_chatgpt``, or ``False`` if the member's
        name cannot be resolved or the ChatGPT request fails.
    """
    title = topic['title']
    first_post = topic['post_stream']['posts'][0]

    username = first_post['username']
    member = get_portal_name_from_discourse(username)
    if not member:
        return False
    logging.info('Converted discourse username {} -> {}'.format(username, member))

    soup = BeautifulSoup(first_post['cooked'], 'html.parser')
    lines = soup.get_text().split('\n')
    # Heuristic: drop any line mentioning KB/MB, presumably the file-size
    # captions rendered under uploaded images. TODO confirm against real posts.
    filtered_lines = [line for line in lines if 'KB' not in line and 'MB' not in line]
    body = '\n'.join(filtered_lines)

    # Collapse every run of blank lines down to a single blank line. One
    # replace() pass is not enough: six newlines would still leave four.
    while '\n\n\n' in body:
        body = body.replace('\n\n\n', '\n\n')

    prompt = CHATGPT_TEMPLATE.format(title, member, body)

    logging.info('Generating caption for: {}\n{}'.format(title, body))

    return api_chatgpt(prompt)
||||
|
||||
def test_generate_topic_ids(topic_ids):
    """Manually exercise caption generation for already-fetched topics.

    Logs the prompt template once, then runs ``generate_caption`` on each
    stored topic in ``topic_ids`` (keys of ``data['topics']``). The captions
    are not returned; results are only visible in the log output.
    """
    header = 'Test generating topic IDs {} with template:\n{}\n\n'
    logging.info(header.format(str(topic_ids), CHATGPT_TEMPLATE))

    stored_topics = data['topics']
    for current_id in topic_ids:
        stored_topic = stored_topics[current_id]
        generate_caption(stored_topic)
        logging.info('Finished topic ID {}\n\n\n'.format(current_id))

    logging.info('Done.')
||||
|
||||
def save_images_from_topic(topic_id, topic, state):
    """Download the JPEG images linked from a topic's first post.

    Args:
        topic_id: Topic ID, used in the saved file names
            (``data/photos/<topic_id>_<n>.jpg``).
        topic: Topic JSON dict; reads ``link_counts`` of the first post.
        state: The topic's state dict; ``state['photos']`` is replaced with
            the list of files saved by this call.

    Returns:
        ``False`` if the post links to no ``.jpeg`` URLs, otherwise the number
        of photos successfully downloaded (which may be 0 if every download
        failed).
    """
    first_post = topic['post_stream']['posts'][0]

    # .get(): a post without any links may carry no 'link_counts' entry at all
    # (assumption about the forum JSON); treat that as "no photos" rather than
    # raising KeyError.
    link_counts = first_post.get('link_counts') or []

    urls = [x['url'] for x in link_counts if x['url'].endswith('.jpeg')]  # skip png for now

    if not urls:
        logging.info('No photos found.')
        return False

    logging.info('Downloading {} photos...'.format(len(urls)))

    # Start from an empty list so that re-running a topic (e.g. after an
    # interrupted earlier attempt) does not accumulate duplicate entries.
    state['photos'] = []

    count = 0
    for url in urls:
        logging.info('Downloading photo: {}'.format(url))

        try:
            r = requests.get(url, timeout=10)
            r.raise_for_status()
            # `count` only advances on success, so saved files are numbered
            # consecutively even when some downloads fail.
            filename = 'data/photos/{}_{}.jpg'.format(topic_id, count)

            with open(filename, 'wb') as f:
                f.write(r.content)

            state['photos'].append(filename)
            store_data()

            count += 1
        # Exception rather than BaseException: KeyboardInterrupt / SystemExit
        # must still be able to stop the program.
        except Exception as e:
            logging.error('Problem downloading photo: {} - {}'.format(e.__class__.__name__, str(e)))
            continue

    return count
||||
|
||||
def find_next_valid_topic_id(topic_ids):
    """Return the first topic ID that still needs processing.

    A topic needs processing when it has no entry in ``data['states']`` or
    when its recorded status is anything other than the terminal ``'POSTED'``
    / ``'ERROR'`` (for example a topic left in ``'NEW'`` by an interrupted
    run). Previously only topics without any state entry were ever returned,
    because the status check fell through to the next iteration either way.

    Args:
        topic_ids: Topic ID strings in priority order. A falsy value
            (including ``False`` from a failed ID lookup) yields ``False``.

    Returns:
        The first matching topic ID, or ``False`` if there is none.
    """
    terminal_statuses = ('POSTED', 'ERROR')

    for topic_id in topic_ids or []:
        state = data['states'].get(topic_id)
        if state is None or state.get('status') not in terminal_statuses:
            return topic_id

    return False
||||
|
||||
async def send_data_to_admin(state):
    """Send a topic's caption and photos to the admin's Telegram chat.

    Args:
        state: Topic state dict; ``state['caption']`` is the message text and
            ``state['photos']`` the list of file paths to attach.

    Returns:
        ``True`` if the message was sent, ``False`` if sending raised an error.
    """
    try:
        await bot.send_message(
            secrets.ADMIN_TELEGRAM_ID,
            state['caption'],
            file=state['photos'],
        )
        return True
    # Exception rather than BaseException: asyncio task cancellation
    # (CancelledError) and KeyboardInterrupt must propagate instead of being
    # reported as an ordinary send failure.
    except Exception as e:
        logging.error('Problem sending to admin: {} - {}'.format(e.__class__.__name__, str(e)))
        return False
||||
|
||||
|
||||
async def process_topics():
    """Main bot task: find the next unprocessed topic and send it to the admin.

    Each pass refreshes the topic ID list, fetches any missing topic details,
    picks the next topic that still needs processing, downloads its photos,
    generates a caption and sends both to the admin. The topic's status is
    recorded as ``'POSTED'`` or ``'ERROR'`` and persisted.

    Current (work-in-progress) behaviour that is intentionally preserved:
    after the first topic has been sent (or the send has failed), the task
    prints ``done`` and then idles forever. The idle is now a cooperative
    wait, so the Telegram event handlers keep running, instead of a busy loop
    that froze the event loop at 100% CPU.

    NOTE(review): the helper functions called here use blocking network I/O
    and ``time.sleep``, so the event loop is still stalled while they run.
    """
    # Seconds to wait before retrying when there is nothing to do or the forum
    # lookup failed; without a pause the loop would spin and hammer the forum.
    retry_delay = 60

    while True:
        # The weekly schedule gate is currently disabled. The intended check:
        #   FRIDAY = 4
        #   now = datetime.now(TIMEZONE_CALGARY)
        #   if not (now.weekday() == FRIDAY and now.hour == 18 and now.minute == 15):
        #       continue

        logging.info('Processing topics...')

        topic_ids = get_sorted_category_topic_ids()

        # The lookup returns False on failure; only continue with a real list.
        topic_id = False
        if topic_ids:
            fetch_missing_topics(topic_ids)
            topic_id = find_next_valid_topic_id(topic_ids)

        if not topic_id:
            logging.info('No next valid topic ID found.')
            await asyncio.sleep(retry_delay)
            continue

        logging.info('Next valid topic ID: {}'.format(topic_id))

        # The details may be missing if fetching this topic failed; retry
        # later instead of crashing the task with a KeyError.
        topic = data['topics'].get(topic_id)
        if topic is None:
            logging.error('Details for topic ID {} are missing, retrying later.'.format(topic_id))
            await asyncio.sleep(retry_delay)
            continue

        if topic_id not in data['states']:
            data['states'][topic_id] = dict(
                status='NEW',
                photos=[],
                caption=None
            )

        state = data['states'][topic_id]

        count = save_images_from_topic(topic_id, topic, state)

        if not count:
            state['status'] = 'ERROR'
            store_data()
            # Yield to the event loop before moving on to the next candidate.
            await asyncio.sleep(0)
            continue

        caption = generate_caption(topic)
        state['caption'] = caption

        if not caption:
            # Caption generation failed (returned False); do not send photos
            # with a missing caption and then record the topic as posted.
            state['status'] = 'ERROR'
            store_data()
            await asyncio.sleep(0)
            continue

        result = await send_data_to_admin(state)

        state['status'] = 'POSTED' if result else 'ERROR'
        store_data()

        print('done')
        # Halt after one topic, as before, but without blocking the event
        # loop: this Event is never set, so the wait never completes.
        await asyncio.Event().wait()
||||
|
||||
|
||||
|
||||
@bot.on(events.NewMessage(pattern='/start'))
async def start(event):
    """Answer messages matching ``/start`` with a fixed greeting."""
    greeting = 'Hello world'
    await event.respond(greeting)

    # Raised on purpose: signals Telethon not to pass this event on to the
    # handlers registered after this one.
    raise events.StopPropagation
||||
|
||||
@bot.on(events.NewMessage)
async def new_message(event):
    """Echo text messages back, but only when they come from the admin.

    Messages without text and messages from anyone other than
    ``secrets.ADMIN_TELEGRAM_ID`` are logged and ignored.
    """
    if not event.raw_text:
        logging.info('No text found')
        return

    logging.info('Message: ' + event.raw_text)

    # sender_id instead of sender.id: the full `sender` object can be None
    # when Telegram did not include it with the update, which would raise
    # AttributeError here.
    if event.sender_id != secrets.ADMIN_TELEGRAM_ID:
        logging.info('Message not from Admin')
        return

    await event.respond(event.text)
||||
|
||||
|
||||
def task_died(future):
    """Done-callback for the main task: report why it ended and shut down.

    Args:
        future: The finished task/future this callback was attached to.

    The reason the task ended (cancellation, exception, or an unexpected
    normal return) is logged first. If a ``SHELL`` environment variable is
    set, presumably meaning an interactive run, only the error is logged.
    Otherwise the function waits 60 seconds and exits the process by raising
    ``SystemExit``.
    """
    # Retrieve and log the cause; otherwise the exception that killed the task
    # is never shown alongside the "task died" message.
    if future.cancelled():
        logging.error('Protogram task ended: cancelled')
    else:
        exc = future.exception()
        if exc is None:
            logging.error('Protogram task ended: returned unexpectedly')
        else:
            logging.error(
                'Protogram task ended: {} - {}'.format(exc.__class__.__name__, str(exc)),
                exc_info=exc,
            )

    if os.environ.get('SHELL'):
        logging.error('Protogram task died!')
    else:
        logging.error('Protogram task died! Waiting 60s and exiting...')
        # TODO: notify the controller here once a controller_message() helper
        # exists; that notification must never prevent the exit below.
        time.sleep(60)
        # Raise SystemExit directly instead of relying on the site-provided
        # exit() helper, which is not guaranteed to be installed.
        raise SystemExit
||||
|
||||
if __name__ == '__main__':
    logging.info('===== BOOT UP =====')

    # Manual test snippets, left disabled:
    #topic_ids = get_sorted_category_topic_ids()
    #print(topic_ids)
    #fetch_missing_topics(topic_ids)
    #print('next valid:', find_next_valid_topic_id(topic_ids))
    #generate_caption(data['topics']['5363'])
    #test_generate_topic_ids(['5174', '5381', '5363', '5205', '5273'])

    loop = asyncio.get_event_loop()

    # Keep a strong reference to the task for as long as the loop runs: the
    # event loop itself only holds weak references to tasks, so an
    # unreferenced task may be garbage-collected mid-execution.
    # (add_done_callback returns None, so its result must not be what is kept.)
    main_task = loop.create_task(process_topics())
    main_task.add_done_callback(task_died)

    loop.run_forever()
Loading…
Reference in new issue