qotnews/apiserver/scripts/fix-stories.py

59 lines
1.4 KiB
Python
Raw Normal View History

2022-08-10 04:06:39 +00:00
import time
2022-07-05 00:57:56 +00:00
import json
2022-07-04 20:32:01 +00:00
import logging
2022-07-05 00:57:56 +00:00
import feed
import database
import search
2022-07-04 20:32:01 +00:00
# Initialise the database module at import time, before any of the
# database.* calls made further down in this script.
# NOTE(review): what init() actually sets up is not visible from this file --
# confirm in the database module.
database.init()
def fix_gzip_bug(story_list):
    """Re-fetch stories whose stored text is full of U+FFFD replacement chars.

    For every row in *story_list* the story is loaded from the database and
    the number of U+FFFD characters in its ``text`` field is counted.  Stories
    whose density of replacement characters (per 1000 characters of text) is
    at or above ``FIX_THRESHOLD`` are re-fetched with ``feed.update_story``
    and, when the refreshed story is reported valid, written back to both the
    database and the search index.

    NOTE(review): judging by the name, the corrupted text presumably comes
    from gzip-compressed responses that were stored without being
    decompressed -- confirm against the commit history.

    Args:
        story_list: Iterable of rows whose first element is the story id
            (this script passes the result of ``database.get_story_list()``).

    Returns:
        None.  Progress is reported on stdout.

    Any ordinary exception raised while handling a story is logged and then
    drops into the debugger via ``breakpoint()`` so the operator can inspect
    the failure; KeyboardInterrupt and SystemExit propagate normally.
    """
    # Minimum replacement characters per 1000 characters of text before a
    # story is considered broken enough to re-fetch.
    FIX_THRESHOLD = 150
    # U+FFFD REPLACEMENT CHARACTER, written as an escape so the literal
    # survives any re-encoding of this source file.
    REPLACEMENT_CHAR = '\ufffd'

    for row in story_list:
        try:
            sid = row[0]
            story = database.get_story(sid)
            full_json = json.loads(story.full_json)
            meta_json = json.loads(story.meta_json)
            # Treat a missing or null text field as empty.
            text = full_json.get('text') or ''

            count = text.count(REPLACEMENT_CHAR)
            if not count:
                continue

            # count > 0 implies len(text) > 0, so the division is safe.
            ratio = count / len(text) * 1000
            print('Bad story:', sid, 'Num ?:', count, 'Ratio:', ratio)
            if ratio < FIX_THRESHOLD:
                continue

            print('Attempting to fix...')
            valid = feed.update_story(meta_json, is_manual=True)
            if valid:
                database.put_story(meta_json)
                search.put_story(meta_json)
                print('Success')
            else:
                print('Story was not valid')

            # Pause between re-fetch attempts (presumably to avoid hammering
            # the upstream sites -- confirm).
            time.sleep(3)
        except Exception:
            # Only ordinary errors are trapped; Ctrl-C and SystemExit are not
            # Exception subclasses and therefore still abort the run.
            logging.exception('Failed to process story row %r', row)
            # Deliberate: this is an interactive maintenance script, so stop
            # in the debugger to let the operator inspect the failure.
            breakpoint()
if __name__ == '__main__':
    # Show how many stories are stored and wait for explicit confirmation
    # before scanning them; ctrl-c at the prompt aborts the run.
    total = database.count_stories()
    print('Fix {} stories?'.format(total))
    print('Press ENTER to continue, ctrl-c to cancel')
    input()

    # The story list is only fetched once the operator has confirmed.
    fix_gzip_bug(database.get_story_list())