|
|
|
@ -1,7 +1,9 @@ |
|
|
|
|
import json |
|
|
|
|
import logging |
|
|
|
|
import database |
|
|
|
|
|
|
|
|
|
import json |
|
|
|
|
import feed |
|
|
|
|
import database |
|
|
|
|
import search |
|
|
|
|
|
|
|
|
|
database.init() |
|
|
|
|
|
|
|
|
@ -14,6 +16,7 @@ def fix_gzip_bug(story_list): |
|
|
|
|
sid = sid[0] |
|
|
|
|
story = database.get_story(sid) |
|
|
|
|
full_json = json.loads(story.full_json) |
|
|
|
|
meta_json = json.loads(story.meta_json) |
|
|
|
|
text = full_json.get('text', '') |
|
|
|
|
|
|
|
|
|
count = text.count('<EFBFBD>') |
|
|
|
@ -21,6 +24,20 @@ def fix_gzip_bug(story_list): |
|
|
|
|
|
|
|
|
|
ratio = count / len(text) * 1000 |
|
|
|
|
print('Bad story:', sid, 'Num ?:', count, 'Ratio:', ratio) |
|
|
|
|
if ratio < FIX_THRESHOLD: continue |
|
|
|
|
|
|
|
|
|
print('Attempting to fix...') |
|
|
|
|
|
|
|
|
|
valid = feed.update_story(meta_json, is_manual=True) |
|
|
|
|
if valid: |
|
|
|
|
database.put_story(meta_json) |
|
|
|
|
search.put_story(meta_json) |
|
|
|
|
print('Success') |
|
|
|
|
else: |
|
|
|
|
print('Story was not valid') |
|
|
|
|
|
|
|
|
|
time.sleep(3) |
|
|
|
|
|
|
|
|
|
except KeyboardInterrupt: |
|
|
|
|
raise |
|
|
|
|
except BaseException as e: |
|
|
|
|