Remove the 'img' field from stories and skip r/technology links
This commit is contained in:
parent
ebedaef00b
commit
9c116bde4a
|
@ -43,22 +43,29 @@ with archive.ix.searcher() as searcher:
|
||||||
print('num', count, 'id', doc['id'])
|
print('num', count, 'id', doc['id'])
|
||||||
count += 1
|
count += 1
|
||||||
|
|
||||||
|
story = doc['story']
|
||||||
|
story.pop('img', None)
|
||||||
|
|
||||||
|
if 'reddit.com/r/technology' in story['link']:
|
||||||
|
print('skipping r/technology')
|
||||||
|
continue
|
||||||
|
|
||||||
try:
|
try:
|
||||||
database.put_story(doc['story'])
|
database.put_story(story)
|
||||||
except database.IntegrityError:
|
except database.IntegrityError:
|
||||||
print('collision!')
|
print('collision!')
|
||||||
old_story = database.get_story_by_ref(doc['story']['ref'])
|
old_story = database.get_story_by_ref(story['ref'])
|
||||||
story = json.loads(old_story.full_json)
|
old_story = json.loads(old_story.full_json)
|
||||||
if doc['story']['num_comments'] > story['num_comments']:
|
if story['num_comments'] > old_story['num_comments']:
|
||||||
print('more comments, replacing')
|
print('more comments, replacing')
|
||||||
database_del_story_by_ref(doc['story']['ref'])
|
database_del_story_by_ref(story['ref'])
|
||||||
database.put_story(doc['story'])
|
database.put_story(story)
|
||||||
search_del_story(story['id'])
|
search_del_story(old_story['id'])
|
||||||
else:
|
else:
|
||||||
print('fewer comments, skipping')
|
print('fewer comments, skipping')
|
||||||
continue
|
continue
|
||||||
|
|
||||||
search.put_story(doc['story'])
|
search.put_story(story)
|
||||||
print()
|
print()
|
||||||
except KeyboardInterrupt:
|
except KeyboardInterrupt:
|
||||||
break
|
break
|
||||||
|
|
Loading…
Reference in New Issue
Block a user