Compare commits

...

2 Commits

  1. 8
      apiserver/feeds/reddit.py
  2. 8
      apiserver/feeds/tildes.py
  3. 45
      apiserver/reindex.py
  4. 76
      apiserver/search.py
  5. 4
      apiserver/server.py
  6. 2
      searchserver/README.md

@ -33,10 +33,10 @@ def feed():
except KeyboardInterrupt:
raise
except PRAWException as e:
logging.error('Problem hitting reddit API: {}'.format(str(e)))
logging.critical('Problem hitting reddit API: {}'.format(str(e)))
return []
except PrawcoreException as e:
logging.error('Problem hitting reddit API: {}'.format(str(e)))
logging.critical('Problem hitting reddit API: {}'.format(str(e)))
return []
def comment(i):
@ -84,10 +84,10 @@ def story(ref):
except KeyboardInterrupt:
raise
except PRAWException as e:
logging.error('Problem hitting reddit API: {}'.format(str(e)))
logging.critical('Problem hitting reddit API: {}'.format(str(e)))
return False
except PrawcoreException as e:
logging.error('Problem hitting reddit API: {}'.format(str(e)))
logging.critical('Problem hitting reddit API: {}'.format(str(e)))
return False
# scratchpad so I can quickly develop the parser

@ -34,7 +34,7 @@ def api(route):
except KeyboardInterrupt:
raise
except BaseException as e:
logging.error('Problem hitting tildes website: {}'.format(str(e)))
logging.critical('Problem hitting tildes website: {}'.format(str(e)))
return False
def feed():
@ -113,7 +113,7 @@ def story(ref):
# scratchpad so I can quickly develop the parser
if __name__ == '__main__':
#print(feed())
print(feed())
#normal = story('gxt')
#print(normal)
#no_comments = story('gxr')
@ -122,8 +122,8 @@ if __name__ == '__main__':
#print(self_post)
#li_comment = story('gqx')
#print(li_comment)
broken = story('q4y')
print(broken)
#broken = story('q4y')
#print(broken)
# make sure there's no self-reference
#import copy

@ -0,0 +1,45 @@
import logging
logging.basicConfig(
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
level=logging.INFO)
import database
from sqlalchemy import select
import search
import sys
import json
import requests
# Module-level initialisation: both calls run as soon as this file is loaded,
# before any of the helpers below are used.
# NOTE(review): database.init() is defined in the database module, which is
# not visible here -- presumably it prepares the engine/session factory.
database.init()
search.init()
def count_stories():
    """Return the number of ``database.Story`` rows.

    The session is opened *before* the ``try`` block so that a failure in
    ``database.Session()`` propagates unchanged instead of being masked by an
    ``UnboundLocalError`` raised from the ``finally`` clause.
    """
    session = database.Session()
    try:
        return session.query(database.Story).count()
    finally:
        # Always release the session, whether or not the query succeeded.
        session.close()
def get_story_list():
    """Return the ``sid`` column of every ``database.Story`` row.

    Each element of the returned list is a one-column row, which is why the
    caller reads the id as ``sid[0]``.

    The session is opened *before* the ``try`` block so that a failure in
    ``database.Session()`` propagates unchanged instead of being masked by an
    ``UnboundLocalError`` raised from the ``finally`` clause.
    """
    session = database.Session()
    try:
        return session.query(database.Story.sid).all()
    finally:
        # Always release the session, whether or not the query succeeded.
        session.close()
if __name__ == '__main__':
    # Interactive re-index: show how many stories will be pushed to the search
    # index and wait for confirmation before touching anything.
    num_stories = count_stories()
    print('Reindex {} stories?'.format(num_stories))
    print('Press ENTER to continue, ctrl-c to cancel')
    input()

    # Each entry from get_story_list() is a one-column row, so the bare id is
    # sid[0]; the row itself is what gets handed to database.get_story().
    for count, sid in enumerate(get_story_list(), start=1):
        story = database.get_story(sid)
        print('Indexing {}/{} id: {} title: {}'.format(count, num_stories, sid[0], story.title))
        # meta_json is decoded from JSON text before being sent to the index.
        search.put_story(json.loads(story.meta_json))

@ -7,51 +7,34 @@ import requests
MEILI_URL = 'http://127.0.0.1:7700/'


def meili_api(method, route, json=None, params=None):
    """Send one request to the MeiliSearch server and return the decoded reply.

    Args:
        method: Callable that performs the HTTP request, e.g. ``requests.post``.
            It is invoked as ``method(url, json=..., params=..., timeout=4)``.
        route: Path appended to ``MEILI_URL``, e.g. ``'indexes'``.
        json: Optional request body, passed through to ``method`` unchanged.
        params: Optional query parameters, passed through to ``method``.

    Returns:
        The decoded JSON body of the response, or ``False`` when the call
        raised, the status code was above 299, or the body failed to decode.
        The failure is logged; it is never raised to the caller (except for
        ``KeyboardInterrupt``, which is re-raised).
    """
    try:
        r = method(MEILI_URL + route, json=json, params=params, timeout=4)
        if r.status_code > 299:
            raise Exception('Bad response code ' + str(r.status_code))
        return r.json()
    except KeyboardInterrupt:
        # Let ctrl-c reach the caller instead of turning it into False.
        raise
    except BaseException as e:
        # Deliberately broad, matching the error handling used elsewhere in
        # the project: any failure is logged and reported as False.
        logging.error('Problem with MeiliSearch api route: %s: %s', route, str(e))
        return False
def create_index():
    """Create the ``qotnews`` index with ``id`` as its primary key.

    Returns whatever ``meili_api`` returns: the decoded response on success,
    ``False`` on failure.
    """
    payload = {'uid': 'qotnews', 'primaryKey': 'id'}
    return meili_api(requests.post, 'indexes', json=payload)
def update_rankings():
    """Set the ranking rules of the ``qotnews`` index.

    Replaces the hand-rolled request/try/except with a single call to the
    shared ``meili_api`` helper, which handles timeouts, status checking and
    logging.

    Returns the decoded response on success, ``False`` on failure.
    """
    # Rule order is sent exactly as listed.
    # NOTE(review): 'desc(date)' presumably ranks newer stories first -- confirm
    # against the MeiliSearch version in use.
    rules = ['typo', 'words', 'proximity', 'attribute', 'desc(date)', 'wordsPosition', 'exactness']
    return meili_api(requests.post, 'indexes/qotnews/settings/ranking-rules', json=rules)
def update_attributes():
    """Set the searchable attributes and reset the displayed attributes.

    Two requests are made against the ``qotnews`` index:

    1. POST the list of searchable attributes.
    2. DELETE the displayed-attributes setting. No request body is sent with
       the DELETE; the earlier inline implementation sent none either, and
       the searchable-attributes list has no meaning there.

    Returns the result of the first (searchable-attributes) request: the
    decoded response on success, ``False`` on failure. A failure of the
    second request is logged by ``meili_api`` but does not change the return
    value.
    """
    searchable = ['title', 'url', 'author', 'link', 'id']
    r = meili_api(requests.post, 'indexes/qotnews/settings/searchable-attributes', json=searchable)
    meili_api(requests.delete, 'indexes/qotnews/settings/displayed-attributes')
    return r
def init():
    """Create the search index and apply its ranking and attribute settings.

    The index is created exactly once per call. The outcome of the creation
    request is reported through ``logging`` rather than a bare ``print`` so it
    follows the same output channel as the rest of the module.
    """
    created = create_index()
    logging.info('MeiliSearch create_index result: %s', created)
    update_rankings()
    update_attributes()
@ -59,31 +42,14 @@ def put_story(story):
story = story.copy()
story.pop('text', None)
story.pop('comments', None)
try:
r = requests.post(MEILI_URL + 'indexes/qotnews/documents', json=[story], timeout=2)
if r.status_code != 202:
raise Exception('Bad response code ' + str(r.status_code))
return r.json()
except KeyboardInterrupt:
raise
except BaseException as e:
logging.error('Problem putting MeiliSearch story: {}'.format(str(e)))
return False
return meili_api(requests.post, 'indexes/qotnews/documents', [story])
def search(q):
    """Query the ``qotnews`` index and return the matching documents.

    Args:
        q: The search string, sent as the ``q`` query parameter.

    Returns:
        The ``hits`` list from the response (at most 250 entries are
        requested), or ``False`` if the request failed.
    """
    params = dict(q=q, limit=250)
    r = meili_api(requests.get, 'indexes/qotnews/search', params=params)
    if not r:
        # meili_api signals failure with False; indexing into it would raise
        # TypeError, so pass the failure value on to the caller instead.
        return False
    return r['hits']
if __name__ == '__main__':
    # Manual smoke test: init() creates the index and applies its settings,
    # so no separate create_index() call is needed before it.
    init()
    print(search('the'))
    print(search('qot'))

@ -65,6 +65,8 @@ def submit():
url = request.form['url']
nid = new_id()
logging.info('Manual submission: ' + url)
parse = urlparse(url)
if 'news.ycombinator.com' in parse.hostname:
source = 'hackernews'
@ -200,7 +202,7 @@ def feed_thread():
except KeyboardInterrupt:
logging.info('Ending feed thread...')
except ValueError as e:
logging.error('feed_thread error: {} {}'.format(e.__class__.__name__, e))
logging.critical('feed_thread error: {} {}'.format(e.__class__.__name__, e))
http_server.stop()
print('Starting Feed thread...')

@ -3,7 +3,7 @@
Download MeiliSearch with:
```
wget https://github.com/meilisearch/MeiliSearch/releases/download/v0.11.1/meilisearch-linux-amd64
wget https://github.com/meilisearch/MeiliSearch/releases/download/v0.21.1/meilisearch-linux-amd64
chmod +x meilisearch-linux-amd64
```

Loading…
Cancel
Save