Add tildes to feeds
@@ -5,7 +5,7 @@ logging.basicConfig(
 
 import requests
 
-from feeds import hackernews, reddit
+from feeds import hackernews, reddit, tildes
 
 READ_API = 'http://127.0.0.1:33843'
 
@@ -13,6 +13,7 @@ def list():
     feed = []
     feed += [(x, 'hackernews') for x in hackernews.feed()]
     feed += [(x, 'reddit') for x in reddit.feed()]
+    feed += [(x, 'tildes') for x in tildes.feed()]
     return feed
 
 def get_article(url):
@@ -32,6 +33,8 @@ def update_story(story):
         res = hackernews.story(story['ref'])
     elif story['source'] == 'reddit':
         res = reddit.story(story['ref'])
+    elif story['source'] == 'tildes':
+        res = tildes.story(story['ref'])
     else:
         return
 
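Every module under feeds/ follows the two-function contract these hunks rely on: feed() returns a list of story refs, and story(ref) returns a dict for one story (or False on failure). As a rough sketch, a hypothetical new source would only need to provide the following; the module name and values are made up for illustration, and the fields mirror what tildes.py below produces:

# example.py -- hypothetical feeds module, for illustration only
def feed():
    # return a list of opaque refs that story() understands
    return ['abc1', 'abc2']

def story(ref):
    # return a dict for one story, or False if fetching failed
    return {
        'author': 'someone',
        'author_link': 'https://example.com/user/someone',
        'score': 1,
        'date': 1577836800,   # unix timestamp
        'title': 'Example story',
        'link': 'https://example.com/{}'.format(ref),
        'url': 'https://example.com/{}'.format(ref),
        'comments': [],
        'num_comments': 0,
    }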
apiserver/feeds/tildes.py (new file, 86 lines)
@@ -0,0 +1,86 @@
import logging
logging.basicConfig(
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
        level=logging.INFO)

import requests
from bs4 import BeautifulSoup
from datetime import datetime

USER_AGENT = 'qotnews scraper (github:tannercollin)'

API_TOPSTORIES = lambda x: 'https://tildes.net'
API_ITEM = lambda x : 'https://tildes.net/~qotnews/{}/'.format(x)

SITE_LINK = lambda x : 'https://tildes.net/~qotnews/{}/'.format(x)
SITE_AUTHOR_LINK = lambda x : 'https://tildes.net/user/{}'.format(x)

def api(route, ref=None):
    try:
        headers = {'User-Agent': USER_AGENT}
        r = requests.get(route(ref), headers=headers, timeout=5)
        if r.status_code != 200:
            raise
        return r.text
    except BaseException as e:
        logging.error('Problem hitting tildes website: {}'.format(str(e)))
        return False

def feed():
    soup = BeautifulSoup(api(API_TOPSTORIES), features='html.parser')
    articles = soup.find('ol', class_='topic-listing').findAll('article')
    return [x['id'].split('-')[1] for x in articles][:30] or []

def unix(date_str):
    return int(datetime.strptime(date_str, '%Y-%m-%dT%H:%M:%SZ').timestamp())

def comment(i):
    i = i.article
    c = {}
    c['author'] = str(i.find('a', class_='link-user').string)
    c['score'] = 1
    c['date'] = unix(i.find('time')['datetime'])
    c['text'] = i.find('div', class_='comment-text').encode_contents().decode()
    c['comments'] = [comment(j) for j in i.find('ol', class_='comment-tree').findAll('li', recursive=False)] if i.ol else []
    return c

def story(ref):
    html = api(API_ITEM, ref)
    if not html: return False

    soup = BeautifulSoup(html, features='html.parser')
    a = soup.find('article', class_='topic-full')
    h = a.find('header')

    s = {}
    s['author'] = str(h.find('a', class_='link-user').string)
    s['author_link'] = SITE_AUTHOR_LINK(s['author'])
    s['score'] = int(h.find('span', class_='topic-voting-votes').string)
    s['date'] = unix(h.find('time')['datetime'])
    s['title'] = str(h.h1.string)
    s['link'] = SITE_LINK(ref)
    ud = a.find('div', class_='topic-full-link')
    s['url'] = ud.a['href'] if ud else s['link']
    s['comments'] = [comment(i) for i in a.find('ol', id='comments').findAll('li', recursive=False)]
    ch = a.find('header', class_='topic-comments-header')
    s['num_comments'] = int(ch.h2.string.split(' ')[0]) if ch else 0

    td = a.find('div', class_='topic-full-text')
    if td:
        s['text'] = td.encode_contents().decode()

    return s

if __name__ == '__main__':
    print(feed())
    normal = story('gxt')
    print(normal)
    no_comments = story('gxr')
    print(no_comments)
    self_post = story('gsb')
    print(self_post)

    # make sure there's no self-reference
    import copy
    for x in [normal, no_comments, self_post]:
        _ = copy.deepcopy(x)
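Since comment() builds the reply tree out of plain nested dicts, callers can walk it with ordinary recursion. A small sketch, assuming the dict shape produced by comment() above (where 'comments' holds the direct replies), that tallies every comment in a story:

def count_comments(comments):
    # one for each comment, plus everything nested beneath it
    return sum(1 + count_comments(c['comments']) for c in comments)

# e.g. count_comments(story('gxt')['comments']) can be checked against
# the scraped 'num_comments' field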
@@ -32,7 +32,7 @@ cors = CORS(flask_app)
 @flask_app.route('/')
 def index():
     front_page = [news_cache[news_ref_to_id[ref]] for ref in news_list]
-    front_page = [copy.deepcopy(x) for x in front_page if 'title' in x]
+    front_page = [copy.copy(x) for x in front_page if 'title' in x]
     for story in front_page:
         if 'comments' in story: story.pop('comments')
         if 'text' in story: story.pop('text')
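Swapping copy.deepcopy for copy.copy here works because index() only pops top-level keys: a shallow copy shares nested values with the cached story, but removing a key from the copy never touches the original. Mutating a nested value would leak through, which is why the test block in tildes.py verifies the dicts can still be deep-copied safely. A quick illustration of the difference:

import copy

cached = {'title': 'Example', 'comments': [{'author': 'someone'}]}

shallow = copy.copy(cached)
shallow.pop('comments')           # removes the key from the copy only
assert 'comments' in cached       # the cached story still has it

shallow = copy.copy(cached)
shallow['comments'][0]['author'] = 'edited'         # nested mutation, however,
assert cached['comments'][0]['author'] == 'edited'  # leaks into the original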