Change the order in which content-type is grabbed

This commit is contained in:
Tanner Collin 2021-01-30 06:36:02 +00:00
parent 6f64401785
commit 7a131ebd03

View File

@@ -66,20 +66,20 @@ def get_article(url):
return '' return ''
def get_content_type(url): def get_content_type(url):
try:
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:77.0) Gecko/20100101 Firefox/77.0'}
return requests.get(url, headers=headers, timeout=5).headers['content-type']
except:
return ''
try: try:
headers = { headers = {
'User-Agent': 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)', 'User-Agent': 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)',
'X-Forwarded-For': '66.249.66.1', 'X-Forwarded-For': '66.249.66.1',
} }
return requests.get(url, headers=headers, timeout=5).headers['content-type']
except:
pass
try:
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:77.0) Gecko/20100101 Firefox/77.0'}
return requests.get(url, headers=headers, timeout=10).headers['content-type'] return requests.get(url, headers=headers, timeout=10).headers['content-type']
except: except:
return '' pass
def update_story(story, is_manual=False): def update_story(story, is_manual=False):
res = {} res = {}
@@ -133,7 +133,7 @@ if __name__ == '__main__':
#print(get_article('https://www.bloomberg.com/news/articles/2019-09-23/xi-s-communists-under-pressure-as-high-prices-hit-china-workers')) #print(get_article('https://www.bloomberg.com/news/articles/2019-09-23/xi-s-communists-under-pressure-as-high-prices-hit-china-workers'))
a = get_article('https://blog.joinmastodon.org/2019/10/mastodon-3.0/') a = get_content_type('https://tefkos.comminfo.rutgers.edu/Courses/e530/Readings/Beal%202008%20full%20text%20searching.pdf')
print(a) print(a)
print('done') print('done')