adding stuffyoushouldknow extractor

This commit is contained in:
galdwulf 2016-12-13 11:40:08 +00:00
parent 3a40f859b5
commit 48f349863d

View File

@ -0,0 +1,40 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
import re
class StuffyoushouldknowIE(InfoExtractor):
_VALID_URL = r'https?://?(www).stuffyoushouldknow.com/podcasts/(?P<id>[[a-zA-Z0-9_-]+)'
_TEST = {
'url': 'http://www.stuffyoushouldknow.com/podcasts/banned-kids-advertising.htm',
'md5': '12cfeb58e11776addb58ce37c12711b7',
'info_dict': {
'title': 'Should Advertising to Kids Be Banned?',
'url': 'http://www.stuffyoushouldknow.com/podcasts/banned-kids-advertising.htm',
'site_name': 'Stuff You Should Know',
'description': 'As kids buying power in America has exploded in recent decades, so too has the amount companies spend advertising to them. But because of a quirk of brain development, kids arent equipped to understand ads are manipulating them. Should they be banned?',
'content': 'http://s.hswstatic.com/gif/banned-kids-advertising-sysk.jpg',
},
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id=mobj.group('id')
webpage = self._download_webpage(url, video_id)
video_url = re.search(r'https?://www.podtrac.com/pts/redirect.mp3/podcasts.howstuffworks.com/hsw/podcasts'
r'/sysk/[0-9a-zA-Z_-]*.mp3', webpage)
title = self._og_search_title(webpage)
site_name= self._og_search_title(webpage)
description = self._og_search_description(webpage)
return {
'id': video_id,
'title': title,
'description': description,
'site name': site_name,
'url': video_url.group(0)
}