diff --git a/youtube_dl/extractor/stuffyoushouldknow.py b/youtube_dl/extractor/stuffyoushouldknow.py new file mode 100644 index 000000000..871d1659f --- /dev/null +++ b/youtube_dl/extractor/stuffyoushouldknow.py @@ -0,0 +1,40 @@ +# coding: utf-8 +from __future__ import unicode_literals +from .common import InfoExtractor + +import re + + +class StuffyoushouldknowIE(InfoExtractor): + _VALID_URL = r'https?://?(www).stuffyoushouldknow.com/podcasts/(?P[[a-zA-Z0-9_-]+)' + _TEST = { + 'url': 'http://www.stuffyoushouldknow.com/podcasts/banned-kids-advertising.htm', + 'md5': '12cfeb58e11776addb58ce37c12711b7', + 'info_dict': { + 'title': 'Should Advertising to Kids Be Banned?', + 'url': 'http://www.stuffyoushouldknow.com/podcasts/banned-kids-advertising.htm', + 'site_name': 'Stuff You Should Know', + 'description': 'As kids’ buying power in America has exploded in recent decades, so too has the amount companies spend advertising to them. But because of a quirk of brain development, kids aren’t equipped to understand ads are manipulating them. Should they be banned?', + 'content': 'http://s.hswstatic.com/gif/banned-kids-advertising-sysk.jpg', + }, + + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id=mobj.group('id') + webpage = self._download_webpage(url, video_id) + + video_url = re.search(r'https?://www.podtrac.com/pts/redirect.mp3/podcasts.howstuffworks.com/hsw/podcasts' + r'/sysk/[0-9a-zA-Z_-]*.mp3', webpage) + title = self._og_search_title(webpage) + site_name= self._og_search_title(webpage) + description = self._og_search_description(webpage) + + return { + 'id': video_id, + 'title': title, + 'description': description, + 'site name': site_name, + 'url': video_url.group(0) + } \ No newline at end of file