Stitcher review updates

This commit is contained in:
mjdubell 2015-10-19 17:48:34 +02:00
parent 2a24b7b5c8
commit 3c17b4ca6e

View File

@ -1,11 +1,11 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
import re
class StitcherIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?stitcher\.com/podcast/[\/a-z\-]+\d+\?.+'
_VALID_URL = r'https?://(?:www\.)?stitcher\.com/podcast/[\/a-z\-]+(?P<id>\d+)|\?[a-z=]+'
_TEST = {
'url': 'http://www.stitcher.com/podcast/the-talking-machines/e/40789481?autoplay=true',
'md5': '391dd4e021e6edeb7b8e68fbf2e9e940',
@ -17,14 +17,14 @@ class StitcherIE(InfoExtractor):
}
def _real_extract(self, url):
audio_id = self._search_regex(r'[a-z\/\-\:\/\/.]+?(\d+?)\?.+', url, "audio_id")
audio_id = self._match_id(url)
webpage = self._download_webpage(url, audio_id)
title = self._og_search_title(webpage)
url = self._search_regex(r'[\s\S]*episodeURL: "(.+?)"[\s\S]*', webpage, 'url')
episode_image = self._search_regex(r'[\s\S]*episodeImage: "(.+?)"[\s\S]*', webpage, 'thumbnail')
duration = int(self._search_regex(r'[\s\S]*duration: (\d+?),[\s\S]*', webpage, 'duration')) / 60
url = self._search_regex(r'episodeURL: "(.+?)"', webpage, 'url')
episode_image = self._search_regex(r'episodeImage: "(.+?)"', webpage, 'episode_image', fatal=False)
duration = self._search_regex(r'simpleDuration: "(\d+?) minutes"', webpage, 'duration', fatal=False)
return {
'id': audio_id,