diff --git a/youtube_dl/extractor/bbc.py b/youtube_dl/extractor/bbc.py index e3b14c854..9cb7630a1 100644 --- a/youtube_dl/extractor/bbc.py +++ b/youtube_dl/extractor/bbc.py @@ -1012,10 +1012,10 @@ class BBCCoUkPlaylistBaseIE(InfoExtractor): class BBCCoUkIPlayerPlaylistIE(BBCCoUkPlaylistBaseIE): IE_NAME = 'bbc.co.uk:iplayer:playlist' - _VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/iplayer/episodes/(?P<id>%s)' % BBCCoUkIE._ID_REGEX + _VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/iplayer/(?:episodes|group)/(?P<id>%s)' % BBCCoUkIE._ID_REGEX _URL_TEMPLATE = 'http://www.bbc.co.uk/iplayer/episode/%s' _VIDEO_ID_TEMPLATE = r'data-ip-id=["\'](%s)' - _TEST = { + _TESTS = [{ 'url': 'http://www.bbc.co.uk/iplayer/episodes/b05rcz9v', 'info_dict': { 'id': 'b05rcz9v', @@ -1023,7 +1023,17 @@ class BBCCoUkIPlayerPlaylistIE(BBCCoUkPlaylistBaseIE): 'description': 'French thriller serial about a missing teenager.', }, 'playlist_mincount': 6, - } + 'skip': 'This programme is not currently available on BBC iPlayer', + }, { + # Available for over a year unlike 30 days for most other programmes + 'url': 'http://www.bbc.co.uk/iplayer/group/p02tcc32', + 'info_dict': { + 'id': 'p02tcc32', + 'title': 'Bohemian Icons', + 'description': 'md5:683e901041b2fe9ba596f2ab04c4dbe7', + }, + 'playlist_mincount': 10, + }] def _extract_title_and_description(self, webpage): title = self._search_regex(r'

([^<]+)

', webpage, 'title', fatal=False) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 7314be747..6de50296c 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -587,6 +587,7 @@ from .nytimes import ( NYTimesArticleIE, ) from .nuvid import NuvidIE +from .odatv import OdaTVIE from .odnoklassniki import OdnoklassnikiIE from .oktoberfesttv import OktoberfestTVIE from .onet import ( diff --git a/youtube_dl/extractor/odatv.py b/youtube_dl/extractor/odatv.py new file mode 100644 index 000000000..314527f98 --- /dev/null +++ b/youtube_dl/extractor/odatv.py @@ -0,0 +1,50 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + NO_DEFAULT, + remove_start +) + + +class OdaTVIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?odatv\.com/(?:mob|vid)_video\.php\?.*\bid=(?P<id>[^&]+)' + _TESTS = [{ + 'url': 'http://odatv.com/vid_video.php?id=8E388', + 'md5': 'dc61d052f205c9bf2da3545691485154', + 'info_dict': { + 'id': '8E388', + 'ext': 'mp4', + 'title': 'Artık Davutoğlu ile devam edemeyiz' + } + }, { + # mobile URL + 'url': 'http://odatv.com/mob_video.php?id=8E388', + 'only_matching': True, + }, { + # no video + 'url': 'http://odatv.com/mob_video.php?id=8E900', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + no_video = 'NO VIDEO!' 
in webpage + + video_url = self._search_regex( + r'mp4\s*:\s*(["\'])(?P<url>http.+?)\1', webpage, 'video url', + default=None if no_video else NO_DEFAULT, group='url') + + if no_video: + raise ExtractorError('Video %s does not exist' % video_id, expected=True) + + return { + 'id': video_id, + 'url': video_url, + 'title': remove_start(self._og_search_title(webpage), 'Video: '), + 'thumbnail': self._og_search_thumbnail(webpage), + } diff --git a/youtube_dl/extractor/youjizz.py b/youtube_dl/extractor/youjizz.py index 4150b28da..31e2f9263 100644 --- a/youtube_dl/extractor/youjizz.py +++ b/youtube_dl/extractor/youjizz.py @@ -9,8 +9,8 @@ from ..utils import ( class YouJizzIE(InfoExtractor): - _VALID_URL = r'https?://(?:\w+\.)?youjizz\.com/videos/[^/#?]+-(?P<id>[0-9]+)\.html(?:$|[?#])' - _TEST = { + _VALID_URL = r'https?://(?:\w+\.)?youjizz\.com/videos/(?:[^/#?]+)?-(?P<id>[0-9]+)\.html(?:$|[?#])' + _TESTS = [{ 'url': 'http://www.youjizz.com/videos/zeichentrick-1-2189178.html', 'md5': '07e15fa469ba384c7693fd246905547c', 'info_dict': { @@ -19,7 +19,10 @@ class YouJizzIE(InfoExtractor): 'title': 'Zeichentrick 1', 'age_limit': 18, } - } + }, { + 'url': 'http://www.youjizz.com/videos/-2189178.html', + 'only_matching': True, + }] def _real_extract(self, url): video_id = self._match_id(url) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 49c264c3a..270ee8861 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1736,7 +1736,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): class YoutubeSharedVideoIE(InfoExtractor): - _VALID_URL = r'(?:https?:)?//(?:www\.)?youtube\.com/shared\?ci=(?P<id>[0-9A-Za-z_-]{11})' + _VALID_URL = r'(?:https?:)?//(?:www\.)?youtube\.com/shared\?.*\bci=(?P<id>[0-9A-Za-z_-]{11})' IE_NAME = 'youtube:shared' _TEST = {