From 740d7c49c251464b25bea5146e054390ada6b997 Mon Sep 17 00:00:00 2001 From: skacurt Date: Sat, 23 Apr 2016 04:20:04 +0300 Subject: [PATCH 1/6] [odatv] Add extractor --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/odatv.py | 51 ++++++++++++++++++++++++++++++ 2 files changed, 52 insertions(+) create mode 100644 youtube_dl/extractor/odatv.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 7314be747..6de50296c 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -587,6 +587,7 @@ from .nytimes import ( NYTimesArticleIE, ) from .nuvid import NuvidIE +from .odatv import OdaTVIE from .odnoklassniki import OdnoklassnikiIE from .oktoberfesttv import OktoberfestTVIE from .onet import ( diff --git a/youtube_dl/extractor/odatv.py b/youtube_dl/extractor/odatv.py new file mode 100644 index 000000000..dbf96aefe --- /dev/null +++ b/youtube_dl/extractor/odatv.py @@ -0,0 +1,51 @@ +# coding: utf-8 + +from __future__ import unicode_literals +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + remove_start +) +import re + + +class OdaTVIE(InfoExtractor): + _VALID_URL = r'^https?://(?:www\.)?odatv\.com/(?:mob|vid)_video\.php\?id=(?P<id>[^&]*)' + _TESTS = [{ + 'url': 'http://odatv.com/vid_video.php?id=8E388', + 'md5': 'dc61d052f205c9bf2da3545691485154', + 'info_dict': { + 'id': '8E388', + 'ext': 'mp4', + 'title': 'md5:69654805a16a16cf9ec9d055e079831c' + } + }, { + 'url': 'http://odatv.com/mob_video.php?id=8E388', + 'md5': 'dc61d052f205c9bf2da3545691485154', + 'info_dict': { + 'id': '8E388', + 'ext': 'mp4', + 'title': 'md5:69654805a16a16cf9ec9d055e079831c' + } + }, { + 'url': 'http://odatv.com/mob_video.php?id=8E900', + 'md5': '', + 'info_dict': { + 'id': '8E900', + 'ext': 'mp4', + 'title': 'not found check' + } + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + if 'NO VIDEO!' 
in webpage: + raise ExtractorError('Video %s does not exist' % video_id, expected=True) + + return { + 'id': video_id, + 'title': remove_start(self._og_search_title(webpage), 'Video: '), + 'thumbnail': self._og_search_thumbnail(webpage), + 'url': self._html_search_regex(r"(http.+?video_%s\.mp4)" % re.escape(video_id), webpage, 'url', flags=re.IGNORECASE) + } From 790b06b7d4490e1e54659fafb71167bc459c701c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 20 Jul 2016 21:43:22 +0700 Subject: [PATCH 2/6] [odatv] Improve (Closes #9285) --- youtube_dl/extractor/odatv.py | 35 +++++++++++++++++------------------ 1 file changed, 17 insertions(+), 18 deletions(-) diff --git a/youtube_dl/extractor/odatv.py b/youtube_dl/extractor/odatv.py index dbf96aefe..314527f98 100644 --- a/youtube_dl/extractor/odatv.py +++ b/youtube_dl/extractor/odatv.py @@ -1,51 +1,50 @@ # coding: utf-8 - from __future__ import unicode_literals + from .common import InfoExtractor from ..utils import ( ExtractorError, + NO_DEFAULT, remove_start ) -import re class OdaTVIE(InfoExtractor): - _VALID_URL = r'^https?://(?:www\.)?odatv\.com/(?:mob|vid)_video\.php\?id=(?P<id>[^&]*)' + _VALID_URL = r'https?://(?:www\.)?odatv\.com/(?:mob|vid)_video\.php\?.*\bid=(?P<id>[^&]+)' _TESTS = [{ 'url': 'http://odatv.com/vid_video.php?id=8E388', 'md5': 'dc61d052f205c9bf2da3545691485154', 'info_dict': { 'id': '8E388', 'ext': 'mp4', - 'title': 'md5:69654805a16a16cf9ec9d055e079831c' + 'title': 'Artık Davutoğlu ile devam edemeyiz' } }, { + # mobile URL 'url': 'http://odatv.com/mob_video.php?id=8E388', - 'md5': 'dc61d052f205c9bf2da3545691485154', - 'info_dict': { - 'id': '8E388', - 'ext': 'mp4', - 'title': 'md5:69654805a16a16cf9ec9d055e079831c' - } + 'only_matching': True, }, { + # no video 'url': 'http://odatv.com/mob_video.php?id=8E900', - 'md5': '', - 'info_dict': { - 'id': '8E900', - 'ext': 'mp4', - 'title': 'not found check' - } + 'only_matching': True, }] def _real_extract(self, url): video_id = 
self._match_id(url) webpage = self._download_webpage(url, video_id) - if 'NO VIDEO!' in webpage: + + no_video = 'NO VIDEO!' in webpage + + video_url = self._search_regex( + r'mp4\s*:\s*(["\'])(?P<url>http.+?)\1', webpage, 'video url', + default=None if no_video else NO_DEFAULT, group='url') + + if no_video: raise ExtractorError('Video %s does not exist' % video_id, expected=True) return { 'id': video_id, + 'url': video_url, 'title': remove_start(self._og_search_title(webpage), 'Video: '), 'thumbnail': self._og_search_thumbnail(webpage), - 'url': self._html_search_regex(r"(http.+?video_%s\.mp4)" % re.escape(video_id), webpage, 'url', flags=re.IGNORECASE) } From 84e8cca48bdb2cda290f458e31b34a677eb260ae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 20 Jul 2016 22:41:13 +0700 Subject: [PATCH 3/6] [youjizz] Relax _VALID_URL (Closes #10131) --- youtube_dl/extractor/youjizz.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/youjizz.py b/youtube_dl/extractor/youjizz.py index 4150b28da..31e2f9263 100644 --- a/youtube_dl/extractor/youjizz.py +++ b/youtube_dl/extractor/youjizz.py @@ -9,8 +9,8 @@ from ..utils import ( class YouJizzIE(InfoExtractor): - _VALID_URL = r'https?://(?:\w+\.)?youjizz\.com/videos/[^/#?]+-(?P<id>[0-9]+)\.html(?:$|[?#])' - _TEST = { + _VALID_URL = r'https?://(?:\w+\.)?youjizz\.com/videos/(?:[^/#?]+)?-(?P<id>[0-9]+)\.html(?:$|[?#])' + _TESTS = [{ 'url': 'http://www.youjizz.com/videos/zeichentrick-1-2189178.html', 'md5': '07e15fa469ba384c7693fd246905547c', 'info_dict': { @@ -19,7 +19,10 @@ class YouJizzIE(InfoExtractor): 'title': 'Zeichentrick 1', 'age_limit': 18, } - } + }, { + 'url': 'http://www.youjizz.com/videos/-2189178.html', + 'only_matching': True, + }] def _real_extract(self, url): video_id = self._match_id(url) From c6668e4ad1f0debd4058c0d42229f45f1e49c5d1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 21 Jul 2016 22:34:55 +0700 Subject: [PATCH 4/6] 
[bbc.co.uk:iplayer:playlist] Skip unavailable test --- youtube_dl/extractor/bbc.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/bbc.py b/youtube_dl/extractor/bbc.py index e3b14c854..31065cde2 100644 --- a/youtube_dl/extractor/bbc.py +++ b/youtube_dl/extractor/bbc.py @@ -1023,6 +1023,7 @@ class BBCCoUkIPlayerPlaylistIE(BBCCoUkPlaylistBaseIE): 'description': 'French thriller serial about a missing teenager.', }, 'playlist_mincount': 6, + 'skip': 'This programme is not currently available on BBC iPlayer', } def _extract_title_and_description(self, webpage): From 9158af16cc63998fc42fbdb5c70f38074233fc24 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 21 Jul 2016 22:37:36 +0700 Subject: [PATCH 5/6] [bbc.co.uk:iplayer:playlist] Add support for group URLs --- youtube_dl/extractor/bbc.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/bbc.py b/youtube_dl/extractor/bbc.py index 31065cde2..9cb7630a1 100644 --- a/youtube_dl/extractor/bbc.py +++ b/youtube_dl/extractor/bbc.py @@ -1012,10 +1012,10 @@ class BBCCoUkPlaylistBaseIE(InfoExtractor): class BBCCoUkIPlayerPlaylistIE(BBCCoUkPlaylistBaseIE): IE_NAME = 'bbc.co.uk:iplayer:playlist' - _VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/iplayer/episodes/(?P<id>%s)' % BBCCoUkIE._ID_REGEX + _VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/iplayer/(?:episodes|group)/(?P<id>%s)' % BBCCoUkIE._ID_REGEX _URL_TEMPLATE = 'http://www.bbc.co.uk/iplayer/episode/%s' _VIDEO_ID_TEMPLATE = r'data-ip-id=["\'](%s)' - _TEST = { + _TESTS = [{ 'url': 'http://www.bbc.co.uk/iplayer/episodes/b05rcz9v', 'info_dict': { 'id': 'b05rcz9v', @@ -1024,7 +1024,16 @@ class BBCCoUkIPlayerPlaylistIE(BBCCoUkPlaylistBaseIE): }, 'playlist_mincount': 6, 'skip': 'This programme is not currently available on BBC iPlayer', - } + }, { + # Available for over a year unlike 30 days for most other programmes + 'url': 'http://www.bbc.co.uk/iplayer/group/p02tcc32', + 'info_dict': { + 
'id': 'p02tcc32', + 'title': 'Bohemian Icons', + 'description': 'md5:683e901041b2fe9ba596f2ab04c4dbe7', + }, + 'playlist_mincount': 10, + }] def _extract_title_and_description(self, webpage): title = self._search_regex(r'<h1>([^<]+)</h1>', webpage, 'title', fatal=False) From fd8c8c7dcdb722ae0ed96b8ceb4c844c87f2c896 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 21 Jul 2016 22:58:34 +0700 Subject: [PATCH 6/6] [youtube:shared] Relax _VALID_URL --- youtube_dl/extractor/youtube.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 49c264c3a..270ee8861 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1736,7 +1736,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): class YoutubeSharedVideoIE(InfoExtractor): - _VALID_URL = r'(?:https?:)?//(?:www\.)?youtube\.com/shared\?ci=(?P<id>[0-9A-Za-z_-]{11})' + _VALID_URL = r'(?:https?:)?//(?:www\.)?youtube\.com/shared\?.*\bci=(?P<id>[0-9A-Za-z_-]{11})' IE_NAME = 'youtube:shared' _TEST = {