From 372b3d1634e66063227739a97ad2b62bb08122e5 Mon Sep 17 00:00:00 2001 From: remitamine <remitamine@gmail.com> Date: Wed, 24 Jun 2015 01:13:23 +0100 Subject: [PATCH 1/4] [googledrive] Add new extractor --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/googledrive.py | 106 ++++++++++++++++++++++++++++ 2 files changed, 107 insertions(+) create mode 100644 youtube_dl/extractor/googledrive.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index dc1a302e6..e857e43d6 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -205,6 +205,7 @@ from .globo import GloboIE from .godtube import GodTubeIE from .goldenmoustache import GoldenMoustacheIE from .golem import GolemIE +from .googledrive import GoogleDriveIE from .googleplus import GooglePlusIE from .googlesearch import GoogleSearchIE from .gorillavid import GorillaVidIE diff --git a/youtube_dl/extractor/googledrive.py b/youtube_dl/extractor/googledrive.py new file mode 100644 index 000000000..8c611fa47 --- /dev/null +++ b/youtube_dl/extractor/googledrive.py @@ -0,0 +1,106 @@ +from .common import InfoExtractor +from ..utils import RegexNotFoundError + +class GoogleDriveIE(InfoExtractor): + _VALID_URL = r'(?:https?://)?(?:video\.google\.com/get_player\?.*?docid=|(?:docs|drive)\.google\.com/(?:uc\?.*?id=|file/d/))(?P<id>.+?)(?:&|/|$)' + _TEST = { + 'url': 'https://drive.google.com/file/d/0BzpExh0WzJF0NlR5WUlxdEVsY0U/edit?pli=1', + 'info_dict': { + 'id': '0BzpExh0WzJF0NlR5WUlxdEVsY0U', + 'ext': 'mp4', + 'title': '[AHSH] Fairy Tail S2 - 01 [720p].mp4', + } + } + _formats = { + '5': {'ext': 'flv'}, + '6': {'ext': 'flv'}, + '13': {'ext': '3gp'}, + '17': {'ext': '3gp'}, + '18': {'ext': 'mp4'}, + '22': {'ext': 'mp4'}, + '34': {'ext': 'flv'}, + '35': {'ext': 'flv'}, + '36': {'ext': '3gp'}, + '37': {'ext': 'mp4'}, + '38': {'ext': 'mp4'}, + '43': {'ext': 'webm'}, + '44': {'ext': 'webm'}, + '45': {'ext': 'webm'}, + '46': {'ext': 'webm'}, + '59': {'ext': 'mp4'} + } + + def 
_real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage( + 'http://docs.google.com/file/d/'+video_id, video_id, encoding='unicode_escape' + ) + try: + title = self._html_search_regex( + r'"title","(?P<title>.*?)"', + webpage, + 'title', + group='title' + ) + fmt_stream_map = self._html_search_regex( + r'"fmt_stream_map","(?P<fmt_stream_map>.*?)"', + webpage, + 'fmt_stream_map', + group='fmt_stream_map' + ) + fmt_list = self._html_search_regex( + r'"fmt_list","(?P<fmt_list>.*?)"', + webpage, + 'fmt_list', + group='fmt_list' + ) +# timestamp = self._html_search_regex( +# r'"timestamp","(?P<timestamp>.*?)"', +# webpage, +# 'timestamp', +# group='timestamp' +# ) + length_seconds = self._html_search_regex( + r'"length_seconds","(?P<length_seconds>.*?)"', + webpage, + 'length_seconds', + group='length_seconds' + ) + except RegexNotFoundError: + try: + reason = self._html_search_regex( + r'"reason","(?P<reason>.*?)"', + webpage, + 'reason', + group='reason' + ) + self.report_warning(reason) + return + except RegexNotFoundError: + self.report_warning('not a video') + return + + fmt_stream_map = fmt_stream_map.split(',') + fmt_list = fmt_list.split(',') + formats = [] + for i in range(len(fmt_stream_map)): + fmt_id, fmt_url = fmt_stream_map[i].split('|') + resolution = fmt_list[i].split('/')[1] + width, height = resolution.split('x') + formats.append({ + 'url': fmt_url, + 'format_id': fmt_id, + 'resolution': resolution, + 'width': int(width), + 'height': int(height), + 'ext': self._formats[fmt_id]['ext'] + }) + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': title, +# 'timestamp': int(timestamp), + 'duration': int(length_seconds), + 'formats': formats + } From 50e989d7a414c9dfc87c81746946573ce37d5280 Mon Sep 17 00:00:00 2001 From: remitamine <remitamine@gmail.com> Date: Wed, 24 Jun 2015 02:37:03 +0100 Subject: [PATCH 2/4] [Shahid] Add new extractor --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/shahid.py 
| 44 ++++++++++++++++++++++++++++++++ 2 files changed, 45 insertions(+) create mode 100644 youtube_dl/extractor/shahid.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index e857e43d6..ccad17775 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -482,6 +482,7 @@ from .senateisvp import SenateISVPIE from .servingsys import ServingSysIE from .sexu import SexuIE from .sexykarma import SexyKarmaIE +from .shahid import ShahidIE from .shared import SharedIE from .sharesix import ShareSixIE from .sina import SinaIE diff --git a/youtube_dl/extractor/shahid.py b/youtube_dl/extractor/shahid.py new file mode 100644 index 000000000..4d1c466c1 --- /dev/null +++ b/youtube_dl/extractor/shahid.py @@ -0,0 +1,44 @@ +from .common import InfoExtractor + +class ShahidIE(InfoExtractor): + _VALID_URL = r'(?:https?://)?shahid\.mbc\.net/ar/episode/(?P<id>\d+)/?' + _TESTS = [ + { + 'url': 'https://shahid.mbc.net/ar/episode/108084/%D8%AE%D9%88%D8%A7%D8%B7%D8%B1-%D8%A7%D9%84%D9%85%D9%88%D8%B3%D9%85-11-%D8%A7%D9%84%D8%AD%D9%84%D9%82%D8%A9-1.html', + 'info_dict': { + 'id': '108084', + 'ext': 'm3u8', + 'title': 'بسم الله', + }, + 'params': { + # m3u8 download + 'skip_download': True, + } + }, + { + #shahid plus subscriber only + 'url': 'https://shahid.mbc.net/ar/series/90497/%D9%85%D8%B1%D8%A7%D9%8A%D8%A7-2011.html', + 'only_matching': True + } + ] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + title = self._og_search_title(webpage); + json_data = self._download_json( + 'https://shahid.mbc.net/arContent/getPlayerContent-param-.id-'+video_id+'.type-player.html', + video_id + )['data'] + if 'url' in json_data: + m3u8_url = json_data['url'] + else: + for error in json_data['error'].values(): + self.report_warning(error) + return + formats = self._extract_m3u8_formats(m3u8_url, video_id) + return { + 'id': video_id, + 'title': title, + 'formats': formats, + 
} \ No newline at end of file From a226a25f68f5304e78c58d552fbd897aa8390540 Mon Sep 17 00:00:00 2001 From: remitamine <remitamine@gmail.com> Date: Wed, 24 Jun 2015 02:46:20 +0100 Subject: [PATCH 3/4] Update __init__.py --- youtube_dl/extractor/__init__.py | 1 - 1 file changed, 1 deletion(-) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index ccad17775..5f6f3e09f 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -205,7 +205,6 @@ from .globo import GloboIE from .godtube import GodTubeIE from .goldenmoustache import GoldenMoustacheIE from .golem import GolemIE -from .googledrive import GoogleDriveIE from .googleplus import GooglePlusIE from .googlesearch import GoogleSearchIE from .gorillavid import GorillaVidIE From 85da94137d070d565f517c1d5d04f0ee206ccae1 Mon Sep 17 00:00:00 2001 From: remitamine <remitamine@gmail.com> Date: Wed, 24 Jun 2015 02:47:15 +0100 Subject: [PATCH 4/4] Delete googledrive.py --- youtube_dl/extractor/googledrive.py | 106 ---------------------------- 1 file changed, 106 deletions(-) delete mode 100644 youtube_dl/extractor/googledrive.py diff --git a/youtube_dl/extractor/googledrive.py b/youtube_dl/extractor/googledrive.py deleted file mode 100644 index 8c611fa47..000000000 --- a/youtube_dl/extractor/googledrive.py +++ /dev/null @@ -1,106 +0,0 @@ -from .common import InfoExtractor -from ..utils import RegexNotFoundError - -class GoogleDriveIE(InfoExtractor): - _VALID_URL = r'(?:https?://)?(?:video\.google\.com/get_player\?.*?docid=|(?:docs|drive)\.google\.com/(?:uc\?.*?id=|file/d/))(?P<id>.+?)(?:&|/|$)' - _TEST = { - 'url': 'https://drive.google.com/file/d/0BzpExh0WzJF0NlR5WUlxdEVsY0U/edit?pli=1', - 'info_dict': { - 'id': '0BzpExh0WzJF0NlR5WUlxdEVsY0U', - 'ext': 'mp4', - 'title': '[AHSH] Fairy Tail S2 - 01 [720p].mp4', - } - } - _formats = { - '5': {'ext': 'flv'}, - '6': {'ext': 'flv'}, - '13': {'ext': '3gp'}, - '17': {'ext': '3gp'}, - '18': {'ext': 'mp4'}, - '22': {'ext': 
'mp4'}, - '34': {'ext': 'flv'}, - '35': {'ext': 'flv'}, - '36': {'ext': '3gp'}, - '37': {'ext': 'mp4'}, - '38': {'ext': 'mp4'}, - '43': {'ext': 'webm'}, - '44': {'ext': 'webm'}, - '45': {'ext': 'webm'}, - '46': {'ext': 'webm'}, - '59': {'ext': 'mp4'} - } - - def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage( - 'http://docs.google.com/file/d/'+video_id, video_id, encoding='unicode_escape' - ) - try: - title = self._html_search_regex( - r'"title","(?P<title>.*?)"', - webpage, - 'title', - group='title' - ) - fmt_stream_map = self._html_search_regex( - r'"fmt_stream_map","(?P<fmt_stream_map>.*?)"', - webpage, - 'fmt_stream_map', - group='fmt_stream_map' - ) - fmt_list = self._html_search_regex( - r'"fmt_list","(?P<fmt_list>.*?)"', - webpage, - 'fmt_list', - group='fmt_list' - ) -# timestamp = self._html_search_regex( -# r'"timestamp","(?P<timestamp>.*?)"', -# webpage, -# 'timestamp', -# group='timestamp' -# ) - length_seconds = self._html_search_regex( - r'"length_seconds","(?P<length_seconds>.*?)"', - webpage, - 'length_seconds', - group='length_seconds' - ) - except RegexNotFoundError: - try: - reason = self._html_search_regex( - r'"reason","(?P<reason>.*?)"', - webpage, - 'reason', - group='reason' - ) - self.report_warning(reason) - return - except RegexNotFoundError: - self.report_warning('not a video') - return - - fmt_stream_map = fmt_stream_map.split(',') - fmt_list = fmt_list.split(',') - formats = [] - for i in range(len(fmt_stream_map)): - fmt_id, fmt_url = fmt_stream_map[i].split('|') - resolution = fmt_list[i].split('/')[1] - width, height = resolution.split('x') - formats.append({ - 'url': fmt_url, - 'format_id': fmt_id, - 'resolution': resolution, - 'width': int(width), - 'height': int(height), - 'ext': self._formats[fmt_id]['ext'] - }) - self._sort_formats(formats) - - return { - 'id': video_id, - 'title': title, -# 'timestamp': int(timestamp), - 'duration': int(length_seconds), - 'formats': formats - }