From 56797808f41cb7c95e0215069a2641cc3303a910 Mon Sep 17 00:00:00 2001 From: Golenickij Kirill Date: Tue, 3 Oct 2017 19:51:02 +0300 Subject: [PATCH 1/3] [goodgame] Add goodgame extractors --- youtube_dl/extractor/extractors.py | 5 + youtube_dl/extractor/goodgame.py | 239 +++++++++++++++++++++++++++++ 2 files changed, 244 insertions(+) create mode 100644 youtube_dl/extractor/goodgame.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 24e9acda6..30ec2af1c 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -411,6 +411,11 @@ from .go import GoIE from .go90 import Go90IE from .godtube import GodTubeIE from .golem import GolemIE +from .goodgame import ( + GoodgameStreamIE, + GoodgameVideoIE, + GoodgameClipIE, +) from .googledrive import GoogleDriveIE from .googleplus import GooglePlusIE from .googlesearch import GoogleSearchIE diff --git a/youtube_dl/extractor/goodgame.py b/youtube_dl/extractor/goodgame.py new file mode 100644 index 000000000..6c4bcdc05 --- /dev/null +++ b/youtube_dl/extractor/goodgame.py @@ -0,0 +1,239 @@ +# coding=utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ExtractorError, int_or_none + + +class GoodgameBaseIE(InfoExtractor): + _RTMP_SERVER = 'rtmp://46.61.227.158:1940/vod//' + _API_BASE = 'https://goodgame.ru/api' + _HLS_BASE = 'https://hls.goodgame.ru/hls' + _QUALITIES = { + '240p': '_240', + '480p': '_480', + '720p': '_720', + 'Source': '' + } + _RE_UPLOADER = r'''(?x) + ]+ + href=\"https?://(?:www\.)?goodgame\.ru/user/(?P\d+)/\" + [^>]*> + (?P[^<]+) + ''' + _RE_TIMESTAMP = r'utc-timestamp=\"(?P\d+)\"' + + +class GoodgameStreamIE(GoodgameBaseIE): + IE_NAME = 'goodgame:stream' + _VALID_URL = r'https?://(?:www\.)?goodgame\.ru/(channel/|player\?)(?P[^/#?]+)' + _TESTS = [{ + 'url': 'https://goodgame.ru/channel/rutony', + 'info_dict': { + 'id': 'rutony', + 'stream_id': '2399', + 'title': 're:^.*', + 'ext': 'mp4', + 'is_live': True, + }, + 'params': { + 'skip_download': True, + } + }, { + 'url': 'https://goodgame.ru/player?9418', + 'info_dict': { + 'id': 'Artist.the', + 'stream_id': '9418', + 'title': 're:^.*', + 'ext': 'mp4', + 'is_live': True, + }, + 'params': { + 'skip_download': True, + } + }, { + 'url': 'https://goodgame.ru/channel/BRAT_OK/#autoplay', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + stream_info = next( + _ for _ in self._download_json('%s/getchannelstatus?id=%s&fmt=json' % (self._API_BASE, video_id), + video_id, + note='Downloading stream JSON').values()) + + if stream_info.get('status') == 'Dead': + raise ExtractorError('%s is offline' % video_id, expected=True) + + # url with player and stream_id + if stream_info.get('key') == stream_info.get('stream_id'): + video_id = self._download_json('%s/player?src=%s' % (self._API_BASE, video_id), + video_id, + note='Downloading streamer info JSON').get('streamer_name') + + _id = self._search_regex('src=\"https://goodgame.ru/player\?(?P\d+)\"', stream_info.get('embed'), 'id') + thumbnails = [{'url': 'https:%s' % stream_info.get('thumb')}] + formats = [] + for quality, suffix in self._QUALITIES.items(): + formats.append({ + 'format_id': quality, + 'url': '%s/%s%s.m3u8' % (self._HLS_BASE, _id, suffix), + 'ext': 'mp4', + 'protocol': 'm3u8' + }) + + return { + 'id': video_id, + 'stream_id': stream_info.get('stream_id'), + 'title': stream_info.get('title'), + 'description': stream_info.get('description'), + 'view_count': int(stream_info.get('viewers')), + 'status': stream_info.get('status'), + 'thumbnails': thumbnails, + 'formats': formats, + 'is_live': True, + } + + +class GoodgameVideoIE(GoodgameBaseIE): + IE_NAME = 'goodgame:video' + _VALID_URL = r'https?://(?:www\.)?goodgame\.ru/video/(?P\d+)' + _TESTS = [{ + 'url': 'https://goodgame.ru/video/49359/', + 'info_dict': { + 'id': '49359', + 'title': 'GSL Super tournament s2 - день#4(1/2 и Финал!)Pomi', + 'description': '', + 'uploader': 'Pomi', + 'uploader_id': '374', + 'timestamp': 1506862050, + 'upload_date': '20171001', + 'ext': 'mp4', + }, + 'params': { + 'skip_download': True, + } + }, { + # Embedded youtube video + 'url': 'https://goodgame.ru/video/49294/', + 'add_ie': ['Youtube'], + 'info_dict': { + 'id': 'EihUN4ylsn4', + 'title': 'Шахматы с Бонивуром | Осторожно, мат на стриме! [запись 25.09.17]', + 'description': 're:^Большое спасибо за поддержку трансляций интеллектуального формата\..*', + 'uploader': 'bonivur', + 'uploader_id': '30', + 'timestamp': 1506416377, + 'upload_date': '20170925', + 'ext': 'mp4', + }, + 'params': { + 'skip_download': True, + } + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + matches = list(re.finditer(r']+class=\"title\"[^>]*>(?P[^<]+)', webpage)) + title = matches[1].group('title') + description = self._html_search_regex(r'<div[^>]+class=\"description\"[^>]*>(?P<info>[^\0]*?)</div>', + webpage, 'info') + timestamp = int_or_none(self._html_search_regex(self._RE_TIMESTAMP, webpage, 'timestamp')) + uploader_match = re.search(self._RE_UPLOADER, webpage) + uploader = uploader_match.group('uploader') + uploader_id = uploader_match.group('uploader_id') + + embed_url = self._html_search_regex( + r'<iframe[^>]+src="((?:https?:)?//(?:www\.)?youtube\.com/embed[^"]+)"', + webpage, 'embed URL', default=None) + if embed_url: + return { + '_type': 'url_transparent', + 'url': embed_url, + 'title': title, + 'description': description, + 'timestamp': timestamp, + 'uploader': uploader, + 'uploader_id': uploader_id + } + + file = self._html_search_regex(r'<param[^>]+name=\"flashvars\"[^>]+value=\"src=(?P<path>[^=\"]+)\"[^>]*>', + webpage, 'path', default=None) + if file: + rtmp_url = '%s%s' % (self._RTMP_SERVER, file) + formats = [{ + 'format_id': 'rtmp', + 'url': rtmp_url, + 'protocol': 'rtmp', + 'ext': 'mp4', + }] + + return { + 'id': video_id, + 'title': title, + 'description': description, + 'timestamp': timestamp, + 'uploader': uploader, + 'uploader_id': uploader_id, + 'formats': formats, + } + + raise ExtractorError('Video %s was deleted' % video_id, expected=True) + + +class GoodgameClipIE(GoodgameBaseIE): + IE_NAME = 'goodgame:clip' + _VALID_URL = r'https?://(?:www\.)?goodgame\.ru/clip/(?P<id>\d+)' + _TESTS = [{ + 'url': 'https://goodgame.ru/clip/397722/', + 'info_dict': { + 'id': '397722', + 'title': 'ЭТО ФИАСКО', + 'uploader': '0x111BA6FA', + 'uploader_id': '1035569', + 'timestamp': 1506975639, + 'upload_date': '20171002', + 'ext': 'mp4', + }, + 'params': { + 'skip_download': True, + } + }, { + 'url': 'https://goodgame.ru/clip/397155/?from=rec', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + title = self._og_search_title(webpage) + thumbnail = self._og_search_thumbnail(webpage) + url = self._og_search_video_url(webpage) + timestamp = int_or_none(self._html_search_regex(self._RE_TIMESTAMP, webpage, 'timestamp')) + uploader_match = re.search(self._RE_UPLOADER, webpage) + uploader = uploader_match.group('uploader') + uploader_id = uploader_match.group('uploader_id') + + formats = [{ + 'format_id': 'clip', + 'url': url, + # http or https extraction + 'protocol': url[:6].rstrip(':/'), + 'ext': 'mp4', + }] + + return { + 'id': video_id, + 'title': title, + 'thumbnail': thumbnail, + 'timestamp': timestamp, + 'uploader': uploader, + 'uploader_id': uploader_id, + 'formats': formats, + } From 9ff9c9489d170f78826e0bad26c1ecc0960e05d4 Mon Sep 17 00:00:00 2001 From: Golenickij Kirill <sterk12@gmail.com> Date: Thu, 26 Oct 2017 17:08:03 +0300 Subject: [PATCH 2/3] [goodgame] Fix info extraction, cleanup code --- youtube_dl/extractor/goodgame.py | 112 ++++++++++++++++++------------- 1 file changed, 66 insertions(+), 46 deletions(-) diff --git a/youtube_dl/extractor/goodgame.py b/youtube_dl/extractor/goodgame.py index 6c4bcdc05..61295bd80 100644 --- a/youtube_dl/extractor/goodgame.py +++ b/youtube_dl/extractor/goodgame.py @@ -3,6 +3,7 @@ from __future__ import unicode_literals import re +from .youtube import YoutubeIE from .common import InfoExtractor from ..utils import ExtractorError, int_or_none @@ -25,6 +26,16 @@ class GoodgameBaseIE(InfoExtractor): ''' _RE_TIMESTAMP = r'utc-timestamp=\"(?P<timestamp>\d+)\"' + def _extract_uploader(self, webpage): + uploader_match = re.search(self._RE_UPLOADER, webpage) + if uploader_match: + uploader = uploader_match.group('uploader') + uploader_id = uploader_match.group('uploader_id') + else: + uploader, uploader_id = None, None + + return uploader, uploader_id + class GoodgameStreamIE(GoodgameBaseIE): IE_NAME = 'goodgame:stream' @@ -33,8 +44,9 @@ class GoodgameStreamIE(GoodgameBaseIE): 'url': 'https://goodgame.ru/channel/rutony', 'info_dict': { 'id': 'rutony', - 'stream_id': '2399', 'title': 're:^.*', + 'view_count': 're:^\d+', + 'thumbnail': 're:^https?://.*\.jpg$', 'ext': 'mp4', 'is_live': True, }, @@ -45,8 +57,9 @@ class GoodgameStreamIE(GoodgameBaseIE): 'url': 'https://goodgame.ru/player?9418', 'info_dict': { 'id': 'Artist.the', - 'stream_id': '9418', 'title': 're:^.*', + 'view_count': 're:^\d+', + 'thumbnail': 're:^https?://.*\.jpg$', 'ext': 'mp4', 'is_live': True, }, @@ -59,23 +72,29 @@ class GoodgameStreamIE(GoodgameBaseIE): }] def _real_extract(self, url): - video_id = self._match_id(url) + channel_id = self._match_id(url) stream_info = next( - _ for _ in self._download_json('%s/getchannelstatus?id=%s&fmt=json' % (self._API_BASE, video_id), - video_id, + _ for _ in self._download_json('%s/getchannelstatus?id=%s&fmt=json' % (self._API_BASE, channel_id), + channel_id, note='Downloading stream JSON').values()) if stream_info.get('status') == 'Dead': - raise ExtractorError('%s is offline' % video_id, expected=True) + raise ExtractorError('%s is offline' % channel_id, expected=True) # url with player and stream_id if stream_info.get('key') == stream_info.get('stream_id'): - video_id = self._download_json('%s/player?src=%s' % (self._API_BASE, video_id), - video_id, - note='Downloading streamer info JSON').get('streamer_name') + channel_id = self._download_json('%s/player?src=%s' % (self._API_BASE, channel_id), + channel_id, + note='Downloading streamer info JSON').get('streamer_name') _id = self._search_regex('src=\"https://goodgame.ru/player\?(?P<id>\d+)\"', stream_info.get('embed'), 'id') - thumbnails = [{'url': 'https:%s' % stream_info.get('thumb')}] + thumbnail = stream_info.get('thumb') + # goodgame.ru host thumbnail image + if thumbnail.startswith('//'): + thumbnail = 'https:%s' % thumbnail + else: + thumbnail = None + formats = [] for quality, suffix in self._QUALITIES.items(): formats.append({ @@ -84,17 +103,13 @@ class GoodgameStreamIE(GoodgameBaseIE): 'ext': 'mp4', 'protocol': 'm3u8' }) - return { - 'id': video_id, - 'stream_id': stream_info.get('stream_id'), + 'id': channel_id, 'title': stream_info.get('title'), - 'description': stream_info.get('description'), - 'view_count': int(stream_info.get('viewers')), - 'status': stream_info.get('status'), - 'thumbnails': thumbnails, - 'formats': formats, + 'view_count': int_or_none(stream_info.get('viewers')), + 'thumbnail': thumbnail, 'is_live': True, + 'formats': formats, } @@ -111,11 +126,26 @@ class GoodgameVideoIE(GoodgameBaseIE): 'uploader_id': '374', 'timestamp': 1506862050, 'upload_date': '20171001', - 'ext': 'mp4', + 'ext': 'flv', }, 'params': { 'skip_download': True, } + }, { + 'url': 'https://goodgame.ru/video/49097/', + 'info_dict': { + 'id': '49097', + 'title': 'GRAND FINAL Турнирa WCS Montreal 2017 по StarCraft 2', + 'description': 'SUPER FINAL на WCS', + 'uploader': 'ZERGTV', + 'uploader_id': '48996', + 'timestamp': 1505071700, + 'upload_date': '20170910', + 'ext': 'flv', + }, + 'params': { + 'skip_download': True + } }, { # Embedded youtube video 'url': 'https://goodgame.ru/video/49294/', @@ -139,25 +169,23 @@ class GoodgameVideoIE(GoodgameBaseIE): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - matches = list(re.finditer(r'<div[^>]+class=\"title\"[^>]*>(?P<title>[^<]+)', webpage)) - title = matches[1].group('title') + title = self._html_search_regex( + r'<div[^>]+class=([\"\'])[^\"\']*video-description[^\"\']*\1[^>]*>.*' + r'<div[^>]+class=\"title\"[^>]*>(?P<title>[^<]+)', + webpage, 'title', group='title', flags=re.DOTALL) description = self._html_search_regex(r'<div[^>]+class=\"description\"[^>]*>(?P<info>[^\0]*?)</div>', - webpage, 'info') - timestamp = int_or_none(self._html_search_regex(self._RE_TIMESTAMP, webpage, 'timestamp')) - uploader_match = re.search(self._RE_UPLOADER, webpage) - uploader = uploader_match.group('uploader') - uploader_id = uploader_match.group('uploader_id') + webpage, 'info', fatal=False, default=None) + timestamp = self._html_search_regex(self._RE_TIMESTAMP, webpage, 'timestamp', fatal=False, default=None) + uploader, uploader_id = self._extract_uploader(webpage) - embed_url = self._html_search_regex( - r'<iframe[^>]+src="((?:https?:)?//(?:www\.)?youtube\.com/embed[^"]+)"', - webpage, 'embed URL', default=None) + embed_url = YoutubeIE._extract_url(webpage) if embed_url: return { '_type': 'url_transparent', 'url': embed_url, 'title': title, 'description': description, - 'timestamp': timestamp, + 'timestamp': int_or_none(timestamp), 'uploader': uploader, 'uploader_id': uploader_id } @@ -169,15 +197,14 @@ class GoodgameVideoIE(GoodgameBaseIE): formats = [{ 'format_id': 'rtmp', 'url': rtmp_url, - 'protocol': 'rtmp', - 'ext': 'mp4', + 'ext': 'flv', }] return { 'id': video_id, 'title': title, 'description': description, - 'timestamp': timestamp, + 'timestamp': int_or_none(timestamp), 'uploader': uploader, 'uploader_id': uploader_id, 'formats': formats, @@ -212,27 +239,20 @@ class GoodgameClipIE(GoodgameBaseIE): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - title = self._og_search_title(webpage) - thumbnail = self._og_search_thumbnail(webpage) - url = self._og_search_video_url(webpage) - timestamp = int_or_none(self._html_search_regex(self._RE_TIMESTAMP, webpage, 'timestamp')) - uploader_match = re.search(self._RE_UPLOADER, webpage) - uploader = uploader_match.group('uploader') - uploader_id = uploader_match.group('uploader_id') + timestamp = self._html_search_regex(self._RE_TIMESTAMP, webpage, 'timestamp') + uploader, uploader_id = self._extract_uploader(webpage) formats = [{ 'format_id': 'clip', - 'url': url, - # http or https extraction - 'protocol': url[:6].rstrip(':/'), + 'url': self._og_search_video_url(webpage), 'ext': 'mp4', }] return { 'id': video_id, - 'title': title, - 'thumbnail': thumbnail, - 'timestamp': timestamp, + 'title': self._og_search_title(webpage), + 'thumbnail': self._og_search_thumbnail(webpage), + 'timestamp': int_or_none(timestamp), 'uploader': uploader, 'uploader_id': uploader_id, 'formats': formats, From f044e0d6313a4e2bb80509bffac02b142a37f854 Mon Sep 17 00:00:00 2001 From: Golenitskii K <sterk12@gmail.com> Date: Sun, 17 Dec 2017 14:35:54 +0300 Subject: [PATCH 3/3] [goodgame] Fix format preference, Source by default --- youtube_dl/extractor/goodgame.py | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/goodgame.py b/youtube_dl/extractor/goodgame.py index 61295bd80..58f1bafc0 100644 --- a/youtube_dl/extractor/goodgame.py +++ b/youtube_dl/extractor/goodgame.py @@ -3,9 +3,12 @@ from __future__ import unicode_literals import re -from .youtube import YoutubeIE from .common import InfoExtractor -from ..utils import ExtractorError, int_or_none +from .youtube import YoutubeIE +from ..utils import ( + ExtractorError, + int_or_none +) class GoodgameBaseIE(InfoExtractor): @@ -13,10 +16,10 @@ class GoodgameBaseIE(InfoExtractor): _API_BASE = 'https://goodgame.ru/api' _HLS_BASE = 'https://hls.goodgame.ru/hls' _QUALITIES = { + 'Source': '', '240p': '_240', '480p': '_480', '720p': '_720', - 'Source': '' } _RE_UPLOADER = r'''(?x) <a[^>]+ @@ -103,6 +106,7 @@ class GoodgameStreamIE(GoodgameBaseIE): 'ext': 'mp4', 'protocol': 'm3u8' }) + self._prefer_source(formats) return { 'id': channel_id, 'title': stream_info.get('title'), @@ -112,6 +116,14 @@ class GoodgameStreamIE(GoodgameBaseIE): 'formats': formats, } + def _prefer_source(self, formats): + try: + source = next(f for f in formats if f['format_id'] == 'Source') + source['preference'] = 10 + except StopIteration: + pass + self._sort_formats(formats) + class GoodgameVideoIE(GoodgameBaseIE): IE_NAME = 'goodgame:video'