[goodgame] Fix info extraction, cleanup code

This commit is contained in:
Golenickij Kirill 2017-10-26 17:08:03 +03:00
parent 7adc216640
commit 9ff9c9489d

View File

@ -3,6 +3,7 @@ from __future__ import unicode_literals
import re import re
from .youtube import YoutubeIE
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ExtractorError, int_or_none from ..utils import ExtractorError, int_or_none
@ -25,6 +26,16 @@ class GoodgameBaseIE(InfoExtractor):
''' '''
_RE_TIMESTAMP = r'utc-timestamp=\"(?P<timestamp>\d+)\"' _RE_TIMESTAMP = r'utc-timestamp=\"(?P<timestamp>\d+)\"'
def _extract_uploader(self, webpage):
uploader_match = re.search(self._RE_UPLOADER, webpage)
if uploader_match:
uploader = uploader_match.group('uploader')
uploader_id = uploader_match.group('uploader_id')
else:
uploader, uploader_id = None, None
return uploader, uploader_id
class GoodgameStreamIE(GoodgameBaseIE): class GoodgameStreamIE(GoodgameBaseIE):
IE_NAME = 'goodgame:stream' IE_NAME = 'goodgame:stream'
@ -33,8 +44,9 @@ class GoodgameStreamIE(GoodgameBaseIE):
'url': 'https://goodgame.ru/channel/rutony', 'url': 'https://goodgame.ru/channel/rutony',
'info_dict': { 'info_dict': {
'id': 'rutony', 'id': 'rutony',
'stream_id': '2399',
'title': 're:^.*', 'title': 're:^.*',
'view_count': 're:^\d+',
'thumbnail': 're:^https?://.*\.jpg$',
'ext': 'mp4', 'ext': 'mp4',
'is_live': True, 'is_live': True,
}, },
@ -45,8 +57,9 @@ class GoodgameStreamIE(GoodgameBaseIE):
'url': 'https://goodgame.ru/player?9418', 'url': 'https://goodgame.ru/player?9418',
'info_dict': { 'info_dict': {
'id': 'Artist.the', 'id': 'Artist.the',
'stream_id': '9418',
'title': 're:^.*', 'title': 're:^.*',
'view_count': 're:^\d+',
'thumbnail': 're:^https?://.*\.jpg$',
'ext': 'mp4', 'ext': 'mp4',
'is_live': True, 'is_live': True,
}, },
@ -59,23 +72,29 @@ class GoodgameStreamIE(GoodgameBaseIE):
}] }]
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) channel_id = self._match_id(url)
stream_info = next( stream_info = next(
_ for _ in self._download_json('%s/getchannelstatus?id=%s&fmt=json' % (self._API_BASE, video_id), _ for _ in self._download_json('%s/getchannelstatus?id=%s&fmt=json' % (self._API_BASE, channel_id),
video_id, channel_id,
note='Downloading stream JSON').values()) note='Downloading stream JSON').values())
if stream_info.get('status') == 'Dead': if stream_info.get('status') == 'Dead':
raise ExtractorError('%s is offline' % video_id, expected=True) raise ExtractorError('%s is offline' % channel_id, expected=True)
# url with player and stream_id # url with player and stream_id
if stream_info.get('key') == stream_info.get('stream_id'): if stream_info.get('key') == stream_info.get('stream_id'):
video_id = self._download_json('%s/player?src=%s' % (self._API_BASE, video_id), channel_id = self._download_json('%s/player?src=%s' % (self._API_BASE, channel_id),
video_id, channel_id,
note='Downloading streamer info JSON').get('streamer_name') note='Downloading streamer info JSON').get('streamer_name')
_id = self._search_regex('src=\"https://goodgame.ru/player\?(?P<id>\d+)\"', stream_info.get('embed'), 'id') _id = self._search_regex('src=\"https://goodgame.ru/player\?(?P<id>\d+)\"', stream_info.get('embed'), 'id')
thumbnails = [{'url': 'https:%s' % stream_info.get('thumb')}] thumbnail = stream_info.get('thumb')
# goodgame.ru host thumbnail image
if thumbnail.startswith('//'):
thumbnail = 'https:%s' % thumbnail
else:
thumbnail = None
formats = [] formats = []
for quality, suffix in self._QUALITIES.items(): for quality, suffix in self._QUALITIES.items():
formats.append({ formats.append({
@ -84,17 +103,13 @@ class GoodgameStreamIE(GoodgameBaseIE):
'ext': 'mp4', 'ext': 'mp4',
'protocol': 'm3u8' 'protocol': 'm3u8'
}) })
return { return {
'id': video_id, 'id': channel_id,
'stream_id': stream_info.get('stream_id'),
'title': stream_info.get('title'), 'title': stream_info.get('title'),
'description': stream_info.get('description'), 'view_count': int_or_none(stream_info.get('viewers')),
'view_count': int(stream_info.get('viewers')), 'thumbnail': thumbnail,
'status': stream_info.get('status'),
'thumbnails': thumbnails,
'formats': formats,
'is_live': True, 'is_live': True,
'formats': formats,
} }
@ -111,11 +126,26 @@ class GoodgameVideoIE(GoodgameBaseIE):
'uploader_id': '374', 'uploader_id': '374',
'timestamp': 1506862050, 'timestamp': 1506862050,
'upload_date': '20171001', 'upload_date': '20171001',
'ext': 'mp4', 'ext': 'flv',
}, },
'params': { 'params': {
'skip_download': True, 'skip_download': True,
} }
}, {
'url': 'https://goodgame.ru/video/49097/',
'info_dict': {
'id': '49097',
'title': 'GRAND FINAL Турнирa WCS Montreal 2017 по StarCraft 2',
'description': 'SUPER FINAL на WCS',
'uploader': 'ZERGTV',
'uploader_id': '48996',
'timestamp': 1505071700,
'upload_date': '20170910',
'ext': 'flv',
},
'params': {
'skip_download': True
}
}, { }, {
# Embedded youtube video # Embedded youtube video
'url': 'https://goodgame.ru/video/49294/', 'url': 'https://goodgame.ru/video/49294/',
@ -139,25 +169,23 @@ class GoodgameVideoIE(GoodgameBaseIE):
video_id = self._match_id(url) video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
matches = list(re.finditer(r'<div[^>]+class=\"title\"[^>]*>(?P<title>[^<]+)', webpage)) title = self._html_search_regex(
title = matches[1].group('title') r'<div[^>]+class=([\"\'])[^\"\']*video-description[^\"\']*\1[^>]*>.*'
r'<div[^>]+class=\"title\"[^>]*>(?P<title>[^<]+)',
webpage, 'title', group='title', flags=re.DOTALL)
description = self._html_search_regex(r'<div[^>]+class=\"description\"[^>]*>(?P<info>[^\0]*?)</div>', description = self._html_search_regex(r'<div[^>]+class=\"description\"[^>]*>(?P<info>[^\0]*?)</div>',
webpage, 'info') webpage, 'info', fatal=False, default=None)
timestamp = int_or_none(self._html_search_regex(self._RE_TIMESTAMP, webpage, 'timestamp')) timestamp = self._html_search_regex(self._RE_TIMESTAMP, webpage, 'timestamp', fatal=False, default=None)
uploader_match = re.search(self._RE_UPLOADER, webpage) uploader, uploader_id = self._extract_uploader(webpage)
uploader = uploader_match.group('uploader')
uploader_id = uploader_match.group('uploader_id')
embed_url = self._html_search_regex( embed_url = YoutubeIE._extract_url(webpage)
r'<iframe[^>]+src="((?:https?:)?//(?:www\.)?youtube\.com/embed[^"]+)"',
webpage, 'embed URL', default=None)
if embed_url: if embed_url:
return { return {
'_type': 'url_transparent', '_type': 'url_transparent',
'url': embed_url, 'url': embed_url,
'title': title, 'title': title,
'description': description, 'description': description,
'timestamp': timestamp, 'timestamp': int_or_none(timestamp),
'uploader': uploader, 'uploader': uploader,
'uploader_id': uploader_id 'uploader_id': uploader_id
} }
@ -169,15 +197,14 @@ class GoodgameVideoIE(GoodgameBaseIE):
formats = [{ formats = [{
'format_id': 'rtmp', 'format_id': 'rtmp',
'url': rtmp_url, 'url': rtmp_url,
'protocol': 'rtmp', 'ext': 'flv',
'ext': 'mp4',
}] }]
return { return {
'id': video_id, 'id': video_id,
'title': title, 'title': title,
'description': description, 'description': description,
'timestamp': timestamp, 'timestamp': int_or_none(timestamp),
'uploader': uploader, 'uploader': uploader,
'uploader_id': uploader_id, 'uploader_id': uploader_id,
'formats': formats, 'formats': formats,
@ -212,27 +239,20 @@ class GoodgameClipIE(GoodgameBaseIE):
video_id = self._match_id(url) video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
title = self._og_search_title(webpage) timestamp = self._html_search_regex(self._RE_TIMESTAMP, webpage, 'timestamp')
thumbnail = self._og_search_thumbnail(webpage) uploader, uploader_id = self._extract_uploader(webpage)
url = self._og_search_video_url(webpage)
timestamp = int_or_none(self._html_search_regex(self._RE_TIMESTAMP, webpage, 'timestamp'))
uploader_match = re.search(self._RE_UPLOADER, webpage)
uploader = uploader_match.group('uploader')
uploader_id = uploader_match.group('uploader_id')
formats = [{ formats = [{
'format_id': 'clip', 'format_id': 'clip',
'url': url, 'url': self._og_search_video_url(webpage),
# http or https extraction
'protocol': url[:6].rstrip(':/'),
'ext': 'mp4', 'ext': 'mp4',
}] }]
return { return {
'id': video_id, 'id': video_id,
'title': title, 'title': self._og_search_title(webpage),
'thumbnail': thumbnail, 'thumbnail': self._og_search_thumbnail(webpage),
'timestamp': timestamp, 'timestamp': int_or_none(timestamp),
'uploader': uploader, 'uploader': uploader,
'uploader_id': uploader_id, 'uploader_id': uploader_id,
'formats': formats, 'formats': formats,