[giga] improve extraction and add support for posts with multiple videos

This commit is contained in:
remitamine 2015-11-01 05:49:50 +01:00
parent 30eecc6a04
commit c3add5cb66
2 changed files with 126 additions and 67 deletions

View File

@ -209,7 +209,10 @@ from .gdcvault import GDCVaultIE
from .generic import GenericIE from .generic import GenericIE
from .gfycat import GfycatIE from .gfycat import GfycatIE
from .giantbomb import GiantBombIE from .giantbomb import GiantBombIE
from .giga import GigaIE from .giga import (
GigaIE,
GigaPostIE,
)
from .glide import GlideIE from .glide import GlideIE
from .globo import GloboIE from .globo import GloboIE
from .godtube import GodTubeIE from .godtube import GodTubeIE

View File

@ -7,53 +7,23 @@ from .common import InfoExtractor
from ..utils import ( from ..utils import (
qualities, qualities,
compat_str, compat_str,
parse_duration,
parse_iso8601, parse_iso8601,
str_to_int, int_or_none,
float_or_none,
) )
class GigaIE(InfoExtractor): class GigaIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?giga\.de/(?:[^/]+/)*(?P<id>[^/]+)' _VALID_URL = r'https?://videos\.giga\.de/embed/(?P<id>\d+)'
_TESTS = [{
'url': 'http://www.giga.de/filme/anime-awesome/trailer/anime-awesome-chihiros-reise-ins-zauberland-das-beste-kommt-zum-schluss/', def _call_api(self, path, video_id):
'md5': '6bc5535e945e724640664632055a584f', return self._download_json('http://www.giga.de/api/syndication/video/video_id/%s/%s.json' % (video_id, path), video_id)
'info_dict': {
'id': '2622086',
'display_id': 'anime-awesome-chihiros-reise-ins-zauberland-das-beste-kommt-zum-schluss',
'ext': 'mp4',
'title': 'Anime Awesome: Chihiros Reise ins Zauberland Das Beste kommt zum Schluss',
'description': 'md5:afdf5862241aded4718a30dff6a57baf',
'thumbnail': 're:^https?://.*\.jpg$',
'duration': 578,
'timestamp': 1414749706,
'upload_date': '20141031',
'uploader': 'Robin Schweiger',
'view_count': int,
},
}, {
'url': 'http://www.giga.de/games/channel/giga-top-montag/giga-topmontag-die-besten-serien-2014/',
'only_matching': True,
}, {
'url': 'http://www.giga.de/extra/netzkultur/videos/giga-games-tom-mats-robin-werden-eigene-wege-gehen-eine-ankuendigung/',
'only_matching': True,
}, {
'url': 'http://www.giga.de/tv/jonas-liest-spieletitel-eingedeutscht-episode-2/',
'only_matching': True,
}]
def _real_extract(self, url): def _real_extract(self, url):
display_id = self._match_id(url) video_id = self._match_id(url)
webpage = self._download_webpage(url, display_id) default = self._call_api('default', video_id)
playlist = self._call_api('playlist', video_id)[0]
video_id = self._search_regex(
[r'data-video-id="(\d+)"', r'/api/video/jwplayer/#v=(\d+)'],
webpage, 'video id')
playlist = self._download_json(
'http://www.giga.de/api/syndication/video/video_id/%s/playlist.json?content=syndication/key/368b5f151da4ae05ced7fa296bdff65a/'
% video_id, video_id)[0]
quality = qualities(['normal', 'hd720']) quality = qualities(['normal', 'hd720'])
@ -69,34 +39,120 @@ class GigaIE(InfoExtractor):
}) })
self._sort_formats(formats) self._sort_formats(formats)
title = self._html_search_meta(
'title', webpage, 'title', fatal=True)
description = self._html_search_meta(
'description', webpage, 'description')
thumbnail = self._og_search_thumbnail(webpage)
duration = parse_duration(self._search_regex(
r'(?s)(?:data-video-id="{0}"|data-video="[^"]*/api/video/jwplayer/#v={0}[^"]*")[^>]*>.+?<span class="duration">([^<]+)</span>'.format(video_id),
webpage, 'duration', fatal=False))
timestamp = parse_iso8601(self._search_regex(
r'datetime="([^"]+)"', webpage, 'upload date', fatal=False))
uploader = self._search_regex(
r'class="author">([^<]+)</a>', webpage, 'uploader', fatal=False)
view_count = str_to_int(self._search_regex(
r'<span class="views"><strong>([\d.,]+)</strong>',
webpage, 'view count', fatal=False))
return { return {
'id': video_id, 'id': video_id,
'display_id': display_id, 'title': default['video_title'],
'title': title, 'thumbnail': default.get('video_image'),
'description': description,
'thumbnail': thumbnail,
'duration': duration,
'timestamp': timestamp,
'uploader': uploader,
'view_count': view_count,
'formats': formats, 'formats': formats,
} }
class GigaPostIE(InfoExtractor):
    """Extractor for giga.de post pages, which may list one or several videos.

    Post metadata is downloaded as JSON from the post URL itself. Every video
    listed in the post is handed over to the extractor registered under the
    'Giga' key through its ``videos.giga.de/embed/<id>`` URL.
    """

    _VALID_URL = r'https?://(?:www\.)?giga\.de/(?:[^/]+/)*(?P<id>[^/]+)'
    _TESTS = [{
        'url': 'http://www.giga.de/filme/anime-awesome/trailer/anime-awesome-chihiros-reise-ins-zauberland-das-beste-kommt-zum-schluss/',
        'md5': '6bc5535e945e724640664632055a584f',
        'info_dict': {
            'id': '2622086',
            'display_id': 'anime-awesome-chihiros-reise-ins-zauberland-das-beste-kommt-zum-schluss',
            'ext': 'mp4',
            'title': 'Anime Awesome Chihiros Reise ins Zauberland',
            'description': 'md5:07e8592ab2fe66fd9e051106d888aee8',
            # Raw string: '\.' is not a valid escape in a plain string literal.
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 578,
            'timestamp': 1414753306,
            'upload_date': '20141031',
            'uploader': 'Robin Schweiger',
            'view_count': int,
        },
    }, {
        'url': 'http://www.giga.de/games/channel/giga-top-montag/giga-topmontag-die-besten-serien-2014/',
        'info_dict': {
            'id': '2652823',
            'description': 'md5:f83f343e4685b48cc1c872b8d2c45b08',
            'uploader': 'Severin Pick',
            'title': 'GIGA TOPmontag: Die besten Serien 2014',
            'timestamp': 1419854425,
        },
        'playlist': [{
            'md5': '4ab7f2c6054a3257975a8e7ad6b73ada',
            'info_dict': {
                'id': '2652842',
                'ext': 'mp4',
                'title': 'TOPmontag: Die besten Serien 2014 - Teil 1',
                'upload_date': '20141229',
                'uploader': 'Severin Pick',
                'timestamp': 1419854425,
                'duration': 799.0,
                'view_count': int,
            },
        }, {
            'md5': '4227d56ec615013a7f72fae900399ea8',
            'info_dict': {
                'id': '2653854',
                'ext': 'mp4',
                'upload_date': '20141229',
                'title': 'TOPmontag: Die besten Serien 2014 - Teil 2',
                'uploader': 'Severin Pick',
                'timestamp': 1419854425,
                'duration': 829.0,
                'view_count': int,
            },
        }, {
            'md5': '015963162a5d7f31bb5c43e548518981',
            'info_dict': {
                'id': '2652834',
                'ext': 'mp4',
                'upload_date': '20141229',
                'title': 'TOPmontag: Die besten Serien 2014 - BONUS',
                'timestamp': 1419854425,
                'uploader': 'Severin Pick',
                'duration': 283.0,
                'view_count': int,
            },
        }]
    }, {
        'url': 'http://www.giga.de/extra/netzkultur/videos/giga-games-tom-mats-robin-werden-eigene-wege-gehen-eine-ankuendigung/',
        'only_matching': True,
    }, {
        'url': 'http://www.giga.de/tv/jonas-liest-spieletitel-eingedeutscht-episode-2/',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the info dict for a giga.de post.

        Returns the post-level metadata. For posts of type 'video' the result
        is merged with the sole video entry when exactly one video is listed,
        and otherwise becomes a 'multi_video' result holding all entries.
        """
        display_id = self._match_id(url)
        # The query string makes the site answer with JSON metadata; the
        # payload is read from the 'v1.2' key of the response.
        post_data = self._download_json(
            url + '?content=syndication/key/3c2244f8347e6f3edc482b3acb3674af/meta/json/',
            display_id)['v1.2']

        post_info = {
            'id': compat_str(post_data['post_id']),
            # Use the slug of the requested URL, not a fixed value, so every
            # post reports its own display id.
            'display_id': display_id,
            'title': post_data['title'],
            'description': post_data.get('excerpt'),
            'uploader': post_data.get('author'),
            'timestamp': parse_iso8601(post_data.get('date'), ' '),
        }

        if post_data['type'] == 'video':
            # Each video is resolved by the 'Giga' extractor; the fields set
            # here accompany that result because of 'url_transparent'.
            entries = [{
                '_type': 'url_transparent',
                'id': compat_str(video['id']),
                'title': video['title'],
                'url': 'http://videos.giga.de/embed/%s' % video['id'],
                'duration': float_or_none(video.get('length')),
                'view_count': int_or_none(video.get('view_counter')),
                'uploader': post_info['uploader'],
                'timestamp': post_info['timestamp'],
                'ie_key': 'Giga',
            } for video in post_data['videos_list']]

            if len(entries) == 1:
                # Single video: its fields (id, title, url, ...) override the
                # post-level ones, while description and display_id remain.
                post_info.update(entries[0])
            else:
                post_info.update({
                    '_type': 'multi_video',
                    'entries': entries,
                })

        return post_info