[sproutvideo] Add new extractor (closes #7935)
This commit is contained in:
parent
dcc8522fdb
commit
662087e491
@ -43,6 +43,9 @@ def get_suitable_downloader(info_dict, params={}):
|
||||
if ed.can_download(info_dict):
|
||||
return ed
|
||||
|
||||
if info_dict.get('force_hlsdl') is True:
|
||||
return HlsFD
|
||||
|
||||
if protocol.startswith('m3u8') and info_dict.get('is_live'):
|
||||
return FFmpegFD
|
||||
|
||||
|
@ -121,6 +121,8 @@ class FragmentFD(FileDownloader):
|
||||
del ctx['fragment_filename_sanitized']
|
||||
|
||||
def _prepare_frag_download(self, ctx):
|
||||
if 'hls' not in ctx:
|
||||
ctx['hls'] = False
|
||||
if 'live' not in ctx:
|
||||
ctx['live'] = False
|
||||
if not ctx['live']:
|
||||
@ -143,6 +145,7 @@ class FragmentFD(FileDownloader):
|
||||
'retries': self.params.get('retries', 0),
|
||||
'nopart': self.params.get('nopart', False),
|
||||
'test': self.params.get('test', False),
|
||||
'hls': ctx['hls'],
|
||||
}
|
||||
)
|
||||
tmpfilename = self.temp_name(ctx['filename'])
|
||||
|
@ -105,6 +105,7 @@ class HlsFD(FragmentFD):
|
||||
'filename': filename,
|
||||
'total_frags': media_frags,
|
||||
'ad_frags': ad_frags,
|
||||
'hls': '#EXT-X-KEY:METHOD=AES-128' in s,
|
||||
}
|
||||
|
||||
self._prepare_and_start_frag_download(ctx)
|
||||
@ -113,10 +114,15 @@ class HlsFD(FragmentFD):
|
||||
skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True)
|
||||
test = self.params.get('test', False)
|
||||
|
||||
extra_query = None
|
||||
extra_segment_query = None
|
||||
extra_key_query = None
|
||||
extra_param_to_segment_url = info_dict.get('extra_param_to_segment_url')
|
||||
if extra_param_to_segment_url:
|
||||
extra_query = compat_urlparse.parse_qs(extra_param_to_segment_url)
|
||||
extra_segment_query = compat_urlparse.parse_qs(extra_param_to_segment_url)
|
||||
extra_key_query = compat_urlparse.parse_qs(extra_param_to_segment_url)
|
||||
extra_param_to_key_url = info_dict.get('extra_param_to_key_url')
|
||||
if extra_param_to_key_url:
|
||||
extra_key_query = compat_urlparse.parse_qs(extra_param_to_key_url)
|
||||
i = 0
|
||||
media_sequence = 0
|
||||
decrypt_info = {'METHOD': 'NONE'}
|
||||
@ -136,8 +142,8 @@ class HlsFD(FragmentFD):
|
||||
line
|
||||
if re.match(r'^https?://', line)
|
||||
else compat_urlparse.urljoin(man_url, line))
|
||||
if extra_query:
|
||||
frag_url = update_url_query(frag_url, extra_query)
|
||||
if extra_segment_query:
|
||||
frag_url = update_url_query(frag_url, extra_segment_query)
|
||||
count = 0
|
||||
headers = info_dict.get('http_headers', {})
|
||||
if byte_range:
|
||||
@ -187,8 +193,8 @@ class HlsFD(FragmentFD):
|
||||
if not re.match(r'^https?://', decrypt_info['URI']):
|
||||
decrypt_info['URI'] = compat_urlparse.urljoin(
|
||||
man_url, decrypt_info['URI'])
|
||||
if extra_query:
|
||||
decrypt_info['URI'] = update_url_query(decrypt_info['URI'], extra_query)
|
||||
if extra_key_query:
|
||||
decrypt_info['URI'] = update_url_query(decrypt_info['URI'], extra_key_query)
|
||||
if decrypt_url != decrypt_info['URI']:
|
||||
decrypt_info['KEY'] = None
|
||||
elif line.startswith('#EXT-X-MEDIA-SEQUENCE'):
|
||||
|
@ -45,7 +45,8 @@ class HttpFD(FileDownloader):
|
||||
headers.update(add_headers)
|
||||
|
||||
is_test = self.params.get('test', False)
|
||||
chunk_size = self._TEST_FILE_SIZE if is_test else (
|
||||
is_hls = self.params.get('hls', False)
|
||||
chunk_size = self._TEST_FILE_SIZE if is_test and not is_hls else (
|
||||
info_dict.get('downloader_options', {}).get('http_chunk_size')
|
||||
or self.params.get('http_chunk_size') or 0)
|
||||
|
||||
@ -194,7 +195,8 @@ class HttpFD(FileDownloader):
|
||||
# However, for a test we still would like to download just a piece of a file.
|
||||
# To achieve this we limit data_len to _TEST_FILE_SIZE and manually control
|
||||
# block size when downloading a file.
|
||||
if is_test and (data_len is None or int(data_len) > self._TEST_FILE_SIZE):
|
||||
# If we are using HLS we cannot cut the fragment because it will break the decryption.
|
||||
if is_test and not is_hls and (data_len is None or int(data_len) > self._TEST_FILE_SIZE):
|
||||
data_len = self._TEST_FILE_SIZE
|
||||
|
||||
if data_len is not None:
|
||||
|
@ -1053,6 +1053,7 @@ from .sportbox import SportBoxIE
|
||||
from .sportdeutschland import SportDeutschlandIE
|
||||
from .springboardplatform import SpringboardPlatformIE
|
||||
from .sprout import SproutIE
|
||||
from .sproutvideo import SproutVideoIE
|
||||
from .srgssr import (
|
||||
SRGSSRIE,
|
||||
SRGSSRPlayIE,
|
||||
|
@ -119,6 +119,7 @@ from .expressen import ExpressenIE
|
||||
from .zype import ZypeIE
|
||||
from .odnoklassniki import OdnoklassnikiIE
|
||||
from .kinja import KinjaEmbedIE
|
||||
from .sproutvideo import SproutVideoIE
|
||||
|
||||
|
||||
class GenericIE(InfoExtractor):
|
||||
@ -2142,6 +2143,18 @@ class GenericIE(InfoExtractor):
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
{
|
||||
# SproutVideo iframe in page
|
||||
'url': 'https://www.solidarum.org/vivre-ensemble/adrien-labaeye-berlin-des-communautes-aux-communs',
|
||||
'info_dict': {
|
||||
'id': '4c9dddb01910e3c9c4',
|
||||
'ext': 'mp4',
|
||||
'title': 'Adrien Labaeye : Berlin, des communautés aux communs',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
# {
|
||||
# # TODO: find another test
|
||||
# # http://schema.org/VideoObject
|
||||
@ -3201,6 +3214,10 @@ class GenericIE(InfoExtractor):
|
||||
return self.playlist_from_matches(
|
||||
zype_urls, video_id, video_title, ie=ZypeIE.ie_key())
|
||||
|
||||
sproutvideo_url = SproutVideoIE._extract_url(webpage)
|
||||
if sproutvideo_url:
|
||||
return self.url_result(sproutvideo_url)
|
||||
|
||||
# Look for HTML5 media
|
||||
entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls')
|
||||
if entries:
|
||||
|
80
youtube_dl/extractor/sproutvideo.py
Normal file
80
youtube_dl/extractor/sproutvideo.py
Normal file
@ -0,0 +1,80 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
from ..compat import (
|
||||
compat_b64decode,
|
||||
compat_urllib_parse_urlencode,
|
||||
)
|
||||
|
||||
|
||||
class SproutVideoIE(InfoExtractor):
    """Information extractor for SproutVideo embed pages.

    Handles URLs of the form
    ``//videos.sproutvideo.com/embed/<video id>/<hex token>`` (with or
    without an explicit ``http(s):`` scheme) and exposes the HLS renditions
    referenced by the player data embedded in the page.
    """

    # Host dots are escaped so that only the literal host name matches.
    # No trailing wildcard: a greedy ``.*`` here would, once embedded in the
    # page-scanning pattern of _extract_url, swallow everything up to the last
    # quote on the line.
    # NOTE(review): assumes the base class applies _VALID_URL as a prefix
    # match, so URLs carrying a query string are still accepted - confirm.
    _VALID_URL = r'(?:https?:)?//videos\.sproutvideo\.com/embed/(?P<id>[a-f0-9]+)/[a-f0-9]+'
    _TEST = {
        'url': 'https://videos.sproutvideo.com/embed/4c9dddb01910e3c9c4/0fc24387c4f24ee3',
        'md5': '1343ce1a6cb39d67889bfa07c7b02b0e',
        'info_dict': {
            'id': '4c9dddb01910e3c9c4',
            'ext': 'mp4',
            'title': 'Adrien Labaeye : Berlin, des communautés aux communs',
        }
    }

    @staticmethod
    def _extract_url(webpage):
        """Return the first SproutVideo embed URL found in *webpage*.

        Looks for either an ``<iframe class="sproutvideo-player" ... src=...>``
        or any ``href=...`` attribute whose value is a SproutVideo embed URL.
        The returned URL includes its query string, if any. Returns ``None``
        when nothing matches.
        """
        # ``[^>]*?`` keeps the search for ``src`` inside the iframe tag, and
        # the optional query part stops at the closing quote, so the captured
        # URL never includes neighbouring attributes.
        mobj = re.search(
            r'(?:<iframe\s+class=[\'"]sproutvideo-player[^>]*?\bsrc|href)='
            r'[\'"](?P<url>%s(?:\?[^\'"]*)?)[\'"]' % SproutVideoIE._VALID_URL,
            webpage)
        if mobj:
            return mobj.group('url')

    def _real_extract(self, url):
        """Download the embed page at *url* and build the info dict.

        The page carries a base64-encoded JSON blob (``var dat = '...'``) that
        provides the title, the host/path components of the HLS manifest and
        the per-resource signatures.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        data = self._search_regex(
            r'<script[^>]+>var dat = \'([^\']+)\';</script>', webpage, 'data')
        player_data = self._parse_json(
            compat_b64decode(data).decode('utf-8'), video_id)

        # https://github.com/ytdl-org/youtube-dl/issues/16996#issuecomment-406901324
        # signature->m for manifests
        # signature->k for keys
        # signature->t for segments
        m_sig = self._policy_to_qs(player_data, 'm')
        k_sig = self._policy_to_qs(player_data, 'k')
        t_sig = self._policy_to_qs(player_data, 't')

        # %-formatting instead of str.format with auto-numbered fields, which
        # is unavailable on Python 2.6.
        manifest_url = 'https://%s.videos.sproutvideo.com/%s/%s/video/index.m3u8?%s' % (
            player_data['base'],
            player_data['s3_user_hash'],
            player_data['s3_video_hash'],
            m_sig)

        formats = self._extract_m3u8_formats(
            manifest_url, video_id, 'mp4', 'm3u8_native',
            m3u8_id='hls', fatal=False)
        self._sort_formats(formats)

        # Every variant playlist needs the manifest signature as well. Pick
        # the separator according to whether the URL already has a query.
        for fmt in formats:
            separator = '&' if '?' in fmt['url'] else '?'
            fmt['url'] = '%s%s%s' % (fmt['url'], separator, m_sig)

        return {
            'id': video_id,
            'title': player_data['title'],
            'formats': formats,
            'force_hlsdl': True,  # currently FFmpeg is not supported
            'extra_param_to_segment_url': t_sig,
            'extra_param_to_key_url': k_sig
        }

    def _format_qsdata(self, qs_data):
        """Return a copy of *qs_data* with ``CloudFront-`` removed from keys.

        *qs_data* is a mapping of signature parameter names to values; the
        values are passed through unchanged.
        """
        # dict() over a generator rather than a dict comprehension, which is
        # not available on Python 2.6.
        return dict(
            (name.replace('CloudFront-', ''), value)
            for name, value in qs_data.items())

    def _policy_to_qs(self, policy, key):
        """Build the URL-encoded query string for one signature kind.

        *policy* is the decoded player data; *key* selects the entry of
        ``policy['signatures']`` to use ('m', 'k' or 't'). The session ID from
        *policy* is added to the selected signature parameters.
        """
        sig = self._format_qsdata(policy['signatures'][key])
        sig['sessionID'] = policy['sessionID']
        return compat_urllib_parse_urlencode(sig, doseq=True)
|
Loading…
x
Reference in New Issue
Block a user