Merge remote-tracking branch 'upstream/master'
commit 0a8215677a

AUTHORS
@@ -169,3 +169,5 @@ Viťas Strádal
 Kagami Hiiragi
 Philip Huppert
 blahgeek
+Kevin Deldycke
+inondle
@@ -2025,6 +2025,7 @@ class YoutubeDL(object):
         if opts_cookiefile is None:
             self.cookiejar = compat_cookiejar.CookieJar()
         else:
+            opts_cookiefile = compat_expanduser(opts_cookiefile)
             self.cookiejar = compat_cookiejar.MozillaCookieJar(
                 opts_cookiefile)
             if os.access(opts_cookiefile, os.R_OK):
@@ -86,7 +86,9 @@ def _real_main(argv=None):
             if opts.batchfile == '-':
                 batchfd = sys.stdin
             else:
-                batchfd = io.open(opts.batchfile, 'r', encoding='utf-8', errors='ignore')
+                batchfd = io.open(
+                    compat_expanduser(opts.batchfile),
+                    'r', encoding='utf-8', errors='ignore')
             batch_urls = read_batch_urls(batchfd)
             if opts.verbose:
                 write_string('[debug] Batch file urls: ' + repr(batch_urls) + '\n')
@@ -404,7 +406,7 @@ def _real_main(argv=None):

         try:
             if opts.load_info_filename is not None:
-                retcode = ydl.download_with_info_file(opts.load_info_filename)
+                retcode = ydl.download_with_info_file(compat_expanduser(opts.load_info_filename))
             else:
                 retcode = ydl.download(all_urls)
         except MaxDownloadsReached:
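
Note: the three hunks above all route user-supplied paths (cookie jar, batch file, --load-info file) through compat_expanduser, which behaves like os.path.expanduser. A minimal sketch of the effect, assuming a Unix home directory (the file names below are made up):

    import os.path

    # Before: a path such as '~/batch.txt' was passed to io.open() verbatim and failed to resolve.
    # After: the leading '~' is expanded to the user's home directory first.
    print(os.path.expanduser('~/batch.txt'))       # e.g. /home/user/batch.txt
    print(os.path.expanduser('/tmp/cookies.txt'))  # absolute paths pass through unchanged
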
@@ -224,7 +224,7 @@ class FFmpegFD(ExternalFD):
                 args += ['-rtmp_live', 'live']

         args += ['-i', url, '-c', 'copy']
-        if protocol == 'm3u8':
+        if protocol in ('m3u8', 'm3u8_native'):
             if self.params.get('hls_use_mpegts', False) or tmpfilename == '-':
                 args += ['-f', 'mpegts']
             else:
@@ -4,6 +4,7 @@ import os.path
 import re

 from .fragment import FragmentFD
+from .external import FFmpegFD

 from ..compat import compat_urlparse
 from ..utils import (
@@ -17,12 +18,34 @@ class HlsFD(FragmentFD):

     FD_NAME = 'hlsnative'

+    @staticmethod
+    def can_download(manifest):
+        UNSUPPORTED_FEATURES = (
+            r'#EXT-X-KEY:METHOD=(?!NONE)',  # encrypted streams [1]
+            r'#EXT-X-BYTERANGE',  # playlists composed of byte ranges of media files [2]
+            r'#EXT-X-MEDIA-SEQUENCE:(?!0$)',  # live streams [3]
+            # 1. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.4
+            # 2. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.2
+            # 3. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.2
+        )
+        return all(not re.search(feature, manifest) for feature in UNSUPPORTED_FEATURES)
+
     def real_download(self, filename, info_dict):
         man_url = info_dict['url']
         self.to_screen('[%s] Downloading m3u8 manifest' % self.FD_NAME)
         manifest = self.ydl.urlopen(man_url).read()

         s = manifest.decode('utf-8', 'ignore')
+
+        if not self.can_download(s):
+            self.report_warning(
+                'hlsnative has detected features it does not support, '
+                'extraction will be delegated to ffmpeg')
+            fd = FFmpegFD(self.ydl, self.params)
+            for ph in self._progress_hooks:
+                fd.add_progress_hook(ph)
+            return fd.real_download(filename, info_dict)
+
         fragment_urls = []
         for line in s.splitlines():
             line = line.strip()
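
Note: the new HlsFD.can_download gate is a plain regex scan of the manifest text; any match against an unsupported feature pushes the download over to FFmpegFD. A self-contained sketch of the check, using the same three patterns as the hunk above (the sample manifests are made up):

    import re

    UNSUPPORTED_FEATURES = (
        r'#EXT-X-KEY:METHOD=(?!NONE)',    # encrypted streams
        r'#EXT-X-BYTERANGE',              # byte-range playlists
        r'#EXT-X-MEDIA-SEQUENCE:(?!0$)',  # live streams
    )

    def can_download(manifest):
        return all(not re.search(feature, manifest) for feature in UNSUPPORTED_FEATURES)

    encrypted = '#EXTM3U\n#EXT-X-KEY:METHOD=AES-128,URI="key.bin"\n#EXTINF:10,\nseg0.ts'
    plain = '#EXTM3U\n#EXTINF:10,\nseg0.ts\n#EXT-X-ENDLIST'
    print(can_download(encrypted))  # False -> hlsnative hands off to ffmpeg
    print(can_download(plain))      # True  -> hlsnative downloads the fragments itself
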
@@ -161,24 +161,53 @@ class ArteTVPlus7IE(InfoExtractor):
             'es': 'E[ESP]',
         }

+        langcode = LANGS.get(lang, lang)
+
         formats = []
         for format_id, format_dict in player_info['VSR'].items():
             f = dict(format_dict)
             versionCode = f.get('versionCode')
-            langcode = LANGS.get(lang, lang)
-            lang_rexs = [r'VO?%s-' % re.escape(langcode), r'VO?.-ST%s$' % re.escape(langcode)]
-            lang_pref = None
-            if versionCode:
-                matched_lang_rexs = [r for r in lang_rexs if re.match(r, versionCode)]
-                lang_pref = -10 if not matched_lang_rexs else 10 * len(matched_lang_rexs)
-            source_pref = 0
-            if versionCode is not None:
-                # The original version with subtitles has lower relevance
-                if re.match(r'VO-ST(F|A|E)', versionCode):
-                    source_pref -= 10
-                # The version with sourds/mal subtitles has also lower relevance
-                elif re.match(r'VO?(F|A|E)-STM\1', versionCode):
-                    source_pref -= 9
+            l = re.escape(langcode)
+
+            # Language preference from most to least priority
+            # Reference: section 5.6.3 of
+            # http://www.arte.tv/sites/en/corporate/files/complete-technical-guidelines-arte-geie-v1-05.pdf
+            PREFERENCES = (
+                # original version in requested language, without subtitles
+                r'VO{0}$'.format(l),
+                # original version in requested language, with partial subtitles in requested language
+                r'VO{0}-ST{0}$'.format(l),
+                # original version in requested language, with subtitles for the deaf and hard-of-hearing in requested language
+                r'VO{0}-STM{0}$'.format(l),
+                # non-original (dubbed) version in requested language, without subtitles
+                r'V{0}$'.format(l),
+                # non-original (dubbed) version in requested language, with subtitles partial subtitles in requested language
+                r'V{0}-ST{0}$'.format(l),
+                # non-original (dubbed) version in requested language, with subtitles for the deaf and hard-of-hearing in requested language
+                r'V{0}-STM{0}$'.format(l),
+                # original version in requested language, with partial subtitles in different language
+                r'VO{0}-ST(?!{0}).+?$'.format(l),
+                # original version in requested language, with subtitles for the deaf and hard-of-hearing in different language
+                r'VO{0}-STM(?!{0}).+?$'.format(l),
+                # original version in different language, with partial subtitles in requested language
+                r'VO(?:(?!{0}).+?)?-ST{0}$'.format(l),
+                # original version in different language, with subtitles for the deaf and hard-of-hearing in requested language
+                r'VO(?:(?!{0}).+?)?-STM{0}$'.format(l),
+                # original version in different language, without subtitles
+                r'VO(?:(?!{0}))?$'.format(l),
+                # original version in different language, with partial subtitles in different language
+                r'VO(?:(?!{0}).+?)?-ST(?!{0}).+?$'.format(l),
+                # original version in different language, with subtitles for the deaf and hard-of-hearing in different language
+                r'VO(?:(?!{0}).+?)?-STM(?!{0}).+?$'.format(l),
+            )
+
+            for pref, p in enumerate(PREFERENCES):
+                if re.match(p, versionCode):
+                    lang_pref = len(PREFERENCES) - pref
+                    break
+            else:
+                lang_pref = -1
+
             format = {
                 'format_id': format_id,
                 'preference': -10 if f.get('videoFormat') == 'M3U8' else None,
@@ -188,7 +217,6 @@ class ArteTVPlus7IE(InfoExtractor):
                 'height': int_or_none(f.get('height')),
                 'tbr': int_or_none(f.get('bitrate')),
                 'quality': qfunc(f.get('quality')),
-                'source_preference': source_pref,
             }

             if f.get('mediaType') == 'rtmp':
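
Note: the rewritten ArteTVPlus7IE logic ranks each format's versionCode against an ordered list of regexes and turns the first match into lang_pref. A minimal sketch of that loop with an abridged preference list (the full extractor ranks thirteen variants; the sample codes below are illustrative, with 'F' standing for French as in the LANGS mapping):

    import re

    l = re.escape('F')  # requested language code
    PREFERENCES = (
        r'VO{0}$'.format(l),        # original version, no subtitles
        r'VO{0}-ST{0}$'.format(l),  # original version, subtitles in the same language
        r'V{0}$'.format(l),         # dubbed version, no subtitles
    )

    def lang_pref(version_code):
        for pref, p in enumerate(PREFERENCES):
            if re.match(p, version_code):
                return len(PREFERENCES) - pref
        return -1

    print(lang_pref('VOF'))      # 3 (best match)
    print(lang_pref('VOF-STF'))  # 2
    print(lang_pref('VA'))       # -1 (no pattern matched)
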
@@ -384,6 +384,7 @@ from .limelight import (
     LimelightChannelIE,
     LimelightChannelListIE,
 )
+from .litv import LiTVIE
 from .liveleak import LiveLeakIE
 from .livestream import (
     LivestreamIE,
@@ -408,6 +409,10 @@ from .metacafe import MetacafeIE
 from .metacritic import MetacriticIE
 from .mgoon import MgoonIE
 from .mgtv import MGTVIE
+from .microsoftvirtualacademy import (
+    MicrosoftVirtualAcademyIE,
+    MicrosoftVirtualAcademyCourseIE,
+)
 from .minhateca import MinhatecaIE
 from .ministrygrid import MinistryGridIE
 from .minoto import MinotoIE
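
Note: the two import hunks above are what registers the new extractors with youtube-dl. A quick way to confirm they are picked up (a sketch; assumes a checkout containing this merge):

    from youtube_dl.extractor import get_info_extractor

    print(get_info_extractor('LiTV').IE_NAME)                           # LiTV
    print(get_info_extractor('MicrosoftVirtualAcademy').IE_NAME)        # mva
    print(get_info_extractor('MicrosoftVirtualAcademyCourse').IE_NAME)  # mva:course
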
youtube_dl/extractor/litv.py (new file, 137 lines)
@@ -0,0 +1,137 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import json
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    ExtractorError,
+    int_or_none,
+    smuggle_url,
+    unsmuggle_url,
+)
+
+
+class LiTVIE(InfoExtractor):
+    _VALID_URL = r'https?://www\.litv\.tv/vod/[^/]+/content\.do\?.*?\bid=(?P<id>[^&]+)'
+
+    _URL_TEMPLATE = 'https://www.litv.tv/vod/%s/content.do?id=%s'
+
+    _TESTS = [{
+        'url': 'https://www.litv.tv/vod/drama/content.do?brc_id=root&id=VOD00041610&isUHEnabled=true&autoPlay=1',
+        'info_dict': {
+            'id': 'VOD00041606',
+            'title': '花千骨',
+        },
+        'playlist_count': 50,
+    }, {
+        'url': 'https://www.litv.tv/vod/drama/content.do?brc_id=root&id=VOD00041610&isUHEnabled=true&autoPlay=1',
+        'info_dict': {
+            'id': 'VOD00041610',
+            'ext': 'mp4',
+            'title': '花千骨第1集',
+            'thumbnail': 're:https?://.*\.jpg$',
+            'description': 'md5:c7017aa144c87467c4fb2909c4b05d6f',
+            'episode_number': 1,
+        },
+        'params': {
+            'noplaylist': True,
+            'skip_download': True,  # m3u8 download
+        },
+        'skip': 'Georestricted to Taiwan',
+    }]
+
+    def _extract_playlist(self, season_list, video_id, vod_data, view_data, prompt=True):
+        episode_title = view_data['title']
+        content_id = season_list['contentId']
+
+        if prompt:
+            self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (content_id, video_id))
+
+        all_episodes = [
+            self.url_result(smuggle_url(
+                self._URL_TEMPLATE % (view_data['contentType'], episode['contentId']),
+                {'force_noplaylist': True}))  # To prevent infinite recursion
+            for episode in season_list['episode']]
+
+        return self.playlist_result(all_episodes, content_id, episode_title)
+
+    def _real_extract(self, url):
+        url, data = unsmuggle_url(url, {})
+
+        video_id = self._match_id(url)
+
+        noplaylist = self._downloader.params.get('noplaylist')
+        noplaylist_prompt = True
+        if 'force_noplaylist' in data:
+            noplaylist = data['force_noplaylist']
+            noplaylist_prompt = False
+
+        webpage = self._download_webpage(url, video_id)
+
+        view_data = dict(map(lambda t: (t[0], t[2]), re.findall(
+            r'viewData\.([a-zA-Z]+)\s*=\s*(["\'])([^"\']+)\2',
+            webpage)))
+
+        vod_data = self._parse_json(self._search_regex(
+            'var\s+vod\s*=\s*([^;]+)', webpage, 'VOD data', default='{}'),
+            video_id)
+
+        season_list = list(vod_data.get('seasonList', {}).values())
+        if season_list:
+            if not noplaylist:
+                return self._extract_playlist(
+                    season_list[0], video_id, vod_data, view_data,
+                    prompt=noplaylist_prompt)
+
+            if noplaylist_prompt:
+                self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
+
+        # In browsers `getMainUrl` request is always issued. Usually this
+        # endpoint gives the same result as the data embedded in the webpage.
+        # If georestricted, there are no embedded data, so an extra request is
+        # necessary to get the error code
+        video_data = self._parse_json(self._search_regex(
+            r'uiHlsUrl\s*=\s*testBackendData\(([^;]+)\);',
+            webpage, 'video data', default='{}'), video_id)
+        if not video_data:
+            payload = {
+                'assetId': view_data['assetId'],
+                'watchDevices': vod_data['watchDevices'],
+                'contentType': view_data['contentType'],
+            }
+            video_data = self._download_json(
+                'https://www.litv.tv/vod/getMainUrl', video_id,
+                data=json.dumps(payload).encode('utf-8'),
+                headers={'Content-Type': 'application/json'})
+
+        if not video_data.get('fullpath'):
+            error_msg = video_data.get('errorMessage')
+            if error_msg == 'vod.error.outsideregionerror':
+                self.raise_geo_restricted('This video is available in Taiwan only')
+            if error_msg:
+                raise ExtractorError('%s said: %s' % (self.IE_NAME, error_msg), expected=True)
+            raise ExtractorError('Unexpected result from %s' % self.IE_NAME)
+
+        formats = self._extract_m3u8_formats(
+            video_data['fullpath'], video_id, ext='mp4', m3u8_id='hls')
+        for a_format in formats:
+            # LiTV HLS segments doesn't like compressions
+            a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = True
+
+        title = view_data['title'] + view_data.get('secondaryMark', '')
+        description = view_data.get('description')
+        thumbnail = view_data.get('imageFile')
+        categories = [item['name'] for item in vod_data.get('category', [])]
+        episode = int_or_none(view_data.get('episode'))
+
+        return {
+            'id': video_id,
+            'formats': formats,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'categories': categories,
+            'episode_number': episode,
+        }
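
Note on the recursion guard in LiTVIE: episode entries are emitted through smuggle_url with a force_noplaylist flag, and _real_extract reads it back via unsmuggle_url, so extracting an individual episode never re-expands the whole season. A minimal sketch of how those helpers round-trip data (the URL and flag below are illustrative):

    from youtube_dl.utils import smuggle_url, unsmuggle_url

    url = smuggle_url(
        'https://www.litv.tv/vod/drama/content.do?id=VOD00041610',
        {'force_noplaylist': True})
    # smuggle_url tucks the extra data into the URL fragment;
    # unsmuggle_url recovers it when the episode URL is extracted later.
    clean_url, data = unsmuggle_url(url, {})
    print(data)  # {'force_noplaylist': True}
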
youtube_dl/extractor/microsoftvirtualacademy.py (new file, 192 lines)
@@ -0,0 +1,192 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import (
+    compat_xpath,
+)
+from ..utils import (
+    int_or_none,
+    parse_duration,
+    smuggle_url,
+    unsmuggle_url,
+    xpath_text,
+)
+
+
+class MicrosoftVirtualAcademyBaseIE(InfoExtractor):
+    def _extract_base_url(self, course_id, display_id):
+        return self._download_json(
+            'https://api-mlxprod.microsoft.com/services/products/anonymous/%s' % course_id,
+            display_id, 'Downloading course base URL')
+
+    def _extract_chapter_and_title(self, title):
+        if not title:
+            return None, None
+        m = re.search(r'(?P<chapter>\d+)\s*\|\s*(?P<title>.+)', title)
+        return (int(m.group('chapter')), m.group('title')) if m else (None, title)
+
+
+class MicrosoftVirtualAcademyIE(MicrosoftVirtualAcademyBaseIE):
+    IE_NAME = 'mva'
+    IE_DESC = 'Microsoft Virtual Academy videos'
+    _VALID_URL = r'(?:%s:|https?://(?:mva\.microsoft|(?:www\.)?microsoftvirtualacademy)\.com/[^/]+/training-courses/[^/?#&]+-)(?P<course_id>\d+)(?::|\?l=)(?P<id>[\da-zA-Z]+_\d+)' % IE_NAME
+
+    _TESTS = [{
+        'url': 'https://mva.microsoft.com/en-US/training-courses/microsoft-azure-fundamentals-virtual-machines-11788?l=gfVXISmEB_6804984382',
+        'md5': '7826c44fc31678b12ad8db11f6b5abb9',
+        'info_dict': {
+            'id': 'gfVXISmEB_6804984382',
+            'ext': 'mp4',
+            'title': 'Course Introduction',
+            'formats': 'mincount:3',
+            'subtitles': {
+                'en': [{
+                    'ext': 'ttml',
+                }],
+            },
+        }
+    }, {
+        'url': 'mva:11788:gfVXISmEB_6804984382',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        url, smuggled_data = unsmuggle_url(url, {})
+
+        mobj = re.match(self._VALID_URL, url)
+        course_id = mobj.group('course_id')
+        video_id = mobj.group('id')
+
+        base_url = smuggled_data.get('base_url') or self._extract_base_url(course_id, video_id)
+
+        settings = self._download_xml(
+            '%s/content/content_%s/videosettings.xml?v=1' % (base_url, video_id),
+            video_id, 'Downloading video settings XML')
+
+        _, title = self._extract_chapter_and_title(xpath_text(
+            settings, './/Title', 'title', fatal=True))
+
+        formats = []
+
+        for sources in settings.findall(compat_xpath('.//MediaSources')):
+            if sources.get('videoType') == 'smoothstreaming':
+                continue
+            for source in sources.findall(compat_xpath('./MediaSource')):
+                video_url = source.text
+                if not video_url or not video_url.startswith('http'):
+                    continue
+                video_mode = source.get('videoMode')
+                height = int_or_none(self._search_regex(
+                    r'^(\d+)[pP]$', video_mode or '', 'height', default=None))
+                codec = source.get('codec')
+                acodec, vcodec = [None] * 2
+                if codec:
+                    codecs = codec.split(',')
+                    if len(codecs) == 2:
+                        acodec, vcodec = codecs
+                    elif len(codecs) == 1:
+                        vcodec = codecs[0]
+                formats.append({
+                    'url': video_url,
+                    'format_id': video_mode,
+                    'height': height,
+                    'acodec': acodec,
+                    'vcodec': vcodec,
+                })
+        self._sort_formats(formats)
+
+        subtitles = {}
+        for source in settings.findall(compat_xpath('.//MarkerResourceSource')):
+            subtitle_url = source.text
+            if not subtitle_url:
+                continue
+            subtitles.setdefault('en', []).append({
+                'url': '%s/%s' % (base_url, subtitle_url),
+                'ext': source.get('type'),
+            })
+
+        return {
+            'id': video_id,
+            'title': title,
+            'subtitles': subtitles,
+            'formats': formats
+        }
+
+
+class MicrosoftVirtualAcademyCourseIE(MicrosoftVirtualAcademyBaseIE):
+    IE_NAME = 'mva:course'
+    IE_DESC = 'Microsoft Virtual Academy courses'
+    _VALID_URL = r'(?:%s:|https?://(?:mva\.microsoft|(?:www\.)?microsoftvirtualacademy)\.com/[^/]+/training-courses/(?P<display_id>[^/?#&]+)-)(?P<id>\d+)' % IE_NAME
+
+    _TESTS = [{
+        'url': 'https://mva.microsoft.com/en-US/training-courses/microsoft-azure-fundamentals-virtual-machines-11788',
+        'info_dict': {
+            'id': '11788',
+            'title': 'Microsoft Azure Fundamentals: Virtual Machines',
+        },
+        'playlist_count': 36,
+    }, {
+        # with emphasized chapters
+        'url': 'https://mva.microsoft.com/en-US/training-courses/developing-windows-10-games-with-construct-2-16335',
+        'info_dict': {
+            'id': '16335',
+            'title': 'Developing Windows 10 Games with Construct 2',
+        },
+        'playlist_count': 10,
+    }, {
+        'url': 'https://www.microsoftvirtualacademy.com/en-US/training-courses/microsoft-azure-fundamentals-virtual-machines-11788',
+        'only_matching': True,
+    }, {
+        'url': 'mva:course:11788',
+        'only_matching': True,
+    }]
+
+    @classmethod
+    def suitable(cls, url):
+        return False if MicrosoftVirtualAcademyIE.suitable(url) else super(
+            MicrosoftVirtualAcademyCourseIE, cls).suitable(url)
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        course_id = mobj.group('id')
+        display_id = mobj.group('display_id')
+
+        base_url = self._extract_base_url(course_id, display_id)
+
+        manifest = self._download_json(
+            '%s/imsmanifestlite.json' % base_url,
+            display_id, 'Downloading course manifest JSON')['manifest']
+
+        organization = manifest['organizations']['organization'][0]
+
+        entries = []
+        for chapter in organization['item']:
+            chapter_number, chapter_title = self._extract_chapter_and_title(chapter.get('title'))
+            chapter_id = chapter.get('@identifier')
+            for item in chapter.get('item', []):
+                item_id = item.get('@identifier')
+                if not item_id:
+                    continue
+                metadata = item.get('resource', {}).get('metadata') or {}
+                if metadata.get('learningresourcetype') != 'Video':
+                    continue
+                _, title = self._extract_chapter_and_title(item.get('title'))
+                duration = parse_duration(metadata.get('duration'))
+                description = metadata.get('description')
+                entries.append({
+                    '_type': 'url_transparent',
+                    'url': smuggle_url(
+                        'mva:%s:%s' % (course_id, item_id), {'base_url': base_url}),
+                    'title': title,
+                    'description': description,
+                    'duration': duration,
+                    'chapter': chapter_title,
+                    'chapter_number': chapter_number,
+                    'chapter_id': chapter_id,
+                })
+
+        title = organization.get('title') or manifest.get('metadata', {}).get('title')
+
+        return self.playlist_result(entries, course_id, title)
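
Note on MicrosoftVirtualAcademyBaseIE._extract_chapter_and_title: course manifests label items as "<chapter> | <title>", and the helper splits that into a chapter number and a bare title, falling back to (None, title) when there is no prefix. A standalone sketch (the sample titles are made up):

    import re

    def extract_chapter_and_title(title):
        if not title:
            return None, None
        m = re.search(r'(?P<chapter>\d+)\s*\|\s*(?P<title>.+)', title)
        return (int(m.group('chapter')), m.group('title')) if m else (None, title)

    print(extract_chapter_and_title('2 | Creating Virtual Machines'))  # (2, 'Creating Virtual Machines')
    print(extract_chapter_and_title('Course Introduction'))            # (None, 'Course Introduction')
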
@@ -2,14 +2,16 @@
 from __future__ import unicode_literals

 from .common import InfoExtractor
-from ..utils import remove_end
+from ..utils import (
+    determine_ext,
+    remove_end,
+)


 class TelegraafIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?telegraaf\.nl/tv/(?:[^/]+/)+(?P<id>\d+)/[^/]+\.html'
     _TEST = {
         'url': 'http://www.telegraaf.nl/tv/nieuws/binnenland/24353229/__Tikibad_ontruimd_wegens_brand__.html',
-        'md5': '83245a9779bcc4a24454bfd53c65b6dc',
         'info_dict': {
             'id': '24353229',
             'ext': 'mp4',
@@ -18,18 +20,60 @@ class TelegraafIE(InfoExtractor):
             'thumbnail': 're:^https?://.*\.jpg$',
             'duration': 33,
         },
+        'params': {
+            # m3u8 download
+            'skip_download': True,
+        },
     }

     def _real_extract(self, url):
-        playlist_id = self._match_id(url)
+        video_id = self._match_id(url)

-        webpage = self._download_webpage(url, playlist_id)
+        webpage = self._download_webpage(url, video_id)
+
+        player_url = self._html_search_regex(
+            r'<iframe[^>]+src="([^"]+")', webpage, 'player URL')
+        player_page = self._download_webpage(
+            player_url, video_id, note='Download player webpage')
         playlist_url = self._search_regex(
-            r"iframe\.loadPlayer\('([^']+)'", webpage, 'player')
+            r'playlist\s*:\s*"([^"]+)"', player_page, 'playlist URL')
+        playlist_data = self._download_json(playlist_url, video_id)
+
+        item = playlist_data['items'][0]
+        formats = []
+        locations = item['locations']
+        for location in locations.get('adaptive', []):
+            manifest_url = location['src']
+            ext = determine_ext(manifest_url)
+            if ext == 'm3u8':
+                formats.extend(self._extract_m3u8_formats(
+                    manifest_url, video_id, ext='mp4', m3u8_id='hls'))
+            elif ext == 'mpd':
+                # TODO: Current DASH formats are broken - $Time$ pattern in
+                # <SegmentTemplate> not implemented yet
+                continue
+            else:
+                self.report_warning('Unknown adaptive format %s' % ext)
+        for location in locations.get('progressive', []):
+            formats.append({
+                'url': location['sources'][0]['src'],
+                'width': location.get('width'),
+                'height': location.get('height'),
+                'format_id': 'http-%s' % location['label'],
+            })
+
+        self._sort_formats(formats)

-        entries = self._extract_xspf_playlist(playlist_url, playlist_id)
         title = remove_end(self._og_search_title(webpage), ' - VIDEO')
         description = self._og_search_description(webpage)
+        duration = item.get('duration')
+        thumbnail = item.get('poster')

-        return self.playlist_result(entries, playlist_id, title, description)
+        return {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'formats': formats,
+            'duration': duration,
+            'thumbnail': thumbnail,
+        }
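
Note on the rewritten TelegraafIE: adaptive locations are routed by youtube-dl's determine_ext helper, which simply inspects the trailing extension of the URL. A small illustration (the URLs are made up):

    from youtube_dl.utils import determine_ext

    print(determine_ext('https://example.com/stream/master.m3u8'))   # m3u8 -> HLS formats extracted
    print(determine_ext('https://example.com/stream/manifest.mpd'))  # mpd  -> skipped for now (DASH TODO)
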