Merge branch 'master' into GoogleDrive-issue-13619

This commit is contained in:
Parmjit Virk 2017-07-16 20:02:59 -05:00
commit 46982e74c7
15 changed files with 439 additions and 122 deletions

View File

@ -6,8 +6,8 @@
--- ---
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.07.09*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. ### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.07.15*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.07.09** - [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.07.15**
### Before submitting an *issue* make sure you have: ### Before submitting an *issue* make sure you have:
- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
@ -35,7 +35,7 @@ $ youtube-dl -v <your command line>
[debug] User config: [] [debug] User config: []
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
[debug] youtube-dl version 2017.07.09 [debug] youtube-dl version 2017.07.15
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
[debug] Proxy map: {} [debug] Proxy map: {}

View File

@ -1,11 +1,25 @@
version <unreleased> version 2017.07.15
Core
* [YoutubeDL] Don't expand environment variables in meta fields (#13637)
Extractors Extractors
* [spiegeltv] Delegate extraction to nexx extractor (#13159)
+ [nexx] Add support for nexx.cloud (#10807, #13465)
* [generic] Fix rutube embeds extraction (#13641)
* [karrierevideos] Fix title extraction (#13641)
* [youtube] Don't capture YouTube Red ad for creator meta field (#13621)
* [slideshare] Fix extraction (#13617)
+ [5tv] Add another video URL pattern (#13354, #13606)
* [drtv] Make HLS and HDS extraction non fatal
* [ted] Fix subtitles extraction (#13628, #13629)
* [vine] Make sure the title won't be empty * [vine] Make sure the title won't be empty
+ [twitter] Support HLS streams in vmap URLs + [twitter] Support HLS streams in vmap URLs
+ [periscope] Support pscp.tv URLs in embedded frames + [periscope] Support pscp.tv URLs in embedded frames
* [twitter] Extract mp4 urls via mobile API (#12726)
* [niconico] Fix authentication error handling (#12486) * [niconico] Fix authentication error handling (#12486)
* [giantbomb] Extract m3u8 formats (#13626) * [giantbomb] Extract m3u8 formats (#13626)
+ [vlive:playlist] Add support for playlists (#13613)
version 2017.07.09 version 2017.07.09

View File

@ -521,6 +521,7 @@
- **NextMedia**: 蘋果日報 - **NextMedia**: 蘋果日報
- **NextMediaActionNews**: 蘋果日報 - 動新聞 - **NextMediaActionNews**: 蘋果日報 - 動新聞
- **NextTV**: 壹電視 - **NextTV**: 壹電視
- **Nexx**
- **nfb**: National Film Board of Canada - **nfb**: National Film Board of Canada
- **nfl.com** - **nfl.com**
- **NhkVod** - **NhkVod**
@ -942,6 +943,7 @@
- **vk:wallpost** - **vk:wallpost**
- **vlive** - **vlive**
- **vlive:channel** - **vlive:channel**
- **vlive:playlist**
- **Vodlocker** - **Vodlocker**
- **VODPl** - **VODPl**
- **VODPlatform** - **VODPlatform**

View File

@ -20,13 +20,14 @@ import re
import shutil import shutil
import subprocess import subprocess
import socket import socket
import string
import sys import sys
import time import time
import tokenize import tokenize
import traceback import traceback
import random import random
from string import ascii_letters
from .compat import ( from .compat import (
compat_basestring, compat_basestring,
compat_cookiejar, compat_cookiejar,
@ -679,7 +680,7 @@ class YoutubeDL(object):
# correspondingly that is not what we want since we need to keep # correspondingly that is not what we want since we need to keep
# '%%' intact for template dict substitution step. Working around # '%%' intact for template dict substitution step. Working around
# with boundary-alike separator hack. # with boundary-alike separator hack.
sep = ''.join([random.choice(string.ascii_letters) for _ in range(32)]) sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep)) outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))
# outtmpl should be expand_path'ed before template dict substitution # outtmpl should be expand_path'ed before template dict substitution

View File

@ -107,11 +107,13 @@ class ADNIE(InfoExtractor):
metas = options.get('metas') or {} metas = options.get('metas') or {}
title = metas.get('title') or video_info['title'] title = metas.get('title') or video_info['title']
links = player_config.get('links') or {} links = player_config.get('links') or {}
error = None
if not links: if not links:
links_url = player_config['linksurl'] links_url = player_config['linksurl']
links_data = self._download_json(urljoin( links_data = self._download_json(urljoin(
self._BASE_URL, links_url), video_id) self._BASE_URL, links_url), video_id)
links = links_data.get('links') or {} links = links_data.get('links') or {}
error = links_data.get('error')
formats = [] formats = []
for format_id, qualities in links.items(): for format_id, qualities in links.items():
@ -130,7 +132,8 @@ class ADNIE(InfoExtractor):
for f in m3u8_formats: for f in m3u8_formats:
f['language'] = 'fr' f['language'] = 'fr'
formats.extend(m3u8_formats) formats.extend(m3u8_formats)
error = options.get('error') if not error:
error = options.get('error')
if not formats and error: if not formats and error:
raise ExtractorError('%s said: %s' % (self.IE_NAME, error), expected=True) raise ExtractorError('%s said: %s' % (self.IE_NAME, error), expected=True)
self._sort_formats(formats) self._sort_formats(formats)

View File

@ -730,12 +730,12 @@ class InfoExtractor(object):
video_info['title'] = video_title video_info['title'] = video_title
return video_info return video_info
def playlist_from_matches(self, matches, video_id, video_title, getter=None, ie=None): def playlist_from_matches(self, matches, playlist_id=None, playlist_title=None, getter=None, ie=None):
urlrs = orderedSet( urls = orderedSet(
self.url_result(self._proto_relative_url(getter(m) if getter else m), ie) self.url_result(self._proto_relative_url(getter(m) if getter else m), ie)
for m in matches) for m in matches)
return self.playlist_result( return self.playlist_result(
urlrs, playlist_id=video_id, playlist_title=video_title) urls, playlist_id=playlist_id, playlist_title=playlist_title)
@staticmethod @staticmethod
def playlist_result(entries, playlist_id=None, playlist_title=None, playlist_description=None): def playlist_result(entries, playlist_id=None, playlist_title=None, playlist_description=None):

View File

@ -510,7 +510,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
# webpage provide more accurate data than series_title from XML # webpage provide more accurate data than series_title from XML
series = self._html_search_regex( series = self._html_search_regex(
r'id=["\']showmedia_about_episode_num[^>]+>\s*<a[^>]+>([^<]+)', r'(?s)<h\d[^>]+\bid=["\']showmedia_about_episode_num[^>]+>(.+?)</h\d',
webpage, 'series', fatal=False) webpage, 'series', fatal=False)
season = xpath_text(metadata, 'series_title') season = xpath_text(metadata, 'series_title')
@ -518,7 +518,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
episode_number = int_or_none(xpath_text(metadata, 'episode_number')) episode_number = int_or_none(xpath_text(metadata, 'episode_number'))
season_number = int_or_none(self._search_regex( season_number = int_or_none(self._search_regex(
r'(?s)<h4[^>]+id=["\']showmedia_about_episode_num[^>]+>.+?</h4>\s*<h4>\s*Season (\d+)', r'(?s)<h\d[^>]+id=["\']showmedia_about_episode_num[^>]+>.+?</h\d>\s*<h4>\s*Season (\d+)',
webpage, 'season number', default=None)) webpage, 'season number', default=None))
return { return {

View File

@ -653,6 +653,10 @@ from .nextmedia import (
AppleDailyIE, AppleDailyIE,
NextTVIE, NextTVIE,
) )
from .nexx import (
NexxIE,
NexxEmbedIE,
)
from .nfb import NFBIE from .nfb import NFBIE
from .nfl import NFLIE from .nfl import NFLIE
from .nhk import NhkVodIE from .nhk import NhkVodIE
@ -761,6 +765,7 @@ from .pandoratv import PandoraTVIE
from .parliamentliveuk import ParliamentLiveUKIE from .parliamentliveuk import ParliamentLiveUKIE
from .patreon import PatreonIE from .patreon import PatreonIE
from .pbs import PBSIE from .pbs import PBSIE
from .pearvideo import PearVideoIE
from .people import PeopleIE from .people import PeopleIE
from .periscope import ( from .periscope import (
PeriscopeIE, PeriscopeIE,

View File

@ -36,6 +36,10 @@ from .brightcove import (
BrightcoveLegacyIE, BrightcoveLegacyIE,
BrightcoveNewIE, BrightcoveNewIE,
) )
from .nexx import (
NexxIE,
NexxEmbedIE,
)
from .nbc import NBCSportsVPlayerIE from .nbc import NBCSportsVPlayerIE
from .ooyala import OoyalaIE from .ooyala import OoyalaIE
from .rutv import RUTVIE from .rutv import RUTVIE
@ -1549,6 +1553,43 @@ class GenericIE(InfoExtractor):
}, },
'add_ie': ['BrightcoveLegacy'], 'add_ie': ['BrightcoveLegacy'],
}, },
# Nexx embed
{
'url': 'https://www.funk.net/serien/5940e15073f6120001657956/items/593efbb173f6120001657503',
'info_dict': {
'id': '247746',
'ext': 'mp4',
'title': "Yesterday's Jam (OV)",
'description': 'md5:09bc0984723fed34e2581624a84e05f0',
'timestamp': 1492594816,
'upload_date': '20170419',
},
'params': {
'format': 'bestvideo',
'skip_download': True,
},
},
# Nexx iFrame embed
{
'url': 'http://www.spiegel.de/sptv/spiegeltv/spiegel-tv-ueber-schnellste-katapult-achterbahn-der-welt-taron-a-1137884.html',
'info_dict': {
'id': '161464',
'ext': 'mp4',
'title': 'Nervenkitzel Achterbahn',
'alt_title': 'Karussellbauer in Deutschland',
'description': 'md5:ffe7b1cc59a01f585e0569949aef73cc',
'release_year': 2005,
'creator': 'SPIEGEL TV',
'thumbnail': r're:^https?://.*\.jpg$',
'duration': 2761,
'timestamp': 1394021479,
'upload_date': '20140305',
},
'params': {
'format': 'bestvideo',
'skip_download': True,
},
},
# Facebook <iframe> embed # Facebook <iframe> embed
{ {
'url': 'https://www.hostblogger.de/blog/archives/6181-Auto-jagt-Betonmischer.html', 'url': 'https://www.hostblogger.de/blog/archives/6181-Auto-jagt-Betonmischer.html',
@ -2133,6 +2174,16 @@ class GenericIE(InfoExtractor):
if bc_urls: if bc_urls:
return self.playlist_from_matches(bc_urls, video_id, video_title, ie='BrightcoveNew') return self.playlist_from_matches(bc_urls, video_id, video_title, ie='BrightcoveNew')
# Look for Nexx embeds
nexx_urls = NexxIE._extract_urls(webpage)
if nexx_urls:
return self.playlist_from_matches(nexx_urls, video_id, video_title, ie=NexxIE.ie_key())
# Look for Nexx iFrame embeds
nexx_embed_urls = NexxEmbedIE._extract_urls(webpage)
if nexx_embed_urls:
return self.playlist_from_matches(nexx_embed_urls, video_id, video_title, ie=NexxEmbedIE.ie_key())
# Look for ThePlatform embeds # Look for ThePlatform embeds
tp_urls = ThePlatformIE._extract_urls(webpage) tp_urls = ThePlatformIE._extract_urls(webpage)
if tp_urls: if tp_urls:

View File

@ -0,0 +1,271 @@
# coding: utf-8
from __future__ import unicode_literals
import hashlib
import random
import re
import time
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
ExtractorError,
int_or_none,
parse_duration,
try_get,
urlencode_postdata,
)
class NexxIE(InfoExtractor):
    """Extractor for videos addressed through the nexx v3 API.

    Handles ``api.nexx.cloud`` / ``api.nexxcdn.com`` "videos/byid" URLs and
    provides helpers for locating nexx JavaScript embeds in third-party pages.
    """

    _VALID_URL = r'https?://api\.nexx(?:\.cloud|cdn\.com)/v3/(?P<domain_id>\d+)/videos/byid/(?P<id>\d+)'
    _TESTS = [{
        # movie
        'url': 'https://api.nexx.cloud/v3/748/videos/byid/128907',
        'md5': '16746bfc28c42049492385c989b26c4a',
        'info_dict': {
            'id': '128907',
            'ext': 'mp4',
            'title': 'Stiftung Warentest',
            'alt_title': 'Wie ein Test abläuft',
            'description': 'md5:d1ddb1ef63de721132abd38639cc2fd2',
            'release_year': 2013,
            'creator': 'SPIEGEL TV',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 2509,
            'timestamp': 1384264416,
            'upload_date': '20131112',
        },
        'params': {
            'format': 'bestvideo',
        },
    }, {
        # episode
        'url': 'https://api.nexx.cloud/v3/741/videos/byid/247858',
        'info_dict': {
            'id': '247858',
            'ext': 'mp4',
            'title': 'Return of the Golden Child (OV)',
            'description': 'md5:5d969537509a92b733de21bae249dc63',
            'release_year': 2017,
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 1397,
            'timestamp': 1495033267,
            'upload_date': '20170517',
            'episode_number': 2,
            'season_number': 2,
        },
        'params': {
            'format': 'bestvideo',
            'skip_download': True,
        },
    }, {
        'url': 'https://api.nexxcdn.com/v3/748/videos/byid/128907',
        'only_matching': True,
    }]

    @staticmethod
    def _extract_urls(webpage):
        """Return API URLs for every nexx JavaScript embed found in *webpage*.

        The list is empty when the page does not load the nexx "require"
        script or no ``_play.init`` call with a video id is found.
        """
        # Reference:
        # 1. https://nx-s.akamaized.net/files/201510/44.pdf

        entries = []

        # JavaScript Integration
        mobj = re.search(
            r'<script\b[^>]+\bsrc=["\']https?://require\.nexx(?:\.cloud|cdn\.com)/(?P<id>\d+)',
            webpage)
        if mobj:
            # The numeric path component of the script URL is the domain id
            domain_id = mobj.group('id')
            for video_id in re.findall(
                    r'(?is)onPLAYReady.+?_play\.init\s*\(.+?\s*,\s*["\']?(\d+)',
                    webpage):
                entries.append(
                    'https://api.nexx.cloud/v3/%s/videos/byid/%s'
                    % (domain_id, video_id))

        # TODO: support more embed formats

        return entries

    @staticmethod
    def _extract_url(webpage):
        """Return the first URL found by ``_extract_urls``.

        Raises IndexError when *webpage* contains no recognised embed.
        """
        return NexxIE._extract_urls(webpage)[0]

    def _handle_error(self, response):
        """Raise ExtractorError if *response* metadata reports a non-2xx status.

        A missing (or falsy) status is treated as 200.
        """
        status = int_or_none(try_get(
            response, lambda x: x['metadata']['status']) or 200)
        if 200 <= status < 300:
            return
        raise ExtractorError(
            '%s said: %s' % (self.IE_NAME, response['metadata']['errorhint']),
            expected=True)

    def _call_api(self, domain_id, path, video_id, data=None, headers=None):
        """POST *data* to the v3 API endpoint *path* and return its ``result``.

        *headers* are sent in addition to the form Content-Type header.
        Raises ExtractorError (via ``_handle_error``) on an API-level error.
        """
        # Work on a copy: the original mutated both the caller's dict and a
        # shared mutable default argument.
        headers = dict(headers or {})
        headers['Content-Type'] = 'application/x-www-form-urlencoded; charset=UTF-8'
        result = self._download_json(
            'https://api.nexx.cloud/v3/%s/%s' % (domain_id, path), video_id,
            'Downloading %s JSON' % path, data=urlencode_postdata(data),
            headers=headers)
        self._handle_error(result)
        return result['result']

    def _real_extract(self, url):
        """Initialise an API session, fetch the video record and build the info dict."""
        mobj = re.match(self._VALID_URL, url)
        domain_id, video_id = mobj.group('domain_id', 'id')

        # Reverse engineered from JS code (see getDeviceID function)
        # Integer bounds only: random.randint rejects float arguments such
        # as 1e4 on newer Python versions.
        device_id = '%d:%d:%d%d' % (
            random.randint(1, 4), int(time.time()),
            random.randint(10000, 99999), random.randint(1, 9))

        result = self._call_api(domain_id, 'session/init', video_id, data={
            'nxp_devh': device_id,
            'nxp_userh': '',
            'precid': '0',
            'playlicense': '0',
            'screenx': '1920',
            'screeny': '1080',
            'playerversion': '6.0.00',
            'gateway': 'html5',
            'adGateway': '',
            'explicitlanguage': 'en-US',
            'addTextTemplates': '1',
            'addDomainData': '1',
            'addAdModel': '1',
        }, headers={
            'X-Request-Enable-Auth-Fallback': '1',
        })

        cid = result['general']['cid']

        # As described in [1] X-Request-Token generation algorithm is
        # as follows:
        #   md5( operation + domain_id + domain_secret )
        # where domain_secret is a static value that will be given by nexx.tv
        # as per [1]. Here is how this "secret" is generated (reversed
        # from _play.api.init function, search for clienttoken). So it's
        # actually not static and not that much of a secret.
        # 1. https://nexxtvstorage.blob.core.windows.net/files/201610/27.pdf
        # The first and last digits of device_id give how many characters to
        # strip from the start and end of the client token respectively.
        secret = result['device']['clienttoken'][int(device_id[0]):]
        secret = secret[0:len(secret) - int(device_id[-1])]

        op = 'byid'

        # Reversed from JS code for _play.api.call function (search for
        # X-Request-Token)
        request_token = hashlib.md5(
            ''.join((op, domain_id, secret)).encode('utf-8')).hexdigest()

        video = self._call_api(
            domain_id, 'videos/%s/%s' % (op, video_id), video_id, data={
                'additionalfields': 'language,channel,actors,studio,licenseby,slug,subtitle,teaser,description',
                'addInteractionOptions': '1',
                'addStatusDetails': '1',
                'addStreamDetails': '1',
                'addCaptions': '1',
                'addScenes': '1',
                'addHotSpots': '1',
                'addBumpers': '1',
                'captionFormat': 'data',
            }, headers={
                'X-Request-CID': cid,
                'X-Request-Token': request_token,
            })

        general = video['general']
        title = general['title']

        stream_data = video['streamdata']
        language = general.get('language_raw') or ''

        # TODO: reverse more cdns and formats

        cdn = stream_data['cdnType']
        # Explicit check instead of `assert`, which is stripped under -O
        if cdn != 'azure':
            raise ExtractorError('Unsupported CDN type: %s' % cdn)

        azure_locator = stream_data['azureLocator']

        AZURE_URL = 'http://nx-p%02d.akamaized.net/'

        # Prefer the HTTPS CDN shield, then the HTTP one; fall back to the
        # host derived from the azure account number when neither is set.
        # NOTE(review): the shield value is concatenated directly with the
        # locator below, so it presumably ends with '/' - confirm.
        for secure in ('s', ''):
            cdn_shield = stream_data.get('cdnShieldHTTP%s' % secure.upper())
            if cdn_shield:
                azure_base = 'http%s://%s' % (secure, cdn_shield)
                break
        else:
            azure_base = AZURE_URL % int(stream_data['azureAccount'].replace('nexxplayplus', ''))

        # A comma in the raw language value selects the "_manifest" variant
        is_ml = ',' in language
        azure_m3u8_url = '%s%s/%s_src%s.ism/Manifest(format=m3u8-aapl)' % (
            azure_base, azure_locator, video_id, ('_manifest' if is_ml else ''))

        protection_token = try_get(
            video, lambda x: x['protectiondata']['token'], compat_str)
        if protection_token:
            azure_m3u8_url += '?hdnts=%s' % protection_token

        formats = self._extract_m3u8_formats(
            azure_m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native',
            m3u8_id='%s-hls' % cdn)
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'alt_title': general.get('subtitle'),
            'description': general.get('description'),
            'release_year': int_or_none(general.get('year')),
            'creator': general.get('studio') or general.get('studio_adref'),
            'thumbnail': try_get(
                video, lambda x: x['imagedata']['thumb'], compat_str),
            'duration': parse_duration(general.get('runtime')),
            'timestamp': int_or_none(general.get('uploaded')),
            'episode_number': int_or_none(try_get(
                video, lambda x: x['episodedata']['episode'])),
            'season_number': int_or_none(try_get(
                video, lambda x: x['episodedata']['season'])),
            'formats': formats,
        }
class NexxEmbedIE(InfoExtractor):
    """Extractor for nexx iFrame embed pages (``embed.nexx.cloud`` / ``embed.nexxcdn.com``).

    The embed page is downloaded and the actual extraction is delegated to
    NexxIE through the API URL found in it.
    """

    _VALID_URL = r'https?://embed\.nexx(?:\.cloud|cdn\.com)/\d+/(?P<id>[^/?#&]+)'
    _TEST = {
        'url': 'http://embed.nexx.cloud/748/KC1614647Z27Y7T?autoplay=1',
        'md5': '16746bfc28c42049492385c989b26c4a',
        'info_dict': {
            'id': '161464',
            'ext': 'mp4',
            'title': 'Nervenkitzel Achterbahn',
            'alt_title': 'Karussellbauer in Deutschland',
            'description': 'md5:ffe7b1cc59a01f585e0569949aef73cc',
            'release_year': 2005,
            'creator': 'SPIEGEL TV',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 2761,
            'timestamp': 1394021479,
            'upload_date': '20140305',
        },
        'params': {
            'format': 'bestvideo',
            'skip_download': True,
        },
    }

    @staticmethod
    def _extract_urls(webpage):
        """Return the ``src`` URLs of all nexx embed iframes found in *webpage*."""
        # Reference:
        # 1. https://nx-s.akamaized.net/files/201510/44.pdf

        # iFrame Embed Integration
        return [mobj.group('url') for mobj in re.finditer(
            r'<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//embed\.nexx(?:\.cloud|cdn\.com)/\d+/(?:(?!\1).)+)\1',
            webpage)]

    def _real_extract(self, url):
        """Download the embed page and hand its first nexx API URL to NexxIE.

        Raises ExtractorError when the page contains no recognisable nexx
        player, instead of failing with a bare IndexError.
        """
        embed_id = self._match_id(url)

        webpage = self._download_webpage(url, embed_id)

        nexx_urls = NexxIE._extract_urls(webpage)
        if not nexx_urls:
            raise ExtractorError('Unable to find nexx video in embed page')

        return self.url_result(nexx_urls[0], ie=NexxIE.ie_key())

View File

@ -0,0 +1,63 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
qualities,
unified_timestamp,
)
class PearVideoIE(InfoExtractor):
    """Extractor for pearvideo.com video pages (``/video_<id>`` URLs)."""

    _VALID_URL = r'https?://(?:www\.)?pearvideo\.com/video_(?P<id>\d+)'
    _TEST = {
        'url': 'http://www.pearvideo.com/video_1076290',
        'info_dict': {
            'id': '1076290',
            'ext': 'mp4',
            'title': '小浣熊在主人家玻璃上滚石头:没砸',
            'description': 'md5:01d576b747de71be0ee85eb7cac25f9d',
            'timestamp': 1494275280,
            'upload_date': '20170508',
        }
    }

    def _real_extract(self, url):
        """Scrape formats, title, description and timestamp from the video page."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # Ranking helper built from the known format ids, in the listed order
        rank = qualities(
            ('ldflv', 'ld', 'sdflv', 'sd', 'hdflv', 'hd', 'src'))

        # Every "<name>Url = '<url>'" assignment in the page yields one format
        formats = []
        for match in re.finditer(
                r'(?P<id>[a-zA-Z]+)Url\s*=\s*(["\'])(?P<url>(?:https?:)?//.+?)\2',
                webpage):
            format_id = match.group('id')
            formats.append({
                'url': match.group('url'),
                'format_id': format_id,
                'quality': rank(format_id),
            })
        self._sort_formats(formats)

        title_patterns = (
            r'<h1[^>]+\bclass=(["\'])video-tt\1[^>]*>(?P<value>[^<]+)',
            r'<[^>]+\bdata-title=(["\'])(?P<value>(?:(?!\1).)+)\1',
        )
        title = self._search_regex(
            title_patterns, webpage, 'title', group='value')

        description_patterns = (
            r'<div[^>]+\bclass=(["\'])summary\1[^>]*>(?P<value>[^<]+)',
            r'<[^>]+\bdata-summary=(["\'])(?P<value>(?:(?!\1).)+)\1',
        )
        description = self._search_regex(
            description_patterns, webpage, 'description', default=None,
            group='value')
        if not description:
            # Fall back to the page's Description meta tag
            description = self._html_search_meta('Description', webpage)

        date_text = self._search_regex(
            r'<div[^>]+\bclass=["\']date["\'][^>]*>([^<]+)',
            webpage, 'timestamp', fatal=False)
        timestamp = unified_timestamp(date_text)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'timestamp': timestamp,
            'formats': formats,
        }

View File

@ -4,6 +4,7 @@ from __future__ import unicode_literals
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from .nexx import NexxEmbedIE
from .spiegeltv import SpiegeltvIE from .spiegeltv import SpiegeltvIE
from ..compat import compat_urlparse from ..compat import compat_urlparse
from ..utils import ( from ..utils import (
@ -143,6 +144,9 @@ class SpiegelArticleIE(InfoExtractor):
entries = [ entries = [
self.url_result(compat_urlparse.urljoin( self.url_result(compat_urlparse.urljoin(
self.http_scheme() + '//spiegel.de/', embed_path)) self.http_scheme() + '//spiegel.de/', embed_path))
for embed_path in embeds for embed_path in embeds]
] if embeds:
return self.playlist_result(entries) return self.playlist_result(entries)
return self.playlist_from_matches(
NexxEmbedIE._extract_urls(webpage), ie=NexxEmbedIE.ie_key())

View File

@ -1,114 +1,17 @@
# coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_urllib_parse_urlparse from .nexx import NexxIE
from ..utils import (
determine_ext,
float_or_none,
)
class SpiegeltvIE(InfoExtractor): class SpiegeltvIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?spiegel\.tv/(?:#/)?filme/(?P<id>[\-a-z0-9]+)' _VALID_URL = r'https?://(?:www\.)?spiegel\.tv/videos/(?P<id>\d+)'
_TESTS = [{ _TEST = {
'url': 'http://www.spiegel.tv/filme/flug-mh370/', 'url': 'http://www.spiegel.tv/videos/161681-flug-mh370/',
'info_dict': {
'id': 'flug-mh370',
'ext': 'm4v',
'title': 'Flug MH370',
'description': 'Das Rätsel um die Boeing 777 der Malaysia-Airlines',
'thumbnail': r're:http://.*\.jpg$',
},
'params': {
# m3u8 download
'skip_download': True,
}
}, {
'url': 'http://www.spiegel.tv/#/filme/alleskino-die-wahrheit-ueber-maenner/',
'only_matching': True, 'only_matching': True,
}] }
def _real_extract(self, url): def _real_extract(self, url):
if '/#/' in url: return self.url_result(
url = url.replace('/#/', '/') 'https://api.nexx.cloud/v3/748/videos/byid/%s'
video_id = self._match_id(url) % self._match_id(url), ie=NexxIE.ie_key())
webpage = self._download_webpage(url, video_id)
title = self._html_search_regex(r'<h1.*?>(.*?)</h1>', webpage, 'title')
apihost = 'http://spiegeltv-ivms2-restapi.s3.amazonaws.com'
version_json = self._download_json(
'%s/version.json' % apihost, video_id,
note='Downloading version information')
version_name = version_json['version_name']
slug_json = self._download_json(
'%s/%s/restapi/slugs/%s.json' % (apihost, version_name, video_id),
video_id,
note='Downloading object information')
oid = slug_json['object_id']
media_json = self._download_json(
'%s/%s/restapi/media/%s.json' % (apihost, version_name, oid),
video_id, note='Downloading media information')
uuid = media_json['uuid']
is_wide = media_json['is_wide']
server_json = self._download_json(
'http://spiegeltv-prod-static.s3.amazonaws.com/projectConfigs/projectConfig.json',
video_id, note='Downloading server information')
format = '16x9' if is_wide else '4x3'
formats = []
for streamingserver in server_json['streamingserver']:
endpoint = streamingserver.get('endpoint')
if not endpoint:
continue
play_path = 'mp4:%s_spiegeltv_0500_%s.m4v' % (uuid, format)
if endpoint.startswith('rtmp'):
formats.append({
'url': endpoint,
'format_id': 'rtmp',
'app': compat_urllib_parse_urlparse(endpoint).path[1:],
'play_path': play_path,
'player_path': 'http://prod-static.spiegel.tv/frontend-076.swf',
'ext': 'flv',
'rtmp_live': True,
})
elif determine_ext(endpoint) == 'm3u8':
formats.append({
'url': endpoint.replace('[video]', play_path),
'ext': 'm4v',
'format_id': 'hls', # Prefer hls since it allows to workaround georestriction
'protocol': 'm3u8',
'preference': 1,
'http_headers': {
'Accept-Encoding': 'deflate', # gzip causes trouble on the server side
},
})
else:
formats.append({
'url': endpoint,
})
self._check_formats(formats, video_id)
thumbnails = []
for image in media_json['images']:
thumbnails.append({
'url': image['url'],
'width': image['width'],
'height': image['height'],
})
description = media_json['subtitle']
duration = float_or_none(media_json.get('duration_in_ms'), scale=1000)
return {
'id': video_id,
'title': title,
'description': description,
'duration': duration,
'thumbnails': thumbnails,
'formats': formats,
}

View File

@ -1660,7 +1660,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
<a[^>]* <a[^>]*
(?: (?:
\bhref=["\']/red[^>]*>| # drop possible \bhref=["\']/red[^>]*>| # drop possible
>\s*Listen ad-free with YouTube Red # YouTube Red ad >\s*Listen ad-free with YouTube Red # YouTube Red ad
) )
.*? .*?
)?</li )?</li

View File

@ -1,3 +1,3 @@
from __future__ import unicode_literals from __future__ import unicode_literals
__version__ = '2017.07.09' __version__ = '2017.07.15'