Merge remote-tracking branch 'upstream/master'

This commit is contained in:
kidburglar 2017-01-21 11:04:19 +01:00
commit 73e91ff8ac
21 changed files with 294 additions and 126 deletions

View File

@ -6,8 +6,8 @@
--- ---
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.01.14*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with an outdated version will be rejected. ### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.01.18*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with an outdated version will be rejected.
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.01.14** - [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.01.18**
### Before submitting an *issue* make sure you have: ### Before submitting an *issue* make sure you have:
- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
@ -35,7 +35,7 @@ $ youtube-dl -v <your command line>
[debug] User config: [] [debug] User config: []
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
[debug] youtube-dl version 2017.01.14 [debug] youtube-dl version 2017.01.18
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
[debug] Proxy map: {} [debug] Proxy map: {}

View File

@ -1,7 +1,38 @@
version <unreleased> version <unreleased>
Extractors Extractors
+ [cspan] Support Ustream embedded videos (#11547)
version 2017.01.18
Extractors
* [bilibili] Fix extraction (#11077)
+ [canalplus] Add fallback for video id (#11764)
* [20min] Fix extraction (#11683, #11751)
* [imdb] Extend URL regular expression (#11744)
+ [naver] Add support for tv.naver.com links (#11743)
version 2017.01.16
Core
* [options] Apply custom config to final composite configuration (#11741)
* [YoutubeDL] Improve protocol auto determining (#11720)
Extractors
* [xiami] Relax URL regular expressions
* [xiami] Improve track metadata extraction (#11699)
+ [limelight] Check hand-made direct HTTP links
+ [limelight] Add support for direct HTTP links at video.llnw.net (#11737)
+ [brightcove] Recognize another player ID pattern (#11688)
+ [niconico] Support login via cookies (#7968) + [niconico] Support login via cookies (#7968)
* [yourupload] Fix extraction (#11601)
+ [beam:live] Add support for beam.pro live streams (#10702, #11596)
* [vevo] Improve geo restriction detection
+ [dramafever] Add support for URLs with language code (#11714)
* [cbc] Improve playlist support (#11704)
version 2017.01.14 version 2017.01.14

View File

@ -86,6 +86,7 @@
- **bbc.co.uk:article**: BBC articles - **bbc.co.uk:article**: BBC articles
- **bbc.co.uk:iplayer:playlist** - **bbc.co.uk:iplayer:playlist**
- **bbc.co.uk:playlist** - **bbc.co.uk:playlist**
- **Beam:live**
- **Beatport** - **Beatport**
- **Beeg** - **Beeg**
- **BehindKink** - **BehindKink**

View File

@ -34,8 +34,8 @@ class BiliBiliIE(InfoExtractor):
}, },
} }
_APP_KEY = '6f90a59ac58a4123' _APP_KEY = '84956560bc028eb7'
_BILIBILI_KEY = '0bfd84cc3940035173f35e6777508326' _BILIBILI_KEY = '94aba54af9065f71de72f5508f1cd42e'
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)

View File

@ -179,7 +179,7 @@ class BrightcoveLegacyIE(InfoExtractor):
params = {} params = {}
playerID = find_param('playerID') playerID = find_param('playerID') or find_param('playerId')
if playerID is None: if playerID is None:
raise ExtractorError('Cannot find player ID') raise ExtractorError('Cannot find player ID')
params['playerID'] = playerID params['playerID'] = playerID

View File

@ -107,7 +107,7 @@ class CanalplusIE(InfoExtractor):
[r'<canal:player[^>]+?videoId=(["\'])(?P<id>\d+)', [r'<canal:player[^>]+?videoId=(["\'])(?P<id>\d+)',
r'id=["\']canal_video_player(?P<id>\d+)', r'id=["\']canal_video_player(?P<id>\d+)',
r'data-video=["\'](?P<id>\d+)'], r'data-video=["\'](?P<id>\d+)'],
webpage, 'video id', group='id') webpage, 'video id', default=mobj.group('vid'), group='id')
info_url = self._VIDEO_INFO_TEMPLATE % (site_id, video_id) info_url = self._VIDEO_INFO_TEMPLATE % (site_id, video_id)
video_data = self._download_json(info_url, video_id, 'Downloading video JSON') video_data = self._download_json(info_url, video_id, 'Downloading video JSON')

View File

@ -12,6 +12,7 @@ from ..utils import (
ExtractorError, ExtractorError,
) )
from .senateisvp import SenateISVPIE from .senateisvp import SenateISVPIE
from .ustream import UstreamIE
class CSpanIE(InfoExtractor): class CSpanIE(InfoExtractor):
@ -22,14 +23,13 @@ class CSpanIE(InfoExtractor):
'md5': '94b29a4f131ff03d23471dd6f60b6a1d', 'md5': '94b29a4f131ff03d23471dd6f60b6a1d',
'info_dict': { 'info_dict': {
'id': '315139', 'id': '315139',
'ext': 'mp4',
'title': 'Attorney General Eric Holder on Voting Rights Act Decision', 'title': 'Attorney General Eric Holder on Voting Rights Act Decision',
'description': 'Attorney General Eric Holder speaks to reporters following the Supreme Court decision in [Shelby County v. Holder], in which the court ruled that the preclearance provisions of the Voting Rights Act could not be enforced.',
}, },
'playlist_mincount': 2,
'skip': 'Regularly fails on travis, for unknown reasons', 'skip': 'Regularly fails on travis, for unknown reasons',
}, { }, {
'url': 'http://www.c-span.org/video/?c4486943/cspan-international-health-care-models', 'url': 'http://www.c-span.org/video/?c4486943/cspan-international-health-care-models',
'md5': '8e5fbfabe6ad0f89f3012a7943c1287b', # md5 is unstable
'info_dict': { 'info_dict': {
'id': 'c4486943', 'id': 'c4486943',
'ext': 'mp4', 'ext': 'mp4',
@ -38,14 +38,11 @@ class CSpanIE(InfoExtractor):
} }
}, { }, {
'url': 'http://www.c-span.org/video/?318608-1/gm-ignition-switch-recall', 'url': 'http://www.c-span.org/video/?318608-1/gm-ignition-switch-recall',
'md5': '2ae5051559169baadba13fc35345ae74',
'info_dict': { 'info_dict': {
'id': '342759', 'id': '342759',
'ext': 'mp4',
'title': 'General Motors Ignition Switch Recall', 'title': 'General Motors Ignition Switch Recall',
'duration': 14848,
'description': 'md5:118081aedd24bf1d3b68b3803344e7f3'
}, },
'playlist_mincount': 6,
}, { }, {
# Video from senate.gov # Video from senate.gov
'url': 'http://www.c-span.org/video/?104517-1/immigration-reforms-needed-protect-skilled-american-workers', 'url': 'http://www.c-span.org/video/?104517-1/immigration-reforms-needed-protect-skilled-american-workers',
@ -57,12 +54,30 @@ class CSpanIE(InfoExtractor):
'params': { 'params': {
'skip_download': True, # m3u8 downloads 'skip_download': True, # m3u8 downloads
} }
}, {
# Ustream embedded video
'url': 'https://www.c-span.org/video/?114917-1/armed-services',
'info_dict': {
'id': '58428542',
'ext': 'flv',
'title': 'USHR07 Armed Services Committee',
'description': 'hsas00-2118-20150204-1000et-07\n\n\nUSHR07 Armed Services Committee',
'timestamp': 1423060374,
'upload_date': '20150204',
'uploader': 'HouseCommittee',
'uploader_id': '12987475',
},
}] }]
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
video_type = None video_type = None
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
ustream_url = UstreamIE._extract_url(webpage)
if ustream_url:
return self.url_result(ustream_url, UstreamIE.ie_key())
# We first look for clipid, because clipprog always appears before # We first look for clipid, because clipprog always appears before
patterns = [r'id=\'clip(%s)\'\s*value=\'([0-9]+)\'' % t for t in ('id', 'prog')] patterns = [r'id=\'clip(%s)\'\s*value=\'([0-9]+)\'' % t for t in ('id', 'prog')]
results = list(filter(None, (re.search(p, webpage) for p in patterns))) results = list(filter(None, (re.search(p, webpage) for p in patterns)))

View File

@ -86,18 +86,43 @@ class FirstTVIE(InfoExtractor):
title = item['title'] title = item['title']
quality = qualities(QUALITIES) quality = qualities(QUALITIES)
formats = [] formats = []
path = None
for f in item.get('mbr', []): for f in item.get('mbr', []):
src = f.get('src') src = f.get('src')
if not src or not isinstance(src, compat_str): if not src or not isinstance(src, compat_str):
continue continue
tbr = int_or_none(self._search_regex( tbr = int_or_none(self._search_regex(
r'_(\d{3,})\.mp4', src, 'tbr', default=None)) r'_(\d{3,})\.mp4', src, 'tbr', default=None))
if not path:
path = self._search_regex(
r'//[^/]+/(.+?)_\d+\.mp4', src,
'm3u8 path', default=None)
formats.append({ formats.append({
'url': src, 'url': src,
'format_id': f.get('name'), 'format_id': f.get('name'),
'tbr': tbr, 'tbr': tbr,
'quality': quality(f.get('name')), 'source_preference': quality(f.get('name')),
}) })
# m3u8 URL format is reverse engineered from [1] (search for
# master.m3u8). dashEdges (that is currently balancer-vod.1tv.ru)
# is taken from [2].
# 1. http://static.1tv.ru/player/eump1tv-current/eump-1tv.all.min.js?rnd=9097422834:formatted
# 2. http://static.1tv.ru/player/eump1tv-config/config-main.js?rnd=9097422834
if not path and len(formats) == 1:
path = self._search_regex(
r'//[^/]+/(.+?$)', formats[0]['url'],
'm3u8 path', default=None)
if path:
if len(formats) == 1:
m3u8_path = ','
else:
tbrs = [compat_str(t) for t in sorted(f['tbr'] for f in formats)]
m3u8_path = '_,%s,%s' % (','.join(tbrs), '.mp4')
formats.extend(self._extract_m3u8_formats(
'http://balancer-vod.1tv.ru/%s%s.urlset/master.m3u8'
% (path, m3u8_path),
display_id, 'mp4',
entry_protocol='m3u8_native', m3u8_id='hls', fatal=False))
self._sort_formats(formats) self._sort_formats(formats)
thumbnail = item.get('poster') or self._og_search_thumbnail(webpage) thumbnail = item.get('poster') or self._og_search_thumbnail(webpage)

View File

@ -81,7 +81,7 @@ class FlipagramIE(InfoExtractor):
'filesize': int_or_none(cover.get('size')), 'filesize': int_or_none(cover.get('size')),
} for cover in flipagram.get('covers', []) if cover.get('url')] } for cover in flipagram.get('covers', []) if cover.get('url')]
# Note that this only retrieves comments that are initally loaded. # Note that this only retrieves comments that are initially loaded.
# For videos with large amounts of comments, most won't be retrieved. # For videos with large amounts of comments, most won't be retrieved.
comments = [] comments = []
for comment in video_data.get('comments', {}).get(video_id, {}).get('items', []): for comment in video_data.get('comments', {}).get(video_id, {}).get('items', []):

View File

@ -78,6 +78,8 @@ from .vbox7 import Vbox7IE
from .dbtv import DBTVIE from .dbtv import DBTVIE
from .piksel import PikselIE from .piksel import PikselIE
from .videa import VideaIE from .videa import VideaIE
from .twentymin import TwentyMinutenIE
from .ustream import UstreamIE
class GenericIE(InfoExtractor): class GenericIE(InfoExtractor):
@ -422,6 +424,26 @@ class GenericIE(InfoExtractor):
'skip_download': True, # m3u8 download 'skip_download': True, # m3u8 download
}, },
}, },
{
# Brightcove with alternative playerID key
'url': 'http://www.nature.com/nmeth/journal/v9/n7/fig_tab/nmeth.2062_SV1.html',
'info_dict': {
'id': 'nmeth.2062_SV1',
'title': 'Simultaneous multiview imaging of the Drosophila syncytial blastoderm : Quantitative high-speed imaging of entire developing embryos with simultaneous multiview light-sheet microscopy : Nature Methods : Nature Research',
},
'playlist': [{
'info_dict': {
'id': '2228375078001',
'ext': 'mp4',
'title': 'nmeth.2062-sv1',
'description': 'nmeth.2062-sv1',
'timestamp': 1363357591,
'upload_date': '20130315',
'uploader': 'Nature Publishing Group',
'uploader_id': '1964492299001',
},
}],
},
# ooyala video # ooyala video
{ {
'url': 'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219', 'url': 'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219',
@ -567,17 +589,6 @@ class GenericIE(InfoExtractor):
'description': 'md5:8145d19d320ff3e52f28401f4c4283b9', 'description': 'md5:8145d19d320ff3e52f28401f4c4283b9',
} }
}, },
# Embedded Ustream video
{
'url': 'http://www.american.edu/spa/pti/nsa-privacy-janus-2014.cfm',
'md5': '27b99cdb639c9b12a79bca876a073417',
'info_dict': {
'id': '45734260',
'ext': 'flv',
'uploader': 'AU SPA: The NSA and Privacy',
'title': 'NSA and Privacy Forum Debate featuring General Hayden and Barton Gellman'
}
},
# nowvideo embed hidden behind percent encoding # nowvideo embed hidden behind percent encoding
{ {
'url': 'http://www.waoanime.tv/the-super-dimension-fortress-macross-episode-1/', 'url': 'http://www.waoanime.tv/the-super-dimension-fortress-macross-episode-1/',
@ -1448,6 +1459,20 @@ class GenericIE(InfoExtractor):
}, },
'playlist_mincount': 2, 'playlist_mincount': 2,
}, },
{
# 20 minuten embed
'url': 'http://www.20min.ch/schweiz/news/story/So-kommen-Sie-bei-Eis-und-Schnee-sicher-an-27032552',
'info_dict': {
'id': '523629',
'ext': 'mp4',
'title': 'So kommen Sie bei Eis und Schnee sicher an',
'description': 'md5:117c212f64b25e3d95747e5276863f7d',
},
'params': {
'skip_download': True,
},
'add_ie': [TwentyMinutenIE.ie_key()],
}
# { # {
# # TODO: find another test # # TODO: find another test
# # http://schema.org/VideoObject # # http://schema.org/VideoObject
@ -2077,10 +2102,9 @@ class GenericIE(InfoExtractor):
return self.url_result(mobj.group('url'), 'TED') return self.url_result(mobj.group('url'), 'TED')
# Look for embedded Ustream videos # Look for embedded Ustream videos
mobj = re.search( ustream_url = UstreamIE._extract_url(webpage)
r'<iframe[^>]+?src=(["\'])(?P<url>http://www\.ustream\.tv/embed/.+?)\1', webpage) if ustream_url:
if mobj is not None: return self.url_result(ustream_url, UstreamIE.ie_key())
return self.url_result(mobj.group('url'), 'Ustream')
# Look for embedded arte.tv player # Look for embedded arte.tv player
mobj = re.search( mobj = re.search(
@ -2401,6 +2425,12 @@ class GenericIE(InfoExtractor):
if videa_urls: if videa_urls:
return _playlist_from_matches(videa_urls, ie=VideaIE.ie_key()) return _playlist_from_matches(videa_urls, ie=VideaIE.ie_key())
# Look for 20 minuten embeds
twentymin_urls = TwentyMinutenIE._extract_urls(webpage)
if twentymin_urls:
return _playlist_from_matches(
twentymin_urls, ie=TwentyMinutenIE.ie_key())
# Looking for http://schema.org/VideoObject # Looking for http://schema.org/VideoObject
json_ld = self._search_json_ld( json_ld = self._search_json_ld(
webpage, video_id, default={}, expected_type='VideoObject') webpage, video_id, default={}, expected_type='VideoObject')

View File

@ -13,7 +13,7 @@ from ..utils import (
class ImdbIE(InfoExtractor): class ImdbIE(InfoExtractor):
IE_NAME = 'imdb' IE_NAME = 'imdb'
IE_DESC = 'Internet Movie Database trailers' IE_DESC = 'Internet Movie Database trailers'
_VALID_URL = r'https?://(?:www|m)\.imdb\.com/(?:video/[^/]+/|title/tt\d+.*?#lb-)vi(?P<id>\d+)' _VALID_URL = r'https?://(?:www|m)\.imdb\.com/(?:video/[^/]+/|title/tt\d+.*?#lb-|videoplayer/)vi(?P<id>\d+)'
_TESTS = [{ _TESTS = [{
'url': 'http://www.imdb.com/video/imdb/vi2524815897', 'url': 'http://www.imdb.com/video/imdb/vi2524815897',
@ -32,6 +32,9 @@ class ImdbIE(InfoExtractor):
}, { }, {
'url': 'http://www.imdb.com/title/tt1667889/#lb-vi2524815897', 'url': 'http://www.imdb.com/title/tt1667889/#lb-vi2524815897',
'only_matching': True, 'only_matching': True,
}, {
'url': 'http://www.imdb.com/videoplayer/vi1562949145',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):

View File

@ -59,14 +59,26 @@ class LimelightBaseIE(InfoExtractor):
format_id = 'rtmp' format_id = 'rtmp'
if stream.get('videoBitRate'): if stream.get('videoBitRate'):
format_id += '-%d' % int_or_none(stream['videoBitRate']) format_id += '-%d' % int_or_none(stream['videoBitRate'])
http_url = 'http://cpl.delvenetworks.com/' + rtmp.group('playpath')[4:] http_format_id = format_id.replace('rtmp', 'http')
urls.append(http_url)
http_fmt = fmt.copy() CDN_HOSTS = (
http_fmt.update({ ('delvenetworks.com', 'cpl.delvenetworks.com'),
'url': http_url, ('video.llnw.net', 's2.content.video.llnw.net'),
'format_id': format_id.replace('rtmp', 'http'), )
}) for cdn_host, http_host in CDN_HOSTS:
formats.append(http_fmt) if cdn_host not in rtmp.group('host').lower():
continue
http_url = 'http://%s/%s' % (http_host, rtmp.group('playpath')[4:])
urls.append(http_url)
if self._is_valid_url(http_url, video_id, http_format_id):
http_fmt = fmt.copy()
http_fmt.update({
'url': http_url,
'format_id': http_format_id,
})
formats.append(http_fmt)
break
fmt.update({ fmt.update({
'url': rtmp.group('url'), 'url': rtmp.group('url'),
'play_path': rtmp.group('playpath'), 'play_path': rtmp.group('playpath'),

View File

@ -211,7 +211,7 @@ class MTVServicesInfoExtractor(InfoExtractor):
def _extract_triforce_mgid(self, webpage, data_zone=None, video_id=None): def _extract_triforce_mgid(self, webpage, data_zone=None, video_id=None):
triforce_feed = self._parse_json(self._search_regex( triforce_feed = self._parse_json(self._search_regex(
r'triforceManifestFeed\s*=\s*(\{.+?\});\n', webpage, r'triforceManifestFeed\s*=\s*({.+?})\s*;\s*\n', webpage,
'triforce feed', default='{}'), video_id, fatal=False) 'triforce feed', default='{}'), video_id, fatal=False)
data_zone = self._search_regex( data_zone = self._search_regex(

View File

@ -12,10 +12,10 @@ from ..utils import (
class NaverIE(InfoExtractor): class NaverIE(InfoExtractor):
_VALID_URL = r'https?://(?:m\.)?tvcast\.naver\.com/v/(?P<id>\d+)' _VALID_URL = r'https?://(?:m\.)?tv(?:cast)?\.naver\.com/v/(?P<id>\d+)'
_TESTS = [{ _TESTS = [{
'url': 'http://tvcast.naver.com/v/81652', 'url': 'http://tv.naver.com/v/81652',
'info_dict': { 'info_dict': {
'id': '81652', 'id': '81652',
'ext': 'mp4', 'ext': 'mp4',
@ -24,7 +24,7 @@ class NaverIE(InfoExtractor):
'upload_date': '20130903', 'upload_date': '20130903',
}, },
}, { }, {
'url': 'http://tvcast.naver.com/v/395837', 'url': 'http://tv.naver.com/v/395837',
'md5': '638ed4c12012c458fefcddfd01f173cd', 'md5': '638ed4c12012c458fefcddfd01f173cd',
'info_dict': { 'info_dict': {
'id': '395837', 'id': '395837',
@ -34,6 +34,9 @@ class NaverIE(InfoExtractor):
'upload_date': '20150519', 'upload_date': '20150519',
}, },
'skip': 'Georestricted', 'skip': 'Georestricted',
}, {
'url': 'http://tvcast.naver.com/v/81652',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):

View File

@ -4,91 +4,88 @@ from __future__ import unicode_literals
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import remove_end from ..utils import (
int_or_none,
try_get,
)
class TwentyMinutenIE(InfoExtractor): class TwentyMinutenIE(InfoExtractor):
IE_NAME = '20min' IE_NAME = '20min'
_VALID_URL = r'https?://(?:www\.)?20min\.ch/(?:videotv/*\?.*\bvid=(?P<id>\d+)|(?:[^/]+/)*(?P<display_id>[^/#?]+))' _VALID_URL = r'''(?x)
https?://
(?:www\.)?20min\.ch/
(?:
videotv/*\?.*?\bvid=|
videoplayer/videoplayer\.html\?.*?\bvideoId@
)
(?P<id>\d+)
'''
_TESTS = [{ _TESTS = [{
# regular video
'url': 'http://www.20min.ch/videotv/?vid=469148&cid=2', 'url': 'http://www.20min.ch/videotv/?vid=469148&cid=2',
'md5': 'b52d6bc6ea6398e6a38f12cfd418149c', 'md5': 'e7264320db31eed8c38364150c12496e',
'info_dict': { 'info_dict': {
'id': '469148', 'id': '469148',
'ext': 'flv',
'title': '85 000 Franken für 15 perfekte Minuten',
'description': 'Was die Besucher vom Silvesterzauber erwarten können. (Video: Alice Grosjean/Murat Temel)',
'thumbnail': 'http://thumbnails.20min-tv.ch/server063/469148/frame-72-469148.jpg'
}
}, {
# news article with video
'url': 'http://www.20min.ch/schweiz/news/story/-Wir-muessen-mutig-nach-vorne-schauen--22050469',
'md5': 'cd4cbb99b94130cff423e967cd275e5e',
'info_dict': {
'id': '469408',
'display_id': '-Wir-muessen-mutig-nach-vorne-schauen--22050469',
'ext': 'flv',
'title': '«Wir müssen mutig nach vorne schauen»',
'description': 'Kein Land sei innovativer als die Schweiz, sagte Johann Schneider-Ammann in seiner Neujahrsansprache. Das Land müsse aber seine Hausaufgaben machen.',
'thumbnail': 'http://www.20min.ch/images/content/2/2/0/22050469/10/teaserbreit.jpg'
},
'skip': '"This video is no longer available" is shown both on the web page and in the downloaded file.',
}, {
# YouTube embed
'url': 'http://www.20min.ch/ro/sports/football/story/Il-marque-une-bicyclette-de-plus-de-30-metres--21115184',
'md5': 'cec64d59aa01c0ed9dbba9cf639dd82f',
'info_dict': {
'id': 'ivM7A7SpDOs',
'ext': 'mp4', 'ext': 'mp4',
'title': 'GOLAZO DE CHILENA DE JAVI GÓMEZ, FINALISTA AL BALÓN DE CLM 2016', 'title': '85 000 Franken für 15 perfekte Minuten',
'description': 'md5:903c92fbf2b2f66c09de514bc25e9f5a', 'thumbnail': r're:https?://.*\.jpg$',
'upload_date': '20160424', },
'uploader': 'RTVCM Castilla-La Mancha', }, {
'uploader_id': 'RTVCM', 'url': 'http://www.20min.ch/videoplayer/videoplayer.html?params=client@twentyDE|videoId@523629',
'info_dict': {
'id': '523629',
'ext': 'mp4',
'title': 'So kommen Sie bei Eis und Schnee sicher an',
'description': 'md5:117c212f64b25e3d95747e5276863f7d',
'thumbnail': r're:https?://.*\.jpg$',
},
'params': {
'skip_download': True,
}, },
'add_ie': ['Youtube'],
}, { }, {
'url': 'http://www.20min.ch/videotv/?cid=44&vid=468738', 'url': 'http://www.20min.ch/videotv/?cid=44&vid=468738',
'only_matching': True, 'only_matching': True,
}, {
'url': 'http://www.20min.ch/ro/sortir/cinema/story/Grandir-au-bahut--c-est-dur-18927411',
'only_matching': True,
}] }]
@staticmethod
def _extract_urls(webpage):
return [m.group('url') for m in re.finditer(
r'<iframe[^>]+src=(["\'])(?P<url>(?:https?://)?(?:www\.)?20min\.ch/videoplayer/videoplayer.html\?.*?\bvideoId@\d+.*?)\1',
webpage)]
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) video_id = self._match_id(url)
video_id = mobj.group('id')
display_id = mobj.group('display_id') or video_id
webpage = self._download_webpage(url, display_id) video = self._download_json(
'http://api.20min.ch/video/%s/show' % video_id,
video_id)['content']
youtube_url = self._html_search_regex( title = video['title']
r'<iframe[^>]+src="((?:https?:)?//www\.youtube\.com/embed/[^"]+)"',
webpage, 'YouTube embed URL', default=None)
if youtube_url is not None:
return self.url_result(youtube_url, 'Youtube')
title = self._html_search_regex( formats = [{
r'<h1>.*?<span>(.+?)</span></h1>', 'format_id': format_id,
webpage, 'title', default=None) 'url': 'http://podcast.20min-tv.ch/podcast/20min/%s%s.mp4' % (video_id, p),
if not title: 'quality': quality,
title = remove_end(re.sub( } for quality, (format_id, p) in enumerate([('sd', ''), ('hd', 'h')])]
r'^20 [Mm]inuten.*? -', '', self._og_search_title(webpage)), ' - News') self._sort_formats(formats)
if not video_id: description = video.get('lead')
video_id = self._search_regex( thumbnail = video.get('thumbnail')
r'"file\d?"\s*,\s*\"(\d+)', webpage, 'video id')
description = self._html_search_meta( def extract_count(kind):
'description', webpage, 'description') return try_get(
thumbnail = self._og_search_thumbnail(webpage) video,
lambda x: int_or_none(x['communityobject']['thumbs_%s' % kind]))
like_count = extract_count('up')
dislike_count = extract_count('down')
return { return {
'id': video_id, 'id': video_id,
'display_id': display_id,
'url': 'http://speed.20min-tv.ch/%sm.flv' % video_id,
'title': title, 'title': title,
'description': description, 'description': description,
'thumbnail': thumbnail, 'thumbnail': thumbnail,
'like_count': like_count,
'dislike_count': dislike_count,
'formats': formats,
} }

View File

@ -84,12 +84,27 @@ class UOLIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
if not video_id.isdigit(): media_id = None
embed_page = self._download_webpage('https://jsuol.com.br/c/tv/uol/embed/?params=[embed,%s]' % video_id, video_id)
video_id = self._search_regex(r'mediaId=(\d+)', embed_page, 'media id') if video_id.isdigit():
media_id = video_id
if not media_id:
embed_page = self._download_webpage(
'https://jsuol.com.br/c/tv/uol/embed/?params=[embed,%s]' % video_id,
video_id, 'Downloading embed page', fatal=False)
if embed_page:
media_id = self._search_regex(
(r'uol\.com\.br/(\d+)', r'mediaId=(\d+)'),
embed_page, 'media id', default=None)
if not media_id:
webpage = self._download_webpage(url, video_id)
media_id = self._search_regex(r'mediaId=(\d+)', webpage, 'media id')
video_data = self._download_json( video_data = self._download_json(
'http://mais.uol.com.br/apiuol/v3/player/getMedia/%s.json' % video_id, 'http://mais.uol.com.br/apiuol/v3/player/getMedia/%s.json' % media_id,
video_id)['item'] media_id)['item']
title = video_data['title'] title = video_data['title']
query = { query = {
@ -118,7 +133,7 @@ class UOLIE(InfoExtractor):
tags.append(tag_description) tags.append(tag_description)
return { return {
'id': video_id, 'id': media_id,
'title': title, 'title': title,
'description': clean_html(video_data.get('desMedia')), 'description': clean_html(video_data.get('desMedia')),
'thumbnail': video_data.get('thumbnail'), 'thumbnail': video_data.get('thumbnail'),

View File

@ -69,6 +69,13 @@ class UstreamIE(InfoExtractor):
}, },
}] }]
@staticmethod
def _extract_url(webpage):
mobj = re.search(
r'<iframe[^>]+?src=(["\'])(?P<url>http://www\.ustream\.tv/embed/.+?)\1', webpage)
if mobj is not None:
return mobj.group('url')
def _get_stream_info(self, url, video_id, app_id_ver, extra_note=None): def _get_stream_info(self, url, video_id, app_id_ver, extra_note=None):
def num_to_hex(n): def num_to_hex(n):
return hex(n)[2:] return hex(n)[2:]

View File

@ -338,7 +338,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
'expected_warnings': ['Unable to download JSON metadata'], 'expected_warnings': ['Unable to download JSON metadata'],
}, },
{ {
# redirects to ondemand extractor and should be passed throught it # redirects to ondemand extractor and should be passed through it
# for successful extraction # for successful extraction
'url': 'https://vimeo.com/73445910', 'url': 'https://vimeo.com/73445910',
'info_dict': { 'info_dict': {

View File

@ -16,7 +16,9 @@ class XiamiBaseIE(InfoExtractor):
return webpage return webpage
def _extract_track(self, track, track_id=None): def _extract_track(self, track, track_id=None):
title = track['title'] track_name = track.get('songName') or track.get('name') or track['subName']
artist = track.get('artist') or track.get('artist_name') or track.get('singers')
title = '%s - %s' % (artist, track_name) if artist else track_name
track_url = self._decrypt(track['location']) track_url = self._decrypt(track['location'])
subtitles = {} subtitles = {}
@ -31,9 +33,10 @@ class XiamiBaseIE(InfoExtractor):
'thumbnail': track.get('pic') or track.get('album_pic'), 'thumbnail': track.get('pic') or track.get('album_pic'),
'duration': int_or_none(track.get('length')), 'duration': int_or_none(track.get('length')),
'creator': track.get('artist', '').split(';')[0], 'creator': track.get('artist', '').split(';')[0],
'track': title, 'track': track_name,
'album': track.get('album_name'), 'track_number': int_or_none(track.get('track')),
'artist': track.get('artist'), 'album': track.get('album_name') or track.get('title'),
'artist': artist,
'subtitles': subtitles, 'subtitles': subtitles,
} }
@ -68,14 +71,14 @@ class XiamiBaseIE(InfoExtractor):
class XiamiSongIE(XiamiBaseIE): class XiamiSongIE(XiamiBaseIE):
IE_NAME = 'xiami:song' IE_NAME = 'xiami:song'
IE_DESC = '虾米音乐' IE_DESC = '虾米音乐'
_VALID_URL = r'https?://(?:www\.)?xiami\.com/song/(?P<id>[0-9]+)' _VALID_URL = r'https?://(?:www\.)?xiami\.com/song/(?P<id>[^/?#&]+)'
_TESTS = [{ _TESTS = [{
'url': 'http://www.xiami.com/song/1775610518', 'url': 'http://www.xiami.com/song/1775610518',
'md5': '521dd6bea40fd5c9c69f913c232cb57e', 'md5': '521dd6bea40fd5c9c69f913c232cb57e',
'info_dict': { 'info_dict': {
'id': '1775610518', 'id': '1775610518',
'ext': 'mp3', 'ext': 'mp3',
'title': 'Woman', 'title': 'HONNE - Woman',
'thumbnail': r're:http://img\.xiami\.net/images/album/.*\.jpg', 'thumbnail': r're:http://img\.xiami\.net/images/album/.*\.jpg',
'duration': 265, 'duration': 265,
'creator': 'HONNE', 'creator': 'HONNE',
@ -95,7 +98,7 @@ class XiamiSongIE(XiamiBaseIE):
'info_dict': { 'info_dict': {
'id': '1775256504', 'id': '1775256504',
'ext': 'mp3', 'ext': 'mp3',
'title': '悟空', 'title': '戴荃 - 悟空',
'thumbnail': r're:http://img\.xiami\.net/images/album/.*\.jpg', 'thumbnail': r're:http://img\.xiami\.net/images/album/.*\.jpg',
'duration': 200, 'duration': 200,
'creator': '戴荃', 'creator': '戴荃',
@ -109,6 +112,26 @@ class XiamiSongIE(XiamiBaseIE):
}, },
}, },
'skip': 'Georestricted', 'skip': 'Georestricted',
}, {
'url': 'http://www.xiami.com/song/1775953850',
'info_dict': {
'id': '1775953850',
'ext': 'mp3',
'title': 'До Скону - Чума Пожирает Землю',
'thumbnail': r're:http://img\.xiami\.net/images/album/.*\.jpg',
'duration': 683,
'creator': 'До Скону',
'track': 'Чума Пожирает Землю',
'track_number': 7,
'album': 'Ад',
'artist': 'До Скону',
},
'params': {
'skip_download': True,
},
}, {
'url': 'http://www.xiami.com/song/xLHGwgd07a1',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):
@ -124,7 +147,7 @@ class XiamiPlaylistBaseIE(XiamiBaseIE):
class XiamiAlbumIE(XiamiPlaylistBaseIE): class XiamiAlbumIE(XiamiPlaylistBaseIE):
IE_NAME = 'xiami:album' IE_NAME = 'xiami:album'
IE_DESC = '虾米音乐 - 专辑' IE_DESC = '虾米音乐 - 专辑'
_VALID_URL = r'https?://(?:www\.)?xiami\.com/album/(?P<id>[0-9]+)' _VALID_URL = r'https?://(?:www\.)?xiami\.com/album/(?P<id>[^/?#&]+)'
_TYPE = '1' _TYPE = '1'
_TESTS = [{ _TESTS = [{
'url': 'http://www.xiami.com/album/2100300444', 'url': 'http://www.xiami.com/album/2100300444',
@ -136,28 +159,34 @@ class XiamiAlbumIE(XiamiPlaylistBaseIE):
}, { }, {
'url': 'http://www.xiami.com/album/512288?spm=a1z1s.6843761.1110925389.6.hhE9p9', 'url': 'http://www.xiami.com/album/512288?spm=a1z1s.6843761.1110925389.6.hhE9p9',
'only_matching': True, 'only_matching': True,
}, {
'url': 'http://www.xiami.com/album/URVDji2a506',
'only_matching': True,
}] }]
class XiamiArtistIE(XiamiPlaylistBaseIE): class XiamiArtistIE(XiamiPlaylistBaseIE):
IE_NAME = 'xiami:artist' IE_NAME = 'xiami:artist'
IE_DESC = '虾米音乐 - 歌手' IE_DESC = '虾米音乐 - 歌手'
_VALID_URL = r'https?://(?:www\.)?xiami\.com/artist/(?P<id>[0-9]+)' _VALID_URL = r'https?://(?:www\.)?xiami\.com/artist/(?P<id>[^/?#&]+)'
_TYPE = '2' _TYPE = '2'
_TEST = { _TESTS = [{
'url': 'http://www.xiami.com/artist/2132?spm=0.0.0.0.dKaScp', 'url': 'http://www.xiami.com/artist/2132?spm=0.0.0.0.dKaScp',
'info_dict': { 'info_dict': {
'id': '2132', 'id': '2132',
}, },
'playlist_count': 20, 'playlist_count': 20,
'skip': 'Georestricted', 'skip': 'Georestricted',
} }, {
'url': 'http://www.xiami.com/artist/bC5Tk2K6eb99',
'only_matching': True,
}]
class XiamiCollectionIE(XiamiPlaylistBaseIE): class XiamiCollectionIE(XiamiPlaylistBaseIE):
IE_NAME = 'xiami:collection' IE_NAME = 'xiami:collection'
IE_DESC = '虾米音乐 - 精选集' IE_DESC = '虾米音乐 - 精选集'
_VALID_URL = r'https?://(?:www\.)?xiami\.com/collect/(?P<id>[0-9]+)' _VALID_URL = r'https?://(?:www\.)?xiami\.com/collect/(?P<id>[^/?#&]+)'
_TYPE = '3' _TYPE = '3'
_TEST = { _TEST = {
'url': 'http://www.xiami.com/collect/156527391?spm=a1z1s.2943601.6856193.12.4jpBnr', 'url': 'http://www.xiami.com/collect/156527391?spm=a1z1s.2943601.6856193.12.4jpBnr',

View File

@ -751,7 +751,7 @@ def parseOpts(overrideArguments=None):
help='Convert video files to audio-only files (requires ffmpeg or avconv and ffprobe or avprobe)') help='Convert video files to audio-only files (requires ffmpeg or avconv and ffprobe or avprobe)')
postproc.add_option( postproc.add_option(
'--audio-format', metavar='FORMAT', dest='audioformat', default='best', '--audio-format', metavar='FORMAT', dest='audioformat', default='best',
help='Specify audio format: "best", "aac", "vorbis", "mp3", "m4a", "opus", or "wav"; "%default" by default') help='Specify audio format: "best", "aac", "vorbis", "mp3", "m4a", "opus", or "wav"; "%default" by default; No effect without -x')
postproc.add_option( postproc.add_option(
'--audio-quality', metavar='QUALITY', '--audio-quality', metavar='QUALITY',
dest='audioquality', default='5', dest='audioquality', default='5',
@ -867,7 +867,7 @@ def parseOpts(overrideArguments=None):
if '--ignore-config' not in system_conf: if '--ignore-config' not in system_conf:
user_conf = _readUserConf() user_conf = _readUserConf()
argv = system_conf + user_conf + command_line_conf argv = system_conf + user_conf + custom_conf + command_line_conf
opts, args = parser.parse_args(argv) opts, args = parser.parse_args(argv)
if opts.verbose: if opts.verbose:
for conf_label, conf in ( for conf_label, conf in (

View File

@ -1,3 +1,3 @@
from __future__ import unicode_literals from __future__ import unicode_literals
__version__ = '2017.01.14' __version__ = '2017.01.18'