Merge pull request #1 from rg3/master

Updating fork to match rg3
PuffingtonToast 2017-08-12 13:50:45 -07:00 committed by GitHub
commit 2bb34f0ad2
16 changed files with 251 additions and 49 deletions

View File

@@ -6,8 +6,8 @@
 ---
-### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.08.06*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
+### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.08.09*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
-- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.08.06**
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.08.09**
 ### Before submitting an *issue* make sure you have:
 - [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
@@ -35,7 +35,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl
 [debug] User config: []
 [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
 [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
-[debug] youtube-dl version 2017.08.06
+[debug] youtube-dl version 2017.08.09
 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
 [debug] Proxy map: {}

View File

@@ -1,6 +1,14 @@
-version <unreleased>
+version 2017.08.09
 
+Core
+* [utils] Skip missing params in cli_bool_option (#13865)
+
 Extractors
+* [xxxymovies] Fix title extraction (#13868)
++ [nick] Add support for nick.com.pl (#13860)
+* [mixcloud] Fix play info decryption (#13867)
+* [20min] Fix embeds extraction (#13852)
+* [dplayit] Fix extraction (#13851)
 + [niconico] Support videos with multiple formats (#13522)
 + [niconico] Support HTML5-only videos (#13806)

View File

@@ -1182,6 +1182,10 @@ part 3</font></u>
             cli_bool_option(
                 {'nocheckcertificate': False}, '--check-certificate', 'nocheckcertificate', 'false', 'true', '='),
             ['--check-certificate=true'])
+        self.assertEqual(
+            cli_bool_option(
+                {}, '--check-certificate', 'nocheckcertificate', 'false', 'true', '='),
+            [])
 
     def test_ohdave_rsa_encrypt(self):
         N = 0xab86b6371b5318aaa1d3c9e612a9f1264f372323c8c0f19875b5fc3b3fd3afcc1e5bec527aa94bfa85bffc157e4245aebda05389a5357b75115ac94f074aefcd

View File

@@ -1500,7 +1500,7 @@ class YoutubeDL(object):
             sanitize_string_field(format, 'format_id')
             sanitize_numeric_fields(format)
             format['url'] = sanitize_url(format['url'])
-            if format.get('format_id') is None:
+            if not format.get('format_id'):
                 format['format_id'] = compat_str(i)
             else:
                 # Sanitize format_id from characters used in format selector expression
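The switch from an `is None` check to a truthiness check means an empty-string `format_id` is now also replaced by the format's index, not only a missing one. A minimal standalone sketch of the effect (plain Python, not the YoutubeDL code path itself):

```python
# Minimal sketch: `not format.get('format_id')` catches both a missing and an empty id.
formats = [
    {'url': 'https://example.com/a.mp4'},                    # no format_id at all
    {'url': 'https://example.com/b.mp4', 'format_id': ''},   # empty format_id
    {'url': 'https://example.com/c.mp4', 'format_id': 'hd'},
]

for i, fmt in enumerate(formats):
    if not fmt.get('format_id'):
        fmt['format_id'] = str(i)

print([f['format_id'] for f in formats])  # ['0', '1', 'hd']
```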

View File

@@ -3,13 +3,13 @@ from __future__ import unicode_literals
 from .common import InfoExtractor
 from ..utils import (
-    ExtractorError,
-    HEADRequest,
+    int_or_none,
+    mimetype2ext,
 )
 
 
 class AparatIE(InfoExtractor):
-    _VALID_URL = r'^https?://(?:www\.)?aparat\.com/(?:v/|video/video/embed/videohash/)(?P<id>[a-zA-Z0-9]+)'
+    _VALID_URL = r'https?://(?:www\.)?aparat\.com/(?:v/|video/video/embed/videohash/)(?P<id>[a-zA-Z0-9]+)'
 
     _TEST = {
         'url': 'http://www.aparat.com/v/wP8On',
@@ -29,30 +29,41 @@ class AparatIE(InfoExtractor):
         # Note: There is an easier-to-parse configuration at
         # http://www.aparat.com/video/video/config/videohash/%video_id
         # but the URL in there does not work
-        embed_url = 'http://www.aparat.com/video/video/embed/vt/frame/showvideo/yes/videohash/' + video_id
-        webpage = self._download_webpage(embed_url, video_id)
-
-        file_list = self._parse_json(self._search_regex(
-            r'fileList\s*=\s*JSON\.parse\(\'([^\']+)\'\)', webpage, 'file list'), video_id)
-        for i, item in enumerate(file_list[0]):
-            video_url = item['file']
-            req = HEADRequest(video_url)
-            res = self._request_webpage(
-                req, video_id, note='Testing video URL %d' % i, errnote=False)
-            if res:
-                break
-        else:
-            raise ExtractorError('No working video URLs found')
+        webpage = self._download_webpage(
+            'http://www.aparat.com/video/video/embed/vt/frame/showvideo/yes/videohash/' + video_id,
+            video_id)
 
         title = self._search_regex(r'\s+title:\s*"([^"]+)"', webpage, 'title')
 
+        file_list = self._parse_json(
+            self._search_regex(
+                r'fileList\s*=\s*JSON\.parse\(\'([^\']+)\'\)', webpage,
+                'file list'),
+            video_id)
+
+        formats = []
+        for item in file_list[0]:
+            file_url = item.get('file')
+            if not file_url:
+                continue
+            ext = mimetype2ext(item.get('type'))
+            label = item.get('label')
+            formats.append({
+                'url': file_url,
+                'ext': ext,
+                'format_id': label or ext,
+                'height': int_or_none(self._search_regex(
+                    r'(\d+)[pP]', label or '', 'height', default=None)),
+            })
+        self._sort_formats(formats)
+
         thumbnail = self._search_regex(
             r'image:\s*"([^"]+)"', webpage, 'thumbnail', fatal=False)
 
         return {
             'id': video_id,
             'title': title,
-            'url': video_url,
-            'ext': 'mp4',
             'thumbnail': thumbnail,
             'age_limit': self._family_friendly_search(webpage),
+            'formats': formats,
         }
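To make the new format mapping concrete, here is a standalone sketch of how one fileList entry turns into a format dict. The sample item is hypothetical, and `mimetype2ext`/`int_or_none` are replaced by simplified stand-ins:

```python
import re

# Hypothetical fileList entry, shaped like the ones the extractor parses.
item = {'file': 'https://example.com/video-720.mp4',
        'type': 'video/mp4',
        'label': '720p'}

ext = {'video/mp4': 'mp4', 'video/webm': 'webm'}.get(item.get('type'))  # stand-in for mimetype2ext
label = item.get('label')
height_match = re.search(r'(\d+)[pP]', label or '')

fmt = {
    'url': item['file'],
    'ext': ext,
    'format_id': label or ext,
    'height': int(height_match.group(1)) if height_match else None,  # stand-in for int_or_none
}
print(fmt['format_id'], fmt['height'])  # 720p 720
```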

View File

@@ -940,7 +940,8 @@ class InfoExtractor(object):
     def _family_friendly_search(self, html):
         # See http://schema.org/VideoObject
-        family_friendly = self._html_search_meta('isFamilyFriendly', html)
+        family_friendly = self._html_search_meta(
+            'isFamilyFriendly', html, default=None)
 
         if not family_friendly:
             return None
@@ -2114,9 +2115,9 @@
                 return f
             return {}
 
-        def _media_formats(src, cur_media_type):
+        def _media_formats(src, cur_media_type, type_info={}):
             full_url = absolute_url(src)
-            ext = determine_ext(full_url)
+            ext = type_info.get('ext') or determine_ext(full_url)
             if ext == 'm3u8':
                 is_plain_url = False
                 formats = self._extract_m3u8_formats(
@@ -2165,9 +2166,9 @@
                 src = source_attributes.get('src')
                 if not src:
                     continue
-                is_plain_url, formats = _media_formats(src, media_type)
+                f = parse_content_type(source_attributes.get('type'))
+                is_plain_url, formats = _media_formats(src, media_type, f)
                 if is_plain_url:
-                    f = parse_content_type(source_attributes.get('type'))
                     f.update(formats[0])
                     media_info['formats'].append(f)
                 else:
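Threading the `<source type=...>` attribute into `_media_formats` means the format extension can come from the MIME type when the URL itself carries none (for example a bare streaming URL with a query string). A standalone sketch of that fallback order, with simplified stand-ins for `mimetype2ext`/`determine_ext`:

```python
from urllib.parse import urlparse

# Simplified stand-ins for youtube-dl's mimetype2ext / determine_ext helpers.
MIME_TO_EXT = {'video/mp4': 'mp4', 'video/webm': 'webm', 'application/x-mpegurl': 'm3u8'}

def url_ext(url):
    path = urlparse(url).path
    return path.rsplit('.', 1)[1] if '.' in path else None

def media_ext(src, type_attr=None):
    # Prefer the <source type="..."> MIME type, fall back to the URL's extension.
    mime = (type_attr or '').split(';')[0].strip().lower()
    return MIME_TO_EXT.get(mime) or url_ext(src)

print(media_ext('https://example.com/stream?id=42', 'application/x-mpegURL'))  # m3u8
print(media_ext('https://example.com/clip.mp4'))                               # mp4
```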

View File

@@ -840,6 +840,10 @@ from .rai import (
 from .rbmaradio import RBMARadioIE
 from .rds import RDSIE
 from .redbulltv import RedBullTVIE
+from .reddit import (
+    RedditIE,
+    RedditRIE,
+)
 from .redtube import RedTubeIE
 from .regiotv import RegioTVIE
 from .rentv import (

View File

@@ -98,6 +98,7 @@ from .wistia import WistiaIE
 from .mediaset import MediasetIE
 from .joj import JojIE
 from .megaphone import MegaphoneIE
+from .vzaar import VzaarIE
 
 
 class GenericIE(InfoExtractor):
@@ -1784,6 +1785,21 @@ class GenericIE(InfoExtractor):
             },
             'playlist_mincount': 5,
         },
+        {
+            # Limelight embed (LimelightPlayerUtil.embed)
+            'url': 'https://tv5.ca/videos?v=xuu8qowr291ri',
+            'info_dict': {
+                'id': '95d035dc5c8a401588e9c0e6bd1e9c92',
+                'ext': 'mp4',
+                'title': '07448641',
+                'timestamp': 1499890639,
+                'upload_date': '20170712',
+            },
+            'params': {
+                'skip_download': True,
+            },
+            'add_ie': ['LimelightMedia'],
+        },
         {
             'url': 'http://kron4.com/2017/04/28/standoff-with-walnut-creek-murder-suspect-ends-with-arrest/',
             'info_dict': {
@@ -1840,6 +1856,16 @@ class GenericIE(InfoExtractor):
                 'title': 'Стас Намин: «Мы нарушили девственность Кремля»',
             },
         },
+        {
+            # vzaar embed
+            'url': 'http://help.vzaar.com/article/165-embedding-video',
+            'md5': '7e3919d9d2620b89e3e00bec7fe8c9d4',
+            'info_dict': {
+                'id': '8707641',
+                'ext': 'mp4',
+                'title': 'Building A Business Online: Principal Chairs Q & A',
+            },
+        },
         # {
         #     # TODO: find another test
         #     # http://schema.org/VideoObject
@@ -2811,6 +2837,12 @@
             return self.playlist_from_matches(
                 mpfn_urls, video_id, video_title, ie=MegaphoneIE.ie_key())
 
+        # Look for vzaar embeds
+        vzaar_urls = VzaarIE._extract_urls(webpage)
+        if vzaar_urls:
+            return self.playlist_from_matches(
+                vzaar_urls, video_id, video_title, ie=VzaarIE.ie_key())
+
         def merge_dicts(dict1, dict2):
             merged = {}
             for k, v in dict1.items():

View File

@@ -26,14 +26,16 @@ class LimelightBaseIE(InfoExtractor):
             'Channel': 'channel',
             'ChannelList': 'channel_list',
         }
+
+        def smuggle(url):
+            return smuggle_url(url, {'source_url': source_url})
+
         entries = []
         for kind, video_id in re.findall(
                 r'LimelightPlayer\.doLoad(Media|Channel|ChannelList)\(["\'](?P<id>[a-z0-9]{32})',
                 webpage):
             entries.append(cls.url_result(
-                smuggle_url(
-                    'limelight:%s:%s' % (lm[kind], video_id),
-                    {'source_url': source_url}),
+                smuggle('limelight:%s:%s' % (lm[kind], video_id)),
                 'Limelight%s' % kind, video_id))
         for mobj in re.finditer(
                 # As per [1] class attribute should be exactly equal to
@@ -49,10 +51,15 @@
                 ''', webpage):
             kind, video_id = mobj.group('kind'), mobj.group('id')
             entries.append(cls.url_result(
-                smuggle_url(
-                    'limelight:%s:%s' % (kind, video_id),
-                    {'source_url': source_url}),
+                smuggle('limelight:%s:%s' % (kind, video_id)),
                 'Limelight%s' % kind.capitalize(), video_id))
+        # http://support.3playmedia.com/hc/en-us/articles/115009517327-Limelight-Embedding-the-Audio-Description-Plugin-with-the-Limelight-Player-on-Your-Web-Page)
+        for video_id in re.findall(
+                r'(?s)LimelightPlayerUtil\.embed\s*\(\s*{.*?\bmediaId["\']\s*:\s*["\'](?P<id>[a-z0-9]{32})',
+                webpage):
+            entries.append(cls.url_result(
+                smuggle('limelight:media:%s' % video_id),
+                LimelightMediaIE.ie_key(), video_id))
         return entries
 
     def _call_playlist_service(self, item_id, method, fatal=True, referer=None):
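A quick check of the new `LimelightPlayerUtil.embed` pattern from the diff against a hand-written page fragment (the markup below is illustrative, not taken from a real site; the media id matches the test case added in generic.py above):

```python
import re

# Illustrative page fragment using the LimelightPlayerUtil.embed() style.
webpage = '''
<script>
  LimelightPlayerUtil.embed({
    "height": 480,
    "width": 640,
    "mediaId": "95d035dc5c8a401588e9c0e6bd1e9c92"
  });
</script>
'''

media_ids = re.findall(
    r'(?s)LimelightPlayerUtil\.embed\s*\(\s*{.*?\bmediaId["\']\s*:\s*["\'](?P<id>[a-z0-9]{32})',
    webpage)
print(media_ids)  # ['95d035dc5c8a401588e9c0e6bd1e9c92']
```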

View File

@@ -54,15 +54,23 @@ class MixcloudIE(InfoExtractor):
     }]
 
     # See https://www.mixcloud.com/media/js2/www_js_2.9e23256562c080482435196ca3975ab5.js
-    @staticmethod
-    def _decrypt_play_info(play_info):
-        KEY = 'pleasedontdownloadourmusictheartistswontgetpaid'
-
+    def _decrypt_play_info(self, play_info, video_id):
+        KEYS = (
+            'pleasedontdownloadourmusictheartistswontgetpaid',
+            'window.addEventListener = window.addEventListener || function() {};',
+            '(function() { return new Date().toLocaleDateString(); })()',
+        )
         play_info = base64.b64decode(play_info.encode('ascii'))
-
-        return ''.join([
-            compat_chr(compat_ord(ch) ^ compat_ord(KEY[idx % len(KEY)]))
-            for idx, ch in enumerate(play_info)])
+        for num, key in enumerate(KEYS, start=1):
+            try:
+                return self._parse_json(
+                    ''.join([
+                        compat_chr(compat_ord(ch) ^ compat_ord(key[idx % len(key)]))
+                        for idx, ch in enumerate(play_info)]),
+                    video_id)
+            except ExtractorError:
+                if num == len(KEYS):
+                    raise
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
@@ -78,8 +86,8 @@
         encrypted_play_info = self._search_regex(
             r'm-play-info="([^"]+)"', webpage, 'play info')
 
-        play_info = self._parse_json(
-            self._decrypt_play_info(encrypted_play_info), track_id)
+        play_info = self._decrypt_play_info(encrypted_play_info, track_id)
 
         if message and 'stream_url' not in play_info:
             raise ExtractorError('%s said: %s' % (self.IE_NAME, message), expected=True)
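The decryption itself is a repeating-key XOR over the base64-decoded blob; the change is that several candidate keys are now tried until one yields valid JSON. A standalone sketch of that idea (the keys and payload here are made up for the example):

```python
import base64
import json

def xor_decrypt(data, key):
    # Repeating-key XOR, mirroring the compat_chr/compat_ord loop in the extractor.
    return ''.join(chr(b ^ ord(key[i % len(key)])) for i, b in enumerate(data))

def decrypt_play_info(encoded, keys):
    blob = base64.b64decode(encoded.encode('ascii'))
    for num, key in enumerate(keys, start=1):
        try:
            return json.loads(xor_decrypt(blob, key))
        except ValueError:
            if num == len(keys):
                raise

# Made-up example: encrypt a tiny JSON document with the second key, then recover it.
KEYS = ('first-key-that-no-longer-works', 'second-key-that-does')
plaintext = '{"stream_url": "https://example.com/a.m4a"}'
encoded = base64.b64encode(
    bytes(ord(c) ^ ord(KEYS[1][i % len(KEYS[1])]) for i, c in enumerate(plaintext))).decode('ascii')

print(decrypt_play_info(encoded, KEYS))  # {'stream_url': 'https://example.com/a.m4a'}
```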

View File

@@ -75,7 +75,7 @@ class NickIE(MTVServicesInfoExtractor):
 class NickDeIE(MTVServicesInfoExtractor):
     IE_NAME = 'nick.de'
-    _VALID_URL = r'https?://(?:www\.)?(?P<host>nick\.de|nickelodeon\.(?:nl|at))/(?:playlist|shows)/(?:[^/]+/)*(?P<id>[^/?#&]+)'
+    _VALID_URL = r'https?://(?:www\.)?(?P<host>nick\.(?:de|com\.pl)|nickelodeon\.(?:nl|at))/[^/]+/(?:[^/]+/)*(?P<id>[^/?#&]+)'
     _TESTS = [{
         'url': 'http://www.nick.de/playlist/3773-top-videos/videos/episode/17306-zu-wasser-und-zu-land-rauchende-erdnusse',
         'only_matching': True,
@@ -88,6 +88,9 @@ class NickDeIE(MTVServicesInfoExtractor):
     }, {
         'url': 'http://www.nickelodeon.at/playlist/3773-top-videos/videos/episode/77993-das-letzte-gefecht',
         'only_matching': True,
+    }, {
+        'url': 'http://www.nick.com.pl/seriale/474-spongebob-kanciastoporty/wideo/17412-teatr-to-jest-to-rodeo-oszolom',
+        'only_matching': True,
     }]
 
     def _extract_mrss_url(self, webpage, host):
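A quick sanity check of the widened `_VALID_URL` against the new nick.com.pl test URL (standalone snippet, using the pattern exactly as it appears in the diff):

```python
import re

VALID_URL = r'https?://(?:www\.)?(?P<host>nick\.(?:de|com\.pl)|nickelodeon\.(?:nl|at))/[^/]+/(?:[^/]+/)*(?P<id>[^/?#&]+)'

m = re.match(VALID_URL, 'http://www.nick.com.pl/seriale/474-spongebob-kanciastoporty/wideo/17412-teatr-to-jest-to-rodeo-oszolom')
print(m.group('host'))  # nick.com.pl
print(m.group('id'))    # 17412-teatr-to-jest-to-rodeo-oszolom
```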

View File

@@ -0,0 +1,114 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    ExtractorError,
+    int_or_none,
+    float_or_none,
+)
+
+
+class RedditIE(InfoExtractor):
+    _VALID_URL = r'https?://v\.redd\.it/(?P<id>[^/?#&]+)'
+    _TEST = {
+        # from https://www.reddit.com/r/videos/comments/6rrwyj/that_small_heart_attack/
+        'url': 'https://v.redd.it/zv89llsvexdz',
+        'md5': '655d06ace653ea3b87bccfb1b27ec99d',
+        'info_dict': {
+            'id': 'zv89llsvexdz',
+            'ext': 'mp4',
+            'title': 'zv89llsvexdz',
+        },
+        'params': {
+            'format': 'bestvideo',
+        },
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        formats = self._extract_m3u8_formats(
+            'https://v.redd.it/%s/HLSPlaylist.m3u8' % video_id, video_id,
+            'mp4', entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)
+
+        formats.extend(self._extract_mpd_formats(
+            'https://v.redd.it/%s/DASHPlaylist.mpd' % video_id, video_id,
+            mpd_id='dash', fatal=False))
+
+        return {
+            'id': video_id,
+            'title': video_id,
+            'formats': formats,
+        }
+
+
+class RedditRIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?reddit\.com/r/[^/]+/comments/(?P<id>[^/]+)'
+    _TESTS = [{
+        'url': 'https://www.reddit.com/r/videos/comments/6rrwyj/that_small_heart_attack/',
+        'info_dict': {
+            'id': 'zv89llsvexdz',
+            'ext': 'mp4',
+            'title': 'That small heart attack.',
+            'thumbnail': r're:^https?://.*\.jpg$',
+            'timestamp': 1501941939,
+            'upload_date': '20170805',
+            'uploader': 'Antw87',
+            'like_count': int,
+            'dislike_count': int,
+            'comment_count': int,
+            'age_limit': 0,
+        },
+        'params': {
+            'format': 'bestvideo',
+            'skip_download': True,
+        },
+    }, {
+        'url': 'https://www.reddit.com/r/videos/comments/6rrwyj',
+        'only_matching': True,
+    }, {
+        # imgur
+        'url': 'https://www.reddit.com/r/MadeMeSmile/comments/6t7wi5/wait_for_it/',
+        'only_matching': True,
+    }, {
+        # streamable
+        'url': 'https://www.reddit.com/r/videos/comments/6t7sg9/comedians_hilarious_joke_about_the_guam_flag/',
+        'only_matching': True,
+    }, {
+        # youtube
+        'url': 'https://www.reddit.com/r/videos/comments/6t75wq/southern_man_tries_to_speak_without_an_accent/',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        data = self._download_json(
+            url + '.json', video_id)[0]['data']['children'][0]['data']
+
+        video_url = data['url']
+
+        # Avoid recursing into the same reddit URL
+        if 'reddit.com/' in video_url and '/%s/' % video_id in video_url:
+            raise ExtractorError('No media found', expected=True)
+
+        over_18 = data.get('over_18')
+        if over_18 is True:
+            age_limit = 18
+        elif over_18 is False:
+            age_limit = 0
+        else:
+            age_limit = None
+
+        return {
+            '_type': 'url_transparent',
+            'url': video_url,
+            'title': data.get('title'),
+            'thumbnail': data.get('thumbnail'),
+            'timestamp': float_or_none(data.get('created_utc')),
+            'uploader': data.get('author'),
+            'like_count': int_or_none(data.get('ups')),
+            'dislike_count': int_or_none(data.get('downs')),
+            'comment_count': int_or_none(data.get('num_comments')),
+            'age_limit': age_limit,
+        }
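For context, `RedditRIE` relies on appending `.json` to a comments URL to get the post data, and reads the fields used above (`url`, `title`, `author`, `ups`, `over_18`, ...) from `[0]['data']['children'][0]['data']`. A sketch of that lookup against a made-up, heavily trimmed response (the payload below is illustrative, not a captured API response):

```python
import json

# Illustrative shape of what GET <post_url>.json might return (trimmed, assumed).
response_text = json.dumps([
    {'data': {'children': [{'data': {
        'url': 'https://v.redd.it/zv89llsvexdz',
        'title': 'That small heart attack.',
        'author': 'Antw87',
        'created_utc': 1501941939.0,
        'ups': 1000, 'downs': 0, 'num_comments': 500,
        'over_18': False,
    }}]}},
    {'data': {'children': []}},  # remaining elements are not used by the extractor
])

data = json.loads(response_text)[0]['data']['children'][0]['data']
print(data['url'])           # https://v.redd.it/zv89llsvexdz
print(data.get('over_18'))   # False -> age_limit 0
```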

View File

@@ -1,6 +1,8 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
+import re
+
 from .common import InfoExtractor
 from ..utils import (
     int_or_none,
@@ -28,6 +30,12 @@ class VzaarIE(InfoExtractor):
         },
     }]
 
+    @staticmethod
+    def _extract_urls(webpage):
+        return re.findall(
+            r'<iframe[^>]+src=["\']((?:https?:)?//(?:view\.vzaar\.com)/[0-9]+)',
+            webpage)
+
     def _real_extract(self, url):
         video_id = self._match_id(url)
         video_data = self._download_json(
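A quick check of the new `_extract_urls` pattern against a hand-written page fragment (the iframe markup is illustrative; the id is the one from the generic.py test above):

```python
import re

webpage = '<p>Watch:</p><iframe allowfullscreen src="//view.vzaar.com/8707641/player"></iframe>'

urls = re.findall(
    r'<iframe[^>]+src=["\']((?:https?:)?//(?:view\.vzaar\.com)/[0-9]+)',
    webpage)
print(urls)  # ['//view.vzaar.com/8707641']
```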

View File

@@ -39,8 +39,8 @@ class XXXYMoviesIE(InfoExtractor):
             r"video_url\s*:\s*'([^']+)'", webpage, 'video URL')
 
         title = self._html_search_regex(
-            [r'<div class="block_header">\s*<h1>([^<]+)</h1>',
-             r'<title>(.*?)\s*-\s*XXXYMovies\.com</title>'],
+            [r'<div[^>]+\bclass="block_header"[^>]*>\s*<h1>([^<]+)<',
+             r'<title>(.*?)\s*-\s*(?:XXXYMovies\.com|XXX\s+Movies)</title>'],
             webpage, 'title')
 
         thumbnail = self._search_regex(
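The loosened patterns tolerate extra attributes on the header div and an alternate site name in the page title. A quick check against two illustrative markup snippets (made up for the example):

```python
import re

patterns = [
    r'<div[^>]+\bclass="block_header"[^>]*>\s*<h1>([^<]+)<',
    r'<title>(.*?)\s*-\s*(?:XXXYMovies\.com|XXX\s+Movies)</title>',
]

samples = [
    '<div id="main" class="block_header">\n<h1>Some Clip Title</h1></div>',
    '<title>Some Clip Title - XXX Movies</title>',
]

for html in samples:
    for pattern in patterns:
        m = re.search(pattern, html)
        if m:
            print(m.group(1))  # Some Clip Title (matched by either pattern)
            break
```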

View File

@@ -2733,6 +2733,8 @@ def cli_option(params, command_option, param):
 
 def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
     param = params.get(param)
+    if param is None:
+        return []
     assert isinstance(param, bool)
     if separator:
         return [command_option + separator + (true_value if param else false_value)]
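The guard makes `cli_bool_option` a no-op when the option was never set, instead of tripping the `assert`. A usage sketch based on the function as shown in the diff; the final non-separator branch is not visible above and is filled in here as a plausible completion so the snippet runs standalone:

```python
def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
    param = params.get(param)
    if param is None:
        return []
    assert isinstance(param, bool)
    if separator:
        return [command_option + separator + (true_value if param else false_value)]
    return [command_option, true_value if param else false_value]  # assumed completion

print(cli_bool_option(
    {'nocheckcertificate': True}, '--no-check-certificate', 'nocheckcertificate'))
# ['--no-check-certificate', 'true']
print(cli_bool_option(
    {'nocheckcertificate': False}, '--check-certificate', 'nocheckcertificate', 'false', 'true', '='))
# ['--check-certificate=true']
print(cli_bool_option(
    {}, '--check-certificate', 'nocheckcertificate', 'false', 'true', '='))
# [] -- previously this raised AssertionError
```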

View File

@@ -1,3 +1,3 @@
 from __future__ import unicode_literals
 
-__version__ = '2017.08.06'
+__version__ = '2017.08.09'