Merge remote-tracking branch 'upstream/master'

This commit is contained in:
Ryan Hayward 2018-06-01 13:52:50 -05:00
commit 0654ffd27a
13 changed files with 131 additions and 25 deletions

View File

@ -6,8 +6,8 @@
--- ---
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.05.30*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. ### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.06.02*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.05.30** - [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.06.02**
### Before submitting an *issue* make sure you have: ### Before submitting an *issue* make sure you have:
- [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections - [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
@ -36,7 +36,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl
[debug] User config: [] [debug] User config: []
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
[debug] youtube-dl version 2018.05.30 [debug] youtube-dl version 2018.06.02
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
[debug] Proxy map: {} [debug] Proxy map: {}

1
.gitignore vendored
View File

@ -47,3 +47,4 @@ youtube-dl.zsh
*.iml *.iml
tmp/ tmp/
venv/

View File

@ -1,3 +1,21 @@
version 2018.06.02
Core
* [utils] Improve determine_ext
Extractors
+ [facebook] Add support for tahoe player videos (#15441, #16554)
* [cbc] Improve extraction (#16583, #16593)
* [openload] Improve ext extraction (#16595)
+ [twitter:card] Add support for another endpoint (#16586)
+ [openload] Add support for oload.win and oload.download (#16592)
* [audimedia] Fix extraction (#15309)
+ [francetv] Add support for sport.francetvinfo.fr (#15645)
* [mlb] Improve extraction (#16587)
- [nhl] Remove old extractors
* [rbmaradio] Check formats availability (#16585)
version 2018.05.30 version 2018.05.30
Core Core

View File

@ -553,9 +553,6 @@
- **nfl.com** - **nfl.com**
- **NhkVod** - **NhkVod**
- **nhl.com** - **nhl.com**
- **nhl.com:news**: NHL news
- **nhl.com:videocenter**
- **nhl.com:videocenter:category**: NHL videocenter category
- **nick.com** - **nick.com**
- **nick.de** - **nick.de**
- **nickelodeon:br** - **nickelodeon:br**
@ -793,6 +790,7 @@
- **Spiegel** - **Spiegel**
- **Spiegel:Article**: Articles on spiegel.de - **Spiegel:Article**: Articles on spiegel.de
- **Spiegeltv** - **Spiegeltv**
- **sport.francetvinfo.fr**
- **Sport5** - **Sport5**
- **SportBoxEmbed** - **SportBoxEmbed**
- **SportDeutschland** - **SportDeutschland**

View File

@ -2,5 +2,5 @@
universal = True universal = True
[flake8] [flake8]
exclude = youtube_dl/extractor/__init__.py,devscripts/buildserver.py,devscripts/lazy_load_template.py,devscripts/make_issue_template.py,setup.py,build,.git exclude = youtube_dl/extractor/__init__.py,devscripts/buildserver.py,devscripts/lazy_load_template.py,devscripts/make_issue_template.py,setup.py,build,.git,venv
ignore = E402,E501,E731,E741 ignore = E402,E501,E731,E741

View File

@ -361,6 +361,7 @@ class TestUtil(unittest.TestCase):
self.assertEqual(determine_ext('http://example.com/foo/bar.nonext/?download', None), None) self.assertEqual(determine_ext('http://example.com/foo/bar.nonext/?download', None), None)
self.assertEqual(determine_ext('http://example.com/foo/bar/mp4?download', None), None) self.assertEqual(determine_ext('http://example.com/foo/bar/mp4?download', None), None)
self.assertEqual(determine_ext('http://example.com/foo/bar.m3u8//?download'), 'm3u8') self.assertEqual(determine_ext('http://example.com/foo/bar.m3u8//?download'), 'm3u8')
self.assertEqual(determine_ext('foobar', None), None)
def test_find_xpath_attr(self): def test_find_xpath_attr(self):
testxml = '''<root> testxml = '''<root>

View File

@ -17,6 +17,7 @@ from ..utils import (
xpath_element, xpath_element,
xpath_with_ns, xpath_with_ns,
find_xpath_attr, find_xpath_attr,
orderedSet,
parse_duration, parse_duration,
parse_iso8601, parse_iso8601,
parse_age_limit, parse_age_limit,
@ -136,9 +137,15 @@ class CBCIE(InfoExtractor):
entries = [ entries = [
self._extract_player_init(player_init, display_id) self._extract_player_init(player_init, display_id)
for player_init in re.findall(r'CBC\.APP\.Caffeine\.initInstance\(({.+?})\);', webpage)] for player_init in re.findall(r'CBC\.APP\.Caffeine\.initInstance\(({.+?})\);', webpage)]
media_ids = []
for media_id_re in (
r'<iframe[^>]+src="[^"]+?mediaId=(\d+)"',
r'<div[^>]+\bid=["\']player-(\d+)',
r'guid["\']\s*:\s*["\'](\d+)'):
media_ids.extend(re.findall(media_id_re, webpage))
entries.extend([ entries.extend([
self.url_result('cbcplayer:%s' % media_id, 'CBCPlayer', media_id) self.url_result('cbcplayer:%s' % media_id, 'CBCPlayer', media_id)
for media_id in re.findall(r'<iframe[^>]+src="[^"]+?mediaId=(\d+)"', webpage)]) for media_id in orderedSet(media_ids)])
return self.playlist_result( return self.playlist_result(
entries, display_id, strip_or_none(title), entries, display_id, strip_or_none(title),
self._og_search_description(webpage)) self._og_search_description(webpage))

View File

@ -56,6 +56,7 @@ class FacebookIE(InfoExtractor):
_CHROME_USER_AGENT = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.97 Safari/537.36' _CHROME_USER_AGENT = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.97 Safari/537.36'
_VIDEO_PAGE_TEMPLATE = 'https://www.facebook.com/video/video.php?v=%s' _VIDEO_PAGE_TEMPLATE = 'https://www.facebook.com/video/video.php?v=%s'
_VIDEO_PAGE_TAHOE_TEMPLATE = 'https://www.facebook.com/video/tahoe/async/%s/?chain=true&isvideo=true'
_TESTS = [{ _TESTS = [{
'url': 'https://www.facebook.com/video.php?v=637842556329505&fref=nf', 'url': 'https://www.facebook.com/video.php?v=637842556329505&fref=nf',
@ -208,6 +209,17 @@ class FacebookIE(InfoExtractor):
# no title # no title
'url': 'https://www.facebook.com/onlycleverentertainment/videos/1947995502095005/', 'url': 'https://www.facebook.com/onlycleverentertainment/videos/1947995502095005/',
'only_matching': True, 'only_matching': True,
}, {
'url': 'https://www.facebook.com/WatchESLOne/videos/359649331226507/',
'info_dict': {
'id': '359649331226507',
'ext': 'mp4',
'title': '#ESLOne VoD - Birmingham Finals Day#1 Fnatic vs. @Evil Geniuses',
'uploader': 'ESL One Dota 2',
},
'params': {
'skip_download': True,
},
}] }]
@staticmethod @staticmethod
@ -312,16 +324,18 @@ class FacebookIE(InfoExtractor):
if server_js_data: if server_js_data:
video_data = extract_video_data(server_js_data.get('instances', [])) video_data = extract_video_data(server_js_data.get('instances', []))
def extract_from_jsmods_instances(js_data):
if js_data:
return extract_video_data(try_get(
js_data, lambda x: x['jsmods']['instances'], list) or [])
if not video_data: if not video_data:
server_js_data = self._parse_json( server_js_data = self._parse_json(
self._search_regex( self._search_regex(
r'bigPipe\.onPageletArrive\(({.+?})\)\s*;\s*}\s*\)\s*,\s*["\']onPageletArrive\s+(?:stream_pagelet|pagelet_group_mall|permalink_video_pagelet)', r'bigPipe\.onPageletArrive\(({.+?})\)\s*;\s*}\s*\)\s*,\s*["\']onPageletArrive\s+(?:stream_pagelet|pagelet_group_mall|permalink_video_pagelet)',
webpage, 'js data', default='{}'), webpage, 'js data', default='{}'),
video_id, transform_source=js_to_json, fatal=False) video_id, transform_source=js_to_json, fatal=False)
if server_js_data: video_data = extract_from_jsmods_instances(server_js_data)
video_data = extract_video_data(try_get(
server_js_data, lambda x: x['jsmods']['instances'],
list) or [])
if not video_data: if not video_data:
if not fatal_if_no_video: if not fatal_if_no_video:
@ -333,7 +347,32 @@ class FacebookIE(InfoExtractor):
expected=True) expected=True)
elif '>You must log in to continue' in webpage: elif '>You must log in to continue' in webpage:
self.raise_login_required() self.raise_login_required()
else:
# Video info not in first request, do a secondary request using
# tahoe player specific URL
tahoe_data = self._download_webpage(
self._VIDEO_PAGE_TAHOE_TEMPLATE % video_id, video_id,
data=urlencode_postdata({
'__user': 0,
'__a': 1,
'__pc': self._search_regex(
r'pkg_cohort["\']\s*:\s*["\'](.+?)["\']', webpage,
'pkg cohort', default='PHASED:DEFAULT'),
'__rev': self._search_regex(
r'client_revision["\']\s*:\s*(\d+),', webpage,
'client revision', default='3944515'),
}),
headers={
'Content-Type': 'application/x-www-form-urlencoded',
})
tahoe_js_data = self._parse_json(
self._search_regex(
r'for\s+\(\s*;\s*;\s*\)\s*;(.+)', tahoe_data,
'tahoe js data', default='{}'),
video_id, fatal=False)
video_data = extract_from_jsmods_instances(tahoe_js_data)
if not video_data:
raise ExtractorError('Cannot parse data') raise ExtractorError('Cannot parse data')
formats = [] formats = []
@ -380,7 +419,8 @@ class FacebookIE(InfoExtractor):
video_title = 'Facebook video #%s' % video_id video_title = 'Facebook video #%s' % video_id
uploader = clean_html(get_element_by_id( uploader = clean_html(get_element_by_id(
'fbPhotoPageAuthorName', webpage)) or self._search_regex( 'fbPhotoPageAuthorName', webpage)) or self._search_regex(
r'ownerName\s*:\s*"([^"]+)"', webpage, 'uploader', fatal=False) r'ownerName\s*:\s*"([^"]+)"', webpage, 'uploader',
fatal=False) or self._og_search_title(webpage, fatal=False)
timestamp = int_or_none(self._search_regex( timestamp = int_or_none(self._search_regex(
r'<abbr[^>]+data-utime=["\'](\d+)', webpage, r'<abbr[^>]+data-utime=["\'](\d+)', webpage,
'timestamp', default=None)) 'timestamp', default=None))

View File

@ -4,7 +4,6 @@ from __future__ import unicode_literals
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_str
from ..utils import ( from ..utils import (
parse_iso8601, parse_iso8601,
float_or_none, float_or_none,

View File

@ -307,6 +307,10 @@ class OpenloadIE(InfoExtractor):
}, { }, {
'url': 'https://oload.download/f/kUEfGclsU9o', 'url': 'https://oload.download/f/kUEfGclsU9o',
'only_matching': True, 'only_matching': True,
}, {
# Its title has not got its extension but url has it
'url': 'https://oload.download/f/N4Otkw39VCw/Tomb.Raider.2018.HDRip.XviD.AC3-EVO.avi.mp4',
'only_matching': True,
}] }]
_USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36' _USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36'
@ -368,8 +372,7 @@ class OpenloadIE(InfoExtractor):
'title': title, 'title': title,
'thumbnail': entry.get('thumbnail') or self._og_search_thumbnail(webpage, default=None), 'thumbnail': entry.get('thumbnail') or self._og_search_thumbnail(webpage, default=None),
'url': video_url, 'url': video_url,
# Seems all videos have extensions in their titles 'ext': determine_ext(title, None) or determine_ext(url, 'mp4'),
'ext': determine_ext(title, 'mp4'),
'subtitles': subtitles, 'subtitles': subtitles,
'http_headers': headers, 'http_headers': headers,
} }

View File

@ -63,7 +63,7 @@ class TwitterCardIE(TwitterBaseIE):
'id': '623160978427936768', 'id': '623160978427936768',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Twitter web player', 'title': 'Twitter web player',
'thumbnail': r're:^https?://.*(?:\bformat=|\.)jpg', 'thumbnail': r're:^https?://.*$',
}, },
}, },
{ {
@ -223,14 +223,37 @@ class TwitterCardIE(TwitterBaseIE):
formats.extend(self._extract_mobile_formats(username, video_id)) formats.extend(self._extract_mobile_formats(username, video_id))
if formats: if formats:
break
self._remove_duplicate_formats(formats)
self._sort_formats(formats)
title = self._search_regex(r'<title>([^<]+)</title>', webpage, 'title') title = self._search_regex(r'<title>([^<]+)</title>', webpage, 'title')
thumbnail = config.get('posterImageUrl') or config.get('image_src') thumbnail = config.get('posterImageUrl') or config.get('image_src')
duration = float_or_none(config.get('duration'), scale=1000) or duration duration = float_or_none(config.get('duration'), scale=1000) or duration
break
if not formats:
config = self._download_json(
'https://api.twitter.com/1.1/videos/tweet/config/%s.json' % video_id,
video_id, headers={
'Authorization': 'Bearer AAAAAAAAAAAAAAAAAAAAAIK1zgAAAAAA2tUWuhGZ2JceoId5GwYWU5GspY4%3DUq7gzFoCZs1QfwGoVdvSac3IniczZEYXIcDyumCauIXpcAPorE',
})
track = config['track']
vmap_url = track.get('vmapUrl')
if vmap_url:
formats = self._extract_formats_from_vmap_url(vmap_url, video_id)
else:
playback_url = track['playbackUrl']
if determine_ext(playback_url) == 'm3u8':
formats = self._extract_m3u8_formats(
playback_url, video_id, 'mp4',
entry_protocol='m3u8_native', m3u8_id='hls')
else:
formats = [{
'url': playback_url,
}]
title = 'Twitter web player'
thumbnail = config.get('posterImage')
duration = float_or_none(track.get('durationMs'), scale=1000)
self._remove_duplicate_formats(formats)
self._sort_formats(formats)
return { return {
'id': video_id, 'id': video_id,
@ -375,6 +398,22 @@ class TwitterIE(InfoExtractor):
'params': { 'params': {
'skip_download': True, # requires ffmpeg 'skip_download': True, # requires ffmpeg
}, },
}, {
# card via api.twitter.com/1.1/videos/tweet/config
'url': 'https://twitter.com/LisPower1/status/1001551623938805763',
'info_dict': {
'id': '1001551623938805763',
'ext': 'mp4',
'title': 're:.*?Shep is on a roll today.*?',
'thumbnail': r're:^https?://.*\.jpg',
'description': 'md5:63b036c228772523ae1924d5f8e5ed6b',
'uploader': 'Lis Power',
'uploader_id': 'LisPower1',
'duration': 111.278,
},
'params': {
'skip_download': True, # requires ffmpeg
},
}] }]
def _real_extract(self, url): def _real_extract(self, url):

View File

@ -1228,7 +1228,7 @@ def unified_timestamp(date_str, day_first=True):
def determine_ext(url, default_ext='unknown_video'): def determine_ext(url, default_ext='unknown_video'):
if url is None: if url is None or '.' not in url:
return default_ext return default_ext
guess = url.partition('?')[0].rpartition('.')[2] guess = url.partition('?')[0].rpartition('.')[2]
if re.match(r'^[A-Za-z0-9]+$', guess): if re.match(r'^[A-Za-z0-9]+$', guess):

View File

@ -1,3 +1,3 @@
from __future__ import unicode_literals from __future__ import unicode_literals
__version__ = '2018.05.30' __version__ = '2018.06.02'