Merge remote-tracking branch 'upstream/master'

2018-06-01 13:52:50 -05:00 · 2018-06-01 13:52:50 -05:00 · 0654ffd27a
commit 0654ffd27a
parent 0ec6aa0324 19e42ead9b
13 changed files with 131 additions and 25 deletions
--- a/.github/ISSUE_TEMPLATE.md
+++ b/.github/ISSUE_TEMPLATE.md
@ -6,8 +6,8 @@

 ---

-### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.05.30*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.05.30**
+### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.06.02*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.06.02**

 ### Before submitting an *issue* make sure you have:
 - [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
@ -36,7 +36,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl
 [debug] User config: []
 [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
 [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
-[debug] youtube-dl version 2018.05.30
+[debug] youtube-dl version 2018.06.02
 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
 [debug] Proxy map: {}
--- a/.gitignore
+++ b/.gitignore
@ -47,3 +47,4 @@ youtube-dl.zsh
 *.iml

 tmp/
+venv/
--- a/18
+++ b/18
@ -1,3 +1,21 @@
+version 2018.06.02
+
+Core
+* [utils] Improve determine_ext
+
+Extractors
+ [facebook] Add support for tahoe player videos (#15441, #16554)
+* [cbc] Improve extraction (#16583, #16593)
+* [openload] Improve ext extraction (#16595)
+ [twitter:card] Add support for another endpoint (#16586)
+ [openload] Add support for oload.win and oload.download (#16592)
+* [audimedia] Fix extraction (#15309)
+ [francetv] Add support for sport.francetvinfo.fr (#15645)
+* [mlb] Improve extraction (#16587)
+- [nhl] Remove old extractors
+* [rbmaradio] Check formats availability (#16585)
+
+
 version 2018.05.30

 Core
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@ -553,9 +553,6 @@
 - **nfl.com**
 - **NhkVod**
 - **nhl.com**
- - **nhl.com:news**: NHL news
- - **nhl.com:videocenter**
- - **nhl.com:videocenter:category**: NHL videocenter category
 - **nick.com**
 - **nick.de**
 - **nickelodeon:br**
@ -793,6 +790,7 @@
 - **Spiegel**
 - **Spiegel:Article**: Articles on spiegel.de
 - **Spiegeltv**
+ - **sport.francetvinfo.fr**
 - **Sport5**
 - **SportBoxEmbed**
 - **SportDeutschland**
--- a/setup.cfg
+++ b/setup.cfg
@ -2,5 +2,5 @@
 universal = True

 [flake8]
-exclude = youtube_dl/extractor/__init__.py,devscripts/buildserver.py,devscripts/lazy_load_template.py,devscripts/make_issue_template.py,setup.py,build,.git
+exclude = youtube_dl/extractor/__init__.py,devscripts/buildserver.py,devscripts/lazy_load_template.py,devscripts/make_issue_template.py,setup.py,build,.git,venv
 ignore = E402,E501,E731,E741
--- a/test/test_utils.py
+++ b/test/test_utils.py
@ -361,6 +361,7 @@ class TestUtil(unittest.TestCase):
        self.assertEqual(determine_ext('http://example.com/foo/bar.nonext/?download', None), None)
        self.assertEqual(determine_ext('http://example.com/foo/bar/mp4?download', None), None)
        self.assertEqual(determine_ext('http://example.com/foo/bar.m3u8//?download'), 'm3u8')
+        self.assertEqual(determine_ext('foobar', None), None)

    def test_find_xpath_attr(self):
        testxml = '''<root>
--- a/youtube_dl/extractor/cbc.py
+++ b/youtube_dl/extractor/cbc.py
@ -17,6 +17,7 @@ from ..utils import (
    xpath_element,
    xpath_with_ns,
    find_xpath_attr,
+    orderedSet,
    parse_duration,
    parse_iso8601,
    parse_age_limit,
@ -136,9 +137,15 @@ class CBCIE(InfoExtractor):
        entries = [
            self._extract_player_init(player_init, display_id)
            for player_init in re.findall(r'CBC\.APP\.Caffeine\.initInstance\(({.+?})\);', webpage)]
+        media_ids = []
+        for media_id_re in (
+                r'<iframe[^>]+src="[^"]+?mediaId=(\d+)"',
+                r'<div[^>]+\bid=["\']player-(\d+)',
+                r'guid["\']\s*:\s*["\'](\d+)'):
+            media_ids.extend(re.findall(media_id_re, webpage))
        entries.extend([
            self.url_result('cbcplayer:%s' % media_id, 'CBCPlayer', media_id)
-            for media_id in re.findall(r'<iframe[^>]+src="[^"]+?mediaId=(\d+)"', webpage)])
+            for media_id in orderedSet(media_ids)])
        return self.playlist_result(
            entries, display_id, strip_or_none(title),
            self._og_search_description(webpage))
--- a/youtube_dl/extractor/facebook.py
+++ b/youtube_dl/extractor/facebook.py
@ -56,6 +56,7 @@ class FacebookIE(InfoExtractor):
    _CHROME_USER_AGENT = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.97 Safari/537.36'

    _VIDEO_PAGE_TEMPLATE = 'https://www.facebook.com/video/video.php?v=%s'
+    _VIDEO_PAGE_TAHOE_TEMPLATE = 'https://www.facebook.com/video/tahoe/async/%s/?chain=true&isvideo=true'

    _TESTS = [{
        'url': 'https://www.facebook.com/video.php?v=637842556329505&fref=nf',
@ -208,6 +209,17 @@ class FacebookIE(InfoExtractor):
        # no title
        'url': 'https://www.facebook.com/onlycleverentertainment/videos/1947995502095005/',
        'only_matching': True,
+    }, {
+        'url': 'https://www.facebook.com/WatchESLOne/videos/359649331226507/',
+        'info_dict': {
+            'id': '359649331226507',
+            'ext': 'mp4',
+            'title': '#ESLOne VoD - Birmingham Finals Day#1 Fnatic vs. @Evil Geniuses',
+            'uploader': 'ESL One Dota 2',
+        },
+        'params': {
+            'skip_download': True,
+        },
    }]

    @staticmethod
@ -312,16 +324,18 @@ class FacebookIE(InfoExtractor):
        if server_js_data:
            video_data = extract_video_data(server_js_data.get('instances', []))

+        def extract_from_jsmods_instances(js_data):
+            if js_data:
+                return extract_video_data(try_get(
+                    js_data, lambda x: x['jsmods']['instances'], list) or [])
+
        if not video_data:
            server_js_data = self._parse_json(
                self._search_regex(
                    r'bigPipe\.onPageletArrive\(({.+?})\)\s*;\s*}\s*\)\s*,\s*["\']onPageletArrive\s+(?:stream_pagelet|pagelet_group_mall|permalink_video_pagelet)',
                    webpage, 'js data', default='{}'),
                video_id, transform_source=js_to_json, fatal=False)
-            if server_js_data:
-                video_data = extract_video_data(try_get(
-                    server_js_data, lambda x: x['jsmods']['instances'],
-                    list) or [])
+            video_data = extract_from_jsmods_instances(server_js_data)

        if not video_data:
            if not fatal_if_no_video:
@ -333,7 +347,32 @@ class FacebookIE(InfoExtractor):
                    expected=True)
            elif '>You must log in to continue' in webpage:
                self.raise_login_required()
-            else:
+
+            # Video info not in first request, do a secondary request using
+            # tahoe player specific URL
+            tahoe_data = self._download_webpage(
+                self._VIDEO_PAGE_TAHOE_TEMPLATE % video_id, video_id,
+                data=urlencode_postdata({
+                    '__user': 0,
+                    '__a': 1,
+                    '__pc': self._search_regex(
+                        r'pkg_cohort["\']\s*:\s*["\'](.+?)["\']', webpage,
+                        'pkg cohort', default='PHASED:DEFAULT'),
+                    '__rev': self._search_regex(
+                        r'client_revision["\']\s*:\s*(\d+),', webpage,
+                        'client revision', default='3944515'),
+                }),
+                headers={
+                    'Content-Type': 'application/x-www-form-urlencoded',
+                })
+            tahoe_js_data = self._parse_json(
+                self._search_regex(
+                    r'for\s+\(\s*;\s*;\s*\)\s*;(.+)', tahoe_data,
+                    'tahoe js data', default='{}'),
+                video_id, fatal=False)
+            video_data = extract_from_jsmods_instances(tahoe_js_data)
+
+        if not video_data:
            raise ExtractorError('Cannot parse data')

        formats = []
@ -380,7 +419,8 @@ class FacebookIE(InfoExtractor):
            video_title = 'Facebook video #%s' % video_id
        uploader = clean_html(get_element_by_id(
            'fbPhotoPageAuthorName', webpage)) or self._search_regex(
-            r'ownerName\s*:\s*"([^"]+)"', webpage, 'uploader', fatal=False)
+            r'ownerName\s*:\s*"([^"]+)"', webpage, 'uploader',
+            fatal=False) or self._og_search_title(webpage, fatal=False)
        timestamp = int_or_none(self._search_regex(
            r'<abbr[^>]+data-utime=["\'](\d+)', webpage,
            'timestamp', default=None))
--- a/youtube_dl/extractor/ninecninemedia.py
+++ b/youtube_dl/extractor/ninecninemedia.py
@ -4,7 +4,6 @@ from __future__ import unicode_literals
 import re

 from .common import InfoExtractor
-from ..compat import compat_str
 from ..utils import (
    parse_iso8601,
    float_or_none,
--- a/youtube_dl/extractor/openload.py
+++ b/youtube_dl/extractor/openload.py
@ -307,6 +307,10 @@ class OpenloadIE(InfoExtractor):
    }, {
        'url': 'https://oload.download/f/kUEfGclsU9o',
        'only_matching': True,
+    }, {
+        # Its title has not got its extension but url has it
+        'url': 'https://oload.download/f/N4Otkw39VCw/Tomb.Raider.2018.HDRip.XviD.AC3-EVO.avi.mp4',
+        'only_matching': True,
    }]

    _USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36'
@ -368,8 +372,7 @@ class OpenloadIE(InfoExtractor):
            'title': title,
            'thumbnail': entry.get('thumbnail') or self._og_search_thumbnail(webpage, default=None),
            'url': video_url,
-            # Seems all videos have extensions in their titles
-            'ext': determine_ext(title, 'mp4'),
+            'ext': determine_ext(title, None) or determine_ext(url, 'mp4'),
            'subtitles': subtitles,
            'http_headers': headers,
        }
--- a/youtube_dl/extractor/twitter.py
+++ b/youtube_dl/extractor/twitter.py
@ -63,7 +63,7 @@ class TwitterCardIE(TwitterBaseIE):
                'id': '623160978427936768',
                'ext': 'mp4',
                'title': 'Twitter web player',
-                'thumbnail': r're:^https?://.*(?:\bformat=|\.)jpg',
+                'thumbnail': r're:^https?://.*$',
            },
        },
        {
@ -223,14 +223,37 @@ class TwitterCardIE(TwitterBaseIE):
                formats.extend(self._extract_mobile_formats(username, video_id))

            if formats:
-                break
-
-        self._remove_duplicate_formats(formats)
-        self._sort_formats(formats)
-
                title = self._search_regex(r'<title>([^<]+)</title>', webpage, 'title')
                thumbnail = config.get('posterImageUrl') or config.get('image_src')
                duration = float_or_none(config.get('duration'), scale=1000) or duration
+                break
+
+        if not formats:
+            config = self._download_json(
+                'https://api.twitter.com/1.1/videos/tweet/config/%s.json' % video_id,
+                video_id, headers={
+                    'Authorization': 'Bearer AAAAAAAAAAAAAAAAAAAAAIK1zgAAAAAA2tUWuhGZ2JceoId5GwYWU5GspY4%3DUq7gzFoCZs1QfwGoVdvSac3IniczZEYXIcDyumCauIXpcAPorE',
+                })
+            track = config['track']
+            vmap_url = track.get('vmapUrl')
+            if vmap_url:
+                formats = self._extract_formats_from_vmap_url(vmap_url, video_id)
+            else:
+                playback_url = track['playbackUrl']
+                if determine_ext(playback_url) == 'm3u8':
+                    formats = self._extract_m3u8_formats(
+                        playback_url, video_id, 'mp4',
+                        entry_protocol='m3u8_native', m3u8_id='hls')
+                else:
+                    formats = [{
+                        'url': playback_url,
+                    }]
+            title = 'Twitter web player'
+            thumbnail = config.get('posterImage')
+            duration = float_or_none(track.get('durationMs'), scale=1000)
+
+        self._remove_duplicate_formats(formats)
+        self._sort_formats(formats)

        return {
            'id': video_id,
@ -375,6 +398,22 @@ class TwitterIE(InfoExtractor):
        'params': {
            'skip_download': True,  # requires ffmpeg
        },
+    }, {
+        # card via api.twitter.com/1.1/videos/tweet/config
+        'url': 'https://twitter.com/LisPower1/status/1001551623938805763',
+        'info_dict': {
+            'id': '1001551623938805763',
+            'ext': 'mp4',
+            'title': 're:.*?Shep is on a roll today.*?',
+            'thumbnail': r're:^https?://.*\.jpg',
+            'description': 'md5:63b036c228772523ae1924d5f8e5ed6b',
+            'uploader': 'Lis Power',
+            'uploader_id': 'LisPower1',
+            'duration': 111.278,
+        },
+        'params': {
+            'skip_download': True,  # requires ffmpeg
+        },
    }]

    def _real_extract(self, url):
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@ -1228,7 +1228,7 @@ def unified_timestamp(date_str, day_first=True):


 def determine_ext(url, default_ext='unknown_video'):
-    if url is None:
+    if url is None or '.' not in url:
        return default_ext
    guess = url.partition('?')[0].rpartition('.')[2]
    if re.match(r'^[A-Za-z0-9]+$', guess):
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@ -1,3 +1,3 @@
 from __future__ import unicode_literals

-__version__ = '2018.05.30'
+__version__ = '2018.06.02'