Merge branch 'master' into fix.25.12.2018

2019-07-01 21:59:40 +03:00 · 2019-07-01 21:59:40 +03:00 · 116be746cd
commit 116be746cd
parent 8a0742c2a0 ed2226b0eb
7 changed files with 51 additions and 15 deletions
--- a/youtube_dl/extractor/acast.py
+++ b/youtube_dl/extractor/acast.py
@@ -7,6 +7,7 @@ import functools
 from .common import InfoExtractor
 from ..compat import compat_str
 from ..utils import (
+    clean_html,
    float_or_none,
    int_or_none,
    try_get,
@@ -27,7 +28,7 @@ class ACastIE(InfoExtractor):
                    '''
    _TESTS = [{
        'url': 'https://www.acast.com/sparpodcast/2.raggarmordet-rosterurdetforflutna',
-        'md5': 'a02393c74f3bdb1801c3ec2695577ce0',
+        'md5': '16d936099ec5ca2d5869e3a813ee8dc4',
        'info_dict': {
            'id': '2a92b283-1a75-4ad8-8396-499c641de0d9',
            'ext': 'mp3',
@@ -46,28 +47,37 @@ class ACastIE(InfoExtractor):
    }, {
        'url': 'https://play.acast.com/s/rattegangspodden/s04e09-styckmordet-i-helenelund-del-22',
        'only_matching': True,
+    }, {
+        'url': 'https://play.acast.com/s/sparpodcast/2a92b283-1a75-4ad8-8396-499c641de0d9',
+        'only_matching': True,
    }]

    def _real_extract(self, url):
        channel, display_id = re.match(self._VALID_URL, url).groups()
        s = self._download_json(
-            'https://play-api.acast.com/stitch/%s/%s' % (channel, display_id),
-            display_id)['result']
+            'https://feeder.acast.com/api/v1/shows/%s/episodes/%s' % (channel, display_id),
+            display_id)
        media_url = s['url']
+        if re.search(r'[0-9a-f]{8}-(?:[0-9a-f]{4}-){3}[0-9a-f]{12}', display_id):
+            episode_url = s.get('episodeUrl')
+            if episode_url:
+                display_id = episode_url
+            else:
+                channel, display_id = re.match(self._VALID_URL, s['link']).groups()
        cast_data = self._download_json(
            'https://play-api.acast.com/splash/%s/%s' % (channel, display_id),
            display_id)['result']
        e = cast_data['episode']
-        title = e['name']
+        title = e.get('name') or s['title']
        return {
            'id': compat_str(e['id']),
            'display_id': display_id,
            'url': media_url,
            'title': title,
-            'description': e.get('description') or e.get('summary'),
+            'description': e.get('summary') or clean_html(e.get('description') or s.get('description')),
            'thumbnail': e.get('image'),
-            'timestamp': unified_timestamp(e.get('publishingDate')),
-            'duration': float_or_none(s.get('duration') or e.get('duration')),
+            'timestamp': unified_timestamp(e.get('publishingDate') or s.get('publishDate')),
+            'duration': float_or_none(e.get('duration') or s.get('duration')),
            'filesize': int_or_none(e.get('contentLength')),
            'creator': try_get(cast_data, lambda x: x['show']['author'], compat_str),
            'series': try_get(cast_data, lambda x: x['show']['name'], compat_str),
--- a/youtube_dl/extractor/dailymotion.py
+++ b/youtube_dl/extractor/dailymotion.py
@@ -137,10 +137,16 @@ class DailymotionIE(DailymotionBaseInfoExtractor):

    @staticmethod
    def _extract_urls(webpage):
+        urls = []
        # Look for embedded Dailymotion player
-        matches = re.findall(
-            r'<(?:(?:embed|iframe)[^>]+?src=|input[^>]+id=[\'"]dmcloudUrlEmissionSelect[\'"][^>]+value=)(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/(?:embed|swf)/video/.+?)\1', webpage)
-        return list(map(lambda m: unescapeHTML(m[1]), matches))
+        # https://developer.dailymotion.com/player#player-parameters
+        for mobj in re.finditer(
+                r'<(?:(?:embed|iframe)[^>]+?src=|input[^>]+id=[\'"]dmcloudUrlEmissionSelect[\'"][^>]+value=)(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/(?:embed|swf)/video/.+?)\1', webpage):
+            urls.append(unescapeHTML(mobj.group('url')))
+        for mobj in re.finditer(
+                r'(?s)DM\.player\([^,]+,\s*{.*?video[\'"]?\s*:\s*["\']?(?P<id>[0-9a-zA-Z]+).+?}\s*\);', webpage):
+            urls.append('https://www.dailymotion.com/embed/video/' + mobj.group('id'))
+        return urls

    def _real_extract(self, url):
        video_id = self._match_id(url)
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -2104,6 +2104,23 @@ class GenericIE(InfoExtractor):
            },
            'expected_warnings': ['Failed to download MPD manifest'],
        },
+        {
+            # DailyMotion embed with DM.player
+            'url': 'https://www.beinsports.com/us/copa-del-rey/video/the-locker-room-valencia-beat-barca-in-copa/1203804',
+            'info_dict': {
+                'id': 'k6aKkGHd9FJs4mtJN39',
+                'ext': 'mp4',
+                'title': 'The Locker Room: Valencia Beat Barca In Copa del Rey Final',
+                'description': 'This video is private.',
+                'uploader_id': 'x1jf30l',
+                'uploader': 'beIN SPORTS USA',
+                'upload_date': '20190528',
+                'timestamp': 1559062971,
+            },
+            'params': {
+                'skip_download': True,
+            },
+        },
        # {
        #     # TODO: find another test
        #     # http://schema.org/VideoObject
--- a/youtube_dl/extractor/openload.py
+++ b/youtube_dl/extractor/openload.py
@@ -243,7 +243,7 @@ class PhantomJSwrapper(object):


 class OpenloadIE(InfoExtractor):
-    _DOMAINS = r'(?:openload\.(?:co|io|link|pw)|oload\.(?:tv|stream|site|xyz|win|download|cloud|cc|icu|fun|club|info|press|pw|life|live|space|services|website)|oladblock\.(?:services|xyz|me)|openloed\.co)'
+    _DOMAINS = r'(?:openload\.(?:co|io|link|pw)|oload\.(?:tv|biz|stream|site|xyz|win|download|cloud|cc|icu|fun|club|info|press|pw|life|live|space|services|website)|oladblock\.(?:services|xyz|me)|openloed\.co)'
    _VALID_URL = r'''(?x)
                    https?://
                        (?P<host>
@@ -365,6 +365,9 @@ class OpenloadIE(InfoExtractor):
    }, {
        'url': 'https://oload.life/embed/oOzZjNPw9Dc/',
        'only_matching': True,
+    }, {
+        'url': 'https://oload.biz/f/bEk3Gp8ARr4/',
+        'only_matching': True,
    }, {
        'url': 'https://oladblock.services/f/b8NWEgkqNLI/',
        'only_matching': True,
--- a/youtube_dl/extractor/twitter.py
+++ b/youtube_dl/extractor/twitter.py
@@ -2,7 +2,7 @@
 from __future__ import unicode_literals

 import re
-
+import time
 from .common import InfoExtractor
 from ..compat import compat_urlparse
 from ..utils import (
@@ -247,7 +247,7 @@ class TwitterCardIE(TwitterBaseIE):
                    '%s/guest/activate.json' % self._API_BASE, video_id,
                    'Downloading guest token', data=b'',
                    headers=headers)['guest_token']
-                self._set_cookie('api.twitter.com', 'gt', guest_token)
+                self._set_cookie('api.twitter.com', 'gt', guest_token, expire_time=time.time() + 3000)
            else:
                guest_token = guest_token_c.value

--- a/youtube_dl/extractor/xiami.py
+++ b/youtube_dl/extractor/xiami.py
@@ -7,7 +7,7 @@ from ..utils import int_or_none


 class XiamiBaseIE(InfoExtractor):
-    _API_BASE_URL = 'http://www.xiami.com/song/playlist/cat/json/id'
+    _API_BASE_URL = 'https://emumo.xiami.com/song/playlist/cat/json/id'

    def _download_webpage_handle(self, *args, **kwargs):
        webpage = super(XiamiBaseIE, self)._download_webpage_handle(*args, **kwargs)
--- a/youtube_dl/extractor/yourporn.py
+++ b/youtube_dl/extractor/yourporn.py
@@ -37,7 +37,7 @@ class YourPornIE(InfoExtractor):
            self._search_regex(
                r'data-vnfo=(["\'])(?P<data>{.+?})\1', webpage, 'data info',
                group='data'),
-            video_id)[video_id]).replace('/cdn/', '/cdn4/')
+            video_id)[video_id]).replace('/cdn/', '/cdn5/')

        title = (self._search_regex(
            r'<[^>]+\bclass=["\']PostEditTA[^>]+>([^<]+)', webpage, 'title',