Merge branch 'master' of https://github.com/rg3/youtube-dl

2016-07-05 08:29:37 +02:00 · 2016-07-05 08:29:37 +02:00 · f51f4723f3
commit f51f4723f3
parent df152a4dec c6054e3201
11 changed files with 132 additions and 49 deletions
--- a/.github/ISSUE_TEMPLATE.md
+++ b/.github/ISSUE_TEMPLATE.md
@ -6,8 +6,8 @@

 ---

-### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.07.03.1*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.07.03.1**
+### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.07.05*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.07.05**

 ### Before submitting an *issue* make sure you have:
 - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
@ -35,7 +35,7 @@ $ youtube-dl -v <your command line>
 [debug] User config: []
 [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
 [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
-[debug] youtube-dl version 2016.07.03.1
+[debug] youtube-dl version 2016.07.05
 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
 [debug] Proxy map: {}
--- a/README.md
+++ b/README.md
@ -103,9 +103,9 @@ which means you can modify it, redistribute it or use it however you like.
                                     (experimental)
    -6, --force-ipv6                 Make all connections via IPv6
                                     (experimental)
-    --cn-verification-proxy URL      Use this proxy to verify the IP address for
-                                     some Chinese sites. The default proxy
-                                     specified by --proxy (or none, if the
+    --geo-verification-proxy URL     Use this proxy to verify the IP address for
+                                     some geo-restricted sites. The default
+                                     proxy specified by --proxy (or none, if the
                                     option is not present) is used for the
                                     actual downloading. (experimental)

--- a/test/test_utils.py
+++ b/test/test_utils.py
@ -405,6 +405,12 @@ class TestUtil(unittest.TestCase):
        self.assertEqual(res_url, url)
        self.assertEqual(res_data, None)

+        smug_url = smuggle_url(url, {'a': 'b'})
+        smug_smug_url = smuggle_url(smug_url, {'c': 'd'})
+        res_url, res_data = unsmuggle_url(smug_smug_url)
+        self.assertEqual(res_url, url)
+        self.assertEqual(res_data, {'a': 'b', 'c': 'd'})
+
    def test_shell_quote(self):
        args = ['ffmpeg', '-i', encodeFilename('ñ€ß\'.mp4')]
        self.assertEqual(shell_quote(args), """ffmpeg -i 'ñ€ß'"'"'.mp4'""")
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@ -1066,6 +1066,7 @@ from .youtube import (
    YoutubeSearchDateIE,
    YoutubeSearchIE,
    YoutubeSearchURLIE,
+    YoutubeSharedVideoIE,
    YoutubeShowIE,
    YoutubeSubscriptionsIE,
    YoutubeTruncatedIDIE,
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@ -1295,6 +1295,21 @@ class GenericIE(InfoExtractor):
                'uploader': 'cylus cyrus',
            },
        },
+        {
+            # video stored on custom kaltura server
+            'url': 'http://www.expansion.com/multimedia/videos.html?media=EQcM30NHIPv',
+            'md5': '537617d06e64dfed891fa1593c4b30cc',
+            'info_dict': {
+                'id': '0_1iotm5bh',
+                'ext': 'mp4',
+                'title': 'Elecciones británicas: 5 lecciones para Rajoy',
+                'description': 'md5:435a89d68b9760b92ce67ed227055f16',
+                'uploader_id': 'videos.expansion@el-mundo.net',
+                'upload_date': '20150429',
+                'timestamp': 1430303472,
+            },
+            'add_ie': ['Kaltura'],
+        },
    ]

    def report_following_redirect(self, new_url):
--- a/youtube_dl/extractor/kaltura.py
+++ b/youtube_dl/extractor/kaltura.py
@ -6,7 +6,6 @@ import base64

 from .common import InfoExtractor
 from ..compat import (
-    compat_urllib_parse_urlencode,
    compat_urlparse,
    compat_parse_qs,
 )
@ -15,6 +14,7 @@ from ..utils import (
    ExtractorError,
    int_or_none,
    unsmuggle_url,
+    smuggle_url,
 )


@ -34,7 +34,8 @@ class KalturaIE(InfoExtractor):
                        )(?:/(?P<path>[^?]+))?(?:\?(?P<query>.*))?
                )
                '''
-    _API_BASE = 'http://cdnapi.kaltura.com/api_v3/index.php?'
+    _SERVICE_URL = 'http://cdnapi.kaltura.com'
+    _SERVICE_BASE = '/api_v3/index.php'
    _TESTS = [
        {
            'url': 'kaltura:269692:1_1jc2y3e4',
@ -88,18 +89,26 @@ class KalturaIE(InfoExtractor):
                    (?P<q3>["\'])(?P<id>.+?)(?P=q3)
                ''', webpage))
        if mobj:
-            return 'kaltura:%(partner_id)s:%(id)s' % mobj.groupdict()
+            embed_info = mobj.groupdict()
+            url = 'kaltura:%(partner_id)s:%(id)s' % embed_info
+            escaped_pid = re.escape(embed_info['partner_id'])
+            service_url = re.search(
+                r'<script[^>]+src=["\']((?:https?:)?//.+?)/p/%s/sp/%s00/embedIframeJs' % (escaped_pid, escaped_pid),
+                webpage)
+            if service_url:
+                url = smuggle_url(url, {'service_url': service_url.group(1)})
+            return url

-    def _kaltura_api_call(self, video_id, actions, *args, **kwargs):
+    def _kaltura_api_call(self, video_id, actions, service_url=None, *args, **kwargs):
        params = actions[0]
        if len(actions) > 1:
            for i, a in enumerate(actions[1:], start=1):
                for k, v in a.items():
                    params['%d:%s' % (i, k)] = v

-        query = compat_urllib_parse_urlencode(params)
-        url = self._API_BASE + query
-        data = self._download_json(url, video_id, *args, **kwargs)
+        data = self._download_json(
+            (service_url or self._SERVICE_URL) + self._SERVICE_BASE,
+            video_id, query=params, *args, **kwargs)

        status = data if len(actions) == 1 else data[0]
        if status.get('objectType') == 'KalturaAPIException':
@ -108,7 +117,7 @@ class KalturaIE(InfoExtractor):

        return data

-    def _get_kaltura_signature(self, video_id, partner_id):
+    def _get_kaltura_signature(self, video_id, partner_id, service_url=None):
        actions = [{
            'apiVersion': '3.1',
            'expiry': 86400,
@ -118,10 +127,10 @@ class KalturaIE(InfoExtractor):
            'widgetId': '_%s' % partner_id,
        }]
        return self._kaltura_api_call(
-            video_id, actions, note='Downloading Kaltura signature')['ks']
+            video_id, actions, service_url, note='Downloading Kaltura signature')['ks']

-    def _get_video_info(self, video_id, partner_id):
-        signature = self._get_kaltura_signature(video_id, partner_id)
+    def _get_video_info(self, video_id, partner_id, service_url=None):
+        signature = self._get_kaltura_signature(video_id, partner_id, service_url)
        actions = [
            {
                'action': 'null',
@ -144,7 +153,7 @@ class KalturaIE(InfoExtractor):
            },
        ]
        return self._kaltura_api_call(
-            video_id, actions, note='Downloading video info JSON')
+            video_id, actions, service_url, note='Downloading video info JSON')

    def _real_extract(self, url):
        url, smuggled_data = unsmuggle_url(url, {})
@ -153,7 +162,7 @@ class KalturaIE(InfoExtractor):
        partner_id, entry_id = mobj.group('partner_id', 'id')
        ks = None
        if partner_id and entry_id:
-            info, flavor_assets = self._get_video_info(entry_id, partner_id)
+            info, flavor_assets = self._get_video_info(entry_id, partner_id, smuggled_data.get('service_url'))
        else:
            path, query = mobj.group('path', 'query')
            if not path and not query:
@ -201,12 +210,17 @@ class KalturaIE(InfoExtractor):
                unsigned_url += '?referrer=%s' % referrer
            return unsigned_url

+        data_url = info['dataUrl']
+        if '/flvclipper/' in data_url:
+            data_url = re.sub(r'/flvclipper/.*', '/serveFlavor', data_url)
+
        formats = []
        for f in flavor_assets:
            # Continue if asset is not ready
            if f['status'] != 2:
                continue
-            video_url = sign_url('%s/flavorId/%s' % (info['dataUrl'], f['id']))
+            video_url = sign_url(
+                '%s/flavorId/%s' % (data_url, f['id']))
            formats.append({
                'format_id': '%(fileExt)s-%(bitrate)s' % f,
                'ext': f.get('fileExt'),
@ -219,9 +233,12 @@ class KalturaIE(InfoExtractor):
                'width': int_or_none(f.get('width')),
                'url': video_url,
            })
-        m3u8_url = sign_url(info['dataUrl'].replace('format/url', 'format/applehttp'))
-        formats.extend(self._extract_m3u8_formats(
-            m3u8_url, entry_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
+        if '/playManifest/' in data_url:
+            m3u8_url = sign_url(data_url.replace(
+                'format/url', 'format/applehttp'))
+            formats.extend(self._extract_m3u8_formats(
+                m3u8_url, entry_id, 'mp4', 'm3u8_native',
+                m3u8_id='hls', fatal=False))

        self._check_formats(formats, entry_id)
        self._sort_formats(formats)
--- a/youtube_dl/extractor/la7.py
+++ b/youtube_dl/extractor/la7.py
@ -3,8 +3,8 @@ from __future__ import unicode_literals

 from .common import InfoExtractor
 from ..utils import (
-    determine_ext,
    js_to_json,
+    smuggle_url,
 )


@ -18,13 +18,16 @@ class LA7IE(InfoExtractor):
    _TESTS = [{
        # 'src' is a plain URL
        'url': 'http://www.la7.it/crozza/video/inccool8-02-10-2015-163722',
-        'md5': '6054674766e7988d3e02f2148ff92180',
+        'md5': '8b613ffc0c4bf9b9e377169fc19c214c',
        'info_dict': {
            'id': 'inccool8-02-10-2015-163722',
            'ext': 'mp4',
            'title': 'Inc.Cool8',
            'description': 'Benvenuti nell\'incredibile mondo della INC. COOL. 8. dove “INC.” sta per “Incorporated” “COOL” sta per “fashion” ed Eight sta per il gesto  atletico',
            'thumbnail': 're:^https?://.*',
+            'uploader_id': 'kdla7pillole@iltrovatore.it',
+            'timestamp': 1443814869,
+            'upload_date': '20151002',
        },
    }, {
        # 'src' is a dictionary
@ -49,26 +52,14 @@ class LA7IE(InfoExtractor):
            self._search_regex(r'videoLa7\(({[^;]+})\);', webpage, 'player data'),
            video_id, transform_source=js_to_json)

-        source = player_data['src']
-        source_urls = source.values() if isinstance(source, dict) else [source]
-
-        formats = []
-        for source_url in source_urls:
-            ext = determine_ext(source_url)
-            if ext == 'm3u8':
-                formats.extend(self._extract_m3u8_formats(
-                    source_url, video_id, ext='mp4',
-                    entry_protocol='m3u8_native', m3u8_id='hls'))
-            else:
-                formats.append({
-                    'url': source_url,
-                })
-        self._sort_formats(formats)
-
        return {
+            '_type': 'url_transparent',
+            'url': smuggle_url('kaltura:103:%s' % player_data['vid'], {
+                'service_url': 'http://kdam.iltrovatore.it',
+            }),
            'id': video_id,
            'title': player_data['title'],
            'description': self._og_search_description(webpage, default=None),
            'thumbnail': player_data.get('poster'),
-            'formats': formats,
+            'ie_key': 'Kaltura',
        }
--- a/youtube_dl/extractor/xuite.py
+++ b/youtube_dl/extractor/xuite.py
@ -67,6 +67,20 @@ class XuiteIE(InfoExtractor):
            'categories': ['電玩動漫'],
        },
        'skip': 'Video removed',
+    }, {
+        # Video with encoded media id
+        # from http://forgetfulbc.blogspot.com/2016/06/date.html
+        'url': 'http://vlog.xuite.net/embed/cE1xbENoLTI3NDQ3MzM2LmZsdg==?ar=0&as=0',
+        'info_dict': {
+            'id': 'cE1xbENoLTI3NDQ3MzM2LmZsdg==',
+            'ext': 'mp4',
+            'title': '男女平權只是口號？專家解釋約會時男生是否該幫女生付錢 (中字)',
+            'description': 'md5:f0abdcb69df300f522a5442ef3146f2a',
+            'timestamp': 1466160960,
+            'upload_date': '20160617',
+            'uploader': 'B.C. & Lowy',
+            'uploader_id': '232279340',
+        },
    }, {
        'url': 'http://vlog.xuite.net/play/S1dDUjdyLTMyOTc3NjcuZmx2/%E5%AD%AB%E7%87%95%E5%A7%BF-%E7%9C%BC%E6%B7%9A%E6%88%90%E8%A9%A9',
        'only_matching': True,
@ -80,10 +94,9 @@ class XuiteIE(InfoExtractor):
    def base64_encode_utf8(data):
        return base64.b64encode(data.encode('utf-8')).decode('utf-8')

-    def _extract_flv_config(self, media_id):
-        base64_media_id = self.base64_encode_utf8(media_id)
+    def _extract_flv_config(self, encoded_media_id):
        flv_config = self._download_xml(
-            'http://vlog.xuite.net/flash/player?media=%s' % base64_media_id,
+            'http://vlog.xuite.net/flash/player?media=%s' % encoded_media_id,
            'flv config')
        prop_dict = {}
        for prop in flv_config.findall('./property'):
@ -108,9 +121,14 @@ class XuiteIE(InfoExtractor):
                '%s returned error: %s' % (self.IE_NAME, error_msg),
                expected=True)

-        video_id = self._html_search_regex(
-            r'data-mediaid="(\d+)"', webpage, 'media id')
-        flv_config = self._extract_flv_config(video_id)
+        encoded_media_id = self._search_regex(
+            r'attributes\.name\s*=\s*"([^"]+)"', webpage,
+            'encoded media id', default=None)
+        if encoded_media_id is None:
+            video_id = self._html_search_regex(
+                r'data-mediaid="(\d+)"', webpage, 'media id')
+            encoded_media_id = self.base64_encode_utf8(video_id)
+        flv_config = self._extract_flv_config(encoded_media_id)

        FORMATS = {
            'audio': 'mp3',
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@ -1730,6 +1730,39 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
        }


+class YoutubeSharedVideoIE(InfoExtractor):  # Handles youtube.com/shared?ci=<id> links by delegating to YoutubeIE
+    _VALID_URL = r'(?:https?:)?//(?:www\.)?youtube\.com/shared\?ci=(?P<id>[0-9A-Za-z_-]{11})'  # group 'id' captures the 11-character "ci" value
+    IE_NAME = 'youtube:shared'
+
+    _TEST = {
+        'url': 'https://www.youtube.com/shared?ci=1nEzmT-M4fU',
+        'info_dict': {
+            'id': 'uPDB5I9wfp8',  # differs from the "ci" value in 'url' above
+            'ext': 'webm',
+            'title': 'Pocoyo: 90 minutos de episódios completos Português para crianças - PARTE 3',
+            'description': 'md5:d9e4d9346a2dfff4c7dc4c8cec0f546d',
+            'upload_date': '20160219',
+            'uploader': 'Pocoyo - Português (BR)',
+            'uploader_id': 'PocoyoBrazil',
+        },
+        'add_ie': ['Youtube'],
+        'params': {
+            # There are already too many Youtube downloads
+            'skip_download': True,
+        },
+    }
+
+    def _real_extract(self, url):  # returns a url_result pointing YoutubeIE at the real video id
+        video_id = self._match_id(url)  # the "ci" share id; used below only when fetching the page
+
+        webpage = self._download_webpage(url, video_id)
+
+        real_video_id = self._html_search_meta(
+            'videoId', webpage, 'YouTube video id', fatal=True)  # reads the page's "videoId" meta value; fatal=True presumably makes a missing tag an error - confirm
+
+        return self.url_result(real_video_id, YoutubeIE.ie_key())
+
+
 class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
    IE_DESC = 'YouTube.com playlists'
    _VALID_URL = r"""(?x)(?:
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@ -1444,6 +1444,8 @@ def shell_quote(args):
 def smuggle_url(url, data):
    """ Pass additional data in a URL for internal use. """

+    url, idata = unsmuggle_url(url, {})
+    data.update(idata)
    sdata = compat_urllib_parse_urlencode(
        {'__youtubedl_smuggle': json.dumps(data)})
    return url + '#' + sdata
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@ -1,3 +1,3 @@
 from __future__ import unicode_literals

-__version__ = '2016.07.03.1'
+__version__ = '2016.07.05'