Merge pull request #1 from rg3/master

Updating fork to match rg3
PuffingtonToast 2017-08-12 13:50:45 -07:00 committed by GitHub
commit 2bb34f0ad2
16 changed files with 251 additions and 49 deletions

View File

@@ -6,8 +6,8 @@
 ---
-### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.08.06*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
+### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.08.09*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
-- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.08.06**
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.08.09**
 ### Before submitting an *issue* make sure you have:
 - [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
@@ -35,7 +35,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl
 [debug] User config: []
 [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
 [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
-[debug] youtube-dl version 2017.08.06
+[debug] youtube-dl version 2017.08.09
 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
 [debug] Proxy map: {}

View File

@@ -1,6 +1,14 @@
-version <unreleased>
+version 2017.08.09
 
+Core
+* [utils] Skip missing params in cli_bool_option (#13865)
+
 Extractors
+* [xxxymovies] Fix title extraction (#13868)
++ [nick] Add support for nick.com.pl (#13860)
+* [mixcloud] Fix play info decryption (#13867)
+* [20min] Fix embeds extraction (#13852)
+* [dplayit] Fix extraction (#13851)
 + [niconico] Support videos with multiple formats (#13522)
 + [niconico] Support HTML5-only videos (#13806)

View File

@@ -1182,6 +1182,10 @@ part 3</font></u>
             cli_bool_option(
                 {'nocheckcertificate': False}, '--check-certificate', 'nocheckcertificate', 'false', 'true', '='),
             ['--check-certificate=true'])
+        self.assertEqual(
+            cli_bool_option(
+                {}, '--check-certificate', 'nocheckcertificate', 'false', 'true', '='),
+            [])
 
     def test_ohdave_rsa_encrypt(self):
         N = 0xab86b6371b5318aaa1d3c9e612a9f1264f372323c8c0f19875b5fc3b3fd3afcc1e5bec527aa94bfa85bffc157e4245aebda05389a5357b75115ac94f074aefcd

View File

@@ -1500,7 +1500,7 @@ class YoutubeDL(object):
             sanitize_string_field(format, 'format_id')
             sanitize_numeric_fields(format)
             format['url'] = sanitize_url(format['url'])
-            if format.get('format_id') is None:
+            if not format.get('format_id'):
                 format['format_id'] = compat_str(i)
             else:
                 # Sanitize format_id from characters used in format selector expression
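The switch from an `is None` check to a truthiness check means an empty-string `format_id` is now also replaced by the format's index, not only a missing one. A minimal standalone sketch of the effect (plain Python, not the YoutubeDL code path itself):

```python
# Minimal sketch: `not format.get('format_id')` catches both a missing and an empty id.
formats = [
    {'url': 'https://example.com/a.mp4'},                    # no format_id at all
    {'url': 'https://example.com/b.mp4', 'format_id': ''},   # empty format_id
    {'url': 'https://example.com/c.mp4', 'format_id': 'hd'},
]

for i, fmt in enumerate(formats):
    if not fmt.get('format_id'):
        fmt['format_id'] = str(i)

print([f['format_id'] for f in formats])  # ['0', '1', 'hd']
```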

View File

@@ -3,13 +3,13 @@ from __future__ import unicode_literals
 from .common import InfoExtractor
 from ..utils import (
-    ExtractorError,
-    HEADRequest,
+    int_or_none,
+    mimetype2ext,
 )
 
 
 class AparatIE(InfoExtractor):
-    _VALID_URL = r'^https?://(?:www\.)?aparat\.com/(?:v/|video/video/embed/videohash/)(?P<id>[a-zA-Z0-9]+)'
+    _VALID_URL = r'https?://(?:www\.)?aparat\.com/(?:v/|video/video/embed/videohash/)(?P<id>[a-zA-Z0-9]+)'
 
     _TEST = {
         'url': 'http://www.aparat.com/v/wP8On',
@@ -29,30 +29,41 @@ class AparatIE(InfoExtractor):
         # Note: There is an easier-to-parse configuration at
         # http://www.aparat.com/video/video/config/videohash/%video_id
         # but the URL in there does not work
-        embed_url = 'http://www.aparat.com/video/video/embed/vt/frame/showvideo/yes/videohash/' + video_id
-        webpage = self._download_webpage(embed_url, video_id)
-
-        file_list = self._parse_json(self._search_regex(
-            r'fileList\s*=\s*JSON\.parse\(\'([^\']+)\'\)', webpage, 'file list'), video_id)
-        for i, item in enumerate(file_list[0]):
-            video_url = item['file']
-            req = HEADRequest(video_url)
-            res = self._request_webpage(
-                req, video_id, note='Testing video URL %d' % i, errnote=False)
-            if res:
-                break
-        else:
-            raise ExtractorError('No working video URLs found')
+        webpage = self._download_webpage(
+            'http://www.aparat.com/video/video/embed/vt/frame/showvideo/yes/videohash/' + video_id,
+            video_id)
 
         title = self._search_regex(r'\s+title:\s*"([^"]+)"', webpage, 'title')
 
+        file_list = self._parse_json(
+            self._search_regex(
+                r'fileList\s*=\s*JSON\.parse\(\'([^\']+)\'\)', webpage,
+                'file list'),
+            video_id)
+
+        formats = []
+        for item in file_list[0]:
+            file_url = item.get('file')
+            if not file_url:
+                continue
+            ext = mimetype2ext(item.get('type'))
+            label = item.get('label')
+            formats.append({
+                'url': file_url,
+                'ext': ext,
+                'format_id': label or ext,
+                'height': int_or_none(self._search_regex(
+                    r'(\d+)[pP]', label or '', 'height', default=None)),
+            })
+        self._sort_formats(formats)
+
         thumbnail = self._search_regex(
             r'image:\s*"([^"]+)"', webpage, 'thumbnail', fatal=False)
 
         return {
             'id': video_id,
             'title': title,
-            'url': video_url,
-            'ext': 'mp4',
             'thumbnail': thumbnail,
             'age_limit': self._family_friendly_search(webpage),
+            'formats': formats,
         }
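To make the new format mapping concrete, here is a standalone sketch of how one fileList entry turns into a format dict. The sample item is hypothetical, and `mimetype2ext`/`int_or_none` are replaced by simplified stand-ins:

```python
import re

# Hypothetical fileList entry, shaped like the ones the extractor parses.
item = {'file': 'https://example.com/video-720.mp4',
        'type': 'video/mp4',
        'label': '720p'}

ext = {'video/mp4': 'mp4', 'video/webm': 'webm'}.get(item.get('type'))  # stand-in for mimetype2ext
label = item.get('label')
height_match = re.search(r'(\d+)[pP]', label or '')

fmt = {
    'url': item['file'],
    'ext': ext,
    'format_id': label or ext,
    'height': int(height_match.group(1)) if height_match else None,  # stand-in for int_or_none
}
print(fmt['format_id'], fmt['height'])  # 720p 720
```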

View File

@@ -940,7 +940,8 @@ class InfoExtractor(object):
     def _family_friendly_search(self, html):
         # See http://schema.org/VideoObject
-        family_friendly = self._html_search_meta('isFamilyFriendly', html)
+        family_friendly = self._html_search_meta(
+            'isFamilyFriendly', html, default=None)
 
         if not family_friendly:
             return None
@@ -2114,9 +2115,9 @@
                 return f
             return {}
 
-        def _media_formats(src, cur_media_type):
+        def _media_formats(src, cur_media_type, type_info={}):
             full_url = absolute_url(src)
-            ext = determine_ext(full_url)
+            ext = type_info.get('ext') or determine_ext(full_url)
             if ext == 'm3u8':
                 is_plain_url = False
                 formats = self._extract_m3u8_formats(
@@ -2165,9 +2166,9 @@
                 src = source_attributes.get('src')
                 if not src:
                     continue
-                is_plain_url, formats = _media_formats(src, media_type)
+                f = parse_content_type(source_attributes.get('type'))
+                is_plain_url, formats = _media_formats(src, media_type, f)
                 if is_plain_url:
-                    f = parse_content_type(source_attributes.get('type'))
                     f.update(formats[0])
                     media_info['formats'].append(f)
                 else:
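Threading the `<source type=...>` attribute into `_media_formats` means the format extension can come from the MIME type when the URL itself carries none (for example a bare streaming URL with a query string). A standalone sketch of that fallback order, with simplified stand-ins for `mimetype2ext`/`determine_ext`:

```python
from urllib.parse import urlparse

# Simplified stand-ins for youtube-dl's mimetype2ext / determine_ext helpers.
MIME_TO_EXT = {'video/mp4': 'mp4', 'video/webm': 'webm', 'application/x-mpegurl': 'm3u8'}

def url_ext(url):
    path = urlparse(url).path
    return path.rsplit('.', 1)[1] if '.' in path else None

def media_ext(src, type_attr=None):
    # Prefer the <source type="..."> MIME type, fall back to the URL's extension.
    mime = (type_attr or '').split(';')[0].strip().lower()
    return MIME_TO_EXT.get(mime) or url_ext(src)

print(media_ext('https://example.com/stream?id=42', 'application/x-mpegURL'))  # m3u8
print(media_ext('https://example.com/clip.mp4'))                               # mp4
```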

View File

@@ -840,6 +840,10 @@ from .rai import (
 from .rbmaradio import RBMARadioIE
 from .rds import RDSIE
 from .redbulltv import RedBullTVIE
+from .reddit import (
+    RedditIE,
+    RedditRIE,
+)
 from .redtube import RedTubeIE
 from .regiotv import RegioTVIE
 from .rentv import (

View File

@@ -98,6 +98,7 @@ from .wistia import WistiaIE
 from .mediaset import MediasetIE
 from .joj import JojIE
 from .megaphone import MegaphoneIE
+from .vzaar import VzaarIE
 
 
 class GenericIE(InfoExtractor):
@@ -1784,6 +1785,21 @@ class GenericIE(InfoExtractor):
             },
             'playlist_mincount': 5,
         },
+        {
+            # Limelight embed (LimelightPlayerUtil.embed)
+            'url': 'https://tv5.ca/videos?v=xuu8qowr291ri',
+            'info_dict': {
+                'id': '95d035dc5c8a401588e9c0e6bd1e9c92',
+                'ext': 'mp4',
+                'title': '07448641',
+                'timestamp': 1499890639,
+                'upload_date': '20170712',
+            },
+            'params': {
+                'skip_download': True,
+            },
+            'add_ie': ['LimelightMedia'],
+        },
         {
             'url': 'http://kron4.com/2017/04/28/standoff-with-walnut-creek-murder-suspect-ends-with-arrest/',
             'info_dict': {
@@ -1840,6 +1856,16 @@ class GenericIE(InfoExtractor):
                 'title': 'Стас Намин: «Мы нарушили девственность Кремля»',
             },
         },
+        {
+            # vzaar embed
+            'url': 'http://help.vzaar.com/article/165-embedding-video',
+            'md5': '7e3919d9d2620b89e3e00bec7fe8c9d4',
+            'info_dict': {
+                'id': '8707641',
+                'ext': 'mp4',
+                'title': 'Building A Business Online: Principal Chairs Q & A',
+            },
+        },
         # {
         #     # TODO: find another test
         #     # http://schema.org/VideoObject
@@ -2811,6 +2837,12 @@
             return self.playlist_from_matches(
                 mpfn_urls, video_id, video_title, ie=MegaphoneIE.ie_key())
 
+        # Look for vzaar embeds
+        vzaar_urls = VzaarIE._extract_urls(webpage)
+        if vzaar_urls:
+            return self.playlist_from_matches(
+                vzaar_urls, video_id, video_title, ie=VzaarIE.ie_key())
+
         def merge_dicts(dict1, dict2):
             merged = {}
             for k, v in dict1.items():

View File

@@ -26,14 +26,16 @@ class LimelightBaseIE(InfoExtractor):
             'Channel': 'channel',
             'ChannelList': 'channel_list',
         }
+
+        def smuggle(url):
+            return smuggle_url(url, {'source_url': source_url})
+
         entries = []
         for kind, video_id in re.findall(
                 r'LimelightPlayer\.doLoad(Media|Channel|ChannelList)\(["\'](?P<id>[a-z0-9]{32})',
                 webpage):
             entries.append(cls.url_result(
-                smuggle_url(
-                    'limelight:%s:%s' % (lm[kind], video_id),
-                    {'source_url': source_url}),
+                smuggle('limelight:%s:%s' % (lm[kind], video_id)),
                 'Limelight%s' % kind, video_id))
         for mobj in re.finditer(
                 # As per [1] class attribute should be exactly equal to
@@ -49,10 +51,15 @@
                 ''', webpage):
             kind, video_id = mobj.group('kind'), mobj.group('id')
             entries.append(cls.url_result(
-                smuggle_url(
-                    'limelight:%s:%s' % (kind, video_id),
-                    {'source_url': source_url}),
+                smuggle('limelight:%s:%s' % (kind, video_id)),
                 'Limelight%s' % kind.capitalize(), video_id))
+        # http://support.3playmedia.com/hc/en-us/articles/115009517327-Limelight-Embedding-the-Audio-Description-Plugin-with-the-Limelight-Player-on-Your-Web-Page)
+        for video_id in re.findall(
+                r'(?s)LimelightPlayerUtil\.embed\s*\(\s*{.*?\bmediaId["\']\s*:\s*["\'](?P<id>[a-z0-9]{32})',
+                webpage):
+            entries.append(cls.url_result(
+                smuggle('limelight:media:%s' % video_id),
+                LimelightMediaIE.ie_key(), video_id))
         return entries
 
     def _call_playlist_service(self, item_id, method, fatal=True, referer=None):
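A quick check of the new `LimelightPlayerUtil.embed` pattern from the diff against a hand-written page fragment (the markup below is illustrative, not taken from a real site; the media id matches the test case added in generic.py above):

```python
import re

# Illustrative page fragment using the LimelightPlayerUtil.embed() style.
webpage = '''
<script>
  LimelightPlayerUtil.embed({
    "height": 480,
    "width": 640,
    "mediaId": "95d035dc5c8a401588e9c0e6bd1e9c92"
  });
</script>
'''

media_ids = re.findall(
    r'(?s)LimelightPlayerUtil\.embed\s*\(\s*{.*?\bmediaId["\']\s*:\s*["\'](?P<id>[a-z0-9]{32})',
    webpage)
print(media_ids)  # ['95d035dc5c8a401588e9c0e6bd1e9c92']
```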

View File

@@ -54,15 +54,23 @@ class MixcloudIE(InfoExtractor):
     }]
 
     # See https://www.mixcloud.com/media/js2/www_js_2.9e23256562c080482435196ca3975ab5.js
-    @staticmethod
-    def _decrypt_play_info(play_info):
-        KEY = 'pleasedontdownloadourmusictheartistswontgetpaid'
-
+    def _decrypt_play_info(self, play_info, video_id):
+        KEYS = (
+            'pleasedontdownloadourmusictheartistswontgetpaid',
+            'window.addEventListener = window.addEventListener || function() {};',
+            '(function() { return new Date().toLocaleDateString(); })()',
+        )
         play_info = base64.b64decode(play_info.encode('ascii'))
-
-        return ''.join([
-            compat_chr(compat_ord(ch) ^ compat_ord(KEY[idx % len(KEY)]))
-            for idx, ch in enumerate(play_info)])
+        for num, key in enumerate(KEYS, start=1):
+            try:
+                return self._parse_json(
+                    ''.join([
+                        compat_chr(compat_ord(ch) ^ compat_ord(key[idx % len(key)]))
+                        for idx, ch in enumerate(play_info)]),
+                    video_id)
+            except ExtractorError:
+                if num == len(KEYS):
+                    raise
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
@@ -78,8 +86,8 @@
         encrypted_play_info = self._search_regex(
             r'm-play-info="([^"]+)"', webpage, 'play info')
 
-        play_info = self._parse_json(
-            self._decrypt_play_info(encrypted_play_info), track_id)
+        play_info = self._decrypt_play_info(encrypted_play_info, track_id)
 
         if message and 'stream_url' not in play_info:
             raise ExtractorError('%s said: %s' % (self.IE_NAME, message), expected=True)
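The decryption itself is a repeating-key XOR over the base64-decoded blob; the change is that several candidate keys are now tried until one yields valid JSON. A standalone sketch of that idea (the keys and payload here are made up for the example):

```python
import base64
import json

def xor_decrypt(data, key):
    # Repeating-key XOR, mirroring the compat_chr/compat_ord loop in the extractor.
    return ''.join(chr(b ^ ord(key[i % len(key)])) for i, b in enumerate(data))

def decrypt_play_info(encoded, keys):
    blob = base64.b64decode(encoded.encode('ascii'))
    for num, key in enumerate(keys, start=1):
        try:
            return json.loads(xor_decrypt(blob, key))
        except ValueError:
            if num == len(keys):
                raise

# Made-up example: encrypt a tiny JSON document with the second key, then recover it.
KEYS = ('first-key-that-no-longer-works', 'second-key-that-does')
plaintext = '{"stream_url": "https://example.com/a.m4a"}'
encoded = base64.b64encode(
    bytes(ord(c) ^ ord(KEYS[1][i % len(KEYS[1])]) for i, c in enumerate(plaintext))).decode('ascii')

print(decrypt_play_info(encoded, KEYS))  # {'stream_url': 'https://example.com/a.m4a'}
```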

View File

@@ -75,7 +75,7 @@ class NickIE(MTVServicesInfoExtractor):
 class NickDeIE(MTVServicesInfoExtractor):
     IE_NAME = 'nick.de'
-    _VALID_URL = r'https?://(?:www\.)?(?P<host>nick\.de|nickelodeon\.(?:nl|at))/(?:playlist|shows)/(?:[^/]+/)*(?P<id>[^/?#&]+)'
+    _VALID_URL = r'https?://(?:www\.)?(?P<host>nick\.(?:de|com\.pl)|nickelodeon\.(?:nl|at))/[^/]+/(?:[^/]+/)*(?P<id>[^/?#&]+)'
     _TESTS = [{
         'url': 'http://www.nick.de/playlist/3773-top-videos/videos/episode/17306-zu-wasser-und-zu-land-rauchende-erdnusse',
         'only_matching': True,
@@ -88,6 +88,9 @@ class NickDeIE(MTVServicesInfoExtractor):
     }, {
         'url': 'http://www.nickelodeon.at/playlist/3773-top-videos/videos/episode/77993-das-letzte-gefecht',
         'only_matching': True,
+    }, {
+        'url': 'http://www.nick.com.pl/seriale/474-spongebob-kanciastoporty/wideo/17412-teatr-to-jest-to-rodeo-oszolom',
+        'only_matching': True,
     }]
 
     def _extract_mrss_url(self, webpage, host):
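A quick sanity check of the widened `_VALID_URL` against the new nick.com.pl test URL (standalone snippet, using the pattern exactly as it appears in the diff):

```python
import re

VALID_URL = r'https?://(?:www\.)?(?P<host>nick\.(?:de|com\.pl)|nickelodeon\.(?:nl|at))/[^/]+/(?:[^/]+/)*(?P<id>[^/?#&]+)'

m = re.match(VALID_URL, 'http://www.nick.com.pl/seriale/474-spongebob-kanciastoporty/wideo/17412-teatr-to-jest-to-rodeo-oszolom')
print(m.group('host'))  # nick.com.pl
print(m.group('id'))    # 17412-teatr-to-jest-to-rodeo-oszolom
```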

View File

@@ -0,0 +1,114 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    ExtractorError,
+    int_or_none,
+    float_or_none,
+)
+
+
+class RedditIE(InfoExtractor):
+    _VALID_URL = r'https?://v\.redd\.it/(?P<id>[^/?#&]+)'
+    _TEST = {
+        # from https://www.reddit.com/r/videos/comments/6rrwyj/that_small_heart_attack/
+        'url': 'https://v.redd.it/zv89llsvexdz',
+        'md5': '655d06ace653ea3b87bccfb1b27ec99d',
+        'info_dict': {
+            'id': 'zv89llsvexdz',
+            'ext': 'mp4',
+            'title': 'zv89llsvexdz',
+        },
+        'params': {
+            'format': 'bestvideo',
+        },
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        formats = self._extract_m3u8_formats(
+            'https://v.redd.it/%s/HLSPlaylist.m3u8' % video_id, video_id,
+            'mp4', entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)
+
+        formats.extend(self._extract_mpd_formats(
+            'https://v.redd.it/%s/DASHPlaylist.mpd' % video_id, video_id,
+            mpd_id='dash', fatal=False))
+
+        return {
+            'id': video_id,
+            'title': video_id,
+            'formats': formats,
+        }
+
+
+class RedditRIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?reddit\.com/r/[^/]+/comments/(?P<id>[^/]+)'
+    _TESTS = [{
+        'url': 'https://www.reddit.com/r/videos/comments/6rrwyj/that_small_heart_attack/',
+        'info_dict': {
+            'id': 'zv89llsvexdz',
+            'ext': 'mp4',
+            'title': 'That small heart attack.',
+            'thumbnail': r're:^https?://.*\.jpg$',
+            'timestamp': 1501941939,
+            'upload_date': '20170805',
+            'uploader': 'Antw87',
+            'like_count': int,
+            'dislike_count': int,
+            'comment_count': int,
+            'age_limit': 0,
+        },
+        'params': {
+            'format': 'bestvideo',
+            'skip_download': True,
+        },
+    }, {
+        'url': 'https://www.reddit.com/r/videos/comments/6rrwyj',
+        'only_matching': True,
+    }, {
+        # imgur
+        'url': 'https://www.reddit.com/r/MadeMeSmile/comments/6t7wi5/wait_for_it/',
+        'only_matching': True,
+    }, {
+        # streamable
+        'url': 'https://www.reddit.com/r/videos/comments/6t7sg9/comedians_hilarious_joke_about_the_guam_flag/',
+        'only_matching': True,
+    }, {
+        # youtube
+        'url': 'https://www.reddit.com/r/videos/comments/6t75wq/southern_man_tries_to_speak_without_an_accent/',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        data = self._download_json(
+            url + '.json', video_id)[0]['data']['children'][0]['data']
+
+        video_url = data['url']
+
+        # Avoid recursing into the same reddit URL
+        if 'reddit.com/' in video_url and '/%s/' % video_id in video_url:
+            raise ExtractorError('No media found', expected=True)
+
+        over_18 = data.get('over_18')
+        if over_18 is True:
+            age_limit = 18
+        elif over_18 is False:
+            age_limit = 0
+        else:
+            age_limit = None
+
+        return {
+            '_type': 'url_transparent',
+            'url': video_url,
+            'title': data.get('title'),
+            'thumbnail': data.get('thumbnail'),
+            'timestamp': float_or_none(data.get('created_utc')),
+            'uploader': data.get('author'),
+            'like_count': int_or_none(data.get('ups')),
+            'dislike_count': int_or_none(data.get('downs')),
+            'comment_count': int_or_none(data.get('num_comments')),
+            'age_limit': age_limit,
+        }
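For context, `RedditRIE` relies on appending `.json` to a comments URL to get the post data, and reads the fields used above (`url`, `title`, `author`, `ups`, `over_18`, ...) from `[0]['data']['children'][0]['data']`. A sketch of that lookup against a made-up, heavily trimmed response (the payload below is illustrative, not a captured API response):

```python
import json

# Illustrative shape of what GET <post_url>.json might return (trimmed, assumed).
response_text = json.dumps([
    {'data': {'children': [{'data': {
        'url': 'https://v.redd.it/zv89llsvexdz',
        'title': 'That small heart attack.',
        'author': 'Antw87',
        'created_utc': 1501941939.0,
        'ups': 1000, 'downs': 0, 'num_comments': 500,
        'over_18': False,
    }}]}},
    {'data': {'children': []}},  # remaining elements are not used by the extractor
])

data = json.loads(response_text)[0]['data']['children'][0]['data']
print(data['url'])           # https://v.redd.it/zv89llsvexdz
print(data.get('over_18'))   # False -> age_limit 0
```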

View File

@@ -1,6 +1,8 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
+import re
+
 from .common import InfoExtractor
 from ..utils import (
     int_or_none,
@@ -28,6 +30,12 @@ class VzaarIE(InfoExtractor):
         },
     }]
 
+    @staticmethod
+    def _extract_urls(webpage):
+        return re.findall(
+            r'<iframe[^>]+src=["\']((?:https?:)?//(?:view\.vzaar\.com)/[0-9]+)',
+            webpage)
+
     def _real_extract(self, url):
         video_id = self._match_id(url)
         video_data = self._download_json(
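A quick check of the new `_extract_urls` pattern against a hand-written page fragment (the iframe markup is illustrative; the id is the one from the generic.py test above):

```python
import re

webpage = '<p>Watch:</p><iframe allowfullscreen src="//view.vzaar.com/8707641/player"></iframe>'

urls = re.findall(
    r'<iframe[^>]+src=["\']((?:https?:)?//(?:view\.vzaar\.com)/[0-9]+)',
    webpage)
print(urls)  # ['//view.vzaar.com/8707641']
```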

View File

@@ -39,8 +39,8 @@ class XXXYMoviesIE(InfoExtractor):
             r"video_url\s*:\s*'([^']+)'", webpage, 'video URL')
 
         title = self._html_search_regex(
-            [r'<div class="block_header">\s*<h1>([^<]+)</h1>',
-             r'<title>(.*?)\s*-\s*XXXYMovies\.com</title>'],
+            [r'<div[^>]+\bclass="block_header"[^>]*>\s*<h1>([^<]+)<',
+             r'<title>(.*?)\s*-\s*(?:XXXYMovies\.com|XXX\s+Movies)</title>'],
             webpage, 'title')
 
         thumbnail = self._search_regex(
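The loosened patterns tolerate extra attributes on the header div and an alternate site name in the page title. A quick check against two illustrative markup snippets (made up for the example):

```python
import re

patterns = [
    r'<div[^>]+\bclass="block_header"[^>]*>\s*<h1>([^<]+)<',
    r'<title>(.*?)\s*-\s*(?:XXXYMovies\.com|XXX\s+Movies)</title>',
]

samples = [
    '<div id="main" class="block_header">\n<h1>Some Clip Title</h1></div>',
    '<title>Some Clip Title - XXX Movies</title>',
]

for html in samples:
    for pattern in patterns:
        m = re.search(pattern, html)
        if m:
            print(m.group(1))  # Some Clip Title (matched by either pattern)
            break
```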

View File

@@ -2733,6 +2733,8 @@ def cli_option(params, command_option, param):
 
 def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
     param = params.get(param)
+    if param is None:
+        return []
     assert isinstance(param, bool)
     if separator:
         return [command_option + separator + (true_value if param else false_value)]
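The guard makes `cli_bool_option` a no-op when the option was never set, instead of tripping the `assert`. A usage sketch based on the function as shown in the diff; the final non-separator branch is not visible above and is filled in here as a plausible completion so the snippet runs standalone:

```python
def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
    param = params.get(param)
    if param is None:
        return []
    assert isinstance(param, bool)
    if separator:
        return [command_option + separator + (true_value if param else false_value)]
    return [command_option, true_value if param else false_value]  # assumed completion

print(cli_bool_option(
    {'nocheckcertificate': True}, '--no-check-certificate', 'nocheckcertificate'))
# ['--no-check-certificate', 'true']
print(cli_bool_option(
    {'nocheckcertificate': False}, '--check-certificate', 'nocheckcertificate', 'false', 'true', '='))
# ['--check-certificate=true']
print(cli_bool_option(
    {}, '--check-certificate', 'nocheckcertificate', 'false', 'true', '='))
# [] -- previously this raised AssertionError
```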

View File

@@ -1,3 +1,3 @@
 from __future__ import unicode_literals
 
-__version__ = '2017.08.06'
+__version__ = '2017.08.09'