Merge branch 'master' of https://github.com/rg3/youtube-dl

2016-04-04 08:56:16 +02:00 · 2016-04-04 08:56:16 +02:00 · 2424572262
commit 2424572262
parent ca44adda06 6d4fc66bfc
27 changed files with 332 additions and 80 deletions
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@ -95,6 +95,7 @@ from .camdemy import (
    CamdemyIE,
    CamdemyFolderIE
 )
 from .camwithher import CamWithHerIE
 from .canalplus import CanalplusIE
 from .canalc2 import Canalc2IE
 from .canvas import CanvasIE
@ -103,6 +104,7 @@ from .cbc import (
    CBCPlayerIE,
 )
 from .cbs import CBSIE
 from .cbsinteractive import CBSInteractiveIE
 from .cbsnews import (
    CBSNewsIE,
    CBSNewsLiveVideoIE,
@ -128,7 +130,6 @@ from .clubic import ClubicIE
 from .clyp import ClypIE
 from .cmt import CMTIE
 from .cnbc import CNBCIE
 from .cnet import CNETIE
 from .cnn import (
    CNNIE,
    CNNBlogsIE,
--- a/youtube_dl/extractor/aenetworks.py
+++ b/youtube_dl/extractor/aenetworks.py
@ -22,6 +22,9 @@ class AENetworksIE(InfoExtractor):
            'ext': 'mp4',
            'title': "Bet You Didn't Know: Valentine's Day",
            'description': 'md5:7b57ea4829b391995b405fa60bd7b5f7',
            'timestamp': 1375819729,
            'upload_date': '20130806',
            'uploader': 'AENE-NEW',
        },
        'params': {
            # m3u8 download
@ -37,6 +40,9 @@ class AENetworksIE(InfoExtractor):
            'ext': 'mp4',
            'title': 'Winter Is Coming',
            'description': 'md5:641f424b7a19d8e24f26dea22cf59d74',
            'timestamp': 1338306241,
            'upload_date': '20120529',
            'uploader': 'AENE-NEW',
        },
        'add_ie': ['ThePlatform'],
    }, {
@ -69,8 +75,9 @@ class AENetworksIE(InfoExtractor):
        info = self._search_json_ld(webpage, video_id, fatal=False)
        info.update({
            '_type': 'url_transparent',
-            'url': smuggle_url(update_url_query(
+            'url': smuggle_url(
-                video_url, query), {
+                update_url_query(video_url, query),
                {
                    'sig': {
                        'key': 'crazyjava',
                        'secret': 's3cr3t'},
--- a/youtube_dl/extractor/bbc.py
+++ b/youtube_dl/extractor/bbc.py
@ -328,6 +328,7 @@ class BBCCoUkIE(InfoExtractor):
                    'format_id': '%s_%s' % (service, format['format_id']),
                    'abr': abr,
                    'acodec': acodec,
                    'vcodec': 'none',
                })
            formats.extend(conn_formats)
        return formats
--- a/youtube_dl/extractor/bravotv.py
+++ b/youtube_dl/extractor/bravotv.py
@ -15,6 +15,9 @@ class BravoTVIE(InfoExtractor):
            'ext': 'mp4',
            'title': 'Last Chance Kitchen Returns',
            'description': 'S13: Last Chance Kitchen Returns for Top Chef Season 13',
            'timestamp': 1448926740,
            'upload_date': '20151130',
            'uploader': 'NBCU-BRAV',
        }
    }
--- a/youtube_dl/extractor/brightcove.py
+++ b/youtube_dl/extractor/brightcove.py
@ -46,6 +46,9 @@ class BrightcoveLegacyIE(InfoExtractor):
                'title': 'Xavier Sala i Martín: “Un banc que no presta és un banc zombi que no serveix per a res”',
                'uploader': '8TV',
                'description': 'md5:a950cc4285c43e44d763d036710cd9cd',
                'timestamp': 1368213670,
                'upload_date': '20130510',
                'uploader_id': '1589608506001',
            }
        },
        {
@ -57,6 +60,9 @@ class BrightcoveLegacyIE(InfoExtractor):
                'title': 'JVMLS 2012: Arrays 2.0 - Opportunities and Challenges',
                'description': 'John Rose speaks at the JVM Language Summit, August 1, 2012.',
                'uploader': 'Oracle',
                'timestamp': 1344975024,
                'upload_date': '20120814',
                'uploader_id': '1460825906',
            },
        },
        {
@ -68,6 +74,9 @@ class BrightcoveLegacyIE(InfoExtractor):
                'title': 'This Bracelet Acts as a Personal Thermostat',
                'description': 'md5:547b78c64f4112766ccf4e151c20b6a0',
                'uploader': 'Mashable',
                'timestamp': 1382041798,
                'upload_date': '20131017',
                'uploader_id': '1130468786001',
            },
        },
        {
@ -85,14 +94,17 @@ class BrightcoveLegacyIE(InfoExtractor):
        {
            # test flv videos served by akamaihd.net
            # From http://www.redbull.com/en/bike/stories/1331655643987/replay-uci-dh-world-cup-2014-from-fort-william
-            'url': 'http://c.brightcove.com/services/viewer/htmlFederated?%40videoPlayer=ref%3ABC2996102916001&linkBaseURL=http%3A%2F%2Fwww.redbull.com%2Fen%2Fbike%2Fvideos%2F1331655630249%2Freplay-uci-fort-william-2014-dh&playerKey=AQ%7E%7E%2CAAAApYJ7UqE%7E%2Cxqr_zXk0I-zzNndy8NlHogrCb5QdyZRf&playerID=1398061561001#__youtubedl_smuggle=%7B%22Referer%22%3A+%22http%3A%2F%2Fwww.redbull.com%2Fen%2Fbike%2Fstories%2F1331655643987%2Freplay-uci-dh-world-cup-2014-from-fort-william%22%7D',
+            'url': 'http://c.brightcove.com/services/viewer/htmlFederated?%40videoPlayer=ref%3Aevent-stream-356&linkBaseURL=http%3A%2F%2Fwww.redbull.com%2Fen%2Fbike%2Fvideos%2F1331655630249%2Freplay-uci-fort-william-2014-dh&playerKey=AQ%7E%7E%2CAAAApYJ7UqE%7E%2Cxqr_zXk0I-zzNndy8NlHogrCb5QdyZRf&playerID=1398061561001#__youtubedl_smuggle=%7B%22Referer%22%3A+%22http%3A%2F%2Fwww.redbull.com%2Fen%2Fbike%2Fstories%2F1331655643987%2Freplay-uci-dh-world-cup-2014-from-fort-william%22%7D',
            # The md5 checksum changes on each download
            'info_dict': {
-                'id': '2996102916001',
+                'id': '3750436379001',
                'ext': 'flv',
                'title': 'UCI MTB World Cup 2014: Fort William, UK - Downhill Finals',
-                'uploader': 'Red Bull TV',
+                'uploader': 'RBTV Old (do not use)',
                'description': 'UCI MTB World Cup 2014: Fort William, UK - Downhill Finals',
                'timestamp': 1409122195,
                'upload_date': '20140827',
                'uploader_id': '710858724001',
            },
        },
        {
@ -106,6 +118,12 @@ class BrightcoveLegacyIE(InfoExtractor):
            'playlist_mincount': 7,
        },
    ]
    FLV_VCODECS = {
        1: 'SORENSON',
        2: 'ON2',
        3: 'H264',
        4: 'VP8',
    }
    @classmethod
    def _build_brighcove_url(cls, object_str):
@ -289,12 +307,16 @@ class BrightcoveLegacyIE(InfoExtractor):
                                    playlist_title=playlist_info['mediaCollectionDTO']['displayName'])
    def _extract_video_info(self, video_info):
        publisher_id = video_info.get('publisherId')
        info = {
            'id': compat_str(video_info['id']),
            'title': video_info['displayName'].strip(),
            'description': video_info.get('shortDescription'),
            'thumbnail': video_info.get('videoStillURL') or video_info.get('thumbnailURL'),
            'uploader': video_info.get('publisherName'),
            'uploader_id': compat_str(publisher_id) if publisher_id else None,
            'duration': float_or_none(video_info.get('length'), 1000),
            'timestamp': int_or_none(video_info.get('creationDate'), 1000),
        }
        renditions = video_info.get('renditions', []) + video_info.get('IOSRenditions', [])
@ -318,19 +340,30 @@ class BrightcoveLegacyIE(InfoExtractor):
                        ext = 'flv'
                if ext is None:
                    ext = determine_ext(url)
-                size = rend.get('size')
+                tbr = int_or_none(rend.get('encodingRate'), 1000),
                a_format = {
                    'format_id': 'http%s' % ('-%s' % tbr if tbr else ''),
                    'url': url,
                    'ext': ext,
-                    'height': rend.get('frameHeight'),
+                    'filesize': int_or_none(rend.get('size')) or None,
-                    'width': rend.get('frameWidth'),
+                    'tbr': tbr,
                    'filesize': size if size != 0 else None,
                }
                if rend.get('audioOnly'):
                    a_format.update({
                        'vcodec': 'none',
                    })
                else:
                    a_format.update({
                        'height': int_or_none(rend.get('frameHeight')),
                        'width': int_or_none(rend.get('frameWidth')),
                        'vcodec': rend.get('videoCodec'),
                    })
                # m3u8 manifests with remote == false are media playlists
                # Not calling _extract_m3u8_formats here to save network traffic
                if ext == 'm3u8':
                    a_format.update({
                        'format_id': 'hls%s' % ('-%s' % tbr if tbr else ''),
                        'ext': 'mp4',
                        'protocol': 'm3u8',
                    })
@ -341,6 +374,8 @@ class BrightcoveLegacyIE(InfoExtractor):
        elif video_info.get('FLVFullLengthURL') is not None:
            info.update({
                'url': video_info['FLVFullLengthURL'],
                'vcodec': self.FLV_VCODECS.get(video_info.get('FLVFullCodec')),
                'filesize': int_or_none(video_info.get('FLVFullSize')),
            })
        if self._downloader.params.get('include_ads', False):
@ -396,6 +431,7 @@ class BrightcoveNewIE(InfoExtractor):
            'formats': 'mincount:41',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        }
    }, {
@ -480,7 +516,7 @@ class BrightcoveNewIE(InfoExtractor):
                raise ExtractorError(json_data[0]['message'], expected=True)
            raise
-        title = json_data['name']
+        title = json_data['name'].strip()
        formats = []
        for source in json_data.get('sources', []):
@ -533,7 +569,7 @@ class BrightcoveNewIE(InfoExtractor):
                    f.update({
                        'url': src or streaming_src,
                        'format_id': build_format_id('http' if src else 'http-streaming'),
-                        'preference': 2 if src else 1,
+                        'source_preference': 0 if src else -1,
                    })
                else:
                    f.update({
@ -544,20 +580,22 @@ class BrightcoveNewIE(InfoExtractor):
                formats.append(f)
        self._sort_formats(formats)
-        description = json_data.get('description')
+        subtitles = {}
-        thumbnail = json_data.get('thumbnail')
+        for text_track in json_data.get('text_tracks', []):
-        timestamp = parse_iso8601(json_data.get('published_at'))
+            if text_track.get('src'):
-        duration = float_or_none(json_data.get('duration'), 1000)
+                subtitles.setdefault(text_track.get('srclang'), []).append({
-        tags = json_data.get('tags', [])
+                    'url': text_track['src'],
                })
        return {
            'id': video_id,
            'title': title,
-            'description': description,
+            'description': json_data.get('description'),
-            'thumbnail': thumbnail,
+            'thumbnail': json_data.get('thumbnail') or json_data.get('poster'),
-            'duration': duration,
+            'duration': float_or_none(json_data.get('duration'), 1000),
-            'timestamp': timestamp,
+            'timestamp': parse_iso8601(json_data.get('published_at')),
            'uploader_id': account_id,
            'formats': formats,
-            'tags': tags,
+            'subtitles': subtitles,
            'tags': json_data.get('tags', []),
        }
--- a/youtube_dl/extractor/camwithher.py
+++ b/youtube_dl/extractor/camwithher.py
@ -0,0 +1,87 @@
 from __future__ import unicode_literals
 import re
 from .common import InfoExtractor
 from ..utils import (
    int_or_none,
    parse_duration,
    unified_strdate,
 )
 class CamWithHerIE(InfoExtractor):
    """Extractor for camwithher.tv ``view_video.php`` pages.

    The page is identified by its ``viewkey`` query parameter; the numeric
    id found in the page's download link is what ends up as the result id
    and is used to build the RTMP stream URL.
    """

    # ``viewkey`` may appear anywhere in the query string.
    _VALID_URL = r'https?://(?:www\.)?camwithher\.tv/view_video\.php\?.*\bviewkey=(?P<id>\w+)'
    _TESTS = [{
        'url': 'http://camwithher.tv/view_video.php?viewkey=6e9a24e2c0e842e1f177&page=&viewtype=&category=',
        'info_dict': {
            'id': '5644',
            'ext': 'flv',
            'title': 'Periscope Tease',
            'description': 'In the clouds teasing on periscope to my favorite song',
            'duration': 240,
            'view_count': int,
            'comment_count': int,
            'uploader': 'MileenaK',
            'upload_date': '20160322',
        },
        'params': {
            'skip_download': True,
        }
    }, {
        'url': 'http://camwithher.tv/view_video.php?viewkey=6dfd8b7c97531a459937',
        'only_matching': True,
    }, {
        'url': 'http://camwithher.tv/view_video.php?page=&viewkey=6e9a24e2c0e842e1f177&viewtype=&category=',
        'only_matching': True,
    }, {
        'url': 'http://camwithher.tv/view_video.php?viewkey=b6c3b5bea9515d1a1fc4&page=&viewtype=&category=mv',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Download the video page and build the info dict for it.

        The numeric id and the title are mandatory (their lookups are made
        without a default); every other field is looked up with
        ``default=None`` and is simply absent-valued when not found.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        def optional_field(pattern, name):
            # Optional page metadata: yields None when the pattern is absent.
            return self._search_regex(pattern, webpage, name, default=None)

        flv_id = self._html_search_regex(
            r'<a[^>]+href=["\']/download/\?v=(\d+)', webpage, 'video id')

        # Video URL construction algorithm is reverse-engineered from cwhplayer.swf:
        # ids above 2010 get an "mp4:<id>.mp4" path, lower ids use the bare id.
        if int(flv_id) > 2010:
            play_path = 'mp4:%s.mp4' % flv_id
        else:
            play_path = flv_id
        rtmp_url = 'rtmp://camwithher.tv/clipshare/%s' % play_path

        title = self._html_search_regex(
            r'<div[^>]+style="float:left"[^>]*>\s*<h2>(.+?)</h2>', webpage, 'title')
        description = self._html_search_regex(
            r'>Description:</span>(.+?)</div>', webpage, 'description', default=None)

        # Whitespace and dashes are stripped from the runtime text before
        # it is handed to parse_duration.
        raw_runtime = optional_field(r'Runtime\s*:\s*(.+?) \|', 'duration')
        if raw_runtime:
            raw_runtime = re.sub(r'[\s-]', '', raw_runtime)
        duration = parse_duration(raw_runtime)

        view_count = int_or_none(
            optional_field(r'Views\s*:\s*(\d+)', 'view count'))
        comment_count = int_or_none(
            optional_field(r'Comments\s*:\s*(\d+)', 'comment count'))
        uploader = optional_field(
            r'Added by\s*:\s*<a[^>]+>([^<]+)</a>', 'uploader')
        upload_date = unified_strdate(
            optional_field(r'Added on\s*:\s*([\d-]+)', 'upload date'))

        return {
            'id': flv_id,
            'url': rtmp_url,
            'ext': 'flv',
            'no_resume': True,
            'title': title,
            'description': description,
            'duration': duration,
            'view_count': view_count,
            'comment_count': comment_count,
            'uploader': uploader,
            'upload_date': upload_date,
        }
--- a/youtube_dl/extractor/cbs.py
+++ b/youtube_dl/extractor/cbs.py
@ -33,6 +33,9 @@ class CBSIE(CBSBaseIE):
            'title': 'Connect Chat feat. Garth Brooks',
            'description': 'Connect with country music singer Garth Brooks, as he chats with fans on Wednesday November 27, 2013. Be sure to tune in to Garth Brooks: Live from Las Vegas, Friday November 29, at 9/8c on CBS!',
            'duration': 1495,
            'timestamp': 1385585425,
            'upload_date': '20131127',
            'uploader': 'CBSI-NEW',
        },
        'params': {
            # rtmp download
--- a/youtube_dl/extractor/cbsinteractive.py
+++ b/youtube_dl/extractor/cbsinteractive.py
@ -1,12 +1,14 @@
 # coding: utf-8
 from __future__ import unicode_literals
 import re
 from .theplatform import ThePlatformIE
 from ..utils import int_or_none
-class CNETIE(ThePlatformIE):
+class CBSInteractiveIE(ThePlatformIE):
-    _VALID_URL = r'https?://(?:www\.)?cnet\.com/videos/(?P<id>[^/]+)/'
+    _VALID_URL = r'https?://(?:www\.)?(?P<site>cnet|zdnet)\.com/(?:videos|video/share)/(?P<id>[^/?]+)'
    _TESTS = [{
        'url': 'http://www.cnet.com/videos/hands-on-with-microsofts-windows-8-1-update/',
        'info_dict': {
@ -17,6 +19,8 @@ class CNETIE(ThePlatformIE):
            'uploader_id': '6085384d-619e-11e3-b231-14feb5ca9861',
            'uploader': 'Sarah Mitroff',
            'duration': 70,
            'timestamp': 1396479627,
            'upload_date': '20140402',
        },
    }, {
        'url': 'http://www.cnet.com/videos/whiny-pothole-tweets-at-local-government-when-hit-by-cars-tomorrow-daily-187/',
@ -28,15 +32,38 @@ class CNETIE(ThePlatformIE):
            'uploader_id': 'b163284d-6b73-44fc-b3e6-3da66c392d40',
            'uploader': 'Ashley Esqueda',
            'duration': 1482,
            'timestamp': 1433289889,
            'upload_date': '20150603',
        },
    }, {
        'url': 'http://www.zdnet.com/video/share/video-keeping-android-smartphones-and-tablets-secure/',
        'info_dict': {
            'id': 'bc1af9f0-a2b5-4e54-880d-0d95525781c0',
            'ext': 'mp4',
            'title': 'Video: Keeping Android smartphones and tablets secure',
            'description': 'Here\'s the best way to keep Android devices secure, and what you do when they\'ve come to the end of their lives.',
            'uploader_id': 'f2d97ea2-8175-11e2-9d12-0018fe8a00b0',
            'uploader': 'Adrian Kingsley-Hughes',
            'timestamp': 1448961720,
            'upload_date': '20151201',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        }
    }]
    TP_RELEASE_URL_TEMPLATE = 'http://link.theplatform.com/s/kYEXFC/%s?mbr=true'
    MPX_ACCOUNTS = {
        'cnet': 2288573011,
        'zdnet': 2387448114,
    }
    def _real_extract(self, url):
-        display_id = self._match_id(url)
+        site, display_id = re.match(self._VALID_URL, url).groups()
        webpage = self._download_webpage(url, display_id)
        data_json = self._html_search_regex(
-            r"data-cnet-video(?:-uvp)?-options='([^']+)'",
+            r"data-(?:cnet|zdnet)-video(?:-uvp)?-options='([^']+)'",
            webpage, 'data json')
        data = self._parse_json(data_json, display_id)
        vdata = data.get('video') or data['videos'][0]
@ -51,16 +78,15 @@ class CNETIE(ThePlatformIE):
            uploader = None
            uploader_id = None
-        metadata = self.get_metadata('kYEXFC/%s' % list(vdata['files'].values())[0], video_id)
+        media_guid_path = 'media/guid/%d/%s' % (self.MPX_ACCOUNTS[site], vdata['mpxRefId'])
-        description = vdata.get('description') or metadata.get('description')
+        formats, subtitles = [], {}
-        duration = int_or_none(vdata.get('duration')) or metadata.get('duration')
+        if site == 'cnet':
-
+            formats, subtitles = self._extract_theplatform_smil(
-        formats = []
+                self.TP_RELEASE_URL_TEMPLATE % media_guid_path, video_id)
        subtitles = {}
        for (fkey, vid) in vdata['files'].items():
            if fkey == 'hls_phone' and 'hls_tablet' in vdata['files']:
                continue
-            release_url = 'http://link.theplatform.com/s/kYEXFC/%s?mbr=true' % vid
+            release_url = self.TP_RELEASE_URL_TEMPLATE % vid
            if fkey == 'hds':
                release_url += '&manifest=f4m'
            tp_formats, tp_subtitles = self._extract_theplatform_smil(release_url, video_id, 'Downloading %s SMIL data' % fkey)
@ -68,15 +94,15 @@ class CNETIE(ThePlatformIE):
            subtitles = self._merge_subtitles(subtitles, tp_subtitles)
        self._sort_formats(formats)
-        return {
+        info = self.get_metadata('kYEXFC/%s' % media_guid_path, video_id)
        info.update({
            'id': video_id,
            'display_id': display_id,
            'title': title,
-            'description': description,
+            'duration': int_or_none(vdata.get('duration')),
            'thumbnail': metadata.get('thumbnail'),
            'duration': duration,
            'uploader': uploader,
            'uploader_id': uploader_id,
            'subtitles': subtitles,
            'formats': formats,
-        }
+        })
        return info
--- a/youtube_dl/extractor/cbsnews.py
+++ b/youtube_dl/extractor/cbsnews.py
@ -5,7 +5,6 @@ from .common import InfoExtractor
 from .cbs import CBSBaseIE
 from ..utils import (
    parse_duration,
    find_xpath_attr,
 )
--- a/youtube_dl/extractor/cnbc.py
+++ b/youtube_dl/extractor/cnbc.py
@ -14,6 +14,9 @@ class CNBCIE(InfoExtractor):
            'ext': 'mp4',
            'title': 'Fighting zombies is big business',
            'description': 'md5:0c100d8e1a7947bd2feec9a5550e519e',
            'timestamp': 1459332000,
            'upload_date': '20160330',
            'uploader': 'NBCU-CNBC',
        },
        'params': {
            # m3u8 download
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@ -1335,7 +1335,7 @@ class InfoExtractor(object):
            if not src or src in urls:
                continue
            urls.append(src)
-            ext = textstream.get('ext') or determine_ext(src) or mimetype2ext(textstream.get('type'))
+            ext = textstream.get('ext') or mimetype2ext(textstream.get('type')) or determine_ext(src)
            lang = textstream.get('systemLanguage') or textstream.get('systemLanguageName') or textstream.get('lang') or subtitles_lang
            subtitles.setdefault(lang, []).append({
                'url': src,
@ -1515,9 +1515,16 @@ class InfoExtractor(object):
                                representation_ms_info['total_number'] = int(math.ceil(float(period_duration) / segment_duration))
                            media_template = representation_ms_info['media_template']
                            media_template = media_template.replace('$RepresentationID$', representation_id)
-                            media_template = re.sub(r'\$(Number|Bandwidth)(?:%(0\d+)d)?\$', r'%(\1)\2d', media_template)
+                            media_template = re.sub(r'\$(Number|Bandwidth)\$', r'%(\1)d', media_template)
                            media_template = re.sub(r'\$(Number|Bandwidth)%(\d+)\$', r'%(\1)\2d', media_template)
                            media_template.replace('$$', '$')
-                            representation_ms_info['segment_urls'] = [media_template % {'Number': segment_number, 'Bandwidth': representation_attrib.get('bandwidth')} for segment_number in range(representation_ms_info['start_number'], representation_ms_info['total_number'] + representation_ms_info['start_number'])]
+                            representation_ms_info['segment_urls'] = [
                                media_template % {
                                    'Number': segment_number,
                                    'Bandwidth': representation_attrib.get('bandwidth')}
                                for segment_number in range(
                                    representation_ms_info['start_number'],
                                    representation_ms_info['total_number'] + representation_ms_info['start_number'])]
                        if 'segment_urls' in representation_ms_info:
                            f.update({
                                'segment_urls': representation_ms_info['segment_urls'],
--- a/youtube_dl/extractor/fox.py
+++ b/youtube_dl/extractor/fox.py
@ -16,6 +16,9 @@ class FOXIE(InfoExtractor):
            'title': 'Official Trailer: Gotham',
            'description': 'Tracing the rise of the great DC Comics Super-Villains and vigilantes, Gotham reveals an entirely new chapter that has never been told.',
            'duration': 129,
            'timestamp': 1400020798,
            'upload_date': '20140513',
            'uploader': 'NEWA-FNG-FOXCOM',
        },
        'add_ie': ['ThePlatform'],
    }
--- a/youtube_dl/extractor/instagram.py
+++ b/youtube_dl/extractor/instagram.py
@ -152,7 +152,7 @@ class InstagramUserIE(InfoExtractor):
            if not page['items']:
                break
-            max_id = page['items'][-1]['id']
+            max_id = page['items'][-1]['id'].split('_')[0]
            media_url = (
                'http://instagram.com/%s/media?max_id=%s' % (
                    uploader_id, max_id))
--- a/youtube_dl/extractor/lynda.py
+++ b/youtube_dl/extractor/lynda.py
@ -28,8 +28,8 @@ class LyndaBaseIE(InfoExtractor):
            return
        login_form = {
-            'username': username.encode('utf-8'),
+            'username': username,
-            'password': password.encode('utf-8'),
+            'password': password,
            'remember': 'false',
            'stayPut': 'false'
        }
--- a/youtube_dl/extractor/movieclips.py
+++ b/youtube_dl/extractor/movieclips.py
@ -2,39 +2,48 @@
 from __future__ import unicode_literals
 from .common import InfoExtractor
-from ..utils import sanitized_Request
+from ..utils import (
    smuggle_url,
    float_or_none,
    parse_iso8601,
    update_url_query,
 )
 class MovieClipsIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www.)?movieclips\.com/videos/(?P<id>[^/?#]+)'
+    _VALID_URL = r'https?://(?:www.)?movieclips\.com/videos/.+-(?P<id>\d+)(?:\?|$)'
    _TEST = {
-        'url': 'http://www.movieclips.com/videos/warcraft-trailer-1-561180739597?autoPlay=true&playlistId=5',
+        'url': 'http://www.movieclips.com/videos/warcraft-trailer-1-561180739597',
        'md5': '42b5a0352d4933a7bd54f2104f481244',
        'info_dict': {
            'id': 'pKIGmG83AqD9',
            'display_id': 'warcraft-trailer-1-561180739597',
            'ext': 'mp4',
            'title': 'Warcraft Trailer 1',
            'description': 'Watch Trailer 1 from Warcraft (2016). Legendary’s WARCRAFT is a 3D epic adventure of world-colliding conflict based.',
            'thumbnail': 're:^https?://.*\.jpg$',
            'timestamp': 1446843055,
            'upload_date': '20151106',
            'uploader': 'Movieclips',
        },
        'add_ie': ['ThePlatform'],
    }
    def _real_extract(self, url):
-        display_id = self._match_id(url)
+        video_id = self._match_id(url)
-
+        webpage = self._download_webpage(url, video_id)
-        req = sanitized_Request(url)
+        video = next(v for v in self._parse_json(self._search_regex(
-        # it doesn't work if it thinks the browser it's too old
+            r'var\s+__REACT_ENGINE__\s*=\s*({.+});',
-        req.add_header('User-Agent', 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20150101 Firefox/43.0 (Chrome)')
+            webpage, 'react engine'), video_id)['playlist']['videos'] if v['id'] == video_id)
        webpage = self._download_webpage(req, display_id)
        theplatform_link = self._html_search_regex(r'src="(http://player.theplatform.com/p/.*?)"', webpage, 'theplatform link')
        title = self._html_search_regex(r'<title[^>]*>([^>]+)-\s*\d+\s*|\s*Movieclips.com</title>', webpage, 'title')
        description = self._html_search_meta('description', webpage)
        return {
            '_type': 'url_transparent',
-            'url': theplatform_link,
+            'ie_key': 'ThePlatform',
-            'title': title,
+            'url': smuggle_url(update_url_query(
-            'display_id': display_id,
+                video['contentUrl'], {'mbr': 'true'}), {'force_smil_url': True}),
-            'description': description,
+            'title': self._og_search_title(webpage),
            'description': self._html_search_meta('description', webpage),
            'duration': float_or_none(video.get('duration')),
            'timestamp': parse_iso8601(video.get('dateCreated')),
            'thumbnail': video.get('defaultImage'),
            'uploader': video.get('provider'),
        }
--- a/youtube_dl/extractor/nationalgeographic.py
+++ b/youtube_dl/extractor/nationalgeographic.py
@ -21,6 +21,9 @@ class NationalGeographicIE(InfoExtractor):
                'ext': 'mp4',
                'title': 'Mating Crabs Busted by Sharks',
                'description': 'md5:16f25aeffdeba55aaa8ec37e093ad8b3',
                'timestamp': 1423523799,
                'upload_date': '20150209',
                'uploader': 'NAGS',
            },
            'add_ie': ['ThePlatform'],
        },
@ -32,6 +35,9 @@ class NationalGeographicIE(InfoExtractor):
                'ext': 'mp4',
                'title': 'The Real Jaws',
                'description': 'md5:8d3e09d9d53a85cd397b4b21b2c77be6',
                'timestamp': 1433772632,
                'upload_date': '20150608',
                'uploader': 'NAGS',
            },
            'add_ie': ['ThePlatform'],
        },
@ -68,6 +74,9 @@ class NationalGeographicChannelIE(InfoExtractor):
                'ext': 'mp4',
                'title': 'Uncovering a Universal Knowledge',
                'description': 'md5:1a89148475bf931b3661fcd6ddb2ae3a',
                'timestamp': 1458680907,
                'upload_date': '20160322',
                'uploader': 'NEWA-FNG-NGTV',
            },
            'add_ie': ['ThePlatform'],
        },
@ -79,6 +88,9 @@ class NationalGeographicChannelIE(InfoExtractor):
                'ext': 'mp4',
                'title': 'The Stunning Red Bird of Paradise',
                'description': 'md5:7bc8cd1da29686be4d17ad1230f0140c',
                'timestamp': 1459362152,
                'upload_date': '20160330',
                'uploader': 'NEWA-FNG-NGTV',
            },
            'add_ie': ['ThePlatform'],
        },
--- a/youtube_dl/extractor/nbc.py
+++ b/youtube_dl/extractor/nbc.py
@ -27,6 +27,9 @@ class NBCIE(InfoExtractor):
                'ext': 'mp4',
                'title': 'Jimmy Fallon Surprises Fans at Ben & Jerry\'s',
                'description': 'Jimmy gives out free scoops of his new "Tonight Dough" ice cream flavor by surprising customers at the Ben & Jerry\'s scoop shop.',
                'timestamp': 1424246400,
                'upload_date': '20150218',
                'uploader': 'NBCU-COM',
            },
            'params': {
                # m3u8 download
@ -50,6 +53,9 @@ class NBCIE(InfoExtractor):
                'ext': 'mp4',
                'title': 'Star Wars Teaser',
                'description': 'md5:0b40f9cbde5b671a7ff62fceccc4f442',
                'timestamp': 1417852800,
                'upload_date': '20141206',
                'uploader': 'NBCU-COM',
            },
            'params': {
                # m3u8 download
@ -78,6 +84,7 @@ class NBCIE(InfoExtractor):
            theplatform_url = 'http:' + theplatform_url
        return {
            '_type': 'url_transparent',
            'ie_key': 'ThePlatform',
            'url': smuggle_url(theplatform_url, {'source_url': url}),
            'id': video_id,
        }
@ -93,6 +100,9 @@ class NBCSportsVPlayerIE(InfoExtractor):
            'ext': 'flv',
            'description': 'md5:df390f70a9ba7c95ff1daace988f0d8d',
            'title': 'Tyler Kalinoski hits buzzer-beater to lift Davidson',
            'timestamp': 1426270238,
            'upload_date': '20150313',
            'uploader': 'NBCU-SPORTS',
        }
    }, {
        'url': 'http://vplayer.nbcsports.com/p/BxmELC/nbc_embedshare/select/_hqLjQ95yx8Z',
@ -144,6 +154,9 @@ class CSNNEIE(InfoExtractor):
            'ext': 'mp4',
            'title': 'SNC evening update: Wright named Red Sox\' No. 5 starter.',
            'description': 'md5:1753cfee40d9352b19b4c9b3e589b9e3',
            'timestamp': 1459369979,
            'upload_date': '20160330',
            'uploader': 'NBCU-SPORTS',
        }
    }
@ -331,6 +344,7 @@ class MSNBCIE(InfoExtractor):
            'thumbnail': 're:^https?://.*\.jpg$',
            'timestamp': 1406937606,
            'upload_date': '20140802',
            'uploader': 'NBCU-NEWS',
            'categories': ['MSNBC/Topics/Franchise/Best of last night', 'MSNBC/Topics/General/Congress'],
        },
    }
--- a/youtube_dl/extractor/nowness.py
+++ b/youtube_dl/extractor/nowness.py
@ -63,8 +63,11 @@ class NownessIE(NownessBaseIE):
            'title': 'Candor: The Art of Gesticulation',
            'description': 'Candor: The Art of Gesticulation',
            'thumbnail': 're:^https?://.*\.jpg',
-            'uploader': 'Nowness',
+            'timestamp': 1446745676,
            'upload_date': '20151105',
            'uploader_id': '2385340575001',
        },
        'add_ie': ['BrightcoveNew'],
    }, {
        'url': 'https://cn.nowness.com/story/kasper-bjorke-ft-jaakko-eino-kalevi-tnr',
        'md5': 'e79cf125e387216f86b2e0a5b5c63aa3',
@ -74,8 +77,11 @@ class NownessIE(NownessBaseIE):
            'title': 'Kasper Bjørke ft. Jaakko Eino Kalevi: TNR',
            'description': 'Kasper Bjørke ft. Jaakko Eino Kalevi: TNR',
            'thumbnail': 're:^https?://.*\.jpg',
-            'uploader': 'Nowness',
+            'timestamp': 1407315371,
            'upload_date': '20140806',
            'uploader_id': '2385340575001',
        },
        'add_ie': ['BrightcoveNew'],
    }, {
        # vimeo
        'url': 'https://www.nowness.com/series/nowness-picks/jean-luc-godard-supercut',
@ -90,6 +96,7 @@ class NownessIE(NownessBaseIE):
            'uploader': 'Cinema Sem Lei',
            'uploader_id': 'cinemasemlei',
        },
        'add_ie': ['Vimeo'],
    }]
    def _real_extract(self, url):
--- a/youtube_dl/extractor/pluralsight.py
+++ b/youtube_dl/extractor/pluralsight.py
@ -64,8 +64,8 @@ class PluralsightIE(PluralsightBaseIE):
        login_form = self._hidden_inputs(login_page)
        login_form.update({
-            'Username': username.encode('utf-8'),
+            'Username': username,
-            'Password': password.encode('utf-8'),
+            'Password': password,
        })
        post_url = self._search_regex(
--- a/youtube_dl/extractor/sbs.py
+++ b/youtube_dl/extractor/sbs.py
@ -24,6 +24,9 @@ class SBSIE(InfoExtractor):
            'description': 'md5:f250a9856fca50d22dec0b5b8015f8a5',
            'thumbnail': 're:http://.*\.jpg',
            'duration': 308,
            'timestamp': 1408613220,
            'upload_date': '20140821',
            'uploader': 'SBSC',
        },
    }, {
        'url': 'http://www.sbs.com.au/ondemand/video/320403011771/Dingo-Conservation-The-Feed',
@ -57,6 +60,7 @@ class SBSIE(InfoExtractor):
        return {
            '_type': 'url_transparent',
            'ie_key': 'ThePlatform',
            'id': video_id,
-            'url': smuggle_url(theplatform_url, {'force_smil_url': True}),
+            'url': smuggle_url(self._proto_relative_url(theplatform_url), {'force_smil_url': True}),
        }
--- a/youtube_dl/extractor/screencast.py
+++ b/youtube_dl/extractor/screencast.py
@ -12,7 +12,7 @@ from ..utils import (
 class ScreencastIE(InfoExtractor):
-    _VALID_URL = r'https?://www\.screencast\.com/t/(?P<id>[a-zA-Z0-9]+)'
+    _VALID_URL = r'https?://(?:www\.)?screencast\.com/t/(?P<id>[a-zA-Z0-9]+)'
    _TESTS = [{
        'url': 'http://www.screencast.com/t/3ZEjQXlT',
        'md5': '917df1c13798a3e96211dd1561fded83',
@ -53,8 +53,10 @@ class ScreencastIE(InfoExtractor):
            'description': 'md5:7b9f393bc92af02326a5c5889639eab0',
            'thumbnail': 're:^https?://.*\.(?:gif|jpg)$',
        }
-    },
+    }, {
-    ]
+        'url': 'http://screencast.com/t/aAB3iowa',
        'only_matching': True,
    }]
    def _real_extract(self, url):
        video_id = self._match_id(url)
@ -94,8 +96,9 @@ class ScreencastIE(InfoExtractor):
        title = self._og_search_title(webpage, default=None)
        if title is None:
            title = self._html_search_regex(
-                [r'<b>Title:</b> ([^<]*)</div>',
+                [r'<b>Title:</b> ([^<]+)</div>',
-                 r'class="tabSeperator">></span><span class="tabText">(.*?)<'],
+                 r'class="tabSeperator">></span><span class="tabText">(.+?)<',
                 r'<title>([^<]+)</title>'],
                webpage, 'title')
        thumbnail = self._og_search_thumbnail(webpage)
        description = self._og_search_description(webpage, default=None)
--- a/youtube_dl/extractor/theplatform.py
+++ b/youtube_dl/extractor/theplatform.py
@ -76,6 +76,8 @@ class ThePlatformBaseIE(OnceIE):
            'description': info['description'],
            'thumbnail': info['defaultThumbnailUrl'],
            'duration': int_or_none(info.get('duration'), 1000),
            'timestamp': int_or_none(info.get('pubDate'), 1000) or None,
            'uploader': info.get('billingCode'),
        }
@ -94,6 +96,9 @@ class ThePlatformIE(ThePlatformBaseIE):
            'title': 'Blackberry\'s big, bold Z30',
            'description': 'The Z30 is Blackberry\'s biggest, baddest mobile messaging device yet.',
            'duration': 247,
            'timestamp': 1383239700,
            'upload_date': '20131031',
            'uploader': 'CBSI-NEW',
        },
        'params': {
            # rtmp download
@ -107,6 +112,9 @@ class ThePlatformIE(ThePlatformBaseIE):
            'ext': 'flv',
            'description': 'md5:ac330c9258c04f9d7512cf26b9595409',
            'title': 'Tesla Model S: A second step towards a cleaner motoring future',
            'timestamp': 1426176191,
            'upload_date': '20150312',
            'uploader': 'CBSI-NEW',
        },
        'params': {
            # rtmp download
@ -119,6 +127,7 @@ class ThePlatformIE(ThePlatformBaseIE):
            'ext': 'mp4',
            'description': 'md5:644ad9188d655b742f942bf2e06b002d',
            'title': 'HIGHLIGHTS: USA bag first ever series Cup win',
            'uploader': 'EGSM',
        }
    }, {
        'url': 'http://player.theplatform.com/p/NnzsPC/widget/select/media/4Y0TlYUr_ZT7',
@ -135,6 +144,7 @@ class ThePlatformIE(ThePlatformBaseIE):
            'thumbnail': 're:^https?://.*\.jpg$',
            'timestamp': 1435752600,
            'upload_date': '20150701',
            'uploader': 'NBCU-NEWS',
        },
    }, {
        # From http://www.nbc.com/the-blacklist/video/sir-crispin-crandall/2928790?onid=137781#vc137781=1
--- a/youtube_dl/extractor/thestar.py
+++ b/youtube_dl/extractor/thestar.py
@ -19,6 +19,10 @@ class TheStarIE(InfoExtractor):
            'uploader_id': '794267642001',
            'timestamp': 1454353482,
            'upload_date': '20160201',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        }
    }
    BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/794267642001/default_default/index.html?videoId=%s'
--- a/youtube_dl/extractor/tv3.py
+++ b/youtube_dl/extractor/tv3.py
@ -21,6 +21,7 @@ class TV3IE(InfoExtractor):
            'Failed to download MPD manifest'
        ],
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }
--- a/youtube_dl/extractor/udemy.py
+++ b/youtube_dl/extractor/udemy.py
@ -151,8 +151,8 @@ class UdemyIE(InfoExtractor):
        login_form = self._form_hidden_inputs('login-form', login_popup)
        login_form.update({
-            'email': username.encode('utf-8'),
+            'email': username,
-            'password': password.encode('utf-8'),
+            'password': password,
        })
        request = sanitized_Request(
@ -193,12 +193,12 @@ class UdemyIE(InfoExtractor):
        asset = lecture['asset']
-        asset_type = asset.get('assetType') or asset.get('asset_type')
+        asset_type = asset.get('asset_type') or asset.get('assetType')
        if asset_type != 'Video':
            raise ExtractorError(
                'Lecture %s is not a video' % lecture_id, expected=True)
-        stream_url = asset.get('streamUrl') or asset.get('stream_url')
+        stream_url = asset.get('stream_url') or asset.get('streamUrl')
        if stream_url:
            youtube_url = self._search_regex(
                r'(https?://www\.youtube\.com/watch\?v=.*)', stream_url, 'youtube URL', default=None)
@ -206,7 +206,7 @@ class UdemyIE(InfoExtractor):
                return self.url_result(youtube_url, 'Youtube')
        video_id = asset['id']
-        thumbnail = asset.get('thumbnailUrl') or asset.get('thumbnail_url')
+        thumbnail = asset.get('thumbnail_url') or asset.get('thumbnailUrl')
        duration = float_or_none(asset.get('data', {}).get('duration'))
        formats = []
@ -325,7 +325,7 @@ class UdemyCourseIE(UdemyIE):
            'https://www.udemy.com/api-2.0/courses/%s/cached-subscriber-curriculum-items' % course_id,
            course_id, 'Downloading course curriculum', query={
                'fields[chapter]': 'title,object_index',
-                'fields[lecture]': 'title',
+                'fields[lecture]': 'title,asset',
                'page_size': '1000',
            })
@ -334,6 +334,11 @@ class UdemyCourseIE(UdemyIE):
        for entry in response['results']:
            clazz = entry.get('_class')
            if clazz == 'lecture':
                asset = entry.get('asset')
                if isinstance(asset, dict):
                    asset_type = asset.get('asset_type') or asset.get('assetType')
                    if asset_type != 'Video':
                        continue
                lecture_id = entry.get('id')
                if lecture_id:
                    entry = {
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@ -270,7 +270,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                         ))
                         |(?:
                            youtu\.be|                                        # just youtu.be/xxxx
-                            vid\.plus                                         # or vid.plus/xxxx
+                            vid\.plus|                                        # or vid.plus/xxxx
                            zwearz\.com/watch|                                # or zwearz.com/watch/xxxx
                         )/
                         |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
                         )
@ -758,6 +759,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
            'url': 'http://vid.plus/FlRa-iH7PGw',
            'only_matching': True,
        },
        {
            'url': 'http://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
            'only_matching': True,
        },
        {
            # Title with JS-like syntax "};" (see https://github.com/rg3/youtube-dl/issues/7468)
            # Also tests cut-off URL expansion in video description (see
--- a/youtube_dl/postprocessor/ffmpeg.py
+++ b/youtube_dl/postprocessor/ffmpeg.py
@ -536,7 +536,7 @@ class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor):
            sub_filenames.append(old_file)
            new_file = subtitles_filename(filename, lang, new_ext)
-            if ext == 'dfxp' or ext == 'ttml':
+            if ext == 'dfxp' or ext == 'ttml' or ext == 'tt':
                self._downloader.report_warning(
                    'You have requested to convert dfxp (TTML) subtitles into another format, '
                    'which results in style information loss')