Merge branch 'master' of https://github.com/rg3/youtube-dl

2016-07-29 08:47:46 +02:00 · 2016-07-29 08:47:46 +02:00 · 8ddff4c81a
commit 8ddff4c81a
parent e440dae716 0cacae2807
40 changed files with 724 additions and 552 deletions
--- a/.github/ISSUE_TEMPLATE.md
+++ b/.github/ISSUE_TEMPLATE.md
@ -6,8 +6,8 @@
 ---
-### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.07.17*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
+### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.07.28*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated versions will be rejected.
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.07.17**
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.07.28**
 ### Before submitting an *issue* make sure you have:
 - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
@ -35,7 +35,7 @@ $ youtube-dl -v <your command line>
 [debug] User config: []
 [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
 [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
-[debug] youtube-dl version 2016.07.17
+[debug] youtube-dl version 2016.07.28
 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
 [debug] Proxy map: {}
--- a/1
+++ b/1
@ -178,3 +178,4 @@ Artur Krysiak
 Jakub Adam Wieczorek
 Aleksandar Topuzović
 Nehal Patel
 Rob van Bekkum
--- a/devscripts/show-downloads-statistics.py
+++ b/devscripts/show-downloads-statistics.py
@ -1,6 +1,7 @@
 #!/usr/bin/env python
 from __future__ import unicode_literals
 import itertools
 import json
 import os
 import re
@ -21,21 +22,26 @@ def format_size(bytes):
 total_bytes = 0
-releases = json.loads(compat_urllib_request.urlopen(
+for page in itertools.count(1):
-    'https://api.github.com/repos/rg3/youtube-dl/releases').read().decode('utf-8'))
+    releases = json.loads(compat_urllib_request.urlopen(
        'https://api.github.com/repos/rg3/youtube-dl/releases?page=%s' % page
    ).read().decode('utf-8'))
-for release in releases:
+    if not releases:
-    compat_print(release['name'])
+        break
-    for asset in release['assets']:
+
-        asset_name = asset['name']
+    for release in releases:
-        total_bytes += asset['download_count'] * asset['size']
+        compat_print(release['name'])
-        if all(not re.match(p, asset_name) for p in (
+        for asset in release['assets']:
-                r'^youtube-dl$',
+            asset_name = asset['name']
-                r'^youtube-dl-\d{4}\.\d{2}\.\d{2}(?:\.\d+)?\.tar\.gz$',
+            total_bytes += asset['download_count'] * asset['size']
-                r'^youtube-dl\.exe$')):
+            if all(not re.match(p, asset_name) for p in (
-            continue
+                    r'^youtube-dl$',
-        compat_print(
+                    r'^youtube-dl-\d{4}\.\d{2}\.\d{2}(?:\.\d+)?\.tar\.gz$',
-            ' %s size: %s downloads: %d'
+                    r'^youtube-dl\.exe$')):
-            % (asset_name, format_size(asset['size']), asset['download_count']))
+                continue
            compat_print(
                ' %s size: %s downloads: %d'
                % (asset_name, format_size(asset['size']), asset['download_count']))
 compat_print('total downloads traffic: %s' % format_size(total_bytes))
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@ -46,6 +46,7 @@
 - **archive.org**: archive.org videos
 - **ARD**
 - **ARD:mediathek**
 - **Arkena**
 - **arte.tv**
 - **arte.tv:+7**
 - **arte.tv:cinema**
@ -141,7 +142,7 @@
 - **CollegeRama**
 - **ComCarCoff**
 - **ComedyCentral**
- - **ComedyCentralShows**: The Daily Show / The Colbert Report
+ - **ComedyCentralTV**
 - **CondeNast**: Condé Nast media group: Allure, Architectural Digest, Ars Technica, Bon Appétit, Brides, Condé Nast, Condé Nast Traveler, Details, Epicurious, GQ, Glamour, Golf Digest, SELF, Teen Vogue, The New Yorker, Vanity Fair, Vogue, W Magazine, WIRED
 - **Coub**
 - **Cracked**
@ -336,6 +337,8 @@
 - **kuwo:song**: 酷我音乐
 - **la7.it**
 - **Laola1Tv**
 - **Lcp**
 - **LcpPlay**
 - **Le**: 乐视网
 - **Learnr**
 - **Lecture2Go**
@ -397,7 +400,6 @@
 - **MSN**
 - **MTV**
 - **mtv.de**
 - **mtviggy.com**
 - **mtvservices:embedded**
 - **MuenchenTV**: münchen.tv
 - **MusicPlayOn**
@ -437,7 +439,6 @@
 - **Newstube**
 - **NextMedia**: 蘋果日報
 - **NextMediaActionNews**: 蘋果日報 - 動新聞
 - **nextmovie.com**
 - **nfb**: National Film Board of Canada
 - **nfl.com**
 - **nhl.com**
@ -477,6 +478,7 @@
 - **NYTimes**
 - **NYTimesArticle**
 - **ocw.mit.edu**
 - **OdaTV**
 - **Odnoklassniki**
 - **OktoberfestTV**
 - **on.aol.com**
@ -694,6 +696,7 @@
 - **TNAFlix**
 - **TNAFlixNetworkEmbed**
 - **toggle**
 - **Tosh**: Tosh.0
 - **tou.tv**
 - **Toypics**: Toypics user profile
 - **ToypicsUser**: Toypics user profile
--- a/test/test_all_urls.py
+++ b/test/test_all_urls.py
@ -101,8 +101,6 @@ class TestAllURLsMatching(unittest.TestCase):
        self.assertMatch(':ytsubs', ['youtube:subscriptions'])
        self.assertMatch(':ytsubscriptions', ['youtube:subscriptions'])
        self.assertMatch(':ythistory', ['youtube:history'])
        self.assertMatch(':thedailyshow', ['ComedyCentralShows'])
        self.assertMatch(':tds', ['ComedyCentralShows'])
    def test_vimeo_matching(self):
        self.assertMatch('https://vimeo.com/channels/tributes', ['vimeo:channel'])
--- a/youtube_dl/extractor/ard.py
+++ b/youtube_dl/extractor/ard.py
@ -73,6 +73,7 @@ class ARDMediathekIE(InfoExtractor):
            'description': 'md5:c0c1c8048514deaed2a73b3a60eecacb',
            'duration': 3287,
        },
        'skip': 'Video is no longer available',
    }]
    def _extract_media_info(self, media_info_url, webpage, video_id):
--- a/youtube_dl/extractor/arkena.py
+++ b/youtube_dl/extractor/arkena.py
@ -0,0 +1,115 @@
 # coding: utf-8
 from __future__ import unicode_literals
 import re
 from .common import InfoExtractor
 from ..utils import (
    determine_ext,
    float_or_none,
    int_or_none,
    mimetype2ext,
    parse_iso8601,
    strip_jsonp,
 )
 class ArkenaIE(InfoExtractor):
    """Information extractor for media played through play.arkena.com.

    Both the ``embed`` and the ``config`` player URLs are accepted; the
    media id and the numeric account id are taken from the URL path.
    """
    _VALID_URL = r'https?://play\.arkena\.com/(?:config|embed)/avp/v\d/player/media/(?P<id>[^/]+)/[^/]+/(?P<account_id>\d+)'
    _TESTS = [{
        'url': 'https://play.arkena.com/embed/avp/v2/player/media/b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe/1/129411',
        'md5': 'b96f2f71b359a8ecd05ce4e1daa72365',
        'info_dict': {
            'id': 'b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe',
            'ext': 'mp4',
            'title': 'Big Buck Bunny',
            'description': 'Royalty free test video',
            'timestamp': 1432816365,
            'upload_date': '20150528',
            'is_live': False,
        },
    }, {
        'url': 'https://play.arkena.com/config/avp/v2/player/media/b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe/1/129411/?callbackMethod=jQuery1111023664739129262213_1469227693893',
        'only_matching': True,
    }, {
        'url': 'http://play.arkena.com/config/avp/v1/player/media/327336/darkmatter/131064/?callbackMethod=jQuery1111002221189684892677_1469227595972',
        'only_matching': True,
    }, {
        'url': 'http://play.arkena.com/embed/avp/v1/player/media/327336/darkmatter/131064/',
        'only_matching': True,
    }]

    @staticmethod
    def _extract_url(webpage):
        """Return the URL of the first Arkena embed iframe in *webpage*.

        Returns None when no ``<iframe>`` whose ``src`` points at
        ``play.arkena.com/embed/avp/`` is present.
        """
        # See https://support.arkena.com/display/PLAY/Ways+to+embed+your+video
        iframe = re.search(
            r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//play\.arkena\.com/embed/avp/.+?)\1',
            webpage)
        return iframe.group('url') if iframe else None

    def _real_extract(self, url):
        """Build the info dict for the media referenced by *url*.

        Downloads the player configuration for the media/account pair, takes
        the first entry of its ``Playlist`` and turns every entry of
        ``MediaFiles`` that has a URL into one or more formats.
        """
        video_id, account_id = re.match(
            self._VALID_URL, url).group('id', 'account_id')
        # The config endpoint is queried with a callback name and the
        # response is unwrapped with strip_jsonp before JSON parsing.
        config_url = (
            'https://play.arkena.com/config/avp/v2/player/media/%s/0/%s/?callbackMethod=_'
            % (video_id, account_id))
        config = self._download_json(
            config_url, video_id, transform_source=strip_jsonp)
        playlist = config['Playlist'][0]
        media_info = playlist['MediaInfo']
        title = media_info['Title']
        media_files = playlist['MediaFiles']
        is_live = False
        formats = []
        for kind_case, kind_formats in media_files.items():
            kind = kind_case.lower()
            for media_file in kind_formats:
                file_url = media_file.get('Url')
                if not file_url:
                    continue
                # Reassigned for every file that has a URL, so the value
                # reported in the result is the one of the last such file.
                is_live = media_file.get('Live') == 'true'
                # Candidate extensions: one derived from the declared MIME
                # type, one derived from the URL itself.
                exts = (
                    mimetype2ext(media_file.get('Type')),
                    determine_ext(file_url, None))
                if kind == 'm3u8' or 'm3u8' in exts:
                    protocol = 'm3u8' if is_live else 'm3u8_native'
                    m3u8_formats = self._extract_m3u8_formats(
                        file_url, video_id, 'mp4',
                        entry_protocol=protocol,
                        m3u8_id=kind, fatal=False, live=is_live)
                    formats.extend(m3u8_formats)
                elif kind == 'flash' or 'f4m' in exts:
                    f4m_formats = self._extract_f4m_formats(
                        file_url, video_id, f4m_id=kind, fatal=False)
                    formats.extend(f4m_formats)
                elif kind == 'dash' or 'mpd' in exts:
                    mpd_formats = self._extract_mpd_formats(
                        file_url, video_id, mpd_id=kind, fatal=False)
                    formats.extend(mpd_formats)
                elif kind == 'silverlight':
                    # TODO: process when ism is supported (see
                    # https://github.com/rg3/youtube-dl/issues/8118)
                    continue
                else:
                    # Plain file: 'Bitrate' is divided by 1000 to get tbr.
                    bitrate = float_or_none(media_file.get('Bitrate'), 1000)
                    if bitrate:
                        format_id = '%s-%d' % (kind, bitrate)
                    else:
                        format_id = kind
                    formats.append({
                        'url': file_url,
                        'format_id': format_id,
                        'tbr': bitrate,
                    })
        self._sort_formats(formats)
        # Prefer the id reported by the config over the one from the URL.
        video_id = media_info.get('VideoId') or video_id
        thumbnails = []
        for poster in media_info.get('Poster') or []:
            if poster.get('Url'):
                thumbnails.append({
                    'url': poster['Url'],
                    'width': int_or_none(poster.get('Size')),
                })
        return {
            'id': video_id,
            'title': title,
            'description': media_info.get('Description'),
            'timestamp': parse_iso8601(media_info.get('PublishDate')),
            'is_live': is_live,
            'thumbnails': thumbnails,
            'formats': formats,
        }
--- a/youtube_dl/extractor/bigflix.py
+++ b/youtube_dl/extractor/bigflix.py
@ -12,7 +12,7 @@ class BigflixIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?bigflix\.com/.+/(?P<id>[0-9]+)'
    _TESTS = [{
        'url': 'http://www.bigflix.com/Hindi-movies/Action-movies/Singham-Returns/16537',
-        'md5': 'ec76aa9b1129e2e5b301a474e54fab74',
+        'md5': 'dc1b4aebb46e3a7077ecc0d9f43f61e3',
        'info_dict': {
            'id': '16537',
            'ext': 'mp4',
@ -26,7 +26,7 @@ class BigflixIE(InfoExtractor):
            'id': '16070',
            'ext': 'mp4',
            'title': 'Madarasapatinam',
-            'description': 'md5:63b9b8ed79189c6f0418c26d9a3452ca',
+            'description': 'md5:9f0470b26a4ba8e824c823b5d95c2f6b',
            'formats': 'mincount:2',
        },
        'params': {
--- a/youtube_dl/extractor/camdemy.py
+++ b/youtube_dl/extractor/camdemy.py
@ -1,7 +1,6 @@
 # coding: utf-8
 from __future__ import unicode_literals
 import datetime
 import re
 from .common import InfoExtractor
@ -10,8 +9,10 @@ from ..compat import (
    compat_urlparse,
 )
 from ..utils import (
-    parse_iso8601,
+    clean_html,
    parse_duration,
    str_to_int,
    unified_strdate,
 )
@ -26,14 +27,14 @@ class CamdemyIE(InfoExtractor):
            'ext': 'mp4',
            'title': 'Ch1-1 Introduction, Signals (02-23-2012)',
            'thumbnail': 're:^https?://.*\.jpg$',
            'description': '',
            'creator': 'ss11spring',
            'duration': 1591,
            'upload_date': '20130114',
            'timestamp': 1358154556,
            'view_count': int,
        }
    }, {
        # With non-empty description
        # webpage returns "No permission or not login"
        'url': 'http://www.camdemy.com/media/13885',
        'md5': '4576a3bb2581f86c61044822adbd1249',
        'info_dict': {
@ -41,64 +42,71 @@ class CamdemyIE(InfoExtractor):
            'ext': 'mp4',
            'title': 'EverCam + Camdemy QuickStart',
            'thumbnail': 're:^https?://.*\.jpg$',
-            'description': 'md5:050b62f71ed62928f8a35f1a41e186c9',
+            'description': 'md5:2a9f989c2b153a2342acee579c6e7db6',
            'creator': 'evercam',
-            'upload_date': '20140620',
+            'duration': 318,
            'timestamp': 1403271569,
        }
    }, {
-        # External source
+        # External source (YouTube)
        'url': 'http://www.camdemy.com/media/14842',
        'md5': '50e1c3c3aa233d3d7b7daa2fa10b1cf7',
        'info_dict': {
            'id': '2vsYQzNIsJo',
            'ext': 'mp4',
            'title': 'Excel 2013 Tutorial - How to add Password Protection',
            'description': 'Excel 2013 Tutorial for Beginners - How to add Password Protection',
            'upload_date': '20130211',
            'uploader': 'Hun Kim',
            'description': 'Excel 2013 Tutorial for Beginners - How to add Password Protection',
            'uploader_id': 'hunkimtutorials',
-            'title': 'Excel 2013 Tutorial - How to add Password Protection',
+        },
-        }
+        'params': {
            'skip_download': True,
        },
    }]
    def _real_extract(self, url):
        video_id = self._match_id(url)
-        page = self._download_webpage(url, video_id)
+
        webpage = self._download_webpage(url, video_id)
        src_from = self._html_search_regex(
-            r"<div class='srcFrom'>Source: <a title='([^']+)'", page,
+            r"class=['\"]srcFrom['\"][^>]*>Sources?(?:\s+from)?\s*:\s*<a[^>]+(?:href|title)=(['\"])(?P<url>(?:(?!\1).)+)\1",
-            'external source', default=None)
+            webpage, 'external source', default=None, group='url')
        if src_from:
            return self.url_result(src_from)
        oembed_obj = self._download_json(
            'http://www.camdemy.com/oembed/?format=json&url=' + url, video_id)
        title = oembed_obj['title']
        thumb_url = oembed_obj['thumbnail_url']
        video_folder = compat_urlparse.urljoin(thumb_url, 'video/')
        file_list_doc = self._download_xml(
            compat_urlparse.urljoin(video_folder, 'fileList.xml'),
-            video_id, 'Filelist XML')
+            video_id, 'Downloading filelist XML')
        file_name = file_list_doc.find('./video/item/fileName').text
        video_url = compat_urlparse.urljoin(video_folder, file_name)
-        timestamp = parse_iso8601(self._html_search_regex(
+        # Some URLs return "No permission or not login" in a webpage despite being
-            r"<div class='title'>Posted\s*:</div>\s*<div class='value'>([^<>]+)<",
+        # freely available via oembed JSON URL (e.g. http://www.camdemy.com/media/13885)
-            page, 'creation time', fatal=False),
+        upload_date = unified_strdate(self._search_regex(
-            delimiter=' ', timezone=datetime.timedelta(hours=8))
+            r'>published on ([^<]+)<', webpage,
-        view_count = str_to_int(self._html_search_regex(
+            'upload date', default=None))
-            r"<div class='title'>Views\s*:</div>\s*<div class='value'>([^<>]+)<",
+        view_count = str_to_int(self._search_regex(
-            page, 'view count', fatal=False))
+            r'role=["\']viewCnt["\'][^>]*>([\d,.]+) views',
            webpage, 'view count', default=None))
        description = self._html_search_meta(
            'description', webpage, default=None) or clean_html(
            oembed_obj.get('description'))
        return {
            'id': video_id,
            'url': video_url,
-            'title': oembed_obj['title'],
+            'title': title,
            'thumbnail': thumb_url,
-            'description': self._html_search_meta('description', page),
+            'description': description,
-            'creator': oembed_obj['author_name'],
+            'creator': oembed_obj.get('author_name'),
-            'duration': oembed_obj['duration'],
+            'duration': parse_duration(oembed_obj.get('duration')),
-            'timestamp': timestamp,
+            'upload_date': upload_date,
            'view_count': view_count,
        }
--- a/youtube_dl/extractor/cbc.py
+++ b/youtube_dl/extractor/cbc.py
@ -4,9 +4,11 @@ from __future__ import unicode_literals
 import re
 from .common import InfoExtractor
 from ..compat import compat_str
 from ..utils import (
    js_to_json,
    smuggle_url,
    try_get,
 )
@ -27,7 +29,20 @@ class CBCIE(InfoExtractor):
        },
        'skip': 'Geo-restricted to Canada',
    }, {
-        # with clipId
+        # with clipId, feed available via tpfeed.cbc.ca and feed.theplatform.com
        'url': 'http://www.cbc.ca/22minutes/videos/22-minutes-update/22-minutes-update-episode-4',
        'md5': '162adfa070274b144f4fdc3c3b8207db',
        'info_dict': {
            'id': '2414435309',
            'ext': 'mp4',
            'title': '22 Minutes Update: What Not To Wear Quebec',
            'description': "This week's latest Canadian top political story is What Not To Wear Quebec.",
            'upload_date': '20131025',
            'uploader': 'CBCC-NEW',
            'timestamp': 1382717907,
        },
    }, {
        # with clipId, feed only available via tpfeed.cbc.ca
        'url': 'http://www.cbc.ca/archives/entry/1978-robin-williams-freestyles-on-90-minutes-live',
        'md5': '0274a90b51a9b4971fe005c63f592f12',
        'info_dict': {
@ -83,9 +98,15 @@ class CBCIE(InfoExtractor):
            media_id = player_info.get('mediaId')
            if not media_id:
                clip_id = player_info['clipId']
-                media_id = self._download_json(
+                feed = self._download_json(
-                    'http://feed.theplatform.com/f/h9dtGB/punlNGjMlc1F?fields=id&byContent=byReleases%3DbyId%253D' + clip_id,
+                    'http://tpfeed.cbc.ca/f/ExhSPC/vms_5akSXx4Ng_Zn?byCustomValue={:mpsReleases}{%s}' % clip_id,
-                    clip_id)['entries'][0]['id'].split('/')[-1]
+                    clip_id, fatal=False)
                if feed:
                    media_id = try_get(feed, lambda x: x['entries'][0]['guid'], compat_str)
                if not media_id:
                    media_id = self._download_json(
                        'http://feed.theplatform.com/f/h9dtGB/punlNGjMlc1F?fields=id&byContent=byReleases%3DbyId%253D' + clip_id,
                        clip_id)['entries'][0]['id'].split('/')[-1]
            return self.url_result('cbcplayer:%s' % media_id, 'CBCPlayer', media_id)
        else:
            entries = [self.url_result('cbcplayer:%s' % media_id, 'CBCPlayer', media_id) for media_id in re.findall(r'<iframe[^>]+src="[^"]+?mediaId=(\d+)"', webpage)]
--- a/youtube_dl/extractor/cmt.py
+++ b/youtube_dl/extractor/cmt.py
@ -1,5 +1,7 @@
 from __future__ import unicode_literals
 from .mtv import MTVIE
 from ..utils import ExtractorError
 class CMTIE(MTVIE):
@ -16,7 +18,27 @@ class CMTIE(MTVIE):
            'title': 'Garth Brooks - "The Call (featuring Trisha Yearwood)"',
            'description': 'Blame It All On My Roots',
        },
        'skip': 'Video not available',
    }, {
        'url': 'http://www.cmt.com/videos/misc/1504699/still-the-king-ep-109-in-3-minutes.jhtml#id=1739908',
        'md5': 'e61a801ca4a183a466c08bd98dccbb1c',
        'info_dict': {
            'id': '1504699',
            'ext': 'mp4',
            'title': 'Still The King Ep. 109 in 3 Minutes',
            'description': 'Relive or catch up with Still The King by watching this recap of season 1, episode 9. New episodes Sundays 9/8c.',
            'timestamp': 1469421000.0,
            'upload_date': '20160725',
        },
    }, {
        'url': 'http://www.cmt.com/shows/party-down-south/party-down-south-ep-407-gone-girl/1738172/playlist/#id=1738172',
        'only_matching': True,
    }]
    @classmethod
    def _transform_rtmp_url(cls, rtmp_video_url):
        """Transform *rtmp_video_url* via the parent class, rejecting placeholders.

        Raises an expected ExtractorError when the URL contains
        'error_not_available.swf'; any other URL is handed unchanged to the
        parent implementation, whose result is returned.
        """
        if 'error_not_available.swf' not in rtmp_video_url:
            return super(CMTIE, cls)._transform_rtmp_url(rtmp_video_url)
        raise ExtractorError(
            '%s said: video is not available' % cls.IE_NAME, expected=True)
--- a/youtube_dl/extractor/comedycentral.py
+++ b/youtube_dl/extractor/comedycentral.py
@ -1,17 +1,6 @@
 from __future__ import unicode_literals
 import re
 from .mtv import MTVServicesInfoExtractor
 from ..compat import (
    compat_str,
    compat_urllib_parse_urlencode,
 )
 from ..utils import (
    ExtractorError,
    float_or_none,
    unified_strdate,
 )
 class ComedyCentralIE(MTVServicesInfoExtractor):
@ -26,8 +15,10 @@ class ComedyCentralIE(MTVServicesInfoExtractor):
        'info_dict': {
            'id': 'cef0cbb3-e776-4bc9-b62e-8016deccb354',
            'ext': 'mp4',
-            'title': 'CC:Stand-Up|Greg Fitzsimmons: Life on Stage|Uncensored - Too Good of a Mother',
+            'title': 'CC:Stand-Up|August 18, 2013|1|0101|Uncensored - Too Good of a Mother',
            'description': 'After a certain point, breastfeeding becomes c**kblocking.',
            'timestamp': 1376798400,
            'upload_date': '20130818',
        },
    }, {
        'url': 'http://www.cc.com/shows/the-daily-show-with-trevor-noah/interviews/6yx39d/exclusive-rand-paul-extended-interview',
@ -35,244 +26,43 @@ class ComedyCentralIE(MTVServicesInfoExtractor):
    }]
-class ComedyCentralShowsIE(MTVServicesInfoExtractor):
+class ToshIE(MTVServicesInfoExtractor):
-    IE_DESC = 'The Daily Show / The Colbert Report'
+    IE_DESC = 'Tosh.0'
-    # urls can be abbreviations like :thedailyshow
+    _VALID_URL = r'^https?://tosh\.cc\.com/video-(?:clips|collections)/[^/]+/(?P<videotitle>[^/?#]+)'
-    # urls for episodes like:
+    _FEED_URL = 'http://tosh.cc.com/feeds/mrss'
-    # or urls for clips like: http://www.thedailyshow.com/watch/mon-december-10-2012/any-given-gun-day
+
    #                     or: http://www.colbertnation.com/the-colbert-report-videos/421667/november-29-2012/moon-shattering-news
    #                     or: http://www.colbertnation.com/the-colbert-report-collections/422008/festival-of-lights/79524
    _VALID_URL = r'''(?x)^(:(?P<shortname>tds|thedailyshow)
                      |https?://(:www\.)?
                          (?P<showname>thedailyshow|thecolbertreport|tosh)\.(?:cc\.)?com/
                         ((?:full-)?episodes/(?:[0-9a-z]{6}/)?(?P<episode>.*)|
                          (?P<clip>
                              (?:(?:guests/[^/]+|videos|video-(?:clips|playlists)|special-editions|news-team/[^/]+)/[^/]+/(?P<videotitle>[^/?#]+))
                              |(the-colbert-report-(videos|collections)/(?P<clipID>[0-9]+)/[^/]*/(?P<cntitle>.*?))
                              |(watch/(?P<date>[^/]*)/(?P<tdstitle>.*))
                          )|
                          (?P<interview>
                              extended-interviews/(?P<interID>[0-9a-z]+)/
                              (?:playlist_tds_extended_)?(?P<interview_title>[^/?#]*?)
                              (?:/[^/?#]?|[?#]|$))))
                     '''
    _TESTS = [{
        'url': 'http://thedailyshow.cc.com/watch/thu-december-13-2012/kristen-stewart',
        'md5': '4e2f5cb088a83cd8cdb7756132f9739d',
        'info_dict': {
            'id': 'ab9ab3e7-5a98-4dbe-8b21-551dc0523d55',
            'ext': 'mp4',
            'upload_date': '20121213',
            'description': 'Kristen Stewart learns to let loose in "On the Road."',
            'uploader': 'thedailyshow',
            'title': 'thedailyshow kristen-stewart part 1',
        }
    }, {
        'url': 'http://thedailyshow.cc.com/extended-interviews/b6364d/sarah-chayes-extended-interview',
        'info_dict': {
            'id': 'sarah-chayes-extended-interview',
            'description': 'Carnegie Endowment Senior Associate Sarah Chayes discusses how corrupt institutions function throughout the world in her book "Thieves of State: Why Corruption Threatens Global Security."',
            'title': 'thedailyshow Sarah Chayes Extended Interview',
        },
        'playlist': [
            {
                'info_dict': {
                    'id': '0baad492-cbec-4ec1-9e50-ad91c291127f',
                    'ext': 'mp4',
                    'upload_date': '20150129',
                    'description': 'Carnegie Endowment Senior Associate Sarah Chayes discusses how corrupt institutions function throughout the world in her book "Thieves of State: Why Corruption Threatens Global Security."',
                    'uploader': 'thedailyshow',
                    'title': 'thedailyshow sarah-chayes-extended-interview part 1',
                },
            },
            {
                'info_dict': {
                    'id': '1e4fb91b-8ce7-4277-bd7c-98c9f1bbd283',
                    'ext': 'mp4',
                    'upload_date': '20150129',
                    'description': 'Carnegie Endowment Senior Associate Sarah Chayes discusses how corrupt institutions function throughout the world in her book "Thieves of State: Why Corruption Threatens Global Security."',
                    'uploader': 'thedailyshow',
                    'title': 'thedailyshow sarah-chayes-extended-interview part 2',
                },
            },
        ],
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'http://thedailyshow.cc.com/extended-interviews/xm3fnq/andrew-napolitano-extended-interview',
        'only_matching': True,
    }, {
        'url': 'http://thecolbertreport.cc.com/videos/29w6fx/-realhumanpraise-for-fox-news',
        'only_matching': True,
    }, {
        'url': 'http://thecolbertreport.cc.com/videos/gh6urb/neil-degrasse-tyson-pt--1?xrs=eml_col_031114',
        'only_matching': True,
    }, {
        'url': 'http://thedailyshow.cc.com/guests/michael-lewis/3efna8/exclusive---michael-lewis-extended-interview-pt--3',
        'only_matching': True,
    }, {
        'url': 'http://thedailyshow.cc.com/episodes/sy7yv0/april-8--2014---denis-leary',
        'only_matching': True,
    }, {
        'url': 'http://thecolbertreport.cc.com/episodes/8ase07/april-8--2014---jane-goodall',
        'only_matching': True,
    }, {
        'url': 'http://thedailyshow.cc.com/video-playlists/npde3s/the-daily-show-19088-highlights',
        'only_matching': True,
    }, {
        'url': 'http://thedailyshow.cc.com/video-playlists/t6d9sg/the-daily-show-20038-highlights/be3cwo',
        'only_matching': True,
    }, {
        'url': 'http://thedailyshow.cc.com/special-editions/2l8fdb/special-edition---a-look-back-at-food',
        'only_matching': True,
    }, {
        'url': 'http://thedailyshow.cc.com/news-team/michael-che/7wnfel/we-need-to-talk-about-israel',
        'only_matching': True,
    }, {
        'url': 'http://tosh.cc.com/video-clips/68g93d/twitter-users-share-summer-plans',
        'info_dict': {
            'description': 'Tosh asked fans to share their summer plans.',
            'title': 'Twitter Users Share Summer Plans',
        },
        'playlist': [{
            'md5': 'f269e88114c1805bb6d7653fecea9e06',
            'info_dict': {
                'id': '90498ec2-ed00-11e0-aca6-0026b9414f30',
                'ext': 'mp4',
                'title': 'Tosh.0|June 9, 2077|2|211|Twitter Users Share Summer Plans',
                'description': 'Tosh asked fans to share their summer plans.',
                'thumbnail': 're:^https?://.*\.jpg',
                # It's really reported to be published on year 2077
                'upload_date': '20770610',
                'timestamp': 3390510600,
                'subtitles': {
                    'en': 'mincount:3',
                },
            },
        }]
    }, {
        'url': 'http://tosh.cc.com/video-collections/x2iz7k/just-plain-foul/m5q4fp',
        'only_matching': True,
    }]
-    _available_formats = ['3500', '2200', '1700', '1200', '750', '400']
+    @classmethod
-
+    def _transform_rtmp_url(cls, rtmp_video_url):
-    _video_extensions = {
+        new_urls = super(ToshIE, cls)._transform_rtmp_url(rtmp_video_url)
-        '3500': 'mp4',
+        new_urls['rtmp'] = rtmp_video_url.replace('viacomccstrm', 'viacommtvstrm')
-        '2200': 'mp4',
+        return new_urls
        '1700': 'mp4',
        '1200': 'mp4',
        '750': 'mp4',
        '400': 'mp4',
    }
    _video_dimensions = {
        '3500': (1280, 720),
        '2200': (960, 540),
        '1700': (768, 432),
        '1200': (640, 360),
        '750': (512, 288),
        '400': (384, 216),
    }
    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        if mobj.group('shortname'):
            return self.url_result('http://www.cc.com/shows/the-daily-show-with-trevor-noah/full-episodes')
        if mobj.group('clip'):
            if mobj.group('videotitle'):
                epTitle = mobj.group('videotitle')
            elif mobj.group('showname') == 'thedailyshow':
                epTitle = mobj.group('tdstitle')
            else:
                epTitle = mobj.group('cntitle')
            dlNewest = False
        elif mobj.group('interview'):
            epTitle = mobj.group('interview_title')
            dlNewest = False
        else:
            dlNewest = not mobj.group('episode')
            if dlNewest:
                epTitle = mobj.group('showname')
            else:
                epTitle = mobj.group('episode')
        show_name = mobj.group('showname')
        webpage, htmlHandle = self._download_webpage_handle(url, epTitle)
        if dlNewest:
            url = htmlHandle.geturl()
            mobj = re.match(self._VALID_URL, url, re.VERBOSE)
            if mobj is None:
                raise ExtractorError('Invalid redirected URL: ' + url)
            if mobj.group('episode') == '':
                raise ExtractorError('Redirected URL is still not specific: ' + url)
            epTitle = (mobj.group('episode') or mobj.group('videotitle')).rpartition('/')[-1]
        mMovieParams = re.findall('(?:<param name="movie" value="|var url = ")(http://media.mtvnservices.com/([^"]*(?:episode|video).*?:.*?))"', webpage)
        if len(mMovieParams) == 0:
            # The Colbert Report embeds the information in a without
            # a URL prefix; so extract the alternate reference
            # and then add the URL prefix manually.
            altMovieParams = re.findall('data-mgid="([^"]*(?:episode|video|playlist).*?:.*?)"', webpage)
            if len(altMovieParams) == 0:
                raise ExtractorError('unable to find Flash URL in webpage ' + url)
            else:
                mMovieParams = [('http://media.mtvnservices.com/' + altMovieParams[0], altMovieParams[0])]
        uri = mMovieParams[0][1]
        # Correct cc.com in uri
        uri = re.sub(r'(episode:[^.]+)(\.cc)?\.com', r'\1.com', uri)
        index_url = 'http://%s.cc.com/feeds/mrss?%s' % (show_name, compat_urllib_parse_urlencode({'uri': uri}))
        idoc = self._download_xml(
            index_url, epTitle,
            'Downloading show index', 'Unable to download episode index')
        title = idoc.find('./channel/title').text
        description = idoc.find('./channel/description').text
        entries = []
        item_els = idoc.findall('.//item')
        for part_num, itemEl in enumerate(item_els):
            upload_date = unified_strdate(itemEl.findall('./pubDate')[0].text)
            thumbnail = itemEl.find('.//{http://search.yahoo.com/mrss/}thumbnail').attrib.get('url')
            content = itemEl.find('.//{http://search.yahoo.com/mrss/}content')
            duration = float_or_none(content.attrib.get('duration'))
            mediagen_url = content.attrib['url']
            guid = itemEl.find('./guid').text.rpartition(':')[-1]
            cdoc = self._download_xml(
                mediagen_url, epTitle,
                'Downloading configuration for segment %d / %d' % (part_num + 1, len(item_els)))
            turls = []
            for rendition in cdoc.findall('.//rendition'):
                finfo = (rendition.attrib['bitrate'], rendition.findall('./src')[0].text)
                turls.append(finfo)
            formats = []
            for format, rtmp_video_url in turls:
                w, h = self._video_dimensions.get(format, (None, None))
                formats.append({
                    'format_id': 'vhttp-%s' % format,
                    'url': self._transform_rtmp_url(rtmp_video_url),
                    'ext': self._video_extensions.get(format, 'mp4'),
                    'height': h,
                    'width': w,
                })
                formats.append({
                    'format_id': 'rtmp-%s' % format,
                    'url': rtmp_video_url.replace('viacomccstrm', 'viacommtvstrm'),
                    'ext': self._video_extensions.get(format, 'mp4'),
                    'height': h,
                    'width': w,
                })
                self._sort_formats(formats)
            subtitles = self._extract_subtitles(cdoc, guid)
            virtual_id = show_name + ' ' + epTitle + ' part ' + compat_str(part_num + 1)
            entries.append({
                'id': guid,
                'title': virtual_id,
                'formats': formats,
                'uploader': show_name,
                'upload_date': upload_date,
                'duration': duration,
                'thumbnail': thumbnail,
                'description': description,
                'subtitles': subtitles,
            })
        return {
            '_type': 'playlist',
            'id': epTitle,
            'entries': entries,
            'title': show_name + ' ' + title,
            'description': description,
        }
 class ComedyCentralTVIE(MTVServicesInfoExtractor):
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@ -1481,6 +1481,13 @@ class InfoExtractor(object):
            compat_etree_fromstring(mpd.encode('utf-8')), mpd_id, mpd_base_url, formats_dict=formats_dict)
    def _parse_mpd_formats(self, mpd_doc, mpd_id=None, mpd_base_url='', formats_dict={}):
        """
        Parse formats from MPD manifest.
        References:
         1. MPEG-DASH Standard, ISO/IEC 23009-1:2014(E),
            http://standards.iso.org/ittf/PubliclyAvailableStandards/c065274_ISO_IEC_23009-1_2014.zip
         2. https://en.wikipedia.org/wiki/Dynamic_Adaptive_Streaming_over_HTTP
        """
        if mpd_doc.get('type') == 'dynamic':
            return []
@ -1513,8 +1520,16 @@ class InfoExtractor(object):
                        s_e = segment_timeline.findall(_add_ns('S'))
                        if s_e:
                            ms_info['total_number'] = 0
                            ms_info['s'] = []
                            for s in s_e:
-                                ms_info['total_number'] += 1 + int(s.get('r', '0'))
+                                r = int(s.get('r', 0))
                                ms_info['total_number'] += 1 + r
                                ms_info['s'].append({
                                    't': int(s.get('t', 0)),
                                    # @d is mandatory (see [1, 5.3.9.6.2, Table 17, page 60])
                                    'd': int(s.attrib['d']),
                                    'r': r,
                                })
                    else:
                        timescale = segment_template.get('timescale')
                        if timescale:
@ -1551,7 +1566,7 @@ class InfoExtractor(object):
                        continue
                    representation_attrib = adaptation_set.attrib.copy()
                    representation_attrib.update(representation.attrib)
-                    # According to page 41 of ISO/IEC 29001-1:2014, @mimeType is mandatory
+                    # According to [1, 5.3.7.2, Table 9, page 41], @mimeType is mandatory
                    mime_type = representation_attrib['mimeType']
                    content_type = mime_type.split('/')[0]
                    if content_type == 'text':
@ -1595,16 +1610,40 @@ class InfoExtractor(object):
                                representation_ms_info['total_number'] = int(math.ceil(float(period_duration) / segment_duration))
                            media_template = representation_ms_info['media_template']
                            media_template = media_template.replace('$RepresentationID$', representation_id)
-                            media_template = re.sub(r'\$(Number|Bandwidth)\$', r'%(\1)d', media_template)
+                            media_template = re.sub(r'\$(Number|Bandwidth|Time)\$', r'%(\1)d', media_template)
-                            media_template = re.sub(r'\$(Number|Bandwidth)%([^$]+)\$', r'%(\1)\2', media_template)
+                            media_template = re.sub(r'\$(Number|Bandwidth|Time)%([^$]+)\$', r'%(\1)\2', media_template)
                            media_template.replace('$$', '$')
-                            representation_ms_info['segment_urls'] = [
+
-                                media_template % {
+                            # As per [1, 5.3.9.4.4, Table 16, page 55] $Number$ and $Time$
-                                    'Number': segment_number,
+                            # can't be used at the same time
-                                    'Bandwidth': representation_attrib.get('bandwidth')}
+                            if '%(Number' in media_template:
-                                for segment_number in range(
+                                representation_ms_info['segment_urls'] = [
-                                    representation_ms_info['start_number'],
+                                    media_template % {
-                                    representation_ms_info['total_number'] + representation_ms_info['start_number'])]
+                                        'Number': segment_number,
                                        'Bandwidth': representation_attrib.get('bandwidth'),
                                    }
                                    for segment_number in range(
                                        representation_ms_info['start_number'],
                                        representation_ms_info['total_number'] + representation_ms_info['start_number'])]
                            else:
                                representation_ms_info['segment_urls'] = []
                                segment_time = 0
                                def add_segment_url():
                                    representation_ms_info['segment_urls'].append(
                                        media_template % {
                                            'Time': segment_time,
                                            'Bandwidth': representation_attrib.get('bandwidth'),
                                        }
                                    )
                                for num, s in enumerate(representation_ms_info['s']):
                                    segment_time = s.get('t') or segment_time
                                    add_segment_url()
                                    for r in range(s.get('r', 0)):
                                        segment_time += s['d']
                                        add_segment_url()
                                    segment_time += s['d']
                        if 'segment_urls' in representation_ms_info:
                            f.update({
                                'segment_urls': representation_ms_info['segment_urls'],
@ -1747,7 +1786,7 @@ class InfoExtractor(object):
        any_restricted = False
        for tc in self.get_testcases(include_onlymatching=False):
-            if 'playlist' in tc:
+            if tc.get('playlist', []):
                tc = tc['playlist'][0]
            is_restricted = age_restricted(
                tc.get('info_dict', {}).get('age_limit'), age_limit)
--- a/youtube_dl/extractor/dailymail.py
+++ b/youtube_dl/extractor/dailymail.py
@ -5,19 +5,20 @@ from .common import InfoExtractor
 from ..utils import (
    int_or_none,
    determine_protocol,
    unescapeHTML,
 )
 class DailyMailIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?dailymail\.co\.uk/video/[^/]+/video-(?P<id>[0-9]+)'
    _TEST = {
-        'url': 'http://www.dailymail.co.uk/video/sciencetech/video-1288527/Turn-video-impressionist-masterpiece.html',
+        'url': 'http://www.dailymail.co.uk/video/tvshowbiz/video-1295863/The-Mountain-appears-sparkling-water-ad-Heavy-Bubbles.html',
-        'md5': '2f639d446394f53f3a33658b518b6615',
+        'md5': 'f6129624562251f628296c3a9ffde124',
        'info_dict': {
-            'id': '1288527',
+            'id': '1295863',
            'ext': 'mp4',
-            'title': 'Turn any video into an impressionist masterpiece',
+            'title': 'The Mountain appears in sparkling water ad for \'Heavy Bubbles\'',
-            'description': 'md5:88ddbcb504367987b2708bb38677c9d2',
+            'description': 'md5:a93d74b6da172dd5dc4d973e0b766a84',
        }
    }
@ -26,7 +27,7 @@ class DailyMailIE(InfoExtractor):
        webpage = self._download_webpage(url, video_id)
        video_data = self._parse_json(self._search_regex(
            r"data-opts='({.+?})'", webpage, 'video data'), video_id)
-        title = video_data['title']
+        title = unescapeHTML(video_data['title'])
        video_sources = self._download_json(video_data.get(
            'sources', {}).get('url') or 'http://www.dailymail.co.uk/api/player/%s/video-sources.json' % video_id, video_id)
@ -55,7 +56,7 @@ class DailyMailIE(InfoExtractor):
        return {
            'id': video_id,
            'title': title,
-            'description': video_data.get('descr'),
+            'description': unescapeHTML(video_data.get('descr')),
            'thumbnail': video_data.get('poster') or video_data.get('thumbnail'),
            'formats': formats,
        }
--- a/youtube_dl/extractor/dcn.py
+++ b/youtube_dl/extractor/dcn.py
@ -62,11 +62,9 @@ class DCNBaseIE(InfoExtractor):
                r'file\s*:\s*"https?(://[^"]+)/playlist.m3u8',
                r'<a[^>]+href="rtsp(://[^"]+)"'
            ], webpage, 'format url')
-        # TODO: Current DASH formats are broken - $Time$ pattern in
+        formats.extend(self._extract_mpd_formats(
-        # <SegmentTemplate> not implemented yet
+            format_url_base + '/manifest.mpd',
-        # formats.extend(self._extract_mpd_formats(
+            video_id, mpd_id='dash', fatal=False))
        #     format_url_base + '/manifest.mpd',
        #     video_id, mpd_id='dash', fatal=False))
        formats.extend(self._extract_m3u8_formats(
            format_url_base + '/playlist.m3u8', video_id, 'mp4',
            m3u8_entry_protocol, m3u8_id='hls', fatal=False))
--- a/youtube_dl/extractor/eporner.py
+++ b/youtube_dl/extractor/eporner.py
@ -4,19 +4,23 @@ from __future__ import unicode_literals
 import re
 from .common import InfoExtractor
 from ..compat import compat_str
 from ..utils import (
    encode_base_n,
    ExtractorError,
    int_or_none,
    parse_duration,
    str_to_int,
 )
 class EpornerIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?eporner\.com/hd-porn/(?P<id>\w+)/(?P<display_id>[\w-]+)'
+    _VALID_URL = r'https?://(?:www\.)?eporner\.com/hd-porn/(?P<id>\w+)(?:/(?P<display_id>[\w-]+))?'
    _TESTS = [{
        'url': 'http://www.eporner.com/hd-porn/95008/Infamous-Tiffany-Teen-Strip-Tease-Video/',
        'md5': '39d486f046212d8e1b911c52ab4691f8',
        'info_dict': {
-            'id': '95008',
+            'id': 'qlDUmNsj6VS',
            'display_id': 'Infamous-Tiffany-Teen-Strip-Tease-Video',
            'ext': 'mp4',
            'title': 'Infamous Tiffany Teen Strip Tease Video',
@ -28,34 +32,72 @@ class EpornerIE(InfoExtractor):
        # New (May 2016) URL layout
        'url': 'http://www.eporner.com/hd-porn/3YRUtzMcWn0/Star-Wars-XXX-Parody/',
        'only_matching': True,
    }, {
        'url': 'http://www.eporner.com/hd-porn/3YRUtzMcWn0',
        'only_matching': True,
    }]
    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
-        display_id = mobj.group('display_id')
+        display_id = mobj.group('display_id') or video_id
-        webpage = self._download_webpage(url, display_id)
+        webpage, urlh = self._download_webpage_handle(url, display_id)
        title = self._html_search_regex(
            r'<title>(.*?) - EPORNER', webpage, 'title')
-        redirect_url = 'http://www.eporner.com/config5/%s' % video_id
+        video_id = self._match_id(compat_str(urlh.geturl()))
        player_code = self._download_webpage(
            redirect_url, display_id, note='Downloading player config')
-        sources = self._search_regex(
+        hash = self._search_regex(
-            r'(?s)sources\s*:\s*\[\s*({.+?})\s*\]', player_code, 'sources')
+            r'hash\s*:\s*["\']([\da-f]{32})', webpage, 'hash')
        title = self._og_search_title(webpage, default=None) or self._html_search_regex(
            r'<title>(.+?) - EPORNER', webpage, 'title')
        # Reverse engineered from vjs.js
        def calc_hash(s):
            return ''.join((encode_base_n(int(s[lb:lb + 8], 16), 36) for lb in range(0, 32, 8)))
        video = self._download_json(
            'http://www.eporner.com/xhr/video/%s' % video_id,
            display_id, note='Downloading video JSON',
            query={
                'hash': calc_hash(hash),
                'device': 'generic',
                'domain': 'www.eporner.com',
                'fallback': 'false',
            })
        if video.get('available') is False:
            raise ExtractorError(
                '%s said: %s' % (self.IE_NAME, video['message']), expected=True)
        sources = video['sources']
        formats = []
-        for video_url, format_id in re.findall(r'file\s*:\s*"([^"]+)",\s*label\s*:\s*"([^"]+)"', sources):
+        for kind, formats_dict in sources.items():
-            fmt = {
+            if not isinstance(formats_dict, dict):
-                'url': video_url,
+                continue
-                'format_id': format_id,
+            for format_id, format_dict in formats_dict.items():
-            }
+                if not isinstance(format_dict, dict):
-            m = re.search(r'^(\d+)', format_id)
+                    continue
-            if m:
+                src = format_dict.get('src')
-                fmt['height'] = int(m.group(1))
+                if not isinstance(src, compat_str) or not src.startswith('http'):
-            formats.append(fmt)
+                    continue
                if kind == 'hls':
                    formats.extend(self._extract_m3u8_formats(
                        src, display_id, 'mp4', entry_protocol='m3u8_native',
                        m3u8_id=kind, fatal=False))
                else:
                    height = int_or_none(self._search_regex(
                        r'(\d+)[pP]', format_id, 'height', default=None))
                    fps = int_or_none(self._search_regex(
                        r'(\d+)fps', format_id, 'fps', default=None))
                    formats.append({
                        'url': src,
                        'format_id': format_id,
                        'height': height,
                        'fps': fps,
                    })
        self._sort_formats(formats)
        duration = parse_duration(self._html_search_meta('duration', webpage))
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@ -44,6 +44,7 @@ from .appletrailers import (
    AppleTrailersSectionIE,
 )
 from .archiveorg import ArchiveOrgIE
 from .arkena import ArkenaIE
 from .ard import (
    ARDIE,
    ARDMediathekIE,
@ -158,8 +159,8 @@ from .coub import CoubIE
 from .collegerama import CollegeRamaIE
 from .comedycentral import (
    ComedyCentralIE,
    ComedyCentralShowsIE,
    ComedyCentralTVIE,
    ToshIE,
 )
 from .comcarcoff import ComCarCoffIE
 from .commonmistakes import CommonMistakesIE, UnicodeBOMIE
@ -397,6 +398,10 @@ from .kuwo import (
 )
 from .la7 import LA7IE
 from .laola1tv import Laola1TvIE
 from .lcp import (
    LcpPlayIE,
    LcpIE,
 )
 from .learnr import LearnrIE
 from .lecture2go import Lecture2GoIE
 from .lemonde import LemondeIE
@ -475,7 +480,6 @@ from .msn import MSNIE
 from .mtv import (
    MTVIE,
    MTVServicesEmbeddedIE,
    MTVIggyIE,
    MTVDEIE,
 )
 from .muenchentv import MuenchenTVIE
@ -525,7 +529,6 @@ from .nextmedia import (
    NextMediaActionNewsIE,
    AppleDailyIE,
 )
 from .nextmovie import NextMovieIE
 from .nfb import NFBIE
 from .nfl import NFLIE
 from .nhl import (
--- a/youtube_dl/extractor/facebook.py
+++ b/youtube_dl/extractor/facebook.py
@ -27,7 +27,7 @@ class FacebookIE(InfoExtractor):
    _VALID_URL = r'''(?x)
                (?:
                    https?://
-                        (?:\w+\.)?facebook\.com/
+                        (?:[\w-]+\.)?facebook\.com/
                        (?:[^#]*?\#!/)?
                        (?:
                            (?:
@ -127,6 +127,9 @@ class FacebookIE(InfoExtractor):
    }, {
        'url': 'https://www.facebook.com/groups/164828000315060/permalink/764967300301124/',
        'only_matching': True,
    }, {
        'url': 'https://zh-hk.facebook.com/peoplespower/videos/1135894589806027/',
        'only_matching': True,
    }]
    @staticmethod
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@ -62,6 +62,7 @@ from .videomore import VideomoreIE
 from .googledrive import GoogleDriveIE
 from .jwplatform import JWPlatformIE
 from .digiteka import DigitekaIE
 from .arkena import ArkenaIE
 from .instagram import InstagramIE
 from .liveleak import LiveLeakIE
 from .threeqsdn import ThreeQSDNIE
@ -70,6 +71,7 @@ from .vessel import VesselIE
 from .kaltura import KalturaIE
 from .eagleplatform import EaglePlatformIE
 from .facebook import FacebookIE
 from .soundcloud import SoundcloudIE
 class GenericIE(InfoExtractor):
@ -473,7 +475,7 @@ class GenericIE(InfoExtractor):
            'url': 'http://www.vestifinance.ru/articles/25753',
            'info_dict': {
                'id': '25753',
-                'title': 'Вести Экономика ― Прямые трансляции с Форума-выставки "Госзаказ-2013"',
+                'title': 'Прямые трансляции с Форума-выставки "Госзаказ-2013"',
            },
            'playlist': [{
                'info_dict': {
@ -640,6 +642,8 @@ class GenericIE(InfoExtractor):
                'ext': 'mp4',
                'title': 'Key and Peele|October 10, 2012|2|203|Liam Neesons - Uncensored',
                'description': 'Two valets share their love for movie star Liam Neesons.',
                'timestamp': 1349922600,
                'upload_date': '20121011',
            },
        },
        # YouTube embed via <data-embed-url="">
@ -781,6 +785,15 @@ class GenericIE(InfoExtractor):
                'upload_date': '20141029',
            }
        },
        # Soundcloud multiple embeds
        {
            'url': 'http://www.guitarplayer.com/lessons/1014/legato-workout-one-hour-to-more-fluid-performance---tab/52809',
            'info_dict': {
                'id': '52809',
                'title': 'Guitar Essentials: Legato Workout—One-Hour to Fluid Performance  | TAB + AUDIO',
            },
            'playlist_mincount': 7,
        },
        # Livestream embed
        {
            'url': 'http://www.esa.int/Our_Activities/Space_Science/Rosetta/Philae_comet_touch-down_webcast',
@ -856,6 +869,7 @@ class GenericIE(InfoExtractor):
                'description': 'md5:601cb790edd05908957dae8aaa866465',
                'upload_date': '20150220',
            },
            'skip': 'All The Daily Show URLs now redirect to http://www.cc.com/shows/',
        },
        # jwplayer YouTube
        {
@ -1342,6 +1356,23 @@ class GenericIE(InfoExtractor):
            },
            'add_ie': ['Vimeo'],
        },
        {
            'url': 'https://support.arkena.com/display/PLAY/Ways+to+embed+your+video',
            'md5': 'b96f2f71b359a8ecd05ce4e1daa72365',
            'info_dict': {
                'id': 'b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe',
                'ext': 'mp4',
                'title': 'Big Buck Bunny',
                'description': 'Royalty free test video',
                'timestamp': 1432816365,
                'upload_date': '20150528',
                'is_live': False,
            },
            'params': {
                'skip_download': True,
            },
            'add_ie': [ArkenaIE.ie_key()],
        },
        # {
        #     # TODO: find another test
        #     # http://schema.org/VideoObject
@ -1978,12 +2009,9 @@ class GenericIE(InfoExtractor):
            return self.url_result(myvi_url)
        # Look for embedded soundcloud player
-        mobj = re.search(
+        soundcloud_urls = SoundcloudIE._extract_urls(webpage)
-            r'<iframe\s+(?:[a-zA-Z0-9_-]+="[^"]+"\s+)*src="(?P<url>https?://(?:w\.)?soundcloud\.com/player[^"]+)"',
+        if soundcloud_urls:
-            webpage)
+            return _playlist_from_matches(soundcloud_urls, getter=unescapeHTML, ie=SoundcloudIE.ie_key())
        if mobj is not None:
            url = unescapeHTML(mobj.group('url'))
            return self.url_result(url)
        # Look for embedded mtvservices player
        mtvservices_url = MTVServicesEmbeddedIE._extract_url(webpage)
@ -2146,6 +2174,11 @@ class GenericIE(InfoExtractor):
        if digiteka_url:
            return self.url_result(self._proto_relative_url(digiteka_url), DigitekaIE.ie_key())
        # Look for Arkena embeds
        arkena_url = ArkenaIE._extract_url(webpage)
        if arkena_url:
            return self.url_result(arkena_url, ArkenaIE.ie_key())
        # Look for Limelight embeds
        mobj = re.search(r'LimelightPlayer\.doLoad(Media|Channel|ChannelList)\(["\'](?P<id>[a-z0-9]{32})', webpage)
        if mobj:
--- a/youtube_dl/extractor/instagram.py
+++ b/youtube_dl/extractor/instagram.py
@ -36,7 +36,6 @@ class InstagramIE(InfoExtractor):
        'info_dict': {
            'id': 'BA-pQFBG8HZ',
            'ext': 'mp4',
            'uploader_id': 'britneyspears',
            'title': 'Video by britneyspears',
            'thumbnail': 're:^https?://.*\.jpg',
            'timestamp': 1453760977,
--- a/youtube_dl/extractor/lcp.py
+++ b/youtube_dl/extractor/lcp.py
@ -0,0 +1,90 @@
 # coding: utf-8
 from __future__ import unicode_literals
 from .common import InfoExtractor
 from .arkena import ArkenaIE
 class LcpPlayIE(ArkenaIE):
    """Extractor for play.lcp.fr embed URLs.

    Only the URL pattern and the test case are declared here; no methods are
    overridden, so extraction behaviour comes from ArkenaIE.
    """
    # Path layout: /embed/<id>/<account_id>/<segment>/<segment>; only the first
    # two segments are captured as named groups.
    _VALID_URL = r'https?://play\.lcp\.fr/embed/(?P<id>[^/]+)/(?P<account_id>[^/]+)/[^/]+/[^/]+'
    _TESTS = [{
        'url': 'http://play.lcp.fr/embed/327336/131064/darkmatter/0',
        'md5': 'b8bd9298542929c06c1c15788b1f277a',
        'info_dict': {
            'id': '327336',
            'ext': 'mp4',
            'title': '327336',
            'timestamp': 1456391602,
            'upload_date': '20160225',
        },
        'params': {
            'skip_download': True,
        },
    }]
 class LcpIE(InfoExtractor):
    """Extractor for www.lcp.fr pages.

    Looks for a play.lcp.fr embed iframe in the page and hands it to
    LcpPlayIE; pages without such an iframe are passed to the 'Generic'
    extractor instead.
    """
    # The last path segment of the URL is captured as the id.
    _VALID_URL = r'https?://(?:www\.)?lcp\.fr/(?:[^/]+/)*(?P<id>[^/]+)'
    _TESTS = [{
        # arkena embed
        'url': 'http://www.lcp.fr/la-politique-en-video/schwartzenberg-prg-preconise-francois-hollande-de-participer-une-primaire',
        'md5': 'b8bd9298542929c06c1c15788b1f277a',
        'info_dict': {
            'id': 'd56d03e9',
            'ext': 'mp4',
            'title': 'Schwartzenberg (PRG) préconise à François Hollande de participer à une primaire à gauche',
            'description': 'md5:96ad55009548da9dea19f4120c6c16a8',
            'timestamp': 1456488895,
            'upload_date': '20160226',
        },
        'params': {
            'skip_download': True,
        },
    }, {
        # dailymotion live stream
        'url': 'http://www.lcp.fr/le-direct',
        'info_dict': {
            'id': 'xji3qy',
            'ext': 'mp4',
            'title': 'La Chaine Parlementaire (LCP), Live TNT',
            'description': 'md5:5c69593f2de0f38bd9a949f2c95e870b',
            'uploader': 'LCP',
            'uploader_id': 'xbz33d',
            'timestamp': 1308923058,
            'upload_date': '20110624',
        },
        'params': {
            # m3u8 live stream
            'skip_download': True,
        },
    }, {
        'url': 'http://www.lcp.fr/emissions/277792-les-volontaires',
        'only_matching': True,
    }]
    def _real_extract(self, url):
        """Return a url_transparent result for the embedded player, or a
        url_result delegating to the 'Generic' extractor when no
        play.lcp.fr iframe is found in the page."""
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)
        # Group 1 is the opening quote of the src attribute and \1 matches the
        # closing one. LcpPlayIE._VALID_URL is interpolated at %s; the '?'
        # that follows attaches to that pattern's trailing '[^/]+', and
        # '(?:(?!\1).)*' then takes everything up to the closing quote.
        # default=None is passed and the result is checked for falsiness below.
        play_url = self._search_regex(
            r'<iframe[^>]+src=(["\'])(?P<url>%s?(?:(?!\1).)*)\1' % LcpPlayIE._VALID_URL,
            webpage, 'play iframe', default=None, group='url')
        if not play_url:
            # No LCP player iframe on the page: hand the same URL to 'Generic'.
            return self.url_result(url, 'Generic')
        # Prefer the OpenGraph title; otherwise use the twitter:title meta tag.
        # NOTE(review): fatal=True presumably makes a missing title an error - confirm.
        title = self._og_search_title(webpage, default=None) or self._html_search_meta(
            'twitter:title', webpage, fatal=True)
        description = self._html_search_meta(
            ('description', 'twitter:description'), webpage)
        # The iframe URL is processed by LcpPlayIE; title, description and
        # display_id taken from this page are supplied alongside it.
        return {
            '_type': 'url_transparent',
            'ie_key': LcpPlayIE.ie_key(),
            'url': play_url,
            'display_id': display_id,
            'title': title,
            'description': description,
        }
--- a/youtube_dl/extractor/mgtv.py
+++ b/youtube_dl/extractor/mgtv.py
@ -9,7 +9,7 @@ class MGTVIE(InfoExtractor):
    _VALID_URL = r'https?://www\.mgtv\.com/v/(?:[^/]+/)*(?P<id>\d+)\.html'
    IE_DESC = '芒果TV'
-    _TEST = {
+    _TESTS = [{
        'url': 'http://www.mgtv.com/v/1/290525/f/3116640.html',
        'md5': '1bdadcf760a0b90946ca68ee9a2db41a',
        'info_dict': {
@ -20,7 +20,11 @@ class MGTVIE(InfoExtractor):
            'duration': 7461,
            'thumbnail': 're:^https?://.*\.jpg$',
        },
-    }
+    }, {
        # no tbr extracted from stream_url
        'url': 'http://www.mgtv.com/v/1/1/f/3324755.html',
        'only_matching': True,
    }]
    def _real_extract(self, url):
        video_id = self._match_id(url)
@ -41,7 +45,8 @@ class MGTVIE(InfoExtractor):
            def extract_format(stream_url, format_id, idx, query={}):
                format_info = self._download_json(
                    stream_url, video_id,
-                    note='Download video info for format %s' % format_id or '#%d' % idx, query=query)
+                    note='Download video info for format %s' % (format_id or '#%d' % idx),
                    query=query)
                return {
                    'format_id': format_id,
                    'url': format_info['info'],
--- a/youtube_dl/extractor/mtv.py
+++ b/youtube_dl/extractor/mtv.py
@ -16,6 +16,7 @@ from ..utils import (
    HEADRequest,
    sanitized_Request,
    strip_or_none,
    timeconvert,
    unescapeHTML,
    url_basename,
    RegexNotFoundError,
@ -36,13 +37,13 @@ class MTVServicesInfoExtractor(InfoExtractor):
        return uri.split(':')[-1]
    # This was originally implemented for ComedyCentral, but it also works here
-    @staticmethod
+    @classmethod
-    def _transform_rtmp_url(rtmp_video_url):
+    def _transform_rtmp_url(cls, rtmp_video_url):
        m = re.match(r'^rtmpe?://.*?/(?P<finalid>gsp\..+?/.*)$', rtmp_video_url)
        if not m:
-            return rtmp_video_url
+            return {'rtmp': rtmp_video_url}
        base = 'http://viacommtvstrmfs.fplive.net/'
-        return base + m.group('finalid')
+        return {'http': base + m.group('finalid')}
    def _get_feed_url(self, uri):
        return self._FEED_URL
@ -86,14 +87,14 @@ class MTVServicesInfoExtractor(InfoExtractor):
                rtmp_video_url = rendition.find('./src').text
                if rtmp_video_url.endswith('siteunavail.png'):
                    continue
-                new_url = self._transform_rtmp_url(rtmp_video_url)
+                new_urls = self._transform_rtmp_url(rtmp_video_url)
-                formats.append({
+                formats.extend([{
                    'ext': 'flv' if new_url.startswith('rtmp') else ext,
                    'url': new_url,
-                    'format_id': rendition.get('bitrate'),
+                    'format_id': '-'.join(filter(None, [kind, rendition.get('bitrate')])),
                    'width': int(rendition.get('width')),
                    'height': int(rendition.get('height')),
-                })
+                } for kind, new_url in new_urls.items()])
            except (KeyError, TypeError):
                raise ExtractorError('Invalid rendition field.')
        self._sort_formats(formats)
@ -136,6 +137,8 @@ class MTVServicesInfoExtractor(InfoExtractor):
        description = strip_or_none(xpath_text(itemdoc, 'description'))
        timestamp = timeconvert(xpath_text(itemdoc, 'pubDate'))
        title_el = None
        if title_el is None:
            title_el = find_xpath_attr(
@ -168,6 +171,7 @@ class MTVServicesInfoExtractor(InfoExtractor):
            'thumbnail': self._get_thumbnail_url(uri, itemdoc),
            'description': description,
            'duration': float_or_none(content_el.attrib.get('duration')),
            'timestamp': timestamp,
        }
    def _get_feed_query(self, uri):
@ -186,8 +190,13 @@ class MTVServicesInfoExtractor(InfoExtractor):
        idoc = self._download_xml(
            url, video_id,
            'Downloading info', transform_source=fix_xml_ampersands)
        title = xpath_text(idoc, './channel/title')
        description = xpath_text(idoc, './channel/description')
        return self.playlist_result(
-            [self._get_video_info(item) for item in idoc.findall('.//item')])
+            [self._get_video_info(item) for item in idoc.findall('.//item')],
            playlist_title=title, playlist_description=description)
    def _extract_mgid(self, webpage):
        try:
@ -233,6 +242,8 @@ class MTVServicesEmbeddedIE(MTVServicesInfoExtractor):
            'ext': 'mp4',
            'title': 'Peter Dinklage Sums Up \'Game Of Thrones\' In 45 Seconds',
            'description': '"Sexy sexy sexy, stabby stabby stabby, beautiful language," says Peter Dinklage as he tries summarizing "Game of Thrones" in under a minute.',
            'timestamp': 1400126400,
            'upload_date': '20140515',
        },
    }
@ -275,6 +286,8 @@ class MTVIE(MTVServicesInfoExtractor):
                'ext': 'mp4',
                'title': 'Taylor Swift - "Ours (VH1 Storytellers)"',
                'description': 'Album: Taylor Swift performs "Ours" for VH1 Storytellers at Harvey Mudd College.',
                'timestamp': 1352610000,
                'upload_date': '20121111',
            },
        },
    ]
@ -301,20 +314,6 @@ class MTVIE(MTVServicesInfoExtractor):
        return self._get_videos_info(uri)
 class MTVIggyIE(MTVServicesInfoExtractor):
    """Extractor for mtviggy.com video pages.

    Declares only the extractor name, URL pattern, one test and the feed URL;
    no methods are overridden in this class.
    """
    IE_NAME = 'mtviggy.com'
    # The pattern defines no named groups; anything after /videos/ matches.
    _VALID_URL = r'https?://www\.mtviggy\.com/videos/.+'
    _TEST = {
        'url': 'http://www.mtviggy.com/videos/arcade-fire-behind-the-scenes-at-the-biggest-music-experiment-yet/',
        'info_dict': {
            'id': '984696',
            'ext': 'mp4',
            'title': 'Arcade Fire: Behind the Scenes at the Biggest Music Experiment Yet',
        }
    }
    _FEED_URL = 'http://all.mtvworldverticals.com/feed-xml/'
 class MTVDEIE(MTVServicesInfoExtractor):
    IE_NAME = 'mtv.de'
    _VALID_URL = r'https?://(?:www\.)?mtv\.de/(?:artists|shows|news)/(?:[^/]+/)*(?P<id>\d+)-[^/#?]+/*(?:[#?].*)?$'
@ -322,7 +321,7 @@ class MTVDEIE(MTVServicesInfoExtractor):
        'url': 'http://www.mtv.de/artists/10571-cro/videos/61131-traum',
        'info_dict': {
            'id': 'music_video-a50bc5f0b3aa4b3190aa',
-            'ext': 'mp4',
+            'ext': 'flv',
            'title': 'MusicVideo_cro-traum',
            'description': 'Cro - Traum',
        },
@ -330,20 +329,21 @@ class MTVDEIE(MTVServicesInfoExtractor):
            # rtmp download
            'skip_download': True,
        },
        'skip': 'Blocked at Travis CI',
    }, {
        # mediagen URL without query (e.g. http://videos.mtvnn.com/mediagen/e865da714c166d18d6f80893195fcb97)
        'url': 'http://www.mtv.de/shows/933-teen-mom-2/staffeln/5353/folgen/63565-enthullungen',
        'info_dict': {
            'id': 'local_playlist-f5ae778b9832cc837189',
-            'ext': 'mp4',
+            'ext': 'flv',
            'title': 'Episode_teen-mom-2_shows_season-5_episode-1_full-episode_part1',
        },
        'params': {
            # rtmp download
            'skip_download': True,
        },
        'skip': 'Blocked at Travis CI',
    }, {
        # single video in pagePlaylist with different id
        'url': 'http://www.mtv.de/news/77491-mtv-movies-spotlight-pixels-teil-3',
        'info_dict': {
            'id': 'local_playlist-4e760566473c4c8c5344',
@ -355,6 +355,7 @@ class MTVDEIE(MTVServicesInfoExtractor):
            # rtmp download
            'skip_download': True,
        },
        'skip': 'Das Video kann zur Zeit nicht abgespielt werden.',
    }]
    def _real_extract(self, url):
@ -367,11 +368,14 @@ class MTVDEIE(MTVServicesInfoExtractor):
                r'window\.pagePlaylist\s*=\s*(\[.+?\]);\n', webpage, 'page playlist'),
            video_id)
        def _mrss_url(item):
            return item['mrss'] + item.get('mrssvars', '')
        # news pages contain single video in playlist with different id
        if len(playlist) == 1:
-            return self._get_videos_info_from_url(playlist[0]['mrss'], video_id)
+            return self._get_videos_info_from_url(_mrss_url(playlist[0]), video_id)
        for item in playlist:
            item_id = item.get('id')
            if item_id and compat_str(item_id) == video_id:
-                return self._get_videos_info_from_url(item['mrss'], video_id)
+                return self._get_videos_info_from_url(_mrss_url(item), video_id)
--- a/youtube_dl/extractor/nextmovie.py
+++ b/youtube_dl/extractor/nextmovie.py
@ -1,30 +0,0 @@
 # coding: utf-8
 from __future__ import unicode_literals
 from .mtv import MTVServicesInfoExtractor
 from ..compat import compat_urllib_parse_urlencode
 class NextMovieIE(MTVServicesInfoExtractor):
    IE_NAME = 'nextmovie.com'
    _VALID_URL = r'https?://(?:www\.)?nextmovie\.com/shows/[^/]+/\d{4}-\d{2}-\d{2}/(?P<id>[^/?#]+)'
    _FEED_URL = 'http://lite.dextr.mtvi.com/service1/dispatch.htm'
    _TESTS = [{
        'url': 'http://www.nextmovie.com/shows/exclusives/2013-03-10/mgid:uma:videolist:nextmovie.com:1715019/',
        'md5': '09a9199f2f11f10107d04fcb153218aa',
        'info_dict': {
            'id': '961726',
            'ext': 'mp4',
            'title': 'The Muppets\' Gravity',
        },
    }]
    def _get_feed_query(self, uri):
        return compat_urllib_parse_urlencode({
            'feed': '1505',
            'mgid': uri,
        })
    def _real_extract(self, url):
        mgid = self._match_id(url)
        return self._get_videos_info(mgid)
--- a/youtube_dl/extractor/nick.py
+++ b/youtube_dl/extractor/nick.py
@ -7,6 +7,7 @@ from ..utils import update_url_query
 class NickIE(MTVServicesInfoExtractor):
    # None of videos on the website are still alive?
    IE_NAME = 'nick.com'
    _VALID_URL = r'https?://(?:www\.)?nick(?:jr)?\.com/(?:videos/clip|[^/]+/videos)/(?P<id>[^/?#.]+)'
    _FEED_URL = 'http://udat.mtvnservices.com/service1/dispatch.htm'
--- a/youtube_dl/extractor/onet.py
+++ b/youtube_dl/extractor/onet.py
@ -59,11 +59,8 @@ class OnetBaseIE(InfoExtractor):
                        # TODO: Support Microsoft Smooth Streaming
                        continue
                    elif ext == 'mpd':
-                        # TODO: Current DASH formats are broken - $Time$ pattern in
+                        formats.extend(self._extract_mpd_formats(
-                        # <SegmentTemplate> not implemented yet
+                            video_url, video_id, mpd_id='dash', fatal=False))
                        # formats.extend(self._extract_mpd_formats(
                        #    video_url, video_id, mpd_id='dash', fatal=False))
                        continue
                    else:
                        formats.append({
                            'url': video_url,
--- a/youtube_dl/extractor/orf.py
+++ b/youtube_dl/extractor/orf.py
@ -137,13 +137,16 @@ class ORFTVthekIE(InfoExtractor):
 class ORFOE1IE(InfoExtractor):
    IE_NAME = 'orf:oe1'
    IE_DESC = 'Radio Österreich 1'
-    _VALID_URL = r'https?://oe1\.orf\.at/(?:programm/|konsole.*?#\?track_id=)(?P<id>[0-9]+)'
+    _VALID_URL = r'https?://oe1\.orf\.at/(?:programm/|konsole\?.*?\btrack_id=)(?P<id>[0-9]+)'
    # Audios on ORF radio are only available for 7 days, so we can't add tests.
-    _TEST = {
+    _TESTS = [{
        'url': 'http://oe1.orf.at/konsole?show=on_demand#?track_id=394211',
        'only_matching': True,
-    }
+    }, {
        'url': 'http://oe1.orf.at/konsole?show=ondemand&track_id=443608&load_day=/programm/konsole/tag/20160726',
        'only_matching': True,
    }]
    def _real_extract(self, url):
        show_id = self._match_id(url)
--- a/youtube_dl/extractor/pornhub.py
+++ b/youtube_dl/extractor/pornhub.py
@ -111,7 +111,7 @@ class PornHubIE(InfoExtractor):
        webpage = self._download_webpage(req, video_id)
        error_msg = self._html_search_regex(
-            r'(?s)<div[^>]+class=(["\']).*?\b(?:removed|userMessageSection)\b.*?\1[^>]*>(?P<error>.+?)</div>',
+            r'(?s)<div[^>]+class=(["\'])(?:(?!\1).)*\b(?:removed|userMessageSection)\b(?:(?!\1).)*\1[^>]*>(?P<error>.+?)</div>',
            webpage, 'error message', default=None, group='error')
        if error_msg:
            error_msg = re.sub(r'\s+', ' ', error_msg)
--- a/youtube_dl/extractor/shared.py
+++ b/youtube_dl/extractor/shared.py
@ -6,7 +6,6 @@ from .common import InfoExtractor
 from ..utils import (
    ExtractorError,
    int_or_none,
    sanitized_Request,
    urlencode_postdata,
 )
@ -37,28 +36,33 @@ class SharedIE(InfoExtractor):
    def _real_extract(self, url):
        video_id = self._match_id(url)
-        webpage = self._download_webpage(url, video_id)
+
        webpage, urlh = self._download_webpage_handle(url, video_id)
        if '>File does not exist<' in webpage:
            raise ExtractorError(
                'Video %s does not exist' % video_id, expected=True)
        download_form = self._hidden_inputs(webpage)
        request = sanitized_Request(
            url, urlencode_postdata(download_form))
        request.add_header('Content-Type', 'application/x-www-form-urlencoded')
        video_page = self._download_webpage(
-            request, video_id, 'Downloading video page')
+            urlh.geturl(), video_id, 'Downloading video page',
            data=urlencode_postdata(download_form),
            headers={
                'Content-Type': 'application/x-www-form-urlencoded',
                'Referer': urlh.geturl(),
            })
        video_url = self._html_search_regex(
-            r'data-url="([^"]+)"', video_page, 'video URL')
+            r'data-url=(["\'])(?P<url>(?:(?!\1).)+)\1',
            video_page, 'video URL', group='url')
        title = base64.b64decode(self._html_search_meta(
            'full:title', webpage, 'title').encode('utf-8')).decode('utf-8')
        filesize = int_or_none(self._html_search_meta(
            'full:size', webpage, 'file size', fatal=False))
        thumbnail = self._html_search_regex(
-            r'data-poster="([^"]+)"', video_page, 'thumbnail', default=None)
+            r'data-poster=(["\'])(?P<url>(?:(?!\1).)+)\1',
            video_page, 'thumbnail', default=None, group='url')
        return {
            'id': video_id,
--- a/youtube_dl/extractor/smotri.py
+++ b/youtube_dl/extractor/smotri.py
@ -13,20 +13,21 @@ from ..utils import (
    sanitized_Request,
    unified_strdate,
    urlencode_postdata,
    xpath_text,
 )
 class SmotriIE(InfoExtractor):
    IE_DESC = 'Smotri.com'
    IE_NAME = 'smotri'
-    _VALID_URL = r'^https?://(?:www\.)?(?:smotri\.com/video/view/\?id=|pics\.smotri\.com/(?:player|scrubber_custom8)\.swf\?file=)(?P<id>v(?P<realvideoid>[0-9]+)[a-z0-9]{4})'
+    _VALID_URL = r'https?://(?:www\.)?(?:smotri\.com/video/view/\?id=|pics\.smotri\.com/(?:player|scrubber_custom8)\.swf\?file=)(?P<id>v(?P<realvideoid>[0-9]+)[a-z0-9]{4})'
    _NETRC_MACHINE = 'smotri'
    _TESTS = [
        # real video id 2610366
        {
            'url': 'http://smotri.com/video/view/?id=v261036632ab',
-            'md5': '2a7b08249e6f5636557579c368040eb9',
+            'md5': '02c0dfab2102984e9c5bb585cc7cc321',
            'info_dict': {
                'id': 'v261036632ab',
                'ext': 'mp4',
@ -174,11 +175,11 @@ class SmotriIE(InfoExtractor):
        if video_password:
            video_form['pass'] = hashlib.md5(video_password.encode('utf-8')).hexdigest()
-        request = sanitized_Request(
+        video = self._download_json(
-            'http://smotri.com/video/view/url/bot/', urlencode_postdata(video_form))
+            'http://smotri.com/video/view/url/bot/',
-        request.add_header('Content-Type', 'application/x-www-form-urlencoded')
+            video_id, 'Downloading video JSON',
-
+            data=urlencode_postdata(video_form),
-        video = self._download_json(request, video_id, 'Downloading video JSON')
+            headers={'Content-Type': 'application/x-www-form-urlencoded'})
        video_url = video.get('_vidURL') or video.get('_vidURL_mp4')
@ -196,11 +197,11 @@ class SmotriIE(InfoExtractor):
                raise ExtractorError(msg, expected=True)
        title = video['title']
-        thumbnail = video['_imgURL']
+        thumbnail = video.get('_imgURL')
-        upload_date = unified_strdate(video['added'])
+        upload_date = unified_strdate(video.get('added'))
-        uploader = video['userNick']
+        uploader = video.get('userNick')
-        uploader_id = video['userLogin']
+        uploader_id = video.get('userLogin')
-        duration = int_or_none(video['duration'])
+        duration = int_or_none(video.get('duration'))
        # Video JSON does not provide enough meta data
        # We will extract some from the video web page instead
@ -209,7 +210,7 @@ class SmotriIE(InfoExtractor):
        # Warning if video is unavailable
        warning = self._html_search_regex(
-            r'<div class="videoUnModer">(.*?)</div>', webpage,
+            r'<div[^>]+class="videoUnModer"[^>]*>(.+?)</div>', webpage,
            'warning message', default=None)
        if warning is not None:
            self._downloader.report_warning(
@ -217,20 +218,22 @@ class SmotriIE(InfoExtractor):
                (video_id, warning))
        # Adult content
-        if re.search('EroConfirmText">', webpage) is not None:
+        if 'EroConfirmText">' in webpage:
            self.report_age_confirmation()
            confirm_string = self._html_search_regex(
-                r'<a href="/video/view/\?id=%s&confirm=([^"]+)" title="[^"]+">' % video_id,
+                r'<a[^>]+href="/video/view/\?id=%s&confirm=([^"]+)"' % video_id,
                webpage, 'confirm string')
            confirm_url = webpage_url + '&confirm=%s' % confirm_string
-            webpage = self._download_webpage(confirm_url, video_id, 'Downloading video page (age confirmed)')
+            webpage = self._download_webpage(
                confirm_url, video_id,
                'Downloading video page (age confirmed)')
            adult_content = True
        else:
            adult_content = False
        view_count = self._html_search_regex(
-            'Общее количество просмотров.*?<span class="Number">(\\d+)</span>',
+            r'(?s)Общее количество просмотров.*?<span class="Number">(\d+)</span>',
-            webpage, 'view count', fatal=False, flags=re.MULTILINE | re.DOTALL)
+            webpage, 'view count', fatal=False)
        return {
            'id': video_id,
@ -249,37 +252,33 @@ class SmotriIE(InfoExtractor):
 class SmotriCommunityIE(InfoExtractor):
    IE_DESC = 'Smotri.com community videos'
    IE_NAME = 'smotri:community'
-    _VALID_URL = r'^https?://(?:www\.)?smotri\.com/community/video/(?P<communityid>[0-9A-Za-z_\'-]+)'
+    _VALID_URL = r'https?://(?:www\.)?smotri\.com/community/video/(?P<id>[0-9A-Za-z_\'-]+)'
    _TEST = {
        'url': 'http://smotri.com/community/video/kommuna',
        'info_dict': {
            'id': 'kommuna',
            'title': 'КПРФ',
        },
        'playlist_mincount': 4,
    }
    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
+        community_id = self._match_id(url)
        community_id = mobj.group('communityid')
-        url = 'http://smotri.com/export/rss/video/by/community/-/%s/video.xml' % community_id
+        rss = self._download_xml(
-        rss = self._download_xml(url, community_id, 'Downloading community RSS')
+            'http://smotri.com/export/rss/video/by/community/-/%s/video.xml' % community_id,
            community_id, 'Downloading community RSS')
-        entries = [self.url_result(video_url.text, 'Smotri')
+        entries = [
-                   for video_url in rss.findall('./channel/item/link')]
+            self.url_result(video_url.text, SmotriIE.ie_key())
            for video_url in rss.findall('./channel/item/link')]
-        description_text = rss.find('./channel/description').text
+        return self.playlist_result(entries, community_id)
        community_title = self._html_search_regex(
            '^Видео сообщества "([^"]+)"$', description_text, 'community title')
        return self.playlist_result(entries, community_id, community_title)
 class SmotriUserIE(InfoExtractor):
    IE_DESC = 'Smotri.com user videos'
    IE_NAME = 'smotri:user'
-    _VALID_URL = r'^https?://(?:www\.)?smotri\.com/user/(?P<userid>[0-9A-Za-z_\'-]+)'
+    _VALID_URL = r'https?://(?:www\.)?smotri\.com/user/(?P<id>[0-9A-Za-z_\'-]+)'
    _TESTS = [{
        'url': 'http://smotri.com/user/inspector',
        'info_dict': {
@ -290,19 +289,19 @@ class SmotriUserIE(InfoExtractor):
    }]
    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
+        user_id = self._match_id(url)
        user_id = mobj.group('userid')
-        url = 'http://smotri.com/export/rss/user/video/-/%s/video.xml' % user_id
+        rss = self._download_xml(
-        rss = self._download_xml(url, user_id, 'Downloading user RSS')
+            'http://smotri.com/export/rss/user/video/-/%s/video.xml' % user_id,
            user_id, 'Downloading user RSS')
        entries = [self.url_result(video_url.text, 'Smotri')
                   for video_url in rss.findall('./channel/item/link')]
-        description_text = rss.find('./channel/description').text
+        description_text = xpath_text(rss, './channel/description') or ''
-        user_nickname = self._html_search_regex(
+        user_nickname = self._search_regex(
-            '^Видео режиссера (.*)$', description_text,
+            '^Видео режиссера (.+)$', description_text,
-            'user nickname')
+            'user nickname', fatal=False)
        return self.playlist_result(entries, user_id, user_nickname)
@ -310,11 +309,11 @@ class SmotriUserIE(InfoExtractor):
 class SmotriBroadcastIE(InfoExtractor):
    IE_DESC = 'Smotri.com broadcasts'
    IE_NAME = 'smotri:broadcast'
-    _VALID_URL = r'^https?://(?:www\.)?(?P<url>smotri\.com/live/(?P<broadcastid>[^/]+))/?.*'
+    _VALID_URL = r'https?://(?:www\.)?(?P<url>smotri\.com/live/(?P<id>[^/]+))/?.*'
    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
-        broadcast_id = mobj.group('broadcastid')
+        broadcast_id = mobj.group('id')
        broadcast_url = 'http://' + mobj.group('url')
        broadcast_page = self._download_webpage(broadcast_url, broadcast_id, 'Downloading broadcast page')
@ -328,7 +327,8 @@ class SmotriBroadcastIE(InfoExtractor):
            (username, password) = self._get_login_info()
            if username is None:
-                self.raise_login_required('Erotic broadcasts allowed only for registered users')
+                self.raise_login_required(
                    'Erotic broadcasts allowed only for registered users')
            login_form = {
                'login-hint53': '1',
@ -343,8 +343,9 @@ class SmotriBroadcastIE(InfoExtractor):
            broadcast_page = self._download_webpage(
                request, broadcast_id, 'Logging in and confirming age')
-            if re.search('>Неверный логин или пароль<', broadcast_page) is not None:
+            if '>Неверный логин или пароль<' in broadcast_page:
-                raise ExtractorError('Unable to log in: bad username or password', expected=True)
+                raise ExtractorError(
                    'Unable to log in: bad username or password', expected=True)
            adult_content = True
        else:
@ -383,11 +384,11 @@ class SmotriBroadcastIE(InfoExtractor):
            broadcast_playpath = broadcast_json['_streamName']
            broadcast_app = '%s/%s' % (mobj.group('app'), broadcast_json['_vidURL'])
-            broadcast_thumbnail = broadcast_json['_imgURL']
+            broadcast_thumbnail = broadcast_json.get('_imgURL')
            broadcast_title = self._live_title(broadcast_json['title'])
-            broadcast_description = broadcast_json['description']
+            broadcast_description = broadcast_json.get('description')
-            broadcaster_nick = broadcast_json['nick']
+            broadcaster_nick = broadcast_json.get('nick')
-            broadcaster_login = broadcast_json['login']
+            broadcaster_login = broadcast_json.get('login')
            rtmp_conn = 'S:%s' % uuid.uuid4().hex
        except KeyError:
            if protected_broadcast:
--- a/youtube_dl/extractor/soundcloud.py
+++ b/youtube_dl/extractor/soundcloud.py
@ -119,6 +119,12 @@ class SoundcloudIE(InfoExtractor):
    _CLIENT_ID = '02gUJC0hH2ct1EGOcYXQIzRFU91c72Ea'
    _IPHONE_CLIENT_ID = '376f225bf427445fc4bfb6b99b72e0bf'
    @staticmethod
    def _extract_urls(webpage):
        return [m.group('url') for m in re.finditer(
            r'<iframe[^>]+src=(["\'])(?P<url>(?:https?://)?(?:w\.)?soundcloud\.com/player.+?)\1',
            webpage)]
    def report_resolve(self, video_id):
        """Report information extraction."""
        self.to_screen('%s: Resolving id' % video_id)
--- a/youtube_dl/extractor/southpark.py
+++ b/youtube_dl/extractor/southpark.py
@ -17,6 +17,8 @@ class SouthParkIE(MTVServicesInfoExtractor):
            'ext': 'mp4',
            'title': 'South Park|Bat Daded',
            'description': 'Randy disqualifies South Park by getting into a fight with Bat Dad.',
            'timestamp': 1112760000,
            'upload_date': '20050406',
        },
    }]
@ -28,6 +30,10 @@ class SouthParkEsIE(SouthParkIE):
    _TESTS = [{
        'url': 'http://southpark.cc.com/episodios-en-espanol/s01e01-cartman-consigue-una-sonda-anal#source=351c1323-0b96-402d-a8b9-40d01b2e9bde&position=1&sort=!airdate',
        'info_dict': {
            'title': 'Cartman Consigue Una Sonda Anal',
            'description': 'Cartman Consigue Una Sonda Anal',
        },
        'playlist_count': 4,
    }]
@ -42,17 +48,27 @@ class SouthParkDeIE(SouthParkIE):
        'info_dict': {
            'id': '85487c96-b3b9-4e39-9127-ad88583d9bf2',
            'ext': 'mp4',
-            'title': 'The Government Won\'t Respect My Privacy',
+            'title': 'South Park|The Government Won\'t Respect My Privacy',
            'description': 'Cartman explains the benefits of "Shitter" to Stan, Kyle and Craig.',
            'timestamp': 1380160800,
            'upload_date': '20130926',
        },
    }, {
        # non-ASCII characters in initial URL
        'url': 'http://www.southpark.de/alle-episoden/s18e09-hashtag-aufwärmen',
-        'playlist_count': 4,
+        'info_dict': {
            'title': 'Hashtag „Aufwärmen“',
            'description': 'Kyle will mit seinem kleinen Bruder Ike Videospiele spielen. Als der nicht mehr mit ihm spielen will, hat Kyle Angst, dass er die Kids von heute nicht mehr versteht.',
        },
        'playlist_count': 3,
    }, {
        # non-ASCII characters in redirect URL
        'url': 'http://www.southpark.de/alle-episoden/s18e09',
-        'playlist_count': 4,
+        'info_dict': {
            'title': 'Hashtag „Aufwärmen“',
            'description': 'Kyle will mit seinem kleinen Bruder Ike Videospiele spielen. Als der nicht mehr mit ihm spielen will, hat Kyle Angst, dass er die Kids von heute nicht mehr versteht.',
        },
        'playlist_count': 3,
    }]
@ -63,7 +79,11 @@ class SouthParkNlIE(SouthParkIE):
    _TESTS = [{
        'url': 'http://www.southpark.nl/full-episodes/s18e06-freemium-isnt-free',
-        'playlist_count': 4,
+        'info_dict': {
            'title': 'Freemium Isn\'t Free',
            'description': 'Stan is addicted to the new Terrance and Phillip mobile game.',
        },
        'playlist_mincount': 3,
    }]
@ -74,5 +94,9 @@ class SouthParkDkIE(SouthParkIE):
    _TESTS = [{
        'url': 'http://www.southparkstudios.dk/full-episodes/s18e07-grounded-vindaloop',
-        'playlist_count': 4,
+        'info_dict': {
            'title': 'Grounded Vindaloop',
            'description': 'Butters is convinced he\'s living in a virtual reality.',
        },
        'playlist_mincount': 3,
    }]
--- a/youtube_dl/extractor/spike.py
+++ b/youtube_dl/extractor/spike.py
@ -11,8 +11,10 @@ class SpikeIE(MTVServicesInfoExtractor):
        'info_dict': {
            'id': 'b9c8221a-4e50-479a-b86d-3333323e38ba',
            'ext': 'mp4',
-            'title': 'Auction Hunters|Can Allen Ride A Hundred Year-Old Motorcycle?',
+            'title': 'Auction Hunters|December 27, 2013|4|414|Can Allen Ride A Hundred Year-Old Motorcycle?',
            'description': 'md5:fbed7e82ed5fad493615b3094a9499cb',
            'timestamp': 1388120400,
            'upload_date': '20131227',
        },
    }, {
        'url': 'http://www.spike.com/video-clips/lhtu8m/',
--- a/youtube_dl/extractor/telegraaf.py
+++ b/youtube_dl/extractor/telegraaf.py
@ -47,11 +47,10 @@ class TelegraafIE(InfoExtractor):
            ext = determine_ext(manifest_url)
            if ext == 'm3u8':
                formats.extend(self._extract_m3u8_formats(
-                    manifest_url, video_id, ext='mp4', m3u8_id='hls'))
+                    manifest_url, video_id, ext='mp4', m3u8_id='hls', fatal=False))
            elif ext == 'mpd':
-                # TODO: Current DASH formats are broken - $Time$ pattern in
+                formats.extend(self._extract_mpd_formats(
-                # <SegmentTemplate> not implemented yet
+                    manifest_url, video_id, mpd_id='dash', fatal=False))
                continue
            else:
                self.report_warning('Unknown adaptive format %s' % ext)
        for location in locations.get('progressive', []):
--- a/youtube_dl/extractor/tvland.py
+++ b/youtube_dl/extractor/tvland.py
@ -9,56 +9,23 @@ class TVLandIE(MTVServicesInfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?tvland\.com/(?:video-clips|episodes)/(?P<id>[^/?#.]+)'
    _FEED_URL = 'http://www.tvland.com/feeds/mrss/'
    _TESTS = [{
        # Geo-restricted. Without a proxy metadata are still there. With a
        # proxy it redirects to http://m.tvland.com/app/
        'url': 'http://www.tvland.com/episodes/hqhps2/everybody-loves-raymond-the-invasion-ep-048',
-        'playlist': [
+        'info_dict': {
-            {
+            'description': 'md5:80973e81b916a324e05c14a3fb506d29',
-                'md5': '227e9723b9669c05bf51098b10287aa7',
+            'title': 'The Invasion',
-                'info_dict': {
+        },
-                    'id': 'bcbd3a83-3aca-4dca-809b-f78a87dcccdd',
+        'playlist': [],
                    'ext': 'mp4',
                    'title': 'Everybody Loves Raymond|Everybody Loves Raymond 048 HD, Part 1 of 5',
                }
            },
            {
                'md5': '9fa2b764ec0e8194fb3ebb01a83df88b',
                'info_dict': {
                    'id': 'f4279548-6e13-40dd-92e8-860d27289197',
                    'ext': 'mp4',
                    'title': 'Everybody Loves Raymond|Everybody Loves Raymond 048 HD, Part 2 of 5',
                }
            },
            {
                'md5': 'fde4c3bccd7cc7e3576b338734153cec',
                'info_dict': {
                    'id': '664e4a38-53ef-4115-9bc9-d0f789ec6334',
                    'ext': 'mp4',
                    'title': 'Everybody Loves Raymond|Everybody Loves Raymond 048 HD, Part 3 of 5',
                }
            },
            {
                'md5': '247f6780cda6891f2e49b8ae2b10e017',
                'info_dict': {
                    'id': '9146ecf5-b15a-4d78-879c-6679b77f4960',
                    'ext': 'mp4',
                    'title': 'Everybody Loves Raymond|Everybody Loves Raymond 048 HD, Part 4 of 5',
                }
            },
            {
                'md5': 'fd269f33256e47bad5eb6c40de089ff6',
                'info_dict': {
                    'id': '04334a2e-9a47-4214-a8c2-ae5792e2fab7',
                    'ext': 'mp4',
                    'title': 'Everybody Loves Raymond|Everybody Loves Raymond 048 HD, Part 5 of 5',
                }
            }
        ],
    }, {
        'url': 'http://www.tvland.com/video-clips/zea2ev/younger-younger--hilary-duff---little-lies',
        'md5': 'e2c6389401cf485df26c79c247b08713',
        'info_dict': {
            'id': 'b8697515-4bbe-4e01-83d5-fa705ce5fa88',
            'ext': 'mp4',
-            'title': 'Younger|Younger: Hilary Duff - Little Lies',
+            'title': 'Younger|December 28, 2015|2|NO-EPISODE#|Younger: Hilary Duff - Little Lies',
-            'description': 'md5:7d192f56ca8d958645c83f0de8ef0269'
+            'description': 'md5:7d192f56ca8d958645c83f0de8ef0269',
            'upload_date': '20151228',
            'timestamp': 1451289600,
        },
    }]
--- a/youtube_dl/extractor/tvp.py
+++ b/youtube_dl/extractor/tvp.py
@ -89,8 +89,8 @@ class TVPIE(InfoExtractor):
            r'(https?://.+?/video)(?:\.(?:ism|f4m|m3u8)|-\d+\.mp4)',
            video_url, 'video base url', default=None)
        if video_url_base:
-            # TODO: Current DASH formats are broken - $Time$ pattern in
+            # TODO: <Group> found instead of <AdaptationSet> in MPD manifest.
-            # <SegmentTemplate> not implemented yet
+            # It's not mentioned in MPEG-DASH standard. Figure that out.
            # formats.extend(self._extract_mpd_formats(
            #     video_url_base + '.ism/video.mpd',
            #     video_id, mpd_id='dash', fatal=False))
--- a/youtube_dl/extractor/twitch.py
+++ b/youtube_dl/extractor/twitch.py
@ -461,7 +461,7 @@ class TwitchClipsIE(InfoExtractor):
    IE_NAME = 'twitch:clips'
    _VALID_URL = r'https?://clips\.twitch\.tv/(?:[^/]+/)*(?P<id>[^/?#&]+)'
-    _TEST = {
+    _TESTS = [{
        'url': 'https://clips.twitch.tv/ea/AggressiveCobraPoooound',
        'md5': '761769e1eafce0ffebfb4089cb3847cd',
        'info_dict': {
@ -473,7 +473,11 @@ class TwitchClipsIE(InfoExtractor):
            'uploader': 'stereotype_',
            'uploader_id': 'stereotype_',
        },
-    }
+    }, {
        # multiple formats
        'url': 'https://clips.twitch.tv/rflegendary/UninterestedBeeDAESuppy',
        'only_matching': True,
    }]
    def _real_extract(self, url):
        video_id = self._match_id(url)
@ -485,15 +489,27 @@ class TwitchClipsIE(InfoExtractor):
                r'(?s)clipInfo\s*=\s*({.+?});', webpage, 'clip info'),
            video_id, transform_source=js_to_json)
-        video_url = clip['clip_video_url']
+        title = clip.get('channel_title') or self._og_search_title(webpage)
-        title = clip['channel_title']
+
        formats = [{
            'url': option['source'],
            'format_id': option.get('quality'),
            'height': int_or_none(option.get('quality')),
        } for option in clip.get('quality_options', []) if option.get('source')]
        if not formats:
            formats = [{
                'url': clip['clip_video_url'],
            }]
        self._sort_formats(formats)
        return {
            'id': video_id,
            'url': video_url,
            'title': title,
            'thumbnail': self._og_search_thumbnail(webpage),
            'creator': clip.get('broadcaster_display_name') or clip.get('broadcaster_login'),
            'uploader': clip.get('curator_login'),
            'uploader_id': clip.get('curator_display_name'),
            'formats': formats,
        }
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@ -53,6 +53,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'
    _PASSWORD_CHALLENGE_URL = 'https://accounts.google.com/signin/challenge/sl/password'
    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False
@ -116,12 +117,10 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
            'hl': 'en_US',
        }
        login_data = urlencode_postdata(login_form_strs)
        req = sanitized_Request(self._LOGIN_URL, login_data)
        login_results = self._download_webpage(
-            req, None,
+            self._PASSWORD_CHALLENGE_URL, None,
-            note='Logging in', errnote='unable to log in', fatal=False)
+            note='Logging in', errnote='unable to log in', fatal=False,
            data=urlencode_postdata(login_form_strs))
        if login_results is False:
            return False
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@ -2123,6 +2123,7 @@ def mimetype2ext(mt):
        'dash+xml': 'mpd',
        'f4m': 'f4m',
        'f4m+xml': 'f4m',
        'hds+xml': 'f4m',
        'vnd.ms-sstr+xml': 'ism',
    }.get(res, res)
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@ -1,3 +1,3 @@
 from __future__ import unicode_literals
-__version__ = '2016.07.17'
+__version__ = '2016.07.28'
`@ -1,3 +1,3 @@`
	`from __future__ import unicode_literals`	`from __future__ import unicode_literals`

	`__version__ = '2016.07.17'`	`__version__ = '2016.07.28'`