From 6ff4469528d642bd678df9b1fa83545a0942e333 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com>
Date: Fri, 29 Apr 2016 19:39:27 +0600
Subject: [PATCH 01/50] [crunchyroll] Relax fmt regex

---
 youtube_dl/extractor/crunchyroll.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py
index 184ba6896..4a7664296 100644
--- a/youtube_dl/extractor/crunchyroll.py
+++ b/youtube_dl/extractor/crunchyroll.py
@@ -307,7 +307,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
             'video_uploader', fatal=False)
 
         available_fmts = []
-        for a, fmt in re.findall(r'(<a[^>]+token="showmedia\.([0-9]{3,4})p"[^>]+>.*?</a>)', webpage):
+        for a, fmt in re.findall(r'(<a[^>]+token=["\']showmedia\.([0-9]{3,4})p["\'][^>]+>)', webpage):
             attrs = extract_attributes(a)
             href = attrs.get('href')
             if href and '/freetrial' in href:

From 8312b1a3d1dc07d80d33e31f9b2b6facf13fa744 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com>
Date: Fri, 29 Apr 2016 19:43:53 +0600
Subject: [PATCH 02/50] [crunchyroll] Add even more relaxed fmt fallback

---
 youtube_dl/extractor/crunchyroll.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py
index 4a7664296..58960b2f8 100644
--- a/youtube_dl/extractor/crunchyroll.py
+++ b/youtube_dl/extractor/crunchyroll.py
@@ -314,7 +314,10 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
                 continue
             available_fmts.append(fmt)
         if not available_fmts:
-            available_fmts = re.findall(r'token="showmedia\.([0-9]{3,4})p"', webpage)
+            for p in (r'token=["\']showmedia\.([0-9]{3,4})p"', r'showmedia\.([0-9]{3,4})p'):
+                available_fmts = re.findall(p, webpage)
+                if available_fmts:
+                    break
         video_encode_ids = []
         formats = []
         for fmt in available_fmts:

From 00a17a9e1234ecc868a15b5759472a0f9215f797 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com>
Date: Fri, 29 Apr 2016 19:44:10 +0600
Subject: [PATCH 03/50] [crunchyroll] Sort formats

---
 youtube_dl/extractor/crunchyroll.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py
index 58960b2f8..90a64303d 100644
--- a/youtube_dl/extractor/crunchyroll.py
+++ b/youtube_dl/extractor/crunchyroll.py
@@ -367,6 +367,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
                 'ext': 'flv',
             })
             formats.append(format_info)
+        self._sort_formats(formats)
 
         metadata = self._download_xml(
             'http://www.crunchyroll.com/xml', video_id,

From e9c6cdf4a103d1ebdb6927bdab429c370cbe66b2 Mon Sep 17 00:00:00 2001
From: Yen Chi Hsuan <yan12125@gmail.com>
Date: Fri, 29 Apr 2016 22:49:04 +0800
Subject: [PATCH 04/50] [common] Fix format_id construction for HLS

---
 youtube_dl/extractor/common.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 2763d2ffe..61a5d124c 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -1142,7 +1142,7 @@ class InfoExtractor(object):
                 # Bandwidth of live streams may differ over time thus making
                 # format_id unpredictable. So it's better to keep provided
                 # format_id intact.
-                if last_media_name and not live:
+                if not live:
                     format_id.append(last_media_name if last_media_name else '%d' % (tbr if tbr else len(formats)))
                 f = {
                     'format_id': '-'.join(format_id),

From cef3f3011f9d3a67de3ff064a5185a1a4bcf40e7 Mon Sep 17 00:00:00 2001
From: Yen Chi Hsuan <yan12125@gmail.com>
Date: Sat, 30 Apr 2016 00:17:09 +0800
Subject: [PATCH 05/50] [funimation] Detect blocking and support CloudFlare
 cookies

---
 youtube_dl/extractor/funimation.py | 48 ++++++++++++++++++++++++++++--
 1 file changed, 45 insertions(+), 3 deletions(-)

diff --git a/youtube_dl/extractor/funimation.py b/youtube_dl/extractor/funimation.py
index 1eb528f31..0ad0d9b6a 100644
--- a/youtube_dl/extractor/funimation.py
+++ b/youtube_dl/extractor/funimation.py
@@ -2,6 +2,10 @@
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
+from ..compat import (
+    compat_HTTPError,
+    compat_urllib_parse_unquote_plus,
+)
 from ..utils import (
     clean_html,
     determine_ext,
@@ -27,6 +31,7 @@ class FunimationIE(InfoExtractor):
             'description': 'md5:1769f43cd5fc130ace8fd87232207892',
             'thumbnail': 're:https?://.*\.jpg',
         },
+        'skip': 'Access without user interaction is forbidden by CloudFlare, and video removed',
     }, {
         'url': 'http://www.funimation.com/shows/hacksign/videos/official/role-play',
         'info_dict': {
@@ -37,6 +42,7 @@ class FunimationIE(InfoExtractor):
             'description': 'md5:b602bdc15eef4c9bbb201bb6e6a4a2dd',
             'thumbnail': 're:https?://.*\.jpg',
         },
+        'skip': 'Access without user interaction is forbidden by CloudFlare',
     }, {
         'url': 'http://www.funimation.com/shows/attack-on-titan-junior-high/videos/promotional/broadcast-dub-preview',
         'info_dict': {
@@ -47,8 +53,36 @@ class FunimationIE(InfoExtractor):
             'description': 'md5:f8ec49c0aff702a7832cd81b8a44f803',
             'thumbnail': 're:https?://.*\.(?:jpg|png)',
         },
+        'skip': 'Access without user interaction is forbidden by CloudFlare',
     }]
 
+    _LOGIN_URL = 'http://www.funimation.com/login'
+
+    def _download_webpage(self, *args, **kwargs):
+        try:
+            return super(FunimationIE, self)._download_webpage(*args, **kwargs)
+        except ExtractorError as ee:
+            if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 403:
+                response = ee.cause.read()
+                if b'>Please complete the security check to access<' in response:
+                    raise ExtractorError(
+                        'Access to funimation.com is blocked by CloudFlare. '
+                        'Please browse to http://www.funimation.com/, solve '
+                        'the reCAPTCHA, export browser cookies to a text file,'
+                        ' and then try again with --cookies YOUR_COOKIE_FILE.',
+                        expected=True)
+            raise
+
+    def _extract_cloudflare_session_ua(self, url):
+        ci_session_cookie = self._get_cookies(url).get('ci_session')
+        if ci_session_cookie:
+            ci_session = compat_urllib_parse_unquote_plus(ci_session_cookie.value)
+            # ci_session is a string serialized by PHP function serialize()
+            # This case is simple enough to use regular expressions only
+            return self._search_regex(
+                r'"user_agent";s:\d+:"([^"]+)"', ci_session, 'user agent',
+                default=None)
+
     def _login(self):
         (username, password) = self._get_login_info()
         if username is None:
@@ -57,8 +91,11 @@ class FunimationIE(InfoExtractor):
             'email_field': username,
             'password_field': password,
         })
-        login_request = sanitized_Request('http://www.funimation.com/login', data, headers={
-            'User-Agent': 'Mozilla/5.0 (Windows NT 5.2; WOW64; rv:42.0) Gecko/20100101 Firefox/42.0',
+        user_agent = self._extract_cloudflare_session_ua(self._LOGIN_URL)
+        if not user_agent:
+            user_agent = 'Mozilla/5.0 (Windows NT 5.2; WOW64; rv:42.0) Gecko/20100101 Firefox/42.0'
+        login_request = sanitized_Request(self._LOGIN_URL, data, headers={
+            'User-Agent': user_agent,
             'Content-Type': 'application/x-www-form-urlencoded'
         })
         login_page = self._download_webpage(
@@ -103,11 +140,16 @@ class FunimationIE(InfoExtractor):
             ('mobile', 'Mozilla/5.0 (Linux; Android 4.4.2; Nexus 4 Build/KOT49H) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.114 Mobile Safari/537.36'),
         )
 
+        user_agent = self._extract_cloudflare_session_ua(url)
+        if user_agent:
+            USER_AGENTS = ((None, user_agent),)
+
         for kind, user_agent in USER_AGENTS:
             request = sanitized_Request(url)
             request.add_header('User-Agent', user_agent)
             webpage = self._download_webpage(
-                request, display_id, 'Downloading %s webpage' % kind)
+                request, display_id,
+                'Downloading %s webpage' % kind if kind else 'Downloading webpage')
 
             playlist = self._parse_json(
                 self._search_regex(

From 65a3bfb379c9d5e53cac874af097d2071ee4ac4d Mon Sep 17 00:00:00 2001
From: remitamine <remitamine@gmail.com>
Date: Fri, 29 Apr 2016 19:21:17 +0100
Subject: [PATCH 06/50] [dfb] extract m3u8 formats

---
 youtube_dl/extractor/dfb.py | 31 +++++++++++++++++++------------
 1 file changed, 19 insertions(+), 12 deletions(-)

diff --git a/youtube_dl/extractor/dfb.py b/youtube_dl/extractor/dfb.py
index cdfeccacb..a4d0448c2 100644
--- a/youtube_dl/extractor/dfb.py
+++ b/youtube_dl/extractor/dfb.py
@@ -12,39 +12,46 @@ class DFBIE(InfoExtractor):
 
     _TEST = {
         'url': 'http://tv.dfb.de/video/u-19-em-stimmen-zum-spiel-gegen-russland/11633/',
-        # The md5 is different each time
+        'md5': 'ac0f98a52a330f700b4b3034ad240649',
         'info_dict': {
             'id': '11633',
             'display_id': 'u-19-em-stimmen-zum-spiel-gegen-russland',
-            'ext': 'flv',
+            'ext': 'mp4',
             'title': 'U 19-EM: Stimmen zum Spiel gegen Russland',
             'upload_date': '20150714',
         },
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-        display_id = mobj.group('display_id')
+        display_id, video_id = re.match(self._VALID_URL, url).groups()
 
-        webpage = self._download_webpage(url, display_id)
         player_info = self._download_xml(
             'http://tv.dfb.de/server/hd_video.php?play=%s' % video_id,
             display_id)
         video_info = player_info.find('video')
+        stream_access_url = self._proto_relative_url(video_info.find('url').text.strip())
 
-        f4m_info = self._download_xml(
-            self._proto_relative_url(video_info.find('url').text.strip()), display_id)
-        token_el = f4m_info.find('token')
-        manifest_url = token_el.attrib['url'] + '?' + 'hdnea=' + token_el.attrib['auth'] + '&hdcore=3.2.0'
-        formats = self._extract_f4m_formats(manifest_url, display_id)
+        formats = []
+        # see http://tv.dfb.de/player/js/ajax.js for the method to extract m3u8 formats
+        for sa_url in (stream_access_url, stream_access_url + '&area=&format=iphone'):
+            stream_access_info = self._download_xml(sa_url, display_id)
+            token_el = stream_access_info.find('token')
+            manifest_url = token_el.attrib['url'] + '?' + 'hdnea=' + token_el.attrib['auth']
+            if '.f4m' in manifest_url:
+                formats.extend(self._extract_f4m_formats(
+                    manifest_url + '&hdcore=3.2.0',
+                    display_id, f4m_id='hds', fatal=False))
+            else:
+                formats.extend(self._extract_m3u8_formats(
+                    manifest_url, display_id, 'mp4',
+                    'm3u8_native', m3u8_id='hls', fatal=False))
         self._sort_formats(formats)
 
         return {
             'id': video_id,
             'display_id': display_id,
             'title': video_info.find('title').text,
-            'thumbnail': self._og_search_thumbnail(webpage),
+            'thumbnail': 'http://tv.dfb.de/images/%s_640x360.jpg' % video_id,
             'upload_date': unified_strdate(video_info.find('time_date').text),
             'formats': formats,
         }

From 5556047465e0601d2bdee0e5a436cee64b745851 Mon Sep 17 00:00:00 2001
From: Reino17 <reino@degeelebosch.nl>
Date: Wed, 27 Apr 2016 13:11:38 +0200
Subject: [PATCH 07/50] [rtlnl] Update 720p PG_URL_TEMPLATE

- Fixed the format_id for the 720p progressive videostream and added the video's resolution.
- The adaptive videostreams have the m3u8-extension, so I removed the confusing mp4-extension in order to make a better distinction between the these and the progressive videostreams.
---
 youtube_dl/extractor/rtlnl.py | 21 ++++++++++++++++-----
 1 file changed, 16 insertions(+), 5 deletions(-)

diff --git a/youtube_dl/extractor/rtlnl.py b/youtube_dl/extractor/rtlnl.py
index 543d94417..e8b55ea25 100644
--- a/youtube_dl/extractor/rtlnl.py
+++ b/youtube_dl/extractor/rtlnl.py
@@ -94,19 +94,30 @@ class RtlNlIE(InfoExtractor):
         videopath = material['videopath']
         m3u8_url = meta.get('videohost', 'http://manifest.us.rtl.nl') + videopath
 
-        formats = self._extract_m3u8_formats(m3u8_url, uuid, ext='mp4')
+        formats = self._extract_m3u8_formats(m3u8_url, uuid)
 
         video_urlpart = videopath.split('/adaptive/')[1][:-5]
         PG_URL_TEMPLATE = 'http://pg.us.rtl.nl/rtlxl/network/%s/progressive/%s.mp4'
 
         formats.extend([
             {
-                'url': PG_URL_TEMPLATE % ('a2m', video_urlpart),
-                'format_id': 'pg-sd',
+                'url': PG_URL_TEMPLATE % ('a2t', video_urlpart),
+                'format_id': 'a2t',
+                'width': 512,
+                'height': 288,
             },
             {
-                'url': PG_URL_TEMPLATE % ('a3m', video_urlpart),
-                'format_id': 'pg-hd',
+                'url': PG_URL_TEMPLATE % ('a3t', video_urlpart),
+                'format_id': 'a3t',
+                'width': 704,
+                'height': 400,
+                'quality': 0,
+            },
+            {
+                'url': PG_URL_TEMPLATE % ('nettv', video_urlpart),
+                'format_id': 'nettv',
+                'width': 1280,
+                'height': 720,
                 'quality': 0,
             }
         ])

From 0571ffda7dd12fc1067c0344f3ce4ce47b39edb0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com>
Date: Sat, 30 Apr 2016 01:43:39 +0600
Subject: [PATCH 08/50] [rtlnl] Improve extraction (Closes #9329)

* Make hls extraction non fatal and revert ext
* Extract progressive formats' metadata from corresponding hls formats
---
 youtube_dl/extractor/rtlnl.py | 55 +++++++++++++++++++++--------------
 1 file changed, 33 insertions(+), 22 deletions(-)

diff --git a/youtube_dl/extractor/rtlnl.py b/youtube_dl/extractor/rtlnl.py
index e8b55ea25..c95bcf035 100644
--- a/youtube_dl/extractor/rtlnl.py
+++ b/youtube_dl/extractor/rtlnl.py
@@ -94,33 +94,44 @@ class RtlNlIE(InfoExtractor):
         videopath = material['videopath']
         m3u8_url = meta.get('videohost', 'http://manifest.us.rtl.nl') + videopath
 
-        formats = self._extract_m3u8_formats(m3u8_url, uuid)
+        formats = self._extract_m3u8_formats(
+            m3u8_url, uuid, 'mp4', m3u8_id='hls', fatal=False)
 
         video_urlpart = videopath.split('/adaptive/')[1][:-5]
         PG_URL_TEMPLATE = 'http://pg.us.rtl.nl/rtlxl/network/%s/progressive/%s.mp4'
 
-        formats.extend([
-            {
-                'url': PG_URL_TEMPLATE % ('a2t', video_urlpart),
-                'format_id': 'a2t',
-                'width': 512,
-                'height': 288,
-            },
-            {
-                'url': PG_URL_TEMPLATE % ('a3t', video_urlpart),
-                'format_id': 'a3t',
-                'width': 704,
-                'height': 400,
-                'quality': 0,
-            },
-            {
-                'url': PG_URL_TEMPLATE % ('nettv', video_urlpart),
-                'format_id': 'nettv',
-                'width': 1280,
-                'height': 720,
-                'quality': 0,
+        PG_FORMATS = (
+            ('a2t', 512, 288),
+            ('a3t', 704, 400),
+            ('nettv', 1280, 720),
+        )
+
+        def pg_format(format_id, width, height):
+            return {
+                'url': PG_URL_TEMPLATE % (format_id, video_urlpart),
+                'format_id': 'pg-%s' % format_id,
+                'protocol': 'http',
+                'width': width,
+                'height': height,
             }
-        ])
+
+        if not formats:
+            formats = [pg_format(*pg_tuple) for pg_tuple in PG_FORMATS]
+        else:
+            pg_formats = []
+            for format_id, width, height in PG_FORMATS:
+                try:
+                    # Find hls format with the same width and height corresponding
+                    # to progressive format and copy metadata from it.
+                    f = next(f for f in formats
+                             if f.get('width') == width and f.get('height') == height).copy()
+                    f.update(pg_format(format_id, width, height))
+                    pg_formats.append(f)
+                except StopIteration:
+                    # Missing hls format does mean that no progressive format with
+                    # such width and height exists either.
+                    pass
+            formats.extend(pg_formats)
         self._sort_formats(formats)
 
         thumbnails = []

From cd63d091cecd8a85a2080035051205b00f3454d3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com>
Date: Sat, 30 Apr 2016 01:48:14 +0600
Subject: [PATCH 09/50] [rtlnl] Fix tests

---
 youtube_dl/extractor/rtlnl.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/youtube_dl/extractor/rtlnl.py b/youtube_dl/extractor/rtlnl.py
index c95bcf035..e4411054a 100644
--- a/youtube_dl/extractor/rtlnl.py
+++ b/youtube_dl/extractor/rtlnl.py
@@ -39,7 +39,7 @@ class RtlNlIE(InfoExtractor):
             'ext': 'mp4',
             'timestamp': 1424039400,
             'title': 'RTL Nieuws - Nieuwe beelden Kopenhagen: chaos direct na aanslag',
-            'thumbnail': 're:^https?://screenshots\.rtl\.nl/system/thumb/sz=[0-9]+x[0-9]+/uuid=84ae5571-ac25-4225-ae0c-ef8d9efb2aed$',
+            'thumbnail': 're:^https?://screenshots\.rtl\.nl/(?:[^/]+/)*sz=[0-9]+x[0-9]+/uuid=84ae5571-ac25-4225-ae0c-ef8d9efb2aed$',
             'upload_date': '20150215',
             'description': 'Er zijn nieuwe beelden vrijgegeven die vlak na de aanslag in Kopenhagen zijn gemaakt. Op de video is goed te zien hoe omstanders zich bekommeren om één van de slachtoffers, terwijl de eerste agenten ter plaatse komen.',
         }
@@ -50,7 +50,7 @@ class RtlNlIE(InfoExtractor):
             'id': 'f536aac0-1dc3-4314-920e-3bd1c5b3811a',
             'ext': 'mp4',
             'title': 'RTL Nieuws - Meer beelden van overval juwelier',
-            'thumbnail': 're:^https?://screenshots\.rtl\.nl/system/thumb/sz=[0-9]+x[0-9]+/uuid=f536aac0-1dc3-4314-920e-3bd1c5b3811a$',
+            'thumbnail': 're:^https?://screenshots\.rtl\.nl/(?:[^/]+/)*sz=[0-9]+x[0-9]+/uuid=f536aac0-1dc3-4314-920e-3bd1c5b3811a$',
             'timestamp': 1437233400,
             'upload_date': '20150718',
             'duration': 30.474,

From 373e1230e4a3b934ddc59c212773d36a7e998dec Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com>
Date: Sat, 30 Apr 2016 01:50:26 +0600
Subject: [PATCH 10/50] [rtlnl] Clarify tests

---
 youtube_dl/extractor/rtlnl.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/youtube_dl/extractor/rtlnl.py b/youtube_dl/extractor/rtlnl.py
index e4411054a..5e916c4ab 100644
--- a/youtube_dl/extractor/rtlnl.py
+++ b/youtube_dl/extractor/rtlnl.py
@@ -32,6 +32,7 @@ class RtlNlIE(InfoExtractor):
             'duration': 576.880,
         },
     }, {
+        # best format avaialble a3t
         'url': 'http://www.rtl.nl/system/videoplayer/derden/rtlnieuws/video_embed.html#uuid=84ae5571-ac25-4225-ae0c-ef8d9efb2aed/autoplay=false',
         'md5': 'dea7474214af1271d91ef332fb8be7ea',
         'info_dict': {
@@ -45,6 +46,7 @@ class RtlNlIE(InfoExtractor):
         }
     }, {
         # empty synopsis and missing episodes (see https://github.com/rg3/youtube-dl/issues/6275)
+        # best format available nettv
         'url': 'http://www.rtl.nl/system/videoplayer/derden/rtlnieuws/video_embed.html#uuid=f536aac0-1dc3-4314-920e-3bd1c5b3811a/autoplay=false',
         'info_dict': {
             'id': 'f536aac0-1dc3-4314-920e-3bd1c5b3811a',

From ca278a182b9331201e058f9f4d46b3b6114a1518 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com>
Date: Sat, 30 Apr 2016 02:07:29 +0600
Subject: [PATCH 11/50] [rtlnl] Replace test

---
 youtube_dl/extractor/rtlnl.py | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/youtube_dl/extractor/rtlnl.py b/youtube_dl/extractor/rtlnl.py
index 5e916c4ab..8598b5840 100644
--- a/youtube_dl/extractor/rtlnl.py
+++ b/youtube_dl/extractor/rtlnl.py
@@ -20,16 +20,16 @@ class RtlNlIE(InfoExtractor):
         (?P<id>[0-9a-f-]+)'''
 
     _TESTS = [{
-        'url': 'http://www.rtlxl.nl/#!/rtl-nieuws-132237/6e4203a6-0a5e-3596-8424-c599a59e0677',
-        'md5': 'cc16baa36a6c169391f0764fa6b16654',
+        'url': 'http://www.rtlxl.nl/#!/rtl-nieuws-132237/82b1aad1-4a14-3d7b-b554-b0aed1b2c416',
+        'md5': '473d1946c1fdd050b2c0161a4b13c373',
         'info_dict': {
-            'id': '6e4203a6-0a5e-3596-8424-c599a59e0677',
+            'id': '82b1aad1-4a14-3d7b-b554-b0aed1b2c416',
             'ext': 'mp4',
-            'title': 'RTL Nieuws - Laat',
-            'description': 'md5:6b61f66510c8889923b11f2778c72dc5',
-            'timestamp': 1408051800,
-            'upload_date': '20140814',
-            'duration': 576.880,
+            'title': 'RTL Nieuws',
+            'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
+            'timestamp': 1461951000,
+            'upload_date': '20160429',
+            'duration': 1167.96,
         },
     }, {
         # best format avaialble a3t

From 69c4cde4ba6a4c7dfb8a46d1713cbb46d6f1d623 Mon Sep 17 00:00:00 2001
From: remitamine <remitamine@gmail.com>
Date: Fri, 29 Apr 2016 21:35:09 +0100
Subject: [PATCH 12/50] [wsj] improve extraction

---
 youtube_dl/extractor/wsj.py | 95 +++++++++++++++++++------------------
 1 file changed, 48 insertions(+), 47 deletions(-)

diff --git a/youtube_dl/extractor/wsj.py b/youtube_dl/extractor/wsj.py
index 5a897371d..a83e68b17 100644
--- a/youtube_dl/extractor/wsj.py
+++ b/youtube_dl/extractor/wsj.py
@@ -4,16 +4,22 @@ from __future__ import unicode_literals
 from .common import InfoExtractor
 from ..utils import (
     int_or_none,
+    float_or_none,
     unified_strdate,
 )
 
 
 class WSJIE(InfoExtractor):
-    _VALID_URL = r'https?://video-api\.wsj\.com/api-video/player/iframe\.html\?guid=(?P<id>[a-zA-Z0-9-]+)'
+    _VALID_URL = r'''(?x)https?://
+        (?:
+            video-api\.wsj\.com/api-video/player/iframe\.html\?guid=|
+            (?:www\.)?wsj\.com/video/[^/]+/
+        )
+        (?P<id>[a-zA-Z0-9-]+)'''
     IE_DESC = 'Wall Street Journal'
-    _TEST = {
+    _TESTS = [{
         'url': 'http://video-api.wsj.com/api-video/player/iframe.html?guid=1BD01A4C-BFE8-40A5-A42F-8A8AF9898B1A',
-        'md5': '9747d7a6ebc2f4df64b981e1dde9efa9',
+        'md5': 'e230a5bb249075e40793b655a54a02e4',
         'info_dict': {
             'id': '1BD01A4C-BFE8-40A5-A42F-8A8AF9898B1A',
             'ext': 'mp4',
@@ -24,65 +30,60 @@ class WSJIE(InfoExtractor):
             'duration': 90,
             'title': 'Bills Coach Rex Ryan Updates His Old Jets Tattoo',
         },
-    }
+    }, {
+        'url': 'http://www.wsj.com/video/can-alphabet-build-a-smarter-city/359DDAA8-9AC1-489C-82E6-0429C1E430E0.html',
+        'only_matching': True,
+    }]
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
 
-        bitrates = [128, 174, 264, 320, 464, 664, 1264]
         api_url = (
             'http://video-api.wsj.com/api-video/find_all_videos.asp?'
-            'type=guid&count=1&query=%s&'
-            'fields=hls,adZone,thumbnailList,guid,state,secondsUntilStartTime,'
-            'author,description,name,linkURL,videoStillURL,duration,videoURL,'
-            'adCategory,catastrophic,linkShortURL,doctypeID,youtubeID,'
-            'titletag,rssURL,wsj-section,wsj-subsection,allthingsd-section,'
-            'allthingsd-subsection,sm-section,sm-subsection,provider,'
-            'formattedCreationDate,keywords,keywordsOmniture,column,editor,'
-            'emailURL,emailPartnerID,showName,omnitureProgramName,'
-            'omnitureVideoFormat,linkRelativeURL,touchCastID,'
-            'omniturePublishDate,%s') % (
-                video_id, ','.join('video%dkMP4Url' % br for br in bitrates))
+            'type=guid&count=1&query=%s&fields=type,hls,videoMP4List,'
+            'thumbnailList,author,description,name,duration,videoURL,'
+            'titletag,formattedCreationDate,keywords,editor' % video_id)
         info = self._download_json(api_url, video_id)['items'][0]
-
-        # Thumbnails are conveniently in the correct format already
-        thumbnails = info.get('thumbnailList')
-        creator = info.get('author')
-        uploader_id = info.get('editor')
-        categories = info.get('keywords')
-        duration = int_or_none(info.get('duration'))
-        upload_date = unified_strdate(
-            info.get('formattedCreationDate'), day_first=False)
         title = info.get('name', info.get('titletag'))
 
-        formats = [{
-            'format_id': 'f4m',
-            'format_note': 'f4m (meta URL)',
-            'url': info['videoURL'],
-        }]
-        if info.get('hls'):
+        formats = []
+
+        f4m_url = info.get('videoURL')
+        if f4m_url:
+            formats.extend(self._extract_f4m_formats(
+                f4m_url, video_id, f4m_id='hds', fatal=False))
+
+        m3u8_url = info.get('hls')
+        if m3u8_url:
             formats.extend(self._extract_m3u8_formats(
                 info['hls'], video_id, ext='mp4',
-                preference=0, entry_protocol='m3u8_native'))
-        for br in bitrates:
-            field = 'video%dkMP4Url' % br
-            if info.get(field):
-                formats.append({
-                    'format_id': 'mp4-%d' % br,
-                    'container': 'mp4',
-                    'tbr': br,
-                    'url': info[field],
-                })
+                entry_protocol='m3u8_native', m3u8_id='hls', fatal=False))
+
+        for v in info.get('videoMP4List', []):
+            mp4_url = v.get('url')
+            if not mp4_url:
+                continue
+            tbr = int_or_none(v.get('bitrate'))
+            formats.append({
+                'url': mp4_url,
+                'format_id': 'http' + ('-%d' % tbr if tbr else ''),
+                'tbr': tbr,
+                'width': int_or_none(v.get('width')),
+                'height': int_or_none(v.get('height')),
+                'fps': float_or_none(v.get('fps')),
+            })
         self._sort_formats(formats)
 
         return {
             'id': video_id,
             'formats': formats,
-            'thumbnails': thumbnails,
-            'creator': creator,
-            'uploader_id': uploader_id,
-            'duration': duration,
-            'upload_date': upload_date,
+            # Thumbnails are conveniently in the correct format already
+            'thumbnails': info.get('thumbnailList'),
+            'creator': info.get('author'),
+            'uploader_id': info.get('editor'),
+            'duration': int_or_none(info.get('duration')),
+            'upload_date': unified_strdate(info.get(
+                'formattedCreationDate'), day_first=False),
             'title': title,
-            'categories': categories,
+            'categories': info.get('keywords'),
         }

From cbc032c8b70a038a69259378c92b4ba97b42d491 Mon Sep 17 00:00:00 2001
From: remitamine <remitamine@gmail.com>
Date: Sat, 30 Apr 2016 01:24:36 +0100
Subject: [PATCH 13/50] [pbs] extract all http formats

---
 youtube_dl/extractor/pbs.py | 48 ++++++++++++++++++-------------------
 1 file changed, 24 insertions(+), 24 deletions(-)

diff --git a/youtube_dl/extractor/pbs.py b/youtube_dl/extractor/pbs.py
index f43e3a146..38cdb9975 100644
--- a/youtube_dl/extractor/pbs.py
+++ b/youtube_dl/extractor/pbs.py
@@ -196,7 +196,7 @@ class PBSIE(InfoExtractor):
     _TESTS = [
         {
             'url': 'http://www.pbs.org/tpt/constitution-usa-peter-sagal/watch/a-more-perfect-union/',
-            'md5': 'ce1888486f0908d555a8093cac9a7362',
+            'md5': '173dc391afd361fa72eab5d3d918968d',
             'info_dict': {
                 'id': '2365006249',
                 'ext': 'mp4',
@@ -204,13 +204,10 @@ class PBSIE(InfoExtractor):
                 'description': 'md5:36f341ae62e251b8f5bd2b754b95a071',
                 'duration': 3190,
             },
-            'params': {
-                'skip_download': True,  # requires ffmpeg
-            },
         },
         {
             'url': 'http://www.pbs.org/wgbh/pages/frontline/losing-iraq/',
-            'md5': '143c98aa54a346738a3d78f54c925321',
+            'md5': '6f722cb3c3982186d34b0f13374499c7',
             'info_dict': {
                 'id': '2365297690',
                 'ext': 'mp4',
@@ -218,9 +215,6 @@ class PBSIE(InfoExtractor):
                 'description': 'md5:4d3eaa01f94e61b3e73704735f1196d9',
                 'duration': 5050,
             },
-            'params': {
-                'skip_download': True,  # requires ffmpeg
-            }
         },
         {
             'url': 'http://www.pbs.org/newshour/bb/education-jan-june12-cyberschools_02-23/',
@@ -244,9 +238,6 @@ class PBSIE(InfoExtractor):
                 'duration': 6559,
                 'thumbnail': 're:^https?://.*\.jpg$',
             },
-            'params': {
-                'skip_download': True,  # requires ffmpeg
-            },
         },
         {
             'url': 'http://www.pbs.org/wgbh/nova/earth/killer-typhoon.html',
@@ -262,9 +253,6 @@ class PBSIE(InfoExtractor):
                 'upload_date': '20140122',
                 'age_limit': 10,
             },
-            'params': {
-                'skip_download': True,  # requires ffmpeg
-            },
         },
         {
             'url': 'http://www.pbs.org/wgbh/pages/frontline/united-states-of-secrets/',
@@ -290,6 +278,7 @@ class PBSIE(InfoExtractor):
         },
         {
             'url': 'http://www.pbs.org/video/2365245528/',
+            'md5': '115223d41bd55cda8ae5cd5ed4e11497',
             'info_dict': {
                 'id': '2365245528',
                 'display_id': '2365245528',
@@ -299,15 +288,13 @@ class PBSIE(InfoExtractor):
                 'duration': 6851,
                 'thumbnail': 're:^https?://.*\.jpg$',
             },
-            'params': {
-                'skip_download': True,  # requires ffmpeg
-            },
         },
         {
             # Video embedded in iframe containing angle brackets as attribute's value (e.g.
             # "<iframe style='position: absolute;<br />\ntop: 0; left: 0;' ...", see
             # https://github.com/rg3/youtube-dl/issues/7059)
             'url': 'http://www.pbs.org/food/features/a-chefs-life-season-3-episode-5-prickly-business/',
+            'md5': '84ced42850d78f1d4650297356e95e6f',
             'info_dict': {
                 'id': '2365546844',
                 'display_id': 'a-chefs-life-season-3-episode-5-prickly-business',
@@ -317,9 +304,6 @@ class PBSIE(InfoExtractor):
                 'duration': 1480,
                 'thumbnail': 're:^https?://.*\.jpg$',
             },
-            'params': {
-                'skip_download': True,  # requires ffmpeg
-            },
         },
         {
             # Frontline video embedded via flp2012.js
@@ -340,6 +324,7 @@ class PBSIE(InfoExtractor):
         {
             # Serves hd only via wigget/partnerplayer page
             'url': 'http://www.pbs.org/video/2365641075/',
+            'md5': 'acfd4c400b48149a44861cb16dd305cf',
             'info_dict': {
                 'id': '2365641075',
                 'ext': 'mp4',
@@ -348,9 +333,6 @@ class PBSIE(InfoExtractor):
                 'thumbnail': 're:^https?://.*\.jpg$',
                 'formats': 'mincount:8',
             },
-            'params': {
-                'skip_download': True,  # requires ffmpeg
-            },
         },
         {
             'url': 'http://player.pbs.org/widget/partnerplayer/2365297708/?start=0&end=0&chapterbar=false&endscreen=false&topbar=true',
@@ -494,6 +476,7 @@ class PBSIE(InfoExtractor):
                         info = video_info
 
         formats = []
+        http_url = None
         for num, redirect in enumerate(redirects):
             redirect_id = redirect.get('eeid')
 
@@ -514,13 +497,30 @@ class PBSIE(InfoExtractor):
 
             if determine_ext(format_url) == 'm3u8':
                 formats.extend(self._extract_m3u8_formats(
-                    format_url, display_id, 'mp4', preference=1, m3u8_id='hls'))
+                    format_url, display_id, 'mp4', m3u8_id='hls', fatal=False))
             else:
                 formats.append({
                     'url': format_url,
                     'format_id': redirect_id,
                 })
+                if re.search(r'^https?://.*(?:\d+k|baseline)', format_url):
+                    http_url = format_url
         self._remove_duplicate_formats(formats)
+        m3u8_formats = list(filter(
+            lambda f: f.get('protocol') == 'm3u8' and f.get('vcodec') != 'none' and f.get('resolution') != 'multiple',
+            formats))
+        if http_url:
+            for m3u8_format in m3u8_formats:
+                bitrate = self._search_regex(r'(\d+k)', m3u8_format['url'], 'bitrate', default=None)
+                if not bitrate:
+                    continue
+                f = m3u8_format.copy()
+                f.update({
+                    'url': re.sub(r'\d+k|baseline', bitrate, http_url),
+                    'format_id': m3u8_format['format_id'].replace('hls', 'http'),
+                    'protocol': 'http',
+                })
+                formats.append(f)
         self._sort_formats(formats)
 
         rating_str = info.get('rating')

From 350d7963db671884acd43f56f41bd499efd8e74a Mon Sep 17 00:00:00 2001
From: remitamine <remitamine@gmail.com>
Date: Sat, 30 Apr 2016 11:12:11 +0100
Subject: [PATCH 14/50] [pbs] fix the least bitrate http url construction

---
 youtube_dl/extractor/pbs.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/youtube_dl/extractor/pbs.py b/youtube_dl/extractor/pbs.py
index 38cdb9975..75c36a621 100644
--- a/youtube_dl/extractor/pbs.py
+++ b/youtube_dl/extractor/pbs.py
@@ -514,6 +514,8 @@ class PBSIE(InfoExtractor):
                 bitrate = self._search_regex(r'(\d+k)', m3u8_format['url'], 'bitrate', default=None)
                 if not bitrate:
                     continue
+                if bitrate == '192k':
+                    bitrate = 'baseline'
                 f = m3u8_format.copy()
                 f.update({
                     'url': re.sub(r'\d+k|baseline', bitrate, http_url),

From 35cd2f4c253fa9d37b6a253f9f63bfe258d8f334 Mon Sep 17 00:00:00 2001
From: remitamine <remitamine@gmail.com>
Date: Sat, 30 Apr 2016 11:31:09 +0100
Subject: [PATCH 15/50] [pbs] extract only the formats that we know that they
 will be available as http format

https://projects.pbs.org/confluence/display/coveapi/COVE+Video+Specifications
---
 youtube_dl/extractor/pbs.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/youtube_dl/extractor/pbs.py b/youtube_dl/extractor/pbs.py
index 75c36a621..17c85dd7b 100644
--- a/youtube_dl/extractor/pbs.py
+++ b/youtube_dl/extractor/pbs.py
@@ -512,7 +512,9 @@ class PBSIE(InfoExtractor):
         if http_url:
             for m3u8_format in m3u8_formats:
                 bitrate = self._search_regex(r'(\d+k)', m3u8_format['url'], 'bitrate', default=None)
-                if not bitrate:
+                # extract only the formats that we know that they will be available as http format.
+                # https://projects.pbs.org/confluence/display/coveapi/COVE+Video+Specifications
+                if not bitrate or bitrate not in ('192k', '400k', '800k', '1200k', '2500k'):
                     continue
                 if bitrate == '192k':
                     bitrate = 'baseline'

From 7691184a3128bd46544ff49e264322d5e9187fdc Mon Sep 17 00:00:00 2001
From: remitamine <remitamine@gmail.com>
Date: Sat, 30 Apr 2016 12:57:30 +0100
Subject: [PATCH 16/50] [pbs] remove duplicate format

---
 youtube_dl/extractor/pbs.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/youtube_dl/extractor/pbs.py b/youtube_dl/extractor/pbs.py
index 17c85dd7b..35fb1798d 100644
--- a/youtube_dl/extractor/pbs.py
+++ b/youtube_dl/extractor/pbs.py
@@ -514,10 +514,8 @@ class PBSIE(InfoExtractor):
                 bitrate = self._search_regex(r'(\d+k)', m3u8_format['url'], 'bitrate', default=None)
                 # extract only the formats that we know that they will be available as http format.
                 # https://projects.pbs.org/confluence/display/coveapi/COVE+Video+Specifications
-                if not bitrate or bitrate not in ('192k', '400k', '800k', '1200k', '2500k'):
+                if not bitrate or bitrate not in ('400k', '800k', '1200k', '2500k'):
                     continue
-                if bitrate == '192k':
-                    bitrate = 'baseline'
                 f = m3u8_format.copy()
                 f.update({
                     'url': re.sub(r'\d+k|baseline', bitrate, http_url),

From e0e9bbb0e9dd92f526b04584c47e6509a73fed04 Mon Sep 17 00:00:00 2001
From: remitamine <remitamine@gmail.com>
Date: Sat, 30 Apr 2016 14:02:17 +0100
Subject: [PATCH 17/50] [pbs] extract srt and vtt subtitles

---
 youtube_dl/extractor/pbs.py | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/youtube_dl/extractor/pbs.py b/youtube_dl/extractor/pbs.py
index 35fb1798d..81918ac6e 100644
--- a/youtube_dl/extractor/pbs.py
+++ b/youtube_dl/extractor/pbs.py
@@ -537,6 +537,19 @@ class PBSIE(InfoExtractor):
                 'ext': 'ttml',
                 'url': closed_captions_url,
             }]
+            mobj = re.search(r'/(\d+)_Encoded\.dfxp', closed_captions_url)
+            if mobj:
+                ttml_caption_suffix, ttml_caption_id = mobj.group(0, 1)
+                ttml_caption_id = int(ttml_caption_id)
+                subtitles['en'].extend([{
+                    'url': closed_captions_url.replace(
+                        ttml_caption_suffix, '/%d_Encoded.srt' % (ttml_caption_id + 1)),
+                    'ext': 'srt',
+                }, {
+                    'url': closed_captions_url.replace(
+                        ttml_caption_suffix, '/%d_Encoded.vtt' % (ttml_caption_id + 2)),
+                    'ext': 'vtt',
+                }])
 
         # info['title'] is often incomplete (e.g. 'Full Episode', 'Episode 5', etc)
         # Try turning it to 'program - title' naming scheme if possible

From d41ee7b7745d59d398f37b435146d4036e4a7448 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com>
Date: Sat, 30 Apr 2016 19:22:42 +0600
Subject: [PATCH 18/50] [vlive] Pass Referer as bytestring (Closes #9352)

---
 youtube_dl/extractor/vlive.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/youtube_dl/extractor/vlive.py b/youtube_dl/extractor/vlive.py
index 7f9e99ec2..a672ea9c5 100644
--- a/youtube_dl/extractor/vlive.py
+++ b/youtube_dl/extractor/vlive.py
@@ -43,7 +43,7 @@ class VLiveIE(InfoExtractor):
         status_params = self._download_json(
             'http://www.vlive.tv/video/status?videoSeq=%s' % video_id,
             video_id, 'Downloading JSON status',
-            headers={'Referer': url})
+            headers={'Referer': url.encode('utf-8')})
         status = status_params.get('status')
         air_start = status_params.get('onAirStartAt', '')
         is_live = status_params.get('isLive')

From 11fa3d7f997019dfce8b670e1b10042ac2004f69 Mon Sep 17 00:00:00 2001
From: remitamine <remitamine@gmail.com>
Date: Sat, 30 Apr 2016 15:41:22 +0100
Subject: [PATCH 19/50] [ted] extract all http formats

---
 youtube_dl/extractor/ted.py | 45 ++++++++++++++++++++++++-------------
 1 file changed, 29 insertions(+), 16 deletions(-)

diff --git a/youtube_dl/extractor/ted.py b/youtube_dl/extractor/ted.py
index cf8851438..aea6a02a7 100644
--- a/youtube_dl/extractor/ted.py
+++ b/youtube_dl/extractor/ted.py
@@ -102,9 +102,9 @@ class TEDIE(InfoExtractor):
     }]
 
     _NATIVE_FORMATS = {
-        'low': {'preference': 1, 'width': 320, 'height': 180},
-        'medium': {'preference': 2, 'width': 512, 'height': 288},
-        'high': {'preference': 3, 'width': 854, 'height': 480},
+        'low': {'width': 320, 'height': 180},
+        'medium': {'width': 512, 'height': 288},
+        'high': {'width': 854, 'height': 480},
     }
 
     def _extract_info(self, webpage):
@@ -171,15 +171,21 @@ class TEDIE(InfoExtractor):
                 if finfo:
                     f.update(finfo)
 
+        http_url = None
         for format_id, resources in talk_info['resources'].items():
             if format_id == 'h264':
                 for resource in resources:
+                    h264_url = resource.get('file')
+                    if not h264_url:
+                        continue
                     bitrate = int_or_none(resource.get('bitrate'))
                     formats.append({
-                        'url': resource['file'],
+                        'url': h264_url,
                         'format_id': '%s-%sk' % (format_id, bitrate),
                         'tbr': bitrate,
                     })
+                    if re.search('\d+k', h264_url):
+                        http_url = h264_url
             elif format_id == 'rtmp':
                 streamer = talk_info.get('streamer')
                 if not streamer:
@@ -195,16 +201,24 @@ class TEDIE(InfoExtractor):
                         'tbr': int_or_none(resource.get('bitrate')),
                     })
             elif format_id == 'hls':
-                hls_formats = self._extract_m3u8_formats(
-                    resources.get('stream'), video_name, 'mp4', m3u8_id=format_id)
-                for f in hls_formats:
-                    if f.get('format_id') == 'hls-meta':
-                        continue
-                    if not f.get('height'):
-                        f['vcodec'] = 'none'
-                    else:
-                        f['acodec'] = 'none'
-                formats.extend(hls_formats)
+                formats.extend(self._extract_m3u8_formats(
+                    resources.get('stream'), video_name, 'mp4', m3u8_id=format_id, fatal=False))
+
+        m3u8_formats = list(filter(
+            lambda f: f.get('protocol') == 'm3u8' and f.get('vcodec') != 'none' and f.get('resolution') != 'multiple',
+            formats))
+        if http_url:
+            for m3u8_format in m3u8_formats:
+                bitrate = self._search_regex(r'(\d+k)', m3u8_format['url'], 'bitrate', default=None)
+                if not bitrate:
+                    continue
+                f = m3u8_format.copy()
+                f.update({
+                    'url': re.sub(r'\d+k', bitrate, http_url),
+                    'format_id': m3u8_format['format_id'].replace('hls', 'http'),
+                    'protocol': 'http',
+                })
+                formats.append(f)
 
         audio_download = talk_info.get('audioDownload')
         if audio_download:
@@ -212,10 +226,9 @@ class TEDIE(InfoExtractor):
                 'url': audio_download,
                 'format_id': 'audio',
                 'vcodec': 'none',
-                'preference': -0.5,
             })
 
-        self._sort_formats(formats)
+        self._sort_formats(formats, ('width', 'height', 'tbr', 'format_id'))
 
         video_id = compat_str(talk_info['id'])
 

From f628d800fbaefe180bd354a0ff8a9009bc64da41 Mon Sep 17 00:00:00 2001
From: remitamine <remitamine@gmail.com>
Date: Sat, 30 Apr 2016 16:34:57 +0100
Subject: [PATCH 20/50] [ted] add support for youtube embeds and update tests

---
 youtube_dl/extractor/ted.py | 25 +++++++++++++++++--------
 1 file changed, 17 insertions(+), 8 deletions(-)

diff --git a/youtube_dl/extractor/ted.py b/youtube_dl/extractor/ted.py
index aea6a02a7..451cde76d 100644
--- a/youtube_dl/extractor/ted.py
+++ b/youtube_dl/extractor/ted.py
@@ -27,7 +27,7 @@ class TEDIE(InfoExtractor):
         '''
     _TESTS = [{
         'url': 'http://www.ted.com/talks/dan_dennett_on_our_consciousness.html',
-        'md5': 'fc94ac279feebbce69f21c0c6ee82810',
+        'md5': '0de43ac406aa3e4ea74b66c9c7789b13',
         'info_dict': {
             'id': '102',
             'ext': 'mp4',
@@ -37,21 +37,26 @@ class TEDIE(InfoExtractor):
                             'consciousness, but that half the time our brains are '
                             'actively fooling us.'),
             'uploader': 'Dan Dennett',
-            'width': 854,
+            'width': 853,
             'duration': 1308,
         }
     }, {
         'url': 'http://www.ted.com/watch/ted-institute/ted-bcg/vishal-sikka-the-beauty-and-power-of-algorithms',
-        'md5': '226f4fb9c62380d11b7995efa4c87994',
+        'md5': 'b899ac15e345fb39534d913f7606082b',
         'info_dict': {
-            'id': 'vishal-sikka-the-beauty-and-power-of-algorithms',
+            'id': 'tSVI8ta_P4w',
             'ext': 'mp4',
             'title': 'Vishal Sikka: The beauty and power of algorithms',
             'thumbnail': 're:^https?://.+\.jpg',
-            'description': 'Adaptive, intelligent, and consistent, algorithms are emerging as the ultimate app for everything from matching consumers to products to assessing medical diagnoses. Vishal Sikka shares his appreciation for the algorithm, charting both its inherent beauty and its growing power.',
-        }
+            'description': 'md5:6261fdfe3e02f4f579cbbfc00aff73f4',
+            'upload_date': '20140122',
+            'uploader_id': 'TEDInstitute',
+            'uploader': 'TED Institute',
+        },
+        'add_ie': ['Youtube'],
     }, {
         'url': 'http://www.ted.com/talks/gabby_giffords_and_mark_kelly_be_passionate_be_courageous_be_your_best',
+        'md5': '71b3ab2f4233012dce09d515c9c39ce2',
         'info_dict': {
             'id': '1972',
             'ext': 'mp4',
@@ -228,7 +233,7 @@ class TEDIE(InfoExtractor):
                 'vcodec': 'none',
             })
 
-        self._sort_formats(formats, ('width', 'height', 'tbr', 'format_id'))
+        self._sort_formats(formats)
 
         video_id = compat_str(talk_info['id'])
 
@@ -267,7 +272,11 @@ class TEDIE(InfoExtractor):
 
         config_json = self._html_search_regex(
             r'"pages\.jwplayer"\s*,\s*({.+?})\s*\)\s*</script>',
-            webpage, 'config')
+            webpage, 'config', default=None)
+        if not config_json:
+            embed_url = self._search_regex(
+                r"<iframe[^>]+class='pages-video-embed__video__object'[^>]+src='([^']+)'", webpage, 'embed url')
+            return self.url_result(self._proto_relative_url(embed_url))
         config = json.loads(config_json)['config']
         video_url = config['video']['url']
         thumbnail = config.get('image', {}).get('url')

From 89c0dc9a5fadc3927f7c03f5829e4f2ef8555888 Mon Sep 17 00:00:00 2001
From: BlahGeek <i@BlahGeek.com>
Date: Sat, 30 Apr 2016 21:32:54 +0800
Subject: [PATCH 21/50] [xiami] Add xiami extractor

---
 youtube_dl/extractor/extractors.py |   6 ++
 youtube_dl/extractor/xiami.py      | 161 +++++++++++++++++++++++++++++
 2 files changed, 167 insertions(+)
 create mode 100644 youtube_dl/extractor/xiami.py

diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index b1b7f9b42..14ca9eaee 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -941,6 +941,12 @@ from .xhamster import (
     XHamsterIE,
     XHamsterEmbedIE,
 )
+from .xiami import (
+    XiamiIE,
+    XiamiAlbumIE,
+    XiamiArtistIE,
+    XiamiCollectionIE
+)
 from .xminus import XMinusIE
 from .xnxx import XNXXIE
 from .xstream import XstreamIE
diff --git a/youtube_dl/extractor/xiami.py b/youtube_dl/extractor/xiami.py
new file mode 100644
index 000000000..a28d63c48
--- /dev/null
+++ b/youtube_dl/extractor/xiami.py
@@ -0,0 +1,161 @@
+# -*- coding: utf-8 -*-
+
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    xpath_element,
+    xpath_text,
+    xpath_with_ns,
+    int_or_none,
+    ExtractorError
+)
+from ..compat import compat_urllib_parse_unquote
+
+
+class XiamiBaseIE(InfoExtractor):
+
+    _XML_BASE_URL = 'http://www.xiami.com/song/playlist/id'
+    _NS_MAP = {'xm': 'http://xspf.org/ns/0/'}
+
+    def _extract_track(self, track):
+        artist = xpath_text(track, xpath_with_ns('xm:artist', self._NS_MAP), default='')
+        artist = artist.split(';')
+
+        ret = {
+            'id': xpath_text(track, xpath_with_ns('xm:song_id', self._NS_MAP)),
+            'title': xpath_text(track, xpath_with_ns('xm:title', self._NS_MAP)),
+            'album': xpath_text(track, xpath_with_ns('xm:album_name', self._NS_MAP)),
+            'artist': ';'.join(artist) if artist else None,
+            'creator': artist[0] if artist else None,
+            'url': self._decrypt(xpath_text(track, xpath_with_ns('xm:location', self._NS_MAP))),
+            'thumbnail': xpath_text(track, xpath_with_ns('xm:pic', self._NS_MAP), default=None),
+            'duration': int_or_none(xpath_text(track, xpath_with_ns('xm:length', self._NS_MAP))),
+        }
+
+        lyrics_url = xpath_text(track, xpath_with_ns('xm:lyric', self._NS_MAP))
+        if lyrics_url and lyrics_url.endswith('.lrc'):
+            ret['description'] = self._download_webpage(lyrics_url, ret['id'])
+        return ret
+
+    def _extract_xml(self, _id, typ=''):
+        playlist = self._download_xml('%s/%s%s' % (self._XML_BASE_URL, _id, typ), _id)
+        tracklist = xpath_element(playlist, xpath_with_ns('./xm:trackList', self._NS_MAP))
+
+        if not len(tracklist):
+            raise ExtractorError('No track found')
+        return [self._extract_track(track) for track in tracklist]
+
+    @staticmethod
+    def _decrypt(origin):
+        n = int(origin[0])
+        origin = origin[1:]
+        short_lenth = len(origin) // n
+        long_num = len(origin) - short_lenth * n
+        l = tuple()
+        for i in range(0, n):
+            length = short_lenth
+            if i < long_num:
+                length += 1
+            l += (origin[0:length], )
+            origin = origin[length:]
+        ans = ''
+        for i in range(0, short_lenth + 1):
+            for j in range(0, n):
+                if len(l[j])>i:
+                    ans += l[j][i]
+        return compat_urllib_parse_unquote(ans).replace('^', '0')
+
+
+class XiamiIE(XiamiBaseIE):
+    IE_NAME = 'xiami:song'
+    IE_DESC = '虾米音乐'
+    _VALID_URL = r'http://www\.xiami\.com/song/(?P<id>[0-9]+)'
+    _TESTS = [
+        {
+            'url': 'http://www.xiami.com/song/1775610518',
+            'md5': '521dd6bea40fd5c9c69f913c232cb57e',
+            'info_dict': {
+                'id': '1775610518',
+                'ext': 'mp3',
+                'title': 'Woman',
+                'creator': 'HONNE',
+                'album': 'Woman',
+                'thumbnail': r're:http://img\.xiami\.net/images/album/.*\.jpg',
+                'description': 'md5:052ec7de41ca19f67e7fd70a1bfc4e0b',
+            }
+        },
+        {
+            'url': 'http://www.xiami.com/song/1775256504',
+            'md5': '932a3abd45c6aa2b1fdbe028fcb4c4fc',
+            'info_dict': {
+                'id': '1775256504',
+                'ext': 'mp3',
+                'title': '悟空',
+                'creator': '戴荃',
+                'album': '悟空',
+                'description': 'md5:206e67e84f9bed1d473d04196a00b990',
+            }
+        },
+    ]
+
+    def _real_extract(self, url):
+        _id = self._match_id(url)
+        return self._extract_xml(_id)[0]
+
+
+class XiamiAlbumIE(XiamiBaseIE):
+    IE_NAME = 'xiami:album'
+    IE_DESC = '虾米音乐 - 专辑'
+    _VALID_URL = r'http://www\.xiami\.com/album/(?P<id>[0-9]+)'
+    _TESTS = [
+        {
+            'url': 'http://www.xiami.com/album/2100300444',
+            'info_dict': {
+                'id': '2100300444',
+            },
+            'playlist_count': 10,
+        },
+        {
+            'url': 'http://www.xiami.com/album/512288?spm=a1z1s.6843761.1110925389.6.hhE9p9',
+            'only_matching': True,
+        }
+    ]
+
+    def _real_extract(self, url):
+        _id = self._match_id(url)
+        return self.playlist_result(self._extract_xml(_id, '/type/1'), _id)
+
+
+class XiamiArtistIE(XiamiBaseIE):
+    IE_NAME = 'xiami:artist'
+    IE_DESC = '虾米音乐 - 歌手'
+    _VALID_URL = r'http://www\.xiami\.com/artist/(?P<id>[0-9]+)'
+    _TEST = {
+        'url': 'http://www.xiami.com/artist/2132?spm=0.0.0.0.dKaScp',
+        'info_dict': {
+            'id': '2132',
+        },
+        'playlist_count': 20,
+    }
+
+    def _real_extract(self, url):
+        _id = self._match_id(url)
+        return self.playlist_result(self._extract_xml(_id, '/type/2'), _id)
+
+
+class XiamiCollectionIE(XiamiBaseIE):
+    IE_NAME = 'xiami:collection'
+    IE_DESC = '虾米音乐 - 精选集'
+    _VALID_URL = r'http://www\.xiami\.com/collect/(?P<id>[0-9]+)'
+    _TEST = {
+        'url': 'http://www.xiami.com/collect/156527391?spm=a1z1s.2943601.6856193.12.4jpBnr',
+        'info_dict': {
+            'id': '156527391',
+        },
+        'playlist_count': 26,
+    }
+
+    def _real_extract(self, url):
+        _id = self._match_id(url)
+        return self.playlist_result(self._extract_xml(_id, '/type/3'), _id)

From 4e0c0c1508810eb494cd32ef00fb75d03d03ce6f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com>
Date: Sat, 30 Apr 2016 21:50:23 +0600
Subject: [PATCH 22/50] [xiami] Improve extraction (Closes #9079)

* Switch to JSON source
* Add abstract IE for playlists
* Extract more track related metadata
---
 youtube_dl/extractor/extractors.py |   2 +-
 youtube_dl/extractor/xiami.py      | 193 ++++++++++++++---------------
 2 files changed, 96 insertions(+), 99 deletions(-)

diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index 14ca9eaee..737960a01 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -942,7 +942,7 @@ from .xhamster import (
     XHamsterEmbedIE,
 )
 from .xiami import (
-    XiamiIE,
+    XiamiSongIE,
     XiamiAlbumIE,
     XiamiArtistIE,
     XiamiCollectionIE
diff --git a/youtube_dl/extractor/xiami.py b/youtube_dl/extractor/xiami.py
index a28d63c48..e4ed306b4 100644
--- a/youtube_dl/extractor/xiami.py
+++ b/youtube_dl/extractor/xiami.py
@@ -1,50 +1,42 @@
-# -*- coding: utf-8 -*-
-
+# coding: utf-8
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
-from ..utils import (
-    xpath_element,
-    xpath_text,
-    xpath_with_ns,
-    int_or_none,
-    ExtractorError
-)
 from ..compat import compat_urllib_parse_unquote
+from ..utils import int_or_none
 
 
 class XiamiBaseIE(InfoExtractor):
+    _API_BASE_URL = 'http://www.xiami.com/song/playlist/cat/json/id'
 
-    _XML_BASE_URL = 'http://www.xiami.com/song/playlist/id'
-    _NS_MAP = {'xm': 'http://xspf.org/ns/0/'}
+    def _extract_track(self, track, track_id=None):
+        title = track['title']
+        track_url = self._decrypt(track['location'])
 
-    def _extract_track(self, track):
-        artist = xpath_text(track, xpath_with_ns('xm:artist', self._NS_MAP), default='')
-        artist = artist.split(';')
+        subtitles = {}
+        lyrics_url = track.get('lyric_url') or track.get('lyric')
+        if lyrics_url and lyrics_url.startswith('http'):
+            subtitles['origin'] = [{'url': lyrics_url}]
 
-        ret = {
-            'id': xpath_text(track, xpath_with_ns('xm:song_id', self._NS_MAP)),
-            'title': xpath_text(track, xpath_with_ns('xm:title', self._NS_MAP)),
-            'album': xpath_text(track, xpath_with_ns('xm:album_name', self._NS_MAP)),
-            'artist': ';'.join(artist) if artist else None,
-            'creator': artist[0] if artist else None,
-            'url': self._decrypt(xpath_text(track, xpath_with_ns('xm:location', self._NS_MAP))),
-            'thumbnail': xpath_text(track, xpath_with_ns('xm:pic', self._NS_MAP), default=None),
-            'duration': int_or_none(xpath_text(track, xpath_with_ns('xm:length', self._NS_MAP))),
+        return {
+            'id': track.get('song_id') or track_id,
+            'url': track_url,
+            'title': title,
+            'thumbnail': track.get('pic') or track.get('album_pic'),
+            'duration': int_or_none(track.get('length')),
+            'creator': track.get('artist', '').split(';')[0],
+            'track': title,
+            'album': track.get('album_name'),
+            'artist': track.get('artist'),
+            'subtitles': subtitles,
         }
 
-        lyrics_url = xpath_text(track, xpath_with_ns('xm:lyric', self._NS_MAP))
-        if lyrics_url and lyrics_url.endswith('.lrc'):
-            ret['description'] = self._download_webpage(lyrics_url, ret['id'])
-        return ret
-
-    def _extract_xml(self, _id, typ=''):
-        playlist = self._download_xml('%s/%s%s' % (self._XML_BASE_URL, _id, typ), _id)
-        tracklist = xpath_element(playlist, xpath_with_ns('./xm:trackList', self._NS_MAP))
-
-        if not len(tracklist):
-            raise ExtractorError('No track found')
-        return [self._extract_track(track) for track in tracklist]
+    def _extract_tracks(self, item_id, typ=None):
+        playlist = self._download_json(
+            '%s/%s%s' % (self._API_BASE_URL, item_id, '/type/%s' % typ if typ else ''), item_id)
+        return [
+            self._extract_track(track, item_id)
+            for track in playlist['data']['trackList']]
 
     @staticmethod
     def _decrypt(origin):
@@ -62,75 +54,87 @@ class XiamiBaseIE(InfoExtractor):
         ans = ''
         for i in range(0, short_lenth + 1):
             for j in range(0, n):
-                if len(l[j])>i:
+                if len(l[j]) > i:
                     ans += l[j][i]
         return compat_urllib_parse_unquote(ans).replace('^', '0')
 
 
-class XiamiIE(XiamiBaseIE):
+class XiamiSongIE(XiamiBaseIE):
     IE_NAME = 'xiami:song'
     IE_DESC = '虾米音乐'
-    _VALID_URL = r'http://www\.xiami\.com/song/(?P<id>[0-9]+)'
-    _TESTS = [
-        {
-            'url': 'http://www.xiami.com/song/1775610518',
-            'md5': '521dd6bea40fd5c9c69f913c232cb57e',
-            'info_dict': {
-                'id': '1775610518',
-                'ext': 'mp3',
-                'title': 'Woman',
-                'creator': 'HONNE',
-                'album': 'Woman',
-                'thumbnail': r're:http://img\.xiami\.net/images/album/.*\.jpg',
-                'description': 'md5:052ec7de41ca19f67e7fd70a1bfc4e0b',
-            }
-        },
-        {
-            'url': 'http://www.xiami.com/song/1775256504',
-            'md5': '932a3abd45c6aa2b1fdbe028fcb4c4fc',
-            'info_dict': {
-                'id': '1775256504',
-                'ext': 'mp3',
-                'title': '悟空',
-                'creator': '戴荃',
-                'album': '悟空',
-                'description': 'md5:206e67e84f9bed1d473d04196a00b990',
-            }
-        },
-    ]
+    _VALID_URL = r'https?://(?:www\.)?xiami\.com/song/(?P<id>[0-9]+)'
+    _TESTS = [{
+        'url': 'http://www.xiami.com/song/1775610518',
+        'md5': '521dd6bea40fd5c9c69f913c232cb57e',
+        'info_dict': {
+            'id': '1775610518',
+            'ext': 'mp3',
+            'title': 'Woman',
+            'thumbnail': r're:http://img\.xiami\.net/images/album/.*\.jpg',
+            'duration': 265,
+            'creator': 'HONNE',
+            'track': 'Woman',
+            'album': 'Woman',
+            'artist': 'HONNE',
+            'subtitles': {
+                'origin': [{
+                    'ext': 'lrc',
+                }],
+            },
+        }
+    }, {
+        'url': 'http://www.xiami.com/song/1775256504',
+        'md5': '932a3abd45c6aa2b1fdbe028fcb4c4fc',
+        'info_dict': {
+            'id': '1775256504',
+            'ext': 'mp3',
+            'title': '悟空',
+            'thumbnail': r're:http://img\.xiami\.net/images/album/.*\.jpg',
+            'duration': 200,
+            'creator': '戴荃',
+            'track': '悟空',
+            'album': '悟空',
+            'artist': '戴荃',
+            'subtitles': {
+                'origin': [{
+                    'ext': 'lrc',
+                }],
+            },
+        }
+    }]
 
     def _real_extract(self, url):
-        _id = self._match_id(url)
-        return self._extract_xml(_id)[0]
+        return self._extract_tracks(self._match_id(url))[0]
 
 
-class XiamiAlbumIE(XiamiBaseIE):
+class XiamiPlaylistBaseIE(XiamiBaseIE):
+    def _real_extract(self, url):
+        item_id = self._match_id(url)
+        return self.playlist_result(self._extract_tracks(item_id, self._TYPE), item_id)
+
+
+class XiamiAlbumIE(XiamiPlaylistBaseIE):
     IE_NAME = 'xiami:album'
     IE_DESC = '虾米音乐 - 专辑'
-    _VALID_URL = r'http://www\.xiami\.com/album/(?P<id>[0-9]+)'
-    _TESTS = [
-        {
-            'url': 'http://www.xiami.com/album/2100300444',
-            'info_dict': {
-                'id': '2100300444',
-            },
-            'playlist_count': 10,
+    _VALID_URL = r'https?://(?:www\.)?xiami\.com/album/(?P<id>[0-9]+)'
+    _TYPE = '1'
+    _TESTS = [{
+        'url': 'http://www.xiami.com/album/2100300444',
+        'info_dict': {
+            'id': '2100300444',
         },
-        {
-            'url': 'http://www.xiami.com/album/512288?spm=a1z1s.6843761.1110925389.6.hhE9p9',
-            'only_matching': True,
-        }
-    ]
-
-    def _real_extract(self, url):
-        _id = self._match_id(url)
-        return self.playlist_result(self._extract_xml(_id, '/type/1'), _id)
+        'playlist_count': 10,
+    }, {
+        'url': 'http://www.xiami.com/album/512288?spm=a1z1s.6843761.1110925389.6.hhE9p9',
+        'only_matching': True,
+    }]
 
 
-class XiamiArtistIE(XiamiBaseIE):
+class XiamiArtistIE(XiamiPlaylistBaseIE):
     IE_NAME = 'xiami:artist'
     IE_DESC = '虾米音乐 - 歌手'
-    _VALID_URL = r'http://www\.xiami\.com/artist/(?P<id>[0-9]+)'
+    _VALID_URL = r'https?://(?:www\.)?xiami\.com/artist/(?P<id>[0-9]+)'
+    _TYPE = '2'
     _TEST = {
         'url': 'http://www.xiami.com/artist/2132?spm=0.0.0.0.dKaScp',
         'info_dict': {
@@ -139,23 +143,16 @@ class XiamiArtistIE(XiamiBaseIE):
         'playlist_count': 20,
     }
 
-    def _real_extract(self, url):
-        _id = self._match_id(url)
-        return self.playlist_result(self._extract_xml(_id, '/type/2'), _id)
 
-
-class XiamiCollectionIE(XiamiBaseIE):
+class XiamiCollectionIE(XiamiPlaylistBaseIE):
     IE_NAME = 'xiami:collection'
     IE_DESC = '虾米音乐 - 精选集'
-    _VALID_URL = r'http://www\.xiami\.com/collect/(?P<id>[0-9]+)'
+    _VALID_URL = r'https?://(?:www\.)?xiami\.com/collect/(?P<id>[0-9]+)'
+    _TYPE = '3'
     _TEST = {
         'url': 'http://www.xiami.com/collect/156527391?spm=a1z1s.2943601.6856193.12.4jpBnr',
         'info_dict': {
             'id': '156527391',
         },
-        'playlist_count': 26,
+        'playlist_mincount': 29,
     }
-
-    def _real_extract(self, url):
-        _id = self._match_id(url)
-        return self.playlist_result(self._extract_xml(_id, '/type/3'), _id)

From 33a1ec950c97b03e742926f3d37e9aa4ce642633 Mon Sep 17 00:00:00 2001
From: remitamine <remitamine@gmail.com>
Date: Sat, 30 Apr 2016 20:38:45 +0100
Subject: [PATCH 23/50] [discovery] extract http formats

---
 youtube_dl/extractor/discovery.py | 50 +++++++++++++++++++++++++++++--
 1 file changed, 47 insertions(+), 3 deletions(-)

diff --git a/youtube_dl/extractor/discovery.py b/youtube_dl/extractor/discovery.py
index 5f1275b39..6d1f8e670 100644
--- a/youtube_dl/extractor/discovery.py
+++ b/youtube_dl/extractor/discovery.py
@@ -1,11 +1,16 @@
 from __future__ import unicode_literals
 
+import re
+
 from .common import InfoExtractor
 from ..utils import (
     parse_duration,
     parse_iso8601,
 )
-from ..compat import compat_str
+from ..compat import (
+    compat_str,
+    compat_urlparse,
+)
 
 
 class DiscoveryIE(InfoExtractor):
@@ -66,9 +71,48 @@ class DiscoveryIE(InfoExtractor):
         entries = []
 
         for idx, video_info in enumerate(info['playlist']):
-            formats = self._extract_m3u8_formats(
-                video_info['src'], display_id, 'mp4', 'm3u8_native', m3u8_id='hls',
+            m3u8_url = video_info['src']
+            formats = m3u8_formats = self._extract_m3u8_formats(
+                m3u8_url, display_id, 'mp4', 'm3u8_native', m3u8_id='hls',
                 note='Download m3u8 information for video %d' % (idx + 1))
+            qualities_basename = self._search_regex(
+                '/([^/]+)\.csmil/', m3u8_url, 'qualities basename', default=None)
+            if qualities_basename:
+                m3u8_path = compat_urlparse.urlparse(m3u8_url).path
+                QUALITIES_RE = r'((,\d+k)+,?)'
+                qualities = self._search_regex(
+                    QUALITIES_RE, qualities_basename,
+                    'qualities', default=None)
+                if qualities:
+                    qualities = list(map(lambda q: int(q[:-1]), qualities.strip(',').split(',')))
+                    qualities.sort()
+                    http_path = m3u8_path[1:].split('/', 1)[1]
+                    http_template = re.sub(QUALITIES_RE, r'%dk', http_path)
+                    http_template = http_template.replace('.csmil/master.m3u8', '')
+                    http_template = compat_urlparse.urljoin(
+                        'http://discsmil.edgesuite.net/', http_template)
+                    if m3u8_formats:
+                        self._sort_formats(m3u8_formats)
+                        m3u8_formats = list(filter(
+                            lambda f: f.get('vcodec') != 'none' and f.get('resolution') != 'multiple',
+                            m3u8_formats))
+                    if len(qualities) == len(m3u8_formats):
+                        for q, m3u8_format in zip(qualities, m3u8_formats):
+                            f = m3u8_format.copy()
+                            f.update({
+                                'url': http_template % q,
+                                'format_id': f['format_id'].replace('hls', 'http'),
+                                'protocol': 'http',
+                            })
+                            formats.append(f)
+                    else:
+                        for q in qualities:
+                            formats.append({
+                                'url': http_template % q,
+                                'ext': 'mp4',
+                                'format_id': 'http-%d' % q,
+                                'tbr': q,
+                            })
             self._sort_formats(formats)
             entries.append({
                 'id': compat_str(video_info['id']),

From 93f7a31bf3b572a98982a380ae3167ce954adc04 Mon Sep 17 00:00:00 2001
From: remitamine <remitamine@gmail.com>
Date: Sat, 30 Apr 2016 20:49:09 +0100
Subject: [PATCH 24/50] [discovery] extract subtitle

---
 youtube_dl/extractor/discovery.py | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/youtube_dl/extractor/discovery.py b/youtube_dl/extractor/discovery.py
index 6d1f8e670..87fb29b02 100644
--- a/youtube_dl/extractor/discovery.py
+++ b/youtube_dl/extractor/discovery.py
@@ -114,6 +114,16 @@ class DiscoveryIE(InfoExtractor):
                                 'tbr': q,
                             })
             self._sort_formats(formats)
+
+            subtitles = []
+            caption_url = video_info.get('captionsUrl')
+            if caption_url:
+                subtitles = {
+                    'en': [{
+                        'url': caption_url,
+                    }]
+                }
+
             entries.append({
                 'id': compat_str(video_info['id']),
                 'formats': formats,
@@ -124,6 +134,7 @@ class DiscoveryIE(InfoExtractor):
                 'thumbnail': video_info.get('thumbnailURL'),
                 'alt_title': video_info.get('secondary_title'),
                 'timestamp': parse_iso8601(video_info.get('publishedDate')),
+                'subtitles': subtitles,
             })
 
         return self.playlist_result(entries, display_id, video_title)

From d00b93d58c2b78ce02fbf6c8e7ea556cd06ac3b6 Mon Sep 17 00:00:00 2001
From: remitamine <remitamine@gmail.com>
Date: Sat, 30 Apr 2016 21:49:32 +0100
Subject: [PATCH 25/50] [discovery] extract more info using BrightcoveNewIE

---
 youtube_dl/extractor/discovery.py | 52 +++++--------------------------
 1 file changed, 7 insertions(+), 45 deletions(-)

diff --git a/youtube_dl/extractor/discovery.py b/youtube_dl/extractor/discovery.py
index 87fb29b02..9dd631752 100644
--- a/youtube_dl/extractor/discovery.py
+++ b/youtube_dl/extractor/discovery.py
@@ -38,6 +38,7 @@ class DiscoveryIE(InfoExtractor):
             'duration': 156,
             'timestamp': 1302032462,
             'upload_date': '20110405',
+            'uploader_id': '103207',
         },
         'params': {
             'skip_download': True,  # requires ffmpeg
@@ -59,7 +60,11 @@ class DiscoveryIE(InfoExtractor):
             'upload_date': '20140725',
             'timestamp': 1406246400,
             'duration': 116,
+            'uploader_id': '103207',
         },
+        'params': {
+            'skip_download': True,  # requires ffmpeg
+        }
     }]
 
     def _real_extract(self, url):
@@ -71,50 +76,6 @@ class DiscoveryIE(InfoExtractor):
         entries = []
 
         for idx, video_info in enumerate(info['playlist']):
-            m3u8_url = video_info['src']
-            formats = m3u8_formats = self._extract_m3u8_formats(
-                m3u8_url, display_id, 'mp4', 'm3u8_native', m3u8_id='hls',
-                note='Download m3u8 information for video %d' % (idx + 1))
-            qualities_basename = self._search_regex(
-                '/([^/]+)\.csmil/', m3u8_url, 'qualities basename', default=None)
-            if qualities_basename:
-                m3u8_path = compat_urlparse.urlparse(m3u8_url).path
-                QUALITIES_RE = r'((,\d+k)+,?)'
-                qualities = self._search_regex(
-                    QUALITIES_RE, qualities_basename,
-                    'qualities', default=None)
-                if qualities:
-                    qualities = list(map(lambda q: int(q[:-1]), qualities.strip(',').split(',')))
-                    qualities.sort()
-                    http_path = m3u8_path[1:].split('/', 1)[1]
-                    http_template = re.sub(QUALITIES_RE, r'%dk', http_path)
-                    http_template = http_template.replace('.csmil/master.m3u8', '')
-                    http_template = compat_urlparse.urljoin(
-                        'http://discsmil.edgesuite.net/', http_template)
-                    if m3u8_formats:
-                        self._sort_formats(m3u8_formats)
-                        m3u8_formats = list(filter(
-                            lambda f: f.get('vcodec') != 'none' and f.get('resolution') != 'multiple',
-                            m3u8_formats))
-                    if len(qualities) == len(m3u8_formats):
-                        for q, m3u8_format in zip(qualities, m3u8_formats):
-                            f = m3u8_format.copy()
-                            f.update({
-                                'url': http_template % q,
-                                'format_id': f['format_id'].replace('hls', 'http'),
-                                'protocol': 'http',
-                            })
-                            formats.append(f)
-                    else:
-                        for q in qualities:
-                            formats.append({
-                                'url': http_template % q,
-                                'ext': 'mp4',
-                                'format_id': 'http-%d' % q,
-                                'tbr': q,
-                            })
-            self._sort_formats(formats)
-
             subtitles = []
             caption_url = video_info.get('captionsUrl')
             if caption_url:
@@ -125,8 +86,9 @@ class DiscoveryIE(InfoExtractor):
                 }
 
             entries.append({
+                '_type': 'url_transparent',
+                'url': 'http://players.brightcove.net/103207/default_default/index.html?videoId=ref:%s' % video_info['referenceId'],
                 'id': compat_str(video_info['id']),
-                'formats': formats,
                 'title': video_info['title'],
                 'description': video_info.get('description'),
                 'duration': parse_duration(video_info.get('video_length')),

From ea7e7fecbd5da6866be003ea1ce5072dbe0118ae Mon Sep 17 00:00:00 2001
From: remitamine <remitamine@gmail.com>
Date: Sat, 30 Apr 2016 21:55:28 +0100
Subject: [PATCH 26/50] [discovery] remove unused imports

---
 youtube_dl/extractor/discovery.py | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/youtube_dl/extractor/discovery.py b/youtube_dl/extractor/discovery.py
index 9dd631752..7c554ec14 100644
--- a/youtube_dl/extractor/discovery.py
+++ b/youtube_dl/extractor/discovery.py
@@ -1,16 +1,11 @@
 from __future__ import unicode_literals
 
-import re
-
 from .common import InfoExtractor
 from ..utils import (
     parse_duration,
     parse_iso8601,
 )
-from ..compat import (
-    compat_str,
-    compat_urlparse,
-)
+from ..compat import compat_str
 
 
 class DiscoveryIE(InfoExtractor):

From 339fe7228ae149db9fc163c94bef168f65a0a775 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com>
Date: Sun, 1 May 2016 02:53:42 +0600
Subject: [PATCH 27/50] [tagesschau] Update _FORMATS map

---
 youtube_dl/extractor/tagesschau.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/youtube_dl/extractor/tagesschau.py b/youtube_dl/extractor/tagesschau.py
index 73e7657d4..ebd81eadc 100644
--- a/youtube_dl/extractor/tagesschau.py
+++ b/youtube_dl/extractor/tagesschau.py
@@ -64,9 +64,12 @@ class TagesschauIE(InfoExtractor):
     }]
 
     _FORMATS = {
-        's': {'width': 256, 'height': 144, 'quality': 1},
+        'xs': {'quality': 0},
+        's': {'width': 320, 'height': 180, 'quality': 1},
         'm': {'width': 512, 'height': 288, 'quality': 2},
-        'l': {'width': 960, 'height': 544, 'quality': 3},
+        'l': {'width': 960, 'height': 540, 'quality': 3},
+        'xl': {'width': 1280, 'height': 720, 'quality': 4},
+        'xxl': {'quality': 5},
     }
 
     def _real_extract(self, url):

From fc35cd9e0c7ec88fedd90880dea23d593fed85ee Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com>
Date: Sun, 1 May 2016 02:54:39 +0600
Subject: [PATCH 28/50] [tagesschau] Relax _VALID_URL

---
 youtube_dl/extractor/tagesschau.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/youtube_dl/extractor/tagesschau.py b/youtube_dl/extractor/tagesschau.py
index ebd81eadc..fcccb230c 100644
--- a/youtube_dl/extractor/tagesschau.py
+++ b/youtube_dl/extractor/tagesschau.py
@@ -8,7 +8,7 @@ from ..utils import parse_filesize
 
 
 class TagesschauIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?tagesschau\.de/multimedia/(?:[^/]+/)*?[^/#?]+?(?P<id>-?[0-9]+)(?:~_[^/#?]+?)?\.html'
+    _VALID_URL = r'https?://(?:www\.)?tagesschau\.de/multimedia/(?:[^/]+/)*?[^/#?]+?(?P<id>-?[0-9]+)(?:~_?[^/#?]+?)?\.html'
 
     _TESTS = [{
         'url': 'http://www.tagesschau.de/multimedia/video/video-102143.html',
@@ -61,6 +61,9 @@ class TagesschauIE(InfoExtractor):
     }, {
         'url': 'http://www.tagesschau.de/multimedia/video/video-102303~_bab-sendung-211.html',
         'only_matching': True,
+    }, {
+        'url': 'http://www.tagesschau.de/multimedia/video/video-179517~player.html',
+        'only_matching': True,
     }]
 
     _FORMATS = {

From 9e1b96ae400c70b1ecfc2d8917510def2ed23a6e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com>
Date: Sun, 1 May 2016 03:20:36 +0600
Subject: [PATCH 29/50] [rtlnl] Match formats only by height

---
 youtube_dl/extractor/rtlnl.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/youtube_dl/extractor/rtlnl.py b/youtube_dl/extractor/rtlnl.py
index 8598b5840..4d612b5e3 100644
--- a/youtube_dl/extractor/rtlnl.py
+++ b/youtube_dl/extractor/rtlnl.py
@@ -125,10 +125,12 @@ class RtlNlIE(InfoExtractor):
                 try:
                     # Find hls format with the same width and height corresponding
                     # to progressive format and copy metadata from it.
-                    f = next(f for f in formats
-                             if f.get('width') == width and f.get('height') == height).copy()
-                    f.update(pg_format(format_id, width, height))
-                    pg_formats.append(f)
+                    f = next(f for f in formats if f.get('height') == height)
+                    # hls formats may have invalid width
+                    f['width'] = width
+                    f_copy = f.copy()
+                    f_copy.update(pg_format(format_id, width, height))
+                    pg_formats.append(f_copy)
                 except StopIteration:
                     # Missing hls format does mean that no progressive format with
                     # such width and height exists either.

From 4c1b2e5c0ea6a041bfd773efd7c4ac78ac8f3b4b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com>
Date: Sun, 1 May 2016 04:18:56 +0600
Subject: [PATCH 30/50] [tagesschau] Add support for playlists

---
 youtube_dl/extractor/tagesschau.py | 110 ++++++++++++++++++-----------
 1 file changed, 68 insertions(+), 42 deletions(-)

diff --git a/youtube_dl/extractor/tagesschau.py b/youtube_dl/extractor/tagesschau.py
index fcccb230c..e58385c57 100644
--- a/youtube_dl/extractor/tagesschau.py
+++ b/youtube_dl/extractor/tagesschau.py
@@ -8,11 +8,11 @@ from ..utils import parse_filesize
 
 
 class TagesschauIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?tagesschau\.de/multimedia/(?:[^/]+/)*?[^/#?]+?(?P<id>-?[0-9]+)(?:~_?[^/#?]+?)?\.html'
+    _VALID_URL = r'https?://(?:www\.)?tagesschau\.de/[^/]+/(?:[^/]+/)*?[^/#?]+?(?P<id>-?[0-9]+)(?:~_?[^/#?]+?)?\.html'
 
     _TESTS = [{
         'url': 'http://www.tagesschau.de/multimedia/video/video-102143.html',
-        'md5': '917a228bc7df7850783bc47979673a09',
+        'md5': 'f7c27a0eff3bfe8c7727e65f8fe1b1e6',
         'info_dict': {
             'id': '102143',
             'ext': 'mp4',
@@ -40,6 +40,13 @@ class TagesschauIE(InfoExtractor):
             'description': 'Flüchtlingsdebatte: Hitzig, aber wenig hilfreich',
             'thumbnail': 're:^https?:.*\.jpg$',
         },
+    }, {
+        'url': 'http://www.tagesschau.de/inland/afd-parteitag-135.html',
+        'info_dict': {
+            'id': '135',
+            'title': 'Möchtegern-Underdog mit Machtanspruch',
+        },
+        'playlist_count': 2,
     }, {
         'url': 'http://www.tagesschau.de/multimedia/sendung/tsg-3771.html',
         'only_matching': True,
@@ -75,6 +82,41 @@ class TagesschauIE(InfoExtractor):
         'xxl': {'quality': 5},
     }
 
+    def _extract_formats(self, download_text):
+        links = re.finditer(
+            r'<div class="button" title="(?P<title>[^"]*)"><a href="(?P<url>[^"]+)">(?P<name>.+?)</a></div>',
+            download_text)
+        formats = []
+        for l in links:
+            format_id = self._search_regex(
+                r'.*/[^/.]+\.([^/]+)\.[^/.]+', l.group('url'), 'format ID')
+            format = {
+                'format_id': format_id,
+                'url': l.group('url'),
+                'format_name': l.group('name'),
+            }
+            m = re.match(
+                r'''(?x)
+                    Video:\s*(?P<vcodec>[a-zA-Z0-9/._-]+)\s*&\#10;
+                    (?P<width>[0-9]+)x(?P<height>[0-9]+)px&\#10;
+                    (?P<vbr>[0-9]+)kbps&\#10;
+                    Audio:\s*(?P<abr>[0-9]+)kbps,\s*(?P<audio_desc>[A-Za-z\.0-9]+)&\#10;
+                    Gr&ouml;&szlig;e:\s*(?P<filesize_approx>[0-9.,]+\s+[a-zA-Z]*B)''',
+                l.group('title'))
+            if m:
+                format.update({
+                    'format_note': m.group('audio_desc'),
+                    'vcodec': m.group('vcodec'),
+                    'width': int(m.group('width')),
+                    'height': int(m.group('height')),
+                    'abr': int(m.group('abr')),
+                    'vbr': int(m.group('vbr')),
+                    'filesize_approx': parse_filesize(m.group('filesize_approx')),
+                })
+            formats.append(format)
+        self._sort_formats(formats)
+        return formats
+
     def _real_extract(self, url):
         video_id = self._match_id(url)
         display_id = video_id.lstrip('-')
@@ -94,14 +136,14 @@ class TagesschauIE(InfoExtractor):
                         (?:,\s*quality:(?P<q_quality>["\'])(?P<quality>.+?)(?P=q_quality))?
                     ''', playerpage):
                 url = media.group('url')
-                type_ = media.group('type')
+                webpage_type = media.group('type')
                 ext = media.group('ext')
                 res = media.group('quality')
                 f = {
                     'format_id': '%s_%s' % (res, ext) if res else ext,
                     'url': url,
                     'ext': ext,
-                    'vcodec': 'none' if type_ == 'audio' else None,
+                    'vcodec': 'none' if webpage_type == 'audio' else None,
                 }
                 f.update(self._FORMATS.get(res, {}))
                 formats.append(f)
@@ -109,47 +151,31 @@ class TagesschauIE(InfoExtractor):
             title = self._og_search_title(webpage).strip()
             description = self._og_search_description(webpage).strip()
         else:
-            download_text = self._search_regex(
-                r'(?s)<p>Wir bieten dieses Video in folgenden Formaten zum Download an:</p>\s*<div class="controls">(.*?)</div>\s*<p>',
-                webpage, 'download links')
-            links = re.finditer(
-                r'<div class="button" title="(?P<title>[^"]*)"><a href="(?P<url>[^"]+)">(?P<name>.+?)</a></div>',
-                download_text)
-            formats = []
-            for l in links:
-                format_id = self._search_regex(
-                    r'.*/[^/.]+\.([^/]+)\.[^/.]+', l.group('url'), 'format ID')
-                format = {
-                    'format_id': format_id,
-                    'url': l.group('url'),
-                    'format_name': l.group('name'),
-                }
-                m = re.match(
-                    r'''(?x)
-                        Video:\s*(?P<vcodec>[a-zA-Z0-9/._-]+)\s*&\#10;
-                        (?P<width>[0-9]+)x(?P<height>[0-9]+)px&\#10;
-                        (?P<vbr>[0-9]+)kbps&\#10;
-                        Audio:\s*(?P<abr>[0-9]+)kbps,\s*(?P<audio_desc>[A-Za-z\.0-9]+)&\#10;
-                        Gr&ouml;&szlig;e:\s*(?P<filesize_approx>[0-9.,]+\s+[a-zA-Z]*B)''',
-                    l.group('title'))
-                if m:
-                    format.update({
-                        'format_note': m.group('audio_desc'),
-                        'vcodec': m.group('vcodec'),
-                        'width': int(m.group('width')),
-                        'height': int(m.group('height')),
-                        'abr': int(m.group('abr')),
-                        'vbr': int(m.group('vbr')),
-                        'filesize_approx': parse_filesize(m.group('filesize_approx')),
-                    })
-                formats.append(format)
-            thumbnail = self._og_search_thumbnail(webpage)
-            description = self._html_search_regex(
-                r'(?s)<p class="teasertext">(.*?)</p>',
-                webpage, 'description', default=None)
             title = self._html_search_regex(
                 r'<span class="headline".*?>(.*?)</span>', webpage, 'title')
 
+            DOWNLOAD_REGEX = r'(?s)<p>Wir bieten dieses Video in folgenden Formaten zum Download an:</p>\s*<div class="controls">(.*?)</div>\s*<p>'
+
+            webpage_type = self._og_search_property('type', webpage, default=None)
+            if webpage_type == 'website':  # Article
+                entries = []
+                for num, (entry_title, download_text) in enumerate(re.findall(
+                        r'(?s)<p[^>]+class="infotext"[^>]*>.*?<strong>(.+?)</strong>.*?</p>.*?%s' % DOWNLOAD_REGEX,
+                        webpage)):
+                    entries.append({
+                        'id': display_id,
+                        'title': '%s-%d' % (entry_title, num),
+                        'formats': self._extract_formats(download_text),
+                    })
+                return self.playlist_result(entries, display_id, title)
+            else:  # Assume single video
+                download_text = self._search_regex(DOWNLOAD_REGEX, webpage, 'download links')
+                formats = self._extract_formats(download_text)
+                thumbnail = self._og_search_thumbnail(webpage)
+                description = self._html_search_regex(
+                    r'(?s)<p class="teasertext">(.*?)</p>',
+                    webpage, 'description', default=None)
+
         self._sort_formats(formats)
 
         return {

From 1a2b377cc2fa9546fa08a7777a6fc5fc545cc441 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com>
Date: Sun, 1 May 2016 04:38:46 +0600
Subject: [PATCH 31/50] [tagesschau] Fix audio support

---
 youtube_dl/extractor/tagesschau.py | 75 +++++++++++++++++++-----------
 1 file changed, 49 insertions(+), 26 deletions(-)

diff --git a/youtube_dl/extractor/tagesschau.py b/youtube_dl/extractor/tagesschau.py
index e58385c57..ccc2d476d 100644
--- a/youtube_dl/extractor/tagesschau.py
+++ b/youtube_dl/extractor/tagesschau.py
@@ -4,7 +4,10 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..utils import parse_filesize
+from ..utils import (
+    determine_ext,
+    parse_filesize,
+)
 
 
 class TagesschauIE(InfoExtractor):
@@ -82,37 +85,54 @@ class TagesschauIE(InfoExtractor):
         'xxl': {'quality': 5},
     }
 
-    def _extract_formats(self, download_text):
+    def _extract_formats(self, download_text, media_kind):
         links = re.finditer(
             r'<div class="button" title="(?P<title>[^"]*)"><a href="(?P<url>[^"]+)">(?P<name>.+?)</a></div>',
             download_text)
         formats = []
         for l in links:
+            link_url = l.group('url')
+            if not link_url:
+                continue
             format_id = self._search_regex(
-                r'.*/[^/.]+\.([^/]+)\.[^/.]+', l.group('url'), 'format ID')
+                r'.*/[^/.]+\.([^/]+)\.[^/.]+$', link_url, 'format ID',
+                default=determine_ext(link_url))
             format = {
                 'format_id': format_id,
                 'url': l.group('url'),
                 'format_name': l.group('name'),
             }
-            m = re.match(
-                r'''(?x)
-                    Video:\s*(?P<vcodec>[a-zA-Z0-9/._-]+)\s*&\#10;
-                    (?P<width>[0-9]+)x(?P<height>[0-9]+)px&\#10;
-                    (?P<vbr>[0-9]+)kbps&\#10;
-                    Audio:\s*(?P<abr>[0-9]+)kbps,\s*(?P<audio_desc>[A-Za-z\.0-9]+)&\#10;
-                    Gr&ouml;&szlig;e:\s*(?P<filesize_approx>[0-9.,]+\s+[a-zA-Z]*B)''',
-                l.group('title'))
-            if m:
-                format.update({
-                    'format_note': m.group('audio_desc'),
-                    'vcodec': m.group('vcodec'),
-                    'width': int(m.group('width')),
-                    'height': int(m.group('height')),
-                    'abr': int(m.group('abr')),
-                    'vbr': int(m.group('vbr')),
-                    'filesize_approx': parse_filesize(m.group('filesize_approx')),
-                })
+            title = l.group('title')
+            if title:
+                if media_kind.lower() == 'video':
+                    m = re.match(
+                        r'''(?x)
+                            Video:\s*(?P<vcodec>[a-zA-Z0-9/._-]+)\s*&\#10;
+                            (?P<width>[0-9]+)x(?P<height>[0-9]+)px&\#10;
+                            (?P<vbr>[0-9]+)kbps&\#10;
+                            Audio:\s*(?P<abr>[0-9]+)kbps,\s*(?P<audio_desc>[A-Za-z\.0-9]+)&\#10;
+                            Gr&ouml;&szlig;e:\s*(?P<filesize_approx>[0-9.,]+\s+[a-zA-Z]*B)''',
+                        title)
+                    if m:
+                        format.update({
+                            'format_note': m.group('audio_desc'),
+                            'vcodec': m.group('vcodec'),
+                            'width': int(m.group('width')),
+                            'height': int(m.group('height')),
+                            'abr': int(m.group('abr')),
+                            'vbr': int(m.group('vbr')),
+                            'filesize_approx': parse_filesize(m.group('filesize_approx')),
+                        })
+                else:
+                    m = re.match(
+                        r'(?P<format>.+?)-Format\s*:\s*(?P<abr>\d+)kbps\s*,\s*(?P<note>.+)',
+                        title)
+                    if m:
+                        format.update({
+                            'format_note': '%s, %s' % (m.group('format'), m.group('note')),
+                            'vcodec': 'none',
+                            'abr': int(m.group('abr')),
+                        })
             formats.append(format)
         self._sort_formats(formats)
         return formats
@@ -154,23 +174,26 @@ class TagesschauIE(InfoExtractor):
             title = self._html_search_regex(
                 r'<span class="headline".*?>(.*?)</span>', webpage, 'title')
 
-            DOWNLOAD_REGEX = r'(?s)<p>Wir bieten dieses Video in folgenden Formaten zum Download an:</p>\s*<div class="controls">(.*?)</div>\s*<p>'
+            DOWNLOAD_REGEX = r'(?s)<p>Wir bieten dieses (?P<kind>Video|Audio) in folgenden Formaten zum Download an:</p>\s*<div class="controls">(?P<links>.*?)</div>\s*<p>'
 
             webpage_type = self._og_search_property('type', webpage, default=None)
             if webpage_type == 'website':  # Article
                 entries = []
-                for num, (entry_title, download_text) in enumerate(re.findall(
+                for num, (entry_title, media_kind, download_text) in enumerate(re.findall(
                         r'(?s)<p[^>]+class="infotext"[^>]*>.*?<strong>(.+?)</strong>.*?</p>.*?%s' % DOWNLOAD_REGEX,
                         webpage)):
                     entries.append({
                         'id': display_id,
                         'title': '%s-%d' % (entry_title, num),
-                        'formats': self._extract_formats(download_text),
+                        'formats': self._extract_formats(download_text, media_kind),
                     })
                 return self.playlist_result(entries, display_id, title)
             else:  # Assume single video
-                download_text = self._search_regex(DOWNLOAD_REGEX, webpage, 'download links')
-                formats = self._extract_formats(download_text)
+                download_text = self._search_regex(
+                    DOWNLOAD_REGEX, webpage, 'download links', group='links')
+                media_kind = self._search_regex(
+                    DOWNLOAD_REGEX, webpage, 'media kind', default='Video', group='links')
+                formats = self._extract_formats(download_text, media_kind)
                 thumbnail = self._og_search_thumbnail(webpage)
                 description = self._html_search_regex(
                     r'(?s)<p class="teasertext">(.*?)</p>',

From 2844b093360cf53829e1c127aba0bbc4a6a279a8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com>
Date: Sun, 1 May 2016 04:42:05 +0600
Subject: [PATCH 32/50] [tagesschau] Fix article media ids

---
 youtube_dl/extractor/tagesschau.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/youtube_dl/extractor/tagesschau.py b/youtube_dl/extractor/tagesschau.py
index ccc2d476d..6b71c8f81 100644
--- a/youtube_dl/extractor/tagesschau.py
+++ b/youtube_dl/extractor/tagesschau.py
@@ -181,10 +181,10 @@ class TagesschauIE(InfoExtractor):
                 entries = []
                 for num, (entry_title, media_kind, download_text) in enumerate(re.findall(
                         r'(?s)<p[^>]+class="infotext"[^>]*>.*?<strong>(.+?)</strong>.*?</p>.*?%s' % DOWNLOAD_REGEX,
-                        webpage)):
+                        webpage), 1):
                     entries.append({
-                        'id': display_id,
-                        'title': '%s-%d' % (entry_title, num),
+                        'id': '%s-%d' % (display_id, num),
+                        'title': '%s' % entry_title,
                         'formats': self._extract_formats(download_text, media_kind),
                     })
                 return self.playlist_result(entries, display_id, title)

From 9cf79e8f4bd7e0dfdb2ea9d29cf3ba7d3c6ab647 Mon Sep 17 00:00:00 2001
From: remitamine <remitamine@gmail.com>
Date: Sun, 1 May 2016 01:43:58 +0100
Subject: [PATCH 33/50] [ccc] improve extraction

---
 youtube_dl/extractor/ccc.py | 111 ++++++++++++------------------------
 1 file changed, 38 insertions(+), 73 deletions(-)

diff --git a/youtube_dl/extractor/ccc.py b/youtube_dl/extractor/ccc.py
index dda2c0959..8f7f09e22 100644
--- a/youtube_dl/extractor/ccc.py
+++ b/youtube_dl/extractor/ccc.py
@@ -1,13 +1,9 @@
 from __future__ import unicode_literals
 
-import re
-
 from .common import InfoExtractor
 from ..utils import (
     int_or_none,
-    parse_duration,
-    qualities,
-    unified_strdate,
+    parse_iso8601,
 )
 
 
@@ -19,14 +15,14 @@ class CCCIE(InfoExtractor):
         'url': 'https://media.ccc.de/v/30C3_-_5443_-_en_-_saal_g_-_201312281830_-_introduction_to_processor_design_-_byterazor#video',
         'md5': '3a1eda8f3a29515d27f5adb967d7e740',
         'info_dict': {
-            'id': '30C3_-_5443_-_en_-_saal_g_-_201312281830_-_introduction_to_processor_design_-_byterazor',
+            'id': '1839',
             'ext': 'mp4',
             'title': 'Introduction to Processor Design',
-            'description': 'md5:80be298773966f66d56cb11260b879af',
+            'description': 'md5:df55f6d073d4ceae55aae6f2fd98a0ac',
             'thumbnail': 're:^https?://.*\.jpg$',
-            'view_count': int,
             'upload_date': '20131228',
-            'duration': 3660,
+            'timestamp': 1388188800,
+            'duration': 3710,
         }
     }, {
         'url': 'https://media.ccc.de/v/32c3-7368-shopshifting#download',
@@ -34,79 +30,48 @@ class CCCIE(InfoExtractor):
     }]
 
     def _real_extract(self, url):
-        video_id = self._match_id(url)
-        webpage = self._download_webpage(url, video_id)
+        display_id = self._match_id(url)
+        webpage = self._download_webpage(url, display_id)
+        event_id = self._search_regex("data-id='(\d+)'", webpage, 'event id')
+        event_data = self._download_json('https://media.ccc.de/public/events/%s' % event_id, event_id)
 
-        if self._downloader.params.get('prefer_free_formats'):
-            preference = qualities(['mp3', 'opus', 'mp4-lq', 'webm-lq', 'h264-sd', 'mp4-sd', 'webm-sd', 'mp4', 'webm', 'mp4-hd', 'h264-hd', 'webm-hd'])
-        else:
-            preference = qualities(['opus', 'mp3', 'webm-lq', 'mp4-lq', 'webm-sd', 'h264-sd', 'mp4-sd', 'webm', 'mp4', 'webm-hd', 'mp4-hd', 'h264-hd'])
-
-        title = self._html_search_regex(
-            r'(?s)<h1>(.*?)</h1>', webpage, 'title')
-        description = self._html_search_regex(
-            r'(?s)<h3>About</h3>(.+?)<h3>',
-            webpage, 'description', fatal=False)
-        upload_date = unified_strdate(self._html_search_regex(
-            r"(?s)<span[^>]+class='[^']*fa-calendar-o'[^>]*>(.+?)</span>",
-            webpage, 'upload date', fatal=False))
-        view_count = int_or_none(self._html_search_regex(
-            r"(?s)<span class='[^']*fa-eye'></span>(.*?)</li>",
-            webpage, 'view count', fatal=False))
-        duration = parse_duration(self._html_search_regex(
-            r'(?s)<span[^>]+class=(["\']).*?fa-clock-o.*?\1[^>]*></span>(?P<duration>.+?)</li',
-            webpage, 'duration', fatal=False, group='duration'))
-
-        matches = re.finditer(r'''(?xs)
-            <(?:span|div)\s+class='label\s+filetype'>(?P<format>[^<]*)</(?:span|div)>\s*
-            <(?:span|div)\s+class='label\s+filetype'>(?P<lang>[^<]*)</(?:span|div)>\s*
-            <a\s+download\s+href='(?P<http_url>[^']+)'>\s*
-            (?:
-                .*?
-                <a\s+(?:download\s+)?href='(?P<torrent_url>[^']+\.torrent)'
-            )?''', webpage)
         formats = []
-        for m in matches:
-            format = m.group('format')
-            format_id = self._search_regex(
-                r'.*/([a-z0-9_-]+)/[^/]*$',
-                m.group('http_url'), 'format id', default=None)
-            if format_id:
-                format_id = m.group('lang') + '-' + format_id
-            vcodec = 'h264' if 'h264' in format_id else (
-                'none' if format_id in ('mp3', 'opus') else None
+        for recording in event_data.get('recordings', []):
+            recording_url = recording.get('recording_url')
+            if not recording_url:
+                continue
+            language = recording.get('language')
+            folder = recording.get('folder')
+            format_id = None
+            if language:
+                format_id = language
+            if folder:
+                if language:
+                    format_id += '-' + folder
+                else:
+                    format_id = folder
+            vcodec = 'h264' if 'h264' in folder else (
+                'none' if folder in ('mp3', 'opus') else None
             )
             formats.append({
                 'format_id': format_id,
-                'format': format,
-                'language': m.group('lang'),
-                'url': m.group('http_url'),
+                'url': recording_url,
+                'width': int_or_none(recording.get('width')),
+                'height': int_or_none(recording.get('height')),
+                'filesize': int_or_none(recording.get('size'), invscale=1024 * 1024),
+                'language': language,
                 'vcodec': vcodec,
-                'preference': preference(format_id),
             })
-
-            if m.group('torrent_url'):
-                formats.append({
-                    'format_id': 'torrent-%s' % (format if format_id is None else format_id),
-                    'format': '%s (torrent)' % format,
-                    'proto': 'torrent',
-                    'format_note': '(unsupported; will just download the .torrent file)',
-                    'vcodec': vcodec,
-                    'preference': -100 + preference(format_id),
-                    'url': m.group('torrent_url'),
-                })
         self._sort_formats(formats)
 
-        thumbnail = self._html_search_regex(
-            r"<video.*?poster='([^']+)'", webpage, 'thumbnail', fatal=False)
-
         return {
-            'id': video_id,
-            'title': title,
-            'description': description,
-            'thumbnail': thumbnail,
-            'view_count': view_count,
-            'upload_date': upload_date,
-            'duration': duration,
+            'id': event_id,
+            'display_id': display_id,
+            'title': event_data['title'],
+            'description': event_data.get('description'),
+            'thumbnail': event_data.get('thumb_url'),
+            'timestamp': parse_iso8601(event_data.get('date')),
+            'duration': int_or_none(event_data.get('length')),
+            'tags': event_data.get('tags'),
             'formats': formats,
         }

From 6a0f9a24d0b8d979579276a7648df444634a5a50 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com>
Date: Sun, 1 May 2016 06:44:59 +0600
Subject: [PATCH 34/50] [tagesschau] Separate player extractor

---
 youtube_dl/extractor/extractors.py |   5 +-
 youtube_dl/extractor/tagesschau.py | 235 ++++++++++++++++++++---------
 2 files changed, 168 insertions(+), 72 deletions(-)

diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index 737960a01..4aee53d6a 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -724,7 +724,10 @@ from .svt import (
 from .swrmediathek import SWRMediathekIE
 from .syfy import SyfyIE
 from .sztvhu import SztvHuIE
-from .tagesschau import TagesschauIE
+from .tagesschau import (
+    TagesschauPlayerIE,
+    TagesschauIE,
+)
 from .tapely import TapelyIE
 from .tass import TassIE
 from .tdslifeway import TDSLifewayIE
diff --git a/youtube_dl/extractor/tagesschau.py b/youtube_dl/extractor/tagesschau.py
index 6b71c8f81..a71fbad7d 100644
--- a/youtube_dl/extractor/tagesschau.py
+++ b/youtube_dl/extractor/tagesschau.py
@@ -6,10 +6,124 @@ import re
 from .common import InfoExtractor
 from ..utils import (
     determine_ext,
+    js_to_json,
+    parse_iso8601,
     parse_filesize,
 )
 
 
+class TagesschauPlayerIE(InfoExtractor):
+    IE_NAME = 'tagesschau:player'
+    _VALID_URL = r'https?://(?:www\.)?tagesschau\.de/multimedia/(?P<kind>audio|video)/(?P=kind)-(?P<id>\d+)~player(?:_[^/?#&]+)?\.html'
+
+    _TESTS = [{
+        'url': 'http://www.tagesschau.de/multimedia/video/video-179517~player.html',
+        'md5': '8d09548d5c15debad38bee3a4d15ca21',
+        'info_dict': {
+            'id': '179517',
+            'ext': 'mp4',
+            'title': 'Marie Kristin Boese, ARD Berlin, über den zukünftigen Kurs der AfD',
+            'thumbnail': 're:^https?:.*\.jpg$',
+            'formats': 'mincount:6',
+        },
+    }, {
+        'url': 'https://www.tagesschau.de/multimedia/audio/audio-29417~player.html',
+        'md5': '76e6eec6ebd40740671cf0a2c88617e5',
+        'info_dict': {
+            'id': '29417',
+            'ext': 'mp3',
+            'title': 'Trabi - Bye, bye Rennpappe',
+            'thumbnail': 're:^https?:.*\.jpg$',
+            'formats': 'mincount:2',
+        },
+    }, {
+        'url': 'http://www.tagesschau.de/multimedia/audio/audio-29417~player_autoplay-true.html',
+        'only_matching': True,
+    }]
+
+    _FORMATS = {
+        'xs': {'quality': 0},
+        's': {'width': 320, 'height': 180, 'quality': 1},
+        'm': {'width': 512, 'height': 288, 'quality': 2},
+        'l': {'width': 960, 'height': 540, 'quality': 3},
+        'xl': {'width': 1280, 'height': 720, 'quality': 4},
+        'xxl': {'quality': 5},
+    }
+
+    def _extract_via_api(self, kind, video_id):
+        info = self._download_json(
+            'https://www.tagesschau.de/api/multimedia/{0}/{0}-{1}.json'.format(kind, video_id),
+            video_id)
+        title = info['headline']
+        formats = []
+        for media in info['mediadata']:
+            for format_id, format_url in media.items():
+                if determine_ext(format_url) == 'm3u8':
+                    formats.extend(self._extract_m3u8_formats(
+                        format_url, video_id, 'mp4',
+                        entry_protocol='m3u8_native', m3u8_id='hls'))
+                else:
+                    formats.append({
+                        'url': format_url,
+                        'format_id': format_id,
+                        'vcodec': 'none' if kind == 'audio' else None,
+                    })
+        self._sort_formats(formats)
+        timestamp = parse_iso8601(info.get('date'))
+        return {
+            'id': video_id,
+            'title': title,
+            'timestamp': timestamp,
+            'formats': formats,
+        }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        # kind = mobj.group('kind').lower()
+        # if kind == 'video':
+        #     return self._extract_via_api(kind, video_id)
+
+        # JSON api does not provide some audio formats (e.g. ogg) thus
+        # extractiong audio via webpage
+
+        webpage = self._download_webpage(url, video_id)
+
+        title = self._og_search_title(webpage).strip()
+        formats = []
+
+        for media_json in re.findall(r'({src\s*:\s*["\']http[^}]+type\s*:[^}]+})', webpage):
+            media = self._parse_json(js_to_json(media_json), video_id, fatal=False)
+            if not media:
+                continue
+            src = media.get('src')
+            if not src:
+                return
+            quality = media.get('quality')
+            kind = media.get('type', '').split('/')[0]
+            ext = determine_ext(src)
+            f = {
+                'url': src,
+                'format_id': '%s_%s' % (quality, ext) if quality else ext,
+                'ext': ext,
+                'vcodec': 'none' if kind == 'audio' else None,
+            }
+            f.update(self._FORMATS.get(quality, {}))
+            formats.append(f)
+
+        self._sort_formats(formats)
+
+        thumbnail = self._og_search_thumbnail(webpage)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'thumbnail': thumbnail,
+            'formats': formats,
+        }
+
+
 class TagesschauIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?tagesschau\.de/[^/]+/(?:[^/]+/)*?[^/#?]+?(?P<id>-?[0-9]+)(?:~_?[^/#?]+?)?\.html'
 
@@ -20,7 +134,7 @@ class TagesschauIE(InfoExtractor):
             'id': '102143',
             'ext': 'mp4',
             'title': 'Regierungsumbildung in Athen: Neue Minister in Griechenland vereidigt',
-            'description': 'md5:171feccd9d9b3dd54d05d501568f6359',
+            'description': '18.07.2015 20:10 Uhr',
             'thumbnail': 're:^https?:.*\.jpg$',
         },
     }, {
@@ -29,18 +143,30 @@ class TagesschauIE(InfoExtractor):
         'info_dict': {
             'id': '5727',
             'ext': 'mp4',
-            'description': 'md5:695c01bfd98b7e313c501386327aea59',
             'title': 'Sendung: tagesschau \t04.12.2014 20:00 Uhr',
+            'description': 'md5:695c01bfd98b7e313c501386327aea59',
             'thumbnail': 're:^https?:.*\.jpg$',
         },
     }, {
-        'url': 'http://www.tagesschau.de/multimedia/politikimradio/audio-18407.html',
-        'md5': 'aef45de271c4bf0a5db834aa40bf774c',
+        # exclusive audio
+        'url': 'http://www.tagesschau.de/multimedia/audio/audio-29417.html',
+        'md5': '76e6eec6ebd40740671cf0a2c88617e5',
         'info_dict': {
-            'id': '18407',
+            'id': '29417',
             'ext': 'mp3',
-            'title': 'Flüchtlingsdebatte: Hitzig, aber wenig hilfreich',
-            'description': 'Flüchtlingsdebatte: Hitzig, aber wenig hilfreich',
+            'title': 'Trabi - Bye, bye Rennpappe',
+            'description': 'md5:8687dda862cbbe2cfb2df09b56341317',
+            'thumbnail': 're:^https?:.*\.jpg$',
+        },
+    }, {
+        # audio in article
+        'url': 'http://www.tagesschau.de/inland/bnd-303.html',
+        'md5': 'e0916c623e85fc1d2b26b78f299d3958',
+        'info_dict': {
+            'id': '303',
+            'ext': 'mp3',
+            'title': 'Viele Baustellen für neuen BND-Chef',
+            'description': 'md5:1e69a54be3e1255b2b07cdbce5bcd8b4',
             'thumbnail': 're:^https?:.*\.jpg$',
         },
     }, {
@@ -71,19 +197,11 @@ class TagesschauIE(InfoExtractor):
     }, {
         'url': 'http://www.tagesschau.de/multimedia/video/video-102303~_bab-sendung-211.html',
         'only_matching': True,
-    }, {
-        'url': 'http://www.tagesschau.de/multimedia/video/video-179517~player.html',
-        'only_matching': True,
     }]
 
-    _FORMATS = {
-        'xs': {'quality': 0},
-        's': {'width': 320, 'height': 180, 'quality': 1},
-        'm': {'width': 512, 'height': 288, 'quality': 2},
-        'l': {'width': 960, 'height': 540, 'quality': 3},
-        'xl': {'width': 1280, 'height': 720, 'quality': 4},
-        'xxl': {'quality': 5},
-    }
+    @classmethod
+    def suitable(cls, url):
+        return False if TagesschauPlayerIE.suitable(url) else super(TagesschauIE, cls).suitable(url)
 
     def _extract_formats(self, download_text, media_kind):
         links = re.finditer(
@@ -140,64 +258,39 @@ class TagesschauIE(InfoExtractor):
     def _real_extract(self, url):
         video_id = self._match_id(url)
         display_id = video_id.lstrip('-')
+
         webpage = self._download_webpage(url, display_id)
 
-        player_url = self._html_search_meta(
-            'twitter:player', webpage, 'player URL', default=None)
-        if player_url:
-            playerpage = self._download_webpage(
-                player_url, display_id, 'Downloading player page')
+        title = self._html_search_regex(
+            r'<span[^>]*class="headline"[^>]*>(.+?)</span>',
+            webpage, 'title', default=None) or self._og_search_title(webpage)
 
-            formats = []
-            for media in re.finditer(
-                    r'''(?x)
-                        (?P<q_url>["\'])(?P<url>http://media.+?)(?P=q_url)
-                        ,\s*type:(?P<q_type>["\'])(?P<type>video|audio)/(?P<ext>.+?)(?P=q_type)
-                        (?:,\s*quality:(?P<q_quality>["\'])(?P<quality>.+?)(?P=q_quality))?
-                    ''', playerpage):
-                url = media.group('url')
-                webpage_type = media.group('type')
-                ext = media.group('ext')
-                res = media.group('quality')
-                f = {
-                    'format_id': '%s_%s' % (res, ext) if res else ext,
-                    'url': url,
-                    'ext': ext,
-                    'vcodec': 'none' if webpage_type == 'audio' else None,
-                }
-                f.update(self._FORMATS.get(res, {}))
-                formats.append(f)
-            thumbnail = self._og_search_thumbnail(playerpage)
-            title = self._og_search_title(webpage).strip()
-            description = self._og_search_description(webpage).strip()
-        else:
-            title = self._html_search_regex(
-                r'<span class="headline".*?>(.*?)</span>', webpage, 'title')
+        DOWNLOAD_REGEX = r'(?s)<p>Wir bieten dieses (?P<kind>Video|Audio) in folgenden Formaten zum Download an:</p>\s*<div class="controls">(?P<links>.*?)</div>\s*<p>'
 
-            DOWNLOAD_REGEX = r'(?s)<p>Wir bieten dieses (?P<kind>Video|Audio) in folgenden Formaten zum Download an:</p>\s*<div class="controls">(?P<links>.*?)</div>\s*<p>'
-
-            webpage_type = self._og_search_property('type', webpage, default=None)
-            if webpage_type == 'website':  # Article
-                entries = []
-                for num, (entry_title, media_kind, download_text) in enumerate(re.findall(
-                        r'(?s)<p[^>]+class="infotext"[^>]*>.*?<strong>(.+?)</strong>.*?</p>.*?%s' % DOWNLOAD_REGEX,
-                        webpage), 1):
-                    entries.append({
-                        'id': '%s-%d' % (display_id, num),
-                        'title': '%s' % entry_title,
-                        'formats': self._extract_formats(download_text, media_kind),
-                    })
+        webpage_type = self._og_search_property('type', webpage, default=None)
+        if webpage_type == 'website':  # Article
+            entries = []
+            for num, (entry_title, media_kind, download_text) in enumerate(re.findall(
+                    r'(?s)<p[^>]+class="infotext"[^>]*>.*?<strong>(.+?)</strong>.*?</p>.*?%s' % DOWNLOAD_REGEX,
+                    webpage), 1):
+                entries.append({
+                    'id': '%s-%d' % (display_id, num),
+                    'title': '%s' % entry_title,
+                    'formats': self._extract_formats(download_text, media_kind),
+                })
+            if len(entries) > 1:
                 return self.playlist_result(entries, display_id, title)
-            else:  # Assume single video
-                download_text = self._search_regex(
-                    DOWNLOAD_REGEX, webpage, 'download links', group='links')
-                media_kind = self._search_regex(
-                    DOWNLOAD_REGEX, webpage, 'media kind', default='Video', group='links')
-                formats = self._extract_formats(download_text, media_kind)
-                thumbnail = self._og_search_thumbnail(webpage)
-                description = self._html_search_regex(
-                    r'(?s)<p class="teasertext">(.*?)</p>',
-                    webpage, 'description', default=None)
+            formats = entries[0]['formats']
+        else:  # Assume single video
+            download_text = self._search_regex(
+                DOWNLOAD_REGEX, webpage, 'download links', group='links')
+            media_kind = self._search_regex(
+                DOWNLOAD_REGEX, webpage, 'media kind', default='Video', group='kind')
+            formats = self._extract_formats(download_text, media_kind)
+        thumbnail = self._og_search_thumbnail(webpage)
+        description = self._html_search_regex(
+            r'(?s)<p class="teasertext">(.*?)</p>',
+            webpage, 'description', default=None)
 
         self._sort_formats(formats)
 

From 651ad35ce0f0ee9d04db085c50c29441b47bc825 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com>
Date: Sun, 1 May 2016 06:57:19 +0600
Subject: [PATCH 35/50] [tagesschau] Relax _VALID_URL

---
 youtube_dl/extractor/tagesschau.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/youtube_dl/extractor/tagesschau.py b/youtube_dl/extractor/tagesschau.py
index a71fbad7d..f6102c224 100644
--- a/youtube_dl/extractor/tagesschau.py
+++ b/youtube_dl/extractor/tagesschau.py
@@ -125,7 +125,7 @@ class TagesschauPlayerIE(InfoExtractor):
 
 
 class TagesschauIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?tagesschau\.de/[^/]+/(?:[^/]+/)*?[^/#?]+?(?P<id>-?[0-9]+)(?:~_?[^/#?]+?)?\.html'
+    _VALID_URL = r'https?://(?:www\.)?tagesschau\.de/(?P<path>[^/]+/(?:[^/]+/)*?[^/#?]+?(?P<id>-?[0-9]+)?)(?:~_?[^/#?]+?)?\.html'
 
     _TESTS = [{
         'url': 'http://www.tagesschau.de/multimedia/video/video-102143.html',
@@ -197,6 +197,9 @@ class TagesschauIE(InfoExtractor):
     }, {
         'url': 'http://www.tagesschau.de/multimedia/video/video-102303~_bab-sendung-211.html',
         'only_matching': True,
+    }, {
+        'url': 'http://www.tagesschau.de/100sekunden/index.html',
+        'only_matching': True,
     }]
 
     @classmethod
@@ -256,7 +259,8 @@ class TagesschauIE(InfoExtractor):
         return formats
 
     def _real_extract(self, url):
-        video_id = self._match_id(url)
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id') or mobj.group('path')
         display_id = video_id.lstrip('-')
 
         webpage = self._download_webpage(url, display_id)

From 854cc54bc1d0488d8fa88bd5dfed6f7f8981847e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com>
Date: Sun, 1 May 2016 07:01:55 +0600
Subject: [PATCH 36/50] [tagesschau] Expand video id

---
 youtube_dl/extractor/tagesschau.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/youtube_dl/extractor/tagesschau.py b/youtube_dl/extractor/tagesschau.py
index f6102c224..499bd260b 100644
--- a/youtube_dl/extractor/tagesschau.py
+++ b/youtube_dl/extractor/tagesschau.py
@@ -125,13 +125,13 @@ class TagesschauPlayerIE(InfoExtractor):
 
 
 class TagesschauIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?tagesschau\.de/(?P<path>[^/]+/(?:[^/]+/)*?[^/#?]+?(?P<id>-?[0-9]+)?)(?:~_?[^/#?]+?)?\.html'
+    _VALID_URL = r'https?://(?:www\.)?tagesschau\.de/(?P<path>[^/]+/(?:[^/]+/)*?(?P<id>[^/#?]+?(?:-?[0-9]+)?))(?:~_?[^/#?]+?)?\.html'
 
     _TESTS = [{
         'url': 'http://www.tagesschau.de/multimedia/video/video-102143.html',
         'md5': 'f7c27a0eff3bfe8c7727e65f8fe1b1e6',
         'info_dict': {
-            'id': '102143',
+            'id': 'video-102143',
             'ext': 'mp4',
             'title': 'Regierungsumbildung in Athen: Neue Minister in Griechenland vereidigt',
             'description': '18.07.2015 20:10 Uhr',
@@ -141,7 +141,7 @@ class TagesschauIE(InfoExtractor):
         'url': 'http://www.tagesschau.de/multimedia/sendung/ts-5727.html',
         'md5': '3c54c1f6243d279b706bde660ceec633',
         'info_dict': {
-            'id': '5727',
+            'id': 'ts-5727',
             'ext': 'mp4',
             'title': 'Sendung: tagesschau \t04.12.2014 20:00 Uhr',
             'description': 'md5:695c01bfd98b7e313c501386327aea59',
@@ -152,7 +152,7 @@ class TagesschauIE(InfoExtractor):
         'url': 'http://www.tagesschau.de/multimedia/audio/audio-29417.html',
         'md5': '76e6eec6ebd40740671cf0a2c88617e5',
         'info_dict': {
-            'id': '29417',
+            'id': 'audio-29417',
             'ext': 'mp3',
             'title': 'Trabi - Bye, bye Rennpappe',
             'description': 'md5:8687dda862cbbe2cfb2df09b56341317',
@@ -163,7 +163,7 @@ class TagesschauIE(InfoExtractor):
         'url': 'http://www.tagesschau.de/inland/bnd-303.html',
         'md5': 'e0916c623e85fc1d2b26b78f299d3958',
         'info_dict': {
-            'id': '303',
+            'id': 'bnd-303',
             'ext': 'mp3',
             'title': 'Viele Baustellen für neuen BND-Chef',
             'description': 'md5:1e69a54be3e1255b2b07cdbce5bcd8b4',
@@ -172,7 +172,7 @@ class TagesschauIE(InfoExtractor):
     }, {
         'url': 'http://www.tagesschau.de/inland/afd-parteitag-135.html',
         'info_dict': {
-            'id': '135',
+            'id': 'afd-parteitag-135',
             'title': 'Möchtegern-Underdog mit Machtanspruch',
         },
         'playlist_count': 2,

From 68bb2fef9565159eba4a47f464b6b420cf2d5cda Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com>
Date: Sun, 1 May 2016 07:15:23 +0600
Subject: [PATCH 37/50] [tagesschau] Restrict playlist entry regex

---
 youtube_dl/extractor/tagesschau.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/youtube_dl/extractor/tagesschau.py b/youtube_dl/extractor/tagesschau.py
index 499bd260b..136e18f96 100644
--- a/youtube_dl/extractor/tagesschau.py
+++ b/youtube_dl/extractor/tagesschau.py
@@ -200,6 +200,10 @@ class TagesschauIE(InfoExtractor):
     }, {
         'url': 'http://www.tagesschau.de/100sekunden/index.html',
         'only_matching': True,
+    }, {
+        # playlist article with collapsing sections
+        'url': 'http://www.tagesschau.de/wirtschaft/faq-freihandelszone-eu-usa-101.html',
+        'only_matching': True,
     }]
 
     @classmethod
@@ -275,7 +279,7 @@ class TagesschauIE(InfoExtractor):
         if webpage_type == 'website':  # Article
             entries = []
             for num, (entry_title, media_kind, download_text) in enumerate(re.findall(
-                    r'(?s)<p[^>]+class="infotext"[^>]*>.*?<strong>(.+?)</strong>.*?</p>.*?%s' % DOWNLOAD_REGEX,
+                    r'(?s)<p[^>]+class="infotext"[^>]*>\s*(?:<a[^>]+>)?\s*<strong>(.+?)</strong>.*?</p>.*?%s' % DOWNLOAD_REGEX,
                     webpage), 1):
                 entries.append({
                     'id': '%s-%d' % (display_id, num),

From 6f27bf1c7425d97eb07aee9f7e15d0066b0a74bf Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com>
Date: Sun, 1 May 2016 08:08:51 +0600
Subject: [PATCH 38/50] Credit @blahgeek for xiami (#9079)

---
 AUTHORS | 1 +
 1 file changed, 1 insertion(+)

diff --git a/AUTHORS b/AUTHORS
index 07cade723..814fe9ec3 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -168,3 +168,4 @@ José Joaquín Atria
 Viťas Strádal
 Kagami Hiiragi
 Philip Huppert
+blahgeek

From 4bd143a3a06264fcda5fa254709d404ccab6601c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com>
Date: Sun, 1 May 2016 10:56:54 +0600
Subject: [PATCH 39/50] [postprocessor/ffmpeg] Simplify metadata preparation
 and add track related metafields (Closes #9357)

---
 youtube_dl/postprocessor/ffmpeg.py | 41 +++++++++++++++++-------------
 1 file changed, 24 insertions(+), 17 deletions(-)

diff --git a/youtube_dl/postprocessor/ffmpeg.py b/youtube_dl/postprocessor/ffmpeg.py
index 1793a878c..fa99b0c2a 100644
--- a/youtube_dl/postprocessor/ffmpeg.py
+++ b/youtube_dl/postprocessor/ffmpeg.py
@@ -389,23 +389,30 @@ class FFmpegEmbedSubtitlePP(FFmpegPostProcessor):
 class FFmpegMetadataPP(FFmpegPostProcessor):
     def run(self, info):
         metadata = {}
-        if info.get('title') is not None:
-            metadata['title'] = info['title']
-        if info.get('upload_date') is not None:
-            metadata['date'] = info['upload_date']
-        if info.get('artist') is not None:
-            metadata['artist'] = info['artist']
-        elif info.get('uploader') is not None:
-            metadata['artist'] = info['uploader']
-        elif info.get('uploader_id') is not None:
-            metadata['artist'] = info['uploader_id']
-        if info.get('description') is not None:
-            metadata['description'] = info['description']
-            metadata['comment'] = info['description']
-        if info.get('webpage_url') is not None:
-            metadata['purl'] = info['webpage_url']
-        if info.get('album') is not None:
-            metadata['album'] = info['album']
+
+        def add(meta_list, info_list=None):
+            if not info_list:
+                info_list = meta_list
+            if not isinstance(meta_list, (list, tuple)):
+                meta_list = (meta_list,)
+            if not isinstance(info_list, (list, tuple)):
+                info_list = (info_list,)
+            for info_f in info_list:
+                if info.get(info_f) is not None:
+                    for meta_f in meta_list:
+                        metadata[meta_f] = info[info_f]
+                    break
+
+        add('title', ('track', 'title'))
+        add('date', 'upload_date')
+        add(('description', 'comment'), 'description')
+        add('purl', 'webpage_url')
+        add('track', 'track_number')
+        add('artist', ('artist', 'creator', 'uploader', 'uploader_id'))
+        add('genre')
+        add('album')
+        add('album_artist')
+        add('disc', 'disc_number')
 
         if not metadata:
             self._downloader.to_screen('[ffmpeg] There isn\'t any metadata to add')

From 174aba3223d0b01ec30cc0a814c6c0813ce13f15 Mon Sep 17 00:00:00 2001
From: Philipp Hagemeister <phihag@phihag.de>
Date: Sun, 1 May 2016 10:19:14 +0200
Subject: [PATCH 40/50] release 2016.05.01

---
 .github/ISSUE_TEMPLATE.md |  6 +++---
 docs/supportedsites.md    | 14 +++++++++-----
 youtube_dl/version.py     |  2 +-
 3 files changed, 13 insertions(+), 9 deletions(-)

diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md
index c208eb689..a26ff1de4 100644
--- a/.github/ISSUE_TEMPLATE.md
+++ b/.github/ISSUE_TEMPLATE.md
@@ -6,8 +6,8 @@
 
 ---
 
-### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.04.24*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
-- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.04.24**
+### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.05.01*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.05.01**
 
 ### Before submitting an *issue* make sure you have:
 - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
@@ -35,7 +35,7 @@ $ youtube-dl -v <your command line>
 [debug] User config: []
 [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
 [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
-[debug] youtube-dl version 2016.04.24
+[debug] youtube-dl version 2016.05.01
 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
 [debug] Proxy map: {}
diff --git a/docs/supportedsites.md b/docs/supportedsites.md
index 03875b8db..9fb43671f 100644
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@@ -338,7 +338,6 @@
  - **mailru**: Видео@Mail.Ru
  - **MakersChannel**
  - **MakerTV**
- - **Malemotion**
  - **MatchTV**
  - **MDR**: MDR.DE and KiKA
  - **media.ccc.de**
@@ -375,8 +374,8 @@
  - **mtvservices:embedded**
  - **MuenchenTV**: münchen.tv
  - **MusicPlayOn**
- - **muzu.tv**
  - **Mwave**
+ - **MwaveMeetGreet**
  - **MySpace**
  - **MySpace:album**
  - **MySpass**
@@ -554,7 +553,6 @@
  - **SenateISVP**
  - **ServingSys**
  - **Sexu**
- - **SexyKarma**: Sexy Karma and Watch Indian Porn
  - **Shahid**
  - **Shared**: shared.sx and vivo.sx
  - **ShareSix**
@@ -567,8 +565,6 @@
  - **smotri:broadcast**: Smotri.com broadcasts
  - **smotri:community**: Smotri.com community videos
  - **smotri:user**: Smotri.com user videos
- - **SnagFilms**
- - **SnagFilmsEmbed**
  - **Snotr**
  - **Sohu**
  - **soundcloud**
@@ -610,6 +606,7 @@
  - **Syfy**
  - **SztvHu**
  - **Tagesschau**
+ - **tagesschau:player**
  - **Tapely**
  - **Tass**
  - **TDSLifeway**
@@ -725,6 +722,8 @@
  - **Vidzi**
  - **vier**
  - **vier:videos**
+ - **ViewLift**
+ - **ViewLiftEmbed**
  - **Viewster**
  - **Viidea**
  - **viki**
@@ -756,6 +755,7 @@
  - **Walla**
  - **WashingtonPost**
  - **wat.tv**
+ - **WatchIndianPorn**: Watch Indian Porn
  - **WDR**
  - **wdr:mobile**
  - **WDRMaus**: Sendung mit der Maus
@@ -775,6 +775,10 @@
  - **XFileShare**: XFileShare based sites: GorillaVid.in, daclips.in, movpod.in, fastvideo.in, realvid.net, filehoot.com and vidto.me
  - **XHamster**
  - **XHamsterEmbed**
+ - **xiami:album**: 虾米音乐 - 专辑
+ - **xiami:artist**: 虾米音乐 - 歌手
+ - **xiami:collection**: 虾米音乐 - 精选集
+ - **xiami:song**: 虾米音乐
  - **XMinus**
  - **XNXX**
  - **Xstream**
diff --git a/youtube_dl/version.py b/youtube_dl/version.py
index 8befd9607..551160897 100644
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,3 +1,3 @@
 from __future__ import unicode_literals
 
-__version__ = '2016.04.24'
+__version__ = '2016.05.01'

From e0da32df6e924535dbbe1e1c8ad05386dcf93029 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com>
Date: Mon, 2 May 2016 00:48:26 +0600
Subject: [PATCH 41/50] [vevo:playlist] Add extractor (Closes #9334, closes
 #9364)

---
 youtube_dl/extractor/extractors.py |  5 +-
 youtube_dl/extractor/vevo.py       | 74 +++++++++++++++++++++++++++++-
 2 files changed, 76 insertions(+), 3 deletions(-)

diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index 4aee53d6a..ef4431364 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -849,7 +849,10 @@ from .veehd import VeeHDIE
 from .veoh import VeohIE
 from .vessel import VesselIE
 from .vesti import VestiIE
-from .vevo import VevoIE
+from .vevo import (
+    VevoIE,
+    VevoPlaylistIE,
+)
 from .vgtv import (
     BTArticleIE,
     BTVestlendingenIE,
diff --git a/youtube_dl/extractor/vevo.py b/youtube_dl/extractor/vevo.py
index 147480f64..4ad1e87e4 100644
--- a/youtube_dl/extractor/vevo.py
+++ b/youtube_dl/extractor/vevo.py
@@ -3,7 +3,10 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..compat import compat_etree_fromstring
+from ..compat import (
+    compat_etree_fromstring,
+    compat_urlparse,
+)
 from ..utils import (
     ExtractorError,
     int_or_none,
@@ -18,7 +21,7 @@ class VevoIE(InfoExtractor):
     (currently used by MTVIE and MySpaceIE)
     '''
     _VALID_URL = r'''(?x)
-        (?:https?://www\.vevo\.com/watch/(?:[^/]+/(?:[^/]+/)?)?|
+        (?:https?://www\.vevo\.com/watch/(?!playlist|genre)(?:[^/]+/(?:[^/]+/)?)?|
            https?://cache\.vevo\.com/m/html/embed\.html\?video=|
            https?://videoplayer\.vevo\.com/embed/embedded\?videoId=|
            vevo:)
@@ -301,3 +304,70 @@ class VevoIE(InfoExtractor):
             'view_count': view_count,
             'age_limit': age_limit,
         }
+
+
+class VevoPlaylistIE(InfoExtractor):
+    _VALID_URL = r'https?://www\.vevo\.com/watch/(?:playlist|genre)/(?P<id>[^/?#&]+)'
+
+    _TESTS = [{
+        'url': 'http://www.vevo.com/watch/playlist/dadbf4e7-b99f-4184-9670-6f0e547b6a29',
+        'info_dict': {
+            'id': 'dadbf4e7-b99f-4184-9670-6f0e547b6a29',
+            'title': 'Best-Of: Birdman',
+        },
+        'playlist_count': 10,
+        'params': {
+            'proxy': '52.53.186.253:8083',
+            'no_check_certificate': True,
+        },
+    }, {
+        'url': 'http://www.vevo.com/watch/playlist/dadbf4e7-b99f-4184-9670-6f0e547b6a29?index=0',
+        'md5': '32dcdfddddf9ec6917fc88ca26d36282',
+        'info_dict': {
+            'id': 'USCMV1100073',
+            'ext': 'mp4',
+            'title': 'Y.U. MAD',
+            'timestamp': 1323417600,
+            'upload_date': '20111209',
+            'uploader': 'Birdman',
+        },
+        'expected_warnings': ['Unable to download SMIL file'],
+        'params': {
+            'proxy': '52.53.186.253:8083',
+            'no_check_certificate': True,
+        },
+    }, {
+        'url': 'http://www.vevo.com/watch/genre/rock?index=0',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        playlist_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, playlist_id)
+
+        qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
+        index = qs.get('index', [None])[0]
+
+        if index:
+            video_id = self._search_regex(
+                r'<meta[^>]+content=(["\'])vevo://video/(?P<id>.+?)\1[^>]*>',
+                webpage, 'video id', default=None, group='id')
+            if video_id:
+                return self.url_result('vevo:%s' % video_id, VevoIE.ie_key())
+
+        playlists = self._parse_json(
+            self._search_regex(
+                r'window\.__INITIAL_STORE__\s*=\s*({.+?});\s*</script>',
+                webpage, 'initial store'),
+            playlist_id)['default']['playlists']
+
+        playlist = list(playlists.values())[0]
+
+        entries = [
+            self.url_result('vevo:%s' % src, VevoIE.ie_key())
+            for src in playlist['isrcs']]
+
+        return self.playlist_result(
+            entries, playlist.get('playlistId'),
+            playlist.get('name'), playlist.get('description'))

From 0c9d288ba0db3c1309bc6090457e11375073f3eb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com>
Date: Mon, 2 May 2016 00:50:31 +0600
Subject: [PATCH 42/50] [vevo:playlist] Remove debug params

---
 youtube_dl/extractor/vevo.py | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/youtube_dl/extractor/vevo.py b/youtube_dl/extractor/vevo.py
index 4ad1e87e4..8431077ad 100644
--- a/youtube_dl/extractor/vevo.py
+++ b/youtube_dl/extractor/vevo.py
@@ -316,10 +316,6 @@ class VevoPlaylistIE(InfoExtractor):
             'title': 'Best-Of: Birdman',
         },
         'playlist_count': 10,
-        'params': {
-            'proxy': '52.53.186.253:8083',
-            'no_check_certificate': True,
-        },
     }, {
         'url': 'http://www.vevo.com/watch/playlist/dadbf4e7-b99f-4184-9670-6f0e547b6a29?index=0',
         'md5': '32dcdfddddf9ec6917fc88ca26d36282',
@@ -332,10 +328,6 @@ class VevoPlaylistIE(InfoExtractor):
             'uploader': 'Birdman',
         },
         'expected_warnings': ['Unable to download SMIL file'],
-        'params': {
-            'proxy': '52.53.186.253:8083',
-            'no_check_certificate': True,
-        },
     }, {
         'url': 'http://www.vevo.com/watch/genre/rock?index=0',
         'only_matching': True,

From e2bd301ce7795597a7e3ef7f5a5446f9ec987883 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com>
Date: Mon, 2 May 2016 01:00:42 +0600
Subject: [PATCH 43/50] [vevo:playlist] Fix genre playlists

---
 youtube_dl/extractor/vevo.py | 18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)

diff --git a/youtube_dl/extractor/vevo.py b/youtube_dl/extractor/vevo.py
index 8431077ad..2d1ff05e1 100644
--- a/youtube_dl/extractor/vevo.py
+++ b/youtube_dl/extractor/vevo.py
@@ -307,7 +307,7 @@ class VevoIE(InfoExtractor):
 
 
 class VevoPlaylistIE(InfoExtractor):
-    _VALID_URL = r'https?://www\.vevo\.com/watch/(?:playlist|genre)/(?P<id>[^/?#&]+)'
+    _VALID_URL = r'https?://www\.vevo\.com/watch/(?P<kind>playlist|genre)/(?P<id>[^/?#&]+)'
 
     _TESTS = [{
         'url': 'http://www.vevo.com/watch/playlist/dadbf4e7-b99f-4184-9670-6f0e547b6a29',
@@ -316,6 +316,13 @@ class VevoPlaylistIE(InfoExtractor):
             'title': 'Best-Of: Birdman',
         },
         'playlist_count': 10,
+    }, {
+        'url': 'http://www.vevo.com/watch/genre/rock',
+        'info_dict': {
+            'id': 'rock',
+            'title': 'Rock',
+        },
+        'playlist_count': 20,
     }, {
         'url': 'http://www.vevo.com/watch/playlist/dadbf4e7-b99f-4184-9670-6f0e547b6a29?index=0',
         'md5': '32dcdfddddf9ec6917fc88ca26d36282',
@@ -334,7 +341,9 @@ class VevoPlaylistIE(InfoExtractor):
     }]
 
     def _real_extract(self, url):
-        playlist_id = self._match_id(url)
+        mobj = re.match(self._VALID_URL, url)
+        playlist_id = mobj.group('id')
+        playlist_kind = mobj.group('kind')
 
         webpage = self._download_webpage(url, playlist_id)
 
@@ -352,9 +361,10 @@ class VevoPlaylistIE(InfoExtractor):
             self._search_regex(
                 r'window\.__INITIAL_STORE__\s*=\s*({.+?});\s*</script>',
                 webpage, 'initial store'),
-            playlist_id)['default']['playlists']
+            playlist_id)['default']['%ss' % playlist_kind]
 
-        playlist = list(playlists.values())[0]
+        playlist = (list(playlists.values())[0]
+                    if playlist_kind == 'playlist' else playlists[playlist_id])
 
         entries = [
             self.url_result('vevo:%s' % src, VevoIE.ie_key())

From 516ea41a7dd5a350e93ea7cc5ca2c1fcbd0cb43b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com>
Date: Mon, 2 May 2016 02:54:50 +0600
Subject: [PATCH 44/50] [vevo] Fix _call_api

---
 youtube_dl/extractor/vevo.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/youtube_dl/extractor/vevo.py b/youtube_dl/extractor/vevo.py
index 2d1ff05e1..35f974c4e 100644
--- a/youtube_dl/extractor/vevo.py
+++ b/youtube_dl/extractor/vevo.py
@@ -149,8 +149,8 @@ class VevoIE(InfoExtractor):
         auth_info = self._parse_json(webpage, video_id)
         self._api_url_template = self.http_scheme() + '//apiv2.vevo.com/%s?token=' + auth_info['access_token']
 
-    def _call_api(self, path, video_id, note, errnote, fatal=True):
-        return self._download_json(self._api_url_template % path, video_id, note, errnote)
+    def _call_api(self, path, *args, **kwargs):
+        return self._download_json(self._api_url_template % path, *args, **kwargs)
 
     def _real_extract(self, url):
         video_id = self._match_id(url)

From 9618c448247a6aa528b4bf2f289d3dd164c11417 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com>
Date: Mon, 2 May 2016 02:58:20 +0600
Subject: [PATCH 45/50] [vevo] Extract video versions from webpage as a last
 resort (Closes #8426, closes #9366)

---
 youtube_dl/extractor/vevo.py | 28 ++++++++++++++++++++--------
 1 file changed, 20 insertions(+), 8 deletions(-)

diff --git a/youtube_dl/extractor/vevo.py b/youtube_dl/extractor/vevo.py
index 35f974c4e..b6c6ba89f 100644
--- a/youtube_dl/extractor/vevo.py
+++ b/youtube_dl/extractor/vevo.py
@@ -15,7 +15,16 @@ from ..utils import (
 )
 
 
-class VevoIE(InfoExtractor):
+class VevoBaseIE(InfoExtractor):
+    def _extract_json(self, webpage, video_id, item):
+        return self._parse_json(
+            self._search_regex(
+                r'window\.__INITIAL_STORE__\s*=\s*({.+?});\s*</script>',
+                webpage, 'initial store'),
+            video_id)['default'][item]
+
+
+class VevoIE(VevoBaseIE):
     '''
     Accepts urls from vevo.com or in the format 'vevo:{id}'
     (currently used by MTVIE and MySpaceIE)
@@ -186,7 +195,14 @@ class VevoIE(InfoExtractor):
             video_versions = self._call_api(
                 'video/%s/streams' % video_id, video_id,
                 'Downloading video versions info',
-                'Failed to download video versions info')
+                'Failed to download video versions info',
+                fatal=False)
+
+            # Some videos are only available via webpage (e.g.
+            # https://github.com/rg3/youtube-dl/issues/9366)
+            if not video_versions:
+                webpage = self._download_webpage(url, video_id)
+                video_versions = self._extract_json(webpage, video_id, 'streams')[video_id][0]
 
             timestamp = parse_iso8601(video_info.get('releaseDate'))
             artists = video_info.get('artists')
@@ -306,7 +322,7 @@ class VevoIE(InfoExtractor):
         }
 
 
-class VevoPlaylistIE(InfoExtractor):
+class VevoPlaylistIE(VevoBaseIE):
     _VALID_URL = r'https?://www\.vevo\.com/watch/(?P<kind>playlist|genre)/(?P<id>[^/?#&]+)'
 
     _TESTS = [{
@@ -357,11 +373,7 @@ class VevoPlaylistIE(InfoExtractor):
             if video_id:
                 return self.url_result('vevo:%s' % video_id, VevoIE.ie_key())
 
-        playlists = self._parse_json(
-            self._search_regex(
-                r'window\.__INITIAL_STORE__\s*=\s*({.+?});\s*</script>',
-                webpage, 'initial store'),
-            playlist_id)['default']['%ss' % playlist_kind]
+        playlists = self._extract_json(webpage, playlist_id, '%ss' % playlist_kind)
 
         playlist = (list(playlists.values())[0]
                     if playlist_kind == 'playlist' else playlists[playlist_id])

From 8e7d0048886f374a58f0fe8ba021644d7074d02c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com>
Date: Mon, 2 May 2016 03:06:48 +0600
Subject: [PATCH 46/50] [vevo] Add test for video only available via webpage

---
 youtube_dl/extractor/vevo.py | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/youtube_dl/extractor/vevo.py b/youtube_dl/extractor/vevo.py
index b6c6ba89f..4eb98e50d 100644
--- a/youtube_dl/extractor/vevo.py
+++ b/youtube_dl/extractor/vevo.py
@@ -84,6 +84,20 @@ class VevoIE(VevoBaseIE):
             'uploader': 'K Camp',
             'timestamp': 1449468000,
         },
+    }, {
+        'note': 'Only available via webpage',
+        'url': 'http://www.vevo.com/watch/GBUV71600656',
+        'md5': '67e79210613865b66a47c33baa5e37fe',
+        'info_dict': {
+            'id': 'GBUV71600656',
+            'ext': 'mp4',
+            'title': 'Viva Love',
+            'upload_date': '20160428',
+            'age_limit': 0,
+            'uploader': 'ABC',
+            'timestamp': 1461830400,
+        },
+        'expected_warnings': ['Failed to download video versions info'],
     }]
     _SMIL_BASE_URL = 'http://smil.lvl3.vevo.com'
     _SOURCE_TYPES = {

From 881dbc86c4e70252a5b8e5c726a6f2a32ff878c1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com>
Date: Mon, 2 May 2016 03:28:58 +0600
Subject: [PATCH 47/50] [vevo] Extract track related metafields and add artists
 to title (Closes #1684)

---
 youtube_dl/extractor/vevo.py | 58 ++++++++++++++++++++++++++----------
 1 file changed, 42 insertions(+), 16 deletions(-)

diff --git a/youtube_dl/extractor/vevo.py b/youtube_dl/extractor/vevo.py
index 4eb98e50d..49cb3f479 100644
--- a/youtube_dl/extractor/vevo.py
+++ b/youtube_dl/extractor/vevo.py
@@ -42,11 +42,15 @@ class VevoIE(VevoBaseIE):
         'info_dict': {
             'id': 'GB1101300280',
             'ext': 'mp4',
-            'title': 'Somebody to Die For',
+            'title': 'Hurts - Somebody to Die For',
+            'timestamp': 1372057200,
             'upload_date': '20130624',
             'uploader': 'Hurts',
-            'timestamp': 1372057200,
+            'track': 'Somebody to Die For',
+            'artist': 'Hurts',
+            'genre': 'Pop',
         },
+        'expected_warnings': ['Unable to download SMIL file'],
     }, {
         'note': 'v3 SMIL format',
         'url': 'http://www.vevo.com/watch/cassadee-pope/i-wish-i-could-break-your-heart/USUV71302923',
@@ -54,23 +58,31 @@ class VevoIE(VevoBaseIE):
         'info_dict': {
             'id': 'USUV71302923',
             'ext': 'mp4',
-            'title': 'I Wish I Could Break Your Heart',
+            'title': 'Cassadee Pope - I Wish I Could Break Your Heart',
+            'timestamp': 1392796919,
             'upload_date': '20140219',
             'uploader': 'Cassadee Pope',
-            'timestamp': 1392796919,
+            'track': 'I Wish I Could Break Your Heart',
+            'artist': 'Cassadee Pope',
+            'genre': 'Country',
         },
+        'expected_warnings': ['Unable to download SMIL file'],
     }, {
         'note': 'Age-limited video',
         'url': 'https://www.vevo.com/watch/justin-timberlake/tunnel-vision-explicit/USRV81300282',
         'info_dict': {
             'id': 'USRV81300282',
             'ext': 'mp4',
-            'title': 'Tunnel Vision (Explicit)',
-            'upload_date': '20130703',
+            'title': 'Justin Timberlake - Tunnel Vision (Explicit)',
             'age_limit': 18,
-            'uploader': 'Justin Timberlake',
             'timestamp': 1372888800,
+            'upload_date': '20130703',
+            'uploader': 'Justin Timberlake',
+            'track': 'Tunnel Vision (Explicit)',
+            'artist': 'Justin Timberlake',
+            'genre': 'Pop',
         },
+        'expected_warnings': ['Unable to download SMIL file'],
     }, {
         'note': 'No video_info',
         'url': 'http://www.vevo.com/watch/k-camp-1/Till-I-Die/USUV71503000',
@@ -78,11 +90,14 @@ class VevoIE(VevoBaseIE):
         'info_dict': {
             'id': 'USUV71503000',
             'ext': 'mp4',
-            'title': 'Till I Die',
-            'upload_date': '20151207',
+            'title': 'K Camp - Till I Die',
             'age_limit': 18,
-            'uploader': 'K Camp',
             'timestamp': 1449468000,
+            'upload_date': '20151207',
+            'uploader': 'K Camp',
+            'track': 'Till I Die',
+            'artist': 'K Camp',
+            'genre': 'Rap/Hip-Hop',
         },
     }, {
         'note': 'Only available via webpage',
@@ -91,11 +106,14 @@ class VevoIE(VevoBaseIE):
         'info_dict': {
             'id': 'GBUV71600656',
             'ext': 'mp4',
-            'title': 'Viva Love',
-            'upload_date': '20160428',
+            'title': 'ABC - Viva Love',
             'age_limit': 0,
-            'uploader': 'ABC',
             'timestamp': 1461830400,
+            'upload_date': '20160428',
+            'uploader': 'ABC',
+            'track': 'Viva Love',
+            'artist': 'ABC',
+            'genre': 'Pop',
         },
         'expected_warnings': ['Failed to download video versions info'],
     }]
@@ -184,8 +202,8 @@ class VevoIE(VevoBaseIE):
         video_info = response.get('video') or {}
         video_versions = video_info.get('videoVersions')
         uploader = None
-        timestamp = None
         view_count = None
+        timestamp = None
         formats = []
 
         if not video_info:
@@ -311,7 +329,9 @@ class VevoIE(VevoBaseIE):
                         smil_parsed = True
         self._sort_formats(formats)
 
-        title = video_info['title']
+        track = video_info['title']
+        title = '%s - %s' % (uploader, track) if uploader else track
+        genre = video_info.get('genres', [None])[0]
 
         is_explicit = video_info.get('isExplicit')
         if is_explicit is True:
@@ -333,6 +353,9 @@ class VevoIE(VevoBaseIE):
             'duration': duration,
             'view_count': view_count,
             'age_limit': age_limit,
+            'track': track,
+            'artist': uploader,
+            'genre': genre,
         }
 
 
@@ -359,10 +382,13 @@ class VevoPlaylistIE(VevoBaseIE):
         'info_dict': {
             'id': 'USCMV1100073',
             'ext': 'mp4',
-            'title': 'Y.U. MAD',
+            'title': 'Birdman - Y.U. MAD',
             'timestamp': 1323417600,
             'upload_date': '20111209',
             'uploader': 'Birdman',
+            'track': 'Y.U. MAD',
+            'artist': 'Birdman',
+            'genre': 'Rap/Hip-Hop',
         },
         'expected_warnings': ['Unable to download SMIL file'],
     }, {

From 78a3ff33ab686bc6fc75735e2b2a5935b80311be Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com>
Date: Mon, 2 May 2016 03:29:48 +0600
Subject: [PATCH 48/50] [vevo:playlist] Add fallback for playlist id

---
 youtube_dl/extractor/vevo.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/youtube_dl/extractor/vevo.py b/youtube_dl/extractor/vevo.py
index 49cb3f479..bbe29fc51 100644
--- a/youtube_dl/extractor/vevo.py
+++ b/youtube_dl/extractor/vevo.py
@@ -423,5 +423,5 @@ class VevoPlaylistIE(VevoBaseIE):
             for src in playlist['isrcs']]
 
         return self.playlist_result(
-            entries, playlist.get('playlistId'),
+            entries, playlist.get('playlistId') or playlist_id,
             playlist.get('name'), playlist.get('description'))

From 9508738f9a9d6fd3de2e60cd7ccb4c8631bea6f4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com>
Date: Mon, 2 May 2016 03:36:40 +0600
Subject: [PATCH 49/50] [vevo] Extract featured artist

---
 youtube_dl/extractor/vevo.py | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/youtube_dl/extractor/vevo.py b/youtube_dl/extractor/vevo.py
index bbe29fc51..63eab4148 100644
--- a/youtube_dl/extractor/vevo.py
+++ b/youtube_dl/extractor/vevo.py
@@ -201,6 +201,8 @@ class VevoIE(VevoBaseIE):
             json_url, video_id, 'Downloading video info', 'Unable to download info')
         video_info = response.get('video') or {}
         video_versions = video_info.get('videoVersions')
+        artist = None
+        featured_artist = None
         uploader = None
         view_count = None
         timestamp = None
@@ -239,7 +241,7 @@ class VevoIE(VevoBaseIE):
             timestamp = parse_iso8601(video_info.get('releaseDate'))
             artists = video_info.get('artists')
             if artists:
-                uploader = artists[0]['name']
+                artist = uploader = artists[0]['name']
             view_count = int_or_none(video_info.get('views', {}).get('total'))
 
             for video_version in video_versions:
@@ -292,7 +294,11 @@ class VevoIE(VevoBaseIE):
                 scale=1000)
             artists = video_info.get('mainArtists')
             if artists:
-                uploader = artists[0]['artistName']
+                artist = uploader = artists[0]['artistName']
+
+            featured_artists = video_info.get('featuredArtists')
+            if featured_artists:
+                featured_artist = featured_artists[0]['artistName']
 
             smil_parsed = False
             for video_version in video_info['videoVersions']:
@@ -330,7 +336,9 @@ class VevoIE(VevoBaseIE):
         self._sort_formats(formats)
 
         track = video_info['title']
-        title = '%s - %s' % (uploader, track) if uploader else track
+        if featured_artist:
+            artist = '%s ft. %s' % (artist, featured_artist)
+        title = '%s - %s' % (artist, track) if artist else track
         genre = video_info.get('genres', [None])[0]
 
         is_explicit = video_info.get('isExplicit')

From 686cc8963441c37105c0447f31c5ea21405be05a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com>
Date: Mon, 2 May 2016 07:07:35 +0600
Subject: [PATCH 50/50] [discovery] Fix typo

---
 youtube_dl/extractor/discovery.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/youtube_dl/extractor/discovery.py b/youtube_dl/extractor/discovery.py
index 7c554ec14..55853f76f 100644
--- a/youtube_dl/extractor/discovery.py
+++ b/youtube_dl/extractor/discovery.py
@@ -71,7 +71,7 @@ class DiscoveryIE(InfoExtractor):
         entries = []
 
         for idx, video_info in enumerate(info['playlist']):
-            subtitles = []
+            subtitles = {}
             caption_url = video_info.get('captionsUrl')
             if caption_url:
                 subtitles = {