commit faaac9b31e
Gilles Habran 2016-05-02 11:59:25 +02:00
20 changed files with 908 additions and 344 deletions

View File

@@ -6,8 +6,8 @@
 ---
-### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.04.24*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
+### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.05.01*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
-- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.04.24**
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.05.01**
 ### Before submitting an *issue* make sure you have:
 - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
@@ -35,7 +35,7 @@ $ youtube-dl -v <your command line>
 [debug] User config: []
 [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
 [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
-[debug] youtube-dl version 2016.04.24
+[debug] youtube-dl version 2016.05.01
 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
 [debug] Proxy map: {}

View File

@@ -168,3 +168,4 @@ José Joaquín Atria
 Viťas Strádal
 Kagami Hiiragi
 Philip Huppert
+blahgeek

View File

@@ -338,7 +338,6 @@
 - **mailru**: Видео@Mail.Ru
 - **MakersChannel**
 - **MakerTV**
-- **Malemotion**
 - **MatchTV**
 - **MDR**: MDR.DE and KiKA
 - **media.ccc.de**
@@ -375,8 +374,8 @@
 - **mtvservices:embedded**
 - **MuenchenTV**: münchen.tv
 - **MusicPlayOn**
-- **muzu.tv**
 - **Mwave**
+- **MwaveMeetGreet**
 - **MySpace**
 - **MySpace:album**
 - **MySpass**
@@ -554,7 +553,6 @@
 - **SenateISVP**
 - **ServingSys**
 - **Sexu**
-- **SexyKarma**: Sexy Karma and Watch Indian Porn
 - **Shahid**
 - **Shared**: shared.sx and vivo.sx
 - **ShareSix**
@@ -567,8 +565,6 @@
 - **smotri:broadcast**: Smotri.com broadcasts
 - **smotri:community**: Smotri.com community videos
 - **smotri:user**: Smotri.com user videos
-- **SnagFilms**
-- **SnagFilmsEmbed**
 - **Snotr**
 - **Sohu**
 - **soundcloud**
@@ -610,6 +606,7 @@
 - **Syfy**
 - **SztvHu**
 - **Tagesschau**
+- **tagesschau:player**
 - **Tapely**
 - **Tass**
 - **TDSLifeway**
@@ -725,6 +722,8 @@
 - **Vidzi**
 - **vier**
 - **vier:videos**
+- **ViewLift**
+- **ViewLiftEmbed**
 - **Viewster**
 - **Viidea**
 - **viki**
@@ -756,6 +755,7 @@
 - **Walla**
 - **WashingtonPost**
 - **wat.tv**
+- **WatchIndianPorn**: Watch Indian Porn
 - **WDR**
 - **wdr:mobile**
 - **WDRMaus**: Sendung mit der Maus
@@ -775,6 +775,10 @@
 - **XFileShare**: XFileShare based sites: GorillaVid.in, daclips.in, movpod.in, fastvideo.in, realvid.net, filehoot.com and vidto.me
 - **XHamster**
 - **XHamsterEmbed**
+- **xiami:album**: 虾米音乐 - 专辑
+- **xiami:artist**: 虾米音乐 - 歌手
+- **xiami:collection**: 虾米音乐 - 精选集
+- **xiami:song**: 虾米音乐
 - **XMinus**
 - **XNXX**
 - **Xstream**

View File

@@ -1,13 +1,9 @@
 from __future__ import unicode_literals
-import re
 from .common import InfoExtractor
 from ..utils import (
     int_or_none,
-    parse_duration,
-    qualities,
-    unified_strdate,
+    parse_iso8601,
 )
@@ -19,14 +15,14 @@ class CCCIE(InfoExtractor):
         'url': 'https://media.ccc.de/v/30C3_-_5443_-_en_-_saal_g_-_201312281830_-_introduction_to_processor_design_-_byterazor#video',
         'md5': '3a1eda8f3a29515d27f5adb967d7e740',
         'info_dict': {
-            'id': '30C3_-_5443_-_en_-_saal_g_-_201312281830_-_introduction_to_processor_design_-_byterazor',
+            'id': '1839',
             'ext': 'mp4',
             'title': 'Introduction to Processor Design',
-            'description': 'md5:80be298773966f66d56cb11260b879af',
+            'description': 'md5:df55f6d073d4ceae55aae6f2fd98a0ac',
             'thumbnail': 're:^https?://.*\.jpg$',
-            'view_count': int,
             'upload_date': '20131228',
-            'duration': 3660,
+            'timestamp': 1388188800,
+            'duration': 3710,
         }
     }, {
         'url': 'https://media.ccc.de/v/32c3-7368-shopshifting#download',
@@ -34,79 +30,48 @@ class CCCIE(InfoExtractor):
     }]
     def _real_extract(self, url):
-        video_id = self._match_id(url)
-        webpage = self._download_webpage(url, video_id)
-        if self._downloader.params.get('prefer_free_formats'):
-            preference = qualities(['mp3', 'opus', 'mp4-lq', 'webm-lq', 'h264-sd', 'mp4-sd', 'webm-sd', 'mp4', 'webm', 'mp4-hd', 'h264-hd', 'webm-hd'])
-        else:
-            preference = qualities(['opus', 'mp3', 'webm-lq', 'mp4-lq', 'webm-sd', 'h264-sd', 'mp4-sd', 'webm', 'mp4', 'webm-hd', 'mp4-hd', 'h264-hd'])
-        title = self._html_search_regex(
-            r'(?s)<h1>(.*?)</h1>', webpage, 'title')
-        description = self._html_search_regex(
-            r'(?s)<h3>About</h3>(.+?)<h3>',
-            webpage, 'description', fatal=False)
-        upload_date = unified_strdate(self._html_search_regex(
-            r"(?s)<span[^>]+class='[^']*fa-calendar-o'[^>]*>(.+?)</span>",
-            webpage, 'upload date', fatal=False))
-        view_count = int_or_none(self._html_search_regex(
-            r"(?s)<span class='[^']*fa-eye'></span>(.*?)</li>",
-            webpage, 'view count', fatal=False))
-        duration = parse_duration(self._html_search_regex(
-            r'(?s)<span[^>]+class=(["\']).*?fa-clock-o.*?\1[^>]*></span>(?P<duration>.+?)</li',
-            webpage, 'duration', fatal=False, group='duration'))
-        matches = re.finditer(r'''(?xs)
-            <(?:span|div)\s+class='label\s+filetype'>(?P<format>[^<]*)</(?:span|div)>\s*
-            <(?:span|div)\s+class='label\s+filetype'>(?P<lang>[^<]*)</(?:span|div)>\s*
-            <a\s+download\s+href='(?P<http_url>[^']+)'>\s*
-            (?:
-                .*?
-                <a\s+(?:download\s+)?href='(?P<torrent_url>[^']+\.torrent)'
-            )?''', webpage)
+        display_id = self._match_id(url)
+        webpage = self._download_webpage(url, display_id)
+        event_id = self._search_regex("data-id='(\d+)'", webpage, 'event id')
+        event_data = self._download_json('https://media.ccc.de/public/events/%s' % event_id, event_id)
         formats = []
-        for m in matches:
-            format = m.group('format')
-            format_id = self._search_regex(
-                r'.*/([a-z0-9_-]+)/[^/]*$',
-                m.group('http_url'), 'format id', default=None)
-            if format_id:
-                format_id = m.group('lang') + '-' + format_id
-            vcodec = 'h264' if 'h264' in format_id else (
-                'none' if format_id in ('mp3', 'opus') else None
+        for recording in event_data.get('recordings', []):
+            recording_url = recording.get('recording_url')
+            if not recording_url:
+                continue
+            language = recording.get('language')
+            folder = recording.get('folder')
+            format_id = None
+            if language:
+                format_id = language
+            if folder:
+                if language:
+                    format_id += '-' + folder
+                else:
+                    format_id = folder
+            vcodec = 'h264' if 'h264' in folder else (
+                'none' if folder in ('mp3', 'opus') else None
             )
             formats.append({
                 'format_id': format_id,
-                'format': format,
-                'language': m.group('lang'),
-                'url': m.group('http_url'),
+                'url': recording_url,
+                'width': int_or_none(recording.get('width')),
+                'height': int_or_none(recording.get('height')),
+                'filesize': int_or_none(recording.get('size'), invscale=1024 * 1024),
+                'language': language,
                 'vcodec': vcodec,
-                'preference': preference(format_id),
             })
-            if m.group('torrent_url'):
-                formats.append({
-                    'format_id': 'torrent-%s' % (format if format_id is None else format_id),
-                    'format': '%s (torrent)' % format,
-                    'proto': 'torrent',
-                    'format_note': '(unsupported; will just download the .torrent file)',
-                    'vcodec': vcodec,
-                    'preference': -100 + preference(format_id),
-                    'url': m.group('torrent_url'),
-                })
         self._sort_formats(formats)
-        thumbnail = self._html_search_regex(
-            r"<video.*?poster='([^']+)'", webpage, 'thumbnail', fatal=False)
         return {
-            'id': video_id,
-            'title': title,
-            'description': description,
-            'thumbnail': thumbnail,
-            'view_count': view_count,
-            'upload_date': upload_date,
-            'duration': duration,
+            'id': event_id,
+            'display_id': display_id,
+            'title': event_data['title'],
+            'description': event_data.get('description'),
+            'thumbnail': event_data.get('thumb_url'),
+            'timestamp': parse_iso8601(event_data.get('date')),
+            'duration': int_or_none(event_data.get('length')),
+            'tags': event_data.get('tags'),
             'formats': formats,
         }
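Note on the rewritten CCC extractor: metadata and recordings now come from the public events API instead of scraped HTML. A minimal standalone sketch of that flow (standard library only; the data-id attribute, API URL and field names are taken from the diff above, everything else is illustrative):

    import json
    import re
    from urllib.request import urlopen

    def fetch_ccc_event(talk_url):
        # the talk page embeds the numeric event id as data-id='...'
        webpage = urlopen(talk_url).read().decode('utf-8')
        event_id = re.search(r"data-id='(\d+)'", webpage).group(1)
        # the public API exposes title, description, date, length and recordings
        event_data = json.load(urlopen('https://media.ccc.de/public/events/%s' % event_id))
        recordings = [{
            'url': rec.get('recording_url'),
            'language': rec.get('language'),
            'folder': rec.get('folder'),
            'width': rec.get('width'),
            'height': rec.get('height'),
        } for rec in event_data.get('recordings', []) if rec.get('recording_url')]
        return event_data['title'], recordings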

View File

@@ -1142,7 +1142,7 @@ class InfoExtractor(object):
                 # Bandwidth of live streams may differ over time thus making
                 # format_id unpredictable. So it's better to keep provided
                 # format_id intact.
-                if last_media_name and not live:
+                if not live:
                     format_id.append(last_media_name if last_media_name else '%d' % (tbr if tbr else len(formats)))
                 f = {
                     'format_id': '-'.join(format_id),

View File

@@ -307,14 +307,17 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
             'video_uploader', fatal=False)
         available_fmts = []
-        for a, fmt in re.findall(r'(<a[^>]+token="showmedia\.([0-9]{3,4})p"[^>]+>.*?</a>)', webpage):
+        for a, fmt in re.findall(r'(<a[^>]+token=["\']showmedia\.([0-9]{3,4})p["\'][^>]+>)', webpage):
             attrs = extract_attributes(a)
             href = attrs.get('href')
             if href and '/freetrial' in href:
                 continue
             available_fmts.append(fmt)
         if not available_fmts:
-            available_fmts = re.findall(r'token="showmedia\.([0-9]{3,4})p"', webpage)
+            for p in (r'token=["\']showmedia\.([0-9]{3,4})p"', r'showmedia\.([0-9]{3,4})p'):
+                available_fmts = re.findall(p, webpage)
+                if available_fmts:
+                    break
         video_encode_ids = []
         formats = []
         for fmt in available_fmts:
@@ -364,6 +367,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
                 'ext': 'flv',
             })
             formats.append(format_info)
+        self._sort_formats(formats)
         metadata = self._download_xml(
             'http://www.crunchyroll.com/xml', video_id,

View File

@@ -12,39 +12,46 @@ class DFBIE(InfoExtractor):
     _TEST = {
         'url': 'http://tv.dfb.de/video/u-19-em-stimmen-zum-spiel-gegen-russland/11633/',
-        # The md5 is different each time
+        'md5': 'ac0f98a52a330f700b4b3034ad240649',
         'info_dict': {
             'id': '11633',
             'display_id': 'u-19-em-stimmen-zum-spiel-gegen-russland',
-            'ext': 'flv',
+            'ext': 'mp4',
             'title': 'U 19-EM: Stimmen zum Spiel gegen Russland',
             'upload_date': '20150714',
         },
     }
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-        display_id = mobj.group('display_id')
-        webpage = self._download_webpage(url, display_id)
+        display_id, video_id = re.match(self._VALID_URL, url).groups()
         player_info = self._download_xml(
             'http://tv.dfb.de/server/hd_video.php?play=%s' % video_id,
             display_id)
         video_info = player_info.find('video')
-        f4m_info = self._download_xml(
-            self._proto_relative_url(video_info.find('url').text.strip()), display_id)
-        token_el = f4m_info.find('token')
-        manifest_url = token_el.attrib['url'] + '?' + 'hdnea=' + token_el.attrib['auth'] + '&hdcore=3.2.0'
-        formats = self._extract_f4m_formats(manifest_url, display_id)
+        stream_access_url = self._proto_relative_url(video_info.find('url').text.strip())
+        formats = []
+        # see http://tv.dfb.de/player/js/ajax.js for the method to extract m3u8 formats
+        for sa_url in (stream_access_url, stream_access_url + '&area=&format=iphone'):
+            stream_access_info = self._download_xml(sa_url, display_id)
+            token_el = stream_access_info.find('token')
+            manifest_url = token_el.attrib['url'] + '?' + 'hdnea=' + token_el.attrib['auth']
+            if '.f4m' in manifest_url:
+                formats.extend(self._extract_f4m_formats(
+                    manifest_url + '&hdcore=3.2.0',
+                    display_id, f4m_id='hds', fatal=False))
+            else:
+                formats.extend(self._extract_m3u8_formats(
+                    manifest_url, display_id, 'mp4',
+                    'm3u8_native', m3u8_id='hls', fatal=False))
         self._sort_formats(formats)
         return {
             'id': video_id,
             'display_id': display_id,
             'title': video_info.find('title').text,
-            'thumbnail': self._og_search_thumbnail(webpage),
+            'thumbnail': 'http://tv.dfb.de/images/%s_640x360.jpg' % video_id,
             'upload_date': unified_strdate(video_info.find('time_date').text),
             'formats': formats,
         }
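The DFB change queries the stream-access service twice (plain and with `&area=&format=iphone`) and picks the manifest type from the signed URL's extension. A small sketch of that dispatch; the token values and URLs below are hypothetical:

    def signed_manifest(token_url, token_auth):
        # the signed manifest is the token URL plus the hdnea auth parameter
        return token_url + '?hdnea=' + token_auth

    def classify_manifest(manifest_url):
        # .f4m means an HDS manifest (which additionally needs &hdcore=3.2.0);
        # anything else is handled as an HLS (m3u8) playlist
        if '.f4m' in manifest_url:
            return 'hds', manifest_url + '&hdcore=3.2.0'
        return 'hls', manifest_url

    print(classify_manifest(signed_manifest(
        'http://example.invalid/stream/manifest.f4m', 'exp=123~acl=*~hmac=abc')))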

View File

@@ -33,6 +33,7 @@ class DiscoveryIE(InfoExtractor):
             'duration': 156,
             'timestamp': 1302032462,
             'upload_date': '20110405',
+            'uploader_id': '103207',
         },
         'params': {
             'skip_download': True,  # requires ffmpeg
@@ -54,7 +55,11 @@ class DiscoveryIE(InfoExtractor):
             'upload_date': '20140725',
             'timestamp': 1406246400,
             'duration': 116,
+            'uploader_id': '103207',
         },
+        'params': {
+            'skip_download': True,  # requires ffmpeg
+        }
     }]
     def _real_extract(self, url):
@@ -66,13 +71,19 @@ class DiscoveryIE(InfoExtractor):
         entries = []
         for idx, video_info in enumerate(info['playlist']):
-            formats = self._extract_m3u8_formats(
-                video_info['src'], display_id, 'mp4', 'm3u8_native', m3u8_id='hls',
-                note='Download m3u8 information for video %d' % (idx + 1))
-            self._sort_formats(formats)
+            subtitles = {}
+            caption_url = video_info.get('captionsUrl')
+            if caption_url:
+                subtitles = {
+                    'en': [{
+                        'url': caption_url,
+                    }]
+                }
             entries.append({
+                '_type': 'url_transparent',
+                'url': 'http://players.brightcove.net/103207/default_default/index.html?videoId=ref:%s' % video_info['referenceId'],
                 'id': compat_str(video_info['id']),
-                'formats': formats,
                 'title': video_info['title'],
                 'description': video_info.get('description'),
                 'duration': parse_duration(video_info.get('video_length')),
@@ -80,6 +91,7 @@ class DiscoveryIE(InfoExtractor):
                 'thumbnail': video_info.get('thumbnailURL'),
                 'alt_title': video_info.get('secondary_title'),
                 'timestamp': parse_iso8601(video_info.get('publishedDate')),
+                'subtitles': subtitles,
             })
         return self.playlist_result(entries, display_id, video_title)
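Each Discovery playlist entry is now a url_transparent result: format extraction is delegated to the Brightcove player URL while Discovery's own metadata is kept. A reduced sketch of such an entry (the input dict is hypothetical):

    def make_entry(video_info):
        # url_transparent: formats come from the referenced Brightcove page,
        # the fields below override whatever Brightcove reports
        entry = {
            '_type': 'url_transparent',
            'url': ('http://players.brightcove.net/103207/default_default/'
                    'index.html?videoId=ref:%s' % video_info['referenceId']),
            'id': str(video_info['id']),
            'title': video_info['title'],
            'description': video_info.get('description'),
        }
        caption_url = video_info.get('captionsUrl')
        if caption_url:
            entry['subtitles'] = {'en': [{'url': caption_url}]}
        return entry

    print(make_entry({'id': 1, 'referenceId': 'abc123', 'title': 'Sample episode'}))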

View File

@@ -724,7 +724,10 @@ from .svt import (
 from .swrmediathek import SWRMediathekIE
 from .syfy import SyfyIE
 from .sztvhu import SztvHuIE
-from .tagesschau import TagesschauIE
+from .tagesschau import (
+    TagesschauPlayerIE,
+    TagesschauIE,
+)
 from .tapely import TapelyIE
 from .tass import TassIE
 from .tdslifeway import TDSLifewayIE
@@ -846,7 +849,10 @@ from .veehd import VeeHDIE
 from .veoh import VeohIE
 from .vessel import VesselIE
 from .vesti import VestiIE
-from .vevo import VevoIE
+from .vevo import (
+    VevoIE,
+    VevoPlaylistIE,
+)
 from .vgtv import (
     BTArticleIE,
     BTVestlendingenIE,
@@ -941,6 +947,12 @@ from .xhamster import (
     XHamsterIE,
     XHamsterEmbedIE,
 )
+from .xiami import (
+    XiamiSongIE,
+    XiamiAlbumIE,
+    XiamiArtistIE,
+    XiamiCollectionIE
+)
 from .xminus import XMinusIE
 from .xnxx import XNXXIE
 from .xstream import XstreamIE

View File

@@ -2,6 +2,10 @@
 from __future__ import unicode_literals
 from .common import InfoExtractor
+from ..compat import (
+    compat_HTTPError,
+    compat_urllib_parse_unquote_plus,
+)
 from ..utils import (
     clean_html,
     determine_ext,
@@ -27,6 +31,7 @@ class FunimationIE(InfoExtractor):
             'description': 'md5:1769f43cd5fc130ace8fd87232207892',
             'thumbnail': 're:https?://.*\.jpg',
         },
+        'skip': 'Access without user interaction is forbidden by CloudFlare, and video removed',
     }, {
         'url': 'http://www.funimation.com/shows/hacksign/videos/official/role-play',
         'info_dict': {
@@ -37,6 +42,7 @@ class FunimationIE(InfoExtractor):
             'description': 'md5:b602bdc15eef4c9bbb201bb6e6a4a2dd',
             'thumbnail': 're:https?://.*\.jpg',
         },
+        'skip': 'Access without user interaction is forbidden by CloudFlare',
     }, {
         'url': 'http://www.funimation.com/shows/attack-on-titan-junior-high/videos/promotional/broadcast-dub-preview',
         'info_dict': {
@@ -47,8 +53,36 @@ class FunimationIE(InfoExtractor):
             'description': 'md5:f8ec49c0aff702a7832cd81b8a44f803',
             'thumbnail': 're:https?://.*\.(?:jpg|png)',
         },
+        'skip': 'Access without user interaction is forbidden by CloudFlare',
     }]
+    _LOGIN_URL = 'http://www.funimation.com/login'
+    def _download_webpage(self, *args, **kwargs):
+        try:
+            return super(FunimationIE, self)._download_webpage(*args, **kwargs)
+        except ExtractorError as ee:
+            if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 403:
+                response = ee.cause.read()
+                if b'>Please complete the security check to access<' in response:
+                    raise ExtractorError(
+                        'Access to funimation.com is blocked by CloudFlare. '
+                        'Please browse to http://www.funimation.com/, solve '
+                        'the reCAPTCHA, export browser cookies to a text file,'
+                        ' and then try again with --cookies YOUR_COOKIE_FILE.',
+                        expected=True)
+            raise
+    def _extract_cloudflare_session_ua(self, url):
+        ci_session_cookie = self._get_cookies(url).get('ci_session')
+        if ci_session_cookie:
+            ci_session = compat_urllib_parse_unquote_plus(ci_session_cookie.value)
+            # ci_session is a string serialized by PHP function serialize()
+            # This case is simple enough to use regular expressions only
+            return self._search_regex(
+                r'"user_agent";s:\d+:"([^"]+)"', ci_session, 'user agent',
+                default=None)
     def _login(self):
         (username, password) = self._get_login_info()
         if username is None:
@@ -57,8 +91,11 @@ class FunimationIE(InfoExtractor):
             'email_field': username,
             'password_field': password,
         })
-        login_request = sanitized_Request('http://www.funimation.com/login', data, headers={
-            'User-Agent': 'Mozilla/5.0 (Windows NT 5.2; WOW64; rv:42.0) Gecko/20100101 Firefox/42.0',
+        user_agent = self._extract_cloudflare_session_ua(self._LOGIN_URL)
+        if not user_agent:
+            user_agent = 'Mozilla/5.0 (Windows NT 5.2; WOW64; rv:42.0) Gecko/20100101 Firefox/42.0'
+        login_request = sanitized_Request(self._LOGIN_URL, data, headers={
+            'User-Agent': user_agent,
             'Content-Type': 'application/x-www-form-urlencoded'
         })
         login_page = self._download_webpage(
@@ -103,11 +140,16 @@ class FunimationIE(InfoExtractor):
             ('mobile', 'Mozilla/5.0 (Linux; Android 4.4.2; Nexus 4 Build/KOT49H) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.114 Mobile Safari/537.36'),
         )
+        user_agent = self._extract_cloudflare_session_ua(url)
+        if user_agent:
+            USER_AGENTS = ((None, user_agent),)
         for kind, user_agent in USER_AGENTS:
             request = sanitized_Request(url)
             request.add_header('User-Agent', user_agent)
             webpage = self._download_webpage(
-                request, display_id, 'Downloading %s webpage' % kind)
+                request, display_id,
+                'Downloading %s webpage' % kind if kind else 'Downloading webpage')
             playlist = self._parse_json(
                 self._search_regex(
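The CloudFlare workaround above reuses the User-Agent stored in the ci_session cookie, a URL-quoted, PHP-serialized string, and pulls it out with one regular expression instead of a full unserializer. A self-contained sketch against a hypothetical cookie value:

    import re
    from urllib.parse import unquote_plus

    def user_agent_from_ci_session(cookie_value):
        # ci_session comes from PHP serialize(); the entry looks like
        #   s:10:"user_agent";s:NN:"...";  so a regex is enough here
        ci_session = unquote_plus(cookie_value)
        m = re.search(r'"user_agent";s:\d+:"([^"]+)"', ci_session)
        return m.group(1) if m else None

    # hypothetical cookie payload, for illustration only
    sample = ('a:2:{s:10:"user_agent";s:22:"Mozilla/5.0+(X11)+Test";'
              's:9:"last_seen";i:1462000000;}')
    print(user_agent_from_ci_session(sample))  # -> Mozilla/5.0 (X11) Test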

View File

@@ -196,7 +196,7 @@ class PBSIE(InfoExtractor):
     _TESTS = [
         {
             'url': 'http://www.pbs.org/tpt/constitution-usa-peter-sagal/watch/a-more-perfect-union/',
-            'md5': 'ce1888486f0908d555a8093cac9a7362',
+            'md5': '173dc391afd361fa72eab5d3d918968d',
             'info_dict': {
                 'id': '2365006249',
                 'ext': 'mp4',
@@ -204,13 +204,10 @@ class PBSIE(InfoExtractor):
                 'description': 'md5:36f341ae62e251b8f5bd2b754b95a071',
                 'duration': 3190,
             },
-            'params': {
-                'skip_download': True,  # requires ffmpeg
-            },
         },
         {
             'url': 'http://www.pbs.org/wgbh/pages/frontline/losing-iraq/',
-            'md5': '143c98aa54a346738a3d78f54c925321',
+            'md5': '6f722cb3c3982186d34b0f13374499c7',
             'info_dict': {
                 'id': '2365297690',
                 'ext': 'mp4',
@@ -218,9 +215,6 @@ class PBSIE(InfoExtractor):
                 'description': 'md5:4d3eaa01f94e61b3e73704735f1196d9',
                 'duration': 5050,
             },
-            'params': {
-                'skip_download': True,  # requires ffmpeg
-            }
         },
         {
             'url': 'http://www.pbs.org/newshour/bb/education-jan-june12-cyberschools_02-23/',
@@ -244,9 +238,6 @@ class PBSIE(InfoExtractor):
                 'duration': 6559,
                 'thumbnail': 're:^https?://.*\.jpg$',
             },
-            'params': {
-                'skip_download': True,  # requires ffmpeg
-            },
         },
         {
             'url': 'http://www.pbs.org/wgbh/nova/earth/killer-typhoon.html',
@@ -262,9 +253,6 @@ class PBSIE(InfoExtractor):
                 'upload_date': '20140122',
                 'age_limit': 10,
             },
-            'params': {
-                'skip_download': True,  # requires ffmpeg
-            },
         },
         {
             'url': 'http://www.pbs.org/wgbh/pages/frontline/united-states-of-secrets/',
@@ -290,6 +278,7 @@ class PBSIE(InfoExtractor):
         },
         {
             'url': 'http://www.pbs.org/video/2365245528/',
+            'md5': '115223d41bd55cda8ae5cd5ed4e11497',
             'info_dict': {
                 'id': '2365245528',
                 'display_id': '2365245528',
@@ -299,15 +288,13 @@ class PBSIE(InfoExtractor):
                 'duration': 6851,
                 'thumbnail': 're:^https?://.*\.jpg$',
             },
-            'params': {
-                'skip_download': True,  # requires ffmpeg
-            },
         },
         {
             # Video embedded in iframe containing angle brackets as attribute's value (e.g.
             # "<iframe style='position: absolute;<br />\ntop: 0; left: 0;' ...", see
             # https://github.com/rg3/youtube-dl/issues/7059)
             'url': 'http://www.pbs.org/food/features/a-chefs-life-season-3-episode-5-prickly-business/',
+            'md5': '84ced42850d78f1d4650297356e95e6f',
             'info_dict': {
                 'id': '2365546844',
                 'display_id': 'a-chefs-life-season-3-episode-5-prickly-business',
@@ -317,9 +304,6 @@ class PBSIE(InfoExtractor):
                 'duration': 1480,
                 'thumbnail': 're:^https?://.*\.jpg$',
             },
-            'params': {
-                'skip_download': True,  # requires ffmpeg
-            },
         },
         {
             # Frontline video embedded via flp2012.js
@@ -340,6 +324,7 @@ class PBSIE(InfoExtractor):
         {
             # Serves hd only via wigget/partnerplayer page
             'url': 'http://www.pbs.org/video/2365641075/',
+            'md5': 'acfd4c400b48149a44861cb16dd305cf',
             'info_dict': {
                 'id': '2365641075',
                 'ext': 'mp4',
@@ -348,9 +333,6 @@ class PBSIE(InfoExtractor):
                 'thumbnail': 're:^https?://.*\.jpg$',
                 'formats': 'mincount:8',
             },
-            'params': {
-                'skip_download': True,  # requires ffmpeg
-            },
         },
         {
             'url': 'http://player.pbs.org/widget/partnerplayer/2365297708/?start=0&end=0&chapterbar=false&endscreen=false&topbar=true',
@@ -494,6 +476,7 @@ class PBSIE(InfoExtractor):
             info = video_info
         formats = []
+        http_url = None
         for num, redirect in enumerate(redirects):
             redirect_id = redirect.get('eeid')
@@ -514,13 +497,32 @@ class PBSIE(InfoExtractor):
             if determine_ext(format_url) == 'm3u8':
                 formats.extend(self._extract_m3u8_formats(
-                    format_url, display_id, 'mp4', preference=1, m3u8_id='hls'))
+                    format_url, display_id, 'mp4', m3u8_id='hls', fatal=False))
             else:
                 formats.append({
                     'url': format_url,
                     'format_id': redirect_id,
                 })
+                if re.search(r'^https?://.*(?:\d+k|baseline)', format_url):
+                    http_url = format_url
         self._remove_duplicate_formats(formats)
+        m3u8_formats = list(filter(
+            lambda f: f.get('protocol') == 'm3u8' and f.get('vcodec') != 'none' and f.get('resolution') != 'multiple',
+            formats))
+        if http_url:
+            for m3u8_format in m3u8_formats:
+                bitrate = self._search_regex(r'(\d+k)', m3u8_format['url'], 'bitrate', default=None)
+                # extract only the formats that we know that they will be available as http format.
+                # https://projects.pbs.org/confluence/display/coveapi/COVE+Video+Specifications
+                if not bitrate or bitrate not in ('400k', '800k', '1200k', '2500k'):
+                    continue
+                f = m3u8_format.copy()
+                f.update({
+                    'url': re.sub(r'\d+k|baseline', bitrate, http_url),
+                    'format_id': m3u8_format['format_id'].replace('hls', 'http'),
+                    'protocol': 'http',
+                })
+                formats.append(f)
         self._sort_formats(formats)
         rating_str = info.get('rating')
@@ -535,6 +537,19 @@ class PBSIE(InfoExtractor):
                 'ext': 'ttml',
                 'url': closed_captions_url,
             }]
+            mobj = re.search(r'/(\d+)_Encoded\.dfxp', closed_captions_url)
+            if mobj:
+                ttml_caption_suffix, ttml_caption_id = mobj.group(0, 1)
+                ttml_caption_id = int(ttml_caption_id)
+                subtitles['en'].extend([{
+                    'url': closed_captions_url.replace(
+                        ttml_caption_suffix, '/%d_Encoded.srt' % (ttml_caption_id + 1)),
+                    'ext': 'srt',
+                }, {
+                    'url': closed_captions_url.replace(
+                        ttml_caption_suffix, '/%d_Encoded.vtt' % (ttml_caption_id + 2)),
+                    'ext': 'vtt',
+                }])
         # info['title'] is often incomplete (e.g. 'Full Episode', 'Episode 5', etc)
         # Try turning it to 'program - title' naming scheme if possible
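Two of the PBS additions above follow fixed URL conventions: progressive HTTP copies of the HLS renditions exist only at the bitrates listed in the diff's comment (400k/800k/1200k/2500k) and are reached by splicing the bitrate token into a known HTTP URL, and the TTML caption URL maps to SRT/VTT siblings whose numeric id is offset by 1 and 2. A small sketch of both substitutions on hypothetical URLs:

    import re

    KNOWN_HTTP_BITRATES = ('400k', '800k', '1200k', '2500k')

    def http_variant(http_url, m3u8_url):
        # take the bitrate token from the HLS rendition URL and, if PBS serves
        # that bitrate progressively, splice it into the known HTTP URL
        m = re.search(r'(\d+k)', m3u8_url)
        if not m or m.group(1) not in KNOWN_HTTP_BITRATES:
            return None
        return re.sub(r'\d+k|baseline', m.group(1), http_url)

    def caption_variants(ttml_url):
        # .../1234_Encoded.dfxp -> .../1235_Encoded.srt and .../1236_Encoded.vtt
        m = re.search(r'/(\d+)_Encoded\.dfxp', ttml_url)
        if not m:
            return []
        suffix, cid = m.group(0), int(m.group(1))
        return [
            ttml_url.replace(suffix, '/%d_Encoded.srt' % (cid + 1)),
            ttml_url.replace(suffix, '/%d_Encoded.vtt' % (cid + 2)),
        ]

    # hypothetical URLs, for illustration only
    print(http_variant('http://example.invalid/clip-800k.mp4',
                       'http://example.invalid/hls/clip-1200k.m3u8'))
    print(caption_variants('http://example.invalid/captions/1234_Encoded.dfxp'))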

View File

@@ -20,18 +20,19 @@ class RtlNlIE(InfoExtractor):
         (?P<id>[0-9a-f-]+)'''
     _TESTS = [{
-        'url': 'http://www.rtlxl.nl/#!/rtl-nieuws-132237/6e4203a6-0a5e-3596-8424-c599a59e0677',
-        'md5': 'cc16baa36a6c169391f0764fa6b16654',
+        'url': 'http://www.rtlxl.nl/#!/rtl-nieuws-132237/82b1aad1-4a14-3d7b-b554-b0aed1b2c416',
+        'md5': '473d1946c1fdd050b2c0161a4b13c373',
         'info_dict': {
-            'id': '6e4203a6-0a5e-3596-8424-c599a59e0677',
+            'id': '82b1aad1-4a14-3d7b-b554-b0aed1b2c416',
             'ext': 'mp4',
-            'title': 'RTL Nieuws - Laat',
-            'description': 'md5:6b61f66510c8889923b11f2778c72dc5',
-            'timestamp': 1408051800,
-            'upload_date': '20140814',
-            'duration': 576.880,
+            'title': 'RTL Nieuws',
+            'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
+            'timestamp': 1461951000,
+            'upload_date': '20160429',
+            'duration': 1167.96,
         },
     }, {
+        # best format available a3t
         'url': 'http://www.rtl.nl/system/videoplayer/derden/rtlnieuws/video_embed.html#uuid=84ae5571-ac25-4225-ae0c-ef8d9efb2aed/autoplay=false',
         'md5': 'dea7474214af1271d91ef332fb8be7ea',
         'info_dict': {
@@ -39,18 +40,19 @@ class RtlNlIE(InfoExtractor):
             'ext': 'mp4',
             'timestamp': 1424039400,
             'title': 'RTL Nieuws - Nieuwe beelden Kopenhagen: chaos direct na aanslag',
-            'thumbnail': 're:^https?://screenshots\.rtl\.nl/system/thumb/sz=[0-9]+x[0-9]+/uuid=84ae5571-ac25-4225-ae0c-ef8d9efb2aed$',
+            'thumbnail': 're:^https?://screenshots\.rtl\.nl/(?:[^/]+/)*sz=[0-9]+x[0-9]+/uuid=84ae5571-ac25-4225-ae0c-ef8d9efb2aed$',
             'upload_date': '20150215',
             'description': 'Er zijn nieuwe beelden vrijgegeven die vlak na de aanslag in Kopenhagen zijn gemaakt. Op de video is goed te zien hoe omstanders zich bekommeren om één van de slachtoffers, terwijl de eerste agenten ter plaatse komen.',
         }
     }, {
         # empty synopsis and missing episodes (see https://github.com/rg3/youtube-dl/issues/6275)
+        # best format available nettv
         'url': 'http://www.rtl.nl/system/videoplayer/derden/rtlnieuws/video_embed.html#uuid=f536aac0-1dc3-4314-920e-3bd1c5b3811a/autoplay=false',
         'info_dict': {
             'id': 'f536aac0-1dc3-4314-920e-3bd1c5b3811a',
             'ext': 'mp4',
             'title': 'RTL Nieuws - Meer beelden van overval juwelier',
-            'thumbnail': 're:^https?://screenshots\.rtl\.nl/system/thumb/sz=[0-9]+x[0-9]+/uuid=f536aac0-1dc3-4314-920e-3bd1c5b3811a$',
+            'thumbnail': 're:^https?://screenshots\.rtl\.nl/(?:[^/]+/)*sz=[0-9]+x[0-9]+/uuid=f536aac0-1dc3-4314-920e-3bd1c5b3811a$',
             'timestamp': 1437233400,
             'upload_date': '20150718',
             'duration': 30.474,
@@ -94,22 +96,46 @@ class RtlNlIE(InfoExtractor):
         videopath = material['videopath']
         m3u8_url = meta.get('videohost', 'http://manifest.us.rtl.nl') + videopath
-        formats = self._extract_m3u8_formats(m3u8_url, uuid, ext='mp4')
+        formats = self._extract_m3u8_formats(
+            m3u8_url, uuid, 'mp4', m3u8_id='hls', fatal=False)
         video_urlpart = videopath.split('/adaptive/')[1][:-5]
         PG_URL_TEMPLATE = 'http://pg.us.rtl.nl/rtlxl/network/%s/progressive/%s.mp4'
-        formats.extend([
-            {
-                'url': PG_URL_TEMPLATE % ('a2m', video_urlpart),
-                'format_id': 'pg-sd',
-            },
-            {
-                'url': PG_URL_TEMPLATE % ('a3m', video_urlpart),
-                'format_id': 'pg-hd',
-                'quality': 0,
-            }
-        ])
+        PG_FORMATS = (
+            ('a2t', 512, 288),
+            ('a3t', 704, 400),
+            ('nettv', 1280, 720),
+        )
+        def pg_format(format_id, width, height):
+            return {
+                'url': PG_URL_TEMPLATE % (format_id, video_urlpart),
+                'format_id': 'pg-%s' % format_id,
+                'protocol': 'http',
+                'width': width,
+                'height': height,
+            }
+        if not formats:
+            formats = [pg_format(*pg_tuple) for pg_tuple in PG_FORMATS]
+        else:
+            pg_formats = []
+            for format_id, width, height in PG_FORMATS:
+                try:
+                    # Find hls format with the same width and height corresponding
+                    # to progressive format and copy metadata from it.
+                    f = next(f for f in formats if f.get('height') == height)
+                    # hls formats may have invalid width
+                    f['width'] = width
+                    f_copy = f.copy()
+                    f_copy.update(pg_format(format_id, width, height))
+                    pg_formats.append(f_copy)
+                except StopIteration:
+                    # Missing hls format does mean that no progressive format with
+                    # such width and height exists either.
+                    pass
+            formats.extend(pg_formats)
         self._sort_formats(formats)
         thumbnails = []
View File

@@ -4,42 +4,178 @@ from __future__ import unicode_literals
 import re
 from .common import InfoExtractor
-from ..utils import parse_filesize
+from ..utils import (
+    determine_ext,
+    js_to_json,
+    parse_iso8601,
+    parse_filesize,
+)
+class TagesschauPlayerIE(InfoExtractor):
+    IE_NAME = 'tagesschau:player'
+    _VALID_URL = r'https?://(?:www\.)?tagesschau\.de/multimedia/(?P<kind>audio|video)/(?P=kind)-(?P<id>\d+)~player(?:_[^/?#&]+)?\.html'
+    _TESTS = [{
+        'url': 'http://www.tagesschau.de/multimedia/video/video-179517~player.html',
+        'md5': '8d09548d5c15debad38bee3a4d15ca21',
+        'info_dict': {
+            'id': '179517',
+            'ext': 'mp4',
+            'title': 'Marie Kristin Boese, ARD Berlin, über den zukünftigen Kurs der AfD',
+            'thumbnail': 're:^https?:.*\.jpg$',
+            'formats': 'mincount:6',
+        },
+    }, {
+        'url': 'https://www.tagesschau.de/multimedia/audio/audio-29417~player.html',
+        'md5': '76e6eec6ebd40740671cf0a2c88617e5',
+        'info_dict': {
+            'id': '29417',
+            'ext': 'mp3',
+            'title': 'Trabi - Bye, bye Rennpappe',
+            'thumbnail': 're:^https?:.*\.jpg$',
+            'formats': 'mincount:2',
+        },
+    }, {
+        'url': 'http://www.tagesschau.de/multimedia/audio/audio-29417~player_autoplay-true.html',
+        'only_matching': True,
+    }]
+    _FORMATS = {
+        'xs': {'quality': 0},
+        's': {'width': 320, 'height': 180, 'quality': 1},
+        'm': {'width': 512, 'height': 288, 'quality': 2},
+        'l': {'width': 960, 'height': 540, 'quality': 3},
+        'xl': {'width': 1280, 'height': 720, 'quality': 4},
+        'xxl': {'quality': 5},
+    }
+    def _extract_via_api(self, kind, video_id):
+        info = self._download_json(
+            'https://www.tagesschau.de/api/multimedia/{0}/{0}-{1}.json'.format(kind, video_id),
+            video_id)
+        title = info['headline']
+        formats = []
+        for media in info['mediadata']:
+            for format_id, format_url in media.items():
+                if determine_ext(format_url) == 'm3u8':
+                    formats.extend(self._extract_m3u8_formats(
+                        format_url, video_id, 'mp4',
+                        entry_protocol='m3u8_native', m3u8_id='hls'))
+                else:
+                    formats.append({
+                        'url': format_url,
+                        'format_id': format_id,
+                        'vcodec': 'none' if kind == 'audio' else None,
+                    })
+        self._sort_formats(formats)
+        timestamp = parse_iso8601(info.get('date'))
+        return {
+            'id': video_id,
+            'title': title,
+            'timestamp': timestamp,
+            'formats': formats,
+        }
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        # kind = mobj.group('kind').lower()
+        # if kind == 'video':
+        #     return self._extract_via_api(kind, video_id)
+        # JSON api does not provide some audio formats (e.g. ogg) thus
+        # extracting audio via webpage
+        webpage = self._download_webpage(url, video_id)
+        title = self._og_search_title(webpage).strip()
+        formats = []
+        for media_json in re.findall(r'({src\s*:\s*["\']http[^}]+type\s*:[^}]+})', webpage):
+            media = self._parse_json(js_to_json(media_json), video_id, fatal=False)
+            if not media:
+                continue
+            src = media.get('src')
+            if not src:
+                return
+            quality = media.get('quality')
+            kind = media.get('type', '').split('/')[0]
+            ext = determine_ext(src)
+            f = {
+                'url': src,
+                'format_id': '%s_%s' % (quality, ext) if quality else ext,
+                'ext': ext,
+                'vcodec': 'none' if kind == 'audio' else None,
+            }
+            f.update(self._FORMATS.get(quality, {}))
+            formats.append(f)
+        self._sort_formats(formats)
+        thumbnail = self._og_search_thumbnail(webpage)
+        return {
+            'id': video_id,
+            'title': title,
+            'thumbnail': thumbnail,
+            'formats': formats,
+        }
 class TagesschauIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?tagesschau\.de/multimedia/(?:[^/]+/)*?[^/#?]+?(?P<id>-?[0-9]+)(?:~_[^/#?]+?)?\.html'
+    _VALID_URL = r'https?://(?:www\.)?tagesschau\.de/(?P<path>[^/]+/(?:[^/]+/)*?(?P<id>[^/#?]+?(?:-?[0-9]+)?))(?:~_?[^/#?]+?)?\.html'
     _TESTS = [{
         'url': 'http://www.tagesschau.de/multimedia/video/video-102143.html',
-        'md5': '917a228bc7df7850783bc47979673a09',
+        'md5': 'f7c27a0eff3bfe8c7727e65f8fe1b1e6',
         'info_dict': {
-            'id': '102143',
+            'id': 'video-102143',
             'ext': 'mp4',
             'title': 'Regierungsumbildung in Athen: Neue Minister in Griechenland vereidigt',
-            'description': 'md5:171feccd9d9b3dd54d05d501568f6359',
+            'description': '18.07.2015 20:10 Uhr',
             'thumbnail': 're:^https?:.*\.jpg$',
         },
     }, {
         'url': 'http://www.tagesschau.de/multimedia/sendung/ts-5727.html',
         'md5': '3c54c1f6243d279b706bde660ceec633',
         'info_dict': {
-            'id': '5727',
+            'id': 'ts-5727',
             'ext': 'mp4',
-            'description': 'md5:695c01bfd98b7e313c501386327aea59',
             'title': 'Sendung: tagesschau \t04.12.2014 20:00 Uhr',
+            'description': 'md5:695c01bfd98b7e313c501386327aea59',
             'thumbnail': 're:^https?:.*\.jpg$',
         },
     }, {
-        'url': 'http://www.tagesschau.de/multimedia/politikimradio/audio-18407.html',
-        'md5': 'aef45de271c4bf0a5db834aa40bf774c',
+        # exclusive audio
+        'url': 'http://www.tagesschau.de/multimedia/audio/audio-29417.html',
+        'md5': '76e6eec6ebd40740671cf0a2c88617e5',
         'info_dict': {
-            'id': '18407',
+            'id': 'audio-29417',
             'ext': 'mp3',
-            'title': 'Flüchtlingsdebatte: Hitzig, aber wenig hilfreich',
-            'description': 'Flüchtlingsdebatte: Hitzig, aber wenig hilfreich',
+            'title': 'Trabi - Bye, bye Rennpappe',
+            'description': 'md5:8687dda862cbbe2cfb2df09b56341317',
             'thumbnail': 're:^https?:.*\.jpg$',
         },
+    }, {
+        # audio in article
+        'url': 'http://www.tagesschau.de/inland/bnd-303.html',
+        'md5': 'e0916c623e85fc1d2b26b78f299d3958',
+        'info_dict': {
+            'id': 'bnd-303',
+            'ext': 'mp3',
+            'title': 'Viele Baustellen für neuen BND-Chef',
+            'description': 'md5:1e69a54be3e1255b2b07cdbce5bcd8b4',
+            'thumbnail': 're:^https?:.*\.jpg$',
+        },
+    }, {
+        'url': 'http://www.tagesschau.de/inland/afd-parteitag-135.html',
+        'info_dict': {
+            'id': 'afd-parteitag-135',
+            'title': 'Möchtegern-Underdog mit Machtanspruch',
+        },
+        'playlist_count': 2,
     }, {
         'url': 'http://www.tagesschau.de/multimedia/sendung/tsg-3771.html',
         'only_matching': True,
@@ -61,88 +197,108 @@ class TagesschauIE(InfoExtractor):
     }, {
         'url': 'http://www.tagesschau.de/multimedia/video/video-102303~_bab-sendung-211.html',
         'only_matching': True,
+    }, {
+        'url': 'http://www.tagesschau.de/100sekunden/index.html',
+        'only_matching': True,
+    }, {
+        # playlist article with collapsing sections
+        'url': 'http://www.tagesschau.de/wirtschaft/faq-freihandelszone-eu-usa-101.html',
+        'only_matching': True,
     }]
-    _FORMATS = {
-        's': {'width': 256, 'height': 144, 'quality': 1},
-        'm': {'width': 512, 'height': 288, 'quality': 2},
-        'l': {'width': 960, 'height': 544, 'quality': 3},
-    }
+    @classmethod
+    def suitable(cls, url):
+        return False if TagesschauPlayerIE.suitable(url) else super(TagesschauIE, cls).suitable(url)
+    def _extract_formats(self, download_text, media_kind):
+        links = re.finditer(
+            r'<div class="button" title="(?P<title>[^"]*)"><a href="(?P<url>[^"]+)">(?P<name>.+?)</a></div>',
+            download_text)
+        formats = []
+        for l in links:
+            link_url = l.group('url')
+            if not link_url:
+                continue
+            format_id = self._search_regex(
+                r'.*/[^/.]+\.([^/]+)\.[^/.]+$', link_url, 'format ID',
+                default=determine_ext(link_url))
+            format = {
+                'format_id': format_id,
+                'url': l.group('url'),
+                'format_name': l.group('name'),
+            }
+            title = l.group('title')
+            if title:
+                if media_kind.lower() == 'video':
+                    m = re.match(
+                        r'''(?x)
+                            Video:\s*(?P<vcodec>[a-zA-Z0-9/._-]+)\s*&\#10;
+                            (?P<width>[0-9]+)x(?P<height>[0-9]+)px&\#10;
+                            (?P<vbr>[0-9]+)kbps&\#10;
+                            Audio:\s*(?P<abr>[0-9]+)kbps,\s*(?P<audio_desc>[A-Za-z\.0-9]+)&\#10;
+                            Gr&ouml;&szlig;e:\s*(?P<filesize_approx>[0-9.,]+\s+[a-zA-Z]*B)''',
+                        title)
+                    if m:
+                        format.update({
+                            'format_note': m.group('audio_desc'),
+                            'vcodec': m.group('vcodec'),
+                            'width': int(m.group('width')),
+                            'height': int(m.group('height')),
+                            'abr': int(m.group('abr')),
+                            'vbr': int(m.group('vbr')),
+                            'filesize_approx': parse_filesize(m.group('filesize_approx')),
+                        })
+                else:
+                    m = re.match(
+                        r'(?P<format>.+?)-Format\s*:\s*(?P<abr>\d+)kbps\s*,\s*(?P<note>.+)',
+                        title)
+                    if m:
+                        format.update({
+                            'format_note': '%s, %s' % (m.group('format'), m.group('note')),
+                            'vcodec': 'none',
+                            'abr': int(m.group('abr')),
+                        })
+            formats.append(format)
+        self._sort_formats(formats)
+        return formats
     def _real_extract(self, url):
-        video_id = self._match_id(url)
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id') or mobj.group('path')
         display_id = video_id.lstrip('-')
         webpage = self._download_webpage(url, display_id)
-        player_url = self._html_search_meta(
-            'twitter:player', webpage, 'player URL', default=None)
-        if player_url:
-            playerpage = self._download_webpage(
-                player_url, display_id, 'Downloading player page')
-            formats = []
-            for media in re.finditer(
-                    r'''(?x)
-                        (?P<q_url>["\'])(?P<url>http://media.+?)(?P=q_url)
-                        ,\s*type:(?P<q_type>["\'])(?P<type>video|audio)/(?P<ext>.+?)(?P=q_type)
-                        (?:,\s*quality:(?P<q_quality>["\'])(?P<quality>.+?)(?P=q_quality))?
-                    ''', playerpage):
-                url = media.group('url')
-                type_ = media.group('type')
-                ext = media.group('ext')
-                res = media.group('quality')
-                f = {
-                    'format_id': '%s_%s' % (res, ext) if res else ext,
-                    'url': url,
-                    'ext': ext,
-                    'vcodec': 'none' if type_ == 'audio' else None,
-                }
-                f.update(self._FORMATS.get(res, {}))
-                formats.append(f)
-            thumbnail = self._og_search_thumbnail(playerpage)
-            title = self._og_search_title(webpage).strip()
-            description = self._og_search_description(webpage).strip()
-        else:
-            download_text = self._search_regex(
-                r'(?s)<p>Wir bieten dieses Video in folgenden Formaten zum Download an:</p>\s*<div class="controls">(.*?)</div>\s*<p>',
-                webpage, 'download links')
-            links = re.finditer(
-                r'<div class="button" title="(?P<title>[^"]*)"><a href="(?P<url>[^"]+)">(?P<name>.+?)</a></div>',
-                download_text)
-            formats = []
-            for l in links:
-                format_id = self._search_regex(
-                    r'.*/[^/.]+\.([^/]+)\.[^/.]+', l.group('url'), 'format ID')
-                format = {
-                    'format_id': format_id,
-                    'url': l.group('url'),
-                    'format_name': l.group('name'),
-                }
-                m = re.match(
-                    r'''(?x)
-                        Video:\s*(?P<vcodec>[a-zA-Z0-9/._-]+)\s*&\#10;
-                        (?P<width>[0-9]+)x(?P<height>[0-9]+)px&\#10;
-                        (?P<vbr>[0-9]+)kbps&\#10;
-                        Audio:\s*(?P<abr>[0-9]+)kbps,\s*(?P<audio_desc>[A-Za-z\.0-9]+)&\#10;
-                        Gr&ouml;&szlig;e:\s*(?P<filesize_approx>[0-9.,]+\s+[a-zA-Z]*B)''',
-                    l.group('title'))
-                if m:
-                    format.update({
-                        'format_note': m.group('audio_desc'),
-                        'vcodec': m.group('vcodec'),
-                        'width': int(m.group('width')),
-                        'height': int(m.group('height')),
-                        'abr': int(m.group('abr')),
-                        'vbr': int(m.group('vbr')),
-                        'filesize_approx': parse_filesize(m.group('filesize_approx')),
-                    })
-                formats.append(format)
-            thumbnail = self._og_search_thumbnail(webpage)
-            description = self._html_search_regex(
-                r'(?s)<p class="teasertext">(.*?)</p>',
-                webpage, 'description', default=None)
-            title = self._html_search_regex(
-                r'<span class="headline".*?>(.*?)</span>', webpage, 'title')
+        title = self._html_search_regex(
+            r'<span[^>]*class="headline"[^>]*>(.+?)</span>',
+            webpage, 'title', default=None) or self._og_search_title(webpage)
+        DOWNLOAD_REGEX = r'(?s)<p>Wir bieten dieses (?P<kind>Video|Audio) in folgenden Formaten zum Download an:</p>\s*<div class="controls">(?P<links>.*?)</div>\s*<p>'
+        webpage_type = self._og_search_property('type', webpage, default=None)
+        if webpage_type == 'website':  # Article
+            entries = []
+            for num, (entry_title, media_kind, download_text) in enumerate(re.findall(
+                    r'(?s)<p[^>]+class="infotext"[^>]*>\s*(?:<a[^>]+>)?\s*<strong>(.+?)</strong>.*?</p>.*?%s' % DOWNLOAD_REGEX,
+                    webpage), 1):
+                entries.append({
+                    'id': '%s-%d' % (display_id, num),
+                    'title': '%s' % entry_title,
+                    'formats': self._extract_formats(download_text, media_kind),
+                })
+            if len(entries) > 1:
+                return self.playlist_result(entries, display_id, title)
+            formats = entries[0]['formats']
+        else:  # Assume single video
+            download_text = self._search_regex(
+                DOWNLOAD_REGEX, webpage, 'download links', group='links')
+            media_kind = self._search_regex(
+                DOWNLOAD_REGEX, webpage, 'media kind', default='Video', group='kind')
+            formats = self._extract_formats(download_text, media_kind)
+        thumbnail = self._og_search_thumbnail(webpage)
+        description = self._html_search_regex(
+            r'(?s)<p class="teasertext">(.*?)</p>',
+            webpage, 'description', default=None)
         self._sort_formats(formats)
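The refactored _extract_formats still mines the download buttons' title attribute for codec, resolution, bitrates and approximate file size; the attribute is an HTML-escaped multi-line string. A sketch of that parse, reusing the regex from the diff on a hypothetical title value:

    import re

    VIDEO_TITLE_RE = r'''(?x)
        Video:\s*(?P<vcodec>[a-zA-Z0-9/._-]+)\s*&\#10;
        (?P<width>[0-9]+)x(?P<height>[0-9]+)px&\#10;
        (?P<vbr>[0-9]+)kbps&\#10;
        Audio:\s*(?P<abr>[0-9]+)kbps,\s*(?P<audio_desc>[A-Za-z\.0-9]+)&\#10;
        Gr&ouml;&szlig;e:\s*(?P<filesize_approx>[0-9.,]+\s+[a-zA-Z]*B)'''

    # hypothetical title attribute as it would appear in the page source
    title = ('Video: H.264&#10;960x544px&#10;1847kbps&#10;'
             'Audio: 189kbps, AAC&#10;Gr&ouml;&szlig;e: 189,62 MB')

    m = re.match(VIDEO_TITLE_RE, title)
    if m:
        print(m.group('vcodec'), m.group('width'), m.group('height'),
              m.group('vbr'), m.group('abr'), m.group('filesize_approx'))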

View File

@@ -27,7 +27,7 @@ class TEDIE(InfoExtractor):
     '''
     _TESTS = [{
         'url': 'http://www.ted.com/talks/dan_dennett_on_our_consciousness.html',
-        'md5': 'fc94ac279feebbce69f21c0c6ee82810',
+        'md5': '0de43ac406aa3e4ea74b66c9c7789b13',
         'info_dict': {
             'id': '102',
             'ext': 'mp4',
@@ -37,21 +37,26 @@ class TEDIE(InfoExtractor):
                             'consciousness, but that half the time our brains are '
                             'actively fooling us.'),
             'uploader': 'Dan Dennett',
-            'width': 854,
+            'width': 853,
             'duration': 1308,
         }
     }, {
         'url': 'http://www.ted.com/watch/ted-institute/ted-bcg/vishal-sikka-the-beauty-and-power-of-algorithms',
-        'md5': '226f4fb9c62380d11b7995efa4c87994',
+        'md5': 'b899ac15e345fb39534d913f7606082b',
         'info_dict': {
-            'id': 'vishal-sikka-the-beauty-and-power-of-algorithms',
+            'id': 'tSVI8ta_P4w',
             'ext': 'mp4',
             'title': 'Vishal Sikka: The beauty and power of algorithms',
             'thumbnail': 're:^https?://.+\.jpg',
-            'description': 'Adaptive, intelligent, and consistent, algorithms are emerging as the ultimate app for everything from matching consumers to products to assessing medical diagnoses. Vishal Sikka shares his appreciation for the algorithm, charting both its inherent beauty and its growing power.',
-        }
+            'description': 'md5:6261fdfe3e02f4f579cbbfc00aff73f4',
+            'upload_date': '20140122',
+            'uploader_id': 'TEDInstitute',
+            'uploader': 'TED Institute',
+        },
+        'add_ie': ['Youtube'],
     }, {
         'url': 'http://www.ted.com/talks/gabby_giffords_and_mark_kelly_be_passionate_be_courageous_be_your_best',
+        'md5': '71b3ab2f4233012dce09d515c9c39ce2',
         'info_dict': {
             'id': '1972',
             'ext': 'mp4',
@@ -102,9 +107,9 @@ class TEDIE(InfoExtractor):
     }]
     _NATIVE_FORMATS = {
-        'low': {'preference': 1, 'width': 320, 'height': 180},
-        'medium': {'preference': 2, 'width': 512, 'height': 288},
-        'high': {'preference': 3, 'width': 854, 'height': 480},
+        'low': {'width': 320, 'height': 180},
+        'medium': {'width': 512, 'height': 288},
+        'high': {'width': 854, 'height': 480},
     }
     def _extract_info(self, webpage):
@@ -171,15 +176,21 @@ class TEDIE(InfoExtractor):
             if finfo:
                 f.update(finfo)
+        http_url = None
         for format_id, resources in talk_info['resources'].items():
             if format_id == 'h264':
                 for resource in resources:
+                    h264_url = resource.get('file')
+                    if not h264_url:
+                        continue
                     bitrate = int_or_none(resource.get('bitrate'))
                     formats.append({
-                        'url': resource['file'],
+                        'url': h264_url,
                        'format_id': '%s-%sk' % (format_id, bitrate),
                        'tbr': bitrate,
                     })
+                    if re.search('\d+k', h264_url):
+                        http_url = h264_url
             elif format_id == 'rtmp':
                 streamer = talk_info.get('streamer')
                 if not streamer:
@@ -195,16 +206,24 @@ class TEDIE(InfoExtractor):
                         'tbr': int_or_none(resource.get('bitrate')),
                     })
             elif format_id == 'hls':
-                hls_formats = self._extract_m3u8_formats(
-                    resources.get('stream'), video_name, 'mp4', m3u8_id=format_id)
-                for f in hls_formats:
-                    if f.get('format_id') == 'hls-meta':
-                        continue
-                    if not f.get('height'):
-                        f['vcodec'] = 'none'
-                    else:
-                        f['acodec'] = 'none'
-                formats.extend(hls_formats)
+                formats.extend(self._extract_m3u8_formats(
+                    resources.get('stream'), video_name, 'mp4', m3u8_id=format_id, fatal=False))
+        m3u8_formats = list(filter(
+            lambda f: f.get('protocol') == 'm3u8' and f.get('vcodec') != 'none' and f.get('resolution') != 'multiple',
+            formats))
+        if http_url:
+            for m3u8_format in m3u8_formats:
+                bitrate = self._search_regex(r'(\d+k)', m3u8_format['url'], 'bitrate', default=None)
+                if not bitrate:
+                    continue
+                f = m3u8_format.copy()
+                f.update({
+                    'url': re.sub(r'\d+k', bitrate, http_url),
+                    'format_id': m3u8_format['format_id'].replace('hls', 'http'),
+                    'protocol': 'http',
+                })
+                formats.append(f)
         audio_download = talk_info.get('audioDownload')
         if audio_download:
@@ -212,7 +231,6 @@ class TEDIE(InfoExtractor):
                 'url': audio_download,
                 'format_id': 'audio',
                 'vcodec': 'none',
-                'preference': -0.5,
             })
         self._sort_formats(formats)
@@ -254,7 +272,11 @@ class TEDIE(InfoExtractor):
         config_json = self._html_search_regex(
             r'"pages\.jwplayer"\s*,\s*({.+?})\s*\)\s*</script>',
-            webpage, 'config')
+            webpage, 'config', default=None)
+        if not config_json:
+            embed_url = self._search_regex(
+                r"<iframe[^>]+class='pages-video-embed__video__object'[^>]+src='([^']+)'", webpage, 'embed url')
+            return self.url_result(self._proto_relative_url(embed_url))
         config = json.loads(config_json)['config']
         video_url = config['video']['url']
         thumbnail = config.get('image', {}).get('url')

View File

@ -3,7 +3,10 @@ from __future__ import unicode_literals
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_etree_fromstring from ..compat import (
compat_etree_fromstring,
compat_urlparse,
)
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
int_or_none, int_or_none,
@ -12,13 +15,22 @@ from ..utils import (
) )
class VevoIE(InfoExtractor): class VevoBaseIE(InfoExtractor):
def _extract_json(self, webpage, video_id, item):
return self._parse_json(
self._search_regex(
r'window\.__INITIAL_STORE__\s*=\s*({.+?});\s*</script>',
webpage, 'initial store'),
video_id)['default'][item]
class VevoIE(VevoBaseIE):
''' '''
Accepts urls from vevo.com or in the format 'vevo:{id}' Accepts urls from vevo.com or in the format 'vevo:{id}'
(currently used by MTVIE and MySpaceIE) (currently used by MTVIE and MySpaceIE)
''' '''
_VALID_URL = r'''(?x) _VALID_URL = r'''(?x)
(?:https?://www\.vevo\.com/watch/(?:[^/]+/(?:[^/]+/)?)?| (?:https?://www\.vevo\.com/watch/(?!playlist|genre)(?:[^/]+/(?:[^/]+/)?)?|
https?://cache\.vevo\.com/m/html/embed\.html\?video=| https?://cache\.vevo\.com/m/html/embed\.html\?video=|
https?://videoplayer\.vevo\.com/embed/embedded\?videoId=| https?://videoplayer\.vevo\.com/embed/embedded\?videoId=|
vevo:) vevo:)
@ -30,11 +42,15 @@ class VevoIE(InfoExtractor):
'info_dict': { 'info_dict': {
'id': 'GB1101300280', 'id': 'GB1101300280',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Somebody to Die For', 'title': 'Hurts - Somebody to Die For',
'timestamp': 1372057200,
'upload_date': '20130624', 'upload_date': '20130624',
'uploader': 'Hurts', 'uploader': 'Hurts',
'timestamp': 1372057200, 'track': 'Somebody to Die For',
'artist': 'Hurts',
'genre': 'Pop',
}, },
'expected_warnings': ['Unable to download SMIL file'],
}, { }, {
'note': 'v3 SMIL format', 'note': 'v3 SMIL format',
'url': 'http://www.vevo.com/watch/cassadee-pope/i-wish-i-could-break-your-heart/USUV71302923', 'url': 'http://www.vevo.com/watch/cassadee-pope/i-wish-i-could-break-your-heart/USUV71302923',
@ -42,23 +58,31 @@ class VevoIE(InfoExtractor):
'info_dict': { 'info_dict': {
'id': 'USUV71302923', 'id': 'USUV71302923',
'ext': 'mp4', 'ext': 'mp4',
'title': 'I Wish I Could Break Your Heart', 'title': 'Cassadee Pope - I Wish I Could Break Your Heart',
'timestamp': 1392796919,
'upload_date': '20140219', 'upload_date': '20140219',
'uploader': 'Cassadee Pope', 'uploader': 'Cassadee Pope',
'timestamp': 1392796919, 'track': 'I Wish I Could Break Your Heart',
'artist': 'Cassadee Pope',
'genre': 'Country',
}, },
'expected_warnings': ['Unable to download SMIL file'],
}, { }, {
'note': 'Age-limited video', 'note': 'Age-limited video',
'url': 'https://www.vevo.com/watch/justin-timberlake/tunnel-vision-explicit/USRV81300282', 'url': 'https://www.vevo.com/watch/justin-timberlake/tunnel-vision-explicit/USRV81300282',
'info_dict': { 'info_dict': {
'id': 'USRV81300282', 'id': 'USRV81300282',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Tunnel Vision (Explicit)', 'title': 'Justin Timberlake - Tunnel Vision (Explicit)',
'upload_date': '20130703',
'age_limit': 18, 'age_limit': 18,
'uploader': 'Justin Timberlake',
'timestamp': 1372888800, 'timestamp': 1372888800,
'upload_date': '20130703',
'uploader': 'Justin Timberlake',
'track': 'Tunnel Vision (Explicit)',
'artist': 'Justin Timberlake',
'genre': 'Pop',
}, },
'expected_warnings': ['Unable to download SMIL file'],
}, { }, {
'note': 'No video_info', 'note': 'No video_info',
'url': 'http://www.vevo.com/watch/k-camp-1/Till-I-Die/USUV71503000', 'url': 'http://www.vevo.com/watch/k-camp-1/Till-I-Die/USUV71503000',
@@ -66,12 +90,32 @@ class VevoIE(InfoExtractor):
'info_dict': { 'info_dict': {
'id': 'USUV71503000', 'id': 'USUV71503000',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Till I Die', 'title': 'K Camp - Till I Die',
'upload_date': '20151207',
'age_limit': 18, 'age_limit': 18,
'uploader': 'K Camp',
'timestamp': 1449468000, 'timestamp': 1449468000,
'upload_date': '20151207',
'uploader': 'K Camp',
'track': 'Till I Die',
'artist': 'K Camp',
'genre': 'Rap/Hip-Hop',
}, },
}, {
'note': 'Only available via webpage',
'url': 'http://www.vevo.com/watch/GBUV71600656',
'md5': '67e79210613865b66a47c33baa5e37fe',
'info_dict': {
'id': 'GBUV71600656',
'ext': 'mp4',
'title': 'ABC - Viva Love',
'age_limit': 0,
'timestamp': 1461830400,
'upload_date': '20160428',
'uploader': 'ABC',
'track': 'Viva Love',
'artist': 'ABC',
'genre': 'Pop',
},
'expected_warnings': ['Failed to download video versions info'],
}] }]
_SMIL_BASE_URL = 'http://smil.lvl3.vevo.com' _SMIL_BASE_URL = 'http://smil.lvl3.vevo.com'
_SOURCE_TYPES = { _SOURCE_TYPES = {
@@ -146,8 +190,8 @@ class VevoIE(InfoExtractor):
auth_info = self._parse_json(webpage, video_id) auth_info = self._parse_json(webpage, video_id)
self._api_url_template = self.http_scheme() + '//apiv2.vevo.com/%s?token=' + auth_info['access_token'] self._api_url_template = self.http_scheme() + '//apiv2.vevo.com/%s?token=' + auth_info['access_token']
def _call_api(self, path, video_id, note, errnote, fatal=True): def _call_api(self, path, *args, **kwargs):
return self._download_json(self._api_url_template % path, video_id, note, errnote) return self._download_json(self._api_url_template % path, *args, **kwargs)
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
@@ -157,9 +201,11 @@ class VevoIE(InfoExtractor):
json_url, video_id, 'Downloading video info', 'Unable to download info') json_url, video_id, 'Downloading video info', 'Unable to download info')
video_info = response.get('video') or {} video_info = response.get('video') or {}
video_versions = video_info.get('videoVersions') video_versions = video_info.get('videoVersions')
artist = None
featured_artist = None
uploader = None uploader = None
timestamp = None
view_count = None view_count = None
timestamp = None
formats = [] formats = []
if not video_info: if not video_info:
@@ -183,12 +229,19 @@ class VevoIE(InfoExtractor):
video_versions = self._call_api( video_versions = self._call_api(
'video/%s/streams' % video_id, video_id, 'video/%s/streams' % video_id, video_id,
'Downloading video versions info', 'Downloading video versions info',
'Failed to download video versions info') 'Failed to download video versions info',
fatal=False)
# Some videos are only available via webpage (e.g.
# https://github.com/rg3/youtube-dl/issues/9366)
if not video_versions:
webpage = self._download_webpage(url, video_id)
video_versions = self._extract_json(webpage, video_id, 'streams')[video_id][0]
timestamp = parse_iso8601(video_info.get('releaseDate')) timestamp = parse_iso8601(video_info.get('releaseDate'))
artists = video_info.get('artists') artists = video_info.get('artists')
if artists: if artists:
uploader = artists[0]['name'] artist = uploader = artists[0]['name']
view_count = int_or_none(video_info.get('views', {}).get('total')) view_count = int_or_none(video_info.get('views', {}).get('total'))
for video_version in video_versions: for video_version in video_versions:
@@ -241,7 +294,11 @@ class VevoIE(InfoExtractor):
scale=1000) scale=1000)
artists = video_info.get('mainArtists') artists = video_info.get('mainArtists')
if artists: if artists:
uploader = artists[0]['artistName'] artist = uploader = artists[0]['artistName']
featured_artists = video_info.get('featuredArtists')
if featured_artists:
featured_artist = featured_artists[0]['artistName']
smil_parsed = False smil_parsed = False
for video_version in video_info['videoVersions']: for video_version in video_info['videoVersions']:
@@ -278,7 +335,11 @@ class VevoIE(InfoExtractor):
smil_parsed = True smil_parsed = True
self._sort_formats(formats) self._sort_formats(formats)
title = video_info['title'] track = video_info['title']
if featured_artist:
artist = '%s ft. %s' % (artist, featured_artist)
title = '%s - %s' % (artist, track) if artist else track
genre = video_info.get('genres', [None])[0]
is_explicit = video_info.get('isExplicit') is_explicit = video_info.get('isExplicit')
if is_explicit is True: if is_explicit is True:
@@ -300,4 +361,75 @@ class VevoIE(InfoExtractor):
'duration': duration, 'duration': duration,
'view_count': view_count, 'view_count': view_count,
'age_limit': age_limit, 'age_limit': age_limit,
'track': track,
'artist': uploader,
'genre': genre,
} }
class VevoPlaylistIE(VevoBaseIE):
_VALID_URL = r'https?://www\.vevo\.com/watch/(?P<kind>playlist|genre)/(?P<id>[^/?#&]+)'
_TESTS = [{
'url': 'http://www.vevo.com/watch/playlist/dadbf4e7-b99f-4184-9670-6f0e547b6a29',
'info_dict': {
'id': 'dadbf4e7-b99f-4184-9670-6f0e547b6a29',
'title': 'Best-Of: Birdman',
},
'playlist_count': 10,
}, {
'url': 'http://www.vevo.com/watch/genre/rock',
'info_dict': {
'id': 'rock',
'title': 'Rock',
},
'playlist_count': 20,
}, {
'url': 'http://www.vevo.com/watch/playlist/dadbf4e7-b99f-4184-9670-6f0e547b6a29?index=0',
'md5': '32dcdfddddf9ec6917fc88ca26d36282',
'info_dict': {
'id': 'USCMV1100073',
'ext': 'mp4',
'title': 'Birdman - Y.U. MAD',
'timestamp': 1323417600,
'upload_date': '20111209',
'uploader': 'Birdman',
'track': 'Y.U. MAD',
'artist': 'Birdman',
'genre': 'Rap/Hip-Hop',
},
'expected_warnings': ['Unable to download SMIL file'],
}, {
'url': 'http://www.vevo.com/watch/genre/rock?index=0',
'only_matching': True,
}]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
playlist_id = mobj.group('id')
playlist_kind = mobj.group('kind')
webpage = self._download_webpage(url, playlist_id)
qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
index = qs.get('index', [None])[0]
if index:
video_id = self._search_regex(
r'<meta[^>]+content=(["\'])vevo://video/(?P<id>.+?)\1[^>]*>',
webpage, 'video id', default=None, group='id')
if video_id:
return self.url_result('vevo:%s' % video_id, VevoIE.ie_key())
playlists = self._extract_json(webpage, playlist_id, '%ss' % playlist_kind)
playlist = (list(playlists.values())[0]
if playlist_kind == 'playlist' else playlists[playlist_id])
entries = [
self.url_result('vevo:%s' % src, VevoIE.ie_key())
for src in playlist['isrcs']]
return self.playlist_result(
entries, playlist.get('playlistId') or playlist_id,
playlist.get('name'), playlist.get('description'))
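Both the new webpage fallback in VevoIE and VevoPlaylistIE go through the _extract_json helper, which reads the page's window.__INITIAL_STORE__ blob. A self-contained sketch of that parsing step, using an invented page fragment of the same shape (only the regex is taken from the code above):

```
# Sketch only: the page fragment is invented, but the regex is the one the
# _extract_json helper above applies to the real vevo.com markup.
import json
import re

page = ('<script>window.__INITIAL_STORE__ = '
        '{"default": {"streams": {}, "playlists": {}}}; </script>')
store = json.loads(re.search(
    r'window\.__INITIAL_STORE__\s*=\s*({.+?});\s*</script>', page).group(1))
print(store['default']['streams'])    # item read by the VevoIE webpage fallback
print(store['default']['playlists'])  # item read by VevoPlaylistIE ('playlists' or 'genres')
```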

View File

@@ -43,7 +43,7 @@ class VLiveIE(InfoExtractor):
status_params = self._download_json( status_params = self._download_json(
'http://www.vlive.tv/video/status?videoSeq=%s' % video_id, 'http://www.vlive.tv/video/status?videoSeq=%s' % video_id,
video_id, 'Downloading JSON status', video_id, 'Downloading JSON status',
headers={'Referer': url}) headers={'Referer': url.encode('utf-8')})
status = status_params.get('status') status = status_params.get('status')
air_start = status_params.get('onAirStartAt', '') air_start = status_params.get('onAirStartAt', '')
is_live = status_params.get('isLive') is_live = status_params.get('isLive')

View File

@@ -4,16 +4,22 @@ from __future__ import unicode_literals
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
int_or_none, int_or_none,
float_or_none,
unified_strdate, unified_strdate,
) )
class WSJIE(InfoExtractor): class WSJIE(InfoExtractor):
_VALID_URL = r'https?://video-api\.wsj\.com/api-video/player/iframe\.html\?guid=(?P<id>[a-zA-Z0-9-]+)' _VALID_URL = r'''(?x)https?://
(?:
video-api\.wsj\.com/api-video/player/iframe\.html\?guid=|
(?:www\.)?wsj\.com/video/[^/]+/
)
(?P<id>[a-zA-Z0-9-]+)'''
IE_DESC = 'Wall Street Journal' IE_DESC = 'Wall Street Journal'
_TEST = { _TESTS = [{
'url': 'http://video-api.wsj.com/api-video/player/iframe.html?guid=1BD01A4C-BFE8-40A5-A42F-8A8AF9898B1A', 'url': 'http://video-api.wsj.com/api-video/player/iframe.html?guid=1BD01A4C-BFE8-40A5-A42F-8A8AF9898B1A',
'md5': '9747d7a6ebc2f4df64b981e1dde9efa9', 'md5': 'e230a5bb249075e40793b655a54a02e4',
'info_dict': { 'info_dict': {
'id': '1BD01A4C-BFE8-40A5-A42F-8A8AF9898B1A', 'id': '1BD01A4C-BFE8-40A5-A42F-8A8AF9898B1A',
'ext': 'mp4', 'ext': 'mp4',
@@ -24,65 +30,60 @@ class WSJIE(InfoExtractor):
'duration': 90, 'duration': 90,
'title': 'Bills Coach Rex Ryan Updates His Old Jets Tattoo', 'title': 'Bills Coach Rex Ryan Updates His Old Jets Tattoo',
}, },
} }, {
'url': 'http://www.wsj.com/video/can-alphabet-build-a-smarter-city/359DDAA8-9AC1-489C-82E6-0429C1E430E0.html',
'only_matching': True,
}]
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
bitrates = [128, 174, 264, 320, 464, 664, 1264]
api_url = ( api_url = (
'http://video-api.wsj.com/api-video/find_all_videos.asp?' 'http://video-api.wsj.com/api-video/find_all_videos.asp?'
'type=guid&count=1&query=%s&' 'type=guid&count=1&query=%s&fields=type,hls,videoMP4List,'
'fields=hls,adZone,thumbnailList,guid,state,secondsUntilStartTime,' 'thumbnailList,author,description,name,duration,videoURL,'
'author,description,name,linkURL,videoStillURL,duration,videoURL,' 'titletag,formattedCreationDate,keywords,editor' % video_id)
'adCategory,catastrophic,linkShortURL,doctypeID,youtubeID,'
'titletag,rssURL,wsj-section,wsj-subsection,allthingsd-section,'
'allthingsd-subsection,sm-section,sm-subsection,provider,'
'formattedCreationDate,keywords,keywordsOmniture,column,editor,'
'emailURL,emailPartnerID,showName,omnitureProgramName,'
'omnitureVideoFormat,linkRelativeURL,touchCastID,'
'omniturePublishDate,%s') % (
video_id, ','.join('video%dkMP4Url' % br for br in bitrates))
info = self._download_json(api_url, video_id)['items'][0] info = self._download_json(api_url, video_id)['items'][0]
# Thumbnails are conveniently in the correct format already
thumbnails = info.get('thumbnailList')
creator = info.get('author')
uploader_id = info.get('editor')
categories = info.get('keywords')
duration = int_or_none(info.get('duration'))
upload_date = unified_strdate(
info.get('formattedCreationDate'), day_first=False)
title = info.get('name', info.get('titletag')) title = info.get('name', info.get('titletag'))
formats = [{ formats = []
'format_id': 'f4m',
'format_note': 'f4m (meta URL)', f4m_url = info.get('videoURL')
'url': info['videoURL'], if f4m_url:
}] formats.extend(self._extract_f4m_formats(
if info.get('hls'): f4m_url, video_id, f4m_id='hds', fatal=False))
m3u8_url = info.get('hls')
if m3u8_url:
formats.extend(self._extract_m3u8_formats( formats.extend(self._extract_m3u8_formats(
info['hls'], video_id, ext='mp4', info['hls'], video_id, ext='mp4',
preference=0, entry_protocol='m3u8_native')) entry_protocol='m3u8_native', m3u8_id='hls', fatal=False))
for br in bitrates:
field = 'video%dkMP4Url' % br for v in info.get('videoMP4List', []):
if info.get(field): mp4_url = v.get('url')
formats.append({ if not mp4_url:
'format_id': 'mp4-%d' % br, continue
'container': 'mp4', tbr = int_or_none(v.get('bitrate'))
'tbr': br, formats.append({
'url': info[field], 'url': mp4_url,
}) 'format_id': 'http' + ('-%d' % tbr if tbr else ''),
'tbr': tbr,
'width': int_or_none(v.get('width')),
'height': int_or_none(v.get('height')),
'fps': float_or_none(v.get('fps')),
})
self._sort_formats(formats) self._sort_formats(formats)
return { return {
'id': video_id, 'id': video_id,
'formats': formats, 'formats': formats,
'thumbnails': thumbnails, # Thumbnails are conveniently in the correct format already
'creator': creator, 'thumbnails': info.get('thumbnailList'),
'uploader_id': uploader_id, 'creator': info.get('author'),
'duration': duration, 'uploader_id': info.get('editor'),
'upload_date': upload_date, 'duration': int_or_none(info.get('duration')),
'upload_date': unified_strdate(info.get(
'formattedCreationDate'), day_first=False),
'title': title, 'title': title,
'categories': categories, 'categories': info.get('keywords'),
} }
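The rewritten extractor builds HTTP formats from the videoMP4List entries returned by find_all_videos.asp rather than probing a fixed bitrate list. A hedged sketch of that mapping; the field names follow the code above, while the entry values are made up:

```
# Hypothetical videoMP4List entry: field names as read by the extractor
# above, values invented.
v = {'url': 'http://m.wsj.net/video/sample_664k.mp4', 'bitrate': 664,
     'width': 1280, 'height': 720, 'fps': 25.0}

tbr = v.get('bitrate')
fmt = {
    'url': v['url'],
    'format_id': 'http' + ('-%d' % tbr if tbr else ''),
    'tbr': tbr,
    'width': v.get('width'),
    'height': v.get('height'),
    'fps': v.get('fps'),
}
print(fmt['format_id'])  # -> http-664
```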

View File

@@ -0,0 +1,158 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..compat import compat_urllib_parse_unquote
from ..utils import int_or_none
class XiamiBaseIE(InfoExtractor):
_API_BASE_URL = 'http://www.xiami.com/song/playlist/cat/json/id'
def _extract_track(self, track, track_id=None):
title = track['title']
track_url = self._decrypt(track['location'])
subtitles = {}
lyrics_url = track.get('lyric_url') or track.get('lyric')
if lyrics_url and lyrics_url.startswith('http'):
subtitles['origin'] = [{'url': lyrics_url}]
return {
'id': track.get('song_id') or track_id,
'url': track_url,
'title': title,
'thumbnail': track.get('pic') or track.get('album_pic'),
'duration': int_or_none(track.get('length')),
'creator': track.get('artist', '').split(';')[0],
'track': title,
'album': track.get('album_name'),
'artist': track.get('artist'),
'subtitles': subtitles,
}
def _extract_tracks(self, item_id, typ=None):
playlist = self._download_json(
'%s/%s%s' % (self._API_BASE_URL, item_id, '/type/%s' % typ if typ else ''), item_id)
return [
self._extract_track(track, item_id)
for track in playlist['data']['trackList']]
@staticmethod
def _decrypt(origin):
n = int(origin[0])
origin = origin[1:]
short_lenth = len(origin) // n
long_num = len(origin) - short_lenth * n
l = tuple()
for i in range(0, n):
length = short_lenth
if i < long_num:
length += 1
l += (origin[0:length], )
origin = origin[length:]
ans = ''
for i in range(0, short_lenth + 1):
for j in range(0, n):
if len(l[j]) > i:
ans += l[j][i]
return compat_urllib_parse_unquote(ans).replace('^', '0')
class XiamiSongIE(XiamiBaseIE):
IE_NAME = 'xiami:song'
IE_DESC = '虾米音乐'
_VALID_URL = r'https?://(?:www\.)?xiami\.com/song/(?P<id>[0-9]+)'
_TESTS = [{
'url': 'http://www.xiami.com/song/1775610518',
'md5': '521dd6bea40fd5c9c69f913c232cb57e',
'info_dict': {
'id': '1775610518',
'ext': 'mp3',
'title': 'Woman',
'thumbnail': r're:http://img\.xiami\.net/images/album/.*\.jpg',
'duration': 265,
'creator': 'HONNE',
'track': 'Woman',
'album': 'Woman',
'artist': 'HONNE',
'subtitles': {
'origin': [{
'ext': 'lrc',
}],
},
}
}, {
'url': 'http://www.xiami.com/song/1775256504',
'md5': '932a3abd45c6aa2b1fdbe028fcb4c4fc',
'info_dict': {
'id': '1775256504',
'ext': 'mp3',
'title': '悟空',
'thumbnail': r're:http://img\.xiami\.net/images/album/.*\.jpg',
'duration': 200,
'creator': '戴荃',
'track': '悟空',
'album': '悟空',
'artist': '戴荃',
'subtitles': {
'origin': [{
'ext': 'lrc',
}],
},
}
}]
def _real_extract(self, url):
return self._extract_tracks(self._match_id(url))[0]
class XiamiPlaylistBaseIE(XiamiBaseIE):
def _real_extract(self, url):
item_id = self._match_id(url)
return self.playlist_result(self._extract_tracks(item_id, self._TYPE), item_id)
class XiamiAlbumIE(XiamiPlaylistBaseIE):
IE_NAME = 'xiami:album'
IE_DESC = '虾米音乐 - 专辑'
_VALID_URL = r'https?://(?:www\.)?xiami\.com/album/(?P<id>[0-9]+)'
_TYPE = '1'
_TESTS = [{
'url': 'http://www.xiami.com/album/2100300444',
'info_dict': {
'id': '2100300444',
},
'playlist_count': 10,
}, {
'url': 'http://www.xiami.com/album/512288?spm=a1z1s.6843761.1110925389.6.hhE9p9',
'only_matching': True,
}]
class XiamiArtistIE(XiamiPlaylistBaseIE):
IE_NAME = 'xiami:artist'
IE_DESC = '虾米音乐 - 歌手'
_VALID_URL = r'https?://(?:www\.)?xiami\.com/artist/(?P<id>[0-9]+)'
_TYPE = '2'
_TEST = {
'url': 'http://www.xiami.com/artist/2132?spm=0.0.0.0.dKaScp',
'info_dict': {
'id': '2132',
},
'playlist_count': 20,
}
class XiamiCollectionIE(XiamiPlaylistBaseIE):
IE_NAME = 'xiami:collection'
IE_DESC = '虾米音乐 - 精选集'
_VALID_URL = r'https?://(?:www\.)?xiami\.com/collect/(?P<id>[0-9]+)'
_TYPE = '3'
_TEST = {
'url': 'http://www.xiami.com/collect/156527391?spm=a1z1s.2943601.6856193.12.4jpBnr',
'info_dict': {
'id': '156527391',
},
'playlist_mincount': 29,
}
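The _decrypt helper in XiamiBaseIE above undoes a simple row/column scrambling of the playlist's location field. A toy, standalone re-implementation (not the extractor itself) run on a made-up payload; real values are percent-encoded URLs in which '^' stands for '0':

```
# Toy sketch of the same scheme: the first character is the number of rows,
# the remainder was written out row by row, and reading it back column by
# column restores the original string.
def toy_decrypt(origin):
    n = int(origin[0])
    body = origin[1:]
    short, extra = divmod(len(body), n)
    rows, pos = [], 0
    for i in range(n):
        length = short + 1 if i < extra else short
        rows.append(body[pos:pos + length])
        pos += length
    return ''.join(row[i] for i in range(short + 1) for row in rows if len(row) > i)

print(toy_decrypt('3adgbehcf'))  # -> abcdefgh
```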

View File

@@ -389,23 +389,30 @@ class FFmpegEmbedSubtitlePP(FFmpegPostProcessor):
class FFmpegMetadataPP(FFmpegPostProcessor): class FFmpegMetadataPP(FFmpegPostProcessor):
def run(self, info): def run(self, info):
metadata = {} metadata = {}
if info.get('title') is not None:
metadata['title'] = info['title'] def add(meta_list, info_list=None):
if info.get('upload_date') is not None: if not info_list:
metadata['date'] = info['upload_date'] info_list = meta_list
if info.get('artist') is not None: if not isinstance(meta_list, (list, tuple)):
metadata['artist'] = info['artist'] meta_list = (meta_list,)
elif info.get('uploader') is not None: if not isinstance(info_list, (list, tuple)):
metadata['artist'] = info['uploader'] info_list = (info_list,)
elif info.get('uploader_id') is not None: for info_f in info_list:
metadata['artist'] = info['uploader_id'] if info.get(info_f) is not None:
if info.get('description') is not None: for meta_f in meta_list:
metadata['description'] = info['description'] metadata[meta_f] = info[info_f]
metadata['comment'] = info['description'] break
if info.get('webpage_url') is not None:
metadata['purl'] = info['webpage_url'] add('title', ('track', 'title'))
if info.get('album') is not None: add('date', 'upload_date')
metadata['album'] = info['album'] add(('description', 'comment'), 'description')
add('purl', 'webpage_url')
add('track', 'track_number')
add('artist', ('artist', 'creator', 'uploader', 'uploader_id'))
add('genre')
add('album')
add('album_artist')
add('disc', 'disc_number')
if not metadata: if not metadata:
self._downloader.to_screen('[ffmpeg] There isn\'t any metadata to add') self._downloader.to_screen('[ffmpeg] There isn\'t any metadata to add')
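The refactored FFmpegMetadataPP fills its metadata dict through a small add() helper that takes the first available info field for each metadata key. A self-contained sketch of that behavior; the helper body mirrors the diff above, while the sample info dict is invented:

```
# Standalone sketch: build_metadata() stands in for the postprocessor;
# the add() body mirrors the diff above, the sample info dict is invented.
def build_metadata(info):
    metadata = {}

    def add(meta_list, info_list=None):
        if not info_list:
            info_list = meta_list
        if not isinstance(meta_list, (list, tuple)):
            meta_list = (meta_list,)
        if not isinstance(info_list, (list, tuple)):
            info_list = (info_list,)
        for info_f in info_list:
            if info.get(info_f) is not None:
                for meta_f in meta_list:
                    metadata[meta_f] = info[info_f]
                break

    add('title', ('track', 'title'))
    add('artist', ('artist', 'creator', 'uploader', 'uploader_id'))
    add('genre')
    return metadata

print(build_metadata({'title': 'Clip', 'track': 'Song',
                      'uploader': 'Someone', 'genre': 'Pop'}))
# -> {'title': 'Song', 'artist': 'Someone', 'genre': 'Pop'}
```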

View File

@@ -1,3 +1,3 @@
from __future__ import unicode_literals from __future__ import unicode_literals
__version__ = '2016.04.24' __version__ = '2016.05.01'