Merge pull request #3 from rg3/master

update
2016-03-19 19:25:00 +05:30 · 2016-03-19 19:25:00 +05:30 · e58df5787a
commit e58df5787a
parent 73fd1ab30a 782b1b5bd1
27 changed files with 620 additions and 154 deletions
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@ -81,6 +81,7 @@
 - **BokeCC**
 - **Bpb**: Bundeszentrale für politische Bildung
 - **BR**: Bayerischer Rundfunk Mediathek
 - **BravoTV**
 - **Break**
 - **brightcove:legacy**
 - **brightcove:new**
@ -499,6 +500,7 @@
 - **Restudy**
 - **ReverbNation**
 - **Revision3**
 - **RICE**
 - **RingTV**
 - **RottenTomatoes**
 - **Roxwel**
@ -617,6 +619,7 @@
 - **ThePlatform**
 - **ThePlatformFeed**
 - **TheSixtyOne**
 - **TheStar**
 - **ThisAmericanLife**
 - **ThisAV**
 - **THVideo**
@ -650,6 +653,7 @@
 - **tv.dfb.de**
 - **TV2**
 - **TV2Article**
 - **TV3**
 - **TV4**: tv4.se and tv4play.se
 - **TVC**
 - **TVCArticle**
--- a/test/test_YoutubeDL.py
+++ b/test/test_YoutubeDL.py
@ -222,6 +222,11 @@ class TestFormatSelection(unittest.TestCase):
        downloaded = ydl.downloaded_info_dicts[0]
        self.assertEqual(downloaded['format_id'], 'dash-video-low')
        ydl = YDL({'format': 'bestvideo[format_id^=dash][format_id$=low]'})
        ydl.process_ie_result(info_dict.copy())
        downloaded = ydl.downloaded_info_dicts[0]
        self.assertEqual(downloaded['format_id'], 'dash-video-low')
        formats = [
            {'format_id': 'vid-vcodec-dot', 'ext': 'mp4', 'preference': 1, 'vcodec': 'avc1.123456', 'acodec': 'none', 'url': TEST_URL},
        ]
--- a/test/test_utils.py
+++ b/test/test_utils.py
@ -28,6 +28,7 @@ from youtube_dl.utils import (
    encodeFilename,
    escape_rfc3986,
    escape_url,
    extract_attributes,
    ExtractorError,
    find_xpath_attr,
    fix_xml_ampersands,
@ -77,6 +78,7 @@ from youtube_dl.utils import (
    cli_bool_option,
 )
 from youtube_dl.compat import (
    compat_chr,
    compat_etree_fromstring,
    compat_urlparse,
    compat_parse_qs,
@ -629,6 +631,44 @@ class TestUtil(unittest.TestCase):
        on = js_to_json('{"abc": "def",}')
        self.assertEqual(json.loads(on), {'abc': 'def'})
    def test_extract_attributes(self):
        # Exercises extract_attributes() on a single opening tag and checks
        # the resulting {attribute name: value} dict.

        # Quoting styles: double-quoted, single-quoted and unquoted values,
        # including a quote character of the other kind inside the value.
        self.assertEqual(extract_attributes('<e x="y">'), {'x': 'y'})
        self.assertEqual(extract_attributes("<e x='y'>"), {'x': 'y'})
        self.assertEqual(extract_attributes('<e x=y>'), {'x': 'y'})
        self.assertEqual(extract_attributes('<e x="a \'b\' c">'), {'x': "a 'b' c"})
        self.assertEqual(extract_attributes('<e x=\'a "b" c\'>'), {'x': 'a "b" c'})
        # Character references: decimal, hexadecimal and named entities are
        # decoded; an unknown/unterminated reference is left untouched.
        self.assertEqual(extract_attributes('<e x="&#121;">'), {'x': 'y'})
        self.assertEqual(extract_attributes('<e x="&#x79;">'), {'x': 'y'})
        self.assertEqual(extract_attributes('<e x="&amp;">'), {'x': '&'})  # XML
        self.assertEqual(extract_attributes('<e x="&quot;">'), {'x': '"'})
        self.assertEqual(extract_attributes('<e x="&pound;">'), {'x': '£'})  # HTML 3.2
        self.assertEqual(extract_attributes('<e x="&lambda;">'), {'x': 'λ'})  # HTML 4.0
        self.assertEqual(extract_attributes('<e x="&foo">'), {'x': '&foo'})
        self.assertEqual(extract_attributes('<e x="\'">'), {'x': "'"})
        self.assertEqual(extract_attributes('<e x=\'"\'>'), {'x': '"'})
        # Attributes without a value map to None.
        self.assertEqual(extract_attributes('<e x >'), {'x': None})
        self.assertEqual(extract_attributes('<e x=y a>'), {'x': 'y', 'a': None})
        self.assertEqual(extract_attributes('<e x= y>'), {'x': 'y'})
        # A repeated attribute name keeps the last value seen.
        self.assertEqual(extract_attributes('<e x=1 y=2 x=3>'), {'y': '2', 'x': '3'})
        # Newlines around names, '=' and values are ignored, but newlines
        # inside a quoted value are preserved.
        self.assertEqual(extract_attributes('<e \nx=\ny\n>'), {'x': 'y'})
        self.assertEqual(extract_attributes('<e \nx=\n"y"\n>'), {'x': 'y'})
        self.assertEqual(extract_attributes("<e \nx=\n'y'\n>"), {'x': 'y'})
        self.assertEqual(extract_attributes('<e \nx="\ny\n">'), {'x': '\ny\n'})
        # Names are case-insensitive, so 'x' and 'X' count as the same
        # attribute and the last one wins.
        self.assertEqual(extract_attributes('<e CAPS=x>'), {'caps': 'x'})  # Names lowercased
        self.assertEqual(extract_attributes('<e x=1 X=2>'), {'x': '2'})
        self.assertEqual(extract_attributes('<e X=1 x=2>'), {'x': '2'})
        # Punctuation in names and non-ASCII text in values pass through.
        self.assertEqual(extract_attributes('<e _:funny-name1=1>'), {'_:funny-name1': '1'})
        self.assertEqual(extract_attributes('<e x="Fáilte 世界 \U0001f600">'), {'x': 'Fáilte 世界 \U0001f600'})
        self.assertEqual(extract_attributes('<e x="décompose&#769;">'), {'x': 'décompose\u0301'})
        # "Narrow" Python builds don't support unicode code points outside BMP.
        # Probe for that first and only run the astral-plane reference check
        # when compat_chr() can actually produce such a character.
        try:
            compat_chr(0x10000)
            supports_outside_bmp = True
        except ValueError:
            supports_outside_bmp = False
        if supports_outside_bmp:
            self.assertEqual(extract_attributes('<e x="Smile &#128512;!">'), {'x': 'Smile \U0001f600!'})
    def test_clean_html(self):
        self.assertEqual(clean_html('a:\nb'), 'a: b')
        self.assertEqual(clean_html('a:\n   "b"'), 'a:    "b"')
@ -662,6 +702,8 @@ class TestUtil(unittest.TestCase):
        self.assertEqual(parse_count('1.000'), 1000)
        self.assertEqual(parse_count('1.1k'), 1100)
        self.assertEqual(parse_count('1.1kk'), 1100000)
        self.assertEqual(parse_count('1.1kk '), 1100000)
        self.assertEqual(parse_count('1.1kk views'), 1100000)
    def test_version_tuple(self):
        self.assertEqual(version_tuple('1'), (1,))
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@ -905,7 +905,7 @@ class YoutubeDL(object):
                '*=': lambda attr, value: value in attr,
            }
            str_operator_rex = re.compile(r'''(?x)
-                \s*(?P<key>ext|acodec|vcodec|container|protocol)
+                \s*(?P<key>ext|acodec|vcodec|container|protocol|format_id)
                \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?
                \s*(?P<value>[a-zA-Z0-9._-]+)
                \s*$
--- a/youtube_dl/compat.py
+++ b/youtube_dl/compat.py
@ -77,6 +77,11 @@ try:
 except ImportError:  # Python 2
    from urllib import urlretrieve as compat_urlretrieve
 try:
    from html.parser import HTMLParser as compat_HTMLParser
 except ImportError:  # Python 2
    from HTMLParser import HTMLParser as compat_HTMLParser
 try:
    from subprocess import DEVNULL
@ -251,6 +256,16 @@ else:
                el.text = el.text.decode('utf-8')
        return doc
 if sys.version_info >= (2, 7):
    # Nothing to work around: hand the expression back untouched.
    compat_xpath = lambda xpath: xpath
 else:
    # Python 2.6 quirk: when the xpath is a unicode string, './/node' fails
    # to match a node that is a direct child of '.', so unicode expressions
    # are converted to ASCII byte strings first.
    def compat_xpath(xpath):
        return xpath.encode('ascii') if isinstance(xpath, compat_str) else xpath
 try:
    from urllib.parse import parse_qs as compat_parse_qs
 except ImportError:  # Python 2
@ -543,6 +558,7 @@ else:
    from tokenize import generate_tokens as compat_tokenize_tokenize
 __all__ = [
    'compat_HTMLParser',
    'compat_HTTPError',
    'compat_basestring',
    'compat_chr',
@ -579,6 +595,7 @@ __all__ = [
    'compat_urlparse',
    'compat_urlretrieve',
    'compat_xml_parse_error',
    'compat_xpath',
    'shlex_quote',
    'subprocess_check_output',
    'workaround_optparse_bug9161',
--- a/youtube_dl/extractor/init.py
+++ b/youtube_dl/extractor/init.py
@ -81,6 +81,7 @@ from .bloomberg import BloombergIE
 from .bokecc import BokeCCIE
 from .bpb import BpbIE
 from .br import BRIE
 from .bravotv import BravoTVIE
 from .breakcom import BreakIE
 from .brightcove import (
    BrightcoveLegacyIE,
@ -135,6 +136,7 @@ from .collegerama import CollegeRamaIE
 from .comedycentral import ComedyCentralIE, ComedyCentralShowsIE
 from .comcarcoff import ComCarCoffIE
 from .commonmistakes import CommonMistakesIE, UnicodeBOMIE
 from .commonprotocols import RtmpIE
 from .condenast import CondeNastIE
 from .cracked import CrackedIE
 from .crackle import CrackleIE
@ -282,6 +284,7 @@ from .goshgay import GoshgayIE
 from .gputechconf import GPUTechConfIE
 from .groupon import GrouponIE
 from .hark import HarkIE
 from .hbo import HBOIE
 from .hearthisat import HearThisAtIE
 from .heise import HeiseIE
 from .hellporno import HellPornoIE
@ -784,6 +787,7 @@ from .tv2 import (
    TV2IE,
    TV2ArticleIE,
 )
 from .tv3 import TV3IE
 from .tv4 import TV4IE
 from .tvc import (
    TVCIE,
--- a/youtube_dl/extractor/animeondemand.py
+++ b/youtube_dl/extractor/animeondemand.py
@ -18,7 +18,7 @@ class AnimeOnDemandIE(InfoExtractor):
    _LOGIN_URL = 'https://www.anime-on-demand.de/users/sign_in'
    _APPLY_HTML5_URL = 'https://www.anime-on-demand.de/html5apply'
    _NETRC_MACHINE = 'animeondemand'
-    _TEST = {
+    _TESTS = [{
        'url': 'https://www.anime-on-demand.de/anime/161',
        'info_dict': {
            'id': '161',
@ -26,7 +26,15 @@ class AnimeOnDemandIE(InfoExtractor):
            'description': 'md5:6681ce3c07c7189d255ac6ab23812d31',
        },
        'playlist_mincount': 4,
-    }
+    }, {
        # Film wording is used instead of Episode
        'url': 'https://www.anime-on-demand.de/anime/39',
        'only_matching': True,
    }, {
        # Episodes without titles
        'url': 'https://www.anime-on-demand.de/anime/162',
        'only_matching': True,
    }]
    def _login(self):
        (username, password) = self._get_login_info()
@ -91,14 +99,22 @@ class AnimeOnDemandIE(InfoExtractor):
        entries = []
-        for episode_html in re.findall(r'(?s)<h3[^>]+class="episodebox-title".+?>Episodeninhalt<', webpage):
+        for num, episode_html in enumerate(re.findall(
-            m = re.search(
+                r'(?s)<h3[^>]+class="episodebox-title".+?>Episodeninhalt<', webpage), 1):
-                r'class="episodebox-title"[^>]+title="Episode (?P<number>\d+) - (?P<title>.+?)"', episode_html)
+            episodebox_title = self._search_regex(
-            if not m:
+                (r'class="episodebox-title"[^>]+title=(["\'])(?P<title>.+?)\1',
                 r'class="episodebox-title"[^>]+>(?P<title>.+?)<'),
                episode_html, 'episodebox title', default=None, group='title')
            if not episodebox_title:
                continue
-            episode_number = int(m.group('number'))
+            episode_number = int(self._search_regex(
-            episode_title = m.group('title')
+                r'(?:Episode|Film)\s*(\d+)',
                episodebox_title, 'episode number', default=num))
            episode_title = self._search_regex(
                r'(?:Episode|Film)\s*\d+\s*-\s*(.+)',
                episodebox_title, 'episode title', default=None)
            video_id = 'episode-%d' % episode_number
            common_info = {
--- a/youtube_dl/extractor/bravotv.py
+++ b/youtube_dl/extractor/bravotv.py
@ -0,0 +1,28 @@
 # coding: utf-8
 from __future__ import unicode_literals
 from .common import InfoExtractor
 from ..utils import smuggle_url
 class BravoTVIE(InfoExtractor):
    # Matches bravotv.com URLs that contain a ".../videos/<id>" path segment.
    _VALID_URL = r'https?://(?:www\.)?bravotv\.com/(?:[^/]+/)+videos/(?P<id>[^/?]+)'
    _TEST = {
        'url': 'http://www.bravotv.com/last-chance-kitchen/season-5/videos/lck-ep-12-fishy-finale',
        'md5': 'd60cdf68904e854fac669bd26cccf801',
        'info_dict': {
            'id': 'LitrBdX64qLn',
            'ext': 'mp4',
            'title': 'Last Chance Kitchen Returns',
            'description': 'S13: Last Chance Kitchen Returns for Top Chef Season 13',
        }
    }

    def _real_extract(self, url):
        """Read the account and release pids from the page and delegate to ThePlatform."""
        slug = self._match_id(url)
        page = self._download_webpage(url, slug)

        # Both identifiers are read from "key": "value" pairs in the page.
        account = self._search_regex(r'"account_pid"\s*:\s*"([^"]+)"', page, 'account pid')
        release = self._search_regex(r'"release_pid"\s*:\s*"([^"]+)"', page, 'release pid')

        release_url = 'http://link.theplatform.com/s/%s/%s?mbr=true&switch=progressive' % (account, release)
        # The smuggled flag travels with the URL to the ThePlatform extractor.
        smuggled = smuggle_url(release_url, {'force_smil_url': True})
        return self.url_result(smuggled, 'ThePlatform', release)
--- a/youtube_dl/extractor/brightcove.py
+++ b/youtube_dl/extractor/brightcove.py
@ -9,7 +9,6 @@ from ..compat import (
    compat_etree_fromstring,
    compat_parse_qs,
    compat_str,
    compat_urllib_parse,
    compat_urllib_parse_urlparse,
    compat_urlparse,
    compat_xml_parse_error,
@ -24,16 +23,16 @@ from ..utils import (
    js_to_json,
    int_or_none,
    parse_iso8601,
    sanitized_Request,
    unescapeHTML,
    unsmuggle_url,
    update_url_query,
 )
 class BrightcoveLegacyIE(InfoExtractor):
    IE_NAME = 'brightcove:legacy'
    _VALID_URL = r'(?:https?://.*brightcove\.com/(services|viewer).*?\?|brightcove:)(?P<query>.*)'
-    _FEDERATED_URL_TEMPLATE = 'http://c.brightcove.com/services/viewer/htmlFederated?%s'
+    _FEDERATED_URL = 'http://c.brightcove.com/services/viewer/htmlFederated'
    _TESTS = [
        {
@ -156,7 +155,7 @@ class BrightcoveLegacyIE(InfoExtractor):
        # Not all pages define this value
        if playerKey is not None:
            params['playerKey'] = playerKey
-        # The three fields hold the id of the video
+        # These fields hold the id of the video
        videoPlayer = find_param('@videoPlayer') or find_param('videoId') or find_param('videoID') or find_param('@videoList')
        if videoPlayer is not None:
            params['@videoPlayer'] = videoPlayer
@ -185,8 +184,7 @@ class BrightcoveLegacyIE(InfoExtractor):
    @classmethod
    def _make_brightcove_url(cls, params):
-        data = compat_urllib_parse.urlencode(params)
+        return update_url_query(cls._FEDERATED_URL, params)
        return cls._FEDERATED_URL_TEMPLATE % data
    @classmethod
    def _extract_brightcove_url(cls, webpage):
@ -240,7 +238,7 @@ class BrightcoveLegacyIE(InfoExtractor):
            # We set the original url as the default 'Referer' header
            referer = smuggled_data.get('Referer', url)
            return self._get_video_info(
-                videoPlayer[0], query_str, query, referer=referer)
+                videoPlayer[0], query, referer=referer)
        elif 'playerKey' in query:
            player_key = query['playerKey']
            return self._get_playlist_info(player_key[0])
@ -249,15 +247,14 @@ class BrightcoveLegacyIE(InfoExtractor):
                'Cannot find playerKey= variable. Did you forget quotes in a shell invocation?',
                expected=True)
-    def _get_video_info(self, video_id, query_str, query, referer=None):
+    def _get_video_info(self, video_id, query, referer=None):
-        request_url = self._FEDERATED_URL_TEMPLATE % query_str
+        headers = {}
        req = sanitized_Request(request_url)
        linkBase = query.get('linkBaseURL')
        if linkBase is not None:
            referer = linkBase[0]
        if referer is not None:
-            req.add_header('Referer', referer)
+            headers['Referer'] = referer
-        webpage = self._download_webpage(req, video_id)
+        webpage = self._download_webpage(self._FEDERATED_URL, video_id, headers=headers, query=query)
        error_msg = self._html_search_regex(
            r"<h1>We're sorry.</h1>([\s\n]*<p>.*?</p>)+", webpage,
@ -415,8 +412,8 @@ class BrightcoveNewIE(InfoExtractor):
        # Look for iframe embeds [1]
        for _, url in re.findall(
-                r'<iframe[^>]+src=(["\'])((?:https?:)//players\.brightcove\.net/\d+/[^/]+/index\.html.+?)\1', webpage):
+                r'<iframe[^>]+src=(["\'])((?:https?:)?//players\.brightcove\.net/\d+/[^/]+/index\.html.+?)\1', webpage):
-            entries.append(url)
+            entries.append(url if url.startswith('http') else 'http:' + url)
        # Look for embed_in_page embeds [2]
        for video_id, account_id, player_id, embed in re.findall(
@ -459,12 +456,11 @@ class BrightcoveNewIE(InfoExtractor):
                r'policyKey\s*:\s*(["\'])(?P<pk>.+?)\1',
                webpage, 'policy key', group='pk')
-        req = sanitized_Request(
+        api_url = 'https://edge.api.brightcove.com/playback/v1/accounts/%s/videos/%s' % (account_id, video_id)
            'https://edge.api.brightcove.com/playback/v1/accounts/%s/videos/%s'
            % (account_id, video_id),
            headers={'Accept': 'application/json;pk=%s' % policy_key})
        try:
-            json_data = self._download_json(req, video_id)
+            json_data = self._download_json(api_url, video_id, headers={
                'Accept': 'application/json;pk=%s' % policy_key
            })
        except ExtractorError as e:
            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
                json_data = self._parse_json(e.cause.read().decode(), video_id)
@ -482,8 +478,7 @@ class BrightcoveNewIE(InfoExtractor):
                if not src:
                    continue
                formats.extend(self._extract_m3u8_formats(
-                    src, video_id, 'mp4', entry_protocol='m3u8_native',
+                    src, video_id, 'mp4', m3u8_id='hls', fatal=False))
                    m3u8_id='hls', fatal=False))
            elif source_type == 'application/dash+xml':
                if not src:
                    continue
--- a/youtube_dl/extractor/cbsnews.py
+++ b/youtube_dl/extractor/cbsnews.py
@ -78,7 +78,7 @@ class CBSNewsIE(ThePlatformIE):
            pid = item.get('media' + format_id)
            if not pid:
                continue
-            release_url = 'http://link.theplatform.com/s/dJ5BDC/%s?format=SMIL&mbr=true' % pid
+            release_url = 'http://link.theplatform.com/s/dJ5BDC/%s?mbr=true' % pid
            tp_formats, tp_subtitles = self._extract_theplatform_smil(release_url, video_id, 'Downloading %s SMIL data' % pid)
            formats.extend(tp_formats)
            subtitles = self._merge_subtitles(subtitles, tp_subtitles)
--- a/youtube_dl/extractor/cnet.py
+++ b/youtube_dl/extractor/cnet.py
@ -60,7 +60,7 @@ class CNETIE(ThePlatformIE):
        for (fkey, vid) in vdata['files'].items():
            if fkey == 'hls_phone' and 'hls_tablet' in vdata['files']:
                continue
-            release_url = 'http://link.theplatform.com/s/kYEXFC/%s?format=SMIL&mbr=true' % vid
+            release_url = 'http://link.theplatform.com/s/kYEXFC/%s?mbr=true' % vid
            if fkey == 'hds':
                release_url += '&manifest=f4m'
            tp_formats, tp_subtitles = self._extract_theplatform_smil(release_url, video_id, 'Downloading %s SMIL data' % fkey)
--- a/youtube_dl/extractor/commonprotocols.py
+++ b/youtube_dl/extractor/commonprotocols.py
@ -0,0 +1,36 @@
 from __future__ import unicode_literals
 import os
 from .common import InfoExtractor
 from ..compat import (
    compat_urllib_parse_unquote,
    compat_urlparse,
 )
 from ..utils import url_basename
 class RtmpIE(InfoExtractor):
    IE_DESC = False  # Do not list
    # Case-insensitive: rtmp:// plus the rtmpe://, rtmps:// and rtmpt:// variants.
    _VALID_URL = r'(?i)rtmp[est]?://.+'
    _TESTS = [{
        'url': 'rtmp://cp44293.edgefcs.net/ondemand?auth=daEcTdydfdqcsb8cZcDbAaCbhamacbbawaS-bw7dBb-bWG-GqpGFqCpNCnGoyL&aifp=v001&slist=public/unsecure/audio/2c97899446428e4301471a8cb72b4b97--audio--pmg-20110908-0900a_flv_aac_med_int.mp4',
        'only_matching': True,
    }, {
        'url': 'rtmp://edge.live.hitbox.tv/live/dimak',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a single-format result that points straight at the RTMP URL."""
        # The id comes from the last '/'-separated piece of the raw URL,
        # with any extension dropped and percent-escapes decoded.
        last_piece = url.rstrip('/').split('/')[-1]
        video_id = compat_urllib_parse_unquote(os.path.splitext(last_piece)[0])

        # The title goes through url_basename() instead of a plain split.
        basename_root = os.path.splitext(url_basename(url))[0]
        title = compat_urllib_parse_unquote(basename_root)

        stream = {
            'url': url,
            'ext': 'flv',
            # The URL scheme doubles as the format id.
            'format_id': compat_urlparse.urlparse(url).scheme,
        }
        return {
            'id': video_id,
            'title': title,
            'formats': [stream],
        }
--- a/youtube_dl/extractor/crunchyroll.py
+++ b/youtube_dl/extractor/crunchyroll.py
@ -54,7 +54,7 @@ class CrunchyrollBaseIE(InfoExtractor):
    def _real_initialize(self):
        self._login()
-    def _download_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, tries=1, timeout=5, encoding=None):
+    def _download_webpage(self, url_or_request, *args, **kwargs):
        request = (url_or_request if isinstance(url_or_request, compat_urllib_request.Request)
                   else sanitized_Request(url_or_request))
        # Accept-Language must be set explicitly to accept any language to avoid issues
@ -65,8 +65,7 @@ class CrunchyrollBaseIE(InfoExtractor):
        # Crunchyroll to not work in georestriction cases in some browsers that don't place
        # the locale lang first in header. However allowing any language seems to workaround the issue.
        request.add_header('Accept-Language', '*')
-        return super(CrunchyrollBaseIE, self)._download_webpage(
+        return super(CrunchyrollBaseIE, self)._download_webpage(request, *args, **kwargs)
            request, video_id, note, errnote, fatal, tries, timeout, encoding)
    @staticmethod
    def _add_skip_wall(url):
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@ -239,6 +239,35 @@ class GenericIE(InfoExtractor):
                'format': 'bestvideo',
            },
        },
        # m3u8 served with Content-Type: audio/x-mpegURL; charset=utf-8
        {
            'url': 'http://once.unicornmedia.com/now/master/playlist/bb0b18ba-64f5-4b1b-a29f-0ac252f06b68/77a785f3-5188-4806-b788-0893a61634ed/93677179-2d99-4ef4-9e17-fe70d49abfbf/content.m3u8',
            'info_dict': {
                'id': 'content',
                'ext': 'mp4',
                'title': 'content',
                'formats': 'mincount:8',
            },
            'params': {
                # m3u8 downloads
                'skip_download': True,
            }
        },
        # m3u8 served with Content-Type: text/plain
        {
            'url': 'http://www.nacentapps.com/m3u8/index.m3u8',
            'info_dict': {
                'id': 'index',
                'ext': 'mp4',
                'title': 'index',
                'upload_date': '20140720',
                'formats': 'mincount:11',
            },
            'params': {
                # m3u8 downloads
                'skip_download': True,
            }
        },
        # google redirect
        {
            'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE',
@ -1245,14 +1274,13 @@ class GenericIE(InfoExtractor):
        info_dict = {
            'id': video_id,
            'title': compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]),
            'upload_date': unified_strdate(head_response.headers.get('Last-Modified'))
        }
        # Check for direct link to a video
-        content_type = head_response.headers.get('Content-Type', '')
+        content_type = head_response.headers.get('Content-Type', '').lower()
-        m = re.match(r'^(?P<type>audio|video|application(?=/(?:ogg$|(?:vnd\.apple\.|x-)?mpegurl)))/(?P<format_id>.+)$', content_type)
+        m = re.match(r'^(?P<type>audio|video|application(?=/(?:ogg$|(?:vnd\.apple\.|x-)?mpegurl)))/(?P<format_id>[^;\s]+)', content_type)
        if m:
            upload_date = unified_strdate(
                head_response.headers.get('Last-Modified'))
            format_id = m.group('format_id')
            if format_id.endswith('mpegurl'):
                formats = self._extract_m3u8_formats(url, video_id, 'mp4')
@ -1264,11 +1292,8 @@ class GenericIE(InfoExtractor):
                    'url': url,
                    'vcodec': 'none' if m.group('type') == 'audio' else None
                }]
-            info_dict.update({
+                info_dict['direct'] = True
-                'direct': True,
+            info_dict['formats'] = formats
                'formats': formats,
                'upload_date': upload_date,
            })
            return info_dict
        if not self._downloader.params.get('test', False) and not is_intentional:
@ -1289,18 +1314,21 @@ class GenericIE(InfoExtractor):
            request.add_header('Accept-Encoding', '*')
            full_response = self._request_webpage(request, video_id)
        first_bytes = full_response.read(512)
        # Is it an M3U playlist?
        if first_bytes.startswith(b'#EXTM3U'):
            info_dict['formats'] = self._extract_m3u8_formats(url, video_id, 'mp4')
            return info_dict
        # Maybe it's a direct link to a video?
        # Be careful not to download the whole thing!
        first_bytes = full_response.read(512)
        if not is_html(first_bytes):
            self._downloader.report_warning(
                'URL could be a direct video link, returning it as such.')
            upload_date = unified_strdate(
                head_response.headers.get('Last-Modified'))
            info_dict.update({
                'direct': True,
                'url': url,
                'upload_date': upload_date,
            })
            return info_dict
--- a/youtube_dl/extractor/hbo.py
+++ b/youtube_dl/extractor/hbo.py
@ -0,0 +1,122 @@
 # coding: utf-8
 from __future__ import unicode_literals
 import re
 from .common import InfoExtractor
 from ..utils import (
    xpath_text,
    xpath_element,
    int_or_none,
    parse_duration,
 )
 class HBOIE(InfoExtractor):
    # Matches hbo.com video pages; the numeric ``vid`` query parameter is the id.
    _VALID_URL = r'https?://(?:www\.)?hbo\.com/video/video\.html\?.*vid=(?P<id>[0-9]+)'
    _TEST = {
        'url': 'http://www.hbo.com/video/video.html?autoplay=true&g=u&vid=1437839',
        'md5': '1c33253f0c7782142c993c0ba62a8753',
        'info_dict': {
            'id': '1437839',
            'ext': 'mp4',
            'title': 'Ep. 64 Clip: Encryption',
        }
    }
    # Known frame dimensions, looked up either by the ``width`` attribute of a
    # <size> element or, for any other source element, by its tag name.
    # NOTE(review): the keys '1920' and '640' map to different actual widths
    # (1280 and 768) -- presumably the feed's labels are nominal; confirm.
    _FORMATS_INFO = {
        '1920': {
            'width': 1280,
            'height': 720,
        },
        '640': {
            'width': 768,
            'height': 432,
        },
        'highwifi': {
            'width': 640,
            'height': 360,
        },
        'high3g': {
            'width': 640,
            'height': 360,
        },
        'medwifi': {
            'width': 400,
            'height': 224,
        },
        'med3g': {
            'width': 400,
            'height': 224,
        },
    }

    def _real_extract(self, url):
        """Build the info dict for one video from its XML description.

        Downloads the per-video XML document, collects formats from the
        children of its ``videos`` element and thumbnails from
        ``titleCardSizes``, and returns id, title, duration, formats and
        thumbnails.
        """
        video_id = self._match_id(url)
        video_data = self._download_xml(
            'http://render.lv3.hbo.com/data/content/global/videos/data/%s.xml' % video_id, video_id)
        title = xpath_text(video_data, 'title', 'title', True)

        formats = []
        for source in xpath_element(video_data, 'videos', 'sources', True):
            if source.tag == 'size':
                # <size> entries keep their URL in a nested <path> element and
                # are identified by their ``width`` attribute.
                path = xpath_text(source, './/path')
                if not path:
                    continue
                width = source.attrib.get('width')
                format_info = self._FORMATS_INFO.get(width, {})
                height = format_info.get('height')
                fmt = {
                    'url': path,
                    'format_id': 'http%s' % ('-%dp' % height if height else ''),
                    'width': format_info.get('width'),
                    'height': height,
                }
                # rtmp(e) paths are split into connection URL, app and play
                # path, and the format is relabelled from http-* to rtmp-*.
                rtmp = re.search(r'^(?P<url>rtmpe?://[^/]+/(?P<app>.+))/(?P<playpath>mp4:.+)$', path)
                if rtmp:
                    fmt.update({
                        'url': rtmp.group('url'),
                        'play_path': rtmp.group('playpath'),
                        'app': rtmp.group('app'),
                        'ext': 'flv',
                        'format_id': fmt['format_id'].replace('http', 'rtmp'),
                    })
                formats.append(fmt)
            else:
                # Every other source element carries its URL as element text.
                video_url = source.text
                if not video_url:
                    continue
                if source.tag == 'tarball':
                    # The tarball URL is rewritten to an HLS playlist URL;
                    # fatal=False keeps a failure here from aborting extraction.
                    formats.extend(self._extract_m3u8_formats(
                        video_url.replace('.tar', '/base_index_w8.m3u8'),
                        video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
                else:
                    format_info = self._FORMATS_INFO.get(source.tag, {})
                    formats.append({
                        'format_id': 'http-%s' % source.tag,
                        'url': video_url,
                        'width': format_info.get('width'),
                        'height': format_info.get('height'),
                    })
        self._sort_formats(formats, ('width', 'height', 'tbr', 'format_id'))

        thumbnails = []
        card_sizes = xpath_element(video_data, 'titleCardSizes')
        if card_sizes is not None:
            for size in card_sizes:
                path = xpath_text(size, 'path')
                if not path:
                    continue
                width = int_or_none(size.get('width'))
                thumbnails.append({
                    'id': width,
                    'url': path,
                    'width': width,
                })

        return {
            'id': video_id,
            'title': title,
            # Hand parse_duration the node's text, as every other lookup in
            # this method does, rather than the Element object itself.
            'duration': parse_duration(xpath_text(video_data, 'duration/tv14')),
            'formats': formats,
            'thumbnails': thumbnails,
        }
--- a/youtube_dl/extractor/nationalgeographic.py
+++ b/youtube_dl/extractor/nationalgeographic.py
@ -48,7 +48,7 @@ class NationalGeographicIE(InfoExtractor):
        theplatform_id = url_basename(content.attrib.get('url'))
        return self.url_result(smuggle_url(
-            'http://link.theplatform.com/s/ngs/%s?format=SMIL&formats=MPEG4&manifest=f4m' % theplatform_id,
+            'http://link.theplatform.com/s/ngs/%s?formats=MPEG4&manifest=f4m' % theplatform_id,
            # For some reason, the normal links don't work and we must force
            # the use of f4m
            {'force_smil_url': True}))
--- a/youtube_dl/extractor/nbc.py
+++ b/youtube_dl/extractor/nbc.py
@ -3,13 +3,16 @@ from __future__ import unicode_literals
 import re
 from .common import InfoExtractor
-from ..compat import compat_HTTPError
+from .theplatform import ThePlatformIE
 from ..utils import (
    ExtractorError,
    find_xpath_attr,
    lowercase_escape,
    smuggle_url,
    unescapeHTML,
    update_url_query,
    int_or_none,
    HEADRequest,
    parse_iso8601,
 )
@ -131,10 +134,10 @@ class NBCSportsIE(InfoExtractor):
            NBCSportsVPlayerIE._extract_url(webpage), 'NBCSportsVPlayer')
-class NBCNewsIE(InfoExtractor):
+class NBCNewsIE(ThePlatformIE):
    _VALID_URL = r'''(?x)https?://(?:www\.)?nbcnews\.com/
        (?:video/.+?/(?P<id>\d+)|
-        (?:watch|feature|nightly-news)/[^/]+/(?P<title>.+))
+        ([^/]+/)*(?P<display_id>[^/?]+))
        '''
    _TESTS = [
@ -149,15 +152,14 @@ class NBCNewsIE(InfoExtractor):
            },
        },
        {
-            'url': 'http://www.nbcnews.com/feature/edward-snowden-interview/how-twitter-reacted-snowden-interview-n117236',
+            'url': 'http://www.nbcnews.com/watch/nbcnews-com/how-twitter-reacted-to-the-snowden-interview-269389891880',
-            'md5': 'b2421750c9f260783721d898f4c42063',
+            'md5': 'af1adfa51312291a017720403826bb64',
            'info_dict': {
-                'id': 'I1wpAI_zmhsQ',
+                'id': '269389891880',
                'ext': 'mp4',
                'title': 'How Twitter Reacted To The Snowden Interview',
                'description': 'md5:65a0bd5d76fe114f3c2727aa3a81fe64',
            },
            'add_ie': ['ThePlatform'],
        },
        {
            'url': 'http://www.nbcnews.com/feature/dateline-full-episodes/full-episode-family-business-n285156',
@ -168,17 +170,29 @@ class NBCNewsIE(InfoExtractor):
                'title': 'FULL EPISODE: Family Business',
                'description': 'md5:757988edbaae9d7be1d585eb5d55cc04',
            },
            'skip': 'This page is unavailable.',
        },
        {
            'url': 'http://www.nbcnews.com/nightly-news/video/nightly-news-with-brian-williams-full-broadcast-february-4-394064451844',
-            'md5': 'b5dda8cddd8650baa0dcb616dd2cf60d',
+            'md5': '73135a2e0ef819107bbb55a5a9b2a802',
            'info_dict': {
-                'id': 'sekXqyTVnmN3',
+                'id': '394064451844',
                'ext': 'mp4',
                'title': 'Nightly News with Brian Williams Full Broadcast (February 4)',
                'description': 'md5:1c10c1eccbe84a26e5debb4381e2d3c5',
            },
        },
        {
            'url': 'http://www.nbcnews.com/business/autos/volkswagen-11-million-vehicles-could-have-suspect-software-emissions-scandal-n431456',
            'md5': 'a49e173825e5fcd15c13fc297fced39d',
            'info_dict': {
                'id': '529953347624',
                'ext': 'mp4',
                'title': 'Volkswagen U.S. Chief: We \'Totally Screwed Up\'',
                'description': 'md5:d22d1281a24f22ea0880741bb4dd6301',
            },
            'expected_warnings': ['http-6000 is not available']
        },
        {
            'url': 'http://www.nbcnews.com/watch/dateline/full-episode--deadly-betrayal-386250819952',
            'only_matching': True,
@ -202,49 +216,80 @@ class NBCNewsIE(InfoExtractor):
            }
        else:
            # "feature" and "nightly-news" pages use theplatform.com
-            title = mobj.group('title')
+            display_id = mobj.group('display_id')
-            webpage = self._download_webpage(url, title)
+            webpage = self._download_webpage(url, display_id)
            info = None
            bootstrap_json = self._search_regex(
-                r'var\s+(?:bootstrapJson|playlistData)\s*=\s*({.+});?\s*$',
+                r'(?m)var\s+(?:bootstrapJson|playlistData)\s*=\s*({.+});?\s*$',
-                webpage, 'bootstrap json', flags=re.MULTILINE)
+                webpage, 'bootstrap json', default=None)
-            bootstrap = self._parse_json(bootstrap_json, video_id)
+            if bootstrap_json:
                bootstrap = self._parse_json(bootstrap_json, display_id)
                info = bootstrap['results'][0]['video']
-            mpxid = info['mpxId']
+            else:
                player_instance_json = self._search_regex(
                    r'videoObj\s*:\s*({.+})', webpage, 'player instance')
                info = self._parse_json(player_instance_json, display_id)
            video_id = info['mpxId']
            title = info['title']
-            base_urls = [
+            subtitles = {}
-                info['fallbackPlaylistUrl'],
+            caption_links = info.get('captionLinks')
-                info['associatedPlaylistUrl'],
+            if caption_links:
-            ]
+                for (sub_key, sub_ext) in (('smpte-tt', 'ttml'), ('web-vtt', 'vtt'), ('srt', 'srt')):
                    sub_url = caption_links.get(sub_key)
                    if sub_url:
                        subtitles.setdefault('en', []).append({
                            'url': sub_url,
                            'ext': sub_ext,
                        })
-            for base_url in base_urls:
+            formats = []
-                if not base_url:
+            for video_asset in info['videoAssets']:
                video_url = video_asset.get('publicUrl')
                if not video_url:
                    continue
-                playlist_url = base_url + '?form=MPXNBCNewsAPI'
+                container = video_asset.get('format')
-
+                asset_type = video_asset.get('assetType') or ''
-                try:
+                if container == 'ISM' or asset_type == 'FireTV-Once':
                    all_videos = self._download_json(playlist_url, title)
                except ExtractorError as ee:
                    if isinstance(ee.cause, compat_HTTPError):
                    continue
-                    raise
+                elif asset_type == 'OnceURL':
-
+                    tp_formats, tp_subtitles = self._extract_theplatform_smil(
-                if not all_videos or 'videos' not in all_videos:
+                        video_url, video_id)
-                    continue
+                    formats.extend(tp_formats)
-
+                    subtitles = self._merge_subtitles(subtitles, tp_subtitles)
-                try:
+                else:
-                    info = next(v for v in all_videos['videos'] if v['mpxId'] == mpxid)
+                    tbr = int_or_none(video_asset.get('bitRate'), 1000)
-                    break
+                    format_id = 'http%s' % ('-%d' % tbr if tbr else '')
-                except StopIteration:
+                    video_url = update_url_query(
-                    continue
+                        video_url, {'format': 'redirect'})
-
+                    # resolve the url so that we can check availability and detect the correct extension
-            if info is None:
+                    head = self._request_webpage(
-                raise ExtractorError('Could not find video in playlists')
+                        HEADRequest(video_url), video_id,
                        'Checking %s url' % format_id,
                        '%s is not available' % format_id,
                        fatal=False)
                    if head:
                        video_url = head.geturl()
                        formats.append({
                            'format_id': format_id,
                            'url': video_url,
                            'width': int_or_none(video_asset.get('width')),
                            'height': int_or_none(video_asset.get('height')),
                            'tbr': tbr,
                            'container': video_asset.get('format'),
                        })
            self._sort_formats(formats)
            return {
-                '_type': 'url',
+                'id': video_id,
-                # We get the best quality video
+                'title': title,
-                'url': info['videoAssets'][-1]['publicUrl'],
+                'description': info.get('description'),
-                'ie_key': 'ThePlatform',
+                'thumbnail': info.get('description'),
                'thumbnail': info.get('thumbnail'),
                'duration': int_or_none(info.get('duration')),
                'timestamp': parse_iso8601(info.get('pubDate')),
                'formats': formats,
                'subtitles': subtitles,
            }
--- a/youtube_dl/extractor/noz.py
+++ b/youtube_dl/extractor/noz.py
@ -2,7 +2,10 @@
 from __future__ import unicode_literals
 from .common import InfoExtractor
-from ..compat import compat_urllib_parse_unquote
+from ..compat import (
    compat_urllib_parse_unquote,
    compat_xpath,
 )
 from ..utils import (
    int_or_none,
    find_xpath_attr,
@ -47,7 +50,7 @@ class NozIE(InfoExtractor):
        duration = int_or_none(xpath_text(
            doc, './/article/movie/file/duration'))
        formats = []
-        for qnode in doc.findall('.//article/movie/file/qualities/qual'):
+        for qnode in doc.findall(compat_xpath('.//article/movie/file/qualities/qual')):
            http_url_ele = find_xpath_attr(
                qnode, './html_urls/video_url', 'format', 'video/mp4')
            http_url = http_url_ele.text if http_url_ele is not None else None
--- a/youtube_dl/extractor/once.py
+++ b/youtube_dl/extractor/once.py
@ -0,0 +1,38 @@
 # coding: utf-8
 from __future__ import unicode_literals
 import re
 from .common import InfoExtractor
 class OnceIE(InfoExtractor):
    # Helper extractor for Once media URLs hosted on once.unicornmedia.com.
    # _VALID_URL captures, in order: domain_id, application_id, media_item_id.
    _VALID_URL = r'https?://once\.unicornmedia\.com/now/[^/]+/[^/]+/(?P<domain_id>[^/]+)/(?P<application_id>[^/]+)/(?:[^/]+/)?(?P<media_item_id>[^/]+)/content\.(?:once|m3u8|mp4)'
    # Filled with (domain_id, application_id, media_item_id).
    ADAPTIVE_URL_TEMPLATE = 'http://once.unicornmedia.com/now/master/playlist/%s/%s/%s/content.m3u8'
    # Filled with (domain_id, application_id, rendition_id, media_item_id).
    PROGRESSIVE_URL_TEMPLATE = 'http://once.unicornmedia.com/now/media/progressive/%s/%s/%s/%s/content.mp4'
    def _extract_once_formats(self, url):
        """Return the HLS formats for a Once URL plus derived progressive ones.

        The three ids captured by _VALID_URL are used to request the master
        playlist; for every returned format whose URL exposes a rendition id,
        an http copy pointing at the progressive mp4 template is added.
        """
        mobj = re.match(OnceIE._VALID_URL, url)
        domain_id, application_id, media_item_id = mobj.groups()
        master_playlist_url = self.ADAPTIVE_URL_TEMPLATE % (
            domain_id, application_id, media_item_id)
        formats = self._extract_m3u8_formats(
            master_playlist_url, media_item_id, 'mp4',
            m3u8_id='hls', fatal=False)
        http_formats = []
        for hls_format in formats:
            rendition_id = self._search_regex(
                r'/now/media/playlist/[^/]+/[^/]+/([^/]+)',
                hls_format['url'], 'redition id', default=None)
            if not rendition_id:
                # No rendition id in the variant URL: nothing to derive.
                continue
            http_format = hls_format.copy()
            http_format['url'] = self.PROGRESSIVE_URL_TEMPLATE % (
                domain_id, application_id, rendition_id, media_item_id)
            http_format['format_id'] = hls_format['format_id'].replace(
                'hls', 'http')
            http_format['protocol'] = 'http'
            http_formats.append(http_format)
        # _check_formats is defined outside this view; presumably it drops
        # derived URLs that are not reachable -- confirm in the base class.
        self._check_formats(http_formats, media_item_id)
        formats.extend(http_formats)
        return formats
--- a/youtube_dl/extractor/sbs.py
+++ b/youtube_dl/extractor/sbs.py
@ -2,6 +2,10 @@
 from __future__ import unicode_literals
 from .common import InfoExtractor
 from ..utils import (
    smuggle_url,
    ExtractorError,
 )
 class SBSIE(InfoExtractor):
@ -31,21 +35,28 @@ class SBSIE(InfoExtractor):
    def _real_extract(self, url):
        video_id = self._match_id(url)
        player_params = self._download_json(
            'http://www.sbs.com.au/api/video_pdkvars/id/%s?form=json' % video_id, video_id)
-        webpage = self._download_webpage(
+        error = player_params.get('error')
-            'http://www.sbs.com.au/ondemand/video/single/%s?context=web' % video_id, video_id)
+        if error:
-
+            error_message = 'Sorry, The video you are looking for does not exist.'
-        player_params = self._parse_json(
+            video_data = error.get('results') or {}
-            self._search_regex(
+            error_code = error.get('errorCode')
-                r'(?s)var\s+playerParams\s*=\s*({.+?});', webpage, 'playerParams'),
+            if error_code == 'ComingSoon':
-            video_id)
+                error_message = '%s is not yet available.' % video_data.get('title', '')
            elif error_code in ('Forbidden', 'intranetAccessOnly'):
                error_message = 'Sorry, This video cannot be accessed via this website'
            elif error_code == 'Expired':
                error_message = 'Sorry, %s is no longer available.' % video_data.get('title', '')
            raise ExtractorError('%s said: %s' % (self.IE_NAME, error_message), expected=True)
        urls = player_params['releaseUrls']
-        theplatform_url = (urls.get('progressive') or urls.get('standard') or
+        theplatform_url = (urls.get('progressive') or urls.get('html') or
-                           urls.get('html') or player_params['relatedItemsURL'])
+                           urls.get('standard') or player_params['relatedItemsURL'])
        return {
            '_type': 'url_transparent',
            'id': video_id,
-            'url': theplatform_url,
+            'url': smuggle_url(theplatform_url, {'force_smil_url': True}),
        }
--- a/youtube_dl/extractor/theplatform.py
+++ b/youtube_dl/extractor/theplatform.py
@ -8,13 +8,12 @@ import binascii
 import hashlib
-from .common import InfoExtractor
+from .once import OnceIE
 from ..compat import (
    compat_parse_qs,
    compat_urllib_parse_urlparse,
 )
 from ..utils import (
    determine_ext,
    ExtractorError,
    float_or_none,
    int_or_none,
@ -29,26 +28,27 @@ default_ns = 'http://www.w3.org/2005/SMIL21/Language'
 _x = lambda p: xpath_with_ns(p, {'smil': default_ns})
-class ThePlatformBaseIE(InfoExtractor):
+class ThePlatformBaseIE(OnceIE):
    def _extract_theplatform_smil(self, smil_url, video_id, note='Downloading SMIL data'):
-        meta = self._download_xml(smil_url, video_id, note=note)
+        meta = self._download_xml(smil_url, video_id, note=note, query={'format': 'SMIL'})
-        error_element = find_xpath_attr(
+        error_element = find_xpath_attr(meta, _x('.//smil:ref'), 'src')
-            meta, _x('.//smil:ref'), 'src',
+        if error_element is not None and error_element.attrib['src'].startswith(
-            'http://link.theplatform.com/s/errorFiles/Unavailable.mp4')
+                'http://link.theplatform.com/s/errorFiles/Unavailable.'):
        if error_element is not None:
            raise ExtractorError(error_element.attrib['abstract'], expected=True)
-        formats = self._parse_smil_formats(
+        smil_formats = self._parse_smil_formats(
            meta, smil_url, video_id, namespace=default_ns,
            # the parameters are from syfy.com, other sites may use others,
            # they also work for nbc.com
            f4m_params={'g': 'UXWGVKRWHFSP', 'hdcore': '3.0.3'},
            transform_rtmp_url=lambda streamer, src: (streamer, 'mp4:' + src))
-        for _format in formats:
+        formats = []
-            ext = determine_ext(_format['url'])
+        for _format in smil_formats:
-            if ext == 'once':
+            if OnceIE.suitable(_format['url']):
-                _format['ext'] = 'mp4'
+                formats.extend(self._extract_once_formats(_format['url']))
            else:
                formats.append(_format)
        self._sort_formats(formats)
@ -125,7 +125,7 @@ class ThePlatformIE(ThePlatformBaseIE):
        'only_matching': True,
    }, {
        'url': 'http://player.theplatform.com/p/2E2eJC/nbcNewsOffsite?guid=tdy_or_siri_150701',
-        'md5': '734f3790fb5fc4903da391beeebc4836',
+        'md5': 'fb96bb3d85118930a5b055783a3bd992',
        'info_dict': {
            'id': 'tdy_or_siri_150701',
            'ext': 'mp4',
@ -135,7 +135,6 @@ class ThePlatformIE(ThePlatformBaseIE):
            'thumbnail': 're:^https?://.*\.jpg$',
            'timestamp': 1435752600,
            'upload_date': '20150701',
            'categories': ['Today/Shows/Orange Room', 'Today/Sections/Money', 'Today/Topics/Tech', "Today/Topics/Editor's picks"],
        },
    }, {
        # From http://www.nbc.com/the-blacklist/video/sir-crispin-crandall/2928790?onid=137781#vc137781=1
@ -213,7 +212,7 @@ class ThePlatformIE(ThePlatformBaseIE):
                webpage, 'smil url', group='url')
            path = self._search_regex(
                r'link\.theplatform\.com/s/((?:[^/?#&]+/)+[^/?#&]+)', smil_url, 'path')
-            smil_url += '?' if '?' not in smil_url else '&' + 'formats=m3u,mpeg4&format=SMIL'
+            smil_url += '?' if '?' not in smil_url else '&' + 'formats=m3u,mpeg4'
        elif mobj.group('config'):
            config_url = url + '&form=json'
            config_url = config_url.replace('swf/', 'config/')
@ -223,9 +222,9 @@ class ThePlatformIE(ThePlatformBaseIE):
                release_url = config['releaseUrl']
            else:
                release_url = 'http://link.theplatform.com/s/%s?mbr=true' % path
-            smil_url = release_url + '&format=SMIL&formats=MPEG4&manifest=f4m'
+            smil_url = release_url + '&formats=MPEG4&manifest=f4m'
        else:
-            smil_url = 'http://link.theplatform.com/s/%s/meta.smil?format=smil&mbr=true' % path
+            smil_url = 'http://link.theplatform.com/s/%s?mbr=true' % path
        sig = smuggled_data.get('sig')
        if sig:
@ -250,7 +249,7 @@ class ThePlatformFeedIE(ThePlatformBaseIE):
    _TEST = {
        # From http://player.theplatform.com/p/7wvmTC/MSNBCEmbeddedOffSite?guid=n_hardball_5biden_140207
        'url': 'http://feed.theplatform.com/f/7wvmTC/msnbc_video-p-test?form=json&pretty=true&range=-40&byGuid=n_hardball_5biden_140207',
-        'md5': '22d2b84f058d3586efcd99e57d59d314',
+        'md5': '6e32495b5073ab414471b615c5ded394',
        'info_dict': {
            'id': 'n_hardball_5biden_140207',
            'ext': 'mp4',
@ -280,7 +279,7 @@ class ThePlatformFeedIE(ThePlatformBaseIE):
        first_video_id = None
        duration = None
        for item in entry['media$content']:
-            smil_url = item['plfile$url'] + '&format=SMIL&mbr=true'
+            smil_url = item['plfile$url'] + '&mbr=true'
            cur_video_id = ThePlatformIE._match_id(smil_url)
            if first_video_id is None:
                first_video_id = cur_video_id
--- a/youtube_dl/extractor/tv3.py
+++ b/youtube_dl/extractor/tv3.py
@ -0,0 +1,33 @@
 # coding: utf-8
 from __future__ import unicode_literals
 from .common import InfoExtractor
 class TV3IE(InfoExtractor):
    # Extractor for tv3.co.nz article pages; the page slug is used as the
    # display id and the actual video is delegated to the Brightcove player.
    _VALID_URL = r'https?://(?:www\.)?tv3\.co\.nz/(?P<id>[^/]+)/tabid/\d+/articleID/\d+/MCat/\d+/Default\.aspx'
    _TEST = {
        'url': 'http://www.tv3.co.nz/MOTORSPORT-SRS-SsangYong-Hampton-Downs-Round-3/tabid/3692/articleID/121615/MCat/2915/Default.aspx',
        'info_dict': {
            'id': '4659127992001',
            'ext': 'mp4',
            'title': 'CRC Motorsport: SRS SsangYong Hampton Downs Round 3 - S2015 Ep3',
            'description': 'SsangYong Racing Series returns for Round 3 with drivers from New Zealand and Australia taking to the grid at Hampton Downs raceway.',
            'uploader_id': '3812193411001',
            'upload_date': '20151213',
            'timestamp': 1449975272,
        },
        'expected_warnings': [
            'Failed to download MPD manifest'
        ],
        'params': {
            'skip_download': True,
        },
    }
    # Filled with the numeric Brightcove video id found in the page.
    BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/3812193411001/default_default/index.html?videoId=%s'
    def _real_extract(self, url):
        """Find the embedded Brightcove video id and hand off to BrightcoveNew."""
        page_slug = self._match_id(url)
        page_html = self._download_webpage(url, page_slug)
        video_ref = self._search_regex(
            r'<param\s*name="@videoPlayer"\s*value="(\d+)"',
            page_html, 'brightcove id')
        player_url = self.BRIGHTCOVE_URL_TEMPLATE % video_ref
        return self.url_result(player_url, 'BrightcoveNew', video_ref)
--- a/youtube_dl/extractor/udemy.py
+++ b/youtube_dl/extractor/udemy.py
@ -144,7 +144,8 @@ class UdemyIE(InfoExtractor):
        webpage = self._download_webpage(url, lecture_id)
        course_id = self._search_regex(
-            r'data-course-id=["\'](\d+)', webpage, 'course id')
+            (r'data-course-id=["\'](\d+)', r'&quot;id&quot;\s*:\s*(\d+)'),
            webpage, 'course id')
        try:
            lecture = self._download_lecture(course_id, lecture_id)
--- a/youtube_dl/extractor/wistia.py
+++ b/youtube_dl/extractor/wistia.py
@ -4,6 +4,7 @@ from .common import InfoExtractor
 from ..utils import (
    ExtractorError,
    sanitized_Request,
    int_or_none,
 )
@ -18,6 +19,9 @@ class WistiaIE(InfoExtractor):
            'id': 'sh7fpupwlt',
            'ext': 'mov',
            'title': 'Being Resourceful',
            'description': 'a Clients From Hell Video Series video from worldwidewebhosting',
            'upload_date': '20131204',
            'timestamp': 1386185018,
            'duration': 117,
        },
    }
@ -32,26 +36,32 @@ class WistiaIE(InfoExtractor):
            raise ExtractorError('Error while getting the playlist',
                                 expected=True)
        data = data_json['media']
        title = data['name']
        formats = []
        thumbnails = []
        for a in data['assets']:
            astatus = a.get('status')
            atype = a.get('type')
-            if atype == 'still':
+            if (astatus is not None and astatus != 2) or atype == 'preview':
                continue
            elif atype in ('still', 'still_image'):
                thumbnails.append({
                    'url': a['url'],
                    'resolution': '%dx%d' % (a['width'], a['height']),
                })
-                continue
+            else:
            if atype == 'preview':
                continue
                formats.append({
                    'format_id': atype,
                    'url': a['url'],
-                'width': a['width'],
+                    'tbr': int_or_none(a.get('bitrate')),
-                'height': a['height'],
+                    'vbr': int_or_none(a.get('opt_vbitrate')),
-                'filesize': a['size'],
+                    'width': int_or_none(a.get('width')),
-                'ext': a['ext'],
+                    'height': int_or_none(a.get('height')),
                    'filesize': int_or_none(a.get('size')),
                    'vcodec': a.get('codec'),
                    'container': a.get('container'),
                    'ext': a.get('ext'),
                    'preference': 1 if atype == 'original' else None,
                })
@ -59,8 +69,10 @@ class WistiaIE(InfoExtractor):
        return {
            'id': video_id,
-            'title': data['name'],
+            'title': title,
            'description': data.get('seoDescription'),
            'formats': formats,
            'thumbnails': thumbnails,
-            'duration': data.get('duration'),
+            'duration': int_or_none(data.get('duration')),
            'timestamp': int_or_none(data.get('createdAt')),
        }
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@ -309,6 +309,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
        '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
        # Apple HTTP Live Streaming
        '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
        '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
        '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
        '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@ -35,6 +35,7 @@ import xml.etree.ElementTree
 import zlib
 from .compat import (
    compat_HTMLParser,
    compat_basestring,
    compat_chr,
    compat_etree_fromstring,
@ -49,6 +50,7 @@ from .compat import (
    compat_urllib_parse_urlparse,
    compat_urllib_request,
    compat_urlparse,
    compat_xpath,
    shlex_quote,
 )
@ -164,12 +166,7 @@ if sys.version_info >= (2, 7):
        return node.find(expr)
 else:
    def find_xpath_attr(node, xpath, key, val=None):
-        # Here comes the crazy part: In 2.6, if the xpath is a unicode,
+        for f in node.findall(compat_xpath(xpath)):
        # .//node does not match if a node is a direct child of . !
        if isinstance(xpath, compat_str):
            xpath = xpath.encode('ascii')
        for f in node.findall(xpath):
            if key not in f.attrib:
                continue
            if val is None or f.attrib.get(key) == val:
@ -194,9 +191,7 @@ def xpath_with_ns(path, ns_map):
 def xpath_element(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    def _find_xpath(xpath):
-        if sys.version_info < (2, 7):  # Crazy 2.6
+        return node.find(compat_xpath(xpath))
            xpath = xpath.encode('ascii')
        return node.find(xpath)
    if isinstance(xpath, (str, compat_str)):
        n = _find_xpath(xpath)
@ -273,6 +268,38 @@ def get_element_by_attribute(attribute, value, html):
    return unescapeHTML(res)
 class HTMLAttributeParser(compat_HTMLParser):
    """Minimal HTML parser that records the attributes of one start tag.

    After feeding markup, ``attrs`` holds the attribute mapping of the most
    recently seen start tag (empty if none was seen).
    """
    def __init__(self):
        compat_HTMLParser.__init__(self)
        # Attributes of the last start tag handled so far.
        self.attrs = {}
    def handle_starttag(self, tag, attrs):
        # attrs arrives as a sequence of (name, value) pairs; each start tag
        # replaces whatever was collected before.
        self.attrs = dict((name, value) for name, value in attrs)
 def extract_attributes(html_element):
    """Decode the attributes of a single HTML element into a dictionary.

    Given a string for an HTML element such as
    <el
         a="foo" B="bar" c="&#98;az" d=boz
         empty= noval entity="&amp;"
         sq='"' dq="'"
    >
    the returned dictionary is
    {
        'a': 'foo', 'b': 'bar', 'c': 'baz', 'd': 'boz',
        'empty': '', 'noval': None, 'entity': '&',
        'sq': '"', 'dq': '\''
    }.
    NB HTMLParser is stricter in Python 2.6 & 3.2 than in later versions,
    but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5.
    """
    attr_parser = HTMLAttributeParser()
    attr_parser.feed(html_element)
    # close() makes the parser process any input it is still buffering.
    attr_parser.close()
    return attr_parser.attrs
 def clean_html(html):
    """Clean an HTML snippet into a readable string"""
@ -1319,7 +1346,7 @@ def format_bytes(bytes):
 def lookup_unit_table(unit_table, s):
    units_re = '|'.join(re.escape(u) for u in unit_table)
    m = re.match(
-        r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)' % units_re, s)
+        r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)\b' % units_re, s)
    if not m:
        return None
    num_str = m.group('num').replace(',', '.')
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@ -1,3 +1,3 @@
 from __future__ import unicode_literals
-__version__ = '2016.03.14'
+__version__ = '2016.03.18'
`@ -1,3 +1,3 @@`
	`from __future__ import unicode_literals`	`from __future__ import unicode_literals`

	`__version__ = '2016.03.14'`	`__version__ = '2016.03.18'`