Merge pull request #3 from rg3/master

update
2016-03-19 19:25:00 +05:30 · 2016-03-19 19:25:00 +05:30 · e58df5787a
commit e58df5787a
parent 73fd1ab30a 782b1b5bd1
27 changed files with 620 additions and 154 deletions
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@ -81,6 +81,7 @@
 - **BokeCC**
 - **Bpb**: Bundeszentrale für politische Bildung
 - **BR**: Bayerischer Rundfunk Mediathek
+ - **BravoTV**
 - **Break**
 - **brightcove:legacy**
 - **brightcove:new**
@ -499,6 +500,7 @@
 - **Restudy**
 - **ReverbNation**
 - **Revision3**
+ - **RICE**
 - **RingTV**
 - **RottenTomatoes**
 - **Roxwel**
@ -617,6 +619,7 @@
 - **ThePlatform**
 - **ThePlatformFeed**
 - **TheSixtyOne**
+ - **TheStar**
 - **ThisAmericanLife**
 - **ThisAV**
 - **THVideo**
@ -650,6 +653,7 @@
 - **tv.dfb.de**
 - **TV2**
 - **TV2Article**
+ - **TV3**
 - **TV4**: tv4.se and tv4play.se
 - **TVC**
 - **TVCArticle**
--- a/test/test_YoutubeDL.py
+++ b/test/test_YoutubeDL.py
@ -222,6 +222,11 @@ class TestFormatSelection(unittest.TestCase):
        downloaded = ydl.downloaded_info_dicts[0]
        self.assertEqual(downloaded['format_id'], 'dash-video-low')

+        ydl = YDL({'format': 'bestvideo[format_id^=dash][format_id$=low]'})
+        ydl.process_ie_result(info_dict.copy())
+        downloaded = ydl.downloaded_info_dicts[0]
+        self.assertEqual(downloaded['format_id'], 'dash-video-low')
+
        formats = [
            {'format_id': 'vid-vcodec-dot', 'ext': 'mp4', 'preference': 1, 'vcodec': 'avc1.123456', 'acodec': 'none', 'url': TEST_URL},
        ]
--- a/test/test_utils.py
+++ b/test/test_utils.py
@ -28,6 +28,7 @@ from youtube_dl.utils import (
    encodeFilename,
    escape_rfc3986,
    escape_url,
+    extract_attributes,
    ExtractorError,
    find_xpath_attr,
    fix_xml_ampersands,
@ -77,6 +78,7 @@ from youtube_dl.utils import (
    cli_bool_option,
 )
 from youtube_dl.compat import (
+    compat_chr,
    compat_etree_fromstring,
    compat_urlparse,
    compat_parse_qs,
@ -629,6 +631,44 @@ class TestUtil(unittest.TestCase):
        on = js_to_json('{"abc": "def",}')
        self.assertEqual(json.loads(on), {'abc': 'def'})

+    def test_extract_attributes(self):
+        # Exercises extract_attributes() on a single opening tag and checks
+        # the returned {attribute name: value} dict.
+        # Quoting styles: double-quoted, single-quoted and unquoted values.
+        self.assertEqual(extract_attributes('<e x="y">'), {'x': 'y'})
+        self.assertEqual(extract_attributes("<e x='y'>"), {'x': 'y'})
+        self.assertEqual(extract_attributes('<e x=y>'), {'x': 'y'})
+        self.assertEqual(extract_attributes('<e x="a \'b\' c">'), {'x': "a 'b' c"})
+        self.assertEqual(extract_attributes('<e x=\'a "b" c\'>'), {'x': 'a "b" c'})
+        # Character references: decimal, hexadecimal and named entities are
+        # decoded; an unknown entity name is left as-is.
+        self.assertEqual(extract_attributes('<e x="&#121;">'), {'x': 'y'})
+        self.assertEqual(extract_attributes('<e x="&#x79;">'), {'x': 'y'})
+        self.assertEqual(extract_attributes('<e x="&amp;">'), {'x': '&'})  # XML
+        self.assertEqual(extract_attributes('<e x="&quot;">'), {'x': '"'})
+        self.assertEqual(extract_attributes('<e x="&pound;">'), {'x': '£'})  # HTML 3.2
+        self.assertEqual(extract_attributes('<e x="&lambda;">'), {'x': 'λ'})  # HTML 4.0
+        self.assertEqual(extract_attributes('<e x="&foo">'), {'x': '&foo'})
+        self.assertEqual(extract_attributes('<e x="\'">'), {'x': "'"})
+        self.assertEqual(extract_attributes('<e x=\'"\'>'), {'x': '"'})
+        # Attributes without a value map to None.
+        self.assertEqual(extract_attributes('<e x >'), {'x': None})
+        self.assertEqual(extract_attributes('<e x=y a>'), {'x': 'y', 'a': None})
+        self.assertEqual(extract_attributes('<e x= y>'), {'x': 'y'})
+        # For a repeated attribute the last occurrence wins.
+        self.assertEqual(extract_attributes('<e x=1 y=2 x=3>'), {'y': '2', 'x': '3'})
+        # Newlines around names and values are ignored; newlines inside a
+        # quoted value are preserved.
+        self.assertEqual(extract_attributes('<e \nx=\ny\n>'), {'x': 'y'})
+        self.assertEqual(extract_attributes('<e \nx=\n"y"\n>'), {'x': 'y'})
+        self.assertEqual(extract_attributes("<e \nx=\n'y'\n>"), {'x': 'y'})
+        self.assertEqual(extract_attributes('<e \nx="\ny\n">'), {'x': '\ny\n'})
+        self.assertEqual(extract_attributes('<e CAPS=x>'), {'caps': 'x'})  # Names lowercased
+        # Names differing only in case collapse to one key; the last wins.
+        self.assertEqual(extract_attributes('<e x=1 X=2>'), {'x': '2'})
+        self.assertEqual(extract_attributes('<e X=1 x=2>'), {'x': '2'})
+        # Underscore, colon, hyphen and digits are accepted in names.
+        self.assertEqual(extract_attributes('<e _:funny-name1=1>'), {'_:funny-name1': '1'})
+        # Non-ASCII values, including a combining character given as a
+        # numeric reference, pass through.
+        self.assertEqual(extract_attributes('<e x="Fáilte 世界 \U0001f600">'), {'x': 'Fáilte 世界 \U0001f600'})
+        self.assertEqual(extract_attributes('<e x="décompose&#769;">'), {'x': 'décompose\u0301'})
+        # "Narrow" Python builds don't support unicode code points outside BMP.
+        # Probe for that first so the final assertion only runs where the
+        # expected character can be represented.
+        try:
+            compat_chr(0x10000)
+            supports_outside_bmp = True
+        except ValueError:
+            supports_outside_bmp = False
+        if supports_outside_bmp:
+            self.assertEqual(extract_attributes('<e x="Smile &#128512;!">'), {'x': 'Smile \U0001f600!'})
+
    def test_clean_html(self):
        self.assertEqual(clean_html('a:\nb'), 'a: b')
        self.assertEqual(clean_html('a:\n   "b"'), 'a:    "b"')
@ -662,6 +702,8 @@ class TestUtil(unittest.TestCase):
        self.assertEqual(parse_count('1.000'), 1000)
        self.assertEqual(parse_count('1.1k'), 1100)
        self.assertEqual(parse_count('1.1kk'), 1100000)
+        self.assertEqual(parse_count('1.1kk '), 1100000)
+        self.assertEqual(parse_count('1.1kk views'), 1100000)

    def test_version_tuple(self):
        self.assertEqual(version_tuple('1'), (1,))
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@ -905,7 +905,7 @@ class YoutubeDL(object):
                '*=': lambda attr, value: value in attr,
            }
            str_operator_rex = re.compile(r'''(?x)
-                \s*(?P<key>ext|acodec|vcodec|container|protocol)
+                \s*(?P<key>ext|acodec|vcodec|container|protocol|format_id)
                \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?
                \s*(?P<value>[a-zA-Z0-9._-]+)
                \s*$
--- a/youtube_dl/compat.py
+++ b/youtube_dl/compat.py
@ -77,6 +77,11 @@ try:
 except ImportError:  # Python 2
    from urllib import urlretrieve as compat_urlretrieve

+try:
+    from html.parser import HTMLParser as compat_HTMLParser
+except ImportError:  # Python 2
+    from HTMLParser import HTMLParser as compat_HTMLParser
+

 try:
    from subprocess import DEVNULL
@ -251,6 +256,16 @@ else:
                el.text = el.text.decode('utf-8')
        return doc

+# compat_xpath(xpath) returns the xpath expression in the form that is safe
+# to use on the running interpreter: on Python < 2.7 a text (compat_str)
+# xpath is encoded to an ASCII byte string, otherwise it is returned as-is.
+if sys.version_info < (2, 7):
+    # Here comes the crazy part: In 2.6, if the xpath is a unicode,
+    # .//node does not match if a node is a direct child of . !
+    def compat_xpath(xpath):
+        # Only text xpaths are converted; anything else passes through.
+        if isinstance(xpath, compat_str):
+            xpath = xpath.encode('ascii')
+        return xpath
+else:
+    # No workaround needed on 2.7+: identity function.
+    compat_xpath = lambda xpath: xpath
+
 try:
    from urllib.parse import parse_qs as compat_parse_qs
 except ImportError:  # Python 2
@ -543,6 +558,7 @@ else:
    from tokenize import generate_tokens as compat_tokenize_tokenize

 __all__ = [
+    'compat_HTMLParser',
    'compat_HTTPError',
    'compat_basestring',
    'compat_chr',
@ -579,6 +595,7 @@ __all__ = [
    'compat_urlparse',
    'compat_urlretrieve',
    'compat_xml_parse_error',
+    'compat_xpath',
    'shlex_quote',
    'subprocess_check_output',
    'workaround_optparse_bug9161',
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@ -81,6 +81,7 @@ from .bloomberg import BloombergIE
 from .bokecc import BokeCCIE
 from .bpb import BpbIE
 from .br import BRIE
+from .bravotv import BravoTVIE
 from .breakcom import BreakIE
 from .brightcove import (
    BrightcoveLegacyIE,
@ -135,6 +136,7 @@ from .collegerama import CollegeRamaIE
 from .comedycentral import ComedyCentralIE, ComedyCentralShowsIE
 from .comcarcoff import ComCarCoffIE
 from .commonmistakes import CommonMistakesIE, UnicodeBOMIE
+from .commonprotocols import RtmpIE
 from .condenast import CondeNastIE
 from .cracked import CrackedIE
 from .crackle import CrackleIE
@ -282,6 +284,7 @@ from .goshgay import GoshgayIE
 from .gputechconf import GPUTechConfIE
 from .groupon import GrouponIE
 from .hark import HarkIE
+from .hbo import HBOIE
 from .hearthisat import HearThisAtIE
 from .heise import HeiseIE
 from .hellporno import HellPornoIE
@ -784,6 +787,7 @@ from .tv2 import (
    TV2IE,
    TV2ArticleIE,
 )
+from .tv3 import TV3IE
 from .tv4 import TV4IE
 from .tvc import (
    TVCIE,
--- a/youtube_dl/extractor/animeondemand.py
+++ b/youtube_dl/extractor/animeondemand.py
@ -18,7 +18,7 @@ class AnimeOnDemandIE(InfoExtractor):
    _LOGIN_URL = 'https://www.anime-on-demand.de/users/sign_in'
    _APPLY_HTML5_URL = 'https://www.anime-on-demand.de/html5apply'
    _NETRC_MACHINE = 'animeondemand'
-    _TEST = {
+    _TESTS = [{
        'url': 'https://www.anime-on-demand.de/anime/161',
        'info_dict': {
            'id': '161',
@ -26,7 +26,15 @@ class AnimeOnDemandIE(InfoExtractor):
            'description': 'md5:6681ce3c07c7189d255ac6ab23812d31',
        },
        'playlist_mincount': 4,
-    }
+    }, {
+        # Film wording is used instead of Episode
+        'url': 'https://www.anime-on-demand.de/anime/39',
+        'only_matching': True,
+    }, {
+        # Episodes without titles
+        'url': 'https://www.anime-on-demand.de/anime/162',
+        'only_matching': True,
+    }]

    def _login(self):
        (username, password) = self._get_login_info()
@ -91,14 +99,22 @@ class AnimeOnDemandIE(InfoExtractor):

        entries = []

-        for episode_html in re.findall(r'(?s)<h3[^>]+class="episodebox-title".+?>Episodeninhalt<', webpage):
-            m = re.search(
-                r'class="episodebox-title"[^>]+title="Episode (?P<number>\d+) - (?P<title>.+?)"', episode_html)
-            if not m:
+        for num, episode_html in enumerate(re.findall(
+                r'(?s)<h3[^>]+class="episodebox-title".+?>Episodeninhalt<', webpage), 1):
+            episodebox_title = self._search_regex(
+                (r'class="episodebox-title"[^>]+title=(["\'])(?P<title>.+?)\1',
+                 r'class="episodebox-title"[^>]+>(?P<title>.+?)<'),
+                episode_html, 'episodebox title', default=None, group='title')
+            if not episodebox_title:
                continue

-            episode_number = int(m.group('number'))
-            episode_title = m.group('title')
+            episode_number = int(self._search_regex(
+                r'(?:Episode|Film)\s*(\d+)',
+                episodebox_title, 'episode number', default=num))
+            episode_title = self._search_regex(
+                r'(?:Episode|Film)\s*\d+\s*-\s*(.+)',
+                episodebox_title, 'episode title', default=None)
+
            video_id = 'episode-%d' % episode_number

            common_info = {
--- a/youtube_dl/extractor/bravotv.py
+++ b/youtube_dl/extractor/bravotv.py
@ -0,0 +1,28 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import smuggle_url
+
+
+class BravoTVIE(InfoExtractor):
+    # Extractor for bravotv.com video pages. It does not resolve formats
+    # itself: it builds a link.theplatform.com URL from identifiers found in
+    # the page and delegates to the 'ThePlatform' extractor.
+    #
+    # The URL must contain one or more path segments, then 'videos/', then
+    # the slug that is captured as the id.
+    _VALID_URL = r'https?://(?:www\.)?bravotv\.com/(?:[^/]+/)+videos/(?P<id>[^/?]+)'
+    _TEST = {
+        'url': 'http://www.bravotv.com/last-chance-kitchen/season-5/videos/lck-ep-12-fishy-finale',
+        'md5': 'd60cdf68904e854fac669bd26cccf801',
+        'info_dict': {
+            'id': 'LitrBdX64qLn',
+            'ext': 'mp4',
+            'title': 'Last Chance Kitchen Returns',
+            'description': 'S13: Last Chance Kitchen Returns for Top Chef Season 13',
+        }
+    }
+
+    def _real_extract(self, url):
+        # The slug from the URL is only used to label the page download; the
+        # id handed to url_result() below is the release pid.
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+        # Both pids are read from "key": "value" pairs in the page source.
+        account_pid = self._search_regex(r'"account_pid"\s*:\s*"([^"]+)"', webpage, 'account pid')
+        release_pid = self._search_regex(r'"release_pid"\s*:\s*"([^"]+)"', webpage, 'release pid')
+        # The smuggled 'force_smil_url' flag is passed along with the URL for
+        # the target extractor to read.
+        return self.url_result(smuggle_url(
+            'http://link.theplatform.com/s/%s/%s?mbr=true&switch=progressive' % (account_pid, release_pid),
+            {'force_smil_url': True}), 'ThePlatform', release_pid)
--- a/youtube_dl/extractor/brightcove.py
+++ b/youtube_dl/extractor/brightcove.py
@ -9,7 +9,6 @@ from ..compat import (
    compat_etree_fromstring,
    compat_parse_qs,
    compat_str,
-    compat_urllib_parse,
    compat_urllib_parse_urlparse,
    compat_urlparse,
    compat_xml_parse_error,
@ -24,16 +23,16 @@ from ..utils import (
    js_to_json,
    int_or_none,
    parse_iso8601,
-    sanitized_Request,
    unescapeHTML,
    unsmuggle_url,
+    update_url_query,
 )


 class BrightcoveLegacyIE(InfoExtractor):
    IE_NAME = 'brightcove:legacy'
    _VALID_URL = r'(?:https?://.*brightcove\.com/(services|viewer).*?\?|brightcove:)(?P<query>.*)'
-    _FEDERATED_URL_TEMPLATE = 'http://c.brightcove.com/services/viewer/htmlFederated?%s'
+    _FEDERATED_URL = 'http://c.brightcove.com/services/viewer/htmlFederated'

    _TESTS = [
        {
@ -156,7 +155,7 @@ class BrightcoveLegacyIE(InfoExtractor):
        # Not all pages define this value
        if playerKey is not None:
            params['playerKey'] = playerKey
-        # The three fields hold the id of the video
+        # These fields hold the id of the video
        videoPlayer = find_param('@videoPlayer') or find_param('videoId') or find_param('videoID') or find_param('@videoList')
        if videoPlayer is not None:
            params['@videoPlayer'] = videoPlayer
@ -185,8 +184,7 @@ class BrightcoveLegacyIE(InfoExtractor):

    @classmethod
    def _make_brightcove_url(cls, params):
-        data = compat_urllib_parse.urlencode(params)
-        return cls._FEDERATED_URL_TEMPLATE % data
+        return update_url_query(cls._FEDERATED_URL, params)

    @classmethod
    def _extract_brightcove_url(cls, webpage):
@ -240,7 +238,7 @@ class BrightcoveLegacyIE(InfoExtractor):
            # We set the original url as the default 'Referer' header
            referer = smuggled_data.get('Referer', url)
            return self._get_video_info(
-                videoPlayer[0], query_str, query, referer=referer)
+                videoPlayer[0], query, referer=referer)
        elif 'playerKey' in query:
            player_key = query['playerKey']
            return self._get_playlist_info(player_key[0])
@ -249,15 +247,14 @@ class BrightcoveLegacyIE(InfoExtractor):
                'Cannot find playerKey= variable. Did you forget quotes in a shell invocation?',
                expected=True)

-    def _get_video_info(self, video_id, query_str, query, referer=None):
-        request_url = self._FEDERATED_URL_TEMPLATE % query_str
-        req = sanitized_Request(request_url)
+    def _get_video_info(self, video_id, query, referer=None):
+        headers = {}
        linkBase = query.get('linkBaseURL')
        if linkBase is not None:
            referer = linkBase[0]
        if referer is not None:
-            req.add_header('Referer', referer)
-        webpage = self._download_webpage(req, video_id)
+            headers['Referer'] = referer
+        webpage = self._download_webpage(self._FEDERATED_URL, video_id, headers=headers, query=query)

        error_msg = self._html_search_regex(
            r"<h1>We're sorry.</h1>([\s\n]*<p>.*?</p>)+", webpage,
@ -415,8 +412,8 @@ class BrightcoveNewIE(InfoExtractor):

        # Look for iframe embeds [1]
        for _, url in re.findall(
-                r'<iframe[^>]+src=(["\'])((?:https?:)//players\.brightcove\.net/\d+/[^/]+/index\.html.+?)\1', webpage):
-            entries.append(url)
+                r'<iframe[^>]+src=(["\'])((?:https?:)?//players\.brightcove\.net/\d+/[^/]+/index\.html.+?)\1', webpage):
+            entries.append(url if url.startswith('http') else 'http:' + url)

        # Look for embed_in_page embeds [2]
        for video_id, account_id, player_id, embed in re.findall(
@ -459,12 +456,11 @@ class BrightcoveNewIE(InfoExtractor):
                r'policyKey\s*:\s*(["\'])(?P<pk>.+?)\1',
                webpage, 'policy key', group='pk')

-        req = sanitized_Request(
-            'https://edge.api.brightcove.com/playback/v1/accounts/%s/videos/%s'
-            % (account_id, video_id),
-            headers={'Accept': 'application/json;pk=%s' % policy_key})
+        api_url = 'https://edge.api.brightcove.com/playback/v1/accounts/%s/videos/%s' % (account_id, video_id)
        try:
-            json_data = self._download_json(req, video_id)
+            json_data = self._download_json(api_url, video_id, headers={
+                'Accept': 'application/json;pk=%s' % policy_key
+            })
        except ExtractorError as e:
            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
                json_data = self._parse_json(e.cause.read().decode(), video_id)
@ -482,8 +478,7 @@ class BrightcoveNewIE(InfoExtractor):
                if not src:
                    continue
                formats.extend(self._extract_m3u8_formats(
-                    src, video_id, 'mp4', entry_protocol='m3u8_native',
-                    m3u8_id='hls', fatal=False))
+                    src, video_id, 'mp4', m3u8_id='hls', fatal=False))
            elif source_type == 'application/dash+xml':
                if not src:
                    continue
--- a/youtube_dl/extractor/cbsnews.py
+++ b/youtube_dl/extractor/cbsnews.py
@ -78,7 +78,7 @@ class CBSNewsIE(ThePlatformIE):
            pid = item.get('media' + format_id)
            if not pid:
                continue
-            release_url = 'http://link.theplatform.com/s/dJ5BDC/%s?format=SMIL&mbr=true' % pid
+            release_url = 'http://link.theplatform.com/s/dJ5BDC/%s?mbr=true' % pid
            tp_formats, tp_subtitles = self._extract_theplatform_smil(release_url, video_id, 'Downloading %s SMIL data' % pid)
            formats.extend(tp_formats)
            subtitles = self._merge_subtitles(subtitles, tp_subtitles)
--- a/youtube_dl/extractor/cnet.py
+++ b/youtube_dl/extractor/cnet.py
@ -60,7 +60,7 @@ class CNETIE(ThePlatformIE):
        for (fkey, vid) in vdata['files'].items():
            if fkey == 'hls_phone' and 'hls_tablet' in vdata['files']:
                continue
-            release_url = 'http://link.theplatform.com/s/kYEXFC/%s?format=SMIL&mbr=true' % vid
+            release_url = 'http://link.theplatform.com/s/kYEXFC/%s?mbr=true' % vid
            if fkey == 'hds':
                release_url += '&manifest=f4m'
            tp_formats, tp_subtitles = self._extract_theplatform_smil(release_url, video_id, 'Downloading %s SMIL data' % fkey)
--- a/youtube_dl/extractor/commonprotocols.py
+++ b/youtube_dl/extractor/commonprotocols.py
@ -0,0 +1,36 @@
+from __future__ import unicode_literals
+
+import os
+
+from .common import InfoExtractor
+from ..compat import (
+    compat_urllib_parse_unquote,
+    compat_urlparse,
+)
+from ..utils import url_basename
+
+
+class RtmpIE(InfoExtractor):
+    # Extractor for bare RTMP URLs: the URL itself is returned as the single
+    # downloadable format, no network request is made here.
+    IE_DESC = False  # Do not list
+    # Case-insensitive match of rtmp://, rtmpe://, rtmps:// and rtmpt:// URLs.
+    _VALID_URL = r'(?i)rtmp[est]?://.+'
+
+    _TESTS = [{
+        'url': 'rtmp://cp44293.edgefcs.net/ondemand?auth=daEcTdydfdqcsb8cZcDbAaCbhamacbbawaS-bw7dBb-bWG-GqpGFqCpNCnGoyL&aifp=v001&slist=public/unsecure/audio/2c97899446428e4301471a8cb72b4b97--audio--pmg-20110908-0900a_flv_aac_med_int.mp4',
+        'only_matching': True,
+    }, {
+        'url': 'rtmp://edge.live.hitbox.tv/live/dimak',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        # id: last '/'-separated piece of the raw URL (trailing slashes
+        # removed), extension stripped, percent-decoded.
+        video_id = compat_urllib_parse_unquote(os.path.splitext(url.rstrip('/').split('/')[-1])[0])
+        # title: same treatment, but based on url_basename(url).
+        # NOTE(review): id and title can differ when the URL carries a query
+        # string containing '/' (see the first test URL) - confirm intended.
+        title = compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0])
+        return {
+            'id': video_id,
+            'title': title,
+            'formats': [{
+                'url': url,
+                'ext': 'flv',
+                # The format is named after the URL scheme.
+                'format_id': compat_urlparse.urlparse(url).scheme,
+            }],
+        }
--- a/youtube_dl/extractor/crunchyroll.py
+++ b/youtube_dl/extractor/crunchyroll.py
@ -54,7 +54,7 @@ class CrunchyrollBaseIE(InfoExtractor):
    def _real_initialize(self):
        self._login()

-    def _download_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, tries=1, timeout=5, encoding=None):
+    def _download_webpage(self, url_or_request, *args, **kwargs):
        request = (url_or_request if isinstance(url_or_request, compat_urllib_request.Request)
                   else sanitized_Request(url_or_request))
        # Accept-Language must be set explicitly to accept any language to avoid issues
@ -65,8 +65,7 @@ class CrunchyrollBaseIE(InfoExtractor):
        # Crunchyroll to not work in georestriction cases in some browsers that don't place
        # the locale lang first in header. However allowing any language seems to workaround the issue.
        request.add_header('Accept-Language', '*')
-        return super(CrunchyrollBaseIE, self)._download_webpage(
-            request, video_id, note, errnote, fatal, tries, timeout, encoding)
+        return super(CrunchyrollBaseIE, self)._download_webpage(request, *args, **kwargs)

    @staticmethod
    def _add_skip_wall(url):
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@ -239,6 +239,35 @@ class GenericIE(InfoExtractor):
                'format': 'bestvideo',
            },
        },
+        # m3u8 served with Content-Type: audio/x-mpegURL; charset=utf-8
+        {
+            'url': 'http://once.unicornmedia.com/now/master/playlist/bb0b18ba-64f5-4b1b-a29f-0ac252f06b68/77a785f3-5188-4806-b788-0893a61634ed/93677179-2d99-4ef4-9e17-fe70d49abfbf/content.m3u8',
+            'info_dict': {
+                'id': 'content',
+                'ext': 'mp4',
+                'title': 'content',
+                'formats': 'mincount:8',
+            },
+            'params': {
+                # m3u8 downloads
+                'skip_download': True,
+            }
+        },
+        # m3u8 served with Content-Type: text/plain
+        {
+            'url': 'http://www.nacentapps.com/m3u8/index.m3u8',
+            'info_dict': {
+                'id': 'index',
+                'ext': 'mp4',
+                'title': 'index',
+                'upload_date': '20140720',
+                'formats': 'mincount:11',
+            },
+            'params': {
+                # m3u8 downloads
+                'skip_download': True,
+            }
+        },
        # google redirect
        {
            'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE',
@ -1245,14 +1274,13 @@ class GenericIE(InfoExtractor):
        info_dict = {
            'id': video_id,
            'title': compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]),
+            'upload_date': unified_strdate(head_response.headers.get('Last-Modified'))
        }

        # Check for direct link to a video
-        content_type = head_response.headers.get('Content-Type', '')
-        m = re.match(r'^(?P<type>audio|video|application(?=/(?:ogg$|(?:vnd\.apple\.|x-)?mpegurl)))/(?P<format_id>.+)$', content_type)
+        content_type = head_response.headers.get('Content-Type', '').lower()
+        m = re.match(r'^(?P<type>audio|video|application(?=/(?:ogg$|(?:vnd\.apple\.|x-)?mpegurl)))/(?P<format_id>[^;\s]+)', content_type)
        if m:
-            upload_date = unified_strdate(
-                head_response.headers.get('Last-Modified'))
            format_id = m.group('format_id')
            if format_id.endswith('mpegurl'):
                formats = self._extract_m3u8_formats(url, video_id, 'mp4')
@ -1264,11 +1292,8 @@ class GenericIE(InfoExtractor):
                    'url': url,
                    'vcodec': 'none' if m.group('type') == 'audio' else None
                }]
-            info_dict.update({
-                'direct': True,
-                'formats': formats,
-                'upload_date': upload_date,
-            })
+                info_dict['direct'] = True
+            info_dict['formats'] = formats
            return info_dict

        if not self._downloader.params.get('test', False) and not is_intentional:
@ -1289,18 +1314,21 @@ class GenericIE(InfoExtractor):
            request.add_header('Accept-Encoding', '*')
            full_response = self._request_webpage(request, video_id)

+        first_bytes = full_response.read(512)
+
+        # Is it an M3U playlist?
+        if first_bytes.startswith(b'#EXTM3U'):
+            info_dict['formats'] = self._extract_m3u8_formats(url, video_id, 'mp4')
+            return info_dict
+
        # Maybe it's a direct link to a video?
        # Be careful not to download the whole thing!
-        first_bytes = full_response.read(512)
        if not is_html(first_bytes):
            self._downloader.report_warning(
                'URL could be a direct video link, returning it as such.')
-            upload_date = unified_strdate(
-                head_response.headers.get('Last-Modified'))
            info_dict.update({
                'direct': True,
                'url': url,
-                'upload_date': upload_date,
            })
            return info_dict

--- a/youtube_dl/extractor/hbo.py
+++ b/youtube_dl/extractor/hbo.py
@ -0,0 +1,122 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    xpath_text,
+    xpath_element,
+    int_or_none,
+    parse_duration,
+)
+
+
+class HBOIE(InfoExtractor):
+    # Extractor for hbo.com video pages. The numeric 'vid' query parameter is
+    # the video id; all metadata comes from a per-video XML document on
+    # render.lv3.hbo.com.
+    _VALID_URL = r'https?://(?:www\.)?hbo\.com/video/video\.html\?.*vid=(?P<id>[0-9]+)'
+    _TEST = {
+        'url': 'http://www.hbo.com/video/video.html?autoplay=true&g=u&vid=1437839',
+        'md5': '1c33253f0c7782142c993c0ba62a8753',
+        'info_dict': {
+            'id': '1437839',
+            'ext': 'mp4',
+            'title': 'Ep. 64 Clip: Encryption',
+        }
+    }
+    # Frame dimensions per source identifier. The key is looked up with either
+    # the 'width' attribute of a <size> element or the tag name of any other
+    # source element (see _real_extract).
+    # NOTE(review): the keys are identifiers, not necessarily the real width
+    # ('1920' maps to 1280x720) - confirm against the feed.
+    _FORMATS_INFO = {
+        '1920': {
+            'width': 1280,
+            'height': 720,
+        },
+        '640': {
+            'width': 768,
+            'height': 432,
+        },
+        'highwifi': {
+            'width': 640,
+            'height': 360,
+        },
+        'high3g': {
+            'width': 640,
+            'height': 360,
+        },
+        'medwifi': {
+            'width': 400,
+            'height': 224,
+        },
+        'med3g': {
+            'width': 400,
+            'height': 224,
+        },
+    }
+
+    def _real_extract(self, url):
+        """Download the video XML and build the info dict.
+
+        Returns a dict with 'id', 'title', 'duration', 'formats' and
+        'thumbnails'. The title and the <videos> element are required
+        (looked up with fatal=True); everything else is optional.
+        """
+        video_id = self._match_id(url)
+        video_data = self._download_xml(
+            'http://render.lv3.hbo.com/data/content/global/videos/data/%s.xml' % video_id, video_id)
+        title = xpath_text(video_data, 'title', 'title', True)
+
+        formats = []
+        for source in xpath_element(video_data, 'videos', 'sources', True):
+            if source.tag == 'size':
+                # <size width="..."> elements carry the media location in a
+                # nested <path>.
+                path = xpath_text(source, './/path')
+                if not path:
+                    continue
+                width = source.attrib.get('width')
+                format_info = self._FORMATS_INFO.get(width, {})
+                height = format_info.get('height')
+                fmt = {
+                    'url': path,
+                    'format_id': 'http%s' % ('-%dp' % height if height else ''),
+                    'width': format_info.get('width'),
+                    'height': height,
+                }
+                # RTMP(E) paths are split into connection URL, app and
+                # 'mp4:' play path, and the format is relabelled 'rtmp...'.
+                rtmp = re.search(r'^(?P<url>rtmpe?://[^/]+/(?P<app>.+))/(?P<playpath>mp4:.+)$', path)
+                if rtmp:
+                    fmt.update({
+                        'url': rtmp.group('url'),
+                        'play_path': rtmp.group('playpath'),
+                        'app': rtmp.group('app'),
+                        'ext': 'flv',
+                        'format_id': fmt['format_id'].replace('http', 'rtmp'),
+                    })
+                formats.append(fmt)
+            else:
+                # Every other child holds the media URL as its text.
+                video_url = source.text
+                if not video_url:
+                    continue
+                if source.tag == 'tarball':
+                    # The HLS playlist URL is derived from the tarball URL by
+                    # replacing '.tar' with '/base_index_w8.m3u8'.
+                    formats.extend(self._extract_m3u8_formats(
+                        video_url.replace('.tar', '/base_index_w8.m3u8'),
+                        video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
+                else:
+                    format_info = self._FORMATS_INFO.get(source.tag, {})
+                    formats.append({
+                        'format_id': 'http-%s' % source.tag,
+                        'url': video_url,
+                        'width': format_info.get('width'),
+                        'height': format_info.get('height'),
+                    })
+        self._sort_formats(formats, ('width', 'height', 'tbr', 'format_id'))
+
+        thumbnails = []
+        card_sizes = xpath_element(video_data, 'titleCardSizes')
+        if card_sizes is not None:
+            for size in card_sizes:
+                path = xpath_text(size, 'path')
+                if not path:
+                    continue
+                width = int_or_none(size.get('width'))
+                thumbnails.append({
+                    'id': width,
+                    'url': path,
+                    'width': width,
+                })
+
+        return {
+            'id': video_id,
+            'title': title,
+            # Read the element's text: parse_duration() works on a duration
+            # string, so passing the element object itself (as xpath_element
+            # returns) could never yield a duration.
+            'duration': parse_duration(xpath_text(video_data, 'duration/tv14')),
+            'formats': formats,
+            'thumbnails': thumbnails,
+        }
--- a/youtube_dl/extractor/nationalgeographic.py
+++ b/youtube_dl/extractor/nationalgeographic.py
@ -48,7 +48,7 @@ class NationalGeographicIE(InfoExtractor):
        theplatform_id = url_basename(content.attrib.get('url'))

        return self.url_result(smuggle_url(
-            'http://link.theplatform.com/s/ngs/%s?format=SMIL&formats=MPEG4&manifest=f4m' % theplatform_id,
+            'http://link.theplatform.com/s/ngs/%s?formats=MPEG4&manifest=f4m' % theplatform_id,
            # For some reason, the normal links don't work and we must force
            # the use of f4m
            {'force_smil_url': True}))
--- a/youtube_dl/extractor/nbc.py
+++ b/youtube_dl/extractor/nbc.py
@ -3,13 +3,16 @@ from __future__ import unicode_literals
 import re

 from .common import InfoExtractor
-from ..compat import compat_HTTPError
+from .theplatform import ThePlatformIE
 from ..utils import (
-    ExtractorError,
    find_xpath_attr,
    lowercase_escape,
    smuggle_url,
    unescapeHTML,
+    update_url_query,
+    int_or_none,
+    HEADRequest,
+    parse_iso8601,
 )


@ -131,10 +134,10 @@ class NBCSportsIE(InfoExtractor):
            NBCSportsVPlayerIE._extract_url(webpage), 'NBCSportsVPlayer')


-class NBCNewsIE(InfoExtractor):
+class NBCNewsIE(ThePlatformIE):
    _VALID_URL = r'''(?x)https?://(?:www\.)?nbcnews\.com/
        (?:video/.+?/(?P<id>\d+)|
-        (?:watch|feature|nightly-news)/[^/]+/(?P<title>.+))
+        ([^/]+/)*(?P<display_id>[^/?]+))
        '''

    _TESTS = [
@ -149,15 +152,14 @@ class NBCNewsIE(InfoExtractor):
            },
        },
        {
-            'url': 'http://www.nbcnews.com/feature/edward-snowden-interview/how-twitter-reacted-snowden-interview-n117236',
-            'md5': 'b2421750c9f260783721d898f4c42063',
+            'url': 'http://www.nbcnews.com/watch/nbcnews-com/how-twitter-reacted-to-the-snowden-interview-269389891880',
+            'md5': 'af1adfa51312291a017720403826bb64',
            'info_dict': {
-                'id': 'I1wpAI_zmhsQ',
+                'id': '269389891880',
                'ext': 'mp4',
                'title': 'How Twitter Reacted To The Snowden Interview',
                'description': 'md5:65a0bd5d76fe114f3c2727aa3a81fe64',
            },
-            'add_ie': ['ThePlatform'],
        },
        {
            'url': 'http://www.nbcnews.com/feature/dateline-full-episodes/full-episode-family-business-n285156',
@ -168,17 +170,29 @@ class NBCNewsIE(InfoExtractor):
                'title': 'FULL EPISODE: Family Business',
                'description': 'md5:757988edbaae9d7be1d585eb5d55cc04',
            },
+            'skip': 'This page is unavailable.',
        },
        {
            'url': 'http://www.nbcnews.com/nightly-news/video/nightly-news-with-brian-williams-full-broadcast-february-4-394064451844',
-            'md5': 'b5dda8cddd8650baa0dcb616dd2cf60d',
+            'md5': '73135a2e0ef819107bbb55a5a9b2a802',
            'info_dict': {
-                'id': 'sekXqyTVnmN3',
+                'id': '394064451844',
                'ext': 'mp4',
                'title': 'Nightly News with Brian Williams Full Broadcast (February 4)',
                'description': 'md5:1c10c1eccbe84a26e5debb4381e2d3c5',
            },
        },
+        {
+            'url': 'http://www.nbcnews.com/business/autos/volkswagen-11-million-vehicles-could-have-suspect-software-emissions-scandal-n431456',
+            'md5': 'a49e173825e5fcd15c13fc297fced39d',
+            'info_dict': {
+                'id': '529953347624',
+                'ext': 'mp4',
+                'title': 'Volkswagen U.S. Chief: We \'Totally Screwed Up\'',
+                'description': 'md5:d22d1281a24f22ea0880741bb4dd6301',
+            },
+            'expected_warnings': ['http-6000 is not available']
+        },
        {
            'url': 'http://www.nbcnews.com/watch/dateline/full-episode--deadly-betrayal-386250819952',
            'only_matching': True,
@ -202,49 +216,79 @@ class NBCNewsIE(InfoExtractor):
            }
        else:
            # "feature" and "nightly-news" pages use theplatform.com
-            title = mobj.group('title')
-            webpage = self._download_webpage(url, title)
+            display_id = mobj.group('display_id')
+            webpage = self._download_webpage(url, display_id)
+            info = None
            bootstrap_json = self._search_regex(
-                r'var\s+(?:bootstrapJson|playlistData)\s*=\s*({.+});?\s*$',
-                webpage, 'bootstrap json', flags=re.MULTILINE)
-            bootstrap = self._parse_json(bootstrap_json, video_id)
-            info = bootstrap['results'][0]['video']
-            mpxid = info['mpxId']
+                r'(?m)var\s+(?:bootstrapJson|playlistData)\s*=\s*({.+});?\s*$',
+                webpage, 'bootstrap json', default=None)
+            if bootstrap_json:
+                bootstrap = self._parse_json(bootstrap_json, display_id)
+                info = bootstrap['results'][0]['video']
+            else:
+                player_instance_json = self._search_regex(
+                    r'videoObj\s*:\s*({.+})', webpage, 'player instance')
+                info = self._parse_json(player_instance_json, display_id)
+            video_id = info['mpxId']
+            title = info['title']

-            base_urls = [
-                info['fallbackPlaylistUrl'],
-                info['associatedPlaylistUrl'],
-            ]
+            subtitles = {}
+            caption_links = info.get('captionLinks')
+            if caption_links:
+                for (sub_key, sub_ext) in (('smpte-tt', 'ttml'), ('web-vtt', 'vtt'), ('srt', 'srt')):
+                    sub_url = caption_links.get(sub_key)
+                    if sub_url:
+                        subtitles.setdefault('en', []).append({
+                            'url': sub_url,
+                            'ext': sub_ext,
+                        })

-            for base_url in base_urls:
-                if not base_url:
+            formats = []
+            for video_asset in info['videoAssets']:
+                video_url = video_asset.get('publicUrl')
+                if not video_url:
                    continue
-                playlist_url = base_url + '?form=MPXNBCNewsAPI'
-
-                try:
-                    all_videos = self._download_json(playlist_url, title)
-                except ExtractorError as ee:
-                    if isinstance(ee.cause, compat_HTTPError):
-                        continue
-                    raise
-
-                if not all_videos or 'videos' not in all_videos:
+                container = video_asset.get('format')
+                asset_type = video_asset.get('assetType') or ''
+                if container == 'ISM' or asset_type == 'FireTV-Once':
                    continue
-
-                try:
-                    info = next(v for v in all_videos['videos'] if v['mpxId'] == mpxid)
-                    break
-                except StopIteration:
-                    continue
-
-            if info is None:
-                raise ExtractorError('Could not find video in playlists')
+                elif asset_type == 'OnceURL':
+                    tp_formats, tp_subtitles = self._extract_theplatform_smil(
+                        video_url, video_id)
+                    formats.extend(tp_formats)
+                    subtitles = self._merge_subtitles(subtitles, tp_subtitles)
+                else:
+                    tbr = int_or_none(video_asset.get('bitRate'), 1000)
+                    format_id = 'http%s' % ('-%d' % tbr if tbr else '')
+                    video_url = update_url_query(
+                        video_url, {'format': 'redirect'})
+                    # resolve the url so that we can check availability and detect the correct extension
+                    head = self._request_webpage(
+                        HEADRequest(video_url), video_id,
+                        'Checking %s url' % format_id,
+                        '%s is not available' % format_id,
+                        fatal=False)
+                    if head:
+                        video_url = head.geturl()
+                        formats.append({
+                            'format_id': format_id,
+                            'url': video_url,
+                            'width': int_or_none(video_asset.get('width')),
+                            'height': int_or_none(video_asset.get('height')),
+                            'tbr': tbr,
+                            'container': video_asset.get('format'),
+                        })
+            self._sort_formats(formats)

            return {
-                '_type': 'url',
-                # We get the best quality video
-                'url': info['videoAssets'][-1]['publicUrl'],
-                'ie_key': 'ThePlatform',
+                'id': video_id,
+                'title': title,
+                'description': info.get('description'),
+                'thumbnail': info.get('thumbnail'),
+                'duration': int_or_none(info.get('duration')),
+                'timestamp': parse_iso8601(info.get('pubDate')),
+                'formats': formats,
+                'subtitles': subtitles,
            }


--- a/youtube_dl/extractor/noz.py
+++ b/youtube_dl/extractor/noz.py
@ -2,7 +2,10 @@
 from __future__ import unicode_literals

 from .common import InfoExtractor
-from ..compat import compat_urllib_parse_unquote
+from ..compat import (
+    compat_urllib_parse_unquote,
+    compat_xpath,
+)
 from ..utils import (
    int_or_none,
    find_xpath_attr,
@ -47,7 +50,7 @@ class NozIE(InfoExtractor):
        duration = int_or_none(xpath_text(
            doc, './/article/movie/file/duration'))
        formats = []
-        for qnode in doc.findall('.//article/movie/file/qualities/qual'):
+        for qnode in doc.findall(compat_xpath('.//article/movie/file/qualities/qual')):
            http_url_ele = find_xpath_attr(
                qnode, './html_urls/video_url', 'format', 'video/mp4')
            http_url = http_url_ele.text if http_url_ele is not None else None
--- a/youtube_dl/extractor/once.py
+++ b/youtube_dl/extractor/once.py
@ -0,0 +1,50 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+
+
+class OnceIE(InfoExtractor):
+    """Helper base class for media served from once.unicornmedia.com.
+
+    Only _extract_once_formats() is defined here; callers pass it a URL
+    matching _VALID_URL.
+    """
+    _VALID_URL = r'https?://once\.unicornmedia\.com/now/[^/]+/[^/]+/(?P<domain_id>[^/]+)/(?P<application_id>[^/]+)/(?:[^/]+/)?(?P<media_item_id>[^/]+)/content\.(?:once|m3u8|mp4)'
+    ADAPTIVE_URL_TEMPLATE = 'http://once.unicornmedia.com/now/master/playlist/%s/%s/%s/content.m3u8'
+    PROGRESSIVE_URL_TEMPLATE = 'http://once.unicornmedia.com/now/media/progressive/%s/%s/%s/%s/content.mp4'
+
+    def _extract_once_formats(self, url):
+        """Return the HLS formats for url plus derived progressive ones.
+
+        url must match _VALID_URL: the domain, application and media item
+        ids captured from it fill in ADAPTIVE_URL_TEMPLATE.
+        """
+        domain_id, application_id, media_item_id = re.match(
+            OnceIE._VALID_URL, url).groups()
+        formats = self._extract_m3u8_formats(
+            self.ADAPTIVE_URL_TEMPLATE % (
+                domain_id, application_id, media_item_id),
+            media_item_id, 'mp4', m3u8_id='hls', fatal=False)
+        progressive_formats = []
+        for adaptive_format in formats:
+            # Each variant playlist URL carries a rendition id; with it the
+            # same rendition can be addressed through PROGRESSIVE_URL_TEMPLATE.
+            rendition_id = self._search_regex(
+                r'/now/media/playlist/[^/]+/[^/]+/([^/]+)',
+                adaptive_format['url'], 'rendition id', default=None)
+            if rendition_id:
+                progressive_format = adaptive_format.copy()
+                progressive_format.update({
+                    'url': self.PROGRESSIVE_URL_TEMPLATE % (
+                        domain_id, application_id, rendition_id, media_item_id),
+                    'format_id': adaptive_format['format_id'].replace(
+                        'hls', 'http'),
+                    'protocol': 'http',
+                })
+                progressive_formats.append(progressive_format)
+        self._check_formats(progressive_formats, media_item_id)
+        formats.extend(progressive_formats)
+        return formats
--- a/youtube_dl/extractor/sbs.py
+++ b/youtube_dl/extractor/sbs.py
@ -2,6 +2,10 @@
 from __future__ import unicode_literals

 from .common import InfoExtractor
+from ..utils import (
+    smuggle_url,
+    ExtractorError,
+)


 class SBSIE(InfoExtractor):
@ -31,21 +35,28 @@ class SBSIE(InfoExtractor):

    def _real_extract(self, url):
        video_id = self._match_id(url)
+        player_params = self._download_json(
+            'http://www.sbs.com.au/api/video_pdkvars/id/%s?form=json' % video_id, video_id)

-        webpage = self._download_webpage(
-            'http://www.sbs.com.au/ondemand/video/single/%s?context=web' % video_id, video_id)
-
-        player_params = self._parse_json(
-            self._search_regex(
-                r'(?s)var\s+playerParams\s*=\s*({.+?});', webpage, 'playerParams'),
-            video_id)
+        error = player_params.get('error')
+        if error:
+            error_message = 'Sorry, The video you are looking for does not exist.'
+            video_data = error.get('results') or {}
+            error_code = error.get('errorCode')
+            if error_code == 'ComingSoon':
+                error_message = '%s is not yet available.' % video_data.get('title', '')
+            elif error_code in ('Forbidden', 'intranetAccessOnly'):
+                error_message = 'Sorry, This video cannot be accessed via this website'
+            elif error_code == 'Expired':
+                error_message = 'Sorry, %s is no longer available.' % video_data.get('title', '')
+            raise ExtractorError('%s said: %s' % (self.IE_NAME, error_message), expected=True)

        urls = player_params['releaseUrls']
-        theplatform_url = (urls.get('progressive') or urls.get('standard') or
-                           urls.get('html') or player_params['relatedItemsURL'])
+        theplatform_url = (urls.get('progressive') or urls.get('html') or
+                           urls.get('standard') or player_params['relatedItemsURL'])

        return {
            '_type': 'url_transparent',
            'id': video_id,
-            'url': theplatform_url,
+            'url': smuggle_url(theplatform_url, {'force_smil_url': True}),
        }
--- a/youtube_dl/extractor/theplatform.py
+++ b/youtube_dl/extractor/theplatform.py
@ -8,13 +8,12 @@ import binascii
 import hashlib


-from .common import InfoExtractor
+from .once import OnceIE
 from ..compat import (
    compat_parse_qs,
    compat_urllib_parse_urlparse,
 )
 from ..utils import (
-    determine_ext,
    ExtractorError,
    float_or_none,
    int_or_none,
@ -29,26 +28,27 @@ default_ns = 'http://www.w3.org/2005/SMIL21/Language'
 _x = lambda p: xpath_with_ns(p, {'smil': default_ns})


-class ThePlatformBaseIE(InfoExtractor):
+class ThePlatformBaseIE(OnceIE):
    def _extract_theplatform_smil(self, smil_url, video_id, note='Downloading SMIL data'):
-        meta = self._download_xml(smil_url, video_id, note=note)
-        error_element = find_xpath_attr(
-            meta, _x('.//smil:ref'), 'src',
-            'http://link.theplatform.com/s/errorFiles/Unavailable.mp4')
-        if error_element is not None:
+        meta = self._download_xml(smil_url, video_id, note=note, query={'format': 'SMIL'})
+        error_element = find_xpath_attr(meta, _x('.//smil:ref'), 'src')
+        if error_element is not None and error_element.attrib['src'].startswith(
+                'http://link.theplatform.com/s/errorFiles/Unavailable.'):
            raise ExtractorError(error_element.attrib['abstract'], expected=True)

-        formats = self._parse_smil_formats(
+        smil_formats = self._parse_smil_formats(
            meta, smil_url, video_id, namespace=default_ns,
            # the parameters are from syfy.com, other sites may use others,
            # they also work for nbc.com
            f4m_params={'g': 'UXWGVKRWHFSP', 'hdcore': '3.0.3'},
            transform_rtmp_url=lambda streamer, src: (streamer, 'mp4:' + src))

-        for _format in formats:
-            ext = determine_ext(_format['url'])
-            if ext == 'once':
-                _format['ext'] = 'mp4'
+        formats = []
+        for _format in smil_formats:
+            if OnceIE.suitable(_format['url']):
+                formats.extend(self._extract_once_formats(_format['url']))
+            else:
+                formats.append(_format)

        self._sort_formats(formats)

@ -125,7 +125,7 @@ class ThePlatformIE(ThePlatformBaseIE):
        'only_matching': True,
    }, {
        'url': 'http://player.theplatform.com/p/2E2eJC/nbcNewsOffsite?guid=tdy_or_siri_150701',
-        'md5': '734f3790fb5fc4903da391beeebc4836',
+        'md5': 'fb96bb3d85118930a5b055783a3bd992',
        'info_dict': {
            'id': 'tdy_or_siri_150701',
            'ext': 'mp4',
@ -135,7 +135,6 @@ class ThePlatformIE(ThePlatformBaseIE):
            'thumbnail': 're:^https?://.*\.jpg$',
            'timestamp': 1435752600,
            'upload_date': '20150701',
-            'categories': ['Today/Shows/Orange Room', 'Today/Sections/Money', 'Today/Topics/Tech', "Today/Topics/Editor's picks"],
        },
    }, {
        # From http://www.nbc.com/the-blacklist/video/sir-crispin-crandall/2928790?onid=137781#vc137781=1
@ -213,7 +212,7 @@ class ThePlatformIE(ThePlatformBaseIE):
                webpage, 'smil url', group='url')
            path = self._search_regex(
                r'link\.theplatform\.com/s/((?:[^/?#&]+/)+[^/?#&]+)', smil_url, 'path')
-            smil_url += '?' if '?' not in smil_url else '&' + 'formats=m3u,mpeg4&format=SMIL'
+            smil_url += ('?' if '?' not in smil_url else '&') + 'formats=m3u,mpeg4'  # parentheses: append formats after either separator
        elif mobj.group('config'):
            config_url = url + '&form=json'
            config_url = config_url.replace('swf/', 'config/')
@ -223,9 +222,9 @@ class ThePlatformIE(ThePlatformBaseIE):
                release_url = config['releaseUrl']
            else:
                release_url = 'http://link.theplatform.com/s/%s?mbr=true' % path
-            smil_url = release_url + '&format=SMIL&formats=MPEG4&manifest=f4m'
+            smil_url = release_url + '&formats=MPEG4&manifest=f4m'
        else:
-            smil_url = 'http://link.theplatform.com/s/%s/meta.smil?format=smil&mbr=true' % path
+            smil_url = 'http://link.theplatform.com/s/%s?mbr=true' % path

        sig = smuggled_data.get('sig')
        if sig:
@ -250,7 +249,7 @@ class ThePlatformFeedIE(ThePlatformBaseIE):
    _TEST = {
        # From http://player.theplatform.com/p/7wvmTC/MSNBCEmbeddedOffSite?guid=n_hardball_5biden_140207
        'url': 'http://feed.theplatform.com/f/7wvmTC/msnbc_video-p-test?form=json&pretty=true&range=-40&byGuid=n_hardball_5biden_140207',
-        'md5': '22d2b84f058d3586efcd99e57d59d314',
+        'md5': '6e32495b5073ab414471b615c5ded394',
        'info_dict': {
            'id': 'n_hardball_5biden_140207',
            'ext': 'mp4',
@ -280,7 +279,7 @@ class ThePlatformFeedIE(ThePlatformBaseIE):
        first_video_id = None
        duration = None
        for item in entry['media$content']:
-            smil_url = item['plfile$url'] + '&format=SMIL&mbr=true'
+            smil_url = item['plfile$url'] + '&mbr=true'
            cur_video_id = ThePlatformIE._match_id(smil_url)
            if first_video_id is None:
                first_video_id = cur_video_id
--- a/youtube_dl/extractor/tv3.py
+++ b/youtube_dl/extractor/tv3.py
@ -0,0 +1,33 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class TV3IE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?tv3\.co\.nz/(?P<id>[^/]+)/tabid/\d+/articleID/\d+/MCat/\d+/Default\.aspx'
+    _TEST = {
+        'url': 'http://www.tv3.co.nz/MOTORSPORT-SRS-SsangYong-Hampton-Downs-Round-3/tabid/3692/articleID/121615/MCat/2915/Default.aspx',
+        'info_dict': {
+            'id': '4659127992001',
+            'ext': 'mp4',
+            'title': 'CRC Motorsport: SRS SsangYong Hampton Downs Round 3 - S2015 Ep3',
+            'description': 'SsangYong Racing Series returns for Round 3 with drivers from New Zealand and Australia taking to the grid at Hampton Downs raceway.',
+            'uploader_id': '3812193411001',
+            'upload_date': '20151213',
+            'timestamp': 1449975272,
+        },
+        'expected_warnings': [
+            'Failed to download MPD manifest'
+        ],
+        'params': {
+            'skip_download': True,
+        },
+    }
+    BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/3812193411001/default_default/index.html?videoId=%s'
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+        webpage = self._download_webpage(url, display_id)
+        brightcove_id = self._search_regex(r'<param\s*name="@videoPlayer"\s*value="(\d+)"', webpage, 'brightcove id')
+        return self.url_result(self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, 'BrightcoveNew', brightcove_id)
--- a/youtube_dl/extractor/udemy.py
+++ b/youtube_dl/extractor/udemy.py
@ -144,7 +144,8 @@ class UdemyIE(InfoExtractor):
        webpage = self._download_webpage(url, lecture_id)

        course_id = self._search_regex(
-            r'data-course-id=["\'](\d+)', webpage, 'course id')
+            (r'data-course-id=["\'](\d+)', r'&quot;id&quot;\s*:\s*(\d+)'),
+            webpage, 'course id')

        try:
            lecture = self._download_lecture(course_id, lecture_id)
--- a/youtube_dl/extractor/wistia.py
+++ b/youtube_dl/extractor/wistia.py
@ -4,6 +4,7 @@ from .common import InfoExtractor
 from ..utils import (
    ExtractorError,
    sanitized_Request,
+    int_or_none,
 )


@ -18,6 +19,9 @@ class WistiaIE(InfoExtractor):
            'id': 'sh7fpupwlt',
            'ext': 'mov',
            'title': 'Being Resourceful',
+            'description': 'a Clients From Hell Video Series video from worldwidewebhosting',
+            'upload_date': '20131204',
+            'timestamp': 1386185018,
            'duration': 117,
        },
    }
@ -32,35 +36,43 @@ class WistiaIE(InfoExtractor):
            raise ExtractorError('Error while getting the playlist',
                                 expected=True)
        data = data_json['media']
+        title = data['name']

        formats = []
        thumbnails = []
        for a in data['assets']:
+            astatus = a.get('status')
            atype = a.get('type')
-            if atype == 'still':
+            if (astatus is not None and astatus != 2) or atype == 'preview':
+                continue
+            elif atype in ('still', 'still_image'):
                thumbnails.append({
                    'url': a['url'],
                    'resolution': '%dx%d' % (a['width'], a['height']),
                })
-                continue
-            if atype == 'preview':
-                continue
-            formats.append({
-                'format_id': atype,
-                'url': a['url'],
-                'width': a['width'],
-                'height': a['height'],
-                'filesize': a['size'],
-                'ext': a['ext'],
-                'preference': 1 if atype == 'original' else None,
-            })
+            else:
+                formats.append({
+                    'format_id': atype,
+                    'url': a['url'],
+                    'tbr': int_or_none(a.get('bitrate')),
+                    'vbr': int_or_none(a.get('opt_vbitrate')),
+                    'width': int_or_none(a.get('width')),
+                    'height': int_or_none(a.get('height')),
+                    'filesize': int_or_none(a.get('size')),
+                    'vcodec': a.get('codec'),
+                    'container': a.get('container'),
+                    'ext': a.get('ext'),
+                    'preference': 1 if atype == 'original' else None,
+                })

        self._sort_formats(formats)

        return {
            'id': video_id,
-            'title': data['name'],
+            'title': title,
+            'description': data.get('seoDescription'),
            'formats': formats,
            'thumbnails': thumbnails,
-            'duration': data.get('duration'),
+            'duration': int_or_none(data.get('duration')),
+            'timestamp': int_or_none(data.get('createdAt')),
        }
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@ -309,6 +309,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
        '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},

        # Apple HTTP Live Streaming
+        '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
        '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
        '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
        '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@ -35,6 +35,7 @@ import xml.etree.ElementTree
 import zlib

 from .compat import (
+    compat_HTMLParser,
    compat_basestring,
    compat_chr,
    compat_etree_fromstring,
@ -49,6 +50,7 @@ from .compat import (
    compat_urllib_parse_urlparse,
    compat_urllib_request,
    compat_urlparse,
+    compat_xpath,
    shlex_quote,
 )

@ -164,12 +166,7 @@ if sys.version_info >= (2, 7):
        return node.find(expr)
 else:
    def find_xpath_attr(node, xpath, key, val=None):
-        # Here comes the crazy part: In 2.6, if the xpath is a unicode,
-        # .//node does not match if a node is a direct child of . !
-        if isinstance(xpath, compat_str):
-            xpath = xpath.encode('ascii')
-
-        for f in node.findall(xpath):
+        for f in node.findall(compat_xpath(xpath)):
            if key not in f.attrib:
                continue
            if val is None or f.attrib.get(key) == val:
@ -194,9 +191,7 @@ def xpath_with_ns(path, ns_map):

 def xpath_element(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    def _find_xpath(xpath):
-        if sys.version_info < (2, 7):  # Crazy 2.6
-            xpath = xpath.encode('ascii')
-        return node.find(xpath)
+        return node.find(compat_xpath(xpath))

    if isinstance(xpath, (str, compat_str)):
        n = _find_xpath(xpath)
@ -273,6 +268,38 @@ def get_element_by_attribute(attribute, value, html):
    return unescapeHTML(res)


+class HTMLAttributeParser(compat_HTMLParser):
+    """Trivial HTML parser to gather the attributes for a single element"""
+    def __init__(self):
+        self.attrs = {}
+        compat_HTMLParser.__init__(self)
+
+    def handle_starttag(self, tag, attrs):
+        self.attrs = dict(attrs)
+
+
+def extract_attributes(html_element):
+    """Given a string for an HTML element such as
+    <el
+         a="foo" B="bar" c="&#98;az" d=boz
+         empty= noval entity="&amp;"
+         sq='"' dq="'"
+    >
+    Decode and return a dictionary of attributes.
+    {
+        'a': 'foo', 'b': 'bar', 'c': 'baz', 'd': 'boz',
+        'empty': '', 'noval': None, 'entity': '&',
+        'sq': '"', 'dq': '\''
+    }.
+    NB HTMLParser is stricter in Python 2.6 & 3.2 than in later versions,
+    but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5.
+    """
+    parser = HTMLAttributeParser()
+    parser.feed(html_element)
+    parser.close()
+    return parser.attrs
+
+
 def clean_html(html):
    """Clean an HTML snippet into a readable string"""

@ -1319,7 +1346,7 @@ def format_bytes(bytes):
 def lookup_unit_table(unit_table, s):
    units_re = '|'.join(re.escape(u) for u in unit_table)
    m = re.match(
-        r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)' % units_re, s)
+        r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)\b' % units_re, s)
    if not m:
        return None
    num_str = m.group('num').replace(',', '.')
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@ -1,3 +1,3 @@
 from __future__ import unicode_literals

-__version__ = '2016.03.14'
+__version__ = '2016.03.18'