Merge pull request #10 from rg3/master

update 24 may
2016-05-24 23:50:42 +05:30 · 2016-05-24 23:50:42 +05:30 · d030e9ca0b
commit d030e9ca0b
parent db00818fad 688c634b7d
26 changed files with 600 additions and 263 deletions
--- a/1
+++ b/1
@ -172,3 +172,4 @@ blahgeek
 Kevin Deldycke
 inondle
 Tomáš Čech
 Déstin Reed
--- a/README.md
+++ b/README.md
@ -693,6 +693,10 @@ hash -r
 Again, from then on you'll be able to update with `sudo youtube-dl -U`.
 ### youtube-dl is extremely slow to start on Windows
 Add a file exclusion for `youtube-dl.exe` in Windows Defender settings.
 ### I'm getting an error `Unable to extract OpenGraph title` on YouTube playlists
 YouTube changed their playlist format in March 2014 and later on, so you'll need at least youtube-dl 2014.07.25 to download all YouTube videos.
--- a/test/test_compat.py
+++ b/test/test_compat.py
@ -103,6 +103,12 @@ class TestCompat(unittest.TestCase):
        self.assertTrue(isinstance(doc.find('chinese').text, compat_str))
        self.assertTrue(isinstance(doc.find('foo/bar').text, compat_str))
    def test_compat_etree_fromstring_doctype(self):
        """Check that compat_etree_fromstring accepts XML carrying a DOCTYPE.

        There is no explicit assertion: the test passes as long as parsing
        the document does not raise.
        """
        xml = '''<?xml version="1.0"?>
 <!DOCTYPE smil PUBLIC "-//W3C//DTD SMIL 2.0//EN" "http://www.w3.org/2001/SMIL20/SMIL20.dtd">
 <smil xmlns="http://www.w3.org/2001/SMIL20/Language"></smil>'''
        # Only the absence of an exception is being verified here.
        compat_etree_fromstring(xml)
    def test_struct_unpack(self):
        self.assertEqual(compat_struct_unpack('!B', b'\x00'), (0,))
--- a/youtube_dl/compat.py
+++ b/youtube_dl/compat.py
@ -245,13 +245,20 @@ try:
 except ImportError:  # Python 2.6
    from xml.parsers.expat import ExpatError as compat_xml_parse_error
 etree = xml.etree.ElementTree
 class _TreeBuilder(etree.TreeBuilder):
    """TreeBuilder variant whose ``doctype`` hook is a no-op."""

    def doctype(self, name, pubid, system):
        """Receive a DOCTYPE declaration and deliberately ignore it."""
        return None
 if sys.version_info[0] >= 3:
-    compat_etree_fromstring = xml.etree.ElementTree.fromstring
+    def compat_etree_fromstring(text):
        return etree.XML(text, parser=etree.XMLParser(target=_TreeBuilder()))
 else:
    # python 2.x tries to encode unicode strings with ascii (see the
    # XMLParser._fixtext method)
    etree = xml.etree.ElementTree
    try:
        _etree_iter = etree.Element.iter
    except AttributeError:  # Python <=2.6
@ -265,7 +272,7 @@ else:
    # 2.7 source
    def _XML(text, parser=None):
        if not parser:
-            parser = etree.XMLParser(target=etree.TreeBuilder())
+            parser = etree.XMLParser(target=_TreeBuilder())
        parser.feed(text)
        return parser.close()
@ -277,7 +284,7 @@ else:
        return el
    def compat_etree_fromstring(text):
-        doc = _XML(text, parser=etree.XMLParser(target=etree.TreeBuilder(element_factory=_element_factory)))
+        doc = _XML(text, parser=etree.XMLParser(target=_TreeBuilder(element_factory=_element_factory)))
        for el in _etree_iter(doc):
            if el.text is not None and isinstance(el.text, bytes):
                el.text = el.text.decode('utf-8')
--- a/youtube_dl/extractor/bandcamp.py
+++ b/youtube_dl/extractor/bandcamp.py
@ -29,7 +29,7 @@ class BandcampIE(InfoExtractor):
        '_skip': 'There is a limit of 200 free downloads / month for the test song'
    }, {
        'url': 'http://benprunty.bandcamp.com/track/lanius-battle',
-        'md5': '2b68e5851514c20efdff2afc5603b8b4',
+        'md5': '73d0b3171568232574e45652f8720b5c',
        'info_dict': {
            'id': '2650410135',
            'ext': 'mp3',
@ -48,6 +48,10 @@ class BandcampIE(InfoExtractor):
            if m_trackinfo:
                json_code = m_trackinfo.group(1)
                data = json.loads(json_code)[0]
                track_id = compat_str(data['id'])
                if not data.get('file'):
                    raise ExtractorError('Not streamable', video_id=track_id, expected=True)
                formats = []
                for format_id, format_url in data['file'].items():
@ -64,7 +68,7 @@ class BandcampIE(InfoExtractor):
                self._sort_formats(formats)
                return {
-                    'id': compat_str(data['id']),
+                    'id': track_id,
                    'title': data['title'],
                    'formats': formats,
                    'duration': float_or_none(data.get('duration')),
--- a/youtube_dl/extractor/byutv.py
+++ b/youtube_dl/extractor/byutv.py
@ -11,6 +11,7 @@ class BYUtvIE(InfoExtractor):
    _VALID_URL = r'^https?://(?:www\.)?byutv.org/watch/[0-9a-f-]+/(?P<video_id>[^/?#]+)'
    _TEST = {
        'url': 'http://www.byutv.org/watch/6587b9a3-89d2-42a6-a7f7-fd2f81840a7d/studio-c-season-5-episode-5',
        'md5': '05850eb8c749e2ee05ad5a1c34668493',
        'info_dict': {
            'id': 'studio-c-season-5-episode-5',
            'ext': 'mp4',
@ -21,7 +22,8 @@ class BYUtvIE(InfoExtractor):
        },
        'params': {
            'skip_download': True,
-        }
+        },
        'add_ie': ['Ooyala'],
    }
    def _real_extract(self, url):
--- a/youtube_dl/extractor/cbs.py
+++ b/youtube_dl/extractor/cbs.py
@ -1,5 +1,7 @@
 from __future__ import unicode_literals
 import re
 from .theplatform import ThePlatformIE
 from ..utils import (
    xpath_text,
@ -21,7 +23,7 @@ class CBSBaseIE(ThePlatformIE):
 class CBSIE(CBSBaseIE):
-    _VALID_URL = r'https?://(?:www\.)?(?:cbs\.com/shows/[^/]+/(?:video|artist)|colbertlateshow\.com/(?:video|podcasts))/[^/]+/(?P<id>[^/]+)'
+    _VALID_URL = r'(?:cbs:(?P<content_id>\w+)|https?://(?:www\.)?(?:cbs\.com/shows/[^/]+/(?:video|artist)|colbertlateshow\.com/(?:video|podcasts))/[^/]+/(?P<display_id>[^/]+))'
    _TESTS = [{
        'url': 'http://www.cbs.com/shows/garth-brooks/video/_u7W953k6la293J7EPTd9oHkSPs6Xn6_/connect-chat-feat-garth-brooks/',
@ -66,11 +68,12 @@ class CBSIE(CBSBaseIE):
    TP_RELEASE_URL_TEMPLATE = 'http://link.theplatform.com/s/dJ5BDC/%s?mbr=true'
    def _real_extract(self, url):
-        display_id = self._match_id(url)
+        content_id, display_id = re.match(self._VALID_URL, url).groups()
-        webpage = self._download_webpage(url, display_id)
+        if not content_id:
-        content_id = self._search_regex(
+            webpage = self._download_webpage(url, display_id)
-            [r"video\.settings\.content_id\s*=\s*'([^']+)';", r"cbsplayer\.contentId\s*=\s*'([^']+)';"],
+            content_id = self._search_regex(
-            webpage, 'content id')
+                [r"video\.settings\.content_id\s*=\s*'([^']+)';", r"cbsplayer\.contentId\s*=\s*'([^']+)';"],
                webpage, 'content id')
        items_data = self._download_xml(
            'http://can.cbs.com/thunder/player/videoPlayerService.php',
            content_id, query={'partner': 'cbs', 'contentId': content_id})
--- a/youtube_dl/extractor/espn.py
+++ b/youtube_dl/extractor/espn.py
@ -8,6 +8,7 @@ class ESPNIE(InfoExtractor):
    _VALID_URL = r'https?://espn\.go\.com/(?:[^/]+/)*(?P<id>[^/]+)'
    _TESTS = [{
        'url': 'http://espn.go.com/video/clip?id=10365079',
        'md5': '60e5d097a523e767d06479335d1bdc58',
        'info_dict': {
            'id': 'FkYWtmazr6Ed8xmvILvKLWjd4QvYZpzG',
            'ext': 'mp4',
@ -15,21 +16,22 @@ class ESPNIE(InfoExtractor):
            'description': None,
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
        'add_ie': ['OoyalaExternal'],
    }, {
        # intl video, from http://www.espnfc.us/video/mls-highlights/150/video/2743663/must-see-moments-best-of-the-mls-season
        'url': 'http://espn.go.com/video/clip?id=2743663',
        'md5': 'f4ac89b59afc7e2d7dbb049523df6768',
        'info_dict': {
            'id': '50NDFkeTqRHB0nXBOK-RGdSG5YQPuxHg',
            'ext': 'mp4',
            'title': 'Must-See Moments: Best of the MLS season',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
        'add_ie': ['OoyalaExternal'],
    }, {
        'url': 'https://espn.go.com/video/iframe/twitter/?cms=espn&id=10365079',
        'only_matching': True,
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@ -617,6 +617,10 @@ from .qqmusic import (
    QQMusicPlaylistIE,
 )
 from .r7 import R7IE
 from .radiocanada import (
    RadioCanadaIE,
    RadioCanadaAudioVideoIE,
 )
 from .radiode import RadioDeIE
 from .radiojavan import RadioJavanIE
 from .radiobremen import RadioBremenIE
@ -630,6 +634,7 @@ from .rds import RDSIE
 from .redtube import RedTubeIE
 from .regiotv import RegioTVIE
 from .restudy import RestudyIE
 from .reuters import ReutersIE
 from .reverbnation import ReverbNationIE
 from .revision3 import Revision3IE
 from .rice import RICEIE
@ -941,7 +946,10 @@ from .vube import VubeIE
 from .vuclip import VuClipIE
 from .vulture import VultureIE
 from .walla import WallaIE
-from .washingtonpost import WashingtonPostIE
+from .washingtonpost import (
    WashingtonPostIE,
    WashingtonPostArticleIE,
 )
 from .wat import WatIE
 from .watchindianporn import WatchIndianPornIE
 from .wdr import (
--- a/youtube_dl/extractor/formula1.py
+++ b/youtube_dl/extractor/formula1.py
@ -13,7 +13,8 @@ class Formula1IE(InfoExtractor):
            'id': 'JvYXJpMzE6pArfHWm5ARp5AiUmD-gibV',
            'ext': 'flv',
            'title': 'Race highlights - Spain 2016',
-        }
+        },
        'add_ie': ['Ooyala'],
    }
    def _real_extract(self, url):
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@ -784,6 +784,19 @@ class GenericIE(InfoExtractor):
                'title': 'Rosetta #CometLanding webcast HL 10',
            }
        },
        # Another Livestream embed, without 'new.' in URL
        {
            'url': 'https://www.freespeech.org/',
            'info_dict': {
                'id': '123537347',
                'ext': 'mp4',
                'title': 're:^FSTV [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
            },
            'params': {
                # Live stream
                'skip_download': True,
            },
        },
        # LazyYT
        {
            'url': 'http://discourse.ubuntu.com/t/unity-8-desktop-mode-windows-on-mir/1986',
@ -1194,6 +1207,16 @@ class GenericIE(InfoExtractor):
                'uploader': 'Lake8737',
            }
        },
        # Duplicated embedded video URLs
        {
            'url': 'http://www.hudl.com/athlete/2538180/highlights/149298443',
            'info_dict': {
                'id': '149298443_480_16c25b74_2',
                'ext': 'mp4',
                'title': 'vs. Blue Orange Spring Game',
                'uploader': 'www.hudl.com',
            },
        },
    ]
    def report_following_redirect(self, new_url):
@ -1868,7 +1891,7 @@ class GenericIE(InfoExtractor):
            return self.url_result(self._proto_relative_url(mobj.group('url'), scheme='http:'), 'CondeNast')
        mobj = re.search(
-            r'<iframe[^>]+src="(?P<url>https?://new\.livestream\.com/[^"]+/player[^"]+)"',
+            r'<iframe[^>]+src="(?P<url>https?://(?:new\.)?livestream\.com/[^"]+/player[^"]+)"',
            webpage)
        if mobj is not None:
            return self.url_result(mobj.group('url'), 'Livestream')
@ -2111,7 +2134,7 @@ class GenericIE(InfoExtractor):
            raise UnsupportedError(url)
        entries = []
-        for video_url in found:
+        for video_url in orderedSet(found):
            video_url = unescapeHTML(video_url)
            video_url = video_url.replace('\\/', '/')
            video_url = compat_urlparse.urljoin(url, video_url)
--- a/youtube_dl/extractor/groupon.py
+++ b/youtube_dl/extractor/groupon.py
@ -14,6 +14,7 @@ class GrouponIE(InfoExtractor):
            'description': 'Studio kept at 105 degrees and 40% humidity with anti-microbial and anti-slip Flotex flooring; certified instructors',
        },
        'playlist': [{
            'md5': '42428ce8a00585f9bc36e49226eae7a1',
            'info_dict': {
                'id': 'fk6OhWpXgIQ',
                'ext': 'mp4',
@ -24,10 +25,11 @@ class GrouponIE(InfoExtractor):
                'uploader_id': 'groupon',
                'uploader': 'Groupon',
            },
            'add_ie': ['Youtube'],
        }],
        'params': {
            'skip_download': True,
-        }
+        },
    }
    _PROVIDERS = {
--- a/youtube_dl/extractor/howcast.py
+++ b/youtube_dl/extractor/howcast.py
@ -8,7 +8,7 @@ class HowcastIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?howcast\.com/videos/(?P<id>\d+)'
    _TEST = {
        'url': 'http://www.howcast.com/videos/390161-How-to-Tie-a-Square-Knot-Properly',
-        'md5': '8b743df908c42f60cf6496586c7f12c3',
+        'md5': '7d45932269a288149483144f01b99789',
        'info_dict': {
            'id': '390161',
            'ext': 'mp4',
@ -19,9 +19,9 @@ class HowcastIE(InfoExtractor):
            'duration': 56.823,
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
        'add_ie': ['Ooyala'],
    }
    def _real_extract(self, url):
--- a/youtube_dl/extractor/lifenews.py
+++ b/youtube_dl/extractor/lifenews.py
@ -7,48 +7,53 @@ from .common import InfoExtractor
 from ..compat import compat_urlparse
 from ..utils import (
    determine_ext,
    int_or_none,
    remove_end,
    unified_strdate,
    ExtractorError,
    int_or_none,
    parse_iso8601,
    remove_end,
 )
 class LifeNewsIE(InfoExtractor):
-    IE_NAME = 'lifenews'
+    IE_NAME = 'life'
-    IE_DESC = 'LIFE | NEWS'
+    IE_DESC = 'Life.ru'
-    _VALID_URL = r'https?://lifenews\.ru/(?:mobile/)?(?P<section>news|video)/(?P<id>\d+)'
+    _VALID_URL = r'https?://life\.ru/t/[^/]+/(?P<id>\d+)'
    _TESTS = [{
        # single video embedded via video/source
-        'url': 'http://lifenews.ru/news/98736',
+        'url': 'https://life.ru/t/новости/98736',
        'md5': '77c95eaefaca216e32a76a343ad89d23',
        'info_dict': {
            'id': '98736',
            'ext': 'mp4',
            'title': 'Мужчина нашел дома архив оборонного завода',
            'description': 'md5:3b06b1b39b5e2bea548e403d99b8bf26',
            'timestamp': 1344154740,
            'upload_date': '20120805',
            'view_count': int,
        }
    }, {
        # single video embedded via iframe
-        'url': 'http://lifenews.ru/news/152125',
+        'url': 'https://life.ru/t/новости/152125',
        'md5': '77d19a6f0886cd76bdbf44b4d971a273',
        'info_dict': {
            'id': '152125',
            'ext': 'mp4',
            'title': 'В Сети появилось видео захвата «Правым сектором» колхозных полей ',
            'description': 'Жители двух поселков Днепропетровской области не простили радикалам угрозу лишения плодородных земель и пошли в лобовую. ',
            'timestamp': 1427961840,
            'upload_date': '20150402',
            'view_count': int,
        }
    }, {
        # two videos embedded via iframe
-        'url': 'http://lifenews.ru/news/153461',
+        'url': 'https://life.ru/t/новости/153461',
        'info_dict': {
            'id': '153461',
            'title': 'В Москве спасли потерявшегося медвежонка, который спрятался на дереве',
            'description': 'Маленький хищник не смог найти дорогу домой и обрел временное убежище на тополе недалеко от жилого массива, пока его не нашла соседская собака.',
-            'upload_date': '20150505',
+            'timestamp': 1430825520,
            'view_count': int,
        },
        'playlist': [{
            'md5': '9b6ef8bc0ffa25aebc8bdb40d89ab795',
@ -57,6 +62,7 @@ class LifeNewsIE(InfoExtractor):
                'ext': 'mp4',
                'title': 'В Москве спасли потерявшегося медвежонка, который спрятался на дереве (Видео 1)',
                'description': 'Маленький хищник не смог найти дорогу домой и обрел временное убежище на тополе недалеко от жилого массива, пока его не нашла соседская собака.',
                'timestamp': 1430825520,
                'upload_date': '20150505',
            },
        }, {
@ -66,22 +72,25 @@ class LifeNewsIE(InfoExtractor):
                'ext': 'mp4',
                'title': 'В Москве спасли потерявшегося медвежонка, который спрятался на дереве (Видео 2)',
                'description': 'Маленький хищник не смог найти дорогу домой и обрел временное убежище на тополе недалеко от жилого массива, пока его не нашла соседская собака.',
                'timestamp': 1430825520,
                'upload_date': '20150505',
            },
        }],
    }, {
-        'url': 'http://lifenews.ru/video/13035',
+        'url': 'https://life.ru/t/новости/213035',
        'only_matching': True,
    }, {
        'url': 'https://life.ru/t/%D0%BD%D0%BE%D0%B2%D0%BE%D1%81%D1%82%D0%B8/153461',
        'only_matching': True,
    }, {
        'url': 'https://life.ru/t/новости/411489/manuel_vals_nazval_frantsiiu_tsieliu_nomier_odin_dlia_ighil',
        'only_matching': True,
    }]
    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
+        video_id = self._match_id(url)
        video_id = mobj.group('id')
        section = mobj.group('section')
-        webpage = self._download_webpage(
+        webpage = self._download_webpage(url, video_id)
            'http://lifenews.ru/%s/%s' % (section, video_id),
            video_id, 'Downloading page')
        video_urls = re.findall(
            r'<video[^>]+><source[^>]+src=["\'](.+?)["\']', webpage)
@ -95,26 +104,22 @@ class LifeNewsIE(InfoExtractor):
        title = remove_end(
            self._og_search_title(webpage),
-            ' - Первый по срочным новостям — LIFE | NEWS')
+            ' - Life.ru')
        description = self._og_search_description(webpage)
        view_count = self._html_search_regex(
-            r'<div class=\'views\'>\s*(\d+)\s*</div>', webpage, 'view count', fatal=False)
+            r'<div[^>]+class=(["\']).*?\bhits-count\b.*?\1[^>]*>\s*(?P<value>\d+)\s*</div>',
-        comment_count = self._html_search_regex(
+            webpage, 'view count', fatal=False, group='value')
            r'=\'commentCount\'[^>]*>\s*(\d+)\s*<',
            webpage, 'comment count', fatal=False)
-        upload_date = self._html_search_regex(
+        timestamp = parse_iso8601(self._search_regex(
-            r'<time[^>]*datetime=\'([^\']+)\'', webpage, 'upload date', fatal=False)
+            r'<time[^>]+datetime=(["\'])(?P<value>.+?)\1',
-        if upload_date is not None:
+            webpage, 'upload date', fatal=False, group='value'))
            upload_date = unified_strdate(upload_date)
        common_info = {
            'description': description,
            'view_count': int_or_none(view_count),
-            'comment_count': int_or_none(comment_count),
+            'timestamp': timestamp,
            'upload_date': upload_date,
        }
        def make_entry(video_id, video_url, index=None):
@ -183,7 +188,8 @@ class LifeEmbedIE(InfoExtractor):
            ext = determine_ext(video_url)
            if ext == 'm3u8':
                formats.extend(self._extract_m3u8_formats(
-                    video_url, video_id, 'mp4', m3u8_id='m3u8'))
+                    video_url, video_id, 'mp4',
                    entry_protocol='m3u8_native', m3u8_id='m3u8'))
            else:
                formats.append({
                    'url': video_url,
--- a/youtube_dl/extractor/livestream.py
+++ b/youtube_dl/extractor/livestream.py
@ -150,7 +150,7 @@ class LivestreamIE(InfoExtractor):
        }
    def _extract_stream_info(self, stream_info):
-        broadcast_id = stream_info['broadcast_id']
+        broadcast_id = compat_str(stream_info['broadcast_id'])
        is_live = stream_info.get('is_live')
        formats = []
--- a/youtube_dl/extractor/ooyala.py
+++ b/youtube_dl/extractor/ooyala.py
@ -8,6 +8,7 @@ from ..utils import (
    float_or_none,
    ExtractorError,
    unsmuggle_url,
    determine_ext,
 )
 from ..compat import compat_urllib_parse_urlencode
@ -15,71 +16,80 @@ from ..compat import compat_urllib_parse_urlencode
 class OoyalaBaseIE(InfoExtractor):
    _PLAYER_BASE = 'http://player.ooyala.com/'
    _CONTENT_TREE_BASE = _PLAYER_BASE + 'player_api/v1/content_tree/'
-    _AUTHORIZATION_URL_TEMPLATE = _PLAYER_BASE + 'sas/player_api/v1/authorization/embed_code/%s/%s?'
+    _AUTHORIZATION_URL_TEMPLATE = _PLAYER_BASE + 'sas/player_api/v2/authorization/embed_code/%s/%s?'
    def _extract(self, content_tree_url, video_id, domain='example.org'):
        content_tree = self._download_json(content_tree_url, video_id)['content_tree']
        metadata = content_tree[list(content_tree)[0]]
        embed_code = metadata['embed_code']
        pcode = metadata.get('asset_pcode') or embed_code
-        video_info = {
+        title = metadata['title']
-            'id': embed_code,
+
-            'title': metadata['title'],
+        auth_data = self._download_json(
-            'description': metadata.get('description'),
+            self._AUTHORIZATION_URL_TEMPLATE % (pcode, embed_code) +
-            'thumbnail': metadata.get('thumbnail_image') or metadata.get('promo_image'),
+            compat_urllib_parse_urlencode({
-            'duration': float_or_none(metadata.get('duration'), 1000),
+                'domain': domain,
-        }
+                'supportedFormats': 'mp4,rtmp,m3u8,hds',
            }), video_id)
        cur_auth_data = auth_data['authorization_data'][embed_code]
        urls = []
        formats = []
-        for supported_format in ('mp4', 'm3u8', 'hds', 'rtmp'):
+        if cur_auth_data['authorized']:
-            auth_data = self._download_json(
+            for stream in cur_auth_data['streams']:
-                self._AUTHORIZATION_URL_TEMPLATE % (pcode, embed_code) +
+                s_url = base64.b64decode(
-                compat_urllib_parse_urlencode({
+                    stream['url']['data'].encode('ascii')).decode('utf-8')
-                    'domain': domain,
+                if s_url in urls:
-                    'supportedFormats': supported_format
+                    continue
-                }),
+                urls.append(s_url)
-                video_id, 'Downloading %s JSON' % supported_format)
+                ext = determine_ext(s_url, None)
-
+                delivery_type = stream['delivery_type']
-            cur_auth_data = auth_data['authorization_data'][embed_code]
+                if delivery_type == 'hls' or ext == 'm3u8':
-
+                    formats.extend(self._extract_m3u8_formats(
-            if cur_auth_data['authorized']:
+                        s_url, embed_code, 'mp4', 'm3u8_native',
-                for stream in cur_auth_data['streams']:
+                        m3u8_id='hls', fatal=False))
-                    url = base64.b64decode(
+                elif delivery_type == 'hds' or ext == 'f4m':
-                        stream['url']['data'].encode('ascii')).decode('utf-8')
+                    formats.extend(self._extract_f4m_formats(
-                    if url in urls:
+                        s_url + '?hdcore=3.7.0', embed_code, f4m_id='hds', fatal=False))
-                        continue
+                elif ext == 'smil':
-                    urls.append(url)
+                    formats.extend(self._extract_smil_formats(
-                    delivery_type = stream['delivery_type']
+                        s_url, embed_code, fatal=False))
-                    if delivery_type == 'hls' or '.m3u8' in url:
+                else:
-                        formats.extend(self._extract_m3u8_formats(
+                    formats.append({
-                            url, embed_code, 'mp4', 'm3u8_native',
+                        'url': s_url,
-                            m3u8_id='hls', fatal=False))
+                        'ext': ext or stream.get('delivery_type'),
-                    elif delivery_type == 'hds' or '.f4m' in url:
+                        'vcodec': stream.get('video_codec'),
-                        formats.extend(self._extract_f4m_formats(
+                        'format_id': delivery_type,
-                            url + '?hdcore=3.7.0', embed_code, f4m_id='hds', fatal=False))
+                        'width': int_or_none(stream.get('width')),
-                    elif '.smil' in url:
+                        'height': int_or_none(stream.get('height')),
-                        formats.extend(self._extract_smil_formats(
+                        'abr': int_or_none(stream.get('audio_bitrate')),
-                            url, embed_code, fatal=False))
+                        'vbr': int_or_none(stream.get('video_bitrate')),
-                    else:
+                        'fps': float_or_none(stream.get('framerate')),
-                        formats.append({
+                    })
-                            'url': url,
+        else:
-                            'ext': stream.get('delivery_type'),
+            raise ExtractorError('%s said: %s' % (
-                            'vcodec': stream.get('video_codec'),
+                self.IE_NAME, cur_auth_data['message']), expected=True)
                            'format_id': delivery_type,
                            'width': int_or_none(stream.get('width')),
                            'height': int_or_none(stream.get('height')),
                            'abr': int_or_none(stream.get('audio_bitrate')),
                            'vbr': int_or_none(stream.get('video_bitrate')),
                            'fps': float_or_none(stream.get('framerate')),
                        })
            else:
                raise ExtractorError('%s said: %s' % (
                    self.IE_NAME, cur_auth_data['message']), expected=True)
        self._sort_formats(formats)
-        video_info['formats'] = formats
+        subtitles = {}
-        return video_info
+        for lang, sub in metadata.get('closed_captions_vtt', {}).get('captions', {}).items():
            sub_url = sub.get('url')
            if not sub_url:
                continue
            subtitles[lang] = [{
                'url': sub_url,
            }]
        return {
            'id': embed_code,
            'title': title,
            'description': metadata.get('description'),
            'thumbnail': metadata.get('thumbnail_image') or metadata.get('promo_image'),
            'duration': float_or_none(metadata.get('duration'), 1000),
            'subtitles': subtitles,
            'formats': formats,
        }
 class OoyalaIE(OoyalaBaseIE):
--- a/youtube_dl/extractor/radiocanada.py
+++ b/youtube_dl/extractor/radiocanada.py
@ -0,0 +1,130 @@
 # coding: utf-8
 from __future__ import unicode_literals
 import re
 from .common import InfoExtractor
 from ..utils import (
    xpath_text,
    find_xpath_attr,
    determine_ext,
    int_or_none,
    unified_strdate,
    xpath_element,
    ExtractorError,
 )
 class RadioCanadaIE(InfoExtractor):
    """Extractor for Radio-Canada media console items.

    Matches either a ``radiocanada:<app_code>:<id>`` pseudo-URL or a
    ``ici.radio-canada.ca/widgets/mediaconsole/<app_code>/<id>`` URL. Stream
    URLs come from the validation API and descriptive fields from the
    metadata API.
    """

    IE_NAME = 'radiocanada'
    _VALID_URL = r'(?:radiocanada:|https?://ici\.radio-canada\.ca/widgets/mediaconsole/)(?P<app_code>[^:/]+)[:/](?P<id>[0-9]+)'
    _TEST = {
        'url': 'http://ici.radio-canada.ca/widgets/mediaconsole/medianet/7184272',
        'info_dict': {
            'id': '7184272',
            'ext': 'flv',
            'title': 'Le parcours du tireur capté sur vidéo',
            'description': 'Images des caméras de surveillance fournies par la GRC montrant le parcours du tireur d\'Ottawa',
            'upload_date': '20141023',
        },
        'params': {
            # rtmp download
            'skip_download': True,
        },
    }

    def _real_extract(self, url):
        """Build the info dict for the media item referenced by *url*.

        Raises ExtractorError (expected) when the validation API answers with
        the literal URL ``'null'``, relaying the API's ``message`` text.
        """
        app_code, video_id = re.match(self._VALID_URL, url).groups()

        formats = []
        # TODO: extract m3u8 and f4m formats
        # m3u8 formats can be extracted using the ipad device_type, but a 403
        # error code is returned when ffmpeg tries to download the segments
        # f4m formats can be extracted using the flashhd device_type, but they
        # produce an unplayable file
        for device_type in ('flash',):
            v_data = self._download_xml(
                'http://api.radio-canada.ca/validationMedia/v1/Validation.ashx',
                video_id, note='Downloading %s XML' % device_type, query={
                    'appCode': app_code,
                    'idMedia': video_id,
                    'connectionType': 'broadband',
                    'multibitrate': 'true',
                    'deviceType': device_type,
                    # paysJ391wsHjbOJwvCs26toz and bypasslock are used to bypass geo-restriction
                    'paysJ391wsHjbOJwvCs26toz': 'CA',
                    'bypasslock': 'NZt5K62gRqfc',
                })
            v_url = xpath_text(v_data, 'url')
            if not v_url:
                continue
            if v_url == 'null':
                raise ExtractorError('%s said: %s' % (
                    self.IE_NAME, xpath_text(v_data, 'message')), expected=True)
            ext = determine_ext(v_url)
            if ext == 'm3u8':
                formats.extend(self._extract_m3u8_formats(
                    v_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
            elif ext == 'f4m':
                formats.extend(self._extract_f4m_formats(v_url, video_id, f4m_id='hds', fatal=False))
            else:
                bitrates = xpath_element(v_data, 'bitrates')
                # NOTE(review): xpath_element presumably returns None when the
                # element is absent - confirm; the guard avoids calling
                # .findall on None in that case.
                if bitrates is None:
                    continue
                for url_e in bitrates.findall('url'):
                    tbr = int_or_none(url_e.get('bitrate'))
                    if not tbr:
                        continue
                    formats.append({
                        'format_id': 'rtmp-%d' % tbr,
                        # Swap the bitrate embedded in the file name for this
                        # variant's bitrate.
                        'url': re.sub(r'\d+\.%s' % ext, '%d.%s' % (tbr, ext), v_url),
                        'ext': 'flv',
                        'protocol': 'rtmp',
                        'width': int_or_none(url_e.get('width')),
                        'height': int_or_none(url_e.get('height')),
                        'tbr': tbr,
                    })
        self._sort_formats(formats)

        metadata = self._download_xml(
            'http://api.radio-canada.ca/metaMedia/v1/index.ashx',
            video_id, note='Downloading metadata XML', query={
                'appCode': app_code,
                'idMedia': video_id,
            })

        def get_meta(name):
            """Return the text of the <Meta name=...> element, or None."""
            el = find_xpath_attr(metadata, './/Meta', 'name', name)
            return el.text if el is not None else None

        return {
            'id': video_id,
            'title': get_meta('Title'),
            'description': get_meta('Description') or get_meta('ShortDescription'),
            'thumbnail': get_meta('imageHR') or get_meta('imageMR') or get_meta('imageBR'),
            'duration': int_or_none(get_meta('length')),
            'series': get_meta('Emission'),
            # Look the values up in the metadata; passing the bare key names
            # to int_or_none could never yield the real season/episode.
            'season_number': int_or_none(get_meta('SrcSaison')),
            'episode_number': int_or_none(get_meta('SrcEpisode')),
            'upload_date': unified_strdate(get_meta('Date')),
            'formats': formats,
        }
 class RadioCanadaAudioVideoIE(InfoExtractor):
    """Extractor for ici.radio-canada.ca ``audio-video/media-<id>`` pages.

    Performs no extraction itself: it rewrites the page URL into a
    ``radiocanada:medianet:<id>`` pseudo-URL and returns it as a URL result.
    """

    # Previously a bare string expression, which only became the class
    # docstring and never set the extractor name.
    IE_NAME = 'radiocanada:audiovideo'
    _VALID_URL = r'https?://ici\.radio-canada\.ca/audio-video/media-(?P<id>[0-9]+)'
    _TEST = {
        'url': 'http://ici.radio-canada.ca/audio-video/media-7527184/barack-obama-au-vietnam',
        'info_dict': {
            'id': '7527184',
            'ext': 'flv',
            'title': 'Barack Obama au Vietnam',
            'description': 'Les États-Unis lèvent l\'embargo sur la vente d\'armes qui datait de la guerre du Vietnam',
            'upload_date': '20160523',
        },
        'params': {
            # rtmp download
            'skip_download': True,
        },
    }

    def _real_extract(self, url):
        """Return a URL result pointing at the ``medianet`` pseudo-URL."""
        return self.url_result('radiocanada:medianet:%s' % self._match_id(url))
--- a/youtube_dl/extractor/reuters.py
+++ b/youtube_dl/extractor/reuters.py
@ -0,0 +1,69 @@
 # coding: utf-8
 from __future__ import unicode_literals
 import re
 from .common import InfoExtractor
 from ..utils import (
    ExtractorError,
    js_to_json,
    int_or_none,
    unescapeHTML,
 )
 class ReutersIE(InfoExtractor):
    """Extractor for reuters.com pages that carry a numeric videoId query parameter."""
    _VALID_URL = r'https?://(?:www\.)?reuters\.com/.*?\?.*?videoId=(?P<id>[0-9]+)'
    _TEST = {
        'url': 'http://www.reuters.com/video/2016/05/20/san-francisco-police-chief-resigns?videoId=368575562',
        'md5': '8015113643a0b12838f160b0b81cc2ee',
        'info_dict': {
            'id': '368575562',
            'ext': 'mp4',
            'title': 'San Francisco police chief resigns',
        }
    }
    def _real_extract(self, url):
        """Extract title, thumbnail, duration and formats for the video in *url*.

        Raises ExtractorError when the player data does not contain the
        expected flv URL layout.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(
            'http://www.reuters.com/assets/iframe/yovideo?videoId=%s' % video_id, video_id)
        # The drawPlayer() argument is passed through js_to_json and then
        # searched with regexes; it is never parsed into a dict.
        video_data = js_to_json(self._search_regex(
            r'(?s)Reuters\.yovideo\.drawPlayer\(({.*?})\);',
            webpage, 'video data'))
        def get_json_value(key, fatal=False):
            """Return the string value stored under *key* in the player data."""
            # Raw string so that '\s' reaches the regex engine as written
            # instead of relying on an invalid string escape.
            return self._search_regex(r'"%s"\s*:\s*"([^"]+)"' % key, video_data, key, fatal=fatal)
        title = unescapeHTML(get_json_value('title', fatal=True))
        flv_url = get_json_value('flv', fatal=True)
        mobj = re.search(r',/(\d+)\?f=(\d+)', flv_url)
        if not mobj:
            # Fail with an extractor error instead of an AttributeError on None.
            raise ExtractorError('Unable to extract media ids from flv URL')
        mmid, fid = mobj.groups()
        mas_data = self._download_json(
            'http://mas-e.cds1.yospace.com/mas/%s/%s?trans=json' % (mmid, fid),
            video_id, transform_source=js_to_json)
        formats = []
        for f in mas_data:
            f_url = f.get('url')
            if not f_url:
                continue
            method = f.get('method')
            if method == 'hls':
                formats.extend(self._extract_m3u8_formats(
                    f_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
            else:
                container = f.get('container')
                # 'mobile' entries are labelled 3gp and carry no container.
                ext = '3gp' if method == 'mobile' else container
                formats.append({
                    'format_id': ext,
                    'url': f_url,
                    'ext': ext,
                    'container': container if method != 'mobile' else None,
                })
        self._sort_formats(formats)
        return {
            'id': video_id,
            'title': title,
            'thumbnail': get_json_value('thumb'),
            'duration': int_or_none(get_json_value('seconds')),
            'formats': formats,
        }
--- a/youtube_dl/extractor/teachingchannel.py
+++ b/youtube_dl/extractor/teachingchannel.py
@ -11,6 +11,7 @@ class TeachingChannelIE(InfoExtractor):
    _TEST = {
        'url': 'https://www.teachingchannel.org/videos/teacher-teaming-evolution',
        'md5': '3d6361864d7cac20b57c8784da17166f',
        'info_dict': {
            'id': 'F3bnlzbToeI6pLEfRyrlfooIILUjz4nM',
            'ext': 'mp4',
@ -19,9 +20,9 @@ class TeachingChannelIE(InfoExtractor):
            'duration': 422.255,
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
        'add_ie': ['Ooyala'],
    }
    def _real_extract(self, url):
--- a/youtube_dl/extractor/tf1.py
+++ b/youtube_dl/extractor/tf1.py
@ -6,7 +6,7 @@ from .common import InfoExtractor
 class TF1IE(InfoExtractor):
    """TF1 uses the wat.tv player."""
-    _VALID_URL = r'https?://(?:(?:videos|www|lci)\.tf1|www\.tfou)\.fr/(?:[^/]+/)*(?P<id>.+?)\.html'
+    _VALID_URL = r'https?://(?:(?:videos|www|lci)\.tf1|(?:www\.)?(?:tfou|ushuaiatv|histoire|tvbreizh))\.fr/(?:[^/]+/)*(?P<id>[^/?#.]+)'
    _TESTS = [{
        'url': 'http://videos.tf1.fr/auto-moto/citroen-grand-c4-picasso-2013-presentation-officielle-8062060.html',
        'info_dict': {
@ -48,6 +48,6 @@ class TF1IE(InfoExtractor):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        wat_id = self._html_search_regex(
-            r'(["\'])(?:https?:)?//www\.wat\.tv/embedframe/.*?(?P<id>\d{8})(?:#.*?)?\1',
+            r'(["\'])(?:https?:)?//www\.wat\.tv/embedframe/.*?(?P<id>\d{8})(?:.*?)?\1',
            webpage, 'wat id', group='id')
        return self.url_result('wat:%s' % wat_id, 'Wat')
--- a/youtube_dl/extractor/veoh.py
+++ b/youtube_dl/extractor/veoh.py
@ -37,6 +37,7 @@ class VeohIE(InfoExtractor):
                'uploader': 'afp-news',
                'duration': 123,
            },
            'skip': 'This video has been deleted.',
        },
        {
            'url': 'http://www.veoh.com/watch/v69525809F6Nc4frX',
--- a/youtube_dl/extractor/vice.py
+++ b/youtube_dl/extractor/vice.py
@ -11,12 +11,14 @@ class ViceIE(InfoExtractor):
    _TESTS = [{
        'url': 'http://www.vice.com/video/cowboy-capitalists-part-1',
        'md5': 'e9d77741f9e42ba583e683cd170660f7',
        'info_dict': {
            'id': '43cW1mYzpia9IlestBjVpd23Yu3afAfp',
            'ext': 'flv',
            'title': 'VICE_COWBOYCAPITALISTS_PART01_v1_VICE_WM_1080p.mov',
            'duration': 725.983,
        },
        'add_ie': ['Ooyala'],
    }, {
        'url': 'http://www.vice.com/video/how-to-hack-a-car',
        'md5': '6fb2989a3fed069fb8eab3401fc2d3c9',
@ -29,6 +31,7 @@ class ViceIE(InfoExtractor):
            'uploader': 'Motherboard',
            'upload_date': '20140529',
        },
        'add_ie': ['Youtube'],
    }, {
        'url': 'https://news.vice.com/video/experimenting-on-animals-inside-the-monkey-lab',
        'only_matching': True,
--- a/youtube_dl/extractor/voxmedia.py
+++ b/youtube_dl/extractor/voxmedia.py
@ -15,7 +15,8 @@ class VoxMediaIE(InfoExtractor):
            'ext': 'mp4',
            'title': 'Google\'s new material design direction',
            'description': 'md5:2f44f74c4d14a1f800ea73e1c6832ad2',
-        }
+        },
        'add_ie': ['Ooyala'],
    }, {
        # data-ooyala-id
        'url': 'http://www.theverge.com/2014/10/21/7025853/google-nexus-6-hands-on-photos-video-android-phablet',
@ -25,7 +26,8 @@ class VoxMediaIE(InfoExtractor):
            'ext': 'mp4',
            'title': 'The Nexus 6: hands-on with Google\'s phablet',
            'description': 'md5:87a51fe95ff8cea8b5bdb9ac7ae6a6af',
-        }
+        },
        'add_ie': ['Ooyala'],
    }, {
        # volume embed
        'url': 'http://www.vox.com/2016/3/31/11336640/mississippi-lgbt-religious-freedom-bill',
@ -35,7 +37,8 @@ class VoxMediaIE(InfoExtractor):
            'ext': 'mp4',
            'title': 'The new frontier of LGBTQ civil rights, explained',
            'description': 'md5:0dc58e94a465cbe91d02950f770eb93f',
-        }
+        },
        'add_ie': ['Ooyala'],
    }, {
        # youtube embed
        'url': 'http://www.vox.com/2016/3/24/11291692/robot-dance',
@ -48,7 +51,8 @@ class VoxMediaIE(InfoExtractor):
            'upload_date': '20160324',
            'uploader_id': 'voxdotcom',
            'uploader': 'Vox',
-        }
+        },
        'add_ie': ['Youtube'],
    }, {
        # SBN.VideoLinkset.entryGroup multiple ooyala embeds
        'url': 'http://www.sbnation.com/college-football-recruiting/2015/2/3/7970291/national-signing-day-rationalizations-itll-be-ok-itll-be-ok',
@ -117,7 +121,7 @@ class VoxMediaIE(InfoExtractor):
            volume_webpage = self._download_webpage(
                'http://volume.vox-cdn.com/embed/%s' % volume_uuid, volume_uuid)
            video_data = self._parse_json(self._search_regex(
-                r'Volume\.createVideo\(({.+})\s*,\s*{.*}\);', volume_webpage, 'video data'), volume_uuid)
+                r'Volume\.createVideo\(({.+})\s*,\s*{.*}\s*,\s*\[.*\]\s*,\s*{.*}\);', volume_webpage, 'video data'), volume_uuid)
            for provider_video_type in ('ooyala', 'youtube'):
                provider_video_id = video_data.get('%s_id' % provider_video_type)
                if provider_video_id:
--- a/youtube_dl/extractor/washingtonpost.py
+++ b/youtube_dl/extractor/washingtonpost.py
@ -11,7 +11,96 @@ from ..utils import (
 class WashingtonPostIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?washingtonpost\.com/.*?/(?P<id>[^/]+)/(?:$|[?#])'
+    IE_NAME = 'washingtonpost'
    _VALID_URL = r'(?:washingtonpost:|https?://(?:www\.)?washingtonpost\.com/video/(?:[^/]+/)*)(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
    _TEST = {
        'url': 'https://www.washingtonpost.com/video/c/video/480ba4ee-1ec7-11e6-82c2-a7dcb313287d',
        'md5': '6f537e1334b714eb15f9563bd4b9cdfa',
        'info_dict': {
            'id': '480ba4ee-1ec7-11e6-82c2-a7dcb313287d',
            'ext': 'mp4',
            'title': 'Egypt finds belongings, debris from plane crash',
            'description': 'md5:a17ceee432f215a5371388c1f680bd86',
            'upload_date': '20160520',
            'uploader': 'Reuters',
            'timestamp': 1463778452,
        },
    }
    def _real_extract(self, url):
        video_id = self._match_id(url)
        video_data = self._download_json(
            'http://www.washingtonpost.com/posttv/c/videojson/%s?resType=jsonp' % video_id,
            video_id, transform_source=strip_jsonp)[0]['contentConfig']
        title = video_data['title']
        urls = []
        formats = []
        for s in video_data.get('streams', []):
            s_url = s.get('url')
            if not s_url or s_url in urls:
                continue
            urls.append(s_url)
            video_type = s.get('type')
            if video_type == 'smil':
                continue
            elif video_type in ('ts', 'hls') and ('_master.m3u8' in s_url or '_mobile.m3u8' in s_url):
                m3u8_formats = self._extract_m3u8_formats(
                    s_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)
                for m3u8_format in m3u8_formats:
                    width = m3u8_format.get('width')
                    if not width:
                        continue
                    vbr = self._search_regex(
                        r'%d_%d_(\d+)' % (width, m3u8_format['height']), m3u8_format['url'], 'vbr', default=None)
                    if vbr:
                        m3u8_format.update({
                            'vbr': int_or_none(vbr),
                        })
                formats.extend(m3u8_formats)
            else:
                width = int_or_none(s.get('width'))
                vbr = int_or_none(s.get('bitrate'))
                has_width = width != 0
                formats.append({
                    'format_id': (
                        '%s-%d-%d' % (video_type, width, vbr)
                        if width
                        else video_type),
                    'vbr': vbr if has_width else None,
                    'width': width,
                    'height': int_or_none(s.get('height')),
                    'acodec': s.get('audioCodec'),
                    'vcodec': s.get('videoCodec') if has_width else 'none',
                    'filesize': int_or_none(s.get('fileSize')),
                    'url': s_url,
                    'ext': 'mp4',
                    'protocol': 'm3u8_native' if video_type in ('ts', 'hls') else None,
                })
        source_media_url = video_data.get('sourceMediaURL')
        if source_media_url:
            formats.append({
                'format_id': 'source_media',
                'url': source_media_url,
            })
        self._sort_formats(
            formats, ('width', 'height', 'vbr', 'filesize', 'tbr', 'format_id'))
        return {
            'id': video_id,
            'title': title,
            'description': video_data.get('blurb'),
            'uploader': video_data.get('credits', {}).get('source'),
            'formats': formats,
            'duration': int_or_none(video_data.get('videoDuration'), 100),
            'timestamp': int_or_none(
                video_data.get('dateConfig', {}).get('dateFirstPublished'), 1000),
        }
 class WashingtonPostArticleIE(InfoExtractor):
    IE_NAME = 'washingtonpost:article'
    _VALID_URL = r'https?://(?:www\.)?washingtonpost\.com/(?:[^/]+/)*(?P<id>[^/?#]+)'
    _TESTS = [{
        'url': 'http://www.washingtonpost.com/sf/national/2014/03/22/sinkhole-of-bureaucracy/',
        'info_dict': {
@ -63,6 +152,10 @@ class WashingtonPostIE(InfoExtractor):
        }]
    }]
    @classmethod
    def suitable(cls, url):
        """Decline any URL that WashingtonPostIE accepts; otherwise defer to the base check."""
        if WashingtonPostIE.suitable(url):
            return False
        return super(WashingtonPostArticleIE, cls).suitable(url)
    def _real_extract(self, url):
        page_id = self._match_id(url)
        webpage = self._download_webpage(url, page_id)
@ -74,54 +167,7 @@ class WashingtonPostIE(InfoExtractor):
                <div\s+class="posttv-video-embed[^>]*?data-uuid=|
                data-video-uuid=
            )"([^"]+)"''', webpage)
-        entries = []
+        entries = [self.url_result('washingtonpost:%s' % uuid, 'WashingtonPost', uuid) for uuid in uuids]
        for i, uuid in enumerate(uuids, start=1):
            vinfo_all = self._download_json(
                'http://www.washingtonpost.com/posttv/c/videojson/%s?resType=jsonp' % uuid,
                page_id,
                transform_source=strip_jsonp,
                note='Downloading information of video %d/%d' % (i, len(uuids))
            )
            vinfo = vinfo_all[0]['contentConfig']
            uploader = vinfo.get('credits', {}).get('source')
            timestamp = int_or_none(
                vinfo.get('dateConfig', {}).get('dateFirstPublished'), 1000)
            formats = [{
                'format_id': (
                    '%s-%s-%s' % (s.get('type'), s.get('width'), s.get('bitrate'))
                    if s.get('width')
                    else s.get('type')),
                'vbr': s.get('bitrate') if s.get('width') != 0 else None,
                'width': s.get('width'),
                'height': s.get('height'),
                'acodec': s.get('audioCodec'),
                'vcodec': s.get('videoCodec') if s.get('width') != 0 else 'none',
                'filesize': s.get('fileSize'),
                'url': s.get('url'),
                'ext': 'mp4',
                'preference': -100 if s.get('type') == 'smil' else None,
                'protocol': {
                    'MP4': 'http',
                    'F4F': 'f4m',
                }.get(s.get('type')),
            } for s in vinfo.get('streams', [])]
            source_media_url = vinfo.get('sourceMediaURL')
            if source_media_url:
                formats.append({
                    'format_id': 'source_media',
                    'url': source_media_url,
                })
            self._sort_formats(formats)
            entries.append({
                'id': uuid,
                'title': vinfo['title'],
                'description': vinfo.get('blurb'),
                'uploader': uploader,
                'formats': formats,
                'duration': int_or_none(vinfo.get('videoDuration'), 100),
                'timestamp': timestamp,
            })
        return {
            '_type': 'playlist',
--- a/youtube_dl/extractor/wat.py
+++ b/youtube_dl/extractor/wat.py
@ -2,25 +2,26 @@
 from __future__ import unicode_literals
 import re
 import hashlib
 from .common import InfoExtractor
 from ..compat import compat_str
 from ..utils import (
    ExtractorError,
    unified_strdate,
    HEADRequest,
    float_or_none,
 )
 class WatIE(InfoExtractor):
-    _VALID_URL = r'(?:wat:(?P<real_id>\d{8})|https?://www\.wat\.tv/video/(?P<display_id>.*)-(?P<short_id>.*?)_.*?\.html)'
+    _VALID_URL = r'(?:wat:|https?://(?:www\.)?wat\.tv/video/.*-)(?P<id>[0-9a-z]+)'
    IE_NAME = 'wat.tv'
    _TESTS = [
        {
            'url': 'http://www.wat.tv/video/soupe-figues-l-orange-aux-epices-6z1uz_2hvf7_.html',
-            'md5': 'ce70e9223945ed26a8056d413ca55dc9',
+            'md5': '83d882d9de5c9d97f0bb2c6273cde56a',
            'info_dict': {
                'id': '11713067',
                'display_id': 'soupe-figues-l-orange-aux-epices',
                'ext': 'mp4',
                'title': 'Soupe de figues à l\'orange et aux épices',
                'description': 'Retrouvez l\'émission "Petits plats en équilibre", diffusée le 18 août 2014.',
@ -33,7 +34,6 @@ class WatIE(InfoExtractor):
            'md5': 'fbc84e4378165278e743956d9c1bf16b',
            'info_dict': {
                'id': '11713075',
                'display_id': 'gregory-lemarchal-voix-ange',
                'ext': 'mp4',
                'title': 'Grégory Lemarchal, une voix d\'ange depuis 10 ans (1/3)',
                'description': 'md5:b7a849cf16a2b733d9cd10c52906dee3',
@ -44,96 +44,85 @@ class WatIE(InfoExtractor):
        },
    ]
-    def download_video_info(self, real_id):
+    def _real_extract(self, url):
        video_id = self._match_id(url)
        video_id = video_id if video_id.isdigit() and len(video_id) > 6 else compat_str(int(video_id, 36))
        # 'contentv4' is used in the website, but it also returns the related
        # videos, we don't need them
-        info = self._download_json('http://www.wat.tv/interface/contentv3/' + real_id, real_id)
+        video_info = self._download_json(
-        return info['media']
+            'http://www.wat.tv/interface/contentv3/' + video_id, video_id)['media']
    def _real_extract(self, url):
        def real_id_for_chapter(chapter):
            return chapter['tc_start'].split('-')[0]
        mobj = re.match(self._VALID_URL, url)
        display_id = mobj.group('display_id')
        real_id = mobj.group('real_id')
        if not real_id:
            short_id = mobj.group('short_id')
            webpage = self._download_webpage(url, display_id or short_id)
            real_id = self._search_regex(r'xtpage = ".*-(.*?)";', webpage, 'real id')
        video_info = self.download_video_info(real_id)
        error_desc = video_info.get('error_desc')
        if error_desc:
            raise ExtractorError(
                '%s returned error: %s' % (self.IE_NAME, error_desc), expected=True)
        geo_list = video_info.get('geoList')
        country = geo_list[0] if geo_list else ''
        chapters = video_info['chapters']
        first_chapter = chapters[0]
        files = video_info['files']
        first_file = files[0]
-        if real_id_for_chapter(first_chapter) != real_id:
+        def video_id_for_chapter(chapter):
            return chapter['tc_start'].split('-')[0]
        if video_id_for_chapter(first_chapter) != video_id:
            self.to_screen('Multipart video detected')
-            chapter_urls = []
+            entries = [self.url_result('wat:%s' % video_id_for_chapter(chapter)) for chapter in chapters]
-            for chapter in chapters:
+            return self.playlist_result(entries, video_id, video_info['title'])
                chapter_id = real_id_for_chapter(chapter)
                # Yes, when we this chapter is processed by WatIE,
                # it will download the info again
                chapter_info = self.download_video_info(chapter_id)
                chapter_urls.append(chapter_info['url'])
            entries = [self.url_result(chapter_url) for chapter_url in chapter_urls]
            return self.playlist_result(entries, real_id, video_info['title'])
        upload_date = None
        if 'date_diffusion' in first_chapter:
            upload_date = unified_strdate(first_chapter['date_diffusion'])
        # Otherwise we can continue and extract just one part, we have to use
-        # the short id for getting the video url
+        # the video id for getting the video url
-        formats = [{
+        date_diffusion = first_chapter.get('date_diffusion')
-            'url': 'http://wat.tv/get/android5/%s.mp4' % real_id,
+        upload_date = unified_strdate(date_diffusion) if date_diffusion else None
            'format_id': 'Mobile',
        }]
-        fmts = [('SD', 'web')]
+        def extract_url(path_template, url_type):
-        if first_file.get('hasHD'):
+            req_url = 'http://www.wat.tv/get/%s' % (path_template % video_id)
-            fmts.append(('HD', 'webhd'))
+            head = self._request_webpage(HEADRequest(req_url), video_id, 'Extracting %s url' % url_type)
            red_url = head.geturl()
            if req_url == red_url:
                raise ExtractorError(
                    '%s said: Sorry, this video is not available from your country.' % self.IE_NAME,
                    expected=True)
            return red_url
-        def compute_token(param):
+        m3u8_url = extract_url('ipad/%s.m3u8', 'm3u8')
-            timestamp = '%08x' % int(self._download_webpage(
+        http_url = extract_url('android5/%s.mp4', 'http')
                'http://www.wat.tv/servertime', real_id,
                'Downloading server time').split('|')[0])
            magic = '9b673b13fa4682ed14c3cfa5af5310274b514c4133e9b3a81e6e3aba009l2564'
            return '%s/%s' % (hashlib.md5((magic + param + timestamp).encode('ascii')).hexdigest(), timestamp)
-        for fmt in fmts:
+        formats = []
-            webid = '/%s/%s' % (fmt[1], real_id)
+        m3u8_formats = self._extract_m3u8_formats(
-            video_url = self._download_webpage(
+            m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls')
-                'http://www.wat.tv/get%s?token=%s&getURL=1&country=%s' % (webid, compute_token(webid), country),
+        formats.extend(m3u8_formats)
-                real_id,
+        formats.extend(self._extract_f4m_formats(
-                'Downloading %s video URL' % fmt[0],
+            m3u8_url.replace('ios.', 'web.').replace('.m3u8', '.f4m'),
-                'Failed to download %s video URL' % fmt[0],
+            video_id, f4m_id='hds', fatal=False))
-                False)
+        for m3u8_format in m3u8_formats:
-            if not video_url:
+            mobj = re.search(
                r'audio.*?%3D(\d+)(?:-video.*?%3D(\d+))?', m3u8_format['url'])
            if not mobj:
                continue
-            formats.append({
+            abr, vbr = mobj.groups()
-                'url': video_url,
+            abr, vbr = float_or_none(abr, 1000), float_or_none(vbr, 1000)
-                'ext': 'mp4',
+            m3u8_format.update({
-                'format_id': fmt[0],
+                'vbr': vbr,
                'abr': abr,
            })
            if not vbr or not abr:
                continue
            f = m3u8_format.copy()
            f.update({
                'url': re.sub(r'%s-\d+00-\d+' % video_id, '%s-%d00-%d' % (video_id, round(vbr / 100), round(abr)), http_url),
                'format_id': f['format_id'].replace('hls', 'http'),
                'protocol': 'http',
            })
            formats.append(f)
        self._sort_formats(formats)
        return {
-            'id': real_id,
+            'id': video_id,
            'display_id': display_id,
            'title': first_chapter['title'],
            'thumbnail': first_chapter['preview'],
            'description': first_chapter['description'],
            'view_count': video_info['views'],
            'upload_date': upload_date,
-            'duration': first_file['duration'],
+            'duration': video_info['files'][0]['duration'],
            'formats': formats,
        }
--- a/youtube_dl/extractor/xhamster.py
+++ b/youtube_dl/extractor/xhamster.py
@ -12,37 +12,52 @@ from ..utils import (
 class XHamsterIE(InfoExtractor):
-    _VALID_URL = r'(?P<proto>https?)://(?:.+?\.)?xhamster\.com/movies/(?P<id>[0-9]+)/(?P<seo>.+?)\.html(?:\?.*)?'
+    _VALID_URL = r'(?P<proto>https?)://(?:.+?\.)?xhamster\.com/movies/(?P<id>[0-9]+)/(?P<seo>.*?)\.html(?:\?.*)?'
-    _TESTS = [
+    _TESTS = [{
-        {
+        'url': 'http://xhamster.com/movies/1509445/femaleagent_shy_beauty_takes_the_bait.html',
-            'url': 'http://xhamster.com/movies/1509445/femaleagent_shy_beauty_takes_the_bait.html',
+        'md5': '8281348b8d3c53d39fffb377d24eac4e',
-            'info_dict': {
+        'info_dict': {
-                'id': '1509445',
+            'id': '1509445',
-                'ext': 'mp4',
+            'ext': 'mp4',
-                'title': 'FemaleAgent Shy beauty takes the bait',
+            'title': 'FemaleAgent Shy beauty takes the bait',
-                'upload_date': '20121014',
+            'upload_date': '20121014',
-                'uploader': 'Ruseful2011',
+            'uploader': 'Ruseful2011',
-                'duration': 893.52,
+            'duration': 893.52,
-                'age_limit': 18,
+            'age_limit': 18,
            }
        },
-        {
+    }, {
-            'url': 'http://xhamster.com/movies/2221348/britney_spears_sexy_booty.html?hd',
+        'url': 'http://xhamster.com/movies/2221348/britney_spears_sexy_booty.html?hd',
-            'info_dict': {
+        'info_dict': {
-                'id': '2221348',
+            'id': '2221348',
-                'ext': 'mp4',
+            'ext': 'mp4',
-                'title': 'Britney Spears  Sexy Booty',
+            'title': 'Britney Spears  Sexy Booty',
-                'upload_date': '20130914',
+            'upload_date': '20130914',
-                'uploader': 'jojo747400',
+            'uploader': 'jojo747400',
-                'duration': 200.48,
+            'duration': 200.48,
-                'age_limit': 18,
+            'age_limit': 18,
            }
        },
-        {
+        'params': {
-            'url': 'https://xhamster.com/movies/2272726/amber_slayed_by_the_knight.html',
+            'skip_download': True,
            'only_matching': True,
        },
-    ]
+    }, {
        # empty seo
        'url': 'http://xhamster.com/movies/5667973/.html',
        'info_dict': {
            'id': '5667973',
            'ext': 'mp4',
            'title': '....',
            'upload_date': '20160208',
            'uploader': 'parejafree',
            'duration': 72.0,
            'age_limit': 18,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://xhamster.com/movies/2272726/amber_slayed_by_the_knight.html',
        'only_matching': True,
    }]
    def _real_extract(self, url):
        def extract_video_url(webpage, name):
@ -170,7 +185,7 @@ class XHamsterEmbedIE(InfoExtractor):
        webpage = self._download_webpage(url, video_id)
        video_url = self._search_regex(
-            r'href="(https?://xhamster\.com/movies/%s/[^"]+\.html[^"]*)"' % video_id,
+            r'href="(https?://xhamster\.com/movies/%s/[^"]*\.html[^"]*)"' % video_id,
            webpage, 'xhamster url', default=None)
        if not video_url: