Merge pull request #10 from rg3/master

update 24 may
2016-05-24 23:50:42 +05:30 · 2016-05-24 23:50:42 +05:30 · d030e9ca0b
commit d030e9ca0b
parent db00818fad 688c634b7d
26 changed files with 600 additions and 263 deletions
--- a/1
+++ b/1
@ -172,3 +172,4 @@ blahgeek
 Kevin Deldycke
 inondle
 Tomáš Čech
+Déstin Reed
--- a/README.md
+++ b/README.md
@ -693,6 +693,10 @@ hash -r

 Again, from then on you'll be able to update with `sudo youtube-dl -U`.

+### youtube-dl is extremely slow to start on Windows
+
+Add a file exclusion for `youtube-dl.exe` in Windows Defender settings.
+
 ### I'm getting an error `Unable to extract OpenGraph title` on YouTube playlists

 YouTube changed their playlist format in March 2014 and later on, so you'll need at least youtube-dl 2014.07.25 to download all YouTube videos.
--- a/test/test_compat.py
+++ b/test/test_compat.py
@ -103,6 +103,12 @@ class TestCompat(unittest.TestCase):
        self.assertTrue(isinstance(doc.find('chinese').text, compat_str))
        self.assertTrue(isinstance(doc.find('foo/bar').text, compat_str))

+    def test_compat_etree_fromstring_doctype(self):
+        xml = '''<?xml version="1.0"?>
+<!DOCTYPE smil PUBLIC "-//W3C//DTD SMIL 2.0//EN" "http://www.w3.org/2001/SMIL20/SMIL20.dtd">
+<smil xmlns="http://www.w3.org/2001/SMIL20/Language"></smil>'''
+        compat_etree_fromstring(xml)
+
    def test_struct_unpack(self):
        self.assertEqual(compat_struct_unpack('!B', b'\x00'), (0,))

--- a/youtube_dl/compat.py
+++ b/youtube_dl/compat.py
@ -245,13 +245,20 @@ try:
 except ImportError:  # Python 2.6
    from xml.parsers.expat import ExpatError as compat_xml_parse_error

+
+etree = xml.etree.ElementTree
+
+
+class _TreeBuilder(etree.TreeBuilder):
+    def doctype(self, name, pubid, system):
+        pass
+
 if sys.version_info[0] >= 3:
-    compat_etree_fromstring = xml.etree.ElementTree.fromstring
+    def compat_etree_fromstring(text):
+        return etree.XML(text, parser=etree.XMLParser(target=_TreeBuilder()))
 else:
    # python 2.x tries to encode unicode strings with ascii (see the
    # XMLParser._fixtext method)
-    etree = xml.etree.ElementTree
-
    try:
        _etree_iter = etree.Element.iter
    except AttributeError:  # Python <=2.6
@ -265,7 +272,7 @@ else:
    # 2.7 source
    def _XML(text, parser=None):
        if not parser:
-            parser = etree.XMLParser(target=etree.TreeBuilder())
+            parser = etree.XMLParser(target=_TreeBuilder())
        parser.feed(text)
        return parser.close()

@ -277,7 +284,7 @@ else:
        return el

    def compat_etree_fromstring(text):
-        doc = _XML(text, parser=etree.XMLParser(target=etree.TreeBuilder(element_factory=_element_factory)))
+        doc = _XML(text, parser=etree.XMLParser(target=_TreeBuilder(element_factory=_element_factory)))
        for el in _etree_iter(doc):
            if el.text is not None and isinstance(el.text, bytes):
                el.text = el.text.decode('utf-8')
--- a/youtube_dl/extractor/bandcamp.py
+++ b/youtube_dl/extractor/bandcamp.py
@ -29,7 +29,7 @@ class BandcampIE(InfoExtractor):
        '_skip': 'There is a limit of 200 free downloads / month for the test song'
    }, {
        'url': 'http://benprunty.bandcamp.com/track/lanius-battle',
-        'md5': '2b68e5851514c20efdff2afc5603b8b4',
+        'md5': '73d0b3171568232574e45652f8720b5c',
        'info_dict': {
            'id': '2650410135',
            'ext': 'mp3',
@ -48,6 +48,10 @@ class BandcampIE(InfoExtractor):
            if m_trackinfo:
                json_code = m_trackinfo.group(1)
                data = json.loads(json_code)[0]
+                track_id = compat_str(data['id'])
+
+                if not data.get('file'):
+                    raise ExtractorError('Not streamable', video_id=track_id, expected=True)

                formats = []
                for format_id, format_url in data['file'].items():
@ -64,7 +68,7 @@ class BandcampIE(InfoExtractor):
                self._sort_formats(formats)

                return {
-                    'id': compat_str(data['id']),
+                    'id': track_id,
                    'title': data['title'],
                    'formats': formats,
                    'duration': float_or_none(data.get('duration')),
--- a/youtube_dl/extractor/byutv.py
+++ b/youtube_dl/extractor/byutv.py
@ -11,6 +11,7 @@ class BYUtvIE(InfoExtractor):
    _VALID_URL = r'^https?://(?:www\.)?byutv.org/watch/[0-9a-f-]+/(?P<video_id>[^/?#]+)'
    _TEST = {
        'url': 'http://www.byutv.org/watch/6587b9a3-89d2-42a6-a7f7-fd2f81840a7d/studio-c-season-5-episode-5',
+        'md5': '05850eb8c749e2ee05ad5a1c34668493',
        'info_dict': {
            'id': 'studio-c-season-5-episode-5',
            'ext': 'mp4',
@ -21,7 +22,8 @@ class BYUtvIE(InfoExtractor):
        },
        'params': {
            'skip_download': True,
-        }
+        },
+        'add_ie': ['Ooyala'],
    }

    def _real_extract(self, url):
--- a/youtube_dl/extractor/cbs.py
+++ b/youtube_dl/extractor/cbs.py
@ -1,5 +1,7 @@
 from __future__ import unicode_literals

+import re
+
 from .theplatform import ThePlatformIE
 from ..utils import (
    xpath_text,
@ -21,7 +23,7 @@ class CBSBaseIE(ThePlatformIE):


 class CBSIE(CBSBaseIE):
-    _VALID_URL = r'https?://(?:www\.)?(?:cbs\.com/shows/[^/]+/(?:video|artist)|colbertlateshow\.com/(?:video|podcasts))/[^/]+/(?P<id>[^/]+)'
+    _VALID_URL = r'(?:cbs:(?P<content_id>\w+)|https?://(?:www\.)?(?:cbs\.com/shows/[^/]+/(?:video|artist)|colbertlateshow\.com/(?:video|podcasts))/[^/]+/(?P<display_id>[^/]+))'

    _TESTS = [{
        'url': 'http://www.cbs.com/shows/garth-brooks/video/_u7W953k6la293J7EPTd9oHkSPs6Xn6_/connect-chat-feat-garth-brooks/',
@ -66,7 +68,8 @@ class CBSIE(CBSBaseIE):
    TP_RELEASE_URL_TEMPLATE = 'http://link.theplatform.com/s/dJ5BDC/%s?mbr=true'

    def _real_extract(self, url):
-        display_id = self._match_id(url)
+        content_id, display_id = re.match(self._VALID_URL, url).groups()
+        if not content_id:
            webpage = self._download_webpage(url, display_id)
            content_id = self._search_regex(
                [r"video\.settings\.content_id\s*=\s*'([^']+)';", r"cbsplayer\.contentId\s*=\s*'([^']+)';"],
--- a/youtube_dl/extractor/espn.py
+++ b/youtube_dl/extractor/espn.py
@ -8,6 +8,7 @@ class ESPNIE(InfoExtractor):
    _VALID_URL = r'https?://espn\.go\.com/(?:[^/]+/)*(?P<id>[^/]+)'
    _TESTS = [{
        'url': 'http://espn.go.com/video/clip?id=10365079',
+        'md5': '60e5d097a523e767d06479335d1bdc58',
        'info_dict': {
            'id': 'FkYWtmazr6Ed8xmvILvKLWjd4QvYZpzG',
            'ext': 'mp4',
@ -15,21 +16,22 @@ class ESPNIE(InfoExtractor):
            'description': None,
        },
        'params': {
-            # m3u8 download
            'skip_download': True,
        },
+        'add_ie': ['OoyalaExternal'],
    }, {
        # intl video, from http://www.espnfc.us/video/mls-highlights/150/video/2743663/must-see-moments-best-of-the-mls-season
        'url': 'http://espn.go.com/video/clip?id=2743663',
+        'md5': 'f4ac89b59afc7e2d7dbb049523df6768',
        'info_dict': {
            'id': '50NDFkeTqRHB0nXBOK-RGdSG5YQPuxHg',
            'ext': 'mp4',
            'title': 'Must-See Moments: Best of the MLS season',
        },
        'params': {
-            # m3u8 download
            'skip_download': True,
        },
+        'add_ie': ['OoyalaExternal'],
    }, {
        'url': 'https://espn.go.com/video/iframe/twitter/?cms=espn&id=10365079',
        'only_matching': True,
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@ -617,6 +617,10 @@ from .qqmusic import (
    QQMusicPlaylistIE,
 )
 from .r7 import R7IE
+from .radiocanada import (
+    RadioCanadaIE,
+    RadioCanadaAudioVideoIE,
+)
 from .radiode import RadioDeIE
 from .radiojavan import RadioJavanIE
 from .radiobremen import RadioBremenIE
@ -630,6 +634,7 @@ from .rds import RDSIE
 from .redtube import RedTubeIE
 from .regiotv import RegioTVIE
 from .restudy import RestudyIE
+from .reuters import ReutersIE
 from .reverbnation import ReverbNationIE
 from .revision3 import Revision3IE
 from .rice import RICEIE
@ -941,7 +946,10 @@ from .vube import VubeIE
 from .vuclip import VuClipIE
 from .vulture import VultureIE
 from .walla import WallaIE
-from .washingtonpost import WashingtonPostIE
+from .washingtonpost import (
+    WashingtonPostIE,
+    WashingtonPostArticleIE,
+)
 from .wat import WatIE
 from .watchindianporn import WatchIndianPornIE
 from .wdr import (
--- a/youtube_dl/extractor/formula1.py
+++ b/youtube_dl/extractor/formula1.py
@ -13,7 +13,8 @@ class Formula1IE(InfoExtractor):
            'id': 'JvYXJpMzE6pArfHWm5ARp5AiUmD-gibV',
            'ext': 'flv',
            'title': 'Race highlights - Spain 2016',
-        }
+        },
+        'add_ie': ['Ooyala'],
    }

    def _real_extract(self, url):
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@ -784,6 +784,19 @@ class GenericIE(InfoExtractor):
                'title': 'Rosetta #CometLanding webcast HL 10',
            }
        },
+        # Another Livestream embed, without 'new.' in URL
+        {
+            'url': 'https://www.freespeech.org/',
+            'info_dict': {
+                'id': '123537347',
+                'ext': 'mp4',
+                'title': 're:^FSTV [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+            },
+            'params': {
+                # Live stream
+                'skip_download': True,
+            },
+        },
        # LazyYT
        {
            'url': 'http://discourse.ubuntu.com/t/unity-8-desktop-mode-windows-on-mir/1986',
@ -1194,6 +1207,16 @@ class GenericIE(InfoExtractor):
                'uploader': 'Lake8737',
            }
        },
+        # Duplicated embedded video URLs
+        {
+            'url': 'http://www.hudl.com/athlete/2538180/highlights/149298443',
+            'info_dict': {
+                'id': '149298443_480_16c25b74_2',
+                'ext': 'mp4',
+                'title': 'vs. Blue Orange Spring Game',
+                'uploader': 'www.hudl.com',
+            },
+        },
    ]

    def report_following_redirect(self, new_url):
@ -1868,7 +1891,7 @@ class GenericIE(InfoExtractor):
            return self.url_result(self._proto_relative_url(mobj.group('url'), scheme='http:'), 'CondeNast')

        mobj = re.search(
-            r'<iframe[^>]+src="(?P<url>https?://new\.livestream\.com/[^"]+/player[^"]+)"',
+            r'<iframe[^>]+src="(?P<url>https?://(?:new\.)?livestream\.com/[^"]+/player[^"]+)"',
            webpage)
        if mobj is not None:
            return self.url_result(mobj.group('url'), 'Livestream')
@ -2111,7 +2134,7 @@ class GenericIE(InfoExtractor):
            raise UnsupportedError(url)

        entries = []
-        for video_url in found:
+        for video_url in orderedSet(found):
            video_url = unescapeHTML(video_url)
            video_url = video_url.replace('\\/', '/')
            video_url = compat_urlparse.urljoin(url, video_url)
--- a/youtube_dl/extractor/groupon.py
+++ b/youtube_dl/extractor/groupon.py
@ -14,6 +14,7 @@ class GrouponIE(InfoExtractor):
            'description': 'Studio kept at 105 degrees and 40% humidity with anti-microbial and anti-slip Flotex flooring; certified instructors',
        },
        'playlist': [{
+            'md5': '42428ce8a00585f9bc36e49226eae7a1',
            'info_dict': {
                'id': 'fk6OhWpXgIQ',
                'ext': 'mp4',
@ -24,10 +25,11 @@ class GrouponIE(InfoExtractor):
                'uploader_id': 'groupon',
                'uploader': 'Groupon',
            },
+            'add_ie': ['Youtube'],
        }],
        'params': {
            'skip_download': True,
-        }
+        },
    }

    _PROVIDERS = {
--- a/youtube_dl/extractor/howcast.py
+++ b/youtube_dl/extractor/howcast.py
@ -8,7 +8,7 @@ class HowcastIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?howcast\.com/videos/(?P<id>\d+)'
    _TEST = {
        'url': 'http://www.howcast.com/videos/390161-How-to-Tie-a-Square-Knot-Properly',
-        'md5': '8b743df908c42f60cf6496586c7f12c3',
+        'md5': '7d45932269a288149483144f01b99789',
        'info_dict': {
            'id': '390161',
            'ext': 'mp4',
@ -19,9 +19,9 @@ class HowcastIE(InfoExtractor):
            'duration': 56.823,
        },
        'params': {
-            # m3u8 download
            'skip_download': True,
        },
+        'add_ie': ['Ooyala'],
    }

    def _real_extract(self, url):
--- a/youtube_dl/extractor/lifenews.py
+++ b/youtube_dl/extractor/lifenews.py
@ -7,48 +7,53 @@ from .common import InfoExtractor
 from ..compat import compat_urlparse
 from ..utils import (
    determine_ext,
-    int_or_none,
-    remove_end,
-    unified_strdate,
    ExtractorError,
+    int_or_none,
+    parse_iso8601,
+    remove_end,
 )


 class LifeNewsIE(InfoExtractor):
-    IE_NAME = 'lifenews'
-    IE_DESC = 'LIFE | NEWS'
-    _VALID_URL = r'https?://lifenews\.ru/(?:mobile/)?(?P<section>news|video)/(?P<id>\d+)'
+    IE_NAME = 'life'
+    IE_DESC = 'Life.ru'
+    _VALID_URL = r'https?://life\.ru/t/[^/]+/(?P<id>\d+)'

    _TESTS = [{
        # single video embedded via video/source
-        'url': 'http://lifenews.ru/news/98736',
+        'url': 'https://life.ru/t/новости/98736',
        'md5': '77c95eaefaca216e32a76a343ad89d23',
        'info_dict': {
            'id': '98736',
            'ext': 'mp4',
            'title': 'Мужчина нашел дома архив оборонного завода',
            'description': 'md5:3b06b1b39b5e2bea548e403d99b8bf26',
+            'timestamp': 1344154740,
            'upload_date': '20120805',
+            'view_count': int,
        }
    }, {
        # single video embedded via iframe
-        'url': 'http://lifenews.ru/news/152125',
+        'url': 'https://life.ru/t/новости/152125',
        'md5': '77d19a6f0886cd76bdbf44b4d971a273',
        'info_dict': {
            'id': '152125',
            'ext': 'mp4',
            'title': 'В Сети появилось видео захвата «Правым сектором» колхозных полей ',
            'description': 'Жители двух поселков Днепропетровской области не простили радикалам угрозу лишения плодородных земель и пошли в лобовую. ',
+            'timestamp': 1427961840,
            'upload_date': '20150402',
+            'view_count': int,
        }
    }, {
        # two videos embedded via iframe
-        'url': 'http://lifenews.ru/news/153461',
+        'url': 'https://life.ru/t/новости/153461',
        'info_dict': {
            'id': '153461',
            'title': 'В Москве спасли потерявшегося медвежонка, который спрятался на дереве',
            'description': 'Маленький хищник не смог найти дорогу домой и обрел временное убежище на тополе недалеко от жилого массива, пока его не нашла соседская собака.',
-            'upload_date': '20150505',
+            'timestamp': 1430825520,
+            'view_count': int,
        },
        'playlist': [{
            'md5': '9b6ef8bc0ffa25aebc8bdb40d89ab795',
@ -57,6 +62,7 @@ class LifeNewsIE(InfoExtractor):
                'ext': 'mp4',
                'title': 'В Москве спасли потерявшегося медвежонка, который спрятался на дереве (Видео 1)',
                'description': 'Маленький хищник не смог найти дорогу домой и обрел временное убежище на тополе недалеко от жилого массива, пока его не нашла соседская собака.',
+                'timestamp': 1430825520,
                'upload_date': '20150505',
            },
        }, {
@ -66,22 +72,25 @@ class LifeNewsIE(InfoExtractor):
                'ext': 'mp4',
                'title': 'В Москве спасли потерявшегося медвежонка, который спрятался на дереве (Видео 2)',
                'description': 'Маленький хищник не смог найти дорогу домой и обрел временное убежище на тополе недалеко от жилого массива, пока его не нашла соседская собака.',
+                'timestamp': 1430825520,
                'upload_date': '20150505',
            },
        }],
    }, {
-        'url': 'http://lifenews.ru/video/13035',
+        'url': 'https://life.ru/t/новости/213035',
+        'only_matching': True,
+    }, {
+        'url': 'https://life.ru/t/%D0%BD%D0%BE%D0%B2%D0%BE%D1%81%D1%82%D0%B8/153461',
+        'only_matching': True,
+    }, {
+        'url': 'https://life.ru/t/новости/411489/manuel_vals_nazval_frantsiiu_tsieliu_nomier_odin_dlia_ighil',
        'only_matching': True,
    }]

    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-        section = mobj.group('section')
+        video_id = self._match_id(url)

-        webpage = self._download_webpage(
-            'http://lifenews.ru/%s/%s' % (section, video_id),
-            video_id, 'Downloading page')
+        webpage = self._download_webpage(url, video_id)

        video_urls = re.findall(
            r'<video[^>]+><source[^>]+src=["\'](.+?)["\']', webpage)
@ -95,26 +104,22 @@ class LifeNewsIE(InfoExtractor):

        title = remove_end(
            self._og_search_title(webpage),
-            ' - Первый по срочным новостям — LIFE | NEWS')
+            ' - Life.ru')

        description = self._og_search_description(webpage)

        view_count = self._html_search_regex(
-            r'<div class=\'views\'>\s*(\d+)\s*</div>', webpage, 'view count', fatal=False)
-        comment_count = self._html_search_regex(
-            r'=\'commentCount\'[^>]*>\s*(\d+)\s*<',
-            webpage, 'comment count', fatal=False)
+            r'<div[^>]+class=(["\']).*?\bhits-count\b.*?\1[^>]*>\s*(?P<value>\d+)\s*</div>',
+            webpage, 'view count', fatal=False, group='value')

-        upload_date = self._html_search_regex(
-            r'<time[^>]*datetime=\'([^\']+)\'', webpage, 'upload date', fatal=False)
-        if upload_date is not None:
-            upload_date = unified_strdate(upload_date)
+        timestamp = parse_iso8601(self._search_regex(
+            r'<time[^>]+datetime=(["\'])(?P<value>.+?)\1',
+            webpage, 'upload date', fatal=False, group='value'))

        common_info = {
            'description': description,
            'view_count': int_or_none(view_count),
-            'comment_count': int_or_none(comment_count),
-            'upload_date': upload_date,
+            'timestamp': timestamp,
        }

        def make_entry(video_id, video_url, index=None):
@ -183,7 +188,8 @@ class LifeEmbedIE(InfoExtractor):
            ext = determine_ext(video_url)
            if ext == 'm3u8':
                formats.extend(self._extract_m3u8_formats(
-                    video_url, video_id, 'mp4', m3u8_id='m3u8'))
+                    video_url, video_id, 'mp4',
+                    entry_protocol='m3u8_native', m3u8_id='m3u8'))
            else:
                formats.append({
                    'url': video_url,
--- a/youtube_dl/extractor/livestream.py
+++ b/youtube_dl/extractor/livestream.py
@ -150,7 +150,7 @@ class LivestreamIE(InfoExtractor):
        }

    def _extract_stream_info(self, stream_info):
-        broadcast_id = stream_info['broadcast_id']
+        broadcast_id = compat_str(stream_info['broadcast_id'])
        is_live = stream_info.get('is_live')

        formats = []
--- a/youtube_dl/extractor/ooyala.py
+++ b/youtube_dl/extractor/ooyala.py
@ -8,6 +8,7 @@ from ..utils import (
    float_or_none,
    ExtractorError,
    unsmuggle_url,
+    determine_ext,
 )
 from ..compat import compat_urllib_parse_urlencode

@ -15,56 +16,49 @@ from ..compat import compat_urllib_parse_urlencode
 class OoyalaBaseIE(InfoExtractor):
    _PLAYER_BASE = 'http://player.ooyala.com/'
    _CONTENT_TREE_BASE = _PLAYER_BASE + 'player_api/v1/content_tree/'
-    _AUTHORIZATION_URL_TEMPLATE = _PLAYER_BASE + 'sas/player_api/v1/authorization/embed_code/%s/%s?'
+    _AUTHORIZATION_URL_TEMPLATE = _PLAYER_BASE + 'sas/player_api/v2/authorization/embed_code/%s/%s?'

    def _extract(self, content_tree_url, video_id, domain='example.org'):
        content_tree = self._download_json(content_tree_url, video_id)['content_tree']
        metadata = content_tree[list(content_tree)[0]]
        embed_code = metadata['embed_code']
        pcode = metadata.get('asset_pcode') or embed_code
-        video_info = {
-            'id': embed_code,
-            'title': metadata['title'],
-            'description': metadata.get('description'),
-            'thumbnail': metadata.get('thumbnail_image') or metadata.get('promo_image'),
-            'duration': float_or_none(metadata.get('duration'), 1000),
-        }
+        title = metadata['title']

-        urls = []
-        formats = []
-        for supported_format in ('mp4', 'm3u8', 'hds', 'rtmp'):
        auth_data = self._download_json(
            self._AUTHORIZATION_URL_TEMPLATE % (pcode, embed_code) +
            compat_urllib_parse_urlencode({
                'domain': domain,
-                    'supportedFormats': supported_format
-                }),
-                video_id, 'Downloading %s JSON' % supported_format)
+                'supportedFormats': 'mp4,rtmp,m3u8,hds',
+            }), video_id)

        cur_auth_data = auth_data['authorization_data'][embed_code]

+        urls = []
+        formats = []
        if cur_auth_data['authorized']:
            for stream in cur_auth_data['streams']:
-                    url = base64.b64decode(
+                s_url = base64.b64decode(
                    stream['url']['data'].encode('ascii')).decode('utf-8')
-                    if url in urls:
+                if s_url in urls:
                    continue
-                    urls.append(url)
+                urls.append(s_url)
+                ext = determine_ext(s_url, None)
                delivery_type = stream['delivery_type']
-                    if delivery_type == 'hls' or '.m3u8' in url:
+                if delivery_type == 'hls' or ext == 'm3u8':
                    formats.extend(self._extract_m3u8_formats(
-                            url, embed_code, 'mp4', 'm3u8_native',
+                        s_url, embed_code, 'mp4', 'm3u8_native',
                        m3u8_id='hls', fatal=False))
-                    elif delivery_type == 'hds' or '.f4m' in url:
+                elif delivery_type == 'hds' or ext == 'f4m':
                    formats.extend(self._extract_f4m_formats(
-                            url + '?hdcore=3.7.0', embed_code, f4m_id='hds', fatal=False))
-                    elif '.smil' in url:
+                        s_url + '?hdcore=3.7.0', embed_code, f4m_id='hds', fatal=False))
+                elif ext == 'smil':
                    formats.extend(self._extract_smil_formats(
-                            url, embed_code, fatal=False))
+                        s_url, embed_code, fatal=False))
                else:
                    formats.append({
-                            'url': url,
-                            'ext': stream.get('delivery_type'),
+                        'url': s_url,
+                        'ext': ext or stream.get('delivery_type'),
                        'vcodec': stream.get('video_codec'),
                        'format_id': delivery_type,
                        'width': int_or_none(stream.get('width')),
@ -78,8 +72,24 @@ class OoyalaBaseIE(InfoExtractor):
                self.IE_NAME, cur_auth_data['message']), expected=True)
        self._sort_formats(formats)

-        video_info['formats'] = formats
-        return video_info
+        subtitles = {}
+        for lang, sub in metadata.get('closed_captions_vtt', {}).get('captions', {}).items():
+            sub_url = sub.get('url')
+            if not sub_url:
+                continue
+            subtitles[lang] = [{
+                'url': sub_url,
+            }]
+
+        return {
+            'id': embed_code,
+            'title': title,
+            'description': metadata.get('description'),
+            'thumbnail': metadata.get('thumbnail_image') or metadata.get('promo_image'),
+            'duration': float_or_none(metadata.get('duration'), 1000),
+            'subtitles': subtitles,
+            'formats': formats,
+        }


 class OoyalaIE(OoyalaBaseIE):
--- a/youtube_dl/extractor/radiocanada.py
+++ b/youtube_dl/extractor/radiocanada.py
@ -0,0 +1,130 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    xpath_text,
+    find_xpath_attr,
+    determine_ext,
+    int_or_none,
+    unified_strdate,
+    xpath_element,
+    ExtractorError,
+)
+
+
+class RadioCanadaIE(InfoExtractor):
+    IE_NAME = 'radiocanada'
+    _VALID_URL = r'(?:radiocanada:|https?://ici\.radio-canada\.ca/widgets/mediaconsole/)(?P<app_code>[^:/]+)[:/](?P<id>[0-9]+)'
+    _TEST = {
+        'url': 'http://ici.radio-canada.ca/widgets/mediaconsole/medianet/7184272',
+        'info_dict': {
+            'id': '7184272',
+            'ext': 'flv',
+            'title': 'Le parcours du tireur capté sur vidéo',
+            'description': 'Images des caméras de surveillance fournies par la GRC montrant le parcours du tireur d\'Ottawa',
+            'upload_date': '20141023',
+        },
+        'params': {
+            # rtmp download
+            'skip_download': True,
+        },
+    }
+
+    def _real_extract(self, url):
+        app_code, video_id = re.match(self._VALID_URL, url).groups()
+
+        formats = []
+        # TODO: extract m3u8 and f4m formats
+        # m3u8 formats can be extracted using ipad device_type return 403 error code when ffmpeg try to download segements
+        # f4m formats can be extracted using flashhd device_type but they produce unplayable file
+        for device_type in ('flash',):
+            v_data = self._download_xml(
+                'http://api.radio-canada.ca/validationMedia/v1/Validation.ashx',
+                video_id, note='Downloading %s XML' % device_type, query={
+                    'appCode': app_code,
+                    'idMedia': video_id,
+                    'connectionType': 'broadband',
+                    'multibitrate': 'true',
+                    'deviceType': device_type,
+                    # paysJ391wsHjbOJwvCs26toz and bypasslock are used to bypass geo-restriction
+                    'paysJ391wsHjbOJwvCs26toz': 'CA',
+                    'bypasslock': 'NZt5K62gRqfc',
+                })
+            v_url = xpath_text(v_data, 'url')
+            if not v_url:
+                continue
+            if v_url == 'null':
+                raise ExtractorError('%s said: %s' % (
+                    self.IE_NAME, xpath_text(v_data, 'message')), expected=True)
+            ext = determine_ext(v_url)
+            if ext == 'm3u8':
+                formats.extend(self._extract_m3u8_formats(
+                    v_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
+            elif ext == 'f4m':
+                formats.extend(self._extract_f4m_formats(v_url, video_id, f4m_id='hds', fatal=False))
+            else:
+                ext = determine_ext(v_url)
+                bitrates = xpath_element(v_data, 'bitrates')
+                for url_e in bitrates.findall('url'):
+                    tbr = int_or_none(url_e.get('bitrate'))
+                    if not tbr:
+                        continue
+                    formats.append({
+                        'format_id': 'rtmp-%d' % tbr,
+                        'url': re.sub(r'\d+\.%s' % ext, '%d.%s' % (tbr, ext), v_url),
+                        'ext': 'flv',
+                        'protocol': 'rtmp',
+                        'width': int_or_none(url_e.get('width')),
+                        'height': int_or_none(url_e.get('height')),
+                        'tbr': tbr,
+                    })
+        self._sort_formats(formats)
+
+        metadata = self._download_xml(
+            'http://api.radio-canada.ca/metaMedia/v1/index.ashx',
+            video_id, note='Downloading metadata XML', query={
+                'appCode': app_code,
+                'idMedia': video_id,
+            })
+
+        def get_meta(name):
+            el = find_xpath_attr(metadata, './/Meta', 'name', name)
+            return el.text if el is not None else None
+
+        return {
+            'id': video_id,
+            'title': get_meta('Title'),
+            'description': get_meta('Description') or get_meta('ShortDescription'),
+            'thumbnail': get_meta('imageHR') or get_meta('imageMR') or get_meta('imageBR'),
+            'duration': int_or_none(get_meta('length')),
+            'series': get_meta('Emission'),
+            'season_number': int_or_none('SrcSaison'),
+            'episode_number': int_or_none('SrcEpisode'),
+            'upload_date': unified_strdate(get_meta('Date')),
+            'formats': formats,
+        }
+
+
+class RadioCanadaAudioVideoIE(InfoExtractor):
+    'radiocanada:audiovideo'
+    _VALID_URL = r'https?://ici\.radio-canada\.ca/audio-video/media-(?P<id>[0-9]+)'
+    _TEST = {
+        'url': 'http://ici.radio-canada.ca/audio-video/media-7527184/barack-obama-au-vietnam',
+        'info_dict': {
+            'id': '7527184',
+            'ext': 'flv',
+            'title': 'Barack Obama au Vietnam',
+            'description': 'Les États-Unis lèvent l\'embargo sur la vente d\'armes qui datait de la guerre du Vietnam',
+            'upload_date': '20160523',
+        },
+        'params': {
+            # rtmp download
+            'skip_download': True,
+        },
+    }
+
+    def _real_extract(self, url):
+        return self.url_result('radiocanada:medianet:%s' % self._match_id(url))
--- a/youtube_dl/extractor/reuters.py
+++ b/youtube_dl/extractor/reuters.py
@ -0,0 +1,69 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    js_to_json,
+    int_or_none,
+    unescapeHTML,
+)
+
+
+class ReutersIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?reuters\.com/.*?\?.*?videoId=(?P<id>[0-9]+)'
+    _TEST = {
+        'url': 'http://www.reuters.com/video/2016/05/20/san-francisco-police-chief-resigns?videoId=368575562',
+        'md5': '8015113643a0b12838f160b0b81cc2ee',
+        'info_dict': {
+            'id': '368575562',
+            'ext': 'mp4',
+            'title': 'San Francisco police chief resigns',
+        }
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(
+            'http://www.reuters.com/assets/iframe/yovideo?videoId=%s' % video_id, video_id)
+        video_data = js_to_json(self._search_regex(
+            r'(?s)Reuters\.yovideo\.drawPlayer\(({.*?})\);',
+            webpage, 'video data'))
+
+        def get_json_value(key, fatal=False):
+            return self._search_regex('"%s"\s*:\s*"([^"]+)"' % key, video_data, key, fatal=fatal)
+
+        title = unescapeHTML(get_json_value('title', fatal=True))
+        mmid, fid = re.search(r',/(\d+)\?f=(\d+)', get_json_value('flv', fatal=True)).groups()
+
+        mas_data = self._download_json(
+            'http://mas-e.cds1.yospace.com/mas/%s/%s?trans=json' % (mmid, fid),
+            video_id, transform_source=js_to_json)
+        formats = []
+        for f in mas_data:
+            f_url = f.get('url')
+            if not f_url:
+                continue
+            method = f.get('method')
+            if method == 'hls':
+                formats.extend(self._extract_m3u8_formats(
+                    f_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
+            else:
+                container = f.get('container')
+                ext = '3gp' if method == 'mobile' else container
+                formats.append({
+                    'format_id': ext,
+                    'url': f_url,
+                    'ext': ext,
+                    'container': container if method != 'mobile' else None,
+                })
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'thumbnail': get_json_value('thumb'),
+            'duration': int_or_none(get_json_value('seconds')),
+            'formats': formats,
+        }
--- a/youtube_dl/extractor/teachingchannel.py
+++ b/youtube_dl/extractor/teachingchannel.py
@ -11,6 +11,7 @@ class TeachingChannelIE(InfoExtractor):

    _TEST = {
        'url': 'https://www.teachingchannel.org/videos/teacher-teaming-evolution',
+        'md5': '3d6361864d7cac20b57c8784da17166f',
        'info_dict': {
            'id': 'F3bnlzbToeI6pLEfRyrlfooIILUjz4nM',
            'ext': 'mp4',
@ -19,9 +20,9 @@ class TeachingChannelIE(InfoExtractor):
            'duration': 422.255,
        },
        'params': {
-            # m3u8 download
            'skip_download': True,
        },
+        'add_ie': ['Ooyala'],
    }

    def _real_extract(self, url):
--- a/youtube_dl/extractor/tf1.py
+++ b/youtube_dl/extractor/tf1.py
@ -6,7 +6,7 @@ from .common import InfoExtractor

 class TF1IE(InfoExtractor):
    """TF1 uses the wat.tv player."""
-    _VALID_URL = r'https?://(?:(?:videos|www|lci)\.tf1|www\.tfou)\.fr/(?:[^/]+/)*(?P<id>.+?)\.html'
+    _VALID_URL = r'https?://(?:(?:videos|www|lci)\.tf1|(?:www\.)?(?:tfou|ushuaiatv|histoire|tvbreizh))\.fr/(?:[^/]+/)*(?P<id>[^/?#.]+)'
    _TESTS = [{
        'url': 'http://videos.tf1.fr/auto-moto/citroen-grand-c4-picasso-2013-presentation-officielle-8062060.html',
        'info_dict': {
@ -48,6 +48,6 @@ class TF1IE(InfoExtractor):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        wat_id = self._html_search_regex(
-            r'(["\'])(?:https?:)?//www\.wat\.tv/embedframe/.*?(?P<id>\d{8})(?:#.*?)?\1',
+            r'(["\'])(?:https?:)?//www\.wat\.tv/embedframe/.*?(?P<id>\d{8})(?:.*?)?\1',
            webpage, 'wat id', group='id')
        return self.url_result('wat:%s' % wat_id, 'Wat')
--- a/youtube_dl/extractor/veoh.py
+++ b/youtube_dl/extractor/veoh.py
@ -37,6 +37,7 @@ class VeohIE(InfoExtractor):
                'uploader': 'afp-news',
                'duration': 123,
            },
+            'skip': 'This video has been deleted.',
        },
        {
            'url': 'http://www.veoh.com/watch/v69525809F6Nc4frX',
--- a/youtube_dl/extractor/vice.py
+++ b/youtube_dl/extractor/vice.py
@ -11,12 +11,14 @@ class ViceIE(InfoExtractor):

    _TESTS = [{
        'url': 'http://www.vice.com/video/cowboy-capitalists-part-1',
+        'md5': 'e9d77741f9e42ba583e683cd170660f7',
        'info_dict': {
            'id': '43cW1mYzpia9IlestBjVpd23Yu3afAfp',
            'ext': 'flv',
            'title': 'VICE_COWBOYCAPITALISTS_PART01_v1_VICE_WM_1080p.mov',
            'duration': 725.983,
        },
+        'add_ie': ['Ooyala'],
    }, {
        'url': 'http://www.vice.com/video/how-to-hack-a-car',
        'md5': '6fb2989a3fed069fb8eab3401fc2d3c9',
@ -29,6 +31,7 @@ class ViceIE(InfoExtractor):
            'uploader': 'Motherboard',
            'upload_date': '20140529',
        },
+        'add_ie': ['Youtube'],
    }, {
        'url': 'https://news.vice.com/video/experimenting-on-animals-inside-the-monkey-lab',
        'only_matching': True,
--- a/youtube_dl/extractor/voxmedia.py
+++ b/youtube_dl/extractor/voxmedia.py
@ -15,7 +15,8 @@ class VoxMediaIE(InfoExtractor):
            'ext': 'mp4',
            'title': 'Google\'s new material design direction',
            'description': 'md5:2f44f74c4d14a1f800ea73e1c6832ad2',
-        }
+        },
+        'add_ie': ['Ooyala'],
    }, {
        # data-ooyala-id
        'url': 'http://www.theverge.com/2014/10/21/7025853/google-nexus-6-hands-on-photos-video-android-phablet',
@ -25,7 +26,8 @@ class VoxMediaIE(InfoExtractor):
            'ext': 'mp4',
            'title': 'The Nexus 6: hands-on with Google\'s phablet',
            'description': 'md5:87a51fe95ff8cea8b5bdb9ac7ae6a6af',
-        }
+        },
+        'add_ie': ['Ooyala'],
    }, {
        # volume embed
        'url': 'http://www.vox.com/2016/3/31/11336640/mississippi-lgbt-religious-freedom-bill',
@ -35,7 +37,8 @@ class VoxMediaIE(InfoExtractor):
            'ext': 'mp4',
            'title': 'The new frontier of LGBTQ civil rights, explained',
            'description': 'md5:0dc58e94a465cbe91d02950f770eb93f',
-        }
+        },
+        'add_ie': ['Ooyala'],
    }, {
        # youtube embed
        'url': 'http://www.vox.com/2016/3/24/11291692/robot-dance',
@ -48,7 +51,8 @@ class VoxMediaIE(InfoExtractor):
            'upload_date': '20160324',
            'uploader_id': 'voxdotcom',
            'uploader': 'Vox',
-        }
+        },
+        'add_ie': ['Youtube'],
    }, {
        # SBN.VideoLinkset.entryGroup multiple ooyala embeds
        'url': 'http://www.sbnation.com/college-football-recruiting/2015/2/3/7970291/national-signing-day-rationalizations-itll-be-ok-itll-be-ok',
@ -117,7 +121,7 @@ class VoxMediaIE(InfoExtractor):
            volume_webpage = self._download_webpage(
                'http://volume.vox-cdn.com/embed/%s' % volume_uuid, volume_uuid)
            video_data = self._parse_json(self._search_regex(
-                r'Volume\.createVideo\(({.+})\s*,\s*{.*}\);', volume_webpage, 'video data'), volume_uuid)
+                r'Volume\.createVideo\(({.+})\s*,\s*{.*}\s*,\s*\[.*\]\s*,\s*{.*}\);', volume_webpage, 'video data'), volume_uuid)
            for provider_video_type in ('ooyala', 'youtube'):
                provider_video_id = video_data.get('%s_id' % provider_video_type)
                if provider_video_id:
--- a/youtube_dl/extractor/washingtonpost.py
+++ b/youtube_dl/extractor/washingtonpost.py
@ -11,7 +11,96 @@ from ..utils import (


 class WashingtonPostIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?washingtonpost\.com/.*?/(?P<id>[^/]+)/(?:$|[?#])'
+    IE_NAME = 'washingtonpost'
+    _VALID_URL = r'(?:washingtonpost:|https?://(?:www\.)?washingtonpost\.com/video/(?:[^/]+/)*)(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
+    _TEST = {
+        'url': 'https://www.washingtonpost.com/video/c/video/480ba4ee-1ec7-11e6-82c2-a7dcb313287d',
+        'md5': '6f537e1334b714eb15f9563bd4b9cdfa',
+        'info_dict': {
+            'id': '480ba4ee-1ec7-11e6-82c2-a7dcb313287d',
+            'ext': 'mp4',
+            'title': 'Egypt finds belongings, debris from plane crash',
+            'description': 'md5:a17ceee432f215a5371388c1f680bd86',
+            'upload_date': '20160520',
+            'uploader': 'Reuters',
+            'timestamp': 1463778452,
+        },
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        video_data = self._download_json(
+            'http://www.washingtonpost.com/posttv/c/videojson/%s?resType=jsonp' % video_id,
+            video_id, transform_source=strip_jsonp)[0]['contentConfig']
+        title = video_data['title']
+
+        urls = []
+        formats = []
+        for s in video_data.get('streams', []):
+            s_url = s.get('url')
+            if not s_url or s_url in urls:
+                continue
+            urls.append(s_url)
+            video_type = s.get('type')
+            if video_type == 'smil':
+                continue
+            elif video_type in ('ts', 'hls') and ('_master.m3u8' in s_url or '_mobile.m3u8' in s_url):
+                m3u8_formats = self._extract_m3u8_formats(
+                    s_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)
+                for m3u8_format in m3u8_formats:
+                    width = m3u8_format.get('width')
+                    if not width:
+                        continue
+                    vbr = self._search_regex(
+                        r'%d_%d_(\d+)' % (width, m3u8_format['height']), m3u8_format['url'], 'vbr', default=None)
+                    if vbr:
+                        m3u8_format.update({
+                            'vbr': int_or_none(vbr),
+                        })
+                formats.extend(m3u8_formats)
+            else:
+                width = int_or_none(s.get('width'))
+                vbr = int_or_none(s.get('bitrate'))
+                has_width = width != 0
+                formats.append({
+                    'format_id': (
+                        '%s-%d-%d' % (video_type, width, vbr)
+                        if width
+                        else video_type),
+                    'vbr': vbr if has_width else None,
+                    'width': width,
+                    'height': int_or_none(s.get('height')),
+                    'acodec': s.get('audioCodec'),
+                    'vcodec': s.get('videoCodec') if has_width else 'none',
+                    'filesize': int_or_none(s.get('fileSize')),
+                    'url': s_url,
+                    'ext': 'mp4',
+                    'protocol': 'm3u8_native' if video_type in ('ts', 'hls') else None,
+                })
+        source_media_url = video_data.get('sourceMediaURL')
+        if source_media_url:
+            formats.append({
+                'format_id': 'source_media',
+                'url': source_media_url,
+            })
+        self._sort_formats(
+            formats, ('width', 'height', 'vbr', 'filesize', 'tbr', 'format_id'))
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': video_data.get('blurb'),
+            'uploader': video_data.get('credits', {}).get('source'),
+            'formats': formats,
+            'duration': int_or_none(video_data.get('videoDuration'), 100),
+            'timestamp': int_or_none(
+                video_data.get('dateConfig', {}).get('dateFirstPublished'), 1000),
+        }
+
+
+class WashingtonPostArticleIE(InfoExtractor):
+    IE_NAME = 'washingtonpost:article'
+    _VALID_URL = r'https?://(?:www\.)?washingtonpost\.com/(?:[^/]+/)*(?P<id>[^/?#]+)'
    _TESTS = [{
        'url': 'http://www.washingtonpost.com/sf/national/2014/03/22/sinkhole-of-bureaucracy/',
        'info_dict': {
@ -63,6 +152,10 @@ class WashingtonPostIE(InfoExtractor):
        }]
    }]

+    @classmethod
+    def suitable(cls, url):
+        return False if WashingtonPostIE.suitable(url) else super(WashingtonPostArticleIE, cls).suitable(url)
+
    def _real_extract(self, url):
        page_id = self._match_id(url)
        webpage = self._download_webpage(url, page_id)
@ -74,54 +167,7 @@ class WashingtonPostIE(InfoExtractor):
                <div\s+class="posttv-video-embed[^>]*?data-uuid=|
                data-video-uuid=
            )"([^"]+)"''', webpage)
-        entries = []
-        for i, uuid in enumerate(uuids, start=1):
-            vinfo_all = self._download_json(
-                'http://www.washingtonpost.com/posttv/c/videojson/%s?resType=jsonp' % uuid,
-                page_id,
-                transform_source=strip_jsonp,
-                note='Downloading information of video %d/%d' % (i, len(uuids))
-            )
-            vinfo = vinfo_all[0]['contentConfig']
-            uploader = vinfo.get('credits', {}).get('source')
-            timestamp = int_or_none(
-                vinfo.get('dateConfig', {}).get('dateFirstPublished'), 1000)
-
-            formats = [{
-                'format_id': (
-                    '%s-%s-%s' % (s.get('type'), s.get('width'), s.get('bitrate'))
-                    if s.get('width')
-                    else s.get('type')),
-                'vbr': s.get('bitrate') if s.get('width') != 0 else None,
-                'width': s.get('width'),
-                'height': s.get('height'),
-                'acodec': s.get('audioCodec'),
-                'vcodec': s.get('videoCodec') if s.get('width') != 0 else 'none',
-                'filesize': s.get('fileSize'),
-                'url': s.get('url'),
-                'ext': 'mp4',
-                'preference': -100 if s.get('type') == 'smil' else None,
-                'protocol': {
-                    'MP4': 'http',
-                    'F4F': 'f4m',
-                }.get(s.get('type')),
-            } for s in vinfo.get('streams', [])]
-            source_media_url = vinfo.get('sourceMediaURL')
-            if source_media_url:
-                formats.append({
-                    'format_id': 'source_media',
-                    'url': source_media_url,
-                })
-            self._sort_formats(formats)
-            entries.append({
-                'id': uuid,
-                'title': vinfo['title'],
-                'description': vinfo.get('blurb'),
-                'uploader': uploader,
-                'formats': formats,
-                'duration': int_or_none(vinfo.get('videoDuration'), 100),
-                'timestamp': timestamp,
-            })
+        entries = [self.url_result('washingtonpost:%s' % uuid, 'WashingtonPost', uuid) for uuid in uuids]

        return {
            '_type': 'playlist',
--- a/youtube_dl/extractor/wat.py
+++ b/youtube_dl/extractor/wat.py
@ -2,25 +2,26 @@
 from __future__ import unicode_literals

 import re
-import hashlib

 from .common import InfoExtractor
+from ..compat import compat_str
 from ..utils import (
    ExtractorError,
    unified_strdate,
+    HEADRequest,
+    float_or_none,
 )


 class WatIE(InfoExtractor):
-    _VALID_URL = r'(?:wat:(?P<real_id>\d{8})|https?://www\.wat\.tv/video/(?P<display_id>.*)-(?P<short_id>.*?)_.*?\.html)'
+    _VALID_URL = r'(?:wat:|https?://(?:www\.)?wat\.tv/video/.*-)(?P<id>[0-9a-z]+)'
    IE_NAME = 'wat.tv'
    _TESTS = [
        {
            'url': 'http://www.wat.tv/video/soupe-figues-l-orange-aux-epices-6z1uz_2hvf7_.html',
-            'md5': 'ce70e9223945ed26a8056d413ca55dc9',
+            'md5': '83d882d9de5c9d97f0bb2c6273cde56a',
            'info_dict': {
                'id': '11713067',
-                'display_id': 'soupe-figues-l-orange-aux-epices',
                'ext': 'mp4',
                'title': 'Soupe de figues à l\'orange et aux épices',
                'description': 'Retrouvez l\'émission "Petits plats en équilibre", diffusée le 18 août 2014.',
@ -33,7 +34,6 @@ class WatIE(InfoExtractor):
            'md5': 'fbc84e4378165278e743956d9c1bf16b',
            'info_dict': {
                'id': '11713075',
-                'display_id': 'gregory-lemarchal-voix-ange',
                'ext': 'mp4',
                'title': 'Grégory Lemarchal, une voix d\'ange depuis 10 ans (1/3)',
                'description': 'md5:b7a849cf16a2b733d9cd10c52906dee3',
@ -44,96 +44,85 @@ class WatIE(InfoExtractor):
        },
    ]

-    def download_video_info(self, real_id):
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        video_id = video_id if video_id.isdigit() and len(video_id) > 6 else compat_str(int(video_id, 36))
+
        # 'contentv4' is used in the website, but it also returns the related
        # videos, we don't need them
-        info = self._download_json('http://www.wat.tv/interface/contentv3/' + real_id, real_id)
-        return info['media']
-
-    def _real_extract(self, url):
-        def real_id_for_chapter(chapter):
-            return chapter['tc_start'].split('-')[0]
-        mobj = re.match(self._VALID_URL, url)
-        display_id = mobj.group('display_id')
-        real_id = mobj.group('real_id')
-        if not real_id:
-            short_id = mobj.group('short_id')
-            webpage = self._download_webpage(url, display_id or short_id)
-            real_id = self._search_regex(r'xtpage = ".*-(.*?)";', webpage, 'real id')
-
-        video_info = self.download_video_info(real_id)
+        video_info = self._download_json(
+            'http://www.wat.tv/interface/contentv3/' + video_id, video_id)['media']

        error_desc = video_info.get('error_desc')
        if error_desc:
            raise ExtractorError(
                '%s returned error: %s' % (self.IE_NAME, error_desc), expected=True)

-        geo_list = video_info.get('geoList')
-        country = geo_list[0] if geo_list else ''
-
        chapters = video_info['chapters']
        first_chapter = chapters[0]
-        files = video_info['files']
-        first_file = files[0]

-        if real_id_for_chapter(first_chapter) != real_id:
+        def video_id_for_chapter(chapter):
+            return chapter['tc_start'].split('-')[0]
+
+        if video_id_for_chapter(first_chapter) != video_id:
            self.to_screen('Multipart video detected')
-            chapter_urls = []
-            for chapter in chapters:
-                chapter_id = real_id_for_chapter(chapter)
-                # Yes, when we this chapter is processed by WatIE,
-                # it will download the info again
-                chapter_info = self.download_video_info(chapter_id)
-                chapter_urls.append(chapter_info['url'])
-            entries = [self.url_result(chapter_url) for chapter_url in chapter_urls]
-            return self.playlist_result(entries, real_id, video_info['title'])
-
-        upload_date = None
-        if 'date_diffusion' in first_chapter:
-            upload_date = unified_strdate(first_chapter['date_diffusion'])
+            entries = [self.url_result('wat:%s' % video_id_for_chapter(chapter)) for chapter in chapters]
+            return self.playlist_result(entries, video_id, video_info['title'])
        # Otherwise we can continue and extract just one part, we have to use
-        # the short id for getting the video url
+        # the video id for getting the video url

-        formats = [{
-            'url': 'http://wat.tv/get/android5/%s.mp4' % real_id,
-            'format_id': 'Mobile',
-        }]
+        date_diffusion = first_chapter.get('date_diffusion')
+        upload_date = unified_strdate(date_diffusion) if date_diffusion else None

-        fmts = [('SD', 'web')]
-        if first_file.get('hasHD'):
-            fmts.append(('HD', 'webhd'))
+        def extract_url(path_template, url_type):
+            req_url = 'http://www.wat.tv/get/%s' % (path_template % video_id)
+            head = self._request_webpage(HEADRequest(req_url), video_id, 'Extracting %s url' % url_type)
+            red_url = head.geturl()
+            if req_url == red_url:
+                raise ExtractorError(
+                    '%s said: Sorry, this video is not available from your country.' % self.IE_NAME,
+                    expected=True)
+            return red_url

-        def compute_token(param):
-            timestamp = '%08x' % int(self._download_webpage(
-                'http://www.wat.tv/servertime', real_id,
-                'Downloading server time').split('|')[0])
-            magic = '9b673b13fa4682ed14c3cfa5af5310274b514c4133e9b3a81e6e3aba009l2564'
-            return '%s/%s' % (hashlib.md5((magic + param + timestamp).encode('ascii')).hexdigest(), timestamp)
+        m3u8_url = extract_url('ipad/%s.m3u8', 'm3u8')
+        http_url = extract_url('android5/%s.mp4', 'http')

-        for fmt in fmts:
-            webid = '/%s/%s' % (fmt[1], real_id)
-            video_url = self._download_webpage(
-                'http://www.wat.tv/get%s?token=%s&getURL=1&country=%s' % (webid, compute_token(webid), country),
-                real_id,
-                'Downloading %s video URL' % fmt[0],
-                'Failed to download %s video URL' % fmt[0],
-                False)
-            if not video_url:
+        formats = []
+        m3u8_formats = self._extract_m3u8_formats(
+            m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls')
+        formats.extend(m3u8_formats)
+        formats.extend(self._extract_f4m_formats(
+            m3u8_url.replace('ios.', 'web.').replace('.m3u8', '.f4m'),
+            video_id, f4m_id='hds', fatal=False))
+        for m3u8_format in m3u8_formats:
+            mobj = re.search(
+                r'audio.*?%3D(\d+)(?:-video.*?%3D(\d+))?', m3u8_format['url'])
+            if not mobj:
                continue
-            formats.append({
-                'url': video_url,
-                'ext': 'mp4',
-                'format_id': fmt[0],
+            abr, vbr = mobj.groups()
+            abr, vbr = float_or_none(abr, 1000), float_or_none(vbr, 1000)
+            m3u8_format.update({
+                'vbr': vbr,
+                'abr': abr,
            })
+            if not vbr or not abr:
+                continue
+            f = m3u8_format.copy()
+            f.update({
+                'url': re.sub(r'%s-\d+00-\d+' % video_id, '%s-%d00-%d' % (video_id, round(vbr / 100), round(abr)), http_url),
+                'format_id': f['format_id'].replace('hls', 'http'),
+                'protocol': 'http',
+            })
+            formats.append(f)
+        self._sort_formats(formats)

        return {
-            'id': real_id,
-            'display_id': display_id,
+            'id': video_id,
            'title': first_chapter['title'],
            'thumbnail': first_chapter['preview'],
            'description': first_chapter['description'],
            'view_count': video_info['views'],
            'upload_date': upload_date,
-            'duration': first_file['duration'],
+            'duration': video_info['files'][0]['duration'],
            'formats': formats,
        }
--- a/youtube_dl/extractor/xhamster.py
+++ b/youtube_dl/extractor/xhamster.py
@ -12,10 +12,10 @@ from ..utils import (


 class XHamsterIE(InfoExtractor):
-    _VALID_URL = r'(?P<proto>https?)://(?:.+?\.)?xhamster\.com/movies/(?P<id>[0-9]+)/(?P<seo>.+?)\.html(?:\?.*)?'
-    _TESTS = [
-        {
+    _VALID_URL = r'(?P<proto>https?)://(?:.+?\.)?xhamster\.com/movies/(?P<id>[0-9]+)/(?P<seo>.*?)\.html(?:\?.*)?'
+    _TESTS = [{
        'url': 'http://xhamster.com/movies/1509445/femaleagent_shy_beauty_takes_the_bait.html',
+        'md5': '8281348b8d3c53d39fffb377d24eac4e',
        'info_dict': {
            'id': '1509445',
            'ext': 'mp4',
@ -24,9 +24,8 @@ class XHamsterIE(InfoExtractor):
            'uploader': 'Ruseful2011',
            'duration': 893.52,
            'age_limit': 18,
-            }
        },
-        {
+    }, {
        'url': 'http://xhamster.com/movies/2221348/britney_spears_sexy_booty.html?hd',
        'info_dict': {
            'id': '2221348',
@ -36,13 +35,29 @@ class XHamsterIE(InfoExtractor):
            'uploader': 'jojo747400',
            'duration': 200.48,
            'age_limit': 18,
-            }
        },
-        {
+        'params': {
+            'skip_download': True,
+        },
+    }, {
+        # empty seo
+        'url': 'http://xhamster.com/movies/5667973/.html',
+        'info_dict': {
+            'id': '5667973',
+            'ext': 'mp4',
+            'title': '....',
+            'upload_date': '20160208',
+            'uploader': 'parejafree',
+            'duration': 72.0,
+            'age_limit': 18,
+        },
+        'params': {
+            'skip_download': True,
+        },
+    }, {
        'url': 'https://xhamster.com/movies/2272726/amber_slayed_by_the_knight.html',
        'only_matching': True,
-        },
-    ]
+    }]

    def _real_extract(self, url):
        def extract_video_url(webpage, name):
@ -170,7 +185,7 @@ class XHamsterEmbedIE(InfoExtractor):
        webpage = self._download_webpage(url, video_id)

        video_url = self._search_regex(
-            r'href="(https?://xhamster\.com/movies/%s/[^"]+\.html[^"]*)"' % video_id,
+            r'href="(https?://xhamster\.com/movies/%s/[^"]*\.html[^"]*)"' % video_id,
            webpage, 'xhamster url', default=None)

        if not video_url: