Merge pull request #21 from rg3/master

update
2016-11-08 20:49:02 +05:30 · 2016-11-08 20:49:02 +05:30 · 048fa3dc57
commit 048fa3dc57
parent 789fee4456 c58e07a7aa
14 changed files with 263 additions and 113 deletions
--- a/.github/ISSUE_TEMPLATE.md
+++ b/.github/ISSUE_TEMPLATE.md
@ -6,8 +6,8 @@
 ---
-### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.11.04*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
+### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.11.08*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with an outdated version will be rejected.
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.11.04**
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.11.08**
 ### Before submitting an *issue* make sure you have:
 - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
@ -35,7 +35,7 @@ $ youtube-dl -v <your command line>
 [debug] User config: []
 [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
 [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
-[debug] youtube-dl version 2016.11.04
+[debug] youtube-dl version 2016.11.08
 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
 [debug] Proxy map: {}
--- a/ChangeLog
+++ b/ChangeLog
@ -1,3 +1,19 @@
 version 2016.11.08
 Extractors
 * [tmz:article] Fix extraction (#11052)
 * [espn] Fix extraction (#11041)
 * [mitele] Fix extraction after website redesign (#10824)
 - [ard] Remove age restriction check (#11129)
 * [generic] Improve support for pornhub.com embeds (#11100)
 + [generic] Add support for redtube.com embeds (#11099)
 + [generic] Add support for drtuber.com embeds (#11098)
 + [redtube] Add support for embed URLs
 + [drtuber] Add support for embed URLs
 + [yahoo] Improve content id extraction (#11088)
 * [toutv] Relax URL regular expression (#11121)
 version 2016.11.04
 Core
--- a/README.md
+++ b/README.md
@ -758,7 +758,7 @@ Once the video is fully downloaded, use any video player, such as [mpv](https://
 ### I extracted a video URL with `-g`, but it does not play on another machine / in my webbrowser.
-It depends a lot on the service. In many cases, requests for the video (to download/play it) must come from the same IP address and with the same cookies.  Use the `--cookies` option to write the required cookies into a file, and advise your downloader to read cookies from that file. Some sites also require a common user agent to be used, use `--dump-user-agent` to see the one in use by youtube-dl.
+It depends a lot on the service. In many cases, requests for the video (to download/play it) must come from the same IP address and with the same cookies and/or HTTP headers. Use the `--cookies` option to write the required cookies into a file, and advise your downloader to read cookies from that file. Some sites also require a common user agent to be used; use `--dump-user-agent` to see the one in use by youtube-dl. You can also get the necessary cookies and HTTP headers from the JSON output obtained with `--dump-json`.
 It may be beneficial to use IPv6; in some cases, the restrictions are only applied to IPv4. Some services (sometimes only for a subset of videos) do not restrict the video URL by IP address, cookie, or user-agent, but these are the exception rather than the rule.
--- a/youtube_dl/extractor/ard.py
+++ b/youtube_dl/extractor/ard.py
@ -178,8 +178,6 @@ class ARDMediathekIE(InfoExtractor):
            ('>Leider liegt eine Störung vor.', 'Video %s is unavailable'),
            ('>Der gewünschte Beitrag ist nicht mehr verfügbar.<',
             'Video %s is no longer available'),
            ('Diese Sendung ist für Jugendliche unter 12 Jahren nicht geeignet. Der Clip ist deshalb nur von 20 bis 6 Uhr verfügbar.',
             'This program is only suitable for those aged 12 and older. Video %s is therefore only available between 8 pm and 6 am.'),
        )
        for pattern, message in ERRORS:
--- a/youtube_dl/extractor/drtuber.py
+++ b/youtube_dl/extractor/drtuber.py
@ -10,8 +10,8 @@ from ..utils import (
 class DrTuberIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?drtuber\.com/video/(?P<id>\d+)/(?P<display_id>[\w-]+)'
+    _VALID_URL = r'https?://(?:www\.)?drtuber\.com/(?:video|embed)/(?P<id>\d+)(?:/(?P<display_id>[\w-]+))?'
-    _TEST = {
+    _TESTS = [{
        'url': 'http://www.drtuber.com/video/1740434/hot-perky-blonde-naked-golf',
        'md5': '93e680cf2536ad0dfb7e74d94a89facd',
        'info_dict': {
@ -25,20 +25,30 @@ class DrTuberIE(InfoExtractor):
            'thumbnail': 're:https?://.*\.jpg$',
            'age_limit': 18,
        }
-    }
+    }, {
        'url': 'http://www.drtuber.com/embed/489939',
        'only_matching': True,
    }]
    @staticmethod
    def _extract_urls(webpage):
        return re.findall(
            r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//(?:www\.)?drtuber\.com/embed/\d+)',
            webpage)
    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
-        display_id = mobj.group('display_id')
+        display_id = mobj.group('display_id') or video_id
-        webpage = self._download_webpage(url, display_id)
+        webpage = self._download_webpage(
            'http://www.drtuber.com/video/%s' % video_id, display_id)
        video_url = self._html_search_regex(
            r'<source src="([^"]+)"', webpage, 'video URL')
        title = self._html_search_regex(
-            (r'class="title_watch"[^>]*><p>([^<]+)<',
+            (r'class="title_watch"[^>]*><(?:p|h\d+)[^>]*>([^<]+)<',
             r'<p[^>]+class="title_substrate">([^<]+)</p>',
             r'<title>([^<]+) - \d+'),
            webpage, 'title')
--- a/youtube_dl/extractor/espn.py
+++ b/youtube_dl/extractor/espn.py
@ -1,38 +1,117 @@
 from __future__ import unicode_literals
 from .common import InfoExtractor
-from ..utils import remove_end
+from ..compat import compat_str
 from ..utils import (
    determine_ext,
    int_or_none,
    unified_timestamp,
 )
 class ESPNIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:espn\.go|(?:www\.)?espn)\.com/(?:[^/]+/)*(?P<id>[^/]+)'
+    _VALID_URL = r'https?://(?:espn\.go|(?:www\.)?espn)\.com/video/clip(?:\?.*?\bid=|/_/id/)(?P<id>\d+)'
    _TESTS = [{
        'url': 'http://espn.go.com/video/clip?id=10365079',
        'md5': '60e5d097a523e767d06479335d1bdc58',
        'info_dict': {
-            'id': 'FkYWtmazr6Ed8xmvILvKLWjd4QvYZpzG',
+            'id': '10365079',
            'ext': 'mp4',
            'title': '30 for 30 Shorts: Judging Jewell',
-            'description': None,
+            'description': 'md5:39370c2e016cb4ecf498ffe75bef7f0f',
            'timestamp': 1390936111,
            'upload_date': '20140128',
        },
        'params': {
            'skip_download': True,
        },
        'add_ie': ['OoyalaExternal'],
    }, {
        # intl video, from http://www.espnfc.us/video/mls-highlights/150/video/2743663/must-see-moments-best-of-the-mls-season
        'url': 'http://espn.go.com/video/clip?id=2743663',
        'md5': 'f4ac89b59afc7e2d7dbb049523df6768',
        'info_dict': {
-            'id': '50NDFkeTqRHB0nXBOK-RGdSG5YQPuxHg',
+            'id': '2743663',
            'ext': 'mp4',
            'title': 'Must-See Moments: Best of the MLS season',
            'description': 'md5:4c2d7232beaea572632bec41004f0aeb',
            'timestamp': 1449446454,
            'upload_date': '20151207',
        },
        'params': {
            'skip_download': True,
        },
-        'add_ie': ['OoyalaExternal'],
+        'expected_warnings': ['Unable to download f4m manifest'],
    }, {
        'url': 'http://www.espn.com/video/clip?id=10365079',
        'only_matching': True,
    }, {
        'url': 'http://www.espn.com/video/clip/_/id/17989860',
        'only_matching': True,
    }]
    def _real_extract(self, url):
        """Extract a single ESPN clip via the api-app.espn.com clips endpoint.

        The clip id comes from the URL; the first entry of the JSON
        response's 'videos' list supplies the metadata, and the nested
        clip['links']['source'] mapping supplies the media URLs.
        Returns an info dict with id, title, description, thumbnail,
        timestamp, duration and formats.
        """
        video_id = self._match_id(url)

        api_response = self._download_json(
            'http://api-app.espn.com/v1/video/clips/%s' % video_id,
            video_id)
        clip = api_response['videos'][0]

        title = clip['headline']

        seen_urls = set()
        formats = []

        def extract_source(source_url, source_id=None):
            # Turn one media URL into format entries; a URL that was already
            # processed is skipped so it is not listed twice.
            if source_url in seen_urls:
                return
            seen_urls.add(source_url)

            ext = determine_ext(source_url)
            if ext == 'smil':
                found = self._extract_smil_formats(
                    source_url, video_id, fatal=False)
            elif ext == 'f4m':
                found = self._extract_f4m_formats(
                    source_url, video_id, f4m_id=source_id, fatal=False)
            elif ext == 'm3u8':
                found = self._extract_m3u8_formats(
                    source_url, video_id, 'mp4', entry_protocol='m3u8_native',
                    m3u8_id=source_id, fatal=False)
            else:
                # Anything else is treated as a direct media URL.
                found = [{
                    'url': source_url,
                    'format_id': source_id,
                }]
            formats.extend(found)

        def traverse_source(source, base_source_id=None):
            # Walk the nested mapping: string values are media URLs labelled
            # with the id of the enclosing mapping, dict values are descended
            # into with the key appended to that id. Other values are ignored.
            for key, value in source.items():
                if isinstance(value, compat_str):
                    extract_source(value, base_source_id)
                    continue
                if not isinstance(value, dict):
                    continue
                if base_source_id:
                    nested_id = '%s-%s' % (base_source_id, key)
                else:
                    nested_id = key
                traverse_source(value, nested_id)

        traverse_source(clip['links']['source'])
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            # 'caption' takes precedence over 'description' when both exist.
            'description': clip.get('caption') or clip.get('description'),
            'thumbnail': clip.get('thumbnail'),
            'timestamp': unified_timestamp(clip.get('originalPublishDate')),
            'duration': int_or_none(clip.get('duration')),
            'formats': formats,
        }
 class ESPNArticleIE(InfoExtractor):
    _VALID_URL = r'https?://(?:espn\.go|(?:www\.)?espn)\.com/(?:[^/]+/)*(?P<id>[^/]+)'
    _TESTS = [{
        'url': 'https://espn.go.com/video/iframe/twitter/?cms=espn&id=10365079',
        'only_matching': True,
    }, {
@ -47,11 +126,12 @@ class ESPNIE(InfoExtractor):
    }, {
        'url': 'http://espn.go.com/nba/playoffs/2015/story/_/id/12887571/john-wall-washington-wizards-no-swelling-left-hand-wrist-game-5-return',
        'only_matching': True,
    }, {
        'url': 'http://www.espn.com/video/clip?id=10365079',
        'only_matching': True,
    }]
    @classmethod
    def suitable(cls, url):
        """Return False for URLs ESPNIE accepts; otherwise use the inherited check."""
        if ESPNIE.suitable(url):
            return False
        return super(ESPNArticleIE, cls).suitable(url)
    def _real_extract(self, url):
        video_id = self._match_id(url)
@ -61,23 +141,5 @@ class ESPNIE(InfoExtractor):
            r'class=(["\']).*?video-play-button.*?\1[^>]+data-id=["\'](?P<id>\d+)',
            webpage, 'video id', group='id')
-        cms = 'espn'
+        return self.url_result(
-        if 'data-source="intl"' in webpage:
+            'http://espn.go.com/video/clip?id=%s' % video_id, ESPNIE.ie_key())
            cms = 'intl'
        player_url = 'https://espn.go.com/video/iframe/twitter/?id=%s&cms=%s' % (video_id, cms)
        player = self._download_webpage(
            player_url, video_id)
        pcode = self._search_regex(
            r'["\']pcode=([^"\']+)["\']', player, 'pcode')
        title = remove_end(
            self._og_search_title(webpage),
            '- ESPN Video').strip()
        return {
            '_type': 'url_transparent',
            'url': 'ooyalaexternal:%s:%s:%s' % (cms, video_id, pcode),
            'ie_key': 'OoyalaExternal',
            'title': title,
        }
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@ -47,6 +47,8 @@ from .svt import SVTIE
 from .pornhub import PornHubIE
 from .xhamster import XHamsterEmbedIE
 from .tnaflix import TNAFlixNetworkEmbedIE
 from .drtuber import DrTuberIE
 from .redtube import RedTubeIE
 from .vimeo import VimeoIE
 from .dailymotion import (
    DailymotionIE,
@ -1981,11 +1983,6 @@ class GenericIE(InfoExtractor):
        if sportbox_urls:
            return _playlist_from_matches(sportbox_urls, ie='SportBoxEmbed')
        # Look for embedded PornHub player
        pornhub_url = PornHubIE._extract_url(webpage)
        if pornhub_url:
            return self.url_result(pornhub_url, 'PornHub')
        # Look for embedded XHamster player
        xhamster_urls = XHamsterEmbedIE._extract_urls(webpage)
        if xhamster_urls:
@ -1996,6 +1993,21 @@ class GenericIE(InfoExtractor):
        if tnaflix_urls:
            return _playlist_from_matches(tnaflix_urls, ie=TNAFlixNetworkEmbedIE.ie_key())
        # Look for embedded PornHub player
        pornhub_urls = PornHubIE._extract_urls(webpage)
        if pornhub_urls:
            return _playlist_from_matches(pornhub_urls, ie=PornHubIE.ie_key())
        # Look for embedded DrTuber player
        drtuber_urls = DrTuberIE._extract_urls(webpage)
        if drtuber_urls:
            return _playlist_from_matches(drtuber_urls, ie=DrTuberIE.ie_key())
        # Look for embedded RedTube player
        redtube_urls = RedTubeIE._extract_urls(webpage)
        if redtube_urls:
            return _playlist_from_matches(redtube_urls, ie=RedTubeIE.ie_key())
        # Look for embedded Tvigle player
        mobj = re.search(
            r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//cloud\.tvigle\.ru/video/.+?)\1', webpage)
--- a/youtube_dl/extractor/mitele.py
+++ b/youtube_dl/extractor/mitele.py
@ -1,19 +1,20 @@
 # coding: utf-8
 from __future__ import unicode_literals
-import re
+import uuid
 from .common import InfoExtractor
 from ..compat import (
    compat_str,
    compat_urllib_parse_urlencode,
    compat_urlparse,
 )
 from ..utils import (
    get_element_by_attribute,
    int_or_none,
    remove_start,
    extract_attributes,
    determine_ext,
    smuggle_url,
    parse_duration,
 )
@ -72,16 +73,14 @@ class MiTeleBaseIE(InfoExtractor):
        }
-class MiTeleIE(MiTeleBaseIE):
+class MiTeleIE(InfoExtractor):
    IE_DESC = 'mitele.es'
-    _VALID_URL = r'https?://(?:www\.)?mitele\.es/(?:[^/]+/){3}(?P<id>[^/]+)/'
+    _VALID_URL = r'https?://(?:www\.)?mitele\.es/programas-tv/(?:[^/]+/)(?P<id>[^/]+)/player'
    _TESTS = [{
-        'url': 'http://www.mitele.es/programas-tv/diario-de/la-redaccion/programa-144/',
+        'url': 'http://www.mitele.es/programas-tv/diario-de/57b0dfb9c715da65618b4afa/player',
        # MD5 is unstable
        'info_dict': {
-            'id': '0NF1jJnxS1Wu3pHrmvFyw2',
+            'id': '57b0dfb9c715da65618b4afa',
            'display_id': 'programa-144',
            'ext': 'mp4',
            'title': 'Tor, la web invisible',
            'description': 'md5:3b6fce7eaa41b2d97358726378d9369f',
@ -91,57 +90,71 @@ class MiTeleIE(MiTeleBaseIE):
            'thumbnail': 're:(?i)^https?://.*\.jpg$',
            'duration': 2913,
        },
        'add_ie': ['Ooyala'],
    }, {
        # no explicit title
-        'url': 'http://www.mitele.es/programas-tv/cuarto-milenio/temporada-6/programa-226/',
+        'url': 'http://www.mitele.es/programas-tv/cuarto-milenio/57b0de3dc915da14058b4876/player',
        'info_dict': {
-            'id': 'eLZSwoEd1S3pVyUm8lc6F',
+            'id': '57b0de3dc915da14058b4876',
            'display_id': 'programa-226',
            'ext': 'mp4',
-            'title': 'Cuarto Milenio - Temporada 6 - Programa 226',
+            'title': 'Cuarto Milenio Temporada 6 Programa 226',
-            'description': 'md5:50daf9fadefa4e62d9fc866d0c015701',
+            'description': 'md5:5ff132013f0cd968ffbf1f5f3538a65f',
            'series': 'Cuarto Milenio',
            'season': 'Temporada 6',
            'episode': 'Programa 226',
            'thumbnail': 're:(?i)^https?://.*\.jpg$',
-            'duration': 7312,
+            'duration': 7313,
        },
        'params': {
            'skip_download': True,
        },
        'add_ie': ['Ooyala'],
    }]
    def _real_extract(self, url):
-        display_id = self._match_id(url)
+        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
-        webpage = self._download_webpage(url, display_id)
+        gigya_url = self._search_regex(r'<gigya-api>[^>]*</gigya-api>[^>]*<script\s*src="([^"]*)">[^>]*</script>', webpage, 'gigya', default=None)
        gigya_sc = self._download_webpage(compat_urlparse.urljoin(r'http://www.mitele.es/', gigya_url), video_id, 'Downloading gigya script')
        # Get a appKey/uuid for getting the session key
        appKey_var = self._search_regex(r'value\("appGridApplicationKey",([0-9a-f]+)\)', gigya_sc, 'appKey variable')
        appKey = self._search_regex(r'var %s="([0-9a-f]+)"' % appKey_var, gigya_sc, 'appKey')
        uid = compat_str(uuid.uuid4())
        session_url = 'https://appgrid-api.cloud.accedo.tv/session?appKey=%s&uuid=%s' % (appKey, uid)
        session_json = self._download_json(session_url, video_id, 'Downloading session keys')
        sessionKey = compat_str(session_json['sessionKey'])
-        info = self._get_player_info(url, webpage)
+        paths_url = 'https://appgrid-api.cloud.accedo.tv/metadata/general_configuration,%20web_configuration?sessionKey=' + sessionKey
        paths = self._download_json(paths_url, video_id, 'Downloading paths JSON')
        ooyala_s = paths['general_configuration']['api_configuration']['ooyala_search']
        data_p = (
            'http://' + ooyala_s['base_url'] + ooyala_s['full_path'] + ooyala_s['provider_id'] +
            '/docs/' + video_id + '?include_titles=Series,Season&product_name=test&format=full')
        data = self._download_json(data_p, video_id, 'Downloading data JSON')
        source = data['hits']['hits'][0]['_source']
        embedCode = source['offers'][0]['embed_codes'][0]
-        title = self._search_regex(
+        titles = source['localizable_titles'][0]
-            r'class="Destacado-text"[^>]*>\s*<strong>([^<]+)</strong>',
+        title = titles.get('title_medium') or titles['title_long']
-            webpage, 'title', default=None)
+        episode = titles['title_sort_name']
        description = titles['summary_long']
        titles_series = source['localizable_titles_series'][0]
        series = titles_series['title_long']
        titles_season = source['localizable_titles_season'][0]
        season = titles_season['title_medium']
        duration = parse_duration(source['videos'][0]['duration'])
-        mobj = re.search(r'''(?sx)
+        return {
-                            class="Destacado-text"[^>]*>.*?<h1>\s*
+            '_type': 'url_transparent',
-                            <span>(?P<series>[^<]+)</span>\s*
+            # for some reason only HLS is supported
-                            <span>(?P<season>[^<]+)</span>\s*
+            'url': smuggle_url('ooyala:' + embedCode, {'supportedformats': 'm3u8'}),
-                            <span>(?P<episode>[^<]+)</span>''', webpage)
+            'id': video_id,
        series, season, episode = mobj.groups() if mobj else [None] * 3
        if not title:
            if mobj:
                title = '%s - %s - %s' % (series, season, episode)
            else:
                title = remove_start(self._search_regex(
                    r'<title>([^<]+)</title>', webpage, 'title'), 'Ver online ')
        info.update({
            'display_id': display_id,
            'title': title,
-            'description': get_element_by_attribute('class', 'text', webpage),
+            'description': description,
            'series': series,
            'season': season,
            'episode': episode,
-        })
+            'duration': duration,
-        return info
+            'thumbnail': source['images'][0]['url'],
        }
--- a/youtube_dl/extractor/ooyala.py
+++ b/youtube_dl/extractor/ooyala.py
@ -18,7 +18,7 @@ class OoyalaBaseIE(InfoExtractor):
    _CONTENT_TREE_BASE = _PLAYER_BASE + 'player_api/v1/content_tree/'
    _AUTHORIZATION_URL_TEMPLATE = _PLAYER_BASE + 'sas/player_api/v2/authorization/embed_code/%s/%s?'
-    def _extract(self, content_tree_url, video_id, domain='example.org'):
+    def _extract(self, content_tree_url, video_id, domain='example.org', supportedformats=None):
        content_tree = self._download_json(content_tree_url, video_id)['content_tree']
        metadata = content_tree[list(content_tree)[0]]
        embed_code = metadata['embed_code']
@ -29,7 +29,7 @@ class OoyalaBaseIE(InfoExtractor):
            self._AUTHORIZATION_URL_TEMPLATE % (pcode, embed_code) +
            compat_urllib_parse_urlencode({
                'domain': domain,
-                'supportedFormats': 'mp4,rtmp,m3u8,hds',
+                'supportedFormats': supportedformats or 'mp4,rtmp,m3u8,hds',
            }), video_id)
        cur_auth_data = auth_data['authorization_data'][embed_code]
@ -145,8 +145,9 @@ class OoyalaIE(OoyalaBaseIE):
        url, smuggled_data = unsmuggle_url(url, {})
        embed_code = self._match_id(url)
        domain = smuggled_data.get('domain')
        supportedformats = smuggled_data.get('supportedformats')
        content_tree_url = self._CONTENT_TREE_BASE + 'embed_code/%s/%s' % (embed_code, embed_code)
-        return self._extract(content_tree_url, embed_code, domain)
+        return self._extract(content_tree_url, embed_code, domain, supportedformats)
 class OoyalaExternalIE(OoyalaBaseIE):
--- a/youtube_dl/extractor/pornhub.py
+++ b/youtube_dl/extractor/pornhub.py
@ -33,7 +33,7 @@ class PornHubIE(InfoExtractor):
                            (?:[a-z]+\.)?pornhub\.com/(?:view_video\.php\?viewkey=|embed/)|
                            (?:www\.)?thumbzilla\.com/video/
                        )
-                        (?P<id>[0-9a-z]+)
+                        (?P<id>[\da-z]+)
                    '''
    _TESTS = [{
        'url': 'http://www.pornhub.com/view_video.php?viewkey=648719015',
@ -96,12 +96,11 @@ class PornHubIE(InfoExtractor):
        'only_matching': True,
    }]
-    @classmethod
+    @staticmethod
-    def _extract_url(cls, webpage):
+    def _extract_urls(webpage):
-        mobj = re.search(
+        return re.findall(
-            r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?pornhub\.com/embed/\d+)\1', webpage)
+            r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//(?:www\.)?pornhub\.com/embed/[\da-z]+)',
-        if mobj:
+            webpage)
            return mobj.group('url')
    def _extract_count(self, pattern, webpage, name):
        return str_to_int(self._search_regex(
--- a/youtube_dl/extractor/redtube.py
+++ b/youtube_dl/extractor/redtube.py
@ -1,5 +1,7 @@
 from __future__ import unicode_literals
 import re
 from .common import InfoExtractor
 from ..utils import (
    ExtractorError,
@ -10,8 +12,8 @@ from ..utils import (
 class RedTubeIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?redtube\.com/(?P<id>[0-9]+)'
+    _VALID_URL = r'https?://(?:(?:www\.)?redtube\.com/|embed\.redtube\.com/\?.*?\bid=)(?P<id>[0-9]+)'
-    _TEST = {
+    _TESTS = [{
        'url': 'http://www.redtube.com/66418',
        'md5': '7b8c22b5e7098a3e1c09709df1126d2d',
        'info_dict': {
@ -23,11 +25,21 @@ class RedTubeIE(InfoExtractor):
            'view_count': int,
            'age_limit': 18,
        }
-    }
+    }, {
        'url': 'http://embed.redtube.com/?bgcolor=000000&id=1443286',
        'only_matching': True,
    }]
    @staticmethod
    def _extract_urls(webpage):
        return re.findall(
            r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//embed\.redtube\.com/\?.*?\bid=\d+)',
            webpage)
    def _real_extract(self, url):
        video_id = self._match_id(url)
-        webpage = self._download_webpage(url, video_id)
+        webpage = self._download_webpage(
            'http://www.redtube.com/%s' % video_id, video_id)
        if any(s in webpage for s in ['video-deleted-info', '>This video has been removed']):
            raise ExtractorError('Video %s has been removed' % video_id, expected=True)
--- a/youtube_dl/extractor/tmz.py
+++ b/youtube_dl/extractor/tmz.py
@ -32,12 +32,15 @@ class TMZArticleIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?tmz\.com/\d{4}/\d{2}/\d{2}/(?P<id>[^/]+)/?'
    _TEST = {
        'url': 'http://www.tmz.com/2015/04/19/bobby-brown-bobbi-kristina-awake-video-concert',
-        'md5': 'e482a414a38db73087450e3a6ce69d00',
+        'md5': '3316ff838ae5bb7f642537825e1e90d2',
        'info_dict': {
            'id': '0_6snoelag',
-            'ext': 'mp4',
+            'ext': 'mov',
            'title': 'Bobby Brown Tells Crowd ... Bobbi Kristina is Awake',
            'description': 'Bobby Brown stunned his audience during a concert Saturday night, when he told the crowd, "Bobbi is awake.  She\'s watching me."',
            'timestamp': 1429467813,
            'upload_date': '20150419',
            'uploader_id': 'batchUser',
        }
    }
@ -45,12 +48,9 @@ class TMZArticleIE(InfoExtractor):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
-        embedded_video_info_str = self._html_search_regex(
+        embedded_video_info = self._parse_json(self._html_search_regex(
-            r'tmzVideoEmbedV2\("([^)]+)"\);', webpage, 'embedded video info')
+            r'tmzVideoEmbed\(({.+?})\);', webpage, 'embedded video info'),
-
+            video_id)
        embedded_video_info = self._parse_json(
            embedded_video_info_str, video_id,
            transform_source=lambda s: s.replace('\\', ''))
        return self.url_result(
            'http://www.tmz.com/videos/%s/' % embedded_video_info['id'])
--- a/youtube_dl/extractor/yahoo.py
+++ b/youtube_dl/extractor/yahoo.py
@ -201,6 +201,32 @@ class YahooIE(InfoExtractor):
            },
            'skip': 'redirect to https://www.yahoo.com/music',
        },
        {
            # yahoo://article/
            'url': 'https://www.yahoo.com/movies/video/true-story-trailer-173000497.html',
            'info_dict': {
                'id': '071c4013-ce30-3a93-a5b2-e0413cd4a9d1',
                'ext': 'mp4',
                'title': "'True Story' Trailer",
                'description': 'True Story',
            },
            'params': {
                'skip_download': True,
            },
        },
        {
            # ytwnews://cavideo/
            'url': 'https://tw.video.yahoo.com/movie-tw/單車天使-中文版預-092316541.html',
            'info_dict': {
                'id': 'ba133ff2-0793-3510-b636-59dfe9ff6cff',
                'ext': 'mp4',
                'title': '單車天使 - 中文版預',
                'description': '中文版預',
            },
            'params': {
                'skip_download': True,
            },
        },
    ]
    def _real_extract(self, url):
@ -269,7 +295,8 @@ class YahooIE(InfoExtractor):
                    r'"first_videoid"\s*:\s*"([^"]+)"',
                    r'%s[^}]*"ccm_id"\s*:\s*"([^"]+)"' % re.escape(page_id),
                    r'<article[^>]data-uuid=["\']([^"\']+)',
-                    r'yahoo://article/view\?.*\buuid=([^&"\']+)',
+                    r'<meta[^<>]+yahoo://article/view\?.*\buuid=([^&"\']+)',
                    r'<meta[^<>]+["\']ytwnews://cavideo/(?:[^/]+/)+([\da-fA-F-]+)[&"\']',
                ]
                video_id = self._search_regex(
                    CONTENT_ID_REGEXES, webpage, 'content ID')
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@ -1,3 +1,3 @@
 from __future__ import unicode_literals
-__version__ = '2016.11.04'
+__version__ = '2016.11.08'
`@ -1,3 +1,3 @@`
	`from __future__ import unicode_literals`	`from __future__ import unicode_literals`

	`__version__ = '2016.11.04'`	`__version__ = '2016.11.08'`