[vzaar] update to generic extractor for embedded urls

2017-08-10 11:19:33 +02:00 · 2017-08-10 11:19:33 +02:00 · d491aaabc1
commit d491aaabc1
parent 4bf22f7a10
2 changed files with 26 additions and 2 deletions
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@ -98,7 +98,7 @@ from .wistia import WistiaIE
 from .mediaset import MediasetIE
 from .joj import JojIE
 from .megaphone import MegaphoneIE
-
+from .vzaar import VzaarIE
 class GenericIE(InfoExtractor):
    IE_DESC = 'Generic downloader that works on some sites'
@ -1840,6 +1840,16 @@ class GenericIE(InfoExtractor):
                'title': 'Стас Намин: «Мы нарушили девственность Кремля»',
            },
        },
        {
            # vzaar embed
            'url': 'http://www.xruniversity.com/bdsm-lets-begin-melissa-moore/',
            'md5': 'cddc9fb8a8644a0a7742149eee95080b',
            'info_dict': {
                'id': '11002506',
                'ext': 'mp4',
                'title': 'XR-U SHOW: Ready Player Fuck - EP. 61',
            },
        },
        # {
        #     # TODO: find another test
        #     # http://schema.org/VideoObject
@ -2781,6 +2791,12 @@ class GenericIE(InfoExtractor):
            return self.playlist_from_matches(
                videopress_urls, video_id, video_title, ie=VideoPressIE.ie_key())
        # Look for vzaar embeds
        vzaar_urls = VzaarIE._extract_urls(webpage)
        if vzaar_urls:
            return self.playlist_from_matches(
                vzaar_urls, video_id, video_title, ie=VzaarIE.ie_key())
        # Look for Rutube embeds
        rutube_urls = RutubeIE._extract_urls(webpage)
        if rutube_urls:
--- a/youtube_dl/extractor/vzaar.py
+++ b/youtube_dl/extractor/vzaar.py
@ -1,6 +1,8 @@
 # coding: utf-8
 from __future__ import unicode_literals
 import re
 from .common import InfoExtractor
 from ..utils import (
    int_or_none,
@ -9,7 +11,7 @@ from ..utils import (
 class VzaarIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:(?:www|view)\.)?vzaar\.com/(?:videos/)?(?P<id>\d+)'
+    _VALID_URL = r'(https?://)?(?:(?:www|view)\.)?vzaar\.com/(?:videos/)?(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://vzaar.com/videos/1152805',
        'md5': 'bde5ddfeb104a6c56a93a06b04901dbf',
@ -28,6 +30,12 @@ class VzaarIE(InfoExtractor):
        },
    }]
    @staticmethod
    def _extract_urls(webpage):
        return re.findall(
            r'<iframe[^>]+src=["\']//((?:view.vzaar\.com)/[0-9]+)',
            webpage)
    def _real_extract(self, url):
        video_id = self._match_id(url)
        video_data = self._download_json(