add extractor for skypixel.com

resolves https://github.com/rg3/youtube-dl/issues/13495
2017-09-12 20:18:39 +00:00 · 2017-09-12 20:18:39 +00:00 · 45bc915507
commit 45bc915507
parent 51aee72d16
3 changed files with 97 additions and 0 deletions
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@ -809,6 +809,28 @@ class InfoExtractor(object):
        else:
            return res
    def _simple_search_between(self, search_str, start_str, end_str):
        """
        Searches string for a starting substring followed by an ending substring
        and returns the substring in between. If starting or ending substring are
        not found returns None.
        @search_str [str, unicode]
        @start_str [str, unicode]
        @end_str [str, unicode]
        """
        assert(type(search_str) in (str, unicode))
        assert(type(start_str) in (str, unicode))
        assert(type(end_str) in (str, unicode))
        search_start = search_str.find(start_str)
        if search_start == -1:
            return None
        between_a = search_start + len(start_str)
        search_end = search_str[between_a:].find(end_str)
        if search_end == -1:
            return None
        between_b = search_end + between_a
        return search_str[between_a:between_b]
    def _get_netrc_login_info(self, netrc_machine=None):
        username = None
        password = None
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@ -928,6 +928,7 @@ from .shared import (
 from .showroomlive import ShowRoomLiveIE
 from .sina import SinaIE
 from .sixplay import SixPlayIE
 from .skypixel import SkypixelIE
 from .skylinewebcams import SkylineWebcamsIE
 from .skynewsarabia import (
    SkyNewsArabiaIE,
--- a/youtube_dl/extractor/skypixel.py
+++ b/youtube_dl/extractor/skypixel.py
@ -0,0 +1,74 @@
 # coding: utf-8
 from __future__ import unicode_literals

 from .common import InfoExtractor
 from ..utils import ExtractorError
 class SkypixelIE(InfoExtractor):
    """
    InfoExtractor for videos shared on Skypixel.com.

    The share page supplies the title and uploader. The media itself is
    described on a second page that the share page embeds in an <iframe>;
    that page carries a JSON list of video definitions and a poster image.
    """

    # The id is the URL slug; it stops at '/', '?', '#' or '&' so that a
    # trailing slash or query string never becomes part of the video id.
    _VALID_URL = r'https?://(?:www\.)?skypixel\.com/share/video/(?P<id>[^/?#&]+)'
    _TESTS = [
        {
            'url': 'https://skypixel.com/share/video/check-out-my-latest-artwork-4f90b8ac-e7c3-4ed8-82c2-203addfd629e',
            'info_dict': {
                'id': 'check-out-my-latest-artwork-4f90b8ac-e7c3-4ed8-82c2-203addfd629e',
                'ext': 'mp4',
                'title': 'Check out my latest artwork!',
                'uploader': 'Alby98',
                'thumbnail': 'http://dn-djidl2.qbox.me/cloud/c89382b0b8dc75ea9f07354e098e0971/2.jpg',
            },
            'params': {
                'noplaylist': True,
                'skip_download': True,
            }
        },
        {
            'url': 'https://www.skypixel.com/share/video/undirfellsrett-i-vatnsdal-8-9-2017',
            'info_dict': {
                'id': 'undirfellsrett-i-vatnsdal-8-9-2017',
                'ext': 'mp4',
                'title': 'Undirfellsrétt í Vatnsdal 8/9/2017',
                'uploader': 'Flokmundur',
                'thumbnail': 'http://dn-djidl2.qbox.me/cloud/3c1a3aea1bdc042362a36ed482edb3ae/2.jpg',
            },
            'params': {
                'noplaylist': True,
                'skip_download': True,
            }
        }
    ]

    def _real_extract(self, url):
        """
        Extract the info dict for a single Skypixel share page.

        Returns a dict with 'id', 'url', 'ext', 'title', 'uploader' and
        'thumbnail'; 'uploader' and 'thumbnail' are None when the page does
        not expose them.

        Raises ExtractorError when the embedded player URL, the video
        definitions or the video URL cannot be found.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        title = self._og_search_title(webpage).strip()
        title = title.replace(' | SkyPixel.com', '')

        # Optional metadata: a missing author tag must not abort extraction.
        uploader = self._simple_search_between(
            webpage, '<span itemprop="author">', '</span>')

        # The share page embeds the actual player in an iframe.
        player_url = self._simple_search_between(
            webpage, '<iframe frameborder="0" scrolling="no" src="', '">')
        if not player_url:
            raise ExtractorError('Unable to find the embedded player URL')
        player_page = self._download_webpage(
            player_url, 'djivideos.com[%s]' % (video_id, ))

        # The player page passes its list of video definitions to
        # JSON.parse() as a single-quoted string.
        definitions_json = self._simple_search_between(
            player_page, 'JSON.parse(\'', '\');')
        if definitions_json is None:
            raise ExtractorError('Unable to find the video definitions')
        video_definitions = self._parse_json(definitions_json, video_id)
        try:
            # NOTE(review): the last definition is used; presumably it is the
            # best quality - confirm against the player page.
            video_url = video_definitions[-1]['src']
        except (IndexError, KeyError, TypeError):
            raise ExtractorError('Unable to find the video URL')

        thumbnail = self._simple_search_between(
            player_page, 'poster: "', '",')
        if thumbnail is not None:
            # Cut everything from '?sign=' onwards; the test cases expect the
            # bare image URL. partition() leaves the URL untouched when the
            # parameter is absent.
            thumbnail = thumbnail.partition('?sign=')[0]

        return {
            'id': video_id,
            'url': video_url,
            'ext': 'mp4',
            'title': title,
            'uploader': uploader,
            'thumbnail': thumbnail,
        }