[picarto] Add new extractor, Picarto.TV

2017-03-21 00:49:31 +02:00 · 2017-03-21 00:49:31 +02:00 · f2d5dc6bde
commit f2d5dc6bde
parent 8a8cc339b6
2 changed files with 81 additions and 0 deletions
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@ -740,6 +740,7 @@ from .periscope import (
 from .philharmoniedeparis import PhilharmonieDeParisIE
 from .phoenix import PhoenixIE
 from .photobucket import PhotobucketIE
 from .picarto import PicartoIE
 from .piksel import PikselIE
 from .pinkbike import PinkbikeIE
 from .pladform import PladformIE
--- a/youtube_dl/extractor/picarto.py
+++ b/youtube_dl/extractor/picarto.py
@ -0,0 +1,80 @@
 # coding: utf-8
 from __future__ import unicode_literals
 import re
 import time
 from .common import InfoExtractor
 from ..utils import ExtractorError
 from ..compat import compat_str
 class PicartoIE(InfoExtractor):
    """Extractor for live channel streams on Picarto.TV.

    The channel page embeds a JavaScript ``placeStream(...)`` call holding the
    canonical channel name, a player id, an online flag and an access token.
    Those values, together with a host fragment returned by the
    ``/process/channel`` endpoint, are combined into a direct MP4 stream URL.
    """

    _VALID_URL = r'https?://(?:www\.)?picarto\.tv/(?P<id>[a-zA-Z0-9]+)'
    _TEST = {
        'url': 'https://picarto.tv/setz',
        'info_dict': {
            'id': 'Setz',
            'ext': 'mp4',
            'title': 're:^Setz [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',  # match self._live_title
            'timestamp': int,
            'is_live': True
        },
        'params': {
            # Livestream
            'skip_download': True
        }
    }
    # Verbose-mode pattern for the inline placeStream(...) call. The trailing
    # .replace() rewrites every argument-separating comma so that it also
    # tolerates any number of spaces after it (a literal space must be escaped
    # in verbose mode, hence ``\ *``).
    _PLACESTREAM_REGEX = r'''(?x)
                             placeStream\(
                                "(?P<channel>[a-zA-Z0-9]+)",
                                (?P<player_id>\d+),
                                (?P<product>\d+),
                                (?P<offline_image>\d+),
                                (?P<online>\d+),
                                "(?P<token>.+?)",
                                "(?P<tech>.+?)",
                                (?P<viewer>\d+)\ *
                            \);
                        '''.replace(r',', r',\ *')

    def _real_extract(self, url):
        """Build the info dict for the live stream of the channel in *url*.

        Raises ExtractorError when the channel does not exist, when the
        placeStream(...) call cannot be located on the page, when the stream
        is offline, or when the load balancer returns no usable host.
        """
        url_channel_id = self._match_id(url)
        stream_page = self._download_webpage(
            'https://picarto.tv/' + url_channel_id, url_channel_id,
            note="Downloading channel page")

        # Handle nonexistent channels
        if 'This channel does not exist.' in stream_page:
            raise ExtractorError("Channel does not exist", expected=True)

        # Grab all relevant stream info
        placestream_m = re.search(self._PLACESTREAM_REGEX, stream_page)
        if not placestream_m:
            raise ExtractorError("Unable to fetch channel info")
        elif int(placestream_m.group('online')) == 0:
            raise ExtractorError("Stream is offline", expected=True)

        # The page carries the channel name in its canonical spelling, which
        # may differ in case from the one typed in the URL.
        channel_id = placestream_m.group('channel')
        player_id = placestream_m.group('player_id')
        token = placestream_m.group('token')

        # Ask for stream host
        post_body = ('loadbalancinginfo=' + channel_id).encode('utf-8')
        load_balancing_info = self._download_webpage(
            'https://picarto.tv/process/channel', channel_id,
            data=post_body, note="Fetching load balancer info")
        # The response body becomes part of the stream hostname, so surrounding
        # whitespace (e.g. a trailing newline) must be removed; otherwise the
        # sentinel comparison below is bypassed and the URL is malformed.
        load_balancing_info = (load_balancing_info or '').strip()
        if not load_balancing_info or load_balancing_info in ('FULL', 'failedGetIP'):
            raise ExtractorError("Unable to get stream")

        # One clock reading feeds both the millisecond ``ts`` query parameter
        # and the second-resolution ``timestamp`` field.
        timestamp = time.time()
        video_url = "https://%s-%s/mp4/%s.mp4?token=%s&ts=%s" % (
            player_id, load_balancing_info,
            channel_id, token, compat_str(int(timestamp * 1000)))

        return {
            'id': channel_id,
            'url': video_url,
            'ext': 'mp4',
            'title': self._live_title(channel_id),
            'timestamp': int(timestamp),
            'is_live': True
        }