l1ving_youtube-dl/youtube_dl/extractor/yandexvideo.py

# coding: utf-8
from __future__ import unicode_literals

import json
import re

from .common import InfoExtractor
from ..utils import (
    determine_ext,
    ExtractorError,
    int_or_none,
    url_or_none,
)


class YandexVideoIE(InfoExtractor):
    """Extractor for Yandex Video / Yandex Efir pages and the embedded player.

    The video metadata is read from the JSON argument of the
    ``Ya.streamModules.player.play(...)`` call embedded in the web page.
    """

    # The captured id is whichever of ``stream_id=`` / ``stream_channel=``
    # appears first in the query string (the ``.*?`` before the alternation
    # is lazy), or the path component of a frontend.vh.yandex.ru player URL.
    _VALID_URL = r'''(?x)
                    https?://
                        (?:
                            yandex\.ru(?:/portal/(?:video|efir)|/efir|/video)?/?\?.*?(stream_id=|stream_channel=)|
                            frontend\.vh\.yandex\.ru/player/
                        )
                        (?P<id>[\d\w]+)
                    '''

    _TESTS = [{
        'url': 'https://yandex.ru/portal/video?stream_id=4dbb262b4fe5cf15a215de4f34eee34d',
        'md5': '33955d7ae052f15853dc41f35f17581c',
        'info_dict': {
            'id': '4dbb262b4fe5cf15a215de4f34eee34d',
            'ext': 'mp4',
            'title': 'В Нью-Йорке баржи и теплоход оторвались от причала и расплылись по Гудзону',
            'description': '',
            'thumbnail': r're:^https?://.*\.jpg$',
            'timestamp': 0,
            'duration': 30,
            'age_limit': 18,
        },
    }, {
        'url': 'https://yandex.ru/portal/efir?stream_id=4dbb36ec4e0526d58f9f2dc8f0ecf374&from=morda',
        'only_matching': True,
    }, {
        'url': 'https://yandex.ru/?stream_id=4dbb262b4fe5cf15a215de4f34eee34d',
        'only_matching': True,
    }, {
        'url': 'https://frontend.vh.yandex.ru/player/4dbb262b4fe5cf15a215de4f34eee34d?from=morda',
        'only_matching': True,
    }, {
        # vod-episode, series episode
        'url': 'https://yandex.ru/portal/video?stream_id=45b11db6e4b68797919c93751a938cee',
        'only_matching': True,
    }, {
        # episode, sports
        'url': 'https://yandex.ru/?stream_channel=1538487871&stream_id=4132a07f71fb0396be93d74b3477131d',
        'only_matching': True,
    }, {
        # DASH with DRM
        'url': 'https://yandex.ru/portal/video?from=morda&stream_id=485a92d94518d73a9d0ff778e13505f8',
        'only_matching': True,
    }, {
        # Efir for program
        'url': 'https://yandex.ru/efir?stream_id=vMmiz2NPPDFE&from_block=player_context_menu_yavideo',
        'only_matching': True,
    }, {
        # Efir for channel
        'url': 'https://yandex.ru/efir?stream_channel=1335',
        'only_matching': True,
    }]

    def split_params(self, txt):
        """Split the text of a JavaScript argument list on top-level commas.

        A comma separates two arguments only when it is outside every
        bracket pair (curly, square or round) and outside every single- or
        double-quoted string literal.  Brackets and commas that occur inside
        a string literal are ignored, and a backslash inside a string
        escapes the following character.

        Returns the list of argument texts, unstripped.  Empty arguments
        between two commas are kept; an empty trailing argument is dropped.
        """
        items = []
        current = []  # characters of the argument being collected
        depth = 0  # bracket nesting level outside of string literals
        quote = None  # quote character of the string literal we are inside
        escaped = False  # True when the previous in-string char was a backslash
        for c in txt or '':
            if quote is not None:
                if escaped:
                    escaped = False
                elif c == '\\':
                    escaped = True
                elif c == quote:
                    quote = None
            elif c in '"\'':
                quote = c
            elif c in '{[(':
                depth += 1
            elif c in '}])':
                depth -= 1
            elif c == ',' and depth == 0:
                # Top-level separator: finish the current argument and do
                # not include the comma itself.
                items.append(''.join(current))
                current = []
                continue
            current.append(c)
        if current:
            items.append(''.join(current))
        return items

    def _parse_player_program(self, func_params):
        """Return the ``program`` dict from the player call's argument text.

        The second argument of the player call is expected to be a JSON
        object holding a ``program`` object.  Raises ExtractorError when the
        argument is missing, is not valid JSON, or has no ``program`` dict.
        """
        params = self.split_params(func_params)
        if len(params) < 2:
            raise ExtractorError(
                'Unable to find JSON data in Yandex player call')
        try:
            data = json.loads(params[1])
        except ValueError as e:
            raise ExtractorError(
                'Unable to parse JSON data of Yandex player call', cause=e)
        program = data.get('program') if isinstance(data, dict) else None
        if not isinstance(program, dict):
            raise ExtractorError(
                'Unable to find program information in Yandex player data')
        return program

    def _find_content_url(self, program):
        """Return the media URL of *program*.

        ``content_url`` is preferred; otherwise the first entry of
        ``streams`` carrying a valid ``url`` is used.  Raises ExtractorError
        when neither provides a valid URL.
        """
        content_url = url_or_none(program.get('content_url'))
        if content_url:
            return content_url
        streams = program.get('streams')
        if isinstance(streams, list):
            for stream in streams:
                if not isinstance(stream, dict):
                    continue
                stream_url = url_or_none(stream.get('url'))
                if stream_url:
                    return stream_url
        raise ExtractorError('Unable to find video URL')

    def _real_extract(self, url):
        """Download the page at *url* and build the info dict for its video."""
        video_id = self._match_id(url)

        html = self._download_webpage(url, video_id)

        # search call of Yandex stream player; the capture is greedy, so the
        # captured text may run past the first call - only its first two
        # top-level arguments are used below.
        func_params = self._search_regex(
            r'\s*Ya\.streamModules\.player\.play\(\s*([\w\W\\R\"]*)\,\s*true\);\s*',
            html, 'Yandex player', flags=re.DOTALL | re.MULTILINE)
        content = self._parse_player_program(func_params)

        content_url = self._find_content_url(content)
        title = content.get('title') or content.get('computed_title')

        # Choose the format extraction method from the URL's extension.
        ext = determine_ext(content_url)

        if ext == 'm3u8':
            formats = self._extract_m3u8_formats(
                content_url, video_id, 'mp4', entry_protocol='m3u8_native',
                m3u8_id='hls')
        elif ext == 'mpd':
            formats = self._extract_mpd_formats(
                content_url, video_id, mpd_id='dash')
        else:
            formats = [{'url': content_url}]

        self._sort_formats(formats)

        description = content.get('description')
        thumbnail = content.get('thumbnail')
        # First truthy value wins, so a 0 in an earlier field falls through
        # to the next one.
        timestamp = (int_or_none(content.get('release_date'))
                     or int_or_none(content.get('release_date_ut'))
                     or int_or_none(content.get('start_time')))
        duration = int_or_none(content.get('duration'))
        series = content.get('program_title')
        age_limit = int_or_none(content.get('restriction_age'))

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'timestamp': timestamp,
            'duration': duration,
            'series': series,
            'age_limit': age_limit,
            'formats': formats,
        }
-												[yandexvideo] Add extractor

											
										
										
											2019-03-17 17:33:07 +07:00
+								# coding: utf-8
 								from __future__ import unicode_literals
 								from .common import InfoExtractor
 								from ..utils import (
-												[yandexvideo] Add support for DASH formats (#21971)

											
										
										
											2019-08-02 05:03:25 +07:00
+								    determine_ext,
-												[yandexvideo] Add extractor

											
										
										
											2019-03-17 17:33:07 +07:00
+								    int_or_none,
 								    url_or_none,
 								)
-												[yandex video][yandex efir] fixed extractor

Fixed the extractor for Yandex Video and Yandex Efir (both a single program and a program on a channel).
The latest version of Yandex Video and Yandex Efir no longer supports requesting an external JSON document. The JSON data with the video information is now passed as a parameter to the Yandex stream player methods on the webpage.
The extractor now finds the call to the Yandex stream player, parses its parameters, and extracts the program information.
											
										
										
											2020-10-08 13:01:24 +03:00
+								import re
 								import json
-												[yandexvideo] Add extractor

											
										
										
											2019-03-17 17:33:07 +07:00
 								class YandexVideoIE(InfoExtractor):
 								    _VALID_URL = r'''(?x)
 								                    https?://
 								                        (?:
-												[yandex video][yandex efir] fixed extractor

Fixed the extractor for Yandex Video and Yandex Efir (both a single program and a program on a channel).
The latest version of Yandex Video and Yandex Efir no longer supports requesting an external JSON document. The JSON data with the video information is now passed as a parameter to the Yandex stream player methods on the webpage.
The extractor now finds the call to the Yandex stream player, parses its parameters, and extracts the program information.
											
										
										
											2020-10-08 13:01:24 +03:00
+								                            yandex\.ru(?:/portal/(?:video|efir)|/efir|/video)?/?\?.*?(stream_id=|stream_channel=)|
-												[yandexvideo] Add extractor

											
										
										
											2019-03-17 17:33:07 +07:00
+								                            frontend\.vh\.yandex\.ru/player/
 								                        )
-												[yandex video][yandex efir] fixed extractor

Fixed the extractor for Yandex Video and Yandex Efir (both a single program and a program on a channel).
The latest version of Yandex Video and Yandex Efir no longer supports requesting an external JSON document. The JSON data with the video information is now passed as a parameter to the Yandex stream player methods on the webpage.
The extractor now finds the call to the Yandex stream player, parses its parameters, and extracts the program information.
											
										
										
											2020-10-08 13:01:24 +03:00
+								                        (?P<id>[\d\w]+)
-												[yandexvideo] Add extractor

											
										
										
											2019-03-17 17:33:07 +07:00
+								                    '''
-												[yandex video][yandex efir] fixed extractor

Fixed the extractor for Yandex Video and Yandex Efir (both a single program and a program on a channel).
The latest version of Yandex Video and Yandex Efir no longer supports requesting an external JSON document. The JSON data with the video information is now passed as a parameter to the Yandex stream player methods on the webpage.
The extractor now finds the call to the Yandex stream player, parses its parameters, and extracts the program information.
											
										
										
											2020-10-08 13:01:24 +03:00
-												[yandexvideo] Add extractor

											
										
										
											2019-03-17 17:33:07 +07:00
+								    _TESTS = [{
 								        'url': 'https://yandex.ru/portal/video?stream_id=4dbb262b4fe5cf15a215de4f34eee34d',
 								        'md5': '33955d7ae052f15853dc41f35f17581c',
 								        'info_dict': {
 								            'id': '4dbb262b4fe5cf15a215de4f34eee34d',
 								            'ext': 'mp4',
 								            'title': 'В Нью-Йорке баржи и теплоход оторвались от причала и расплылись по Гудзону',
 								            'description': '',
 								            'thumbnail': r're:^https?://.*\.jpg$',
 								            'timestamp': 0,
 								            'duration': 30,
 								            'age_limit': 18,
 								        },
 								    }, {
 								        'url': 'https://yandex.ru/portal/efir?stream_id=4dbb36ec4e0526d58f9f2dc8f0ecf374&from=morda',
 								        'only_matching': True,
 								    }, {
 								        'url': 'https://yandex.ru/?stream_id=4dbb262b4fe5cf15a215de4f34eee34d',
 								        'only_matching': True,
 								    }, {
 								        'url': 'https://frontend.vh.yandex.ru/player/4dbb262b4fe5cf15a215de4f34eee34d?from=morda',
 								        'only_matching': True,
 								    }, {
 								        # vod-episode, series episode
 								        'url': 'https://yandex.ru/portal/video?stream_id=45b11db6e4b68797919c93751a938cee',
 								        'only_matching': True,
 								    }, {
 								        # episode, sports
 								        'url': 'https://yandex.ru/?stream_channel=1538487871&stream_id=4132a07f71fb0396be93d74b3477131d',
 								        'only_matching': True,
-												[yandexvideo] Add support for DASH formats (#21971)

											
										
										
											2019-08-02 05:03:25 +07:00
+								    }, {
 								        # DASH with DRM
 								        'url': 'https://yandex.ru/portal/video?from=morda&stream_id=485a92d94518d73a9d0ff778e13505f8',
 								        'only_matching': True,
-												[yandex video][yandex efir] fixed extractor

Fixed the extractor for Yandex Video and Yandex Efir (both a single program and a program on a channel).
The latest version of Yandex Video and Yandex Efir no longer supports requesting an external JSON document. The JSON data with the video information is now passed as a parameter to the Yandex stream player methods on the webpage.
The extractor now finds the call to the Yandex stream player, parses its parameters, and extracts the program information.
											
										
										
											2020-10-08 13:01:24 +03:00
+								    }, {
 								        # Efir for program
 								        'url': 'https://yandex.ru/efir?stream_id=vMmiz2NPPDFE&from_block=player_context_menu_yavideo',
 								        'only_matching': True,
 								    }, {
 								        # Efir for channel
 								        'url': 'https://yandex.ru/efir?stream_channel=1335',
 								        'only_matching': True,
-												[yandexvideo] Add extractor

											
										
										
											2019-03-17 17:33:07 +07:00
+								    }]
-												[yandex video][yandex efir] fixed extractor

Fixed the extractor for Yandex Video and Yandex Efir (both a single program and a program on a channel).
The latest version of Yandex Video and Yandex Efir no longer supports requesting an external JSON document. The JSON data with the video information is now passed as a parameter to the Yandex stream player methods on the webpage.
The extractor now finds the call to the Yandex stream player, parses its parameters, and extracts the program information.
											
										
										
											2020-10-08 13:06:50 +03:00
+								    def split_params(self, txt):
-												[yandex video][yandex efir] fixed extractor

Fixed the extractor for Yandex Video and Yandex Efir (both a single program and a program on a channel).
The latest version of Yandex Video and Yandex Efir no longer supports requesting an external JSON document. The JSON data with the video information is now passed as a parameter to the Yandex stream player methods on the webpage.
The extractor now finds the call to the Yandex stream player, parses its parameters, and extracts the program information.
											
										
										
											2020-10-08 13:01:24 +03:00
+								        braket = 0
 								        line = ''
 								        items = []
 								        for c in txt:
 								            if c == '{':
 								                braket += 1
 								            elif c == '}':
 								                braket -= 1
 								            if c == ',' and braket == 0:
 								                items.append(line)
 								                line = ''
 								            else:
 								                line += c
 								        if line != '':
 								            items.append(line)
 								        return items
-												[yandexvideo] Add extractor

											
										
										
											2019-03-17 17:33:07 +07:00
+								    def _real_extract(self, url):
 								        video_id = self._match_id(url)
-												[yandex video][yandex efir] fixed extractor

Fixed the extractor for Yandex Video and Yandex Efir (both a single program and a program on a channel).
The latest version of Yandex Video and Yandex Efir no longer supports requesting an external JSON document. The JSON data with the video information is now passed as a parameter to the Yandex stream player methods on the webpage.
The extractor now finds the call to the Yandex stream player, parses its parameters, and extracts the program information.
											
										
										
											2020-10-08 13:01:24 +03:00
+								        html = self._download_webpage(url, video_id)
 								        # search call of Yandex stream player
 								        func_params = self._search_regex(r'\s*Ya\.streamModules\.player\.play\(\s*([\w\W\\R\"]*)\,\s*true\);\s*',
 								                                         html, 'Yandex player', flags=re.DOTALL | re.MULTILINE)
 								        params = self.split_params(func_params)
 								        json_text = params[1]
 								        content = json.loads(json_text)
 								        content = content.get('program')
-												[yandexvideo] Add extractor

											
										
										
											2019-03-17 17:33:07 +07:00
-												[yandexvideo] Add support for DASH formats (#21971)

											
										
										
											2019-08-02 05:03:25 +07:00
+								        content_url = url_or_none(content.get('content_url')) or url_or_none(
-												[yandexvideo] Add extractor

											
										
										
											2019-03-17 17:33:07 +07:00
+								            content['streams'][0]['url'])
 								        title = content.get('title') or content.get('computed_title')
-												[yandexvideo] Add support for DASH formats (#21971)

											
										
										
											2019-08-02 05:03:25 +07:00
+								        ext = determine_ext(content_url)
 								        if ext == 'm3u8':
 								            formats = self._extract_m3u8_formats(
 								                content_url, video_id, 'mp4', entry_protocol='m3u8_native',
 								                m3u8_id='hls')
 								        elif ext == 'mpd':
 								            formats = self._extract_mpd_formats(
 								                content_url, video_id, mpd_id='dash')
 								        else:
 								            formats = [{'url': content_url}]
-												[yandexvideo] Add extractor

											
										
										
											2019-03-17 17:33:07 +07:00
+								        self._sort_formats(formats)
 								        description = content.get('description')
 								        thumbnail = content.get('thumbnail')
-												Fix W504 and disable W503 (closes #20863)

											
										
										
											2019-05-11 03:56:22 +07:00
+								        timestamp = (int_or_none(content.get('release_date'))
 								                     or int_or_none(content.get('release_date_ut'))
 								                     or int_or_none(content.get('start_time')))
-												[yandexvideo] Add extractor

											
										
										
											2019-03-17 17:33:07 +07:00
+								        duration = int_or_none(content.get('duration'))
 								        series = content.get('program_title')
 								        age_limit = int_or_none(content.get('restriction_age'))
 								        return {
 								            'id': video_id,
 								            'title': title,
 								            'description': description,
 								            'thumbnail': thumbnail,
 								            'timestamp': timestamp,
 								            'duration': duration,
 								            'series': series,
 								            'age_limit': age_limit,
 								            'formats': formats,
 								        }