From d4e5dc44e48583596ed79cc28468920408f0afa6 Mon Sep 17 00:00:00 2001
From: Dmitry Grigoryev
Date: Mon, 10 Jun 2019 21:06:46 +0300
Subject: [PATCH] [tvrain] Add new extractor

---
 youtube_dl/extractor/extractors.py |  1 +
 youtube_dl/extractor/tvrain.py     | 69 ++++++++++++++++++++++++++++++
 2 files changed, 70 insertions(+)
 create mode 100644 youtube_dl/extractor/tvrain.py

diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index eb5efd1e8..e0d4046c0 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -1234,6 +1234,7 @@ from .tvplay import (
     TVPlayHomeIE,
 )
 from .tvplayer import TVPlayerIE
+from .tvrain import TVRainIE
 from .tweakers import TweakersIE
 from .twentyfourvideo import TwentyFourVideoIE
 from .twentymin import TwentyMinutenIE
diff --git a/youtube_dl/extractor/tvrain.py b/youtube_dl/extractor/tvrain.py
new file mode 100644
index 000000000..5d9d74d66
--- /dev/null
+++ b/youtube_dl/extractor/tvrain.py
@@ -0,0 +1,69 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+    smuggle_url,
+    try_get,
+)
+
+
+class TVRainIE(InfoExtractor):
+    """Extractor for tvrain.ru article pages.
+
+    The page only carries an Eagle Platform media id, so this extractor
+    reads that id from the page and delegates to the EaglePlatform
+    extractor.
+    """
+
+    # The last matched path component (slug plus numeric suffix) is the id.
+    _VALID_URL = r'https?://(?:www\.)?tvrain\.ru/(?:[^/?#]+/)*(?P<id>[a-z_]+-\d+)'
+    _TESTS = [{
+        'url': 'https://tvrain.ru/lite/teleshow/kak_vse_nachinalos/namin-418921/',
+        'info_dict': {
+            'id': '582306',
+            'ext': 'mp4',
+            'title': 'Стас Намин: «Мы нарушили девственность Кремля»',
+            'duration': 3382,
+        },
+    }, {
+        'url': 'https://tvrain.ru/teleshow/ted_dod/mozhete_li_vy_reshit_golovolomku_so_shkafchikami-432600/',
+        'info_dict': {
+            'id': '738482',
+            'ext': 'mp4',
+            # NOTE(review): the upstream title appears to be padded with
+            # whitespace / line-separator characters; match it tolerantly.
+            'title': r're:(?u)^\s*Можете ли вы решить головоломку со шкафчиками\?\s*$',
+            'duration': 237,
+        },
+    }]
+
+    def _real_extract(self, url):
+        """Return a url result pointing at the Eagle Platform media.
+
+        Falls back to the generic extractor when the page carries no
+        usable Eagle Platform id.
+        """
+        display_id = self._match_id(url)
+        webpage = self._download_webpage(url, display_id)
+
+        # The id lives in an inline `window.TVRAIN.app` JSON object; a
+        # missing or malformed object must not abort extraction.
+        app = self._parse_json(
+            self._search_regex(
+                r'(?s)window\.TVRAIN\.app\s*=\s*({.+?})[\s\/\*\]>]+<\/script>',
+                webpage, 'app data', default='{}'),
+            display_id, fatal=False)
+        eagle_id = try_get(app, lambda x: x['article']['eagle_id'])
+
+        if eagle_id is None:
+            return self.url_result(url, ie='Generic')
+
+        # compat_str keeps the id a text string on Python 2 as well.
+        eagle_id = compat_str(eagle_id)
+        return self.url_result(
+            smuggle_url(
+                'eagleplatform:tvrainru.media.eagleplatform.com:%s' % eagle_id,
+                {'referrer': url}),
+            ie='EaglePlatform', video_id=eagle_id)