From 634d079a9899852f41b7e206441abb531acdf562 Mon Sep 17 00:00:00 2001
From: kaspi
Date: Mon, 12 Oct 2015 01:25:57 -0400
Subject: [PATCH 1/2] [fc-zenit] New extractor

---
 youtube_dl/extractor/__init__.py |  2 +-
 youtube_dl/extractor/fczenit.py  | 63 ++++++++++++++++++++++++++++++++
 2 files changed, 64 insertions(+), 1 deletion(-)
 create mode 100644 youtube_dl/extractor/fczenit.py

diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index 75720843c..69572b573 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -76,7 +76,6 @@ from .cbssports import CBSSportsIE
 from .ccc import CCCIE
 from .ceskatelevize import CeskaTelevizeIE
 from .channel9 import Channel9IE
-from .chaturbate import ChaturbateIE
 from .chilloutzone import ChilloutzoneIE
 from .chirbit import (
     ChirbitIE,
@@ -167,6 +166,7 @@ from .extremetube import ExtremeTubeIE
 from .facebook import FacebookIE
 from .faz import FazIE
 from .fc2 import FC2IE
+from .fczenit import fczenitIE
 from .firstpost import FirstpostIE
 from .firsttv import FirstTVIE
 from .fivemin import FiveMinIE
diff --git a/youtube_dl/extractor/fczenit.py b/youtube_dl/extractor/fczenit.py
new file mode 100644
index 000000000..372548b82
--- /dev/null
+++ b/youtube_dl/extractor/fczenit.py
@@ -0,0 +1,63 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import os.path
+import re
+import json
+
+from ..compat import compat_urllib_parse_unquote
+from ..utils import url_basename
+from .common import InfoExtractor
+
+
+class fczenitIE(InfoExtractor):
+    """Extract videos from fc-zenit.ru pages of the form /video/gl<id>."""
+
+    _VALID_URL = r'https?://(?:www\.)?fc-zenit\.ru/video/gl(?P<id>[0-9]+)'
+    _TEST = {
+        'url': 'http://fc-zenit.ru/video/gl6785/',
+        'md5': '458bacc24549173fe5a5aa29174a5606',
+        'info_dict': {
+            'id': '6785',
+            'ext': 'mp4',
+            'title': '«Зенит-ТВ»: как Олег Шатов играл против «Урала»',
+        },
+    }
+
+    def _real_extract(self, url):
+        """Return the info dict (id, title, formats) for a video page URL."""
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        webpage_url = 'http://fc-zenit.ru/video/gl' + video_id
+        webpage = self._download_webpage(webpage_url, video_id)
+
+        # NOTE(review): the markup prefix of this pattern was stripped from
+        # the patch as received (only "([^<]+)" survived); the title <div>
+        # below is a reconstruction - confirm against the live page.
+        video_title = self._html_search_regex(
+            r'<div class="photoalbum__title">([^<]+)', webpage, 'title')
+
+        # Log that we are starting to parse the page
+        self.report_extraction(video_id)
+
+        # The sources sit on the line after "bitrates:", each entry carrying
+        # a quoted url and a three-digit bitrate.
+        bitrates_raw = self._html_search_regex(
+            r'bitrates:.*\n(.*)\]', webpage, 'video URL')
+        bitrates = re.findall(
+            r'url:.?\'(.+?)\'.*?bitrate:.?([0-9]{3}?)', bitrates_raw)
+
+        formats = [{
+            'url': format_url,
+            # tbr is a numeric field (KBit/s); the regex yields a string.
+            'tbr': int(tbr),
+        } for format_url, tbr in bitrates]
+
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': video_title,
+            'ext': 'mp4',
+            'formats': formats,
+        }

From 4e6174b97f4bcfa357488a31782c14b630419d73 Mon Sep 17 00:00:00 2001
From: kaspi
Date: Mon, 12 Oct 2015 01:33:12 -0400
Subject: [PATCH 2/2] fixed typo

---
 youtube_dl/extractor/__init__.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index 69572b573..3dcaa6306 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -76,6 +76,7 @@ from .cbssports import CBSSportsIE
 from .ccc import CCCIE
 from .ceskatelevize import CeskaTelevizeIE
 from .channel9 import Channel9IE
+from .chaturbate import ChaturbateIE
 from .chilloutzone import ChilloutzoneIE
 from .chirbit import (
     ChirbitIE,