From f6199a619d98fb6684655bab7c9a2da8b910f450 Mon Sep 17 00:00:00 2001
From: Simon Morgan
Date: Fri, 7 Oct 2016 16:17:57 +0100
Subject: [PATCH] [yuvutu] Add new extractor

---
 youtube_dl/extractor/extractors.py |  1 +
 youtube_dl/extractor/yuvutu.py     | 56 ++++++++++++++++++++++++++++++
 2 files changed, 57 insertions(+)
 create mode 100644 youtube_dl/extractor/yuvutu.py

diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index feee06004..6ae0f5a4c 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -1162,6 +1162,7 @@ from .youtube import (
     YoutubeUserIE,
     YoutubeWatchLaterIE,
 )
+from .yuvutu import YuvutuIE
 from .zapiks import ZapiksIE
 from .zdf import ZDFIE, ZDFChannelIE
 from .zingmp3 import ZingMp3IE
diff --git a/youtube_dl/extractor/yuvutu.py b/youtube_dl/extractor/yuvutu.py
new file mode 100644
index 000000000..ebf0846fb
--- /dev/null
+++ b/youtube_dl/extractor/yuvutu.py
@@ -0,0 +1,56 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import compat_urlparse
+from ..utils import determine_ext
+
+
+class YuvutuIE(InfoExtractor):
+    # The named group ``id`` is what ``_match_id`` reads the video id from.
+    _VALID_URL = r'https?://(?:www\.)?yuvutu\.com/video/(?P<id>[0-9]+)'
+    _TEST = {
+        'url': 'http://www.yuvutu.com/video/330/',
+        'md5': 'af4a0d2eabec6b6bd43cd6b68543fa9c',
+        'info_dict': {
+            'id': '330',
+            'title': 'carnal bliss',
+            'ext': 'flv',
+            'age_limit': 18,
+        }
+    }
+
+    # Patterns applied to the video page (title, thumbnail, embed link)
+    # and to the embed page (media file URL).
+    _title_regex = r"class=[\"']video-title-content[\"']>.+?>(.+?)<"
+    _thumbnail_regex = r"itemprop=[\"']thumbnailURL[\"']\s+content=[\"'](.+?)[\"']"
+    _embed_regex = r"[\"'](\/embed_video\.php.+?)[\"']"
+    _video_regex = r"file:\s*[\"']([^\s]+)[\"']"
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, video_id)
+
+        title = self._html_search_regex(self._title_regex, webpage, 'title')
+        # The thumbnail is optional metadata; do not abort when it is absent.
+        thumbnail = self._html_search_regex(
+            self._thumbnail_regex, webpage, 'thumbnail', fatal=False)
+
+        # The media URL is read from the embed page. The embed link matched
+        # here starts with "/", so resolve it against the page URL.
+        embed_url = self._html_search_regex(
+            self._embed_regex, webpage, 'embed')
+        embed_webpage = self._download_webpage(
+            compat_urlparse.urljoin(url, embed_url), video_id)
+        video_url = self._html_search_regex(
+            self._video_regex, embed_webpage, 'video_url')
+
+        return {
+            'id': video_id,
+            'url': video_url,
+            'ext': determine_ext(video_url, 'mp4'),
+            'title': title,
+            'thumbnail': thumbnail,
+            'age_limit': 18,
+        }