From abaa06f884403c05a0edc1b9738768a591603b0f Mon Sep 17 00:00:00 2001 From: TRox1972 Date: Sat, 21 May 2016 17:48:17 +0200 Subject: [PATCH 1/3] [Vidio] Add new extractor --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/vidio.py | 46 ++++++++++++++++++++++++++++++ 2 files changed, 47 insertions(+) create mode 100644 youtube_dl/extractor/vidio.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index c93cd2765..d30d098b9 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -894,6 +894,7 @@ from .videomore import ( ) from .videopremium import VideoPremiumIE from .videott import VideoTtIE +from .vidio import VidioIE from .vidme import ( VidmeIE, VidmeUserIE, diff --git a/youtube_dl/extractor/vidio.py b/youtube_dl/extractor/vidio.py new file mode 100644 index 000000000..8348b5f17 --- /dev/null +++ b/youtube_dl/extractor/vidio.py @@ -0,0 +1,46 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re +from .common import InfoExtractor + + +class VidioIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?vidio\.com/watch/(?P<id>\d{6})-(?P<display_id>[^/?]+)' + _TEST = { + 'url': 'http://www.vidio.com/watch/165683-dj_ambred-booyah-live-2015', + 'info_dict': { + 'id': '165683', + 'title': 'DJ_AMBRED - Booyah (Live 2015)', + 'ext': 'mp4', + 'thumbnail': 'https://cdn0-a.production.vidio.static6.com/uploads/video/image/165683/dj_ambred-booyah-live-2015-bfb2ba.jpg', + 'description': 'md5:27dc15f819b6a78a626490881adbadf8', + 'duration': 149, + }, + 'params': { + # m3u8 download + 'skip_download': True + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id, display_id = mobj.group('id', 'display_id') + + webpage = self._download_webpage(url, display_id) + + video_data = self._parse_json(self._html_search_regex( + r'data-json-clips\s*=\s*"\[(.+)\]"', webpage, display_id), display_id) + + formats = self._extract_m3u8_formats( 
video_data['sources'][0]['file'], + display_id, ext='mp4') + + return { + 'id': video_id, + 'title': self._og_search_title(webpage), + 'formats': formats, + 'thumbnail': video_data.get('image', ''), + 'description': self._og_search_description(webpage), + 'duration': video_data.get('clip_duration'), + } From 66e70a58d4ebdd8d4f32bc00cd93af332332b919 Mon Sep 17 00:00:00 2001 From: TRox1972 Date: Sun, 22 May 2016 13:24:48 +0200 Subject: [PATCH 2/3] [Vidio] fix fallback value and wrap duration in int_or_none --- youtube_dl/extractor/vidio.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/vidio.py b/youtube_dl/extractor/vidio.py index 8348b5f17..6a7050322 100644 --- a/youtube_dl/extractor/vidio.py +++ b/youtube_dl/extractor/vidio.py @@ -4,6 +4,8 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from ..utils import int_or_none + class VidioIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?vidio\.com/watch/(?P<id>\d{6})-(?P<display_id>[^/?]+)' @@ -40,7 +42,7 @@ class VidioIE(InfoExtractor): 'id': video_id, 'title': self._og_search_title(webpage), 'formats': formats, - 'thumbnail': video_data.get('image', ''), + 'thumbnail': video_data.get('image'), 'description': self._og_search_description(webpage), - 'duration': video_data.get('clip_duration'), + 'duration': int_or_none(video_data.get('clip_duration')), } From f9ca0f11c3b2c1880e6c96ba19967ff27ea97635 Mon Sep 17 00:00:00 2001 From: TRox1972 Date: Sun, 22 May 2016 14:17:11 +0200 Subject: [PATCH 3/3] [Vidio] don't use video_id for _html_search_regex() --- youtube_dl/extractor/vidio.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/vidio.py b/youtube_dl/extractor/vidio.py index 6a7050322..d17c663fd 100644 --- a/youtube_dl/extractor/vidio.py +++ b/youtube_dl/extractor/vidio.py @@ -32,7 +32,7 @@ class VidioIE(InfoExtractor): webpage = self._download_webpage(url, display_id) video_data = 
self._parse_json(self._html_search_regex( - r'data-json-clips\s*=\s*"\[(.+)\]"', webpage, display_id), display_id) + r'data-json-clips\s*=\s*"\[(.+)\]"', webpage, 'video data'), display_id) formats = self._extract_m3u8_formats( video_data['sources'][0]['file'],