From d2727c3134b01657640e76ee9c6e35e0f7c8f751 Mon Sep 17 00:00:00 2001 From: Parmjit Virk Date: Wed, 14 Jun 2017 14:14:33 -0500 Subject: [PATCH] [BlenderCloud] Add new extractor --- youtube_dl/extractor/blendercloud.py | 255 +++++++++++++++++++++++++++ youtube_dl/extractor/extractors.py | 1 + 2 files changed, 256 insertions(+) create mode 100644 youtube_dl/extractor/blendercloud.py diff --git a/youtube_dl/extractor/blendercloud.py b/youtube_dl/extractor/blendercloud.py new file mode 100644 index 000000000..00c7f51fa --- /dev/null +++ b/youtube_dl/extractor/blendercloud.py @@ -0,0 +1,255 @@ +# coding: utf-8 +from __future__ import unicode_literals +from .common import InfoExtractor +import re + + +class BlenderCloudIE(InfoExtractor): + _VALID_URL = r'https?://cloud\.blender\.org/[^/]+/(?P[0-9a-z-]+)/(?P[0-9a-z]+)?' + + _TESTS = [ + { + # Single video + 'url': 'https://cloud.blender.org/p/game-asset-creation/56041550044a2a00d0d7e068', + 'info_dict': { + 'id': '56041550044a2a00d0d7e068', + 'display_id': 'game-asset-creation', + 'ext': 'mp4', + 'title': 'Introduction', + }, + }, + { + # Playlist (subsection) + 'url': 'https://cloud.blender.org/p/game-asset-creation/56041550044a2a00d0d7e069', + 'info_dict': { + 'id': '56041550044a2a00d0d7e069', + 'title': 'Section 1 - Understanding the Interface', + }, + 'playlist': [ + { + 'info_dict': { + 'id': '56041550044a2a00d0d7e06a', + 'display_id': 'game-asset-creation', + 'ext': 'mp4', + 'title': 'Chapter 01 - First Encounters', + }, + }, + { + 'info_dict': { + 'id': '56041550044a2a00d0d7e06b', + 'display_id': 'game-asset-creation', + 'ext': 'mp4', + 'title': 'Chapter 02 - Navigation', + }, + }, + { + 'info_dict': { + 'id': '56041550044a2a00d0d7e06c', + 'display_id': 'game-asset-creation', + 'ext': 'mp4', + 'title': 'Chapter 03 - Layout Customizing', + }, + }, + { + 'info_dict': { + 'id': '56041550044a2a00d0d7e06d', + 'display_id': 'game-asset-creation', + 'ext': 'mp4', + 'title': 'Chapter 04 - User Preference Changes', + }, + }, + ], + }, + { + # Playlist + 'url': 'https://cloud.blender.org/p/blenderella/', + 'info_dict': { + 'id': 'blenderella', + 'title': 'Learn Character Modeling — Blender Cloud', + }, + 'playlist': [ + { + 'info_dict': { + 'id': '56040ecf044a2a00a515adb0', + 'display_id': 'blenderella', + 'ext': 'mp4', + 'title': '10 - Cheek, Jaw, Forehead, Scalp', + }, + }, + ], + 'expected_warnings': [ + 'Only available to Blender Cloud subscribers.', + 'No video sources available.' + ], + }, + ] + + def get_node_title(self, source): + node_title = None + node_title = self._html_search_regex( + r'(.*?)', source, 'title').strip() + #print "BlenderCloudIE : get_node_title : node_title : %s" % node_title + return node_title + + def get_webpage_title(self, source): + webpage_title = None + webpage_title = self._html_search_regex( + r'(.*?)', source, 'title').strip() + #print "BlenderCloudIE : get_webpage_title : webpage_title : %s" % webpage_title + return webpage_title + + def is_video_subscriber_only(self, source): + errmsg_subscribers_only = 'Only available to Blender Cloud subscribers.' + return True if errmsg_subscribers_only in source else False + + def get_video_formats(self, source): + video_formats = [] + for video in re.findall(r'" in webpage: + # this node references a single video (i.e. a single node) + + title = None + formats = [] + + if self.is_video_subscriber_only(webpage): + self.report_warning('%s - %s' % (base_node_id, warning_subscribers_only)) + else: + title = self.get_node_title(webpage) + formats = self.get_video_formats(webpage) + self._check_formats(formats, base_node_id) + self._sort_formats(formats) + + return { + 'id': base_node_id, + 'display_id': display_id, + 'title': title, + 'formats': formats, + } + elif "
" in webpage: + # this node references a playlist of subsection videos (i.e. multiple nodes) + + entries = [] + for node_id in re.findall(r'data-node_id=\"([0-9a-z]+)\"\s*title=\"', webpage): + #print "BlenderCloudIE : _real_extract : node_id : %s" % node_id + + webpage_node = self._download_webpage(url_node % node_id, node_id) + + if "
" in webpage_node: + if self.is_video_subscriber_only(webpage_node): + self.report_warning('%s - %s' % (node_id, warning_subscribers_only)) + else: + title = self.get_node_title(webpage_node) + formats = self.get_video_formats(webpage_node) + self._check_formats(formats, node_id) + self._sort_formats(formats) + + entries.append({ + 'id': node_id, + 'display_id': display_id, + 'title': title, + 'formats': formats, + }) + else: + self.report_warning('%s - %s' % (node_id, warning_no_video_sources)) + + #print "BlenderCloudIE : _real_extract : entries : %s" % entries + return self.playlist_result(entries, playlist_id=base_node_id, playlist_title=self.get_node_title(webpage)) + else: + self.report_warning('%s - %s' % (base_node_id, warning_no_video_sources)) + return { + 'id': base_node_id, + 'display_id': display_id, + 'title': None, + 'formats': [], + } + else: + # extract the entire playlist for an entire video section + + webpage = self._download_webpage(url, display_id) + + entries = [] + for node_id in re.findall(r'data-node_id=\"([0-9a-z]+)\"\s*class=\"', webpage): + #print "BlenderCloudIE : _real_extract : node_id : %s" % node_id + + webpage_node = self._download_webpage(url_node % node_id, node_id) + + if "
" in webpage_node: + # this node references a single video (i.e. a single node) + + title = None + formats = [] + + if self.is_video_subscriber_only(webpage_node): + self.report_warning('%s - %s' % (node_id, warning_subscribers_only)) + else: + title = self.get_node_title(webpage_node) + formats = self.get_video_formats(webpage_node) + self._check_formats(formats, node_id) + self._sort_formats(formats) + + entries.append({ + 'id': node_id, + 'display_id': display_id, + 'title': title, + 'formats': formats, + }) + elif "
" in webpage_node: + # this node references a playlist of subsection videos (i.e. multiple nodes) + + for sub_node_id in re.findall(r'data-node_id=\"([0-9a-z]+)\"\s*title=\"', webpage_node): + #print "BlenderCloudIE : _real_extract : sub_node_id : %s" % sub_node_id + + webpage_sub_node = self._download_webpage(url_node % sub_node_id, sub_node_id) + + if "
" in webpage_sub_node: + if self.is_video_subscriber_only(webpage_sub_node): + self.report_warning('%s - %s' % (sub_node_id, warning_subscribers_only)) + else: + title = self.get_node_title(webpage_sub_node) + formats = self.get_video_formats(webpage_sub_node) + self._check_formats(formats, sub_node_id) + self._sort_formats(formats) + + entries.append({ + 'id': sub_node_id, + 'display_id': display_id, + 'title': title, + 'formats': formats, + }) + else: + self.report_warning('%s - %s' % (sub_node_id, warning_no_video_sources)) + else: + self.report_warning('%s - %s' % (node_id, warning_no_video_sources)) + + return self.playlist_result(entries, playlist_id=display_id, playlist_title=self.get_webpage_title(webpage)) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index e1907314d..ddb5fe1a2 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -119,6 +119,7 @@ from .bleacherreport import ( BleacherReportIE, BleacherReportCMSIE, ) +from .blendercloud import (BlenderCloudIE) from .blinkx import BlinkxIE from .bloomberg import BloombergIE from .bokecc import BokeCCIE