From d2727c3134b01657640e76ee9c6e35e0f7c8f751 Mon Sep 17 00:00:00 2001 From: Parmjit Virk Date: Wed, 14 Jun 2017 14:14:33 -0500 Subject: [PATCH 1/8] [BlenderCloud] Add new extractor --- youtube_dl/extractor/blendercloud.py | 255 +++++++++++++++++++++++++++ youtube_dl/extractor/extractors.py | 1 + 2 files changed, 256 insertions(+) create mode 100644 youtube_dl/extractor/blendercloud.py diff --git a/youtube_dl/extractor/blendercloud.py b/youtube_dl/extractor/blendercloud.py new file mode 100644 index 000000000..00c7f51fa --- /dev/null +++ b/youtube_dl/extractor/blendercloud.py @@ -0,0 +1,255 @@ +# coding: utf-8 +from __future__ import unicode_literals +from .common import InfoExtractor +import re + + +class BlenderCloudIE(InfoExtractor): + _VALID_URL = r'https?://cloud\.blender\.org/[^/]+/(?P[0-9a-z-]+)/(?P[0-9a-z]+)?' + + _TESTS = [ + { + # Single video + 'url': 'https://cloud.blender.org/p/game-asset-creation/56041550044a2a00d0d7e068', + 'info_dict': { + 'id': '56041550044a2a00d0d7e068', + 'display_id': 'game-asset-creation', + 'ext': 'mp4', + 'title': 'Introduction', + }, + }, + { + # Playlist (subsection) + 'url': 'https://cloud.blender.org/p/game-asset-creation/56041550044a2a00d0d7e069', + 'info_dict': { + 'id': '56041550044a2a00d0d7e069', + 'title': 'Section 1 - Understanding the Interface', + }, + 'playlist': [ + { + 'info_dict': { + 'id': '56041550044a2a00d0d7e06a', + 'display_id': 'game-asset-creation', + 'ext': 'mp4', + 'title': 'Chapter 01 - First Encounters', + }, + }, + { + 'info_dict': { + 'id': '56041550044a2a00d0d7e06b', + 'display_id': 'game-asset-creation', + 'ext': 'mp4', + 'title': 'Chapter 02 - Navigation', + }, + }, + { + 'info_dict': { + 'id': '56041550044a2a00d0d7e06c', + 'display_id': 'game-asset-creation', + 'ext': 'mp4', + 'title': 'Chapter 03 - Layout Customizing', + }, + }, + { + 'info_dict': { + 'id': '56041550044a2a00d0d7e06d', + 'display_id': 'game-asset-creation', + 'ext': 'mp4', + 'title': 'Chapter 04 - User Preference Changes', + }, + }, + ], + }, + { + # Playlist + 'url': 'https://cloud.blender.org/p/blenderella/', + 'info_dict': { + 'id': 'blenderella', + 'title': 'Learn Character Modeling — Blender Cloud', + }, + 'playlist': [ + { + 'info_dict': { + 'id': '56040ecf044a2a00a515adb0', + 'display_id': 'blenderella', + 'ext': 'mp4', + 'title': '10 - Cheek, Jaw, Forehead, Scalp', + }, + }, + ], + 'expected_warnings': [ + 'Only available to Blender Cloud subscribers.', + 'No video sources available.' + ], + }, + ] + + def get_node_title(self, source): + node_title = None + node_title = self._html_search_regex( + r'(.*?)', source, 'title').strip() + #print "BlenderCloudIE : get_node_title : node_title : %s" % node_title + return node_title + + def get_webpage_title(self, source): + webpage_title = None + webpage_title = self._html_search_regex( + r'(.*?)', source, 'title').strip() + #print "BlenderCloudIE : get_webpage_title : webpage_title : %s" % webpage_title + return webpage_title + + def is_video_subscriber_only(self, source): + errmsg_subscribers_only = 'Only available to Blender Cloud subscribers.' + return True if errmsg_subscribers_only in source else False + + def get_video_formats(self, source): + video_formats = [] + for video in re.findall(r'" in webpage: + # this node references a single video (i.e. a single node) + + title = None + formats = [] + + if self.is_video_subscriber_only(webpage): + self.report_warning('%s - %s' % (base_node_id, warning_subscribers_only)) + else: + title = self.get_node_title(webpage) + formats = self.get_video_formats(webpage) + self._check_formats(formats, base_node_id) + self._sort_formats(formats) + + return { + 'id': base_node_id, + 'display_id': display_id, + 'title': title, + 'formats': formats, + } + elif "
" in webpage: + # this node references a playlist of subsection videos (i.e. multiple nodes) + + entries = [] + for node_id in re.findall(r'data-node_id=\"([0-9a-z]+)\"\s*title=\"', webpage): + #print "BlenderCloudIE : _real_extract : node_id : %s" % node_id + + webpage_node = self._download_webpage(url_node % node_id, node_id) + + if "
" in webpage_node: + if self.is_video_subscriber_only(webpage_node): + self.report_warning('%s - %s' % (node_id, warning_subscribers_only)) + else: + title = self.get_node_title(webpage_node) + formats = self.get_video_formats(webpage_node) + self._check_formats(formats, node_id) + self._sort_formats(formats) + + entries.append({ + 'id': node_id, + 'display_id': display_id, + 'title': title, + 'formats': formats, + }) + else: + self.report_warning('%s - %s' % (node_id, warning_no_video_sources)) + + #print "BlenderCloudIE : _real_extract : entries : %s" % entries + return self.playlist_result(entries, playlist_id=base_node_id, playlist_title=self.get_node_title(webpage)) + else: + self.report_warning('%s - %s' % (base_node_id, warning_no_video_sources)) + return { + 'id': base_node_id, + 'display_id': display_id, + 'title': None, + 'formats': [], + } + else: + # extract the entire playlist for an entire video section + + webpage = self._download_webpage(url, display_id) + + entries = [] + for node_id in re.findall(r'data-node_id=\"([0-9a-z]+)\"\s*class=\"', webpage): + #print "BlenderCloudIE : _real_extract : node_id : %s" % node_id + + webpage_node = self._download_webpage(url_node % node_id, node_id) + + if "
" in webpage_node: + # this node references a single video (i.e. a single node) + + title = None + formats = [] + + if self.is_video_subscriber_only(webpage_node): + self.report_warning('%s - %s' % (node_id, warning_subscribers_only)) + else: + title = self.get_node_title(webpage_node) + formats = self.get_video_formats(webpage_node) + self._check_formats(formats, node_id) + self._sort_formats(formats) + + entries.append({ + 'id': node_id, + 'display_id': display_id, + 'title': title, + 'formats': formats, + }) + elif "
" in webpage_node: + # this node references a playlist of subsection videos (i.e. multiple nodes) + + for sub_node_id in re.findall(r'data-node_id=\"([0-9a-z]+)\"\s*title=\"', webpage_node): + #print "BlenderCloudIE : _real_extract : sub_node_id : %s" % sub_node_id + + webpage_sub_node = self._download_webpage(url_node % sub_node_id, sub_node_id) + + if "
" in webpage_sub_node: + if self.is_video_subscriber_only(webpage_sub_node): + self.report_warning('%s - %s' % (sub_node_id, warning_subscribers_only)) + else: + title = self.get_node_title(webpage_sub_node) + formats = self.get_video_formats(webpage_sub_node) + self._check_formats(formats, sub_node_id) + self._sort_formats(formats) + + entries.append({ + 'id': sub_node_id, + 'display_id': display_id, + 'title': title, + 'formats': formats, + }) + else: + self.report_warning('%s - %s' % (sub_node_id, warning_no_video_sources)) + else: + self.report_warning('%s - %s' % (node_id, warning_no_video_sources)) + + return self.playlist_result(entries, playlist_id=display_id, playlist_title=self.get_webpage_title(webpage)) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index e1907314d..ddb5fe1a2 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -119,6 +119,7 @@ from .bleacherreport import ( BleacherReportIE, BleacherReportCMSIE, ) +from .blendercloud import (BlenderCloudIE) from .blinkx import BlinkxIE from .bloomberg import BloombergIE from .bokecc import BokeCCIE From b4e7c451c367f08b8efc3f8d2ae92e2e4b59006f Mon Sep 17 00:00:00 2001 From: Parmjit Virk Date: Tue, 20 Jun 2017 20:30:39 -0500 Subject: [PATCH 2/8] [BlenderCloud] Tagged static methods --- youtube_dl/extractor/blendercloud.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/blendercloud.py b/youtube_dl/extractor/blendercloud.py index 00c7f51fa..527d41a27 100644 --- a/youtube_dl/extractor/blendercloud.py +++ b/youtube_dl/extractor/blendercloud.py @@ -98,11 +98,13 @@ class BlenderCloudIE(InfoExtractor): #print "BlenderCloudIE : get_webpage_title : webpage_title : %s" % webpage_title return webpage_title - def is_video_subscriber_only(self, source): + @staticmethod + def is_video_subscriber_only(source): errmsg_subscribers_only = 'Only available to Blender Cloud subscribers.' return True if errmsg_subscribers_only in source else False - def get_video_formats(self, source): + @staticmethod + def get_video_formats(source): video_formats = [] for video in re.findall(r' Date: Wed, 21 Jun 2017 19:16:59 -0500 Subject: [PATCH 3/8] [BlenderCloud] refactored code --- youtube_dl/extractor/blendercloud.py | 336 +++++++++++++++------------ youtube_dl/extractor/extractors.py | 5 +- 2 files changed, 192 insertions(+), 149 deletions(-) diff --git a/youtube_dl/extractor/blendercloud.py b/youtube_dl/extractor/blendercloud.py index 527d41a27..9da745121 100644 --- a/youtube_dl/extractor/blendercloud.py +++ b/youtube_dl/extractor/blendercloud.py @@ -4,8 +4,60 @@ from .common import InfoExtractor import re -class BlenderCloudIE(InfoExtractor): - _VALID_URL = r'https?://cloud\.blender\.org/[^/]+/(?P[0-9a-z-]+)/(?P[0-9a-z]+)?' +class BlenderCloudBaseIE(InfoExtractor): + # A video on the Blender Cloud site is referenced by a single alphanumeric node, + # i.e. '56041550044a2a00d0d7e068' + # + # The data we want for any given node ID can be fetched at: + url_node = "https://cloud.blender.org/nodes/%s/view" + + # TODO: Add authentication scheme for subscriber-only videos. + # + # This will require the use of a (paid) Blender ID token available from: + # https://store.blender.org/product/membership/ + # + # For now - ignore any subscriber-only videos and just grab the public ones. + warning_subscribers_only = 'Only available to Blender Cloud subscribers.' + warning_no_video_sources = 'No video sources available.' + + def get_node_title(self, source): + node_title = None + node_title = self._html_search_regex( + r'(.*?)', source, 'title').strip() + #print "BlenderCloudBaseIE : get_node_title : node_title : %s" % node_title + return node_title + + def get_webpage_title(self, source): + webpage_title = None + webpage_title = self._html_search_regex( + r'(.*?)', source, 'title').strip() + #print "BlenderCloudBaseIE : get_webpage_title : webpage_title : %s" % webpage_title + return webpage_title + + @staticmethod + def is_video_subscriber_only(source): + errmsg_subscribers_only = 'Only available to Blender Cloud subscribers.' + return True if errmsg_subscribers_only in source else False + + @staticmethod + def get_video_formats(source): + video_formats = [] + for video in re.findall(r'[0-9a-z-]+)/(?P[0-9a-z]+)/?' _TESTS = [ { @@ -61,8 +113,106 @@ class BlenderCloudIE(InfoExtractor): ], }, { - # Playlist - 'url': 'https://cloud.blender.org/p/blenderella/', + # Playlist (subsection) + 'url': 'https://cloud.blender.org/p/creature-factory-2/5604151f044a2a00caa7b04b', + 'info_dict': { + 'id': '5604151f044a2a00caa7b04b', + 'title': '01 - First steps', + }, + 'playlist': [ + { + 'info_dict': { + 'id': '5604151f044a2a00caa7b04c', + 'display_id': 'creature-factory-2', + 'ext': 'mp4', + 'title': 'Introduction', + }, + }, + ], + 'expected_warnings': [ + 'Only available to Blender Cloud subscribers.' + ], + }, + ] + + def _real_extract(self, url): + #print "BlenderCloudIE : _real_extract : %s" % url + + mobj = re.match(self._VALID_URL, url) + base_node_id = mobj.group('base_node_id') + display_id = mobj.group('display_id') + #print "BlenderCloudIE : _real_extract : base_node_id : %s" % base_node_id + #print "BlenderCloudIE : _real_extract : display_id : %s" % display_id + + # extract a single video -or- a playlist of subsection videos + + webpage = self._download_webpage(self.url_node % base_node_id, base_node_id) + + if '
' in webpage: + # this base node references a single video (i.e. a single node) + + title = None + formats = [] + + if self.is_video_subscriber_only(webpage): + self.report_warning('%s - %s' % (base_node_id, self.warning_subscribers_only)) + else: + title = self.get_node_title(webpage) + formats = self.get_video_formats(webpage) + #self._check_formats(formats, base_node_id) + self._sort_formats(formats) + + return { + 'id': base_node_id, + 'display_id': display_id, + 'title': title, + 'formats': formats, + } + elif '
' in webpage: + # this base node references a playlist of subsection videos (i.e. multiple nodes) + + entries = [] + for node_id in re.findall(r'data-node_id=\"([0-9a-z]+)\"\s*title=\"', webpage): + #print "BlenderCloudIE : _real_extract : node_id : %s" % node_id + + webpage_node = self._download_webpage(self.url_node % node_id, node_id) + + if '
' in webpage_node: + if self.is_video_subscriber_only(webpage_node): + self.report_warning('%s - %s' % (node_id, self.warning_subscribers_only)) + else: + title = self.get_node_title(webpage_node) + formats = self.get_video_formats(webpage_node) + #self._check_formats(formats, node_id) + self._sort_formats(formats) + entries.append({ + 'id': node_id, + 'display_id': display_id, + 'title': title, + 'formats': formats, + }) + else: + self.report_warning('%s - %s' % (node_id, warning_no_video_sources)) + + #print "BlenderCloudIE : _real_extract : entries : %s" % entries + return self.playlist_result(entries, playlist_id=base_node_id, playlist_title=self.get_node_title(webpage)) + else: + self.report_warning('%s - %s' % (base_node_id, self.warning_no_video_sources)) + return { + 'id': base_node_id, + 'display_id': display_id, + 'title': None, + 'formats': [], + } + + +class BlenderCloudPlaylistIE(BlenderCloudBaseIE): + _VALID_URL = r'https?://cloud\.blender\.org/[^/]+/(?P[0-9a-z-]+)/?$' + + _TESTS = [ + { + # Playlist (complete) + 'url': 'https://cloud.blender.org/p/blenderella', 'info_dict': { 'id': 'blenderella', 'title': 'Learn Character Modeling — Blender Cloud', @@ -84,174 +234,64 @@ class BlenderCloudIE(InfoExtractor): }, ] - def get_node_title(self, source): - node_title = None - node_title = self._html_search_regex( - r'(.*?)', source, 'title').strip() - #print "BlenderCloudIE : get_node_title : node_title : %s" % node_title - return node_title - - def get_webpage_title(self, source): - webpage_title = None - webpage_title = self._html_search_regex( - r'(.*?)', source, 'title').strip() - #print "BlenderCloudIE : get_webpage_title : webpage_title : %s" % webpage_title - return webpage_title - - @staticmethod - def is_video_subscriber_only(source): - errmsg_subscribers_only = 'Only available to Blender Cloud subscribers.' - return True if errmsg_subscribers_only in source else False - - @staticmethod - def get_video_formats(source): - video_formats = [] - for video in re.findall(r'" in webpage: + if '
' in webpage_node: # this node references a single video (i.e. a single node) - title = None - formats = [] - - if self.is_video_subscriber_only(webpage): - self.report_warning('%s - %s' % (base_node_id, warning_subscribers_only)) + if self.is_video_subscriber_only(webpage_node): + self.report_warning('%s - %s' % (node_id, self.warning_subscribers_only)) else: - title = self.get_node_title(webpage) - formats = self.get_video_formats(webpage) - self._check_formats(formats, base_node_id) + title = self.get_node_title(webpage_node) + formats = self.get_video_formats(webpage_node) + #self._check_formats(formats, node_id) self._sort_formats(formats) - - return { - 'id': base_node_id, - 'display_id': display_id, - 'title': title, - 'formats': formats, - } - elif "
" in webpage: + entries.append({ + 'id': node_id, + 'display_id': display_id, + 'title': title, + 'formats': formats, + }) + elif '
' in webpage_node: # this node references a playlist of subsection videos (i.e. multiple nodes) - entries = [] - for node_id in re.findall(r'data-node_id=\"([0-9a-z]+)\"\s*title=\"', webpage): - #print "BlenderCloudIE : _real_extract : node_id : %s" % node_id + for sub_node_id in re.findall(r'data-node_id=\"([0-9a-z]+)\"\s*title=\"', webpage_node): + #print "BlenderCloudPlaylistIE : _real_extract : sub_node_id : %s" % sub_node_id - webpage_node = self._download_webpage(url_node % node_id, node_id) + webpage_sub_node = self._download_webpage(self.url_node % sub_node_id, sub_node_id) - if "
" in webpage_node: - if self.is_video_subscriber_only(webpage_node): - self.report_warning('%s - %s' % (node_id, warning_subscribers_only)) + if '
' in webpage_sub_node: + if self.is_video_subscriber_only(webpage_sub_node): + self.report_warning('%s - %s' % (sub_node_id, self.warning_subscribers_only)) else: - title = self.get_node_title(webpage_node) - formats = self.get_video_formats(webpage_node) - self._check_formats(formats, node_id) + title = self.get_node_title(webpage_sub_node) + formats = self.get_video_formats(webpage_sub_node) + #self._check_formats(formats, sub_node_id) self._sort_formats(formats) - entries.append({ - 'id': node_id, + 'id': sub_node_id, 'display_id': display_id, 'title': title, 'formats': formats, }) else: - self.report_warning('%s - %s' % (node_id, warning_no_video_sources)) - - #print "BlenderCloudIE : _real_extract : entries : %s" % entries - return self.playlist_result(entries, playlist_id=base_node_id, playlist_title=self.get_node_title(webpage)) + self.report_warning('%s - %s' % (sub_node_id, self.warning_no_video_sources)) else: - self.report_warning('%s - %s' % (base_node_id, warning_no_video_sources)) - return { - 'id': base_node_id, - 'display_id': display_id, - 'title': None, - 'formats': [], - } - else: - # extract the entire playlist for an entire video section + self.report_warning('%s - %s' % (node_id, self.warning_no_video_sources)) - webpage = self._download_webpage(url, display_id) - - entries = [] - for node_id in re.findall(r'data-node_id=\"([0-9a-z]+)\"\s*class=\"', webpage): - #print "BlenderCloudIE : _real_extract : node_id : %s" % node_id - - webpage_node = self._download_webpage(url_node % node_id, node_id) - - if "
" in webpage_node: - # this node references a single video (i.e. a single node) - - title = None - formats = [] - - if self.is_video_subscriber_only(webpage_node): - self.report_warning('%s - %s' % (node_id, warning_subscribers_only)) - else: - title = self.get_node_title(webpage_node) - formats = self.get_video_formats(webpage_node) - self._check_formats(formats, node_id) - self._sort_formats(formats) - - entries.append({ - 'id': node_id, - 'display_id': display_id, - 'title': title, - 'formats': formats, - }) - elif "
" in webpage_node: - # this node references a playlist of subsection videos (i.e. multiple nodes) - - for sub_node_id in re.findall(r'data-node_id=\"([0-9a-z]+)\"\s*title=\"', webpage_node): - #print "BlenderCloudIE : _real_extract : sub_node_id : %s" % sub_node_id - - webpage_sub_node = self._download_webpage(url_node % sub_node_id, sub_node_id) - - if "
" in webpage_sub_node: - if self.is_video_subscriber_only(webpage_sub_node): - self.report_warning('%s - %s' % (sub_node_id, warning_subscribers_only)) - else: - title = self.get_node_title(webpage_sub_node) - formats = self.get_video_formats(webpage_sub_node) - self._check_formats(formats, sub_node_id) - self._sort_formats(formats) - - entries.append({ - 'id': sub_node_id, - 'display_id': display_id, - 'title': title, - 'formats': formats, - }) - else: - self.report_warning('%s - %s' % (sub_node_id, warning_no_video_sources)) - else: - self.report_warning('%s - %s' % (node_id, warning_no_video_sources)) - - return self.playlist_result(entries, playlist_id=display_id, playlist_title=self.get_webpage_title(webpage)) + return self.playlist_result(entries, playlist_id=display_id, playlist_title=self.get_webpage_title(webpage)) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 14597e242..9dd9e6ae2 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -123,7 +123,10 @@ from .bleacherreport import ( BleacherReportIE, BleacherReportCMSIE, ) -from .blendercloud import (BlenderCloudIE) +from .blendercloud import ( + BlenderCloudIE, + BlenderCloudPlaylistIE +) from .blinkx import BlinkxIE from .bloomberg import BloombergIE from .bokecc import BokeCCIE From c441fc4e47e504de962065e9cb533bf26b27ffcd Mon Sep 17 00:00:00 2001 From: Parmjit Virk Date: Wed, 21 Jun 2017 19:39:06 -0500 Subject: [PATCH 4/8] [BlenderCloud] Removed debug code --- youtube_dl/extractor/blendercloud.py | 43 ++-------------------------- 1 file changed, 2 insertions(+), 41 deletions(-) diff --git a/youtube_dl/extractor/blendercloud.py b/youtube_dl/extractor/blendercloud.py index 9da745121..03d553a63 100644 --- a/youtube_dl/extractor/blendercloud.py +++ b/youtube_dl/extractor/blendercloud.py @@ -24,14 +24,12 @@ class BlenderCloudBaseIE(InfoExtractor): node_title = None node_title = self._html_search_regex( r'(.*?)', source, 'title').strip() - #print "BlenderCloudBaseIE : get_node_title : node_title : %s" % node_title return node_title def get_webpage_title(self, source): webpage_title = None webpage_title = self._html_search_regex( r'(.*?)', source, 'title').strip() - #print "BlenderCloudBaseIE : get_webpage_title : webpage_title : %s" % webpage_title return webpage_title @staticmethod @@ -43,8 +41,6 @@ class BlenderCloudBaseIE(InfoExtractor): def get_video_formats(source): video_formats = [] for video in re.findall(r'[0-9a-z-]+)/(?P[0-9a-z]+)/?' - _TESTS = [ { # Single video @@ -136,32 +131,22 @@ class BlenderCloudIE(BlenderCloudBaseIE): ] def _real_extract(self, url): - #print "BlenderCloudIE : _real_extract : %s" % url - + # extract a single video -or- a playlist of subsection videos mobj = re.match(self._VALID_URL, url) base_node_id = mobj.group('base_node_id') display_id = mobj.group('display_id') - #print "BlenderCloudIE : _real_extract : base_node_id : %s" % base_node_id - #print "BlenderCloudIE : _real_extract : display_id : %s" % display_id - - # extract a single video -or- a playlist of subsection videos - webpage = self._download_webpage(self.url_node % base_node_id, base_node_id) if '
' in webpage: # this base node references a single video (i.e. a single node) - title = None formats = [] - if self.is_video_subscriber_only(webpage): self.report_warning('%s - %s' % (base_node_id, self.warning_subscribers_only)) else: title = self.get_node_title(webpage) formats = self.get_video_formats(webpage) - #self._check_formats(formats, base_node_id) self._sort_formats(formats) - return { 'id': base_node_id, 'display_id': display_id, @@ -170,20 +155,15 @@ class BlenderCloudIE(BlenderCloudBaseIE): } elif '
' in webpage: # this base node references a playlist of subsection videos (i.e. multiple nodes) - entries = [] for node_id in re.findall(r'data-node_id=\"([0-9a-z]+)\"\s*title=\"', webpage): - #print "BlenderCloudIE : _real_extract : node_id : %s" % node_id - webpage_node = self._download_webpage(self.url_node % node_id, node_id) - if '
' in webpage_node: if self.is_video_subscriber_only(webpage_node): self.report_warning('%s - %s' % (node_id, self.warning_subscribers_only)) else: title = self.get_node_title(webpage_node) formats = self.get_video_formats(webpage_node) - #self._check_formats(formats, node_id) self._sort_formats(formats) entries.append({ 'id': node_id, @@ -193,8 +173,6 @@ class BlenderCloudIE(BlenderCloudBaseIE): }) else: self.report_warning('%s - %s' % (node_id, warning_no_video_sources)) - - #print "BlenderCloudIE : _real_extract : entries : %s" % entries return self.playlist_result(entries, playlist_id=base_node_id, playlist_title=self.get_node_title(webpage)) else: self.report_warning('%s - %s' % (base_node_id, self.warning_no_video_sources)) @@ -208,7 +186,6 @@ class BlenderCloudIE(BlenderCloudBaseIE): class BlenderCloudPlaylistIE(BlenderCloudBaseIE): _VALID_URL = r'https?://cloud\.blender\.org/[^/]+/(?P[0-9a-z-]+)/?$' - _TESTS = [ { # Playlist (complete) @@ -235,31 +212,21 @@ class BlenderCloudPlaylistIE(BlenderCloudBaseIE): ] def _real_extract(self, url): - #print "BlenderCloudPlaylistIE : _real_extract : %s" % url - + # extract the complete playlist for an entire video section mobj = re.match(self._VALID_URL, url) display_id = mobj.group('display_id') - #print "BlenderCloudPlaylistIE : _real_extract : display_id : %s" % display_id - - # extract the complete playlist for an entire video section - webpage = self._download_webpage(url, display_id) entries = [] for node_id in re.findall(r'data-node_id=\"([0-9a-z]+)\"\s*class=\"', webpage): - #print "BlenderCloudPlaylistIE : _real_extract : node_id : %s" % node_id - webpage_node = self._download_webpage(self.url_node % node_id, node_id) - if '
' in webpage_node: # this node references a single video (i.e. a single node) - if self.is_video_subscriber_only(webpage_node): self.report_warning('%s - %s' % (node_id, self.warning_subscribers_only)) else: title = self.get_node_title(webpage_node) formats = self.get_video_formats(webpage_node) - #self._check_formats(formats, node_id) self._sort_formats(formats) entries.append({ 'id': node_id, @@ -269,19 +236,14 @@ class BlenderCloudPlaylistIE(BlenderCloudBaseIE): }) elif '
' in webpage_node: # this node references a playlist of subsection videos (i.e. multiple nodes) - for sub_node_id in re.findall(r'data-node_id=\"([0-9a-z]+)\"\s*title=\"', webpage_node): - #print "BlenderCloudPlaylistIE : _real_extract : sub_node_id : %s" % sub_node_id - webpage_sub_node = self._download_webpage(self.url_node % sub_node_id, sub_node_id) - if '
' in webpage_sub_node: if self.is_video_subscriber_only(webpage_sub_node): self.report_warning('%s - %s' % (sub_node_id, self.warning_subscribers_only)) else: title = self.get_node_title(webpage_sub_node) formats = self.get_video_formats(webpage_sub_node) - #self._check_formats(formats, sub_node_id) self._sort_formats(formats) entries.append({ 'id': sub_node_id, @@ -293,5 +255,4 @@ class BlenderCloudPlaylistIE(BlenderCloudBaseIE): self.report_warning('%s - %s' % (sub_node_id, self.warning_no_video_sources)) else: self.report_warning('%s - %s' % (node_id, self.warning_no_video_sources)) - return self.playlist_result(entries, playlist_id=display_id, playlist_title=self.get_webpage_title(webpage)) From 406dabbe3097915d7b2c5bfeef88e4e476871b34 Mon Sep 17 00:00:00 2001 From: Parmjit Virk Date: Thu, 22 Jun 2017 05:58:01 -0500 Subject: [PATCH 5/8] [BlenderCloud] Updated tests --- youtube_dl/extractor/blendercloud.py | 56 ++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) diff --git a/youtube_dl/extractor/blendercloud.py b/youtube_dl/extractor/blendercloud.py index 03d553a63..542640225 100644 --- a/youtube_dl/extractor/blendercloud.py +++ b/youtube_dl/extractor/blendercloud.py @@ -209,6 +209,62 @@ class BlenderCloudPlaylistIE(BlenderCloudBaseIE): 'No video sources available.' ], }, + { + # Playlist (complete) + 'url': 'https://cloud.blender.org/p/blender-inside-out/', + 'info_dict': { + 'id': 'blender-inside-out', + 'title': 'From Maya and Max to Blender — Blender Cloud', + }, + 'playlist': [ + { + 'info_dict': { + 'id': '560414b7044a2a00c4a6da99', + 'display_id': 'blender-inside-out', + 'ext': 'mp4', + 'title': 'Interface Tour', + }, + }, + { + 'info_dict': { + 'id': '560414b7044a2a00c4a6da9d', + 'display_id': 'blender-inside-out', + 'ext': 'mp4', + 'title': 'Search', + }, + }, + { + 'info_dict': { + 'id': '560414b7044a2a00c4a6da9b', + 'display_id': 'blender-inside-out', + 'ext': 'mp4', + 'title': 'Selection Tools', + }, + }, + { + 'info_dict': { + 'id': '560414b7044a2a00c4a6da9c', + 'display_id': 'blender-inside-out', + 'ext': 'mp4', + 'title': 'Transform Tools', + }, + }, + { + 'info_dict': { + 'id': '560414b7044a2a00c4a6da9a', + 'display_id': 'blender-inside-out', + 'ext': 'mp4', + 'title': 'Viewport Navigation', + }, + }, + ], + 'expected_warnings': [ + 'No video sources available.' + ], + 'params': { + 'only_matching': True, + }, + }, ] def _real_extract(self, url): From b6c11671bcd7dcfb1e0abf45610c3395e44dc274 Mon Sep 17 00:00:00 2001 From: Parmjit Virk Date: Thu, 22 Jun 2017 06:01:31 -0500 Subject: [PATCH 6/8] [BlenderCloud] flake8 corrections --- youtube_dl/extractor/blendercloud.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/blendercloud.py b/youtube_dl/extractor/blendercloud.py index 542640225..fe2b3e9a1 100644 --- a/youtube_dl/extractor/blendercloud.py +++ b/youtube_dl/extractor/blendercloud.py @@ -10,7 +10,7 @@ class BlenderCloudBaseIE(InfoExtractor): # # The data we want for any given node ID can be fetched at: url_node = "https://cloud.blender.org/nodes/%s/view" - + # TODO: Add authentication scheme for subscriber-only videos. # # This will require the use of a (paid) Blender ID token available from: @@ -172,7 +172,7 @@ class BlenderCloudIE(BlenderCloudBaseIE): 'formats': formats, }) else: - self.report_warning('%s - %s' % (node_id, warning_no_video_sources)) + self.report_warning('%s - %s' % (node_id, self.warning_no_video_sources)) return self.playlist_result(entries, playlist_id=base_node_id, playlist_title=self.get_node_title(webpage)) else: self.report_warning('%s - %s' % (base_node_id, self.warning_no_video_sources)) From 2e0eb369f597ea70b26f024a3aea5f2084e4bb62 Mon Sep 17 00:00:00 2001 From: Parmjit Virk Date: Sat, 8 Jul 2017 16:56:49 -0500 Subject: [PATCH 7/8] [BlenderCloud] Removed redundant methods --- youtube_dl/extractor/blendercloud.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/blendercloud.py b/youtube_dl/extractor/blendercloud.py index fe2b3e9a1..5314c5afc 100644 --- a/youtube_dl/extractor/blendercloud.py +++ b/youtube_dl/extractor/blendercloud.py @@ -26,12 +26,6 @@ class BlenderCloudBaseIE(InfoExtractor): r'(.*?)', source, 'title').strip() return node_title - def get_webpage_title(self, source): - webpage_title = None - webpage_title = self._html_search_regex( - r'(.*?)', source, 'title').strip() - return webpage_title - @staticmethod def is_video_subscriber_only(source): errmsg_subscribers_only = 'Only available to Blender Cloud subscribers.' @@ -272,6 +266,7 @@ class BlenderCloudPlaylistIE(BlenderCloudBaseIE): mobj = re.match(self._VALID_URL, url) display_id = mobj.group('display_id') webpage = self._download_webpage(url, display_id) + webpage_title = self._html_search_regex(r'(.*?)', webpage, 'title').strip() or None entries = [] for node_id in re.findall(r'data-node_id=\"([0-9a-z]+)\"\s*class=\"', webpage): @@ -311,4 +306,4 @@ class BlenderCloudPlaylistIE(BlenderCloudBaseIE): self.report_warning('%s - %s' % (sub_node_id, self.warning_no_video_sources)) else: self.report_warning('%s - %s' % (node_id, self.warning_no_video_sources)) - return self.playlist_result(entries, playlist_id=display_id, playlist_title=self.get_webpage_title(webpage)) + return self.playlist_result(entries, playlist_id=display_id, playlist_title=webpage_title) From 000892ccc89a7f0d6be7b95883cb41d5e627a43d Mon Sep 17 00:00:00 2001 From: Parmjit Virk Date: Sat, 2 Sep 2017 20:55:20 -0500 Subject: [PATCH 8/8] [BlenderCloud] Refactored code --- youtube_dl/extractor/blendercloud.py | 171 +++++++++++++-------------- 1 file changed, 81 insertions(+), 90 deletions(-) diff --git a/youtube_dl/extractor/blendercloud.py b/youtube_dl/extractor/blendercloud.py index 5314c5afc..56e74242e 100644 --- a/youtube_dl/extractor/blendercloud.py +++ b/youtube_dl/extractor/blendercloud.py @@ -17,19 +17,61 @@ class BlenderCloudBaseIE(InfoExtractor): # https://store.blender.org/product/membership/ # # For now - ignore any subscriber-only videos and just grab the public ones. - warning_subscribers_only = 'Only available to Blender Cloud subscribers.' - warning_no_video_sources = 'No video sources available.' + + def notify_for(self, node_id, notify_type): + notify_message = None + if notify_type == 'subscribers_only': + notify_message = 'Only available to Blender Cloud subscribers.' + elif notify_type == 'no_video_sources_available': + notify_message = 'No video sources available.' + else: + return None + self.report_warning('%s - %s' % (node_id, notify_message)) def get_node_title(self, source): - node_title = None - node_title = self._html_search_regex( - r'(.*?)', source, 'title').strip() - return node_title + return self._html_search_regex( + r'(.*?)', source, 'title').strip() + + def get_video_single(self, node_id, source): + video_title = None + video_formats = [] + if self.is_video(source, 'subscribers_only'): + self.notify_for(node_id, 'subscribers_only') + else: + video_title = self.get_node_title(source) + video_formats = self.get_video_formats(source) + self._sort_formats(video_formats) + return video_title, video_formats + + def get_video_playlist(self, display_id, source): + entries = [] + for node_id in re.findall(r'data-node_id=\"([0-9a-z]+)\"\s*title=\"', source): + webpage_node = self._download_webpage(self.url_node % node_id, node_id) + if self.is_video(webpage_node, 'single'): + title, formats = self.get_video_single(node_id, webpage_node) + if title is not None: + entries.append({ + 'id': node_id, + 'display_id': display_id, + 'title': title, + 'formats': formats, + }) + else: + self.notify_for(node_id, 'no_video_sources_available') + return entries @staticmethod - def is_video_subscriber_only(source): - errmsg_subscribers_only = 'Only available to Blender Cloud subscribers.' - return True if errmsg_subscribers_only in source else False + def is_video(source, check_for): + tag = None + if check_for == 'subscribers_only': + tag = 'Only available to Blender Cloud subscribers.' + elif check_for == 'single': + tag = '
' + elif check_for == 'playlist': + tag = '
' + else: + return False + return True if tag in source else False @staticmethod def get_video_formats(source): @@ -47,7 +89,7 @@ class BlenderCloudBaseIE(InfoExtractor): class BlenderCloudIE(BlenderCloudBaseIE): - _VALID_URL = r'https?://cloud\.blender\.org/[^/]+/(?P[0-9a-z-]+)/(?P[0-9a-z]+)/?' + _VALID_URL = r'https?://cloud\.blender\.org/[^/]+/(?P[0-9a-z-]+)/(?P[0-9a-z]+)/?' _TESTS = [ { # Single video @@ -125,57 +167,27 @@ class BlenderCloudIE(BlenderCloudBaseIE): ] def _real_extract(self, url): - # extract a single video -or- a playlist of subsection videos + # extract a single video, or a playlist of subsection videos mobj = re.match(self._VALID_URL, url) - base_node_id = mobj.group('base_node_id') + node_id = mobj.group('node_id') display_id = mobj.group('display_id') - webpage = self._download_webpage(self.url_node % base_node_id, base_node_id) - - if '
' in webpage: - # this base node references a single video (i.e. a single node) - title = None - formats = [] - if self.is_video_subscriber_only(webpage): - self.report_warning('%s - %s' % (base_node_id, self.warning_subscribers_only)) - else: - title = self.get_node_title(webpage) - formats = self.get_video_formats(webpage) - self._sort_formats(formats) - return { - 'id': base_node_id, - 'display_id': display_id, - 'title': title, - 'formats': formats, - } - elif '
' in webpage: - # this base node references a playlist of subsection videos (i.e. multiple nodes) - entries = [] - for node_id in re.findall(r'data-node_id=\"([0-9a-z]+)\"\s*title=\"', webpage): - webpage_node = self._download_webpage(self.url_node % node_id, node_id) - if '
' in webpage_node: - if self.is_video_subscriber_only(webpage_node): - self.report_warning('%s - %s' % (node_id, self.warning_subscribers_only)) - else: - title = self.get_node_title(webpage_node) - formats = self.get_video_formats(webpage_node) - self._sort_formats(formats) - entries.append({ - 'id': node_id, - 'display_id': display_id, - 'title': title, - 'formats': formats, - }) - else: - self.report_warning('%s - %s' % (node_id, self.warning_no_video_sources)) - return self.playlist_result(entries, playlist_id=base_node_id, playlist_title=self.get_node_title(webpage)) + webpage = self._download_webpage(self.url_node % node_id, node_id) + title = None + formats = [] + if self.is_video(webpage, 'single'): + title, formats = self.get_video_single(node_id, webpage) + elif self.is_video(webpage, 'playlist'): + entries = self.get_video_playlist(display_id, webpage) + return self.playlist_result( + entries, playlist_id=node_id, playlist_title=self.get_node_title(webpage)) else: - self.report_warning('%s - %s' % (base_node_id, self.warning_no_video_sources)) - return { - 'id': base_node_id, - 'display_id': display_id, - 'title': None, - 'formats': [], - } + self.notify_for(node_id, 'no_video_sources_available') + return { + 'id': node_id, + 'display_id': display_id, + 'title': title, + 'formats': formats, + } class BlenderCloudPlaylistIE(BlenderCloudBaseIE): @@ -266,44 +278,23 @@ class BlenderCloudPlaylistIE(BlenderCloudBaseIE): mobj = re.match(self._VALID_URL, url) display_id = mobj.group('display_id') webpage = self._download_webpage(url, display_id) - webpage_title = self._html_search_regex(r'(.*?)', webpage, 'title').strip() or None - entries = [] - for node_id in re.findall(r'data-node_id=\"([0-9a-z]+)\"\s*class=\"', webpage): + for node_id in re.findall(r'data-node_id=\"([0-9a-z]+)\"', webpage): webpage_node = self._download_webpage(self.url_node % node_id, node_id) - if '
' in webpage_node: - # this node references a single video (i.e. a single node) - if self.is_video_subscriber_only(webpage_node): - self.report_warning('%s - %s' % (node_id, self.warning_subscribers_only)) - else: - title = self.get_node_title(webpage_node) - formats = self.get_video_formats(webpage_node) - self._sort_formats(formats) + if self.is_video(webpage_node, 'single'): + title, formats = self.get_video_single(node_id, webpage_node) + if title is not None: entries.append({ 'id': node_id, 'display_id': display_id, 'title': title, 'formats': formats, }) - elif '
' in webpage_node: - # this node references a playlist of subsection videos (i.e. multiple nodes) - for sub_node_id in re.findall(r'data-node_id=\"([0-9a-z]+)\"\s*title=\"', webpage_node): - webpage_sub_node = self._download_webpage(self.url_node % sub_node_id, sub_node_id) - if '
' in webpage_sub_node: - if self.is_video_subscriber_only(webpage_sub_node): - self.report_warning('%s - %s' % (sub_node_id, self.warning_subscribers_only)) - else: - title = self.get_node_title(webpage_sub_node) - formats = self.get_video_formats(webpage_sub_node) - self._sort_formats(formats) - entries.append({ - 'id': sub_node_id, - 'display_id': display_id, - 'title': title, - 'formats': formats, - }) - else: - self.report_warning('%s - %s' % (sub_node_id, self.warning_no_video_sources)) + elif self.is_video(webpage_node, 'playlist'): + entries = self.get_video_playlist(display_id, webpage_node) else: - self.report_warning('%s - %s' % (node_id, self.warning_no_video_sources)) - return self.playlist_result(entries, playlist_id=display_id, playlist_title=webpage_title) + self.notify_for(node_id, 'no_video_sources_available') + return self.playlist_result( + entries, playlist_id=display_id, + playlist_title=self._html_search_regex( + r'(.*?)', webpage, 'title').strip())