[BlenderCloud] refactored code
This commit is contained in:
parent
b4e7c451c3
commit
10b77f4a42
@ -4,8 +4,60 @@ from .common import InfoExtractor
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
|
|
||||||
class BlenderCloudIE(InfoExtractor):
|
class BlenderCloudBaseIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://cloud\.blender\.org/[^/]+/(?P<display_id>[0-9a-z-]+)/(?P<base_node_id>[0-9a-z]+)?'
|
# A video on the Blender Cloud site is referenced by a single alphanumeric node,
|
||||||
|
# e.g. '56041550044a2a00d0d7e068'
|
||||||
|
#
|
||||||
|
# The data we want for any given node ID can be fetched at:
|
||||||
|
url_node = "https://cloud.blender.org/nodes/%s/view"
|
||||||
|
|
||||||
|
# TODO: Add authentication scheme for subscriber-only videos.
|
||||||
|
#
|
||||||
|
# This will require the use of a (paid) Blender ID token available from:
|
||||||
|
# https://store.blender.org/product/membership/
|
||||||
|
#
|
||||||
|
# For now - ignore any subscriber-only videos and just grab the public ones.
|
||||||
|
warning_subscribers_only = 'Only available to Blender Cloud subscribers.'
|
||||||
|
warning_no_video_sources = 'No video sources available.'
|
||||||
|
|
||||||
|
def get_node_title(self, source):
    """Return the stripped text of the ``node-title`` <div> found in *source*.

    The search itself is delegated to ``self._html_search_regex``.
    """
    title_pattern = r'<div\s*id=\"node-title\"\s*class=\"node-title\">(.*?)</div>'
    raw_title = self._html_search_regex(title_pattern, source, 'title')
    return raw_title.strip()
|
||||||
|
|
||||||
|
def get_webpage_title(self, source):
    """Return the stripped contents of the ``<title>`` element found in *source*.

    The search itself is delegated to ``self._html_search_regex``.
    """
    title_pattern = r'<title>(.*?)</title>'
    raw_title = self._html_search_regex(title_pattern, source, 'title')
    return raw_title.strip()
|
||||||
|
|
||||||
|
@staticmethod
def is_video_subscriber_only(source):
    """Tell whether *source* contains the subscribers-only notice."""
    marker = 'Only available to Blender Cloud subscribers.'
    return marker in source
|
||||||
|
|
||||||
|
@staticmethod
def get_video_formats(source):
    """Build a format dict for every ``<source src=... type="video/...">`` tag in *source*.

    Each dict holds:
      * ``url``       - the ``src`` attribute with ``&amp;`` turned back into ``&``
      * ``format_id`` - the upper-cased video subtype (e.g. ``MP4``)
      * ``quality``   - 2 for ``MP4``, 1 for anything else

    Returns the dicts in the order the tags appear; an empty list when no
    tag matches.
    """
    video_formats = []
    for video_src, video_type in re.findall(
            r'<source\s*src=\"(.*?)\"\s*type="video/(.*?)"', source):
        video_format_id = video_type.upper()
        video_formats.append({
            # The attribute value is taken verbatim from the markup, where
            # ampersands are written as '&amp;'; restore them so the query
            # string of the URL is valid.
            'url': video_src.replace('&amp;', '&'),
            'format_id': video_format_id,
            'quality': 2 if video_format_id == 'MP4' else 1,
        })
    return video_formats
|
||||||
|
|
||||||
|
|
||||||
|
class BlenderCloudIE(BlenderCloudBaseIE):
|
||||||
|
_VALID_URL = r'https?://cloud\.blender\.org/[^/]+/(?P<display_id>[0-9a-z-]+)/(?P<base_node_id>[0-9a-z]+)/?'
|
||||||
|
|
||||||
_TESTS = [
|
_TESTS = [
|
||||||
{
|
{
|
||||||
@ -61,8 +113,106 @@ class BlenderCloudIE(InfoExtractor):
|
|||||||
],
|
],
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
# Playlist
|
# Playlist (subsection)
|
||||||
'url': 'https://cloud.blender.org/p/blenderella/',
|
'url': 'https://cloud.blender.org/p/creature-factory-2/5604151f044a2a00caa7b04b',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '5604151f044a2a00caa7b04b',
|
||||||
|
'title': '01 - First steps',
|
||||||
|
},
|
||||||
|
'playlist': [
|
||||||
|
{
|
||||||
|
'info_dict': {
|
||||||
|
'id': '5604151f044a2a00caa7b04c',
|
||||||
|
'display_id': 'creature-factory-2',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Introduction',
|
||||||
|
},
|
||||||
|
},
|
||||||
|
],
|
||||||
|
'expected_warnings': [
|
||||||
|
'Only available to Blender Cloud subscribers.'
|
||||||
|
],
|
||||||
|
},
|
||||||
|
]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
    """Extract a single video, or a playlist of subsection videos, for the node in *url*.

    The node page is downloaded from ``self.url_node`` and classified by the
    ``node-preview`` section it contains:

      * ``video`` - returns one info dict; when the page carries the
        subscribers-only notice a warning is reported and the dict is
        returned with ``title`` None and no formats.
      * ``group`` - every referenced child node is downloaded in turn and
        the accessible videos are returned through ``self.playlist_result``;
        children that are subscriber-only or are not videos only produce
        a warning.
      * anything else - a warning is reported and an info dict with
        ``title`` None and no formats is returned.
    """
    mobj = re.match(self._VALID_URL, url)
    base_node_id = mobj.group('base_node_id')
    display_id = mobj.group('display_id')

    webpage = self._download_webpage(self.url_node % base_node_id, base_node_id)

    if '<section class="node-preview video">' in webpage:
        # This base node references a single video (i.e. a single node).
        title = None
        formats = []

        if self.is_video_subscriber_only(webpage):
            self.report_warning('%s - %s' % (base_node_id, self.warning_subscribers_only))
        else:
            title = self.get_node_title(webpage)
            formats = self.get_video_formats(webpage)
            self._sort_formats(formats)

        return {
            'id': base_node_id,
            'display_id': display_id,
            'title': title,
            'formats': formats,
        }

    if '<section class="node-preview group">' in webpage:
        # This base node references a playlist of subsection videos
        # (i.e. multiple nodes).
        entries = []
        for node_id in re.findall(r'data-node_id=\"([0-9a-z]+)\"\s*title=\"', webpage):
            webpage_node = self._download_webpage(self.url_node % node_id, node_id)

            if '<section class="node-preview video">' not in webpage_node:
                # The warning text is a class attribute, so it has to be
                # reached through self; a bare name raises NameError here.
                self.report_warning('%s - %s' % (node_id, self.warning_no_video_sources))
                continue

            if self.is_video_subscriber_only(webpage_node):
                self.report_warning('%s - %s' % (node_id, self.warning_subscribers_only))
                continue

            title = self.get_node_title(webpage_node)
            formats = self.get_video_formats(webpage_node)
            self._sort_formats(formats)
            entries.append({
                'id': node_id,
                'display_id': display_id,
                'title': title,
                'formats': formats,
            })

        return self.playlist_result(
            entries, playlist_id=base_node_id,
            playlist_title=self.get_node_title(webpage))

    # Neither a video nor a group preview was found on the page.
    self.report_warning('%s - %s' % (base_node_id, self.warning_no_video_sources))
    return {
        'id': base_node_id,
        'display_id': display_id,
        'title': None,
        'formats': [],
    }
|
||||||
|
|
||||||
|
|
||||||
|
class BlenderCloudPlaylistIE(BlenderCloudBaseIE):
|
||||||
|
_VALID_URL = r'https?://cloud\.blender\.org/[^/]+/(?P<display_id>[0-9a-z-]+)/?$'
|
||||||
|
|
||||||
|
_TESTS = [
|
||||||
|
{
|
||||||
|
# Playlist (complete)
|
||||||
|
'url': 'https://cloud.blender.org/p/blenderella',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'blenderella',
|
'id': 'blenderella',
|
||||||
'title': 'Learn Character Modeling — Blender Cloud',
|
'title': 'Learn Character Modeling — Blender Cloud',
|
||||||
@ -84,165 +234,55 @@ class BlenderCloudIE(InfoExtractor):
|
|||||||
},
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
def get_node_title(self, source):
|
|
||||||
node_title = None
|
|
||||||
node_title = self._html_search_regex(
|
|
||||||
r'<div\s*id=\"node-title\"\s*class=\"node-title\">(.*?)</div>', source, 'title').strip()
|
|
||||||
#print "BlenderCloudIE : get_node_title : node_title : %s" % node_title
|
|
||||||
return node_title
|
|
||||||
|
|
||||||
def get_webpage_title(self, source):
|
|
||||||
webpage_title = None
|
|
||||||
webpage_title = self._html_search_regex(
|
|
||||||
r'<title>(.*?)</title>', source, 'title').strip()
|
|
||||||
#print "BlenderCloudIE : get_webpage_title : webpage_title : %s" % webpage_title
|
|
||||||
return webpage_title
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def is_video_subscriber_only(source):
|
|
||||||
errmsg_subscribers_only = 'Only available to Blender Cloud subscribers.'
|
|
||||||
return True if errmsg_subscribers_only in source else False
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def get_video_formats(source):
|
|
||||||
video_formats = []
|
|
||||||
for video in re.findall(r'<source\s*src=\"(.*?)\"\s*type="video/(.*?)"', source):
|
|
||||||
#print "BlenderCloudIE : get_video_formats : video : %s" % video[0]
|
|
||||||
#print "BlenderCloudIE : get_video_formats : video_type : %s" % video[1]
|
|
||||||
video_url = video[0].replace('&', '&')
|
|
||||||
video_format_id = video[1].upper()
|
|
||||||
fmt = {
|
|
||||||
'url': video_url,
|
|
||||||
'format_id': video_format_id,
|
|
||||||
'quality': 2 if video_format_id == 'MP4' else 1,
|
|
||||||
}
|
|
||||||
video_formats.append(fmt)
|
|
||||||
return video_formats
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
#print "BlenderCloudIE : _real_extract : %s" % url
|
#print "BlenderCloudPlaylistIE : _real_extract : %s" % url
|
||||||
|
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
base_node_id = mobj.group('base_node_id')
|
|
||||||
display_id = mobj.group('display_id')
|
display_id = mobj.group('display_id')
|
||||||
#print "BlenderCloudIE : _real_extract : base_node_id : %s" % base_node_id
|
#print "BlenderCloudPlaylistIE : _real_extract : display_id : %s" % display_id
|
||||||
#print "BlenderCloudIE : _real_extract : display_id : %s" % display_id
|
|
||||||
|
|
||||||
url_node = "https://cloud.blender.org/nodes/%s/view"
|
# extract the complete playlist for an entire video section
|
||||||
|
|
||||||
warning_subscribers_only = 'Only available to Blender Cloud subscribers.'
|
|
||||||
warning_no_video_sources = 'No video sources available.'
|
|
||||||
|
|
||||||
if base_node_id:
|
|
||||||
# extract a single video -or- a playlist of subsection videos
|
|
||||||
|
|
||||||
webpage = self._download_webpage(url_node % base_node_id, base_node_id)
|
|
||||||
|
|
||||||
if "<section class=\"node-preview video\">" in webpage:
|
|
||||||
# this node references a single video (i.e. a single node)
|
|
||||||
|
|
||||||
title = None
|
|
||||||
formats = []
|
|
||||||
|
|
||||||
if self.is_video_subscriber_only(webpage):
|
|
||||||
self.report_warning('%s - %s' % (base_node_id, warning_subscribers_only))
|
|
||||||
else:
|
|
||||||
title = self.get_node_title(webpage)
|
|
||||||
formats = self.get_video_formats(webpage)
|
|
||||||
self._check_formats(formats, base_node_id)
|
|
||||||
self._sort_formats(formats)
|
|
||||||
|
|
||||||
return {
|
|
||||||
'id': base_node_id,
|
|
||||||
'display_id': display_id,
|
|
||||||
'title': title,
|
|
||||||
'formats': formats,
|
|
||||||
}
|
|
||||||
elif "<section class=\"node-preview group\">" in webpage:
|
|
||||||
# this node references a playlist of subsection videos (i.e. multiple nodes)
|
|
||||||
|
|
||||||
entries = []
|
|
||||||
for node_id in re.findall(r'data-node_id=\"([0-9a-z]+)\"\s*title=\"', webpage):
|
|
||||||
#print "BlenderCloudIE : _real_extract : node_id : %s" % node_id
|
|
||||||
|
|
||||||
webpage_node = self._download_webpage(url_node % node_id, node_id)
|
|
||||||
|
|
||||||
if "<section class=\"node-preview video\">" in webpage_node:
|
|
||||||
if self.is_video_subscriber_only(webpage_node):
|
|
||||||
self.report_warning('%s - %s' % (node_id, warning_subscribers_only))
|
|
||||||
else:
|
|
||||||
title = self.get_node_title(webpage_node)
|
|
||||||
formats = self.get_video_formats(webpage_node)
|
|
||||||
self._check_formats(formats, node_id)
|
|
||||||
self._sort_formats(formats)
|
|
||||||
|
|
||||||
entries.append({
|
|
||||||
'id': node_id,
|
|
||||||
'display_id': display_id,
|
|
||||||
'title': title,
|
|
||||||
'formats': formats,
|
|
||||||
})
|
|
||||||
else:
|
|
||||||
self.report_warning('%s - %s' % (node_id, warning_no_video_sources))
|
|
||||||
|
|
||||||
#print "BlenderCloudIE : _real_extract : entries : %s" % entries
|
|
||||||
return self.playlist_result(entries, playlist_id=base_node_id, playlist_title=self.get_node_title(webpage))
|
|
||||||
else:
|
|
||||||
self.report_warning('%s - %s' % (base_node_id, warning_no_video_sources))
|
|
||||||
return {
|
|
||||||
'id': base_node_id,
|
|
||||||
'display_id': display_id,
|
|
||||||
'title': None,
|
|
||||||
'formats': [],
|
|
||||||
}
|
|
||||||
else:
|
|
||||||
# extract the entire playlist for an entire video section
|
|
||||||
|
|
||||||
webpage = self._download_webpage(url, display_id)
|
webpage = self._download_webpage(url, display_id)
|
||||||
|
|
||||||
entries = []
|
entries = []
|
||||||
for node_id in re.findall(r'data-node_id=\"([0-9a-z]+)\"\s*class=\"', webpage):
|
for node_id in re.findall(r'data-node_id=\"([0-9a-z]+)\"\s*class=\"', webpage):
|
||||||
#print "BlenderCloudIE : _real_extract : node_id : %s" % node_id
|
#print "BlenderCloudPlaylistIE : _real_extract : node_id : %s" % node_id
|
||||||
|
|
||||||
webpage_node = self._download_webpage(url_node % node_id, node_id)
|
webpage_node = self._download_webpage(self.url_node % node_id, node_id)
|
||||||
|
|
||||||
if "<section class=\"node-preview video\">" in webpage_node:
|
if '<section class="node-preview video">' in webpage_node:
|
||||||
# this node references a single video (i.e. a single node)
|
# this node references a single video (i.e. a single node)
|
||||||
|
|
||||||
title = None
|
|
||||||
formats = []
|
|
||||||
|
|
||||||
if self.is_video_subscriber_only(webpage_node):
|
if self.is_video_subscriber_only(webpage_node):
|
||||||
self.report_warning('%s - %s' % (node_id, warning_subscribers_only))
|
self.report_warning('%s - %s' % (node_id, self.warning_subscribers_only))
|
||||||
else:
|
else:
|
||||||
title = self.get_node_title(webpage_node)
|
title = self.get_node_title(webpage_node)
|
||||||
formats = self.get_video_formats(webpage_node)
|
formats = self.get_video_formats(webpage_node)
|
||||||
self._check_formats(formats, node_id)
|
#self._check_formats(formats, node_id)
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
entries.append({
|
entries.append({
|
||||||
'id': node_id,
|
'id': node_id,
|
||||||
'display_id': display_id,
|
'display_id': display_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
})
|
})
|
||||||
elif "<section class=\"node-preview group\">" in webpage_node:
|
elif '<section class="node-preview group">' in webpage_node:
|
||||||
# this node references a playlist of subsection videos (i.e. multiple nodes)
|
# this node references a playlist of subsection videos (i.e. multiple nodes)
|
||||||
|
|
||||||
for sub_node_id in re.findall(r'data-node_id=\"([0-9a-z]+)\"\s*title=\"', webpage_node):
|
for sub_node_id in re.findall(r'data-node_id=\"([0-9a-z]+)\"\s*title=\"', webpage_node):
|
||||||
#print "BlenderCloudIE : _real_extract : sub_node_id : %s" % sub_node_id
|
#print "BlenderCloudPlaylistIE : _real_extract : sub_node_id : %s" % sub_node_id
|
||||||
|
|
||||||
webpage_sub_node = self._download_webpage(url_node % sub_node_id, sub_node_id)
|
webpage_sub_node = self._download_webpage(self.url_node % sub_node_id, sub_node_id)
|
||||||
|
|
||||||
if "<section class=\"node-preview video\">" in webpage_sub_node:
|
if '<section class="node-preview video">' in webpage_sub_node:
|
||||||
if self.is_video_subscriber_only(webpage_sub_node):
|
if self.is_video_subscriber_only(webpage_sub_node):
|
||||||
self.report_warning('%s - %s' % (sub_node_id, warning_subscribers_only))
|
self.report_warning('%s - %s' % (sub_node_id, self.warning_subscribers_only))
|
||||||
else:
|
else:
|
||||||
title = self.get_node_title(webpage_sub_node)
|
title = self.get_node_title(webpage_sub_node)
|
||||||
formats = self.get_video_formats(webpage_sub_node)
|
formats = self.get_video_formats(webpage_sub_node)
|
||||||
self._check_formats(formats, sub_node_id)
|
#self._check_formats(formats, sub_node_id)
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
entries.append({
|
entries.append({
|
||||||
'id': sub_node_id,
|
'id': sub_node_id,
|
||||||
'display_id': display_id,
|
'display_id': display_id,
|
||||||
@ -250,8 +290,8 @@ class BlenderCloudIE(InfoExtractor):
|
|||||||
'formats': formats,
|
'formats': formats,
|
||||||
})
|
})
|
||||||
else:
|
else:
|
||||||
self.report_warning('%s - %s' % (sub_node_id, warning_no_video_sources))
|
self.report_warning('%s - %s' % (sub_node_id, self.warning_no_video_sources))
|
||||||
else:
|
else:
|
||||||
self.report_warning('%s - %s' % (node_id, warning_no_video_sources))
|
self.report_warning('%s - %s' % (node_id, self.warning_no_video_sources))
|
||||||
|
|
||||||
return self.playlist_result(entries, playlist_id=display_id, playlist_title=self.get_webpage_title(webpage))
|
return self.playlist_result(entries, playlist_id=display_id, playlist_title=self.get_webpage_title(webpage))
|
||||||
|
@ -123,7 +123,10 @@ from .bleacherreport import (
|
|||||||
BleacherReportIE,
|
BleacherReportIE,
|
||||||
BleacherReportCMSIE,
|
BleacherReportCMSIE,
|
||||||
)
|
)
|
||||||
from .blendercloud import (BlenderCloudIE)
|
from .blendercloud import (
|
||||||
|
BlenderCloudIE,
|
||||||
|
BlenderCloudPlaylistIE
|
||||||
|
)
|
||||||
from .blinkx import BlinkxIE
|
from .blinkx import BlinkxIE
|
||||||
from .bloomberg import BloombergIE
|
from .bloomberg import BloombergIE
|
||||||
from .bokecc import BokeCCIE
|
from .bokecc import BokeCCIE
|
||||||
|
Loading…
x
Reference in New Issue
Block a user