From 058e8f25ce591fd1d3b499479077528e55d41182 Mon Sep 17 00:00:00 2001 From: Brian Clinkenbeard Date: Sat, 4 Apr 2020 02:17:24 -0700 Subject: [PATCH 1/6] [yuja] add YuJa extractor --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/yuja.py | 81 ++++++++++++++++++++++++++++++ 2 files changed, 82 insertions(+) create mode 100644 youtube_dl/extractor/yuja.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index ef803b8a7..592b4779b 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1479,6 +1479,7 @@ from .youtube import ( YoutubeUserIE, YoutubeWatchLaterIE, ) +from .yuja import YuJaIE from .zapiks import ZapiksIE from .zaq1 import Zaq1IE from .zattoo import ( diff --git a/youtube_dl/extractor/yuja.py b/youtube_dl/extractor/yuja.py new file mode 100644 index 000000000..f987b1cd8 --- /dev/null +++ b/youtube_dl/extractor/yuja.py @@ -0,0 +1,81 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re +from ..utils import unified_timestamp +from .youtube import YoutubeIE + +from .common import InfoExtractor + + +class YuJaIE(InfoExtractor): + # url needs subdomain and either auth or node + _VALID_URL = r'https?://(?P[a-z0-9]+)\.yuja\.com/V/(?:Watch|Video)\?v=(?P[0-9]+)(?:.*)&a=(?P[0-9]+)' + _TESTS = [{ + 'url': 'https://usm.yuja.com/V/Watch?v=256594&node=1155827&a=0', + 'md5': '4bf9ffa3be86e320c85fcb7fe5918fc3', + 'info_dict': { + 'id': '256594', + 'ext': 'mp4', + 'title': 'Expect To Pursue What Matters', + 'thumbnail': 'https://usm.yuja.com/P/DataPage/BroadcastsThumb/218886', + 'description': '', + 'timestamp': 1542230582, + 'upload_date': '20181114', + 'duration': 107.416 + } + }, { + 'url': 'https://ncvps.yuja.com/V/Video?v=578523&node=2618907&a=244955112&autoplay=1', + 'md5': '6cbcfffd905672e4224b54727d6e84b0', + 'info_dict': { + 'id': '578523', + 'ext': 'mp4', + 'title': 'Intro to NCVPS New Helpdesk 2019-2020', + 'thumbnail': 'https://ncvps.yuja.com/P/DataPage/BroadcastsThumb/532754', + 'description': 'This video provides a general overview of what customers need to know for the new NCVPS Helpdesk. ', + 'timestamp': 1576600000, + 'upload_date': '20191217', + 'duration': 236.167 + } + }, { + # youtube embed + 'url': 'https://mayvillestate.yuja.com/V/Watch?v=325580&node=1589970&a=96633478', + 'info_dict': { + 'id': '7YU95IGxOi8', + 'ext': 'mp4', + 'title': '125 Years of Personal Service', + 'upload_date': '20160420', + 'uploader_id': 'MayvilleStateUniv', + 'uploader': 'MayvilleStateUniv' + } + }] + + def _real_extract(self, url): + subdomain, video_id, auth_id = re.match(self._VALID_URL, url).groups() + + # for some URLs, auth ID is 0 and another auth ID must be resolved from the node ID + if auth_id == '0': + _NODE_REGEX = r'https?://(?P[a-z0-9]+)\.yuja\.com/V/(?:Watch|Video)\?v=(?P[0-9]+)(?:.*)&node=(?P[0-9]+)' + subdomain, video_id, node_id = re.match(_NODE_REGEX, url).groups() + # get new link using node ID + direct_link = self._download_json('https://%s.yuja.com/P/Data/VideoJSON?video=%i&node=%i&checkUser=true&a=%s' + % (subdomain, int(video_id), int(node_id), int(auth_id)), video_id, query={})['video']['directLink'] + auth_id = re.match(self._VALID_URL, direct_link).group('auth') + + data = self._download_json('https://%s.yuja.com/P/Data/VideoJSON?video=%i&a=%i&getPlayerType=true' + % (subdomain, int(video_id), int(auth_id)), video_id, query={})['video'] + + # for YouTube embeds + if data.get('youtubeCode'): + return self.url_result(data.get('youtubeCode'), YoutubeIE.ie_key()) + + return { + 'id': video_id, + 'title': data.get('videoTitle'), + 'url': data.get('videoLinkMp4'), + 'thumbnail': 'https://%s.yuja.com%s' % (subdomain, data.get('thumbImage')), + 'description': data.get('description'), + 'timestamp': unified_timestamp(data.get('postedDate')), + # 'automatic_captions': TODO: add captions + 'duration': float(data.get('duration')) + } From 4da3b77f014fd305cd8acdf4b32935591b14664a Mon Sep 17 00:00:00 2001 From: Brian Clinkenbeard Date: Sat, 4 Apr 2020 17:58:35 -0700 Subject: [PATCH 2/6] [yuja] add hls format --- youtube_dl/extractor/yuja.py | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/yuja.py b/youtube_dl/extractor/yuja.py index f987b1cd8..d1827a3cb 100644 --- a/youtube_dl/extractor/yuja.py +++ b/youtube_dl/extractor/yuja.py @@ -69,10 +69,27 @@ class YuJaIE(InfoExtractor): if data.get('youtubeCode'): return self.url_result(data.get('youtubeCode'), YoutubeIE.ie_key()) + formats = [] + if data.get('videoHLSLink'): + formats.append({ + 'format_id': 'mp4_hls', + 'url': data.get('videoHLSLink'), + 'protocol': 'm3u8', + 'ext': 'mp4', + }) + + if data.get('videoLinkMp4'): + formats.append({ + 'format_id': 'mp4', + 'url': data.get('videoLinkMp4'), + 'ext': 'mp4', + }) + return { 'id': video_id, 'title': data.get('videoTitle'), - 'url': data.get('videoLinkMp4'), + # 'url': data.get('videoLinkMp4'), + 'formats': formats, 'thumbnail': 'https://%s.yuja.com%s' % (subdomain, data.get('thumbImage')), 'description': data.get('description'), 'timestamp': unified_timestamp(data.get('postedDate')), From 55d54e32b2a697ca705a3ccf786160ebfe27636a Mon Sep 17 00:00:00 2001 From: Brian Clinkenbeard Date: Sat, 4 Apr 2020 19:16:14 -0700 Subject: [PATCH 3/6] [yuja] fix style --- youtube_dl/extractor/yuja.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/yuja.py b/youtube_dl/extractor/yuja.py index d1827a3cb..cbb10191d 100644 --- a/youtube_dl/extractor/yuja.py +++ b/youtube_dl/extractor/yuja.py @@ -70,24 +70,26 @@ class YuJaIE(InfoExtractor): return self.url_result(data.get('youtubeCode'), YoutubeIE.ie_key()) formats = [] + if data.get('videoHLSLink'): formats.append({ - 'format_id': 'mp4_hls', 'url': data.get('videoHLSLink'), - 'protocol': 'm3u8', 'ext': 'mp4', + 'format_id': 'mp4_hls', + 'protocol': 'm3u8', }) if data.get('videoLinkMp4'): formats.append({ - 'format_id': 'mp4', 'url': data.get('videoLinkMp4'), 'ext': 'mp4', + 'format_id': 'mp4', }) + return { 'id': video_id, - 'title': data.get('videoTitle'), + 'title': data['videoTitle'], # 'url': data.get('videoLinkMp4'), 'formats': formats, 'thumbnail': 'https://%s.yuja.com%s' % (subdomain, data.get('thumbImage')), From 5667922fda0cabe703a4832555ab6ac5385e1e9e Mon Sep 17 00:00:00 2001 From: Brian Clinkenbeard Date: Sat, 4 Apr 2020 20:13:29 -0700 Subject: [PATCH 4/6] [yuja] fix style for flake8 --- youtube_dl/extractor/yuja.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/yuja.py b/youtube_dl/extractor/yuja.py index cbb10191d..4108dade6 100644 --- a/youtube_dl/extractor/yuja.py +++ b/youtube_dl/extractor/yuja.py @@ -77,6 +77,7 @@ class YuJaIE(InfoExtractor): 'ext': 'mp4', 'format_id': 'mp4_hls', 'protocol': 'm3u8', + 'source_preference': 1 }) if data.get('videoLinkMp4'): @@ -84,9 +85,9 @@ class YuJaIE(InfoExtractor): 'url': data.get('videoLinkMp4'), 'ext': 'mp4', 'format_id': 'mp4', + 'source_preference': 0 }) - return { 'id': video_id, 'title': data['videoTitle'], From 1f6aed1bbb3c4719dcf1f987c8ecc17a9c9d7f56 Mon Sep 17 00:00:00 2001 From: Brian Clinkenbeard Date: Sat, 4 Apr 2020 20:31:30 -0700 Subject: [PATCH 5/6] [yuja] reduce line lengths --- youtube_dl/extractor/yuja.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/yuja.py b/youtube_dl/extractor/yuja.py index 4108dade6..f03b5a2e6 100644 --- a/youtube_dl/extractor/yuja.py +++ b/youtube_dl/extractor/yuja.py @@ -57,13 +57,17 @@ class YuJaIE(InfoExtractor): if auth_id == '0': _NODE_REGEX = r'https?://(?P[a-z0-9]+)\.yuja\.com/V/(?:Watch|Video)\?v=(?P[0-9]+)(?:.*)&node=(?P[0-9]+)' subdomain, video_id, node_id = re.match(_NODE_REGEX, url).groups() + # get new link using node ID - direct_link = self._download_json('https://%s.yuja.com/P/Data/VideoJSON?video=%i&node=%i&checkUser=true&a=%s' - % (subdomain, int(video_id), int(node_id), int(auth_id)), video_id, query={})['video']['directLink'] + direct_link = self._download_json( + 'https://%s.yuja.com/P/Data/VideoJSON?video=%i&node=%i&checkUser=true&a=%s' + % (subdomain, int(video_id), int(node_id), int(auth_id)), video_id, query={})['video']['directLink'] + auth_id = re.match(self._VALID_URL, direct_link).group('auth') - data = self._download_json('https://%s.yuja.com/P/Data/VideoJSON?video=%i&a=%i&getPlayerType=true' - % (subdomain, int(video_id), int(auth_id)), video_id, query={})['video'] + data = self._download_json( + 'https://%s.yuja.com/P/Data/VideoJSON?video=%i&a=%i&getPlayerType=true' + % (subdomain, int(video_id), int(auth_id)), video_id, query={})['video'] # for YouTube embeds if data.get('youtubeCode'): @@ -91,7 +95,6 @@ class YuJaIE(InfoExtractor): return { 'id': video_id, 'title': data['videoTitle'], - # 'url': data.get('videoLinkMp4'), 'formats': formats, 'thumbnail': 'https://%s.yuja.com%s' % (subdomain, data.get('thumbImage')), 'description': data.get('description'), From 4e29a30a6f1731fb8c4730923e28699c934fbcbb Mon Sep 17 00:00:00 2001 From: Brian Clinkenbeard Date: Sat, 4 Apr 2020 20:53:29 -0700 Subject: [PATCH 6/6] [yuja] flexibility and conventions --- youtube_dl/extractor/yuja.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/yuja.py b/youtube_dl/extractor/yuja.py index f03b5a2e6..61d4807ce 100644 --- a/youtube_dl/extractor/yuja.py +++ b/youtube_dl/extractor/yuja.py @@ -2,7 +2,10 @@ from __future__ import unicode_literals import re -from ..utils import unified_timestamp +from ..utils import ( + unified_timestamp, + float_or_none +) from .youtube import YoutubeIE from .common import InfoExtractor @@ -55,13 +58,15 @@ class YuJaIE(InfoExtractor): # for some URLs, auth ID is 0 and another auth ID must be resolved from the node ID if auth_id == '0': - _NODE_REGEX = r'https?://(?P[a-z0-9]+)\.yuja\.com/V/(?:Watch|Video)\?v=(?P[0-9]+)(?:.*)&node=(?P[0-9]+)' - subdomain, video_id, node_id = re.match(_NODE_REGEX, url).groups() + subdomain, video_id, node_id = re.match( + r'https?://(?P[a-z0-9]+)\.yuja\.com/V/(?:Watch|Video)\?v=(?P[0-9]+)(?:.*)&node=(?P[0-9]+)', + url).groups() # get new link using node ID direct_link = self._download_json( 'https://%s.yuja.com/P/Data/VideoJSON?video=%i&node=%i&checkUser=true&a=%s' - % (subdomain, int(video_id), int(node_id), int(auth_id)), video_id, query={})['video']['directLink'] + % (subdomain, int(video_id), int(node_id), int(auth_id)), + video_id, query={})['video']['directLink'] auth_id = re.match(self._VALID_URL, direct_link).group('auth') @@ -81,7 +86,6 @@ class YuJaIE(InfoExtractor): 'ext': 'mp4', 'format_id': 'mp4_hls', 'protocol': 'm3u8', - 'source_preference': 1 }) if data.get('videoLinkMp4'): @@ -89,7 +93,6 @@ class YuJaIE(InfoExtractor): 'url': data.get('videoLinkMp4'), 'ext': 'mp4', 'format_id': 'mp4', - 'source_preference': 0 }) return { @@ -98,7 +101,7 @@ class YuJaIE(InfoExtractor): 'formats': formats, 'thumbnail': 'https://%s.yuja.com%s' % (subdomain, data.get('thumbImage')), 'description': data.get('description'), - 'timestamp': unified_timestamp(data.get('postedDate')), + 'timestamp': unified_timestamp(data.get('postedDate') or data.get('lastModifiedTimestamp')), # 'automatic_captions': TODO: add captions - 'duration': float(data.get('duration')) + 'duration': float_or_none(data.get('duration')) }