diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index ef803b8a7..592b4779b 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1479,6 +1479,7 @@ from .youtube import ( YoutubeUserIE, YoutubeWatchLaterIE, ) +from .yuja import YuJaIE from .zapiks import ZapiksIE from .zaq1 import Zaq1IE from .zattoo import ( diff --git a/youtube_dl/extractor/yuja.py b/youtube_dl/extractor/yuja.py new file mode 100644 index 000000000..f987b1cd8 --- /dev/null +++ b/youtube_dl/extractor/yuja.py @@ -0,0 +1,81 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re +from ..utils import unified_timestamp +from .youtube import YoutubeIE + +from .common import InfoExtractor + + +class YuJaIE(InfoExtractor): + # url needs subdomain and either auth or node + _VALID_URL = r'https?://(?P[a-z0-9]+)\.yuja\.com/V/(?:Watch|Video)\?v=(?P[0-9]+)(?:.*)&a=(?P[0-9]+)' + _TESTS = [{ + 'url': 'https://usm.yuja.com/V/Watch?v=256594&node=1155827&a=0', + 'md5': '4bf9ffa3be86e320c85fcb7fe5918fc3', + 'info_dict': { + 'id': '256594', + 'ext': 'mp4', + 'title': 'Expect To Pursue What Matters', + 'thumbnail': 'https://usm.yuja.com/P/DataPage/BroadcastsThumb/218886', + 'description': '', + 'timestamp': 1542230582, + 'upload_date': '20181114', + 'duration': 107.416 + } + }, { + 'url': 'https://ncvps.yuja.com/V/Video?v=578523&node=2618907&a=244955112&autoplay=1', + 'md5': '6cbcfffd905672e4224b54727d6e84b0', + 'info_dict': { + 'id': '578523', + 'ext': 'mp4', + 'title': 'Intro to NCVPS New Helpdesk 2019-2020', + 'thumbnail': 'https://ncvps.yuja.com/P/DataPage/BroadcastsThumb/532754', + 'description': 'This video provides a general overview of what customers need to know for the new NCVPS Helpdesk. ', + 'timestamp': 1576600000, + 'upload_date': '20191217', + 'duration': 236.167 + } + }, { + # youtube embed + 'url': 'https://mayvillestate.yuja.com/V/Watch?v=325580&node=1589970&a=96633478', + 'info_dict': { + 'id': '7YU95IGxOi8', + 'ext': 'mp4', + 'title': '125 Years of Personal Service', + 'upload_date': '20160420', + 'uploader_id': 'MayvilleStateUniv', + 'uploader': 'MayvilleStateUniv' + } + }] + + def _real_extract(self, url): + subdomain, video_id, auth_id = re.match(self._VALID_URL, url).groups() + + # for some URLs, auth ID is 0 and another auth ID must be resolved from the node ID + if auth_id == '0': + _NODE_REGEX = r'https?://(?P[a-z0-9]+)\.yuja\.com/V/(?:Watch|Video)\?v=(?P[0-9]+)(?:.*)&node=(?P[0-9]+)' + subdomain, video_id, node_id = re.match(_NODE_REGEX, url).groups() + # get new link using node ID + direct_link = self._download_json('https://%s.yuja.com/P/Data/VideoJSON?video=%i&node=%i&checkUser=true&a=%s' + % (subdomain, int(video_id), int(node_id), int(auth_id)), video_id, query={})['video']['directLink'] + auth_id = re.match(self._VALID_URL, direct_link).group('auth') + + data = self._download_json('https://%s.yuja.com/P/Data/VideoJSON?video=%i&a=%i&getPlayerType=true' + % (subdomain, int(video_id), int(auth_id)), video_id, query={})['video'] + + # for YouTube embeds + if data.get('youtubeCode'): + return self.url_result(data.get('youtubeCode'), YoutubeIE.ie_key()) + + return { + 'id': video_id, + 'title': data.get('videoTitle'), + 'url': data.get('videoLinkMp4'), + 'thumbnail': 'https://%s.yuja.com%s' % (subdomain, data.get('thumbImage')), + 'description': data.get('description'), + 'timestamp': unified_timestamp(data.get('postedDate')), + # 'automatic_captions': TODO: add captions + 'duration': float(data.get('duration')) + }