From fbf861458444ac84b649738805f013ac6edc9e8b Mon Sep 17 00:00:00 2001
From: sh!zeeg
Date: Wed, 4 Jan 2017 01:51:08 +0300
Subject: [PATCH 1/3] [Beam] Add new extractor

---
 youtube_dl/extractor/beampro.py    | 84 ++++++++++++++++++++++++++++++
 youtube_dl/extractor/extractors.py |  1 +
 2 files changed, 85 insertions(+)
 create mode 100644 youtube_dl/extractor/beampro.py

diff --git a/youtube_dl/extractor/beampro.py b/youtube_dl/extractor/beampro.py
new file mode 100644
index 000000000..fbbdb65e6
--- /dev/null
+++ b/youtube_dl/extractor/beampro.py
@@ -0,0 +1,84 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    int_or_none,
+    ExtractorError,
+    compat_str,
+    clean_html,
+    parse_iso8601,
+)
+
+
+class BeamProLiveIE(InfoExtractor):
+    IE_NAME = 'Beam:live'
+    _VALID_URL = r'https?://(?:\w+.)?beam.pro/(?P<id>[^?]+)$'
+    _API_CHANNEL = 'https://beam.pro/api/v1/channels/{0}'
+    _API_MANIFEST = 'https://beam.pro/api/v1/channels/{0}/manifest.{1}'
+    _VALID_MANIFESTS = ('smil', 'm3u8', 'light', 'light2', 'ftl', 'ftlOld')
+    _RATINGS = {'family': 0, 'teen': 13, '18+': 18}
+
+    _TEST = {
+        'url': 'http://www.beam.pro/niterhayven',
+        'info_dict': {
+            'id': '261562',
+            'ext': 'mp4',
+            'uploader': 'niterhayven',
+            'timestamp': 1483477281,
+            'age_limit': 18,
+            'title': 'Introducing The Witcher 3 // The Grind Starts Now!',
+            'thumbnail': r're:https://.*\.jpg$',
+            'upload_date': '20170103',
+            'uploader_id': 373396,
+            'description': 'md5:0b161ac080f15fe05d18a07adb44a74d',
+            'is_live': True,
+        },
+        # 'skip': r're:.* is offline$',
+        'params': {
+            'skip_download': True,
+        },
+    }
+
+    def _real_extract(self, url):
+        channel_id = self._match_id(url)
+        chan_data = self._download_json(self._API_CHANNEL.format(channel_id), channel_id)
+
+        if not chan_data.get('online'):
+            raise ExtractorError('{0} is offline'.format(channel_id), expected=True)
+
+        formats = self._extract_m3u8_formats(
+            self._API_MANIFEST.format(
+                chan_data.get('id'),
+                self._VALID_MANIFESTS[1]), channel_id, ext='mp4',
+        )
+        self._sort_formats(formats, 'vbr')
+        info = {}
+        info['formats'] = formats
+        if chan_data:
+            info.update(self._extract_info(chan_data))
+
+        return info
+
+    def _rating_to_age(self, rating):
+        return self._RATINGS[rating] if rating in self._RATINGS else None
+
+    def _extract_info(self, info):
+        thumbnail = info['thumbnail'].get('url') if info.get('thumbnail') else None
+        username = info['user'].get('url') if info.get('username') else None
+
+        return {
+            'id': compat_str(info['id']),
+            'title': info.get('name') or 'Untitled Broadcast',
+            'description': clean_html(info.get('description')),
+            'age_limit': self._rating_to_age(info.get('audience')),
+            'is_live': True if info.get('online') else False,
+            'timestamp': parse_iso8601(info.get('updatedAt')),
+            # 'release_date': info.get('createdAt'),
+            # 'upload_date': info.get('updatedAt'),
+            # 'formats': formats,
+            'uploader': info.get('token') or username,
+            'uploader_id': int_or_none(info.get('userId')),
+            'view_count': int_or_none(info.get('viewersTotal')),
+            'thumbnail': thumbnail,
+        }
diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index fcfe87f6f..48673f004 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -91,6 +91,7 @@ from .bbc import (
     BBCCoUkPlaylistIE,
     BBCIE,
 )
+from .beampro import BeamProLiveIE
 from .beeg import BeegIE
 from .behindkink import BehindKinkIE
 from .bellmedia import BellMediaIE

From aee23eccb71185b4e6ceacf8449eae87e2173a67 Mon Sep 17 00:00:00 2001
From: sh!zeeg
Date: Wed, 4 Jan 2017 04:11:45 +0300
Subject: [PATCH 2/3] [Beam] make sure "id" and "title" are always presented

---
 youtube_dl/extractor/beampro.py | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/youtube_dl/extractor/beampro.py b/youtube_dl/extractor/beampro.py
index fbbdb65e6..3c3a55ad3 100644
--- a/youtube_dl/extractor/beampro.py
+++ b/youtube_dl/extractor/beampro.py
@@ -34,7 +34,7 @@ class BeamProLiveIE(InfoExtractor):
             'description': 'md5:0b161ac080f15fe05d18a07adb44a74d',
             'is_live': True,
         },
-        # 'skip': r're:.* is offline$',
+        'skip': 'niterhayven is offline',
         'params': {
             'skip_download': True,
         },
@@ -57,6 +57,10 @@ class BeamProLiveIE(InfoExtractor):
         info['formats'] = formats
         if chan_data:
             info.update(self._extract_info(chan_data))
+        if not info.get('title'):
+            info['title'] = self._live_title(channel_id)
+        if not info.get('id'):  # barely possible but just in case
+            info['id'] = compat_str(abs(hash('{0}/{1}'.format(channel_id, formats[0]))) % (10 ** 8))
 
         return info
 
@@ -66,10 +70,11 @@ class BeamProLiveIE(InfoExtractor):
     def _extract_info(self, info):
         thumbnail = info['thumbnail'].get('url') if info.get('thumbnail') else None
         username = info['user'].get('url') if info.get('username') else None
+        video_id = compat_str(info['id']) if info.get('id') else None
 
         return {
-            'id': compat_str(info['id']),
-            'title': info.get('name') or 'Untitled Broadcast',
+            'id': video_id,
+            'title': info.get('name'),
             'description': clean_html(info.get('description')),
             'age_limit': self._rating_to_age(info.get('audience')),
             'is_live': True if info.get('online') else False,

From 10586356db01a95edc86b88d74ab2b805bb24565 Mon Sep 17 00:00:00 2001
From: sh!zeeg
Date: Sat, 7 Jan 2017 21:08:55 +0300
Subject: [PATCH 3/3] [BeamPro] requested fixes

---
 youtube_dl/extractor/beampro.py | 31 ++++++++++++-------------------
 1 file changed, 12 insertions(+), 19 deletions(-)

diff --git a/youtube_dl/extractor/beampro.py b/youtube_dl/extractor/beampro.py
index 3c3a55ad3..dc0a2b4af 100644
--- a/youtube_dl/extractor/beampro.py
+++ b/youtube_dl/extractor/beampro.py
@@ -3,11 +3,12 @@ from __future__ import unicode_literals
 
 from .common import InfoExtractor
 from ..utils import (
-    int_or_none,
     ExtractorError,
-    compat_str,
     clean_html,
+    compat_str,
+    int_or_none,
     parse_iso8601,
+    try_get,
 )
 
 
@@ -15,8 +16,7 @@ class BeamProLiveIE(InfoExtractor):
     IE_NAME = 'Beam:live'
     _VALID_URL = r'https?://(?:\w+.)?beam.pro/(?P<id>[^?]+)$'
     _API_CHANNEL = 'https://beam.pro/api/v1/channels/{0}'
-    _API_MANIFEST = 'https://beam.pro/api/v1/channels/{0}/manifest.{1}'
-    _VALID_MANIFESTS = ('smil', 'm3u8', 'light', 'light2', 'ftl', 'ftlOld')
+    _API_MANIFEST = 'https://beam.pro/api/v1/channels/{0}/manifest.m3u8'
     _RATINGS = {'family': 0, 'teen': 13, '18+': 18}
 
     _TEST = {
@@ -48,11 +48,9 @@ class BeamProLiveIE(InfoExtractor):
             raise ExtractorError('{0} is offline'.format(channel_id), expected=True)
 
         formats = self._extract_m3u8_formats(
-            self._API_MANIFEST.format(
-                chan_data.get('id'),
-                self._VALID_MANIFESTS[1]), channel_id, ext='mp4',
-        )
-        self._sort_formats(formats, 'vbr')
+            self._API_MANIFEST.format(chan_data.get('id')), channel_id, ext='mp4')
+
+        self._sort_formats(formats)
         info = {}
         info['formats'] = formats
         if chan_data:
@@ -60,28 +58,23 @@ class BeamProLiveIE(InfoExtractor):
         if not info.get('title'):
             info['title'] = self._live_title(channel_id)
         if not info.get('id'):  # barely possible but just in case
-            info['id'] = compat_str(abs(hash('{0}/{1}'.format(channel_id, formats[0]))) % (10 ** 8))
+            info['id'] = compat_str(abs(hash(channel_id)) % (10 ** 8))
 
         return info
 
-    def _rating_to_age(self, rating):
-        return self._RATINGS[rating] if rating in self._RATINGS else None
-
     def _extract_info(self, info):
-        thumbnail = info['thumbnail'].get('url') if info.get('thumbnail') else None
-        username = info['user'].get('url') if info.get('username') else None
+        thumbnail = try_get(info, lambda x: x['thumbnail']['url'], compat_str)
+        username = try_get(info, lambda x: x['user']['url'], compat_str)
         video_id = compat_str(info['id']) if info.get('id') else None
+        rating = info.get('audience')
 
         return {
             'id': video_id,
             'title': info.get('name'),
             'description': clean_html(info.get('description')),
-            'age_limit': self._rating_to_age(info.get('audience')),
+            'age_limit': self._RATINGS[rating] if rating in self._RATINGS else None,
             'is_live': True if info.get('online') else False,
             'timestamp': parse_iso8601(info.get('updatedAt')),
-            # 'release_date': info.get('createdAt'),
-            # 'upload_date': info.get('updatedAt'),
-            # 'formats': formats,
             'uploader': info.get('token') or username,
             'uploader_id': int_or_none(info.get('userId')),
             'view_count': int_or_none(info.get('viewersTotal')),