From c06ca8dcc9cdc70d461769284b1abdcfe4c50b90 Mon Sep 17 00:00:00 2001 From: FA Date: Tue, 7 May 2019 17:17:43 -0700 Subject: [PATCH] Fail if mandatory fields absent. Add test. --- youtube_dl/extractor/earthcam.py | 48 +++++++++++++++++++++----------- 1 file changed, 32 insertions(+), 16 deletions(-) diff --git a/youtube_dl/extractor/earthcam.py b/youtube_dl/extractor/earthcam.py index 0c82c7ee0..2544e8c59 100644 --- a/youtube_dl/extractor/earthcam.py +++ b/youtube_dl/extractor/earthcam.py @@ -3,16 +3,18 @@ from __future__ import unicode_literals from .common import InfoExtractor from ..utils import ( + ExtractorError, urljoin, int_or_none, url_or_none, try_get, + js_to_json, ) class EarthCamIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?earthcam\.com/.*\?.*cam=(?P<id>\w+)' - _TEST = { + _TESTS = [{ 'url': 'https://www.earthcam.com/usa/newyork/timessquare/?cam=tsrobo1', 'info_dict': { 'id': 'tsrobo1', @@ -22,29 +24,43 @@ class EarthCamIE(InfoExtractor): 'view_count': int, 'is_live': True, 'thumbnail': r're:^https?://.*\.(jpg|png)$', - }, - } + }, + }, { + 'url': 'https://www.earthcam.com/usa/louisiana/neworleans/bourbonstreet/?cam=catsmeowkaraoke', + 'info_dict': { + 'id': 'catsmeowkaraoke', + 'ext': 'mp4', + 'title': 'New Orleans, LA', + 'description': 'Get a front row seat to all the wild and crazy stage performances happening at the Cat\'s Meow Karaoke Bar! 
Over the years, thousands of guests have enjoyed their moment singing in the spotlight at this popular local spot!', + 'view_count': int, + 'is_live': True, + 'thumbnail': r're:^https?://.*\.(jpg|png)$', + } + }] def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - json_str = self._html_search_regex(r'var\s+json_base\s*=\s*(?P<jstr>{\s*"cam"\s*:\s*{.*}.*});', webpage, 'json', group='jstr') - json_base = self._parse_json(json_str, video_id) - video_info = try_get(json_base, lambda x: x['cam'][video_id], dict) or {} - title = video_info.get("long_title") - description = video_info.get("description") - thumbnail = video_info.get("thumbimage") - view_count = int_or_none(video_info.get("streamviews")) - domain = video_info.get("html5_streamingdomain") - path = video_info.get("html5_streampath") + json_str = self._html_search_regex(r'var\s+json_base\s*=\s*(?P<json_str>{\s*"cam"\s*:\s*{.*}.*});', webpage, 'json', group='json_str', default='{}') + json_base = self._parse_json(js_to_json(json_str), video_id) + + video_info = json_base['cam'][video_id] + domain = video_info['html5_streamingdomain'] + path = video_info['html5_streampath'] m3u8_url = urljoin(domain, path) + formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', 'm3u8_native') + title = video_info.get('long_title') or self._og_search_title(webpage) + description = video_info.get('description') or self._og_search_description(webpage) + thumbnail = url_or_none(video_info.get('thumbimage')) or self._og_search_thumbnail(webpage) + view_count = int_or_none(video_info.get("streamviews")) + return { 'id': video_id, - 'formats': self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', 'm3u8_native'), - 'title': title or self._og_search_title(webpage), - 'description': description or self._og_search_description(webpage), + 'formats': formats, + 'title': title, + 'description': description, 'view_count': view_count, 'is_live': True, - 'thumbnail': url_or_none(thumbnail), + 
'thumbnail': thumbnail, }