From 7c1a8dc8de52d11f0d25e9d20e5ca4d8fef011cc Mon Sep 17 00:00:00 2001
From: Kai Curtis
Date: Sat, 10 Mar 2018 17:09:02 -0800
Subject: [PATCH] [TastyTrade] Fix old test, handle alt urls

Since TastyTrade also emails out URLs for episodes in another format
(and actually puts the content into different templates, thus the
alternative handling), additional handling is needed to correctly
recognize the URL and extract the metadata.

When the JSON-LD lookup yields nothing, the metadata is read from the
escaped JavaScript string that the page passes to $.parseJSON() as
"episodeData". That metadata is optional, so a missing or malformed
blob is ignored rather than treated as an error.
---
Reviewer notes (this free-form section is not part of the commit):

- Restored the line structure of the patch and the named regex groups
  ("(?P<id>...)", "(?P<episode_json>...)") that had been stripped and
  left the patterns uncompilable.
- The episodeData fallback is now triggered by an empty JSON-LD result.
  It used to live in an "except ExtractorError" handler around a call
  that is made in non-fatal mode (fatal=False).
- Unescaping uses a single encode('latin-1', 'backslashreplace') /
  decode('unicode_escape') round trip instead of a sys.version_info
  switch, so non-ASCII titles and descriptions survive on Python 2 and 3.
- The fallback regex escapes the dot in "$.parseJSON" and stops at the
  first unescaped double quote instead of matching greedily.
- Dropped the absolute "youtube_dl.utils" import and "import sys" (no
  longer needed); "json" and "re" are grouped above the relative imports.
- The first test pins the description with "startswith:" because the
  literal contained a raw line-break character in the middle of the text.
- Hunk headers and the diffstat were recomputed for the changes above. The
  blob id on the "index" line and the commit id on the first line still
  refer to the original submission.

 youtube_dl/extractor/tastytrade.py | 52 ++++++++++++++++++++++++++++++++++++----
 1 file changed, 47 insertions(+), 5 deletions(-)

diff --git a/youtube_dl/extractor/tastytrade.py b/youtube_dl/extractor/tastytrade.py
index 7fe96bd5f..74cb681a5 100644
--- a/youtube_dl/extractor/tastytrade.py
+++ b/youtube_dl/extractor/tastytrade.py
@@ -1,20 +1,25 @@
 from __future__ import unicode_literals
 
+import json
+import re
+
 from .common import InfoExtractor
 from .ooyala import OoyalaIE
 
 
 class TastyTradeIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?tastytrade\.com/tt/shows/[^/]+/episodes/(?P<id>[^/?#&]+)'
+    _VALID_URL = r'https?://(?:www\.)?tastytrade\.com/tt/(?:shows|daily_recaps)/[^/]+/episodes/(?P<id>[^/?#&]+)'
 
     _TESTS = [{
         'url': 'https://www.tastytrade.com/tt/shows/market-measures/episodes/correlation-in-short-volatility-06-28-2017',
         'info_dict': {
-            'id': 'F3bnlzbToeI6pLEfRyrlfooIILUjz4nM',
+            'id': '8xZW5xYjE6aLXhPwseCpyIf50oQw69JM',
             'ext': 'mp4',
-            'title': 'A History of Teaming',
-            'description': 'md5:2a9033db8da81f2edffa4c99888140b3',
-            'duration': 422.255,
+            'title': 'Correlation in Short Volatility',
+            'description': 'startswith:[Correlation](https://www.tastytrade.com/tt/learn/correlation) is always changing and positions can be more correlated than we suspect.',
+            'duration': 753.0,
+            'upload_date': '20170628',
+            'timestamp': 1498608000,
         },
         'params': {
             'skip_download': True,
@@ -23,6 +28,18 @@ class TastyTradeIE(InfoExtractor):
     }, {
         'url': 'https://www.tastytrade.com/tt/shows/daily-dose/episodes/daily-dose-06-30-2017',
         'only_matching': True,
+    }, {
+        'url': 'https://www.tastytrade.com/tt/daily_recaps/2018-03-09/episodes/soybeans-corn-its-planting-time-03-09-2018',
+        'info_dict': {
+            'id': 'lud3BtZTE6vnRdolxKRlwNoZQvb3z_LT',
+            'ext': 'mp4',
+            'title': 'Soybeans & Corn: It\'s Planting Time',
+            'description': 'md5:a523504b1227de1b81faeba2876a6d23',
+        },
+        'params': {
+            'skip_download': True,
+        },
+        'add_ie': ['Ooyala'],
     }]
 
     def _real_extract(self, url):
@@ -34,6 +51,31 @@ class TastyTradeIE(InfoExtractor):
             webpage, 'ooyala code', group='code')
 
         info = self._search_json_ld(webpage, display_id, fatal=False)
+        if not info:
+            # No usable JSON-LD on this page.  The alternative episode
+            # template passes its metadata to $.parseJSON() as an escaped
+            # JavaScript string literal instead, so read it from there.
+            episode_data = re.search(
+                r'var episodeData = \$\.parseJSON\("(?P<episode_json>(?:[^"\\]|\\.)*)"\)',
+                webpage)
+            if episode_data:
+                try:
+                    # Encoding with backslashreplace first lets
+                    # unicode_escape undo the escaping without mangling
+                    # non-ASCII text, identically on Python 2 and 3.
+                    metadata = json.loads(
+                        episode_data.group('episode_json').encode(
+                            'latin-1', 'backslashreplace').decode('unicode_escape'))
+                except ValueError:
+                    # This metadata is optional; ignore a malformed blob.
+                    metadata = None
+                if isinstance(metadata, dict):
+                    info = {
+                        'id': metadata.get('mediaId'),
+                        'title': metadata.get('title'),
+                        'description': metadata.get('description'),
+                    }
+
         info.update({
             '_type': 'url_transparent',
             'ie_key': OoyalaIE.ie_key(),