[TastyTrade] Fix old test, handle alternative URLs

TastyTrade also emails out episode URLs in a second format
(`/tt/daily_recaps/<date>/episodes/<id>`), and those pages put their
content into a different template than the regular show pages. Additional
handling is therefore needed to recognize the alternative URL correctly
and to extract the metadata from it.
This commit is contained in:
Kai Curtis 2018-03-10 17:09:02 -08:00
parent d116918993
commit 7c1a8dc8de

View File

@ -2,19 +2,28 @@ from __future__ import unicode_literals
from .common import InfoExtractor from .common import InfoExtractor
from .ooyala import OoyalaIE from .ooyala import OoyalaIE
from youtube_dl.utils import (
ExtractorError,
)
import json
import re
import sys
class TastyTradeIE(InfoExtractor): class TastyTradeIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?tastytrade\.com/tt/shows/[^/]+/episodes/(?P<id>[^/?#&]+)' _VALID_URL = r'https?://(?:www\.)?tastytrade\.com/tt/(shows|daily_recaps)/[^/]+/episodes/(?P<id>[^/?#&]+)'
_TESTS = [{ _TESTS = [{
'url': 'https://www.tastytrade.com/tt/shows/market-measures/episodes/correlation-in-short-volatility-06-28-2017', 'url': 'https://www.tastytrade.com/tt/shows/market-measures/episodes/correlation-in-short-volatility-06-28-2017',
'info_dict': { 'info_dict': {
'id': 'F3bnlzbToeI6pLEfRyrlfooIILUjz4nM', 'id': '8xZW5xYjE6aLXhPwseCpyIf50oQw69JM',
'ext': 'mp4', 'ext': 'mp4',
'title': 'A History of Teaming', 'title': 'Correlation in Short Volatility',
'description': 'md5:2a9033db8da81f2edffa4c99888140b3', 'description': '[Correlation](https://www.tastytrade.com/tt/learn/correlation) is always changing and positions can be more correlated than we suspect. We can even have...',
'duration': 422.255, 'duration': 753.0,
'upload_date': '20170628',
'timestamp': 1498608000,
}, },
'params': { 'params': {
'skip_download': True, 'skip_download': True,
@ -23,6 +32,18 @@ class TastyTradeIE(InfoExtractor):
}, { }, {
'url': 'https://www.tastytrade.com/tt/shows/daily-dose/episodes/daily-dose-06-30-2017', 'url': 'https://www.tastytrade.com/tt/shows/daily-dose/episodes/daily-dose-06-30-2017',
'only_matching': True, 'only_matching': True,
}, {
'url': 'https://www.tastytrade.com/tt/daily_recaps/2018-03-09/episodes/soybeans-corn-its-planting-time-03-09-2018',
'info_dict': {
'id': 'lud3BtZTE6vnRdolxKRlwNoZQvb3z_LT',
'ext': 'mp4',
'title': 'Soybeans & Corn: It\'s Planting Time',
'description': 'md5:a523504b1227de1b81faeba2876a6d23',
},
'params': {
'skip_download': True,
},
'add_ie': ['Ooyala'],
}] }]
def _real_extract(self, url): def _real_extract(self, url):
@ -33,7 +54,30 @@ class TastyTradeIE(InfoExtractor):
r'data-media-id=(["\'])(?P<code>(?:(?!\1).)+)\1', r'data-media-id=(["\'])(?P<code>(?:(?!\1).)+)\1',
webpage, 'ooyala code', group='code') webpage, 'ooyala code', group='code')
info = self._search_json_ld(webpage, display_id, fatal=False) info = {'id': None, 'title': None, 'description': None}
try:
info = self._search_json_ld(webpage, display_id, fatal=False)
except ExtractorError as ex:
json_string_match = re.search(
r'var episodeData = \$.parseJSON\("(?P<episode_json>.*)"\)', webpage, 0)
if (json_string_match):
escaped_json_string = json_string_match.group('episode_json')
if sys.version_info[0] >= 3:
unescaped_json_string = bytes(
escaped_json_string, "utf-8").decode('unicode_escape')
else:
unescaped_json_string = escaped_json_string.decode(
'string_escape')
metadata = json.loads(unescaped_json_string)
info = {
'id': metadata.get('mediaId'),
'title': metadata.get('title'),
'description': metadata.get('description')
}
info.update({ info.update({
'_type': 'url_transparent', '_type': 'url_transparent',
'ie_key': OoyalaIE.ie_key(), 'ie_key': OoyalaIE.ie_key(),