[TastyTrade] Fix old test, handle alt URLs

TastyTrade also emails out episode URLs in a second format (`/tt/daily_recaps/<date>/episodes/<slug>`) and renders those pages from a different template, which is why the alternative handling exists. Additional handling is therefore needed to correctly recognize these URLs and extract the metadata.
2018-03-10 17:09:02 -08:00 · 2018-03-10 17:09:02 -08:00 · 7c1a8dc8de
commit 7c1a8dc8de
parent d116918993
1 changed files with 50 additions and 6 deletions
--- a/youtube_dl/extractor/tastytrade.py
+++ b/youtube_dl/extractor/tastytrade.py
@ -2,19 +2,28 @@ from __future__ import unicode_literals
 from .common import InfoExtractor
 from .ooyala import OoyalaIE
 from youtube_dl.utils import (
    ExtractorError,
 )
 import json
 import re
 import sys
 class TastyTradeIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?tastytrade\.com/tt/shows/[^/]+/episodes/(?P<id>[^/?#&]+)'
+    _VALID_URL = r'https?://(?:www\.)?tastytrade\.com/tt/(shows|daily_recaps)/[^/]+/episodes/(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://www.tastytrade.com/tt/shows/market-measures/episodes/correlation-in-short-volatility-06-28-2017',
        'info_dict': {
-            'id': 'F3bnlzbToeI6pLEfRyrlfooIILUjz4nM',
+            'id': '8xZW5xYjE6aLXhPwseCpyIf50oQw69JM',
            'ext': 'mp4',
-            'title': 'A History of Teaming',
+            'title': 'Correlation in Short Volatility',
-            'description': 'md5:2a9033db8da81f2edffa4c99888140b3',
+            'description': '[Correlation](https://www.tastytrade.com/tt/learn/correlation) is always changing and positions can be more correlated than we suspect. We can even have...',
-            'duration': 422.255,
+            'duration': 753.0,
            'upload_date': '20170628',
            'timestamp': 1498608000,
        },
        'params': {
            'skip_download': True,
@ -23,6 +32,18 @@ class TastyTradeIE(InfoExtractor):
    }, {
        'url': 'https://www.tastytrade.com/tt/shows/daily-dose/episodes/daily-dose-06-30-2017',
        'only_matching': True,
    }, {
        'url': 'https://www.tastytrade.com/tt/daily_recaps/2018-03-09/episodes/soybeans-corn-its-planting-time-03-09-2018',
        'info_dict': {
            'id': 'lud3BtZTE6vnRdolxKRlwNoZQvb3z_LT',
            'ext': 'mp4',
            'title': 'Soybeans & Corn: It\'s Planting Time',
            'description': 'md5:a523504b1227de1b81faeba2876a6d23',
        },
        'params': {
            'skip_download': True,
        },
        'add_ie': ['Ooyala'],
    }]
    def _real_extract(self, url):
@ -33,7 +54,30 @@ class TastyTradeIE(InfoExtractor):
            r'data-media-id=(["\'])(?P<code>(?:(?!\1).)+)\1',
            webpage, 'ooyala code', group='code')
-        info = self._search_json_ld(webpage, display_id, fatal=False)
+        info = {'id': None, 'title': None, 'description': None}
        try:
            info = self._search_json_ld(webpage, display_id, fatal=False)
        except ExtractorError as ex:
            json_string_match = re.search(
                r'var episodeData = \$.parseJSON\("(?P<episode_json>.*)"\)', webpage, 0)
            if (json_string_match):
                escaped_json_string = json_string_match.group('episode_json')
                if sys.version_info[0] >= 3:
                    unescaped_json_string = bytes(
                        escaped_json_string, "utf-8").decode('unicode_escape')
                else:
                    unescaped_json_string = escaped_json_string.decode(
                        'string_escape')
                metadata = json.loads(unescaped_json_string)
                info = {
                    'id': metadata.get('mediaId'),
                    'title': metadata.get('title'),
                    'description': metadata.get('description')
                }
        info.update({
            '_type': 'url_transparent',
            'ie_key': OoyalaIE.ie_key(),