From 2107c2b0aa1e7cd9c28a7da56912fdde924f7dd0 Mon Sep 17 00:00:00 2001 From: Mark Lee Date: Sun, 27 Nov 2016 15:33:26 -0800 Subject: [PATCH] [spike] Extract MGID via custom mobile URL If the conventional MGID detection fails, fall back to searching for the mobile app's custom URL format. --- youtube_dl/extractor/mtv.py | 4 ++-- youtube_dl/extractor/spike.py | 11 +++++++++++ 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/mtv.py b/youtube_dl/extractor/mtv.py index 74a3a035e..ff0c6149f 100644 --- a/youtube_dl/extractor/mtv.py +++ b/youtube_dl/extractor/mtv.py @@ -201,7 +201,7 @@ class MTVServicesInfoExtractor(InfoExtractor): [self._get_video_info(item) for item in idoc.findall('.//item')], playlist_title=title, playlist_description=description) - def _extract_mgid(self, webpage): + def _extract_mgid(self, webpage, fatal=True): try: # the url can be http://media.mtvnservices.com/fb/{mgid}.swf # or http://media.mtvnservices.com/{mgid} @@ -221,7 +221,7 @@ class MTVServicesInfoExtractor(InfoExtractor): sm4_embed = self._html_search_meta( 'sm4:video:embed', webpage, 'sm4 embed', default='') mgid = self._search_regex( - r'embed/(mgid:.+?)["\'&?/]', sm4_embed, 'mgid') + r'embed/(mgid:.+?)["\'&?/]', sm4_embed, 'mgid', fatal=fatal) return mgid def _real_extract(self, url): diff --git a/youtube_dl/extractor/spike.py b/youtube_dl/extractor/spike.py index 218785ee4..d1f7bb17a 100644 --- a/youtube_dl/extractor/spike.py +++ b/youtube_dl/extractor/spike.py @@ -1,5 +1,7 @@ from __future__ import unicode_literals +import re + from .mtv import MTVServicesInfoExtractor @@ -32,3 +34,12 @@ class SpikeIE(MTVServicesInfoExtractor): _FEED_URL = 'http://www.spike.com/feeds/mrss/' _MOBILE_TEMPLATE = 'http://m.spike.com/videos/video.rbml?id=%s' + _CUSTOM_URL_REGEX = re.compile(r'spikenetworkapp://([^/]+/[-a-fA-F0-9]+)') + + def _extract_mgid(self, webpage): + mgid = super(SpikeIE, self)._extract_mgid(webpage, fatal=False) + if mgid is None: + url_parts = self._search_regex(self._CUSTOM_URL_REGEX, webpage, 'episode_id') + video_type, episode_id = url_parts.split('/', 1) + mgid = 'mgid:arc:{}:spike.com:{}'.format(video_type, episode_id) + return mgid