[espn] fix info extraction
This commit is contained in:
parent
9d5fb3b58d
commit
aeba993411
@ -1,18 +1,26 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
parse_iso8601,
|
||||||
|
ExtractorError,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class ESPNIE(InfoExtractor):
|
class ESPNIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://espn\.go\.com/(?:[^/]+/)*(?P<id>[^/]+)'
|
_VALID_URL = r'https?://espn\.go\.com/(?:[^/]+/)*(?P<id>[^/]+)'
|
||||||
_WORKING = False
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://espn.go.com/video/clip?id=10365079',
|
'url': 'http://espn.go.com/video/clip?id=10365079',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'FkYWtmazr6Ed8xmvILvKLWjd4QvYZpzG',
|
'id': '10365079',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'dm_140128_30for30Shorts___JudgingJewellv2',
|
'title': '30 for 30 Shorts: Judging Jewell',
|
||||||
'description': '',
|
'description': 'On July 27, 1996, a terrorist\'s bomb exploded in a crowded Centennial Olympic Park during the Atlanta Olympic Games. The death toll might have been far higher if not for security guard Richard Jewell, who hours after his heroism was called a murderer.',
|
||||||
|
'duration': 1302,
|
||||||
|
'timestamp': 1390936111,
|
||||||
|
'upload_date': '20140128',
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
# m3u8 download
|
# m3u8 download
|
||||||
@ -35,21 +43,51 @@ class ESPNIE(InfoExtractor):
|
|||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
|
def _extract_video_info(self, video_data):
    """Build an info dict from one ESPN video JSON object.

    Required keys in *video_data* are ``id``, ``headline`` and
    ``links`` -> ``source`` (a mapping); a missing one raises ``KeyError``.
    Optional keys are ``description`` (falling back to ``caption``),
    ``duration``, ``thumbnail`` and ``originalPublishDate``.

    Returns a dict with ``id``, ``title``, ``description``, ``duration``,
    ``thumbnail``, ``timestamp`` and ``formats``.
    """
    # Use %-formatting rather than str(): the module enables
    # unicode_literals, so this gives a text string on Python 2 too,
    # where str() would produce a byte string.
    video_id = '%s' % (video_data['id'],)

    formats = []
    for source in video_data['links']['source'].values():
        # Only dict entries with a non-empty 'href' are treated as sources.
        if not isinstance(source, dict):
            continue
        source_url = source.get('href')
        if not source_url:
            continue

        # Dispatch on the manifest type hinted at by the URL.
        if '.m3u8' in source_url:
            formats.extend(self._extract_m3u8_formats(
                source_url, video_id, m3u8_id='hls'))
        elif '.f4m' in source_url:
            formats.extend(self._extract_f4m_formats(
                source_url + '?hdcore=2.10.3', video_id, f4m_id='hds'))
        elif '.smil' in source_url:
            formats.extend(self._extract_smil_formats(source_url, video_id))
        else:
            # Anything else is used as a direct media URL.
            formats.append({
                'url': source_url,
            })
    self._sort_formats(formats)

    return {
        'id': video_id,
        'title': video_data['headline'],
        'description': video_data.get('description') or video_data.get('caption'),
        'duration': video_data.get('duration'),
        'thumbnail': video_data.get('thumbnail'),
        'timestamp': parse_iso8601(video_data.get('originalPublishDate')),
        'formats': formats,
    }
|
||||||
|
|
||||||
def _real_extract(self, url):
    """Extract a single video or a playlist of videos from an ESPN URL.

    The page is requested with an extra ``xhr=1`` query parameter and the
    response is parsed as JSON (non-fatally).

    * JSON with ``meta.type == 'video'``: ``content`` is converted with
      ``_extract_video_info`` and returned.
    * Any other JSON page: every item of ``content.video`` is converted
      and the list is returned through ``playlist_result``.
    * Response that is not JSON: the ``mobileLink`` URL found in the page
      is handed to ``url_result`` with the ``'ESPN'`` extractor key.

    Raises ``ExtractorError`` (expected) when a JSON page has no videos.
    """
    video_id = self._match_id(url)

    # Append xhr=1 with the separator that matches the existing URL.
    xhr_url = url + ('&xhr=1' if '?' in url else '?xhr=1')
    webpage = self._download_webpage(xhr_url, video_id)
    json_data = self._parse_json(webpage, video_id, fatal=False)

    if not json_data:
        mobile_link = self._search_regex(
            r'mobileLink\s*=\s*"([^"]+)";', webpage, 'mobile link')
        return self.url_result(mobile_link, 'ESPN')

    page_type = json_data['meta']['type']
    content = json_data['content']
    if page_type == 'video':
        return self._extract_video_info(content)

    videos_data = content.get('video')
    if not videos_data:
        raise ExtractorError('No videos in the webpage', expected=True)

    entries = [
        self._extract_video_info(video_data) for video_data in videos_data]
    # Title and description are read with .get() so a page lacking either
    # does not abort with KeyError. %-formatting keeps the playlist id a
    # text string on Python 2 (the module enables unicode_literals).
    return self.playlist_result(
        entries, '%s' % (json_data['uid'],),
        content.get('title'), content.get('description'))
|
||||||
|
Loading…
x
Reference in New Issue
Block a user