Merge branch 'master' of https://github.com/rg3/youtube-dl
This commit is contained in:
commit
d12939e16b
2
Makefile
2
Makefile
@ -1,7 +1,7 @@
|
||||
all: youtube-dl README.md CONTRIBUTING.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish supportedsites
|
||||
|
||||
clean:
|
||||
rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish youtube_dl/extractor/lazy_extractors.py *.dump *.part *.info.json *.mp4 *.flv *.mp3 *.avi *.mkv *.webm CONTRIBUTING.md.tmp ISSUE_TEMPLATE.md.tmp youtube-dl youtube-dl.exe
|
||||
rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish youtube_dl/extractor/lazy_extractors.py *.dump *.part *.info.json *.mp4 *.flv *.mp3 *.avi *.mkv *.webm *.jpg *.png CONTRIBUTING.md.tmp ISSUE_TEMPLATE.md.tmp youtube-dl youtube-dl.exe
|
||||
find . -name "*.pyc" -delete
|
||||
find . -name "*.class" -delete
|
||||
|
||||
|
@ -50,6 +50,8 @@ from youtube_dl.utils import (
|
||||
sanitize_path,
|
||||
prepend_extension,
|
||||
replace_extension,
|
||||
remove_start,
|
||||
remove_end,
|
||||
remove_quotes,
|
||||
shell_quote,
|
||||
smuggle_url,
|
||||
@ -215,6 +217,16 @@ class TestUtil(unittest.TestCase):
|
||||
self.assertEqual(replace_extension('.abc', 'temp'), '.abc.temp')
|
||||
self.assertEqual(replace_extension('.abc.ext', 'temp'), '.abc.temp')
|
||||
|
||||
def test_remove_start(self):
    """remove_start drops a matching prefix and passes None or non-matching input through."""
    prefix = 'A - '
    cases = (
        (None, None),
        ('A - B', 'B'),
        ('B - A', 'B - A'),
    )
    for value, expected in cases:
        self.assertEqual(remove_start(value, prefix), expected)
|
||||
|
||||
def test_remove_end(self):
    """remove_end drops a matching suffix and passes None or non-matching input through."""
    suffix = ' - B'
    cases = (
        (None, None),
        ('A - B', 'A'),
        ('B - A', 'B - A'),
    )
    for value, expected in cases:
        self.assertEqual(remove_end(value, suffix), expected)
|
||||
|
||||
def test_remove_quotes(self):
|
||||
self.assertEqual(remove_quotes(None), None)
|
||||
self.assertEqual(remove_quotes('"'), '"')
|
||||
|
135
youtube_dl/extractor/abcnews.py
Normal file
135
youtube_dl/extractor/abcnews.py
Normal file
@ -0,0 +1,135 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import calendar
|
||||
import re
|
||||
import time
|
||||
|
||||
from .amp import AMPIE
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urlparse
|
||||
|
||||
|
||||
class AbcNewsVideoIE(AMPIE):
    """Extractor for standalone ABC News video pages.

    Matches ``abcnews.go.com/<section>/video/<slug>-<numeric id>`` and loads
    the metadata/formats from the site's item feed through the inherited
    ``_extract_feed_info`` helper.
    """
    IE_NAME = 'abcnews:video'
    # Raw string: ``\d`` and ``\.`` must reach the regex engine verbatim.
    # ``https?`` is accepted because AbcNewsIE (which matches https pages)
    # dispatches here by ie_key with a URL joined against the page URL; an
    # http-only pattern would make re.match() return None in _real_extract.
    _VALID_URL = r'https?://abcnews\.go\.com/[^/]+/video/(?P<display_id>[0-9a-z-]+)-(?P<id>\d+)'

    _TESTS = [{
        'url': 'http://abcnews.go.com/ThisWeek/video/week-exclusive-irans-foreign-minister-zarif-20411932',
        'info_dict': {
            'id': '20411932',
            'ext': 'mp4',
            'display_id': 'week-exclusive-irans-foreign-minister-zarif',
            'title': '\'This Week\' Exclusive: Iran\'s Foreign Minister Zarif',
            'description': 'George Stephanopoulos goes one-on-one with Iranian Foreign Minister Dr. Javad Zarif.',
            'duration': 180,
            'thumbnail': r're:^https?://.*\.jpg$',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }, {
        'url': 'http://abcnews.go.com/2020/video/2020-husband-stands-teacher-jail-student-affairs-26119478',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the info dict for the video page at *url*.

        The numeric id and the slug are taken from the URL; everything else
        comes from the item feed and is then overridden with those two values.
        """
        mobj = re.match(self._VALID_URL, url)
        display_id = mobj.group('display_id')
        video_id = mobj.group('id')
        info_dict = self._extract_feed_info(
            'http://abcnews.go.com/video/itemfeed?id=%s' % video_id)
        # Prefer the ids from the URL over whatever the feed reports.
        info_dict.update({
            'id': video_id,
            'display_id': display_id,
        })
        return info_dict
|
||||
|
||||
|
||||
class AbcNewsIE(InfoExtractor):
    """Extractor for ABC News article pages (``.../<slug>/story?id=<id>``).

    The article's main video is delegated to ``AbcNewsVideoIE`` as a
    ``url_transparent`` result; when the page also embeds a YouTube iframe,
    both are returned together as a playlist.
    """
    IE_NAME = 'abcnews'
    # Raw string so the regex escapes are not treated as string escapes.
    _VALID_URL = r'https?://abcnews\.go\.com/(?:[^/]+/)+(?P<display_id>[0-9a-z-]+)/story\?id=(?P<id>\d+)'

    _TESTS = [{
        'url': 'http://abcnews.go.com/Blotter/News/dramatic-video-rare-death-job-america/story?id=10498713#.UIhwosWHLjY',
        'info_dict': {
            'id': '10498713',
            'ext': 'flv',
            'display_id': 'dramatic-video-rare-death-job-america',
            'title': 'Occupational Hazards',
            'description': 'Nightline investigates the dangers that lurk at various jobs.',
            'thumbnail': r're:^https?://.*\.jpg$',
            'upload_date': '20100428',
            'timestamp': 1272412800,
        },
        'add_ie': ['AbcNewsVideo'],
    }, {
        'url': 'http://abcnews.go.com/Entertainment/justin-timberlake-performs-stop-feeling-eurovision-2016/story?id=39125818',
        'info_dict': {
            'id': '39125818',
            'ext': 'mp4',
            'display_id': 'justin-timberlake-performs-stop-feeling-eurovision-2016',
            'title': 'Justin Timberlake Drops Hints For Secret Single',
            'description': 'Lara Spencer reports the buzziest stories of the day in "GMA" Pop News.',
            'upload_date': '20160515',
            'timestamp': 1463329500,
        },
        'params': {
            # m3u8 download
            'skip_download': True,
            # The embedded YouTube video is blocked due to copyright issues
            'playlist_items': '1',
        },
        'add_ie': ['AbcNewsVideo'],
    }, {
        'url': 'http://abcnews.go.com/Technology/exclusive-apple-ceo-tim-cook-iphone-cracking-software/story?id=37173343',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a url_transparent entry (or a playlist) for the article at *url*.

        The video URL is read from the ``window.abcnvideo.url`` assignment in
        the page and resolved against *url*. The publication timestamp is
        parsed from the page's ``timestamp`` span on a best-effort basis and
        is ``None`` when it is missing or in an unrecognised format.
        """
        mobj = re.match(self._VALID_URL, url)
        display_id = mobj.group('display_id')
        video_id = mobj.group('id')

        webpage = self._download_webpage(url, video_id)
        video_url = self._search_regex(
            r'window\.abcnvideo\.url\s*=\s*"([^"]+)"', webpage, 'video URL')
        full_video_url = compat_urlparse.urljoin(url, video_url)

        youtube_url = self._html_search_regex(
            r'<iframe[^>]+src="(https://www\.youtube\.com/embed/[^"]+)"',
            webpage, 'YouTube URL', default=None)

        timestamp = None
        date_str = self._html_search_regex(
            r'<span[^>]+class="timestamp">([^<]+)</span>',
            webpage, 'timestamp', fatal=False)
        if date_str:
            tz_offset = 0
            if date_str.endswith(' ET'):  # Eastern Time
                # NOTE(review): fixed UTC-5 ignores daylight saving time
                # (EDT is UTC-4); kept as is because the expected test
                # timestamps above were produced with this offset.
                tz_offset = -5
                date_str = date_str[:-3]
            date_formats = ['%b. %d, %Y', '%b %d, %Y, %I:%M %p']
            for date_format in date_formats:
                try:
                    timestamp = calendar.timegm(time.strptime(date_str.strip(), date_format))
                except ValueError:
                    continue
                else:
                    # First matching format wins; no need to try the rest.
                    break
            if timestamp is not None:
                # Convert the local wall-clock reading to UTC.
                timestamp -= tz_offset * 3600

        entry = {
            '_type': 'url_transparent',
            'ie_key': AbcNewsVideoIE.ie_key(),
            'url': full_video_url,
            'id': video_id,
            'display_id': display_id,
            'timestamp': timestamp,
        }

        if youtube_url:
            entries = [entry, self.url_result(youtube_url, 'Youtube')]
            return self.playlist_result(entries)

        return entry
|
@ -52,7 +52,7 @@ class AMPIE(InfoExtractor):
|
||||
for media_data in media_content:
|
||||
media = media_data['@attributes']
|
||||
media_type = media['type']
|
||||
if media_type == 'video/f4m':
|
||||
if media_type in ('video/f4m', 'application/f4m+xml'):
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
media['url'] + '?hdcore=3.4.0&plugin=aasp-3.4.0.132.124',
|
||||
video_id, f4m_id='hds', fatal=False))
|
||||
@ -61,7 +61,7 @@ class AMPIE(InfoExtractor):
|
||||
media['url'], video_id, 'mp4', m3u8_id='hls', fatal=False))
|
||||
else:
|
||||
formats.append({
|
||||
'format_id': media_data['media-category']['@attributes']['label'],
|
||||
'format_id': media_data.get('media-category', {}).get('@attributes', {}).get('label'),
|
||||
'url': media['url'],
|
||||
'tbr': int_or_none(media.get('bitrate')),
|
||||
'filesize': int_or_none(media.get('fileSize')),
|
||||
|
@ -4,7 +4,10 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import js_to_json
|
||||
from ..utils import (
|
||||
js_to_json,
|
||||
smuggle_url,
|
||||
)
|
||||
|
||||
|
||||
class CBCIE(InfoExtractor):
|
||||
@ -12,57 +15,54 @@ class CBCIE(InfoExtractor):
|
||||
_TESTS = [{
|
||||
# with mediaId
|
||||
'url': 'http://www.cbc.ca/22minutes/videos/clips-season-23/don-cherry-play-offs',
|
||||
'md5': '97e24d09672fc4cf56256d6faa6c25bc',
|
||||
'info_dict': {
|
||||
'id': '2682904050',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': 'Don Cherry – All-Stars',
|
||||
'description': 'Don Cherry has a bee in his bonnet about AHL player John Scott because that guy’s got heart.',
|
||||
'timestamp': 1454475540,
|
||||
'timestamp': 1454463000,
|
||||
'upload_date': '20160203',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
'uploader': 'CBCC-NEW',
|
||||
},
|
||||
}, {
|
||||
# with clipId
|
||||
'url': 'http://www.cbc.ca/archives/entry/1978-robin-williams-freestyles-on-90-minutes-live',
|
||||
'info_dict': {
|
||||
'id': '2487345465',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': 'Robin Williams freestyles on 90 Minutes Live',
|
||||
'description': 'Wacky American comedian Robin Williams shows off his infamous "freestyle" comedic talents while being interviewed on CBC\'s 90 Minutes Live.',
|
||||
'upload_date': '19700101',
|
||||
'upload_date': '19780210',
|
||||
'uploader': 'CBCC-NEW',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
'timestamp': 255977160,
|
||||
},
|
||||
}, {
|
||||
# multiple iframes
|
||||
'url': 'http://www.cbc.ca/natureofthings/blog/birds-eye-view-from-vancouvers-burrard-street-bridge-how-we-got-the-shot',
|
||||
'playlist': [{
|
||||
'md5': '377572d0b49c4ce0c9ad77470e0b96b4',
|
||||
'info_dict': {
|
||||
'id': '2680832926',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': 'An Eagle\'s-Eye View Off Burrard Bridge',
|
||||
'description': 'Hercules the eagle flies from Vancouver\'s Burrard Bridge down to a nearby park with a mini-camera strapped to his back.',
|
||||
'upload_date': '19700101',
|
||||
'upload_date': '20160201',
|
||||
'timestamp': 1454342820,
|
||||
'uploader': 'CBCC-NEW',
|
||||
},
|
||||
}, {
|
||||
'md5': '415a0e3f586113894174dfb31aa5bb1a',
|
||||
'info_dict': {
|
||||
'id': '2658915080',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': 'Fly like an eagle!',
|
||||
'description': 'Eagle equipped with a mini camera flies from the world\'s tallest tower',
|
||||
'upload_date': '19700101',
|
||||
'upload_date': '20150315',
|
||||
'timestamp': 1426443984,
|
||||
'uploader': 'CBCC-NEW',
|
||||
},
|
||||
}],
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
@ -95,20 +95,23 @@ class CBCPlayerIE(InfoExtractor):
|
||||
'url': 'http://www.cbc.ca/player/play/2683190193',
|
||||
'info_dict': {
|
||||
'id': '2683190193',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': 'Gerry Runs a Sweat Shop',
|
||||
'description': 'md5:b457e1c01e8ff408d9d801c1c2cd29b0',
|
||||
'timestamp': 1455067800,
|
||||
'timestamp': 1455071400,
|
||||
'upload_date': '20160210',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
'uploader': 'CBCC-NEW',
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
    """Delegate a CBC player URL to the ThePlatform extractor.

    The id matched from *url* is used as the media GUID in a
    link.theplatform.com URL. ``force_smil_url`` is smuggled along with the
    URL for the ThePlatform extractor to read.

    As displayed, this method carried two consecutive ``return`` statements;
    the earlier ThePlatformFeed ``url_result`` made this one unreachable, so
    only the smuggled ThePlatform result is kept.
    """
    video_id = self._match_id(url)
    return {
        '_type': 'url_transparent',
        'ie_key': 'ThePlatform',
        'url': smuggle_url(
            'http://link.theplatform.com/s/ExhSPC/media/guid/2655402169/%s?mbr=true' % video_id, {
                'force_smil_url': True
            }),
        'id': video_id,
    }
|
||||
|
@ -3,6 +3,10 @@ from __future__ import unicode_literals
|
||||
|
||||
from .abc import ABCIE
|
||||
from .abc7news import Abc7NewsIE
|
||||
from .abcnews import (
|
||||
AbcNewsIE,
|
||||
AbcNewsVideoIE,
|
||||
)
|
||||
from .academicearth import AcademicEarthCourseIE
|
||||
from .acast import (
|
||||
ACastIE,
|
||||
@ -238,6 +242,7 @@ from .fktv import FKTVIE
|
||||
from .flickr import FlickrIE
|
||||
from .folketinget import FolketingetIE
|
||||
from .footyroom import FootyRoomIE
|
||||
from .formula1 import Formula1IE
|
||||
from .fourtube import FourTubeIE
|
||||
from .fox import FOXIE
|
||||
from .foxgay import FoxgayIE
|
||||
@ -365,6 +370,7 @@ from .kuwo import (
|
||||
)
|
||||
from .la7 import LA7IE
|
||||
from .laola1tv import Laola1TvIE
|
||||
from .learnr import LearnrIE
|
||||
from .lecture2go import Lecture2GoIE
|
||||
from .lemonde import LemondeIE
|
||||
from .leeco import (
|
||||
|
25
youtube_dl/extractor/formula1.py
Normal file
25
youtube_dl/extractor/formula1.py
Normal file
@ -0,0 +1,25 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class Formula1IE(InfoExtractor):
    """Extractor for formula1.com video pages; playback is delegated to Ooyala."""
    _VALID_URL = r'https?://(?:www\.)?formula1\.com/content/fom-website/en/video/\d{4}/\d{1,2}/(?P<id>.+?)\.html'
    _TEST = {
        'url': 'http://www.formula1.com/content/fom-website/en/video/2016/5/Race_highlights_-_Spain_2016.html',
        'md5': '8c79e54be72078b26b89e0e111c0502b',
        'info_dict': {
            'id': 'JvYXJpMzE6pArfHWm5ARp5AiUmD-gibV',
            'ext': 'flv',
            'title': 'Race highlights - Spain 2016',
        }
    }

    def _real_extract(self, url):
        """Find the ``data-videoid`` attribute in the page and hand it to the Ooyala extractor."""
        page_slug = self._match_id(url)
        page = self._download_webpage(url, page_slug)
        embed_code = self._search_regex(
            r'data-videoid="([^"]+)"', page, 'ooyala embed code')
        ooyala_url = 'ooyala:%s' % embed_code
        return self.url_result(ooyala_url, 'Ooyala', embed_code)
|
33
youtube_dl/extractor/learnr.py
Normal file
33
youtube_dl/extractor/learnr.py
Normal file
@ -0,0 +1,33 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class LearnrIE(InfoExtractor):
    """Extractor for learnr.pro video pages, which wrap a YouTube video."""
    _VALID_URL = r'https?://(?:www\.)?learnr\.pro/view/video/(?P<id>[0-9]+)'
    _TEST = {
        'url': 'http://www.learnr.pro/view/video/51624-web-development-tutorial-for-beginners-1-how-to-build-webpages-with-html-css-javascript',
        'md5': '3719fdf0a68397f49899e82c308a89de',
        'info_dict': {
            'id': '51624',
            'ext': 'mp4',
            'title': 'Web Development Tutorial for Beginners (#1) - How to build webpages with HTML, CSS, Javascript',
            'description': 'md5:b36dbfa92350176cdf12b4d388485503',
            'uploader': 'LearnCode.academy',
            'uploader_id': 'learncodeacademy',
            'upload_date': '20131021',
        },
        'add_ie': ['Youtube'],
    }

    def _real_extract(self, url):
        """Return a url_transparent result whose URL is the ``videoId`` value found in the page."""
        video_id = self._match_id(url)
        page = self._download_webpage(url, video_id)
        # The page's player configuration carries the wrapped video's id.
        youtube_id = self._search_regex(
            r"videoId\s*:\s*'([^']+)'", page, 'youtube id')
        result = {'_type': 'url_transparent'}
        result['url'] = youtube_id
        result['id'] = video_id
        return result
|
@ -1,19 +1,18 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
month_by_name,
|
||||
int_or_none,
|
||||
remove_end,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
|
||||
class NDTVIE(InfoExtractor):
|
||||
_VALID_URL = r'^https?://(?:www\.)?ndtv\.com/video/player/[^/]*/[^/]*/(?P<id>[a-z0-9]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?ndtv\.com/video/(?:[^/]+/)+[^/?^&]+-(?P<id>\d+)'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.ndtv.com/video/player/news/ndtv-exclusive-don-t-need-character-certificate-from-rahul-gandhi-says-arvind-kejriwal/300710',
|
||||
'url': 'http://www.ndtv.com/video/news/news/ndtv-exclusive-don-t-need-character-certificate-from-rahul-gandhi-says-arvind-kejriwal-300710',
|
||||
'md5': '39f992dbe5fb531c395d8bbedb1e5e88',
|
||||
'info_dict': {
|
||||
'id': '300710',
|
||||
@ -22,7 +21,7 @@ class NDTVIE(InfoExtractor):
|
||||
'description': 'md5:ab2d4b4a6056c5cb4caa6d729deabf02',
|
||||
'upload_date': '20131208',
|
||||
'duration': 1327,
|
||||
'thumbnail': 'http://i.ndtvimg.com/video/images/vod/medium/2013-12/big_300710_1386518307.jpg',
|
||||
'thumbnail': 're:https?://.*\.jpg',
|
||||
},
|
||||
}
|
||||
|
||||
@ -30,36 +29,19 @@ class NDTVIE(InfoExtractor):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
title = remove_end(self._og_search_title(webpage), ' - NDTV')
|
||||
|
||||
filename = self._search_regex(
|
||||
r"__filename='([^']+)'", webpage, 'video filename')
|
||||
video_url = ('http://bitcast-b.bitgravity.com/ndtvod/23372/ndtv/%s' %
|
||||
filename)
|
||||
video_url = 'http://bitcast-b.bitgravity.com/ndtvod/23372/ndtv/%s' % filename
|
||||
|
||||
duration = int_or_none(self._search_regex(
|
||||
r"__duration='([^']+)'", webpage, 'duration', fatal=False))
|
||||
|
||||
date_m = re.search(r'''(?x)
|
||||
<p\s+class="vod_dateline">\s*
|
||||
Published\s+On:\s*
|
||||
(?P<monthname>[A-Za-z]+)\s+(?P<day>[0-9]+),\s*(?P<year>[0-9]+)
|
||||
''', webpage)
|
||||
upload_date = None
|
||||
upload_date = unified_strdate(self._html_search_meta(
|
||||
'publish-date', webpage, 'upload date', fatal=False))
|
||||
|
||||
if date_m is not None:
|
||||
month = month_by_name(date_m.group('monthname'))
|
||||
if month is not None:
|
||||
upload_date = '%s%02d%02d' % (
|
||||
date_m.group('year'), month, int(date_m.group('day')))
|
||||
|
||||
description = self._og_search_description(webpage)
|
||||
READ_MORE = ' (Read more)'
|
||||
if description.endswith(READ_MORE):
|
||||
description = description[:-len(READ_MORE)]
|
||||
|
||||
title = self._og_search_title(webpage)
|
||||
TITLE_SUFFIX = ' - NDTV'
|
||||
if title.endswith(TITLE_SUFFIX):
|
||||
title = title[:-len(TITLE_SUFFIX)]
|
||||
description = remove_end(self._og_search_description(webpage), ' (Read more)')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
|
@ -2,8 +2,12 @@ from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
sanitized_Request,
|
||||
clean_html,
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
qualities,
|
||||
urlencode_postdata,
|
||||
xpath_text,
|
||||
)
|
||||
|
||||
|
||||
@ -16,12 +20,12 @@ class NFBIE(InfoExtractor):
|
||||
'url': 'https://www.nfb.ca/film/qallunaat_why_white_people_are_funny',
|
||||
'info_dict': {
|
||||
'id': 'qallunaat_why_white_people_are_funny',
|
||||
'ext': 'mp4',
|
||||
'ext': 'flv',
|
||||
'title': 'Qallunaat! Why White People Are Funny ',
|
||||
'description': 'md5:836d8aff55e087d04d9f6df554d4e038',
|
||||
'description': 'md5:6b8e32dde3abf91e58857b174916620c',
|
||||
'duration': 3128,
|
||||
'creator': 'Mark Sandiford',
|
||||
'uploader': 'Mark Sandiford',
|
||||
'uploader_id': 'mark-sandiford',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
@ -31,65 +35,78 @@ class NFBIE(InfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
    """Extract an NFB film from its player-config XML.

    The config is requested with a POST (``getConfig=true``) and walked
    ``media`` node by ``media`` node: ``posterImage`` nodes yield thumbnails,
    ``video`` nodes yield the title, RTMP formats, description, author,
    duration and subtitles.

    Returns the info dict for the film; the title lookup is fatal, every
    other field is optional.
    """
    video_id = self._match_id(url)

    config = self._download_xml(
        'https://www.nfb.ca/film/%s/player_config' % video_id,
        video_id, 'Downloading player config XML',
        data=urlencode_postdata({'getConfig': 'true'}),
        headers={
            'Content-Type': 'application/x-www-form-urlencoded',
            'X-NFB-Referer': 'http://www.nfb.ca/medias/flash/NFBVideoPlayer.swf'
        })

    title, description, duration, uploader = [None] * 4
    # Two distinct lists: ``[[]] * 2`` would bind both names to the SAME
    # list, so appended formats would also show up as thumbnails whenever no
    # posterImage node rebinds ``thumbnails`` first.
    thumbnails, formats = [], []
    subtitles = {}

    for media in config.findall('./player/stream/media'):
        if media.get('type') == 'posterImage':
            quality_key = qualities(('low', 'high'))
            thumbnails = []
            for asset in media.findall('assets/asset'):
                asset_url = xpath_text(asset, 'default/url', default=None)
                if not asset_url:
                    continue
                quality = asset.get('quality')
                thumbnails.append({
                    'url': asset_url,
                    'id': quality,
                    'preference': quality_key(quality),
                })
        elif media.get('type') == 'video':
            title = xpath_text(media, 'title', fatal=True)
            for asset in media.findall('assets/asset'):
                quality = asset.get('quality')
                # Qualities such as "720p" carry the frame height.
                height = int_or_none(self._search_regex(
                    r'^(\d+)[pP]$', quality or '', 'height', default=None))
                for node in asset:
                    # Skip child nodes lacking either half of the RTMP address.
                    streamer = xpath_text(node, 'streamerURI', default=None)
                    if not streamer:
                        continue
                    play_path = xpath_text(node, 'url', default=None)
                    if not play_path:
                        continue
                    formats.append({
                        'url': streamer,
                        # Everything after the third "/" of the streamer URI.
                        'app': streamer.split('/', 3)[3],
                        'play_path': play_path,
                        'rtmp_live': False,
                        'ext': 'flv',
                        'format_id': '%s-%s' % (node.tag, quality) if quality else node.tag,
                        'height': height,
                    })
            self._sort_formats(formats)
            description = clean_html(xpath_text(media, 'description'))
            uploader = xpath_text(media, 'author')
            duration = int_or_none(media.get('duration'))
            for subtitle in media.findall('./subtitles/subtitle'):
                subtitle_url = xpath_text(subtitle, 'url', default=None)
                if not subtitle_url:
                    continue
                lang = xpath_text(subtitle, 'lang', default='en')
                subtitles.setdefault(lang, []).append({
                    'url': subtitle_url,
                    'ext': (subtitle.get('format') or determine_ext(subtitle_url)).lower(),
                })

    return {
        'id': video_id,
        'title': title,
        'description': description,
        'thumbnails': thumbnails,
        'duration': duration,
        'creator': uploader,
        'uploader': uploader,
        'formats': formats,
        'subtitles': subtitles,
    }
|
||||
|
@ -47,7 +47,8 @@ class TwentyFourVideoIE(InfoExtractor):
|
||||
|
||||
title = self._og_search_title(webpage)
|
||||
description = self._html_search_regex(
|
||||
r'<span itemprop="description">([^<]+)</span>', webpage, 'description', fatal=False)
|
||||
r'<(p|span)[^>]+itemprop="description"[^>]*>(?P<description>[^<]+)</\1>',
|
||||
webpage, 'description', fatal=False, group='description')
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
duration = int_or_none(self._og_search_property(
|
||||
'duration', webpage, 'duration', fatal=False))
|
||||
|
@ -1549,15 +1549,11 @@ def setproctitle(title):
|
||||
|
||||
|
||||
def remove_start(s, start):
    """Return *s* with the leading *start* removed.

    *s* is returned unchanged when it does not begin with *start*, and
    ``None`` is passed through as ``None`` instead of raising
    ``AttributeError``.
    """
    if s is not None and s.startswith(start):
        return s[len(start):]
    return s
|
||||
|
||||
|
||||
def remove_end(s, end):
    """Return *s* with the trailing *end* removed.

    *s* is returned unchanged when it does not finish with *end*, and
    ``None`` is passed through as ``None`` instead of raising
    ``AttributeError``.
    """
    if s is not None and s.endswith(end):
        # Slice by absolute position: ``s[:-len(end)]`` would be ``s[:0]``
        # (an empty string) when *end* is empty.
        return s[:len(s) - len(end)]
    return s
|
||||
|
||||
|
||||
def remove_quotes(s):
|
||||
|
Loading…
x
Reference in New Issue
Block a user