This commit is contained in:
Gilles Habran 2016-06-02 08:44:42 +02:00
commit 47e63682bb
14 changed files with 320 additions and 191 deletions

View File

@ -6,8 +6,8 @@
--- ---
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.05.30.2*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. ### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.06.02*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.05.30.2** - [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.06.02**
### Before submitting an *issue* make sure you have: ### Before submitting an *issue* make sure you have:
- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
@ -35,7 +35,7 @@ $ youtube-dl -v <your command line>
[debug] User config: [] [debug] User config: []
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
[debug] youtube-dl version 2016.05.30.2 [debug] youtube-dl version 2016.06.02
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
[debug] Proxy map: {} [debug] Proxy map: {}

View File

@ -162,7 +162,7 @@ which means you can modify it, redistribute it or use it however you like.
(experimental) (experimental)
## Download Options: ## Download Options:
-r, --rate-limit LIMIT Maximum download rate in bytes per second -r, --limit-rate RATE Maximum download rate in bytes per second
(e.g. 50K or 4.2M) (e.g. 50K or 4.2M)
-R, --retries RETRIES Number of retries (default is 10), or -R, --retries RETRIES Number of retries (default is 10), or
"infinite". "infinite".

View File

@ -55,6 +55,7 @@
- **arte.tv:future** - **arte.tv:future**
- **arte.tv:info** - **arte.tv:info**
- **arte.tv:magazine** - **arte.tv:magazine**
- **arte.tv:playlist**
- **AtresPlayer** - **AtresPlayer**
- **ATTTechChannel** - **ATTTechChannel**
- **AudiMedia** - **AudiMedia**

View File

@ -61,10 +61,7 @@ class ArteTvIE(InfoExtractor):
} }
class ArteTVPlus7IE(InfoExtractor): class ArteTVBaseIE(InfoExtractor):
IE_NAME = 'arte.tv:+7'
_VALID_URL = r'https?://(?:www\.)?arte\.tv/guide/(?P<lang>fr|de|en|es)/(?:(?:sendungen|emissions|embed)/)?(?P<id>[^/]+)/(?P<name>[^/?#&]+)'
@classmethod @classmethod
def _extract_url_info(cls, url): def _extract_url_info(cls, url):
mobj = re.match(cls._VALID_URL, url) mobj = re.match(cls._VALID_URL, url)
@ -78,60 +75,6 @@ class ArteTVPlus7IE(InfoExtractor):
video_id = mobj.group('id') video_id = mobj.group('id')
return video_id, lang return video_id, lang
def _real_extract(self, url):
video_id, lang = self._extract_url_info(url)
webpage = self._download_webpage(url, video_id)
return self._extract_from_webpage(webpage, video_id, lang)
def _extract_from_webpage(self, webpage, video_id, lang):
patterns_templates = (r'arte_vp_url=["\'](.*?%s.*?)["\']', r'data-url=["\']([^"]+%s[^"]+)["\']')
ids = (video_id, '')
# some pages contain multiple videos (like
# http://www.arte.tv/guide/de/sendungen/XEN/xenius/?vid=055918-015_PLUS7-D),
# so we first try to look for json URLs that contain the video id from
# the 'vid' parameter.
patterns = [t % re.escape(_id) for _id in ids for t in patterns_templates]
json_url = self._html_search_regex(
patterns, webpage, 'json vp url', default=None)
if not json_url:
def find_iframe_url(webpage, default=NO_DEFAULT):
return self._html_search_regex(
r'<iframe[^>]+src=(["\'])(?P<url>.+\bjson_url=.+?)\1',
webpage, 'iframe url', group='url', default=default)
iframe_url = find_iframe_url(webpage, None)
if not iframe_url:
embed_url = self._html_search_regex(
r'arte_vp_url_oembed=\'([^\']+?)\'', webpage, 'embed url', default=None)
if embed_url:
player = self._download_json(
embed_url, video_id, 'Downloading player page')
iframe_url = find_iframe_url(player['html'])
# en and es URLs produce react-based pages with different layout (e.g.
# http://www.arte.tv/guide/en/053330-002-A/carnival-italy?zone=world)
if not iframe_url:
program = self._search_regex(
r'program\s*:\s*({.+?["\']embed_html["\'].+?}),?\s*\n',
webpage, 'program', default=None)
if program:
embed_html = self._parse_json(program, video_id)
if embed_html:
iframe_url = find_iframe_url(embed_html['embed_html'])
if iframe_url:
json_url = compat_parse_qs(
compat_urllib_parse_urlparse(iframe_url).query)['json_url'][0]
if json_url:
title = self._search_regex(
r'<h3[^>]+title=(["\'])(?P<title>.+?)\1',
webpage, 'title', default=None, group='title')
return self._extract_from_json_url(json_url, video_id, lang, title=title)
# Different kind of embed URL (e.g.
# http://www.arte.tv/magazine/trepalium/fr/episode-0406-replay-trepalium)
embed_url = self._search_regex(
r'<iframe[^>]+src=(["\'])(?P<url>.+?)\1',
webpage, 'embed url', group='url')
return self.url_result(embed_url)
def _extract_from_json_url(self, json_url, video_id, lang, title=None): def _extract_from_json_url(self, json_url, video_id, lang, title=None):
info = self._download_json(json_url, video_id) info = self._download_json(json_url, video_id)
player_info = info['videoJsonPlayer'] player_info = info['videoJsonPlayer']
@ -235,6 +178,74 @@ class ArteTVPlus7IE(InfoExtractor):
return info_dict return info_dict
class ArteTVPlus7IE(ArteTVBaseIE):
IE_NAME = 'arte.tv:+7'
_VALID_URL = r'https?://(?:www\.)?arte\.tv/guide/(?P<lang>fr|de|en|es)/(?:(?:sendungen|emissions|embed)/)?(?P<id>[^/]+)/(?P<name>[^/?#&]+)'
_TESTS = [{
'url': 'http://www.arte.tv/guide/de/sendungen/XEN/xenius/?vid=055918-015_PLUS7-D',
'only_matching': True,
}]
@classmethod
def suitable(cls, url):
return False if ArteTVPlaylistIE.suitable(url) else super(ArteTVPlus7IE, cls).suitable(url)
def _real_extract(self, url):
video_id, lang = self._extract_url_info(url)
webpage = self._download_webpage(url, video_id)
return self._extract_from_webpage(webpage, video_id, lang)
def _extract_from_webpage(self, webpage, video_id, lang):
patterns_templates = (r'arte_vp_url=["\'](.*?%s.*?)["\']', r'data-url=["\']([^"]+%s[^"]+)["\']')
ids = (video_id, '')
# some pages contain multiple videos (like
# http://www.arte.tv/guide/de/sendungen/XEN/xenius/?vid=055918-015_PLUS7-D),
# so we first try to look for json URLs that contain the video id from
# the 'vid' parameter.
patterns = [t % re.escape(_id) for _id in ids for t in patterns_templates]
json_url = self._html_search_regex(
patterns, webpage, 'json vp url', default=None)
if not json_url:
def find_iframe_url(webpage, default=NO_DEFAULT):
return self._html_search_regex(
r'<iframe[^>]+src=(["\'])(?P<url>.+\bjson_url=.+?)\1',
webpage, 'iframe url', group='url', default=default)
iframe_url = find_iframe_url(webpage, None)
if not iframe_url:
embed_url = self._html_search_regex(
r'arte_vp_url_oembed=\'([^\']+?)\'', webpage, 'embed url', default=None)
if embed_url:
player = self._download_json(
embed_url, video_id, 'Downloading player page')
iframe_url = find_iframe_url(player['html'])
# en and es URLs produce react-based pages with different layout (e.g.
# http://www.arte.tv/guide/en/053330-002-A/carnival-italy?zone=world)
if not iframe_url:
program = self._search_regex(
r'program\s*:\s*({.+?["\']embed_html["\'].+?}),?\s*\n',
webpage, 'program', default=None)
if program:
embed_html = self._parse_json(program, video_id)
if embed_html:
iframe_url = find_iframe_url(embed_html['embed_html'])
if iframe_url:
json_url = compat_parse_qs(
compat_urllib_parse_urlparse(iframe_url).query)['json_url'][0]
if json_url:
title = self._search_regex(
r'<h3[^>]+title=(["\'])(?P<title>.+?)\1',
webpage, 'title', default=None, group='title')
return self._extract_from_json_url(json_url, video_id, lang, title=title)
# Different kind of embed URL (e.g.
# http://www.arte.tv/magazine/trepalium/fr/episode-0406-replay-trepalium)
embed_url = self._search_regex(
r'<iframe[^>]+src=(["\'])(?P<url>.+?)\1',
webpage, 'embed url', group='url')
return self.url_result(embed_url)
# It also uses the arte_vp_url url from the webpage to extract the information # It also uses the arte_vp_url url from the webpage to extract the information
class ArteTVCreativeIE(ArteTVPlus7IE): class ArteTVCreativeIE(ArteTVPlus7IE):
IE_NAME = 'arte.tv:creative' IE_NAME = 'arte.tv:creative'
@ -267,7 +278,7 @@ class ArteTVInfoIE(ArteTVPlus7IE):
IE_NAME = 'arte.tv:info' IE_NAME = 'arte.tv:info'
_VALID_URL = r'https?://info\.arte\.tv/(?P<lang>fr|de|en|es)/(?:[^/]+/)*(?P<id>[^/?#&]+)' _VALID_URL = r'https?://info\.arte\.tv/(?P<lang>fr|de|en|es)/(?:[^/]+/)*(?P<id>[^/?#&]+)'
_TEST = { _TESTS = [{
'url': 'http://info.arte.tv/fr/service-civique-un-cache-misere', 'url': 'http://info.arte.tv/fr/service-civique-un-cache-misere',
'info_dict': { 'info_dict': {
'id': '067528-000-A', 'id': '067528-000-A',
@ -275,7 +286,7 @@ class ArteTVInfoIE(ArteTVPlus7IE):
'title': 'Service civique, un cache misère ?', 'title': 'Service civique, un cache misère ?',
'upload_date': '20160403', 'upload_date': '20160403',
}, },
} }]
class ArteTVFutureIE(ArteTVPlus7IE): class ArteTVFutureIE(ArteTVPlus7IE):
@ -300,6 +311,8 @@ class ArteTVDDCIE(ArteTVPlus7IE):
IE_NAME = 'arte.tv:ddc' IE_NAME = 'arte.tv:ddc'
_VALID_URL = r'https?://ddc\.arte\.tv/(?P<lang>emission|folge)/(?P<id>[^/?#&]+)' _VALID_URL = r'https?://ddc\.arte\.tv/(?P<lang>emission|folge)/(?P<id>[^/?#&]+)'
_TESTS = []
def _real_extract(self, url): def _real_extract(self, url):
video_id, lang = self._extract_url_info(url) video_id, lang = self._extract_url_info(url)
if lang == 'folge': if lang == 'folge':
@ -318,7 +331,7 @@ class ArteTVConcertIE(ArteTVPlus7IE):
IE_NAME = 'arte.tv:concert' IE_NAME = 'arte.tv:concert'
_VALID_URL = r'https?://concert\.arte\.tv/(?P<lang>fr|de|en|es)/(?P<id>[^/?#&]+)' _VALID_URL = r'https?://concert\.arte\.tv/(?P<lang>fr|de|en|es)/(?P<id>[^/?#&]+)'
_TEST = { _TESTS = [{
'url': 'http://concert.arte.tv/de/notwist-im-pariser-konzertclub-divan-du-monde', 'url': 'http://concert.arte.tv/de/notwist-im-pariser-konzertclub-divan-du-monde',
'md5': '9ea035b7bd69696b67aa2ccaaa218161', 'md5': '9ea035b7bd69696b67aa2ccaaa218161',
'info_dict': { 'info_dict': {
@ -328,14 +341,14 @@ class ArteTVConcertIE(ArteTVPlus7IE):
'upload_date': '20140128', 'upload_date': '20140128',
'description': 'md5:486eb08f991552ade77439fe6d82c305', 'description': 'md5:486eb08f991552ade77439fe6d82c305',
}, },
} }]
class ArteTVCinemaIE(ArteTVPlus7IE): class ArteTVCinemaIE(ArteTVPlus7IE):
IE_NAME = 'arte.tv:cinema' IE_NAME = 'arte.tv:cinema'
_VALID_URL = r'https?://cinema\.arte\.tv/(?P<lang>fr|de|en|es)/(?P<id>.+)' _VALID_URL = r'https?://cinema\.arte\.tv/(?P<lang>fr|de|en|es)/(?P<id>.+)'
_TEST = { _TESTS = [{
'url': 'http://cinema.arte.tv/de/node/38291', 'url': 'http://cinema.arte.tv/de/node/38291',
'md5': '6b275511a5107c60bacbeeda368c3aa1', 'md5': '6b275511a5107c60bacbeeda368c3aa1',
'info_dict': { 'info_dict': {
@ -345,7 +358,7 @@ class ArteTVCinemaIE(ArteTVPlus7IE):
'upload_date': '20160122', 'upload_date': '20160122',
'description': 'md5:7f749bbb77d800ef2be11d54529b96bc', 'description': 'md5:7f749bbb77d800ef2be11d54529b96bc',
}, },
} }]
class ArteTVMagazineIE(ArteTVPlus7IE): class ArteTVMagazineIE(ArteTVPlus7IE):
@ -390,9 +403,41 @@ class ArteTVEmbedIE(ArteTVPlus7IE):
) )
''' '''
_TESTS = []
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id') video_id = mobj.group('id')
lang = mobj.group('lang') lang = mobj.group('lang')
json_url = mobj.group('json_url') json_url = mobj.group('json_url')
return self._extract_from_json_url(json_url, video_id, lang) return self._extract_from_json_url(json_url, video_id, lang)
class ArteTVPlaylistIE(ArteTVBaseIE):
IE_NAME = 'arte.tv:playlist'
_VALID_URL = r'https?://(?:www\.)?arte\.tv/guide/(?P<lang>fr|de|en|es)/[^#]*#collection/(?P<id>PL-\d+)'
_TESTS = [{
'url': 'http://www.arte.tv/guide/de/plus7/?country=DE#collection/PL-013263/ARTETV',
'info_dict': {
'id': 'PL-013263',
'title': 'Areva & Uramin',
},
'playlist_mincount': 6,
}, {
'url': 'http://www.arte.tv/guide/de/playlists?country=DE#collection/PL-013190/ARTETV',
'only_matching': True,
}]
def _real_extract(self, url):
playlist_id, lang = self._extract_url_info(url)
collection = self._download_json(
'https://api.arte.tv/api/player/v1/collectionData/%s/%s?source=videos'
% (lang, playlist_id), playlist_id)
title = collection.get('title')
description = collection.get('shortDescription') or collection.get('teaserText')
entries = [
self._extract_from_json_url(
video['jsonUrl'], video.get('programId') or playlist_id, lang)
for video in collection['videos'] if video.get('jsonUrl')]
return self.playlist_result(entries, playlist_id, title, description)

View File

@ -56,6 +56,7 @@ from .arte import (
ArteTVDDCIE, ArteTVDDCIE,
ArteTVMagazineIE, ArteTVMagazineIE,
ArteTVEmbedIE, ArteTVEmbedIE,
ArteTVPlaylistIE,
) )
from .atresplayer import AtresPlayerIE from .atresplayer import AtresPlayerIE
from .atttechchannel import ATTTechChannelIE from .atttechchannel import ATTTechChannelIE
@ -638,7 +639,10 @@ from .regiotv import RegioTVIE
from .restudy import RestudyIE from .restudy import RestudyIE
from .reuters import ReutersIE from .reuters import ReutersIE
from .reverbnation import ReverbNationIE from .reverbnation import ReverbNationIE
from .revision3 import Revision3IE from .revision3 import (
Revision3EmbedIE,
Revision3IE,
)
from .rice import RICEIE from .rice import RICEIE
from .ringtv import RingTVIE from .ringtv import RingTVIE
from .ro220 import Ro220IE from .ro220 import Ro220IE
@ -677,6 +681,7 @@ from .screencast import ScreencastIE
from .screencastomatic import ScreencastOMaticIE from .screencastomatic import ScreencastOMaticIE
from .screenjunkies import ScreenJunkiesIE from .screenjunkies import ScreenJunkiesIE
from .screenwavemedia import ScreenwaveMediaIE, TeamFourIE from .screenwavemedia import ScreenwaveMediaIE, TeamFourIE
from .seeker import SeekerIE
from .senateisvp import SenateISVPIE from .senateisvp import SenateISVPIE
from .sendtonews import SendtoNewsIE from .sendtonews import SendtoNewsIE
from .servingsys import ServingSysIE from .servingsys import ServingSysIE

View File

@ -881,18 +881,6 @@ class GenericIE(InfoExtractor):
'title': 'EP3S5 - Bon Appétit - Baqueira Mi Corazon !', 'title': 'EP3S5 - Bon Appétit - Baqueira Mi Corazon !',
} }
}, },
# Kaltura embed
{
'url': 'http://www.monumentalnetwork.com/videos/john-carlson-postgame-2-25-15',
'info_dict': {
'id': '1_eergr3h1',
'ext': 'mp4',
'upload_date': '20150226',
'uploader_id': 'MonumentalSports-Kaltura@perfectsensedigital.com',
'timestamp': int,
'title': 'John Carlson Postgame 2/25/15',
},
},
# Kaltura embed (different embed code) # Kaltura embed (different embed code)
{ {
'url': 'http://www.premierchristianradio.com/Shows/Saturday/Unbelievable/Conference-Videos/Os-Guinness-Is-It-Fools-Talk-Unbelievable-Conference-2014', 'url': 'http://www.premierchristianradio.com/Shows/Saturday/Unbelievable/Conference-Videos/Os-Guinness-Is-It-Fools-Talk-Unbelievable-Conference-2014',
@ -918,6 +906,19 @@ class GenericIE(InfoExtractor):
'uploader_id': 'echojecka', 'uploader_id': 'echojecka',
}, },
}, },
# Kaltura embed with single quotes
{
'url': 'http://fod.infobase.com/p_ViewPlaylist.aspx?AssignmentID=NUN8ZY',
'info_dict': {
'id': '0_izeg5utt',
'ext': 'mp4',
'title': '35871',
'timestamp': 1355743100,
'upload_date': '20121217',
'uploader_id': 'batchUser',
},
'add_ie': ['Kaltura'],
},
# Eagle.Platform embed (generic URL) # Eagle.Platform embed (generic URL)
{ {
'url': 'http://lenta.ru/news/2015/03/06/navalny/', 'url': 'http://lenta.ru/news/2015/03/06/navalny/',
@ -1032,14 +1033,18 @@ class GenericIE(InfoExtractor):
}, },
# UDN embed # UDN embed
{ {
'url': 'http://www.udn.com/news/story/7314/822787', 'url': 'https://video.udn.com/news/300346',
'md5': 'fd2060e988c326991037b9aff9df21a6', 'md5': 'fd2060e988c326991037b9aff9df21a6',
'info_dict': { 'info_dict': {
'id': '300346', 'id': '300346',
'ext': 'mp4', 'ext': 'mp4',
'title': '中一中男師變性 全校師生力挺', 'title': '中一中男師變性 全校師生力挺',
'thumbnail': 're:^https?://.*\.jpg$', 'thumbnail': 're:^https?://.*\.jpg$',
} },
'params': {
# m3u8 download
'skip_download': True,
},
}, },
# Ooyala embed # Ooyala embed
{ {
@ -1903,7 +1908,7 @@ class GenericIE(InfoExtractor):
return self.url_result(mobj.group('url'), 'Zapiks') return self.url_result(mobj.group('url'), 'Zapiks')
# Look for Kaltura embeds # Look for Kaltura embeds
mobj = (re.search(r"(?s)kWidget\.(?:thumb)?[Ee]mbed\(\{.*?'wid'\s*:\s*'_?(?P<partner_id>[^']+)',.*?'entry_?[Ii]d'\s*:\s*'(?P<id>[^']+)',", webpage) or mobj = (re.search(r"(?s)kWidget\.(?:thumb)?[Ee]mbed\(\{.*?(?P<q1>['\"])wid(?P=q1)\s*:\s*(?P<q2>['\"])_?(?P<partner_id>[^'\"]+)(?P=q2),.*?(?P<q3>['\"])entry_?[Ii]d(?P=q3)\s*:\s*(?P<q4>['\"])(?P<id>[^'\"]+)(?P=q4),", webpage) or
re.search(r'(?s)(?P<q1>["\'])(?:https?:)?//cdnapi(?:sec)?\.kaltura\.com/.*?(?:p|partner_id)/(?P<partner_id>\d+).*?(?P=q1).*?entry_?[Ii]d\s*:\s*(?P<q2>["\'])(?P<id>.+?)(?P=q2)', webpage)) re.search(r'(?s)(?P<q1>["\'])(?:https?:)?//cdnapi(?:sec)?\.kaltura\.com/.*?(?:p|partner_id)/(?P<partner_id>\d+).*?(?P=q1).*?entry_?[Ii]d\s*:\s*(?P<q2>["\'])(?P<id>.+?)(?P=q2)', webpage))
if mobj is not None: if mobj is not None:
return self.url_result(smuggle_url( return self.url_result(smuggle_url(

View File

@ -13,8 +13,64 @@ from ..utils import (
) )
class Revision3EmbedIE(InfoExtractor):
IE_NAME = 'revision3:embed'
_VALID_URL = r'(?:revision3:(?:(?P<playlist_type>[^:]+):)?|https?://(?:(?:(?:www|embed)\.)?(?:revision3|animalist)|(?:(?:api|embed)\.)?seekernetwork)\.com/player/embed\?videoId=)(?P<playlist_id>\d+)'
_TEST = {
'url': 'http://api.seekernetwork.com/player/embed?videoId=67558',
'md5': '83bcd157cab89ad7318dd7b8c9cf1306',
'info_dict': {
'id': '67558',
'ext': 'mp4',
'title': 'The Pros & Cons Of Zoos',
'description': 'Zoos are often depicted as a terrible place for animals to live, but is there any truth to this?',
'uploader_id': 'dnews',
'uploader': 'DNews',
}
}
_API_KEY = 'ba9c741bce1b9d8e3defcc22193f3651b8867e62'
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
playlist_id = mobj.group('playlist_id')
playlist_type = mobj.group('playlist_type') or 'video_id'
video_data = self._download_json(
'http://revision3.com/api/getPlaylist.json', playlist_id, query={
'api_key': self._API_KEY,
'codecs': 'h264,vp8,theora',
playlist_type: playlist_id,
})['items'][0]
formats = []
for vcodec, media in video_data['media'].items():
for quality_id, quality in media.items():
if quality_id == 'hls':
formats.extend(self._extract_m3u8_formats(
quality['url'], playlist_id, 'mp4',
'm3u8_native', m3u8_id='hls', fatal=False))
else:
formats.append({
'url': quality['url'],
'format_id': '%s-%s' % (vcodec, quality_id),
'tbr': int_or_none(quality.get('bitrate')),
'vcodec': vcodec,
})
self._sort_formats(formats)
return {
'id': playlist_id,
'title': unescapeHTML(video_data['title']),
'description': unescapeHTML(video_data.get('summary')),
'uploader': video_data.get('show', {}).get('name'),
'uploader_id': video_data.get('show', {}).get('slug'),
'duration': int_or_none(video_data.get('duration')),
'formats': formats,
}
class Revision3IE(InfoExtractor): class Revision3IE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:revision3|testtube|animalist)\.com)/(?P<id>[^/]+(?:/[^/?#]+)?)' IE_NAME = 'revision'
_VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:revision3|animalist)\.com)/(?P<id>[^/]+(?:/[^/?#]+)?)'
_TESTS = [{ _TESTS = [{
'url': 'http://www.revision3.com/technobuffalo/5-google-predictions-for-2016', 'url': 'http://www.revision3.com/technobuffalo/5-google-predictions-for-2016',
'md5': 'd94a72d85d0a829766de4deb8daaf7df', 'md5': 'd94a72d85d0a829766de4deb8daaf7df',
@ -32,52 +88,14 @@ class Revision3IE(InfoExtractor):
} }
}, { }, {
# Show # Show
'url': 'http://testtube.com/brainstuff', 'url': 'http://revision3.com/variant',
'info_dict': { 'only_matching': True,
'id': '251',
'title': 'BrainStuff',
'description': 'Whether the topic is popcorn or particle physics, you can count on the HowStuffWorks team to explore-and explain-the everyday science in the world around us on BrainStuff.',
},
'playlist_mincount': 93,
}, {
'url': 'https://testtube.com/dnews/5-weird-ways-plants-can-eat-animals?utm_source=FB&utm_medium=DNews&utm_campaign=DNewsSocial',
'info_dict': {
'id': '58227',
'display_id': 'dnews/5-weird-ways-plants-can-eat-animals',
'duration': 275,
'ext': 'webm',
'title': '5 Weird Ways Plants Can Eat Animals',
'description': 'Why have some plants evolved to eat meat?',
'upload_date': '20150120',
'timestamp': 1421763300,
'uploader': 'DNews',
'uploader_id': 'dnews',
},
}, {
'url': 'http://testtube.com/tt-editors-picks/the-israel-palestine-conflict-explained-in-ten-min',
'info_dict': {
'id': '71618',
'ext': 'mp4',
'display_id': 'tt-editors-picks/the-israel-palestine-conflict-explained-in-ten-min',
'title': 'The Israel-Palestine Conflict Explained in Ten Minutes',
'description': 'If you\'d like to learn about the struggle between Israelis and Palestinians, this video is a great place to start',
'uploader': 'Editors\' Picks',
'uploader_id': 'tt-editors-picks',
'timestamp': 1453309200,
'upload_date': '20160120',
},
'add_ie': ['Youtube'],
}, { }, {
# Tag # Tag
'url': 'http://testtube.com/tech-news', 'url': 'http://revision3.com/vr',
'info_dict': { 'only_matching': True,
'id': '21018',
'title': 'tech news',
},
'playlist_mincount': 9,
}] }]
_PAGE_DATA_TEMPLATE = 'http://www.%s/apiProxy/ddn/%s?domain=%s' _PAGE_DATA_TEMPLATE = 'http://www.%s/apiProxy/ddn/%s?domain=%s'
_API_KEY = 'ba9c741bce1b9d8e3defcc22193f3651b8867e62'
def _real_extract(self, url): def _real_extract(self, url):
domain, display_id = re.match(self._VALID_URL, url).groups() domain, display_id = re.match(self._VALID_URL, url).groups()
@ -119,33 +137,9 @@ class Revision3IE(InfoExtractor):
}) })
return info return info
video_data = self._download_json(
'http://revision3.com/api/getPlaylist.json?api_key=%s&codecs=h264,vp8,theora&video_id=%s' % (self._API_KEY, video_id),
video_id)['items'][0]
formats = []
for vcodec, media in video_data['media'].items():
for quality_id, quality in media.items():
if quality_id == 'hls':
formats.extend(self._extract_m3u8_formats(
quality['url'], video_id, 'mp4',
'm3u8_native', m3u8_id='hls', fatal=False))
else:
formats.append({
'url': quality['url'],
'format_id': '%s-%s' % (vcodec, quality_id),
'tbr': int_or_none(quality.get('bitrate')),
'vcodec': vcodec,
})
self._sort_formats(formats)
info.update({ info.update({
'title': unescapeHTML(video_data['title']), '_type': 'url_transparent',
'description': unescapeHTML(video_data.get('summary')), 'url': 'revision3:%s' % video_id,
'uploader': video_data.get('show', {}).get('name'),
'uploader_id': video_data.get('show', {}).get('slug'),
'duration': int_or_none(video_data.get('duration')),
'formats': formats,
}) })
return info return info
else: else:

View File

@ -0,0 +1,57 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
class SeekerIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?seeker\.com/(?P<display_id>.*)-(?P<article_id>\d+)\.html'
_TESTS = [{
# player.loadRevision3Item
'url': 'http://www.seeker.com/should-trump-be-required-to-release-his-tax-returns-1833805621.html',
'md5': '30c1dc4030cc715cf05b423d0947ac18',
'info_dict': {
'id': '76243',
'ext': 'webm',
'title': 'Should Trump Be Required To Release His Tax Returns?',
'description': 'Donald Trump has been secretive about his "big," "beautiful" tax returns. So what can we learn if he decides to release them?',
'uploader': 'Seeker Daily',
'uploader_id': 'seekerdaily',
}
}, {
'url': 'http://www.seeker.com/changes-expected-at-zoos-following-recent-gorilla-lion-shootings-1834116536.html',
'playlist': [
{
'md5': '83bcd157cab89ad7318dd7b8c9cf1306',
'info_dict': {
'id': '67558',
'ext': 'mp4',
'title': 'The Pros & Cons Of Zoos',
'description': 'Zoos are often depicted as a terrible place for animals to live, but is there any truth to this?',
'uploader': 'DNews',
'uploader_id': 'dnews',
},
}
],
'info_dict': {
'id': '1834116536',
'title': 'After Gorilla Killing, Changes Ahead for Zoos',
'description': 'The largest association of zoos and others are hoping to learn from recent incidents that led to the shooting deaths of a gorilla and two lions.',
},
}]
def _real_extract(self, url):
display_id, article_id = re.match(self._VALID_URL, url).groups()
webpage = self._download_webpage(url, display_id)
mobj = re.search(r"player\.loadRevision3Item\('([^']+)'\s*,\s*(\d+)\);", webpage)
if mobj:
playlist_type, playlist_id = mobj.groups()
return self.url_result(
'revision3:%s:%s' % (playlist_type, playlist_id), 'Revision3Embed', playlist_id)
else:
entries = [self.url_result('revision3:video_id:%s' % video_id, 'Revision3Embed', video_id) for video_id in re.findall(
r'<iframe[^>]+src=[\'"](?:https?:)?//api\.seekernetwork\.com/player/embed\?videoId=(\d+)', webpage)]
return self.playlist_result(
entries, article_id, self._og_search_title(webpage), self._og_search_description(webpage))

View File

@ -96,20 +96,18 @@ class SpankwireIE(InfoExtractor):
formats = [] formats = []
for height, video_url in zip(heights, video_urls): for height, video_url in zip(heights, video_urls):
path = compat_urllib_parse_urlparse(video_url).path path = compat_urllib_parse_urlparse(video_url).path
_, quality = path.split('/')[4].split('_')[:2] m = re.search(r'/(?P<height>\d+)[pP]_(?P<tbr>\d+)[kK]', path)
f = { if m:
'url': video_url, tbr = int(m.group('tbr'))
'height': height, height = int(m.group('height'))
}
tbr = self._search_regex(r'^(\d+)[Kk]$', quality, 'tbr', default=None)
if tbr:
f.update({
'tbr': int(tbr),
'format_id': '%dp' % height,
})
else: else:
f['format_id'] = quality tbr = None
formats.append(f) formats.append({
'url': video_url,
'format_id': '%dp' % height,
'height': height,
'tbr': tbr,
})
self._sort_formats(formats) self._sort_formats(formats)
age_limit = self._rta_search(webpage) age_limit = self._rta_search(webpage)

View File

@ -48,6 +48,6 @@ class TF1IE(InfoExtractor):
video_id = self._match_id(url) video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
wat_id = self._html_search_regex( wat_id = self._html_search_regex(
r'(["\'])(?:https?:)?//www\.wat\.tv/embedframe/.*?(?P<id>\d{8})(?:.*?)?\1', r'(["\'])(?:https?:)?//www\.wat\.tv/embedframe/.*?(?P<id>\d{8}).*?\1',
webpage, 'wat id', group='id') webpage, 'wat id', group='id')
return self.url_result('wat:%s' % wat_id, 'Wat') return self.url_result('wat:%s' % wat_id, 'Wat')

View File

@ -2,10 +2,13 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import json import json
import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
determine_ext,
int_or_none,
js_to_json, js_to_json,
ExtractorError,
) )
from ..compat import compat_urlparse from ..compat import compat_urlparse
@ -16,13 +19,16 @@ class UDNEmbedIE(InfoExtractor):
_VALID_URL = r'https?:' + _PROTOCOL_RELATIVE_VALID_URL _VALID_URL = r'https?:' + _PROTOCOL_RELATIVE_VALID_URL
_TESTS = [{ _TESTS = [{
'url': 'http://video.udn.com/embed/news/300040', 'url': 'http://video.udn.com/embed/news/300040',
'md5': 'de06b4c90b042c128395a88f0384817e',
'info_dict': { 'info_dict': {
'id': '300040', 'id': '300040',
'ext': 'mp4', 'ext': 'mp4',
'title': '生物老師男變女 全校挺"做自己"', 'title': '生物老師男變女 全校挺"做自己"',
'thumbnail': 're:^https?://.*\.jpg$', 'thumbnail': 're:^https?://.*\.jpg$',
} },
'params': {
# m3u8 download
'skip_download': True,
},
}, { }, {
'url': 'https://video.udn.com/embed/news/300040', 'url': 'https://video.udn.com/embed/news/300040',
'only_matching': True, 'only_matching': True,
@ -38,39 +44,53 @@ class UDNEmbedIE(InfoExtractor):
page = self._download_webpage(url, video_id) page = self._download_webpage(url, video_id)
options = json.loads(js_to_json(self._html_search_regex( options = json.loads(js_to_json(self._html_search_regex(
r'var options\s*=\s*([^;]+);', page, 'video urls dictionary'))) r'var\s+options\s*=\s*([^;]+);', page, 'video urls dictionary')))
video_urls = options['video'] video_urls = options['video']
if video_urls.get('youtube'): if video_urls.get('youtube'):
return self.url_result(video_urls.get('youtube'), 'Youtube') return self.url_result(video_urls.get('youtube'), 'Youtube')
try: formats = []
del video_urls['youtube'] for video_type, api_url in video_urls.items():
except KeyError: if not api_url:
pass continue
formats = [{ video_url = self._download_webpage(
'url': self._download_webpage(
compat_urlparse.urljoin(url, api_url), video_id, compat_urlparse.urljoin(url, api_url), video_id,
'retrieve url for %s video' % video_type), note='retrieve url for %s video' % video_type)
'format_id': video_type,
'preference': 0 if video_type == 'mp4' else -1,
} for video_type, api_url in video_urls.items() if api_url]
if not formats: ext = determine_ext(video_url)
raise ExtractorError('No videos found', expected=True) if ext == 'm3u8':
formats.extend(self._extract_m3u8_formats(
video_url, video_id, ext='mp4', m3u8_id='hls'))
elif ext == 'f4m':
formats.extend(self._extract_f4m_formats(
video_url, video_id, f4m_id='hds'))
else:
mobj = re.search(r'_(?P<height>\d+)p_(?P<tbr>\d+).mp4', video_url)
a_format = {
'url': video_url,
# video_type may be 'mp4', which confuses YoutubeDL
'format_id': 'http-' + video_type,
}
if mobj:
a_format.update({
'height': int_or_none(mobj.group('height')),
'tbr': int_or_none(mobj.group('tbr')),
})
formats.append(a_format)
self._sort_formats(formats) self._sort_formats(formats)
thumbnail = None thumbnails = [{
'url': img_url,
if options.get('gallery') and len(options['gallery']): 'id': img_type,
thumbnail = options['gallery'][0].get('original') } for img_type, img_url in options.get('gallery', [{}])[0].items() if img_url]
return { return {
'id': video_id, 'id': video_id,
'formats': formats, 'formats': formats,
'title': options['title'], 'title': options['title'],
'thumbnail': thumbnail 'thumbnails': thumbnails,
} }

View File

@ -141,6 +141,10 @@ class ViewLiftIE(ViewLiftBaseIE):
}, { }, {
'url': 'http://www.kesari.tv/news/video/1461919076414', 'url': 'http://www.kesari.tv/news/video/1461919076414',
'only_matching': True, 'only_matching': True,
}, {
# Was once Kaltura embed
'url': 'https://www.monumentalsportsnetwork.com/videos/john-carlson-postgame-2-25-15',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):

View File

@ -395,8 +395,8 @@ def parseOpts(overrideArguments=None):
downloader = optparse.OptionGroup(parser, 'Download Options') downloader = optparse.OptionGroup(parser, 'Download Options')
downloader.add_option( downloader.add_option(
'-r', '--rate-limit', '-r', '--limit-rate', '--rate-limit',
dest='ratelimit', metavar='LIMIT', dest='ratelimit', metavar='RATE',
help='Maximum download rate in bytes per second (e.g. 50K or 4.2M)') help='Maximum download rate in bytes per second (e.g. 50K or 4.2M)')
downloader.add_option( downloader.add_option(
'-R', '--retries', '-R', '--retries',

View File

@ -1,3 +1,3 @@
from __future__ import unicode_literals from __future__ import unicode_literals
__version__ = '2016.05.30.2' __version__ = '2016.06.02'