Merge branch 'master' of https://github.com/rg3/youtube-dl
This commit is contained in:
commit
faaac9b31e
6
.github/ISSUE_TEMPLATE.md
vendored
6
.github/ISSUE_TEMPLATE.md
vendored
@ -6,8 +6,8 @@
|
||||
|
||||
---
|
||||
|
||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.04.24*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
|
||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.04.24**
|
||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.05.01*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
|
||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.05.01**
|
||||
|
||||
### Before submitting an *issue* make sure you have:
|
||||
- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
|
||||
@ -35,7 +35,7 @@ $ youtube-dl -v <your command line>
|
||||
[debug] User config: []
|
||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||
[debug] youtube-dl version 2016.04.24
|
||||
[debug] youtube-dl version 2016.05.01
|
||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
||||
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
||||
[debug] Proxy map: {}
|
||||
|
1
AUTHORS
1
AUTHORS
@ -168,3 +168,4 @@ José Joaquín Atria
|
||||
Viťas Strádal
|
||||
Kagami Hiiragi
|
||||
Philip Huppert
|
||||
blahgeek
|
||||
|
@ -338,7 +338,6 @@
|
||||
- **mailru**: Видео@Mail.Ru
|
||||
- **MakersChannel**
|
||||
- **MakerTV**
|
||||
- **Malemotion**
|
||||
- **MatchTV**
|
||||
- **MDR**: MDR.DE and KiKA
|
||||
- **media.ccc.de**
|
||||
@ -375,8 +374,8 @@
|
||||
- **mtvservices:embedded**
|
||||
- **MuenchenTV**: münchen.tv
|
||||
- **MusicPlayOn**
|
||||
- **muzu.tv**
|
||||
- **Mwave**
|
||||
- **MwaveMeetGreet**
|
||||
- **MySpace**
|
||||
- **MySpace:album**
|
||||
- **MySpass**
|
||||
@ -554,7 +553,6 @@
|
||||
- **SenateISVP**
|
||||
- **ServingSys**
|
||||
- **Sexu**
|
||||
- **SexyKarma**: Sexy Karma and Watch Indian Porn
|
||||
- **Shahid**
|
||||
- **Shared**: shared.sx and vivo.sx
|
||||
- **ShareSix**
|
||||
@ -567,8 +565,6 @@
|
||||
- **smotri:broadcast**: Smotri.com broadcasts
|
||||
- **smotri:community**: Smotri.com community videos
|
||||
- **smotri:user**: Smotri.com user videos
|
||||
- **SnagFilms**
|
||||
- **SnagFilmsEmbed**
|
||||
- **Snotr**
|
||||
- **Sohu**
|
||||
- **soundcloud**
|
||||
@ -610,6 +606,7 @@
|
||||
- **Syfy**
|
||||
- **SztvHu**
|
||||
- **Tagesschau**
|
||||
- **tagesschau:player**
|
||||
- **Tapely**
|
||||
- **Tass**
|
||||
- **TDSLifeway**
|
||||
@ -725,6 +722,8 @@
|
||||
- **Vidzi**
|
||||
- **vier**
|
||||
- **vier:videos**
|
||||
- **ViewLift**
|
||||
- **ViewLiftEmbed**
|
||||
- **Viewster**
|
||||
- **Viidea**
|
||||
- **viki**
|
||||
@ -756,6 +755,7 @@
|
||||
- **Walla**
|
||||
- **WashingtonPost**
|
||||
- **wat.tv**
|
||||
- **WatchIndianPorn**: Watch Indian Porn
|
||||
- **WDR**
|
||||
- **wdr:mobile**
|
||||
- **WDRMaus**: Sendung mit der Maus
|
||||
@ -775,6 +775,10 @@
|
||||
- **XFileShare**: XFileShare based sites: GorillaVid.in, daclips.in, movpod.in, fastvideo.in, realvid.net, filehoot.com and vidto.me
|
||||
- **XHamster**
|
||||
- **XHamsterEmbed**
|
||||
- **xiami:album**: 虾米音乐 - 专辑
|
||||
- **xiami:artist**: 虾米音乐 - 歌手
|
||||
- **xiami:collection**: 虾米音乐 - 精选集
|
||||
- **xiami:song**: 虾米音乐
|
||||
- **XMinus**
|
||||
- **XNXX**
|
||||
- **Xstream**
|
||||
|
@ -1,13 +1,9 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
qualities,
|
||||
unified_strdate,
|
||||
parse_iso8601,
|
||||
)
|
||||
|
||||
|
||||
@ -19,14 +15,14 @@ class CCCIE(InfoExtractor):
|
||||
'url': 'https://media.ccc.de/v/30C3_-_5443_-_en_-_saal_g_-_201312281830_-_introduction_to_processor_design_-_byterazor#video',
|
||||
'md5': '3a1eda8f3a29515d27f5adb967d7e740',
|
||||
'info_dict': {
|
||||
'id': '30C3_-_5443_-_en_-_saal_g_-_201312281830_-_introduction_to_processor_design_-_byterazor',
|
||||
'id': '1839',
|
||||
'ext': 'mp4',
|
||||
'title': 'Introduction to Processor Design',
|
||||
'description': 'md5:80be298773966f66d56cb11260b879af',
|
||||
'description': 'md5:df55f6d073d4ceae55aae6f2fd98a0ac',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'view_count': int,
|
||||
'upload_date': '20131228',
|
||||
'duration': 3660,
|
||||
'timestamp': 1388188800,
|
||||
'duration': 3710,
|
||||
}
|
||||
}, {
|
||||
'url': 'https://media.ccc.de/v/32c3-7368-shopshifting#download',
|
||||
@ -34,79 +30,48 @@ class CCCIE(InfoExtractor):
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
event_id = self._search_regex("data-id='(\d+)'", webpage, 'event id')
|
||||
event_data = self._download_json('https://media.ccc.de/public/events/%s' % event_id, event_id)
|
||||
|
||||
if self._downloader.params.get('prefer_free_formats'):
|
||||
preference = qualities(['mp3', 'opus', 'mp4-lq', 'webm-lq', 'h264-sd', 'mp4-sd', 'webm-sd', 'mp4', 'webm', 'mp4-hd', 'h264-hd', 'webm-hd'])
|
||||
else:
|
||||
preference = qualities(['opus', 'mp3', 'webm-lq', 'mp4-lq', 'webm-sd', 'h264-sd', 'mp4-sd', 'webm', 'mp4', 'webm-hd', 'mp4-hd', 'h264-hd'])
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'(?s)<h1>(.*?)</h1>', webpage, 'title')
|
||||
description = self._html_search_regex(
|
||||
r'(?s)<h3>About</h3>(.+?)<h3>',
|
||||
webpage, 'description', fatal=False)
|
||||
upload_date = unified_strdate(self._html_search_regex(
|
||||
r"(?s)<span[^>]+class='[^']*fa-calendar-o'[^>]*>(.+?)</span>",
|
||||
webpage, 'upload date', fatal=False))
|
||||
view_count = int_or_none(self._html_search_regex(
|
||||
r"(?s)<span class='[^']*fa-eye'></span>(.*?)</li>",
|
||||
webpage, 'view count', fatal=False))
|
||||
duration = parse_duration(self._html_search_regex(
|
||||
r'(?s)<span[^>]+class=(["\']).*?fa-clock-o.*?\1[^>]*></span>(?P<duration>.+?)</li',
|
||||
webpage, 'duration', fatal=False, group='duration'))
|
||||
|
||||
matches = re.finditer(r'''(?xs)
|
||||
<(?:span|div)\s+class='label\s+filetype'>(?P<format>[^<]*)</(?:span|div)>\s*
|
||||
<(?:span|div)\s+class='label\s+filetype'>(?P<lang>[^<]*)</(?:span|div)>\s*
|
||||
<a\s+download\s+href='(?P<http_url>[^']+)'>\s*
|
||||
(?:
|
||||
.*?
|
||||
<a\s+(?:download\s+)?href='(?P<torrent_url>[^']+\.torrent)'
|
||||
)?''', webpage)
|
||||
formats = []
|
||||
for m in matches:
|
||||
format = m.group('format')
|
||||
format_id = self._search_regex(
|
||||
r'.*/([a-z0-9_-]+)/[^/]*$',
|
||||
m.group('http_url'), 'format id', default=None)
|
||||
if format_id:
|
||||
format_id = m.group('lang') + '-' + format_id
|
||||
vcodec = 'h264' if 'h264' in format_id else (
|
||||
'none' if format_id in ('mp3', 'opus') else None
|
||||
for recording in event_data.get('recordings', []):
|
||||
recording_url = recording.get('recording_url')
|
||||
if not recording_url:
|
||||
continue
|
||||
language = recording.get('language')
|
||||
folder = recording.get('folder')
|
||||
format_id = None
|
||||
if language:
|
||||
format_id = language
|
||||
if folder:
|
||||
if language:
|
||||
format_id += '-' + folder
|
||||
else:
|
||||
format_id = folder
|
||||
vcodec = 'h264' if 'h264' in folder else (
|
||||
'none' if folder in ('mp3', 'opus') else None
|
||||
)
|
||||
formats.append({
|
||||
'format_id': format_id,
|
||||
'format': format,
|
||||
'language': m.group('lang'),
|
||||
'url': m.group('http_url'),
|
||||
'url': recording_url,
|
||||
'width': int_or_none(recording.get('width')),
|
||||
'height': int_or_none(recording.get('height')),
|
||||
'filesize': int_or_none(recording.get('size'), invscale=1024 * 1024),
|
||||
'language': language,
|
||||
'vcodec': vcodec,
|
||||
'preference': preference(format_id),
|
||||
})
|
||||
|
||||
if m.group('torrent_url'):
|
||||
formats.append({
|
||||
'format_id': 'torrent-%s' % (format if format_id is None else format_id),
|
||||
'format': '%s (torrent)' % format,
|
||||
'proto': 'torrent',
|
||||
'format_note': '(unsupported; will just download the .torrent file)',
|
||||
'vcodec': vcodec,
|
||||
'preference': -100 + preference(format_id),
|
||||
'url': m.group('torrent_url'),
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
thumbnail = self._html_search_regex(
|
||||
r"<video.*?poster='([^']+)'", webpage, 'thumbnail', fatal=False)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'view_count': view_count,
|
||||
'upload_date': upload_date,
|
||||
'duration': duration,
|
||||
'id': event_id,
|
||||
'display_id': display_id,
|
||||
'title': event_data['title'],
|
||||
'description': event_data.get('description'),
|
||||
'thumbnail': event_data.get('thumb_url'),
|
||||
'timestamp': parse_iso8601(event_data.get('date')),
|
||||
'duration': int_or_none(event_data.get('length')),
|
||||
'tags': event_data.get('tags'),
|
||||
'formats': formats,
|
||||
}
|
||||
|
@ -1142,7 +1142,7 @@ class InfoExtractor(object):
|
||||
# Bandwidth of live streams may differ over time thus making
|
||||
# format_id unpredictable. So it's better to keep provided
|
||||
# format_id intact.
|
||||
if last_media_name and not live:
|
||||
if not live:
|
||||
format_id.append(last_media_name if last_media_name else '%d' % (tbr if tbr else len(formats)))
|
||||
f = {
|
||||
'format_id': '-'.join(format_id),
|
||||
|
@ -307,14 +307,17 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
||||
'video_uploader', fatal=False)
|
||||
|
||||
available_fmts = []
|
||||
for a, fmt in re.findall(r'(<a[^>]+token="showmedia\.([0-9]{3,4})p"[^>]+>.*?</a>)', webpage):
|
||||
for a, fmt in re.findall(r'(<a[^>]+token=["\']showmedia\.([0-9]{3,4})p["\'][^>]+>)', webpage):
|
||||
attrs = extract_attributes(a)
|
||||
href = attrs.get('href')
|
||||
if href and '/freetrial' in href:
|
||||
continue
|
||||
available_fmts.append(fmt)
|
||||
if not available_fmts:
|
||||
available_fmts = re.findall(r'token="showmedia\.([0-9]{3,4})p"', webpage)
|
||||
for p in (r'token=["\']showmedia\.([0-9]{3,4})p"', r'showmedia\.([0-9]{3,4})p'):
|
||||
available_fmts = re.findall(p, webpage)
|
||||
if available_fmts:
|
||||
break
|
||||
video_encode_ids = []
|
||||
formats = []
|
||||
for fmt in available_fmts:
|
||||
@ -364,6 +367,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
||||
'ext': 'flv',
|
||||
})
|
||||
formats.append(format_info)
|
||||
self._sort_formats(formats)
|
||||
|
||||
metadata = self._download_xml(
|
||||
'http://www.crunchyroll.com/xml', video_id,
|
||||
|
@ -12,39 +12,46 @@ class DFBIE(InfoExtractor):
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://tv.dfb.de/video/u-19-em-stimmen-zum-spiel-gegen-russland/11633/',
|
||||
# The md5 is different each time
|
||||
'md5': 'ac0f98a52a330f700b4b3034ad240649',
|
||||
'info_dict': {
|
||||
'id': '11633',
|
||||
'display_id': 'u-19-em-stimmen-zum-spiel-gegen-russland',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': 'U 19-EM: Stimmen zum Spiel gegen Russland',
|
||||
'upload_date': '20150714',
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
display_id = mobj.group('display_id')
|
||||
display_id, video_id = re.match(self._VALID_URL, url).groups()
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
player_info = self._download_xml(
|
||||
'http://tv.dfb.de/server/hd_video.php?play=%s' % video_id,
|
||||
display_id)
|
||||
video_info = player_info.find('video')
|
||||
stream_access_url = self._proto_relative_url(video_info.find('url').text.strip())
|
||||
|
||||
f4m_info = self._download_xml(
|
||||
self._proto_relative_url(video_info.find('url').text.strip()), display_id)
|
||||
token_el = f4m_info.find('token')
|
||||
manifest_url = token_el.attrib['url'] + '?' + 'hdnea=' + token_el.attrib['auth'] + '&hdcore=3.2.0'
|
||||
formats = self._extract_f4m_formats(manifest_url, display_id)
|
||||
formats = []
|
||||
# see http://tv.dfb.de/player/js/ajax.js for the method to extract m3u8 formats
|
||||
for sa_url in (stream_access_url, stream_access_url + '&area=&format=iphone'):
|
||||
stream_access_info = self._download_xml(sa_url, display_id)
|
||||
token_el = stream_access_info.find('token')
|
||||
manifest_url = token_el.attrib['url'] + '?' + 'hdnea=' + token_el.attrib['auth']
|
||||
if '.f4m' in manifest_url:
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
manifest_url + '&hdcore=3.2.0',
|
||||
display_id, f4m_id='hds', fatal=False))
|
||||
else:
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
manifest_url, display_id, 'mp4',
|
||||
'm3u8_native', m3u8_id='hls', fatal=False))
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'title': video_info.find('title').text,
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
'thumbnail': 'http://tv.dfb.de/images/%s_640x360.jpg' % video_id,
|
||||
'upload_date': unified_strdate(video_info.find('time_date').text),
|
||||
'formats': formats,
|
||||
}
|
||||
|
@ -33,6 +33,7 @@ class DiscoveryIE(InfoExtractor):
|
||||
'duration': 156,
|
||||
'timestamp': 1302032462,
|
||||
'upload_date': '20110405',
|
||||
'uploader_id': '103207',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # requires ffmpeg
|
||||
@ -54,7 +55,11 @@ class DiscoveryIE(InfoExtractor):
|
||||
'upload_date': '20140725',
|
||||
'timestamp': 1406246400,
|
||||
'duration': 116,
|
||||
'uploader_id': '103207',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # requires ffmpeg
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@ -66,13 +71,19 @@ class DiscoveryIE(InfoExtractor):
|
||||
entries = []
|
||||
|
||||
for idx, video_info in enumerate(info['playlist']):
|
||||
formats = self._extract_m3u8_formats(
|
||||
video_info['src'], display_id, 'mp4', 'm3u8_native', m3u8_id='hls',
|
||||
note='Download m3u8 information for video %d' % (idx + 1))
|
||||
self._sort_formats(formats)
|
||||
subtitles = {}
|
||||
caption_url = video_info.get('captionsUrl')
|
||||
if caption_url:
|
||||
subtitles = {
|
||||
'en': [{
|
||||
'url': caption_url,
|
||||
}]
|
||||
}
|
||||
|
||||
entries.append({
|
||||
'_type': 'url_transparent',
|
||||
'url': 'http://players.brightcove.net/103207/default_default/index.html?videoId=ref:%s' % video_info['referenceId'],
|
||||
'id': compat_str(video_info['id']),
|
||||
'formats': formats,
|
||||
'title': video_info['title'],
|
||||
'description': video_info.get('description'),
|
||||
'duration': parse_duration(video_info.get('video_length')),
|
||||
@ -80,6 +91,7 @@ class DiscoveryIE(InfoExtractor):
|
||||
'thumbnail': video_info.get('thumbnailURL'),
|
||||
'alt_title': video_info.get('secondary_title'),
|
||||
'timestamp': parse_iso8601(video_info.get('publishedDate')),
|
||||
'subtitles': subtitles,
|
||||
})
|
||||
|
||||
return self.playlist_result(entries, display_id, video_title)
|
||||
|
@ -724,7 +724,10 @@ from .svt import (
|
||||
from .swrmediathek import SWRMediathekIE
|
||||
from .syfy import SyfyIE
|
||||
from .sztvhu import SztvHuIE
|
||||
from .tagesschau import TagesschauIE
|
||||
from .tagesschau import (
|
||||
TagesschauPlayerIE,
|
||||
TagesschauIE,
|
||||
)
|
||||
from .tapely import TapelyIE
|
||||
from .tass import TassIE
|
||||
from .tdslifeway import TDSLifewayIE
|
||||
@ -846,7 +849,10 @@ from .veehd import VeeHDIE
|
||||
from .veoh import VeohIE
|
||||
from .vessel import VesselIE
|
||||
from .vesti import VestiIE
|
||||
from .vevo import VevoIE
|
||||
from .vevo import (
|
||||
VevoIE,
|
||||
VevoPlaylistIE,
|
||||
)
|
||||
from .vgtv import (
|
||||
BTArticleIE,
|
||||
BTVestlendingenIE,
|
||||
@ -941,6 +947,12 @@ from .xhamster import (
|
||||
XHamsterIE,
|
||||
XHamsterEmbedIE,
|
||||
)
|
||||
from .xiami import (
|
||||
XiamiSongIE,
|
||||
XiamiAlbumIE,
|
||||
XiamiArtistIE,
|
||||
XiamiCollectionIE
|
||||
)
|
||||
from .xminus import XMinusIE
|
||||
from .xnxx import XNXXIE
|
||||
from .xstream import XstreamIE
|
||||
|
@ -2,6 +2,10 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_HTTPError,
|
||||
compat_urllib_parse_unquote_plus,
|
||||
)
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
determine_ext,
|
||||
@ -27,6 +31,7 @@ class FunimationIE(InfoExtractor):
|
||||
'description': 'md5:1769f43cd5fc130ace8fd87232207892',
|
||||
'thumbnail': 're:https?://.*\.jpg',
|
||||
},
|
||||
'skip': 'Access without user interaction is forbidden by CloudFlare, and video removed',
|
||||
}, {
|
||||
'url': 'http://www.funimation.com/shows/hacksign/videos/official/role-play',
|
||||
'info_dict': {
|
||||
@ -37,6 +42,7 @@ class FunimationIE(InfoExtractor):
|
||||
'description': 'md5:b602bdc15eef4c9bbb201bb6e6a4a2dd',
|
||||
'thumbnail': 're:https?://.*\.jpg',
|
||||
},
|
||||
'skip': 'Access without user interaction is forbidden by CloudFlare',
|
||||
}, {
|
||||
'url': 'http://www.funimation.com/shows/attack-on-titan-junior-high/videos/promotional/broadcast-dub-preview',
|
||||
'info_dict': {
|
||||
@ -47,8 +53,36 @@ class FunimationIE(InfoExtractor):
|
||||
'description': 'md5:f8ec49c0aff702a7832cd81b8a44f803',
|
||||
'thumbnail': 're:https?://.*\.(?:jpg|png)',
|
||||
},
|
||||
'skip': 'Access without user interaction is forbidden by CloudFlare',
|
||||
}]
|
||||
|
||||
_LOGIN_URL = 'http://www.funimation.com/login'
|
||||
|
||||
def _download_webpage(self, *args, **kwargs):
    """Download a webpage, reporting a CloudFlare captcha block clearly.

    All arguments are forwarded unchanged to the parent class's
    ``_download_webpage`` and its result is returned as is.

    Raises:
        ExtractorError: with ``expected=True`` and instructions on using
            ``--cookies`` when the parent call failed with an HTTP 403
            whose body contains the security-check marker; otherwise the
            original ExtractorError is re-raised untouched.
    """
    try:
        return super(FunimationIE, self)._download_webpage(*args, **kwargs)
    except ExtractorError as error:
        cause = error.cause
        # Anything that is not an HTTP 403 is none of our business here.
        if not isinstance(cause, compat_HTTPError) or cause.code != 403:
            raise
        # The 403 body tells a captcha challenge apart from a plain denial.
        if b'>Please complete the security check to access<' in cause.read():
            raise ExtractorError(
                'Access to funimation.com is blocked by CloudFlare. '
                'Please browse to http://www.funimation.com/, solve '
                'the reCAPTCHA, export browser cookies to a text file,'
                ' and then try again with --cookies YOUR_COOKIE_FILE.',
                expected=True)
        raise
|
||||
|
||||
def _extract_cloudflare_session_ua(self, url):
    """Return the user agent recorded in the ``ci_session`` cookie for url.

    Looks up the ``ci_session`` cookie among the cookies for ``url``,
    URL-unquotes its value and pulls the ``user_agent`` field out of it.

    Returns:
        The user agent string, or None when there is no ``ci_session``
        cookie or the field cannot be found in it.
    """
    session_cookie = self._get_cookies(url).get('ci_session')
    if not session_cookie:
        return None
    serialized = compat_urllib_parse_unquote_plus(session_cookie.value)
    # The cookie value is a string produced by PHP's serialize(); the field
    # we need is simple enough to extract with a regular expression alone.
    return self._search_regex(
        r'"user_agent";s:\d+:"([^"]+)"', serialized, 'user agent',
        default=None)
|
||||
|
||||
def _login(self):
|
||||
(username, password) = self._get_login_info()
|
||||
if username is None:
|
||||
@ -57,8 +91,11 @@ class FunimationIE(InfoExtractor):
|
||||
'email_field': username,
|
||||
'password_field': password,
|
||||
})
|
||||
login_request = sanitized_Request('http://www.funimation.com/login', data, headers={
|
||||
'User-Agent': 'Mozilla/5.0 (Windows NT 5.2; WOW64; rv:42.0) Gecko/20100101 Firefox/42.0',
|
||||
user_agent = self._extract_cloudflare_session_ua(self._LOGIN_URL)
|
||||
if not user_agent:
|
||||
user_agent = 'Mozilla/5.0 (Windows NT 5.2; WOW64; rv:42.0) Gecko/20100101 Firefox/42.0'
|
||||
login_request = sanitized_Request(self._LOGIN_URL, data, headers={
|
||||
'User-Agent': user_agent,
|
||||
'Content-Type': 'application/x-www-form-urlencoded'
|
||||
})
|
||||
login_page = self._download_webpage(
|
||||
@ -103,11 +140,16 @@ class FunimationIE(InfoExtractor):
|
||||
('mobile', 'Mozilla/5.0 (Linux; Android 4.4.2; Nexus 4 Build/KOT49H) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.114 Mobile Safari/537.36'),
|
||||
)
|
||||
|
||||
user_agent = self._extract_cloudflare_session_ua(url)
|
||||
if user_agent:
|
||||
USER_AGENTS = ((None, user_agent),)
|
||||
|
||||
for kind, user_agent in USER_AGENTS:
|
||||
request = sanitized_Request(url)
|
||||
request.add_header('User-Agent', user_agent)
|
||||
webpage = self._download_webpage(
|
||||
request, display_id, 'Downloading %s webpage' % kind)
|
||||
request, display_id,
|
||||
'Downloading %s webpage' % kind if kind else 'Downloading webpage')
|
||||
|
||||
playlist = self._parse_json(
|
||||
self._search_regex(
|
||||
|
@ -196,7 +196,7 @@ class PBSIE(InfoExtractor):
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://www.pbs.org/tpt/constitution-usa-peter-sagal/watch/a-more-perfect-union/',
|
||||
'md5': 'ce1888486f0908d555a8093cac9a7362',
|
||||
'md5': '173dc391afd361fa72eab5d3d918968d',
|
||||
'info_dict': {
|
||||
'id': '2365006249',
|
||||
'ext': 'mp4',
|
||||
@ -204,13 +204,10 @@ class PBSIE(InfoExtractor):
|
||||
'description': 'md5:36f341ae62e251b8f5bd2b754b95a071',
|
||||
'duration': 3190,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # requires ffmpeg
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://www.pbs.org/wgbh/pages/frontline/losing-iraq/',
|
||||
'md5': '143c98aa54a346738a3d78f54c925321',
|
||||
'md5': '6f722cb3c3982186d34b0f13374499c7',
|
||||
'info_dict': {
|
||||
'id': '2365297690',
|
||||
'ext': 'mp4',
|
||||
@ -218,9 +215,6 @@ class PBSIE(InfoExtractor):
|
||||
'description': 'md5:4d3eaa01f94e61b3e73704735f1196d9',
|
||||
'duration': 5050,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # requires ffmpeg
|
||||
}
|
||||
},
|
||||
{
|
||||
'url': 'http://www.pbs.org/newshour/bb/education-jan-june12-cyberschools_02-23/',
|
||||
@ -244,9 +238,6 @@ class PBSIE(InfoExtractor):
|
||||
'duration': 6559,
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # requires ffmpeg
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://www.pbs.org/wgbh/nova/earth/killer-typhoon.html',
|
||||
@ -262,9 +253,6 @@ class PBSIE(InfoExtractor):
|
||||
'upload_date': '20140122',
|
||||
'age_limit': 10,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # requires ffmpeg
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://www.pbs.org/wgbh/pages/frontline/united-states-of-secrets/',
|
||||
@ -290,6 +278,7 @@ class PBSIE(InfoExtractor):
|
||||
},
|
||||
{
|
||||
'url': 'http://www.pbs.org/video/2365245528/',
|
||||
'md5': '115223d41bd55cda8ae5cd5ed4e11497',
|
||||
'info_dict': {
|
||||
'id': '2365245528',
|
||||
'display_id': '2365245528',
|
||||
@ -299,15 +288,13 @@ class PBSIE(InfoExtractor):
|
||||
'duration': 6851,
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # requires ffmpeg
|
||||
},
|
||||
},
|
||||
{
|
||||
# Video embedded in iframe containing angle brackets as attribute's value (e.g.
|
||||
# "<iframe style='position: absolute;<br />\ntop: 0; left: 0;' ...", see
|
||||
# https://github.com/rg3/youtube-dl/issues/7059)
|
||||
'url': 'http://www.pbs.org/food/features/a-chefs-life-season-3-episode-5-prickly-business/',
|
||||
'md5': '84ced42850d78f1d4650297356e95e6f',
|
||||
'info_dict': {
|
||||
'id': '2365546844',
|
||||
'display_id': 'a-chefs-life-season-3-episode-5-prickly-business',
|
||||
@ -317,9 +304,6 @@ class PBSIE(InfoExtractor):
|
||||
'duration': 1480,
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # requires ffmpeg
|
||||
},
|
||||
},
|
||||
{
|
||||
# Frontline video embedded via flp2012.js
|
||||
@ -340,6 +324,7 @@ class PBSIE(InfoExtractor):
|
||||
{
|
||||
# Serves hd only via widget/partnerplayer page
|
||||
'url': 'http://www.pbs.org/video/2365641075/',
|
||||
'md5': 'acfd4c400b48149a44861cb16dd305cf',
|
||||
'info_dict': {
|
||||
'id': '2365641075',
|
||||
'ext': 'mp4',
|
||||
@ -348,9 +333,6 @@ class PBSIE(InfoExtractor):
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'formats': 'mincount:8',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # requires ffmpeg
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://player.pbs.org/widget/partnerplayer/2365297708/?start=0&end=0&chapterbar=false&endscreen=false&topbar=true',
|
||||
@ -494,6 +476,7 @@ class PBSIE(InfoExtractor):
|
||||
info = video_info
|
||||
|
||||
formats = []
|
||||
http_url = None
|
||||
for num, redirect in enumerate(redirects):
|
||||
redirect_id = redirect.get('eeid')
|
||||
|
||||
@ -514,13 +497,32 @@ class PBSIE(InfoExtractor):
|
||||
|
||||
if determine_ext(format_url) == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
format_url, display_id, 'mp4', preference=1, m3u8_id='hls'))
|
||||
format_url, display_id, 'mp4', m3u8_id='hls', fatal=False))
|
||||
else:
|
||||
formats.append({
|
||||
'url': format_url,
|
||||
'format_id': redirect_id,
|
||||
})
|
||||
if re.search(r'^https?://.*(?:\d+k|baseline)', format_url):
|
||||
http_url = format_url
|
||||
self._remove_duplicate_formats(formats)
|
||||
m3u8_formats = list(filter(
|
||||
lambda f: f.get('protocol') == 'm3u8' and f.get('vcodec') != 'none' and f.get('resolution') != 'multiple',
|
||||
formats))
|
||||
if http_url:
|
||||
for m3u8_format in m3u8_formats:
|
||||
bitrate = self._search_regex(r'(\d+k)', m3u8_format['url'], 'bitrate', default=None)
|
||||
# extract only the formats that we know will be available as http formats.
|
||||
# https://projects.pbs.org/confluence/display/coveapi/COVE+Video+Specifications
|
||||
if not bitrate or bitrate not in ('400k', '800k', '1200k', '2500k'):
|
||||
continue
|
||||
f = m3u8_format.copy()
|
||||
f.update({
|
||||
'url': re.sub(r'\d+k|baseline', bitrate, http_url),
|
||||
'format_id': m3u8_format['format_id'].replace('hls', 'http'),
|
||||
'protocol': 'http',
|
||||
})
|
||||
formats.append(f)
|
||||
self._sort_formats(formats)
|
||||
|
||||
rating_str = info.get('rating')
|
||||
@ -535,6 +537,19 @@ class PBSIE(InfoExtractor):
|
||||
'ext': 'ttml',
|
||||
'url': closed_captions_url,
|
||||
}]
|
||||
mobj = re.search(r'/(\d+)_Encoded\.dfxp', closed_captions_url)
|
||||
if mobj:
|
||||
ttml_caption_suffix, ttml_caption_id = mobj.group(0, 1)
|
||||
ttml_caption_id = int(ttml_caption_id)
|
||||
subtitles['en'].extend([{
|
||||
'url': closed_captions_url.replace(
|
||||
ttml_caption_suffix, '/%d_Encoded.srt' % (ttml_caption_id + 1)),
|
||||
'ext': 'srt',
|
||||
}, {
|
||||
'url': closed_captions_url.replace(
|
||||
ttml_caption_suffix, '/%d_Encoded.vtt' % (ttml_caption_id + 2)),
|
||||
'ext': 'vtt',
|
||||
}])
|
||||
|
||||
# info['title'] is often incomplete (e.g. 'Full Episode', 'Episode 5', etc)
|
||||
# Try turning it to 'program - title' naming scheme if possible
|
||||
|
@ -20,18 +20,19 @@ class RtlNlIE(InfoExtractor):
|
||||
(?P<id>[0-9a-f-]+)'''
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.rtlxl.nl/#!/rtl-nieuws-132237/6e4203a6-0a5e-3596-8424-c599a59e0677',
|
||||
'md5': 'cc16baa36a6c169391f0764fa6b16654',
|
||||
'url': 'http://www.rtlxl.nl/#!/rtl-nieuws-132237/82b1aad1-4a14-3d7b-b554-b0aed1b2c416',
|
||||
'md5': '473d1946c1fdd050b2c0161a4b13c373',
|
||||
'info_dict': {
|
||||
'id': '6e4203a6-0a5e-3596-8424-c599a59e0677',
|
||||
'id': '82b1aad1-4a14-3d7b-b554-b0aed1b2c416',
|
||||
'ext': 'mp4',
|
||||
'title': 'RTL Nieuws - Laat',
|
||||
'description': 'md5:6b61f66510c8889923b11f2778c72dc5',
|
||||
'timestamp': 1408051800,
|
||||
'upload_date': '20140814',
|
||||
'duration': 576.880,
|
||||
'title': 'RTL Nieuws',
|
||||
'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
|
||||
'timestamp': 1461951000,
|
||||
'upload_date': '20160429',
|
||||
'duration': 1167.96,
|
||||
},
|
||||
}, {
|
||||
# best format available a3t
|
||||
'url': 'http://www.rtl.nl/system/videoplayer/derden/rtlnieuws/video_embed.html#uuid=84ae5571-ac25-4225-ae0c-ef8d9efb2aed/autoplay=false',
|
||||
'md5': 'dea7474214af1271d91ef332fb8be7ea',
|
||||
'info_dict': {
|
||||
@ -39,18 +40,19 @@ class RtlNlIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'timestamp': 1424039400,
|
||||
'title': 'RTL Nieuws - Nieuwe beelden Kopenhagen: chaos direct na aanslag',
|
||||
'thumbnail': 're:^https?://screenshots\.rtl\.nl/system/thumb/sz=[0-9]+x[0-9]+/uuid=84ae5571-ac25-4225-ae0c-ef8d9efb2aed$',
|
||||
'thumbnail': 're:^https?://screenshots\.rtl\.nl/(?:[^/]+/)*sz=[0-9]+x[0-9]+/uuid=84ae5571-ac25-4225-ae0c-ef8d9efb2aed$',
|
||||
'upload_date': '20150215',
|
||||
'description': 'Er zijn nieuwe beelden vrijgegeven die vlak na de aanslag in Kopenhagen zijn gemaakt. Op de video is goed te zien hoe omstanders zich bekommeren om één van de slachtoffers, terwijl de eerste agenten ter plaatse komen.',
|
||||
}
|
||||
}, {
|
||||
# empty synopsis and missing episodes (see https://github.com/rg3/youtube-dl/issues/6275)
|
||||
# best format available nettv
|
||||
'url': 'http://www.rtl.nl/system/videoplayer/derden/rtlnieuws/video_embed.html#uuid=f536aac0-1dc3-4314-920e-3bd1c5b3811a/autoplay=false',
|
||||
'info_dict': {
|
||||
'id': 'f536aac0-1dc3-4314-920e-3bd1c5b3811a',
|
||||
'ext': 'mp4',
|
||||
'title': 'RTL Nieuws - Meer beelden van overval juwelier',
|
||||
'thumbnail': 're:^https?://screenshots\.rtl\.nl/system/thumb/sz=[0-9]+x[0-9]+/uuid=f536aac0-1dc3-4314-920e-3bd1c5b3811a$',
|
||||
'thumbnail': 're:^https?://screenshots\.rtl\.nl/(?:[^/]+/)*sz=[0-9]+x[0-9]+/uuid=f536aac0-1dc3-4314-920e-3bd1c5b3811a$',
|
||||
'timestamp': 1437233400,
|
||||
'upload_date': '20150718',
|
||||
'duration': 30.474,
|
||||
@ -94,22 +96,46 @@ class RtlNlIE(InfoExtractor):
|
||||
videopath = material['videopath']
|
||||
m3u8_url = meta.get('videohost', 'http://manifest.us.rtl.nl') + videopath
|
||||
|
||||
formats = self._extract_m3u8_formats(m3u8_url, uuid, ext='mp4')
|
||||
formats = self._extract_m3u8_formats(
|
||||
m3u8_url, uuid, 'mp4', m3u8_id='hls', fatal=False)
|
||||
|
||||
video_urlpart = videopath.split('/adaptive/')[1][:-5]
|
||||
PG_URL_TEMPLATE = 'http://pg.us.rtl.nl/rtlxl/network/%s/progressive/%s.mp4'
|
||||
|
||||
formats.extend([
|
||||
{
|
||||
'url': PG_URL_TEMPLATE % ('a2m', video_urlpart),
|
||||
'format_id': 'pg-sd',
|
||||
},
|
||||
{
|
||||
'url': PG_URL_TEMPLATE % ('a3m', video_urlpart),
|
||||
'format_id': 'pg-hd',
|
||||
'quality': 0,
|
||||
PG_FORMATS = (
|
||||
('a2t', 512, 288),
|
||||
('a3t', 704, 400),
|
||||
('nettv', 1280, 720),
|
||||
)
|
||||
|
||||
def pg_format(format_id, width, height):
|
||||
return {
|
||||
'url': PG_URL_TEMPLATE % (format_id, video_urlpart),
|
||||
'format_id': 'pg-%s' % format_id,
|
||||
'protocol': 'http',
|
||||
'width': width,
|
||||
'height': height,
|
||||
}
|
||||
])
|
||||
|
||||
if not formats:
|
||||
formats = [pg_format(*pg_tuple) for pg_tuple in PG_FORMATS]
|
||||
else:
|
||||
pg_formats = []
|
||||
for format_id, width, height in PG_FORMATS:
|
||||
try:
|
||||
# Find hls format with the same width and height corresponding
|
||||
# to progressive format and copy metadata from it.
|
||||
f = next(f for f in formats if f.get('height') == height)
|
||||
# hls formats may have invalid width
|
||||
f['width'] = width
|
||||
f_copy = f.copy()
|
||||
f_copy.update(pg_format(format_id, width, height))
|
||||
pg_formats.append(f_copy)
|
||||
except StopIteration:
|
||||
# Missing hls format does mean that no progressive format with
|
||||
# such width and height exists either.
|
||||
pass
|
||||
formats.extend(pg_formats)
|
||||
self._sort_formats(formats)
|
||||
|
||||
thumbnails = []
|
||||
|
@ -4,42 +4,178 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import parse_filesize
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
js_to_json,
|
||||
parse_iso8601,
|
||||
parse_filesize,
|
||||
)
|
||||
|
||||
|
||||
class TagesschauPlayerIE(InfoExtractor):
    """Extractor for tagesschau.de ``~player`` pages (audio and video).

    Media descriptors are scraped from the player page itself; see
    ``_real_extract`` for why the JSON API helper is not used.
    """

    IE_NAME = 'tagesschau:player'
    _VALID_URL = r'https?://(?:www\.)?tagesschau\.de/multimedia/(?P<kind>audio|video)/(?P=kind)-(?P<id>\d+)~player(?:_[^/?#&]+)?\.html'

    _TESTS = [{
        'url': 'http://www.tagesschau.de/multimedia/video/video-179517~player.html',
        'md5': '8d09548d5c15debad38bee3a4d15ca21',
        'info_dict': {
            'id': '179517',
            'ext': 'mp4',
            'title': 'Marie Kristin Boese, ARD Berlin, über den zukünftigen Kurs der AfD',
            'thumbnail': 're:^https?:.*\.jpg$',
            'formats': 'mincount:6',
        },
    }, {
        'url': 'https://www.tagesschau.de/multimedia/audio/audio-29417~player.html',
        'md5': '76e6eec6ebd40740671cf0a2c88617e5',
        'info_dict': {
            'id': '29417',
            'ext': 'mp3',
            'title': 'Trabi - Bye, bye Rennpappe',
            'thumbnail': 're:^https?:.*\.jpg$',
            'formats': 'mincount:2',
        },
    }, {
        'url': 'http://www.tagesschau.de/multimedia/audio/audio-29417~player_autoplay-true.html',
        'only_matching': True,
    }]

    # Extra metadata merged into a format, keyed by the ``quality`` label
    # found in the player page's media descriptors.
    _FORMATS = {
        'xs': {'quality': 0},
        's': {'width': 320, 'height': 180, 'quality': 1},
        'm': {'width': 512, 'height': 288, 'quality': 2},
        'l': {'width': 960, 'height': 540, 'quality': 3},
        'xl': {'width': 1280, 'height': 720, 'quality': 4},
        'xxl': {'quality': 5},
    }

    def _extract_via_api(self, kind, video_id):
        """Build an info dict from the tagesschau.de multimedia JSON API.

        ``kind`` is the media kind used in the API path ('audio' or 'video'
        per ``_VALID_URL``); ``video_id`` is the numeric id. Not currently
        called by ``_real_extract``.
        """
        info = self._download_json(
            'https://www.tagesschau.de/api/multimedia/{0}/{0}-{1}.json'.format(kind, video_id),
            video_id)
        title = info['headline']
        formats = []
        for media in info['mediadata']:
            for format_id, format_url in media.items():
                if determine_ext(format_url) == 'm3u8':
                    # HLS manifests expand into one format per variant.
                    formats.extend(self._extract_m3u8_formats(
                        format_url, video_id, 'mp4',
                        entry_protocol='m3u8_native', m3u8_id='hls'))
                else:
                    formats.append({
                        'url': format_url,
                        'format_id': format_id,
                        'vcodec': 'none' if kind == 'audio' else None,
                    })
        self._sort_formats(formats)
        timestamp = parse_iso8601(info.get('date'))
        return {
            'id': video_id,
            'title': title,
            'timestamp': timestamp,
            'formats': formats,
        }

    def _real_extract(self, url):
        """Extract title, thumbnail and formats from the player webpage."""
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        # The JSON API (see _extract_via_api) does not provide some audio
        # formats (e.g. ogg), so the API path is disabled and media is
        # extracted from the webpage instead.
        webpage = self._download_webpage(url, video_id)

        title = self._og_search_title(webpage).strip()
        formats = []

        for media_json in re.findall(r'({src\s*:\s*["\']http[^}]+type\s*:[^}]+})', webpage):
            media = self._parse_json(js_to_json(media_json), video_id, fatal=False)
            if not media:
                continue
            src = media.get('src')
            if not src:
                # Skip just this descriptor; a bare ``return`` here would
                # abort the whole extraction and yield ``None``.
                continue
            quality = media.get('quality')
            kind = media.get('type', '').split('/')[0]
            ext = determine_ext(src)
            f = {
                'url': src,
                'format_id': '%s_%s' % (quality, ext) if quality else ext,
                'ext': ext,
                'vcodec': 'none' if kind == 'audio' else None,
            }
            # Known quality labels contribute width/height/quality ranking.
            f.update(self._FORMATS.get(quality, {}))
            formats.append(f)

        self._sort_formats(formats)

        thumbnail = self._og_search_thumbnail(webpage)

        return {
            'id': video_id,
            'title': title,
            'thumbnail': thumbnail,
            'formats': formats,
        }
|
||||
|
||||
|
||||
class TagesschauIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?tagesschau\.de/multimedia/(?:[^/]+/)*?[^/#?]+?(?P<id>-?[0-9]+)(?:~_[^/#?]+?)?\.html'
|
||||
_VALID_URL = r'https?://(?:www\.)?tagesschau\.de/(?P<path>[^/]+/(?:[^/]+/)*?(?P<id>[^/#?]+?(?:-?[0-9]+)?))(?:~_?[^/#?]+?)?\.html'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.tagesschau.de/multimedia/video/video-102143.html',
|
||||
'md5': '917a228bc7df7850783bc47979673a09',
|
||||
'md5': 'f7c27a0eff3bfe8c7727e65f8fe1b1e6',
|
||||
'info_dict': {
|
||||
'id': '102143',
|
||||
'id': 'video-102143',
|
||||
'ext': 'mp4',
|
||||
'title': 'Regierungsumbildung in Athen: Neue Minister in Griechenland vereidigt',
|
||||
'description': 'md5:171feccd9d9b3dd54d05d501568f6359',
|
||||
'description': '18.07.2015 20:10 Uhr',
|
||||
'thumbnail': 're:^https?:.*\.jpg$',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.tagesschau.de/multimedia/sendung/ts-5727.html',
|
||||
'md5': '3c54c1f6243d279b706bde660ceec633',
|
||||
'info_dict': {
|
||||
'id': '5727',
|
||||
'id': 'ts-5727',
|
||||
'ext': 'mp4',
|
||||
'description': 'md5:695c01bfd98b7e313c501386327aea59',
|
||||
'title': 'Sendung: tagesschau \t04.12.2014 20:00 Uhr',
|
||||
'description': 'md5:695c01bfd98b7e313c501386327aea59',
|
||||
'thumbnail': 're:^https?:.*\.jpg$',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.tagesschau.de/multimedia/politikimradio/audio-18407.html',
|
||||
'md5': 'aef45de271c4bf0a5db834aa40bf774c',
|
||||
# exclusive audio
|
||||
'url': 'http://www.tagesschau.de/multimedia/audio/audio-29417.html',
|
||||
'md5': '76e6eec6ebd40740671cf0a2c88617e5',
|
||||
'info_dict': {
|
||||
'id': '18407',
|
||||
'id': 'audio-29417',
|
||||
'ext': 'mp3',
|
||||
'title': 'Flüchtlingsdebatte: Hitzig, aber wenig hilfreich',
|
||||
'description': 'Flüchtlingsdebatte: Hitzig, aber wenig hilfreich',
|
||||
'title': 'Trabi - Bye, bye Rennpappe',
|
||||
'description': 'md5:8687dda862cbbe2cfb2df09b56341317',
|
||||
'thumbnail': 're:^https?:.*\.jpg$',
|
||||
},
|
||||
}, {
|
||||
# audio in article
|
||||
'url': 'http://www.tagesschau.de/inland/bnd-303.html',
|
||||
'md5': 'e0916c623e85fc1d2b26b78f299d3958',
|
||||
'info_dict': {
|
||||
'id': 'bnd-303',
|
||||
'ext': 'mp3',
|
||||
'title': 'Viele Baustellen für neuen BND-Chef',
|
||||
'description': 'md5:1e69a54be3e1255b2b07cdbce5bcd8b4',
|
||||
'thumbnail': 're:^https?:.*\.jpg$',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.tagesschau.de/inland/afd-parteitag-135.html',
|
||||
'info_dict': {
|
||||
'id': 'afd-parteitag-135',
|
||||
'title': 'Möchtegern-Underdog mit Machtanspruch',
|
||||
},
|
||||
'playlist_count': 2,
|
||||
}, {
|
||||
'url': 'http://www.tagesschau.de/multimedia/sendung/tsg-3771.html',
|
||||
'only_matching': True,
|
||||
@ -61,63 +197,39 @@ class TagesschauIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'http://www.tagesschau.de/multimedia/video/video-102303~_bab-sendung-211.html',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.tagesschau.de/100sekunden/index.html',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# playlist article with collapsing sections
|
||||
'url': 'http://www.tagesschau.de/wirtschaft/faq-freihandelszone-eu-usa-101.html',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_FORMATS = {
|
||||
's': {'width': 256, 'height': 144, 'quality': 1},
|
||||
'm': {'width': 512, 'height': 288, 'quality': 2},
|
||||
'l': {'width': 960, 'height': 544, 'quality': 3},
|
||||
}
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return False if TagesschauPlayerIE.suitable(url) else super(TagesschauIE, cls).suitable(url)
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
display_id = video_id.lstrip('-')
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
player_url = self._html_search_meta(
|
||||
'twitter:player', webpage, 'player URL', default=None)
|
||||
if player_url:
|
||||
playerpage = self._download_webpage(
|
||||
player_url, display_id, 'Downloading player page')
|
||||
|
||||
formats = []
|
||||
for media in re.finditer(
|
||||
r'''(?x)
|
||||
(?P<q_url>["\'])(?P<url>http://media.+?)(?P=q_url)
|
||||
,\s*type:(?P<q_type>["\'])(?P<type>video|audio)/(?P<ext>.+?)(?P=q_type)
|
||||
(?:,\s*quality:(?P<q_quality>["\'])(?P<quality>.+?)(?P=q_quality))?
|
||||
''', playerpage):
|
||||
url = media.group('url')
|
||||
type_ = media.group('type')
|
||||
ext = media.group('ext')
|
||||
res = media.group('quality')
|
||||
f = {
|
||||
'format_id': '%s_%s' % (res, ext) if res else ext,
|
||||
'url': url,
|
||||
'ext': ext,
|
||||
'vcodec': 'none' if type_ == 'audio' else None,
|
||||
}
|
||||
f.update(self._FORMATS.get(res, {}))
|
||||
formats.append(f)
|
||||
thumbnail = self._og_search_thumbnail(playerpage)
|
||||
title = self._og_search_title(webpage).strip()
|
||||
description = self._og_search_description(webpage).strip()
|
||||
else:
|
||||
download_text = self._search_regex(
|
||||
r'(?s)<p>Wir bieten dieses Video in folgenden Formaten zum Download an:</p>\s*<div class="controls">(.*?)</div>\s*<p>',
|
||||
webpage, 'download links')
|
||||
def _extract_formats(self, download_text, media_kind):
|
||||
links = re.finditer(
|
||||
r'<div class="button" title="(?P<title>[^"]*)"><a href="(?P<url>[^"]+)">(?P<name>.+?)</a></div>',
|
||||
download_text)
|
||||
formats = []
|
||||
for l in links:
|
||||
link_url = l.group('url')
|
||||
if not link_url:
|
||||
continue
|
||||
format_id = self._search_regex(
|
||||
r'.*/[^/.]+\.([^/]+)\.[^/.]+', l.group('url'), 'format ID')
|
||||
r'.*/[^/.]+\.([^/]+)\.[^/.]+$', link_url, 'format ID',
|
||||
default=determine_ext(link_url))
|
||||
format = {
|
||||
'format_id': format_id,
|
||||
'url': l.group('url'),
|
||||
'format_name': l.group('name'),
|
||||
}
|
||||
title = l.group('title')
|
||||
if title:
|
||||
if media_kind.lower() == 'video':
|
||||
m = re.match(
|
||||
r'''(?x)
|
||||
Video:\s*(?P<vcodec>[a-zA-Z0-9/._-]+)\s*&\#10;
|
||||
@ -125,7 +237,7 @@ class TagesschauIE(InfoExtractor):
|
||||
(?P<vbr>[0-9]+)kbps&\#10;
|
||||
Audio:\s*(?P<abr>[0-9]+)kbps,\s*(?P<audio_desc>[A-Za-z\.0-9]+)&\#10;
|
||||
Größe:\s*(?P<filesize_approx>[0-9.,]+\s+[a-zA-Z]*B)''',
|
||||
l.group('title'))
|
||||
title)
|
||||
if m:
|
||||
format.update({
|
||||
'format_note': m.group('audio_desc'),
|
||||
@ -136,13 +248,57 @@ class TagesschauIE(InfoExtractor):
|
||||
'vbr': int(m.group('vbr')),
|
||||
'filesize_approx': parse_filesize(m.group('filesize_approx')),
|
||||
})
|
||||
else:
|
||||
m = re.match(
|
||||
r'(?P<format>.+?)-Format\s*:\s*(?P<abr>\d+)kbps\s*,\s*(?P<note>.+)',
|
||||
title)
|
||||
if m:
|
||||
format.update({
|
||||
'format_note': '%s, %s' % (m.group('format'), m.group('note')),
|
||||
'vcodec': 'none',
|
||||
'abr': int(m.group('abr')),
|
||||
})
|
||||
formats.append(format)
|
||||
self._sort_formats(formats)
|
||||
return formats
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id') or mobj.group('path')
|
||||
display_id = video_id.lstrip('-')
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'<span[^>]*class="headline"[^>]*>(.+?)</span>',
|
||||
webpage, 'title', default=None) or self._og_search_title(webpage)
|
||||
|
||||
DOWNLOAD_REGEX = r'(?s)<p>Wir bieten dieses (?P<kind>Video|Audio) in folgenden Formaten zum Download an:</p>\s*<div class="controls">(?P<links>.*?)</div>\s*<p>'
|
||||
|
||||
webpage_type = self._og_search_property('type', webpage, default=None)
|
||||
if webpage_type == 'website': # Article
|
||||
entries = []
|
||||
for num, (entry_title, media_kind, download_text) in enumerate(re.findall(
|
||||
r'(?s)<p[^>]+class="infotext"[^>]*>\s*(?:<a[^>]+>)?\s*<strong>(.+?)</strong>.*?</p>.*?%s' % DOWNLOAD_REGEX,
|
||||
webpage), 1):
|
||||
entries.append({
|
||||
'id': '%s-%d' % (display_id, num),
|
||||
'title': '%s' % entry_title,
|
||||
'formats': self._extract_formats(download_text, media_kind),
|
||||
})
|
||||
if len(entries) > 1:
|
||||
return self.playlist_result(entries, display_id, title)
|
||||
formats = entries[0]['formats']
|
||||
else: # Assume single video
|
||||
download_text = self._search_regex(
|
||||
DOWNLOAD_REGEX, webpage, 'download links', group='links')
|
||||
media_kind = self._search_regex(
|
||||
DOWNLOAD_REGEX, webpage, 'media kind', default='Video', group='kind')
|
||||
formats = self._extract_formats(download_text, media_kind)
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
description = self._html_search_regex(
|
||||
r'(?s)<p class="teasertext">(.*?)</p>',
|
||||
webpage, 'description', default=None)
|
||||
title = self._html_search_regex(
|
||||
r'<span class="headline".*?>(.*?)</span>', webpage, 'title')
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
|
@ -27,7 +27,7 @@ class TEDIE(InfoExtractor):
|
||||
'''
|
||||
_TESTS = [{
|
||||
'url': 'http://www.ted.com/talks/dan_dennett_on_our_consciousness.html',
|
||||
'md5': 'fc94ac279feebbce69f21c0c6ee82810',
|
||||
'md5': '0de43ac406aa3e4ea74b66c9c7789b13',
|
||||
'info_dict': {
|
||||
'id': '102',
|
||||
'ext': 'mp4',
|
||||
@ -37,21 +37,26 @@ class TEDIE(InfoExtractor):
|
||||
'consciousness, but that half the time our brains are '
|
||||
'actively fooling us.'),
|
||||
'uploader': 'Dan Dennett',
|
||||
'width': 854,
|
||||
'width': 853,
|
||||
'duration': 1308,
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.ted.com/watch/ted-institute/ted-bcg/vishal-sikka-the-beauty-and-power-of-algorithms',
|
||||
'md5': '226f4fb9c62380d11b7995efa4c87994',
|
||||
'md5': 'b899ac15e345fb39534d913f7606082b',
|
||||
'info_dict': {
|
||||
'id': 'vishal-sikka-the-beauty-and-power-of-algorithms',
|
||||
'id': 'tSVI8ta_P4w',
|
||||
'ext': 'mp4',
|
||||
'title': 'Vishal Sikka: The beauty and power of algorithms',
|
||||
'thumbnail': 're:^https?://.+\.jpg',
|
||||
'description': 'Adaptive, intelligent, and consistent, algorithms are emerging as the ultimate app for everything from matching consumers to products to assessing medical diagnoses. Vishal Sikka shares his appreciation for the algorithm, charting both its inherent beauty and its growing power.',
|
||||
}
|
||||
'description': 'md5:6261fdfe3e02f4f579cbbfc00aff73f4',
|
||||
'upload_date': '20140122',
|
||||
'uploader_id': 'TEDInstitute',
|
||||
'uploader': 'TED Institute',
|
||||
},
|
||||
'add_ie': ['Youtube'],
|
||||
}, {
|
||||
'url': 'http://www.ted.com/talks/gabby_giffords_and_mark_kelly_be_passionate_be_courageous_be_your_best',
|
||||
'md5': '71b3ab2f4233012dce09d515c9c39ce2',
|
||||
'info_dict': {
|
||||
'id': '1972',
|
||||
'ext': 'mp4',
|
||||
@ -102,9 +107,9 @@ class TEDIE(InfoExtractor):
|
||||
}]
|
||||
|
||||
_NATIVE_FORMATS = {
|
||||
'low': {'preference': 1, 'width': 320, 'height': 180},
|
||||
'medium': {'preference': 2, 'width': 512, 'height': 288},
|
||||
'high': {'preference': 3, 'width': 854, 'height': 480},
|
||||
'low': {'width': 320, 'height': 180},
|
||||
'medium': {'width': 512, 'height': 288},
|
||||
'high': {'width': 854, 'height': 480},
|
||||
}
|
||||
|
||||
def _extract_info(self, webpage):
|
||||
@ -171,15 +176,21 @@ class TEDIE(InfoExtractor):
|
||||
if finfo:
|
||||
f.update(finfo)
|
||||
|
||||
http_url = None
|
||||
for format_id, resources in talk_info['resources'].items():
|
||||
if format_id == 'h264':
|
||||
for resource in resources:
|
||||
h264_url = resource.get('file')
|
||||
if not h264_url:
|
||||
continue
|
||||
bitrate = int_or_none(resource.get('bitrate'))
|
||||
formats.append({
|
||||
'url': resource['file'],
|
||||
'url': h264_url,
|
||||
'format_id': '%s-%sk' % (format_id, bitrate),
|
||||
'tbr': bitrate,
|
||||
})
|
||||
if re.search('\d+k', h264_url):
|
||||
http_url = h264_url
|
||||
elif format_id == 'rtmp':
|
||||
streamer = talk_info.get('streamer')
|
||||
if not streamer:
|
||||
@ -195,16 +206,24 @@ class TEDIE(InfoExtractor):
|
||||
'tbr': int_or_none(resource.get('bitrate')),
|
||||
})
|
||||
elif format_id == 'hls':
|
||||
hls_formats = self._extract_m3u8_formats(
|
||||
resources.get('stream'), video_name, 'mp4', m3u8_id=format_id)
|
||||
for f in hls_formats:
|
||||
if f.get('format_id') == 'hls-meta':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
resources.get('stream'), video_name, 'mp4', m3u8_id=format_id, fatal=False))
|
||||
|
||||
m3u8_formats = list(filter(
|
||||
lambda f: f.get('protocol') == 'm3u8' and f.get('vcodec') != 'none' and f.get('resolution') != 'multiple',
|
||||
formats))
|
||||
if http_url:
|
||||
for m3u8_format in m3u8_formats:
|
||||
bitrate = self._search_regex(r'(\d+k)', m3u8_format['url'], 'bitrate', default=None)
|
||||
if not bitrate:
|
||||
continue
|
||||
if not f.get('height'):
|
||||
f['vcodec'] = 'none'
|
||||
else:
|
||||
f['acodec'] = 'none'
|
||||
formats.extend(hls_formats)
|
||||
f = m3u8_format.copy()
|
||||
f.update({
|
||||
'url': re.sub(r'\d+k', bitrate, http_url),
|
||||
'format_id': m3u8_format['format_id'].replace('hls', 'http'),
|
||||
'protocol': 'http',
|
||||
})
|
||||
formats.append(f)
|
||||
|
||||
audio_download = talk_info.get('audioDownload')
|
||||
if audio_download:
|
||||
@ -212,7 +231,6 @@ class TEDIE(InfoExtractor):
|
||||
'url': audio_download,
|
||||
'format_id': 'audio',
|
||||
'vcodec': 'none',
|
||||
'preference': -0.5,
|
||||
})
|
||||
|
||||
self._sort_formats(formats)
|
||||
@ -254,7 +272,11 @@ class TEDIE(InfoExtractor):
|
||||
|
||||
config_json = self._html_search_regex(
|
||||
r'"pages\.jwplayer"\s*,\s*({.+?})\s*\)\s*</script>',
|
||||
webpage, 'config')
|
||||
webpage, 'config', default=None)
|
||||
if not config_json:
|
||||
embed_url = self._search_regex(
|
||||
r"<iframe[^>]+class='pages-video-embed__video__object'[^>]+src='([^']+)'", webpage, 'embed url')
|
||||
return self.url_result(self._proto_relative_url(embed_url))
|
||||
config = json.loads(config_json)['config']
|
||||
video_url = config['video']['url']
|
||||
thumbnail = config.get('image', {}).get('url')
|
||||
|
@ -3,7 +3,10 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_etree_fromstring
|
||||
from ..compat import (
|
||||
compat_etree_fromstring,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
@ -12,13 +15,22 @@ from ..utils import (
|
||||
)
|
||||
|
||||
|
||||
class VevoIE(InfoExtractor):
|
||||
class VevoBaseIE(InfoExtractor):
    """Common base for the Vevo extractors; provides page-store parsing."""

    def _extract_json(self, webpage, video_id, item):
        """Return ``item`` from the ``default`` section of the page store.

        The store is the JSON object assigned to ``window.__INITIAL_STORE__``
        in ``webpage``.
        """
        store_json = self._search_regex(
            r'window\.__INITIAL_STORE__\s*=\s*({.+?});\s*</script>',
            webpage, 'initial store')
        store = self._parse_json(store_json, video_id)
        return store['default'][item]
|
||||
|
||||
|
||||
class VevoIE(VevoBaseIE):
|
||||
'''
|
||||
Accepts urls from vevo.com or in the format 'vevo:{id}'
|
||||
(currently used by MTVIE and MySpaceIE)
|
||||
'''
|
||||
_VALID_URL = r'''(?x)
|
||||
(?:https?://www\.vevo\.com/watch/(?:[^/]+/(?:[^/]+/)?)?|
|
||||
(?:https?://www\.vevo\.com/watch/(?!playlist|genre)(?:[^/]+/(?:[^/]+/)?)?|
|
||||
https?://cache\.vevo\.com/m/html/embed\.html\?video=|
|
||||
https?://videoplayer\.vevo\.com/embed/embedded\?videoId=|
|
||||
vevo:)
|
||||
@ -30,11 +42,15 @@ class VevoIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': 'GB1101300280',
|
||||
'ext': 'mp4',
|
||||
'title': 'Somebody to Die For',
|
||||
'title': 'Hurts - Somebody to Die For',
|
||||
'timestamp': 1372057200,
|
||||
'upload_date': '20130624',
|
||||
'uploader': 'Hurts',
|
||||
'timestamp': 1372057200,
|
||||
'track': 'Somebody to Die For',
|
||||
'artist': 'Hurts',
|
||||
'genre': 'Pop',
|
||||
},
|
||||
'expected_warnings': ['Unable to download SMIL file'],
|
||||
}, {
|
||||
'note': 'v3 SMIL format',
|
||||
'url': 'http://www.vevo.com/watch/cassadee-pope/i-wish-i-could-break-your-heart/USUV71302923',
|
||||
@ -42,23 +58,31 @@ class VevoIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': 'USUV71302923',
|
||||
'ext': 'mp4',
|
||||
'title': 'I Wish I Could Break Your Heart',
|
||||
'title': 'Cassadee Pope - I Wish I Could Break Your Heart',
|
||||
'timestamp': 1392796919,
|
||||
'upload_date': '20140219',
|
||||
'uploader': 'Cassadee Pope',
|
||||
'timestamp': 1392796919,
|
||||
'track': 'I Wish I Could Break Your Heart',
|
||||
'artist': 'Cassadee Pope',
|
||||
'genre': 'Country',
|
||||
},
|
||||
'expected_warnings': ['Unable to download SMIL file'],
|
||||
}, {
|
||||
'note': 'Age-limited video',
|
||||
'url': 'https://www.vevo.com/watch/justin-timberlake/tunnel-vision-explicit/USRV81300282',
|
||||
'info_dict': {
|
||||
'id': 'USRV81300282',
|
||||
'ext': 'mp4',
|
||||
'title': 'Tunnel Vision (Explicit)',
|
||||
'upload_date': '20130703',
|
||||
'title': 'Justin Timberlake - Tunnel Vision (Explicit)',
|
||||
'age_limit': 18,
|
||||
'uploader': 'Justin Timberlake',
|
||||
'timestamp': 1372888800,
|
||||
'upload_date': '20130703',
|
||||
'uploader': 'Justin Timberlake',
|
||||
'track': 'Tunnel Vision (Explicit)',
|
||||
'artist': 'Justin Timberlake',
|
||||
'genre': 'Pop',
|
||||
},
|
||||
'expected_warnings': ['Unable to download SMIL file'],
|
||||
}, {
|
||||
'note': 'No video_info',
|
||||
'url': 'http://www.vevo.com/watch/k-camp-1/Till-I-Die/USUV71503000',
|
||||
@ -66,12 +90,32 @@ class VevoIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': 'USUV71503000',
|
||||
'ext': 'mp4',
|
||||
'title': 'Till I Die',
|
||||
'upload_date': '20151207',
|
||||
'title': 'K Camp - Till I Die',
|
||||
'age_limit': 18,
|
||||
'uploader': 'K Camp',
|
||||
'timestamp': 1449468000,
|
||||
'upload_date': '20151207',
|
||||
'uploader': 'K Camp',
|
||||
'track': 'Till I Die',
|
||||
'artist': 'K Camp',
|
||||
'genre': 'Rap/Hip-Hop',
|
||||
},
|
||||
}, {
|
||||
'note': 'Only available via webpage',
|
||||
'url': 'http://www.vevo.com/watch/GBUV71600656',
|
||||
'md5': '67e79210613865b66a47c33baa5e37fe',
|
||||
'info_dict': {
|
||||
'id': 'GBUV71600656',
|
||||
'ext': 'mp4',
|
||||
'title': 'ABC - Viva Love',
|
||||
'age_limit': 0,
|
||||
'timestamp': 1461830400,
|
||||
'upload_date': '20160428',
|
||||
'uploader': 'ABC',
|
||||
'track': 'Viva Love',
|
||||
'artist': 'ABC',
|
||||
'genre': 'Pop',
|
||||
},
|
||||
'expected_warnings': ['Failed to download video versions info'],
|
||||
}]
|
||||
_SMIL_BASE_URL = 'http://smil.lvl3.vevo.com'
|
||||
_SOURCE_TYPES = {
|
||||
@ -146,8 +190,8 @@ class VevoIE(InfoExtractor):
|
||||
auth_info = self._parse_json(webpage, video_id)
|
||||
self._api_url_template = self.http_scheme() + '//apiv2.vevo.com/%s?token=' + auth_info['access_token']
|
||||
|
||||
def _call_api(self, path, video_id, note, errnote, fatal=True):
|
||||
return self._download_json(self._api_url_template % path, video_id, note, errnote)
|
||||
def _call_api(self, path, *args, **kwargs):
|
||||
return self._download_json(self._api_url_template % path, *args, **kwargs)
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
@ -157,9 +201,11 @@ class VevoIE(InfoExtractor):
|
||||
json_url, video_id, 'Downloading video info', 'Unable to download info')
|
||||
video_info = response.get('video') or {}
|
||||
video_versions = video_info.get('videoVersions')
|
||||
artist = None
|
||||
featured_artist = None
|
||||
uploader = None
|
||||
timestamp = None
|
||||
view_count = None
|
||||
timestamp = None
|
||||
formats = []
|
||||
|
||||
if not video_info:
|
||||
@ -183,12 +229,19 @@ class VevoIE(InfoExtractor):
|
||||
video_versions = self._call_api(
|
||||
'video/%s/streams' % video_id, video_id,
|
||||
'Downloading video versions info',
|
||||
'Failed to download video versions info')
|
||||
'Failed to download video versions info',
|
||||
fatal=False)
|
||||
|
||||
# Some videos are only available via webpage (e.g.
|
||||
# https://github.com/rg3/youtube-dl/issues/9366)
|
||||
if not video_versions:
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
video_versions = self._extract_json(webpage, video_id, 'streams')[video_id][0]
|
||||
|
||||
timestamp = parse_iso8601(video_info.get('releaseDate'))
|
||||
artists = video_info.get('artists')
|
||||
if artists:
|
||||
uploader = artists[0]['name']
|
||||
artist = uploader = artists[0]['name']
|
||||
view_count = int_or_none(video_info.get('views', {}).get('total'))
|
||||
|
||||
for video_version in video_versions:
|
||||
@ -241,7 +294,11 @@ class VevoIE(InfoExtractor):
|
||||
scale=1000)
|
||||
artists = video_info.get('mainArtists')
|
||||
if artists:
|
||||
uploader = artists[0]['artistName']
|
||||
artist = uploader = artists[0]['artistName']
|
||||
|
||||
featured_artists = video_info.get('featuredArtists')
|
||||
if featured_artists:
|
||||
featured_artist = featured_artists[0]['artistName']
|
||||
|
||||
smil_parsed = False
|
||||
for video_version in video_info['videoVersions']:
|
||||
@ -278,7 +335,11 @@ class VevoIE(InfoExtractor):
|
||||
smil_parsed = True
|
||||
self._sort_formats(formats)
|
||||
|
||||
title = video_info['title']
|
||||
track = video_info['title']
|
||||
if featured_artist:
|
||||
artist = '%s ft. %s' % (artist, featured_artist)
|
||||
title = '%s - %s' % (artist, track) if artist else track
|
||||
genre = video_info.get('genres', [None])[0]
|
||||
|
||||
is_explicit = video_info.get('isExplicit')
|
||||
if is_explicit is True:
|
||||
@ -300,4 +361,75 @@ class VevoIE(InfoExtractor):
|
||||
'duration': duration,
|
||||
'view_count': view_count,
|
||||
'age_limit': age_limit,
|
||||
'track': track,
|
||||
'artist': uploader,
|
||||
'genre': genre,
|
||||
}
|
||||
|
||||
|
||||
class VevoPlaylistIE(VevoBaseIE):
    """Extractor for Vevo playlist and genre watch pages."""

    _VALID_URL = r'https?://www\.vevo\.com/watch/(?P<kind>playlist|genre)/(?P<id>[^/?#&]+)'

    _TESTS = [{
        'url': 'http://www.vevo.com/watch/playlist/dadbf4e7-b99f-4184-9670-6f0e547b6a29',
        'info_dict': {
            'id': 'dadbf4e7-b99f-4184-9670-6f0e547b6a29',
            'title': 'Best-Of: Birdman',
        },
        'playlist_count': 10,
    }, {
        'url': 'http://www.vevo.com/watch/genre/rock',
        'info_dict': {
            'id': 'rock',
            'title': 'Rock',
        },
        'playlist_count': 20,
    }, {
        'url': 'http://www.vevo.com/watch/playlist/dadbf4e7-b99f-4184-9670-6f0e547b6a29?index=0',
        'md5': '32dcdfddddf9ec6917fc88ca26d36282',
        'info_dict': {
            'id': 'USCMV1100073',
            'ext': 'mp4',
            'title': 'Birdman - Y.U. MAD',
            'timestamp': 1323417600,
            'upload_date': '20111209',
            'uploader': 'Birdman',
            'track': 'Y.U. MAD',
            'artist': 'Birdman',
            'genre': 'Rap/Hip-Hop',
        },
        'expected_warnings': ['Unable to download SMIL file'],
    }, {
        'url': 'http://www.vevo.com/watch/genre/rock?index=0',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a single video (when ``index`` is given) or the playlist."""
        match = re.match(self._VALID_URL, url)
        playlist_id, playlist_kind = match.group('id', 'kind')

        webpage = self._download_webpage(url, playlist_id)

        query = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
        index = query.get('index', [None])[0]

        # With an ``index`` query parameter, hand the video id found in the
        # page's meta tags (if any) over to VevoIE.
        if index:
            video_id = self._search_regex(
                r'<meta[^>]+content=(["\'])vevo://video/(?P<id>.+?)\1[^>]*>',
                webpage, 'video id', default=None, group='id')
            if video_id:
                return self.url_result('vevo:%s' % video_id, VevoIE.ie_key())

        playlists = self._extract_json(webpage, playlist_id, '%ss' % playlist_kind)

        # Playlist pages: take the first stored entry; genre pages: look the
        # entry up by the id from the URL.
        if playlist_kind == 'playlist':
            playlist = list(playlists.values())[0]
        else:
            playlist = playlists[playlist_id]

        entries = []
        for isrc in playlist['isrcs']:
            entries.append(self.url_result('vevo:%s' % isrc, VevoIE.ie_key()))

        result_id = playlist.get('playlistId') or playlist_id
        return self.playlist_result(
            entries, result_id, playlist.get('name'), playlist.get('description'))
|
||||
|
@ -43,7 +43,7 @@ class VLiveIE(InfoExtractor):
|
||||
status_params = self._download_json(
|
||||
'http://www.vlive.tv/video/status?videoSeq=%s' % video_id,
|
||||
video_id, 'Downloading JSON status',
|
||||
headers={'Referer': url})
|
||||
headers={'Referer': url.encode('utf-8')})
|
||||
status = status_params.get('status')
|
||||
air_start = status_params.get('onAirStartAt', '')
|
||||
is_live = status_params.get('isLive')
|
||||
|
@ -4,16 +4,22 @@ from __future__ import unicode_literals
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
float_or_none,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
|
||||
class WSJIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://video-api\.wsj\.com/api-video/player/iframe\.html\?guid=(?P<id>[a-zA-Z0-9-]+)'
|
||||
_VALID_URL = r'''(?x)https?://
|
||||
(?:
|
||||
video-api\.wsj\.com/api-video/player/iframe\.html\?guid=|
|
||||
(?:www\.)?wsj\.com/video/[^/]+/
|
||||
)
|
||||
(?P<id>[a-zA-Z0-9-]+)'''
|
||||
IE_DESC = 'Wall Street Journal'
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://video-api.wsj.com/api-video/player/iframe.html?guid=1BD01A4C-BFE8-40A5-A42F-8A8AF9898B1A',
|
||||
'md5': '9747d7a6ebc2f4df64b981e1dde9efa9',
|
||||
'md5': 'e230a5bb249075e40793b655a54a02e4',
|
||||
'info_dict': {
|
||||
'id': '1BD01A4C-BFE8-40A5-A42F-8A8AF9898B1A',
|
||||
'ext': 'mp4',
|
||||
@ -24,65 +30,60 @@ class WSJIE(InfoExtractor):
|
||||
'duration': 90,
|
||||
'title': 'Bills Coach Rex Ryan Updates His Old Jets Tattoo',
|
||||
},
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.wsj.com/video/can-alphabet-build-a-smarter-city/359DDAA8-9AC1-489C-82E6-0429C1E430E0.html',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
bitrates = [128, 174, 264, 320, 464, 664, 1264]
|
||||
api_url = (
|
||||
'http://video-api.wsj.com/api-video/find_all_videos.asp?'
|
||||
'type=guid&count=1&query=%s&'
|
||||
'fields=hls,adZone,thumbnailList,guid,state,secondsUntilStartTime,'
|
||||
'author,description,name,linkURL,videoStillURL,duration,videoURL,'
|
||||
'adCategory,catastrophic,linkShortURL,doctypeID,youtubeID,'
|
||||
'titletag,rssURL,wsj-section,wsj-subsection,allthingsd-section,'
|
||||
'allthingsd-subsection,sm-section,sm-subsection,provider,'
|
||||
'formattedCreationDate,keywords,keywordsOmniture,column,editor,'
|
||||
'emailURL,emailPartnerID,showName,omnitureProgramName,'
|
||||
'omnitureVideoFormat,linkRelativeURL,touchCastID,'
|
||||
'omniturePublishDate,%s') % (
|
||||
video_id, ','.join('video%dkMP4Url' % br for br in bitrates))
|
||||
'type=guid&count=1&query=%s&fields=type,hls,videoMP4List,'
|
||||
'thumbnailList,author,description,name,duration,videoURL,'
|
||||
'titletag,formattedCreationDate,keywords,editor' % video_id)
|
||||
info = self._download_json(api_url, video_id)['items'][0]
|
||||
|
||||
# Thumbnails are conveniently in the correct format already
|
||||
thumbnails = info.get('thumbnailList')
|
||||
creator = info.get('author')
|
||||
uploader_id = info.get('editor')
|
||||
categories = info.get('keywords')
|
||||
duration = int_or_none(info.get('duration'))
|
||||
upload_date = unified_strdate(
|
||||
info.get('formattedCreationDate'), day_first=False)
|
||||
title = info.get('name', info.get('titletag'))
|
||||
|
||||
formats = [{
|
||||
'format_id': 'f4m',
|
||||
'format_note': 'f4m (meta URL)',
|
||||
'url': info['videoURL'],
|
||||
}]
|
||||
if info.get('hls'):
|
||||
formats = []
|
||||
|
||||
f4m_url = info.get('videoURL')
|
||||
if f4m_url:
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
f4m_url, video_id, f4m_id='hds', fatal=False))
|
||||
|
||||
m3u8_url = info.get('hls')
|
||||
if m3u8_url:
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
info['hls'], video_id, ext='mp4',
|
||||
preference=0, entry_protocol='m3u8_native'))
|
||||
for br in bitrates:
|
||||
field = 'video%dkMP4Url' % br
|
||||
if info.get(field):
|
||||
entry_protocol='m3u8_native', m3u8_id='hls', fatal=False))
|
||||
|
||||
for v in info.get('videoMP4List', []):
|
||||
mp4_url = v.get('url')
|
||||
if not mp4_url:
|
||||
continue
|
||||
tbr = int_or_none(v.get('bitrate'))
|
||||
formats.append({
|
||||
'format_id': 'mp4-%d' % br,
|
||||
'container': 'mp4',
|
||||
'tbr': br,
|
||||
'url': info[field],
|
||||
'url': mp4_url,
|
||||
'format_id': 'http' + ('-%d' % tbr if tbr else ''),
|
||||
'tbr': tbr,
|
||||
'width': int_or_none(v.get('width')),
|
||||
'height': int_or_none(v.get('height')),
|
||||
'fps': float_or_none(v.get('fps')),
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
'thumbnails': thumbnails,
|
||||
'creator': creator,
|
||||
'uploader_id': uploader_id,
|
||||
'duration': duration,
|
||||
'upload_date': upload_date,
|
||||
# Thumbnails are conveniently in the correct format already
|
||||
'thumbnails': info.get('thumbnailList'),
|
||||
'creator': info.get('author'),
|
||||
'uploader_id': info.get('editor'),
|
||||
'duration': int_or_none(info.get('duration')),
|
||||
'upload_date': unified_strdate(info.get(
|
||||
'formattedCreationDate'), day_first=False),
|
||||
'title': title,
|
||||
'categories': categories,
|
||||
'categories': info.get('keywords'),
|
||||
}
|
||||
|
158
youtube_dl/extractor/xiami.py
Normal file
158
youtube_dl/extractor/xiami.py
Normal file
@ -0,0 +1,158 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urllib_parse_unquote
|
||||
from ..utils import int_or_none
|
||||
|
||||
|
||||
class XiamiBaseIE(InfoExtractor):
    """Common helpers for the Xiami extractors.

    Provides access to the JSON playlist API, conversion of one API track
    entry into an info dict, and decoding of the obfuscated media location.
    """

    _API_BASE_URL = 'http://www.xiami.com/song/playlist/cat/json/id'

    def _extract_track(self, track, track_id=None):
        """Build an info dict from a single track entry.

        ``track`` is a dict taken from the API's ``trackList``; ``title``
        and ``location`` are required keys, everything else is optional.
        ``track_id`` is used as the id when the entry has no ``song_id``.
        """
        title = track['title']
        track_url = self._decrypt(track['location'])

        subtitles = {}
        lyrics_url = track.get('lyric_url') or track.get('lyric')
        # Only absolute http(s) links are usable as a subtitle URL.
        if lyrics_url and lyrics_url.startswith('http'):
            subtitles['origin'] = [{'url': lyrics_url}]

        # Guard against a missing *or null* 'artist' value: a default passed
        # to dict.get() does not help when the key exists with value None.
        artist = track.get('artist')

        return {
            'id': track.get('song_id') or track_id,
            'url': track_url,
            'title': title,
            'thumbnail': track.get('pic') or track.get('album_pic'),
            'duration': int_or_none(track.get('length')),
            # Several artists are separated by ';' - the first one is the creator.
            'creator': (artist or '').split(';')[0],
            'track': title,
            'album': track.get('album_name'),
            'artist': artist,
            'subtitles': subtitles,
        }

    def _extract_tracks(self, item_id, typ=None):
        """Download the playlist for ``item_id`` and return its track info dicts.

        ``typ`` selects the kind of item (appended as ``/type/<typ>`` to the
        API URL); when falsy, no type suffix is added.
        """
        type_suffix = '/type/%s' % typ if typ else ''
        playlist = self._download_json(
            '%s/%s%s' % (self._API_BASE_URL, item_id, type_suffix), item_id)
        return [
            self._extract_track(track, item_id)
            for track in playlist['data']['trackList']]

    @staticmethod
    def _decrypt(origin):
        """Decode an obfuscated ``location`` string into a plain URL.

        The first character is the number of rows ``n``. The remaining text
        is cut into ``n`` consecutive rows whose lengths differ by at most
        one (the longer rows come first). Reading those rows column by
        column yields a percent-encoded string, which is unquoted; finally
        every '^' is replaced by '0'.

        Malformed input (empty string, non-digit or zero row count) is not
        handled specially and raises from the underlying operation.
        """
        row_count = int(origin[0])
        cipher = origin[1:]
        short_length, long_rows = divmod(len(cipher), row_count)

        # Slice the cipher text into rows; the first `long_rows` rows carry
        # one extra character.
        rows = []
        start = 0
        for index in range(row_count):
            end = start + short_length + (1 if index < long_rows else 0)
            rows.append(cipher[start:end])
            start = end

        # Read column-wise, skipping rows that are too short for the column.
        chars = []
        for column in range(short_length + 1):
            for row in rows:
                if column < len(row):
                    chars.append(row[column])

        return compat_urllib_parse_unquote(''.join(chars)).replace('^', '0')
|
||||
|
||||
|
||||
class XiamiSongIE(XiamiBaseIE):
    """Extractor for a single Xiami song page (``/song/<id>``)."""

    IE_NAME = 'xiami:song'
    IE_DESC = '虾米音乐'
    _VALID_URL = r'https?://(?:www\.)?xiami\.com/song/(?P<id>[0-9]+)'
    _TESTS = [
        {
            'url': 'http://www.xiami.com/song/1775610518',
            'md5': '521dd6bea40fd5c9c69f913c232cb57e',
            'info_dict': {
                'id': '1775610518',
                'ext': 'mp3',
                'title': 'Woman',
                'thumbnail': r're:http://img\.xiami\.net/images/album/.*\.jpg',
                'duration': 265,
                'creator': 'HONNE',
                'track': 'Woman',
                'album': 'Woman',
                'artist': 'HONNE',
                'subtitles': {
                    'origin': [{
                        'ext': 'lrc',
                    }],
                },
            }
        },
        {
            'url': 'http://www.xiami.com/song/1775256504',
            'md5': '932a3abd45c6aa2b1fdbe028fcb4c4fc',
            'info_dict': {
                'id': '1775256504',
                'ext': 'mp3',
                'title': '悟空',
                'thumbnail': r're:http://img\.xiami\.net/images/album/.*\.jpg',
                'duration': 200,
                'creator': '戴荃',
                'track': '悟空',
                'album': '悟空',
                'artist': '戴荃',
                'subtitles': {
                    'origin': [{
                        'ext': 'lrc',
                    }],
                },
            }
        },
    ]

    def _real_extract(self, url):
        """Return the info dict of the first track listed for the song id in ``url``."""
        song_id = self._match_id(url)
        tracks = self._extract_tracks(song_id)
        return tracks[0]
|
||||
|
||||
|
||||
class XiamiPlaylistBaseIE(XiamiBaseIE):
    """Base for the multi-track extractors; subclasses supply ``_TYPE``."""

    def _real_extract(self, url):
        """Return a playlist result holding every track of the item in ``url``."""
        item_id = self._match_id(url)
        # _TYPE is the API type selector defined by each concrete subclass.
        entries = self._extract_tracks(item_id, self._TYPE)
        return self.playlist_result(entries, item_id)
|
||||
|
||||
|
||||
class XiamiAlbumIE(XiamiPlaylistBaseIE):
    """Extractor for Xiami album pages (``/album/<id>``), API type '1'."""

    IE_NAME = 'xiami:album'
    IE_DESC = '虾米音乐 - 专辑'
    _VALID_URL = r'https?://(?:www\.)?xiami\.com/album/(?P<id>[0-9]+)'
    _TYPE = '1'
    _TESTS = [
        {
            'url': 'http://www.xiami.com/album/2100300444',
            'info_dict': {
                'id': '2100300444',
            },
            'playlist_count': 10,
        },
        {
            # URL carrying a tracking query string; only the pattern is checked.
            'url': 'http://www.xiami.com/album/512288?spm=a1z1s.6843761.1110925389.6.hhE9p9',
            'only_matching': True,
        },
    ]
|
||||
|
||||
|
||||
class XiamiArtistIE(XiamiPlaylistBaseIE):
    """Extractor for Xiami artist pages (``/artist/<id>``), API type '2'."""

    IE_NAME = 'xiami:artist'
    IE_DESC = '虾米音乐 - 歌手'
    _VALID_URL = r'https?://(?:www\.)?xiami\.com/artist/(?P<id>[0-9]+)'
    _TYPE = '2'
    _TEST = {
        'url': 'http://www.xiami.com/artist/2132?spm=0.0.0.0.dKaScp',
        'info_dict': {
            'id': '2132',
        },
        'playlist_count': 20,
    }
|
||||
|
||||
|
||||
class XiamiCollectionIE(XiamiPlaylistBaseIE):
    """Extractor for Xiami collection pages (``/collect/<id>``), API type '3'."""

    IE_NAME = 'xiami:collection'
    IE_DESC = '虾米音乐 - 精选集'
    _VALID_URL = r'https?://(?:www\.)?xiami\.com/collect/(?P<id>[0-9]+)'
    _TYPE = '3'
    _TEST = {
        'url': 'http://www.xiami.com/collect/156527391?spm=a1z1s.2943601.6856193.12.4jpBnr',
        'info_dict': {
            'id': '156527391',
        },
        'playlist_mincount': 29,
    }
|
@ -389,23 +389,30 @@ class FFmpegEmbedSubtitlePP(FFmpegPostProcessor):
|
||||
class FFmpegMetadataPP(FFmpegPostProcessor):
|
||||
def run(self, info):
|
||||
metadata = {}
|
||||
if info.get('title') is not None:
|
||||
metadata['title'] = info['title']
|
||||
if info.get('upload_date') is not None:
|
||||
metadata['date'] = info['upload_date']
|
||||
if info.get('artist') is not None:
|
||||
metadata['artist'] = info['artist']
|
||||
elif info.get('uploader') is not None:
|
||||
metadata['artist'] = info['uploader']
|
||||
elif info.get('uploader_id') is not None:
|
||||
metadata['artist'] = info['uploader_id']
|
||||
if info.get('description') is not None:
|
||||
metadata['description'] = info['description']
|
||||
metadata['comment'] = info['description']
|
||||
if info.get('webpage_url') is not None:
|
||||
metadata['purl'] = info['webpage_url']
|
||||
if info.get('album') is not None:
|
||||
metadata['album'] = info['album']
|
||||
|
||||
def add(meta_list, info_list=None):
|
||||
if not info_list:
|
||||
info_list = meta_list
|
||||
if not isinstance(meta_list, (list, tuple)):
|
||||
meta_list = (meta_list,)
|
||||
if not isinstance(info_list, (list, tuple)):
|
||||
info_list = (info_list,)
|
||||
for info_f in info_list:
|
||||
if info.get(info_f) is not None:
|
||||
for meta_f in meta_list:
|
||||
metadata[meta_f] = info[info_f]
|
||||
break
|
||||
|
||||
add('title', ('track', 'title'))
|
||||
add('date', 'upload_date')
|
||||
add(('description', 'comment'), 'description')
|
||||
add('purl', 'webpage_url')
|
||||
add('track', 'track_number')
|
||||
add('artist', ('artist', 'creator', 'uploader', 'uploader_id'))
|
||||
add('genre')
|
||||
add('album')
|
||||
add('album_artist')
|
||||
add('disc', 'disc_number')
|
||||
|
||||
if not metadata:
|
||||
self._downloader.to_screen('[ffmpeg] There isn\'t any metadata to add')
|
||||
|
@ -1,3 +1,3 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
__version__ = '2016.04.24'
|
||||
__version__ = '2016.05.01'
|
||||
|
Loading…
x
Reference in New Issue
Block a user