Merge branch 'master' of https://github.com/rg3/youtube-dl into tumblr

Oli Allen 2015-10-04 18:13:59 +01:00
commit 92e0aee3e4
14 changed files with 413 additions and 19 deletions

View File

@@ -8,6 +8,35 @@ import re
ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
README_FILE = os.path.join(ROOT_DIR, 'README.md')
def filter_options(readme):
ret = ''
in_options = False
for line in readme.split('\n'):
if line.startswith('# '):
if line[2:].startswith('OPTIONS'):
in_options = True
else:
in_options = False
if in_options:
if line.lstrip().startswith('-'):
option, description = re.split(r'\s{2,}', line.lstrip())
split_option = option.split(' ')
if not split_option[-1].startswith('-'): # metavar
option = ' '.join(split_option[:-1] + ['*%s*' % split_option[-1]])
# Pandoc's definition_lists. See http://pandoc.org/README.html
# for more information.
ret += '\n%s\n: %s\n' % (option, description)
else:
ret += line.lstrip() + '\n'
else:
ret += line + '\n'
return ret
with io.open(README_FILE, encoding='utf-8') as f:
readme = f.read()
@@ -26,6 +55,8 @@ readme = re.sub(r'(?s)^.*?(?=# DESCRIPTION)', '', readme)
readme = re.sub(r'\s+youtube-dl \[OPTIONS\] URL \[URL\.\.\.\]', '', readme)
readme = PREFIX + readme
readme = filter_options(readme)
if sys.version_info < (3, 0):
print(readme.encode('utf-8'))
else:
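For illustration, a minimal sketch of what the new filter_options pass produces, assuming the function above and `import re` are in scope (the README option line below is made up):

sample = '# OPTIONS\n    -h, --help                       Print this help text and exit\n'
print(filter_options(sample))
# Output (a Pandoc definition list):
# # OPTIONS
#
# -h, --help
# : Print this help text and exit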

View File

@@ -1232,13 +1232,20 @@ class YoutubeDL(object):
except (ValueError, OverflowError, OSError):
pass
subtitles = info_dict.get('subtitles')
if subtitles:
for _, subtitle in subtitles.items():
for subtitle_format in subtitle:
if 'ext' not in subtitle_format:
subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
if self.params.get('listsubtitles', False):
if 'automatic_captions' in info_dict:
self.list_subtitles(info_dict['id'], info_dict.get('automatic_captions'), 'automatic captions')
-self.list_subtitles(info_dict['id'], info_dict.get('subtitles'), 'subtitles')
+self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
return
info_dict['requested_subtitles'] = self.process_subtitles(
-info_dict['id'], info_dict.get('subtitles'),
+info_dict['id'], subtitles,
info_dict.get('automatic_captions'))
# We now pick which formats have to be downloaded

View File

@@ -416,7 +416,7 @@ if hasattr(shutil, 'get_terminal_size'): # Python >= 3.3
else:
_terminal_size = collections.namedtuple('terminal_size', ['columns', 'lines'])
-def compat_get_terminal_size():
+def compat_get_terminal_size(fallback=(80, 24)):
columns = compat_getenv('COLUMNS', None)
if columns:
columns = int(columns)
@@ -428,14 +428,20 @@ else:
else:
lines = None
if columns <= 0 or lines <= 0:
try:
sp = subprocess.Popen(
['stty', 'size'],
stdout=subprocess.PIPE, stderr=subprocess.PIPE)
out, err = sp.communicate()
-lines, columns = map(int, out.split())
+_columns, _lines = map(int, out.split())
except Exception:
-pass
+_columns, _lines = _terminal_size(*fallback)
if columns <= 0:
columns = _columns
if lines <= 0:
lines = _lines
return _terminal_size(columns, lines)
try:
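A rough usage sketch of the new fallback behaviour, assuming compat_get_terminal_size is imported from youtube_dl.compat: when COLUMNS/LINES give no usable values and `stty size` fails, the result comes from the fallback tuple:

from youtube_dl.compat import compat_get_terminal_size

size = compat_get_terminal_size(fallback=(80, 24))
print('%dx%d' % (size.columns, size.lines))  # e.g. 80x24 when no terminal information is available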

View File

@@ -158,6 +158,7 @@ from .eroprofile import EroProfileIE
from .escapist import EscapistIE
from .espn import ESPNIE
from .esri import EsriVideoIE
from .europa import EuropaIE
from .everyonesmixtape import EveryonesMixtapeIE
from .exfm import ExfmIE
from .expotv import ExpoTVIE
@@ -294,6 +295,11 @@ from .lifenews import (
LifeNewsIE,
LifeEmbedIE,
)
from .limelight import (
LimelightMediaIE,
LimelightChannelIE,
LimelightChannelListIE,
)
from .liveleak import LiveLeakIE
from .livestream import (
LivestreamIE,

View File

@@ -165,6 +165,7 @@ class InfoExtractor(object):
with the "ext" entry and one of:
* "data": The subtitles file contents
* "url": A URL pointing to the subtitles file
"ext" will be calculated from URL if missing
automatic_captions: Like 'subtitles', used by the YoutubeIE for
automatically generated captions
duration: Length of the video in seconds, as an integer.
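A minimal sketch of the documented structure (URL and language code made up); for the second entry the missing "ext" would be derived from the URL, here 'srt':

subtitles = {
    'en': [
        {'ext': 'vtt', 'data': 'WEBVTT\n\n...'},
        {'url': 'http://example.com/subs/en.srt'},  # 'ext' filled in from the URL
    ],
}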

View File

@@ -10,7 +10,7 @@ from ..utils import (
class EngadgetIE(InfoExtractor):
_VALID_URL = r'''(?x)https?://www.engadget.com/
-(?:video/5min/(?P<id>\d+)|
+(?:video(?:/5min)?/(?P<id>\d+)|
[\d/]+/.*?)
'''

View File

@@ -0,0 +1,93 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..compat import compat_urlparse
from ..utils import (
int_or_none,
orderedSet,
parse_duration,
qualities,
unified_strdate,
xpath_text
)
class EuropaIE(InfoExtractor):
_VALID_URL = r'https?://ec\.europa\.eu/avservices/(?:video/player|audio/audioDetails)\.cfm\?.*?\bref=(?P<id>[A-Za-z0-9-]+)'
_TESTS = [{
'url': 'http://ec.europa.eu/avservices/video/player.cfm?ref=I107758',
'md5': '574f080699ddd1e19a675b0ddf010371',
'info_dict': {
'id': 'I107758',
'ext': 'mp4',
'title': 'TRADE - Wikileaks on TTIP',
'description': 'NEW LIVE EC Midday press briefing of 11/08/2015',
'thumbnail': 're:^https?://.*\.jpg$',
'upload_date': '20150811',
'duration': 34,
'view_count': int,
'formats': 'mincount:3',
}
}, {
'url': 'http://ec.europa.eu/avservices/video/player.cfm?sitelang=en&ref=I107786',
'only_matching': True,
}, {
'url': 'http://ec.europa.eu/avservices/audio/audioDetails.cfm?ref=I-109295&sitelang=en',
'only_matching': True,
}]
def _real_extract(self, url):
video_id = self._match_id(url)
playlist = self._download_xml(
'http://ec.europa.eu/avservices/video/player/playlist.cfm?ID=%s' % video_id, video_id)
def get_item(type_, preference):
items = {}
for item in playlist.findall('./info/%s/item' % type_):
lang, label = xpath_text(item, 'lg', default=None), xpath_text(item, 'label', default=None)
if lang and label:
items[lang] = label.strip()
for p in preference:
if items.get(p):
return items[p]
query = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
preferred_lang = query.get('sitelang', ('en', ))[0]
preferred_langs = orderedSet((preferred_lang, 'en', 'int'))
title = get_item('title', preferred_langs) or video_id
description = get_item('description', preferred_langs)
thumbnmail = xpath_text(playlist, './info/thumburl', 'thumbnail')
upload_date = unified_strdate(xpath_text(playlist, './info/date', 'upload date'))
duration = parse_duration(xpath_text(playlist, './info/duration', 'duration'))
view_count = int_or_none(xpath_text(playlist, './info/views', 'views'))
language_preference = qualities(preferred_langs[::-1])
formats = []
for file_ in playlist.findall('./files/file'):
video_url = xpath_text(file_, './url')
if not video_url:
continue
lang = xpath_text(file_, './lg')
formats.append({
'url': video_url,
'format_id': lang,
'format_note': xpath_text(file_, './lglabel'),
'language_preference': language_preference(lang)
})
self._sort_formats(formats)
return {
'id': video_id,
'title': title,
'description': description,
'thumbnail': thumbnmail,
'upload_date': upload_date,
'duration': duration,
'view_count': view_count,
'formats': formats
}
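On the ordering above: language_preference is built with qualities() over the reversed preference list, so earlier preferred languages score higher. A small standalone sketch (language codes chosen for illustration):

from youtube_dl.utils import qualities

preferred_langs = ['de', 'en', 'int']  # e.g. for sitelang=de
language_preference = qualities(preferred_langs[::-1])
print(language_preference('de'))   # 2 (most preferred)
print(language_preference('en'))   # 1
print(language_preference('fr'))   # -1 (not in the list)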

View File

@@ -0,0 +1,229 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
determine_ext,
float_or_none,
int_or_none,
)
class LimelightBaseIE(InfoExtractor):
_PLAYLIST_SERVICE_URL = 'http://production-ps.lvp.llnw.net/r/PlaylistService/%s/%s/%s'
_API_URL = 'http://api.video.limelight.com/rest/organizations/%s/%s/%s/%s.json'
def _call_playlist_service(self, item_id, method, fatal=True):
return self._download_json(
self._PLAYLIST_SERVICE_URL % (self._PLAYLIST_SERVICE_PATH, item_id, method),
item_id, 'Downloading PlaylistService %s JSON' % method, fatal=fatal)
def _call_api(self, organization_id, item_id, method):
return self._download_json(
self._API_URL % (organization_id, self._API_PATH, item_id, method),
item_id, 'Downloading API %s JSON' % method)
def _extract(self, item_id, pc_method, mobile_method, meta_method):
pc = self._call_playlist_service(item_id, pc_method)
metadata = self._call_api(pc['orgId'], item_id, meta_method)
mobile = self._call_playlist_service(item_id, mobile_method, fatal=False)
return pc, mobile, metadata
def _extract_info(self, streams, mobile_urls, properties):
video_id = properties['media_id']
formats = []
for stream in streams:
stream_url = stream.get('url')
if not stream_url:
continue
if '.f4m' in stream_url:
formats.extend(self._extract_f4m_formats(stream_url, video_id))
else:
fmt = {
'url': stream_url,
'abr': float_or_none(stream.get('audioBitRate')),
'vbr': float_or_none(stream.get('videoBitRate')),
'fps': float_or_none(stream.get('videoFrameRate')),
'width': int_or_none(stream.get('videoWidthInPixels')),
'height': int_or_none(stream.get('videoHeightInPixels')),
'ext': determine_ext(stream_url)
}
rtmp = re.search(r'^(?P<url>rtmpe?://[^/]+/(?P<app>.+))/(?P<playpath>mp4:.+)$', stream_url)
if rtmp:
format_id = 'rtmp'
if stream.get('videoBitRate'):
format_id += '-%d' % int_or_none(stream['videoBitRate'])
fmt.update({
'url': rtmp.group('url'),
'play_path': rtmp.group('playpath'),
'app': rtmp.group('app'),
'ext': 'flv',
'format_id': format_id,
})
formats.append(fmt)
for mobile_url in mobile_urls:
media_url = mobile_url.get('mobileUrl')
if not media_url:
continue
format_id = mobile_url.get('targetMediaPlatform')
if determine_ext(media_url) == 'm3u8':
formats.extend(self._extract_m3u8_formats(
media_url, video_id, 'mp4', entry_protocol='m3u8_native',
preference=-1, m3u8_id=format_id))
else:
formats.append({
'url': media_url,
'format_id': format_id,
'preference': -1,
})
self._sort_formats(formats)
title = properties['title']
description = properties.get('description')
timestamp = int_or_none(properties.get('publish_date') or properties.get('create_date'))
duration = float_or_none(properties.get('duration_in_milliseconds'), 1000)
filesize = int_or_none(properties.get('total_storage_in_bytes'))
categories = [properties.get('category')]
tags = properties.get('tags', [])
thumbnails = [{
'url': thumbnail['url'],
'width': int_or_none(thumbnail.get('width')),
'height': int_or_none(thumbnail.get('height')),
} for thumbnail in properties.get('thumbnails', []) if thumbnail.get('url')]
subtitles = {}
for caption in properties.get('captions', {}):
lang = caption.get('language_code')
subtitles_url = caption.get('url')
if lang and subtitles_url:
subtitles[lang] = [{
'url': subtitles_url,
}]
return {
'id': video_id,
'title': title,
'description': description,
'formats': formats,
'timestamp': timestamp,
'duration': duration,
'filesize': filesize,
'categories': categories,
'tags': tags,
'thumbnails': thumbnails,
'subtitles': subtitles,
}
class LimelightMediaIE(LimelightBaseIE):
IE_NAME = 'limelight'
_VALID_URL = r'(?:limelight:media:|http://link\.videoplatform\.limelight\.com/media/\??\bmediaId=)(?P<id>[a-z0-9]{32})'
_TESTS = [{
'url': 'http://link.videoplatform.limelight.com/media/?mediaId=3ffd040b522b4485b6d84effc750cd86',
'info_dict': {
'id': '3ffd040b522b4485b6d84effc750cd86',
'ext': 'flv',
'title': 'HaP and the HB Prince Trailer',
'description': 'md5:8005b944181778e313d95c1237ddb640',
'thumbnail': 're:^https?://.*\.jpeg$',
'duration': 144.23,
'timestamp': 1244136834,
'upload_date': '20090604',
},
'params': {
# rtmp download
'skip_download': True,
},
}, {
# video with subtitles
'url': 'limelight:media:a3e00274d4564ec4a9b29b9466432335',
'info_dict': {
'id': 'a3e00274d4564ec4a9b29b9466432335',
'ext': 'flv',
'title': '3Play Media Overview Video',
'description': '',
'thumbnail': 're:^https?://.*\.jpeg$',
'duration': 78.101,
'timestamp': 1338929955,
'upload_date': '20120605',
'subtitles': 'mincount:9',
},
'params': {
# rtmp download
'skip_download': True,
},
}]
_PLAYLIST_SERVICE_PATH = 'media'
_API_PATH = 'media'
def _real_extract(self, url):
video_id = self._match_id(url)
pc, mobile, metadata = self._extract(
video_id, 'getPlaylistByMediaId', 'getMobilePlaylistByMediaId', 'properties')
return self._extract_info(
pc['playlistItems'][0].get('streams', []),
mobile['mediaList'][0].get('mobileUrls', []) if mobile else [],
metadata)
class LimelightChannelIE(LimelightBaseIE):
IE_NAME = 'limelight:channel'
_VALID_URL = r'(?:limelight:channel:|http://link\.videoplatform\.limelight\.com/media/\??\bchannelId=)(?P<id>[a-z0-9]{32})'
_TEST = {
'url': 'http://link.videoplatform.limelight.com/media/?channelId=ab6a524c379342f9b23642917020c082',
'info_dict': {
'id': 'ab6a524c379342f9b23642917020c082',
'title': 'Javascript Sample Code',
},
'playlist_mincount': 3,
}
_PLAYLIST_SERVICE_PATH = 'channel'
_API_PATH = 'channels'
def _real_extract(self, url):
channel_id = self._match_id(url)
pc, mobile, medias = self._extract(
channel_id, 'getPlaylistByChannelId',
'getMobilePlaylistWithNItemsByChannelId?begin=0&count=-1', 'media')
entries = [
self._extract_info(
pc['playlistItems'][i].get('streams', []),
mobile['mediaList'][i].get('mobileUrls', []) if mobile else [],
medias['media_list'][i])
for i in range(len(medias['media_list']))]
return self.playlist_result(entries, channel_id, pc['title'])
class LimelightChannelListIE(LimelightBaseIE):
IE_NAME = 'limelight:channel_list'
_VALID_URL = r'(?:limelight:channel_list:|http://link\.videoplatform\.limelight\.com/media/\?.*?\bchannelListId=)(?P<id>[a-z0-9]{32})'
_TEST = {
'url': 'http://link.videoplatform.limelight.com/media/?channelListId=301b117890c4465c8179ede21fd92e2b',
'info_dict': {
'id': '301b117890c4465c8179ede21fd92e2b',
'title': 'Website - Hero Player',
},
'playlist_mincount': 2,
}
_PLAYLIST_SERVICE_PATH = 'channel_list'
def _real_extract(self, url):
channel_list_id = self._match_id(url)
channel_list = self._call_playlist_service(channel_list_id, 'getMobileChannelListById')
entries = [
self.url_result('limelight:channel:%s' % channel['id'], 'LimelightChannel')
for channel in channel_list['channelList']]
return self.playlist_result(entries, channel_list_id, channel_list['title'])
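For the RTMP branch in _extract_info above, a quick sketch of how the regex splits a stream URL into connection URL, app and play path — the URL itself is invented for illustration:

import re

rtmp = re.search(
    r'^(?P<url>rtmpe?://[^/]+/(?P<app>.+))/(?P<playpath>mp4:.+)$',
    'rtmp://example.llnwd.net/s/folder/mp4:videos/clip.mp4')
print(rtmp.group('url'))       # rtmp://example.llnwd.net/s/folder
print(rtmp.group('app'))       # s/folder
print(rtmp.group('playpath'))  # mp4:videos/clip.mp4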

View File

@@ -10,7 +10,6 @@ from ..compat import (
)
from ..utils import (
ExtractorError,
-clean_html,
)

View File

@@ -134,6 +134,24 @@ class PBSIE(InfoExtractor):
'params': {
'skip_download': True, # requires ffmpeg
},
},
{
# Video embedded in iframe containing angle brackets as attribute's value (e.g.
# "<iframe style='position: absolute;<br />\ntop: 0; left: 0;' ...", see
# https://github.com/rg3/youtube-dl/issues/7059)
'url': 'http://www.pbs.org/food/features/a-chefs-life-season-3-episode-5-prickly-business/',
'info_dict': {
'id': '2365546844',
'display_id': 'a-chefs-life-season-3-episode-5-prickly-business',
'ext': 'mp4',
'title': "A Chef's Life - Season 3, Ep. 5: Prickly Business",
'description': 'md5:61db2ddf27c9912f09c241014b118ed1',
'duration': 1480,
'thumbnail': 're:^https?://.*\.jpg$',
},
'params': {
'skip_download': True, # requires ffmpeg
},
}
]
@@ -167,7 +185,7 @@ class PBSIE(InfoExtractor):
return media_id, presumptive_id, upload_date
url = self._search_regex(
-r'<iframe\s+[^>]*\s+src=["\']([^\'"]+partnerplayer[^\'"]+)["\']',
+r'(?s)<iframe[^>]+?(?:[a-z-]+?=["\'].*?["\'][^>]+?)*?\bsrc=["\']([^\'"]+partnerplayer[^\'"]+)["\']',
webpage, 'player URL')
mobj = re.match(self._VALID_URL, url)
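The loosened regex above is meant to survive markup like the issue-7059 case, where an attribute value itself contains angle brackets and a line break; a minimal check against fabricated markup of that shape:

import re

webpage = ("<iframe style='position: absolute;<br />\ntop: 0; left: 0;' "
           "src='http://player.pbs.org/partnerplayer/2365546844/' allowfullscreen></iframe>")
m = re.search(
    r'(?s)<iframe[^>]+?(?:[a-z-]+?=["\'].*?["\'][^>]+?)*?\bsrc=["\']([^\'"]+partnerplayer[^\'"]+)["\']',
    webpage)
print(m.group(1))  # http://player.pbs.org/partnerplayer/2365546844/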

View File

@@ -74,7 +74,7 @@ class RuutuIE(InfoExtractor):
preference = -1 if proto == 'rtmp' else 1
label = child.get('label')
tbr = int_or_none(child.get('bitrate'))
-width, height = [int_or_none(x) for x in child.get('resolution', '').split('x')]
+width, height = [int_or_none(x) for x in child.get('resolution', 'x').split('x')[:2]]
formats.append({
'format_id': '%s-%s' % (proto, label if label else tbr),
'url': video_url,
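Defaulting the attribute to 'x' and slicing to two fields means a missing or oddly formed resolution degrades to None values instead of raising; a small sketch with invented attribute values (parse_resolution is a hypothetical wrapper around the patched expression):

from youtube_dl.utils import int_or_none

def parse_resolution(value):
    # Same expression as the patched line, with child.get('resolution', 'x') inlined as `value`
    return [int_or_none(x) for x in (value if value is not None else 'x').split('x')[:2]]

print(parse_resolution('1280x720'))      # [1280, 720]
print(parse_resolution(None))            # [None, None] -- attribute missing
print(parse_resolution('1920x1080x25'))  # [1920, 1080] -- extra field ignored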

View File

@@ -16,7 +16,7 @@ from ..utils import (
class TapelyIE(InfoExtractor):
-_VALID_URL = r'https?://(?:www\.)?tape\.ly/(?P<id>[A-Za-z0-9\-_]+)(?:/(?P<songnr>\d+))?'
+_VALID_URL = r'https?://(?:www\.)?(?:tape\.ly|tapely\.com)/(?P<id>[A-Za-z0-9\-_]+)(?:/(?P<songnr>\d+))?'
_API_URL = 'http://tape.ly/showtape?id={0:}'
_S3_SONG_URL = 'http://mytape.s3.amazonaws.com/{0:}'
_SOUNDCLOUD_SONG_URL = 'http://api.soundcloud.com{0:}'
@@ -42,6 +42,10 @@ class TapelyIE(InfoExtractor):
'ext': 'm4a',
},
},
{
'url': 'https://tapely.com/my-grief-as-told-by-water',
'only_matching': True,
},
]
def _real_extract(self, url):