Merge remote-tracking branch 'upstream/master'

This commit is contained in:
Jeff Buchbinder 2015-03-02 14:58:55 -05:00
commit 83cc03b926
41 changed files with 1176 additions and 232 deletions

View File

@ -112,3 +112,4 @@ Frans de Jonge
Robin de Rooij
Ryan Schmidt
Leslie P. Polzer
Duncan Keall

View File

@ -2,6 +2,7 @@ all: youtube-dl README.md CONTRIBUTING.md README.txt youtube-dl.1 youtube-dl.bas
clean:
rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish *.dump *.part *.info.json *.mp4 *.flv *.mp3 *.avi CONTRIBUTING.md.tmp youtube-dl youtube-dl.exe
find -name "*.pyc" -delete
PREFIX ?= /usr/local
BINDIR ?= $(PREFIX)/bin

View File

@ -139,6 +139,8 @@ which means you can modify it, redistribute it or use it however you like.
dislike_count <? 50 & description" .
--no-playlist If the URL refers to a video and a
playlist, download only the video.
--yes-playlist If the URL refers to a video and a
playlist, download the playlist.
--age-limit YEARS download only videos suitable for the given
age
--download-archive FILE Download only videos not listed in the
@ -406,6 +408,8 @@ which means you can modify it, redistribute it or use it however you like.
downloading, similar to find's -exec
syntax. Example: --exec 'adb push {}
/sdcard/Music/ && rm {}'
--convert-subtitles FORMAT Convert the subtitles to other format
(currently supported: srt|ass|vtt)
# CONFIGURATION
@ -525,6 +529,10 @@ YouTube requires an additional signature since September 2012 which is not suppo
In February 2015, the new YouTube player contained a character sequence in a string that was misinterpreted by old versions of youtube-dl. See [above](#how-do-i-update-youtube-dl) for how to update youtube-dl.
### HTTP Error 429: Too Many Requests or 402: Payment Required
These two error codes indicate that the service is blocking your IP address because of overuse. Contact the service and ask them to unblock your IP address, or - if you have acquired a whitelisted IP address already - use the [`--proxy` or `--network-address` options](#network-options) to select another IP address.
### SyntaxError: Non-ASCII character ###
The error

View File

@ -17,6 +17,7 @@
- **AdultSwim**
- **Aftenposten**
- **Aftonbladet**
- **AirMozilla**
- **AlJazeera**
- **Allocine**
- **AlphaPorno**
@ -209,6 +210,7 @@
- **Jove**
- **jpopsuki.tv**
- **Jukebox**
- **Kaltura**
- **Kankan**
- **Karaoketv**
- **keek**
@ -220,6 +222,9 @@
- **Ku6**
- **la7.tv**
- **Laola1Tv**
- **Letv**
- **LetvPlaylist**
- **LetvTv**
- **lifenews**: LIFE | NEWS
- **LiveLeak**
- **livestream**
@ -304,6 +309,7 @@
- **Nuvid**
- **NYTimes**
- **ocw.mit.edu**
- **Odnoklassniki**
- **OktoberfestTV**
- **on.aol.com**
- **Ooyala**
@ -330,6 +336,7 @@
- **PornoXO**
- **PromptFile**
- **prosiebensat1**: ProSiebenSat.1 Digital
- **Puls4**
- **Pyvideo**
- **QuickVid**
- **R7**
@ -408,7 +415,7 @@
- **StreamCZ**
- **StreetVoice**
- **SunPorno**
- **SVTPlay**
- **SVTPlay**: SVT Play and Öppet arkiv
- **SWRMediathek**
- **Syfy**
- **SztvHu**

View File

@ -85,8 +85,11 @@ class TestUtil(unittest.TestCase):
self.assertEqual(
sanitize_filename('New World record at 0:12:34'),
'New World record at 0_12_34')
self.assertEqual(sanitize_filename('--gasdgf'), '_-gasdgf')
self.assertEqual(sanitize_filename('--gasdgf', is_id=True), '--gasdgf')
self.assertEqual(sanitize_filename('.gasdgf'), 'gasdgf')
self.assertEqual(sanitize_filename('.gasdgf', is_id=True), '.gasdgf')
forbidden = '"\0\\/'
for fc in forbidden:
@ -246,6 +249,7 @@ class TestUtil(unittest.TestCase):
self.assertEqual(parse_duration('2.5 hours'), 9000)
self.assertEqual(parse_duration('02:03:04'), 7384)
self.assertEqual(parse_duration('01:02:03:04'), 93784)
self.assertEqual(parse_duration('1 hour 3 minutes'), 3780)
def test_fix_xml_ampersands(self):
self.assertEqual(

View File

@ -4,8 +4,10 @@
from __future__ import absolute_import, unicode_literals
import collections
import contextlib
import datetime
import errno
import fileinput
import io
import itertools
import json
@ -28,6 +30,7 @@ from .compat import (
compat_basestring,
compat_cookiejar,
compat_expanduser,
compat_get_terminal_size,
compat_http_client,
compat_kwargs,
compat_str,
@ -46,7 +49,6 @@ from .utils import (
ExtractorError,
format_bytes,
formatSeconds,
get_term_width,
locked_file,
make_HTTPS_handler,
MaxDownloadsReached,
@ -247,10 +249,10 @@ class YoutubeDL(object):
hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv.
The following parameters are not used by YoutubeDL itself, they are used by
the FileDownloader:
the downloader (see youtube_dl/downloader/common.py):
nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
noresizebuffer, retries, continuedl, noprogress, consoletitle,
xattr_set_filesize.
xattr_set_filesize, external_downloader_args.
The following options are used by the post processors:
prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available,
@ -284,7 +286,7 @@ class YoutubeDL(object):
try:
import pty
master, slave = pty.openpty()
width = get_term_width()
width = compat_get_terminal_size().columns
if width is None:
width_args = []
else:
@ -1300,17 +1302,18 @@ class YoutubeDL(object):
# subtitles download errors are already managed as troubles in relevant IE
# that way it will silently go on when used with unsupporting IE
subtitles = info_dict['requested_subtitles']
ie = self.get_info_extractor(info_dict['extractor_key'])
for sub_lang, sub_info in subtitles.items():
sub_format = sub_info['ext']
if sub_info.get('data') is not None:
sub_data = sub_info['data']
else:
try:
uf = self.urlopen(sub_info['url'])
sub_data = uf.read().decode('utf-8')
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
sub_data = ie._download_webpage(
sub_info['url'], info_dict['id'], note=False)
except ExtractorError as err:
self.report_warning('Unable to download subtitle for "%s": %s' %
(sub_lang, compat_str(err)))
(sub_lang, compat_str(err.cause)))
continue
try:
sub_filename = subtitles_filename(filename, sub_lang, sub_format)
@ -1451,8 +1454,11 @@ class YoutubeDL(object):
return self._download_retcode
def download_with_info_file(self, info_filename):
with io.open(info_filename, 'r', encoding='utf-8') as f:
info = json.load(f)
with contextlib.closing(fileinput.FileInput(
[info_filename], mode='r',
openhook=fileinput.hook_encoded('utf-8'))) as f:
# FileInput doesn't have a read method, we can't call json.load
info = json.loads('\n'.join(f))
try:
self.process_ie_result(info, download=True)
except DownloadError:

View File

@ -9,6 +9,7 @@ import codecs
import io
import os
import random
import shlex
import sys
@ -170,6 +171,9 @@ def _real_main(argv=None):
if opts.recodevideo is not None:
if opts.recodevideo not in ['mp4', 'flv', 'webm', 'ogg', 'mkv']:
parser.error('invalid video recode format specified')
if opts.convertsubtitles is not None:
if opts.convertsubtitles not in ['srt', 'vtt', 'ass']:
parser.error('invalid subtitle format specified')
if opts.date is not None:
date = DateRange.day(opts.date)
@ -223,6 +227,11 @@ def _real_main(argv=None):
'key': 'FFmpegVideoConvertor',
'preferedformat': opts.recodevideo,
})
if opts.convertsubtitles:
postprocessors.append({
'key': 'FFmpegSubtitlesConvertor',
'format': opts.convertsubtitles,
})
if opts.embedsubtitles:
postprocessors.append({
'key': 'FFmpegEmbedSubtitle',
@ -247,6 +256,9 @@ def _real_main(argv=None):
xattr # Confuse flake8
except ImportError:
parser.error('setting filesize xattr requested but python-xattr is not available')
external_downloader_args = None
if opts.external_downloader_args:
external_downloader_args = shlex.split(opts.external_downloader_args)
match_filter = (
None if opts.match_filter is None
else match_filter_func(opts.match_filter))
@ -351,6 +363,7 @@ def _real_main(argv=None):
'no_color': opts.no_color,
'ffmpeg_location': opts.ffmpeg_location,
'hls_prefer_native': opts.hls_prefer_native,
'external_downloader_args': external_downloader_args,
}
with YoutubeDL(ydl_opts) as ydl:

View File

@ -1,9 +1,11 @@
from __future__ import unicode_literals
import collections
import getpass
import optparse
import os
import re
import shutil
import socket
import subprocess
import sys
@ -364,6 +366,33 @@ def workaround_optparse_bug9161():
return real_add_option(self, *bargs, **bkwargs)
optparse.OptionGroup.add_option = _compat_add_option
if hasattr(shutil, 'get_terminal_size'): # Python >= 3.3
compat_get_terminal_size = shutil.get_terminal_size
else:
_terminal_size = collections.namedtuple('terminal_size', ['columns', 'lines'])
def compat_get_terminal_size():
columns = compat_getenv('COLUMNS', None)
if columns:
columns = int(columns)
else:
columns = None
lines = compat_getenv('LINES', None)
if lines:
lines = int(lines)
else:
lines = None
try:
sp = subprocess.Popen(
['stty', 'size'],
stdout=subprocess.PIPE, stderr=subprocess.PIPE)
out, err = sp.communicate()
lines, columns = map(int, out.split())
except:
pass
return _terminal_size(columns, lines)
__all__ = [
'compat_HTTPError',
@ -371,6 +400,7 @@ __all__ = [
'compat_chr',
'compat_cookiejar',
'compat_expanduser',
'compat_get_terminal_size',
'compat_getenv',
'compat_getpass',
'compat_html_entities',

View File

@ -42,6 +42,8 @@ class FileDownloader(object):
max_filesize: Skip files larger than this size
xattr_set_filesize: Set ytdl.filesize user xattribute with expected size.
(experimenatal)
external_downloader_args: A list of additional command-line arguments for the
external downloader.
Subclasses of this one must re-define the real_download method.
"""

View File

@ -51,6 +51,13 @@ class ExternalFD(FileDownloader):
return []
return [command_option, source_address]
def _configuration_args(self, default=[]):
ex_args = self.params.get('external_downloader_args')
if ex_args is None:
return default
assert isinstance(ex_args, list)
return ex_args
def _call_downloader(self, tmpfilename, info_dict):
""" Either overwrite this or implement _make_cmd """
cmd = self._make_cmd(tmpfilename, info_dict)
@ -79,6 +86,7 @@ class CurlFD(ExternalFD):
for key, val in info_dict['http_headers'].items():
cmd += ['--header', '%s: %s' % (key, val)]
cmd += self._source_address('--interface')
cmd += self._configuration_args()
cmd += ['--', info_dict['url']]
return cmd
@ -89,15 +97,16 @@ class WgetFD(ExternalFD):
for key, val in info_dict['http_headers'].items():
cmd += ['--header', '%s: %s' % (key, val)]
cmd += self._source_address('--bind-address')
cmd += self._configuration_args()
cmd += ['--', info_dict['url']]
return cmd
class Aria2cFD(ExternalFD):
def _make_cmd(self, tmpfilename, info_dict):
cmd = [
self.exe, '-c',
'--min-split-size', '1M', '--max-connection-per-server', '4']
cmd = [self.exe, '-c']
cmd += self._configuration_args([
'--min-split-size', '1M', '--max-connection-per-server', '4'])
dn = os.path.dirname(tmpfilename)
if dn:
cmd += ['--dir', dn]

View File

@ -11,6 +11,7 @@ from .common import FileDownloader
from .http import HttpFD
from ..compat import (
compat_urlparse,
compat_urllib_error,
)
from ..utils import (
struct_pack,
@ -121,7 +122,8 @@ class FlvReader(io.BytesIO):
self.read_unsigned_int() # BootstrapinfoVersion
# Profile,Live,Update,Reserved
self.read(1)
flags = self.read_unsigned_char()
live = flags & 0x20 != 0
# time scale
self.read_unsigned_int()
# CurrentMediaTime
@ -160,6 +162,7 @@ class FlvReader(io.BytesIO):
return {
'segments': segments,
'fragments': fragments,
'live': live,
}
def read_bootstrap_info(self):
@ -182,6 +185,10 @@ def build_fragments_list(boot_info):
for segment, fragments_count in segment_run_table['segment_run']:
for _ in range(fragments_count):
res.append((segment, next(fragments_counter)))
if boot_info['live']:
res = res[-2:]
return res
@ -246,6 +253,38 @@ class F4mFD(FileDownloader):
self.report_error('Unsupported DRM')
return media
def _get_bootstrap_from_url(self, bootstrap_url):
bootstrap = self.ydl.urlopen(bootstrap_url).read()
return read_bootstrap_info(bootstrap)
def _update_live_fragments(self, bootstrap_url, latest_fragment):
fragments_list = []
retries = 30
while (not fragments_list) and (retries > 0):
boot_info = self._get_bootstrap_from_url(bootstrap_url)
fragments_list = build_fragments_list(boot_info)
fragments_list = [f for f in fragments_list if f[1] > latest_fragment]
if not fragments_list:
# Retry after a while
time.sleep(5.0)
retries -= 1
if not fragments_list:
self.report_error('Failed to update fragments')
return fragments_list
def _parse_bootstrap_node(self, node, base_url):
if node.text is None:
bootstrap_url = compat_urlparse.urljoin(
base_url, node.attrib['url'])
boot_info = self._get_bootstrap_from_url(bootstrap_url)
else:
bootstrap_url = None
bootstrap = base64.b64decode(node.text)
boot_info = read_bootstrap_info(bootstrap)
return (boot_info, bootstrap_url)
def real_download(self, filename, info_dict):
man_url = info_dict['url']
requested_bitrate = info_dict.get('tbr')
@ -265,18 +304,13 @@ class F4mFD(FileDownloader):
base_url = compat_urlparse.urljoin(man_url, media.attrib['url'])
bootstrap_node = doc.find(_add_ns('bootstrapInfo'))
if bootstrap_node.text is None:
bootstrap_url = compat_urlparse.urljoin(
base_url, bootstrap_node.attrib['url'])
bootstrap = self.ydl.urlopen(bootstrap_url).read()
else:
bootstrap = base64.b64decode(bootstrap_node.text)
boot_info, bootstrap_url = self._parse_bootstrap_node(bootstrap_node, base_url)
live = boot_info['live']
metadata_node = media.find(_add_ns('metadata'))
if metadata_node is not None:
metadata = base64.b64decode(metadata_node.text)
else:
metadata = None
boot_info = read_bootstrap_info(bootstrap)
fragments_list = build_fragments_list(boot_info)
if self.params.get('test', False):
@ -301,7 +335,8 @@ class F4mFD(FileDownloader):
(dest_stream, tmpfilename) = sanitize_open(tmpfilename, 'wb')
write_flv_header(dest_stream)
write_metadata_tag(dest_stream, metadata)
if not live:
write_metadata_tag(dest_stream, metadata)
# This dict stores the download progress, it's updated by the progress
# hook
@ -348,24 +383,45 @@ class F4mFD(FileDownloader):
http_dl.add_progress_hook(frag_progress_hook)
frags_filenames = []
for (seg_i, frag_i) in fragments_list:
while fragments_list:
seg_i, frag_i = fragments_list.pop(0)
name = 'Seg%d-Frag%d' % (seg_i, frag_i)
url = base_url + name
if akamai_pv:
url += '?' + akamai_pv.strip(';')
frag_filename = '%s-%s' % (tmpfilename, name)
success = http_dl.download(frag_filename, {'url': url})
if not success:
return False
with open(frag_filename, 'rb') as down:
down_data = down.read()
reader = FlvReader(down_data)
while True:
_, box_type, box_data = reader.read_box_info()
if box_type == b'mdat':
dest_stream.write(box_data)
break
frags_filenames.append(frag_filename)
try:
success = http_dl.download(frag_filename, {'url': url})
if not success:
return False
with open(frag_filename, 'rb') as down:
down_data = down.read()
reader = FlvReader(down_data)
while True:
_, box_type, box_data = reader.read_box_info()
if box_type == b'mdat':
dest_stream.write(box_data)
break
if live:
os.remove(frag_filename)
else:
frags_filenames.append(frag_filename)
except (compat_urllib_error.HTTPError, ) as err:
if live and (err.code == 404 or err.code == 410):
# We didn't keep up with the live window. Continue
# with the next available fragment.
msg = 'Fragment %d unavailable' % frag_i
self.report_warning(msg)
fragments_list = []
else:
raise
if not fragments_list and live and bootstrap_url:
fragments_list = self._update_live_fragments(bootstrap_url, frag_i)
total_frags += len(fragments_list)
if fragments_list and (fragments_list[0][1] > frag_i + 1):
msg = 'Missed %d fragments' % (fragments_list[0][1] - (frag_i + 1))
self.report_warning(msg)
dest_stream.close()

View File

@ -119,7 +119,9 @@ class RtmpFD(FileDownloader):
# Download using rtmpdump. rtmpdump returns exit code 2 when
# the connection was interrumpted and resuming appears to be
# possible. This is part of rtmpdump's normal usage, AFAIK.
basic_args = ['rtmpdump', '--verbose', '-r', url, '-o', tmpfilename]
basic_args = [
'rtmpdump', '--verbose', '-r', url,
'-o', encodeFilename(tmpfilename, True)]
if player_url is not None:
basic_args += ['--swfVfy', player_url]
if page_url is not None:

View File

@ -8,6 +8,7 @@ from .adobetv import AdobeTVIE
from .adultswim import AdultSwimIE
from .aftenposten import AftenpostenIE
from .aftonbladet import AftonbladetIE
from .airmozilla import AirMozillaIE
from .aljazeera import AlJazeeraIE
from .alphaporno import AlphaPornoIE
from .anitube import AnitubeIE
@ -226,6 +227,7 @@ from .jeuxvideo import JeuxVideoIE
from .jove import JoveIE
from .jukebox import JukeboxIE
from .jpopsukitv import JpopsukiIE
from .kaltura import KalturaIE
from .kankan import KankanIE
from .karaoketv import KaraoketvIE
from .keezmovies import KeezMoviesIE
@ -237,6 +239,11 @@ from .krasview import KrasViewIE
from .ku6 import Ku6IE
from .la7 import LA7IE
from .laola1tv import Laola1TvIE
from .letv import (
LetvIE,
LetvTvIE,
LetvPlaylistIE
)
from .lifenews import LifeNewsIE
from .liveleak import LiveLeakIE
from .livestream import (
@ -340,6 +347,7 @@ from .ntvde import NTVDeIE
from .ntvru import NTVRuIE
from .nytimes import NYTimesIE
from .nuvid import NuvidIE
from .odnoklassniki import OdnoklassnikiIE
from .oktoberfesttv import OktoberfestTVIE
from .ooyala import OoyalaIE
from .openfilm import OpenFilmIE
@ -367,6 +375,7 @@ from .pornotube import PornotubeIE
from .pornoxo import PornoXOIE
from .promptfile import PromptFileIE
from .prosiebensat1 import ProSiebenSat1IE
from .puls4 import Puls4IE
from .pyvideo import PyvideoIE
from .quickvid import QuickVidIE
from .r7 import R7IE

View File

@ -0,0 +1,74 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
int_or_none,
parse_duration,
parse_iso8601,
)
class AirMozillaIE(InfoExtractor):
_VALID_URL = r'https?://air\.mozilla\.org/(?P<id>[0-9a-z-]+)/?'
_TEST = {
'url': 'https://air.mozilla.org/privacy-lab-a-meetup-for-privacy-minded-people-in-san-francisco/',
'md5': '2e3e7486ba5d180e829d453875b9b8bf',
'info_dict': {
'id': '6x4q2w',
'ext': 'mp4',
'title': 'Privacy Lab - a meetup for privacy minded people in San Francisco',
'thumbnail': 're:https://\w+\.cloudfront\.net/6x4q2w/poster\.jpg\?t=\d+',
'description': 'Brings together privacy professionals and others interested in privacy at for-profits, non-profits, and NGOs in an effort to contribute to the state of the ecosystem...',
'timestamp': 1422487800,
'upload_date': '20150128',
'location': 'SFO Commons',
'duration': 3780,
'view_count': int,
'categories': ['Main'],
}
}
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
video_id = self._html_search_regex(r'//vid.ly/(.*?)/embed', webpage, 'id')
embed_script = self._download_webpage('https://vid.ly/{0}/embed'.format(video_id), video_id)
jwconfig = self._search_regex(r'\svar jwconfig = (\{.*?\});\s', embed_script, 'metadata')
metadata = self._parse_json(jwconfig, video_id)
formats = [{
'url': source['file'],
'ext': source['type'],
'format_id': self._search_regex(r'&format=(.*)$', source['file'], 'video format'),
'format': source['label'],
'height': int(source['label'].rstrip('p')),
} for source in metadata['playlist'][0]['sources']]
self._sort_formats(formats)
view_count = int_or_none(self._html_search_regex(
r'Views since archived: ([0-9]+)',
webpage, 'view count', fatal=False))
timestamp = parse_iso8601(self._html_search_regex(
r'<time datetime="(.*?)"', webpage, 'timestamp', fatal=False))
duration = parse_duration(self._search_regex(
r'Duration:\s*(\d+\s*hours?\s*\d+\s*minutes?)',
webpage, 'duration', fatal=False))
return {
'id': video_id,
'title': self._og_search_title(webpage),
'formats': formats,
'url': self._og_search_url(webpage),
'display_id': display_id,
'thumbnail': metadata['playlist'][0].get('image'),
'description': self._og_search_description(webpage),
'timestamp': timestamp,
'location': self._html_search_regex(r'Location: (.*)', webpage, 'location', default=None),
'duration': duration,
'view_count': view_count,
'categories': re.findall(r'<a href=".*?" class="channel">(.*?)</a>', webpage),
}

View File

@ -250,6 +250,8 @@ class ComedyCentralShowsIE(MTVServicesInfoExtractor):
})
self._sort_formats(formats)
subtitles = self._extract_subtitles(cdoc, guid)
virtual_id = show_name + ' ' + epTitle + ' part ' + compat_str(part_num + 1)
entries.append({
'id': guid,
@ -260,6 +262,7 @@ class ComedyCentralShowsIE(MTVServicesInfoExtractor):
'duration': duration,
'thumbnail': thumbnail,
'description': description,
'subtitles': subtitles,
})
return {

View File

@ -767,6 +767,10 @@ class InfoExtractor(object):
formats)
def _is_valid_url(self, url, video_id, item='video'):
url = self._proto_relative_url(url, scheme='http:')
# For now assume non HTTP(S) URLs always valid
if not (url.startswith('http://') or url.startswith('https://')):
return True
try:
self._request_webpage(url, video_id, 'Checking %s URL' % item)
return True
@ -921,39 +925,57 @@ class InfoExtractor(object):
formats = []
rtmp_count = 0
for video in smil.findall('./body/switch/video'):
src = video.get('src')
if not src:
continue
bitrate = int_or_none(video.get('system-bitrate') or video.get('systemBitrate'), 1000)
width = int_or_none(video.get('width'))
height = int_or_none(video.get('height'))
proto = video.get('proto')
if not proto:
if base:
if base.startswith('rtmp'):
proto = 'rtmp'
elif base.startswith('http'):
proto = 'http'
ext = video.get('ext')
if proto == 'm3u8':
formats.extend(self._extract_m3u8_formats(src, video_id, ext))
elif proto == 'rtmp':
rtmp_count += 1
streamer = video.get('streamer') or base
formats.append({
'url': streamer,
'play_path': src,
'ext': 'flv',
'format_id': 'rtmp-%d' % (rtmp_count if bitrate is None else bitrate),
'tbr': bitrate,
'width': width,
'height': height,
})
if smil.findall('./body/seq/video'):
video = smil.findall('./body/seq/video')[0]
fmts, rtmp_count = self._parse_smil_video(video, video_id, base, rtmp_count)
formats.extend(fmts)
else:
for video in smil.findall('./body/switch/video'):
fmts, rtmp_count = self._parse_smil_video(video, video_id, base, rtmp_count)
formats.extend(fmts)
self._sort_formats(formats)
return formats
def _parse_smil_video(self, video, video_id, base, rtmp_count):
src = video.get('src')
if not src:
return ([], rtmp_count)
bitrate = int_or_none(video.get('system-bitrate') or video.get('systemBitrate'), 1000)
width = int_or_none(video.get('width'))
height = int_or_none(video.get('height'))
proto = video.get('proto')
if not proto:
if base:
if base.startswith('rtmp'):
proto = 'rtmp'
elif base.startswith('http'):
proto = 'http'
ext = video.get('ext')
if proto == 'm3u8':
return (self._extract_m3u8_formats(src, video_id, ext), rtmp_count)
elif proto == 'rtmp':
rtmp_count += 1
streamer = video.get('streamer') or base
return ([{
'url': streamer,
'play_path': src,
'ext': 'flv',
'format_id': 'rtmp-%d' % (rtmp_count if bitrate is None else bitrate),
'tbr': bitrate,
'width': width,
'height': height,
}], rtmp_count)
elif proto.startswith('http'):
return ([{
'url': base + src,
'ext': ext or 'flv',
'tbr': bitrate,
'width': width,
'height': height,
}], rtmp_count)
def _live_title(self, name):
""" Generate the title for a live video """
now = datetime.datetime.now()

View File

@ -3,15 +3,18 @@ from __future__ import unicode_literals
from .common import InfoExtractor
from ..compat import (
compat_urllib_parse,
compat_urllib_request,
)
from ..utils import (
ExtractorError,
js_to_json,
parse_duration,
)
class EscapistIE(InfoExtractor):
_VALID_URL = r'https?://?(www\.)?escapistmagazine\.com/videos/view/[^/?#]+/(?P<id>[0-9]+)-[^/?#]*(?:$|[?#])'
_USER_AGENT = 'Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko'
_TEST = {
'url': 'http://www.escapistmagazine.com/videos/view/the-escapist-presents/6618-Breaking-Down-Baldurs-Gate',
'md5': 'ab3a706c681efca53f0a35f1415cf0d1',
@ -23,12 +26,15 @@ class EscapistIE(InfoExtractor):
'uploader': 'The Escapist Presents',
'title': "Breaking Down Baldur's Gate",
'thumbnail': 're:^https?://.*\.jpg$',
'duration': 264,
}
}
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
webpage_req = compat_urllib_request.Request(url)
webpage_req.add_header('User-Agent', self._USER_AGENT)
webpage = self._download_webpage(webpage_req, video_id)
uploader_id = self._html_search_regex(
r"<h1\s+class='headline'>\s*<a\s+href='/videos/view/(.*?)'",
@ -37,6 +43,7 @@ class EscapistIE(InfoExtractor):
r"<h1\s+class='headline'>(.*?)</a>",
webpage, 'uploader', fatal=False)
description = self._html_search_meta('description', webpage)
duration = parse_duration(self._html_search_meta('duration', webpage))
raw_title = self._html_search_meta('title', webpage, fatal=True)
title = raw_title.partition(' : ')[2]
@ -44,31 +51,42 @@ class EscapistIE(InfoExtractor):
config_url = compat_urllib_parse.unquote(self._html_search_regex(
r'''(?x)
(?:
<param\s+name="flashvars"\s+value="config=|
<param\s+name="flashvars".*?\s+value="config=|
flashvars=&quot;config=
)
([^"&]+)
(https?://[^"&]+)
''',
webpage, 'config URL'))
formats = []
ad_formats = []
def _add_format(name, cfgurl, quality):
def _add_format(name, cfg_url, quality):
cfg_req = compat_urllib_request.Request(cfg_url)
cfg_req.add_header('User-Agent', self._USER_AGENT)
config = self._download_json(
cfgurl, video_id,
cfg_req, video_id,
'Downloading ' + name + ' configuration',
'Unable to download ' + name + ' configuration',
transform_source=js_to_json)
playlist = config['playlist']
video_url = next(
p['url'] for p in playlist
if p.get('eventCategory') == 'Video')
formats.append({
'url': video_url,
'format_id': name,
'quality': quality,
})
for p in playlist:
if p.get('eventCategory') == 'Video':
ar = formats
elif p.get('eventCategory') == 'Video Postroll':
ar = ad_formats
else:
continue
ar.append({
'url': p['url'],
'format_id': name,
'quality': quality,
'http_headers': {
'User-Agent': self._USER_AGENT,
},
})
_add_format('normal', config_url, quality=0)
hq_url = (config_url +
@ -77,10 +95,12 @@ class EscapistIE(InfoExtractor):
_add_format('hq', hq_url, quality=1)
except ExtractorError:
pass # That's fine, we'll just use normal quality
self._sort_formats(formats)
return {
if '/escapist/sales-marketing/' in formats[-1]['url']:
raise ExtractorError('This IP address has been blocked by The Escapist', expected=True)
res = {
'id': video_id,
'formats': formats,
'uploader': uploader,
@ -88,4 +108,21 @@ class EscapistIE(InfoExtractor):
'title': title,
'thumbnail': self._og_search_thumbnail(webpage),
'description': description,
'duration': duration,
}
if self._downloader.params.get('include_ads') and ad_formats:
self._sort_formats(ad_formats)
ad_res = {
'id': '%s-ad' % video_id,
'title': '%s (Postroll)' % title,
'formats': ad_formats,
}
return {
'_type': 'playlist',
'entries': [res, ad_res],
'title': title,
'id': video_id,
}
return res

View File

@ -1,6 +1,8 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
int_or_none,
@ -31,7 +33,7 @@ class GameStarIE(InfoExtractor):
webpage = self._download_webpage(url, video_id)
og_title = self._og_search_title(webpage)
title = og_title.replace(' - Video bei GameStar.de', '').strip()
title = re.sub(r'\s*- Video (bei|-) GameStar\.de$', '', og_title)
url = 'http://gamestar.de/_misc/videos/portal/getVideoUrl.cfm?premium=0&videoId=' + video_id

View File

@ -26,6 +26,7 @@ from ..utils import (
unsmuggle_url,
UnsupportedError,
url_basename,
xpath_text,
)
from .brightcove import BrightcoveIE
from .ooyala import OoyalaIE
@ -557,6 +558,28 @@ class GenericIE(InfoExtractor):
'title': 'EP3S5 - Bon Appétit - Baqueira Mi Corazon !',
}
},
# Kaltura embed
{
'url': 'http://www.monumentalnetwork.com/videos/john-carlson-postgame-2-25-15',
'info_dict': {
'id': '1_eergr3h1',
'ext': 'mp4',
'upload_date': '20150226',
'uploader_id': 'MonumentalSports-Kaltura@perfectsensedigital.com',
'timestamp': int,
'title': 'John Carlson Postgame 2/25/15',
},
},
# RSS feed with enclosure
{
'url': 'http://podcastfeeds.nbcnews.com/audio/podcast/MSNBC-MADDOW-NETCAST-M4V.xml',
'info_dict': {
'id': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
'ext': 'm4v',
'upload_date': '20150228',
'title': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
}
}
]
def report_following_redirect(self, new_url):
@ -568,11 +591,24 @@ class GenericIE(InfoExtractor):
playlist_desc_el = doc.find('./channel/description')
playlist_desc = None if playlist_desc_el is None else playlist_desc_el.text
entries = [{
'_type': 'url',
'url': e.find('link').text,
'title': e.find('title').text,
} for e in doc.findall('./channel/item')]
entries = []
for it in doc.findall('./channel/item'):
next_url = xpath_text(it, 'link', fatal=False)
if not next_url:
enclosure_nodes = it.findall('./enclosure')
for e in enclosure_nodes:
next_url = e.attrib.get('url')
if next_url:
break
if not next_url:
continue
entries.append({
'_type': 'url',
'url': next_url,
'title': it.find('title').text,
})
return {
'_type': 'playlist',
@ -1113,6 +1149,12 @@ class GenericIE(InfoExtractor):
if mobj is not None:
return self.url_result(mobj.group('url'), 'Zapiks')
# Look for Kaltura embeds
mobj = re.search(
r"(?s)kWidget\.(?:thumb)?[Ee]mbed\(\{.*?'wid'\s*:\s*'_?(?P<partner_id>[^']+)',.*?'entry_id'\s*:\s*'(?P<id>[^']+)',", webpage)
if mobj is not None:
return self.url_result('kaltura:%(partner_id)s:%(id)s' % mobj.groupdict(), 'Kaltura')
def check_video(vurl):
if YoutubeIE.suitable(vurl):
return True
@ -1208,7 +1250,9 @@ class GenericIE(InfoExtractor):
return entries[0]
else:
for num, e in enumerate(entries, start=1):
e['title'] = '%s (%d)' % (e['title'], num)
# 'url' results don't have a title
if e.get('title') is not None:
e['title'] = '%s (%d)' % (e['title'], num)
return {
'_type': 'playlist',
'entries': entries,

View File

@ -0,0 +1,138 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import compat_urllib_parse
from ..utils import (
ExtractorError,
int_or_none,
)
class KalturaIE(InfoExtractor):
_VALID_URL = r'''(?x)
(?:kaltura:|
https?://(:?(?:www|cdnapisec)\.)?kaltura\.com/index\.php/kwidget/(?:[^/]+/)*?wid/_
)(?P<partner_id>\d+)
(?::|
/(?:[^/]+/)*?entry_id/
)(?P<id>[0-9a-z_]+)'''
_API_BASE = 'http://cdnapi.kaltura.com/api_v3/index.php?'
_TESTS = [
{
'url': 'kaltura:269692:1_1jc2y3e4',
'md5': '3adcbdb3dcc02d647539e53f284ba171',
'info_dict': {
'id': '1_1jc2y3e4',
'ext': 'mp4',
'title': 'Track 4',
'upload_date': '20131219',
'uploader_id': 'mlundberg@wolfgangsvault.com',
'description': 'The Allman Brothers Band, 12/16/1981',
'thumbnail': 're:^https?://.*/thumbnail/.*',
'timestamp': int,
},
},
{
'url': 'http://www.kaltura.com/index.php/kwidget/cache_st/1300318621/wid/_269692/uiconf_id/3873291/entry_id/1_1jc2y3e4',
'only_matching': True,
},
{
'url': 'https://cdnapisec.kaltura.com/index.php/kwidget/wid/_557781/uiconf_id/22845202/entry_id/1_plr1syf3',
'only_matching': True,
},
]
def _kaltura_api_call(self, video_id, actions, *args, **kwargs):
params = actions[0]
if len(actions) > 1:
for i, a in enumerate(actions[1:], start=1):
for k, v in a.items():
params['%d:%s' % (i, k)] = v
query = compat_urllib_parse.urlencode(params)
url = self._API_BASE + query
data = self._download_json(url, video_id, *args, **kwargs)
status = data if len(actions) == 1 else data[0]
if status.get('objectType') == 'KalturaAPIException':
raise ExtractorError(
'%s said: %s' % (self.IE_NAME, status['message']))
return data
def _get_kaltura_signature(self, video_id, partner_id):
actions = [{
'apiVersion': '3.1',
'expiry': 86400,
'format': 1,
'service': 'session',
'action': 'startWidgetSession',
'widgetId': '_%s' % partner_id,
}]
return self._kaltura_api_call(
video_id, actions, note='Downloading Kaltura signature')['ks']
def _get_video_info(self, video_id, partner_id):
signature = self._get_kaltura_signature(video_id, partner_id)
actions = [
{
'action': 'null',
'apiVersion': '3.1.5',
'clientTag': 'kdp:v3.8.5',
'format': 1, # JSON, 2 = XML, 3 = PHP
'service': 'multirequest',
'ks': signature,
},
{
'action': 'get',
'entryId': video_id,
'service': 'baseentry',
'version': '-1',
},
{
'action': 'getContextData',
'contextDataParams:objectType': 'KalturaEntryContextDataParams',
'contextDataParams:referrer': 'http://www.kaltura.com/',
'contextDataParams:streamerType': 'http',
'entryId': video_id,
'service': 'baseentry',
},
]
return self._kaltura_api_call(
video_id, actions, note='Downloading video info JSON')
def _real_extract(self, url):
video_id = self._match_id(url)
mobj = re.match(self._VALID_URL, url)
partner_id, entry_id = mobj.group('partner_id'), mobj.group('id')
info, source_data = self._get_video_info(entry_id, partner_id)
formats = [{
'format_id': '%(fileExt)s-%(bitrate)s' % f,
'ext': f['fileExt'],
'tbr': f['bitrate'],
'fps': f.get('frameRate'),
'filesize_approx': int_or_none(f.get('size'), invscale=1024),
'container': f.get('containerFormat'),
'vcodec': f.get('videoCodecId'),
'height': f.get('height'),
'width': f.get('width'),
'url': '%s/flavorId/%s' % (info['dataUrl'], f['id']),
} for f in source_data['flavorAssets']]
self._sort_formats(formats)
return {
'id': video_id,
'title': info['name'],
'formats': formats,
'description': info.get('description'),
'thumbnail': info.get('thumbnailUrl'),
'duration': info.get('duration'),
'timestamp': info.get('createdAt'),
'uploader_id': info.get('userId'),
'view_count': info.get('plays'),
}

View File

@ -27,8 +27,6 @@ class Laola1TvIE(InfoExtractor):
}
}
_BROKEN = True # Not really - extractor works fine, but f4m downloader does not support live streams yet.
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
@ -57,11 +55,7 @@ class Laola1TvIE(InfoExtractor):
title = xpath_text(hd_doc, './/video/title', fatal=True)
flash_url = xpath_text(hd_doc, './/video/url', fatal=True)
uploader = xpath_text(hd_doc, './/video/meta_organistation')
is_live = xpath_text(hd_doc, './/video/islive') == 'true'
if is_live:
raise ExtractorError(
'Live streams are not supported by the f4m downloader.')
categories = xpath_text(hd_doc, './/video/meta_sports')
if categories:

View File

@ -0,0 +1,190 @@
# coding: utf-8
from __future__ import unicode_literals
import datetime
import re
import time
from .common import InfoExtractor
from ..compat import (
compat_urlparse,
compat_urllib_parse,
)
from ..utils import (
determine_ext,
ExtractorError,
parse_iso8601,
)
class LetvIE(InfoExtractor):
_VALID_URL = r'http://www\.letv\.com/ptv/vplay/(?P<id>\d+).html'
_TESTS = [{
'url': 'http://www.letv.com/ptv/vplay/22005890.html',
'md5': 'cab23bd68d5a8db9be31c9a222c1e8df',
'info_dict': {
'id': '22005890',
'ext': 'mp4',
'title': '第87届奥斯卡颁奖礼完美落幕 《鸟人》成最大赢家',
'timestamp': 1424747397,
'upload_date': '20150224',
'description': 'md5:a9cb175fd753e2962176b7beca21a47c',
}
}, {
'url': 'http://www.letv.com/ptv/vplay/1415246.html',
'info_dict': {
'id': '1415246',
'ext': 'mp4',
'title': '美人天下01',
'description': 'md5:f88573d9d7225ada1359eaf0dbf8bcda',
},
'expected_warnings': [
'publish time'
]
}]
# http://www.letv.com/ptv/vplay/1118082.html
# This video is available only in Mainland China
@staticmethod
def urshift(val, n):
return val >> n if val >= 0 else (val + 0x100000000) >> n
# ror() and calc_time_key() are reversed from a embedded swf file in KLetvPlayer.swf
def ror(self, param1, param2):
_loc3_ = 0
while _loc3_ < param2:
param1 = self.urshift(param1, 1) + ((param1 & 1) << 31)
_loc3_ += 1
return param1
def calc_time_key(self, param1):
_loc2_ = 773625421
_loc3_ = self.ror(param1, _loc2_ % 13)
_loc3_ = _loc3_ ^ _loc2_
_loc3_ = self.ror(_loc3_, _loc2_ % 17)
return _loc3_
def _real_extract(self, url):
media_id = self._match_id(url)
page = self._download_webpage(url, media_id)
params = {
'id': media_id,
'platid': 1,
'splatid': 101,
'format': 1,
'tkey': self.calc_time_key(int(time.time())),
'domain': 'www.letv.com'
}
play_json = self._download_json(
'http://api.letv.com/mms/out/video/playJson?' + compat_urllib_parse.urlencode(params),
media_id, 'playJson data')
# Check for errors
playstatus = play_json['playstatus']
if playstatus['status'] == 0:
flag = playstatus['flag']
if flag == 1:
msg = 'Country %s auth error' % playstatus['country']
else:
msg = 'Generic error. flag = %d' % flag
raise ExtractorError(msg, expected=True)
playurl = play_json['playurl']
formats = ['350', '1000', '1300', '720p', '1080p']
dispatch = playurl['dispatch']
urls = []
for format_id in formats:
if format_id in dispatch:
media_url = playurl['domain'][0] + dispatch[format_id][0]
# Mimic what flvxz.com do
url_parts = list(compat_urlparse.urlparse(media_url))
qs = dict(compat_urlparse.parse_qs(url_parts[4]))
qs.update({
'platid': '14',
'splatid': '1401',
'tss': 'no',
'retry': 1
})
url_parts[4] = compat_urllib_parse.urlencode(qs)
media_url = compat_urlparse.urlunparse(url_parts)
url_info_dict = {
'url': media_url,
'ext': determine_ext(dispatch[format_id][1])
}
if format_id[-1:] == 'p':
url_info_dict['height'] = format_id[:-1]
urls.append(url_info_dict)
publish_time = parse_iso8601(self._html_search_regex(
r'发布时间&nbsp;([^<>]+) ', page, 'publish time', fatal=False),
delimiter=' ', timezone=datetime.timedelta(hours=8))
description = self._html_search_meta('description', page, fatal=False)
return {
'id': media_id,
'formats': urls,
'title': playurl['title'],
'thumbnail': playurl['pic'],
'description': description,
'timestamp': publish_time,
}
class LetvTvIE(InfoExtractor):
_VALID_URL = r'http://www.letv.com/tv/(?P<id>\d+).html'
_TESTS = [{
'url': 'http://www.letv.com/tv/46177.html',
'info_dict': {
'id': '46177',
'title': '美人天下',
'description': 'md5:395666ff41b44080396e59570dbac01c'
},
'playlist_count': 35
}]
def _real_extract(self, url):
playlist_id = self._match_id(url)
page = self._download_webpage(url, playlist_id)
media_urls = list(set(re.findall(
r'http://www.letv.com/ptv/vplay/\d+.html', page)))
entries = [self.url_result(media_url, ie='Letv')
for media_url in media_urls]
title = self._html_search_meta('keywords', page,
fatal=False).split('')[0]
description = self._html_search_meta('description', page, fatal=False)
return self.playlist_result(entries, playlist_id, playlist_title=title,
playlist_description=description)
class LetvPlaylistIE(LetvTvIE):
_VALID_URL = r'http://tv.letv.com/[a-z]+/(?P<id>[a-z]+)/index.s?html'
_TESTS = [{
'url': 'http://tv.letv.com/izt/wuzetian/index.html',
'info_dict': {
'id': 'wuzetian',
'title': '武媚娘传奇',
'description': 'md5:e12499475ab3d50219e5bba00b3cb248'
},
# This playlist contains some extra videos other than the drama itself
'playlist_mincount': 96
}, {
'url': 'http://tv.letv.com/pzt/lswjzzjc/index.shtml',
'info_dict': {
'id': 'lswjzzjc',
# The title should be "劲舞青春", but I can't find a simple way to
# determine the playlist title
'title': '乐视午间自制剧场',
'description': 'md5:b1eef244f45589a7b5b1af9ff25a4489'
},
'playlist_mincount': 7
}]

View File

@ -15,19 +15,73 @@ from ..utils import (
)
class LyndaIE(InfoExtractor):
class LyndaBaseIE(InfoExtractor):
_LOGIN_URL = 'https://www.lynda.com/login/login.aspx'
_SUCCESSFUL_LOGIN_REGEX = r'isLoggedIn: true'
_ACCOUNT_CREDENTIALS_HINT = 'Use --username and --password options to provide lynda.com account credentials.'
def _real_initialize(self):
self._login()
def _login(self):
(username, password) = self._get_login_info()
if username is None:
return
login_form = {
'username': username,
'password': password,
'remember': 'false',
'stayPut': 'false'
}
request = compat_urllib_request.Request(
self._LOGIN_URL, compat_urllib_parse.urlencode(login_form))
login_page = self._download_webpage(
request, None, 'Logging in as %s' % username)
# Not (yet) logged in
m = re.search(r'loginResultJson = \'(?P<json>[^\']+)\';', login_page)
if m is not None:
response = m.group('json')
response_json = json.loads(response)
state = response_json['state']
if state == 'notlogged':
raise ExtractorError(
'Unable to login, incorrect username and/or password',
expected=True)
# This is when we get popup:
# > You're already logged in to lynda.com on two devices.
# > If you log in here, we'll log you out of another device.
# So, we need to confirm this.
if state == 'conflicted':
confirm_form = {
'username': '',
'password': '',
'resolve': 'true',
'remember': 'false',
'stayPut': 'false',
}
request = compat_urllib_request.Request(
self._LOGIN_URL, compat_urllib_parse.urlencode(confirm_form))
login_page = self._download_webpage(
request, None,
'Confirming log in and log out from another device')
if re.search(self._SUCCESSFUL_LOGIN_REGEX, login_page) is None:
raise ExtractorError('Unable to log in')
class LyndaIE(LyndaBaseIE):
IE_NAME = 'lynda'
IE_DESC = 'lynda.com videos'
_VALID_URL = r'https?://www\.lynda\.com/[^/]+/[^/]+/\d+/(\d+)-\d\.html'
_LOGIN_URL = 'https://www.lynda.com/login/login.aspx'
_VALID_URL = r'https?://www\.lynda\.com/(?:[^/]+/[^/]+/\d+|player/embed)/(?P<id>\d+)'
_NETRC_MACHINE = 'lynda'
_SUCCESSFUL_LOGIN_REGEX = r'isLoggedIn: true'
_TIMECODE_REGEX = r'\[(?P<timecode>\d+:\d+:\d+[\.,]\d+)\]'
ACCOUNT_CREDENTIALS_HINT = 'Use --username and --password options to provide lynda.com account credentials.'
_TEST = {
_TESTS = [{
'url': 'http://www.lynda.com/Bootstrap-tutorials/Using-exercise-files/110885/114408-4.html',
'md5': 'ecfc6862da89489161fb9cd5f5a6fac1',
'info_dict': {
@ -36,25 +90,27 @@ class LyndaIE(InfoExtractor):
'title': 'Using the exercise files',
'duration': 68
}
}
def _real_initialize(self):
self._login()
}, {
'url': 'https://www.lynda.com/player/embed/133770?tr=foo=1;bar=g;fizz=rt&fs=0',
'only_matching': True,
}]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group(1)
video_id = self._match_id(url)
page = self._download_webpage('http://www.lynda.com/ajax/player?videoId=%s&type=video' % video_id, video_id,
'Downloading video JSON')
page = self._download_webpage(
'http://www.lynda.com/ajax/player?videoId=%s&type=video' % video_id,
video_id, 'Downloading video JSON')
video_json = json.loads(page)
if 'Status' in video_json:
raise ExtractorError('lynda returned error: %s' % video_json['Message'], expected=True)
raise ExtractorError(
'lynda returned error: %s' % video_json['Message'], expected=True)
if video_json['HasAccess'] is False:
raise ExtractorError(
'Video %s is only available for members. ' % video_id + self.ACCOUNT_CREDENTIALS_HINT, expected=True)
'Video %s is only available for members. '
% video_id + self._ACCOUNT_CREDENTIALS_HINT, expected=True)
video_id = compat_str(video_json['ID'])
duration = video_json['DurationInSeconds']
@ -97,50 +153,9 @@ class LyndaIE(InfoExtractor):
'formats': formats
}
def _login(self):
(username, password) = self._get_login_info()
if username is None:
return
login_form = {
'username': username,
'password': password,
'remember': 'false',
'stayPut': 'false'
}
request = compat_urllib_request.Request(self._LOGIN_URL, compat_urllib_parse.urlencode(login_form))
login_page = self._download_webpage(request, None, 'Logging in as %s' % username)
# Not (yet) logged in
m = re.search(r'loginResultJson = \'(?P<json>[^\']+)\';', login_page)
if m is not None:
response = m.group('json')
response_json = json.loads(response)
state = response_json['state']
if state == 'notlogged':
raise ExtractorError('Unable to login, incorrect username and/or password', expected=True)
# This is when we get popup:
# > You're already logged in to lynda.com on two devices.
# > If you log in here, we'll log you out of another device.
# So, we need to confirm this.
if state == 'conflicted':
confirm_form = {
'username': '',
'password': '',
'resolve': 'true',
'remember': 'false',
'stayPut': 'false',
}
request = compat_urllib_request.Request(self._LOGIN_URL, compat_urllib_parse.urlencode(confirm_form))
login_page = self._download_webpage(request, None, 'Confirming log in and log out from another device')
if re.search(self._SUCCESSFUL_LOGIN_REGEX, login_page) is None:
raise ExtractorError('Unable to log in')
def _fix_subtitles(self, subs):
srt = ''
seq_counter = 0
for pos in range(0, len(subs) - 1):
seq_current = subs[pos]
m_current = re.match(self._TIMECODE_REGEX, seq_current['Timecode'])
@ -152,8 +167,10 @@ class LyndaIE(InfoExtractor):
continue
appear_time = m_current.group('timecode')
disappear_time = m_next.group('timecode')
text = seq_current['Caption']
srt += '%s\r\n%s --> %s\r\n%s' % (str(pos), appear_time, disappear_time, text)
text = seq_current['Caption'].strip()
if text:
seq_counter += 1
srt += '%s\r\n%s --> %s\r\n%s\r\n\r\n' % (seq_counter, appear_time, disappear_time, text)
if srt:
return srt
@ -166,7 +183,7 @@ class LyndaIE(InfoExtractor):
return {}
class LyndaCourseIE(InfoExtractor):
class LyndaCourseIE(LyndaBaseIE):
IE_NAME = 'lynda:course'
IE_DESC = 'lynda.com online courses'
@ -179,35 +196,37 @@ class LyndaCourseIE(InfoExtractor):
course_path = mobj.group('coursepath')
course_id = mobj.group('courseid')
page = self._download_webpage('http://www.lynda.com/ajax/player?courseId=%s&type=course' % course_id,
course_id, 'Downloading course JSON')
page = self._download_webpage(
'http://www.lynda.com/ajax/player?courseId=%s&type=course' % course_id,
course_id, 'Downloading course JSON')
course_json = json.loads(page)
if 'Status' in course_json and course_json['Status'] == 'NotFound':
raise ExtractorError('Course %s does not exist' % course_id, expected=True)
raise ExtractorError(
'Course %s does not exist' % course_id, expected=True)
unaccessible_videos = 0
videos = []
(username, _) = self._get_login_info()
# Might want to extract videos right here from video['Formats'] as it seems 'Formats' is not provided
# by single video API anymore
for chapter in course_json['Chapters']:
for video in chapter['Videos']:
if username is None and video['HasAccess'] is False:
if video['HasAccess'] is False:
unaccessible_videos += 1
continue
videos.append(video['ID'])
if unaccessible_videos > 0:
self._downloader.report_warning('%s videos are only available for members and will not be downloaded. '
% unaccessible_videos + LyndaIE.ACCOUNT_CREDENTIALS_HINT)
self._downloader.report_warning(
'%s videos are only available for members (or paid members) and will not be downloaded. '
% unaccessible_videos + self._ACCOUNT_CREDENTIALS_HINT)
entries = [
self.url_result('http://www.lynda.com/%s/%s-4.html' %
(course_path, video_id),
'Lynda')
self.url_result(
'http://www.lynda.com/%s/%s-4.html' % (course_path, video_id),
'Lynda')
for video_id in videos]
course_title = course_json['Title']

View File

@ -18,7 +18,7 @@ class MiTeleIE(InfoExtractor):
IE_NAME = 'mitele.es'
_VALID_URL = r'http://www\.mitele\.es/[^/]+/[^/]+/[^/]+/(?P<id>[^/]+)/'
_TEST = {
_TESTS = [{
'url': 'http://www.mitele.es/programas-tv/diario-de/la-redaccion/programa-144/',
'md5': '6a75fe9d0d3275bead0cb683c616fddb',
'info_dict': {
@ -29,7 +29,7 @@ class MiTeleIE(InfoExtractor):
'display_id': 'programa-144',
'duration': 2913,
},
}
}]
def _real_extract(self, url):
episode = self._match_id(url)

View File

@ -5,7 +5,7 @@ from ..utils import int_or_none
class MporaIE(InfoExtractor):
_VALID_URL = r'https?://(www\.)?mpora\.(?:com|de)/videos/(?P<id>[^?#/]+)'
_VALID_URL = r'https?://(?:www\.)?mpora\.(?:com|de)/videos/(?P<id>[^?#/]+)'
IE_NAME = 'MPORA'
_TEST = {
@ -25,7 +25,9 @@ class MporaIE(InfoExtractor):
webpage = self._download_webpage(url, video_id)
data_json = self._search_regex(
r"new FM\.Player\('[^']+',\s*(\{.*?)\).player;", webpage, 'json')
[r"new FM\.Player\('[^']+',\s*(\{.*?)\).player;",
r"new\s+FM\.Kaltura\.Player\('[^']+'\s*,\s*({.+?})\);"],
webpage, 'json')
data = self._parse_json(data_json, video_id)
uploader = data['info_overlay'].get('username')

View File

@ -3,17 +3,13 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
parse_duration,
unified_strdate,
)
class MusicVaultIE(InfoExtractor):
_VALID_URL = r'https?://www\.musicvault\.com/(?P<uploader_id>[^/?#]*)/video/(?P<display_id>[^/?#]*)_(?P<id>[0-9]+)\.html'
_TEST = {
'url': 'http://www.musicvault.com/the-allman-brothers-band/video/straight-from-the-heart_1010863.html',
'md5': '2cdbb3ae75f7fb3519821507d2fb3c15',
'md5': '3adcbdb3dcc02d647539e53f284ba171',
'info_dict': {
'id': '1010863',
'ext': 'mp4',
@ -22,9 +18,10 @@ class MusicVaultIE(InfoExtractor):
'duration': 244,
'uploader': 'The Allman Brothers Band',
'thumbnail': 're:^https?://.*/thumbnail/.*',
'upload_date': '19811216',
'upload_date': '20131219',
'location': 'Capitol Theatre (Passaic, NJ)',
'description': 'Listen to The Allman Brothers Band perform Straight from the Heart at Capitol Theatre (Passaic, NJ) on Dec 16, 1981',
'timestamp': int,
}
}
@ -43,34 +40,24 @@ class MusicVaultIE(InfoExtractor):
r'<h1.*?>(.*?)</h1>', data_div, 'uploader', fatal=False)
title = self._html_search_regex(
r'<h2.*?>(.*?)</h2>', data_div, 'title')
upload_date = unified_strdate(self._html_search_regex(
r'<h3.*?>(.*?)</h3>', data_div, 'uploader', fatal=False))
location = self._html_search_regex(
r'<h4.*?>(.*?)</h4>', data_div, 'location', fatal=False)
duration = parse_duration(self._html_search_meta('duration', webpage))
VIDEO_URL_TEMPLATE = 'http://cdnapi.kaltura.com/p/%(uid)s/sp/%(wid)s/playManifest/entryId/%(entry_id)s/format/url/protocol/http'
kaltura_id = self._search_regex(
r'<div id="video-detail-player" data-kaltura-id="([^"]+)"',
webpage, 'kaltura ID')
video_url = VIDEO_URL_TEMPLATE % {
'entry_id': kaltura_id,
'wid': self._search_regex(r'/wid/_([0-9]+)/', webpage, 'wid'),
'uid': self._search_regex(r'uiconf_id/([0-9]+)/', webpage, 'uid'),
}
wid = self._search_regex(r'/wid/_([0-9]+)/', webpage, 'wid')
return {
'id': mobj.group('id'),
'url': video_url,
'ext': 'mp4',
'_type': 'url_transparent',
'url': 'kaltura:%s:%s' % (wid, kaltura_id),
'ie_key': 'Kaltura',
'display_id': display_id,
'uploader_id': mobj.group('uploader_id'),
'thumbnail': thumbnail,
'description': self._html_search_meta('description', webpage),
'upload_date': upload_date,
'location': location,
'title': title,
'uploader': uploader,
'duration': duration,
}

View File

@ -0,0 +1,85 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
unified_strdate,
int_or_none,
qualities,
)
class OdnoklassnikiIE(InfoExtractor):
_VALID_URL = r'https?://(?:odnoklassniki|ok)\.ru/(?:video|web-api/video/moviePlayer)/(?P<id>\d+)'
_TESTS = [{
'url': 'http://ok.ru/video/20079905452',
'md5': '8e24ad2da6f387948e7a7d44eb8668fe',
'info_dict': {
'id': '20079905452',
'ext': 'mp4',
'title': 'Культура меняет нас (прекрасный ролик!))',
'duration': 100,
'upload_date': '20141207',
'uploader_id': '330537914540',
'uploader': 'Виталий Добровольский',
'like_count': int,
'age_limit': 0,
},
}, {
'url': 'http://ok.ru/web-api/video/moviePlayer/20079905452',
'only_matching': True,
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
player = self._parse_json(
self._search_regex(
r"OKVideo\.start\(({.+?})\s*,\s*'VideoAutoplay_player'", webpage, 'player'),
video_id)
metadata = self._parse_json(player['flashvars']['metadata'], video_id)
movie = metadata['movie']
title = movie['title']
thumbnail = movie.get('poster')
duration = int_or_none(movie.get('duration'))
author = metadata.get('author', {})
uploader_id = author.get('id')
uploader = author.get('name')
upload_date = unified_strdate(self._html_search_meta(
'ya:ovs:upload_date', webpage, 'upload date'))
age_limit = None
adult = self._html_search_meta(
'ya:ovs:adult', webpage, 'age limit')
if adult:
age_limit = 18 if adult == 'true' else 0
like_count = int_or_none(metadata.get('likeCount'))
quality = qualities(('mobile', 'lowest', 'low', 'sd', 'hd'))
formats = [{
'url': f['url'],
'ext': 'mp4',
'format_id': f['name'],
'quality': quality(f['name']),
} for f in metadata['videos']]
return {
'id': video_id,
'title': title,
'thumbnail': thumbnail,
'duration': duration,
'upload_date': upload_date,
'uploader': uploader,
'uploader_id': uploader_id,
'like_count': like_count,
'age_limit': age_limit,
'formats': formats,
}

View File

@ -0,0 +1,88 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
ExtractorError,
unified_strdate,
int_or_none,
)
class Puls4IE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?puls4\.com/video/[^/]+/play/(?P<id>[0-9]+)'
_TESTS = [{
'url': 'http://www.puls4.com/video/pro-und-contra/play/2716816',
'md5': '49f6a6629747eeec43cef6a46b5df81d',
'info_dict': {
'id': '2716816',
'ext': 'mp4',
'title': 'Pro und Contra vom 23.02.2015',
'description': 'md5:293e44634d9477a67122489994675db6',
'duration': 2989,
'upload_date': '20150224',
'uploader': 'PULS_4',
},
'skip': 'Only works from Germany',
}, {
'url': 'http://www.puls4.com/video/kult-spielfilme/play/1298106',
'md5': '6a48316c8903ece8dab9b9a7bf7a59ec',
'info_dict': {
'id': '1298106',
'ext': 'mp4',
'title': 'Lucky Fritz',
},
'skip': 'Only works from Germany',
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
error_message = self._html_search_regex(
r'<div class="message-error">(.+?)</div>',
webpage, 'error message', default=None)
if error_message:
raise ExtractorError(
'%s returned error: %s' % (self.IE_NAME, error_message), expected=True)
real_url = self._html_search_regex(
r'\"fsk-button\".+?href=\"([^"]+)',
webpage, 'fsk_button', default=None)
if real_url:
webpage = self._download_webpage(real_url, video_id)
player = self._search_regex(
r'p4_video_player(?:_iframe)?\("video_\d+_container"\s*,(.+?)\);\s*\}',
webpage, 'player')
player_json = self._parse_json(
'[%s]' % player, video_id,
transform_source=lambda s: s.replace('undefined,', ''))
formats = None
result = None
for v in player_json:
if isinstance(v, list) and not formats:
formats = [{
'url': f['url'],
'format': 'hd' if f.get('hd') else 'sd',
'width': int_or_none(f.get('size_x')),
'height': int_or_none(f.get('size_y')),
'tbr': int_or_none(f.get('bitrate')),
} for f in v]
self._sort_formats(formats)
elif isinstance(v, dict) and not result:
result = {
'id': video_id,
'title': v['videopartname'].strip(),
'description': v.get('videotitle'),
'duration': int_or_none(v.get('videoduration') or v.get('episodeduration')),
'upload_date': unified_strdate(v.get('clipreleasetime')),
'uploader': v.get('channel'),
}
result['formats'] = formats
return result

View File

@ -146,7 +146,7 @@ class RTLnowIE(InfoExtractor):
mobj = re.search(r'.*/(?P<hoster>[^/]+)/videos/(?P<play_path>.+)\.f4m', filename.text)
if mobj:
fmt = {
'url': 'rtmpe://fmspay-fra2.rtl.de/' + mobj.group('hoster'),
'url': 'rtmpe://fms.rtl.de/' + mobj.group('hoster'),
'play_path': 'mp4:' + mobj.group('play_path'),
'page_url': url,
'player_url': video_page_url + 'includes/vodplayer.swf',

View File

@ -8,8 +8,9 @@ import time
from .common import InfoExtractor
from ..compat import compat_urlparse
from ..utils import (
struct_unpack,
float_or_none,
remove_end,
struct_unpack,
)
@ -67,6 +68,7 @@ class RTVEALaCartaIE(InfoExtractor):
'id': '2491869',
'ext': 'mp4',
'title': 'Balonmano - Swiss Cup masculina. Final: España-Suecia',
'duration': 5024.566,
},
}, {
'note': 'Live stream',
@ -113,6 +115,7 @@ class RTVEALaCartaIE(InfoExtractor):
'thumbnail': info.get('image'),
'page_url': url,
'subtitles': subtitles,
'duration': float_or_none(info.get('duration'), scale=1000),
}
def _get_subtitles(self, video_id, sub_file):

View File

@ -180,7 +180,7 @@ class SoundcloudIE(InfoExtractor):
'format_id': key,
'url': url,
'play_path': 'mp3:' + path,
'ext': ext,
'ext': 'flv',
'vcodec': 'none',
})
@ -200,8 +200,9 @@ class SoundcloudIE(InfoExtractor):
if f['format_id'].startswith('rtmp'):
f['protocol'] = 'rtmp'
self._sort_formats(formats)
result['formats'] = formats
self._check_formats(formats, track_id)
self._sort_formats(formats)
result['formats'] = formats
return result

View File

@ -1,6 +1,8 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
determine_ext,
@ -8,23 +10,40 @@ from ..utils import (
class SVTPlayIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?svtplay\.se/video/(?P<id>[0-9]+)'
_TEST = {
IE_DESC = 'SVT Play and Öppet arkiv'
_VALID_URL = r'https?://(?:www\.)?(?P<host>svtplay|oppetarkiv)\.se/video/(?P<id>[0-9]+)'
_TESTS = [{
'url': 'http://www.svtplay.se/video/2609989/sm-veckan/sm-veckan-rally-final-sasong-1-sm-veckan-rally-final',
'md5': 'f4a184968bc9c802a9b41316657aaa80',
'md5': 'ade3def0643fa1c40587a422f98edfd9',
'info_dict': {
'id': '2609989',
'ext': 'mp4',
'ext': 'flv',
'title': 'SM veckan vinter, Örebro - Rally, final',
'duration': 4500,
'thumbnail': 're:^https?://.*[\.-]jpg$',
'age_limit': 0,
},
}
}, {
'url': 'http://www.oppetarkiv.se/video/1058509/rederiet-sasong-1-avsnitt-1-av-318',
'md5': 'c3101a17ce9634f4c1f9800f0746c187',
'info_dict': {
'id': '1058509',
'ext': 'flv',
'title': 'Farlig kryssning',
'duration': 2566,
'thumbnail': 're:^https?://.*[\.-]jpg$',
'age_limit': 0,
},
'skip': 'Only works from Sweden',
}]
def _real_extract(self, url):
video_id = self._match_id(url)
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
host = mobj.group('host')
info = self._download_json(
'http://www.svtplay.se/video/%s?output=json' % video_id, video_id)
'http://www.%s.se/video/%s?output=json' % (host, video_id), video_id)
title = info['context']['title']
thumbnail = info['context'].get('thumbnailImage')
@ -33,11 +52,16 @@ class SVTPlayIE(InfoExtractor):
formats = []
for vr in video_info['videoReferences']:
vurl = vr['url']
if determine_ext(vurl) == 'm3u8':
ext = determine_ext(vurl)
if ext == 'm3u8':
formats.extend(self._extract_m3u8_formats(
vurl, video_id,
ext='mp4', entry_protocol='m3u8_native',
m3u8_id=vr.get('playerType')))
elif ext == 'f4m':
formats.extend(self._extract_f4m_formats(
vurl + '?hdcore=3.3.0', video_id,
f4m_id=vr.get('playerType')))
else:
formats.append({
'format_id': vr.get('playerType'),
@ -46,6 +70,7 @@ class SVTPlayIE(InfoExtractor):
self._sort_formats(formats)
duration = video_info.get('materialLength')
age_limit = 18 if video_info.get('inappropriateForChildren') else 0
return {
'id': video_id,
@ -53,4 +78,5 @@ class SVTPlayIE(InfoExtractor):
'formats': formats,
'thumbnail': thumbnail,
'duration': duration,
'age_limit': age_limit,
}

View File

@ -6,9 +6,9 @@ from .mitele import MiTeleIE
class TelecincoIE(MiTeleIE):
IE_NAME = 'telecinco.es'
_VALID_URL = r'https?://www\.telecinco\.es/[^/]+/[^/]+/[^/]+/(?P<id>.*?)\.html'
_VALID_URL = r'https?://www\.telecinco\.es/[^/]+/[^/]+/(?:[^/]+/)?(?P<id>.*?)\.html'
_TEST = {
_TESTS = [{
'url': 'http://www.telecinco.es/robinfood/temporada-01/t01xp14/Bacalao-cocochas-pil-pil_0_1876350223.html',
'info_dict': {
'id': 'MDSVID20141015_0058',
@ -16,4 +16,7 @@ class TelecincoIE(MiTeleIE):
'title': 'Con Martín Berasategui, hacer un bacalao al ...',
'duration': 662,
},
}
}, {
'url': 'http://www.telecinco.es/informativos/nacional/Pablo_Iglesias-Informativos_Telecinco-entrevista-Pedro_Piqueras_2_1945155182.html',
'only_matching': True,
}]

View File

@ -34,7 +34,15 @@ class TwitchBaseIE(InfoExtractor):
expected=True)
def _download_json(self, url, video_id, note='Downloading JSON metadata'):
response = super(TwitchBaseIE, self)._download_json(url, video_id, note)
headers = {
'Referer': 'http://api.twitch.tv/crossdomain/receiver.html?v=2',
'X-Requested-With': 'XMLHttpRequest',
}
for cookie in self._downloader.cookiejar:
if cookie.name == 'api_token':
headers['Twitch-Api-Token'] = cookie.value
request = compat_urllib_request.Request(url, headers=headers)
response = super(TwitchBaseIE, self)._download_json(request, video_id, note)
self._handle_error(response)
return response

View File

@ -31,7 +31,7 @@ class VKIE(InfoExtractor):
'id': '162222515',
'ext': 'flv',
'title': 'ProtivoGunz - Хуёвая песня',
'uploader': 're:Noize MC.*',
'uploader': 're:(?:Noize MC|Alexander Ilyashenko).*',
'duration': 195,
'upload_date': '20120212',
},
@ -140,7 +140,7 @@ class VKIE(InfoExtractor):
if not video_id:
video_id = '%s_%s' % (mobj.group('oid'), mobj.group('id'))
info_url = 'http://vk.com/al_video.php?act=show&al=1&video=%s' % video_id
info_url = 'http://vk.com/al_video.php?act=show&al=1&module=video&video=%s' % video_id
info_page = self._download_webpage(info_url, video_id)
ERRORS = {
@ -152,7 +152,10 @@ class VKIE(InfoExtractor):
'use --username and --password options to provide account credentials.',
r'<!>Unknown error':
'Video %s does not exist.'
'Video %s does not exist.',
r'<!>Видео временно недоступно':
'Video %s is temporarily unavailable.',
}
for error_re, error_msg in ERRORS.items():

View File

@ -28,6 +28,7 @@ class WDRIE(InfoExtractor):
'title': 'Servicezeit',
'description': 'md5:c8f43e5e815eeb54d0b96df2fba906cb',
'upload_date': '20140310',
'is_live': False
},
'params': {
'skip_download': True,
@ -41,6 +42,7 @@ class WDRIE(InfoExtractor):
'title': 'Marga Spiegel ist tot',
'description': 'md5:2309992a6716c347891c045be50992e4',
'upload_date': '20140311',
'is_live': False
},
'params': {
'skip_download': True,
@ -55,6 +57,7 @@ class WDRIE(InfoExtractor):
'title': 'Erlebte Geschichten: Marga Spiegel (29.11.2009)',
'description': 'md5:2309992a6716c347891c045be50992e4',
'upload_date': '20091129',
'is_live': False
},
},
{
@ -66,6 +69,7 @@ class WDRIE(InfoExtractor):
'title': 'Flavia Coelho: Amar é Amar',
'description': 'md5:7b29e97e10dfb6e265238b32fa35b23a',
'upload_date': '20140717',
'is_live': False
},
},
{
@ -74,6 +78,20 @@ class WDRIE(InfoExtractor):
'info_dict': {
'id': 'mediathek/video/sendungen/quarks_und_co/filterseite-quarks-und-co100',
}
},
{
'url': 'http://www1.wdr.de/mediathek/video/livestream/index.html',
'info_dict': {
'id': 'mdb-103364',
'title': 're:^WDR Fernsehen [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
'description': 'md5:ae2ff888510623bf8d4b115f95a9b7c9',
'ext': 'flv',
'upload_date': '20150212',
'is_live': True
},
'params': {
'skip_download': True,
},
}
]
@ -119,6 +137,10 @@ class WDRIE(InfoExtractor):
video_url = flashvars['dslSrc'][0]
title = flashvars['trackerClipTitle'][0]
thumbnail = flashvars['startPicture'][0] if 'startPicture' in flashvars else None
is_live = flashvars.get('isLive', ['0'])[0] == '1'
if is_live:
title = self._live_title(title)
if 'trackerClipAirTime' in flashvars:
upload_date = flashvars['trackerClipAirTime'][0]
@ -131,6 +153,13 @@ class WDRIE(InfoExtractor):
if video_url.endswith('.f4m'):
video_url += '?hdcore=3.2.0&plugin=aasp-3.2.0.77.18'
ext = 'flv'
elif video_url.endswith('.smil'):
fmt = self._extract_smil_formats(video_url, page_id)[0]
video_url = fmt['url']
sep = '&' if '?' in video_url else '?'
video_url += sep
video_url += 'hdcore=3.3.0&plugin=aasp-3.3.0.99.43'
ext = fmt['ext']
else:
ext = determine_ext(video_url)
@ -144,6 +173,7 @@ class WDRIE(InfoExtractor):
'description': description,
'thumbnail': thumbnail,
'upload_date': upload_date,
'is_live': is_live
}

View File

@ -8,11 +8,11 @@ import sys
from .downloader.external import list_external_downloaders
from .compat import (
compat_expanduser,
compat_get_terminal_size,
compat_getenv,
compat_kwargs,
)
from .utils import (
get_term_width,
write_string,
)
from .version import __version__
@ -100,7 +100,7 @@ def parseOpts(overrideArguments=None):
return opts
# No need to wrap help messages if we're on a wide console
columns = get_term_width()
columns = compat_get_terminal_size().columns
max_width = columns if columns else 80
max_help_position = 80
@ -435,8 +435,12 @@ def parseOpts(overrideArguments=None):
downloader.add_option(
'--external-downloader',
dest='external_downloader', metavar='COMMAND',
help='(experimental) Use the specified external downloader. '
help='Use the specified external downloader. '
'Currently supports %s' % ','.join(list_external_downloaders()))
downloader.add_option(
'--external-downloader-args',
dest='external_downloader_args', metavar='ARGS',
help='Give these arguments to the external downloader.')
workarounds = optparse.OptionGroup(parser, 'Workarounds')
workarounds.add_option(
@ -751,6 +755,10 @@ def parseOpts(overrideArguments=None):
'--exec',
metavar='CMD', dest='exec_cmd',
help='Execute a command on the file after downloading, similar to find\'s -exec syntax. Example: --exec \'adb push {} /sdcard/Music/ && rm {}\'')
postproc.add_option(
'--convert-subtitles', '--convert-subs',
metavar='FORMAT', dest='convertsubtitles', default=None,
help='Convert the subtitles to other format (currently supported: srt|ass|vtt)')
parser.add_option_group(general)
parser.add_option_group(network)

View File

@ -11,6 +11,7 @@ from .ffmpeg import (
FFmpegMergerPP,
FFmpegMetadataPP,
FFmpegVideoConvertorPP,
FFmpegSubtitlesConvertorPP,
)
from .xattrpp import XAttrMetadataPP
from .execafterdownload import ExecAfterDownloadPP
@ -31,6 +32,7 @@ __all__ = [
'FFmpegMergerPP',
'FFmpegMetadataPP',
'FFmpegPostProcessor',
'FFmpegSubtitlesConvertorPP',
'FFmpegVideoConvertorPP',
'XAttrMetadataPP',
]

View File

@ -1,5 +1,6 @@
from __future__ import unicode_literals
import io
import os
import subprocess
import sys
@ -635,3 +636,40 @@ class FFmpegFixupM4aPP(FFmpegPostProcessor):
os.rename(encodeFilename(temp_filename), encodeFilename(filename))
return True, info
class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor):
def __init__(self, downloader=None, format=None):
super(FFmpegSubtitlesConvertorPP, self).__init__(downloader)
self.format = format
def run(self, info):
subs = info.get('requested_subtitles')
filename = info['filepath']
new_ext = self.format
new_format = new_ext
if new_format == 'vtt':
new_format = 'webvtt'
if subs is None:
self._downloader.to_screen('[ffmpeg] There aren\'t any subtitles to convert')
return True, info
self._downloader.to_screen('[ffmpeg] Converting subtitles')
for lang, sub in subs.items():
ext = sub['ext']
if ext == new_ext:
self._downloader.to_screen(
'[ffmpeg] Subtitle file for %s is already in the requested'
'format' % new_ext)
continue
new_file = subtitles_filename(filename, lang, new_ext)
self.run_ffmpeg(
subtitles_filename(filename, lang, ext),
new_file, ['-f', new_format])
with io.open(new_file, 'rt', encoding='utf-8') as f:
subs[lang] = {
'ext': ext,
'data': f.read(),
}
return True, info

View File

@ -35,7 +35,6 @@ import zlib
from .compat import (
compat_basestring,
compat_chr,
compat_getenv,
compat_html_entities,
compat_http_client,
compat_parse_qs,
@ -54,7 +53,7 @@ from .compat import (
compiled_regex_type = type(re.compile(''))
std_headers = {
'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20100101 Firefox/10.0 (Chrome)',
'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20150101 Firefox/20.0 (Chrome)',
'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
'Accept-Encoding': 'gzip, deflate',
@ -306,6 +305,7 @@ def sanitize_filename(s, restricted=False, is_id=False):
result = result[2:]
if result.startswith('-'):
result = '_' + result[len('-'):]
result = result.lstrip('.')
if not result:
result = '_'
return result
@ -1173,22 +1173,6 @@ def parse_filesize(s):
return int(float(num_str) * mult)
def get_term_width():
columns = compat_getenv('COLUMNS', None)
if columns:
return int(columns)
try:
sp = subprocess.Popen(
['stty', 'size'],
stdout=subprocess.PIPE, stderr=subprocess.PIPE)
out, err = sp.communicate()
return int(out.split()[1])
except:
pass
return None
def month_by_name(name):
""" Return the number of a month by (locale-independently) English name """
@ -1290,6 +1274,7 @@ def parse_duration(s):
(?P<only_mins>[0-9.]+)\s*(?:mins?|minutes?)\s*|
(?P<only_hours>[0-9.]+)\s*(?:hours?)|
\s*(?P<hours_reversed>[0-9]+)\s*(?:[:h]|hours?)\s*(?P<mins_reversed>[0-9]+)\s*(?:[:m]|mins?|minutes?)\s*|
(?:
(?:
(?:(?P<days>[0-9]+)\s*(?:[:d]|days?)\s*)?
@ -1308,10 +1293,14 @@ def parse_duration(s):
return float_or_none(m.group('only_hours'), invscale=60 * 60)
if m.group('secs'):
res += int(m.group('secs'))
if m.group('mins_reversed'):
res += int(m.group('mins_reversed')) * 60
if m.group('mins'):
res += int(m.group('mins')) * 60
if m.group('hours'):
res += int(m.group('hours')) * 60 * 60
if m.group('hours_reversed'):
res += int(m.group('hours_reversed')) * 60 * 60
if m.group('days'):
res += int(m.group('days')) * 24 * 60 * 60
if m.group('ms'):

View File

@ -1,3 +1,3 @@
from __future__ import unicode_literals
__version__ = '2015.02.24.2'
__version__ = '2015.02.28'