diff --git a/AUTHORS b/AUTHORS index bdd2a15dc..4674a5af3 100644 --- a/AUTHORS +++ b/AUTHORS @@ -112,3 +112,4 @@ Frans de Jonge Robin de Rooij Ryan Schmidt Leslie P. Polzer +Duncan Keall diff --git a/Makefile b/Makefile index 708732956..c6c76274f 100644 --- a/Makefile +++ b/Makefile @@ -2,6 +2,7 @@ all: youtube-dl README.md CONTRIBUTING.md README.txt youtube-dl.1 youtube-dl.bas clean: rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish *.dump *.part *.info.json *.mp4 *.flv *.mp3 *.avi CONTRIBUTING.md.tmp youtube-dl youtube-dl.exe + find -name "*.pyc" -delete PREFIX ?= /usr/local BINDIR ?= $(PREFIX)/bin diff --git a/README.md b/README.md index 699401b49..f2909e8d6 100644 --- a/README.md +++ b/README.md @@ -139,6 +139,8 @@ which means you can modify it, redistribute it or use it however you like. dislike_count = 3.3 + compat_get_terminal_size = shutil.get_terminal_size +else: + _terminal_size = collections.namedtuple('terminal_size', ['columns', 'lines']) + + def compat_get_terminal_size(): + columns = compat_getenv('COLUMNS', None) + if columns: + columns = int(columns) + else: + columns = None + lines = compat_getenv('LINES', None) + if lines: + lines = int(lines) + else: + lines = None + + try: + sp = subprocess.Popen( + ['stty', 'size'], + stdout=subprocess.PIPE, stderr=subprocess.PIPE) + out, err = sp.communicate() + lines, columns = map(int, out.split()) + except: + pass + return _terminal_size(columns, lines) + __all__ = [ 'compat_HTTPError', @@ -371,6 +400,7 @@ __all__ = [ 'compat_chr', 'compat_cookiejar', 'compat_expanduser', + 'compat_get_terminal_size', 'compat_getenv', 'compat_getpass', 'compat_html_entities', diff --git a/youtube_dl/downloader/common.py b/youtube_dl/downloader/common.py index 3ae90021a..8ed5c19a6 100644 --- a/youtube_dl/downloader/common.py +++ b/youtube_dl/downloader/common.py @@ -42,6 +42,8 @@ class FileDownloader(object): max_filesize: Skip files larger than this size xattr_set_filesize: Set ytdl.filesize user xattribute with expected size. (experimenatal) + external_downloader_args: A list of additional command-line arguments for the + external downloader. Subclasses of this one must re-define the real_download method. """ diff --git a/youtube_dl/downloader/external.py b/youtube_dl/downloader/external.py index 51c41c704..1673b2382 100644 --- a/youtube_dl/downloader/external.py +++ b/youtube_dl/downloader/external.py @@ -51,6 +51,13 @@ class ExternalFD(FileDownloader): return [] return [command_option, source_address] + def _configuration_args(self, default=[]): + ex_args = self.params.get('external_downloader_args') + if ex_args is None: + return default + assert isinstance(ex_args, list) + return ex_args + def _call_downloader(self, tmpfilename, info_dict): """ Either overwrite this or implement _make_cmd """ cmd = self._make_cmd(tmpfilename, info_dict) @@ -79,6 +86,7 @@ class CurlFD(ExternalFD): for key, val in info_dict['http_headers'].items(): cmd += ['--header', '%s: %s' % (key, val)] cmd += self._source_address('--interface') + cmd += self._configuration_args() cmd += ['--', info_dict['url']] return cmd @@ -89,15 +97,16 @@ class WgetFD(ExternalFD): for key, val in info_dict['http_headers'].items(): cmd += ['--header', '%s: %s' % (key, val)] cmd += self._source_address('--bind-address') + cmd += self._configuration_args() cmd += ['--', info_dict['url']] return cmd class Aria2cFD(ExternalFD): def _make_cmd(self, tmpfilename, info_dict): - cmd = [ - self.exe, '-c', - '--min-split-size', '1M', '--max-connection-per-server', '4'] + cmd = [self.exe, '-c'] + cmd += self._configuration_args([ + '--min-split-size', '1M', '--max-connection-per-server', '4']) dn = os.path.dirname(tmpfilename) if dn: cmd += ['--dir', dn] diff --git a/youtube_dl/downloader/f4m.py b/youtube_dl/downloader/f4m.py index 7b8fe8cf5..3dc796faa 100644 --- a/youtube_dl/downloader/f4m.py +++ b/youtube_dl/downloader/f4m.py @@ -11,6 +11,7 @@ from .common import FileDownloader from .http import HttpFD from ..compat import ( compat_urlparse, + compat_urllib_error, ) from ..utils import ( struct_pack, @@ -121,7 +122,8 @@ class FlvReader(io.BytesIO): self.read_unsigned_int() # BootstrapinfoVersion # Profile,Live,Update,Reserved - self.read(1) + flags = self.read_unsigned_char() + live = flags & 0x20 != 0 # time scale self.read_unsigned_int() # CurrentMediaTime @@ -160,6 +162,7 @@ class FlvReader(io.BytesIO): return { 'segments': segments, 'fragments': fragments, + 'live': live, } def read_bootstrap_info(self): @@ -182,6 +185,10 @@ def build_fragments_list(boot_info): for segment, fragments_count in segment_run_table['segment_run']: for _ in range(fragments_count): res.append((segment, next(fragments_counter))) + + if boot_info['live']: + res = res[-2:] + return res @@ -246,6 +253,38 @@ class F4mFD(FileDownloader): self.report_error('Unsupported DRM') return media + def _get_bootstrap_from_url(self, bootstrap_url): + bootstrap = self.ydl.urlopen(bootstrap_url).read() + return read_bootstrap_info(bootstrap) + + def _update_live_fragments(self, bootstrap_url, latest_fragment): + fragments_list = [] + retries = 30 + while (not fragments_list) and (retries > 0): + boot_info = self._get_bootstrap_from_url(bootstrap_url) + fragments_list = build_fragments_list(boot_info) + fragments_list = [f for f in fragments_list if f[1] > latest_fragment] + if not fragments_list: + # Retry after a while + time.sleep(5.0) + retries -= 1 + + if not fragments_list: + self.report_error('Failed to update fragments') + + return fragments_list + + def _parse_bootstrap_node(self, node, base_url): + if node.text is None: + bootstrap_url = compat_urlparse.urljoin( + base_url, node.attrib['url']) + boot_info = self._get_bootstrap_from_url(bootstrap_url) + else: + bootstrap_url = None + bootstrap = base64.b64decode(node.text) + boot_info = read_bootstrap_info(bootstrap) + return (boot_info, bootstrap_url) + def real_download(self, filename, info_dict): man_url = info_dict['url'] requested_bitrate = info_dict.get('tbr') @@ -265,18 +304,13 @@ class F4mFD(FileDownloader): base_url = compat_urlparse.urljoin(man_url, media.attrib['url']) bootstrap_node = doc.find(_add_ns('bootstrapInfo')) - if bootstrap_node.text is None: - bootstrap_url = compat_urlparse.urljoin( - base_url, bootstrap_node.attrib['url']) - bootstrap = self.ydl.urlopen(bootstrap_url).read() - else: - bootstrap = base64.b64decode(bootstrap_node.text) + boot_info, bootstrap_url = self._parse_bootstrap_node(bootstrap_node, base_url) + live = boot_info['live'] metadata_node = media.find(_add_ns('metadata')) if metadata_node is not None: metadata = base64.b64decode(metadata_node.text) else: metadata = None - boot_info = read_bootstrap_info(bootstrap) fragments_list = build_fragments_list(boot_info) if self.params.get('test', False): @@ -301,7 +335,8 @@ class F4mFD(FileDownloader): (dest_stream, tmpfilename) = sanitize_open(tmpfilename, 'wb') write_flv_header(dest_stream) - write_metadata_tag(dest_stream, metadata) + if not live: + write_metadata_tag(dest_stream, metadata) # This dict stores the download progress, it's updated by the progress # hook @@ -348,24 +383,45 @@ class F4mFD(FileDownloader): http_dl.add_progress_hook(frag_progress_hook) frags_filenames = [] - for (seg_i, frag_i) in fragments_list: + while fragments_list: + seg_i, frag_i = fragments_list.pop(0) name = 'Seg%d-Frag%d' % (seg_i, frag_i) url = base_url + name if akamai_pv: url += '?' + akamai_pv.strip(';') frag_filename = '%s-%s' % (tmpfilename, name) - success = http_dl.download(frag_filename, {'url': url}) - if not success: - return False - with open(frag_filename, 'rb') as down: - down_data = down.read() - reader = FlvReader(down_data) - while True: - _, box_type, box_data = reader.read_box_info() - if box_type == b'mdat': - dest_stream.write(box_data) - break - frags_filenames.append(frag_filename) + try: + success = http_dl.download(frag_filename, {'url': url}) + if not success: + return False + with open(frag_filename, 'rb') as down: + down_data = down.read() + reader = FlvReader(down_data) + while True: + _, box_type, box_data = reader.read_box_info() + if box_type == b'mdat': + dest_stream.write(box_data) + break + if live: + os.remove(frag_filename) + else: + frags_filenames.append(frag_filename) + except (compat_urllib_error.HTTPError, ) as err: + if live and (err.code == 404 or err.code == 410): + # We didn't keep up with the live window. Continue + # with the next available fragment. + msg = 'Fragment %d unavailable' % frag_i + self.report_warning(msg) + fragments_list = [] + else: + raise + + if not fragments_list and live and bootstrap_url: + fragments_list = self._update_live_fragments(bootstrap_url, frag_i) + total_frags += len(fragments_list) + if fragments_list and (fragments_list[0][1] > frag_i + 1): + msg = 'Missed %d fragments' % (fragments_list[0][1] - (frag_i + 1)) + self.report_warning(msg) dest_stream.close() diff --git a/youtube_dl/downloader/rtmp.py b/youtube_dl/downloader/rtmp.py index 0a52c34c7..89e98ae61 100644 --- a/youtube_dl/downloader/rtmp.py +++ b/youtube_dl/downloader/rtmp.py @@ -119,7 +119,9 @@ class RtmpFD(FileDownloader): # Download using rtmpdump. rtmpdump returns exit code 2 when # the connection was interrumpted and resuming appears to be # possible. This is part of rtmpdump's normal usage, AFAIK. - basic_args = ['rtmpdump', '--verbose', '-r', url, '-o', tmpfilename] + basic_args = [ + 'rtmpdump', '--verbose', '-r', url, + '-o', encodeFilename(tmpfilename, True)] if player_url is not None: basic_args += ['--swfVfy', player_url] if page_url is not None: diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index b35173291..b7add0f89 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -8,6 +8,7 @@ from .adobetv import AdobeTVIE from .adultswim import AdultSwimIE from .aftenposten import AftenpostenIE from .aftonbladet import AftonbladetIE +from .airmozilla import AirMozillaIE from .aljazeera import AlJazeeraIE from .alphaporno import AlphaPornoIE from .anitube import AnitubeIE @@ -226,6 +227,7 @@ from .jeuxvideo import JeuxVideoIE from .jove import JoveIE from .jukebox import JukeboxIE from .jpopsukitv import JpopsukiIE +from .kaltura import KalturaIE from .kankan import KankanIE from .karaoketv import KaraoketvIE from .keezmovies import KeezMoviesIE @@ -237,6 +239,11 @@ from .krasview import KrasViewIE from .ku6 import Ku6IE from .la7 import LA7IE from .laola1tv import Laola1TvIE +from .letv import ( + LetvIE, + LetvTvIE, + LetvPlaylistIE +) from .lifenews import LifeNewsIE from .liveleak import LiveLeakIE from .livestream import ( @@ -340,6 +347,7 @@ from .ntvde import NTVDeIE from .ntvru import NTVRuIE from .nytimes import NYTimesIE from .nuvid import NuvidIE +from .odnoklassniki import OdnoklassnikiIE from .oktoberfesttv import OktoberfestTVIE from .ooyala import OoyalaIE from .openfilm import OpenFilmIE @@ -367,6 +375,7 @@ from .pornotube import PornotubeIE from .pornoxo import PornoXOIE from .promptfile import PromptFileIE from .prosiebensat1 import ProSiebenSat1IE +from .puls4 import Puls4IE from .pyvideo import PyvideoIE from .quickvid import QuickVidIE from .r7 import R7IE diff --git a/youtube_dl/extractor/airmozilla.py b/youtube_dl/extractor/airmozilla.py new file mode 100644 index 000000000..611ad1e9d --- /dev/null +++ b/youtube_dl/extractor/airmozilla.py @@ -0,0 +1,74 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + int_or_none, + parse_duration, + parse_iso8601, +) + + +class AirMozillaIE(InfoExtractor): + _VALID_URL = r'https?://air\.mozilla\.org/(?P[0-9a-z-]+)/?' + _TEST = { + 'url': 'https://air.mozilla.org/privacy-lab-a-meetup-for-privacy-minded-people-in-san-francisco/', + 'md5': '2e3e7486ba5d180e829d453875b9b8bf', + 'info_dict': { + 'id': '6x4q2w', + 'ext': 'mp4', + 'title': 'Privacy Lab - a meetup for privacy minded people in San Francisco', + 'thumbnail': 're:https://\w+\.cloudfront\.net/6x4q2w/poster\.jpg\?t=\d+', + 'description': 'Brings together privacy professionals and others interested in privacy at for-profits, non-profits, and NGOs in an effort to contribute to the state of the ecosystem...', + 'timestamp': 1422487800, + 'upload_date': '20150128', + 'location': 'SFO Commons', + 'duration': 3780, + 'view_count': int, + 'categories': ['Main'], + } + } + + def _real_extract(self, url): + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + video_id = self._html_search_regex(r'//vid.ly/(.*?)/embed', webpage, 'id') + + embed_script = self._download_webpage('https://vid.ly/{0}/embed'.format(video_id), video_id) + jwconfig = self._search_regex(r'\svar jwconfig = (\{.*?\});\s', embed_script, 'metadata') + metadata = self._parse_json(jwconfig, video_id) + + formats = [{ + 'url': source['file'], + 'ext': source['type'], + 'format_id': self._search_regex(r'&format=(.*)$', source['file'], 'video format'), + 'format': source['label'], + 'height': int(source['label'].rstrip('p')), + } for source in metadata['playlist'][0]['sources']] + self._sort_formats(formats) + + view_count = int_or_none(self._html_search_regex( + r'Views since archived: ([0-9]+)', + webpage, 'view count', fatal=False)) + timestamp = parse_iso8601(self._html_search_regex( + r'