From 4fa74b5252a23c2890ddee52b8ee5811b5bb2987 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Mon, 21 Jul 2008 23:12:31 +0200 Subject: [PATCH 001/455] Create initial preview version of the new youtube-dl --- .hgignore | 1 + youtube-dl | 438 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 439 insertions(+) create mode 100644 .hgignore create mode 100755 youtube-dl diff --git a/.hgignore b/.hgignore new file mode 100644 index 000000000..3233d6316 --- /dev/null +++ b/.hgignore @@ -0,0 +1 @@ +youtube-dl-old diff --git a/youtube-dl b/youtube-dl new file mode 100755 index 000000000..45773fda2 --- /dev/null +++ b/youtube-dl @@ -0,0 +1,438 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# Author: Ricardo Garcia Gonzalez +# License: Public domain code +import htmlentitydefs +import httplib +import math +import netrc +import os +import os.path +import re +import socket +import string +import sys +import time +import urllib +import urllib2 + +std_headers = { + 'User-Agent': 'UserAgent: Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9) Gecko/2008052906 Firefox/3.0', + 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7', + 'Accept': 'text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5', + 'Accept-Language': 'en-us,en;q=0.5', +} + +simple_title_chars = string.ascii_letters.decode('ascii') + string.digits.decode('ascii') + +class FileDownloader(object): + """File Downloader class. + + File downloader objects are the ones responsible of downloading the + actual video file and writing it to disk if the user has requested + it, among some other tasks. In most cases there should be one per + program. As, given a video URL, the downloader doesn't know how to + extract all the needed information, task that InfoExtractors do, it + has to pass the URL to one of them. + + For this, file downloader objects have a method that allows + InfoExtractors to be registered in a given order. 
When it is passed + a URL, the file downloader handles it to the first InfoExtractor it + finds that reports it's able to handle it. The InfoExtractor returns + all the information to the FileDownloader and the latter downloads the + file or does whatever it's instructed to do. + + File downloaders accept a lot of parameters. In order not to saturate + the object constructor with arguments, it receives a dictionary of + options instead. These options are available through the get_params() + method for the InfoExtractors to use. The FileDownloader also registers + itself as the downloader in charge for the InfoExtractors that are + added to it, so this is a "mutual registration". + + Available options: + + username: Username for authentication purposes. + password: Password for authentication purposes. + usenetrc: Use netrc for authentication instead. + quiet: Do not print messages to stdout. + format: Video format code. + outtmpl: Template for output names. + """ + + _params = None + _ies = [] + + def __init__(self, params): + self._ies = [] + self.set_params(params) + + @staticmethod + def pmkdir(filename): + """Create directory components in filename. 
Similar to Unix "mkdir -p".""" + components = filename.split(os.sep) + aggregate = [os.sep.join(components[0:x]) for x in xrange(1, len(components))] + for dir in aggregate: + if not os.path.exists(dir): + os.mkdir(dir) + + @staticmethod + def format_bytes(bytes): + if bytes is None: + return 'N/A' + if bytes == 0: + exponent = 0 + else: + exponent = long(math.log(float(bytes), 1024.0)) + suffix = 'bkMGTPEZY'[exponent] + if exponent == 0: + return '%s%s' % (bytes, suffix) + converted = float(bytes) / float(1024**exponent) + return '%.2f%s' % (converted, suffix) + + @staticmethod + def calc_percent(byte_counter, data_len): + if data_len is None: + return '---.-%' + return '%6s' % ('%3.1f%%' % (float(byte_counter) / float(data_len) * 100.0)) + + @staticmethod + def calc_eta(start, now, total, current): + if total is None: + return '--:--' + dif = now - start + if current == 0 or dif < 0.001: # One millisecond + return '--:--' + rate = float(current) / dif + eta = long((float(total) - float(current)) / rate) + (eta_mins, eta_secs) = divmod(eta, 60) + if eta_mins > 99: + return '--:--' + return '%02d:%02d' % (eta_mins, eta_secs) + + @staticmethod + def calc_speed(start, now, bytes): + dif = now - start + if bytes == 0 or dif < 0.001: # One millisecond + return '%9s' % 'N/A b/s' + return '%10s' % ('%s/s' % FileDownloader.format_bytes(float(bytes) / dif)) + + @staticmethod + def best_block_size(elapsed_time, bytes): + new_min = max(bytes / 2.0, 1.0) + new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB + if elapsed_time < 0.001: + return int(new_max) + rate = bytes / elapsed_time + if rate > new_max: + return int(new_max) + if rate < new_min: + return int(new_min) + return int(rate) + + def set_params(self, params): + """Sets parameters.""" + if type(params) != dict: + raise ValueError('params: dictionary expected') + self._params = params + + def get_params(self): + """Get parameters.""" + return self._params + + def add_info_extractor(self, ie): + 
"""Add an InfoExtractor object to the end of the list.""" + self._ies.append(ie) + ie.set_downloader(self) + + def download(self, url_list): + """Download a given list of URLs.""" + for url in url_list: + suitable_found = False + for ie in self._ies: + if not ie.suitable(url): + continue + # Suitable InfoExtractor found + suitable_found = True + for result in ie.extract(url): + if result is None: + continue + try: + filename = self._params['outtmpl'] % result + except (KeyError), err: + sys.stderr.write('ERROR: invalid output template: %s\n' % str(err)) + continue + try: + self.pmkdir(filename) + except (OSError, IOError), err: + sys.stderr.write('ERROR: unable to create directories: %s\n' % str(err)) + continue + try: + outstream = open(filename, 'wb') + except (OSError, IOError), err: + sys.stderr.write('ERROR: unable to open for writing: %s\n' % str(err)) + continue + try: + self._do_download(outstream, result['url']) + outstream.close() + except (OSError, IOError), err: + sys.stderr.write('ERROR: unable to write video data: %s\n' % str(err)) + continue + except (urllib2.URLError, httplib.HTTPException, socket.error), err: + sys.stderr.write('ERROR: unable to download video data: %s\n' % str(err)) + continue + break + if not suitable_found: + sys.stderr.write('ERROR: no suitable InfoExtractor: %s\n' % url) + + def _do_download(self, stream, url): + request = urllib2.Request(url, None, std_headers) + data = urllib2.urlopen(request) + data_len = data.info().get('Content-length', None) + data_len_str = self.format_bytes(data_len) + byte_counter = 0 + block_size = 1024 + start = time.time() + while True: + percent_str = self.calc_percent(byte_counter, data_len) + eta_str = self.calc_eta(start, time.time(), data_len, byte_counter) + speed_str = self.calc_speed(start, time.time(), byte_counter) + + if not self._params.get('quiet', False): + sys.stdout.write('\r[download] %s of %s at %s ETA %s' % + (percent_str, data_len_str, speed_str, eta_str)) + sys.stdout.flush() + 
+ before = time.time() + data_block = data.read(block_size) + after = time.time() + data_block_len = len(data_block) + if data_block_len == 0: + break + byte_counter += data_block_len + stream.write(data_block) + block_size = self.best_block_size(after - before, data_block_len) + + if not self._params.get('quiet', False): + print + + if data_len is not None and str(byte_counter) != data_len: + raise ValueError('Content too short: %s/%s bytes' % (byte_counter, data_len)) + +class InfoExtractor(object): + """Information Extractor class. + + Information extractors are the classes that, given a URL, extract + information from the video (or videos) the URL refers to. This + information includes the real video URL, the video title and simplified + title, author and others. It is returned in a list of dictionaries when + calling its extract() method. It is a list because a URL can refer to + more than one video (think of playlists). The dictionaries must include + the following fields: + + id: Video identifier. + url: Final video URL. + uploader: Nickname of the video uploader. + title: Literal title. + stitle: Simplified title. + ext: Video filename extension. + + Subclasses of this one should re-define the _real_initialize() and + _real_extract() methods, as well as the suitable() static method. + Probably, they should also be instantiated and added to the main + downloader. + """ + + _ready = False + _downloader = None + + def __init__(self, downloader=None): + """Constructor. 
Receives an optional downloader.""" + self._ready = False + self.set_downloader(downloader) + + @staticmethod + def suitable(url): + """Receives a URL and returns True if suitable for this IE.""" + return True + + def initialize(self): + """Initializes an instance (login, etc).""" + if not self._ready: + self._real_initialize() + self._ready = True + + def extract(self, url): + """Extracts URL information and returns it in list of dicts.""" + self.initialize() + return self._real_extract(url) + + def set_downloader(self, downloader): + """Sets the downloader for this IE.""" + self._downloader = downloader + + def to_stdout(self, message): + if self._downloader is None or not self._downloader.get_params().get('quiet', False): + print message + + def to_stderr(self, message): + sys.stderr.write('%s\n' % message) + + def _real_initialize(self): + """Real initialization process. Redefine in subclasses.""" + pass + + def _real_extract(self, url): + """Real extraction process. Redefine in subclasses.""" + pass + +class YoutubeIE(InfoExtractor): + """Information extractor for youtube.com.""" + + _LOGIN_URL = 'http://www.youtube.com/login?next=/' + _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/' + _NETRC_MACHINE = 'youtube' + + def _real_initialize(self): + if self._downloader is None: + return + + username = None + password = None + downloader_params = self._downloader.get_params() + + # Attempt to use provided username and password or .netrc data + if downloader_params.get('username', None) is not None: + username = downloader_params['username'] + password = downloader_params['password'] + elif downloader_params.get('usenetrc', False): + try: + info = netrc.netrc().authenticators(self._NETRC_MACHINE) + if info is not None: + username = info[0] + password = info[2] + else: + raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE) + except (IOError, netrc.NetrcParseError), err: + self.to_stderr('WARNING: parsing .netrc: %s' % str(err)) + 
return + + if username is None: + return + + # Log in + login_form = { 'current_form': 'loginForm', + 'next': '/', + 'action_login': 'Log In', + 'username': username, + 'password': password, } + request = urllib2.Request(self._LOGIN_URL, urllib.urlencode(login_form), std_headers) + try: + self.to_stdout('[youtube] Logging in') + login_results = urllib2.urlopen(request).read() + if re.search(r'(?i)]* name="loginForm"', login_results) is not None: + self.to_stderr('WARNING: Unable to log in: bad username or password') + return + except (urllib2.URLError, httplib.HTTPException, socket.error), err: + self.to_stderr('WARNING: Unable to log in: %s' % str(err)) + return + + # Confirm age + age_form = { 'next_url': '/', + 'action_confirm': 'Confirm', } + request = urllib2.Request(self._AGE_URL, urllib.urlencode(age_form), std_headers) + try: + self.to_stdout('[youtube] Confirming age') + age_results = urllib2.urlopen(request).read() + except (urllib2.URLError, httplib.HTTPException, socket.error), err: + sys.exit('ERROR: Unable to confirm age: %s' % str(err)) + + def _real_extract(self, url): + # Extract video id from URL + mobj = re.match(r'^((?:http://)?(?:\w+\.)?youtube\.com/(?:(?:v/)|(?:(?:watch(?:\.php)?)?\?(?:.+&)?v=)))?([0-9A-Za-z_-]+)(?(1).+)?$', url) + if mobj is None: + self.to_stderr('ERROR: Invalid URL: %s' % url) + return [None] + video_id = mobj.group(2) + + # Downloader parameters + format_param = None + if self._downloader is not None: + params = self._downloader.get_params() + format_param = params.get('format', None) + + # Extension + video_extension = {18: 'mp4'}.get(format_param, 'flv') + + # Normalize URL, including format + normalized_url = 'http://www.youtube.com/watch?v=%s' % video_id + if format_param is not None: + normalized_url = '%s&fmt=%s' % (normalized_url, format_param) + request = urllib2.Request(normalized_url, None, std_headers) + try: + self.to_stdout('[youtube] %s: Downloading video webpage' % video_id) + video_webpage = 
urllib2.urlopen(request).read() + except (urllib2.URLError, httplib.HTTPException, socket.error), err: + sys.exit('ERROR: Unable to download video: %s' % str(err)) + self.to_stdout('[youtube] %s: Extracting video information' % video_id) + + # "t" param + mobj = re.search(r', "t": "([^"]+)"', video_webpage) + if mobj is None: + self.to_stderr('ERROR: Unable to extract "t" parameter') + return [None] + video_real_url = 'http://www.youtube.com/get_video?video_id=%s&t=%s' % (video_id, mobj.group(1)) + if format_param is not None: + video_real_url = '%s&fmt=%s' % (video_real_url, format_param) + + # uploader + mobj = re.search(r'More From: ([^<]*)<', video_webpage) + if mobj is None: + self.to_stderr('ERROR: Unable to extract uploader nickname') + return [None] + video_uploader = mobj.group(1) + + # title + mobj = re.search(r'(?im)YouTube - ([^<]*)', video_webpage) + if mobj is None: + self.to_stderr('ERROR: Unable to extract video title') + return [None] + video_title = mobj.group(1).decode('utf-8') + video_title = re.sub(u'&(.+?);', lambda x: unichr(htmlentitydefs.name2codepoint[x.group(1)]), video_title) + + # simplified title + simple_title = re.sub(u'([^%s]+)' % simple_title_chars, u'_', video_title) + simple_title = simple_title.strip(u'_') + + # Return information + return [{ 'id': video_id, + 'url': video_real_url, + 'uploader': video_uploader, + 'title': video_title, + 'stitle': simple_title, + 'ext': video_extension, + }] + +if __name__ == '__main__': + try: + # General configuration + urllib2.install_opener(urllib2.build_opener(urllib2.ProxyHandler())) + urllib2.install_opener(urllib2.build_opener(urllib2.HTTPCookieProcessor())) + + # Information extractors + youtube_ie = YoutubeIE() + + # File downloader + fd = FileDownloader({ 'usenetrc': False, + 'username': None, + 'password': None, + 'quiet': False, + 'format': None, + 'outtmpl': '%(id)s.%(ext)s' + }) + fd.add_info_extractor(youtube_ie) + fd.download([ 'http://www.youtube.com/watch?v=t7qdwI7TVe8', + 
'http://www.youtube.com/watch?v=IJyn3pRcy_Q', + 'http://www.youtube.com/watch?v=DZRXe1wtC-M', ]) + + except KeyboardInterrupt: + sys.exit('\nERROR: Interrupted by user') From b46347267a8c460561d1bc3589c7eab55b6a4655 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Mon, 21 Jul 2008 23:29:06 +0200 Subject: [PATCH 002/455] Check the output name is not fixed when there are several videos to download --- youtube-dl | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/youtube-dl b/youtube-dl index 45773fda2..dfbdc4a47 100755 --- a/youtube-dl +++ b/youtube-dl @@ -38,7 +38,7 @@ class FileDownloader(object): For this, file downloader objects have a method that allows InfoExtractors to be registered in a given order. When it is passed a URL, the file downloader handles it to the first InfoExtractor it - finds that reports it's able to handle it. The InfoExtractor returns + finds that reports being able to handle it. The InfoExtractor returns all the information to the FileDownloader and the latter downloads the file or does whatever it's instructed to do. 
@@ -153,9 +153,12 @@ class FileDownloader(object): continue # Suitable InfoExtractor found suitable_found = True - for result in ie.extract(url): - if result is None: - continue + results = [x for x in ie.extract(url) if x is not None] + + if (len(url_list) > 1 or len(results) > 1) and re.search(r'%\(.+?\)s', self._params['outtmpl']) is None: + sys.exit('ERROR: fixed output name but more than one file to download') + + for result in results: try: filename = self._params['outtmpl'] % result except (KeyError), err: From 9fcd8355e57b4ef5605a015a0039d1bbe2375cf3 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Mon, 21 Jul 2008 23:53:06 +0200 Subject: [PATCH 003/455] Correct a few formatting bugs and add .to_stdout() to downloader --- youtube-dl | 72 ++++++++++++++++++++++++++++++------------------------ 1 file changed, 40 insertions(+), 32 deletions(-) diff --git a/youtube-dl b/youtube-dl index dfbdc4a47..adb4234c7 100755 --- a/youtube-dl +++ b/youtube-dl @@ -84,8 +84,6 @@ class FileDownloader(object): else: exponent = long(math.log(float(bytes), 1024.0)) suffix = 'bkMGTPEZY'[exponent] - if exponent == 0: - return '%s%s' % (bytes, suffix) converted = float(bytes) / float(1024**exponent) return '%.2f%s' % (converted, suffix) @@ -113,7 +111,7 @@ class FileDownloader(object): def calc_speed(start, now, bytes): dif = now - start if bytes == 0 or dif < 0.001: # One millisecond - return '%9s' % 'N/A b/s' + return '%10s' % '---b/s' return '%10s' % ('%s/s' % FileDownloader.format_bytes(float(bytes) / dif)) @staticmethod @@ -144,6 +142,12 @@ class FileDownloader(object): self._ies.append(ie) ie.set_downloader(self) + def to_stdout(self, message, skip_eol=False): + """Print message to stdout if not in quiet mode.""" + if not self._params.get('quiet', False): + sys.stdout.write('%s%s' % (message, ['\n', ''][skip_eol])) + sys.stdout.flush() + def download(self, url_list): """Download a given list of URLs.""" for url in url_list: @@ -199,11 +203,8 @@ class 
FileDownloader(object): percent_str = self.calc_percent(byte_counter, data_len) eta_str = self.calc_eta(start, time.time(), data_len, byte_counter) speed_str = self.calc_speed(start, time.time(), byte_counter) - - if not self._params.get('quiet', False): - sys.stdout.write('\r[download] %s of %s at %s ETA %s' % - (percent_str, data_len_str, speed_str, eta_str)) - sys.stdout.flush() + self.to_stdout('\r[download] %s of %s at %s ETA %s' % + (percent_str, data_len_str, speed_str, eta_str), skip_eol=True) before = time.time() data_block = data.read(block_size) @@ -215,9 +216,7 @@ class FileDownloader(object): stream.write(data_block) block_size = self.best_block_size(after - before, data_block_len) - if not self._params.get('quiet', False): - print - + self.to_stdout('') if data_len is not None and str(byte_counter) != data_len: raise ValueError('Content too short: %s/%s bytes' % (byte_counter, data_len)) @@ -323,11 +322,13 @@ class YoutubeIE(InfoExtractor): return # Log in - login_form = { 'current_form': 'loginForm', + login_form = { + 'current_form': 'loginForm', 'next': '/', 'action_login': 'Log In', 'username': username, - 'password': password, } + 'password': password, + } request = urllib2.Request(self._LOGIN_URL, urllib.urlencode(login_form), std_headers) try: self.to_stdout('[youtube] Logging in') @@ -340,8 +341,10 @@ class YoutubeIE(InfoExtractor): return # Confirm age - age_form = { 'next_url': '/', - 'action_confirm': 'Confirm', } + age_form = { + 'next_url': '/', + 'action_confirm': 'Confirm', + } request = urllib2.Request(self._AGE_URL, urllib.urlencode(age_form), std_headers) try: self.to_stdout('[youtube] Confirming age') @@ -386,6 +389,7 @@ class YoutubeIE(InfoExtractor): video_real_url = 'http://www.youtube.com/get_video?video_id=%s&t=%s' % (video_id, mobj.group(1)) if format_param is not None: video_real_url = '%s&fmt=%s' % (video_real_url, format_param) + self.to_stdout('[youtube] %s: URL: %s' % (video_id, video_real_url)) # uploader mobj = 
re.search(r'More From: ([^<]*)<', video_webpage) @@ -407,13 +411,14 @@ class YoutubeIE(InfoExtractor): simple_title = simple_title.strip(u'_') # Return information - return [{ 'id': video_id, - 'url': video_real_url, - 'uploader': video_uploader, - 'title': video_title, - 'stitle': simple_title, - 'ext': video_extension, - }] + return [{ + 'id': video_id, + 'url': video_real_url, + 'uploader': video_uploader, + 'title': video_title, + 'stitle': simple_title, + 'ext': video_extension, + }] if __name__ == '__main__': try: @@ -425,17 +430,20 @@ if __name__ == '__main__': youtube_ie = YoutubeIE() # File downloader - fd = FileDownloader({ 'usenetrc': False, - 'username': None, - 'password': None, - 'quiet': False, - 'format': None, - 'outtmpl': '%(id)s.%(ext)s' - }) + fd = FileDownloader({ + 'usenetrc': False, + 'username': None, + 'password': None, + 'quiet': False, + 'format': None, + 'outtmpl': '%(id)s.%(ext)s' + }) fd.add_info_extractor(youtube_ie) - fd.download([ 'http://www.youtube.com/watch?v=t7qdwI7TVe8', - 'http://www.youtube.com/watch?v=IJyn3pRcy_Q', - 'http://www.youtube.com/watch?v=DZRXe1wtC-M', ]) + fd.download([ + 'http://www.youtube.com/watch?v=t7qdwI7TVe8', + 'http://www.youtube.com/watch?v=IJyn3pRcy_Q', + 'http://www.youtube.com/watch?v=DZRXe1wtC-M', + ]) except KeyboardInterrupt: sys.exit('\nERROR: Interrupted by user') From b609fd549ffd7e3f8450f784eb34e84249d710c7 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Mon, 21 Jul 2008 23:58:16 +0200 Subject: [PATCH 004/455] Add support for the "simulate" option --- youtube-dl | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/youtube-dl b/youtube-dl index adb4234c7..7323ad4f2 100755 --- a/youtube-dl +++ b/youtube-dl @@ -55,6 +55,7 @@ class FileDownloader(object): password: Password for authentication purposes. usenetrc: Use netrc for authentication instead. quiet: Do not print messages to stdout. + simulate: Do not download the video files. format: Video format code. 
outtmpl: Template for output names. """ @@ -162,6 +163,9 @@ class FileDownloader(object): if (len(url_list) > 1 or len(results) > 1) and re.search(r'%\(.+?\)s', self._params['outtmpl']) is None: sys.exit('ERROR: fixed output name but more than one file to download') + if self._params.get('simulate', False): + continue + for result in results: try: filename = self._params['outtmpl'] % result @@ -435,6 +439,7 @@ if __name__ == '__main__': 'username': None, 'password': None, 'quiet': False, + 'simulate': True, 'format': None, 'outtmpl': '%(id)s.%(ext)s' }) From 7e5cab673024e93a706afa64f9fc38e25e8df085 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Tue, 22 Jul 2008 00:07:07 +0200 Subject: [PATCH 005/455] Add .to_stderr() to downloaders --- youtube-dl | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/youtube-dl b/youtube-dl index 7323ad4f2..1c4a05471 100755 --- a/youtube-dl +++ b/youtube-dl @@ -148,6 +148,10 @@ class FileDownloader(object): if not self._params.get('quiet', False): sys.stdout.write('%s%s' % (message, ['\n', ''][skip_eol])) sys.stdout.flush() + + def to_stderr(self, message): + """Print message to stderr.""" + sys.stderr.write('%s\n' % message) def download(self, url_list): """Download a given list of URLs.""" @@ -170,30 +174,30 @@ class FileDownloader(object): try: filename = self._params['outtmpl'] % result except (KeyError), err: - sys.stderr.write('ERROR: invalid output template: %s\n' % str(err)) + self.to_stderr('ERROR: invalid output template: %s' % str(err)) continue try: self.pmkdir(filename) except (OSError, IOError), err: - sys.stderr.write('ERROR: unable to create directories: %s\n' % str(err)) + self.to_stderr('ERROR: unable to create directories: %s' % str(err)) continue try: outstream = open(filename, 'wb') except (OSError, IOError), err: - sys.stderr.write('ERROR: unable to open for writing: %s\n' % str(err)) + self.to_stderr('ERROR: unable to open for writing: %s' % str(err)) continue try: 
self._do_download(outstream, result['url']) outstream.close() except (OSError, IOError), err: - sys.stderr.write('ERROR: unable to write video data: %s\n' % str(err)) + self.to_stderr('ERROR: unable to write video data: %s' % str(err)) continue except (urllib2.URLError, httplib.HTTPException, socket.error), err: - sys.stderr.write('ERROR: unable to download video data: %s\n' % str(err)) + self.to_stderr('ERROR: unable to download video data: %s' % str(err)) continue break if not suitable_found: - sys.stderr.write('ERROR: no suitable InfoExtractor: %s\n' % url) + self.to_stderr('ERROR: no suitable InfoExtractor: %s' % url) def _do_download(self, stream, url): request = urllib2.Request(url, None, std_headers) From 14c300687e4a2724fb458676cd2b0214acb9647a Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Tue, 22 Jul 2008 09:21:43 +0200 Subject: [PATCH 006/455] Detect malformed output template --- youtube-dl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube-dl b/youtube-dl index 1c4a05471..d3f277110 100755 --- a/youtube-dl +++ b/youtube-dl @@ -173,7 +173,7 @@ class FileDownloader(object): for result in results: try: filename = self._params['outtmpl'] % result - except (KeyError), err: + except (ValueError, KeyError), err: self.to_stderr('ERROR: invalid output template: %s' % str(err)) continue try: @@ -443,9 +443,9 @@ if __name__ == '__main__': 'username': None, 'password': None, 'quiet': False, - 'simulate': True, + 'simulate': False, 'format': None, - 'outtmpl': '%(id)s.%(ext)s' + 'outtmpl': '%(ext)s/%(ext)s/%(id)s.%(ext)s' }) fd.add_info_extractor(youtube_ie) fd.download([ From 22899cea59bf31fb31efab9c03a92214684f94d6 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Tue, 22 Jul 2008 09:45:49 +0200 Subject: [PATCH 007/455] Improve fixed template checks --- youtube-dl | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index d3f277110..0df6ba141 100755 --- a/youtube-dl +++ b/youtube-dl @@ 
-152,9 +152,16 @@ class FileDownloader(object): def to_stderr(self, message): """Print message to stderr.""" sys.stderr.write('%s\n' % message) + + def fixed_template(self): + """Checks if the output template is fixed.""" + return (re.search(ur'%\(.+?\)s', self._params['outtmpl']) is None) def download(self, url_list): """Download a given list of URLs.""" + if len(url_list) > 1 and self.fixed_template(): + sys.exit('ERROR: fixed output name but more than one file to download') + for url in url_list: suitable_found = False for ie in self._ies: @@ -164,7 +171,7 @@ class FileDownloader(object): suitable_found = True results = [x for x in ie.extract(url) if x is not None] - if (len(url_list) > 1 or len(results) > 1) and re.search(r'%\(.+?\)s', self._params['outtmpl']) is None: + if len(results) > 1 and self.fixed_template(): sys.exit('ERROR: fixed output name but more than one file to download') if self._params.get('simulate', False): From f97c8db74ef927216d3ccfb719602e7335f4dee5 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Tue, 22 Jul 2008 09:53:05 +0200 Subject: [PATCH 008/455] Improve some unicode regular expressions --- youtube-dl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/youtube-dl b/youtube-dl index 0df6ba141..914cce37b 100755 --- a/youtube-dl +++ b/youtube-dl @@ -155,7 +155,7 @@ class FileDownloader(object): def fixed_template(self): """Checks if the output template is fixed.""" - return (re.search(ur'%\(.+?\)s', self._params['outtmpl']) is None) + return (re.search(ur'(?u)%\(.+?\)s', self._params['outtmpl']) is None) def download(self, url_list): """Download a given list of URLs.""" @@ -419,11 +419,11 @@ class YoutubeIE(InfoExtractor): self.to_stderr('ERROR: Unable to extract video title') return [None] video_title = mobj.group(1).decode('utf-8') - video_title = re.sub(u'&(.+?);', lambda x: unichr(htmlentitydefs.name2codepoint[x.group(1)]), video_title) + video_title = re.sub(ur'(?u)&(.+?);', lambda x: 
unichr(htmlentitydefs.name2codepoint[x.group(1)]), video_title) # simplified title - simple_title = re.sub(u'([^%s]+)' % simple_title_chars, u'_', video_title) - simple_title = simple_title.strip(u'_') + simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title) + simple_title = simple_title.strip(ur'_') # Return information return [{ From 7414bdf11d0382e1a7206caa9def3da999510b6b Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Tue, 22 Jul 2008 09:55:32 +0200 Subject: [PATCH 009/455] Update and fix User-Agent header --- youtube-dl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index 914cce37b..6a79bf93a 100755 --- a/youtube-dl +++ b/youtube-dl @@ -17,7 +17,7 @@ import urllib import urllib2 std_headers = { - 'User-Agent': 'UserAgent: Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9) Gecko/2008052906 Firefox/3.0', + 'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.0.1) Gecko/2008070208 Firefox/3.0.1', 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7', 'Accept': 'text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5', 'Accept-Language': 'en-us,en;q=0.5', From 05a84b35cc937952f86c0cf5b0accbd2a376ff93 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Tue, 22 Jul 2008 10:00:43 +0200 Subject: [PATCH 010/455] Add forceurl and forcetitle downloader options --- youtube-dl | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/youtube-dl b/youtube-dl index 6a79bf93a..cc2ba4262 100755 --- a/youtube-dl +++ b/youtube-dl @@ -55,6 +55,8 @@ class FileDownloader(object): password: Password for authentication purposes. usenetrc: Use netrc for authentication instead. quiet: Do not print messages to stdout. + forceurl: Force printing final URL. + forcetitle: Force printing title. simulate: Do not download the video files. format: Video format code. outtmpl: Template for output names. 
@@ -174,10 +176,18 @@ class FileDownloader(object): if len(results) > 1 and self.fixed_template(): sys.exit('ERROR: fixed output name but more than one file to download') - if self._params.get('simulate', False): - continue - for result in results: + + # Forced printings + if self._params.get('forcetitle', False): + print result['title'] + if self._params.get('forceurl', False): + print result['url'] + + # Do nothing else if in simulate mode + if self._params.get('simulate', False): + continue + try: filename = self._params['outtmpl'] % result except (ValueError, KeyError), err: @@ -450,6 +460,8 @@ if __name__ == '__main__': 'username': None, 'password': None, 'quiet': False, + 'forceurl': False, + 'forcetitle': False, 'simulate': False, 'format': None, 'outtmpl': '%(ext)s/%(ext)s/%(id)s.%(ext)s' From 656a7dc973f72a02eae4a2cc8c48a72ecf1dbf02 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Tue, 22 Jul 2008 10:01:37 +0200 Subject: [PATCH 011/455] Modify ignore list --- .hgignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.hgignore b/.hgignore index 3233d6316..31a5b2894 100644 --- a/.hgignore +++ b/.hgignore @@ -1 +1,3 @@ +syntax: glob youtube-dl-old +.*.swp From f9f1e798792af95a33c3ea137768369de156f198 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Tue, 22 Jul 2008 10:14:13 +0200 Subject: [PATCH 012/455] Minor improvements and changes --- youtube-dl | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/youtube-dl b/youtube-dl index cc2ba4262..607109bb2 100755 --- a/youtube-dl +++ b/youtube-dl @@ -392,7 +392,7 @@ class YoutubeIE(InfoExtractor): format_param = params.get('format', None) # Extension - video_extension = {18: 'mp4'}.get(format_param, 'flv') + video_extension = {'18': 'mp4'}.get(format_param, 'flv') # Normalize URL, including format normalized_url = 'http://www.youtube.com/watch?v=%s' % video_id @@ -447,9 +447,15 @@ class YoutubeIE(InfoExtractor): if __name__ == '__main__': try: + # Modules needed only when 
running the main program + import optparse + # General configuration urllib2.install_opener(urllib2.build_opener(urllib2.ProxyHandler())) urllib2.install_opener(urllib2.build_opener(urllib2.HTTPCookieProcessor())) + socket.setdefaulttimeout(300) # 5 minutes should be enough (famous last words) + + # Parse command line # Information extractors youtube_ie = YoutubeIE() @@ -459,12 +465,12 @@ if __name__ == '__main__': 'usenetrc': False, 'username': None, 'password': None, - 'quiet': False, - 'forceurl': False, - 'forcetitle': False, - 'simulate': False, + 'quiet': True, + 'forceurl': True, + 'forcetitle': True, + 'simulate': True, 'format': None, - 'outtmpl': '%(ext)s/%(ext)s/%(id)s.%(ext)s' + 'outtmpl': '%(id)s.%(ext)s' }) fd.add_info_extractor(youtube_ie) fd.download([ From 209e9e27e777f4c320dada6857b8e984af816e7a Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Tue, 22 Jul 2008 10:56:54 +0200 Subject: [PATCH 013/455] Add command line parse code --- youtube-dl | 80 ++++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 66 insertions(+), 14 deletions(-) diff --git a/youtube-dl b/youtube-dl index 607109bb2..677f938dd 100755 --- a/youtube-dl +++ b/youtube-dl @@ -448,6 +448,7 @@ class YoutubeIE(InfoExtractor): if __name__ == '__main__': try: # Modules needed only when running the main program + import getpass import optparse # General configuration @@ -456,28 +457,79 @@ if __name__ == '__main__': socket.setdefaulttimeout(300) # 5 minutes should be enough (famous last words) # Parse command line + parser = optparse.OptionParser( + usage='Usage: %prog [options] url...', + version='INTERNAL', + conflict_handler='resolve', + ) + parser.add_option('-h', '--help', + action='help', help='print this help text and exit') + parser.add_option('-v', '--version', + action='version', help='print program version and exit') + parser.add_option('-u', '--username', + dest='username', metavar='UN', help='account username') + parser.add_option('-p', '--password', + 
dest='password', metavar='PW', help='account password') + parser.add_option('-o', '--output', + dest='outtmpl', metavar='TPL', help='output filename template') + parser.add_option('-q', '--quiet', + action='store_true', dest='quiet', help='activates quiet mode', default=False) + parser.add_option('-s', '--simulate', + action='store_true', dest='simulate', help='do not download video', default=False) + parser.add_option('-t', '--title', + action='store_true', dest='usetitle', help='use title in file name', default=False) + parser.add_option('-l', '--literal', + action='store_true', dest='useliteral', help='use literal title in file name', default=False) + parser.add_option('-n', '--netrc', + action='store_true', dest='usenetrc', help='use .netrc authentication data', default=False) + parser.add_option('-g', '--get-url', + action='store_true', dest='geturl', help='simulate, quiet but print URL', default=False) + parser.add_option('-e', '--get-title', + action='store_true', dest='gettitle', help='simulate, quiet but print title', default=False) + parser.add_option('-f', '--format', + dest='format', metavar='FMT', help='video format code') + parser.add_option('-b', '--best-quality', + action='store_const', dest='video_format', help='alias for -f 18', const='18') + (opts, args) = parser.parse_args() + + # Conflicting, missing and erroneous options + if len(args) < 1: + sys.exit('ERROR: you must provide at least one URL') + if opts.usenetrc and (opts.username is not None or opts.password is not None): + sys.exit('ERROR: using .netrc conflicts with giving username/password') + if opts.password is not None and opts.username is None: + sys.exit('ERROR: account username missing') + if opts.outtmpl is not None and (opts.useliteral or opts.usetitle): + sys.exit('ERROR: using output template conflicts with using title or literal title') + if opts.usetitle and opts.useliteral: + sys.exit('ERROR: using title conflicts with using literal title') + if opts.username is not None and 
opts.password is None: + opts.password = getpass.getpass('Type account password and press return:') # Information extractors youtube_ie = YoutubeIE() # File downloader fd = FileDownloader({ - 'usenetrc': False, - 'username': None, - 'password': None, - 'quiet': True, - 'forceurl': True, - 'forcetitle': True, - 'simulate': True, - 'format': None, - 'outtmpl': '%(id)s.%(ext)s' + 'usenetrc': opts.usenetrc, + 'username': opts.username, + 'password': opts.password, + 'quiet': (opts.quiet or opts.geturl or opts.gettitle), + 'forceurl': opts.geturl, + 'forcetitle': opts.gettitle, + 'simulate': (opts.simulate or opts.geturl or opts.gettitle), + 'format': opts.format, + 'outtmpl': ((opts.usetitle and '%(stitle)s-%(id)s.%(ext)s') + or (opts.useliteral and '%(title)s-%(id)s.%(ext)s') + or '%(id)s.%(ext)s'), }) fd.add_info_extractor(youtube_ie) - fd.download([ - 'http://www.youtube.com/watch?v=t7qdwI7TVe8', - 'http://www.youtube.com/watch?v=IJyn3pRcy_Q', - 'http://www.youtube.com/watch?v=DZRXe1wtC-M', - ]) + fd.download(args) + #fd.download([ + # 'http://www.youtube.com/watch?v=t7qdwI7TVe8', + # 'http://www.youtube.com/watch?v=IJyn3pRcy_Q', + # 'http://www.youtube.com/watch?v=DZRXe1wtC-M', + # ]) except KeyboardInterrupt: sys.exit('\nERROR: Interrupted by user') From ae2e6e461c4df66e85b973e7fb0c3b686581a4c6 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Tue, 22 Jul 2008 10:57:19 +0200 Subject: [PATCH 014/455] Remove useless comments --- youtube-dl | 5 ----- 1 file changed, 5 deletions(-) diff --git a/youtube-dl b/youtube-dl index 677f938dd..7d3758654 100755 --- a/youtube-dl +++ b/youtube-dl @@ -525,11 +525,6 @@ if __name__ == '__main__': }) fd.add_info_extractor(youtube_ie) fd.download(args) - #fd.download([ - # 'http://www.youtube.com/watch?v=t7qdwI7TVe8', - # 'http://www.youtube.com/watch?v=IJyn3pRcy_Q', - # 'http://www.youtube.com/watch?v=DZRXe1wtC-M', - # ]) except KeyboardInterrupt: sys.exit('\nERROR: Interrupted by user') From 
e8e20c76f23e73c8f4f864667b03515ab5d7523f Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Tue, 22 Jul 2008 11:00:38 +0200 Subject: [PATCH 015/455] Fix not taking into account outs.outtmpl --- youtube-dl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index 7d3758654..814e1b68b 100755 --- a/youtube-dl +++ b/youtube-dl @@ -519,7 +519,8 @@ if __name__ == '__main__': 'forcetitle': opts.gettitle, 'simulate': (opts.simulate or opts.geturl or opts.gettitle), 'format': opts.format, - 'outtmpl': ((opts.usetitle and '%(stitle)s-%(id)s.%(ext)s') + 'outtmpl': ((opts.outtmpl is not None and opts.outtmpl) + or (opts.usetitle and '%(stitle)s-%(id)s.%(ext)s') or (opts.useliteral and '%(title)s-%(id)s.%(ext)s') or '%(id)s.%(ext)s'), }) From 98a1fe038c3aa1009e0b91a32cd0bb5e3d658f84 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Tue, 22 Jul 2008 11:05:22 +0200 Subject: [PATCH 016/455] Replace the directory separator in title --- youtube-dl | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube-dl b/youtube-dl index 814e1b68b..320248764 100755 --- a/youtube-dl +++ b/youtube-dl @@ -430,6 +430,7 @@ class YoutubeIE(InfoExtractor): return [None] video_title = mobj.group(1).decode('utf-8') video_title = re.sub(ur'(?u)&(.+?);', lambda x: unichr(htmlentitydefs.name2codepoint[x.group(1)]), video_title) + video_title = video_title.replace(os.sep, u'%') # simplified title simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title) From bb681b88df7f80f49ee3895d3c272ab4475347b7 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Tue, 22 Jul 2008 11:16:32 +0200 Subject: [PATCH 017/455] Unify uppercase criteria in errors and trace return code --- youtube-dl | 33 +++++++++++++++++++++++---------- 1 file changed, 23 insertions(+), 10 deletions(-) diff --git a/youtube-dl b/youtube-dl index 320248764..3986ea0d8 100755 --- a/youtube-dl +++ b/youtube-dl @@ -161,6 +161,7 @@ class FileDownloader(object): def download(self, 
url_list): """Download a given list of URLs.""" + retcode = 0 if len(url_list) > 1 and self.fixed_template(): sys.exit('ERROR: fixed output name but more than one file to download') @@ -171,7 +172,10 @@ class FileDownloader(object): continue # Suitable InfoExtractor found suitable_found = True - results = [x for x in ie.extract(url) if x is not None] + all_results = ie.extract(url) + results = [x for x in all_results if x is not None] + if len(results) != len(all_results): + retcode = 1 if len(results) > 1 and self.fixed_template(): sys.exit('ERROR: fixed output name but more than one file to download') @@ -192,29 +196,37 @@ class FileDownloader(object): filename = self._params['outtmpl'] % result except (ValueError, KeyError), err: self.to_stderr('ERROR: invalid output template: %s' % str(err)) + retcode = 1 continue try: self.pmkdir(filename) except (OSError, IOError), err: self.to_stderr('ERROR: unable to create directories: %s' % str(err)) + retcode = 1 continue try: outstream = open(filename, 'wb') except (OSError, IOError), err: self.to_stderr('ERROR: unable to open for writing: %s' % str(err)) + retcode = 1 continue try: self._do_download(outstream, result['url']) outstream.close() except (OSError, IOError), err: self.to_stderr('ERROR: unable to write video data: %s' % str(err)) + retcode = 1 continue except (urllib2.URLError, httplib.HTTPException, socket.error), err: self.to_stderr('ERROR: unable to download video data: %s' % str(err)) + retcode = 1 continue break if not suitable_found: self.to_stderr('ERROR: no suitable InfoExtractor: %s' % url) + retcode = 1 + + return retcode def _do_download(self, stream, url): request = urllib2.Request(url, None, std_headers) @@ -359,10 +371,10 @@ class YoutubeIE(InfoExtractor): self.to_stdout('[youtube] Logging in') login_results = urllib2.urlopen(request).read() if re.search(r'(?i)]* name="loginForm"', login_results) is not None: - self.to_stderr('WARNING: Unable to log in: bad username or password') + 
self.to_stderr('WARNING: unable to log in: bad username or password') return except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self.to_stderr('WARNING: Unable to log in: %s' % str(err)) + self.to_stderr('WARNING: unable to log in: %s' % str(err)) return # Confirm age @@ -375,13 +387,13 @@ class YoutubeIE(InfoExtractor): self.to_stdout('[youtube] Confirming age') age_results = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - sys.exit('ERROR: Unable to confirm age: %s' % str(err)) + sys.exit('ERROR: unable to confirm age: %s' % str(err)) def _real_extract(self, url): # Extract video id from URL mobj = re.match(r'^((?:http://)?(?:\w+\.)?youtube\.com/(?:(?:v/)|(?:(?:watch(?:\.php)?)?\?(?:.+&)?v=)))?([0-9A-Za-z_-]+)(?(1).+)?$', url) if mobj is None: - self.to_stderr('ERROR: Invalid URL: %s' % url) + self.to_stderr('ERROR: invalid URL: %s' % url) return [None] video_id = mobj.group(2) @@ -403,13 +415,13 @@ class YoutubeIE(InfoExtractor): self.to_stdout('[youtube] %s: Downloading video webpage' % video_id) video_webpage = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - sys.exit('ERROR: Unable to download video: %s' % str(err)) + sys.exit('ERROR: unable to download video: %s' % str(err)) self.to_stdout('[youtube] %s: Extracting video information' % video_id) # "t" param mobj = re.search(r', "t": "([^"]+)"', video_webpage) if mobj is None: - self.to_stderr('ERROR: Unable to extract "t" parameter') + self.to_stderr('ERROR: unable to extract "t" parameter') return [None] video_real_url = 'http://www.youtube.com/get_video?video_id=%s&t=%s' % (video_id, mobj.group(1)) if format_param is not None: @@ -419,14 +431,14 @@ class YoutubeIE(InfoExtractor): # uploader mobj = re.search(r'More From: ([^<]*)<', video_webpage) if mobj is None: - self.to_stderr('ERROR: Unable to extract uploader nickname') + self.to_stderr('ERROR: unable to extract uploader 
nickname') return [None] video_uploader = mobj.group(1) # title mobj = re.search(r'(?im)YouTube - ([^<]*)', video_webpage) if mobj is None: - self.to_stderr('ERROR: Unable to extract video title') + self.to_stderr('ERROR: unable to extract video title') return [None] video_title = mobj.group(1).decode('utf-8') video_title = re.sub(ur'(?u)&(.+?);', lambda x: unichr(htmlentitydefs.name2codepoint[x.group(1)]), video_title) @@ -526,7 +538,8 @@ if __name__ == '__main__': or '%(id)s.%(ext)s'), }) fd.add_info_extractor(youtube_ie) - fd.download(args) + retcode = fd.download(args) + sys.exit(retcode) except KeyboardInterrupt: sys.exit('\nERROR: Interrupted by user') From 0086d1ec32139fab208648d39e9b275ba4a3f942 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Tue, 22 Jul 2008 11:33:41 +0200 Subject: [PATCH 018/455] Add .trouble() method to downloader to ease tracing retcode --- youtube-dl | 39 ++++++++++++++++++++++++++------------- 1 file changed, 26 insertions(+), 13 deletions(-) diff --git a/youtube-dl b/youtube-dl index 3986ea0d8..1e6b876e1 100755 --- a/youtube-dl +++ b/youtube-dl @@ -60,6 +60,7 @@ class FileDownloader(object): simulate: Do not download the video files. format: Video format code. outtmpl: Template for output names. + ignoreerrors: Do not stop on download errors. """ _params = None @@ -159,6 +160,21 @@ class FileDownloader(object): """Checks if the output template is fixed.""" return (re.search(ur'(?u)%\(.+?\)s', self._params['outtmpl']) is None) + def trouble(self, message=None): + """Determine action to take when a download problem appears. + + Depending on if the downloader has been configured to ignore + download errors or not, this method may exit the program or + not when errors are found, after printing the message. If it + doesn't exit, it returns an error code suitable to be returned + later as a program exit code to indicate error. 
+ """ + if message is not None: + self.to_stderr(message) + if not self._params.get('ignoreerrors', False): + sys.exit(1) + return 1 + def download(self, url_list): """Download a given list of URLs.""" retcode = 0 @@ -175,7 +191,7 @@ class FileDownloader(object): all_results = ie.extract(url) results = [x for x in all_results if x is not None] if len(results) != len(all_results): - retcode = 1 + retcode = self.trouble() if len(results) > 1 and self.fixed_template(): sys.exit('ERROR: fixed output name but more than one file to download') @@ -195,36 +211,30 @@ class FileDownloader(object): try: filename = self._params['outtmpl'] % result except (ValueError, KeyError), err: - self.to_stderr('ERROR: invalid output template: %s' % str(err)) - retcode = 1 + retcode = self.trouble('ERROR: invalid output template: %s' % str(err)) continue try: self.pmkdir(filename) except (OSError, IOError), err: - self.to_stderr('ERROR: unable to create directories: %s' % str(err)) - retcode = 1 + retcode = self.trouble('ERROR: unable to create directories: %s' % str(err)) continue try: outstream = open(filename, 'wb') except (OSError, IOError), err: - self.to_stderr('ERROR: unable to open for writing: %s' % str(err)) - retcode = 1 + retcode = self.trouble('ERROR: unable to open for writing: %s' % str(err)) continue try: self._do_download(outstream, result['url']) outstream.close() except (OSError, IOError), err: - self.to_stderr('ERROR: unable to write video data: %s' % str(err)) - retcode = 1 + retcode = self.trouble('ERROR: unable to write video data: %s' % str(err)) continue except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self.to_stderr('ERROR: unable to download video data: %s' % str(err)) - retcode = 1 + retcode = self.trouble('ERROR: unable to download video data: %s' % str(err)) continue break if not suitable_found: - self.to_stderr('ERROR: no suitable InfoExtractor: %s' % url) - retcode = 1 + retcode = self.trouble('ERROR: no suitable InfoExtractor: %s' % 
url) return retcode @@ -503,6 +513,8 @@ if __name__ == '__main__': dest='format', metavar='FMT', help='video format code') parser.add_option('-b', '--best-quality', action='store_const', dest='video_format', help='alias for -f 18', const='18') + parser.add_option('-i', '--ignore-errors', + action='store_true', dest='ignoreerrors', help='continue on download errors', default=False) (opts, args) = parser.parse_args() # Conflicting, missing and erroneous options @@ -536,6 +548,7 @@ if __name__ == '__main__': or (opts.usetitle and '%(stitle)s-%(id)s.%(ext)s') or (opts.useliteral and '%(title)s-%(id)s.%(ext)s') or '%(id)s.%(ext)s'), + 'ignoreerrors': opts.ignoreerrors, }) fd.add_info_extractor(youtube_ie) retcode = fd.download(args) From 1c5e23021ed23a1d143d7b0a9554574d1e7050fb Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Tue, 22 Jul 2008 11:41:25 +0200 Subject: [PATCH 019/455] Improve program documentation --- youtube-dl | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index 1e6b876e1..46d45d37d 100755 --- a/youtube-dl +++ b/youtube-dl @@ -67,6 +67,7 @@ class FileDownloader(object): _ies = [] def __init__(self, params): + """Create a FileDownloader object with the given options.""" self._ies = [] self.set_params(params) @@ -305,7 +306,7 @@ class InfoExtractor(object): return True def initialize(self): - """Initializes an instance (login, etc).""" + """Initializes an instance (authentication, etc).""" if not self._ready: self._real_initialize() self._ready = True @@ -320,10 +321,12 @@ class InfoExtractor(object): self._downloader = downloader def to_stdout(self, message): + """Print message to stdout if downloader is not in quiet mode.""" if self._downloader is None or not self._downloader.get_params().get('quiet', False): print message def to_stderr(self, message): + """Print message to stderr.""" sys.stderr.write('%s\n' % message) def _real_initialize(self): From 38acff0e77d773de0b14b95a21ea4cafd43a98c9 Mon Sep 17 
00:00:00 2001 From: Ricardo Garcia Date: Tue, 22 Jul 2008 15:26:32 +0200 Subject: [PATCH 020/455] Minor comment --- youtube-dl | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube-dl b/youtube-dl index 46d45d37d..633c87e5d 100755 --- a/youtube-dl +++ b/youtube-dl @@ -368,6 +368,7 @@ class YoutubeIE(InfoExtractor): self.to_stderr('WARNING: parsing .netrc: %s' % str(err)) return + # No authentication to be performed if username is None: return From e5bf0f551bbd3f4f939d48da0a81d32df11ec3da Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Tue, 22 Jul 2008 15:52:56 +0200 Subject: [PATCH 021/455] Add a couple of exceptions to avoid exiting from class methods --- youtube-dl | 37 ++++++++++++++++++++++++++++++------- 1 file changed, 30 insertions(+), 7 deletions(-) diff --git a/youtube-dl b/youtube-dl index 633c87e5d..f3239781b 100755 --- a/youtube-dl +++ b/youtube-dl @@ -25,6 +25,23 @@ std_headers = { simple_title_chars = string.ascii_letters.decode('ascii') + string.digits.decode('ascii') +class DownloadError(Exception): + """Download Error exception. + + This exception may be thrown by FileDownloader objects if they are not + configured to continue on errors. They will contain the appropriate + error message. + """ + pass + +class SameFileError(Exception): + """Same File exception. + + This exception will be thrown by FileDownloader objects if they detect + multiple files would have to be downloaded to the same file on disk. + """ + pass + class FileDownloader(object): """File Downloader class. @@ -165,22 +182,22 @@ class FileDownloader(object): """Determine action to take when a download problem appears. Depending on if the downloader has been configured to ignore - download errors or not, this method may exit the program or + download errors or not, this method may throw an exception or not when errors are found, after printing the message. 
If it - doesn't exit, it returns an error code suitable to be returned + doesn't raise, it returns an error code suitable to be returned later as a program exit code to indicate error. """ if message is not None: self.to_stderr(message) if not self._params.get('ignoreerrors', False): - sys.exit(1) + raise DownloadError(message) return 1 def download(self, url_list): """Download a given list of URLs.""" retcode = 0 if len(url_list) > 1 and self.fixed_template(): - sys.exit('ERROR: fixed output name but more than one file to download') + raise SameFileError(self._params['outtmpl']) for url in url_list: suitable_found = False @@ -195,7 +212,7 @@ class FileDownloader(object): retcode = self.trouble() if len(results) > 1 and self.fixed_template(): - sys.exit('ERROR: fixed output name but more than one file to download') + raise SameFileError(self._params['outtmpl']) for result in results: @@ -401,7 +418,8 @@ class YoutubeIE(InfoExtractor): self.to_stdout('[youtube] Confirming age') age_results = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - sys.exit('ERROR: unable to confirm age: %s' % str(err)) + self.to_stderr('ERROR: unable to confirm age: %s' % str(err)) + return def _real_extract(self, url): # Extract video id from URL @@ -429,7 +447,8 @@ class YoutubeIE(InfoExtractor): self.to_stdout('[youtube] %s: Downloading video webpage' % video_id) video_webpage = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - sys.exit('ERROR: unable to download video: %s' % str(err)) + self.to_stderr('ERROR: unable to download video webpage: %s' % str(err)) + return [None] self.to_stdout('[youtube] %s: Extracting video information' % video_id) # "t" param @@ -558,5 +577,9 @@ if __name__ == '__main__': retcode = fd.download(args) sys.exit(retcode) + except DownloadError: + sys.exit(1) + except SameFileError: + sys.exit('ERROR: fixed output name but more than one file to download') 
except KeyboardInterrupt: sys.exit('\nERROR: Interrupted by user') From 2130d8f6a497ca6219d4b3979cc5dcc6845827fc Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Tue, 22 Jul 2008 15:53:22 +0200 Subject: [PATCH 022/455] Make the downloader print the name of the destination file --- youtube-dl | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube-dl b/youtube-dl index f3239781b..0e5616220 100755 --- a/youtube-dl +++ b/youtube-dl @@ -228,6 +228,7 @@ class FileDownloader(object): try: filename = self._params['outtmpl'] % result + self.to_stdout('[download] Destination: %s' % filename) except (ValueError, KeyError), err: retcode = self.trouble('ERROR: invalid output template: %s' % str(err)) continue From 90663284b2c742ae84d3dd00876af9b681ee78ee Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Tue, 22 Jul 2008 22:18:51 +0200 Subject: [PATCH 023/455] Add --mobile-version program option --- youtube-dl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index 0e5616220..2c1d1aa3b 100755 --- a/youtube-dl +++ b/youtube-dl @@ -437,7 +437,7 @@ class YoutubeIE(InfoExtractor): format_param = params.get('format', None) # Extension - video_extension = {'18': 'mp4'}.get(format_param, 'flv') + video_extension = {'18': 'mp4', '17': '3gp'}.get(format_param, 'flv') # Normalize URL, including format normalized_url = 'http://www.youtube.com/watch?v=%s' % video_id @@ -537,6 +537,8 @@ if __name__ == '__main__': dest='format', metavar='FMT', help='video format code') parser.add_option('-b', '--best-quality', action='store_const', dest='video_format', help='alias for -f 18', const='18') + parser.add_option('-m', '--mobile-version', + action='store_const', dest='video_format', help='alias for -f 17', const='17') parser.add_option('-i', '--ignore-errors', action='store_true', dest='ignoreerrors', help='continue on download errors', default=False) (opts, args) = parser.parse_args() From bafa5cd969c218642e21af3a337723589b1afb9c Mon Sep 17 
00:00:00 2001 From: Ricardo Garcia Date: Tue, 22 Jul 2008 22:40:50 +0200 Subject: [PATCH 024/455] Convert direct uses of .to_stdout() into methods to ease inheritation --- youtube-dl | 52 +++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 43 insertions(+), 9 deletions(-) diff --git a/youtube-dl b/youtube-dl index 2c1d1aa3b..b0b494ed6 100755 --- a/youtube-dl +++ b/youtube-dl @@ -193,6 +193,19 @@ class FileDownloader(object): raise DownloadError(message) return 1 + def report_destination(self, filename): + """Report destination filename.""" + self.to_stdout('[download] Destination: %s' % filename) + + def report_progress(self, percent_str, data_len_str, speed_str, eta_str): + """Report download progress.""" + self.to_stdout('\r[download] %s of %s at %s ETA %s' % + (percent_str, data_len_str, speed_str, eta_str), skip_eol=True) + + def report_finish(self): + """Report download finished.""" + self.to_stdout('') + def download(self, url_list): """Download a given list of URLs.""" retcode = 0 @@ -228,7 +241,7 @@ class FileDownloader(object): try: filename = self._params['outtmpl'] % result - self.to_stdout('[download] Destination: %s' % filename) + self.report_destination(filename) except (ValueError, KeyError), err: retcode = self.trouble('ERROR: invalid output template: %s' % str(err)) continue @@ -266,12 +279,13 @@ class FileDownloader(object): block_size = 1024 start = time.time() while True: + # Progress message percent_str = self.calc_percent(byte_counter, data_len) eta_str = self.calc_eta(start, time.time(), data_len, byte_counter) speed_str = self.calc_speed(start, time.time(), byte_counter) - self.to_stdout('\r[download] %s of %s at %s ETA %s' % - (percent_str, data_len_str, speed_str, eta_str), skip_eol=True) + self.report_progress(percent_str, data_len_str, speed_str, eta_str) + # Download and write before = time.time() data_block = data.read(block_size) after = time.time() @@ -282,7 +296,7 @@ class FileDownloader(object): 
stream.write(data_block) block_size = self.best_block_size(after - before, data_block_len) - self.to_stdout('') + self.report_finish() if data_len is not None and str(byte_counter) != data_len: raise ValueError('Content too short: %s/%s bytes' % (byte_counter, data_len)) @@ -362,6 +376,26 @@ class YoutubeIE(InfoExtractor): _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/' _NETRC_MACHINE = 'youtube' + def report_login(self): + """Report attempt to log in.""" + self.to_stdout('[youtube] Logging in') + + def report_age_confirmation(self): + """Report attempt to confirm age.""" + self.to_stdout('[youtube] Confirming age') + + def report_webpage_download(self, video_id): + """Report attempt to download webpage.""" + self.to_stdout('[youtube] %s: Downloading video webpage' % video_id) + + def report_information_extraction(self, video_id): + """Report attempt to extract video information.""" + self.to_stdout('[youtube] %s: Extracting video information' % video_id) + + def report_video_url(self, video_id, video_real_url): + """Report extracted video URL.""" + self.to_stdout('[youtube] %s: URL: %s' % (video_id, video_real_url)) + def _real_initialize(self): if self._downloader is None: return @@ -400,7 +434,7 @@ class YoutubeIE(InfoExtractor): } request = urllib2.Request(self._LOGIN_URL, urllib.urlencode(login_form), std_headers) try: - self.to_stdout('[youtube] Logging in') + self.report_login() login_results = urllib2.urlopen(request).read() if re.search(r'(?i)]* name="loginForm"', login_results) is not None: self.to_stderr('WARNING: unable to log in: bad username or password') @@ -416,7 +450,7 @@ class YoutubeIE(InfoExtractor): } request = urllib2.Request(self._AGE_URL, urllib.urlencode(age_form), std_headers) try: - self.to_stdout('[youtube] Confirming age') + self.report_age_confirmation() age_results = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: self.to_stderr('ERROR: unable to confirm age: %s' % 
str(err)) @@ -445,12 +479,12 @@ class YoutubeIE(InfoExtractor): normalized_url = '%s&fmt=%s' % (normalized_url, format_param) request = urllib2.Request(normalized_url, None, std_headers) try: - self.to_stdout('[youtube] %s: Downloading video webpage' % video_id) + self.report_webpage_download(video_id) video_webpage = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: self.to_stderr('ERROR: unable to download video webpage: %s' % str(err)) return [None] - self.to_stdout('[youtube] %s: Extracting video information' % video_id) + self.report_information_extraction(video_id) # "t" param mobj = re.search(r', "t": "([^"]+)"', video_webpage) @@ -460,7 +494,7 @@ class YoutubeIE(InfoExtractor): video_real_url = 'http://www.youtube.com/get_video?video_id=%s&t=%s' % (video_id, mobj.group(1)) if format_param is not None: video_real_url = '%s&fmt=%s' % (video_real_url, format_param) - self.to_stdout('[youtube] %s: URL: %s' % (video_id, video_real_url)) + self.report_video_url(video_id, video_real_url) # uploader mobj = re.search(r'More From: ([^<]*)<', video_webpage) From d7bc253bb82ac4f8a6ffef568c5b6eee508c7206 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Tue, 22 Jul 2008 22:47:36 +0200 Subject: [PATCH 025/455] Fix video format command line option parsing and set version number --- youtube-dl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube-dl b/youtube-dl index b0b494ed6..135d14809 100755 --- a/youtube-dl +++ b/youtube-dl @@ -540,7 +540,7 @@ if __name__ == '__main__': # Parse command line parser = optparse.OptionParser( usage='Usage: %prog [options] url...', - version='INTERNAL', + version='2008.07.22', conflict_handler='resolve', ) parser.add_option('-h', '--help', @@ -570,9 +570,9 @@ if __name__ == '__main__': parser.add_option('-f', '--format', dest='format', metavar='FMT', help='video format code') parser.add_option('-b', '--best-quality', - action='store_const', dest='video_format', 
help='alias for -f 18', const='18') + action='store_const', dest='format', help='alias for -f 18', const='18') parser.add_option('-m', '--mobile-version', - action='store_const', dest='video_format', help='alias for -f 17', const='17') + action='store_const', dest='format', help='alias for -f 17', const='17') parser.add_option('-i', '--ignore-errors', action='store_true', dest='ignoreerrors', help='continue on download errors', default=False) (opts, args) = parser.parse_args() From 3e1cabc338803c398a0c6f02bd0310819ba5f91c Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Tue, 22 Jul 2008 22:52:09 +0200 Subject: [PATCH 026/455] Add old version of webpage --- index.html.in | 173 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 173 insertions(+) create mode 100644 index.html.in diff --git a/index.html.in b/index.html.in new file mode 100644 index 000000000..999c8e9bc --- /dev/null +++ b/index.html.in @@ -0,0 +1,173 @@ + + + + + youtube-dl: Download videos from YouTube.com + + + +

youtube-dl: Download videos from YouTube.com

+ +

What is it?

+ +

youtube-dl is a small command-line program to download videos +from YouTube.com. It requires the Python +interpreter, version 2.4 or later, and it's not platform specific. +It should work in your Unix box, in Windows or in Mac OS X. The latest version +is @PROGRAM_VERSION@. It's licensed under the MIT License, which +means you can modify it, redistribute it or use it however you like +complying with a few simple conditions.

+ +

I'll try to keep it updated if YouTube.com changes the way you access +their videos. After all, it's a simple and short program. However, I can't +guarantee anything. If you detect it stops working, check for new versions +and/or inform me about the problem, indicating the program version you +are using. If the program stops working and I can't solve the problem but +you have a solution, I'd like to know it. If that happens and you feel you +can maintain the program yourself, tell me. My contact information is +at freshmeat.net.

+ +

Thanks for all the feedback received so far. I'm glad people find my +program useful.

+ +

Related projects: +metacafe-dl +pornotube-dl +

+ +

Usage instructions

+ +

In Windows, once you have installed the Python interpreter, save the +program with the .py extension and put it somewhere in the PATH. +Try to follow the +guide to +install youtube-dl under Windows XP.

+ +

In Unix, download it, give it execution permission and copy it to one +of the PATH directories (typically, /usr/local/bin).

+ +

After that, you should be able to call it from the command line as +youtube-dl or youtube-dl.py. I will use youtube-dl +in the following examples. Usage instructions are easy. Use youtube-dl +followed by a video URL or identifier. Example: youtube-dl +"http://www.youtube.com/watch?v=foobar". The video will be saved +to the file foobar.flv in that example. As YouTube.com +videos are in Flash Video format, their extension should be flv. +In Linux and other unices, video players using a recent version of +ffmpeg can play them. That includes MPlayer, VLC, etc. Those two +work under Windows and other platforms, but you could also get a +specific FLV player of your choice.

+ +

If you try to run the program and you receive an error message containing the +keyword SyntaxError near the end, it means your Python interpreter +is too old.

+ +

More usage tips

+ +
    + +
  • You can change the file name of the video using the -o option, like in +youtube-dl -o vid.flv "http://www.youtube.com/watch?v=foobar".
  • + +
  • Some videos require an account to be downloaded, mostly because they're +flagged as mature content. You can pass the program a username and password +for a YouTube.com account with the -u and -p options, like youtube-dl +-u myusername -p mypassword "http://www.youtube.com/watch?v=foobar".
  • + +
  • The account data can also be read from the user .netrc file by indicating +the -n or --netrc option. The machine name is youtube in that +case.
  • + +
  • The simulate mode (activated with -s or --simulate) can be used +to just get the real video URL and use it with a download manager if you +prefer that option.
  • + +
  • The quiet mode (activated with -q or --quiet) can be used to +suppress all output messages. This allows, on systems featuring /dev/stdout +and other similar special files, outputting the video data to standard output +in order to pipe it to another program without interference.
  • + +
  • The program can be told to simply print the final video URL to standard +output using the -g or --get-url option.
  • + +
  • Combined with the above option, the -2 or --title-too option tells the +program to print the video title too.
  • + +
  • The default filename is video_id.flv. But you can also use the +video title in the filename with the -t or --title option, or preserve the +literal title in the filename with the -l or --literal option.
  • + +
  • You can make the program append &fmt=something to the URL +by using the -f or --format option. This makes it possible to download high +quality versions of the videos when available.
  • + +
  • youtube-dl can attempt to download the best quality version of +a video by using the -b or --best-quality option.
  • + +
  • youtube-dl honors the http_proxy environment variable +if you want to use a proxy. Set it to something like +http://proxy.example.com:8080, and do not leave the http:// +prefix out.
  • + +
  • You can get the program version by calling it as youtube-dl +-v or youtube-dl --version.
  • + +
  • For usage instructions, use youtube-dl -h or youtube-dl +--help.
  • + +
  • You can cancel the program at any time by pressing Ctrl+C. It may print +some error lines saying something about KeyboardInterrupt. +That's OK.
  • + +
+ +

Download it

+ +

Note that if you directly click on these hyperlinks, your web browser will +most likely display the program contents. It's usually better to +right-click on a link and choose the appropriate option, normally called Save +Target As or Save Link As, depending on the web browser you +are using.

+ +

@PROGRAM_VERSION@

+
    +
  • MD5: @PROGRAM_MD5SUM@
  • +
  • SHA1: @PROGRAM_SHA1SUM@
  • +
  • SHA256: @PROGRAM_SHA256SUM@
  • +
+ + + + From bb028346923a1b53bb8852ded546e08a1516dd49 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Tue, 22 Jul 2008 23:01:27 +0200 Subject: [PATCH 027/455] Add script to regenerate index.html --- generate-index | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100755 generate-index diff --git a/generate-index b/generate-index new file mode 100755 index 000000000..324001cae --- /dev/null +++ b/generate-index @@ -0,0 +1,15 @@ +#!/usr/bin/env python +import hashlib +import subprocess + +template = file('index.html.in', 'r').read() +version = subprocess.Popen(['./youtube-dl', '--version'], stdout=subprocess.PIPE).communicate()[0] +data = file('youtube-dl', 'rb').read() +md5sum = hashlib.md5(data).hexdigest() +sha1sum = hashlib.sha1(data).hexdigest() +sha256sum = hashlib.sha256(data).hexdigest() +template = template.replace('@PROGRAM_VERSION@', version) +template = template.replace('@PROGRAM_MD5SUM@', md5sum) +template = template.replace('@PROGRAM_SHA1SUM@', sha1sum) +template = template.replace('@PROGRAM_SHA256SUM@', sha256sum) +file('index.html', 'w').write(template) From 3c53b78720fa68ca801e026d981940387a2cb733 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Tue, 22 Jul 2008 23:02:42 +0200 Subject: [PATCH 028/455] Strip newline from version --- generate-index | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/generate-index b/generate-index index 324001cae..f8f940e4c 100755 --- a/generate-index +++ b/generate-index @@ -3,7 +3,7 @@ import hashlib import subprocess template = file('index.html.in', 'r').read() -version = subprocess.Popen(['./youtube-dl', '--version'], stdout=subprocess.PIPE).communicate()[0] +version = subprocess.Popen(['./youtube-dl', '--version'], stdout=subprocess.PIPE).communicate()[0].strip() data = file('youtube-dl', 'rb').read() md5sum = hashlib.md5(data).hexdigest() sha1sum = hashlib.sha1(data).hexdigest() From 7337efbfe48d87d34935b49ce64d4b434fa4cc4c Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: 
Tue, 22 Jul 2008 23:03:26 +0200 Subject: [PATCH 029/455] Modify ignore filters --- .hgignore | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.hgignore b/.hgignore index 31a5b2894..c7ce8559e 100644 --- a/.hgignore +++ b/.hgignore @@ -1,3 +1,4 @@ syntax: glob -youtube-dl-old +index.html +youtube-dl-* .*.swp From acd3d842987ee4c407093aaa4dedf4f301895adc Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Thu, 24 Jul 2008 09:47:07 +0200 Subject: [PATCH 030/455] Add --rate-limit program option --- youtube-dl | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/youtube-dl b/youtube-dl index 135d14809..4dea34376 100755 --- a/youtube-dl +++ b/youtube-dl @@ -78,6 +78,7 @@ class FileDownloader(object): format: Video format code. outtmpl: Template for output names. ignoreerrors: Do not stop on download errors. + ratelimit: Download speed limit, in bytes/sec. """ _params = None @@ -149,6 +150,16 @@ class FileDownloader(object): return int(new_min) return int(rate) + @staticmethod + def parse_bytes(bytestr): + """Parse a string indicating a byte quantity into a long integer.""" + matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr) + if matchobj is None: + return None + number = float(matchobj.group(1)) + multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower()) + return long(round(number * multiplier)) + def set_params(self, params): """Sets parameters.""" if type(params) != dict: @@ -193,6 +204,19 @@ class FileDownloader(object): raise DownloadError(message) return 1 + def slow_down(self, start_time, byte_counter): + """Sleep if the download speed is over the rate limit.""" + rate_limit = self._params.get('ratelimit', None) + if rate_limit is None or byte_counter == 0: + return + now = time.time() + elapsed = now - start_time + if elapsed <= 0.0: + return + speed = float(byte_counter) / elapsed + if speed > rate_limit: + time.sleep((byte_counter - rate_limit * (now - start_time)) / rate_limit) + 
def report_destination(self, filename): """Report destination filename.""" self.to_stdout('[download] Destination: %s' % filename) @@ -296,6 +320,9 @@ class FileDownloader(object): stream.write(data_block) block_size = self.best_block_size(after - before, data_block_len) + # Apply rate limit + self.slow_down(start, byte_counter) + self.report_finish() if data_len is not None and str(byte_counter) != data_len: raise ValueError('Content too short: %s/%s bytes' % (byte_counter, data_len)) @@ -575,6 +602,8 @@ if __name__ == '__main__': action='store_const', dest='format', help='alias for -f 17', const='17') parser.add_option('-i', '--ignore-errors', action='store_true', dest='ignoreerrors', help='continue on download errors', default=False) + parser.add_option('-r', '--rate-limit', + dest='ratelimit', metavar='L', help='download rate limit (e.g. 50k or 44.6m)') (opts, args) = parser.parse_args() # Conflicting, missing and erroneous options @@ -590,6 +619,11 @@ if __name__ == '__main__': sys.exit('ERROR: using title conflicts with using literal title') if opts.username is not None and opts.password is None: opts.password = getpass.getpass('Type account password and press return:') + if opts.ratelimit is not None: + numeric_limit = FileDownloader.parse_bytes(opts.ratelimit) + if numeric_limit is None: + sys.exit('ERROR: invalid rate limit specified') + opts.ratelimit = numeric_limit # Information extractors youtube_ie = YoutubeIE() @@ -609,6 +643,7 @@ if __name__ == '__main__': or (opts.useliteral and '%(title)s-%(id)s.%(ext)s') or '%(id)s.%(ext)s'), 'ignoreerrors': opts.ignoreerrors, + 'ratelimit': opts.ratelimit, }) fd.add_info_extractor(youtube_ie) retcode = fd.download(args) From 3af1e1728496f0cccd18946d03be78c5910914be Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Thu, 24 Jul 2008 10:07:46 +0200 Subject: [PATCH 031/455] Fix directory creation not working with absolute paths --- youtube-dl | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube-dl 
b/youtube-dl index 4dea34376..fe64819c7 100755 --- a/youtube-dl +++ b/youtube-dl @@ -94,6 +94,7 @@ class FileDownloader(object): """Create directory components in filename. Similar to Unix "mkdir -p".""" components = filename.split(os.sep) aggregate = [os.sep.join(components[0:x]) for x in xrange(1, len(components))] + aggregate = ['%s%s' % (x, os.sep) for x in aggregate] # Finish names with separator for dir in aggregate: if not os.path.exists(dir): os.mkdir(dir) From 020f7150aa7727f3a482560499e441d74d0644b2 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Thu, 24 Jul 2008 15:53:24 +0200 Subject: [PATCH 032/455] Add metacafe.com support and minor changse --- youtube-dl | 130 +++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 127 insertions(+), 3 deletions(-) diff --git a/youtube-dl b/youtube-dl index fe64819c7..c0d85aa56 100755 --- a/youtube-dl +++ b/youtube-dl @@ -253,7 +253,6 @@ class FileDownloader(object): raise SameFileError(self._params['outtmpl']) for result in results: - # Forced printings if self._params.get('forcetitle', False): print result['title'] @@ -363,7 +362,7 @@ class InfoExtractor(object): @staticmethod def suitable(url): """Receives a URL and returns True if suitable for this IE.""" - return True + return False def initialize(self): """Initializes an instance (authentication, etc).""" @@ -400,10 +399,15 @@ class InfoExtractor(object): class YoutubeIE(InfoExtractor): """Information extractor for youtube.com.""" + _VALID_URL = r'^((?:http://)?(?:\w+\.)?youtube\.com/(?:(?:v/)|(?:(?:watch(?:\.php)?)?\?(?:.+&)?v=)))?([0-9A-Za-z_-]+)(?(1).+)?$' _LOGIN_URL = 'http://www.youtube.com/login?next=/' _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/' _NETRC_MACHINE = 'youtube' + @staticmethod + def suitable(url): + return (re.match(YoutubeIE._VALID_URL, url) is not None) + def report_login(self): """Report attempt to log in.""" self.to_stdout('[youtube] Logging in') @@ -486,7 +490,7 @@ class YoutubeIE(InfoExtractor): def 
_real_extract(self, url): # Extract video id from URL - mobj = re.match(r'^((?:http://)?(?:\w+\.)?youtube\.com/(?:(?:v/)|(?:(?:watch(?:\.php)?)?\?(?:.+&)?v=)))?([0-9A-Za-z_-]+)(?(1).+)?$', url) + mobj = re.match(self._VALID_URL, url) if mobj is None: self.to_stderr('ERROR: invalid URL: %s' % url) return [None] @@ -554,6 +558,124 @@ class YoutubeIE(InfoExtractor): 'ext': video_extension, }] +class MetacafeIE(InfoExtractor): + """Information Extractor for metacafe.com.""" + + _VALID_URL = r'(?:http://)?(?:www\.)?metacafe\.com/watch/([^/]+)/([^/]+)/.*' + _DISCLAIMER = 'http://www.metacafe.com/disclaimer' + _youtube_ie = None + + def __init__(self, youtube_ie, downloader=None): + InfoExtractor.__init__(self, downloader) + self._youtube_ie = youtube_ie + + @staticmethod + def suitable(url): + return (re.match(MetacafeIE._VALID_URL, url) is not None) + + def report_disclaimer(self): + """Report disclaimer retrieval.""" + self.to_stdout('[metacafe] Retrieving disclaimer') + + def report_age_confirmation(self): + """Report attempt to confirm age.""" + self.to_stdout('[metacafe] Confirming age') + + def report_download_webpage(self, video_id): + """Report webpage download.""" + self.to_stdout('[metacafe] %s: Downloading webpage' % video_id) + + def report_extraction(self, video_id): + """Report information extraction.""" + self.to_stdout('[metacafe] %s: Extracting information' % video_id) + + def _real_initialize(self): + # Retrieve disclaimer + request = urllib2.Request(self._DISCLAIMER, None, std_headers) + try: + self.report_disclaimer() + disclaimer = urllib2.urlopen(request).read() + except (urllib2.URLError, httplib.HTTPException, socket.error), err: + self.to_stderr('ERROR: unable to retrieve disclaimer: %s' % str(err)) + return + + # Confirm age + disclaimer_form = { + 'allowAdultContent': '1', + 'submit': "Continue - I'm over 18", + } + request = urllib2.Request('http://www.metacafe.com/watch/', urllib.urlencode(disclaimer_form), std_headers) + try: + 
self.report_age_confirmation() + disclaimer = urllib2.urlopen(request).read() + except (urllib2.URLError, httplib.HTTPException, socket.error), err: + self.to_stderr('ERROR: unable to confirm age: %s' % str(err)) + return + + def _real_extract(self, url): + # Extract id and simplified title from URL + mobj = re.match(self._VALID_URL, url) + if mobj is None: + self.to_stderr('ERROR: invalid URL: %s' % url) + return [None] + + video_id = mobj.group(1) + + # Check if video comes from YouTube + mobj2 = re.match(r'^yt-(.*)$', video_id) + if mobj2 is not None: + return self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % mobj2.group(1)) + + simple_title = mobj.group(2).decode('utf-8') + video_extension = 'flv' + + # Retrieve video webpage to extract further information + request = urllib2.Request('http://www.metacafe.com/watch/%s/' % video_id) + try: + self.report_download_webpage(video_id) + webpage = urllib2.urlopen(request).read() + except (urllib2.URLError, httplib.HTTPException, socket.error), err: + self.to_stderr('ERROR: unable retrieve video webpage: %s' % str(err)) + return [None] + + # Extract URL, uploader and title from webpage + self.report_extraction(video_id) + mobj = re.search(r'(?m)"mediaURL":"(http.*?\.flv)"', webpage) + if mobj is None: + self.to_stderr('ERROR: unable to extract media URL') + return [None] + mediaURL = mobj.group(1).replace('\\', '') + + mobj = re.search(r'(?m)"gdaKey":"(.*?)"', webpage) + if mobj is None: + self.to_stderr('ERROR: unable to extract gdaKey') + return [None] + gdaKey = mobj.group(1) + + video_url = '%s?__gda__=%s' % (mediaURL, gdaKey) + + mobj = re.search(r'(?im).*?Submitter:
(.*?)', webpage) + if mobj is None: + self.to_stderr('ERROR: unable to extract uploader nickname') + return [None] + video_uploader = re.sub(r'<.*?>', '', mobj.group(1)) + + # Return information + return [{ + 'id': video_id, + 'url': video_url, + 'uploader': video_uploader, + 'title': video_title, + 'stitle': simple_title, + 'ext': video_extension, + }] + if __name__ == '__main__': try: # Modules needed only when running the main program @@ -628,6 +750,7 @@ if __name__ == '__main__': # Information extractors youtube_ie = YoutubeIE() + metacafe_ie = MetacafeIE(youtube_ie) # File downloader fd = FileDownloader({ @@ -646,6 +769,7 @@ if __name__ == '__main__': 'ignoreerrors': opts.ignoreerrors, 'ratelimit': opts.ratelimit, }) + fd.add_info_extractor(metacafe_ie) fd.add_info_extractor(youtube_ie) retcode = fd.download(args) sys.exit(retcode) From 0c2dc87d9e299fb413d103f08df0d03fed55adb1 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Fri, 25 Jul 2008 12:55:01 +0200 Subject: [PATCH 033/455] Add YoutubePlaylistIE class --- youtube-dl | 62 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) diff --git a/youtube-dl b/youtube-dl index c0d85aa56..7eaafdcd5 100755 --- a/youtube-dl +++ b/youtube-dl @@ -676,6 +676,66 @@ class MetacafeIE(InfoExtractor): 'ext': video_extension, }] +class YoutubePlaylistIE(InfoExtractor): + """Information Extractor for YouTube playlists.""" + + _VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/view_play_list\?p=(.+)' + _TEMPLATE_URL = 'http://www.youtube.com/view_play_list?p=%s&page=%s' + _VIDEO_INDICATOR = r'/watch\?v=(.+?)&' + _MORE_PAGES_INDICATOR = r'class="pagerNotCurrent">Next' + _youtube_ie = None + + def __init__(self, youtube_ie, downloader=None): + InfoExtractor.__init__(self, downloader) + self._youtube_ie = youtube_ie + + @staticmethod + def suitable(url): + return (re.match(YoutubePlaylistIE._VALID_URL, url) is not None) + + def report_download_page(self, playlist_id, pagenum): + """Report attempt 
to download playlist page with given number.""" + self.to_stdout('[youtube] PL %s: Downloading page #%s' % (playlist_id, pagenum)) + + def _real_initialize(self): + self._youtube_ie.initialize() + + def _real_extract(self, url): + # Extract playlist id + mobj = re.match(self._VALID_URL, url) + if mobj is None: + self.to_stderr('ERROR: invalid url: %s' % url) + return [None] + + # Download playlist pages + playlist_id = mobj.group(1) + video_ids = [] + pagenum = 1 + + while True: + self.report_download_page(playlist_id, pagenum) + request = urllib2.Request(self._TEMPLATE_URL % (playlist_id, pagenum), None, std_headers) + try: + page = urllib2.urlopen(request).read() + except (urllib2.URLError, httplib.HTTPException, socket.error), err: + self.to_stderr('ERROR: unable to download webpage: %s' % str(err)) + return [None] + + # Extract video identifiers + ids_in_page = set() + for mobj in re.finditer(self._VIDEO_INDICATOR, page): + ids_in_page.add(mobj.group(1)) + video_ids.extend(list(ids_in_page)) + + if self._MORE_PAGES_INDICATOR not in page: + break + pagenum = pagenum + 1 + + information = [] + for id in video_ids: + information.extend(self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)) + return information + if __name__ == '__main__': try: # Modules needed only when running the main program @@ -751,6 +811,7 @@ if __name__ == '__main__': # Information extractors youtube_ie = YoutubeIE() metacafe_ie = MetacafeIE(youtube_ie) + youtube_pl_ie = YoutubePlaylistIE(youtube_ie) # File downloader fd = FileDownloader({ @@ -769,6 +830,7 @@ if __name__ == '__main__': 'ignoreerrors': opts.ignoreerrors, 'ratelimit': opts.ratelimit, }) + fd.add_info_extractor(youtube_pl_ie) fd.add_info_extractor(metacafe_ie) fd.add_info_extractor(youtube_ie) retcode = fd.download(args) From 76a7f364004fba520f21987128ba0a5a7c6e66e6 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Fri, 25 Jul 2008 13:28:41 +0200 Subject: [PATCH 034/455] Make the most prominent output strings 
Unicode and fix Unicode title bug --- youtube-dl | 115 +++++++++++++++++++++++++++-------------------------- 1 file changed, 59 insertions(+), 56 deletions(-) diff --git a/youtube-dl b/youtube-dl index 7eaafdcd5..f89c544cc 100755 --- a/youtube-dl +++ b/youtube-dl @@ -179,12 +179,15 @@ class FileDownloader(object): def to_stdout(self, message, skip_eol=False): """Print message to stdout if not in quiet mode.""" if not self._params.get('quiet', False): - sys.stdout.write('%s%s' % (message, ['\n', ''][skip_eol])) + if skip_eol: + print message, + else: + print message sys.stdout.flush() def to_stderr(self, message): """Print message to stderr.""" - sys.stderr.write('%s\n' % message) + print >>sys.stderr, message def fixed_template(self): """Checks if the output template is fixed.""" @@ -220,16 +223,16 @@ class FileDownloader(object): def report_destination(self, filename): """Report destination filename.""" - self.to_stdout('[download] Destination: %s' % filename) + self.to_stdout(u'[download] Destination: %s' % filename) def report_progress(self, percent_str, data_len_str, speed_str, eta_str): """Report download progress.""" - self.to_stdout('\r[download] %s of %s at %s ETA %s' % + self.to_stdout(u'\r[download] %s of %s at %s ETA %s' % (percent_str, data_len_str, speed_str, eta_str), skip_eol=True) def report_finish(self): """Report download finished.""" - self.to_stdout('') + self.to_stdout(u'') def download(self, url_list): """Download a given list of URLs.""" @@ -386,7 +389,7 @@ class InfoExtractor(object): def to_stderr(self, message): """Print message to stderr.""" - sys.stderr.write('%s\n' % message) + print >>sys.stderr, message def _real_initialize(self): """Real initialization process. 
Redefine in subclasses.""" @@ -410,23 +413,23 @@ class YoutubeIE(InfoExtractor): def report_login(self): """Report attempt to log in.""" - self.to_stdout('[youtube] Logging in') + self.to_stdout(u'[youtube] Logging in') def report_age_confirmation(self): """Report attempt to confirm age.""" - self.to_stdout('[youtube] Confirming age') + self.to_stdout(u'[youtube] Confirming age') def report_webpage_download(self, video_id): """Report attempt to download webpage.""" - self.to_stdout('[youtube] %s: Downloading video webpage' % video_id) + self.to_stdout(u'[youtube] %s: Downloading video webpage' % video_id) def report_information_extraction(self, video_id): """Report attempt to extract video information.""" - self.to_stdout('[youtube] %s: Extracting video information' % video_id) + self.to_stdout(u'[youtube] %s: Extracting video information' % video_id) def report_video_url(self, video_id, video_real_url): """Report extracted video URL.""" - self.to_stdout('[youtube] %s: URL: %s' % (video_id, video_real_url)) + self.to_stdout(u'[youtube] %s: URL: %s' % (video_id, video_real_url)) def _real_initialize(self): if self._downloader is None: @@ -449,7 +452,7 @@ class YoutubeIE(InfoExtractor): else: raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE) except (IOError, netrc.NetrcParseError), err: - self.to_stderr('WARNING: parsing .netrc: %s' % str(err)) + self.to_stderr(u'WARNING: parsing .netrc: %s' % str(err)) return # No authentication to be performed @@ -469,10 +472,10 @@ class YoutubeIE(InfoExtractor): self.report_login() login_results = urllib2.urlopen(request).read() if re.search(r'(?i)]* name="loginForm"', login_results) is not None: - self.to_stderr('WARNING: unable to log in: bad username or password') + self.to_stderr(u'WARNING: unable to log in: bad username or password') return except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self.to_stderr('WARNING: unable to log in: %s' % str(err)) + self.to_stderr(u'WARNING: 
unable to log in: %s' % str(err)) return # Confirm age @@ -485,14 +488,14 @@ class YoutubeIE(InfoExtractor): self.report_age_confirmation() age_results = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self.to_stderr('ERROR: unable to confirm age: %s' % str(err)) + self.to_stderr(u'ERROR: unable to confirm age: %s' % str(err)) return def _real_extract(self, url): # Extract video id from URL mobj = re.match(self._VALID_URL, url) if mobj is None: - self.to_stderr('ERROR: invalid URL: %s' % url) + self.to_stderr(u'ERROR: invalid URL: %s' % url) return [None] video_id = mobj.group(2) @@ -514,14 +517,14 @@ class YoutubeIE(InfoExtractor): self.report_webpage_download(video_id) video_webpage = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self.to_stderr('ERROR: unable to download video webpage: %s' % str(err)) + self.to_stderr(u'ERROR: unable to download video webpage: %s' % str(err)) return [None] self.report_information_extraction(video_id) # "t" param mobj = re.search(r', "t": "([^"]+)"', video_webpage) if mobj is None: - self.to_stderr('ERROR: unable to extract "t" parameter') + self.to_stderr(u'ERROR: unable to extract "t" parameter') return [None] video_real_url = 'http://www.youtube.com/get_video?video_id=%s&t=%s' % (video_id, mobj.group(1)) if format_param is not None: @@ -531,14 +534,14 @@ class YoutubeIE(InfoExtractor): # uploader mobj = re.search(r'More From: ([^<]*)<', video_webpage) if mobj is None: - self.to_stderr('ERROR: unable to extract uploader nickname') + self.to_stderr(u'ERROR: unable to extract uploader nickname') return [None] video_uploader = mobj.group(1) # title mobj = re.search(r'(?im)YouTube - ([^<]*)', video_webpage) if mobj is None: - self.to_stderr('ERROR: unable to extract video title') + self.to_stderr(u'ERROR: unable to extract video title') return [None] video_title = mobj.group(1).decode('utf-8') video_title = 
re.sub(ur'(?u)&(.+?);', lambda x: unichr(htmlentitydefs.name2codepoint[x.group(1)]), video_title) @@ -550,12 +553,12 @@ class YoutubeIE(InfoExtractor): # Return information return [{ - 'id': video_id, - 'url': video_real_url, - 'uploader': video_uploader, + 'id': video_id.decode('utf-8'), + 'url': video_real_url.decode('utf-8'), + 'uploader': video_uploader.decode('utf-8'), 'title': video_title, 'stitle': simple_title, - 'ext': video_extension, + 'ext': video_extension.decode('utf-8'), }] class MetacafeIE(InfoExtractor): @@ -575,19 +578,19 @@ class MetacafeIE(InfoExtractor): def report_disclaimer(self): """Report disclaimer retrieval.""" - self.to_stdout('[metacafe] Retrieving disclaimer') + self.to_stdout(u'[metacafe] Retrieving disclaimer') def report_age_confirmation(self): """Report attempt to confirm age.""" - self.to_stdout('[metacafe] Confirming age') + self.to_stdout(u'[metacafe] Confirming age') def report_download_webpage(self, video_id): """Report webpage download.""" - self.to_stdout('[metacafe] %s: Downloading webpage' % video_id) + self.to_stdout(u'[metacafe] %s: Downloading webpage' % video_id) def report_extraction(self, video_id): """Report information extraction.""" - self.to_stdout('[metacafe] %s: Extracting information' % video_id) + self.to_stdout(u'[metacafe] %s: Extracting information' % video_id) def _real_initialize(self): # Retrieve disclaimer @@ -596,7 +599,7 @@ class MetacafeIE(InfoExtractor): self.report_disclaimer() disclaimer = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self.to_stderr('ERROR: unable to retrieve disclaimer: %s' % str(err)) + self.to_stderr(u'ERROR: unable to retrieve disclaimer: %s' % str(err)) return # Confirm age @@ -609,14 +612,14 @@ class MetacafeIE(InfoExtractor): self.report_age_confirmation() disclaimer = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self.to_stderr('ERROR: unable to confirm age: 
%s' % str(err)) + self.to_stderr(u'ERROR: unable to confirm age: %s' % str(err)) return def _real_extract(self, url): # Extract id and simplified title from URL mobj = re.match(self._VALID_URL, url) if mobj is None: - self.to_stderr('ERROR: invalid URL: %s' % url) + self.to_stderr(u'ERROR: invalid URL: %s' % url) return [None] video_id = mobj.group(1) @@ -635,20 +638,20 @@ class MetacafeIE(InfoExtractor): self.report_download_webpage(video_id) webpage = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self.to_stderr('ERROR: unable retrieve video webpage: %s' % str(err)) + self.to_stderr(u'ERROR: unable retrieve video webpage: %s' % str(err)) return [None] # Extract URL, uploader and title from webpage self.report_extraction(video_id) mobj = re.search(r'(?m)"mediaURL":"(http.*?\.flv)"', webpage) if mobj is None: - self.to_stderr('ERROR: unable to extract media URL') + self.to_stderr(u'ERROR: unable to extract media URL') return [None] mediaURL = mobj.group(1).replace('\\', '') mobj = re.search(r'(?m)"gdaKey":"(.*?)"', webpage) if mobj is None: - self.to_stderr('ERROR: unable to extract gdaKey') + self.to_stderr(u'ERROR: unable to extract gdaKey') return [None] gdaKey = mobj.group(1) @@ -656,24 +659,24 @@ class MetacafeIE(InfoExtractor): mobj = re.search(r'(?im).*?Submitter:
(.*?)', webpage) if mobj is None: - self.to_stderr('ERROR: unable to extract uploader nickname') + self.to_stderr(u'ERROR: unable to extract uploader nickname') return [None] video_uploader = re.sub(r'<.*?>', '', mobj.group(1)) # Return information return [{ - 'id': video_id, - 'url': video_url, - 'uploader': video_uploader, + 'id': video_id.decode('utf-8'), + 'url': video_url.decode('utf-8'), + 'uploader': video_uploader.decode('utf-8'), 'title': video_title, 'stitle': simple_title, - 'ext': video_extension, + 'ext': video_extension.decode('utf-8'), }] class YoutubePlaylistIE(InfoExtractor): @@ -695,7 +698,7 @@ class YoutubePlaylistIE(InfoExtractor): def report_download_page(self, playlist_id, pagenum): """Report attempt to download playlist page with given number.""" - self.to_stdout('[youtube] PL %s: Downloading page #%s' % (playlist_id, pagenum)) + self.to_stdout(u'[youtube] PL %s: Downloading page #%s' % (playlist_id, pagenum)) def _real_initialize(self): self._youtube_ie.initialize() @@ -704,7 +707,7 @@ class YoutubePlaylistIE(InfoExtractor): # Extract playlist id mobj = re.match(self._VALID_URL, url) if mobj is None: - self.to_stderr('ERROR: invalid url: %s' % url) + self.to_stderr(u'ERROR: invalid url: %s' % url) return [None] # Download playlist pages @@ -718,7 +721,7 @@ class YoutubePlaylistIE(InfoExtractor): try: page = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self.to_stderr('ERROR: unable to download webpage: %s' % str(err)) + self.to_stderr(u'ERROR: unable to download webpage: %s' % str(err)) return [None] # Extract video identifiers @@ -791,21 +794,21 @@ if __name__ == '__main__': # Conflicting, missing and erroneous options if len(args) < 1: - sys.exit('ERROR: you must provide at least one URL') + sys.exit(u'ERROR: you must provide at least one URL') if opts.usenetrc and (opts.username is not None or opts.password is not None): - sys.exit('ERROR: using .netrc conflicts with giving 
username/password') + sys.exit(u'ERROR: using .netrc conflicts with giving username/password') if opts.password is not None and opts.username is None: - sys.exit('ERROR: account username missing') + sys.exit(u'ERROR: account username missing') if opts.outtmpl is not None and (opts.useliteral or opts.usetitle): - sys.exit('ERROR: using output template conflicts with using title or literal title') + sys.exit(u'ERROR: using output template conflicts with using title or literal title') if opts.usetitle and opts.useliteral: - sys.exit('ERROR: using title conflicts with using literal title') + sys.exit(u'ERROR: using title conflicts with using literal title') if opts.username is not None and opts.password is None: - opts.password = getpass.getpass('Type account password and press return:') + opts.password = getpass.getpass(u'Type account password and press return:') if opts.ratelimit is not None: numeric_limit = FileDownloader.parse_bytes(opts.ratelimit) if numeric_limit is None: - sys.exit('ERROR: invalid rate limit specified') + sys.exit(u'ERROR: invalid rate limit specified') opts.ratelimit = numeric_limit # Information extractors @@ -823,10 +826,10 @@ if __name__ == '__main__': 'forcetitle': opts.gettitle, 'simulate': (opts.simulate or opts.geturl or opts.gettitle), 'format': opts.format, - 'outtmpl': ((opts.outtmpl is not None and opts.outtmpl) - or (opts.usetitle and '%(stitle)s-%(id)s.%(ext)s') - or (opts.useliteral and '%(title)s-%(id)s.%(ext)s') - or '%(id)s.%(ext)s'), + 'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode()) + or (opts.usetitle and u'%(stitle)s-%(id)s.%(ext)s') + or (opts.useliteral and u'%(title)s-%(id)s.%(ext)s') + or u'%(id)s.%(ext)s'), 'ignoreerrors': opts.ignoreerrors, 'ratelimit': opts.ratelimit, }) @@ -839,6 +842,6 @@ if __name__ == '__main__': except DownloadError: sys.exit(1) except SameFileError: - sys.exit('ERROR: fixed output name but more than one file to download') + sys.exit(u'ERROR: fixed output name but more than one 
file to download') except KeyboardInterrupt: - sys.exit('\nERROR: Interrupted by user') + sys.exit(u'\nERROR: Interrupted by user') From 97accc0ecea6f6c14ec4151e8dd403c33e0ee980 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Fri, 25 Jul 2008 14:00:02 +0200 Subject: [PATCH 035/455] Simplify a statement --- youtube-dl | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/youtube-dl b/youtube-dl index f89c544cc..7527a4704 100755 --- a/youtube-dl +++ b/youtube-dl @@ -179,10 +179,7 @@ class FileDownloader(object): def to_stdout(self, message, skip_eol=False): """Print message to stdout if not in quiet mode.""" if not self._params.get('quiet', False): - if skip_eol: - print message, - else: - print message + print u'%s%s' % (message, [u'\n', u''][skip_eol]), sys.stdout.flush() def to_stderr(self, message): From f807dc157e1215c3dbaf82333f6e86a8f98ea76b Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Fri, 25 Jul 2008 23:42:02 +0200 Subject: [PATCH 036/455] Update webpage to reflect changes --- index.html.in | 65 ++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 52 insertions(+), 13 deletions(-) diff --git a/index.html.in b/index.html.in index 999c8e9bc..40f8fd326 100644 --- a/index.html.in +++ b/index.html.in @@ -31,7 +31,7 @@ li { margin-bottom: 0.5ex; } - #copyright { + .smallnote { font-size: x-small; text-align: center; } @@ -40,15 +40,16 @@

youtube-dl: Download videos from YouTube.com

+

(and more...)

+

What is it?

youtube-dl is a small command-line program to download videos from YouTube.com. It requires the Python interpreter, version 2.4 or later, and it's not platform specific. It should work in your Unix box, in Windows or in Mac OS X. The latest version -is @PROGRAM_VERSION@. It's licensed under the MIT License, which -means you can modify it, redistribute it or use it however you like -complying with a few simple conditions.

+is @PROGRAM_VERSION@. It's released into the public domain, +which means you can modify it, redistribute it or use it however you like.

I'll try to keep it updated if YouTube.com changes the way you access their videos. After all, it's a simple and short program. However, I can't @@ -62,11 +63,6 @@ at freshmeat.net.

Thanks for all the feedback received so far. I'm glad people find my program useful.

-

Related projects: -metacafe-dl -pornotube-dl -

-

Usage instructions

In Windows, once you have installed the Python interpreter, save the @@ -99,7 +95,8 @@ is too old.

  • You can change the file name of the video using the -o option, like in -youtube-dl -o vid.flv "http://www.youtube.com/watch?v=foobar".
  • +youtube-dl -o vid.flv "http://www.youtube.com/watch?v=foobar". +Read the Output template section for more details on this.
  • Some videos require an account to be downloaded, mostly because they're flagged as mature content. You can pass the program a username and password @@ -122,8 +119,8 @@ in order to pipe it to another program without interferences.
  • The program can be told to simply print the final video URL to standard output using the -g or --get-url option.
  • -
  • Combined with the above option, the -2 or --title-too option tells the -program to print the video title too.
  • +
  • In a similar vein, the -e or --get-title option tells the program to print +the video title.
  • The default filename is video_id.flv. But you can also use the video title in the filename with the -t or --title option, or preserve the @@ -136,6 +133,12 @@ quality versions of the videos when available.
  • youtube-dl can attempt to download the best quality version of a video by using the -b or --best-quality option.
  • +
  • youtube-dl can attempt to download the mobile quality version of +a video by using the -m or --mobile-version option.
  • + +
  • Normally, the program will stop on the first error, but you can tell it +to attempt to download every video with the -i or --ignore-errors option.
  • +
  • youtube-dl honors the http_proxy environment variable if you want to use a proxy. Set it to something like http://proxy.example.com:8080, and do not leave the http:// @@ -168,6 +171,42 @@ are using.

  • SHA256: @PROGRAM_SHA256SUM@
- +

Output template

+ +

The -o option allows users to indicate a template for the output file names. +The basic usage is not to set any template arguments when downloading a single +file, like in youtube-dl -o funny_video.flv 'http://some/video'. +However, it may contain special sequences that will be replaced when +downloading each video. The special sequences have the format +%(NAME)s. To clarify, that's a percent symbol followed by a +name in parentheses, followed by a lowercase S. Allowed names are:

+ +
    +
  • id: The sequence will be replaced by the video identifier.
  • +
  • url: The sequence will be replaced by the video URL.
  • +
  • uploader: The sequence will be replaced by the nickname of the +person who uploaded the video.
  • +
  • title: The sequence will be replaced by the literal video +title.
  • +
  • stitle: The sequence will be replaced by a simplified video +title.
  • +
  • ext: The sequence will be replaced by the appropriate +extension.
  • +
+ +

As you may have guessed, the default template is %(id)s.%(ext)s. +When some command line options are used, it's replaced by other templates like +%(title)s-%(id)s.%(ext)s. You can specify your own.

+ +

Authors

+ +
    +
  • Ricardo Garcia Gonzalez: program core, YouTube.com InfoExtractor, +metacafe.com InfoExtractor and YouTube playlist InfoExtractor.
  • +
  • Many other people contributing patches, code, ideas and kind messages. Too +many to be listed here. You know who you are. Thank you very much.
  • +
+ +

Copyright © 2006-2007 Ricardo Garcia Gonzalez

From b1a1f8ea8f40ed26b7071eda139e31493f6f7972 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Fri, 25 Jul 2008 23:43:00 +0200 Subject: [PATCH 037/455] Improve error message regarding output templates and charsets --- youtube-dl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index 7527a4704..48f5d2bf1 100755 --- a/youtube-dl +++ b/youtube-dl @@ -267,7 +267,7 @@ class FileDownloader(object): filename = self._params['outtmpl'] % result self.report_destination(filename) except (ValueError, KeyError), err: - retcode = self.trouble('ERROR: invalid output template: %s' % str(err)) + retcode = self.trouble('ERROR: invalid output template or system charset: %s' % str(err)) continue try: self.pmkdir(filename) From a7d06f400c36837923c3b0db5fbfc8d90d4a9fd7 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Fri, 25 Jul 2008 23:43:45 +0200 Subject: [PATCH 038/455] Increase version number --- youtube-dl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index 48f5d2bf1..b80d71f88 100755 --- a/youtube-dl +++ b/youtube-dl @@ -750,7 +750,7 @@ if __name__ == '__main__': # Parse command line parser = optparse.OptionParser( usage='Usage: %prog [options] url...', - version='2008.07.22', + version='2008.07.26', conflict_handler='resolve', ) parser.add_option('-h', '--help', From 535267857601e4962d57108d5c7a755166a4ecff Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Sat, 26 Jul 2008 02:07:03 +0200 Subject: [PATCH 039/455] Improve a couple of detection strings to avoid i18n-related bugs --- youtube-dl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube-dl b/youtube-dl index b80d71f88..3f20da590 100755 --- a/youtube-dl +++ b/youtube-dl @@ -529,7 +529,7 @@ class YoutubeIE(InfoExtractor): self.report_video_url(video_id, video_real_url) # uploader - mobj = re.search(r'More From: ([^<]*)<', video_webpage) + mobj = re.search(r"var watchUsername = '([^']+)';", video_webpage) if 
mobj is None: self.to_stderr(u'ERROR: unable to extract uploader nickname') return [None] @@ -682,7 +682,7 @@ class YoutubePlaylistIE(InfoExtractor): _VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/view_play_list\?p=(.+)' _TEMPLATE_URL = 'http://www.youtube.com/view_play_list?p=%s&page=%s' _VIDEO_INDICATOR = r'/watch\?v=(.+?)&' - _MORE_PAGES_INDICATOR = r'class="pagerNotCurrent">Next' + _MORE_PAGES_INDICATOR = r'/view_play_list?p=%s&page=%s' _youtube_ie = None def __init__(self, youtube_ie, downloader=None): @@ -727,7 +727,7 @@ class YoutubePlaylistIE(InfoExtractor): ids_in_page.add(mobj.group(1)) video_ids.extend(list(ids_in_page)) - if self._MORE_PAGES_INDICATOR not in page: + if (self._MORE_PAGES_INDICATOR % (playlist_id, pagenum + 1)) not in page: break pagenum = pagenum + 1 From 65cd34c5d735f968b03c6e618db4e177ed80f1ac Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Sun, 27 Jul 2008 12:13:49 +0200 Subject: [PATCH 040/455] Add initial version of postprocessing framework --- youtube-dl | 86 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 86 insertions(+) diff --git a/youtube-dl b/youtube-dl index 3f20da590..7690898b2 100755 --- a/youtube-dl +++ b/youtube-dl @@ -42,6 +42,14 @@ class SameFileError(Exception): """ pass +class PostProcessingError(Exception): + """Post Processing exception. + + This exception may be raised by PostProcessor's .run() method to + indicate an error in the postprocessing task. + """ + pass + class FileDownloader(object): """File Downloader class. 
@@ -83,10 +91,12 @@ class FileDownloader(object): _params = None _ies = [] + _pps = [] def __init__(self, params): """Create a FileDownloader object with the given options.""" self._ies = [] + self._pps = [] self.set_params(params) @staticmethod @@ -176,6 +186,11 @@ class FileDownloader(object): self._ies.append(ie) ie.set_downloader(self) + def add_post_processor(self, pp): + """Add a PostProcessor object to the end of the chain.""" + self._pps.append(pp) + pp.set_downloader(self) + def to_stdout(self, message, skip_eol=False): """Print message to stdout if not in quiet mode.""" if not self._params.get('quiet', False): @@ -288,11 +303,26 @@ class FileDownloader(object): except (urllib2.URLError, httplib.HTTPException, socket.error), err: retcode = self.trouble('ERROR: unable to download video data: %s' % str(err)) continue + try: + self.post_process(filename, result) + except (PostProcessingError), err: + retcode = self.trouble('ERROR: postprocessing: %s' % str(err)) + continue + break if not suitable_found: retcode = self.trouble('ERROR: no suitable InfoExtractor: %s' % url) return retcode + + def post_process(self, filename, ie_info): + """Run the postprocessing chain on the given file.""" + info = dict(ie_info) + info['filepath'] = filename + for pp in self._pps: + info = pp.run(info) + if info is None: + break def _do_download(self, stream, url): request = urllib2.Request(url, None, std_headers) @@ -736,6 +766,62 @@ class YoutubePlaylistIE(InfoExtractor): information.extend(self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)) return information +class PostProcessor(object): + """Post Processor class. + + PostProcessor objects can be added to downloaders with their + add_post_processor() method. 
When the downloader has finished a + successful download, it will take its internal chain of PostProcessors + and start calling the run() method on each one of them, first with + an initial argument and then with the returned value of the previous + PostProcessor. + + The chain will be stopped if one of them ever returns None or the end + of the chain is reached. + + PostProcessor objects follow a "mutual registration" process similar + to InfoExtractor objects. + """ + + _downloader = None + + def __init__(self, downloader=None): + self._downloader = downloader + + def to_stdout(self, message): + """Print message to stdout if downloader is not in quiet mode.""" + if self._downloader is None or not self._downloader.get_params().get('quiet', False): + print message + + def to_stderr(self, message): + """Print message to stderr.""" + print >>sys.stderr, message + + def set_downloader(self, downloader): + """Sets the downloader for this PP.""" + self._downloader = downloader + + def run(self, information): + """Run the PostProcessor. + + The "information" argument is a dictionary like the ones + returned by InfoExtractors. The only difference is that this + one has an extra field called "filepath" that points to the + downloaded file. + + When this method returns None, the postprocessing chain is + stopped. However, this method may return an information + dictionary that will be passed to the next postprocessing + object in the chain. It can be the one it received after + changing some fields. + + In addition, this method may raise a PostProcessingError + exception that will be taken into account by the downloader + it was called from. 
+ """ + return information # by default, do nothing + +### MAIN PROGRAM ### if __name__ == '__main__': try: # Modules needed only when running the main program From 0ddf38df1877158fff2d374f5791f8c6db64e98b Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Sat, 9 Aug 2008 09:43:25 +0200 Subject: [PATCH 041/455] Bump version string --- youtube-dl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index 7690898b2..ba534e10b 100755 --- a/youtube-dl +++ b/youtube-dl @@ -836,7 +836,7 @@ if __name__ == '__main__': # Parse command line parser = optparse.OptionParser( usage='Usage: %prog [options] url...', - version='2008.07.26', + version='2008.08.09', conflict_handler='resolve', ) parser.add_option('-h', '--help', From 2546e7679f26147fce3ddb3fc49e869a03ec0a2e Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Sat, 13 Sep 2008 13:23:24 +0200 Subject: [PATCH 042/455] Fix metacafe.com and UTF8 output filenames --- youtube-dl | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/youtube-dl b/youtube-dl index ba534e10b..ecc99b7c1 100755 --- a/youtube-dl +++ b/youtube-dl @@ -4,6 +4,7 @@ # License: Public domain code import htmlentitydefs import httplib +import locale import math import netrc import os @@ -592,7 +593,7 @@ class MetacafeIE(InfoExtractor): """Information Extractor for metacafe.com.""" _VALID_URL = r'(?:http://)?(?:www\.)?metacafe\.com/watch/([^/]+)/([^/]+)/.*' - _DISCLAIMER = 'http://www.metacafe.com/disclaimer' + _DISCLAIMER = 'http://www.metacafe.com/family_filter/' _youtube_ie = None def __init__(self, youtube_ie, downloader=None): @@ -631,10 +632,10 @@ class MetacafeIE(InfoExtractor): # Confirm age disclaimer_form = { - 'allowAdultContent': '1', + 'filters': '0', 'submit': "Continue - I'm over 18", } - request = urllib2.Request('http://www.metacafe.com/watch/', urllib.urlencode(disclaimer_form), std_headers) + request = urllib2.Request('http://www.metacafe.com/', 
urllib.urlencode(disclaimer_form), std_headers) try: self.report_age_confirmation() disclaimer = urllib2.urlopen(request).read() @@ -684,7 +685,7 @@ class MetacafeIE(InfoExtractor): video_url = '%s?__gda__=%s' % (mediaURL, gdaKey) - mobj = re.search(r'(?im)(.*) - Video', webpage) if mobj is None: self.to_stderr(u'ERROR: unable to extract title') return [None] @@ -909,7 +910,7 @@ if __name__ == '__main__': 'forcetitle': opts.gettitle, 'simulate': (opts.simulate or opts.geturl or opts.gettitle), 'format': opts.format, - 'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode()) + 'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(locale.getdefaultlocale()[1])) or (opts.usetitle and u'%(stitle)s-%(id)s.%(ext)s') or (opts.useliteral and u'%(title)s-%(id)s.%(ext)s') or u'%(id)s.%(ext)s'), From d853063955454d27721cade9f75db4d8945dd78f Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Sat, 20 Sep 2008 11:17:04 +0200 Subject: [PATCH 043/455] Bump version number --- youtube-dl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index ecc99b7c1..a19571c5b 100755 --- a/youtube-dl +++ b/youtube-dl @@ -837,7 +837,7 @@ if __name__ == '__main__': # Parse command line parser = optparse.OptionParser( usage='Usage: %prog [options] url...', - version='2008.08.09', + version='2008.09.20', conflict_handler='resolve', ) parser.add_option('-h', '--help', From 1e9daf2a487646f55a95eaa119789cf28443fd69 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Thu, 16 Oct 2008 23:33:19 +0200 Subject: [PATCH 044/455] Make the YouTube login mechanism work across countries --- youtube-dl | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/youtube-dl b/youtube-dl index a19571c5b..c58cb3cfd 100755 --- a/youtube-dl +++ b/youtube-dl @@ -431,8 +431,8 @@ class YoutubeIE(InfoExtractor): """Information extractor for youtube.com.""" _VALID_URL = 
r'^((?:http://)?(?:\w+\.)?youtube\.com/(?:(?:v/)|(?:(?:watch(?:\.php)?)?\?(?:.+&)?v=)))?([0-9A-Za-z_-]+)(?(1).+)?$' - _LOGIN_URL = 'http://www.youtube.com/login?next=/' - _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/' + _LOGIN_URL = 'http://uk.youtube.com/login?next=/' + _AGE_URL = 'http://uk.youtube.com/verify_age?next_url=/' _NETRC_MACHINE = 'youtube' @staticmethod @@ -537,7 +537,7 @@ class YoutubeIE(InfoExtractor): video_extension = {'18': 'mp4', '17': '3gp'}.get(format_param, 'flv') # Normalize URL, including format - normalized_url = 'http://www.youtube.com/watch?v=%s' % video_id + normalized_url = 'http://uk.youtube.com/watch?v=%s' % video_id if format_param is not None: normalized_url = '%s&fmt=%s' % (normalized_url, format_param) request = urllib2.Request(normalized_url, None, std_headers) @@ -554,7 +554,7 @@ class YoutubeIE(InfoExtractor): if mobj is None: self.to_stderr(u'ERROR: unable to extract "t" parameter') return [None] - video_real_url = 'http://www.youtube.com/get_video?video_id=%s&t=%s' % (video_id, mobj.group(1)) + video_real_url = 'http://uk.youtube.com/get_video?video_id=%s&t=%s' % (video_id, mobj.group(1)) if format_param is not None: video_real_url = '%s&fmt=%s' % (video_real_url, format_param) self.report_video_url(video_id, video_real_url) @@ -655,7 +655,7 @@ class MetacafeIE(InfoExtractor): # Check if video comes from YouTube mobj2 = re.match(r'^yt-(.*)$', video_id) if mobj2 is not None: - return self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % mobj2.group(1)) + return self._youtube_ie.extract('http://uk.youtube.com/watch?v=%s' % mobj2.group(1)) simple_title = mobj.group(2).decode('utf-8') video_extension = 'flv' @@ -711,7 +711,7 @@ class YoutubePlaylistIE(InfoExtractor): """Information Extractor for YouTube playlists.""" _VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/view_play_list\?p=(.+)' - _TEMPLATE_URL = 'http://www.youtube.com/view_play_list?p=%s&page=%s' + _TEMPLATE_URL = 
'http://uk.youtube.com/view_play_list?p=%s&page=%s' _VIDEO_INDICATOR = r'/watch\?v=(.+?)&' _MORE_PAGES_INDICATOR = r'/view_play_list?p=%s&page=%s' _youtube_ie = None @@ -764,7 +764,7 @@ class YoutubePlaylistIE(InfoExtractor): information = [] for id in video_ids: - information.extend(self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)) + information.extend(self._youtube_ie.extract('http://uk.youtube.com/watch?v=%s' % id)) return information class PostProcessor(object): From 9ca4851a00469aef46c68823050ad70d8f9c75a0 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Thu, 16 Oct 2008 23:34:04 +0200 Subject: [PATCH 045/455] Bump version number --- youtube-dl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index c58cb3cfd..d8e14f3ee 100755 --- a/youtube-dl +++ b/youtube-dl @@ -837,7 +837,7 @@ if __name__ == '__main__': # Parse command line parser = optparse.OptionParser( usage='Usage: %prog [options] url...', - version='2008.09.20', + version='2008.10.16', conflict_handler='resolve', ) parser.add_option('-h', '--help', From 5487aea5d8f3b39257bd7e25a35239bb16b5a52e Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Sat, 1 Nov 2008 15:50:50 +0100 Subject: [PATCH 046/455] Improve documentation --- index.html.in | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/index.html.in b/index.html.in index 40f8fd326..c8c86a13d 100644 --- a/index.html.in +++ b/index.html.in @@ -96,7 +96,8 @@ is too old.

  • You can change the file name of the video using the -o option, like in youtube-dl -o vid.flv "http://www.youtube.com/watch?v=foobar". -Read the Output template section for more details on this.
  • +Read the Output template section for more details on +this.
  • Some videos require an account to be downloaded, mostly because they're flagged as mature content. You can pass the program a username and password @@ -130,11 +131,9 @@ literal title in the filename with the -l or --literal option.
  • by using the -f or --format option. This makes it possible to download high quality versions of the videos when available. -
  • youtube-dl can attempt to download the best quality version of -a video by using the -b or --best-quality option.
  • +
  • The -b or --best-quality option is an alias for -f 18.
  • -
  • youtube-dl can attempt to download the mobile quality version of -a video by using the -m or --mobile-version option.
  • +
  • The -m or --mobile-version option is an alias for -f 17.
  • Normally, the program will stop on the first error, but you can tell it to attempt to download every video with the -i or --ignore-errors option.
  • @@ -156,7 +155,7 @@ That's ok. -

    Download it

    +

    Download it

    Note that if you directly click on these hyperlinks, your web browser will most likely display the program contents. It's usually better to @@ -189,9 +188,9 @@ person who uploaded the video.

  • title: The sequence will be replaced by the literal video title.
  • stitle: The sequence will be replaced by a simplified video -title.
  • +title, restricted to alphanumeric characters and dashes.
  • ext: The sequence will be replaced by the appropriate -extension.
  • +extension (like flv or mp4).

    As you may have guessed, the default template is %(id)s.%(ext)s. From 27d98b6e25fa76c14ad813bb70e2f53d0fd36ef1 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Sat, 1 Nov 2008 15:52:51 +0100 Subject: [PATCH 047/455] Fix TypeError in decode() method and unordered playlist URLs --- youtube-dl | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/youtube-dl b/youtube-dl index d8e14f3ee..e5cf43411 100755 --- a/youtube-dl +++ b/youtube-dl @@ -753,10 +753,11 @@ class YoutubePlaylistIE(InfoExtractor): return [None] # Extract video identifiers - ids_in_page = set() + ids_in_page = [] for mobj in re.finditer(self._VIDEO_INDICATOR, page): - ids_in_page.add(mobj.group(1)) - video_ids.extend(list(ids_in_page)) + if mobj.group(1) not in ids_in_page: + ids_in_page.append(mobj.group(1)) + video_ids.extend(ids_in_page) if (self._MORE_PAGES_INDICATOR % (playlist_id, pagenum + 1)) not in page: break @@ -901,6 +902,9 @@ if __name__ == '__main__': youtube_pl_ie = YoutubePlaylistIE(youtube_ie) # File downloader + charset = locale.getdefaultlocale()[1] + if charset is None: + charset = 'ascii' fd = FileDownloader({ 'usenetrc': opts.usenetrc, 'username': opts.username, @@ -910,7 +914,7 @@ if __name__ == '__main__': 'forcetitle': opts.gettitle, 'simulate': (opts.simulate or opts.geturl or opts.gettitle), 'format': opts.format, - 'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(locale.getdefaultlocale()[1])) + 'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(charset)) or (opts.usetitle and u'%(stitle)s-%(id)s.%(ext)s') or (opts.useliteral and u'%(title)s-%(id)s.%(ext)s') or u'%(id)s.%(ext)s'), From 240b737ebd2356f9d117ccd0c14349d4ec88e90e Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Sat, 1 Nov 2008 15:56:46 +0100 Subject: [PATCH 048/455] Bump version number --- youtube-dl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index e5cf43411..e3c324f1f 100755 --- a/youtube-dl +++ 
b/youtube-dl @@ -838,7 +838,7 @@ if __name__ == '__main__': # Parse command line parser = optparse.OptionParser( usage='Usage: %prog [options] url...', - version='2008.10.16', + version='2008.11.01', conflict_handler='resolve', ) parser.add_option('-h', '--help', From 72ac78b8b0d33092ff531077fe5c2ef7f7422df5 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Sat, 31 Jan 2009 10:12:22 +0100 Subject: [PATCH 049/455] Fix for YouTube internationalization changes --- youtube-dl | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/youtube-dl b/youtube-dl index e3c324f1f..2aa069c58 100755 --- a/youtube-dl +++ b/youtube-dl @@ -431,14 +431,19 @@ class YoutubeIE(InfoExtractor): """Information extractor for youtube.com.""" _VALID_URL = r'^((?:http://)?(?:\w+\.)?youtube\.com/(?:(?:v/)|(?:(?:watch(?:\.php)?)?\?(?:.+&)?v=)))?([0-9A-Za-z_-]+)(?(1).+)?$' - _LOGIN_URL = 'http://uk.youtube.com/login?next=/' - _AGE_URL = 'http://uk.youtube.com/verify_age?next_url=/' + _LANG_URL = r'http://uk.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1' + _LOGIN_URL = 'http://www.youtube.com/signup?next=/&gl=US&hl=en' + _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en' _NETRC_MACHINE = 'youtube' @staticmethod def suitable(url): return (re.match(YoutubeIE._VALID_URL, url) is not None) + def report_lang(self): + """Report attempt to set language.""" + self.to_stdout(u'[youtube] Setting language') + def report_login(self): """Report attempt to log in.""" self.to_stdout(u'[youtube] Logging in') @@ -487,6 +492,15 @@ class YoutubeIE(InfoExtractor): if username is None: return + # Set language + request = urllib2.Request(self._LOGIN_URL, None, std_headers) + try: + self.report_lang() + urllib2.urlopen(request).read() + except (urllib2.URLError, httplib.HTTPException, socket.error), err: + self.to_stderr(u'WARNING: unable to set language: %s' % str(err)) + return + # Log in login_form = { 'current_form': 'loginForm', @@ 
-537,7 +551,7 @@ class YoutubeIE(InfoExtractor): video_extension = {'18': 'mp4', '17': '3gp'}.get(format_param, 'flv') # Normalize URL, including format - normalized_url = 'http://uk.youtube.com/watch?v=%s' % video_id + normalized_url = 'http://www.youtube.com/watch?v=%s&gl=US&hl=en' % video_id if format_param is not None: normalized_url = '%s&fmt=%s' % (normalized_url, format_param) request = urllib2.Request(normalized_url, None, std_headers) @@ -554,7 +568,7 @@ class YoutubeIE(InfoExtractor): if mobj is None: self.to_stderr(u'ERROR: unable to extract "t" parameter') return [None] - video_real_url = 'http://uk.youtube.com/get_video?video_id=%s&t=%s' % (video_id, mobj.group(1)) + video_real_url = 'http://www.youtube.com/get_video?video_id=%s&t=%s' % (video_id, mobj.group(1)) if format_param is not None: video_real_url = '%s&fmt=%s' % (video_real_url, format_param) self.report_video_url(video_id, video_real_url) @@ -655,7 +669,7 @@ class MetacafeIE(InfoExtractor): # Check if video comes from YouTube mobj2 = re.match(r'^yt-(.*)$', video_id) if mobj2 is not None: - return self._youtube_ie.extract('http://uk.youtube.com/watch?v=%s' % mobj2.group(1)) + return self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % mobj2.group(1)) simple_title = mobj.group(2).decode('utf-8') video_extension = 'flv' @@ -711,7 +725,7 @@ class YoutubePlaylistIE(InfoExtractor): """Information Extractor for YouTube playlists.""" _VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/view_play_list\?p=(.+)' - _TEMPLATE_URL = 'http://uk.youtube.com/view_play_list?p=%s&page=%s' + _TEMPLATE_URL = 'http://www.youtube.com/view_play_list?p=%s&page=%s&gl=US&hl=en' _VIDEO_INDICATOR = r'/watch\?v=(.+?)&' _MORE_PAGES_INDICATOR = r'/view_play_list?p=%s&page=%s' _youtube_ie = None @@ -765,7 +779,7 @@ class YoutubePlaylistIE(InfoExtractor): information = [] for id in video_ids: - information.extend(self._youtube_ie.extract('http://uk.youtube.com/watch?v=%s' % id)) + 
information.extend(self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)) return information class PostProcessor(object): From c6fd0bb80680d09eca54a3a428a47e3205d0180f Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Sat, 31 Jan 2009 10:25:59 +0100 Subject: [PATCH 050/455] Add -a (--batch-file) option --- youtube-dl | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/youtube-dl b/youtube-dl index 2aa069c58..1894317b0 100755 --- a/youtube-dl +++ b/youtube-dl @@ -889,10 +889,20 @@ if __name__ == '__main__': action='store_true', dest='ignoreerrors', help='continue on download errors', default=False) parser.add_option('-r', '--rate-limit', dest='ratelimit', metavar='L', help='download rate limit (e.g. 50k or 44.6m)') + parser.add_option('-a', '--batch-file', + dest='batchfile', metavar='F', help='file containing URLs to download') (opts, args) = parser.parse_args() + # Batch file verification + if opts.batchfile is not None: + try: + batchurls = [line.strip() for line in open(opts.batchfile, 'r')] + except IOError: + sys.exit(u'ERROR: batch file could not be read') + all_urls = batchurls + args + # Conflicting, missing and erroneous options - if len(args) < 1: + if len(all_urls) < 1: sys.exit(u'ERROR: you must provide at least one URL') if opts.usenetrc and (opts.username is not None or opts.password is not None): sys.exit(u'ERROR: using .netrc conflicts with giving username/password') @@ -938,7 +948,7 @@ if __name__ == '__main__': fd.add_info_extractor(youtube_pl_ie) fd.add_info_extractor(metacafe_ie) fd.add_info_extractor(youtube_ie) - retcode = fd.download(args) + retcode = fd.download(all_urls) sys.exit(retcode) except DownloadError: From ba72f8a5d1fa1d788c020aaace075614df3f523d Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Sat, 31 Jan 2009 10:46:21 +0100 Subject: [PATCH 051/455] Bump version and increase Firefox version number --- youtube-dl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git 
a/youtube-dl b/youtube-dl index 1894317b0..271952af8 100755 --- a/youtube-dl +++ b/youtube-dl @@ -18,7 +18,7 @@ import urllib import urllib2 std_headers = { - 'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.0.1) Gecko/2008070208 Firefox/3.0.1', + 'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.0.5) Gecko/2008120122 Firefox/3.0.5', 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7', 'Accept': 'text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5', 'Accept-Language': 'en-us,en;q=0.5', @@ -852,7 +852,7 @@ if __name__ == '__main__': # Parse command line parser = optparse.OptionParser( usage='Usage: %prog [options] url...', - version='2008.11.01', + version='2009.01.31', conflict_handler='resolve', ) parser.add_option('-h', '--help', From eb0d2909a85dcdaf17635ae15bf61af385a92e2e Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Sat, 31 Jan 2009 12:05:25 +0100 Subject: [PATCH 052/455] Document new -a option --- index.html.in | 3 +++ 1 file changed, 3 insertions(+) diff --git a/index.html.in b/index.html.in index c8c86a13d..c64361152 100644 --- a/index.html.in +++ b/index.html.in @@ -138,6 +138,9 @@ quality versions of the videos when available.

  • Normally, the program will stop on the first error, but you can tell it to attempt to download every video with the -i or --ignore-errors option.
  • +
  • The -a or --batch-file option lets you specify a file to read URLs from. +The file must contain one URL per line.
  • +
  • youtube-dl honors the http_proxy environment variable if you want to use a proxy. Set it to something like http://proxy.example.com:8080, and do not leave the http:// From d1580ed9908f10d6eb550e1ce8c31d3bc6921570 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Sat, 31 Jan 2009 12:07:37 +0100 Subject: [PATCH 053/455] Fix NameError --- youtube-dl | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube-dl b/youtube-dl index 271952af8..1ad297c26 100755 --- a/youtube-dl +++ b/youtube-dl @@ -894,6 +894,7 @@ if __name__ == '__main__': (opts, args) = parser.parse_args() # Batch file verification + batchurls = [] if opts.batchfile is not None: try: batchurls = [line.strip() for line in open(opts.batchfile, 'r')] From 25af2bce3ad6a17741388ffa10ae269770c1cfa3 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Mon, 2 Feb 2009 19:59:48 +0100 Subject: [PATCH 054/455] Include Danny Colligan's YouTube search InfoExtractor --- youtube-dl | 86 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 86 insertions(+) diff --git a/youtube-dl b/youtube-dl index 1ad297c26..29afd930f 100755 --- a/youtube-dl +++ b/youtube-dl @@ -721,6 +721,90 @@ class MetacafeIE(InfoExtractor): 'ext': video_extension.decode('utf-8'), }] + +class YoutubeSearchIE(InfoExtractor): + """Information Extractor for YouTube search queries.""" + _VALID_QUERY = r'ytsearch(\d+|all)?:[\s\S]+' + _TEMPLATE_URL = 'http://www.youtube.com/results?search_query=%s&page=%s&gl=US&hl=en' + _VIDEO_INDICATOR = r'href="/watch\?v=.+?"' + _MORE_PAGES_INDICATOR = r'>Next' + _youtube_ie = None + + def __init__(self, youtube_ie, downloader=None): + InfoExtractor.__init__(self, downloader) + self._youtube_ie = youtube_ie + + @staticmethod + def suitable(url): + return (re.match(YoutubeSearchIE._VALID_QUERY, url) is not None) + + def report_download_page(self, query, pagenum): + """Report attempt to download playlist page with given number.""" + self.to_stdout(u'[youtube] query "%s": Downloading page %s' % 
(query, pagenum)) + + def _real_initialize(self): + self._youtube_ie.initialize() + + def _real_extract(self, query): + mobj = re.match(self._VALID_QUERY, query) + if mobj is None: + self.to_stderr(u'ERROR: invalid search query "%s"' % query) + return [None] + + prefix, query = query.split(':') + prefix = prefix[8:] + if prefix == '': + return self._download_n_results(query, 1) + elif prefix == 'all': + return self._download_n_results(query, -1) + else: + try: + n = int(prefix) + if n <= 0: + self.to_stderr(u'ERROR: invalid download number %s for query "%s"' % (n, query)) + return [None] + return self._download_n_results(query, n) + except ValueError: # parsing prefix as int fails + return self._download_n_results(query, 1) + + def _download_n_results(self, query, n): + """Downloads a specified number of results for a query""" + + video_ids = [] + already_seen = set() + pagenum = 1 + + while True: + self.report_download_page(query, pagenum) + result_url = self._TEMPLATE_URL % (urllib.quote(query.replace(' ', '+')), pagenum) + request = urllib2.Request(result_url, None, std_headers) + try: + page = urllib2.urlopen(request).read() + except (urllib2.URLError, httplib.HTTPException, socket.error), err: + self.to_stderr(u'ERROR: unable to download webpage: %s' % str(err)) + return [None] + + # Extract video identifiers + for mobj in re.finditer(self._VIDEO_INDICATOR, page): + video_id = page[mobj.span()[0]:mobj.span()[1]].split('=')[2][:-1] + if video_id not in already_seen: + video_ids.append(video_id) + already_seen.add(video_id) + if len(video_ids) == n: + # Specified n videos reached + information = [] + for id in video_ids: + information.extend(self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)) + return information + + if self._MORE_PAGES_INDICATOR not in page: + information = [] + for id in video_ids: + information.extend(self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)) + return information + + pagenum = pagenum + 1 + class 
YoutubePlaylistIE(InfoExtractor): """Information Extractor for YouTube playlists.""" @@ -925,6 +1009,7 @@ if __name__ == '__main__': youtube_ie = YoutubeIE() metacafe_ie = MetacafeIE(youtube_ie) youtube_pl_ie = YoutubePlaylistIE(youtube_ie) + youtube_search_ie = YoutubeSearchIE(youtube_ie) # File downloader charset = locale.getdefaultlocale()[1] @@ -946,6 +1031,7 @@ if __name__ == '__main__': 'ignoreerrors': opts.ignoreerrors, 'ratelimit': opts.ratelimit, }) + fd.add_info_extractor(youtube_search_ie) fd.add_info_extractor(youtube_pl_ie) fd.add_info_extractor(metacafe_ie) fd.add_info_extractor(youtube_ie) From d1536018a8e28ad81eedb168e212530d43e292bc Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Mon, 2 Feb 2009 20:03:07 +0100 Subject: [PATCH 055/455] Include Danny Colligan in credits --- index.html.in | 1 + 1 file changed, 1 insertion(+) diff --git a/index.html.in b/index.html.in index c64361152..38ee9997e 100644 --- a/index.html.in +++ b/index.html.in @@ -205,6 +205,7 @@ When some command line options are used, it's replaced by other templates like
    • Ricardo Garcia Gonzalez: program core, YouTube.com InfoExtractor, metacafe.com InfoExtractor and YouTube playlist InfoExtractor.
    • +
    • Danny Colligan: YouTube search InfoExtractor, ideas and patches.
    • Many other people contributing patches, code, ideas and kind messages. Too many to be listed here. You know who you are. Thank you very much.
    From a20e4c2f9689ab8970d6d918a97d3e4ce598e33b Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Mon, 2 Feb 2009 20:17:52 +0100 Subject: [PATCH 056/455] Improve documentation of new features in webpage --- index.html.in | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/index.html.in b/index.html.in index 38ee9997e..ddcebcdf5 100644 --- a/index.html.in +++ b/index.html.in @@ -141,6 +141,17 @@ to attempt to download every video with the -i or --ignore-errors option.
  • The -a or --batch-file option lets you specify a file to read URLs from. The file must contain one URL per line.
  • +
  • For YouTube, you can also use the URL of a playlist, and it will download +all the videos in that playlist.
  • + +
  • For YouTube, you can also use the special word ytsearch to +download search results. With ytsearch it will download the +first search result. With ytsearchN, where N is a number, it +will download the first N results. With ytsearchall it will +download every result for that search. In most systems you'll need to +use quotes for multiple words. Example: youtube-dl "ytsearch3:cute +kittens". +
  • youtube-dl honors the http_proxy environment variable if you want to use a proxy. Set it to something like http://proxy.example.com:8080, and do not leave the http:// @@ -158,7 +169,7 @@ That's ok.
  • -

    Download it

    +

    Download it

    Note that if you directly click on these hyperlinks, your web browser will most likely display the program contents. It's usually better to @@ -173,7 +184,7 @@ are using.

  • SHA256: @PROGRAM_SHA256SUM@
  • -

    Output template

    +

    Output template

        The -o option allows users to indicate a template for the output file names. The basic usage is not to set any template arguments when downloading a single From a9633f14577db35ce1bed3f8d623a7be8f287e87 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Mon, 2 Feb 2009 20:29:44 +0100 Subject: [PATCH 057/455] Use quote_plus instead of manually replacing spaces by plus signs --- youtube-dl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index 29afd930f..3e7f13956 100755 --- a/youtube-dl +++ b/youtube-dl @@ -776,7 +776,7 @@ class YoutubeSearchIE(InfoExtractor): while True: self.report_download_page(query, pagenum) - result_url = self._TEMPLATE_URL % (urllib.quote(query.replace(' ', '+')), pagenum) + result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum) request = urllib2.Request(result_url, None, std_headers) try: page = urllib2.urlopen(request).read() From 64a6f26c5d3720b36a7486dc9883ec74f0e4a3b2 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Mon, 2 Feb 2009 20:34:20 +0100 Subject: [PATCH 058/455] Put Danny Colligan as an author in the script itself --- youtube-dl | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube-dl b/youtube-dl index 3e7f13956..f4f64013d 100755 --- a/youtube-dl +++ b/youtube-dl @@ -1,6 +1,7 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- # Author: Ricardo Garcia Gonzalez +# Author: Danny Colligan # License: Public domain code import htmlentitydefs import httplib From 0beeff4b3e208d15869823df65f50be43c282403 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Wed, 4 Feb 2009 21:38:31 +0100 Subject: [PATCH 059/455] Add the -w or --no-overwrites option --- youtube-dl | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/youtube-dl b/youtube-dl index f4f64013d..de6c7a1d1 100755 --- a/youtube-dl +++ b/youtube-dl @@ -89,6 +89,7 @@ class FileDownloader(object): outtmpl: Template for output names. ignoreerrors: Do not stop on download errors. 
    
ratelimit: Download speed limit, in bytes/sec. + nooverwrites: Prevent overwriting files. """ _params = None @@ -286,6 +287,9 @@ class FileDownloader(object): except (ValueError, KeyError), err: retcode = self.trouble('ERROR: invalid output template or system charset: %s' % str(err)) continue + if self._params['nooverwrites'] and os.path.exists(filename): + self.to_stderr('WARNING: file exists: %s; skipping' % filename) + continue try: self.pmkdir(filename) except (OSError, IOError), err: @@ -976,6 +980,8 @@ if __name__ == '__main__': dest='ratelimit', metavar='L', help='download rate limit (e.g. 50k or 44.6m)') parser.add_option('-a', '--batch-file', dest='batchfile', metavar='F', help='file containing URLs to download') + parser.add_option('-w', '--no-overwrites', + action='store_true', dest='nooverwrites', help='do not overwrite files', default=False) (opts, args) = parser.parse_args() # Batch file verification @@ -1031,6 +1037,7 @@ if __name__ == '__main__': or u'%(id)s.%(ext)s'), 'ignoreerrors': opts.ignoreerrors, 'ratelimit': opts.ratelimit, + 'nooverwrites': opts.nooverwrites, }) fd.add_info_extractor(youtube_search_ie) fd.add_info_extractor(youtube_pl_ie) From 111ae3695c4c4c080a9cd15c60c6eea854bb711e Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Wed, 4 Feb 2009 21:39:55 +0100 Subject: [PATCH 060/455] Document new -w option --- index.html.in | 3 +++ 1 file changed, 3 insertions(+) diff --git a/index.html.in b/index.html.in index ddcebcdf5..c503df7df 100644 --- a/index.html.in +++ b/index.html.in @@ -141,6 +141,9 @@ to attempt to download every video with the -i or --ignore-errors option.

  • The -a or --batch-file option lets you specify a file to read URLs from. The file must contain one URL per line.
  • +
  • The program can be told not to overwrite existing files using the -w or +--no-overwrites option.
  • +
  • For YouTube, you can also use the URL of a playlist, and it will download all the videos in that playlist.
      • From fd209848897f45e9df8931d14e937f0b0ed2a547 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Sat, 7 Feb 2009 12:29:00 +0100 Subject: [PATCH 061/455] Bump version number --- youtube-dl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index de6c7a1d1..a0cafeae9 100755 --- a/youtube-dl +++ b/youtube-dl @@ -941,7 +941,7 @@ if __name__ == '__main__': # Parse command line parser = optparse.OptionParser( usage='Usage: %prog [options] url...', - version='2009.01.31', + version='2009.02.07', conflict_handler='resolve', ) parser.add_option('-h', '--help', From 5121ef207112040fdf58e889393071aeb51cf4f2 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Sun, 1 Mar 2009 00:00:04 +0100 Subject: [PATCH 062/455] Fix wrong indentation --- youtube-dl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index a0cafeae9..323897a9c 100755 --- a/youtube-dl +++ b/youtube-dl @@ -144,7 +144,7 @@ class FileDownloader(object): return '--:--' return '%02d:%02d' % (eta_mins, eta_secs) - @staticmethod + @staticmethod def calc_speed(start, now, bytes): dif = now - start if bytes == 0 or dif < 0.001: # One millisecond From cc109403853bac92861aab7f0fc5f93398c336b0 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Mon, 2 Mar 2009 00:02:56 +0100 Subject: [PATCH 063/455] Fix very wrong code for setting the language It turned out that, despite the program working without apparent errors, the code for setting the language was completely wrong. First, it didn't run unless some form of authentication was performed. Second, I mistyped _LANG_URL as _LOGIN_URL, so the language was not being set at all! Amazing it still worked. 
    
--- youtube-dl | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/youtube-dl b/youtube-dl index 323897a9c..4ddf62274 100755 --- a/youtube-dl +++ b/youtube-dl @@ -493,12 +493,8 @@ class YoutubeIE(InfoExtractor): self.to_stderr(u'WARNING: parsing .netrc: %s' % str(err)) return - # No authentication to be performed - if username is None: - return - # Set language - request = urllib2.Request(self._LOGIN_URL, None, std_headers) + request = urllib2.Request(self._LANG_URL, None, std_headers) try: self.report_lang() urllib2.urlopen(request).read() @@ -506,6 +502,10 @@ class YoutubeIE(InfoExtractor): self.to_stderr(u'WARNING: unable to set language: %s' % str(err)) return + # No authentication to be performed + if username is None: + return + # Log in login_form = { 'current_form': 'loginForm', From 3e703dd1cd67100cbf7de001377ecacf20298cfa Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Tue, 3 Mar 2009 21:25:04 +0100 Subject: [PATCH 064/455] Remove generator and webpage template, moved to wiki --- generate-index | 15 ---- index.html.in | 229 ------------------------------------------------- 2 files changed, 244 deletions(-) delete mode 100755 generate-index delete mode 100644 index.html.in diff --git a/generate-index b/generate-index deleted file mode 100755 index f8f940e4c..000000000 --- a/generate-index +++ /dev/null @@ -1,15 +0,0 @@ -#!/usr/bin/env python -import hashlib -import subprocess - -template = file('index.html.in', 'r').read() -version = subprocess.Popen(['./youtube-dl', '--version'], stdout=subprocess.PIPE).communicate()[0].strip() -data = file('youtube-dl', 'rb').read() -md5sum = hashlib.md5(data).hexdigest() -sha1sum = hashlib.sha1(data).hexdigest() -sha256sum = hashlib.sha256(data).hexdigest() -template = template.replace('@PROGRAM_VERSION@', version) -template = template.replace('@PROGRAM_MD5SUM@', md5sum) -template = template.replace('@PROGRAM_SHA1SUM@', sha1sum) -template = template.replace('@PROGRAM_SHA256SUM@', sha256sum) 
-file('index.html', 'w').write(template) diff --git a/index.html.in b/index.html.in deleted file mode 100644 index c503df7df..000000000 --- a/index.html.in +++ /dev/null @@ -1,229 +0,0 @@ - - - - - youtube-dl: Download videos from YouTube.com - - - -

    youtube-dl: Download videos from YouTube.com

    - -

    (and more...)

    - -

    What is it?

    - -

    youtube-dl is a small command-line program to download videos -from YouTube.com. It requires the Python -interpreter, version 2.4 or later, and it's not platform specific. -It should work in your Unix box, in Windows or in Mac OS X. The latest version -is @PROGRAM_VERSION@. It's released to the public domain, -which means you can modify it, redistribute it or use it however you like.

    - -

    I'll try to keep it updated if YouTube.com changes the way you access -their videos. After all, it's a simple and short program. However, I can't -guarantee anything. If you detect it stops working, check for new versions -and/or inform me about the problem, indicating the program version you -are using. If the program stops working and I can't solve the problem but -you have a solution, I'd like to know it. If that happens and you feel you -can maintain the program yourself, tell me. My contact information is -at freshmeat.net.

    - -

    Thanks for all the feedback received so far. I'm glad people find my -program useful.

    - -

    Usage instructions

    - -

    In Windows, once you have installed the Python interpreter, save the -program with the .py extension and put it somewhere in the PATH. -Try to follow the -guide to -install youtube-dl under Windows XP.

    - -

    In Unix, download it, give it execution permission and copy it to one -of the PATH directories (typically, /usr/local/bin).

    - -

    After that, you should be able to call it from the command line as -youtube-dl or youtube-dl.py. I will use youtube-dl -in the following examples. Usage instructions are easy. Use youtube-dl -followed by a video URL or identifier. Example: youtube-dl -"http://www.youtube.com/watch?v=foobar". The video will be saved -to the file foobar.flv in that example. As YouTube.com -videos are in Flash Video format, their extension should be flv. -In Linux and other unices, video players using a recent version of -ffmpeg can play them. That includes MPlayer, VLC, etc. Those two -work under Windows and other platforms, but you could also get a -specific FLV player of your taste.

    - -

    If you try to run the program and you receive an error message containing the -keyword SyntaxError near the end, it means your Python interpreter -is too old.

    - -

    More usage tips

    - -
      - -
    • You can change the file name of the video using the -o option, like in -youtube-dl -o vid.flv "http://www.youtube.com/watch?v=foobar". -Read the Output template section for more details on -this.
    • - -
    • Some videos require an account to be downloaded, mostly because they're -flagged as mature content. You can pass the program a username and password -for a YouTube.com account with the -u and -p options, like youtube-dl --u myusername -p mypassword "http://www.youtube.com/watch?v=foobar".
    • - -
    • The account data can also be read from the user .netrc file by indicating -the -n or --netrc option. The machine name is youtube in that -case.
    • - -
    • The simulate mode (activated with -s or --simulate) can be used -to just get the real video URL and use it with a download manager if you -prefer that option.
    • - -
    • The quiet mode (activated with -q or --quiet) can be used to -supress all output messages. This allows, in systems featuring /dev/stdout -and other similar special files, outputting the video data to standard output -in order to pipe it to another program without interferences.
    • - -
    • The program can be told to simply print the final video URL to standard -output using the -g or --get-url option.
    • - -
    • In a similar line, the -e or --get-title option tells the program to print -the video title.
    • - -
    • The default filename is video_id.flv. But you can also use the -video title in the filename with the -t or --title option, or preserve the -literal title in the filename with the -l or --literal option.
    • - -
    • You can make the program append &fmt=something to the URL -by using the -f or --format option. This makes it possible to download high -quality versions of the videos when available.
    • - -
    • The -b or --best-quality option is an alias for -f 18.
    • - -
    • The -m or --mobile-version option is an alias for -f 17.
    • - -
    • Normally, the program will stop on the first error, but you can tell it -to attempt to download every video with the -i or --ignore-errors option.
    • - -
    • The -a or --batch-file option lets you specify a file to read URLs from. -The file must contain one URL per line.
    • - -
    • The program can be told not to overwrite existing files using the -w or ---no-overwrites option.
    • - -
    • For YouTube, you can also use the URL of a playlist, and it will download -all the videos in that playlist.
    • - -
    • For YouTube, you can also use the special word ytsearch to -download search results. With ytsearch it will download the -first search result. With ytsearchN, where N is a number, it -will download the first N results. With ytsearchall it will -download every result for that search. In most systems you'll need to -use quotes for multiple words. Example: youtube-dl "ytsearch3:cute -kittens". - -
    • youtube-dl honors the http_proxy environment variable -if you want to use a proxy. Set it to something like -http://proxy.example.com:8080, and do not leave the http:// -prefix out.
    • - -
    • You can get the program version by calling it as youtube-dl --v or youtube-dl --version.
    • - -
    • For usage instructions, use youtube-dl -h or youtube-dl ---help.
    • - -
    • You can cancel the program at any time pressing Ctrl+C. It may print -some error lines saying something about KeyboardInterrupt. -That's ok.
    • - -
    - -

    Download it

    - -

    Note that if you directly click on these hyperlinks, your web browser will -most likely display the program contents. It's usually better to -right-click on it and choose the appropriate option, normally called Save -Target As or Save Link As, depending on the web browser you -are using.

    - -

    @PROGRAM_VERSION@

    -
      -
    • MD5: @PROGRAM_MD5SUM@
    • -
    • SHA1: @PROGRAM_SHA1SUM@
    • -
    • SHA256: @PROGRAM_SHA256SUM@
    • -
    - -

    Output template

    - -

    The -o option allows users to indicate a template for the output file names. -The basic usage is not to set any template arguments when downloading a single -file, like in youtube-dl -o funny_video.flv 'http://some/video'. -However, it may contain special sequences that will be replaced when -downloading each video. The special sequences have the format -%(NAME)s. To clarify, that's a percent symbol followed by a -name in parenthesis, followed by a lowercase S. Allowed names are:

    - -
      -
    • id: The sequence will be replaced by the video identifier.
    • -
    • url: The sequence will be replaced by the video URL.
    • -
    • uploader: The sequence will be replaced by the nickname of the -person who uploaded the video.
    • -
    • title: The sequence will be replaced by the literal video -title.
    • -
    • stitle: The sequence will be replaced by a simplified video -title, restricted to alphanumeric characters and dashes.
    • -
    • ext: The sequence will be replaced by the appropriate -extension (like flv or mp4).
    • -
    - -

    As you may have guessed, the default template is %(id)s.%(ext)s. -When some command line options are used, it's replaced by other templates like -%(title)s-%(id)s.%(ext)s. You can specify your own.

    - -

    Authors

    - -
      -
    • Ricardo Garcia Gonzalez: program core, YouTube.com InfoExtractor, -metacafe.com InfoExtractor and YouTube playlist InfoExtractor.
    • -
    • Danny Colligan: YouTube search InfoExtractor, ideas and patches.
    • -
    • Many other people contributing patches, code, ideas and kind messages. Too -many to be listed here. You know who you are. Thank you very much.
    • -
    - -

    Copyright © 2006-2007 Ricardo Garcia Gonzalez

    - - From 7ab2043c9c740e12abf02d065e282f28516939c0 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Tue, 3 Mar 2009 21:44:00 +0100 Subject: [PATCH 065/455] Bump version number --- youtube-dl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index 4ddf62274..192f9b21f 100755 --- a/youtube-dl +++ b/youtube-dl @@ -941,7 +941,7 @@ if __name__ == '__main__': # Parse command line parser = optparse.OptionParser( usage='Usage: %prog [options] url...', - version='2009.02.07', + version='2009.03.03', conflict_handler='resolve', ) parser.add_option('-h', '--help', From 76800042fd781e2df03db6502aac709e5f72e65b Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Wed, 4 Mar 2009 22:06:18 +0100 Subject: [PATCH 066/455] Replace version number while in progress --- youtube-dl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index 192f9b21f..f230d28fc 100755 --- a/youtube-dl +++ b/youtube-dl @@ -941,7 +941,7 @@ if __name__ == '__main__': # Parse command line parser = optparse.OptionParser( usage='Usage: %prog [options] url...', - version='2009.03.03', + version='INTERNAL', conflict_handler='resolve', ) parser.add_option('-h', '--help', From d0a9affb46f27a5afd5a6a76fdbc843d7189a1ad Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Wed, 4 Mar 2009 22:12:33 +0100 Subject: [PATCH 067/455] Replace setter and getter with simple attribute access --- youtube-dl | 52 +++++++++++++++++++++------------------------------- 1 file changed, 21 insertions(+), 31 deletions(-) diff --git a/youtube-dl b/youtube-dl index f230d28fc..345bf4df9 100755 --- a/youtube-dl +++ b/youtube-dl @@ -71,10 +71,10 @@ class FileDownloader(object): File downloaders accept a lot of parameters. In order not to saturate the object constructor with arguments, it receives a dictionary of - options instead. These options are available through the get_params() - method for the InfoExtractors to use. 
The FileDownloader also registers - itself as the downloader in charge for the InfoExtractors that are - added to it, so this is a "mutual registration". + options instead. These options are available through the params + attribute for the InfoExtractors to use. The FileDownloader also + registers itself as the downloader in charge for the InfoExtractors + that are added to it, so this is a "mutual registration". Available options: @@ -92,7 +92,7 @@ class FileDownloader(object): nooverwrites: Prevent overwriting files. """ - _params = None + params = None _ies = [] _pps = [] @@ -100,7 +100,7 @@ class FileDownloader(object): """Create a FileDownloader object with the given options.""" self._ies = [] self._pps = [] - self.set_params(params) + self.params = params @staticmethod def pmkdir(filename): @@ -174,16 +174,6 @@ class FileDownloader(object): multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower()) return long(round(number * multiplier)) - def set_params(self, params): - """Sets parameters.""" - if type(params) != dict: - raise ValueError('params: dictionary expected') - self._params = params - - def get_params(self): - """Get parameters.""" - return self._params - def add_info_extractor(self, ie): """Add an InfoExtractor object to the end of the list.""" self._ies.append(ie) @@ -196,7 +186,7 @@ class FileDownloader(object): def to_stdout(self, message, skip_eol=False): """Print message to stdout if not in quiet mode.""" - if not self._params.get('quiet', False): + if not self.params.get('quiet', False): print u'%s%s' % (message, [u'\n', u''][skip_eol]), sys.stdout.flush() @@ -206,7 +196,7 @@ class FileDownloader(object): def fixed_template(self): """Checks if the output template is fixed.""" - return (re.search(ur'(?u)%\(.+?\)s', self._params['outtmpl']) is None) + return (re.search(ur'(?u)%\(.+?\)s', self.params['outtmpl']) is None) def trouble(self, message=None): """Determine action to take when a download problem appears. 
@@ -219,13 +209,13 @@ class FileDownloader(object): """ if message is not None: self.to_stderr(message) - if not self._params.get('ignoreerrors', False): + if not self.params.get('ignoreerrors', False): raise DownloadError(message) return 1 def slow_down(self, start_time, byte_counter): """Sleep if the download speed is over the rate limit.""" - rate_limit = self._params.get('ratelimit', None) + rate_limit = self.params.get('ratelimit', None) if rate_limit is None or byte_counter == 0: return now = time.time() @@ -253,7 +243,7 @@ class FileDownloader(object): """Download a given list of URLs.""" retcode = 0 if len(url_list) > 1 and self.fixed_template(): - raise SameFileError(self._params['outtmpl']) + raise SameFileError(self.params['outtmpl']) for url in url_list: suitable_found = False @@ -268,26 +258,26 @@ class FileDownloader(object): retcode = self.trouble() if len(results) > 1 and self.fixed_template(): - raise SameFileError(self._params['outtmpl']) + raise SameFileError(self.params['outtmpl']) for result in results: # Forced printings - if self._params.get('forcetitle', False): + if self.params.get('forcetitle', False): print result['title'] - if self._params.get('forceurl', False): + if self.params.get('forceurl', False): print result['url'] # Do nothing else if in simulate mode - if self._params.get('simulate', False): + if self.params.get('simulate', False): continue try: - filename = self._params['outtmpl'] % result + filename = self.params['outtmpl'] % result self.report_destination(filename) except (ValueError, KeyError), err: retcode = self.trouble('ERROR: invalid output template or system charset: %s' % str(err)) continue - if self._params['nooverwrites'] and os.path.exists(filename): + if self.params['nooverwrites'] and os.path.exists(filename): self.to_stderr('WARNING: file exists: %s; skipping' % filename) continue try: @@ -417,7 +407,7 @@ class InfoExtractor(object): def to_stdout(self, message): """Print message to stdout if downloader is not 
in quiet mode.""" - if self._downloader is None or not self._downloader.get_params().get('quiet', False): + if self._downloader is None or not self._downloader.params.get('quiet', False): print message def to_stderr(self, message): @@ -475,7 +465,7 @@ class YoutubeIE(InfoExtractor): username = None password = None - downloader_params = self._downloader.get_params() + downloader_params = self._downloader.params # Attempt to use provided username and password or .netrc data if downloader_params.get('username', None) is not None: @@ -549,7 +539,7 @@ class YoutubeIE(InfoExtractor): # Downloader parameters format_param = None if self._downloader is not None: - params = self._downloader.get_params() + params = self._downloader.params format_param = params.get('format', None) # Extension @@ -895,7 +885,7 @@ class PostProcessor(object): def to_stdout(self, message): """Print message to stdout if downloader is not in quiet mode.""" - if self._downloader is None or not self._downloader.get_params().get('quiet', False): + if self._downloader is None or not self._downloader.params.get('quiet', False): print message def to_stderr(self, message): From 053e77d6ed1a22fbbb84d40aff64617967d61563 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Wed, 4 Mar 2009 22:23:45 +0100 Subject: [PATCH 068/455] Remove old ignore patterns which are no longer needed --- .hgignore | 2 -- 1 file changed, 2 deletions(-) diff --git a/.hgignore b/.hgignore index c7ce8559e..986cf955b 100644 --- a/.hgignore +++ b/.hgignore @@ -1,4 +1,2 @@ syntax: glob -index.html -youtube-dl-* .*.swp From 79e75f66c88ee1de6018e518bcc1a33cd279f697 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Sat, 28 Mar 2009 10:26:41 +0100 Subject: [PATCH 069/455] Remove --best-quality option and add proper support for high definition format --- youtube-dl | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/youtube-dl b/youtube-dl index 345bf4df9..bf6973480 100755 --- a/youtube-dl +++ b/youtube-dl @@ 
-543,7 +543,11 @@ class YoutubeIE(InfoExtractor): format_param = params.get('format', None) # Extension - video_extension = {'18': 'mp4', '17': '3gp'}.get(format_param, 'flv') + video_extension = { + '17': '3gp', + '18': 'mp4', + '22': 'mp4', + }.get(format_param, 'flv') # Normalize URL, including format normalized_url = 'http://www.youtube.com/watch?v=%s&gl=US&hl=en' % video_id @@ -960,10 +964,10 @@ if __name__ == '__main__': action='store_true', dest='gettitle', help='simulate, quiet but print title', default=False) parser.add_option('-f', '--format', dest='format', metavar='FMT', help='video format code') - parser.add_option('-b', '--best-quality', - action='store_const', dest='format', help='alias for -f 18', const='18') parser.add_option('-m', '--mobile-version', action='store_const', dest='format', help='alias for -f 17', const='17') + parser.add_option('-d', '--high-def', + action='store_const', dest='format', help='alias for -f 22', const='22') parser.add_option('-i', '--ignore-errors', action='store_true', dest='ignoreerrors', help='continue on download errors', default=False) parser.add_option('-r', '--rate-limit', From c6b311c5248ec53bfa9267f274ed690783cbc3f1 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Sat, 28 Mar 2009 10:27:40 +0100 Subject: [PATCH 070/455] Set version number for release --- youtube-dl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index bf6973480..7c01f4fdb 100755 --- a/youtube-dl +++ b/youtube-dl @@ -935,7 +935,7 @@ if __name__ == '__main__': # Parse command line parser = optparse.OptionParser( usage='Usage: %prog [options] url...', - version='INTERNAL', + version='2009.03.28', conflict_handler='resolve', ) parser.add_option('-h', '--help', From e54930cf713e2e7b3117d94e1467f0acbfedf375 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Sat, 28 Mar 2009 10:34:37 +0100 Subject: [PATCH 071/455] Switch to "INTERNAL" version again --- youtube-dl | 2 +- 1 file changed, 1 insertion(+), 1 
deletion(-) diff --git a/youtube-dl b/youtube-dl index 7c01f4fdb..bf6973480 100755 --- a/youtube-dl +++ b/youtube-dl @@ -935,7 +935,7 @@ if __name__ == '__main__': # Parse command line parser = optparse.OptionParser( usage='Usage: %prog [options] url...', - version='2009.03.28', + version='INTERNAL', conflict_handler='resolve', ) parser.add_option('-h', '--help', From f995f7127c42b1f912bfbfd9f35b22267c9bf3e7 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Thu, 2 Apr 2009 20:23:13 +0200 Subject: [PATCH 072/455] Remove some extra whitespace --- youtube-dl | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/youtube-dl b/youtube-dl index bf6973480..ba760da47 100755 --- a/youtube-dl +++ b/youtube-dl @@ -18,7 +18,7 @@ import time import urllib import urllib2 -std_headers = { +std_headers = { 'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.0.5) Gecko/2008120122 Firefox/3.0.5', 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7', 'Accept': 'text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5', @@ -729,7 +729,7 @@ class YoutubeSearchIE(InfoExtractor): _MORE_PAGES_INDICATOR = r'>Next' _youtube_ie = None - def __init__(self, youtube_ie, downloader=None): + def __init__(self, youtube_ie, downloader=None): InfoExtractor.__init__(self, downloader) self._youtube_ie = youtube_ie @@ -752,11 +752,11 @@ class YoutubeSearchIE(InfoExtractor): prefix, query = query.split(':') prefix = prefix[8:] - if prefix == '': + if prefix == '': return self._download_n_results(query, 1) - elif prefix == 'all': + elif prefix == 'all': return self._download_n_results(query, -1) - else: + else: try: n = int(prefix) if n <= 0: From af6a92f4c954d8f0e6628076f751d6ac9935a6d6 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Sun, 5 Apr 2009 11:01:02 +0200 Subject: [PATCH 073/455] Fix issue #5 --- youtube-dl | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git 
a/youtube-dl b/youtube-dl index ba760da47..2cddafcf5 100755 --- a/youtube-dl +++ b/youtube-dl @@ -435,6 +435,29 @@ class YoutubeIE(InfoExtractor): def suitable(url): return (re.match(YoutubeIE._VALID_URL, url) is not None) + @staticmethod + def htmlentity_transform(matchobj): + """Transforms an HTML entity to a Unicode character.""" + entity = matchobj.group(1) + + # Known non-numeric HTML entity + if entity in htmlentitydefs.name2codepoint: + return unichr(htmlentitydefs.name2codepoint[entity]) + + # Unicode character + mobj = re.match(ur'(?u)#(x?\d+)', entity) + if mobj is not None: + numstr = mobj.group(1) + if numstr.startswith(u'x'): + base = 16 + numstr = u'0%s' % numstr + else: + base = 10 + return unichr(long(numstr, base)) + + # Unknown entity in name, return its literal representation + return (u'&%s;' % entity) + def report_lang(self): """Report attempt to set language.""" self.to_stdout(u'[youtube] Setting language') @@ -458,7 +481,7 @@ class YoutubeIE(InfoExtractor): def report_video_url(self, video_id, video_real_url): """Report extracted video URL.""" self.to_stdout(u'[youtube] %s: URL: %s' % (video_id, video_real_url)) - + def _real_initialize(self): if self._downloader is None: return @@ -585,7 +608,7 @@ class YoutubeIE(InfoExtractor): self.to_stderr(u'ERROR: unable to extract video title') return [None] video_title = mobj.group(1).decode('utf-8') - video_title = re.sub(ur'(?u)&(.+?);', lambda x: unichr(htmlentitydefs.name2codepoint[x.group(1)]), video_title) + video_title = re.sub(ur'(?u)&(.+?);', self.htmlentity_transform, video_title) video_title = video_title.replace(os.sep, u'%') # simplified title From 763826cf2cc4d4944bf3098528e013b49b089248 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Mon, 6 Apr 2009 17:14:09 +0200 Subject: [PATCH 074/455] Establish version number --- youtube-dl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index 2cddafcf5..4f767cf37 100755 --- a/youtube-dl +++ b/youtube-dl 
@@ -958,7 +958,7 @@ if __name__ == '__main__': # Parse command line parser = optparse.OptionParser( usage='Usage: %prog [options] url...', - version='INTERNAL', + version='2009.04.06', conflict_handler='resolve', ) parser.add_option('-h', '--help', From 1db4ff60542e695892d34729ebef5da4989eff72 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Mon, 6 Apr 2009 17:16:40 +0200 Subject: [PATCH 075/455] Restore internal version number indicator --- youtube-dl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index 4f767cf37..2cddafcf5 100755 --- a/youtube-dl +++ b/youtube-dl @@ -958,7 +958,7 @@ if __name__ == '__main__': # Parse command line parser = optparse.OptionParser( usage='Usage: %prog [options] url...', - version='2009.04.06', + version='INTERNAL', conflict_handler='resolve', ) parser.add_option('-h', '--help', From fd9288c315b10f6a741d435485028ce54eee4b24 Mon Sep 17 00:00:00 2001 From: "dannyc@omega" Date: Mon, 6 Apr 2009 17:39:16 -0700 Subject: [PATCH 076/455] Changed ytsearchall to retrieve max 1000 results --- youtube-dl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index 2cddafcf5..e41134546 100755 --- a/youtube-dl +++ b/youtube-dl @@ -751,6 +751,7 @@ class YoutubeSearchIE(InfoExtractor): _VIDEO_INDICATOR = r'href="/watch\?v=.+?"' _MORE_PAGES_INDICATOR = r'>Next' _youtube_ie = None + _max_youtube_results = 1000 def __init__(self, youtube_ie, downloader=None): InfoExtractor.__init__(self, downloader) @@ -778,7 +779,7 @@ class YoutubeSearchIE(InfoExtractor): if prefix == '': return self._download_n_results(query, 1) elif prefix == 'all': - return self._download_n_results(query, -1) + return self._download_n_results(query, self._max_youtube_results) else: try: n = int(prefix) From 257453b92bf07920814ce68620ee8fe0949fb6de Mon Sep 17 00:00:00 2001 From: dannycolligan Date: Tue, 7 Apr 2009 08:21:27 -0700 Subject: [PATCH 077/455] Added cap if user requests ytsearch number over 
1000 (with warning) --- youtube-dl | 3 +++ 1 file changed, 3 insertions(+) diff --git a/youtube-dl b/youtube-dl index e41134546..6757a896c 100755 --- a/youtube-dl +++ b/youtube-dl @@ -786,6 +786,9 @@ class YoutubeSearchIE(InfoExtractor): if n <= 0: self.to_stderr(u'ERROR: invalid download number %s for query "%s"' % (n, query)) return [None] + elif n > self._max_youtube_results: + self.to_stderr(u'WARNING: ytsearch returns max %i results (you requested %i)' % (self._max_youtube_results, n)) + n = self._max_youtube_results return self._download_n_results(query, n) except ValueError: # parsing prefix as int fails return self._download_n_results(query, 1) From c8619e01637ae33ff6ed2a770a6222d792cf0771 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Fri, 10 Apr 2009 00:59:59 +0200 Subject: [PATCH 078/455] Move the code to process an InfoExtractor result to its own method --- youtube-dl | 98 +++++++++++++++++++++++++++++++----------------------- 1 file changed, 56 insertions(+), 42 deletions(-) diff --git a/youtube-dl b/youtube-dl index 6757a896c..c526071e5 100755 --- a/youtube-dl +++ b/youtube-dl @@ -239,6 +239,48 @@ class FileDownloader(object): """Report download finished.""" self.to_stdout(u'') + def process_info(self, info_dict): + """Process a single dictionary returned by an InfoExtractor.""" + # Forced printings + if self.params.get('forcetitle', False): + print info_dict['title'] + if self.params.get('forceurl', False): + print info_dict['url'] + + # Do nothing else if in simulate mode + if self.params.get('simulate', False): + return 0 + + try: + filename = self.params['outtmpl'] % info_dict + self.report_destination(filename) + except (ValueError, KeyError), err: + return self.trouble('ERROR: invalid output template or system charset: %s' % str(err)) + if self.params['nooverwrites'] and os.path.exists(filename): + self.to_stderr('WARNING: file exists: %s; skipping' % filename) + return 0 + try: + self.pmkdir(filename) + except (OSError, IOError), err: + 
return self.trouble('ERROR: unable to create directories: %s' % str(err)) + try: + outstream = open(filename, 'wb') + except (OSError, IOError), err: + return self.trouble('ERROR: unable to open for writing: %s' % str(err)) + try: + self._do_download(outstream, info_dict['url']) + outstream.close() + except (OSError, IOError), err: + return self.trouble('ERROR: unable to write video data: %s' % str(err)) + except (urllib2.URLError, httplib.HTTPException, socket.error), err: + return self.trouble('ERROR: unable to download video data: %s' % str(err)) + try: + self.post_process(filename, info_dict) + except (PostProcessingError), err: + return self.trouble('ERROR: postprocessing: %s' % str(err)) + + return 0 + def download(self, url_list): """Download a given list of URLs.""" retcode = 0 @@ -248,64 +290,36 @@ class FileDownloader(object): for url in url_list: suitable_found = False for ie in self._ies: + # Go to next InfoExtractor if not suitable if not ie.suitable(url): continue + # Suitable InfoExtractor found suitable_found = True + + # Extract information from URL all_results = ie.extract(url) results = [x for x in all_results if x is not None] + + # See if there were problems extracting any information if len(results) != len(all_results): retcode = self.trouble() + # Two results could go to the same file if len(results) > 1 and self.fixed_template(): raise SameFileError(self.params['outtmpl']) + # Process each result for result in results: - # Forced printings - if self.params.get('forcetitle', False): - print result['title'] - if self.params.get('forceurl', False): - print result['url'] - - # Do nothing else if in simulate mode - if self.params.get('simulate', False): - continue + result = self.process_info(result) - try: - filename = self.params['outtmpl'] % result - self.report_destination(filename) - except (ValueError, KeyError), err: - retcode = self.trouble('ERROR: invalid output template or system charset: %s' % str(err)) - continue - if 
self.params['nooverwrites'] and os.path.exists(filename): - self.to_stderr('WARNING: file exists: %s; skipping' % filename) - continue - try: - self.pmkdir(filename) - except (OSError, IOError), err: - retcode = self.trouble('ERROR: unable to create directories: %s' % str(err)) - continue - try: - outstream = open(filename, 'wb') - except (OSError, IOError), err: - retcode = self.trouble('ERROR: unable to open for writing: %s' % str(err)) - continue - try: - self._do_download(outstream, result['url']) - outstream.close() - except (OSError, IOError), err: - retcode = self.trouble('ERROR: unable to write video data: %s' % str(err)) - continue - except (urllib2.URLError, httplib.HTTPException, socket.error), err: - retcode = self.trouble('ERROR: unable to download video data: %s' % str(err)) - continue - try: - self.post_process(filename, result) - except (PostProcessingError), err: - retcode = self.trouble('ERROR: postprocessing: %s' % str(err)) - continue + # Do not overwrite an error code with a success code + if result != 0: + retcode = result + # Suitable InfoExtractor had been found; go to next URL break + if not suitable_found: retcode = self.trouble('ERROR: no suitable InfoExtractor: %s' % url) From 9cee6d903589093e4f5d2b2ced297f4af1d65fef Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Wed, 15 Apr 2009 20:01:06 +0200 Subject: [PATCH 079/455] Minor adjustments to closely match what a web browser does --- youtube-dl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index c526071e5..0abfaef38 100755 --- a/youtube-dl +++ b/youtube-dl @@ -578,6 +578,8 @@ class YoutubeIE(InfoExtractor): if self._downloader is not None: params = self._downloader.params format_param = params.get('format', None) + if format_param is None: + format_param = '34' # Extension video_extension = { @@ -604,7 +606,7 @@ class YoutubeIE(InfoExtractor): if mobj is None: self.to_stderr(u'ERROR: unable to extract "t" parameter') return [None] - 
video_real_url = 'http://www.youtube.com/get_video?video_id=%s&t=%s' % (video_id, mobj.group(1)) + video_real_url = 'http://www.youtube.com/get_video?video_id=%s&t=%s&el=detailpage&ps=' % (video_id, mobj.group(1)) if format_param is not None: video_real_url = '%s&fmt=%s' % (video_real_url, format_param) self.report_video_url(video_id, video_real_url) From b0eddb2eb4b8cad325ba3224c8550a21f67e4315 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Wed, 15 Apr 2009 20:17:16 +0200 Subject: [PATCH 080/455] Update User-Agent string --- youtube-dl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index 0abfaef38..20e26f99c 100755 --- a/youtube-dl +++ b/youtube-dl @@ -19,7 +19,7 @@ import urllib import urllib2 std_headers = { - 'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.0.5) Gecko/2008120122 Firefox/3.0.5', + 'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US; rv:1.9.0.8) Gecko/2009032609 Firefox/3.0.8', 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7', 'Accept': 'text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5', 'Accept-Language': 'en-us,en;q=0.5', From 2f4d18a9f78f9d18635c6047e56b33c689b8d026 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Wed, 22 Apr 2009 22:57:21 +0200 Subject: [PATCH 081/455] Use getpreferredencoding() instead of getdefaultlocale() This fixes issue #7 and is recommended after a bug report I made to the Python team: http://bugs.python.org/issue5815 --- youtube-dl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index 20e26f99c..bc7e3b5fe 100755 --- a/youtube-dl +++ b/youtube-dl @@ -1056,7 +1056,7 @@ if __name__ == '__main__': youtube_search_ie = YoutubeSearchIE(youtube_ie) # File downloader - charset = locale.getdefaultlocale()[1] + charset = locale.getpreferredencoding() if charset is None: charset = 'ascii' fd = FileDownloader({ From 9bf386d74bdc3e1785df03e24a7f875482b43705 
Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Thu, 23 Apr 2009 21:43:04 +0200 Subject: [PATCH 082/455] Move the downloader return code to a class member This makes it possible to initialize it with value zero and later let the trouble() overwrite the value. It simplifies error treatment and paves the way for the InfoExtractor objects to call process_info() themselves, which should solve the issues with tor and some other problems. --- youtube-dl | 44 ++++++++++++++++++++++---------------------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/youtube-dl b/youtube-dl index bc7e3b5fe..5c3746b54 100755 --- a/youtube-dl +++ b/youtube-dl @@ -95,11 +95,13 @@ class FileDownloader(object): params = None _ies = [] _pps = [] + _download_retcode = None def __init__(self, params): """Create a FileDownloader object with the given options.""" self._ies = [] self._pps = [] + self._download_retcode = 0 self.params = params @staticmethod @@ -203,15 +205,13 @@ class FileDownloader(object): Depending on if the downloader has been configured to ignore download errors or not, this method may throw an exception or - not when errors are found, after printing the message. If it - doesn't raise, it returns an error code suitable to be returned - later as a program exit code to indicate error. + not when errors are found, after printing the message. 
""" if message is not None: self.to_stderr(message) if not self.params.get('ignoreerrors', False): raise DownloadError(message) - return 1 + self._download_retcode = 1 def slow_down(self, start_time, byte_counter): """Sleep if the download speed is over the rate limit.""" @@ -249,41 +249,45 @@ class FileDownloader(object): # Do nothing else if in simulate mode if self.params.get('simulate', False): - return 0 + return try: filename = self.params['outtmpl'] % info_dict self.report_destination(filename) except (ValueError, KeyError), err: - return self.trouble('ERROR: invalid output template or system charset: %s' % str(err)) + self.trouble('ERROR: invalid output template or system charset: %s' % str(err)) if self.params['nooverwrites'] and os.path.exists(filename): self.to_stderr('WARNING: file exists: %s; skipping' % filename) - return 0 + return try: self.pmkdir(filename) except (OSError, IOError), err: - return self.trouble('ERROR: unable to create directories: %s' % str(err)) + self.trouble('ERROR: unable to create directories: %s' % str(err)) + return try: outstream = open(filename, 'wb') except (OSError, IOError), err: - return self.trouble('ERROR: unable to open for writing: %s' % str(err)) + self.trouble('ERROR: unable to open for writing: %s' % str(err)) + return try: self._do_download(outstream, info_dict['url']) outstream.close() except (OSError, IOError), err: - return self.trouble('ERROR: unable to write video data: %s' % str(err)) + self.trouble('ERROR: unable to write video data: %s' % str(err)) + return except (urllib2.URLError, httplib.HTTPException, socket.error), err: - return self.trouble('ERROR: unable to download video data: %s' % str(err)) + self.trouble('ERROR: unable to download video data: %s' % str(err)) + return try: self.post_process(filename, info_dict) except (PostProcessingError), err: - return self.trouble('ERROR: postprocessing: %s' % str(err)) + self.trouble('ERROR: postprocessing: %s' % str(err)) + return - return 0 + return def 
download(self, url_list): """Download a given list of URLs.""" - retcode = 0 if len(url_list) > 1 and self.fixed_template(): raise SameFileError(self.params['outtmpl']) @@ -303,7 +307,7 @@ class FileDownloader(object): # See if there were problems extracting any information if len(results) != len(all_results): - retcode = self.trouble() + self.trouble() # Two results could go to the same file if len(results) > 1 and self.fixed_template(): @@ -311,19 +315,15 @@ class FileDownloader(object): # Process each result for result in results: - result = self.process_info(result) - - # Do not overwrite an error code with a success code - if result != 0: - retcode = result + self.process_info(result) # Suitable InfoExtractor had been found; go to next URL break if not suitable_found: - retcode = self.trouble('ERROR: no suitable InfoExtractor: %s' % url) + self.trouble('ERROR: no suitable InfoExtractor: %s' % url) - return retcode + return self._download_retcode def post_process(self, filename, ie_info): """Run the postprocessing chain on the given file.""" From 3aaf887e9890ca48858751b175d998efc35acb02 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Thu, 23 Apr 2009 21:58:03 +0200 Subject: [PATCH 083/455] Put the downloader in full control of output messages --- youtube-dl | 94 +++++++++++++++++++++--------------------------------- 1 file changed, 37 insertions(+), 57 deletions(-) diff --git a/youtube-dl b/youtube-dl index 5c3746b54..cedbf5977 100755 --- a/youtube-dl +++ b/youtube-dl @@ -419,15 +419,6 @@ class InfoExtractor(object): """Sets the downloader for this IE.""" self._downloader = downloader - def to_stdout(self, message): - """Print message to stdout if downloader is not in quiet mode.""" - if self._downloader is None or not self._downloader.params.get('quiet', False): - print message - - def to_stderr(self, message): - """Print message to stderr.""" - print >>sys.stderr, message - def _real_initialize(self): """Real initialization process. 
Redefine in subclasses.""" pass @@ -474,27 +465,27 @@ class YoutubeIE(InfoExtractor): def report_lang(self): """Report attempt to set language.""" - self.to_stdout(u'[youtube] Setting language') + self._downloader.to_stdout(u'[youtube] Setting language') def report_login(self): """Report attempt to log in.""" - self.to_stdout(u'[youtube] Logging in') + self._downloader.to_stdout(u'[youtube] Logging in') def report_age_confirmation(self): """Report attempt to confirm age.""" - self.to_stdout(u'[youtube] Confirming age') + self._downloader.to_stdout(u'[youtube] Confirming age') def report_webpage_download(self, video_id): """Report attempt to download webpage.""" - self.to_stdout(u'[youtube] %s: Downloading video webpage' % video_id) + self._downloader.to_stdout(u'[youtube] %s: Downloading video webpage' % video_id) def report_information_extraction(self, video_id): """Report attempt to extract video information.""" - self.to_stdout(u'[youtube] %s: Extracting video information' % video_id) + self._downloader.to_stdout(u'[youtube] %s: Extracting video information' % video_id) def report_video_url(self, video_id, video_real_url): """Report extracted video URL.""" - self.to_stdout(u'[youtube] %s: URL: %s' % (video_id, video_real_url)) + self._downloader.to_stdout(u'[youtube] %s: URL: %s' % (video_id, video_real_url)) def _real_initialize(self): if self._downloader is None: @@ -517,7 +508,7 @@ class YoutubeIE(InfoExtractor): else: raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE) except (IOError, netrc.NetrcParseError), err: - self.to_stderr(u'WARNING: parsing .netrc: %s' % str(err)) + self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % str(err)) return # Set language @@ -526,7 +517,7 @@ class YoutubeIE(InfoExtractor): self.report_lang() urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self.to_stderr(u'WARNING: unable to set language: %s' % str(err)) + 
self._downloader.to_stderr(u'WARNING: unable to set language: %s' % str(err)) return # No authentication to be performed @@ -546,10 +537,10 @@ class YoutubeIE(InfoExtractor): self.report_login() login_results = urllib2.urlopen(request).read() if re.search(r'(?i)]* name="loginForm"', login_results) is not None: - self.to_stderr(u'WARNING: unable to log in: bad username or password') + self._downloader.to_stderr(u'WARNING: unable to log in: bad username or password') return except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self.to_stderr(u'WARNING: unable to log in: %s' % str(err)) + self._downloader.to_stderr(u'WARNING: unable to log in: %s' % str(err)) return # Confirm age @@ -562,14 +553,14 @@ class YoutubeIE(InfoExtractor): self.report_age_confirmation() age_results = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self.to_stderr(u'ERROR: unable to confirm age: %s' % str(err)) + self._downloader.to_stderr(u'ERROR: unable to confirm age: %s' % str(err)) return def _real_extract(self, url): # Extract video id from URL mobj = re.match(self._VALID_URL, url) if mobj is None: - self.to_stderr(u'ERROR: invalid URL: %s' % url) + self._downloader.to_stderr(u'ERROR: invalid URL: %s' % url) return [None] video_id = mobj.group(2) @@ -578,8 +569,6 @@ class YoutubeIE(InfoExtractor): if self._downloader is not None: params = self._downloader.params format_param = params.get('format', None) - if format_param is None: - format_param = '34' # Extension video_extension = { @@ -597,14 +586,14 @@ class YoutubeIE(InfoExtractor): self.report_webpage_download(video_id) video_webpage = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self.to_stderr(u'ERROR: unable to download video webpage: %s' % str(err)) + self._downloader.to_stderr(u'ERROR: unable to download video webpage: %s' % str(err)) return [None] self.report_information_extraction(video_id) # "t" 
param mobj = re.search(r', "t": "([^"]+)"', video_webpage) if mobj is None: - self.to_stderr(u'ERROR: unable to extract "t" parameter') + self._downloader.to_stderr(u'ERROR: unable to extract "t" parameter') return [None] video_real_url = 'http://www.youtube.com/get_video?video_id=%s&t=%s&el=detailpage&ps=' % (video_id, mobj.group(1)) if format_param is not None: @@ -614,14 +603,14 @@ class YoutubeIE(InfoExtractor): # uploader mobj = re.search(r"var watchUsername = '([^']+)';", video_webpage) if mobj is None: - self.to_stderr(u'ERROR: unable to extract uploader nickname') + self._downloader.to_stderr(u'ERROR: unable to extract uploader nickname') return [None] video_uploader = mobj.group(1) # title mobj = re.search(r'(?im)YouTube - ([^<]*)', video_webpage) if mobj is None: - self.to_stderr(u'ERROR: unable to extract video title') + self._downloader.to_stderr(u'ERROR: unable to extract video title') return [None] video_title = mobj.group(1).decode('utf-8') video_title = re.sub(ur'(?u)&(.+?);', self.htmlentity_transform, video_title) @@ -631,7 +620,7 @@ class YoutubeIE(InfoExtractor): simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title) simple_title = simple_title.strip(ur'_') - # Return information + # Process video information return [{ 'id': video_id.decode('utf-8'), 'url': video_real_url.decode('utf-8'), @@ -658,19 +647,19 @@ class MetacafeIE(InfoExtractor): def report_disclaimer(self): """Report disclaimer retrieval.""" - self.to_stdout(u'[metacafe] Retrieving disclaimer') + self._downloader.to_stdout(u'[metacafe] Retrieving disclaimer') def report_age_confirmation(self): """Report attempt to confirm age.""" - self.to_stdout(u'[metacafe] Confirming age') + self._downloader.to_stdout(u'[metacafe] Confirming age') def report_download_webpage(self, video_id): """Report webpage download.""" - self.to_stdout(u'[metacafe] %s: Downloading webpage' % video_id) + self._downloader.to_stdout(u'[metacafe] %s: Downloading webpage' % video_id) def 
report_extraction(self, video_id): """Report information extraction.""" - self.to_stdout(u'[metacafe] %s: Extracting information' % video_id) + self._downloader.to_stdout(u'[metacafe] %s: Extracting information' % video_id) def _real_initialize(self): # Retrieve disclaimer @@ -679,7 +668,7 @@ class MetacafeIE(InfoExtractor): self.report_disclaimer() disclaimer = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self.to_stderr(u'ERROR: unable to retrieve disclaimer: %s' % str(err)) + self._downloader.to_stderr(u'ERROR: unable to retrieve disclaimer: %s' % str(err)) return # Confirm age @@ -692,14 +681,14 @@ class MetacafeIE(InfoExtractor): self.report_age_confirmation() disclaimer = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self.to_stderr(u'ERROR: unable to confirm age: %s' % str(err)) + self._downloader.to_stderr(u'ERROR: unable to confirm age: %s' % str(err)) return def _real_extract(self, url): # Extract id and simplified title from URL mobj = re.match(self._VALID_URL, url) if mobj is None: - self.to_stderr(u'ERROR: invalid URL: %s' % url) + self._downloader.to_stderr(u'ERROR: invalid URL: %s' % url) return [None] video_id = mobj.group(1) @@ -718,20 +707,20 @@ class MetacafeIE(InfoExtractor): self.report_download_webpage(video_id) webpage = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self.to_stderr(u'ERROR: unable retrieve video webpage: %s' % str(err)) + self._downloader.to_stderr(u'ERROR: unable retrieve video webpage: %s' % str(err)) return [None] # Extract URL, uploader and title from webpage self.report_extraction(video_id) mobj = re.search(r'(?m)"mediaURL":"(http.*?\.flv)"', webpage) if mobj is None: - self.to_stderr(u'ERROR: unable to extract media URL') + self._downloader.to_stderr(u'ERROR: unable to extract media URL') return [None] mediaURL = mobj.group(1).replace('\\', '') mobj = 
re.search(r'(?m)"gdaKey":"(.*?)"', webpage) if mobj is None: - self.to_stderr(u'ERROR: unable to extract gdaKey') + self._downloader.to_stderr(u'ERROR: unable to extract gdaKey') return [None] gdaKey = mobj.group(1) @@ -739,13 +728,13 @@ class MetacafeIE(InfoExtractor): mobj = re.search(r'(?im)(.*) - Video', webpage) if mobj is None: - self.to_stderr(u'ERROR: unable to extract title') + self._downloader.to_stderr(u'ERROR: unable to extract title') return [None] video_title = mobj.group(1).decode('utf-8') mobj = re.search(r'(?m)
  • .*?Submitter:
    (.*?)
  • ', webpage) if mobj is None: - self.to_stderr(u'ERROR: unable to extract uploader nickname') + self._downloader.to_stderr(u'ERROR: unable to extract uploader nickname') return [None] video_uploader = re.sub(r'<.*?>', '', mobj.group(1)) @@ -779,7 +768,7 @@ class YoutubeSearchIE(InfoExtractor): def report_download_page(self, query, pagenum): """Report attempt to download playlist page with given number.""" - self.to_stdout(u'[youtube] query "%s": Downloading page %s' % (query, pagenum)) + self._downloader.to_stdout(u'[youtube] query "%s": Downloading page %s' % (query, pagenum)) def _real_initialize(self): self._youtube_ie.initialize() @@ -787,7 +776,7 @@ class YoutubeSearchIE(InfoExtractor): def _real_extract(self, query): mobj = re.match(self._VALID_QUERY, query) if mobj is None: - self.to_stderr(u'ERROR: invalid search query "%s"' % query) + self._downloader.to_stderr(u'ERROR: invalid search query "%s"' % query) return [None] prefix, query = query.split(':') @@ -800,10 +789,10 @@ class YoutubeSearchIE(InfoExtractor): try: n = int(prefix) if n <= 0: - self.to_stderr(u'ERROR: invalid download number %s for query "%s"' % (n, query)) + self._downloader.to_stderr(u'ERROR: invalid download number %s for query "%s"' % (n, query)) return [None] elif n > self._max_youtube_results: - self.to_stderr(u'WARNING: ytsearch returns max %i results (you requested %i)' % (self._max_youtube_results, n)) + self._downloader.to_stderr(u'WARNING: ytsearch returns max %i results (you requested %i)' % (self._max_youtube_results, n)) n = self._max_youtube_results return self._download_n_results(query, n) except ValueError: # parsing prefix as int fails @@ -823,7 +812,7 @@ class YoutubeSearchIE(InfoExtractor): try: page = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self.to_stderr(u'ERROR: unable to download webpage: %s' % str(err)) + self._downloader.to_stderr(u'ERROR: unable to download webpage: %s' % str(err)) return [None] # 
Extract video identifiers @@ -866,7 +855,7 @@ class YoutubePlaylistIE(InfoExtractor): def report_download_page(self, playlist_id, pagenum): """Report attempt to download playlist page with given number.""" - self.to_stdout(u'[youtube] PL %s: Downloading page #%s' % (playlist_id, pagenum)) + self._downloader.to_stdout(u'[youtube] PL %s: Downloading page #%s' % (playlist_id, pagenum)) def _real_initialize(self): self._youtube_ie.initialize() @@ -875,7 +864,7 @@ class YoutubePlaylistIE(InfoExtractor): # Extract playlist id mobj = re.match(self._VALID_URL, url) if mobj is None: - self.to_stderr(u'ERROR: invalid url: %s' % url) + self._downloader.to_stderr(u'ERROR: invalid url: %s' % url) return [None] # Download playlist pages @@ -889,7 +878,7 @@ class YoutubePlaylistIE(InfoExtractor): try: page = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self.to_stderr(u'ERROR: unable to download webpage: %s' % str(err)) + self._downloader.to_stderr(u'ERROR: unable to download webpage: %s' % str(err)) return [None] # Extract video identifiers @@ -930,15 +919,6 @@ class PostProcessor(object): def __init__(self, downloader=None): self._downloader = downloader - def to_stdout(self, message): - """Print message to stdout if downloader is not in quiet mode.""" - if self._downloader is None or not self._downloader.params.get('quiet', False): - print message - - def to_stderr(self, message): - """Print message to stderr.""" - print >>sys.stderr, message - def set_downloader(self, downloader): """Sets the downloader for this PP.""" self._downloader = downloader From 147753eb3380137155039cccc0c5c4f0d4b7136d Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Thu, 23 Apr 2009 22:01:28 +0200 Subject: [PATCH 084/455] Replace self._downloader.to_stderr() with self._downloader.trouble() --- youtube-dl | 48 ++++++++++++++++++++++++------------------------ 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/youtube-dl 
b/youtube-dl index cedbf5977..034da35f5 100755 --- a/youtube-dl +++ b/youtube-dl @@ -508,7 +508,7 @@ class YoutubeIE(InfoExtractor): else: raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE) except (IOError, netrc.NetrcParseError), err: - self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % str(err)) + self._downloader.trouble(u'WARNING: parsing .netrc: %s' % str(err)) return # Set language @@ -517,7 +517,7 @@ class YoutubeIE(InfoExtractor): self.report_lang() urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.to_stderr(u'WARNING: unable to set language: %s' % str(err)) + self._downloader.trouble(u'WARNING: unable to set language: %s' % str(err)) return # No authentication to be performed @@ -537,10 +537,10 @@ class YoutubeIE(InfoExtractor): self.report_login() login_results = urllib2.urlopen(request).read() if re.search(r'(?i)]* name="loginForm"', login_results) is not None: - self._downloader.to_stderr(u'WARNING: unable to log in: bad username or password') + self._downloader.trouble(u'WARNING: unable to log in: bad username or password') return except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.to_stderr(u'WARNING: unable to log in: %s' % str(err)) + self._downloader.trouble(u'WARNING: unable to log in: %s' % str(err)) return # Confirm age @@ -553,14 +553,14 @@ class YoutubeIE(InfoExtractor): self.report_age_confirmation() age_results = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.to_stderr(u'ERROR: unable to confirm age: %s' % str(err)) + self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err)) return def _real_extract(self, url): # Extract video id from URL mobj = re.match(self._VALID_URL, url) if mobj is None: - self._downloader.to_stderr(u'ERROR: invalid URL: %s' % url) + self._downloader.trouble(u'ERROR: invalid URL: %s' % url) 
return [None] video_id = mobj.group(2) @@ -586,14 +586,14 @@ class YoutubeIE(InfoExtractor): self.report_webpage_download(video_id) video_webpage = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.to_stderr(u'ERROR: unable to download video webpage: %s' % str(err)) + self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err)) return [None] self.report_information_extraction(video_id) # "t" param mobj = re.search(r', "t": "([^"]+)"', video_webpage) if mobj is None: - self._downloader.to_stderr(u'ERROR: unable to extract "t" parameter') + self._downloader.trouble(u'ERROR: unable to extract "t" parameter') return [None] video_real_url = 'http://www.youtube.com/get_video?video_id=%s&t=%s&el=detailpage&ps=' % (video_id, mobj.group(1)) if format_param is not None: @@ -603,14 +603,14 @@ class YoutubeIE(InfoExtractor): # uploader mobj = re.search(r"var watchUsername = '([^']+)';", video_webpage) if mobj is None: - self._downloader.to_stderr(u'ERROR: unable to extract uploader nickname') + self._downloader.trouble(u'ERROR: unable to extract uploader nickname') return [None] video_uploader = mobj.group(1) # title mobj = re.search(r'(?im)YouTube - ([^<]*)', video_webpage) if mobj is None: - self._downloader.to_stderr(u'ERROR: unable to extract video title') + self._downloader.trouble(u'ERROR: unable to extract video title') return [None] video_title = mobj.group(1).decode('utf-8') video_title = re.sub(ur'(?u)&(.+?);', self.htmlentity_transform, video_title) @@ -668,7 +668,7 @@ class MetacafeIE(InfoExtractor): self.report_disclaimer() disclaimer = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.to_stderr(u'ERROR: unable to retrieve disclaimer: %s' % str(err)) + self._downloader.trouble(u'ERROR: unable to retrieve disclaimer: %s' % str(err)) return # Confirm age @@ -681,14 +681,14 @@ class 
MetacafeIE(InfoExtractor): self.report_age_confirmation() disclaimer = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.to_stderr(u'ERROR: unable to confirm age: %s' % str(err)) + self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err)) return def _real_extract(self, url): # Extract id and simplified title from URL mobj = re.match(self._VALID_URL, url) if mobj is None: - self._downloader.to_stderr(u'ERROR: invalid URL: %s' % url) + self._downloader.trouble(u'ERROR: invalid URL: %s' % url) return [None] video_id = mobj.group(1) @@ -707,20 +707,20 @@ class MetacafeIE(InfoExtractor): self.report_download_webpage(video_id) webpage = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.to_stderr(u'ERROR: unable retrieve video webpage: %s' % str(err)) + self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % str(err)) return [None] # Extract URL, uploader and title from webpage self.report_extraction(video_id) mobj = re.search(r'(?m)"mediaURL":"(http.*?\.flv)"', webpage) if mobj is None: - self._downloader.to_stderr(u'ERROR: unable to extract media URL') + self._downloader.trouble(u'ERROR: unable to extract media URL') return [None] mediaURL = mobj.group(1).replace('\\', '') mobj = re.search(r'(?m)"gdaKey":"(.*?)"', webpage) if mobj is None: - self._downloader.to_stderr(u'ERROR: unable to extract gdaKey') + self._downloader.trouble(u'ERROR: unable to extract gdaKey') return [None] gdaKey = mobj.group(1) @@ -728,13 +728,13 @@ class MetacafeIE(InfoExtractor): mobj = re.search(r'(?im)(.*) - Video', webpage) if mobj is None: - self._downloader.to_stderr(u'ERROR: unable to extract title') + self._downloader.trouble(u'ERROR: unable to extract title') return [None] video_title = mobj.group(1).decode('utf-8') mobj = re.search(r'(?m)
  • .*?Submitter:
    (.*?)
  • ', webpage) if mobj is None: - self._downloader.to_stderr(u'ERROR: unable to extract uploader nickname') + self._downloader.trouble(u'ERROR: unable to extract uploader nickname') return [None] video_uploader = re.sub(r'<.*?>', '', mobj.group(1)) @@ -776,7 +776,7 @@ class YoutubeSearchIE(InfoExtractor): def _real_extract(self, query): mobj = re.match(self._VALID_QUERY, query) if mobj is None: - self._downloader.to_stderr(u'ERROR: invalid search query "%s"' % query) + self._downloader.trouble(u'ERROR: invalid search query "%s"' % query) return [None] prefix, query = query.split(':') @@ -789,10 +789,10 @@ class YoutubeSearchIE(InfoExtractor): try: n = int(prefix) if n <= 0: - self._downloader.to_stderr(u'ERROR: invalid download number %s for query "%s"' % (n, query)) + self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query)) return [None] elif n > self._max_youtube_results: - self._downloader.to_stderr(u'WARNING: ytsearch returns max %i results (you requested %i)' % (self._max_youtube_results, n)) + self._downloader.trouble(u'WARNING: ytsearch returns max %i results (you requested %i)' % (self._max_youtube_results, n)) n = self._max_youtube_results return self._download_n_results(query, n) except ValueError: # parsing prefix as int fails @@ -812,7 +812,7 @@ class YoutubeSearchIE(InfoExtractor): try: page = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.to_stderr(u'ERROR: unable to download webpage: %s' % str(err)) + self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err)) return [None] # Extract video identifiers @@ -864,7 +864,7 @@ class YoutubePlaylistIE(InfoExtractor): # Extract playlist id mobj = re.match(self._VALID_URL, url) if mobj is None: - self._downloader.to_stderr(u'ERROR: invalid url: %s' % url) + self._downloader.trouble(u'ERROR: invalid url: %s' % url) return [None] # Download playlist pages @@ -878,7 +878,7 @@ class 
YoutubePlaylistIE(InfoExtractor): try: page = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.to_stderr(u'ERROR: unable to download webpage: %s' % str(err)) + self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err)) return [None] # Extract video identifiers From 6f21f686293ff94889978c1f39256029d2409d1b Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Thu, 23 Apr 2009 22:20:06 +0200 Subject: [PATCH 085/455] Download videos after extracting information This is achieved by letting the InfoExtractors instruct its downloader to process the information dictionary just after extracting the information. As a consequence, some code is simplified too. --- youtube-dl | 99 ++++++++++++++++++++++++------------------------------ 1 file changed, 44 insertions(+), 55 deletions(-) diff --git a/youtube-dl b/youtube-dl index 034da35f5..545bcb653 100755 --- a/youtube-dl +++ b/youtube-dl @@ -301,21 +301,8 @@ class FileDownloader(object): # Suitable InfoExtractor found suitable_found = True - # Extract information from URL - all_results = ie.extract(url) - results = [x for x in all_results if x is not None] - - # See if there were problems extracting any information - if len(results) != len(all_results): - self.trouble() - - # Two results could go to the same file - if len(results) > 1 and self.fixed_template(): - raise SameFileError(self.params['outtmpl']) - - # Process each result - for result in results: - self.process_info(result) + # Extract information from URL and process it + ie.extract(url) # Suitable InfoExtractor had been found; go to next URL break @@ -508,7 +495,7 @@ class YoutubeIE(InfoExtractor): else: raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE) except (IOError, netrc.NetrcParseError), err: - self._downloader.trouble(u'WARNING: parsing .netrc: %s' % str(err)) + self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % str(err)) return # Set 
language @@ -517,7 +504,7 @@ class YoutubeIE(InfoExtractor): self.report_lang() urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'WARNING: unable to set language: %s' % str(err)) + self._downloader.to_stderr(u'WARNING: unable to set language: %s' % str(err)) return # No authentication to be performed @@ -537,10 +524,10 @@ class YoutubeIE(InfoExtractor): self.report_login() login_results = urllib2.urlopen(request).read() if re.search(r'(?i)]* name="loginForm"', login_results) is not None: - self._downloader.trouble(u'WARNING: unable to log in: bad username or password') + self._downloader.to_stderr(u'WARNING: unable to log in: bad username or password') return except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'WARNING: unable to log in: %s' % str(err)) + self._downloader.to_stderr(u'WARNING: unable to log in: %s' % str(err)) return # Confirm age @@ -561,7 +548,7 @@ class YoutubeIE(InfoExtractor): mobj = re.match(self._VALID_URL, url) if mobj is None: self._downloader.trouble(u'ERROR: invalid URL: %s' % url) - return [None] + return video_id = mobj.group(2) # Downloader parameters @@ -587,14 +574,14 @@ class YoutubeIE(InfoExtractor): video_webpage = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err)) - return [None] + return self.report_information_extraction(video_id) # "t" param mobj = re.search(r', "t": "([^"]+)"', video_webpage) if mobj is None: self._downloader.trouble(u'ERROR: unable to extract "t" parameter') - return [None] + return video_real_url = 'http://www.youtube.com/get_video?video_id=%s&t=%s&el=detailpage&ps=' % (video_id, mobj.group(1)) if format_param is not None: video_real_url = '%s&fmt=%s' % (video_real_url, format_param) @@ -604,14 +591,14 @@ class YoutubeIE(InfoExtractor): mobj = 
re.search(r"var watchUsername = '([^']+)';", video_webpage) if mobj is None: self._downloader.trouble(u'ERROR: unable to extract uploader nickname') - return [None] + return video_uploader = mobj.group(1) # title mobj = re.search(r'(?im)YouTube - ([^<]*)', video_webpage) if mobj is None: self._downloader.trouble(u'ERROR: unable to extract video title') - return [None] + return video_title = mobj.group(1).decode('utf-8') video_title = re.sub(ur'(?u)&(.+?);', self.htmlentity_transform, video_title) video_title = video_title.replace(os.sep, u'%') @@ -621,14 +608,14 @@ class YoutubeIE(InfoExtractor): simple_title = simple_title.strip(ur'_') # Process video information - return [{ + self._downloader.process_info({ 'id': video_id.decode('utf-8'), 'url': video_real_url.decode('utf-8'), 'uploader': video_uploader.decode('utf-8'), 'title': video_title, 'stitle': simple_title, 'ext': video_extension.decode('utf-8'), - }] + }) class MetacafeIE(InfoExtractor): """Information Extractor for metacafe.com.""" @@ -689,14 +676,15 @@ class MetacafeIE(InfoExtractor): mobj = re.match(self._VALID_URL, url) if mobj is None: self._downloader.trouble(u'ERROR: invalid URL: %s' % url) - return [None] + return video_id = mobj.group(1) # Check if video comes from YouTube mobj2 = re.match(r'^yt-(.*)$', video_id) if mobj2 is not None: - return self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % mobj2.group(1)) + self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % mobj2.group(1)) + return simple_title = mobj.group(2).decode('utf-8') video_extension = 'flv' @@ -708,20 +696,20 @@ class MetacafeIE(InfoExtractor): webpage = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % str(err)) - return [None] + return # Extract URL, uploader and title from webpage self.report_extraction(video_id) mobj = re.search(r'(?m)"mediaURL":"(http.*?\.flv)"', webpage) if mobj is 
None: self._downloader.trouble(u'ERROR: unable to extract media URL') - return [None] + return mediaURL = mobj.group(1).replace('\\', '') mobj = re.search(r'(?m)"gdaKey":"(.*?)"', webpage) if mobj is None: self._downloader.trouble(u'ERROR: unable to extract gdaKey') - return [None] + return gdaKey = mobj.group(1) video_url = '%s?__gda__=%s' % (mediaURL, gdaKey) @@ -729,24 +717,24 @@ class MetacafeIE(InfoExtractor): mobj = re.search(r'(?im)(.*) - Video', webpage) if mobj is None: self._downloader.trouble(u'ERROR: unable to extract title') - return [None] + return video_title = mobj.group(1).decode('utf-8') mobj = re.search(r'(?m)
  • .*?Submitter:
    (.*?)
  • ', webpage) if mobj is None: self._downloader.trouble(u'ERROR: unable to extract uploader nickname') - return [None] + return video_uploader = re.sub(r'<.*?>', '', mobj.group(1)) - # Return information - return [{ + # Process video information + self._downloader.process_info({ 'id': video_id.decode('utf-8'), 'url': video_url.decode('utf-8'), 'uploader': video_uploader.decode('utf-8'), 'title': video_title, 'stitle': simple_title, 'ext': video_extension.decode('utf-8'), - }] + }) class YoutubeSearchIE(InfoExtractor): @@ -777,26 +765,30 @@ class YoutubeSearchIE(InfoExtractor): mobj = re.match(self._VALID_QUERY, query) if mobj is None: self._downloader.trouble(u'ERROR: invalid search query "%s"' % query) - return [None] + return prefix, query = query.split(':') prefix = prefix[8:] if prefix == '': - return self._download_n_results(query, 1) + self._download_n_results(query, 1) + return elif prefix == 'all': - return self._download_n_results(query, self._max_youtube_results) + self._download_n_results(query, self._max_youtube_results) + return else: try: n = int(prefix) if n <= 0: self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query)) - return [None] + return elif n > self._max_youtube_results: - self._downloader.trouble(u'WARNING: ytsearch returns max %i results (you requested %i)' % (self._max_youtube_results, n)) + self._downloader.to_stderr(u'WARNING: ytsearch returns max %i results (you requested %i)' % (self._max_youtube_results, n)) n = self._max_youtube_results - return self._download_n_results(query, n) + self._download_n_results(query, n) + return except ValueError: # parsing prefix as int fails - return self._download_n_results(query, 1) + self._download_n_results(query, 1) + return def _download_n_results(self, query, n): """Downloads a specified number of results for a query""" @@ -813,7 +805,7 @@ class YoutubeSearchIE(InfoExtractor): page = urllib2.urlopen(request).read() except (urllib2.URLError, 
httplib.HTTPException, socket.error), err: self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err)) - return [None] + return # Extract video identifiers for mobj in re.finditer(self._VIDEO_INDICATOR, page): @@ -823,16 +815,14 @@ class YoutubeSearchIE(InfoExtractor): already_seen.add(video_id) if len(video_ids) == n: # Specified n videos reached - information = [] for id in video_ids: - information.extend(self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)) - return information + self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id) + return if self._MORE_PAGES_INDICATOR not in page: - information = [] for id in video_ids: - information.extend(self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)) - return information + self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id) + return pagenum = pagenum + 1 @@ -865,7 +855,7 @@ class YoutubePlaylistIE(InfoExtractor): mobj = re.match(self._VALID_URL, url) if mobj is None: self._downloader.trouble(u'ERROR: invalid url: %s' % url) - return [None] + return # Download playlist pages playlist_id = mobj.group(1) @@ -879,7 +869,7 @@ class YoutubePlaylistIE(InfoExtractor): page = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err)) - return [None] + return # Extract video identifiers ids_in_page = [] @@ -892,10 +882,9 @@ class YoutubePlaylistIE(InfoExtractor): break pagenum = pagenum + 1 - information = [] for id in video_ids: - information.extend(self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)) - return information + self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id) + return class PostProcessor(object): """Post Processor class. 
From 2851b2ca18be6a34300075a94003b14865b53611 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Thu, 23 Apr 2009 22:34:58 +0200 Subject: [PATCH 086/455] Update internal documentation to reflect the new behaviour --- youtube-dl | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/youtube-dl b/youtube-dl index 545bcb653..0ec7adb1d 100755 --- a/youtube-dl +++ b/youtube-dl @@ -65,9 +65,10 @@ class FileDownloader(object): For this, file downloader objects have a method that allows InfoExtractors to be registered in a given order. When it is passed a URL, the file downloader handles it to the first InfoExtractor it - finds that reports being able to handle it. The InfoExtractor returns - all the information to the FileDownloader and the latter downloads the - file or does whatever it's instructed to do. + finds that reports being able to handle it. The InfoExtractor extracts + all the information about the video or videos the URL refers to, and + asks the FileDownloader to process the video information, possibly + downloading the video. File downloaders accept a lot of parameters. In order not to saturate the object constructor with arguments, it receives a dictionary of @@ -360,9 +361,10 @@ class InfoExtractor(object): Information extractors are the classes that, given a URL, extract information from the video (or videos) the URL refers to. This information includes the real video URL, the video title and simplified - title, author and others. It is returned in a list of dictionaries when - calling its extract() method. It is a list because a URL can refer to - more than one video (think of playlists). The dictionaries must include + title, author and others. The information is stored in a dictionary + which is then passed to the FileDownloader. The FileDownloader + processes this information possibly downloading the video to the file + system, among other possible outcomes. 
The dictionaries must include the following fields: id: Video identifier. From 98164eb3b9e90c6cd4711343449f64154e3e4079 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Sat, 25 Apr 2009 11:11:11 +0200 Subject: [PATCH 087/455] Fix some minor unicode-related problems --- youtube-dl | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/youtube-dl b/youtube-dl index 0ec7adb1d..f276d9caa 100755 --- a/youtube-dl +++ b/youtube-dl @@ -190,7 +190,7 @@ class FileDownloader(object): def to_stdout(self, message, skip_eol=False): """Print message to stdout if not in quiet mode.""" if not self.params.get('quiet', False): - print u'%s%s' % (message, [u'\n', u''][skip_eol]), + print (u'%s%s' % (message, [u'\n', u''][skip_eol])).encode(locale.getpreferredencoding()), sys.stdout.flush() def to_stderr(self, message): @@ -244,9 +244,9 @@ class FileDownloader(object): """Process a single dictionary returned by an InfoExtractor.""" # Forced printings if self.params.get('forcetitle', False): - print info_dict['title'] + print info_dict['title'].encode(locale.getpreferredencoding()) if self.params.get('forceurl', False): - print info_dict['url'] + print info_dict['url'].encode(locale.getpreferredencoding()) # Do nothing else if in simulate mode if self.params.get('simulate', False): @@ -1027,9 +1027,6 @@ if __name__ == '__main__': youtube_search_ie = YoutubeSearchIE(youtube_ie) # File downloader - charset = locale.getpreferredencoding() - if charset is None: - charset = 'ascii' fd = FileDownloader({ 'usenetrc': opts.usenetrc, 'username': opts.username, @@ -1039,7 +1036,7 @@ if __name__ == '__main__': 'forcetitle': opts.gettitle, 'simulate': (opts.simulate or opts.geturl or opts.gettitle), 'format': opts.format, - 'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(charset)) + 'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(locale.getpreferredencoding())) or (opts.usetitle and u'%(stitle)s-%(id)s.%(ext)s') or (opts.useliteral and 
u'%(title)s-%(id)s.%(ext)s') or u'%(id)s.%(ext)s'), From dbccb6cd84347de647e8ea34da9ed27e4e664a39 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Sat, 25 Apr 2009 11:52:33 +0200 Subject: [PATCH 088/455] Fix code for metacafe.com (this fixes issue #8) --- youtube-dl | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/youtube-dl b/youtube-dl index f276d9caa..8764acda0 100755 --- a/youtube-dl +++ b/youtube-dl @@ -624,6 +624,7 @@ class MetacafeIE(InfoExtractor): _VALID_URL = r'(?:http://)?(?:www\.)?metacafe\.com/watch/([^/]+)/([^/]+)/.*' _DISCLAIMER = 'http://www.metacafe.com/family_filter/' + _FILTER_POST = 'http://www.metacafe.com/f/index.php?inputType=filter&controllerGroup=user' _youtube_ie = None def __init__(self, youtube_ie, downloader=None): @@ -665,7 +666,7 @@ class MetacafeIE(InfoExtractor): 'filters': '0', 'submit': "Continue - I'm over 18", } - request = urllib2.Request('http://www.metacafe.com/', urllib.urlencode(disclaimer_form), std_headers) + request = urllib2.Request(self._FILTER_POST, urllib.urlencode(disclaimer_form), std_headers) try: self.report_age_confirmation() disclaimer = urllib2.urlopen(request).read() @@ -702,13 +703,13 @@ class MetacafeIE(InfoExtractor): # Extract URL, uploader and title from webpage self.report_extraction(video_id) - mobj = re.search(r'(?m)"mediaURL":"(http.*?\.flv)"', webpage) + mobj = re.search(r'(?m)&mediaURL=(http.*?\.flv)', webpage) if mobj is None: self._downloader.trouble(u'ERROR: unable to extract media URL') return - mediaURL = mobj.group(1).replace('\\', '') + mediaURL = urllib.unquote(mobj.group(1)) - mobj = re.search(r'(?m)"gdaKey":"(.*?)"', webpage) + mobj = re.search(r'(?m)&gdaKey=(.*?)&', webpage) if mobj is None: self._downloader.trouble(u'ERROR: unable to extract gdaKey') return @@ -722,11 +723,11 @@ class MetacafeIE(InfoExtractor): return video_title = mobj.group(1).decode('utf-8') - mobj = re.search(r'(?m)
  • .*?Submitter:
    (.*?)
  • ', webpage) + mobj = re.search(r'(?ms)
  • .*?Submitter:.*?(.*?)<', webpage) if mobj is None: self._downloader.trouble(u'ERROR: unable to extract uploader nickname') return - video_uploader = re.sub(r'<.*?>', '', mobj.group(1)) + video_uploader = mobj.group(1) # Process video information self._downloader.process_info({ From 27c3383e2d87257aaf25a08d1658e694ffc8c289 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Sat, 25 Apr 2009 11:54:11 +0200 Subject: [PATCH 089/455] Set version number --- youtube-dl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index 8764acda0..a75af8af3 100755 --- a/youtube-dl +++ b/youtube-dl @@ -950,7 +950,7 @@ if __name__ == '__main__': # Parse command line parser = optparse.OptionParser( usage='Usage: %prog [options] url...', - version='INTERNAL', + version='2009.04.25', conflict_handler='resolve', ) parser.add_option('-h', '--help', From a825f0ca83e1bdb12fc6c88377defdfa28156f78 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Sat, 25 Apr 2009 11:56:01 +0200 Subject: [PATCH 090/455] Revert version number to INTERNAL --- youtube-dl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index a75af8af3..8764acda0 100755 --- a/youtube-dl +++ b/youtube-dl @@ -950,7 +950,7 @@ if __name__ == '__main__': # Parse command line parser = optparse.OptionParser( usage='Usage: %prog [options] url...', - version='2009.04.25', + version='INTERNAL', conflict_handler='resolve', ) parser.add_option('-h', '--help', From b65740e4741e82e41336f68bca81fb39a2f67ffe Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Sat, 25 Apr 2009 13:30:50 +0200 Subject: [PATCH 091/455] Skip blank lines in batch file -- fixes issue #9 --- youtube-dl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index 8764acda0..e20e59bf8 100755 --- a/youtube-dl +++ b/youtube-dl @@ -997,7 +997,9 @@ if __name__ == '__main__': batchurls = [] if opts.batchfile is not None: try: - batchurls = [line.strip() for 
line in open(opts.batchfile, 'r')] + batchurls = open(opts.batchfile, 'r').readlines() + batchurls = [x.strip() for x in batchurls] + batchurls = [x for x in batchurls if len(x) > 0] except IOError: sys.exit(u'ERROR: batch file could not be read') all_urls = batchurls + args From 2f11508adadc3a2bc5a7c0549071d114ae109a00 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Sat, 25 Apr 2009 14:33:52 +0200 Subject: [PATCH 092/455] Minor documentation change --- youtube-dl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index e20e59bf8..496ae036f 100755 --- a/youtube-dl +++ b/youtube-dl @@ -919,7 +919,7 @@ class PostProcessor(object): """Run the PostProcessor. The "information" argument is a dictionary like the ones - returned by InfoExtractors. The only difference is that this + composed by InfoExtractors. The only difference is that this one has an extra field called "filepath" that points to the downloaded file. From 8d2c83eda50f07447ea93c73e17b9c5330106fbc Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Sun, 26 Apr 2009 22:01:50 +0200 Subject: [PATCH 093/455] Update and correct (format,extension) table for YouTube --- youtube-dl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index 496ae036f..9fe58a354 100755 --- a/youtube-dl +++ b/youtube-dl @@ -561,7 +561,8 @@ class YoutubeIE(InfoExtractor): # Extension video_extension = { - '17': '3gp', + '13': '3gp', + '17': 'mp4', '18': 'mp4', '22': 'mp4', }.get(format_param, 'flv') From 7b7759f5a448de7dfda56a804a9124e674ec6765 Mon Sep 17 00:00:00 2001 From: dannycolligan Date: Mon, 27 Apr 2009 22:30:20 -0700 Subject: [PATCH 094/455] Added -b option and created option groups for help prompt --- youtube-dl | 240 +++++++++++++++++++++++++++++++++-------------------- 1 file changed, 148 insertions(+), 92 deletions(-) diff --git a/youtube-dl b/youtube-dl index 9fe58a354..d3acd9d0a 100755 --- a/youtube-dl +++ b/youtube-dl @@ -52,6 +52,13 @@ 
class PostProcessingError(Exception): """ pass +class UnavailableFormatError(Exception): + """Unavailable Format exception. + + This exception will be thrown when a video is requested + in a format that is not available for that video. + """ + class FileDownloader(object): """File Downloader class. @@ -260,33 +267,38 @@ class FileDownloader(object): if self.params['nooverwrites'] and os.path.exists(filename): self.to_stderr('WARNING: file exists: %s; skipping' % filename) return + try: self.pmkdir(filename) except (OSError, IOError), err: self.trouble('ERROR: unable to create directories: %s' % str(err)) return + try: outstream = open(filename, 'wb') except (OSError, IOError), err: self.trouble('ERROR: unable to open for writing: %s' % str(err)) return + try: self._do_download(outstream, info_dict['url']) outstream.close() except (OSError, IOError), err: - self.trouble('ERROR: unable to write video data: %s' % str(err)) - return + if info_dict['best_quality']: + raise UnavailableFormatError + else: + self.trouble('ERROR: unable to write video data: %s' % str(err)) + return except (urllib2.URLError, httplib.HTTPException, socket.error), err: self.trouble('ERROR: unable to download video data: %s' % str(err)) return + try: self.post_process(filename, info_dict) except (PostProcessingError), err: self.trouble('ERROR: postprocessing: %s' % str(err)) return - return - def download(self, url_list): """Download a given list of URLs.""" if len(url_list) > 1 and self.fixed_template(): @@ -424,6 +436,13 @@ class YoutubeIE(InfoExtractor): _LOGIN_URL = 'http://www.youtube.com/signup?next=/&gl=US&hl=en' _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en' _NETRC_MACHINE = 'youtube' + _available_formats = ['22', '18', '17', '13'] # listed in order of priority for -b flag + _video_extensions = { + '13': '3gp', + '17': 'mp4', + '18': 'mp4', + '22': 'mp4', + } @staticmethod def suitable(url): @@ -476,6 +495,10 @@ class YoutubeIE(InfoExtractor): """Report extracted 
video URL.""" self._downloader.to_stdout(u'[youtube] %s: URL: %s' % (video_id, video_real_url)) + def report_unavailable_format(self, video_id, format): + """Report extracted video URL.""" + self._downloader.to_stdout(u'[youtube] %s: Format %s not available' % (video_id, format)) + def _real_initialize(self): if self._downloader is None: return @@ -554,71 +577,87 @@ class YoutubeIE(InfoExtractor): video_id = mobj.group(2) # Downloader parameters + best_quality = False format_param = None + quality_index = 0 if self._downloader is not None: params = self._downloader.params format_param = params.get('format', None) + if format_param == '0': + format_param = self._available_formats[quality_index] + best_quality = True - # Extension - video_extension = { - '13': '3gp', - '17': 'mp4', - '18': 'mp4', - '22': 'mp4', - }.get(format_param, 'flv') + while True: + try: + # Extension + video_extension = self._video_extensions.get(format_param, 'flv') - # Normalize URL, including format - normalized_url = 'http://www.youtube.com/watch?v=%s&gl=US&hl=en' % video_id - if format_param is not None: - normalized_url = '%s&fmt=%s' % (normalized_url, format_param) - request = urllib2.Request(normalized_url, None, std_headers) - try: - self.report_webpage_download(video_id) - video_webpage = urllib2.urlopen(request).read() - except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err)) - return - self.report_information_extraction(video_id) - - # "t" param - mobj = re.search(r', "t": "([^"]+)"', video_webpage) - if mobj is None: - self._downloader.trouble(u'ERROR: unable to extract "t" parameter') - return - video_real_url = 'http://www.youtube.com/get_video?video_id=%s&t=%s&el=detailpage&ps=' % (video_id, mobj.group(1)) - if format_param is not None: - video_real_url = '%s&fmt=%s' % (video_real_url, format_param) - self.report_video_url(video_id, video_real_url) + # Normalize URL, including 
format + normalized_url = 'http://www.youtube.com/watch?v=%s&gl=US&hl=en' % video_id + if format_param is not None: + normalized_url = '%s&fmt=%s' % (normalized_url, format_param) + request = urllib2.Request(normalized_url, None, std_headers) + try: + self.report_webpage_download(video_id) + video_webpage = urllib2.urlopen(request).read() + except (urllib2.URLError, httplib.HTTPException, socket.error), err: + self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err)) + return + self.report_information_extraction(video_id) + + # "t" param + mobj = re.search(r', "t": "([^"]+)"', video_webpage) + if mobj is None: + self._downloader.trouble(u'ERROR: unable to extract "t" parameter') + return + video_real_url = 'http://www.youtube.com/get_video?video_id=%s&t=%s&el=detailpage&ps=' % (video_id, mobj.group(1)) + if format_param is not None: + video_real_url = '%s&fmt=%s' % (video_real_url, format_param) + self.report_video_url(video_id, video_real_url) - # uploader - mobj = re.search(r"var watchUsername = '([^']+)';", video_webpage) - if mobj is None: - self._downloader.trouble(u'ERROR: unable to extract uploader nickname') - return - video_uploader = mobj.group(1) + # uploader + mobj = re.search(r"var watchUsername = '([^']+)';", video_webpage) + if mobj is None: + self._downloader.trouble(u'ERROR: unable to extract uploader nickname') + return + video_uploader = mobj.group(1) - # title - mobj = re.search(r'(?im)YouTube - ([^<]*)', video_webpage) - if mobj is None: - self._downloader.trouble(u'ERROR: unable to extract video title') - return - video_title = mobj.group(1).decode('utf-8') - video_title = re.sub(ur'(?u)&(.+?);', self.htmlentity_transform, video_title) - video_title = video_title.replace(os.sep, u'%') + # title + mobj = re.search(r'(?im)YouTube - ([^<]*)', video_webpage) + if mobj is None: + self._downloader.trouble(u'ERROR: unable to extract video title') + return + video_title = mobj.group(1).decode('utf-8') + video_title = 
re.sub(ur'(?u)&(.+?);', self.htmlentity_transform, video_title) + video_title = video_title.replace(os.sep, u'%') - # simplified title - simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title) - simple_title = simple_title.strip(ur'_') + # simplified title + simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title) + simple_title = simple_title.strip(ur'_') - # Process video information - self._downloader.process_info({ - 'id': video_id.decode('utf-8'), - 'url': video_real_url.decode('utf-8'), - 'uploader': video_uploader.decode('utf-8'), - 'title': video_title, - 'stitle': simple_title, - 'ext': video_extension.decode('utf-8'), - }) + # Process video information + self._downloader.process_info({ + 'id': video_id.decode('utf-8'), + 'url': video_real_url.decode('utf-8'), + 'uploader': video_uploader.decode('utf-8'), + 'title': video_title, + 'stitle': simple_title, + 'ext': video_extension.decode('utf-8'), + 'best_quality': best_quality, + }) + + return + + except UnavailableFormatError: + if quality_index == len(self._available_formats) - 1: + # I don't ever expect this to happen + self._downloader.trouble(u'ERROR: no known formats available for video') + return + else: + self.report_unavailable_format(video_id, format_param) + quality_index += 1 + format_param = self._available_formats[quality_index] + continue class MetacafeIE(InfoExtractor): """Information Extractor for metacafe.com.""" @@ -738,7 +777,8 @@ class MetacafeIE(InfoExtractor): 'title': video_title, 'stitle': simple_title, 'ext': video_extension.decode('utf-8'), - }) + 'best_quality': False, # TODO + }) class YoutubeSearchIE(InfoExtractor): @@ -950,48 +990,64 @@ if __name__ == '__main__': # Parse command line parser = optparse.OptionParser( - usage='Usage: %prog [options] url...', - version='INTERNAL', - conflict_handler='resolve', - ) + usage='Usage: %prog [options] url...', + version='INTERNAL', + conflict_handler='resolve', + ) + 
parser.add_option('-h', '--help', action='help', help='print this help text and exit') parser.add_option('-v', '--version', action='version', help='print program version and exit') - parser.add_option('-u', '--username', - dest='username', metavar='UN', help='account username') - parser.add_option('-p', '--password', - dest='password', metavar='PW', help='account password') - parser.add_option('-o', '--output', - dest='outtmpl', metavar='TPL', help='output filename template') - parser.add_option('-q', '--quiet', - action='store_true', dest='quiet', help='activates quiet mode', default=False) - parser.add_option('-s', '--simulate', - action='store_true', dest='simulate', help='do not download video', default=False) - parser.add_option('-t', '--title', - action='store_true', dest='usetitle', help='use title in file name', default=False) - parser.add_option('-l', '--literal', - action='store_true', dest='useliteral', help='use literal title in file name', default=False) - parser.add_option('-n', '--netrc', - action='store_true', dest='usenetrc', help='use .netrc authentication data', default=False) - parser.add_option('-g', '--get-url', - action='store_true', dest='geturl', help='simulate, quiet but print URL', default=False) - parser.add_option('-e', '--get-title', - action='store_true', dest='gettitle', help='simulate, quiet but print title', default=False) - parser.add_option('-f', '--format', - dest='format', metavar='FMT', help='video format code') - parser.add_option('-m', '--mobile-version', - action='store_const', dest='format', help='alias for -f 17', const='17') - parser.add_option('-d', '--high-def', - action='store_const', dest='format', help='alias for -f 22', const='22') parser.add_option('-i', '--ignore-errors', action='store_true', dest='ignoreerrors', help='continue on download errors', default=False) parser.add_option('-r', '--rate-limit', dest='ratelimit', metavar='L', help='download rate limit (e.g. 
50k or 44.6m)') - parser.add_option('-a', '--batch-file', + + authentication = optparse.OptionGroup(parser, 'Authentication Options') + authentication.add_option('-u', '--username', + dest='username', metavar='UN', help='account username') + authentication.add_option('-p', '--password', + dest='password', metavar='PW', help='account password') + authentication.add_option('-n', '--netrc', + action='store_true', dest='usenetrc', help='use .netrc authentication data', default=False) + parser.add_option_group(authentication) + + video_format = optparse.OptionGroup(parser, 'Video Format Options') + video_format.add_option('-f', '--format', + dest='format', metavar='FMT', help='video format code') + video_format.add_option('-b', '--best-quality', + action='store_const', dest='format', help='download the best quality video possible', const='0') + video_format.add_option('-m', '--mobile-version', + action='store_const', dest='format', help='alias for -f 17', const='17') + video_format.add_option('-d', '--high-def', + action='store_const', dest='format', help='alias for -f 22', const='22') + parser.add_option_group(video_format) + + verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options') + verbosity.add_option('-q', '--quiet', + action='store_true', dest='quiet', help='activates quiet mode', default=False) + verbosity.add_option('-s', '--simulate', + action='store_true', dest='simulate', help='do not download video', default=False) + verbosity.add_option('-g', '--get-url', + action='store_true', dest='geturl', help='simulate, quiet but print URL', default=False) + verbosity.add_option('-e', '--get-title', + action='store_true', dest='gettitle', help='simulate, quiet but print title', default=False) + verbosity.add_option('-t', '--title', + action='store_true', dest='usetitle', help='use title in file name', default=False) + verbosity.add_option('-l', '--literal', + action='store_true', dest='useliteral', help='use literal title in file name', 
default=False) + parser.add_option_group(verbosity) + + filesystem = optparse.OptionGroup(parser, 'Filesystem Options') + filesystem.add_option('-o', '--output', + dest='outtmpl', metavar='TPL', help='output filename template') + filesystem.add_option('-a', '--batch-file', dest='batchfile', metavar='F', help='file containing URLs to download') - parser.add_option('-w', '--no-overwrites', + filesystem.add_option('-w', '--no-overwrites', action='store_true', dest='nooverwrites', help='do not overwrite files', default=False) + parser.add_option_group(filesystem) + (opts, args) = parser.parse_args() # Batch file verification From 2740c509b3c19fe8589b6aa6f746005aa12a7f82 Mon Sep 17 00:00:00 2001 From: dannycolligan Date: Tue, 28 Apr 2009 12:35:25 -0700 Subject: [PATCH 095/455] Fixed ambiguity of multiple video option specifiers by dissalowing it; changed some sys.ext calls to parser.error --- youtube-dl | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/youtube-dl b/youtube-dl index d3acd9d0a..8143de35e 100755 --- a/youtube-dl +++ b/youtube-dl @@ -1015,13 +1015,13 @@ if __name__ == '__main__': video_format = optparse.OptionGroup(parser, 'Video Format Options') video_format.add_option('-f', '--format', - dest='format', metavar='FMT', help='video format code') + action='append', dest='format', metavar='FMT', help='video format code') video_format.add_option('-b', '--best-quality', - action='store_const', dest='format', help='download the best quality video possible', const='0') + action='append_const', dest='format', help='download the best quality video possible', const='0') video_format.add_option('-m', '--mobile-version', - action='store_const', dest='format', help='alias for -f 17', const='17') + action='append_const', dest='format', help='alias for -f 17', const='17') video_format.add_option('-d', '--high-def', - action='store_const', dest='format', help='alias for -f 22', const='22') + action='append_const', dest='format', 
help='alias for -f 22', const='22') parser.add_option_group(video_format) verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options') @@ -1063,22 +1063,24 @@ if __name__ == '__main__': # Conflicting, missing and erroneous options if len(all_urls) < 1: - sys.exit(u'ERROR: you must provide at least one URL') + parser.error(u'you must provide at least one URL') if opts.usenetrc and (opts.username is not None or opts.password is not None): - sys.exit(u'ERROR: using .netrc conflicts with giving username/password') + parser.error(u'using .netrc conflicts with giving username/password') if opts.password is not None and opts.username is None: - sys.exit(u'ERROR: account username missing') + parser.error(u'account username missing') if opts.outtmpl is not None and (opts.useliteral or opts.usetitle): - sys.exit(u'ERROR: using output template conflicts with using title or literal title') + parser.error(u'using output template conflicts with using title or literal title') if opts.usetitle and opts.useliteral: - sys.exit(u'ERROR: using title conflicts with using literal title') + parser.error(u'using title conflicts with using literal title') if opts.username is not None and opts.password is None: opts.password = getpass.getpass(u'Type account password and press return:') if opts.ratelimit is not None: numeric_limit = FileDownloader.parse_bytes(opts.ratelimit) if numeric_limit is None: - sys.exit(u'ERROR: invalid rate limit specified') + parser.error(u'invalid rate limit specified') opts.ratelimit = numeric_limit + if len(opts.format) > 1: + parser.error(u'pass at most one of the video format option flags (-f, -b, -m, -d)') # Information extractors youtube_ie = YoutubeIE() @@ -1095,7 +1097,7 @@ if __name__ == '__main__': 'forceurl': opts.geturl, 'forcetitle': opts.gettitle, 'simulate': (opts.simulate or opts.geturl or opts.gettitle), - 'format': opts.format, + 'format': opts.format[0], 'outtmpl': ((opts.outtmpl is not None and 
opts.outtmpl.decode(locale.getpreferredencoding())) or (opts.usetitle and u'%(stitle)s-%(id)s.%(ext)s') or (opts.useliteral and u'%(title)s-%(id)s.%(ext)s') From 42bcd27d3bc30c12218630c68af364190f819cc2 Mon Sep 17 00:00:00 2001 From: dannycolligan Date: Tue, 28 Apr 2009 14:39:23 -0700 Subject: [PATCH 096/455] Some consistency changes and ghost-file bugfix after discussion with rg3 --- youtube-dl | 62 ++++++++++++++++++++++++++++++------------------------ 1 file changed, 35 insertions(+), 27 deletions(-) diff --git a/youtube-dl b/youtube-dl index 8143de35e..4acdecd79 100755 --- a/youtube-dl +++ b/youtube-dl @@ -284,11 +284,8 @@ class FileDownloader(object): self._do_download(outstream, info_dict['url']) outstream.close() except (OSError, IOError), err: - if info_dict['best_quality']: - raise UnavailableFormatError - else: - self.trouble('ERROR: unable to write video data: %s' % str(err)) - return + os.remove(filename) + raise UnavailableFormatError except (urllib2.URLError, httplib.HTTPException, socket.error), err: self.trouble('ERROR: unable to download video data: %s' % str(err)) return @@ -643,21 +640,25 @@ class YoutubeIE(InfoExtractor): 'title': video_title, 'stitle': simple_title, 'ext': video_extension.decode('utf-8'), - 'best_quality': best_quality, }) return - except UnavailableFormatError: - if quality_index == len(self._available_formats) - 1: - # I don't ever expect this to happen - self._downloader.trouble(u'ERROR: no known formats available for video') + except UnavailableFormatError, err: + if best_quality: + if quality_index == len(self._available_formats) - 1: + # I don't ever expect this to happen + self._downloader.trouble(u'ERROR: no known formats available for video') + return + else: + self.report_unavailable_format(video_id, format_param) + quality_index += 1 + format_param = self._available_formats[quality_index] + continue + else: + self._downloader.trouble('ERROR: format not available for video') return - else: - 
self.report_unavailable_format(video_id, format_param) - quality_index += 1 - format_param = self._available_formats[quality_index] - continue + class MetacafeIE(InfoExtractor): """Information Extractor for metacafe.com.""" @@ -769,16 +770,18 @@ class MetacafeIE(InfoExtractor): return video_uploader = mobj.group(1) - # Process video information - self._downloader.process_info({ - 'id': video_id.decode('utf-8'), - 'url': video_url.decode('utf-8'), - 'uploader': video_uploader.decode('utf-8'), - 'title': video_title, - 'stitle': simple_title, - 'ext': video_extension.decode('utf-8'), - 'best_quality': False, # TODO - }) + try: + # Process video information + self._downloader.process_info({ + 'id': video_id.decode('utf-8'), + 'url': video_url.decode('utf-8'), + 'uploader': video_uploader.decode('utf-8'), + 'title': video_title, + 'stitle': simple_title, + 'ext': video_extension.decode('utf-8'), + }) + except UnavailableFormatError: + self._downloader.trouble(u'ERROR: format not available for video') class YoutubeSearchIE(InfoExtractor): @@ -1079,8 +1082,13 @@ if __name__ == '__main__': if numeric_limit is None: parser.error(u'invalid rate limit specified') opts.ratelimit = numeric_limit - if len(opts.format) > 1: + if opts.format is not None and len(opts.format) > 1: parser.error(u'pass at most one of the video format option flags (-f, -b, -m, -d)') + if opts.format is None: + real_format = None + else: + real_format = opts.format[0] + # Information extractors youtube_ie = YoutubeIE() @@ -1097,7 +1105,7 @@ if __name__ == '__main__': 'forceurl': opts.geturl, 'forcetitle': opts.gettitle, 'simulate': (opts.simulate or opts.geturl or opts.gettitle), - 'format': opts.format[0], + 'format': real_format, 'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(locale.getpreferredencoding())) or (opts.usetitle and u'%(stitle)s-%(id)s.%(ext)s') or (opts.useliteral and u'%(title)s-%(id)s.%(ext)s') From 1c76e23e0fca5e16a1fc4f0baca58503ba7ec639 Mon Sep 17 00:00:00 2001 From: 
Ricardo Garcia Date: Wed, 29 Apr 2009 19:32:40 +0200 Subject: [PATCH 097/455] Move the -t and -l options to the filesystem group --- youtube-dl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/youtube-dl b/youtube-dl index 4acdecd79..91ee099ce 100755 --- a/youtube-dl +++ b/youtube-dl @@ -1036,13 +1036,13 @@ if __name__ == '__main__': action='store_true', dest='geturl', help='simulate, quiet but print URL', default=False) verbosity.add_option('-e', '--get-title', action='store_true', dest='gettitle', help='simulate, quiet but print title', default=False) - verbosity.add_option('-t', '--title', - action='store_true', dest='usetitle', help='use title in file name', default=False) - verbosity.add_option('-l', '--literal', - action='store_true', dest='useliteral', help='use literal title in file name', default=False) parser.add_option_group(verbosity) filesystem = optparse.OptionGroup(parser, 'Filesystem Options') + filesystem.add_option('-t', '--title', + action='store_true', dest='usetitle', help='use title in file name', default=False) + filesystem.add_option('-l', '--literal', + action='store_true', dest='useliteral', help='use literal title in file name', default=False) filesystem.add_option('-o', '--output', dest='outtmpl', metavar='TPL', help='output filename template') filesystem.add_option('-a', '--batch-file', From d09744d0553650065195a212a826e4267bcfed7b Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Sat, 2 May 2009 14:40:29 +0200 Subject: [PATCH 098/455] Add format 35 (flv) as second best in quality --- youtube-dl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index 91ee099ce..2708fe4e7 100755 --- a/youtube-dl +++ b/youtube-dl @@ -433,7 +433,7 @@ class YoutubeIE(InfoExtractor): _LOGIN_URL = 'http://www.youtube.com/signup?next=/&gl=US&hl=en' _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en' _NETRC_MACHINE = 'youtube' - _available_formats = ['22', '18', '17', '13'] # 
listed in order of priority for -b flag + _available_formats = ['22', '35', '18', '17', '13'] # listed in order of priority for -b flag _video_extensions = { '13': '3gp', '17': 'mp4', From ad274509aa1eba53d41f15e5226b81419991af64 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Mon, 4 May 2009 19:31:00 +0200 Subject: [PATCH 099/455] Add an "epoch" keyword to the output template --- youtube-dl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index 2708fe4e7..db9510f7b 100755 --- a/youtube-dl +++ b/youtube-dl @@ -260,7 +260,9 @@ class FileDownloader(object): return try: - filename = self.params['outtmpl'] % info_dict + template_dict = dict(info_dict) + template_dict['epoch'] = unicode(long(time.time())) + filename = self.params['outtmpl'] % template_dict self.report_destination(filename) except (ValueError, KeyError), err: self.trouble('ERROR: invalid output template or system charset: %s' % str(err)) From 43f35682e9cf509459097e6fdebe2f10c95094f1 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Mon, 11 May 2009 20:12:18 +0200 Subject: [PATCH 100/455] Put version number in place --- youtube-dl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index db9510f7b..08afab4f8 100755 --- a/youtube-dl +++ b/youtube-dl @@ -996,7 +996,7 @@ if __name__ == '__main__': # Parse command line parser = optparse.OptionParser( usage='Usage: %prog [options] url...', - version='INTERNAL', + version='2009.05.11', conflict_handler='resolve', ) From 0e543200095a6505ebedfb63be4317bba2c3506a Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Mon, 11 May 2009 20:13:44 +0200 Subject: [PATCH 101/455] Restore INTERNAL version number --- youtube-dl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index 08afab4f8..db9510f7b 100755 --- a/youtube-dl +++ b/youtube-dl @@ -996,7 +996,7 @@ if __name__ == '__main__': # Parse command line parser = optparse.OptionParser( 
usage='Usage: %prog [options] url...', - version='2009.05.11', + version='INTERNAL', conflict_handler='resolve', ) From b74c859d0ffe2250c7730f18f24e9515084e838a Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Wed, 13 May 2009 22:08:34 +0200 Subject: [PATCH 102/455] Use store_const instead of append_const as the latter requires Python 2.5 --- youtube-dl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube-dl b/youtube-dl index db9510f7b..b2dc7952d 100755 --- a/youtube-dl +++ b/youtube-dl @@ -1022,11 +1022,11 @@ if __name__ == '__main__': video_format.add_option('-f', '--format', action='append', dest='format', metavar='FMT', help='video format code') video_format.add_option('-b', '--best-quality', - action='append_const', dest='format', help='download the best quality video possible', const='0') + action='store_const', dest='format', help='download the best quality video possible', const='0') video_format.add_option('-m', '--mobile-version', - action='append_const', dest='format', help='alias for -f 17', const='17') + action='store_const', dest='format', help='alias for -f 17', const='17') video_format.add_option('-d', '--high-def', - action='append_const', dest='format', help='alias for -f 22', const='22') + action='store_const', dest='format', help='alias for -f 22', const='22') parser.add_option_group(video_format) verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options') From 152edc0d4cbd4fa5b220e6dd3ed398a471c9bf82 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Wed, 13 May 2009 22:09:20 +0200 Subject: [PATCH 103/455] Set version number --- youtube-dl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index b2dc7952d..bb982e22a 100755 --- a/youtube-dl +++ b/youtube-dl @@ -996,7 +996,7 @@ if __name__ == '__main__': # Parse command line parser = optparse.OptionParser( usage='Usage: %prog [options] url...', - version='INTERNAL', + version='2009.05.13', 
conflict_handler='resolve', ) From cab60d710afc4a00469d8b08a74d733802d2067b Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Wed, 13 May 2009 22:10:01 +0200 Subject: [PATCH 104/455] Put back INTERNAL version --- youtube-dl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index bb982e22a..b2dc7952d 100755 --- a/youtube-dl +++ b/youtube-dl @@ -996,7 +996,7 @@ if __name__ == '__main__': # Parse command line parser = optparse.OptionParser( usage='Usage: %prog [options] url...', - version='2009.05.13', + version='INTERNAL', conflict_handler='resolve', ) From 554f3e284c8740e0384e0389a2d7201d52b81d0e Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Sun, 17 May 2009 09:39:25 +0200 Subject: [PATCH 105/455] Add LATEST_VERSION to further ease checking which is the latest stable version --- LATEST_VERSION | 1 + 1 file changed, 1 insertion(+) create mode 100644 LATEST_VERSION diff --git a/LATEST_VERSION b/LATEST_VERSION new file mode 100644 index 000000000..aee3213d7 --- /dev/null +++ b/LATEST_VERSION @@ -0,0 +1 @@ +2009.05.13 From 097ba9472b8ac3fa832d73ec0d179640db9393b7 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Sun, 17 May 2009 09:43:05 +0200 Subject: [PATCH 106/455] Remove .hgignore from version tracking --- .hgignore | 2 -- 1 file changed, 2 deletions(-) delete mode 100644 .hgignore diff --git a/.hgignore b/.hgignore deleted file mode 100644 index 986cf955b..000000000 --- a/.hgignore +++ /dev/null @@ -1,2 +0,0 @@ -syntax: glob -.*.swp From 488f6194718cf882344d113052bc85662a638f54 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Thu, 21 May 2009 20:58:31 +0200 Subject: [PATCH 107/455] Close video file before removing it. 
--- youtube-dl | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube-dl b/youtube-dl index b2dc7952d..ec4c9b89c 100755 --- a/youtube-dl +++ b/youtube-dl @@ -286,6 +286,7 @@ class FileDownloader(object): self._do_download(outstream, info_dict['url']) outstream.close() except (OSError, IOError), err: + outstream.close() os.remove(filename) raise UnavailableFormatError except (urllib2.URLError, httplib.HTTPException, socket.error), err: From d69a1c9189311997ef7c311296e71b4c8fbdab41 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Thu, 21 May 2009 20:59:02 +0200 Subject: [PATCH 108/455] Handle "content too short" errors properly --- youtube-dl | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index ec4c9b89c..f6e472445 100755 --- a/youtube-dl +++ b/youtube-dl @@ -58,6 +58,22 @@ class UnavailableFormatError(Exception): This exception will be thrown when a video is requested in a format that is not available for that video. """ + pass + +class ContentTooShortError(Exception): + """Content Too Short exception. + + This exception may be raised by FileDownloader objects when a file they + download is too small for what the server announced first, indicating + the connection was probably interrupted. + """ + # Both in bytes + downloaded = None + expected = None + + def __init__(self, downloaded, expected): + self.downloaded = downloaded + self.expected = expected class FileDownloader(object): """File Downloader class. 
@@ -292,6 +308,9 @@ class FileDownloader(object): except (urllib2.URLError, httplib.HTTPException, socket.error), err: self.trouble('ERROR: unable to download video data: %s' % str(err)) return + except (ContentTooShortError, ), err: + self.trouble('ERROR: content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded)) + return try: self.post_process(filename, info_dict) @@ -365,7 +384,7 @@ class FileDownloader(object): self.report_finish() if data_len is not None and str(byte_counter) != data_len: - raise ValueError('Content too short: %s/%s bytes' % (byte_counter, data_len)) + raise ContentTooShortError(byte_counter, long(data_len)) class InfoExtractor(object): """Information Extractor class. From 1d50e3d153af01ca6dd825c6e1c94df93bc3bcbe Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Sat, 23 May 2009 00:19:17 +0200 Subject: [PATCH 109/455] Bump version number --- youtube-dl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index f6e472445..4831c11bf 100755 --- a/youtube-dl +++ b/youtube-dl @@ -1016,7 +1016,7 @@ if __name__ == '__main__': # Parse command line parser = optparse.OptionParser( usage='Usage: %prog [options] url...', - version='INTERNAL', + version='2009.05.23', conflict_handler='resolve', ) From 705804f5d113e0e326f9b426b99ac8996aef9990 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Sat, 23 May 2009 00:20:08 +0200 Subject: [PATCH 110/455] Update version number in LATEST_VERSION --- LATEST_VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/LATEST_VERSION b/LATEST_VERSION index aee3213d7..bdcb472bf 100644 --- a/LATEST_VERSION +++ b/LATEST_VERSION @@ -1 +1 @@ -2009.05.13 +2009.05.23 From 781daeabdb6ffa2b63bf8f7dec715ac8835c558b Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Sat, 23 May 2009 00:22:50 +0200 Subject: [PATCH 111/455] Restore "INTERNAL" version number --- youtube-dl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube-dl 
b/youtube-dl index 4831c11bf..f6e472445 100755 --- a/youtube-dl +++ b/youtube-dl @@ -1016,7 +1016,7 @@ if __name__ == '__main__': # Parse command line parser = optparse.OptionParser( usage='Usage: %prog [options] url...', - version='2009.05.23', + version='INTERNAL', conflict_handler='resolve', ) From cbfff4db630fef66847d326cdd70b9a6002c0d5b Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Sun, 24 May 2009 11:07:51 +0200 Subject: [PATCH 112/455] Verify URLs in simulate mode (fixes issue #22) --- youtube-dl | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/youtube-dl b/youtube-dl index f6e472445..0aae90505 100755 --- a/youtube-dl +++ b/youtube-dl @@ -200,6 +200,14 @@ class FileDownloader(object): multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower()) return long(round(number * multiplier)) + @staticmethod + def verify_url(url): + """Verify a URL is valid and data could be downloaded.""" + request = urllib2.Request(url, None, std_headers) + data = urllib2.urlopen(request) + data.read(1) + data.close() + def add_info_extractor(self, ie): """Add an InfoExtractor object to the end of the list.""" self._ies.append(ie) @@ -265,15 +273,21 @@ class FileDownloader(object): def process_info(self, info_dict): """Process a single dictionary returned by an InfoExtractor.""" - # Forced printings - if self.params.get('forcetitle', False): - print info_dict['title'].encode(locale.getpreferredencoding()) - if self.params.get('forceurl', False): - print info_dict['url'].encode(locale.getpreferredencoding()) - # Do nothing else if in simulate mode if self.params.get('simulate', False): + try: + self.verify_url(info_dict['url']) + except (OSError, IOError, urllib2.URLError, httplib.HTTPException, socket.error), err: + raise UnavailableFormatError + + # Forced printings + if self.params.get('forcetitle', False): + print info_dict['title'].encode(locale.getpreferredencoding()) + if self.params.get('forceurl', False): + print 
info_dict['url'].encode(locale.getpreferredencoding()) + return + try: template_dict = dict(info_dict) From 968aa884385ba78dc113a64b3dbbb1c5c6541cbd Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Sun, 24 May 2009 11:09:30 +0200 Subject: [PATCH 113/455] Only catch UnavailableFormatError in call to process_info --- youtube-dl | 85 +++++++++++++++++++++++++++--------------------------- 1 file changed, 42 insertions(+), 43 deletions(-) diff --git a/youtube-dl b/youtube-dl index 0aae90505..7532a142f 100755 --- a/youtube-dl +++ b/youtube-dl @@ -288,7 +288,6 @@ class FileDownloader(object): return - try: template_dict = dict(info_dict) template_dict['epoch'] = unicode(long(time.time())) @@ -621,53 +620,53 @@ class YoutubeIE(InfoExtractor): best_quality = True while True: + # Extension + video_extension = self._video_extensions.get(format_param, 'flv') + + # Normalize URL, including format + normalized_url = 'http://www.youtube.com/watch?v=%s&gl=US&hl=en' % video_id + if format_param is not None: + normalized_url = '%s&fmt=%s' % (normalized_url, format_param) + request = urllib2.Request(normalized_url, None, std_headers) try: - # Extension - video_extension = self._video_extensions.get(format_param, 'flv') + self.report_webpage_download(video_id) + video_webpage = urllib2.urlopen(request).read() + except (urllib2.URLError, httplib.HTTPException, socket.error), err: + self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err)) + return + self.report_information_extraction(video_id) + + # "t" param + mobj = re.search(r', "t": "([^"]+)"', video_webpage) + if mobj is None: + self._downloader.trouble(u'ERROR: unable to extract "t" parameter') + return + video_real_url = 'http://www.youtube.com/get_video?video_id=%s&t=%s&el=detailpage&ps=' % (video_id, mobj.group(1)) + if format_param is not None: + video_real_url = '%s&fmt=%s' % (video_real_url, format_param) + self.report_video_url(video_id, video_real_url) - # Normalize URL, including format - 
normalized_url = 'http://www.youtube.com/watch?v=%s&gl=US&hl=en' % video_id - if format_param is not None: - normalized_url = '%s&fmt=%s' % (normalized_url, format_param) - request = urllib2.Request(normalized_url, None, std_headers) - try: - self.report_webpage_download(video_id) - video_webpage = urllib2.urlopen(request).read() - except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err)) - return - self.report_information_extraction(video_id) - - # "t" param - mobj = re.search(r', "t": "([^"]+)"', video_webpage) - if mobj is None: - self._downloader.trouble(u'ERROR: unable to extract "t" parameter') - return - video_real_url = 'http://www.youtube.com/get_video?video_id=%s&t=%s&el=detailpage&ps=' % (video_id, mobj.group(1)) - if format_param is not None: - video_real_url = '%s&fmt=%s' % (video_real_url, format_param) - self.report_video_url(video_id, video_real_url) + # uploader + mobj = re.search(r"var watchUsername = '([^']+)';", video_webpage) + if mobj is None: + self._downloader.trouble(u'ERROR: unable to extract uploader nickname') + return + video_uploader = mobj.group(1) - # uploader - mobj = re.search(r"var watchUsername = '([^']+)';", video_webpage) - if mobj is None: - self._downloader.trouble(u'ERROR: unable to extract uploader nickname') - return - video_uploader = mobj.group(1) + # title + mobj = re.search(r'(?im)YouTube - ([^<]*)', video_webpage) + if mobj is None: + self._downloader.trouble(u'ERROR: unable to extract video title') + return + video_title = mobj.group(1).decode('utf-8') + video_title = re.sub(ur'(?u)&(.+?);', self.htmlentity_transform, video_title) + video_title = video_title.replace(os.sep, u'%') - # title - mobj = re.search(r'(?im)YouTube - ([^<]*)', video_webpage) - if mobj is None: - self._downloader.trouble(u'ERROR: unable to extract video title') - return - video_title = mobj.group(1).decode('utf-8') - video_title = 
re.sub(ur'(?u)&(.+?);', self.htmlentity_transform, video_title) - video_title = video_title.replace(os.sep, u'%') - - # simplified title - simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title) - simple_title = simple_title.strip(ur'_') + # simplified title + simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title) + simple_title = simple_title.strip(ur'_') + try: # Process video information self._downloader.process_info({ 'id': video_id.decode('utf-8'), From 320becd6920421b07326c245a06b996adc2b4397 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Mon, 25 May 2009 21:02:59 +0200 Subject: [PATCH 114/455] Remove trails from the "append_const" change (fixes issue #23) --- youtube-dl | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/youtube-dl b/youtube-dl index 7532a142f..5ead5e4c9 100755 --- a/youtube-dl +++ b/youtube-dl @@ -1117,13 +1117,6 @@ if __name__ == '__main__': if numeric_limit is None: parser.error(u'invalid rate limit specified') opts.ratelimit = numeric_limit - if opts.format is not None and len(opts.format) > 1: - parser.error(u'pass at most one of the video format option flags (-f, -b, -m, -d)') - if opts.format is None: - real_format = None - else: - real_format = opts.format[0] - # Information extractors youtube_ie = YoutubeIE() @@ -1140,7 +1133,7 @@ if __name__ == '__main__': 'forceurl': opts.geturl, 'forcetitle': opts.gettitle, 'simulate': (opts.simulate or opts.geturl or opts.gettitle), - 'format': real_format, + 'format': opts.format, 'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(locale.getpreferredencoding())) or (opts.usetitle and u'%(stitle)s-%(id)s.%(ext)s') or (opts.useliteral and u'%(title)s-%(id)s.%(ext)s') From 5745bfdcdccf053300eab4aca8c9959c01fc3b43 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Mon, 25 May 2009 21:05:50 +0200 Subject: [PATCH 115/455] Bump version number --- LATEST_VERSION | 2 +- youtube-dl | 2 +- 2 files changed, 2 
insertions(+), 2 deletions(-) diff --git a/LATEST_VERSION b/LATEST_VERSION index bdcb472bf..b457119fd 100644 --- a/LATEST_VERSION +++ b/LATEST_VERSION @@ -1 +1 @@ -2009.05.23 +2009.05.25 diff --git a/youtube-dl b/youtube-dl index 5ead5e4c9..b2920c5a6 100755 --- a/youtube-dl +++ b/youtube-dl @@ -1029,7 +1029,7 @@ if __name__ == '__main__': # Parse command line parser = optparse.OptionParser( usage='Usage: %prog [options] url...', - version='INTERNAL', + version='2009.05.25', conflict_handler='resolve', ) From eb5d184157af39ff65b6103ccea73749cb2e24c1 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Mon, 25 May 2009 21:06:28 +0200 Subject: [PATCH 116/455] Restore INTERNAL version number --- youtube-dl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index b2920c5a6..5ead5e4c9 100755 --- a/youtube-dl +++ b/youtube-dl @@ -1029,7 +1029,7 @@ if __name__ == '__main__': # Parse command line parser = optparse.OptionParser( usage='Usage: %prog [options] url...', - version='2009.05.25', + version='INTERNAL', conflict_handler='resolve', ) From daa88ccc2e6ac9c184baa62ec411eb13180a00f8 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Tue, 26 May 2009 21:26:09 +0200 Subject: [PATCH 117/455] Fix TypeError when using the -f option (fixes issue #24) --- youtube-dl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index 5ead5e4c9..9d84c3560 100755 --- a/youtube-dl +++ b/youtube-dl @@ -1053,7 +1053,7 @@ if __name__ == '__main__': video_format = optparse.OptionGroup(parser, 'Video Format Options') video_format.add_option('-f', '--format', - action='append', dest='format', metavar='FMT', help='video format code') + action='store', dest='format', metavar='FMT', help='video format code') video_format.add_option('-b', '--best-quality', action='store_const', dest='format', help='download the best quality video possible', const='0') video_format.add_option('-m', '--mobile-version', From 
f76c2df64e51b991c4a5a17f30bfa50be00d0e8c Mon Sep 17 00:00:00 2001 From: Paul Ivanov Date: Tue, 26 May 2009 14:06:21 -0700 Subject: [PATCH 118/455] Added -c option (--continue) interrupted downloads will properly resume and append to the previously downloaded data, instead of overwriting the file. There's some error checking - if the length of the file to be download matches the length of the previously downloaded data, we report that this file has already been downloaded and do nothing. If there is some other HTTP 416 'Requested range not satisfiable' error, we simply re-download the whole file (reverting to the original functionality) All other HTTP errors are simply raised. Resuming does not override -w (--nooverwrite), since it is not clear what should happen if file on disk is larger than file to be downloaded. Thus, -c does nothing if -w is present. --- youtube-dl | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/youtube-dl b/youtube-dl index 9d84c3560..026073e48 100755 --- a/youtube-dl +++ b/youtube-dl @@ -306,7 +306,7 @@ class FileDownloader(object): return try: - outstream = open(filename, 'wb') + outstream = open(filename, 'ab') except (OSError, IOError), err: self.trouble('ERROR: unable to open for writing: %s' % str(err)) return @@ -368,7 +368,27 @@ class FileDownloader(object): def _do_download(self, stream, url): request = urllib2.Request(url, None, std_headers) - data = urllib2.urlopen(request) + # Resume transfer if filesize is non-zero + resume_len = stream.tell() + if self.params["continue"] and resume_len != 0: + print "[download] Resuming download at byte %d" % resume_len + request.add_header("Range","bytes=%d-" % resume_len) + else: + stream.close() + stream = open(stream.name,'wb') + try: + data = urllib2.urlopen(request) + except urllib2.HTTPError, e: + if not e.code == 416: # 416 is 'Requested range not satisfiable' + raise + data = urllib2.urlopen(url) + if int(data.info()['Content-Length']) == 
resume_len: + print '[download] %s has already been downloaded' % stream.name + return + else: + print "[download] Unable to resume, restarting download from the beginning" + stream.close() + stream = open(stream.name,'wb') data_len = data.info().get('Content-length', None) data_len_str = self.format_bytes(data_len) byte_counter = 0 @@ -1084,6 +1104,8 @@ if __name__ == '__main__': dest='batchfile', metavar='F', help='file containing URLs to download') filesystem.add_option('-w', '--no-overwrites', action='store_true', dest='nooverwrites', help='do not overwrite files', default=False) + filesystem.add_option('-c', '--continue', + action='store_true', dest='continue_dl', help='resume partially downloaded files', default=False) parser.add_option_group(filesystem) (opts, args) = parser.parse_args() @@ -1141,6 +1163,7 @@ if __name__ == '__main__': 'ignoreerrors': opts.ignoreerrors, 'ratelimit': opts.ratelimit, 'nooverwrites': opts.nooverwrites, + 'continue': opts.continue_dl, }) fd.add_info_extractor(youtube_search_ie) fd.add_info_extractor(youtube_pl_ie) From 7db85b2c70eb933e7b52eebaf2d5401abd502f6b Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Wed, 27 May 2009 22:50:18 +0200 Subject: [PATCH 119/455] Tweaks to ivanov's code --- youtube-dl | 32 ++++++++++++++++++++++++-------- 1 file changed, 24 insertions(+), 8 deletions(-) diff --git a/youtube-dl b/youtube-dl index 026073e48..591b7fc33 100755 --- a/youtube-dl +++ b/youtube-dl @@ -266,6 +266,18 @@ class FileDownloader(object): """Report download progress.""" self.to_stdout(u'\r[download] %s of %s at %s ETA %s' % (percent_str, data_len_str, speed_str, eta_str), skip_eol=True) + + def report_resuming_byte(self, resume_len): + """Report attemtp to resume at given byte.""" + self.to_stdout(u'[download] Resuming download at byte %s' % resume_len) + + def report_file_already_downloaded(self, file_name): + """Report file has already been fully downloaded.""" + self.to_stdout(u'[download] %s has already been downloaded' 
% file_name) + + def report_unable_to_resume(self): + """Report it was impossible to resume download.""" + self.to_stdout(u'[download] Unable to resume') def report_finish(self): """Report download finished.""" @@ -367,12 +379,14 @@ class FileDownloader(object): break def _do_download(self, stream, url): + basic_request = urllib2.Request(url, None, std_headers) request = urllib2.Request(url, None, std_headers) + # Resume transfer if filesize is non-zero resume_len = stream.tell() - if self.params["continue"] and resume_len != 0: - print "[download] Resuming download at byte %d" % resume_len - request.add_header("Range","bytes=%d-" % resume_len) + if self.params['continuedl'] and resume_len != 0: + self.report_resuming_byte(resume_len) + request.add_header('Range','bytes=%d-' % resume_len) else: stream.close() stream = open(stream.name,'wb') @@ -381,14 +395,16 @@ class FileDownloader(object): except urllib2.HTTPError, e: if not e.code == 416: # 416 is 'Requested range not satisfiable' raise - data = urllib2.urlopen(url) - if int(data.info()['Content-Length']) == resume_len: - print '[download] %s has already been downloaded' % stream.name + data = urllib2.urlopen(basic_request) + content_length = data.info()['Content-Length'] + if content_length is not None and content_length == resume_len: + self.report_file_already_downloaded(self.name) return else: - print "[download] Unable to resume, restarting download from the beginning" + self.report_unable_to_resume() stream.close() stream = open(stream.name,'wb') + data_len = data.info().get('Content-length', None) data_len_str = self.format_bytes(data_len) byte_counter = 0 @@ -1163,7 +1179,7 @@ if __name__ == '__main__': 'ignoreerrors': opts.ignoreerrors, 'ratelimit': opts.ratelimit, 'nooverwrites': opts.nooverwrites, - 'continue': opts.continue_dl, + 'continuedl': opts.continue_dl, }) fd.add_info_extractor(youtube_search_ie) fd.add_info_extractor(youtube_pl_ie) From 6a0015a7e0deaf7f82e64fbfb5ee62d2058dd95f Mon Sep 17 
00:00:00 2001 From: Ricardo Garcia Date: Wed, 27 May 2009 23:02:37 +0200 Subject: [PATCH 120/455] Fix missing cast preventing detection of already downloaded file --- youtube-dl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube-dl b/youtube-dl index 591b7fc33..6afdc0e6a 100755 --- a/youtube-dl +++ b/youtube-dl @@ -397,8 +397,8 @@ class FileDownloader(object): raise data = urllib2.urlopen(basic_request) content_length = data.info()['Content-Length'] - if content_length is not None and content_length == resume_len: - self.report_file_already_downloaded(self.name) + if content_length is not None and long(content_length) == resume_len: + self.report_file_already_downloaded(stream.name) return else: self.report_unable_to_resume() From e1f18b8a841f791d6683a890db7a0a11c3b25318 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Wed, 27 May 2009 23:03:56 +0200 Subject: [PATCH 121/455] Remove integer casts and replace them with long integer casts --- youtube-dl | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/youtube-dl b/youtube-dl index 6afdc0e6a..06ba25d9a 100755 --- a/youtube-dl +++ b/youtube-dl @@ -182,13 +182,13 @@ class FileDownloader(object): new_min = max(bytes / 2.0, 1.0) new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB if elapsed_time < 0.001: - return int(new_max) + return long(new_max) rate = bytes / elapsed_time if rate > new_max: - return int(new_max) + return long(new_max) if rate < new_min: - return int(new_min) - return int(rate) + return long(new_min) + return long(rate) @staticmethod def parse_bytes(bytestr): @@ -895,7 +895,7 @@ class YoutubeSearchIE(InfoExtractor): return else: try: - n = int(prefix) + n = long(prefix) if n <= 0: self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query)) return @@ -904,7 +904,7 @@ class YoutubeSearchIE(InfoExtractor): n = self._max_youtube_results self._download_n_results(query, n) return - except ValueError: # 
parsing prefix as int fails + except ValueError: # parsing prefix as integer fails self._download_n_results(query, 1) return From 0cd61126fc3e58d7f044a8ef960f9763c8f3af59 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Wed, 27 May 2009 23:04:18 +0200 Subject: [PATCH 122/455] Document new "continuedl" FileDownloader option --- youtube-dl | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube-dl b/youtube-dl index 06ba25d9a..6730898ac 100755 --- a/youtube-dl +++ b/youtube-dl @@ -114,6 +114,7 @@ class FileDownloader(object): ignoreerrors: Do not stop on download errors. ratelimit: Download speed limit, in bytes/sec. nooverwrites: Prevent overwriting files. + continuedl: Try to continue downloads if possible. """ params = None From 73747955523e1a7208f9f037254a716206ae7531 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Sat, 30 May 2009 10:50:42 +0200 Subject: [PATCH 123/455] Bump version number --- LATEST_VERSION | 2 +- youtube-dl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/LATEST_VERSION b/LATEST_VERSION index b457119fd..a013f29e6 100644 --- a/LATEST_VERSION +++ b/LATEST_VERSION @@ -1 +1 @@ -2009.05.25 +2009.05.30 diff --git a/youtube-dl b/youtube-dl index 6730898ac..c83a0a119 100755 --- a/youtube-dl +++ b/youtube-dl @@ -1066,7 +1066,7 @@ if __name__ == '__main__': # Parse command line parser = optparse.OptionParser( usage='Usage: %prog [options] url...', - version='INTERNAL', + version='2009.05.30', conflict_handler='resolve', ) From ff21a710aeadd3d6192f5d94834b990f3c1b7b2a Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Sat, 30 May 2009 10:51:14 +0200 Subject: [PATCH 124/455] Restore INTERNAL version number --- youtube-dl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index c83a0a119..6730898ac 100755 --- a/youtube-dl +++ b/youtube-dl @@ -1066,7 +1066,7 @@ if __name__ == '__main__': # Parse command line parser = optparse.OptionParser( usage='Usage: %prog [options] url...', - 
version='2009.05.30', + version='INTERNAL', conflict_handler='resolve', ) From 55e7c75e122261c97af78a89d07a244c09bddbc8 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Sun, 7 Jun 2009 01:11:50 +0200 Subject: [PATCH 125/455] Delay opening file until there is data to write Fixes issue #19. --- youtube-dl | 73 +++++++++++++++++++++++++++++------------------------- 1 file changed, 39 insertions(+), 34 deletions(-) diff --git a/youtube-dl b/youtube-dl index 6730898ac..1d9f955e2 100755 --- a/youtube-dl +++ b/youtube-dl @@ -305,7 +305,6 @@ class FileDownloader(object): template_dict = dict(info_dict) template_dict['epoch'] = unicode(long(time.time())) filename = self.params['outtmpl'] % template_dict - self.report_destination(filename) except (ValueError, KeyError), err: self.trouble('ERROR: invalid output template or system charset: %s' % str(err)) if self.params['nooverwrites'] and os.path.exists(filename): @@ -319,17 +318,8 @@ class FileDownloader(object): return try: - outstream = open(filename, 'ab') + success = self._do_download(filename, info_dict['url']) except (OSError, IOError), err: - self.trouble('ERROR: unable to open for writing: %s' % str(err)) - return - - try: - self._do_download(outstream, info_dict['url']) - outstream.close() - except (OSError, IOError), err: - outstream.close() - os.remove(filename) raise UnavailableFormatError except (urllib2.URLError, httplib.HTTPException, socket.error), err: self.trouble('ERROR: unable to download video data: %s' % str(err)) @@ -338,11 +328,12 @@ class FileDownloader(object): self.trouble('ERROR: content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded)) return - try: - self.post_process(filename, info_dict) - except (PostProcessingError), err: - self.trouble('ERROR: postprocessing: %s' % str(err)) - return + if success: + try: + self.post_process(filename, info_dict) + except (PostProcessingError), err: + self.trouble('ERROR: postprocessing: %s' % str(err)) + return def 
download(self, url_list): """Download a given list of URLs.""" @@ -379,32 +370,36 @@ class FileDownloader(object): if info is None: break - def _do_download(self, stream, url): + def _do_download(self, filename, url): + stream = None + open_mode = 'ab' + basic_request = urllib2.Request(url, None, std_headers) request = urllib2.Request(url, None, std_headers) - # Resume transfer if filesize is non-zero - resume_len = stream.tell() + # Attempt to resume download with "continuedl" option + if os.path.isfile(filename): + resume_len = os.path.getsize(filename) + else: + resume_len = 0 if self.params['continuedl'] and resume_len != 0: self.report_resuming_byte(resume_len) request.add_header('Range','bytes=%d-' % resume_len) - else: - stream.close() - stream = open(stream.name,'wb') + + # Establish connection try: data = urllib2.urlopen(request) - except urllib2.HTTPError, e: - if not e.code == 416: # 416 is 'Requested range not satisfiable' + except (urllib2.HTTPError, ), err: + if err.code != 416: # 416 is 'Requested range not satisfiable' raise data = urllib2.urlopen(basic_request) content_length = data.info()['Content-Length'] if content_length is not None and long(content_length) == resume_len: - self.report_file_already_downloaded(stream.name) - return + self.report_file_already_downloaded(filename) + return True else: self.report_unable_to_resume() - stream.close() - stream = open(stream.name,'wb') + open_mode = 'wb' data_len = data.info().get('Content-length', None) data_len_str = self.format_bytes(data_len) @@ -412,12 +407,6 @@ class FileDownloader(object): block_size = 1024 start = time.time() while True: - # Progress message - percent_str = self.calc_percent(byte_counter, data_len) - eta_str = self.calc_eta(start, time.time(), data_len, byte_counter) - speed_str = self.calc_speed(start, time.time(), byte_counter) - self.report_progress(percent_str, data_len_str, speed_str, eta_str) - # Download and write before = time.time() data_block = data.read(block_size) 
@@ -426,15 +415,31 @@ class FileDownloader(object): if data_block_len == 0: break byte_counter += data_block_len + + # Open file just in time + if stream is None: + try: + stream = open(filename, open_mode) + self.report_destination(filename) + except (OSError, IOError), err: + self.trouble('ERROR: unable to open for writing: %s' % str(err)) + return False stream.write(data_block) block_size = self.best_block_size(after - before, data_block_len) + # Progress message + percent_str = self.calc_percent(byte_counter, data_len) + eta_str = self.calc_eta(start, time.time(), data_len, byte_counter) + speed_str = self.calc_speed(start, time.time(), byte_counter) + self.report_progress(percent_str, data_len_str, speed_str, eta_str) + # Apply rate limit self.slow_down(start, byte_counter) self.report_finish() if data_len is not None and str(byte_counter) != data_len: raise ContentTooShortError(byte_counter, long(data_len)) + return True class InfoExtractor(object): """Information Extractor class. 
From 113e5266cc6d5616aadb1a70590ebe6107d2b524 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Tue, 9 Jun 2009 20:19:29 +0200 Subject: [PATCH 126/455] Modify "more pages" check in YouTube playlist (fixes issue #29) --- youtube-dl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube-dl b/youtube-dl index 1d9f955e2..9bbecf7d4 100755 --- a/youtube-dl +++ b/youtube-dl @@ -956,7 +956,7 @@ class YoutubePlaylistIE(InfoExtractor): _VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/view_play_list\?p=(.+)' _TEMPLATE_URL = 'http://www.youtube.com/view_play_list?p=%s&page=%s&gl=US&hl=en' _VIDEO_INDICATOR = r'/watch\?v=(.+?)&' - _MORE_PAGES_INDICATOR = r'/view_play_list?p=%s&page=%s' + _MORE_PAGES_INDICATOR = r'/view_play_list?p=%s&page=%s' _youtube_ie = None def __init__(self, youtube_ie, downloader=None): @@ -1002,7 +1002,7 @@ class YoutubePlaylistIE(InfoExtractor): ids_in_page.append(mobj.group(1)) video_ids.extend(ids_in_page) - if (self._MORE_PAGES_INDICATOR % (playlist_id, pagenum + 1)) not in page: + if (self._MORE_PAGES_INDICATOR % (playlist_id.upper(), pagenum + 1)) not in page: break pagenum = pagenum + 1 From 8b07dec5f6492af73e8621fd9aa23b03290662aa Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Mon, 29 Jun 2009 09:25:19 +0200 Subject: [PATCH 127/455] Bump version number --- LATEST_VERSION | 2 +- youtube-dl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/LATEST_VERSION b/LATEST_VERSION index a013f29e6..a54ba968c 100644 --- a/LATEST_VERSION +++ b/LATEST_VERSION @@ -1 +1 @@ -2009.05.30 +2009.06.29 diff --git a/youtube-dl b/youtube-dl index 9bbecf7d4..0fd160c2a 100755 --- a/youtube-dl +++ b/youtube-dl @@ -1071,7 +1071,7 @@ if __name__ == '__main__': # Parse command line parser = optparse.OptionParser( usage='Usage: %prog [options] url...', - version='INTERNAL', + version='2009.06.29', conflict_handler='resolve', ) From 7eb0e897429e1a85b185143b72a656ca8175501c Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Mon, 13 
Jul 2009 15:25:14 +0200 Subject: [PATCH 128/455] Properly encode messages sent to stderr (fixes issue #34) --- youtube-dl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube-dl b/youtube-dl index 0fd160c2a..36b6331cb 100755 --- a/youtube-dl +++ b/youtube-dl @@ -227,7 +227,7 @@ class FileDownloader(object): def to_stderr(self, message): """Print message to stderr.""" - print >>sys.stderr, message + print >>sys.stderr, message.encode(locale.getpreferredencoding()) def fixed_template(self): """Checks if the output template is fixed.""" @@ -308,7 +308,7 @@ class FileDownloader(object): except (ValueError, KeyError), err: self.trouble('ERROR: invalid output template or system charset: %s' % str(err)) if self.params['nooverwrites'] and os.path.exists(filename): - self.to_stderr('WARNING: file exists: %s; skipping' % filename) + self.to_stderr(u'WARNING: file exists: %s; skipping' % filename) return try: From df1ceb1fd9d18b873ff3e6e2756e871cf1e394f8 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Mon, 13 Jul 2009 15:47:06 +0200 Subject: [PATCH 129/455] Include format 5 in best quality list --- youtube-dl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index 36b6331cb..5ca7f2519 100755 --- a/youtube-dl +++ b/youtube-dl @@ -510,7 +510,7 @@ class YoutubeIE(InfoExtractor): _LOGIN_URL = 'http://www.youtube.com/signup?next=/&gl=US&hl=en' _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en' _NETRC_MACHINE = 'youtube' - _available_formats = ['22', '35', '18', '17', '13'] # listed in order of priority for -b flag + _available_formats = ['22', '35', '18', '5', '17', '13'] # listed in order of priority for -b flag _video_extensions = { '13': '3gp', '17': 'mp4', From 18963a36b060526d89114fa019cd65e1fc425f9b Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Sun, 2 Aug 2009 12:18:52 +0200 Subject: [PATCH 130/455] Fix metacafe.com code due to recent changes in the site --- youtube-dl | 16 
+++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/youtube-dl b/youtube-dl index 5ca7f2519..6d0b32941 100755 --- a/youtube-dl +++ b/youtube-dl @@ -821,19 +821,21 @@ class MetacafeIE(InfoExtractor): # Extract URL, uploader and title from webpage self.report_extraction(video_id) - mobj = re.search(r'(?m)&mediaURL=(http.*?\.flv)', webpage) + mobj = re.search(r'(?m)&mediaURL=([^&]+)', webpage) if mobj is None: self._downloader.trouble(u'ERROR: unable to extract media URL') return mediaURL = urllib.unquote(mobj.group(1)) - mobj = re.search(r'(?m)&gdaKey=(.*?)&', webpage) - if mobj is None: - self._downloader.trouble(u'ERROR: unable to extract gdaKey') - return - gdaKey = mobj.group(1) + #mobj = re.search(r'(?m)&gdaKey=(.*?)&', webpage) + #if mobj is None: + # self._downloader.trouble(u'ERROR: unable to extract gdaKey') + # return + #gdaKey = mobj.group(1) + # + #video_url = '%s?__gda__=%s' % (mediaURL, gdaKey) - video_url = '%s?__gda__=%s' % (mediaURL, gdaKey) + video_url = mediaURL mobj = re.search(r'(?im)(.*) - Video', webpage) if mobj is None: From 110cd3462e4057ccbec5369bf895bdf311fb536b Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Sat, 8 Aug 2009 14:53:13 +0200 Subject: [PATCH 131/455] Update User-agent string --- youtube-dl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index 6d0b32941..aa16ff1e4 100755 --- a/youtube-dl +++ b/youtube-dl @@ -19,7 +19,7 @@ import urllib import urllib2 std_headers = { - 'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US; rv:1.9.0.8) Gecko/2009032609 Firefox/3.0.8', + 'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US; rv:1.9.1.2) Gecko/20090729 Firefox/3.5.2', 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7', 'Accept': 'text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5', 'Accept-Language': 'en-us,en;q=0.5', From 8497c36d5af77dc561fa698968bffea868a71f3c Mon Sep 17 00:00:00 2001 From: 
Ricardo Garcia Date: Sat, 8 Aug 2009 14:54:39 +0200 Subject: [PATCH 132/455] Fix minor problem with size formatting method --- youtube-dl | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/youtube-dl b/youtube-dl index aa16ff1e4..1dfd35556 100755 --- a/youtube-dl +++ b/youtube-dl @@ -143,10 +143,12 @@ class FileDownloader(object): def format_bytes(bytes): if bytes is None: return 'N/A' - if bytes == 0: + if type(bytes) is str: + bytes = float(bytes) + if bytes == 0.0: exponent = 0 else: - exponent = long(math.log(float(bytes), 1024.0)) + exponent = long(math.log(bytes, 1024.0)) suffix = 'bkMGTPEZY'[exponent] converted = float(bytes) / float(1024**exponent) return '%.2f%s' % (converted, suffix) From 71b7300e63780e9495cbb1030061f469a17a66f4 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Sat, 8 Aug 2009 14:56:06 +0200 Subject: [PATCH 133/455] Use get_video_info to work around captcha problems (fixes issue #31) --- youtube-dl | 47 ++++++++++++++++++++++++++++------------------- 1 file changed, 28 insertions(+), 19 deletions(-) diff --git a/youtube-dl b/youtube-dl index 1dfd35556..65781f959 100755 --- a/youtube-dl +++ b/youtube-dl @@ -559,9 +559,9 @@ class YoutubeIE(InfoExtractor): """Report attempt to confirm age.""" self._downloader.to_stdout(u'[youtube] Confirming age') - def report_webpage_download(self, video_id): - """Report attempt to download webpage.""" - self._downloader.to_stdout(u'[youtube] %s: Downloading video webpage' % video_id) + def report_video_info_webpage_download(self, video_id): + """Report attempt to download video info webpage.""" + self._downloader.to_stdout(u'[youtube] %s: Downloading video info webpage' % video_id) def report_information_extraction(self, video_id): """Report attempt to extract video information.""" @@ -667,42 +667,51 @@ class YoutubeIE(InfoExtractor): # Extension video_extension = self._video_extensions.get(format_param, 'flv') - # Normalize URL, including format - normalized_url = 
'http://www.youtube.com/watch?v=%s&gl=US&hl=en' % video_id - if format_param is not None: - normalized_url = '%s&fmt=%s' % (normalized_url, format_param) - request = urllib2.Request(normalized_url, None, std_headers) + # Get video info + video_info_url = 'http://www.youtube.com/get_video_info?&video_id=%s&el=detailpage&ps=default&eurl=&gl=US&hl=en' % video_id + request = urllib2.Request(video_info_url, None, std_headers) try: - self.report_webpage_download(video_id) - video_webpage = urllib2.urlopen(request).read() + self.report_video_info_webpage_download(video_id) + video_info_webpage = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err)) + self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % str(err)) return self.report_information_extraction(video_id) - + # "t" param - mobj = re.search(r', "t": "([^"]+)"', video_webpage) + mobj = re.search(r'(?m)&token=([^&]+)(?:&|$)', video_info_webpage) if mobj is None: - self._downloader.trouble(u'ERROR: unable to extract "t" parameter') + # Attempt to see if YouTube has issued an error message + mobj = re.search(r'(?m)&reason=([^&]+)(?:&|$)', video_info_webpage) + if mobj is None: + self._downloader.trouble(u'ERROR: unable to extract "t" parameter for unknown reason') + stream = open('reportme-ydl-%s.dat' % time.time(), 'wb') + stream.write(video_info_webpage) + stream.close() + else: + reason = urllib.unquote_plus(mobj.group(1)) + self._downloader.trouble(u'ERROR: YouTube said: %s' % reason.decode('utf-8')) return - video_real_url = 'http://www.youtube.com/get_video?video_id=%s&t=%s&el=detailpage&ps=' % (video_id, mobj.group(1)) + token = urllib.unquote(mobj.group(1)) + video_real_url = 'http://www.youtube.com/get_video?video_id=%s&t=%s&eurl=&el=detailpage&ps=default&gl=US&hl=en' % (video_id, token) if format_param is not None: video_real_url = '%s&fmt=%s' % 
(video_real_url, format_param) self.report_video_url(video_id, video_real_url) # uploader - mobj = re.search(r"var watchUsername = '([^']+)';", video_webpage) + mobj = re.search(r'(?m)&author=([^&]+)(?:&|$)', video_info_webpage) if mobj is None: self._downloader.trouble(u'ERROR: unable to extract uploader nickname') return - video_uploader = mobj.group(1) + video_uploader = urllib.unquote(mobj.group(1)) # title - mobj = re.search(r'(?im)YouTube - ([^<]*)', video_webpage) + mobj = re.search(r'(?m)&title=([^&]+)(?:&|$)', video_info_webpage) if mobj is None: self._downloader.trouble(u'ERROR: unable to extract video title') return - video_title = mobj.group(1).decode('utf-8') + video_title = urllib.unquote(mobj.group(1)) + video_title = video_title.decode('utf-8') video_title = re.sub(ur'(?u)&(.+?);', self.htmlentity_transform, video_title) video_title = video_title.replace(os.sep, u'%') From 0c8beb43f22c20cf0c217cf37c410fb76270bba3 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Sat, 8 Aug 2009 14:57:56 +0200 Subject: [PATCH 134/455] Bump version number --- LATEST_VERSION | 2 +- youtube-dl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/LATEST_VERSION b/LATEST_VERSION index a54ba968c..2f8b61a21 100644 --- a/LATEST_VERSION +++ b/LATEST_VERSION @@ -1 +1 @@ -2009.06.29 +2009.08.08 diff --git a/youtube-dl b/youtube-dl index 65781f959..99d07eeec 100755 --- a/youtube-dl +++ b/youtube-dl @@ -1084,7 +1084,7 @@ if __name__ == '__main__': # Parse command line parser = optparse.OptionParser( usage='Usage: %prog [options] url...', - version='2009.06.29', + version='2009.08.08', conflict_handler='resolve', ) From e5b160488246af215f49039ff580b712d75282e8 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Sat, 8 Aug 2009 14:59:14 +0200 Subject: [PATCH 135/455] Set INTERNAL version number --- youtube-dl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index 99d07eeec..882b10391 100755 --- a/youtube-dl +++ b/youtube-dl 
@@ -1084,7 +1084,7 @@ if __name__ == '__main__': # Parse command line parser = optparse.OptionParser( usage='Usage: %prog [options] url...', - version='2009.08.08', + version='INTERNAL', conflict_handler='resolve', ) From fade05990c9e4edbf07c91a6723a0e302e82114d Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Mon, 10 Aug 2009 22:10:40 +0200 Subject: [PATCH 136/455] Do not specify any format as a last option for the -b flag --- youtube-dl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index 882b10391..e3dafad69 100755 --- a/youtube-dl +++ b/youtube-dl @@ -512,7 +512,7 @@ class YoutubeIE(InfoExtractor): _LOGIN_URL = 'http://www.youtube.com/signup?next=/&gl=US&hl=en' _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en' _NETRC_MACHINE = 'youtube' - _available_formats = ['22', '35', '18', '5', '17', '13'] # listed in order of priority for -b flag + _available_formats = ['22', '35', '18', '5', '17', '13', None] # listed in order of priority for -b flag _video_extensions = { '13': '3gp', '17': 'mp4', From d89977437718619a057028566b1e901ea963193e Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Wed, 12 Aug 2009 23:15:22 +0200 Subject: [PATCH 137/455] Accept my_playlists syntax for playlists --- youtube-dl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index e3dafad69..50b9197f2 100755 --- a/youtube-dl +++ b/youtube-dl @@ -966,7 +966,7 @@ class YoutubeSearchIE(InfoExtractor): class YoutubePlaylistIE(InfoExtractor): """Information Extractor for YouTube playlists.""" - _VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/view_play_list\?p=(.+)' + _VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/(?:view_play_list|my_playlists)\?.*?p=([^&]+).*' _TEMPLATE_URL = 'http://www.youtube.com/view_play_list?p=%s&page=%s&gl=US&hl=en' _VIDEO_INDICATOR = r'/watch\?v=(.+?)&' _MORE_PAGES_INDICATOR = r'/view_play_list?p=%s&page=%s' From 304a4d85ea468fb2a39a7357cb28b64506246d42 Mon Sep 17 
00:00:00 2001 From: Ricardo Garcia Date: Sat, 15 Aug 2009 00:33:50 +0200 Subject: [PATCH 138/455] Modify _MORE_PAGES_INDICATOR for searches (fixes issue #41) --- youtube-dl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube-dl b/youtube-dl index 50b9197f2..becf1d25d 100755 --- a/youtube-dl +++ b/youtube-dl @@ -879,7 +879,7 @@ class YoutubeSearchIE(InfoExtractor): _VALID_QUERY = r'ytsearch(\d+|all)?:[\s\S]+' _TEMPLATE_URL = 'http://www.youtube.com/results?search_query=%s&page=%s&gl=US&hl=en' _VIDEO_INDICATOR = r'href="/watch\?v=.+?"' - _MORE_PAGES_INDICATOR = r'>Next' + _MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*' _youtube_ie = None _max_youtube_results = 1000 @@ -956,7 +956,7 @@ class YoutubeSearchIE(InfoExtractor): self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id) return - if self._MORE_PAGES_INDICATOR not in page: + if re.search(self._MORE_PAGES_INDICATOR, page) is None: for id in video_ids: self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id) return From dd24ff44ab34026dddfeef59ab03b6ec88cdf371 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Tue, 8 Sep 2009 09:54:14 +0200 Subject: [PATCH 139/455] Bump version number --- LATEST_VERSION | 2 +- youtube-dl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/LATEST_VERSION b/LATEST_VERSION index 2f8b61a21..d8a0ea96f 100644 --- a/LATEST_VERSION +++ b/LATEST_VERSION @@ -1 +1 @@ -2009.08.08 +2009.09.08 diff --git a/youtube-dl b/youtube-dl index becf1d25d..8c42dfd06 100755 --- a/youtube-dl +++ b/youtube-dl @@ -1084,7 +1084,7 @@ if __name__ == '__main__': # Parse command line parser = optparse.OptionParser( usage='Usage: %prog [options] url...', - version='INTERNAL', + version='2009.09.08', conflict_handler='resolve', ) From 2a04438c7c6de2dc9ef2a0bbaaa8ebc1dc654f44 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Tue, 8 Sep 2009 09:54:51 +0200 Subject: [PATCH 140/455] Restore INTERNAL version number --- youtube-dl | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index 8c42dfd06..becf1d25d 100755 --- a/youtube-dl +++ b/youtube-dl @@ -1084,7 +1084,7 @@ if __name__ == '__main__': # Parse command line parser = optparse.OptionParser( usage='Usage: %prog [options] url...', - version='2009.09.08', + version='INTERNAL', conflict_handler='resolve', ) From eae2666cb43b254b88cf8cc71b654fc6312643da Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Sun, 13 Sep 2009 10:45:04 +0200 Subject: [PATCH 141/455] Handle weird OSX locale settings gracefully (fixes issue #43) --- youtube-dl | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/youtube-dl b/youtube-dl index becf1d25d..472a8e1cc 100755 --- a/youtube-dl +++ b/youtube-dl @@ -27,6 +27,22 @@ std_headers = { simple_title_chars = string.ascii_letters.decode('ascii') + string.digits.decode('ascii') +def preferredencoding(): + """Get preferred encoding. + + Returns the best encoding scheme for the system, based on + locale.getpreferredencoding() and some further tweaks. + """ + try: + pref = locale.getpreferredencoding() + # Mac OSX systems have this problem sometimes + if pref == '': + return 'UTF-8' + return pref + except: + sys.stderr.write('WARNING: problem obtaining preferred encoding. Falling back to UTF-8.\n') + return 'UTF-8' + class DownloadError(Exception): """Download Error exception. 
@@ -224,12 +240,12 @@ class FileDownloader(object): def to_stdout(self, message, skip_eol=False): """Print message to stdout if not in quiet mode.""" if not self.params.get('quiet', False): - print (u'%s%s' % (message, [u'\n', u''][skip_eol])).encode(locale.getpreferredencoding()), + print (u'%s%s' % (message, [u'\n', u''][skip_eol])).encode(preferredencoding()), sys.stdout.flush() def to_stderr(self, message): """Print message to stderr.""" - print >>sys.stderr, message.encode(locale.getpreferredencoding()) + print >>sys.stderr, message.encode(preferredencoding()) def fixed_template(self): """Checks if the output template is fixed.""" @@ -297,9 +313,9 @@ class FileDownloader(object): # Forced printings if self.params.get('forcetitle', False): - print info_dict['title'].encode(locale.getpreferredencoding()) + print info_dict['title'].encode(preferredencoding()) if self.params.get('forceurl', False): - print info_dict['url'].encode(locale.getpreferredencoding()) + print info_dict['url'].encode(preferredencoding()) return @@ -1191,7 +1207,7 @@ if __name__ == '__main__': 'forcetitle': opts.gettitle, 'simulate': (opts.simulate or opts.geturl or opts.gettitle), 'format': opts.format, - 'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(locale.getpreferredencoding())) + 'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(preferredencoding())) or (opts.usetitle and u'%(stitle)s-%(id)s.%(ext)s') or (opts.useliteral and u'%(title)s-%(id)s.%(ext)s') or u'%(id)s.%(ext)s'), From 30edbf89e484172698f1c71613628de481beb97c Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Sun, 13 Sep 2009 10:46:04 +0200 Subject: [PATCH 142/455] Report final URL with -g and do not print URLs normally (fixes issue #49) --- youtube-dl | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/youtube-dl b/youtube-dl index 472a8e1cc..b1f271793 100755 --- a/youtube-dl +++ b/youtube-dl @@ -221,11 +221,13 @@ class FileDownloader(object): @staticmethod def 
verify_url(url): - """Verify a URL is valid and data could be downloaded.""" + """Verify a URL is valid and data could be downloaded. Return real data URL.""" request = urllib2.Request(url, None, std_headers) data = urllib2.urlopen(request) data.read(1) + url = data.geturl() data.close() + return url def add_info_extractor(self, ie): """Add an InfoExtractor object to the end of the list.""" @@ -307,7 +309,7 @@ class FileDownloader(object): # Do nothing else if in simulate mode if self.params.get('simulate', False): try: - self.verify_url(info_dict['url']) + info_dict['url'] = self.verify_url(info_dict['url']) except (OSError, IOError, urllib2.URLError, httplib.HTTPException, socket.error), err: raise UnavailableFormatError @@ -583,10 +585,6 @@ class YoutubeIE(InfoExtractor): """Report attempt to extract video information.""" self._downloader.to_stdout(u'[youtube] %s: Extracting video information' % video_id) - def report_video_url(self, video_id, video_real_url): - """Report extracted video URL.""" - self._downloader.to_stdout(u'[youtube] %s: URL: %s' % (video_id, video_real_url)) - def report_unavailable_format(self, video_id, format): """Report extracted video URL.""" self._downloader.to_stdout(u'[youtube] %s: Format %s not available' % (video_id, format)) @@ -712,7 +710,6 @@ class YoutubeIE(InfoExtractor): video_real_url = 'http://www.youtube.com/get_video?video_id=%s&t=%s&eurl=&el=detailpage&ps=default&gl=US&hl=en' % (video_id, token) if format_param is not None: video_real_url = '%s&fmt=%s' % (video_real_url, format_param) - self.report_video_url(video_id, video_real_url) # uploader mobj = re.search(r'(?m)&author=([^&]+)(?:&|$)', video_info_webpage) From ad0525b3e675f03f92c8ba7d1b15554be6884d54 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Sun, 13 Sep 2009 10:48:21 +0200 Subject: [PATCH 143/455] Bump version number --- LATEST_VERSION | 2 +- youtube-dl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/LATEST_VERSION b/LATEST_VERSION 
index d8a0ea96f..70456bc34 100644 --- a/LATEST_VERSION +++ b/LATEST_VERSION @@ -1 +1 @@ -2009.09.08 +2009.09.13 diff --git a/youtube-dl b/youtube-dl index b1f271793..5afff4e61 100755 --- a/youtube-dl +++ b/youtube-dl @@ -1097,7 +1097,7 @@ if __name__ == '__main__': # Parse command line parser = optparse.OptionParser( usage='Usage: %prog [options] url...', - version='INTERNAL', + version='2009.09.13', conflict_handler='resolve', ) From 0833f1eb839a6677ec2e850c763b84164df30433 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Sun, 13 Sep 2009 10:49:25 +0200 Subject: [PATCH 144/455] Restore INTERNAL version number --- youtube-dl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index 5afff4e61..b1f271793 100755 --- a/youtube-dl +++ b/youtube-dl @@ -1097,7 +1097,7 @@ if __name__ == '__main__': # Parse command line parser = optparse.OptionParser( usage='Usage: %prog [options] url...', - version='2009.09.13', + version='INTERNAL', conflict_handler='resolve', ) From f94b636c3ec6410570e5d384e66d651a89de6abe Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Sun, 20 Sep 2009 00:08:50 +0200 Subject: [PATCH 145/455] Improve preferred encoding detection method --- youtube-dl | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/youtube-dl b/youtube-dl index b1f271793..5efc55e6d 100755 --- a/youtube-dl +++ b/youtube-dl @@ -33,15 +33,15 @@ def preferredencoding(): Returns the best encoding scheme for the system, based on locale.getpreferredencoding() and some further tweaks. """ - try: - pref = locale.getpreferredencoding() - # Mac OSX systems have this problem sometimes - if pref == '': - return 'UTF-8' - return pref - except: - sys.stderr.write('WARNING: problem obtaining preferred encoding. 
Falling back to UTF-8.\n') - return 'UTF-8' + def yield_preferredencoding(): + try: + pref = locale.getpreferredencoding() + u'TEST'.encode(pref) + except: + pref = 'UTF-8' + while True: + yield pref + return yield_preferredencoding().next() class DownloadError(Exception): """Download Error exception. From f5a5bec35145bad8ecba149b65254bb0cfa70b78 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Sun, 20 Sep 2009 00:10:32 +0200 Subject: [PATCH 146/455] Avoid using Unicode strings when forming URL requests (fixes issue #50) --- youtube-dl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube-dl b/youtube-dl index 5efc55e6d..047cc37f8 100755 --- a/youtube-dl +++ b/youtube-dl @@ -309,7 +309,7 @@ class FileDownloader(object): # Do nothing else if in simulate mode if self.params.get('simulate', False): try: - info_dict['url'] = self.verify_url(info_dict['url']) + info_dict['url'] = self.verify_url(info_dict['url'].encode('utf-8')).decode('utf-8') except (OSError, IOError, urllib2.URLError, httplib.HTTPException, socket.error), err: raise UnavailableFormatError @@ -338,7 +338,7 @@ class FileDownloader(object): return try: - success = self._do_download(filename, info_dict['url']) + success = self._do_download(filename, info_dict['url'].encode('utf-8')) except (OSError, IOError), err: raise UnavailableFormatError except (urllib2.URLError, httplib.HTTPException, socket.error), err: From 850ab76560f7a7b9274dd382cf3a5a75a7bb3c8d Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Sun, 20 Sep 2009 00:11:11 +0200 Subject: [PATCH 147/455] Use default values for "continuedl" and "nooverwrites" downloader parameters --- youtube-dl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube-dl b/youtube-dl index 047cc37f8..03609e22c 100755 --- a/youtube-dl +++ b/youtube-dl @@ -327,7 +327,7 @@ class FileDownloader(object): filename = self.params['outtmpl'] % template_dict except (ValueError, KeyError), err: self.trouble('ERROR: invalid 
output template or system charset: %s' % str(err)) - if self.params['nooverwrites'] and os.path.exists(filename): + if self.params.get('nooverwrites', False) and os.path.exists(filename): self.to_stderr(u'WARNING: file exists: %s; skipping' % filename) return @@ -402,7 +402,7 @@ class FileDownloader(object): resume_len = os.path.getsize(filename) else: resume_len = 0 - if self.params['continuedl'] and resume_len != 0: + if self.params.get('continuedl', False) and resume_len != 0: self.report_resuming_byte(resume_len) request.add_header('Range','bytes=%d-' % resume_len) From 583c714fdebdc8d0c3234aba2343803057d99e38 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Mon, 21 Sep 2009 20:39:51 +0200 Subject: [PATCH 148/455] Allow empty titles because they do appear in some videos (fixes issue #53) --- youtube-dl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index 03609e22c..f57343ea8 100755 --- a/youtube-dl +++ b/youtube-dl @@ -719,7 +719,7 @@ class YoutubeIE(InfoExtractor): video_uploader = urllib.unquote(mobj.group(1)) # title - mobj = re.search(r'(?m)&title=([^&]+)(?:&|$)', video_info_webpage) + mobj = re.search(r'(?m)&title=([^&]*)(?:&|$)', video_info_webpage) if mobj is None: self._downloader.trouble(u'ERROR: unable to extract video title') return From ab1f697827c8cb1a4d09c03e843ebae123ce35f5 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Sat, 14 Nov 2009 15:04:50 +0100 Subject: [PATCH 149/455] Use unquote_plus to decode video title --- youtube-dl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index f57343ea8..c3d9271c2 100755 --- a/youtube-dl +++ b/youtube-dl @@ -723,7 +723,7 @@ class YoutubeIE(InfoExtractor): if mobj is None: self._downloader.trouble(u'ERROR: unable to extract video title') return - video_title = urllib.unquote(mobj.group(1)) + video_title = urllib.unquote_plus(mobj.group(1)) video_title = video_title.decode('utf-8') video_title = re.sub(ur'(?u)&(.+?);', 
self.htmlentity_transform, video_title) video_title = video_title.replace(os.sep, u'%') From 4bec29ef4bac6d007fa0f722eb7c55285e66e03a Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Thu, 19 Nov 2009 20:19:47 +0100 Subject: [PATCH 150/455] Add self-updating code --- youtube-dl | 33 ++++++++++++++++++++++++++++++--- 1 file changed, 30 insertions(+), 3 deletions(-) diff --git a/youtube-dl b/youtube-dl index c3d9271c2..d3e8cf4f6 100755 --- a/youtube-dl +++ b/youtube-dl @@ -1089,6 +1089,22 @@ if __name__ == '__main__': import getpass import optparse + # Function to update the program file with the latest version from bitbucket.org + def update_self(downloader, filename): + # Note: downloader only used for options + if not os.access (filename, os.W_OK): + sys.exit('ERROR: no write permissions on %s' % filename) + + downloader.to_stdout('Updating to latest stable version...') + latest_url = 'http://bitbucket.org/rg3/youtube-dl/raw/tip/LATEST_VERSION' + latest_version = urllib.urlopen(latest_url).read().strip() + prog_url = 'http://bitbucket.org/rg3/youtube-dl/raw/%s/youtube-dl' % latest_version + newcontent = urllib.urlopen(prog_url).read() + stream = open(filename, 'w') + stream.write(newcontent) + stream.close() + downloader.to_stdout('Updated to version %s' % latest_version) + # General configuration urllib2.install_opener(urllib2.build_opener(urllib2.ProxyHandler())) urllib2.install_opener(urllib2.build_opener(urllib2.HTTPCookieProcessor())) @@ -1105,6 +1121,8 @@ if __name__ == '__main__': action='help', help='print this help text and exit') parser.add_option('-v', '--version', action='version', help='print program version and exit') + parser.add_option('-U', '--update', + action='store_true', dest='update_self', help='update this program to latest stable version') parser.add_option('-i', '--ignore-errors', action='store_true', dest='ignoreerrors', help='continue on download errors', default=False) parser.add_option('-r', '--rate-limit', @@ -1157,7 +1175,7 @@ if 
__name__ == '__main__': parser.add_option_group(filesystem) (opts, args) = parser.parse_args() - + # Batch file verification batchurls = [] if opts.batchfile is not None: @@ -1170,8 +1188,6 @@ if __name__ == '__main__': all_urls = batchurls + args # Conflicting, missing and erroneous options - if len(all_urls) < 1: - parser.error(u'you must provide at least one URL') if opts.usenetrc and (opts.username is not None or opts.password is not None): parser.error(u'using .netrc conflicts with giving username/password') if opts.password is not None and opts.username is None: @@ -1217,6 +1233,17 @@ if __name__ == '__main__': fd.add_info_extractor(youtube_pl_ie) fd.add_info_extractor(metacafe_ie) fd.add_info_extractor(youtube_ie) + + # Update version + if opts.update_self: + update_self(fd, sys.argv[0]) + + # Maybe do nothing + if len(all_urls) < 1: + if not opts.update_self: + parser.error(u'you must provide at least one URL') + else: + sys.exit() retcode = fd.download(all_urls) sys.exit(retcode) From d9bc015b3ca1071c008783e8ae8e36f4ca8e4edf Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Fri, 20 Nov 2009 21:51:38 +0100 Subject: [PATCH 151/455] Take format 37 into account (fixes issue #65) --- youtube-dl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index d3e8cf4f6..52bda1b25 100755 --- a/youtube-dl +++ b/youtube-dl @@ -530,12 +530,13 @@ class YoutubeIE(InfoExtractor): _LOGIN_URL = 'http://www.youtube.com/signup?next=/&gl=US&hl=en' _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en' _NETRC_MACHINE = 'youtube' - _available_formats = ['22', '35', '18', '5', '17', '13', None] # listed in order of priority for -b flag + _available_formats = ['37', '22', '35', '18', '5', '17', '13', None] # listed in order of priority for -b flag _video_extensions = { '13': '3gp', '17': 'mp4', '18': 'mp4', '22': 'mp4', + '37': 'mp4', } @staticmethod From 29f07568051012599320b076d8bddcdb726b3258 Mon Sep 17 00:00:00 2001 From: 
Ricardo Garcia Date: Tue, 24 Nov 2009 20:40:34 +0100 Subject: [PATCH 152/455] Fix detection of uploader nickname in metacafe (fixes issue #67) --- youtube-dl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index 52bda1b25..739e3cd7c 100755 --- a/youtube-dl +++ b/youtube-dl @@ -868,7 +868,7 @@ class MetacafeIE(InfoExtractor): return video_title = mobj.group(1).decode('utf-8') - mobj = re.search(r'(?ms)
  • .*?Submitter:.*?(.*?)<', webpage) + mobj = re.search(r'(?ms)By:\s*(.+?)<', webpage) if mobj is None: self._downloader.trouble(u'ERROR: unable to extract uploader nickname') return From c39c05cdd7a0868ca94f10bc4a5157863dc53449 Mon Sep 17 00:00:00 2001 From: Archanamiya Date: Wed, 25 Nov 2009 16:34:34 -0500 Subject: [PATCH 153/455] Added support to download all of a user's videos! --- youtube-dl | 58 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) diff --git a/youtube-dl b/youtube-dl index 739e3cd7c..ea6245e9d 100755 --- a/youtube-dl +++ b/youtube-dl @@ -1037,6 +1037,62 @@ class YoutubePlaylistIE(InfoExtractor): self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id) return +class YoutubeUserIE(InfoExtractor): + """Information Extractor for YouTube users.""" + + _VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/user/(.*)' + _TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s' + _VIDEO_INDICATOR = r'http://gdata.youtube.com/feeds/api/videos/(.*)' + _youtube_ie = None + + def __init__(self, youtube_ie, downloader=None): + InfoExtractor.__init__(self, downloader) + self._youtube_ie = youtube_ie + + @staticmethod + def suitable(url): + return (re.match(YoutubeUserIE._VALID_URL, url) is not None) + + def report_download_page(self, username): + """Report attempt to download user page.""" + self._downloader.to_stdout(u'[youtube] USR %s: Downloading page ' % (username)) + + def _real_initialize(self): + self._youtube_ie.initialize() + + def _real_extract(self, url): + # Extract username + mobj = re.match(self._VALID_URL, url) + if mobj is None: + self._downloader.trouble(u'ERROR: invalid url: %s' % url) + return + + # Download user page + username = mobj.group(1) + video_ids = [] + pagenum = 1 + + self.report_download_page(username) + request = urllib2.Request(self._TEMPLATE_URL % (username), None, std_headers) + try: + page = urllib2.urlopen(request).read() + except (urllib2.URLError, httplib.HTTPException, 
socket.error), err: + self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err)) + return + + # Extract video identifiers + ids_in_page = [] + + for mobj in re.finditer(self._VIDEO_INDICATOR, page): + print mobj.group(1) + if mobj.group(1) not in ids_in_page: + ids_in_page.append(mobj.group(1)) + video_ids.extend(ids_in_page) + + for id in video_ids: + self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id) + return + class PostProcessor(object): """Post Processor class. @@ -1209,6 +1265,7 @@ if __name__ == '__main__': youtube_ie = YoutubeIE() metacafe_ie = MetacafeIE(youtube_ie) youtube_pl_ie = YoutubePlaylistIE(youtube_ie) + youtube_user_ie = YoutubeUserIE(youtube_ie) youtube_search_ie = YoutubeSearchIE(youtube_ie) # File downloader @@ -1232,6 +1289,7 @@ if __name__ == '__main__': }) fd.add_info_extractor(youtube_search_ie) fd.add_info_extractor(youtube_pl_ie) + fd.add_info_extractor(youtube_user_ie) fd.add_info_extractor(metacafe_ie) fd.add_info_extractor(youtube_ie) From 9c457d2a2019f3d52aa17614cc8087989e105413 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Mon, 21 Dec 2009 21:43:15 +0100 Subject: [PATCH 154/455] Handle file open mode correctly (fixes issue #76) --- youtube-dl | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/youtube-dl b/youtube-dl index ea6245e9d..5f2381448 100755 --- a/youtube-dl +++ b/youtube-dl @@ -392,19 +392,21 @@ class FileDownloader(object): def _do_download(self, filename, url): stream = None - open_mode = 'ab' - + open_mode = 'wb' basic_request = urllib2.Request(url, None, std_headers) request = urllib2.Request(url, None, std_headers) - # Attempt to resume download with "continuedl" option + # Establish possible resume length if os.path.isfile(filename): resume_len = os.path.getsize(filename) else: resume_len = 0 + + # Request parameters in case of being able to resume if self.params.get('continuedl', False) and resume_len != 0: self.report_resuming_byte(resume_len) 
request.add_header('Range','bytes=%d-' % resume_len) + open_mode = 'ab' # Establish connection try: @@ -412,12 +414,16 @@ class FileDownloader(object): except (urllib2.HTTPError, ), err: if err.code != 416: # 416 is 'Requested range not satisfiable' raise + # Unable to resume data = urllib2.urlopen(basic_request) content_length = data.info()['Content-Length'] + if content_length is not None and long(content_length) == resume_len: + # Because the file had already been fully downloaded self.report_file_already_downloaded(filename) return True else: + # Because the server didn't let us self.report_unable_to_resume() open_mode = 'wb' From a692ca7c4986cc6dd2d4c6d8091b1ed1ca0de7a7 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Sat, 26 Dec 2009 19:58:11 +0100 Subject: [PATCH 155/455] Bump version number --- LATEST_VERSION | 2 +- youtube-dl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/LATEST_VERSION b/LATEST_VERSION index 70456bc34..dd7f79455 100644 --- a/LATEST_VERSION +++ b/LATEST_VERSION @@ -1 +1 @@ -2009.09.13 +2009.12.26 diff --git a/youtube-dl b/youtube-dl index 5f2381448..7262ce94d 100755 --- a/youtube-dl +++ b/youtube-dl @@ -1176,7 +1176,7 @@ if __name__ == '__main__': # Parse command line parser = optparse.OptionParser( usage='Usage: %prog [options] url...', - version='INTERNAL', + version='2009.12.26', conflict_handler='resolve', ) From 0487b407a1d25cab8bbdbfa1cdc999e080a10531 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Sun, 3 Jan 2010 13:12:11 +0100 Subject: [PATCH 156/455] Add support for using rtmpdump --- youtube-dl | 61 +++++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 49 insertions(+), 12 deletions(-) diff --git a/youtube-dl b/youtube-dl index 7262ce94d..6b21f8a19 100755 --- a/youtube-dl +++ b/youtube-dl @@ -13,10 +13,12 @@ import os.path import re import socket import string +import subprocess import sys import time import urllib import urllib2 +import urlparse std_headers = { 'User-Agent': 
'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US; rv:1.9.1.2) Gecko/20090729 Firefox/3.5.2', @@ -390,7 +392,36 @@ class FileDownloader(object): if info is None: break + def _download_with_rtmpdump(self, filename, url): + self.report_destination(filename) + + # Check for rtmpdump first + try: + subprocess.call(['rtmpdump', '-h'], stdout=(file(os.path.devnull, 'w')), stderr=subprocess.STDOUT) + except (OSError, IOError): + self.trouble(u'ERROR: RTMP download detected but "rtmpdump" could not be run') + return False + + # Download using rtmpdump. rtmpdump returns exit code 2 when + # the connection was interrumpted and resuming appears to be + # possible. This is part of rtmpdump's normal usage, AFAIK. + retval = subprocess.call(['rtmpdump', '-q', '-r', url, '-o', filename] + [[], ['-e']][self.params.get('continuedl', False)]) + while retval == 2: + self.to_stdout(u'\r[rtmpdump] %s bytes' % os.path.getsize(filename), skip_eol=True) + time.sleep(2.0) # This seems to be needed + retval = subprocess.call(['rtmpdump', '-q', '-e', '-r', url, '-o', filename]) + if retval == 0: + self.to_stdout(u'\r[rtmpdump] %s bytes' % os.path.getsize(filename)) + return True + else: + self.trouble('ERROR: rtmpdump exited with code %d' % retval) + return False + def _do_download(self, filename, url): + # Attempt to download using rtmpdump + if url.startswith('rtmp'): + return self._download_with_rtmpdump(filename, url) + stream = None open_mode = 'wb' basic_request = urllib2.Request(url, None, std_headers) @@ -596,6 +627,10 @@ class YoutubeIE(InfoExtractor): """Report extracted video URL.""" self._downloader.to_stdout(u'[youtube] %s: Format %s not available' % (video_id, format)) + def report_rtmp_download(self): + """Indicate the download will use the RTMP protocol.""" + self._downloader.to_stdout(u'[youtube] RTMP download detected') + def _real_initialize(self): if self._downloader is None: return @@ -694,43 +729,45 @@ class YoutubeIE(InfoExtractor): try: 
self.report_video_info_webpage_download(video_id) video_info_webpage = urllib2.urlopen(request).read() + video_info = urlparse.parse_qs(video_info_webpage) except (urllib2.URLError, httplib.HTTPException, socket.error), err: self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % str(err)) return self.report_information_extraction(video_id) # "t" param - mobj = re.search(r'(?m)&token=([^&]+)(?:&|$)', video_info_webpage) - if mobj is None: + if 'token' not in video_info: # Attempt to see if YouTube has issued an error message - mobj = re.search(r'(?m)&reason=([^&]+)(?:&|$)', video_info_webpage) - if mobj is None: + if 'reason' not in video_info: self._downloader.trouble(u'ERROR: unable to extract "t" parameter for unknown reason') stream = open('reportme-ydl-%s.dat' % time.time(), 'wb') stream.write(video_info_webpage) stream.close() else: - reason = urllib.unquote_plus(mobj.group(1)) + reason = urllib.unquote_plus(video_info['reason'][0]) self._downloader.trouble(u'ERROR: YouTube said: %s' % reason.decode('utf-8')) return - token = urllib.unquote(mobj.group(1)) + token = urllib.unquote_plus(video_info['token'][0]) video_real_url = 'http://www.youtube.com/get_video?video_id=%s&t=%s&eurl=&el=detailpage&ps=default&gl=US&hl=en' % (video_id, token) if format_param is not None: video_real_url = '%s&fmt=%s' % (video_real_url, format_param) + # Check possible RTMP download + if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'): + self.report_rtmp_download() + video_real_url = video_info['conn'][0] + # uploader - mobj = re.search(r'(?m)&author=([^&]+)(?:&|$)', video_info_webpage) - if mobj is None: + if 'author' not in video_info: self._downloader.trouble(u'ERROR: unable to extract uploader nickname') return - video_uploader = urllib.unquote(mobj.group(1)) + video_uploader = urllib.unquote_plus(video_info['author'][0]) # title - mobj = re.search(r'(?m)&title=([^&]*)(?:&|$)', video_info_webpage) - if mobj is None: + if 'title' not in 
video_info: self._downloader.trouble(u'ERROR: unable to extract video title') return - video_title = urllib.unquote_plus(mobj.group(1)) + video_title = urllib.unquote_plus(video_info['title'][0]) video_title = video_title.decode('utf-8') video_title = re.sub(ur'(?u)&(.+?);', self.htmlentity_transform, video_title) video_title = video_title.replace(os.sep, u'%') From 75a4cf3c977bd767639414ccfd600d93d533b74c Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Sun, 3 Jan 2010 13:12:34 +0100 Subject: [PATCH 157/455] Fix minor problems with Youtube user InfoExtractor --- youtube-dl | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/youtube-dl b/youtube-dl index 6b21f8a19..69b1a9a37 100755 --- a/youtube-dl +++ b/youtube-dl @@ -1085,7 +1085,7 @@ class YoutubeUserIE(InfoExtractor): _VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/user/(.*)' _TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s' - _VIDEO_INDICATOR = r'http://gdata.youtube.com/feeds/api/videos/(.*)' + _VIDEO_INDICATOR = r'http://gdata.youtube.com/feeds/api/videos/(.*)' # XXX Fix this. 
_youtube_ie = None def __init__(self, youtube_ie, downloader=None): @@ -1098,7 +1098,7 @@ class YoutubeUserIE(InfoExtractor): def report_download_page(self, username): """Report attempt to download user page.""" - self._downloader.to_stdout(u'[youtube] USR %s: Downloading page ' % (username)) + self._downloader.to_stdout(u'[youtube] user %s: Downloading page ' % (username)) def _real_initialize(self): self._youtube_ie.initialize() @@ -1127,7 +1127,6 @@ class YoutubeUserIE(InfoExtractor): ids_in_page = [] for mobj in re.finditer(self._VIDEO_INDICATOR, page): - print mobj.group(1) if mobj.group(1) not in ids_in_page: ids_in_page.append(mobj.group(1)) video_ids.extend(ids_in_page) From fe788f2c6f22224363484944254d17662bc5a394 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Tue, 5 Jan 2010 12:45:30 +0100 Subject: [PATCH 158/455] Bump version number --- LATEST_VERSION | 2 +- youtube-dl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/LATEST_VERSION b/LATEST_VERSION index dd7f79455..63e7ee31b 100644 --- a/LATEST_VERSION +++ b/LATEST_VERSION @@ -1 +1 @@ -2009.12.26 +2010.01.05 diff --git a/youtube-dl b/youtube-dl index 69b1a9a37..07ef5dcfc 100755 --- a/youtube-dl +++ b/youtube-dl @@ -1212,7 +1212,7 @@ if __name__ == '__main__': # Parse command line parser = optparse.OptionParser( usage='Usage: %prog [options] url...', - version='2009.12.26', + version='2010.01.05', conflict_handler='resolve', ) From a04e80a481cf21b4d9ff02d7fee615ef2af83941 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Wed, 6 Jan 2010 10:49:38 +0100 Subject: [PATCH 159/455] Add flexibility importing the "parse_qs" function (fixes issue #81) --- youtube-dl | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/youtube-dl b/youtube-dl index 07ef5dcfc..bdc33b57f 100755 --- a/youtube-dl +++ b/youtube-dl @@ -18,7 +18,12 @@ import sys import time import urllib import urllib2 -import urlparse + +# parse_qs was moved from the cgi module to the urlparse module 
recently. +try: + from urlparse import parse_qs +except ImportError: + from cgi import parse_qs std_headers = { 'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US; rv:1.9.1.2) Gecko/20090729 Firefox/3.5.2', @@ -729,7 +734,7 @@ class YoutubeIE(InfoExtractor): try: self.report_video_info_webpage_download(video_id) video_info_webpage = urllib2.urlopen(request).read() - video_info = urlparse.parse_qs(video_info_webpage) + video_info = parse_qs(video_info_webpage) except (urllib2.URLError, httplib.HTTPException, socket.error), err: self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % str(err)) return From f1b4bee09d9fbaf5d25c4307f285b7ab4a00461a Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Wed, 6 Jan 2010 10:51:38 +0100 Subject: [PATCH 160/455] Bump version number --- LATEST_VERSION | 2 +- youtube-dl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/LATEST_VERSION b/LATEST_VERSION index 63e7ee31b..382e966d8 100644 --- a/LATEST_VERSION +++ b/LATEST_VERSION @@ -1 +1 @@ -2010.01.05 +2010.01.06 diff --git a/youtube-dl b/youtube-dl index bdc33b57f..640ed4bdc 100755 --- a/youtube-dl +++ b/youtube-dl @@ -1217,7 +1217,7 @@ if __name__ == '__main__': # Parse command line parser = optparse.OptionParser( usage='Usage: %prog [options] url...', - version='2010.01.05', + version='2010.01.06', conflict_handler='resolve', ) From 49c0028a7a1cb3c2f37770e4749a48ef0146fdfe Mon Sep 17 00:00:00 2001 From: "obeythepenguin@gmail.com" Date: Fri, 15 Jan 2010 16:26:41 -0500 Subject: [PATCH 161/455] patched to add Google Video and Photobucket support --- LATEST_VERSION | 2 +- youtube-dl | 158 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 159 insertions(+), 1 deletion(-) diff --git a/LATEST_VERSION b/LATEST_VERSION index 382e966d8..7ed3afa00 100644 --- a/LATEST_VERSION +++ b/LATEST_VERSION @@ -1 +1 @@ -2010.01.06 +2010.01.15 diff --git a/youtube-dl b/youtube-dl index 640ed4bdc..732ebce1d 100755 --- a/youtube-dl +++ 
b/youtube-dl @@ -2,6 +2,7 @@ # -*- coding: utf-8 -*- # Author: Ricardo Garcia Gonzalez # Author: Danny Colligan +# Author: Benjamin Johnson # License: Public domain code import htmlentitydefs import httplib @@ -936,6 +937,159 @@ class MetacafeIE(InfoExtractor): self._downloader.trouble(u'ERROR: format not available for video') +class GoogleIE(InfoExtractor): + """Information extractor for video.google.com.""" + + _VALID_URL = r'(?:http://)?video\.google\.com/videoplay\?docid=([^\&]+).*' + + def __init__(self, downloader=None): + InfoExtractor.__init__(self, downloader) + + @staticmethod + def suitable(url): + return (re.match(GoogleIE._VALID_URL, url) is not None) + + def report_download_webpage(self, video_id): + """Report webpage download.""" + self._downloader.to_stdout(u'[video.google] %s: Downloading webpage' % video_id) + + def report_extraction(self, video_id): + """Report information extraction.""" + self._downloader.to_stdout(u'[video.google] %s: Extracting information' % video_id) + + def _real_initialize(self): + return + + def _real_extract(self, url): + # Extract id from URL + mobj = re.match(self._VALID_URL, url) + if mobj is None: + self._downloader.trouble(u'ERROR: Invalid URL: %s' % url) + return + + video_id = mobj.group(1) + + video_extension = 'mp4' + + # Retrieve video webpage to extract further information + request = urllib2.Request('http://video.google.com/videoplay?docid=%s' % video_id) + try: + self.report_download_webpage(video_id) + webpage = urllib2.urlopen(request).read() + except (urllib2.URLError, httplib.HTTPException, socket.error), err: + self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err)) + return + + # Extract URL, uploader, and title from webpage + self.report_extraction(video_id) + mobj = re.search(r"download_url:'(.*)'", webpage) + if mobj is None: + self._downloader.trouble(u'ERROR: unable to extract media URL') + return + mediaURL = urllib.unquote(mobj.group(1)) + mediaURL = 
mediaURL.replace('\\x3d', '\x3d') + mediaURL = mediaURL.replace('\\x26', '\x26') + + video_url = mediaURL + + mobj = re.search(r'(.*)', webpage) + if mobj is None: + self._downloader.trouble(u'ERROR: unable to extract title') + return + video_title = mobj.group(1).decode('utf-8') + + # Google Video doesn't show uploader nicknames? + video_uploader = 'uploader' + + try: + # Process video information + self._downloader.process_info({ + 'id': video_id.decode('utf-8'), + 'url': video_url.decode('utf-8'), + 'uploader': video_uploader.decode('utf-8'), + 'title': video_title.decode('utf-8'), + 'stitle': video_title.decode('utf-8'), + 'ext': video_extension.decode('utf-8'), + }) + except UnavailableFormatError: + self._downloader.trouble(u'ERROR: format not available for video') + + +class PhotobucketIE(InfoExtractor): + """Information extractor for photobucket.com.""" + + _VALID_URL = r'(?:http://)?(?:[a-z0-9]+\.)?photobucket\.com/.*[\?\&]current=(.*\.flv)' + + def __init__(self, downloader=None): + InfoExtractor.__init__(self, downloader) + + @staticmethod + def suitable(url): + return (re.match(PhotobucketIE._VALID_URL, url) is not None) + + def report_download_webpage(self, video_id): + """Report webpage download.""" + self._downloader.to_stdout(u'[photobucket] %s: Downloading webpage' % video_id) + + def report_extraction(self, video_id): + """Report information extraction.""" + self._downloader.to_stdout(u'[photobucket] %s: Extracting information' % video_id) + + def _real_initialize(self): + return + + def _real_extract(self, url): + # Extract id from URL + mobj = re.match(self._VALID_URL, url) + if mobj is None: + self._downloader.trouble(u'ERROR: Invalid URL: %s' % url) + return + + video_id = mobj.group(1) + + video_extension = 'flv' + + # Retrieve video webpage to extract further information + request = urllib2.Request(url) + try: + self.report_download_webpage(video_id) + webpage = urllib2.urlopen(request).read() + except (urllib2.URLError, 
httplib.HTTPException, socket.error), err: + self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err)) + return + + # Extract URL, uploader, and title from webpage + self.report_extraction(video_id) + mobj = re.search(r'', webpage) + if mobj is None: + self._downloader.trouble(u'ERROR: unable to extract media URL') + return + mediaURL = urllib.unquote(mobj.group(1)) + + video_url = mediaURL + + mobj = re.search(r'(.*) video by (.*) - Photobucket', webpage) + if mobj is None: + self._downloader.trouble(u'ERROR: unable to extract title') + return + video_title = mobj.group(1).decode('utf-8') + + video_uploader = mobj.group(2).decode('utf-8') + + try: + # Process video information + self._downloader.process_info({ + 'id': video_id.decode('utf-8'), + 'url': video_url.decode('utf-8'), + 'uploader': video_uploader.decode('utf-8'), + 'title': video_title.decode('utf-8'), + 'stitle': video_title.decode('utf-8'), + 'ext': video_extension.decode('utf-8'), + }) + except UnavailableFormatError: + self._downloader.trouble(u'ERROR: format not available for video') + + class YoutubeSearchIE(InfoExtractor): """Information Extractor for YouTube search queries.""" _VALID_QUERY = r'ytsearch(\d+|all)?:[\s\S]+' @@ -1314,6 +1468,8 @@ if __name__ == '__main__': youtube_pl_ie = YoutubePlaylistIE(youtube_ie) youtube_user_ie = YoutubeUserIE(youtube_ie) youtube_search_ie = YoutubeSearchIE(youtube_ie) + google_ie = GoogleIE() + photobucket_ie = PhotobucketIE() # File downloader fd = FileDownloader({ @@ -1339,6 +1495,8 @@ if __name__ == '__main__': fd.add_info_extractor(youtube_user_ie) fd.add_info_extractor(metacafe_ie) fd.add_info_extractor(youtube_ie) + fd.add_info_extractor(google_ie) + fd.add_info_extractor(photobucket_ie) # Update version if opts.update_self: From 60f8049d05ff18d3cedaa80bcb9b089a0d4b3e4c Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Sat, 9 Jan 2010 23:53:47 +0100 Subject: [PATCH 162/455] Only verify the URL when it's an HTTP download --- 
youtube-dl | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/youtube-dl b/youtube-dl index 732ebce1d..fac624e06 100755 --- a/youtube-dl +++ b/youtube-dl @@ -316,10 +316,12 @@ class FileDownloader(object): """Process a single dictionary returned by an InfoExtractor.""" # Do nothing else if in simulate mode if self.params.get('simulate', False): - try: - info_dict['url'] = self.verify_url(info_dict['url'].encode('utf-8')).decode('utf-8') - except (OSError, IOError, urllib2.URLError, httplib.HTTPException, socket.error), err: - raise UnavailableFormatError + # Verify URL if it's an HTTP one + if info_dict['url'].startswith('http'): + try: + info_dict['url'] = self.verify_url(info_dict['url'].encode('utf-8')).decode('utf-8') + except (OSError, IOError, urllib2.URLError, httplib.HTTPException, socket.error), err: + raise UnavailableFormatError # Forced printings if self.params.get('forcetitle', False): From 1c1821f8eb1b6c8f87e8038441343d887c8ef21f Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Tue, 19 Jan 2010 20:04:56 +0100 Subject: [PATCH 163/455] Improve rtmpdump support --- youtube-dl | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/youtube-dl b/youtube-dl index fac624e06..6b56f3f98 100755 --- a/youtube-dl +++ b/youtube-dl @@ -413,11 +413,12 @@ class FileDownloader(object): # Download using rtmpdump. rtmpdump returns exit code 2 when # the connection was interrumpted and resuming appears to be # possible. This is part of rtmpdump's normal usage, AFAIK. 
- retval = subprocess.call(['rtmpdump', '-q', '-r', url, '-o', filename] + [[], ['-e']][self.params.get('continuedl', False)]) - while retval == 2: + basic_args = ['rtmpdump', '-q', '-r', url, '-o', filename] + retval = subprocess.call(basic_args + [[], ['-e', '-k', '1']][self.params.get('continuedl', False)]) + while retval == 2 or retval == 1: self.to_stdout(u'\r[rtmpdump] %s bytes' % os.path.getsize(filename), skip_eol=True) time.sleep(2.0) # This seems to be needed - retval = subprocess.call(['rtmpdump', '-q', '-e', '-r', url, '-o', filename]) + retval = subprocess.call(basic_args + ['-e'] + [[], ['-k', '1']][retval == 1]) if retval == 0: self.to_stdout(u'\r[rtmpdump] %s bytes' % os.path.getsize(filename)) return True From 91bce611c7634740a12987268a032c49fab2387d Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Tue, 19 Jan 2010 20:26:44 +0100 Subject: [PATCH 164/455] Bump version number --- LATEST_VERSION | 2 +- youtube-dl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/LATEST_VERSION b/LATEST_VERSION index 7ed3afa00..0f292c9a2 100644 --- a/LATEST_VERSION +++ b/LATEST_VERSION @@ -1 +1 @@ -2010.01.15 +2010.01.19 diff --git a/youtube-dl b/youtube-dl index 6b56f3f98..9e2862123 100755 --- a/youtube-dl +++ b/youtube-dl @@ -1374,7 +1374,7 @@ if __name__ == '__main__': # Parse command line parser = optparse.OptionParser( usage='Usage: %prog [options] url...', - version='2010.01.06', + version='2010.01.19', conflict_handler='resolve', ) From c05fc6a3451cf7617940687b1a5a0003b1d1fb13 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Sun, 31 Jan 2010 09:25:52 +0100 Subject: [PATCH 165/455] Support simplest new URLs in YouTube --- youtube-dl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index 9e2862123..ff8ad7177 100755 --- a/youtube-dl +++ b/youtube-dl @@ -571,7 +571,7 @@ class InfoExtractor(object): class YoutubeIE(InfoExtractor): """Information extractor for youtube.com.""" - _VALID_URL = 
r'^((?:http://)?(?:\w+\.)?youtube\.com/(?:(?:v/)|(?:(?:watch(?:\.php)?)?\?(?:.+&)?v=)))?([0-9A-Za-z_-]+)(?(1).+)?$' + _VALID_URL = r'^((?:http://)?(?:\w+\.)?youtube\.com/(?:(?:v/)|(?:(?:watch(?:\.php)?)?[\?#](?:.+&)?v=)))?([0-9A-Za-z_-]+)(?(1).+)?$' _LANG_URL = r'http://uk.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1' _LOGIN_URL = 'http://www.youtube.com/signup?next=/&gl=US&hl=en' _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en' From 490fd7aea74dd4880765f88ce4a97e01ad2733af Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Fri, 12 Feb 2010 21:01:55 +0100 Subject: [PATCH 166/455] Cherry-pick obeythepenguin's changes and merge them into main branch --- youtube-dl | 197 +++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 160 insertions(+), 37 deletions(-) diff --git a/youtube-dl b/youtube-dl index ff8ad7177..792acd3ae 100755 --- a/youtube-dl +++ b/youtube-dl @@ -51,6 +51,43 @@ def preferredencoding(): yield pref return yield_preferredencoding().next() +def htmlentity_transform(matchobj): + """Transforms an HTML entity to a Unicode character. + + This function receives a match object and is intended to be used with + the re.sub() function. + """ + entity = matchobj.group(1) + + # Known non-numeric HTML entity + if entity in htmlentitydefs.name2codepoint: + return unichr(htmlentitydefs.name2codepoint[entity]) + + # Unicode character + mobj = re.match(ur'(?u)#(x?\d+)', entity) + if mobj is not None: + numstr = mobj.group(1) + if numstr.startswith(u'x'): + base = 16 + numstr = u'0%s' % numstr + else: + base = 10 + return unichr(long(numstr, base)) + + # Unknown entity in name, return its literal representation + return (u'&%s;' % entity) + +def sanitize_title(utitle): + """Sanitizes a video title so it could be used as part of a filename. + + This triggers different transformations based on the platform we + are running. 
+ """ + utitle = re.sub(ur'(?u)&(.+?);', htmlentity_transform, utitle) + if sys.platform == 'win32': + return re.replace(ur'<>:"\|\?\*', u'-', title) + return utitle.replace(unicode(os.sep), u'%') + class DownloadError(Exception): """Download Error exception. @@ -325,9 +362,9 @@ class FileDownloader(object): # Forced printings if self.params.get('forcetitle', False): - print info_dict['title'].encode(preferredencoding()) + print info_dict['title'].encode(preferredencoding(), 'xmlcharrefreplace') if self.params.get('forceurl', False): - print info_dict['url'].encode(preferredencoding()) + print info_dict['url'].encode(preferredencoding(), 'xmlcharrefreplace') return @@ -589,29 +626,6 @@ class YoutubeIE(InfoExtractor): def suitable(url): return (re.match(YoutubeIE._VALID_URL, url) is not None) - @staticmethod - def htmlentity_transform(matchobj): - """Transforms an HTML entity to a Unicode character.""" - entity = matchobj.group(1) - - # Known non-numeric HTML entity - if entity in htmlentitydefs.name2codepoint: - return unichr(htmlentitydefs.name2codepoint[entity]) - - # Unicode character - mobj = re.match(ur'(?u)#(x?\d+)', entity) - if mobj is not None: - numstr = mobj.group(1) - if numstr.startswith(u'x'): - base = 16 - numstr = u'0%s' % numstr - else: - base = 10 - return unichr(long(numstr, base)) - - # Unknown entity in name, return its literal representation - return (u'&%s;' % entity) - def report_lang(self): """Report attempt to set language.""" self._downloader.to_stdout(u'[youtube] Setting language') @@ -778,8 +792,7 @@ class YoutubeIE(InfoExtractor): return video_title = urllib.unquote_plus(video_info['title'][0]) video_title = video_title.decode('utf-8') - video_title = re.sub(ur'(?u)&(.+?);', self.htmlentity_transform, video_title) - video_title = video_title.replace(os.sep, u'%') + video_title = sanitize_title(video_title) # simplified title simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title) @@ -919,6 +932,7 @@ class 
MetacafeIE(InfoExtractor): self._downloader.trouble(u'ERROR: unable to extract title') return video_title = mobj.group(1).decode('utf-8') + video_title = sanitize_title(video_title) mobj = re.search(r'(?ms)By:\s*(.+?)<', webpage) if mobj is None: @@ -943,7 +957,7 @@ class MetacafeIE(InfoExtractor): class GoogleIE(InfoExtractor): """Information extractor for video.google.com.""" - _VALID_URL = r'(?:http://)?video\.google\.com/videoplay\?docid=([^\&]+).*' + _VALID_URL = r'(?:http://)?video\.google\.(?:com(?:\.au)?|co\.(?:uk|jp|kr|cr)|ca|de|es|fr|it|nl|pl)/videoplay\?docid=([^\&]+).*' def __init__(self, downloader=None): InfoExtractor.__init__(self, downloader) @@ -975,7 +989,7 @@ class GoogleIE(InfoExtractor): video_extension = 'mp4' # Retrieve video webpage to extract further information - request = urllib2.Request('http://video.google.com/videoplay?docid=%s' % video_id) + request = urllib2.Request('http://video.google.com/videoplay?docid=%s&hl=en&oe=utf-8' % video_id) try: self.report_download_webpage(video_id) webpage = urllib2.urlopen(request).read() @@ -985,7 +999,10 @@ class GoogleIE(InfoExtractor): # Extract URL, uploader, and title from webpage self.report_extraction(video_id) - mobj = re.search(r"download_url:'(.*)'", webpage) + mobj = re.search(r"download_url:'([^']+)'", webpage) + if mobj is None: + video_extension = 'flv' + mobj = re.search(r"(?i)videoUrl\\x3d(.+?)\\x26", webpage) if mobj is None: self._downloader.trouble(u'ERROR: unable to extract media URL') return @@ -1000,9 +1017,10 @@ class GoogleIE(InfoExtractor): self._downloader.trouble(u'ERROR: unable to extract title') return video_title = mobj.group(1).decode('utf-8') + video_title = sanitize_title(video_title) # Google Video doesn't show uploader nicknames? 
- video_uploader = 'uploader' + video_uploader = 'NA' try: # Process video information @@ -1010,8 +1028,8 @@ class GoogleIE(InfoExtractor): 'id': video_id.decode('utf-8'), 'url': video_url.decode('utf-8'), 'uploader': video_uploader.decode('utf-8'), - 'title': video_title.decode('utf-8'), - 'stitle': video_title.decode('utf-8'), + 'title': video_title, + 'stitle': video_title, 'ext': video_extension.decode('utf-8'), }) except UnavailableFormatError: @@ -1076,6 +1094,7 @@ class PhotobucketIE(InfoExtractor): self._downloader.trouble(u'ERROR: unable to extract title') return video_title = mobj.group(1).decode('utf-8') + video_title = sanitize_title(video_title) video_uploader = mobj.group(2).decode('utf-8') @@ -1084,9 +1103,102 @@ class PhotobucketIE(InfoExtractor): self._downloader.process_info({ 'id': video_id.decode('utf-8'), 'url': video_url.decode('utf-8'), - 'uploader': video_uploader.decode('utf-8'), - 'title': video_title.decode('utf-8'), - 'stitle': video_title.decode('utf-8'), + 'uploader': video_uploader, + 'title': video_title, + 'stitle': video_title, + 'ext': video_extension.decode('utf-8'), + }) + except UnavailableFormatError: + self._downloader.trouble(u'ERROR: format not available for video') + + +class GenericIE(InfoExtractor): + """Generic last-resort information extractor.""" + + def __init__(self, downloader=None): + InfoExtractor.__init__(self, downloader) + + @staticmethod + def suitable(url): + return True + + def report_download_webpage(self, video_id): + """Report webpage download.""" + self._downloader.to_stdout(u'WARNING: Falling back on generic information extractor.') + self._downloader.to_stdout(u'[generic] %s: Downloading webpage' % video_id) + + def report_extraction(self, video_id): + """Report information extraction.""" + self._downloader.to_stdout(u'[generic] %s: Extracting information' % video_id) + + def _real_initialize(self): + return + + def _real_extract(self, url): + video_id = url.split('/')[-1] + request = 
urllib2.Request(url) + try: + self.report_download_webpage(video_id) + webpage = urllib2.urlopen(request).read() + except (urllib2.URLError, httplib.HTTPException, socket.error), err: + self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err)) + return + except ValueError, err: + # since this is the last-resort InfoExtractor, if + # this error is thrown, it'll be thrown here + self._downloader.trouble(u'ERROR: Invalid URL: %s' % url) + return + + # Start with something easy: JW Player in SWFObject + mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage) + if mobj is None: + # Broaden the search a little bit + mobj = re.search(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage) + if mobj is None: + self._downloader.trouble(u'ERROR: Invalid URL: %s' % url) + return + + # It's possible that one of the regexes + # matched, but returned an empty group: + if mobj.group(1) is None: + self._downloader.trouble(u'ERROR: Invalid URL: %s' % url) + return + + video_url = urllib.unquote(mobj.group(1)) + video_id = os.path.basename(video_url) + + # here's a fun little line of code for you: + video_extension = os.path.splitext(video_id)[1][1:] + video_id = os.path.splitext(video_id)[0] + + # it's tempting to parse this further, but you would + # have to take into account all the variations like + # Video Title - Site Name + # Site Name | Video Title + # Video Title - Tagline | Site Name + # and so on and so forth; it's just not practical + mobj = re.search(r'(.*)', webpage) + if mobj is None: + self._downloader.trouble(u'ERROR: unable to extract title') + return + video_title = mobj.group(1).decode('utf-8') + video_title = sanitize_title(video_title) + + # video uploader is domain name + mobj = re.match(r'(?:https?://)?([^/]*)/.*', url) + if mobj is None: + self._downloader.trouble(u'ERROR: unable to extract title') + return + video_uploader = mobj.group(1).decode('utf-8') + + try: + # Process video information + 
self._downloader.process_info({ + 'id': video_id.decode('utf-8'), + 'url': video_url.decode('utf-8'), + 'uploader': video_uploader, + 'title': video_title, + 'stitle': video_title, 'ext': video_extension.decode('utf-8'), }) except UnavailableFormatError: @@ -1112,6 +1224,7 @@ class YoutubeSearchIE(InfoExtractor): def report_download_page(self, query, pagenum): """Report attempt to download playlist page with given number.""" + query = query.decode(preferredencoding()) self._downloader.to_stdout(u'[youtube] query "%s": Downloading page %s' % (query, pagenum)) def _real_initialize(self): @@ -1125,6 +1238,7 @@ class YoutubeSearchIE(InfoExtractor): prefix, query = query.split(':') prefix = prefix[8:] + query = query.encode('utf-8') if prefix == '': self._download_n_results(query, 1) return @@ -1374,7 +1488,7 @@ if __name__ == '__main__': # Parse command line parser = optparse.OptionParser( usage='Usage: %prog [options] url...', - version='2010.01.19', + version='INTERNAL', conflict_handler='resolve', ) @@ -1448,6 +1562,10 @@ if __name__ == '__main__': sys.exit(u'ERROR: batch file could not be read') all_urls = batchurls + args + # Make sure all URLs are in our preferred encoding + for i in range(0, len(all_urls)): + all_urls[i] = unicode(all_urls[i], preferredencoding()) + # Conflicting, missing and erroneous options if opts.usenetrc and (opts.username is not None or opts.password is not None): parser.error(u'using .netrc conflicts with giving username/password') @@ -1473,6 +1591,7 @@ if __name__ == '__main__': youtube_search_ie = YoutubeSearchIE(youtube_ie) google_ie = GoogleIE() photobucket_ie = PhotobucketIE() + generic_ie = GenericIE() # File downloader fd = FileDownloader({ @@ -1501,6 +1620,10 @@ if __name__ == '__main__': fd.add_info_extractor(google_ie) fd.add_info_extractor(photobucket_ie) + # This must come last since it's the + # fallback if none of the others work + fd.add_info_extractor(generic_ie) + # Update version if opts.update_self: update_self(fd, 
sys.argv[0]) From 4cfeb46544e274fb7a855bfe017b44186021872d Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Fri, 12 Feb 2010 21:07:21 +0100 Subject: [PATCH 167/455] Update user-agent string --- youtube-dl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index 792acd3ae..2e9ac6265 100755 --- a/youtube-dl +++ b/youtube-dl @@ -27,7 +27,7 @@ except ImportError: from cgi import parse_qs std_headers = { - 'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US; rv:1.9.1.2) Gecko/20090729 Firefox/3.5.2', + 'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.2) Gecko/20100115 Firefox/3.6' 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7', 'Accept': 'text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5', 'Accept-Language': 'en-us,en;q=0.5', From ce9c6a30977f06338f323c0c8346782da323f24b Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Sat, 13 Feb 2010 00:19:46 +0100 Subject: [PATCH 168/455] Fix problem with sanitize_title not replacing Windows directory separator --- youtube-dl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index 2e9ac6265..19159bb85 100755 --- a/youtube-dl +++ b/youtube-dl @@ -85,7 +85,7 @@ def sanitize_title(utitle): """ utitle = re.sub(ur'(?u)&(.+?);', htmlentity_transform, utitle) if sys.platform == 'win32': - return re.replace(ur'<>:"\|\?\*', u'-', title) + utitle = re.replace(ur'<>:"\|\?\*\\', u'-', utitle) return utitle.replace(unicode(os.sep), u'%') class DownloadError(Exception): From c201ebc915355b0082816844498fa56d6d3e2789 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Sat, 13 Feb 2010 13:27:41 +0100 Subject: [PATCH 169/455] Fix SyntaxError triggered by mistake in user-agent commit --- youtube-dl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index 19159bb85..3533c0f55 100755 --- a/youtube-dl +++ b/youtube-dl @@ -27,7 +27,7 @@ except 
ImportError: from cgi import parse_qs std_headers = { - 'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.2) Gecko/20100115 Firefox/3.6' + 'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.2) Gecko/20100115 Firefox/3.6', 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7', 'Accept': 'text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5', 'Accept-Language': 'en-us,en;q=0.5', From 31bcb4800152ec3d7a3efb9e59018df989b11153 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Sat, 13 Feb 2010 13:29:25 +0100 Subject: [PATCH 170/455] Tweak final filename in the open attempt, to be platform and filename-agnostic --- youtube-dl | 32 ++++++++++++++++++++++++-------- 1 file changed, 24 insertions(+), 8 deletions(-) diff --git a/youtube-dl b/youtube-dl index 3533c0f55..c7d3752dc 100755 --- a/youtube-dl +++ b/youtube-dl @@ -78,16 +78,32 @@ def htmlentity_transform(matchobj): return (u'&%s;' % entity) def sanitize_title(utitle): - """Sanitizes a video title so it could be used as part of a filename. - - This triggers different transformations based on the platform we - are running. - """ + """Sanitizes a video title so it could be used as part of a filename.""" utitle = re.sub(ur'(?u)&(.+?);', htmlentity_transform, utitle) - if sys.platform == 'win32': - utitle = re.replace(ur'<>:"\|\?\*\\', u'-', utitle) return utitle.replace(unicode(os.sep), u'%') +def sanitize_open(filename, open_mode): + """Try to open the given filename, and slightly tweak it if this fails. + + Attempts to open the given filename. If this fails, it tries to change + the filename slightly, step by step, until it's either able to open it + or it fails and raises a final exception, like the standard open() + function. + + It returns the tuple (stream, definitive_file_name). 
+ """ + try: + stream = open(filename, open_mode) + return (stream, filename) + except (IOError, OSError), err: + # In case of error, try to remove win32 forbidden chars + filename = re.sub(ur'[<>:"\|\?\*]', u'#', filename) + + # An exception here should be caught in the caller + stream = open(filename, open_mode) + return (stream, filename) + + class DownloadError(Exception): """Download Error exception. @@ -522,7 +538,7 @@ class FileDownloader(object): # Open file just in time if stream is None: try: - stream = open(filename, open_mode) + (stream, filename) = sanitize_open(filename, open_mode) self.report_destination(filename) except (OSError, IOError), err: self.trouble('ERROR: unable to open for writing: %s' % str(err)) From 8cc468de759d5fb729edb0f150fce91159b75e5d Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Sat, 13 Feb 2010 17:27:09 +0100 Subject: [PATCH 171/455] Bump version number --- LATEST_VERSION | 2 +- youtube-dl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/LATEST_VERSION b/LATEST_VERSION index 0f292c9a2..1039be31f 100644 --- a/LATEST_VERSION +++ b/LATEST_VERSION @@ -1 +1 @@ -2010.01.19 +2010.02.13 diff --git a/youtube-dl b/youtube-dl index c7d3752dc..47c646586 100755 --- a/youtube-dl +++ b/youtube-dl @@ -1504,7 +1504,7 @@ if __name__ == '__main__': # Parse command line parser = optparse.OptionParser( usage='Usage: %prog [options] url...', - version='INTERNAL', + version='2010.02.13', conflict_handler='resolve', ) From bd3cdf6dc42d48d010fad03d8d601b3920c7a8a7 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Sun, 21 Feb 2010 00:06:00 +0100 Subject: [PATCH 172/455] Do not pass URLs around in Unicode form (fixes issue #92) --- youtube-dl | 4 ---- 1 file changed, 4 deletions(-) diff --git a/youtube-dl b/youtube-dl index 47c646586..fe8a59160 100755 --- a/youtube-dl +++ b/youtube-dl @@ -1578,10 +1578,6 @@ if __name__ == '__main__': sys.exit(u'ERROR: batch file could not be read') all_urls = batchurls + args - # Make sure all 
URLs are in our preferred encoding - for i in range(0, len(all_urls)): - all_urls[i] = unicode(all_urls[i], preferredencoding()) - # Conflicting, missing and erroneous options if opts.usenetrc and (opts.username is not None or opts.password is not None): parser.error(u'using .netrc conflicts with giving username/password') From 31cbdaafd4bf941b234c0e52413b1354f2f29e94 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Sun, 21 Feb 2010 00:13:34 +0100 Subject: [PATCH 173/455] Properly support simple titles in the newest InfoExtractors --- youtube-dl | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/youtube-dl b/youtube-dl index fe8a59160..9e15a6c69 100755 --- a/youtube-dl +++ b/youtube-dl @@ -1034,6 +1034,7 @@ class GoogleIE(InfoExtractor): return video_title = mobj.group(1).decode('utf-8') video_title = sanitize_title(video_title) + simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title) # Google Video doesn't show uploader nicknames? video_uploader = 'NA' @@ -1045,7 +1046,7 @@ class GoogleIE(InfoExtractor): 'url': video_url.decode('utf-8'), 'uploader': video_uploader.decode('utf-8'), 'title': video_title, - 'stitle': video_title, + 'stitle': simple_title, 'ext': video_extension.decode('utf-8'), }) except UnavailableFormatError: @@ -1111,6 +1112,7 @@ class PhotobucketIE(InfoExtractor): return video_title = mobj.group(1).decode('utf-8') video_title = sanitize_title(video_title) + simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title) video_uploader = mobj.group(2).decode('utf-8') @@ -1121,7 +1123,7 @@ class PhotobucketIE(InfoExtractor): 'url': video_url.decode('utf-8'), 'uploader': video_uploader, 'title': video_title, - 'stitle': video_title, + 'stitle': simple_title, 'ext': video_extension.decode('utf-8'), }) except UnavailableFormatError: @@ -1199,6 +1201,7 @@ class GenericIE(InfoExtractor): return video_title = mobj.group(1).decode('utf-8') video_title = sanitize_title(video_title) + 
simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title) # video uploader is domain name mobj = re.match(r'(?:https?://)?([^/]*)/.*', url) @@ -1214,7 +1217,7 @@ class GenericIE(InfoExtractor): 'url': video_url.decode('utf-8'), 'uploader': video_uploader, 'title': video_title, - 'stitle': video_title, + 'stitle': simple_title, 'ext': video_extension.decode('utf-8'), }) except UnavailableFormatError: From 43ab0ca43288f084df1306ffe0acd18c3c97f8b3 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Sun, 28 Feb 2010 23:49:14 +0100 Subject: [PATCH 174/455] Do not error out on problems printing the file name --- youtube-dl | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/youtube-dl b/youtube-dl index 9e15a6c69..485484c92 100755 --- a/youtube-dl +++ b/youtube-dl @@ -300,11 +300,15 @@ class FileDownloader(object): self._pps.append(pp) pp.set_downloader(self) - def to_stdout(self, message, skip_eol=False): + def to_stdout(self, message, skip_eol=False, ignore_encoding_errors=False): """Print message to stdout if not in quiet mode.""" - if not self.params.get('quiet', False): - print (u'%s%s' % (message, [u'\n', u''][skip_eol])).encode(preferredencoding()), + try: + if not self.params.get('quiet', False): + print (u'%s%s' % (message, [u'\n', u''][skip_eol])).encode(preferredencoding()), sys.stdout.flush() + except (UnicodeEncodeError), err: + if not ignore_encoding_errors: + raise def to_stderr(self, message): """Print message to stderr.""" @@ -342,7 +346,7 @@ class FileDownloader(object): def report_destination(self, filename): """Report destination filename.""" - self.to_stdout(u'[download] Destination: %s' % filename) + self.to_stdout(u'[download] Destination: %s' % filename, ignore_encoding_errors=True) def report_progress(self, percent_str, data_len_str, speed_str, eta_str): """Report download progress.""" @@ -355,7 +359,10 @@ class FileDownloader(object): def report_file_already_downloaded(self, file_name): 
"""Report file has already been fully downloaded.""" - self.to_stdout(u'[download] %s has already been downloaded' % file_name) + try: + self.to_stdout(u'[download] %s has already been downloaded' % file_name) + except (UnicodeEncodeError), err: + self.to_stdout(u'[download] The file has already been downloaded') def report_unable_to_resume(self): """Report it was impossible to resume download.""" From 1392f3f52cffec6054862c9031cc54e377131014 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Sun, 28 Feb 2010 23:49:52 +0100 Subject: [PATCH 175/455] Give preference to format 34 before format 5 in quality list --- youtube-dl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index 485484c92..10dd7c791 100755 --- a/youtube-dl +++ b/youtube-dl @@ -636,7 +636,7 @@ class YoutubeIE(InfoExtractor): _LOGIN_URL = 'http://www.youtube.com/signup?next=/&gl=US&hl=en' _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en' _NETRC_MACHINE = 'youtube' - _available_formats = ['37', '22', '35', '18', '5', '17', '13', None] # listed in order of priority for -b flag + _available_formats = ['37', '22', '35', '18', '34', '5', '17', '13', None] # listed in order of priority for -b flag _video_extensions = { '13': '3gp', '17': 'mp4', From d98352478131b2fc6d0d4dcdd3a1fbad4efa7ba1 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Sun, 7 Mar 2010 11:24:22 +0100 Subject: [PATCH 176/455] Add --no-progress option (fixes issue #98) --- youtube-dl | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index 10dd7c791..c00810f35 100755 --- a/youtube-dl +++ b/youtube-dl @@ -192,6 +192,7 @@ class FileDownloader(object): ratelimit: Download speed limit, in bytes/sec. nooverwrites: Prevent overwriting files. continuedl: Try to continue downloads if possible. + noprogress: Do not print the progress bar. 
""" params = None @@ -350,6 +351,8 @@ class FileDownloader(object): def report_progress(self, percent_str, data_len_str, speed_str, eta_str): """Report download progress.""" + if self.params.get('noprogress', False): + return self.to_stdout(u'\r[download] %s of %s at %s ETA %s' % (percent_str, data_len_str, speed_str, eta_str), skip_eol=True) @@ -370,7 +373,10 @@ class FileDownloader(object): def report_finish(self): """Report download finished.""" - self.to_stdout(u'') + if self.params.get('noprogress', False): + self.to_stdout(u'[download] Download completed') + else: + self.to_stdout(u'') def process_info(self, info_dict): """Process a single dictionary returned by an InfoExtractor.""" @@ -1558,6 +1564,8 @@ if __name__ == '__main__': action='store_true', dest='geturl', help='simulate, quiet but print URL', default=False) verbosity.add_option('-e', '--get-title', action='store_true', dest='gettitle', help='simulate, quiet but print title', default=False) + verbosity.add_option('--no-progress', + action='store_true', dest='noprogress', help='do not print progress bar', default=False) parser.add_option_group(verbosity) filesystem = optparse.OptionGroup(parser, 'Filesystem Options') @@ -1633,6 +1641,7 @@ if __name__ == '__main__': 'ratelimit': opts.ratelimit, 'nooverwrites': opts.nooverwrites, 'continuedl': opts.continue_dl, + 'noprogress': opts.noprogress, }) fd.add_info_extractor(youtube_search_ie) fd.add_info_extractor(youtube_pl_ie) From 44e16fa17f2b77cdc44801fc1533245068b02da7 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Sun, 7 Mar 2010 11:25:30 +0100 Subject: [PATCH 177/455] Bump version number --- LATEST_VERSION | 2 +- youtube-dl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/LATEST_VERSION b/LATEST_VERSION index 1039be31f..69e947131 100644 --- a/LATEST_VERSION +++ b/LATEST_VERSION @@ -1 +1 @@ -2010.02.13 +2010.03.07 diff --git a/youtube-dl b/youtube-dl index c00810f35..97e25f0e2 100755 --- a/youtube-dl +++ b/youtube-dl @@ -1520,7 
+1520,7 @@ if __name__ == '__main__': # Parse command line parser = optparse.OptionParser( usage='Usage: %prog [options] url...', - version='2010.02.13', + version='2010.03.07', conflict_handler='resolve', ) From 79f193e5d87f68b345bc34f6b9bca36c58eee68c Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Thu, 11 Mar 2010 22:56:47 +0100 Subject: [PATCH 178/455] Do not use the final URL for -g --- youtube-dl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index 97e25f0e2..4530c5da2 100755 --- a/youtube-dl +++ b/youtube-dl @@ -385,7 +385,7 @@ class FileDownloader(object): # Verify URL if it's an HTTP one if info_dict['url'].startswith('http'): try: - info_dict['url'] = self.verify_url(info_dict['url'].encode('utf-8')).decode('utf-8') + self.verify_url(info_dict['url'].encode('utf-8')).decode('utf-8') except (OSError, IOError, urllib2.URLError, httplib.HTTPException, socket.error), err: raise UnavailableFormatError From 5caacaddc65c7bf756c7c2830043ded3b7685a85 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Tue, 16 Mar 2010 21:31:53 +0100 Subject: [PATCH 179/455] Bump version number --- LATEST_VERSION | 2 +- youtube-dl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/LATEST_VERSION b/LATEST_VERSION index 69e947131..92b60dcf3 100644 --- a/LATEST_VERSION +++ b/LATEST_VERSION @@ -1 +1 @@ -2010.03.07 +2010.03.13 diff --git a/youtube-dl b/youtube-dl index 4530c5da2..742fabb49 100755 --- a/youtube-dl +++ b/youtube-dl @@ -1520,7 +1520,7 @@ if __name__ == '__main__': # Parse command line parser = optparse.OptionParser( usage='Usage: %prog [options] url...', - version='2010.03.07', + version='2010.03.13', conflict_handler='resolve', ) From 131bc7651a546d09028cdbb231339b624bf1fbbf Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Fri, 19 Mar 2010 17:51:20 +0100 Subject: [PATCH 180/455] Make the "-" output file name equivalent to /dev/stdout (fixes issue #103) --- youtube-dl | 2 ++ 1 file changed, 2 insertions(+) 
diff --git a/youtube-dl b/youtube-dl index 742fabb49..b79ab0294 100755 --- a/youtube-dl +++ b/youtube-dl @@ -93,6 +93,8 @@ def sanitize_open(filename, open_mode): It returns the tuple (stream, definitive_file_name). """ try: + if filename == u'-': + return (sys.stdout, filename) stream = open(filename, open_mode) return (stream, filename) except (IOError, OSError), err: From 6ba562b0e46f266fa674eb56d86a3b66f4a007d2 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Fri, 19 Mar 2010 18:15:43 +0100 Subject: [PATCH 181/455] Added --all-format option from tweaked patch (fixes issue #102) --- youtube-dl | 38 ++++++++++++++++++++++++++++++++------ 1 file changed, 32 insertions(+), 6 deletions(-) diff --git a/youtube-dl b/youtube-dl index b79ab0294..407a393a7 100755 --- a/youtube-dl +++ b/youtube-dl @@ -593,6 +593,7 @@ class InfoExtractor(object): title: Literal title. stitle: Simplified title. ext: Video filename extension. + format: Video format. Subclasses of this one should re-define the _real_initialize() and _real_extract() methods, as well as the suitable() static method. 
@@ -764,6 +765,7 @@ class YoutubeIE(InfoExtractor): # Downloader parameters best_quality = False + all_formats = False format_param = None quality_index = 0 if self._downloader is not None: @@ -772,6 +774,9 @@ class YoutubeIE(InfoExtractor): if format_param == '0': format_param = self._available_formats[quality_index] best_quality = True + elif format_param == '-1': + format_param = self._available_formats[quality_index] + all_formats = True while True: # Extension @@ -838,20 +843,35 @@ class YoutubeIE(InfoExtractor): 'title': video_title, 'stitle': simple_title, 'ext': video_extension.decode('utf-8'), + 'format': (format_param is None and u'NA' or format_param.decode('utf-8')), }) + if all_formats: + if quality_index == len(self._available_formats) - 1: + # None left to get + return + else: + quality_index += 1 + format_param = self._available_formats[quality_index] + if format_param == None: + return + continue + return except UnavailableFormatError, err: - if best_quality: + if best_quality or all_formats: if quality_index == len(self._available_formats) - 1: # I don't ever expect this to happen - self._downloader.trouble(u'ERROR: no known formats available for video') + if not all_formats: + self._downloader.trouble(u'ERROR: no known formats available for video') return else: self.report_unavailable_format(video_id, format_param) quality_index += 1 format_param = self._available_formats[quality_index] + if format_param == None: + return continue else: self._downloader.trouble('ERROR: format not available for video') @@ -980,6 +1000,7 @@ class MetacafeIE(InfoExtractor): 'title': video_title, 'stitle': simple_title, 'ext': video_extension.decode('utf-8'), + 'format': u'NA', }) except UnavailableFormatError: self._downloader.trouble(u'ERROR: format not available for video') @@ -1051,18 +1072,16 @@ class GoogleIE(InfoExtractor): video_title = sanitize_title(video_title) simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title) - # Google 
Video doesn't show uploader nicknames? - video_uploader = 'NA' - try: # Process video information self._downloader.process_info({ 'id': video_id.decode('utf-8'), 'url': video_url.decode('utf-8'), - 'uploader': video_uploader.decode('utf-8'), + 'uploader': u'NA', 'title': video_title, 'stitle': simple_title, 'ext': video_extension.decode('utf-8'), + 'format': u'NA', }) except UnavailableFormatError: self._downloader.trouble(u'ERROR: format not available for video') @@ -1140,6 +1159,7 @@ class PhotobucketIE(InfoExtractor): 'title': video_title, 'stitle': simple_title, 'ext': video_extension.decode('utf-8'), + 'format': u'NA', }) except UnavailableFormatError: self._downloader.trouble(u'ERROR: format not available for video') @@ -1234,6 +1254,7 @@ class GenericIE(InfoExtractor): 'title': video_title, 'stitle': simple_title, 'ext': video_extension.decode('utf-8'), + 'format': u'NA', }) except UnavailableFormatError: self._downloader.trouble(u'ERROR: format not available for video') @@ -1555,6 +1576,8 @@ if __name__ == '__main__': action='store_const', dest='format', help='alias for -f 17', const='17') video_format.add_option('-d', '--high-def', action='store_const', dest='format', help='alias for -f 22', const='22') + video_format.add_option('--all-formats', + action='store_const', dest='format', help='download all available video formats', const='-1') parser.add_option_group(video_format) verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options') @@ -1636,6 +1659,9 @@ if __name__ == '__main__': 'simulate': (opts.simulate or opts.geturl or opts.gettitle), 'format': opts.format, 'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(preferredencoding())) + or (opts.format == '-1' and opts.usetitle and u'%(stitle)s-%(id)s-%(format)s.%(ext)s') + or (opts.format == '-1' and opts.useliteral and u'%(title)s-%(id)s-%(format)s.%(ext)s') + or (opts.format == '-1' and u'%(id)s-%(format)s.%(ext)s') or (opts.usetitle and u'%(stitle)s-%(id)s.%(ext)s') or 
(opts.useliteral and u'%(title)s-%(id)s.%(ext)s') or u'%(id)s.%(ext)s'), From eaf4a7288dc285f10cd5745432129be29cf770a3 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Tue, 30 Mar 2010 11:50:57 +0200 Subject: [PATCH 182/455] Solve minor aesthetical problem in rtmpdump error messages --- youtube-dl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index 407a393a7..5afd7ebc2 100755 --- a/youtube-dl +++ b/youtube-dl @@ -491,7 +491,7 @@ class FileDownloader(object): self.to_stdout(u'\r[rtmpdump] %s bytes' % os.path.getsize(filename)) return True else: - self.trouble('ERROR: rtmpdump exited with code %d' % retval) + self.trouble('\nERROR: rtmpdump exited with code %d' % retval) return False def _do_download(self, filename, url): From 2ed1ddd0a0dd73f69b3b7f1fc9102fdc028735aa Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Thu, 1 Apr 2010 20:46:00 +0200 Subject: [PATCH 183/455] Request video info webpage using "embedded" instead of "detailpage" In the request for get_video_info, use el=embedded instead of el=detailpage, as if the request was coming from an embedded video player instead of the video webpage. This created problems for some videos, with YouTube replying with "Invalid parameters". This fixes issue #109 and fixes issue #110. 
--- youtube-dl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index 5afd7ebc2..167fefcc0 100755 --- a/youtube-dl +++ b/youtube-dl @@ -783,7 +783,7 @@ class YoutubeIE(InfoExtractor): video_extension = self._video_extensions.get(format_param, 'flv') # Get video info - video_info_url = 'http://www.youtube.com/get_video_info?&video_id=%s&el=detailpage&ps=default&eurl=&gl=US&hl=en' % video_id + video_info_url = 'http://www.youtube.com/get_video_info?&video_id=%s&el=embedded&ps=default&eurl=&gl=US&hl=en' % video_id request = urllib2.Request(video_info_url, None, std_headers) try: self.report_video_info_webpage_download(video_id) From 61945318317eb2334021231de59afa0f36ddc8f3 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Thu, 1 Apr 2010 20:55:43 +0200 Subject: [PATCH 184/455] Add Yahoo! Video InfoExtractor, merged from "obeythepenguin" --- youtube-dl | 137 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 137 insertions(+) diff --git a/youtube-dl b/youtube-dl index 167fefcc0..9c939023e 100755 --- a/youtube-dl +++ b/youtube-dl @@ -1165,6 +1165,141 @@ class PhotobucketIE(InfoExtractor): self._downloader.trouble(u'ERROR: format not available for video') +class YahooIE(InfoExtractor): + """Information extractor for video.yahoo.com.""" + + # _VALID_URL matches all Yahoo! Video URLs + # _VPAGE_URL matches only the extractable '/watch/' URLs + _VALID_URL = r'(?:http://)?(?:[a-z]+\.)?video\.yahoo\.com/(?:watch|network)/([0-9]+)(?:/|\?v=)([0-9]+)(?:[#\?].*)?' + _VPAGE_URL = r'(?:http://)?video\.yahoo\.com/watch/([0-9]+)/([0-9]+)(?:[#\?].*)?' 
+ + def __init__(self, downloader=None): + InfoExtractor.__init__(self, downloader) + + @staticmethod + def suitable(url): + return (re.match(YahooIE._VALID_URL, url) is not None) + + def report_download_webpage(self, video_id): + """Report webpage download.""" + self._downloader.to_stdout(u'[video.yahoo] %s: Downloading webpage' % video_id) + + def report_extraction(self, video_id): + """Report information extraction.""" + self._downloader.to_stdout(u'[video.yahoo] %s: Extracting information' % video_id) + + def _real_initialize(self): + return + + def _real_extract(self, url): + # Extract ID from URL + mobj = re.match(self._VALID_URL, url) + if mobj is None: + self._downloader.trouble(u'ERROR: Invalid URL: %s' % url) + return + + video_id = mobj.group(2) + video_extension = 'flv' + + # Rewrite valid but non-extractable URLs as + # extractable English language /watch/ URLs + if re.match(self._VPAGE_URL, url) is None: + request = urllib2.Request(url) + try: + webpage = urllib2.urlopen(request).read() + except (urllib2.URLError, httplib.HTTPException, socket.error), err: + self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err)) + return + + mobj = re.search(r'\("id", "([0-9]+)"\);', webpage) + if mobj is None: + self._downloader.trouble(u'ERROR: Unable to extract id field') + return + yahoo_id = mobj.group(1) + + mobj = re.search(r'\("vid", "([0-9]+)"\);', webpage) + if mobj is None: + self._downloader.trouble(u'ERROR: Unable to extract vid field') + return + yahoo_vid = mobj.group(1) + + url = 'http://video.yahoo.com/watch/%s/%s' % (yahoo_vid, yahoo_id) + return self._real_extract(url) + + # Retrieve video webpage to extract further information + request = urllib2.Request(url) + try: + self.report_download_webpage(video_id) + webpage = urllib2.urlopen(request).read() + except (urllib2.URLError, httplib.HTTPException, socket.error), err: + self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err)) + return + + # 
Extract uploader and title from webpage + self.report_extraction(video_id) + mobj = re.search(r'', webpage) + if mobj is None: + self._downloader.trouble(u'ERROR: unable to extract video title') + return + video_title = mobj.group(1).decode('utf-8') + simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title) + + mobj = re.search(r'

    (.*)

    ', webpage) + if mobj is None: + self._downloader.trouble(u'ERROR: unable to extract video uploader') + return + video_uploader = mobj.group(1).decode('utf-8') + + # Extract video height and width + mobj = re.search(r'', webpage) + if mobj is None: + self._downloader.trouble(u'ERROR: unable to extract video height') + return + yv_video_height = mobj.group(1) + + mobj = re.search(r'', webpage) + if mobj is None: + self._downloader.trouble(u'ERROR: unable to extract video width') + return + yv_video_width = mobj.group(1) + + # Retrieve video playlist to extract media URL + # I'm not completely sure what all these options are, but we + # seem to need most of them, otherwise the server sends a 401. + yv_lg = 'R0xx6idZnW2zlrKP8xxAIR' # not sure what this represents + yv_bitrate = '700' # according to Wikipedia this is hard-coded + request = urllib2.Request('http://cosmos.bcst.yahoo.com/up/yep/process/getPlaylistFOP.php?node_id=' + video_id + + '&tech=flash&mode=playlist&lg=' + yv_lg + '&bitrate=' + yv_bitrate + '&vidH=' + yv_video_height + + '&vidW=' + yv_video_width + '&swf=as3&rd=video.yahoo.com&tk=null&adsupported=v1,v2,&eventid=1301797') + try: + self.report_download_webpage(video_id) + webpage = urllib2.urlopen(request).read() + except (urllib2.URLError, httplib.HTTPException, socket.error), err: + self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err)) + return + + # Extract media URL from playlist XML + mobj = re.search(r' Date: Fri, 2 Apr 2010 17:13:40 +0200 Subject: [PATCH 185/455] Try el=detailpage if el=embedded fails for YouTube --- youtube-dl | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/youtube-dl b/youtube-dl index 9c939023e..f6db799ec 100755 --- a/youtube-dl +++ b/youtube-dl @@ -783,15 +783,19 @@ class YoutubeIE(InfoExtractor): video_extension = self._video_extensions.get(format_param, 'flv') # Get video info - video_info_url = 
'http://www.youtube.com/get_video_info?&video_id=%s&el=embedded&ps=default&eurl=&gl=US&hl=en' % video_id - request = urllib2.Request(video_info_url, None, std_headers) - try: - self.report_video_info_webpage_download(video_id) - video_info_webpage = urllib2.urlopen(request).read() - video_info = parse_qs(video_info_webpage) - except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % str(err)) - return + self.report_video_info_webpage_download(video_id) + for el_type in ['embedded', 'detailpage']: + video_info_url = ('http://www.youtube.com/get_video_info?&video_id=%s&el=%s&ps=default&eurl=&gl=US&hl=en' + % (video_id, el_type)) + request = urllib2.Request(video_info_url, None, std_headers) + try: + video_info_webpage = urllib2.urlopen(request).read() + video_info = parse_qs(video_info_webpage) + if 'token' in video_info: + break + except (urllib2.URLError, httplib.HTTPException, socket.error), err: + self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % str(err)) + return self.report_information_extraction(video_id) # "t" param From ae3fc475eb2ebb0b73a4f70a76a0e8b083b52079 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Fri, 2 Apr 2010 17:14:24 +0200 Subject: [PATCH 186/455] Bump version number --- LATEST_VERSION | 2 +- youtube-dl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/LATEST_VERSION b/LATEST_VERSION index 92b60dcf3..f8bf47a22 100644 --- a/LATEST_VERSION +++ b/LATEST_VERSION @@ -1 +1 @@ -2010.03.13 +2010.04.02 diff --git a/youtube-dl b/youtube-dl index f6db799ec..957438b1d 100755 --- a/youtube-dl +++ b/youtube-dl @@ -1682,7 +1682,7 @@ if __name__ == '__main__': # Parse command line parser = optparse.OptionParser( usage='Usage: %prog [options] url...', - version='2010.03.13', + version='2010.04.02', conflict_handler='resolve', ) From ce5cafea407319ce9a69acabeb748563cc0d7db0 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia 
Date: Fri, 2 Apr 2010 19:51:54 +0200 Subject: [PATCH 187/455] Change method to detect end of playlist (fixes issue #113) --- youtube-dl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube-dl b/youtube-dl index 957438b1d..3735c7eac 100755 --- a/youtube-dl +++ b/youtube-dl @@ -1496,7 +1496,7 @@ class YoutubePlaylistIE(InfoExtractor): _VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/(?:view_play_list|my_playlists)\?.*?p=([^&]+).*' _TEMPLATE_URL = 'http://www.youtube.com/view_play_list?p=%s&page=%s&gl=US&hl=en' _VIDEO_INDICATOR = r'/watch\?v=(.+?)&' - _MORE_PAGES_INDICATOR = r'/view_play_list?p=%s&page=%s' + _MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*' _youtube_ie = None def __init__(self, youtube_ie, downloader=None): @@ -1542,7 +1542,7 @@ class YoutubePlaylistIE(InfoExtractor): ids_in_page.append(mobj.group(1)) video_ids.extend(ids_in_page) - if (self._MORE_PAGES_INDICATOR % (playlist_id.upper(), pagenum + 1)) not in page: + if re.search(self._MORE_PAGES_INDICATOR, page) is None: break pagenum = pagenum + 1 From 9177ce4d8ce93a7de0c75bdfdfd36bca5d6086f9 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Sat, 3 Apr 2010 09:45:45 +0200 Subject: [PATCH 188/455] Support new playlist style URL (fixes issue #114) --- youtube-dl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index 3735c7eac..09f1441a4 100755 --- a/youtube-dl +++ b/youtube-dl @@ -1493,7 +1493,7 @@ class YoutubeSearchIE(InfoExtractor): class YoutubePlaylistIE(InfoExtractor): """Information Extractor for YouTube playlists.""" - _VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/(?:view_play_list|my_playlists)\?.*?p=([^&]+).*' + _VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/(?:(?:view_play_list|my_playlists)\?.*?p=|user/.*?/user/)([^&]+).*' _TEMPLATE_URL = 'http://www.youtube.com/view_play_list?p=%s&page=%s&gl=US&hl=en' _VIDEO_INDICATOR = r'/watch\?v=(.+?)&' _MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*' From 
7d8d06122d2e768f3ecce3ceb2e01d1f3a5ef050 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Sat, 3 Apr 2010 09:54:36 +0200 Subject: [PATCH 189/455] Add the "ord" template parameter (fixes issue #101) --- youtube-dl | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/youtube-dl b/youtube-dl index 09f1441a4..4cb958336 100755 --- a/youtube-dl +++ b/youtube-dl @@ -201,12 +201,14 @@ class FileDownloader(object): _ies = [] _pps = [] _download_retcode = None + _num_downloads = None def __init__(self, params): """Create a FileDownloader object with the given options.""" self._ies = [] self._pps = [] self._download_retcode = 0 + self._num_downloads = 0 self.params = params @staticmethod @@ -402,6 +404,7 @@ class FileDownloader(object): try: template_dict = dict(info_dict) template_dict['epoch'] = unicode(long(time.time())) + template_dict['ord'] = unicode('%05d' % self._num_downloads) filename = self.params['outtmpl'] % template_dict except (ValueError, KeyError), err: self.trouble('ERROR: invalid output template or system charset: %s' % str(err)) @@ -555,6 +558,7 @@ class FileDownloader(object): try: (stream, filename) = sanitize_open(filename, open_mode) self.report_destination(filename) + self._num_downloads += 1 except (OSError, IOError), err: self.trouble('ERROR: unable to open for writing: %s' % str(err)) return False From 4dd63be1939d5c10c807aa316bc601df0559aa87 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Sat, 3 Apr 2010 09:58:16 +0200 Subject: [PATCH 190/455] Bump version number --- LATEST_VERSION | 2 +- youtube-dl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/LATEST_VERSION b/LATEST_VERSION index f8bf47a22..de43f83cf 100644 --- a/LATEST_VERSION +++ b/LATEST_VERSION @@ -1 +1 @@ -2010.04.02 +2010.04.03 diff --git a/youtube-dl b/youtube-dl index 4cb958336..d3ce9ee74 100755 --- a/youtube-dl +++ b/youtube-dl @@ -1686,7 +1686,7 @@ if __name__ == '__main__': # Parse command line parser = optparse.OptionParser( usage='Usage: %prog 
[options] url...', - version='2010.04.02', + version='2010.04.03', conflict_handler='resolve', ) From 37dfa1e0df8a5be73cff508590be89b7285c65c3 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Sat, 3 Apr 2010 17:25:13 +0200 Subject: [PATCH 191/455] Also try el=vevo on YouTube if everything else fails (fixes issue #115) --- youtube-dl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index d3ce9ee74..6fb2066e4 100755 --- a/youtube-dl +++ b/youtube-dl @@ -788,7 +788,7 @@ class YoutubeIE(InfoExtractor): # Get video info self.report_video_info_webpage_download(video_id) - for el_type in ['embedded', 'detailpage']: + for el_type in ['embedded', 'detailpage', 'vevo']: video_info_url = ('http://www.youtube.com/get_video_info?&video_id=%s&el=%s&ps=default&eurl=&gl=US&hl=en' % (video_id, el_type)) request = urllib2.Request(video_info_url, None, std_headers) From 554bbdc48cd59180af6eb767152a798d6e19ce5c Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Sun, 4 Apr 2010 13:11:20 +0200 Subject: [PATCH 192/455] Bump version number --- LATEST_VERSION | 2 +- youtube-dl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/LATEST_VERSION b/LATEST_VERSION index de43f83cf..013f034f3 100644 --- a/LATEST_VERSION +++ b/LATEST_VERSION @@ -1 +1 @@ -2010.04.03 +2010.04.04 diff --git a/youtube-dl b/youtube-dl index 6fb2066e4..43566b615 100755 --- a/youtube-dl +++ b/youtube-dl @@ -1686,7 +1686,7 @@ if __name__ == '__main__': # Parse command line parser = optparse.OptionParser( usage='Usage: %prog [options] url...', - version='2010.04.03', + version='2010.04.04', conflict_handler='resolve', ) From 7e58d56888539c87dc3d8747cd958e3c57acee6f Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Sun, 4 Apr 2010 17:57:59 +0200 Subject: [PATCH 193/455] Merge changes by obeythepenguin --- youtube-dl | 277 ++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 275 insertions(+), 2 deletions(-) diff --git a/youtube-dl b/youtube-dl 
index 43566b615..64f62e8a1 100755 --- a/youtube-dl +++ b/youtube-dl @@ -398,6 +398,10 @@ class FileDownloader(object): print info_dict['title'].encode(preferredencoding(), 'xmlcharrefreplace') if self.params.get('forceurl', False): print info_dict['url'].encode(preferredencoding(), 'xmlcharrefreplace') + if self.params.get('forcethumbnail', False) and 'thumbnail' in info_dict: + print info_dict['thumbnail'].encode(preferredencoding(), 'xmlcharrefreplace') + if self.params.get('forcedescription', False) and 'description' in info_dict: + print info_dict['description'].encode(preferredencoding(), 'xmlcharrefreplace') return @@ -599,6 +603,14 @@ class InfoExtractor(object): ext: Video filename extension. format: Video format. + The following fields are optional. Their primary purpose is to allow + youtube-dl to serve as the backend for a video search function, such + as the one in youtube2mp3. They are only used when their respective + forced printing functions are called: + + thumbnail: Full URL to a video thumbnail image. + description: One-line video description. + Subclasses of this one should re-define the _real_initialize() and _real_extract() methods, as well as the suitable() static method. Probably, they should also be instantiated and added to the main @@ -842,6 +854,28 @@ class YoutubeIE(InfoExtractor): simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title) simple_title = simple_title.strip(ur'_') + # thumbnail image + if 'thumbnail_url' not in video_info: + self._downloader.trouble(u'WARNING: unable to extract video thumbnail') + video_thumbnail = '' + else: # don't panic if we can't find it + video_thumbnail = urllib.unquote_plus(video_info['thumbnail_url'][0]) + + # get video description + video_description = 'No description available.' 
# we need something to pass to self._downloader + # this requires an additional HTTP request and a little + # more time, so don't do it unless absolutely necessary + if self._downloader.params.get('forcedescription', False): + video_page_url = 'http://www.youtube.com/watch?v=' + video_id + request = urllib2.Request(video_page_url, None, std_headers) + try: + video_page_webpage = urllib2.urlopen(request).read() + mobj = re.search(r'', video_page_webpage) + if mobj is not None: + video_description = mobj.group(1) + except (urllib2.URLError, httplib.HTTPException, socket.error), err: + pass # don't panic if we can't find it + try: # Process video information self._downloader.process_info({ @@ -852,6 +886,8 @@ class YoutubeIE(InfoExtractor): 'stitle': simple_title, 'ext': video_extension.decode('utf-8'), 'format': (format_param is None and u'NA' or format_param.decode('utf-8')), + 'thumbnail': video_thumbnail.decode('utf-8'), + 'description': video_description.decode('utf-8'), }) if all_formats: @@ -1080,6 +1116,32 @@ class GoogleIE(InfoExtractor): video_title = sanitize_title(video_title) simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title) + # Extract video description + mobj = re.search(r'([^<]*)', webpage) + if mobj is None: + self._downloader.trouble(u'ERROR: unable to extract video description') + return + video_description = mobj.group(1).decode('utf-8') + if not video_description: + video_description = 'No description available.' 
+ + # Extract video thumbnail + if self._downloader.params.get('forcethumbnail', False): + request = urllib2.Request('http://video.google.com/videosearch?q=%s+site:video.google.com&hl=en' % abs(int(video_id))) + try: + webpage = urllib2.urlopen(request).read() + except (urllib2.URLError, httplib.HTTPException, socket.error), err: + self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err)) + return + mobj = re.search(r'', webpage) + if mobj is None: + self._downloader.trouble(u'ERROR: unable to extract video thumbnail') + return + video_thumbnail = mobj.group(1) + else: # we need something to pass to process_info + video_thumbnail = '' + + try: # Process video information self._downloader.process_info({ @@ -1258,6 +1320,21 @@ class YahooIE(InfoExtractor): return video_uploader = mobj.group(1).decode('utf-8') + # Extract video thumbnail + mobj = re.search(r'', webpage) + if mobj is None: + self._downloader.trouble(u'ERROR: unable to extract video thumbnail') + return + video_thumbnail = mobj.group(1).decode('utf-8') + + # Extract video description + mobj = re.search(r'', webpage) + if mobj is None: + self._downloader.trouble(u'ERROR: unable to extract video description') + return + video_description = mobj.group(1).decode('utf-8') + if not video_description: video_description = 'No description available.' 
+ # Extract video height and width mobj = re.search(r'', webpage) if mobj is None: @@ -1303,6 +1380,10 @@ class YahooIE(InfoExtractor): 'title': video_title, 'stitle': simple_title, 'ext': video_extension.decode('utf-8'), + 'thumbnail': video_thumbnail.decode('utf-8'), + 'description': video_description, + 'thumbnail': video_thumbnail, + 'description': video_description, }) except UnavailableFormatError: self._downloader.trouble(u'ERROR: format not available for video') @@ -1494,6 +1575,188 @@ class YoutubeSearchIE(InfoExtractor): pagenum = pagenum + 1 +class GoogleSearchIE(InfoExtractor): + """Information Extractor for Google Video search queries.""" + _VALID_QUERY = r'gvsearch(\d+|all)?:[\s\S]+' + _TEMPLATE_URL = 'http://video.google.com/videosearch?q=%s+site:video.google.com&start=%s&hl=en' + _VIDEO_INDICATOR = r'videoplay\?docid=([^\&>]+)\&' + _MORE_PAGES_INDICATOR = r'Next' + _google_ie = None + _max_google_results = 1000 + + def __init__(self, google_ie, downloader=None): + InfoExtractor.__init__(self, downloader) + self._google_ie = google_ie + + @staticmethod + def suitable(url): + return (re.match(GoogleSearchIE._VALID_QUERY, url) is not None) + + def report_download_page(self, query, pagenum): + """Report attempt to download playlist page with given number.""" + query = query.decode(preferredencoding()) + self._downloader.to_stdout(u'[video.google] query "%s": Downloading page %s' % (query, pagenum)) + + def _real_initialize(self): + self._google_ie.initialize() + + def _real_extract(self, query): + mobj = re.match(self._VALID_QUERY, query) + if mobj is None: + self._downloader.trouble(u'ERROR: invalid search query "%s"' % query) + return + + prefix, query = query.split(':') + prefix = prefix[8:] + query = query.encode('utf-8') + if prefix == '': + self._download_n_results(query, 1) + return + elif prefix == 'all': + self._download_n_results(query, self._max_google_results) + return + else: + try: + n = long(prefix) + if n <= 0: + 
self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query)) + return + elif n > self._max_google_results: + self._downloader.to_stderr(u'WARNING: gvsearch returns max %i results (you requested %i)' % (self._max_google_results, n)) + n = self._max_google_results + self._download_n_results(query, n) + return + except ValueError: # parsing prefix as integer fails + self._download_n_results(query, 1) + return + + def _download_n_results(self, query, n): + """Downloads a specified number of results for a query""" + + video_ids = [] + already_seen = set() + pagenum = 1 + + while True: + self.report_download_page(query, pagenum) + result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum) + request = urllib2.Request(result_url, None, std_headers) + try: + page = urllib2.urlopen(request).read() + except (urllib2.URLError, httplib.HTTPException, socket.error), err: + self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err)) + return + + # Extract video identifiers + for mobj in re.finditer(self._VIDEO_INDICATOR, page): + video_id = mobj.group(1) + if video_id not in already_seen: + video_ids.append(video_id) + already_seen.add(video_id) + if len(video_ids) == n: + # Specified n videos reached + for id in video_ids: + self._google_ie.extract('http://video.google.com/videoplay?docid=%s' % id) + return + + if re.search(self._MORE_PAGES_INDICATOR, page) is None: + for id in video_ids: + self._google_ie.extract('http://video.google.com/videoplay?docid=%s' % id) + return + + pagenum = pagenum + 1 + +class YahooSearchIE(InfoExtractor): + """Information Extractor for Yahoo! 
Video search queries.""" + _VALID_QUERY = r'yvsearch(\d+|all)?:[\s\S]+' + _TEMPLATE_URL = 'http://video.yahoo.com/search/?p=%s&o=%s' + _VIDEO_INDICATOR = r'href="http://video\.yahoo\.com/watch/([0-9]+/[0-9]+)"' + _MORE_PAGES_INDICATOR = r'\s*Next' + _yahoo_ie = None + _max_yahoo_results = 1000 + + def __init__(self, yahoo_ie, downloader=None): + InfoExtractor.__init__(self, downloader) + self._yahoo_ie = yahoo_ie + + @staticmethod + def suitable(url): + return (re.match(YahooSearchIE._VALID_QUERY, url) is not None) + + def report_download_page(self, query, pagenum): + """Report attempt to download playlist page with given number.""" + query = query.decode(preferredencoding()) + self._downloader.to_stdout(u'[video.yahoo] query "%s": Downloading page %s' % (query, pagenum)) + + def _real_initialize(self): + self._yahoo_ie.initialize() + + def _real_extract(self, query): + mobj = re.match(self._VALID_QUERY, query) + if mobj is None: + self._downloader.trouble(u'ERROR: invalid search query "%s"' % query) + return + + prefix, query = query.split(':') + prefix = prefix[8:] + query = query.encode('utf-8') + if prefix == '': + self._download_n_results(query, 1) + return + elif prefix == 'all': + self._download_n_results(query, self._max_yahoo_results) + return + else: + try: + n = long(prefix) + if n <= 0: + self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query)) + return + elif n > self._max_yahoo_results: + self._downloader.to_stderr(u'WARNING: yvsearch returns max %i results (you requested %i)' % (self._max_yahoo_results, n)) + n = self._max_yahoo_results + self._download_n_results(query, n) + return + except ValueError: # parsing prefix as integer fails + self._download_n_results(query, 1) + return + + def _download_n_results(self, query, n): + """Downloads a specified number of results for a query""" + + video_ids = [] + already_seen = set() + pagenum = 1 + + while True: + self.report_download_page(query, pagenum) + result_url = 
self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum) + request = urllib2.Request(result_url, None, std_headers) + try: + page = urllib2.urlopen(request).read() + except (urllib2.URLError, httplib.HTTPException, socket.error), err: + self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err)) + return + + # Extract video identifiers + for mobj in re.finditer(self._VIDEO_INDICATOR, page): + video_id = mobj.group(1) + if video_id not in already_seen: + video_ids.append(video_id) + already_seen.add(video_id) + if len(video_ids) == n: + # Specified n videos reached + for id in video_ids: + self._yahoo_ie.extract('http://video.yahoo.com/watch/%s' % id) + return + + if re.search(self._MORE_PAGES_INDICATOR, page) is None: + for id in video_ids: + self._yahoo_ie.extract('http://video.yahoo.com/watch/%s' % id) + return + + pagenum = pagenum + 1 + class YoutubePlaylistIE(InfoExtractor): """Information Extractor for YouTube playlists.""" @@ -1732,6 +1995,10 @@ if __name__ == '__main__': action='store_true', dest='geturl', help='simulate, quiet but print URL', default=False) verbosity.add_option('-e', '--get-title', action='store_true', dest='gettitle', help='simulate, quiet but print title', default=False) + verbosity.add_option('--get-thumbnail', + action='store_true', dest='getthumbnail', help='simulate, quiet but print thumbnail URL', default=False) + verbosity.add_option('--get-description', + action='store_true', dest='getdescription', help='simulate, quiet but print video description', default=False) verbosity.add_option('--no-progress', action='store_true', dest='noprogress', help='do not print progress bar', default=False) parser.add_option_group(verbosity) @@ -1788,8 +2055,10 @@ if __name__ == '__main__': youtube_user_ie = YoutubeUserIE(youtube_ie) youtube_search_ie = YoutubeSearchIE(youtube_ie) google_ie = GoogleIE() + google_search_ie = GoogleSearchIE(google_ie) photobucket_ie = PhotobucketIE() yahoo_ie = YahooIE() + yahoo_search_ie = 
YahooSearchIE(yahoo_ie) generic_ie = GenericIE() # File downloader @@ -1797,10 +2066,12 @@ if __name__ == '__main__': 'usenetrc': opts.usenetrc, 'username': opts.username, 'password': opts.password, - 'quiet': (opts.quiet or opts.geturl or opts.gettitle), + 'quiet': (opts.quiet or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription), 'forceurl': opts.geturl, 'forcetitle': opts.gettitle, - 'simulate': (opts.simulate or opts.geturl or opts.gettitle), + 'forcethumbnail': opts.getthumbnail, + 'forcedescription': opts.getdescription, + 'simulate': (opts.simulate or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription), 'format': opts.format, 'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(preferredencoding())) or (opts.format == '-1' and opts.usetitle and u'%(stitle)s-%(id)s-%(format)s.%(ext)s') @@ -1821,8 +2092,10 @@ if __name__ == '__main__': fd.add_info_extractor(metacafe_ie) fd.add_info_extractor(youtube_ie) fd.add_info_extractor(google_ie) + fd.add_info_extractor(google_search_ie) fd.add_info_extractor(photobucket_ie) fd.add_info_extractor(yahoo_ie) + fd.add_info_extractor(yahoo_search_ie) # This must come last since it's the # fallback if none of the others work From 2bebb386b86963ed644a6f4196d97b82902e218f Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Sat, 17 Apr 2010 17:54:37 +0200 Subject: [PATCH 194/455] Make "all-formats" and "best-quality" download the version without specific format too --- youtube-dl | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/youtube-dl b/youtube-dl index 64f62e8a1..d2bd0bf5f 100755 --- a/youtube-dl +++ b/youtube-dl @@ -891,21 +891,18 @@ class YoutubeIE(InfoExtractor): }) if all_formats: - if quality_index == len(self._available_formats) - 1: + if quality_index == len(self._available_formats): # None left to get return else: quality_index += 1 format_param = self._available_formats[quality_index] - if format_param == None: - return continue - 
return except UnavailableFormatError, err: if best_quality or all_formats: - if quality_index == len(self._available_formats) - 1: + if quality_index == len(self._available_formats): # I don't ever expect this to happen if not all_formats: self._downloader.trouble(u'ERROR: no known formats available for video') @@ -914,8 +911,6 @@ class YoutubeIE(InfoExtractor): self.report_unavailable_format(video_id, format_param) quality_index += 1 format_param = self._available_formats[quality_index] - if format_param == None: - return continue else: self._downloader.trouble('ERROR: format not available for video') From 131efd1ae0899f261fe9faccb91f5c9c3aa974c5 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Sat, 17 Apr 2010 18:49:56 +0200 Subject: [PATCH 195/455] Detect errors in video data writes --- youtube-dl | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index d2bd0bf5f..86e247994 100755 --- a/youtube-dl +++ b/youtube-dl @@ -566,7 +566,10 @@ class FileDownloader(object): except (OSError, IOError), err: self.trouble('ERROR: unable to open for writing: %s' % str(err)) return False - stream.write(data_block) + try: + stream.write(data_block) + except (IOError, OSError), err: + self.trouble('ERROR: unable to write data: %s' % str(err)) block_size = self.best_block_size(after - before, data_block_len) # Progress message From 0228ee978899c9ba563b9bfd0ef52336dfe70c30 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Sat, 17 Apr 2010 18:57:51 +0200 Subject: [PATCH 196/455] Print EOL before "unable to write data" error --- youtube-dl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index 86e247994..6389c656e 100755 --- a/youtube-dl +++ b/youtube-dl @@ -569,7 +569,7 @@ class FileDownloader(object): try: stream.write(data_block) except (IOError, OSError), err: - self.trouble('ERROR: unable to write data: %s' % str(err)) + self.trouble('\nERROR: unable to write data: %s' % str(err)) block_size 
= self.best_block_size(after - before, data_block_len) # Progress message From 42e3546fb53c016a5a10ffdd60dc50c5f4e1234e Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Mon, 19 Apr 2010 19:25:47 +0200 Subject: [PATCH 197/455] Increment number of downloads when the file has already been fully downloaded --- youtube-dl | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube-dl b/youtube-dl index 6389c656e..72b09a0c2 100755 --- a/youtube-dl +++ b/youtube-dl @@ -536,6 +536,7 @@ class FileDownloader(object): if content_length is not None and long(content_length) == resume_len: # Because the file had already been fully downloaded self.report_file_already_downloaded(filename) + self._num_downloads += 1 return True else: # Because the server didn't let us From 787f2a5d9570bcbd94eae9d54ee2dfaeef1a7ad4 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Fri, 21 May 2010 21:24:37 +0200 Subject: [PATCH 198/455] Also try no "el" option in get_video_info (fixes issue #130) --- youtube-dl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube-dl b/youtube-dl index 72b09a0c2..0f1d31c4e 100755 --- a/youtube-dl +++ b/youtube-dl @@ -804,8 +804,8 @@ class YoutubeIE(InfoExtractor): # Get video info self.report_video_info_webpage_download(video_id) - for el_type in ['embedded', 'detailpage', 'vevo']: - video_info_url = ('http://www.youtube.com/get_video_info?&video_id=%s&el=%s&ps=default&eurl=&gl=US&hl=en' + for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']: + video_info_url = ('http://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en' % (video_id, el_type)) request = urllib2.Request(video_info_url, None, std_headers) try: From 2a7353b87a3c393250fa20e1911385d5a6559b51 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Fri, 21 May 2010 21:35:34 +0200 Subject: [PATCH 199/455] Make -a understand dash means stdin --- youtube-dl | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/youtube-dl 
b/youtube-dl index 0f1d31c4e..14f5d0b54 100755 --- a/youtube-dl +++ b/youtube-dl @@ -2010,7 +2010,7 @@ if __name__ == '__main__': filesystem.add_option('-o', '--output', dest='outtmpl', metavar='TPL', help='output filename template') filesystem.add_option('-a', '--batch-file', - dest='batchfile', metavar='F', help='file containing URLs to download') + dest='batchfile', metavar='F', help='file containing URLs to download (\'-\' for stdin)') filesystem.add_option('-w', '--no-overwrites', action='store_true', dest='nooverwrites', help='do not overwrite files', default=False) filesystem.add_option('-c', '--continue', @@ -2018,12 +2018,16 @@ if __name__ == '__main__': parser.add_option_group(filesystem) (opts, args) = parser.parse_args() - + # Batch file verification batchurls = [] if opts.batchfile is not None: try: - batchurls = open(opts.batchfile, 'r').readlines() + if opts.batchfile == '-': + batchfd = sys.stdin + else: + batchfd = open(opts.batchfile, 'r') + batchurls = batchfd.readlines() batchurls = [x.strip() for x in batchurls] batchurls = [x for x in batchurls if len(x) > 0] except IOError: From e616ec0ca6eaebf1a86d40db800ce17999e30f1d Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Sun, 30 May 2010 19:49:51 +0200 Subject: [PATCH 200/455] Add player signature verification to rtmpdump support (fixes issue #63) --- youtube-dl | 63 +++++++++++++++++++++++++++++++++++++----------------- 1 file changed, 43 insertions(+), 20 deletions(-) diff --git a/youtube-dl b/youtube-dl index 14f5d0b54..83a4e22c0 100755 --- a/youtube-dl +++ b/youtube-dl @@ -423,7 +423,7 @@ class FileDownloader(object): return try: - success = self._do_download(filename, info_dict['url'].encode('utf-8')) + success = self._do_download(filename, info_dict['url'].encode('utf-8'), info_dict.get('player_url', None)) except (OSError, IOError), err: raise UnavailableFormatError except (urllib2.URLError, httplib.HTTPException, socket.error), err: @@ -475,7 +475,7 @@ class FileDownloader(object): if 
info is None: break - def _download_with_rtmpdump(self, filename, url): + def _download_with_rtmpdump(self, filename, url, player_url): self.report_destination(filename) # Check for rtmpdump first @@ -488,12 +488,16 @@ class FileDownloader(object): # Download using rtmpdump. rtmpdump returns exit code 2 when # the connection was interrumpted and resuming appears to be # possible. This is part of rtmpdump's normal usage, AFAIK. - basic_args = ['rtmpdump', '-q', '-r', url, '-o', filename] + basic_args = ['rtmpdump', '-q'] + [[], ['-W', player_url]][player_url is not None] + ['-r', url, '-o', filename] retval = subprocess.call(basic_args + [[], ['-e', '-k', '1']][self.params.get('continuedl', False)]) while retval == 2 or retval == 1: - self.to_stdout(u'\r[rtmpdump] %s bytes' % os.path.getsize(filename), skip_eol=True) - time.sleep(2.0) # This seems to be needed + prevsize = os.path.getsize(filename) + self.to_stdout(u'\r[rtmpdump] %s bytes' % prevsize, skip_eol=True) + time.sleep(5.0) # This seems to be needed retval = subprocess.call(basic_args + ['-e'] + [[], ['-k', '1']][retval == 1]) + cursize = os.path.getsize(filename) + if prevsize == cursize and retval == 1: + break if retval == 0: self.to_stdout(u'\r[rtmpdump] %s bytes' % os.path.getsize(filename)) return True @@ -501,10 +505,10 @@ class FileDownloader(object): self.trouble('\nERROR: rtmpdump exited with code %d' % retval) return False - def _do_download(self, filename, url): + def _do_download(self, filename, url, player_url): # Attempt to download using rtmpdump if url.startswith('rtmp'): - return self._download_with_rtmpdump(filename, url) + return self._download_with_rtmpdump(filename, url, player_url) stream = None open_mode = 'wb' @@ -606,6 +610,7 @@ class InfoExtractor(object): stitle: Simplified title. ext: Video filename extension. format: Video format. + player_url: SWF Player URL (may be None). The following fields are optional. 
Their primary purpose is to allow youtube-dl to serve as the backend for a video search function, such @@ -690,6 +695,10 @@ class YoutubeIE(InfoExtractor): """Report attempt to confirm age.""" self._downloader.to_stdout(u'[youtube] Confirming age') + def report_video_webpage_download(self, video_id): + """Report attempt to download video webpage.""" + self._downloader.to_stdout(u'[youtube] %s: Downloading video webpage' % video_id) + def report_video_info_webpage_download(self, video_id): """Report attempt to download video info webpage.""" self._downloader.to_stdout(u'[youtube] %s: Downloading video info webpage' % video_id) @@ -802,6 +811,22 @@ class YoutubeIE(InfoExtractor): # Extension video_extension = self._video_extensions.get(format_param, 'flv') + # Get video webpage + self.report_video_webpage_download(video_id) + request = urllib2.Request('http://www.youtube.com/watch?v=%s&gl=US&hl=en' % video_id, None, std_headers) + try: + video_webpage = urllib2.urlopen(request).read() + except (urllib2.URLError, httplib.HTTPException, socket.error), err: + self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err)) + return + + # Attempt to extract SWF player URL + mobj = re.search(r'swfConfig.*"(http://.*?watch-.*?\.swf)"', video_webpage) + if mobj is not None: + player_url = mobj.group(1) + else: + player_url = None + # Get video info self.report_video_info_webpage_download(video_id) for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']: @@ -865,20 +890,12 @@ class YoutubeIE(InfoExtractor): else: # don't panic if we can't find it video_thumbnail = urllib.unquote_plus(video_info['thumbnail_url'][0]) - # get video description - video_description = 'No description available.' # we need something to pass to self._downloader - # this requires an additional HTTP request and a little - # more time, so don't do it unless absolutely necessary + # description + video_description = 'No description available.' 
if self._downloader.params.get('forcedescription', False): - video_page_url = 'http://www.youtube.com/watch?v=' + video_id - request = urllib2.Request(video_page_url, None, std_headers) - try: - video_page_webpage = urllib2.urlopen(request).read() - mobj = re.search(r'', video_page_webpage) - if mobj is not None: - video_description = mobj.group(1) - except (urllib2.URLError, httplib.HTTPException, socket.error), err: - pass # don't panic if we can't find it + mobj = re.search(r'', video_webpage) + if mobj is not None: + video_description = mobj.group(1) try: # Process video information @@ -892,6 +909,7 @@ class YoutubeIE(InfoExtractor): 'format': (format_param is None and u'NA' or format_param.decode('utf-8')), 'thumbnail': video_thumbnail.decode('utf-8'), 'description': video_description.decode('utf-8'), + 'player_url': player_url, }) if all_formats: @@ -1044,6 +1062,7 @@ class MetacafeIE(InfoExtractor): 'stitle': simple_title, 'ext': video_extension.decode('utf-8'), 'format': u'NA', + 'player_url': None, }) except UnavailableFormatError: self._downloader.trouble(u'ERROR: format not available for video') @@ -1151,6 +1170,7 @@ class GoogleIE(InfoExtractor): 'stitle': simple_title, 'ext': video_extension.decode('utf-8'), 'format': u'NA', + 'player_url': None, }) except UnavailableFormatError: self._downloader.trouble(u'ERROR: format not available for video') @@ -1229,6 +1249,7 @@ class PhotobucketIE(InfoExtractor): 'stitle': simple_title, 'ext': video_extension.decode('utf-8'), 'format': u'NA', + 'player_url': None, }) except UnavailableFormatError: self._downloader.trouble(u'ERROR: format not available for video') @@ -1383,6 +1404,7 @@ class YahooIE(InfoExtractor): 'description': video_description, 'thumbnail': video_thumbnail, 'description': video_description, + 'player_url': None, }) except UnavailableFormatError: self._downloader.trouble(u'ERROR: format not available for video') @@ -1478,6 +1500,7 @@ class GenericIE(InfoExtractor): 'stitle': simple_title, 
'ext': video_extension.decode('utf-8'), 'format': u'NA', + 'player_url': None, }) except UnavailableFormatError: self._downloader.trouble(u'ERROR: format not available for video') From 7031008c98f9dfe0ea592529fe17be88ba9c48d7 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Sun, 30 May 2010 18:34:56 +0200 Subject: [PATCH 201/455] Add a number of retries with tweaked patch, originally from Neil Channen --- youtube-dl | 58 +++++++++++++++++++++++++++++++++++++----------------- 1 file changed, 40 insertions(+), 18 deletions(-) diff --git a/youtube-dl b/youtube-dl index 83a4e22c0..b21e63b05 100755 --- a/youtube-dl +++ b/youtube-dl @@ -193,6 +193,7 @@ class FileDownloader(object): ignoreerrors: Do not stop on download errors. ratelimit: Download speed limit, in bytes/sec. nooverwrites: Prevent overwriting files. + retries: Number of times to retry for HTTP error 503 continuedl: Try to continue downloads if possible. noprogress: Do not print the progress bar. """ @@ -364,6 +365,10 @@ class FileDownloader(object): """Report attemtp to resume at given byte.""" self.to_stdout(u'[download] Resuming download at byte %s' % resume_len) + def report_retry(self, count, retries): + """Report retry in case of HTTP error 503""" + self.to_stdout(u'[download] Got HTTP error 503. Retrying (attempt %d of %d)...' 
% (count, retries)) + def report_file_already_downloaded(self, file_name): """Report file has already been fully downloaded.""" try: @@ -527,25 +532,34 @@ class FileDownloader(object): request.add_header('Range','bytes=%d-' % resume_len) open_mode = 'ab' - # Establish connection - try: - data = urllib2.urlopen(request) - except (urllib2.HTTPError, ), err: - if err.code != 416: # 416 is 'Requested range not satisfiable' - raise - # Unable to resume - data = urllib2.urlopen(basic_request) - content_length = data.info()['Content-Length'] + count = 0 + retries = self.params.get('retries', 0) + while True: + # Establish connection + try: + data = urllib2.urlopen(request) + break + except (urllib2.HTTPError, ), err: + if err.code == 503: + # Retry in case of HTTP error 503 + count += 1 + if count <= retries: + self.report_retry(count, retries) + continue + if err.code != 416: # 416 is 'Requested range not satisfiable' + raise + # Unable to resume + data = urllib2.urlopen(basic_request) + content_length = data.info()['Content-Length'] - if content_length is not None and long(content_length) == resume_len: - # Because the file had already been fully downloaded - self.report_file_already_downloaded(filename) - self._num_downloads += 1 - return True - else: - # Because the server didn't let us - self.report_unable_to_resume() - open_mode = 'wb' + if content_length is not None and long(content_length) == resume_len: + # Because the file had already been fully downloaded + self.report_file_already_downloaded(filename) + return True + else: + # Because the server didn't let us + self.report_unable_to_resume() + open_mode = 'wb' data_len = data.info().get('Content-length', None) data_len_str = self.format_bytes(data_len) @@ -1985,6 +1999,8 @@ if __name__ == '__main__': action='store_true', dest='ignoreerrors', help='continue on download errors', default=False) parser.add_option('-r', '--rate-limit', dest='ratelimit', metavar='L', help='download rate limit (e.g. 
50k or 44.6m)') + parser.add_option('-R', '--retries', + dest='retries', metavar='T', help='number of retries (default is 10)', default=10) authentication = optparse.OptionGroup(parser, 'Authentication Options') authentication.add_option('-u', '--username', @@ -2073,6 +2089,11 @@ if __name__ == '__main__': if numeric_limit is None: parser.error(u'invalid rate limit specified') opts.ratelimit = numeric_limit + if opts.retries is not None: + try: + opts.retries = long(opts.retries) + except (TypeError, ValueError), err: + parser.error(u'invalid retry count specified') # Information extractors youtube_ie = YoutubeIE() @@ -2109,6 +2130,7 @@ if __name__ == '__main__': 'ignoreerrors': opts.ignoreerrors, 'ratelimit': opts.ratelimit, 'nooverwrites': opts.nooverwrites, + 'retries': opts.retries, 'continuedl': opts.continue_dl, 'noprogress': opts.noprogress, }) From 896a6ea9e2a695f3a2aa116ce6d561071f3c61cd Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Sun, 30 May 2010 17:50:56 +0200 Subject: [PATCH 202/455] Fix for all-formats exception by Valentin Hilbig --- youtube-dl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube-dl b/youtube-dl index b21e63b05..514f71aa6 100755 --- a/youtube-dl +++ b/youtube-dl @@ -927,17 +927,18 @@ class YoutubeIE(InfoExtractor): }) if all_formats: + quality_index += 1 if quality_index == len(self._available_formats): # None left to get return else: - quality_index += 1 format_param = self._available_formats[quality_index] continue return except UnavailableFormatError, err: if best_quality or all_formats: + quality_index += 1 if quality_index == len(self._available_formats): # I don't ever expect this to happen if not all_formats: @@ -945,7 +946,6 @@ class YoutubeIE(InfoExtractor): return else: self.report_unavailable_format(video_id, format_param) - quality_index += 1 format_param = self._available_formats[quality_index] continue else: From 0b59bf4a5e87f3e3c20520e906532ccb916f75cd Mon Sep 17 00:00:00 2001 From: 
Ricardo Garcia Date: Sun, 30 May 2010 19:46:08 +0200 Subject: [PATCH 203/455] Add webm extension to formats 43 and 45 (fixes issue #139) --- youtube-dl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/youtube-dl b/youtube-dl index 514f71aa6..ebf510d21 100755 --- a/youtube-dl +++ b/youtube-dl @@ -691,6 +691,8 @@ class YoutubeIE(InfoExtractor): '18': 'mp4', '22': 'mp4', '37': 'mp4', + '43': 'webm', + '45': 'webm', } @staticmethod From de3ed1f84a5107461cdc79009702b214819bb008 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Sun, 6 Jun 2010 18:13:49 +0200 Subject: [PATCH 204/455] Bump version number --- LATEST_VERSION | 2 +- youtube-dl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/LATEST_VERSION b/LATEST_VERSION index 013f034f3..47865d378 100644 --- a/LATEST_VERSION +++ b/LATEST_VERSION @@ -1 +1 @@ -2010.04.04 +2010.06.06 diff --git a/youtube-dl b/youtube-dl index ebf510d21..5e98dfde4 100755 --- a/youtube-dl +++ b/youtube-dl @@ -1987,7 +1987,7 @@ if __name__ == '__main__': # Parse command line parser = optparse.OptionParser( usage='Usage: %prog [options] url...', - version='2010.04.04', + version='2010.06.06', conflict_handler='resolve', ) From ca6a11fa59aad97e861442045032e585eceabf1a Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Tue, 29 Jun 2010 11:10:12 +0200 Subject: [PATCH 205/455] Forbid forward slash in win32 file names (fixes issue #147) --- youtube-dl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index 5e98dfde4..ea4b3fded 100755 --- a/youtube-dl +++ b/youtube-dl @@ -99,7 +99,7 @@ def sanitize_open(filename, open_mode): return (stream, filename) except (IOError, OSError), err: # In case of error, try to remove win32 forbidden chars - filename = re.sub(ur'[<>:"\|\?\*]', u'#', filename) + filename = re.sub(ur'[/<>:"\|\?\*]', u'#', filename) # An exception here should be caught in the caller stream = open(filename, open_mode) From 2b06c33d19a961061f822377adb057f411ccb067 Mon Sep 17 
00:00:00 2001 From: Ricardo Garcia Date: Tue, 29 Jun 2010 11:53:13 +0200 Subject: [PATCH 206/455] Improve some metavar names --- youtube-dl | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/youtube-dl b/youtube-dl index ea4b3fded..7bd4b2aac 100755 --- a/youtube-dl +++ b/youtube-dl @@ -2000,22 +2000,22 @@ if __name__ == '__main__': parser.add_option('-i', '--ignore-errors', action='store_true', dest='ignoreerrors', help='continue on download errors', default=False) parser.add_option('-r', '--rate-limit', - dest='ratelimit', metavar='L', help='download rate limit (e.g. 50k or 44.6m)') + dest='ratelimit', metavar='LIMIT', help='download rate limit (e.g. 50k or 44.6m)') parser.add_option('-R', '--retries', - dest='retries', metavar='T', help='number of retries (default is 10)', default=10) + dest='retries', metavar='RETRIES', help='number of retries (default is 10)', default=10) authentication = optparse.OptionGroup(parser, 'Authentication Options') authentication.add_option('-u', '--username', - dest='username', metavar='UN', help='account username') + dest='username', metavar='USERNAME', help='account username') authentication.add_option('-p', '--password', - dest='password', metavar='PW', help='account password') + dest='password', metavar='PASSWORD', help='account password') authentication.add_option('-n', '--netrc', action='store_true', dest='usenetrc', help='use .netrc authentication data', default=False) parser.add_option_group(authentication) video_format = optparse.OptionGroup(parser, 'Video Format Options') video_format.add_option('-f', '--format', - action='store', dest='format', metavar='FMT', help='video format code') + action='store', dest='format', metavar='FORMAT', help='video format code') video_format.add_option('-b', '--best-quality', action='store_const', dest='format', help='download the best quality video possible', const='0') video_format.add_option('-m', '--mobile-version', @@ -2049,9 +2049,9 @@ if __name__ == 
'__main__': filesystem.add_option('-l', '--literal', action='store_true', dest='useliteral', help='use literal title in file name', default=False) filesystem.add_option('-o', '--output', - dest='outtmpl', metavar='TPL', help='output filename template') + dest='outtmpl', metavar='TEMPLATE', help='output filename template') filesystem.add_option('-a', '--batch-file', - dest='batchfile', metavar='F', help='file containing URLs to download (\'-\' for stdin)') + dest='batchfile', metavar='FILE', help='file containing URLs to download (\'-\' for stdin)') filesystem.add_option('-w', '--no-overwrites', action='store_true', dest='nooverwrites', help='do not overwrite files', default=False) filesystem.add_option('-c', '--continue', From fd8ede223ef0cc51669b91e65abb67bf0cdb29ce Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Tue, 29 Jun 2010 22:38:51 +0200 Subject: [PATCH 207/455] Include format 43 in best quality list (fixes issue #150) --- youtube-dl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index 7bd4b2aac..fba11fab0 100755 --- a/youtube-dl +++ b/youtube-dl @@ -684,7 +684,8 @@ class YoutubeIE(InfoExtractor): _LOGIN_URL = 'http://www.youtube.com/signup?next=/&gl=US&hl=en' _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en' _NETRC_MACHINE = 'youtube' - _available_formats = ['37', '22', '35', '18', '34', '5', '17', '13', None] # listed in order of priority for -b flag + # Listed in order of priority for the -b option + _available_formats = ['37', '22', '45', '35', '34', '43', '18', '6', '5', '17', '13', None] _video_extensions = { '13': '3gp', '17': 'mp4', From 4135fa4585a69b88952a235c27385ee775878182 Mon Sep 17 00:00:00 2001 From: Witold Baryluk Date: Fri, 2 Jul 2010 01:53:47 +0200 Subject: [PATCH 208/455] Add support for the Dailymotion Based slightly (idea and one regular expression) on the old anonymous dailymotion-dl.pl, but with fixes (more robust regular expression, extracting author, support for 
domains other than .com). Simpler due to the fact that youtube-dl provides all needed functionalities. --- youtube-dl | 119 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 119 insertions(+) diff --git a/youtube-dl b/youtube-dl index fba11fab0..b94fe4ce2 100755 --- a/youtube-dl +++ b/youtube-dl @@ -1085,6 +1085,123 @@ class MetacafeIE(InfoExtractor): self._downloader.trouble(u'ERROR: format not available for video') +class DailymotionIE(InfoExtractor): + """Information Extractor for Dailymotion""" + + _VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/video/([^_/]+)_([^/]+)' + _DISCLAIMER = '' + _FILTER_POST = '' + + def __init__(self, downloader=None): + InfoExtractor.__init__(self, downloader) + + @staticmethod + def suitable(url): + return (re.match(DailymotionIE._VALID_URL, url) is not None) + + def report_disclaimer(self): + """Report disclaimer retrieval.""" + self._downloader.to_stdout(u'[dailymotion] Retrieving disclaimer') + + def report_age_confirmation(self): + """Report attempt to confirm age.""" + self._downloader.to_stdout(u'[dailymotion] Confirming age') + + def report_download_webpage(self, video_id): + """Report webpage download.""" + self._downloader.to_stdout(u'[dailymotion] %s: Downloading webpage' % video_id) + + def report_extraction(self, video_id): + """Report information extraction.""" + self._downloader.to_stdout(u'[dailymotion] %s: Extracting information' % video_id) + + def _real_initialize(self): + return + + # Retrieve disclaimer + request = urllib2.Request(self._DISCLAIMER, None, std_headers) + try: + self.report_disclaimer() + disclaimer = urllib2.urlopen(request).read() + except (urllib2.URLError, httplib.HTTPException, socket.error), err: + self._downloader.trouble(u'ERROR: unable to retrieve disclaimer: %s' % str(err)) + return + + # Confirm age + disclaimer_form = { + 'filters': '0', + 'submit': "Continue - I'm over 18", + } + request = urllib2.Request(self._FILTER_POST, 
urllib.urlencode(disclaimer_form), std_headers) + try: + self.report_age_confirmation() + disclaimer = urllib2.urlopen(request).read() + except (urllib2.URLError, httplib.HTTPException, socket.error), err: + self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err)) + return + + def _real_extract(self, url): + # Extract id and simplified title from URL + mobj = re.match(self._VALID_URL, url) + if mobj is None: + self._downloader.trouble(u'ERROR: invalid URL: %s' % url) + return + + video_id = mobj.group(1) + + simple_title = mobj.group(2).decode('utf-8') + video_extension = 'flv' + + # Retrieve video webpage to extract further information + request = urllib2.Request(url) + try: + self.report_download_webpage(video_id) + webpage = urllib2.urlopen(request).read() + except (urllib2.URLError, httplib.HTTPException, socket.error), err: + self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % str(err)) + return + + # Extract URL, uploader and title from webpage + self.report_extraction(video_id) + mobj = re.search(r'(?i)addVariable\(\"video\"\s*,\s*\"([^\"]*)\"\)', webpage) + if mobj is None: + self._downloader.trouble(u'ERROR: unable to extract media URL') + return + mediaURL = urllib.unquote(mobj.group(1)) + + # if needed add http://www.dailymotion.com/ if relative URL + + video_url = mediaURL + + # '' + mobj = re.search(r'(?im)Dailymotion\s*[\-:]\s*(.+?)', webpage) + if mobj is None: + self._downloader.trouble(u'ERROR: unable to extract title') + return + video_title = mobj.group(1).decode('utf-8') + video_title = sanitize_title(video_title) + + mobj = re.search(r'(?im)', webpage) + if mobj is None: + self._downloader.trouble(u'ERROR: unable to extract uploader nickname') + return + video_uploader = mobj.group(1) + + try: + # Process video information + self._downloader.process_info({ + 'id': video_id.decode('utf-8'), + 'url': video_url.decode('utf-8'), + 'uploader': video_uploader.decode('utf-8'), + 'title': video_title, + 'stitle': 
simple_title, + 'ext': video_extension.decode('utf-8'), + 'format': u'NA', + 'player_url': None, + }) + except UnavailableFormatError: + self._downloader.trouble(u'ERROR: format not available for video') + class GoogleIE(InfoExtractor): """Information extractor for video.google.com.""" @@ -2101,6 +2218,7 @@ if __name__ == '__main__': # Information extractors youtube_ie = YoutubeIE() metacafe_ie = MetacafeIE(youtube_ie) + dailymotion_ie = DailymotionIE() youtube_pl_ie = YoutubePlaylistIE(youtube_ie) youtube_user_ie = YoutubeUserIE(youtube_ie) youtube_search_ie = YoutubeSearchIE(youtube_ie) @@ -2141,6 +2259,7 @@ if __name__ == '__main__': fd.add_info_extractor(youtube_pl_ie) fd.add_info_extractor(youtube_user_ie) fd.add_info_extractor(metacafe_ie) + fd.add_info_extractor(dailymotion_ie) fd.add_info_extractor(youtube_ie) fd.add_info_extractor(google_ie) fd.add_info_extractor(google_search_ie) From 08cf5cb80b1db00fe8a08da944e6915a9999b4ae Mon Sep 17 00:00:00 2001 From: Witold Baryluk Date: Fri, 2 Jul 2010 02:00:14 +0200 Subject: [PATCH 209/455] Remove dead disclaimer/confirmation code. I do not know how to perform age confirmation, so I am just removing it until we know how to do this.
--- youtube-dl | 24 ------------------------ 1 file changed, 24 deletions(-) diff --git a/youtube-dl b/youtube-dl index b94fe4ce2..dbe348f1f 100755 --- a/youtube-dl +++ b/youtube-dl @@ -1089,8 +1089,6 @@ class DailymotionIE(InfoExtractor): """Information Extractor for Dailymotion""" _VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/video/([^_/]+)_([^/]+)' - _DISCLAIMER = '' - _FILTER_POST = '' def __init__(self, downloader=None): InfoExtractor.__init__(self, downloader) @@ -1118,28 +1116,6 @@ class DailymotionIE(InfoExtractor): def _real_initialize(self): return - # Retrieve disclaimer - request = urllib2.Request(self._DISCLAIMER, None, std_headers) - try: - self.report_disclaimer() - disclaimer = urllib2.urlopen(request).read() - except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: unable to retrieve disclaimer: %s' % str(err)) - return - - # Confirm age - disclaimer_form = { - 'filters': '0', - 'submit': "Continue - I'm over 18", - } - request = urllib2.Request(self._FILTER_POST, urllib.urlencode(disclaimer_form), std_headers) - try: - self.report_age_confirmation() - disclaimer = urllib2.urlopen(request).read() - except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err)) - return - def _real_extract(self, url): # Extract id and simplified title from URL mobj = re.match(self._VALID_URL, url) From dea147f78e62755c920569cec67b631e28f9a98e Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Tue, 6 Jul 2010 18:51:45 +0200 Subject: [PATCH 210/455] Remove unused methods from the Dailymotion InfoExtractor --- youtube-dl | 8 -------- 1 file changed, 8 deletions(-) diff --git a/youtube-dl b/youtube-dl index dbe348f1f..19acce127 100755 --- a/youtube-dl +++ b/youtube-dl @@ -1097,14 +1097,6 @@ class DailymotionIE(InfoExtractor): def suitable(url): return (re.match(DailymotionIE._VALID_URL, url) is not None) - def 
report_disclaimer(self): - """Report disclaimer retrieval.""" - self._downloader.to_stdout(u'[dailymotion] Retrieving disclaimer') - - def report_age_confirmation(self): - """Report attempt to confirm age.""" - self._downloader.to_stdout(u'[dailymotion] Confirming age') - def report_download_webpage(self, video_id): """Report webpage download.""" self._downloader.to_stdout(u'[dailymotion] %s: Downloading webpage' % video_id) From 7e2dd306fecee8bc7538f194b470f8e384a3a71a Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Tue, 6 Jul 2010 18:52:46 +0200 Subject: [PATCH 211/455] Make the Dailymotion uploader regexp more flexible because it fails sometimes --- youtube-dl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index 19acce127..36f392c83 100755 --- a/youtube-dl +++ b/youtube-dl @@ -1149,7 +1149,7 @@ class DailymotionIE(InfoExtractor): video_title = mobj.group(1).decode('utf-8') video_title = sanitize_title(video_title) - mobj = re.search(r'(?im)', webpage) + mobj = re.search(r'(?im)', webpage) if mobj is None: self._downloader.trouble(u'ERROR: unable to extract uploader nickname') return From c833bb97dc48319f51d95cdb6bf4d78c35c35fdf Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Tue, 13 Jul 2010 18:01:58 +0200 Subject: [PATCH 212/455] Add support for "original" format in YouTube (fixes issue #155) --- youtube-dl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index 36f392c83..ba8a25a06 100755 --- a/youtube-dl +++ b/youtube-dl @@ -685,13 +685,14 @@ class YoutubeIE(InfoExtractor): _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en' _NETRC_MACHINE = 'youtube' # Listed in order of priority for the -b option - _available_formats = ['37', '22', '45', '35', '34', '43', '18', '6', '5', '17', '13', None] + _available_formats = ['38', '37', '22', '45', '35', '34', '43', '18', '6', '5', '17', '13', None] _video_extensions = { '13': '3gp', '17': 'mp4', '18': 'mp4', 
'22': 'mp4', '37': 'mp4', + '38': 'video', '43': 'webm', '45': 'webm', } From f2413e67939cb833424f2036cba627bdb164abfb Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Tue, 13 Jul 2010 18:20:02 +0200 Subject: [PATCH 213/455] Add a --max-quality flag to limit the highest quality (fixes issue #145) --- youtube-dl | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/youtube-dl b/youtube-dl index ba8a25a06..08297e2e0 100755 --- a/youtube-dl +++ b/youtube-dl @@ -189,6 +189,7 @@ class FileDownloader(object): forcetitle: Force printing title. simulate: Do not download the video files. format: Video format code. + format_limit: Highest quality format to try. outtmpl: Template for output names. ignoreerrors: Do not stop on download errors. ratelimit: Download speed limit, in bytes/sec. @@ -819,6 +820,13 @@ class YoutubeIE(InfoExtractor): params = self._downloader.params format_param = params.get('format', None) if format_param == '0': + format_limit = params.get('format_limit', None) + if format_limit is not None: + try: + # Start at a different format if the user has limited the maximum quality + quality_index = self._available_formats.index(format_limit) + except ValueError: + pass format_param = self._available_formats[quality_index] best_quality = True elif format_param == '-1': @@ -2111,6 +2119,8 @@ if __name__ == '__main__': action='store_const', dest='format', help='alias for -f 22', const='22') video_format.add_option('--all-formats', action='store_const', dest='format', help='download all available video formats', const='-1') + video_format.add_option('--max-quality', + action='store', dest='format_limit', metavar='FORMAT', help='highest quality format limit for -b') parser.add_option_group(video_format) verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options') @@ -2210,6 +2220,7 @@ if __name__ == '__main__': 'forcedescription': opts.getdescription, 'simulate': (opts.simulate or opts.geturl or opts.gettitle or opts.getthumbnail or 
opts.getdescription), 'format': opts.format, + 'format_limit': opts.format_limit, 'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(preferredencoding())) or (opts.format == '-1' and opts.usetitle and u'%(stitle)s-%(id)s-%(format)s.%(ext)s') or (opts.format == '-1' and opts.useliteral and u'%(title)s-%(id)s-%(format)s.%(ext)s') From 823fcda12a9263c8f058be2580e675eaaaf6fa7e Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Tue, 13 Jul 2010 18:22:34 +0200 Subject: [PATCH 214/455] Improve swf player URL detection for RTMP (fixes issue #144) --- youtube-dl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index 08297e2e0..f059887a6 100755 --- a/youtube-dl +++ b/youtube-dl @@ -847,7 +847,7 @@ class YoutubeIE(InfoExtractor): return # Attempt to extract SWF player URL - mobj = re.search(r'swfConfig.*"(http://.*?watch-.*?\.swf)"', video_webpage) + mobj = re.search(r'swfConfig.*"(http://.*?watch.*?-.*?\.swf)"', video_webpage) if mobj is not None: player_url = mobj.group(1) else: From 57edaa5bac5a2ead1041386c398a377b94fe570f Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Tue, 13 Jul 2010 18:53:09 +0200 Subject: [PATCH 215/455] Support the watch_popup syntax in YouTube URLs --- youtube-dl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index f059887a6..ab62006bd 100755 --- a/youtube-dl +++ b/youtube-dl @@ -680,7 +680,7 @@ class InfoExtractor(object): class YoutubeIE(InfoExtractor): """Information extractor for youtube.com.""" - _VALID_URL = r'^((?:http://)?(?:\w+\.)?youtube\.com/(?:(?:v/)|(?:(?:watch(?:\.php)?)?[\?#](?:.+&)?v=)))?([0-9A-Za-z_-]+)(?(1).+)?$' + _VALID_URL = r'^((?:http://)?(?:\w+\.)?youtube\.com/(?:(?:v/)|(?:(?:watch(?:_popup)?(?:\.php)?)?[\?#](?:.+&)?v=)))?([0-9A-Za-z_-]+)(?(1).+)?$' _LANG_URL = r'http://uk.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1' _LOGIN_URL = 'http://www.youtube.com/signup?next=/&gl=US&hl=en' _AGE_URL = 
'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en' From 8da0080d362c8fd958d4027b52f0920635e26cad Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Tue, 13 Jul 2010 19:01:13 +0200 Subject: [PATCH 216/455] Support youtu.be URLs (fixes issue #143) --- youtube-dl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index ab62006bd..71bf6aecc 100755 --- a/youtube-dl +++ b/youtube-dl @@ -680,7 +680,7 @@ class InfoExtractor(object): class YoutubeIE(InfoExtractor): """Information extractor for youtube.com.""" - _VALID_URL = r'^((?:http://)?(?:\w+\.)?youtube\.com/(?:(?:v/)|(?:(?:watch(?:_popup)?(?:\.php)?)?[\?#](?:.+&)?v=)))?([0-9A-Za-z_-]+)(?(1).+)?$' + _VALID_URL = r'^((?:http://)?(?:youtu\.be/|(?:\w+\.)?youtube\.com/(?:(?:v/)|(?:(?:watch(?:_popup)?(?:\.php)?)?[\?#](?:.+&)?v=))))?([0-9A-Za-z_-]+)(?(1).+)?$' _LANG_URL = r'http://uk.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1' _LOGIN_URL = 'http://www.youtube.com/signup?next=/&gl=US&hl=en' _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en' From 9e9647d9a155870a462679bc750ebb2519fcecff Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Tue, 13 Jul 2010 19:01:43 +0200 Subject: [PATCH 217/455] Add comment about weird .video extension for format 38 --- youtube-dl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index 71bf6aecc..481f9f400 100755 --- a/youtube-dl +++ b/youtube-dl @@ -693,7 +693,7 @@ class YoutubeIE(InfoExtractor): '18': 'mp4', '22': 'mp4', '37': 'mp4', - '38': 'video', + '38': 'video', # You actually don't know if this will be MOV, AVI or whatever '43': 'webm', '45': 'webm', } From df372a655f3b04c510492d5a522b19a3e35f01c8 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Tue, 13 Jul 2010 19:37:07 +0200 Subject: [PATCH 218/455] Improve video ordinal assignment method (fixes issue #149) --- youtube-dl | 33 ++++++++++++++++++++++++++++++--- 1 file changed, 30 insertions(+), 3 
deletions(-) diff --git a/youtube-dl b/youtube-dl index 481f9f400..56500d220 100755 --- a/youtube-dl +++ b/youtube-dl @@ -387,6 +387,10 @@ class FileDownloader(object): self.to_stdout(u'[download] Download completed') else: self.to_stdout(u'') + + def increment_downloads(self): + """Increment the ordinal that assigns a number to each file.""" + self._num_downloads += 1 def process_info(self, info_dict): """Process a single dictionary returned by an InfoExtractor.""" @@ -582,7 +586,6 @@ class FileDownloader(object): try: (stream, filename) = sanitize_open(filename, open_mode) self.report_destination(filename) - self._num_downloads += 1 except (OSError, IOError), err: self.trouble('ERROR: unable to open for writing: %s' % str(err)) return False @@ -809,6 +812,10 @@ class YoutubeIE(InfoExtractor): if mobj is None: self._downloader.trouble(u'ERROR: invalid URL: %s' % url) return + + # At this point we have a new video + if self._downloader is not None: + self._downloader.increment_downloads() video_id = mobj.group(2) # Downloader parameters @@ -1035,6 +1042,10 @@ class MetacafeIE(InfoExtractor): self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % mobj2.group(1)) return + # At this point we have a new video + if self._downloader is not None: + self._downloader.increment_downloads() + simple_title = mobj.group(2).decode('utf-8') video_extension = 'flv' @@ -1124,6 +1135,9 @@ class DailymotionIE(InfoExtractor): self._downloader.trouble(u'ERROR: invalid URL: %s' % url) return + # At this point we have a new video + if self._downloader is not None: + self._downloader.increment_downloads() video_id = mobj.group(1) simple_title = mobj.group(2).decode('utf-8') @@ -1209,6 +1223,9 @@ class GoogleIE(InfoExtractor): self._downloader.trouble(u'ERROR: Invalid URL: %s' % url) return + # At this point we have a new video + if self._downloader is not None: + self._downloader.increment_downloads() video_id = mobj.group(1) video_extension = 'mp4' @@ -1317,6 +1334,9 @@ class 
PhotobucketIE(InfoExtractor): self._downloader.trouble(u'ERROR: Invalid URL: %s' % url) return + # At this point we have a new video + if self._downloader is not None: + self._downloader.increment_downloads() video_id = mobj.group(1) video_extension = 'flv' @@ -1392,13 +1412,16 @@ class YahooIE(InfoExtractor): def _real_initialize(self): return - def _real_extract(self, url): + def _real_extract(self, url, new_video=True): # Extract ID from URL mobj = re.match(self._VALID_URL, url) if mobj is None: self._downloader.trouble(u'ERROR: Invalid URL: %s' % url) return + # At this point we have a new video + if self._downloader is not None and new_video: + self._downloader.increment_downloads() video_id = mobj.group(2) video_extension = 'flv' @@ -1425,7 +1448,7 @@ class YahooIE(InfoExtractor): yahoo_vid = mobj.group(1) url = 'http://video.yahoo.com/watch/%s/%s' % (yahoo_vid, yahoo_id) - return self._real_extract(url) + return self._real_extract(url, new_video=False) # Retrieve video webpage to extract further information request = urllib2.Request(url) @@ -1544,6 +1567,10 @@ class GenericIE(InfoExtractor): return def _real_extract(self, url): + # At this point we have a new video + if self._downloader is not None: + self._downloader.increment_downloads() + video_id = url.split('/')[-1] request = urllib2.Request(url) try: From 96942e62241bb54dc32d2a006841d9fc075e9851 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Tue, 13 Jul 2010 19:43:06 +0200 Subject: [PATCH 219/455] Modify User-Agent and Accept headers --- youtube-dl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube-dl b/youtube-dl index 56500d220..e89c9152d 100755 --- a/youtube-dl +++ b/youtube-dl @@ -27,9 +27,9 @@ except ImportError: from cgi import parse_qs std_headers = { - 'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.2) Gecko/20100115 Firefox/3.6', + 'User-Agent': 'Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.6) Gecko/20100627 Firefox/3.6.6', 
'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7', - 'Accept': 'text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5', + 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', 'Accept-Language': 'en-us,en;q=0.5', } From 14912efbb7c8d5ffc3d7d7d1b7de9272e1c8e09a Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Thu, 15 Jul 2010 22:05:04 +0200 Subject: [PATCH 220/455] Bump version number --- LATEST_VERSION | 2 +- youtube-dl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/LATEST_VERSION b/LATEST_VERSION index 47865d378..99bd385bc 100644 --- a/LATEST_VERSION +++ b/LATEST_VERSION @@ -1 +1 @@ -2010.06.06 +2010.07.14 diff --git a/youtube-dl b/youtube-dl index e89c9152d..046b3fd66 100755 --- a/youtube-dl +++ b/youtube-dl @@ -2109,7 +2109,7 @@ if __name__ == '__main__': # Parse command line parser = optparse.OptionParser( usage='Usage: %prog [options] url...', - version='2010.06.06', + version='2010.07.14', conflict_handler='resolve', ) From 9715661c19b585ff5863cb542d5f93c6c210a4ed Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Thu, 22 Jul 2010 20:24:59 +0200 Subject: [PATCH 221/455] Use www. instead of uk. 
in the language setting webpage for YouTube --- youtube-dl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index 046b3fd66..c3bce1d4c 100755 --- a/youtube-dl +++ b/youtube-dl @@ -684,7 +684,7 @@ class YoutubeIE(InfoExtractor): """Information extractor for youtube.com.""" _VALID_URL = r'^((?:http://)?(?:youtu\.be/|(?:\w+\.)?youtube\.com/(?:(?:v/)|(?:(?:watch(?:_popup)?(?:\.php)?)?[\?#](?:.+&)?v=))))?([0-9A-Za-z_-]+)(?(1).+)?$' - _LANG_URL = r'http://uk.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1' + _LANG_URL = r'http://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1' _LOGIN_URL = 'http://www.youtube.com/signup?next=/&gl=US&hl=en' _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en' _NETRC_MACHINE = 'youtube' From 73f4e7afbab6b4189005bb65deb0325a020ea010 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Thu, 22 Jul 2010 20:26:37 +0200 Subject: [PATCH 222/455] Rename UnavailableFormatError to UnavailableVideoError --- youtube-dl | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/youtube-dl b/youtube-dl index c3bce1d4c..49c07ae62 100755 --- a/youtube-dl +++ b/youtube-dl @@ -131,7 +131,7 @@ class PostProcessingError(Exception): """ pass -class UnavailableFormatError(Exception): +class UnavailableVideoError(Exception): """Unavailable Format exception. 
This exception will be thrown when a video is requested @@ -401,7 +401,7 @@ class FileDownloader(object): try: self.verify_url(info_dict['url'].encode('utf-8')).decode('utf-8') except (OSError, IOError, urllib2.URLError, httplib.HTTPException, socket.error), err: - raise UnavailableFormatError + raise UnavailableVideoError # Forced printings if self.params.get('forcetitle', False): @@ -435,7 +435,7 @@ class FileDownloader(object): try: success = self._do_download(filename, info_dict['url'].encode('utf-8'), info_dict.get('player_url', None)) except (OSError, IOError), err: - raise UnavailableFormatError + raise UnavailableVideoError except (urllib2.URLError, httplib.HTTPException, socket.error), err: self.trouble('ERROR: unable to download video data: %s' % str(err)) return @@ -1101,8 +1101,8 @@ class MetacafeIE(InfoExtractor): 'format': u'NA', 'player_url': None, }) - except UnavailableFormatError: - self._downloader.trouble(u'ERROR: format not available for video') + except UnavailableVideoError: + self._downloader.trouble(u'ERROR: unable to download video') class DailymotionIE(InfoExtractor): @@ -1190,8 +1190,8 @@ class DailymotionIE(InfoExtractor): 'format': u'NA', 'player_url': None, }) - except UnavailableFormatError: - self._downloader.trouble(u'ERROR: format not available for video') + except UnavailableVideoError: + self._downloader.trouble(u'ERROR: unable to download video') class GoogleIE(InfoExtractor): """Information extractor for video.google.com.""" @@ -1300,8 +1300,8 @@ class GoogleIE(InfoExtractor): 'format': u'NA', 'player_url': None, }) - except UnavailableFormatError: - self._downloader.trouble(u'ERROR: format not available for video') + except UnavailableVideoError: + self._downloader.trouble(u'ERROR: unable to download video') class PhotobucketIE(InfoExtractor): @@ -1382,8 +1382,8 @@ class PhotobucketIE(InfoExtractor): 'format': u'NA', 'player_url': None, }) - except UnavailableFormatError: - self._downloader.trouble(u'ERROR: format not 
available for video') + except UnavailableVideoError: + self._downloader.trouble(u'ERROR: unable to download video') class YahooIE(InfoExtractor): @@ -1540,8 +1540,8 @@ class YahooIE(InfoExtractor): 'description': video_description, 'player_url': None, }) - except UnavailableFormatError: - self._downloader.trouble(u'ERROR: format not available for video') + except UnavailableVideoError: + self._downloader.trouble(u'ERROR: unable to download video') class GenericIE(InfoExtractor): @@ -1640,8 +1640,8 @@ class GenericIE(InfoExtractor): 'format': u'NA', 'player_url': None, }) - except UnavailableFormatError: - self._downloader.trouble(u'ERROR: format not available for video') + except UnavailableVideoError, err: + self._downloader.trouble(u'ERROR: unable to download video') class YoutubeSearchIE(InfoExtractor): From 9bf7fa52134838a63591052f40d61afa5894da92 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Thu, 22 Jul 2010 20:27:35 +0200 Subject: [PATCH 223/455] Do not check for self._downloader being None in several places --- youtube-dl | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/youtube-dl b/youtube-dl index 49c07ae62..8c22081f8 100755 --- a/youtube-dl +++ b/youtube-dl @@ -1043,8 +1043,7 @@ class MetacafeIE(InfoExtractor): return # At this point we have a new video - if self._downloader is not None: - self._downloader.increment_downloads() + self._downloader.increment_downloads() simple_title = mobj.group(2).decode('utf-8') video_extension = 'flv' @@ -1136,8 +1135,7 @@ class DailymotionIE(InfoExtractor): return # At this point we have a new video - if self._downloader is not None: - self._downloader.increment_downloads() + self._downloader.increment_downloads() video_id = mobj.group(1) simple_title = mobj.group(2).decode('utf-8') @@ -1224,8 +1222,7 @@ class GoogleIE(InfoExtractor): return # At this point we have a new video - if self._downloader is not None: - self._downloader.increment_downloads() + 
self._downloader.increment_downloads() video_id = mobj.group(1) video_extension = 'mp4' @@ -1335,8 +1332,7 @@ class PhotobucketIE(InfoExtractor): return # At this point we have a new video - if self._downloader is not None: - self._downloader.increment_downloads() + self._downloader.increment_downloads() video_id = mobj.group(1) video_extension = 'flv' @@ -1420,8 +1416,7 @@ class YahooIE(InfoExtractor): return # At this point we have a new video - if self._downloader is not None and new_video: - self._downloader.increment_downloads() + self._downloader.increment_downloads() video_id = mobj.group(2) video_extension = 'flv' @@ -1568,8 +1563,7 @@ class GenericIE(InfoExtractor): def _real_extract(self, url): # At this point we have a new video - if self._downloader is not None: - self._downloader.increment_downloads() + self._downloader.increment_downloads() video_id = url.split('/')[-1] request = urllib2.Request(url) From 460d8acbaa44d158d72424665c8699c00873ddfe Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Thu, 22 Jul 2010 20:28:20 +0200 Subject: [PATCH 224/455] Remove some format command line options --- youtube-dl | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/youtube-dl b/youtube-dl index 8c22081f8..e5691c71a 100755 --- a/youtube-dl +++ b/youtube-dl @@ -2132,16 +2132,12 @@ if __name__ == '__main__': video_format = optparse.OptionGroup(parser, 'Video Format Options') video_format.add_option('-f', '--format', action='store', dest='format', metavar='FORMAT', help='video format code') - video_format.add_option('-b', '--best-quality', - action='store_const', dest='format', help='download the best quality video possible', const='0') video_format.add_option('-m', '--mobile-version', action='store_const', dest='format', help='alias for -f 17', const='17') - video_format.add_option('-d', '--high-def', - action='store_const', dest='format', help='alias for -f 22', const='22') video_format.add_option('--all-formats', action='store_const', 
dest='format', help='download all available video formats', const='-1') video_format.add_option('--max-quality', - action='store', dest='format_limit', metavar='FORMAT', help='highest quality format limit for -b') + action='store', dest='format_limit', metavar='FORMAT', help='highest quality format to download') parser.add_option_group(video_format) verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options') From 497cd3e68e42a3a83173363a7ed33e4910e53f05 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Thu, 22 Jul 2010 20:29:52 +0200 Subject: [PATCH 225/455] Partially rewrite YouTube InfoExtractor after it stopped working As part of the changes, the program now downloads the highest quality version by default and uses fmt_url_map to decide which formats are really available. --- youtube-dl | 241 ++++++++++++++++++++++------------------------------- 1 file changed, 101 insertions(+), 140 deletions(-) diff --git a/youtube-dl b/youtube-dl index e5691c71a..c08819d6e 100755 --- a/youtube-dl +++ b/youtube-dl @@ -688,8 +688,8 @@ class YoutubeIE(InfoExtractor): _LOGIN_URL = 'http://www.youtube.com/signup?next=/&gl=US&hl=en' _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en' _NETRC_MACHINE = 'youtube' - # Listed in order of priority for the -b option - _available_formats = ['38', '37', '22', '45', '35', '34', '43', '18', '6', '5', '17', '13', None] + # Listed in order of quality + _available_formats = ['38', '37', '22', '45', '35', '34', '43', '18', '6', '5', '17', '13'] _video_extensions = { '13': '3gp', '17': 'mp4', @@ -812,124 +812,109 @@ class YoutubeIE(InfoExtractor): if mobj is None: self._downloader.trouble(u'ERROR: invalid URL: %s' % url) return - - # At this point we have a new video - if self._downloader is not None: - self._downloader.increment_downloads() video_id = mobj.group(2) - # Downloader parameters - best_quality = False - all_formats = False - format_param = None - quality_index = 0 - if self._downloader is not None: - 
params = self._downloader.params - format_param = params.get('format', None) - if format_param == '0': - format_limit = params.get('format_limit', None) - if format_limit is not None: - try: - # Start at a different format if the user has limited the maximum quality - quality_index = self._available_formats.index(format_limit) - except ValueError: - pass - format_param = self._available_formats[quality_index] - best_quality = True - elif format_param == '-1': - format_param = self._available_formats[quality_index] - all_formats = True + # Get video webpage + self.report_video_webpage_download(video_id) + request = urllib2.Request('http://www.youtube.com/watch?v=%s&gl=US&hl=en' % video_id, None, std_headers) + try: + video_webpage = urllib2.urlopen(request).read() + except (urllib2.URLError, httplib.HTTPException, socket.error), err: + self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err)) + return + + # Attempt to extract SWF player URL + mobj = re.search(r'swfConfig.*"(http://.*?watch.*?-.*?\.swf)"', video_webpage) + if mobj is not None: + player_url = mobj.group(1) + else: + player_url = None + + # Get video info + self.report_video_info_webpage_download(video_id) + for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']: + video_info_url = ('http://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en' + % (video_id, el_type)) + request = urllib2.Request(video_info_url, None, std_headers) + try: + video_info_webpage = urllib2.urlopen(request).read() + video_info = parse_qs(video_info_webpage) + if 'token' in video_info: + break + except (urllib2.URLError, httplib.HTTPException, socket.error), err: + self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % str(err)) + return + self.report_information_extraction(video_id) + + # uploader + if 'author' not in video_info: + self._downloader.trouble(u'ERROR: unable to extract uploader nickname') + return + video_uploader = 
urllib.unquote_plus(video_info['author'][0]) + + # title + if 'title' not in video_info: + self._downloader.trouble(u'ERROR: unable to extract video title') + return + video_title = urllib.unquote_plus(video_info['title'][0]) + video_title = video_title.decode('utf-8') + video_title = sanitize_title(video_title) + + # simplified title + simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title) + simple_title = simple_title.strip(ur'_') + + # thumbnail image + if 'thumbnail_url' not in video_info: + self._downloader.trouble(u'WARNING: unable to extract video thumbnail') + video_thumbnail = '' + else: # don't panic if we can't find it + video_thumbnail = urllib.unquote_plus(video_info['thumbnail_url'][0]) + + # description + video_description = 'No description available.' + if self._downloader.params.get('forcedescription', False): + mobj = re.search(r'', video_webpage) + if mobj is not None: + video_description = mobj.group(1) + + # Decide which formats to download + if 'fmt_url_map' in video_info: + url_map = dict(tuple(pair.split('|')) for pair in video_info['fmt_url_map'][0].split(',')) + format_limit = self._downloader.params.get('format_limit', None) + if format_limit is not None and format_limit in self._available_formats: + format_list = self._available_formats[self._available_formats.index(format_limit):] + else: + format_list = self._available_formats + existing_formats = [x for x in format_list if x in url_map] + if len(existing_formats) == 0: + self._downloader.trouble(u'ERROR: no known formats available for video') + return + requested_format = self._downloader.params.get('format', None) + if requested_format is None: + video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality + elif requested_format == '-1': + video_url_list = url_map.items() # All formats + else: + if requested_format not in existing_formats: + self._downloader.trouble(u'ERROR: format not available for video') + return + video_url_list 
= [(requested_format, url_map[requested_format])] # Specific format + elif 'conn' in video_info and video_info['conn'][0].startswith('rtmp'): + self.report_rtmp_download() + video_url_list = [(None, video_info['conn'][0])] + else: + self._downloader.trouble(u'ERROR: no fmt_url_map or conn information found in video info') + return + + for format_param, video_real_url in video_url_list: + # At this point we have a new video + self._downloader.increment_downloads() - while True: # Extension video_extension = self._video_extensions.get(format_param, 'flv') - # Get video webpage - self.report_video_webpage_download(video_id) - request = urllib2.Request('http://www.youtube.com/watch?v=%s&gl=US&hl=en' % video_id, None, std_headers) - try: - video_webpage = urllib2.urlopen(request).read() - except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err)) - return - - # Attempt to extract SWF player URL - mobj = re.search(r'swfConfig.*"(http://.*?watch.*?-.*?\.swf)"', video_webpage) - if mobj is not None: - player_url = mobj.group(1) - else: - player_url = None - - # Get video info - self.report_video_info_webpage_download(video_id) - for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']: - video_info_url = ('http://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en' - % (video_id, el_type)) - request = urllib2.Request(video_info_url, None, std_headers) - try: - video_info_webpage = urllib2.urlopen(request).read() - video_info = parse_qs(video_info_webpage) - if 'token' in video_info: - break - except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % str(err)) - return - self.report_information_extraction(video_id) - - # "t" param - if 'token' not in video_info: - # Attempt to see if YouTube has issued an error message - if 'reason' not in video_info: - 
self._downloader.trouble(u'ERROR: unable to extract "t" parameter for unknown reason') - stream = open('reportme-ydl-%s.dat' % time.time(), 'wb') - stream.write(video_info_webpage) - stream.close() - else: - reason = urllib.unquote_plus(video_info['reason'][0]) - self._downloader.trouble(u'ERROR: YouTube said: %s' % reason.decode('utf-8')) - return - token = urllib.unquote_plus(video_info['token'][0]) - video_real_url = 'http://www.youtube.com/get_video?video_id=%s&t=%s&eurl=&el=detailpage&ps=default&gl=US&hl=en' % (video_id, token) - if format_param is not None: - video_real_url = '%s&fmt=%s' % (video_real_url, format_param) - - # Check possible RTMP download - if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'): - self.report_rtmp_download() - video_real_url = video_info['conn'][0] - - # uploader - if 'author' not in video_info: - self._downloader.trouble(u'ERROR: unable to extract uploader nickname') - return - video_uploader = urllib.unquote_plus(video_info['author'][0]) - - # title - if 'title' not in video_info: - self._downloader.trouble(u'ERROR: unable to extract video title') - return - video_title = urllib.unquote_plus(video_info['title'][0]) - video_title = video_title.decode('utf-8') - video_title = sanitize_title(video_title) - - # simplified title - simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title) - simple_title = simple_title.strip(ur'_') - - # thumbnail image - if 'thumbnail_url' not in video_info: - self._downloader.trouble(u'WARNING: unable to extract video thumbnail') - video_thumbnail = '' - else: # don't panic if we can't find it - video_thumbnail = urllib.unquote_plus(video_info['thumbnail_url'][0]) - - # description - video_description = 'No description available.' 
- if self._downloader.params.get('forcedescription', False): - mobj = re.search(r'', video_webpage) - if mobj is not None: - video_description = mobj.group(1) - + # Find the video URL in fmt_url_map or conn paramters try: # Process video information self._downloader.process_info({ @@ -944,32 +929,8 @@ class YoutubeIE(InfoExtractor): 'description': video_description.decode('utf-8'), 'player_url': player_url, }) - - if all_formats: - quality_index += 1 - if quality_index == len(self._available_formats): - # None left to get - return - else: - format_param = self._available_formats[quality_index] - continue - return - - except UnavailableFormatError, err: - if best_quality or all_formats: - quality_index += 1 - if quality_index == len(self._available_formats): - # I don't ever expect this to happen - if not all_formats: - self._downloader.trouble(u'ERROR: no known formats available for video') - return - else: - self.report_unavailable_format(video_id, format_param) - format_param = self._available_formats[quality_index] - continue - else: - self._downloader.trouble('ERROR: format not available for video') - return + except UnavailableVideoError, err: + self._downloader.trouble(u'ERROR: unable to download video') class MetacafeIE(InfoExtractor): From e4db6fd0420a0917724a896d29f17666f8c80427 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Thu, 22 Jul 2010 20:31:13 +0200 Subject: [PATCH 226/455] Update user agent string --- youtube-dl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index c08819d6e..a15d180a8 100755 --- a/youtube-dl +++ b/youtube-dl @@ -27,7 +27,7 @@ except ImportError: from cgi import parse_qs std_headers = { - 'User-Agent': 'Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.6) Gecko/20100627 Firefox/3.6.6', + 'User-Agent': 'Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.7) Gecko/20100720 Firefox/3.6.7', 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7', 'Accept': 
'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', 'Accept-Language': 'en-us,en;q=0.5', From 8190e3631ba5f1734e5b45a7e0a0e5999ab70d26 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Thu, 22 Jul 2010 20:32:05 +0200 Subject: [PATCH 227/455] Bump version number --- LATEST_VERSION | 2 +- youtube-dl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/LATEST_VERSION b/LATEST_VERSION index 99bd385bc..f3f4e8fab 100644 --- a/LATEST_VERSION +++ b/LATEST_VERSION @@ -1 +1 @@ -2010.07.14 +2010.07.22 diff --git a/youtube-dl b/youtube-dl index a15d180a8..5fd331e79 100755 --- a/youtube-dl +++ b/youtube-dl @@ -2064,7 +2064,7 @@ if __name__ == '__main__': # Parse command line parser = optparse.OptionParser( usage='Usage: %prog [options] url...', - version='2010.07.14', + version='2010.07.22', conflict_handler='resolve', ) From 2e3a32e4acd55d79845464432f49227d6a3b3ade Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Sat, 24 Jul 2010 09:47:01 +0200 Subject: [PATCH 228/455] Restore proper support for webm formats (fixes issue #166) --- youtube-dl | 34 ++++++++++++++++++++++++++++++++-- 1 file changed, 32 insertions(+), 2 deletions(-) diff --git a/youtube-dl b/youtube-dl index 5fd331e79..00d9696d6 100755 --- a/youtube-dl +++ b/youtube-dl @@ -879,7 +879,36 @@ class YoutubeIE(InfoExtractor): video_description = mobj.group(1) # Decide which formats to download - if 'fmt_url_map' in video_info: + requested_format = self._downloader.params.get('format', None) + + if requested_format in ["43", "45"]: # webm formats + # Join the HTML5 beta + html5form = { "enable_html5": "true" } + request = urllib2.Request('http://www.youtube.com/html5', urllib.urlencode(html5form), std_headers) + try: + self._downloader.to_stdout(u'[youtube] Joining the HTML5 Beta') + urllib2.urlopen(request).read() + except (urllib2.URLError, httplib.HTTPException, socket.error), err: + self._downloader.trouble(u'ERROR: unable to join the HTML5 Beta: %s' % str(err)) + return + + # 
Request the video webpage with webm enabled + request = urllib2.Request('http://www.youtube.com/watch?v=%s&webm=1' % video_id, None, std_headers) + try: + self._downloader.to_stdout(u'[youtube] Requesting HTML5 video webpage') + video_webpage = urllib2.urlopen(request).read() + except (urllib2.URLError, httplib.HTTPException, socket.error), err: + self._downloader.trouble(u'ERROR: unable to get the HTML5 video webpage: %s' % str(err)) + return + + # Find the URL for the requested format + mobj = re.search(ur'setAvailableFormat\("(.*?)".*?"%s"\);' % requested_format, video_webpage) + if mobj is None: + self._downloader.trouble(u'ERROR: format not available for video') + return + video_url_list = [(requested_format, mobj.group(1))] + + elif 'fmt_url_map' in video_info: url_map = dict(tuple(pair.split('|')) for pair in video_info['fmt_url_map'][0].split(',')) format_limit = self._downloader.params.get('format_limit', None) if format_limit is not None and format_limit in self._available_formats: @@ -890,7 +919,6 @@ class YoutubeIE(InfoExtractor): if len(existing_formats) == 0: self._downloader.trouble(u'ERROR: no known formats available for video') return - requested_format = self._downloader.params.get('format', None) if requested_format is None: video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality elif requested_format == '-1': @@ -900,9 +928,11 @@ class YoutubeIE(InfoExtractor): self._downloader.trouble(u'ERROR: format not available for video') return video_url_list = [(requested_format, url_map[requested_format])] # Specific format + elif 'conn' in video_info and video_info['conn'][0].startswith('rtmp'): self.report_rtmp_download() video_url_list = [(None, video_info['conn'][0])] + else: self._downloader.trouble(u'ERROR: no fmt_url_map or conn information found in video info') return From 5ce7d172d7613e538118615bcb82c514c95a8dfc Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Sat, 24 Jul 2010 10:23:06 +0200 Subject: [PATCH 
229/455] Restore support for the get_video method, fixing many issues --- youtube-dl | 44 +++++++++----------------------------------- 1 file changed, 9 insertions(+), 35 deletions(-) diff --git a/youtube-dl b/youtube-dl index 00d9696d6..c67bd3855 100755 --- a/youtube-dl +++ b/youtube-dl @@ -878,37 +878,14 @@ class YoutubeIE(InfoExtractor): if mobj is not None: video_description = mobj.group(1) + # token + video_token = urllib.unquote_plus(video_info['token'][0]) + # Decide which formats to download requested_format = self._downloader.params.get('format', None) + get_video_template = 'http://www.youtube.com/get_video?video_id=%s&t=%s&eurl=&el=&ps=&asv=&fmt=%%s' % (video_id, video_token) - if requested_format in ["43", "45"]: # webm formats - # Join the HTML5 beta - html5form = { "enable_html5": "true" } - request = urllib2.Request('http://www.youtube.com/html5', urllib.urlencode(html5form), std_headers) - try: - self._downloader.to_stdout(u'[youtube] Joining the HTML5 Beta') - urllib2.urlopen(request).read() - except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: unable to join the HTML5 Beta: %s' % str(err)) - return - - # Request the video webpage with webm enabled - request = urllib2.Request('http://www.youtube.com/watch?v=%s&webm=1' % video_id, None, std_headers) - try: - self._downloader.to_stdout(u'[youtube] Requesting HTML5 video webpage') - video_webpage = urllib2.urlopen(request).read() - except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: unable to get the HTML5 video webpage: %s' % str(err)) - return - - # Find the URL for the requested format - mobj = re.search(ur'setAvailableFormat\("(.*?)".*?"%s"\);' % requested_format, video_webpage) - if mobj is None: - self._downloader.trouble(u'ERROR: format not available for video') - return - video_url_list = [(requested_format, mobj.group(1))] - - elif 'fmt_url_map' in video_info: + if 'fmt_url_map' in 
video_info: url_map = dict(tuple(pair.split('|')) for pair in video_info['fmt_url_map'][0].split(',')) format_limit = self._downloader.params.get('format_limit', None) if format_limit is not None and format_limit in self._available_formats: @@ -920,14 +897,11 @@ class YoutubeIE(InfoExtractor): self._downloader.trouble(u'ERROR: no known formats available for video') return if requested_format is None: - video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality + video_url_list = [(existing_formats[0], get_video_template % existing_formats[0])] # Best quality elif requested_format == '-1': - video_url_list = url_map.items() # All formats + video_url_list = [(f, get_video_template % f) for f in existing_formats] # All formats else: - if requested_format not in existing_formats: - self._downloader.trouble(u'ERROR: format not available for video') - return - video_url_list = [(requested_format, url_map[requested_format])] # Specific format + video_url_list = [(requested_format, get_video_template % requested_format)] # Specific format elif 'conn' in video_info and video_info['conn'][0].startswith('rtmp'): self.report_rtmp_download() @@ -960,7 +934,7 @@ class YoutubeIE(InfoExtractor): 'player_url': player_url, }) except UnavailableVideoError, err: - self._downloader.trouble(u'ERROR: unable to download video') + self._downloader.trouble(u'ERROR: unable to download video (format may not be available)') class MetacafeIE(InfoExtractor): From 06f34701fe4f5170a77af8f60dccab578593e841 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Sat, 24 Jul 2010 10:24:52 +0200 Subject: [PATCH 230/455] Bump version number --- LATEST_VERSION | 2 +- youtube-dl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/LATEST_VERSION b/LATEST_VERSION index f3f4e8fab..960388100 100644 --- a/LATEST_VERSION +++ b/LATEST_VERSION @@ -1 +1 @@ -2010.07.22 +2010.07.24 diff --git a/youtube-dl b/youtube-dl index c67bd3855..48c13b701 100755 --- a/youtube-dl +++ 
b/youtube-dl @@ -2068,7 +2068,7 @@ if __name__ == '__main__': # Parse command line parser = optparse.OptionParser( usage='Usage: %prog [options] url...', - version='2010.07.22', + version='2010.07.24', conflict_handler='resolve', ) From f95f29fd25b2b33967e5f9463280118a5ce907a1 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Sun, 25 Jul 2010 11:55:49 +0200 Subject: [PATCH 231/455] Properly detect YouTube error messages to print them on screen (fixes issue #172) --- youtube-dl | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/youtube-dl b/youtube-dl index 48c13b701..8cfa6dfd6 100755 --- a/youtube-dl +++ b/youtube-dl @@ -844,6 +844,14 @@ class YoutubeIE(InfoExtractor): except (urllib2.URLError, httplib.HTTPException, socket.error), err: self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % str(err)) return + if 'token' not in video_info: + if 'reason' in video_info: + self._downloader.trouble(u'ERROR: YouTube said: %s' % video_info['reason'][0]) + else: + self._downloader.trouble(u'ERROR: "token" parameter not in video info for unknown reason') + return + + # Start extracting information self.report_information_extraction(video_id) # uploader From 101e0d1e9178ba209dcf1ef79cb532cbb1025d93 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Tue, 27 Jul 2010 20:11:06 +0200 Subject: [PATCH 232/455] Reorganize request code to make it a bit more robust --- youtube-dl | 66 +++++++++++++++++++++++++----------------------------- 1 file changed, 30 insertions(+), 36 deletions(-) diff --git a/youtube-dl b/youtube-dl index 8cfa6dfd6..cf0336e56 100755 --- a/youtube-dl +++ b/youtube-dl @@ -287,16 +287,6 @@ class FileDownloader(object): multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower()) return long(round(number * multiplier)) - @staticmethod - def verify_url(url): - """Verify a URL is valid and data could be downloaded. 
Return real data URL.""" - request = urllib2.Request(url, None, std_headers) - data = urllib2.urlopen(request) - data.read(1) - url = data.geturl() - data.close() - return url - def add_info_extractor(self, ie): """Add an InfoExtractor object to the end of the list.""" self._ies.append(ie) @@ -396,13 +386,6 @@ class FileDownloader(object): """Process a single dictionary returned by an InfoExtractor.""" # Do nothing else if in simulate mode if self.params.get('simulate', False): - # Verify URL if it's an HTTP one - if info_dict['url'].startswith('http'): - try: - self.verify_url(info_dict['url'].encode('utf-8')).decode('utf-8') - except (OSError, IOError, urllib2.URLError, httplib.HTTPException, socket.error), err: - raise UnavailableVideoError - # Forced printings if self.params.get('forcetitle', False): print info_dict['title'].encode(preferredencoding(), 'xmlcharrefreplace') @@ -539,32 +522,43 @@ class FileDownloader(object): count = 0 retries = self.params.get('retries', 0) - while True: + while count <= retries: # Establish connection try: data = urllib2.urlopen(request) break except (urllib2.HTTPError, ), err: - if err.code == 503: - # Retry in case of HTTP error 503 - count += 1 - if count <= retries: - self.report_retry(count, retries) - continue - if err.code != 416: # 416 is 'Requested range not satisfiable' + if err.code != 503 and err.code != 416: + # Unexpected HTTP error raise - # Unable to resume - data = urllib2.urlopen(basic_request) - content_length = data.info()['Content-Length'] + elif err.code == 416: + # Unable to resume (requested range not satisfiable) + try: + # Open the connection again without the range header + data = urllib2.urlopen(basic_request) + content_length = data.info()['Content-Length'] + except (urllib2.HTTPError, ), err: + if err.code != 503: + raise + else: + # Examine the reported length + if content_length is not None and long(content_length) == resume_len: + # The file had already been fully downloaded + 
self.report_file_already_downloaded(filename) + return True + else: + # The length does not match, we start the download over + self.report_unable_to_resume() + open_mode = 'wb' + break + # Retry + count += 1 + if count <= retries: + self.report_retry(count, retries) - if content_length is not None and long(content_length) == resume_len: - # Because the file had already been fully downloaded - self.report_file_already_downloaded(filename) - return True - else: - # Because the server didn't let us - self.report_unable_to_resume() - open_mode = 'wb' + if count > retries: + self.trouble(u'ERROR: giving up after %s retries' % retries) + return False data_len = data.info().get('Content-length', None) data_len_str = self.format_bytes(data_len) From 268fb2bdd8962fb2064e32caff95cc098bb0f49a Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Sun, 1 Aug 2010 01:15:43 +0200 Subject: [PATCH 233/455] Consider the file downloaded if the size differs in less than 100 bytes (fixes issue #175) --- youtube-dl | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/youtube-dl b/youtube-dl index cf0336e56..6be25baa4 100755 --- a/youtube-dl +++ b/youtube-dl @@ -542,8 +542,15 @@ class FileDownloader(object): raise else: # Examine the reported length - if content_length is not None and long(content_length) == resume_len: - # The file had already been fully downloaded + if (content_length is not None and + resume_len - 100 < long(content_length) < resume_len + 100): + # The file had already been fully downloaded. + # Explanation to the above condition: in issue #175 it was revealed that + # YouTube sometimes adds or removes a few bytes from the end of the file, + # changing the file size slightly and causing problems for some users. So + # I decided to implement a suggested change and consider the file + # completely downloaded if the file size differs less than 100 bytes from + # the one in the hard drive. 
self.report_file_already_downloaded(filename) return True else: From 2962317dea05602dc8be45bc1bbff7f2ac8ddd9d Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Sun, 1 Aug 2010 10:40:37 +0200 Subject: [PATCH 234/455] Put back -b option as a placeholder with a warning message --- youtube-dl | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/youtube-dl b/youtube-dl index 6be25baa4..b0c0d1939 100755 --- a/youtube-dl +++ b/youtube-dl @@ -2112,6 +2112,8 @@ if __name__ == '__main__': action='store_const', dest='format', help='download all available video formats', const='-1') video_format.add_option('--max-quality', action='store', dest='format_limit', metavar='FORMAT', help='highest quality format to download') + video_format.add_option('-b', '--best-quality', + action='store_true', dest='bestquality', help='download the best video quality (DEPRECATED)') parser.add_option_group(video_format) verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options') @@ -2164,6 +2166,8 @@ if __name__ == '__main__': all_urls = batchurls + args # Conflicting, missing and erroneous options + if opts.bestquality: + print >>sys.stderr, u'\nWARNING: -b/--best-quality IS DEPRECATED AS IT IS THE DEFAULT BEHAVIOR NOW\n' if opts.usenetrc and (opts.username is not None or opts.password is not None): parser.error(u'using .netrc conflicts with giving username/password') if opts.password is not None and opts.username is None: From 204c9398ab2936a67e7daa9ea5fe989dbd382d5f Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Wed, 4 Aug 2010 18:52:00 +0200 Subject: [PATCH 235/455] Merge Gavin van Lelyveld's patch for --playlist-start option --- youtube-dl | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index b0c0d1939..266ed5854 100755 --- a/youtube-dl +++ b/youtube-dl @@ -543,7 +543,7 @@ class FileDownloader(object): else: # Examine the reported length if (content_length is not None and - resume_len - 100 < 
long(content_length) < resume_len + 100): + (resume_len - 100 < long(content_length) < resume_len + 100)): # The file had already been fully downloaded. # Explanation to the above condition: in issue #175 it was revealed that # YouTube sometimes adds or removes a few bytes from the end of the file, @@ -1941,6 +1941,11 @@ class YoutubePlaylistIE(InfoExtractor): break pagenum = pagenum + 1 + playliststart = self._downloader.params.get('playliststart', 1) + playliststart -= 1 #our arrays are zero-based but the playlist is 1-based + if playliststart > 0: + video_ids = video_ids[playliststart:] + for id in video_ids: self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id) return @@ -1996,6 +2001,11 @@ class YoutubeUserIE(InfoExtractor): ids_in_page.append(mobj.group(1)) video_ids.extend(ids_in_page) + playliststart = self._downloader.params.get('playliststart', 1) + playliststart = playliststart-1 #our arrays are zero-based but the playlist is 1-based + if playliststart > 0: + video_ids = video_ids[playliststart:] + for id in video_ids: self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id) return @@ -2093,6 +2103,8 @@ if __name__ == '__main__': dest='ratelimit', metavar='LIMIT', help='download rate limit (e.g. 
50k or 44.6m)') parser.add_option('-R', '--retries', dest='retries', metavar='RETRIES', help='number of retries (default is 10)', default=10) + parser.add_option('--playlist-start', + dest='playliststart', metavar='NUMBER', help='playlist video to start at (default is 1)', default=1) authentication = optparse.OptionGroup(parser, 'Authentication Options') authentication.add_option('-u', '--username', @@ -2188,6 +2200,11 @@ if __name__ == '__main__': opts.retries = long(opts.retries) except (TypeError, ValueError), err: parser.error(u'invalid retry count specified') + if opts.playliststart is not None: + try: + opts.playliststart = long(opts.playliststart) + except (TypeError, ValueError), err: + parser.error(u'invalid playlist page specified') # Information extractors youtube_ie = YoutubeIE() @@ -2229,6 +2246,7 @@ if __name__ == '__main__': 'retries': opts.retries, 'continuedl': opts.continue_dl, 'noprogress': opts.noprogress, + 'playliststart': opts.playliststart, }) fd.add_info_extractor(youtube_search_ie) fd.add_info_extractor(youtube_pl_ie) From 109626fcc09d1c27946871d2bdc40b9cdb33d7ee Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Wed, 4 Aug 2010 19:05:53 +0200 Subject: [PATCH 236/455] Fix metacafe.com code not working due to gdaKey again (fixes issue #185) --- youtube-dl | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/youtube-dl b/youtube-dl index 266ed5854..09e1349a8 100755 --- a/youtube-dl +++ b/youtube-dl @@ -1039,15 +1039,15 @@ class MetacafeIE(InfoExtractor): return mediaURL = urllib.unquote(mobj.group(1)) - #mobj = re.search(r'(?m)&gdaKey=(.*?)&', webpage) - #if mobj is None: - # self._downloader.trouble(u'ERROR: unable to extract gdaKey') - # return - #gdaKey = mobj.group(1) - # - #video_url = '%s?__gda__=%s' % (mediaURL, gdaKey) - - video_url = mediaURL + # Extract gdaKey if available + mobj = re.search(r'(?m)&gdaKey=(.*?)&', webpage) + if mobj is None: + video_url = mediaURL + 
#self._downloader.trouble(u'ERROR: unable to extract gdaKey') + #return + else: + gdaKey = mobj.group(1) + video_url = '%s?__gda__=%s' % (mediaURL, gdaKey) mobj = re.search(r'(?im)(.*) - Video', webpage) if mobj is None: From 813962f85a7dee3a37ac2efe60a423c2e80616a5 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Wed, 4 Aug 2010 19:09:10 +0200 Subject: [PATCH 237/455] Update user-agent string --- youtube-dl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index 09e1349a8..727c331f6 100755 --- a/youtube-dl +++ b/youtube-dl @@ -27,7 +27,7 @@ except ImportError: from cgi import parse_qs std_headers = { - 'User-Agent': 'Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.7) Gecko/20100720 Firefox/3.6.7', + 'User-Agent': 'Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.8) Gecko/20100723 Firefox/3.6.8', 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7', 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', 'Accept-Language': 'en-us,en;q=0.5', From 80cc23304f75c4b1d2ac27e7fa6b5504df6e6687 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Wed, 4 Aug 2010 19:09:57 +0200 Subject: [PATCH 238/455] Bump version number --- LATEST_VERSION | 2 +- youtube-dl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/LATEST_VERSION b/LATEST_VERSION index 960388100..a4b357ab0 100644 --- a/LATEST_VERSION +++ b/LATEST_VERSION @@ -1 +1 @@ -2010.07.24 +2010.08.04 diff --git a/youtube-dl b/youtube-dl index 727c331f6..d54694954 100755 --- a/youtube-dl +++ b/youtube-dl @@ -2087,7 +2087,7 @@ if __name__ == '__main__': # Parse command line parser = optparse.OptionParser( usage='Usage: %prog [options] url...', - version='2010.07.24', + version='2010.08.04', conflict_handler='resolve', ) From 8a9f53bebfeeb546ea0222098c64add781cb50b8 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Thu, 12 Aug 2010 18:28:34 +0200 Subject: [PATCH 239/455] Fix typo in report_resuming_byte doc string (fixes issue #188) --- 
youtube-dl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index d54694954..b41b57ea8 100755 --- a/youtube-dl +++ b/youtube-dl @@ -353,7 +353,7 @@ class FileDownloader(object): (percent_str, data_len_str, speed_str, eta_str), skip_eol=True) def report_resuming_byte(self, resume_len): - """Report attemtp to resume at given byte.""" + """Report attempt to resume at given byte.""" self.to_stdout(u'[download] Resuming download at byte %s' % resume_len) def report_retry(self, count, retries): From 38ed13444a0576ea98ae5b15aa6422bb4d5737ec Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Thu, 12 Aug 2010 18:40:36 +0200 Subject: [PATCH 240/455] Improve error message on invalid output template and abort execution --- youtube-dl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index b41b57ea8..727fadea4 100755 --- a/youtube-dl +++ b/youtube-dl @@ -404,7 +404,8 @@ class FileDownloader(object): template_dict['ord'] = unicode('%05d' % self._num_downloads) filename = self.params['outtmpl'] % template_dict except (ValueError, KeyError), err: - self.trouble('ERROR: invalid output template or system charset: %s' % str(err)) + self.trouble(u'ERROR: invalid system charset or erroneous output template') + return if self.params.get('nooverwrites', False) and os.path.exists(filename): self.to_stderr(u'WARNING: file exists: %s; skipping' % filename) return From d67e097462e238f47bc9203e5bba0e5120e7f5f9 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Thu, 12 Aug 2010 18:41:29 +0200 Subject: [PATCH 241/455] Abort download in case of error writing file data to disk --- youtube-dl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index 727fadea4..fb231ae40 100755 --- a/youtube-dl +++ b/youtube-dl @@ -594,7 +594,8 @@ class FileDownloader(object): try: stream.write(data_block) except (IOError, OSError), err: - self.trouble('\nERROR: unable to write data: %s' % 
str(err)) + self.trouble(u'\nERROR: unable to write data: %s' % str(err)) + return False block_size = self.best_block_size(after - before, data_block_len) # Progress message From db7e31b853d888d129c7eeb8532ac29fd704b977 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Thu, 12 Aug 2010 18:42:26 +0200 Subject: [PATCH 242/455] Use unicode strings for several error messages that were missing the "u" --- youtube-dl | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/youtube-dl b/youtube-dl index fb231ae40..717d97d96 100755 --- a/youtube-dl +++ b/youtube-dl @@ -413,7 +413,7 @@ class FileDownloader(object): try: self.pmkdir(filename) except (OSError, IOError), err: - self.trouble('ERROR: unable to create directories: %s' % str(err)) + self.trouble(u'ERROR: unable to create directories: %s' % str(err)) return try: @@ -421,17 +421,17 @@ class FileDownloader(object): except (OSError, IOError), err: raise UnavailableVideoError except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self.trouble('ERROR: unable to download video data: %s' % str(err)) + self.trouble(u'ERROR: unable to download video data: %s' % str(err)) return except (ContentTooShortError, ), err: - self.trouble('ERROR: content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded)) + self.trouble(u'ERROR: content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded)) return if success: try: self.post_process(filename, info_dict) except (PostProcessingError), err: - self.trouble('ERROR: postprocessing: %s' % str(err)) + self.trouble(u'ERROR: postprocessing: %s' % str(err)) return def download(self, url_list): @@ -456,7 +456,7 @@ class FileDownloader(object): break if not suitable_found: - self.trouble('ERROR: no suitable InfoExtractor: %s' % url) + self.trouble(u'ERROR: no suitable InfoExtractor: %s' % url) return self._download_retcode @@ -496,7 +496,7 @@ class FileDownloader(object): self.to_stdout(u'\r[rtmpdump] %s 
bytes' % os.path.getsize(filename)) return True else: - self.trouble('\nERROR: rtmpdump exited with code %d' % retval) + self.trouble(u'\nERROR: rtmpdump exited with code %d' % retval) return False def _do_download(self, filename, url, player_url): @@ -589,7 +589,7 @@ class FileDownloader(object): (stream, filename) = sanitize_open(filename, open_mode) self.report_destination(filename) except (OSError, IOError), err: - self.trouble('ERROR: unable to open for writing: %s' % str(err)) + self.trouble(u'ERROR: unable to open for writing: %s' % str(err)) return False try: stream.write(data_block) From c6c555cf8a3e5457f84c6650b1f96a332db50d44 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Thu, 12 Aug 2010 19:15:26 +0200 Subject: [PATCH 243/455] Fix metacafe.com downloads for some videos (fixes issue #189) --- youtube-dl | 36 +++++++++++++++++++++++------------- 1 file changed, 23 insertions(+), 13 deletions(-) diff --git a/youtube-dl b/youtube-dl index 717d97d96..e9211b240 100755 --- a/youtube-dl +++ b/youtube-dl @@ -1036,20 +1036,30 @@ class MetacafeIE(InfoExtractor): # Extract URL, uploader and title from webpage self.report_extraction(video_id) mobj = re.search(r'(?m)&mediaURL=([^&]+)', webpage) - if mobj is None: - self._downloader.trouble(u'ERROR: unable to extract media URL') - return - mediaURL = urllib.unquote(mobj.group(1)) - - # Extract gdaKey if available - mobj = re.search(r'(?m)&gdaKey=(.*?)&', webpage) - if mobj is None: - video_url = mediaURL - #self._downloader.trouble(u'ERROR: unable to extract gdaKey') - #return + if mobj is not None: + mediaURL = urllib.unquote(mobj.group(1)) + + # Extract gdaKey if available + mobj = re.search(r'(?m)&gdaKey=(.*?)&', webpage) + if mobj is None: + video_url = mediaURL + else: + gdaKey = mobj.group(1) + video_url = '%s?__gda__=%s' % (mediaURL, gdaKey) else: - gdaKey = mobj.group(1) - video_url = '%s?__gda__=%s' % (mediaURL, gdaKey) + mobj = re.search(r' name="flashvars" value="(.*?)"', webpage) + if mobj is None: + 
self._downloader.trouble(u'ERROR: unable to extract media URL') + return + vardict = parse_qs(mobj.group(1)) + if 'mediaData' not in vardict: + self._downloader.trouble(u'ERROR: unable to extract media URL') + return + mobj = re.search(r'"mediaURL":"(http.*?)","key":"(.*?)"', vardict['mediaData'][0]) + if mobj is None: + self._downloader.trouble(u'ERROR: unable to extract media URL') + return + video_url = '%s?__gda__=%s' % (mobj.group(1).replace('\\/', '/'), mobj.group(2)) mobj = re.search(r'(?im)(.*) - Video', webpage) if mobj is None: From 6b57e8c5ac169f3406eafcbe5db095242c811748 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Thu, 12 Aug 2010 19:21:06 +0200 Subject: [PATCH 244/455] Extract the video extension from the media URL in metacafe.com --- youtube-dl | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/youtube-dl b/youtube-dl index e9211b240..7bb8146e0 100755 --- a/youtube-dl +++ b/youtube-dl @@ -1022,7 +1022,6 @@ class MetacafeIE(InfoExtractor): self._downloader.increment_downloads() simple_title = mobj.group(2).decode('utf-8') - video_extension = 'flv' # Retrieve video webpage to extract further information request = urllib2.Request('http://www.metacafe.com/watch/%s/' % video_id) @@ -1038,6 +1037,7 @@ class MetacafeIE(InfoExtractor): mobj = re.search(r'(?m)&mediaURL=([^&]+)', webpage) if mobj is not None: mediaURL = urllib.unquote(mobj.group(1)) + video_extension = mediaURL[-3:] # Extract gdaKey if available mobj = re.search(r'(?m)&gdaKey=(.*?)&', webpage) @@ -1059,7 +1059,9 @@ class MetacafeIE(InfoExtractor): if mobj is None: self._downloader.trouble(u'ERROR: unable to extract media URL') return - video_url = '%s?__gda__=%s' % (mobj.group(1).replace('\\/', '/'), mobj.group(2)) + mediaURL = mobj.group(1).replace('\\/', '/') + video_extension = mediaURL[-3:] + video_url = '%s?__gda__=%s' % (mediaURL, mobj.group(2)) mobj = re.search(r'(?im)(.*) - Video', webpage) if mobj is None: From 2933532c5bd4c49d325f3fa485e0b3c9ea080952 Mon 
Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Sat, 14 Aug 2010 12:15:31 +0200 Subject: [PATCH 245/455] Allow the #! notation for YouTube URLs found in many links in their website --- youtube-dl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index 7bb8146e0..d7b30adca 100755 --- a/youtube-dl +++ b/youtube-dl @@ -686,7 +686,7 @@ class InfoExtractor(object): class YoutubeIE(InfoExtractor): """Information extractor for youtube.com.""" - _VALID_URL = r'^((?:http://)?(?:youtu\.be/|(?:\w+\.)?youtube\.com/(?:(?:v/)|(?:(?:watch(?:_popup)?(?:\.php)?)?[\?#](?:.+&)?v=))))?([0-9A-Za-z_-]+)(?(1).+)?$' + _VALID_URL = r'^((?:http://)?(?:youtu\.be/|(?:\w+\.)?youtube\.com/(?:(?:v/)|(?:(?:watch(?:_popup)?(?:\.php)?)?(?:\?|#!?)(?:.+&)?v=))))?([0-9A-Za-z_-]+)(?(1).+)?$' _LANG_URL = r'http://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1' _LOGIN_URL = 'http://www.youtube.com/signup?next=/&gl=US&hl=en' _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en' From 8e686771af73876f93e44c52dc0dcaf99a56b6f7 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Sun, 22 Aug 2010 00:48:55 +0200 Subject: [PATCH 246/455] Decode the reason given on YouTube errors to avoid crashes (fixes issue #193) --- youtube-dl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index d7b30adca..c850f26b3 100755 --- a/youtube-dl +++ b/youtube-dl @@ -849,7 +849,7 @@ class YoutubeIE(InfoExtractor): return if 'token' not in video_info: if 'reason' in video_info: - self._downloader.trouble(u'ERROR: YouTube said: %s' % video_info['reason'][0]) + self._downloader.trouble(u'ERROR: YouTube said: %s' % video_info['reason'][0].decode('utf-8')) else: self._downloader.trouble(u'ERROR: "token" parameter not in video info for unknown reason') return From 33407be7d69a5d59e72d8e6eff1e785c9810db4d Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Sat, 11 Sep 2010 09:47:21 +0200 Subject: [PATCH 247/455] Fix 
"unable to extract uploader nickname" error with Dailymotion --- youtube-dl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index c850f26b3..379033621 100755 --- a/youtube-dl +++ b/youtube-dl @@ -1158,7 +1158,7 @@ class DailymotionIE(InfoExtractor): video_title = mobj.group(1).decode('utf-8') video_title = sanitize_title(video_title) - mobj = re.search(r'(?im)', webpage) + mobj = re.search(r'(?im)
    .*?(.+?)', webpage) if mobj is None: self._downloader.trouble(u'ERROR: unable to extract uploader nickname') return From 5c44af187542ff7d2334e465f4d6bef56d92f2b1 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Sat, 11 Sep 2010 09:52:25 +0200 Subject: [PATCH 248/455] Do not print file name on warning message (fixes issue #197) --- youtube-dl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index 379033621..9c1ead646 100755 --- a/youtube-dl +++ b/youtube-dl @@ -407,7 +407,7 @@ class FileDownloader(object): self.trouble(u'ERROR: invalid system charset or erroneous output template') return if self.params.get('nooverwrites', False) and os.path.exists(filename): - self.to_stderr(u'WARNING: file exists: %s; skipping' % filename) + self.to_stderr(u'WARNING: file exists and will be skipped') return try: From bbd4bb037a665234a6521569ad85437651eaa65d Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Sat, 11 Sep 2010 09:54:17 +0200 Subject: [PATCH 249/455] Support the -nocookie suffix in youtube domain name (fixes issue #200) --- youtube-dl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index 9c1ead646..315bfad1e 100755 --- a/youtube-dl +++ b/youtube-dl @@ -686,7 +686,7 @@ class InfoExtractor(object): class YoutubeIE(InfoExtractor): """Information extractor for youtube.com.""" - _VALID_URL = r'^((?:http://)?(?:youtu\.be/|(?:\w+\.)?youtube\.com/(?:(?:v/)|(?:(?:watch(?:_popup)?(?:\.php)?)?(?:\?|#!?)(?:.+&)?v=))))?([0-9A-Za-z_-]+)(?(1).+)?$' + _VALID_URL = r'^((?:http://)?(?:youtu\.be/|(?:\w+\.)?youtube(?:-nocookie)?\.com/(?:(?:v/)|(?:(?:watch(?:_popup)?(?:\.php)?)?(?:\?|#!?)(?:.+&)?v=))))?([0-9A-Za-z_-]+)(?(1).+)?$' _LANG_URL = r'http://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1' _LOGIN_URL = 'http://www.youtube.com/signup?next=/&gl=US&hl=en' _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en' From e86e9474bfdd2cb12f70d34e5cd6d2ff20171887 Mon 
Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Sat, 11 Sep 2010 09:58:34 +0200 Subject: [PATCH 250/455] Treat HTTP error 500 the same way as 503 (fixes issue #209) --- youtube-dl | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/youtube-dl b/youtube-dl index 315bfad1e..54881a4d8 100755 --- a/youtube-dl +++ b/youtube-dl @@ -194,7 +194,7 @@ class FileDownloader(object): ignoreerrors: Do not stop on download errors. ratelimit: Download speed limit, in bytes/sec. nooverwrites: Prevent overwriting files. - retries: Number of times to retry for HTTP error 503 + retries: Number of times to retry for HTTP error 5xx continuedl: Try to continue downloads if possible. noprogress: Do not print the progress bar. """ @@ -357,8 +357,8 @@ class FileDownloader(object): self.to_stdout(u'[download] Resuming download at byte %s' % resume_len) def report_retry(self, count, retries): - """Report retry in case of HTTP error 503""" - self.to_stdout(u'[download] Got HTTP error 503. Retrying (attempt %d of %d)...' % (count, retries)) + """Report retry in case of HTTP error 5xx""" + self.to_stdout(u'[download] Got server HTTP error. Retrying (attempt %d of %d)...' 
% (count, retries)) def report_file_already_downloaded(self, file_name): """Report file has already been fully downloaded.""" @@ -529,7 +529,7 @@ class FileDownloader(object): data = urllib2.urlopen(request) break except (urllib2.HTTPError, ), err: - if err.code != 503 and err.code != 416: + if err.code != 500 and err.code != 503 and err.code != 416: # Unexpected HTTP error raise elif err.code == 416: @@ -539,7 +539,7 @@ class FileDownloader(object): data = urllib2.urlopen(basic_request) content_length = data.info()['Content-Length'] except (urllib2.HTTPError, ), err: - if err.code != 503: + if err.code != 503 and err.code != 500: raise else: # Examine the reported length From ac249f421f5d7a9d9272d478f2f5e246ed497c47 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Sun, 3 Oct 2010 11:05:20 +0200 Subject: [PATCH 251/455] Retry on any 5xx server error --- youtube-dl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube-dl b/youtube-dl index 54881a4d8..48258419f 100755 --- a/youtube-dl +++ b/youtube-dl @@ -529,7 +529,7 @@ class FileDownloader(object): data = urllib2.urlopen(request) break except (urllib2.HTTPError, ), err: - if err.code != 500 and err.code != 503 and err.code != 416: + if (err.code < 500 or err.code >= 600) and err.code != 416: # Unexpected HTTP error raise elif err.code == 416: @@ -539,7 +539,7 @@ class FileDownloader(object): data = urllib2.urlopen(basic_request) content_length = data.info()['Content-Length'] except (urllib2.HTTPError, ), err: - if err.code != 503 and err.code != 500: + if err.code < 500 or err.code >= 600: raise else: # Examine the reported length From f79007e542aad11469163952acacd6f878910cf8 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Sun, 3 Oct 2010 11:11:59 +0200 Subject: [PATCH 252/455] Bump version number --- LATEST_VERSION | 2 +- youtube-dl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/LATEST_VERSION b/LATEST_VERSION index a4b357ab0..92947daa5 100644 --- 
a/LATEST_VERSION +++ b/LATEST_VERSION @@ -1 +1 @@ -2010.08.04 +2010.10.03 diff --git a/youtube-dl b/youtube-dl index 48258419f..51344f27b 100755 --- a/youtube-dl +++ b/youtube-dl @@ -2101,7 +2101,7 @@ if __name__ == '__main__': # Parse command line parser = optparse.OptionParser( usage='Usage: %prog [options] url...', - version='2010.08.04', + version='2010.10.03', conflict_handler='resolve', ) From 7df4635faf855c4d114264557704f6de94867df3 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Sat, 9 Oct 2010 12:28:15 +0200 Subject: [PATCH 253/455] Use HTTPS for the login URL (fixes issue #163) --- youtube-dl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index 51344f27b..f8e921133 100755 --- a/youtube-dl +++ b/youtube-dl @@ -688,7 +688,7 @@ class YoutubeIE(InfoExtractor): _VALID_URL = r'^((?:http://)?(?:youtu\.be/|(?:\w+\.)?youtube(?:-nocookie)?\.com/(?:(?:v/)|(?:(?:watch(?:_popup)?(?:\.php)?)?(?:\?|#!?)(?:.+&)?v=))))?([0-9A-Za-z_-]+)(?(1).+)?$' _LANG_URL = r'http://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1' - _LOGIN_URL = 'http://www.youtube.com/signup?next=/&gl=US&hl=en' + _LOGIN_URL = 'https://www.youtube.com/signup?next=/&gl=US&hl=en' _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en' _NETRC_MACHINE = 'youtube' # Listed in order of quality From a949a3ae6bda6efa3abc3770e49be34653cc613b Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Mon, 11 Oct 2010 11:21:28 +0200 Subject: [PATCH 254/455] Support "https" in YouTube video URLs (fixes issue #215) --- youtube-dl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index f8e921133..094a4164c 100755 --- a/youtube-dl +++ b/youtube-dl @@ -686,7 +686,7 @@ class InfoExtractor(object): class YoutubeIE(InfoExtractor): """Information extractor for youtube.com.""" - _VALID_URL = 
r'^((?:http://)?(?:youtu\.be/|(?:\w+\.)?youtube(?:-nocookie)?\.com/(?:(?:v/)|(?:(?:watch(?:_popup)?(?:\.php)?)?(?:\?|#!?)(?:.+&)?v=))))?([0-9A-Za-z_-]+)(?(1).+)?$' + _VALID_URL = r'^((?:https?://)?(?:youtu\.be/|(?:\w+\.)?youtube(?:-nocookie)?\.com/(?:(?:v/)|(?:(?:watch(?:_popup)?(?:\.php)?)?(?:\?|#!?)(?:.+&)?v=))))?([0-9A-Za-z_-]+)(?(1).+)?$' _LANG_URL = r'http://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1' _LOGIN_URL = 'https://www.youtube.com/signup?next=/&gl=US&hl=en' _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en' From e08878f498c69567b213d781da02187869285a5f Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Sat, 23 Oct 2010 12:22:42 +0200 Subject: [PATCH 255/455] Set stdout to binary mode under Windows (fixes issue #218) --- youtube-dl | 3 +++ 1 file changed, 3 insertions(+) diff --git a/youtube-dl b/youtube-dl index 094a4164c..8494be96c 100755 --- a/youtube-dl +++ b/youtube-dl @@ -94,6 +94,9 @@ def sanitize_open(filename, open_mode): """ try: if filename == u'-': + if sys.platform == 'win32': + import msvcrt + msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY) return (sys.stdout, filename) stream = open(filename, open_mode) return (stream, filename) From 80066952bc9f83e3974d2b01d58ba61541238a2b Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Sat, 23 Oct 2010 12:54:00 +0200 Subject: [PATCH 256/455] Add new --cookies option to be able to save cookies to disk (fixes issue #208) --- youtube-dl | 66 +++++++++++++++++++++++++++++++++++++----------------- 1 file changed, 45 insertions(+), 21 deletions(-) diff --git a/youtube-dl b/youtube-dl index 8494be96c..26cdc6ef2 100755 --- a/youtube-dl +++ b/youtube-dl @@ -4,6 +4,7 @@ # Author: Danny Colligan # Author: Benjamin Johnson # License: Public domain code +import cookielib import htmlentitydefs import httplib import locale @@ -184,22 +185,25 @@ class FileDownloader(object): Available options: - username: Username for authentication purposes. 
- password: Password for authentication purposes. - usenetrc: Use netrc for authentication instead. - quiet: Do not print messages to stdout. - forceurl: Force printing final URL. - forcetitle: Force printing title. - simulate: Do not download the video files. - format: Video format code. - format_limit: Highest quality format to try. - outtmpl: Template for output names. - ignoreerrors: Do not stop on download errors. - ratelimit: Download speed limit, in bytes/sec. - nooverwrites: Prevent overwriting files. - retries: Number of times to retry for HTTP error 5xx - continuedl: Try to continue downloads if possible. - noprogress: Do not print the progress bar. + username: Username for authentication purposes. + password: Password for authentication purposes. + usenetrc: Use netrc for authentication instead. + quiet: Do not print messages to stdout. + forceurl: Force printing final URL. + forcetitle: Force printing title. + forcethumbnail: Force printing thumbnail URL. + forcedescription: Force printing description. + simulate: Do not download the video files. + format: Video format code. + format_limit: Highest quality format to try. + outtmpl: Template for output names. + ignoreerrors: Do not stop on download errors. + ratelimit: Download speed limit, in bytes/sec. + nooverwrites: Prevent overwriting files. + retries: Number of times to retry for HTTP error 5xx + continuedl: Try to continue downloads if possible. + noprogress: Do not print the progress bar. + playliststart: Playlist item to start at. 
""" params = None @@ -2096,11 +2100,6 @@ if __name__ == '__main__': stream.close() downloader.to_stdout('Updated to version %s' % latest_version) - # General configuration - urllib2.install_opener(urllib2.build_opener(urllib2.ProxyHandler())) - urllib2.install_opener(urllib2.build_opener(urllib2.HTTPCookieProcessor())) - socket.setdefaulttimeout(300) # 5 minutes should be enough (famous last words) - # Parse command line parser = optparse.OptionParser( usage='Usage: %prog [options] url...', @@ -2175,10 +2174,27 @@ if __name__ == '__main__': action='store_true', dest='nooverwrites', help='do not overwrite files', default=False) filesystem.add_option('-c', '--continue', action='store_true', dest='continue_dl', help='resume partially downloaded files', default=False) + filesystem.add_option('--cookies', + dest='cookiefile', metavar='FILE', help='file to dump cookie jar to') parser.add_option_group(filesystem) (opts, args) = parser.parse_args() + # Open appropriate CookieJar + if opts.cookiefile is None: + jar = cookielib.CookieJar() + else: + try: + jar = cookielib.MozillaCookieJar(opts.cookiefile) + except (IOError, OSError), err: + sys.exit(u'ERROR: unable to open cookie file') + + # General configuration + cookie_processor = urllib2.HTTPCookieProcessor(jar) + urllib2.install_opener(urllib2.build_opener(urllib2.ProxyHandler())) + urllib2.install_opener(urllib2.build_opener(cookie_processor)) + socket.setdefaulttimeout(300) # 5 minutes should be enough (famous last words) + # Batch file verification batchurls = [] if opts.batchfile is not None: @@ -2292,6 +2308,14 @@ if __name__ == '__main__': else: sys.exit() retcode = fd.download(all_urls) + + # Dump cookie jar if requested + if opts.cookiefile is not None: + try: + jar.save() + except (IOError, OSError), err: + sys.exit(u'ERROR: unable to save cookie jar') + sys.exit(retcode) except DownloadError: From 331ce0a05d08f39f93d1568b37f3d3252c90b86e Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Sat, 23 Oct 2010 
13:19:26 +0200 Subject: [PATCH 257/455] Use stderr for output when the output file is "-" (fixes issue #216) --- youtube-dl | 93 ++++++++++++++++++++++++++++-------------------------- 1 file changed, 49 insertions(+), 44 deletions(-) diff --git a/youtube-dl b/youtube-dl index 26cdc6ef2..e681e2d3f 100755 --- a/youtube-dl +++ b/youtube-dl @@ -204,6 +204,7 @@ class FileDownloader(object): continuedl: Try to continue downloads if possible. noprogress: Do not print the progress bar. playliststart: Playlist item to start at. + logtostderr: Log messages to stderr instead of stdout. """ params = None @@ -211,6 +212,7 @@ class FileDownloader(object): _pps = [] _download_retcode = None _num_downloads = None + _screen_file = None def __init__(self, params): """Create a FileDownloader object with the given options.""" @@ -218,6 +220,7 @@ class FileDownloader(object): self._pps = [] self._download_retcode = 0 self._num_downloads = 0 + self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)] self.params = params @staticmethod @@ -304,12 +307,13 @@ class FileDownloader(object): self._pps.append(pp) pp.set_downloader(self) - def to_stdout(self, message, skip_eol=False, ignore_encoding_errors=False): + def to_screen(self, message, skip_eol=False, ignore_encoding_errors=False): """Print message to stdout if not in quiet mode.""" try: if not self.params.get('quiet', False): - print (u'%s%s' % (message, [u'\n', u''][skip_eol])).encode(preferredencoding()), - sys.stdout.flush() + terminator = [u'\n', u''][skip_eol] + print >>self._screen_file, (u'%s%s' % (message, terminator)).encode(preferredencoding()), + self._screen_file.flush() except (UnicodeEncodeError), err: if not ignore_encoding_errors: raise @@ -350,40 +354,40 @@ class FileDownloader(object): def report_destination(self, filename): """Report destination filename.""" - self.to_stdout(u'[download] Destination: %s' % filename, ignore_encoding_errors=True) + self.to_screen(u'[download] Destination: %s' % 
filename, ignore_encoding_errors=True) def report_progress(self, percent_str, data_len_str, speed_str, eta_str): """Report download progress.""" if self.params.get('noprogress', False): return - self.to_stdout(u'\r[download] %s of %s at %s ETA %s' % + self.to_screen(u'\r[download] %s of %s at %s ETA %s' % (percent_str, data_len_str, speed_str, eta_str), skip_eol=True) def report_resuming_byte(self, resume_len): """Report attempt to resume at given byte.""" - self.to_stdout(u'[download] Resuming download at byte %s' % resume_len) + self.to_screen(u'[download] Resuming download at byte %s' % resume_len) def report_retry(self, count, retries): """Report retry in case of HTTP error 5xx""" - self.to_stdout(u'[download] Got server HTTP error. Retrying (attempt %d of %d)...' % (count, retries)) + self.to_screen(u'[download] Got server HTTP error. Retrying (attempt %d of %d)...' % (count, retries)) def report_file_already_downloaded(self, file_name): """Report file has already been fully downloaded.""" try: - self.to_stdout(u'[download] %s has already been downloaded' % file_name) + self.to_screen(u'[download] %s has already been downloaded' % file_name) except (UnicodeEncodeError), err: - self.to_stdout(u'[download] The file has already been downloaded') + self.to_screen(u'[download] The file has already been downloaded') def report_unable_to_resume(self): """Report it was impossible to resume download.""" - self.to_stdout(u'[download] Unable to resume') + self.to_screen(u'[download] Unable to resume') def report_finish(self): """Report download finished.""" if self.params.get('noprogress', False): - self.to_stdout(u'[download] Download completed') + self.to_screen(u'[download] Download completed') else: - self.to_stdout(u'') + self.to_screen(u'') def increment_downloads(self): """Increment the ordinal that assigns a number to each file.""" @@ -493,14 +497,14 @@ class FileDownloader(object): retval = subprocess.call(basic_args + [[], ['-e', '-k', 
'1']][self.params.get('continuedl', False)]) while retval == 2 or retval == 1: prevsize = os.path.getsize(filename) - self.to_stdout(u'\r[rtmpdump] %s bytes' % prevsize, skip_eol=True) + self.to_screen(u'\r[rtmpdump] %s bytes' % prevsize, skip_eol=True) time.sleep(5.0) # This seems to be needed retval = subprocess.call(basic_args + ['-e'] + [[], ['-k', '1']][retval == 1]) cursize = os.path.getsize(filename) if prevsize == cursize and retval == 1: break if retval == 0: - self.to_stdout(u'\r[rtmpdump] %s bytes' % os.path.getsize(filename)) + self.to_screen(u'\r[rtmpdump] %s bytes' % os.path.getsize(filename)) return True else: self.trouble(u'\nERROR: rtmpdump exited with code %d' % retval) @@ -717,35 +721,35 @@ class YoutubeIE(InfoExtractor): def report_lang(self): """Report attempt to set language.""" - self._downloader.to_stdout(u'[youtube] Setting language') + self._downloader.to_screen(u'[youtube] Setting language') def report_login(self): """Report attempt to log in.""" - self._downloader.to_stdout(u'[youtube] Logging in') + self._downloader.to_screen(u'[youtube] Logging in') def report_age_confirmation(self): """Report attempt to confirm age.""" - self._downloader.to_stdout(u'[youtube] Confirming age') + self._downloader.to_screen(u'[youtube] Confirming age') def report_video_webpage_download(self, video_id): """Report attempt to download video webpage.""" - self._downloader.to_stdout(u'[youtube] %s: Downloading video webpage' % video_id) + self._downloader.to_screen(u'[youtube] %s: Downloading video webpage' % video_id) def report_video_info_webpage_download(self, video_id): """Report attempt to download video info webpage.""" - self._downloader.to_stdout(u'[youtube] %s: Downloading video info webpage' % video_id) + self._downloader.to_screen(u'[youtube] %s: Downloading video info webpage' % video_id) def report_information_extraction(self, video_id): """Report attempt to extract video information.""" - self._downloader.to_stdout(u'[youtube] %s: Extracting 
video information' % video_id) + self._downloader.to_screen(u'[youtube] %s: Extracting video information' % video_id) def report_unavailable_format(self, video_id, format): """Report extracted video URL.""" - self._downloader.to_stdout(u'[youtube] %s: Format %s not available' % (video_id, format)) + self._downloader.to_screen(u'[youtube] %s: Format %s not available' % (video_id, format)) def report_rtmp_download(self): """Indicate the download will use the RTMP protocol.""" - self._downloader.to_stdout(u'[youtube] RTMP download detected') + self._downloader.to_screen(u'[youtube] RTMP download detected') def _real_initialize(self): if self._downloader is None: @@ -973,19 +977,19 @@ class MetacafeIE(InfoExtractor): def report_disclaimer(self): """Report disclaimer retrieval.""" - self._downloader.to_stdout(u'[metacafe] Retrieving disclaimer') + self._downloader.to_screen(u'[metacafe] Retrieving disclaimer') def report_age_confirmation(self): """Report attempt to confirm age.""" - self._downloader.to_stdout(u'[metacafe] Confirming age') + self._downloader.to_screen(u'[metacafe] Confirming age') def report_download_webpage(self, video_id): """Report webpage download.""" - self._downloader.to_stdout(u'[metacafe] %s: Downloading webpage' % video_id) + self._downloader.to_screen(u'[metacafe] %s: Downloading webpage' % video_id) def report_extraction(self, video_id): """Report information extraction.""" - self._downloader.to_stdout(u'[metacafe] %s: Extracting information' % video_id) + self._downloader.to_screen(u'[metacafe] %s: Extracting information' % video_id) def _real_initialize(self): # Retrieve disclaimer @@ -1113,11 +1117,11 @@ class DailymotionIE(InfoExtractor): def report_download_webpage(self, video_id): """Report webpage download.""" - self._downloader.to_stdout(u'[dailymotion] %s: Downloading webpage' % video_id) + self._downloader.to_screen(u'[dailymotion] %s: Downloading webpage' % video_id) def report_extraction(self, video_id): """Report information 
extraction.""" - self._downloader.to_stdout(u'[dailymotion] %s: Extracting information' % video_id) + self._downloader.to_screen(u'[dailymotion] %s: Extracting information' % video_id) def _real_initialize(self): return @@ -1200,11 +1204,11 @@ class GoogleIE(InfoExtractor): def report_download_webpage(self, video_id): """Report webpage download.""" - self._downloader.to_stdout(u'[video.google] %s: Downloading webpage' % video_id) + self._downloader.to_screen(u'[video.google] %s: Downloading webpage' % video_id) def report_extraction(self, video_id): """Report information extraction.""" - self._downloader.to_stdout(u'[video.google] %s: Extracting information' % video_id) + self._downloader.to_screen(u'[video.google] %s: Extracting information' % video_id) def _real_initialize(self): return @@ -1310,11 +1314,11 @@ class PhotobucketIE(InfoExtractor): def report_download_webpage(self, video_id): """Report webpage download.""" - self._downloader.to_stdout(u'[photobucket] %s: Downloading webpage' % video_id) + self._downloader.to_screen(u'[photobucket] %s: Downloading webpage' % video_id) def report_extraction(self, video_id): """Report information extraction.""" - self._downloader.to_stdout(u'[photobucket] %s: Extracting information' % video_id) + self._downloader.to_screen(u'[photobucket] %s: Extracting information' % video_id) def _real_initialize(self): return @@ -1394,11 +1398,11 @@ class YahooIE(InfoExtractor): def report_download_webpage(self, video_id): """Report webpage download.""" - self._downloader.to_stdout(u'[video.yahoo] %s: Downloading webpage' % video_id) + self._downloader.to_screen(u'[video.yahoo] %s: Downloading webpage' % video_id) def report_extraction(self, video_id): """Report information extraction.""" - self._downloader.to_stdout(u'[video.yahoo] %s: Extracting information' % video_id) + self._downloader.to_screen(u'[video.yahoo] %s: Extracting information' % video_id) def _real_initialize(self): return @@ -1546,12 +1550,12 @@ class 
GenericIE(InfoExtractor): def report_download_webpage(self, video_id): """Report webpage download.""" - self._downloader.to_stdout(u'WARNING: Falling back on generic information extractor.') - self._downloader.to_stdout(u'[generic] %s: Downloading webpage' % video_id) + self._downloader.to_screen(u'WARNING: Falling back on generic information extractor.') + self._downloader.to_screen(u'[generic] %s: Downloading webpage' % video_id) def report_extraction(self, video_id): """Report information extraction.""" - self._downloader.to_stdout(u'[generic] %s: Extracting information' % video_id) + self._downloader.to_screen(u'[generic] %s: Extracting information' % video_id) def _real_initialize(self): return @@ -1653,7 +1657,7 @@ class YoutubeSearchIE(InfoExtractor): def report_download_page(self, query, pagenum): """Report attempt to download playlist page with given number.""" query = query.decode(preferredencoding()) - self._downloader.to_stdout(u'[youtube] query "%s": Downloading page %s' % (query, pagenum)) + self._downloader.to_screen(u'[youtube] query "%s": Downloading page %s' % (query, pagenum)) def _real_initialize(self): self._youtube_ie.initialize() @@ -1744,7 +1748,7 @@ class GoogleSearchIE(InfoExtractor): def report_download_page(self, query, pagenum): """Report attempt to download playlist page with given number.""" query = query.decode(preferredencoding()) - self._downloader.to_stdout(u'[video.google] query "%s": Downloading page %s' % (query, pagenum)) + self._downloader.to_screen(u'[video.google] query "%s": Downloading page %s' % (query, pagenum)) def _real_initialize(self): self._google_ie.initialize() @@ -1835,7 +1839,7 @@ class YahooSearchIE(InfoExtractor): def report_download_page(self, query, pagenum): """Report attempt to download playlist page with given number.""" query = query.decode(preferredencoding()) - self._downloader.to_stdout(u'[video.yahoo] query "%s": Downloading page %s' % (query, pagenum)) + self._downloader.to_screen(u'[video.yahoo] 
query "%s": Downloading page %s' % (query, pagenum)) def _real_initialize(self): self._yahoo_ie.initialize() @@ -1925,7 +1929,7 @@ class YoutubePlaylistIE(InfoExtractor): def report_download_page(self, playlist_id, pagenum): """Report attempt to download playlist page with given number.""" - self._downloader.to_stdout(u'[youtube] PL %s: Downloading page #%s' % (playlist_id, pagenum)) + self._downloader.to_screen(u'[youtube] PL %s: Downloading page #%s' % (playlist_id, pagenum)) def _real_initialize(self): self._youtube_ie.initialize() @@ -1989,7 +1993,7 @@ class YoutubeUserIE(InfoExtractor): def report_download_page(self, username): """Report attempt to download user page.""" - self._downloader.to_stdout(u'[youtube] user %s: Downloading page ' % (username)) + self._downloader.to_screen(u'[youtube] user %s: Downloading page ' % (username)) def _real_initialize(self): self._youtube_ie.initialize() @@ -2090,7 +2094,7 @@ if __name__ == '__main__': if not os.access (filename, os.W_OK): sys.exit('ERROR: no write permissions on %s' % filename) - downloader.to_stdout('Updating to latest stable version...') + downloader.to_screen('Updating to latest stable version...') latest_url = 'http://bitbucket.org/rg3/youtube-dl/raw/tip/LATEST_VERSION' latest_version = urllib.urlopen(latest_url).read().strip() prog_url = 'http://bitbucket.org/rg3/youtube-dl/raw/%s/youtube-dl' % latest_version @@ -2098,7 +2102,7 @@ if __name__ == '__main__': stream = open(filename, 'w') stream.write(newcontent) stream.close() - downloader.to_stdout('Updated to version %s' % latest_version) + downloader.to_screen('Updated to version %s' % latest_version) # Parse command line parser = optparse.OptionParser( @@ -2280,6 +2284,7 @@ if __name__ == '__main__': 'continuedl': opts.continue_dl, 'noprogress': opts.noprogress, 'playliststart': opts.playliststart, + 'logtostderr': opts.outtmpl == '-', }) fd.add_info_extractor(youtube_search_ie) fd.add_info_extractor(youtube_pl_ie) From 
e0c982c8d0b0ca76482c6184ec3e749bbf566b6d Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Sun, 24 Oct 2010 17:31:33 +0200 Subject: [PATCH 258/455] Load cookies if the cookie file exists when starting the program --- youtube-dl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/youtube-dl b/youtube-dl index e681e2d3f..2a51ef421 100755 --- a/youtube-dl +++ b/youtube-dl @@ -2190,6 +2190,8 @@ if __name__ == '__main__': else: try: jar = cookielib.MozillaCookieJar(opts.cookiefile) + if os.path.isfile(opts.cookiefile) and os.access(opts.cookiefile, os.R_OK): + jar.load() except (IOError, OSError), err: sys.exit(u'ERROR: unable to open cookie file') From a6a61601de47e7347cd9d593086367f0f2d835c1 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Sun, 24 Oct 2010 17:35:08 +0200 Subject: [PATCH 259/455] Update User-Agent string --- youtube-dl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index 2a51ef421..9ae0a4747 100755 --- a/youtube-dl +++ b/youtube-dl @@ -28,7 +28,7 @@ except ImportError: from cgi import parse_qs std_headers = { - 'User-Agent': 'Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.8) Gecko/20100723 Firefox/3.6.8', + 'User-Agent': 'Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.11) Gecko/20101019 Firefox/3.6.11', 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7', 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', 'Accept-Language': 'en-us,en;q=0.5', From c34e358456fea1fffe9e3b14adc19137c04c8f26 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Sun, 24 Oct 2010 17:35:47 +0200 Subject: [PATCH 260/455] Bump version number --- LATEST_VERSION | 2 +- youtube-dl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/LATEST_VERSION b/LATEST_VERSION index 92947daa5..8418210b7 100644 --- a/LATEST_VERSION +++ b/LATEST_VERSION @@ -1 +1 @@ -2010.10.03 +2010.10.24 diff --git a/youtube-dl b/youtube-dl index 9ae0a4747..466b506b5 100755 --- a/youtube-dl +++ b/youtube-dl @@ -2107,7 
+2107,7 @@ if __name__ == '__main__': # Parse command line parser = optparse.OptionParser( usage='Usage: %prog [options] url...', - version='2010.10.03', + version='2010.10.24', conflict_handler='resolve', ) From 893a13df55d8eadbd64fd64e2f985d039b46facd Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Sun, 31 Oct 2010 15:46:58 +0100 Subject: [PATCH 261/455] Modify autoupdate URLs to match the ones from github.com --- youtube-dl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube-dl b/youtube-dl index 466b506b5..f445c4de6 100755 --- a/youtube-dl +++ b/youtube-dl @@ -2095,9 +2095,9 @@ if __name__ == '__main__': sys.exit('ERROR: no write permissions on %s' % filename) downloader.to_screen('Updating to latest stable version...') - latest_url = 'http://bitbucket.org/rg3/youtube-dl/raw/tip/LATEST_VERSION' + latest_url = 'http://github.com/rg3/youtube-dl/raw/master/LATEST_VERSION' latest_version = urllib.urlopen(latest_url).read().strip() - prog_url = 'http://bitbucket.org/rg3/youtube-dl/raw/%s/youtube-dl' % latest_version + prog_url = 'http://github.com/rg3/youtube-dl/raw/%s/youtube-dl' % latest_version newcontent = urllib.urlopen(prog_url).read() stream = open(filename, 'w') stream.write(newcontent) From 8cc4434116456d71afeeb519cbaa6919d7dc7b89 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Thu, 4 Nov 2010 23:19:09 +0100 Subject: [PATCH 262/455] Add playlist-end option (courtesy of Nevar Angelo) --- youtube-dl | 37 +++++++++++++++++++++++-------------- 1 file changed, 23 insertions(+), 14 deletions(-) diff --git a/youtube-dl b/youtube-dl index f445c4de6..b099ffd15 100755 --- a/youtube-dl +++ b/youtube-dl @@ -204,6 +204,7 @@ class FileDownloader(object): continuedl: Try to continue downloads if possible. noprogress: Do not print the progress bar. playliststart: Playlist item to start at. + playlistend: Playlist item to end at. logtostderr: Log messages to stderr instead of stdout. 
""" @@ -1966,11 +1967,10 @@ class YoutubePlaylistIE(InfoExtractor): break pagenum = pagenum + 1 - playliststart = self._downloader.params.get('playliststart', 1) - playliststart -= 1 #our arrays are zero-based but the playlist is 1-based - if playliststart > 0: - video_ids = video_ids[playliststart:] - + playliststart = self._downloader.params.get('playliststart', 1) - 1 + playlistend = self._downloader.params.get('playlistend', -1) + video_ids = video_ids[playliststart:playlistend] + for id in video_ids: self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id) return @@ -2026,10 +2026,9 @@ class YoutubeUserIE(InfoExtractor): ids_in_page.append(mobj.group(1)) video_ids.extend(ids_in_page) - playliststart = self._downloader.params.get('playliststart', 1) - playliststart = playliststart-1 #our arrays are zero-based but the playlist is 1-based - if playliststart > 0: - video_ids = video_ids[playliststart:] + playliststart = self._downloader.params.get('playliststart', 1) - 1 + playlistend = self._downloader.params.get('playlistend', -1) + video_ids = video_ids[playliststart:playlistend] for id in video_ids: self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id) @@ -2125,6 +2124,8 @@ if __name__ == '__main__': dest='retries', metavar='RETRIES', help='number of retries (default is 10)', default=10) parser.add_option('--playlist-start', dest='playliststart', metavar='NUMBER', help='playlist video to start at (default is 1)', default=1) + parser.add_option('--playlist-end', + dest='playlistend', metavar='NUMBER', help='playlist video to end at (default is last)', default=-1) authentication = optparse.OptionGroup(parser, 'Authentication Options') authentication.add_option('-u', '--username', @@ -2239,11 +2240,18 @@ if __name__ == '__main__': opts.retries = long(opts.retries) except (TypeError, ValueError), err: parser.error(u'invalid retry count specified') - if opts.playliststart is not None: - try: - opts.playliststart = long(opts.playliststart) - 
except (TypeError, ValueError), err: - parser.error(u'invalid playlist page specified') + try: + opts.playliststart = long(opts.playliststart) + if opts.playliststart <= 0: + raise ValueError + except (TypeError, ValueError), err: + parser.error(u'invalid playlist start number specified') + try: + opts.playlistend = long(opts.playlistend) + if opts.playlistend != -1 and (opts.playlistend <= 0 or opts.playlistend < opts.playliststart): + raise ValueError + except (TypeError, ValueError), err: + parser.error(u'invalid playlist end number specified') # Information extractors youtube_ie = YoutubeIE() @@ -2286,6 +2294,7 @@ if __name__ == '__main__': 'continuedl': opts.continue_dl, 'noprogress': opts.noprogress, 'playliststart': opts.playliststart, + 'playlistend': opts.playlistend, 'logtostderr': opts.outtmpl == '-', }) fd.add_info_extractor(youtube_search_ie) From 817e8f523fe6d0d0aeb0b100c4cd10a13546793f Mon Sep 17 00:00:00 2001 From: Nevar Angelo Date: Sat, 6 Nov 2010 22:21:45 +0200 Subject: [PATCH 263/455] Allow comments in batch file. --- youtube-dl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index b099ffd15..24cc454a8 100755 --- a/youtube-dl +++ b/youtube-dl @@ -2212,7 +2212,7 @@ if __name__ == '__main__': batchfd = open(opts.batchfile, 'r') batchurls = batchfd.readlines() batchurls = [x.strip() for x in batchurls] - batchurls = [x for x in batchurls if len(x) > 0] + batchurls = [x for x in batchurls if len(x) > 0 and not re.search(r'^[#/;]', x)] except IOError: sys.exit(u'ERROR: batch file could not be read') all_urls = batchurls + args From 1e47d226e10ff985a8a12e0044381bf59450c7f7 Mon Sep 17 00:00:00 2001 From: Nevar Angelo Date: Sat, 6 Nov 2010 22:34:22 +0200 Subject: [PATCH 264/455] Added command line switch -A --auto-number Numbering downloaded URLs was implemented with %(ord)s in the output template. It has been replaced with the %(autonumber)s sequence and is now also available as a command line switch. 
--- youtube-dl | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/youtube-dl b/youtube-dl index 24cc454a8..2ec980f97 100755 --- a/youtube-dl +++ b/youtube-dl @@ -413,7 +413,7 @@ class FileDownloader(object): try: template_dict = dict(info_dict) template_dict['epoch'] = unicode(long(time.time())) - template_dict['ord'] = unicode('%05d' % self._num_downloads) + template_dict['autonumber'] = unicode('%05d' % self._num_downloads) filename = self.params['outtmpl'] % template_dict except (ValueError, KeyError), err: self.trouble(u'ERROR: invalid system charset or erroneous output template') @@ -2171,6 +2171,8 @@ if __name__ == '__main__': action='store_true', dest='usetitle', help='use title in file name', default=False) filesystem.add_option('-l', '--literal', action='store_true', dest='useliteral', help='use literal title in file name', default=False) + filesystem.add_option('-A', '--auto-number', + action='store_true', dest='autonumber', help='number downloaded URLs starting from 00000', default=False) filesystem.add_option('-o', '--output', dest='outtmpl', metavar='TEMPLATE', help='output filename template') filesystem.add_option('-a', '--batch-file', @@ -2224,8 +2226,8 @@ if __name__ == '__main__': parser.error(u'using .netrc conflicts with giving username/password') if opts.password is not None and opts.username is None: parser.error(u'account username missing') - if opts.outtmpl is not None and (opts.useliteral or opts.usetitle): - parser.error(u'using output template conflicts with using title or literal title') + if opts.outtmpl is not None and (opts.useliteral or opts.usetitle or opts.autonumber): + parser.error(u'using output template conflicts with using title, literal title or auto number') if opts.usetitle and opts.useliteral: parser.error(u'using title conflicts with using literal title') if opts.username is not None and opts.password is None: @@ -2284,8 +2286,11 @@ if __name__ == '__main__': or (opts.format == '-1' and 
opts.usetitle and u'%(stitle)s-%(id)s-%(format)s.%(ext)s') or (opts.format == '-1' and opts.useliteral and u'%(title)s-%(id)s-%(format)s.%(ext)s') or (opts.format == '-1' and u'%(id)s-%(format)s.%(ext)s') + or (opts.usetitle and opts.autonumber and u'%(autonumber)s-%(stitle)s-%(id)s.%(ext)s') + or (opts.useliteral and opts.autonumber and u'%(autonumber)s-%(title)s-%(id)s.%(ext)s') or (opts.usetitle and u'%(stitle)s-%(id)s.%(ext)s') or (opts.useliteral and u'%(title)s-%(id)s.%(ext)s') + or (opts.autonumber and u'%(autonumber)s-%(id)s.%(ext)s') or u'%(id)s.%(ext)s'), 'ignoreerrors': opts.ignoreerrors, 'ratelimit': opts.ratelimit, From 5e596cac0adcb7e4321f0c9d84e1a813ab26777e Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Sat, 6 Nov 2010 22:13:59 +0100 Subject: [PATCH 265/455] Minor help text correction --- youtube-dl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index 2ec980f97..dbb505f19 100755 --- a/youtube-dl +++ b/youtube-dl @@ -2172,7 +2172,7 @@ if __name__ == '__main__': filesystem.add_option('-l', '--literal', action='store_true', dest='useliteral', help='use literal title in file name', default=False) filesystem.add_option('-A', '--auto-number', - action='store_true', dest='autonumber', help='number downloaded URLs starting from 00000', default=False) + action='store_true', dest='autonumber', help='number downloaded files starting from 00000', default=False) filesystem.add_option('-o', '--output', dest='outtmpl', metavar='TEMPLATE', help='output filename template') filesystem.add_option('-a', '--batch-file', From b3a27b52171761e23060d6dbd7183359d0bad650 Mon Sep 17 00:00:00 2001 From: Nevar Angelo Date: Wed, 17 Nov 2010 20:55:30 +0200 Subject: [PATCH 266/455] Added 'uploaddate' output sequence for YoutubeIE. 
--- youtube-dl | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/youtube-dl b/youtube-dl index dbb505f19..1a53c2c41 100755 --- a/youtube-dl +++ b/youtube-dl @@ -5,6 +5,7 @@ # Author: Benjamin Johnson # License: Public domain code import cookielib +import datetime import htmlentitydefs import httplib import locale @@ -894,6 +895,18 @@ class YoutubeIE(InfoExtractor): else: # don't panic if we can't find it video_thumbnail = urllib.unquote_plus(video_info['thumbnail_url'][0]) + # upload date + upload_date = u'NA' + mobj = re.search(r'id="eow-date".*?>(.*?)', video_webpage, re.DOTALL) + if mobj is not None: + upload_date = mobj.group(1).split() + format_expressions = ['%d %B %Y', '%B %d, %Y'] + for expression in format_expressions: + try: + upload_date = datetime.datetime.strptime(upload_date, expression).strftime('%Y%m%d') + except: + pass + # description video_description = 'No description available.' if self._downloader.params.get('forcedescription', False): @@ -948,6 +961,7 @@ class YoutubeIE(InfoExtractor): 'id': video_id.decode('utf-8'), 'url': video_real_url.decode('utf-8'), 'uploader': video_uploader.decode('utf-8'), + 'uploaddate': upload_date, 'title': video_title, 'stitle': simple_title, 'ext': video_extension.decode('utf-8'), @@ -1094,6 +1108,7 @@ class MetacafeIE(InfoExtractor): 'id': video_id.decode('utf-8'), 'url': video_url.decode('utf-8'), 'uploader': video_uploader.decode('utf-8'), + 'uploaddate': u'NA', 'title': video_title, 'stitle': simple_title, 'ext': video_extension.decode('utf-8'), @@ -1182,6 +1197,7 @@ class DailymotionIE(InfoExtractor): 'id': video_id.decode('utf-8'), 'url': video_url.decode('utf-8'), 'uploader': video_uploader.decode('utf-8'), + 'uploaddate': u'NA', 'title': video_title, 'stitle': simple_title, 'ext': video_extension.decode('utf-8'), @@ -1291,6 +1307,7 @@ class GoogleIE(InfoExtractor): 'id': video_id.decode('utf-8'), 'url': video_url.decode('utf-8'), 'uploader': u'NA', + 'uploaddate': u'NA', 'title': 
video_title, 'stitle': simple_title, 'ext': video_extension.decode('utf-8'), @@ -1372,6 +1389,7 @@ class PhotobucketIE(InfoExtractor): 'id': video_id.decode('utf-8'), 'url': video_url.decode('utf-8'), 'uploader': video_uploader, + 'uploaddate': u'NA', 'title': video_title, 'stitle': simple_title, 'ext': video_extension.decode('utf-8'), @@ -1526,6 +1544,7 @@ class YahooIE(InfoExtractor): 'id': video_id.decode('utf-8'), 'url': video_url, 'uploader': video_uploader, + 'uploaddate': u'NA', 'title': video_title, 'stitle': simple_title, 'ext': video_extension.decode('utf-8'), @@ -1628,6 +1647,7 @@ class GenericIE(InfoExtractor): 'id': video_id.decode('utf-8'), 'url': video_url.decode('utf-8'), 'uploader': video_uploader, + 'uploaddate': u'NA', 'title': video_title, 'stitle': simple_title, 'ext': video_extension.decode('utf-8'), From b620a5f811d94a0296a9809391e3a121ad8d68e9 Mon Sep 17 00:00:00 2001 From: jamiejones Date: Tue, 16 Nov 2010 13:52:23 -0800 Subject: [PATCH 267/455] Correctly parse the player URL in RTMP downloads (closes #11) Fixed several problems courtesy of jamiejones: The parsing for the SWF url was wrong (the "//" are now escaped and the initial .*match needs to be 'ungreedy'), so the -W setting to rtmpdump was not set, causing the decryption of the video to be wrong. Finally, add "&has_verified=1" to the fetch of the HTML page to allow fetching of age-restricted videos. 
--- youtube-dl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) mode change 100755 => 100644 youtube-dl diff --git a/youtube-dl b/youtube-dl old mode 100755 new mode 100644 index dbb505f19..6a888cc3f --- a/youtube-dl +++ b/youtube-dl @@ -831,7 +831,7 @@ class YoutubeIE(InfoExtractor): # Get video webpage self.report_video_webpage_download(video_id) - request = urllib2.Request('http://www.youtube.com/watch?v=%s&gl=US&hl=en' % video_id, None, std_headers) + request = urllib2.Request('http://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id, None, std_headers) try: video_webpage = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: @@ -839,9 +839,9 @@ class YoutubeIE(InfoExtractor): return # Attempt to extract SWF player URL - mobj = re.search(r'swfConfig.*"(http://.*?watch.*?-.*?\.swf)"', video_webpage) + mobj = re.search(r'swfConfig.*?"(http:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage) if mobj is not None: - player_url = mobj.group(1) + player_url = re.sub(r'\\(.)', r'\1', mobj.group(1)) else: player_url = None From 05df0c1d4acbc7077187579f16c6661260e62014 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Fri, 19 Nov 2010 18:37:07 +0100 Subject: [PATCH 268/455] Restore file permissions --- youtube-dl | 0 1 file changed, 0 insertions(+), 0 deletions(-) mode change 100644 => 100755 youtube-dl diff --git a/youtube-dl b/youtube-dl old mode 100644 new mode 100755 From 138b11f36ee5e8018c29621d39c324d98d8291cc Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Fri, 19 Nov 2010 19:31:26 +0100 Subject: [PATCH 269/455] Rework upload date mechanism after detecting problems in several tests --- youtube-dl | 49 ++++++++++++++++++++++++++++++++++--------------- 1 file changed, 34 insertions(+), 15 deletions(-) diff --git a/youtube-dl b/youtube-dl index e164d5c8c..3d20a9d6d 100755 --- a/youtube-dl +++ b/youtube-dl @@ -5,7 +5,6 @@ # Author: Benjamin Johnson # License: Public domain code import cookielib 
-import datetime import htmlentitydefs import httplib import locale @@ -37,6 +36,21 @@ std_headers = { simple_title_chars = string.ascii_letters.decode('ascii') + string.digits.decode('ascii') +month_name_to_number = { + 'January': '01', + 'February': '02', + 'March': '03', + 'April': '04', + 'May': '05', + 'June': '06', + 'July': '07', + 'August': '08', + 'September': '09', + 'October': '10', + 'November': '11', + 'December': '12', +} + def preferredencoding(): """Get preferred encoding. @@ -899,13 +913,18 @@ class YoutubeIE(InfoExtractor): upload_date = u'NA' mobj = re.search(r'id="eow-date".*?>(.*?)', video_webpage, re.DOTALL) if mobj is not None: - upload_date = mobj.group(1).split() - format_expressions = ['%d %B %Y', '%B %d, %Y'] - for expression in format_expressions: - try: - upload_date = datetime.datetime.strptime(upload_date, expression).strftime('%Y%m%d') - except: - pass + try: + if ',' in mobj.group(1): + # Month Day, Year + m, d, y = mobj.group(1).replace(',', '').split() + else: + # Day Month Year, we'll suppose + d, m, y = mobj.group(1).split() + m = month_name_to_number[m] + d = '%02d' % (long(d)) + upload_date = '%s%s%s' % (y, m, d) + except: + upload_date = u'NA' # description video_description = 'No description available.' 
@@ -961,7 +980,7 @@ class YoutubeIE(InfoExtractor): 'id': video_id.decode('utf-8'), 'url': video_real_url.decode('utf-8'), 'uploader': video_uploader.decode('utf-8'), - 'uploaddate': upload_date, + 'upload_date': upload_date, 'title': video_title, 'stitle': simple_title, 'ext': video_extension.decode('utf-8'), @@ -1108,7 +1127,7 @@ class MetacafeIE(InfoExtractor): 'id': video_id.decode('utf-8'), 'url': video_url.decode('utf-8'), 'uploader': video_uploader.decode('utf-8'), - 'uploaddate': u'NA', + 'upload_date': u'NA', 'title': video_title, 'stitle': simple_title, 'ext': video_extension.decode('utf-8'), @@ -1197,7 +1216,7 @@ class DailymotionIE(InfoExtractor): 'id': video_id.decode('utf-8'), 'url': video_url.decode('utf-8'), 'uploader': video_uploader.decode('utf-8'), - 'uploaddate': u'NA', + 'upload_date': u'NA', 'title': video_title, 'stitle': simple_title, 'ext': video_extension.decode('utf-8'), @@ -1307,7 +1326,7 @@ class GoogleIE(InfoExtractor): 'id': video_id.decode('utf-8'), 'url': video_url.decode('utf-8'), 'uploader': u'NA', - 'uploaddate': u'NA', + 'upload_date': u'NA', 'title': video_title, 'stitle': simple_title, 'ext': video_extension.decode('utf-8'), @@ -1389,7 +1408,7 @@ class PhotobucketIE(InfoExtractor): 'id': video_id.decode('utf-8'), 'url': video_url.decode('utf-8'), 'uploader': video_uploader, - 'uploaddate': u'NA', + 'upload_date': u'NA', 'title': video_title, 'stitle': simple_title, 'ext': video_extension.decode('utf-8'), @@ -1544,7 +1563,7 @@ class YahooIE(InfoExtractor): 'id': video_id.decode('utf-8'), 'url': video_url, 'uploader': video_uploader, - 'uploaddate': u'NA', + 'upload_date': u'NA', 'title': video_title, 'stitle': simple_title, 'ext': video_extension.decode('utf-8'), @@ -1647,7 +1666,7 @@ class GenericIE(InfoExtractor): 'id': video_id.decode('utf-8'), 'url': video_url.decode('utf-8'), 'uploader': video_uploader, - 'uploaddate': u'NA', + 'upload_date': u'NA', 'title': video_title, 'stitle': simple_title, 'ext': 
video_extension.decode('utf-8'), From 010ebaf7832f41a5842c2a9ac316943b0efc5c2b Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Fri, 19 Nov 2010 19:40:18 +0100 Subject: [PATCH 270/455] Update User-Agent string --- youtube-dl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index 3d20a9d6d..94e04d4cd 100755 --- a/youtube-dl +++ b/youtube-dl @@ -28,7 +28,7 @@ except ImportError: from cgi import parse_qs std_headers = { - 'User-Agent': 'Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.11) Gecko/20101019 Firefox/3.6.11', + 'User-Agent': 'Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.12) Gecko/20101028 Firefox/3.6.12', 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7', 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', 'Accept-Language': 'en-us,en;q=0.5', From f8dc4414305f111a17d31f53d4a04650d80d34f5 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Fri, 19 Nov 2010 19:41:09 +0100 Subject: [PATCH 271/455] Bump version number --- LATEST_VERSION | 2 +- youtube-dl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/LATEST_VERSION b/LATEST_VERSION index 8418210b7..4f8405c40 100644 --- a/LATEST_VERSION +++ b/LATEST_VERSION @@ -1 +1 @@ -2010.10.24 +2010.11.19 diff --git a/youtube-dl b/youtube-dl index 94e04d4cd..24722d292 100755 --- a/youtube-dl +++ b/youtube-dl @@ -2145,7 +2145,7 @@ if __name__ == '__main__': # Parse command line parser = optparse.OptionParser( usage='Usage: %prog [options] url...', - version='2010.10.24', + version='2010.11.19', conflict_handler='resolve', ) From a1f03c7b06d0a811d18499cb2ef09405c52a5db9 Mon Sep 17 00:00:00 2001 From: Nevar Angelo Date: Tue, 30 Nov 2010 18:51:00 +0200 Subject: [PATCH 272/455] Reworked 'upload_date' code for output sequence in YoutubeIE. Reverted to previous version of 'upload_date' and fixed a mistake that prevented the code from working properly. 
--- youtube-dl | 35 ++++++++--------------------------- 1 file changed, 8 insertions(+), 27 deletions(-) diff --git a/youtube-dl b/youtube-dl index 24722d292..4cea2e85e 100755 --- a/youtube-dl +++ b/youtube-dl @@ -5,6 +5,7 @@ # Author: Benjamin Johnson # License: Public domain code import cookielib +import datetime import htmlentitydefs import httplib import locale @@ -36,21 +37,6 @@ std_headers = { simple_title_chars = string.ascii_letters.decode('ascii') + string.digits.decode('ascii') -month_name_to_number = { - 'January': '01', - 'February': '02', - 'March': '03', - 'April': '04', - 'May': '05', - 'June': '06', - 'July': '07', - 'August': '08', - 'September': '09', - 'October': '10', - 'November': '11', - 'December': '12', -} - def preferredencoding(): """Get preferred encoding. @@ -913,18 +899,13 @@ class YoutubeIE(InfoExtractor): upload_date = u'NA' mobj = re.search(r'id="eow-date".*?>(.*?)', video_webpage, re.DOTALL) if mobj is not None: - try: - if ',' in mobj.group(1): - # Month Day, Year - m, d, y = mobj.group(1).replace(',', '').split() - else: - # Day Month Year, we'll suppose - d, m, y = mobj.group(1).split() - m = month_name_to_number[m] - d = '%02d' % (long(d)) - upload_date = '%s%s%s' % (y, m, d) - except: - upload_date = u'NA' + upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split()) + format_expressions = ['%d %B %Y', '%B %d %Y'] + for expression in format_expressions: + try: + upload_date = datetime.datetime.strptime(upload_date, expression).strftime('%Y%m%d') + except: + pass # description video_description = 'No description available.' 
From 62cf7aaf9a3b9b74ba90d1c17303eb5cabd3f52c Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Sat, 4 Dec 2010 10:38:53 +0100 Subject: [PATCH 273/455] Use a temporary filename to download files --- youtube-dl | 35 ++++++++++++++++++++++++++++------- 1 file changed, 28 insertions(+), 7 deletions(-) diff --git a/youtube-dl b/youtube-dl index 4cea2e85e..22dd230ee 100755 --- a/youtube-dl +++ b/youtube-dl @@ -235,6 +235,11 @@ class FileDownloader(object): if not os.path.exists(dir): os.mkdir(dir) + @staticmethod + def temp_name(filename): + """Returns a temporary filename for the given filename.""" + return filename + '.part' + @staticmethod def format_bytes(bytes): if bytes is None: @@ -353,6 +358,12 @@ class FileDownloader(object): speed = float(byte_counter) / elapsed if speed > rate_limit: time.sleep((byte_counter - rate_limit * (now - start_time)) / rate_limit) + + def try_rename(self, old_filename, new_filename): + try: + os.rename(old_filename, new_filename) + except (IOError, OSError), err: + self.trouble(u'ERROR: unable to rename file') def report_destination(self, filename): """Report destination filename.""" @@ -484,6 +495,7 @@ class FileDownloader(object): def _download_with_rtmpdump(self, filename, url, player_url): self.report_destination(filename) + tmpfilename = self.temp_name(filename) # Check for rtmpdump first try: @@ -495,36 +507,43 @@ class FileDownloader(object): # Download using rtmpdump. rtmpdump returns exit code 2 when # the connection was interrumpted and resuming appears to be # possible. This is part of rtmpdump's normal usage, AFAIK. 
- basic_args = ['rtmpdump', '-q'] + [[], ['-W', player_url]][player_url is not None] + ['-r', url, '-o', filename] + basic_args = ['rtmpdump', '-q'] + [[], ['-W', player_url]][player_url is not None] + ['-r', url, '-o', tmpfilename] retval = subprocess.call(basic_args + [[], ['-e', '-k', '1']][self.params.get('continuedl', False)]) while retval == 2 or retval == 1: - prevsize = os.path.getsize(filename) + prevsize = os.path.getsize(tmpfilename) self.to_screen(u'\r[rtmpdump] %s bytes' % prevsize, skip_eol=True) time.sleep(5.0) # This seems to be needed retval = subprocess.call(basic_args + ['-e'] + [[], ['-k', '1']][retval == 1]) - cursize = os.path.getsize(filename) + cursize = os.path.getsize(tmpfilename) if prevsize == cursize and retval == 1: break if retval == 0: - self.to_screen(u'\r[rtmpdump] %s bytes' % os.path.getsize(filename)) + self.to_screen(u'\r[rtmpdump] %s bytes' % os.path.getsize(tmpfilename)) + self.try_rename(tmpfilename, filename) return True else: self.trouble(u'\nERROR: rtmpdump exited with code %d' % retval) return False def _do_download(self, filename, url, player_url): + # Check file already present + if self.params.get('continuedl', False) and os.path.isfile(filename): + self.report_file_already_downloaded(filename) + return True + # Attempt to download using rtmpdump if url.startswith('rtmp'): return self._download_with_rtmpdump(filename, url, player_url) + tmpfilename = self.temp_name(filename) stream = None open_mode = 'wb' basic_request = urllib2.Request(url, None, std_headers) request = urllib2.Request(url, None, std_headers) # Establish possible resume length - if os.path.isfile(filename): - resume_len = os.path.getsize(filename) + if os.path.isfile(tmpfilename): + resume_len = os.path.getsize(tmpfilename) else: resume_len = 0 @@ -566,6 +585,7 @@ class FileDownloader(object): # completely downloaded if the file size differs less than 100 bytes from # the one in the hard drive. 
self.report_file_already_downloaded(filename) + self.try_rename(tmpfilename, filename) return True else: # The length does not match, we start the download over @@ -599,7 +619,7 @@ class FileDownloader(object): # Open file just in time if stream is None: try: - (stream, filename) = sanitize_open(filename, open_mode) + (stream, tmpfilename) = sanitize_open(tmpfilename, open_mode) self.report_destination(filename) except (OSError, IOError), err: self.trouble(u'ERROR: unable to open for writing: %s' % str(err)) @@ -623,6 +643,7 @@ class FileDownloader(object): self.report_finish() if data_len is not None and str(byte_counter) != data_len: raise ContentTooShortError(byte_counter, long(data_len)) + self.try_rename(tmpfilename, filename) return True class InfoExtractor(object): From a9806fd83d7efc4cc1dcd11174c839f637d0c105 Mon Sep 17 00:00:00 2001 From: Vasyl' Vavrychuk Date: Sun, 5 Dec 2010 20:48:22 +0200 Subject: [PATCH 274/455] report_extraction was never called for GenericIE --- youtube-dl | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube-dl b/youtube-dl index 22dd230ee..d0584a312 100755 --- a/youtube-dl +++ b/youtube-dl @@ -1619,6 +1619,7 @@ class GenericIE(InfoExtractor): self._downloader.trouble(u'ERROR: Invalid URL: %s' % url) return + self.report_extraction(video_id) # Start with something easy: JW Player in SWFObject mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage) if mobj is None: From 6f0ff3bab9f213790817e3344bb5025a7fbea1e4 Mon Sep 17 00:00:00 2001 From: Vasyl' Vavrychuk Date: Sun, 5 Dec 2010 20:57:46 +0200 Subject: [PATCH 275/455] Fixed failure of os.rename after receiving file finished due to file not being closed. 
Following error happen while at the end of _do_download called try_rename WindowsError: [Error 32] The process cannot access the file because it is being used by another process --- youtube-dl | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube-dl b/youtube-dl index d0584a312..6b5f5693d 100755 --- a/youtube-dl +++ b/youtube-dl @@ -640,6 +640,7 @@ class FileDownloader(object): # Apply rate limit self.slow_down(start, byte_counter) + stream.close() self.report_finish() if data_len is not None and str(byte_counter) != data_len: raise ContentTooShortError(byte_counter, long(data_len)) From 27179cfdba64c40607ec089f53ed754efceff64a Mon Sep 17 00:00:00 2001 From: Vasyl' Vavrychuk Date: Sun, 5 Dec 2010 21:09:14 +0200 Subject: [PATCH 276/455] Implemented depositfiles.com support --- youtube-dl | 82 +++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 81 insertions(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index 6b5f5693d..0e9699807 100755 --- a/youtube-dl +++ b/youtube-dl @@ -110,7 +110,6 @@ def sanitize_open(filename, open_mode): stream = open(filename, open_mode) return (stream, filename) - class DownloadError(Exception): """Download Error exception. 
@@ -2077,6 +2076,85 @@ class YoutubeUserIE(InfoExtractor): self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id) return +class DepositFilesIE(InfoExtractor): + """Information extractor for depositfiles.com""" + + _VALID_URL = r'(?:http://)?(?:\w+\.)?depositfiles.com/(?:../(?#locale))?files/(.+)' + + def __init__(self, downloader=None): + InfoExtractor.__init__(self, downloader) + + @staticmethod + def suitable(url): + return (re.match(DepositFilesIE._VALID_URL, url) is not None) + + def report_download_webpage(self, file_id): + """Report webpage download.""" + self._downloader.to_screen(u'[DepositFiles] %s: Downloading webpage' % file_id) + + def report_extraction(self, file_id): + """Report information extraction.""" + self._downloader.to_screen(u'[DepositFiles] %s: Extracting information' % file_id) + + def _real_initialize(self): + return + + def _real_extract(self, url): + # At this point we have a new file + self._downloader.increment_downloads() + + file_id = url.split('/')[-1] + # Rebuild url in english locale + url = 'http://depositfiles.com/en/files/' + file_id + + # Retrieve file webpage with 'Free download' button pressed + free_download_indication = { 'gateway_result' : '1' } + request = urllib2.Request(url, urllib.urlencode(free_download_indication), std_headers) + try: + self.report_download_webpage(file_id) + webpage = urllib2.urlopen(request).read() + except (urllib2.URLError, httplib.HTTPException, socket.error), err: + self._downloader.trouble(u'ERROR: Unable to retrieve file webpage: %s' % str(err)) + return + + # Search for the real file URL + mobj = re.search(r'
    (Attention.*?)', webpage, re.DOTALL) + if (mobj is not None) and (mobj.group(1) is not None): + restriction_message = re.sub('\s+', ' ', mobj.group(1)).strip() + self._downloader.trouble(u'ERROR: %s' % restriction_message) + else: + self._downloader.trouble(u'ERROR: unable to extract download URL from: %s' % url) + return + + file_url = mobj.group(1) + file_extension = os.path.splitext(file_url)[1][1:] + + # Search for file title + mobj = re.search(r'', webpage) + if mobj is None: + self._downloader.trouble(u'ERROR: unable to extract title') + return + file_title = mobj.group(1).decode('utf-8') + + try: + # Process file information + self._downloader.process_info({ + 'id': file_id.decode('utf-8'), + 'url': file_url.decode('utf-8'), + 'uploader': u'NA', + 'upload_date': u'NA', + 'title': file_title, + 'stitle': file_title, + 'ext': file_extension.decode('utf-8'), + 'format': u'NA', + 'player_url': None, + }) + except UnavailableVideoError, err: + self._downloader.trouble(u'ERROR: unable to download file') + class PostProcessor(object): """Post Processor class. 
@@ -2310,6 +2388,7 @@ if __name__ == '__main__': photobucket_ie = PhotobucketIE() yahoo_ie = YahooIE() yahoo_search_ie = YahooSearchIE(yahoo_ie) + deposit_files_ie = DepositFilesIE() generic_ie = GenericIE() # File downloader @@ -2356,6 +2435,7 @@ if __name__ == '__main__': fd.add_info_extractor(photobucket_ie) fd.add_info_extractor(yahoo_ie) fd.add_info_extractor(yahoo_search_ie) + fd.add_info_extractor(deposit_files_ie) # This must come last since it's the # fallback if none of the others work From e567ef93d8833a566f063bab9f9ee2310a131ab1 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Wed, 8 Dec 2010 11:04:22 +0100 Subject: [PATCH 277/455] Add Vasyl' Vavrychuk to the list of authors --- youtube-dl | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube-dl b/youtube-dl index 0e9699807..d60fa60c9 100755 --- a/youtube-dl +++ b/youtube-dl @@ -3,6 +3,7 @@ # Author: Ricardo Garcia Gonzalez # Author: Danny Colligan # Author: Benjamin Johnson +# Author: Vasyl' Vavrychuk # License: Public domain code import cookielib import datetime From d157d2597a5fe99db60304fe1e89523de78b7981 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Thu, 9 Dec 2010 19:22:32 +0100 Subject: [PATCH 278/455] Fix YoutubeIE after recent YouTube changes (closes #34) --- youtube-dl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube-dl b/youtube-dl index d60fa60c9..415cf7073 100755 --- a/youtube-dl +++ b/youtube-dl @@ -955,9 +955,9 @@ class YoutubeIE(InfoExtractor): self._downloader.trouble(u'ERROR: no known formats available for video') return if requested_format is None: - video_url_list = [(existing_formats[0], get_video_template % existing_formats[0])] # Best quality + video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality elif requested_format == '-1': - video_url_list = [(f, get_video_template % f) for f in existing_formats] # All formats + video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats else: 
video_url_list = [(requested_format, get_video_template % requested_format)] # Specific format From 7d950ca1d6543d51d066eaceb9ea1d5ac96d92f4 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Thu, 9 Dec 2010 19:33:04 +0100 Subject: [PATCH 279/455] Improve temporary filename handling of special cases --- youtube-dl | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index 415cf7073..64c590fbf 100755 --- a/youtube-dl +++ b/youtube-dl @@ -238,7 +238,9 @@ class FileDownloader(object): @staticmethod def temp_name(filename): """Returns a temporary filename for the given filename.""" - return filename + '.part' + if filename == u'-' or (os.path.exists(filename) and not os.path.isfile(filename)): + return filename + return filename + u'.part' @staticmethod def format_bytes(bytes): @@ -361,6 +363,8 @@ class FileDownloader(object): def try_rename(self, old_filename, new_filename): try: + if old_filename == new_filename: + return os.rename(old_filename, new_filename) except (IOError, OSError), err: self.trouble(u'ERROR: unable to rename file') From f148ea4473697a1e6666e789b0bfae8f0b4804e4 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Thu, 9 Dec 2010 19:37:07 +0100 Subject: [PATCH 280/455] Bump version number --- LATEST_VERSION | 2 +- youtube-dl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/LATEST_VERSION b/LATEST_VERSION index 4f8405c40..a1c4173c8 100644 --- a/LATEST_VERSION +++ b/LATEST_VERSION @@ -1 +1 @@ -2010.11.19 +2010.12.09 diff --git a/youtube-dl b/youtube-dl index 64c590fbf..e6177e363 100755 --- a/youtube-dl +++ b/youtube-dl @@ -2232,7 +2232,7 @@ if __name__ == '__main__': # Parse command line parser = optparse.OptionParser( usage='Usage: %prog [options] url...', - version='2010.11.19', + version='2010.12.09', conflict_handler='resolve', ) From f83ae7816b64ff23fbef602d7ade3fd00b57e5aa Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Thu, 9 Dec 2010 19:57:39 +0100 Subject: [PATCH 281/455] Fix 
problem when requesting an existing format explicitly with -f --- youtube-dl | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/youtube-dl b/youtube-dl index e6177e363..a8e3bd36c 100755 --- a/youtube-dl +++ b/youtube-dl @@ -944,7 +944,7 @@ class YoutubeIE(InfoExtractor): video_token = urllib.unquote_plus(video_info['token'][0]) # Decide which formats to download - requested_format = self._downloader.params.get('format', None) + req_format = self._downloader.params.get('format', None) get_video_template = 'http://www.youtube.com/get_video?video_id=%s&t=%s&eurl=&el=&ps=&asv=&fmt=%%s' % (video_id, video_token) if 'fmt_url_map' in video_info: @@ -958,12 +958,15 @@ class YoutubeIE(InfoExtractor): if len(existing_formats) == 0: self._downloader.trouble(u'ERROR: no known formats available for video') return - if requested_format is None: + if req_format is None: video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality - elif requested_format == '-1': + elif req_format == '-1': video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats else: - video_url_list = [(requested_format, get_video_template % requested_format)] # Specific format + if req_format in url_map: + video_url_list = [(req_format, url_map[req_format])] # Specific format + else: + video_url_list = [(req_format, get_video_template % req_format)] # Specific format elif 'conn' in video_info and video_info['conn'][0].startswith('rtmp'): self.report_rtmp_download() From 106d091e80f609802d3f6dcc3512525458900042 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Sat, 11 Dec 2010 11:32:13 +0100 Subject: [PATCH 282/455] Do not use 0% as the starting point in resumed downloads (closes #40) --- youtube-dl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index a8e3bd36c..22b9eba17 100755 --- a/youtube-dl +++ b/youtube-dl @@ -606,8 +606,10 @@ class FileDownloader(object): return False data_len = 
data.info().get('Content-length', None) + if data_len is not None: + data_len = long(data_len) + resume_len data_len_str = self.format_bytes(data_len) - byte_counter = 0 + byte_counter = 0 + resume_len block_size = 1024 start = time.time() while True: From 5c1327931a91fbc2f621b7f107255463bdf698b9 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Sat, 11 Dec 2010 11:34:10 +0100 Subject: [PATCH 283/455] Stop attempting to use get_video and detect missing formats ourselves --- youtube-dl | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/youtube-dl b/youtube-dl index 22b9eba17..8f895a5cb 100755 --- a/youtube-dl +++ b/youtube-dl @@ -947,7 +947,6 @@ class YoutubeIE(InfoExtractor): # Decide which formats to download req_format = self._downloader.params.get('format', None) - get_video_template = 'http://www.youtube.com/get_video?video_id=%s&t=%s&eurl=&el=&ps=&asv=&fmt=%%s' % (video_id, video_token) if 'fmt_url_map' in video_info: url_map = dict(tuple(pair.split('|')) for pair in video_info['fmt_url_map'][0].split(',')) @@ -965,10 +964,11 @@ class YoutubeIE(InfoExtractor): elif req_format == '-1': video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats else: - if req_format in url_map: - video_url_list = [(req_format, url_map[req_format])] # Specific format - else: - video_url_list = [(req_format, get_video_template % req_format)] # Specific format + # Specific format + if req_format not in url_map: + self._downloader.trouble(u'ERROR: requested format not available') + return + video_url_list = [(req_format, url_map[req_format])] # Specific format elif 'conn' in video_info and video_info['conn'][0].startswith('rtmp'): self.report_rtmp_download() @@ -1002,7 +1002,7 @@ class YoutubeIE(InfoExtractor): 'player_url': player_url, }) except UnavailableVideoError, err: - self._downloader.trouble(u'ERROR: unable to download video (format may not be available)') + self._downloader.trouble(u'ERROR: unable to download video') class 
MetacafeIE(InfoExtractor): From ef4f4544a24633655a9347176ed475322e6dacf0 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Sat, 11 Dec 2010 11:35:28 +0100 Subject: [PATCH 284/455] Remove deprecated -b option and nonworking -m option (closes #39) --- youtube-dl | 6 ------ 1 file changed, 6 deletions(-) diff --git a/youtube-dl b/youtube-dl index 8f895a5cb..4cb84b22a 100755 --- a/youtube-dl +++ b/youtube-dl @@ -2270,14 +2270,10 @@ if __name__ == '__main__': video_format = optparse.OptionGroup(parser, 'Video Format Options') video_format.add_option('-f', '--format', action='store', dest='format', metavar='FORMAT', help='video format code') - video_format.add_option('-m', '--mobile-version', - action='store_const', dest='format', help='alias for -f 17', const='17') video_format.add_option('--all-formats', action='store_const', dest='format', help='download all available video formats', const='-1') video_format.add_option('--max-quality', action='store', dest='format_limit', metavar='FORMAT', help='highest quality format to download') - video_format.add_option('-b', '--best-quality', - action='store_true', dest='bestquality', help='download the best video quality (DEPRECATED)') parser.add_option_group(video_format) verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options') @@ -2351,8 +2347,6 @@ if __name__ == '__main__': all_urls = batchurls + args # Conflicting, missing and erroneous options - if opts.bestquality: - print >>sys.stderr, u'\nWARNING: -b/--best-quality IS DEPRECATED AS IT IS THE DEFAULT BEHAVIOR NOW\n' if opts.usenetrc and (opts.username is not None or opts.password is not None): parser.error(u'using .netrc conflicts with giving username/password') if opts.password is not None and opts.username is None: From b905e5f583d06678ab0a54b53a227208de7c35ee Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Sun, 12 Dec 2010 19:21:09 +0100 Subject: [PATCH 285/455] Fix erroneous "content too short" error message --- youtube-dl | 2 +- 1 file 
changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index 4cb84b22a..7d4344ae0 100755 --- a/youtube-dl +++ b/youtube-dl @@ -648,7 +648,7 @@ class FileDownloader(object): stream.close() self.report_finish() - if data_len is not None and str(byte_counter) != data_len: + if data_len is not None and byte_counter != data_len: raise ContentTooShortError(byte_counter, long(data_len)) self.try_rename(tmpfilename, filename) return True From 975a91d0ac950f8558067810ba64adb2fb4feada Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Wed, 15 Dec 2010 21:42:11 +0100 Subject: [PATCH 286/455] Take into account resume_len when calculating speed and ETA --- youtube-dl | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/youtube-dl b/youtube-dl index 7d4344ae0..cf0e9fcb8 100755 --- a/youtube-dl +++ b/youtube-dl @@ -617,10 +617,9 @@ class FileDownloader(object): before = time.time() data_block = data.read(block_size) after = time.time() - data_block_len = len(data_block) - if data_block_len == 0: + if len(data_block) == 0: break - byte_counter += data_block_len + byte_counter += len(data_block) # Open file just in time if stream is None: @@ -635,16 +634,16 @@ class FileDownloader(object): except (IOError, OSError), err: self.trouble(u'\nERROR: unable to write data: %s' % str(err)) return False - block_size = self.best_block_size(after - before, data_block_len) + block_size = self.best_block_size(after - before, len(data_block)) # Progress message percent_str = self.calc_percent(byte_counter, data_len) - eta_str = self.calc_eta(start, time.time(), data_len, byte_counter) - speed_str = self.calc_speed(start, time.time(), byte_counter) + eta_str = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len) + speed_str = self.calc_speed(start, time.time(), byte_counter - resume_len) self.report_progress(percent_str, data_len_str, speed_str, eta_str) # Apply rate limit - self.slow_down(start, byte_counter) + 
self.slow_down(start, byte_counter - resume_len) stream.close() self.report_finish() From a57ed21f6df916188229870fdfbcec5c58c89b0e Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Thu, 16 Dec 2010 07:09:58 +0100 Subject: [PATCH 287/455] Request page compression by default, like Firefox does --- youtube-dl | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube-dl b/youtube-dl index cf0e9fcb8..a0b5577d5 100755 --- a/youtube-dl +++ b/youtube-dl @@ -33,6 +33,7 @@ std_headers = { 'User-Agent': 'Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.12) Gecko/20101028 Firefox/3.6.12', 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7', 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', + 'Accept-Encoding': 'gzip, deflate', 'Accept-Language': 'en-us,en;q=0.5', } From 09cc744c90ccff8d6f52cbea8c4bcb864b897cbb Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Mon, 3 Jan 2011 11:22:49 +0100 Subject: [PATCH 288/455] Print new line before a few error messages --- youtube-dl | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/youtube-dl b/youtube-dl index a0b5577d5..87e0375c4 100755 --- a/youtube-dl +++ b/youtube-dl @@ -1002,7 +1002,7 @@ class YoutubeIE(InfoExtractor): 'player_url': player_url, }) except UnavailableVideoError, err: - self._downloader.trouble(u'ERROR: unable to download video') + self._downloader.trouble(u'\nERROR: unable to download video') class MetacafeIE(InfoExtractor): @@ -1147,7 +1147,7 @@ class MetacafeIE(InfoExtractor): 'player_url': None, }) except UnavailableVideoError: - self._downloader.trouble(u'ERROR: unable to download video') + self._downloader.trouble(u'\nERROR: unable to download video') class DailymotionIE(InfoExtractor): @@ -1236,7 +1236,7 @@ class DailymotionIE(InfoExtractor): 'player_url': None, }) except UnavailableVideoError: - self._downloader.trouble(u'ERROR: unable to download video') + self._downloader.trouble(u'\nERROR: unable to download video') class GoogleIE(InfoExtractor): 
"""Information extractor for video.google.com.""" @@ -1346,7 +1346,7 @@ class GoogleIE(InfoExtractor): 'player_url': None, }) except UnavailableVideoError: - self._downloader.trouble(u'ERROR: unable to download video') + self._downloader.trouble(u'\nERROR: unable to download video') class PhotobucketIE(InfoExtractor): @@ -1428,7 +1428,7 @@ class PhotobucketIE(InfoExtractor): 'player_url': None, }) except UnavailableVideoError: - self._downloader.trouble(u'ERROR: unable to download video') + self._downloader.trouble(u'\nERROR: unable to download video') class YahooIE(InfoExtractor): @@ -1586,7 +1586,7 @@ class YahooIE(InfoExtractor): 'player_url': None, }) except UnavailableVideoError: - self._downloader.trouble(u'ERROR: unable to download video') + self._downloader.trouble(u'\nERROR: unable to download video') class GenericIE(InfoExtractor): @@ -1687,7 +1687,7 @@ class GenericIE(InfoExtractor): 'player_url': None, }) except UnavailableVideoError, err: - self._downloader.trouble(u'ERROR: unable to download video') + self._downloader.trouble(u'\nERROR: unable to download video') class YoutubeSearchIE(InfoExtractor): From e7cf18cb6b5c5c679583636d5b057d328655c7a6 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Mon, 3 Jan 2011 11:47:23 +0100 Subject: [PATCH 289/455] Add --dump-user-agent option (patch provided by Benjamin Johnson) --- youtube-dl | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/youtube-dl b/youtube-dl index 87e0375c4..614f20c59 100755 --- a/youtube-dl +++ b/youtube-dl @@ -2257,6 +2257,8 @@ if __name__ == '__main__': dest='playliststart', metavar='NUMBER', help='playlist video to start at (default is 1)', default=1) parser.add_option('--playlist-end', dest='playlistend', metavar='NUMBER', help='playlist video to end at (default is last)', default=-1) + parser.add_option('--dump-user-agent', + action='store_true', dest='dump_user_agent', help='display the current browser identification', default=False) authentication = 
optparse.OptionGroup(parser, 'Authentication Options') authentication.add_option('-u', '--username', @@ -2325,6 +2327,11 @@ if __name__ == '__main__': except (IOError, OSError), err: sys.exit(u'ERROR: unable to open cookie file') + # Dump user agent + if opts.dump_user_agent: + print std_headers['User-Agent'] + sys.exit(0) + # General configuration cookie_processor = urllib2.HTTPCookieProcessor(jar) urllib2.install_opener(urllib2.build_opener(urllib2.ProxyHandler())) From ccbd296bee952961aa09eae700dd4670fef11d7e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mantas=20Mikul=C4=97nas?= Date: Mon, 3 Jan 2011 16:14:19 +0200 Subject: [PATCH 290/455] Added --console-title to display download progress in console window title. This uses SetConsoleTitle() Win32 API for Windows Console, and Xterm escape sequence otherwise. --- youtube-dl | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/youtube-dl b/youtube-dl index 614f20c59..d6aeceabc 100755 --- a/youtube-dl +++ b/youtube-dl @@ -6,6 +6,7 @@ # Author: Vasyl' Vavrychuk # License: Public domain code import cookielib +import ctypes import datetime import htmlentitydefs import httplib @@ -208,6 +209,7 @@ class FileDownloader(object): playliststart: Playlist item to start at. playlistend: Playlist item to end at. logtostderr: Log messages to stderr instead of stdout. + consoletitle: Display progress in console window's titlebar. 
""" params = None @@ -332,6 +334,17 @@ class FileDownloader(object): """Print message to stderr.""" print >>sys.stderr, message.encode(preferredencoding()) + def to_cons_title(self, message): + """Set console/terminal window title to message.""" + if not self.params.get('consoletitle', False): + return + if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow(): + # c_wchar_p() might not be necessary if `message` is + # already of type unicode() + ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message)) + elif 'TERM' in os.environ: + sys.stderr.write('\033]0;%s\007' % message.encode(preferredencoding())) + def fixed_template(self): """Checks if the output template is fixed.""" return (re.search(ur'(?u)%\(.+?\)s', self.params['outtmpl']) is None) @@ -380,6 +393,8 @@ class FileDownloader(object): return self.to_screen(u'\r[download] %s of %s at %s ETA %s' % (percent_str, data_len_str, speed_str, eta_str), skip_eol=True) + self.to_cons_title(u'youtube-dl - %s of %s at %s ETA %s' % + (percent_str.strip(), data_len_str.strip(), speed_str.strip(), eta_str.strip())) def report_resuming_byte(self, resume_len): """Report attempt to resume at given byte.""" @@ -2293,6 +2308,8 @@ if __name__ == '__main__': action='store_true', dest='getdescription', help='simulate, quiet but print video description', default=False) verbosity.add_option('--no-progress', action='store_true', dest='noprogress', help='do not print progress bar', default=False) + verbosity.add_option('--console-title', + action='store_true', dest='consoletitle', help='display progress in console titlebar', default=False) parser.add_option_group(verbosity) filesystem = optparse.OptionGroup(parser, 'Filesystem Options') @@ -2434,6 +2451,7 @@ if __name__ == '__main__': 'playliststart': opts.playliststart, 'playlistend': opts.playlistend, 'logtostderr': opts.outtmpl == '-', + 'consoletitle': opts.consoletitle, }) fd.add_info_extractor(youtube_search_ie) fd.add_info_extractor(youtube_pl_ie) From 
d3975459d15e0e3c8e695d36990860b16ad1b97e Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Fri, 7 Jan 2011 10:22:01 +0100 Subject: [PATCH 291/455] Remove trailing whitespace --- youtube-dl | 82 +++++++++++++++++++++++++++--------------------------- 1 file changed, 41 insertions(+), 41 deletions(-) diff --git a/youtube-dl b/youtube-dl index d6aeceabc..6a648e723 100755 --- a/youtube-dl +++ b/youtube-dl @@ -58,7 +58,7 @@ def preferredencoding(): def htmlentity_transform(matchobj): """Transforms an HTML entity to a Unicode character. - + This function receives a match object and is intended to be used with the re.sub() function. """ @@ -115,7 +115,7 @@ def sanitize_open(filename, open_mode): class DownloadError(Exception): """Download Error exception. - + This exception may be thrown by FileDownloader objects if they are not configured to continue on errors. They will contain the appropriate error message. @@ -227,7 +227,7 @@ class FileDownloader(object): self._num_downloads = 0 self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)] self.params = params - + @staticmethod def pmkdir(filename): """Create directory components in filename. 
Similar to Unix "mkdir -p".""" @@ -313,12 +313,12 @@ class FileDownloader(object): """Add an InfoExtractor object to the end of the list.""" self._ies.append(ie) ie.set_downloader(self) - + def add_post_processor(self, pp): """Add a PostProcessor object to the end of the chain.""" self._pps.append(pp) pp.set_downloader(self) - + def to_screen(self, message, skip_eol=False, ignore_encoding_errors=False): """Print message to stdout if not in quiet mode.""" try: @@ -329,11 +329,11 @@ class FileDownloader(object): except (UnicodeEncodeError), err: if not ignore_encoding_errors: raise - + def to_stderr(self, message): """Print message to stderr.""" print >>sys.stderr, message.encode(preferredencoding()) - + def to_cons_title(self, message): """Set console/terminal window title to message.""" if not self.params.get('consoletitle', False): @@ -386,7 +386,7 @@ class FileDownloader(object): def report_destination(self, filename): """Report destination filename.""" self.to_screen(u'[download] Destination: %s' % filename, ignore_encoding_errors=True) - + def report_progress(self, percent_str, data_len_str, speed_str, eta_str): """Report download progress.""" if self.params.get('noprogress', False): @@ -399,29 +399,29 @@ class FileDownloader(object): def report_resuming_byte(self, resume_len): """Report attempt to resume at given byte.""" self.to_screen(u'[download] Resuming download at byte %s' % resume_len) - + def report_retry(self, count, retries): """Report retry in case of HTTP error 5xx""" self.to_screen(u'[download] Got server HTTP error. Retrying (attempt %d of %d)...' 
% (count, retries)) - + def report_file_already_downloaded(self, file_name): """Report file has already been fully downloaded.""" try: self.to_screen(u'[download] %s has already been downloaded' % file_name) except (UnicodeEncodeError), err: self.to_screen(u'[download] The file has already been downloaded') - + def report_unable_to_resume(self): """Report it was impossible to resume download.""" self.to_screen(u'[download] Unable to resume') - + def report_finish(self): """Report download finished.""" if self.params.get('noprogress', False): self.to_screen(u'[download] Download completed') else: self.to_screen(u'') - + def increment_downloads(self): """Increment the ordinal that assigns a number to each file.""" self._num_downloads += 1 @@ -441,7 +441,7 @@ class FileDownloader(object): print info_dict['description'].encode(preferredencoding(), 'xmlcharrefreplace') return - + try: template_dict = dict(info_dict) template_dict['epoch'] = unicode(long(time.time())) @@ -512,7 +512,7 @@ class FileDownloader(object): info = pp.run(info) if info is None: break - + def _download_with_rtmpdump(self, filename, url, player_url): self.report_destination(filename) tmpfilename = self.temp_name(filename) @@ -730,7 +730,7 @@ class InfoExtractor(object): def set_downloader(self, downloader): """Sets the downloader for this IE.""" self._downloader = downloader - + def _real_initialize(self): """Real initialization process. 
Redefine in subclasses.""" pass @@ -771,31 +771,31 @@ class YoutubeIE(InfoExtractor): def report_login(self): """Report attempt to log in.""" self._downloader.to_screen(u'[youtube] Logging in') - + def report_age_confirmation(self): """Report attempt to confirm age.""" self._downloader.to_screen(u'[youtube] Confirming age') - + def report_video_webpage_download(self, video_id): """Report attempt to download video webpage.""" self._downloader.to_screen(u'[youtube] %s: Downloading video webpage' % video_id) - + def report_video_info_webpage_download(self, video_id): """Report attempt to download video info webpage.""" self._downloader.to_screen(u'[youtube] %s: Downloading video info webpage' % video_id) - + def report_information_extraction(self, video_id): """Report attempt to extract video information.""" self._downloader.to_screen(u'[youtube] %s: Extracting video information' % video_id) - + def report_unavailable_format(self, video_id, format): """Report extracted video URL.""" self._downloader.to_screen(u'[youtube] %s: Format %s not available' % (video_id, format)) - + def report_rtmp_download(self): """Indicate the download will use the RTMP protocol.""" self._downloader.to_screen(u'[youtube] RTMP download detected') - + def _real_initialize(self): if self._downloader is None: return @@ -851,7 +851,7 @@ class YoutubeIE(InfoExtractor): except (urllib2.URLError, httplib.HTTPException, socket.error), err: self._downloader.to_stderr(u'WARNING: unable to log in: %s' % str(err)) return - + # Confirm age age_form = { 'next_url': '/', @@ -1043,11 +1043,11 @@ class MetacafeIE(InfoExtractor): def report_age_confirmation(self): """Report attempt to confirm age.""" self._downloader.to_screen(u'[metacafe] Confirming age') - + def report_download_webpage(self, video_id): """Report webpage download.""" self._downloader.to_screen(u'[metacafe] %s: Downloading webpage' % video_id) - + def report_extraction(self, video_id): """Report information extraction.""" 
self._downloader.to_screen(u'[metacafe] %s: Extracting information' % video_id) @@ -1074,7 +1074,7 @@ class MetacafeIE(InfoExtractor): except (urllib2.URLError, httplib.HTTPException, socket.error), err: self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err)) return - + def _real_extract(self, url): # Extract id and simplified title from URL mobj = re.match(self._VALID_URL, url) @@ -1110,7 +1110,7 @@ class MetacafeIE(InfoExtractor): if mobj is not None: mediaURL = urllib.unquote(mobj.group(1)) video_extension = mediaURL[-3:] - + # Extract gdaKey if available mobj = re.search(r'(?m)&gdaKey=(.*?)&', webpage) if mobj is None: @@ -1180,7 +1180,7 @@ class DailymotionIE(InfoExtractor): def report_download_webpage(self, video_id): """Report webpage download.""" self._downloader.to_screen(u'[dailymotion] %s: Downloading webpage' % video_id) - + def report_extraction(self, video_id): """Report information extraction.""" self._downloader.to_screen(u'[dailymotion] %s: Extracting information' % video_id) @@ -1717,7 +1717,7 @@ class YoutubeSearchIE(InfoExtractor): def __init__(self, youtube_ie, downloader=None): InfoExtractor.__init__(self, downloader) self._youtube_ie = youtube_ie - + @staticmethod def suitable(url): return (re.match(YoutubeSearchIE._VALID_QUERY, url) is not None) @@ -1729,7 +1729,7 @@ class YoutubeSearchIE(InfoExtractor): def _real_initialize(self): self._youtube_ie.initialize() - + def _real_extract(self, query): mobj = re.match(self._VALID_QUERY, query) if mobj is None: @@ -1808,7 +1808,7 @@ class GoogleSearchIE(InfoExtractor): def __init__(self, google_ie, downloader=None): InfoExtractor.__init__(self, downloader) self._google_ie = google_ie - + @staticmethod def suitable(url): return (re.match(GoogleSearchIE._VALID_QUERY, url) is not None) @@ -1820,7 +1820,7 @@ class GoogleSearchIE(InfoExtractor): def _real_initialize(self): self._google_ie.initialize() - + def _real_extract(self, query): mobj = re.match(self._VALID_QUERY, query) if mobj 
is None: @@ -1899,7 +1899,7 @@ class YahooSearchIE(InfoExtractor): def __init__(self, yahoo_ie, downloader=None): InfoExtractor.__init__(self, downloader) self._yahoo_ie = yahoo_ie - + @staticmethod def suitable(url): return (re.match(YahooSearchIE._VALID_QUERY, url) is not None) @@ -1911,7 +1911,7 @@ class YahooSearchIE(InfoExtractor): def _real_initialize(self): self._yahoo_ie.initialize() - + def _real_extract(self, query): mobj = re.match(self._VALID_QUERY, query) if mobj is None: @@ -1990,7 +1990,7 @@ class YoutubePlaylistIE(InfoExtractor): def __init__(self, youtube_ie, downloader=None): InfoExtractor.__init__(self, downloader) self._youtube_ie = youtube_ie - + @staticmethod def suitable(url): return (re.match(YoutubePlaylistIE._VALID_URL, url) is not None) @@ -2001,7 +2001,7 @@ class YoutubePlaylistIE(InfoExtractor): def _real_initialize(self): self._youtube_ie.initialize() - + def _real_extract(self, url): # Extract playlist id mobj = re.match(self._VALID_URL, url) @@ -2053,7 +2053,7 @@ class YoutubeUserIE(InfoExtractor): def __init__(self, youtube_ie, downloader=None): InfoExtractor.__init__(self, downloader) self._youtube_ie = youtube_ie - + @staticmethod def suitable(url): return (re.match(YoutubeUserIE._VALID_URL, url) is not None) @@ -2064,7 +2064,7 @@ class YoutubeUserIE(InfoExtractor): def _real_initialize(self): self._youtube_ie.initialize() - + def _real_extract(self, url): # Extract username mobj = re.match(self._VALID_URL, url) @@ -2205,7 +2205,7 @@ class PostProcessor(object): def set_downloader(self, downloader): """Sets the downloader for this PP.""" self._downloader = downloader - + def run(self, information): """Run the PostProcessor. @@ -2225,7 +2225,7 @@ class PostProcessor(object): it was called from. 
""" return information # by default, do nothing - + ### MAIN PROGRAM ### if __name__ == '__main__': try: From 3fb2c487c05cb9998a7ae6fb1dc8ca2c4d25a9aa Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Fri, 7 Jan 2011 10:23:18 +0100 Subject: [PATCH 292/455] Add --no-part option (closes #48) --- youtube-dl | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/youtube-dl b/youtube-dl index 6a648e723..b2ef1251a 100755 --- a/youtube-dl +++ b/youtube-dl @@ -210,6 +210,7 @@ class FileDownloader(object): playlistend: Playlist item to end at. logtostderr: Log messages to stderr instead of stdout. consoletitle: Display progress in console window's titlebar. + nopart: Do not use temporary .part files. """ params = None @@ -237,14 +238,7 @@ class FileDownloader(object): for dir in aggregate: if not os.path.exists(dir): os.mkdir(dir) - - @staticmethod - def temp_name(filename): - """Returns a temporary filename for the given filename.""" - if filename == u'-' or (os.path.exists(filename) and not os.path.isfile(filename)): - return filename - return filename + u'.part' - + @staticmethod def format_bytes(bytes): if bytes is None: @@ -374,7 +368,14 @@ class FileDownloader(object): speed = float(byte_counter) / elapsed if speed > rate_limit: time.sleep((byte_counter - rate_limit * (now - start_time)) / rate_limit) - + + def temp_name(self, filename): + """Returns a temporary filename for the given filename.""" + if self.params.get('nopart', False) or filename == u'-' or \ + (os.path.exists(filename) and not os.path.isfile(filename)): + return filename + return filename + u'.part' + def try_rename(self, old_filename, new_filename): try: if old_filename == new_filename: @@ -547,7 +548,7 @@ class FileDownloader(object): def _do_download(self, filename, url, player_url): # Check file already present - if self.params.get('continuedl', False) and os.path.isfile(filename): + if self.params.get('continuedl', False) and os.path.isfile(filename) and 
not self.params.get('nopart', False): self.report_file_already_downloaded(filename) return True @@ -2329,6 +2330,8 @@ if __name__ == '__main__': action='store_true', dest='continue_dl', help='resume partially downloaded files', default=False) filesystem.add_option('--cookies', dest='cookiefile', metavar='FILE', help='file to dump cookie jar to') + filesystem.add_option('--no-part', + action='store_true', dest='nopart', help='do not use .part files', default=False) parser.add_option_group(filesystem) (opts, args) = parser.parse_args() @@ -2452,6 +2455,7 @@ if __name__ == '__main__': 'playlistend': opts.playlistend, 'logtostderr': opts.outtmpl == '-', 'consoletitle': opts.consoletitle, + 'nopart': opts.nopart, }) fd.add_info_extractor(youtube_search_ie) fd.add_info_extractor(youtube_pl_ie) From aac3fe0f4adaad809d25e335c17711c23d51eec0 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Mon, 10 Jan 2011 12:54:14 +0100 Subject: [PATCH 293/455] Fix bug in regular expression for youtu.be links --- youtube-dl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index b2ef1251a..a09b498ba 100755 --- a/youtube-dl +++ b/youtube-dl @@ -743,7 +743,7 @@ class InfoExtractor(object): class YoutubeIE(InfoExtractor): """Information extractor for youtube.com.""" - _VALID_URL = r'^((?:https?://)?(?:youtu\.be/|(?:\w+\.)?youtube(?:-nocookie)?\.com/(?:(?:v/)|(?:(?:watch(?:_popup)?(?:\.php)?)?(?:\?|#!?)(?:.+&)?v=))))?([0-9A-Za-z_-]+)(?(1).+)?$' + _VALID_URL = r'^((?:https?://)?(?:youtu\.be/|(?:\w+\.)?youtube(?:-nocookie)?\.com/)(?:(?:v/)|(?:(?:watch(?:_popup)?(?:\.php)?)?(?:\?|#!?)(?:.+&)?v=)))?([0-9A-Za-z_-]+)(?(1).+)?$' _LANG_URL = r'http://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1' _LOGIN_URL = 'https://www.youtube.com/signup?next=/&gl=US&hl=en' _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en' From 1987c2325a9764e24104fa8da23b467a5e33cf49 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Wed, 12 
Jan 2011 20:20:37 +0100 Subject: [PATCH 294/455] Add proper support for "gzip" and "deflate" encodings --- youtube-dl | 89 +++++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 72 insertions(+), 17 deletions(-) diff --git a/youtube-dl b/youtube-dl index a09b498ba..13d42765b 100755 --- a/youtube-dl +++ b/youtube-dl @@ -8,6 +8,7 @@ import cookielib import ctypes import datetime +import gzip import htmlentitydefs import httplib import locale @@ -18,11 +19,13 @@ import os.path import re import socket import string +import StringIO import subprocess import sys import time import urllib import urllib2 +import zlib # parse_qs was moved from the cgi module to the urlparse module recently. try: @@ -161,6 +164,56 @@ class ContentTooShortError(Exception): self.downloaded = downloaded self.expected = expected +class YoutubeDLHandler(urllib2.HTTPHandler): + """Handler for HTTP requests and responses. + + This class, when installed with an OpenerDirector, automatically adds + the standard headers to every HTTP request and handles gzipped and + deflated responses from web servers. If compression is to be avoided in + a particular request, the original request in the program code only has + to include the HTTP header "Youtubedl-No-Compression", which will be + removed before making the real request. + + Part of this code was copied from: + + http://techknack.net/python-urllib2-handlers/ + + Andrew Rowls, the author of that code, agreed to release it to the + public domain. 
+ """ + + @staticmethod + def deflate(data): + try: + return zlib.decompress(data, -zlib.MAX_WBITS) + except zlib.error: + return zlib.decompress(data) + + def http_request(self, req): + for h in std_headers: + if h in req.headers: + del req.headers[h] + req.add_header(h, std_headers[h]) + if 'Youtubedl-no-compression' in req.headers: + if 'Accept-encoding' in req.headers: + del req.headers['Accept-encoding'] + del req.headers['Youtubedl-no-compression'] + return req + + def http_response(self, req, resp): + old_resp = resp + # gzip + if resp.headers.get('Content-encoding', '') == 'gzip': + gz = gzip.GzipFile(fileobj=StringIO.StringIO(resp.read()), mode='r') + resp = urllib2.addinfourl(gz, old_resp.headers, old_resp.url, old_resp.code) + resp.msg = old_resp.msg + # deflate + if resp.headers.get('Content-encoding', '') == 'deflate': + gz = StringIO.StringIO(self.deflate(resp.read())) + resp = urllib2.addinfourl(gz, old_resp.headers, old_resp.url, old_resp.code) + resp.msg = old_resp.msg + return resp + class FileDownloader(object): """File Downloader class. 
@@ -559,8 +612,11 @@ class FileDownloader(object): tmpfilename = self.temp_name(filename) stream = None open_mode = 'wb' - basic_request = urllib2.Request(url, None, std_headers) - request = urllib2.Request(url, None, std_headers) + + # Do not include the Accept-Encoding header + headers = {'Youtubedl-no-compression': 'True'} + basic_request = urllib2.Request(url, None, headers) + request = urllib2.Request(url, None, headers) # Establish possible resume length if os.path.isfile(tmpfilename): @@ -822,7 +878,7 @@ class YoutubeIE(InfoExtractor): return # Set language - request = urllib2.Request(self._LANG_URL, None, std_headers) + request = urllib2.Request(self._LANG_URL) try: self.report_lang() urllib2.urlopen(request).read() @@ -842,7 +898,7 @@ class YoutubeIE(InfoExtractor): 'username': username, 'password': password, } - request = urllib2.Request(self._LOGIN_URL, urllib.urlencode(login_form), std_headers) + request = urllib2.Request(self._LOGIN_URL, urllib.urlencode(login_form)) try: self.report_login() login_results = urllib2.urlopen(request).read() @@ -858,7 +914,7 @@ class YoutubeIE(InfoExtractor): 'next_url': '/', 'action_confirm': 'Confirm', } - request = urllib2.Request(self._AGE_URL, urllib.urlencode(age_form), std_headers) + request = urllib2.Request(self._AGE_URL, urllib.urlencode(age_form)) try: self.report_age_confirmation() age_results = urllib2.urlopen(request).read() @@ -876,7 +932,7 @@ class YoutubeIE(InfoExtractor): # Get video webpage self.report_video_webpage_download(video_id) - request = urllib2.Request('http://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id, None, std_headers) + request = urllib2.Request('http://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id) try: video_webpage = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: @@ -895,7 +951,7 @@ class YoutubeIE(InfoExtractor): for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']: 
video_info_url = ('http://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en' % (video_id, el_type)) - request = urllib2.Request(video_info_url, None, std_headers) + request = urllib2.Request(video_info_url) try: video_info_webpage = urllib2.urlopen(request).read() video_info = parse_qs(video_info_webpage) @@ -1055,7 +1111,7 @@ class MetacafeIE(InfoExtractor): def _real_initialize(self): # Retrieve disclaimer - request = urllib2.Request(self._DISCLAIMER, None, std_headers) + request = urllib2.Request(self._DISCLAIMER) try: self.report_disclaimer() disclaimer = urllib2.urlopen(request).read() @@ -1068,7 +1124,7 @@ class MetacafeIE(InfoExtractor): 'filters': '0', 'submit': "Continue - I'm over 18", } - request = urllib2.Request(self._FILTER_POST, urllib.urlencode(disclaimer_form), std_headers) + request = urllib2.Request(self._FILTER_POST, urllib.urlencode(disclaimer_form)) try: self.report_age_confirmation() disclaimer = urllib2.urlopen(request).read() @@ -1771,7 +1827,7 @@ class YoutubeSearchIE(InfoExtractor): while True: self.report_download_page(query, pagenum) result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum) - request = urllib2.Request(result_url, None, std_headers) + request = urllib2.Request(result_url) try: page = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: @@ -1862,7 +1918,7 @@ class GoogleSearchIE(InfoExtractor): while True: self.report_download_page(query, pagenum) result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum) - request = urllib2.Request(result_url, None, std_headers) + request = urllib2.Request(result_url) try: page = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: @@ -1953,7 +2009,7 @@ class YahooSearchIE(InfoExtractor): while True: self.report_download_page(query, pagenum) result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum) - request = 
urllib2.Request(result_url, None, std_headers) + request = urllib2.Request(result_url) try: page = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: @@ -2017,7 +2073,7 @@ class YoutubePlaylistIE(InfoExtractor): while True: self.report_download_page(playlist_id, pagenum) - request = urllib2.Request(self._TEMPLATE_URL % (playlist_id, pagenum), None, std_headers) + request = urllib2.Request(self._TEMPLATE_URL % (playlist_id, pagenum)) try: page = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: @@ -2079,7 +2135,7 @@ class YoutubeUserIE(InfoExtractor): pagenum = 1 self.report_download_page(username) - request = urllib2.Request(self._TEMPLATE_URL % (username), None, std_headers) + request = urllib2.Request(self._TEMPLATE_URL % (username)) try: page = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: @@ -2135,7 +2191,7 @@ class DepositFilesIE(InfoExtractor): # Retrieve file webpage with 'Free download' button pressed free_download_indication = { 'gateway_result' : '1' } - request = urllib2.Request(url, urllib.urlencode(free_download_indication), std_headers) + request = urllib2.Request(url, urllib.urlencode(free_download_indication)) try: self.report_download_webpage(file_id) webpage = urllib2.urlopen(request).read() @@ -2354,8 +2410,7 @@ if __name__ == '__main__': # General configuration cookie_processor = urllib2.HTTPCookieProcessor(jar) - urllib2.install_opener(urllib2.build_opener(urllib2.ProxyHandler())) - urllib2.install_opener(urllib2.build_opener(cookie_processor)) + urllib2.install_opener(urllib2.build_opener(urllib2.ProxyHandler(), cookie_processor, YoutubeDLHandler())) socket.setdefaulttimeout(300) # 5 minutes should be enough (famous last words) # Batch file verification From 8cc42e7c1a5bc05a6ca753c0f92732c04ac48ab7 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Wed, 12 Jan 2011 20:21:43 +0100 Subject: 
[PATCH 295/455] Fix "unable to rename file" Windows error (closes #56) --- youtube-dl | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/youtube-dl b/youtube-dl index 13d42765b..c49c1b064 100755 --- a/youtube-dl +++ b/youtube-dl @@ -429,6 +429,11 @@ class FileDownloader(object): return filename return filename + u'.part' + def undo_temp_name(self, filename): + if filename.endswith(u'.part'): + return filename[:-len(u'.part')] + return filename + def try_rename(self, old_filename, new_filename): try: if old_filename == new_filename: @@ -698,6 +703,7 @@ class FileDownloader(object): if stream is None: try: (stream, tmpfilename) = sanitize_open(tmpfilename, open_mode) + filename = self.undo_temp_name(tmpfilename) self.report_destination(filename) except (OSError, IOError), err: self.trouble(u'ERROR: unable to open for writing: %s' % str(err)) From 0d8d9877ad7dc7ba893f7e37382add01a01c3d0a Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Wed, 12 Jan 2011 20:22:37 +0100 Subject: [PATCH 296/455] Add support for embedded YouTube playist URLs (closes #54) --- youtube-dl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index c49c1b064..32e334ce4 100755 --- a/youtube-dl +++ b/youtube-dl @@ -2044,7 +2044,7 @@ class YahooSearchIE(InfoExtractor): class YoutubePlaylistIE(InfoExtractor): """Information Extractor for YouTube playlists.""" - _VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/(?:(?:view_play_list|my_playlists)\?.*?p=|user/.*?/user/)([^&]+).*' + _VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/(?:(?:view_play_list|my_playlists)\?.*?p=|user/.*?/user/|p/)([^&]+).*' _TEMPLATE_URL = 'http://www.youtube.com/view_play_list?p=%s&page=%s&gl=US&hl=en' _VIDEO_INDICATOR = r'/watch\?v=(.+?)&' _MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*' From 0fe64c04f8ae57ace54404a8b8bfb5deff552e2d Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Wed, 12 Jan 2011 21:07:56 +0100 Subject: [PATCH 297/455] Make the self-updating function a bit more 
robust --- youtube-dl | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/youtube-dl b/youtube-dl index 32e334ce4..443bb211b 100755 --- a/youtube-dl +++ b/youtube-dl @@ -2296,20 +2296,26 @@ if __name__ == '__main__': import getpass import optparse - # Function to update the program file with the latest version from bitbucket.org + # Function to update the program file with the latest version from the repository. def update_self(downloader, filename): # Note: downloader only used for options - if not os.access (filename, os.W_OK): + if not os.access(filename, os.W_OK): sys.exit('ERROR: no write permissions on %s' % filename) downloader.to_screen('Updating to latest stable version...') - latest_url = 'http://github.com/rg3/youtube-dl/raw/master/LATEST_VERSION' - latest_version = urllib.urlopen(latest_url).read().strip() - prog_url = 'http://github.com/rg3/youtube-dl/raw/%s/youtube-dl' % latest_version - newcontent = urllib.urlopen(prog_url).read() - stream = open(filename, 'w') - stream.write(newcontent) - stream.close() + try: + latest_url = 'http://github.com/rg3/youtube-dl/raw/master/LATEST_VERSION' + latest_version = urllib.urlopen(latest_url).read().strip() + prog_url = 'http://github.com/rg3/youtube-dl/raw/%s/youtube-dl' % latest_version + newcontent = urllib.urlopen(prog_url).read() + except (IOError, OSError), err: + sys.exit('ERROR: unable to download latest version') + try: + stream = open(filename, 'w') + stream.write(newcontent) + stream.close() + except (IOError, OSError), err: + sys.exit('ERROR: unable to overwrite current version') downloader.to_screen('Updated to version %s' % latest_version) # Parse command line From 0d14e225fa031f37c34242551565b6dadbfa51af Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Tue, 18 Jan 2011 19:16:42 +0100 Subject: [PATCH 298/455] Omit code argument in addinfourl for Python 2.4 --- youtube-dl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube-dl 
b/youtube-dl index 443bb211b..8bdde1704 100755 --- a/youtube-dl +++ b/youtube-dl @@ -205,12 +205,12 @@ class YoutubeDLHandler(urllib2.HTTPHandler): # gzip if resp.headers.get('Content-encoding', '') == 'gzip': gz = gzip.GzipFile(fileobj=StringIO.StringIO(resp.read()), mode='r') - resp = urllib2.addinfourl(gz, old_resp.headers, old_resp.url, old_resp.code) + resp = urllib2.addinfourl(gz, old_resp.headers, old_resp.url) resp.msg = old_resp.msg # deflate if resp.headers.get('Content-encoding', '') == 'deflate': gz = StringIO.StringIO(self.deflate(resp.read())) - resp = urllib2.addinfourl(gz, old_resp.headers, old_resp.url, old_resp.code) + resp = urllib2.addinfourl(gz, old_resp.headers, old_resp.url) resp.msg = old_resp.msg return resp From 7b531c0be61b84884302b564a9fc89edde2cfc67 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Tue, 18 Jan 2011 20:52:37 +0100 Subject: [PATCH 299/455] Wrap call to addinfourl for compatibility with Python 2.4 --- youtube-dl | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/youtube-dl b/youtube-dl index 8bdde1704..1b20025e7 100755 --- a/youtube-dl +++ b/youtube-dl @@ -189,6 +189,12 @@ class YoutubeDLHandler(urllib2.HTTPHandler): except zlib.error: return zlib.decompress(data) + @staticmethod + def addinfourl_wrapper(stream, headers, url, code): + if hasattr(urllib2.addinfourl, 'getcode'): + return urllib2.addinfourl(stream, headers, url, code) + return urllib2.addinfourl(stream, headers, url) + def http_request(self, req): for h in std_headers: if h in req.headers: @@ -205,12 +211,12 @@ class YoutubeDLHandler(urllib2.HTTPHandler): # gzip if resp.headers.get('Content-encoding', '') == 'gzip': gz = gzip.GzipFile(fileobj=StringIO.StringIO(resp.read()), mode='r') - resp = urllib2.addinfourl(gz, old_resp.headers, old_resp.url) + resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code) resp.msg = old_resp.msg # deflate if resp.headers.get('Content-encoding', '') == 'deflate': gz = 
StringIO.StringIO(self.deflate(resp.read())) - resp = urllib2.addinfourl(gz, old_resp.headers, old_resp.url) + resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code) resp.msg = old_resp.msg return resp From 0f6b00b587e9b47919555f9c250b753419063b46 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Thu, 20 Jan 2011 20:36:42 +0100 Subject: [PATCH 300/455] Improve addinfourl_wrapper for compatibility with older Python versions --- youtube-dl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index 1b20025e7..103189b21 100755 --- a/youtube-dl +++ b/youtube-dl @@ -193,7 +193,9 @@ class YoutubeDLHandler(urllib2.HTTPHandler): def addinfourl_wrapper(stream, headers, url, code): if hasattr(urllib2.addinfourl, 'getcode'): return urllib2.addinfourl(stream, headers, url, code) - return urllib2.addinfourl(stream, headers, url) + ret = urllib2.addinfourl(stream, headers, url) + ret.code = code + return ret def http_request(self, req): for h in std_headers: From c02d8e404019975060b195b7c7b6fb39944e321b Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Fri, 21 Jan 2011 18:16:33 +0100 Subject: [PATCH 301/455] Fix dailymotion support (closes #60) --- youtube-dl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index 103189b21..869a32b0a 100755 --- a/youtube-dl +++ b/youtube-dl @@ -1302,7 +1302,7 @@ class DailymotionIE(InfoExtractor): video_title = mobj.group(1).decode('utf-8') video_title = sanitize_title(video_title) - mobj = re.search(r'(?im)
    .*?(.+?)', webpage) + mobj = re.search(r'(?im)(.+?)', webpage) if mobj is None: self._downloader.trouble(u'ERROR: unable to extract uploader nickname') return From 0f7099a59b0d39ee94ae0351c6091c31819d9b36 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Fri, 21 Jan 2011 18:18:48 +0100 Subject: [PATCH 302/455] Fix omission of Witold Baryluk as the Dailymotion InfoExtractor author --- youtube-dl | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube-dl b/youtube-dl index 869a32b0a..99c9afe0a 100755 --- a/youtube-dl +++ b/youtube-dl @@ -4,6 +4,7 @@ # Author: Danny Colligan # Author: Benjamin Johnson # Author: Vasyl' Vavrychuk +# Author: Witold Baryluk # License: Public domain code import cookielib import ctypes From b940c84a245c27199b78956e283e6e2832bd51b4 Mon Sep 17 00:00:00 2001 From: Lionel Elie Mamane Date: Tue, 25 Jan 2011 08:37:08 +0100 Subject: [PATCH 303/455] Support for youtube.com/embed/XXXXX URLs (closes #63) --- youtube-dl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index 99c9afe0a..640c8777d 100755 --- a/youtube-dl +++ b/youtube-dl @@ -814,7 +814,7 @@ class InfoExtractor(object): class YoutubeIE(InfoExtractor): """Information extractor for youtube.com.""" - _VALID_URL = r'^((?:https?://)?(?:youtu\.be/|(?:\w+\.)?youtube(?:-nocookie)?\.com/)(?:(?:v/)|(?:(?:watch(?:_popup)?(?:\.php)?)?(?:\?|#!?)(?:.+&)?v=)))?([0-9A-Za-z_-]+)(?(1).+)?$' + _VALID_URL = r'^((?:https?://)?(?:youtu\.be/|(?:\w+\.)?youtube(?:-nocookie)?\.com/)(?:(?:(?:v|embed)/)|(?:(?:watch(?:_popup)?(?:\.php)?)?(?:\?|#!?)(?:.+&)?v=)))?([0-9A-Za-z_-]+)(?(1).+)?$' _LANG_URL = r'http://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1' _LOGIN_URL = 'https://www.youtube.com/signup?next=/&gl=US&hl=en' _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en' From 9f7963468bf7f19e0cd2e11ed3ed2829f5c68b78 Mon Sep 17 00:00:00 2001 From: Gergely Imreh Date: Tue, 25 Jan 2011 11:03:16 +0800 Subject: [PATCH 304/455] New 
option --get-filename to print output filename When using youtube-dl within scripts, it is very useful to know what will be the final output filename with all the title settings and filename templates applied. Add option to the quiet mode operations to print that info. For this I had to move the filename-generation into its own function. As much as I can tell it should work almost always well, ie. not to break things if one not actually interested in the title, like in case of other forced printing. That is, unless there's an invalid system charset or the user specified a wrong output template. In that case probably could be assumed that the user does have a problem (the former) or did want to mess with the filename (the latter). Signed-off-by: Gergely Imreh --- youtube-dl | 31 ++++++++++++++++++++++--------- 1 file changed, 22 insertions(+), 9 deletions(-) diff --git a/youtube-dl b/youtube-dl index 640c8777d..be859a5a1 100755 --- a/youtube-dl +++ b/youtube-dl @@ -258,6 +258,7 @@ class FileDownloader(object): forcetitle: Force printing title. forcethumbnail: Force printing thumbnail URL. forcedescription: Force printing description. + forcefilename: Force printing final filename. simulate: Do not download the video files. format: Video format code. format_limit: Highest quality format to try. 
@@ -494,8 +495,21 @@ class FileDownloader(object): """Increment the ordinal that assigns a number to each file.""" self._num_downloads += 1 + def prepare_filename(self, info_dict): + """Generate the output filename.""" + try: + template_dict = dict(info_dict) + template_dict['epoch'] = unicode(long(time.time())) + template_dict['autonumber'] = unicode('%05d' % self._num_downloads) + filename = self.params['outtmpl'] % template_dict + return filename + except (ValueError, KeyError), err: + self.trouble(u'ERROR: invalid system charset or erroneous output template') + return None + def process_info(self, info_dict): """Process a single dictionary returned by an InfoExtractor.""" + filename = self.prepare_filename(info_dict) # Do nothing else if in simulate mode if self.params.get('simulate', False): # Forced printings @@ -507,16 +521,12 @@ class FileDownloader(object): print info_dict['thumbnail'].encode(preferredencoding(), 'xmlcharrefreplace') if self.params.get('forcedescription', False) and 'description' in info_dict: print info_dict['description'].encode(preferredencoding(), 'xmlcharrefreplace') + if self.params.get('forcefilename', False) and filename is not None: + print filename.encode(preferredencoding(), 'xmlcharrefreplace') return - try: - template_dict = dict(info_dict) - template_dict['epoch'] = unicode(long(time.time())) - template_dict['autonumber'] = unicode('%05d' % self._num_downloads) - filename = self.params['outtmpl'] % template_dict - except (ValueError, KeyError), err: - self.trouble(u'ERROR: invalid system charset or erroneous output template') + if filename is None: return if self.params.get('nooverwrites', False) and os.path.exists(filename): self.to_stderr(u'WARNING: file exists and will be skipped') @@ -2384,6 +2394,8 @@ if __name__ == '__main__': action='store_true', dest='getthumbnail', help='simulate, quiet but print thumbnail URL', default=False) verbosity.add_option('--get-description', action='store_true', dest='getdescription', 
help='simulate, quiet but print video description', default=False) + verbosity.add_option('--get-filename', + action='store_true', dest='getfilename', help='simulate, quiet but print output filename', default=False) verbosity.add_option('--no-progress', action='store_true', dest='noprogress', help='do not print progress bar', default=False) verbosity.add_option('--console-title', @@ -2503,12 +2515,13 @@ if __name__ == '__main__': 'usenetrc': opts.usenetrc, 'username': opts.username, 'password': opts.password, - 'quiet': (opts.quiet or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription), + 'quiet': (opts.quiet or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription or opts.getfilename), 'forceurl': opts.geturl, 'forcetitle': opts.gettitle, 'forcethumbnail': opts.getthumbnail, 'forcedescription': opts.getdescription, - 'simulate': (opts.simulate or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription), + 'forcefilename': opts.getfilename, + 'simulate': (opts.simulate or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription or opts.getfilename), 'format': opts.format, 'format_limit': opts.format_limit, 'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(preferredencoding())) From 09bd408c284771224db5665d937b62d6c36759fb Mon Sep 17 00:00:00 2001 From: Gergely Imreh Date: Thu, 27 Jan 2011 13:02:51 +0800 Subject: [PATCH 305/455] Set downloaded file's time stamp from last-modified header This file stamp setting is very relaxed. If there's any problem along the way (no last-modified header, bad time string format, no time set privileges,...) or if nothing is downloaded (e.g. using resumed download but the file was already complete) then nothing is done. 
--- youtube-dl | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/youtube-dl b/youtube-dl index be859a5a1..e28788e3c 100755 --- a/youtube-dl +++ b/youtube-dl @@ -9,6 +9,7 @@ import cookielib import ctypes import datetime +import email.utils import gzip import htmlentitydefs import httplib @@ -117,6 +118,14 @@ def sanitize_open(filename, open_mode): stream = open(filename, open_mode) return (stream, filename) +def timeconvert(timestr): + """Convert RFC 2822 defined time string into system timestamp""" + timestamp = None + timetuple = email.utils.parsedate_tz(timestr) + if timetuple is not None: + timestamp = email.utils.mktime_tz(timetuple) + return timestamp + class DownloadError(Exception): """Download Error exception. @@ -748,6 +757,15 @@ class FileDownloader(object): if data_len is not None and byte_counter != data_len: raise ContentTooShortError(byte_counter, long(data_len)) self.try_rename(tmpfilename, filename) + # Update file modification time + timestr = data.info().get('last-modified', None) + if timestr is not None: + filetime = timeconvert(timestr) + if filetime is not None: + try: + os.utime(filename,(time.time(), filetime)) + except: + pass return True class InfoExtractor(object): From e30189021db82c98019a9c055b9d81e4a7417b81 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Fri, 28 Jan 2011 19:59:18 +0100 Subject: [PATCH 306/455] Make the file timestamp feature optional --- youtube-dl | 34 ++++++++++++++++++++++++++-------- 1 file changed, 26 insertions(+), 8 deletions(-) diff --git a/youtube-dl b/youtube-dl index e28788e3c..05869004b 100755 --- a/youtube-dl +++ b/youtube-dl @@ -283,6 +283,7 @@ class FileDownloader(object): logtostderr: Log messages to stderr instead of stdout. consoletitle: Display progress in console window's titlebar. nopart: Do not use temporary .part files. + updatetime: Use the Last-modified header to set output file timestamps. 
""" params = None @@ -460,6 +461,23 @@ class FileDownloader(object): os.rename(old_filename, new_filename) except (IOError, OSError), err: self.trouble(u'ERROR: unable to rename file') + + def try_utime(self, filename, last_modified_hdr): + """Try to set the last-modified time of the given file.""" + if last_modified_hdr is None: + return + if not os.path.isfile(filename): + return + timestr = last_modified_hdr + if timestr is None: + return + filetime = timeconvert(timestr) + if filetime is None: + return + try: + os.utime(filename,(time.time(), filetime)) + except: + pass def report_destination(self, filename): """Report destination filename.""" @@ -757,15 +775,11 @@ class FileDownloader(object): if data_len is not None and byte_counter != data_len: raise ContentTooShortError(byte_counter, long(data_len)) self.try_rename(tmpfilename, filename) + # Update file modification time - timestr = data.info().get('last-modified', None) - if timestr is not None: - filetime = timeconvert(timestr) - if filetime is not None: - try: - os.utime(filename,(time.time(), filetime)) - except: - pass + if self.params.get('updatetime', True): + self.try_utime(filename, data.info().get('last-modified', None)) + return True class InfoExtractor(object): @@ -2439,6 +2453,9 @@ if __name__ == '__main__': dest='cookiefile', metavar='FILE', help='file to dump cookie jar to') filesystem.add_option('--no-part', action='store_true', dest='nopart', help='do not use .part files', default=False) + filesystem.add_option('--no-mtime', + action='store_false', dest='updatetime', + help='do not use the Last-modified header to set the file modification time', default=True) parser.add_option_group(filesystem) (opts, args) = parser.parse_args() @@ -2563,6 +2580,7 @@ if __name__ == '__main__': 'logtostderr': opts.outtmpl == '-', 'consoletitle': opts.consoletitle, 'nopart': opts.nopart, + 'updatetime': opts.updatetime, }) fd.add_info_extractor(youtube_search_ie) fd.add_info_extractor(youtube_pl_ie) From 
6025795d952958391a2325dbdcf41a6baae8d8f4 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Fri, 28 Jan 2011 19:59:47 +0100 Subject: [PATCH 307/455] Split some long lines --- youtube-dl | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/youtube-dl b/youtube-dl index 05869004b..8dd03daf3 100755 --- a/youtube-dl +++ b/youtube-dl @@ -2393,7 +2393,8 @@ if __name__ == '__main__': parser.add_option('--playlist-end', dest='playlistend', metavar='NUMBER', help='playlist video to end at (default is last)', default=-1) parser.add_option('--dump-user-agent', - action='store_true', dest='dump_user_agent', help='display the current browser identification', default=False) + action='store_true', dest='dump_user_agent', + help='display the current browser identification', default=False) authentication = optparse.OptionGroup(parser, 'Authentication Options') authentication.add_option('-u', '--username', @@ -2423,15 +2424,19 @@ if __name__ == '__main__': verbosity.add_option('-e', '--get-title', action='store_true', dest='gettitle', help='simulate, quiet but print title', default=False) verbosity.add_option('--get-thumbnail', - action='store_true', dest='getthumbnail', help='simulate, quiet but print thumbnail URL', default=False) + action='store_true', dest='getthumbnail', + help='simulate, quiet but print thumbnail URL', default=False) verbosity.add_option('--get-description', - action='store_true', dest='getdescription', help='simulate, quiet but print video description', default=False) + action='store_true', dest='getdescription', + help='simulate, quiet but print video description', default=False) verbosity.add_option('--get-filename', - action='store_true', dest='getfilename', help='simulate, quiet but print output filename', default=False) + action='store_true', dest='getfilename', + help='simulate, quiet but print output filename', default=False) verbosity.add_option('--no-progress', action='store_true', dest='noprogress', help='do not 
print progress bar', default=False) verbosity.add_option('--console-title', - action='store_true', dest='consoletitle', help='display progress in console titlebar', default=False) + action='store_true', dest='consoletitle', + help='display progress in console titlebar', default=False) parser.add_option_group(verbosity) filesystem = optparse.OptionGroup(parser, 'Filesystem Options') @@ -2440,7 +2445,8 @@ if __name__ == '__main__': filesystem.add_option('-l', '--literal', action='store_true', dest='useliteral', help='use literal title in file name', default=False) filesystem.add_option('-A', '--auto-number', - action='store_true', dest='autonumber', help='number downloaded files starting from 00000', default=False) + action='store_true', dest='autonumber', + help='number downloaded files starting from 00000', default=False) filesystem.add_option('-o', '--output', dest='outtmpl', metavar='TEMPLATE', help='output filename template') filesystem.add_option('-a', '--batch-file', From e0edf1e041caa32195e1d9bcb3966dae2fbc39c7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rog=C3=A9rio=20Brito?= Date: Tue, 23 Nov 2010 21:20:26 -0200 Subject: [PATCH 308/455] Give preference to WebM formats. This patch gives preference to formats that are Free. 
--- youtube-dl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index 24722d292..cd1e9fc86 100755 --- a/youtube-dl +++ b/youtube-dl @@ -719,7 +719,7 @@ class YoutubeIE(InfoExtractor): _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en' _NETRC_MACHINE = 'youtube' # Listed in order of quality - _available_formats = ['38', '37', '22', '45', '35', '34', '43', '18', '6', '5', '17', '13'] + _available_formats = ['38', '37', '45', '22', '43', '35', '34', '18', '6', '5', '17', '13'] _video_extensions = { '13': '3gp', '17': 'mp4', From 92743d423a7dfaf0f803deab14475e6343091f20 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rog=C3=A9rio=20Brito?= Date: Thu, 25 Nov 2010 04:24:45 -0200 Subject: [PATCH 309/455] Preliminary downloading from vimeo --- youtube-dl | 114 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 114 insertions(+) diff --git a/youtube-dl b/youtube-dl index 8dd03daf3..edd1d3f29 100755 --- a/youtube-dl +++ b/youtube-dl @@ -1718,6 +1718,118 @@ class YahooIE(InfoExtractor): self._downloader.trouble(u'\nERROR: unable to download video') +class VimeoIE(InfoExtractor): + """Information extractor for vimeo.com.""" + + # _VALID_URL matches Vimeo URLs + _VALID_URL = r'(?:http://)?vimeo\.com/([0-9]+)' + + def __init__(self, downloader=None): + InfoExtractor.__init__(self, downloader) + + @staticmethod + def suitable(url): + return (re.match(VimeoIE._VALID_URL, url) is not None) + + def report_download_webpage(self, video_id): + """Report webpage download.""" + self._downloader.to_screen(u'[video.vimeo] %s: Downloading webpage' % video_id) + + def report_extraction(self, video_id): + """Report information extraction.""" + self._downloader.to_screen(u'[video.vimeo] %s: Extracting information' % video_id) + + def _real_initialize(self): + return + + def _real_extract(self, url, new_video=True): + # Extract ID from URL + mobj = re.match(self._VALID_URL, url) + if mobj is None: + 
self._downloader.trouble(u'ERROR: Invalid URL: %s' % url) + return + + # At this point we have a new video + self._downloader.increment_downloads() + video_id = mobj.group(1) + video_extension = 'flv' # FIXME + + # Retrieve video webpage to extract further information + request = urllib2.Request("http://vimeo.com/moogaloop/load/clip:%s" % video_id, None, std_headers) + try: + self.report_download_webpage(video_id) + webpage = urllib2.urlopen(request).read() + except (urllib2.URLError, httplib.HTTPException, socket.error), err: + self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err)) + return + + # Extract uploader and title from webpage + self.report_extraction(video_id) + mobj = re.search(r'(.*)', webpage) + if mobj is None: + self._downloader.trouble(u'ERROR: unable to extract video title') + return + video_title = mobj.group(1).decode('utf-8') + simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title) + + mobj = re.search(r'http://vimeo.com/(.*)', webpage) + if mobj is None: + self._downloader.trouble(u'ERROR: unable to extract video uploader') + return + video_uploader = mobj.group(1).decode('utf-8') + + # Extract video thumbnail + mobj = re.search(r'(.*)', webpage) + if mobj is None: + self._downloader.trouble(u'ERROR: unable to extract video thumbnail') + return + video_thumbnail = mobj.group(1).decode('utf-8') + + # # Extract video description + # mobj = re.search(r'', webpage) + # if mobj is None: + # self._downloader.trouble(u'ERROR: unable to extract video description') + # return + # video_description = mobj.group(1).decode('utf-8') + # if not video_description: video_description = 'No description available.' + video_description = 'Foo.' 
+ + # Extract request signature + mobj = re.search(r'(.*)', webpage) + if mobj is None: + self._downloader.trouble(u'ERROR: unable to extract request signature') + return + sig = mobj.group(1).decode('utf-8') + + # Extract request signature expiration + mobj = re.search(r'(.*)', webpage) + if mobj is None: + self._downloader.trouble(u'ERROR: unable to extract request signature expiration') + return + sig_exp = mobj.group(1).decode('utf-8') + + video_url = "http://vimeo.com/moogaloop/play/clip:%s/%s/%s" % (video_id, sig, sig_exp) + + try: + # Process video information + self._downloader.process_info({ + 'id': video_id.decode('utf-8'), + 'url': video_url, + 'uploader': video_uploader, + 'upload_date': u'NA', + 'title': video_title, + 'stitle': simple_title, + 'ext': video_extension.decode('utf-8'), + 'thumbnail': video_thumbnail.decode('utf-8'), + 'description': video_description, + 'thumbnail': video_thumbnail, + 'description': video_description, + 'player_url': None, + }) + except UnavailableVideoError: + self._downloader.trouble(u'ERROR: unable to download video') + + class GenericIE(InfoExtractor): """Generic last-resort information extractor.""" @@ -2537,6 +2649,7 @@ if __name__ == '__main__': parser.error(u'invalid playlist end number specified') # Information extractors + vimeo_ie = VimeoIE() youtube_ie = YoutubeIE() metacafe_ie = MetacafeIE(youtube_ie) dailymotion_ie = DailymotionIE() @@ -2588,6 +2701,7 @@ if __name__ == '__main__': 'nopart': opts.nopart, 'updatetime': opts.updatetime, }) + fd.add_info_extractor(vimeo_ie) fd.add_info_extractor(youtube_search_ie) fd.add_info_extractor(youtube_pl_ie) fd.add_info_extractor(youtube_user_ie) From c5a088d341e3aeaf65fbca02523c02ff3bccee6e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rog=C3=A9rio=20Brito?= Date: Sat, 29 Jan 2011 04:13:54 -0200 Subject: [PATCH 310/455] Use non-greedy regexps, for safety. Since I was very lazy when I coded this, I took the fastest route. 
Luckily, Vasyl' Vavrychuk pointed this out and I went (after many months) and just did some minor changes. --- youtube-dl | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/youtube-dl b/youtube-dl index edd1d3f29..e7459062d 100755 --- a/youtube-dl +++ b/youtube-dl @@ -1765,21 +1765,21 @@ class VimeoIE(InfoExtractor): # Extract uploader and title from webpage self.report_extraction(video_id) - mobj = re.search(r'(.*)', webpage) + mobj = re.search(r'(.*?)', webpage) if mobj is None: self._downloader.trouble(u'ERROR: unable to extract video title') return video_title = mobj.group(1).decode('utf-8') simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title) - mobj = re.search(r'http://vimeo.com/(.*)', webpage) + mobj = re.search(r'http://vimeo.com/(.*?)', webpage) if mobj is None: self._downloader.trouble(u'ERROR: unable to extract video uploader') return video_uploader = mobj.group(1).decode('utf-8') # Extract video thumbnail - mobj = re.search(r'(.*)', webpage) + mobj = re.search(r'(.*?)', webpage) if mobj is None: self._downloader.trouble(u'ERROR: unable to extract video thumbnail') return @@ -1795,14 +1795,14 @@ class VimeoIE(InfoExtractor): video_description = 'Foo.' 
# Extract request signature - mobj = re.search(r'(.*)', webpage) + mobj = re.search(r'(.*?)', webpage) if mobj is None: self._downloader.trouble(u'ERROR: unable to extract request signature') return sig = mobj.group(1).decode('utf-8') # Extract request signature expiration - mobj = re.search(r'(.*)', webpage) + mobj = re.search(r'(.*?)', webpage) if mobj is None: self._downloader.trouble(u'ERROR: unable to extract request signature expiration') return From 5aba6ea4fe6ad227d64a7e8b487d7cd7c3ad1f11 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Sat, 29 Jan 2011 11:55:20 +0100 Subject: [PATCH 311/455] =?UTF-8?q?Add=20YoutubeUserIE=20(code=20courtesy?= =?UTF-8?q?=20of=20Pawe=C5=82=20Paprota)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- youtube-dl | 79 ++++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 56 insertions(+), 23 deletions(-) diff --git a/youtube-dl b/youtube-dl index 8dd03daf3..c8c2ea88e 100755 --- a/youtube-dl +++ b/youtube-dl @@ -5,6 +5,7 @@ # Author: Benjamin Johnson # Author: Vasyl' Vavrychuk # Author: Witold Baryluk +# Author: Paweł Paprota # License: Public domain code import cookielib import ctypes @@ -2159,9 +2160,11 @@ class YoutubePlaylistIE(InfoExtractor): class YoutubeUserIE(InfoExtractor): """Information Extractor for YouTube users.""" - _VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/user/(.*)' + _VALID_URL = r'(?:(?:(?:http://)?(?:\w+\.)?youtube.com/user/)|ytuser:)([A-Za-z0-9_-]+)' _TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s' - _VIDEO_INDICATOR = r'http://gdata.youtube.com/feeds/api/videos/(.*)' # XXX Fix this. 
+ _GDATA_PAGE_SIZE = 50 + _GDATA_URL = 'http://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d' + _VIDEO_INDICATOR = r'/watch\?v=(.+?)&' _youtube_ie = None def __init__(self, youtube_ie, downloader=None): @@ -2172,9 +2175,10 @@ class YoutubeUserIE(InfoExtractor): def suitable(url): return (re.match(YoutubeUserIE._VALID_URL, url) is not None) - def report_download_page(self, username): + def report_download_page(self, username, start_index): """Report attempt to download user page.""" - self._downloader.to_screen(u'[youtube] user %s: Downloading page ' % (username)) + self._downloader.to_screen(u'[youtube] user %s: Downloading video ids from %d to %d' % + (username, start_index, start_index + self._GDATA_PAGE_SIZE)) def _real_initialize(self): self._youtube_ie.initialize() @@ -2186,34 +2190,63 @@ class YoutubeUserIE(InfoExtractor): self._downloader.trouble(u'ERROR: invalid url: %s' % url) return - # Download user page username = mobj.group(1) + + # Download video ids using YouTube Data API. Result size per + # query is limited (currently to 50 videos) so we need to query + # page by page until there are no video ids - it means we got + # all of them. 
+ video_ids = [] - pagenum = 1 + pagenum = 0 - self.report_download_page(username) - request = urllib2.Request(self._TEMPLATE_URL % (username)) - try: - page = urllib2.urlopen(request).read() - except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err)) - return + while True: + start_index = pagenum * self._GDATA_PAGE_SIZE + 1 + self.report_download_page(username, start_index) - # Extract video identifiers - ids_in_page = [] + request = urllib2.Request(self._GDATA_URL % (username, self._GDATA_PAGE_SIZE, start_index)) - for mobj in re.finditer(self._VIDEO_INDICATOR, page): - if mobj.group(1) not in ids_in_page: - ids_in_page.append(mobj.group(1)) - video_ids.extend(ids_in_page) + try: + page = urllib2.urlopen(request).read() + except (urllib2.URLError, httplib.HTTPException, socket.error), err: + self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err)) + return + # Extract video identifiers + ids_in_page = [] + + for mobj in re.finditer(self._VIDEO_INDICATOR, page): + if mobj.group(1) not in ids_in_page: + ids_in_page.append(mobj.group(1)) + + video_ids.extend(ids_in_page) + + # A little optimization - if current page is not + # "full", ie. does not contain PAGE_SIZE video ids then + # we can assume that this page is the last one - there + # are no more ids on further pages - no need to query + # again. 
+ + if len(ids_in_page) < self._GDATA_PAGE_SIZE: + break + + pagenum += 1 + + all_ids_count = len(video_ids) playliststart = self._downloader.params.get('playliststart', 1) - 1 playlistend = self._downloader.params.get('playlistend', -1) - video_ids = video_ids[playliststart:playlistend] - for id in video_ids: - self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id) - return + if playlistend == -1: + video_ids = video_ids[playliststart:] + else: + video_ids = video_ids[playliststart:playlistend] + + self._downloader.to_screen("[youtube] user %s: Collected %d video ids (downloading %d of them)" % + (username, all_ids_count, len(video_ids))) + + for video_id in video_ids: + self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % video_id) + class DepositFilesIE(InfoExtractor): """Information extractor for depositfiles.com""" From 9e0dd8692ea16d62564c6b05c6fdc3f2e0b2f02d Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Sun, 30 Jan 2011 12:58:01 +0100 Subject: [PATCH 312/455] Bump version number --- LATEST_VERSION | 2 +- youtube-dl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/LATEST_VERSION b/LATEST_VERSION index a1c4173c8..4ab209346 100644 --- a/LATEST_VERSION +++ b/LATEST_VERSION @@ -1 +1 @@ -2010.12.09 +2011.01.30 diff --git a/youtube-dl b/youtube-dl index c8c2ea88e..e980f41f8 100755 --- a/youtube-dl +++ b/youtube-dl @@ -2405,7 +2405,7 @@ if __name__ == '__main__': # Parse command line parser = optparse.OptionParser( usage='Usage: %prog [options] url...', - version='2010.12.09', + version='2011.01.30', conflict_handler='resolve', ) From 5776c3295b085b9177b24152e33049a8c4b0c90b Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Sun, 30 Jan 2011 12:59:18 +0100 Subject: [PATCH 313/455] Update User-Agent string --- youtube-dl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index e980f41f8..4d1e942ff 100755 --- a/youtube-dl +++ b/youtube-dl @@ -37,7 +37,7 @@ except ImportError: from 
cgi import parse_qs std_headers = { - 'User-Agent': 'Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.12) Gecko/20101028 Firefox/3.6.12', + 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:2.0b10) Gecko/20100101 Firefox/4.0b10' 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7', 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', 'Accept-Encoding': 'gzip, deflate', From 16c73c2e513829197c4af5ee62bde88b2b2272e4 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Sun, 30 Jan 2011 13:03:15 +0100 Subject: [PATCH 314/455] Fix SyntaxError problem (oops) --- youtube-dl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index 4d1e942ff..a4c8f2494 100755 --- a/youtube-dl +++ b/youtube-dl @@ -37,7 +37,7 @@ except ImportError: from cgi import parse_qs std_headers = { - 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:2.0b10) Gecko/20100101 Firefox/4.0b10' + 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:2.0b10) Gecko/20100101 Firefox/4.0b10', 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7', 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', 'Accept-Encoding': 'gzip, deflate', From f74e22ae280ac1680251350c4672abfeb2a047fe Mon Sep 17 00:00:00 2001 From: Gergely Imreh Date: Mon, 31 Jan 2011 18:54:47 +0800 Subject: [PATCH 315/455] Enable artist playlists in YoutubePlaylistIE Artist playlist pages have different format compared to user playlists, thus more format checking is needed to construct the correct URL. From the artist playlist this method downloads all listed below the "Videos by [Artist Name]" header, plus usually there's one more video on the side, titled "Youtube Mix for [Artist Name]", which has a link format that currently cannot be distinguished from the other videos in the list. 
--- youtube-dl | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/youtube-dl b/youtube-dl index a4c8f2494..dd875a38e 100755 --- a/youtube-dl +++ b/youtube-dl @@ -2096,8 +2096,8 @@ class YahooSearchIE(InfoExtractor): class YoutubePlaylistIE(InfoExtractor): """Information Extractor for YouTube playlists.""" - _VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/(?:(?:view_play_list|my_playlists)\?.*?p=|user/.*?/user/|p/)([^&]+).*' - _TEMPLATE_URL = 'http://www.youtube.com/view_play_list?p=%s&page=%s&gl=US&hl=en' + _VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/(?:(?:view_play_list|my_playlists|artist)\?.*?(p|a)=|user/.*?/user/|p/)([^&]+).*' + _TEMPLATE_URL = 'http://www.youtube.com/%s?%s=%s&page=%s&gl=US&hl=en' _VIDEO_INDICATOR = r'/watch\?v=(.+?)&' _MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*' _youtube_ie = None @@ -2125,13 +2125,19 @@ class YoutubePlaylistIE(InfoExtractor): return # Download playlist pages - playlist_id = mobj.group(1) + # prefix is 'p' as default for playlists but there are other types that need extra care + playlist_prefix = mobj.group(1) + if playlist_prefix == 'a': + playlist_access = 'artist' + else: + playlist_access = 'view_play_list' + playlist_id = mobj.group(2) video_ids = [] pagenum = 1 while True: self.report_download_page(playlist_id, pagenum) - request = urllib2.Request(self._TEMPLATE_URL % (playlist_id, pagenum)) + request = urllib2.Request(self._TEMPLATE_URL % (playlist_access, playlist_prefix, playlist_id, pagenum)) try: page = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: From f24c674b048003d878a1d6436c1b2af47693f2ac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rog=C3=A9rio=20Brito?= Date: Fri, 4 Feb 2011 04:02:29 -0200 Subject: [PATCH 316/455] Make some of the comments more descriptive. 
--- youtube-dl | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/youtube-dl b/youtube-dl index b96156be7..a925c9783 100755 --- a/youtube-dl +++ b/youtube-dl @@ -1764,8 +1764,12 @@ class VimeoIE(InfoExtractor): self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err)) return - # Extract uploader and title from webpage + # Now we begin extracting as much information as we can from what we + # retrieved. First we extract the information common to all extractors, + # and latter we extract those that are Vimeo specific. self.report_extraction(video_id) + + # Extract title mobj = re.search(r'(.*?)', webpage) if mobj is None: self._downloader.trouble(u'ERROR: unable to extract video title') @@ -1773,6 +1777,7 @@ class VimeoIE(InfoExtractor): video_title = mobj.group(1).decode('utf-8') simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title) + # Extract uploader mobj = re.search(r'http://vimeo.com/(.*?)', webpage) if mobj is None: self._downloader.trouble(u'ERROR: unable to extract video uploader') @@ -1795,14 +1800,14 @@ class VimeoIE(InfoExtractor): # if not video_description: video_description = 'No description available.' video_description = 'Foo.' - # Extract request signature + # Vimeo specific: extract request signature mobj = re.search(r'(.*?)', webpage) if mobj is None: self._downloader.trouble(u'ERROR: unable to extract request signature') return sig = mobj.group(1).decode('utf-8') - # Extract request signature expiration + # Vimeo specific: Extract request signature expiration mobj = re.search(r'(.*?)', webpage) if mobj is None: self._downloader.trouble(u'ERROR: unable to extract request signature expiration') From 8cc98b2358fb4554c7af9dcd38fd4c96262e5ac3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rog=C3=A9rio=20Brito?= Date: Fri, 4 Feb 2011 06:15:27 -0200 Subject: [PATCH 317/455] vimeo: Also accept URLs prefixed by www. I hope that this doesn't break anything. 
`:)` --- youtube-dl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index a925c9783..16d234ebf 100755 --- a/youtube-dl +++ b/youtube-dl @@ -1723,7 +1723,7 @@ class VimeoIE(InfoExtractor): """Information extractor for vimeo.com.""" # _VALID_URL matches Vimeo URLs - _VALID_URL = r'(?:http://)?vimeo\.com/([0-9]+)' + _VALID_URL = r'(?:http://)?(?:www.)?vimeo\.com/([0-9]+)' def __init__(self, downloader=None): InfoExtractor.__init__(self, downloader) From d119b54df6a02d3985284c36586f6ff7e4cac969 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Sat, 12 Feb 2011 20:19:20 +0100 Subject: [PATCH 318/455] Support more common YouTube playlist URLs --- youtube-dl | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index dd875a38e..0f9724637 100755 --- a/youtube-dl +++ b/youtube-dl @@ -2096,7 +2096,7 @@ class YahooSearchIE(InfoExtractor): class YoutubePlaylistIE(InfoExtractor): """Information Extractor for YouTube playlists.""" - _VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/(?:(?:view_play_list|my_playlists|artist)\?.*?(p|a)=|user/.*?/user/|p/)([^&]+).*' + _VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/(?:(?:view_play_list|my_playlists|artist)\?.*?(p|a)=|user/.*?/user/|p/|user/.*?#[pg]/c/)([0-9A-Za-z]+)(?:/.*?/([0-9A-Za-z_-]+))?.*' _TEMPLATE_URL = 'http://www.youtube.com/%s?%s=%s&page=%s&gl=US&hl=en' _VIDEO_INDICATOR = r'/watch\?v=(.+?)&' _MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*' @@ -2124,6 +2124,11 @@ class YoutubePlaylistIE(InfoExtractor): self._downloader.trouble(u'ERROR: invalid url: %s' % url) return + # Single video case + if mobj.group(3) is not None: + self._youtube_ie.extract(mobj.group(3)) + return + # Download playlist pages # prefix is 'p' as default for playlists but there are other types that need extra care playlist_prefix = mobj.group(1) From 7cc3c6fd62d82bac36c583a8d1dc6c2f6da8c178 Mon Sep 17 00:00:00 2001 From: Gergely Imreh Date: Sun, 13 Feb 2011 19:02:56 +0800 
Subject: [PATCH 319/455] Fix possible missing parameter in playlist url extraction The "playlist_prefix" parameter was missing when parsing playlist urls that match the recently added format, e.g.: http://www.youtube.com/user/stanforduniversity#g/c/9D558D49CA734A02 For these URLs (basically, for every playlist type so far, except the artist list) playlist_prefix has to be equal to "p" for correct extraction. --- youtube-dl | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube-dl b/youtube-dl index 0f9724637..26af2e5bc 100755 --- a/youtube-dl +++ b/youtube-dl @@ -2135,6 +2135,7 @@ class YoutubePlaylistIE(InfoExtractor): if playlist_prefix == 'a': playlist_access = 'artist' else: + playlist_prefix = 'p' playlist_access = 'view_play_list' playlist_id = mobj.group(2) video_ids = [] From a7e5259c33851725243b13f01929e75bb40e0ea2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rog=C3=A9rio=20Brito?= Date: Thu, 17 Feb 2011 08:25:45 -0200 Subject: [PATCH 320/455] vimeo: Make regexp more robust. This change makes the VimeoIE work with http://player.vimeo.com/video/19267888 --- youtube-dl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index 16d234ebf..780a6d9a2 100755 --- a/youtube-dl +++ b/youtube-dl @@ -1723,7 +1723,7 @@ class VimeoIE(InfoExtractor): """Information extractor for vimeo.com.""" # _VALID_URL matches Vimeo URLs - _VALID_URL = r'(?:http://)?(?:www.)?vimeo\.com/([0-9]+)' + _VALID_URL = r'(?:http://)?(?:(?:www|player).)?vimeo\.com/(?:video/)?([0-9]+)' def __init__(self, downloader=None): InfoExtractor.__init__(self, downloader) From 9f5f9602131c440919c29aa81b586fe5b83891a8 Mon Sep 17 00:00:00 2001 From: Gergely Imreh Date: Sun, 13 Feb 2011 21:26:58 +0800 Subject: [PATCH 321/455] Facebook info extractor This IE should be full-featured. 
Public videos can be downloaded without login, e.g: https://www.facebook.com/video/video.php?v=696729990595 Private videos need login, and subject to login rate limit of a couple of tries / minute. --- youtube-dl | 225 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 225 insertions(+) diff --git a/youtube-dl b/youtube-dl index 26af2e5bc..a1061a4e4 100755 --- a/youtube-dl +++ b/youtube-dl @@ -2339,6 +2339,229 @@ class DepositFilesIE(InfoExtractor): except UnavailableVideoError, err: self._downloader.trouble(u'ERROR: unable to download file') +class FacebookIE(InfoExtractor): + """Information Extractor for Facebook""" + + _VALID_URL = r'^(?:https?://)?(?:\w+\.)?facebook.com/video/video.php\?(?:.*?)v=(?P\d+)(?:.*)' + _LOGIN_URL = 'https://login.facebook.com/login.php?m&next=http%3A%2F%2Fm.facebook.com%2Fhome.php&' + _NETRC_MACHINE = 'facebook' + _available_formats = ['highqual', 'lowqual'] + _video_extensions = { + 'highqual': 'mp4', + 'lowqual': 'mp4', + } + + def __init__(self, downloader=None): + InfoExtractor.__init__(self, downloader) + + @staticmethod + def suitable(url): + return (re.match(FacebookIE._VALID_URL, url) is not None) + + def _reporter(self, message): + """Add header and report message.""" + self._downloader.to_screen(u'[facebook] %s' % message) + + def report_login(self): + """Report attempt to log in.""" + self._reporter(u'Logging in') + + def report_video_webpage_download(self, video_id): + """Report attempt to download video webpage.""" + self._reporter(u'%s: Downloading video webpage' % video_id) + + def report_information_extraction(self, video_id): + """Report attempt to extract video information.""" + self._reporter(u'%s: Extracting video information' % video_id) + + def _parse_page(self, video_webpage): + """Extract video information from page""" + # General data + data = {'title': r'class="video_title datawrap">(.*?)(.*?)
    ', + 'owner': r'\("video_owner_name", "(.*?)"\)', + 'upload_date': r'data-date="(.*?)"', + 'thumbnail': r'\("thumb_url", "(?P.*?)"\)', + } + video_info = {} + for piece in data.keys(): + mobj = re.search(data[piece], video_webpage) + if mobj is not None: + video_info[piece] = urllib.unquote_plus(mobj.group(1).decode("unicode_escape")) + + # Video urls + video_urls = {} + for fmt in self._available_formats: + mobj = re.search(r'\("%s_src\", "(.+?)"\)' % fmt, video_webpage) + if mobj is not None: + # URL is in a Javascript segment inside an escaped Unicode format within + # the generally utf-8 page + video_urls[fmt] = urllib.unquote_plus(mobj.group(1).decode("unicode_escape")) + video_info['video_urls'] = video_urls + + return video_info + + def _real_initialize(self): + if self._downloader is None: + return + + useremail = None + password = None + downloader_params = self._downloader.params + + # Attempt to use provided username and password or .netrc data + if downloader_params.get('username', None) is not None: + useremail = downloader_params['username'] + password = downloader_params['password'] + elif downloader_params.get('usenetrc', False): + try: + info = netrc.netrc().authenticators(self._NETRC_MACHINE) + if info is not None: + useremail = info[0] + password = info[2] + else: + raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE) + except (IOError, netrc.NetrcParseError), err: + self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % str(err)) + return + + if useremail is None: + return + + # Log in + login_form = { + 'email': useremail, + 'pass': password, + 'login': 'Log+In' + } + request = urllib2.Request(self._LOGIN_URL, urllib.urlencode(login_form)) + try: + self.report_login() + login_results = urllib2.urlopen(request).read() + if re.search(r'', login_results) is not None: + self._downloader.to_stderr(u'WARNING: unable to log in: bad username/password, or exceded login rate limit (~3/min). 
Check credentials or wait.') + return + except (urllib2.URLError, httplib.HTTPException, socket.error), err: + self._downloader.to_stderr(u'WARNING: unable to log in: %s' % str(err)) + return + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + if mobj is None: + self._downloader.trouble(u'ERROR: invalid URL: %s' % url) + return + video_id = mobj.group('ID') + + # Get video webpage + self.report_video_webpage_download(video_id) + request = urllib2.Request('https://www.facebook.com/video/video.php?v=%s' % video_id) + try: + page = urllib2.urlopen(request) + video_webpage = page.read() + except (urllib2.URLError, httplib.HTTPException, socket.error), err: + self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err)) + return + + # Start extracting information + self.report_information_extraction(video_id) + + # Extract information + video_info = self._parse_page(video_webpage) + + # uploader + if 'owner' not in video_info: + self._downloader.trouble(u'ERROR: unable to extract uploader nickname') + return + video_uploader = video_info['owner'] + + # title + if 'title' not in video_info: + self._downloader.trouble(u'ERROR: unable to extract video title') + return + video_title = video_info['title'] + video_title = video_title.decode('utf-8') + video_title = sanitize_title(video_title) + + # simplified title + simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title) + simple_title = simple_title.strip(ur'_') + + # thumbnail image + if 'thumbnail' not in video_info: + self._downloader.trouble(u'WARNING: unable to extract video thumbnail') + video_thumbnail = '' + else: + video_thumbnail = video_info['thumbnail'] + + # upload date + upload_date = u'NA' + if 'upload_date' in video_info: + upload_time = video_info['upload_date'] + timetuple = email.utils.parsedate_tz(upload_time) + if timetuple is not None: + try: + upload_date = time.strftime('%Y%m%d', timetuple[0:9]) + except: + pass + + # description 
+ video_description = 'No description available.' + if (self._downloader.params.get('forcedescription', False) and + 'description' in video_info): + video_description = video_info['description'] + + url_map = video_info['video_urls'] + if len(url_map.keys()) > 0: + # Decide which formats to download + req_format = self._downloader.params.get('format', None) + format_limit = self._downloader.params.get('format_limit', None) + + if format_limit is not None and format_limit in self._available_formats: + format_list = self._available_formats[self._available_formats.index(format_limit):] + else: + format_list = self._available_formats + existing_formats = [x for x in format_list if x in url_map] + if len(existing_formats) == 0: + self._downloader.trouble(u'ERROR: no known formats available for video') + return + if req_format is None: + video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality + elif req_format == '-1': + video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats + else: + # Specific format + if req_format not in url_map: + self._downloader.trouble(u'ERROR: requested format not available') + return + video_url_list = [(req_format, url_map[req_format])] # Specific format + + for format_param, video_real_url in video_url_list: + + # At this point we have a new video + self._downloader.increment_downloads() + + # Extension + video_extension = self._video_extensions.get(format_param, 'mp4') + + # Find the video URL in fmt_url_map or conn paramters + try: + # Process video information + self._downloader.process_info({ + 'id': video_id.decode('utf-8'), + 'url': video_real_url.decode('utf-8'), + 'uploader': video_uploader.decode('utf-8'), + 'upload_date': upload_date, + 'title': video_title, + 'stitle': simple_title, + 'ext': video_extension.decode('utf-8'), + 'format': (format_param is None and u'NA' or format_param.decode('utf-8')), + 'thumbnail': video_thumbnail.decode('utf-8'), + 'description': 
video_description.decode('utf-8'), + 'player_url': None, + }) + except UnavailableVideoError, err: + self._downloader.trouble(u'\nERROR: unable to download video') + class PostProcessor(object): """Post Processor class. @@ -2594,6 +2817,7 @@ if __name__ == '__main__': yahoo_ie = YahooIE() yahoo_search_ie = YahooSearchIE(yahoo_ie) deposit_files_ie = DepositFilesIE() + facebook_ie = FacebookIE() generic_ie = GenericIE() # File downloader @@ -2645,6 +2869,7 @@ if __name__ == '__main__': fd.add_info_extractor(yahoo_ie) fd.add_info_extractor(yahoo_search_ie) fd.add_info_extractor(deposit_files_ie) + fd.add_info_extractor(facebook_ie) # This must come last since it's the # fallback if none of the others work From ef9f8451c8c33308a277b4933d6b3fa728c1adc0 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Sun, 20 Feb 2011 18:01:57 +0100 Subject: [PATCH 322/455] Add Gergely Imreh to the author list --- youtube-dl | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube-dl b/youtube-dl index a1061a4e4..0dd872e50 100755 --- a/youtube-dl +++ b/youtube-dl @@ -6,6 +6,7 @@ # Author: Vasyl' Vavrychuk # Author: Witold Baryluk # Author: Paweł Paprota +# Author: Gergely Imreh # License: Public domain code import cookielib import ctypes From 87cbd21323195cacb0febc7898a7fdc74ed97f9b Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Fri, 25 Feb 2011 19:05:35 +0100 Subject: [PATCH 323/455] Fix date parsing for YouTube (patch by Drake Wyrm) --- youtube-dl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index 0dd872e50..2e04c05b0 100755 --- a/youtube-dl +++ b/youtube-dl @@ -1059,7 +1059,7 @@ class YoutubeIE(InfoExtractor): mobj = re.search(r'id="eow-date".*?>(.*?)', video_webpage, re.DOTALL) if mobj is not None: upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split()) - format_expressions = ['%d %B %Y', '%B %d %Y'] + format_expressions = ['%d %B %Y', '%B %d %Y', '%b %d %Y'] for expression in format_expressions: try: upload_date = 
datetime.datetime.strptime(upload_date, expression).strftime('%Y%m%d') From 3072fab115b3c89322edc906a8f88f997e46dedd Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Fri, 25 Feb 2011 19:06:58 +0100 Subject: [PATCH 324/455] Add an audio extracting PostProcessor using ffmpeg (closes #2) --- youtube-dl | 93 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 93 insertions(+) diff --git a/youtube-dl b/youtube-dl index 2e04c05b0..b0981da0d 100755 --- a/youtube-dl +++ b/youtube-dl @@ -2609,6 +2609,85 @@ class PostProcessor(object): """ return information # by default, do nothing +class FFmpegExtractAudioPP(PostProcessor): + + def __init__(self, downloader=None, preferredcodec=None): + PostProcessor.__init__(self, downloader) + if preferredcodec is None: + preferredcodec = 'best' + self._preferredcodec = preferredcodec + + @staticmethod + def get_audio_codec(path): + handle = subprocess.Popen(['ffprobe', '-show_streams', path], + stderr=file(os.path.devnull, 'w'), stdout=subprocess.PIPE) + output = handle.communicate()[0] + if handle.wait() != 0: + return None + audio_codec = None + for line in output.split('\n'): + if line.startswith('codec_name='): + audio_codec = line.split('=')[1].strip() + elif line.strip() == 'codec_type=audio' and audio_codec is not None: + return audio_codec + return None + + @staticmethod + def run_ffmpeg(path, out_path, codec, more_opts): + try: + ret = subprocess.call(['ffmpeg', '-y', '-i', path, '-vn', '-acodec', codec] + more_opts + [out_path], + stdout=file(os.path.devnull, 'w'), stderr=subprocess.STDOUT) + return (ret == 0) + except (IOError, OSError): + return False + + def run(self, information): + path = information['filepath'] + + filecodec = self.get_audio_codec(path) + if filecodec is None: + self._downloader.to_stderr(u'WARNING: no audio codec found in file') + return None + + more_opts = [] + if self._preferredcodec == 'best' or self._preferredcodec == filecodec: + if filecodec == 'aac' or filecodec == 'mp3': + 
# Lossless if possible + acodec = 'copy' + extension = filecodec + if filecodec == 'aac': + more_opts = ['-f', 'adts'] + else: + # MP3 otherwise. + acodec = 'libmp3lame' + extension = 'mp3' + more_opts = ['-ab', '128k'] + else: + # We convert the audio (lossy) + acodec = {'mp3': 'libmp3lame', 'aac': 'aac'}[self._preferredcodec] + extension = self._preferredcodec + more_opts = ['-ab', '128k'] + if self._preferredcodec == 'aac': + more_opts += ['-f', 'adts'] + + (prefix, ext) = os.path.splitext(path) + new_path = prefix + '.' + extension + self._downloader.to_screen(u'[ffmpeg] Destination: %s' % new_path) + status = self.run_ffmpeg(path, new_path, acodec, more_opts) + + if not status: + self._downloader.to_stderr(u'WARNING: error running ffmpeg' % ret) + return None + + try: + os.remove(path) + except (IOError, OSError): + self._downloader.to_stderr(u'WARNING: Unable to remove downloaded video file') + return None + + information['filepath'] = new_path + return information + ### MAIN PROGRAM ### if __name__ == '__main__': try: @@ -2733,6 +2812,13 @@ if __name__ == '__main__': help='do not use the Last-modified header to set the file modification time', default=True) parser.add_option_group(filesystem) + postproc = optparse.OptionGroup(parser, 'Post-processing Options') + postproc.add_option('--extract-audio', action='store_true', dest='extractaudio', default=False, + help='convert video files to audio-only files (requires ffmpeg and ffprobe)') + postproc.add_option('--audio-format', metavar='FORMAT', dest='audioformat', default='best', + help='"best", "aac" or "mp3"; best by default') + parser.add_option_group(postproc) + (opts, args) = parser.parse_args() # Open appropriate CookieJar @@ -2804,6 +2890,9 @@ if __name__ == '__main__': raise ValueError except (TypeError, ValueError), err: parser.error(u'invalid playlist end number specified') + if opts.extractaudio: + if opts.audioformat not in ['best', 'aac', 'mp3']: + parser.error(u'invalid audio format specified') # 
Information extractors youtube_ie = YoutubeIE() @@ -2876,6 +2965,10 @@ if __name__ == '__main__': # fallback if none of the others work fd.add_info_extractor(generic_ie) + # PostProcessors + if opts.extractaudio: + fd.add_post_processor(FFmpegExtractAudioPP(preferredcodec=opts.audioformat)) + # Update version if opts.update_self: update_self(fd, sys.argv[0]) From afd233c05c983dac0ea9cab4544e1cec3f31f990 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Fri, 25 Feb 2011 20:11:53 +0100 Subject: [PATCH 325/455] Update User-Agent string --- youtube-dl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index b0981da0d..c45113efe 100755 --- a/youtube-dl +++ b/youtube-dl @@ -38,7 +38,7 @@ except ImportError: from cgi import parse_qs std_headers = { - 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:2.0b10) Gecko/20100101 Firefox/4.0b10', + 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:2.0b11) Gecko/20100101 Firefox/4.0b11', 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7', 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', 'Accept-Encoding': 'gzip, deflate', From c0768454547d86d16f10b9f2ec9e3bb13b210f1d Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Fri, 25 Feb 2011 20:12:32 +0100 Subject: [PATCH 326/455] Bump version number --- LATEST_VERSION | 2 +- youtube-dl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/LATEST_VERSION b/LATEST_VERSION index 4ab209346..b8b45af99 100644 --- a/LATEST_VERSION +++ b/LATEST_VERSION @@ -1 +1 @@ -2011.01.30 +2011.02.25 diff --git a/youtube-dl b/youtube-dl index c45113efe..f8271a7fe 100755 --- a/youtube-dl +++ b/youtube-dl @@ -2720,7 +2720,7 @@ if __name__ == '__main__': # Parse command line parser = optparse.OptionParser( usage='Usage: %prog [options] url...', - version='2011.01.30', + version='2011.02.25', conflict_handler='resolve', ) From 1bd9258272dd4884b5c159ae77e20ae75176e8d1 Mon Sep 17 00:00:00 2001 From: Idan Kamara Date: Fri, 25 Feb 
2011 22:30:22 +0200 Subject: [PATCH 327/455] Fix stderr print when ffmpeg fails --- youtube-dl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index f8271a7fe..9e9be6778 100755 --- a/youtube-dl +++ b/youtube-dl @@ -2676,7 +2676,7 @@ class FFmpegExtractAudioPP(PostProcessor): status = self.run_ffmpeg(path, new_path, acodec, more_opts) if not status: - self._downloader.to_stderr(u'WARNING: error running ffmpeg' % ret) + self._downloader.to_stderr(u'WARNING: error running ffmpeg') return None try: From da273188f33ace3b48290a8cf35d36033a6fa960 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Fri, 25 Feb 2011 21:53:26 +0100 Subject: [PATCH 328/455] Catch possible exceptions when running ffprobe --- youtube-dl | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/youtube-dl b/youtube-dl index 9e9be6778..617ac1339 100755 --- a/youtube-dl +++ b/youtube-dl @@ -2619,10 +2619,13 @@ class FFmpegExtractAudioPP(PostProcessor): @staticmethod def get_audio_codec(path): - handle = subprocess.Popen(['ffprobe', '-show_streams', path], - stderr=file(os.path.devnull, 'w'), stdout=subprocess.PIPE) - output = handle.communicate()[0] - if handle.wait() != 0: + try: + handle = subprocess.Popen(['ffprobe', '-show_streams', path], + stderr=file(os.path.devnull, 'w'), stdout=subprocess.PIPE) + output = handle.communicate()[0] + if handle.wait() != 0: + return None + except (IOError, OSError): return None audio_codec = None for line in output.split('\n'): @@ -2646,7 +2649,7 @@ class FFmpegExtractAudioPP(PostProcessor): filecodec = self.get_audio_codec(path) if filecodec is None: - self._downloader.to_stderr(u'WARNING: no audio codec found in file') + self._downloader.to_stderr(u'WARNING: unable to obtain file audio codec with ffprobe') return None more_opts = [] From 820eedcb504acb9666ed589c3ed8cb1a641d0fd1 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Fri, 25 Feb 2011 21:54:16 +0100 Subject: [PATCH 329/455] Bump 
version number --- LATEST_VERSION | 2 +- youtube-dl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/LATEST_VERSION b/LATEST_VERSION index b8b45af99..4851877b4 100644 --- a/LATEST_VERSION +++ b/LATEST_VERSION @@ -1 +1 @@ -2011.02.25 +2011.02.25b diff --git a/youtube-dl b/youtube-dl index 617ac1339..072a91970 100755 --- a/youtube-dl +++ b/youtube-dl @@ -2723,7 +2723,7 @@ if __name__ == '__main__': # Parse command line parser = optparse.OptionParser( usage='Usage: %prog [options] url...', - version='2011.02.25', + version='2011.02.25b', conflict_handler='resolve', ) From 377086af3ddc9cd999225df8bfff5e16670b61eb Mon Sep 17 00:00:00 2001 From: Idan Kamara Date: Fri, 25 Feb 2011 23:19:13 +0200 Subject: [PATCH 330/455] Use '--' to separate the file argument from the options when calling ffmpeg This is to avoid a potential issue if the file name begins with a hyphen since ffmpeg will interpret it as an option --- youtube-dl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube-dl b/youtube-dl index 072a91970..79185b1e7 100755 --- a/youtube-dl +++ b/youtube-dl @@ -2620,7 +2620,7 @@ class FFmpegExtractAudioPP(PostProcessor): @staticmethod def get_audio_codec(path): try: - handle = subprocess.Popen(['ffprobe', '-show_streams', path], + handle = subprocess.Popen(['ffprobe', '-show_streams', '--', path], stderr=file(os.path.devnull, 'w'), stdout=subprocess.PIPE) output = handle.communicate()[0] if handle.wait() != 0: @@ -2638,7 +2638,7 @@ class FFmpegExtractAudioPP(PostProcessor): @staticmethod def run_ffmpeg(path, out_path, codec, more_opts): try: - ret = subprocess.call(['ffmpeg', '-y', '-i', path, '-vn', '-acodec', codec] + more_opts + [out_path], + ret = subprocess.call(['ffmpeg', '-y', '-i', path, '-vn', '-acodec', codec] + more_opts + ['--', out_path], stdout=file(os.path.devnull, 'w'), stderr=subprocess.STDOUT) return (ret == 0) except (IOError, OSError): From b58faab5e7e10c1b3f52342277098d97365825de Mon Sep 17 00:00:00 2001 
From: Ricardo Garcia Date: Sat, 26 Feb 2011 00:47:29 +0100 Subject: [PATCH 331/455] Bump version number --- LATEST_VERSION | 2 +- youtube-dl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/LATEST_VERSION b/LATEST_VERSION index 4851877b4..42b22d923 100644 --- a/LATEST_VERSION +++ b/LATEST_VERSION @@ -1 +1 @@ -2011.02.25b +2011.02.25c diff --git a/youtube-dl b/youtube-dl index 79185b1e7..9a08932b0 100755 --- a/youtube-dl +++ b/youtube-dl @@ -2723,7 +2723,7 @@ if __name__ == '__main__': # Parse command line parser = optparse.OptionParser( usage='Usage: %prog [options] url...', - version='2011.02.25b', + version='2011.02.25c', conflict_handler='resolve', ) From da54ed441267ff3a734e73871099cc1bb0b08543 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Mon, 28 Feb 2011 19:37:58 +0100 Subject: [PATCH 332/455] Support youtube.com/e/ URLs (closes #88) --- youtube-dl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index 9a08932b0..d8485952b 100755 --- a/youtube-dl +++ b/youtube-dl @@ -858,7 +858,7 @@ class InfoExtractor(object): class YoutubeIE(InfoExtractor): """Information extractor for youtube.com.""" - _VALID_URL = r'^((?:https?://)?(?:youtu\.be/|(?:\w+\.)?youtube(?:-nocookie)?\.com/)(?:(?:(?:v|embed)/)|(?:(?:watch(?:_popup)?(?:\.php)?)?(?:\?|#!?)(?:.+&)?v=)))?([0-9A-Za-z_-]+)(?(1).+)?$' + _VALID_URL = r'^((?:https?://)?(?:youtu\.be/|(?:\w+\.)?youtube(?:-nocookie)?\.com/)(?:(?:(?:v|embed|e)/)|(?:(?:watch(?:_popup)?(?:\.php)?)?(?:\?|#!?)(?:.+&)?v=)))?([0-9A-Za-z_-]+)(?(1).+)?$' _LANG_URL = r'http://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1' _LOGIN_URL = 'https://www.youtube.com/signup?next=/&gl=US&hl=en' _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en' From e3f7e05c274113265b1b780cf6e4a5215f08bc1a Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Tue, 15 Mar 2011 20:03:52 +0100 Subject: [PATCH 333/455] Avoid crash reported in issue #86 --- youtube-dl | 2 +- 
1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index d8485952b..cbfbdca9c 100755 --- a/youtube-dl +++ b/youtube-dl @@ -1079,7 +1079,7 @@ class YoutubeIE(InfoExtractor): # Decide which formats to download req_format = self._downloader.params.get('format', None) - if 'fmt_url_map' in video_info: + if 'fmt_url_map' in video_info and len(video_info['fmt_url_map']) >= 1 and ',' in video_info['fmt_url_map'][0]: url_map = dict(tuple(pair.split('|')) for pair in video_info['fmt_url_map'][0].split(',')) format_limit = self._downloader.params.get('format_limit', None) if format_limit is not None and format_limit in self._available_formats: From 2727dbf78d895885016dac52dff7fdc271a77d8f Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Tue, 15 Mar 2011 20:04:20 +0100 Subject: [PATCH 334/455] Split a couple of lines to make the code more readable --- youtube-dl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/youtube-dl b/youtube-dl index cbfbdca9c..b2cd5e87f 100755 --- a/youtube-dl +++ b/youtube-dl @@ -2620,8 +2620,8 @@ class FFmpegExtractAudioPP(PostProcessor): @staticmethod def get_audio_codec(path): try: - handle = subprocess.Popen(['ffprobe', '-show_streams', '--', path], - stderr=file(os.path.devnull, 'w'), stdout=subprocess.PIPE) + cmd = ['ffprobe', '-show_streams', '--', path] + handle = subprocess.Popen(cmd, stderr=file(os.path.devnull, 'w'), stdout=subprocess.PIPE) output = handle.communicate()[0] if handle.wait() != 0: return None @@ -2638,8 +2638,8 @@ class FFmpegExtractAudioPP(PostProcessor): @staticmethod def run_ffmpeg(path, out_path, codec, more_opts): try: - ret = subprocess.call(['ffmpeg', '-y', '-i', path, '-vn', '-acodec', codec] + more_opts + ['--', out_path], - stdout=file(os.path.devnull, 'w'), stderr=subprocess.STDOUT) + cmd = ['ffmpeg', '-y', '-i', path, '-vn', '-acodec', codec] + more_opts + ['--', out_path] + ret = subprocess.call(cmd, stdout=file(os.path.devnull, 'w'), 
stderr=subprocess.STDOUT) return (ret == 0) except (IOError, OSError): return False From 3efa45c3a25d455f610439c56c50f2c7cfd0337b Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Tue, 15 Mar 2011 20:12:10 +0100 Subject: [PATCH 335/455] Fix upload date regexp (closes #93) --- youtube-dl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index b2cd5e87f..e3bde779c 100755 --- a/youtube-dl +++ b/youtube-dl @@ -1056,7 +1056,7 @@ class YoutubeIE(InfoExtractor): # upload date upload_date = u'NA' - mobj = re.search(r'id="eow-date".*?>(.*?)', video_webpage, re.DOTALL) + mobj = re.search(r'id="eow-date.*?>(.*?)', video_webpage, re.DOTALL) if mobj is not None: upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split()) format_expressions = ['%d %B %Y', '%B %d %Y', '%b %d %Y'] From 4b0d9eed458d862ce938849a9dc88a9a56f57dc2 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Tue, 29 Mar 2011 20:32:07 +0200 Subject: [PATCH 336/455] Bump version number --- LATEST_VERSION | 2 +- youtube-dl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/LATEST_VERSION b/LATEST_VERSION index 42b22d923..295c9c4fa 100644 --- a/LATEST_VERSION +++ b/LATEST_VERSION @@ -1 +1 @@ -2011.02.25c +2011.03.29 diff --git a/youtube-dl b/youtube-dl index e3bde779c..3ac27a857 100755 --- a/youtube-dl +++ b/youtube-dl @@ -2723,7 +2723,7 @@ if __name__ == '__main__': # Parse command line parser = optparse.OptionParser( usage='Usage: %prog [options] url...', - version='2011.02.25c', + version='2011.03.29', conflict_handler='resolve', ) From e26005adea7aadf1cc64a17e9e150ed3448ac7e7 Mon Sep 17 00:00:00 2001 From: Amaury Gauthier Date: Thu, 7 Apr 2011 22:45:19 +0200 Subject: [PATCH 337/455] Deletes duplicate entry in process_info dictionary of YahooIE --- youtube-dl | 1 - 1 file changed, 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index 3ac27a857..08e959411 100755 --- a/youtube-dl +++ b/youtube-dl @@ -1713,7 +1713,6 @@ class 
YahooIE(InfoExtractor): 'thumbnail': video_thumbnail.decode('utf-8'), 'description': video_description, 'thumbnail': video_thumbnail, - 'description': video_description, 'player_url': None, }) except UnavailableVideoError: From 0ecedbdb036120849c2a7eb992ec8a993221e5f1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rog=C3=A9rio=20Brito?= Date: Wed, 20 Apr 2011 21:07:57 -0300 Subject: [PATCH 338/455] vimeo: Remove clutter in some messages. We should make a unified way of printing messages, but let's follow suit and do what the main YoutubeIE does here. --- youtube-dl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube-dl b/youtube-dl index 240b2bc7b..080490ded 100755 --- a/youtube-dl +++ b/youtube-dl @@ -1735,11 +1735,11 @@ class VimeoIE(InfoExtractor): def report_download_webpage(self, video_id): """Report webpage download.""" - self._downloader.to_screen(u'[video.vimeo] %s: Downloading webpage' % video_id) + self._downloader.to_screen(u'[vimeo] %s: Downloading webpage' % video_id) def report_extraction(self, video_id): """Report information extraction.""" - self._downloader.to_screen(u'[video.vimeo] %s: Extracting information' % video_id) + self._downloader.to_screen(u'[vimeo] %s: Extracting information' % video_id) def _real_initialize(self): return From 1e055db69ccffbacad5765887f14879bbe350ce2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rog=C3=A9rio=20Brito?= Date: Wed, 20 Apr 2011 21:15:57 -0300 Subject: [PATCH 339/455] vimeo: Ignore if we are using HTTP/S or not. 
--- youtube-dl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index 080490ded..17fb82da7 100755 --- a/youtube-dl +++ b/youtube-dl @@ -1724,7 +1724,7 @@ class VimeoIE(InfoExtractor): """Information extractor for vimeo.com.""" # _VALID_URL matches Vimeo URLs - _VALID_URL = r'(?:http://)?(?:(?:www|player).)?vimeo\.com/(?:video/)?([0-9]+)' + _VALID_URL = r'(?:https?://)?(?:(?:www|player).)?vimeo\.com/(?:video/)?([0-9]+)' def __init__(self, downloader=None): InfoExtractor.__init__(self, downloader) From 44c636df8966a1ace617b276f19b5887aa66d612 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rog=C3=A9rio=20Brito?= Date: Wed, 20 Apr 2011 21:20:55 -0300 Subject: [PATCH 340/455] vimeo: Tweak the regexp to allow some extended URLs from vimeo. This, in particular, lets me grab the videos from the beginners channel with URLs like: http://vimeo.com/groups/fivebyfive/videos/22648611 Note that the regexp *will* break for other URLs that we don't know about and that's on purpose: we don't want to accidentally grab videos that would be passed on to other information extractors. --- youtube-dl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index 17fb82da7..f3d7a3f61 100755 --- a/youtube-dl +++ b/youtube-dl @@ -1724,7 +1724,7 @@ class VimeoIE(InfoExtractor): """Information extractor for vimeo.com.""" # _VALID_URL matches Vimeo URLs - _VALID_URL = r'(?:https?://)?(?:(?:www|player).)?vimeo\.com/(?:video/)?([0-9]+)' + _VALID_URL = r'(?:https?://)?(?:(?:www|player).)?vimeo\.com/(?:groups/[^/]+/)?(?:videos?/)?([0-9]+)' def __init__(self, downloader=None): InfoExtractor.__init__(self, downloader) From 2fc31a48723fd4f84c20cf97f810f0171419bcf1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rog=C3=A9rio=20Brito?= Date: Wed, 20 Apr 2011 21:29:29 -0300 Subject: [PATCH 341/455] vimeo: Apparently, all videos in vimeo are served in ISO containers. 
--- youtube-dl | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/youtube-dl b/youtube-dl index f3d7a3f61..b734c997c 100755 --- a/youtube-dl +++ b/youtube-dl @@ -1754,7 +1754,6 @@ class VimeoIE(InfoExtractor): # At this point we have a new video self._downloader.increment_downloads() video_id = mobj.group(1) - video_extension = 'flv' # FIXME # Retrieve video webpage to extract further information request = urllib2.Request("http://vimeo.com/moogaloop/load/clip:%s" % video_id, None, std_headers) @@ -1826,7 +1825,7 @@ class VimeoIE(InfoExtractor): 'upload_date': u'NA', 'title': video_title, 'stitle': simple_title, - 'ext': video_extension.decode('utf-8'), + 'ext': u'mp4', 'thumbnail': video_thumbnail.decode('utf-8'), 'description': video_description, 'thumbnail': video_thumbnail, From 62a29bbf7bd0b0d9539aa903519f252671e4eebd Mon Sep 17 00:00:00 2001 From: knagano Date: Sat, 7 May 2011 22:53:37 +0900 Subject: [PATCH 342/455] Fixed download from Dailymotion. Fetches FLV URL from "sdURL" in addVariable("sequence") JSON, instead of addVariable("video") which does not exist today. Supports new title, uploader nickname format. 
--- youtube-dl | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/youtube-dl b/youtube-dl index 3ac27a857..377ceff58 100755 --- a/youtube-dl +++ b/youtube-dl @@ -1320,6 +1320,7 @@ class DailymotionIE(InfoExtractor): # Retrieve video webpage to extract further information request = urllib2.Request(url) + request.add_header('Cookie', 'family_filter=off') try: self.report_download_webpage(video_id) webpage = urllib2.urlopen(request).read() @@ -1329,25 +1330,29 @@ class DailymotionIE(InfoExtractor): # Extract URL, uploader and title from webpage self.report_extraction(video_id) - mobj = re.search(r'(?i)addVariable\(\"video\"\s*,\s*\"([^\"]*)\"\)', webpage) + mobj = re.search(r'(?i)addVariable\(\"sequence\"\s*,\s*\"([^\"]+?)\"\)', webpage) if mobj is None: self._downloader.trouble(u'ERROR: unable to extract media URL') return - mediaURL = urllib.unquote(mobj.group(1)) + sequence = urllib.unquote(mobj.group(1)) + mobj = re.search(r',\"sdURL\"\:\"([^\"]+?)\",', sequence) + if mobj is None: + self._downloader.trouble(u'ERROR: unable to extract media URL') + return + mediaURL = urllib.unquote(mobj.group(1)).replace('\\', '') # if needed add http://www.dailymotion.com/ if relative URL video_url = mediaURL - # '' - mobj = re.search(r'(?im)Dailymotion\s*[\-:]\s*(.+?)', webpage) + mobj = re.search(r'(?im)Dailymotion\s*-\s*(.+)\s*-\s*[^<]+?', webpage) if mobj is None: self._downloader.trouble(u'ERROR: unable to extract title') return video_title = mobj.group(1).decode('utf-8') video_title = sanitize_title(video_title) - mobj = re.search(r'(?im)(.+?)', webpage) + mobj = re.search(r'(?im)[^<]+?]+?>([^<]+?)', webpage) if mobj is None: self._downloader.trouble(u'ERROR: unable to extract uploader nickname') return From 18b7f87409bf83957c84b1b553b7cfd3efff54db Mon Sep 17 00:00:00 2001 From: rmanola Date: Mon, 6 Jun 2011 17:46:37 -0700 Subject: [PATCH 343/455] Added option to allow different audio encoding qualities and to allow specify whether erase or 
not the video when it's need to extract the audio. --- youtube-dl | 31 ++++++++++++++++++++++--------- 1 file changed, 22 insertions(+), 9 deletions(-) mode change 100755 => 100644 youtube-dl diff --git a/youtube-dl b/youtube-dl old mode 100755 new mode 100644 index 3ac27a857..fd2edef6f --- a/youtube-dl +++ b/youtube-dl @@ -2611,11 +2611,17 @@ class PostProcessor(object): class FFmpegExtractAudioPP(PostProcessor): - def __init__(self, downloader=None, preferredcodec=None): + def __init__(self, downloader=None, preferredcodec=None, preferredquality=None, keepvideo=None): PostProcessor.__init__(self, downloader) if preferredcodec is None: preferredcodec = 'best' + if preferredquality is None: + preferredquality = '128K' + if keepvideo is None: + keepvideo = False; self._preferredcodec = preferredcodec + self._preferredquality = preferredquality + self._keepvideo = keepvideo @staticmethod def get_audio_codec(path): @@ -2653,6 +2659,8 @@ class FFmpegExtractAudioPP(PostProcessor): return None more_opts = [] + if (self._preferredquality != '128K') and (self._preferredquality != '160K') and (self._preferredquality != '192K'): + self._preferredquality = '128K' if self._preferredcodec == 'best' or self._preferredcodec == filecodec: if filecodec == 'aac' or filecodec == 'mp3': # Lossless if possible @@ -2664,12 +2672,12 @@ class FFmpegExtractAudioPP(PostProcessor): # MP3 otherwise. 
acodec = 'libmp3lame' extension = 'mp3' - more_opts = ['-ab', '128k'] + more_opts = ['-ab', self._preferredquality] else: # We convert the audio (lossy) acodec = {'mp3': 'libmp3lame', 'aac': 'aac'}[self._preferredcodec] extension = self._preferredcodec - more_opts = ['-ab', '128k'] + more_opts = ['-ab', self._preferredquality] if self._preferredcodec == 'aac': more_opts += ['-f', 'adts'] @@ -2682,11 +2690,12 @@ class FFmpegExtractAudioPP(PostProcessor): self._downloader.to_stderr(u'WARNING: error running ffmpeg') return None - try: - os.remove(path) - except (IOError, OSError): - self._downloader.to_stderr(u'WARNING: Unable to remove downloaded video file') - return None + if not self._keepvideo: + try: + os.remove(path) + except (IOError, OSError): + self._downloader.to_stderr(u'WARNING: Unable to remove downloaded video file') + return None information['filepath'] = new_path return information @@ -2820,6 +2829,10 @@ if __name__ == '__main__': help='convert video files to audio-only files (requires ffmpeg and ffprobe)') postproc.add_option('--audio-format', metavar='FORMAT', dest='audioformat', default='best', help='"best", "aac" or "mp3"; best by default') + postproc.add_option('--audio-quality', metavar='QUALITY', dest='audioquality', default='128K', + help='128K, 160K or 192K; 128K by default') + postproc.add_option('-k', '--keep-video', action='store_true', dest='keepvideo', default=False, + help='keeps the video file on disk after the post-processing; the video is erased by default') parser.add_option_group(postproc) (opts, args) = parser.parse_args() @@ -2970,7 +2983,7 @@ if __name__ == '__main__': # PostProcessors if opts.extractaudio: - fd.add_post_processor(FFmpegExtractAudioPP(preferredcodec=opts.audioformat)) + fd.add_post_processor(FFmpegExtractAudioPP(preferredcodec=opts.audioformat,preferredquality=opts.audioquality,keepvideo=opts.keepvideo)) # Update version if opts.update_self: From 7745f5d88189530d2270531376c719594333c6f7 Mon Sep 17 00:00:00 2001 
From: Philipp Hagemeister Date: Tue, 21 Jun 2011 22:24:58 +0200 Subject: [PATCH 344/455] Basic blip.tv support --- youtube-dl | 77 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 77 insertions(+) diff --git a/youtube-dl b/youtube-dl index 3ac27a857..a6d0ce434 100755 --- a/youtube-dl +++ b/youtube-dl @@ -15,6 +15,7 @@ import email.utils import gzip import htmlentitydefs import httplib +import json # TODO: json for 2.5 import locale import math import netrc @@ -2563,6 +2564,80 @@ class FacebookIE(InfoExtractor): except UnavailableVideoError, err: self._downloader.trouble(u'\nERROR: unable to download video') +class BlipTVIE(InfoExtractor): + """Information extractor for blip.tv""" + + _VALID_URL = r'^(?:https?://)?(?:\w+\.)?blip.tv/(.+)$' + _URL_EXT = r'^.*\.([a-z0-9]+)$' + + @staticmethod + def suitable(url): + return (re.match(BlipTVIE._VALID_URL, url) is not None) + + def report_download_webpage(self, file_id): + """Report webpage download.""" + self._downloader.to_screen(u'[%s] %s: Downloading webpage' % (self.service_name, file_id)) + + def report_extraction(self, file_id): + """Report information extraction.""" + self._downloader.to_screen(u'[%s] %s: Extracting information' % (self.service_name, file_id)) + + @property + def service_name(self): + return u'blip.tv' + + def _simplify_title(self, title): + res = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', title) + res = res.strip(ur'_') + return res + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + if mobj is None: + self._downloader.trouble(u'ERROR: invalid URL: %s' % url) + return + + json_url = url + ('&' if '?' 
in url else '?') + 'skin=json&version=2&no_wrap=1' + request = urllib2.Request(json_url) + try: + json_code = urllib2.urlopen(request).read() + except (urllib2.URLError, httplib.HTTPException, socket.error), err: + self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % str(err)) + return + try: + json_data = json.loads(json_code) + data = json_data['Post'] + + upload_date = datetime.datetime.strptime(data['datestamp'], '%m-%d-%y %H:%M%p').strftime('%Y%m%d') + video_url = data['media']['url'] + umobj = re.match(self._URL_EXT, video_url) + if umobj is None: + raise ValueError('Can not determine filename extension') + ext = umobj.group(1) + + info = { + 'id': data['item_id'], + 'url': video_url, + 'uploader': data['display_name'], + 'upload_date': upload_date, + 'title': data['title'], + 'stitle': self._simplify_title(data['title']), + 'ext': ext, + 'format': data['media']['mimeType'], + 'thumbnail': data['thumbnailUrl'], + 'description': data['description'], + 'player_url': data['embedUrl'] + } + except (ValueError,KeyError), err: + self._downloader.trouble(u'ERROR: unable to parse video information: %s' % str(err)) + return + + try: + self._downloader.process_info(info) + except UnavailableVideoError, err: + self._downloader.trouble(u'\nERROR: unable to download video') + + class PostProcessor(object): """Post Processor class. 
@@ -2911,6 +2986,7 @@ if __name__ == '__main__': yahoo_search_ie = YahooSearchIE(yahoo_ie) deposit_files_ie = DepositFilesIE() facebook_ie = FacebookIE() + bliptv_ie = BlipTVIE() generic_ie = GenericIE() # File downloader @@ -2963,6 +3039,7 @@ if __name__ == '__main__': fd.add_info_extractor(yahoo_search_ie) fd.add_info_extractor(deposit_files_ie) fd.add_info_extractor(facebook_ie) + fd.add_info_extractor(bliptv_ie) # This must come last since it's the # fallback if none of the others work From aded78d9e213803ff5ab2cdf429c2f8578482194 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sat, 25 Jun 2011 19:26:29 +0200 Subject: [PATCH 345/455] Support for blip.tv/file URLs --- youtube-dl | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/youtube-dl b/youtube-dl index a6d0ce434..fbb0389be 100755 --- a/youtube-dl +++ b/youtube-dl @@ -2567,24 +2567,16 @@ class FacebookIE(InfoExtractor): class BlipTVIE(InfoExtractor): """Information extractor for blip.tv""" - _VALID_URL = r'^(?:https?://)?(?:\w+\.)?blip.tv/(.+)$' + _VALID_URL = r'^(?:https?://)?(?:\w+\.)?blip.tv(/.+)$' _URL_EXT = r'^.*\.([a-z0-9]+)$' @staticmethod def suitable(url): return (re.match(BlipTVIE._VALID_URL, url) is not None) - def report_download_webpage(self, file_id): - """Report webpage download.""" - self._downloader.to_screen(u'[%s] %s: Downloading webpage' % (self.service_name, file_id)) - def report_extraction(self, file_id): """Report information extraction.""" - self._downloader.to_screen(u'[%s] %s: Extracting information' % (self.service_name, file_id)) - - @property - def service_name(self): - return u'blip.tv' + self._downloader.to_screen(u'[blip.tv] %s: Extracting information' % file_id) def _simplify_title(self, title): res = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', title) @@ -2599,6 +2591,7 @@ class BlipTVIE(InfoExtractor): json_url = url + ('&' if '?' 
in url else '?') + 'skin=json&version=2&no_wrap=1' request = urllib2.Request(json_url) + self.report_extraction(mobj.group(1)) try: json_code = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: @@ -2606,7 +2599,7 @@ class BlipTVIE(InfoExtractor): return try: json_data = json.loads(json_code) - data = json_data['Post'] + data = json_data['Post'] if 'Post' in json_data else json_data upload_date = datetime.datetime.strptime(data['datestamp'], '%m-%d-%y %H:%M%p').strftime('%Y%m%d') video_url = data['media']['url'] @@ -2629,7 +2622,7 @@ class BlipTVIE(InfoExtractor): 'player_url': data['embedUrl'] } except (ValueError,KeyError), err: - self._downloader.trouble(u'ERROR: unable to parse video information: %s' % str(err)) + self._downloader.trouble(u'ERROR: unable to parse video information: %s' % repr(err)) return try: From c6b55a8d4817a0818a1923db72b0f953ab80c0d4 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Thu, 7 Jul 2011 12:12:20 +0200 Subject: [PATCH 346/455] Full youtube video descriptions, including special characters (2.6+, with fallback for older Pythons) --- youtube-dl | 38 ++++++++++++++++++++++++++++++-------- 1 file changed, 30 insertions(+), 8 deletions(-) diff --git a/youtube-dl b/youtube-dl index fbb0389be..a3522199f 100755 --- a/youtube-dl +++ b/youtube-dl @@ -15,7 +15,6 @@ import email.utils import gzip import htmlentitydefs import httplib -import json # TODO: json for 2.5 import locale import math import netrc @@ -24,20 +23,35 @@ import os.path import re import socket import string -import StringIO import subprocess import sys import time import urllib import urllib2 +import warnings import zlib +try: + import json +except ImportError: + warnings.warn('No JSON support (TODO: insert trivialjson here)') + +try: + import cStringIO as StringIO +except ImportError: + import StringIO + # parse_qs was moved from the cgi module to the urlparse module recently. 
try: from urlparse import parse_qs except ImportError: from cgi import parse_qs +try: + import lxml.etree +except ImportError: # Python < 2.6 + pass # Handled below + std_headers = { 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:2.0b11) Gecko/20100101 Firefox/4.0b11', 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7', @@ -1068,11 +1082,19 @@ class YoutubeIE(InfoExtractor): pass # description - video_description = 'No description available.' - if self._downloader.params.get('forcedescription', False): - mobj = re.search(r'', video_webpage) - if mobj is not None: - video_description = mobj.group(1) + try: + lxml.etree + except NameError: + video_description = u'No description available.' + if self._downloader.params.get('forcedescription', False): + warnings.warn(u'You are using an old Python version, install Python 2.6+ or lxml. Falling back to old video description extractor.') + mobj = re.search(r'', video_webpage) + if mobj is not None: + video_description = mobj.group(1).decode('utf-8') + else: + html_parser = lxml.etree.HTMLParser(encoding='utf-8') + vwebpage_doc = lxml.etree.parse(StringIO.StringIO(video_webpage), html_parser) + video_description = u''.join(vwebpage_doc.xpath('id("eow-description")//text()')) # token video_token = urllib.unquote_plus(video_info['token'][0]) @@ -1130,7 +1152,7 @@ class YoutubeIE(InfoExtractor): 'ext': video_extension.decode('utf-8'), 'format': (format_param is None and u'NA' or format_param.decode('utf-8')), 'thumbnail': video_thumbnail.decode('utf-8'), - 'description': video_description.decode('utf-8'), + 'description': video_description, 'player_url': player_url, }) except UnavailableVideoError, err: From 8b95c38707b8e6c9f2ce6a754d77b2b8f458cc14 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Thu, 7 Jul 2011 12:47:36 +0200 Subject: [PATCH 347/455] --writedescription option --- youtube-dl | 30 +++++++++++++++++++++++++----- 1 file changed, 25 insertions(+), 5 deletions(-) diff --git a/youtube-dl b/youtube-dl 
index a3522199f..bb0f90a9c 100755 --- a/youtube-dl +++ b/youtube-dl @@ -8,6 +8,10 @@ # Author: Paweł Paprota # Author: Gergely Imreh # License: Public domain code + +from __future__ import with_statement + +import contextlib import cookielib import ctypes import datetime @@ -301,6 +305,7 @@ class FileDownloader(object): consoletitle: Display progress in console window's titlebar. nopart: Do not use temporary .part files. updatetime: Use the Last-modified header to set output file timestamps. + writedescription: Write the video description to a .description file """ params = None @@ -496,6 +501,10 @@ class FileDownloader(object): except: pass + def report_writedescription(self, descfn): + """ Report that the description file has been written """ + self.to_screen(u'[info] Video description written to: %s' % descfn, ignore_encoding_errors=True) + def report_destination(self, filename): """Report destination filename.""" self.to_screen(u'[download] Destination: %s' % filename, ignore_encoding_errors=True) @@ -582,6 +591,16 @@ class FileDownloader(object): self.trouble(u'ERROR: unable to create directories: %s' % str(err)) return + if self.params.get('writedescription', False): + try: + descfn = filename + '.description' + with contextlib.closing(open(descfn, 'wb')) as descfile: + descfile.write(info_dict['description'].encode('utf-8')) + self.report_writedescription(descfn) + except (OSError, IOError): + self.trouble(u'ERROR: Cannot write description file: %s' % str(descfn)) + return + try: success = self._do_download(filename, info_dict['url'].encode('utf-8'), info_dict.get('player_url', None)) except (OSError, IOError), err: @@ -1086,7 +1105,7 @@ class YoutubeIE(InfoExtractor): lxml.etree except NameError: video_description = u'No description available.' 
- if self._downloader.params.get('forcedescription', False): + if self._downloader.params.get('forcedescription', False) or self._downloader.params.get('writedescription', False): warnings.warn(u'You are using an old Python version, install Python 2.6+ or lxml. Falling back to old video description extractor.') mobj = re.search(r'', video_webpage) if mobj is not None: @@ -2529,10 +2548,7 @@ class FacebookIE(InfoExtractor): pass # description - video_description = 'No description available.' - if (self._downloader.params.get('forcedescription', False) and - 'description' in video_info): - video_description = video_info['description'] + video_description = video_info.get('description', 'No description available.') url_map = video_info['video_urls'] if len(url_map.keys()) > 0: @@ -2903,6 +2919,9 @@ if __name__ == '__main__': filesystem.add_option('--no-mtime', action='store_false', dest='updatetime', help='do not use the Last-modified header to set the file modification time', default=True) + filesystem.add_option('--write-description', + action='store_true', dest='writedescription', + help='write video description to a .description file', default=False) parser.add_option_group(filesystem) postproc = optparse.OptionGroup(parser, 'Post-processing Options') @@ -3040,6 +3059,7 @@ if __name__ == '__main__': 'consoletitle': opts.consoletitle, 'nopart': opts.nopart, 'updatetime': opts.updatetime, + 'writedescription': opts.writedescription, }) fd.add_info_extractor(youtube_search_ie) fd.add_info_extractor(youtube_pl_ie) From a1cab7cead8554913ba3d0362b3d6fb11c8c7f90 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Thu, 7 Jul 2011 14:10:25 +0200 Subject: [PATCH 348/455] call increment_downloads in blip.tv extractor --- youtube-dl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/youtube-dl b/youtube-dl index bb0f90a9c..466802434 100755 --- a/youtube-dl +++ b/youtube-dl @@ -2646,6 +2646,8 @@ class BlipTVIE(InfoExtractor): raise ValueError('Can not determine filename 
extension') ext = umobj.group(1) + self._downloader.increment_downloads() + info = { 'id': data['item_id'], 'url': video_url, From 2152ee8601b7abe0481217f5de4950f268e24d08 Mon Sep 17 00:00:00 2001 From: Giovanni Visentini Date: Sat, 9 Jul 2011 14:05:36 +0000 Subject: [PATCH 349/455] Update youtube playlist for use playlist?list=id format --- youtube-dl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index 3ac27a857..96993f536 100755 --- a/youtube-dl +++ b/youtube-dl @@ -2097,7 +2097,7 @@ class YahooSearchIE(InfoExtractor): class YoutubePlaylistIE(InfoExtractor): """Information Extractor for YouTube playlists.""" - _VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/(?:(?:view_play_list|my_playlists|artist)\?.*?(p|a)=|user/.*?/user/|p/|user/.*?#[pg]/c/)([0-9A-Za-z]+)(?:/.*?/([0-9A-Za-z_-]+))?.*' + _VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/(?:(?:view_play_list|my_playlists|artist|playlist)\?.*?(p|a|list)=|user/.*?/user/|p/|user/.*?#[pg]/c/)([0-9A-Za-z]+)(?:/.*?/([0-9A-Za-z_-]+))?.*' _TEMPLATE_URL = 'http://www.youtube.com/%s?%s=%s&page=%s&gl=US&hl=en' _VIDEO_INDICATOR = r'/watch\?v=(.+?)&' _MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*' From 437d76c19a98dcc965fdf0e92356f54e5569a565 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sun, 10 Jul 2011 17:31:54 +0200 Subject: [PATCH 350/455] blip.tv support for python 2.5 with trivialjson --- youtube-dl | 123 +++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 115 insertions(+), 8 deletions(-) diff --git a/youtube-dl b/youtube-dl index 466802434..64c1f30cd 100755 --- a/youtube-dl +++ b/youtube-dl @@ -7,10 +7,9 @@ # Author: Witold Baryluk # Author: Paweł Paprota # Author: Gergely Imreh +# Author: Philipp Hagemeister # License: Public domain code - from __future__ import with_statement - import contextlib import cookielib import ctypes @@ -35,11 +34,6 @@ import urllib2 import warnings import zlib -try: - import json -except ImportError: - warnings.warn('No 
JSON support (TODO: insert trivialjson here)') - try: import cStringIO as StringIO except ImportError: @@ -66,6 +60,119 @@ std_headers = { simple_title_chars = string.ascii_letters.decode('ascii') + string.digits.decode('ascii') +try: + import json +except ImportError: # Python <2.5, use trivialjson (https://github.com/phihag/trivialjson): + import re + class json(object): + @staticmethod + def loads(s): + s = s.decode('UTF-8') + def raiseError(msg, i): + raise ValueError(msg + ' at position ' + str(i) + ' of ' + repr(s) + ': ' + repr(s[i:])) + def skipSpace(i, expectMore=True): + while i < len(s) and s[i] in ' \t\r\n': + i += 1 + if expectMore: + if i >= len(s): + raiseError('Premature end', i) + return i + def decodeEscape(match): + esc = match.group(1) + _STATIC = { + '"': '"', + '\\': '\\', + '/': '/', + 'b': unichr(0x8), + 'f': unichr(0xc), + 'n': '\n', + 'r': '\r', + 't': '\t', + } + if esc in _STATIC: + return _STATIC[esc] + if esc[0] == 'u': + if len(esc) == 1+4: + return unichr(int(esc[1:5], 16)) + if len(esc) == 5+6 and esc[5:7] == '\\u': + hi = int(esc[1:5], 16) + low = int(esc[7:11], 16) + return unichr((hi - 0xd800) * 0x400 + low - 0xdc00 + 0x10000) + raise ValueError('Unknown escape ' + str(esc)) + def parseString(i): + i += 1 + e = i + while True: + e = s.index('"', e) + bslashes = 0 + while s[e-bslashes-1] == '\\': + bslashes += 1 + if bslashes % 2 == 1: + e += 1 + continue + break + rexp = re.compile(r'\\(u[dD][89aAbB][0-9a-fA-F]{2}\\u[0-9a-fA-F]{4}|u[0-9a-fA-F]{4}|.|$)') + stri = rexp.sub(decodeEscape, s[i:e]) + return (e+1,stri) + def parseObj(i): + i += 1 + res = {} + i = skipSpace(i) + if s[i] == '}': # Empty dictionary + return (i+1,res) + while True: + if s[i] != '"': + raiseError('Expected a string object key', i) + i,key = parseString(i) + i = skipSpace(i) + if i >= len(s) or s[i] != ':': + raiseError('Expected a colon', i) + i,val = parse(i+1) + res[key] = val + i = skipSpace(i) + if s[i] == '}': + return (i+1, res) + if s[i] != ',': + 
raiseError('Expected comma or closing curly brace', i) + i = skipSpace(i+1) + def parseArray(i): + res = [] + i = skipSpace(i+1) + if s[i] == ']': # Empty array + return (i+1,res) + while True: + i,val = parse(i) + res.append(val) + i = skipSpace(i) # Raise exception if premature end + if s[i] == ']': + return (i+1, res) + if s[i] != ',': + raiseError('Expected a comma or closing bracket', i) + i = skipSpace(i+1) + def parseDiscrete(i): + for k,v in {'true': True, 'false': False, 'null': None}.items(): + if s.startswith(k, i): + return (i+len(k), v) + raiseError('Not a boolean (or null)', i) + def parseNumber(i): + mobj = re.match('^(-?(0|[1-9][0-9]*)(\.[0-9]*)?([eE][+-]?[0-9]+)?)', s[i:]) + if mobj is None: + raiseError('Not a number', i) + nums = mobj.group(1) + if '.' in nums or 'e' in nums or 'E' in nums: + return (i+len(nums), float(nums)) + return (i+len(nums), int(nums)) + CHARMAP = {'{': parseObj, '[': parseArray, '"': parseString, 't': parseDiscrete, 'f': parseDiscrete, 'n': parseDiscrete} + def parse(i): + i = skipSpace(i) + i,res = CHARMAP.get(s[i], parseNumber)(i) + i = skipSpace(i, False) + return (i,res) + i,res = parse(0) + if i < len(s): + raise ValueError('Extra data at end of input (index ' + str(i) + ' of ' + repr(s) + ': ' + repr(s[i:]) + ')') + return res + def preferredencoding(): """Get preferred encoding. 
@@ -2831,7 +2938,7 @@ if __name__ == '__main__': # Parse command line parser = optparse.OptionParser( usage='Usage: %prog [options] url...', - version='2011.03.29', + version='2011.07.09-phihag', conflict_handler='resolve', ) From 6eb08fbf8b02452015ad1e09b4c5e8c1d5aa2bfe Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sun, 10 Jul 2011 21:39:36 +0200 Subject: [PATCH 351/455] + --write-info-json --- youtube-dl | 31 ++++++++++++++++++++++++++++--- 1 file changed, 28 insertions(+), 3 deletions(-) diff --git a/youtube-dl b/youtube-dl index 64c1f30cd..578f473b8 100755 --- a/youtube-dl +++ b/youtube-dl @@ -413,6 +413,7 @@ class FileDownloader(object): nopart: Do not use temporary .part files. updatetime: Use the Last-modified header to set output file timestamps. writedescription: Write the video description to a .description file + writeinfojson: Write the video description to a .info.json file """ params = None @@ -609,8 +610,12 @@ class FileDownloader(object): pass def report_writedescription(self, descfn): - """ Report that the description file has been written """ - self.to_screen(u'[info] Video description written to: %s' % descfn, ignore_encoding_errors=True) + """ Report that the description file is being written """ + self.to_screen(u'[info] Writing video description to: %s' % descfn, ignore_encoding_errors=True) + + def report_writeinfojson(self, infofn): + """ Report that the metadata file has been written """ + self.to_screen(u'[info] Video description metadata as JSON to: %s' % infofn, ignore_encoding_errors=True) def report_destination(self, filename): """Report destination filename.""" @@ -701,13 +706,29 @@ class FileDownloader(object): if self.params.get('writedescription', False): try: descfn = filename + '.description' + self.report_writedescription(descfn) with contextlib.closing(open(descfn, 'wb')) as descfile: descfile.write(info_dict['description'].encode('utf-8')) - self.report_writedescription(descfn) except (OSError, IOError): 
self.trouble(u'ERROR: Cannot write description file: %s' % str(descfn)) return + print(repr(self.params)) + if self.params.get('writeinfojson', False): + infofn = filename + '.info.json' + self.report_writeinfojson(infofn) + try: + json.dump + except (NameError,AttributeError): + self.trouble(u'ERROR: No JSON encoder found. Update to Python 2.6+, setup a json module, or leave out --write-info-json.') + return + try: + with contextlib.closing(open(infofn, 'wb')) as infof: + json.dump(info_dict, infof) + except (OSError, IOError): + self.trouble(u'ERROR: Cannot write metadata to JSON file: %s' % str(infofn)) + return + try: success = self._do_download(filename, info_dict['url'].encode('utf-8'), info_dict.get('player_url', None)) except (OSError, IOError), err: @@ -3031,6 +3052,9 @@ if __name__ == '__main__': filesystem.add_option('--write-description', action='store_true', dest='writedescription', help='write video description to a .description file', default=False) + filesystem.add_option('--write-info-json', + action='store_true', dest='writeinfojson', + help='write video metadata to a .info.json file', default=False) parser.add_option_group(filesystem) postproc = optparse.OptionGroup(parser, 'Post-processing Options') @@ -3169,6 +3193,7 @@ if __name__ == '__main__': 'nopart': opts.nopart, 'updatetime': opts.updatetime, 'writedescription': opts.writedescription, + 'writeinfojson': opts.writeinfojson, }) fd.add_info_extractor(youtube_search_ie) fd.add_info_extractor(youtube_pl_ie) From 5623100e43b12413790c4017eafb3412e29e517f Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sun, 10 Jul 2011 23:41:19 +0200 Subject: [PATCH 352/455] remove debugging code --- youtube-dl | 1 - 1 file changed, 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index 578f473b8..762bfc3ef 100755 --- a/youtube-dl +++ b/youtube-dl @@ -713,7 +713,6 @@ class FileDownloader(object): self.trouble(u'ERROR: Cannot write description file: %s' % str(descfn)) return - print(repr(self.params)) 
if self.params.get('writeinfojson', False): infofn = filename + '.info.json' self.report_writeinfojson(infofn) From 91e6a3855b4d76cefe7415ea9a1bafd98ad6bece Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Mon, 18 Jul 2011 19:43:21 +0200 Subject: [PATCH 353/455] Be lenient about download URLs (Closes #108) --- youtube-dl | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/youtube-dl b/youtube-dl index 762bfc3ef..07904604c 100755 --- a/youtube-dl +++ b/youtube-dl @@ -62,7 +62,7 @@ simple_title_chars = string.ascii_letters.decode('ascii') + string.digits.decode try: import json -except ImportError: # Python <2.5, use trivialjson (https://github.com/phihag/trivialjson): +except ImportError: # Python <2.6, use trivialjson (https://github.com/phihag/trivialjson): import re class json(object): @staticmethod @@ -1241,6 +1241,7 @@ class YoutubeIE(InfoExtractor): html_parser = lxml.etree.HTMLParser(encoding='utf-8') vwebpage_doc = lxml.etree.parse(StringIO.StringIO(video_webpage), html_parser) video_description = u''.join(vwebpage_doc.xpath('id("eow-description")//text()')) + # TODO use another parser # token video_token = urllib.unquote_plus(video_info['token'][0]) @@ -1248,7 +1249,7 @@ class YoutubeIE(InfoExtractor): # Decide which formats to download req_format = self._downloader.params.get('format', None) - if 'fmt_url_map' in video_info and len(video_info['fmt_url_map']) >= 1 and ',' in video_info['fmt_url_map'][0]: + if 'fmt_url_map' in video_info and len(video_info['fmt_url_map']) >= 1: url_map = dict(tuple(pair.split('|')) for pair in video_info['fmt_url_map'][0].split(',')) format_limit = self._downloader.params.get('format_limit', None) if format_limit is not None and format_limit in self._available_formats: From 47b8dab29e8930a752533e74a87befd52ca4a2a8 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Fri, 22 Jul 2011 15:28:42 +0200 Subject: [PATCH 354/455] Removed inaccurate warning --- youtube-dl | 1 - 1 file changed, 1 
deletion(-) diff --git a/youtube-dl b/youtube-dl index 07904604c..17a2da2ef 100755 --- a/youtube-dl +++ b/youtube-dl @@ -1233,7 +1233,6 @@ class YoutubeIE(InfoExtractor): except NameError: video_description = u'No description available.' if self._downloader.params.get('forcedescription', False) or self._downloader.params.get('writedescription', False): - warnings.warn(u'You are using an old Python version, install Python 2.6+ or lxml. Falling back to old video description extractor.') mobj = re.search(r'', video_webpage) if mobj is not None: video_description = mobj.group(1).decode('utf-8') From c31b124d7a2c754f3ca5c6f8de8c501cfbad895a Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sun, 31 Jul 2011 18:09:53 +0200 Subject: [PATCH 355/455] Suppport for youtube video streams (Mentioned in #108) --- youtube-dl | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index 17a2da2ef..27ae816e0 100755 --- a/youtube-dl +++ b/youtube-dl @@ -1248,8 +1248,14 @@ class YoutubeIE(InfoExtractor): # Decide which formats to download req_format = self._downloader.params.get('format', None) + raw_map = None if 'fmt_url_map' in video_info and len(video_info['fmt_url_map']) >= 1: - url_map = dict(tuple(pair.split('|')) for pair in video_info['fmt_url_map'][0].split(',')) + raw_map = video_info['fmt_url_map'][0] + elif 'fmt_stream_map' in video_info and len(video_info['fmt_stream_map']) >= 1: + raw_map = video_info['fmt_stream_map'][0] + + if raw_map is not None: + url_map = dict(tuple(pair.split('|')[:2]) for pair in raw_map.split(',')) format_limit = self._downloader.params.get('format_limit', None) if format_limit is not None and format_limit in self._available_formats: format_list = self._available_formats[self._available_formats.index(format_limit):] From 0ac22e4f5a652f1b470f9daff06c1361e8f93c16 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Thu, 4 Aug 2011 00:04:55 +0200 Subject: [PATCH 356/455] Fix youtube downloads 
(Closes #135) --- youtube-dl | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/youtube-dl b/youtube-dl index 27ae816e0..0d77585a8 100755 --- a/youtube-dl +++ b/youtube-dl @@ -1248,14 +1248,11 @@ class YoutubeIE(InfoExtractor): # Decide which formats to download req_format = self._downloader.params.get('format', None) - raw_map = None - if 'fmt_url_map' in video_info and len(video_info['fmt_url_map']) >= 1: - raw_map = video_info['fmt_url_map'][0] - elif 'fmt_stream_map' in video_info and len(video_info['fmt_stream_map']) >= 1: - raw_map = video_info['fmt_stream_map'][0] - - if raw_map is not None: - url_map = dict(tuple(pair.split('|')[:2]) for pair in raw_map.split(',')) + if 'url_encoded_fmt_stream_map' in video_info and len(video_info['url_encoded_fmt_stream_map']) >= 1: + url_data_strs = video_info['url_encoded_fmt_stream_map'][0].split(',') + url_data = [dict(pairStr.split('=') for pairStr in uds.split('&')) for uds in url_data_strs] + url_map = dict((ud['itag'], urllib.unquote(ud['url'])) for ud in url_data) + format_limit = self._downloader.params.get('format_limit', None) if format_limit is not None and format_limit in self._available_formats: format_list = self._available_formats[self._available_formats.index(format_limit):] From 8126094cf14dd35c00b59a0435ac34fc0ee9ef57 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Thu, 4 Aug 2011 19:13:02 +0200 Subject: [PATCH 357/455] Fix YouTube downloads (code by Philipp Hagemeister) --- youtube-dl | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/youtube-dl b/youtube-dl index 3ac27a857..d9307216b 100755 --- a/youtube-dl +++ b/youtube-dl @@ -1079,8 +1079,10 @@ class YoutubeIE(InfoExtractor): # Decide which formats to download req_format = self._downloader.params.get('format', None) - if 'fmt_url_map' in video_info and len(video_info['fmt_url_map']) >= 1 and ',' in video_info['fmt_url_map'][0]: - url_map = dict(tuple(pair.split('|')) for pair in 
video_info['fmt_url_map'][0].split(',')) + if 'url_encoded_fmt_stream_map' in video_info and len(video_info['url_encoded_fmt_stream_map']) >= 1: + url_data_strs = video_info['url_encoded_fmt_stream_map'][0].split(',') + url_data = [dict(pairStr.split('=') for pairStr in uds.split('&')) for uds in url_data_strs] + url_map = dict((ud['itag'], urllib.unquote(ud['url'])) for ud in url_data) format_limit = self._downloader.params.get('format_limit', None) if format_limit is not None and format_limit in self._available_formats: format_list = self._available_formats[self._available_formats.index(format_limit):] From c44b9ee95e577ebe3a9ba6d69b8c5cef0d445acc Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Thu, 4 Aug 2011 19:14:19 +0200 Subject: [PATCH 358/455] Update User-Agent string --- youtube-dl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index d9307216b..892af57cf 100755 --- a/youtube-dl +++ b/youtube-dl @@ -38,7 +38,7 @@ except ImportError: from cgi import parse_qs std_headers = { - 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:2.0b11) Gecko/20100101 Firefox/4.0b11', + 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:5.0.1) Gecko/20100101 Firefox/5.0.1', 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7', 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', 'Accept-Encoding': 'gzip, deflate', From 33d507f1fe828b186dec9b61ff4fc6b5fdcf42b2 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Thu, 4 Aug 2011 19:15:14 +0200 Subject: [PATCH 359/455] Bump version number --- LATEST_VERSION | 2 +- youtube-dl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/LATEST_VERSION b/LATEST_VERSION index 295c9c4fa..8a3bb049a 100644 --- a/LATEST_VERSION +++ b/LATEST_VERSION @@ -1 +1 @@ -2011.03.29 +2011.08.04 diff --git a/youtube-dl b/youtube-dl index 892af57cf..e8b19c8d0 100755 --- a/youtube-dl +++ b/youtube-dl @@ -2725,7 +2725,7 @@ if __name__ == '__main__': # Parse command line parser = 
optparse.OptionParser( usage='Usage: %prog [options] url...', - version='2011.03.29', + version='2011.08.04', conflict_handler='resolve', ) From 6bcd846b5212b41877b80358125b2210432e080d Mon Sep 17 00:00:00 2001 From: Daniel Bolton Date: Fri, 5 Aug 2011 19:14:13 -0400 Subject: [PATCH 360/455] Add README.md (markdown file) --- README.md | 88 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 88 insertions(+) create mode 100644 README.md diff --git a/README.md b/README.md new file mode 100644 index 000000000..7fc1267c2 --- /dev/null +++ b/README.md @@ -0,0 +1,88 @@ +# youtube-dl + +## USAGE +*** +youtube-dl [OPTIONS] URL + +## DESCRIPTION +*** +**youtube-dl** is a small command-line program to download videos from +YouTube.com and a few more sites. It requires the Python interpreter, version +2.x (x being at least 5), and it is not platform specific. It should work in +your Unix box, in Windows or in Mac OS X. It is released to the public domain, +which means you can modify it, redistribute it or use it however you like. + +## OPTIONS +*** + -h, --help print this help text and exit + -v, --version print program version and exit + -U, --update update this program to latest stable version + -i, --ignore-errors continue on download errors + -r LIMIT, --rate-limit=LIMIT + download rate limit (e.g. 
50k or 44.6m) + -R RETRIES, --retries=RETRIES + number of retries (default is 10) + --playlist-start=NUMBER + playlist video to start at (default is 1) + --playlist-end=NUMBER + playlist video to end at (default is last) + --dump-user-agent display the current browser identification + +### Authentication Options + -u USERNAME, --username=USERNAME + account username + -p PASSWORD, --password=PASSWORD + account password + -n, --netrc use .netrc authentication data + +### Video Format Options + -f FORMAT, --format=FORMAT + video format code + --all-formats download all available video formats + --max-quality=FORMAT + highest quality format to download + +### Verbosity / Simulation Options + -q, --quiet activates quiet mode + -s, --simulate do not download video + -g, --get-url simulate, quiet but print URL + -e, --get-title simulate, quiet but print title + --get-thumbnail simulate, quiet but print thumbnail URL + --get-description simulate, quiet but print video description + --get-filename simulate, quiet but print output filename + --no-progress do not print progress bar + --console-title display progress in console titlebar + +### Filesystem Options + -t, --title use title in file name + -l, --literal use literal title in file name + -A, --auto-number number downloaded files starting from 00000 + -o TEMPLATE, --output=TEMPLATE + output filename template + -a FILE, --batch-file=FILE + file containing URLs to download ('-' for stdin) + -w, --no-overwrites + do not overwrite files + -c, --continue resume partially downloaded files + --cookies=FILE file to dump cookie jar to + --no-part do not use .part files + --no-mtime do not use the Last-modified header to set the file + modification time + +### Post-processing Options: + --extract-audio convert video files to audio-only files (requires + ffmpeg and ffprobe) + --audio-format=FORMAT + "best", "aac" or "mp3"; best by default + +## COPYRIGHT +*** +**youtube-dl**: Copyright © 2006-2011 Ricardo Garcia Gonzalez. 
The program is +released into the public domain by the copyright holder. This README file was +originally written by Daniel Bolton () and is +likewise released into the public domain. + +## BUGS +*** +Bugs should be reported at: + From 2bf94b311659afe0e3aeb7149edcbb192f8093e7 Mon Sep 17 00:00:00 2001 From: Daniel Bolton Date: Fri, 5 Aug 2011 19:15:57 -0400 Subject: [PATCH 361/455] Remove horizontal rules from README.md --- README.md | 5 ----- 1 file changed, 5 deletions(-) diff --git a/README.md b/README.md index 7fc1267c2..cf38e31fc 100644 --- a/README.md +++ b/README.md @@ -1,11 +1,9 @@ # youtube-dl ## USAGE -*** youtube-dl [OPTIONS] URL ## DESCRIPTION -*** **youtube-dl** is a small command-line program to download videos from YouTube.com and a few more sites. It requires the Python interpreter, version 2.x (x being at least 5), and it is not platform specific. It should work in @@ -13,7 +11,6 @@ your Unix box, in Windows or in Mac OS X. It is released to the public domain, which means you can modify it, redistribute it or use it however you like. ## OPTIONS -*** -h, --help print this help text and exit -v, --version print program version and exit -U, --update update this program to latest stable version @@ -76,13 +73,11 @@ which means you can modify it, redistribute it or use it however you like. "best", "aac" or "mp3"; best by default ## COPYRIGHT -*** **youtube-dl**: Copyright © 2006-2011 Ricardo Garcia Gonzalez. The program is released into the public domain by the copyright holder. This README file was originally written by Daniel Bolton () and is likewise released into the public domain. 
## BUGS -*** Bugs should be reported at: From f137bef973729bf1da0f8dfe244d5ff24cb2ad23 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sat, 6 Aug 2011 11:05:57 +0200 Subject: [PATCH 362/455] Fix RTMP streams and ignore url-less entries --- youtube-dl | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/youtube-dl b/youtube-dl index 0d77585a8..76d41c448 100755 --- a/youtube-dl +++ b/youtube-dl @@ -1248,9 +1248,14 @@ class YoutubeIE(InfoExtractor): # Decide which formats to download req_format = self._downloader.params.get('format', None) - if 'url_encoded_fmt_stream_map' in video_info and len(video_info['url_encoded_fmt_stream_map']) >= 1: + if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'): + self.report_rtmp_download() + video_url_list = [(None, video_info['conn'][0])] + print(repr(video_info['conn'][0])) + elif 'url_encoded_fmt_stream_map' in video_info and len(video_info['url_encoded_fmt_stream_map']) >= 1: url_data_strs = video_info['url_encoded_fmt_stream_map'][0].split(',') url_data = [dict(pairStr.split('=') for pairStr in uds.split('&')) for uds in url_data_strs] + url_data = filter(lambda ud: 'itag' in ud and 'url' in ud, url_data) url_map = dict((ud['itag'], urllib.unquote(ud['url'])) for ud in url_data) format_limit = self._downloader.params.get('format_limit', None) @@ -1272,11 +1277,6 @@ class YoutubeIE(InfoExtractor): self._downloader.trouble(u'ERROR: requested format not available') return video_url_list = [(req_format, url_map[req_format])] # Specific format - - elif 'conn' in video_info and video_info['conn'][0].startswith('rtmp'): - self.report_rtmp_download() - video_url_list = [(None, video_info['conn'][0])] - else: self._downloader.trouble(u'ERROR: no fmt_url_map or conn information found in video info') return From 134cff47abe76b180d9a6cd9e9afc364eb52f332 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sat, 6 Aug 2011 11:20:28 +0200 Subject: [PATCH 363/455] Remove debugging 
information --- youtube-dl | 1 - 1 file changed, 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index 76d41c448..3b06908c9 100755 --- a/youtube-dl +++ b/youtube-dl @@ -1251,7 +1251,6 @@ class YoutubeIE(InfoExtractor): if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'): self.report_rtmp_download() video_url_list = [(None, video_info['conn'][0])] - print(repr(video_info['conn'][0])) elif 'url_encoded_fmt_stream_map' in video_info and len(video_info['url_encoded_fmt_stream_map']) >= 1: url_data_strs = video_info['url_encoded_fmt_stream_map'][0].split(',') url_data = [dict(pairStr.split('=') for pairStr in uds.split('&')) for uds in url_data_strs] From 0a3c8b6291bb9750115f5188c8500e624c5ab449 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sat, 6 Aug 2011 11:47:53 +0200 Subject: [PATCH 364/455] Use alternative imports for Python 2.4 (Closes #138) --- youtube-dl | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/youtube-dl b/youtube-dl index 3b06908c9..9f391db0d 100755 --- a/youtube-dl +++ b/youtube-dl @@ -12,9 +12,7 @@ from __future__ import with_statement import contextlib import cookielib -import ctypes import datetime -import email.utils import gzip import htmlentitydefs import httplib @@ -34,6 +32,13 @@ import urllib2 import warnings import zlib +if os.name == 'nt': + import ctypes + +try: + import email.utils +except ImportError: # Python 2.4 + import email.Utils try: import cStringIO as StringIO except ImportError: From 1293ce58acc898cf8b423c93b45f227c26ee9f96 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sat, 6 Aug 2011 12:16:07 +0200 Subject: [PATCH 365/455] Fix Python 2.4 compatibility --- youtube-dl | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/youtube-dl b/youtube-dl index 9f391db0d..81dd4b83b 100755 --- a/youtube-dl +++ b/youtube-dl @@ -9,8 +9,6 @@ # Author: Gergely Imreh # Author: Philipp Hagemeister # License: Public domain code -from __future__ 
import with_statement -import contextlib import cookielib import datetime import gzip @@ -712,8 +710,11 @@ class FileDownloader(object): try: descfn = filename + '.description' self.report_writedescription(descfn) - with contextlib.closing(open(descfn, 'wb')) as descfile: + descfile = open(descfn, 'wb') + try: descfile.write(info_dict['description'].encode('utf-8')) + finally: + descfile.close() except (OSError, IOError): self.trouble(u'ERROR: Cannot write description file: %s' % str(descfn)) return @@ -727,8 +728,11 @@ class FileDownloader(object): self.trouble(u'ERROR: No JSON encoder found. Update to Python 2.6+, setup a json module, or leave out --write-info-json.') return try: - with contextlib.closing(open(infofn, 'wb')) as infof: + infof = open(infofn, 'wb') + try: json.dump(info_dict, infof) + finally: + infof.close() except (OSError, IOError): self.trouble(u'ERROR: Cannot write metadata to JSON file: %s' % str(infofn)) return @@ -2761,7 +2765,11 @@ class BlipTVIE(InfoExtractor): self._downloader.trouble(u'ERROR: invalid URL: %s' % url) return - json_url = url + ('&' if '?' in url else '?') + 'skin=json&version=2&no_wrap=1' + if '?' in url: + cchar = '&' + else: + cchar = '?' 
+ json_url = url + cchar + 'skin=json&version=2&no_wrap=1' request = urllib2.Request(json_url) self.report_extraction(mobj.group(1)) try: @@ -2771,7 +2779,10 @@ class BlipTVIE(InfoExtractor): return try: json_data = json.loads(json_code) - data = json_data['Post'] if 'Post' in json_data else json_data + if 'Post' in json_data: + data = json_data['Post'] + else: + data = json_data upload_date = datetime.datetime.strptime(data['datestamp'], '%m-%d-%y %H:%M%p').strftime('%Y%m%d') video_url = data['media']['url'] From f3dc18d874eb4fd5f6f9099f1e1a13bf8548c1ff Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sun, 7 Aug 2011 00:02:50 +0200 Subject: [PATCH 366/455] youtube: Better error messages --- youtube-dl | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/youtube-dl b/youtube-dl index 81dd4b83b..c334dee5e 100755 --- a/youtube-dl +++ b/youtube-dl @@ -1286,7 +1286,7 @@ class YoutubeIE(InfoExtractor): return video_url_list = [(req_format, url_map[req_format])] # Specific format else: - self._downloader.trouble(u'ERROR: no fmt_url_map or conn information found in video info') + self._downloader.trouble(u'ERROR: no conn or url_encoded_fmt_stream_map information found in video info') return for format_param, video_real_url in video_url_list: @@ -1296,7 +1296,6 @@ class YoutubeIE(InfoExtractor): # Extension video_extension = self._video_extensions.get(format_param, 'flv') - # Find the video URL in fmt_url_map or conn paramters try: # Process video information self._downloader.process_info({ From 8519c32d25e67efc77e74440f42beac6af7d1204 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sun, 7 Aug 2011 00:29:25 +0200 Subject: [PATCH 367/455] Use parse_qs instead of homebrewn parsing --- youtube-dl | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/youtube-dl b/youtube-dl index c334dee5e..a809759ab 100755 --- a/youtube-dl +++ b/youtube-dl @@ -1262,9 +1262,9 @@ class YoutubeIE(InfoExtractor): video_url_list = [(None, 
video_info['conn'][0])] elif 'url_encoded_fmt_stream_map' in video_info and len(video_info['url_encoded_fmt_stream_map']) >= 1: url_data_strs = video_info['url_encoded_fmt_stream_map'][0].split(',') - url_data = [dict(pairStr.split('=') for pairStr in uds.split('&')) for uds in url_data_strs] + url_data = [parse_qs(uds) for uds in url_data_strs] url_data = filter(lambda ud: 'itag' in ud and 'url' in ud, url_data) - url_map = dict((ud['itag'], urllib.unquote(ud['url'])) for ud in url_data) + url_map = dict((ud['itag'][0], ud['url'][0]) for ud in url_data) format_limit = self._downloader.params.get('format_limit', None) if format_limit is not None and format_limit in self._available_formats: @@ -2720,7 +2720,6 @@ class FacebookIE(InfoExtractor): # Extension video_extension = self._video_extensions.get(format_param, 'mp4') - # Find the video URL in fmt_url_map or conn paramters try: # Process video information self._downloader.process_info({ From 86e709d3dee1c0ac6f21edbd11ba92c026bef7bb Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sun, 7 Aug 2011 13:01:09 +0200 Subject: [PATCH 368/455] Fix youtu.be links (Closes #142) --- youtube-dl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index a809759ab..2a1908d3d 100755 --- a/youtube-dl +++ b/youtube-dl @@ -1028,7 +1028,7 @@ class InfoExtractor(object): class YoutubeIE(InfoExtractor): """Information extractor for youtube.com.""" - _VALID_URL = r'^((?:https?://)?(?:youtu\.be/|(?:\w+\.)?youtube(?:-nocookie)?\.com/)(?:(?:(?:v|embed|e)/)|(?:(?:watch(?:_popup)?(?:\.php)?)?(?:\?|#!?)(?:.+&)?v=)))?([0-9A-Za-z_-]+)(?(1).+)?$' + _VALID_URL = r'^((?:https?://)?(?:youtu\.be/|(?:\w+\.)?youtube(?:-nocookie)?\.com/)(?:(?:(?:v|embed|e)/)|(?:(?:watch(?:_popup)?(?:\.php)?)?(?:\?|#!?)(?:.+&)?v=))?)?([0-9A-Za-z_-]+)(?(1).+)?$' _LANG_URL = r'http://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1' _LOGIN_URL = 'https://www.youtube.com/signup?next=/&gl=US&hl=en' _AGE_URL 
= 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en' From 1cab2c6dcf6fae416e08eea368f296b249b2c4bb Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Thu, 18 Aug 2011 09:31:36 +0200 Subject: [PATCH 369/455] Fix blip.tv regular expression to not match blipXtv --- youtube-dl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index 2a1908d3d..6a4209222 100755 --- a/youtube-dl +++ b/youtube-dl @@ -2741,7 +2741,7 @@ class FacebookIE(InfoExtractor): class BlipTVIE(InfoExtractor): """Information extractor for blip.tv""" - _VALID_URL = r'^(?:https?://)?(?:\w+\.)?blip.tv(/.+)$' + _VALID_URL = r'^(?:https?://)?(?:\w+\.)?blip\.tv(/.+)$' _URL_EXT = r'^.*\.([a-z0-9]+)$' @staticmethod From e9cb9c2811ab26ef54cf410b0edc52ae56a5158e Mon Sep 17 00:00:00 2001 From: Georgi Valkov Date: Tue, 23 Aug 2011 14:45:26 +0300 Subject: [PATCH 370/455] Add vim modeline --- youtube-dl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/youtube-dl b/youtube-dl index e8b19c8d0..63051bb33 100755 --- a/youtube-dl +++ b/youtube-dl @@ -3001,3 +3001,5 @@ if __name__ == '__main__': sys.exit(u'ERROR: fixed output name but more than one file to download') except KeyboardInterrupt: sys.exit(u'\nERROR: Interrupted by user') + +# vim: set ts=4 sw=4 sts=4 noet ai si filetype=python: From 2770590d5a4f2cac5eb11c0b9479e51cafbe0709 Mon Sep 17 00:00:00 2001 From: Georgi Valkov Date: Tue, 23 Aug 2011 14:58:22 +0300 Subject: [PATCH 371/455] Use module metadata variables instead of comments --- youtube-dl | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/youtube-dl b/youtube-dl index 63051bb33..69ff8d9f4 100755 --- a/youtube-dl +++ b/youtube-dl @@ -1,13 +1,18 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- -# Author: Ricardo Garcia Gonzalez -# Author: Danny Colligan -# Author: Benjamin Johnson -# Author: Vasyl' Vavrychuk -# Author: Witold Baryluk -# Author: Paweł Paprota -# Author: Gergely Imreh -# License: Public domain code 
+ +__author__ = ( + "Ricardo Garcia Gonzalez", + "Danny Colligan", + "Benjamin Johnson", + "Vasyl' Vavrychuk", + "Witold Baryluk", + "Paweł Paprota", + "Gergely Imreh", + ) + +__license__ = "Public Domain" + import cookielib import ctypes import datetime From 7a9054ec79febd8befb65dada2899228f642d0a3 Mon Sep 17 00:00:00 2001 From: Georgi Valkov Date: Tue, 23 Aug 2011 15:01:51 +0300 Subject: [PATCH 372/455] Fix small indentation inconsistencies --- youtube-dl | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/youtube-dl b/youtube-dl index 69ff8d9f4..d64ec8134 100755 --- a/youtube-dl +++ b/youtube-dl @@ -711,7 +711,7 @@ class FileDownloader(object): else: # Examine the reported length if (content_length is not None and - (resume_len - 100 < long(content_length) < resume_len + 100)): + (resume_len - 100 < long(content_length) < resume_len + 100)): # The file had already been fully downloaded. # Explanation to the above condition: in issue #175 it was revealed that # YouTube sometimes adds or removes a few bytes from the end of the file, @@ -1690,8 +1690,8 @@ class YahooIE(InfoExtractor): yv_lg = 'R0xx6idZnW2zlrKP8xxAIR' # not sure what this represents yv_bitrate = '700' # according to Wikipedia this is hard-coded request = urllib2.Request('http://cosmos.bcst.yahoo.com/up/yep/process/getPlaylistFOP.php?node_id=' + video_id + - '&tech=flash&mode=playlist&lg=' + yv_lg + '&bitrate=' + yv_bitrate + '&vidH=' + yv_video_height + - '&vidW=' + yv_video_width + '&swf=as3&rd=video.yahoo.com&tk=null&adsupported=v1,v2,&eventid=1301797') + '&tech=flash&mode=playlist&lg=' + yv_lg + '&bitrate=' + yv_bitrate + '&vidH=' + yv_video_height + + '&vidW=' + yv_video_width + '&swf=as3&rd=video.yahoo.com&tk=null&adsupported=v1,v2,&eventid=1301797') try: self.report_download_webpage(video_id) webpage = urllib2.urlopen(request).read() @@ -2260,9 +2260,9 @@ class YoutubeUserIE(InfoExtractor): video_ids = video_ids[playliststart:] else: video_ids = 
video_ids[playliststart:playlistend] - + self._downloader.to_screen("[youtube] user %s: Collected %d video ids (downloading %d of them)" % - (username, all_ids_count, len(video_ids))) + (username, all_ids_count, len(video_ids))) for video_id in video_ids: self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % video_id) @@ -2515,7 +2515,7 @@ class FacebookIE(InfoExtractor): # description video_description = 'No description available.' if (self._downloader.params.get('forcedescription', False) and - 'description' in video_info): + 'description' in video_info): video_description = video_info['description'] url_map = video_info['video_urls'] From 5fb3df4aff7589a6a346578affd0810d079c89c1 Mon Sep 17 00:00:00 2001 From: Georgi Valkov Date: Tue, 23 Aug 2011 15:37:35 +0300 Subject: [PATCH 373/455] Move update_self out of __main__ for clarity --- youtube-dl | 51 ++++++++++++++++++++++++++++----------------------- 1 file changed, 28 insertions(+), 23 deletions(-) diff --git a/youtube-dl b/youtube-dl index d64ec8134..fe1e6b021 100755 --- a/youtube-dl +++ b/youtube-dl @@ -2698,34 +2698,39 @@ class FFmpegExtractAudioPP(PostProcessor): information['filepath'] = new_path return information -### MAIN PROGRAM ### + +def updateSelf(downloader, filename): + ''' Update the program file with the latest version from the repository ''' + # Note: downloader only used for options + if not os.access(filename, os.W_OK): + sys.exit('ERROR: no write permissions on %s' % filename) + + downloader.to_screen('Updating to latest stable version...') + + try: + latest_url = 'http://github.com/rg3/youtube-dl/raw/master/LATEST_VERSION' + latest_version = urllib.urlopen(latest_url).read().strip() + prog_url = 'http://github.com/rg3/youtube-dl/raw/%s/youtube-dl' % latest_version + newcontent = urllib.urlopen(prog_url).read() + except (IOError, OSError), err: + sys.exit('ERROR: unable to download latest version') + + try: + stream = open(filename, 'w') + stream.write(newcontent) + stream.close() + 
except (IOError, OSError), err: + sys.exit('ERROR: unable to overwrite current version') + + downloader.to_screen('Updated to version %s' % latest_version) + + if __name__ == '__main__': try: # Modules needed only when running the main program import getpass import optparse - # Function to update the program file with the latest version from the repository. - def update_self(downloader, filename): - # Note: downloader only used for options - if not os.access(filename, os.W_OK): - sys.exit('ERROR: no write permissions on %s' % filename) - - downloader.to_screen('Updating to latest stable version...') - try: - latest_url = 'http://github.com/rg3/youtube-dl/raw/master/LATEST_VERSION' - latest_version = urllib.urlopen(latest_url).read().strip() - prog_url = 'http://github.com/rg3/youtube-dl/raw/%s/youtube-dl' % latest_version - newcontent = urllib.urlopen(prog_url).read() - except (IOError, OSError), err: - sys.exit('ERROR: unable to download latest version') - try: - stream = open(filename, 'w') - stream.write(newcontent) - stream.close() - except (IOError, OSError), err: - sys.exit('ERROR: unable to overwrite current version') - downloader.to_screen('Updated to version %s' % latest_version) # Parse command line parser = optparse.OptionParser( @@ -2981,7 +2986,7 @@ if __name__ == '__main__': # Update version if opts.update_self: - update_self(fd, sys.argv[0]) + updateSelf(fd, sys.argv[0]) # Maybe do nothing if len(all_urls) < 1: From 4f9f96f646cb8df69f77b04bf6af9f4a50aa0b47 Mon Sep 17 00:00:00 2001 From: Georgi Valkov Date: Tue, 23 Aug 2011 15:53:36 +0300 Subject: [PATCH 374/455] Option parsing refactoring ; Moved version string to __version__ Brings terser option formatting to youtube-dl: from: -u USERNAME, --username USERNAME to: -u, --username USERNAME --- youtube-dl | 250 ++++++++++++++++++++++++++++++----------------------- 1 file changed, 142 insertions(+), 108 deletions(-) diff --git a/youtube-dl b/youtube-dl index fe1e6b021..d093e2558 100755 --- a/youtube-dl 
+++ b/youtube-dl @@ -12,6 +12,7 @@ __author__ = ( ) __license__ = "Public Domain" +__version__ = '2011.08.04' import cookielib import ctypes @@ -2725,116 +2726,149 @@ def updateSelf(downloader, filename): downloader.to_screen('Updated to version %s' % latest_version) +def parseOpts(): + # Deferred imports + import getpass + import optparse + + def _format_option_string(option): + ''' ('-o', '--option') -> -o, --format METAVAR''' + + opts = [] + + if option._short_opts: opts.append(option._short_opts[0]) + if option._long_opts: opts.append(option._long_opts[0]) + if len(opts) > 1: opts.insert(1, ', ') + + if option.takes_value(): opts.append(' %s' % option.metavar) + + return "".join(opts) + + fmt = optparse.IndentedHelpFormatter() + fmt.format_option_strings = _format_option_string + + kw = { + 'version' : __version__, + 'formatter' : fmt, + 'usage' : 'Usage : %prog [options] url...', + 'conflict_handler' : 'resolve', + } + + parser = optparse.OptionParser(**kw) + + # option groups + general = optparse.OptionGroup(parser, 'General Options') + authentication = optparse.OptionGroup(parser, 'Authentication Options') + video_format = optparse.OptionGroup(parser, 'Video Format Options') + postproc = optparse.OptionGroup(parser, 'Post-processing Options') + filesystem = optparse.OptionGroup(parser, 'Filesystem Options') + verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options') + + general.add_option('-h', '--help', + action='help', help='print this help text and exit') + general.add_option('-v', '--version', + action='version', help='print program version and exit') + general.add_option('-U', '--update', + action='store_true', dest='update_self', help='update this program to latest stable version') + general.add_option('-i', '--ignore-errors', + action='store_true', dest='ignoreerrors', help='continue on download errors', default=False) + general.add_option('-r', '--rate-limit', + dest='ratelimit', metavar='LIMIT', help='download rate limit (e.g. 
50k or 44.6m)') + general.add_option('-R', '--retries', + dest='retries', metavar='RETRIES', help='number of retries (default is 10)', default=10) + general.add_option('--playlist-start', + dest='playliststart', metavar='NUMBER', help='playlist video to start at (default is 1)', default=1) + general.add_option('--playlist-end', + dest='playlistend', metavar='NUMBER', help='playlist video to end at (default is last)', default=-1) + general.add_option('--dump-user-agent', + action='store_true', dest='dump_user_agent', + help='display the current browser identification', default=False) + + authentication.add_option('-u', '--username', + dest='username', metavar='USERNAME', help='account username') + authentication.add_option('-p', '--password', + dest='password', metavar='PASSWORD', help='account password') + authentication.add_option('-n', '--netrc', + action='store_true', dest='usenetrc', help='use .netrc authentication data', default=False) + + + video_format.add_option('-f', '--format', + action='store', dest='format', metavar='FORMAT', help='video format code') + video_format.add_option('--all-formats', + action='store_const', dest='format', help='download all available video formats', const='-1') + video_format.add_option('--max-quality', + action='store', dest='format_limit', metavar='FORMAT', help='highest quality format to download') + + + verbosity.add_option('-q', '--quiet', + action='store_true', dest='quiet', help='activates quiet mode', default=False) + verbosity.add_option('-s', '--simulate', + action='store_true', dest='simulate', help='do not download video', default=False) + verbosity.add_option('-g', '--get-url', + action='store_true', dest='geturl', help='simulate, quiet but print URL', default=False) + verbosity.add_option('-e', '--get-title', + action='store_true', dest='gettitle', help='simulate, quiet but print title', default=False) + verbosity.add_option('--get-thumbnail', + action='store_true', dest='getthumbnail', + help='simulate, quiet 
but print thumbnail URL', default=False) + verbosity.add_option('--get-description', + action='store_true', dest='getdescription', + help='simulate, quiet but print video description', default=False) + verbosity.add_option('--get-filename', + action='store_true', dest='getfilename', + help='simulate, quiet but print output filename', default=False) + verbosity.add_option('--no-progress', + action='store_true', dest='noprogress', help='do not print progress bar', default=False) + verbosity.add_option('--console-title', + action='store_true', dest='consoletitle', + help='display progress in console titlebar', default=False) + + + filesystem.add_option('-t', '--title', + action='store_true', dest='usetitle', help='use title in file name', default=False) + filesystem.add_option('-l', '--literal', + action='store_true', dest='useliteral', help='use literal title in file name', default=False) + filesystem.add_option('-A', '--auto-number', + action='store_true', dest='autonumber', + help='number downloaded files starting from 00000', default=False) + filesystem.add_option('-o', '--output', + dest='outtmpl', metavar='TEMPLATE', help='output filename template') + filesystem.add_option('-a', '--batch-file', + dest='batchfile', metavar='FILE', help='file containing URLs to download (\'-\' for stdin)') + filesystem.add_option('-w', '--no-overwrites', + action='store_true', dest='nooverwrites', help='do not overwrite files', default=False) + filesystem.add_option('-c', '--continue', + action='store_true', dest='continue_dl', help='resume partially downloaded files', default=False) + filesystem.add_option('--cookies', + dest='cookiefile', metavar='FILE', help='file to dump cookie jar to') + filesystem.add_option('--no-part', + action='store_true', dest='nopart', help='do not use .part files', default=False) + filesystem.add_option('--no-mtime', + action='store_false', dest='updatetime', + help='do not use the Last-modified header to set the file modification time', default=True) 
+ + + postproc.add_option('--extract-audio', action='store_true', dest='extractaudio', default=False, + help='convert video files to audio-only files (requires ffmpeg and ffprobe)') + postproc.add_option('--audio-format', metavar='FORMAT', dest='audioformat', default='best', + help='"best", "aac" or "mp3"; best by default') + + + parser.add_option_group(general) + parser.add_option_group(filesystem) + parser.add_option_group(verbosity) + parser.add_option_group(video_format) + parser.add_option_group(authentication) + parser.add_option_group(postproc) + + opts, args = parser.parse_args() + + return parser, opts, args + + if __name__ == '__main__': try: - # Modules needed only when running the main program - import getpass - import optparse - - - # Parse command line - parser = optparse.OptionParser( - usage='Usage: %prog [options] url...', - version='2011.08.04', - conflict_handler='resolve', - ) - - parser.add_option('-h', '--help', - action='help', help='print this help text and exit') - parser.add_option('-v', '--version', - action='version', help='print program version and exit') - parser.add_option('-U', '--update', - action='store_true', dest='update_self', help='update this program to latest stable version') - parser.add_option('-i', '--ignore-errors', - action='store_true', dest='ignoreerrors', help='continue on download errors', default=False) - parser.add_option('-r', '--rate-limit', - dest='ratelimit', metavar='LIMIT', help='download rate limit (e.g. 
50k or 44.6m)') - parser.add_option('-R', '--retries', - dest='retries', metavar='RETRIES', help='number of retries (default is 10)', default=10) - parser.add_option('--playlist-start', - dest='playliststart', metavar='NUMBER', help='playlist video to start at (default is 1)', default=1) - parser.add_option('--playlist-end', - dest='playlistend', metavar='NUMBER', help='playlist video to end at (default is last)', default=-1) - parser.add_option('--dump-user-agent', - action='store_true', dest='dump_user_agent', - help='display the current browser identification', default=False) - - authentication = optparse.OptionGroup(parser, 'Authentication Options') - authentication.add_option('-u', '--username', - dest='username', metavar='USERNAME', help='account username') - authentication.add_option('-p', '--password', - dest='password', metavar='PASSWORD', help='account password') - authentication.add_option('-n', '--netrc', - action='store_true', dest='usenetrc', help='use .netrc authentication data', default=False) - parser.add_option_group(authentication) - - video_format = optparse.OptionGroup(parser, 'Video Format Options') - video_format.add_option('-f', '--format', - action='store', dest='format', metavar='FORMAT', help='video format code') - video_format.add_option('--all-formats', - action='store_const', dest='format', help='download all available video formats', const='-1') - video_format.add_option('--max-quality', - action='store', dest='format_limit', metavar='FORMAT', help='highest quality format to download') - parser.add_option_group(video_format) - - verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options') - verbosity.add_option('-q', '--quiet', - action='store_true', dest='quiet', help='activates quiet mode', default=False) - verbosity.add_option('-s', '--simulate', - action='store_true', dest='simulate', help='do not download video', default=False) - verbosity.add_option('-g', '--get-url', - action='store_true', dest='geturl', 
help='simulate, quiet but print URL', default=False) - verbosity.add_option('-e', '--get-title', - action='store_true', dest='gettitle', help='simulate, quiet but print title', default=False) - verbosity.add_option('--get-thumbnail', - action='store_true', dest='getthumbnail', - help='simulate, quiet but print thumbnail URL', default=False) - verbosity.add_option('--get-description', - action='store_true', dest='getdescription', - help='simulate, quiet but print video description', default=False) - verbosity.add_option('--get-filename', - action='store_true', dest='getfilename', - help='simulate, quiet but print output filename', default=False) - verbosity.add_option('--no-progress', - action='store_true', dest='noprogress', help='do not print progress bar', default=False) - verbosity.add_option('--console-title', - action='store_true', dest='consoletitle', - help='display progress in console titlebar', default=False) - parser.add_option_group(verbosity) - - filesystem = optparse.OptionGroup(parser, 'Filesystem Options') - filesystem.add_option('-t', '--title', - action='store_true', dest='usetitle', help='use title in file name', default=False) - filesystem.add_option('-l', '--literal', - action='store_true', dest='useliteral', help='use literal title in file name', default=False) - filesystem.add_option('-A', '--auto-number', - action='store_true', dest='autonumber', - help='number downloaded files starting from 00000', default=False) - filesystem.add_option('-o', '--output', - dest='outtmpl', metavar='TEMPLATE', help='output filename template') - filesystem.add_option('-a', '--batch-file', - dest='batchfile', metavar='FILE', help='file containing URLs to download (\'-\' for stdin)') - filesystem.add_option('-w', '--no-overwrites', - action='store_true', dest='nooverwrites', help='do not overwrite files', default=False) - filesystem.add_option('-c', '--continue', - action='store_true', dest='continue_dl', help='resume partially downloaded files', default=False) - 
filesystem.add_option('--cookies', - dest='cookiefile', metavar='FILE', help='file to dump cookie jar to') - filesystem.add_option('--no-part', - action='store_true', dest='nopart', help='do not use .part files', default=False) - filesystem.add_option('--no-mtime', - action='store_false', dest='updatetime', - help='do not use the Last-modified header to set the file modification time', default=True) - parser.add_option_group(filesystem) - - postproc = optparse.OptionGroup(parser, 'Post-processing Options') - postproc.add_option('--extract-audio', action='store_true', dest='extractaudio', default=False, - help='convert video files to audio-only files (requires ffmpeg and ffprobe)') - postproc.add_option('--audio-format', metavar='FORMAT', dest='audioformat', default='best', - help='"best", "aac" or "mp3"; best by default') - parser.add_option_group(postproc) - - (opts, args) = parser.parse_args() + parser, opts, args = parseOpts() # Open appropriate CookieJar if opts.cookiefile is None: From 51c8e53ffe62a126a89a5a5ebbaf360a639c9352 Mon Sep 17 00:00:00 2001 From: Georgi Valkov Date: Tue, 23 Aug 2011 16:42:51 +0300 Subject: [PATCH 375/455] Set help formatter width to terminal width (prevents wrapping) --- youtube-dl | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index d093e2558..ac27afdc3 100755 --- a/youtube-dl +++ b/youtube-dl @@ -2744,7 +2744,14 @@ def parseOpts(): return "".join(opts) - fmt = optparse.IndentedHelpFormatter() + max_width = 80 + max_help_position = 80 + + # No need to wrap help messages if we're on a wide console + columns = os.environ.get('COLUMNS', None) + if columns: max_width = columns + + fmt = optparse.IndentedHelpFormatter(width=max_width, max_help_position=max_help_position) fmt.format_option_strings = _format_option_string kw = { From 5adcaa43854a4b6bfd0d5e01304bebc7a846fd3d Mon Sep 17 00:00:00 2001 From: Georgi Valkov Date: Tue, 23 Aug 2011 16:48:08 +0300 Subject: [PATCH 376/455] Refactor 
main function --- youtube-dl | 344 +++++++++++++++++++++++++++-------------------------- 1 file changed, 173 insertions(+), 171 deletions(-) diff --git a/youtube-dl b/youtube-dl index ac27afdc3..34a60afff 100755 --- a/youtube-dl +++ b/youtube-dl @@ -2872,180 +2872,182 @@ def parseOpts(): return parser, opts, args +def main(): + parser, opts, args = parseOpts() + + # Open appropriate CookieJar + if opts.cookiefile is None: + jar = cookielib.CookieJar() + else: + try: + jar = cookielib.MozillaCookieJar(opts.cookiefile) + if os.path.isfile(opts.cookiefile) and os.access(opts.cookiefile, os.R_OK): + jar.load() + except (IOError, OSError), err: + sys.exit(u'ERROR: unable to open cookie file') + + # Dump user agent + if opts.dump_user_agent: + print std_headers['User-Agent'] + sys.exit(0) + + # General configuration + cookie_processor = urllib2.HTTPCookieProcessor(jar) + urllib2.install_opener(urllib2.build_opener(urllib2.ProxyHandler(), cookie_processor, YoutubeDLHandler())) + socket.setdefaulttimeout(300) # 5 minutes should be enough (famous last words) + + # Batch file verification + batchurls = [] + if opts.batchfile is not None: + try: + if opts.batchfile == '-': + batchfd = sys.stdin + else: + batchfd = open(opts.batchfile, 'r') + batchurls = batchfd.readlines() + batchurls = [x.strip() for x in batchurls] + batchurls = [x for x in batchurls if len(x) > 0 and not re.search(r'^[#/;]', x)] + except IOError: + sys.exit(u'ERROR: batch file could not be read') + all_urls = batchurls + args + + # Conflicting, missing and erroneous options + if opts.usenetrc and (opts.username is not None or opts.password is not None): + parser.error(u'using .netrc conflicts with giving username/password') + if opts.password is not None and opts.username is None: + parser.error(u'account username missing') + if opts.outtmpl is not None and (opts.useliteral or opts.usetitle or opts.autonumber): + parser.error(u'using output template conflicts with using title, literal title or auto 
number') + if opts.usetitle and opts.useliteral: + parser.error(u'using title conflicts with using literal title') + if opts.username is not None and opts.password is None: + opts.password = getpass.getpass(u'Type account password and press return:') + if opts.ratelimit is not None: + numeric_limit = FileDownloader.parse_bytes(opts.ratelimit) + if numeric_limit is None: + parser.error(u'invalid rate limit specified') + opts.ratelimit = numeric_limit + if opts.retries is not None: + try: + opts.retries = long(opts.retries) + except (TypeError, ValueError), err: + parser.error(u'invalid retry count specified') + try: + opts.playliststart = long(opts.playliststart) + if opts.playliststart <= 0: + raise ValueError + except (TypeError, ValueError), err: + parser.error(u'invalid playlist start number specified') + try: + opts.playlistend = long(opts.playlistend) + if opts.playlistend != -1 and (opts.playlistend <= 0 or opts.playlistend < opts.playliststart): + raise ValueError + except (TypeError, ValueError), err: + parser.error(u'invalid playlist end number specified') + if opts.extractaudio: + if opts.audioformat not in ['best', 'aac', 'mp3']: + parser.error(u'invalid audio format specified') + + # Information extractors + youtube_ie = YoutubeIE() + metacafe_ie = MetacafeIE(youtube_ie) + dailymotion_ie = DailymotionIE() + youtube_pl_ie = YoutubePlaylistIE(youtube_ie) + youtube_user_ie = YoutubeUserIE(youtube_ie) + youtube_search_ie = YoutubeSearchIE(youtube_ie) + google_ie = GoogleIE() + google_search_ie = GoogleSearchIE(google_ie) + photobucket_ie = PhotobucketIE() + yahoo_ie = YahooIE() + yahoo_search_ie = YahooSearchIE(yahoo_ie) + deposit_files_ie = DepositFilesIE() + facebook_ie = FacebookIE() + generic_ie = GenericIE() + + # File downloader + fd = FileDownloader({ + 'usenetrc': opts.usenetrc, + 'username': opts.username, + 'password': opts.password, + 'quiet': (opts.quiet or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription or 
opts.getfilename), + 'forceurl': opts.geturl, + 'forcetitle': opts.gettitle, + 'forcethumbnail': opts.getthumbnail, + 'forcedescription': opts.getdescription, + 'forcefilename': opts.getfilename, + 'simulate': (opts.simulate or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription or opts.getfilename), + 'format': opts.format, + 'format_limit': opts.format_limit, + 'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(preferredencoding())) + or (opts.format == '-1' and opts.usetitle and u'%(stitle)s-%(id)s-%(format)s.%(ext)s') + or (opts.format == '-1' and opts.useliteral and u'%(title)s-%(id)s-%(format)s.%(ext)s') + or (opts.format == '-1' and u'%(id)s-%(format)s.%(ext)s') + or (opts.usetitle and opts.autonumber and u'%(autonumber)s-%(stitle)s-%(id)s.%(ext)s') + or (opts.useliteral and opts.autonumber and u'%(autonumber)s-%(title)s-%(id)s.%(ext)s') + or (opts.usetitle and u'%(stitle)s-%(id)s.%(ext)s') + or (opts.useliteral and u'%(title)s-%(id)s.%(ext)s') + or (opts.autonumber and u'%(autonumber)s-%(id)s.%(ext)s') + or u'%(id)s.%(ext)s'), + 'ignoreerrors': opts.ignoreerrors, + 'ratelimit': opts.ratelimit, + 'nooverwrites': opts.nooverwrites, + 'retries': opts.retries, + 'continuedl': opts.continue_dl, + 'noprogress': opts.noprogress, + 'playliststart': opts.playliststart, + 'playlistend': opts.playlistend, + 'logtostderr': opts.outtmpl == '-', + 'consoletitle': opts.consoletitle, + 'nopart': opts.nopart, + 'updatetime': opts.updatetime, + }) + fd.add_info_extractor(youtube_search_ie) + fd.add_info_extractor(youtube_pl_ie) + fd.add_info_extractor(youtube_user_ie) + fd.add_info_extractor(metacafe_ie) + fd.add_info_extractor(dailymotion_ie) + fd.add_info_extractor(youtube_ie) + fd.add_info_extractor(google_ie) + fd.add_info_extractor(google_search_ie) + fd.add_info_extractor(photobucket_ie) + fd.add_info_extractor(yahoo_ie) + fd.add_info_extractor(yahoo_search_ie) + fd.add_info_extractor(deposit_files_ie) + 
fd.add_info_extractor(facebook_ie) + + # This must come last since it's the + # fallback if none of the others work + fd.add_info_extractor(generic_ie) + + # PostProcessors + if opts.extractaudio: + fd.add_post_processor(FFmpegExtractAudioPP(preferredcodec=opts.audioformat)) + + # Update version + if opts.update_self: + updateSelf(fd, sys.argv[0]) + + # Maybe do nothing + if len(all_urls) < 1: + if not opts.update_self: + parser.error(u'you must provide at least one URL') + else: + sys.exit() + retcode = fd.download(all_urls) + + # Dump cookie jar if requested + if opts.cookiefile is not None: + try: + jar.save() + except (IOError, OSError), err: + sys.exit(u'ERROR: unable to save cookie jar') + + sys.exit(retcode) + if __name__ == '__main__': try: - parser, opts, args = parseOpts() - - # Open appropriate CookieJar - if opts.cookiefile is None: - jar = cookielib.CookieJar() - else: - try: - jar = cookielib.MozillaCookieJar(opts.cookiefile) - if os.path.isfile(opts.cookiefile) and os.access(opts.cookiefile, os.R_OK): - jar.load() - except (IOError, OSError), err: - sys.exit(u'ERROR: unable to open cookie file') - - # Dump user agent - if opts.dump_user_agent: - print std_headers['User-Agent'] - sys.exit(0) - - # General configuration - cookie_processor = urllib2.HTTPCookieProcessor(jar) - urllib2.install_opener(urllib2.build_opener(urllib2.ProxyHandler(), cookie_processor, YoutubeDLHandler())) - socket.setdefaulttimeout(300) # 5 minutes should be enough (famous last words) - - # Batch file verification - batchurls = [] - if opts.batchfile is not None: - try: - if opts.batchfile == '-': - batchfd = sys.stdin - else: - batchfd = open(opts.batchfile, 'r') - batchurls = batchfd.readlines() - batchurls = [x.strip() for x in batchurls] - batchurls = [x for x in batchurls if len(x) > 0 and not re.search(r'^[#/;]', x)] - except IOError: - sys.exit(u'ERROR: batch file could not be read') - all_urls = batchurls + args - - # Conflicting, missing and erroneous options - if 
opts.usenetrc and (opts.username is not None or opts.password is not None): - parser.error(u'using .netrc conflicts with giving username/password') - if opts.password is not None and opts.username is None: - parser.error(u'account username missing') - if opts.outtmpl is not None and (opts.useliteral or opts.usetitle or opts.autonumber): - parser.error(u'using output template conflicts with using title, literal title or auto number') - if opts.usetitle and opts.useliteral: - parser.error(u'using title conflicts with using literal title') - if opts.username is not None and opts.password is None: - opts.password = getpass.getpass(u'Type account password and press return:') - if opts.ratelimit is not None: - numeric_limit = FileDownloader.parse_bytes(opts.ratelimit) - if numeric_limit is None: - parser.error(u'invalid rate limit specified') - opts.ratelimit = numeric_limit - if opts.retries is not None: - try: - opts.retries = long(opts.retries) - except (TypeError, ValueError), err: - parser.error(u'invalid retry count specified') - try: - opts.playliststart = long(opts.playliststart) - if opts.playliststart <= 0: - raise ValueError - except (TypeError, ValueError), err: - parser.error(u'invalid playlist start number specified') - try: - opts.playlistend = long(opts.playlistend) - if opts.playlistend != -1 and (opts.playlistend <= 0 or opts.playlistend < opts.playliststart): - raise ValueError - except (TypeError, ValueError), err: - parser.error(u'invalid playlist end number specified') - if opts.extractaudio: - if opts.audioformat not in ['best', 'aac', 'mp3']: - parser.error(u'invalid audio format specified') - - # Information extractors - youtube_ie = YoutubeIE() - metacafe_ie = MetacafeIE(youtube_ie) - dailymotion_ie = DailymotionIE() - youtube_pl_ie = YoutubePlaylistIE(youtube_ie) - youtube_user_ie = YoutubeUserIE(youtube_ie) - youtube_search_ie = YoutubeSearchIE(youtube_ie) - google_ie = GoogleIE() - google_search_ie = GoogleSearchIE(google_ie) - photobucket_ie 
= PhotobucketIE() - yahoo_ie = YahooIE() - yahoo_search_ie = YahooSearchIE(yahoo_ie) - deposit_files_ie = DepositFilesIE() - facebook_ie = FacebookIE() - generic_ie = GenericIE() - - # File downloader - fd = FileDownloader({ - 'usenetrc': opts.usenetrc, - 'username': opts.username, - 'password': opts.password, - 'quiet': (opts.quiet or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription or opts.getfilename), - 'forceurl': opts.geturl, - 'forcetitle': opts.gettitle, - 'forcethumbnail': opts.getthumbnail, - 'forcedescription': opts.getdescription, - 'forcefilename': opts.getfilename, - 'simulate': (opts.simulate or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription or opts.getfilename), - 'format': opts.format, - 'format_limit': opts.format_limit, - 'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(preferredencoding())) - or (opts.format == '-1' and opts.usetitle and u'%(stitle)s-%(id)s-%(format)s.%(ext)s') - or (opts.format == '-1' and opts.useliteral and u'%(title)s-%(id)s-%(format)s.%(ext)s') - or (opts.format == '-1' and u'%(id)s-%(format)s.%(ext)s') - or (opts.usetitle and opts.autonumber and u'%(autonumber)s-%(stitle)s-%(id)s.%(ext)s') - or (opts.useliteral and opts.autonumber and u'%(autonumber)s-%(title)s-%(id)s.%(ext)s') - or (opts.usetitle and u'%(stitle)s-%(id)s.%(ext)s') - or (opts.useliteral and u'%(title)s-%(id)s.%(ext)s') - or (opts.autonumber and u'%(autonumber)s-%(id)s.%(ext)s') - or u'%(id)s.%(ext)s'), - 'ignoreerrors': opts.ignoreerrors, - 'ratelimit': opts.ratelimit, - 'nooverwrites': opts.nooverwrites, - 'retries': opts.retries, - 'continuedl': opts.continue_dl, - 'noprogress': opts.noprogress, - 'playliststart': opts.playliststart, - 'playlistend': opts.playlistend, - 'logtostderr': opts.outtmpl == '-', - 'consoletitle': opts.consoletitle, - 'nopart': opts.nopart, - 'updatetime': opts.updatetime, - }) - fd.add_info_extractor(youtube_search_ie) - fd.add_info_extractor(youtube_pl_ie) - 
fd.add_info_extractor(youtube_user_ie) - fd.add_info_extractor(metacafe_ie) - fd.add_info_extractor(dailymotion_ie) - fd.add_info_extractor(youtube_ie) - fd.add_info_extractor(google_ie) - fd.add_info_extractor(google_search_ie) - fd.add_info_extractor(photobucket_ie) - fd.add_info_extractor(yahoo_ie) - fd.add_info_extractor(yahoo_search_ie) - fd.add_info_extractor(deposit_files_ie) - fd.add_info_extractor(facebook_ie) - - # This must come last since it's the - # fallback if none of the others work - fd.add_info_extractor(generic_ie) - - # PostProcessors - if opts.extractaudio: - fd.add_post_processor(FFmpegExtractAudioPP(preferredcodec=opts.audioformat)) - - # Update version - if opts.update_self: - updateSelf(fd, sys.argv[0]) - - # Maybe do nothing - if len(all_urls) < 1: - if not opts.update_self: - parser.error(u'you must provide at least one URL') - else: - sys.exit() - retcode = fd.download(all_urls) - - # Dump cookie jar if requested - if opts.cookiefile is not None: - try: - jar.save() - except (IOError, OSError), err: - sys.exit(u'ERROR: unable to save cookie jar') - - sys.exit(retcode) - + main() except DownloadError: sys.exit(1) except SameFileError: From 6a4f0a114d88965c171d0117db68be64b4db9acd Mon Sep 17 00:00:00 2001 From: Georgi Valkov Date: Tue, 23 Aug 2011 17:03:28 +0300 Subject: [PATCH 377/455] Use `stty size` to find terminal width if we're on linux and COLUMNS is not exported --- youtube-dl | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index 34a60afff..251254765 100755 --- a/youtube-dl +++ b/youtube-dl @@ -2744,11 +2744,21 @@ def parseOpts(): return "".join(opts) + def _find_term_columns(): + columns = os.environ.get('COLUMNS', None) + if columns: return int(columns) + + if sys.platform.startswith('linux'): + try: columns = os.popen('stty size', 'r').read().split()[1] + except: pass + + if columns: return int(columns) + max_width = 80 max_help_position = 80 # No need to wrap help messages 
if we're on a wide console - columns = os.environ.get('COLUMNS', None) + columns = _find_term_columns() if columns: max_width = columns fmt = optparse.IndentedHelpFormatter(width=max_width, max_help_position=max_help_position) From 4f2a5e06da89913f789463fef919e61f9d8f5be6 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Wed, 24 Aug 2011 23:28:30 +0200 Subject: [PATCH 378/455] Use subprocess to call stty size when COLUMNS is not set --- youtube-dl | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/youtube-dl b/youtube-dl index 1ae68b2b5..32bd99e98 100755 --- a/youtube-dl +++ b/youtube-dl @@ -2996,11 +2996,12 @@ def parseOpts(): if columns: return int(columns) - # TODO: Breaks on phihag's system - #if sys.platform.startswith('linux'): - # try: - # return os.popen('stty size', 'r').read().split()[1] - # except: pass + try: + sp = subprocess.Popen(['stty', 'size'], stdout=subprocess.PIPE, stderr=subprocess.PIPE) + out,err = sp.communicate() + return out.split()[1] + except: + pass return None max_width = 80 From eb0387a848d5d349895932dec6d2676adc0ab9c9 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Thu, 25 Aug 2011 00:08:59 +0200 Subject: [PATCH 379/455] Fix stty detection --- youtube-dl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index 32bd99e98..3d43355c7 100755 --- a/youtube-dl +++ b/youtube-dl @@ -2999,7 +2999,7 @@ def parseOpts(): try: sp = subprocess.Popen(['stty', 'size'], stdout=subprocess.PIPE, stderr=subprocess.PIPE) out,err = sp.communicate() - return out.split()[1] + return int(out.split()[1]) except: pass return None From 4618f3da74c61c79be3187a2818ea1ce6b28bb18 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Thu, 25 Aug 2011 00:09:28 +0200 Subject: [PATCH 380/455] Makefile to recreate README --- Makefile | 15 +++++++++ README.md | 99 +++++++++++++++++++++++++------------------------------ 2 files changed, 59 insertions(+), 55 deletions(-) create mode 100644 
Makefile diff --git a/Makefile b/Makefile new file mode 100644 index 000000000..3ed7108f8 --- /dev/null +++ b/Makefile @@ -0,0 +1,15 @@ + + +update-readme: + @options=$$(COLUMNS=80 ./youtube-dl --help | sed -e '1,/.*General Options.*/ d' -e 's/^\W\{2\}\(\w\)/### \1/') && \ + header=$$(sed -e '/.*## OPTIONS/,$$ d' README.md) && \ + footer=$$(sed -e '1,/.*## COPYRIGHT/ d' README.md) && \ + echo "$${header}" > README.md && \ + echo -e '\n## OPTIONS' >> README.md && \ + echo "$${options}" >> README.md&& \ + echo -e '\n## COPYRIGHT' >> README.md && \ + echo "$${footer}" >> README.md + + + +.PHONY: update-readme diff --git a/README.md b/README.md index cf38e31fc..d0c255201 100644 --- a/README.md +++ b/README.md @@ -11,66 +11,56 @@ your Unix box, in Windows or in Mac OS X. It is released to the public domain, which means you can modify it, redistribute it or use it however you like. ## OPTIONS - -h, --help print this help text and exit - -v, --version print program version and exit - -U, --update update this program to latest stable version - -i, --ignore-errors continue on download errors - -r LIMIT, --rate-limit=LIMIT - download rate limit (e.g. 50k or 44.6m) - -R RETRIES, --retries=RETRIES - number of retries (default is 10) - --playlist-start=NUMBER - playlist video to start at (default is 1) - --playlist-end=NUMBER - playlist video to end at (default is last) - --dump-user-agent display the current browser identification + -h, --help print this help text and exit + -v, --version print program version and exit + -U, --update update this program to latest stable version + -i, --ignore-errors continue on download errors + -r, --rate-limit LIMIT download rate limit (e.g. 
50k or 44.6m) + -R, --retries RETRIES number of retries (default is 10) + --playlist-start NUMBER playlist video to start at (default is 1) + --playlist-end NUMBER playlist video to end at (default is last) + --dump-user-agent display the current browser identification -### Authentication Options - -u USERNAME, --username=USERNAME - account username - -p PASSWORD, --password=PASSWORD - account password - -n, --netrc use .netrc authentication data +### Filesystem Options: + -t, --title use title in file name + -l, --literal use literal title in file name + -A, --auto-number number downloaded files starting from 00000 + -o, --output TEMPLATE output filename template + -a, --batch-file FILE file containing URLs to download ('-' for stdin) + -w, --no-overwrites do not overwrite files + -c, --continue resume partially downloaded files + --cookies FILE file to dump cookie jar to + --no-part do not use .part files + --no-mtime do not use the Last-modified header to set the file + modification time + --write-description write video description to a .description file + --write-info-json write video metadata to a .info.json file -### Video Format Options - -f FORMAT, --format=FORMAT - video format code - --all-formats download all available video formats - --max-quality=FORMAT - highest quality format to download +### Verbosity / Simulation Options: + -q, --quiet activates quiet mode + -s, --simulate do not download video + -g, --get-url simulate, quiet but print URL + -e, --get-title simulate, quiet but print title + --get-thumbnail simulate, quiet but print thumbnail URL + --get-description simulate, quiet but print video description + --get-filename simulate, quiet but print output filename + --no-progress do not print progress bar + --console-title display progress in console titlebar -### Verbosity / Simulation Options - -q, --quiet activates quiet mode - -s, --simulate do not download video - -g, --get-url simulate, quiet but print URL - -e, --get-title simulate, quiet 
but print title - --get-thumbnail simulate, quiet but print thumbnail URL - --get-description simulate, quiet but print video description - --get-filename simulate, quiet but print output filename - --no-progress do not print progress bar - --console-title display progress in console titlebar +### Video Format Options: + -f, --format FORMAT video format code + --all-formats download all available video formats + --max-quality FORMAT highest quality format to download -### Filesystem Options - -t, --title use title in file name - -l, --literal use literal title in file name - -A, --auto-number number downloaded files starting from 00000 - -o TEMPLATE, --output=TEMPLATE - output filename template - -a FILE, --batch-file=FILE - file containing URLs to download ('-' for stdin) - -w, --no-overwrites - do not overwrite files - -c, --continue resume partially downloaded files - --cookies=FILE file to dump cookie jar to - --no-part do not use .part files - --no-mtime do not use the Last-modified header to set the file - modification time +### Authentication Options: + -u, --username USERNAME account username + -p, --password PASSWORD account password + -n, --netrc use .netrc authentication data ### Post-processing Options: - --extract-audio convert video files to audio-only files (requires - ffmpeg and ffprobe) - --audio-format=FORMAT - "best", "aac" or "mp3"; best by default + --extract-audio convert video files to audio-only files (requires + ffmpeg and ffprobe) + --audio-format FORMAT "best", "aac" or "mp3"; best by default ## COPYRIGHT **youtube-dl**: Copyright © 2006-2011 Ricardo Garcia Gonzalez. The program is @@ -80,4 +70,3 @@ likewise released into the public domain. 
## BUGS Bugs should be reported at: - From 5f9f2b739678dc8a02879714c8987dd887040676 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sun, 28 Aug 2011 22:10:03 +0200 Subject: [PATCH 381/455] Update: Write downloaded file without modification (allows hashsums) --- youtube-dl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index 3d43355c7..11d8ca01c 100755 --- a/youtube-dl +++ b/youtube-dl @@ -2965,7 +2965,7 @@ def updateSelf(downloader, filename): sys.exit('ERROR: unable to download latest version') try: - stream = open(filename, 'w') + stream = open(filename, 'wb') stream.write(newcontent) stream.close() except (IOError, OSError), err: From 6ae796b1eebcb77b1f0c952682f86956a48fe462 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sun, 28 Aug 2011 23:17:18 +0200 Subject: [PATCH 382/455] =?UTF-8?q?Credit=20Rog=C3=A9rio=20Brito=20for=20V?= =?UTF-8?q?imeo=20support?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- youtube-dl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index 218782e0d..ecc4c26bb 100755 --- a/youtube-dl +++ b/youtube-dl @@ -9,10 +9,11 @@ __author__ = ( 'Witold Baryluk', 'Paweł Paprota', 'Gergely Imreh', + 'Rogério Brito', ) __license__ = 'Public Domain' -__version__ = '2011.08.24-phihag' +__version__ = '2011.08.28-phihag' import cookielib import datetime From 5fd5ce083843baaf9ed0ecd902c7d5913110bc2b Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sun, 28 Aug 2011 23:17:32 +0200 Subject: [PATCH 383/455] Add default make target --- Makefile | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 3ed7108f8..21558bb81 100644 --- a/Makefile +++ b/Makefile @@ -1,3 +1,6 @@ +default: update + +update: update-readme update-readme: @@ -12,4 +15,4 @@ update-readme: -.PHONY: update-readme +.PHONY: default update update-readme From d207e7cf88947df1ded016959f921f0e3e172159 
Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sun, 28 Aug 2011 23:38:40 +0200 Subject: [PATCH 384/455] Update update mechanism (Closes #4) --- LATEST_VERSION | 1 - youtube-dl | 23 ++++++++++++++--------- 2 files changed, 14 insertions(+), 10 deletions(-) delete mode 100644 LATEST_VERSION diff --git a/LATEST_VERSION b/LATEST_VERSION deleted file mode 100644 index 8a3bb049a..000000000 --- a/LATEST_VERSION +++ /dev/null @@ -1 +0,0 @@ -2011.08.04 diff --git a/youtube-dl b/youtube-dl index 5ffd53bd3..534c7ac87 100755 --- a/youtube-dl +++ b/youtube-dl @@ -15,6 +15,8 @@ __author__ = ( __license__ = 'Public Domain' __version__ = '2011.08.28-phihag' +UPDATE_URL = 'https://raw.github.com/phihag/youtube-dl/master/youtube-dl' + import cookielib import datetime import gzip @@ -3071,24 +3073,27 @@ def updateSelf(downloader, filename): if not os.access(filename, os.W_OK): sys.exit('ERROR: no write permissions on %s' % filename) - downloader.to_screen('Updating to latest stable version...') + downloader.to_screen('Updating to latest version...') try: - latest_url = 'http://github.com/rg3/youtube-dl/raw/master/LATEST_VERSION' - latest_version = urllib.urlopen(latest_url).read().strip() - prog_url = 'http://github.com/rg3/youtube-dl/raw/%s/youtube-dl' % latest_version - newcontent = urllib.urlopen(prog_url).read() + try: + urlh = urllib.urlopen(UPDATE_URL) + newcontent = urlh.read() + finally: + urlh.close() except (IOError, OSError), err: sys.exit('ERROR: unable to download latest version') try: - stream = open(filename, 'wb') - stream.write(newcontent) - stream.close() + outf = open(filename, 'wb') + try: + outf.write(newcontent) + finally: + outf.close() except (IOError, OSError), err: sys.exit('ERROR: unable to overwrite current version') - downloader.to_screen('Updated to version %s' % latest_version) + downloader.to_screen('Updated youtube-dl. 
Restart to use the new version.') def parseOpts(): # Deferred imports From eb11aaccbb7350618ba784fce1f5511db1fa5c81 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sun, 28 Aug 2011 23:44:23 +0200 Subject: [PATCH 385/455] Update bug reporting to this fork, so that vimeo/blip.tv issues are reported at phihag/issues instead of rg3/issues (Closes #5) --- README.md | 2 +- youtube-dl | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index d0c255201..8ad8e5ca1 100644 --- a/README.md +++ b/README.md @@ -69,4 +69,4 @@ originally written by Daniel Bolton () and is likewise released into the public domain. ## BUGS -Bugs should be reported at: +Bugs should be reported at: diff --git a/youtube-dl b/youtube-dl index 534c7ac87..ec9802ba1 100755 --- a/youtube-dl +++ b/youtube-dl @@ -10,6 +10,7 @@ __author__ = ( 'Paweł Paprota', 'Gergely Imreh', 'Rogério Brito', + 'Philipp Hagemeister', ) __license__ = 'Public Domain' From e0e56865a0caf52dcc3c8fa7897c94bc292ab91a Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Wed, 31 Aug 2011 21:28:40 +0200 Subject: [PATCH 386/455] Remove stable from help wording (There will be only one main branch for now) --- youtube-dl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index ec9802ba1..c919c4016 100755 --- a/youtube-dl +++ b/youtube-dl @@ -3159,7 +3159,7 @@ def parseOpts(): general.add_option('-v', '--version', action='version', help='print program version and exit') general.add_option('-U', '--update', - action='store_true', dest='update_self', help='update this program to latest stable version') + action='store_true', dest='update_self', help='update this program to latest version') general.add_option('-i', '--ignore-errors', action='store_true', dest='ignoreerrors', help='continue on download errors', default=False) general.add_option('-r', '--rate-limit', From dbddab27992fa609102d512a7762ae8f5b39c55c Mon Sep 17 00:00:00 2001 From: Philipp 
Hagemeister Date: Sat, 3 Sep 2011 11:32:05 +0200 Subject: [PATCH 387/455] Robust error handling in downloading code --- youtube-dl | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/youtube-dl b/youtube-dl index 68c9bc429..3f45e92f2 100755 --- a/youtube-dl +++ b/youtube-dl @@ -930,6 +930,7 @@ class FileDownloader(object): if stream is None: try: (stream, tmpfilename) = sanitize_open(tmpfilename, open_mode) + assert stream is not None filename = self.undo_temp_name(tmpfilename) self.report_destination(filename) except (OSError, IOError), err: @@ -951,6 +952,9 @@ class FileDownloader(object): # Apply rate limit self.slow_down(start, byte_counter - resume_len) + if stream is None: + self.trouble(u'\nERROR: Did not get any data blocks') + return False stream.close() self.report_finish() if data_len is not None and byte_counter != data_len: From 447b1d7170219c391bfefa9efb966adb4e4e4d6b Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sun, 4 Sep 2011 11:41:54 +0200 Subject: [PATCH 388/455] Added FAQ to README --- README.md | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/README.md b/README.md index 8ad8e5ca1..db75487e3 100644 --- a/README.md +++ b/README.md @@ -62,6 +62,24 @@ which means you can modify it, redistribute it or use it however you like. ffmpeg and ffprobe) --audio-format FORMAT "best", "aac" or "mp3"; best by default +## FAQ + +### Can you please put the -b option back? + +Most people asking this question are not aware that youtube-dl now defaults to downloading the highest available quality as reported by YouTube, which will be 1080p or 720p in some cases, so you no longer need the -b option. For some specific videos, maybe YouTube does not report them to be available in a specific high quality format you''re interested in. In that case, simply request it with the -f option and youtube-dl will try to download it. + +### I get HTTP error 402 when trying to download a video. What''s this? 
+ +Apparently YouTube requires you to pass a CAPTCHA test if you download too much. We''re [considering to provide a way to let you solve the CAPTCHA](https://github.com/phihag/youtube-dl/issues/8), but at the moment, your best course of action is pointing a webbrowser to the youtube URL, solving the CAPTCHA, and restart youtube-dl. + +### I have downloaded a video but how can I play it? + +Once the video is fully downloaded, use any video player, such as [vlc](http://www.videolan.org) or [mplayer](http://www.mplayerhq.hu/). + +### The links provided by youtube-dl -g are not working anymore + +The URLs youtube-dl outputs require the downloader to have the correct cookies. Use the `--cookies` option to write the required cookies into a file, and advise your downloader to read cookies from that file. + ## COPYRIGHT **youtube-dl**: Copyright © 2006-2011 Ricardo Garcia Gonzalez. The program is released into the public domain by the copyright holder. This README file was From 8f88eb1fa70598c9615b7e58ae3f2497627a1925 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sun, 4 Sep 2011 11:47:58 +0200 Subject: [PATCH 389/455] Update Makefile to new README format --- Makefile | 4 ++-- README.md | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Makefile b/Makefile index 21558bb81..0039f90c9 100644 --- a/Makefile +++ b/Makefile @@ -6,11 +6,11 @@ update: update-readme update-readme: @options=$$(COLUMNS=80 ./youtube-dl --help | sed -e '1,/.*General Options.*/ d' -e 's/^\W\{2\}\(\w\)/### \1/') && \ header=$$(sed -e '/.*## OPTIONS/,$$ d' README.md) && \ - footer=$$(sed -e '1,/.*## COPYRIGHT/ d' README.md) && \ + footer=$$(sed -e '1,/.*## FAQ/ d' README.md) && \ echo "$${header}" > README.md && \ echo -e '\n## OPTIONS' >> README.md && \ echo "$${options}" >> README.md&& \ - echo -e '\n## COPYRIGHT' >> README.md && \ + echo -e '\n## FAQ' >> README.md && \ echo "$${footer}" >> README.md diff --git a/README.md b/README.md index db75487e3..66639ad04 100644 --- 
a/README.md +++ b/README.md @@ -13,7 +13,7 @@ which means you can modify it, redistribute it or use it however you like. ## OPTIONS -h, --help print this help text and exit -v, --version print program version and exit - -U, --update update this program to latest stable version + -U, --update update this program to latest version -i, --ignore-errors continue on download errors -r, --rate-limit LIMIT download rate limit (e.g. 50k or 44.6m) -R, --retries RETRIES number of retries (default is 10) From c0a10ca8dcbcf345913502f125cf011889118917 Mon Sep 17 00:00:00 2001 From: FND Date: Mon, 5 Sep 2011 09:46:36 +0200 Subject: [PATCH 390/455] fixed PEP8 whitespace issues mostly vertical whitespace and mixed spaces and tabs --- youtube-dl | 88 ++++++++++++++++++++++++++++++++++-------------------- 1 file changed, 55 insertions(+), 33 deletions(-) diff --git a/youtube-dl b/youtube-dl index e8b19c8d0..36ca6baf9 100755 --- a/youtube-dl +++ b/youtube-dl @@ -47,6 +47,7 @@ std_headers = { simple_title_chars = string.ascii_letters.decode('ascii') + string.digits.decode('ascii') + def preferredencoding(): """Get preferred encoding. @@ -63,6 +64,7 @@ def preferredencoding(): yield pref return yield_preferredencoding().next() + def htmlentity_transform(matchobj): """Transforms an HTML entity to a Unicode character. @@ -89,11 +91,13 @@ def htmlentity_transform(matchobj): # Unknown entity in name, return its literal representation return (u'&%s;' % entity) + def sanitize_title(utitle): """Sanitizes a video title so it could be used as part of a filename.""" utitle = re.sub(ur'(?u)&(.+?);', htmlentity_transform, utitle) return utitle.replace(unicode(os.sep), u'%') + def sanitize_open(filename, open_mode): """Try to open the given filename, and slightly tweak it if this fails. 
@@ -120,13 +124,15 @@ def sanitize_open(filename, open_mode): stream = open(filename, open_mode) return (stream, filename) + def timeconvert(timestr): - """Convert RFC 2822 defined time string into system timestamp""" - timestamp = None - timetuple = email.utils.parsedate_tz(timestr) - if timetuple is not None: - timestamp = email.utils.mktime_tz(timetuple) - return timestamp + """Convert RFC 2822 defined time string into system timestamp""" + timestamp = None + timetuple = email.utils.parsedate_tz(timestr) + if timetuple is not None: + timestamp = email.utils.mktime_tz(timetuple) + return timestamp + class DownloadError(Exception): """Download Error exception. @@ -137,6 +143,7 @@ class DownloadError(Exception): """ pass + class SameFileError(Exception): """Same File exception. @@ -145,6 +152,7 @@ class SameFileError(Exception): """ pass + class PostProcessingError(Exception): """Post Processing exception. @@ -153,6 +161,7 @@ class PostProcessingError(Exception): """ pass + class UnavailableVideoError(Exception): """Unavailable Format exception. @@ -161,6 +170,7 @@ class UnavailableVideoError(Exception): """ pass + class ContentTooShortError(Exception): """Content Too Short exception. @@ -176,6 +186,7 @@ class ContentTooShortError(Exception): self.downloaded = downloaded self.expected = expected + class YoutubeDLHandler(urllib2.HTTPHandler): """Handler for HTTP requests and responses. @@ -185,11 +196,11 @@ class YoutubeDLHandler(urllib2.HTTPHandler): a particular request, the original request in the program code only has to include the HTTP header "Youtubedl-No-Compression", which will be removed before making the real request. - + Part of this code was copied from: - http://techknack.net/python-urllib2-handlers/ - + http://techknack.net/python-urllib2-handlers/ + Andrew Rowls, the author of that code, agreed to release it to the public domain. 
""" @@ -200,7 +211,7 @@ class YoutubeDLHandler(urllib2.HTTPHandler): return zlib.decompress(data, -zlib.MAX_WBITS) except zlib.error: return zlib.decompress(data) - + @staticmethod def addinfourl_wrapper(stream, headers, url, code): if hasattr(urllib2.addinfourl, 'getcode'): @@ -208,7 +219,7 @@ class YoutubeDLHandler(urllib2.HTTPHandler): ret = urllib2.addinfourl(stream, headers, url) ret.code = code return ret - + def http_request(self, req): for h in std_headers: if h in req.headers: @@ -234,6 +245,7 @@ class YoutubeDLHandler(urllib2.HTTPHandler): resp.msg = old_resp.msg return resp + class FileDownloader(object): """File Downloader class. @@ -325,7 +337,7 @@ class FileDownloader(object): else: exponent = long(math.log(bytes, 1024.0)) suffix = 'bkMGTPEZY'[exponent] - converted = float(bytes) / float(1024**exponent) + converted = float(bytes) / float(1024 ** exponent) return '%.2f%s' % (converted, suffix) @staticmethod @@ -463,7 +475,7 @@ class FileDownloader(object): os.rename(old_filename, new_filename) except (IOError, OSError), err: self.trouble(u'ERROR: unable to rename file') - + def try_utime(self, filename, last_modified_hdr): """Try to set the last-modified time of the given file.""" if last_modified_hdr is None: @@ -477,7 +489,7 @@ class FileDownloader(object): if filetime is None: return try: - os.utime(filename,(time.time(), filetime)) + os.utime(filename, (time.time(), filetime)) except: pass @@ -680,7 +692,7 @@ class FileDownloader(object): # Request parameters in case of being able to resume if self.params.get('continuedl', False) and resume_len != 0: self.report_resuming_byte(resume_len) - request.add_header('Range','bytes=%d-' % resume_len) + request.add_header('Range', 'bytes=%d-' % resume_len) open_mode = 'ab' count = 0 @@ -706,7 +718,7 @@ class FileDownloader(object): else: # Examine the reported length if (content_length is not None and - (resume_len - 100 < long(content_length) < resume_len + 100)): + (resume_len - 100 < long(content_length) 
< resume_len + 100)): # The file had already been fully downloaded. # Explanation to the above condition: in issue #175 it was revealed that # YouTube sometimes adds or removes a few bytes from the end of the file, @@ -784,6 +796,7 @@ class FileDownloader(object): return True + class InfoExtractor(object): """Information Extractor class. @@ -855,6 +868,7 @@ class InfoExtractor(object): """Real extraction process. Redefine in subclasses.""" pass + class YoutubeIE(InfoExtractor): """Information extractor for youtube.com.""" @@ -1009,7 +1023,7 @@ class YoutubeIE(InfoExtractor): self.report_video_info_webpage_download(video_id) for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']: video_info_url = ('http://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en' - % (video_id, el_type)) + % (video_id, el_type)) request = urllib2.Request(video_info_url) try: video_info_webpage = urllib2.urlopen(request).read() @@ -1371,6 +1385,7 @@ class DailymotionIE(InfoExtractor): except UnavailableVideoError: self._downloader.trouble(u'\nERROR: unable to download video') + class GoogleIE(InfoExtractor): """Information extractor for video.google.com.""" @@ -1464,7 +1479,6 @@ class GoogleIE(InfoExtractor): else: # we need something to pass to process_info video_thumbnail = '' - try: # Process video information self._downloader.process_info({ @@ -1664,7 +1678,8 @@ class YahooIE(InfoExtractor): self._downloader.trouble(u'ERROR: unable to extract video description') return video_description = mobj.group(1).decode('utf-8') - if not video_description: video_description = 'No description available.' + if not video_description: + video_description = 'No description available.' 
# Extract video height and width mobj = re.search(r'', webpage) @@ -1685,8 +1700,8 @@ class YahooIE(InfoExtractor): yv_lg = 'R0xx6idZnW2zlrKP8xxAIR' # not sure what this represents yv_bitrate = '700' # according to Wikipedia this is hard-coded request = urllib2.Request('http://cosmos.bcst.yahoo.com/up/yep/process/getPlaylistFOP.php?node_id=' + video_id + - '&tech=flash&mode=playlist&lg=' + yv_lg + '&bitrate=' + yv_bitrate + '&vidH=' + yv_video_height + - '&vidW=' + yv_video_width + '&swf=as3&rd=video.yahoo.com&tk=null&adsupported=v1,v2,&eventid=1301797') + '&tech=flash&mode=playlist&lg=' + yv_lg + '&bitrate=' + yv_bitrate + '&vidH=' + yv_video_height + + '&vidW=' + yv_video_width + '&swf=as3&rd=video.yahoo.com&tk=null&adsupported=v1,v2,&eventid=1301797') try: self.report_download_webpage(video_id) webpage = urllib2.urlopen(request).read() @@ -1779,11 +1794,11 @@ class GenericIE(InfoExtractor): return video_url = urllib.unquote(mobj.group(1)) - video_id = os.path.basename(video_url) + video_id = os.path.basename(video_url) # here's a fun little line of code for you: video_extension = os.path.splitext(video_id)[1][1:] - video_id = os.path.splitext(video_id)[0] + video_id = os.path.splitext(video_id)[0] # it's tempting to parse this further, but you would # have to take into account all the variations like @@ -1856,7 +1871,7 @@ class YoutubeSearchIE(InfoExtractor): prefix, query = query.split(':') prefix = prefix[8:] - query = query.encode('utf-8') + query = query.encode('utf-8') if prefix == '': self._download_n_results(query, 1) return @@ -1870,7 +1885,7 @@ class YoutubeSearchIE(InfoExtractor): self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query)) return elif n > self._max_youtube_results: - self._downloader.to_stderr(u'WARNING: ytsearch returns max %i results (you requested %i)' % (self._max_youtube_results, n)) + self._downloader.to_stderr(u'WARNING: ytsearch returns max %i results (you requested %i)' % 
(self._max_youtube_results, n)) n = self._max_youtube_results self._download_n_results(query, n) return @@ -1914,6 +1929,7 @@ class YoutubeSearchIE(InfoExtractor): pagenum = pagenum + 1 + class GoogleSearchIE(InfoExtractor): """Information Extractor for Google Video search queries.""" _VALID_QUERY = r'gvsearch(\d+|all)?:[\s\S]+' @@ -1947,7 +1963,7 @@ class GoogleSearchIE(InfoExtractor): prefix, query = query.split(':') prefix = prefix[8:] - query = query.encode('utf-8') + query = query.encode('utf-8') if prefix == '': self._download_n_results(query, 1) return @@ -1961,7 +1977,7 @@ class GoogleSearchIE(InfoExtractor): self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query)) return elif n > self._max_google_results: - self._downloader.to_stderr(u'WARNING: gvsearch returns max %i results (you requested %i)' % (self._max_google_results, n)) + self._downloader.to_stderr(u'WARNING: gvsearch returns max %i results (you requested %i)' % (self._max_google_results, n)) n = self._max_google_results self._download_n_results(query, n) return @@ -2005,6 +2021,7 @@ class GoogleSearchIE(InfoExtractor): pagenum = pagenum + 1 + class YahooSearchIE(InfoExtractor): """Information Extractor for Yahoo! 
Video search queries.""" _VALID_QUERY = r'yvsearch(\d+|all)?:[\s\S]+' @@ -2038,7 +2055,7 @@ class YahooSearchIE(InfoExtractor): prefix, query = query.split(':') prefix = prefix[8:] - query = query.encode('utf-8') + query = query.encode('utf-8') if prefix == '': self._download_n_results(query, 1) return @@ -2052,7 +2069,7 @@ class YahooSearchIE(InfoExtractor): self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query)) return elif n > self._max_yahoo_results: - self._downloader.to_stderr(u'WARNING: yvsearch returns max %i results (you requested %i)' % (self._max_yahoo_results, n)) + self._downloader.to_stderr(u'WARNING: yvsearch returns max %i results (you requested %i)' % (self._max_yahoo_results, n)) n = self._max_yahoo_results self._download_n_results(query, n) return @@ -2096,6 +2113,7 @@ class YahooSearchIE(InfoExtractor): pagenum = pagenum + 1 + class YoutubePlaylistIE(InfoExtractor): """Information Extractor for YouTube playlists.""" @@ -2172,6 +2190,7 @@ class YoutubePlaylistIE(InfoExtractor): self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id) return + class YoutubeUserIE(InfoExtractor): """Information Extractor for YouTube users.""" @@ -2193,7 +2212,7 @@ class YoutubeUserIE(InfoExtractor): def report_download_page(self, username, start_index): """Report attempt to download user page.""" self._downloader.to_screen(u'[youtube] user %s: Downloading video ids from %d to %d' % - (username, start_index, start_index + self._GDATA_PAGE_SIZE)) + (username, start_index, start_index + self._GDATA_PAGE_SIZE)) def _real_initialize(self): self._youtube_ie.initialize() @@ -2255,9 +2274,9 @@ class YoutubeUserIE(InfoExtractor): video_ids = video_ids[playliststart:] else: video_ids = video_ids[playliststart:playlistend] - + self._downloader.to_screen("[youtube] user %s: Collected %d video ids (downloading %d of them)" % - (username, all_ids_count, len(video_ids))) + (username, all_ids_count, len(video_ids))) for video_id in 
video_ids: self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % video_id) @@ -2342,6 +2361,7 @@ class DepositFilesIE(InfoExtractor): except UnavailableVideoError, err: self._downloader.trouble(u'ERROR: unable to download file') + class FacebookIE(InfoExtractor): """Information Extractor for Facebook""" @@ -2510,7 +2530,7 @@ class FacebookIE(InfoExtractor): # description video_description = 'No description available.' if (self._downloader.params.get('forcedescription', False) and - 'description' in video_info): + 'description' in video_info): video_description = video_info['description'] url_map = video_info['video_urls'] @@ -2565,6 +2585,7 @@ class FacebookIE(InfoExtractor): except UnavailableVideoError, err: self._downloader.trouble(u'\nERROR: unable to download video') + class PostProcessor(object): """Post Processor class. @@ -2611,6 +2632,7 @@ class PostProcessor(object): """ return information # by default, do nothing + class FFmpegExtractAudioPP(PostProcessor): def __init__(self, downloader=None, preferredcodec=None): From 50891fece71b05bce92f694451656988b53a7038 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Tue, 6 Sep 2011 17:32:22 +0200 Subject: [PATCH 391/455] Use os.makedirs instead of homebrewn pmkdir --- youtube-dl | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/youtube-dl b/youtube-dl index 7ac27b5a0..0b06c69cc 100755 --- a/youtube-dl +++ b/youtube-dl @@ -455,16 +455,6 @@ class FileDownloader(object): self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)] self.params = params - @staticmethod - def pmkdir(filename): - """Create directory components in filename. 
Similar to Unix "mkdir -p".""" - components = filename.split(os.sep) - aggregate = [os.sep.join(components[0:x]) for x in xrange(1, len(components))] - aggregate = ['%s%s' % (x, os.sep) for x in aggregate] # Finish names with separator - for dir in aggregate: - if not os.path.exists(dir): - os.mkdir(dir) - @staticmethod def format_bytes(bytes): if bytes is None: @@ -721,7 +711,7 @@ class FileDownloader(object): return try: - self.pmkdir(filename) + os.makedirs(os.path.dirname(filename)) except (OSError, IOError), err: self.trouble(u'ERROR: unable to create directories: %s' % str(err)) return From eb99a7ee5f7bd36fa9dfcbaf0590ecc2854e3e30 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Tue, 6 Sep 2011 17:42:45 +0200 Subject: [PATCH 392/455] Bump version to 2011.09.06 --- youtube-dl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index 0b06c69cc..cc6462cc0 100755 --- a/youtube-dl +++ b/youtube-dl @@ -14,7 +14,7 @@ __author__ = ( ) __license__ = 'Public Domain' -__version__ = '2011.08.28-phihag' +__version__ = '2011.09.06-phihag' UPDATE_URL = 'https://raw.github.com/phihag/youtube-dl/master/youtube-dl' From e5e74ffb97106949c64000e3d4266d0bbf08cc7c Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Tue, 6 Sep 2011 17:56:05 +0200 Subject: [PATCH 393/455] Fix os.makedirs in Windows --- youtube-dl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index cc6462cc0..153d4132f 100755 --- a/youtube-dl +++ b/youtube-dl @@ -711,7 +711,9 @@ class FileDownloader(object): return try: - os.makedirs(os.path.dirname(filename)) + dn = os.path.dirname(filename) + if dn != '' and not os.path.exists(dn): + os.makedirs(dn) except (OSError, IOError), err: self.trouble(u'ERROR: unable to create directories: %s' % str(err)) return From 9b0a8bc1982a2f10f6e79e9e8fe4a787e4d665e2 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Tue, 6 Sep 2011 23:56:32 +0200 Subject: [PATCH 394/455] 
myvideo.de support --- youtube-dl | 79 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 79 insertions(+) diff --git a/youtube-dl b/youtube-dl index 153d4132f..f32716f1e 100755 --- a/youtube-dl +++ b/youtube-dl @@ -2954,6 +2954,82 @@ class BlipTVIE(InfoExtractor): self._downloader.trouble(u'\nERROR: unable to download video') +class MyVideoIE(InfoExtractor): + """Information Extractor for myvideo.de.""" + + _VALID_URL = r'(?:http://)?(?:www\.)?myvideo\.de/watch/([0-9]+)/([^?/]+).*' + + def __init__(self, downloader=None): + InfoExtractor.__init__(self, downloader) + + @staticmethod + def suitable(url): + return (re.match(MyVideoIE._VALID_URL, url) is not None) + + def report_download_webpage(self, video_id): + """Report webpage download.""" + self._downloader.to_screen(u'[myvideo] %s: Downloading webpage' % video_id) + + def report_extraction(self, video_id): + """Report information extraction.""" + self._downloader.to_screen(u'[myvideo] %s: Extracting information' % video_id) + + def _real_initialize(self): + return + + def _real_extract(self,url): + mobj = re.match(self._VALID_URL, url) + if mobj is None: + self._download.trouble(u'ERROR: invalid URL: %s' % url) + return + + video_id = mobj.group(1) + simple_title = mobj.group(2).decode('utf-8') + # should actually not be necessary + simple_title = sanitize_title(simple_title) + simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', simple_title) + + # Get video webpage + request = urllib2.Request('http://www.myvideo.de/watch/%s' % video_id) + try: + self.report_download_webpage(video_id) + webpage = urllib2.urlopen(request).read() + except (urllib2.URLError, httplib.HTTPException, socket.error), err: + self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err)) + return + + self.report_extraction(video_id) + mobj = re.search(r'', + webpage) + if mobj is None: + self._downloader.trouble(u'ERROR: unable to extract media URL') + return + video_url = 
mobj.group(1) + ('/%s.flv' % video_id) + + mobj = re.search('([^<]+)', webpage) + if mobj is None: + self._downloader.trouble(u'ERROR: unable to extract title') + return + + video_title = mobj.group(1) + video_title = sanitize_title(video_title) + + try: + print(video_url) + self._downloader.process_info({ + 'id': video_id, + 'url': video_url, + 'uploader': u'NA', + 'upload_date': u'NA', + 'title': video_title, + 'stitle': simple_title, + 'ext': u'flv', + 'format': u'NA', + 'player_url': None, + }) + except UnavailableVideoError: + self._downloader.trouble(u'\nERROR: Unable to download video') + class PostProcessor(object): """Post Processor class. @@ -3369,6 +3445,8 @@ def main(): facebook_ie = FacebookIE() bliptv_ie = BlipTVIE() vimeo_ie = VimeoIE() + myvideo_ie = MyVideoIE() + generic_ie = GenericIE() # File downloader @@ -3425,6 +3503,7 @@ def main(): fd.add_info_extractor(facebook_ie) fd.add_info_extractor(bliptv_ie) fd.add_info_extractor(vimeo_ie) + fd.add_info_extractor(myvideo_ie) # This must come last since it's the # fallback if none of the others work From 6fc5b0bb17f814579c8e3b130a4ff0824333e959 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Tue, 6 Sep 2011 23:58:00 +0200 Subject: [PATCH 395/455] =?UTF-8?q?Credit=20S=C3=B6ren=20Schulze=20for=20m?= =?UTF-8?q?yvideo=20support?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- youtube-dl | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube-dl b/youtube-dl index f32716f1e..135e05c0f 100755 --- a/youtube-dl +++ b/youtube-dl @@ -11,6 +11,7 @@ __author__ = ( 'Gergely Imreh', 'Rogério Brito', 'Philipp Hagemeister', + 'Sören Schulze', ) __license__ = 'Public Domain' From cec3a53cbdc84bf83062a1016a7c6bd77393a9ea Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Wed, 7 Sep 2011 09:35:22 +0200 Subject: [PATCH 396/455] Do not try to re-encode unicode filenames (Closes #13) --- youtube-dl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff 
--git a/youtube-dl b/youtube-dl index 135e05c0f..2a116042e 100755 --- a/youtube-dl +++ b/youtube-dl @@ -716,7 +716,7 @@ class FileDownloader(object): if dn != '' and not os.path.exists(dn): os.makedirs(dn) except (OSError, IOError), err: - self.trouble(u'ERROR: unable to create directories: %s' % str(err)) + self.trouble(u'ERROR: unable to create directory ' + unicode(err)) return if self.params.get('writedescription', False): @@ -729,7 +729,7 @@ class FileDownloader(object): finally: descfile.close() except (OSError, IOError): - self.trouble(u'ERROR: Cannot write description file: %s' % str(descfn)) + self.trouble(u'ERROR: Cannot write description file ' + descfn) return if self.params.get('writeinfojson', False): @@ -747,7 +747,7 @@ class FileDownloader(object): finally: infof.close() except (OSError, IOError): - self.trouble(u'ERROR: Cannot write metadata to JSON file: %s' % str(infofn)) + self.trouble(u'ERROR: Cannot write metadata to JSON file ' + infofn) return try: From c8e30044b8180d88ff49a2d1540fd34a81dacfee Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Wed, 7 Sep 2011 21:36:06 +0200 Subject: [PATCH 397/455] Rudimentary support for comedycentral (rtmpdump currently broken) --- youtube-dl | 97 ++++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 95 insertions(+), 2 deletions(-) diff --git a/youtube-dl b/youtube-dl index 2a116042e..23603438d 100755 --- a/youtube-dl +++ b/youtube-dl @@ -63,6 +63,11 @@ try: except ImportError: pass # Handled below +try: + import xml.etree.ElementTree +except ImportError: # Python<2.5 + pass # Not officially supported, but let it slip + std_headers = { 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:5.0.1) Gecko/20100101 Firefox/5.0.1', 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7', @@ -817,7 +822,7 @@ class FileDownloader(object): # Download using rtmpdump. rtmpdump returns exit code 2 when # the connection was interrumpted and resuming appears to be # possible. 
This is part of rtmpdump's normal usage, AFAIK. - basic_args = ['rtmpdump', '-q'] + [[], ['-W', player_url]][player_url is not None] + ['-r', url, '-o', tmpfilename] + basic_args = ['rtmpdump'] + [[], ['-W', player_url]][player_url is not None] + ['-r', url, '-o', tmpfilename] retval = subprocess.call(basic_args + [[], ['-e', '-k', '1']][self.params.get('continuedl', False)]) while retval == 2 or retval == 1: prevsize = os.path.getsize(tmpfilename) @@ -3031,6 +3036,91 @@ class MyVideoIE(InfoExtractor): except UnavailableVideoError: self._downloader.trouble(u'\nERROR: Unable to download video') +class ComedyCentralIE(InfoExtractor): + """Information extractor for blip.tv""" + + _VALID_URL = r'^(?:https?://)?(www\.)?(thedailyshow|colbertnation)\.com/full-episodes/(.*)$' + + @staticmethod + def suitable(url): + return (re.match(ComedyCentralIE._VALID_URL, url) is not None) + + def report_extraction(self, episode_id): + self._downloader.to_screen(u'[comedycentral] %s: Extracting information' % episode_id) + + def report_config_download(self, episode_id): + self._downloader.to_screen(u'[comedycentral] %s: Downloading configuration' % episode_id) + + def _simplify_title(self, title): + res = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', title) + res = res.strip(ur'_') + return res + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + if mobj is None: + self._downloader.trouble(u'ERROR: invalid URL: %s' % url) + return + epTitle = mobj.group(3) + + req = urllib2.Request(url) + self.report_extraction(epTitle) + try: + html = urllib2.urlopen(req).read() + except (urllib2.URLError, httplib.HTTPException, socket.error), err: + self._downloader.trouble(u'ERROR: unable to download webpage: %s' % unicode(err)) + return + + mMovieParams = re.findall('', html) + if len(mMovieParams) == 0: + self._downloader.trouble(u'ERROR: unable to find Flash URL in webpage ' + url) + return + ACT_COUNT = 4 + mediaNum = int(mMovieParams[0][1]) - ACT_COUNT + + for 
actNum in range(ACT_COUNT): + mediaId = mMovieParams[0][0] + str(mediaNum + actNum) + configUrl = ('http://www.comedycentral.com/global/feeds/entertainment/media/mediaGenEntertainment.jhtml?' + + urllib.urlencode({'uri': mediaId})) + configReq = urllib2.Request(configUrl) + self.report_config_download(epTitle) + try: + configXml = urllib2.urlopen(configReq).read() + except (urllib2.URLError, httplib.HTTPException, socket.error), err: + self._downloader.trouble(u'ERROR: unable to download webpage: %s' % unicode(err)) + return + + cdoc = xml.etree.ElementTree.fromstring(configXml) + turls = [] + for rendition in cdoc.findall('.//rendition'): + finfo = (rendition.attrib['bitrate'], rendition.findall('./src')[0].text) + turls.append(finfo) + + # For now, just pick the highest bitrate + format,video_url = turls[-1] + + self._downloader.increment_downloads() + actTitle = epTitle + '-act' + str(actNum+1) + info = { + 'id': epTitle, + 'url': video_url, + 'uploader': 'NA', + 'upload_date': 'NA', + 'title': actTitle, + 'stitle': self._simplify_title(actTitle), + 'ext': 'mp4', + 'format': format, + 'thumbnail': None, + 'description': 'TODO: Not yet supported', + 'player_url': None + } + + try: + self._downloader.process_info(info) + except UnavailableVideoError, err: + self._downloader.trouble(u'\nERROR: unable to download video') + + class PostProcessor(object): """Post Processor class. 
@@ -3375,7 +3465,8 @@ def main(): # General configuration cookie_processor = urllib2.HTTPCookieProcessor(jar) - urllib2.install_opener(urllib2.build_opener(urllib2.ProxyHandler(), cookie_processor, YoutubeDLHandler())) + opener = urllib2.build_opener(urllib2.ProxyHandler(), cookie_processor, YoutubeDLHandler()) + urllib2.install_opener(opener) socket.setdefaulttimeout(300) # 5 minutes should be enough (famous last words) # Batch file verification @@ -3447,6 +3538,7 @@ def main(): bliptv_ie = BlipTVIE() vimeo_ie = VimeoIE() myvideo_ie = MyVideoIE() + comedycentral_ie = ComedyCentralIE() generic_ie = GenericIE() @@ -3505,6 +3597,7 @@ def main(): fd.add_info_extractor(bliptv_ie) fd.add_info_extractor(vimeo_ie) fd.add_info_extractor(myvideo_ie) + fd.add_info_extractor(comedycentral_ie) # This must come last since it's the # fallback if none of the others work From 0f862ea18cdfdc4489c0b1915d52bd2296c1ebc3 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Wed, 7 Sep 2011 21:43:19 +0200 Subject: [PATCH 398/455] comedycentral: include player URL (still broken) --- youtube-dl | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/youtube-dl b/youtube-dl index 23603438d..71e7aa8d8 100755 --- a/youtube-dl +++ b/youtube-dl @@ -3071,15 +3071,17 @@ class ComedyCentralIE(InfoExtractor): self._downloader.trouble(u'ERROR: unable to download webpage: %s' % unicode(err)) return - mMovieParams = re.findall('', html) + mMovieParams = re.findall('', html) if len(mMovieParams) == 0: self._downloader.trouble(u'ERROR: unable to find Flash URL in webpage ' + url) return ACT_COUNT = 4 - mediaNum = int(mMovieParams[0][1]) - ACT_COUNT + player_url = mMovieParams[0][0] + mediaNum = int(mMovieParams[0][2]) - ACT_COUNT + movieId = mMovieParams[0][1] for actNum in range(ACT_COUNT): - mediaId = mMovieParams[0][0] + str(mediaNum + actNum) + mediaId = movieId + str(mediaNum + actNum) configUrl = 
('http://www.comedycentral.com/global/feeds/entertainment/media/mediaGenEntertainment.jhtml?' + urllib.urlencode({'uri': mediaId})) configReq = urllib2.Request(configUrl) @@ -3112,7 +3114,7 @@ class ComedyCentralIE(InfoExtractor): 'format': format, 'thumbnail': None, 'description': 'TODO: Not yet supported', - 'player_url': None + 'player_url': player_url } try: From fedf9f390210d0a06f323f0476681b607ee57b0f Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Wed, 7 Sep 2011 22:06:09 +0200 Subject: [PATCH 399/455] Basic comedycentral (The Daily Show) support (Will work as soon as rtmpdump gets fixed) --- youtube-dl | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/youtube-dl b/youtube-dl index 71e7aa8d8..a1245a8b4 100755 --- a/youtube-dl +++ b/youtube-dl @@ -3051,6 +3051,9 @@ class ComedyCentralIE(InfoExtractor): def report_config_download(self, episode_id): self._downloader.to_screen(u'[comedycentral] %s: Downloading configuration' % episode_id) + def report_player_url(self, episode_id): + self._downloader.to_screen(u'[comedycentral] %s: Determining player URL' % episode_id) + def _simplify_title(self, title): res = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', title) res = res.strip(ur'_') @@ -3076,10 +3079,19 @@ class ComedyCentralIE(InfoExtractor): self._downloader.trouble(u'ERROR: unable to find Flash URL in webpage ' + url) return ACT_COUNT = 4 - player_url = mMovieParams[0][0] + first_player_url = mMovieParams[0][0] mediaNum = int(mMovieParams[0][2]) - ACT_COUNT movieId = mMovieParams[0][1] + playerReq = urllib2.Request(first_player_url) + self.report_player_url(epTitle) + try: + playerResponse = urllib2.urlopen(playerReq) + except (urllib2.URLError, httplib.HTTPException, socket.error), err: + self._downloader.trouble(u'ERROR: unable to download player: %s' % unicode(err)) + return + player_url = playerResponse.geturl() + for actNum in range(ACT_COUNT): mediaId = movieId + str(mediaNum + actNum) configUrl = 
('http://www.comedycentral.com/global/feeds/entertainment/media/mediaGenEntertainment.jhtml?' + @@ -3102,7 +3114,7 @@ class ComedyCentralIE(InfoExtractor): format,video_url = turls[-1] self._downloader.increment_downloads() - actTitle = epTitle + '-act' + str(actNum+1) + actTitle = 'act' + str(actNum+1) info = { 'id': epTitle, 'url': video_url, From 46c8c43266ebeb6013c1424cd7ec5a43ee57fef0 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Wed, 7 Sep 2011 22:42:33 +0200 Subject: [PATCH 400/455] Switch around act and episode title (makes -t nicer) --- youtube-dl | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/youtube-dl b/youtube-dl index a1245a8b4..f3472f253 100755 --- a/youtube-dl +++ b/youtube-dl @@ -3103,7 +3103,7 @@ class ComedyCentralIE(InfoExtractor): except (urllib2.URLError, httplib.HTTPException, socket.error), err: self._downloader.trouble(u'ERROR: unable to download webpage: %s' % unicode(err)) return - + cdoc = xml.etree.ElementTree.fromstring(configXml) turls = [] for rendition in cdoc.findall('.//rendition'): @@ -3116,19 +3116,19 @@ class ComedyCentralIE(InfoExtractor): self._downloader.increment_downloads() actTitle = 'act' + str(actNum+1) info = { - 'id': epTitle, + 'id': actTitle, 'url': video_url, 'uploader': 'NA', 'upload_date': 'NA', - 'title': actTitle, - 'stitle': self._simplify_title(actTitle), + 'title': epTitle, + 'stitle': self._simplify_title(epTitle), 'ext': 'mp4', 'format': format, 'thumbnail': None, 'description': 'TODO: Not yet supported', 'player_url': player_url } - + try: self._downloader.process_info(info) except UnavailableVideoError, err: From a88bc6bbd388efba2e7c6534a185cf57bebfe046 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Wed, 7 Sep 2011 23:15:26 +0200 Subject: [PATCH 401/455] Temporarily fix dailyshow+colbertnation media IDs --- youtube-dl | 32 ++++++++++++++++++++++++-------- 1 file changed, 24 insertions(+), 8 deletions(-) diff --git a/youtube-dl b/youtube-dl index 
f3472f253..651e9d3a1 100755 --- a/youtube-dl +++ b/youtube-dl @@ -3074,13 +3074,22 @@ class ComedyCentralIE(InfoExtractor): self._downloader.trouble(u'ERROR: unable to download webpage: %s' % unicode(err)) return - mMovieParams = re.findall('', html) + mMovieParams = re.findall('', html) if len(mMovieParams) == 0: self._downloader.trouble(u'ERROR: unable to find Flash URL in webpage ' + url) return - ACT_COUNT = 4 + show_id = mMovieParams[0][2] + ACT_COUNT = { # TODO: Detect this dynamically + 'thedailyshow.com': 4, + 'colbertnation.com': 3, + }.get(show_id, 4) + OFFSET = { + 'thedailyshow.com': -ACT_COUNT, + 'colbertnation.com': 1, + }.get(show_id, -ACT_COUNT) + first_player_url = mMovieParams[0][0] - mediaNum = int(mMovieParams[0][2]) - ACT_COUNT + mediaNum = int(mMovieParams[0][3]) + OFFSET movieId = mMovieParams[0][1] playerReq = urllib2.Request(first_player_url) @@ -3093,6 +3102,7 @@ class ComedyCentralIE(InfoExtractor): player_url = playerResponse.geturl() for actNum in range(ACT_COUNT): + actTitle = 'act' + str(actNum+1) mediaId = movieId + str(mediaNum + actNum) configUrl = ('http://www.comedycentral.com/global/feeds/entertainment/media/mediaGenEntertainment.jhtml?' 
+ urllib.urlencode({'uri': mediaId})) @@ -3110,18 +3120,23 @@ class ComedyCentralIE(InfoExtractor): finfo = (rendition.attrib['bitrate'], rendition.findall('./src')[0].text) turls.append(finfo) + if len(turls) == 0: + self._downloader.trouble(u'\nERROR: unable to download ' + actTitle + ': No videos found') + continue + # For now, just pick the highest bitrate format,video_url = turls[-1] self._downloader.increment_downloads() - actTitle = 'act' + str(actNum+1) + + effTitle = show_id.replace('.com', '') + '-' + epTitle info = { 'id': actTitle, 'url': video_url, - 'uploader': 'NA', + 'uploader': show_id, 'upload_date': 'NA', - 'title': epTitle, - 'stitle': self._simplify_title(epTitle), + 'title': effTitle, + 'stitle': self._simplify_title(effTitle), 'ext': 'mp4', 'format': format, 'thumbnail': None, @@ -3132,7 +3147,8 @@ class ComedyCentralIE(InfoExtractor): try: self._downloader.process_info(info) except UnavailableVideoError, err: - self._downloader.trouble(u'\nERROR: unable to download video') + self._downloader.trouble(u'\nERROR: unable to download ' + actTitle) + continue class PostProcessor(object): From 5991ddfd7adf7a45f8637b313bad881b646891ea Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Thu, 8 Sep 2011 18:49:28 +0200 Subject: [PATCH 402/455] comedycentral: Use media number instead of act number as ID --- youtube-dl | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/youtube-dl b/youtube-dl index 651e9d3a1..017a4610c 100755 --- a/youtube-dl +++ b/youtube-dl @@ -3089,7 +3089,7 @@ class ComedyCentralIE(InfoExtractor): }.get(show_id, -ACT_COUNT) first_player_url = mMovieParams[0][0] - mediaNum = int(mMovieParams[0][3]) + OFFSET + startMediaNum = int(mMovieParams[0][3]) + OFFSET movieId = mMovieParams[0][1] playerReq = urllib2.Request(first_player_url) @@ -3102,8 +3102,8 @@ class ComedyCentralIE(InfoExtractor): player_url = playerResponse.geturl() for actNum in range(ACT_COUNT): - actTitle = 'act' + str(actNum+1) - mediaId 
= movieId + str(mediaNum + actNum) + mediaNum = startMediaNum + actNum + mediaId = movieId + str(mediaNum) configUrl = ('http://www.comedycentral.com/global/feeds/entertainment/media/mediaGenEntertainment.jhtml?' + urllib.urlencode({'uri': mediaId})) configReq = urllib2.Request(configUrl) @@ -3121,7 +3121,7 @@ class ComedyCentralIE(InfoExtractor): turls.append(finfo) if len(turls) == 0: - self._downloader.trouble(u'\nERROR: unable to download ' + actTitle + ': No videos found') + self._downloader.trouble(u'\nERROR: unable to download ' + str(mediaNum) + ': No videos found') continue # For now, just pick the highest bitrate @@ -3131,7 +3131,7 @@ class ComedyCentralIE(InfoExtractor): effTitle = show_id.replace('.com', '') + '-' + epTitle info = { - 'id': actTitle, + 'id': str(mediaNum), 'url': video_url, 'uploader': show_id, 'upload_date': 'NA', @@ -3147,7 +3147,7 @@ class ComedyCentralIE(InfoExtractor): try: self._downloader.process_info(info) except UnavailableVideoError, err: - self._downloader.trouble(u'\nERROR: unable to download ' + actTitle) + self._downloader.trouble(u'\nERROR: unable to download ' + str(mediaNum)) continue From d793aebaed09f9ec19e5b7d07e8e3063545e9a72 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Fri, 9 Sep 2011 08:14:01 +0200 Subject: [PATCH 403/455] comedycentral: 1 seems to be the constant correct offset --- youtube-dl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube-dl b/youtube-dl index 017a4610c..1b2ccae2f 100755 --- a/youtube-dl +++ b/youtube-dl @@ -15,7 +15,7 @@ __author__ = ( ) __license__ = 'Public Domain' -__version__ = '2011.09.06-phihag' +__version__ = '2011.09.09-phihag' UPDATE_URL = 'https://raw.github.com/phihag/youtube-dl/master/youtube-dl' @@ -3084,9 +3084,9 @@ class ComedyCentralIE(InfoExtractor): 'colbertnation.com': 3, }.get(show_id, 4) OFFSET = { - 'thedailyshow.com': -ACT_COUNT, + 'thedailyshow.com': 1, 'colbertnation.com': 1, - }.get(show_id, -ACT_COUNT) + }.get(show_id, 1) 
first_player_url = mMovieParams[0][0] startMediaNum = int(mMovieParams[0][3]) + OFFSET From 454d6691d8f38b24bd0bcac9ea77993f9a8f5852 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Fri, 9 Sep 2011 08:41:52 +0200 Subject: [PATCH 404/455] Include ERROR: no fmt_url_map or conn information found in video info in FAQ --- README.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 66639ad04..af03fc0a3 100644 --- a/README.md +++ b/README.md @@ -80,6 +80,10 @@ Once the video is fully downloaded, use any video player, such as [vlc](http://w The URLs youtube-dl outputs require the downloader to have the correct cookies. Use the `--cookies` option to write the required cookies into a file, and advise your downloader to read cookies from that file. +### ERROR: no fmt_url_map or conn information found in video info + +youtube has switched to a new video info format in July 2011 which is not supported by old versions of youtube-dl. You can update youtube-dl with `sudo youtube-dl -U`. + ## COPYRIGHT **youtube-dl**: Copyright © 2006-2011 Ricardo Garcia Gonzalez. The program is released into the public domain by the copyright holder. This README file was @@ -87,4 +91,4 @@ originally written by Daniel Bolton () and is likewise released into the public domain. ## BUGS -Bugs should be reported at: +Bugs and suggestions should be reported at: From e133e1213f3242bec6c3139f43c686bc84d71bfa Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Fri, 9 Sep 2011 08:47:00 +0200 Subject: [PATCH 405/455] README: More bug filing instructions --- README.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/README.md b/README.md index af03fc0a3..f292e85d7 100644 --- a/README.md +++ b/README.md @@ -91,4 +91,12 @@ originally written by Daniel Bolton () and is likewise released into the public domain. 
## BUGS + Bugs and suggestions should be reported at: + +Please include: + +* Your exact command line, like `youtube-dl -t "http://www.youtube.com/watch?v=uHlDtZ6Oc3s&feature=channel_video_title"`. A common mistake is not to escape the `&`. Putting URLs in quotes should solve this problem. +* The output of `youtube-dl --version` +* The output of `python --version` +* The name and version of your Operating System ("Ubuntu 11.04 x64" or "Windows 7 x64" is usually enough). From 5a2ba45e09aca26f3277089194f963e4e8ae1909 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Tue, 13 Sep 2011 21:51:06 +0200 Subject: [PATCH 406/455] Clarify README --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index f292e85d7..345a0b752 100644 --- a/README.md +++ b/README.md @@ -82,7 +82,7 @@ The URLs youtube-dl outputs require the downloader to have the correct cookies. ### ERROR: no fmt_url_map or conn information found in video info -youtube has switched to a new video info format in July 2011 which is not supported by old versions of youtube-dl. You can update youtube-dl with `sudo youtube-dl -U`. +youtube has switched to a new video info format in July 2011 which is not supported by old versions of youtube-dl. You can update youtube-dl with `sudo youtube-dl --update`. ## COPYRIGHT **youtube-dl**: Copyright © 2006-2011 Ricardo Garcia Gonzalez. 
The program is From f166bccc8f4366531783d0e0c4c1eb3a585cdfb0 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Tue, 13 Sep 2011 21:51:44 +0200 Subject: [PATCH 407/455] Allow downloading current thedailyshow episode with youtube-dl :tds --- youtube-dl | 32 ++++++++++++++++++++++++++++---- 1 file changed, 28 insertions(+), 4 deletions(-) diff --git a/youtube-dl b/youtube-dl index 1b2ccae2f..9d379dcd1 100755 --- a/youtube-dl +++ b/youtube-dl @@ -3037,9 +3037,9 @@ class MyVideoIE(InfoExtractor): self._downloader.trouble(u'\nERROR: Unable to download video') class ComedyCentralIE(InfoExtractor): - """Information extractor for blip.tv""" + """Information extractor for The Daily Show and Colbert Report """ - _VALID_URL = r'^(?:https?://)?(www\.)?(thedailyshow|colbertnation)\.com/full-episodes/(.*)$' + _VALID_URL = r'^(:(?Ptds|thedailyshow|cr|colbert|colbertnation|colbertreport))|(https?://)?(www\.)(?Pthedailyshow|colbertnation)\.com/full-episodes/(?P.*)$' @staticmethod def suitable(url): @@ -3064,15 +3064,39 @@ class ComedyCentralIE(InfoExtractor): if mobj is None: self._downloader.trouble(u'ERROR: invalid URL: %s' % url) return - epTitle = mobj.group(3) + + if mobj.group('shortname'): + if mobj.group('shortname') in ('tds', 'thedailyshow'): + url = 'http://www.thedailyshow.com/full-episodes/' + else: + url = 'http://www.colbertnation.com/full-episodes/' + mobj = re.match(self._VALID_URL, url) + assert mobj is not None + + dlNewest = not mobj.group('episode') + if dlNewest: + epTitle = mobj.group('showname') + else: + epTitle = mobj.group('episode') req = urllib2.Request(url) self.report_extraction(epTitle) try: - html = urllib2.urlopen(req).read() + htmlHandle = urllib2.urlopen(req) + html = htmlHandle.read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: self._downloader.trouble(u'ERROR: unable to download webpage: %s' % unicode(err)) return + if dlNewest: + url = htmlHandle.geturl() + mobj = re.match(self._VALID_URL, url) + if mobj is None: 
+ self._downloader.trouble(u'ERROR: Invalid redirected URL: ' + url) + return + if mobj.group('episode') == '': + self._downloader.trouble(u'ERROR: Redirected URL is still not specific: ' + url) + return + epTitle = mobj.group('episode') mMovieParams = re.findall('', html) if len(mMovieParams) == 0: From b576abb4572c61289269db61da8c472d487a7a03 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Tue, 13 Sep 2011 22:29:50 +0200 Subject: [PATCH 408/455] Automatically generate LATEST_VERSION (Closes #16) --- LATEST_VERSION | 1 + Makefile | 6 ++++-- 2 files changed, 5 insertions(+), 2 deletions(-) create mode 100644 LATEST_VERSION diff --git a/LATEST_VERSION b/LATEST_VERSION new file mode 100644 index 000000000..afa0b7718 --- /dev/null +++ b/LATEST_VERSION @@ -0,0 +1 @@ +2011.09.13 diff --git a/Makefile b/Makefile index 0039f90c9..e725dc720 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,9 @@ default: update -update: update-readme +update: update-readme update-latest +update-latest: + ./youtube-dl --version > LATEST_VERSION update-readme: @options=$$(COLUMNS=80 ./youtube-dl --help | sed -e '1,/.*General Options.*/ d' -e 's/^\W\{2\}\(\w\)/### \1/') && \ @@ -15,4 +17,4 @@ update-readme: -.PHONY: default update update-readme +.PHONY: default update update-latest update-readme From 93e1659586d0741f5f7987e6c5232e9636f7df72 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Tue, 13 Sep 2011 22:39:20 +0200 Subject: [PATCH 409/455] Bump version number (remove -phihag) --- youtube-dl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index 9d379dcd1..4bb84386a 100755 --- a/youtube-dl +++ b/youtube-dl @@ -15,7 +15,7 @@ __author__ = ( ) __license__ = 'Public Domain' -__version__ = '2011.09.09-phihag' +__version__ = '2011.09.13' UPDATE_URL = 'https://raw.github.com/phihag/youtube-dl/master/youtube-dl' From 803abae20608d48c666e8401a32133bdebcf44cb Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Tue, 13 Sep 2011 23:54:30 
+0200 Subject: [PATCH 410/455] Do not claim copyright in README (Closes #157) --- README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 345a0b752..0e1f78a6a 100644 --- a/README.md +++ b/README.md @@ -85,10 +85,10 @@ The URLs youtube-dl outputs require the downloader to have the correct cookies. youtube has switched to a new video info format in July 2011 which is not supported by old versions of youtube-dl. You can update youtube-dl with `sudo youtube-dl --update`. ## COPYRIGHT -**youtube-dl**: Copyright © 2006-2011 Ricardo Garcia Gonzalez. The program is -released into the public domain by the copyright holder. This README file was -originally written by Daniel Bolton () and is -likewise released into the public domain. + +youtube-dl is released into the public domain by the copyright holders. + +This README file was originally written by Daniel Bolton () and is likewise released into the public domain. ## BUGS From 8236e85178b2f9af08a1cef3fb5da5ac26a541d9 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Tue, 13 Sep 2011 23:58:31 +0200 Subject: [PATCH 411/455] s#phihag#rg3 --- README.md | 4 ++-- youtube-dl | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 0e1f78a6a..b1511f38e 100644 --- a/README.md +++ b/README.md @@ -70,7 +70,7 @@ Most people asking this question are not aware that youtube-dl now defaults to d ### I get HTTP error 402 when trying to download a video. What''s this? -Apparently YouTube requires you to pass a CAPTCHA test if you download too much. We''re [considering to provide a way to let you solve the CAPTCHA](https://github.com/phihag/youtube-dl/issues/8), but at the moment, your best course of action is pointing a webbrowser to the youtube URL, solving the CAPTCHA, and restart youtube-dl. +Apparently YouTube requires you to pass a CAPTCHA test if you download too much. 
We''re [considering to provide a way to let you solve the CAPTCHA](https://github.com/rg3/youtube-dl/issues/154), but at the moment, your best course of action is pointing a webbrowser to the youtube URL, solving the CAPTCHA, and restart youtube-dl. ### I have downloaded a video but how can I play it? @@ -92,7 +92,7 @@ This README file was originally written by Daniel Bolton ( +Bugs and suggestions should be reported at: Please include: diff --git a/youtube-dl b/youtube-dl index 781dff758..0e8347a86 100755 --- a/youtube-dl +++ b/youtube-dl @@ -17,7 +17,7 @@ __author__ = ( __license__ = 'Public Domain' __version__ = '2011.09.13' -UPDATE_URL = 'https://raw.github.com/phihag/youtube-dl/master/youtube-dl' +UPDATE_URL = 'https://raw.github.com/rg3/youtube-dl/master/youtube-dl' import cookielib import datetime From b90bcbe79e2fb71a687e4caa40447f3cecd63542 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Tue, 13 Sep 2011 23:58:46 +0200 Subject: [PATCH 412/455] Bump version number --- youtube-dl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index 0e8347a86..50d83cf2d 100755 --- a/youtube-dl +++ b/youtube-dl @@ -15,7 +15,7 @@ __author__ = ( ) __license__ = 'Public Domain' -__version__ = '2011.09.13' +__version__ = '2011.09.14' UPDATE_URL = 'https://raw.github.com/rg3/youtube-dl/master/youtube-dl' From d0922f29a3f3084f2ab65236dc53d3764762c0f1 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Wed, 14 Sep 2011 00:04:46 +0200 Subject: [PATCH 413/455] Update LATEST_VERSION (oops) --- LATEST_VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/LATEST_VERSION b/LATEST_VERSION index afa0b7718..c34db6478 100644 --- a/LATEST_VERSION +++ b/LATEST_VERSION @@ -1 +1 @@ -2011.09.13 +2011.09.14 From b487ef0833678930ed58901ca67d3a13b84007ff Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Wed, 14 Sep 2011 21:17:05 +0200 Subject: [PATCH 414/455] Fully implement comedycentral downloader --- youtube-dl | 70 
+++++++++++++++++++++++++++++++----------------------- 1 file changed, 40 insertions(+), 30 deletions(-) diff --git a/youtube-dl b/youtube-dl index 50d83cf2d..3ecae2994 100755 --- a/youtube-dl +++ b/youtube-dl @@ -822,7 +822,7 @@ class FileDownloader(object): # Download using rtmpdump. rtmpdump returns exit code 2 when # the connection was interrumpted and resuming appears to be # possible. This is part of rtmpdump's normal usage, AFAIK. - basic_args = ['rtmpdump'] + [[], ['-W', player_url]][player_url is not None] + ['-r', url, '-o', tmpfilename] + basic_args = ['rtmpdump', '-q'] + [[], ['-W', player_url]][player_url is not None] + ['-r', url, '-o', tmpfilename] retval = subprocess.call(basic_args + [[], ['-e', '-k', '1']][self.params.get('continuedl', False)]) while retval == 2 or retval == 1: prevsize = os.path.getsize(tmpfilename) @@ -832,6 +832,11 @@ class FileDownloader(object): cursize = os.path.getsize(tmpfilename) if prevsize == cursize and retval == 1: break + # Some rtmp streams seem abort after ~ 99.8%. Don't complain for those + if prevsize == cursize and retval == 2 and cursize > 1024: + self.to_screen(u'\r[rtmpdump] Could not download the whole video. 
This can happen for some advertisements.') + retval = 0 + break if retval == 0: self.to_screen(u'\r[rtmpdump] %s bytes' % os.path.getsize(tmpfilename)) self.try_rename(tmpfilename, filename) @@ -3055,6 +3060,9 @@ class ComedyCentralIE(InfoExtractor): def report_config_download(self, episode_id): self._downloader.to_screen(u'[comedycentral] %s: Downloading configuration' % episode_id) + def report_index_download(self, episode_id): + self._downloader.to_screen(u'[comedycentral] %s: Downloading show index' % episode_id) + def report_player_url(self, episode_id): self._downloader.to_screen(u'[comedycentral] %s: Determining player URL' % episode_id) @@ -3102,36 +3110,38 @@ class ComedyCentralIE(InfoExtractor): return epTitle = mobj.group('episode') - mMovieParams = re.findall('', html) + mMovieParams = re.findall('', html) if len(mMovieParams) == 0: self._downloader.trouble(u'ERROR: unable to find Flash URL in webpage ' + url) return - show_id = mMovieParams[0][2] - ACT_COUNT = { # TODO: Detect this dynamically - 'thedailyshow.com': 4, - 'colbertnation.com': 3, - }.get(show_id, 4) - OFFSET = { - 'thedailyshow.com': 1, - 'colbertnation.com': 1, - }.get(show_id, 1) - first_player_url = mMovieParams[0][0] - startMediaNum = int(mMovieParams[0][3]) + OFFSET - movieId = mMovieParams[0][1] - - playerReq = urllib2.Request(first_player_url) + playerUrl_raw = mMovieParams[0][0] self.report_player_url(epTitle) try: - playerResponse = urllib2.urlopen(playerReq) + urlHandle = urllib2.urlopen(playerUrl_raw) + playerUrl = urlHandle.geturl() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: unable to download player: %s' % unicode(err)) + self._downloader.trouble(u'ERROR: unable to find out player URL: ' + unicode(err)) return - player_url = playerResponse.geturl() - for actNum in range(ACT_COUNT): - mediaNum = startMediaNum + actNum - mediaId = movieId + str(mediaNum) + uri = mMovieParams[0][1] + indexUrl = 
'http://shadow.comedycentral.com/feeds/video_player/mrss/?' + urllib.urlencode({'uri': uri}) + self.report_index_download(epTitle) + try: + indexXml = urllib2.urlopen(indexUrl).read() + except (urllib2.URLError, httplib.HTTPException, socket.error), err: + self._downloader.trouble(u'ERROR: unable to download episode index: ' + unicode(err)) + return + + idoc = xml.etree.ElementTree.fromstring(indexXml) + itemEls = idoc.findall('.//item') + for itemEl in itemEls: + mediaId = itemEl.findall('./guid')[0].text + shortMediaId = mediaId.split(':')[-1] + showId = mediaId.split(':')[-2].replace('.com', '') + officialTitle = itemEl.findall('./title')[0].text + officialDate = itemEl.findall('./pubDate')[0].text + configUrl = ('http://www.comedycentral.com/global/feeds/entertainment/media/mediaGenEntertainment.jhtml?' + urllib.urlencode({'uri': mediaId})) configReq = urllib2.Request(configUrl) @@ -3149,7 +3159,7 @@ class ComedyCentralIE(InfoExtractor): turls.append(finfo) if len(turls) == 0: - self._downloader.trouble(u'\nERROR: unable to download ' + str(mediaNum) + ': No videos found') + self._downloader.trouble(u'\nERROR: unable to download ' + mediaId + ': No videos found') continue # For now, just pick the highest bitrate @@ -3157,25 +3167,25 @@ class ComedyCentralIE(InfoExtractor): self._downloader.increment_downloads() - effTitle = show_id.replace('.com', '') + '-' + epTitle + effTitle = showId + '-' + epTitle info = { - 'id': str(mediaNum), + 'id': shortMediaId, 'url': video_url, - 'uploader': show_id, - 'upload_date': 'NA', + 'uploader': showId, + 'upload_date': officialDate, 'title': effTitle, 'stitle': self._simplify_title(effTitle), 'ext': 'mp4', 'format': format, 'thumbnail': None, - 'description': 'TODO: Not yet supported', - 'player_url': player_url + 'description': officialTitle, + 'player_url': playerUrl } try: self._downloader.process_info(info) except UnavailableVideoError, err: - self._downloader.trouble(u'\nERROR: unable to download ' + str(mediaNum)) + 
self._downloader.trouble(u'\nERROR: unable to download ' + mediaId) continue From f2a3a3522cffcf0f6fdf0be5046a32c00c52c361 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Wed, 14 Sep 2011 21:18:22 +0200 Subject: [PATCH 415/455] typo in README --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index b1511f38e..0217eee60 100644 --- a/README.md +++ b/README.md @@ -68,7 +68,7 @@ which means you can modify it, redistribute it or use it however you like. Most people asking this question are not aware that youtube-dl now defaults to downloading the highest available quality as reported by YouTube, which will be 1080p or 720p in some cases, so you no longer need the -b option. For some specific videos, maybe YouTube does not report them to be available in a specific high quality format you''re interested in. In that case, simply request it with the -f option and youtube-dl will try to download it. -### I get HTTP error 402 when trying to download a video. What''s this? +### I get HTTP error 402 when trying to download a video. What's this? Apparently YouTube requires you to pass a CAPTCHA test if you download too much. We''re [considering to provide a way to let you solve the CAPTCHA](https://github.com/rg3/youtube-dl/issues/154), but at the moment, your best course of action is pointing a webbrowser to the youtube URL, solving the CAPTCHA, and restart youtube-dl. From a2f7e3a5bbb27861a9fac1cd5fe7de1640343187 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Wed, 14 Sep 2011 21:19:33 +0200 Subject: [PATCH 416/455] Clarify usage --- README.md | 2 +- youtube-dl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 0217eee60..6aa5ea3de 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ # youtube-dl ## USAGE -youtube-dl [OPTIONS] URL +youtube-dl [OPTIONS] URL [URL...] 
## DESCRIPTION **youtube-dl** is a small command-line program to download videos from diff --git a/youtube-dl b/youtube-dl index 3ecae2994..e01cdc75f 100755 --- a/youtube-dl +++ b/youtube-dl @@ -3391,7 +3391,7 @@ def parseOpts(): kw = { 'version' : __version__, 'formatter' : fmt, - 'usage' : '%prog [options] url...', + 'usage' : '%prog [options] url [url...]', 'conflict_handler' : 'resolve', } From 1d2e86aed9fab2a26a56c5342ddbaf4ea1724d42 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Wed, 14 Sep 2011 21:20:23 +0200 Subject: [PATCH 417/455] Decapitalize options in README for consistency with youtube-dl --help --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 6aa5ea3de..9fdbc95c7 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ # youtube-dl ## USAGE -youtube-dl [OPTIONS] URL [URL...] +youtube-dl [options] url [url...] ## DESCRIPTION **youtube-dl** is a small command-line program to download videos from From 8c5dc3ad4024eab1d167fb62a92eeabf7d895e59 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Wed, 14 Sep 2011 21:39:41 +0200 Subject: [PATCH 418/455] Simplify IE index --- youtube-dl | 58 +++++++++++++++++++++--------------------------------- 1 file changed, 22 insertions(+), 36 deletions(-) diff --git a/youtube-dl b/youtube-dl index e01cdc75f..5aff9c08c 100755 --- a/youtube-dl +++ b/youtube-dl @@ -3591,24 +3591,29 @@ def main(): # Information extractors youtube_ie = YoutubeIE() - metacafe_ie = MetacafeIE(youtube_ie) - dailymotion_ie = DailymotionIE() - youtube_pl_ie = YoutubePlaylistIE(youtube_ie) - youtube_user_ie = YoutubeUserIE(youtube_ie) - youtube_search_ie = YoutubeSearchIE(youtube_ie) google_ie = GoogleIE() - google_search_ie = GoogleSearchIE(google_ie) - photobucket_ie = PhotobucketIE() yahoo_ie = YahooIE() - yahoo_search_ie = YahooSearchIE(yahoo_ie) - deposit_files_ie = DepositFilesIE() - facebook_ie = FacebookIE() - bliptv_ie = BlipTVIE() - vimeo_ie = VimeoIE() - myvideo_ie 
= MyVideoIE() - comedycentral_ie = ComedyCentralIE() + extractors = [ # Order does matter + youtube_ie, + MetacafeIE(youtube_ie), + DailymotionIE(), + YoutubePlaylistIE(youtube_ie), + YoutubeUserIE(youtube_ie), + YoutubeSearchIE(youtube_ie), + google_ie, + GoogleSearchIE(google_ie), + PhotobucketIE(), + yahoo_ie, + YahooSearchIE(yahoo_ie), + DepositFilesIE(), + FacebookIE(), + BlipTVIE(), + VimeoIE(), + MyVideoIE(), + ComedyCentralIE(), - generic_ie = GenericIE() + GenericIE() + ] # File downloader fd = FileDownloader({ @@ -3649,27 +3654,8 @@ def main(): 'writedescription': opts.writedescription, 'writeinfojson': opts.writeinfojson, }) - fd.add_info_extractor(youtube_search_ie) - fd.add_info_extractor(youtube_pl_ie) - fd.add_info_extractor(youtube_user_ie) - fd.add_info_extractor(metacafe_ie) - fd.add_info_extractor(dailymotion_ie) - fd.add_info_extractor(youtube_ie) - fd.add_info_extractor(google_ie) - fd.add_info_extractor(google_search_ie) - fd.add_info_extractor(photobucket_ie) - fd.add_info_extractor(yahoo_ie) - fd.add_info_extractor(yahoo_search_ie) - fd.add_info_extractor(deposit_files_ie) - fd.add_info_extractor(facebook_ie) - fd.add_info_extractor(bliptv_ie) - fd.add_info_extractor(vimeo_ie) - fd.add_info_extractor(myvideo_ie) - fd.add_info_extractor(comedycentral_ie) - - # This must come last since it's the - # fallback if none of the others work - fd.add_info_extractor(generic_ie) + for extractor in extractors: + fd.add_info_extractor(extractor) # PostProcessors if opts.extractaudio: From f9c68787146e6278df0f29d0d4e2f0d4199f49b0 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Wed, 14 Sep 2011 22:26:53 +0200 Subject: [PATCH 419/455] Support for The Escapist --- youtube-dl | 89 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 89 insertions(+) diff --git a/youtube-dl b/youtube-dl index 5aff9c08c..719edeb9b 100755 --- a/youtube-dl +++ b/youtube-dl @@ -23,6 +23,7 @@ import cookielib import datetime import gzip import 
htmlentitydefs +import HTMLParser import httplib import locale import math @@ -3189,6 +3190,93 @@ class ComedyCentralIE(InfoExtractor): continue +class EscapistIE(InfoExtractor): + """Information extractor for The Escapist """ + + _VALID_URL = r'^(https?://)?(www\.)escapistmagazine.com/videos/view/(?P[^/]+)/(?P[^/?]+)[/?].*$' + + @staticmethod + def suitable(url): + return (re.match(EscapistIE._VALID_URL, url) is not None) + + def report_extraction(self, showName): + self._downloader.to_screen(u'[escapist] %s: Extracting information' % showName) + + def report_config_download(self, showName): + self._downloader.to_screen(u'[escapist] %s: Downloading configuration' % showName) + + def _simplify_title(self, title): + res = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', title) + res = res.strip(ur'_') + return res + + def _real_extract(self, url): + htmlParser = HTMLParser.HTMLParser() + + mobj = re.match(self._VALID_URL, url) + if mobj is None: + self._downloader.trouble(u'ERROR: invalid URL: %s' % url) + return + showName = mobj.group('showname') + videoId = mobj.group('episode') + + self.report_extraction(showName) + try: + webPage = urllib2.urlopen(url).read() + except (urllib2.URLError, httplib.HTTPException, socket.error), err: + self._downloader.trouble(u'ERROR: unable to download webpage: ' + unicode(err)) + return + + descMatch = re.search(' Date: Sat, 23 Jul 2011 00:51:06 -0700 Subject: [PATCH 420/455] Add --match-title and --reject-title (Closes #132) --- youtube-dl | 28 ++++++++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/youtube-dl b/youtube-dl index 719edeb9b..0973cc4ad 100755 --- a/youtube-dl +++ b/youtube-dl @@ -438,6 +438,8 @@ class FileDownloader(object): noprogress: Do not print the progress bar. playliststart: Playlist item to start at. playlistend: Playlist item to end at. + matchtitle: Download only matching titles. + rejecttitle: Reject downloads for matching titles. 
logtostderr: Log messages to stderr instead of stdout. consoletitle: Display progress in console window's titlebar. nopart: Do not use temporary .part files. @@ -713,6 +715,17 @@ class FileDownloader(object): if filename is None: return + + matchtitle=self.params.get('matchtitle',False) + rejecttitle=self.params.get('rejecttitle',False) + title=info_dict['title'].encode(preferredencoding(), 'xmlcharrefreplace') + if matchtitle and not re.search(matchtitle, title, re.IGNORECASE): + self.to_screen(u'[download] "%s" title did not match pattern "%s"' % (title, matchtitle)) + return + if rejecttitle and re.search(rejecttitle, title, re.IGNORECASE): + self.to_screen(u'[download] "%s" title matched reject pattern "%s"' % (title, rejecttitle)) + return + if self.params.get('nooverwrites', False) and os.path.exists(filename): self.to_stderr(u'WARNING: file exists and will be skipped') return @@ -3487,6 +3500,7 @@ def parseOpts(): # option groups general = optparse.OptionGroup(parser, 'General Options') + selection = optparse.OptionGroup(parser, 'Video Selection') authentication = optparse.OptionGroup(parser, 'Authentication Options') video_format = optparse.OptionGroup(parser, 'Video Format Options') postproc = optparse.OptionGroup(parser, 'Post-processing Options') @@ -3505,14 +3519,17 @@ def parseOpts(): dest='ratelimit', metavar='LIMIT', help='download rate limit (e.g. 
50k or 44.6m)') general.add_option('-R', '--retries', dest='retries', metavar='RETRIES', help='number of retries (default is 10)', default=10) - general.add_option('--playlist-start', - dest='playliststart', metavar='NUMBER', help='playlist video to start at (default is 1)', default=1) - general.add_option('--playlist-end', - dest='playlistend', metavar='NUMBER', help='playlist video to end at (default is last)', default=-1) general.add_option('--dump-user-agent', action='store_true', dest='dump_user_agent', help='display the current browser identification', default=False) + selection.add_option('--playlist-start', + dest='playliststart', metavar='NUMBER', help='playlist video to start at (default is 1)', default=1) + selection.add_option('--playlist-end', + dest='playlistend', metavar='NUMBER', help='playlist video to end at (default is last)', default=-1) + selection.add_option('--match-title', dest='matchtitle', metavar='REGEX',help='download only matching titles (regex or caseless sub-string)') + selection.add_option('--reject-title', dest='rejecttitle', metavar='REGEX',help='skip download for matching titles (regex or caseless sub-string)') + authentication.add_option('-u', '--username', dest='username', metavar='USERNAME', help='account username') authentication.add_option('-p', '--password', @@ -3590,6 +3607,7 @@ def parseOpts(): parser.add_option_group(general) + parser.add_option_group(selection) parser.add_option_group(filesystem) parser.add_option_group(verbosity) parser.add_option_group(video_format) @@ -3742,6 +3760,8 @@ def main(): 'updatetime': opts.updatetime, 'writedescription': opts.writedescription, 'writeinfojson': opts.writeinfojson, + 'matchtitle': opts.matchtitle, + 'rejecttitle': opts.rejecttitle, }) for extractor in extractors: fd.add_info_extractor(extractor) From 08c1d0d3bc43f344f58f16ff215f231f4b727f44 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Wed, 14 Sep 2011 22:55:09 +0200 Subject: [PATCH 421/455] Update README --- 
README.md | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 9fdbc95c7..9fdd7d3e2 100644 --- a/README.md +++ b/README.md @@ -17,9 +17,15 @@ which means you can modify it, redistribute it or use it however you like. -i, --ignore-errors continue on download errors -r, --rate-limit LIMIT download rate limit (e.g. 50k or 44.6m) -R, --retries RETRIES number of retries (default is 10) + --dump-user-agent display the current browser identification + +### Video Selection: --playlist-start NUMBER playlist video to start at (default is 1) --playlist-end NUMBER playlist video to end at (default is last) - --dump-user-agent display the current browser identification + --match-title REGEX download only matching titles (regex or caseless + sub-string) + --reject-title REGEX skip download for matching titles (regex or + caseless sub-string) ### Filesystem Options: -t, --title use title in file name From e5b9fac28173738dfb545393aca5595c2aafe5bd Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Wed, 14 Sep 2011 22:55:26 +0200 Subject: [PATCH 422/455] Bump version number --- youtube-dl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index 0973cc4ad..733e23105 100755 --- a/youtube-dl +++ b/youtube-dl @@ -15,7 +15,7 @@ __author__ = ( ) __license__ = 'Public Domain' -__version__ = '2011.09.14' +__version__ = '2011.09.15' UPDATE_URL = 'https://raw.github.com/rg3/youtube-dl/master/youtube-dl' From c23cec29a396cca227aed511f85632d406d8c706 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Wed, 14 Sep 2011 23:03:01 +0200 Subject: [PATCH 423/455] Update LATEST_VERSION (and wait for a script to do it so I do not forget ;) ) --- LATEST_VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/LATEST_VERSION b/LATEST_VERSION index c34db6478..35b79da09 100644 --- a/LATEST_VERSION +++ b/LATEST_VERSION @@ -1 +1 @@ -2011.09.14 +2011.09.15 From 
afb5b55de640d1e0ee8fe486415b7a2ee2321816 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Thu, 15 Sep 2011 09:59:03 +0200 Subject: [PATCH 424/455] Proper warning if xml.etree.ElementTree is not available --- youtube-dl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube-dl b/youtube-dl index 733e23105..ae4917bba 100755 --- a/youtube-dl +++ b/youtube-dl @@ -66,8 +66,8 @@ except ImportError: try: import xml.etree.ElementTree -except ImportError: # Python<2.5 - pass # Not officially supported, but let it slip +except ImportError: # Python<2.5: Not officially supported, but let it slip + warnings.warn('xml.etree.ElementTree support is missing. Consider upgrading to Python >= 2.5 if you get related errors.') std_headers = { 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:5.0.1) Gecko/20100101 Firefox/5.0.1', From bdb3f7a76960269c043a3081cc3eef050e693313 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Thu, 15 Sep 2011 10:06:14 +0200 Subject: [PATCH 425/455] Simplify suitable --- youtube-dl | 100 +++++++---------------------------------------------- 1 file changed, 12 insertions(+), 88 deletions(-) diff --git a/youtube-dl b/youtube-dl index ae4917bba..cd8e57b06 100755 --- a/youtube-dl +++ b/youtube-dl @@ -1026,9 +1026,8 @@ class InfoExtractor(object): description: One-line video description. Subclasses of this one should re-define the _real_initialize() and - _real_extract() methods, as well as the suitable() static method. - Probably, they should also be instantiated and added to the main - downloader. + _real_extract() methods and define a _VALID_URL regexp. + Probably, they should also be added to the list of extractors. 
""" _ready = False @@ -1039,10 +1038,9 @@ class InfoExtractor(object): self._ready = False self.set_downloader(downloader) - @staticmethod - def suitable(url): + def suitable(self, url): """Receives a URL and returns True if suitable for this IE.""" - return False + return re.match(self._VALID_URL, url) is not None def initialize(self): """Initializes an instance (authentication, etc).""" @@ -1089,10 +1087,6 @@ class YoutubeIE(InfoExtractor): '45': 'webm', } - @staticmethod - def suitable(url): - return (re.match(YoutubeIE._VALID_URL, url) is not None) - def report_lang(self): """Report attempt to set language.""" self._downloader.to_screen(u'[youtube] Setting language') @@ -1370,10 +1364,6 @@ class MetacafeIE(InfoExtractor): InfoExtractor.__init__(self, downloader) self._youtube_ie = youtube_ie - @staticmethod - def suitable(url): - return (re.match(MetacafeIE._VALID_URL, url) is not None) - def report_disclaimer(self): """Report disclaimer retrieval.""" self._downloader.to_screen(u'[metacafe] Retrieving disclaimer') @@ -1511,10 +1501,6 @@ class DailymotionIE(InfoExtractor): def __init__(self, downloader=None): InfoExtractor.__init__(self, downloader) - @staticmethod - def suitable(url): - return (re.match(DailymotionIE._VALID_URL, url) is not None) - def report_download_webpage(self, video_id): """Report webpage download.""" self._downloader.to_screen(u'[dailymotion] %s: Downloading webpage' % video_id) @@ -1605,10 +1591,6 @@ class GoogleIE(InfoExtractor): def __init__(self, downloader=None): InfoExtractor.__init__(self, downloader) - @staticmethod - def suitable(url): - return (re.match(GoogleIE._VALID_URL, url) is not None) - def report_download_webpage(self, video_id): """Report webpage download.""" self._downloader.to_screen(u'[video.google] %s: Downloading webpage' % video_id) @@ -1715,10 +1697,6 @@ class PhotobucketIE(InfoExtractor): def __init__(self, downloader=None): InfoExtractor.__init__(self, downloader) - @staticmethod - def suitable(url): - return 
(re.match(PhotobucketIE._VALID_URL, url) is not None) - def report_download_webpage(self, video_id): """Report webpage download.""" self._downloader.to_screen(u'[photobucket] %s: Downloading webpage' % video_id) @@ -1800,10 +1778,6 @@ class YahooIE(InfoExtractor): def __init__(self, downloader=None): InfoExtractor.__init__(self, downloader) - @staticmethod - def suitable(url): - return (re.match(YahooIE._VALID_URL, url) is not None) - def report_download_webpage(self, video_id): """Report webpage download.""" self._downloader.to_screen(u'[video.yahoo] %s: Downloading webpage' % video_id) @@ -1956,10 +1930,6 @@ class VimeoIE(InfoExtractor): def __init__(self, downloader=None): InfoExtractor.__init__(self, downloader) - @staticmethod - def suitable(url): - return (re.match(VimeoIE._VALID_URL, url) is not None) - def report_download_webpage(self, video_id): """Report webpage download.""" self._downloader.to_screen(u'[vimeo] %s: Downloading webpage' % video_id) @@ -2066,13 +2036,11 @@ class VimeoIE(InfoExtractor): class GenericIE(InfoExtractor): """Generic last-resort information extractor.""" + _VALID_URL = '.*' + def __init__(self, downloader=None): InfoExtractor.__init__(self, downloader) - @staticmethod - def suitable(url): - return True - def report_download_webpage(self, video_id): """Report webpage download.""" self._downloader.to_screen(u'WARNING: Falling back on generic information extractor.') @@ -2166,7 +2134,7 @@ class GenericIE(InfoExtractor): class YoutubeSearchIE(InfoExtractor): """Information Extractor for YouTube search queries.""" - _VALID_QUERY = r'ytsearch(\d+|all)?:[\s\S]+' + _VALID_URL = r'ytsearch(\d+|all)?:[\s\S]+' _TEMPLATE_URL = 'http://www.youtube.com/results?search_query=%s&page=%s&gl=US&hl=en' _VIDEO_INDICATOR = r'href="/watch\?v=.+?"' _MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*' @@ -2177,10 +2145,6 @@ class YoutubeSearchIE(InfoExtractor): InfoExtractor.__init__(self, downloader) self._youtube_ie = youtube_ie - @staticmethod - def 
suitable(url): - return (re.match(YoutubeSearchIE._VALID_QUERY, url) is not None) - def report_download_page(self, query, pagenum): """Report attempt to download playlist page with given number.""" query = query.decode(preferredencoding()) @@ -2190,7 +2154,7 @@ class YoutubeSearchIE(InfoExtractor): self._youtube_ie.initialize() def _real_extract(self, query): - mobj = re.match(self._VALID_QUERY, query) + mobj = re.match(self._VALID_URL, query) if mobj is None: self._downloader.trouble(u'ERROR: invalid search query "%s"' % query) return @@ -2258,7 +2222,7 @@ class YoutubeSearchIE(InfoExtractor): class GoogleSearchIE(InfoExtractor): """Information Extractor for Google Video search queries.""" - _VALID_QUERY = r'gvsearch(\d+|all)?:[\s\S]+' + _VALID_URL = r'gvsearch(\d+|all)?:[\s\S]+' _TEMPLATE_URL = 'http://video.google.com/videosearch?q=%s+site:video.google.com&start=%s&hl=en' _VIDEO_INDICATOR = r'videoplay\?docid=([^\&>]+)\&' _MORE_PAGES_INDICATOR = r'Next' @@ -2269,10 +2233,6 @@ class GoogleSearchIE(InfoExtractor): InfoExtractor.__init__(self, downloader) self._google_ie = google_ie - @staticmethod - def suitable(url): - return (re.match(GoogleSearchIE._VALID_QUERY, url) is not None) - def report_download_page(self, query, pagenum): """Report attempt to download playlist page with given number.""" query = query.decode(preferredencoding()) @@ -2282,7 +2242,7 @@ class GoogleSearchIE(InfoExtractor): self._google_ie.initialize() def _real_extract(self, query): - mobj = re.match(self._VALID_QUERY, query) + mobj = re.match(self._VALID_URL, query) if mobj is None: self._downloader.trouble(u'ERROR: invalid search query "%s"' % query) return @@ -2350,7 +2310,7 @@ class GoogleSearchIE(InfoExtractor): class YahooSearchIE(InfoExtractor): """Information Extractor for Yahoo! 
Video search queries.""" - _VALID_QUERY = r'yvsearch(\d+|all)?:[\s\S]+' + _VALID_URL = r'yvsearch(\d+|all)?:[\s\S]+' _TEMPLATE_URL = 'http://video.yahoo.com/search/?p=%s&o=%s' _VIDEO_INDICATOR = r'href="http://video\.yahoo\.com/watch/([0-9]+/[0-9]+)"' _MORE_PAGES_INDICATOR = r'\s*Next' @@ -2361,10 +2321,6 @@ class YahooSearchIE(InfoExtractor): InfoExtractor.__init__(self, downloader) self._yahoo_ie = yahoo_ie - @staticmethod - def suitable(url): - return (re.match(YahooSearchIE._VALID_QUERY, url) is not None) - def report_download_page(self, query, pagenum): """Report attempt to download playlist page with given number.""" query = query.decode(preferredencoding()) @@ -2374,7 +2330,7 @@ class YahooSearchIE(InfoExtractor): self._yahoo_ie.initialize() def _real_extract(self, query): - mobj = re.match(self._VALID_QUERY, query) + mobj = re.match(self._VALID_URL, query) if mobj is None: self._downloader.trouble(u'ERROR: invalid search query "%s"' % query) return @@ -2453,10 +2409,6 @@ class YoutubePlaylistIE(InfoExtractor): InfoExtractor.__init__(self, downloader) self._youtube_ie = youtube_ie - @staticmethod - def suitable(url): - return (re.match(YoutubePlaylistIE._VALID_URL, url) is not None) - def report_download_page(self, playlist_id, pagenum): """Report attempt to download playlist page with given number.""" self._downloader.to_screen(u'[youtube] PL %s: Downloading page #%s' % (playlist_id, pagenum)) @@ -2531,10 +2483,6 @@ class YoutubeUserIE(InfoExtractor): InfoExtractor.__init__(self, downloader) self._youtube_ie = youtube_ie - @staticmethod - def suitable(url): - return (re.match(YoutubeUserIE._VALID_URL, url) is not None) - def report_download_page(self, username, start_index): """Report attempt to download user page.""" self._downloader.to_screen(u'[youtube] user %s: Downloading video ids from %d to %d' % @@ -2616,10 +2564,6 @@ class DepositFilesIE(InfoExtractor): def __init__(self, downloader=None): InfoExtractor.__init__(self, downloader) - @staticmethod - 
def suitable(url): - return (re.match(DepositFilesIE._VALID_URL, url) is not None) - def report_download_webpage(self, file_id): """Report webpage download.""" self._downloader.to_screen(u'[DepositFiles] %s: Downloading webpage' % file_id) @@ -2703,10 +2647,6 @@ class FacebookIE(InfoExtractor): def __init__(self, downloader=None): InfoExtractor.__init__(self, downloader) - @staticmethod - def suitable(url): - return (re.match(FacebookIE._VALID_URL, url) is not None) - def _reporter(self, message): """Add header and report message.""" self._downloader.to_screen(u'[facebook] %s' % message) @@ -2913,10 +2853,6 @@ class BlipTVIE(InfoExtractor): _VALID_URL = r'^(?:https?://)?(?:\w+\.)?blip\.tv(/.+)$' _URL_EXT = r'^.*\.([a-z0-9]+)$' - @staticmethod - def suitable(url): - return (re.match(BlipTVIE._VALID_URL, url) is not None) - def report_extraction(self, file_id): """Report information extraction.""" self._downloader.to_screen(u'[blip.tv] %s: Extracting information' % file_id) @@ -2991,10 +2927,6 @@ class MyVideoIE(InfoExtractor): def __init__(self, downloader=None): InfoExtractor.__init__(self, downloader) - @staticmethod - def suitable(url): - return (re.match(MyVideoIE._VALID_URL, url) is not None) - def report_download_webpage(self, video_id): """Report webpage download.""" self._downloader.to_screen(u'[myvideo] %s: Downloading webpage' % video_id) @@ -3064,10 +2996,6 @@ class ComedyCentralIE(InfoExtractor): _VALID_URL = r'^(:(?Ptds|thedailyshow|cr|colbert|colbertnation|colbertreport))|(https?://)?(www\.)(?Pthedailyshow|colbertnation)\.com/full-episodes/(?P.*)$' - @staticmethod - def suitable(url): - return (re.match(ComedyCentralIE._VALID_URL, url) is not None) - def report_extraction(self, episode_id): self._downloader.to_screen(u'[comedycentral] %s: Extracting information' % episode_id) @@ -3208,10 +3136,6 @@ class EscapistIE(InfoExtractor): _VALID_URL = r'^(https?://)?(www\.)escapistmagazine.com/videos/view/(?P[^/]+)/(?P[^/?]+)[/?].*$' - @staticmethod - def 
suitable(url): - return (re.match(EscapistIE._VALID_URL, url) is not None) - def report_extraction(self, showName): self._downloader.to_screen(u'[escapist] %s: Extracting information' % showName) From f3098c4d8abed9644af5e1b461c2cf5042113653 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Thu, 15 Sep 2011 10:43:49 +0200 Subject: [PATCH 426/455] --list-extractors (Closes #161) --- youtube-dl | 106 +++++++++++++++++++++++++++++++++++------------------ 1 file changed, 71 insertions(+), 35 deletions(-) diff --git a/youtube-dl b/youtube-dl index cd8e57b06..dbcf1c9fb 100755 --- a/youtube-dl +++ b/youtube-dl @@ -1086,6 +1086,7 @@ class YoutubeIE(InfoExtractor): '43': 'webm', '45': 'webm', } + IE_NAME = u'youtube' def report_lang(self): """Report attempt to set language.""" @@ -1359,6 +1360,7 @@ class MetacafeIE(InfoExtractor): _DISCLAIMER = 'http://www.metacafe.com/family_filter/' _FILTER_POST = 'http://www.metacafe.com/f/index.php?inputType=filter&controllerGroup=user' _youtube_ie = None + IE_NAME = u'metacafe' def __init__(self, youtube_ie, downloader=None): InfoExtractor.__init__(self, downloader) @@ -1497,6 +1499,7 @@ class DailymotionIE(InfoExtractor): """Information Extractor for Dailymotion""" _VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/video/([^_/]+)_([^/]+)' + IE_NAME = u'dailymotion' def __init__(self, downloader=None): InfoExtractor.__init__(self, downloader) @@ -1587,6 +1590,7 @@ class GoogleIE(InfoExtractor): """Information extractor for video.google.com.""" _VALID_URL = r'(?:http://)?video\.google\.(?:com(?:\.au)?|co\.(?:uk|jp|kr|cr)|ca|de|es|fr|it|nl|pl)/videoplay\?docid=([^\&]+).*' + IE_NAME = u'video.google' def __init__(self, downloader=None): InfoExtractor.__init__(self, downloader) @@ -1693,6 +1697,7 @@ class PhotobucketIE(InfoExtractor): """Information extractor for photobucket.com.""" _VALID_URL = r'(?:http://)?(?:[a-z0-9]+\.)?photobucket\.com/.*[\?\&]current=(.*\.flv)' + IE_NAME = u'photobucket' def __init__(self, 
downloader=None): InfoExtractor.__init__(self, downloader) @@ -1774,6 +1779,7 @@ class YahooIE(InfoExtractor): # _VPAGE_URL matches only the extractable '/watch/' URLs _VALID_URL = r'(?:http://)?(?:[a-z]+\.)?video\.yahoo\.com/(?:watch|network)/([0-9]+)(?:/|\?v=)([0-9]+)(?:[#\?].*)?' _VPAGE_URL = r'(?:http://)?video\.yahoo\.com/watch/([0-9]+)/([0-9]+)(?:[#\?].*)?' + IE_NAME = u'video.yahoo' def __init__(self, downloader=None): InfoExtractor.__init__(self, downloader) @@ -1926,6 +1932,7 @@ class VimeoIE(InfoExtractor): # _VALID_URL matches Vimeo URLs _VALID_URL = r'(?:https?://)?(?:(?:www|player).)?vimeo\.com/(?:groups/[^/]+/)?(?:videos?/)?([0-9]+)' + IE_NAME = u'vimeo' def __init__(self, downloader=None): InfoExtractor.__init__(self, downloader) @@ -2036,7 +2043,8 @@ class VimeoIE(InfoExtractor): class GenericIE(InfoExtractor): """Generic last-resort information extractor.""" - _VALID_URL = '.*' + _VALID_URL = r'.*' + IE_NAME = u'generic' def __init__(self, downloader=None): InfoExtractor.__init__(self, downloader) @@ -2140,6 +2148,7 @@ class YoutubeSearchIE(InfoExtractor): _MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*' _youtube_ie = None _max_youtube_results = 1000 + IE_NAME = u'youtube:search' def __init__(self, youtube_ie, downloader=None): InfoExtractor.__init__(self, downloader) @@ -2228,6 +2237,7 @@ class GoogleSearchIE(InfoExtractor): _MORE_PAGES_INDICATOR = r'Next' _google_ie = None _max_google_results = 1000 + IE_NAME = u'video.google:search' def __init__(self, google_ie, downloader=None): InfoExtractor.__init__(self, downloader) @@ -2316,6 +2326,7 @@ class YahooSearchIE(InfoExtractor): _MORE_PAGES_INDICATOR = r'\s*Next' _yahoo_ie = None _max_yahoo_results = 1000 + IE_NAME = u'video.yahoo:search' def __init__(self, yahoo_ie, downloader=None): InfoExtractor.__init__(self, downloader) @@ -2404,6 +2415,7 @@ class YoutubePlaylistIE(InfoExtractor): _VIDEO_INDICATOR = r'/watch\?v=(.+?)&' _MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*' _youtube_ie = None + IE_NAME = 
u'youtube:playlist' def __init__(self, youtube_ie, downloader=None): InfoExtractor.__init__(self, downloader) @@ -2478,6 +2490,7 @@ class YoutubeUserIE(InfoExtractor): _GDATA_URL = 'http://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d' _VIDEO_INDICATOR = r'/watch\?v=(.+?)&' _youtube_ie = None + IE_NAME = u'youtube:user' def __init__(self, youtube_ie, downloader=None): InfoExtractor.__init__(self, downloader) @@ -2560,6 +2573,7 @@ class DepositFilesIE(InfoExtractor): """Information extractor for depositfiles.com""" _VALID_URL = r'(?:http://)?(?:\w+\.)?depositfiles.com/(?:../(?#locale))?files/(.+)' + IE_NAME = u'DepositFiles' def __init__(self, downloader=None): InfoExtractor.__init__(self, downloader) @@ -2643,6 +2657,7 @@ class FacebookIE(InfoExtractor): 'highqual': 'mp4', 'lowqual': 'mp4', } + IE_NAME = u'facebook' def __init__(self, downloader=None): InfoExtractor.__init__(self, downloader) @@ -2852,6 +2867,7 @@ class BlipTVIE(InfoExtractor): _VALID_URL = r'^(?:https?://)?(?:\w+\.)?blip\.tv(/.+)$' _URL_EXT = r'^.*\.([a-z0-9]+)$' + IE_NAME = u'blip.tv' def report_extraction(self, file_id): """Report information extraction.""" @@ -2923,6 +2939,7 @@ class MyVideoIE(InfoExtractor): """Information Extractor for myvideo.de.""" _VALID_URL = r'(?:http://)?(?:www\.)?myvideo\.de/watch/([0-9]+)/([^?/]+).*' + IE_NAME = u'myvideo' def __init__(self, downloader=None): InfoExtractor.__init__(self, downloader) @@ -2994,7 +3011,8 @@ class MyVideoIE(InfoExtractor): class ComedyCentralIE(InfoExtractor): """Information extractor for The Daily Show and Colbert Report """ - _VALID_URL = r'^(:(?Ptds|thedailyshow|cr|colbert|colbertnation|colbertreport))|(https?://)?(www\.)(?Pthedailyshow|colbertnation)\.com/full-episodes/(?P.*)$' + _VALID_URL = r'^(:(?Ptds|thedailyshow|cr|colbert|colbertnation|colbertreport))|(https?://)?(www\.)?(?Pthedailyshow|colbertnation)\.com/full-episodes/(?P.*)$' + IE_NAME = u'comedycentral' def report_extraction(self, episode_id): 
self._downloader.to_screen(u'[comedycentral] %s: Extracting information' % episode_id) @@ -3135,6 +3153,7 @@ class EscapistIE(InfoExtractor): """Information extractor for The Escapist """ _VALID_URL = r'^(https?://)?(www\.)escapistmagazine.com/videos/view/(?P[^/]+)/(?P[^/?]+)[/?].*$' + IE_NAME = u'escapist' def report_extraction(self, showName): self._downloader.to_screen(u'[escapist] %s: Extracting information' % showName) @@ -3446,6 +3465,9 @@ def parseOpts(): general.add_option('--dump-user-agent', action='store_true', dest='dump_user_agent', help='display the current browser identification', default=False) + general.add_option('--list-extractors', + action='store_true', dest='list_extractors', + help='List all supported extractors and the URLs they would handle', default=False) selection.add_option('--playlist-start', dest='playliststart', metavar='NUMBER', help='playlist video to start at (default is 1)', default=1) @@ -3542,6 +3564,36 @@ def parseOpts(): return parser, opts, args +def gen_extractors(): + """ Return a list of an instance of every supported extractor. + The order does matter; the first extractor matched is the one handling the URL. 
+ """ + youtube_ie = YoutubeIE() + google_ie = GoogleIE() + yahoo_ie = YahooIE() + return [ + youtube_ie, + MetacafeIE(youtube_ie), + DailymotionIE(), + YoutubePlaylistIE(youtube_ie), + YoutubeUserIE(youtube_ie), + YoutubeSearchIE(youtube_ie), + google_ie, + GoogleSearchIE(google_ie), + PhotobucketIE(), + yahoo_ie, + YahooSearchIE(yahoo_ie), + DepositFilesIE(), + FacebookIE(), + BlipTVIE(), + VimeoIE(), + MyVideoIE(), + ComedyCentralIE(), + EscapistIE(), + + GenericIE() + ] + def main(): parser, opts, args = parseOpts() @@ -3561,12 +3613,6 @@ def main(): print std_headers['User-Agent'] sys.exit(0) - # General configuration - cookie_processor = urllib2.HTTPCookieProcessor(jar) - opener = urllib2.build_opener(urllib2.ProxyHandler(), cookie_processor, YoutubeDLHandler()) - urllib2.install_opener(opener) - socket.setdefaulttimeout(300) # 5 minutes should be enough (famous last words) - # Batch file verification batchurls = [] if opts.batchfile is not None: @@ -3582,6 +3628,23 @@ def main(): sys.exit(u'ERROR: batch file could not be read') all_urls = batchurls + args + # General configuration + cookie_processor = urllib2.HTTPCookieProcessor(jar) + opener = urllib2.build_opener(urllib2.ProxyHandler(), cookie_processor, YoutubeDLHandler()) + urllib2.install_opener(opener) + socket.setdefaulttimeout(300) # 5 minutes should be enough (famous last words) + + extractors = gen_extractors() + + if opts.list_extractors: + for ie in extractors: + print(ie.IE_NAME) + matchedUrls = filter(lambda url: ie.suitable(url), all_urls) + all_urls = filter(lambda url: url not in matchedUrls, all_urls) + for mu in matchedUrls: + print(u' ' + mu) + sys.exit(0) + # Conflicting, missing and erroneous options if opts.usenetrc and (opts.username is not None or opts.password is not None): parser.error(u'using .netrc conflicts with giving username/password') @@ -3619,33 +3682,6 @@ def main(): if opts.audioformat not in ['best', 'aac', 'mp3']: parser.error(u'invalid audio format specified') - # 
Information extractors - youtube_ie = YoutubeIE() - google_ie = GoogleIE() - yahoo_ie = YahooIE() - extractors = [ # Order does matter - youtube_ie, - MetacafeIE(youtube_ie), - DailymotionIE(), - YoutubePlaylistIE(youtube_ie), - YoutubeUserIE(youtube_ie), - YoutubeSearchIE(youtube_ie), - google_ie, - GoogleSearchIE(google_ie), - PhotobucketIE(), - yahoo_ie, - YahooSearchIE(yahoo_ie), - DepositFilesIE(), - FacebookIE(), - BlipTVIE(), - VimeoIE(), - MyVideoIE(), - ComedyCentralIE(), - EscapistIE(), - - GenericIE() - ] - # File downloader fd = FileDownloader({ 'usenetrc': opts.usenetrc, From 9b4556c469d63ec49374f98237e623cfe88cda93 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Thu, 15 Sep 2011 11:36:49 +0200 Subject: [PATCH 427/455] New option --skip-download (Closes #162) --- youtube-dl | 63 +++++++++++++++++++++++++++++------------------------- 1 file changed, 34 insertions(+), 29 deletions(-) diff --git a/youtube-dl b/youtube-dl index dbcf1c9fb..4f6706e2a 100755 --- a/youtube-dl +++ b/youtube-dl @@ -697,20 +697,21 @@ class FileDownloader(object): def process_info(self, info_dict): """Process a single dictionary returned by an InfoExtractor.""" filename = self.prepare_filename(info_dict) + + # Forced printings + if self.params.get('forcetitle', False): + print info_dict['title'].encode(preferredencoding(), 'xmlcharrefreplace') + if self.params.get('forceurl', False): + print info_dict['url'].encode(preferredencoding(), 'xmlcharrefreplace') + if self.params.get('forcethumbnail', False) and 'thumbnail' in info_dict: + print info_dict['thumbnail'].encode(preferredencoding(), 'xmlcharrefreplace') + if self.params.get('forcedescription', False) and 'description' in info_dict: + print info_dict['description'].encode(preferredencoding(), 'xmlcharrefreplace') + if self.params.get('forcefilename', False) and filename is not None: + print filename.encode(preferredencoding(), 'xmlcharrefreplace') + # Do nothing else if in simulate mode if self.params.get('simulate', 
False): - # Forced printings - if self.params.get('forcetitle', False): - print info_dict['title'].encode(preferredencoding(), 'xmlcharrefreplace') - if self.params.get('forceurl', False): - print info_dict['url'].encode(preferredencoding(), 'xmlcharrefreplace') - if self.params.get('forcethumbnail', False) and 'thumbnail' in info_dict: - print info_dict['thumbnail'].encode(preferredencoding(), 'xmlcharrefreplace') - if self.params.get('forcedescription', False) and 'description' in info_dict: - print info_dict['description'].encode(preferredencoding(), 'xmlcharrefreplace') - if self.params.get('forcefilename', False) and filename is not None: - print filename.encode(preferredencoding(), 'xmlcharrefreplace') - return if filename is None: @@ -769,23 +770,24 @@ class FileDownloader(object): self.trouble(u'ERROR: Cannot write metadata to JSON file ' + infofn) return - try: - success = self._do_download(filename, info_dict['url'].encode('utf-8'), info_dict.get('player_url', None)) - except (OSError, IOError), err: - raise UnavailableVideoError - except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self.trouble(u'ERROR: unable to download video data: %s' % str(err)) - return - except (ContentTooShortError, ), err: - self.trouble(u'ERROR: content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded)) - return - - if success: + if not self.params.get('skip_download', False): try: - self.post_process(filename, info_dict) - except (PostProcessingError), err: - self.trouble(u'ERROR: postprocessing: %s' % str(err)) + success = self._do_download(filename, info_dict['url'].encode('utf-8'), info_dict.get('player_url', None)) + except (OSError, IOError), err: + raise UnavailableVideoError + except (urllib2.URLError, httplib.HTTPException, socket.error), err: + self.trouble(u'ERROR: unable to download video data: %s' % str(err)) return + except (ContentTooShortError, ), err: + self.trouble(u'ERROR: content too short (expected %s bytes and 
served %s)' % (err.expected, err.downloaded)) + return + + if success: + try: + self.post_process(filename, info_dict) + except (PostProcessingError), err: + self.trouble(u'ERROR: postprocessing: %s' % str(err)) + return def download(self, url_list): """Download a given list of URLs.""" @@ -3495,7 +3497,9 @@ def parseOpts(): verbosity.add_option('-q', '--quiet', action='store_true', dest='quiet', help='activates quiet mode', default=False) verbosity.add_option('-s', '--simulate', - action='store_true', dest='simulate', help='do not download video', default=False) + action='store_true', dest='simulate', help='do not download the video and do not write anything to disk', default=False) + verbosity.add_option('--skip-download', + action='store_true', dest='skip_download', help='do not download the video', default=False) verbosity.add_option('-g', '--get-url', action='store_true', dest='geturl', help='simulate, quiet but print URL', default=False) verbosity.add_option('-e', '--get-title', @@ -3693,7 +3697,8 @@ def main(): 'forcethumbnail': opts.getthumbnail, 'forcedescription': opts.getdescription, 'forcefilename': opts.getfilename, - 'simulate': (opts.simulate or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription or opts.getfilename), + 'simulate': opts.simulate, + 'skip_download': (opts.skip_download or opts.simulate or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription or opts.getfilename), 'format': opts.format, 'format_limit': opts.format_limit, 'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(preferredencoding())) From 36597dc40f311f3b209f92f6fefae7466decc758 Mon Sep 17 00:00:00 2001 From: Kegan Date: Sat, 23 Apr 2011 14:55:40 +0800 Subject: [PATCH 428/455] Updated to stamp extracted audio file with HTTP last modified date. 
--- youtube-dl | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/youtube-dl b/youtube-dl index 4f6706e2a..c41228c93 100755 --- a/youtube-dl +++ b/youtube-dl @@ -625,11 +625,12 @@ class FileDownloader(object): return filetime = timeconvert(timestr) if filetime is None: - return + return filetime try: os.utime(filename, (time.time(), filetime)) except: pass + return filetime def report_writedescription(self, descfn): """ Report that the description file is being written """ @@ -772,7 +773,8 @@ class FileDownloader(object): if not self.params.get('skip_download', False): try: - success = self._do_download(filename, info_dict['url'].encode('utf-8'), info_dict.get('player_url', None)) + success,add_data = self._do_download(filename, info_dict['url'].encode('utf-8'), info_dict.get('player_url', None)) + info_dict.update(add_data) except (OSError, IOError), err: raise UnavailableVideoError except (urllib2.URLError, httplib.HTTPException, socket.error), err: @@ -992,10 +994,11 @@ class FileDownloader(object): self.try_rename(tmpfilename, filename) # Update file modification time + filetime = None if self.params.get('updatetime', True): - self.try_utime(filename, data.info().get('last-modified', None)) + filetime = self.try_utime(filename, data.info().get('last-modified', None)) - return True + return True, {'filetime': filetime} class InfoExtractor(object): @@ -3355,6 +3358,13 @@ class FFmpegExtractAudioPP(PostProcessor): self._downloader.to_stderr(u'WARNING: error running ffmpeg') return None + # Try to update the date time for extracted audio file. 
+ if information.get('filetime') is not None: + try: + os.utime(new_path, (time.time(), information['filetime'])) + except: + self._downloader.to_stderr(u'WARNING: Cannot update utime of audio file') + try: os.remove(path) except (IOError, OSError): From c52b01f326186004f434e654944b7264c918727c Mon Sep 17 00:00:00 2001 From: Kegan Date: Sat, 23 Apr 2011 14:56:06 +0800 Subject: [PATCH 429/455] Added ability to download worst quality video file only. (Closes #113) --- youtube-dl | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/youtube-dl b/youtube-dl index c41228c93..2fc88b0e8 100755 --- a/youtube-dl +++ b/youtube-dl @@ -1320,6 +1320,8 @@ class YoutubeIE(InfoExtractor): return if req_format is None: video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality + elif req_format == 'worst': + video_url_list = [(existing_formats[len(existing_formats)-1], url_map[existing_formats[len(existing_formats)-1]])] # worst quality elif req_format == '-1': video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats else: @@ -2832,6 +2834,8 @@ class FacebookIE(InfoExtractor): return if req_format is None: video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality + elif req_format == 'worst': + video_url_list = [(existing_formats[len(existing_formats)-1], url_map[existing_formats[len(existing_formats)-1]])] # worst quality elif req_format == '-1': video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats else: From da0db53a758ffe45767d9f8ea8faa3a605ba9458 Mon Sep 17 00:00:00 2001 From: Dominik Heidler Date: Tue, 12 Apr 2011 21:58:16 +0200 Subject: [PATCH 430/455] added option to get the available formats for a video (Closes #106) --- youtube-dl | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/youtube-dl b/youtube-dl index 2fc88b0e8..d01b259cd 100755 --- a/youtube-dl +++ b/youtube-dl @@ -710,6 +710,8 @@ class FileDownloader(object): print 
info_dict['description'].encode(preferredencoding(), 'xmlcharrefreplace') if self.params.get('forcefilename', False) and filename is not None: print filename.encode(preferredencoding(), 'xmlcharrefreplace') + if self.params.get('forceformat', False): + print info_dict['format'].encode(preferredencoding(), 'xmlcharrefreplace') # Do nothing else if in simulate mode if self.params.get('simulate', False): @@ -3527,6 +3529,9 @@ def parseOpts(): verbosity.add_option('--get-filename', action='store_true', dest='getfilename', help='simulate, quiet but print output filename', default=False) + verbosity.add_option('--get-format', + action='store_true', dest='getformat', + help='simulate, quiet but print output format', default=False) verbosity.add_option('--no-progress', action='store_true', dest='noprogress', help='do not print progress bar', default=False) verbosity.add_option('--console-title', @@ -3705,14 +3710,15 @@ def main(): 'usenetrc': opts.usenetrc, 'username': opts.username, 'password': opts.password, - 'quiet': (opts.quiet or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription or opts.getfilename), + 'quiet': (opts.quiet or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat), 'forceurl': opts.geturl, 'forcetitle': opts.gettitle, 'forcethumbnail': opts.getthumbnail, 'forcedescription': opts.getdescription, 'forcefilename': opts.getfilename, + 'forceformat': opts.getformat, 'simulate': opts.simulate, - 'skip_download': (opts.skip_download or opts.simulate or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription or opts.getfilename), + 'skip_download': (opts.skip_download or opts.simulate or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat), 'format': opts.format, 'format_limit': opts.format_limit, 'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(preferredencoding())) From 
6a1ca41e17f703931308e197638800f7f0c29411 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Thu, 15 Sep 2011 12:45:56 +0200 Subject: [PATCH 431/455] Update README --- README.md | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 9fdd7d3e2..790540bc8 100644 --- a/README.md +++ b/README.md @@ -18,6 +18,8 @@ which means you can modify it, redistribute it or use it however you like. -r, --rate-limit LIMIT download rate limit (e.g. 50k or 44.6m) -R, --retries RETRIES number of retries (default is 10) --dump-user-agent display the current browser identification + --list-extractors List all supported extractors and the URLs they + would handle ### Video Selection: --playlist-start NUMBER playlist video to start at (default is 1) @@ -44,12 +46,15 @@ which means you can modify it, redistribute it or use it however you like. ### Verbosity / Simulation Options: -q, --quiet activates quiet mode - -s, --simulate do not download video + -s, --simulate do not download the video and do not write anything + to disk + --skip-download do not download the video -g, --get-url simulate, quiet but print URL -e, --get-title simulate, quiet but print title --get-thumbnail simulate, quiet but print thumbnail URL --get-description simulate, quiet but print video description --get-filename simulate, quiet but print output filename + --get-format simulate, quiet but print output format --no-progress do not print progress bar --console-title display progress in console titlebar @@ -67,6 +72,9 @@ which means you can modify it, redistribute it or use it however you like. 
--extract-audio convert video files to audio-only files (requires ffmpeg and ffprobe) --audio-format FORMAT "best", "aac" or "mp3"; best by default + --audio-quality QUALITY ffmpeg audio bitrate specification, 128k by default + -k, --keep-video keeps the video file on disk after the post- + processing; the video is erased by default ## FAQ From 5260e68f64781099b1540008bbd31be832760628 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Thu, 15 Sep 2011 18:47:36 +0200 Subject: [PATCH 432/455] Add format fallback --- youtube-dl | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/youtube-dl b/youtube-dl index a2100aa6d..7483fcfac 100755 --- a/youtube-dl +++ b/youtube-dl @@ -1320,18 +1320,24 @@ class YoutubeIE(InfoExtractor): if len(existing_formats) == 0: self._downloader.trouble(u'ERROR: no known formats available for video') return - if req_format is None: + if req_format is None or req_format == 'best': video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality elif req_format == 'worst': video_url_list = [(existing_formats[len(existing_formats)-1], url_map[existing_formats[len(existing_formats)-1]])] # worst quality - elif req_format == '-1': + elif req_format in ('-1', 'all'): video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats else: - # Specific format - if req_format not in url_map: + # Specific formats. We pick the first in a slash-delimeted sequence. + # For example, if '1/2/3/4' is requested and '2' and '4' are available, we pick '2'. 
+ req_formats = req_format.split('/') + video_url_list = None + for rf in req_formats: + if rf in url_map: + video_url_list = [(rf, url_map[rf])] + break + if video_url_list is None: self._downloader.trouble(u'ERROR: requested format not available') return - video_url_list = [(req_format, url_map[req_format])] # Specific format else: self._downloader.trouble(u'ERROR: no conn or url_encoded_fmt_stream_map information found in video info') return @@ -3512,7 +3518,7 @@ def parseOpts(): video_format.add_option('-f', '--format', action='store', dest='format', metavar='FORMAT', help='video format code') video_format.add_option('--all-formats', - action='store_const', dest='format', help='download all available video formats', const='-1') + action='store_const', dest='format', help='download all available video formats', const='all') video_format.add_option('--max-quality', action='store', dest='format_limit', metavar='FORMAT', help='highest quality format to download') From ef357c4bf29277d939d122710051459afad26a5d Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Thu, 15 Sep 2011 18:48:29 +0200 Subject: [PATCH 433/455] Bump version number --- LATEST_VERSION | 2 +- youtube-dl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/LATEST_VERSION b/LATEST_VERSION index 35b79da09..d44b115a6 100644 --- a/LATEST_VERSION +++ b/LATEST_VERSION @@ -1 +1 @@ -2011.09.15 +2011.09.16 diff --git a/youtube-dl b/youtube-dl index 7483fcfac..685bd2b8c 100755 --- a/youtube-dl +++ b/youtube-dl @@ -15,7 +15,7 @@ __author__ = ( ) __license__ = 'Public Domain' -__version__ = '2011.09.15' +__version__ = '2011.09.16' UPDATE_URL = 'https://raw.github.com/rg3/youtube-dl/master/youtube-dl' From 10e7194db16c769cd8da98a8d541f7f5452afe84 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Fri, 20 May 2011 08:59:53 +0200 Subject: [PATCH 434/455] If --continue is not enabled, set resume_len to zero. 
This corrects the reporting of download progress (which previously started at a value greater than zero). --- youtube-dl | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/youtube-dl b/youtube-dl index 685bd2b8c..e304df557 100755 --- a/youtube-dl +++ b/youtube-dl @@ -877,7 +877,6 @@ class FileDownloader(object): tmpfilename = self.temp_name(filename) stream = None - open_mode = 'wb' # Do not include the Accept-Encoding header headers = {'Youtubedl-no-compression': 'True'} @@ -890,11 +889,14 @@ class FileDownloader(object): else: resume_len = 0 - # Request parameters in case of being able to resume - if self.params.get('continuedl', False) and resume_len != 0: - self.report_resuming_byte(resume_len) - request.add_header('Range', 'bytes=%d-' % resume_len) - open_mode = 'ab' + open_mode = 'wb' + if resume_len != 0: + if self.params.get('continuedl', False): + self.report_resuming_byte(resume_len) + request.add_header('Range','bytes=%d-' % resume_len) + open_mode = 'ab' + else: + resume_len = 0 count = 0 retries = self.params.get('retries', 0) From 18bb3d1e3595f61b16228dc8a03108789ce8b7b1 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Thu, 15 Sep 2011 19:12:04 +0200 Subject: [PATCH 435/455] Make --continue the default and provide --no-continue (Closes #119) --- youtube-dl | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index e304df557..624df4bea 100755 --- a/youtube-dl +++ b/youtube-dl @@ -3568,7 +3568,10 @@ def parseOpts(): filesystem.add_option('-w', '--no-overwrites', action='store_true', dest='nooverwrites', help='do not overwrite files', default=False) filesystem.add_option('-c', '--continue', - action='store_true', dest='continue_dl', help='resume partially downloaded files', default=False) + action='store_true', dest='continue_dl', help='resume partially downloaded files', default=True) + filesystem.add_option('--no-continue', + action='store_false', dest='continue_dl', + 
help='do not resume partially downloaded files (restart from beginning)') filesystem.add_option('--cookies', dest='cookiefile', metavar='FILE', help='file to dump cookie jar to') filesystem.add_option('--no-part', From 366cbfb04a5954bd2e68a44fca72fce88585c2d4 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Thu, 15 Sep 2011 19:22:18 +0200 Subject: [PATCH 436/455] Fix _do_download signature --- youtube-dl | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/youtube-dl b/youtube-dl index 624df4bea..a787bf6ee 100755 --- a/youtube-dl +++ b/youtube-dl @@ -775,8 +775,7 @@ class FileDownloader(object): if not self.params.get('skip_download', False): try: - success,add_data = self._do_download(filename, info_dict['url'].encode('utf-8'), info_dict.get('player_url', None)) - info_dict.update(add_data) + success = self._do_download(filename, info_dict) except (OSError, IOError), err: raise UnavailableVideoError except (urllib2.URLError, httplib.HTTPException, socket.error), err: @@ -865,7 +864,10 @@ class FileDownloader(object): self.trouble(u'\nERROR: rtmpdump exited with code %d' % retval) return False - def _do_download(self, filename, url, player_url): + def _do_download(self, filename, info_dict): + url = info_dict['url'] + player_url = info_dict.get('player_url', None) + # Check file already present if self.params.get('continuedl', False) and os.path.isfile(filename) and not self.params.get('nopart', False): self.report_file_already_downloaded(filename) @@ -998,11 +1000,10 @@ class FileDownloader(object): self.try_rename(tmpfilename, filename) # Update file modification time - filetime = None if self.params.get('updatetime', True): - filetime = self.try_utime(filename, data.info().get('last-modified', None)) + info_dict['filetime'] = self.try_utime(filename, data.info().get('last-modified', None)) - return True, {'filetime': filetime} + return True class InfoExtractor(object): From eca1b76f01d1caff6f85304cd50550d64d37a590 Mon Sep 17 
00:00:00 2001 From: Philipp Hagemeister Date: Thu, 15 Sep 2011 19:23:17 +0200 Subject: [PATCH 437/455] Update README --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index 790540bc8..7ade255b2 100644 --- a/README.md +++ b/README.md @@ -37,6 +37,8 @@ which means you can modify it, redistribute it or use it however you like. -a, --batch-file FILE file containing URLs to download ('-' for stdin) -w, --no-overwrites do not overwrite files -c, --continue resume partially downloaded files + --no-continue do not resume partially downloaded files (restart + from beginning) --cookies FILE file to dump cookie jar to --no-part do not use .part files --no-mtime do not use the Last-modified header to set the file From cc025e1226f8d09482f9f5083f942965c7b4f8e6 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Thu, 15 Sep 2011 19:23:52 +0200 Subject: [PATCH 438/455] release 2011.09.17 --- LATEST_VERSION | 2 +- youtube-dl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/LATEST_VERSION b/LATEST_VERSION index d44b115a6..de3e640a8 100644 --- a/LATEST_VERSION +++ b/LATEST_VERSION @@ -1 +1 @@ -2011.09.16 +2011.09.17 diff --git a/youtube-dl b/youtube-dl index a787bf6ee..89bdc2a5b 100755 --- a/youtube-dl +++ b/youtube-dl @@ -15,7 +15,7 @@ __author__ = ( ) __license__ = 'Public Domain' -__version__ = '2011.09.16' +__version__ = '2011.09.17' UPDATE_URL = 'https://raw.github.com/rg3/youtube-dl/master/youtube-dl' From c25303c3d5a9431b788dd1634e6ece8c310c6bf0 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Thu, 15 Sep 2011 19:27:21 +0200 Subject: [PATCH 439/455] Set continue to false again; we need to send to actually send a HEAD request to determine whether we can continue or not --- youtube-dl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index 89bdc2a5b..e45ba4fca 100755 --- a/youtube-dl +++ b/youtube-dl @@ -3569,7 +3569,7 @@ def parseOpts(): filesystem.add_option('-w', 
'--no-overwrites', action='store_true', dest='nooverwrites', help='do not overwrite files', default=False) filesystem.add_option('-c', '--continue', - action='store_true', dest='continue_dl', help='resume partially downloaded files', default=True) + action='store_true', dest='continue_dl', help='resume partially downloaded files', default=False) filesystem.add_option('--no-continue', action='store_false', dest='continue_dl', help='do not resume partially downloaded files (restart from beginning)') From 7b1a2bbe178d0eb35f92242223558ba9ccef581b Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Thu, 15 Sep 2011 19:29:16 +0200 Subject: [PATCH 440/455] release 2011.09.18 --- LATEST_VERSION | 2 +- youtube-dl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/LATEST_VERSION b/LATEST_VERSION index de3e640a8..ed64508c2 100644 --- a/LATEST_VERSION +++ b/LATEST_VERSION @@ -1 +1 @@ -2011.09.17 +2011.09.18 diff --git a/youtube-dl b/youtube-dl index e45ba4fca..fa1e482ba 100755 --- a/youtube-dl +++ b/youtube-dl @@ -15,7 +15,7 @@ __author__ = ( ) __license__ = 'Public Domain' -__version__ = '2011.09.17' +__version__ = '2011.09.18' UPDATE_URL = 'https://raw.github.com/rg3/youtube-dl/master/youtube-dl' From 2736595628392257a43a226c34bfce4fd7858ce8 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Thu, 15 Sep 2011 20:09:30 +0200 Subject: [PATCH 441/455] Do not update if already up-to-date (Closes #166) --- youtube-dl | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/youtube-dl b/youtube-dl index fa1e482ba..34e86e027 100755 --- a/youtube-dl +++ b/youtube-dl @@ -3409,6 +3409,11 @@ def updateSelf(downloader, filename): try: urlh = urllib.urlopen(UPDATE_URL) newcontent = urlh.read() + + vmatch = re.search("__version__ = '([^']+)'", newcontent) + if vmatch is not None and vmatch.group(1) == __version__: + downloader.to_screen('youtube-dl is up-to-date (' + __version__ + ')') + return finally: urlh.close() except (IOError, OSError), err: From 
eb6c37da432a8281675fd9740488ac967f83c782 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Thu, 15 Sep 2011 20:10:27 +0200 Subject: [PATCH 442/455] Clarified "restart" --- youtube-dl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index 34e86e027..8cfbf3cb3 100755 --- a/youtube-dl +++ b/youtube-dl @@ -3428,7 +3428,7 @@ def updateSelf(downloader, filename): except (IOError, OSError), err: sys.exit('ERROR: unable to overwrite current version') - downloader.to_screen('Updated youtube-dl. Restart to use the new version.') + downloader.to_screen('Updated youtube-dl. Restart youtube-dl to use the new version.') def parseOpts(): # Deferred imports From 67035ede49520a3cab088315976d20d5dcbf2b26 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Thu, 15 Sep 2011 20:24:21 +0200 Subject: [PATCH 443/455] Fix progress message when Content-Length is not set --- youtube-dl | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/youtube-dl b/youtube-dl index 8cfbf3cb3..1d7087a34 100755 --- a/youtube-dl +++ b/youtube-dl @@ -982,10 +982,13 @@ class FileDownloader(object): block_size = self.best_block_size(after - before, len(data_block)) # Progress message - percent_str = self.calc_percent(byte_counter, data_len) - eta_str = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len) speed_str = self.calc_speed(start, time.time(), byte_counter - resume_len) - self.report_progress(percent_str, data_len_str, speed_str, eta_str) + if data_len is None: + self.report_progress('Unknown %', data_len_str, speed_str, 'Unknown ETA') + else: + percent_str = self.calc_percent(byte_counter, data_len) + eta_str = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len) + self.report_progress(percent_str, data_len_str, speed_str, eta_str) # Apply rate limit self.slow_down(start, byte_counter - resume_len) From 2d8acd8039af34260b98db5f6bc5d2532a82f91a Mon Sep 17 00:00:00 2001 
From: Philipp Hagemeister Date: Thu, 15 Sep 2011 20:25:22 +0200 Subject: [PATCH 444/455] Fix escapist URL match --- youtube-dl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index 1d7087a34..8b525e7d3 100755 --- a/youtube-dl +++ b/youtube-dl @@ -3175,7 +3175,7 @@ class ComedyCentralIE(InfoExtractor): class EscapistIE(InfoExtractor): """Information extractor for The Escapist """ - _VALID_URL = r'^(https?://)?(www\.)escapistmagazine.com/videos/view/(?P[^/]+)/(?P[^/?]+)[/?].*$' + _VALID_URL = r'^(https?://)?(www\.)?escapistmagazine.com/videos/view/(?P[^/]+)/(?P[^/?]+)[/?]?.*$' IE_NAME = u'escapist' def report_extraction(self, showName): From 1cde6f1d528d95f15b6138fa6a4658789628ab9d Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Fri, 16 Sep 2011 22:31:31 +0200 Subject: [PATCH 445/455] Prevent youtube IE from taking youtube playlists --- youtube-dl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/youtube-dl b/youtube-dl index 8b525e7d3..a525dc412 100755 --- a/youtube-dl +++ b/youtube-dl @@ -1082,7 +1082,7 @@ class InfoExtractor(object): class YoutubeIE(InfoExtractor): """Information extractor for youtube.com.""" - _VALID_URL = r'^((?:https?://)?(?:youtu\.be/|(?:\w+\.)?youtube(?:-nocookie)?\.com/)(?:(?:(?:v|embed|e)/)|(?:(?:watch(?:_popup)?(?:\.php)?)?(?:\?|#!?)(?:.+&)?v=))?)?([0-9A-Za-z_-]+)(?(1).+)?$' + _VALID_URL = r'^((?:https?://)?(?:youtu\.be/|(?:\w+\.)?youtube(?:-nocookie)?\.com/)(?!view_play_list|my_playlists|artist|playlist)(?:(?:(?:v|embed|e)/)|(?:(?:watch(?:_popup)?(?:\.php)?)?(?:\?|#!?)(?:.+&)?v=))?)?([0-9A-Za-z_-]+)(?(1).+)?$' _LANG_URL = r'http://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1' _LOGIN_URL = 'https://www.youtube.com/signup?next=/&gl=US&hl=en' _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en' @@ -3626,12 +3626,12 @@ def gen_extractors(): google_ie = GoogleIE() yahoo_ie = YahooIE() return [ - youtube_ie, - 
MetacafeIE(youtube_ie), - DailymotionIE(), YoutubePlaylistIE(youtube_ie), YoutubeUserIE(youtube_ie), YoutubeSearchIE(youtube_ie), + youtube_ie, + MetacafeIE(youtube_ie), + DailymotionIE(), google_ie, GoogleSearchIE(google_ie), PhotobucketIE(), From 08a5b7f8004b1a5ff57f93830e66b6a0063c38c3 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Fri, 16 Sep 2011 22:33:08 +0200 Subject: [PATCH 446/455] Release 2011.09.18b --- LATEST_VERSION | 2 +- youtube-dl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/LATEST_VERSION b/LATEST_VERSION index ed64508c2..4ed2a076e 100644 --- a/LATEST_VERSION +++ b/LATEST_VERSION @@ -1 +1 @@ -2011.09.18 +2011.09.18b diff --git a/youtube-dl b/youtube-dl index a525dc412..df0391feb 100755 --- a/youtube-dl +++ b/youtube-dl @@ -15,7 +15,7 @@ __author__ = ( ) __license__ = 'Public Domain' -__version__ = '2011.09.18' +__version__ = '2011.09.18b' UPDATE_URL = 'https://raw.github.com/rg3/youtube-dl/master/youtube-dl' From 44424ceee92533f501e9595b5ffc10d2ee12545a Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sat, 17 Sep 2011 00:39:51 +0200 Subject: [PATCH 447/455] Prefer mp4 over webm - seems to work better for most users --- youtube-dl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index df0391feb..53d454873 100755 --- a/youtube-dl +++ b/youtube-dl @@ -1088,7 +1088,7 @@ class YoutubeIE(InfoExtractor): _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en' _NETRC_MACHINE = 'youtube' # Listed in order of quality - _available_formats = ['38', '37', '45', '22', '43', '35', '34', '18', '6', '5', '17', '13'] + _available_formats = ['38', '37', '22', '45', '35', '43', '34', '18', '6', '5', '17', '13'] _video_extensions = { '13': '3gp', '17': 'mp4', From 7b417b388a6880c708df9908ded1ea21a4af260d Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sat, 17 Sep 2011 00:51:25 +0200 Subject: [PATCH 448/455] Add youtube format 44 --- youtube-dl | 3 ++- 1 file 
changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index 53d454873..dbee119c9 100755 --- a/youtube-dl +++ b/youtube-dl @@ -1088,7 +1088,7 @@ class YoutubeIE(InfoExtractor): _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en' _NETRC_MACHINE = 'youtube' # Listed in order of quality - _available_formats = ['38', '37', '22', '45', '35', '43', '34', '18', '6', '5', '17', '13'] + _available_formats = ['38', '37', '22', '45', '35', '44', '34', '43', '18', '6', '5', '17', '13'] _video_extensions = { '13': '3gp', '17': 'mp4', @@ -1097,6 +1097,7 @@ class YoutubeIE(InfoExtractor): '37': 'mp4', '38': 'video', # You actually don't know if this will be MOV, AVI or whatever '43': 'webm', + '44': 'webm', '45': 'webm', } IE_NAME = u'youtube' From 767414a292201045a9aad6bd3d333153b43f2007 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sat, 17 Sep 2011 00:58:14 +0200 Subject: [PATCH 449/455] Prefer format 18 over 43 --- youtube-dl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index dbee119c9..c85b137b1 100755 --- a/youtube-dl +++ b/youtube-dl @@ -1088,7 +1088,7 @@ class YoutubeIE(InfoExtractor): _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en' _NETRC_MACHINE = 'youtube' # Listed in order of quality - _available_formats = ['38', '37', '22', '45', '35', '44', '34', '43', '18', '6', '5', '17', '13'] + _available_formats = ['38', '37', '22', '45', '35', '44', '34', '18', '43', '6', '5', '17', '13'] _video_extensions = { '13': '3gp', '17': 'mp4', From daa982bc019dacbf09ab7ee198a010482b9d4356 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sat, 17 Sep 2011 00:58:44 +0200 Subject: [PATCH 450/455] release 2011.09.18c: Prefer mp4 over webm --- LATEST_VERSION | 2 +- youtube-dl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/LATEST_VERSION b/LATEST_VERSION index 4ed2a076e..3cc325adc 100644 --- a/LATEST_VERSION +++ b/LATEST_VERSION @@ -1 +1 @@ 
-2011.09.18b +2011.09.18c diff --git a/youtube-dl b/youtube-dl index c85b137b1..38472272f 100755 --- a/youtube-dl +++ b/youtube-dl @@ -15,7 +15,7 @@ __author__ = ( ) __license__ = 'Public Domain' -__version__ = '2011.09.18b' +__version__ = '2011.09.18c' UPDATE_URL = 'https://raw.github.com/rg3/youtube-dl/master/youtube-dl' From abb870d1adb7440d0ac8bb397d8aea7e3057539d Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sun, 18 Sep 2011 18:50:23 +0200 Subject: [PATCH 451/455] Clarify --cookies option --- youtube-dl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index 38472272f..02c31e4db 100755 --- a/youtube-dl +++ b/youtube-dl @@ -3583,7 +3583,7 @@ def parseOpts(): action='store_false', dest='continue_dl', help='do not resume partially downloaded files (restart from beginning)') filesystem.add_option('--cookies', - dest='cookiefile', metavar='FILE', help='file to dump cookie jar to') + dest='cookiefile', metavar='FILE', help='file to read cookies from and dump cookie jar in') filesystem.add_option('--no-part', action='store_true', dest='nopart', help='do not use .part files', default=False) filesystem.add_option('--no-mtime', From 58384838c31977305a13d45438743fc6a08383de Mon Sep 17 00:00:00 2001 From: Richard Clamp Date: Wed, 21 Sep 2011 17:29:25 +0100 Subject: [PATCH 452/455] Add support for vorbis files to --extract-audio Add Ogg Vorbis as a file type when extracting the audio from a file. This can be the 'best' codec if the source clip is a webm container. 
--- youtube-dl | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/youtube-dl b/youtube-dl index 02c31e4db..0e7f5d6eb 100755 --- a/youtube-dl +++ b/youtube-dl @@ -3351,12 +3351,14 @@ class FFmpegExtractAudioPP(PostProcessor): more_opts = [] if self._preferredcodec == 'best' or self._preferredcodec == filecodec: - if filecodec == 'aac' or filecodec == 'mp3': + if filecodec == 'aac' or filecodec == 'mp3' or filecodec == 'vorbis': # Lossless if possible acodec = 'copy' extension = filecodec if filecodec == 'aac': more_opts = ['-f', 'adts'] + if filecodec == 'vorbis': + extension = 'ogg' else: # MP3 otherwise. acodec = 'libmp3lame' @@ -3366,13 +3368,15 @@ class FFmpegExtractAudioPP(PostProcessor): more_opts += ['-ab', self._preferredquality] else: # We convert the audio (lossy) - acodec = {'mp3': 'libmp3lame', 'aac': 'aac'}[self._preferredcodec] + acodec = {'mp3': 'libmp3lame', 'aac': 'aac', 'vorbis': 'libvorbis'}[self._preferredcodec] extension = self._preferredcodec more_opts = [] if self._preferredquality is not None: more_opts += ['-ab', self._preferredquality] if self._preferredcodec == 'aac': more_opts += ['-f', 'adts'] + if self._preferredcodec == 'vorbis': + extension = 'ogg' (prefix, ext) = os.path.splitext(path) new_path = prefix + '.' 
+ extension @@ -3600,7 +3604,7 @@ def parseOpts(): postproc.add_option('--extract-audio', action='store_true', dest='extractaudio', default=False, help='convert video files to audio-only files (requires ffmpeg and ffprobe)') postproc.add_option('--audio-format', metavar='FORMAT', dest='audioformat', default='best', - help='"best", "aac" or "mp3"; best by default') + help='"best", "aac", "vorbis" or "mp3"; best by default') postproc.add_option('--audio-quality', metavar='QUALITY', dest='audioquality', default='128K', help='ffmpeg audio bitrate specification, 128k by default') postproc.add_option('-k', '--keep-video', action='store_true', dest='keepvideo', default=False, @@ -3734,7 +3738,7 @@ def main(): except (TypeError, ValueError), err: parser.error(u'invalid playlist end number specified') if opts.extractaudio: - if opts.audioformat not in ['best', 'aac', 'mp3']: + if opts.audioformat not in ['best', 'aac', 'mp3', 'vorbis']: parser.error(u'invalid audio format specified') # File downloader From 3ce59dae886b52c66b33409d659b7ca4e3f77f63 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Wed, 21 Sep 2011 18:48:51 +0200 Subject: [PATCH 453/455] Update README with new options --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 7ade255b2..915a30206 100644 --- a/README.md +++ b/README.md @@ -39,7 +39,7 @@ which means you can modify it, redistribute it or use it however you like. -c, --continue resume partially downloaded files --no-continue do not resume partially downloaded files (restart from beginning) - --cookies FILE file to dump cookie jar to + --cookies FILE file to read cookies from and dump cookie jar in --no-part do not use .part files --no-mtime do not use the Last-modified header to set the file modification time @@ -73,7 +73,7 @@ which means you can modify it, redistribute it or use it however you like. 
### Post-processing Options: --extract-audio convert video files to audio-only files (requires ffmpeg and ffprobe) - --audio-format FORMAT "best", "aac" or "mp3"; best by default + --audio-format FORMAT "best", "aac", "vorbis" or "mp3"; best by default --audio-quality QUALITY ffmpeg audio bitrate specification, 128k by default -k, --keep-video keeps the video file on disk after the post- processing; the video is erased by default From efb113c736cda1300f4c30ba8b130fbe79dd1277 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Wed, 21 Sep 2011 18:49:08 +0200 Subject: [PATCH 454/455] Simplify test --- youtube-dl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index 0e7f5d6eb..2b08fab74 100755 --- a/youtube-dl +++ b/youtube-dl @@ -3351,7 +3351,7 @@ class FFmpegExtractAudioPP(PostProcessor): more_opts = [] if self._preferredcodec == 'best' or self._preferredcodec == filecodec: - if filecodec == 'aac' or filecodec == 'mp3' or filecodec == 'vorbis': + if filecodec in ['aac', 'mp3', 'vorbis']: # Lossless if possible acodec = 'copy' extension = filecodec From 7401ca7c5f5fe84f4d2e638adc4cd50e2a9b6285 Mon Sep 17 00:00:00 2001 From: knagano Date: Mon, 26 Sep 2011 20:57:58 +0900 Subject: [PATCH 455/455] DailymotionIE: Added hd1080URL, hd720URL, hqURL support. Now video_uploader is optional. 
--- youtube-dl | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/youtube-dl b/youtube-dl index 2b08fab74..59ff1a854 100755 --- a/youtube-dl +++ b/youtube-dl @@ -1563,12 +1563,19 @@ class DailymotionIE(InfoExtractor): # Extract URL, uploader and title from webpage self.report_extraction(video_id) - mobj = re.search(r'(?i)addVariable\(\"sequence\"\s*,\s*\"([^\"]+?)\"\)', webpage) + mobj = re.search(r'(?i)addVariable\(\"sequence\"\s*,\s*\"([^\"]+)\"\)', webpage) if mobj is None: self._downloader.trouble(u'ERROR: unable to extract media URL') return sequence = urllib.unquote(mobj.group(1)) - mobj = re.search(r',\"sdURL\"\:\"([^\"]+?)\",', sequence) + + mobj = None + for key in ['hd1080URL', 'hd720URL', 'hqURL', 'sdURL']: + pattern = re.compile(r',\"%s\"\:\"([^\"]+)\",' % key) + mobj = re.search(pattern, sequence) + if mobj is not None: + self._downloader.to_screen(u'[dailymotion] Using %s' % key) + break if mobj is None: self._downloader.trouble(u'ERROR: unable to extract media URL') return @@ -1578,18 +1585,19 @@ class DailymotionIE(InfoExtractor): video_url = mediaURL - mobj = re.search(r'(?im)Dailymotion\s*-\s*(.+)\s*-\s*[^<]+?', webpage) + mobj = re.search(r'(?im)Dailymotion\s*-\s*(.+)\s*-\s*[^<]+', webpage) if mobj is None: self._downloader.trouble(u'ERROR: unable to extract title') return video_title = mobj.group(1).decode('utf-8') video_title = sanitize_title(video_title) - mobj = re.search(r'(?im)[^<]+?]+?>([^<]+?)', webpage) + video_uploader = '' + mobj = re.search(r'(?im)[^<]+]+>([^<]+)', webpage) if mobj is None: - self._downloader.trouble(u'ERROR: unable to extract uploader nickname') - return - video_uploader = mobj.group(1) + self._downloader.to_screen(u'WARNING: unable to extract uploader nickname') + else: + video_uploader = mobj.group(1) try: # Process video information