diff --git a/.gitignore b/.gitignore index 564bde1d1..77469b8a7 100644 --- a/.gitignore +++ b/.gitignore @@ -17,3 +17,4 @@ youtube-dl.tar.gz .coverage cover/ updates_key.pem +*.egg-info diff --git a/Makefile b/Makefile index b47433573..966a685e1 100644 --- a/Makefile +++ b/Makefile @@ -7,6 +7,7 @@ PREFIX=/usr/local BINDIR=$(PREFIX)/bin MANDIR=$(PREFIX)/man SYSCONFDIR=/etc +PYTHON=/usr/bin/env python install: youtube-dl youtube-dl.1 youtube-dl.bash-completion install -d $(DESTDIR)$(BINDIR) @@ -27,7 +28,7 @@ tar: youtube-dl.tar.gz youtube-dl: youtube_dl/*.py zip --quiet youtube-dl youtube_dl/*.py zip --quiet --junk-paths youtube-dl youtube_dl/__main__.py - echo '#!/usr/bin/env python' > youtube-dl + echo '#!$(PYTHON)' > youtube-dl cat youtube-dl.zip >> youtube-dl rm youtube-dl.zip chmod a+x youtube-dl diff --git a/README.md b/README.md index 71bad017d..a14dac9f4 100644 --- a/README.md +++ b/README.md @@ -38,6 +38,10 @@ which means you can modify it, redistribute it or use it however you like. --reject-title REGEX skip download for matching titles (regex or caseless sub-string) --max-downloads NUMBER Abort after downloading NUMBER files + --min-filesize SIZE Do not download any videos smaller than SIZE (e.g. + 50k or 44.6m) + --max-filesize SIZE Do not download any videos larger than SIZE (e.g. + 50k or 44.6m) ## Filesystem Options: -t, --title use title in file name diff --git a/devscripts/release.sh b/devscripts/release.sh index 561499ccb..a5f07fd61 100755 --- a/devscripts/release.sh +++ b/devscripts/release.sh @@ -83,4 +83,7 @@ ROOT=$(pwd) ) rm -rf build +echo "Uploading to PyPi ..." +python setup.py sdist upload + echo "\n### DONE!" diff --git a/setup.py b/setup.py index 6d019dcbb..eb7b1a212 100644 --- a/setup.py +++ b/setup.py @@ -2,10 +2,14 @@ # -*- coding: utf-8 -*- from __future__ import print_function -from distutils.core import setup import pkg_resources import sys +try: + from setuptools import setup +except ImportError: + from distutils.core import setup + try: import py2exe """This will create an exe that needs Microsoft Visual C++ 2008 Redistributable Package""" diff --git a/test/test_download.py b/test/test_download.py index 5877c42b3..f1bccf58c 100644 --- a/test/test_download.py +++ b/test/test_download.py @@ -98,7 +98,7 @@ def generator(test_case): for tc in test_cases: if not test_case.get('params', {}).get('skip_download', False): - self.assertTrue(os.path.exists(tc['file'])) + self.assertTrue(os.path.exists(tc['file']), msg='Missing file ' + tc['file']) self.assertTrue(tc['file'] in finished_hook_called) self.assertTrue(os.path.exists(tc['file'] + '.info.json')) if 'md5' in tc: @@ -107,11 +107,7 @@ def generator(test_case): with io.open(tc['file'] + '.info.json', encoding='utf-8') as infof: info_dict = json.load(infof) for (info_field, value) in tc.get('info_dict', {}).items(): - if value.startswith('md5:'): - md5_info_value = hashlib.md5(info_dict.get(info_field, '')).hexdigest() - self.assertEqual(value[3:], md5_info_value) - else: - self.assertEqual(value, info_dict.get(info_field)) + self.assertEqual(value, info_dict.get(info_field)) finally: for tc in test_cases: _try_rm(tc['file']) diff --git a/test/tests.json b/test/tests.json index 2c2137ce4..d2058c21f 100644 --- a/test/tests.json +++ b/test/tests.json @@ -76,7 +76,8 @@ "name": "StanfordOpenClassroom", "md5": "544a9468546059d4e80d76265b0443b8", "url": "http://openclassroom.stanford.edu/MainFolder/VideoPage.php?course=PracticalUnix&video=intro-environment&speed=100", - "file": "PracticalUnix_intro-environment.mp4" + "file": "PracticalUnix_intro-environment.mp4", + "skip": "Currently offline" }, { "name": "XNXX", @@ -181,37 +182,12 @@ }, { "name": "ComedyCentral", - "url": "http://www.thedailyshow.com/full-episodes/thu-december-13-2012-kristen-stewart", - "playlist": [ - { - "file": "422204.mp4", - "md5": "7a7abe068b31ff03e7b8a37596e72380", - "info_dict": { - "title": "thedailyshow-thu-december-13-2012-kristen-stewart part 1" - } - }, - { - "file": "422205.mp4", - "md5": "30552b7274c94dbb933f64600eadddd2", - "info_dict": { - "title": "thedailyshow-thu-december-13-2012-kristen-stewart part 2" - } - }, - { - "file": "422206.mp4", - "md5": "1f4c0664b352cb8e8fe85d5da4fbee91", - "info_dict": { - "title": "thedailyshow-thu-december-13-2012-kristen-stewart part 3" - } - }, - { - "file": "422207.mp4", - "md5": "f61ee8a4e6bd1308438e03badad78554", - "info_dict": { - "title": "thedailyshow-thu-december-13-2012-kristen-stewart part 4" - } - } - ] + "url": "http://www.thedailyshow.com/watch/thu-december-13-2012/kristen-stewart", + "file": "422212.mp4", + "md5": "4e2f5cb088a83cd8cdb7756132f9739d", + "info_dict": { + "title": "thedailyshow-kristen-stewart part 1" + } }, { "name": "RBMARadio", @@ -225,5 +201,80 @@ "uploader_id": "ford-lopatin", "location": "Spain" } + }, + { + "name": "Facebook", + "url": "https://www.facebook.com/photo.php?v=120708114770723", + "file": "120708114770723.mp4", + "md5": "48975a41ccc4b7a581abd68651c1a5a8", + "info_dict": { + "title": "PEOPLE ARE AWESOME 2013", + "duration": 279 + } + }, + { + "name": "EightTracks", + "url": "http://8tracks.com/ytdl/youtube-dl-test-tracks-a", + "playlist": [ + { + "file": "11885610.m4a", + "md5": "96ce57f24389fc8734ce47f4c1abcc55", + "info_dict": { + "title": "youtue-dl project<>\"' - youtube-dl test track 1 \"'/\\\u00e4\u21ad", + "uploader_id": "ytdl" + } + }, + { + "file": "11885608.m4a", + "md5": "4ab26f05c1f7291ea460a3920be8021f", + "info_dict": { + "title": "youtube-dl project - youtube-dl test track 2 \"'/\\\u00e4\u21ad", + "uploader_id": "ytdl" + + } + }, + { + "file": "11885679.m4a", + "md5": "d30b5b5f74217410f4689605c35d1fd7", + "info_dict": { + "title": "youtube-dl project as well - youtube-dl test track 3 \"'/\\\u00e4\u21ad" + } + }, + { + "file": "11885680.m4a", + "md5": "4eb0a669317cd725f6bbd336a29f923a", + "info_dict": { + "title": "youtube-dl project as well - youtube-dl test track 4 \"'/\\\u00e4\u21ad" + } + }, + { + "file": "11885682.m4a", + "md5": "1893e872e263a2705558d1d319ad19e8", + "info_dict": { + "title": "PH - youtube-dl test track 5 \"'/\\\u00e4\u21ad" + } + }, + { + "file": "11885683.m4a", + "md5": "b673c46f47a216ab1741ae8836af5899", + "info_dict": { + "title": "PH - youtube-dl test track 6 \"'/\\\u00e4\u21ad" + } + }, + { + "file": "11885684.m4a", + "md5": "1d74534e95df54986da7f5abf7d842b7", + "info_dict": { + "title": "phihag - youtube-dl test track 7 \"'/\\\u00e4\u21ad" + } + }, + { + "file": "11885685.m4a", + "md5": "f081f47af8f6ae782ed131d38b9cd1c0", + "info_dict": { + "title": "phihag - youtube-dl test track 8 \"'/\\\u00e4\u21ad" + } + } + ] } ] diff --git a/youtube_dl/FileDownloader.py b/youtube_dl/FileDownloader.py index a00a2b3b2..8ad7045b0 100644 --- a/youtube_dl/FileDownloader.py +++ b/youtube_dl/FileDownloader.py @@ -82,6 +82,8 @@ class FileDownloader(object): subtitleslang: Language of the subtitles to download test: Download only first bytes to test the downloader. keepvideo: Keep the video file after post-processing + min_filesize: Skip files smaller than this size + max_filesize: Skip files larger than this size """ params = None @@ -767,6 +769,15 @@ class FileDownloader(object): data_len = data.info().get('Content-length', None) if data_len is not None: data_len = int(data_len) + resume_len + min_data_len = self.params.get("min_filesize", None) + max_data_len = self.params.get("max_filesize", None) + if min_data_len is not None and data_len < min_data_len: + self.to_screen(u'\r[download] File is smaller than min-filesize (%s bytes < %s bytes). Aborting.' % (data_len, min_data_len)) + return False + if max_data_len is not None and data_len > max_data_len: + self.to_screen(u'\r[download] File is larger than max-filesize (%s bytes > %s bytes). Aborting.' % (data_len, max_data_len)) + return False + data_len_str = self.format_bytes(data_len) byte_counter = 0 + resume_len block_size = self.params.get('buffersize', 1024) diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py index dcd7ca647..ac3ecea92 100755 --- a/youtube_dl/InfoExtractors.py +++ b/youtube_dl/InfoExtractors.py @@ -5,6 +5,7 @@ from __future__ import absolute_import import base64 import datetime +import itertools import netrc import os import re @@ -263,13 +264,18 @@ class YoutubeIE(InfoExtractor): srt_lang = list(srt_lang_list.keys())[0] if not srt_lang in srt_lang_list: return (u'WARNING: no closed captions found in the specified language', None) - request = compat_urllib_request.Request('http://www.youtube.com/api/timedtext?lang=%s&name=%s&v=%s' % (srt_lang, srt_lang_list[srt_lang], video_id)) + params = compat_urllib_parse.urlencode({ + 'lang': srt_lang, + 'name': srt_lang_list[srt_lang].encode('utf-8'), + 'v': video_id, + }) + url = 'http://www.youtube.com/api/timedtext?' + params try: - srt_xml = compat_urllib_request.urlopen(request).read().decode('utf-8') + srt_xml = compat_urllib_request.urlopen(url).read().decode('utf-8') except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: return (u'WARNING: unable to download video subtitles: %s' % compat_str(err), None) if not srt_xml: - return (u'WARNING: unable to download video subtitles', None) + return (u'WARNING: Did not fetch video subtitles', None) return (None, self._closed_captions_xml_to_srt(srt_xml)) def _print_formats(self, formats): @@ -972,7 +978,7 @@ class VimeoIE(InfoExtractor): """Information extractor for vimeo.com.""" # _VALID_URL matches Vimeo URLs - _VALID_URL = r'(?:https?://)?(?:(?:www|player).)?vimeo\.com/(?:(?:groups|album)/[^/]+/)?(?:videos?/)?([0-9]+)' + _VALID_URL = r'(?Phttps?://)?(?:(?:www|player)\.)?vimeo\.com/(?:(?:groups|album)/[^/]+/)?(?Pplay_redirect_hls\?clip_id=)?(?:videos?/)?(?P[0-9]+)' IE_NAME = u'vimeo' def __init__(self, downloader=None): @@ -993,7 +999,11 @@ class VimeoIE(InfoExtractor): self._downloader.trouble(u'ERROR: Invalid URL: %s' % url) return - video_id = mobj.group(1) + video_id = mobj.group('id') + if not mobj.group('proto'): + url = 'https://' + url + if mobj.group('direct_link'): + url = 'https://vimeo.com/' + video_id # Retrieve video webpage to extract further information request = compat_urllib_request.Request(url, None, std_headers) @@ -1980,62 +1990,14 @@ class DepositFilesIE(InfoExtractor): class FacebookIE(InfoExtractor): """Information Extractor for Facebook""" - _WORKING = False _VALID_URL = r'^(?:https?://)?(?:\w+\.)?facebook\.com/(?:video/video|photo)\.php\?(?:.*?)v=(?P\d+)(?:.*)' _LOGIN_URL = 'https://login.facebook.com/login.php?m&next=http%3A%2F%2Fm.facebook.com%2Fhome.php&' _NETRC_MACHINE = 'facebook' - _available_formats = ['video', 'highqual', 'lowqual'] - _video_extensions = { - 'video': 'mp4', - 'highqual': 'mp4', - 'lowqual': 'mp4', - } IE_NAME = u'facebook' - def __init__(self, downloader=None): - InfoExtractor.__init__(self, downloader) - - def _reporter(self, message): - """Add header and report message.""" - self._downloader.to_screen(u'[facebook] %s' % message) - def report_login(self): """Report attempt to log in.""" - self._reporter(u'Logging in') - - def report_video_webpage_download(self, video_id): - """Report attempt to download video webpage.""" - self._reporter(u'%s: Downloading video webpage' % video_id) - - def report_information_extraction(self, video_id): - """Report attempt to extract video information.""" - self._reporter(u'%s: Extracting video information' % video_id) - - def _parse_page(self, video_webpage): - """Extract video information from page""" - # General data - data = {'title': r'\("video_title", "(.*?)"\)', - 'description': r'
(.*?)
', - 'owner': r'\("video_owner_name", "(.*?)"\)', - 'thumbnail': r'\("thumb_url", "(?P.*?)"\)', - } - video_info = {} - for piece in data.keys(): - mobj = re.search(data[piece], video_webpage) - if mobj is not None: - video_info[piece] = compat_urllib_parse.unquote_plus(mobj.group(1).decode("unicode_escape")) - - # Video urls - video_urls = {} - for fmt in self._available_formats: - mobj = re.search(r'\("%s_src\", "(.+?)"\)' % fmt, video_webpage) - if mobj is not None: - # URL is in a Javascript segment inside an escaped Unicode format within - # the generally utf-8 page - video_urls[fmt] = compat_urllib_parse.unquote_plus(mobj.group(1).decode("unicode_escape")) - video_info['video_urls'] = video_urls - - return video_info + self._downloader.to_screen(u'[%s] Logging in' % self.IE_NAME) def _real_initialize(self): if self._downloader is None: @@ -2088,100 +2050,35 @@ class FacebookIE(InfoExtractor): return video_id = mobj.group('ID') - # Get video webpage - self.report_video_webpage_download(video_id) - request = compat_urllib_request.Request('https://www.facebook.com/video/video.php?v=%s' % video_id) - try: - page = compat_urllib_request.urlopen(request) - video_webpage = page.read() - except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: - self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % compat_str(err)) - return + url = 'https://www.facebook.com/video/video.php?v=%s' % video_id + webpage = self._download_webpage(url, video_id) - # Start extracting information - self.report_information_extraction(video_id) + BEFORE = '[["allowFullScreen","true"],["allowScriptAccess","always"],["salign","tl"],["scale","noscale"],["wmode","opaque"]].forEach(function(param) {swf.addParam(param[0], param[1]);});\n' + AFTER = '.forEach(function(variable) {swf.addVariable(variable[0], variable[1]);});' + m = re.search(re.escape(BEFORE) + '(.*?)' + re.escape(AFTER), webpage) + if not m: + raise ExtractorError(u'Cannot parse data') + data = dict(json.loads(m.group(1))) + params_raw = compat_urllib_parse.unquote(data['params']) + params = json.loads(params_raw) + video_url = params['hd_src'] + video_duration = int(params['video_duration']) - # Extract information - video_info = self._parse_page(video_webpage) + m = re.search('

([^<]+)

', webpage) + if not m: + raise ExtractorError(u'Cannot find title in webpage') + video_title = unescapeHTML(m.group(1)) - # uploader - if 'owner' not in video_info: - self._downloader.trouble(u'ERROR: unable to extract uploader nickname') - return - video_uploader = video_info['owner'] + info = { + 'id': video_id, + 'title': video_title, + 'url': video_url, + 'ext': 'mp4', + 'duration': video_duration, + 'thumbnail': params['thumbnail_src'], + } + return [info] - # title - if 'title' not in video_info: - self._downloader.trouble(u'ERROR: unable to extract video title') - return - video_title = video_info['title'] - video_title = video_title.decode('utf-8') - - # thumbnail image - if 'thumbnail' not in video_info: - self._downloader.trouble(u'WARNING: unable to extract video thumbnail') - video_thumbnail = '' - else: - video_thumbnail = video_info['thumbnail'] - - # upload date - upload_date = None - if 'upload_date' in video_info: - upload_time = video_info['upload_date'] - timetuple = email.utils.parsedate_tz(upload_time) - if timetuple is not None: - try: - upload_date = time.strftime('%Y%m%d', timetuple[0:9]) - except: - pass - - # description - video_description = video_info.get('description', 'No description available.') - - url_map = video_info['video_urls'] - if url_map: - # Decide which formats to download - req_format = self._downloader.params.get('format', None) - format_limit = self._downloader.params.get('format_limit', None) - - if format_limit is not None and format_limit in self._available_formats: - format_list = self._available_formats[self._available_formats.index(format_limit):] - else: - format_list = self._available_formats - existing_formats = [x for x in format_list if x in url_map] - if len(existing_formats) == 0: - self._downloader.trouble(u'ERROR: no known formats available for video') - return - if req_format is None: - video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality - elif req_format == 'worst': - video_url_list = [(existing_formats[len(existing_formats)-1], url_map[existing_formats[len(existing_formats)-1]])] # worst quality - elif req_format == '-1': - video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats - else: - # Specific format - if req_format not in url_map: - self._downloader.trouble(u'ERROR: requested format not available') - return - video_url_list = [(req_format, url_map[req_format])] # Specific format - - results = [] - for format_param, video_real_url in video_url_list: - # Extension - video_extension = self._video_extensions.get(format_param, 'mp4') - - results.append({ - 'id': video_id.decode('utf-8'), - 'url': video_real_url.decode('utf-8'), - 'uploader': video_uploader.decode('utf-8'), - 'upload_date': upload_date, - 'title': video_title, - 'ext': video_extension.decode('utf-8'), - 'format': (format_param is None and u'NA' or format_param.decode('utf-8')), - 'thumbnail': video_thumbnail.decode('utf-8'), - 'description': video_description.decode('utf-8'), - }) - return results class BlipTVIE(InfoExtractor): """Information extractor for blip.tv""" @@ -2983,8 +2880,7 @@ class StanfordOpenClassroomIE(InfoExtractor): def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) if mobj is None: - self._downloader.trouble(u'ERROR: invalid URL: %s' % url) - return + raise ExtractorError(u'Invalid URL: %s' % url) if mobj.group('course') and mobj.group('video'): # A specific video course = mobj.group('course') @@ -3021,12 +2917,9 @@ class StanfordOpenClassroomIE(InfoExtractor): 'upload_date': None, } - self.report_download_webpage(info['id']) - try: - coursepage = compat_urllib_request.urlopen(url).read() - except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: - self._downloader.trouble(u'ERROR: unable to download course info page: ' + compat_str(err)) - return + coursepage = self._download_webpage(url, info['id'], + note='Downloading course info page', + errnote='Unable to download course info page') m = re.search('

([^<]+)

', coursepage) if m: @@ -3050,7 +2943,6 @@ class StanfordOpenClassroomIE(InfoExtractor): assert entry['type'] == 'reference' results += self.extract(entry['url']) return results - else: # Root page info = { 'id': 'Stanford OpenClassroom', @@ -3927,8 +3819,6 @@ class PornotubeIE(InfoExtractor): return [info] - - class YouJizzIE(InfoExtractor): """Information extractor for youjizz.com.""" _VALID_URL = r'^(?:https?://)?(?:\w+\.)?youjizz\.com/videos/(?P[^.]+).html$' @@ -3975,6 +3865,49 @@ class YouJizzIE(InfoExtractor): return [info] +class EightTracksIE(InfoExtractor): + IE_NAME = '8tracks' + _VALID_URL = r'https?://8tracks.com/(?P[^/]+)/(?P[^/#]+)(?:#.*)?$' + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + if mobj is None: + raise ExtractorError(u'Invalid URL: %s' % url) + playlist_id = mobj.group('id') + + webpage = self._download_webpage(url, playlist_id) + + m = re.search(r"new TRAX.Mix\((.*?)\);\n*\s*TRAX.initSearchAutocomplete\('#search'\);", webpage, flags=re.DOTALL) + if not m: + raise ExtractorError(u'Cannot find trax information') + json_like = m.group(1) + data = json.loads(json_like) + + session = str(random.randint(0, 1000000000)) + mix_id = data['id'] + track_count = data['tracks_count'] + first_url = 'http://8tracks.com/sets/%s/play?player=sm&mix_id=%s&format=jsonh' % (session, mix_id) + next_url = first_url + res = [] + for i in itertools.count(): + api_json = self._download_webpage(next_url, playlist_id, + note=u'Downloading song information %s/%s' % (str(i+1), track_count), + errnote=u'Failed to download song information') + api_data = json.loads(api_json) + track_data = api_data[u'set']['track'] + info = { + 'id': track_data['id'], + 'url': track_data['track_file_stream_url'], + 'title': track_data['performer'] + u' - ' + track_data['name'], + 'raw_title': track_data['name'], + 'uploader_id': data['user']['login'], + 'ext': 'm4a', + } + res.append(info) + if api_data['set']['at_last_track']: + break + next_url = 'http://8tracks.com/sets/%s/next?player=sm&mix_id=%s&format=jsonh&track_id=%s' % (session, mix_id, track_data['id']) + return res def gen_extractors(): """ Return a list of an instance of every supported extractor. @@ -4021,6 +3954,7 @@ def gen_extractors(): SteamIE(), UstreamIE(), RBMARadioIE(), + EightTracksIE(), GenericIE() ] diff --git a/youtube_dl/PostProcessor.py b/youtube_dl/PostProcessor.py index 545b6992b..70dc01004 100644 --- a/youtube_dl/PostProcessor.py +++ b/youtube_dl/PostProcessor.py @@ -143,10 +143,10 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor): more_opts = [] if self._preferredcodec == 'best' or self._preferredcodec == filecodec or (self._preferredcodec == 'm4a' and filecodec == 'aac'): - if self._preferredcodec == 'm4a' and filecodec == 'aac': + if filecodec == 'aac' and self._preferredcodec in ['m4a', 'best']: # Lossless, but in another container acodec = 'copy' - extension = self._preferredcodec + extension = 'm4a' more_opts = [self._exes['avconv'] and '-bsf:a' or '-absf', 'aac_adtstoasc'] elif filecodec in ['aac', 'mp3', 'vorbis', 'opus']: # Lossless if possible diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 86be14b1e..b15dc4e19 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -150,6 +150,9 @@ def parseOpts(): selection.add_option('--match-title', dest='matchtitle', metavar='REGEX',help='download only matching titles (regex or caseless sub-string)') selection.add_option('--reject-title', dest='rejecttitle', metavar='REGEX',help='skip download for matching titles (regex or caseless sub-string)') selection.add_option('--max-downloads', metavar='NUMBER', dest='max_downloads', help='Abort after downloading NUMBER files', default=None) + selection.add_option('--min-filesize', metavar='SIZE', dest='min_filesize', help="Do not download any videos smaller than SIZE (e.g. 50k or 44.6m)", default=None) + selection.add_option('--max-filesize', metavar='SIZE', dest='max_filesize', help="Do not download any videos larger than SIZE (e.g. 50k or 44.6m)", default=None) + authentication.add_option('-u', '--username', dest='username', metavar='USERNAME', help='account username') @@ -290,10 +293,13 @@ def _real_main(): else: try: jar = compat_cookiejar.MozillaCookieJar(opts.cookiefile) - if os.path.isfile(opts.cookiefile) and os.access(opts.cookiefile, os.R_OK): + if os.access(opts.cookiefile, os.R_OK): jar.load() except (IOError, OSError) as err: - sys.exit(u'ERROR: unable to open cookie file') + if opts.verbose: + traceback.print_exc() + sys.stderr.write(u'ERROR: unable to open cookie file\n') + sys.exit(101) # Set user agent if opts.user_agent is not None: std_headers['User-Agent'] = opts.user_agent @@ -353,6 +359,16 @@ def _real_main(): if numeric_limit is None: parser.error(u'invalid rate limit specified') opts.ratelimit = numeric_limit + if opts.min_filesize is not None: + numeric_limit = FileDownloader.parse_bytes(opts.min_filesize) + if numeric_limit is None: + parser.error(u'invalid min_filesize specified') + opts.min_filesize = numeric_limit + if opts.max_filesize is not None: + numeric_limit = FileDownloader.parse_bytes(opts.max_filesize) + if numeric_limit is None: + parser.error(u'invalid max_filesize specified') + opts.max_filesize = numeric_limit if opts.retries is not None: try: opts.retries = int(opts.retries) @@ -442,6 +458,8 @@ def _real_main(): 'verbose': opts.verbose, 'test': opts.test, 'keepvideo': opts.keepvideo, + 'min_filesize': opts.min_filesize, + 'max_filesize': opts.max_filesize }) if opts.verbose: diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 532e8c782..e6ce028d6 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -280,6 +280,12 @@ class AttrParser(compat_html_parser.HTMLParser): lines[-1] = lines[-1][:self.result[2][1]-self.result[1][1]] lines[-1] = lines[-1][:self.result[2][1]] return '\n'.join(lines).strip() +# Hack for https://github.com/rg3/youtube-dl/issues/662 +if sys.version_info < (2, 7, 3): + AttrParser.parse_endtag = (lambda self, i: + i + len("") + if self.rawdata[i:].startswith("") + else compat_html_parser.HTMLParser.parse_endtag(self, i)) def get_element_by_id(id, html): """Return the content of the tag with the specified ID in the passed HTML document""" diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 9322a3bfe..8b231ae80 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2013.01.13' +__version__ = '2013.02.02'