From 271d3fbdaa92e1db51b704c123d5526cea67e5e3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Fri, 25 Jan 2013 15:11:03 +0100 Subject: [PATCH 01/28] Option in makefile to select python interpreter --- Makefile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index b47433573..7aae2c309 100644 --- a/Makefile +++ b/Makefile @@ -7,6 +7,7 @@ PREFIX=/usr/local BINDIR=$(PREFIX)/bin MANDIR=$(PREFIX)/man SYSCONFDIR=/etc +PYTHON=/usr/bin/env python install: youtube-dl youtube-dl.1 youtube-dl.bash-completion install -d $(DESTDIR)$(BINDIR) @@ -27,7 +28,7 @@ tar: youtube-dl.tar.gz youtube-dl: youtube_dl/*.py zip --quiet youtube-dl youtube_dl/*.py zip --quiet --junk-paths youtube-dl youtube_dl/__main__.py - echo '#!/usr/bin/env python' > youtube-dl + echo '#! $(PYTHON)' > youtube-dl cat youtube-dl.zip >> youtube-dl rm youtube-dl.zip chmod a+x youtube-dl From 30e9f4496b7261526d753f54fa00fd5fedcefb05 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Fri, 25 Jan 2013 16:54:25 +0100 Subject: [PATCH 02/28] Drop md5: spec for now (unused and breaks int values) --- test/test_download.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/test/test_download.py b/test/test_download.py index 5877c42b3..14ac511d2 100644 --- a/test/test_download.py +++ b/test/test_download.py @@ -107,11 +107,7 @@ def generator(test_case): with io.open(tc['file'] + '.info.json', encoding='utf-8') as infof: info_dict = json.load(infof) for (info_field, value) in tc.get('info_dict', {}).items(): - if value.startswith('md5:'): - md5_info_value = hashlib.md5(info_dict.get(info_field, '')).hexdigest() - self.assertEqual(value[3:], md5_info_value) - else: - self.assertEqual(value, info_dict.get(info_field)) + self.assertEqual(value, info_dict.get(info_field)) finally: for tc in test_cases: _try_rm(tc['file']) From b954070d7064f37ee9f3feae8a60c90c42e30b11 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister 
Date: Fri, 25 Jan 2013 16:54:48 +0100 Subject: [PATCH 03/28] Fix Facebook (Closes #375) --- test/tests.json | 10 +++ youtube_dl/InfoExtractors.py | 163 ++++++----------------------------- 2 files changed, 34 insertions(+), 139 deletions(-) diff --git a/test/tests.json b/test/tests.json index 2c2137ce4..ef12d3019 100644 --- a/test/tests.json +++ b/test/tests.json @@ -225,5 +225,15 @@ "uploader_id": "ford-lopatin", "location": "Spain" } + }, + { + "name": "Facebook", + "url": "https://www.facebook.com/photo.php?v=120708114770723", + "file": "120708114770723.mp4", + "md5": "48975a41ccc4b7a581abd68651c1a5a8", + "info_dict": { + "title": "PEOPLE ARE AWESOME 2013", + "duration": 279 + } } ] diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py index dcd7ca647..a708cc750 100755 --- a/youtube_dl/InfoExtractors.py +++ b/youtube_dl/InfoExtractors.py @@ -1980,62 +1980,14 @@ class DepositFilesIE(InfoExtractor): class FacebookIE(InfoExtractor): """Information Extractor for Facebook""" - _WORKING = False _VALID_URL = r'^(?:https?://)?(?:\w+\.)?facebook\.com/(?:video/video|photo)\.php\?(?:.*?)v=(?P\d+)(?:.*)' _LOGIN_URL = 'https://login.facebook.com/login.php?m&next=http%3A%2F%2Fm.facebook.com%2Fhome.php&' _NETRC_MACHINE = 'facebook' - _available_formats = ['video', 'highqual', 'lowqual'] - _video_extensions = { - 'video': 'mp4', - 'highqual': 'mp4', - 'lowqual': 'mp4', - } IE_NAME = u'facebook' - def __init__(self, downloader=None): - InfoExtractor.__init__(self, downloader) - - def _reporter(self, message): - """Add header and report message.""" - self._downloader.to_screen(u'[facebook] %s' % message) - def report_login(self): """Report attempt to log in.""" - self._reporter(u'Logging in') - - def report_video_webpage_download(self, video_id): - """Report attempt to download video webpage.""" - self._reporter(u'%s: Downloading video webpage' % video_id) - - def report_information_extraction(self, video_id): - """Report attempt to extract video information.""" 
- self._reporter(u'%s: Extracting video information' % video_id) - - def _parse_page(self, video_webpage): - """Extract video information from page""" - # General data - data = {'title': r'\("video_title", "(.*?)"\)', - 'description': r'
<div class="datawrap">(.*?)</div>
', - 'owner': r'\("video_owner_name", "(.*?)"\)', - 'thumbnail': r'\("thumb_url", "(?P.*?)"\)', - } - video_info = {} - for piece in data.keys(): - mobj = re.search(data[piece], video_webpage) - if mobj is not None: - video_info[piece] = compat_urllib_parse.unquote_plus(mobj.group(1).decode("unicode_escape")) - - # Video urls - video_urls = {} - for fmt in self._available_formats: - mobj = re.search(r'\("%s_src\", "(.+?)"\)' % fmt, video_webpage) - if mobj is not None: - # URL is in a Javascript segment inside an escaped Unicode format within - # the generally utf-8 page - video_urls[fmt] = compat_urllib_parse.unquote_plus(mobj.group(1).decode("unicode_escape")) - video_info['video_urls'] = video_urls - - return video_info + self._downloader.to_screen(u'[%s] Logging in' % self.IE_NAME) def _real_initialize(self): if self._downloader is None: @@ -2088,100 +2040,33 @@ class FacebookIE(InfoExtractor): return video_id = mobj.group('ID') - # Get video webpage - self.report_video_webpage_download(video_id) - request = compat_urllib_request.Request('https://www.facebook.com/video/video.php?v=%s' % video_id) - try: - page = compat_urllib_request.urlopen(request) - video_webpage = page.read() - except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: - self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % compat_str(err)) - return + url = 'https://www.facebook.com/video/video.php?v=%s' % video_id + webpage = self._download_webpage(url, video_id) - # Start extracting information - self.report_information_extraction(video_id) + BEFORE = '[["allowFullScreen","true"],["allowScriptAccess","always"],["salign","tl"],["scale","noscale"],["wmode","opaque"]].forEach(function(param) {swf.addParam(param[0], param[1]);});\n' + AFTER = '.forEach(function(variable) {swf.addVariable(variable[0], variable[1]);});' + m = re.search(re.escape(BEFORE) + '(.*?)' + re.escape(AFTER), webpage) + if not m: + raise ExtractorError(u'Cannot parse 
data') + data = dict(json.loads(m.group(1))) + video_url = compat_urllib_parse.unquote(data['hd_src']) + video_duration = int(data['video_duration']) - # Extract information - video_info = self._parse_page(video_webpage) + m = re.search('

<h2 class="uiHeaderTitle">([^<]+)</h2>

', webpage) + if not m: + raise ExtractorError(u'Cannot find title in webpage') + video_title = unescapeHTML(m.group(1)) - # uploader - if 'owner' not in video_info: - self._downloader.trouble(u'ERROR: unable to extract uploader nickname') - return - video_uploader = video_info['owner'] + info = { + 'id': video_id, + 'title': video_title, + 'url': video_url, + 'ext': 'mp4', + 'duration': video_duration, + 'thumbnail': data['thumbnail_src'], + } + return [info] - # title - if 'title' not in video_info: - self._downloader.trouble(u'ERROR: unable to extract video title') - return - video_title = video_info['title'] - video_title = video_title.decode('utf-8') - - # thumbnail image - if 'thumbnail' not in video_info: - self._downloader.trouble(u'WARNING: unable to extract video thumbnail') - video_thumbnail = '' - else: - video_thumbnail = video_info['thumbnail'] - - # upload date - upload_date = None - if 'upload_date' in video_info: - upload_time = video_info['upload_date'] - timetuple = email.utils.parsedate_tz(upload_time) - if timetuple is not None: - try: - upload_date = time.strftime('%Y%m%d', timetuple[0:9]) - except: - pass - - # description - video_description = video_info.get('description', 'No description available.') - - url_map = video_info['video_urls'] - if url_map: - # Decide which formats to download - req_format = self._downloader.params.get('format', None) - format_limit = self._downloader.params.get('format_limit', None) - - if format_limit is not None and format_limit in self._available_formats: - format_list = self._available_formats[self._available_formats.index(format_limit):] - else: - format_list = self._available_formats - existing_formats = [x for x in format_list if x in url_map] - if len(existing_formats) == 0: - self._downloader.trouble(u'ERROR: no known formats available for video') - return - if req_format is None: - video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality - elif req_format == 'worst': - 
video_url_list = [(existing_formats[len(existing_formats)-1], url_map[existing_formats[len(existing_formats)-1]])] # worst quality - elif req_format == '-1': - video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats - else: - # Specific format - if req_format not in url_map: - self._downloader.trouble(u'ERROR: requested format not available') - return - video_url_list = [(req_format, url_map[req_format])] # Specific format - - results = [] - for format_param, video_real_url in video_url_list: - # Extension - video_extension = self._video_extensions.get(format_param, 'mp4') - - results.append({ - 'id': video_id.decode('utf-8'), - 'url': video_real_url.decode('utf-8'), - 'uploader': video_uploader.decode('utf-8'), - 'upload_date': upload_date, - 'title': video_title, - 'ext': video_extension.decode('utf-8'), - 'format': (format_param is None and u'NA' or format_param.decode('utf-8')), - 'thumbnail': video_thumbnail.decode('utf-8'), - 'description': video_description.decode('utf-8'), - }) - return results class BlipTVIE(InfoExtractor): """Information extractor for blip.tv""" From ccf65f9deee0edb007222aa957f4da6516478ae3 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sun, 27 Jan 2013 03:01:23 +0100 Subject: [PATCH 04/28] 8tracks IE (Closes #652) --- test/tests.json | 62 ++++++++++++++++++++++++++++++++++++ youtube_dl/InfoExtractors.py | 45 ++++++++++++++++++++++++-- 2 files changed, 105 insertions(+), 2 deletions(-) diff --git a/test/tests.json b/test/tests.json index ef12d3019..a46ff491b 100644 --- a/test/tests.json +++ b/test/tests.json @@ -235,5 +235,67 @@ "title": "PEOPLE ARE AWESOME 2013", "duration": 279 } + }, + { + "name": "EightTracks", + "url": "http://8tracks.com/ytdl/youtube-dl-test-tracks-a", + "playlist": [ + { + "file": "11885610.m4a", + "md5": "96ce57f24389fc8734ce47f4c1abcc55", + "info_dict": { + "title": "youtube-dl test track 1 \"'/\\\u00e4\u21ad" + } + }, + { + "file": "11885608.m4a", + "md5": 
"4ab26f05c1f7291ea460a3920be8021f", + "info_dict": { + "title": "youtube-dl test track 2 \"'/\\\u00e4\u21ad" + } + }, + { + "file": "11885679.m4a", + "md5": "d30b5b5f74217410f4689605c35d1fd7", + "info_dict": { + "title": "youtube-dl test track 3 \"'/\\\u00e4\u21ad" + } + }, + { + "file": "11885680.m4a", + "md5": "4eb0a669317cd725f6bbd336a29f923a", + "info_dict": { + "title": "youtube-dl test track 4 \"'/\\\u00e4\u21ad" + } + }, + { + "file": "11885682.m4a", + "md5": "1893e872e263a2705558d1d319ad19e8", + "info_dict": { + "title": "youtube-dl test track 5 \"'/\\\u00e4\u21ad" + } + }, + { + "file": "11885683.m4a", + "md5": "b673c46f47a216ab1741ae8836af5899", + "info_dict": { + "title": "youtube-dl test track 6 \"'/\\\u00e4\u21ad" + } + }, + { + "file": "11885684.m4a", + "md5": "1d74534e95df54986da7f5abf7d842b7", + "info_dict": { + "title": "youtube-dl test track 7 \"'/\\\u00e4\u21ad" + } + }, + { + "file": "11885685.m4a", + "md5": "f081f47af8f6ae782ed131d38b9cd1c0", + "info_dict": { + "title": "youtube-dl test track 8 \"'/\\\u00e4\u21ad" + } + } + ] } ] diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py index a708cc750..50a5a5cfb 100755 --- a/youtube_dl/InfoExtractors.py +++ b/youtube_dl/InfoExtractors.py @@ -5,6 +5,7 @@ from __future__ import absolute_import import base64 import datetime +import itertools import netrc import os import re @@ -3812,8 +3813,6 @@ class PornotubeIE(InfoExtractor): return [info] - - class YouJizzIE(InfoExtractor): """Information extractor for youjizz.com.""" _VALID_URL = r'^(?:https?://)?(?:\w+\.)?youjizz\.com/videos/(?P[^.]+).html$' @@ -3860,6 +3859,47 @@ class YouJizzIE(InfoExtractor): return [info] +class EightTracksIE(InfoExtractor): + IE_NAME = '8tracks' + _VALID_URL = r'https?://8tracks.com/(?P[^/]+)/(?P[^/]+)' + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + if mobj is None: + raise ExtractorError(u'Invalid URL: %s' % url) + playlist_id = mobj.group('id') + + webpage = 
self._download_webpage(url, playlist_id) + + m = re.search(r"new TRAX.Mix\((.*?)\);\n*\s*TRAX.initSearchAutocomplete\('#search'\);", webpage, flags=re.DOTALL) + if not m: + raise ExtractorError(u'Cannot find trax information') + json_like = m.group(1) + data = json.loads(json_like) + + session = str(random.randint(0, 1000000000)) + mix_id = data['id'] + track_count = data['tracks_count'] + first_url = 'http://8tracks.com/sets/%s/play?player=sm&mix_id=%s&format=jsonh' % (session, mix_id) + next_url = first_url + res = [] + for i in itertools.count(): + api_json = self._download_webpage(next_url, playlist_id, + note=u'Downloading song information %s/%s' % (str(i+1), track_count), + errnote=u'Failed to download song information') + api_data = json.loads(api_json) + track_data = api_data[u'set']['track'] + info = { + 'id': track_data['id'], + 'url': track_data['track_file_stream_url'], + 'title': track_data['name'], + 'ext': 'm4a', + } + res.append(info) + if api_data['set']['at_last_track']: + break + next_url = 'http://8tracks.com/sets/%s/next?player=sm&mix_id=%s&format=jsonh&track_id=%s' % (session, mix_id, track_data['id']) + return res def gen_extractors(): """ Return a list of an instance of every supported extractor. 
@@ -3906,6 +3946,7 @@ def gen_extractors(): SteamIE(), UstreamIE(), RBMARadioIE(), + EightTracksIE(), GenericIE() ] From c3a1642eade3233a36aae48ff8b27a91027c1c40 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sun, 27 Jan 2013 03:03:02 +0100 Subject: [PATCH 05/28] release 2013.01.27 --- youtube_dl/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 9322a3bfe..dfd7d6cec 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2013.01.13' +__version__ = '2013.01.27' From c67598c3e1b396e998b4dc4e74275e6e059606f9 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sun, 27 Jan 2013 03:07:07 +0100 Subject: [PATCH 06/28] Remove space before shebang --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 7aae2c309..966a685e1 100644 --- a/Makefile +++ b/Makefile @@ -28,7 +28,7 @@ tar: youtube-dl.tar.gz youtube-dl: youtube_dl/*.py zip --quiet youtube-dl youtube_dl/*.py zip --quiet --junk-paths youtube-dl youtube_dl/__main__.py - echo '#! 
$(PYTHON)' > youtube-dl + echo '#!$(PYTHON)' > youtube-dl cat youtube-dl.zip >> youtube-dl rm youtube-dl.zip chmod a+x youtube-dl From d0d51a8afa6e8e0691d14610254ccf080f50ba69 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sun, 27 Jan 2013 03:27:46 +0100 Subject: [PATCH 07/28] 8tracks: Include performer as uploader --- test/tests.json | 3 ++- youtube_dl/InfoExtractors.py | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/test/tests.json b/test/tests.json index a46ff491b..2fdd7f0f5 100644 --- a/test/tests.json +++ b/test/tests.json @@ -244,7 +244,8 @@ "file": "11885610.m4a", "md5": "96ce57f24389fc8734ce47f4c1abcc55", "info_dict": { - "title": "youtube-dl test track 1 \"'/\\\u00e4\u21ad" + "title": "youtube-dl test track 1 \"'/\\\u00e4\u21ad", + "uploader": "youtue-dl project<>\"'" } }, { diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py index 50a5a5cfb..0af59bce2 100755 --- a/youtube_dl/InfoExtractors.py +++ b/youtube_dl/InfoExtractors.py @@ -3893,6 +3893,7 @@ class EightTracksIE(InfoExtractor): 'id': track_data['id'], 'url': track_data['track_file_stream_url'], 'title': track_data['name'], + 'uploader': track_data['performer'], 'ext': 'm4a', } res.append(info) From da4de959dfb4cef9b79e6b65ed7dbf6f95092904 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sun, 27 Jan 2013 04:05:53 +0100 Subject: [PATCH 08/28] 8tracks: Better default titles --- test/tests.json | 20 +++++++++++--------- youtube_dl/InfoExtractors.py | 5 +++-- 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/test/tests.json b/test/tests.json index 2fdd7f0f5..a06266689 100644 --- a/test/tests.json +++ b/test/tests.json @@ -244,57 +244,59 @@ "file": "11885610.m4a", "md5": "96ce57f24389fc8734ce47f4c1abcc55", "info_dict": { - "title": "youtube-dl test track 1 \"'/\\\u00e4\u21ad", - "uploader": "youtue-dl project<>\"'" + "title": "youtue-dl project<>\"' - youtube-dl test track 1 \"'/\\\u00e4\u21ad", + "uploader_id": "ytdl" } }, { "file": 
"11885608.m4a", "md5": "4ab26f05c1f7291ea460a3920be8021f", "info_dict": { - "title": "youtube-dl test track 2 \"'/\\\u00e4\u21ad" + "title": "youtube-dl project - youtube-dl test track 2 \"'/\\\u00e4\u21ad", + "uploader_id": "ytdl" + } }, { "file": "11885679.m4a", "md5": "d30b5b5f74217410f4689605c35d1fd7", "info_dict": { - "title": "youtube-dl test track 3 \"'/\\\u00e4\u21ad" + "title": "youtube-dl project as well - youtube-dl test track 3 \"'/\\\u00e4\u21ad" } }, { "file": "11885680.m4a", "md5": "4eb0a669317cd725f6bbd336a29f923a", "info_dict": { - "title": "youtube-dl test track 4 \"'/\\\u00e4\u21ad" + "title": "youtube-dl project as well - youtube-dl test track 4 \"'/\\\u00e4\u21ad" } }, { "file": "11885682.m4a", "md5": "1893e872e263a2705558d1d319ad19e8", "info_dict": { - "title": "youtube-dl test track 5 \"'/\\\u00e4\u21ad" + "title": "PH - youtube-dl test track 5 \"'/\\\u00e4\u21ad" } }, { "file": "11885683.m4a", "md5": "b673c46f47a216ab1741ae8836af5899", "info_dict": { - "title": "youtube-dl test track 6 \"'/\\\u00e4\u21ad" + "title": "PH - youtube-dl test track 6 \"'/\\\u00e4\u21ad" } }, { "file": "11885684.m4a", "md5": "1d74534e95df54986da7f5abf7d842b7", "info_dict": { - "title": "youtube-dl test track 7 \"'/\\\u00e4\u21ad" + "title": "phihag - youtube-dl test track 7 \"'/\\\u00e4\u21ad" } }, { "file": "11885685.m4a", "md5": "f081f47af8f6ae782ed131d38b9cd1c0", "info_dict": { - "title": "youtube-dl test track 8 \"'/\\\u00e4\u21ad" + "title": "phihag - youtube-dl test track 8 \"'/\\\u00e4\u21ad" } } ] diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py index 0af59bce2..ff085b0ee 100755 --- a/youtube_dl/InfoExtractors.py +++ b/youtube_dl/InfoExtractors.py @@ -3892,8 +3892,9 @@ class EightTracksIE(InfoExtractor): info = { 'id': track_data['id'], 'url': track_data['track_file_stream_url'], - 'title': track_data['name'], - 'uploader': track_data['performer'], + 'title': track_data['performer'] + u' - ' + track_data['name'], + 'raw_title': 
track_data['name'], + 'uploader_id': data['user']['login'], 'ext': 'm4a', } res.append(info) From 25580f3251a8dcaa04a3edd9af328372c7caca4a Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sun, 27 Jan 2013 04:15:12 +0100 Subject: [PATCH 09/28] 8tracks: Ignore hashes --- youtube_dl/InfoExtractors.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py index ff085b0ee..7545ae0b9 100755 --- a/youtube_dl/InfoExtractors.py +++ b/youtube_dl/InfoExtractors.py @@ -3861,7 +3861,7 @@ class YouJizzIE(InfoExtractor): class EightTracksIE(InfoExtractor): IE_NAME = '8tracks' - _VALID_URL = r'https?://8tracks.com/(?P[^/]+)/(?P[^/]+)' + _VALID_URL = r'https?://8tracks.com/(?P[^/]+)/(?P[^/#]+)(?:#.*)?$' def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) From f0bad2b026310d06f8c2da0aee076f620cf5bc2e Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sun, 27 Jan 2013 15:23:26 +0100 Subject: [PATCH 10/28] Fix Stanford (Closes #653) --- youtube_dl/InfoExtractors.py | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py index 7545ae0b9..0860937ee 100755 --- a/youtube_dl/InfoExtractors.py +++ b/youtube_dl/InfoExtractors.py @@ -2869,8 +2869,7 @@ class StanfordOpenClassroomIE(InfoExtractor): def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) if mobj is None: - self._downloader.trouble(u'ERROR: invalid URL: %s' % url) - return + raise ExtractorError(u'Invalid URL: %s' % url) if mobj.group('course') and mobj.group('video'): # A specific video course = mobj.group('course') @@ -2907,12 +2906,9 @@ class StanfordOpenClassroomIE(InfoExtractor): 'upload_date': None, } - self.report_download_webpage(info['id']) - try: - coursepage = compat_urllib_request.urlopen(url).read() - except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: - 
self._downloader.trouble(u'ERROR: unable to download course info page: ' + compat_str(err)) - return + coursepage = self._download_webpage(url, info['id'], + note='Downloading course info page', + errnote='Unable to download course info page') m = re.search('

<h1>([^<]+)</h1>

', coursepage) if m: @@ -2936,7 +2932,6 @@ class StanfordOpenClassroomIE(InfoExtractor): assert entry['type'] == 'reference' results += self.extract(entry['url']) return results - else: # Root page info = { 'id': 'Stanford OpenClassroom', From ec71c13ab891566abff9010710afb915e8f22523 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sun, 27 Jan 2013 18:33:58 +0100 Subject: [PATCH 11/28] release 2013.01.28 --- youtube_dl/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index dfd7d6cec..50fe6f8d7 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2013.01.27' +__version__ = '2013.01.28' From a32b573ccb71de6d50cdb4dcf8e44928c70f92a6 Mon Sep 17 00:00:00 2001 From: David Coppa Date: Wed, 30 Jan 2013 15:31:38 +0100 Subject: [PATCH 12/28] Try setuptools first, then fallback to distutils.core --- setup.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 6d019dcbb..eb7b1a212 100644 --- a/setup.py +++ b/setup.py @@ -2,10 +2,14 @@ # -*- coding: utf-8 -*- from __future__ import print_function -from distutils.core import setup import pkg_resources import sys +try: + from setuptools import setup +except ImportError: + from distutils.core import setup + try: import py2exe """This will create an exe that needs Microsoft Visual C++ 2008 Redistributable Package""" From 3b024e17afcfe12f4ea55e9a200b9cbd61ec3f99 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Fri, 1 Feb 2013 17:29:50 +0100 Subject: [PATCH 13/28] Work around buggy HTML Parser in Python < 2.7.3 (Closes #662) --- youtube_dl/utils.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 532e8c782..e6ce028d6 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -280,6 +280,12 @@ class AttrParser(compat_html_parser.HTMLParser): lines[-1] = lines[-1][:self.result[2][1]-self.result[1][1]] 
lines[-1] = lines[-1][:self.result[2][1]] return '\n'.join(lines).strip() +# Hack for https://github.com/rg3/youtube-dl/issues/662 +if sys.version_info < (2, 7, 3): + AttrParser.parse_endtag = (lambda self, i: + i + len("") + if self.rawdata[i:].startswith("") + else compat_html_parser.HTMLParser.parse_endtag(self, i)) def get_element_by_id(id, html): """Return the content of the tag with the specified ID in the passed HTML document""" From 233a22960af8043515b17780b5bd69566dc90b36 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Fri, 1 Feb 2013 17:46:03 +0100 Subject: [PATCH 14/28] Switch ComedyCentral test to a permanent URL (They delete full episodes older than a month) --- test/test_download.py | 2 +- test/tests.json | 37 ++++++------------------------------- 2 files changed, 7 insertions(+), 32 deletions(-) diff --git a/test/test_download.py b/test/test_download.py index 14ac511d2..f1bccf58c 100644 --- a/test/test_download.py +++ b/test/test_download.py @@ -98,7 +98,7 @@ def generator(test_case): for tc in test_cases: if not test_case.get('params', {}).get('skip_download', False): - self.assertTrue(os.path.exists(tc['file'])) + self.assertTrue(os.path.exists(tc['file']), msg='Missing file ' + tc['file']) self.assertTrue(tc['file'] in finished_hook_called) self.assertTrue(os.path.exists(tc['file'] + '.info.json')) if 'md5' in tc: diff --git a/test/tests.json b/test/tests.json index a06266689..8fda1f1a9 100644 --- a/test/tests.json +++ b/test/tests.json @@ -181,37 +181,12 @@ }, { "name": "ComedyCentral", - "url": "http://www.thedailyshow.com/full-episodes/thu-december-13-2012-kristen-stewart", - "playlist": [ - { - "file": "422204.mp4", - "md5": "7a7abe068b31ff03e7b8a37596e72380", - "info_dict": { - "title": "thedailyshow-thu-december-13-2012-kristen-stewart part 1" - } - }, - { - "file": "422205.mp4", - "md5": "30552b7274c94dbb933f64600eadddd2", - "info_dict": { - "title": "thedailyshow-thu-december-13-2012-kristen-stewart part 2" - } - }, - { - "file": 
"422206.mp4", - "md5": "1f4c0664b352cb8e8fe85d5da4fbee91", - "info_dict": { - "title": "thedailyshow-thu-december-13-2012-kristen-stewart part 3" - } - }, - { - "file": "422207.mp4", - "md5": "f61ee8a4e6bd1308438e03badad78554", - "info_dict": { - "title": "thedailyshow-thu-december-13-2012-kristen-stewart part 4" - } - } - ] + "url": "http://www.thedailyshow.com/watch/thu-december-13-2012/kristen-stewart", + "file": "422212.mp4", + "md5": "4e2f5cb088a83cd8cdb7756132f9739d", + "info_dict": { + "title": "thedailyshow-kristen-stewart part 1" + } }, { "name": "RBMARadio", From edba5137b8e08bdf143a0dd4c43f90f0cc54ec4c Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Fri, 1 Feb 2013 17:56:22 +0100 Subject: [PATCH 15/28] Fix Facebook IE --- youtube_dl/InfoExtractors.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py index 0860937ee..8d8c591f7 100755 --- a/youtube_dl/InfoExtractors.py +++ b/youtube_dl/InfoExtractors.py @@ -2050,8 +2050,10 @@ class FacebookIE(InfoExtractor): if not m: raise ExtractorError(u'Cannot parse data') data = dict(json.loads(m.group(1))) - video_url = compat_urllib_parse.unquote(data['hd_src']) - video_duration = int(data['video_duration']) + params_raw = compat_urllib_parse.unquote(data['params']) + params = json.loads(params_raw) + video_url = params['hd_src'] + video_duration = int(params['video_duration']) m = re.search('

<h2 class="uiHeaderTitle">([^<]+)</h2>

', webpage) if not m: @@ -2064,7 +2066,7 @@ class FacebookIE(InfoExtractor): 'url': video_url, 'ext': 'mp4', 'duration': video_duration, - 'thumbnail': data['thumbnail_src'], + 'thumbnail': params['thumbnail_src'], } return [info] From 9cd5e4fce8d1236e0d3eb0fc6874b86f2749e60a Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Fri, 1 Feb 2013 17:57:32 +0100 Subject: [PATCH 16/28] release 2013.02.01 --- youtube_dl/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 50fe6f8d7..eed8f325c 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2013.01.28' +__version__ = '2013.02.01' From 450a30cae8bb1689dea8cd3548b8f48f94f5a663 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Fri, 1 Feb 2013 18:01:53 +0100 Subject: [PATCH 17/28] Add PyPi upload to release script --- devscripts/release.sh | 3 +++ 1 file changed, 3 insertions(+) diff --git a/devscripts/release.sh b/devscripts/release.sh index 561499ccb..d6c8e4d5e 100755 --- a/devscripts/release.sh +++ b/devscripts/release.sh @@ -83,4 +83,7 @@ ROOT=$(pwd) ) rm -rf build +echo "Uploading to PyPi ..." +pip sdist upload + echo "\n### DONE!" From 9e982f9e4eff4219f8c10df7529280b7eab16236 Mon Sep 17 00:00:00 2001 From: Jeff Crouse Date: Tue, 22 Jan 2013 00:50:42 -0500 Subject: [PATCH 18/28] Added "min-filesize" and "max-filesize" options --- README.md | 2 ++ youtube_dl/FileDownloader.py | 11 +++++++++++ youtube_dl/__init__.py | 16 ++++++++++++++++ 3 files changed, 29 insertions(+) diff --git a/README.md b/README.md index 71bad017d..f637e0b1b 100644 --- a/README.md +++ b/README.md @@ -38,6 +38,8 @@ which means you can modify it, redistribute it or use it however you like. --reject-title REGEX skip download for matching titles (regex or caseless sub-string) --max-downloads NUMBER Abort after downloading NUMBER files + --min-filesize SIZE Do not download any videos smaller than SIZE (e.g. 
50k or 44.6m) + --max-filesize SIZE Do not download any videos larger than SIZE (e.g. 50k or 44.6m) ## Filesystem Options: -t, --title use title in file name diff --git a/youtube_dl/FileDownloader.py b/youtube_dl/FileDownloader.py index e3131bbe6..49b032a1b 100644 --- a/youtube_dl/FileDownloader.py +++ b/youtube_dl/FileDownloader.py @@ -82,6 +82,8 @@ class FileDownloader(object): subtitleslang: Language of the subtitles to download test: Download only first bytes to test the downloader. keepvideo: Keep the video file after post-processing + min_filesize: Skip files smaller than this size + max_filesize: Skip files larger than this size """ params = None @@ -712,6 +714,15 @@ class FileDownloader(object): data_len = data.info().get('Content-length', None) if data_len is not None: data_len = int(data_len) + resume_len + min_data_len = self.params.get("min_filesize", None) + max_data_len = self.params.get("max_filesize", None) + if min_data_len is not None and data_len < min_data_len: + self.to_screen(u'\r[download] File is smaller than min-filesize (%s bytes < %s bytes). Aborting.' % (data_len, min_data_len)) + return False + if max_data_len is not None and data_len > max_data_len: + self.to_screen(u'\r[download] File is larger than max-filesize (%s bytes > %s bytes). Aborting.' 
% (data_len, max_data_len)) + return False + data_len_str = self.format_bytes(data_len) byte_counter = 0 + resume_len block_size = self.params.get('buffersize', 1024) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 10e9c8b8c..bdab38a4e 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -151,6 +151,10 @@ def parseOpts(): selection.add_option('--reject-title', dest='rejecttitle', metavar='REGEX',help='skip download for matching titles (regex or caseless sub-string)') selection.add_option('--max-downloads', metavar='NUMBER', dest='max_downloads', help='Abort after downloading NUMBER files', default=None) + selection.add_option('--min-filesize', metavar='SIZE', dest='min_filesize', help="Skip files smaller than this size", default=None) + selection.add_option('--max-filesize', metavar='SIZE', dest='max_filesize', help="Skip files larger than this size", default=None) + + authentication.add_option('-u', '--username', dest='username', metavar='USERNAME', help='account username') authentication.add_option('-p', '--password', @@ -349,6 +353,16 @@ def _real_main(): if numeric_limit is None: parser.error(u'invalid rate limit specified') opts.ratelimit = numeric_limit + if opts.min_filesize is not None: + numeric_limit = FileDownloader.parse_bytes(opts.min_filesize) + if numeric_limit is None: + parser.error(u'invalid min_filesize specified') + opts.min_filesize = numeric_limit + if opts.max_filesize is not None: + numeric_limit = FileDownloader.parse_bytes(opts.max_filesize) + if numeric_limit is None: + parser.error(u'invalid max_filesize specified') + opts.max_filesize = numeric_limit if opts.retries is not None: try: opts.retries = int(opts.retries) @@ -438,6 +452,8 @@ def _real_main(): 'verbose': opts.verbose, 'test': opts.test, 'keepvideo': opts.keepvideo, + 'min_filesize': opts.min_filesize, + 'max_filesize': opts.max_filesize }) if opts.verbose: From 0e33684194c9e364c70e0da3400b9568fa636538 Mon Sep 17 00:00:00 2001 From: Philipp 
Hagemeister Date: Fri, 1 Feb 2013 18:23:20 +0100 Subject: [PATCH 19/28] Switch to m4a by default (Closes #240) --- youtube_dl/PostProcessor.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/PostProcessor.py b/youtube_dl/PostProcessor.py index 545b6992b..70dc01004 100644 --- a/youtube_dl/PostProcessor.py +++ b/youtube_dl/PostProcessor.py @@ -143,10 +143,10 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor): more_opts = [] if self._preferredcodec == 'best' or self._preferredcodec == filecodec or (self._preferredcodec == 'm4a' and filecodec == 'aac'): - if self._preferredcodec == 'm4a' and filecodec == 'aac': + if filecodec == 'aac' and self._preferredcodec in ['m4a', 'best']: # Lossless, but in another container acodec = 'copy' - extension = self._preferredcodec + extension = 'm4a' more_opts = [self._exes['avconv'] and '-bsf:a' or '-absf', 'aac_adtstoasc'] elif filecodec in ['aac', 'mp3', 'vorbis', 'opus']: # Lossless if possible From 229cac754aa134d6d14ea7f66904fcc95629c1f1 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sat, 2 Feb 2013 13:51:54 +0100 Subject: [PATCH 20/28] Improve cookie error handling --- youtube_dl/__init__.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index bdab38a4e..7ed9bfc0d 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -290,10 +290,13 @@ def _real_main(): else: try: jar = compat_cookiejar.MozillaCookieJar(opts.cookiefile) - if os.path.isfile(opts.cookiefile) and os.access(opts.cookiefile, os.R_OK): + if os.access(opts.cookiefile, os.R_OK): jar.load() except (IOError, OSError) as err: - sys.exit(u'ERROR: unable to open cookie file') + if opts.verbose: + traceback.print_exc() + sys.stderr.write(u'ERROR: unable to open cookie file\n') + sys.exit(101) # Set user agent if opts.user_agent is not None: std_headers['User-Agent'] = opts.user_agent From b47bbac393be2b835ac5037def964f19305c28d4 Mon Sep 17 
00:00:00 2001 From: Philipp Hagemeister Date: Sat, 2 Feb 2013 14:40:41 +0100 Subject: [PATCH 21/28] Disable Stanford OC test for now, and enable escapist --- test/tests.json | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/tests.json b/test/tests.json index 8fda1f1a9..0bfbad722 100644 --- a/test/tests.json +++ b/test/tests.json @@ -76,7 +76,8 @@ "name": "StanfordOpenClassroom", "md5": "544a9468546059d4e80d76265b0443b8", "url": "http://openclassroom.stanford.edu/MainFolder/VideoPage.php?course=PracticalUnix&video=intro-environment&speed=100", - "file": "PracticalUnix_intro-environment.mp4" + "file": "PracticalUnix_intro-environment.mp4", + "skip": "Currently offline" }, { "name": "XNXX", @@ -113,8 +114,7 @@ "name": "Escapist", "url": "http://www.escapistmagazine.com/videos/view/the-escapist-presents/6618-Breaking-Down-Baldurs-Gate", "file": "6618-Breaking-Down-Baldurs-Gate.flv", - "md5": "c6793dbda81388f4264c1ba18684a74d", - "skip": "Fails with timeout on Travis" + "md5": "c6793dbda81388f4264c1ba18684a74d" }, { "name": "GooglePlus", From dbf2ba3d61da04a8b3fce0ef560fdb4e74654523 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sat, 2 Feb 2013 14:44:22 +0100 Subject: [PATCH 22/28] Better help for new options --- README.md | 4 ++-- youtube_dl/__init__.py | 5 ++--- youtube_dl/version.py | 2 +- 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index f637e0b1b..1012d78b6 100644 --- a/README.md +++ b/README.md @@ -38,8 +38,8 @@ which means you can modify it, redistribute it or use it however you like. --reject-title REGEX skip download for matching titles (regex or caseless sub-string) --max-downloads NUMBER Abort after downloading NUMBER files - --min-filesize SIZE Do not download any videos smaller than SIZE (e.g. 50k or 44.6m) - --max-filesize SIZE Do not download any videos larger than SIZE (e.g. 
50k or 44.6m) + --min-filesize SIZE Skip files smaller than this size + --max-filesize SIZE Skip files larger than this size ## Filesystem Options: -t, --title use title in file name diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 7ed9bfc0d..0a1041862 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -150,9 +150,8 @@ def parseOpts(): selection.add_option('--match-title', dest='matchtitle', metavar='REGEX',help='download only matching titles (regex or caseless sub-string)') selection.add_option('--reject-title', dest='rejecttitle', metavar='REGEX',help='skip download for matching titles (regex or caseless sub-string)') selection.add_option('--max-downloads', metavar='NUMBER', dest='max_downloads', help='Abort after downloading NUMBER files', default=None) - - selection.add_option('--min-filesize', metavar='SIZE', dest='min_filesize', help="Skip files smaller than this size", default=None) - selection.add_option('--max-filesize', metavar='SIZE', dest='max_filesize', help="Skip files larger than this size", default=None) + selection.add_option('--min-filesize', metavar='SIZE', dest='min_filesize', help="Do not download any videos smaller than SIZE (e.g. 50k or 44.6m)", default=None) + selection.add_option('--max-filesize', metavar='SIZE', dest='max_filesize', help="Do not download any videos larger than SIZE (e.g. 
50k or 44.6m)", default=None) authentication.add_option('-u', '--username', diff --git a/youtube_dl/version.py b/youtube_dl/version.py index eed8f325c..8b231ae80 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2013.02.01' +__version__ = '2013.02.02' From 085c8b75a6a809fb0ff151d661e7822c276be9b3 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sat, 2 Feb 2013 14:45:38 +0100 Subject: [PATCH 23/28] release 2013.02.02 --- README.md | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 1012d78b6..a14dac9f4 100644 --- a/README.md +++ b/README.md @@ -38,8 +38,10 @@ which means you can modify it, redistribute it or use it however you like. --reject-title REGEX skip download for matching titles (regex or caseless sub-string) --max-downloads NUMBER Abort after downloading NUMBER files - --min-filesize SIZE Skip files smaller than this size - --max-filesize SIZE Skip files larger than this size + --min-filesize SIZE Do not download any videos smaller than SIZE (e.g. + 50k or 44.6m) + --max-filesize SIZE Do not download any videos larger than SIZE (e.g. + 50k or 44.6m) ## Filesystem Options: -t, --title use title in file name From ccb0cae134914f174ac15c1e22e62b69219ba5b9 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sat, 2 Feb 2013 14:52:38 +0100 Subject: [PATCH 24/28] Fix automatic release (oops) --- devscripts/release.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/devscripts/release.sh b/devscripts/release.sh index d6c8e4d5e..a5f07fd61 100755 --- a/devscripts/release.sh +++ b/devscripts/release.sh @@ -84,6 +84,6 @@ ROOT=$(pwd) rm -rf build echo "Uploading to PyPi ..." -pip sdist upload +python setup.py sdist upload echo "\n### DONE!" 
From 3a9918d37f9b5ac8afde9e556cce70aba3d74ecb Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sat, 2 Feb 2013 14:53:34 +0100 Subject: [PATCH 25/28] Escapist continues to be flaky on travis --- test/tests.json | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test/tests.json b/test/tests.json index 0bfbad722..d2058c21f 100644 --- a/test/tests.json +++ b/test/tests.json @@ -114,7 +114,8 @@ "name": "Escapist", "url": "http://www.escapistmagazine.com/videos/view/the-escapist-presents/6618-Breaking-Down-Baldurs-Gate", "file": "6618-Breaking-Down-Baldurs-Gate.flv", - "md5": "c6793dbda81388f4264c1ba18684a74d" + "md5": "c6793dbda81388f4264c1ba18684a74d", + "skip": "Fails with timeout on Travis" }, { "name": "GooglePlus", From fb778e66dfbcd5ffaf9cb9a7abfda4399c4b684a Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Tue, 5 Feb 2013 13:30:02 +0100 Subject: [PATCH 26/28] Fix encoding in youtube subtitle download (Closes #669) --- youtube_dl/InfoExtractors.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py index 8d8c591f7..b99a6c505 100755 --- a/youtube_dl/InfoExtractors.py +++ b/youtube_dl/InfoExtractors.py @@ -264,13 +264,18 @@ class YoutubeIE(InfoExtractor): srt_lang = list(srt_lang_list.keys())[0] if not srt_lang in srt_lang_list: return (u'WARNING: no closed captions found in the specified language', None) - request = compat_urllib_request.Request('http://www.youtube.com/api/timedtext?lang=%s&name=%s&v=%s' % (srt_lang, srt_lang_list[srt_lang], video_id)) + params = compat_urllib_parse.urlencode({ + 'lang': srt_lang, + 'name': srt_lang_list[srt_lang].encode('utf-8'), + 'v': video_id, + }) + url = 'http://www.youtube.com/api/timedtext?' 
+ params try: - srt_xml = compat_urllib_request.urlopen(request).read().decode('utf-8') + srt_xml = compat_urllib_request.urlopen(url).read().decode('utf-8') except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: return (u'WARNING: unable to download video subtitles: %s' % compat_str(err), None) if not srt_xml: - return (u'WARNING: unable to download video subtitles', None) + return (u'WARNING: Did not fetch video subtitles', None) return (None, self._closed_captions_xml_to_srt(srt_xml)) def _print_formats(self, formats): From 8edc2cf8ca866fe1aded3f7b3ccf6df277b2e9f7 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Tue, 5 Feb 2013 13:42:08 +0100 Subject: [PATCH 27/28] Support direct vimeo links (Closes #666) --- youtube_dl/InfoExtractors.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py index b99a6c505..ac3ecea92 100755 --- a/youtube_dl/InfoExtractors.py +++ b/youtube_dl/InfoExtractors.py @@ -978,7 +978,7 @@ class VimeoIE(InfoExtractor): """Information extractor for vimeo.com.""" # _VALID_URL matches Vimeo URLs - _VALID_URL = r'(?:https?://)?(?:(?:www|player).)?vimeo\.com/(?:(?:groups|album)/[^/]+/)?(?:videos?/)?([0-9]+)' + _VALID_URL = r'(?P<proto>https?://)?(?:(?:www|player)\.)?vimeo\.com/(?:(?:groups|album)/[^/]+/)?(?P<direct_link>play_redirect_hls\?clip_id=)?(?:videos?/)?(?P<id>[0-9]+)' IE_NAME = u'vimeo' def __init__(self, downloader=None): @@ -999,7 +999,11 @@ class VimeoIE(InfoExtractor): self._downloader.trouble(u'ERROR: Invalid URL: %s' % url) return - video_id = mobj.group(1) + video_id = mobj.group('id') + if not mobj.group('proto'): + url = 'https://' + url + if mobj.group('direct_link'): + url = 'https://vimeo.com/' + video_id # Retrieve video webpage to extract further information request = compat_urllib_request.Request(url, None, std_headers) From bfc6ea7935bf2aad3aa5a2e07487e57ca8ec84b0 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister
Date: Tue, 5 Feb 2013 13:42:52 +0100 Subject: [PATCH 28/28] Ignore PyPi metadata --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 564bde1d1..77469b8a7 100644 --- a/.gitignore +++ b/.gitignore @@ -17,3 +17,4 @@ youtube-dl.tar.gz .coverage cover/ updates_key.pem +*.egg-info