From 36300346092937a8320fa4ae7303bc1b746a69ab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 2 Mar 2015 03:30:18 +0600 Subject: [PATCH 001/118] [vk] Fix test (Closes #5100) --- youtube_dl/extractor/vk.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py index 842263f34..cc384adbf 100644 --- a/youtube_dl/extractor/vk.py +++ b/youtube_dl/extractor/vk.py @@ -31,7 +31,7 @@ class VKIE(InfoExtractor): 'id': '162222515', 'ext': 'flv', 'title': 'ProtivoGunz - Хуёвая песня', - 'uploader': 're:Noize MC.*', + 'uploader': 're:(?:Noize MC|Alexander Ilyashenko).*', 'duration': 195, 'upload_date': '20120212', }, From 7594be85ffebee0c1ba457855ee7f4917e96499b Mon Sep 17 00:00:00 2001 From: Sergey Date: Mon, 2 Mar 2015 11:49:39 +0200 Subject: [PATCH 002/118] [lynda] Check for the empty subtitle --- youtube_dl/extractor/lynda.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/lynda.py b/youtube_dl/extractor/lynda.py index 5dc22da22..a84019bdf 100644 --- a/youtube_dl/extractor/lynda.py +++ b/youtube_dl/extractor/lynda.py @@ -144,6 +144,7 @@ class LyndaIE(InfoExtractor): def _fix_subtitles(self, subs): srt = '' + seq_counter = 0 for pos in range(0, len(subs) - 1): seq_current = subs[pos] m_current = re.match(self._TIMECODE_REGEX, seq_current['Timecode']) @@ -155,8 +156,10 @@ class LyndaIE(InfoExtractor): continue appear_time = m_current.group('timecode') disappear_time = m_next.group('timecode') - text = seq_current['Caption'].lstrip() - srt += '%s\r\n%s --> %s\r\n%s' % (str(pos), appear_time, disappear_time, text) + text = seq_current['Caption'].strip() + if text: + seq_counter += 1 + srt += '%s\r\n%s --> %s\r\n%s\r\n\r\n' % (seq_counter, appear_time, disappear_time, text) if srt: return srt From 30cbd4e0d68361a20b036fc90bd53a8fa9a463dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 2 Mar 2015 22:12:10 +0600 Subject: [PATCH 003/118] [lynda] Completely skip videos we don't have access to, extract base class and modernize (Closes #5093) --- youtube_dl/extractor/lynda.py | 149 ++++++++++++++++++---------------- 1 file changed, 81 insertions(+), 68 deletions(-) diff --git a/youtube_dl/extractor/lynda.py b/youtube_dl/extractor/lynda.py index a84019bdf..1f02bef44 100644 --- a/youtube_dl/extractor/lynda.py +++ b/youtube_dl/extractor/lynda.py @@ -15,18 +15,72 @@ from ..utils import ( ) -class LyndaIE(InfoExtractor): +class LyndaBaseIE(InfoExtractor): + _LOGIN_URL = 'https://www.lynda.com/login/login.aspx' + _SUCCESSFUL_LOGIN_REGEX = r'isLoggedIn: true' + _ACCOUNT_CREDENTIALS_HINT = 'Use --username and --password options to provide lynda.com account credentials.' + + def _real_initialize(self): + self._login() + + def _login(self): + (username, password) = self._get_login_info() + if username is None: + return + + login_form = { + 'username': username, + 'password': password, + 'remember': 'false', + 'stayPut': 'false' + } + request = compat_urllib_request.Request( + self._LOGIN_URL, compat_urllib_parse.urlencode(login_form)) + login_page = self._download_webpage( + request, None, 'Logging in as %s' % username) + + # Not (yet) logged in + m = re.search(r'loginResultJson = \'(?P[^\']+)\';', login_page) + if m is not None: + response = m.group('json') + response_json = json.loads(response) + state = response_json['state'] + + if state == 'notlogged': + raise ExtractorError( + 'Unable to login, incorrect username and/or password', + expected=True) + + # This is when we get popup: + # > You're already logged in to lynda.com on two devices. + # > If you log in here, we'll log you out of another device. + # So, we need to confirm this. + if state == 'conflicted': + confirm_form = { + 'username': '', + 'password': '', + 'resolve': 'true', + 'remember': 'false', + 'stayPut': 'false', + } + request = compat_urllib_request.Request( + self._LOGIN_URL, compat_urllib_parse.urlencode(confirm_form)) + login_page = self._download_webpage( + request, None, + 'Confirming log in and log out from another device') + + if re.search(self._SUCCESSFUL_LOGIN_REGEX, login_page) is None: + raise ExtractorError('Unable to log in') + + +class LyndaIE(LyndaBaseIE): IE_NAME = 'lynda' IE_DESC = 'lynda.com videos' - _VALID_URL = r'https?://www\.lynda\.com/(?:[^/]+/[^/]+/\d+|player/embed)/(\d+)' - _LOGIN_URL = 'https://www.lynda.com/login/login.aspx' + _VALID_URL = r'https?://www\.lynda\.com/(?:[^/]+/[^/]+/\d+|player/embed)/(?P\d+)' _NETRC_MACHINE = 'lynda' - _SUCCESSFUL_LOGIN_REGEX = r'isLoggedIn: true' _TIMECODE_REGEX = r'\[(?P\d+:\d+:\d+[\.,]\d+)\]' - ACCOUNT_CREDENTIALS_HINT = 'Use --username and --password options to provide lynda.com account credentials.' - _TESTS = [{ 'url': 'http://www.lynda.com/Bootstrap-tutorials/Using-exercise-files/110885/114408-4.html', 'md5': 'ecfc6862da89489161fb9cd5f5a6fac1', @@ -41,23 +95,22 @@ class LyndaIE(InfoExtractor): 'only_matching': True, }] - def _real_initialize(self): - self._login() - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group(1) + video_id = self._match_id(url) - page = self._download_webpage('http://www.lynda.com/ajax/player?videoId=%s&type=video' % video_id, video_id, - 'Downloading video JSON') + page = self._download_webpage( + 'http://www.lynda.com/ajax/player?videoId=%s&type=video' % video_id, + video_id, 'Downloading video JSON') video_json = json.loads(page) if 'Status' in video_json: - raise ExtractorError('lynda returned error: %s' % video_json['Message'], expected=True) + raise ExtractorError( + 'lynda returned error: %s' % video_json['Message'], expected=True) if video_json['HasAccess'] is False: raise ExtractorError( - 'Video %s is only available for members. ' % video_id + self.ACCOUNT_CREDENTIALS_HINT, expected=True) + 'Video %s is only available for members. ' + % video_id + self._ACCOUNT_CREDENTIALS_HINT, expected=True) video_id = compat_str(video_json['ID']) duration = video_json['DurationInSeconds'] @@ -100,48 +153,6 @@ class LyndaIE(InfoExtractor): 'formats': formats } - def _login(self): - (username, password) = self._get_login_info() - if username is None: - return - - login_form = { - 'username': username, - 'password': password, - 'remember': 'false', - 'stayPut': 'false' - } - request = compat_urllib_request.Request(self._LOGIN_URL, compat_urllib_parse.urlencode(login_form)) - login_page = self._download_webpage(request, None, 'Logging in as %s' % username) - - # Not (yet) logged in - m = re.search(r'loginResultJson = \'(?P[^\']+)\';', login_page) - if m is not None: - response = m.group('json') - response_json = json.loads(response) - state = response_json['state'] - - if state == 'notlogged': - raise ExtractorError('Unable to login, incorrect username and/or password', expected=True) - - # This is when we get popup: - # > You're already logged in to lynda.com on two devices. - # > If you log in here, we'll log you out of another device. - # So, we need to confirm this. - if state == 'conflicted': - confirm_form = { - 'username': '', - 'password': '', - 'resolve': 'true', - 'remember': 'false', - 'stayPut': 'false', - } - request = compat_urllib_request.Request(self._LOGIN_URL, compat_urllib_parse.urlencode(confirm_form)) - login_page = self._download_webpage(request, None, 'Confirming log in and log out from another device') - - if re.search(self._SUCCESSFUL_LOGIN_REGEX, login_page) is None: - raise ExtractorError('Unable to log in') - def _fix_subtitles(self, subs): srt = '' seq_counter = 0 @@ -172,7 +183,7 @@ class LyndaIE(InfoExtractor): return {} -class LyndaCourseIE(InfoExtractor): +class LyndaCourseIE(LyndaBaseIE): IE_NAME = 'lynda:course' IE_DESC = 'lynda.com online courses' @@ -185,35 +196,37 @@ class LyndaCourseIE(InfoExtractor): course_path = mobj.group('coursepath') course_id = mobj.group('courseid') - page = self._download_webpage('http://www.lynda.com/ajax/player?courseId=%s&type=course' % course_id, - course_id, 'Downloading course JSON') + page = self._download_webpage( + 'http://www.lynda.com/ajax/player?courseId=%s&type=course' % course_id, + course_id, 'Downloading course JSON') course_json = json.loads(page) if 'Status' in course_json and course_json['Status'] == 'NotFound': - raise ExtractorError('Course %s does not exist' % course_id, expected=True) + raise ExtractorError( + 'Course %s does not exist' % course_id, expected=True) unaccessible_videos = 0 videos = [] - (username, _) = self._get_login_info() # Might want to extract videos right here from video['Formats'] as it seems 'Formats' is not provided # by single video API anymore for chapter in course_json['Chapters']: for video in chapter['Videos']: - if username is None and video['HasAccess'] is False: + if video['HasAccess'] is False: unaccessible_videos += 1 continue videos.append(video['ID']) if unaccessible_videos > 0: - self._downloader.report_warning('%s videos are only available for members and will not be downloaded. ' - % unaccessible_videos + LyndaIE.ACCOUNT_CREDENTIALS_HINT) + self._downloader.report_warning( + '%s videos are only available for members (or paid members) and will not be downloaded. ' + % unaccessible_videos + self._ACCOUNT_CREDENTIALS_HINT) entries = [ - self.url_result('http://www.lynda.com/%s/%s-4.html' % - (course_path, video_id), - 'Lynda') + self.url_result( + 'http://www.lynda.com/%s/%s-4.html' % (course_path, video_id), + 'Lynda') for video_id in videos] course_title = course_json['Title'] From 2f0f6578c3e4b82a5decc6f906a5f58b7b34c1a9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 2 Mar 2015 22:38:44 +0600 Subject: [PATCH 004/118] [extractor/common] Assume non HTTP(S) URLs valid --- youtube_dl/extractor/common.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 7977fa8d0..cf39c0c21 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -767,6 +767,10 @@ class InfoExtractor(object): formats) def _is_valid_url(self, url, video_id, item='video'): + url = self._proto_relative_url(url, scheme='http:') + # For now assume non HTTP(S) URLs always valid + if not (url.startswith('http://') or url.startswith('https://')): + return True try: self._request_webpage(url, video_id, 'Checking %s URL' % item) return True From 562ceab13dc3a19243d5ab9d4d5927031a608334 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 2 Mar 2015 22:39:32 +0600 Subject: [PATCH 005/118] [soundcloud] Check direct links validity (Closes #5101) --- youtube_dl/extractor/soundcloud.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index c5284fa67..95788098e 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -200,8 +200,9 @@ class SoundcloudIE(InfoExtractor): if f['format_id'].startswith('rtmp'): f['protocol'] = 'rtmp' - self._sort_formats(formats) - result['formats'] = formats + self._check_formats(formats, track_id) + self._sort_formats(formats) + result['formats'] = formats return result From 295df4edb97e3c0dc0ecd95746bb2c455607a4a3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 2 Mar 2015 22:47:07 +0600 Subject: [PATCH 006/118] [soundcloud] Fix glitches (#5101) --- youtube_dl/extractor/soundcloud.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index 95788098e..9d4505972 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -180,7 +180,7 @@ class SoundcloudIE(InfoExtractor): 'format_id': key, 'url': url, 'play_path': 'mp3:' + path, - 'ext': ext, + 'ext': 'flv', 'vcodec': 'none', }) From c75f0b361a1b00f6ac1298615d6fee101994b2b9 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Mon, 2 Mar 2015 15:06:09 +0100 Subject: [PATCH 007/118] [downloader/external] Add support for custom options (Fixes #4885, closes #5098) --- youtube_dl/YoutubeDL.py | 4 ++-- youtube_dl/__init__.py | 5 +++++ youtube_dl/downloader/common.py | 2 ++ youtube_dl/downloader/external.py | 15 ++++++++++++--- youtube_dl/options.py | 6 +++++- 5 files changed, 26 insertions(+), 6 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 15367c4e3..e0baa9872 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -249,10 +249,10 @@ class YoutubeDL(object): hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv. The following parameters are not used by YoutubeDL itself, they are used by - the FileDownloader: + the downloader (see youtube_dl/downloader/common.py): nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test, noresizebuffer, retries, continuedl, noprogress, consoletitle, - xattr_set_filesize. + xattr_set_filesize, external_downloader_args. The following options are used by the post processors: prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available, diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 49f382695..6056da1be 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -9,6 +9,7 @@ import codecs import io import os import random +import shlex import sys @@ -255,6 +256,9 @@ def _real_main(argv=None): xattr # Confuse flake8 except ImportError: parser.error('setting filesize xattr requested but python-xattr is not available') + external_downloader_args = None + if opts.external_downloader_args: + external_downloader_args = shlex.split(opts.external_downloader_args) match_filter = ( None if opts.match_filter is None else match_filter_func(opts.match_filter)) @@ -359,6 +363,7 @@ def _real_main(argv=None): 'no_color': opts.no_color, 'ffmpeg_location': opts.ffmpeg_location, 'hls_prefer_native': opts.hls_prefer_native, + 'external_downloader_args': external_downloader_args, } with YoutubeDL(ydl_opts) as ydl: diff --git a/youtube_dl/downloader/common.py b/youtube_dl/downloader/common.py index 3ae90021a..8ed5c19a6 100644 --- a/youtube_dl/downloader/common.py +++ b/youtube_dl/downloader/common.py @@ -42,6 +42,8 @@ class FileDownloader(object): max_filesize: Skip files larger than this size xattr_set_filesize: Set ytdl.filesize user xattribute with expected size. (experimenatal) + external_downloader_args: A list of additional command-line arguments for the + external downloader. Subclasses of this one must re-define the real_download method. """ diff --git a/youtube_dl/downloader/external.py b/youtube_dl/downloader/external.py index 51c41c704..1673b2382 100644 --- a/youtube_dl/downloader/external.py +++ b/youtube_dl/downloader/external.py @@ -51,6 +51,13 @@ class ExternalFD(FileDownloader): return [] return [command_option, source_address] + def _configuration_args(self, default=[]): + ex_args = self.params.get('external_downloader_args') + if ex_args is None: + return default + assert isinstance(ex_args, list) + return ex_args + def _call_downloader(self, tmpfilename, info_dict): """ Either overwrite this or implement _make_cmd """ cmd = self._make_cmd(tmpfilename, info_dict) @@ -79,6 +86,7 @@ class CurlFD(ExternalFD): for key, val in info_dict['http_headers'].items(): cmd += ['--header', '%s: %s' % (key, val)] cmd += self._source_address('--interface') + cmd += self._configuration_args() cmd += ['--', info_dict['url']] return cmd @@ -89,15 +97,16 @@ class WgetFD(ExternalFD): for key, val in info_dict['http_headers'].items(): cmd += ['--header', '%s: %s' % (key, val)] cmd += self._source_address('--bind-address') + cmd += self._configuration_args() cmd += ['--', info_dict['url']] return cmd class Aria2cFD(ExternalFD): def _make_cmd(self, tmpfilename, info_dict): - cmd = [ - self.exe, '-c', - '--min-split-size', '1M', '--max-connection-per-server', '4'] + cmd = [self.exe, '-c'] + cmd += self._configuration_args([ + '--min-split-size', '1M', '--max-connection-per-server', '4']) dn = os.path.dirname(tmpfilename) if dn: cmd += ['--dir', dn] diff --git a/youtube_dl/options.py b/youtube_dl/options.py index a2ffe96bc..df2be7b74 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -435,8 +435,12 @@ def parseOpts(overrideArguments=None): downloader.add_option( '--external-downloader', dest='external_downloader', metavar='COMMAND', - help='(experimental) Use the specified external downloader. ' + help='Use the specified external downloader. ' 'Currently supports %s' % ','.join(list_external_downloaders())) + downloader.add_option( + '--external-downloader-args', + dest='external_downloader_args', metavar='ARGS', + help='Give these arguments to the external downloader.') workarounds = optparse.OptionGroup(parser, 'Workarounds') workarounds.add_option( From 76c73715fb1e0eee61ace5ff7855d8237abdcd54 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Mon, 2 Mar 2015 15:21:11 +0100 Subject: [PATCH 008/118] [generic] Parse RSS enclosure URLs (Fixes #5091) --- youtube_dl/extractor/generic.py | 34 ++++++++++++++++++++++++++++----- 1 file changed, 29 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 27e2bc300..5dc53685c 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -26,6 +26,7 @@ from ..utils import ( unsmuggle_url, UnsupportedError, url_basename, + xpath_text, ) from .brightcove import BrightcoveIE from .ooyala import OoyalaIE @@ -569,6 +570,16 @@ class GenericIE(InfoExtractor): 'title': 'John Carlson Postgame 2/25/15', }, }, + # RSS feed with enclosure + { + 'url': 'http://podcastfeeds.nbcnews.com/audio/podcast/MSNBC-MADDOW-NETCAST-M4V.xml', + 'info_dict': { + 'id': 'pdv_maddow_netcast_m4v-02-27-2015-201624', + 'ext': 'm4v', + 'upload_date': '20150228', + 'title': 'pdv_maddow_netcast_m4v-02-27-2015-201624', + } + } ] def report_following_redirect(self, new_url): @@ -580,11 +591,24 @@ class GenericIE(InfoExtractor): playlist_desc_el = doc.find('./channel/description') playlist_desc = None if playlist_desc_el is None else playlist_desc_el.text - entries = [{ - '_type': 'url', - 'url': e.find('link').text, - 'title': e.find('title').text, - } for e in doc.findall('./channel/item')] + entries = [] + for it in doc.findall('./channel/item'): + next_url = xpath_text(it, 'link', fatal=False) + if not next_url: + enclosure_nodes = it.findall('./enclosure') + for e in enclosure_nodes: + next_url = e.attrib.get('url') + if next_url: + break + + if not next_url: + continue + + entries.append({ + '_type': 'url', + 'url': next_url, + 'title': it.find('title').text, + }) return { '_type': 'playlist', From a7440261c52d6518a429210aaae598095112c61b Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Mon, 2 Mar 2015 19:07:17 +0100 Subject: [PATCH 009/118] [utils] Streap leading dots Fixes #2865, closes #5087 --- test/test_utils.py | 3 +++ youtube_dl/utils.py | 1 + 2 files changed, 4 insertions(+) diff --git a/test/test_utils.py b/test/test_utils.py index 3fba8ae11..64fad58ad 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -85,8 +85,11 @@ class TestUtil(unittest.TestCase): self.assertEqual( sanitize_filename('New World record at 0:12:34'), 'New World record at 0_12_34') + self.assertEqual(sanitize_filename('--gasdgf'), '_-gasdgf') self.assertEqual(sanitize_filename('--gasdgf', is_id=True), '--gasdgf') + self.assertEqual(sanitize_filename('.gasdgf'), 'gasdgf') + self.assertEqual(sanitize_filename('.gasdgf', is_id=True), '.gasdgf') forbidden = '"\0\\/' for fc in forbidden: diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index d4938ec36..1d3401bc2 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -305,6 +305,7 @@ def sanitize_filename(s, restricted=False, is_id=False): result = result[2:] if result.startswith('-'): result = '_' + result[len('-'):] + result = result.lstrip('.') if not result: result = '_' return result From 91410c9bfa9fd8f01fb817474bcc7b0db5cabf95 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Tue, 3 Mar 2015 00:03:06 +0100 Subject: [PATCH 010/118] [letv] Add --cn-verification-proxy (Closes #5077) --- youtube_dl/YoutubeDL.py | 5 ++++- youtube_dl/__init__.py | 1 + youtube_dl/extractor/letv.py | 34 ++++++++++++++++++++++++++++------ youtube_dl/options.py | 6 ++++++ youtube_dl/utils.py | 10 ++++++++++ 5 files changed, 49 insertions(+), 7 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index e0baa9872..915963d96 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -54,6 +54,7 @@ from .utils import ( MaxDownloadsReached, PagedList, parse_filesize, + PerRequestProxyHandler, PostProcessingError, platform_name, preferredencoding, @@ -183,6 +184,8 @@ class YoutubeDL(object): prefer_insecure: Use HTTP instead of HTTPS to retrieve information. At the moment, this is only supported by YouTube. proxy: URL of the proxy server to use + cn_verification_proxy: URL of the proxy to use for IP address verification + on Chinese sites. (Experimental) socket_timeout: Time to wait for unresponsive hosts, in seconds bidi_workaround: Work around buggy terminals without bidirectional text support, using fridibi @@ -1762,7 +1765,7 @@ class YoutubeDL(object): # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805) if 'http' in proxies and 'https' not in proxies: proxies['https'] = proxies['http'] - proxy_handler = compat_urllib_request.ProxyHandler(proxies) + proxy_handler = PerRequestProxyHandler(proxies) debuglevel = 1 if self.params.get('debug_printtraffic') else 0 https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 6056da1be..a08ddd670 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -364,6 +364,7 @@ def _real_main(argv=None): 'ffmpeg_location': opts.ffmpeg_location, 'hls_prefer_native': opts.hls_prefer_native, 'external_downloader_args': external_downloader_args, + 'cn_verification_proxy': opts.cn_verification_proxy, } with YoutubeDL(ydl_opts) as ydl: diff --git a/youtube_dl/extractor/letv.py b/youtube_dl/extractor/letv.py index 583ce35b9..fd5fd260e 100644 --- a/youtube_dl/extractor/letv.py +++ b/youtube_dl/extractor/letv.py @@ -7,8 +7,9 @@ import time from .common import InfoExtractor from ..compat import ( - compat_urlparse, compat_urllib_parse, + compat_urllib_request, + compat_urlparse, ) from ..utils import ( determine_ext, @@ -42,9 +43,23 @@ class LetvIE(InfoExtractor): 'expected_warnings': [ 'publish time' ] + }, { + 'note': 'This video is available only in Mainland China, thus a proxy is needed', + 'url': 'http://www.letv.com/ptv/vplay/1118082.html', + 'md5': 'f80936fbe20fb2f58648e81386ff7927', + 'info_dict': { + 'id': '1118082', + 'ext': 'mp4', + 'title': '与龙共舞 完整版', + 'description': 'md5:7506a5eeb1722bb9d4068f85024e3986', + }, + 'expected_warnings': [ + 'publish time' + ], + 'params': { + 'cn_verification_proxy': 'proxy.uku.im:8888' + }, }] - # http://www.letv.com/ptv/vplay/1118082.html - # This video is available only in Mainland China @staticmethod def urshift(val, n): @@ -76,8 +91,14 @@ class LetvIE(InfoExtractor): 'tkey': self.calc_time_key(int(time.time())), 'domain': 'www.letv.com' } + play_json_req = compat_urllib_request.Request( + 'http://api.letv.com/mms/out/video/playJson?' + compat_urllib_parse.urlencode(params) + ) + play_json_req.add_header( + 'Ytdl-Request-Proxy', + self._downloader.params.get('cn_verification_proxy')) play_json = self._download_json( - 'http://api.letv.com/mms/out/video/playJson?' + compat_urllib_parse.urlencode(params), + play_json_req, media_id, 'playJson data') # Check for errors @@ -114,7 +135,8 @@ class LetvIE(InfoExtractor): url_info_dict = { 'url': media_url, - 'ext': determine_ext(dispatch[format_id][1]) + 'ext': determine_ext(dispatch[format_id][1]), + 'format_id': format_id, } if format_id[-1:] == 'p': @@ -123,7 +145,7 @@ class LetvIE(InfoExtractor): urls.append(url_info_dict) publish_time = parse_iso8601(self._html_search_regex( - r'发布时间 ([^<>]+) ', page, 'publish time', fatal=False), + r'发布时间 ([^<>]+) ', page, 'publish time', default=None), delimiter=' ', timezone=datetime.timedelta(hours=8)) description = self._html_search_meta('description', page, fatal=False) diff --git a/youtube_dl/options.py b/youtube_dl/options.py index df2be7b74..a4ca8adc4 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -195,6 +195,12 @@ def parseOpts(overrideArguments=None): action='store_const', const='::', dest='source_address', help='Make all connections via IPv6 (experimental)', ) + network.add_option( + '--cn-verification-proxy', + dest='cn_verification_proxy', default=None, metavar='URL', + help='Use this proxy to verify the IP address for some Chinese sites. ' + 'The default proxy specified by --proxy (or none, if the options is not present) is used for the actual downloading. (experimental)' + ) selection = optparse.OptionGroup(parser, 'Video Selection') selection.add_option( diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 1d3401bc2..b568288fa 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1768,3 +1768,13 @@ def match_filter_func(filter_str): video_title = info_dict.get('title', info_dict.get('id', 'video')) return '%s does not pass filter %s, skipping ..' % (video_title, filter_str) return _match_func + + +class PerRequestProxyHandler(compat_urllib_request.ProxyHandler): + def proxy_open(self, req, proxy, type): + req_proxy = req.headers.get('Ytdl-Request-Proxy') + if req_proxy is not None: + proxy = req_proxy + del req.headers['Ytdl-Request-Proxy'] + return compat_urllib_request.ProxyHandler.proxy_open( + self, req, proxy, type) From 07490f8017a83b7607686499074c41212fa0a44f Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Tue, 3 Mar 2015 00:05:05 +0100 Subject: [PATCH 011/118] release 2015.03.03 --- README.md | 411 ++++++++++++------------------------------ youtube_dl/version.py | 2 +- 2 files changed, 119 insertions(+), 294 deletions(-) diff --git a/README.md b/README.md index f2909e8d6..d05549125 100644 --- a/README.md +++ b/README.md @@ -47,211 +47,107 @@ which means you can modify it, redistribute it or use it however you like. # OPTIONS -h, --help print this help text and exit --version print program version and exit - -U, --update update this program to latest version. Make - sure that you have sufficient permissions - (run with sudo if needed) - -i, --ignore-errors continue on download errors, for example to - skip unavailable videos in a playlist - --abort-on-error Abort downloading of further videos (in the - playlist or the command line) if an error - occurs + -U, --update update this program to latest version. Make sure that you have sufficient permissions (run with sudo if needed) + -i, --ignore-errors continue on download errors, for example to skip unavailable videos in a playlist + --abort-on-error Abort downloading of further videos (in the playlist or the command line) if an error occurs --dump-user-agent display the current browser identification - --list-extractors List all supported extractors and the URLs - they would handle - --extractor-descriptions Output descriptions of all supported - extractors - --default-search PREFIX Use this prefix for unqualified URLs. For - example "gvsearch2:" downloads two videos - from google videos for youtube-dl "large - apple". Use the value "auto" to let - youtube-dl guess ("auto_warning" to emit a - warning when guessing). "error" just throws - an error. The default value "fixup_error" - repairs broken URLs, but emits an error if - this is not possible instead of searching. - --ignore-config Do not read configuration files. When given - in the global configuration file /etc - /youtube-dl.conf: Do not read the user - configuration in ~/.config/youtube- - dl/config (%APPDATA%/youtube-dl/config.txt - on Windows) - --flat-playlist Do not extract the videos of a playlist, - only list them. + --list-extractors List all supported extractors and the URLs they would handle + --extractor-descriptions Output descriptions of all supported extractors + --default-search PREFIX Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for youtube-dl "large apple". + Use the value "auto" to let youtube-dl guess ("auto_warning" to emit a warning when guessing). "error" just throws an error. The + default value "fixup_error" repairs broken URLs, but emits an error if this is not possible instead of searching. + --ignore-config Do not read configuration files. When given in the global configuration file /etc/youtube-dl.conf: Do not read the user configuration + in ~/.config/youtube-dl/config (%APPDATA%/youtube-dl/config.txt on Windows) + --flat-playlist Do not extract the videos of a playlist, only list them. --no-color Do not emit color codes in output. ## Network Options: - --proxy URL Use the specified HTTP/HTTPS proxy. Pass in - an empty string (--proxy "") for direct - connection + --proxy URL Use the specified HTTP/HTTPS proxy. Pass in an empty string (--proxy "") for direct connection --socket-timeout SECONDS Time to wait before giving up, in seconds - --source-address IP Client-side IP address to bind to - (experimental) - -4, --force-ipv4 Make all connections via IPv4 - (experimental) - -6, --force-ipv6 Make all connections via IPv6 - (experimental) + --source-address IP Client-side IP address to bind to (experimental) + -4, --force-ipv4 Make all connections via IPv4 (experimental) + -6, --force-ipv6 Make all connections via IPv6 (experimental) ## Video Selection: --playlist-start NUMBER playlist video to start at (default is 1) --playlist-end NUMBER playlist video to end at (default is last) - --playlist-items ITEM_SPEC playlist video items to download. Specify - indices of the videos in the playlist - seperated by commas like: "--playlist-items - 1,2,5,8" if you want to download videos - indexed 1, 2, 5, 8 in the playlist. You can - specify range: "--playlist-items - 1-3,7,10-13", it will download the videos - at index 1, 2, 3, 7, 10, 11, 12 and 13. - --match-title REGEX download only matching titles (regex or - caseless sub-string) - --reject-title REGEX skip download for matching titles (regex or - caseless sub-string) + --playlist-items ITEM_SPEC playlist video items to download. Specify indices of the videos in the playlist seperated by commas like: "--playlist-items 1,2,5,8" + if you want to download videos indexed 1, 2, 5, 8 in the playlist. You can specify range: "--playlist-items 1-3,7,10-13", it will + download the videos at index 1, 2, 3, 7, 10, 11, 12 and 13. + --match-title REGEX download only matching titles (regex or caseless sub-string) + --reject-title REGEX skip download for matching titles (regex or caseless sub-string) --max-downloads NUMBER Abort after downloading NUMBER files - --min-filesize SIZE Do not download any videos smaller than - SIZE (e.g. 50k or 44.6m) - --max-filesize SIZE Do not download any videos larger than SIZE - (e.g. 50k or 44.6m) + --min-filesize SIZE Do not download any videos smaller than SIZE (e.g. 50k or 44.6m) + --max-filesize SIZE Do not download any videos larger than SIZE (e.g. 50k or 44.6m) --date DATE download only videos uploaded in this date - --datebefore DATE download only videos uploaded on or before - this date (i.e. inclusive) - --dateafter DATE download only videos uploaded on or after - this date (i.e. inclusive) - --min-views COUNT Do not download any videos with less than - COUNT views - --max-views COUNT Do not download any videos with more than - COUNT views - --match-filter FILTER (Experimental) Generic video filter. - Specify any key (see help for -o for a list - of available keys) to match if the key is - present, !key to check if the key is not - present,key > NUMBER (like "comment_count > - 12", also works with >=, <, <=, !=, =) to - compare against a number, and & to require - multiple matches. Values which are not - known are excluded unless you put a - question mark (?) after the operator.For - example, to only match videos that have - been liked more than 100 times and disliked - less than 50 times (or the dislike - functionality is not available at the given - service), but who also have a description, - use --match-filter "like_count > 100 & + --datebefore DATE download only videos uploaded on or before this date (i.e. inclusive) + --dateafter DATE download only videos uploaded on or after this date (i.e. inclusive) + --min-views COUNT Do not download any videos with less than COUNT views + --max-views COUNT Do not download any videos with more than COUNT views + --match-filter FILTER (Experimental) Generic video filter. Specify any key (see help for -o for a list of available keys) to match if the key is present, + !key to check if the key is not present,key > NUMBER (like "comment_count > 12", also works with >=, <, <=, !=, =) to compare against + a number, and & to require multiple matches. Values which are not known are excluded unless you put a question mark (?) after the + operator.For example, to only match videos that have been liked more than 100 times and disliked less than 50 times (or the dislike + functionality is not available at the given service), but who also have a description, use --match-filter "like_count > 100 & dislike_count 10M]"). This works for - filesize, height, width, tbr, abr, vbr, - asr, and fps and the comparisons <, <=, >, - >=, =, != and for ext, acodec, vcodec, - container, and protocol and the comparisons - =, != . Formats for which the value is not - known are excluded unless you put a - question mark (?) after the operator. You - can combine format filters, so -f "[height - <=? 720][tbr>500]" selects up to 720p - videos (or videos where the height is not - known) with a bitrate of at least 500 - KBit/s. By default, youtube-dl will pick - the best quality. Use commas to download - multiple audio formats, such as -f - 136/137/mp4/bestvideo,140/m4a/bestaudio. - You can merge the video and audio of two - formats into a single file using -f + (requires ffmpeg or - avconv), for example -f + -f, --format FORMAT video format code, specify the order of preference using slashes, as in -f 22/17/18 . Instead of format codes, you can select by + extension for the extensions aac, m4a, mp3, mp4, ogg, wav, webm. You can also use the special names "best", "bestvideo", "bestaudio", + "worst". You can filter the video results by putting a condition in brackets, as in -f "best[height=720]" (or -f "[filesize>10M]"). + This works for filesize, height, width, tbr, abr, vbr, asr, and fps and the comparisons <, <=, >, >=, =, != and for ext, acodec, + vcodec, container, and protocol and the comparisons =, != . Formats for which the value is not known are excluded unless you put a + question mark (?) after the operator. You can combine format filters, so -f "[height <=? 720][tbr>500]" selects up to 720p videos + (or videos where the height is not known) with a bitrate of at least 500 KBit/s. By default, youtube-dl will pick the best quality. + Use commas to download multiple audio formats, such as -f 136/137/mp4/bestvideo,140/m4a/bestaudio. You can merge the video and audio + of two formats into a single file using -f + (requires ffmpeg or avconv), for example -f bestvideo+bestaudio. --all-formats download all available video formats - --prefer-free-formats prefer free video formats unless a specific - one is requested + --prefer-free-formats prefer free video formats unless a specific one is requested --max-quality FORMAT highest quality format to download -F, --list-formats list all available formats - --youtube-skip-dash-manifest Do not download the DASH manifest on - YouTube videos - --merge-output-format FORMAT If a merge is required (e.g. - bestvideo+bestaudio), output to given - container format. One of mkv, mp4, ogg, - webm, flv.Ignored if no merge is required + --youtube-skip-dash-manifest Do not download the DASH manifest on YouTube videos + --merge-output-format FORMAT If a merge is required (e.g. bestvideo+bestaudio), output to given container format. One of mkv, mp4, ogg, webm, flv.Ignored if no + merge is required ## Subtitle Options: --write-sub write subtitle file - --write-auto-sub write automatic subtitle file (youtube - only) - --all-subs downloads all the available subtitles of - the video + --write-auto-sub write automatic subtitle file (youtube only) + --all-subs downloads all the available subtitles of the video --list-subs lists all available subtitles for the video - --sub-format FORMAT subtitle format, accepts formats - preference, for example: "ass/srt/best" - --sub-lang LANGS languages of the subtitles to download - (optional) separated by commas, use IETF - language tags like 'en,pt' + --sub-format FORMAT subtitle format, accepts formats preference, for example: "ass/srt/best" + --sub-lang LANGS languages of the subtitles to download (optional) separated by commas, use IETF language tags like 'en,pt' ## Authentication Options: -u, --username USERNAME login with this account ID - -p, --password PASSWORD account password. If this option is left - out, youtube-dl will ask interactively. + -p, --password PASSWORD account password. If this option is left out, youtube-dl will ask interactively. -2, --twofactor TWOFACTOR two-factor auth code -n, --netrc use .netrc authentication data --video-password PASSWORD video password (vimeo, smotri) ## Post-processing Options: - -x, --extract-audio convert video files to audio-only files - (requires ffmpeg or avconv and ffprobe or - avprobe) - --audio-format FORMAT "best", "aac", "vorbis", "mp3", "m4a", - "opus", or "wav"; "best" by default - --audio-quality QUALITY ffmpeg/avconv audio quality specification, - insert a value between 0 (better) and 9 - (worse) for VBR or a specific bitrate like - 128K (default 5) - --recode-video FORMAT Encode the video to another format if - necessary (currently supported: - mp4|flv|ogg|webm|mkv) - -k, --keep-video keeps the video file on disk after the - post-processing; the video is erased by - default - --no-post-overwrites do not overwrite post-processed files; the - post-processed files are overwritten by - default - --embed-subs embed subtitles in the video (only for mp4 - videos) + -x, --extract-audio convert video files to audio-only files (requires ffmpeg or avconv and ffprobe or avprobe) + --audio-format FORMAT "best", "aac", "vorbis", "mp3", "m4a", "opus", or "wav"; "best" by default + --audio-quality QUALITY ffmpeg/avconv audio quality specification, insert a value between 0 (better) and 9 (worse) for VBR or a specific bitrate like 128K + (default 5) + --recode-video FORMAT Encode the video to another format if necessary (currently supported: mp4|flv|ogg|webm|mkv) + -k, --keep-video keeps the video file on disk after the post-processing; the video is erased by default + --no-post-overwrites do not overwrite post-processed files; the post-processed files are overwritten by default + --embed-subs embed subtitles in the video (only for mp4 videos) --embed-thumbnail embed thumbnail in the audio as cover art --add-metadata write metadata to the video file - --xattrs write metadata to the video file's xattrs - (using dublin core and xdg standards) - --fixup POLICY Automatically correct known faults of the - file. One of never (do nothing), warn (only - emit a warning), detect_or_warn(the - default; fix file if we can, warn - otherwise) - --prefer-avconv Prefer avconv over ffmpeg for running the - postprocessors (default) - --prefer-ffmpeg Prefer ffmpeg over avconv for running the - postprocessors - --ffmpeg-location PATH Location of the ffmpeg/avconv binary; - either the path to the binary or its - containing directory. - --exec CMD Execute a command on the file after - downloading, similar to find's -exec - syntax. Example: --exec 'adb push {} - /sdcard/Music/ && rm {}' - --convert-subtitles FORMAT Convert the subtitles to other format - (currently supported: srt|ass|vtt) + --xattrs write metadata to the video file's xattrs (using dublin core and xdg standards) + --fixup POLICY Automatically correct known faults of the file. One of never (do nothing), warn (only emit a warning), detect_or_warn(the default; + fix file if we can, warn otherwise) + --prefer-avconv Prefer avconv over ffmpeg for running the postprocessors (default) + --prefer-ffmpeg Prefer ffmpeg over avconv for running the postprocessors + --ffmpeg-location PATH Location of the ffmpeg/avconv binary; either the path to the binary or its containing directory. + --exec CMD Execute a command on the file after downloading, similar to find's -exec syntax. Example: --exec 'adb push {} /sdcard/Music/ && rm + {}' + --convert-subtitles FORMAT Convert the subtitles to other format (currently supported: srt|ass|vtt) # CONFIGURATION diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 5582348ba..3a49e52c9 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2015.02.28' +__version__ = '2015.03.03' From 499bfcbfd09e85f053d7e8943a8d47fed9349b0e Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Tue, 3 Mar 2015 12:59:17 +0100 Subject: [PATCH 012/118] Make sure netrc works for all extractors with login support Fixes #5112 --- test/test_netrc.py | 26 ++++++++++++++++++++++++++ youtube_dl/extractor/atresplayer.py | 1 + youtube_dl/extractor/crunchyroll.py | 1 + youtube_dl/extractor/gdcvault.py | 1 + youtube_dl/extractor/lynda.py | 1 + youtube_dl/extractor/twitch.py | 1 + 6 files changed, 31 insertions(+) create mode 100644 test/test_netrc.py diff --git a/test/test_netrc.py b/test/test_netrc.py new file mode 100644 index 000000000..7cf3a6a2e --- /dev/null +++ b/test/test_netrc.py @@ -0,0 +1,26 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import os +import sys +import unittest +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + + +from youtube_dl.extractor import ( + gen_extractors, +) + + +class TestNetRc(unittest.TestCase): + def test_netrc_present(self): + for ie in gen_extractors(): + if not hasattr(ie, '_login'): + continue + self.assertTrue( + hasattr(ie, '_NETRC_MACHINE'), + 'Extractor %s supports login, but is missing a _NETRC_MACHINE property' % ie.IE_NAME) + + +if __name__ == '__main__': + unittest.main() diff --git a/youtube_dl/extractor/atresplayer.py b/youtube_dl/extractor/atresplayer.py index 7669e0e3d..29f8795d3 100644 --- a/youtube_dl/extractor/atresplayer.py +++ b/youtube_dl/extractor/atresplayer.py @@ -19,6 +19,7 @@ from ..utils import ( class AtresPlayerIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?atresplayer\.com/television/[^/]+/[^/]+/[^/]+/(?P.+?)_\d+\.html' + _NETRC_MACHINE = 'atresplayer' _TESTS = [ { 'url': 'http://www.atresplayer.com/television/programas/el-club-de-la-comedia/temporada-4/capitulo-10-especial-solidario-nochebuena_2014122100174.html', diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py index f1da7d09b..e64b88fbc 100644 --- a/youtube_dl/extractor/crunchyroll.py +++ b/youtube_dl/extractor/crunchyroll.py @@ -29,6 +29,7 @@ from ..aes import ( class CrunchyrollIE(InfoExtractor): _VALID_URL = r'https?://(?:(?Pwww|m)\.)?(?Pcrunchyroll\.(?:com|fr)/(?:[^/]*/[^/?&]*?|media/\?id=)(?P[0-9]+))(?:[/?&]|$)' + _NETRC_MACHINE = 'crunchyroll' _TESTS = [{ 'url': 'http://www.crunchyroll.com/wanna-be-the-strongest-in-the-world/episode-1-an-idol-wrestler-is-born-645513', 'info_dict': { diff --git a/youtube_dl/extractor/gdcvault.py b/youtube_dl/extractor/gdcvault.py index f7b467b0a..51796f3a4 100644 --- a/youtube_dl/extractor/gdcvault.py +++ b/youtube_dl/extractor/gdcvault.py @@ -12,6 +12,7 @@ from ..utils import remove_end class GDCVaultIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?gdcvault\.com/play/(?P\d+)/(?P(\w|-)+)' + _NETRC_MACHINE = 'gdcvault' _TESTS = [ { 'url': 'http://www.gdcvault.com/play/1019721/Doki-Doki-Universe-Sweet-Simple', diff --git a/youtube_dl/extractor/lynda.py b/youtube_dl/extractor/lynda.py index 1f02bef44..cfd3b14f4 100644 --- a/youtube_dl/extractor/lynda.py +++ b/youtube_dl/extractor/lynda.py @@ -19,6 +19,7 @@ class LyndaBaseIE(InfoExtractor): _LOGIN_URL = 'https://www.lynda.com/login/login.aspx' _SUCCESSFUL_LOGIN_REGEX = r'isLoggedIn: true' _ACCOUNT_CREDENTIALS_HINT = 'Use --username and --password options to provide lynda.com account credentials.' + _NETRC_MACHINE = 'lynda' def _real_initialize(self): self._login() diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py index 4b0ce54df..8af136147 100644 --- a/youtube_dl/extractor/twitch.py +++ b/youtube_dl/extractor/twitch.py @@ -23,6 +23,7 @@ class TwitchBaseIE(InfoExtractor): _API_BASE = 'https://api.twitch.tv' _USHER_BASE = 'http://usher.twitch.tv' _LOGIN_URL = 'https://secure.twitch.tv/user/login' + _NETRC_MACHINE = 'twitch' def _handle_error(self, response): if not isinstance(response, dict): From 2461f79d2ad9eee44644f6187e366125a29aa70f Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Tue, 3 Mar 2015 13:56:06 +0100 Subject: [PATCH 013/118] [utils] Correct per-request proxy handling --- youtube_dl/YoutubeDL.py | 3 ++- youtube_dl/extractor/letv.py | 10 ++-------- youtube_dl/utils.py | 15 +++++++++++++-- 3 files changed, 17 insertions(+), 11 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 915963d96..df2aebb59 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1771,7 +1771,8 @@ class YoutubeDL(object): https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel) ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel) opener = compat_urllib_request.build_opener( - https_handler, proxy_handler, cookie_processor, ydlh) + proxy_handler, https_handler, cookie_processor, ydlh) + # Delete the default user-agent header, which would otherwise apply in # cases where our custom HTTP handler doesn't come into play # (See https://github.com/rg3/youtube-dl/issues/1309 for details) diff --git a/youtube_dl/extractor/letv.py b/youtube_dl/extractor/letv.py index fd5fd260e..85eee141b 100644 --- a/youtube_dl/extractor/letv.py +++ b/youtube_dl/extractor/letv.py @@ -40,9 +40,6 @@ class LetvIE(InfoExtractor): 'title': '美人天下01', 'description': 'md5:f88573d9d7225ada1359eaf0dbf8bcda', }, - 'expected_warnings': [ - 'publish time' - ] }, { 'note': 'This video is available only in Mainland China, thus a proxy is needed', 'url': 'http://www.letv.com/ptv/vplay/1118082.html', @@ -53,11 +50,8 @@ class LetvIE(InfoExtractor): 'title': '与龙共舞 完整版', 'description': 'md5:7506a5eeb1722bb9d4068f85024e3986', }, - 'expected_warnings': [ - 'publish time' - ], 'params': { - 'cn_verification_proxy': 'proxy.uku.im:8888' + 'cn_verification_proxy': 'http://proxy.uku.im:8888' }, }] @@ -95,7 +89,7 @@ class LetvIE(InfoExtractor): 'http://api.letv.com/mms/out/video/playJson?' + compat_urllib_parse.urlencode(params) ) play_json_req.add_header( - 'Ytdl-Request-Proxy', + 'Ytdl-request-proxy', self._downloader.params.get('cn_verification_proxy')) play_json = self._download_json( play_json_req, diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index b568288fa..7426e2a1f 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1771,10 +1771,21 @@ def match_filter_func(filter_str): class PerRequestProxyHandler(compat_urllib_request.ProxyHandler): + def __init__(self, proxies=None): + # Set default handlers + for type in ('http', 'https'): + setattr(self, '%s_open' % type, + lambda r, proxy='__noproxy__', type=type, meth=self.proxy_open: + meth(r, proxy, type)) + return compat_urllib_request.ProxyHandler.__init__(self, proxies) + def proxy_open(self, req, proxy, type): - req_proxy = req.headers.get('Ytdl-Request-Proxy') + req_proxy = req.headers.get('Ytdl-request-proxy') if req_proxy is not None: proxy = req_proxy - del req.headers['Ytdl-Request-Proxy'] + del req.headers['Ytdl-request-proxy'] + + if proxy == '__noproxy__': + return None # No Proxy return compat_urllib_request.ProxyHandler.proxy_open( self, req, proxy, type) From 7fde87c77da41f24faf34b37f5d05c029ac23d4f Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Tue, 3 Mar 2015 13:59:38 +0100 Subject: [PATCH 014/118] release 2015.03.03.1 --- README.md | 2 ++ youtube_dl/version.py | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index d05549125..5b9dd2cea 100644 --- a/README.md +++ b/README.md @@ -67,6 +67,8 @@ which means you can modify it, redistribute it or use it however you like. --source-address IP Client-side IP address to bind to (experimental) -4, --force-ipv4 Make all connections via IPv4 (experimental) -6, --force-ipv6 Make all connections via IPv6 (experimental) + --cn-verification-proxy URL Use this proxy to verify the IP address for some Chinese sites. The default proxy specified by --proxy (or none, if the options is + not present) is used for the actual downloading. (experimental) ## Video Selection: --playlist-start NUMBER playlist video to start at (default is 1) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 3a49e52c9..252933993 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2015.03.03' +__version__ = '2015.03.03.1' From e0d0572b731b79393a5c5121acf4a43017160936 Mon Sep 17 00:00:00 2001 From: chaos33 <919946h43k48> Date: Tue, 3 Mar 2015 22:53:05 +0800 Subject: [PATCH 015/118] fix youporn extractor's json search regex --- youtube_dl/extractor/youporn.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/youporn.py b/youtube_dl/extractor/youporn.py index 107c9ac36..a278386d4 100644 --- a/youtube_dl/extractor/youporn.py +++ b/youtube_dl/extractor/youporn.py @@ -47,7 +47,7 @@ class YouPornIE(InfoExtractor): # Get JSON parameters json_params = self._search_regex( - r'var currentVideo = new Video\((.*)\)[,;]', + r'var videoJason = (.*)[,;]', webpage, 'JSON parameters') try: params = json.loads(json_params) From 50c9949d7ae8f76d7a105481e0e692de3448c385 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 3 Mar 2015 21:39:04 +0600 Subject: [PATCH 016/118] [youporn] Imrove JSON regex and preserve the old one --- youtube_dl/extractor/youporn.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/youporn.py b/youtube_dl/extractor/youporn.py index a278386d4..e4c855ee0 100644 --- a/youtube_dl/extractor/youporn.py +++ b/youtube_dl/extractor/youporn.py @@ -47,7 +47,8 @@ class YouPornIE(InfoExtractor): # Get JSON parameters json_params = self._search_regex( - r'var videoJason = (.*)[,;]', + [r'var\s+videoJa?son\s*=\s*({.+?});', + r'var\s+currentVideo\s*=\s*new\s+Video\((.+?)\)[,;]'], webpage, 'JSON parameters') try: params = json.loads(json_params) From 22d362831978ea2b83ea0c21a634237dc2406ab6 Mon Sep 17 00:00:00 2001 From: Naglis Jonaitis Date: Tue, 3 Mar 2015 18:39:23 +0200 Subject: [PATCH 017/118] [tvplay] Adapt _VALID_URL and test case to domain name change --- youtube_dl/extractor/tvplay.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/tvplay.py b/youtube_dl/extractor/tvplay.py index 9a53a3c74..e83e31a31 100644 --- a/youtube_dl/extractor/tvplay.py +++ b/youtube_dl/extractor/tvplay.py @@ -16,6 +16,7 @@ class TVPlayIE(InfoExtractor): _VALID_URL = r'''(?x)http://(?:www\.)? (?:tvplay\.lv/parraides| tv3play\.lt/programos| + play\.tv3\.lt/programos| tv3play\.ee/sisu| tv3play\.se/program| tv6play\.se/program| @@ -45,7 +46,7 @@ class TVPlayIE(InfoExtractor): }, }, { - 'url': 'http://www.tv3play.lt/programos/moterys-meluoja-geriau/409229?autostart=true', + 'url': 'http://play.tv3.lt/programos/moterys-meluoja-geriau/409229?autostart=true', 'info_dict': { 'id': '409229', 'ext': 'flv', From dc570c4951d397ded7535393a9f0972c9cd0bc04 Mon Sep 17 00:00:00 2001 From: Naglis Jonaitis Date: Tue, 3 Mar 2015 18:41:34 +0200 Subject: [PATCH 018/118] [lrt] Pass --realtime to rtmpdump --- youtube_dl/extractor/lrt.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/lrt.py b/youtube_dl/extractor/lrt.py index 9c2fbdd96..e3236f7b5 100644 --- a/youtube_dl/extractor/lrt.py +++ b/youtube_dl/extractor/lrt.py @@ -52,6 +52,7 @@ class LRTIE(InfoExtractor): 'url': data['streamer'], 'play_path': 'mp4:%s' % data['file'], 'preference': -1, + 'rtmp_real_time': True, }) else: formats.extend( From 123397317c9906bf53a23b6b426db0b08c70da32 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Tue, 3 Mar 2015 18:45:56 +0100 Subject: [PATCH 019/118] [downloader/http] Remove wrong '_hook_progress' call (fixes #5117) --- youtube_dl/downloader/http.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/youtube_dl/downloader/http.py b/youtube_dl/downloader/http.py index 2e3dac825..5e9982799 100644 --- a/youtube_dl/downloader/http.py +++ b/youtube_dl/downloader/http.py @@ -218,12 +218,6 @@ class HttpFD(FileDownloader): if tmpfilename != '-': stream.close() - self._hook_progress({ - 'downloaded_bytes': byte_counter, - 'total_bytes': data_len, - 'tmpfilename': tmpfilename, - 'status': 'error', - }) if data_len is not None and byte_counter != data_len: raise ContentTooShortError(byte_counter, int(data_len)) self.try_rename(tmpfilename, filename) From f28fe66970370b5df649aa457e52cf99f6a49905 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Wed, 4 Mar 2015 12:14:38 +0100 Subject: [PATCH 020/118] [downloader/http] Add missing fields for _hook_progress call It would fail if you run 'youtube-dl --no-part URL' a second time when the file has already been downloaded. (Reported in Fedora: https://bugzilla.redhat.com/show_bug.cgi?id=1195779) --- youtube_dl/downloader/http.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/youtube_dl/downloader/http.py b/youtube_dl/downloader/http.py index 5e9982799..4047d7167 100644 --- a/youtube_dl/downloader/http.py +++ b/youtube_dl/downloader/http.py @@ -92,6 +92,8 @@ class HttpFD(FileDownloader): self._hook_progress({ 'filename': filename, 'status': 'finished', + 'downloaded_bytes': resume_len, + 'total_bytes': resume_len, }) return True else: From 12a129ec6dde840c23e8b1e5d7552de93fce7be4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 5 Mar 2015 02:36:53 +0600 Subject: [PATCH 021/118] [playwire] Add extractor --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/playwire.py | 78 ++++++++++++++++++++++++++++++++ 2 files changed, 79 insertions(+) create mode 100644 youtube_dl/extractor/playwire.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index ffcc7d9ab..5ca534cdf 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -364,6 +364,7 @@ from .planetaplay import PlanetaPlayIE from .played import PlayedIE from .playfm import PlayFMIE from .playvid import PlayvidIE +from .playwire import PlaywireIE from .podomatic import PodomaticIE from .pornhd import PornHdIE from .pornhub import ( diff --git a/youtube_dl/extractor/playwire.py b/youtube_dl/extractor/playwire.py new file mode 100644 index 000000000..bdc71017b --- /dev/null +++ b/youtube_dl/extractor/playwire.py @@ -0,0 +1,78 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + xpath_text, + float_or_none, + int_or_none, +) + + +class PlaywireIE(InfoExtractor): + _VALID_URL = r'https?://(?:config|cdn)\.playwire\.com(?:/v2)?/(?P\d+)/(?:videos/v2|embed|config)/(?P\d+)' + _TESTS = [{ + 'url': 'http://config.playwire.com/14907/videos/v2/3353705/player.json', + 'md5': 'e6398701e3595888125729eaa2329ed9', + 'info_dict': { + 'id': '3353705', + 'ext': 'mp4', + 'title': 'S04_RM_UCL_Rus', + 'thumbnail': 're:^http://.*\.png$', + 'duration': 145.94, + }, + }, { + 'url': 'http://cdn.playwire.com/11625/embed/85228.html', + 'only_matching': True, + }, { + 'url': 'http://config.playwire.com/12421/videos/v2/3389892/zeus.json', + 'only_matching': True, + }, { + 'url': 'http://cdn.playwire.com/v2/12342/config/1532636.json', + 'only_matching': True, + }] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + publisher_id, video_id = mobj.group('publisher_id'), mobj.group('id') + + player = self._download_json( + 'http://config.playwire.com/%s/videos/v2/%s/zeus.json' % (publisher_id, video_id), + video_id) + + title = player['settings']['title'] + duration = float_or_none(player.get('duration'), 1000) + + content = player['content'] + thumbnail = content.get('poster') + src = content['media']['f4m'] + + f4m = self._download_xml(src, video_id) + base_url = xpath_text(f4m, './{http://ns.adobe.com/f4m/1.0}baseURL', 'base url', fatal=True) + formats = [] + for media in f4m.findall('./{http://ns.adobe.com/f4m/1.0}media'): + media_url = media.get('url') + if not media_url: + continue + tbr = int_or_none(media.get('bitrate')) + width = int_or_none(media.get('width')) + height = int_or_none(media.get('height')) + f = { + 'url': '%s/%s' % (base_url, media.attrib['url']), + 'tbr': tbr, + 'width': width, + 'height': height, + } + if not (tbr or width or height): + f['quality'] = 1 if '-hd.' in media_url else 0 + formats.append(f) + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': title, + 'thumbnail': thumbnail, + 'duration': duration, + 'formats': formats, + } From c2ebea6580481a89c35f9447898b17241ad71ac0 Mon Sep 17 00:00:00 2001 From: Naglis Jonaitis Date: Thu, 5 Mar 2015 14:45:38 +0200 Subject: [PATCH 022/118] [extremetube] Fix extraction (Closes #5127) --- youtube_dl/extractor/extremetube.py | 30 ++++++++++++++++++----------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/youtube_dl/extractor/extremetube.py b/youtube_dl/extractor/extremetube.py index 36ba33128..c826a5404 100644 --- a/youtube_dl/extractor/extremetube.py +++ b/youtube_dl/extractor/extremetube.py @@ -4,11 +4,11 @@ import re from .common import InfoExtractor from ..compat import ( - compat_urllib_parse_urlparse, + compat_parse_qs, compat_urllib_request, - compat_urllib_parse, ) from ..utils import ( + qualities, str_to_int, ) @@ -17,7 +17,7 @@ class ExtremeTubeIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?(?Pextremetube\.com/.*?video/.+?(?P[0-9]+))(?:[/?&]|$)' _TESTS = [{ 'url': 'http://www.extremetube.com/video/music-video-14-british-euro-brit-european-cumshots-swallow-652431', - 'md5': '1fb9228f5e3332ec8c057d6ac36f33e0', + 'md5': '344d0c6d50e2f16b06e49ca011d8ac69', 'info_dict': { 'id': '652431', 'ext': 'mp4', @@ -49,19 +49,27 @@ class ExtremeTubeIE(InfoExtractor): r'Views:\s*\s*([\d,\.]+)', webpage, 'view count', fatal=False)) - video_url = compat_urllib_parse.unquote(self._html_search_regex( - r'video_url=(.+?)&', webpage, 'video_url')) - path = compat_urllib_parse_urlparse(video_url).path - format = path.split('/')[5].split('_')[:2] - format = "-".join(format) + flash_vars = compat_parse_qs(self._search_regex( + r']+?name="flashvars"[^>]+?value="([^"]+)"', webpage, 'flash vars')) + + formats = [] + quality = qualities(['180p', '240p', '360p', '480p', '720p', '1080p']) + for k, vals in flash_vars.items(): + m = re.match(r'quality_(?P[0-9]+p)$', k) + if m is not None: + formats.append({ + 'format_id': m.group('quality'), + 'quality': quality(m.group('quality')), + 'url': vals[0], + }) + + self._sort_formats(formats) return { 'id': video_id, 'title': video_title, + 'formats': formats, 'uploader': uploader, 'view_count': view_count, - 'url': video_url, - 'format': format, - 'format_id': format, 'age_limit': 18, } From 9609f02e3ca87e56fc5571e41040ddbcf2a79a73 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Thu, 5 Mar 2015 22:34:56 +0100 Subject: [PATCH 023/118] [vidme] Modernize --- youtube_dl/extractor/vidme.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/vidme.py b/youtube_dl/extractor/vidme.py index 5c89824c1..339c3d897 100644 --- a/youtube_dl/extractor/vidme.py +++ b/youtube_dl/extractor/vidme.py @@ -1,7 +1,5 @@ from __future__ import unicode_literals -import re - from .common import InfoExtractor from ..utils import ( int_or_none, @@ -28,12 +26,11 @@ class VidmeIE(InfoExtractor): } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - + video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - video_url = self._html_search_regex(r' Date: Fri, 6 Mar 2015 10:44:24 +0100 Subject: [PATCH 024/118] [travis] Declare 3.2 (Fixes #5144) --- .travis.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.travis.yml b/.travis.yml index fb34299fc..511bee64c 100644 --- a/.travis.yml +++ b/.travis.yml @@ -2,6 +2,7 @@ language: python python: - "2.6" - "2.7" + - "3.2" - "3.3" - "3.4" before_install: From a0bb7c5593b08339f34dc43d9bc1b199ddc3ff7d Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Fri, 6 Mar 2015 10:49:42 +0100 Subject: [PATCH 025/118] [extractor/common] Improve m3u format IDs (#5143) --- youtube_dl/extractor/common.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index cf39c0c21..8ff76342f 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -883,8 +883,12 @@ class InfoExtractor(object): formats.append({'url': format_url(line)}) continue tbr = int_or_none(last_info.get('BANDWIDTH'), scale=1000) + format_id = last_media.get('NAME') + if not format_id: + format_id = '-'.join(filter(None, [ + m3u8_id, 'm3u8-%d' % (tbr if tbr else len(formats))])) f = { - 'format_id': '-'.join(filter(None, [m3u8_id, 'm3u8-%d' % (tbr if tbr else len(formats))])), + 'format_id': format_id, 'url': format_url(line.strip()), 'tbr': tbr, 'ext': ext, From d0e958c71c8c561d1c3917d114370fcb85972181 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Fri, 6 Mar 2015 10:53:49 +0100 Subject: [PATCH 026/118] [twitch:vod] Prefer source stream (Fixes #5143) --- youtube_dl/extractor/twitch.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py index 8af136147..b058891bd 100644 --- a/youtube_dl/extractor/twitch.py +++ b/youtube_dl/extractor/twitch.py @@ -85,6 +85,14 @@ class TwitchBaseIE(InfoExtractor): raise ExtractorError( 'Unable to login: %s' % m.group('msg').strip(), expected=True) + def _prefer_source(self, formats): + try: + source = next(f for f in formats if f['format_id'] == 'Source') + source['preference'] = 10 + except StopIteration: + pass # No Source stream present + self._sort_formats(formats) + class TwitchItemBaseIE(TwitchBaseIE): def _download_info(self, item, item_id): @@ -209,6 +217,7 @@ class TwitchVodIE(TwitchItemBaseIE): '%s/vod/%s?nauth=%s&nauthsig=%s' % (self._USHER_BASE, item_id, access_token['token'], access_token['sig']), item_id, 'mp4') + self._prefer_source(formats) info['formats'] = formats return info @@ -357,13 +366,7 @@ class TwitchStreamIE(TwitchBaseIE): '%s/api/channel/hls/%s.m3u8?%s' % (self._USHER_BASE, channel_id, compat_urllib_parse.urlencode(query).encode('utf-8')), channel_id, 'mp4') - - # prefer the 'source' stream, the others are limited to 30 fps - def _sort_source(f): - if f.get('m3u8_media') is not None and f['m3u8_media'].get('NAME') == 'Source': - return 1 - return 0 - formats = sorted(formats, key=_sort_source) + self._prefer_source(formats) view_count = stream.get('viewers') timestamp = parse_iso8601(stream.get('created_at')) From 8dc9d361c22754e6b6b52136d496d754dea65b46 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 6 Mar 2015 22:52:50 +0600 Subject: [PATCH 027/118] [extractor/common] Fix format_id when `last_media` is None and always include `m3u8_id` if present The rationale behind `m3u8_id` was to resolve duplicates when processing several m3u8 playlists within the same media that give equal resulting `format_id`'s, e.g. `youtube-dl http://www.rts.ch/play/tv/passe-moi-les-jumelles/video/la-fee-des-bois-mustang-les-chemins-du-vent?id=3854925 -F` --- youtube_dl/extractor/common.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 8ff76342f..06bce12e0 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -883,12 +883,13 @@ class InfoExtractor(object): formats.append({'url': format_url(line)}) continue tbr = int_or_none(last_info.get('BANDWIDTH'), scale=1000) - format_id = last_media.get('NAME') - if not format_id: - format_id = '-'.join(filter(None, [ - m3u8_id, 'm3u8-%d' % (tbr if tbr else len(formats))])) + format_id = [] + if m3u8_id: + format_id.append(m3u8_id) + last_media_name = last_media.get('NAME') if last_media else None + format_id.append(last_media_name if last_media_name else '%d' % (tbr if tbr else len(formats))) f = { - 'format_id': format_id, + 'format_id': '-'.join(format_id), 'url': format_url(line.strip()), 'tbr': tbr, 'ext': ext, From bd05aa4e24774a9aa9f68a8379900e6b756aae11 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sat, 7 Mar 2015 00:53:52 +0800 Subject: [PATCH 028/118] [Yam] Add an error detection and update test cases --- youtube_dl/extractor/yam.py | 34 ++++++++++++++++++++++++++++------ 1 file changed, 28 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/yam.py b/youtube_dl/extractor/yam.py index b294767c5..19ad74d04 100644 --- a/youtube_dl/extractor/yam.py +++ b/youtube_dl/extractor/yam.py @@ -8,6 +8,7 @@ from ..compat import compat_urlparse from ..utils import ( float_or_none, month_by_abbreviation, + ExtractorError, ) @@ -28,23 +29,44 @@ class YamIE(InfoExtractor): } }, { # An external video hosted on YouTube - 'url': 'http://mymedia.yam.com/m/3598173', - 'md5': '0238ceec479c654e8c2f1223755bf3e9', + 'url': 'http://mymedia.yam.com/m/3599430', + 'md5': '03127cf10d8f35d120a9e8e52e3b17c6', 'info_dict': { - 'id': 'pJ2Deys283c', + 'id': 'CNpEoQlrIgA', 'ext': 'mp4', - 'upload_date': '20150202', + 'upload_date': '20150306', 'uploader': '新莊社大瑜伽社', - 'description': 'md5:f5cc72f0baf259a70fb731654b0d2eff', + 'description': 'md5:11e2e405311633ace874f2e6226c8b17', 'uploader_id': '2323agoy', - 'title': '外婆的澎湖灣KTV-潘安邦', + 'title': '20090412陽明山二子坪-1', } + }, { + 'url': 'http://mymedia.yam.com/m/3598173', + 'info_dict': { + 'id': '3598173', + 'ext': 'mp4', + }, + 'skip': 'cause Yam system error', + }, { + 'url': 'http://mymedia.yam.com/m/3599437', + 'info_dict': { + 'id': '3599437', + 'ext': 'mp4', + }, + 'skip': 'invalid YouTube URL', }] def _real_extract(self, url): video_id = self._match_id(url) page = self._download_webpage(url, video_id) + # Check for errors + system_msg = self._html_search_regex( + r'系統訊息(?:
|\n|\r)*([^<>]+)
', page, 'system message', + default=None) + if system_msg: + raise ExtractorError(system_msg, expected=True) + # Is it hosted externally on YouTube? youtube_url = self._html_search_regex( r' Date: Fri, 6 Mar 2015 22:53:53 +0600 Subject: [PATCH 029/118] [extractor/common] Remove 'm3u8' from quality selection URL --- youtube_dl/extractor/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 06bce12e0..f9e8e2bad 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -839,7 +839,7 @@ class InfoExtractor(object): m3u8_id=None): formats = [{ - 'format_id': '-'.join(filter(None, [m3u8_id, 'm3u8-meta'])), + 'format_id': '-'.join(filter(None, [m3u8_id, 'meta'])), 'url': m3u8_url, 'ext': ext, 'protocol': 'm3u8', From 3c6f245083db85d7ba9ae5444561339dc1b02671 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Fri, 6 Mar 2015 18:16:56 +0100 Subject: [PATCH 030/118] [vimeo] Fix upload date extraction --- youtube_dl/extractor/vimeo.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index 8f540f578..3771769e1 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -20,6 +20,7 @@ from ..utils import ( RegexNotFoundError, smuggle_url, std_headers, + unified_strdate, unsmuggle_url, urlencode_postdata, ) @@ -140,6 +141,7 @@ class VimeoIE(VimeoBaseInfoExtractor): 'description': 'md5:8678b246399b070816b12313e8b4eb5c', 'uploader_id': 'atencio', 'uploader': 'Peter Atencio', + 'upload_date': '20130927', 'duration': 187, }, }, @@ -323,9 +325,9 @@ class VimeoIE(VimeoBaseInfoExtractor): # Extract upload date video_upload_date = None - mobj = re.search(r' Date: Fri, 6 Mar 2015 19:08:27 +0100 Subject: [PATCH 031/118] [vimeo] Fix and use '_verify_video_password' (#5001) It only supports verifying the password over https now. Use it instead of manually setting the 'password' cookie because it allows to check if the password is correct. --- youtube_dl/extractor/vimeo.py | 20 ++++++-------------- 1 file changed, 6 insertions(+), 14 deletions(-) diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index 3771769e1..61f2d1ee8 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -4,7 +4,6 @@ from __future__ import unicode_literals import json import re import itertools -import hashlib from .common import InfoExtractor from ..compat import ( @@ -178,17 +177,15 @@ class VimeoIE(VimeoBaseInfoExtractor): password = self._downloader.params.get('videopassword', None) if password is None: raise ExtractorError('This video is protected by a password, use the --video-password option', expected=True) - token = self._search_regex(r'xsrft: \'(.*?)\'', webpage, 'login token') - data = compat_urllib_parse.urlencode({ + token = self._search_regex(r'xsrft = \'(.*?)\'', webpage, 'login token') + data = urlencode_postdata({ 'password': password, 'token': token, }) - # I didn't manage to use the password with https - if url.startswith('https'): - pass_url = url.replace('https', 'http') - else: - pass_url = url - password_request = compat_urllib_request.Request(pass_url + '/password', data) + if url.startswith('http://'): + # vimeo only supports https now, but the user can give an http url + url = url.replace('http://', 'https://') + password_request = compat_urllib_request.Request(url + '/password', data) password_request.add_header('Content-Type', 'application/x-www-form-urlencoded') password_request.add_header('Cookie', 'xsrft=%s' % token) return self._download_webpage( @@ -227,11 +224,6 @@ class VimeoIE(VimeoBaseInfoExtractor): if mobj.group('pro') or mobj.group('player'): url = 'http://player.vimeo.com/video/' + video_id - password = self._downloader.params.get('videopassword', None) - if password: - headers['Cookie'] = '%s_password=%s' % ( - video_id, hashlib.md5(password.encode('utf-8')).hexdigest()) - # Retrieve video webpage to extract further information request = compat_urllib_request.Request(url, None, headers) try: From d1508cd68d0a3e470860dc7f225509b2e422d34b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Fri, 6 Mar 2015 22:16:26 +0100 Subject: [PATCH 032/118] [vimeo:album] Fix password protected videos Since it only uses https now, don't recognize http urls. --- youtube_dl/extractor/vimeo.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index 61f2d1ee8..0d1c92633 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -429,10 +429,10 @@ class VimeoChannelIE(InfoExtractor): name="([^"]+)"\s+ value="([^"]*)" ''', login_form)) - token = self._search_regex(r'xsrft: \'(.*?)\'', webpage, 'login token') + token = self._search_regex(r'xsrft = \'(.*?)\'', webpage, 'login token') fields['token'] = token fields['password'] = password - post = compat_urllib_parse.urlencode(fields) + post = urlencode_postdata(fields) password_path = self._search_regex( r'action="([^"]+)"', login_form, 'password URL') password_url = compat_urlparse.urljoin(page_url, password_path) @@ -494,10 +494,10 @@ class VimeoUserIE(VimeoChannelIE): class VimeoAlbumIE(VimeoChannelIE): IE_NAME = 'vimeo:album' - _VALID_URL = r'https?://vimeo\.com/album/(?P\d+)' + _VALID_URL = r'https://vimeo\.com/album/(?P\d+)' _TITLE_RE = r'