From c7667c2d7f602aecfd8a39f26d8151a363ba0b5e Mon Sep 17 00:00:00 2001 From: SyxbEaEQ2 Date: Thu, 31 Jul 2014 03:08:24 +0200 Subject: [PATCH 0001/1937] [downloader/(common/http)] Changes calculation of the rate-limit. (Fix #2297, fix #2140, fix #595, fix #2370) --- youtube_dl/downloader/common.py | 15 +++++++++------ youtube_dl/downloader/http.py | 31 ++++++++++++++++++++++--------- 2 files changed, 31 insertions(+), 15 deletions(-) diff --git a/youtube_dl/downloader/common.py b/youtube_dl/downloader/common.py index 917f3450e..6404e1928 100644 --- a/youtube_dl/downloader/common.py +++ b/youtube_dl/downloader/common.py @@ -77,8 +77,10 @@ class FileDownloader(object): def calc_eta(start, now, total, current): if total is None: return None + if now is None: + now = time.time() dif = now - start - if current == 0 or dif < 0.001: # One millisecond + if current == 0 or dif < 0.001: # One millisecond return None rate = float(current) / dif return int((float(total) - float(current)) / rate) @@ -92,7 +94,7 @@ class FileDownloader(object): @staticmethod def calc_speed(start, now, bytes): dif = now - start - if bytes == 0 or dif < 0.001: # One millisecond + if bytes == 0 or dif < 0.001: # One millisecond return None return float(bytes) / dif @@ -105,7 +107,7 @@ class FileDownloader(object): @staticmethod def best_block_size(elapsed_time, bytes): new_min = max(bytes / 2.0, 1.0) - new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB + new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB if elapsed_time < 0.001: return int(new_max) rate = bytes / elapsed_time @@ -143,18 +145,19 @@ class FileDownloader(object): def report_error(self, *args, **kargs): self.ydl.report_error(*args, **kargs) - def slow_down(self, start_time, byte_counter): + def slow_down(self, start_time, now, byte_counter): """Sleep if the download speed is over the rate limit.""" rate_limit = self.params.get('ratelimit', None) if rate_limit is None or byte_counter == 0: return - now = time.time() + if now is None: + now = time.time() elapsed = now - start_time if elapsed <= 0.0: return speed = float(byte_counter) / elapsed if speed > rate_limit: - time.sleep((byte_counter - rate_limit * (now - start_time)) / rate_limit) + time.sleep((byte_counter / rate_limit) - elapsed) def temp_name(self, filename): """Returns a temporary filename for the given filename.""" diff --git a/youtube_dl/downloader/http.py b/youtube_dl/downloader/http.py index f79e6a995..462be2739 100644 --- a/youtube_dl/downloader/http.py +++ b/youtube_dl/downloader/http.py @@ -128,16 +128,21 @@ class HttpFD(FileDownloader): byte_counter = 0 + resume_len block_size = self.params.get('buffersize', 1024) start = time.time() + + # measure time over whole while-loop, so slow_down() and best_block_size() work together properly + now = None # needed for slow_down() in the first loop run + before = start # start measuring while True: + # Download and write - before = time.time() data_block = data.read(block_size if not is_test else min(block_size, data_len - byte_counter)) - after = time.time() - if len(data_block) == 0: - break byte_counter += len(data_block) - # Open file just in time + # exit loop when download is finished + if len(data_block) == 0: + break + + # Open destination file just in time if stream is None: try: (stream, tmpfilename) = sanitize_open(tmpfilename, open_mode) @@ -153,11 +158,22 @@ class HttpFD(FileDownloader): self.to_stderr(u"\n") self.report_error(u'unable to write data: %s' % str(err)) return False + + # Apply rate limit + 
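The arithmetic behind the reworked rate limiting above is simple: after elapsed seconds a capped download may have transferred at most rate_limit * elapsed bytes, so once byte_counter bytes have actually arrived the downloader has to idle for byte_counter / rate_limit - elapsed seconds to fall back under the cap. A minimal standalone sketch of that calculation (throttle is a hypothetical helper name, not part of the patch; the clamp against negative sleep times only arrives in PATCH 0004 further down):

    import time

    def throttle(start_time, now, byte_counter, rate_limit):
        """Sleep just long enough to keep the average speed <= rate_limit (bytes/s)."""
        elapsed = now - start_time
        if rate_limit is None or byte_counter == 0 or elapsed <= 0:
            return
        if byte_counter / float(rate_limit) > elapsed:
            # e.g. 3 MiB received after 2 s under a 1 MiB/s cap -> sleep 3/1 - 2 = 1 s
            time.sleep(byte_counter / float(rate_limit) - elapsed)
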
self.slow_down(start, now, byte_counter - resume_len) + + # end measuring of one loop run + now = time.time() + after = now + + # Adjust block size if not self.params.get('noresizebuffer', False): block_size = self.best_block_size(after - before, len(data_block)) + before = after + # Progress message - speed = self.calc_speed(start, time.time(), byte_counter - resume_len) + speed = self.calc_speed(start, now, byte_counter - resume_len) if data_len is None: eta = percent = None else: @@ -178,9 +194,6 @@ class HttpFD(FileDownloader): if is_test and byte_counter == data_len: break - # Apply rate limit - self.slow_down(start, byte_counter - resume_len) - if stream is None: self.to_stderr(u"\n") self.report_error(u'Did not get any data blocks') From 03359e9864bfb925f577fa5b16c3ef22884127aa Mon Sep 17 00:00:00 2001 From: rupertbaxter2 Date: Sun, 3 Aug 2014 07:34:04 -0700 Subject: [PATCH 0002/1937] Added --sleep-interval option --- youtube_dl/__init__.py | 8 ++++++++ youtube_dl/downloader/common.py | 3 +++ youtube_dl/utils.py | 3 +++ 3 files changed, 14 insertions(+) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 429630ce5..2bd5ec33b 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -351,6 +351,8 @@ def parseOpts(overrideArguments=None): downloader.add_option('-r', '--rate-limit', dest='ratelimit', metavar='LIMIT', help='maximum download rate in bytes per second (e.g. 50K or 4.2M)') + downloader.add_option('--sleep-interval', + dest='sleepinterval', metavar='SLEEPINTERVAL', help='number of seconds to sleep between downloads (default is %default)', default="0") downloader.add_option('-R', '--retries', dest='retries', metavar='RETRIES', help='number of retries (default is %default)', default=10) downloader.add_option('--buffer-size', @@ -671,6 +673,11 @@ def _real_main(argv=None): if numeric_limit is None: parser.error(u'invalid rate limit specified') opts.ratelimit = numeric_limit + if opts.sleepinterval is not None: + try: + opts.sleepinterval = abs(int(opts.sleepinterval)) + except ValueError: + parser.error(u'invalid sleep interval specified') if opts.min_filesize is not None: numeric_limit = FileDownloader.parse_bytes(opts.min_filesize) if numeric_limit is None: @@ -767,6 +774,7 @@ def _real_main(argv=None): 'restrictfilenames': opts.restrictfilenames, 'ignoreerrors': opts.ignoreerrors, 'ratelimit': opts.ratelimit, + 'sleepinterval': opts.sleepinterval, 'nooverwrites': opts.nooverwrites, 'retries': opts.retries, 'buffersize': opts.buffersize, diff --git a/youtube_dl/downloader/common.py b/youtube_dl/downloader/common.py index 917f3450e..8e0e386bf 100644 --- a/youtube_dl/downloader/common.py +++ b/youtube_dl/downloader/common.py @@ -278,6 +278,9 @@ class FileDownloader(object): """Download to a filename using the info from info_dict Return True on success and False otherwise """ + sleep_interval = self.params.get('sleepinterval', 0) + self.to_screen(u'[download] Sleeping %d seconds...' 
%sleep_interval) + time.sleep(sleep_interval) # Check file already present if self.params.get('continuedl', False) and os.path.isfile(encodeFilename(filename)) and not self.params.get('nopart', False): self.report_file_already_downloaded(filename) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index e40b367c2..d199d26d2 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -6,6 +6,7 @@ import codecs import contextlib import ctypes import datetime +import time import email.utils import errno import getpass @@ -747,6 +748,8 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler): del req.headers['User-agent'] req.headers['User-agent'] = req.headers['Youtubedl-user-agent'] del req.headers['Youtubedl-user-agent'] + #print("sleeping\n") + #time.sleep(1) return req def http_response(self, req, resp): From 2f61fe4cccc1ef4186943f4eed2e89f8fe2e2c23 Mon Sep 17 00:00:00 2001 From: rupertbaxter2 Date: Sun, 3 Aug 2014 07:38:04 -0700 Subject: [PATCH 0003/1937] Removed unneccesary changes to utils.py --- youtube_dl/utils.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index d199d26d2..e40b367c2 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -6,7 +6,6 @@ import codecs import contextlib import ctypes import datetime -import time import email.utils import errno import getpass @@ -748,8 +747,6 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler): del req.headers['User-agent'] req.headers['User-agent'] = req.headers['Youtubedl-user-agent'] del req.headers['Youtubedl-user-agent'] - #print("sleeping\n") - #time.sleep(1) return req def http_response(self, req, resp): From 00cf122d7a79e81a2b328b7352d23eb0bdb17e52 Mon Sep 17 00:00:00 2001 From: SyxbEaEQ2 Date: Wed, 6 Aug 2014 20:53:04 +0200 Subject: [PATCH 0004/1937] [downloader/common] Fix possible negative sleep time in slow_down() --- youtube_dl/downloader/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/downloader/common.py b/youtube_dl/downloader/common.py index 6404e1928..33ebbf6b4 100644 --- a/youtube_dl/downloader/common.py +++ b/youtube_dl/downloader/common.py @@ -157,7 +157,7 @@ class FileDownloader(object): return speed = float(byte_counter) / elapsed if speed > rate_limit: - time.sleep((byte_counter / rate_limit) - elapsed) + time.sleep(max((byte_counter / rate_limit) - elapsed, 0)) def temp_name(self, filename): """Returns a temporary filename for the given filename.""" From a42c9215983c4d62d1c000c9dede6e0850dbb5e4 Mon Sep 17 00:00:00 2001 From: rupertbaxter2 Date: Wed, 13 Aug 2014 04:38:40 -0700 Subject: [PATCH 0005/1937] Removed sleep and sleep output when interval is zero --- youtube_dl/downloader/common.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/youtube_dl/downloader/common.py b/youtube_dl/downloader/common.py index 48e829deb..c1da065b5 100644 --- a/youtube_dl/downloader/common.py +++ b/youtube_dl/downloader/common.py @@ -279,8 +279,9 @@ class FileDownloader(object): Return True on success and False otherwise """ sleep_interval = self.params.get('sleepinterval', 0) - self.to_screen(u'[download] Sleeping %d seconds...' %sleep_interval) - time.sleep(sleep_interval) + if sleep_interval > 0: + self.to_screen(u'[download] Sleeping %d seconds...' 
%sleep_interval) + time.sleep(sleep_interval) # Check file already present if self.params.get('continuedl', False) and os.path.isfile(encodeFilename(filename)) and not self.params.get('nopart', False): self.report_file_already_downloaded(filename) From b8874d4d4ea3becfde813d451d884bce558fe213 Mon Sep 17 00:00:00 2001 From: megustamucho Date: Tue, 9 Sep 2014 12:46:58 +1000 Subject: [PATCH 0006/1937] [tube8] Improved regex --- youtube_dl/extractor/tube8.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/tube8.py b/youtube_dl/extractor/tube8.py index 08a48c05a..39f20c546 100644 --- a/youtube_dl/extractor/tube8.py +++ b/youtube_dl/extractor/tube8.py @@ -14,7 +14,7 @@ from ..aes import aes_decrypt_text class Tube8IE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?tube8\.com/(?:[^/]+/){2}(?P\d+)' + _VALID_URL = r'https?://(?:www\.)?tube8\.com/(?:gay/|shemale/)?(?:[^/]+/){2}(?P\d+)' _TEST = { 'url': 'http://www.tube8.com/teen/kasia-music-video/229795/', 'md5': '44bf12b98313827dd52d35b8706a4ea0', From 94b539d15505daf5213d5c4de7c2fde08b5d2f40 Mon Sep 17 00:00:00 2001 From: megustamucho Date: Tue, 9 Sep 2014 12:46:58 +1000 Subject: [PATCH 0007/1937] [tube8] Improved regex --- youtube_dl/extractor/tube8.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/tube8.py b/youtube_dl/extractor/tube8.py index 08a48c05a..39f20c546 100644 --- a/youtube_dl/extractor/tube8.py +++ b/youtube_dl/extractor/tube8.py @@ -14,7 +14,7 @@ from ..aes import aes_decrypt_text class Tube8IE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?tube8\.com/(?:[^/]+/){2}(?P\d+)' + _VALID_URL = r'https?://(?:www\.)?tube8\.com/(?:gay/|shemale/)?(?:[^/]+/){2}(?P\d+)' _TEST = { 'url': 'http://www.tube8.com/teen/kasia-music-video/229795/', 'md5': '44bf12b98313827dd52d35b8706a4ea0', From 67abbe95273f59f4a04486172e6d422a10b6afb3 Mon Sep 17 00:00:00 2001 From: Naglis Jonaitis Date: Wed, 17 Sep 2014 22:57:01 +0300 Subject: [PATCH 0008/1937] [videomega] Add new extractor. 
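The two tube8 commits above carry no message body, so it may help to spell out what the "Improved regex" actually buys: the old pattern only accepted two path components before the numeric id, while the new optional gay/ and shemale/ prefix lets a third one through. A small illustration (the URL is a shortened, made-up stand-in for the kind of address added to the tests later in this series, see PATCH 0029):

    import re

    OLD = r'https?://(?:www\.)?tube8\.com/(?:[^/]+/){2}(?P<id>\d+)'
    NEW = r'https?://(?:www\.)?tube8\.com/(?:gay/|shemale/)?(?:[^/]+/){2}(?P<id>\d+)'

    url = 'http://www.tube8.com/shemale/teen/example-title/19569151/'
    print(re.match(OLD, url))                # None: three segments before the id
    print(re.match(NEW, url).group('id'))    # '19569151'
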
Closes #3775 --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/videomega.py | 59 +++++++++++++++++++++++++++++++ 2 files changed, 60 insertions(+) create mode 100644 youtube_dl/extractor/videomega.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index f715c3310..75831b40a 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -393,6 +393,7 @@ from .videobam import VideoBamIE from .videodetective import VideoDetectiveIE from .videolecturesnet import VideoLecturesNetIE from .videofyme import VideofyMeIE +from .videomega import VideoMegaIE from .videopremium import VideoPremiumIE from .videott import VideoTtIE from .videoweed import VideoWeedIE diff --git a/youtube_dl/extractor/videomega.py b/youtube_dl/extractor/videomega.py new file mode 100644 index 000000000..1b6b65839 --- /dev/null +++ b/youtube_dl/extractor/videomega.py @@ -0,0 +1,59 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + compat_urllib_parse, + remove_start, +) + + +class VideoMegaIE(InfoExtractor): + _VALID_URL = r'''(?x)https?:// + (?:www\.)?videomega\.tv/ + (?:iframe\.php)?\?ref=(?P[A-Za-z0-9]+) + ''' + _TEST = { + 'url': 'http://videomega.tv/?ref=GKeGPVedBe', + 'md5': '240fb5bcf9199961f48eb17839b084d6', + 'info_dict': { + 'id': 'GKeGPVedBe', + 'ext': 'mp4', + 'title': 'XXL - All Sports United', + 'thumbnail': 're:^https?://.*\.jpg$', + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + + url = 'http://videomega.tv/iframe.php?ref={0:}'.format(video_id) + webpage = self._download_webpage(url, video_id) + + escaped_data = self._search_regex( + 'unescape\("([^"]+)"\)', webpage, 'escaped data') + playlist = compat_urllib_parse.unquote(escaped_data) + + thumbnail = self._search_regex( + r'image:\s*"([^"]+)"', playlist, 'thumbnail', fatal=False) + url = self._search_regex(r'file:\s*"([^"]+)"', playlist, 'URL') + title = self._html_search_regex( + r'(.*?)', webpage, 'title') + if title: + title = remove_start(title, 'VideoMega.tv - ') + + formats = [] + formats.append({ + 'format_id': 'sd', + 'url': url, + }) + + return { + 'id': video_id, + 'title': title, + 'formats': formats, + 'thumbnail': thumbnail, + } From 0e59b9fffb12255a16577dca7710b7738feca75c Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Thu, 18 Sep 2014 00:18:27 +0200 Subject: [PATCH 0009/1937] [videomega] Simplify (#3786) * Use raw strings (r'foo') for regular expressions (enables highlighting and avoids some errors). 
* title is always true-ish --- youtube_dl/extractor/videomega.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/videomega.py b/youtube_dl/extractor/videomega.py index 1b6b65839..29c4e0101 100644 --- a/youtube_dl/extractor/videomega.py +++ b/youtube_dl/extractor/videomega.py @@ -34,22 +34,20 @@ class VideoMegaIE(InfoExtractor): webpage = self._download_webpage(url, video_id) escaped_data = self._search_regex( - 'unescape\("([^"]+)"\)', webpage, 'escaped data') + r'unescape\("([^"]+)"\)', webpage, 'escaped data') playlist = compat_urllib_parse.unquote(escaped_data) thumbnail = self._search_regex( r'image:\s*"([^"]+)"', playlist, 'thumbnail', fatal=False) url = self._search_regex(r'file:\s*"([^"]+)"', playlist, 'URL') - title = self._html_search_regex( - r'(.*?)', webpage, 'title') - if title: - title = remove_start(title, 'VideoMega.tv - ') + title = remove_start(self._html_search_regex( + r'(.*?)', webpage, 'title'), 'VideoMega.tv - ') - formats = [] - formats.append({ + formats = [{ 'format_id': 'sd', 'url': url, - }) + }] + self._sort_formats(formats) return { 'id': video_id, From 9296738f20c1335498a78c99a86767e9bae4f6d2 Mon Sep 17 00:00:00 2001 From: dequis Date: Thu, 18 Sep 2014 03:02:03 -0300 Subject: [PATCH 0010/1937] [soundcloud] Support api urls with secret_token, Closes #3707 --- youtube_dl/extractor/soundcloud.py | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index b78aed7f0..129f587ec 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -31,7 +31,8 @@ class SoundcloudIE(InfoExtractor): (?!sets/|likes/?(?:$|[?#])) (?P[\w\d-]+)/? (?P<token>[^?]+?)?(?:[?].*)?$) - |(?:api\.soundcloud\.com/tracks/(?P<track_id>\d+)) + |(?:api\.soundcloud\.com/tracks/(?P<track_id>\d+) + (?:/?\?secret_token=(?P<secret_token>[^&]+?))?$) |(?P<player>(?:w|player|p.)\.soundcloud\.com/player/?.*?url=.*) ) ''' @@ -80,6 +81,20 @@ class SoundcloudIE(InfoExtractor): 'duration': 9, }, }, + # private link (alt format) + { + 'url': 'https://api.soundcloud.com/tracks/123998367?secret_token=s-8Pjrp', + 'md5': 'aa0dd32bfea9b0c5ef4f02aacd080604', + 'info_dict': { + 'id': '123998367', + 'ext': 'mp3', + 'title': 'Youtube - Dl Test Video \'\' Ä↭', + 'uploader': 'jaimeMF', + 'description': 'test chars: \"\'/\\ä↭', + 'upload_date': '20131209', + 'duration': 9, + }, + }, # downloadable song { 'url': 'https://soundcloud.com/oddsamples/bus-brakes', @@ -197,6 +212,9 @@ class SoundcloudIE(InfoExtractor): if track_id is not None: info_json_url = 'http://api.soundcloud.com/tracks/' + track_id + '.json?client_id=' + self._CLIENT_ID full_title = track_id + token = mobj.group('secret_token') + if token: + info_json_url += "&secret_token=" + token elif mobj.group('player'): query = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query) return self.url_result(query['url'][0]) From 2f834e938192a61fd4a32fa98bffb5e1b614bc29 Mon Sep 17 00:00:00 2001 From: dequis <dx@dxzone.com.ar> Date: Thu, 18 Sep 2014 06:35:11 -0300 Subject: [PATCH 0011/1937] [soundcloud] Secret playlists and sets Closes #3707 again. 
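The secret_token support from PATCH 0010 above (extended to sets and playlists in this commit) comes down to carrying the token from the page URL through to the API request. A rough standalone sketch of how such an info URL is put together (build_track_info_url is a hypothetical helper; in the extractor this happens inline in _real_extract):

    def build_track_info_url(track_id, client_id, secret_token=None):
        # http://api.soundcloud.com/tracks/<id>.json?client_id=...[&secret_token=...]
        url = 'http://api.soundcloud.com/tracks/%s.json?client_id=%s' % (track_id, client_id)
        if secret_token:
            url += '&secret_token=' + secret_token
        return url

    # build_track_info_url('123998367', 'YOUR_CLIENT_ID', 's-8Pjrp') mirrors the
    # private-link test case added in PATCH 0010 above.
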
No test cases because I don't know what urls to use that won't be turned into public eventually (as it happened with the first one in that ticket) --- youtube_dl/extractor/soundcloud.py | 30 +++++++++++++++++++++--------- 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index 129f587ec..2bed3c350 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -238,7 +238,7 @@ class SoundcloudIE(InfoExtractor): class SoundcloudSetIE(SoundcloudIE): - _VALID_URL = r'https?://(?:www\.)?soundcloud\.com/([\w\d-]+)/sets/([\w\d-]+)' + _VALID_URL = r'https?://(?:www\.)?soundcloud\.com/(?P<uploader>[\w\d-]+)/sets/(?P<slug_title>[\w\d-]+)(?:/(?P<token>[^?/]+))?' IE_NAME = 'soundcloud:set' _TESTS = [{ 'url': 'https://soundcloud.com/the-concept-band/sets/the-royal-concept-ep', @@ -252,14 +252,19 @@ class SoundcloudSetIE(SoundcloudIE): mobj = re.match(self._VALID_URL, url) # extract uploader (which is in the url) - uploader = mobj.group(1) + uploader = mobj.group('uploader') # extract simple title (uploader + slug of song title) - slug_title = mobj.group(2) + slug_title = mobj.group('slug_title') full_title = '%s/sets/%s' % (uploader, slug_title) + url = 'http://soundcloud.com/%s/sets/%s' % (uploader, slug_title) + + token = mobj.group('token') + if token: + full_title += '/' + token + url += '/' + token self.report_resolve(full_title) - url = 'http://soundcloud.com/%s/sets/%s' % (uploader, slug_title) resolv_url = self._resolv_url(url) info = self._download_json(resolv_url, full_title) @@ -270,7 +275,7 @@ class SoundcloudSetIE(SoundcloudIE): return { '_type': 'playlist', - 'entries': [self._extract_info_dict(track) for track in info['tracks']], + 'entries': [self._extract_info_dict(track, secret_token=token) for track in info['tracks']], 'id': info['id'], 'title': info['title'], } @@ -333,7 +338,7 @@ class SoundcloudUserIE(SoundcloudIE): class SoundcloudPlaylistIE(SoundcloudIE): - _VALID_URL = r'https?://api\.soundcloud\.com/playlists/(?P<id>[0-9]+)' + _VALID_URL = r'https?://api\.soundcloud\.com/playlists/(?P<id>[0-9]+)(?:/?\?secret_token=(?P<token>[^&]+?))$' IE_NAME = 'soundcloud:playlist' _TESTS = [ @@ -353,14 +358,21 @@ class SoundcloudPlaylistIE(SoundcloudIE): playlist_id = mobj.group('id') base_url = '%s//api.soundcloud.com/playlists/%s.json?' 
% (self.http_scheme(), playlist_id) - data = compat_urllib_parse.urlencode({ + data_dict = { 'client_id': self._CLIENT_ID, - }) + } + token = mobj.group('token') + + if token: + data_dict['secret_token'] = token + + data = compat_urllib_parse.urlencode(data_dict) data = self._download_json( base_url + data, playlist_id, 'Downloading playlist') entries = [ - self._extract_info_dict(t, quiet=True) for t in data['tracks']] + self._extract_info_dict(t, quiet=True, secret_token=token) + for t in data['tracks']] return { '_type': 'playlist', From 2914e5f00f6ebcc59712b7091a87988408ff3c88 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 18 Sep 2014 20:56:54 +0700 Subject: [PATCH 0012/1937] [drtuber] Fix categories --- youtube_dl/extractor/drtuber.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/drtuber.py b/youtube_dl/extractor/drtuber.py index d5bfd7f22..ca274dff6 100644 --- a/youtube_dl/extractor/drtuber.py +++ b/youtube_dl/extractor/drtuber.py @@ -19,7 +19,7 @@ class DrTuberIE(InfoExtractor): 'like_count': int, 'dislike_count': int, 'comment_count': int, - 'categories': list, # NSFW + 'categories': ['Babe', 'Blonde', 'Erotic', 'Outdoor', 'Softcore', 'Solo'], 'thumbnail': 're:https?://.*\.jpg$', 'age_limit': 18, } @@ -52,9 +52,9 @@ class DrTuberIE(InfoExtractor): r'<span class="comments_count">([\d,\.]+)</span>', webpage, 'comment count', fatal=False)) - cats_str = self._html_search_regex( - r'<meta name="keywords" content="([^"]+)"', webpage, 'categories', fatal=False) - categories = None if cats_str is None else cats_str.split(' ') + cats_str = self._search_regex( + r'<span>Categories:</span><div>(.+?)</div>', webpage, 'categories', fatal=False) + categories = [] if not cats_str else re.findall(r'<a title="([^"]+)"', cats_str) return { 'id': video_id, From 109a540e7a4c5741fa77b68b4f346f42dc1cda97 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Thu, 18 Sep 2014 16:57:34 +0200 Subject: [PATCH 0013/1937] [ign] Fix extraction --- youtube_dl/extractor/ign.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/ign.py b/youtube_dl/extractor/ign.py index 9e8b69f57..ac7804ad9 100644 --- a/youtube_dl/extractor/ign.py +++ b/youtube_dl/extractor/ign.py @@ -71,6 +71,7 @@ class IGNIE(InfoExtractor): def _find_video_id(self, webpage): res_id = [ + r'"video_id"\s*:\s*"(.*?)"', r'data-video-id="(.+?)"', r'<object id="vid_(.+?)"', r'<meta name="og:image" content=".*/(.+?)-(.+?)/.+.jpg"', @@ -85,7 +86,7 @@ class IGNIE(InfoExtractor): webpage = self._download_webpage(url, name_or_id) if page_type != 'video': multiple_urls = re.findall( - '<param name="flashvars" value="[^"]*?url=(https?://www\.ign\.com/videos/.*?)["&]', + '<param name="flashvars"[^>]*value="[^"]*?url=(https?://www\.ign\.com/videos/.*?)["&]', webpage) if multiple_urls: return [self.url_result(u, ie='IGN') for u in multiple_urls] From 09b23c902b5ab4a4ca9607128128d110a3c41875 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Thu, 18 Sep 2014 17:02:53 +0200 Subject: [PATCH 0014/1937] [1up.com] Urls end now with '.html' --- youtube_dl/extractor/ign.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/ign.py b/youtube_dl/extractor/ign.py index ac7804ad9..12e9e61c4 100644 --- a/youtube_dl/extractor/ign.py +++ 
b/youtube_dl/extractor/ign.py @@ -112,13 +112,13 @@ class IGNIE(InfoExtractor): class OneUPIE(IGNIE): - _VALID_URL = r'https?://gamevideos\.1up\.com/(?P<type>video)/id/(?P<name_or_id>.+)' + _VALID_URL = r'https?://gamevideos\.1up\.com/(?P<type>video)/id/(?P<name_or_id>.+)\.html' IE_NAME = '1up.com' _DESCRIPTION_RE = r'<div id="vid_summary">(.+?)</div>' _TESTS = [{ - 'url': 'http://gamevideos.1up.com/video/id/34976', + 'url': 'http://gamevideos.1up.com/video/id/34976.html', 'md5': '68a54ce4ebc772e4b71e3123d413163d', 'info_dict': { 'id': '34976', From e2e5dae64da60c37af65c7cffd18475a30fcbad3 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Thu, 18 Sep 2014 18:40:19 +0200 Subject: [PATCH 0015/1937] Add -f m4a --- youtube_dl/YoutubeDL.py | 2 +- youtube_dl/options.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 9519594c9..eaba40bf2 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -708,7 +708,7 @@ class YoutubeDL(object): if video_formats: return video_formats[0] else: - extensions = ['mp4', 'flv', 'webm', '3gp'] + extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a'] if format_spec in extensions: filter_f = lambda f: f['ext'] == format_spec else: diff --git a/youtube_dl/options.py b/youtube_dl/options.py index 31baab469..7df20ae61 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -218,7 +218,7 @@ def parseOpts(overrideArguments=None): video_format.add_option('-f', '--format', action='store', dest='format', metavar='FORMAT', default=None, - help='video format code, specify the order of preference using slashes: "-f 22/17/18". "-f mp4" and "-f flv" are also supported. You can also use the special names "best", "bestvideo", "bestaudio", "worst", "worstvideo" and "worstaudio". By default, youtube-dl will pick the best quality.') + help='video format code, specify the order of preference using slashes: -f 22/17/18 . -f mp4 , -f m4a and -f flv are also supported. You can also use the special names "best", "bestvideo", "bestaudio", "worst", "worstvideo" and "worstaudio". 
By default, youtube-dl will pick the best quality.') video_format.add_option('--all-formats', action='store_const', dest='format', help='download all available video formats', const='all') video_format.add_option('--prefer-free-formats', From 1de33fafd94c7e0d4ccede711ef7f13bd3e2301b Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Thu, 18 Sep 2014 18:43:49 +0200 Subject: [PATCH 0016/1937] [YoutubeDL] Allow downloading multiple formats with , --- youtube_dl/YoutubeDL.py | 43 +++++++++++++++++++++-------------------- youtube_dl/options.py | 2 +- 2 files changed, 23 insertions(+), 22 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index eaba40bf2..a1713dc5a 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -809,28 +809,29 @@ class YoutubeDL(object): if req_format in ('-1', 'all'): formats_to_download = formats else: - # We can accept formats requested in the format: 34/5/best, we pick - # the first that is available, starting from left - req_formats = req_format.split('/') - for rf in req_formats: - if re.match(r'.+?\+.+?', rf) is not None: - # Two formats have been requested like '137+139' - format_1, format_2 = rf.split('+') - formats_info = (self.select_format(format_1, formats), - self.select_format(format_2, formats)) - if all(formats_info): - selected_format = { - 'requested_formats': formats_info, - 'format': rf, - 'ext': formats_info[0]['ext'], - } + for rfstr in req_format.split(','): + # We can accept formats requested in the format: 34/5/best, we pick + # the first that is available, starting from left + req_formats = rfstr.split('/') + for rf in req_formats: + if re.match(r'.+?\+.+?', rf) is not None: + # Two formats have been requested like '137+139' + format_1, format_2 = rf.split('+') + formats_info = (self.select_format(format_1, formats), + self.select_format(format_2, formats)) + if all(formats_info): + selected_format = { + 'requested_formats': formats_info, + 'format': rf, + 'ext': formats_info[0]['ext'], + } + else: + selected_format = None else: - selected_format = None - else: - selected_format = self.select_format(rf, formats) - if selected_format is not None: - formats_to_download = [selected_format] - break + selected_format = self.select_format(rf, formats) + if selected_format is not None: + formats_to_download.append(selected_format) + break if not formats_to_download: raise ExtractorError('requested format not available', expected=True) diff --git a/youtube_dl/options.py b/youtube_dl/options.py index 7df20ae61..44dcb1e34 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -218,7 +218,7 @@ def parseOpts(overrideArguments=None): video_format.add_option('-f', '--format', action='store', dest='format', metavar='FORMAT', default=None, - help='video format code, specify the order of preference using slashes: -f 22/17/18 . -f mp4 , -f m4a and -f flv are also supported. You can also use the special names "best", "bestvideo", "bestaudio", "worst", "worstvideo" and "worstaudio". By default, youtube-dl will pick the best quality.') + help='video format code, specify the order of preference using slashes: -f 22/17/18 . -f mp4 , -f m4a and -f flv are also supported. You can also use the special names "best", "bestvideo", "bestaudio", "worst", "worstvideo" and "worstaudio". By default, youtube-dl will pick the best quality. 
Use commas to download multiple audio formats, such as -f 136/137/mp4/bestvideo,140/m4a/bestaudio') video_format.add_option('--all-formats', action='store_const', dest='format', help='download all available video formats', const='all') video_format.add_option('--prefer-free-formats', From fd78a4d3e63f191e0774584d9b71bf25a2d8dbcf Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Thu, 18 Sep 2014 18:43:59 +0200 Subject: [PATCH 0017/1937] release 2014.09.18 --- README.md | 15 +++++++++------ youtube_dl/version.py | 2 +- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 5cc959ac5..5d15decb5 100644 --- a/README.md +++ b/README.md @@ -227,12 +227,15 @@ which means you can modify it, redistribute it or use it however you like. ## Video Format Options: -f, --format FORMAT video format code, specify the order of - preference using slashes: "-f 22/17/18". - "-f mp4" and "-f flv" are also supported. - You can also use the special names "best", - "bestvideo", "bestaudio", "worst", - "worstvideo" and "worstaudio". By default, - youtube-dl will pick the best quality. + preference using slashes: -f 22/17/18 . -f + mp4 , -f m4a and -f flv are also + supported. You can also use the special + names "best", "bestvideo", "bestaudio", + "worst", "worstvideo" and "worstaudio". By + default, youtube-dl will pick the best + quality. Use commas to download multiple + audio formats, such as -f + 136/137/mp4/bestvideo,140/m4a/bestaudio --all-formats download all available video formats --prefer-free-formats prefer free video formats unless a specific one is requested diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 23892a8bd..430509ba3 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2014.09.16.1' +__version__ = '2014.09.18' From 0529eef5a4513d8f3c042f09fe5485e1c41e2f08 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Thu, 18 Sep 2014 18:54:03 +0200 Subject: [PATCH 0018/1937] [hypestat] Unify allmyvideos and vidspot (Closes #3788) --- youtube_dl/extractor/__init__.py | 2 +- .../extractor/{allmyvideos.py => hypestat.py} | 18 +++++++++++++----- 2 files changed, 14 insertions(+), 6 deletions(-) rename youtube_dl/extractor/{allmyvideos.py => hypestat.py} (77%) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 75831b40a..97693018f 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -6,7 +6,6 @@ from .aftonbladet import AftonbladetIE from .anitube import AnitubeIE from .anysex import AnySexIE from .aol import AolIE -from .allmyvideos import AllmyvideosIE from .allocine import AllocineIE from .aparat import AparatIE from .appletrailers import AppleTrailersIE @@ -151,6 +150,7 @@ from .howcast import HowcastIE from .howstuffworks import HowStuffWorksIE from .huffpost import HuffPostIE from .hypem import HypemIE +from .hypestat import HypestatIE from .iconosquare import IconosquareIE from .ign import IGNIE, OneUPIE from .imdb import ( diff --git a/youtube_dl/extractor/allmyvideos.py b/youtube_dl/extractor/hypestat.py similarity index 77% rename from youtube_dl/extractor/allmyvideos.py rename to youtube_dl/extractor/hypestat.py index e6c60e7e4..8b8db30ae 100644 --- a/youtube_dl/extractor/allmyvideos.py +++ b/youtube_dl/extractor/hypestat.py @@ -11,11 +11,11 @@ from ..utils import ( ) -class AllmyvideosIE(InfoExtractor): - IE_NAME = 'allmyvideos.net' - _VALID_URL = 
r'https?://allmyvideos\.net/(?P<id>[a-zA-Z0-9_-]+)' +class HypestatIE(InfoExtractor): + IE_DESC = 'allmyvideos.net and vidspot.net' + _VALID_URL = r'https?://(?:allmyvideos|vidspot)\.net/(?P<id>[a-zA-Z0-9_-]+)' - _TEST = { + _TESTS = [{ 'url': 'http://allmyvideos.net/jih3nce3x6wn', 'md5': '710883dee1bfc370ecf9fa6a89307c88', 'info_dict': { @@ -23,7 +23,15 @@ class AllmyvideosIE(InfoExtractor): 'ext': 'mp4', 'title': 'youtube-dl test video', }, - } + }, { + 'url': 'http://vidspot.net/l2ngsmhs8ci5', + 'md5': '710883dee1bfc370ecf9fa6a89307c88', + 'info_dict': { + 'id': 'l2ngsmhs8ci5', + 'ext': 'mp4', + 'title': 'youtube-dl test video', + }, + }] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) From 37bfe8ace4dcd1b476a54aedb7f39b88e7bb527e Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Thu, 18 Sep 2014 18:56:02 +0200 Subject: [PATCH 0019/1937] [hypestat] Match URLs with www. and https:// --- youtube_dl/extractor/hypestat.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/hypestat.py b/youtube_dl/extractor/hypestat.py index 8b8db30ae..e1a142268 100644 --- a/youtube_dl/extractor/hypestat.py +++ b/youtube_dl/extractor/hypestat.py @@ -13,7 +13,7 @@ from ..utils import ( class HypestatIE(InfoExtractor): IE_DESC = 'allmyvideos.net and vidspot.net' - _VALID_URL = r'https?://(?:allmyvideos|vidspot)\.net/(?P<id>[a-zA-Z0-9_-]+)' + _VALID_URL = r'https?://(?:www\.)?(?:allmyvideos|vidspot)\.net/(?P<id>[a-zA-Z0-9_-]+)' _TESTS = [{ 'url': 'http://allmyvideos.net/jih3nce3x6wn', @@ -31,6 +31,9 @@ class HypestatIE(InfoExtractor): 'ext': 'mp4', 'title': 'youtube-dl test video', }, + }, { + 'url': 'https://www.vidspot.net/l2ngsmhs8ci5', + 'only_matching': True, }] def _real_extract(self, url): From 46f74bcf5c5fc876e3a966408cb8bde6d6ef15e0 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Thu, 18 Sep 2014 18:57:04 +0200 Subject: [PATCH 0020/1937] [soundcloud] Fix non-secret playlists --- youtube_dl/extractor/soundcloud.py | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index 2bed3c350..4719ba45c 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -338,20 +338,17 @@ class SoundcloudUserIE(SoundcloudIE): class SoundcloudPlaylistIE(SoundcloudIE): - _VALID_URL = r'https?://api\.soundcloud\.com/playlists/(?P<id>[0-9]+)(?:/?\?secret_token=(?P<token>[^&]+?))$' + _VALID_URL = r'https?://api\.soundcloud\.com/playlists/(?P<id>[0-9]+)(?:/?\?secret_token=(?P<token>[^&]+?))?$' IE_NAME = 'soundcloud:playlist' - _TESTS = [ - - { - 'url': 'http://api.soundcloud.com/playlists/4110309', - 'info_dict': { - 'id': '4110309', - 'title': 'TILT Brass - Bowery Poetry Club, August \'03 [Non-Site SCR 02]', - 'description': 're:.*?TILT Brass - Bowery Poetry Club', - }, - 'playlist_count': 6, - } - ] + _TESTS = [{ + 'url': 'http://api.soundcloud.com/playlists/4110309', + 'info_dict': { + 'id': '4110309', + 'title': 'TILT Brass - Bowery Poetry Club, August \'03 [Non-Site SCR 02]', + 'description': 're:.*?TILT Brass - Bowery Poetry Club', + }, + 'playlist_count': 6, + }] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) From 589d3d7c7ae18875060caa15f5547c0194932e55 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Thu, 18 Sep 2014 21:37:09 +0200 Subject: [PATCH 0021/1937] [moniker] rename from hypestat (#3788) --- 
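Looking back at PATCH 0016, the new comma syntax effectively nests three separators: ',' produces independent downloads, '/' lists fallbacks tried left to right, and '+' asks for two formats to be merged. A small sketch of that parsing, using a hypothetical parse_format_spec helper rather than the actual YoutubeDL code:

    def parse_format_spec(req_format):
        """'137+139/mp4,140/m4a' -> [[('137', '139'), ('mp4',)], [('140',), ('m4a',)]]"""
        groups = []
        for rfstr in req_format.split(','):      # each group becomes its own download
            alternatives = []
            for rf in rfstr.split('/'):          # fallbacks, first available wins
                alternatives.append(tuple(rf.split('+')))   # '+' merges two formats
            groups.append(alternatives)
        return groups
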
youtube_dl/extractor/__init__.py | 2 +- youtube_dl/extractor/{hypestat.py => moniker.py} | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) rename youtube_dl/extractor/{hypestat.py => moniker.py} (98%) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 97693018f..a9a33c40f 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -150,7 +150,6 @@ from .howcast import HowcastIE from .howstuffworks import HowStuffWorksIE from .huffpost import HuffPostIE from .hypem import HypemIE -from .hypestat import HypestatIE from .iconosquare import IconosquareIE from .ign import IGNIE, OneUPIE from .imdb import ( @@ -209,6 +208,7 @@ from .mpora import MporaIE from .moevideo import MoeVideoIE from .mofosex import MofosexIE from .mojvideo import MojvideoIE +from .moniker import MonikerIE from .mooshare import MooshareIE from .morningstar import MorningstarIE from .motherless import MotherlessIE diff --git a/youtube_dl/extractor/hypestat.py b/youtube_dl/extractor/moniker.py similarity index 98% rename from youtube_dl/extractor/hypestat.py rename to youtube_dl/extractor/moniker.py index e1a142268..79bb2ca59 100644 --- a/youtube_dl/extractor/hypestat.py +++ b/youtube_dl/extractor/moniker.py @@ -11,7 +11,7 @@ from ..utils import ( ) -class HypestatIE(InfoExtractor): +class MonikerIE(InfoExtractor): IE_DESC = 'allmyvideos.net and vidspot.net' _VALID_URL = r'https?://(?:www\.)?(?:allmyvideos|vidspot)\.net/(?P<id>[a-zA-Z0-9_-]+)' From 7267bd536fb81cb1bdcc6554219a0b66a75b31a6 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Fri, 19 Sep 2014 09:57:53 +0200 Subject: [PATCH 0022/1937] [muenchentv] Add support (Fixes #3507) --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/common.py | 2 + youtube_dl/extractor/muenchentv.py | 77 ++++++++++++++++++++++++++++++ 3 files changed, 80 insertions(+) create mode 100644 youtube_dl/extractor/muenchentv.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index a9a33c40f..625666acb 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -221,6 +221,7 @@ from .mtv import ( MTVServicesEmbeddedIE, MTVIggyIE, ) +from .muenchentv import MuenchenTVIE from .musicplayon import MusicPlayOnIE from .musicvault import MusicVaultIE from .muzu import MuzuTVIE diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 929dd1e97..9c30a1d33 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -130,6 +130,8 @@ class InfoExtractor(object): by YoutubeDL if it's missing) categories: A list of categories that the video falls in, for example ["Sports", "Berlin"] + is_live: True, False, or None (=unknown). Whether this video is a + live stream that goes on instead of a fixed-length video. Unless mentioned otherwise, the fields should be Unicode strings. 
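The new is_live field documented above is what the münchen.tv extractor below relies on: a live stream has no fixed length, so the entry is flagged instead of given a duration. A minimal info dict using the field might look like this (values are illustrative, loosely following the test case in the patch):

    info_dict = {
        'id': '5334',
        'ext': 'mp4',
        'title': 'münchen.tv-Livestream 2014-09-19 09:57',
        'is_live': True,    # open-ended stream, so no meaningful duration
        'duration': None,
    }
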
diff --git a/youtube_dl/extractor/muenchentv.py b/youtube_dl/extractor/muenchentv.py new file mode 100644 index 000000000..3a938861b --- /dev/null +++ b/youtube_dl/extractor/muenchentv.py @@ -0,0 +1,77 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import datetime +import json + +from .common import InfoExtractor +from ..utils import ( + determine_ext, + int_or_none, + js_to_json, +) + + +class MuenchenTVIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?muenchen\.tv/livestream' + IE_DESC = 'münchen.tv' + _TEST = { + 'url': 'http://www.muenchen.tv/livestream/', + 'info_dict': { + 'id': '5334', + 'display_id': 'live', + 'ext': 'mp4', + 'title': 're:^münchen.tv-Livestream [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', + 'is_live': True, + }, + 'params': { + 'skip_download': True, + } + } + + def _real_extract(self, url): + display_id = 'live' + webpage = self._download_webpage(url, display_id) + + now = datetime.datetime.now() + now_str = now.strftime("%Y-%m-%d %H:%M") + title = self._og_search_title(webpage) + ' ' + now_str + + data_js = self._search_regex( + r'(?s)\nplaylist:\s*(\[.*?}\]),related:', + webpage, 'playlist configuration') + data_json = js_to_json(data_js) + data = json.loads(data_json)[0] + + video_id = data['mediaid'] + thumbnail = data.get('image') + + formats = [] + for format_num, s in enumerate(data['sources']): + ext = determine_ext(s['file'], None) + label_str = s.get('label') + if label_str is None: + label_str = '_%d' % format_num + + if ext is None: + format_id = label_str + else: + format_id = '%s-%s' % (ext, label_str) + + formats.append({ + 'url': s['file'], + 'tbr': int_or_none(s.get('label')), + 'ext': 'mp4', + 'format_id': format_id, + 'preference': -100 if '.smil' in s['file'] else 0, + }) + self._sort_formats(formats) + + return { + 'id': video_id, + 'display_id': display_id, + 'title': title, + 'formats': formats, + 'is_live': True, + } + From f566d9f1d54a61497a17c5ed62a32ee1387483bd Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Fri, 19 Sep 2014 09:58:01 +0200 Subject: [PATCH 0023/1937] release 2014.09.19 --- youtube_dl/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 430509ba3..940e9c8cf 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2014.09.18' +__version__ = '2014.09.19' From 532f5bff70cc32f54f38fbce9233a88faf4423b9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 19 Sep 2014 20:58:50 +0700 Subject: [PATCH 0024/1937] [franceinter] Fix extraction and modernize --- youtube_dl/extractor/franceinter.py | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/franceinter.py b/youtube_dl/extractor/franceinter.py index deb1b0b9d..6613ee17a 100644 --- a/youtube_dl/extractor/franceinter.py +++ b/youtube_dl/extractor/franceinter.py @@ -4,16 +4,21 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from ..utils import int_or_none class FranceInterIE(InfoExtractor): - _VALID_URL = r'http://(?:www\.)?franceinter\.fr/player/reecouter\?play=(?P<id>[0-9]{6})' + _VALID_URL = r'http://(?:www\.)?franceinter\.fr/player/reecouter\?play=(?P<id>[0-9]+)' _TEST = { 'url': 'http://www.franceinter.fr/player/reecouter?play=793962', - 'file': '793962.mp3', 'md5': '4764932e466e6f6c79c317d2e74f6884', "info_dict": { - "title": "L’Histoire dans les jeux vidéo", + 'id': 
'793962', + 'ext': 'mp3', + 'title': 'L’Histoire dans les jeux vidéo', + 'description': 'md5:7e93ddb4451e7530022792240a3049c7', + 'timestamp': 1387369800, + 'upload_date': '20131218', }, } @@ -22,17 +27,26 @@ class FranceInterIE(InfoExtractor): video_id = mobj.group('id') webpage = self._download_webpage(url, video_id) - title = self._html_search_regex( - r'<span class="roll_overflow">(.*?)</span></h1>', webpage, 'title') + path = self._search_regex( - r'&urlAOD=(.*?)&startTime', webpage, 'video url') + r'<a id="player".+?href="([^"]+)"', webpage, 'video url') video_url = 'http://www.franceinter.fr/' + path + title = self._html_search_regex( + r'<span class="title">(.+?)</span>', webpage, 'title') + description = self._html_search_regex( + r'<span class="description">(.*?)</span>', + webpage, 'description', fatal=False) + timestamp = int_or_none(self._search_regex( + r'data-date="(\d+)"', webpage, 'upload date', fatal=False)) + return { 'id': video_id, + 'title': title, + 'description': description, + 'timestamp': timestamp, 'formats': [{ 'url': video_url, 'vcodec': 'none', }], - 'title': title, } From 5aa38e75b27b428b67f9f7083c44051881c98fd8 Mon Sep 17 00:00:00 2001 From: Carlos Ramos <carlos.ramos1@alu.uclm.es> Date: Fri, 19 Sep 2014 22:46:57 +0200 Subject: [PATCH 0025/1937] [played] Add new extractor --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/played.py | 57 ++++++++++++++++++++++++++++++++ 2 files changed, 58 insertions(+) create mode 100644 youtube_dl/extractor/played.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 625666acb..9ee3f9190 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -266,6 +266,7 @@ from .parliamentliveuk import ParliamentLiveUKIE from .patreon import PatreonIE from .pbs import PBSIE from .photobucket import PhotobucketIE +from .played import PlayedIE from .playfm import PlayFMIE from .playvid import PlayvidIE from .podomatic import PodomaticIE diff --git a/youtube_dl/extractor/played.py b/youtube_dl/extractor/played.py new file mode 100644 index 000000000..a396e62e5 --- /dev/null +++ b/youtube_dl/extractor/played.py @@ -0,0 +1,57 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re +import time +import os.path + +from .common import InfoExtractor +from ..utils import ( + compat_urllib_parse, + compat_urllib_request, +) + + +class PlayedIE(InfoExtractor): + IE_NAME = 'played.to' + _VALID_URL = r'https?://played\.to/(?P<id>[a-zA-Z0-9_-]+)' + + _TEST = { + 'url': 'http://played.to/j2f2sfiiukgt', + 'md5': 'c2bd75a368e82980e7257bf500c00637', + 'info_dict': { + 'id': 'j2f2sfiiukgt', + 'ext': 'flv', + 'title': 'youtube-dl_test_video.mp4', + }, + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + + orig_webpage = self._download_webpage(url, video_id) + fields = re.findall(r'type="hidden" name="(.+?)"\s* value="?(.+?)">', orig_webpage) + data = dict(fields) + + self.to_screen('%s: Waiting for timeout' % video_id) + time.sleep(2) + + post = compat_urllib_parse.urlencode(data) + headers = { + b'Content-Type': b'application/x-www-form-urlencoded', + } + req = compat_urllib_request.Request(url, post, headers) + webpage = self._download_webpage( + req, video_id, note='Downloading video page ...') + + title = os.path.splitext(data['fname'])[0] + + video_url = self._search_regex( + r'file: "?(.+?)",', webpage, 'video URL') + + return { + 'id': video_id, + 'title': title, + 'url': video_url, + } \ No 
newline at end of file From 746c67d72f760f2805dbc125e5a3863aa0d569e3 Mon Sep 17 00:00:00 2001 From: Naglis Jonaitis <njonaitis@gmail.com> Date: Sat, 20 Sep 2014 03:02:11 +0300 Subject: [PATCH 0026/1937] [wistia] Use API and make more generic --- youtube_dl/extractor/generic.py | 23 +++++++++++++++++++++++ youtube_dl/extractor/wistia.py | 15 +++++++++------ 2 files changed, 32 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 40eeaad16..2d77f604a 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -382,6 +382,19 @@ class GenericIE(InfoExtractor): 'thumbnail': 're:^https?://.*\.jpg$', }, }, + # Wistia embed + { + 'url': 'http://education-portal.com/academy/lesson/north-american-exploration-failed-colonies-of-spain-france-england.html#lesson', + 'md5': '8788b683c777a5cf25621eaf286d0c23', + 'info_dict': { + 'id': '1cfaf6b7ea', + 'ext': 'mov', + 'title': 'md5:51364a8d3d009997ba99656004b5e20d', + 'duration': 643.0, + 'filesize': 182808282, + 'uploader': 'education-portal.com', + }, + }, ] def report_download_webpage(self, video_id): @@ -654,6 +667,16 @@ class GenericIE(InfoExtractor): 'title': video_title, 'id': video_id, } + match = re.search(r'(?:id=["\']wistia_|data-wistiaid=["\']|Wistia\.embed\(["\'])(?P<id>[^"\']+)', webpage) + if match: + return { + '_type': 'url_transparent', + 'url': 'http://fast.wistia.net/embed/iframe/{0:}'.format(match.group('id')), + 'ie_key': 'Wistia', + 'uploader': video_uploader, + 'title': video_title, + 'id': match.group('id') + } # Look for embedded blip.tv player mobj = re.search(r'<meta\s[^>]*https?://api\.blip\.tv/\w+/redirect/\w+/(\d+)', webpage) diff --git a/youtube_dl/extractor/wistia.py b/youtube_dl/extractor/wistia.py index e6bfa9e14..748443f81 100644 --- a/youtube_dl/extractor/wistia.py +++ b/youtube_dl/extractor/wistia.py @@ -1,13 +1,14 @@ from __future__ import unicode_literals -import json import re from .common import InfoExtractor +from ..utils import ExtractorError, compat_urllib_request class WistiaIE(InfoExtractor): _VALID_URL = r'https?://(?:fast\.)?wistia\.net/embed/iframe/(?P<id>[a-z0-9]+)' + _API_URL = 'http://fast.wistia.com/embed/medias/{0:}.json' _TEST = { 'url': 'http://fast.wistia.net/embed/iframe/sh7fpupwlt', @@ -24,11 +25,13 @@ class WistiaIE(InfoExtractor): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') - webpage = self._download_webpage(url, video_id) - data_json = self._html_search_regex( - r'Wistia\.iframeInit\((.*?), {}\);', webpage, 'video data') - - data = json.loads(data_json) + request = compat_urllib_request.Request(self._API_URL.format(video_id)) + request.add_header('Referer', url) # Some videos require this. 
+ data_json = self._download_json(request, video_id) + if data_json.get('error'): + raise ExtractorError('Error while getting the playlist', + expected=True) + data = data_json['media'] formats = [] thumbnails = [] From 3e8fcd9fa1ae23ee3f0370dd948411a5f74c03dc Mon Sep 17 00:00:00 2001 From: Marco Schuster <marco+github@m-s-d.eu> Date: Sat, 20 Sep 2014 02:32:41 +0200 Subject: [PATCH 0027/1937] [divxstage] added .to TLD Example video "http://www.divxstage.eu/video/930c52709d2" which gets redirected to .to TLD --- youtube_dl/extractor/divxstage.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/divxstage.py b/youtube_dl/extractor/divxstage.py index 4ca3f37a2..b88379e06 100644 --- a/youtube_dl/extractor/divxstage.py +++ b/youtube_dl/extractor/divxstage.py @@ -7,7 +7,7 @@ class DivxStageIE(NovaMovIE): IE_NAME = 'divxstage' IE_DESC = 'DivxStage' - _VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': 'divxstage\.(?:eu|net|ch|co|at|ag)'} + _VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': 'divxstage\.(?:eu|net|ch|co|at|ag|to)'} _HOST = 'www.divxstage.eu' @@ -24,4 +24,4 @@ class DivxStageIE(NovaMovIE): 'title': 'youtubedl test video', 'description': 'This is a test video for youtubedl.', } - } \ No newline at end of file + } From 752297631ffd9a51535e650f4444a36f820f01f4 Mon Sep 17 00:00:00 2001 From: Sergey M <dstftw@gmail.com> Date: Sun, 21 Sep 2014 06:20:42 +0700 Subject: [PATCH 0028/1937] [noco] Adapt to API v1.1 (Closes #3797) --- youtube_dl/extractor/noco.py | 73 ++++++++++++++++++++++++++---------- 1 file changed, 53 insertions(+), 20 deletions(-) diff --git a/youtube_dl/extractor/noco.py b/youtube_dl/extractor/noco.py index 959fdf590..e3ec9ed15 100644 --- a/youtube_dl/extractor/noco.py +++ b/youtube_dl/extractor/noco.py @@ -2,6 +2,8 @@ from __future__ import unicode_literals import re +import time +import hashlib from .common import InfoExtractor from ..utils import ( @@ -17,6 +19,7 @@ from ..utils import ( class NocoIE(InfoExtractor): _VALID_URL = r'http://(?:(?:www\.)?noco\.tv/emission/|player\.noco\.tv/\?idvideo=)(?P<id>\d+)' _LOGIN_URL = 'http://noco.tv/do.php' + _API_URL_TEMPLATE = 'https://api.noco.tv/1.1/%s?ts=%s&tk=%s' _NETRC_MACHINE = 'noco' _TEST = { @@ -55,33 +58,52 @@ class NocoIE(InfoExtractor): login = self._download_json(request, None, 'Logging in as %s' % username) if 'erreur' in login: - raise ExtractorError('Unable to login: %s' % clean_html(login['erreur']), expected=True) + raise ExtractorError('Unable to login: %s' % clean_html(login['erreur']), expected=True) + + def _call_api(self, path, video_id, note): + ts = compat_str(int(time.time() * 1000)) + tk = hashlib.md5(hashlib.md5(ts).hexdigest() + '#8S?uCraTedap6a').hexdigest() + url = self._API_URL_TEMPLATE % (path, ts, tk) + + resp = self._download_json(url, video_id, note) + + if isinstance(resp, dict) and resp.get('error'): + self._raise_error(resp['error'], resp['description']) + + return resp + + def _raise_error(self, error, description): + raise ExtractorError( + '%s returned error: %s - %s' % (self.IE_NAME, error, description), + expected=True) def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') - medias = self._download_json( - 'https://api.noco.tv/1.0/video/medias/%s' % video_id, video_id, 'Downloading video JSON') + medias = self._call_api( + 'shows/%s/medias' % video_id, + video_id, 'Downloading video JSON') + + qualities = self._call_api( + 'qualities', + video_id, 'Downloading qualities JSON') formats = [] - for fmt 
in medias['fr']['video_list']['default']['quality_list']: - format_id = fmt['quality_key'] + for format_id, fmt in medias['fr']['video_list']['none']['quality_list'].items(): - file = self._download_json( - 'https://api.noco.tv/1.0/video/file/%s/fr/%s' % (format_id.lower(), video_id), + video = self._call_api( + 'shows/%s/video/%s/fr' % (video_id, format_id.lower()), video_id, 'Downloading %s video JSON' % format_id) - file_url = file['file'] + file_url = video['file'] if not file_url: continue - if file_url == 'forbidden': - raise ExtractorError( - '%s returned error: %s - %s' % ( - self.IE_NAME, file['popmessage']['title'], file['popmessage']['message']), - expected=True) + if file_url in ['forbidden', 'not found']: + popmessage = video['popmessage'] + self._raise_error(popmessage['title'], popmessage['message']) formats.append({ 'url': file_url, @@ -91,20 +113,31 @@ class NocoIE(InfoExtractor): 'abr': fmt['audiobitrate'], 'vbr': fmt['videobitrate'], 'filesize': fmt['filesize'], - 'format_note': fmt['quality_name'], - 'preference': fmt['priority'], + 'format_note': qualities[format_id]['quality_name'], + 'preference': qualities[format_id]['priority'], }) self._sort_formats(formats) - show = self._download_json( - 'https://api.noco.tv/1.0/shows/show/%s' % video_id, video_id, 'Downloading show JSON')[0] + show = self._call_api( + 'shows/by_id/%s' % video_id, + video_id, 'Downloading show JSON')[0] - upload_date = unified_strdate(show['indexed']) + upload_date = unified_strdate(show['online_date_start_utc']) uploader = show['partner_name'] uploader_id = show['partner_key'] duration = show['duration_ms'] / 1000.0 - thumbnail = show['screenshot'] + + thumbnails = [] + for thumbnail_key, thumbnail_url in show.items(): + m = re.search(r'^screenshot_(?P<width>\d+)x(?P<height>\d+)$', thumbnail_key) + if not m: + continue + thumbnails.append({ + 'url': thumbnail_url, + 'width': int(m.group('width')), + 'height': int(m.group('height')), + }) episode = show.get('show_TT') or show.get('show_OT') family = show.get('family_TT') or show.get('family_OT') @@ -124,7 +157,7 @@ class NocoIE(InfoExtractor): 'id': video_id, 'title': title, 'description': description, - 'thumbnail': thumbnail, + 'thumbnails': thumbnails, 'upload_date': upload_date, 'uploader': uploader, 'uploader_id': uploader_id, From 58e7071a2ced491a6ecd8a8bcb1b4533a2b0fd8f Mon Sep 17 00:00:00 2001 From: Sergey M <dstftw@gmail.com> Date: Sun, 21 Sep 2014 06:37:11 +0700 Subject: [PATCH 0029/1937] [tube8] Improve _VALID_URL and add display_id --- youtube_dl/extractor/tube8.py | 37 ++++++++++++++++++++++------------- 1 file changed, 23 insertions(+), 14 deletions(-) diff --git a/youtube_dl/extractor/tube8.py b/youtube_dl/extractor/tube8.py index 39f20c546..64a1e9030 100644 --- a/youtube_dl/extractor/tube8.py +++ b/youtube_dl/extractor/tube8.py @@ -14,27 +14,35 @@ from ..aes import aes_decrypt_text class Tube8IE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?tube8\.com/(?:gay/|shemale/)?(?:[^/]+/){2}(?P<id>\d+)' - _TEST = { - 'url': 'http://www.tube8.com/teen/kasia-music-video/229795/', - 'md5': '44bf12b98313827dd52d35b8706a4ea0', - 'info_dict': { - 'id': '229795', - 'ext': 'mp4', - 'description': 'hot teen Kasia grinding', - 'uploader': 'unknown', - 'title': 'Kasia music video', - 'age_limit': 18, - } - } + _VALID_URL = r'https?://(?:www\.)?tube8\.com/(?:[^/]+/)+(?P<display_id>[^/]+)/(?P<id>\d+)' + _TESTS = [ + { + 'url': 'http://www.tube8.com/teen/kasia-music-video/229795/', + 'md5': '44bf12b98313827dd52d35b8706a4ea0', + 'info_dict': { + 
'id': '229795', + 'display_id': 'kasia-music-video', + 'ext': 'mp4', + 'description': 'hot teen Kasia grinding', + 'uploader': 'unknown', + 'title': 'Kasia music video', + 'age_limit': 18, + } + }, + { + 'url': 'http://www.tube8.com/shemale/teen/blonde-cd-gets-kidnapped-by-two-blacks-and-punished-for-being-a-slutty-girl/19569151/', + 'only_matching': True, + }, + ] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') + display_id = mobj.group('display_id') req = compat_urllib_request.Request(url) req.add_header('Cookie', 'age_verified=1') - webpage = self._download_webpage(req, video_id) + webpage = self._download_webpage(req, display_id) flashvars = json.loads(self._html_search_regex( r'var flashvars\s*=\s*({.+?})', webpage, 'flashvars')) @@ -70,6 +78,7 @@ class Tube8IE(InfoExtractor): return { 'id': video_id, + 'display_id': display_id, 'url': video_url, 'title': title, 'description': description, From 522c55b7f2622b6138a2781db362d822b4fed32d Mon Sep 17 00:00:00 2001 From: Naglis Jonaitis <njonaitis@gmail.com> Date: Sun, 21 Sep 2014 03:26:12 +0300 Subject: [PATCH 0030/1937] [mgoon] Add new extractor --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/mgoon.py | 87 ++++++++++++++++++++++++++++++++ 2 files changed, 88 insertions(+) create mode 100644 youtube_dl/extractor/mgoon.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 625666acb..fb546eeae 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -199,6 +199,7 @@ from .malemotion import MalemotionIE from .mdr import MDRIE from .metacafe import MetacafeIE from .metacritic import MetacriticIE +from .mgoon import MgoonIE from .ministrygrid import MinistryGridIE from .mit import TechTVMITIE, MITIE, OCWMITIE from .mitele import MiTeleIE diff --git a/youtube_dl/extractor/mgoon.py b/youtube_dl/extractor/mgoon.py new file mode 100644 index 000000000..94bc87b00 --- /dev/null +++ b/youtube_dl/extractor/mgoon.py @@ -0,0 +1,87 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + qualities, + unified_strdate, +) + + +class MgoonIE(InfoExtractor): + _VALID_URL = r'''(?x)https?://(?:www\.)? 
+ (?:(:?m\.)?mgoon\.com/(?:ch/(?:.+)/v|play/view)| + video\.mgoon\.com)/(?P<id>[0-9]+)''' + _API_URL = 'http://mpos.mgoon.com/player/video?id={0:}' + _TESTS = [ + { + 'url': 'http://m.mgoon.com/ch/hi6618/v/5582148', + 'md5': 'dd46bb66ab35cf6d51cc812fd82da79d', + 'info_dict': { + 'id': '5582148', + 'uploader_id': 'hi6618', + 'duration': 240.419, + 'upload_date': '20131220', + 'ext': 'mp4', + 'title': 'md5:543aa4c27a4931d371c3f433e8cebebc', + 'thumbnail': 're:^https?://.*\.jpg$', + } + }, + { + 'url': 'http://www.mgoon.com/play/view/5582148', + 'only_matching': True, + }, + { + 'url': 'http://video.mgoon.com/5582148', + 'only_matching': True, + }, + ] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + + data = self._download_json(self._API_URL.format(video_id), video_id) + + if data.get('errorInfo', {}).get('code') != 'NONE': + raise ExtractorError('%s encountered an error: %s' % ( + self.IE_NAME, data['errorInfo']['message']), expected=True) + + v_info = data['videoInfo'] + title = v_info.get('v_title') + thumbnail = v_info.get('v_thumbnail') + duration = v_info.get('v_duration') + upload_date = unified_strdate(v_info.get('v_reg_date')) + uploader_id = data.get('userInfo', {}).get('u_alias') + if duration: + duration /= 1000.0 + + age_limit = None + if data.get('accessInfo', {}).get('code') == 'VIDEO_STATUS_ADULT': + age_limit = 18 + + formats = [] + get_quality = qualities(['360p', '480p', '720p', '1080p']) + for fmt in data['videoFiles']: + formats.append({ + 'format_id': fmt['label'], + 'quality': get_quality(fmt['label']), + 'url': fmt['url'], + 'ext': fmt['format'], + + }) + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': title, + 'formats': formats, + 'thumbnail': thumbnail, + 'duration': duration, + 'upload_date': upload_date, + 'uploader_id': uploader_id, + 'age_limit': age_limit, + } From 72e450c5550ae26b5b36216be1c001f64479c773 Mon Sep 17 00:00:00 2001 From: Anton Larionov <diffident.cat@gmail.com> Date: Sun, 21 Sep 2014 13:21:29 +0400 Subject: [PATCH 0031/1937] [thvideo] Add support for THVideo --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/thvideo.py | 55 ++++++++++++++++++++++++++++++++ 2 files changed, 56 insertions(+) create mode 100644 youtube_dl/extractor/thvideo.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index fb546eeae..ae5b4e9e6 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -362,6 +362,7 @@ from .thisav import ThisAVIE from .tinypic import TinyPicIE from .tlc import TlcIE, TlcDeIE from .tnaflix import TNAFlixIE +from .thvideo import THVideoIE from .toutv import TouTvIE from .toypics import ToypicsUserIE, ToypicsIE from .traileraddict import TrailerAddictIE diff --git a/youtube_dl/extractor/thvideo.py b/youtube_dl/extractor/thvideo.py new file mode 100644 index 000000000..9fa14d3c4 --- /dev/null +++ b/youtube_dl/extractor/thvideo.py @@ -0,0 +1,55 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + unified_strdate +) + + +class THVideoIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?thvideo\.tv/v/th(?P<id>[0-9]+)' + _TEST = { + 'url': 'http://thvideo.tv/v/th1987/', + 'md5': 'fa107b1f73817e325e9433505a70db50', + 'info_dict': { + 'id': '1987', + 'ext': 'mp4', + 'title': '【动画】秘封活动记录 ~ The Sealed Esoteric History.分镜稿预览', + 'display_id': 'th1987', + 'thumbnail': 
'http://thvideo.tv/uploadfile/2014/0722/20140722013459856.jpg', + 'description': '社团京都幻想剧团的第一个东方二次同人动画作品「秘封活动记录 ~ The Sealed Esoteric History.」 本视频是该动画第一期的分镜草稿...', + 'upload_date': '20140722' + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + + # extract download link from mobile player page + webpage_player = self._download_webpage('http://thvideo.tv/mobile.php?cid=%s-0' % video_id, video_id) + video_url = self._html_search_regex(r'<source src="(.*?)" type', webpage_player, 'video url') + + # extract video info from main page + webpage = self._download_webpage(url, video_id) + title = self._og_search_title(webpage) + display_id = 'th%s' % video_id + thumbnail = self._og_search_thumbnail(webpage) + description = self._og_search_description(webpage) + upload_date_raw = self._html_search_regex(r'span itemprop="datePublished" content="(.*?)">', webpage, + 'upload date', fatal=False) + upload_date = unified_strdate(upload_date_raw) + + return { + 'id': video_id, + 'ext': 'mp4', + 'url': video_url, + 'title': title, + 'display_id': display_id, + 'thumbnail': thumbnail, + 'description': description, + 'upload_date': upload_date + } \ No newline at end of file From 7bd4b4229a126a9f47035beec8a13eff08804850 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Sun, 21 Sep 2014 13:40:22 +0200 Subject: [PATCH 0032/1937] [dropbox] Recognize 'https://www.dropbox.com/sh/*' urls (fixes #3795) And extract the title from the url last path component. --- youtube_dl/extractor/dropbox.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/dropbox.py b/youtube_dl/extractor/dropbox.py index 1e1763abf..817a9bd61 100644 --- a/youtube_dl/extractor/dropbox.py +++ b/youtube_dl/extractor/dropbox.py @@ -5,24 +5,29 @@ import os.path import re from .common import InfoExtractor -from ..utils import compat_urllib_parse_unquote +from ..utils import compat_urllib_parse_unquote, url_basename class DropboxIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?dropbox[.]com/s/(?P<id>[a-zA-Z0-9]{15})/(?P<title>[^?#]*)' - _TEST = { + _VALID_URL = r'https?://(?:www\.)?dropbox[.]com/sh?/(?P<id>[a-zA-Z0-9]{15})/.*' + _TESTS = [{ 'url': 'https://www.dropbox.com/s/nelirfsxnmcfbfh/youtube-dl%20test%20video%20%27%C3%A4%22BaW_jenozKc.mp4?dl=0', 'info_dict': { 'id': 'nelirfsxnmcfbfh', 'ext': 'mp4', 'title': 'youtube-dl test video \'ä"BaW_jenozKc' } - } + }, + { + 'url': 'https://www.dropbox.com/sh/662glsejgzoj9sr/AAByil3FGH9KFNZ13e08eSa1a/Pregame%20Ceremony%20Program%20PA%2020140518.m4v', + 'only_matching': True, + }, + ] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') - fn = compat_urllib_parse_unquote(mobj.group('title')) + fn = compat_urllib_parse_unquote(url_basename(url)) title = os.path.splitext(fn)[0] video_url = ( re.sub(r'[?&]dl=0', '', url) + From b28c8403b2c1ef51f04520e8116176b1fee12dcb Mon Sep 17 00:00:00 2001 From: Naglis Jonaitis <njonaitis@gmail.com> Date: Sun, 21 Sep 2014 15:13:35 +0300 Subject: [PATCH 0033/1937] [yourupload] Add new extractor. 
Fixes #3085 --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/yourupload.py | 58 ++++++++++++++++++++++++++++++ 2 files changed, 59 insertions(+) create mode 100644 youtube_dl/extractor/yourupload.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index fb546eeae..1a6033320 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -448,6 +448,7 @@ from .yahoo import ( from .youjizz import YouJizzIE from .youku import YoukuIE from .youporn import YouPornIE +from .yourupload import YourUploadIE from .youtube import ( YoutubeIE, YoutubeChannelIE, diff --git a/youtube_dl/extractor/yourupload.py b/youtube_dl/extractor/yourupload.py new file mode 100644 index 000000000..40fc4165f --- /dev/null +++ b/youtube_dl/extractor/yourupload.py @@ -0,0 +1,58 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor + + +class YourUploadIE(InfoExtractor): + _VALID_URL = r'''(?x)https?://(?:www\.)? + (?:yourupload\.com/watch| + embed\.yourupload\.com| + embed\.yucache\.net + )/(?P<id>[A-Za-z0-9]+) + ''' + _TESTS = [ + { + 'url': 'http://yourupload.com/watch/14i14h', + 'md5': 'bf5c2f95c4c917536e80936af7bc51e1', + 'info_dict': { + 'id': '14i14h', + 'ext': 'mp4', + 'title': 'BigBuckBunny_320x180.mp4', + 'thumbnail': 're:^https?://.*\.jpe?g', + } + }, + { + 'url': 'http://embed.yourupload.com/14i14h', + 'only_matching': True, + }, + { + 'url': 'http://embed.yucache.net/14i14h?client_file_id=803349', + 'only_matching': True, + }, + ] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + + url = 'http://embed.yucache.net/{0:}'.format(video_id) + webpage = self._download_webpage(url, video_id) + + title = self._og_search_title(webpage) + thumbnail = self._og_search_thumbnail(webpage) + url = self._og_search_video_url(webpage) + + formats = [{ + 'format_id': 'sd', + 'url': url, + }] + + return { + 'id': video_id, + 'title': title, + 'formats': formats, + 'thumbnail': thumbnail, + } From b509a4b17643422b750e5258f538894105c58d42 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Sun, 21 Sep 2014 15:43:09 +0200 Subject: [PATCH 0034/1937] [downloader/f4m] If <pv-2.0> is in the manifest, add it to the fragments urls query (fixes #3176) It's used in some akamai videos (for example for theplatform.com). --- youtube_dl/downloader/f4m.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/youtube_dl/downloader/f4m.py b/youtube_dl/downloader/f4m.py index 71353f607..b3be16ff1 100644 --- a/youtube_dl/downloader/f4m.py +++ b/youtube_dl/downloader/f4m.py @@ -16,6 +16,7 @@ from ..utils import ( format_bytes, encodeFilename, sanitize_open, + xpath_text, ) @@ -251,6 +252,8 @@ class F4mFD(FileDownloader): # We only download the first fragment fragments_list = fragments_list[:1] total_frags = len(fragments_list) + # For some akamai manifests we'll need to add a query to the fragment url + akamai_pv = xpath_text(doc, _add_ns('pv-2.0')) tmpfilename = self.temp_name(filename) (dest_stream, tmpfilename) = sanitize_open(tmpfilename, 'wb') @@ -290,6 +293,8 @@ class F4mFD(FileDownloader): for (seg_i, frag_i) in fragments_list: name = 'Seg%d-Frag%d' % (seg_i, frag_i) url = base_url + name + if akamai_pv: + url += '?' 
+ akamai_pv.strip(';') frag_filename = '%s-%s' % (tmpfilename, name) success = http_dl.download(frag_filename, {'url': url}) if not success: From dd41e8c82bb14bda0c407f9f0865cfb112e8fc30 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Sun, 21 Sep 2014 15:47:58 +0200 Subject: [PATCH 0035/1937] [theplatform] Extract all formats for f4m videos --- youtube_dl/extractor/theplatform.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/youtube_dl/extractor/theplatform.py b/youtube_dl/extractor/theplatform.py index b6b2dba9c..031a958fa 100644 --- a/youtube_dl/extractor/theplatform.py +++ b/youtube_dl/extractor/theplatform.py @@ -62,10 +62,7 @@ class ThePlatformIE(InfoExtractor): # the parameters are from syfy.com, other sites may use others, # they also work for nbc.com f4m_url += '&g=UXWGVKRWHFSP&hdcore=3.0.3' - formats = [{ - 'ext': 'flv', - 'url': f4m_url, - }] + formats = self._extract_f4m_formats(f4m_url, video_id) else: base_url = head.find(_x('smil:meta')).attrib['base'] switch = body.find(_x('smil:switch')) From 224ce0d87299cf54469baccb9922e78f9594d029 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Sun, 21 Sep 2014 15:49:04 +0200 Subject: [PATCH 0036/1937] [nbc] Update test --- youtube_dl/extractor/nbc.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/nbc.py b/youtube_dl/extractor/nbc.py index d2e4acbad..e75ab7c39 100644 --- a/youtube_dl/extractor/nbc.py +++ b/youtube_dl/extractor/nbc.py @@ -16,9 +16,9 @@ class NBCIE(InfoExtractor): _TEST = { 'url': 'http://www.nbc.com/chicago-fire/video/i-am-a-firefighter/2734188', - 'md5': '54d0fbc33e0b853a65d7b4de5c06d64e', + # md5 checksum is not stable 'info_dict': { - 'id': 'u1RInQZRN7QJ', + 'id': 'bTmnLCvIbaaH', 'ext': 'flv', 'title': 'I Am a Firefighter', 'description': 'An emergency puts Dawson\'sf irefighter skills to the ultimate test in this four-part digital series.', From e35cb78c4099263c26f717669463a3c025c30d17 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Sun, 21 Sep 2014 16:08:38 +0200 Subject: [PATCH 0037/1937] [theplatform] Correctly extract videos that don't use f4m or rtmp (reported in #3176) --- youtube_dl/extractor/sbs.py | 2 +- youtube_dl/extractor/theplatform.py | 48 ++++++++++++++++++----------- 2 files changed, 31 insertions(+), 19 deletions(-) diff --git a/youtube_dl/extractor/sbs.py b/youtube_dl/extractor/sbs.py index 34058fd4b..214990e7a 100644 --- a/youtube_dl/extractor/sbs.py +++ b/youtube_dl/extractor/sbs.py @@ -21,7 +21,7 @@ class SBSIE(InfoExtractor): 'md5': '3150cf278965eeabb5b4cea1c963fe0a', 'info_dict': { 'id': '320403011771', - 'ext': 'flv', + 'ext': 'mp4', 'title': 'Dingo Conservation', 'description': 'Dingoes are on the brink of extinction; most of the animals we think are dingoes are in fact crossbred with wild dogs. 
This family run a dingo conservation park to prevent their extinction', 'thumbnail': 're:http://.*\.jpg', diff --git a/youtube_dl/extractor/theplatform.py b/youtube_dl/extractor/theplatform.py index 031a958fa..0be793b1c 100644 --- a/youtube_dl/extractor/theplatform.py +++ b/youtube_dl/extractor/theplatform.py @@ -5,6 +5,7 @@ import json from .common import InfoExtractor from ..utils import ( + compat_str, ExtractorError, xpath_with_ns, ) @@ -55,7 +56,7 @@ class ThePlatformIE(InfoExtractor): body = meta.find(_x('smil:body')) f4m_node = body.find(_x('smil:seq//smil:video')) - if f4m_node is not None: + if f4m_node is not None and '.f4m' in f4m_node.attrib['src']: f4m_url = f4m_node.attrib['src'] if 'manifest.f4m?' not in f4m_url: f4m_url += '?' @@ -64,24 +65,35 @@ class ThePlatformIE(InfoExtractor): f4m_url += '&g=UXWGVKRWHFSP&hdcore=3.0.3' formats = self._extract_f4m_formats(f4m_url, video_id) else: - base_url = head.find(_x('smil:meta')).attrib['base'] - switch = body.find(_x('smil:switch')) formats = [] - for f in switch.findall(_x('smil:video')): - attr = f.attrib - width = int(attr['width']) - height = int(attr['height']) - vbr = int(attr['system-bitrate']) // 1000 - format_id = '%dx%d_%dk' % (width, height, vbr) - formats.append({ - 'format_id': format_id, - 'url': base_url, - 'play_path': 'mp4:' + attr['src'], - 'ext': 'flv', - 'width': width, - 'height': height, - 'vbr': vbr, - }) + switch = body.find(_x('smil:switch')) + if switch is not None: + base_url = head.find(_x('smil:meta')).attrib['base'] + for f in switch.findall(_x('smil:video')): + attr = f.attrib + width = int(attr['width']) + height = int(attr['height']) + vbr = int(attr['system-bitrate']) // 1000 + format_id = '%dx%d_%dk' % (width, height, vbr) + formats.append({ + 'format_id': format_id, + 'url': base_url, + 'play_path': 'mp4:' + attr['src'], + 'ext': 'flv', + 'width': width, + 'height': height, + 'vbr': vbr, + }) + else: + switch = body.find(_x('smil:seq//smil:switch')) + for f in switch.findall(_x('smil:video')): + attr = f.attrib + vbr = int(attr['system-bitrate']) // 1000 + formats.append({ + 'format_id': compat_str(vbr), + 'url': attr['src'], + 'vbr': vbr, + }) self._sort_formats(formats) return { From df8f53f752c0f01577dcc5d63c6d9a81d924770b Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Sun, 21 Sep 2014 16:32:38 +0200 Subject: [PATCH 0038/1937] [thvideo] Support mobile URLs as well --- youtube_dl/extractor/thvideo.py | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/thvideo.py b/youtube_dl/extractor/thvideo.py index 9fa14d3c4..607e947bb 100644 --- a/youtube_dl/extractor/thvideo.py +++ b/youtube_dl/extractor/thvideo.py @@ -10,7 +10,7 @@ from ..utils import ( class THVideoIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?thvideo\.tv/v/th(?P<id>[0-9]+)' + _VALID_URL = r'http://(?:www\.)?thvideo\.tv/(?:v/th|mobile\.php\?cid=)(?P<id>[0-9]+)' _TEST = { 'url': 'http://thvideo.tv/v/th1987/', 'md5': 'fa107b1f73817e325e9433505a70db50', @@ -30,18 +30,22 @@ class THVideoIE(InfoExtractor): video_id = mobj.group('id') # extract download link from mobile player page - webpage_player = self._download_webpage('http://thvideo.tv/mobile.php?cid=%s-0' % video_id, video_id) - video_url = self._html_search_regex(r'<source src="(.*?)" type', webpage_player, 'video url') + webpage_player = self._download_webpage( + 'http://thvideo.tv/mobile.php?cid=%s-0' % (video_id), + video_id, note='Downloading video source page') + video_url = 
self._html_search_regex( + r'<source src="(.*?)" type', webpage_player, 'video url') # extract video info from main page - webpage = self._download_webpage(url, video_id) + webpage = self._download_webpage( + 'http://thvideo.tv/v/th%s' % (video_id), video_id) title = self._og_search_title(webpage) display_id = 'th%s' % video_id thumbnail = self._og_search_thumbnail(webpage) description = self._og_search_description(webpage) - upload_date_raw = self._html_search_regex(r'span itemprop="datePublished" content="(.*?)">', webpage, - 'upload date', fatal=False) - upload_date = unified_strdate(upload_date_raw) + upload_date = unified_strdate(self._html_search_regex( + r'span itemprop="datePublished" content="(.*?)">', webpage, + 'upload date', fatal=False)) return { 'id': video_id, @@ -52,4 +56,4 @@ class THVideoIE(InfoExtractor): 'thumbnail': thumbnail, 'description': description, 'upload_date': upload_date - } \ No newline at end of file + } From d0df92928bc099775e18f6413e387713839012ba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Sun, 21 Sep 2014 16:53:00 +0200 Subject: [PATCH 0039/1937] [npo] Add extractor for tegenlicht.vpro.nl (closes #3778) --- youtube_dl/extractor/__init__.py | 5 ++++- youtube_dl/extractor/npo.py | 30 ++++++++++++++++++++++++++++++ 2 files changed, 34 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 1a6033320..bca34ae73 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -249,7 +249,10 @@ from .nosvideo import NosVideoIE from .novamov import NovaMovIE from .nowness import NownessIE from .nowvideo import NowVideoIE -from .npo import NPOIE +from .npo import ( + NPOIE, + TegenlichtVproIE, +) from .nrk import ( NRKIE, NRKTVIE, diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py index 7a154e94a..f36d446d2 100644 --- a/youtube_dl/extractor/npo.py +++ b/youtube_dl/extractor/npo.py @@ -7,6 +7,7 @@ from ..utils import ( unified_strdate, parse_duration, qualities, + url_basename, ) @@ -55,7 +56,9 @@ class NPOIE(InfoExtractor): def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') + return self._get_info(video_id) + def _get_info(self, video_id): metadata = self._download_json( 'http://e.omroep.nl/metadata/aflevering/%s' % video_id, video_id, @@ -106,3 +109,30 @@ class NPOIE(InfoExtractor): 'duration': parse_duration(metadata.get('tijdsduur')), 'formats': formats, } + + +class TegenlichtVproIE(NPOIE): + IE_NAME = 'tegenlicht.vpro.nl' + _VALID_URL = r'https?://tegenlicht\.vpro\.nl/afleveringen/.*?' 
+ + _TESTS = [ + { + 'url': 'http://tegenlicht.vpro.nl/afleveringen/2012-2013/de-toekomst-komt-uit-afrika.html', + 'md5': 'f8065e4e5a7824068ed3c7e783178f2c', + 'info_dict': { + 'id': 'VPWON_1169289', + 'ext': 'm4v', + 'title': 'Tegenlicht', + 'description': 'md5:d6476bceb17a8c103c76c3b708f05dd1', + 'upload_date': '20130225', + }, + }, + ] + + def _real_extract(self, url): + name = url_basename(url) + webpage = self._download_webpage(url, name) + urn = self._html_search_meta('mediaurn', webpage) + info_page = self._download_json( + 'http://rs.vpro.nl/v2/api/media/%s.json' % urn, name) + return self._get_info(info_page['mid']) From f90d95edeb981481834f4b092b4c2ac793f225f9 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Mon, 22 Sep 2014 13:07:23 +0200 Subject: [PATCH 0040/1937] release 2014.09.22 --- youtube_dl/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 940e9c8cf..4bf208b67 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2014.09.19' +__version__ = '2014.09.22' From 273dea42487461884926b4d810ebf74e541dc8b0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 22 Sep 2014 18:58:22 +0700 Subject: [PATCH 0041/1937] [playfm] Fix view count and add comment count --- youtube_dl/extractor/playfm.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/playfm.py b/youtube_dl/extractor/playfm.py index 72df4d842..ebc046804 100644 --- a/youtube_dl/extractor/playfm.py +++ b/youtube_dl/extractor/playfm.py @@ -10,6 +10,7 @@ from ..utils import ( ExtractorError, float_or_none, int_or_none, + str_to_int, ) @@ -29,6 +30,7 @@ class PlayFMIE(InfoExtractor): 'duration': 5627.428, 'upload_date': '20140712', 'view_count': int, + 'comment_count': int, 'thumbnail': 're:^https?://.*\.jpg$', }, } @@ -51,7 +53,8 @@ class PlayFMIE(InfoExtractor): recording = rec_doc.find('./recording') title = recording.find('./title').text - view_count = int_or_none(recording.find('./stats/playcount').text) + view_count = str_to_int(recording.find('./stats/playcount').text) + comment_count = str_to_int(recording.find('./stats/comments').text) duration = float_or_none(recording.find('./duration').text, scale=1000) thumbnail = recording.find('./image').text @@ -75,6 +78,7 @@ class PlayFMIE(InfoExtractor): 'title': title, 'upload_date': upload_date, 'view_count': view_count, + 'comment_count': comment_count, 'duration': duration, 'thumbnail': thumbnail, 'uploader': uploader, From 63cddb6477e785ca2bfb6e3bb1ac2af20aa9842c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Mon, 22 Sep 2014 14:11:08 +0200 Subject: [PATCH 0042/1937] [sbs] Recognize urls with format 'http://www.sbs.com.au/ondemand/video/<id>' (#3811) --- youtube_dl/extractor/sbs.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/sbs.py b/youtube_dl/extractor/sbs.py index 214990e7a..409f8540a 100644 --- a/youtube_dl/extractor/sbs.py +++ b/youtube_dl/extractor/sbs.py @@ -12,7 +12,7 @@ from ..utils import ( class SBSIE(InfoExtractor): IE_DESC = 'sbs.com.au' - _VALID_URL = r'https?://(?:www\.)?sbs\.com\.au/ondemand/video/single/(?P<id>[0-9]+)/' + _VALID_URL = r'https?://(?:www\.)?sbs\.com\.au/ondemand/video/(?:single/)?(?P<id>[0-9]+)' _TESTS = [{ # Original URL is handled by the generic IE which finds the iframe: @@ -27,6 +27,10 
@@ class SBSIE(InfoExtractor): 'thumbnail': 're:http://.*\.jpg', }, 'add_ies': ['generic'], + }, + { + 'url': 'http://www.sbs.com.au/ondemand/video/320403011771/Dingo-Conservation-The-Feed', + 'only_matching': True, }] def _real_extract(self, url): From 094d42fe443c8f7ad5bd9049d63317195ab8fd3a Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Mon, 22 Sep 2014 18:15:07 +0200 Subject: [PATCH 0043/1937] release 2014.09.22.1 --- youtube_dl/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 4bf208b67..2853c79c9 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2014.09.22' +__version__ = '2014.09.22.1' From 632e5684ce797eb8a7372eb25dd4ce299f2e66de Mon Sep 17 00:00:00 2001 From: Naglis Jonaitis <njonaitis@gmail.com> Date: Tue, 23 Sep 2014 00:28:19 +0300 Subject: [PATCH 0044/1937] [nfl] Add new extractor. (Closes #3815) --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/nfl.py | 103 +++++++++++++++++++++++++++++++ 2 files changed, 104 insertions(+) create mode 100644 youtube_dl/extractor/nfl.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 244d22297..1f1fc0eb2 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -240,6 +240,7 @@ from .ndtv import NDTVIE from .newgrounds import NewgroundsIE from .newstube import NewstubeIE from .nfb import NFBIE +from .nfl import NFLIE from .nhl import NHLIE, NHLVideocenterIE from .niconico import NiconicoIE from .ninegag import NineGagIE diff --git a/youtube_dl/extractor/nfl.py b/youtube_dl/extractor/nfl.py new file mode 100644 index 000000000..f53596f5e --- /dev/null +++ b/youtube_dl/extractor/nfl.py @@ -0,0 +1,103 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + int_or_none, + remove_end, +) + + +class NFLIE(InfoExtractor): + IE_NAME = 'nfl.com' + _VALID_URL = r'(?x)https?://(?:www\.)?nfl\.com/(?:videos/(?:.+)/|.*?\#video=)(?P<id>\d..[0-9]+)' + _PLAYER_CONFIG_URL = 'http://www.nfl.com/static/content/static/config/video/config.json' + _TEST = { + 'url': 'http://www.nfl.com/videos/nfl-game-highlights/0ap3000000398478/Week-3-Redskins-vs-Eagles-highlights', + 'skip_download': True, # md5 sum fluctuates + 'info_dict': { + 'id': '0ap3000000398478', + 'ext': 'mp4', + 'title': 'Week 3: Washington Redskins vs. 
Philadelphia Eagles highlights', + 'description': 'md5:56323bfb0ac4ee5ab24bd05fdf3bf478', + 'upload_date': '20140921', + 'timestamp': 1411337580, + 'thumbnail': 're:^https?://.*\.jpg$', + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + + config = self._download_json(self._PLAYER_CONFIG_URL, video_id, + note='Downloading player config') + url_template = 'http://nfl.com{contentURLTemplate:s}'.format(**config) + video_data = self._download_json(url_template.format(id=video_id), video_id) + + cdns = config.get('cdns') + if not cdns: + raise ExtractorError('Failed to get CDN data', expected=True) + + formats = [] + streams = video_data.get('cdnData', {}).get('bitrateInfo', []) + for name, cdn in cdns.items(): + # LimeLight streams don't seem to work + if cdn.get('name') == 'LIMELIGHT': + continue + + protocol = cdn.get('protocol') + host = remove_end(cdn.get('host', ''), '/') + if not (protocol and host): + continue + + path_prefix = cdn.get('pathprefix', '') + if path_prefix and not path_prefix.endswith('/'): + path_prefix = '%s/' % path_prefix + + get_url = lambda p: '{protocol:s}://{host:s}/{prefix:s}{path:}'.format( + protocol=protocol, + host=host, + prefix=path_prefix, + path=p, + ) + + if protocol == 'rtmp': + preference = -2 + elif 'prog' in name.lower(): + preference = -1 + else: + preference = 0 + + for stream in streams: + path = stream.get('path') + if not path: + continue + + formats.append({ + 'url': get_url(path), + 'vbr': int_or_none(stream.get('rate', 0), 1000), + 'preference': preference, + 'format_note': name, + }) + + self._sort_formats(formats) + + thumbnail = None + for q in ('xl', 'l', 'm', 's', 'xs'): + thumbnail = video_data.get('imagePaths', {}).get(q) + if thumbnail: + break + + return { + 'id': video_id, + 'title': video_data.get('storyHeadline'), + 'formats': formats, + 'description': video_data.get('caption'), + 'duration': video_data.get('duration'), + 'thumbnail': thumbnail, + 'timestamp': int_or_none(video_data.get('posted'), 1000), + } From f7d159cf953bd1884ca45f535327f3016998270c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 23 Sep 2014 19:13:11 +0700 Subject: [PATCH 0045/1937] [noco] Encode before passing to hashlib.md5 (Closes #3816) --- youtube_dl/extractor/noco.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/noco.py b/youtube_dl/extractor/noco.py index e3ec9ed15..7f1bc6377 100644 --- a/youtube_dl/extractor/noco.py +++ b/youtube_dl/extractor/noco.py @@ -62,7 +62,7 @@ class NocoIE(InfoExtractor): def _call_api(self, path, video_id, note): ts = compat_str(int(time.time() * 1000)) - tk = hashlib.md5(hashlib.md5(ts).hexdigest() + '#8S?uCraTedap6a').hexdigest() + tk = hashlib.md5((hashlib.md5(ts.encode('ascii')).hexdigest() + '#8S?uCraTedap6a').encode('ascii')).hexdigest() url = self._API_URL_TEMPLATE % (path, ts, tk) resp = self._download_json(url, video_id, note) From 86916dae4b8604431205d11ccfa5f9796c0798dc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 23 Sep 2014 19:58:35 +0700 Subject: [PATCH 0046/1937] [wat] Capture and output error message --- youtube_dl/extractor/wat.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/wat.py b/youtube_dl/extractor/wat.py index 46b4d9133..268e2f618 100644 --- a/youtube_dl/extractor/wat.py +++ b/youtube_dl/extractor/wat.py @@ -5,7 +5,10 @@ import re import hashlib from .common 
import InfoExtractor -from ..utils import unified_strdate +from ..utils import ( + ExtractorError, + unified_strdate, +) class WatIE(InfoExtractor): @@ -57,6 +60,11 @@ class WatIE(InfoExtractor): video_info = self.download_video_info(real_id) + error_desc = video_info.get('error_desc') + if error_desc: + raise ExtractorError( + '%s returned error: %s' % (self.IE_NAME, error_desc), expected=True) + geo_list = video_info.get('geoList') country = geo_list[0] if geo_list else '' From bd5650ac64fedd1c1ad7b90c4ec4ff5d4c053bc0 Mon Sep 17 00:00:00 2001 From: Naglis Jonaitis <njonaitis@gmail.com> Date: Tue, 23 Sep 2014 20:42:28 +0300 Subject: [PATCH 0047/1937] [nfl] Fix test case - download, but don't check md5 --- youtube_dl/extractor/nfl.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/nfl.py b/youtube_dl/extractor/nfl.py index f53596f5e..963c4587c 100644 --- a/youtube_dl/extractor/nfl.py +++ b/youtube_dl/extractor/nfl.py @@ -17,7 +17,7 @@ class NFLIE(InfoExtractor): _PLAYER_CONFIG_URL = 'http://www.nfl.com/static/content/static/config/video/config.json' _TEST = { 'url': 'http://www.nfl.com/videos/nfl-game-highlights/0ap3000000398478/Week-3-Redskins-vs-Eagles-highlights', - 'skip_download': True, # md5 sum fluctuates + # 'md5': '5eb8c40a727dda106d510e5d6ffa79e5', # md5 checksum fluctuates 'info_dict': { 'id': '0ap3000000398478', 'ext': 'mp4', From 4bc3a23ec5d2c1bbcdc5289393881606604922c7 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Wed, 24 Sep 2014 09:49:53 +0200 Subject: [PATCH 0048/1937] [youtube] Modernize --- youtube_dl/extractor/youtube.py | 103 +++++++++++++++++--------------- 1 file changed, 54 insertions(+), 49 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index b54c69122..602be9859 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -307,69 +307,74 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): IE_NAME = 'youtube' _TESTS = [ { - u"url": u"http://www.youtube.com/watch?v=BaW_jenozKc", - u"file": u"BaW_jenozKc.mp4", - u"info_dict": { - u"title": u"youtube-dl test video \"'/\\ä↭𝕐", - u"uploader": u"Philipp Hagemeister", - u"uploader_id": u"phihag", - u"upload_date": u"20121002", - u"description": u"test chars: \"'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .", - u"categories": [u'Science & Technology'], + 'url': 'http://www.youtube.com/watch?v=BaW_jenozKc', + 'info_dict': { + 'id': 'BaW_jenozKc', + 'ext': 'mp4', + 'title': 'youtube-dl test video "\'/\\ä↭𝕐', + 'uploader': 'Philipp Hagemeister', + 'uploader_id': 'phihag', + 'upload_date': '20121002', + 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .', + 'categories': ['Science & Technology'], 'like_count': int, 'dislike_count': int, } }, { - u"url": u"http://www.youtube.com/watch?v=UxxajLWwzqY", - u"file": u"UxxajLWwzqY.mp4", - u"note": u"Test generic use_cipher_signature video (#897)", - u"info_dict": { - u"upload_date": u"20120506", - u"title": u"Icona Pop - I Love It (feat. 
Charli XCX) [OFFICIAL VIDEO]", - u"description": u"md5:fea86fda2d5a5784273df5c7cc994d9f", - u"uploader": u"Icona Pop", - u"uploader_id": u"IconaPop" + 'url': 'http://www.youtube.com/watch?v=UxxajLWwzqY', + 'note': 'Test generic use_cipher_signature video (#897)', + 'info_dict': { + 'id': 'UxxajLWwzqY', + 'ext': 'mp4', + 'upload_date': '20120506', + 'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]', + 'description': 'md5:fea86fda2d5a5784273df5c7cc994d9f', + 'uploader': 'Icona Pop', + 'uploader_id': 'IconaPop', } }, { - u"url": u"https://www.youtube.com/watch?v=07FYdnEawAQ", - u"file": u"07FYdnEawAQ.mp4", - u"note": u"Test VEVO video with age protection (#956)", - u"info_dict": { - u"upload_date": u"20130703", - u"title": u"Justin Timberlake - Tunnel Vision (Explicit)", - u"description": u"md5:64249768eec3bc4276236606ea996373", - u"uploader": u"justintimberlakeVEVO", - u"uploader_id": u"justintimberlakeVEVO" + 'url': 'https://www.youtube.com/watch?v=07FYdnEawAQ', + 'note': 'Test VEVO video with age protection (#956)', + 'info_dict': { + 'id': '07FYdnEawAQ', + 'ext': 'mp4', + 'upload_date': '20130703', + 'title': 'Justin Timberlake - Tunnel Vision (Explicit)', + 'description': 'md5:64249768eec3bc4276236606ea996373', + 'uploader': 'justintimberlakeVEVO', + 'uploader_id': 'justintimberlakeVEVO', } }, { - u"url": u"//www.YouTube.com/watch?v=yZIXLfi8CZQ", - u"file": u"yZIXLfi8CZQ.mp4", - u"note": u"Embed-only video (#1746)", - u"info_dict": { - u"upload_date": u"20120608", - u"title": u"Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012", - u"description": u"md5:09b78bd971f1e3e289601dfba15ca4f7", - u"uploader": u"SET India", - u"uploader_id": u"setindia" + 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ', + 'note': 'Embed-only video (#1746)', + 'info_dict': { + 'id': 'yZIXLfi8CZQ', + 'ext': 'mp4', + 'upload_date': '20120608', + 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012', + 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7', + 'uploader': 'SET India', + 'uploader_id': 'setindia' } }, { - u"url": u"http://www.youtube.com/watch?v=a9LDPn-MO4I", - u"file": u"a9LDPn-MO4I.m4a", - u"note": u"256k DASH audio (format 141) via DASH manifest", - u"info_dict": { - u"upload_date": "20121002", - u"uploader_id": "8KVIDEO", - u"description": '', - u"uploader": "8KVIDEO", - u"title": "UHDTV TEST 8K VIDEO.mp4" + 'url': 'http://www.youtube.com/watch?v=a9LDPn-MO4I', + 'note': '256k DASH audio (format 141) via DASH manifest', + 'info_dict': { + 'id': 'a9LDPn-MO4I', + 'ext': 'm4a', + 'upload_date': '20121002', + 'uploader_id': '8KVIDEO', + 'description': '', + 'uploader': '8KVIDEO', + 'title': 'UHDTV TEST 8K VIDEO.mp4' }, - u"params": { - u"youtube_include_dash_manifest": True, - u"format": "141", + 'params': { + 'youtube_include_dash_manifest': True, + 'format': '141', }, }, # DASH manifest with encrypted signature @@ -384,7 +389,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): 'uploader_id': 'AfrojackVEVO', 'upload_date': '20131011', }, - u"params": { + 'params': { 'youtube_include_dash_manifest': True, 'format': '141', }, From 69ea8ca42cd4fc62fdd4e7f18defb3b23da618d2 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Wed, 24 Sep 2014 09:51:45 +0200 Subject: [PATCH 0049/1937] [youtube] Remove superfluous unicode specifiers --- youtube_dl/extractor/youtube.py | 136 ++++++++++++++++---------------- 1 file changed, 68 insertions(+), 68 deletions(-) diff --git a/youtube_dl/extractor/youtube.py 
b/youtube_dl/extractor/youtube.py index 602be9859..0257ee2f9 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -46,7 +46,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor): def _set_language(self): return bool(self._download_webpage( self._LANG_URL, None, - note=u'Setting language', errnote='unable to set language', + note='Setting language', errnote='unable to set language', fatal=False)) def _login(self): @@ -61,13 +61,13 @@ class YoutubeBaseInfoExtractor(InfoExtractor): # No authentication to be performed if username is None: if self._LOGIN_REQUIRED: - raise ExtractorError(u'No login info available, needed for using %s.' % self.IE_NAME, expected=True) + raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True) return True login_page = self._download_webpage( self._LOGIN_URL, None, - note=u'Downloading login page', - errnote=u'unable to fetch login page', fatal=False) + note='Downloading login page', + errnote='unable to fetch login page', fatal=False) if login_page is False: return @@ -105,12 +105,12 @@ class YoutubeBaseInfoExtractor(InfoExtractor): req = compat_urllib_request.Request(self._LOGIN_URL, login_data) login_results = self._download_webpage( req, None, - note=u'Logging in', errnote=u'unable to log in', fatal=False) + note='Logging in', errnote='unable to log in', fatal=False) if login_results is False: return False if re.search(r'id="errormsg_0_Passwd"', login_results) is not None: - raise ExtractorError(u'Please use your account password and a two-factor code instead of an application-specific password.', expected=True) + raise ExtractorError('Please use your account password and a two-factor code instead of an application-specific password.', expected=True) # Two-Factor # TODO add SMS and phone call support - these require making a request and then prompting the user @@ -119,19 +119,19 @@ class YoutubeBaseInfoExtractor(InfoExtractor): tfa_code = self._get_tfa_info() if tfa_code is None: - self._downloader.report_warning(u'Two-factor authentication required. Provide it with --twofactor <code>') - self._downloader.report_warning(u'(Note that only TOTP (Google Authenticator App) codes work at this time.)') + self._downloader.report_warning('Two-factor authentication required. 
Provide it with --twofactor <code>') + self._downloader.report_warning('(Note that only TOTP (Google Authenticator App) codes work at this time.)') return False # Unlike the first login form, secTok and timeStmp are both required for the TFA form match = re.search(r'id="secTok"\n\s+value=\'(.+)\'/>', login_results, re.M | re.U) if match is None: - self._downloader.report_warning(u'Failed to get secTok - did the page structure change?') + self._downloader.report_warning('Failed to get secTok - did the page structure change?') secTok = match.group(1) match = re.search(r'id="timeStmp"\n\s+value=\'(.+)\'/>', login_results, re.M | re.U) if match is None: - self._downloader.report_warning(u'Failed to get timeStmp - did the page structure change?') + self._downloader.report_warning('Failed to get timeStmp - did the page structure change?') timeStmp = match.group(1) tfa_form_strs = { @@ -155,23 +155,23 @@ class YoutubeBaseInfoExtractor(InfoExtractor): tfa_req = compat_urllib_request.Request(self._TWOFACTOR_URL, tfa_data) tfa_results = self._download_webpage( tfa_req, None, - note=u'Submitting TFA code', errnote=u'unable to submit tfa', fatal=False) + note='Submitting TFA code', errnote='unable to submit tfa', fatal=False) if tfa_results is False: return False if re.search(r'(?i)<form[^>]* id="gaia_secondfactorform"', tfa_results) is not None: - self._downloader.report_warning(u'Two-factor code expired. Please try again, or use a one-use backup code instead.') + self._downloader.report_warning('Two-factor code expired. Please try again, or use a one-use backup code instead.') return False if re.search(r'(?i)<form[^>]* id="gaia_loginform"', tfa_results) is not None: - self._downloader.report_warning(u'unable to log in - did the page structure change?') + self._downloader.report_warning('unable to log in - did the page structure change?') return False if re.search(r'smsauth-interstitial-reviewsettings', tfa_results) is not None: - self._downloader.report_warning(u'Your Google account has a security notice. Please log in on your web browser, resolve the notice, and try again.') + self._downloader.report_warning('Your Google account has a security notice. 
Please log in on your web browser, resolve the notice, and try again.') return False if re.search(r'(?i)<form[^>]* id="gaia_loginform"', login_results) is not None: - self._downloader.report_warning(u'unable to log in: bad username or password') + self._downloader.report_warning('unable to log in: bad username or password') return False return True @@ -185,7 +185,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor): self._download_webpage( req, None, - note=u'Confirming age', errnote=u'Unable to confirm age') + note='Confirming age', errnote='Unable to confirm age') return True def _real_initialize(self): @@ -402,19 +402,19 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): def report_video_info_webpage_download(self, video_id): """Report attempt to download video info webpage.""" - self.to_screen(u'%s: Downloading video info webpage' % video_id) + self.to_screen('%s: Downloading video info webpage' % video_id) def report_information_extraction(self, video_id): """Report attempt to extract video information.""" - self.to_screen(u'%s: Extracting video information' % video_id) + self.to_screen('%s: Extracting video information' % video_id) def report_unavailable_format(self, video_id, format): """Report extracted video URL.""" - self.to_screen(u'%s: Format %s not available' % (video_id, format)) + self.to_screen('%s: Format %s not available' % (video_id, format)) def report_rtmp_download(self): """Indicate the download will use the RTMP protocol.""" - self.to_screen(u'RTMP download detected') + self.to_screen('RTMP download detected') def _signature_cache_id(self, example_sig): """ Return a string representation of a signature """ @@ -434,21 +434,21 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): player_type, player_id, self._signature_cache_id(example_sig)) assert os.path.basename(func_id) == func_id - cache_spec = self._downloader.cache.load(u'youtube-sigfuncs', func_id) + cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id) if cache_spec is not None: return lambda s: ''.join(s[i] for i in cache_spec) if player_type == 'js': code = self._download_webpage( player_url, video_id, - note=u'Downloading %s player %s' % (player_type, player_id), - errnote=u'Download of %s failed' % player_url) + note='Downloading %s player %s' % (player_type, player_id), + errnote='Download of %s failed' % player_url) res = self._parse_sig_js(code) elif player_type == 'swf': urlh = self._request_webpage( player_url, video_id, - note=u'Downloading %s player %s' % (player_type, player_id), - errnote=u'Download of %s failed' % player_url) + note='Downloading %s player %s' % (player_type, player_id), + errnote='Download of %s failed' % player_url) code = urlh.read() res = self._parse_sig_swf(code) else: @@ -459,15 +459,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): cache_res = res(test_string) cache_spec = [ord(c) for c in cache_res] - self._downloader.cache.store(u'youtube-sigfuncs', func_id, cache_spec) + self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec) return res def _print_sig_code(self, func, example_sig): def gen_sig_code(idxs): def _genslice(start, end, step): starts = '' if start == 0 else str(start) - ends = (u':%d' % (end+step)) if end + step >= 0 else ':' - steps = '' if step == 1 else (u':%d' % step) + ends = (':%d' % (end+step)) if end + step >= 0 else ':' + steps = '' if step == 1 else (':%d' % step) return 's[%s%s%s]' % (starts, ends, steps) step = None @@ -497,9 +497,9 @@ class 
YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): expr_code = ' + '.join(gen_sig_code(cache_spec)) signature_id_tuple = '(%s)' % ( ', '.join(compat_str(len(p)) for p in example_sig.split('.'))) - code = (u'if tuple(len(p) for p in s.split(\'.\')) == %s:\n' + code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n' ' return %s\n') % (signature_id_tuple, expr_code) - self.to_screen(u'Extracted signature function:\n' + code) + self.to_screen('Extracted signature function:\n' + code) def _parse_sig_js(self, jscode): funcname = self._search_regex( @@ -521,9 +521,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): """Turn the encrypted s field into a working signature""" if player_url is None: - raise ExtractorError(u'Cannot decrypt signature without player_url') + raise ExtractorError('Cannot decrypt signature without player_url') - if player_url.startswith(u'//'): + if player_url.startswith('//'): player_url = 'https:' + player_url try: player_id = (player_url, self._signature_cache_id(s)) @@ -547,7 +547,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): 'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id, video_id, note=False) except ExtractorError as err: - self._downloader.report_warning(u'unable to download video subtitles: %s' % compat_str(err)) + self._downloader.report_warning('unable to download video subtitles: %s' % compat_str(err)) return {} lang_list = re.findall(r'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', sub_list) @@ -565,7 +565,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): url = 'https://www.youtube.com/api/timedtext?' + params sub_lang_list[lang] = url if not sub_lang_list: - self._downloader.report_warning(u'video doesn\'t have subtitles') + self._downloader.report_warning('video doesn\'t have subtitles') return {} return sub_lang_list @@ -573,7 +573,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): """We need the webpage for getting the captions url, pass it as an argument to speed up the process.""" sub_format = self._downloader.params.get('subtitlesformat', 'srt') - self.to_screen(u'%s: Looking for automatic captions' % video_id) + self.to_screen('%s: Looking for automatic captions' % video_id) mobj = re.search(r';ytplayer.config = ({.*?});', webpage) err_msg = 'Couldn\'t find automatic captions for %s' % video_id if mobj is None: @@ -594,7 +594,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): caption_list = self._download_xml(list_url, video_id) original_lang_node = caption_list.find('track') if original_lang_node is None or original_lang_node.attrib.get('kind') != 'asr' : - self._downloader.report_warning(u'Video doesn\'t have automatic captions') + self._downloader.report_warning('Video doesn\'t have automatic captions') return {} original_lang = original_lang_node.attrib['lang_code'] @@ -620,7 +620,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): def extract_id(cls, url): mobj = re.match(cls._VALID_URL, url, re.VERBOSE) if mobj is None: - raise ExtractorError(u'Invalid URL: %s' % url) + raise ExtractorError('Invalid URL: %s' % url) video_id = mobj.group(2) return video_id @@ -640,7 +640,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): def _extract_annotations(self, video_id): url = 'https://www.youtube.com/annotations_invideo?features=1&legacy=1&video_id=%s' % video_id - return self._download_webpage(url, video_id, note=u'Searching for annotations.', errnote=u'Unable to download 
video annotations.') + return self._download_webpage(url, video_id, note='Searching for annotations.', errnote='Unable to download video annotations.') def _real_extract(self, url): proto = ( @@ -710,14 +710,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): # Check for "rental" videos if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info: - raise ExtractorError(u'"rental" videos not supported') + raise ExtractorError('"rental" videos not supported') # Start extracting information self.report_information_extraction(video_id) # uploader if 'author' not in video_info: - raise ExtractorError(u'Unable to extract uploader name') + raise ExtractorError('Unable to extract uploader name') video_uploader = compat_urllib_parse.unquote_plus(video_info['author'][0]) # uploader_id @@ -726,13 +726,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): if mobj is not None: video_uploader_id = mobj.group(1) else: - self._downloader.report_warning(u'unable to extract uploader nickname') + self._downloader.report_warning('unable to extract uploader nickname') # title if 'title' in video_info: video_title = video_info['title'][0] else: - self._downloader.report_warning(u'Unable to extract video title') + self._downloader.report_warning('Unable to extract video title') video_title = '_' # thumbnail image @@ -742,7 +742,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): if m_thumb is not None: video_thumbnail = m_thumb.group(1) elif 'thumbnail_url' not in video_info: - self._downloader.report_warning(u'unable to extract video thumbnail') + self._downloader.report_warning('unable to extract video thumbnail') video_thumbnail = None else: # don't panic if we can't find it video_thumbnail = compat_urllib_parse.unquote_plus(video_info['thumbnail_url'][0]) @@ -796,8 +796,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): if count is not None: return int(count.replace(',', '')) return None - like_count = _extract_count(u'like') - dislike_count = _extract_count(u'dislike') + like_count = _extract_count('like') + dislike_count = _extract_count('dislike') # subtitles video_subtitles = self.extract_subtitles(video_id, video_webpage) @@ -807,7 +807,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): return if 'length_seconds' not in video_info: - self._downloader.report_warning(u'unable to extract video duration') + self._downloader.report_warning('unable to extract video duration') video_duration = None else: video_duration = int(compat_urllib_parse.unquote_plus(video_info['length_seconds'][0])) @@ -828,11 +828,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): # Easy way to know if the 's' value is in url_encoded_fmt_stream_map # this signatures are encrypted if 'url_encoded_fmt_stream_map' not in args: - raise ValueError(u'No stream_map present') # caught below + raise ValueError('No stream_map present') # caught below re_signature = re.compile(r'[&,]s=') m_s = re_signature.search(args['url_encoded_fmt_stream_map']) if m_s is not None: - self.to_screen(u'%s: Encrypted signatures detected.' % video_id) + self.to_screen('%s: Encrypted signatures detected.' 
% video_id) video_info['url_encoded_fmt_stream_map'] = [args['url_encoded_fmt_stream_map']] m_s = re_signature.search(args.get('adaptive_fmts', '')) if m_s is not None: @@ -910,7 +910,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): player_desc = 'html5 player %s' % player_version parts_sizes = self._signature_cache_id(encrypted_sig) - self.to_screen(u'{%s} signature length %s, %s' % + self.to_screen('{%s} signature length %s, %s' % (format_id, parts_sizes, player_desc)) signature = self._decrypt_signature( @@ -925,7 +925,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): url_map = self._extract_from_m3u8(manifest_url, video_id) formats = _map_to_format_list(url_map) else: - raise ExtractorError(u'no conn, hlsvp or url_encoded_fmt_stream_map information found in video info') + raise ExtractorError('no conn, hlsvp or url_encoded_fmt_stream_map information found in video info') # Look for the DASH manifest if (self._downloader.params.get('youtube_include_dash_manifest', False)): @@ -946,9 +946,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): dash_manifest_url = re.sub(r'/s/([\w\.]+)', decrypt_sig, dash_manifest_url) dash_doc = self._download_xml( dash_manifest_url, video_id, - note=u'Downloading DASH manifest', - errnote=u'Could not download DASH manifest') - for r in dash_doc.findall(u'.//{urn:mpeg:DASH:schema:MPD:2011}Representation'): + note='Downloading DASH manifest', + errnote='Could not download DASH manifest') + for r in dash_doc.findall('.//{urn:mpeg:DASH:schema:MPD:2011}Representation'): url_el = r.find('{urn:mpeg:DASH:schema:MPD:2011}BaseURL') if url_el is None: continue @@ -974,7 +974,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): existing_format.update(f) except (ExtractorError, KeyError) as e: - self.report_warning(u'Skipping DASH manifest: %s' % e, video_id) + self.report_warning('Skipping DASH manifest: %s' % e, video_id) self._sort_formats(formats) @@ -1095,7 +1095,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor): # Extract playlist id mobj = re.match(self._VALID_URL, url) if mobj is None: - raise ExtractorError(u'Invalid URL: %s' % url) + raise ExtractorError('Invalid URL: %s' % url) playlist_id = mobj.group(1) or mobj.group(2) # Check if it's a video-specific URL @@ -1103,16 +1103,16 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor): if 'v' in query_dict: video_id = query_dict['v'][0] if self._downloader.params.get('noplaylist'): - self.to_screen(u'Downloading just video %s because of --no-playlist' % video_id) + self.to_screen('Downloading just video %s because of --no-playlist' % video_id) return self.url_result(video_id, 'Youtube', video_id=video_id) else: - self.to_screen(u'Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id)) + self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id)) if playlist_id.startswith('RD'): # Mixes require a custom extraction process return self._extract_mix(playlist_id) if playlist_id.startswith('TL'): - raise ExtractorError(u'For downloading YouTube.com top lists, use ' + raise ExtractorError('For downloading YouTube.com top lists, use ' 'the "yttoplist" keyword, for example "youtube-dl \'yttoplist:music:Top Tracks\'"', expected=True) url = self._TEMPLATE_URL % playlist_id @@ -1157,7 +1157,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor): class YoutubeTopListIE(YoutubePlaylistIE): IE_NAME = 'youtube:toplist' - IE_DESC = (u'YouTube.com 
top lists, "yttoplist:{channel}:{list title}"' + IE_DESC = ('YouTube.com top lists, "yttoplist:{channel}:{list title}"' ' (Example: "yttoplist:music:Top Tracks")') _VALID_URL = r'yttoplist:(?P<chann>.*?):(?P<title>.*?)$' _TESTS = [] @@ -1207,7 +1207,7 @@ class YoutubeChannelIE(InfoExtractor): # Extract channel id mobj = re.match(self._VALID_URL, url) if mobj is None: - raise ExtractorError(u'Invalid URL: %s' % url) + raise ExtractorError('Invalid URL: %s' % url) # Download channel page channel_id = mobj.group(1) @@ -1229,7 +1229,7 @@ class YoutubeChannelIE(InfoExtractor): for pagenum in itertools.count(1): url = self._MORE_PAGES_URL % (pagenum, channel_id) page = self._download_json( - url, channel_id, note=u'Downloading page #%s' % pagenum, + url, channel_id, note='Downloading page #%s' % pagenum, transform_source=uppercase_escape) ids_in_page = self.extract_videos_from_page(page['content_html']) @@ -1238,7 +1238,7 @@ class YoutubeChannelIE(InfoExtractor): if self._MORE_PAGES_INDICATOR not in page['load_more_widget_html']: break - self._downloader.to_screen(u'[youtube] Channel %s: Found %i videos' % (channel_id, len(video_ids))) + self._downloader.to_screen('[youtube] Channel %s: Found %i videos' % (channel_id, len(video_ids))) url_entries = [self.url_result(video_id, 'Youtube', video_id=video_id) for video_id in video_ids] @@ -1265,7 +1265,7 @@ class YoutubeUserIE(InfoExtractor): # Extract username mobj = re.match(self._VALID_URL, url) if mobj is None: - raise ExtractorError(u'Invalid URL: %s' % url) + raise ExtractorError('Invalid URL: %s' % url) username = mobj.group(1) @@ -1286,7 +1286,7 @@ class YoutubeUserIE(InfoExtractor): try: response = json.loads(page) except ValueError as err: - raise ExtractorError(u'Invalid JSON in API response: ' + compat_str(err)) + raise ExtractorError('Invalid JSON in API response: ' + compat_str(err)) if 'entry' not in response['feed']: return @@ -1327,9 +1327,9 @@ class YoutubeSearchIE(SearchInfoExtractor): compat_urllib_parse.quote_plus(query.encode('utf-8')), (PAGE_SIZE * pagenum) + 1) data_json = self._download_webpage( - result_url, video_id=u'query "%s"' % query, - note=u'Downloading page %s' % (pagenum + 1), - errnote=u'Unable to download API page') + result_url, video_id='query "%s"' % query, + note='Downloading page %s' % (pagenum + 1), + errnote='Unable to download API page') data = json.loads(data_json) api_response = data['data'] @@ -1404,7 +1404,7 @@ class YoutubeShowIE(InfoExtractor): webpage = self._download_webpage(url, show_name, 'Downloading show webpage') # There's one playlist for each season of the show m_seasons = list(re.finditer(r'href="(/playlist\?list=.*?)"', webpage)) - self.to_screen(u'%s: Found %s seasons' % (show_name, len(m_seasons))) + self.to_screen('%s: Found %s seasons' % (show_name, len(m_seasons))) return [self.url_result('https://www.youtube.com' + season.group(1), 'YoutubePlaylist') for season in m_seasons] From cdc628a498b8f2198d057ba1ba78e86d8915e3aa Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Wed, 24 Sep 2014 10:25:47 +0200 Subject: [PATCH 0050/1937] [youtube] Move more tests to extractors --- test/test_youtube_lists.py | 39 ------------------ youtube_dl/extractor/youtube.py | 71 ++++++++++++++++++++++++++++----- 2 files changed, 62 insertions(+), 48 deletions(-) diff --git a/test/test_youtube_lists.py b/test/test_youtube_lists.py index 1fa99f88b..410f9edc2 100644 --- a/test/test_youtube_lists.py +++ b/test/test_youtube_lists.py @@ -10,7 +10,6 @@ from test.helper import FakeYDL 
from youtube_dl.extractor import ( - YoutubeUserIE, YoutubePlaylistIE, YoutubeIE, YoutubeChannelIE, @@ -43,28 +42,6 @@ class TestYoutubeLists(unittest.TestCase): self.assertEqual(len(entries), 25) self.assertEqual(YoutubeIE().extract_id(entries[-1]['url']), 'rYefUsYuEp0') - def test_youtube_channel(self): - dl = FakeYDL() - ie = YoutubeChannelIE(dl) - #test paginated channel - result = ie.extract('https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w') - self.assertTrue(len(result['entries']) > 90) - #test autogenerated channel - result = ie.extract('https://www.youtube.com/channel/HCtnHdj3df7iM/videos') - self.assertTrue(len(result['entries']) >= 18) - - def test_youtube_user(self): - dl = FakeYDL() - ie = YoutubeUserIE(dl) - result = ie.extract('https://www.youtube.com/user/TheLinuxFoundation') - self.assertTrue(len(result['entries']) >= 320) - - def test_youtube_show(self): - dl = FakeYDL() - ie = YoutubeShowIE(dl) - result = ie.extract('http://www.youtube.com/show/airdisasters') - self.assertTrue(len(result) >= 3) - def test_youtube_mix(self): dl = FakeYDL() ie = YoutubePlaylistIE(dl) @@ -83,21 +60,5 @@ class TestYoutubeLists(unittest.TestCase): entries = result['entries'] self.assertEqual(len(entries), 100) - def test_youtube_toplist(self): - dl = FakeYDL() - ie = YoutubeTopListIE(dl) - result = ie.extract('yttoplist:music:Trending') - entries = result['entries'] - self.assertTrue(len(entries) >= 5) - - def test_youtube_search_url(self): - dl = FakeYDL() - ie = YoutubeSearchURLIE(dl) - result = ie.extract('https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video') - entries = result['entries'] - self.assertIsPlaylist(result) - self.assertEqual(result['title'], 'youtube-dl test video') - self.assertTrue(len(entries) >= 5) - if __name__ == '__main__': unittest.main() diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 0257ee2f9..2ef76b69b 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1160,16 +1160,25 @@ class YoutubeTopListIE(YoutubePlaylistIE): IE_DESC = ('YouTube.com top lists, "yttoplist:{channel}:{list title}"' ' (Example: "yttoplist:music:Top Tracks")') _VALID_URL = r'yttoplist:(?P<chann>.*?):(?P<title>.*?)$' - _TESTS = [] + _TESTS = [{ + 'url': 'yttoplist:music:Trending', + 'playlist_mincount': 5, + 'skip': 'Only works for logged-in users', + }] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) channel = mobj.group('chann') title = mobj.group('title') query = compat_urllib_parse.urlencode({'title': title}) - playlist_re = 'href="([^"]+?%s.*?)"' % re.escape(query) - channel_page = self._download_webpage('https://www.youtube.com/%s' % channel, title) - link = self._html_search_regex(playlist_re, channel_page, 'list') + channel_page = self._download_webpage( + 'https://www.youtube.com/%s' % channel, title) + link = self._html_search_regex( + r'''(?x) + <a\s+href="([^"]+)".*?>\s* + <span\s+class="branded-page-module-title-text">\s* + <span[^>]*>.*?%s.*?</span>''' % re.escape(query), + channel_page, 'list') url = compat_urlparse.urljoin('https://www.youtube.com/', link) video_re = r'data-index="\d+".*?data-video-id="([0-9A-Za-z_-]{11})"' @@ -1195,6 +1204,11 @@ class YoutubeChannelIE(InfoExtractor): _MORE_PAGES_INDICATOR = 'yt-uix-load-more' _MORE_PAGES_URL = 'https://www.youtube.com/c4_browse_ajax?action_load_more_videos=1&flow=list&paging=%s&view=0&sort=da&channel_id=%s' IE_NAME = 'youtube:channel' + _TESTS = [{ + 'note': 'paginated channel', + 
'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w', + 'playlist_mincount': 91, + }] def extract_videos_from_page(self, page): ids_in_page = [] @@ -1253,6 +1267,17 @@ class YoutubeUserIE(InfoExtractor): _GDATA_URL = 'https://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d&alt=json' IE_NAME = 'youtube:user' + _TESTS = [{ + 'url': 'https://www.youtube.com/user/TheLinuxFoundation', + 'playlist_mincount': 320, + 'info_dict': { + 'title': 'TheLinuxFoundation', + } + }, { + 'url': 'ytuser:phihag', + 'only_matching': True, + }] + @classmethod def suitable(cls, url): # Don't return True if the url can be extracted with other youtube @@ -1361,6 +1386,13 @@ class YoutubeSearchURLIE(InfoExtractor): IE_DESC = 'YouTube.com search URLs' IE_NAME = 'youtube:search_url' _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?search_query=(?P<query>[^&]+)(?:[&]|$)' + _TESTS = [{ + 'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video', + 'playlist_mincount': 5, + 'info_dict': { + 'title': 'youtube-dl test video', + } + }] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) @@ -1395,17 +1427,38 @@ class YoutubeSearchURLIE(InfoExtractor): class YoutubeShowIE(InfoExtractor): IE_DESC = 'YouTube.com (multi-season) shows' - _VALID_URL = r'https?://www\.youtube\.com/show/(.*)' + _VALID_URL = r'https?://www\.youtube\.com/show/(?P<id>[^?#]*)' IE_NAME = 'youtube:show' + _TESTS = [{ + 'url': 'http://www.youtube.com/show/airdisasters', + 'playlist_mincount': 3, + 'info_dict': { + 'id': 'airdisasters', + 'title': 'Air Disasters', + } + }] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) - show_name = mobj.group(1) - webpage = self._download_webpage(url, show_name, 'Downloading show webpage') + playlist_id = mobj.group('id') + webpage = self._download_webpage( + url, playlist_id, 'Downloading show webpage') # There's one playlist for each season of the show m_seasons = list(re.finditer(r'href="(/playlist\?list=.*?)"', webpage)) - self.to_screen('%s: Found %s seasons' % (show_name, len(m_seasons))) - return [self.url_result('https://www.youtube.com' + season.group(1), 'YoutubePlaylist') for season in m_seasons] + self.to_screen('%s: Found %s seasons' % (playlist_id, len(m_seasons))) + entries = [ + self.url_result( + 'https://www.youtube.com' + season.group(1), 'YoutubePlaylist') + for season in m_seasons + ] + title = self._og_search_title(webpage, fatal=False) + + return { + '_type': 'playlist', + 'id': playlist_id, + 'title': title, + 'entries': entries, + } class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor): From ac7553d031ffa6cdcdb109330467eb7c423ffd13 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Wed, 24 Sep 2014 10:34:29 +0200 Subject: [PATCH 0051/1937] [youtube] Support embed/videoseries URLs (#3821) --- youtube_dl/extractor/youtube.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 2ef76b69b..ae9564862 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -211,7 +211,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains (?:.*?\#/)? 
# handle anchor (#/) redirect urls (?: # the various things that can precede the ID: - (?:(?:v|embed|e)/) # v/ or embed/ or e/ + (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/ |(?: # or the v= param in all its forms (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx) (?:\?|\#!?) # the params delimiter ? or # or #! @@ -1005,7 +1005,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor): (?:\w+\.)? youtube\.com/ (?: - (?:course|view_play_list|my_playlists|artist|playlist|watch) + (?:course|view_play_list|my_playlists|artist|playlist|watch|embed/videoseries) \? (?:.*?&)*? (?:p|a|list)= | p/ ) @@ -1061,6 +1061,13 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor): 'title': 'YDL_safe_search', }, 'playlist_count': 2, + }, { + 'note': 'embedded', + 'url': 'http://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu', + 'playlist_count': 4, + 'info_dict': { + 'title': 'JODA15', + } }] def _real_initialize(self): From cc746841e76a0ab6a1bb65400ca496a105f65821 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Wed, 24 Sep 2014 10:46:33 +0200 Subject: [PATCH 0052/1937] [flickr] Modernize --- youtube_dl/extractor/flickr.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/flickr.py b/youtube_dl/extractor/flickr.py index 21ea5ec2b..e09982e88 100644 --- a/youtube_dl/extractor/flickr.py +++ b/youtube_dl/extractor/flickr.py @@ -10,13 +10,13 @@ from ..utils import ( class FlickrIE(InfoExtractor): - """Information Extractor for Flickr videos""" - _VALID_URL = r'(?:https?://)?(?:www\.|secure\.)?flickr\.com/photos/(?P<uploader_id>[\w\-_@]+)/(?P<id>\d+).*' + _VALID_URL = r'https?://(?:www\.|secure\.)?flickr\.com/photos/(?P<uploader_id>[\w\-_@]+)/(?P<id>\d+).*' _TEST = { 'url': 'http://www.flickr.com/photos/forestwander-nature-pictures/5645318632/in/photostream/', - 'file': '5645318632.mp4', 'md5': '6fdc01adbc89d72fc9c4f15b4a4ba87b', 'info_dict': { + 'id': '5645318632', + 'ext': 'mp4', "description": "Waterfalls in the Springtime at Dark Hollow Waterfalls. These are located just off of Skyline Drive in Virginia. 
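The (?!videoseries) lookahead added to the v/embed/e branch keeps /embed/videoseries?list=... URLs away from the single-video extractor, so the playlist extractor (whose _VALID_URL gains embed/videoseries in the same patch) can claim them. A reduced, self-contained illustration of the effect; this is only the relevant branch, not the full _VALID_URL:

import re

pattern = re.compile(r'/(?:v|embed|e)/(?!videoseries)(?P<id>[0-9A-Za-z_-]{11})')

# A normal embed URL still yields a video id ...
print(pattern.search('https://www.youtube.com/embed/BaW_jenozKc').group('id'))
# ... while an embedded playlist no longer matches at all.
print(pattern.search('https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu'))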
They are only about 6/10 of a mile hike but it is a pretty steep hill and a good climb back up.", "uploader_id": "forestwander-nature-pictures", "title": "Dark Hollow Waterfalls" @@ -49,12 +49,12 @@ class FlickrIE(InfoExtractor): raise ExtractorError('Unable to extract video url') video_url = mobj.group(1) + unescapeHTML(mobj.group(2)) - return [{ - 'id': video_id, - 'url': video_url, - 'ext': 'mp4', - 'title': self._og_search_title(webpage), + return { + 'id': video_id, + 'url': video_url, + 'ext': 'mp4', + 'title': self._og_search_title(webpage), 'description': self._og_search_description(webpage), - 'thumbnail': self._og_search_thumbnail(webpage), + 'thumbnail': self._og_search_thumbnail(webpage), 'uploader_id': video_uploader_id, - }] + } From 3b2f933b01c30a8b3a6bd7fb8418b44167ca30c5 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Wed, 24 Sep 2014 11:05:14 +0200 Subject: [PATCH 0053/1937] [generic] Allow embedded YoutubePlaylists (Fixes #3821) --- youtube_dl/extractor/generic.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 40eeaad16..a3bfeb174 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -584,7 +584,9 @@ class GenericIE(InfoExtractor): # Helper method def _playlist_from_matches(matches, getter, ie=None): - urlrs = orderedSet(self.url_result(getter(m), ie) for m in matches) + urlrs = orderedSet( + self.url_result(self._proto_relative_url(getter(m)), ie) + for m in matches) return self.playlist_result( urlrs, playlist_id=video_id, playlist_title=video_title) @@ -633,7 +635,7 @@ class GenericIE(InfoExtractor): \1''', webpage) if matches: return _playlist_from_matches( - matches, lambda m: unescapeHTML(m[1]), ie='Youtube') + matches, lambda m: unescapeHTML(m[1])) # Look for embedded Dailymotion player matches = re.findall( From 2f771f6c99480684522f2ccdfac25d69c1470ea5 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Wed, 24 Sep 2014 11:06:46 +0200 Subject: [PATCH 0054/1937] release 2014.09.24 --- youtube_dl/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 2853c79c9..960fd59a3 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2014.09.22.1' +__version__ = '2014.09.24' From f0b5d6af74469d8216aebfe8079dbe1516188b89 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Wed, 24 Sep 2014 14:16:56 +0200 Subject: [PATCH 0055/1937] [vevo] Support 1080p videos (Fixes #3656) --- youtube_dl/downloader/__init__.py | 3 ++ youtube_dl/downloader/hls.py | 47 +++++++++++++++++++++++++++++++ youtube_dl/extractor/common.py | 16 +++++++++-- youtube_dl/extractor/vevo.py | 40 +++++++++++++++++++++++++- 4 files changed, 102 insertions(+), 4 deletions(-) diff --git a/youtube_dl/downloader/__init__.py b/youtube_dl/downloader/__init__.py index 4ea5811a5..3f941596e 100644 --- a/youtube_dl/downloader/__init__.py +++ b/youtube_dl/downloader/__init__.py @@ -2,6 +2,7 @@ from __future__ import unicode_literals from .common import FileDownloader from .hls import HlsFD +from .hls import NativeHlsFD from .http import HttpFD from .mplayer import MplayerFD from .rtmp import RtmpFD @@ -19,6 +20,8 @@ def get_suitable_downloader(info_dict): if url.startswith('rtmp'): return RtmpFD + if protocol == 'm3u8_native': + return NativeHlsFD if (protocol == 'm3u8') or (protocol is None and 
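Embedded players are often referenced with protocol-relative URLs (//www.youtube.com/...), which is why the playlist helper above now passes every match through _proto_relative_url before building url results. A standalone sketch of the idea, not the helper's actual body:

def proto_relative_url(url, scheme='http:'):
    # Give protocol-relative URLs ('//host/path') a concrete scheme;
    # leave fully qualified URLs untouched.
    if url and url.startswith('//'):
        return scheme + url
    return url

print(proto_relative_url('//www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu'))
print(proto_relative_url('https://www.youtube.com/watch?v=BaW_jenozKc'))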
determine_ext(url) == 'm3u8'): return HlsFD if url.startswith('mms') or url.startswith('rtsp'): diff --git a/youtube_dl/downloader/hls.py b/youtube_dl/downloader/hls.py index 32852f333..8040bdf08 100644 --- a/youtube_dl/downloader/hls.py +++ b/youtube_dl/downloader/hls.py @@ -1,8 +1,12 @@ +from __future__ import unicode_literals + import os +import re import subprocess from .common import FileDownloader from ..utils import ( + compat_urlparse, check_executable, encodeFilename, ) @@ -43,3 +47,46 @@ class HlsFD(FileDownloader): self.to_stderr(u"\n") self.report_error(u'%s exited with code %d' % (program, retval)) return False + + +class NativeHlsFD(FileDownloader): + """ A more limited implementation that does not require ffmpeg """ + + def real_download(self, filename, info_dict): + url = info_dict['url'] + self.report_destination(filename) + tmpfilename = self.temp_name(filename) + + self.to_screen( + '[hlsnative] %s: Downloading m3u8 manifest' % info_dict['id']) + data = self.ydl.urlopen(url).read() + s = data.decode('utf-8', 'ignore') + segment_urls = [] + for line in s.splitlines(): + line = line.strip() + if line and not line.startswith('#'): + segment_url = ( + line + if re.match(r'^https?://', line) + else compat_urlparse.urljoin(url, line)) + segment_urls.append(segment_url) + + byte_counter = 0 + with open(tmpfilename, 'wb') as outf: + for i, segurl in enumerate(segment_urls): + segment = self.ydl.urlopen(segurl).read() + outf.write(segment) + byte_counter += len(segment) + self.to_screen( + '[hlsnative] %s: Downloading segment %d / %d' % + (info_dict['id'], i + 1, len(segment_urls))) + + self._hook_progress({ + 'downloaded_bytes': byte_counter, + 'total_bytes': byte_counter, + 'filename': filename, + 'status': 'finished', + }) + self.try_rename(tmpfilename, filename) + return True + diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 9c30a1d33..60cab6f4e 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -15,6 +15,7 @@ from ..utils import ( compat_http_client, compat_urllib_error, compat_urllib_parse_urlparse, + compat_urlparse, compat_str, clean_html, @@ -640,7 +641,9 @@ class InfoExtractor(object): return formats - def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None): + def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None, + entry_protocol='m3u8', preference=None): + formats = [{ 'format_id': 'm3u8-meta', 'url': m3u8_url, @@ -651,6 +654,11 @@ class InfoExtractor(object): 'format_note': 'Quality selection URL', }] + format_url = lambda u: ( + u + if re.match(r'^https?://', u) + else compat_urlparse.urljoin(m3u8_url, u)) + m3u8_doc = self._download_webpage(m3u8_url, video_id) last_info = None kv_rex = re.compile( @@ -667,15 +675,17 @@ class InfoExtractor(object): continue else: if last_info is None: - formats.append({'url': line}) + formats.append({'url': format_url(line)}) continue tbr = int_or_none(last_info.get('BANDWIDTH'), scale=1000) f = { 'format_id': 'm3u8-%d' % (tbr if tbr else len(formats)), - 'url': line.strip(), + 'url': format_url(line.strip()), 'tbr': tbr, 'ext': ext, + 'protocol': entry_protocol, + 'preference': preference, } codecs = last_info.get('CODECS') if codecs: diff --git a/youtube_dl/extractor/vevo.py b/youtube_dl/extractor/vevo.py index d2ffd1b6b..5e54a35d4 100644 --- a/youtube_dl/extractor/vevo.py +++ b/youtube_dl/extractor/vevo.py @@ -6,6 +6,7 @@ import xml.etree.ElementTree from .common import InfoExtractor from ..utils import ( compat_HTTPError, + compat_urllib_request, 
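The new NativeHlsFD walks the m3u8 playlist line by line, treating every non-empty, non-comment line as a segment URI and resolving relative ones against the playlist URL. A minimal standalone sketch of that step using only the standard library (the downloader itself goes through the compat_* wrappers):

import re
try:
    from urllib.parse import urljoin  # Python 3
except ImportError:
    from urlparse import urljoin  # Python 2

def segment_urls(m3u8_text, playlist_url):
    urls = []
    for line in m3u8_text.splitlines():
        line = line.strip()
        if not line or line.startswith('#'):
            continue  # blank lines and #EXT... tags are not media segments
        urls.append(line if re.match(r'^https?://', line)
                    else urljoin(playlist_url, line))
    return urls

sample = '#EXTM3U\n#EXTINF:10,\nseg0.ts\n#EXTINF:10,\nhttp://cdn.example.invalid/seg1.ts\n'
print(segment_urls(sample, 'http://cdn.example.invalid/stream/index.m3u8'))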
ExtractorError, ) @@ -69,6 +70,21 @@ class VevoIE(InfoExtractor): }] _SMIL_BASE_URL = 'http://smil.lvl3.vevo.com/' + def _real_initialize(self): + req = compat_urllib_request.Request( + 'http://www.vevo.com/auth', data=b'') + webpage = self._download_webpage( + req, None, + note='Retrieving oauth token', + errnote='Unable to retrieve oauth token', + fatal=False) + if webpage is False: + self._oauth_token = None + else: + self._oauth_token = self._search_regex( + r'access_token":\s*"([^"]+)"', + webpage, 'access token', fatal=False) + def _formats_from_json(self, video_info): last_version = {'version': -1} for version in video_info['videoVersions']: @@ -129,6 +145,26 @@ class VevoIE(InfoExtractor): }) return formats + def _download_api_formats(self, video_id): + if not self._oauth_token: + self._downloader.report_warning( + 'No oauth token available, skipping API HLS download') + return [] + + api_url = 'https://apiv2.vevo.com/video/%s/streams/hls?token=%s' % ( + video_id, self._oauth_token) + api_data = self._download_json( + api_url, video_id, + note='Downloading HLS formats', + errnote='Failed to download HLS format list', fatal=False) + if api_data is None: + return [] + + m3u8_url = api_data[0]['url'] + return self._extract_m3u8_formats( + m3u8_url, video_id, entry_protocol='m3u8_native', ext='mp4', + preference=0) + def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') @@ -152,6 +188,9 @@ class VevoIE(InfoExtractor): else: age_limit = None + # Download via HLS API + formats.extend(self._download_api_formats(video_id)) + # Download SMIL smil_blocks = sorted(( f for f in video_info['videoVersions'] @@ -166,7 +205,6 @@ class VevoIE(InfoExtractor): fatal=False) if smil_url_m is not None: smil_url = smil_url_m - try: smil_xml = self._download_webpage(smil_url, video_id, 'Downloading SMIL info') From eb73f2649f41e80063d6f2f3e4b6345eb90f9777 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Wed, 24 Sep 2014 14:17:33 +0200 Subject: [PATCH 0056/1937] [vevo] Skip SMIL download --- youtube_dl/extractor/vevo.py | 24 ------------------------ 1 file changed, 24 deletions(-) diff --git a/youtube_dl/extractor/vevo.py b/youtube_dl/extractor/vevo.py index 5e54a35d4..1edeece3f 100644 --- a/youtube_dl/extractor/vevo.py +++ b/youtube_dl/extractor/vevo.py @@ -191,30 +191,6 @@ class VevoIE(InfoExtractor): # Download via HLS API formats.extend(self._download_api_formats(video_id)) - # Download SMIL - smil_blocks = sorted(( - f for f in video_info['videoVersions'] - if f['sourceType'] == 13), - key=lambda f: f['version']) - - smil_url = '%s/Video/V2/VFILE/%s/%sr.smil' % ( - self._SMIL_BASE_URL, video_id, video_id.lower()) - if smil_blocks: - smil_url_m = self._search_regex( - r'url="([^"]+)"', smil_blocks[-1]['data'], 'SMIL URL', - fatal=False) - if smil_url_m is not None: - smil_url = smil_url_m - try: - smil_xml = self._download_webpage(smil_url, video_id, - 'Downloading SMIL info') - formats.extend(self._formats_from_smil(smil_xml)) - except ExtractorError as ee: - if not isinstance(ee.cause, compat_HTTPError): - raise - self._downloader.report_warning( - 'Cannot download SMIL information, falling back to JSON ..') - self._sort_formats(formats) timestamp_ms = int(self._search_regex( r'/Date\((\d+)\)/', video_info['launchDate'], 'launch date')) From 0b97f3a93690ea5449790acc1274df8900d141aa Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Wed, 24 Sep 2014 14:17:42 +0200 Subject: [PATCH 0057/1937] release 
2014.09.24.1 --- youtube_dl/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 960fd59a3..ecbd578db 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2014.09.24' +__version__ = '2014.09.24.1' From b686fc18dacaa6994c646c171368b99e168b619a Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Wed, 24 Sep 2014 14:38:40 +0200 Subject: [PATCH 0058/1937] [hlsnative] Support test parameter --- youtube_dl/downloader/common.py | 1 + youtube_dl/downloader/hls.py | 18 +++++++++++++++--- youtube_dl/downloader/http.py | 2 -- youtube_dl/extractor/vevo.py | 4 ++-- 4 files changed, 18 insertions(+), 7 deletions(-) diff --git a/youtube_dl/downloader/common.py b/youtube_dl/downloader/common.py index 9ce97f5fe..f85f0c94e 100644 --- a/youtube_dl/downloader/common.py +++ b/youtube_dl/downloader/common.py @@ -42,6 +42,7 @@ class FileDownloader(object): Subclasses of this one must re-define the real_download method. """ + _TEST_FILE_SIZE = 10241 params = None def __init__(self, ydl, params): diff --git a/youtube_dl/downloader/hls.py b/youtube_dl/downloader/hls.py index 8040bdf08..56cce2811 100644 --- a/youtube_dl/downloader/hls.py +++ b/youtube_dl/downloader/hls.py @@ -7,6 +7,7 @@ import subprocess from .common import FileDownloader from ..utils import ( compat_urlparse, + compat_urllib_request, check_executable, encodeFilename, ) @@ -71,15 +72,26 @@ class NativeHlsFD(FileDownloader): else compat_urlparse.urljoin(url, line)) segment_urls.append(segment_url) + is_test = self.params.get('test', False) + remaining_bytes = self._TEST_FILE_SIZE if is_test else None byte_counter = 0 with open(tmpfilename, 'wb') as outf: for i, segurl in enumerate(segment_urls): - segment = self.ydl.urlopen(segurl).read() - outf.write(segment) - byte_counter += len(segment) self.to_screen( '[hlsnative] %s: Downloading segment %d / %d' % (info_dict['id'], i + 1, len(segment_urls))) + seg_req = compat_urllib_request.Request(segurl) + if remaining_bytes: + seg_req.add_header('Range', 'bytes=0-%d' % (remaining_bytes - 1)) + + segment = self.ydl.urlopen(seg_req).read() + if remaining_bytes: + segment = segment[:remaining_bytes] + remaining_bytes -= len(segment) + outf.write(segment) + byte_counter += len(segment) + if remaining_bytes <= 0: + break self._hook_progress({ 'downloaded_bytes': byte_counter, diff --git a/youtube_dl/downloader/http.py b/youtube_dl/downloader/http.py index 6caf7451e..f62555ce0 100644 --- a/youtube_dl/downloader/http.py +++ b/youtube_dl/downloader/http.py @@ -14,8 +14,6 @@ from ..utils import ( class HttpFD(FileDownloader): - _TEST_FILE_SIZE = 10241 - def real_download(self, filename, info_dict): url = info_dict['url'] tmpfilename = self.temp_name(filename) diff --git a/youtube_dl/extractor/vevo.py b/youtube_dl/extractor/vevo.py index 1edeece3f..ebab8b86c 100644 --- a/youtube_dl/extractor/vevo.py +++ b/youtube_dl/extractor/vevo.py @@ -25,7 +25,7 @@ class VevoIE(InfoExtractor): _TESTS = [{ 'url': 'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280', - "md5": "06bea460acb744eab74a9d7dcb4bfd61", + "md5": "95ee28ee45e70130e3ab02b0f579ae23", 'info_dict': { 'id': 'GB1101300280', 'ext': 'mp4', @@ -41,7 +41,7 @@ class VevoIE(InfoExtractor): }, { 'note': 'v3 SMIL format', 'url': 'http://www.vevo.com/watch/cassadee-pope/i-wish-i-could-break-your-heart/USUV71302923', - 'md5': '893ec0e0d4426a1d96c01de8f2bdff58', + 'md5': 'f6ab09b034f8c22969020b042e5ac7fc', 'info_dict': { 
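With the test parameter supported, the native HLS downloader only needs the first _TEST_FILE_SIZE (10241) bytes and asks the server for exactly that slice via an HTTP Range header. A standalone illustration of the request side (Python 3 urllib, placeholder URL; the body is sliced afterwards because not every server honours Range):

import urllib.request

TEST_FILE_SIZE = 10241  # same cap as the downloader's _TEST_FILE_SIZE

def fetch_test_prefix(url, limit=TEST_FILE_SIZE):
    # Request bytes 0 .. limit-1 only, then trim in case the server
    # replied with the full resource anyway.
    req = urllib.request.Request(url, headers={'Range': 'bytes=0-%d' % (limit - 1)})
    with urllib.request.urlopen(req) as resp:
        return resp.read()[:limit]

# data = fetch_test_prefix('http://cdn.example.invalid/segment0.ts')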
'id': 'USUV71302923', 'ext': 'mp4', From 6b08cdf626afc71740e539a83ef570999df2c50b Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Thu, 25 Sep 2014 01:58:49 +0200 Subject: [PATCH 0059/1937] [youtube] Support for embedded /p players (Fixes #3821) --- youtube_dl/extractor/generic.py | 2 +- youtube_dl/extractor/youtube.py | 7 +++++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index a3bfeb174..0dcbb39db 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -631,7 +631,7 @@ class GenericIE(InfoExtractor): ) (["\']) (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/ - (?:embed|v)/.+?) + (?:embed|v|p)/.+?) \1''', webpage) if matches: return _playlist_from_matches( diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index ae9564862..99198e380 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1068,6 +1068,13 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor): 'info_dict': { 'title': 'JODA15', } + }, { + 'note': 'Embedded SWF player', + 'url': 'http://www.youtube.com/p/YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ?hl=en_US&fs=1&rel=0', + 'playlist_count': 4, + 'info_dict': { + 'title': 'JODA7', + } }] def _real_initialize(self): From 4bbf157794084e1ca076b63c402bc5aab4a5ad0a Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Thu, 25 Sep 2014 01:59:45 +0200 Subject: [PATCH 0060/1937] release 2014.09.25 --- youtube_dl/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index ecbd578db..c17701d6a 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2014.09.24.1' +__version__ = '2014.09.25' From fec02bcc90ad26ac5bbd11173fa83db91b3858bb Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Thu, 25 Sep 2014 09:21:45 +0200 Subject: [PATCH 0061/1937] [hlsnative] Correct handling when remaining_bytes is None --- youtube_dl/downloader/hls.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/downloader/hls.py b/youtube_dl/downloader/hls.py index 56cce2811..68eafa403 100644 --- a/youtube_dl/downloader/hls.py +++ b/youtube_dl/downloader/hls.py @@ -81,16 +81,16 @@ class NativeHlsFD(FileDownloader): '[hlsnative] %s: Downloading segment %d / %d' % (info_dict['id'], i + 1, len(segment_urls))) seg_req = compat_urllib_request.Request(segurl) - if remaining_bytes: + if remaining_bytes is not None: seg_req.add_header('Range', 'bytes=0-%d' % (remaining_bytes - 1)) segment = self.ydl.urlopen(seg_req).read() - if remaining_bytes: + if remaining_bytes is not None: segment = segment[:remaining_bytes] remaining_bytes -= len(segment) outf.write(segment) byte_counter += len(segment) - if remaining_bytes <= 0: + if remaining_bytes is not None and remaining_bytes <= 0: break self._hook_progress({ From 8a32b82e46b73680a9287336e455e6e38894bff3 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Thu, 25 Sep 2014 09:58:09 +0200 Subject: [PATCH 0062/1937] [youku] Modernize somewhat --- youtube_dl/extractor/youku.py | 89 ++++++++++++++++++----------------- 1 file changed, 45 insertions(+), 44 deletions(-) diff --git a/youtube_dl/extractor/youku.py b/youtube_dl/extractor/youku.py index a8fd40c83..07ed7cbd1 100644 --- a/youtube_dl/extractor/youku.py +++ b/youtube_dl/extractor/youku.py @@ -1,5 +1,7 @@ # coding: utf-8 +from 
__future__ import unicode_literals + import json import math import random @@ -13,18 +15,25 @@ from ..utils import ( class YoukuIE(InfoExtractor): - _VALID_URL = r'(?:(?:http://)?(?:v|player)\.youku\.com/(?:v_show/id_|player\.php/sid/)|youku:)(?P<ID>[A-Za-z0-9]+)(?:\.html|/v\.swf|)' - _TEST = { - u"url": u"http://v.youku.com/v_show/id_XNDgyMDQ2NTQw.html", - u"file": u"XNDgyMDQ2NTQw_part00.flv", - u"md5": u"ffe3f2e435663dc2d1eea34faeff5b5b", - u"params": {u"test": False}, - u"info_dict": { - u"title": u"youtube-dl test video \"'/\\ä↭𝕐" + _VALID_URL = r'''(?x) + (?: + http://(?:v|player)\.youku\.com/(?:v_show/id_|player\.php/sid/)| + youku:) + (?P<id>[A-Za-z0-9]+)(?:\.html|/v\.swf|) + ''' + _TEST = { + 'url': 'http://v.youku.com/v_show/id_XNDgyMDQ2NTQw.html', + 'md5': 'ffe3f2e435663dc2d1eea34faeff5b5b', + 'params': { + 'test': False + }, + 'info_dict': { + 'id': 'XNDgyMDQ2NTQw_part00', + 'ext': 'flv', + 'title': 'youtube-dl test video "\'/\\ä↭𝕐' } } - def _gen_sid(self): nowTime = int(time.time() * 1000) random1 = random.randint(1000,1998) @@ -55,49 +64,42 @@ class YoukuIE(InfoExtractor): def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) - if mobj is None: - raise ExtractorError(u'Invalid URL: %s' % url) - video_id = mobj.group('ID') + video_id = mobj.group('id') info_url = 'http://v.youku.com/player/getPlayList/VideoIDS/' + video_id - jsondata = self._download_webpage(info_url, video_id) + config = self._download_json(info_url, video_id) - self.report_extraction(video_id) - try: - config = json.loads(jsondata) - error_code = config['data'][0].get('error_code') - if error_code: - # -8 means blocked outside China. - error = config['data'][0].get('error') # Chinese and English, separated by newline. - raise ExtractorError(error or u'Server reported error %i' % error_code, - expected=True) + error_code = config['data'][0].get('error_code') + if error_code: + # -8 means blocked outside China. + error = config['data'][0].get('error') # Chinese and English, separated by newline. + raise ExtractorError(error or 'Server reported error %i' % error_code, + expected=True) - video_title = config['data'][0]['title'] - seed = config['data'][0]['seed'] + video_title = config['data'][0]['title'] + seed = config['data'][0]['seed'] - format = self._downloader.params.get('format', None) - supported_format = list(config['data'][0]['streamfileids'].keys()) + format = self._downloader.params.get('format', None) + supported_format = list(config['data'][0]['streamfileids'].keys()) - if format is None or format == 'best': - if 'hd2' in supported_format: - format = 'hd2' - else: - format = 'flv' - ext = u'flv' - elif format == 'worst': - format = 'mp4' - ext = u'mp4' + # TODO proper format selection + if format is None or format == 'best': + if 'hd2' in supported_format: + format = 'hd2' else: format = 'flv' - ext = u'flv' + ext = 'flv' + elif format == 'worst': + format = 'mp4' + ext = 'mp4' + else: + format = 'flv' + ext = 'flv' - - fileid = config['data'][0]['streamfileids'][format] - keys = [s['k'] for s in config['data'][0]['segs'][format]] - # segs is usually a dictionary, but an empty *list* if an error occured. - except (UnicodeDecodeError, ValueError, KeyError): - raise ExtractorError(u'Unable to extract info section') + fileid = config['data'][0]['streamfileids'][format] + keys = [s['k'] for s in config['data'][0]['segs'][format]] + # segs is usually a dictionary, but an empty *list* if an error occured. 
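Each segment URL built in the loop below reuses the stream file id with the segment index patched in as two upper-case hex digits. A standalone sketch of just that substitution; the 16-character file id is made up for illustration:

def segment_fileid(fileid, index):
    # Replace the two characters at positions 8-9 with the zero-padded
    # hexadecimal segment number ('00', '01', ..., '0A', ...).
    return '%s%02X%s' % (fileid[0:8], index, fileid[10:])

fid = '0123456700ABCDEF'  # invented file id
print(segment_fileid(fid, 0))   # 0123456700ABCDEF
print(segment_fileid(fid, 10))  # 012345670AABCDEF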
files_info=[] sid = self._gen_sid() @@ -106,9 +108,8 @@ class YoukuIE(InfoExtractor): #column 8,9 of fileid represent the segment number #fileid[7:9] should be changed for index, key in enumerate(keys): - temp_fileid = '%s%02X%s' % (fileid[0:8], index, fileid[10:]) - download_url = 'http://f.youku.com/player/getFlvPath/sid/%s_%02X/st/flv/fileid/%s?k=%s' % (sid, index, temp_fileid, key) + download_url = 'http://k.youku.com/player/getFlvPath/sid/%s_%02X/st/flv/fileid/%s?k=%s' % (sid, index, temp_fileid, key) info = { 'id': '%s_part%02d' % (video_id, index), From 54e9a4af951f26edd7719f1a1b56e0a92d2791ec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 25 Sep 2014 20:33:11 +0700 Subject: [PATCH 0063/1937] [wat] Skip test --- youtube_dl/extractor/wat.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/wat.py b/youtube_dl/extractor/wat.py index 268e2f618..bf9e40bad 100644 --- a/youtube_dl/extractor/wat.py +++ b/youtube_dl/extractor/wat.py @@ -40,6 +40,7 @@ class WatIE(InfoExtractor): 'upload_date': '20140816', 'duration': 2910, }, + 'skip': "Ce contenu n'est pas disponible pour l'instant.", }, ] From fbd3162e4918a2e1321ebdcec47ac84a8b121fbe Mon Sep 17 00:00:00 2001 From: Sergey M <dstftw@gmail.com> Date: Thu, 25 Sep 2014 20:48:54 +0700 Subject: [PATCH 0064/1937] [vube] Add DMCA notice --- youtube_dl/extractor/vube.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/youtube_dl/extractor/vube.py b/youtube_dl/extractor/vube.py index 2544c24bd..bcca4897a 100644 --- a/youtube_dl/extractor/vube.py +++ b/youtube_dl/extractor/vube.py @@ -6,6 +6,7 @@ from .common import InfoExtractor from ..utils import ( int_or_none, compat_str, + ExtractorError, ) @@ -102,6 +103,11 @@ class VubeIE(InfoExtractor): self._sort_formats(formats) + if not formats and video.get('vst') == 'dmca': + raise ExtractorError( + 'This video has been removed in response to a complaint received under the US Digital Millennium Copyright Act.', + expected=True) + title = video['title'] description = video.get('description') thumbnail = self._proto_relative_url(video.get('thumbnail_src'), scheme='http:') From 9a0d98bb401a809eaed68623a8534b3874d079e8 Mon Sep 17 00:00:00 2001 From: Sergey M <dstftw@gmail.com> Date: Thu, 25 Sep 2014 20:57:18 +0700 Subject: [PATCH 0065/1937] [vube] Update tests --- youtube_dl/extractor/vube.py | 27 ++++++++++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/vube.py b/youtube_dl/extractor/vube.py index bcca4897a..1b2f731e9 100644 --- a/youtube_dl/extractor/vube.py +++ b/youtube_dl/extractor/vube.py @@ -17,6 +17,24 @@ class VubeIE(InfoExtractor): _TESTS = [ { + 'url': 'http://vube.com/trending/William+Wei/Y8NUZ69Tf7?t=s', + 'md5': 'e7aabe1f8f1aa826b9e4735e1f9cee42', + 'info_dict': { + 'id': 'Y8NUZ69Tf7', + 'ext': 'mp4', + 'title': 'Best Drummer Ever [HD]', + 'description': 'md5:2d63c4b277b85c2277761c2cf7337d71', + 'thumbnail': 're:^https?://.*\.jpg', + 'uploader': 'William', + 'timestamp': 1406876915, + 'upload_date': '20140801', + 'duration': 258.051, + 'like_count': int, + 'dislike_count': int, + 'comment_count': int, + 'categories': ['amazing', 'hd', 'best drummer ever', 'william wei', 'bucket drumming', 'street drummer', 'epic street drumming'], + }, + }, { 'url': 'http://vube.com/Chiara+Grispo+Video+Channel/YL2qNPkqon', 'md5': 'db7aba89d4603dadd627e9d1973946fe', 'info_dict': { @@ -33,7 +51,8 @@ class VubeIE(InfoExtractor): 'dislike_count': int, 'comment_count': int, 'categories': 
['pop', 'music', 'cover', 'singing', 'jessie j', 'price tag', 'chiara grispo'], - } + }, + 'skip': 'Removed due to DMCA', }, { 'url': 'http://vube.com/SerainaMusic/my-7-year-old-sister-and-i-singing-alive-by-krewella/UeBhTudbfS?t=s&n=1', @@ -52,7 +71,8 @@ class VubeIE(InfoExtractor): 'dislike_count': int, 'comment_count': int, 'categories': ['seraina', 'jessica', 'krewella', 'alive'], - } + }, + 'skip': 'Removed due to DMCA', }, { 'url': 'http://vube.com/vote/Siren+Gene/0nmsMY5vEq?n=2&t=s', 'md5': '0584fc13b50f887127d9d1007589d27f', @@ -70,7 +90,8 @@ class VubeIE(InfoExtractor): 'dislike_count': int, 'comment_count': int, 'categories': ['let it go', 'cover', 'idina menzel', 'frozen', 'singing', 'disney', 'siren gene'], - } + }, + 'skip': 'Removed due to DMCA', } ] From 6a5af6acb9131d702b0d206242053b202440dbb9 Mon Sep 17 00:00:00 2001 From: Mats <d912e3@gmail.com> Date: Thu, 25 Sep 2014 16:25:53 +0200 Subject: [PATCH 0066/1937] [golem] Add new extractor --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/golem.py | 131 +++++++++++++++++++++++++++++++ 2 files changed, 132 insertions(+) create mode 100644 youtube_dl/extractor/golem.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 1f1fc0eb2..71fe38ca0 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -135,6 +135,7 @@ from .gametrailers import GametrailersIE from .gdcvault import GDCVaultIE from .generic import GenericIE from .godtube import GodTubeIE +from .golem import GolemIE from .googleplus import GooglePlusIE from .googlesearch import GoogleSearchIE from .gorillavid import GorillaVidIE diff --git a/youtube_dl/extractor/golem.py b/youtube_dl/extractor/golem.py new file mode 100644 index 000000000..afb620b1c --- /dev/null +++ b/youtube_dl/extractor/golem.py @@ -0,0 +1,131 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import compat_urlparse + + +class GolemIE(InfoExtractor): + _VALID_URL = r'^https?://video\.golem\.de/.+?/(?P<id>.+?)/' + _TEST = { + 'url': 'http://video.golem.de/handy/14095/iphone-6-und-6-plus-test.html', + 'md5': 'c1a2c0a3c863319651c7c992c5ee29bf', + 'info_dict': { + 'id': '14095', + 'format_id': 'high', + 'ext': 'mp4', + 'title': 'iPhone 6 und 6 Plus - Test', + 'duration': 300, + 'filesize': 65309548, + } + } + + _CONFIG = 'https://video.golem.de/xml/{}.xml' + _PREFIX = 'http://video.golem.de' + + def _warn(self, fmt, *args): + self.report_warning(fmt.format(*args), self._id) + + def _extract_format(self, elem): + format_id = elem.tag + + url = elem.findtext('./url') + if url == '': + self._warn("{}: url: empty, skipping", format_id) + return None + + fmt = { + 'format_id': format_id, + 'url': compat_urlparse.urljoin(self._PREFIX, url) + } + + try: + _, ext = elem.findtext('./filename', '').rsplit('.', 1) + except ValueError: + self._warn('{}: ext: missing extension', format_id) + else: + fmt['ext'] = ext + + filesize = elem.findtext('./filesize') + if filesize is not None: + try: + fmt['filesize'] = int(filesize) + except ValueError as e: + self._warn('{}: filesize: {}', format_id, e) + + width = elem.get('width') + if width is not None: + try: + fmt['width'] = int(width) + except ValueError as e: + self._warn('{}: width: {}', format_id, e) + + height = elem.get('height') + if height is not None: + try: + fmt['height'] = int(height) + except ValueError as e: + self._warn('{}: height: {}', format_id, e) + + return fmt + + def _extract_thumbnail(self, 
elem): + url = elem.findtext('./url') + if url == '': + return None + thumb = { + 'url': compat_urlparse.urljoin(self._PREFIX, url) + } + + width = elem.get('width') + if width is not None: + try: + thumb['width'] = int(width) + except ValueError as e: + self._warn('thumbnail: width: {}', e) + + height = elem.get('height') + if height is not None: + try: + thumb['height'] = int(height) + except ValueError as e: + self._warn('thumbnail: height: {}', e) + + return thumb + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + self._id = mobj.group('id') + + config = self._download_xml(self._CONFIG.format(self._id), self._id) + + info = { + 'id': self._id, + 'title': config.findtext('./title', 'golem') + } + + formats = [] + for e in config.findall('./*[url]'): + fmt = self._extract_format(e) + if fmt is not None: + formats.append(fmt) + self._sort_formats(formats) + info['formats'] = formats + + thumbnails = [] + for e in config.findall('.//teaser[url]'): + thumb = self._extract_thumbnail(e) + if thumb is not None: + thumbnails.append(thumb) + info['thumbnails'] = thumbnails + + playtime = config.findtext('./playtime') + if playtime is not None: + try: + info['duration'] = round(float(playtime)) + except ValueError as e: + self._warn('duration: {}', e) + + return info From 11b3ce85097430e1d26ddff0f51aa895c9d5af43 Mon Sep 17 00:00:00 2001 From: Naglis Jonaitis <njonaitis@gmail.com> Date: Thu, 25 Sep 2014 17:57:38 +0300 Subject: [PATCH 0067/1937] [crunchyroll] Allow to list subtitles (fixes #3805) --- youtube_dl/extractor/crunchyroll.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py index 4903764f7..f99888ecc 100644 --- a/youtube_dl/extractor/crunchyroll.py +++ b/youtube_dl/extractor/crunchyroll.py @@ -9,7 +9,7 @@ import xml.etree.ElementTree from hashlib import sha1 from math import pow, sqrt, floor -from .common import InfoExtractor +from .subtitles import SubtitlesInfoExtractor from ..utils import ( ExtractorError, compat_urllib_parse, @@ -26,7 +26,7 @@ from ..aes import ( ) -class CrunchyrollIE(InfoExtractor): +class CrunchyrollIE(SubtitlesInfoExtractor): _VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.com/(?:[^/]*/[^/?&]*?|media/\?id=)(?P<video_id>[0-9]+))(?:[/?&]|$)' _TEST = { 'url': 'http://www.crunchyroll.com/wanna-be-the-strongest-in-the-world/episode-1-an-idol-wrestler-is-born-645513', @@ -271,6 +271,10 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text else: subtitles[lang_code] = self._convert_subtitles_to_srt(subtitle) + if self._downloader.params.get('listsubtitles', False): + self._list_available_subtitles(video_id, subtitles) + return + return { 'id': video_id, 'title': video_title, From 8e6f8051f084f445015140f1f88ac770f3c0f43d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Sat, 27 Sep 2014 10:53:02 +0200 Subject: [PATCH 0068/1937] [vbox7] Don't set the extension to 'flv' (fixes #3836) --- youtube_dl/extractor/vbox7.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/youtube_dl/extractor/vbox7.py b/youtube_dl/extractor/vbox7.py index df115d251..ebd64f0f5 100644 --- a/youtube_dl/extractor/vbox7.py +++ b/youtube_dl/extractor/vbox7.py @@ -19,7 +19,7 @@ class Vbox7IE(InfoExtractor): 'md5': '99f65c0c9ef9b682b97313e052734c3f', 'info_dict': { 'id': '249bb972c2', - 'ext': 'flv', + 'ext': 'mp4', 'title': 'Смях! 
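The Golem extractor above repeats the same try/int()/except conversion for width, height, filesize and duration. youtube_dl.utils already provides int_or_none (imported by other extractors touched in this series), which collapses that pattern; a sketch of such a helper, not the library function's exact signature:

def int_or_none(value):
    # None in, None out; None out as well when the value is not numeric.
    if value is None:
        return None
    try:
        return int(value)
    except ValueError:
        return None

elem_attrs = {'width': '1280', 'height': 'n/a'}
fmt = {}
for key in ('width', 'height'):
    parsed = int_or_none(elem_attrs.get(key))
    if parsed is not None:
        fmt[key] = parsed
print(fmt)  # {'width': 1280}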
Чудо - чист за секунди - Скрита камера', }, } @@ -50,7 +50,6 @@ class Vbox7IE(InfoExtractor): return { 'id': video_id, 'url': final_url, - 'ext': 'flv', 'title': title, 'thumbnail': thumbnail_url, } From 497339fa0e633c8b1dcebf3f70670f6d96ee2d62 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 27 Sep 2014 22:29:27 +0700 Subject: [PATCH 0069/1937] [anysex] Fix extraction --- youtube_dl/extractor/anysex.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/anysex.py b/youtube_dl/extractor/anysex.py index bc64423a3..ad86d6e58 100644 --- a/youtube_dl/extractor/anysex.py +++ b/youtube_dl/extractor/anysex.py @@ -35,7 +35,7 @@ class AnySexIE(InfoExtractor): title = self._html_search_regex(r'<title>(.*?)', webpage, 'title') description = self._html_search_regex( - r'
([^<]+)
', webpage, 'description', fatal=False) + r'
]*>([^<]+)
', webpage, 'description', fatal=False) thumbnail = self._html_search_regex( r'preview_url\s*:\s*\'(.*?)\'', webpage, 'thumbnail', fatal=False) @@ -43,7 +43,7 @@ class AnySexIE(InfoExtractor): r'([^<]+)', webpage) duration = parse_duration(self._search_regex( - r'Duration: (\d+:\d+)', webpage, 'duration', fatal=False)) + r'Duration: (?:)?(\d+:\d+)', webpage, 'duration', fatal=False)) view_count = int_or_none(self._html_search_regex( r'Views: (\d+)', webpage, 'view count', fatal=False)) From 2f9e8776df664e21aee18b05c468a56b03fe4417 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 27 Sep 2014 22:36:53 +0700 Subject: [PATCH 0070/1937] [extremetube] Fix extraction --- youtube_dl/extractor/extremetube.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/extremetube.py b/youtube_dl/extractor/extremetube.py index 14a196ffc..aacbf1414 100644 --- a/youtube_dl/extractor/extremetube.py +++ b/youtube_dl/extractor/extremetube.py @@ -7,6 +7,7 @@ from ..utils import ( compat_urllib_parse_urlparse, compat_urllib_request, compat_urllib_parse, + str_to_int, ) @@ -20,6 +21,7 @@ class ExtremeTubeIE(InfoExtractor): 'ext': 'mp4', 'title': 'Music Video 14 british euro brit european cumshots swallow', 'uploader': 'unknown', + 'view_count': int, 'age_limit': 18, } }, { @@ -39,8 +41,12 @@ class ExtremeTubeIE(InfoExtractor): video_title = self._html_search_regex( r'

]*?title="([^"]+)"[^>]*>', webpage, 'title') uploader = self._html_search_regex( - r'>Posted by:(?=<)(?:\s|<[^>]*>)*(.+?)\|', webpage, 'uploader', - fatal=False) + r'Uploaded by:\s*\s*(.+?)\s*', + webpage, 'uploader', fatal=False) + view_count = str_to_int(self._html_search_regex( + r'Views:\s*\s*([\d,\.]+)', + webpage, 'view count', fatal=False)) + video_url = compat_urllib_parse.unquote(self._html_search_regex( r'video_url=(.+?)&', webpage, 'video_url')) path = compat_urllib_parse_urlparse(video_url).path @@ -51,6 +57,7 @@ class ExtremeTubeIE(InfoExtractor): 'id': video_id, 'title': video_title, 'uploader': uploader, + 'view_count': view_count, 'url': video_url, 'format': format, 'format_id': format, From 2a1325fdde5c88fc052710b3c42fcc0d73153901 Mon Sep 17 00:00:00 2001 From: net Date: Sat, 27 Sep 2014 20:11:22 +0300 Subject: [PATCH 0071/1937] [ynet] Add new extractor --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/ynet.py | 47 ++++++++++++++++++++++++++++++++ 2 files changed, 48 insertions(+) create mode 100644 youtube_dl/extractor/ynet.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 1f1fc0eb2..944e356ae 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -450,6 +450,7 @@ from .yahoo import ( YahooNewsIE, YahooSearchIE, ) +from .ynet import YnetIE from .youjizz import YouJizzIE from .youku import YoukuIE from .youporn import YouPornIE diff --git a/youtube_dl/extractor/ynet.py b/youtube_dl/extractor/ynet.py new file mode 100644 index 000000000..94d253679 --- /dev/null +++ b/youtube_dl/extractor/ynet.py @@ -0,0 +1,47 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re +import base64 +import json + +from .common import InfoExtractor +from youtube_dl.utils import compat_urllib_parse_urlparse, compat_urllib_parse + +class YnetIE(InfoExtractor): + _VALID_URL = r'http://.*ynet\.co\.il/.*/0,7340,(?PL(-[0-9]+)+),00\.html' + _TEST = { + 'url': 'http://hot.ynet.co.il/home/0,7340,L-11659-99244,00.html', + 'info_dict': { + 'id': 'L-11659-99244', + 'ext': 'flv', + 'title': 'md5:3dba12d2837ee2ad9652cc64af652b16', + 'thumbnail': 'http://hot.ynet.co.il/PicServer4/2014/09/23/5606015/AMERICAN_COMMUNE1_T.jpg', + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + + id = mobj.group('id') + + webpage = self._download_webpage(url, id) + + content = compat_urllib_parse.unquote_plus(self._og_search_video_url(webpage).decode('utf-8')) + + player_url = re.match('(http.*\.swf)\?' 
,content).group(1) + + config = json.loads(re.match('.*config\=(.*)' ,content).group(1)) + + f4m_url = config['clip']['url'] + + title = re.sub(': Video$', '', self._og_search_title(webpage)) + + return { + 'id': id, + 'title': title, + 'formats': self._extract_f4m_formats(f4m_url, id), + 'thumbnail': self._og_search_thumbnail(webpage), + 'player_url': player_url, + } + From b66745288e50cff42ff711e63242b5d97e80cd4f Mon Sep 17 00:00:00 2001 From: net Date: Sat, 27 Sep 2014 20:21:46 +0300 Subject: [PATCH 0072/1937] [sport5] Add new extractor --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/sport5.py | 70 ++++++++++++++++++++++++++++++++ 2 files changed, 71 insertions(+) create mode 100644 youtube_dl/extractor/sport5.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 1f1fc0eb2..c3a4d3c9a 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -340,6 +340,7 @@ from .spiegel import SpiegelIE, SpiegelArticleIE from .spiegeltv import SpiegeltvIE from .spike import SpikeIE from .sportdeutschland import SportDeutschlandIE +from .sport5 import Sport5IE from .stanfordoc import StanfordOpenClassroomIE from .steam import SteamIE from .streamcloud import StreamcloudIE diff --git a/youtube_dl/extractor/sport5.py b/youtube_dl/extractor/sport5.py new file mode 100644 index 000000000..9a4e39a43 --- /dev/null +++ b/youtube_dl/extractor/sport5.py @@ -0,0 +1,70 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from youtube_dl.utils import compat_str, compat_urlretrieve + + + +class Sport5IE(InfoExtractor): + _VALID_URL = r'http://.*sport5\.co\.il' + _TESTS = [{ + 'url': 'http://vod.sport5.co.il/?Vc=147&Vi=176331&Page=1', + 'info_dict': { + 'id': 's5-Y59xx1-GUh2', + 'ext': 'mp4', + 'title': 'md5:4a2a5eba7e7dc88fdc446cbca8a41c79', + } + }, { + 'url': 'http://www.sport5.co.il/articles.aspx?FolderID=3075&docID=176372&lang=HE', + 'info_dict': { + 'id': 's5-SiXxx1-hKh2', + 'ext': 'mp4', + 'title': 'md5:5cb1c6bfc0f16086e59f6683013f8e02', + } + } + ] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + + webpage = self._download_webpage(url, '') + + media_id = self._html_search_regex('clipId=(s5-\w+-\w+)', webpage, 'media id') + + xml = self._download_xml( + 'http://sport5-metadata-rr-d.nsacdn.com/vod/vod/%s/HDS/metadata.xml' % media_id, + media_id, 'Downloading media XML') + + title = xml.find('./Title').text + duration = xml.find('./Duration').text + description = xml.find('./Description').text + thumbnail = xml.find('./PosterLinks/PosterIMG').text + player_url = xml.find('./PlaybackLinks/PlayerUrl').text + file_els = xml.findall('./PlaybackLinks/FileURL') + + formats = [] + + for file_el in file_els: + bitrate = file_el.attrib.get('bitrate') + width = int(file_el.attrib.get('width')) + height = int(file_el.attrib.get('height')) + formats.append({ + 'url': compat_str(file_el.text), + 'ext': 'mp4', + 'height': height, + 'width': width + }) + + self._sort_formats(formats) + + return { + 'id': media_id, + 'title': title, + 'thumbnail': thumbnail, + 'duration': duration, + 'formats': formats, + 'player_url': player_url, + } \ No newline at end of file From 0155549d6cec6f49279ebe4a5a73cf6dcc6716fe Mon Sep 17 00:00:00 2001 From: Mats Date: Sat, 27 Sep 2014 19:28:01 +0200 Subject: [PATCH 0073/1937] [heise] Add new extractor --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/heise.py | 120 +++++++++++++++++++++++++++++++ 2 files changed, 121 
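The Sport5 extractor reads the title, duration and per-bitrate file URLs out of a small metadata XML document. A self-contained sketch of the same parsing with xml.etree; the sample document is invented from the element names used in the extractor, and the root element name is an assumption:

import xml.etree.ElementTree as ET

SAMPLE = (
    '<Media>'
    '<Title>GOALS_CELTIC_270914.mp4</Title>'
    '<Duration>87</Duration>'
    '<PlaybackLinks>'
    '<FileURL bitrate="600" width="640" height="360">http://example.invalid/low.mp4</FileURL>'
    '<FileURL bitrate="1200" width="1280" height="720">http://example.invalid/high.mp4</FileURL>'
    '</PlaybackLinks>'
    '</Media>'
)

doc = ET.fromstring(SAMPLE)
info = {
    'title': doc.find('./Title').text,
    'duration': int(doc.find('./Duration').text),
    'formats': [{
        'url': f.text,
        'vbr': int(f.get('bitrate')),
        'width': int(f.get('width')),
        'height': int(f.get('height')),
    } for f in doc.findall('./PlaybackLinks/FileURL')],
}
print(info['title'], len(info['formats']))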
insertions(+) create mode 100644 youtube_dl/extractor/heise.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 1f1fc0eb2..d0417a1f2 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -141,6 +141,7 @@ from .gorillavid import GorillaVidIE from .goshgay import GoshgayIE from .grooveshark import GroovesharkIE from .hark import HarkIE +from .heise import HeiseIE from .helsinki import HelsinkiIE from .hentaistigma import HentaiStigmaIE from .hornbunny import HornBunnyIE diff --git a/youtube_dl/extractor/heise.py b/youtube_dl/extractor/heise.py new file mode 100644 index 000000000..b3cb10fde --- /dev/null +++ b/youtube_dl/extractor/heise.py @@ -0,0 +1,120 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + compat_urllib_parse, + get_meta_content, + parse_iso8601, +) + + +class HeiseIE(InfoExtractor): + _VALID_URL = ( + r'^https?://(?:www\.)?heise\.de/video/artikel/' + + r'.+?(?P[0-9]+)\.html$' + ) + _TEST = { + 'url': ( + 'http://www.heise.de/video/artikel/Podcast-c-t-uplink-3-3-' + + 'Owncloud-Tastaturen-Peilsender-Smartphone-2404147.html' + ), + 'md5': 'ffed432483e922e88545ad9f2f15d30e', + 'info_dict': { + 'id': '2404147', + 'ext': 'mp4', + 'title': ( + "Podcast: c't uplink 3.3 – Owncloud / Tastaturen / " + + "Peilsender Smartphone" + ), + 'format_id': 'mp4_720', + 'timestamp': 1411812600, + 'upload_date': '20140927', + } + } + + _CONFIG = ( + r'".+?\?sequenz=(?P.+?)&container=(?P.+?)' + + r'(?:&hd=(?P.+?))?(?:&signature=(?P.+?))?&callback=\?"' + ) + _PREFIX = 'http://www.heise.de/videout/info?' + + def _warn(self, fmt, *args): + self.report_warning(fmt.format(*args), self._id) + + def _parse_config_url(self, html): + m = re.search(self._CONFIG, html) + if not m: + raise ExtractorError('No config found') + + qs = compat_urllib_parse.urlencode(dict((k, v) for k, v + in m.groupdict().items() + if v is not None)) + return self._PREFIX + qs + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + self._id = mobj.group('id') + + html = self._download_webpage(url, self._id) + config = self._download_json(self._parse_config_url(html), self._id) + + info = { + 'id': self._id + } + + title = get_meta_content('fulltitle', html) + if title: + info['title'] = title + elif config.get('title'): + info['title'] = config['title'] + else: + self._warn('title: not found') + info['title'] = 'heise' + + if (not config.get('formats') or + not hasattr(config['formats'], 'items')): + raise ExtractorError('No formats found') + + formats = [] + for t, rs in config['formats'].items(): + if not rs or not hasattr(rs, 'items'): + self._warn('formats: {0}: no resolutions', t) + continue + + for res, obj in rs.items(): + format_id = '{0}_{1}'.format(t, res) + + if (not obj or not obj.get('url') or + not isinstance(obj['url'], str)): + self._warn('formats: {0}: no url', format_id) + continue + + fmt = { + 'url': obj['url'], + 'format_id': format_id + } + try: + fmt['height'] = int(res) + except ValueError as e: + self._warn('formats: {0}: height: {1}', t, e) + + formats.append(fmt) + + self._sort_formats(formats) + info['formats'] = formats + + if config.get('poster') and isinstance(config['poster'], str): + info['thumbnail'] = config['poster'] + + date = get_meta_content('date', html) + if date and isinstance(date, str): + try: + info['timestamp'] = parse_iso8601(date) + except ValueError as e: + self._warn('timestamp: {0}', e) + + 
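The Heise extractor flattens a nested formats mapping (container type, then resolution) into youtube-dl's flat formats list, deriving format_id and height along the way. A runnable sketch of that flattening over an invented config of the assumed shape:

config = {
    'formats': {
        'mp4': {
            '360': {'url': 'http://example.invalid/video_360.mp4'},
            '720': {'url': 'http://example.invalid/video_720.mp4'},
        },
        'webm': {
            '360': {'url': 'http://example.invalid/video_360.webm'},
        },
    },
}

formats = []
for container, resolutions in config['formats'].items():
    for res, entry in resolutions.items():
        if not entry.get('url'):
            continue  # skip entries without a downloadable URL
        fmt = {'format_id': '{0}_{1}'.format(container, res), 'url': entry['url']}
        try:
            fmt['height'] = int(res)
        except ValueError:
            pass  # non-numeric resolution label, leave height unset
        formats.append(fmt)

formats.sort(key=lambda f: f.get('height', 0))
print([f['format_id'] for f in formats])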
return info From 70752ccefd2dcb54d131644aea38c324c81ff168 Mon Sep 17 00:00:00 2001 From: Mats Date: Sat, 27 Sep 2014 19:35:55 +0200 Subject: [PATCH 0074/1937] [golem] Don't omit positional argument specifiers Required by Python 2.6. --- youtube_dl/extractor/golem.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/golem.py b/youtube_dl/extractor/golem.py index afb620b1c..6a64b5d95 100644 --- a/youtube_dl/extractor/golem.py +++ b/youtube_dl/extractor/golem.py @@ -22,7 +22,7 @@ class GolemIE(InfoExtractor): } } - _CONFIG = 'https://video.golem.de/xml/{}.xml' + _CONFIG = 'https://video.golem.de/xml/{0}.xml' _PREFIX = 'http://video.golem.de' def _warn(self, fmt, *args): @@ -33,7 +33,7 @@ class GolemIE(InfoExtractor): url = elem.findtext('./url') if url == '': - self._warn("{}: url: empty, skipping", format_id) + self._warn("{0}: url: empty, skipping", format_id) return None fmt = { @@ -44,7 +44,7 @@ class GolemIE(InfoExtractor): try: _, ext = elem.findtext('./filename', '').rsplit('.', 1) except ValueError: - self._warn('{}: ext: missing extension', format_id) + self._warn('{0}: ext: missing extension', format_id) else: fmt['ext'] = ext @@ -53,21 +53,21 @@ class GolemIE(InfoExtractor): try: fmt['filesize'] = int(filesize) except ValueError as e: - self._warn('{}: filesize: {}', format_id, e) + self._warn('{0}: filesize: {1}', format_id, e) width = elem.get('width') if width is not None: try: fmt['width'] = int(width) except ValueError as e: - self._warn('{}: width: {}', format_id, e) + self._warn('{0}: width: {1}', format_id, e) height = elem.get('height') if height is not None: try: fmt['height'] = int(height) except ValueError as e: - self._warn('{}: height: {}', format_id, e) + self._warn('{0}: height: {1}', format_id, e) return fmt @@ -84,14 +84,14 @@ class GolemIE(InfoExtractor): try: thumb['width'] = int(width) except ValueError as e: - self._warn('thumbnail: width: {}', e) + self._warn('thumbnail: width: {0}', e) height = elem.get('height') if height is not None: try: thumb['height'] = int(height) except ValueError as e: - self._warn('thumbnail: height: {}', e) + self._warn('thumbnail: height: {0}', e) return thumb @@ -126,6 +126,6 @@ class GolemIE(InfoExtractor): try: info['duration'] = round(float(playtime)) except ValueError as e: - self._warn('duration: {}', e) + self._warn('duration: {0}', e) return info From 68b09730461de20395cee9427dc469fa9edc4022 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 28 Sep 2014 02:07:42 +0700 Subject: [PATCH 0075/1937] [YoutubeDL] Expect all kind of strings in urlopen Now it doesn't fail if req is python2's str --- youtube_dl/YoutubeDL.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index a1713dc5a..b485dbdf1 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1250,12 +1250,13 @@ class YoutubeDL(object): # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991) # To work around aforementioned issue we will replace request's original URL with # percent-encoded one - url = req if isinstance(req, compat_str) else req.get_full_url() + req_is_string = isinstance(req, basestring) + url = req if req_is_string else req.get_full_url() url_escaped = escape_url(url) # Substitute URL if any change after escaping if url != url_escaped: - if isinstance(req, compat_str): + if req_is_string: req = url_escaped else: req = compat_urllib_request.Request( From 
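urllib rejects URLs that contain non-ASCII characters, so the urlopen change above substitutes a percent-encoded copy of the request URL whenever escaping actually changes it. A rough standalone equivalent of that idea (Python 3 spelling; the library's own escape_url helper works per URL component and is not reproduced here):

from urllib.parse import quote

def escape_non_ascii(url):
    # Percent-encode non-ASCII bytes only; ASCII, including '%', '/', '?',
    # '=' and '&', is kept so already-escaped URLs pass through unchanged.
    return quote(url, safe="%/:=&?~#+!$,;'@()*[]")

url = 'http://example.invalid/видео?q=тест'
escaped = escape_non_ascii(url)
if url != escaped:  # mirror the patch: only swap the URL when escaping changed it
    url = escaped
print(url)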
7b7518124ee433484b485502671e011017bc1897 Mon Sep 17 00:00:00 2001 From: Mats Date: Sat, 27 Sep 2014 21:12:23 +0200 Subject: [PATCH 0076/1937] [heise] Don't check string type Before Python 3 could be unicode, so don't check at all. --- youtube_dl/extractor/heise.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/heise.py b/youtube_dl/extractor/heise.py index b3cb10fde..73c953181 100644 --- a/youtube_dl/extractor/heise.py +++ b/youtube_dl/extractor/heise.py @@ -88,8 +88,7 @@ class HeiseIE(InfoExtractor): for res, obj in rs.items(): format_id = '{0}_{1}'.format(t, res) - if (not obj or not obj.get('url') or - not isinstance(obj['url'], str)): + if not obj or not obj.get('url'): self._warn('formats: {0}: no url', format_id) continue @@ -107,11 +106,11 @@ class HeiseIE(InfoExtractor): self._sort_formats(formats) info['formats'] = formats - if config.get('poster') and isinstance(config['poster'], str): + if config.get('poster'): info['thumbnail'] = config['poster'] date = get_meta_content('date', html) - if date and isinstance(date, str): + if date: try: info['timestamp'] = parse_iso8601(date) except ValueError as e: From 0b75c2a88ba56a84322db6cc1a298d7e52b44b2a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 28 Sep 2014 02:31:14 +0700 Subject: [PATCH 0077/1937] [sport5] Capture error message and improve --- youtube_dl/extractor/sport5.py | 88 +++++++++++++++++++++------------- 1 file changed, 55 insertions(+), 33 deletions(-) diff --git a/youtube_dl/extractor/sport5.py b/youtube_dl/extractor/sport5.py index 9a4e39a43..3f680bfc6 100644 --- a/youtube_dl/extractor/sport5.py +++ b/youtube_dl/extractor/sport5.py @@ -4,67 +4,89 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from youtube_dl.utils import compat_str, compat_urlretrieve - +from ..utils import ExtractorError class Sport5IE(InfoExtractor): - _VALID_URL = r'http://.*sport5\.co\.il' - _TESTS = [{ + _VALID_URL = r'http://(?:www|vod)?\.sport5\.co\.il/.*\b(?:Vi|docID)=(?P\d+)' + _TESTS = [ + { 'url': 'http://vod.sport5.co.il/?Vc=147&Vi=176331&Page=1', 'info_dict': { 'id': 's5-Y59xx1-GUh2', 'ext': 'mp4', - 'title': 'md5:4a2a5eba7e7dc88fdc446cbca8a41c79', - } + 'title': 'ולנסיה-קורדובה 0:3', + 'description': 'אלקאסר, גאייה ופגולי סידרו לקבוצה של נונו ניצחון על קורדובה ואת המקום הראשון בליגה', + 'duration': 228, + 'categories': list, + }, + 'skip': 'Blocked outside of Israel', }, { 'url': 'http://www.sport5.co.il/articles.aspx?FolderID=3075&docID=176372&lang=HE', 'info_dict': { 'id': 's5-SiXxx1-hKh2', 'ext': 'mp4', - 'title': 'md5:5cb1c6bfc0f16086e59f6683013f8e02', - } + 'title': 'GOALS_CELTIC_270914.mp4', + 'description': '', + 'duration': 87, + 'categories': list, + }, + 'skip': 'Blocked outside of Israel', } ] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) + media_id = mobj.group('id') - webpage = self._download_webpage(url, '') + webpage = self._download_webpage(url, media_id) - media_id = self._html_search_regex('clipId=(s5-\w+-\w+)', webpage, 'media id') + video_id = self._html_search_regex('clipId=([\w-]+)', webpage, 'video id') - xml = self._download_xml( - 'http://sport5-metadata-rr-d.nsacdn.com/vod/vod/%s/HDS/metadata.xml' % media_id, - media_id, 'Downloading media XML') + metadata = self._download_xml( + 'http://sport5-metadata-rr-d.nsacdn.com/vod/vod/%s/HDS/metadata.xml' % video_id, + video_id) - title = xml.find('./Title').text - duration = xml.find('./Duration').text - description = 
xml.find('./Description').text - thumbnail = xml.find('./PosterLinks/PosterIMG').text - player_url = xml.find('./PlaybackLinks/PlayerUrl').text - file_els = xml.findall('./PlaybackLinks/FileURL') + error = metadata.find('./Error') + if error is not None: + raise ExtractorError( + '%s returned error: %s - %s' % ( + self.IE_NAME, + error.find('./Name').text, + error.find('./Description').text), + expected=True) - formats = [] + title = metadata.find('./Title').text + description = metadata.find('./Description').text + duration = int(metadata.find('./Duration').text) - for file_el in file_els: - bitrate = file_el.attrib.get('bitrate') - width = int(file_el.attrib.get('width')) - height = int(file_el.attrib.get('height')) - formats.append({ - 'url': compat_str(file_el.text), - 'ext': 'mp4', - 'height': height, - 'width': width - }) + posters_el = metadata.find('./PosterLinks') + thumbnails = [{ + 'url': thumbnail.text, + 'width': int(thumbnail.get('width')), + 'height': int(thumbnail.get('height')), + } for thumbnail in posters_el.findall('./PosterIMG')] if posters_el is not None else [] + categories_el = metadata.find('./Categories') + categories = [ + cat.get('name') for cat in categories_el.findall('./Category') + ] if categories_el is not None else [] + + formats = [{ + 'url': fmt.text, + 'ext': 'mp4', + 'vbr': int(fmt.get('bitrate')), + 'width': int(fmt.get('width')), + 'height': int(fmt.get('height')), + } for fmt in metadata.findall('./PlaybackLinks/FileURL')] self._sort_formats(formats) return { - 'id': media_id, + 'id': video_id, 'title': title, - 'thumbnail': thumbnail, + 'description': description, + 'thumbnails': thumbnails, 'duration': duration, + 'categories': categories, 'formats': formats, - 'player_url': player_url, } \ No newline at end of file From f776d8f6081b305ba7ccc1bda323aa510a01db7b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 28 Sep 2014 02:35:46 +0700 Subject: [PATCH 0078/1937] [sport5] Keep alphanumeric order --- youtube_dl/extractor/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index c3a4d3c9a..5a14540a3 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -339,8 +339,8 @@ from .spankwire import SpankwireIE from .spiegel import SpiegelIE, SpiegelArticleIE from .spiegeltv import SpiegeltvIE from .spike import SpikeIE -from .sportdeutschland import SportDeutschlandIE from .sport5 import Sport5IE +from .sportdeutschland import SportDeutschlandIE from .stanfordoc import StanfordOpenClassroomIE from .steam import SteamIE from .streamcloud import StreamcloudIE From ee0d90707a38537355bab8527edd9a42d6514aa9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 28 Sep 2014 02:48:41 +0700 Subject: [PATCH 0079/1937] [YoutubeDL] Fix string check for python3 --- youtube_dl/YoutubeDL.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index b485dbdf1..4a9610355 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1250,7 +1250,7 @@ class YoutubeDL(object): # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991) # To work around aforementioned issue we will replace request's original URL with # percent-encoded one - req_is_string = isinstance(req, basestring) + req_is_string = isinstance(req, basestring if sys.version_info < (3, 0) else compat_str) url = req if req_is_string else 
req.get_full_url() url_escaped = escape_url(url) From c6641823238ac70091520fe9b4b02ec3d41cb1a9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 28 Sep 2014 03:26:38 +0700 Subject: [PATCH 0080/1937] [ynet] Remove unused stuff, simplify and improve --- youtube_dl/extractor/ynet.py | 63 ++++++++++++++++++++---------------- 1 file changed, 35 insertions(+), 28 deletions(-) diff --git a/youtube_dl/extractor/ynet.py b/youtube_dl/extractor/ynet.py index 94d253679..66d53962a 100644 --- a/youtube_dl/extractor/ynet.py +++ b/youtube_dl/extractor/ynet.py @@ -2,46 +2,53 @@ from __future__ import unicode_literals import re -import base64 import json from .common import InfoExtractor -from youtube_dl.utils import compat_urllib_parse_urlparse, compat_urllib_parse +from ..utils import compat_urllib_parse + class YnetIE(InfoExtractor): - _VALID_URL = r'http://.*ynet\.co\.il/.*/0,7340,(?PL(-[0-9]+)+),00\.html' - _TEST = { - 'url': 'http://hot.ynet.co.il/home/0,7340,L-11659-99244,00.html', - 'info_dict': { - 'id': 'L-11659-99244', - 'ext': 'flv', - 'title': 'md5:3dba12d2837ee2ad9652cc64af652b16', - 'thumbnail': 'http://hot.ynet.co.il/PicServer4/2014/09/23/5606015/AMERICAN_COMMUNE1_T.jpg', + _VALID_URL = r'http://.*ynet\.co\.il/.*/0,7340,(?PL(?:-[0-9]+)+),00\.html' + _TESTS = [ + { + 'url': 'http://hot.ynet.co.il/home/0,7340,L-11659-99244,00.html', + 'md5': '002b44ee2f33d50363a1c153bed524cf', + 'info_dict': { + 'id': 'L-11659-99244', + 'ext': 'flv', + 'title': 'איש לא יודע מאיפה באנו', + 'thumbnail': 're:^https?://.*\.jpg', + } + }, { + 'url': 'http://hot.ynet.co.il/home/0,7340,L-8859-84418,00.html', + 'md5': '6455046ae1b48cf7e2b7cae285e53a16', + 'info_dict': { + 'id': 'L-8859-84418', + 'ext': 'flv', + 'title': "צפו: הנשיקה הלוהטת של תורגי' ויוליה פלוטקין", + 'thumbnail': 're:^https?://.*\.jpg', + } } - } + ] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') - id = mobj.group('id') - - webpage = self._download_webpage(url, id) + webpage = self._download_webpage(url, video_id) - content = compat_urllib_parse.unquote_plus(self._og_search_video_url(webpage).decode('utf-8')) - - player_url = re.match('(http.*\.swf)\?' 
,content).group(1) - - config = json.loads(re.match('.*config\=(.*)' ,content).group(1)) - - f4m_url = config['clip']['url'] - - title = re.sub(': Video$', '', self._og_search_title(webpage)) + content = compat_urllib_parse.unquote_plus(self._og_search_video_url(webpage)) + config = json.loads(self._search_regex(r'config=({.+?})$', content, 'video config')) + f4m_url = config['clip']['url'] + title = self._og_search_title(webpage) + m = re.search(r'ynet - HOT -- (["\']+)(?P.+?)\1', title) + if m: + title = m.group('title') return { - 'id': id, + 'id': video_id, 'title': title, - 'formats': self._extract_f4m_formats(f4m_url, id), + 'formats': self._extract_f4m_formats(f4m_url, video_id), 'thumbnail': self._og_search_thumbnail(webpage), - 'player_url': player_url, - } - + } \ No newline at end of file From a89435a7a8e0574239531bfeedc437ae14b13902 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 28 Sep 2014 03:30:41 +0700 Subject: [PATCH 0081/1937] [ynet] Improve _VALID_URL --- youtube_dl/extractor/ynet.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/ynet.py b/youtube_dl/extractor/ynet.py index 66d53962a..24872861a 100644 --- a/youtube_dl/extractor/ynet.py +++ b/youtube_dl/extractor/ynet.py @@ -9,7 +9,7 @@ from ..utils import compat_urllib_parse class YnetIE(InfoExtractor): - _VALID_URL = r'http://.*ynet\.co\.il/.*/0,7340,(?P<id>L(?:-[0-9]+)+),00\.html' + _VALID_URL = r'http://(?:.+?\.)?ynet\.co\.il/(?:.+?/)?0,7340,(?P<id>L(?:-[0-9]+)+),00\.html' _TESTS = [ { 'url': 'http://hot.ynet.co.il/home/0,7340,L-11659-99244,00.html', From 5e43e3803c462d7a0f5ac85f8b54ab24f271cb0a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 28 Sep 2014 03:45:15 +0700 Subject: [PATCH 0082/1937] Credit @lenaten for ynet (#3840) and sport5 (#3841) --- youtube_dl/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 20d7a57ce..7f2b4dfcc 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -78,6 +78,7 @@ __authors__ = ( 'Hari Padmanaban', 'Carlos Ramos', '5moufl', + 'lenaten', ) __license__ = 'Public Domain' From c95eeb7b80e5007259df260b64874b675a802431 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Sun, 28 Sep 2014 08:49:03 +0200 Subject: [PATCH 0083/1937] [eitb] Modernize --- youtube_dl/extractor/eitb.py | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/youtube_dl/extractor/eitb.py b/youtube_dl/extractor/eitb.py index 4ba323148..2cba82532 100644 --- a/youtube_dl/extractor/eitb.py +++ b/youtube_dl/extractor/eitb.py @@ -1,4 +1,6 @@ # encoding: utf-8 +from __future__ import unicode_literals + import re from .common import InfoExtractor @@ -7,20 +9,20 @@ from ..utils import ExtractorError class EitbIE(InfoExtractor): - IE_NAME = u'eitb.tv' + IE_NAME = 'eitb.tv' _VALID_URL = r'https?://www\.eitb\.tv/(eu/bideoa|es/video)/[^/]+/(?P<playlist_id>\d+)/(?P<chapter_id>\d+)' _TEST = { - u'add_ie': ['Brightcove'], - u'url': u'http://www.eitb.tv/es/video/60-minutos-60-minutos-2013-2014/2677100210001/2743577154001/lasa-y-zabala-30-anos/', - u'md5': u'edf4436247185adee3ea18ce64c47998', - u'info_dict': { - u'id': u'2743577154001', - u'ext': u'mp4', - u'title': u'60 minutos (Lasa y Zabala, 30 años)', + 'add_ie': ['Brightcove'], + 'url': 'http://www.eitb.tv/es/video/60-minutos-60-minutos-2013-2014/2677100210001/2743577154001/lasa-y-zabala-30-anos/', + 'md5': 
'edf4436247185adee3ea18ce64c47998', + 'info_dict': { + 'id': '2743577154001', + 'ext': 'mp4', + 'title': '60 minutos (Lasa y Zabala, 30 años)', # All videos from eitb has this description in the brightcove info - u'description': u'.', - u'uploader': u'Euskal Telebista', + 'description': '.', + 'uploader': 'Euskal Telebista', }, } @@ -30,7 +32,7 @@ class EitbIE(InfoExtractor): webpage = self._download_webpage(url, chapter_id) bc_url = BrightcoveIE._extract_brightcove_url(webpage) if bc_url is None: - raise ExtractorError(u'Could not extract the Brightcove url') + raise ExtractorError('Could not extract the Brightcove url') # The BrightcoveExperience object doesn't contain the video id, we set # it manually bc_url += '&%40videoPlayer={0}'.format(chapter_id) From f4b1c7adb81555fde0dff390b48e4139438b4071 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Sun, 28 Sep 2014 08:53:52 +0200 Subject: [PATCH 0084/1937] [muenchentv] Move live title generation to common --- youtube_dl/extractor/common.py | 7 +++++++ youtube_dl/extractor/muenchentv.py | 5 +---- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 60cab6f4e..403791e6b 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1,6 +1,7 @@ from __future__ import unicode_literals import base64 +import datetime import hashlib import json import netrc @@ -705,6 +706,12 @@ class InfoExtractor(object): self._sort_formats(formats) return formats + def _live_title(self, name): + """ Generate the title for a live video """ + now = datetime.datetime.now() + now_str = now.strftime("%Y-%m-%d %H:%M") + return name + ' ' + now_str + class SearchInfoExtractor(InfoExtractor): """ diff --git a/youtube_dl/extractor/muenchentv.py b/youtube_dl/extractor/muenchentv.py index 3a938861b..7cb6749be 100644 --- a/youtube_dl/extractor/muenchentv.py +++ b/youtube_dl/extractor/muenchentv.py @@ -1,7 +1,6 @@ # coding: utf-8 from __future__ import unicode_literals -import datetime import json from .common import InfoExtractor @@ -33,9 +32,7 @@ class MuenchenTVIE(InfoExtractor): display_id = 'live' webpage = self._download_webpage(url, display_id) - now = datetime.datetime.now() - now_str = now.strftime("%Y-%m-%d %H:%M") - title = self._og_search_title(webpage) + ' ' + now_str + title = self._live_title(self._og_search_title(webpage)) data_js = self._search_regex( r'(?s)\nplaylist:\s*(\[.*?}\]),related:', From ed9266db90023500e687aa634b55e11742c2e18c Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Sun, 28 Sep 2014 09:31:58 +0200 Subject: [PATCH 0085/1937] [common] Add new helper function _match_id --- youtube_dl/extractor/abc.py | 3 +-- youtube_dl/extractor/common.py | 8 ++++++++ 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/abc.py b/youtube_dl/extractor/abc.py index 7d89f44ee..69f89320c 100644 --- a/youtube_dl/extractor/abc.py +++ b/youtube_dl/extractor/abc.py @@ -22,8 +22,7 @@ class ABCIE(InfoExtractor): } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') + video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) urls_info_json = self._search_regex( diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 403791e6b..8d6a6f601 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -165,6 +165,14 @@ class InfoExtractor(object): cls._VALID_URL_RE = 
re.compile(cls._VALID_URL) return cls._VALID_URL_RE.match(url) is not None + @classmethod + def _match_id(cls, url): + if '_VALID_URL_RE' not in cls.__dict__: + cls._VALID_URL_RE = re.compile(cls._VALID_URL) + m = cls._VALID_URL_RE.match(url) + assert m + return m.group('id') + @classmethod def working(cls): """Getter method for _WORKING.""" From 394599f422b11f54efa78123296867efa45a1a2c Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Sun, 28 Sep 2014 09:48:51 +0200 Subject: [PATCH 0086/1937] [oktoberfesttv] Add new extractor (Fixes #3845) --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/oktoberfesttv.py | 47 +++++++++++++++++++++++++++ 2 files changed, 48 insertions(+) create mode 100644 youtube_dl/extractor/oktoberfesttv.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 079221567..629280215 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -261,6 +261,7 @@ from .nrk import ( from .ntv import NTVIE from .nytimes import NYTimesIE from .nuvid import NuvidIE +from .oktoberfesttv import OktoberfestTVIE from .ooyala import OoyalaIE from .orf import ( ORFTVthekIE, diff --git a/youtube_dl/extractor/oktoberfesttv.py b/youtube_dl/extractor/oktoberfesttv.py new file mode 100644 index 000000000..4a41c0542 --- /dev/null +++ b/youtube_dl/extractor/oktoberfesttv.py @@ -0,0 +1,47 @@ +# encoding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor + + +class OktoberfestTVIE(InfoExtractor): + _VALID_URL = r'https?://www\.oktoberfest-tv\.de/[^/]+/[^/]+/video/(?P<id>[^/?#]+)' + + _TEST = { + 'url': 'http://www.oktoberfest-tv.de/de/kameras/video/hb-zelt', + 'info_dict': { + 'id': 'hb-zelt', + 'ext': 'mp4', + 'title': 're:^Live-Kamera: Hofbräuzelt [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', + 'thumbnail': 're:^https?://.*\.jpg$', + 'is_live': True, + }, + 'params': { + 'skip_download': True, + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + title = self._live_title(self._html_search_regex( + r'<h1><strong>.*?</strong>(.*?)</h1>', webpage, 'title')) + + clip = self._search_regex( + r"clip:\s*\{\s*url:\s*'([^']+)'", webpage, 'clip') + ncurl = self._search_regex( + r"netConnectionUrl:\s*'([^']+)'", webpage, 'rtmp base') + video_url = ncurl + clip + thumbnail = self._search_regex( + r"canvas:\s*\{\s*backgroundImage:\s*'url\(([^)]+)\)'", webpage, + 'thumbnail', fatal=False) + + return { + 'id': video_id, + 'title': title, + 'url': video_url, + 'ext': 'mp4', + 'is_live': True, + 'thumbnail': thumbnail, + } From 88fbe4c2ccdae7f917b6c9a2655f0878b6e4308c Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Sun, 28 Sep 2014 09:49:42 +0200 Subject: [PATCH 0087/1937] release 2014.09.28 --- youtube_dl/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index c17701d6a..e62bef2cf 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2014.09.25' +__version__ = '2014.09.28' From b14f3a4c1da00cbee8775904c24c4d0547018ae0 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Sun, 28 Sep 2014 10:34:55 +0200 Subject: [PATCH 0088/1937] [golem] Simplify (#3828) --- youtube_dl/extractor/common.py | 23 ++++++ youtube_dl/extractor/golem.py | 124 +++++++++------------------------ 2 files changed, 56 insertions(+), 91 deletions(-) diff --git 
a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 8d6a6f601..f43a0a569 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -22,6 +22,7 @@ from ..utils import ( clean_html, compiled_regex_type, ExtractorError, + float_or_none, int_or_none, RegexNotFoundError, sanitize_filename, @@ -720,6 +721,28 @@ class InfoExtractor(object): now_str = now.strftime("%Y-%m-%d %H:%M") return name + ' ' + now_str + def _int(self, v, name, fatal=False, **kwargs): + res = int_or_none(v, **kwargs) + if 'get_attr' in kwargs: + print(getattr(v, kwargs['get_attr'])) + if res is None: + msg = 'Failed to extract %s: Could not parse value %r' % (name, v) + if fatal: + raise ExtractorError(msg) + else: + self._downloader.report_warning(msg) + return res + + def _float(self, v, name, fatal=False, **kwargs): + res = float_or_none(v, **kwargs) + if res is None: + msg = 'Failed to extract %s: Could not parse value %r' % (name, v) + if fatal: + raise ExtractorError(msg) + else: + self._downloader.report_warning(msg) + return res + class SearchInfoExtractor(InfoExtractor): """ diff --git a/youtube_dl/extractor/golem.py b/youtube_dl/extractor/golem.py index 6a64b5d95..a237f19ee 100644 --- a/youtube_dl/extractor/golem.py +++ b/youtube_dl/extractor/golem.py @@ -4,7 +4,10 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..utils import compat_urlparse +from ..utils import ( + compat_urlparse, + determine_ext, +) class GolemIE(InfoExtractor): @@ -17,115 +20,54 @@ class GolemIE(InfoExtractor): 'format_id': 'high', 'ext': 'mp4', 'title': 'iPhone 6 und 6 Plus - Test', - 'duration': 300, + 'duration': 300.44, 'filesize': 65309548, } } - _CONFIG = 'https://video.golem.de/xml/{0}.xml' _PREFIX = 'http://video.golem.de' - def _warn(self, fmt, *args): - self.report_warning(fmt.format(*args), self._id) - - def _extract_format(self, elem): - format_id = elem.tag - - url = elem.findtext('./url') - if url == '': - self._warn("{0}: url: empty, skipping", format_id) - return None - - fmt = { - 'format_id': format_id, - 'url': compat_urlparse.urljoin(self._PREFIX, url) - } - - try: - _, ext = elem.findtext('./filename', '').rsplit('.', 1) - except ValueError: - self._warn('{0}: ext: missing extension', format_id) - else: - fmt['ext'] = ext - - filesize = elem.findtext('./filesize') - if filesize is not None: - try: - fmt['filesize'] = int(filesize) - except ValueError as e: - self._warn('{0}: filesize: {1}', format_id, e) - - width = elem.get('width') - if width is not None: - try: - fmt['width'] = int(width) - except ValueError as e: - self._warn('{0}: width: {1}', format_id, e) - - height = elem.get('height') - if height is not None: - try: - fmt['height'] = int(height) - except ValueError as e: - self._warn('{0}: height: {1}', format_id, e) - - return fmt - - def _extract_thumbnail(self, elem): - url = elem.findtext('./url') - if url == '': - return None - thumb = { - 'url': compat_urlparse.urljoin(self._PREFIX, url) - } - - width = elem.get('width') - if width is not None: - try: - thumb['width'] = int(width) - except ValueError as e: - self._warn('thumbnail: width: {0}', e) - - height = elem.get('height') - if height is not None: - try: - thumb['height'] = int(height) - except ValueError as e: - self._warn('thumbnail: height: {0}', e) - - return thumb - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - self._id = mobj.group('id') + video_id = self._match_id(url) - config = self._download_xml(self._CONFIG.format(self._id), 
self._id) + config = self._download_xml( + 'https://video.golem.de/xml/{0}.xml'.format(video_id), video_id) info = { - 'id': self._id, - 'title': config.findtext('./title', 'golem') + 'id': video_id, + 'title': config.findtext('./title', 'golem'), + 'duration': self._float(config.findtext('./playtime'), 'duration'), } formats = [] for e in config.findall('./*[url]'): - fmt = self._extract_format(e) - if fmt is not None: - formats.append(fmt) + url = e.findtext('./url') + if not url: + self._downloader.report_warning( + "{0}: url: empty, skipping".format(e.tag)) + continue + + formats.append({ + 'format_id': e.tag, + 'url': compat_urlparse.urljoin(self._PREFIX, url), + 'height': self._int(e.get('height'), 'height'), + 'width': self._int(e.get('width'), 'width'), + 'filesize': self._int(e.findtext('filesize'), 'filesize'), + 'ext': determine_ext(e.findtext('./filename')), + }) self._sort_formats(formats) info['formats'] = formats thumbnails = [] for e in config.findall('.//teaser[url]'): - thumb = self._extract_thumbnail(e) - if thumb is not None: - thumbnails.append(thumb) + url = e.findtext('./url') + if not url: + continue + thumbnails.append({ + 'url': compat_urlparse.urljoin(self._PREFIX, url), + 'width': self._int(e.get('width'), 'thumbnail width'), + 'height': self._int(e.get('height'), 'thumbnail height'), + }) info['thumbnails'] = thumbnails - playtime = config.findtext('./playtime') - if playtime is not None: - try: - info['duration'] = round(float(playtime)) - except ValueError as e: - self._warn('duration: {0}', e) - return info From 5a8b77551d930d4672159a015f553e64be111492 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Sun, 28 Sep 2014 10:40:49 +0200 Subject: [PATCH 0089/1937] [heise] Simplify (#3842) --- youtube_dl/extractor/heise.py | 92 ++++++++++------------------------- 1 file changed, 26 insertions(+), 66 deletions(-) diff --git a/youtube_dl/extractor/heise.py b/youtube_dl/extractor/heise.py index 73c953181..05d4efb8c 100644 --- a/youtube_dl/extractor/heise.py +++ b/youtube_dl/extractor/heise.py @@ -1,34 +1,28 @@ # coding: utf-8 from __future__ import unicode_literals -import re - from .common import InfoExtractor from ..utils import ( - ExtractorError, - compat_urllib_parse, get_meta_content, parse_iso8601, ) class HeiseIE(InfoExtractor): - _VALID_URL = ( - r'^https?://(?:www\.)?heise\.de/video/artikel/' + - r'.+?(?P<id>[0-9]+)\.html$' - ) + _VALID_URL = r'''(?x) + https?://(?:www\.)?heise\.de/video/artikel/ + .+?(?P<id>[0-9]+)\.html(?:$|[?#]) + ''' _TEST = { 'url': ( - 'http://www.heise.de/video/artikel/Podcast-c-t-uplink-3-3-' + - 'Owncloud-Tastaturen-Peilsender-Smartphone-2404147.html' + 'http://www.heise.de/video/artikel/Podcast-c-t-uplink-3-3-Owncloud-Tastaturen-Peilsender-Smartphone-2404147.html' ), 'md5': 'ffed432483e922e88545ad9f2f15d30e', 'info_dict': { 'id': '2404147', 'ext': 'mp4', 'title': ( - "Podcast: c't uplink 3.3 – Owncloud / Tastaturen / " + - "Peilsender Smartphone" + "Podcast: c't uplink 3.3 – Owncloud / Tastaturen / Peilsender Smartphone" ), 'format_id': 'mp4_720', 'timestamp': 1411812600, @@ -36,84 +30,50 @@ class HeiseIE(InfoExtractor): } } - _CONFIG = ( - r'".+?\?sequenz=(?P<sequenz>.+?)&container=(?P<container>.+?)' + - r'(?:&hd=(?P<hd>.+?))?(?:&signature=(?P<signature>.+?))?&callback=\?"' - ) - _PREFIX = 'http://www.heise.de/videout/info?' 
- - def _warn(self, fmt, *args): - self.report_warning(fmt.format(*args), self._id) - - def _parse_config_url(self, html): - m = re.search(self._CONFIG, html) - if not m: - raise ExtractorError('No config found') - - qs = compat_urllib_parse.urlencode(dict((k, v) for k, v - in m.groupdict().items() - if v is not None)) - return self._PREFIX + qs - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - self._id = mobj.group('id') + video_id = self._match_id(url) - html = self._download_webpage(url, self._id) - config = self._download_json(self._parse_config_url(html), self._id) + webpage = self._download_webpage(url, video_id) + json_url = self._search_regex( + r'json_url:\s*"([^"]+)"', webpage, 'json URL') + config = self._download_json(json_url, video_id) info = { - 'id': self._id + 'id': video_id, + 'thumbnail': config.get('poster'), + 'timestamp': parse_iso8601(get_meta_content('date', webpage)), } - title = get_meta_content('fulltitle', html) + title = get_meta_content('fulltitle', webpage) if title: info['title'] = title elif config.get('title'): info['title'] = config['title'] else: - self._warn('title: not found') - info['title'] = 'heise' - - if (not config.get('formats') or - not hasattr(config['formats'], 'items')): - raise ExtractorError('No formats found') + info['title'] = self._og_search_title(webpage) formats = [] for t, rs in config['formats'].items(): if not rs or not hasattr(rs, 'items'): - self._warn('formats: {0}: no resolutions', t) + self._downloader.report_warning( + 'formats: {0}: no resolutions'.format(t)) continue - for res, obj in rs.items(): - format_id = '{0}_{1}'.format(t, res) + for height_str, obj in rs.items(): + format_id = '{0}_{1}'.format(t, height_str) if not obj or not obj.get('url'): - self._warn('formats: {0}: no url', format_id) + self._downloader.report_warning( + 'formats: {0}: no url'.format(format_id)) continue - fmt = { + formats.append({ 'url': obj['url'], - 'format_id': format_id - } - try: - fmt['height'] = int(res) - except ValueError as e: - self._warn('formats: {0}: height: {1}', t, e) - - formats.append(fmt) + 'format_id': format_id, + 'height': self._int(height_str, 'height'), + }) self._sort_formats(formats) info['formats'] = formats - if config.get('poster'): - info['thumbnail'] = config['poster'] - - date = get_meta_content('date', html) - if date: - try: - info['timestamp'] = parse_iso8601(date) - except ValueError as e: - self._warn('timestamp: {0}', e) - return info From c121a75b368a0c75de7416cbb36d9b9f40a7f1a7 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Sun, 28 Sep 2014 10:49:12 +0200 Subject: [PATCH 0090/1937] [heise] Add support for description --- youtube_dl/extractor/heise.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/youtube_dl/extractor/heise.py b/youtube_dl/extractor/heise.py index 05d4efb8c..f97b1e085 100644 --- a/youtube_dl/extractor/heise.py +++ b/youtube_dl/extractor/heise.py @@ -27,6 +27,7 @@ class HeiseIE(InfoExtractor): 'format_id': 'mp4_720', 'timestamp': 1411812600, 'upload_date': '20140927', + 'description': 'In uplink-Episode 3.3 geht es darum, wie man sich von Cloud-Anbietern emanzipieren kann, worauf man beim Kauf einer Tastatur achten sollte und was Smartphones über uns verraten.', } } @@ -42,6 +43,7 @@ class HeiseIE(InfoExtractor): 'id': video_id, 'thumbnail': config.get('poster'), 'timestamp': parse_iso8601(get_meta_content('date', webpage)), + 'description': self._og_search_description(webpage), } title = get_meta_content('fulltitle', webpage) From 
c84178977268df1fb705bc8fd8cf3aa73158139a Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Sun, 28 Sep 2014 10:49:58 +0200 Subject: [PATCH 0091/1937] [muenchentv] Add thumbnail --- youtube_dl/extractor/muenchentv.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/youtube_dl/extractor/muenchentv.py b/youtube_dl/extractor/muenchentv.py index 7cb6749be..c7f6beb9c 100644 --- a/youtube_dl/extractor/muenchentv.py +++ b/youtube_dl/extractor/muenchentv.py @@ -22,6 +22,7 @@ class MuenchenTVIE(InfoExtractor): 'ext': 'mp4', 'title': 're:^münchen.tv-Livestream [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', 'is_live': True, + 'thumbnail': 're:^https?://.*\.jpg$' }, 'params': { 'skip_download': True, @@ -70,5 +71,6 @@ class MuenchenTVIE(InfoExtractor): 'title': title, 'formats': formats, 'is_live': True, + 'thumbnail': thumbnail, } From 51ee08c4bb044bb670c8a6b855ba48a91892d27b Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Sun, 28 Sep 2014 10:50:43 +0200 Subject: [PATCH 0092/1937] Remove unused imports --- youtube_dl/extractor/ard.py | 2 -- youtube_dl/extractor/golem.py | 2 -- youtube_dl/extractor/vevo.py | 1 - youtube_dl/extractor/youku.py | 1 - 4 files changed, 6 deletions(-) diff --git a/youtube_dl/extractor/ard.py b/youtube_dl/extractor/ard.py index 54cec1c2f..8de9c11ea 100644 --- a/youtube_dl/extractor/ard.py +++ b/youtube_dl/extractor/ard.py @@ -8,8 +8,6 @@ from ..utils import ( determine_ext, ExtractorError, qualities, - compat_urllib_parse_urlparse, - compat_urllib_parse, int_or_none, parse_duration, unified_strdate, diff --git a/youtube_dl/extractor/golem.py b/youtube_dl/extractor/golem.py index a237f19ee..bebfe8568 100644 --- a/youtube_dl/extractor/golem.py +++ b/youtube_dl/extractor/golem.py @@ -1,8 +1,6 @@ # coding: utf-8 from __future__ import unicode_literals -import re - from .common import InfoExtractor from ..utils import ( compat_urlparse, diff --git a/youtube_dl/extractor/vevo.py b/youtube_dl/extractor/vevo.py index ebab8b86c..5b1a3ec78 100644 --- a/youtube_dl/extractor/vevo.py +++ b/youtube_dl/extractor/vevo.py @@ -5,7 +5,6 @@ import xml.etree.ElementTree from .common import InfoExtractor from ..utils import ( - compat_HTTPError, compat_urllib_request, ExtractorError, ) diff --git a/youtube_dl/extractor/youku.py b/youtube_dl/extractor/youku.py index 07ed7cbd1..48d47a245 100644 --- a/youtube_dl/extractor/youku.py +++ b/youtube_dl/extractor/youku.py @@ -2,7 +2,6 @@ from __future__ import unicode_literals -import json import math import random import re From 38c4d41b744660463abbb333737e031d9c87243e Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Sun, 28 Sep 2014 10:55:12 +0200 Subject: [PATCH 0093/1937] [played] Simplify (#3798) --- youtube_dl/extractor/played.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/played.py b/youtube_dl/extractor/played.py index a396e62e5..db40da43b 100644 --- a/youtube_dl/extractor/played.py +++ b/youtube_dl/extractor/played.py @@ -14,7 +14,7 @@ from ..utils import ( class PlayedIE(InfoExtractor): IE_NAME = 'played.to' - _VALID_URL = r'https?://played\.to/(?P<id>[a-zA-Z0-9_-]+)' + _VALID_URL = r'https?://(?:www\.)?played\.to/(?P<id>[a-zA-Z0-9_-]+)' _TEST = { 'url': 'http://played.to/j2f2sfiiukgt', @@ -27,15 +27,14 @@ class PlayedIE(InfoExtractor): } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') + video_id = self._match_id(url) orig_webpage = 
self._download_webpage(url, video_id) - fields = re.findall(r'type="hidden" name="(.+?)"\s* value="?(.+?)">', orig_webpage) + fields = re.findall( + r'type="hidden" name="([^"]+)"\s+value="([^"]+)">', orig_webpage) data = dict(fields) - self.to_screen('%s: Waiting for timeout' % video_id) - time.sleep(2) + self._sleep(2, video_id) post = compat_urllib_parse.urlencode(data) headers = { @@ -54,4 +53,4 @@ class PlayedIE(InfoExtractor): 'id': video_id, 'title': title, 'url': video_url, - } \ No newline at end of file + } From 76e7d1e74b10b99ed9289b0c30c5f4933f9d841e Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Sun, 28 Sep 2014 10:56:36 +0200 Subject: [PATCH 0094/1937] [played] Remove unused import --- youtube_dl/extractor/played.py | 1 - 1 file changed, 1 deletion(-) diff --git a/youtube_dl/extractor/played.py b/youtube_dl/extractor/played.py index db40da43b..645a1e06d 100644 --- a/youtube_dl/extractor/played.py +++ b/youtube_dl/extractor/played.py @@ -2,7 +2,6 @@ from __future__ import unicode_literals import re -import time import os.path from .common import InfoExtractor From d6e6a4225650ff220c7fe0687d883552e4b45bde Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Sun, 28 Sep 2014 12:14:16 +0200 Subject: [PATCH 0095/1937] [vimeo:likes] Add new extractor (Fixes #3835) --- test/test_download.py | 4 +++- youtube_dl/extractor/__init__.py | 5 +++-- youtube_dl/extractor/generic.py | 10 ++++------ youtube_dl/extractor/vimeo.py | 33 ++++++++++++++++++++++++++++++++ 4 files changed, 43 insertions(+), 9 deletions(-) diff --git a/test/test_download.py b/test/test_download.py index 2b8ac6975..8178015ea 100644 --- a/test/test_download.py +++ b/test/test_download.py @@ -139,7 +139,9 @@ def generator(test_case): if is_playlist: self.assertEqual(res_dict['_type'], 'playlist') + self.assertTrue('entries' in res_dict) expect_info_dict(self, test_case.get('info_dict', {}), res_dict) + if 'playlist_mincount' in test_case: assertGreaterEqual( self, @@ -188,7 +190,7 @@ def generator(test_case): expect_info_dict(self, tc.get('info_dict', {}), info_dict) finally: try_rm_tcs_files() - if is_playlist and res_dict is not None: + if is_playlist and res_dict is not None and res_dict.get('entries'): # Remove all other files that may have been extracted if the # extractor returns full results even with extract_flat res_tcs = [{'info_dict': e} for e in res_dict['entries']] diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 6ab3eeaf5..86bff185b 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -412,11 +412,12 @@ from .videoweed import VideoWeedIE from .vidme import VidmeIE from .vimeo import ( VimeoIE, - VimeoChannelIE, - VimeoUserIE, VimeoAlbumIE, + VimeoChannelIE, VimeoGroupsIE, + VimeoLikesIE, VimeoReviewIE, + VimeoUserIE, VimeoWatchLaterIE, ) from .vimple import VimpleIE diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 367f930dd..0dfa4853d 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -397,12 +397,6 @@ class GenericIE(InfoExtractor): }, ] - def report_download_webpage(self, video_id): - """Report webpage download.""" - if not self._downloader.params.get('test', False): - self._downloader.report_warning('Falling back on generic information extractor.') - super(GenericIE, self).report_download_webpage(video_id) - def report_following_redirect(self, new_url): """Report information extraction.""" 
self._downloader.to_screen('[redirect] Following redirect to %s' % new_url) @@ -502,6 +496,7 @@ class GenericIE(InfoExtractor): url, smuggled_data = unsmuggle_url(url) force_videoid = None + is_intentional = smuggled_data and smuggled_data.get('to_generic') if smuggled_data and 'force_videoid' in smuggled_data: force_videoid = smuggled_data['force_videoid'] video_id = force_videoid @@ -544,6 +539,9 @@ class GenericIE(InfoExtractor): 'upload_date': upload_date, } + if not self._downloader.params.get('test', False) and not is_intentional: + self._downloader.report_warning('Falling back on generic information extractor.') + try: webpage = self._download_webpage(url, video_id) except ValueError: diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index bc01d7fbf..4be1b8785 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -15,6 +15,7 @@ from ..utils import ( get_element_by_attribute, ExtractorError, RegexNotFoundError, + smuggle_url, std_headers, unsmuggle_url, urlencode_postdata, @@ -529,3 +530,35 @@ class VimeoWatchLaterIE(VimeoBaseInfoExtractor, VimeoChannelIE): def _real_extract(self, url): return self._extract_videos('watchlater', 'https://vimeo.com/home/watchlater') + + +class VimeoLikesIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?vimeo\.com/user(?P<id>[0-9]+)/likes(?:$|[?#])' + IE_NAME = 'vimeo:likes' + IE_DESC = 'Vimeo user likes' + _TEST = { + 'url': 'https://vimeo.com/user20132939/likes', + 'playlist_mincount': 4, + 'add_ies': ['Generic'], + "info_dict": { + "description": "Videos Philipp Hagemeister likes on Vimeo.", + "title": "Vimeo / Philipp Hagemeister's likes", + }, + 'params': { + 'extract_flat': False, + }, + } + + def _real_extract(self, url): + user_id = self._match_id(url) + rss_url = '%s//vimeo.com/user%s/likes/rss' % ( + self.http_scheme(), user_id) + surl = smuggle_url(rss_url, { + 'force_videoid': '%s_likes' % user_id, + 'to_generic': True, + }) + + return { + '_type': 'url', + 'url': surl, + } From 22dd3fad8623472cfe681fdfbaa346e0c8f5fb84 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Sun, 28 Sep 2014 12:14:25 +0200 Subject: [PATCH 0096/1937] release 2014.09.28.1 --- youtube_dl/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index e62bef2cf..eb4356811 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2014.09.28' +__version__ = '2014.09.28.1' From 4bc77c8417ca0340d09dcebb311d06aa7d5ba0ac Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Sun, 28 Sep 2014 13:52:21 +0200 Subject: [PATCH 0097/1937] [README] Use _match_id helper function --- README.md | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/README.md b/README.md index 5d15decb5..5e0d07997 100644 --- a/README.md +++ b/README.md @@ -442,8 +442,6 @@ If you want to add support for a new site, you can follow this quick list (assum # coding: utf-8 from __future__ import unicode_literals - import re - from .common import InfoExtractor @@ -466,8 +464,7 @@ If you want to add support for a new site, you can follow this quick list (assum } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') + video_id = self._match_id(url) # TODO more code goes here, for example ... 
webpage = self._download_webpage(url, video_id) From 7f5c0c4a19cf72b6ede80ee0fea4611d8bd45010 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 28 Sep 2014 22:10:20 +0700 Subject: [PATCH 0098/1937] [README] Clarify test's md5 filesize (#3846) --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 5e0d07997..0f7442906 100644 --- a/README.md +++ b/README.md @@ -449,7 +449,7 @@ If you want to add support for a new site, you can follow this quick list (assum _VALID_URL = r'https?://(?:www\.)?yourextractor\.com/watch/(?P<id>[0-9]+)' _TEST = { 'url': 'http://yourextractor.com/watch/42', - 'md5': 'TODO: md5 sum of the first 10KiB of the video file', + 'md5': 'TODO: md5 sum of the first 10241 bytes of the video file (use --test)', 'info_dict': { 'id': '42', 'ext': 'mp4', From dfee83234b642a94255d52d992295b980ce2a5f7 Mon Sep 17 00:00:00 2001 From: Naglis Jonaitis <njonaitis@gmail.com> Date: Sun, 28 Sep 2014 19:25:28 +0300 Subject: [PATCH 0099/1937] [nfl] Prefer progressive downloads --- youtube_dl/extractor/nfl.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/nfl.py b/youtube_dl/extractor/nfl.py index 963c4587c..668d99512 100644 --- a/youtube_dl/extractor/nfl.py +++ b/youtube_dl/extractor/nfl.py @@ -17,11 +17,11 @@ class NFLIE(InfoExtractor): _PLAYER_CONFIG_URL = 'http://www.nfl.com/static/content/static/config/video/config.json' _TEST = { 'url': 'http://www.nfl.com/videos/nfl-game-highlights/0ap3000000398478/Week-3-Redskins-vs-Eagles-highlights', - # 'md5': '5eb8c40a727dda106d510e5d6ffa79e5', # md5 checksum fluctuates + 'md5': '394ef771ddcd1354f665b471d78ec4c6', 'info_dict': { 'id': '0ap3000000398478', 'ext': 'mp4', - 'title': 'Week 3: Washington Redskins vs. Philadelphia Eagles highlights', + 'title': 'Week 3: Redskins vs. 
Eagles highlights', 'description': 'md5:56323bfb0ac4ee5ab24bd05fdf3bf478', 'upload_date': '20140921', 'timestamp': 1411337580, @@ -66,9 +66,9 @@ class NFLIE(InfoExtractor): ) if protocol == 'rtmp': - preference = -2 - elif 'prog' in name.lower(): preference = -1 + elif 'prog' in name.lower(): + preference = 1 else: preference = 0 @@ -94,7 +94,7 @@ class NFLIE(InfoExtractor): return { 'id': video_id, - 'title': video_data.get('storyHeadline'), + 'title': video_data.get('headline'), 'formats': formats, 'description': video_data.get('caption'), 'duration': video_data.get('duration'), From 5f4c318844180d51745303979682a0a482f05328 Mon Sep 17 00:00:00 2001 From: Naglis Jonaitis <njonaitis@gmail.com> Date: Sun, 28 Sep 2014 21:48:26 +0300 Subject: [PATCH 0100/1937] [nfl] Support team micro-sites (fixes #3831) --- youtube_dl/extractor/nfl.py | 159 +++++++++++++++++++++++------------- 1 file changed, 100 insertions(+), 59 deletions(-) diff --git a/youtube_dl/extractor/nfl.py b/youtube_dl/extractor/nfl.py index 668d99512..4832b3ce4 100644 --- a/youtube_dl/extractor/nfl.py +++ b/youtube_dl/extractor/nfl.py @@ -6,6 +6,7 @@ import re from .common import InfoExtractor from ..utils import ( ExtractorError, + compat_urllib_parse, int_or_none, remove_end, ) @@ -13,76 +14,116 @@ from ..utils import ( class NFLIE(InfoExtractor): IE_NAME = 'nfl.com' - _VALID_URL = r'(?x)https?://(?:www\.)?nfl\.com/(?:videos/(?:.+)/|.*?\#video=)(?P<id>\d..[0-9]+)' - _PLAYER_CONFIG_URL = 'http://www.nfl.com/static/content/static/config/video/config.json' - _TEST = { - 'url': 'http://www.nfl.com/videos/nfl-game-highlights/0ap3000000398478/Week-3-Redskins-vs-Eagles-highlights', - 'md5': '394ef771ddcd1354f665b471d78ec4c6', - 'info_dict': { - 'id': '0ap3000000398478', - 'ext': 'mp4', - 'title': 'Week 3: Redskins vs. Eagles highlights', - 'description': 'md5:56323bfb0ac4ee5ab24bd05fdf3bf478', - 'upload_date': '20140921', - 'timestamp': 1411337580, - 'thumbnail': 're:^https?://.*\.jpg$', + _VALID_URL = r'''(?x)https?:// + (?P<host>(?:www\.)?(?:nfl\.com|.*?\.clubs\.nfl\.com))/ + (?:.+?/)* + (?P<id>(?:\d[a-z]{2}\d{13}|\w{8}\-(?:\w{4}\-){3}\w{12}))''' + _TESTS = [ + { + 'url': 'http://www.nfl.com/videos/nfl-game-highlights/0ap3000000398478/Week-3-Redskins-vs-Eagles-highlights', + 'md5': '394ef771ddcd1354f665b471d78ec4c6', + 'info_dict': { + 'id': '0ap3000000398478', + 'ext': 'mp4', + 'title': 'Week 3: Redskins vs. Eagles highlights', + 'description': 'md5:56323bfb0ac4ee5ab24bd05fdf3bf478', + 'upload_date': '20140921', + 'timestamp': 1411337580, + 'thumbnail': 're:^https?://.*\.jpg$', + } + }, + { + 'url': 'http://prod.www.steelers.clubs.nfl.com/video-and-audio/videos/LIVE_Post_Game_vs_Browns/9d72f26a-9e2b-4718-84d3-09fb4046c266', + 'md5': 'cf85bdb4bc49f6e9d3816d130c78279c', + 'info_dict': { + 'id': '9d72f26a-9e2b-4718-84d3-09fb4046c266', + 'ext': 'mp4', + 'title': 'LIVE: Post Game vs. 
Browns', + 'description': 'md5:6a97f7e5ebeb4c0e69a418a89e0636e8', + 'upload_date': '20131229', + 'timestamp': 1388354455, + 'thumbnail': 're:^https?://.*\.jpg$', + } + } + ] + + @staticmethod + def prepend_host(host, url): + if not url.startswith('http'): + if not url.startswith('/'): + url = '/%s' % url + url = 'http://{0:}{1:}'.format(host, url) + return url + + @staticmethod + def format_from_stream(stream, protocol, host, path_prefix='', + preference=0, note=None): + url = '{protocol:}://{host:}/{prefix:}{path:}'.format( + protocol=protocol, + host=host, + prefix=path_prefix, + path=stream.get('path'), + ) + return { + 'url': url, + 'vbr': int_or_none(stream.get('rate', 0), 1000), + 'preference': preference, + 'format_note': note, } - } def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') + video_id, host = mobj.group('id'), mobj.group('host') - config = self._download_json(self._PLAYER_CONFIG_URL, video_id, + webpage = self._download_webpage(url, video_id) + + config_url = NFLIE.prepend_host(host, self._search_regex( + r'(?:config|configURL)\s*:\s*"([^"]+)"', webpage, 'config URL')) + config = self._download_json(config_url, video_id, note='Downloading player config') - url_template = 'http://nfl.com{contentURLTemplate:s}'.format(**config) - video_data = self._download_json(url_template.format(id=video_id), video_id) - - cdns = config.get('cdns') - if not cdns: - raise ExtractorError('Failed to get CDN data', expected=True) + url_template = NFLIE.prepend_host( + host, '{contentURLTemplate:}'.format(**config)) + video_data = self._download_json( + url_template.format(id=video_id), video_id) formats = [] - streams = video_data.get('cdnData', {}).get('bitrateInfo', []) - for name, cdn in cdns.items(): - # LimeLight streams don't seem to work - if cdn.get('name') == 'LIMELIGHT': - continue - - protocol = cdn.get('protocol') - host = remove_end(cdn.get('host', ''), '/') - if not (protocol and host): - continue - - path_prefix = cdn.get('pathprefix', '') - if path_prefix and not path_prefix.endswith('/'): - path_prefix = '%s/' % path_prefix - - get_url = lambda p: '{protocol:s}://{host:s}/{prefix:s}{path:}'.format( - protocol=protocol, - host=host, - prefix=path_prefix, - path=p, - ) - - if protocol == 'rtmp': - preference = -1 - elif 'prog' in name.lower(): - preference = 1 - else: - preference = 0 - + cdn_data = video_data.get('cdnData', {}) + streams = cdn_data.get('bitrateInfo', []) + if cdn_data.get('format') == 'EXTERNAL_HTTP_STREAM': + parts = compat_urllib_parse.urlparse(cdn_data.get('uri')) + protocol, host = parts.scheme, parts.netloc for stream in streams: - path = stream.get('path') - if not path: + formats.append( + NFLIE.format_from_stream(stream, protocol, host)) + else: + cdns = config.get('cdns') + if not cdns: + raise ExtractorError('Failed to get CDN data', expected=True) + + for name, cdn in cdns.items(): + # LimeLight streams don't seem to work + if cdn.get('name') == 'LIMELIGHT': continue - formats.append({ - 'url': get_url(path), - 'vbr': int_or_none(stream.get('rate', 0), 1000), - 'preference': preference, - 'format_note': name, - }) + protocol = cdn.get('protocol') + host = remove_end(cdn.get('host', ''), '/') + if not (protocol and host): + continue + + prefix = cdn.get('pathprefix', '') + if prefix and not prefix.endswith('/'): + prefix = '%s/' % prefix + + preference = 0 + if protocol == 'rtmp': + preference = -2 + elif 'prog' in name.lower(): + preference = 1 + + for stream in streams: + formats.append( + 
NFLIE.format_from_stream(stream, protocol, host, + prefix, preference, name)) self._sort_formats(formats) From 67077b182b698ac56cec9525a2669d5cee394226 Mon Sep 17 00:00:00 2001 From: Anton Larionov <diffident.cat@gmail.com> Date: Sun, 28 Sep 2014 23:36:55 +0400 Subject: [PATCH 0101/1937] [thvideo] Add support for playlists --- youtube_dl/extractor/__init__.py | 5 ++++- youtube_dl/extractor/thvideo.py | 24 ++++++++++++++++++++++++ 2 files changed, 28 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 86bff185b..89a9d8106 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -371,7 +371,10 @@ from .thisav import ThisAVIE from .tinypic import TinyPicIE from .tlc import TlcIE, TlcDeIE from .tnaflix import TNAFlixIE -from .thvideo import THVideoIE +from .thvideo import ( + THVideoIE, + THVideoPlaylistIE +) from .toutv import TouTvIE from .toypics import ToypicsUserIE, ToypicsIE from .traileraddict import TrailerAddictIE diff --git a/youtube_dl/extractor/thvideo.py b/youtube_dl/extractor/thvideo.py index 607e947bb..0ae20ea30 100644 --- a/youtube_dl/extractor/thvideo.py +++ b/youtube_dl/extractor/thvideo.py @@ -57,3 +57,27 @@ class THVideoIE(InfoExtractor): 'description': description, 'upload_date': upload_date } + + +class THVideoPlaylistIE(InfoExtractor): + _VALID_URL = r'http?://(?:www\.)?thvideo\.tv/mylist(?P<id>[0-9]+)' + _TEST = { + 'url': 'http://thvideo.tv/mylist2', + 'info_dict': { + 'id': '2', + 'title': '幻想万華鏡', + }, + 'playlist_mincount': 23, + } + + def _real_extract(self, url): + webpage = self._download_webpage(url, 'playlist') + mobj = re.match(self._VALID_URL, url) + list_id = mobj.group('id') + list_title = self._html_search_regex(r'<h1 class="show_title">(.*?)<b id', webpage, 'playlist title') + + entries = [ + self.url_result('http://thvideo.tv/v/th' + id, 'THVideo') + for id in re.findall(r'<dd><a href="http://thvideo.tv/v/th(\d+)/" target=', webpage)] + + return self.playlist_result(entries, list_id, list_title) \ No newline at end of file From d2e32f7df56ab497175437bffdcdfedbd71ca8d9 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Mon, 29 Sep 2014 00:23:41 +0200 Subject: [PATCH 0102/1937] Do not use HTML characters in output This messes up the format when people paste it outside of code tags. 
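Editor's note (not part of the patch series): the THVideoPlaylistIE patch above (0101) is a compact example of youtube-dl's standard playlist pattern: scrape the entry URLs from the list page, wrap each one with url_result(), and return the collection through playlist_result(). A minimal sketch of that pattern follows; the site name, URL scheme and regular expressions are invented placeholders, and only the InfoExtractor helpers themselves (_match_id, _download_webpage, _html_search_regex, url_result, playlist_result) are taken from the code shown in these patches.

# Hypothetical sketch of the playlist-extractor pattern illustrated by
# THVideoPlaylistIE above. "example.com", its URLs and the regexes are
# invented for illustration and do not refer to a real extractor.
import re

from .common import InfoExtractor


class ExamplePlaylistIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?example\.com/playlist/(?P<id>[0-9]+)'

    def _real_extract(self, url):
        playlist_id = self._match_id(url)
        webpage = self._download_webpage(url, playlist_id)

        title = self._html_search_regex(
            r'<h1[^>]*>(.+?)</h1>', webpage, 'playlist title', fatal=False)

        # Each entry is deferred to the single-video extractor via
        # url_result(), so no video-level logic is duplicated here.
        entries = [
            self.url_result('http://example.com/video/%s' % video_id, 'Example')
            for video_id in re.findall(
                r'href="http://example\.com/video/([0-9]+)"', webpage)
        ]

        return self.playlist_result(entries, playlist_id, title)

The second argument to url_result() names the extractor that should handle each entry, which is why the thvideo patch passes 'THVideo' there instead of re-implementing the single-video extraction.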
--- youtube_dl/options.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/options.py b/youtube_dl/options.py index 44dcb1e34..f651337ad 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -87,7 +87,7 @@ def parseOpts(overrideArguments=None): for private_opt in ['-p', '--password', '-u', '--username', '--video-password']: try: i = opts.index(private_opt) - opts[i+1] = '<PRIVATE>' + opts[i+1] = 'PRIVATE' except ValueError: pass return opts From 9c44d2429b90dece734df778c63b04c15e91c1ca Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Mon, 29 Sep 2014 00:36:06 +0200 Subject: [PATCH 0103/1937] [vimeo:likes] Support large like lists (Fixes #3847) --- test/test_utils.py | 9 ++++- youtube_dl/extractor/vimeo.py | 66 ++++++++++++++++++++++----------- youtube_dl/extractor/youtube.py | 4 +- youtube_dl/utils.py | 39 +++++++++++++++++-- 4 files changed, 89 insertions(+), 29 deletions(-) diff --git a/test/test_utils.py b/test/test_utils.py index 3efbed29d..6419b3ca9 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -22,7 +22,8 @@ from youtube_dl.utils import ( fix_xml_ampersands, get_meta_content, orderedSet, - PagedList, + OnDemandPagedList, + InAdvancePagedList, parse_duration, read_batch_urls, sanitize_filename, @@ -246,10 +247,14 @@ class TestUtil(unittest.TestCase): for i in range(firstid, upto): yield i - pl = PagedList(get_page, pagesize) + pl = OnDemandPagedList(get_page, pagesize) got = pl.getslice(*sliceargs) self.assertEqual(got, expected) + iapl = InAdvancePagedList(get_page, size // pagesize + 1, pagesize) + got = iapl.getslice(*sliceargs) + self.assertEqual(got, expected) + testPL(5, 2, (), [0, 1, 2, 3, 4]) testPL(5, 2, (1,), [1, 2, 3, 4]) testPL(5, 2, (2,), [2, 3, 4]) diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index 4be1b8785..403d0bb28 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -8,18 +8,19 @@ import itertools from .common import InfoExtractor from .subtitles import SubtitlesInfoExtractor from ..utils import ( + clean_html, compat_HTTPError, compat_urllib_parse, compat_urllib_request, - clean_html, - get_element_by_attribute, + compat_urlparse, ExtractorError, + get_element_by_attribute, + InAdvancePagedList, + int_or_none, RegexNotFoundError, - smuggle_url, std_headers, unsmuggle_url, urlencode_postdata, - int_or_none, ) @@ -533,32 +534,55 @@ class VimeoWatchLaterIE(VimeoBaseInfoExtractor, VimeoChannelIE): class VimeoLikesIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?vimeo\.com/user(?P<id>[0-9]+)/likes(?:$|[?#])' + _VALID_URL = r'https?://(?:www\.)?vimeo\.com/user(?P<id>[0-9]+)/likes/?(?:$|[?#]|sort:)' IE_NAME = 'vimeo:likes' IE_DESC = 'Vimeo user likes' _TEST = { - 'url': 'https://vimeo.com/user20132939/likes', - 'playlist_mincount': 4, - 'add_ies': ['Generic'], + 'url': 'https://vimeo.com/user755559/likes/', + 'playlist_mincount': 293, "info_dict": { - "description": "Videos Philipp Hagemeister likes on Vimeo.", - "title": "Vimeo / Philipp Hagemeister's likes", - }, - 'params': { - 'extract_flat': False, + "description": "See all the videos urza likes", + "title": 'Videos urza likes', }, } def _real_extract(self, url): user_id = self._match_id(url) - rss_url = '%s//vimeo.com/user%s/likes/rss' % ( - self.http_scheme(), user_id) - surl = smuggle_url(rss_url, { - 'force_videoid': '%s_likes' % user_id, - 'to_generic': True, - }) + webpage = self._download_webpage(url, user_id) + page_count = self._int( + self._search_regex( + 
r'''(?x)<li><a\s+href="[^"]+"\s+data-page="([0-9]+)"> + .*?</a></li>\s*<li\s+class="pagination_next"> + ''', webpage, 'page count'), + 'page count', fatal=True) + PAGE_SIZE = 12 + title = self._html_search_regex( + r'(?s)<h1>(.+?)</h1>', webpage, 'title', fatal=False) + description = self._html_search_meta('description', webpage) + + def _get_page(idx): + page_url = '%s//vimeo.com/user%s/likes/page:%d/sort:date' % ( + self.http_scheme(), user_id, idx + 1) + webpage = self._download_webpage( + page_url, user_id, + note='Downloading page %d/%d' % (idx + 1, page_count)) + video_list = self._search_regex( + r'(?s)<ol class="js-browse_list[^"]+"[^>]*>(.*?)</ol>', + webpage, 'video content') + paths = re.findall( + r'<li[^>]*>\s*<a\s+href="([^"]+)"', video_list) + for path in paths: + yield { + '_type': 'url', + 'url': compat_urlparse.urljoin(page_url, path), + } + + pl = InAdvancePagedList(_get_page, page_count, PAGE_SIZE) return { - '_type': 'url', - 'url': surl, + '_type': 'playlist', + 'id': 'user%s_likes' % user_id, + 'title': title, + 'description': description, + 'entries': pl, } diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 99198e380..045507bc7 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -26,7 +26,7 @@ from ..utils import ( get_element_by_attribute, ExtractorError, int_or_none, - PagedList, + OnDemandPagedList, unescapeHTML, unified_strdate, orderedSet, @@ -1341,7 +1341,7 @@ class YoutubeUserIE(InfoExtractor): 'id': video_id, 'title': title, } - url_results = PagedList(download_page, self._GDATA_PAGE_SIZE) + url_results = OnDemandPagedList(download_page, self._GDATA_PAGE_SIZE) return self.playlist_result(url_results, playlist_title=username) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index b644f4e92..9f49507c1 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1384,14 +1384,16 @@ def check_executable(exe, args=[]): class PagedList(object): - def __init__(self, pagefunc, pagesize): - self._pagefunc = pagefunc - self._pagesize = pagesize - def __len__(self): # This is only useful for tests return len(self.getslice()) + +class OnDemandPagedList(PagedList): + def __init__(self, pagefunc, pagesize): + self._pagefunc = pagefunc + self._pagesize = pagesize + def getslice(self, start=0, end=None): res = [] for pagenum in itertools.count(start // self._pagesize): @@ -1430,6 +1432,35 @@ class PagedList(object): return res +class InAdvancePagedList(PagedList): + def __init__(self, pagefunc, pagecount, pagesize): + self._pagefunc = pagefunc + self._pagecount = pagecount + self._pagesize = pagesize + + def getslice(self, start=0, end=None): + res = [] + start_page = start // self._pagesize + end_page = ( + self._pagecount if end is None else (end // self._pagesize + 1)) + skip_elems = start - start_page * self._pagesize + only_more = None if end is None else end - start + for pagenum in range(start_page, end_page): + page = list(self._pagefunc(pagenum)) + if skip_elems: + page = page[skip_elems:] + skip_elems = None + if only_more is not None: + if len(page) < only_more: + only_more -= len(page) + else: + page = page[:only_more] + res.extend(page) + break + res.extend(page) + return res + + def uppercase_escape(s): unicode_escape = codecs.getdecoder('unicode_escape') return re.sub( From 1770ed9e86a147eceb86210dec0aefcf0d94ab52 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Mon, 29 Sep 2014 00:38:37 +0200 Subject: [PATCH 0104/1937] [thvideo] Simplify (#3848) --- 
youtube_dl/extractor/thvideo.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/thvideo.py b/youtube_dl/extractor/thvideo.py index 0ae20ea30..496f15d80 100644 --- a/youtube_dl/extractor/thvideo.py +++ b/youtube_dl/extractor/thvideo.py @@ -26,8 +26,7 @@ class THVideoIE(InfoExtractor): } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') + video_id = self._match_id(url) # extract download link from mobile player page webpage_player = self._download_webpage( @@ -71,13 +70,15 @@ class THVideoPlaylistIE(InfoExtractor): } def _real_extract(self, url): - webpage = self._download_webpage(url, 'playlist') - mobj = re.match(self._VALID_URL, url) - list_id = mobj.group('id') - list_title = self._html_search_regex(r'<h1 class="show_title">(.*?)<b id', webpage, 'playlist title') + playlist_id = self._match_id(url) + + webpage = self._download_webpage(url, playlist_id) + list_title = self._html_search_regex( + r'<h1 class="show_title">(.*?)<b id', webpage, 'playlist title', + fatal=False) entries = [ self.url_result('http://thvideo.tv/v/th' + id, 'THVideo') for id in re.findall(r'<dd><a href="http://thvideo.tv/v/th(\d+)/" target=', webpage)] - return self.playlist_result(entries, list_id, list_title) \ No newline at end of file + return self.playlist_result(entries, playlist_id, list_title) From e2dce5378191e315ad86785aac7e786c86a1a121 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Mon, 29 Sep 2014 01:39:26 +0200 Subject: [PATCH 0105/1937] [youtube] Always request webpage in English (Fixes #3844) --- youtube_dl/extractor/youtube.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 045507bc7..61228817e 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -655,7 +655,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): # Get video webpage url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id - video_webpage = self._download_webpage(url, video_id) + req = compat_urllib_request.Request(url) + req.add_header('Cookie', 'PREF=hl=en') + video_webpage = self._download_webpage(req, video_id) # Attempt to extract SWF player URL mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage) From a43ee88c6f888196b47cb1e12463a64ada0ead12 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Mon, 29 Sep 2014 01:51:53 +0200 Subject: [PATCH 0106/1937] release 2014.09.29 --- youtube_dl/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index eb4356811..17e5ea8e2 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2014.09.28.1' +__version__ = '2014.09.29' From a1f934b171dcc8e1215ee30d0715ce562eb220e3 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Mon, 29 Sep 2014 02:04:16 +0200 Subject: [PATCH 0107/1937] [youtube] Correct language cookie handling --- youtube_dl/extractor/youtube.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 61228817e..9041cfa87 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -655,9 +655,17 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): # Get video 
webpage url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id - req = compat_urllib_request.Request(url) - req.add_header('Cookie', 'PREF=hl=en') - video_webpage = self._download_webpage(req, video_id) + pref_cookies = [ + c for c in self._downloader.cookiejar + if c.domain == '.youtube.com' and c.name == 'PREF'] + for pc in pref_cookies: + if 'hl=' in pc.value: + pc.value = re.sub(r'hl=[^&]+', 'hl=en', pc.value) + else: + if pc.value: + pc.value += '&' + pc.value += 'hl=en' + video_webpage = self._download_webpage(url, video_id) # Attempt to extract SWF player URL mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage) From f5b7e6a842b00bab8320d30608dd4a10a4752a17 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Mon, 29 Sep 2014 02:04:28 +0200 Subject: [PATCH 0108/1937] release 2014.09.29.1 --- youtube_dl/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 17e5ea8e2..885df83c0 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2014.09.29' +__version__ = '2014.09.29.1' From 27aede907436fa58600cd46bb04e7eae6e1e9279 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Mon, 29 Sep 2014 04:48:50 +0200 Subject: [PATCH 0109/1937] [pbs] Add support for series/jwplayer type video (Fixes #3849) --- youtube_dl/extractor/pbs.py | 39 +++++++++++++++++++++++++++++-------- 1 file changed, 31 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/pbs.py b/youtube_dl/extractor/pbs.py index 2adfde909..8f140d626 100644 --- a/youtube_dl/extractor/pbs.py +++ b/youtube_dl/extractor/pbs.py @@ -4,6 +4,7 @@ import re from .common import InfoExtractor from ..utils import ( + unified_strdate, US_RATINGS, ) @@ -11,10 +12,10 @@ from ..utils import ( class PBSIE(InfoExtractor): _VALID_URL = r'''(?x)https?:// (?: - # Direct video URL - video\.pbs\.org/(?:viralplayer|video)/(?P<id>[0-9]+)/? | - # Article with embedded player - (?:www\.)?pbs\.org/(?:[^/]+/){2,5}(?P<presumptive_id>[^/]+)/?(?:$|[?\#]) | + # Direct video URL + video\.pbs\.org/(?:viralplayer|video)/(?P<id>[0-9]+)/? 
| + # Article with embedded player (or direct video) + (?:www\.)?pbs\.org/(?:[^/]+/){2,5}(?P<presumptive_id>[^/]+?)(?:\.html)?/?(?:$|[?\#]) | # Player video\.pbs\.org/(?:widget/)?partnerplayer/(?P<player_id>[^/]+)/ ) @@ -65,10 +66,25 @@ class PBSIE(InfoExtractor): 'duration': 6559, 'thumbnail': 're:^https?://.*\.jpg$', } + }, + { + 'url': 'http://www.pbs.org/wgbh/nova/earth/killer-typhoon.html', + 'md5': '908f3e5473a693b266b84e25e1cf9703', + 'info_dict': { + 'id': '2365160389', + 'display_id': 'killer-typhoon', + 'ext': 'mp4', + 'description': 'md5:c741d14e979fc53228c575894094f157', + 'title': 'Killer Typhoon', + 'duration': 3172, + 'thumbnail': 're:^https?://.*\.jpg$', + 'upload_date': '20140122', + } } + ] - def _extract_ids(self, url): + def _extract_webpage(self, url): mobj = re.match(self._VALID_URL, url) presumptive_id = mobj.group('presumptive_id') @@ -76,15 +92,20 @@ class PBSIE(InfoExtractor): if presumptive_id: webpage = self._download_webpage(url, display_id) + upload_date = unified_strdate(self._search_regex( + r'<input type="hidden" id="air_date_[0-9]+" value="([^"]+)"', + webpage, 'upload date', default=None)) + MEDIA_ID_REGEXES = [ r"div\s*:\s*'videoembed'\s*,\s*mediaid\s*:\s*'(\d+)'", # frontline video embed r'class="coveplayerid">([^<]+)<', # coveplayer + r'<input type="hidden" id="pbs_video_id_[0-9]+" value="([0-9]+)"/>', # jwplayer ] media_id = self._search_regex( MEDIA_ID_REGEXES, webpage, 'media ID', fatal=False, default=None) if media_id: - return media_id, presumptive_id + return media_id, presumptive_id, upload_date url = self._search_regex( r'<iframe\s+(?:class|id)=["\']partnerPlayer["\'].*?\s+src=["\'](.*?)["\']>', @@ -104,10 +125,10 @@ class PBSIE(InfoExtractor): video_id = mobj.group('id') display_id = video_id - return video_id, display_id + return video_id, display_id, None def _real_extract(self, url): - video_id, display_id = self._extract_ids(url) + video_id, display_id, upload_date = self._extract_webpage(url) info_url = 'http://video.pbs.org/videoInfo/%s?format=json' % video_id info = self._download_json(info_url, display_id) @@ -119,6 +140,7 @@ class PBSIE(InfoExtractor): return { 'id': video_id, + 'display_id': display_id, 'title': info['title'], 'url': info['alternate_encoding']['url'], 'ext': 'mp4', @@ -126,4 +148,5 @@ class PBSIE(InfoExtractor): 'thumbnail': info.get('image_url'), 'duration': info.get('duration'), 'age_limit': age_limit, + 'upload_date': upload_date, } From 35d3e63d24c524922cb39ba36cb5f6de12400504 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Mon, 29 Sep 2014 04:49:11 +0200 Subject: [PATCH 0110/1937] release 2014.09.29.2 --- youtube_dl/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 885df83c0..1384b496b 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2014.09.29.1' +__version__ = '2014.09.29.2' From 25930395225c45a9e5045ada291d37817371b086 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Mon, 29 Sep 2014 04:58:29 +0200 Subject: [PATCH 0111/1937] [vimeo] Use regexps to find description This fixes descriptions on 2.6 and makes the code simpler. 
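The pages in question contain unclosed tags, and the HTMLParser used by Python 2.6 aborts on them with an AssertionError ('we should not get here!'), so the DOM helper get_element_by_attribute could not be relied on. Below is a minimal sketch of the regex fallback, using the pattern from this patch against invented sample markup (the HTML string is illustrative only, not real Vimeo output):

    import re

    # Invented Vimeo-like snippet with an unclosed <p>, the kind of markup
    # the Python 2.6 HTML parser refuses to process:
    webpage = '<div class="clip description" data-sample="1"><p>Unclosed description text</div>'

    # The DOTALL regex added in this patch reads the description without any HTML parsing:
    match = re.search(
        r'(?s)<div class="[^"]*description"[^>]*>(.*?)</div>', webpage)
    print(match.group(1))  # -> <p>Unclosed description text
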
--- youtube_dl/extractor/vimeo.py | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index 403d0bb28..a002555a9 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -275,18 +275,9 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor): _, video_thumbnail = sorted((int(width if width.isdigit() else 0), t_url) for (width, t_url) in video_thumbs.items())[-1] # Extract video description - video_description = None - try: - video_description = get_element_by_attribute("class", "description_wrapper", webpage) - if video_description: - video_description = clean_html(video_description) - except AssertionError as err: - # On some pages like (http://player.vimeo.com/video/54469442) the - # html tags are not closed, python 2.6 cannot handle it - if err.args[0] == 'we should not get here!': - pass - else: - raise + video_description = self._html_search_regex( + r'(?s)<div class="[^"]*description"[^>]*>(.*?)</div>', + webpage, 'description', fatal=False) # Extract video duration video_duration = int_or_none(config["video"].get("duration")) From 12548cd9330222848f6b49fe9eac91aaff897325 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Mon, 29 Sep 2014 05:02:58 +0200 Subject: [PATCH 0112/1937] [worldstarhiphop] Correct title extraction --- youtube_dl/extractor/worldstarhiphop.py | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/youtube_dl/extractor/worldstarhiphop.py b/youtube_dl/extractor/worldstarhiphop.py index 4e89acd81..bda3870db 100644 --- a/youtube_dl/extractor/worldstarhiphop.py +++ b/youtube_dl/extractor/worldstarhiphop.py @@ -13,37 +13,35 @@ class WorldStarHipHopIE(InfoExtractor): "info_dict": { "id": "wshh6a7q1ny0G34ZwuIO", "ext": "mp4", - "title": "Video: KO Of The Week: MMA Fighter Gets Knocked Out By Swift Head Kick!" + "title": "KO Of The Week: MMA Fighter Gets Knocked Out By Swift Head Kick!" } } def _real_extract(self, url): - m = re.match(self._VALID_URL, url) - video_id = m.group('id') + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) - webpage_src = self._download_webpage(url, video_id) - - m_vevo_id = re.search(r'videoId=(.*?)&?', - webpage_src) + m_vevo_id = re.search(r'videoId=(.*?)&?', webpage) if m_vevo_id is not None: return self.url_result('vevo:%s' % m_vevo_id.group(1), ie='Vevo') video_url = self._search_regex( - r'so\.addVariable\("file","(.*?)"\)', webpage_src, 'video URL') + r'so\.addVariable\("file","(.*?)"\)', webpage, 'video URL') if 'youtube' in video_url: return self.url_result(video_url, ie='Youtube') video_title = self._html_search_regex( - r"<title>(.*)", webpage_src, 'title') + r'(?s)
<div class="content-heading">\s*<h1>(.*?)</h1>
', + webpage, 'title') # Getting thumbnail and if not thumbnail sets correct title for WSHH candy video. thumbnail = self._html_search_regex( - r'rel="image_src" href="(.*)" />', webpage_src, 'thumbnail', + r'rel="image_src" href="(.*)" />', webpage, 'thumbnail', fatal=False) if not thumbnail: - _title = r"""candytitles.*>(.*)""" - mobj = re.search(_title, webpage_src) + _title = r'candytitles.*>(.*)' + mobj = re.search(_title, webpage) if mobj is not None: video_title = mobj.group(1) From 6043f1df4e4f74bd0ade52b3fc0938ff241366dc Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Mon, 29 Sep 2014 05:05:06 +0200 Subject: [PATCH 0113/1937] [ign] Return proper playlist object --- youtube_dl/extractor/ign.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/ign.py b/youtube_dl/extractor/ign.py index 12e9e61c4..c80185b53 100644 --- a/youtube_dl/extractor/ign.py +++ b/youtube_dl/extractor/ign.py @@ -89,7 +89,12 @@ class IGNIE(InfoExtractor): ']*value="[^"]*?url=(https?://www\.ign\.com/videos/.*?)["&]', webpage) if multiple_urls: - return [self.url_result(u, ie='IGN') for u in multiple_urls] + entries = [self.url_result(u, ie='IGN') for u in multiple_urls] + return { + '_type': 'playlist', + 'id': name_or_id, + 'entries': entries, + } video_id = self._find_video_id(webpage) result = self._get_video_info(video_id) From a8eb5a8e610a2b90eac2789d5b5f3cda81f543bb Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Mon, 29 Sep 2014 05:12:57 +0200 Subject: [PATCH 0114/1937] [generic] Fix testcases --- youtube_dl/extractor/generic.py | 18 +++++++++--------- youtube_dl/extractor/ted.py | 2 +- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 0dfa4853d..263aa8579 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -180,13 +180,13 @@ class GenericIE(InfoExtractor): # Embedded TED video { 'url': 'http://en.support.wordpress.com/videos/ted-talks/', - 'md5': 'deeeabcc1085eb2ba205474e7235a3d5', + 'md5': '65fdff94098e4a607385a60c5177c638', 'info_dict': { - 'id': '981', + 'id': '1969', 'ext': 'mp4', - 'title': 'My web playroom', - 'uploader': 'Ze Frank', - 'description': 'md5:ddb2a40ecd6b6a147e400e535874947b', + 'title': 'Hidden miracles of the natural world', + 'uploader': 'Louie Schwartzberg', + 'description': 'md5:8145d19d320ff3e52f28401f4c4283b9', } }, # Embeded Ustream video @@ -295,13 +295,13 @@ class GenericIE(InfoExtractor): { 'url': 'https://play.google.com/store/apps/details?id=com.gameloft.android.ANMP.GloftA8HM', 'info_dict': { - 'id': 'jpSGZsgga_I', + 'id': '4vAffPZIT44', 'ext': 'mp4', - 'title': 'Asphalt 8: Airborne - Launch Trailer', + 'title': 'Asphalt 8: Airborne - Update - Welcome to Dubai!', 'uploader': 'Gameloft', 'uploader_id': 'gameloft', - 'upload_date': '20130821', - 'description': 'md5:87bd95f13d8be3e7da87a5f2c443106a', + 'upload_date': '20140828', + 'description': 'md5:c80da9ed3d83ae6d1876c834de03e1c4', }, 'params': { 'skip_download': True, diff --git a/youtube_dl/extractor/ted.py b/youtube_dl/extractor/ted.py index 1cca47771..d5e28efad 100644 --- a/youtube_dl/extractor/ted.py +++ b/youtube_dl/extractor/ted.py @@ -149,7 +149,7 @@ class TEDIE(SubtitlesInfoExtractor): thumbnail = 'http://' + thumbnail return { 'id': video_id, - 'title': talk_info['title'], + 'title': talk_info['title'].strip(), 'uploader': talk_info['speaker'], 'thumbnail': thumbnail, 'description': self._og_search_description(webpage), From 
dbe3043cd6d6bf468495df1e9f927a8c512e82a0 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Mon, 29 Sep 2014 05:15:42 +0200 Subject: [PATCH 0115/1937] [ynet] Fix test checksums --- youtube_dl/extractor/ynet.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/ynet.py b/youtube_dl/extractor/ynet.py index 24872861a..944d7da38 100644 --- a/youtube_dl/extractor/ynet.py +++ b/youtube_dl/extractor/ynet.py @@ -13,7 +13,7 @@ class YnetIE(InfoExtractor): _TESTS = [ { 'url': 'http://hot.ynet.co.il/home/0,7340,L-11659-99244,00.html', - 'md5': '002b44ee2f33d50363a1c153bed524cf', + 'md5': '4b29cb57c3dddd57642b3f051f535b07', 'info_dict': { 'id': 'L-11659-99244', 'ext': 'flv', @@ -22,7 +22,7 @@ class YnetIE(InfoExtractor): } }, { 'url': 'http://hot.ynet.co.il/home/0,7340,L-8859-84418,00.html', - 'md5': '6455046ae1b48cf7e2b7cae285e53a16', + 'md5': '8194c2ea221e9a639cac96b6b0753dc5', 'info_dict': { 'id': 'L-8859-84418', 'ext': 'flv', @@ -33,9 +33,7 @@ class YnetIE(InfoExtractor): ] def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - + video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) content = compat_urllib_parse.unquote_plus(self._og_search_video_url(webpage)) From 8ff14175e228a30f9940a69a4e72ca3a2a99aaf6 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Mon, 29 Sep 2014 05:17:16 +0200 Subject: [PATCH 0116/1937] [sportdeutschland] Fix testcase --- youtube_dl/extractor/sportdeutschland.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/sportdeutschland.py b/youtube_dl/extractor/sportdeutschland.py index 185353bef..abb827783 100644 --- a/youtube_dl/extractor/sportdeutschland.py +++ b/youtube_dl/extractor/sportdeutschland.py @@ -17,11 +17,11 @@ class SportDeutschlandIE(InfoExtractor): 'info_dict': { 'id': 'live-li-ning-badminton-weltmeisterschaft-2014-kopenhagen', 'ext': 'mp4', - 'title': 'LIVE: Li-Ning Badminton Weltmeisterschaft 2014 Kopenhagen', + 'title': 're:Li-Ning Badminton Weltmeisterschaft 2014 Kopenhagen', 'categories': ['Badminton'], 'view_count': int, 'thumbnail': 're:^https?://.*\.jpg$', - 'description': 're:^Die Badminton-WM 2014 aus Kopenhagen LIVE', + 'description': 're:Die Badminton-WM 2014 aus Kopenhagen bei Sportdeutschland\.TV', 'timestamp': int, 'upload_date': 're:^201408[23][0-9]$', }, From 761e1645e075ff9f8c5aeb8d0f2a4cfac71fb528 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Mon, 29 Sep 2014 05:18:45 +0200 Subject: [PATCH 0117/1937] [generic] Remove unstable test checksum --- youtube_dl/extractor/generic.py | 1 - 1 file changed, 1 deletion(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 263aa8579..742bc2856 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -155,7 +155,6 @@ class GenericIE(InfoExtractor): # funnyordie embed { 'url': 'http://www.theguardian.com/world/2014/mar/11/obama-zach-galifianakis-between-two-ferns', - 'md5': '7cf780be104d40fea7bae52eed4a470e', 'info_dict': { 'id': '18e820ec3f', 'ext': 'mp4', From 5e4f06197f3d949bf89ee7e156391ca78121bf16 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Mon, 29 Sep 2014 05:19:56 +0200 Subject: [PATCH 0118/1937] [facebook] Fix test case --- youtube_dl/extractor/facebook.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py index 60e68d98a..3ad993751 100644 --- 
a/youtube_dl/extractor/facebook.py +++ b/youtube_dl/extractor/facebook.py @@ -35,7 +35,7 @@ class FacebookIE(InfoExtractor): 'id': '637842556329505', 'ext': 'mp4', 'duration': 38, - 'title': 'Did you know Kei Nishikori is the first Asian man to ever reach a Grand Slam fin...', + 'title': 're:Did you know Kei Nishikori is the first Asian man to ever reach a Grand Slam', } }, { 'note': 'Video without discernible title', From 6be451f422090601e25b6a9b1f801f521f1ca41f Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Mon, 29 Sep 2014 05:23:58 +0200 Subject: [PATCH 0119/1937] [youtube] Remove swf signature test cases These files are now 0 Bytes --- test/test_youtube_signature.py | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py index 604e76ab6..df2cb09f2 100644 --- a/test/test_youtube_signature.py +++ b/test/test_youtube_signature.py @@ -47,18 +47,6 @@ _TESTS = [ '2ACFC7A61CA478CD21425E5A57EBD73DDC78E22A.2094302436B2D377D14A3BBA23022D023B8BC25AA', 'A52CB8B320D22032ABB3A41D773D2B6342034902.A22E87CDD37DBE75A5E52412DC874AC16A7CFCA2', ), - ( - 'http://s.ytimg.com/yts/swfbin/player-vfl5vIhK2/watch_as3.swf', - 'swf', - 86, - 'O1I3456789abcde0ghijklmnopqrstuvwxyzABCDEFGHfJKLMN2PQRSTUVWXY\\!"#$%&\'()*+,-./:;<=>?' - ), - ( - 'http://s.ytimg.com/yts/swfbin/player-vflmDyk47/watch_as3.swf', - 'swf', - 'F375F75BF2AFDAAF2666E43868D46816F83F13E81C46.3725A8218E446A0DECD33F79DC282994D6AA92C92C9', - '9C29AA6D499282CD97F33DCED0A644E8128A5273.64C18E31F38361864D86834E6662FAADFA2FB57F' - ), ( 'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflBb0OQx.js', 'js', From e50e2fcd4deaab50da506a0abf1bafed16085cd7 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Mon, 29 Sep 2014 05:40:20 +0200 Subject: [PATCH 0120/1937] [br] fix test case --- youtube_dl/extractor/br.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/br.py b/youtube_dl/extractor/br.py index 4e2960c62..2e277c8c3 100644 --- a/youtube_dl/extractor/br.py +++ b/youtube_dl/extractor/br.py @@ -26,6 +26,8 @@ class BRIE(InfoExtractor): 'title': 'Wenn das Traditions-Theater wackelt', 'description': 'Heimatsound-Festival 2014: Wenn das Traditions-Theater wackelt', 'duration': 34, + 'uploader': 'BR', + 'upload_date': '20140802', } }, { @@ -66,8 +68,7 @@ class BRIE(InfoExtractor): ] def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - display_id = mobj.group('id') + display_id = self._match_id(url) page = self._download_webpage(url, display_id) xml_url = self._search_regex( r"return BRavFramework\.register\(BRavFramework\('avPlayer_(?:[a-f0-9-]{36})'\)\.setup\({dataURL:'(/(?:[a-z0-9\-]+/)+[a-z0-9/~_.-]+)'}\)\);", page, 'XMLURL') From 8157ae39042298831afc8f8e5d67619d21e3e00b Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Mon, 29 Sep 2014 05:48:56 +0200 Subject: [PATCH 0121/1937] [golem] Fix under 2.6 It's a sad story; 2.6 does not support any non-trivial xpaths. 
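"Non-trivial" here means any path with a predicate: the ElementTree bundled with Python 2.6 only understands plain steps such as tag names, '*', '.' and '//', so expressions like './*[url]' or './/teaser[url]' fail there. A rough sketch of the manual filtering this patch switches to (the XML below is made up for illustration):

    import xml.etree.ElementTree as ET

    config = ET.fromstring(
        '<config>'
        '<file><url>http://example.invalid/video_high.mp4</url></file>'
        '<file></file>'  # entry without a <url> child, must be skipped
        '</config>'
    )

    # Instead of config.findall('./*[url]'), iterate over all children and
    # drop the ones that have no <url> child:
    for entry in config:
        url = entry.findtext('./url')
        if not url:
            continue
        print(url)
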
--- youtube_dl/extractor/golem.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/golem.py b/youtube_dl/extractor/golem.py index bebfe8568..53714f47f 100644 --- a/youtube_dl/extractor/golem.py +++ b/youtube_dl/extractor/golem.py @@ -38,11 +38,9 @@ class GolemIE(InfoExtractor): } formats = [] - for e in config.findall('./*[url]'): + for e in config: url = e.findtext('./url') if not url: - self._downloader.report_warning( - "{0}: url: empty, skipping".format(e.tag)) continue formats.append({ @@ -57,7 +55,7 @@ class GolemIE(InfoExtractor): info['formats'] = formats thumbnails = [] - for e in config.findall('.//teaser[url]'): + for e in config.findall('.//teaser'): url = e.findtext('./url') if not url: continue From 2a7b4681c6628e2a17b9b980333af1011e482058 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Mon, 29 Sep 2014 05:51:41 +0200 Subject: [PATCH 0122/1937] [godtube] Fix on Python 2.6 --- youtube_dl/extractor/godtube.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/godtube.py b/youtube_dl/extractor/godtube.py index 73bd6d890..363dc6608 100644 --- a/youtube_dl/extractor/godtube.py +++ b/youtube_dl/extractor/godtube.py @@ -36,16 +36,16 @@ class GodTubeIE(InfoExtractor): 'http://www.godtube.com/resource/mediaplayer/%s.xml' % video_id.lower(), video_id, 'Downloading player config XML') - video_url = config.find('.//file').text - uploader = config.find('.//author').text - timestamp = parse_iso8601(config.find('.//date').text) - duration = parse_duration(config.find('.//duration').text) - thumbnail = config.find('.//image').text + video_url = config.find('file').text + uploader = config.find('author').text + timestamp = parse_iso8601(config.find('date').text) + duration = parse_duration(config.find('duration').text) + thumbnail = config.find('image').text media = self._download_xml( 'http://www.godtube.com/media/xml/?v=%s' % video_id, video_id, 'Downloading media XML') - title = media.find('.//title').text + title = media.find('title').text return { 'id': video_id, From 989b4b2b86588c011314200c0d30db965f79105e Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Mon, 29 Sep 2014 06:15:46 +0200 Subject: [PATCH 0123/1937] [utils:YoutubeDLHandler] Work around brain-dead Python 2.6 httplib In 2.6, the httplib sends fragments! Remove those (fixes generic_26 on 2.6). 
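Background: a URL fragment (everything after '#') is meant to stay on the client, but the httplib in Python 2.6 copies the full selector, fragment included, onto the HTTP request line, which servers do not expect. The handler change below strips the fragment from the request on Python < 2.7; the snippet here is only an isolated illustration of the same idea, not the handler code itself:

    try:
        from urllib.parse import urldefrag  # Python 3
    except ImportError:
        from urlparse import urldefrag  # Python 2

    url = 'http://example.invalid/page.html#player'

    # Drop the fragment before handing the URL to httplib, so nothing after
    # '#' can leak into the outgoing request line:
    clean_url, fragment = urldefrag(url)
    print(clean_url)  # -> http://example.invalid/page.html
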
--- youtube_dl/utils.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 9f49507c1..950cd1a7a 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -799,6 +799,12 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler): del req.headers['User-agent'] req.headers['User-agent'] = req.headers['Youtubedl-user-agent'] del req.headers['Youtubedl-user-agent'] + + if sys.version_info < (2, 7) and '#' in req.get_full_url(): + # Python 2.6 is brain-dead when it comes to fragments + req._Request__original = req._Request__original.partition('#')[0] + req._Request__r_type = req._Request__r_type.partition('#')[0] + return req def http_response(self, req, resp): From 8c23945c727dab01eabbc6e134cbb80db34d3120 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Mon, 29 Sep 2014 06:19:18 +0200 Subject: [PATCH 0124/1937] [eporner] Adapt to changed default format --- youtube_dl/extractor/eporner.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/eporner.py b/youtube_dl/extractor/eporner.py index 522aa3d63..bb231ecb1 100644 --- a/youtube_dl/extractor/eporner.py +++ b/youtube_dl/extractor/eporner.py @@ -14,11 +14,11 @@ class EpornerIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?eporner\.com/hd-porn/(?P\d+)/(?P[\w-]+)' _TEST = { 'url': 'http://www.eporner.com/hd-porn/95008/Infamous-Tiffany-Teen-Strip-Tease-Video/', - 'md5': '3b427ae4b9d60619106de3185c2987cd', + 'md5': '39d486f046212d8e1b911c52ab4691f8', 'info_dict': { 'id': '95008', 'display_id': 'Infamous-Tiffany-Teen-Strip-Tease-Video', - 'ext': 'flv', + 'ext': 'mp4', 'title': 'Infamous Tiffany Teen Strip Tease Video', 'duration': 194, 'view_count': int, From 80bcefcd77415eff62b722c0a432e5c217a1d64f Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Mon, 29 Sep 2014 06:22:54 +0200 Subject: [PATCH 0125/1937] [cliphunter] Remove duration --- youtube_dl/extractor/cliphunter.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/youtube_dl/extractor/cliphunter.py b/youtube_dl/extractor/cliphunter.py index 65c12136a..d4227e6eb 100644 --- a/youtube_dl/extractor/cliphunter.py +++ b/youtube_dl/extractor/cliphunter.py @@ -35,7 +35,6 @@ class CliphunterIE(InfoExtractor): 'title': 'Fun Jynx Maze solo', 'thumbnail': 're:^https?://.*\.jpg$', 'age_limit': 18, - 'duration': 1317, } } @@ -86,14 +85,11 @@ class CliphunterIE(InfoExtractor): thumbnail = self._search_regex( r"var\s+mov_thumb\s*=\s*'([^']+)';", webpage, 'thumbnail', fatal=False) - duration = int_or_none(self._search_regex( - r'pl_dur\s*=\s*([0-9]+)', webpage, 'duration', fatal=False)) return { 'id': video_id, 'title': video_title, 'formats': formats, - 'duration': duration, 'age_limit': self._rta_search(webpage), 'thumbnail': thumbnail, } From 937f935db0932fcbd6402068c0147f07f78af4ed Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Mon, 29 Sep 2014 12:15:23 +0200 Subject: [PATCH 0126/1937] [jukebox] Remove md5 sum, it fluctuates --- youtube_dl/extractor/jukebox.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/jukebox.py b/youtube_dl/extractor/jukebox.py index 9b553b9fa..5aa32bf09 100644 --- a/youtube_dl/extractor/jukebox.py +++ b/youtube_dl/extractor/jukebox.py @@ -11,10 +11,9 @@ from ..utils import ( class JukeboxIE(InfoExtractor): - _VALID_URL = r'^http://www\.jukebox?\..+?\/.+[,](?P[a-z0-9\-]+)\.html' + _VALID_URL = r'^http://www\.jukebox?\..+?\/.+[,](?P[a-z0-9\-]+)\.html' _TEST = { 'url': 
'http://www.jukebox.es/kosheen/videoclip,pride,r303r.html', - 'md5': '1574e9b4d6438446d5b7dbcdf2786276', 'info_dict': { 'id': 'r303r', 'ext': 'flv', @@ -24,8 +23,7 @@ class JukeboxIE(InfoExtractor): } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('video_id') + video_id = self._match_id(url) html = self._download_webpage(url, video_id) iframe_url = unescapeHTML(self._search_regex(r'
', page): + return self.playlist_result([self.url_result(vid) for vid in vids], video_id) + + title = self._html_search_regex( + r'
<div class="postdesc">[^<]*<h1>([^<]+)</h1>
', page, 'title') + + return { + '_type': 'url', + 'id': video_id, + 'url': vids[0], + 'title': title, + } + + +class GoGoAnimeSearchIE(InfoExtractor): + IE_NAME = 'gogoanime:search' + IE_DESC = 'GoGoAnime Search' + + _VALID_URL = r'http://www\.gogoanime\.com/.*\?s=(?P[^&]*)' + _TEST = { + 'url': 'http://www.gogoanime.com/?s=bokusatsu', + 'info_dict': { + 'id': 'bokusatsu' + }, + 'playlist_count': 6 + } + + def _real_extract(self, url): + playlist_id = self._match_id(url) + webpage = self._download_webpage(url, playlist_id) + + posts = re.findall( + r'
[^<]*]*>[^<]*.+)' + + _TESTS = [{ + 'url': 'http://play44.net/embed.php?w=600&h=438&vid=M/mahou-shoujo-madoka-magica-07.flv', + 'md5': 'e37e99d665f503dd2db952f7c4dba9e6', + 'info_dict': { + 'id': 'mahou-shoujo-madoka-magica-07', + 'ext': 'flv', + 'title': 'mahou-shoujo-madoka-magica-07', + } + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + page = self._download_webpage(url, video_id) + + video_url = compat_urllib_parse.unquote(self._html_search_regex( + r'_url = "(https?://[^"]+?)";', page, 'url')) + title = self._search_regex(r'.*/(?P[^.]*).', video_url, 'title') + + return { + 'id': title, + 'url': video_url, + 'title': title, + } + + +class ByZooIE(Play44IE): + _VALID_URL = r'http://[w.]*byzoo\.org/embed\.php[^/]*/(?P<id>.+)' + + _TESTS = [{ + 'url': 'http://byzoo.org/embed.php?w=600&h=438&vid=at/nw/mahou_shoujo_madoka_magica_movie_3_-_part1.mp4', + 'md5': '455c83dabe2cd9fd74a87612b01fe017', + 'info_dict': { + 'id': 'mahou_shoujo_madoka_magica_movie_3_-_part1', + 'ext': 'mp4', + 'title': 'mahou_shoujo_madoka_magica_movie_3_-_part1', + } + }] + + +class Video44IE(Play44IE): + _VALID_URL = r'http://[w.]*video44\.net/.*file=(?P<id>[^&].).*' + + _TESTS = [{ + 'url': 'http://www.video44.net/gogo/?w=600&h=438&file=chaoshead-12.flv&sv=1', + 'md5': '43eaec6d0beb10e8d42459b9f108aff3', + 'info_dict': { + 'id': 'chaoshead-12', + 'ext': 'mp4', + 'title': 'chaoshead-12', + } + }] + + +class VideoWingIE(Play44IE): + _VALID_URL = r'''(?x) + http://[w.]*videowing\.[^/]*/ + (?: + .*video=/* + |embed/ + ) + (?P<id>[^&?.]+) + ''' + + _TESTS = [{ + 'url': 'http://videowing.me/embed?w=718&h=438&video=ongoing/boku_wa_tomodachi_ga_sukunai_-_05.mp4', + 'md5': '4ed320e353ed26c742c4f12a9c210b60', + 'info_dict': { + 'id': 'boku_wa_tomodachi_ga_sukunai_-_05', + 'ext': 'mp4', + 'title': 'boku_wa_tomodachi_ga_sukunai_-_05', + } + }, { + 'url': 'http://videowing.me/embed/a8d6a39522df066bd734a69f2334497e?w=600&h=438', + 'md5': '33fdd71581357018c226f95c5cedcfd7', + 'info_dict': { + 'id': 'mahoushoujomadokamagicamovie1part1', + 'ext': 'flv', + 'title': 'mahoushoujomadokamagicamovie1part1', + } + }] + + +class PlayPandaIE(Play44IE): + _VALID_URL = r'http://[w.]*playpanda\.[^/]*/.*vid=/*(?P<id>[^&].).*' + + _TESTS = [{ + 'url': 'http://playpanda.net/embed.php?w=718&h=438&vid=at/nw/boku_wa_tomodachi_ga_sukunai_-_05.mp4', + 'md5': '4ed320e353ed26c742c4f12a9c210b60', + 'info_dict': { + 'id': 'boku_wa_tomodachi_ga_sukunai_-_05', + 'ext': 'mp4', + 'title': 'boku_wa_tomodachi_ga_sukunai_-_05', + 'description': 'boku_wa_tomodachi_ga_sukunai_-_05' + } + }] + + +class VideoZooIE(Play44IE): + _VALID_URL = r'http://[w.]*videozoo\.[^/]*/.*vid=/*(?P<id>[^&].).*' + + _TESTS = [{ + 'url': 'http://videozoo.me/embed.php?w=718&h=438&vid=at/nw/boku_wa_tomodachi_ga_sukunai_-_05.mp4', + 'md5': '4ed320e353ed26c742c4f12a9c210b60', + 'info_dict': { + 'id': 'boku_wa_tomodachi_ga_sukunai_-_05', + 'ext': 'mp4', + 'title': 'boku_wa_tomodachi_ga_sukunai_-_05', + } + }] + + +class PlayBBIE(Play44IE): + _VALID_URL = r'http://[w.]*playbb\.[^/]*/.*vid=/*(?P<id>[^&].).*' + + _TESTS = [{ + 'url': 'http://playbb.me/embed.php?w=718&h=438&vid=at/nw/boku_wa_tomodachi_ga_sukunai_-_05.mp4', + 'md5': '4ed320e353ed26c742c4f12a9c210b60', + 'info_dict': { + 'id': 'boku_wa_tomodachi_ga_sukunai_-_05', + 'ext': 'mp4', + 'title': 'boku_wa_tomodachi_ga_sukunai_-_05', + } + }] + + +class EasyVideoIE(Play44IE): + _VALID_URL = r'http://[w.]*easyvideo\.[^/]*/.*file=/*(?P<id>[^&.]+)' + + _TESTS = [{ + 'url': 
'http://easyvideo.me/gogo/?w=718&h=438&file=bokuwatomodachigasukunai-04.flv&sv=1', + 'md5': '26178b57629b7650106d72b191137176', + 'info_dict': { + 'id': 'bokuwatomodachigasukunai-04', + 'ext': 'mp4', + 'title': 'bokuwatomodachigasukunai-04', + }, + 'skip': 'Blocked in Germany', + }] diff --git a/youtube_dl/extractor/soulanime.py b/youtube_dl/extractor/soulanime.py new file mode 100644 index 000000000..7adb10c03 --- /dev/null +++ b/youtube_dl/extractor/soulanime.py @@ -0,0 +1,74 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor + + +class SoulAnimeWatchingIE(InfoExtractor): + IE_NAME = "soulanime:watching" + IE_DESC = "SoulAnime video" + _TEST = { + 'url': 'http://www.soul-anime.net/watching/seirei-tsukai-no-blade-dance-episode-9/', + 'md5': '05fae04abf72298098b528e98abf4298', + 'info_dict': { + 'id': 'seirei-tsukai-no-blade-dance-episode-9', + 'ext': 'mp4', + 'title': 'seirei-tsukai-no-blade-dance-episode-9', + 'description': 'seirei-tsukai-no-blade-dance-episode-9' + } + } + _VALID_URL = r'http://[w.]*soul-anime\.(?P<domain>[^/]+)/watch[^/]*/(?P<id>[^/]+)' + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + domain = mobj.group('domain') + + page = self._download_webpage(url, video_id) + + video_url_encoded = self._html_search_regex( + r'<div id="download">[^<]*<a href="(?P<url>[^"]+)"', page, 'url') + video_url = "http://www.soul-anime." + domain + video_url_encoded + + vid = self._request_webpage(video_url, video_id) + ext = vid.info().gettype().split("/")[1] + + return { + 'id': video_id, + 'url': video_url, + 'ext': ext, + 'title': video_id, + 'description': video_id + } + + +class SoulAnimeSeriesIE(InfoExtractor): + IE_NAME = "soulanime:series" + IE_DESC = "SoulAnime Series" + + _VALID_URL = r'http://[w.]*soul-anime\.(?P<domain>[^/]+)/anime./(?P<id>[^/]+)' + + _EPISODE_REGEX = r'<option value="(/watch[^/]*/[^"]+)">[^<]*</option>' + + _TEST = { + 'url': 'http://www.soul-anime.net/anime1/black-rock-shooter-tv/', + 'info_dict': { + 'id': 'black-rock-shooter-tv' + }, + 'playlist_count': 8 + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + series_id = mobj.group('id') + domain = mobj.group('domain') + + pattern = re.compile(self._EPISODE_REGEX) + + page = self._download_webpage(url, series_id, "Downloading series page") + mobj = pattern.findall(page) + + entries = [self.url_result("http://www.soul-anime." 
+ domain + obj) for obj in mobj] + + return self.playlist_result(entries, series_id) diff --git a/youtube_dl/extractor/videofun.py b/youtube_dl/extractor/videofun.py new file mode 100644 index 000000000..0364b9d32 --- /dev/null +++ b/youtube_dl/extractor/videofun.py @@ -0,0 +1,36 @@ +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + compat_urllib_parse +) + + +class VideoFunIE(InfoExtractor): + _VALID_URL = r'http://[w.]*videofun\.me/embed/(?P<id>[0-9a-f]+)' + + _TEST = { + 'url': 'http://videofun.me/embed/8267659be070860af600fee7deadbcdb?w=600&h=438', + 'md5': 'e37e99d665f503dd2db952f7c4dba9e6', + 'info_dict': { + 'id': 'Mahou-Shoujo-Madoka-Magica-07', + 'ext': 'flv', + 'title': 'Mahou-Shoujo-Madoka-Magica-07', + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage( + url, video_id, 'Downloading video page') + + video_url_encoded = self._html_search_regex( + r'url: "(http://gateway\.videofun\.me[^"]+)"', webpage, 'video url') + video_url = compat_urllib_parse.unquote(video_url_encoded) + title = self._html_search_regex(r'.*/([^.]*)\.', video_url, 'title') + + return { + 'id': title, + 'url': video_url, + 'title': title, + } From 95ceeec72200ed3b2c94a54650eb69dfe946e595 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Sun, 4 Jan 2015 02:05:35 +0100 Subject: [PATCH 1025/1937] Remove unused import --- youtube_dl/downloader/mplayer.py | 1 - 1 file changed, 1 deletion(-) diff --git a/youtube_dl/downloader/mplayer.py b/youtube_dl/downloader/mplayer.py index 34b23b5c2..72cef30ea 100644 --- a/youtube_dl/downloader/mplayer.py +++ b/youtube_dl/downloader/mplayer.py @@ -4,7 +4,6 @@ import os import subprocess from .common import FileDownloader -from ..compat import compat_subprocess_get_DEVNULL from ..utils import ( check_executable, encodeFilename, From c11125f9ed952f9b7ebd06c15eacadcc6005dd8c Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Sun, 4 Jan 2015 02:06:53 +0100 Subject: [PATCH 1026/1937] [tests] Remove format 138 from tests (#4559) --- test/test_YoutubeDL.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py index f8e4f930e..730f7ec26 100644 --- a/test/test_YoutubeDL.py +++ b/test/test_YoutubeDL.py @@ -218,7 +218,7 @@ class TestFormatSelection(unittest.TestCase): # 3D '85', '84', '102', '83', '101', '82', '100', # Dash video - '138', '137', '248', '136', '247', '135', '246', + '137', '248', '136', '247', '135', '246', '245', '244', '134', '243', '133', '242', '160', # Dash audio '141', '172', '140', '171', '139', From 8848314c08284f6a4b8f3c3529bf2e3f1b72610c Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Sun, 4 Jan 2015 02:08:18 +0100 Subject: [PATCH 1027/1937] [Makefile] Make offline tests actually work offline --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 71470eedb..e53a367ef 100644 --- a/Makefile +++ b/Makefile @@ -46,7 +46,7 @@ test: ot: offlinetest offlinetest: codetest - nosetests --verbose test --exclude test_download --exclude test_age_restriction --exclude test_subtitles --exclude test_write_annotations + nosetests --verbose test --exclude test_download --exclude test_age_restriction --exclude test_subtitles --exclude test_write_annotations --exclude test_youtube_lists tar: youtube-dl.tar.gz From 2ccd1b10e58cc8e5173dc1aeedc2b3f0ef9b55bf Mon Sep 17 00:00:00 2001 From: 
Philipp Hagemeister <phihag@phihag.de> Date: Sun, 4 Jan 2015 02:20:45 +0100 Subject: [PATCH 1028/1937] [soulanime] Fix under Python 3 --- youtube_dl/extractor/soulanime.py | 10 ++++++++-- youtube_dl/utils.py | 11 +++++++++++ 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/soulanime.py b/youtube_dl/extractor/soulanime.py index 7adb10c03..feef33e27 100644 --- a/youtube_dl/extractor/soulanime.py +++ b/youtube_dl/extractor/soulanime.py @@ -3,6 +3,10 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from ..utils import ( + HEADRequest, + urlhandle_detect_ext, +) class SoulAnimeWatchingIE(InfoExtractor): @@ -31,8 +35,10 @@ class SoulAnimeWatchingIE(InfoExtractor): r'<div id="download">[^<]*<a href="(?P<url>[^"]+)"', page, 'url') video_url = "http://www.soul-anime." + domain + video_url_encoded - vid = self._request_webpage(video_url, video_id) - ext = vid.info().gettype().split("/")[1] + ext_req = HEADRequest(video_url) + ext_handle = self._request_webpage( + ext_req, video_id, note='Determining extension') + ext = urlhandle_detect_ext(ext_handle) return { 'id': video_id, diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index efbe64fb3..bdfe053a7 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1550,3 +1550,14 @@ def ytdl_is_updateable(): def args_to_str(args): # Get a short string representation for a subprocess command return ' '.join(shlex_quote(a) for a in args) + + +def urlhandle_detect_ext(url_handle): + try: + url_handle.headers + getheader = lambda h: url_handle.headers[h] + except AttributeError: # Python < 3 + getheader = url_handle.info().getheader + + return getheader('Content-Type').split("/")[1] + From 7a1818c99b1729796f62c341b1b3f809cd47dbf8 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Sun, 4 Jan 2015 03:15:27 +0100 Subject: [PATCH 1029/1937] [vk] Add support for rutube embeds (Fixes #4514) --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/rutube.py | 31 +++++++++++++++++++++++++++++++ youtube_dl/extractor/vk.py | 9 +++++++++ 3 files changed, 41 insertions(+) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index c3dc09f75..143cd5c49 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -359,6 +359,7 @@ from .ruhd import RUHDIE from .rutube import ( RutubeIE, RutubeChannelIE, + RutubeEmbedIE, RutubeMovieIE, RutubePersonIE, ) diff --git a/youtube_dl/extractor/rutube.py b/youtube_dl/extractor/rutube.py index b72b5a586..5b1c3577a 100644 --- a/youtube_dl/extractor/rutube.py +++ b/youtube_dl/extractor/rutube.py @@ -70,6 +70,37 @@ class RutubeIE(InfoExtractor): } +class RutubeEmbedIE(InfoExtractor): + IE_NAME = 'rutube:embed' + IE_DESC = 'Rutube embedded videos' + _VALID_URL = 'https?://rutube\.ru/video/embed/(?P<id>[0-9]+)' + + _TEST = { + 'url': 'http://rutube.ru/video/embed/6722881?vk_puid37=&vk_puid38=', + 'info_dict': { + 'id': 'a10e53b86e8f349080f718582ce4c661', + 'ext': 'mp4', + 'upload_date': '20131223', + 'uploader_id': '297833', + 'description': 'Видео группы ★http://vk.com/foxkidsreset★ музей Fox Kids и Jetix<br/><br/> восстановлено и сделано в шикоформате subziro89 http://vk.com/subziro89', + 'uploader': 'subziro89 ILya', + 'title': 'Мистический городок Эйри в Индиан 5 серия озвучка subziro89', + }, + 'params': { + 'skip_download': 'Requires ffmpeg', + }, + } + + def _real_extract(self, url): + embed_id = self._match_id(url) + webpage = self._download_webpage(url, embed_id) 
+ + canonical_url = self._html_search_regex( + r'<link\s+rel="canonical"\s+href="([^"]+?)"', webpage, + 'Canonical URL') + return self.url_result(canonical_url, 'Rutube') + + class RutubeChannelIE(InfoExtractor): IE_NAME = 'rutube:channel' IE_DESC = 'Rutube channels' diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py index 542e9198a..129de6cf3 100644 --- a/youtube_dl/extractor/vk.py +++ b/youtube_dl/extractor/vk.py @@ -164,6 +164,15 @@ class VKIE(InfoExtractor): self.to_screen('Youtube video detected') return self.url_result(m_yt.group(1), 'Youtube') + m_rutube = re.search( + r'\ssrc="((?:https?:)?//rutube\.ru\\?/video\\?/embed(?:.*?))\\?"', info_page) + assert m_rutube + if m_rutube is not None: + self.to_screen('rutube video detected') + rutube_url = self._proto_relative_url( + m_rutube.group(1).replace('\\', '')) + return self.url_result(rutube_url) + m_opts = re.search(r'(?s)var\s+opts\s*=\s*({.*?});', info_page) if m_opts: m_opts_url = re.search(r"url\s*:\s*'([^']+)", m_opts.group(1)) From 26886e6140a684058064c30237ef096332e1510f Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Sun, 4 Jan 2015 03:15:48 +0100 Subject: [PATCH 1030/1937] release 2015.01.04 --- youtube_dl/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 2a54b9bbe..09813928a 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2015.01.03' +__version__ = '2015.01.04' From f4858a71035549cf82b258d01dda5060aef707b7 Mon Sep 17 00:00:00 2001 From: Christopher Krooss <c.krooss@gmail.com> Date: Sun, 4 Jan 2015 13:33:26 +0100 Subject: [PATCH 1031/1937] Add support for Radio Bremen --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/radiobremen.py | 55 +++++++++++++++++++++++++++++ 2 files changed, 56 insertions(+) create mode 100644 youtube_dl/extractor/radiobremen.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 143cd5c49..349f4fe71 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -339,6 +339,7 @@ from .prosiebensat1 import ProSiebenSat1IE from .pyvideo import PyvideoIE from .quickvid import QuickVidIE from .radiode import RadioDeIE +from .radiobremen import RadioBremenIE from .radiofrance import RadioFranceIE from .rai import RaiIE from .rbmaradio import RBMARadioIE diff --git a/youtube_dl/extractor/radiobremen.py b/youtube_dl/extractor/radiobremen.py new file mode 100644 index 000000000..68c78c4f9 --- /dev/null +++ b/youtube_dl/extractor/radiobremen.py @@ -0,0 +1,55 @@ +# -*- coding: utf-8 -*- + +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor + + +class RadioBremenIE(InfoExtractor): + _VALID_URL = r'http?://(?:www\.)?radiobremen\.de/mediathek/(index\.html)?\?id=(?P<video_id>[0-9]+)' + IE_NAME = 'radiobremen' + + _TEST = { + 'url': 'http://www.radiobremen.de/mediathek/index.html?id=114720', + 'info_dict': { + 'id': '114720', + 'ext': 'mp4', + 'height': 288, + 'width': 512, + 'title': 'buten un binnen vom 22. 
Dezember', + 'description': 'Unter anderem mit diesen Themen: 45 Flüchtlinge sind in Worpswede angekommen +++ Freies Internet für alle: Bremer arbeiten an einem flächendeckenden W-Lan-Netzwerk +++ Aktivisten kämpfen für das Unibad +++ So war das Wetter 2014 +++', + }, + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('video_id') + + meta_url = "http://www.radiobremen.de/apps/php/mediathek/metadaten.php?id=%s" % video_id + meta_doc = self._download_webpage(meta_url, video_id, 'Downloading metadata') + title = self._html_search_regex("<h1.*>(?P<title>.+)</h1>", meta_doc, "title") + description = self._html_search_regex("<p>(?P<description>.*)</p>", meta_doc, "description") + duration = self._html_search_regex("Länge:</td>\s+<td>(?P<duration>[0-9]+:[0-9]+)</td>", meta_doc, "duration") + + page_doc = self._download_webpage(url, video_id, 'Downloading video information') + pattern = "ardformatplayerclassic\(\'playerbereich\',\'(?P<width>[0-9]+)\',\'.*\',\'(?P<video_id>[0-9]+)\',\'(?P<secret>[0-9]+)\',\'(?P<thumbnail>.+)\',\'\'\)" + mobj = re.search(pattern, page_doc) + width, video_id, secret, thumbnail = int(mobj.group("width")), mobj.group("video_id"), mobj.group("secret"), mobj.group("thumbnail") + video_url = "http://dl-ondemand.radiobremen.de/mediabase/{:}/{:}_{:}_{:}.mp4".format(video_id, video_id, secret, width) + + return { + 'id': video_id, + 'title': title, + 'description': description, + 'duration': duration, + 'formats': [ + {'url': video_url, + 'ext': 'mp4', + 'width': width, + 'protocol': 'http' + } + ], + 'thumbnail': thumbnail, + } From 63948fc62c7f0bfcfe7b2ce102ab6e4e87de558c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Sun, 4 Jan 2015 13:40:30 +0100 Subject: [PATCH 1032/1937] [downloader/hls] Respect the 'prefer_ffmpeg' option --- youtube_dl/downloader/hls.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/youtube_dl/downloader/hls.py b/youtube_dl/downloader/hls.py index 5bb0f3cfd..aa58b52ab 100644 --- a/youtube_dl/downloader/hls.py +++ b/youtube_dl/downloader/hls.py @@ -11,7 +11,6 @@ from ..compat import ( compat_urllib_request, ) from ..utils import ( - check_executable, encodeFilename, ) @@ -27,16 +26,13 @@ class HlsFD(FileDownloader): '-bsf:a', 'aac_adtstoasc', encodeFilename(tmpfilename, for_subprocess=True)] - for program in ['avconv', 'ffmpeg']: - if check_executable(program, ['-version']): - break - else: + ffpp = FFmpegPostProcessor(downloader=self) + program = ffpp._executable + if program is None: self.report_error('m3u8 download detected but ffmpeg or avconv could not be found. 
Please install one.') return False - cmd = [program] + args - - ffpp = FFmpegPostProcessor(downloader=self) ffpp.check_version() + cmd = [program] + args retval = subprocess.call(cmd) if retval == 0: From bc1fc5ddbcba784778cbdd98c051ff2493178515 Mon Sep 17 00:00:00 2001 From: Christopher Krooss <c.krooss@gmail.com> Date: Sun, 4 Jan 2015 14:02:07 +0100 Subject: [PATCH 1033/1937] Don't check for height as it's not provided --- youtube_dl/extractor/radiobremen.py | 1 - 1 file changed, 1 deletion(-) diff --git a/youtube_dl/extractor/radiobremen.py b/youtube_dl/extractor/radiobremen.py index 68c78c4f9..6d130d3d9 100644 --- a/youtube_dl/extractor/radiobremen.py +++ b/youtube_dl/extractor/radiobremen.py @@ -16,7 +16,6 @@ class RadioBremenIE(InfoExtractor): 'info_dict': { 'id': '114720', 'ext': 'mp4', - 'height': 288, 'width': 512, 'title': 'buten un binnen vom 22. Dezember', 'description': 'Unter anderem mit diesen Themen: 45 Flüchtlinge sind in Worpswede angekommen +++ Freies Internet für alle: Bremer arbeiten an einem flächendeckenden W-Lan-Netzwerk +++ Aktivisten kämpfen für das Unibad +++ So war das Wetter 2014 +++', From bc3e582fe457f9239dc3a3386cbfd0e7db167404 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Sun, 4 Jan 2015 14:02:17 +0100 Subject: [PATCH 1034/1937] Don't use '-shortest' option for merging formats (closes #4220, closes #4580) With avconv and older versions of ffmpeg the video is partially copied. The duration difference between the audio and the video seem to be really small, so it's probably not noticeable. --- youtube_dl/postprocessor/ffmpeg.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/postprocessor/ffmpeg.py b/youtube_dl/postprocessor/ffmpeg.py index 048525efc..473536dcc 100644 --- a/youtube_dl/postprocessor/ffmpeg.py +++ b/youtube_dl/postprocessor/ffmpeg.py @@ -520,7 +520,7 @@ class FFmpegMetadataPP(FFmpegPostProcessor): class FFmpegMergerPP(FFmpegPostProcessor): def run(self, info): filename = info['filepath'] - args = ['-c', 'copy', '-map', '0:v:0', '-map', '1:a:0', '-shortest'] + args = ['-c', 'copy', '-map', '0:v:0', '-map', '1:a:0'] self._downloader.to_screen('[ffmpeg] Merging formats into "%s"' % filename) self.run_ffmpeg_multiple_files(info['__files_to_merge'], filename, args) return True, info From 9fda6ee39fa2da1949af5e9b95633e3df3c6f6b4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Sun, 4 Jan 2015 14:06:23 +0100 Subject: [PATCH 1035/1937] [tf1] Remove unused import --- youtube_dl/extractor/tf1.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/youtube_dl/extractor/tf1.py b/youtube_dl/extractor/tf1.py index 07cc81226..025d0877c 100644 --- a/youtube_dl/extractor/tf1.py +++ b/youtube_dl/extractor/tf1.py @@ -1,8 +1,6 @@ # coding: utf-8 from __future__ import unicode_literals -import re - from .common import InfoExtractor From 1d2d0e3ff2b4e55810039caf267bb9ad086f3610 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Sun, 4 Jan 2015 14:07:06 +0100 Subject: [PATCH 1036/1937] utils: Remove blank line at the end of file --- youtube_dl/utils.py | 1 - 1 file changed, 1 deletion(-) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index bdfe053a7..d4951c406 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1560,4 +1560,3 @@ def urlhandle_detect_ext(url_handle): getheader = 
url_handle.info().getheader return getheader('Content-Type').split("/")[1] - From 67c2bcdf4cf83f9ac32e5f1f50a8b4b38d2ac624 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Sun, 4 Jan 2015 19:19:15 +0100 Subject: [PATCH 1037/1937] Remove extractors which infringe copyright (#4554) --- youtube_dl/extractor/__init__.py | 19 ---- youtube_dl/extractor/gogoanime.py | 76 --------------- youtube_dl/extractor/play44.py | 149 ------------------------------ youtube_dl/extractor/videofun.py | 36 -------- 4 files changed, 280 deletions(-) delete mode 100644 youtube_dl/extractor/gogoanime.py delete mode 100644 youtube_dl/extractor/play44.py delete mode 100644 youtube_dl/extractor/videofun.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 143cd5c49..613e8e05b 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -164,10 +164,6 @@ from .globo import GloboIE from .godtube import GodTubeIE from .goldenmoustache import GoldenMoustacheIE from .golem import GolemIE -from .gogoanime import ( - GoGoAnimeIE, - GoGoAnimeSearchIE -) from .googleplus import GooglePlusIE from .googlesearch import GoogleSearchIE from .gorillavid import GorillaVidIE @@ -317,16 +313,6 @@ from .phoenix import PhoenixIE from .photobucket import PhotobucketIE from .planetaplay import PlanetaPlayIE from .played import PlayedIE -from .play44 import ( - Play44IE, - ByZooIE, - Video44IE, - VideoWingIE, - PlayPandaIE, - VideoZooIE, - PlayBBIE, - EasyVideoIE -) from .playfm import PlayFMIE from .playvid import PlayvidIE from .podomatic import PodomaticIE @@ -388,10 +374,6 @@ from .smotri import ( from .snotr import SnotrIE from .sockshare import SockshareIE from .sohu import SohuIE -from .soulanime import ( - SoulAnimeWatchingIE, - SoulAnimeSeriesIE -) from .soundcloud import ( SoundcloudIE, SoundcloudSetIE, @@ -486,7 +468,6 @@ from .viddler import ViddlerIE from .videobam import VideoBamIE from .videodetective import VideoDetectiveIE from .videolecturesnet import VideoLecturesNetIE -from .videofun import VideoFunIE from .videofyme import VideofyMeIE from .videomega import VideoMegaIE from .videopremium import VideoPremiumIE diff --git a/youtube_dl/extractor/gogoanime.py b/youtube_dl/extractor/gogoanime.py deleted file mode 100644 index d4f4ecc58..000000000 --- a/youtube_dl/extractor/gogoanime.py +++ /dev/null @@ -1,76 +0,0 @@ -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..utils import ( - ExtractorError, - compat_urllib_parse, - get_element_by_attribute, - unescapeHTML -) - - -class GoGoAnimeIE(InfoExtractor): - IE_NAME = 'gogoanime' - IE_DESC = 'GoGoAnime' - _VALID_URL = r'http://www.gogoanime.com/(?P<id>[A-Za-z0-9-]+)' - - _TEST = { - 'url': 'http://www.gogoanime.com/mahou-shoujo-madoka-magica-movie-1', - 'info_dict': { - 'id': 'mahou-shoujo-madoka-magica-movie-1' - }, - 'playlist_count': 3 - } - - def _real_extract(self, url): - video_id = self._match_id(url) - page = self._download_webpage(url, video_id) - - if 'Oops! 
Page Not Found</font>' in page: - raise ExtractorError('Video does not exist', expected=True) - - content = get_element_by_attribute("class", "postcontent", page) - vids = re.findall(r'<iframe[^>]*?src=[\'"](h[^\'"]+)[\'"]', content) - vids = [ - unescapeHTML(compat_urllib_parse.unquote(x)) - for x in vids if not re.search(r".*videofun.*", x)] - - if re.search(r'<div class="postcontent">[^<]*<p><iframe src=[\'"][^>]+></iframe><br />', page): - return self.playlist_result([self.url_result(vid) for vid in vids], video_id) - - title = self._html_search_regex( - r'<div class="postdesc">[^<]*<h1>([^<]+)</h1>', page, 'title') - - return { - '_type': 'url', - 'id': video_id, - 'url': vids[0], - 'title': title, - } - - -class GoGoAnimeSearchIE(InfoExtractor): - IE_NAME = 'gogoanime:search' - IE_DESC = 'GoGoAnime Search' - - _VALID_URL = r'http://www\.gogoanime\.com/.*\?s=(?P<id>[^&]*)' - _TEST = { - 'url': 'http://www.gogoanime.com/?s=bokusatsu', - 'info_dict': { - 'id': 'bokusatsu' - }, - 'playlist_count': 6 - } - - def _real_extract(self, url): - playlist_id = self._match_id(url) - webpage = self._download_webpage(url, playlist_id) - - posts = re.findall( - r'<div class="postlist">[^<]*<p[^>]*>[^<]*<a href="(?P<url>[^"]+)"', - webpage) - - return self.playlist_result( - [self.url_result(p) for p in posts], playlist_id) diff --git a/youtube_dl/extractor/play44.py b/youtube_dl/extractor/play44.py deleted file mode 100644 index b8696e516..000000000 --- a/youtube_dl/extractor/play44.py +++ /dev/null @@ -1,149 +0,0 @@ -from __future__ import unicode_literals - -from .common import InfoExtractor -from ..utils import ( - compat_urllib_parse -) - - -class Play44IE(InfoExtractor): - _VALID_URL = r'http://[w.]*play44\.net/embed\.php[^/]*/(?P<id>.+)' - - _TESTS = [{ - 'url': 'http://play44.net/embed.php?w=600&h=438&vid=M/mahou-shoujo-madoka-magica-07.flv', - 'md5': 'e37e99d665f503dd2db952f7c4dba9e6', - 'info_dict': { - 'id': 'mahou-shoujo-madoka-magica-07', - 'ext': 'flv', - 'title': 'mahou-shoujo-madoka-magica-07', - } - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - page = self._download_webpage(url, video_id) - - video_url = compat_urllib_parse.unquote(self._html_search_regex( - r'_url = "(https?://[^"]+?)";', page, 'url')) - title = self._search_regex(r'.*/(?P<title>[^.]*).', video_url, 'title') - - return { - 'id': title, - 'url': video_url, - 'title': title, - } - - -class ByZooIE(Play44IE): - _VALID_URL = r'http://[w.]*byzoo\.org/embed\.php[^/]*/(?P<id>.+)' - - _TESTS = [{ - 'url': 'http://byzoo.org/embed.php?w=600&h=438&vid=at/nw/mahou_shoujo_madoka_magica_movie_3_-_part1.mp4', - 'md5': '455c83dabe2cd9fd74a87612b01fe017', - 'info_dict': { - 'id': 'mahou_shoujo_madoka_magica_movie_3_-_part1', - 'ext': 'mp4', - 'title': 'mahou_shoujo_madoka_magica_movie_3_-_part1', - } - }] - - -class Video44IE(Play44IE): - _VALID_URL = r'http://[w.]*video44\.net/.*file=(?P<id>[^&].).*' - - _TESTS = [{ - 'url': 'http://www.video44.net/gogo/?w=600&h=438&file=chaoshead-12.flv&sv=1', - 'md5': '43eaec6d0beb10e8d42459b9f108aff3', - 'info_dict': { - 'id': 'chaoshead-12', - 'ext': 'mp4', - 'title': 'chaoshead-12', - } - }] - - -class VideoWingIE(Play44IE): - _VALID_URL = r'''(?x) - http://[w.]*videowing\.[^/]*/ - (?: - .*video=/* - |embed/ - ) - (?P<id>[^&?.]+) - ''' - - _TESTS = [{ - 'url': 'http://videowing.me/embed?w=718&h=438&video=ongoing/boku_wa_tomodachi_ga_sukunai_-_05.mp4', - 'md5': '4ed320e353ed26c742c4f12a9c210b60', - 'info_dict': { - 'id': 'boku_wa_tomodachi_ga_sukunai_-_05', - 'ext': 
'mp4', - 'title': 'boku_wa_tomodachi_ga_sukunai_-_05', - } - }, { - 'url': 'http://videowing.me/embed/a8d6a39522df066bd734a69f2334497e?w=600&h=438', - 'md5': '33fdd71581357018c226f95c5cedcfd7', - 'info_dict': { - 'id': 'mahoushoujomadokamagicamovie1part1', - 'ext': 'flv', - 'title': 'mahoushoujomadokamagicamovie1part1', - } - }] - - -class PlayPandaIE(Play44IE): - _VALID_URL = r'http://[w.]*playpanda\.[^/]*/.*vid=/*(?P<id>[^&].).*' - - _TESTS = [{ - 'url': 'http://playpanda.net/embed.php?w=718&h=438&vid=at/nw/boku_wa_tomodachi_ga_sukunai_-_05.mp4', - 'md5': '4ed320e353ed26c742c4f12a9c210b60', - 'info_dict': { - 'id': 'boku_wa_tomodachi_ga_sukunai_-_05', - 'ext': 'mp4', - 'title': 'boku_wa_tomodachi_ga_sukunai_-_05', - 'description': 'boku_wa_tomodachi_ga_sukunai_-_05' - } - }] - - -class VideoZooIE(Play44IE): - _VALID_URL = r'http://[w.]*videozoo\.[^/]*/.*vid=/*(?P<id>[^&].).*' - - _TESTS = [{ - 'url': 'http://videozoo.me/embed.php?w=718&h=438&vid=at/nw/boku_wa_tomodachi_ga_sukunai_-_05.mp4', - 'md5': '4ed320e353ed26c742c4f12a9c210b60', - 'info_dict': { - 'id': 'boku_wa_tomodachi_ga_sukunai_-_05', - 'ext': 'mp4', - 'title': 'boku_wa_tomodachi_ga_sukunai_-_05', - } - }] - - -class PlayBBIE(Play44IE): - _VALID_URL = r'http://[w.]*playbb\.[^/]*/.*vid=/*(?P<id>[^&].).*' - - _TESTS = [{ - 'url': 'http://playbb.me/embed.php?w=718&h=438&vid=at/nw/boku_wa_tomodachi_ga_sukunai_-_05.mp4', - 'md5': '4ed320e353ed26c742c4f12a9c210b60', - 'info_dict': { - 'id': 'boku_wa_tomodachi_ga_sukunai_-_05', - 'ext': 'mp4', - 'title': 'boku_wa_tomodachi_ga_sukunai_-_05', - } - }] - - -class EasyVideoIE(Play44IE): - _VALID_URL = r'http://[w.]*easyvideo\.[^/]*/.*file=/*(?P<id>[^&.]+)' - - _TESTS = [{ - 'url': 'http://easyvideo.me/gogo/?w=718&h=438&file=bokuwatomodachigasukunai-04.flv&sv=1', - 'md5': '26178b57629b7650106d72b191137176', - 'info_dict': { - 'id': 'bokuwatomodachigasukunai-04', - 'ext': 'mp4', - 'title': 'bokuwatomodachigasukunai-04', - }, - 'skip': 'Blocked in Germany', - }] diff --git a/youtube_dl/extractor/videofun.py b/youtube_dl/extractor/videofun.py deleted file mode 100644 index 0364b9d32..000000000 --- a/youtube_dl/extractor/videofun.py +++ /dev/null @@ -1,36 +0,0 @@ -from __future__ import unicode_literals - -from .common import InfoExtractor -from ..utils import ( - compat_urllib_parse -) - - -class VideoFunIE(InfoExtractor): - _VALID_URL = r'http://[w.]*videofun\.me/embed/(?P<id>[0-9a-f]+)' - - _TEST = { - 'url': 'http://videofun.me/embed/8267659be070860af600fee7deadbcdb?w=600&h=438', - 'md5': 'e37e99d665f503dd2db952f7c4dba9e6', - 'info_dict': { - 'id': 'Mahou-Shoujo-Madoka-Magica-07', - 'ext': 'flv', - 'title': 'Mahou-Shoujo-Madoka-Magica-07', - } - } - - def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage( - url, video_id, 'Downloading video page') - - video_url_encoded = self._html_search_regex( - r'url: "(http://gateway\.videofun\.me[^"]+)"', webpage, 'video url') - video_url = compat_urllib_parse.unquote(video_url_encoded) - title = self._html_search_regex(r'.*/([^.]*)\.', video_url, 'title') - - return { - 'id': title, - 'url': video_url, - 'title': title, - } From 2f985f4bb4938ee13356bda0436fde18f8c0e434 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Mon, 5 Jan 2015 00:18:43 +0100 Subject: [PATCH 1038/1937] [youtube:toplist] Remove extractor They use now normal playlists (their id is PL*). 
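For reference, the charts that used to sit behind the 'yttoplist' keyword are now ordinary playlists, so they go through the regular playlist extractor. A minimal embedding sketch along the lines of the project's usual usage example (the list ID below is a placeholder, not a real chart):

    import youtube_dl

    # The 'yttoplist:music:Top Tracks' pseudo-URL is gone; such charts are
    # plain playlists now, so the standard playlist extractor handles them:
    ydl = youtube_dl.YoutubeDL({})
    ydl.download(['https://www.youtube.com/playlist?list=PLxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx'])
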
--- youtube_dl/extractor/__init__.py | 1 - youtube_dl/extractor/youtube.py | 46 -------------------------------- 2 files changed, 47 deletions(-) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 613e8e05b..79e6bba45 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -546,7 +546,6 @@ from .youtube import ( YoutubeSearchURLIE, YoutubeShowIE, YoutubeSubscriptionsIE, - YoutubeTopListIE, YoutubeTruncatedIDIE, YoutubeTruncatedURLIE, YoutubeUserIE, diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index e9bf39a00..d1bbf0b01 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1206,9 +1206,6 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor): if playlist_id.startswith('RD'): # Mixes require a custom extraction process return self._extract_mix(playlist_id) - if playlist_id.startswith('TL'): - raise ExtractorError('For downloading YouTube.com top lists, use ' - 'the "yttoplist" keyword, for example "youtube-dl \'yttoplist:music:Top Tracks\'"', expected=True) url = self._TEMPLATE_URL % playlist_id page = self._download_webpage(url, playlist_id) @@ -1254,49 +1251,6 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor): return self.playlist_result(url_results, playlist_id, playlist_title) -class YoutubeTopListIE(YoutubePlaylistIE): - IE_NAME = 'youtube:toplist' - IE_DESC = ('YouTube.com top lists, "yttoplist:{channel}:{list title}"' - ' (Example: "yttoplist:music:Top Tracks")') - _VALID_URL = r'yttoplist:(?P<chann>.*?):(?P<title>.*?)$' - _TESTS = [{ - 'url': 'yttoplist:music:Trending', - 'playlist_mincount': 5, - 'skip': 'Only works for logged-in users', - }] - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - channel = mobj.group('chann') - title = mobj.group('title') - query = compat_urllib_parse.urlencode({'title': title}) - channel_page = self._download_webpage( - 'https://www.youtube.com/%s' % channel, title) - link = self._html_search_regex( - r'''(?x) - <a\s+href="([^"]+)".*?>\s* - <span\s+class="branded-page-module-title-text">\s* - <span[^>]*>.*?%s.*?</span>''' % re.escape(query), - channel_page, 'list') - url = compat_urlparse.urljoin('https://www.youtube.com/', link) - - video_re = r'data-index="\d+".*?data-video-id="([0-9A-Za-z_-]{11})"' - ids = [] - # sometimes the webpage doesn't contain the videos - # retry until we get them - for i in itertools.count(0): - msg = 'Downloading Youtube mix' - if i > 0: - msg += ', retry #%d' % i - - webpage = self._download_webpage(url, title, msg) - ids = orderedSet(re.findall(video_re, webpage)) - if ids: - break - url_results = self._ids_to_results(ids) - return self.playlist_result(url_results, playlist_title=title) - - class YoutubeChannelIE(InfoExtractor): IE_DESC = 'YouTube.com channels' _VALID_URL = r'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/(?P<id>[0-9A-Za-z_-]+)' From caf90bfaa5434d9ff7035d8575b842b076178ca3 Mon Sep 17 00:00:00 2001 From: Naglis Jonaitis <njonaitis@gmail.com> Date: Mon, 5 Jan 2015 02:22:01 +0200 Subject: [PATCH 1039/1937] [webofstories] Add new extractor (Closes #4585) --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/webofstories.py | 102 +++++++++++++++++++++++++++ 2 files changed, 103 insertions(+) create mode 100644 youtube_dl/extractor/webofstories.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 79e6bba45..0c8729384 100644 --- a/youtube_dl/extractor/__init__.py +++ 
b/youtube_dl/extractor/__init__.py @@ -511,6 +511,7 @@ from .wdr import ( WDRMobileIE, WDRMausIE, ) +from .webofstories import WebOfStoriesIE from .weibo import WeiboIE from .wimp import WimpIE from .wistia import WistiaIE diff --git a/youtube_dl/extractor/webofstories.py b/youtube_dl/extractor/webofstories.py new file mode 100644 index 000000000..396cf4e83 --- /dev/null +++ b/youtube_dl/extractor/webofstories.py @@ -0,0 +1,102 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import int_or_none + + +class WebOfStoriesIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?webofstories\.com/play/(?:[^/]+/)?(?P<id>[0-9]+)' + _VIDEO_DOMAIN = 'http://eu-mobile.webofstories.com/' + _GREAT_LIFE_STREAMER = 'rtmp://eu-cdn1.webofstories.com/cfx/st/' + _USER_STREAMER = 'rtmp://eu-users.webofstories.com/cfx/st/' + _TESTS = [ + { + 'url': 'http://www.webofstories.com/play/hans.bethe/71', + 'md5': '373e4dd915f60cfe3116322642ddf364', + 'info_dict': { + 'id': '4536', + 'ext': 'mp4', + 'title': 'The temperature of the sun', + 'thumbnail': 're:^https?://.*\.jpg$', + 'description': 'Hans Bethe talks about calculating the temperature of the sun', + 'duration': 238, + } + }, + { + 'url': 'http://www.webofstories.com/play/55908', + 'md5': '2985a698e1fe3211022422c4b5ed962c', + 'info_dict': { + 'id': '55908', + 'ext': 'mp4', + 'title': 'The story of Gemmata obscuriglobus', + 'thumbnail': 're:^https?://.*\.jpg$', + 'description': 'Planctomycete talks about The story of Gemmata obscuriglobus', + 'duration': 169, + } + }, + ] + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + title = self._og_search_title(webpage) + description = self._html_search_meta('description', webpage) + thumbnail = self._og_search_thumbnail(webpage) + + story_filename = self._search_regex( + r'\.storyFileName\("([^"]+)"\)', webpage, 'story filename') + speaker_id = self._search_regex( + r'\.speakerId\("([^"]+)"\)', webpage, 'speaker ID') + story_id = self._search_regex( + r'\.storyId\((\d+)\)', webpage, 'story ID') + speaker_type = self._search_regex( + r'\.speakerType\("([^"]+)"\)', webpage, 'speaker type') + great_life = self._search_regex( + r'isGreatLifeStory\s*=\s*(true|false)', webpage, 'great life story') + is_great_life_series = great_life == 'true' + duration = int_or_none(self._search_regex( + r'\.duration\((\d+)\)', webpage, 'duration', fatal=False)) + + # URL building, see: http://www.webofstories.com/scripts/player.js + ms_prefix = '' + if speaker_type.lower() == 'ms': + ms_prefix = 'mini_sites/' + + if is_great_life_series: + mp4_url = '{0:}lives/{1:}/{2:}.mp4'.format( + self._VIDEO_DOMAIN, speaker_id, story_filename) + rtmp_ext = 'flv' + streamer = self._GREAT_LIFE_STREAMER + play_path = 'stories/{0:}/{1:}'.format( + speaker_id, story_filename) + else: + mp4_url = '{0:}{1:}{2:}/{3:}.mp4'.format( + self._VIDEO_DOMAIN, ms_prefix, speaker_id, story_filename) + rtmp_ext = 'mp4' + streamer = self._USER_STREAMER + play_path = 'mp4:{0:}{1:}/{2}.mp4'.format( + ms_prefix, speaker_id, story_filename) + + formats = [{ + 'format_id': 'mp4_sd', + 'url': mp4_url, + }, { + 'format_id': 'rtmp_sd', + 'page_url': url, + 'url': streamer, + 'ext': rtmp_ext, + 'play_path': play_path, + }] + + self._sort_formats(formats) + + return { + 'id': story_id, + 'title': title, + 'formats': formats, + 'thumbnail': thumbnail, + 'description': description, + 'duration': duration, + } From adf3c58ad31e7376f085271a02fdfe56b1e75989 
Mon Sep 17 00:00:00 2001 From: Naglis Jonaitis <njonaitis@gmail.com> Date: Mon, 5 Jan 2015 02:55:12 +0200 Subject: [PATCH 1040/1937] [lrt] Fix missing provider key Also, modernize a bit. --- youtube_dl/extractor/lrt.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/lrt.py b/youtube_dl/extractor/lrt.py index d72d470aa..9c2fbdd96 100644 --- a/youtube_dl/extractor/lrt.py +++ b/youtube_dl/extractor/lrt.py @@ -2,7 +2,6 @@ from __future__ import unicode_literals import re -import json from .common import InfoExtractor from ..utils import ( @@ -28,7 +27,6 @@ class LRTIE(InfoExtractor): 'params': { 'skip_download': True, # HLS download }, - } def _real_extract(self, url): @@ -44,7 +42,9 @@ class LRTIE(InfoExtractor): formats = [] for js in re.findall(r'(?s)config:\s*(\{.*?\})', webpage): - data = json.loads(js_to_json(js)) + data = self._parse_json(js, video_id, transform_source=js_to_json) + if 'provider' not in data: + continue if data['provider'] == 'rtmp': formats.append({ 'format_id': 'rtmp', From bdf80aa542da15437545ae9c17cd5c80e17e171f Mon Sep 17 00:00:00 2001 From: Bart Kappenburg <bartkappenburg@gmail.com> Date: Mon, 5 Jan 2015 11:51:24 +0100 Subject: [PATCH 1041/1937] Update rtlnl.py Added support for the non-www version of rtlxl.nl by making "www." optional. --- youtube_dl/extractor/rtlnl.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/rtlnl.py b/youtube_dl/extractor/rtlnl.py index d029b0ec5..a3ca79f2c 100644 --- a/youtube_dl/extractor/rtlnl.py +++ b/youtube_dl/extractor/rtlnl.py @@ -8,7 +8,7 @@ from ..utils import parse_duration class RtlXlIE(InfoExtractor): IE_NAME = 'rtlxl.nl' - _VALID_URL = r'https?://www\.rtlxl\.nl/#!/[^/]+/(?P<uuid>[^/?]+)' + _VALID_URL = r'https?://(www\.)?rtlxl\.nl/#!/[^/]+/(?P<uuid>[^/?]+)' _TEST = { 'url': 'http://www.rtlxl.nl/#!/rtl-nieuws-132237/6e4203a6-0a5e-3596-8424-c599a59e0677', From a4c3f486394ae8ead64e8e634433670639e3080f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Mon, 5 Jan 2015 11:46:40 +0100 Subject: [PATCH 1042/1937] [vimple] Replace tests The first one seems to be no longer available and the second was an episode from a tv show. 
--- youtube_dl/extractor/vimple.py | 23 ++++++----------------- 1 file changed, 6 insertions(+), 17 deletions(-) diff --git a/youtube_dl/extractor/vimple.py b/youtube_dl/extractor/vimple.py index 33d370e1c..ee3d86117 100644 --- a/youtube_dl/extractor/vimple.py +++ b/youtube_dl/extractor/vimple.py @@ -14,28 +14,17 @@ class VimpleIE(InfoExtractor): IE_DESC = 'Vimple.ru' _VALID_URL = r'https?://(player.vimple.ru/iframe|vimple.ru)/(?P<id>[a-f0-9]{10,})' _TESTS = [ - # Quality: Large, from iframe { - 'url': 'http://player.vimple.ru/iframe/b132bdfd71b546d3972f9ab9a25f201c', + 'url': 'http://vimple.ru/c0f6b1687dcd4000a97ebe70068039cf', + 'md5': '2e750a330ed211d3fd41821c6ad9a279', 'info_dict': { - 'id': 'b132bdfd71b546d3972f9ab9a25f201c', - 'title': 'great-escape-minecraft.flv', + 'id': 'c0f6b1687dcd4000a97ebe70068039cf', 'ext': 'mp4', - 'duration': 352, - 'webpage_url': 'http://vimple.ru/b132bdfd71b546d3972f9ab9a25f201c', + 'title': 'Sunset', + 'duration': 20, + 'thumbnail': 're:https?://.*?\.jpg', }, }, - # Quality: Medium, from mainpage - { - 'url': 'http://vimple.ru/a15950562888453b8e6f9572dc8600cd', - 'info_dict': { - 'id': 'a15950562888453b8e6f9572dc8600cd', - 'title': 'DB 01', - 'ext': 'flv', - 'duration': 1484, - 'webpage_url': 'http://vimple.ru/a15950562888453b8e6f9572dc8600cd', - } - }, ] def _real_extract(self, url): From 628bc4d1e73ddef2b67eb6aba7b642c2e0ea894e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Mon, 5 Jan 2015 12:28:35 +0100 Subject: [PATCH 1043/1937] [khanacademy] Update test --- youtube_dl/extractor/khanacademy.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/khanacademy.py b/youtube_dl/extractor/khanacademy.py index 408d00944..08a671fa8 100644 --- a/youtube_dl/extractor/khanacademy.py +++ b/youtube_dl/extractor/khanacademy.py @@ -22,8 +22,10 @@ class KhanAcademyIE(InfoExtractor): 'description': 'The perfect cipher', 'duration': 176, 'uploader': 'Brit Cruise', + 'uploader_id': 'khanacademy', 'upload_date': '20120411', - } + }, + 'add_ie': ['Youtube'], }, { 'url': 'https://www.khanacademy.org/math/applied-math/cryptography', 'info_dict': { From 75311a7e160912550e3c07642a5635f85f72cb0d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Mon, 5 Jan 2015 12:29:32 +0100 Subject: [PATCH 1044/1937] .travis.yml: Remove my email from the list --- .travis.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index c6cc7a994..f14014414 100644 --- a/.travis.yml +++ b/.travis.yml @@ -9,7 +9,6 @@ notifications: email: - filippo.valsorda@gmail.com - phihag@phihag.de - - jaime.marquinez.ferrandiz+travis@gmail.com - yasoob.khld@gmail.com # irc: # channels: From 87830900a95f95308dac565f9da12387edea65e5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Mon, 5 Jan 2015 13:07:24 +0100 Subject: [PATCH 1045/1937] [generic] Update some tests --- youtube_dl/extractor/generic.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 493afb57d..5c41ff517 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -131,12 +131,13 @@ class GenericIE(InfoExtractor): # ooyala video { 'url': 
'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219', - 'md5': '5644c6ca5d5782c1d0d350dad9bd840c', + 'md5': '166dd577b433b4d4ebfee10b0824d8ff', 'info_dict': { 'id': 'BwY2RxaTrTkslxOfcan0UCf0YqyvWysJ', 'ext': 'mp4', 'title': '2cc213299525360.mov', # that's what we get }, + 'add_ie': ['Ooyala'], }, # google redirect { @@ -146,7 +147,7 @@ class GenericIE(InfoExtractor): 'ext': 'mp4', 'upload_date': '20130224', 'uploader_id': 'TheVerge', - 'description': 'Chris Ziegler takes a look at the Alcatel OneTouch Fire and the ZTE Open; two of the first Firefox OS handsets to be officially announced.', + 'description': 're:^Chris Ziegler takes a look at the\.*', 'uploader': 'The Verge', 'title': 'First Firefox OS phones side-by-side', }, From cd791a5ea08b77dab37c15efa7e064c07144cb6a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Mon, 5 Jan 2015 13:11:13 +0100 Subject: [PATCH 1046/1937] [ted] Add support for embed-ssl.ted.com embedded videos --- youtube_dl/extractor/generic.py | 2 +- youtube_dl/extractor/ted.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 5c41ff517..2d871f8b4 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -926,7 +926,7 @@ class GenericIE(InfoExtractor): # Look for embedded TED player mobj = re.search( - r'<iframe[^>]+?src=(["\'])(?P<url>http://embed\.ted\.com/.+?)\1', webpage) + r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed(?:-ssl)?\.ted\.com/.+?)\1', webpage) if mobj is not None: return self.url_result(mobj.group('url'), 'TED') diff --git a/youtube_dl/extractor/ted.py b/youtube_dl/extractor/ted.py index 944177426..10b3b706a 100644 --- a/youtube_dl/extractor/ted.py +++ b/youtube_dl/extractor/ted.py @@ -13,7 +13,7 @@ from ..compat import ( class TEDIE(SubtitlesInfoExtractor): _VALID_URL = r'''(?x) (?P<proto>https?://) - (?P<type>www|embed)(?P<urlmain>\.ted\.com/ + (?P<type>www|embed(?:-ssl)?)(?P<urlmain>\.ted\.com/ ( (?P<type_playlist>playlists(?:/\d+)?) # We have a playlist | @@ -98,7 +98,7 @@ class TEDIE(SubtitlesInfoExtractor): def _real_extract(self, url): m = re.match(self._VALID_URL, url, re.VERBOSE) - if m.group('type') == 'embed': + if m.group('type').startswith('embed'): desktop_url = m.group('proto') + 'www' + m.group('urlmain') return self.url_result(desktop_url, 'TED') name = m.group('name') From a285b6377b46518ca45d6a41481bf920b353a857 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Mon, 5 Jan 2015 13:59:49 +0100 Subject: [PATCH 1047/1937] [normalboots] Skip download in test, it uses rtmp --- youtube_dl/extractor/normalboots.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/normalboots.py b/youtube_dl/extractor/normalboots.py index 3d35b11ac..c13ff0d65 100644 --- a/youtube_dl/extractor/normalboots.py +++ b/youtube_dl/extractor/normalboots.py @@ -22,7 +22,11 @@ class NormalbootsIE(InfoExtractor): 'description': 'Jon is late for Christmas. Typical. 
Thanks to: Paul Ritchey for Co-Writing/Filming: http://www.youtube.com/user/ContinueShow Michael Azzi for Christmas Intro Animation: http://michafrar.tumblr.com/ Jerrod Waters for Christmas Intro Music: http://www.youtube.com/user/xXJerryTerryXx Casey Ormond for ‘Tense Battle Theme’:\xa0http://www.youtube.com/Kiamet/', 'uploader': 'JonTron', 'upload_date': '20140125', - } + }, + 'params': { + # rtmp download + 'skip_download': True, + }, } def _real_extract(self, url): From 03ff2cc1c49c82daf2218b76e169c2d679447f03 Mon Sep 17 00:00:00 2001 From: oteng <otengkwaku@gmail.com> Date: Mon, 5 Jan 2015 16:28:24 +0000 Subject: [PATCH 1048/1937] [Auengine] Corrected extraction logic The way the video download URL was being extracted no longer worked, so the extraction logic was changed to extract the correct URL. --- .gitignore | 2 ++ youtube_dl/extractor/auengine.py | 16 ++++++---------- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/.gitignore b/.gitignore index 86312d4e4..0422adf44 100644 --- a/.gitignore +++ b/.gitignore @@ -31,3 +31,5 @@ updates_key.pem test/testdata .tox youtube-dl.zsh +.idea +.idea/* \ No newline at end of file diff --git a/youtube_dl/extractor/auengine.py b/youtube_dl/extractor/auengine.py index 014a21952..17c3ad2ef 100644 --- a/youtube_dl/extractor/auengine.py +++ b/youtube_dl/extractor/auengine.py @@ -29,17 +29,12 @@ class AUEngineIE(InfoExtractor): webpage = self._download_webpage(url, video_id) title = self._html_search_regex(r'<title>(?P<title>.+?)', webpage, 'title') title = title.strip() - links = re.findall(r'\s(?:file|url):\s*["\']([^\'"]+)["\']', webpage) - links = map(compat_urllib_parse.unquote, links) + video_url = re.findall(r'http://\w+.auengine.com/vod/.*[^\W]', webpage) + video_url = map(compat_urllib_parse.unquote, video_url)[0] + thumbnail = re.findall(r'http://\w+.auengine.com/thumb/.*[^\W]', webpage) + thumbnail = map(compat_urllib_parse.unquote, thumbnail)[0] - thumbnail = None - video_url = None - for link in links: - if link.endswith('.png'): - thumbnail = link - elif '/videos/' in link: - video_url = link - if not video_url: + if video_url == "" and thumbnail =="": raise ExtractorError('Could not find video URL') ext = '.' + determine_ext(video_url) if ext == title[-len(ext):]: @@ -52,3 +47,4 @@ class AUEngineIE(InfoExtractor): 'thumbnail': thumbnail, 'http_referer': 'http://www.auengine.com/flowplayer/flowplayer.commercial-3.2.14.swf', } + From 9d247bbd2d972953fbb9e8f9aee67472d3854883 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Mon, 5 Jan 2015 18:13:19 +0100 Subject: [PATCH 1049/1937] [radiobremen] Fix under Python 2.6 and fix duration --- youtube_dl/extractor/radiobremen.py | 30 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/youtube_dl/extractor/radiobremen.py b/youtube_dl/extractor/radiobremen.py index 6d130d3d9..9f7e6af15 100644 --- a/youtube_dl/extractor/radiobremen.py +++ b/youtube_dl/extractor/radiobremen.py @@ -5,10 +5,11 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from ..utils import parse_duration class RadioBremenIE(InfoExtractor): - _VALID_URL = r'http?://(?:www\.)?radiobremen\.de/mediathek/(index\.html)?\?id=(?P[0-9]+)' + _VALID_URL = r'http?://(?:www\.)?radiobremen\.de/mediathek/(?:index\.html)?\?id=(?P[0-9]+)' IE_NAME = 'radiobremen' _TEST = { @@ -16,6 +17,7 @@ class RadioBremenIE(InfoExtractor): 'info_dict': { 'id': '114720', 'ext': 'mp4', + 'duration': 1685, 'width': 512, 'title': 'buten un binnen vom 22. 
Dezember', 'description': 'Unter anderem mit diesen Themen: 45 Flüchtlinge sind in Worpswede angekommen +++ Freies Internet für alle: Bremer arbeiten an einem flächendeckenden W-Lan-Netzwerk +++ Aktivisten kämpfen für das Unibad +++ So war das Wetter 2014 +++', @@ -23,32 +25,32 @@ class RadioBremenIE(InfoExtractor): } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('video_id') + video_id = self._match_id(url) meta_url = "http://www.radiobremen.de/apps/php/mediathek/metadaten.php?id=%s" % video_id meta_doc = self._download_webpage(meta_url, video_id, 'Downloading metadata') title = self._html_search_regex("(?P.+)</h1>", meta_doc, "title") description = self._html_search_regex("<p>(?P<description>.*)</p>", meta_doc, "description") - duration = self._html_search_regex("Länge:</td>\s+<td>(?P<duration>[0-9]+:[0-9]+)</td>", meta_doc, "duration") + duration = parse_duration( + self._html_search_regex("Länge:</td>\s+<td>(?P<duration>[0-9]+:[0-9]+)</td>", meta_doc, "duration")) page_doc = self._download_webpage(url, video_id, 'Downloading video information') pattern = "ardformatplayerclassic\(\'playerbereich\',\'(?P<width>[0-9]+)\',\'.*\',\'(?P<video_id>[0-9]+)\',\'(?P<secret>[0-9]+)\',\'(?P<thumbnail>.+)\',\'\'\)" mobj = re.search(pattern, page_doc) - width, video_id, secret, thumbnail = int(mobj.group("width")), mobj.group("video_id"), mobj.group("secret"), mobj.group("thumbnail") - video_url = "http://dl-ondemand.radiobremen.de/mediabase/{:}/{:}_{:}_{:}.mp4".format(video_id, video_id, secret, width) + video_url = ( + "http://dl-ondemand.radiobremen.de/mediabase/%s/%s_%s_%s.mp4" % + (video_id, video_id, mobj.group("secret"), mobj.group('width'))) + formats = [{ + 'url': video_url, + 'ext': 'mp4', + 'width': int(mobj.group("width")), + }] return { 'id': video_id, 'title': title, 'description': description, 'duration': duration, - 'formats': [ - {'url': video_url, - 'ext': 'mp4', - 'width': width, - 'protocol': 'http' - } - ], - 'thumbnail': thumbnail, + 'formats': formats, + 'thumbnail': mobj.group('thumbnail'), } From aa80652f47b3df14664556913d4f14172c9ec4fb Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Mon, 5 Jan 2015 18:14:09 +0100 Subject: [PATCH 1050/1937] [radiobremen] Add test for thumbnail --- youtube_dl/extractor/radiobremen.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/radiobremen.py b/youtube_dl/extractor/radiobremen.py index 9f7e6af15..057dc15ab 100644 --- a/youtube_dl/extractor/radiobremen.py +++ b/youtube_dl/extractor/radiobremen.py @@ -20,6 +20,7 @@ class RadioBremenIE(InfoExtractor): 'duration': 1685, 'width': 512, 'title': 'buten un binnen vom 22. 
Dezember', + 'thumbnail': 're:https?://.*\.jpg$', 'description': 'Unter anderem mit diesen Themen: 45 Flüchtlinge sind in Worpswede angekommen +++ Freies Internet für alle: Bremer arbeiten an einem flächendeckenden W-Lan-Netzwerk +++ Aktivisten kämpfen für das Unibad +++ So war das Wetter 2014 +++', }, } From 5e3e1c82d828bc54f6873d2c7bdab315713e9a02 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Mon, 5 Jan 2015 18:14:39 +0100 Subject: [PATCH 1051/1937] Credit @ckrooss for radiobremen (#4632) --- AUTHORS | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS b/AUTHORS index 9b548cf25..a63c97ae0 100644 --- a/AUTHORS +++ b/AUTHORS @@ -99,3 +99,4 @@ Max Reimann Cédric Luthi Thijs Vermeir Joel Leclerc +Christopher Krooss From d7cc31b63e1efaf5762f38897d4c717901e127e3 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Mon, 5 Jan 2015 18:16:47 +0100 Subject: [PATCH 1052/1937] [generic] PEP8 --- youtube_dl/extractor/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 2d871f8b4..7a5bf9392 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -926,7 +926,7 @@ class GenericIE(InfoExtractor): # Look for embedded TED player mobj = re.search( - r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed(?:-ssl)?\.ted\.com/.+?)\1', webpage) + r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed(?:-ssl)?\.ted\.com/.+?)\1', webpage) if mobj is not None: return self.url_result(mobj.group('url'), 'TED') From dda620e88c68e995afcc3cd35b9d360cb42527a0 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Mon, 5 Jan 2015 18:17:03 +0100 Subject: [PATCH 1053/1937] [radiobremen] Make code more readable and more resilient to failures --- youtube_dl/extractor/radiobremen.py | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/radiobremen.py b/youtube_dl/extractor/radiobremen.py index 057dc15ab..0d706312e 100644 --- a/youtube_dl/extractor/radiobremen.py +++ b/youtube_dl/extractor/radiobremen.py @@ -29,15 +29,21 @@ class RadioBremenIE(InfoExtractor): video_id = self._match_id(url) meta_url = "http://www.radiobremen.de/apps/php/mediathek/metadaten.php?id=%s" % video_id - meta_doc = self._download_webpage(meta_url, video_id, 'Downloading metadata') - title = self._html_search_regex("<h1.*>(?P<title>.+)</h1>", meta_doc, "title") - description = self._html_search_regex("<p>(?P<description>.*)</p>", meta_doc, "description") - duration = parse_duration( - self._html_search_regex("Länge:</td>\s+<td>(?P<duration>[0-9]+:[0-9]+)</td>", meta_doc, "duration")) + meta_doc = self._download_webpage( + meta_url, video_id, 'Downloading metadata') + title = self._html_search_regex( + r"<h1.*>(?P<title>.+)</h1>", meta_doc, "title") + description = self._html_search_regex( + r"<p>(?P<description>.*)</p>", meta_doc, "description", fatal=False) + duration = parse_duration(self._html_search_regex( + r"Länge:</td>\s+<td>(?P<duration>[0-9]+:[0-9]+)</td>", + meta_doc, "duration", fatal=False)) - page_doc = self._download_webpage(url, video_id, 'Downloading video information') - pattern = "ardformatplayerclassic\(\'playerbereich\',\'(?P<width>[0-9]+)\',\'.*\',\'(?P<video_id>[0-9]+)\',\'(?P<secret>[0-9]+)\',\'(?P<thumbnail>.+)\',\'\'\)" - mobj = re.search(pattern, page_doc) + page_doc = self._download_webpage( + url, video_id, 'Downloading video information') + mobj = re.search( + 
r"ardformatplayerclassic\(\'playerbereich\',\'(?P<width>[0-9]+)\',\'.*\',\'(?P<video_id>[0-9]+)\',\'(?P<secret>[0-9]+)\',\'(?P<thumbnail>.+)\',\'\'\)", + page_doc) video_url = ( "http://dl-ondemand.radiobremen.de/mediabase/%s/%s_%s_%s.mp4" % (video_id, video_id, mobj.group("secret"), mobj.group('width'))) From 6291438073e35adc94f573a43625fb54a64cf733 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Mon, 5 Jan 2015 18:21:32 +0100 Subject: [PATCH 1054/1937] [auengine] Simplify (#4643) --- youtube_dl/extractor/auengine.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/auengine.py b/youtube_dl/extractor/auengine.py index 17c3ad2ef..a1b666be0 100644 --- a/youtube_dl/extractor/auengine.py +++ b/youtube_dl/extractor/auengine.py @@ -7,6 +7,7 @@ from ..compat import compat_urllib_parse from ..utils import ( determine_ext, ExtractorError, + remove_end, ) @@ -27,18 +28,18 @@ class AUEngineIE(InfoExtractor): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - title = self._html_search_regex(r'<title>(?P<title>.+?)', webpage, 'title') - title = title.strip() - video_url = re.findall(r'http://\w+.auengine.com/vod/.*[^\W]', webpage) - video_url = map(compat_urllib_parse.unquote, video_url)[0] - thumbnail = re.findall(r'http://\w+.auengine.com/thumb/.*[^\W]', webpage) - thumbnail = map(compat_urllib_parse.unquote, thumbnail)[0] + title = self._html_search_regex( + r'\s*(?P<title>.+?)\s*', webpage, 'title') + video_urls = re.findall(r'http://\w+.auengine.com/vod/.*[^\W]', webpage) + video_url = compat_urllib_parse.unquote(video_urls[0]) + thumbnails = re.findall(r'http://\w+.auengine.com/thumb/.*[^\W]', webpage) + thumbnail = compat_urllib_parse.unquote(thumbnails[0]) - if video_url == "" and thumbnail =="": + if not video_url: raise ExtractorError('Could not find video URL') + ext = '.' + determine_ext(video_url) - if ext == title[-len(ext):]: - title = title[:-len(ext)] + title = remove_end(title, ext) return { 'id': video_id, @@ -47,4 +48,3 @@ class AUEngineIE(InfoExtractor): 'thumbnail': thumbnail, 'http_referer': 'http://www.auengine.com/flowplayer/flowplayer.commercial-3.2.14.swf', } - From f4bca0b348fe1f4f65c939b496973062180e0c4f Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Mon, 5 Jan 2015 18:44:29 +0100 Subject: [PATCH 1055/1937] release 2015.01.05 --- youtube_dl/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 09813928a..086f0ebf0 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2015.01.04' +__version__ = '2015.01.05' From 8f9529cd0559bdbe6c568cfd765f9129666a77be Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Mon, 5 Jan 2015 19:14:50 +0100 Subject: [PATCH 1056/1937] [motorsport] Fix extraction and make trailing '/' optional They directly embed a youtube video now. 
--- youtube_dl/extractor/motorsport.py | 60 ++++++++++++------------------ 1 file changed, 23 insertions(+), 37 deletions(-) diff --git a/youtube_dl/extractor/motorsport.py b/youtube_dl/extractor/motorsport.py index f5ca74e97..c1a482dba 100644 --- a/youtube_dl/extractor/motorsport.py +++ b/youtube_dl/extractor/motorsport.py @@ -1,63 +1,49 @@ # coding: utf-8 from __future__ import unicode_literals -import hashlib -import json -import time - from .common import InfoExtractor from ..compat import ( - compat_parse_qs, - compat_str, -) -from ..utils import ( - int_or_none, + compat_urlparse, ) class MotorsportIE(InfoExtractor): IE_DESC = 'motorsport.com' - _VALID_URL = r'http://www\.motorsport\.com/[^/?#]+/video/(?:[^/?#]+/)(?P[^/]+)/(?:$|[?#])' + _VALID_URL = r'http://www\.motorsport\.com/[^/?#]+/video/(?:[^/?#]+/)(?P[^/]+)/?(?:$|[?#])' _TEST = { 'url': 'http://www.motorsport.com/f1/video/main-gallery/red-bull-racing-2014-rules-explained/', - 'md5': '5592cb7c5005d9b2c163df5ac3dc04e4', 'info_dict': { - 'id': '7063', + 'id': '2-T3WuR-KMM', 'ext': 'mp4', 'title': 'Red Bull Racing: 2014 Rules Explained', - 'duration': 207, + 'duration': 208, 'description': 'A new clip from Red Bull sees Daniel Ricciardo and Sebastian Vettel explain the 2014 Formula One regulations – which are arguably the most complex the sport has ever seen.', - 'uploader': 'rainiere', - 'thumbnail': r're:^http://.*motorsport\.com/.+\.jpg$' - } + 'uploader': 'mcomstaff', + 'uploader_id': 'UC334JIYKkVnyFoNCclfZtHQ', + 'upload_date': '20140903', + 'thumbnail': r're:^https?://.+\.jpg$' + }, + 'add_ie': ['Youtube'], + 'params': { + 'skip_download': True, + }, } def _real_extract(self, url): display_id = self._match_id(url) webpage = self._download_webpage(url, display_id) - flashvars_code = self._html_search_regex( - r'Video by: (.*?)', webpage, - 'uploader', fatal=False) + iframe_path = self._html_search_regex( + r'