From 3442b30ab2a5f168caa45a7371aca0f4103fdd86 Mon Sep 17 00:00:00 2001 From: anovicecodemonkey Date: Sun, 18 May 2014 23:15:09 +0930 Subject: [PATCH 001/440] [generic] Support data-video-url for YouTube embeds (Fixes #2862) --- youtube_dl/extractor/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 0e5cf0efb..69381f777 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -473,7 +473,7 @@ class GenericIE(InfoExtractor): # Look for embedded YouTube player matches = re.findall(r'''(?x) - (?:]+?src=|embedSWF\(\s*) + (?:]+?src=|data-video-url=|embedSWF\(\s*) (["\'])(?P(?:https?:)?//(?:www\.)?youtube\.com/ (?:embed|v)/.+?) \1''', webpage) From dd06c95e43ebbef35e2ef8b589bd25a646166553 Mon Sep 17 00:00:00 2001 From: Keith Beckman Date: Tue, 20 May 2014 02:47:34 -0400 Subject: [PATCH 002/440] Added new IE for Grooveshark --- youtube_dl/__init__.py | 1 + youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/grooveshark.py | 200 ++++++++++++++++++++++++++++ 3 files changed, 202 insertions(+) create mode 100644 youtube_dl/extractor/grooveshark.py diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 4e657e297..cd4e9b484 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -56,6 +56,7 @@ __authors__ = ( 'Nicolas Évrard', 'Jason Normore', 'Hoje Lee', + 'Keith Beckman' ) __license__ = 'Public Domain' diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 3503c76b7..3f6c67fa5 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -109,6 +109,7 @@ from .gdcvault import GDCVaultIE from .generic import GenericIE from .googleplus import GooglePlusIE from .googlesearch import GoogleSearchIE +from .grooveshark import GroovesharkIE from .hark import HarkIE from .helsinki import HelsinkiIE from .hentaistigma import HentaiStigmaIE diff --git a/youtube_dl/extractor/grooveshark.py b/youtube_dl/extractor/grooveshark.py new file mode 100644 index 000000000..6798addce --- /dev/null +++ b/youtube_dl/extractor/grooveshark.py @@ -0,0 +1,200 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import time +import math +import re +from urlparse import urlparse, urlunparse, urldefrag +from urllib import quote, urlencode +from os.path import basename + +from .common import InfoExtractor +from ..utils import ExtractorError, compat_urllib_request, compat_html_parser + +class GroovesharkHtmlParser(compat_html_parser.HTMLParser): + def __init__(self): + self._current_object = None + self.objects = [] + compat_html_parser.HTMLParser.__init__(self) + + def handle_starttag(self, tag, attrs): + attrs = dict((k, v) for k, v in attrs) + if tag == 'object': + self._current_object = { 'attrs': attrs, 'params': [] } + elif tag == 'param': + self._current_object['params'].append(attrs) + + def handle_endtag(self, tag): + if tag == 'object': + self.objects.append(self._current_object) + self._current_object = None + + @classmethod + def extract_object_tags(cls, html): + p = cls() + p.feed(html) + p.close() + return p.objects + +class GroovesharkIE(InfoExtractor): + _VALID_URL = r'https?://(www\.)?grooveshark\.com/#!/s/([^/]+)/([^/]+)' + _TEST = { + 'url': 'http://grooveshark.com/#!/s/Jolene+Tenth+Key+Remix+Ft+Will+Sessions/6SS1DW?src=5', + 'md5': 'bbccc50b19daca23b8f961152c1dc95b', + 'info_dict': { + 'id': '6SS1DW', + 'title': 'Jolene (Tenth Key Remix ft. Will Sessions)', + 'ext': 'mp3', + 'duration': 227 + } + } + + do_playerpage_request = True + do_bootstrap_request = True + + def _parse_target(self, target): + uri = urlparse(target) + hash = uri.fragment[1:].split('?')[0] + token = basename(hash.rstrip('/')) + return (uri, hash, token) + + def _build_bootstrap_url(self, target): + (uri, hash, token) = self._parse_target(target) + query = 'getCommunicationToken=1&hash=%s&%d' % (quote(hash, safe=''), self.ts) + return (urlunparse((uri.scheme, uri.netloc, '/preload.php', None, query, None)), token) + + def _build_meta_url(self, target): + (uri, hash, token) = self._parse_target(target) + query = 'hash=%s&%d' % (quote(hash, safe=''), self.ts) + return (urlunparse((uri.scheme, uri.netloc, '/preload.php', None, query, None)), token) + + def _build_stream_url(self, meta): + return urlunparse(('http', meta['streamKey']['ip'], '/stream.php', None, None, None)) + + def _build_swf_referer(self, target, obj): + (uri, _, _) = self._parse_target(target) + return urlunparse((uri.scheme, uri.netloc, obj['attrs']['data'], None, None, None)) + + def _transform_bootstrap(self, js): + return re.split('^\s*try\s*{', js, flags=re.M)[0] \ + .split(' = ', 1)[1].strip().rstrip(';') + + def _transform_meta(self, js): + return js.split('\n')[0].split('=')[1].rstrip(';') + + def _get_meta(self, target): + (meta_url, token) = self._build_meta_url(target) + self.to_screen('Metadata URL: %s' % meta_url) + + headers = {'Referer': urldefrag(target)[0]} + req = compat_urllib_request.Request(meta_url, headers=headers) + res = self._download_json(req, token, + transform_source=self._transform_meta) + + if 'getStreamKeyWithSong' not in res: + raise ExtractorError( + 'Metadata not found. URL may be malformed, or Grooveshark API may have changed.') + + if res['getStreamKeyWithSong'] is None: + raise ExtractorError( + 'Metadata download failed, probably due to Grooveshark anti-abuse throttling. Wait at least an hour before retrying from this IP.', + expected=True) + + return res['getStreamKeyWithSong'] + + def _get_bootstrap(self, target): + (bootstrap_url, token) = self._build_bootstrap_url(target) + + headers = {'Referer': urldefrag(target)[0]} + req = compat_urllib_request.Request(bootstrap_url, headers=headers) + res = self._download_json(req, token, fatal=False, + note='Downloading player bootstrap data', + errnote='Unable to download player bootstrap data', + transform_source=self._transform_bootstrap) + return res + + def _get_playerpage(self, target): + (_, _, token) = self._parse_target(target) + + res = self._download_webpage( + target, token, + note='Downloading player page', + errnote='Unable to download player page', + fatal=False) + + if res is not None: + o = GroovesharkHtmlParser.extract_object_tags(res) + return (res, [x for x in o if x['attrs']['id'] == 'jsPlayerEmbed']) + + return (res, None) + + def _real_extract(self, url): + (target_uri, _, token) = self._parse_target(url) + + # 1. Fill cookiejar by making a request to the player page + if self.do_playerpage_request: + (_, player_objs) = self._get_playerpage(url) + if player_objs is not None: + swf_referer = self._build_swf_referer(url, player_objs[0]) + self.to_screen('SWF Referer: %s' % swf_referer) + + # 2. Ask preload.php for swf bootstrap data to better mimic webapp + if self.do_bootstrap_request: + bootstrap = self._get_bootstrap(url) + self.to_screen('CommunicationToken: %s' % bootstrap['getCommunicationToken']) + + # 3. Ask preload.php for track metadata. + meta = self._get_meta(url) + + # 4. Construct stream request for track. + stream_url = self._build_stream_url(meta) + duration = int(math.ceil(float(meta['streamKey']['uSecs']) / 1000000)) + post_dict = {'streamKey': meta['streamKey']['streamKey']} + post_data = urlencode(post_dict).encode('utf-8') + headers = { + 'Content-Length': len(post_data), + 'Content-Type': 'application/x-www-form-urlencoded' + } + + if 'swf_referer' in locals(): + headers['Referer'] = swf_referer + + req = compat_urllib_request.Request(streamurl, post_data, headers) + + info_dict = { + 'id': token, + 'title': meta['song']['Name'], + 'http_method': 'POST', + 'url': stream_url, + 'ext': 'mp3', + 'format': 'mp3 audio', + 'duration': duration, + + 'post_data': post_data, + 'post_dict': post_dict, + 'headers': headers, + 'request': req + } + + if 'swf_referer' in locals(): + info_dict['http_referer'] = swf_referer + + return info_dict + + def _real_initialize(self): + self.ts = int(time.time() * 1000) # timestamp in millis + + def _download_json(self, url_or_request, video_id, + note=u'Downloading JSON metadata', + errnote=u'Unable to download JSON metadata', + fatal=True, + transform_source=None): + try: + out = super(GroovesharkIE, self)._download_json( + url_or_request, video_id, note, errnote, transform_source) + return out + except ExtractorError as ee: + if fatal: + raise ee + return None + From 7ed806d24165a1ce909e2a1de340703392b216cd Mon Sep 17 00:00:00 2001 From: Keith Beckman Date: Tue, 20 May 2014 02:55:21 -0400 Subject: [PATCH 003/440] Fixed pyflakes and pep8 warnings --- youtube_dl/extractor/grooveshark.py | 45 +++++++++++++++-------------- 1 file changed, 24 insertions(+), 21 deletions(-) diff --git a/youtube_dl/extractor/grooveshark.py b/youtube_dl/extractor/grooveshark.py index 6798addce..165cccf69 100644 --- a/youtube_dl/extractor/grooveshark.py +++ b/youtube_dl/extractor/grooveshark.py @@ -11,6 +11,7 @@ from os.path import basename from .common import InfoExtractor from ..utils import ExtractorError, compat_urllib_request, compat_html_parser + class GroovesharkHtmlParser(compat_html_parser.HTMLParser): def __init__(self): self._current_object = None @@ -20,10 +21,10 @@ class GroovesharkHtmlParser(compat_html_parser.HTMLParser): def handle_starttag(self, tag, attrs): attrs = dict((k, v) for k, v in attrs) if tag == 'object': - self._current_object = { 'attrs': attrs, 'params': [] } + self._current_object = {'attrs': attrs, 'params': []} elif tag == 'param': self._current_object['params'].append(attrs) - + def handle_endtag(self, tag): if tag == 'object': self.objects.append(self._current_object) @@ -36,6 +37,7 @@ class GroovesharkHtmlParser(compat_html_parser.HTMLParser): p.close() return p.objects + class GroovesharkIE(InfoExtractor): _VALID_URL = r'https?://(www\.)?grooveshark\.com/#!/s/([^/]+)/([^/]+)' _TEST = { @@ -104,7 +106,7 @@ class GroovesharkIE(InfoExtractor): def _get_bootstrap(self, target): (bootstrap_url, token) = self._build_bootstrap_url(target) - + headers = {'Referer': urldefrag(target)[0]} req = compat_urllib_request.Request(bootstrap_url, headers=headers) res = self._download_json(req, token, fatal=False, @@ -112,25 +114,25 @@ class GroovesharkIE(InfoExtractor): errnote='Unable to download player bootstrap data', transform_source=self._transform_bootstrap) return res - + def _get_playerpage(self, target): (_, _, token) = self._parse_target(target) - + res = self._download_webpage( target, token, note='Downloading player page', errnote='Unable to download player page', fatal=False) - + if res is not None: o = GroovesharkHtmlParser.extract_object_tags(res) return (res, [x for x in o if x['attrs']['id'] == 'jsPlayerEmbed']) - + return (res, None) - + def _real_extract(self, url): (target_uri, _, token) = self._parse_target(url) - + # 1. Fill cookiejar by making a request to the player page if self.do_playerpage_request: (_, player_objs) = self._get_playerpage(url) @@ -142,10 +144,10 @@ class GroovesharkIE(InfoExtractor): if self.do_bootstrap_request: bootstrap = self._get_bootstrap(url) self.to_screen('CommunicationToken: %s' % bootstrap['getCommunicationToken']) - + # 3. Ask preload.php for track metadata. meta = self._get_meta(url) - + # 4. Construct stream request for track. stream_url = self._build_stream_url(meta) duration = int(math.ceil(float(meta['streamKey']['uSecs']) / 1000000)) @@ -154,13 +156,13 @@ class GroovesharkIE(InfoExtractor): headers = { 'Content-Length': len(post_data), 'Content-Type': 'application/x-www-form-urlencoded' - } + } if 'swf_referer' in locals(): - headers['Referer'] = swf_referer + headers['Referer'] = swf_referer + + req = compat_urllib_request.Request(stream_url, post_data, headers) - req = compat_urllib_request.Request(streamurl, post_data, headers) - info_dict = { 'id': token, 'title': meta['song']['Name'], @@ -169,16 +171,18 @@ class GroovesharkIE(InfoExtractor): 'ext': 'mp3', 'format': 'mp3 audio', 'duration': duration, - + + # various ways of supporting the download request. + # remove keys unnecessary to the eventual post implementation 'post_data': post_data, 'post_dict': post_dict, 'headers': headers, 'request': req - } - + } + if 'swf_referer' in locals(): - info_dict['http_referer'] = swf_referer - + info_dict['http_referer'] = swf_referer + return info_dict def _real_initialize(self): @@ -197,4 +201,3 @@ class GroovesharkIE(InfoExtractor): if fatal: raise ee return None - From ee1a7032d51757aceab806db707979203626e9ea Mon Sep 17 00:00:00 2001 From: Keith Beckman Date: Tue, 20 May 2014 22:28:32 -0400 Subject: [PATCH 004/440] Fixed errors found by travisci: py26: re.split can't take flags. use inline flags or re.compile py27: info_dict must be serializable. remove request object py335, py34: no urlparse module. use utils.compat_urlparse --- youtube_dl/extractor/grooveshark.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/grooveshark.py b/youtube_dl/extractor/grooveshark.py index 165cccf69..77c5a9172 100644 --- a/youtube_dl/extractor/grooveshark.py +++ b/youtube_dl/extractor/grooveshark.py @@ -4,13 +4,18 @@ from __future__ import unicode_literals import time import math import re -from urlparse import urlparse, urlunparse, urldefrag + from urllib import quote, urlencode from os.path import basename from .common import InfoExtractor from ..utils import ExtractorError, compat_urllib_request, compat_html_parser +from ..utils import compat_urlparse +urlparse = compat_urlparse.urlparse +urlunparse = compat_urlparse.urlunparse +urldefrag = compat_urlparse.urldefrag + class GroovesharkHtmlParser(compat_html_parser.HTMLParser): def __init__(self): @@ -78,7 +83,7 @@ class GroovesharkIE(InfoExtractor): return urlunparse((uri.scheme, uri.netloc, obj['attrs']['data'], None, None, None)) def _transform_bootstrap(self, js): - return re.split('^\s*try\s*{', js, flags=re.M)[0] \ + return re.split('(?m)^\s*try\s*{', js)[0] \ .split(' = ', 1)[1].strip().rstrip(';') def _transform_meta(self, js): @@ -161,8 +166,6 @@ class GroovesharkIE(InfoExtractor): if 'swf_referer' in locals(): headers['Referer'] = swf_referer - req = compat_urllib_request.Request(stream_url, post_data, headers) - info_dict = { 'id': token, 'title': meta['song']['Name'], @@ -176,8 +179,7 @@ class GroovesharkIE(InfoExtractor): # remove keys unnecessary to the eventual post implementation 'post_data': post_data, 'post_dict': post_dict, - 'headers': headers, - 'request': req + 'headers': headers } if 'swf_referer' in locals(): From 212a5e28bae61f764e8e802e403a15cbe62f0dc6 Mon Sep 17 00:00:00 2001 From: anovicecodemonkey Date: Wed, 21 May 2014 19:04:55 +0930 Subject: [PATCH 005/440] Add a duplicate check to /extractor/common.py playlist_result function --- youtube_dl/extractor/common.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index db472aace..26dd9882f 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -343,6 +343,16 @@ class InfoExtractor(object): @staticmethod def playlist_result(entries, playlist_id=None, playlist_title=None): """Returns a playlist""" + # Ensure we don't have any duplicates in the playlist + seen = set() + new_list = [] + for url in entries: + theurl = tuple(url.items()) + if theurl not in seen: + seen.add(theurl) + new_list.append(url) + entries = new_list + video_info = {'_type': 'playlist', 'entries': entries} if playlist_id: From 610134730abfdaaa226de2092d8ad5d731d5b54b Mon Sep 17 00:00:00 2001 From: anovicecodemonkey Date: Wed, 21 May 2014 19:25:37 +0930 Subject: [PATCH 006/440] Add a _TEST_ --- youtube_dl/extractor/generic.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 69381f777..c1e533821 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -260,6 +260,20 @@ class GenericIE(InfoExtractor): 'uploader': 'Spi0n', }, 'add_ie': ['Dailymotion'], + }, + # YouTube embed via + { + 'url': 'https://play.google.com/store/apps/details?id=com.gameloft.android.ANMP.GloftA8HM', + 'md5': 'c267b1ab6d736057d64babaa37e07a66', + 'info_dict': { + 'id': 'Ybd-qmqYYpA', + 'ext': 'mp4', + 'title': 'Asphalt 8: Airborne - Chinese Great Wall - Android Game Trailer', + 'uploader': 'gameloftandroid', + 'uploader_id': 'gameloftandroid', + 'upload_date': '20140321', + 'description': 'md5:9c6dca5dd75b7131ce482ccf080749d6' + } } ] From 37e3cbe22e0bfa6b98a6343be88e1c8c2c7ac41f Mon Sep 17 00:00:00 2001 From: anovicecodemonkey Date: Sun, 1 Jun 2014 01:16:35 +0930 Subject: [PATCH 007/440] Move duplicate check to generic.py --- youtube_dl/extractor/common.py | 10 ---------- youtube_dl/extractor/generic.py | 24 ++++++++++++++++++++++++ 2 files changed, 24 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 26dd9882f..db472aace 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -343,16 +343,6 @@ class InfoExtractor(object): @staticmethod def playlist_result(entries, playlist_id=None, playlist_title=None): """Returns a playlist""" - # Ensure we don't have any duplicates in the playlist - seen = set() - new_list = [] - for url in entries: - theurl = tuple(url.items()) - if theurl not in seen: - seen.add(theurl) - new_list.append(url) - entries = new_list - video_info = {'_type': 'playlist', 'entries': entries} if playlist_id: diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index c1e533821..dfa8d6153 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -494,6 +494,14 @@ class GenericIE(InfoExtractor): if matches: urlrs = [self.url_result(unescapeHTML(tuppl[1]), 'Youtube') for tuppl in matches] + # First, ensure we have a duplicate free list of entries + seen = set() + new_list = [] + theurl = tuple(url.items()) + if theurl not in seen: + seen.add(theurl) + new_list.append(url) + urlrs = new_list return self.playlist_result( urlrs, playlist_id=video_id, playlist_title=video_title) @@ -503,6 +511,14 @@ class GenericIE(InfoExtractor): if matches: urlrs = [self.url_result(unescapeHTML(tuppl[1])) for tuppl in matches] + # First, ensure we have a duplicate free list of entries + seen = set() + new_list = [] + theurl = tuple(url.items()) + if theurl not in seen: + seen.add(theurl) + new_list.append(url) + urlrs = new_list return self.playlist_result( urlrs, playlist_id=video_id, playlist_title=video_title) @@ -615,6 +631,14 @@ class GenericIE(InfoExtractor): if matches: urlrs = [self.url_result(unescapeHTML(eurl), 'FunnyOrDie') for eurl in matches] + # First, ensure we have a duplicate free list of entries + seen = set() + new_list = [] + theurl = tuple(url.items()) + if theurl not in seen: + seen.add(theurl) + new_list.append(url) + urlrs = new_list return self.playlist_result( urlrs, playlist_id=video_id, playlist_title=video_title) From c065fd35ae045ce537b7bfe9f1efa14e8bddc21b Mon Sep 17 00:00:00 2001 From: Ole Ernst Date: Sun, 13 Jul 2014 12:16:25 +0200 Subject: [PATCH 008/440] [gameone] add playlist capability --- youtube_dl/extractor/__init__.py | 5 ++++- youtube_dl/extractor/gameone.py | 16 ++++++++++++++++ 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index a03f9d3ad..17b695a56 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -106,7 +106,10 @@ from .freesound import FreesoundIE from .freespeech import FreespeechIE from .funnyordie import FunnyOrDieIE from .gamekings import GamekingsIE -from .gameone import GameOneIE +from .gameone import ( + GameOneIE, + GameOnePlaylistIE, +) from .gamespot import GameSpotIE from .gametrailers import GametrailersIE from .gdcvault import GDCVaultIE diff --git a/youtube_dl/extractor/gameone.py b/youtube_dl/extractor/gameone.py index b580f52fb..0a0fb19e6 100644 --- a/youtube_dl/extractor/gameone.py +++ b/youtube_dl/extractor/gameone.py @@ -1,6 +1,7 @@ # coding: utf-8 from __future__ import unicode_literals +import datetime import re from .common import InfoExtractor @@ -88,3 +89,18 @@ class GameOneIE(InfoExtractor): 'age_limit': age_limit, 'timestamp': timestamp, } + +class GameOnePlaylistIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?gameone\.de(?:/tv)?/?$' + + def _real_extract(self, url): + this_year = datetime.date.today().year + webpage = self._download_webpage('http://www.gameone.de/tv/year/%d' % this_year, this_year) + max_id = max(map(int, re.findall(r' Date: Wed, 23 Jul 2014 10:56:09 +0800 Subject: [PATCH 009/440] if there is more than one subtitle for the language, use the first one --- youtube_dl/extractor/youtube.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index a346f4c96..73a01107d 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -493,6 +493,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): sub_lang_list = {} for l in lang_list: lang = l[1] + if lang in sub_lang_list: + continue params = compat_urllib_parse.urlencode({ 'lang': lang, 'v': video_id, From 71b6065009d2bbc0e25b46368e73ebd807d3fca0 Mon Sep 17 00:00:00 2001 From: Ole Ernst Date: Wed, 23 Jul 2014 09:32:01 +0200 Subject: [PATCH 010/440] [gameone] add playlist test --- test/test_playlists.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/test/test_playlists.py b/test/test_playlists.py index 1a38a667b..3bc353604 100644 --- a/test/test_playlists.py +++ b/test/test_playlists.py @@ -50,6 +50,7 @@ from youtube_dl.extractor import ( InstagramUserIE, CSpanIE, AolIE, + GameOnePlaylistIE, ) @@ -395,5 +396,13 @@ class TestPlaylists(unittest.TestCase): self.assertEqual(result['id'], 'rbhagwati2') self.assertTrue(len(result['entries']) >= 179) + def test_GameOne_playlist(self): + dl = FakeYDL() + ie = GameOnePlaylistIE(dl) + result = ie.extract('http://www.gameone.de/tv') + self.assertIsPlaylist(result) + self.assertEqual(result['title'], 'GameOne') + assertGreaterEqual(self, len(result['entries']), 294) + if __name__ == '__main__': unittest.main() From 8c778adc39fbaa79a6e885532b933364c9952817 Mon Sep 17 00:00:00 2001 From: Ole Ernst Date: Wed, 23 Jul 2014 10:00:50 +0200 Subject: [PATCH 011/440] [gameone] simplify playlist extractor --- youtube_dl/extractor/gameone.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/youtube_dl/extractor/gameone.py b/youtube_dl/extractor/gameone.py index 0a0fb19e6..12f757329 100644 --- a/youtube_dl/extractor/gameone.py +++ b/youtube_dl/extractor/gameone.py @@ -1,7 +1,6 @@ # coding: utf-8 from __future__ import unicode_literals -import datetime import re from .common import InfoExtractor @@ -94,8 +93,7 @@ class GameOnePlaylistIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?gameone\.de(?:/tv)?/?$' def _real_extract(self, url): - this_year = datetime.date.today().year - webpage = self._download_webpage('http://www.gameone.de/tv/year/%d' % this_year, this_year) + webpage = self._download_webpage('http://www.gameone.de/tv', 'TV') max_id = max(map(int, re.findall(r' Date: Wed, 23 Jul 2014 12:16:26 +0200 Subject: [PATCH 012/440] [youtube] Extract the 'sts' parameter from the webpage (fixes #3327) --- youtube_dl/extractor/youtube.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index a346f4c96..2b346d4f8 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -611,7 +611,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): data = compat_urllib_parse.urlencode({ 'video_id': video_id, 'eurl': 'https://youtube.googleapis.com/v/' + video_id, - 'sts':'16268', + 'sts': self._search_regex( + r'"sts"\s*:\s*(\d+)', video_webpage, 'sts'), }) video_info_url = proto + '://www.youtube.com/get_video_info?' + data video_info_webpage = self._download_webpage(video_info_url, video_id, From 8944ec0109b1e9c847f178755123d5453400dd50 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 23 Jul 2014 19:29:15 +0700 Subject: [PATCH 013/440] [krasview] Add extractor (Closes #3313) --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/krasview.py | 59 ++++++++++++++++++++++++++++++++ 2 files changed, 60 insertions(+) create mode 100644 youtube_dl/extractor/krasview.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 8d63d9281..80aa2dfbb 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -151,6 +151,7 @@ from .khanacademy import KhanAcademyIE from .kickstarter import KickStarterIE from .keek import KeekIE from .kontrtube import KontrTubeIE +from .krasview import KrasViewIE from .ku6 import Ku6IE from .la7 import LA7IE from .lifenews import LifeNewsIE diff --git a/youtube_dl/extractor/krasview.py b/youtube_dl/extractor/krasview.py new file mode 100644 index 000000000..6f3d2345b --- /dev/null +++ b/youtube_dl/extractor/krasview.py @@ -0,0 +1,59 @@ +# encoding: utf-8 +from __future__ import unicode_literals + +import json +import re + +from .common import InfoExtractor +from ..utils import ( + int_or_none, + unescapeHTML, +) + + +class KrasViewIE(InfoExtractor): + IE_DESC = 'Красвью' + _VALID_URL = r'https?://krasview\.ru/video/(?P\d+)' + + _TEST = { + 'url': 'http://krasview.ru/video/512228', + 'md5': '3b91003cf85fc5db277870c8ebd98eae', + 'info_dict': { + 'id': '512228', + 'ext': 'mp4', + 'title': 'Снег, лёд, заносы', + 'description': 'Снято в городе Нягань, в Ханты-Мансийском автономном округе.', + 'duration': 27, + 'thumbnail': 're:^https?://.*\.jpg', + }, + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + + webpage = self._download_webpage(url, video_id) + + flashvars = json.loads(self._search_regex( + r'flashvars\s*:\s*({.+?})\s*}\);', webpage, 'flashvars')) + + video_url = flashvars['url'] + title = unescapeHTML(flashvars['title']) + description = unescapeHTML(flashvars.get('subtitle') or self._og_search_description(webpage, default=None)) + thumbnail = flashvars['image'] + duration = int(flashvars['duration']) + filesize = int(flashvars['size']) + width = int_or_none(self._og_search_property('video:width', webpage, 'video width')) + height = int_or_none(self._og_search_property('video:height', webpage, 'video height')) + + return { + 'id': video_id, + 'url': video_url, + 'title': title, + 'description': description, + 'thumbnail': thumbnail, + 'duration': duration, + 'filesize': filesize, + 'width': width, + 'height': height, + } From 825abb81759d76e53127644a45e1d6cb7ff4f654 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Thu, 24 Jul 2014 10:41:12 +0200 Subject: [PATCH 014/440] [jsinterp] Implement splice and general improvement I still get 403s on YouTube though. --- youtube_dl/jsinterp.py | 88 ++++++++++++++++++++++++++++-------------- 1 file changed, 59 insertions(+), 29 deletions(-) diff --git a/youtube_dl/jsinterp.py b/youtube_dl/jsinterp.py index 13ad5ba1a..34e5307fa 100644 --- a/youtube_dl/jsinterp.py +++ b/youtube_dl/jsinterp.py @@ -1,5 +1,6 @@ from __future__ import unicode_literals +import json import re from .utils import ( @@ -40,8 +41,9 @@ class JSInterpreter(object): assign = lambda v: v expr = stmt[len('return '):] else: - raise ExtractorError( - 'Cannot determine left side of statement in %r' % stmt) + # Try interpreting it as an expression + expr = stmt + assign = lambda v: v v = self.interpret_expression(expr, local_vars, allow_recursion) return assign(v) @@ -53,35 +55,62 @@ class JSInterpreter(object): if expr.isalpha(): return local_vars[expr] - m = re.match(r'^(?P[a-z]+)\.(?P.*)$', expr) - if m: - member = m.group('member') - variable = m.group('in') + try: + return json.loads(expr) + except ValueError: + pass - if variable not in local_vars: + m = re.match( + r'^(?P[a-z]+)\.(?P[^(]+)(?:\(+(?P[^()]*)\))?$', + expr) + if m: + variable = m.group('var') + member = m.group('member') + arg_str = m.group('args') + + if variable in local_vars: + obj = local_vars[variable] + else: if variable not in self._objects: self._objects[variable] = self.extract_object(variable) obj = self._objects[variable] - key, args = member.split('(', 1) - args = args.strip(')') - argvals = [int(v) if v.isdigit() else local_vars[v] - for v in args.split(',')] - return obj[key](argvals) - val = local_vars[variable] - if member == 'split("")': - return list(val) - if member == 'join("")': - return ''.join(val) - if member == 'length': - return len(val) - if member == 'reverse()': - return val[::-1] - slice_m = re.match(r'slice\((?P.*)\)', member) - if slice_m: - idx = self.interpret_expression( - slice_m.group('idx'), local_vars, allow_recursion - 1) - return val[idx:] + if arg_str is None: + # Member access + if member == 'length': + return len(obj) + return obj[member] + + assert expr.endswith(')') + # Function call + if arg_str == '': + argvals = tuple() + else: + argvals = tuple([ + self.interpret_expression(v, local_vars, allow_recursion) + for v in arg_str.split(',')]) + + if member == 'split': + assert argvals == ('',) + return list(obj) + if member == 'join': + assert len(argvals) == 1 + return argvals[0].join(obj) + if member == 'reverse': + assert len(argvals) == 0 + return obj[::-1] + if member == 'slice': + assert len(argvals) == 1 + return obj[argvals[0]:] + if member == 'splice': + assert isinstance(obj, list) + index, howMany = argvals + res = [] + for i in range(index, min(index + howMany, len(obj))): + res.append(obj.pop(i)) + return res + + return obj[member](argvals) m = re.match( r'^(?P[a-z]+)\[(?P.+)\]$', expr) @@ -100,13 +129,14 @@ class JSInterpreter(object): return a % b m = re.match( - r'^(?P[a-zA-Z$]+)\((?P[a-z0-9,]+)\)$', expr) + r'^(?P[.a-zA-Z$]+)\((?P[a-z0-9,]+)\)$', expr) if m: fname = m.group('func') + argvals = tuple([ + int(v) if v.isdigit() else local_vars[v] + for v in m.group('args').split(',')]) if fname not in self._functions: self._functions[fname] = self.extract_function(fname) - argvals = [int(v) if v.isdigit() else local_vars[v] - for v in m.group('args').split(',')] return self._functions[fname](argvals) raise ExtractorError('Unsupported JS expression %r' % expr) From ebe832dc3777861fd20e38cf7588013e9df80e0e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Thu, 24 Jul 2014 11:08:31 +0200 Subject: [PATCH 015/440] [jsinterp] 'reverse' modifies the array in place (fixes #3334) --- test/test_youtube_signature.py | 8 +++++++- youtube_dl/jsinterp.py | 3 ++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py index f0f33f1db..8f1afd432 100644 --- a/test/test_youtube_signature.py +++ b/test/test_youtube_signature.py @@ -62,7 +62,13 @@ _TESTS = [ u'js', 84, u'123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQ0STUVWXYZ!"#$%&\'()*+,@./:;<=>' - ) + ), + ( + u'https://s.ytimg.com/yts/jsbin/html5player-en_US-vfl9FYC6l.js', + u'js', + 83, + u'123456789abcdefghijklmnopqr0tuvwxyzABCDETGHIJKLMNOPQRS>UVWXYZ!"#$%&\'()*+,-./:;<=F' + ), ] diff --git a/youtube_dl/jsinterp.py b/youtube_dl/jsinterp.py index 34e5307fa..95e6948ff 100644 --- a/youtube_dl/jsinterp.py +++ b/youtube_dl/jsinterp.py @@ -98,7 +98,8 @@ class JSInterpreter(object): return argvals[0].join(obj) if member == 'reverse': assert len(argvals) == 0 - return obj[::-1] + obj.reverse() + return obj if member == 'slice': assert len(argvals) == 1 return obj[argvals[0]:] From 7272eab9d006b4835b7dbe34ea57d7551fc03803 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Thu, 24 Jul 2014 11:24:43 +0200 Subject: [PATCH 016/440] release 2014.07.24 --- youtube_dl/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index dca400d5e..725af0d2e 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2014.07.23.2' +__version__ = '2014.07.24' From 892e3192fbed45c0fa13c91c3c18e432b95a4018 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Thu, 24 Jul 2014 11:33:33 +0200 Subject: [PATCH 017/440] [jsinterp] Do not expect dot in simple function call --- youtube_dl/jsinterp.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/jsinterp.py b/youtube_dl/jsinterp.py index 95e6948ff..5731e264b 100644 --- a/youtube_dl/jsinterp.py +++ b/youtube_dl/jsinterp.py @@ -130,7 +130,7 @@ class JSInterpreter(object): return a % b m = re.match( - r'^(?P[.a-zA-Z$]+)\((?P[a-z0-9,]+)\)$', expr) + r'^(?P[a-zA-Z$]+)\((?P[a-z0-9,]+)\)$', expr) if m: fname = m.group('func') argvals = tuple([ From 42f4dcfe41c72a48333e750a21b7cac9a59655a4 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Thu, 24 Jul 2014 11:39:54 +0200 Subject: [PATCH 018/440] [test_youtube_signatures] Modernize --- test/test_youtube_signature.py | 60 ++++++++++++++++++---------------- 1 file changed, 31 insertions(+), 29 deletions(-) diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py index 8f1afd432..811d3a93e 100644 --- a/test/test_youtube_signature.py +++ b/test/test_youtube_signature.py @@ -1,5 +1,7 @@ #!/usr/bin/env python +from __future__ import unicode_literals + # Allow direct execution import os import sys @@ -16,58 +18,58 @@ from youtube_dl.utils import compat_str, compat_urlretrieve _TESTS = [ ( - u'https://s.ytimg.com/yts/jsbin/html5player-vflHOr_nV.js', - u'js', + 'https://s.ytimg.com/yts/jsbin/html5player-vflHOr_nV.js', + 'js', 86, - u'>=<;:/.-[+*)(\'&%$#"!ZYX0VUTSRQPONMLKJIHGFEDCBA\\yxwvutsrqponmlkjihgfedcba987654321', + '>=<;:/.-[+*)(\'&%$#"!ZYX0VUTSRQPONMLKJIHGFEDCBA\\yxwvutsrqponmlkjihgfedcba987654321', ), ( - u'https://s.ytimg.com/yts/jsbin/html5player-vfldJ8xgI.js', - u'js', + 'https://s.ytimg.com/yts/jsbin/html5player-vfldJ8xgI.js', + 'js', 85, - u'3456789a0cdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRS[UVWXYZ!"#$%&\'()*+,-./:;<=>?@', + '3456789a0cdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRS[UVWXYZ!"#$%&\'()*+,-./:;<=>?@', ), ( - u'https://s.ytimg.com/yts/jsbin/html5player-vfle-mVwz.js', - u'js', + 'https://s.ytimg.com/yts/jsbin/html5player-vfle-mVwz.js', + 'js', 90, - u']\\[@?>=<;:/.-,+*)(\'&%$#"hZYXWVUTSRQPONMLKJIHGFEDCBAzyxwvutsrqponmlkjiagfedcb39876', + ']\\[@?>=<;:/.-,+*)(\'&%$#"hZYXWVUTSRQPONMLKJIHGFEDCBAzyxwvutsrqponmlkjiagfedcb39876', ), ( - u'https://s.ytimg.com/yts/jsbin/html5player-en_US-vfl0Cbn9e.js', - u'js', + 'https://s.ytimg.com/yts/jsbin/html5player-en_US-vfl0Cbn9e.js', + 'js', 84, - u'O1I3456789abcde0ghijklmnopqrstuvwxyzABCDEFGHfJKLMN2PQRSTUVW@YZ!"#$%&\'()*+,-./:;<=', + 'O1I3456789abcde0ghijklmnopqrstuvwxyzABCDEFGHfJKLMN2PQRSTUVW@YZ!"#$%&\'()*+,-./:;<=', ), ( - u'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflXGBaUN.js', - u'js', - u'2ACFC7A61CA478CD21425E5A57EBD73DDC78E22A.2094302436B2D377D14A3BBA23022D023B8BC25AA', - u'A52CB8B320D22032ABB3A41D773D2B6342034902.A22E87CDD37DBE75A5E52412DC874AC16A7CFCA2', + 'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflXGBaUN.js', + 'js', + '2ACFC7A61CA478CD21425E5A57EBD73DDC78E22A.2094302436B2D377D14A3BBA23022D023B8BC25AA', + 'A52CB8B320D22032ABB3A41D773D2B6342034902.A22E87CDD37DBE75A5E52412DC874AC16A7CFCA2', ), ( - u'http://s.ytimg.com/yts/swfbin/player-vfl5vIhK2/watch_as3.swf', - u'swf', + 'http://s.ytimg.com/yts/swfbin/player-vfl5vIhK2/watch_as3.swf', + 'swf', 86, - u'O1I3456789abcde0ghijklmnopqrstuvwxyzABCDEFGHfJKLMN2PQRSTUVWXY\\!"#$%&\'()*+,-./:;<=>?' + 'O1I3456789abcde0ghijklmnopqrstuvwxyzABCDEFGHfJKLMN2PQRSTUVWXY\\!"#$%&\'()*+,-./:;<=>?' ), ( - u'http://s.ytimg.com/yts/swfbin/player-vflmDyk47/watch_as3.swf', - u'swf', - u'F375F75BF2AFDAAF2666E43868D46816F83F13E81C46.3725A8218E446A0DECD33F79DC282994D6AA92C92C9', - u'9C29AA6D499282CD97F33DCED0A644E8128A5273.64C18E31F38361864D86834E6662FAADFA2FB57F' + 'http://s.ytimg.com/yts/swfbin/player-vflmDyk47/watch_as3.swf', + 'swf', + 'F375F75BF2AFDAAF2666E43868D46816F83F13E81C46.3725A8218E446A0DECD33F79DC282994D6AA92C92C9', + '9C29AA6D499282CD97F33DCED0A644E8128A5273.64C18E31F38361864D86834E6662FAADFA2FB57F' ), ( - u'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflBb0OQx.js', - u'js', + 'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflBb0OQx.js', + 'js', 84, - u'123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQ0STUVWXYZ!"#$%&\'()*+,@./:;<=>' + '123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQ0STUVWXYZ!"#$%&\'()*+,@./:;<=>' ), ( - u'https://s.ytimg.com/yts/jsbin/html5player-en_US-vfl9FYC6l.js', - u'js', + 'https://s.ytimg.com/yts/jsbin/html5player-en_US-vfl9FYC6l.js', + 'js', 83, - u'123456789abcdefghijklmnopqr0tuvwxyzABCDETGHIJKLMNOPQRS>UVWXYZ!"#$%&\'()*+,-./:;<=F' + '123456789abcdefghijklmnopqr0tuvwxyzABCDETGHIJKLMNOPQRS>UVWXYZ!"#$%&\'()*+,-./:;<=F' ), ] From 2fe3d240ccce3139f1293c97735cb3457de85b83 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Thu, 24 Jul 2014 11:46:21 +0200 Subject: [PATCH 019/440] Regroup and hide workaround options These options are rarely necessary. Hide them to make the important options in the general group more obvious. --- youtube_dl/__init__.py | 41 ++++++++++++++++++++++++++--------------- 1 file changed, 26 insertions(+), 15 deletions(-) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index de7bc0f5f..c3eb29bf4 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -222,6 +222,7 @@ def parseOpts(overrideArguments=None): downloader = optparse.OptionGroup(parser, 'Download Options') postproc = optparse.OptionGroup(parser, 'Post-processing Options') filesystem = optparse.OptionGroup(parser, 'Filesystem Options') + workarounds = optparse.OptionGroup(parser, 'Workarounds') verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options') general.add_option('-h', '--help', @@ -238,14 +239,6 @@ def parseOpts(overrideArguments=None): general.add_option('--dump-user-agent', action='store_true', dest='dump_user_agent', help='display the current browser identification', default=False) - general.add_option('--user-agent', - dest='user_agent', help='specify a custom user agent', metavar='UA') - general.add_option('--referer', - dest='referer', help='specify a custom referer, use if the video access is restricted to one domain', - metavar='REF', default=None) - general.add_option('--add-header', - dest='headers', help='specify a custom HTTP header and its value, separated by a colon \':\'. You can use this option multiple times', action="append", - metavar='FIELD:VALUE') general.add_option('--list-extractors', action='store_true', dest='list_extractors', help='List all supported extractors and the URLs they would handle', default=False) @@ -255,10 +248,6 @@ def parseOpts(overrideArguments=None): general.add_option( '--proxy', dest='proxy', default=None, metavar='URL', help='Use the specified HTTP/HTTPS proxy. Pass in an empty string (--proxy "") for direct connection') - general.add_option('--no-check-certificate', action='store_true', dest='no_check_certificate', default=False, help='Suppress HTTPS certificate validation.') - general.add_option( - '--prefer-insecure', '--prefer-unsecure', action='store_true', dest='prefer_insecure', - help='Use an unencrypted connection to retrieve information about the video. (Currently supported only for YouTube)') general.add_option( '--cache-dir', dest='cachedir', default=get_cachedir(), metavar='DIR', help='Location in the filesystem where youtube-dl can store some downloaded information permanently. By default $XDG_CACHE_HOME/youtube-dl or ~/.cache/youtube-dl . At the moment, only YouTube player files (for videos with obfuscated signatures) are cached, but that may change.') @@ -279,9 +268,6 @@ def parseOpts(overrideArguments=None): '--ignore-config', action='store_true', help='Do not read configuration files. When given in the global configuration file /etc/youtube-dl.conf: do not read the user configuration in ~/.config/youtube-dl.conf (%APPDATA%/youtube-dl/config.txt on Windows)') - general.add_option( - '--encoding', dest='encoding', metavar='ENCODING', - help='Force the specified encoding (experimental)') selection.add_option( '--playlist-start', @@ -382,6 +368,30 @@ def parseOpts(overrideArguments=None): help='do not automatically adjust the buffer size. By default, the buffer size is automatically resized from an initial value of SIZE.', default=False) downloader.add_option('--test', action='store_true', dest='test', default=False, help=optparse.SUPPRESS_HELP) + workarounds.add_option( + '--encoding', dest='encoding', metavar='ENCODING', + help='Force the specified encoding (experimental)') + workarounds.add_option( + '--no-check-certificate', action='store_true', + dest='no_check_certificate', default=False, + help='Suppress HTTPS certificate validation.') + workarounds.add_option( + '--prefer-insecure', '--prefer-unsecure', action='store_true', dest='prefer_insecure', + help='Use an unencrypted connection to retrieve information about the video. (Currently supported only for YouTube)') + workarounds.add_option( + '--user-agent', metavar='UA', + dest='user_agent', help='specify a custom user agent') + workarounds.add_option( + '--referer', metavar='REF', + dest='referer', default=None, + help='specify a custom referer, use if the video access is restricted to one domain', + ) + workarounds.add_option( + '--add-header', metavar='FIELD:VALUE', + dest='headers', action='append', + help='specify a custom HTTP header and its value, separated by a colon \':\'. You can use this option multiple times', + ) + verbosity.add_option('-q', '--quiet', action='store_true', dest='quiet', help='activates quiet mode', default=False) verbosity.add_option( @@ -534,6 +544,7 @@ def parseOpts(overrideArguments=None): parser.add_option_group(downloader) parser.add_option_group(filesystem) parser.add_option_group(verbosity) + parser.add_option_group(workarounds) parser.add_option_group(video_format) parser.add_option_group(subtitles) parser.add_option_group(authentication) From fbb2fc5580411ce2fbc088963210db7f833441a8 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Thu, 24 Jul 2014 11:49:26 +0200 Subject: [PATCH 020/440] Group cache-related options under filesystem --- youtube_dl/__init__.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index c3eb29bf4..fbd03faf4 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -248,12 +248,6 @@ def parseOpts(overrideArguments=None): general.add_option( '--proxy', dest='proxy', default=None, metavar='URL', help='Use the specified HTTP/HTTPS proxy. Pass in an empty string (--proxy "") for direct connection') - general.add_option( - '--cache-dir', dest='cachedir', default=get_cachedir(), metavar='DIR', - help='Location in the filesystem where youtube-dl can store some downloaded information permanently. By default $XDG_CACHE_HOME/youtube-dl or ~/.cache/youtube-dl . At the moment, only YouTube player files (for videos with obfuscated signatures) are cached, but that may change.') - general.add_option( - '--no-cache-dir', action='store_const', const=None, dest='cachedir', - help='Disable filesystem caching') general.add_option( '--socket-timeout', dest='socket_timeout', type=float, default=None, help=u'Time to wait before giving up, in seconds') @@ -511,6 +505,12 @@ def parseOpts(overrideArguments=None): filesystem.add_option('--write-thumbnail', action='store_true', dest='writethumbnail', help='write thumbnail image to disk', default=False) + filesystem.add_option( + '--cache-dir', dest='cachedir', default=get_cachedir(), metavar='DIR', + help='Location in the filesystem where youtube-dl can store some downloaded information permanently. By default $XDG_CACHE_HOME/youtube-dl or ~/.cache/youtube-dl . At the moment, only YouTube player files (for videos with obfuscated signatures) are cached, but that may change.') + filesystem.add_option( + '--no-cache-dir', action='store_const', const=None, dest='cachedir', + help='Disable filesystem caching') postproc.add_option('-x', '--extract-audio', action='store_true', dest='extractaudio', default=False, From a11165ecc6c2123dece4218d311707aff191009e Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Thu, 24 Jul 2014 11:50:49 +0200 Subject: [PATCH 021/440] Reorder filesytem options * Push down the deprecated ones * Roughly order file-name, no-*, write-*, further options --- youtube_dl/__init__.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index fbd03faf4..e0b629621 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -443,12 +443,10 @@ def parseOpts(overrideArguments=None): help='Display sent and read HTTP traffic') - filesystem.add_option('-t', '--title', - action='store_true', dest='usetitle', help='use title in file name (default)', default=False) + filesystem.add_option('-a', '--batch-file', + dest='batchfile', metavar='FILE', help='file containing URLs to download (\'-\' for stdin)') filesystem.add_option('--id', action='store_true', dest='useid', help='use only video ID in file name', default=False) - filesystem.add_option('-l', '--literal', - action='store_true', dest='usetitle', help='[deprecated] alias of --title', default=False) filesystem.add_option('-A', '--auto-number', action='store_true', dest='autonumber', help='number downloaded files starting from 00000', default=False) @@ -474,11 +472,10 @@ def parseOpts(overrideArguments=None): filesystem.add_option('--restrict-filenames', action='store_true', dest='restrictfilenames', help='Restrict filenames to only ASCII characters, and avoid "&" and spaces in filenames', default=False) - filesystem.add_option('-a', '--batch-file', - dest='batchfile', metavar='FILE', help='file containing URLs to download (\'-\' for stdin)') - filesystem.add_option('--load-info', - dest='load_info_filename', metavar='FILE', - help='json file containing the video information (created with the "--write-json" option)') + filesystem.add_option('-t', '--title', + action='store_true', dest='usetitle', help='use title in file name (default)', default=False) + filesystem.add_option('-l', '--literal', + action='store_true', dest='usetitle', help='[deprecated] alias of --title', default=False) filesystem.add_option('-w', '--no-overwrites', action='store_true', dest='nooverwrites', help='do not overwrite files', default=False) filesystem.add_option('-c', '--continue', @@ -486,8 +483,6 @@ def parseOpts(overrideArguments=None): filesystem.add_option('--no-continue', action='store_false', dest='continue_dl', help='do not resume partially downloaded files (restart from beginning)') - filesystem.add_option('--cookies', - dest='cookiefile', metavar='FILE', help='file to read cookies from and dump cookie jar in') filesystem.add_option('--no-part', action='store_true', dest='nopart', help='do not use .part files', default=False) filesystem.add_option('--no-mtime', @@ -505,6 +500,11 @@ def parseOpts(overrideArguments=None): filesystem.add_option('--write-thumbnail', action='store_true', dest='writethumbnail', help='write thumbnail image to disk', default=False) + filesystem.add_option('--load-info', + dest='load_info_filename', metavar='FILE', + help='json file containing the video information (created with the "--write-json" option)') + filesystem.add_option('--cookies', + dest='cookiefile', metavar='FILE', help='file to read cookies from and dump cookie jar in') filesystem.add_option( '--cache-dir', dest='cachedir', default=get_cachedir(), metavar='DIR', help='Location in the filesystem where youtube-dl can store some downloaded information permanently. By default $XDG_CACHE_HOME/youtube-dl or ~/.cache/youtube-dl . At the moment, only YouTube player files (for videos with obfuscated signatures) are cached, but that may change.') From 4e99f48817d1218fedc7c1b600c167553f82b604 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Thu, 24 Jul 2014 11:52:16 +0200 Subject: [PATCH 022/440] deprecate --title This is the default already. If you want a specific format, pick it with -o or --id. --- youtube_dl/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index e0b629621..cb8847111 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -473,7 +473,7 @@ def parseOpts(overrideArguments=None): action='store_true', dest='restrictfilenames', help='Restrict filenames to only ASCII characters, and avoid "&" and spaces in filenames', default=False) filesystem.add_option('-t', '--title', - action='store_true', dest='usetitle', help='use title in file name (default)', default=False) + action='store_true', dest='usetitle', help='[deprecated] use title in file name (default)', default=False) filesystem.add_option('-l', '--literal', action='store_true', dest='usetitle', help='[deprecated] alias of --title', default=False) filesystem.add_option('-w', '--no-overwrites', From 052421ff0971ec3d269dc9e87e76c80dffcd2e60 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Thu, 24 Jul 2014 12:16:16 +0200 Subject: [PATCH 023/440] Add --rm-cache-dir --- youtube_dl/__init__.py | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index cb8847111..6ff0be00f 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -76,6 +76,7 @@ import optparse import os import random import shlex +import shutil import sys @@ -511,6 +512,9 @@ def parseOpts(overrideArguments=None): filesystem.add_option( '--no-cache-dir', action='store_const', const=None, dest='cachedir', help='Disable filesystem caching') + filesystem.add_option( + '--rm-cache-dir', action='store_true', dest='rm_cachedir', + help='Delete all filesystem cache files') postproc.add_option('-x', '--extract-audio', action='store_true', dest='extractaudio', default=False, @@ -844,9 +848,26 @@ def _real_main(argv=None): if opts.update_self: update_self(ydl.to_screen, opts.verbose) + # Remove cache dir + if opts.rm_cachedir: + if opts.cachedir is None: + ydl.to_screen(u'No cache dir specified (Did you combine --no-cache-dir and --rm-cache-dir?)') + else: + if ('.cache' not in opts.cachedir) or ('youtube-dl' not in opts.cachedir): + ydl.to_screen(u'Not removing directory %s - this does not look like a cache dir') + retcode = 141 + else: + ydl.to_screen( + u'Removing cache dir %s .' % opts.cachedir, + skip_eol=True) + if os.path.exists(opts.cachedir): + ydl.to_screen(u'.', skip_eol=True) + shutil.rmtree(opts.cachedir) + ydl.to_screen(u'.') + # Maybe do nothing if (len(all_urls) < 1) and (opts.load_info_filename is None): - if not opts.update_self: + if not (opts.update_self or opts.rm_cachedir): parser.error(u'you must provide at least one URL') else: sys.exit() From 4192b51c7c599d1fb1114dcee8abaa44bc48d03b Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Thu, 24 Jul 2014 13:29:44 +0200 Subject: [PATCH 024/440] Replace failure handling with up-front check. The only time that write_string should fail is if the Python is completely braindead. Check for that condition and output a more accurate warning. See #3326 for details. --- youtube_dl/YoutubeDL.py | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 4ff1ae0e8..f295174cf 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1234,21 +1234,18 @@ class YoutubeDL(object): if not self.params.get('verbose'): return + if type('') is not compat_str: + # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326) + self.report_warning( + 'Your Python is broken! Update to a newer and supported version') + encoding_str = ( '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % ( locale.getpreferredencoding(), sys.getfilesystemencoding(), sys.stdout.encoding, self.get_encoding())) - try: - write_string(encoding_str, encoding=None) - except: - errmsg = 'Failed to write encoding string %r' % encoding_str - try: - sys.stdout.write(errmsg) - except: - pass - raise IOError(errmsg) + write_string(encoding_str, encoding=None) self._write_string('[debug] youtube-dl version ' + __version__ + '\n') try: From 916c1452178c68bc7133b84f1538d19d5a7db17e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 24 Jul 2014 21:12:45 +0700 Subject: [PATCH 025/440] [shared] Add extractor (Closes #3312) --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/shared.py | 57 ++++++++++++++++++++++++++++++++ 2 files changed, 58 insertions(+) create mode 100644 youtube_dl/extractor/shared.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 80aa2dfbb..2bb93d938 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -259,6 +259,7 @@ from .savefrom import SaveFromIE from .scivee import SciVeeIE from .screencast import ScreencastIE from .servingsys import ServingSysIE +from .shared import SharedIE from .sina import SinaIE from .slideshare import SlideshareIE from .slutload import SlutloadIE diff --git a/youtube_dl/extractor/shared.py b/youtube_dl/extractor/shared.py new file mode 100644 index 000000000..8607482be --- /dev/null +++ b/youtube_dl/extractor/shared.py @@ -0,0 +1,57 @@ +from __future__ import unicode_literals + +import re +import base64 + +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + compat_urllib_request, + compat_urllib_parse, + int_or_none, +) + + +class SharedIE(InfoExtractor): + _VALID_URL = r'http://shared\.sx/(?P[\da-z]{10})' + + _TEST = { + 'url': 'http://shared.sx/0060718775', + 'md5': '53e1c58fc3e777ae1dfe9e57ba2f9c72', + 'info_dict': { + 'id': '0060718775', + 'ext': 'mp4', + 'title': 'Big Buck Bunny Trailer', + }, + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + + page = self._download_webpage(url, video_id) + + if re.search(r'>File does not exist<', page) is not None: + raise ExtractorError('Video %s does not exist' % video_id, expected=True) + + download_form = dict(re.findall(r' Date: Fri, 25 Jul 2014 06:49:26 +0200 Subject: [PATCH 026/440] [youtube] Fix player ID display --- youtube_dl/extractor/youtube.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 2b346d4f8..c48d1b8ef 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -836,7 +836,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): player_desc = 'flash player %s' % player_version else: player_version = self._search_regex( - r'html5player-(.+?)\.js', video_webpage, + r'html5player-([^/]+?)(?:/html5player)?\.js', + player_url, 'html5 player', fatal=False) player_desc = u'html5 player %s' % player_version From 16f8e9df8a414a182ddd5870a4c98a4725e911cf Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Fri, 25 Jul 2014 06:54:52 +0200 Subject: [PATCH 027/440] [jsinterp] Allow uppercase object names --- youtube_dl/jsinterp.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/jsinterp.py b/youtube_dl/jsinterp.py index 5731e264b..7f00586ae 100644 --- a/youtube_dl/jsinterp.py +++ b/youtube_dl/jsinterp.py @@ -61,7 +61,7 @@ class JSInterpreter(object): pass m = re.match( - r'^(?P[a-z]+)\.(?P[^(]+)(?:\(+(?P[^()]*)\))?$', + r'^(?P[a-zA-Z0-9_]+)\.(?P[^(]+)(?:\(+(?P[^()]*)\))?$', expr) if m: variable = m.group('var') From 4bc7009e8a6aed2b61866ef12ca3d60065ebcd0e Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Fri, 25 Jul 2014 07:00:54 +0200 Subject: [PATCH 028/440] [jsinterp] Add new testcase --- test/test_youtube_signature.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py index 811d3a93e..604e76ab6 100644 --- a/test/test_youtube_signature.py +++ b/test/test_youtube_signature.py @@ -71,6 +71,12 @@ _TESTS = [ 83, '123456789abcdefghijklmnopqr0tuvwxyzABCDETGHIJKLMNOPQRS>UVWXYZ!"#$%&\'()*+,-./:;<=F' ), + ( + 'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflCGk6yw/html5player.js', + 'js', + '4646B5181C6C3020DF1D9C7FCFEA.AD80ABF70C39BD369CCCAE780AFBB98FA6B6CB42766249D9488C288', + '82C8849D94266724DC6B6AF89BBFA087EACCD963.B93C07FBA084ACAEFCF7C9D1FD0203C6C1815B6B' + ) ] @@ -83,7 +89,7 @@ class TestSignature(unittest.TestCase): def make_tfunc(url, stype, sig_input, expected_sig): - m = re.match(r'.*-([a-zA-Z0-9_-]+)(?:/watch_as3)?\.[a-z]+$', url) + m = re.match(r'.*-([a-zA-Z0-9_-]+)(?:/watch_as3|/html5player)?\.[a-z]+$', url) assert m, '%r should follow URL format' % url test_id = m.group(1) From 4cb71e9b6ac547d198d8cd3b98cfec23b6a89f02 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Fri, 25 Jul 2014 07:04:39 +0200 Subject: [PATCH 029/440] [jsinterp] Fix slice --- youtube_dl/jsinterp.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/jsinterp.py b/youtube_dl/jsinterp.py index 7f00586ae..c40cd376d 100644 --- a/youtube_dl/jsinterp.py +++ b/youtube_dl/jsinterp.py @@ -108,7 +108,7 @@ class JSInterpreter(object): index, howMany = argvals res = [] for i in range(index, min(index + howMany, len(obj))): - res.append(obj.pop(i)) + res.append(obj.pop(index)) return res return obj[member](argvals) From 36f35428831788d8c8f074e96bffa733dd094dae Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Fri, 25 Jul 2014 07:05:17 +0200 Subject: [PATCH 030/440] release 2014.07.25 --- README.md | 58 +++++++++++++++++++++++-------------------- youtube_dl/version.py | 2 +- 2 files changed, 32 insertions(+), 28 deletions(-) diff --git a/README.md b/README.md index fb2f776c9..af880ccc2 100644 --- a/README.md +++ b/README.md @@ -38,12 +38,6 @@ which means you can modify it, redistribute it or use it however you like. playlist or the command line) if an error occurs --dump-user-agent display the current browser identification - --user-agent UA specify a custom user agent - --referer REF specify a custom referer, use if the video - access is restricted to one domain - --add-header FIELD:VALUE specify a custom HTTP header and its value, - separated by a colon ':'. You can use this - option multiple times --list-extractors List all supported extractors and the URLs they would handle --extractor-descriptions Output descriptions of all supported @@ -51,18 +45,6 @@ which means you can modify it, redistribute it or use it however you like. --proxy URL Use the specified HTTP/HTTPS proxy. Pass in an empty string (--proxy "") for direct connection - --no-check-certificate Suppress HTTPS certificate validation. - --prefer-insecure Use an unencrypted connection to retrieve - information about the video. (Currently - supported only for YouTube) - --cache-dir DIR Location in the filesystem where youtube-dl - can store some downloaded information - permanently. By default $XDG_CACHE_HOME - /youtube-dl or ~/.cache/youtube-dl . At the - moment, only YouTube player files (for - videos with obfuscated signatures) are - cached, but that may change. - --no-cache-dir Disable filesystem caching --socket-timeout None Time to wait before giving up, in seconds --bidi-workaround Work around terminals that lack bidirectional text support. Requires bidiv @@ -79,7 +61,6 @@ which means you can modify it, redistribute it or use it however you like. configuration in ~/.config/youtube-dl.conf (%APPDATA%/youtube-dl/config.txt on Windows) - --encoding ENCODING Force the specified encoding (experimental) ## Video Selection: --playlist-start NUMBER playlist video to start at (default is 1) @@ -125,9 +106,9 @@ which means you can modify it, redistribute it or use it however you like. of SIZE. ## Filesystem Options: - -t, --title use title in file name (default) + -a, --batch-file FILE file containing URLs to download ('-' for + stdin) --id use only video ID in file name - -l, --literal [deprecated] alias of --title -A, --auto-number number downloaded files starting from 00000 -o, --output TEMPLATE output filename template. Use %(title)s to get the title, %(uploader)s for the @@ -160,18 +141,15 @@ which means you can modify it, redistribute it or use it however you like. --restrict-filenames Restrict filenames to only ASCII characters, and avoid "&" and spaces in filenames - -a, --batch-file FILE file containing URLs to download ('-' for - stdin) - --load-info FILE json file containing the video information - (created with the "--write-json" option) + -t, --title [deprecated] use title in file name + (default) + -l, --literal [deprecated] alias of --title -w, --no-overwrites do not overwrite files -c, --continue force resume of partially downloaded files. By default, youtube-dl will resume downloads if possible. --no-continue do not resume partially downloaded files (restart from beginning) - --cookies FILE file to read cookies from and dump cookie - jar in --no-part do not use .part files --no-mtime do not use the Last-modified header to set the file modification time @@ -181,6 +159,19 @@ which means you can modify it, redistribute it or use it however you like. --write-annotations write video annotations to a .annotation file --write-thumbnail write thumbnail image to disk + --load-info FILE json file containing the video information + (created with the "--write-json" option) + --cookies FILE file to read cookies from and dump cookie + jar in + --cache-dir DIR Location in the filesystem where youtube-dl + can store some downloaded information + permanently. By default $XDG_CACHE_HOME + /youtube-dl or ~/.cache/youtube-dl . At the + moment, only YouTube player files (for + videos with obfuscated signatures) are + cached, but that may change. + --no-cache-dir Disable filesystem caching + --rm-cache-dir Delete all filesystem cache files ## Verbosity / Simulation Options: -q, --quiet activates quiet mode @@ -210,6 +201,19 @@ which means you can modify it, redistribute it or use it however you like. problems --print-traffic Display sent and read HTTP traffic +## Workarounds: + --encoding ENCODING Force the specified encoding (experimental) + --no-check-certificate Suppress HTTPS certificate validation. + --prefer-insecure Use an unencrypted connection to retrieve + information about the video. (Currently + supported only for YouTube) + --user-agent UA specify a custom user agent + --referer REF specify a custom referer, use if the video + access is restricted to one domain + --add-header FIELD:VALUE specify a custom HTTP header and its value, + separated by a colon ':'. You can use this + option multiple times + ## Video Format Options: -f, --format FORMAT video format code, specify the order of preference using slashes: "-f 22/17/18". diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 725af0d2e..479b98abf 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2014.07.24' +__version__ = '2014.07.25' From c3415d1bac09acfc2bd26e9187632a837628c10d Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Fri, 25 Jul 2014 10:43:03 +0200 Subject: [PATCH 031/440] [extractor/common] PEP8 --- youtube_dl/extractor/common.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 88f12797c..52c00186e 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -373,7 +373,8 @@ class InfoExtractor(object): else: for p in pattern: mobj = re.search(p, string, flags) - if mobj: break + if mobj: + break if os.name != 'nt' and sys.stderr.isatty(): _name = u'\033[0;34m%s\033[0m' % name From 5d2519e5bffd726215cf2ec50e1aeff622e2c258 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Fri, 25 Jul 2014 10:45:04 +0200 Subject: [PATCH 032/440] [gdcvault] Add support for direct URL video type Fixes #3356 --- youtube_dl/extractor/gdcvault.py | 42 ++++++++++++++++++++++++++++---- 1 file changed, 37 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/gdcvault.py b/youtube_dl/extractor/gdcvault.py index 89d5994ee..de14ae1fb 100644 --- a/youtube_dl/extractor/gdcvault.py +++ b/youtube_dl/extractor/gdcvault.py @@ -8,6 +8,7 @@ from ..utils import ( compat_urllib_request, ) + class GDCVaultIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?gdcvault\.com/play/(?P\d+)/(?P(\w|-)+)' _TESTS = [ @@ -31,6 +32,15 @@ class GDCVaultIE(InfoExtractor): 'skip_download': True, # Requires rtmpdump } }, + { + 'url': 'http://www.gdcvault.com/play/1015301/Thexder-Meets-Windows-95-or', + 'md5': 'a5eb77996ef82118afbbe8e48731b98e', + 'info_dict': { + 'id': '1015301', + 'ext': 'flv', + 'title': 'Thexder Meets Windows 95, or Writing Great Games in the Windows 95 Environment', + } + } ] def _parse_mp4(self, xml_description): @@ -103,18 +113,40 @@ class GDCVaultIE(InfoExtractor): webpage_url = 'http://www.gdcvault.com/play/' + video_id start_page = self._download_webpage(webpage_url, video_id) - xml_root = self._html_search_regex(r'', + start_page, 'xml root', default=None) if xml_root is None: # Probably need to authenticate - start_page = self._login(webpage_url, video_id) - if start_page is None: + login_res = self._login(webpage_url, video_id) + if login_res is None: self.report_warning('Could not login.') else: + start_page = login_res # Grab the url from the authenticated page - xml_root = self._html_search_regex(r'', + start_page, 'xml root') - xml_name = self._html_search_regex(r'', + start_page, 'xml filename', default=None) if xml_name is None: # Fallback to the older format xml_name = self._html_search_regex(r'