From 3af1fac7b0f43778e44b3b86e0c74bf25fb6f489 Mon Sep 17 00:00:00 2001 From: remitamine Date: Sat, 18 Jul 2015 09:51:59 +0100 Subject: [PATCH 01/73] [dcn] Add new extractor --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/dcn.py | 46 ++++++++++++++++++++++++++++++++ 2 files changed, 47 insertions(+) create mode 100644 youtube_dl/extractor/dcn.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 06f21064b..cc0da81d1 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -110,6 +110,7 @@ from .dailymotion import ( ) from .daum import DaumIE from .dbtv import DBTVIE +from .dcn import DcnIE from .dctp import DctpTvIE from .deezer import DeezerPlaylistIE from .dfb import DFBIE diff --git a/youtube_dl/extractor/dcn.py b/youtube_dl/extractor/dcn.py new file mode 100644 index 000000000..5263def4c --- /dev/null +++ b/youtube_dl/extractor/dcn.py @@ -0,0 +1,46 @@ +from .common import InfoExtractor + +class DcnIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?dcndigital\.ae/(?:#/)?(?:video/.+|show/\d+/.+?)/(?P\d+)/?' + _TEST = { + 'url': 'http://www.dcndigital.ae/#/show/199074/%D8%B1%D8%AD%D9%84%D8%A9-%D8%A7%D9%84%D8%B9%D9%85%D8%B1-%D8%A7%D9%84%D8%AD%D9%84%D9%82%D8%A9-1/17375/6887', + 'info_dict': + { + 'id': '17375', + 'ext': 'm3u8', + 'title': 'رحلة العمر : الحلقة 1', + 'description': '"في هذه الحلقة من برنامج رحلة العمر يقدّم الدكتور عمر عبد الكافي تبسيطاً لمناسك الحج والعمرة ويجيب مباشرة على استفسارات حجاج بيت الله الحرام بخصوص مناسك الحج والعمرة1"', + 'thumbnail': 'http://admin.mangomolo.com/analytics/uploads/71/images/media/2/2cefc09d7bec80afa754682f40e49503.jpg', + 'duration': '2041' + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + json_data = self._download_json( + 'http://admin.mangomolo.com/analytics/index.php/plus/video?id='+video_id, + video_id + ) + title = json_data['title_ar']; + thumbnail = 'http://admin.mangomolo.com/analytics/'+json_data['img']; + duration = json_data['duration']; + description = json_data['description_ar']; + webpage = self._download_webpage( + 'http://admin.mangomolo.com/analytics/index.php/customers/embed/video?id='+json_data['id']+'&user_id='+json_data['user_id']+'&countries=Q0M=&w=100%&h=100%&filter=DENY&signature='+json_data['signature'], + video_id + ) + m3u8_url = self._html_search_regex( + r'file: "(?P.*?)"', + webpage, + 'm3u8_url', + group='m3u8_url' + ) + formats = self._extract_m3u8_formats(m3u8_url, video_id) + return { + 'id': video_id, + 'title': title, + 'thumbnail': thumbnail, + 'duration': duration, + 'description': description, + 'formats': formats, + } From 9d681c2bb3b75a666b76d8e346ffab66b65f9132 Mon Sep 17 00:00:00 2001 From: remitamine Date: Sat, 18 Jul 2015 10:00:24 +0100 Subject: [PATCH 02/73] remove unnecessary group name --- youtube_dl/extractor/dcn.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/dcn.py b/youtube_dl/extractor/dcn.py index 5263def4c..f76ebda9e 100644 --- a/youtube_dl/extractor/dcn.py +++ b/youtube_dl/extractor/dcn.py @@ -30,10 +30,9 @@ class DcnIE(InfoExtractor): video_id ) m3u8_url = self._html_search_regex( - r'file: "(?P.*?)"', + r'file:\s*"([^"]+)', webpage, - 'm3u8_url', - group='m3u8_url' + 'm3u8_url' ) formats = self._extract_m3u8_formats(m3u8_url, video_id) return { From 1a117a77287e7dbd4d92f29062dabcf4efb86cb5 Mon Sep 17 00:00:00 2001 From: remitamine Date: Fri, 24 Jul 2015 12:00:20 +0100 Subject: [PATCH 03/73] [clipfish] extract mp4 video link --- 
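Note: a minimal standalone sketch of the approach this patch switches to — pulling a JavaScript object literal out of the page and normalising it to JSON. The variable name and regex below are illustrative only (the patch itself uses the extractor's _parse_json helper together with js_to_json):

    import json
    import re

    from youtube_dl.utils import js_to_json

    def parse_js_object(webpage, var_name):
        # Grab the {...} literal assigned to the given JS variable.
        raw = re.search(
            r'var\s+%s\s*=\s*({[^}]+?})' % re.escape(var_name), webpage).group(1)
        # js_to_json quotes bare keys and converts single-quoted strings so the
        # standard json module can parse the result.
        return json.loads(js_to_json(raw))
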
youtube_dl/extractor/clipfish.py | 37 ++++++++++++++++---------------- 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/youtube_dl/extractor/clipfish.py b/youtube_dl/extractor/clipfish.py index a5c3cb7c6..09dfaac60 100644 --- a/youtube_dl/extractor/clipfish.py +++ b/youtube_dl/extractor/clipfish.py @@ -1,13 +1,11 @@ from __future__ import unicode_literals -import re -import time -import xml.etree.ElementTree - from .common import InfoExtractor from ..utils import ( ExtractorError, - parse_duration, + int_or_none, + js_to_json, + determine_ext, ) @@ -17,37 +15,40 @@ class ClipfishIE(InfoExtractor): _VALID_URL = r'^https?://(?:www\.)?clipfish\.de/.*?/video/(?P[0-9]+)/' _TEST = { 'url': 'http://www.clipfish.de/special/game-trailer/video/3966754/fifa-14-e3-2013-trailer/', - 'md5': '2521cd644e862936cf2e698206e47385', + 'md5': '79bc922f3e8a9097b3d68a93780fd475', 'info_dict': { 'id': '3966754', 'ext': 'mp4', 'title': 'FIFA 14 - E3 2013 Trailer', 'duration': 82, - }, - 'skip': 'Blocked in the US' + } } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group(1) + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + video_info = self._parse_json( + js_to_json(self._html_search_regex('var videoObject = ({[^}]+?})', webpage, 'videoObject')), + video_id + ) + info_url = self._parse_json( + js_to_json(self._html_search_regex('var globalFlashvars = ({[^}]+?})', webpage, 'globalFlashvars')), + video_id + )['data'] - info_url = ('http://www.clipfish.de/devxml/videoinfo/%s?ts=%d' % - (video_id, int(time.time()))) doc = self._download_xml( info_url, video_id, note='Downloading info page') title = doc.find('title').text video_url = doc.find('filename').text - if video_url is None: - xml_bytes = xml.etree.ElementTree.tostring(doc) - raise ExtractorError('Cannot find video URL in document %r' % - xml_bytes) thumbnail = doc.find('imageurl').text - duration = parse_duration(doc.find('duration').text) + duration = int_or_none(video_info['length']) + formats = [{'url': video_info['videourl']},{'url': video_url}] + self._sort_formats(formats) return { 'id': video_id, 'title': title, - 'url': video_url, + 'formats': formats, 'thumbnail': thumbnail, 'duration': duration, } From a107193e4b7a3d5414dd7422263c34ac0e309ec4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 2 Aug 2015 01:13:21 +0600 Subject: [PATCH 04/73] [extractor/common] Extract f4m and m3u8 formats, subtitles and info --- youtube_dl/extractor/common.py | 200 ++++++++++++++++++++++++--------- 1 file changed, 149 insertions(+), 51 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index dc5080504..f9578b838 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -18,6 +18,7 @@ from ..compat import ( compat_HTTPError, compat_http_client, compat_urllib_error, + compat_urllib_parse, compat_urllib_parse_urlparse, compat_urllib_request, compat_urlparse, @@ -37,6 +38,7 @@ from ..utils import ( RegexNotFoundError, sanitize_filename, unescapeHTML, + url_basename, ) @@ -978,69 +980,165 @@ class InfoExtractor(object): self._sort_formats(formats) return formats - # TODO: improve extraction - def _extract_smil_formats(self, smil_url, video_id, fatal=True): - smil = self._download_xml( - smil_url, video_id, 'Downloading SMIL file', - 'Unable to download SMIL file', fatal=fatal) + @staticmethod + def _xpath_ns(path, namespace=None): + if not namespace: + return path + out = [] + for c in 
path.split('/'): + if not c or c == '.': + out.append(c) + else: + out.append('{%s}%s' % (namespace, c)) + return '/'.join(out) + + def _extract_smil_formats(self, smil_url, video_id, fatal=True, f4m_params=None): + smil = self._download_smil(smil_url, video_id, fatal=fatal) + if smil is False: assert not fatal return [] - base = smil.find('./head/meta').get('base') + namespace = self._search_regex( + r'{([^}]+)?}smil', smil.tag, 'namespace', default=None) + + return self._parse_smil_formats( + smil, smil_url, video_id, namespace=namespace, f4m_params=f4m_params) + + def _extract_smil_info(self, smil_url, video_id, fatal=True, f4m_params=None): + smil = self._download_smil(smil_url, video_id, fatal=fatal) + if smil is False: + return {} + return self._parse_smil(smil, smil_url, video_id, f4m_params=f4m_params) + + def _download_smil(self, smil_url, video_id, fatal=True): + return self._download_xml( + smil_url, video_id, 'Downloading SMIL file', + 'Unable to download SMIL file', fatal=fatal) + + def _parse_smil(self, smil, smil_url, video_id, f4m_params=None): + namespace = self._search_regex( + r'{([^}]+)?}smil', smil.tag, 'namespace', default=None) + + formats = self._parse_smil_formats( + smil, smil_url, video_id, namespace=namespace, f4m_params=f4m_params) + subtitles = self._parse_smil_subtitles(smil, namespace=namespace) + + video_id = os.path.splitext(url_basename(smil_url))[0] + title = None + description = None + for meta in smil.findall(self._xpath_ns('./head/meta', namespace)): + name = meta.attrib.get('name') + content = meta.attrib.get('content') + if not name or not content: + continue + if not title and name == 'title': + title = content + elif not description and name in ('description', 'abstract'): + description = content + + return { + 'id': video_id, + 'title': title or video_id, + 'description': description, + 'formats': formats, + 'subtitles': subtitles, + } + + def _parse_smil_formats(self, smil, smil_url, video_id, namespace=None, f4m_params=None): + base = smil_url + for meta in smil.findall(self._xpath_ns('./head/meta', namespace)): + b = meta.get('base') or meta.get('httpBase') + if b: + base = b + break formats = [] rtmp_count = 0 - if smil.findall('./body/seq/video'): - video = smil.findall('./body/seq/video')[0] - fmts, rtmp_count = self._parse_smil_video(video, video_id, base, rtmp_count) - formats.extend(fmts) - else: - for video in smil.findall('./body/switch/video'): - fmts, rtmp_count = self._parse_smil_video(video, video_id, base, rtmp_count) - formats.extend(fmts) + http_count = 0 + + videos = smil.findall(self._xpath_ns('.//video', namespace)) + for video in videos: + src = video.get('src') + if not src: + continue + + bitrate = int_or_none(video.get('system-bitrate') or video.get('systemBitrate'), 1000) + filesize = int_or_none(video.get('size') or video.get('fileSize')) + width = int_or_none(video.get('width')) + height = int_or_none(video.get('height')) + proto = video.get('proto') + ext = video.get('ext') + src_ext = determine_ext(src) + streamer = video.get('streamer') or base + + if proto == 'rtmp' or streamer.startswith('rtmp'): + rtmp_count += 1 + formats.append({ + 'url': streamer, + 'play_path': src, + 'ext': 'flv', + 'format_id': 'rtmp-%d' % (rtmp_count if bitrate is None else bitrate), + 'tbr': bitrate, + 'filesize': filesize, + 'width': width, + 'height': height, + }) + continue + + src_url = src if src.startswith('http') else compat_urlparse.urljoin(base, src) + + if proto == 'm3u8' or src_ext == 'm3u8': + 
formats.extend(self._extract_m3u8_formats( + src_url, video_id, ext or 'mp4', m3u8_id='hls')) + continue + + if src_ext == 'f4m': + f4m_url = src_url + if not f4m_params: + f4m_params = { + 'hdcore': '3.2.0', + 'plugin': 'flowplayer-3.2.0.1', + } + f4m_url += '&' if '?' in f4m_url else '?' + f4m_url += compat_urllib_parse.urlencode(f4m_params).encode('utf-8') + formats.extend(self._extract_f4m_formats(f4m_url, video_id, f4m_id='hds')) + continue + + if src_url.startswith('http'): + http_count += 1 + formats.append({ + 'url': src_url, + 'ext': ext or src_ext or 'flv', + 'format_id': 'http-%d' % (bitrate or http_count), + 'tbr': bitrate, + 'filesize': filesize, + 'width': width, + 'height': height, + }) + continue self._sort_formats(formats) return formats - def _parse_smil_video(self, video, video_id, base, rtmp_count): - src = video.get('src') - if not src: - return [], rtmp_count - bitrate = int_or_none(video.get('system-bitrate') or video.get('systemBitrate'), 1000) - width = int_or_none(video.get('width')) - height = int_or_none(video.get('height')) - proto = video.get('proto') - if not proto: - if base: - if base.startswith('rtmp'): - proto = 'rtmp' - elif base.startswith('http'): - proto = 'http' - ext = video.get('ext') - if proto == 'm3u8': - return self._extract_m3u8_formats(src, video_id, ext), rtmp_count - elif proto == 'rtmp': - rtmp_count += 1 - streamer = video.get('streamer') or base - return ([{ - 'url': streamer, - 'play_path': src, - 'ext': 'flv', - 'format_id': 'rtmp-%d' % (rtmp_count if bitrate is None else bitrate), - 'tbr': bitrate, - 'width': width, - 'height': height, - }], rtmp_count) - elif proto.startswith('http'): - return ([{ - 'url': base + src, - 'ext': ext or 'flv', - 'tbr': bitrate, - 'width': width, - 'height': height, - }], rtmp_count) + def _parse_smil_subtitles(self, smil, namespace=None): + subtitles = {} + for num, textstream in enumerate(smil.findall(self._xpath_ns('.//textstream', namespace))): + src = textstream.get('src') + if not src: + continue + ext = textstream.get('ext') or determine_ext(src) + if not ext: + type_ = textstream.get('type') + if type_ == 'text/srt': + ext = 'srt' + lang = textstream.get('systemLanguage') or textstream.get('systemLanguageName') + subtitles.setdefault(lang, []).append({ + 'url': src, + 'ext': ext, + }) + return subtitles def _live_title(self, name): """ Generate the title for a live video """ From e5e8d20a3a65832c74b002f247866fcbb92e9246 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 2 Aug 2015 01:13:59 +0600 Subject: [PATCH 05/73] [extractor/generic] Improve generic SMIL detection --- youtube_dl/extractor/generic.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 8cef61c3c..6900ed96f 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -1110,11 +1110,13 @@ class GenericIE(InfoExtractor): self.report_extraction(video_id) - # Is it an RSS feed? + # Is it an RSS feed or a SMIL file? 
try: doc = parse_xml(webpage) if doc.tag == 'rss': return self._extract_rss(url, video_id, doc) + elif re.match(r'^(?:{[^}]+})?smil$', doc.tag): + return self._parse_smil(doc, url, video_id) except compat_xml_parse_error: pass From 308cfe0ab3ec7122602ba2d6a4e3acd2caa7a757 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 2 Aug 2015 01:14:41 +0600 Subject: [PATCH 06/73] [test_downloader] Respect --force-generic-extractor --- test/test_download.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/test/test_download.py b/test/test_download.py index 1110357a7..284418834 100644 --- a/test/test_download.py +++ b/test/test_download.py @@ -136,7 +136,9 @@ def generator(test_case): # We're not using .download here sine that is just a shim # for outside error handling, and returns the exit code # instead of the result dict. - res_dict = ydl.extract_info(test_case['url']) + res_dict = ydl.extract_info( + test_case['url'], + force_generic_extractor=params.get('force_generic_extractor', False)) except (DownloadError, ExtractorError) as err: # Check if the exception is not a network related one if not err.exc_info[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError, compat_http_client.BadStatusLine) or (err.exc_info[0] == compat_HTTPError and err.exc_info[1].code == 503): From 645f814544f9d40386e504a1eb8cf3558f2c109e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 2 Aug 2015 01:15:33 +0600 Subject: [PATCH 07/73] [test/helper] Allow dicts for mincount --- test/helper.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/helper.py b/test/helper.py index e1129e58f..c8b34654d 100644 --- a/test/helper.py +++ b/test/helper.py @@ -133,8 +133,8 @@ def expect_info_dict(self, got_dict, expected_dict): elif isinstance(expected, compat_str) and expected.startswith('mincount:'): got = got_dict.get(info_field) self.assertTrue( - isinstance(got, list), - 'Expected field %s to be a list, but it is of type %s' % ( + isinstance(got, (list, dict)), + 'Expected field %s to be a list or a dict, but it is of type %s' % ( info_field, type(got).__name__)) expected_num = int(expected.partition(':')[2]) assertGreaterEqual( From 8765222d2211cd6f2a40611249181af0bbb2d531 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 2 Aug 2015 01:16:21 +0600 Subject: [PATCH 08/73] [extractor/generic] Add generic SMIL tests --- youtube_dl/extractor/generic.py | 68 +++++++++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 6900ed96f..27584c44c 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -130,6 +130,74 @@ class GenericIE(InfoExtractor): 'title': 'pdv_maddow_netcast_m4v-02-27-2015-201624', } }, + # SMIL from http://videolectures.net/promogram_igor_mekjavic_eng + { + 'url': 'http://videolectures.net/promogram_igor_mekjavic_eng/video/1/smil.xml', + 'info_dict': { + 'id': 'smil', + 'ext': 'mp4', + 'title': 'Automatics, robotics and biocybernetics', + 'description': 'md5:815fc1deb6b3a2bff99de2d5325be482', + 'formats': 'mincount:16', + 'subtitles': 'mincount:1', + }, + 'params': { + 'force_generic_extractor': True, + 'skip_download': True, + }, + }, + # SMIL from http://www1.wdr.de/mediathek/video/livestream/index.html + { + 'url': 'http://metafilegenerator.de/WDR/WDR_FS/hds/hds.smil', + 'info_dict': { + 'id': 'hds', + 'ext': 'flv', + 'title': 'hds', + 'formats': 'mincount:1', + }, + 
'params': { + 'skip_download': True, + }, + }, + # SMIL from https://www.restudy.dk/video/play/id/1637 + { + 'url': 'https://www.restudy.dk/awsmedia/SmilDirectory/video_1637.xml', + 'info_dict': { + 'id': 'video_1637', + 'ext': 'flv', + 'title': 'video_1637', + 'formats': 'mincount:3', + }, + 'params': { + 'skip_download': True, + }, + }, + # SMIL from http://adventure.howstuffworks.com/5266-cool-jobs-iditarod-musher-video.htm + { + 'url': 'http://services.media.howstuffworks.com/videos/450221/smil-service.smil', + 'info_dict': { + 'id': 'smil-service', + 'ext': 'flv', + 'title': 'smil-service', + 'formats': 'mincount:1', + }, + 'params': { + 'skip_download': True, + }, + }, + # SMIL from http://new.livestream.com/CoheedandCambria/WebsterHall/videos/4719370 + { + 'url': 'http://api.new.livestream.com/accounts/1570303/events/1585861/videos/4719370.smil', + 'info_dict': { + 'id': '4719370', + 'ext': 'mp4', + 'title': '571de1fd-47bc-48db-abf9-238872a58d1f', + 'formats': 'mincount:3', + }, + 'params': { + 'skip_download': True, + }, + }, # google redirect { 'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE', From 41c3a5a7beebbf5f60c5edb5093d564f0829c5c1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 2 Aug 2015 01:20:49 +0600 Subject: [PATCH 09/73] [extractor/common] Fix python 3 --- youtube_dl/extractor/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index f9578b838..c123d9fca 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1101,7 +1101,7 @@ class InfoExtractor(object): 'plugin': 'flowplayer-3.2.0.1', } f4m_url += '&' if '?' in f4m_url else '?' 
- f4m_url += compat_urllib_parse.urlencode(f4m_params).encode('utf-8') + f4m_url += compat_urllib_parse.urlencode(f4m_params) formats.extend(self._extract_f4m_formats(f4m_url, video_id, f4m_id='hds')) continue From 17712eeb1933f53696c1fc53606174e988a96472 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 2 Aug 2015 01:31:17 +0600 Subject: [PATCH 10/73] [extractor/common] Extract namespace parse routine --- youtube_dl/extractor/common.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index c123d9fca..717dcec7b 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -999,8 +999,7 @@ class InfoExtractor(object): assert not fatal return [] - namespace = self._search_regex( - r'{([^}]+)?}smil', smil.tag, 'namespace', default=None) + namespace = self._parse_smil_namespace(smil) return self._parse_smil_formats( smil, smil_url, video_id, namespace=namespace, f4m_params=f4m_params) @@ -1017,8 +1016,7 @@ class InfoExtractor(object): 'Unable to download SMIL file', fatal=fatal) def _parse_smil(self, smil, smil_url, video_id, f4m_params=None): - namespace = self._search_regex( - r'{([^}]+)?}smil', smil.tag, 'namespace', default=None) + namespace = self._parse_smil_namespace(smil) formats = self._parse_smil_formats( smil, smil_url, video_id, namespace=namespace, f4m_params=f4m_params) @@ -1045,6 +1043,10 @@ class InfoExtractor(object): 'subtitles': subtitles, } + def _parse_smil_namespace(self, smil): + return self._search_regex( + r'(?i)^{([^}]+)?}smil$', smil.tag, 'namespace', default=None) + def _parse_smil_formats(self, smil, smil_url, video_id, namespace=None, f4m_params=None): base = smil_url for meta in smil.findall(self._xpath_ns('./head/meta', namespace)): From d41d04c0f513ad3b83ab6aee60cf2201710b6063 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 2 Aug 2015 06:35:35 +0600 Subject: [PATCH 11/73] [videolectures] Fix _VALID_URL --- youtube_dl/extractor/videolecturesnet.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/videolecturesnet.py b/youtube_dl/extractor/videolecturesnet.py index d6a7eb203..24584dc80 100644 --- a/youtube_dl/extractor/videolecturesnet.py +++ b/youtube_dl/extractor/videolecturesnet.py @@ -12,7 +12,7 @@ from ..utils import ( class VideoLecturesNetIE(InfoExtractor): - _VALID_URL = r'http://(?:www\.)?videolectures\.net/(?P[^/#?]+)/' + _VALID_URL = r'http://(?:www\.)?videolectures\.net/(?P[^/#?]+)(?:/?[#?].*)?$' IE_NAME = 'videolectures.net' _TEST = { From 51f267d9d4d26c3cd67f318a2040513946f2b4d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 6 Aug 2015 22:01:01 +0600 Subject: [PATCH 12/73] [YoutubeDL:utils] Move percent encode non-ASCII URLs workaround to http_request and simplify (Closes #6457) --- youtube_dl/YoutubeDL.py | 21 --------------------- youtube_dl/utils.py | 20 ++++++++++++++++++++ 2 files changed, 20 insertions(+), 21 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 1446b3254..079d42ce8 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1860,27 +1860,6 @@ class YoutubeDL(object): def urlopen(self, req): """ Start an HTTP download """ - - # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not - # always respected by websites, some tend to give out URLs with non percent-encoded - # non-ASCII characters (see telemb.py, ard.py [#3412]) - # urllib 
chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991) - # To work around aforementioned issue we will replace request's original URL with - # percent-encoded one - req_is_string = isinstance(req, compat_basestring) - url = req if req_is_string else req.get_full_url() - url_escaped = escape_url(url) - - # Substitute URL if any change after escaping - if url != url_escaped: - if req_is_string: - req = url_escaped - else: - req_type = HEADRequest if req.get_method() == 'HEAD' else compat_urllib_request.Request - req = req_type( - url_escaped, data=req.data, headers=req.headers, - origin_req_host=req.origin_req_host, unverifiable=req.unverifiable) - return self._opener.open(req, timeout=self._socket_timeout) def print_debug_header(self): diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 78dc2b449..c7db75f80 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -651,6 +651,26 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler): return ret def http_request(self, req): + # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not + # always respected by websites, some tend to give out URLs with non percent-encoded + # non-ASCII characters (see telemb.py, ard.py [#3412]) + # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991) + # To work around aforementioned issue we will replace request's original URL with + # percent-encoded one + # Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09) + # the code of this workaround has been moved here from YoutubeDL.urlopen() + url = req.get_full_url() + url_escaped = escape_url(url) + + # Substitute URL if any change after escaping + if url != url_escaped: + req_type = HEADRequest if req.get_method() == 'HEAD' else compat_urllib_request.Request + new_req = req_type( + url_escaped, data=req.data, headers=req.headers, + origin_req_host=req.origin_req_host, unverifiable=req.unverifiable) + new_req.timeout = req.timeout + req = new_req + for h, v in std_headers.items(): # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275 # The dict keys are capitalized because of this bug by urllib From bd690a9f9368095f561184778fb2f3ef12c66342 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 6 Aug 2015 22:01:31 +0600 Subject: [PATCH 13/73] [southpark:de] Add test for non-ASCII in URLs --- youtube_dl/extractor/southpark.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/youtube_dl/extractor/southpark.py b/youtube_dl/extractor/southpark.py index 7fb165a87..87b650468 100644 --- a/youtube_dl/extractor/southpark.py +++ b/youtube_dl/extractor/southpark.py @@ -45,6 +45,14 @@ class SouthParkDeIE(SouthParkIE): 'title': 'The Government Won\'t Respect My Privacy', 'description': 'Cartman explains the benefits of "Shitter" to Stan, Kyle and Craig.', }, + }, { + # non-ASCII characters in initial URL + 'url': 'http://www.southpark.de/alle-episoden/s18e09-hashtag-aufwärmen', + 'playlist_count': 4, + }, { + # non-ASCII characters in redirect URL + 'url': 'http://www.southpark.de/alle-episoden/s18e09', + 'playlist_count': 4, }] From 4f34cdb0a87a506d25a352ff265678c86cb9b979 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 6 Aug 2015 23:56:44 +0600 Subject: [PATCH 14/73] [southpark:de] Skip test --- youtube_dl/extractor/southpark.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/southpark.py b/youtube_dl/extractor/southpark.py index 
87b650468..ad63a8785 100644 --- a/youtube_dl/extractor/southpark.py +++ b/youtube_dl/extractor/southpark.py @@ -53,6 +53,7 @@ class SouthParkDeIE(SouthParkIE): # non-ASCII characters in redirect URL 'url': 'http://www.southpark.de/alle-episoden/s18e09', 'playlist_count': 4, + 'skip': 'Broken python 3', }] From 671302b5c0ff8cefa5f26e599423ef7799b19631 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 7 Aug 2015 00:08:11 +0600 Subject: [PATCH 15/73] [YoutubeDL] Remove unused imports --- youtube_dl/YoutubeDL.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 079d42ce8..cad6b026e 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -28,7 +28,6 @@ if os.name == 'nt': import ctypes from .compat import ( - compat_basestring, compat_cookiejar, compat_expanduser, compat_get_terminal_size, @@ -40,7 +39,6 @@ from .compat import ( compat_urllib_request, ) from .utils import ( - escape_url, ContentTooShortError, date_from_str, DateRange, @@ -51,7 +49,6 @@ from .utils import ( ExtractorError, format_bytes, formatSeconds, - HEADRequest, locked_file, make_HTTPS_handler, MaxDownloadsReached, From cd6b555e19c601d575679dd29da0080eda7f8890 Mon Sep 17 00:00:00 2001 From: remitamine Date: Thu, 6 Aug 2015 19:17:50 +0100 Subject: [PATCH 16/73] [dcn] add origin to api request and fix the test and check with flake8 --- youtube_dl/extractor/dcn.py | 30 ++++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/dcn.py b/youtube_dl/extractor/dcn.py index f76ebda9e..d44e8cef0 100644 --- a/youtube_dl/extractor/dcn.py +++ b/youtube_dl/extractor/dcn.py @@ -1,4 +1,9 @@ +# coding: utf-8 +from __future__ import unicode_literals + from .common import InfoExtractor +from ..compat import compat_urllib_request + class DcnIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?dcndigital\.ae/(?:#/)?(?:video/.+|show/\d+/.+?)/(?P\d+)/?' 
@@ -9,24 +14,29 @@ class DcnIE(InfoExtractor): 'id': '17375', 'ext': 'm3u8', 'title': 'رحلة العمر : الحلقة 1', - 'description': '"في هذه الحلقة من برنامج رحلة العمر يقدّم الدكتور عمر عبد الكافي تبسيطاً لمناسك الحج والعمرة ويجيب مباشرة على استفسارات حجاج بيت الله الحرام بخصوص مناسك الحج والعمرة1"', + 'description': 'في هذه الحلقة من برنامج رحلة العمر يقدّم الدكتور عمر عبد الكافي تبسيطاً لمناسك الحج والعمرة ويجيب مباشرة على استفسارات حجاج بيت الله الحرام بخصوص مناسك الحج والعمرة\n1', 'thumbnail': 'http://admin.mangomolo.com/analytics/uploads/71/images/media/2/2cefc09d7bec80afa754682f40e49503.jpg', 'duration': '2041' - } + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, } def _real_extract(self, url): video_id = self._match_id(url) - json_data = self._download_json( - 'http://admin.mangomolo.com/analytics/index.php/plus/video?id='+video_id, - video_id + request = compat_urllib_request.Request( + 'http://admin.mangomolo.com/analytics/index.php/plus/video?id=' + video_id, + headers={'Origin': 'http://www.dcndigital.ae'} ) - title = json_data['title_ar']; - thumbnail = 'http://admin.mangomolo.com/analytics/'+json_data['img']; - duration = json_data['duration']; - description = json_data['description_ar']; + json_data = self._download_json(request, video_id) + title = json_data['title_ar'] + thumbnail = 'http://admin.mangomolo.com/analytics/' + json_data['img'] + duration = json_data['duration'] + description = json_data['description_ar'] webpage = self._download_webpage( - 'http://admin.mangomolo.com/analytics/index.php/customers/embed/video?id='+json_data['id']+'&user_id='+json_data['user_id']+'&countries=Q0M=&w=100%&h=100%&filter=DENY&signature='+json_data['signature'], + 'http://admin.mangomolo.com/analytics/index.php/customers/embed/video?id=' + json_data['id'] + '&user_id=' + json_data['user_id'] + '&countries=Q0M=&w=100%&h=100%&filter=DENY&signature=' + json_data['signature'], video_id ) m3u8_url = self._html_search_regex( From 5a4d9ddb218e761fe7ab15d197690e0cb132a536 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 7 Aug 2015 01:26:40 +0600 Subject: [PATCH 17/73] [utils] Percent-encode redirect URL of Location header (Closes #6457) --- youtube_dl/utils.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index c7db75f80..e265c7574 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -715,6 +715,17 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler): gz = io.BytesIO(self.deflate(resp.read())) resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code) resp.msg = old_resp.msg + # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 + if 300 <= resp.code < 400: + location = resp.headers.get('Location') + if location: + # As of RFC 2616 default charset is iso-8859-1 that is respected by python 3 + if sys.version_info >= (3, 0): + location = location.encode('iso-8859-1').decode('utf-8') + location_escaped = escape_url(location) + if location != location_escaped: + del resp.headers['Location'] + resp.headers['Location'] = location_escaped return resp https_request = http_request From 9663bd3abb78911bddad75742bd41006677d628e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 7 Aug 2015 01:27:07 +0600 Subject: [PATCH 18/73] [southpark:de] Enable non-ASCII redirect URL test --- youtube_dl/extractor/southpark.py | 1 - 1 file changed, 1 deletion(-) diff --git a/youtube_dl/extractor/southpark.py 
b/youtube_dl/extractor/southpark.py index ad63a8785..87b650468 100644 --- a/youtube_dl/extractor/southpark.py +++ b/youtube_dl/extractor/southpark.py @@ -53,7 +53,6 @@ class SouthParkDeIE(SouthParkIE): # non-ASCII characters in redirect URL 'url': 'http://www.southpark.de/alle-episoden/s18e09', 'playlist_count': 4, - 'skip': 'Broken python 3', }] From 3eb5fdb58112032a9831eda1d2e3b8a151ea217f Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Thu, 6 Aug 2015 22:55:43 +0200 Subject: [PATCH 19/73] release 2015.08.06 --- youtube_dl/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index fa157cadb..b81d5e658 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2015.07.28' +__version__ = '2015.08.06' From 430b092a5f59fbe407b92ebcb0c42b9f7062a334 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Thu, 6 Aug 2015 23:06:21 +0200 Subject: [PATCH 20/73] release 2015.08.06.1 --- youtube_dl/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index b81d5e658..9f209499c 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2015.08.06' +__version__ = '2015.08.06.1' From 6d30cf04db9c9662dbb30c2490e24eb5c6dca4c3 Mon Sep 17 00:00:00 2001 From: remitamine Date: Fri, 7 Aug 2015 10:01:18 +0100 Subject: [PATCH 21/73] [dcn] fix type and key errors --- youtube_dl/extractor/dcn.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/dcn.py b/youtube_dl/extractor/dcn.py index d44e8cef0..22ff35b56 100644 --- a/youtube_dl/extractor/dcn.py +++ b/youtube_dl/extractor/dcn.py @@ -3,6 +3,7 @@ from __future__ import unicode_literals from .common import InfoExtractor from ..compat import compat_urllib_request +from ..utils import int_or_none class DcnIE(InfoExtractor): @@ -16,7 +17,7 @@ class DcnIE(InfoExtractor): 'title': 'رحلة العمر : الحلقة 1', 'description': 'في هذه الحلقة من برنامج رحلة العمر يقدّم الدكتور عمر عبد الكافي تبسيطاً لمناسك الحج والعمرة ويجيب مباشرة على استفسارات حجاج بيت الله الحرام بخصوص مناسك الحج والعمرة\n1', 'thumbnail': 'http://admin.mangomolo.com/analytics/uploads/71/images/media/2/2cefc09d7bec80afa754682f40e49503.jpg', - 'duration': '2041' + 'duration': 2041 }, 'params': { # m3u8 download @@ -32,9 +33,9 @@ class DcnIE(InfoExtractor): ) json_data = self._download_json(request, video_id) title = json_data['title_ar'] - thumbnail = 'http://admin.mangomolo.com/analytics/' + json_data['img'] - duration = json_data['duration'] - description = json_data['description_ar'] + thumbnail = 'http://admin.mangomolo.com/analytics/' + json_data.get('img') + duration = int_or_none(json_data.get('duration')) + description = json_data.get('description_ar') webpage = self._download_webpage( 'http://admin.mangomolo.com/analytics/index.php/customers/embed/video?id=' + json_data['id'] + '&user_id=' + json_data['user_id'] + '&countries=Q0M=&w=100%&h=100%&filter=DENY&signature=' + json_data['signature'], video_id From 8002ac9e0a88d918735c06599dbf8f2005f79666 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 7 Aug 2015 19:04:44 +0600 Subject: [PATCH 22/73] [nowtv] Add support for .at TLD --- youtube_dl/extractor/nowtv.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/nowtv.py b/youtube_dl/extractor/nowtv.py 
index ad938fb62..78e8851c0 100644 --- a/youtube_dl/extractor/nowtv.py +++ b/youtube_dl/extractor/nowtv.py @@ -14,7 +14,7 @@ from ..utils import ( class NowTVIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?nowtv\.de/(?:rtl|rtl2|rtlnitro|superrtl|ntv|vox)/(?P.+?)/(?:player|preview)' + _VALID_URL = r'https?://(?:www\.)?nowtv\.(?:de|at)/(?:rtl|rtl2|rtlnitro|superrtl|ntv|vox)/(?P.+?)/(?:player|preview)' _TESTS = [{ # rtl From acc1adbe7ab93657cd4d303cee1fba4464931a50 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 7 Aug 2015 19:50:54 +0600 Subject: [PATCH 23/73] [nowtv] Add support for .ch TLD --- youtube_dl/extractor/nowtv.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/nowtv.py b/youtube_dl/extractor/nowtv.py index 78e8851c0..fc21d8e3f 100644 --- a/youtube_dl/extractor/nowtv.py +++ b/youtube_dl/extractor/nowtv.py @@ -14,7 +14,7 @@ from ..utils import ( class NowTVIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?nowtv\.(?:de|at)/(?:rtl|rtl2|rtlnitro|superrtl|ntv|vox)/(?P.+?)/(?:player|preview)' + _VALID_URL = r'https?://(?:www\.)?nowtv\.(?:de|at|ch)/(?:rtl|rtl2|rtlnitro|superrtl|ntv|vox)/(?P.+?)/(?:player|preview)' _TESTS = [{ # rtl From 0f422256d6eea5aff062a4c35d7434cd118c7a0b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 7 Aug 2015 19:51:09 +0600 Subject: [PATCH 24/73] [nowtv] Add .at test --- youtube_dl/extractor/nowtv.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/youtube_dl/extractor/nowtv.py b/youtube_dl/extractor/nowtv.py index fc21d8e3f..66c627bec 100644 --- a/youtube_dl/extractor/nowtv.py +++ b/youtube_dl/extractor/nowtv.py @@ -127,6 +127,9 @@ class NowTVIE(InfoExtractor): }, { 'url': 'http://www.nowtv.de/rtl/bauer-sucht-frau/die-neuen-bauern-und-eine-hochzeit/preview', 'only_matching': True, + }, { + 'url': 'http://www.nowtv.at/rtl/bauer-sucht-frau/die-neuen-bauern-und-eine-hochzeit/preview?return=/rtl/bauer-sucht-frau/die-neuen-bauern-und-eine-hochzeit', + 'only_matching': True, }] def _real_extract(self, url): From f94639fadf91312bf3365802981f506ecba698dc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 8 Aug 2015 00:06:03 +0600 Subject: [PATCH 25/73] [dcn] Improve --- youtube_dl/extractor/__init__.py | 2 +- youtube_dl/extractor/dcn.py | 78 ++++++++++++++++++++++---------- 2 files changed, 54 insertions(+), 26 deletions(-) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index eb8ef1fe3..922d9b3d8 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -118,7 +118,7 @@ from .dailymotion import ( ) from .daum import DaumIE from .dbtv import DBTVIE -from .dcn import DcnIE +from .dcn import DCNIE from .dctp import DctpTvIE from .deezer import DeezerPlaylistIE from .dfb import DFBIE diff --git a/youtube_dl/extractor/dcn.py b/youtube_dl/extractor/dcn.py index 22ff35b56..b98a6c032 100644 --- a/youtube_dl/extractor/dcn.py +++ b/youtube_dl/extractor/dcn.py @@ -2,22 +2,30 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..compat import compat_urllib_request -from ..utils import int_or_none +from ..compat import ( + compat_urllib_parse, + compat_urllib_request, +) +from ..utils import ( + int_or_none, + parse_iso8601, +) -class DcnIE(InfoExtractor): +class DCNIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?dcndigital\.ae/(?:#/)?(?:video/.+|show/\d+/.+?)/(?P\d+)/?' 
_TEST = { 'url': 'http://www.dcndigital.ae/#/show/199074/%D8%B1%D8%AD%D9%84%D8%A9-%D8%A7%D9%84%D8%B9%D9%85%D8%B1-%D8%A7%D9%84%D8%AD%D9%84%D9%82%D8%A9-1/17375/6887', 'info_dict': { 'id': '17375', - 'ext': 'm3u8', + 'ext': 'mp4', 'title': 'رحلة العمر : الحلقة 1', - 'description': 'في هذه الحلقة من برنامج رحلة العمر يقدّم الدكتور عمر عبد الكافي تبسيطاً لمناسك الحج والعمرة ويجيب مباشرة على استفسارات حجاج بيت الله الحرام بخصوص مناسك الحج والعمرة\n1', - 'thumbnail': 'http://admin.mangomolo.com/analytics/uploads/71/images/media/2/2cefc09d7bec80afa754682f40e49503.jpg', - 'duration': 2041 + 'description': 'md5:0156e935d870acb8ef0a66d24070c6d6', + 'thumbnail': 're:^https?://.*\.jpg$', + 'duration': 2041, + 'timestamp': 1227504126, + 'upload_date': '20081124', }, 'params': { # m3u8 download @@ -27,30 +35,50 @@ class DcnIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) + request = compat_urllib_request.Request( - 'http://admin.mangomolo.com/analytics/index.php/plus/video?id=' + video_id, - headers={'Origin': 'http://www.dcndigital.ae'} - ) - json_data = self._download_json(request, video_id) - title = json_data['title_ar'] - thumbnail = 'http://admin.mangomolo.com/analytics/' + json_data.get('img') - duration = int_or_none(json_data.get('duration')) - description = json_data.get('description_ar') + 'http://admin.mangomolo.com/analytics/index.php/plus/video?id=%s' % video_id, + headers={'Origin': 'http://www.dcndigital.ae'}) + + video = self._download_json(request, video_id) + title = video.get('title_en') or video['title_ar'] + webpage = self._download_webpage( - 'http://admin.mangomolo.com/analytics/index.php/customers/embed/video?id=' + json_data['id'] + '&user_id=' + json_data['user_id'] + '&countries=Q0M=&w=100%&h=100%&filter=DENY&signature=' + json_data['signature'], - video_id - ) - m3u8_url = self._html_search_regex( - r'file:\s*"([^"]+)', - webpage, - 'm3u8_url' - ) - formats = self._extract_m3u8_formats(m3u8_url, video_id) + 'http://admin.mangomolo.com/analytics/index.php/customers/embed/video?' 
+ + compat_urllib_parse.urlencode({ + 'id': video['id'], + 'user_id': video['user_id'], + 'signature': video['signature'], + 'countries': 'Q0M=', + 'filter': 'DENY', + }), video_id) + + m3u8_url = self._html_search_regex(r'file:\s*"([^"]+)', webpage, 'm3u8 url') + formats = self._extract_m3u8_formats( + m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls') + + rtsp_url = self._search_regex( + r']+href="(rtsp://[^"]+)"', webpage, 'rtsp url', fatal=False) + if rtsp_url: + formats.append({ + 'url': rtsp_url, + 'format_id': 'rtsp', + }) + + self._sort_formats(formats) + + img = video.get('img') + thumbnail = 'http://admin.mangomolo.com/analytics/%s' % img if img else None + duration = int_or_none(video.get('duration')) + description = video.get('description_en') or video.get('description_ar') + timestamp = parse_iso8601(video.get('create_time') or video.get('update_time'), ' ') + return { 'id': video_id, 'title': title, + 'description': description, 'thumbnail': thumbnail, 'duration': duration, - 'description': description, + 'timestamp': timestamp, 'formats': formats, } From 4a7434d0b09e14b773c2d278c8299efa6225b84e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 8 Aug 2015 00:19:40 +0600 Subject: [PATCH 26/73] [dcn] Simplify _VALID_URL --- youtube_dl/extractor/dcn.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/dcn.py b/youtube_dl/extractor/dcn.py index b98a6c032..82261e25c 100644 --- a/youtube_dl/extractor/dcn.py +++ b/youtube_dl/extractor/dcn.py @@ -13,7 +13,7 @@ from ..utils import ( class DCNIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?dcndigital\.ae/(?:#/)?(?:video/.+|show/\d+/.+?)/(?P\d+)/?' + _VALID_URL = r'https?://(?:www\.)?dcndigital\.ae/(?:#/)?(?:video/.+|show/\d+/.+?)/(?P\d+)' _TEST = { 'url': 'http://www.dcndigital.ae/#/show/199074/%D8%B1%D8%AD%D9%84%D8%A9-%D8%A7%D9%84%D8%B9%D9%85%D8%B1-%D8%A7%D9%84%D8%AD%D9%84%D9%82%D8%A9-1/17375/6887', 'info_dict': From fd5d8270dcd6d8baada3390a4a1cae5bdbcb6da4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 8 Aug 2015 01:10:41 +0600 Subject: [PATCH 27/73] [clipfish] Fix extraction, minimize requests, get rid of drm hds, extract m3u8 and more metadata --- youtube_dl/extractor/clipfish.py | 56 ++++++++++++++++++++------------ 1 file changed, 35 insertions(+), 21 deletions(-) diff --git a/youtube_dl/extractor/clipfish.py b/youtube_dl/extractor/clipfish.py index 09dfaac60..7af903571 100644 --- a/youtube_dl/extractor/clipfish.py +++ b/youtube_dl/extractor/clipfish.py @@ -1,18 +1,19 @@ from __future__ import unicode_literals +import re + from .common import InfoExtractor from ..utils import ( - ExtractorError, + determine_ext, int_or_none, js_to_json, - determine_ext, + parse_iso8601, + remove_end, ) class ClipfishIE(InfoExtractor): - IE_NAME = 'clipfish' - - _VALID_URL = r'^https?://(?:www\.)?clipfish\.de/.*?/video/(?P[0-9]+)/' + _VALID_URL = r'https?://(?:www\.)?clipfish\.de/(?:[^/]+/)+video/(?P[0-9]+)' _TEST = { 'url': 'http://www.clipfish.de/special/game-trailer/video/3966754/fifa-14-e3-2013-trailer/', 'md5': '79bc922f3e8a9097b3d68a93780fd475', @@ -20,35 +21,48 @@ class ClipfishIE(InfoExtractor): 'id': '3966754', 'ext': 'mp4', 'title': 'FIFA 14 - E3 2013 Trailer', + 'timestamp': 1370938118, + 'upload_date': '20130611', 'duration': 82, } } def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - video_info = self._parse_json( - js_to_json(self._html_search_regex('var 
videoObject = ({[^}]+?})', webpage, 'videoObject')), - video_id - ) - info_url = self._parse_json( - js_to_json(self._html_search_regex('var globalFlashvars = ({[^}]+?})', webpage, 'globalFlashvars')), - video_id - )['data'] - doc = self._download_xml( - info_url, video_id, note='Downloading info page') - title = doc.find('title').text - video_url = doc.find('filename').text - thumbnail = doc.find('imageurl').text - duration = int_or_none(video_info['length']) - formats = [{'url': video_info['videourl']},{'url': video_url}] + webpage = self._download_webpage(url, video_id) + + video_info = self._parse_json( + js_to_json(self._html_search_regex( + '(?s)videoObject\s*=\s*({.+?});', webpage, 'video object')), + video_id) + + formats = [] + for video_url in re.findall(r'var\s+videourl\s*=\s*"([^"]+)"', webpage): + ext = determine_ext(video_url) + if ext == 'm3u8': + formats.append({ + 'url': video_url.replace('de.hls.fra.clipfish.de', 'hls.fra.clipfish.de'), + 'ext': 'mp4', + 'format_id': 'hls', + }) + else: + formats.append({ + 'url': video_url, + 'format_id': ext, + }) self._sort_formats(formats) + title = remove_end(self._og_search_title(webpage), ' - Video') + thumbnail = self._og_search_thumbnail(webpage) + duration = int_or_none(video_info.get('length')) + timestamp = parse_iso8601(self._html_search_meta('uploadDate', webpage, 'upload date')) + return { 'id': video_id, 'title': title, 'formats': formats, 'thumbnail': thumbnail, 'duration': duration, + 'timestamp': timestamp, } From 8a37aa1517ccc474b3e2831b77e48534cb8ed47c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 8 Aug 2015 01:55:59 +0600 Subject: [PATCH 28/73] [extractor/generic] Expand ooyala regex (Closes #6485) --- youtube_dl/extractor/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 6df89f814..649c0bce6 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -1320,7 +1320,7 @@ class GenericIE(InfoExtractor): return self.url_result(mobj.group('url')) # Look for Ooyala videos - mobj = (re.search(r'player\.ooyala\.com/[^"?]+\?[^"]*?(?:embedCode|ec)=(?P[^"&]+)', webpage) or + mobj = (re.search(r'player\.ooyala\.com/[^"?]+[?#][^"]*?(?:embedCode|ec)=(?P[^"&]+)', webpage) or re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P.{32})[\'"]', webpage) or re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P.{32})[\'"]\)', webpage) or re.search(r'data-ooyala-video-id\s*=\s*[\'"](?P.{32})[\'"]', webpage)) From bf94d763ba73e09fd77d25110c7219254b63c786 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 8 Aug 2015 02:00:49 +0600 Subject: [PATCH 29/73] [extractor/generic] Add test for #6485 --- youtube_dl/extractor/generic.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 649c0bce6..469909a51 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -236,6 +236,19 @@ class GenericIE(InfoExtractor): }, 'add_ie': ['Ooyala'], }, + { + # ooyala video embedded with http://player.ooyala.com/iframe.js + 'url': 'http://www.macrumors.com/2015/07/24/steve-jobs-the-man-in-the-machine-first-trailer/', + 'info_dict': { + 'id': 'p0MGJndjoG5SOKqO_hZJuZFPB-Tr5VgB', + 'ext': 'mp4', + 'title': '"Steve Jobs: Man in the Machine" trailer', + 'description': 'The first trailer for the Alex Gibney documentary "Steve Jobs: Man in the Machine."', + }, + 'params': { + 
'skip_download': True, + }, + }, # multiple ooyala embeds on SBN network websites { 'url': 'http://www.sbnation.com/college-football-recruiting/2015/2/3/7970291/national-signing-day-rationalizations-itll-be-ok-itll-be-ok', From e0ac521438218e978b9c4bbcd92cfc2d5fef79cb Mon Sep 17 00:00:00 2001 From: vijayanand nandam Date: Thu, 6 Aug 2015 22:42:58 +0530 Subject: [PATCH 30/73] adding support for axel download manager --- youtube_dl/downloader/external.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/youtube_dl/downloader/external.py b/youtube_dl/downloader/external.py index 1d5cc9904..30699934b 100644 --- a/youtube_dl/downloader/external.py +++ b/youtube_dl/downloader/external.py @@ -83,6 +83,16 @@ class CurlFD(ExternalFD): return cmd +class AxelFD(ExternalFD): + def _make_cmd(self, tmpfilename, info_dict): + cmd = [self.exe, '-o', tmpfilename] + for key, val in info_dict['http_headers'].items(): + cmd += ['-H', '%s: %s' % (key, val)] + cmd += self._configuration_args() + cmd += ['--', info_dict['url']] + return cmd + + class WgetFD(ExternalFD): def _make_cmd(self, tmpfilename, info_dict): cmd = [self.exe, '-O', tmpfilename, '-nv', '--no-cookies'] From 5b0c40da24b5ddb789428de731e02ac8759a363c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 8 Aug 2015 03:36:29 +0600 Subject: [PATCH 31/73] [extractor/common] Expand meta regex --- youtube_dl/extractor/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index dc5080504..507ea5ec0 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -636,7 +636,7 @@ class InfoExtractor(object): @staticmethod def _meta_regex(prop): return r'''(?isx)]+(?:itemprop|name|property)=(["\']?)%s\1) + (?=[^>]+(?:itemprop|name|property|id)=(["\']?)%s\1) [^>]+?content=(["\'])(?P.*?)\2''' % re.escape(prop) def _og_search_property(self, prop, html, name=None, **kargs): From 3550821fb4ca2f0e47542a7fa16b6543b06df724 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 8 Aug 2015 03:38:55 +0600 Subject: [PATCH 32/73] [periscope] Add extractor (Closes #5850, closes #6459) --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/periscope.py | 66 +++++++++++++++++++++++++++++++ 2 files changed, 67 insertions(+) create mode 100644 youtube_dl/extractor/periscope.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 922d9b3d8..bd86a5be2 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -432,6 +432,7 @@ from .orf import ( from .parliamentliveuk import ParliamentLiveUKIE from .patreon import PatreonIE from .pbs import PBSIE +from .periscope import PeriscopeIE from .philharmoniedeparis import PhilharmonieDeParisIE from .phoenix import PhoenixIE from .photobucket import PhotobucketIE diff --git a/youtube_dl/extractor/periscope.py b/youtube_dl/extractor/periscope.py new file mode 100644 index 000000000..5219e1a75 --- /dev/null +++ b/youtube_dl/extractor/periscope.py @@ -0,0 +1,66 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + parse_iso8601, + unescapeHTML, +) + + +class PeriscopeIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?periscope\.tv/w/(?P[^/?#]+)' + _TEST = { + 'url': 'https://www.periscope.tv/w/aJUQnjY3MjA3ODF8NTYxMDIyMDl2zCg2pECBgwTqRpQuQD352EMPTKQjT4uqlM3cgWFA-g==', + 'md5': '65b57957972e503fcbbaeed8f4fa04ca', + 'info_dict': { + 'id': 
'56102209', + 'ext': 'mp4', + 'title': 'Bec Boop - 🚠✈️🇬🇧 Fly above #London in Emirates Air Line cable car at night 🇬🇧✈️🚠 #BoopScope 🎀💗', + 'timestamp': 1438978559, + 'upload_date': '20150807', + 'uploader': 'Bec Boop', + 'uploader_id': '1465763', + }, + 'skip': 'Expires in 24 hours', + } + + def _real_extract(self, url): + video_id = self._match_id(url) + + replay = self._download_json( + 'https://api.periscope.tv/api/v2/getAccessPublic?token=%s' % video_id, video_id) + + video_url = replay['replay_url'] + + webpage = self._download_webpage(url, video_id) + + broadcast_data = self._parse_json( + unescapeHTML(self._html_search_meta( + 'broadcast-data', webpage, 'broadcast data', fatal=True)), + video_id) + + broadcast = broadcast_data['broadcast'] + status = broadcast['status'] + + uploader = broadcast.get('user_display_name') or broadcast_data.get('user', {}).get('display_name') + uploader_id = broadcast.get('user_id') or broadcast_data.get('user', {}).get('id') + + title = '%s - %s' % (uploader, status) if uploader else status + timestamp = parse_iso8601(broadcast.get('created_at')) + + thumbnails = [{ + 'url': broadcast[image], + } for image in ('image_url', 'image_url_small') if broadcast.get(image)] + + return { + 'id': broadcast.get('id') or video_id, + 'url': video_url, + 'ext': 'mp4', + 'protocol': 'm3u8_native', + 'title': title, + 'timestamp': timestamp, + 'uploader': uploader, + 'uploader_id': uploader_id, + 'thumbnails': thumbnails, + } From 621d6a9516e0f9cd8c45e12904f4d4b7615e7fb8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 8 Aug 2015 04:00:52 +0600 Subject: [PATCH 33/73] [periscope] Switch to API for broadcast data --- youtube_dl/extractor/periscope.py | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/youtube_dl/extractor/periscope.py b/youtube_dl/extractor/periscope.py index 5219e1a75..11648a511 100644 --- a/youtube_dl/extractor/periscope.py +++ b/youtube_dl/extractor/periscope.py @@ -25,21 +25,17 @@ class PeriscopeIE(InfoExtractor): 'skip': 'Expires in 24 hours', } + def _call_api(self, method, token): + return self._download_json( + 'https://api.periscope.tv/api/v2/%s?token=%s' % (method, token), token) + def _real_extract(self, url): - video_id = self._match_id(url) - - replay = self._download_json( - 'https://api.periscope.tv/api/v2/getAccessPublic?token=%s' % video_id, video_id) + token = self._match_id(url) + replay = self._call_api('getAccessPublic', token) video_url = replay['replay_url'] - webpage = self._download_webpage(url, video_id) - - broadcast_data = self._parse_json( - unescapeHTML(self._html_search_meta( - 'broadcast-data', webpage, 'broadcast data', fatal=True)), - video_id) - + broadcast_data = self._call_api('getBroadcastPublic', token) broadcast = broadcast_data['broadcast'] status = broadcast['status'] @@ -54,7 +50,7 @@ class PeriscopeIE(InfoExtractor): } for image in ('image_url', 'image_url_small') if broadcast.get(image)] return { - 'id': broadcast.get('id') or video_id, + 'id': broadcast.get('id') or token, 'url': video_url, 'ext': 'mp4', 'protocol': 'm3u8_native', From 1e83741c9a5d67e8bbe65510d41b558361496fe8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 8 Aug 2015 05:33:53 +0600 Subject: [PATCH 34/73] [periscope] Add support for running streams --- youtube_dl/extractor/periscope.py | 34 +++++++++++++++++++++++-------- 1 file changed, 25 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/periscope.py b/youtube_dl/extractor/periscope.py index 
11648a511..de53b752d 100644 --- a/youtube_dl/extractor/periscope.py +++ b/youtube_dl/extractor/periscope.py @@ -2,13 +2,15 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..utils import ( - parse_iso8601, - unescapeHTML, +from ..compat import ( + compat_urllib_parse, + compat_urllib_request, ) +from ..utils import parse_iso8601 class PeriscopeIE(InfoExtractor): + IE_DESC = 'Periscope' _VALID_URL = r'https?://(?:www\.)?periscope\.tv/w/(?P[^/?#]+)' _TEST = { 'url': 'https://www.periscope.tv/w/aJUQnjY3MjA3ODF8NTYxMDIyMDl2zCg2pECBgwTqRpQuQD352EMPTKQjT4uqlM3cgWFA-g==', @@ -32,9 +34,6 @@ class PeriscopeIE(InfoExtractor): def _real_extract(self, url): token = self._match_id(url) - replay = self._call_api('getAccessPublic', token) - video_url = replay['replay_url'] - broadcast_data = self._call_api('getBroadcastPublic', token) broadcast = broadcast_data['broadcast'] status = broadcast['status'] @@ -43,20 +42,37 @@ class PeriscopeIE(InfoExtractor): uploader_id = broadcast.get('user_id') or broadcast_data.get('user', {}).get('id') title = '%s - %s' % (uploader, status) if uploader else status + state = broadcast.get('state').lower() + if state == 'running': + title = self._live_title(title) timestamp = parse_iso8601(broadcast.get('created_at')) thumbnails = [{ 'url': broadcast[image], } for image in ('image_url', 'image_url_small') if broadcast.get(image)] + stream = self._call_api('getAccessPublic', token) + + formats = [] + for format_id in ('replay', 'rtmp', 'hls', 'https_hls'): + video_url = stream.get(format_id + '_url') + if not video_url: + continue + f = { + 'url': video_url, + 'ext': 'flv' if format_id == 'rtmp' else 'mp4', + } + if format_id != 'rtmp': + f['protocol'] = 'm3u8_native' if state == 'ended' else 'm3u8' + formats.append(f) + self._sort_formats(formats) + return { 'id': broadcast.get('id') or token, - 'url': video_url, - 'ext': 'mp4', - 'protocol': 'm3u8_native', 'title': title, 'timestamp': timestamp, 'uploader': uploader, 'uploader_id': uploader_id, 'thumbnails': thumbnails, + 'formats': formats, } From 428e4e4a850df81031e8267dddf759da605639e3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 8 Aug 2015 05:37:38 +0600 Subject: [PATCH 35/73] [quickscope] Add extractor --- youtube_dl/extractor/__init__.py | 5 ++++- youtube_dl/extractor/periscope.py | 21 +++++++++++++++++++++ 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index bd86a5be2..e38e77a27 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -432,7 +432,10 @@ from .orf import ( from .parliamentliveuk import ParliamentLiveUKIE from .patreon import PatreonIE from .pbs import PBSIE -from .periscope import PeriscopeIE +from .periscope import ( + PeriscopeIE, + QuickscopeIE, +) from .philharmoniedeparis import PhilharmonieDeParisIE from .phoenix import PhoenixIE from .photobucket import PhotobucketIE diff --git a/youtube_dl/extractor/periscope.py b/youtube_dl/extractor/periscope.py index de53b752d..578b53a24 100644 --- a/youtube_dl/extractor/periscope.py +++ b/youtube_dl/extractor/periscope.py @@ -76,3 +76,24 @@ class PeriscopeIE(InfoExtractor): 'thumbnails': thumbnails, 'formats': formats, } + + +class QuickscopeIE(InfoExtractor): + IE_DESC = 'Quisck Scope' + _VALID_URL = r'https?://watchonperiscope\.com/broadcast/(?P\d+)' + _TEST = { + 'url': 'https://watchonperiscope.com/broadcast/56180087', + 'only_matching': True, + } + + def _real_extract(self, 
url): + broadcast_id = self._match_id(url) + request = compat_urllib_request.Request( + 'https://watchonperiscope.com/api/accessChannel', compat_urllib_parse.urlencode({ + 'broadcast_id': broadcast_id, + 'entry_ticket': '', + 'from_push': 'false', + 'uses_sessions': 'true', + }).encode('utf-8')) + return self.url_result( + self._download_json(request, broadcast_id)['share_url'], 'Periscope') From b2f82948ee5eadc483c01dc589b82426bb32ba68 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 8 Aug 2015 05:40:41 +0600 Subject: [PATCH 36/73] [quickscope] Fix typo --- youtube_dl/extractor/periscope.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/periscope.py b/youtube_dl/extractor/periscope.py index 578b53a24..8ad936758 100644 --- a/youtube_dl/extractor/periscope.py +++ b/youtube_dl/extractor/periscope.py @@ -79,7 +79,7 @@ class PeriscopeIE(InfoExtractor): class QuickscopeIE(InfoExtractor): - IE_DESC = 'Quisck Scope' + IE_DESC = 'Quick Scope' _VALID_URL = r'https?://watchonperiscope\.com/broadcast/(?P\d+)' _TEST = { 'url': 'https://watchonperiscope.com/broadcast/56180087', From 154655a85ae8b7740aa9fe7821544050fd65641b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 8 Aug 2015 19:21:05 +0600 Subject: [PATCH 37/73] [downloader/external] Respect --no-check-certificate for wget --- youtube_dl/downloader/external.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/youtube_dl/downloader/external.py b/youtube_dl/downloader/external.py index 30699934b..07ce59f7d 100644 --- a/youtube_dl/downloader/external.py +++ b/youtube_dl/downloader/external.py @@ -51,6 +51,9 @@ class ExternalFD(FileDownloader): return [] return [command_option, source_address] + def _no_check_certificate(self, command_option): + return [command_option] if self.params.get('nocheckcertificate', False) else [] + def _configuration_args(self, default=[]): ex_args = self.params.get('external_downloader_args') if ex_args is None: @@ -99,6 +102,7 @@ class WgetFD(ExternalFD): for key, val in info_dict['http_headers'].items(): cmd += ['--header', '%s: %s' % (key, val)] cmd += self._source_address('--bind-address') + cmd += self._no_check_certificate('--no-check-certificate') cmd += self._configuration_args() cmd += ['--', info_dict['url']] return cmd From b465083f45e63fe8aeb0255b5cea7dfbf0770a2b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 8 Aug 2015 21:27:10 +0600 Subject: [PATCH 38/73] [sexykarma] Fix test --- youtube_dl/extractor/sexykarma.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/sexykarma.py b/youtube_dl/extractor/sexykarma.py index 6446d26dc..e33483674 100644 --- a/youtube_dl/extractor/sexykarma.py +++ b/youtube_dl/extractor/sexykarma.py @@ -29,6 +29,7 @@ class SexyKarmaIE(InfoExtractor): 'view_count': int, 'comment_count': int, 'categories': list, + 'age_limit': 18, } }, { 'url': 'http://www.sexykarma.com/gonewild/video/pot-pixie-tribute-8Id6EZPbuHf.html', From b61b7787cbef408154695bbb9f5c3d29a70fdd38 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 8 Aug 2015 21:30:57 +0600 Subject: [PATCH 39/73] [91porn] Extract age limit --- youtube_dl/extractor/porn91.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/youtube_dl/extractor/porn91.py b/youtube_dl/extractor/porn91.py index 72d1b2718..3e15533e9 100644 --- a/youtube_dl/extractor/porn91.py +++ b/youtube_dl/extractor/porn91.py @@ -22,6 +22,7 @@ class Porn91IE(InfoExtractor): 'title': '18岁大一漂亮学妹,水嫩性感,再爽一次!', 
'ext': 'mp4', 'duration': 431, + 'age_limit': 18, } } @@ -68,4 +69,5 @@ class Porn91IE(InfoExtractor): 'url': video_url, 'duration': duration, 'comment_count': comment_count, + 'age_limit': self._rta_search(webpage), } From 8e2b1be12791b4e62c463562b570661e7b2c5852 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 8 Aug 2015 21:42:50 +0600 Subject: [PATCH 40/73] [test/helper] Make age_limit checkable field --- test/helper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/helper.py b/test/helper.py index c8b34654d..cb6eec8d9 100644 --- a/test/helper.py +++ b/test/helper.py @@ -160,7 +160,7 @@ def expect_info_dict(self, got_dict, expected_dict): # Are checkable fields missing from the test case definition? test_info_dict = dict((key, value if not isinstance(value, compat_str) or len(value) < 250 else 'md5:' + md5(value)) for key, value in got_dict.items() - if value and key in ('id', 'title', 'description', 'uploader', 'upload_date', 'timestamp', 'uploader_id', 'location')) + if value and key in ('id', 'title', 'description', 'uploader', 'upload_date', 'timestamp', 'uploader_id', 'location', 'age_limit')) missing_keys = set(test_info_dict.keys()) - set(expected_dict.keys()) if missing_keys: def _repr(v): From 18c3281f9e1e32e00c778b149137fc91accb3b1e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 8 Aug 2015 21:43:20 +0600 Subject: [PATCH 41/73] [24video] Fix test --- youtube_dl/extractor/fourtube.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/fourtube.py b/youtube_dl/extractor/fourtube.py index b2284ab01..3bb4f6239 100644 --- a/youtube_dl/extractor/fourtube.py +++ b/youtube_dl/extractor/fourtube.py @@ -32,6 +32,7 @@ class FourTubeIE(InfoExtractor): 'view_count': int, 'like_count': int, 'categories': list, + 'age_limit': 18, } } From 464e792496665b2e3dcabf5df43a45604673730a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 8 Aug 2015 21:51:21 +0600 Subject: [PATCH 42/73] [vpro] Override npo IE_NAME --- youtube_dl/extractor/npo.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py index 0c2d02c10..eb12fb810 100644 --- a/youtube_dl/extractor/npo.py +++ b/youtube_dl/extractor/npo.py @@ -407,6 +407,7 @@ class NPORadioFragmentIE(InfoExtractor): class VPROIE(NPOIE): + IE_NAME = 'vpro' _VALID_URL = r'https?://(?:www\.)?(?:tegenlicht\.)?vpro\.nl/(?:[^/]+/){2,}(?P[^/]+)\.html' _TESTS = [ From d7bb8884afc8651b0ad86046dcd56a5330c98dd5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 8 Aug 2015 21:58:24 +0600 Subject: [PATCH 43/73] [break] Add age_limit to test --- youtube_dl/extractor/breakcom.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/breakcom.py b/youtube_dl/extractor/breakcom.py index 809287d14..aa08051b1 100644 --- a/youtube_dl/extractor/breakcom.py +++ b/youtube_dl/extractor/breakcom.py @@ -18,6 +18,7 @@ class BreakIE(InfoExtractor): 'id': '2468056', 'ext': 'mp4', 'title': 'When Girls Act Like D-Bags', + 'age_limit': 13, } }, { 'url': 'http://www.break.com/video/ugc/baby-flex-2773063', From 9f2e7c2f34c48942a2a3e55532dd0d0ef8ed4d98 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 8 Aug 2015 22:04:48 +0600 Subject: [PATCH 44/73] [ok] Add age_limit to tests --- youtube_dl/extractor/odnoklassniki.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/youtube_dl/extractor/odnoklassniki.py b/youtube_dl/extractor/odnoklassniki.py index 
215ffe87b..e5fd1ba04 100644 --- a/youtube_dl/extractor/odnoklassniki.py +++ b/youtube_dl/extractor/odnoklassniki.py @@ -25,6 +25,7 @@ class OdnoklassnikiIE(InfoExtractor): 'uploader_id': '330537914540', 'uploader': 'Виталий Добровольский', 'like_count': int, + 'age_limit': 0, }, }, { # metadataUrl @@ -38,6 +39,7 @@ class OdnoklassnikiIE(InfoExtractor): 'uploader_id': '534380003155', 'uploader': 'Андрей Мещанинов', 'like_count': int, + 'age_limit': 0, }, }, { 'url': 'http://ok.ru/web-api/video/moviePlayer/20079905452', From 887e9bc7b561f9b2b97dec8f99f9c04392d95d40 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 8 Aug 2015 22:08:54 +0600 Subject: [PATCH 45/73] [ok] Update tests --- youtube_dl/extractor/odnoklassniki.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/odnoklassniki.py b/youtube_dl/extractor/odnoklassniki.py index e5fd1ba04..003d27de7 100644 --- a/youtube_dl/extractor/odnoklassniki.py +++ b/youtube_dl/extractor/odnoklassniki.py @@ -16,12 +16,13 @@ class OdnoklassnikiIE(InfoExtractor): _TESTS = [{ # metadata in JSON 'url': 'http://ok.ru/video/20079905452', - 'md5': '8e24ad2da6f387948e7a7d44eb8668fe', + 'md5': '6ba728d85d60aa2e6dd37c9e70fdc6bc', 'info_dict': { 'id': '20079905452', 'ext': 'mp4', 'title': 'Культура меняет нас (прекрасный ролик!))', 'duration': 100, + 'upload_date': '20141207', 'uploader_id': '330537914540', 'uploader': 'Виталий Добровольский', 'like_count': int, @@ -36,8 +37,9 @@ class OdnoklassnikiIE(InfoExtractor): 'ext': 'mp4', 'title': 'Девушка без комплексов ...', 'duration': 191, + 'upload_date': '20150518', 'uploader_id': '534380003155', - 'uploader': 'Андрей Мещанинов', + 'uploader': '☭ Андрей Мещанинов ☭', 'like_count': int, 'age_limit': 0, }, From c8d1be772daa496759bd85cb95c4ec799294c7f8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 8 Aug 2015 22:11:06 +0600 Subject: [PATCH 46/73] [rutube] Add age_limit to test --- youtube_dl/extractor/rutube.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/rutube.py b/youtube_dl/extractor/rutube.py index 5b1c3577a..d94dc7399 100644 --- a/youtube_dl/extractor/rutube.py +++ b/youtube_dl/extractor/rutube.py @@ -30,6 +30,7 @@ class RutubeIE(InfoExtractor): 'uploader': 'NTDRussian', 'uploader_id': '29790', 'upload_date': '20131016', + 'age_limit': 0, }, 'params': { # It requires ffmpeg (m3u8 download) From 08df685fe7764ef9f7dc271075340e4effc5e621 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 9 Aug 2015 08:51:37 +0600 Subject: [PATCH 47/73] [videolectures] Fix _VALID_URL for test_no_duplicates to pass --- youtube_dl/extractor/videolecturesnet.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/videolecturesnet.py b/youtube_dl/extractor/videolecturesnet.py index 24584dc80..ef2da5632 100644 --- a/youtube_dl/extractor/videolecturesnet.py +++ b/youtube_dl/extractor/videolecturesnet.py @@ -12,7 +12,7 @@ from ..utils import ( class VideoLecturesNetIE(InfoExtractor): - _VALID_URL = r'http://(?:www\.)?videolectures\.net/(?P[^/#?]+)(?:/?[#?].*)?$' + _VALID_URL = r'http://(?:www\.)?videolectures\.net/(?P[^/#?]+)/*(?:[#?].*)?$' IE_NAME = 'videolectures.net' _TEST = { From 12bb392a0ff8adbde2ced75b0c4976d0aabc7f4f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 9 Aug 2015 17:10:40 +0600 Subject: [PATCH 48/73] [vimeo] Fix password protected videos (Closes #6507) --- youtube_dl/extractor/vimeo.py | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index 10d6745af..4c4e3c72a 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -203,7 +203,7 @@ class VimeoIE(VimeoBaseInfoExtractor): url = url.replace('http://', 'https://') password_request = compat_urllib_request.Request(url + '/password', data) password_request.add_header('Content-Type', 'application/x-www-form-urlencoded') - password_request.add_header('Cookie', 'xsrft=%s' % token) + password_request.add_header('Referer', url) return self._download_webpage( password_request, video_id, 'Verifying the password', 'Wrong password') From 8d6765cf48138cc44fdbaee4e8c7a199ae348bb5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 9 Aug 2015 19:07:18 +0600 Subject: [PATCH 49/73] [extractor/generic] Add generic support for xspf playist extraction --- youtube_dl/extractor/common.py | 41 ++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index def6caa0d..e201ea6db 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -39,6 +39,8 @@ from ..utils import ( sanitize_filename, unescapeHTML, url_basename, + xpath_text, + xpath_with_ns, ) @@ -1142,6 +1144,45 @@ class InfoExtractor(object): }) return subtitles + def _extract_xspf_playlist(self, playlist_url, playlist_id): + playlist = self._download_xml( + playlist_url, playlist_id, 'Downloading xpsf playlist', + 'Unable to download xspf manifest') + + NS_MAP = { + 'xspf': 'http://xspf.org/ns/0/', + 's1': 'http://static.streamone.nl/player/ns/0', + } + + entries = [] + for track in playlist.findall(xpath_with_ns('./xspf:trackList/xspf:track', NS_MAP)): + title = xpath_text( + track, xpath_with_ns('./xspf:title', NS_MAP), 'title') + description = xpath_text( + track, xpath_with_ns('./xspf:annotation', NS_MAP), 'description') + thumbnail = xpath_text( + track, xpath_with_ns('./xspf:image', NS_MAP), 'thumbnail') + duration = float_or_none( + xpath_text(track, xpath_with_ns('./xspf:duration', NS_MAP), 'duration'), 1000) + + formats = [{ + 'url': location.text, + 'format_id': location.get(xpath_with_ns('s1:label', NS_MAP)), + 'width': int_or_none(location.get(xpath_with_ns('s1:width', NS_MAP))), + 'height': int_or_none(location.get(xpath_with_ns('s1:height', NS_MAP))), + } for location in track.findall(xpath_with_ns('./xspf:location', NS_MAP))] + self._sort_formats(formats) + + entries.append({ + 'id': playlist_id, + 'title': title, + 'description': description, + 'thumbnail': thumbnail, + 'duration': duration, + 'formats': formats, + }) + return entries + def _live_title(self, name): """ Generate the title for a live video """ now = datetime.datetime.now() From e0b9d78fab76e2c2819c8a9a7512ad4533319b72 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 9 Aug 2015 19:09:50 +0600 Subject: [PATCH 50/73] [extractor/common] Clarify playlists can have description field --- youtube_dl/extractor/common.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index e201ea6db..9b4775e0a 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -204,8 +204,8 @@ class InfoExtractor(object): There must be a key "entries", which is a list, an iterable, or a PagedList object, each element of which is a valid dictionary by this specification. 
- Additionally, playlists can have "title" and "id" attributes with the same - semantics as videos (see above). + Additionally, playlists can have "title", "description" and "id" attributes + with the same semantics as videos (see above). _type "multi_video" indicates that there are multiple videos that From 3a30508b943c044e5f684b703ff58ac352686f63 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 9 Aug 2015 19:11:23 +0600 Subject: [PATCH 51/73] [telegraaf] Add extractor (Closes #6492) --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/telegraaf.py | 35 +++++++++++++++++++++++++++++++ 2 files changed, 36 insertions(+) create mode 100644 youtube_dl/extractor/telegraaf.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index e38e77a27..dad3ec87f 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -596,6 +596,7 @@ from .techtalks import TechTalksIE from .ted import TEDIE from .telebruxelles import TeleBruxellesIE from .telecinco import TelecincoIE +from .telegraaf import TelegraafIE from .telemb import TeleMBIE from .teletask import TeleTaskIE from .tenplay import TenPlayIE diff --git a/youtube_dl/extractor/telegraaf.py b/youtube_dl/extractor/telegraaf.py new file mode 100644 index 000000000..6f8333cfc --- /dev/null +++ b/youtube_dl/extractor/telegraaf.py @@ -0,0 +1,35 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import remove_end + + +class TelegraafIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?telegraaf\.nl/tv/(?:[^/]+/)+(?P\d+)/[^/]+\.html' + _TEST = { + 'url': 'http://www.telegraaf.nl/tv/nieuws/binnenland/24353229/__Tikibad_ontruimd_wegens_brand__.html', + 'md5': '83245a9779bcc4a24454bfd53c65b6dc', + 'info_dict': { + 'id': '24353229', + 'ext': 'mp4', + 'title': 'Tikibad ontruimd wegens brand', + 'description': 'md5:05ca046ff47b931f9b04855015e163a4', + 'thumbnail': 're:^https?://.*\.jpg$', + 'duration': 33, + }, + } + + def _real_extract(self, url): + playlist_id = self._match_id(url) + + webpage = self._download_webpage(url, playlist_id) + + playlist_url = self._search_regex( + r"iframe\.loadPlayer\('([^']+)'", webpage, 'player') + + entries = self._extract_xspf_playlist(playlist_url, playlist_id) + title = remove_end(self._og_search_title(webpage), ' - VIDEO') + description = self._og_search_description(webpage) + + return self.playlist_result(entries, playlist_id, title, description) From f32143469fd0a2720bd40908ea8360490983b97d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 9 Aug 2015 19:15:00 +0600 Subject: [PATCH 52/73] [tweakers] Use _extract_xspf_playlist --- youtube_dl/extractor/tweakers.py | 42 +++----------------------------- 1 file changed, 4 insertions(+), 38 deletions(-) diff --git a/youtube_dl/extractor/tweakers.py b/youtube_dl/extractor/tweakers.py index c80ec15cf..4bbe76e96 100644 --- a/youtube_dl/extractor/tweakers.py +++ b/youtube_dl/extractor/tweakers.py @@ -25,41 +25,7 @@ class TweakersIE(InfoExtractor): } def _real_extract(self, url): - video_id = self._match_id(url) - - playlist = self._download_xml( - 'https://tweakers.net/video/s1playlist/%s/playlist.xspf' % video_id, - video_id) - - NS_MAP = { - 'xspf': 'http://xspf.org/ns/0/', - 's1': 'http://static.streamone.nl/player/ns/0', - } - - track = playlist.find(xpath_with_ns('./xspf:trackList/xspf:track', NS_MAP)) - - title = xpath_text( - track, xpath_with_ns('./xspf:title', NS_MAP), 'title') - description = 
xpath_text( - track, xpath_with_ns('./xspf:annotation', NS_MAP), 'description') - thumbnail = xpath_text( - track, xpath_with_ns('./xspf:image', NS_MAP), 'thumbnail') - duration = float_or_none( - xpath_text(track, xpath_with_ns('./xspf:duration', NS_MAP), 'duration'), - 1000) - - formats = [{ - 'url': location.text, - 'format_id': location.get(xpath_with_ns('s1:label', NS_MAP)), - 'width': int_or_none(location.get(xpath_with_ns('s1:width', NS_MAP))), - 'height': int_or_none(location.get(xpath_with_ns('s1:height', NS_MAP))), - } for location in track.findall(xpath_with_ns('./xspf:location', NS_MAP))] - - return { - 'id': video_id, - 'title': title, - 'description': description, - 'thumbnail': thumbnail, - 'duration': duration, - 'formats': formats, - } + playlist_id = self._match_id(url) + entries = self._extract_xspf_playlist( + 'https://tweakers.net/video/s1playlist/%s/playlist.xspf' % playlist_id, playlist_id) + return self.playlist_result(entries, playlist_id) From 0dcb318f622d944ad0f5c23c32c9bc9b00e76aaa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 9 Aug 2015 19:15:20 +0600 Subject: [PATCH 53/73] [tweakers] Fix test --- youtube_dl/extractor/tweakers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/tweakers.py b/youtube_dl/extractor/tweakers.py index 4bbe76e96..6eeffb1cc 100644 --- a/youtube_dl/extractor/tweakers.py +++ b/youtube_dl/extractor/tweakers.py @@ -13,7 +13,7 @@ class TweakersIE(InfoExtractor): _VALID_URL = r'https?://tweakers\.net/video/(?P\d+)' _TEST = { 'url': 'https://tweakers.net/video/9926/new-nintendo-3ds-xl-op-alle-fronten-beter.html', - 'md5': '1b5afa817403bb5baa08359dca31e6df', + 'md5': '3147e4ddad366f97476a93863e4557c8', 'info_dict': { 'id': '9926', 'ext': 'mp4', From 98044462b1035000a44b35a41f4f780b2e844f2e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 9 Aug 2015 19:18:50 +0600 Subject: [PATCH 54/73] [extractor/common] Use playlist id as default title --- youtube_dl/extractor/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 9b4775e0a..be91e03e9 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1157,7 +1157,7 @@ class InfoExtractor(object): entries = [] for track in playlist.findall(xpath_with_ns('./xspf:trackList/xspf:track', NS_MAP)): title = xpath_text( - track, xpath_with_ns('./xspf:title', NS_MAP), 'title') + track, xpath_with_ns('./xspf:title', NS_MAP), 'title', default=playlist_id) description = xpath_text( track, xpath_with_ns('./xspf:annotation', NS_MAP), 'description') thumbnail = xpath_text( From fb2f339fec20c35cb62c1da682e0dfd418faef81 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 9 Aug 2015 19:21:25 +0600 Subject: [PATCH 55/73] [dhm] Use _extract_xspf_playlist --- youtube_dl/extractor/dhm.py | 25 +++++++------------------ 1 file changed, 7 insertions(+), 18 deletions(-) diff --git a/youtube_dl/extractor/dhm.py b/youtube_dl/extractor/dhm.py index 3ed1f1663..127eb0439 100644 --- a/youtube_dl/extractor/dhm.py +++ b/youtube_dl/extractor/dhm.py @@ -34,24 +34,14 @@ class DHMIE(InfoExtractor): }] def _real_extract(self, url): - video_id = self._match_id(url) + playlist_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) + webpage = self._download_webpage(url, playlist_id) playlist_url = self._search_regex( r"file\s*:\s*'([^']+)'", webpage, 'playlist url') - playlist = 
self._download_xml(playlist_url, video_id) - - track = playlist.find( - './{http://xspf.org/ns/0/}trackList/{http://xspf.org/ns/0/}track') - - video_url = xpath_text( - track, './{http://xspf.org/ns/0/}location', - 'video url', fatal=True) - thumbnail = xpath_text( - track, './{http://xspf.org/ns/0/}image', - 'thumbnail') + entries = self._extract_xspf_playlist(playlist_url, playlist_id) title = self._search_regex( [r'dc:title="([^"]+)"', r' »([^<]+)'], @@ -63,11 +53,10 @@ class DHMIE(InfoExtractor): r'Length\s*\s*:\s*([^<]+)', webpage, 'duration', default=None)) - return { - 'id': video_id, - 'url': video_url, + entries[0].update({ 'title': title, 'description': description, 'duration': duration, - 'thumbnail': thumbnail, - } + }) + + return self.playlist_result(entries, playlist_id) From 942acef594428b5f5c7e0ed7860cb6d725d8f1e5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 9 Aug 2015 19:41:55 +0600 Subject: [PATCH 56/73] [extractor/common] Extract _parse_xspf --- youtube_dl/extractor/common.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index be91e03e9..5982055be 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1144,11 +1144,15 @@ class InfoExtractor(object): }) return subtitles - def _extract_xspf_playlist(self, playlist_url, playlist_id): - playlist = self._download_xml( + def _extract_xspf_playlist(self, playlist_url, playlist_id, fatal=True): + xspf = self._download_xml( playlist_url, playlist_id, 'Downloading xpsf playlist', - 'Unable to download xspf manifest') + 'Unable to download xspf manifest', fatal=fatal) + if xspf is False: + return [] + return self._parse_xspf(xspf, playlist_id) + def _parse_xspf(self, playlist, playlist_id): NS_MAP = { 'xspf': 'http://xspf.org/ns/0/', 's1': 'http://static.streamone.nl/player/ns/0', From 729accb48221bd72e40076939616792c1c6fc15f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 9 Aug 2015 19:43:42 +0600 Subject: [PATCH 57/73] [extractor/generic] Add support for xspf playlists --- youtube_dl/extractor/generic.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 901f77304..a382d6be4 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -1198,6 +1198,8 @@ class GenericIE(InfoExtractor): return self._extract_rss(url, video_id, doc) elif re.match(r'^(?:{[^}]+})?smil$', doc.tag): return self._parse_smil(doc, url, video_id) + elif doc.tag == '{http://xspf.org/ns/0/}playlist': + return self.playlist_result(self._parse_xspf(doc, video_id), video_id) except compat_xml_parse_error: pass @@ -1799,7 +1801,8 @@ class GenericIE(InfoExtractor): # here's a fun little line of code for you: video_id = os.path.splitext(video_id)[0] - if determine_ext(video_url) == 'smil': + ext = determine_ext(video_url) + if ext == 'smil': entries.append({ 'id': video_id, 'formats': self._extract_smil_formats(video_url, video_id), @@ -1807,6 +1810,8 @@ class GenericIE(InfoExtractor): 'title': video_title, 'age_limit': age_limit, }) + elif ext == 'xspf': + return self.playlist_result(self._extract_xspf_playlist(video_url, video_id), video_id) else: entries.append({ 'id': video_id, From 1de5cd3ba51ce67d9a1cd3b40157058e78e46692 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 9 Aug 2015 19:47:08 +0600 Subject: [PATCH 58/73] [extractor/generic] Add 
test for xspf playlist --- youtube_dl/extractor/generic.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index a382d6be4..4756a658f 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -198,6 +198,21 @@ class GenericIE(InfoExtractor): 'skip_download': True, }, }, + # XSPF playlist from http://www.telegraaf.nl/tv/nieuws/binnenland/24353229/__Tikibad_ontruimd_wegens_brand__.html + { + 'url': 'http://www.telegraaf.nl/xml/playlist/2015/8/7/mZlp2ctYIUEB.xspf', + 'info_dict': { + 'id': 'mZlp2ctYIUEB', + 'ext': 'mp4', + 'title': 'Tikibad ontruimd wegens brand', + 'description': 'md5:05ca046ff47b931f9b04855015e163a4', + 'thumbnail': 're:^https?://.*\.jpg$', + 'duration': 33, + }, + 'params': { + 'skip_download': True, + }, + }, # google redirect { 'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE', From 0791ac1b4415601f464f9656a4485b3ae6b67f4e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 9 Aug 2015 19:47:58 +0600 Subject: [PATCH 59/73] [extractor/generic] Clarify comment --- youtube_dl/extractor/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 4756a658f..376feecae 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -1206,7 +1206,7 @@ class GenericIE(InfoExtractor): self.report_extraction(video_id) - # Is it an RSS feed or a SMIL file? + # Is it an RSS feed, a SMIL file or a XSPF playlist? try: doc = parse_xml(webpage) if doc.tag == 'rss': From 27c7114af6b82bfe8be6b8e4dfa6e11dd1356044 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sun, 9 Aug 2015 20:13:02 +0200 Subject: [PATCH 60/73] release 2015.08.09 --- README.md | 2 +- docs/supportedsites.md | 8 ++++++-- youtube_dl/version.py | 2 +- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 02b9775f9..15baf75ce 100644 --- a/README.md +++ b/README.md @@ -108,7 +108,7 @@ which means you can modify it, redistribute it or use it however you like. --playlist-reverse Download playlist videos in reverse order --xattr-set-filesize Set file xattribute ytdl.filesize with expected filesize (experimental) --hls-prefer-native Use the native HLS downloader instead of ffmpeg (experimental) - --external-downloader COMMAND Use the specified external downloader. Currently supports aria2c,curl,httpie,wget + --external-downloader COMMAND Use the specified external downloader. 
Currently supports aria2c,axel,curl,httpie,wget --external-downloader-args ARGS Give these arguments to the external downloader ## Filesystem Options: diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 657935dc6..e21471102 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -86,7 +86,7 @@ - **chirbit:profile** - **Cinchcast** - **Cinemassacre** - - **clipfish** + - **Clipfish** - **cliphunter** - **Clipsyndicate** - **Cloudy** @@ -116,6 +116,7 @@ - **DailymotionCloud** - **daum.net** - **DBTV** + - **DCN** - **DctpTv** - **DeezerPlaylist** - **defense.gouv.fr** @@ -351,7 +352,6 @@ - **NowTV** - **nowvideo**: NowVideo - **npo**: npo.nl and ntr.nl - - **npo**: npo.nl and ntr.nl - **npo.nl:live** - **npo.nl:radio** - **npo.nl:radio:fragment** @@ -377,6 +377,7 @@ - **parliamentlive.tv**: UK parliament videos - **Patreon** - **PBS** + - **Periscope**: Periscope - **PhilharmonieDeParis**: Philharmonie de Paris - **Phoenix** - **Photobucket** @@ -406,6 +407,7 @@ - **qqmusic:playlist**: QQ音乐 - 歌单 - **qqmusic:singer**: QQ音乐 - 歌手 - **qqmusic:toplist**: QQ音乐 - 排行榜 + - **Quickscope**: Quick Scope - **QuickVid** - **R7** - **radio.de** @@ -518,6 +520,7 @@ - **ted** - **TeleBruxelles** - **telecinco.es** + - **Telegraaf** - **TeleMB** - **TeleTask** - **TenPlay** @@ -621,6 +624,7 @@ - **Vodlocker** - **VoiceRepublic** - **Vporn** + - **vpro**: npo.nl and ntr.nl - **VRT** - **vube**: Vube.com - **VuClip** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 9f209499c..6462d4477 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2015.08.06.1' +__version__ = '2015.08.09' From c5864a8ce6379dca300f447cca12a5a946d67d3d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 10 Aug 2015 21:38:58 +0600 Subject: [PATCH 61/73] [fc2] Fix python 2.6 (Closes #6512) --- youtube_dl/extractor/fc2.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/fc2.py b/youtube_dl/extractor/fc2.py index 1ccc1a964..e4f7195a8 100644 --- a/youtube_dl/extractor/fc2.py +++ b/youtube_dl/extractor/fc2.py @@ -86,7 +86,7 @@ class FC2IE(InfoExtractor): info_url = ( "http://video.fc2.com/ginfo.php?mimi={1:s}&href={2:s}&v={0:s}&fversion=WIN%2011%2C6%2C602%2C180&from=2&otag=0&upid={0:s}&tk=null&". - format(video_id, mimi, compat_urllib_request.quote(refer, safe='').replace('.', '%2E'))) + format(video_id, mimi, compat_urllib_request.quote(refer, safe=b'').replace('.', '%2E'))) info_webpage = self._download_webpage( info_url, video_id, note='Downloading info page') From f6c3664d717857a7994f189a01a00402df2b4168 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 10 Aug 2015 23:35:08 +0600 Subject: [PATCH 62/73] [vimeo] Fix login (Closes #6488) --- youtube_dl/extractor/vimeo.py | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index 4c4e3c72a..5bce78ac0 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -29,6 +29,7 @@ from ..utils import ( class VimeoBaseInfoExtractor(InfoExtractor): _NETRC_MACHINE = 'vimeo' _LOGIN_REQUIRED = False + _LOGIN_URL = 'https://vimeo.com/log_in' def _login(self): (username, password) = self._get_login_info() @@ -37,21 +38,25 @@ class VimeoBaseInfoExtractor(InfoExtractor): raise ExtractorError('No login info available, needed for using %s.' 
% self.IE_NAME, expected=True) return self.report_login() - login_url = 'https://vimeo.com/log_in' - webpage = self._download_webpage(login_url, None, False) - token = self._search_regex(r'xsrft":"(.*?)"', webpage, 'login token') + webpage = self._download_webpage(self._LOGIN_URL, None, False) + token = self._extract_xsrft(webpage) data = urlencode_postdata({ + 'action': 'login', 'email': username, 'password': password, - 'action': 'login', 'service': 'vimeo', 'token': token, }) - login_request = compat_urllib_request.Request(login_url, data) + login_request = compat_urllib_request.Request(self._LOGIN_URL, data) login_request.add_header('Content-Type', 'application/x-www-form-urlencoded') - login_request.add_header('Cookie', 'xsrft=%s' % token) + login_request.add_header('Referer', self._LOGIN_URL) self._download_webpage(login_request, None, False, 'Wrong login info') + def _extract_xsrft(self, webpage): + return self._search_regex( + r'xsrft\s*[=:]\s*(?P["\'])(?P.+?)(?P=q)', + webpage, 'login token', group='xsrft') + class VimeoIE(VimeoBaseInfoExtractor): """Information extractor for vimeo.com.""" @@ -193,7 +198,7 @@ class VimeoIE(VimeoBaseInfoExtractor): password = self._downloader.params.get('videopassword', None) if password is None: raise ExtractorError('This video is protected by a password, use the --video-password option', expected=True) - token = self._search_regex(r'xsrft[\s=:"\']+([^"\']+)', webpage, 'login token') + token = self._extract_xsrft(webpage) data = urlencode_postdata({ 'password': password, 'token': token, @@ -422,7 +427,7 @@ class VimeoIE(VimeoBaseInfoExtractor): } -class VimeoChannelIE(InfoExtractor): +class VimeoChannelIE(VimeoBaseInfoExtractor): IE_NAME = 'vimeo:channel' _VALID_URL = r'https://vimeo\.com/channels/(?P[^/?#]+)/?(?:$|[?#])' _MORE_PAGES_INDICATOR = r' Date: Mon, 10 Aug 2015 23:58:01 +0600 Subject: [PATCH 63/73] [vimeo:watchlater] Fix extraction (Closes #3886) --- youtube_dl/extractor/vimeo.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index 5bce78ac0..1eeb4618e 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -431,6 +431,7 @@ class VimeoChannelIE(VimeoBaseInfoExtractor): IE_NAME = 'vimeo:channel' _VALID_URL = r'https://vimeo\.com/channels/(?P[^/?#]+)/?(?:$|[?#])' _MORE_PAGES_INDICATOR = r']+?title="(.*?)"' _TESTS = [{ 'url': 'https://vimeo.com/channels/tributes', @@ -445,7 +446,7 @@ class VimeoChannelIE(VimeoBaseInfoExtractor): return '%s/videos/page:%d/' % (base_url, pagenum) def _extract_list_title(self, webpage): - return self._html_search_regex(self._TITLE_RE, webpage, 'list title') + return self._TITLE or self._html_search_regex(self._TITLE_RE, webpage, 'list title') def _login_list_password(self, page_url, list_id, webpage): login_form = self._search_regex( @@ -611,11 +612,11 @@ class VimeoReviewIE(InfoExtractor): class VimeoWatchLaterIE(VimeoChannelIE): IE_NAME = 'vimeo:watchlater' IE_DESC = 'Vimeo watch later list, "vimeowatchlater" keyword (requires authentication)' - _VALID_URL = r'https://vimeo\.com/home/watchlater|:vimeowatchlater' + _VALID_URL = r'https://vimeo\.com/(?:home/)?watchlater|:vimeowatchlater' + _TITLE = 'Watch Later' _LOGIN_REQUIRED = True - _TITLE_RE = r'href="/home/watchlater".*?>(.*?)<' _TESTS = [{ - 'url': 'https://vimeo.com/home/watchlater', + 'url': 'https://vimeo.com/watchlater', 'only_matching': True, }] @@ -631,7 +632,7 @@ class VimeoWatchLaterIE(VimeoChannelIE): return request def 
_real_extract(self, url): - return self._extract_videos('watchlater', 'https://vimeo.com/home/watchlater') + return self._extract_videos('watchlater', 'https://vimeo.com/watchlater') class VimeoLikesIE(InfoExtractor): From 11b5605815d685263b271b4e061c43f9cb55a08c Mon Sep 17 00:00:00 2001 From: Puck Meerburg Date: Mon, 10 Aug 2015 20:52:38 +0200 Subject: [PATCH 64/73] [youtube] Use the first v= argument in the URL This is according to how youtube handles multiple v= values in one URL. Before this, it was possible to make a single URL show up differently on youtube itself, and if you downloaded/viewed it with youtube-dl/mpv --- youtube_dl/extractor/youtube.py | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 67a1df9a0..eaf058cfb 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -213,7 +213,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): |(?: # or the v= param in all its forms (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx) (?:\?|\#!?) # the params delimiter ? or # or #! - (?:.*?&)? # any other preceding param (like /?s=tuff&v=xxxx) + (?:.*?&)?? # any other preceding param (like /?s=tuff&v=xxxx) v= ) )) @@ -380,6 +380,23 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'uploader_id': 'setindia' } }, + { + 'url': 'http://www.youtube.com/watch?v=BaW_jenozKcj&v=UxxajLWwzqY', + 'note': 'Use the first video ID in the URL', + 'info_dict': { + 'id': 'BaW_jenozKc', + 'ext': 'mp4', + 'title': 'youtube-dl test video "\'/\\ä↭𝕐', + 'uploader': 'Philipp Hagemeister', + 'uploader_id': 'phihag', + 'upload_date': '20121002', + 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .', + 'categories': ['Science & Technology'], + 'tags': ['youtube-dl'], + 'like_count': int, + 'dislike_count': int, + } + }, { 'url': 'http://www.youtube.com/watch?v=a9LDPn-MO4I', 'note': '256k DASH audio (format 141) via DASH manifest', From b29440aee64027b3e4145070b0235193752b4d9f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 11 Aug 2015 01:17:41 +0600 Subject: [PATCH 65/73] [vimeo:user] Do not match watchlater --- youtube_dl/extractor/vimeo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index 1eeb4618e..50df79ca1 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -505,7 +505,7 @@ class VimeoChannelIE(VimeoBaseInfoExtractor): class VimeoUserIE(VimeoChannelIE): IE_NAME = 'vimeo:user' - _VALID_URL = r'https://vimeo\.com/(?![0-9]+(?:$|[?#/]))(?P[^/]+)(?:/videos|[#?]|$)' + _VALID_URL = r'https://vimeo\.com/(?!(?:[0-9]+|watchlater)(?:$|[?#/]))(?P[^/]+)(?:/videos|[#?]|$)' _TITLE_RE = r']+?class="user">([^<>]+?)' _TESTS = [{ 'url': 'https://vimeo.com/nkistudio/videos', From 34a7de2970d8bbceeb3f485d64a57f67489a44d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 11 Aug 2015 01:22:06 +0600 Subject: [PATCH 66/73] [youtube] Skip download for multiple v= test --- youtube_dl/extractor/youtube.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index eaf058cfb..01dbbfa3c 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -395,7 +395,10 @@ 
class YoutubeIE(YoutubeBaseInfoExtractor): 'tags': ['youtube-dl'], 'like_count': int, 'dislike_count': int, - } + }, + 'params': { + 'skip_download': True, + }, }, { 'url': 'http://www.youtube.com/watch?v=a9LDPn-MO4I', From 34952f09e175e0b78c929fddf56f82ccf028dc5c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 11 Aug 2015 01:24:53 +0600 Subject: [PATCH 67/73] [youtube] Add age limit to tests --- youtube_dl/extractor/youtube.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 01dbbfa3c..e74a39095 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -365,6 +365,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'description': 'md5:64249768eec3bc4276236606ea996373', 'uploader': 'justintimberlakeVEVO', 'uploader_id': 'justintimberlakeVEVO', + 'age_limit': 18, } }, { @@ -475,6 +476,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'uploader': 'The Witcher', 'uploader_id': 'WitcherGame', 'upload_date': '20140605', + 'age_limit': 18, }, }, # Age-gate video with encrypted signature @@ -488,6 +490,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'uploader': 'LloydVEVO', 'uploader_id': 'LloydVEVO', 'upload_date': '20110629', + 'age_limit': 18, }, }, # video_info is None (https://github.com/rg3/youtube-dl/issues/4421) From fb0d12c6cbcabd6f9e84d51c82dea6778d0bb863 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 11 Aug 2015 01:46:25 +0600 Subject: [PATCH 68/73] [pbs] Add age limit to tests --- youtube_dl/extractor/pbs.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/pbs.py b/youtube_dl/extractor/pbs.py index a53479aad..683c81de3 100644 --- a/youtube_dl/extractor/pbs.py +++ b/youtube_dl/extractor/pbs.py @@ -92,6 +92,7 @@ class PBSIE(InfoExtractor): 'duration': 3172, 'thumbnail': 're:^https?://.*\.jpg$', 'upload_date': '20140122', + 'age_limit': 10, }, 'params': { 'skip_download': True, # requires ffmpeg From b1ac38fadc65049dc6f9611fa7e9649de1e7eb93 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 11 Aug 2015 01:49:23 +0600 Subject: [PATCH 69/73] [tvplay] Add age limit to tests --- youtube_dl/extractor/tvplay.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/tvplay.py b/youtube_dl/extractor/tvplay.py index 79863e781..b4683de54 100644 --- a/youtube_dl/extractor/tvplay.py +++ b/youtube_dl/extractor/tvplay.py @@ -104,6 +104,7 @@ class TVPlayIE(InfoExtractor): 'duration': 1492, 'timestamp': 1330522854, 'upload_date': '20120229', + 'age_limit': 18, }, 'params': { # rtmp download From bf812ef71438036c23640f29bd7ae955289720ed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 11 Aug 2015 23:00:45 +0600 Subject: [PATCH 70/73] [downloader/external] Forward --proxy to wget and aria2c --- youtube_dl/downloader/external.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/youtube_dl/downloader/external.py b/youtube_dl/downloader/external.py index 07ce59f7d..49d806ee4 100644 --- a/youtube_dl/downloader/external.py +++ b/youtube_dl/downloader/external.py @@ -51,6 +51,14 @@ class ExternalFD(FileDownloader): return [] return [command_option, source_address] + def _option(self, command_option, param): + param = self.params.get(param) + if param is None: + return [] + if isinstance(param, bool): + return [command_option] + return [command_option, param] + def _no_check_certificate(self, command_option): return [command_option] if 
self.params.get('nocheckcertificate', False) else [] @@ -102,6 +110,7 @@ class WgetFD(ExternalFD): for key, val in info_dict['http_headers'].items(): cmd += ['--header', '%s: %s' % (key, val)] cmd += self._source_address('--bind-address') + cmd += self._option('--proxy', 'proxy') cmd += self._no_check_certificate('--no-check-certificate') cmd += self._configuration_args() cmd += ['--', info_dict['url']] @@ -120,6 +129,7 @@ class Aria2cFD(ExternalFD): for key, val in info_dict['http_headers'].items(): cmd += ['--header', '%s: %s' % (key, val)] cmd += self._source_address('--interface') + cmd += self._option('--all-proxy', 'proxy') cmd += ['--', info_dict['url']] return cmd From 9f3da138606773339de9accc2bc6522ea88185fe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 11 Aug 2015 23:05:04 +0600 Subject: [PATCH 71/73] [downloader/external] Use generic _option --- youtube_dl/downloader/external.py | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/youtube_dl/downloader/external.py b/youtube_dl/downloader/external.py index 49d806ee4..6c310346c 100644 --- a/youtube_dl/downloader/external.py +++ b/youtube_dl/downloader/external.py @@ -45,12 +45,6 @@ class ExternalFD(FileDownloader): def supports(cls, info_dict): return info_dict['protocol'] in ('http', 'https', 'ftp', 'ftps') - def _source_address(self, command_option): - source_address = self.params.get('source_address') - if source_address is None: - return [] - return [command_option, source_address] - def _option(self, command_option, param): param = self.params.get(param) if param is None: @@ -59,9 +53,6 @@ class ExternalFD(FileDownloader): return [command_option] return [command_option, param] - def _no_check_certificate(self, command_option): - return [command_option] if self.params.get('nocheckcertificate', False) else [] - def _configuration_args(self, default=[]): ex_args = self.params.get('external_downloader_args') if ex_args is None: @@ -88,7 +79,7 @@ class CurlFD(ExternalFD): cmd = [self.exe, '--location', '-o', tmpfilename] for key, val in info_dict['http_headers'].items(): cmd += ['--header', '%s: %s' % (key, val)] - cmd += self._source_address('--interface') + cmd += self._option('--interface', 'source_address') cmd += self._configuration_args() cmd += ['--', info_dict['url']] return cmd @@ -109,9 +100,9 @@ class WgetFD(ExternalFD): cmd = [self.exe, '-O', tmpfilename, '-nv', '--no-cookies'] for key, val in info_dict['http_headers'].items(): cmd += ['--header', '%s: %s' % (key, val)] - cmd += self._source_address('--bind-address') + cmd += self._option('--bind-address', 'source_address') cmd += self._option('--proxy', 'proxy') - cmd += self._no_check_certificate('--no-check-certificate') + cmd += self._option('--no-check-certificate', 'nocheckcertificate') cmd += self._configuration_args() cmd += ['--', info_dict['url']] return cmd @@ -128,7 +119,7 @@ class Aria2cFD(ExternalFD): cmd += ['--out', os.path.basename(tmpfilename)] for key, val in info_dict['http_headers'].items(): cmd += ['--header', '%s: %s' % (key, val)] - cmd += self._source_address('--interface') + cmd += self._option('--interface', 'source_address') cmd += self._option('--all-proxy', 'proxy') cmd += ['--', info_dict['url']] return cmd From 0a19d4ccd6914d8547fd3e42fd279c960d9f8fad Mon Sep 17 00:00:00 2001 From: sceext Date: Wed, 12 Aug 2015 14:01:48 +0800 Subject: [PATCH 72/73] [iqiyi] update md5 salt (2015-08-10 Zombie) --- youtube_dl/extractor/iqiyi.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) 
diff --git a/youtube_dl/extractor/iqiyi.py b/youtube_dl/extractor/iqiyi.py index afb7f4e61..dfc6d58a0 100644 --- a/youtube_dl/extractor/iqiyi.py +++ b/youtube_dl/extractor/iqiyi.py @@ -201,7 +201,7 @@ class IqiyiIE(InfoExtractor): return raw_data def get_enc_key(self, swf_url, video_id): - enc_key = '8e29ab5666d041c3a1ea76e06dabdffb' + enc_key = '3601ba290e4f4662848c710e2122007e' # last update at 2015-08-10 for Zombie return enc_key def _real_extract(self, url): From f57b7835e21b00a1b2205b4bcfba50c630ff68b4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 12 Aug 2015 21:27:58 +0600 Subject: [PATCH 73/73] [youtube] Update tests --- youtube_dl/extractor/youtube.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index e74a39095..facd837ad 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -442,7 +442,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'id': 'nfWlot6h_JM', 'ext': 'm4a', 'title': 'Taylor Swift - Shake It Off', - 'description': 'md5:2acfda1b285bdd478ccec22f9918199d', + 'description': 'md5:95f66187cd7c8b2c13eb78e1223b63c3', 'uploader': 'TaylorSwiftVEVO', 'uploader_id': 'TaylorSwiftVEVO', 'upload_date': '20140818', @@ -515,7 +515,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'info_dict': { 'id': 'lqQg6PlCWgI', 'ext': 'mp4', - 'upload_date': '20120731', + 'upload_date': '20120724', 'uploader_id': 'olympic', 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games', 'uploader': 'Olympics', @@ -544,7 +544,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'url': 'qEJwOuvDf7I', 'info_dict': { 'id': 'qEJwOuvDf7I', - 'ext': 'mp4', + 'ext': 'webm', 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге', 'description': '', 'upload_date': '20150404',