From 6cbb20bb090845898fcc368beed45708f05bf908 Mon Sep 17 00:00:00 2001 From: DarkstaIkers Date: Tue, 29 Mar 2016 14:26:24 -0300 Subject: [PATCH 0001/1571] Update crunchyroll.py --- youtube_dl/extractor/crunchyroll.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py index 8ae3f2890..44c720aaa 100644 --- a/youtube_dl/extractor/crunchyroll.py +++ b/youtube_dl/extractor/crunchyroll.py @@ -184,7 +184,7 @@ class CrunchyrollIE(CrunchyrollBaseIE): output += 'WrapStyle: %s\n' % sub_root.attrib['wrap_style'] output += 'PlayResX: %s\n' % sub_root.attrib['play_res_x'] output += 'PlayResY: %s\n' % sub_root.attrib['play_res_y'] - output += """ScaledBorderAndShadow: yes + output += """ScaledBorderAndShadow: no [V4+ Styles] Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding From 7be15d40976bf40f44bc47301d4e839a1e171e52 Mon Sep 17 00:00:00 2001 From: PeterDing Date: Fri, 29 Jul 2016 23:21:50 +0800 Subject: [PATCH 0002/1571] [bilibili] Support episodes [extractor/bilibili] add md5 for testing [extractor/bilibili] remove unnecessary headers [extractor/bilibili] correct _TESTS; find thumbnail for episode [extractor/bilibili] [Fix] restore removed tests --- youtube_dl/extractor/bilibili.py | 40 ++++++++++++++++++++++++++------ 1 file changed, 33 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/bilibili.py b/youtube_dl/extractor/bilibili.py index a332fbb69..35313c62b 100644 --- a/youtube_dl/extractor/bilibili.py +++ b/youtube_dl/extractor/bilibili.py @@ -12,9 +12,13 @@ from ..utils import ( unified_timestamp, ) +HEADERS = { + 'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8', +} + class BiliBiliIE(InfoExtractor): - _VALID_URL = r'https?://www\.bilibili\.(?:tv|com)/video/av(?P\d+)' + _VALID_URL = r'https?://(www.|bangumi.|)bilibili\.(?:tv|com)/(video/av|anime/v/)(?P\d+)' _TESTS = [{ 'url': 'http://www.bilibili.tv/video/av1074402/', @@ -77,6 +81,17 @@ class BiliBiliIE(InfoExtractor): 'skip_download': True, }, 'expected_warnings': ['upload time'], + }, { + 'url': 'http://bangumi.bilibili.com/anime/v/40068', + 'md5': '08d539a0884f3deb7b698fb13ba69696', + 'info_dict': { + 'id': '40068', + 'ext': 'mp4', + 'duration': 1402.357, + 'title': '混沌武士 : 第7集 四面楚歌 A Risky Racket', + 'description': "故事发生在日本的江户时代。风是一个小酒馆的打工女。一日,酒馆里来了一群恶霸,虽然他们的举动令风十分不满,但是毕竟风只是一届女流,无法对他们采取什么行动,只能在心里嘟哝。这时,酒家里又进来了个“不良份子”无幻,说以50个丸子帮她搞定这群人,风觉得他莫名其妙,也就没多搭理他。而在这时,风因为一个意外而将茶水泼在了恶霸头领——龙次郎身上。愤怒的恶霸们欲将风的手指砍掉,风在无奈中大喊道:“丸子100个!”……   另一方面,龙次郎的父亲也就是当地的代官,依仗自己有着雄厚的保镖实力,在当地欺压穷人,当看到一穷人无法交齐足够的钱过桥时,欲下令将其杀死,武士仁看不惯这一幕,于是走上前,与代官的保镖交手了……   酒馆内,因为风答应给无幻100个团子,无幻将恶霸们打败了,就在这时,仁进来了。好战的无幻立刻向仁发了战书,最后两败俱伤,被代官抓入牢房,预计第二天斩首……   得知该状况的风,为报救命之恩,来到了刑场,利用烟花救出了无幻和仁。而风则以救命恩人的身份,命令二人和她一起去寻找带着向日葵香味的武士……(by百科)", + 'thumbnail': 're:^http?://.+\.jpg', + }, }] _APP_KEY = '6f90a59ac58a4123' @@ -84,13 +99,20 @@ class BiliBiliIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - cid = compat_parse_qs(self._search_regex( - [r'EmbedPlayer\([^)]+,\s*"([^"]+)"\)', - r']+src="https://secure\.bilibili\.com/secure,([^"]+)"'], - webpage, 'player parameters'))['cid'][0] + _is_episode = 'anime/v' in url + if not _is_episode: + cid = compat_parse_qs(self._search_regex( + [r'EmbedPlayer\([^)]+,\s*"([^"]+)"\)', + r']+src="https://secure\.bilibili\.com/secure,([^"]+)"'], + webpage, 'player parameters'))['cid'][0] + else: + url_t = 'http://bangumi.bilibili.com/web_api/get_source' + js = self._download_json(url_t, video_id, + data='episode_id=%s' % video_id, + headers=HEADERS) + cid = js['result']['cid'] payload = 'appkey=%s&cid=%s&otype=json&quality=2&type=mp4' % (self._APP_KEY, cid) sign = hashlib.md5((payload + self._BILIBILI_KEY).encode('utf-8')).hexdigest() @@ -125,6 +147,10 @@ class BiliBiliIE(InfoExtractor): description = self._html_search_meta('description', webpage) timestamp = unified_timestamp(self._html_search_regex( r']+datetime="([^"]+)"', webpage, 'upload time', fatal=False)) + if _is_episode: + thumbnail = self._html_search_meta('og:image', webpage) + else: + thumbnail = self._html_search_meta('thumbnailUrl', webpage) # TODO 'view_count' requires deobfuscating Javascript info = { @@ -132,7 +158,7 @@ class BiliBiliIE(InfoExtractor): 'title': title, 'description': description, 'timestamp': timestamp, - 'thumbnail': self._html_search_meta('thumbnailUrl', webpage), + 'thumbnail': thumbnail, 'duration': float_or_none(video_info.get('timelength'), scale=1000), } From 196c6ba06792ec38238631d9173fc146822baa7e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 31 Aug 2016 22:12:37 +0700 Subject: [PATCH 0003/1571] [facebook] Extract timestamp (Closes #10508) --- youtube_dl/extractor/facebook.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py index 0fb781a73..228b0b6d7 100644 --- a/youtube_dl/extractor/facebook.py +++ b/youtube_dl/extractor/facebook.py @@ -15,6 +15,7 @@ from ..compat import ( from ..utils import ( error_to_compat_str, ExtractorError, + int_or_none, limit_length, sanitized_Request, urlencode_postdata, @@ -62,6 +63,8 @@ class FacebookIE(InfoExtractor): 'ext': 'mp4', 'title': 're:Did you know Kei Nishikori is the first Asian man to ever reach a Grand Slam', 'uploader': 'Tennis on Facebook', + 'upload_date': '20140908', + 'timestamp': 1410199200, } }, { 'note': 'Video without discernible title', @@ -71,6 +74,8 @@ class FacebookIE(InfoExtractor): 'ext': 'mp4', 'title': 'Facebook video #274175099429670', 'uploader': 'Asif Nawab Butt', + 'upload_date': '20140506', + 'timestamp': 1399398998, }, 'expected_warnings': [ 'title' @@ -78,12 +83,14 @@ class FacebookIE(InfoExtractor): }, { 'note': 'Video with DASH manifest', 'url': 'https://www.facebook.com/video.php?v=957955867617029', - 'md5': '54706e4db4f5ad58fbad82dde1f1213f', + 'md5': 'b2c28d528273b323abe5c6ab59f0f030', 'info_dict': { 'id': '957955867617029', 'ext': 'mp4', 'title': 'When you post epic content on instagram.com/433 8 million followers, this is ...', 'uploader': 'Demy de Zeeuw', + 'upload_date': '20160110', + 'timestamp': 1452431627, }, }, { 'url': 'https://www.facebook.com/maxlayn/posts/10153807558977570', @@ -306,12 +313,16 @@ class FacebookIE(InfoExtractor): if not video_title: video_title = 'Facebook video #%s' % video_id uploader = clean_html(get_element_by_id('fbPhotoPageAuthorName', webpage)) + timestamp = int_or_none(self._search_regex( + r']+data-utime=["\'](\d+)', webpage, + 'timestamp', default=None)) info_dict = { 'id': video_id, 'title': video_title, 'formats': formats, 'uploader': uploader, + 'timestamp': timestamp, } return webpage, info_dict From 7a3e849f6eaf51b1d86b843a63664012ced2258c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 31 Aug 2016 22:23:55 +0700 Subject: [PATCH 0004/1571] [porncom] Extract categories and tags (Closes #10510) --- youtube_dl/extractor/porncom.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/porncom.py b/youtube_dl/extractor/porncom.py index 4baf79688..d85e0294d 100644 --- a/youtube_dl/extractor/porncom.py +++ b/youtube_dl/extractor/porncom.py @@ -26,6 +26,8 @@ class PornComIE(InfoExtractor): 'duration': 551, 'view_count': int, 'age_limit': 18, + 'categories': list, + 'tags': list, }, }, { 'url': 'http://se.porn.com/videos/marsha-may-rides-seth-on-top-of-his-thick-cock-2658067', @@ -75,7 +77,14 @@ class PornComIE(InfoExtractor): self._sort_formats(formats) view_count = str_to_int(self._search_regex( - r'class=["\']views["\'][^>]*>

([\d,.]+)', webpage, 'view count')) + r'class=["\']views["\'][^>]*>

([\d,.]+)', webpage, + 'view count', fatal=False)) + + def extract_list(kind): + s = self._search_regex( + r'(?s)]*>%s:(.+?)

' % kind.capitalize(), + webpage, kind, fatal=False) + return re.findall(r']+>([^<]+)', s or '') return { 'id': video_id, @@ -86,4 +95,6 @@ class PornComIE(InfoExtractor): 'view_count': view_count, 'formats': formats, 'age_limit': 18, + 'categories': extract_list('categories'), + 'tags': extract_list('tags'), } From f8fd510eb4b2733a5c083d767d45baa88b289298 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Wed, 31 Aug 2016 18:31:49 +0100 Subject: [PATCH 0005/1571] [limelight] skip ism manifests and reduce requests --- youtube_dl/extractor/limelight.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/limelight.py b/youtube_dl/extractor/limelight.py index a425bafe3..6752ffee2 100644 --- a/youtube_dl/extractor/limelight.py +++ b/youtube_dl/extractor/limelight.py @@ -34,11 +34,12 @@ class LimelightBaseIE(InfoExtractor): def _extract_info(self, streams, mobile_urls, properties): video_id = properties['media_id'] formats = [] - + urls = [] for stream in streams: stream_url = stream.get('url') - if not stream_url or stream.get('drmProtected'): + if not stream_url or stream.get('drmProtected') or stream_url in urls: continue + urls.append(stream_url) ext = determine_ext(stream_url) if ext == 'f4m': formats.extend(self._extract_f4m_formats( @@ -58,9 +59,11 @@ class LimelightBaseIE(InfoExtractor): format_id = 'rtmp' if stream.get('videoBitRate'): format_id += '-%d' % int_or_none(stream['videoBitRate']) + http_url = 'http://%s/%s' % (rtmp.group('host').replace('csl.', 'cpl.'), rtmp.group('playpath')[4:]) + urls.append(http_url) http_fmt = fmt.copy() http_fmt.update({ - 'url': 'http://%s/%s' % (rtmp.group('host').replace('csl.', 'cpl.'), rtmp.group('playpath')[4:]), + 'url': http_url, 'format_id': format_id.replace('rtmp', 'http'), }) formats.append(http_fmt) @@ -76,8 +79,9 @@ class LimelightBaseIE(InfoExtractor): for mobile_url in mobile_urls: media_url = mobile_url.get('mobileUrl') format_id = mobile_url.get('targetMediaPlatform') - if not media_url or format_id == 'Widevine': + if not media_url or format_id in ('Widevine', 'SmoothStreaming') or media_url in urls: continue + urls.append(media_url) ext = determine_ext(media_url) if ext == 'm3u8': formats.extend(self._extract_m3u8_formats( From 2896dd73bc2c9844175258086c0300395722e5c9 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Thu, 1 Sep 2016 08:00:13 +0100 Subject: [PATCH 0006/1571] [cbs] extract once formats(closes #10515) --- youtube_dl/extractor/cbs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/cbs.py b/youtube_dl/extractor/cbs.py index c72ed2dbb..3f4dea40c 100644 --- a/youtube_dl/extractor/cbs.py +++ b/youtube_dl/extractor/cbs.py @@ -51,7 +51,7 @@ class CBSIE(CBSBaseIE): path = 'dJ5BDC/media/guid/2198311517/' + guid smil_url = 'http://link.theplatform.com/s/%s?mbr=true' % path formats, subtitles = self._extract_theplatform_smil(smil_url + '&manifest=m3u', guid) - for r in ('HLS&formats=M3U', 'RTMP', 'WIFI', '3G'): + for r in ('OnceURL&formats=M3U', 'HLS&formats=M3U', 'RTMP', 'WIFI', '3G'): try: tp_formats, _ = self._extract_theplatform_smil(smil_url + '&assetTypes=' + r, guid, 'Downloading %s SMIL data' % r.split('&')[0]) formats.extend(tp_formats) From 165c54e97d10705614934d5b1d86d90c06951b7c Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Thu, 1 Sep 2016 16:28:03 +0800 Subject: [PATCH 0007/1571] =?UTF-8?q?[southpark.cc.com:espa=C3=B1ol]=20Ski?= =?UTF-8?q?p=20geo-restricted=20=5FTESTS?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Breaks https://travis-ci.org/rg3/youtube-dl/jobs/156728175 --- youtube_dl/extractor/southpark.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/southpark.py b/youtube_dl/extractor/southpark.py index a147f7db1..e2a9e45ac 100644 --- a/youtube_dl/extractor/southpark.py +++ b/youtube_dl/extractor/southpark.py @@ -35,6 +35,7 @@ class SouthParkEsIE(SouthParkIE): 'description': 'Cartman Consigue Una Sonda Anal', }, 'playlist_count': 4, + 'skip': 'Geo-restricted', }] From 746a695b362cb602625ed7357294bb18de133883 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Thu, 1 Sep 2016 16:42:35 +0800 Subject: [PATCH 0008/1571] [myvidster] Update _TESTS (closes #10473) --- youtube_dl/extractor/myvidster.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/myvidster.py b/youtube_dl/extractor/myvidster.py index 731c24542..2117d302d 100644 --- a/youtube_dl/extractor/myvidster.py +++ b/youtube_dl/extractor/myvidster.py @@ -13,7 +13,7 @@ class MyVidsterIE(InfoExtractor): 'id': '3685814', 'title': 'md5:7d8427d6d02c4fbcef50fe269980c749', 'upload_date': '20141027', - 'uploader_id': 'utkualp', + 'uploader': 'utkualp', 'ext': 'mp4', 'age_limit': 18, }, From 05d4612947d6dbfaedb8f2a00daa5f29d85f73df Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Thu, 1 Sep 2016 16:58:16 +0800 Subject: [PATCH 0009/1571] [movingimage] Adapt to the new domain name and fix extraction Closes #10466 --- ChangeLog | 6 +++++ youtube_dl/extractor/extractors.py | 2 +- .../extractor/{ssa.py => movingimage.py} | 26 +++++++------------ 3 files changed, 17 insertions(+), 17 deletions(-) rename youtube_dl/extractor/{ssa.py => movingimage.py} (65%) diff --git a/ChangeLog b/ChangeLog index 0f8076d96..877e8112e 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +version + +Extractors +* [movingimage] Fix for the new site name (#10466) + + version 2016.08.31 Extractors diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 21efa96b2..8d0688f53 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -486,6 +486,7 @@ from .motherless import MotherlessIE from .motorsport import MotorsportIE from .movieclips import MovieClipsIE from .moviezine import MoviezineIE +from .movingimage import MovingImageIE from .msn import MSNIE from .mtv import ( MTVIE, @@ -806,7 +807,6 @@ from .srgssr import ( SRGSSRPlayIE, ) from .srmediathek import SRMediathekIE -from .ssa import SSAIE from .stanfordoc import StanfordOpenClassroomIE from .steam import SteamIE from .streamable import StreamableIE diff --git a/youtube_dl/extractor/ssa.py b/youtube_dl/extractor/movingimage.py similarity index 65% rename from youtube_dl/extractor/ssa.py rename to youtube_dl/extractor/movingimage.py index 54d1843f2..bb789c32e 100644 --- a/youtube_dl/extractor/ssa.py +++ b/youtube_dl/extractor/movingimage.py @@ -7,22 +7,19 @@ from ..utils import ( ) -class SSAIE(InfoExtractor): - _VALID_URL = r'https?://ssa\.nls\.uk/film/(?P\d+)' +class MovingImageIE(InfoExtractor): + _VALID_URL = r'https?://movingimage\.nls\.uk/film/(?P\d+)' _TEST = { - 'url': 'http://ssa.nls.uk/film/3561', + 'url': 'http://movingimage.nls.uk/film/3561', + 'md5': '4caa05c2b38453e6f862197571a7be2f', 'info_dict': { 'id': '3561', - 'ext': 'flv', + 'ext': 'mp4', 'title': 'SHETLAND WOOL', 'description': 'md5:c5afca6871ad59b4271e7704fe50ab04', 'duration': 900, 'thumbnail': 're:^https?://.*\.jpg$', }, - 'params': { - # rtmp download - 'skip_download': True, - }, } def _real_extract(self, url): @@ -30,10 +27,9 @@ class SSAIE(InfoExtractor): webpage = self._download_webpage(url, video_id) - streamer = self._search_regex( - r"'streamer'\s*,\S*'(rtmp[^']+)'", webpage, 'streamer') - play_path = self._search_regex( - r"'file'\s*,\s*'([^']+)'", webpage, 'file').rpartition('.')[0] + formats = self._extract_m3u8_formats( + self._html_search_regex(r'file\s*:\s*"([^"]+)"', webpage, 'm3u8 manifest URL'), + video_id, ext='mp4', entry_protocol='m3u8_native') def search_field(field_name, fatal=False): return self._search_regex( @@ -44,13 +40,11 @@ class SSAIE(InfoExtractor): description = unescapeHTML(search_field('Description')) duration = parse_duration(search_field('Running time')) thumbnail = self._search_regex( - r"'image'\s*,\s*'([^']+)'", webpage, 'thumbnails', fatal=False) + r"image\s*:\s*'([^']+)'", webpage, 'thumbnail', fatal=False) return { 'id': video_id, - 'url': streamer, - 'play_path': play_path, - 'ext': 'flv', + 'formats': formats, 'title': title, 'description': description, 'duration': duration, From 4c8ab6fd715249290feab89bbc86eb803b459993 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Thu, 1 Sep 2016 17:04:41 +0800 Subject: [PATCH 0010/1571] [thvideo] Remove extractor. Website down. Closes #10464 According to a screenshot in http://tieba.baidu.com/p/4691302183, thvideo.tv is shut down "temporarily". I see no clues that it will be up again, so I remove it here. --- ChangeLog | 1 + youtube_dl/extractor/extractors.py | 4 -- youtube_dl/extractor/thvideo.py | 84 ------------------------------ 3 files changed, 1 insertion(+), 88 deletions(-) delete mode 100644 youtube_dl/extractor/thvideo.py diff --git a/ChangeLog b/ChangeLog index 877e8112e..2e75c003d 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,7 @@ version Extractors +- [thvideo] Remove extractor (#10464) * [movingimage] Fix for the new site name (#10466) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 8d0688f53..459d776b3 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -869,10 +869,6 @@ from .tnaflix import ( MovieFapIE, ) from .toggle import ToggleIE -from .thvideo import ( - THVideoIE, - THVideoPlaylistIE -) from .toutv import TouTvIE from .toypics import ToypicsUserIE, ToypicsIE from .traileraddict import TrailerAddictIE diff --git a/youtube_dl/extractor/thvideo.py b/youtube_dl/extractor/thvideo.py deleted file mode 100644 index 406f4a826..000000000 --- a/youtube_dl/extractor/thvideo.py +++ /dev/null @@ -1,84 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..utils import ( - unified_strdate -) - - -class THVideoIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?thvideo\.tv/(?:v/th|mobile\.php\?cid=)(?P[0-9]+)' - _TEST = { - 'url': 'http://thvideo.tv/v/th1987/', - 'md5': 'fa107b1f73817e325e9433505a70db50', - 'info_dict': { - 'id': '1987', - 'ext': 'mp4', - 'title': '【动画】秘封活动记录 ~ The Sealed Esoteric History.分镜稿预览', - 'display_id': 'th1987', - 'thumbnail': 'http://thvideo.tv/uploadfile/2014/0722/20140722013459856.jpg', - 'description': '社团京都幻想剧团的第一个东方二次同人动画作品「秘封活动记录 ~ The Sealed Esoteric History.」 本视频是该动画第一期的分镜草稿...', - 'upload_date': '20140722' - } - } - - def _real_extract(self, url): - video_id = self._match_id(url) - - # extract download link from mobile player page - webpage_player = self._download_webpage( - 'http://thvideo.tv/mobile.php?cid=%s-0' % (video_id), - video_id, note='Downloading video source page') - video_url = self._html_search_regex( - r'', webpage, - 'upload date', fatal=False)) - - return { - 'id': video_id, - 'ext': 'mp4', - 'url': video_url, - 'title': title, - 'display_id': display_id, - 'thumbnail': thumbnail, - 'description': description, - 'upload_date': upload_date - } - - -class THVideoPlaylistIE(InfoExtractor): - _VALID_URL = r'http?://(?:www\.)?thvideo\.tv/mylist(?P[0-9]+)' - _TEST = { - 'url': 'http://thvideo.tv/mylist2', - 'info_dict': { - 'id': '2', - 'title': '幻想万華鏡', - }, - 'playlist_mincount': 23, - } - - def _real_extract(self, url): - playlist_id = self._match_id(url) - - webpage = self._download_webpage(url, playlist_id) - list_title = self._html_search_regex( - r'

(.*?)\d+)' + _TEST = { + 'url': 'https://app.curiositystream.com/video/2', + 'md5': 'a0074c190e6cddaf86900b28d3e9ee7a', + 'info_dict': { + 'id': '2', + 'ext': 'mp4', + 'title': 'How Did You Develop The Internet?', + 'description': 'Vint Cerf, Google\'s Chief Internet Evangelist, describes how he and Bob Kahn created the internet.', + 'timestamp': 1448388615, + 'upload_date': '20151124', + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + media = self._call_api('media/' + video_id, video_id) + return self._extract_media_info(media) + + +class CuriosityStreamCollectionIE(CuriosityStreamBaseIE): + IE_NAME = 'curiositystream:collection' + _VALID_URL = r'https?://app\.curiositystream\.com/collection/(?P\d+)' + _TEST = { + 'url': 'https://app.curiositystream.com/collection/2', + 'info_dict': { + 'id': '2', + 'title': 'Curious Minds: The Internet', + 'description': 'How is the internet shaping our lives in the 21st Century?', + }, + 'playlist_mincount': 17, + } + + def _real_extract(self, url): + collection_id = self._match_id(url) + collection = self._call_api( + 'collections/' + collection_id, collection_id) + entries = [] + for media in collection.get('media', []): + entries.append(self._extract_media_info(media)) + return self.playlist_result( + entries, collection_id, + collection.get('title'), collection.get('description')) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 459d776b3..0c2436b67 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -194,6 +194,10 @@ from .ctsnews import CtsNewsIE from .ctv import CTVIE from .ctvnews import CTVNewsIE from .cultureunplugged import CultureUnpluggedIE +from .curiositystream import ( + CuriosityStreamIE, + CuriosityStreamCollectionIE, +) from .cwtv import CWTVIE from .dailymail import DailyMailIE from .dailymotion import ( From 9250181f37cf0289c02d18ab91203c6181f9cc71 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 1 Sep 2016 21:37:25 +0700 Subject: [PATCH 0012/1571] [extractor/common] Restore NAME usage from EXT-X-MEDIA tag for formats codes in _extract_m3u8_formats (Closes #10522) --- youtube_dl/extractor/common.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index da0af29ec..36d43fd50 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1202,6 +1202,7 @@ class InfoExtractor(object): 'preference': preference, }] last_info = None + last_media = None for line in m3u8_doc.splitlines(): if line.startswith('#EXT-X-STREAM-INF:'): last_info = parse_m3u8_attributes(line) @@ -1224,6 +1225,10 @@ class InfoExtractor(object): 'protocol': entry_protocol, 'preference': preference, }) + else: + # When there is no URI in EXT-X-MEDIA let this tag's + # data be used by regular URI lines below + last_media = media elif line.startswith('#') or not line.strip(): continue else: @@ -1234,13 +1239,14 @@ class InfoExtractor(object): format_id = [] if m3u8_id: format_id.append(m3u8_id) + last_media_name = last_media.get('NAME') if last_media else None + # Despite specification does not mention NAME attribute for + # EXT-X-STREAM-INF it still sometimes may be present + stream_name = last_info.get('NAME') or last_media_name # Bandwidth of live streams may differ over time thus making # format_id unpredictable. So it's better to keep provided # format_id intact. if not live: - # Despite specification does not mention NAME attribute for - # EXT-X-STREAM-INF it still sometimes may be present - stream_name = last_info.get('NAME') format_id.append(stream_name if stream_name else '%d' % (tbr if tbr else len(formats))) f = { 'format_id': '-'.join(format_id), From e816c9d158629ef054c1cc77eecf83043d06fe8c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 1 Sep 2016 22:18:16 +0700 Subject: [PATCH 0013/1571] [extractor/common] Simplify _extract_m3u8_formats --- youtube_dl/extractor/common.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 36d43fd50..a9c7a8d16 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1201,8 +1201,8 @@ class InfoExtractor(object): 'protocol': entry_protocol, 'preference': preference, }] - last_info = None - last_media = None + last_info = {} + last_media = {} for line in m3u8_doc.splitlines(): if line.startswith('#EXT-X-STREAM-INF:'): last_info = parse_m3u8_attributes(line) @@ -1232,17 +1232,13 @@ class InfoExtractor(object): elif line.startswith('#') or not line.strip(): continue else: - if last_info is None: - formats.append({'url': format_url(line)}) - continue tbr = int_or_none(last_info.get('AVERAGE-BANDWIDTH') or last_info.get('BANDWIDTH'), scale=1000) format_id = [] if m3u8_id: format_id.append(m3u8_id) - last_media_name = last_media.get('NAME') if last_media else None # Despite specification does not mention NAME attribute for # EXT-X-STREAM-INF it still sometimes may be present - stream_name = last_info.get('NAME') or last_media_name + stream_name = last_info.get('NAME') or last_media.get('NAME') # Bandwidth of live streams may differ over time thus making # format_id unpredictable. So it's better to keep provided # format_id intact. @@ -1275,6 +1271,7 @@ class InfoExtractor(object): f.update(parse_codecs(last_info.get('CODECS'))) formats.append(f) last_info = {} + last_media = {} return formats @staticmethod From f6af0f888b03e8c072b86c04492cc84c966c9f15 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 1 Sep 2016 23:15:01 +0700 Subject: [PATCH 0014/1571] [youporn] Fix categories and tags extraction (Closes #10521) --- youtube_dl/extractor/youporn.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/youporn.py b/youtube_dl/extractor/youporn.py index 0df2d76ee..0265a64a7 100644 --- a/youtube_dl/extractor/youporn.py +++ b/youtube_dl/extractor/youporn.py @@ -35,7 +35,7 @@ class YouPornIE(InfoExtractor): 'age_limit': 18, }, }, { - # Anonymous User uploader + # Unknown uploader 'url': 'http://www.youporn.com/watch/561726/big-tits-awesome-brunette-on-amazing-webcam-show/?from=related3&al=2&from_id=561726&pos=4', 'info_dict': { 'id': '561726', @@ -44,7 +44,7 @@ class YouPornIE(InfoExtractor): 'title': 'Big Tits Awesome Brunette On amazing webcam show', 'description': 'http://sweetlivegirls.com Big Tits Awesome Brunette On amazing webcam show.mp4', 'thumbnail': 're:^https?://.*\.jpg$', - 'uploader': 'Anonymous User', + 'uploader': 'Unknown', 'upload_date': '20111125', 'average_rating': int, 'view_count': int, @@ -140,17 +140,17 @@ class YouPornIE(InfoExtractor): r'>All [Cc]omments? \(([\d,.]+)\)', webpage, 'comment count', fatal=False)) - def extract_tag_box(title): - tag_box = self._search_regex( - (r']+class=["\']tagBoxTitle["\'][^>]*>\s*%s\b.*?\s*' - ']+class=["\']tagBoxContent["\']>(.+?)') % re.escape(title), - webpage, '%s tag box' % title, default=None) + def extract_tag_box(regex, title): + tag_box = self._search_regex(regex, webpage, title, default=None) if not tag_box: return [] return re.findall(r']+href=[^>]+>([^<]+)', tag_box) - categories = extract_tag_box('Category') - tags = extract_tag_box('Tags') + categories = extract_tag_box( + r'(?s)Categories:.*?]+>(.+?)', 'categories') + tags = extract_tag_box( + r'(?s)Tags:.*?\s*]+class=["\']tagBoxContent["\'][^>]*>(.+?)', + 'tags') return { 'id': video_id, From 8fb6af6bba201c9f750aadb7b092704195c7f8e0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 1 Sep 2016 23:32:28 +0700 Subject: [PATCH 0015/1571] [exfm] Remove extractor (Closes #10482) --- youtube_dl/extractor/exfm.py | 58 ------------------------------ youtube_dl/extractor/extractors.py | 1 - 2 files changed, 59 deletions(-) delete mode 100644 youtube_dl/extractor/exfm.py diff --git a/youtube_dl/extractor/exfm.py b/youtube_dl/extractor/exfm.py deleted file mode 100644 index 09ed4f2b5..000000000 --- a/youtube_dl/extractor/exfm.py +++ /dev/null @@ -1,58 +0,0 @@ -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor - - -class ExfmIE(InfoExtractor): - IE_NAME = 'exfm' - IE_DESC = 'ex.fm' - _VALID_URL = r'https?://(?:www\.)?ex\.fm/song/(?P[^/]+)' - _SOUNDCLOUD_URL = r'http://(?:www\.)?api\.soundcloud\.com/tracks/([^/]+)/stream' - _TESTS = [ - { - 'url': 'http://ex.fm/song/eh359', - 'md5': 'e45513df5631e6d760970b14cc0c11e7', - 'info_dict': { - 'id': '44216187', - 'ext': 'mp3', - 'title': 'Test House "Love Is Not Enough" (Extended Mix) DeadJournalist Exclusive', - 'uploader': 'deadjournalist', - 'upload_date': '20120424', - 'description': 'Test House \"Love Is Not Enough\" (Extended Mix) DeadJournalist Exclusive', - }, - 'note': 'Soundcloud song', - 'skip': 'The site is down too often', - }, - { - 'url': 'http://ex.fm/song/wddt8', - 'md5': '966bd70741ac5b8570d8e45bfaed3643', - 'info_dict': { - 'id': 'wddt8', - 'ext': 'mp3', - 'title': 'Safe and Sound', - 'uploader': 'Capital Cities', - }, - 'skip': 'The site is down too often', - }, - ] - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - song_id = mobj.group('id') - info_url = 'http://ex.fm/api/v3/song/%s' % song_id - info = self._download_json(info_url, song_id)['song'] - song_url = info['url'] - if re.match(self._SOUNDCLOUD_URL, song_url) is not None: - self.to_screen('Soundcloud song detected') - return self.url_result(song_url.replace('/stream', ''), 'Soundcloud') - return { - 'id': song_id, - 'url': song_url, - 'ext': 'mp3', - 'title': info['title'], - 'thumbnail': info['image']['large'], - 'uploader': info['artist'], - 'view_count': info['loved_count'], - } diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 0c2436b67..7b59d5db2 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -261,7 +261,6 @@ from .espn import ESPNIE from .esri import EsriVideoIE from .europa import EuropaIE from .everyonesmixtape import EveryonesMixtapeIE -from .exfm import ExfmIE from .expotv import ExpoTVIE from .extremetube import ExtremeTubeIE from .eyedotv import EyedoTVIE From af95ee94b4554449db175ae44060a66c89bd96ed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 1 Sep 2016 23:38:49 +0700 Subject: [PATCH 0016/1571] [glide] Fix extraction (Closes #10478) --- youtube_dl/extractor/glide.py | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/youtube_dl/extractor/glide.py b/youtube_dl/extractor/glide.py index 62ff84835..50f698803 100644 --- a/youtube_dl/extractor/glide.py +++ b/youtube_dl/extractor/glide.py @@ -14,10 +14,8 @@ class GlideIE(InfoExtractor): 'info_dict': { 'id': 'UZF8zlmuQbe4mr+7dCiQ0w==', 'ext': 'mp4', - 'title': 'Damon Timm\'s Glide message', + 'title': "Damon's Glide message", 'thumbnail': 're:^https?://.*?\.cloudfront\.net/.*\.jpg$', - 'uploader': 'Damon Timm', - 'upload_date': '20140919', } } @@ -27,7 +25,8 @@ class GlideIE(InfoExtractor): webpage = self._download_webpage(url, video_id) title = self._html_search_regex( - r'(.+?)', webpage, 'title') + r'(.+?)', webpage, + 'title', default=None) or self._og_search_title(webpage) video_url = self._proto_relative_url(self._search_regex( r']+src=(["\'])(?P.+?)\1', webpage, 'video URL', default=None, @@ -36,18 +35,10 @@ class GlideIE(InfoExtractor): r']+id=["\']video-thumbnail["\'][^>]+src=(["\'])(?P.+?)\1', webpage, 'thumbnail url', default=None, group='url')) or self._og_search_thumbnail(webpage) - uploader = self._search_regex( - r']+class=["\']info-name["\'][^>]*>([^<]+)', - webpage, 'uploader', fatal=False) - upload_date = unified_strdate(self._search_regex( - r']+class="info-date"[^>]*>([^<]+)', - webpage, 'upload date', fatal=False)) return { 'id': video_id, 'title': title, 'url': video_url, 'thumbnail': thumbnail, - 'uploader': uploader, - 'upload_date': upload_date, } From 8276d3b87a54f43ca2f47b7709a6557ea979327c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 1 Sep 2016 23:46:15 +0700 Subject: [PATCH 0017/1571] [thestar] Fix extraction (Closes #10465) --- youtube_dl/extractor/thestar.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/thestar.py b/youtube_dl/extractor/thestar.py index ba1380abc..c3f118894 100644 --- a/youtube_dl/extractor/thestar.py +++ b/youtube_dl/extractor/thestar.py @@ -2,8 +2,6 @@ from __future__ import unicode_literals from .common import InfoExtractor -from .brightcove import BrightcoveLegacyIE -from ..compat import compat_parse_qs class TheStarIE(InfoExtractor): @@ -30,6 +28,9 @@ class TheStarIE(InfoExtractor): def _real_extract(self, url): display_id = self._match_id(url) webpage = self._download_webpage(url, display_id) - brightcove_legacy_url = BrightcoveLegacyIE._extract_brightcove_url(webpage) - brightcove_id = compat_parse_qs(brightcove_legacy_url)['@videoPlayer'][0] - return self.url_result(self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, 'BrightcoveNew', brightcove_id) + brightcove_id = self._search_regex( + r'mainartBrightcoveVideoId["\']?\s*:\s*["\']?(\d+)', + webpage, 'brightcove id') + return self.url_result( + self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, + 'BrightcoveNew', brightcove_id) From f97ec8bcb95b45d9a657392cd24eabfadb4053e5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 1 Sep 2016 23:46:58 +0700 Subject: [PATCH 0018/1571] [glide] Remove unused import --- youtube_dl/extractor/glide.py | 1 - 1 file changed, 1 deletion(-) diff --git a/youtube_dl/extractor/glide.py b/youtube_dl/extractor/glide.py index 50f698803..f0d951396 100644 --- a/youtube_dl/extractor/glide.py +++ b/youtube_dl/extractor/glide.py @@ -2,7 +2,6 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..utils import unified_strdate class GlideIE(InfoExtractor): From 4191779dcda8a80faf6e53579e011b63ee5c3878 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Thu, 1 Sep 2016 19:07:41 +0100 Subject: [PATCH 0019/1571] [nytimes] improve extraction --- youtube_dl/extractor/nytimes.py | 93 +++++++++++++++++++++++---------- 1 file changed, 64 insertions(+), 29 deletions(-) diff --git a/youtube_dl/extractor/nytimes.py b/youtube_dl/extractor/nytimes.py index 681683e86..142c34256 100644 --- a/youtube_dl/extractor/nytimes.py +++ b/youtube_dl/extractor/nytimes.py @@ -1,26 +1,37 @@ from __future__ import unicode_literals +import hmac +import hashlib +import base64 + from .common import InfoExtractor from ..utils import ( float_or_none, int_or_none, parse_iso8601, + mimetype2ext, + determine_ext, ) class NYTimesBaseIE(InfoExtractor): + _SECRET = b'pX(2MbU2);4N{7J8)>YwKRJ+/pQ3JkiU2Q^V>mFYv6g6gYvt6v' + def _extract_video_from_id(self, video_id): - video_data = self._download_json( - 'http://www.nytimes.com/svc/video/api/v2/video/%s' % video_id, - video_id, 'Downloading video JSON') + # Authorization generation algorithm is reverse engineered from `signer` in + # http://graphics8.nytimes.com/video/vhs/vhs-2.x.min.js + path = '/svc/video/api/v3/video/' + video_id + hm = hmac.new(self._SECRET, (path + ':vhs').encode(), hashlib.sha512).hexdigest() + video_data = self._download_json('http://www.nytimes.com' + path, video_id, 'Downloading video JSON', headers={ + 'Authorization': 'NYTV ' + base64.b64encode(hm.encode()).decode(), + 'X-NYTV': 'vhs', + }, fatal=False) + if not video_data: + video_data = self._download_json( + 'http://www.nytimes.com/svc/video/api/v2/video/' + video_id, + video_id, 'Downloading video JSON') title = video_data['headline'] - description = video_data.get('summary') - duration = float_or_none(video_data.get('duration'), 1000) - - uploader = video_data.get('byline') - publication_date = video_data.get('publication_date') - timestamp = parse_iso8601(publication_date[:-8]) if publication_date else None def get_file_size(file_size): if isinstance(file_size, int): @@ -28,35 +39,59 @@ class NYTimesBaseIE(InfoExtractor): elif isinstance(file_size, dict): return int(file_size.get('value', 0)) else: - return 0 + return None - formats = [ - { - 'url': video['url'], - 'format_id': video.get('type'), - 'vcodec': video.get('video_codec'), - 'width': int_or_none(video.get('width')), - 'height': int_or_none(video.get('height')), - 'filesize': get_file_size(video.get('fileSize')), - } for video in video_data['renditions'] if video.get('url') - ] + urls = [] + formats = [] + for video in video_data.get('renditions', []): + video_url = video.get('url') + format_id = video.get('type') + if not video_url or format_id == 'thumbs' or video_url in urls: + continue + urls.append(video_url) + ext = mimetype2ext(video.get('mimetype')) or determine_ext(video_url) + if ext == 'm3u8': + formats.extend(self._extract_m3u8_formats( + video_url, video_id, 'mp4', 'm3u8_native', + m3u8_id=format_id or 'hls', fatal=False)) + elif ext == 'mpd': + continue + # formats.extend(self._extract_mpd_formats( + # video_url, video_id, format_id or 'dash', fatal=False)) + else: + formats.append({ + 'url': video_url, + 'format_id': format_id, + 'vcodec': video.get('videoencoding') or video.get('video_codec'), + 'width': int_or_none(video.get('width')), + 'height': int_or_none(video.get('height')), + 'filesize': get_file_size(video.get('file_size') or video.get('fileSize')), + 'tbr': int_or_none(video.get('bitrate'), 1000), + 'ext': ext, + }) self._sort_formats(formats) - thumbnails = [ - { - 'url': 'http://www.nytimes.com/%s' % image['url'], + thumbnails = [] + for image in video_data.get('images', []): + image_url = image.get('url') + if not image_url: + continue + thumbnails.append({ + 'url': 'http://www.nytimes.com/' + image_url, 'width': int_or_none(image.get('width')), 'height': int_or_none(image.get('height')), - } for image in video_data.get('images', []) if image.get('url') - ] + }) + + publication_date = video_data.get('publication_date') + timestamp = parse_iso8601(publication_date[:-8]) if publication_date else None return { 'id': video_id, 'title': title, - 'description': description, + 'description': video_data.get('summary'), 'timestamp': timestamp, - 'uploader': uploader, - 'duration': duration, + 'uploader': video_data.get('byline'), + 'duration': float_or_none(video_data.get('duration'), 1000), 'formats': formats, 'thumbnails': thumbnails, } @@ -67,7 +102,7 @@ class NYTimesIE(NYTimesBaseIE): _TESTS = [{ 'url': 'http://www.nytimes.com/video/opinion/100000002847155/verbatim-what-is-a-photocopier.html?playlistId=100000001150263', - 'md5': '18a525a510f942ada2720db5f31644c0', + 'md5': 'd665342765db043f7e225cff19df0f2d', 'info_dict': { 'id': '100000002847155', 'ext': 'mov', From b207d5ebd4eab80e07673aba9696d240d1009bcf Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Thu, 1 Sep 2016 19:46:58 +0100 Subject: [PATCH 0020/1571] [curiositystream] don't cache auth token --- youtube_dl/extractor/curiositystream.py | 28 +++++++++---------------- 1 file changed, 10 insertions(+), 18 deletions(-) diff --git a/youtube_dl/extractor/curiositystream.py b/youtube_dl/extractor/curiositystream.py index 7105e3c4c..e3c99468c 100644 --- a/youtube_dl/extractor/curiositystream.py +++ b/youtube_dl/extractor/curiositystream.py @@ -33,24 +33,16 @@ class CuriosityStreamBaseIE(InfoExtractor): return result['data'] def _real_initialize(self): - if not self._auth_token: - user = self._downloader.cache.load('curiositystream', 'user') or {} - self._auth_token = user.get('auth_token') - if not self._auth_token: - (email, password) = self._get_login_info() - if email is None: - return - result = self._download_json( - self._API_BASE_URL + 'login', None, data=urlencode_postdata({ - 'email': email, - 'password': password, - })) - self._handle_errors(result) - self._auth_token = result['message']['auth_token'] - self._downloader.cache.store( - 'curiositystream', 'user', { - 'auth_token': self._auth_token, - }) + (email, password) = self._get_login_info() + if email is None: + return + result = self._download_json( + self._API_BASE_URL + 'login', None, data=urlencode_postdata({ + 'email': email, + 'password': password, + })) + self._handle_errors(result) + self._auth_token = result['message']['auth_token'] def _extract_media_info(self, media): video_id = compat_str(media['id']) From 6150502e4709b6b2ebc226c9c38fa346b9358699 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Thu, 1 Sep 2016 22:14:40 +0100 Subject: [PATCH 0021/1571] [adobepass] check for authz_token expiration(#10527) --- youtube_dl/extractor/adobepass.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/adobepass.py b/youtube_dl/extractor/adobepass.py index 9e3a3e362..68ec37e00 100644 --- a/youtube_dl/extractor/adobepass.py +++ b/youtube_dl/extractor/adobepass.py @@ -37,6 +37,10 @@ class AdobePassIE(InfoExtractor): return self._search_regex( '<%s>(.+?)' % (tag, tag), xml_str, tag) + def is_expired(token, date_ele): + token_expires = unified_timestamp(re.sub(r'[_ ]GMT', '', xml_text(token, date_ele))) + return token_expires and token_expires <= int(time.time()) + mvpd_headers = { 'ap_42': 'anonymous', 'ap_11': 'Linux i686', @@ -47,11 +51,8 @@ class AdobePassIE(InfoExtractor): guid = xml_text(resource, 'guid') requestor_info = self._downloader.cache.load('mvpd', requestor_id) or {} authn_token = requestor_info.get('authn_token') - if authn_token: - token_expires = unified_timestamp(re.sub(r'[_ ]GMT', '', xml_text(authn_token, 'simpleTokenExpires'))) - if token_expires and token_expires <= int(time.time()): - authn_token = None - requestor_info = {} + if authn_token and is_expired(authn_token, 'simpleTokenExpires'): + authn_token = None if not authn_token: # TODO add support for other TV Providers mso_id = 'DTV' @@ -98,6 +99,8 @@ class AdobePassIE(InfoExtractor): self._downloader.cache.store('mvpd', requestor_id, requestor_info) authz_token = requestor_info.get(guid) + if authz_token and is_expired(authz_token, 'simpleTokenTTL'): + authz_token = None if not authz_token: authorize = self._download_webpage( self._SERVICE_PROVIDER_TEMPLATE % 'authorize', video_id, From 2c3e0af93e00d7e2e20283be12541aaebabfa1bf Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Fri, 2 Sep 2016 09:53:04 +0100 Subject: [PATCH 0022/1571] [go] Add new extractor --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/go.py | 101 +++++++++++++++++++++++++++++ 2 files changed, 102 insertions(+) create mode 100644 youtube_dl/extractor/go.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 7b59d5db2..2bcd5a0cd 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -318,6 +318,7 @@ from .globo import ( GloboIE, GloboArticleIE, ) +from .go import GoIE from .godtube import GodTubeIE from .godtv import GodTVIE from .golem import GolemIE diff --git a/youtube_dl/extractor/go.py b/youtube_dl/extractor/go.py new file mode 100644 index 000000000..6a437c54d --- /dev/null +++ b/youtube_dl/extractor/go.py @@ -0,0 +1,101 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + int_or_none, + determine_ext, + parse_age_limit, +) + + +class GoIE(InfoExtractor): + _BRANDS = { + 'abc': '001', + 'freeform': '002', + 'watchdisneychannel': '004', + 'watchdisneyjunior': '008', + 'watchdisneyxd': '009', + } + _VALID_URL = r'https?://(?:(?P%s)\.)?go\.com/.*?vdka(?P\w+)' % '|'.join(_BRANDS.keys()) + _TESTS = [{ + 'url': 'http://abc.go.com/shows/castle/video/most-recent/vdka0_g86w5onx', + 'info_dict': { + 'id': '0_g86w5onx', + 'ext': 'mp4', + 'title': 'Sneak Peek: Language Arts', + 'description': 'md5:7dcdab3b2d17e5217c953256af964e9c', + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + }, { + 'url': 'http://abc.go.com/shows/after-paradise/video/most-recent/vdka3335601', + 'only_matching': True, + }] + + def _real_extract(self, url): + sub_domain, video_id = re.match(self._VALID_URL, url).groups() + video_data = self._download_json( + 'http://api.contents.watchabc.go.com/vp2/ws/contents/3000/videos/%s/001/-1/-1/-1/%s/-1/-1.json' % (self._BRANDS[sub_domain], video_id), + video_id)['video'][0] + title = video_data['title'] + + formats = [] + for asset in video_data.get('assets', {}).get('asset', []): + asset_url = asset.get('value') + if not asset_url: + continue + format_id = asset.get('format') + ext = determine_ext(asset_url) + if ext == 'm3u8': + formats.extend(self._extract_m3u8_formats( + asset_url, video_id, 'mp4', m3u8_id=format_id or 'hls', fatal=False)) + else: + formats.append({ + 'format_id': format_id, + 'url': asset_url, + 'ext': ext, + }) + self._sort_formats(formats) + + subtitles = {} + for cc in video_data.get('closedcaption', {}).get('src', []): + cc_url = cc.get('value') + if not cc_url: + continue + ext = determine_ext(cc_url) + if ext == 'xml': + ext = 'ttml' + subtitles.setdefault(cc.get('lang'), []).append({ + 'url': cc_url, + 'ext': ext, + }) + + thumbnails = [] + for thumbnail in video_data.get('thumbnails', {}).get('thumbnail', []): + thumbnail_url = thumbnail.get('value') + if not thumbnail_url: + continue + thumbnails.append({ + 'url': thumbnail_url, + 'width': int_or_none(thumbnail.get('width')), + 'height': int_or_none(thumbnail.get('height')), + }) + + return { + 'id': video_id, + 'title': title, + 'description': video_data.get('longdescription') or video_data.get('description'), + 'duration': int_or_none(video_data.get('duration', {}).get('value'), 1000), + 'age_limit': parse_age_limit(video_data.get('tvrating', {}).get('rating')), + 'episode_number': int_or_none(video_data.get('episodenumber')), + 'series': video_data.get('show', {}).get('title'), + 'season_number': int_or_none(video_data.get('season', {}).get('num')), + 'thumbnails': thumbnails, + 'formats': formats, + 'subtitles': subtitles, + } From 349fc5c705d6b81ae53d698972f40b1125bee13e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 2 Sep 2016 21:13:50 +0700 Subject: [PATCH 0023/1571] [facebook:plugins:video] Add extractor (Closes #10530) --- youtube_dl/extractor/extractors.py | 5 ++++- youtube_dl/extractor/facebook.py | 29 +++++++++++++++++++++++++++++ 2 files changed, 33 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 2bcd5a0cd..bc616223e 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -264,7 +264,10 @@ from .everyonesmixtape import EveryonesMixtapeIE from .expotv import ExpoTVIE from .extremetube import ExtremeTubeIE from .eyedotv import EyedoTVIE -from .facebook import FacebookIE +from .facebook import ( + FacebookIE, + FacebookPluginsVideoIE, +) from .faz import FazIE from .fc2 import FC2IE from .fczenit import FczenitIE diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py index 228b0b6d7..3a220e995 100644 --- a/youtube_dl/extractor/facebook.py +++ b/youtube_dl/extractor/facebook.py @@ -351,3 +351,32 @@ class FacebookIE(InfoExtractor): self._VIDEO_PAGE_TEMPLATE % video_id, video_id, fatal_if_no_video=True) return info_dict + + +class FacebookPluginsVideoIE(InfoExtractor): + _VALID_URL = r'https?://(?:[\w-]+\.)?facebook\.com/plugins/video\.php\?.*?\bhref=(?Phttps.+)' + + _TESTS = [{ + 'url': 'https://www.facebook.com/plugins/video.php?href=https%3A%2F%2Fwww.facebook.com%2Fgov.sg%2Fvideos%2F10154383743583686%2F&show_text=0&width=560', + 'md5': '5954e92cdfe51fe5782ae9bda7058a07', + 'info_dict': { + 'id': '10154383743583686', + 'ext': 'mp4', + 'title': 'What to do during the haze?', + 'uploader': 'Gov.sg', + 'upload_date': '20160826', + 'timestamp': 1472184808, + }, + 'add_ie': [FacebookIE.ie_key()], + }, { + 'url': 'https://www.facebook.com/plugins/video.php?href=https%3A%2F%2Fwww.facebook.com%2Fvideo.php%3Fv%3D10204634152394104', + 'only_matching': True, + }, { + 'url': 'https://www.facebook.com/plugins/video.php?href=https://www.facebook.com/gov.sg/videos/10154383743583686/&show_text=0&width=560', + 'only_matching': True, + }] + + def _real_extract(self, url): + return self.url_result( + compat_urllib_parse_unquote(self._match_id(url)), + FacebookIE.ie_key()) From 5e9e3d0f6bf2055c557f360758d6d7eb146edcba Mon Sep 17 00:00:00 2001 From: Sebastian Blunt Date: Fri, 2 Sep 2016 14:48:56 +0200 Subject: [PATCH 0024/1571] [drtv] Add support for dr.dk/nyheder It's the same video player, the only difference is that the video player is loaded differently, and certain metadata (title and description) is not available under dr.dk/mu, so make it by default get that from some of the html meta tags. Skip the dr.dk/tv test dr.dk/tv videos are only available for between 7 and 90 days due to Danish law, and in certain cases may be readded. Skip this test as it is no longer available. --- youtube_dl/extractor/drtv.py | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/drtv.py b/youtube_dl/extractor/drtv.py index 2d74ff855..e210cb610 100644 --- a/youtube_dl/extractor/drtv.py +++ b/youtube_dl/extractor/drtv.py @@ -5,13 +5,14 @@ from .common import InfoExtractor from ..utils import ( ExtractorError, parse_iso8601, + remove_end, ) class DRTVIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?dr\.dk/tv/se/(?:[^/]+/)*(?P[\da-z-]+)(?:[/#?]|$)' + _VALID_URL = r'https?://(?:www\.)?dr\.dk/(?:tv/se|nyheder)/(?:[^/]+/)*(?P[\da-z-]+)(?:[/#?]|$)' - _TEST = { + _TESTS = [{ 'url': 'https://www.dr.dk/tv/se/boern/ultra/panisk-paske/panisk-paske-5', 'md5': 'dc515a9ab50577fa14cc4e4b0265168f', 'info_dict': { @@ -23,7 +24,20 @@ class DRTVIE(InfoExtractor): 'upload_date': '20150322', 'duration': 1455, }, - } + 'skip': 'Video is no longer available', + }, { + 'url': 'https://www.dr.dk/nyheder/indland/live-christianias-rydning-af-pusher-street-er-i-gang', + 'md5': '2ada5074f9e79afc0d324a8e9784d850', + 'info_dict': { + 'id': 'christiania-pusher-street-ryddes-drdkrjpo', + 'ext': 'mp4', + 'title': 'LIVE Christianias rydning af Pusher Street er i gang', + 'description': '- Det er det fedeste, der er sket i 20 år, fortæller christianit til DR Nyheder.', + 'timestamp': 1472800279, + 'upload_date': '20160902', + 'duration': 131.4, + } + }] def _real_extract(self, url): video_id = self._match_id(url) @@ -35,7 +49,8 @@ class DRTVIE(InfoExtractor): 'Video %s is not available' % video_id, expected=True) video_id = self._search_regex( - r'data-(?:material-identifier|episode-slug)="([^"]+)"', + (r'data-(?:material-identifier|episode-slug)="([^"]+)"', + r'data-resource="[^>"]+mu/programcard/expanded/([^"]+)"'), webpage, 'video id') programcard = self._download_json( @@ -43,8 +58,9 @@ class DRTVIE(InfoExtractor): video_id, 'Downloading video JSON') data = programcard['Data'][0] - title = data['Title'] - description = data['Description'] + title = remove_end(self._og_search_title(webpage), ' | TV | DR') or data['Title'] + description = self._og_search_description(webpage) or data['Description'] + timestamp = parse_iso8601(data['CreatedTime']) thumbnail = None From 6562d34a8cbdb93de77a8042f7409ebe31e3e3e4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 2 Sep 2016 22:57:48 +0700 Subject: [PATCH 0025/1571] [utils] Improve mimetype2ext --- test/test_utils.py | 9 +++++++++ youtube_dl/utils.py | 2 +- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/test/test_utils.py b/test/test_utils.py index d16ea7f77..405c5d351 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -39,6 +39,7 @@ from youtube_dl.utils import ( is_html, js_to_json, limit_length, + mimetype2ext, ohdave_rsa_encrypt, OnDemandPagedList, orderedSet, @@ -625,6 +626,14 @@ class TestUtil(unittest.TestCase): limit_length('foo bar baz asd', 12).startswith('foo bar')) self.assertTrue('...' in limit_length('foo bar baz asd', 12)) + def test_mimetype2ext(self): + self.assertEqual(mimetype2ext(None), None) + self.assertEqual(mimetype2ext('video/x-flv'), 'flv') + self.assertEqual(mimetype2ext('application/x-mpegURL'), 'm3u8') + self.assertEqual(mimetype2ext('text/vtt'), 'vtt') + self.assertEqual(mimetype2ext('text/vtt;charset=utf-8'), 'vtt') + self.assertEqual(mimetype2ext('text/html; charset=utf-8'), 'html') + def test_parse_codecs(self): self.assertEqual(parse_codecs(''), {}) self.assertEqual(parse_codecs('avc1.77.30, mp4a.40.2'), { diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 1091f17f3..904f23fd7 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -2148,7 +2148,7 @@ def mimetype2ext(mt): return ext _, _, res = mt.rpartition('/') - res = res.lower() + res = res.split(';')[0].strip().lower() return { '3gpp': '3gp', From 6066d03db02b9c545435b2b8faffe2e0f6c66702 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 2 Sep 2016 23:02:15 +0700 Subject: [PATCH 0026/1571] [drtv] Modernize and make more robust --- youtube_dl/extractor/drtv.py | 53 ++++++++++++++++++++++-------------- 1 file changed, 33 insertions(+), 20 deletions(-) diff --git a/youtube_dl/extractor/drtv.py b/youtube_dl/extractor/drtv.py index e210cb610..7122449a3 100644 --- a/youtube_dl/extractor/drtv.py +++ b/youtube_dl/extractor/drtv.py @@ -4,6 +4,9 @@ from __future__ import unicode_literals from .common import InfoExtractor from ..utils import ( ExtractorError, + int_or_none, + float_or_none, + mimetype2ext, parse_iso8601, remove_end, ) @@ -58,10 +61,12 @@ class DRTVIE(InfoExtractor): video_id, 'Downloading video JSON') data = programcard['Data'][0] - title = remove_end(self._og_search_title(webpage), ' | TV | DR') or data['Title'] - description = self._og_search_description(webpage) or data['Description'] + title = remove_end(self._og_search_title( + webpage, default=None), ' | TV | DR') or data['Title'] + description = self._og_search_description( + webpage, default=None) or data.get('Description') - timestamp = parse_iso8601(data['CreatedTime']) + timestamp = parse_iso8601(data.get('CreatedTime')) thumbnail = None duration = None @@ -72,16 +77,18 @@ class DRTVIE(InfoExtractor): subtitles = {} for asset in data['Assets']: - if asset['Kind'] == 'Image': - thumbnail = asset['Uri'] - elif asset['Kind'] == 'VideoResource': - duration = asset['DurationInMilliseconds'] / 1000.0 - restricted_to_denmark = asset['RestrictedToDenmark'] - spoken_subtitles = asset['Target'] == 'SpokenSubtitles' - for link in asset['Links']: - uri = link['Uri'] - target = link['Target'] - format_id = target + if asset.get('Kind') == 'Image': + thumbnail = asset.get('Uri') + elif asset.get('Kind') == 'VideoResource': + duration = float_or_none(asset.get('DurationInMilliseconds'), 1000) + restricted_to_denmark = asset.get('RestrictedToDenmark') + spoken_subtitles = asset.get('Target') == 'SpokenSubtitles' + for link in asset.get('Links', []): + uri = link.get('Uri') + if not uri: + continue + target = link.get('Target') + format_id = target or '' preference = None if spoken_subtitles: preference = -1 @@ -92,8 +99,8 @@ class DRTVIE(InfoExtractor): video_id, preference, f4m_id=format_id)) elif target == 'HLS': formats.extend(self._extract_m3u8_formats( - uri, video_id, 'mp4', preference=preference, - m3u8_id=format_id)) + uri, video_id, 'mp4', entry_protocol='m3u8_native', + preference=preference, m3u8_id=format_id)) else: bitrate = link.get('Bitrate') if bitrate: @@ -101,7 +108,7 @@ class DRTVIE(InfoExtractor): formats.append({ 'url': uri, 'format_id': format_id, - 'tbr': bitrate, + 'tbr': int_or_none(bitrate), 'ext': link.get('FileFormat'), }) subtitles_list = asset.get('SubtitlesList') @@ -110,12 +117,18 @@ class DRTVIE(InfoExtractor): 'Danish': 'da', } for subs in subtitles_list: - lang = subs['Language'] - subtitles[LANGS.get(lang, lang)] = [{'url': subs['Uri'], 'ext': 'vtt'}] + if not subs.get('Uri'): + continue + lang = subs.get('Language') or 'da' + subtitles.setdefault(LANGS.get(lang, lang), []).append({ + 'url': subs['Uri'], + 'ext': mimetype2ext(subs.get('MimeType')) or 'vtt' + }) if not formats and restricted_to_denmark: - raise ExtractorError( - 'Unfortunately, DR is not allowed to show this program outside Denmark.', expected=True) + self.raise_geo_restricted( + 'Unfortunately, DR is not allowed to show this program outside Denmark.', + expected=True) self._sort_formats(formats) From dacb3a864a8c89edb312cd28c3de1605a5467d0f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 2 Sep 2016 23:43:20 +0700 Subject: [PATCH 0027/1571] [youtube:playlist] Fallback to video extraction for video/playlist URLs when playlist is broken (Closes #10537) --- youtube_dl/extractor/youtube.py | 56 +++++++++++++++++++++++++++++---- 1 file changed, 50 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index d5d5b7334..ea98fbf69 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1841,6 +1841,28 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor): 'id': 'UUXw-G3eDE9trcvY2sBMM_aA', }, 'playlist_mincout': 21, + }, { + # Playlist URL that does not actually serve a playlist + 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4', + 'info_dict': { + 'id': 'FqZTN594JQw', + 'ext': 'webm', + 'title': "Smiley's People 01 detective, Adventure Series, Action", + 'uploader': 'STREEM', + 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng', + 'uploader_url': 're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng', + 'upload_date': '20150526', + 'license': 'Standard YouTube License', + 'description': 'md5:507cdcb5a49ac0da37a920ece610be80', + 'categories': ['People & Blogs'], + 'tags': list, + 'like_count': int, + 'dislike_count': int, + }, + 'params': { + 'skip_download': True, + }, + 'add_ie': [YoutubeIE.ie_key()], }] def _real_initialize(self): @@ -1901,9 +1923,20 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor): playlist_title = self._html_search_regex( r'(?s)

]*>\s*(.*?)\s*

', - page, 'title') + page, 'title', default=None) - return self.playlist_result(self._entries(page, playlist_id), playlist_id, playlist_title) + has_videos = True + + if not playlist_title: + try: + # Some playlist URLs don't actually serve a playlist (e.g. + # https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4) + next(self._entries(page, playlist_id)) + except StopIteration: + has_videos = False + + return has_videos, self.playlist_result( + self._entries(page, playlist_id), playlist_id, playlist_title) def _check_download_just_video(self, url, playlist_id): # Check if it's a video-specific URL @@ -1912,9 +1945,11 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor): video_id = query_dict['v'][0] if self._downloader.params.get('noplaylist'): self.to_screen('Downloading just video %s because of --no-playlist' % video_id) - return self.url_result(video_id, 'Youtube', video_id=video_id) + return video_id, self.url_result(video_id, 'Youtube', video_id=video_id) else: self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id)) + return video_id, None + return None, None def _real_extract(self, url): # Extract playlist id @@ -1923,7 +1958,7 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor): raise ExtractorError('Invalid URL: %s' % url) playlist_id = mobj.group(1) or mobj.group(2) - video = self._check_download_just_video(url, playlist_id) + video_id, video = self._check_download_just_video(url, playlist_id) if video: return video @@ -1931,7 +1966,15 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor): # Mixes require a custom extraction process return self._extract_mix(playlist_id) - return self._extract_playlist(playlist_id) + has_videos, playlist = self._extract_playlist(playlist_id) + if has_videos or not video_id: + return playlist + + # Some playlist URLs don't actually serve a playlist (see + # https://github.com/rg3/youtube-dl/issues/10537). + # Fallback to plain video extraction if there is a video id + # along with playlist id. + return self.url_result(video_id, 'Youtube', video_id=video_id) class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor): @@ -2312,7 +2355,8 @@ class YoutubeWatchLaterIE(YoutubePlaylistIE): video = self._check_download_just_video(url, 'WL') if video: return video - return self._extract_playlist('WL') + _, playlist = self._extract_playlist('WL') + return playlist class YoutubeFavouritesIE(YoutubeBaseInfoExtractor): From c2b2c7e1386056698ee1b0de5427ea90abf8e9c3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 2 Sep 2016 23:50:42 +0700 Subject: [PATCH 0028/1571] [utils] Add quicktime to mimetype2ext --- youtube_dl/utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 904f23fd7..ed199c4ad 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -2168,6 +2168,7 @@ def mimetype2ext(mt): 'f4m+xml': 'f4m', 'hds+xml': 'f4m', 'vnd.ms-sstr+xml': 'ism', + 'quicktime': 'mov', }.get(res, res) From 3fcce30289a475901728af7a8dbe85304105b8ac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 2 Sep 2016 23:53:17 +0700 Subject: [PATCH 0029/1571] [drtv] Update tests --- youtube_dl/extractor/drtv.py | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/youtube_dl/extractor/drtv.py b/youtube_dl/extractor/drtv.py index 7122449a3..88d096b30 100644 --- a/youtube_dl/extractor/drtv.py +++ b/youtube_dl/extractor/drtv.py @@ -16,21 +16,23 @@ class DRTVIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?dr\.dk/(?:tv/se|nyheder)/(?:[^/]+/)*(?P[\da-z-]+)(?:[/#?]|$)' _TESTS = [{ - 'url': 'https://www.dr.dk/tv/se/boern/ultra/panisk-paske/panisk-paske-5', - 'md5': 'dc515a9ab50577fa14cc4e4b0265168f', + 'url': 'https://www.dr.dk/tv/se/boern/ultra/klassen-ultra/klassen-darlig-taber-10', + 'md5': '25e659cccc9a2ed956110a299fdf5983', 'info_dict': { - 'id': 'panisk-paske-5', + 'id': 'klassen-darlig-taber-10', 'ext': 'mp4', - 'title': 'Panisk Påske (5)', - 'description': 'md5:ca14173c5ab24cd26b0fcc074dff391c', - 'timestamp': 1426984612, - 'upload_date': '20150322', - 'duration': 1455, + 'title': 'Klassen - Dårlig taber (10)', + 'description': 'md5:815fe1b7fa656ed80580f31e8b3c79aa', + 'timestamp': 1471991907, + 'upload_date': '20160823', + 'duration': 606.84, + }, + 'params': { + 'skip_download': True, }, - 'skip': 'Video is no longer available', }, { 'url': 'https://www.dr.dk/nyheder/indland/live-christianias-rydning-af-pusher-street-er-i-gang', - 'md5': '2ada5074f9e79afc0d324a8e9784d850', + 'md5': '2c37175c718155930f939ef59952474a', 'info_dict': { 'id': 'christiania-pusher-street-ryddes-drdkrjpo', 'ext': 'mp4', @@ -39,7 +41,7 @@ class DRTVIE(InfoExtractor): 'timestamp': 1472800279, 'upload_date': '20160902', 'duration': 131.4, - } + }, }] def _real_extract(self, url): From 6496ccb41398971373a2f7162a0684dd12f0b56e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 3 Sep 2016 01:17:15 +0700 Subject: [PATCH 0030/1571] [youtube] Add support for rental videos' previews (Closes #10532) --- youtube_dl/extractor/youtube.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index ea98fbf69..4c8edef8d 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -844,6 +844,24 @@ class YoutubeIE(YoutubeBaseInfoExtractor): # YouTube Red paid video (https://github.com/rg3/youtube-dl/issues/10059) 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo', 'only_matching': True, + }, + { + # Rental video preview + 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg', + 'info_dict': { + 'id': 'uGpuVWrhIzE', + 'ext': 'mp4', + 'title': 'Piku - Trailer', + 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb', + 'upload_date': '20150811', + 'uploader': 'FlixMatrix', + 'uploader_id': 'FlixMatrixKaravan', + 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan', + 'license': 'Standard YouTube License', + }, + 'params': { + 'skip_download': True, + }, } ] @@ -1254,6 +1272,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor): # Convert to the same format returned by compat_parse_qs video_info = dict((k, [v]) for k, v in args.items()) add_dash_mpd(video_info) + # Rental video is not rented but preview is available (e.g. + # https://www.youtube.com/watch?v=yYr8q0y5Jfg, + # https://github.com/rg3/youtube-dl/issues/10532) + if not video_info and args.get('ypc_vid'): + return self.url_result( + args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid']) if args.get('livestream') == '1' or args.get('live_playback') == 1: is_live = True if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True): From 3a7d35b982fac19ca47b87358001379fafbd5731 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 3 Sep 2016 01:42:33 +0700 Subject: [PATCH 0031/1571] Credit @C4K3 for #10536 --- AUTHORS | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS b/AUTHORS index b9a602c12..c4bef040a 100644 --- a/AUTHORS +++ b/AUTHORS @@ -182,3 +182,4 @@ Rob van Bekkum Petr Zvoníček Pratyush Singh Aleksander Nitecki +Sebastian Blunt From 4b3a6076586a38450fa9633480d175a13e33dac7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 3 Sep 2016 01:45:17 +0700 Subject: [PATCH 0032/1571] [ChangeLog] Actualize --- ChangeLog | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/ChangeLog b/ChangeLog index 2e75c003d..eb05fe77e 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,8 +1,32 @@ version +Core +* Restore usage of NAME attribute from EXT-X-MEDIA tag for formats codes in + _extract_m3u8_formats (#10522) +* Handle semicolon in mimetype2ext + + Extractors ++ [youtube] Add support for rental videos' previews (#10532) +* [youtube:playlist] Fallback to video extraction for video/playlist URLs when + no playlist is actually served (#10537) ++ [drtv] Add support for dr.dk/nyheder (#10536) ++ [facebook:plugins:video] Add extractor (#10530) ++ [go] Add extractor for *.go.com sites +* [adobepass] Check for authz_token expiration (#10527) +* [nytimes] improve extraction +* [thestar] Fix extraction (#10465) +* [glide] Fix extraction (#10478) +- [exfm] Remove extractor (#10482) +* [youporn] Fix categories and tags extraction (#10521) ++ [curiositystream] Add extractor for app.curiositystream.com - [thvideo] Remove extractor (#10464) * [movingimage] Fix for the new site name (#10466) ++ [cbs] Add support for once formats (#10515) +* [limelight] Skip ism snd duplicate manifests ++ [porncom] Extract categories and tags (#10510) ++ [facebook] Extract timestamp (#10508) ++ [yahoo] Extract more formats version 2016.08.31 From 86c3bbbcede6efa175f5a93e02511fe32585521f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 3 Sep 2016 01:46:41 +0700 Subject: [PATCH 0033/1571] release 2016.09.03 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 9 +++++---- youtube_dl/version.py | 2 +- 4 files changed, 10 insertions(+), 9 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 2caca5115..fc18e733b 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.08.31*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.08.31** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.09.03*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.09.03** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.08.31 +[debug] youtube-dl version 2016.09.03 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index eb05fe77e..68dbeb696 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2016.09.03 Core * Restore usage of NAME attribute from EXT-X-MEDIA tag for formats codes in diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 42bf291e2..015332bca 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -171,6 +171,8 @@ - **CTVNews** - **culturebox.francetvinfo.fr** - **CultureUnplugged** + - **curiositystream** + - **curiositystream:collection** - **CWTV** - **DailyMail** - **dailymotion** @@ -223,11 +225,11 @@ - **EsriVideo** - **Europa** - **EveryonesMixtape** - - **exfm**: ex.fm - **ExpoTV** - **ExtremeTube** - **EyedoTV** - **facebook** + - **FacebookPluginsVideo** - **faz.net** - **fc2** - **Fczenit** @@ -271,6 +273,7 @@ - **Glide**: Glide mobile video messages (glide.me) - **Globo** - **GloboArticle** + - **Go** - **GodTube** - **GodTV** - **Golem** @@ -406,6 +409,7 @@ - **MovieClips** - **MovieFap** - **Moviezine** + - **MovingImage** - **MPORA** - **MSN** - **mtg**: MTG services @@ -659,7 +663,6 @@ - **sr:mediathek**: Saarländischer Rundfunk - **SRGSSR** - **SRGSSRPlay**: srf.ch, rts.ch, rsi.ch, rtr.ch and swissinfo.ch play sites - - **SSA** - **stanfordoc**: Stanford Open ClassRoom - **Steam** - **Stitcher** @@ -702,8 +705,6 @@ - **TheStar** - **ThisAmericanLife** - **ThisAV** - - **THVideo** - - **THVideoPlaylist** - **tinypic**: tinypic.com videos - **tlc.de** - **TMZ** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index fe442dd88..5be8c0122 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.08.31' +__version__ = '2016.09.03' From dedb1770295d214225a3a31b5f99da877cf01eee Mon Sep 17 00:00:00 2001 From: Christian Pointner Date: Sat, 3 Sep 2016 01:50:26 +0200 Subject: [PATCH 0034/1571] Fix parsing of HTML5 media elements This fixes an error in _parse_html5_media_entries in case an audio or video tag directly uses a src attribute insted of elements in it's body. --- youtube_dl/extractor/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index a9c7a8d16..a82968162 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1749,7 +1749,7 @@ class InfoExtractor(object): media_attributes = extract_attributes(media_tag) src = media_attributes.get('src') if src: - _, formats = _media_formats(src) + _, formats = _media_formats(src, media_type) media_info['formats'].extend(formats) media_info['thumbnail'] = media_attributes.get('poster') if media_content: From cf0efe96366259a5f0f07ae79280bfa17dc6f6e7 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sat, 3 Sep 2016 17:25:03 +0800 Subject: [PATCH 0035/1571] [fc2:embed] New extractor for Flash player URLs Closes #10512 --- ChangeLog | 6 ++++ youtube_dl/extractor/extractors.py | 5 ++- youtube_dl/extractor/fc2.py | 58 ++++++++++++++++++++++++++---- 3 files changed, 61 insertions(+), 8 deletions(-) diff --git a/ChangeLog b/ChangeLog index 68dbeb696..065fc83a8 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +version + +Extractors ++ [fc2] Recognize Flash player URLs (#10512) + + version 2016.09.03 Core diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index bc616223e..d851e5f36 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -269,7 +269,10 @@ from .facebook import ( FacebookPluginsVideoIE, ) from .faz import FazIE -from .fc2 import FC2IE +from .fc2 import ( + FC2IE, + FC2EmbedIE, +) from .fczenit import FczenitIE from .firstpost import FirstpostIE from .firsttv import FirstTVIE diff --git a/youtube_dl/extractor/fc2.py b/youtube_dl/extractor/fc2.py index c7d69ff1f..b9e58d4df 100644 --- a/youtube_dl/extractor/fc2.py +++ b/youtube_dl/extractor/fc2.py @@ -1,10 +1,12 @@ -#! -*- coding: utf-8 -*- +# coding: utf-8 from __future__ import unicode_literals import hashlib +import re from .common import InfoExtractor from ..compat import ( + compat_parse_qs, compat_urllib_request, compat_urlparse, ) @@ -16,7 +18,7 @@ from ..utils import ( class FC2IE(InfoExtractor): - _VALID_URL = r'^https?://video\.fc2\.com/(?:[^/]+/)*content/(?P[^/]+)' + _VALID_URL = r'^(?:https?://video\.fc2\.com/(?:[^/]+/)*content/|fc2:)(?P[^/]+)' IE_NAME = 'fc2' _NETRC_MACHINE = 'fc2' _TESTS = [{ @@ -75,12 +77,17 @@ class FC2IE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) self._login() - webpage = self._download_webpage(url, video_id) - self._downloader.cookiejar.clear_session_cookies() # must clear - self._login() + webpage = None + if not url.startswith('fc2:'): + webpage = self._download_webpage(url, video_id) + self._downloader.cookiejar.clear_session_cookies() # must clear + self._login() - title = self._og_search_title(webpage) - thumbnail = self._og_search_thumbnail(webpage) + title = 'FC2 video %s' % video_id + thumbnail = None + if webpage is not None: + title = self._og_search_title(webpage) + thumbnail = self._og_search_thumbnail(webpage) refer = url.replace('/content/', '/a/content/') if '/a/content/' not in url else url mimi = hashlib.md5((video_id + '_gGddgPfeaf_gzyr').encode('utf-8')).hexdigest() @@ -113,3 +120,40 @@ class FC2IE(InfoExtractor): 'ext': 'flv', 'thumbnail': thumbnail, } + + +class FC2EmbedIE(InfoExtractor): + _VALID_URL = r'https?://video\.fc2\.com/flv2\.swf\?(?P.+)' + IE_NAME = 'fc2:embed' + + _TEST = { + 'url': 'http://video.fc2.com/flv2.swf?t=201404182936758512407645&i=20130316kwishtfitaknmcgd76kjd864hso93htfjcnaogz629mcgfs6rbfk0hsycma7shkf85937cbchfygd74&i=201403223kCqB3Ez&d=2625&sj=11&lang=ja&rel=1&from=11&cmt=1&tk=TlRBM09EQTNNekU9&tl=プリズン・ブレイク%20S1-01%20マイケル%20【吹替】', + 'md5': 'b8aae5334cb691bdb1193a88a6ab5d5a', + 'info_dict': { + 'id': '201403223kCqB3Ez', + 'ext': 'flv', + 'title': 'プリズン・ブレイク S1-01 マイケル 【吹替】', + 'thumbnail': 're:^https?://.*\.jpg$', + }, + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + query = compat_parse_qs(mobj.group('query')) + + video_id = query['i'][-1] + title = query.get('tl', ['FC2 video %s' % video_id])[0] + + sj = query.get('sj', [None])[0] + thumbnail = None + if sj: + # See thumbnailImagePath() in ServerConst.as of flv2.swf + thumbnail = 'http://video%s-thumbnail.fc2.com/up/pic/%s.jpg' % ( + sj, '/'.join((video_id[:6], video_id[6:8], video_id[-2], video_id[-1], video_id))) + + return { + '_type': 'url_transparent', + 'url': 'fc2:%s' % video_id, + 'title': title, + 'thumbnail': thumbnail, + } From cdc783510bb575b2318b1d7d42fb98f0c0f0df18 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sat, 3 Sep 2016 18:16:19 +0800 Subject: [PATCH 0036/1571] [foxnews:insider] Add new extractor Closes #10445 --- ChangeLog | 1 + youtube_dl/extractor/extractors.py | 5 +++- youtube_dl/extractor/foxnews.py | 48 +++++++++++++++++++++++++++++- 3 files changed, 52 insertions(+), 2 deletions(-) diff --git a/ChangeLog b/ChangeLog index 065fc83a8..199983674 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,7 @@ version Extractors ++ [foxnews] Add support for FoxNews Insider (#10445) + [fc2] Recognize Flash player URLs (#10512) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index d851e5f36..8c6ee0503 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -287,7 +287,10 @@ from .formula1 import Formula1IE from .fourtube import FourTubeIE from .fox import FOXIE from .foxgay import FoxgayIE -from .foxnews import FoxNewsIE +from .foxnews import ( + FoxNewsIE, + FoxNewsInsiderIE, +) from .foxsports import FoxSportsIE from .franceculture import FranceCultureIE from .franceinter import FranceInterIE diff --git a/youtube_dl/extractor/foxnews.py b/youtube_dl/extractor/foxnews.py index b04da2415..5c7acd795 100644 --- a/youtube_dl/extractor/foxnews.py +++ b/youtube_dl/extractor/foxnews.py @@ -3,11 +3,12 @@ from __future__ import unicode_literals import re from .amp import AMPIE +from .common import InfoExtractor class FoxNewsIE(AMPIE): IE_DESC = 'Fox News and Fox Business Video' - _VALID_URL = r'https?://(?Pvideo\.fox(?:news|business)\.com)/v/(?:video-embed\.html\?video_id=)?(?P\d+)' + _VALID_URL = r'https?://(?Pvideo\.(?:insider\.)?fox(?:news|business)\.com)/v/(?:video-embed\.html\?video_id=)?(?P\d+)' _TESTS = [ { 'url': 'http://video.foxnews.com/v/3937480/frozen-in-time/#sp=show-clips', @@ -49,6 +50,11 @@ class FoxNewsIE(AMPIE): 'url': 'http://video.foxbusiness.com/v/4442309889001', 'only_matching': True, }, + { + # From http://insider.foxnews.com/2016/08/25/univ-wisconsin-student-group-pushing-silence-certain-words + 'url': 'http://video.insider.foxnews.com/v/video-embed.html?video_id=5099377331001&autoplay=true&share_url=http://insider.foxnews.com/2016/08/25/univ-wisconsin-student-group-pushing-silence-certain-words&share_title=Student%20Group:%20Saying%20%27Politically%20Correct,%27%20%27Trash%27%20and%20%27Lame%27%20Is%20Offensive&share=true', + 'only_matching': True, + }, ] def _real_extract(self, url): @@ -58,3 +64,43 @@ class FoxNewsIE(AMPIE): 'http://%s/v/feed/video/%s.js?template=fox' % (host, video_id)) info['id'] = video_id return info + + +class FoxNewsInsiderIE(InfoExtractor): + _VALID_URL = r'https?://insider\.foxnews\.com/([^/]+/)+(?P[a-z-]+)' + IE_NAME = 'foxnews:insider' + + _TEST = { + 'url': 'http://insider.foxnews.com/2016/08/25/univ-wisconsin-student-group-pushing-silence-certain-words', + 'md5': 'a10c755e582d28120c62749b4feb4c0c', + 'info_dict': { + 'id': '5099377331001', + 'display_id': 'univ-wisconsin-student-group-pushing-silence-certain-words', + 'ext': 'mp4', + 'title': 'Student Group: Saying \'Politically Correct,\' \'Trash\' and \'Lame\' Is Offensive', + 'description': 'Is campus censorship getting out of control?', + 'timestamp': 1472168725, + 'upload_date': '20160825', + 'thumbnail': 're:^https?://.*\.jpg$', + }, + 'add_ie': [FoxNewsIE.ie_key()], + } + + def _real_extract(self, url): + display_id = self._match_id(url) + + webpage = self._download_webpage(url, display_id) + + embed_url = self._html_search_meta('embedUrl', webpage, 'embed URL') + + title = self._og_search_title(webpage) + description = self._og_search_description(webpage) + + return { + '_type': 'url_transparent', + 'ie_key': FoxNewsIE.ie_key(), + 'url': embed_url, + 'display_id': display_id, + 'title': title, + 'description': description, + } From ed2bfe93aaa11f49f7b2b92b581abb6aa385dfbf Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sat, 3 Sep 2016 18:22:00 +0800 Subject: [PATCH 0037/1571] [fc2:embed] Add ie_key --- youtube_dl/extractor/fc2.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/fc2.py b/youtube_dl/extractor/fc2.py index b9e58d4df..c032d4d02 100644 --- a/youtube_dl/extractor/fc2.py +++ b/youtube_dl/extractor/fc2.py @@ -153,6 +153,7 @@ class FC2EmbedIE(InfoExtractor): return { '_type': 'url_transparent', + 'ie_key': FC2IE.ie_key(), 'url': 'fc2:%s' % video_id, 'title': title, 'thumbnail': thumbnail, From 45aab4d30b7c3fc03c9be9680550cba88bd85b5c Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sat, 3 Sep 2016 18:37:36 +0800 Subject: [PATCH 0038/1571] [youjizz] Fix extraction. The site has moved to HTML5 Closes #10437 --- ChangeLog | 1 + youtube_dl/extractor/youjizz.py | 43 +++++++-------------------------- 2 files changed, 10 insertions(+), 34 deletions(-) diff --git a/ChangeLog b/ChangeLog index 199983674..2809e55d7 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,7 @@ version Extractors +* [youjizz] Fix extraction (#10437) + [foxnews] Add support for FoxNews Insider (#10445) + [fc2] Recognize Flash player URLs (#10512) diff --git a/youtube_dl/extractor/youjizz.py b/youtube_dl/extractor/youjizz.py index 31e2f9263..b50f34e9b 100644 --- a/youtube_dl/extractor/youjizz.py +++ b/youtube_dl/extractor/youjizz.py @@ -1,21 +1,16 @@ from __future__ import unicode_literals -import re - from .common import InfoExtractor -from ..utils import ( - ExtractorError, -) class YouJizzIE(InfoExtractor): _VALID_URL = r'https?://(?:\w+\.)?youjizz\.com/videos/(?:[^/#?]+)?-(?P[0-9]+)\.html(?:$|[?#])' _TESTS = [{ 'url': 'http://www.youjizz.com/videos/zeichentrick-1-2189178.html', - 'md5': '07e15fa469ba384c7693fd246905547c', + 'md5': '78fc1901148284c69af12640e01c6310', 'info_dict': { 'id': '2189178', - 'ext': 'flv', + 'ext': 'mp4', 'title': 'Zeichentrick 1', 'age_limit': 18, } @@ -27,38 +22,18 @@ class YouJizzIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) + # YouJizz's HTML5 player has invalid HTML + webpage = webpage.replace('"controls', '" controls') age_limit = self._rta_search(webpage) video_title = self._html_search_regex( r'\s*(.*)\s*', webpage, 'title') - embed_page_url = self._search_regex( - r'(https?://www.youjizz.com/videos/embed/[0-9]+)', - webpage, 'embed page') - webpage = self._download_webpage( - embed_page_url, video_id, note='downloading embed page') + info_dict = self._parse_html5_media_entries(url, webpage, video_id)[0] - # Get the video URL - m_playlist = re.search(r'so.addVariable\("playlist", ?"(?P.+?)"\);', webpage) - if m_playlist is not None: - playlist_url = m_playlist.group('playlist') - playlist_page = self._download_webpage(playlist_url, video_id, - 'Downloading playlist page') - m_levels = list(re.finditer(r'[^"]+)"\)\);', - webpage, 'video URL') - - return { + info_dict.update({ 'id': video_id, - 'url': video_url, 'title': video_title, - 'ext': 'flv', - 'format': 'flv', - 'player_url': embed_page_url, 'age_limit': age_limit, - } + }) + + return info_dict From 9603b6601208333bc49e0c69199f0e652a7aaea3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 27 Aug 2016 04:52:18 +0700 Subject: [PATCH 0039/1571] Introduce --skip-unavailable-fragments --- youtube_dl/__init__.py | 1 + youtube_dl/downloader/fragment.py | 10 ++++++++-- youtube_dl/options.py | 10 +++++++++- 3 files changed, 18 insertions(+), 3 deletions(-) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index a9730292c..42128272a 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -318,6 +318,7 @@ def _real_main(argv=None): 'nooverwrites': opts.nooverwrites, 'retries': opts.retries, 'fragment_retries': opts.fragment_retries, + 'skip_unavailable_fragments': opts.skip_unavailable_fragments, 'buffersize': opts.buffersize, 'noresizebuffer': opts.noresizebuffer, 'continuedl': opts.continue_dl, diff --git a/youtube_dl/downloader/fragment.py b/youtube_dl/downloader/fragment.py index ba903ae10..b4a798f8f 100644 --- a/youtube_dl/downloader/fragment.py +++ b/youtube_dl/downloader/fragment.py @@ -22,14 +22,20 @@ class FragmentFD(FileDownloader): Available options: - fragment_retries: Number of times to retry a fragment for HTTP error (DASH only) + fragment_retries: Number of times to retry a fragment for HTTP error (DASH + and hlsnative only) + skip_unavailable_fragments: + Skip unavailable fragments (DASH and hlsnative only) """ def report_retry_fragment(self, fragment_name, count, retries): self.to_screen( - '[download] Got server HTTP error. Retrying fragment %s (attempt %d of %s)...' + '[download] Got server HTTP error: %s. Retrying fragment %s (attempt %d of %s)...' % (fragment_name, count, self.format_retries(retries))) + def report_skip_fragment(self, fragment_name): + self.to_screen('[download] Skipping fragment %s...' % fragment_name) + def _prepare_and_start_frag_download(self, ctx): self._prepare_frag_download(ctx) self._start_frag_download(ctx) diff --git a/youtube_dl/options.py b/youtube_dl/options.py index 5d62deef4..56f312f57 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -423,7 +423,15 @@ def parseOpts(overrideArguments=None): downloader.add_option( '--fragment-retries', dest='fragment_retries', metavar='RETRIES', default=10, - help='Number of retries for a fragment (default is %default), or "infinite" (DASH only)') + help='Number of retries for a fragment (default is %default), or "infinite" (DASH and hlsnative only)') + downloader.add_option( + '--skip-unavailable-fragments', + action='store_true', dest='skip_unavailable_fragments', default=True, + help='Skip unavailable fragments (DASH and hlsnative only)') + general.add_option( + '--abort-on-unavailable-fragment', + action='store_false', dest='skip_unavailable_fragments', + help='Abort downloading when some fragment is not available') downloader.add_option( '--buffer-size', dest='buffersize', metavar='SIZE', default='1024', From 25afc2a7830e281e849609202b4f70728664bdb7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 27 Aug 2016 04:55:55 +0700 Subject: [PATCH 0040/1571] [downloader/dash:hls] Respect --fragment-retries and --skip-unavailable-fragments (Closes #10165, closes #10448) --- youtube_dl/downloader/dash.py | 12 +++++----- youtube_dl/downloader/hls.py | 41 +++++++++++++++++++++++++++++------ 2 files changed, 41 insertions(+), 12 deletions(-) diff --git a/youtube_dl/downloader/dash.py b/youtube_dl/downloader/dash.py index 8bbab9dbc..cbcee324d 100644 --- a/youtube_dl/downloader/dash.py +++ b/youtube_dl/downloader/dash.py @@ -38,6 +38,7 @@ class DashSegmentsFD(FragmentFD): segments_filenames = [] fragment_retries = self.params.get('fragment_retries', 0) + skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True) def append_url_to_file(target_url, tmp_filename, segment_name): target_filename = '%s-%s' % (tmp_filename, segment_name) @@ -52,19 +53,20 @@ class DashSegmentsFD(FragmentFD): down.close() segments_filenames.append(target_sanitized) break - except (compat_urllib_error.HTTPError, ) as err: + except compat_urllib_error.HTTPError: # YouTube may often return 404 HTTP error for a fragment causing the # whole download to fail. However if the same fragment is immediately # retried with the same request data this usually succeeds (1-2 attemps # is usually enough) thus allowing to download the whole file successfully. - # So, we will retry all fragments that fail with 404 HTTP error for now. - if err.code != 404: - raise - # Retry fragment + # To be future-proof we will retry all fragments that fail with any + # HTTP error. count += 1 if count <= fragment_retries: self.report_retry_fragment(segment_name, count, fragment_retries) if count > fragment_retries: + if skip_unavailable_fragments: + self.report_skip_fragment(segment_name) + return self.report_error('giving up after %s fragment retries' % fragment_retries) return False diff --git a/youtube_dl/downloader/hls.py b/youtube_dl/downloader/hls.py index baaff44d5..7412620a5 100644 --- a/youtube_dl/downloader/hls.py +++ b/youtube_dl/downloader/hls.py @@ -13,6 +13,7 @@ from .fragment import FragmentFD from .external import FFmpegFD from ..compat import ( + compat_urllib_error, compat_urlparse, compat_struct_pack, ) @@ -83,6 +84,10 @@ class HlsFD(FragmentFD): self._prepare_and_start_frag_download(ctx) + fragment_retries = self.params.get('fragment_retries', 0) + skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True) + test = self.params.get('test', False) + extra_query = None extra_param_to_segment_url = info_dict.get('extra_param_to_segment_url') if extra_param_to_segment_url: @@ -99,15 +104,37 @@ class HlsFD(FragmentFD): line if re.match(r'^https?://', line) else compat_urlparse.urljoin(man_url, line)) - frag_filename = '%s-Frag%d' % (ctx['tmpfilename'], i) + frag_name = 'Frag%d' % i + frag_filename = '%s-%s' % (ctx['tmpfilename'], frag_name) if extra_query: frag_url = update_url_query(frag_url, extra_query) - success = ctx['dl'].download(frag_filename, {'url': frag_url}) - if not success: + count = 0 + while count <= fragment_retries: + try: + success = ctx['dl'].download(frag_filename, {'url': frag_url}) + if not success: + return False + down, frag_sanitized = sanitize_open(frag_filename, 'rb') + frag_content = down.read() + down.close() + break + except compat_urllib_error.HTTPError: + # Unavailable (possibly temporary) fragments may be served. + # First we try to retry then either skip or abort. + # See https://github.com/rg3/youtube-dl/issues/10165, + # https://github.com/rg3/youtube-dl/issues/10448). + count += 1 + if count <= fragment_retries: + self.report_retry_fragment(frag_name, count, fragment_retries) + if count > fragment_retries: + if skip_unavailable_fragments: + i += 1 + media_sequence += 1 + self.report_skip_fragment(frag_name) + continue + self.report_error( + 'giving up after %s fragment retries' % fragment_retries) return False - down, frag_sanitized = sanitize_open(frag_filename, 'rb') - frag_content = down.read() - down.close() if decrypt_info['METHOD'] == 'AES-128': iv = decrypt_info.get('IV') or compat_struct_pack('>8xq', media_sequence) frag_content = AES.new( @@ -115,7 +142,7 @@ class HlsFD(FragmentFD): ctx['dest_stream'].write(frag_content) frags_filenames.append(frag_sanitized) # We only download the first fragment during the test - if self.params.get('test', False): + if test: break i += 1 media_sequence += 1 From 2e99cd30c3108fd8da6a9f9fadfa89852c8d8826 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 27 Aug 2016 04:57:59 +0700 Subject: [PATCH 0041/1571] [downloader/dash:hls] Report exact fragment error on retry --- youtube_dl/downloader/dash.py | 4 ++-- youtube_dl/downloader/fragment.py | 5 +++-- youtube_dl/downloader/hls.py | 4 ++-- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/youtube_dl/downloader/dash.py b/youtube_dl/downloader/dash.py index cbcee324d..e087cf142 100644 --- a/youtube_dl/downloader/dash.py +++ b/youtube_dl/downloader/dash.py @@ -53,7 +53,7 @@ class DashSegmentsFD(FragmentFD): down.close() segments_filenames.append(target_sanitized) break - except compat_urllib_error.HTTPError: + except compat_urllib_error.HTTPError as err: # YouTube may often return 404 HTTP error for a fragment causing the # whole download to fail. However if the same fragment is immediately # retried with the same request data this usually succeeds (1-2 attemps @@ -62,7 +62,7 @@ class DashSegmentsFD(FragmentFD): # HTTP error. count += 1 if count <= fragment_retries: - self.report_retry_fragment(segment_name, count, fragment_retries) + self.report_retry_fragment(err, segment_name, count, fragment_retries) if count > fragment_retries: if skip_unavailable_fragments: self.report_skip_fragment(segment_name) diff --git a/youtube_dl/downloader/fragment.py b/youtube_dl/downloader/fragment.py index b4a798f8f..84aacf7db 100644 --- a/youtube_dl/downloader/fragment.py +++ b/youtube_dl/downloader/fragment.py @@ -6,6 +6,7 @@ import time from .common import FileDownloader from .http import HttpFD from ..utils import ( + error_to_compat_str, encodeFilename, sanitize_open, ) @@ -28,10 +29,10 @@ class FragmentFD(FileDownloader): Skip unavailable fragments (DASH and hlsnative only) """ - def report_retry_fragment(self, fragment_name, count, retries): + def report_retry_fragment(self, err, fragment_name, count, retries): self.to_screen( '[download] Got server HTTP error: %s. Retrying fragment %s (attempt %d of %s)...' - % (fragment_name, count, self.format_retries(retries))) + % (error_to_compat_str(err), fragment_name, count, self.format_retries(retries))) def report_skip_fragment(self, fragment_name): self.to_screen('[download] Skipping fragment %s...' % fragment_name) diff --git a/youtube_dl/downloader/hls.py b/youtube_dl/downloader/hls.py index 7412620a5..5d70abf62 100644 --- a/youtube_dl/downloader/hls.py +++ b/youtube_dl/downloader/hls.py @@ -118,14 +118,14 @@ class HlsFD(FragmentFD): frag_content = down.read() down.close() break - except compat_urllib_error.HTTPError: + except compat_urllib_error.HTTPError as err: # Unavailable (possibly temporary) fragments may be served. # First we try to retry then either skip or abort. # See https://github.com/rg3/youtube-dl/issues/10165, # https://github.com/rg3/youtube-dl/issues/10448). count += 1 if count <= fragment_retries: - self.report_retry_fragment(frag_name, count, fragment_retries) + self.report_retry_fragment(err, frag_name, count, fragment_retries) if count > fragment_retries: if skip_unavailable_fragments: i += 1 From 4a69fa04e0074a3d5938ffb03decff9cc33f5d3d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 30 Aug 2016 22:28:14 +0700 Subject: [PATCH 0042/1571] [downloader/dash] Abort download immediately after giving up on some fragment --- youtube_dl/downloader/dash.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/youtube_dl/downloader/dash.py b/youtube_dl/downloader/dash.py index e087cf142..efeae02a3 100644 --- a/youtube_dl/downloader/dash.py +++ b/youtube_dl/downloader/dash.py @@ -66,14 +66,17 @@ class DashSegmentsFD(FragmentFD): if count > fragment_retries: if skip_unavailable_fragments: self.report_skip_fragment(segment_name) - return + return True self.report_error('giving up after %s fragment retries' % fragment_retries) return False + return True if initialization_url: - append_url_to_file(initialization_url, ctx['tmpfilename'], 'Init') + if not append_url_to_file(initialization_url, ctx['tmpfilename'], 'Init'): + return False for i, segment_url in enumerate(segment_urls): - append_url_to_file(segment_url, ctx['tmpfilename'], 'Seg%d' % i) + if not append_url_to_file(segment_url, ctx['tmpfilename'], 'Seg%d' % i): + return False self._finish_frag_download(ctx) From 7e5dc339de14547aa7b489e88b4c456ec613ba9d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 4 Sep 2016 00:29:01 +0700 Subject: [PATCH 0043/1571] [youtube:watchlater] Fix extraction (Closes #10544) --- youtube_dl/extractor/youtube.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 4c8edef8d..0bc85af74 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -2376,7 +2376,7 @@ class YoutubeWatchLaterIE(YoutubePlaylistIE): }] def _real_extract(self, url): - video = self._check_download_just_video(url, 'WL') + _, video = self._check_download_just_video(url, 'WL') if video: return video _, playlist = self._extract_playlist('WL') From 091624f9da491ef3a98e63367bf4ffd9836dafde Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 4 Sep 2016 03:39:13 +0700 Subject: [PATCH 0044/1571] [vimple] Extend _VALID_URL (Closes #10547) --- youtube_dl/extractor/vimple.py | 35 +++++++++++++++++----------------- 1 file changed, 18 insertions(+), 17 deletions(-) diff --git a/youtube_dl/extractor/vimple.py b/youtube_dl/extractor/vimple.py index 92321d66e..7fd9b777b 100644 --- a/youtube_dl/extractor/vimple.py +++ b/youtube_dl/extractor/vimple.py @@ -28,23 +28,24 @@ class SprutoBaseIE(InfoExtractor): class VimpleIE(SprutoBaseIE): IE_DESC = 'Vimple - one-click video hosting' - _VALID_URL = r'https?://(?:player\.vimple\.ru/iframe|vimple\.ru)/(?P[\da-f-]{32,36})' - _TESTS = [ - { - 'url': 'http://vimple.ru/c0f6b1687dcd4000a97ebe70068039cf', - 'md5': '2e750a330ed211d3fd41821c6ad9a279', - 'info_dict': { - 'id': 'c0f6b168-7dcd-4000-a97e-be70068039cf', - 'ext': 'mp4', - 'title': 'Sunset', - 'duration': 20, - 'thumbnail': 're:https?://.*?\.jpg', - }, - }, { - 'url': 'http://player.vimple.ru/iframe/52e1beec-1314-4a83-aeac-c61562eadbf9', - 'only_matching': True, - } - ] + _VALID_URL = r'https?://(?:player\.vimple\.(?:ru|co)/iframe|vimple\.(?:ru|co))/(?P[\da-f-]{32,36})' + _TESTS = [{ + 'url': 'http://vimple.ru/c0f6b1687dcd4000a97ebe70068039cf', + 'md5': '2e750a330ed211d3fd41821c6ad9a279', + 'info_dict': { + 'id': 'c0f6b168-7dcd-4000-a97e-be70068039cf', + 'ext': 'mp4', + 'title': 'Sunset', + 'duration': 20, + 'thumbnail': 're:https?://.*?\.jpg', + }, + }, { + 'url': 'http://player.vimple.ru/iframe/52e1beec-1314-4a83-aeac-c61562eadbf9', + 'only_matching': True, + }, { + 'url': 'http://vimple.co/04506a053f124483b8fb05ed73899f19', + 'only_matching': True, + }] def _real_extract(self, url): video_id = self._match_id(url) From 37c7490ac62d4aacbf9103bf6760d20f21984a55 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 4 Sep 2016 04:59:46 +0700 Subject: [PATCH 0045/1571] [espn] Extend _VALID_URL (Closes #10549) --- youtube_dl/extractor/espn.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/espn.py b/youtube_dl/extractor/espn.py index 66c08bec4..6d10f8e68 100644 --- a/youtube_dl/extractor/espn.py +++ b/youtube_dl/extractor/espn.py @@ -5,7 +5,7 @@ from ..utils import remove_end class ESPNIE(InfoExtractor): - _VALID_URL = r'https?://espn\.go\.com/(?:[^/]+/)*(?P[^/]+)' + _VALID_URL = r'https?://(?:espn\.go|(?:www\.)?espn)\.com/(?:[^/]+/)*(?P[^/]+)' _TESTS = [{ 'url': 'http://espn.go.com/video/clip?id=10365079', 'md5': '60e5d097a523e767d06479335d1bdc58', @@ -47,6 +47,9 @@ class ESPNIE(InfoExtractor): }, { 'url': 'http://espn.go.com/nba/playoffs/2015/story/_/id/12887571/john-wall-washington-wizards-no-swelling-left-hand-wrist-game-5-return', 'only_matching': True, + }, { + 'url': 'http://www.espn.com/video/clip?id=10365079', + 'only_matching': True, }] def _real_extract(self, url): From 622638512b8241c39837b634e75c44cf9105a299 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sun, 4 Sep 2016 16:25:59 +0800 Subject: [PATCH 0046/1571] [rottentomatoes] Fix extraction Closes #10467 --- ChangeLog | 1 + youtube_dl/extractor/rottentomatoes.py | 30 +++++++++++++++++++------- 2 files changed, 23 insertions(+), 8 deletions(-) diff --git a/ChangeLog b/ChangeLog index 2809e55d7..e6a2d24e1 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,7 @@ version Extractors +* [rottentomatoes] Fix extraction (#10467) * [youjizz] Fix extraction (#10437) + [foxnews] Add support for FoxNews Insider (#10445) + [fc2] Recognize Flash player URLs (#10512) diff --git a/youtube_dl/extractor/rottentomatoes.py b/youtube_dl/extractor/rottentomatoes.py index f9cd48790..df39ed3f2 100644 --- a/youtube_dl/extractor/rottentomatoes.py +++ b/youtube_dl/extractor/rottentomatoes.py @@ -1,8 +1,7 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..compat import compat_urlparse -from .internetvideoarchive import InternetVideoArchiveIE +from ..utils import js_to_json class RottenTomatoesIE(InfoExtractor): @@ -11,21 +10,36 @@ class RottenTomatoesIE(InfoExtractor): _TEST = { 'url': 'http://www.rottentomatoes.com/m/toy_story_3/trailers/11028566/', 'info_dict': { - 'id': '613340', + 'id': '11028566', 'ext': 'mp4', 'title': 'Toy Story 3', + 'thumbnail': 're:^https?://.*\.jpg$', }, } def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - og_video = self._og_search_video_url(webpage) - query = compat_urlparse.urlparse(og_video).query + + params = self._parse_json( + self._search_regex(r'(?s)RTVideo\(({.+?})\);', webpage, 'player parameters'), + video_id, transform_source=lambda s: js_to_json(s.replace('window.location.href', '""'))) + + formats = [] + if params.get('urlHLS'): + formats.extend(self._extract_m3u8_formats( + params['urlHLS'], video_id, ext='mp4', + entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)) + if params.get('urlMP4'): + formats.append({ + 'url': params['urlMP4'], + 'format_id': 'mp4', + }) + self._sort_formats(formats) return { - '_type': 'url_transparent', - 'url': InternetVideoArchiveIE._build_xml_url(query), - 'ie_key': InternetVideoArchiveIE.ie_key(), + 'id': video_id, 'title': self._og_search_title(webpage), + 'formats': formats, + 'thumbnail': params.get('thumbnailImg'), } From b29cd56591f1ef001d9f30bdff87789815f1fa0c Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sun, 4 Sep 2016 17:01:39 +0800 Subject: [PATCH 0047/1571] [pornovoisines] Fix extraction (closes #10469) --- ChangeLog | 1 + youtube_dl/extractor/pornovoisines.py | 80 +++++++++++++++------------ 2 files changed, 47 insertions(+), 34 deletions(-) diff --git a/ChangeLog b/ChangeLog index e6a2d24e1..616b55803 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,7 @@ version Extractors +* [pornvoisines] Fix extraction (#10469) * [rottentomatoes] Fix extraction (#10467) * [youjizz] Fix extraction (#10437) + [foxnews] Add support for FoxNews Insider (#10445) diff --git a/youtube_dl/extractor/pornovoisines.py b/youtube_dl/extractor/pornovoisines.py index 6b51e5c54..58f557e39 100644 --- a/youtube_dl/extractor/pornovoisines.py +++ b/youtube_dl/extractor/pornovoisines.py @@ -2,7 +2,6 @@ from __future__ import unicode_literals import re -import random from .common import InfoExtractor from ..utils import ( @@ -13,61 +12,69 @@ from ..utils import ( class PornoVoisinesIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?pornovoisines\.com/showvideo/(?P\d+)/(?P[^/]+)' - - _VIDEO_URL_TEMPLATE = 'http://stream%d.pornovoisines.com' \ - '/static/media/video/transcoded/%s-640x360-1000-trscded.mp4' - - _SERVER_NUMBERS = (1, 2) + _VALID_URL = r'https?://(?:www\.)?pornovoisines\.com/videos/show/(?P\d+)/(?P[^/.]+)' _TEST = { - 'url': 'http://www.pornovoisines.com/showvideo/1285/recherche-appartement/', - 'md5': '5ac670803bc12e9e7f9f662ce64cf1d1', + 'url': 'http://www.pornovoisines.com/videos/show/919/recherche-appartement.html', + 'md5': '6f8aca6a058592ab49fe701c8ba8317b', 'info_dict': { - 'id': '1285', + 'id': '919', 'display_id': 'recherche-appartement', 'ext': 'mp4', 'title': 'Recherche appartement', - 'description': 'md5:819ea0b785e2a04667a1a01cdc89594e', + 'description': 'md5:fe10cb92ae2dd3ed94bb4080d11ff493', 'thumbnail': 're:^https?://.*\.jpg$', 'upload_date': '20140925', 'duration': 120, 'view_count': int, 'average_rating': float, - 'categories': ['Débutantes', 'Scénario', 'Sodomie'], + 'categories': ['Débutante', 'Débutantes', 'Scénario', 'Sodomie'], 'age_limit': 18, + 'subtitles': { + 'fr': [{ + 'ext': 'vtt', + }] + }, } } - @classmethod - def build_video_url(cls, num): - return cls._VIDEO_URL_TEMPLATE % (random.choice(cls._SERVER_NUMBERS), num) - def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') display_id = mobj.group('display_id') + settings_url = self._download_json( + 'http://www.pornovoisines.com/api/video/%s/getsettingsurl/' % video_id, + video_id, note='Getting settings URL')['video_settings_url'] + settings = self._download_json(settings_url, video_id)['data'] + + formats = [] + for kind, data in settings['variants'].items(): + if kind == 'HLS': + formats.extend(self._extract_m3u8_formats( + data, video_id, ext='mp4', entry_protocol='m3u8_native', m3u8_id='hls')) + elif kind == 'MP4': + for item in data: + formats.append({ + 'url': item['url'], + 'height': item.get('height'), + 'bitrate': item.get('bitrate'), + }) + self._sort_formats(formats) + webpage = self._download_webpage(url, video_id) - video_url = self.build_video_url(video_id) + title = self._og_search_title(webpage) + description = self._og_search_description(webpage) - title = self._html_search_regex( - r'

(.+?)

', webpage, 'title', flags=re.DOTALL) - description = self._html_search_regex( - r'
(.+?)
', - webpage, 'description', fatal=False, flags=re.DOTALL) - - thumbnail = self._search_regex( - r'
\s*]+class=([\'"])thumb\1[^>]*src=([\'"])(?P[^"]+)\2', + webpage, 'thumbnail', fatal=False, group='url') upload_date = unified_strdate(self._search_regex( - r'Publié le ([\d-]+)', webpage, 'upload date', fatal=False)) - duration = int_or_none(self._search_regex( - 'Durée (\d+)', webpage, 'duration', fatal=False)) + r'Le\s*([\d/]+)', webpage, 'upload date', fatal=False)) + duration = settings.get('main', {}).get('duration') view_count = int_or_none(self._search_regex( r'(\d+) vues', webpage, 'view count', fatal=False)) average_rating = self._search_regex( @@ -75,15 +82,19 @@ class PornoVoisinesIE(InfoExtractor): if average_rating: average_rating = float_or_none(average_rating.replace(',', '.')) - categories = self._html_search_meta( - 'keywords', webpage, 'categories', fatal=False) + categories = self._html_search_regex( + r'(?s)Catégories\s*:\s*(.+?)', webpage, 'categories', fatal=False) if categories: categories = [category.strip() for category in categories.split(',')] + subtitles = {'fr': [{ + 'url': subtitle, + } for subtitle in settings.get('main', {}).get('vtt_tracks', {}).values()]} + return { 'id': video_id, 'display_id': display_id, - 'url': video_url, + 'formats': formats, 'title': title, 'description': description, 'thumbnail': thumbnail, @@ -93,4 +104,5 @@ class PornoVoisinesIE(InfoExtractor): 'average_rating': average_rating, 'categories': categories, 'age_limit': 18, + 'subtitles': subtitles, } From 919cf1a62f022c61cfa65498e8c1b1cc0d21046e Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sat, 3 Sep 2016 23:00:52 +0800 Subject: [PATCH 0048/1571] [downloader/dash] Abort if the first segment fails Closes #10497, Closes #10542 --- ChangeLog | 4 ++++ youtube_dl/downloader/dash.py | 20 +++++++++++++------- 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/ChangeLog b/ChangeLog index 616b55803..1d277b562 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,9 @@ version +Core +* If the first segment of DASH fails, abort the whole download process to + prevent throttling (#10497) + Extractors * [pornvoisines] Fix extraction (#10469) * [rottentomatoes] Fix extraction (#10467) diff --git a/youtube_dl/downloader/dash.py b/youtube_dl/downloader/dash.py index efeae02a3..41fc9cfc2 100644 --- a/youtube_dl/downloader/dash.py +++ b/youtube_dl/downloader/dash.py @@ -40,7 +40,8 @@ class DashSegmentsFD(FragmentFD): fragment_retries = self.params.get('fragment_retries', 0) skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True) - def append_url_to_file(target_url, tmp_filename, segment_name): + def process_segment(segment, tmp_filename, fatal): + target_url, segment_name = segment target_filename = '%s-%s' % (tmp_filename, segment_name) count = 0 while count <= fragment_retries: @@ -64,18 +65,23 @@ class DashSegmentsFD(FragmentFD): if count <= fragment_retries: self.report_retry_fragment(err, segment_name, count, fragment_retries) if count > fragment_retries: - if skip_unavailable_fragments: + if not fatal: self.report_skip_fragment(segment_name) return True self.report_error('giving up after %s fragment retries' % fragment_retries) return False return True - if initialization_url: - if not append_url_to_file(initialization_url, ctx['tmpfilename'], 'Init'): - return False - for i, segment_url in enumerate(segment_urls): - if not append_url_to_file(segment_url, ctx['tmpfilename'], 'Seg%d' % i): + segments_to_download = [(initialization_url, 'Init')] if initialization_url else [] + segments_to_download.extend([ + (segment_url, 'Seg%d' % i) + for i, segment_url in enumerate(segment_urls)]) + + for i, segment in enumerate(segments_to_download): + # In DASH, the first segment contains necessary headers to + # generate a valid MP4 file, so always abort for the first segment + fatal = i == 0 or not skip_unavailable_fragments + if not process_segment(segment, ctx['tmpfilename'], fatal): return False self._finish_frag_download(ctx) From 0def758782c273e0a1c9984f895638845796715b Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sun, 4 Sep 2016 11:42:15 +0100 Subject: [PATCH 0049/1571] [internetvideoarchive] extract all formats --- youtube_dl/extractor/common.py | 14 +++++++------- youtube_dl/extractor/internetvideoarchive.py | 15 ++++++++++++--- 2 files changed, 19 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index a82968162..6edd5a769 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1163,13 +1163,6 @@ class InfoExtractor(object): m3u8_id=None, note=None, errnote=None, fatal=True, live=False): - formats = [self._m3u8_meta_format(m3u8_url, ext, preference, m3u8_id)] - - format_url = lambda u: ( - u - if re.match(r'^https?://', u) - else compat_urlparse.urljoin(m3u8_url, u)) - res = self._download_webpage_handle( m3u8_url, video_id, note=note or 'Downloading m3u8 information', @@ -1180,6 +1173,13 @@ class InfoExtractor(object): m3u8_doc, urlh = res m3u8_url = urlh.geturl() + formats = [self._m3u8_meta_format(m3u8_url, ext, preference, m3u8_id)] + + format_url = lambda u: ( + u + if re.match(r'^https?://', u) + else compat_urlparse.urljoin(m3u8_url, u)) + # We should try extracting formats only from master playlists [1], i.e. # playlists that describe available qualities. On the other hand media # playlists [2] should be returned as is since they contain just the media diff --git a/youtube_dl/extractor/internetvideoarchive.py b/youtube_dl/extractor/internetvideoarchive.py index 45add007f..76cc5ec3e 100644 --- a/youtube_dl/extractor/internetvideoarchive.py +++ b/youtube_dl/extractor/internetvideoarchive.py @@ -48,13 +48,23 @@ class InternetVideoArchiveIE(InfoExtractor): # There are multiple videos in the playlist whlie only the first one # matches the video played in browsers video_info = configuration['playlist'][0] + title = video_info['title'] formats = [] for source in video_info['sources']: file_url = source['file'] if determine_ext(file_url) == 'm3u8': - formats.extend(self._extract_m3u8_formats( - file_url, video_id, ext='mp4', m3u8_id='hls')) + m3u8_formats = self._extract_m3u8_formats( + file_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False) + if m3u8_formats: + formats.extend(m3u8_formats) + file_url = m3u8_formats[0]['url'] + formats.extend(self._extract_f4m_formats( + file_url.replace('.m3u8', '.f4m'), + video_id, f4m_id='hds', fatal=False)) + formats.extend(self._extract_mpd_formats( + file_url.replace('.m3u8', '.mpd'), + video_id, mpd_id='dash', fatal=False)) else: a_format = { 'url': file_url, @@ -70,7 +80,6 @@ class InternetVideoArchiveIE(InfoExtractor): self._sort_formats(formats) - title = video_info['title'] description = video_info.get('description') thumbnail = video_info.get('image') else: From 100bd86a68b5ee84669d162c9bcda31616f6596a Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sun, 4 Sep 2016 11:44:13 +0100 Subject: [PATCH 0050/1571] [rottentomatoes] delegate extraction to InternetVideoArchiveIE --- youtube_dl/extractor/rottentomatoes.py | 25 ++++++------------------- 1 file changed, 6 insertions(+), 19 deletions(-) diff --git a/youtube_dl/extractor/rottentomatoes.py b/youtube_dl/extractor/rottentomatoes.py index df39ed3f2..23abf7a27 100644 --- a/youtube_dl/extractor/rottentomatoes.py +++ b/youtube_dl/extractor/rottentomatoes.py @@ -1,7 +1,7 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..utils import js_to_json +from .internetvideoarchive import InternetVideoArchiveIE class RottenTomatoesIE(InfoExtractor): @@ -13,6 +13,7 @@ class RottenTomatoesIE(InfoExtractor): 'id': '11028566', 'ext': 'mp4', 'title': 'Toy Story 3', + 'description': 'From the creators of the beloved TOY STORY films, comes a story that will reunite the gang in a whole new way.', 'thumbnail': 're:^https?://.*\.jpg$', }, } @@ -20,26 +21,12 @@ class RottenTomatoesIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - - params = self._parse_json( - self._search_regex(r'(?s)RTVideo\(({.+?})\);', webpage, 'player parameters'), - video_id, transform_source=lambda s: js_to_json(s.replace('window.location.href', '""'))) - - formats = [] - if params.get('urlHLS'): - formats.extend(self._extract_m3u8_formats( - params['urlHLS'], video_id, ext='mp4', - entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)) - if params.get('urlMP4'): - formats.append({ - 'url': params['urlMP4'], - 'format_id': 'mp4', - }) - self._sort_formats(formats) + iva_id = self._search_regex(r'publishedid=(\d+)', webpage, 'internet video archive id') return { + '_type': 'url_transparent', + 'url': 'http://video.internetvideoarchive.net/player/6/configuration.ashx?domain=www.videodetective.com&customerid=69249&playerid=641&publishedid=' + iva_id, + 'ie_key': InternetVideoArchiveIE.ie_key(), 'id': video_id, 'title': self._og_search_title(webpage), - 'formats': formats, - 'thumbnail': params.get('thumbnailImg'), } From feaa5ad787cdc28e4b6979f1c7798134b1bee723 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 4 Sep 2016 20:12:34 +0700 Subject: [PATCH 0051/1571] [youtube:playlist] Extend _VALID_URL --- youtube_dl/extractor/youtube.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 0bc85af74..8fc26bd02 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -264,7 +264,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): ) )? # all until now is optional -> you can pass the naked ID ([0-9A-Za-z_-]{11}) # here is it! the YouTube video ID - (?!.*?&list=) # combined list/video URLs are handled by the playlist IE + (?!.*?\blist=) # combined list/video URLs are handled by the playlist IE (?(1).+)? # if we found the ID, everything can follow $""" _NEXT_URL_RE = r'[\?&]next_url=([^&]+)' @@ -1778,11 +1778,14 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor): _VALID_URL = r"""(?x)(?: (?:https?://)? (?:\w+\.)? - youtube\.com/ (?: - (?:course|view_play_list|my_playlists|artist|playlist|watch|embed/videoseries) - \? (?:.*?[&;])*? (?:p|a|list)= - | p/ + youtube\.com/ + (?: + (?:course|view_play_list|my_playlists|artist|playlist|watch|embed/videoseries) + \? (?:.*?[&;])*? (?:p|a|list)= + | p/ + )| + youtu\.be/[0-9A-Za-z_-]{11}\?.*?\blist= ) ( (?:PL|LL|EC|UU|FL|RD|UL)?[0-9A-Za-z-_]{10,} @@ -1887,6 +1890,9 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor): 'skip_download': True, }, 'add_ie': [YoutubeIE.ie_key()], + }, { + 'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21', + 'only_matching': True, }] def _real_initialize(self): From 433af6ad3002424ecb316e23946722d54010dbe1 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sun, 4 Sep 2016 14:18:41 +0100 Subject: [PATCH 0052/1571] [theplatform] fix player regex(closes #10546) --- youtube_dl/extractor/theplatform.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/theplatform.py b/youtube_dl/extractor/theplatform.py index 23067e8c6..6febf805b 100644 --- a/youtube_dl/extractor/theplatform.py +++ b/youtube_dl/extractor/theplatform.py @@ -96,7 +96,7 @@ class ThePlatformBaseIE(OnceIE): class ThePlatformIE(ThePlatformBaseIE, AdobePassIE): _VALID_URL = r'''(?x) (?:https?://(?:link|player)\.theplatform\.com/[sp]/(?P[^/]+)/ - (?:(?:(?:[^/]+/)+select/)?(?Pmedia/(?:guid/\d+/)?)|(?P(?:[^/\?]+/(?:swf|config)|onsite)/select/))? + (?:(?:(?:[^/]+/)+select/)?(?Pmedia/(?:guid/\d+/)?)?|(?P(?:[^/\?]+/(?:swf|config)|onsite)/select/))? |theplatform:)(?P[^/\?&]+)''' _TESTS = [{ @@ -116,6 +116,7 @@ class ThePlatformIE(ThePlatformBaseIE, AdobePassIE): # rtmp download 'skip_download': True, }, + 'skip': '404 Not Found', }, { # from http://www.cnet.com/videos/tesla-model-s-a-second-step-towards-a-cleaner-motoring-future/ 'url': 'http://link.theplatform.com/s/kYEXFC/22d_qsQ6MIRT', From d9606d9b6cb44ee7600abf63333db4b88532a391 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 4 Sep 2016 20:51:48 +0700 Subject: [PATCH 0053/1571] release 2016.09.04 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- README.md | 7 ++++++- docs/supportedsites.md | 2 ++ youtube_dl/version.py | 2 +- 5 files changed, 13 insertions(+), 6 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index fc18e733b..1ddb3ef85 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.09.03*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.09.03** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.09.04*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.09.04** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.09.03 +[debug] youtube-dl version 2016.09.04 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 1d277b562..a26f5d4aa 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2016.09.04 Core * If the first segment of DASH fails, abort the whole download process to diff --git a/README.md b/README.md index 87465aa5e..207b633db 100644 --- a/README.md +++ b/README.md @@ -89,6 +89,8 @@ which means you can modify it, redistribute it or use it however you like. --mark-watched Mark videos watched (YouTube only) --no-mark-watched Do not mark videos watched (YouTube only) --no-color Do not emit color codes in output + --abort-on-unavailable-fragment Abort downloading when some fragment is not + available ## Network Options: --proxy URL Use the specified HTTP/HTTPS/SOCKS proxy. @@ -173,7 +175,10 @@ which means you can modify it, redistribute it or use it however you like. -R, --retries RETRIES Number of retries (default is 10), or "infinite". --fragment-retries RETRIES Number of retries for a fragment (default - is 10), or "infinite" (DASH only) + is 10), or "infinite" (DASH and hlsnative + only) + --skip-unavailable-fragments Skip unavailable fragments (DASH and + hlsnative only) --buffer-size SIZE Size of download buffer (e.g. 1024 or 16K) (default is 1024) --no-resize-buffer Do not automatically adjust the buffer diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 015332bca..9e21016f7 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -232,6 +232,7 @@ - **FacebookPluginsVideo** - **faz.net** - **fc2** + - **fc2:embed** - **Fczenit** - **features.aol.com** - **fernsehkritik.tv** @@ -245,6 +246,7 @@ - **FOX** - **Foxgay** - **FoxNews**: Fox News and Fox Business Video + - **foxnews:insider** - **FoxSports** - **france2.fr:generation-quoi** - **FranceCulture** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 5be8c0122..3d12a47e8 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.09.03' +__version__ = '2016.09.04' From 8112bfeabae792754f51e0c012ed34c4dc521bac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 4 Sep 2016 20:57:18 +0700 Subject: [PATCH 0054/1571] [ChangeLog] Actualize --- ChangeLog | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/ChangeLog b/ChangeLog index a26f5d4aa..a542496a3 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,12 +1,26 @@ -version 2016.09.04 +version Core -* If the first segment of DASH fails, abort the whole download process to - prevent throttling (#10497) +* In DASH downloader if the first segment fails, abort the whole download + process to prevent throttling (#10497) ++ Add support for --skip-unavailable-fragments and --fragment retries in + hlsnative downloader (#10165, #10448). ++ Add support for --skip-unavailable-fragments in DASH downloader ++ Introduce --skip-unavailable-fragments option for fragment based downloaders + that allows to skip fragments unavailable due to a HTTP error +* Fix extraction of video/audio entries with src attribute in + _parse_html5_media_entries (#10540) Extractors +* [theplatform] Relax URL regular expression (#10546) +* [youtube:playlist] Extend URL regular expression +* [rottentomatoes] Delegate extraction to internetvideoarchive extractor +* [internetvideoarchive] Extract all formats * [pornvoisines] Fix extraction (#10469) * [rottentomatoes] Fix extraction (#10467) +* [espn] Extend URL regular expression (#10549) +* [vimple] Extend URL regular expression (#10547) +* [youtube:watchlater] Fix extraction (#10544) * [youjizz] Fix extraction (#10437) + [foxnews] Add support for FoxNews Insider (#10445) + [fc2] Recognize Flash player URLs (#10512) @@ -19,7 +33,6 @@ Core _extract_m3u8_formats (#10522) * Handle semicolon in mimetype2ext - Extractors + [youtube] Add support for rental videos' previews (#10532) * [youtube:playlist] Fallback to video extraction for video/playlist URLs when From 48094901086534533ca89283067f2ab732857654 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 4 Sep 2016 20:58:28 +0700 Subject: [PATCH 0055/1571] release 2016.09.04.1 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- youtube_dl/version.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 1ddb3ef85..c03092442 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.09.04*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.09.04** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.09.04.1*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.09.04.1** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.09.04 +[debug] youtube-dl version 2016.09.04.1 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index a542496a3..d392513ce 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2016.09.04.1 Core * In DASH downloader if the first segment fails, abort the whole download diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 3d12a47e8..b2ea6dac6 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.09.04' +__version__ = '2016.09.04.1' From 78e762d23c48f85c61a8afcae29307912000a7dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mat=C4=9Bj=20Cepl?= Date: Thu, 1 Sep 2016 17:31:08 +0200 Subject: [PATCH 0056/1571] Add new extractor for TV Noe (Czech Christian TV). Fixes #10520 --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/tvnoe.py | 44 ++++++++++++++++++++++++++++++ 2 files changed, 45 insertions(+) create mode 100644 youtube_dl/extractor/tvnoe.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 8c6ee0503..e47adc26c 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -916,6 +916,7 @@ from .tvc import ( ) from .tvigle import TvigleIE from .tvland import TVLandIE +from .tvnoe import TVNoeIE from .tvp import ( TVPEmbedIE, TVPIE, diff --git a/youtube_dl/extractor/tvnoe.py b/youtube_dl/extractor/tvnoe.py new file mode 100644 index 000000000..d50261ddd --- /dev/null +++ b/youtube_dl/extractor/tvnoe.py @@ -0,0 +1,44 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .jwplatform import JWPlatformBaseIE +from ..utils import clean_html, get_element_by_class, js_to_json + + +class TVNoeIE(JWPlatformBaseIE): + _VALID_URL = r'https?://(www\.)?tvnoe\.cz/video/(?P[0-9]+)' + _TEST = { + 'url': 'http://www.tvnoe.cz/video/10362', + 'md5': 'aee983f279aab96ec45ab6e2abb3c2ca', + 'info_dict': { + 'id': '10362', + 'ext': 'mp4', + 'series': 'Noční univerzita', + 'title': 'prof. Tomáš Halík, Th.D. - ' + + 'Návrat náboženství a střet civilizací', + 'description': 'md5:f337bae384e1a531a52c55ebc50fff41', + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + iframe_url = self._search_regex(r']+src="([^"]+)"', + webpage, 'iframe src attribute') + + ifs_page = self._download_webpage(iframe_url, video_id) + jwplayer_data = self._parse_json(self._find_jwplayer_data(ifs_page), + video_id, transform_source=js_to_json) + info_dict = self._parse_jwplayer_data( + jwplayer_data, video_id, require_title=False, base_url=iframe_url) + + info_dict.update({ + 'id': video_id, + 'title': clean_html( + get_element_by_class('field-name-field-podnazev', webpage)), + 'description': clean_html(get_element_by_class('field-name-body', + webpage)), + 'series': clean_html(get_element_by_class('title', webpage)) + }) + return info_dict From 9127e1533d294eb672d783d1eeed15aeb9b2cbe1 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Mon, 5 Sep 2016 13:37:36 +0800 Subject: [PATCH 0057/1571] [tvnoe] PEP8 and coding style --- youtube_dl/extractor/tvnoe.py | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/youtube_dl/extractor/tvnoe.py b/youtube_dl/extractor/tvnoe.py index d50261ddd..1cd3e6a58 100644 --- a/youtube_dl/extractor/tvnoe.py +++ b/youtube_dl/extractor/tvnoe.py @@ -2,7 +2,11 @@ from __future__ import unicode_literals from .jwplatform import JWPlatformBaseIE -from ..utils import clean_html, get_element_by_class, js_to_json +from ..utils import ( + clean_html, + get_element_by_class, + js_to_json, +) class TVNoeIE(JWPlatformBaseIE): @@ -14,8 +18,7 @@ class TVNoeIE(JWPlatformBaseIE): 'id': '10362', 'ext': 'mp4', 'series': 'Noční univerzita', - 'title': 'prof. Tomáš Halík, Th.D. - ' + - 'Návrat náboženství a střet civilizací', + 'title': 'prof. Tomáš Halík, Th.D. - Návrat náboženství a střet civilizací', 'description': 'md5:f337bae384e1a531a52c55ebc50fff41', } } @@ -24,21 +27,23 @@ class TVNoeIE(JWPlatformBaseIE): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - iframe_url = self._search_regex(r']+src="([^"]+)"', - webpage, 'iframe src attribute') + iframe_url = self._search_regex( + r']+src="([^"]+)"', webpage, 'iframe URL') ifs_page = self._download_webpage(iframe_url, video_id) - jwplayer_data = self._parse_json(self._find_jwplayer_data(ifs_page), - video_id, transform_source=js_to_json) + jwplayer_data = self._parse_json( + self._find_jwplayer_data(ifs_page), + video_id, transform_source=js_to_json) info_dict = self._parse_jwplayer_data( jwplayer_data, video_id, require_title=False, base_url=iframe_url) info_dict.update({ 'id': video_id, - 'title': clean_html( - get_element_by_class('field-name-field-podnazev', webpage)), - 'description': clean_html(get_element_by_class('field-name-body', - webpage)), + 'title': clean_html(get_element_by_class( + 'field-name-field-podnazev', webpage)), + 'description': clean_html(get_element_by_class( + 'field-name-body', webpage)), 'series': clean_html(get_element_by_class('title', webpage)) }) + return info_dict From b49ad71ce1d985165e07fd0f59f80f677434ad84 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Mon, 5 Sep 2016 13:38:55 +0800 Subject: [PATCH 0058/1571] [ChangeLog] Update for #10524 --- ChangeLog | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/ChangeLog b/ChangeLog index d392513ce..0be9b0fbb 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +version + +Extractors ++ [tvnoe] New extractor (#10524) + + version 2016.09.04.1 Core From 95be19d436d1938d104310e194e85ea5a10c3353 Mon Sep 17 00:00:00 2001 From: Xie Yanbo Date: Sun, 4 Sep 2016 23:23:40 +0800 Subject: [PATCH 0059/1571] [miaopai] Add new extractor --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/miaopai.py | 44 ++++++++++++++++++++++++++++++ 2 files changed, 45 insertions(+) create mode 100644 youtube_dl/extractor/miaopai.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 8c6ee0503..d511b04bc 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -471,6 +471,7 @@ from .metacafe import MetacafeIE from .metacritic import MetacriticIE from .mgoon import MgoonIE from .mgtv import MGTVIE +from .miaopai import MiaoPaiIE from .microsoftvirtualacademy import ( MicrosoftVirtualAcademyIE, MicrosoftVirtualAcademyCourseIE, diff --git a/youtube_dl/extractor/miaopai.py b/youtube_dl/extractor/miaopai.py new file mode 100644 index 000000000..c36b441b8 --- /dev/null +++ b/youtube_dl/extractor/miaopai.py @@ -0,0 +1,44 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import sanitized_Request + + +class MiaoPaiIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?miaopai\.com/show/(?P[-A-Za-z0-9~_]+).htm' + _TEST = { + 'url': 'http://www.miaopai.com/show/n~0hO7sfV1nBEw4Y29-Hqg__.htm', + 'md5': '095ed3f1cd96b821add957bdc29f845b', + 'info_dict': { + 'id': 'n~0hO7sfV1nBEw4Y29-Hqg__', + 'ext': 'mp4', + 'title': '西游记音乐会的秒拍视频', + 'thumbnail': 're:^https?://.*/n~0hO7sfV1nBEw4Y29-Hqg___m.jpg', + } + } + + _USER_AGENT_IPAD = 'User-Agent:Mozilla/5.0 ' \ + '(iPad; CPU OS 9_1 like Mac OS X) ' \ + 'AppleWebKit/601.1.46 (KHTML, like Gecko) ' \ + 'Version/9.0 Mobile/13B143 Safari/601.1' + + def _real_extract(self, url): + video_id = self._match_id(url) + request = sanitized_Request(url) + request.add_header('User-Agent', self._USER_AGENT_IPAD) + webpage = self._download_webpage(request, video_id) + + title = self._html_search_regex(r'([^<]*)', + webpage, + 'title') + regex = r"""
]*data-url=['"]([^'"]*\.jpg)['"]""" + thumbnail = self._html_search_regex(regex, webpage, '') + regex = r"""
]*data-url=['"]([^'"]*\.jpg)['"]""" thumbnail = self._html_search_regex(regex, webpage, '') - regex = r"""
\s+', + content) + for article_id, article_url, _, article_title in articles: + resolved_article_url = compat_urlparse.urljoin(base_url, article_url) + entries.append(self.url_result( + resolved_article_url, + ie='PolskieRadio', + video_id=article_id, + video_title=article_title)) + + return entries + + @classmethod + def suitable(cls, url): + return False if PolskieRadioIE.suitable(url) else super(PolskieRadioProgrammeIE, cls).suitable(url) + + def _real_extract(self, url): + programme_id = self._match_id(url) + webpage = self._download_webpage(url, programme_id) + + title = self._html_search_regex( + r'(.+?)', + webpage, 'title', fatal=False) + description = None + + entries = self._get_entries_from_page_content(url, webpage) + + pages = re.findall(r' 1: + page_url_root = next(url for _, url, _ in pages if len(url) > 0) + for page_number in range(2, page_count + 1): + page_url = page_url_root + str(page_number) + resolved_page_url = compat_urlparse.urljoin(url, page_url) + page_content = self._download_webpage( + resolved_page_url, programme_id, + note="Downloading page number %d" % page_number) + entries.extend(self._get_entries_from_page_content(url, page_content)) + + return self.playlist_result(entries, programme_id, title, description) + + class PolskieRadioIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?polskieradio\.pl/\d+/\d+/Artykul/(?P[0-9]+)' _TESTS = [{ From 948cd5b72d6d3f8a8b70717de50f7f58c260e65b Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Tue, 6 Sep 2016 20:44:45 +0100 Subject: [PATCH 0071/1571] [wat] extract dash formats --- youtube_dl/extractor/wat.py | 70 ++++++++++++++++++++++--------------- 1 file changed, 41 insertions(+), 29 deletions(-) diff --git a/youtube_dl/extractor/wat.py b/youtube_dl/extractor/wat.py index 9f1b8b4b5..20fef1f04 100644 --- a/youtube_dl/extractor/wat.py +++ b/youtube_dl/extractor/wat.py @@ -86,38 +86,50 @@ class WatIE(InfoExtractor): def extract_url(path_template, url_type): req_url = 'http://www.wat.tv/get/%s' % (path_template % video_id) - head = self._request_webpage(HEADRequest(req_url), video_id, 'Extracting %s url' % url_type) - red_url = head.geturl() - if req_url == red_url: - raise ExtractorError( - '%s said: Sorry, this video is not available from your country.' % self.IE_NAME, - expected=True) - return red_url + head = self._request_webpage(HEADRequest(req_url), video_id, 'Extracting %s url' % url_type, fatal=False) + if head: + red_url = head.geturl() + if req_url != red_url: + return red_url + return None + + def remove_bitrate_limit(manifest_url): + return re.sub(r'(?:max|min)_bitrate=\d+&?', '', manifest_url) formats = [] try: - http_url = extract_url('android5/%s.mp4', 'http') - m3u8_url = extract_url('ipad/%s.m3u8', 'm3u8') - m3u8_formats = self._extract_m3u8_formats( - m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls') - formats.extend(m3u8_formats) - formats.extend(self._extract_f4m_formats( - m3u8_url.replace('ios.', 'web.').replace('.m3u8', '.f4m'), - video_id, f4m_id='hds', fatal=False)) - for m3u8_format in m3u8_formats: - vbr, abr = m3u8_format.get('vbr'), m3u8_format.get('abr') - if not vbr or not abr: - continue - format_id = m3u8_format['format_id'].replace('hls', 'http') - fmt_url = re.sub(r'%s-\d+00-\d+' % video_id, '%s-%d00-%d' % (video_id, round(vbr / 100), round(abr)), http_url) - if self._is_valid_url(fmt_url, video_id, format_id): - f = m3u8_format.copy() - f.update({ - 'url': fmt_url, - 'format_id': format_id, - 'protocol': 'http', - }) - formats.append(f) + manifest_urls = self._download_json( + 'http://www.wat.tv/get/webhtml/' + video_id, video_id) + m3u8_url = manifest_urls.get('hls') + if m3u8_url: + m3u8_url = remove_bitrate_limit(m3u8_url) + m3u8_formats = self._extract_m3u8_formats( + m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False) + if m3u8_formats: + formats.extend(m3u8_formats) + formats.extend(self._extract_f4m_formats( + m3u8_url.replace('ios', 'web').replace('.m3u8', '.f4m'), + video_id, f4m_id='hds', fatal=False)) + http_url = extract_url('android5/%s.mp4', 'http') + if http_url: + for m3u8_format in m3u8_formats: + vbr, abr = m3u8_format.get('vbr'), m3u8_format.get('abr') + if not vbr or not abr: + continue + format_id = m3u8_format['format_id'].replace('hls', 'http') + fmt_url = re.sub(r'%s-\d+00-\d+' % video_id, '%s-%d00-%d' % (video_id, round(vbr / 100), round(abr)), http_url) + if self._is_valid_url(fmt_url, video_id, format_id): + f = m3u8_format.copy() + f.update({ + 'url': fmt_url, + 'format_id': format_id, + 'protocol': 'http', + }) + formats.append(f) + mpd_url = manifest_urls.get('mpd') + if mpd_url: + formats.extend(self._extract_mpd_formats(remove_bitrate_limit( + mpd_url), video_id, mpd_id='dash', fatal=False)) self._sort_formats(formats) except ExtractorError: abr = 64 From 8258f4457ce37fd24fb6044b5948f1816fcf3b94 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Tue, 6 Sep 2016 20:47:42 +0100 Subject: [PATCH 0072/1571] [lci] Add new extractor(closes #10573) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/lci.py | 24 ++++++++++++++++++++++++ 2 files changed, 25 insertions(+) create mode 100644 youtube_dl/extractor/lci.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index f72faa8a7..192d29134 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -424,6 +424,7 @@ from .kuwo import ( ) from .la7 import LA7IE from .laola1tv import Laola1TvIE +from .lci import LCIIE from .lcp import ( LcpPlayIE, LcpIE, diff --git a/youtube_dl/extractor/lci.py b/youtube_dl/extractor/lci.py new file mode 100644 index 000000000..af34829e7 --- /dev/null +++ b/youtube_dl/extractor/lci.py @@ -0,0 +1,24 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor + + +class LCIIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?lci\.fr/[^/]+/[\w-]+-(?P\d+)\.html' + _TEST = { + 'url': 'http://www.lci.fr/international/etats-unis-a-j-62-hillary-clinton-reste-sans-voix-2001679.html', + 'md5': '2fdb2538b884d4d695f9bd2bde137e6c', + 'info_dict': { + 'id': '13244802', + 'ext': 'mp4', + 'title': 'Hillary Clinton et sa quinte de toux, en plein meeting', + 'description': 'md5:a4363e3a960860132f8124b62f4a01c9', + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + wat_id = self._search_regex(r'data-watid=[\'"](\d+)', webpage, 'wat id') + return self.url_result('wat:' + wat_id, 'Wat', wat_id) From aa3f9fe695134719de87837238045beec10c4ab5 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Wed, 7 Sep 2016 10:38:59 +0200 Subject: [PATCH 0073/1571] Explain why and why not to specify --hls-prefer-native This has been asked at http://stackoverflow.com/questions/39357037/what-does-youtube-dl-option-hls-prefer-native-do-any-downside-adding-to-youtu --- README.md | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/README.md b/README.md index 207b633db..7543f81ac 100644 --- a/README.md +++ b/README.md @@ -851,6 +851,16 @@ will download the complete `PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re` playlist and cre youtube-dl --download-archive archive.txt "https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re" +### Should I add `--hls-prefer-native` into my config? + +When youtube-dl detects an HLS video, it can download it either with the built-in downloader or ffmpeg. Since many HLS streams are slightly invalid and ffmpeg/youtube-dl each handle some invalid cases better than the other, there is an option to switch the downloader if needed. + +When youtube-dl knows that one particular downloader works better for a given website, that downloader will be picked. Otherwise, youtube-dl will pick the best downloader for general compatibility, which at the moment happens to be ffmpeg. This choice may change in future versions of youtube-dl, with improvements of the built-in downloader and/or ffmpeg. + +In particular, the generic extractor (used when your website is not in the [list of supported sites by youtube-dl](http://rg3.github.io/youtube-dl/supportedsites.html) cannot mandate one specific downloader. + +If you put either `--hls-prefer-native` or `--hls-prefer-ffmpeg` into your configuration, a different subset of videos will fail to download correctly. Instead, it is much better to [file an issue](https://yt-dl.org/bug) or a pull request which details why the native or the ffmpeg HLS downloader is a better choice for your use case. + ### Can you add support for this anime video site, or site which shows current movies for free? As a matter of policy (as well as legality), youtube-dl does not include support for services that specialize in infringing copyright. As a rule of thumb, if you cannot easily find a video that the service is quite obviously allowed to distribute (i.e. that has been uploaded by the creator, the creator's distributor, or is published under a free license), the service is probably unfit for inclusion to youtube-dl. From 846d8b76a07f617b595e0f1a2ae7ba22acafd98d Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Wed, 7 Sep 2016 10:10:43 +0100 Subject: [PATCH 0074/1571] [cctv] Add new extractor(closes #8153) --- youtube_dl/extractor/cctv.py | 53 ++++++++++++++++++++++++++++++ youtube_dl/extractor/extractors.py | 1 + 2 files changed, 54 insertions(+) create mode 100644 youtube_dl/extractor/cctv.py diff --git a/youtube_dl/extractor/cctv.py b/youtube_dl/extractor/cctv.py new file mode 100644 index 000000000..72a72cb73 --- /dev/null +++ b/youtube_dl/extractor/cctv.py @@ -0,0 +1,53 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import float_or_none + + +class CCTVIE(InfoExtractor): + _VALID_URL = r'''(?x)https?://(?:.+?\.)? + (?: + cctv\.(?:com|cn)| + cntv\.cn + )/ + (?: + video/[^/]+/(?P[0-9a-f]{32})| + \d{4}/\d{2}/\d{2}/(?PVID[0-9A-Za-z]+) + )''' + _TESTS = [{ + 'url': 'http://english.cntv.cn/2016/09/03/VIDEhnkB5y9AgHyIEVphCEz1160903.shtml', + 'md5': '819c7b49fc3927d529fb4cd555621823', + 'info_dict': { + 'id': '454368eb19ad44a1925bf1eb96140a61', + 'ext': 'mp4', + 'title': 'Portrait of Real Current Life 09/03/2016 Modern Inventors Part 1', + } + }, { + 'url': 'http://tv.cctv.com/2016/09/07/VIDE5C1FnlX5bUywlrjhxXOV160907.shtml', + 'only_matching': True, + }, { + 'url': 'http://tv.cntv.cn/video/C39296/95cfac44cabd3ddc4a9438780a4e5c44', + 'only_matching': True + }] + + def _real_extract(self, url): + video_id, display_id = re.match(self._VALID_URL, url).groups() + if not video_id: + webpage = self._download_webpage(url, display_id) + video_id = self._search_regex( + r'(?:fo\.addVariable\("videoCenterId",\s*|guid\s*=\s*)"([0-9a-f]{32})', + webpage, 'video_id') + api_data = self._download_json( + 'http://vdn.apps.cntv.cn/api/getHttpVideoInfo.do?pid=' + video_id, video_id) + m3u8_url = re.sub(r'maxbr=\d+&?', '', api_data['hls_url']) + + return { + 'id': video_id, + 'title': api_data['title'], + 'formats': self._extract_m3u8_formats( + m3u8_url, video_id, 'mp4', 'm3u8_native', fatal=False), + 'duration': float_or_none(api_data.get('video', {}).get('totalLength')), + } diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 192d29134..9fe824c67 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -146,6 +146,7 @@ from .cbsnews import ( ) from .cbssports import CBSSportsIE from .ccc import CCCIE +from .cctv import CCTVIE from .cda import CDAIE from .ceskatelevize import CeskaTelevizeIE from .channel9 import Channel9IE From 9d54b02baeb1f9c7d63ba5833f33b16d0874c355 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Wed, 7 Sep 2016 14:43:20 +0100 Subject: [PATCH 0075/1571] [puls4] fix extraction(closes #10583) --- youtube_dl/extractor/prosiebensat1.py | 218 +++++++++++++------------- youtube_dl/extractor/puls4.py | 109 +++++-------- 2 files changed, 148 insertions(+), 179 deletions(-) diff --git a/youtube_dl/extractor/prosiebensat1.py b/youtube_dl/extractor/prosiebensat1.py index c6eee3b72..7335dc2af 100644 --- a/youtube_dl/extractor/prosiebensat1.py +++ b/youtube_dl/extractor/prosiebensat1.py @@ -15,7 +15,111 @@ from ..utils import ( ) -class ProSiebenSat1IE(InfoExtractor): +class ProSiebenSat1BaseIE(InfoExtractor): + def _extract_video_info(self, url, clip_id): + client_location = url + + video = self._download_json( + 'http://vas.sim-technik.de/vas/live/v2/videos', + clip_id, 'Downloading videos JSON', query={ + 'access_token': self._TOKEN, + 'client_location': client_location, + 'client_name': self._CLIENT_NAME, + 'ids': clip_id, + })[0] + + if video.get('is_protected') is True: + raise ExtractorError('This video is DRM protected.', expected=True) + + duration = float_or_none(video.get('duration')) + source_ids = [compat_str(source['id']) for source in video['sources']] + + client_id = self._SALT[:2] + sha1(''.join([clip_id, self._SALT, self._TOKEN, client_location, self._SALT, self._CLIENT_NAME]).encode('utf-8')).hexdigest() + + sources = self._download_json( + 'http://vas.sim-technik.de/vas/live/v2/videos/%s/sources' % clip_id, + clip_id, 'Downloading sources JSON', query={ + 'access_token': self._TOKEN, + 'client_id': client_id, + 'client_location': client_location, + 'client_name': self._CLIENT_NAME, + }) + server_id = sources['server_id'] + + def fix_bitrate(bitrate): + bitrate = int_or_none(bitrate) + if not bitrate: + return None + return (bitrate // 1000) if bitrate % 1000 == 0 else bitrate + + formats = [] + for source_id in source_ids: + client_id = self._SALT[:2] + sha1(''.join([self._SALT, clip_id, self._TOKEN, server_id, client_location, source_id, self._SALT, self._CLIENT_NAME]).encode('utf-8')).hexdigest() + urls = self._download_json( + 'http://vas.sim-technik.de/vas/live/v2/videos/%s/sources/url' % clip_id, + clip_id, 'Downloading urls JSON', fatal=False, query={ + 'access_token': self._TOKEN, + 'client_id': client_id, + 'client_location': client_location, + 'client_name': self._CLIENT_NAME, + 'server_id': server_id, + 'source_ids': source_id, + }) + if not urls: + continue + if urls.get('status_code') != 0: + raise ExtractorError('This video is unavailable', expected=True) + urls_sources = urls['sources'] + if isinstance(urls_sources, dict): + urls_sources = urls_sources.values() + for source in urls_sources: + source_url = source.get('url') + if not source_url: + continue + protocol = source.get('protocol') + mimetype = source.get('mimetype') + if mimetype == 'application/f4m+xml' or 'f4mgenerator' in source_url or determine_ext(source_url) == 'f4m': + formats.extend(self._extract_f4m_formats( + source_url, clip_id, f4m_id='hds', fatal=False)) + elif mimetype == 'application/x-mpegURL': + formats.extend(self._extract_m3u8_formats( + source_url, clip_id, 'mp4', 'm3u8_native', + m3u8_id='hls', fatal=False)) + else: + tbr = fix_bitrate(source['bitrate']) + if protocol in ('rtmp', 'rtmpe'): + mobj = re.search(r'^(?Prtmpe?://[^/]+)/(?P.+)$', source_url) + if not mobj: + continue + path = mobj.group('path') + mp4colon_index = path.rfind('mp4:') + app = path[:mp4colon_index] + play_path = path[mp4colon_index:] + formats.append({ + 'url': '%s/%s' % (mobj.group('url'), app), + 'app': app, + 'play_path': play_path, + 'player_url': 'http://livepassdl.conviva.com/hf/ver/2.79.0.17083/LivePassModuleMain.swf', + 'page_url': 'http://www.prosieben.de', + 'tbr': tbr, + 'ext': 'flv', + 'format_id': 'rtmp%s' % ('-%d' % tbr if tbr else ''), + }) + else: + formats.append({ + 'url': source_url, + 'tbr': tbr, + 'format_id': 'http%s' % ('-%d' % tbr if tbr else ''), + }) + self._sort_formats(formats) + + return { + 'duration': duration, + 'formats': formats, + } + + +class ProSiebenSat1IE(ProSiebenSat1BaseIE): IE_NAME = 'prosiebensat1' IE_DESC = 'ProSiebenSat.1 Digital' _VALID_URL = r'https?://(?:www\.)?(?:(?:prosieben|prosiebenmaxx|sixx|sat1|kabeleins|the-voice-of-germany|7tv)\.(?:de|at|ch)|ran\.de|fem\.com)/(?P.+)' @@ -188,6 +292,9 @@ class ProSiebenSat1IE(InfoExtractor): }, ] + _TOKEN = 'prosieben' + _SALT = '01!8d8F_)r9]4s[qeuXfP%' + _CLIENT_NAME = 'kolibri-2.0.19-splec4' _CLIPID_REGEXES = [ r'"clip_id"\s*:\s+"(\d+)"', r'clipid: "(\d+)"', @@ -234,123 +341,22 @@ class ProSiebenSat1IE(InfoExtractor): def _extract_clip(self, url, webpage): clip_id = self._html_search_regex( self._CLIPID_REGEXES, webpage, 'clip id') - - access_token = 'prosieben' - client_name = 'kolibri-2.0.19-splec4' - client_location = url - - video = self._download_json( - 'http://vas.sim-technik.de/vas/live/v2/videos', - clip_id, 'Downloading videos JSON', query={ - 'access_token': access_token, - 'client_location': client_location, - 'client_name': client_name, - 'ids': clip_id, - })[0] - - if video.get('is_protected') is True: - raise ExtractorError('This video is DRM protected.', expected=True) - - duration = float_or_none(video.get('duration')) - source_ids = [compat_str(source['id']) for source in video['sources']] - - g = '01!8d8F_)r9]4s[qeuXfP%' - client_id = g[:2] + sha1(''.join([clip_id, g, access_token, client_location, g, client_name]).encode('utf-8')).hexdigest() - - sources = self._download_json( - 'http://vas.sim-technik.de/vas/live/v2/videos/%s/sources' % clip_id, - clip_id, 'Downloading sources JSON', query={ - 'access_token': access_token, - 'client_id': client_id, - 'client_location': client_location, - 'client_name': client_name, - }) - server_id = sources['server_id'] - title = self._html_search_regex(self._TITLE_REGEXES, webpage, 'title') - - def fix_bitrate(bitrate): - bitrate = int_or_none(bitrate) - if not bitrate: - return None - return (bitrate // 1000) if bitrate % 1000 == 0 else bitrate - - formats = [] - for source_id in source_ids: - client_id = g[:2] + sha1(''.join([g, clip_id, access_token, server_id, client_location, source_id, g, client_name]).encode('utf-8')).hexdigest() - urls = self._download_json( - 'http://vas.sim-technik.de/vas/live/v2/videos/%s/sources/url' % clip_id, - clip_id, 'Downloading urls JSON', fatal=False, query={ - 'access_token': access_token, - 'client_id': client_id, - 'client_location': client_location, - 'client_name': client_name, - 'server_id': server_id, - 'source_ids': source_id, - }) - if not urls: - continue - if urls.get('status_code') != 0: - raise ExtractorError('This video is unavailable', expected=True) - urls_sources = urls['sources'] - if isinstance(urls_sources, dict): - urls_sources = urls_sources.values() - for source in urls_sources: - source_url = source.get('url') - if not source_url: - continue - protocol = source.get('protocol') - mimetype = source.get('mimetype') - if mimetype == 'application/f4m+xml' or 'f4mgenerator' in source_url or determine_ext(source_url) == 'f4m': - formats.extend(self._extract_f4m_formats( - source_url, clip_id, f4m_id='hds', fatal=False)) - elif mimetype == 'application/x-mpegURL': - formats.extend(self._extract_m3u8_formats( - source_url, clip_id, 'mp4', 'm3u8_native', - m3u8_id='hls', fatal=False)) - else: - tbr = fix_bitrate(source['bitrate']) - if protocol in ('rtmp', 'rtmpe'): - mobj = re.search(r'^(?Prtmpe?://[^/]+)/(?P.+)$', source_url) - if not mobj: - continue - path = mobj.group('path') - mp4colon_index = path.rfind('mp4:') - app = path[:mp4colon_index] - play_path = path[mp4colon_index:] - formats.append({ - 'url': '%s/%s' % (mobj.group('url'), app), - 'app': app, - 'play_path': play_path, - 'player_url': 'http://livepassdl.conviva.com/hf/ver/2.79.0.17083/LivePassModuleMain.swf', - 'page_url': 'http://www.prosieben.de', - 'tbr': tbr, - 'ext': 'flv', - 'format_id': 'rtmp%s' % ('-%d' % tbr if tbr else ''), - }) - else: - formats.append({ - 'url': source_url, - 'tbr': tbr, - 'format_id': 'http%s' % ('-%d' % tbr if tbr else ''), - }) - self._sort_formats(formats) - + info = self._extract_video_info(url, clip_id) description = self._html_search_regex( self._DESCRIPTION_REGEXES, webpage, 'description', fatal=False) thumbnail = self._og_search_thumbnail(webpage) upload_date = unified_strdate(self._html_search_regex( self._UPLOAD_DATE_REGEXES, webpage, 'upload date', default=None)) - return { + info.update({ 'id': clip_id, 'title': title, 'description': description, 'thumbnail': thumbnail, 'upload_date': upload_date, - 'duration': duration, - 'formats': formats, - } + }) + return info def _extract_playlist(self, url, webpage): playlist_id = self._html_search_regex( diff --git a/youtube_dl/extractor/puls4.py b/youtube_dl/extractor/puls4.py index fca30e1aa..9c2ccbe2d 100644 --- a/youtube_dl/extractor/puls4.py +++ b/youtube_dl/extractor/puls4.py @@ -1,88 +1,51 @@ # -*- coding: utf-8 -*- from __future__ import unicode_literals -from .common import InfoExtractor +from .prosiebensat1 import ProSiebenSat1BaseIE from ..utils import ( - ExtractorError, unified_strdate, - int_or_none, + parse_duration, + compat_str, ) -class Puls4IE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?puls4\.com/video/[^/]+/play/(?P[0-9]+)' +class Puls4IE(ProSiebenSat1BaseIE): + _VALID_URL = r'https?://(?:www\.)?puls4\.com/(?P(?:[^/]+/)*?videos/[^?#]+)' _TESTS = [{ - 'url': 'http://www.puls4.com/video/pro-und-contra/play/2716816', - 'md5': '49f6a6629747eeec43cef6a46b5df81d', + 'url': 'http://www.puls4.com/2-minuten-2-millionen/staffel-3/videos/2min2miotalk/Tobias-Homberger-von-myclubs-im-2min2miotalk-118118', + 'md5': 'fd3c6b0903ac72c9d004f04bc6bb3e03', 'info_dict': { - 'id': '2716816', - 'ext': 'mp4', - 'title': 'Pro und Contra vom 23.02.2015', - 'description': 'md5:293e44634d9477a67122489994675db6', - 'duration': 2989, - 'upload_date': '20150224', + 'id': '118118', + 'ext': 'flv', + 'title': 'Tobias Homberger von myclubs im #2min2miotalk', + 'description': 'md5:f9def7c5e8745d6026d8885487d91955', + 'upload_date': '20160830', 'uploader': 'PULS_4', }, - 'skip': 'Only works from Germany', - }, { - 'url': 'http://www.puls4.com/video/kult-spielfilme/play/1298106', - 'md5': '6a48316c8903ece8dab9b9a7bf7a59ec', - 'info_dict': { - 'id': '1298106', - 'ext': 'mp4', - 'title': 'Lucky Fritz', - }, - 'skip': 'Only works from Germany', }] + _TOKEN = 'puls4' + _SALT = '01!kaNgaiNgah1Ie4AeSha' + _CLIENT_NAME = '' def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - - error_message = self._html_search_regex( - r']+class="message-error"[^>]*>(.+?)
', - webpage, 'error message', default=None) - if error_message: - raise ExtractorError( - '%s returned error: %s' % (self.IE_NAME, error_message), expected=True) - - real_url = self._html_search_regex( - r'\"fsk-button\".+?href=\"([^"]+)', - webpage, 'fsk_button', default=None) - if real_url: - webpage = self._download_webpage(real_url, video_id) - - player = self._search_regex( - r'p4_video_player(?:_iframe)?\("video_\d+_container"\s*,(.+?)\);\s*\}', - webpage, 'player') - - player_json = self._parse_json( - '[%s]' % player, video_id, - transform_source=lambda s: s.replace('undefined,', '')) - - formats = None - result = None - - for v in player_json: - if isinstance(v, list) and not formats: - formats = [{ - 'url': f['url'], - 'format': 'hd' if f.get('hd') else 'sd', - 'width': int_or_none(f.get('size_x')), - 'height': int_or_none(f.get('size_y')), - 'tbr': int_or_none(f.get('bitrate')), - } for f in v] - self._sort_formats(formats) - elif isinstance(v, dict) and not result: - result = { - 'id': video_id, - 'title': v['videopartname'].strip(), - 'description': v.get('videotitle'), - 'duration': int_or_none(v.get('videoduration') or v.get('episodeduration')), - 'upload_date': unified_strdate(v.get('clipreleasetime')), - 'uploader': v.get('channel'), - } - - result['formats'] = formats - - return result + path = self._match_id(url) + content_path = self._download_json( + 'http://www.puls4.com/api/json-fe/page/' + path, path)['content'][0]['url'] + media = self._download_json( + 'http://www.puls4.com' + content_path, + content_path)['mediaCurrent'] + player_content = media['playerContent'] + info = self._extract_video_info(url, player_content['id']) + info.update({ + 'id': compat_str(media['objectId']), + 'title': player_content['title'], + 'description': media.get('description'), + 'thumbnail': media.get('previewLink'), + 'upload_date': unified_strdate(media.get('date')), + 'duration': parse_duration(player_content.get('duration')), + 'episode': player_content.get('episodePartName'), + 'show': media.get('channel'), + 'season_id': player_content.get('seasonId'), + 'uploader': player_content.get('sourceCompany'), + }) + return info From 92c9c2a88b8bc6e19b6fc9ac5e3814aa75c437e9 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Wed, 7 Sep 2016 22:21:26 +0800 Subject: [PATCH 0076/1571] [moevideo] Skip another removed test (#10474) --- youtube_dl/extractor/moevideo.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/moevideo.py b/youtube_dl/extractor/moevideo.py index 978d5d5bf..91ee9c4e9 100644 --- a/youtube_dl/extractor/moevideo.py +++ b/youtube_dl/extractor/moevideo.py @@ -35,7 +35,8 @@ class MoeVideoIE(InfoExtractor): 'height': 360, 'duration': 179, 'filesize': 17822500, - } + }, + 'skip': 'Video has been removed', }, { 'url': 'http://playreplay.net/video/77107.7f325710a627383d40540d8e991a', From 84b91dd4e3cd45498fb6cd3c7aa611816f61a3d5 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Wed, 7 Sep 2016 23:07:50 +0800 Subject: [PATCH 0077/1571] [gamestar] Fix metadata extraction (closes #10479) --- ChangeLog | 1 + youtube_dl/extractor/gamestar.py | 51 +++++++++++--------------------- 2 files changed, 18 insertions(+), 34 deletions(-) diff --git a/ChangeLog b/ChangeLog index f6858f1d8..8322af9d1 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,7 @@ version Extractors +* [gamestar] Fix metadata extraction (#10479) + [bilibili] Support episodes (#10190) + [tvnoe] New extractor (#10524) diff --git a/youtube_dl/extractor/gamestar.py b/youtube_dl/extractor/gamestar.py index 69058a583..341e72733 100644 --- a/youtube_dl/extractor/gamestar.py +++ b/youtube_dl/extractor/gamestar.py @@ -1,14 +1,10 @@ # coding: utf-8 from __future__ import unicode_literals -import re - from .common import InfoExtractor from ..utils import ( int_or_none, - parse_duration, - str_to_int, - unified_strdate, + remove_end, ) @@ -21,8 +17,9 @@ class GameStarIE(InfoExtractor): 'id': '76110', 'ext': 'mp4', 'title': 'Hobbit 3: Die Schlacht der Fünf Heere - Teaser-Trailer zum dritten Teil', - 'description': 'Der Teaser-Trailer zu Hobbit 3: Die Schlacht der Fünf Heere zeigt einige Szenen aus dem dritten Teil der Saga und kündigt den vollständigen Trailer an.', - 'thumbnail': 'http://images.gamestar.de/images/idgwpgsgp/bdb/2494525/600x.jpg', + 'description': 'Der Teaser-Trailer zu Hobbit 3: Die Schlacht der Fünf Heere zeigt einige Szenen aus dem dritten Teil der Saga und kündigt den...', + 'thumbnail': 're:^https?://.*\.jpg$', + 'timestamp': 1406542020, 'upload_date': '20140728', 'duration': 17 } @@ -32,41 +29,27 @@ class GameStarIE(InfoExtractor): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - og_title = self._og_search_title(webpage) - title = re.sub(r'\s*- Video (bei|-) GameStar\.de$', '', og_title) - url = 'http://gamestar.de/_misc/videos/portal/getVideoUrl.cfm?premium=0&videoId=' + video_id - description = self._og_search_description(webpage).strip() - - thumbnail = self._proto_relative_url( - self._og_search_thumbnail(webpage), scheme='http:') - - upload_date = unified_strdate(self._html_search_regex( - r'Datum: ([0-9]+\.[0-9]+\.[0-9]+)  ', - webpage, 'upload_date', fatal=False)) - - duration = parse_duration(self._html_search_regex( - r'  Länge: ([0-9]+:[0-9]+)', webpage, 'duration', - fatal=False)) - - view_count = str_to_int(self._html_search_regex( - r'  Zuschauer: ([0-9\.]+)  ', webpage, - 'view_count', fatal=False)) + # TODO: there are multiple ld+json objects in the webpage, + # while _search_json_ld finds only the first one + json_ld = self._parse_json(self._search_regex( + r'(?s)]+type=(["\'])application/ld\+json\1[^>]*>(?P[^<]+VideoObject[^<]+)', + webpage, 'JSON-LD', group='json_ld'), video_id) + info_dict = self._json_ld(json_ld, video_id) + info_dict['title'] = remove_end(info_dict['title'], ' - GameStar') + view_count = json_ld.get('interactionCount') comment_count = int_or_none(self._html_search_regex( - r'>Kommentieren \(([0-9]+)\)', webpage, 'comment_count', + r'([0-9]+) Kommentare', webpage, 'comment_count', fatal=False)) - return { + info_dict.update({ 'id': video_id, - 'title': title, 'url': url, 'ext': 'mp4', - 'thumbnail': thumbnail, - 'description': description, - 'upload_date': upload_date, - 'duration': duration, 'view_count': view_count, 'comment_count': comment_count - } + }) + + return info_dict From f87feb4b688f7b6e02714f8405c8343e27956400 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Thu, 8 Sep 2016 00:28:33 +0800 Subject: [PATCH 0078/1571] [miaopai] Coding style (#10556) --- youtube_dl/extractor/miaopai.py | 32 ++++++++++++++------------------ 1 file changed, 14 insertions(+), 18 deletions(-) diff --git a/youtube_dl/extractor/miaopai.py b/youtube_dl/extractor/miaopai.py index 2477d1009..f9e35ac7f 100644 --- a/youtube_dl/extractor/miaopai.py +++ b/youtube_dl/extractor/miaopai.py @@ -2,11 +2,10 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..utils import sanitized_Request class MiaoPaiIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?miaopai\.com/show/(?P[-A-Za-z0-9~_]+).htm' + _VALID_URL = r'https?://(?:www\.)?miaopai\.com/show/(?P[-A-Za-z0-9~_]+)' _TEST = { 'url': 'http://www.miaopai.com/show/n~0hO7sfV1nBEw4Y29-Hqg__.htm', 'md5': '095ed3f1cd96b821add957bdc29f845b', @@ -18,27 +17,24 @@ class MiaoPaiIE(InfoExtractor): } } - _USER_AGENT_IPAD = 'User-Agent:Mozilla/5.0 ' \ - '(iPad; CPU OS 9_1 like Mac OS X) ' \ - 'AppleWebKit/601.1.46 (KHTML, like Gecko) ' \ - 'Version/9.0 Mobile/13B143 Safari/601.1' + _USER_AGENT_IPAD = 'Mozilla/5.0 (iPad; CPU OS 9_1 like Mac OS X) AppleWebKit/601.1.46 (KHTML, like Gecko) Version/9.0 Mobile/13B143 Safari/601.1' def _real_extract(self, url): video_id = self._match_id(url) - request = sanitized_Request(url) - request.add_header('User-Agent', self._USER_AGENT_IPAD) - webpage = self._download_webpage(request, video_id) + webpage = self._download_webpage( + url, video_id, headers={'User-Agent': self._USER_AGENT_IPAD}) - title = self._html_search_regex(r'([^<]*)', - webpage, - 'title') - regex = r"""
]*data-url=['"]([^'"]*\.jpg)['"]""" - thumbnail = self._html_search_regex(regex, webpage, '') + title = self._html_search_regex( + r'([^<]+)', webpage, 'title') + thumbnail = self._html_search_regex( + r']+class=(?P[\'"]).*\bvideo_img\b.*(?P=q1)[^>]+data-url=(?P[\'"])(?P[^\'"]+)(?P=q2)', + webpage, 'thumbnail', fatal=False, group='url') videos = self._parse_html5_media_entries(url, webpage, video_id) info = videos[0] - info.update({'id': video_id, - 'title': title, - 'thumbnail': thumbnail, - }) + info.update({ + 'id': video_id, + 'title': title, + 'thumbnail': thumbnail, + }) return info From b84d311d53a4cc023eb458072c09cf382f45c762 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Thu, 8 Sep 2016 00:29:55 +0800 Subject: [PATCH 0079/1571] [ChangeLog] Update for #10556 --- ChangeLog | 1 + 1 file changed, 1 insertion(+) diff --git a/ChangeLog b/ChangeLog index 8322af9d1..52c066c99 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,7 @@ version Extractors ++ [miaopai] New extractor (#10556) * [gamestar] Fix metadata extraction (#10479) + [bilibili] Support episodes (#10190) + [tvnoe] New extractor (#10524) From d7e794928d1b1386a8f5960ce6ece3ae88d975a1 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Wed, 7 Sep 2016 17:29:06 +0100 Subject: [PATCH 0080/1571] [tlc] fix query string parsing --- youtube_dl/extractor/tlc.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/tlc.py b/youtube_dl/extractor/tlc.py index abad3ff64..88eb83d74 100644 --- a/youtube_dl/extractor/tlc.py +++ b/youtube_dl/extractor/tlc.py @@ -1,10 +1,14 @@ # encoding: utf-8 from __future__ import unicode_literals + import re from .common import InfoExtractor from .brightcove import BrightcoveLegacyIE -from ..compat import compat_parse_qs +from ..compat import ( + compat_parse_qs, + compat_urlparse, +) class TlcDeIE(InfoExtractor): @@ -35,5 +39,5 @@ class TlcDeIE(InfoExtractor): title = mobj.group('title') webpage = self._download_webpage(url, title) brightcove_legacy_url = BrightcoveLegacyIE._extract_brightcove_url(webpage) - brightcove_id = compat_parse_qs(brightcove_legacy_url)['@videoPlayer'][0] + brightcove_id = compat_parse_qs(compat_urlparse.urlparse(brightcove_legacy_url).query)['@videoPlayer'][0] return self.url_result(self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, 'BrightcoveNew', brightcove_id) From 6656a8248166039d83a3fb4401a7e11e03202ac9 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Wed, 7 Sep 2016 17:32:35 +0100 Subject: [PATCH 0081/1571] [rmcdecouverte] Add new extractor(closes #9709) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/rmcdecouverte.py | 39 +++++++++++++++++++++++++++ 2 files changed, 40 insertions(+) create mode 100644 youtube_dl/extractor/rmcdecouverte.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 522ef7d8b..8d9c2ae13 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -724,6 +724,7 @@ from .revision3 import ( ) from .rice import RICEIE from .ringtv import RingTVIE +from .rmcdecouverte import RMCDecouverteIE from .ro220 import Ro220IE from .rockstargames import RockstarGamesIE from .roosterteeth import RoosterTeethIE diff --git a/youtube_dl/extractor/rmcdecouverte.py b/youtube_dl/extractor/rmcdecouverte.py new file mode 100644 index 000000000..f3bb4fa66 --- /dev/null +++ b/youtube_dl/extractor/rmcdecouverte.py @@ -0,0 +1,39 @@ +# encoding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from .brightcove import BrightcoveLegacyIE +from ..compat import ( + compat_parse_qs, + compat_urlparse, +) + + +class RMCDecouverteIE(InfoExtractor): + _VALID_URL = r'https?://rmcdecouverte\.bfmtv\.com/mediaplayer-replay.*?\bid=(?P\d+)' + + _TEST = { + 'url': 'http://rmcdecouverte.bfmtv.com/mediaplayer-replay/?id=1430&title=LES%20HEROS%20DU%2088e%20ETAGE', + 'info_dict': { + 'id': '5111223049001', + 'ext': 'mp4', + 'title': ': LES HEROS DU 88e ETAGE', + 'description': 'Découvrez comment la bravoure de deux hommes dans la Tour Nord du World Trade Center a sauvé la vie d\'innombrables personnes le 11 septembre 2001.', + 'uploader_id': '1969646226001', + 'upload_date': '20160904', + 'timestamp': 1472951103, + }, + 'params': { + # rtmp download + 'skip_download': True, + }, + 'skip': 'Only works from France', + } + BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/1969646226001/default_default/index.html?videoId=%s' + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + brightcove_legacy_url = BrightcoveLegacyIE._extract_brightcove_url(webpage) + brightcove_id = compat_parse_qs(compat_urlparse.urlparse(brightcove_legacy_url).query)['@videoPlayer'][0] + return self.url_result(self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, 'BrightcoveNew', brightcove_id) From e78a5428b6c725bd01064f080d84700b83030b66 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Thu, 8 Sep 2016 01:59:31 +0800 Subject: [PATCH 0082/1571] [foxgay] Fix extraction (closes #10480) --- ChangeLog | 1 + youtube_dl/extractor/foxgay.py | 48 ++++++++++++++++++++++------------ 2 files changed, 32 insertions(+), 17 deletions(-) diff --git a/ChangeLog b/ChangeLog index 52c066c99..cec87d5cd 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,7 @@ version Extractors +* [foxgay] Fix extraction (#10480) + [miaopai] New extractor (#10556) * [gamestar] Fix metadata extraction (#10479) + [bilibili] Support episodes (#10190) diff --git a/youtube_dl/extractor/foxgay.py b/youtube_dl/extractor/foxgay.py index 70c1a815d..39174fcec 100644 --- a/youtube_dl/extractor/foxgay.py +++ b/youtube_dl/extractor/foxgay.py @@ -1,18 +1,24 @@ from __future__ import unicode_literals +import itertools + from .common import InfoExtractor +from ..utils import ( + get_element_by_id, + remove_end, +) class FoxgayIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?foxgay\.com/videos/(?:\S+-)?(?P\d+)\.shtml' _TEST = { 'url': 'http://foxgay.com/videos/fuck-turkish-style-2582.shtml', - 'md5': '80d72beab5d04e1655a56ad37afe6841', + 'md5': '344558ccfea74d33b7adbce22e577f54', 'info_dict': { 'id': '2582', 'ext': 'mp4', - 'title': 'md5:6122f7ae0fc6b21ebdf59c5e083ce25a', - 'description': 'md5:5e51dc4405f1fd315f7927daed2ce5cf', + 'title': 'Fuck Turkish-style', + 'description': 'md5:6ae2d9486921891efe89231ace13ffdf', 'age_limit': 18, 'thumbnail': 're:https?://.*\.jpg$', }, @@ -22,27 +28,35 @@ class FoxgayIE(InfoExtractor): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - title = self._html_search_regex( - r'(?P<title>.*?)', - webpage, 'title', fatal=False) - description = self._html_search_regex( - r'

(?P.*?)

', - webpage, 'description', fatal=False) + title = remove_end(self._html_search_regex( + r'([^<]+)', webpage, 'title'), ' - Foxgay.com') + description = get_element_by_id('inf_tit', webpage) + # The default user-agent with foxgay cookies leads to pages without videos + self._downloader.cookiejar.clear('.foxgay.com') # Find the URL for the iFrame which contains the actual video. + iframe_url = self._html_search_regex( + r']+src=([\'"])(?P[^\'"]+)\1', webpage, + 'video frame', group='url') iframe = self._download_webpage( - self._html_search_regex(r'iframe src="(?P.*?)"', webpage, 'video frame'), - video_id) - video_url = self._html_search_regex( - r"v_path = '(?Phttp://.*?)'", iframe, 'url') - thumb_url = self._html_search_regex( - r"t_path = '(?Phttp://.*?)'", iframe, 'thumbnail', fatal=False) + iframe_url, video_id, headers={'User-Agent': 'curl/7.50.1'}, + note='Downloading video frame') + video_data = self._parse_json(self._search_regex( + r'video_data\s*=\s*([^;]+);', iframe, 'video data'), video_id) + + formats = [{ + 'url': source, + 'height': resolution, + } for source, resolution in zip( + video_data['sources'], video_data.get('resolutions', itertools.repeat(None)))] + + self._sort_formats(formats) return { 'id': video_id, 'title': title, - 'url': video_url, + 'formats': formats, 'description': description, - 'thumbnail': thumb_url, + 'thumbnail': video_data.get('act_vid', {}).get('thumb'), 'age_limit': 18, } From 89f257d6e57131a266efae629334fe5f4bcf96e9 Mon Sep 17 00:00:00 2001 From: stepshal Date: Thu, 8 Sep 2016 13:52:22 +0700 Subject: [PATCH 0083/1571] Add support for https for rest of the exctractors. --- youtube_dl/extractor/abcnews.py | 2 +- youtube_dl/extractor/dailymotion.py | 2 +- youtube_dl/extractor/karaoketv.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/abcnews.py b/youtube_dl/extractor/abcnews.py index b61a6327c..8a5ae42f0 100644 --- a/youtube_dl/extractor/abcnews.py +++ b/youtube_dl/extractor/abcnews.py @@ -12,7 +12,7 @@ from ..compat import compat_urlparse class AbcNewsVideoIE(AMPIE): IE_NAME = 'abcnews:video' - _VALID_URL = 'http://abcnews.go.com/[^/]+/video/(?P[0-9a-z-]+)-(?P\d+)' + _VALID_URL = 'https?://abcnews.go.com/[^/]+/video/(?P[0-9a-z-]+)-(?P\d+)' _TESTS = [{ 'url': 'http://abcnews.go.com/ThisWeek/video/week-exclusive-irans-foreign-minister-zarif-20411932', diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py index 496883d15..62b0747a5 100644 --- a/youtube_dl/extractor/dailymotion.py +++ b/youtube_dl/extractor/dailymotion.py @@ -394,7 +394,7 @@ class DailymotionUserIE(DailymotionPlaylistIE): class DailymotionCloudIE(DailymotionBaseInfoExtractor): - _VALID_URL_PREFIX = r'http://api\.dmcloud\.net/(?:player/)?embed/' + _VALID_URL_PREFIX = r'https?://api\.dmcloud\.net/(?:player/)?embed/' _VALID_URL = r'%s[^/]+/(?P[^/?]+)' % _VALID_URL_PREFIX _VALID_EMBED_URL = r'%s[^/]+/[^\'"]+' % _VALID_URL_PREFIX diff --git a/youtube_dl/extractor/karaoketv.py b/youtube_dl/extractor/karaoketv.py index a6050c4de..2f37786df 100644 --- a/youtube_dl/extractor/karaoketv.py +++ b/youtube_dl/extractor/karaoketv.py @@ -5,7 +5,7 @@ from .common import InfoExtractor class KaraoketvIE(InfoExtractor): - _VALID_URL = r'http://www.karaoketv.co.il/[^/]+/(?P\d+)' + _VALID_URL = r'https?://www.karaoketv.co.il/[^/]+/(?P\d+)' _TEST = { 'url': 'http://www.karaoketv.co.il/%D7%A9%D7%99%D7%A8%D7%99_%D7%A7%D7%A8%D7%99%D7%95%D7%A7%D7%99/58356/%D7%90%D7%99%D7%96%D7%95%D7%9F', 'info_dict': { From 881f35479d50f22955fa21b148aa849a64d9dfc0 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Thu, 8 Sep 2016 17:22:43 +0800 Subject: [PATCH 0084/1571] Credit @xyb for miaopai extractor (#10556) --- AUTHORS | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS b/AUTHORS index 78660f014..937742c5d 100644 --- a/AUTHORS +++ b/AUTHORS @@ -184,3 +184,4 @@ Pratyush Singh Aleksander Nitecki Sebastian Blunt Matěj Cepl +Xie Yanbo From 3f612f076708973fce52d6b6053b24e2234a9c26 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Thu, 8 Sep 2016 17:39:29 +0800 Subject: [PATCH 0085/1571] Fix _VALID_URLs further (#10594) --- youtube_dl/extractor/abcnews.py | 2 +- youtube_dl/extractor/karaoketv.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/abcnews.py b/youtube_dl/extractor/abcnews.py index 8a5ae42f0..01247fead 100644 --- a/youtube_dl/extractor/abcnews.py +++ b/youtube_dl/extractor/abcnews.py @@ -12,7 +12,7 @@ from ..compat import compat_urlparse class AbcNewsVideoIE(AMPIE): IE_NAME = 'abcnews:video' - _VALID_URL = 'https?://abcnews.go.com/[^/]+/video/(?P[0-9a-z-]+)-(?P\d+)' + _VALID_URL = r'https?://abcnews\.go\.com/[^/]+/video/(?P[0-9a-z-]+)-(?P\d+)' _TESTS = [{ 'url': 'http://abcnews.go.com/ThisWeek/video/week-exclusive-irans-foreign-minister-zarif-20411932', diff --git a/youtube_dl/extractor/karaoketv.py b/youtube_dl/extractor/karaoketv.py index 2f37786df..bad46005b 100644 --- a/youtube_dl/extractor/karaoketv.py +++ b/youtube_dl/extractor/karaoketv.py @@ -5,7 +5,7 @@ from .common import InfoExtractor class KaraoketvIE(InfoExtractor): - _VALID_URL = r'https?://www.karaoketv.co.il/[^/]+/(?P\d+)' + _VALID_URL = r'https?://www\.karaoketv\.co\.il/[^/]+/(?P\d+)' _TEST = { 'url': 'http://www.karaoketv.co.il/%D7%A9%D7%99%D7%A8%D7%99_%D7%A7%D7%A8%D7%99%D7%95%D7%A7%D7%99/58356/%D7%90%D7%99%D7%96%D7%95%D7%9F', 'info_dict': { From 25042f73722c37e4ec88030cf69e23ae76c4359b Mon Sep 17 00:00:00 2001 From: stepshal Date: Thu, 8 Sep 2016 17:04:57 +0700 Subject: [PATCH 0086/1571] Add missing r prefix for _VALID_URLs --- youtube_dl/extractor/abcnews.py | 2 +- youtube_dl/extractor/ard.py | 2 +- youtube_dl/extractor/globo.py | 4 ++-- youtube_dl/extractor/onet.py | 2 +- youtube_dl/extractor/rutube.py | 2 +- youtube_dl/extractor/spiegel.py | 2 +- youtube_dl/extractor/twitter.py | 2 +- youtube_dl/extractor/youtube.py | 2 +- 8 files changed, 9 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/abcnews.py b/youtube_dl/extractor/abcnews.py index 01247fead..6ae5d9a96 100644 --- a/youtube_dl/extractor/abcnews.py +++ b/youtube_dl/extractor/abcnews.py @@ -49,7 +49,7 @@ class AbcNewsVideoIE(AMPIE): class AbcNewsIE(InfoExtractor): IE_NAME = 'abcnews' - _VALID_URL = 'https?://abcnews\.go\.com/(?:[^/]+/)+(?P[0-9a-z-]+)/story\?id=(?P\d+)' + _VALID_URL = r'https?://abcnews\.go\.com/(?:[^/]+/)+(?P[0-9a-z-]+)/story\?id=(?P\d+)' _TESTS = [{ 'url': 'http://abcnews.go.com/Blotter/News/dramatic-video-rare-death-job-america/story?id=10498713#.UIhwosWHLjY', diff --git a/youtube_dl/extractor/ard.py b/youtube_dl/extractor/ard.py index 07e67dd33..3a806a69b 100644 --- a/youtube_dl/extractor/ard.py +++ b/youtube_dl/extractor/ard.py @@ -238,7 +238,7 @@ class ARDMediathekIE(InfoExtractor): class ARDIE(InfoExtractor): - _VALID_URL = '(?Phttps?://(www\.)?daserste\.de/[^?#]+/videos/(?P[^/?#]+)-(?P[0-9]+))\.html' + _VALID_URL = r'(?Phttps?://(www\.)?daserste\.de/[^?#]+/videos/(?P[^/?#]+)-(?P[0-9]+))\.html' _TEST = { 'url': 'http://www.daserste.de/information/reportage-dokumentation/dokus/videos/die-story-im-ersten-mission-unter-falscher-flagge-100.html', 'md5': 'd216c3a86493f9322545e045ddc3eb35', diff --git a/youtube_dl/extractor/globo.py b/youtube_dl/extractor/globo.py index dbacbfc61..5638be48f 100644 --- a/youtube_dl/extractor/globo.py +++ b/youtube_dl/extractor/globo.py @@ -19,7 +19,7 @@ from ..utils import ( class GloboIE(InfoExtractor): - _VALID_URL = '(?:globo:|https?://.+?\.globo\.com/(?:[^/]+/)*(?:v/(?:[^/]+/)?|videos/))(?P\d{7,})' + _VALID_URL = r'(?:globo:|https?://.+?\.globo\.com/(?:[^/]+/)*(?:v/(?:[^/]+/)?|videos/))(?P\d{7,})' _API_URL_TEMPLATE = 'http://api.globovideos.com/videos/%s/playlist' _SECURITY_URL_TEMPLATE = 'http://security.video.globo.com/videos/%s/hash?player=flash&version=17.0.0.132&resource_id=%s' @@ -396,7 +396,7 @@ class GloboIE(InfoExtractor): class GloboArticleIE(InfoExtractor): - _VALID_URL = 'https?://.+?\.globo\.com/(?:[^/]+/)*(?P[^/]+)(?:\.html)?' + _VALID_URL = r'https?://.+?\.globo\.com/(?:[^/]+/)*(?P[^/]+)(?:\.html)?' _VIDEOID_REGEXES = [ r'\bdata-video-id=["\'](\d{7,})', diff --git a/youtube_dl/extractor/onet.py b/youtube_dl/extractor/onet.py index fc22ad5eb..9cbc7c2e2 100644 --- a/youtube_dl/extractor/onet.py +++ b/youtube_dl/extractor/onet.py @@ -90,7 +90,7 @@ class OnetBaseIE(InfoExtractor): class OnetIE(OnetBaseIE): - _VALID_URL = 'https?://(?:www\.)?onet\.tv/[a-z]/[a-z]+/(?P[0-9a-z-]+)/(?P[0-9a-z]+)' + _VALID_URL = r'https?://(?:www\.)?onet\.tv/[a-z]/[a-z]+/(?P[0-9a-z-]+)/(?P[0-9a-z]+)' IE_NAME = 'onet.tv' _TEST = { diff --git a/youtube_dl/extractor/rutube.py b/youtube_dl/extractor/rutube.py index 9ca4ae147..5d0ace5bf 100644 --- a/youtube_dl/extractor/rutube.py +++ b/youtube_dl/extractor/rutube.py @@ -88,7 +88,7 @@ class RutubeIE(InfoExtractor): class RutubeEmbedIE(InfoExtractor): IE_NAME = 'rutube:embed' IE_DESC = 'Rutube embedded videos' - _VALID_URL = 'https?://rutube\.ru/(?:video|play)/embed/(?P[0-9]+)' + _VALID_URL = r'https?://rutube\.ru/(?:video|play)/embed/(?P[0-9]+)' _TESTS = [{ 'url': 'http://rutube.ru/video/embed/6722881?vk_puid37=&vk_puid38=', diff --git a/youtube_dl/extractor/spiegel.py b/youtube_dl/extractor/spiegel.py index 3c552807e..74cb3a08a 100644 --- a/youtube_dl/extractor/spiegel.py +++ b/youtube_dl/extractor/spiegel.py @@ -103,7 +103,7 @@ class SpiegelIE(InfoExtractor): class SpiegelArticleIE(InfoExtractor): - _VALID_URL = 'https?://www\.spiegel\.de/(?!video/)[^?#]*?-(?P[0-9]+)\.html' + _VALID_URL = r'https?://www\.spiegel\.de/(?!video/)[^?#]*?-(?P[0-9]+)\.html' IE_NAME = 'Spiegel:Article' IE_DESC = 'Articles on spiegel.de' _TESTS = [{ diff --git a/youtube_dl/extractor/twitter.py b/youtube_dl/extractor/twitter.py index b73842986..c5a5843b6 100644 --- a/youtube_dl/extractor/twitter.py +++ b/youtube_dl/extractor/twitter.py @@ -342,7 +342,7 @@ class TwitterIE(InfoExtractor): class TwitterAmplifyIE(TwitterBaseIE): IE_NAME = 'twitter:amplify' - _VALID_URL = 'https?://amp\.twimg\.com/v/(?P[0-9a-f\-]{36})' + _VALID_URL = r'https?://amp\.twimg\.com/v/(?P[0-9a-f\-]{36})' _TEST = { 'url': 'https://amp.twimg.com/v/0ba0c3c7-0af3-4c0a-bed5-7efd1ffa2951', diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 8fc26bd02..5082cb589 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -2417,7 +2417,7 @@ class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor): class YoutubeHistoryIE(YoutubeFeedsInfoExtractor): IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)' - _VALID_URL = 'https?://www\.youtube\.com/feed/history|:ythistory' + _VALID_URL = r'https?://www\.youtube\.com/feed/history|:ythistory' _FEED_NAME = 'history' _PLAYLIST_TITLE = 'Youtube History' From 010d034fca60d94435d719e554663fba1894f8b3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 8 Sep 2016 22:38:49 +0700 Subject: [PATCH 0087/1571] [videomore] Fix extraction (Closes #10592) --- youtube_dl/extractor/videomore.py | 72 +++++++++++-------------------- 1 file changed, 25 insertions(+), 47 deletions(-) diff --git a/youtube_dl/extractor/videomore.py b/youtube_dl/extractor/videomore.py index 04e95c66e..328b5b7fb 100644 --- a/youtube_dl/extractor/videomore.py +++ b/youtube_dl/extractor/videomore.py @@ -6,8 +6,7 @@ import re from .common import InfoExtractor from ..utils import ( int_or_none, - parse_age_limit, - parse_iso8601, + xpath_element, xpath_text, ) @@ -17,38 +16,32 @@ class VideomoreIE(InfoExtractor): _VALID_URL = r'videomore:(?P\d+)$|https?://videomore\.ru/(?:(?:embed|[^/]+/[^/]+)/|[^/]+\?.*\btrack_id=)(?P\d+)(?:[/?#&]|\.(?:xml|json)|$)' _TESTS = [{ 'url': 'http://videomore.ru/kino_v_detalayah/5_sezon/367617', - 'md5': '70875fbf57a1cd004709920381587185', + 'md5': '44455a346edc0d509ac5b5a5b531dc35', 'info_dict': { 'id': '367617', 'ext': 'flv', - 'title': 'В гостях Алексей Чумаков и Юлия Ковальчук', - 'description': 'В гостях – лучшие романтические комедии года, «Выживший» Иньярриту и «Стив Джобс» Дэнни Бойла.', + 'title': 'Кино в деталях 5 сезон В гостях Алексей Чумаков и Юлия Ковальчук', 'series': 'Кино в деталях', 'episode': 'В гостях Алексей Чумаков и Юлия Ковальчук', - 'episode_number': None, - 'season': 'Сезон 2015', - 'season_number': 5, 'thumbnail': 're:^https?://.*\.jpg', 'duration': 2910, - 'age_limit': 16, 'view_count': int, + 'comment_count': int, + 'age_limit': 16, }, }, { 'url': 'http://videomore.ru/embed/259974', 'info_dict': { 'id': '259974', 'ext': 'flv', - 'title': '80 серия', - 'description': '«Медведей» ждет решающий матч. Макеев выясняет отношения со Стрельцовым. Парни узнают подробности прошлого Макеева.', + 'title': 'Молодежка 2 сезон 40 серия', 'series': 'Молодежка', - 'episode': '80 серия', - 'episode_number': 40, - 'season': '2 сезон', - 'season_number': 2, + 'episode': '40 серия', 'thumbnail': 're:^https?://.*\.jpg', 'duration': 2809, - 'age_limit': 16, 'view_count': int, + 'comment_count': int, + 'age_limit': 16, }, 'params': { 'skip_download': True, @@ -58,13 +51,8 @@ class VideomoreIE(InfoExtractor): 'info_dict': { 'id': '341073', 'ext': 'flv', - 'title': 'Команда проиграла из-за Бакина?', - 'description': 'Молодежка 3 сезон скоро', - 'series': 'Молодежка', + 'title': 'Промо Команда проиграла из-за Бакина?', 'episode': 'Команда проиграла из-за Бакина?', - 'episode_number': None, - 'season': 'Промо', - 'season_number': 99, 'thumbnail': 're:^https?://.*\.jpg', 'duration': 29, 'age_limit': 16, @@ -109,43 +97,33 @@ class VideomoreIE(InfoExtractor): 'http://videomore.ru/video/tracks/%s.xml' % video_id, video_id, 'Downloading video XML') - video_url = xpath_text(video, './/video_url', 'video url', fatal=True) + item = xpath_element(video, './/playlist/item', fatal=True) + + title = xpath_text( + item, ('./title', './episode_name'), 'title', fatal=True) + + video_url = xpath_text(item, './video_url', 'video url', fatal=True) formats = self._extract_f4m_formats(video_url, video_id, f4m_id='hds') self._sort_formats(formats) - data = self._download_json( - 'http://videomore.ru/video/tracks/%s.json' % video_id, - video_id, 'Downloading video JSON') + thumbnail = xpath_text(item, './thumbnail_url') + duration = int_or_none(xpath_text(item, './duration')) + view_count = int_or_none(xpath_text(item, './views')) + comment_count = int_or_none(xpath_text(item, './count_comments')) + age_limit = int_or_none(xpath_text(item, './min_age')) - title = data.get('title') or data['project_title'] - description = data.get('description') or data.get('description_raw') - timestamp = parse_iso8601(data.get('published_at')) - duration = int_or_none(data.get('duration')) - view_count = int_or_none(data.get('views')) - age_limit = parse_age_limit(data.get('min_age')) - thumbnails = [{ - 'url': thumbnail, - } for thumbnail in data.get('big_thumbnail_urls', [])] - - series = data.get('project_title') - episode = data.get('title') - episode_number = int_or_none(data.get('episode_of_season') or None) - season = data.get('season_title') - season_number = int_or_none(data.get('season_pos') or None) + series = xpath_text(item, './project_name') + episode = xpath_text(item, './episode_name') return { 'id': video_id, 'title': title, - 'description': description, 'series': series, 'episode': episode, - 'episode_number': episode_number, - 'season': season, - 'season_number': season_number, - 'thumbnails': thumbnails, - 'timestamp': timestamp, + 'thumbnail': thumbnail, 'duration': duration, 'view_count': view_count, + 'comment_count': comment_count, 'age_limit': age_limit, 'formats': formats, } From 2fdc7b0e0438e7f72380cfff11b619fe36234ebd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 8 Sep 2016 22:40:02 +0700 Subject: [PATCH 0088/1571] [viafree] PEP 8 --- youtube_dl/extractor/tvplay.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/tvplay.py b/youtube_dl/extractor/tvplay.py index c2a6e4e39..c0fec2594 100644 --- a/youtube_dl/extractor/tvplay.py +++ b/youtube_dl/extractor/tvplay.py @@ -390,7 +390,7 @@ class ViafreeIE(InfoExtractor): if thumbnail: video_id = self._search_regex( r'https?://[^/]+/imagecache/(?:[^/]+/)+seasons/\d+/(\d{6,})/', - thumbnail, 'video id', default=None) + thumbnail, 'video id', default=None) if not video_id: video_id = self._search_regex( From 7a979da8cb91abd7386cc2986d2ec2f4a2debb4c Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Thu, 8 Sep 2016 16:42:47 +0100 Subject: [PATCH 0089/1571] [yahoo] Look for Brightcove Legacy Studio embeds(closes #9345) --- youtube_dl/extractor/yahoo.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/yahoo.py b/youtube_dl/extractor/yahoo.py index d7a81ab8c..91f0a0dbb 100644 --- a/youtube_dl/extractor/yahoo.py +++ b/youtube_dl/extractor/yahoo.py @@ -19,7 +19,10 @@ from ..utils import ( determine_ext, ) -from .brightcove import BrightcoveNewIE +from .brightcove import ( + BrightcoveLegacyIE, + BrightcoveNewIE, +) from .nbc import NBCSportsVPlayerIE @@ -223,6 +226,11 @@ class YahooIE(InfoExtractor): if nbc_sports_url: return self.url_result(nbc_sports_url, NBCSportsVPlayerIE.ie_key()) + # Look for Brightcove Legacy Studio embeds + bc_url = BrightcoveLegacyIE._extract_brightcove_url(webpage) + if bc_url: + return self.url_result(bc_url, BrightcoveLegacyIE.ie_key()) + # Look for Brightcove New Studio embeds bc_url = BrightcoveNewIE._extract_url(webpage) if bc_url: From 6cfcb8ac3634f1735093a791fa56b96bddabe14b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 8 Sep 2016 22:44:34 +0700 Subject: [PATCH 0090/1571] [tvnoe] Do not capture unused groups in _VALID_URL --- youtube_dl/extractor/tvnoe.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/tvnoe.py b/youtube_dl/extractor/tvnoe.py index 1cd3e6a58..6d5c74826 100644 --- a/youtube_dl/extractor/tvnoe.py +++ b/youtube_dl/extractor/tvnoe.py @@ -10,7 +10,7 @@ from ..utils import ( class TVNoeIE(JWPlatformBaseIE): - _VALID_URL = r'https?://(www\.)?tvnoe\.cz/video/(?P[0-9]+)' + _VALID_URL = r'https?://(?:www\.)?tvnoe\.cz/video/(?P[0-9]+)' _TEST = { 'url': 'http://www.tvnoe.cz/video/10362', 'md5': 'aee983f279aab96ec45ab6e2abb3c2ca', From 37720844f6d40878bd2f29ea8311c7988ed3fc6a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 8 Sep 2016 22:52:39 +0700 Subject: [PATCH 0091/1571] [jwplatform] Extract height from label --- youtube_dl/extractor/jwplatform.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/jwplatform.py b/youtube_dl/extractor/jwplatform.py index ce3126943..7aaa65476 100644 --- a/youtube_dl/extractor/jwplatform.py +++ b/youtube_dl/extractor/jwplatform.py @@ -63,10 +63,17 @@ class JWPlatformBaseIE(InfoExtractor): 'ext': ext, }) else: + height = int_or_none(source.get('height')) + if height is None: + # Often no height is provided but there is a label in + # format like 1080p. + height = int_or_none(self._search_regex( + r'^(\d{3,})[pP]$', source.get('label') or '', + 'height', default=None)) a_format = { 'url': source_url, 'width': int_or_none(source.get('width')), - 'height': int_or_none(source.get('height')), + 'height': height, 'ext': ext, } if source_url.startswith('rtmp'): From ad0e2b3359862eb6ac786d8fefd5e20035b051cd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 8 Sep 2016 23:15:58 +0700 Subject: [PATCH 0092/1571] [abcotvs] Add support for ABC Owned Television Stations --- youtube_dl/extractor/abcotvs.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/abcotvs.py b/youtube_dl/extractor/abcotvs.py index 53a900e50..054bb0596 100644 --- a/youtube_dl/extractor/abcotvs.py +++ b/youtube_dl/extractor/abcotvs.py @@ -12,6 +12,7 @@ from ..utils import ( class ABCOTVSIE(InfoExtractor): IE_NAME = 'abcotvs' + IE_DESC = 'ABC Owned Television Stations' _VALID_URL = r'https?://(?:abc(?:7(?:news|ny|chicago)?|11|13|30)|6abc)\.com(?:/[^/]+/(?P[^/]+))?/(?P\d+)' _TESTS = [ { From 2abad67e52c034ff4a94bad00c31f9ba4aa496f2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 8 Sep 2016 23:32:16 +0700 Subject: [PATCH 0093/1571] [ChangeLog] Actualize --- ChangeLog | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/ChangeLog b/ChangeLog index cec87d5cd..e74f64b9a 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,11 +1,25 @@ version Extractors ++ [jwplatform] Extract height from format label ++ [yahoo] Extract Brightcove Legacy Studio embeds (#9345) +* [videomore] Fix extraction (#10592) * [foxgay] Fix extraction (#10480) -+ [miaopai] New extractor (#10556) ++ [rmcdecouverte] Add extractor for rmcdecouverte.bfmtv.com (#9709) * [gamestar] Fix metadata extraction (#10479) -+ [bilibili] Support episodes (#10190) -+ [tvnoe] New extractor (#10524) +* [puls4] Fix extraction (#10583) ++ [cctv] Add extractor for CCTV and CNTV (#8153) ++ [lci] Add extractor for lci.fr (#10573) ++ [wat] Extract DASH formats ++ [viafree] Improve video id detection (#10569) ++ [trutv] Add extractor for trutv.com (#10519) ++ [nick] Add support for nickelodeon.nl (#10559) ++ [abcotvs:clips] Add support for clips.abcotvs.com ++ [abcotvs] Add support for ABC Owned Television Stations sites (#9551) ++ [miaopai] Add extractor for miaopai.com (#10556) +* [gamestar] Fix metadata extraction (#10479) ++ [bilibili] Add support for episodes (#10190) ++ [tvnoe] Add extractor for tvnoe.cz (#10524) version 2016.09.04.1 From b71783719000af0806bbd6995e934de2520792fd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 8 Sep 2016 23:46:14 +0700 Subject: [PATCH 0094/1571] release 2016.09.08 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 9 ++++++++- youtube_dl/version.py | 2 +- 4 files changed, 13 insertions(+), 6 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index c03092442..a983bf432 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.09.04.1*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.09.04.1** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.09.08*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.09.08** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.09.04.1 +[debug] youtube-dl version 2016.09.08 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index e74f64b9a..d84f447ba 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2016.09.08 Extractors + [jwplatform] Extract height from format label diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 9e21016f7..e6be746a8 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -19,9 +19,10 @@ - **9now.com.au** - **abc.net.au** - **abc.net.au:iview** - - **Abc7News** - **abcnews** - **abcnews:video** + - **abcotvs**: ABC Owned Television Stations + - **abcotvs:clips** - **AcademicEarth:Course** - **acast** - **acast:channel** @@ -128,6 +129,7 @@ - **CBSNews**: CBS News - **CBSNewsLiveVideo**: CBS News Live Videos - **CBSSports** + - **CCTV** - **CDA** - **CeskaTelevize** - **channel9**: Channel 9 @@ -352,6 +354,7 @@ - **kuwo:song**: 酷我音乐 - **la7.it** - **Laola1Tv** + - **LCI** - **Lcp** - **LcpPlay** - **Le**: 乐视网 @@ -390,6 +393,7 @@ - **Metacritic** - **Mgoon** - **MGTV**: 芒果TV + - **MiaoPai** - **Minhateca** - **MinistryGrid** - **Minoto** @@ -576,6 +580,7 @@ - **revision3:embed** - **RICE** - **RingTV** + - **RMCDecouverte** - **RockstarGames** - **RoosterTeeth** - **RottenTomatoes** @@ -721,6 +726,7 @@ - **TrailerAddict** (Currently broken) - **Trilulilu** - **trollvids** + - **TruTV** - **Tube8** - **TubiTv** - **tudou** @@ -742,6 +748,7 @@ - **TVCArticle** - **tvigle**: Интернет-телевидение Tvigle.ru - **tvland.com** + - **TVNoe** - **tvp**: Telewizja Polska - **tvp:embed**: Telewizja Polska - **tvp:series** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index b2ea6dac6..941ffb3f6 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.09.04.1' +__version__ = '2016.09.08' From 4614ad7b591577793321b8e761510cf08ceb558b Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Thu, 8 Sep 2016 20:45:13 +0100 Subject: [PATCH 0095/1571] [parliamentliveuk] fix extraction(closes #9137) --- youtube_dl/extractor/parliamentliveuk.py | 57 +++++++++--------------- 1 file changed, 22 insertions(+), 35 deletions(-) diff --git a/youtube_dl/extractor/parliamentliveuk.py b/youtube_dl/extractor/parliamentliveuk.py index 0a423a08f..874aacc55 100644 --- a/youtube_dl/extractor/parliamentliveuk.py +++ b/youtube_dl/extractor/parliamentliveuk.py @@ -1,53 +1,40 @@ from __future__ import unicode_literals -import re - from .common import InfoExtractor class ParliamentLiveUKIE(InfoExtractor): IE_NAME = 'parliamentlive.tv' IE_DESC = 'UK parliament videos' - _VALID_URL = r'https?://www\.parliamentlive\.tv/Main/Player\.aspx\?(?:[^&]+&)*?meetingId=(?P[0-9]+)' + _VALID_URL = r'https?://(?:www\.)?parliamentlive\.tv/Event/Index/(?P[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})' _TEST = { - 'url': 'http://www.parliamentlive.tv/Main/Player.aspx?meetingId=15121&player=windowsmedia', + 'url': 'http://parliamentlive.tv/Event/Index/c1e9d44d-fd6c-4263-b50f-97ed26cc998b', 'info_dict': { - 'id': '15121', - 'ext': 'asf', - 'title': 'hoc home affairs committee, 18 mar 2014.pm', - 'description': 'md5:033b3acdf83304cd43946b2d5e5798d1', + 'id': 'c1e9d44d-fd6c-4263-b50f-97ed26cc998b', + 'ext': 'mp4', + 'title': 'Home Affairs Committee', + 'uploader_id': 'FFMPEG-01', + 'timestamp': 1422696664, + 'upload_date': '20150131', }, - 'params': { - 'skip_download': True, # Requires mplayer (mms) - } } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - webpage = self._download_webpage(url, video_id) - - asx_url = self._html_search_regex( - r'embed.*?src="([^"]+)" name="MediaPlayer"', webpage, - 'metadata URL') - asx = self._download_xml(asx_url, video_id, 'Downloading ASX metadata') - video_url = asx.find('.//REF').attrib['HREF'] - - title = self._search_regex( - r'''(?x)player\.setClipDetails\( - (?:(?:[0-9]+|"[^"]+"),\s*){2} - "([^"]+",\s*"[^"]+)" - ''', - webpage, 'title').replace('", "', ', ') - description = self._html_search_regex( - r'(?s)(.*?)', - webpage, 'description') - + video_id = self._match_id(url) + webpage = self._download_webpage( + 'http://vodplayer.parliamentlive.tv/?mid=' + video_id, video_id) + widget_config = self._parse_json(self._search_regex( + r'kWidgetConfig\s*=\s*({.+});', + webpage, 'kaltura widget config'), video_id) + kaltura_url = 'kaltura:%s:%s' % (widget_config['wid'][1:], widget_config['entry_id']) + event_title = self._download_json( + 'http://parliamentlive.tv/Event/GetShareVideo/' + video_id, video_id)['event']['title'] return { + '_type': 'url_transparent', 'id': video_id, - 'ext': 'asf', - 'url': video_url, - 'title': title, - 'description': description, + 'title': event_title, + 'description': '', + 'url': kaltura_url, + 'ie_key': 'Kaltura', } From 4d5726b0d7562aff975c91303b8ae8cfc4de8c51 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Thu, 8 Sep 2016 22:53:44 +0100 Subject: [PATCH 0096/1571] [telequebec] Add new extractor(closes #1999) --- youtube_dl/extractor/telequebec.py | 36 ++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) create mode 100644 youtube_dl/extractor/telequebec.py diff --git a/youtube_dl/extractor/telequebec.py b/youtube_dl/extractor/telequebec.py new file mode 100644 index 000000000..4043fcb92 --- /dev/null +++ b/youtube_dl/extractor/telequebec.py @@ -0,0 +1,36 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import int_or_none + + +class TeleQuebecIE(InfoExtractor): + _VALID_URL = r'https?://zonevideo\.telequebec\.tv/media/(?P\d+)' + _TEST = { + 'url': 'http://zonevideo.telequebec.tv/media/20984/le-couronnement-de-new-york/couronnement-de-new-york', + 'md5': 'fe95a0957e5707b1b01f5013e725c90f', + 'info_dict': { + 'id': '20984', + 'ext': 'mp4', + 'title': 'Le couronnement de New York', + 'description': 'md5:f5b3d27a689ec6c1486132b2d687d432', + 'upload_date': '20160220', + 'timestamp': 1455965438, + } + } + + def _real_extract(self, url): + media_id = self._match_id(url) + media_data = self._download_json( + 'https://mnmedias.api.telequebec.tv/api/v2/media/' + media_id, + media_id)['media'] + return { + '_type': 'url_transparent', + 'id': media_id, + 'url': 'limelight:media:' + media_data['streamInfo']['sourceId'], + 'title': media_data['title'], + 'description': media_data.get('descriptions', [{'text': None}])[0].get('text'), + 'duration': int_or_none(media_data.get('durationInMilliseconds'), 1000), + 'ie_key': 'LimelightMedia', + } From cb9cbd84ed2471140ce58480b8ed373f8abb5816 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Thu, 8 Sep 2016 22:55:27 +0100 Subject: [PATCH 0097/1571] [extractors] add import for TeleQuebecIE --- youtube_dl/extractor/extractors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 8d9c2ae13..b7b630e9d 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -861,6 +861,7 @@ from .telebruxelles import TeleBruxellesIE from .telecinco import TelecincoIE from .telegraaf import TelegraafIE from .telemb import TeleMBIE +from .telequebec import TeleQuebecIE from .teletask import TeleTaskIE from .telewebion import TelewebionIE from .testurl import TestURLIE From bb5ebd4453c2f051af96b20e4a78f6563054684b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 9 Sep 2016 22:16:21 +0700 Subject: [PATCH 0098/1571] [canvas] Add support for een.be (Closes #10605) --- youtube_dl/extractor/canvas.py | 32 +++++++++++++++++++++++++++----- 1 file changed, 27 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/canvas.py b/youtube_dl/extractor/canvas.py index ec6d24d96..ef0691dcd 100644 --- a/youtube_dl/extractor/canvas.py +++ b/youtube_dl/extractor/canvas.py @@ -1,11 +1,13 @@ from __future__ import unicode_literals +import re + from .common import InfoExtractor from ..utils import float_or_none class CanvasIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?canvas\.be/video/(?:[^/]+/)*(?P[^/?#&]+)' + _VALID_URL = r'https?://(?:www\.)?(?Pcanvas|een)\.be/(?:[^/]+/)*(?P[^/?#&]+)' _TESTS = [{ 'url': 'http://www.canvas.be/video/de-afspraak/najaar-2015/de-afspraak-veilt-voor-de-warmste-week', 'md5': 'ea838375a547ac787d4064d8c7860a6c', @@ -38,22 +40,42 @@ class CanvasIE(InfoExtractor): 'params': { 'skip_download': True, } + }, { + 'url': 'https://www.een.be/sorry-voor-alles/herbekijk-sorry-voor-alles', + 'info_dict': { + 'id': 'mz-ast-11a587f8-b921-4266-82e2-0bce3e80d07f', + 'display_id': 'herbekijk-sorry-voor-alles', + 'ext': 'mp4', + 'title': 'Herbekijk Sorry voor alles', + 'description': 'md5:8bb2805df8164e5eb95d6a7a29dc0dd3', + 'thumbnail': 're:^https?://.*\.jpg$', + 'duration': 3788.06, + }, + 'params': { + 'skip_download': True, + } + }, { + 'url': 'https://www.canvas.be/check-point/najaar-2016/de-politie-uw-vriend', + 'only_matching': True, }] def _real_extract(self, url): - display_id = self._match_id(url) + mobj = re.match(self._VALID_URL, url) + site_id, display_id = mobj.group('site_id'), mobj.group('id') webpage = self._download_webpage(url, display_id) - title = self._search_regex( + title = (self._search_regex( r']+class="video__body__header__title"[^>]*>(.+?)

', - webpage, 'title', default=None) or self._og_search_title(webpage) + webpage, 'title', default=None) or self._og_search_title( + webpage)).strip() video_id = self._html_search_regex( r'data-video=(["\'])(?P.+?)\1', webpage, 'video id', group='id') data = self._download_json( - 'https://mediazone.vrt.be/api/v1/canvas/assets/%s' % video_id, display_id) + 'https://mediazone.vrt.be/api/v1/%s/assets/%s' + % (site_id, video_id), display_id) formats = [] for target in data['targetUrls']: From c6129feb7f8313941a4d2044fa4b45ceaa0a91c7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 9 Sep 2016 23:20:45 +0700 Subject: [PATCH 0099/1571] [ketnet] Add extractor (Closes #10343) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/ketnet.py | 52 ++++++++++++++++++++++++++++++ 2 files changed, 53 insertions(+) create mode 100644 youtube_dl/extractor/ketnet.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index b7b630e9d..38dc33674 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -407,6 +407,7 @@ from .kankan import KankanIE from .karaoketv import KaraoketvIE from .karrierevideos import KarriereVideosIE from .keezmovies import KeezMoviesIE +from .ketnet import KetnetIE from .khanacademy import KhanAcademyIE from .kickstarter import KickStarterIE from .keek import KeekIE diff --git a/youtube_dl/extractor/ketnet.py b/youtube_dl/extractor/ketnet.py new file mode 100644 index 000000000..aaf3f807a --- /dev/null +++ b/youtube_dl/extractor/ketnet.py @@ -0,0 +1,52 @@ +from __future__ import unicode_literals + +from .common import InfoExtractor + + +class KetnetIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?ketnet\.be/(?:[^/]+/)*(?P[^/?#&]+)' + _TESTS = [{ + 'url': 'https://www.ketnet.be/kijken/zomerse-filmpjes', + 'md5': 'd907f7b1814ef0fa285c0475d9994ed7', + 'info_dict': { + 'id': 'zomerse-filmpjes', + 'ext': 'mp4', + 'title': 'Gluur mee op de filmset en op Pennenzakkenrock', + 'description': 'Gluur mee met Ghost Rockers op de filmset', + 'thumbnail': 're:^https?://.*\.jpg$', + } + }, { + 'url': 'https://www.ketnet.be/kijken/karrewiet/uitzending-8-september-2016', + 'only_matching': True, + }, { + 'url': 'https://www.ketnet.be/achter-de-schermen/sien-repeteert-voor-stars-for-life', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + + config = self._parse_json( + self._search_regex( + r'(?s)playerConfig\s*=\s*({.+?})\s*;', webpage, + 'player config'), + video_id) + + title = config['title'] + + formats = self._extract_m3u8_formats( + config['source']['hls'], video_id, 'mp4', + entry_protocol='m3u8_native', m3u8_id='hls') + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': title, + 'description': config.get('description'), + 'thumbnail': config.get('image'), + 'series': config.get('program'), + 'episode': config.get('episode'), + 'formats': formats, + } From 6c3affcb18f7eabf7d428e5efe474e0547ab25cb Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sat, 10 Sep 2016 20:09:09 +0800 Subject: [PATCH 0100/1571] [newgrounds] Fix uploader extraction Closes #10584 Also change test URLs to HTTPS, as proposed by @stepshal in #10593. Closes #10593 --- ChangeLog | 6 ++++++ youtube_dl/extractor/newgrounds.py | 23 ++++++++--------------- 2 files changed, 14 insertions(+), 15 deletions(-) diff --git a/ChangeLog b/ChangeLog index d84f447ba..fafe445cb 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +version + +Extractors +* [newgrounds] Fix uploader extraction (#10584) + + version 2016.09.08 Extractors diff --git a/youtube_dl/extractor/newgrounds.py b/youtube_dl/extractor/newgrounds.py index 705940323..9bea610c8 100644 --- a/youtube_dl/extractor/newgrounds.py +++ b/youtube_dl/extractor/newgrounds.py @@ -1,15 +1,12 @@ from __future__ import unicode_literals -import json -import re - from .common import InfoExtractor class NewgroundsIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?newgrounds\.com/(?:audio/listen|portal/view)/(?P[0-9]+)' _TESTS = [{ - 'url': 'http://www.newgrounds.com/audio/listen/549479', + 'url': 'https://www.newgrounds.com/audio/listen/549479', 'md5': 'fe6033d297591288fa1c1f780386f07a', 'info_dict': { 'id': '549479', @@ -18,7 +15,7 @@ class NewgroundsIE(InfoExtractor): 'uploader': 'Burn7', } }, { - 'url': 'http://www.newgrounds.com/portal/view/673111', + 'url': 'https://www.newgrounds.com/portal/view/673111', 'md5': '3394735822aab2478c31b1004fe5e5bc', 'info_dict': { 'id': '673111', @@ -29,24 +26,20 @@ class NewgroundsIE(InfoExtractor): }] def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - music_id = mobj.group('id') - webpage = self._download_webpage(url, music_id) + media_id = self._match_id(url) + webpage = self._download_webpage(url, media_id) title = self._html_search_regex( r'([^>]+)', webpage, 'title') uploader = self._html_search_regex( - [r',"artist":"([^"]+)",', r'[\'"]owner[\'"]\s*:\s*[\'"]([^\'"]+)[\'"],'], - webpage, 'uploader') + r'Author\s*]+>([^<]+)', webpage, 'uploader', fatal=False) - music_url_json_string = self._html_search_regex( - r'({"url":"[^"]+"),', webpage, 'music url') + '}' - music_url_json = json.loads(music_url_json_string) - music_url = music_url_json['url'] + music_url = self._parse_json(self._search_regex( + r'"url":("[^"]+"),', webpage, ''), media_id) return { - 'id': music_id, + 'id': media_id, 'title': title, 'url': music_url, 'uploader': uploader, From b29f842e0eb095248ff39d1fa28c5b4941793246 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 10 Sep 2016 20:46:45 +0700 Subject: [PATCH 0101/1571] [canalplus] Add support for c8.fr (Closes #10577) --- youtube_dl/extractor/canalplus.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/youtube_dl/extractor/canalplus.py b/youtube_dl/extractor/canalplus.py index 61463f249..69e8f4f57 100644 --- a/youtube_dl/extractor/canalplus.py +++ b/youtube_dl/extractor/canalplus.py @@ -23,6 +23,7 @@ class CanalplusIE(InfoExtractor): (?:(?:www|m)\.)?canalplus\.fr| (?:www\.)?piwiplus\.fr| (?:www\.)?d8\.tv| + (?:www\.)?c8\.fr| (?:www\.)?d17\.tv| (?:www\.)?itele\.fr )/(?:(?:[^/]+/)*(?P[^/?#&]+))?(?:\?.*\bvid=(?P\d+))?| @@ -35,6 +36,7 @@ class CanalplusIE(InfoExtractor): 'canalplus': 'cplus', 'piwiplus': 'teletoon', 'd8': 'd8', + 'c8': 'd8', 'd17': 'd17', 'itele': 'itele', } From 84a18e9b908eb0b770f03603200026a06f4f08b4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 10 Sep 2016 22:01:49 +0700 Subject: [PATCH 0102/1571] [polskieradio:category] Improve extraction --- youtube_dl/extractor/extractors.py | 5 +- youtube_dl/extractor/polskieradio.py | 158 ++++++++++++++------------- 2 files changed, 84 insertions(+), 79 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 6a142996f..96f3d3fcb 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -671,7 +671,10 @@ from .pluralsight import ( ) from .podomatic import PodomaticIE from .pokemon import PokemonIE -from .polskieradio import PolskieRadioIE, PolskieRadioProgrammeIE +from .polskieradio import ( + PolskieRadioIE, + PolskieRadioCategoryIE, +) from .porn91 import Porn91IE from .porncom import PornComIE from .pornhd import PornHdIE diff --git a/youtube_dl/extractor/polskieradio.py b/youtube_dl/extractor/polskieradio.py index c51d3d9be..5ff173774 100644 --- a/youtube_dl/extractor/polskieradio.py +++ b/youtube_dl/extractor/polskieradio.py @@ -1,6 +1,7 @@ # coding: utf-8 from __future__ import unicode_literals +import itertools import re from .common import InfoExtractor @@ -10,90 +11,13 @@ from ..compat import ( compat_urlparse ) from ..utils import ( + extract_attributes, int_or_none, strip_or_none, unified_timestamp, ) -class PolskieRadioProgrammeIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?polskieradio\.pl/\d+(,[^/]+)?/(?P\d+)' - _TESTS = [{ - 'url': 'http://www.polskieradio.pl/7/5102,HISTORIA-ZYWA', - 'info_dict': { - 'id': '5102', - 'title': 'HISTORIA ŻYWA', - }, - 'playlist_mincount': 34, - }, { - 'url': 'http://www.polskieradio.pl/7/4807', - 'info_dict': { - 'id': '4807', - 'title': 'Vademecum 1050. rocznicy Chrztu Polski' - }, - 'playlist_mincount': 5 - }, { - 'url': 'http://www.polskieradio.pl/7/129,Sygnaly-dnia?ref=source', - 'only_matching': True - }, { - 'url': 'http://www.polskieradio.pl/37,RedakcjaKatolicka/4143,Kierunek-Krakow', - 'info_dict': { - 'id': '4143', - 'title': 'Kierunek Kraków', - }, - 'playlist_mincount': 61 - }, { - 'url': 'http://www.polskieradio.pl/7,Jedynka/5102,HISTORIA-ZYWA', - 'only_matching': True - }] - - def _get_entries_from_page_content(self, base_url, content): - entries = [] - - articles = re.findall( - r'
\s+', - content) - for article_id, article_url, _, article_title in articles: - resolved_article_url = compat_urlparse.urljoin(base_url, article_url) - entries.append(self.url_result( - resolved_article_url, - ie='PolskieRadio', - video_id=article_id, - video_title=article_title)) - - return entries - - @classmethod - def suitable(cls, url): - return False if PolskieRadioIE.suitable(url) else super(PolskieRadioProgrammeIE, cls).suitable(url) - - def _real_extract(self, url): - programme_id = self._match_id(url) - webpage = self._download_webpage(url, programme_id) - - title = self._html_search_regex( - r'(.+?)', - webpage, 'title', fatal=False) - description = None - - entries = self._get_entries_from_page_content(url, webpage) - - pages = re.findall(r' 1: - page_url_root = next(url for _, url, _ in pages if len(url) > 0) - for page_number in range(2, page_count + 1): - page_url = page_url_root + str(page_number) - resolved_page_url = compat_urlparse.urljoin(url, page_url) - page_content = self._download_webpage( - resolved_page_url, programme_id, - note="Downloading page number %d" % page_number) - entries.extend(self._get_entries_from_page_content(url, page_content)) - - return self.playlist_result(entries, programme_id, title, description) - - class PolskieRadioIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?polskieradio\.pl/\d+/\d+/Artykul/(?P[0-9]+)' _TESTS = [{ @@ -176,3 +100,81 @@ class PolskieRadioIE(InfoExtractor): description = strip_or_none(self._og_search_description(webpage)) return self.playlist_result(entries, playlist_id, title, description) + + +class PolskieRadioCategoryIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?polskieradio\.pl/\d+(?:,[^/]+)?/(?P\d+)' + _TESTS = [{ + 'url': 'http://www.polskieradio.pl/7/5102,HISTORIA-ZYWA', + 'info_dict': { + 'id': '5102', + 'title': 'HISTORIA ŻYWA', + }, + 'playlist_mincount': 38, + }, { + 'url': 'http://www.polskieradio.pl/7/4807', + 'info_dict': { + 'id': '4807', + 'title': 'Vademecum 1050. rocznicy Chrztu Polski' + }, + 'playlist_mincount': 5 + }, { + 'url': 'http://www.polskieradio.pl/7/129,Sygnaly-dnia?ref=source', + 'only_matching': True + }, { + 'url': 'http://www.polskieradio.pl/37,RedakcjaKatolicka/4143,Kierunek-Krakow', + 'info_dict': { + 'id': '4143', + 'title': 'Kierunek Kraków', + }, + 'playlist_mincount': 61 + }, { + 'url': 'http://www.polskieradio.pl/10,czworka/214,muzyka', + 'info_dict': { + 'id': '214', + 'title': 'Muzyka', + }, + 'playlist_mincount': 61 + }, { + 'url': 'http://www.polskieradio.pl/7,Jedynka/5102,HISTORIA-ZYWA', + 'only_matching': True, + }, { + 'url': 'http://www.polskieradio.pl/8,Dwojka/196,Publicystyka', + 'only_matching': True, + }] + + @classmethod + def suitable(cls, url): + return False if PolskieRadioIE.suitable(url) else super(PolskieRadioCategoryIE, cls).suitable(url) + + def _entries(self, url, page, category_id): + content = page + for page_num in itertools.count(2): + for a_entry, entry_id in re.findall( + r'(?s)]+>.*?(]+href=["\']/\d+/\d+/Artykul/(\d+)[^>]+>).*?
', + content): + entry = extract_attributes(a_entry) + href = entry.get('href') + if not href: + continue + yield self.url_result( + compat_urlparse.urljoin(url, href), PolskieRadioIE.ie_key(), + entry_id, entry.get('title')) + mobj = re.search( + r']+class=["\']next["\'][^>]*>\s*]+href=(["\'])(?P(?:(?!\1).)+)\1', + content) + if not mobj: + break + next_url = compat_urlparse.urljoin(url, mobj.group('url')) + content = self._download_webpage( + next_url, category_id, 'Downloading page %s' % page_num) + + def _real_extract(self, url): + category_id = self._match_id(url) + webpage = self._download_webpage(url, category_id) + title = self._html_search_regex( + r'([^<]+) - [^<]+ - [^<]+', + webpage, 'title', fatal=False) + return self.playlist_result( + self._entries(url, webpage, category_id), + category_id, title) From 732424375017a033f5b398b0f3dc2c6d47f3d3fd Mon Sep 17 00:00:00 2001 From: Scott Leggett Date: Mon, 5 Sep 2016 22:41:08 +1000 Subject: [PATCH 0103/1571] [9now] Fix extraction --- youtube_dl/extractor/ninenow.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/ninenow.py b/youtube_dl/extractor/ninenow.py index faa577237..907b42609 100644 --- a/youtube_dl/extractor/ninenow.py +++ b/youtube_dl/extractor/ninenow.py @@ -44,7 +44,14 @@ class NineNowIE(InfoExtractor): page_data = self._parse_json(self._search_regex( r'window\.__data\s*=\s*({.*?});', webpage, 'page data'), display_id) - common_data = page_data.get('episode', {}).get('episode') or page_data.get('clip', {}).get('clip') + current_key = ( + page_data.get('episode', {}).get('currentEpisodeKey') or + page_data.get('clip', {}).get('currentClipKey') + ) + common_data = ( + page_data.get('episode', {}).get('episodeCache', {}).get(current_key, {}).get('episode') or + page_data.get('clip', {}).get('clipCache', {}).get(current_key, {}).get('clip') + ) video_data = common_data['video'] if video_data.get('drm'): From 56c0ead4d3b9f365f0562678504879be8e79b89c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 11 Sep 2016 00:42:13 +0700 Subject: [PATCH 0104/1571] [9now] Improve video data extraction (Closes #10561) --- youtube_dl/extractor/ninenow.py | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/ninenow.py b/youtube_dl/extractor/ninenow.py index 907b42609..351bea7ba 100644 --- a/youtube_dl/extractor/ninenow.py +++ b/youtube_dl/extractor/ninenow.py @@ -44,14 +44,20 @@ class NineNowIE(InfoExtractor): page_data = self._parse_json(self._search_regex( r'window\.__data\s*=\s*({.*?});', webpage, 'page data'), display_id) - current_key = ( - page_data.get('episode', {}).get('currentEpisodeKey') or - page_data.get('clip', {}).get('currentClipKey') - ) - common_data = ( - page_data.get('episode', {}).get('episodeCache', {}).get(current_key, {}).get('episode') or - page_data.get('clip', {}).get('clipCache', {}).get(current_key, {}).get('clip') - ) + + for kind in ('episode', 'clip'): + current_key = page_data.get(kind, {}).get( + 'current%sKey' % kind.capitalize()) + if not current_key: + continue + cache = page_data.get(kind, {}).get('%sCache' % kind, {}) + if not cache: + continue + common_data = (cache.get(current_key) or list(cache.values())[0])[kind] + break + else: + raise ExtractorError('Unable to find video data') + video_data = common_data['video'] if video_data.get('drm'): From 2512b17493fced6b469d9610c1ad5c5af52870f3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 11 Sep 2016 01:27:20 +0700 Subject: [PATCH 0105/1571] [lrt] Fix audio extraction (Closes #10566) --- youtube_dl/extractor/lrt.py | 46 ++++++++++++++++++++++++++++++------- 1 file changed, 38 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/lrt.py b/youtube_dl/extractor/lrt.py index 1072405b3..f5c997ef4 100644 --- a/youtube_dl/extractor/lrt.py +++ b/youtube_dl/extractor/lrt.py @@ -1,8 +1,11 @@ # coding: utf-8 from __future__ import unicode_literals +import re + from .common import InfoExtractor from ..utils import ( + determine_ext, int_or_none, parse_duration, remove_end, @@ -12,8 +15,10 @@ from ..utils import ( class LRTIE(InfoExtractor): IE_NAME = 'lrt.lt' _VALID_URL = r'https?://(?:www\.)?lrt\.lt/mediateka/irasas/(?P[0-9]+)' - _TEST = { + _TESTS = [{ + # m3u8 download 'url': 'http://www.lrt.lt/mediateka/irasas/54391/', + 'md5': 'fe44cf7e4ab3198055f2c598fc175cb0', 'info_dict': { 'id': '54391', 'ext': 'mp4', @@ -23,20 +28,45 @@ class LRTIE(InfoExtractor): 'view_count': int, 'like_count': int, }, - 'params': { - 'skip_download': True, # m3u8 download + }, { + # direct mp3 download + 'url': 'http://www.lrt.lt/mediateka/irasas/1013074524/', + 'md5': '389da8ca3cad0f51d12bed0c844f6a0a', + 'info_dict': { + 'id': '1013074524', + 'ext': 'mp3', + 'title': 'Kita tema 2016-09-05 15:05', + 'description': 'md5:1b295a8fc7219ed0d543fc228c931fb5', + 'duration': 3008, + 'view_count': int, + 'like_count': int, }, - } + }] def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) title = remove_end(self._og_search_title(webpage), ' - LRT') - m3u8_url = self._search_regex( - r'file\s*:\s*(["\'])(?P.+?)\1\s*\+\s*location\.hash\.substring\(1\)', - webpage, 'm3u8 url', group='url') - formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4') + + formats = [] + for _, file_url in re.findall( + r'file\s*:\s*(["\'])(?P(?:(?!\1).)+)\1', webpage): + ext = determine_ext(file_url) + if ext not in ('m3u8', 'mp3'): + continue + # mp3 served as m3u8 produces stuttered media file + if ext == 'm3u8' and '.mp3' in file_url: + continue + if ext == 'm3u8': + formats.extend(self._extract_m3u8_formats( + file_url, video_id, 'mp4', entry_protocol='m3u8_native', + fatal=False)) + elif ext == 'mp3': + formats.append({ + 'url': file_url, + 'vcodec': 'none', + }) self._sort_formats(formats) thumbnail = self._og_search_thumbnail(webpage) From 1e35999c1e4637174e2532c457431315b5e186d9 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sat, 10 Sep 2016 19:43:09 +0100 Subject: [PATCH 0106/1571] [tfo] Add new extractor --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/tfo.py | 53 ++++++++++++++++++++++++++++++ 2 files changed, 54 insertions(+) create mode 100644 youtube_dl/extractor/tfo.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 96f3d3fcb..124e909fb 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -870,6 +870,7 @@ from .teletask import TeleTaskIE from .telewebion import TelewebionIE from .testurl import TestURLIE from .tf1 import TF1IE +from .tfo import TFOIE from .theintercept import TheInterceptIE from .theplatform import ( ThePlatformIE, diff --git a/youtube_dl/extractor/tfo.py b/youtube_dl/extractor/tfo.py new file mode 100644 index 000000000..6f1eeac57 --- /dev/null +++ b/youtube_dl/extractor/tfo.py @@ -0,0 +1,53 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import json + +from .common import InfoExtractor +from ..utils import ( + HEADRequest, + ExtractorError, + int_or_none, +) + + +class TFOIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?tfo\.org/(?:en|fr)/(?:[^/]+/){2}(?P\d+)' + _TEST = { + 'url': 'http://www.tfo.org/en/universe/tfo-247/100463871/video-game-hackathon', + 'md5': '47c987d0515561114cf03d1226a9d4c7', + 'info_dict': { + 'id': '100463871', + 'ext': 'mp4', + 'title': 'Video Game Hackathon', + 'description': 'md5:558afeba217c6c8d96c60e5421795c07', + 'upload_date': '20160212', + 'timestamp': 1455310233, + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + self._request_webpage(HEADRequest('http://www.tfo.org/'), video_id) + infos = self._download_json( + 'http://www.tfo.org/api/web/video/get_infos', video_id, data=json.dumps({ + 'product_id': video_id, + }).encode(), headers={ + 'X-tfo-session': self._get_cookies('http://www.tfo.org/')['tfo-session'].value, + }) + if infos.get('success') == 0: + raise ExtractorError('%s said: %s' % (self.IE_NAME, infos['msg']), expected=True) + video_data = infos['data'] + + return { + '_type': 'url_transparent', + 'id': video_id, + 'url': 'limelight:media:' + video_data['llid'], + 'title': video_data['title'], + 'description': video_data.get('description'), + 'series': video_data.get('collection'), + 'season_number': int_or_none(video_data.get('season')), + 'episode_number': int_or_none(video_data.get('episode')), + 'duration': int_or_none(video_data.get('duration')), + 'ie_key': 'LimelightMedia', + } From 001a5fd3d75b311102264cf3920c6aa5b2322e51 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sun, 11 Sep 2016 03:02:00 +0800 Subject: [PATCH 0107/1571] [iwara] Fix extraction after relaunch Closes #10462, closes #3215 --- ChangeLog | 1 + youtube_dl/extractor/extractors.py | 2 +- youtube_dl/extractor/iwara.py | 77 ++++++++++++++++++++++++++++++ youtube_dl/extractor/trollvids.py | 36 -------------- 4 files changed, 79 insertions(+), 37 deletions(-) create mode 100644 youtube_dl/extractor/iwara.py delete mode 100644 youtube_dl/extractor/trollvids.py diff --git a/ChangeLog b/ChangeLog index fafe445cb..387dc7bf6 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,7 @@ version Extractors +* [iwara] Fix extraction after relaunch (#10462, #3215) * [newgrounds] Fix uploader extraction (#10584) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 124e909fb..2e795260e 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -395,6 +395,7 @@ from .ivi import ( IviCompilationIE ) from .ivideon import IvideonIE +from .iwara import IwaraIE from .izlesene import IzleseneIE from .jeuxvideo import JeuxVideoIE from .jove import JoveIE @@ -899,7 +900,6 @@ from .toutv import TouTvIE from .toypics import ToypicsUserIE, ToypicsIE from .traileraddict import TrailerAddictIE from .trilulilu import TriluliluIE -from .trollvids import TrollvidsIE from .trutv import TruTVIE from .tube8 import Tube8IE from .tubitv import TubiTvIE diff --git a/youtube_dl/extractor/iwara.py b/youtube_dl/extractor/iwara.py new file mode 100644 index 000000000..8d7e7f472 --- /dev/null +++ b/youtube_dl/extractor/iwara.py @@ -0,0 +1,77 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..compat import compat_urllib_parse_urlparse +from ..utils import remove_end + + +class IwaraIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.|ecchi\.)?iwara\.tv/videos/(?P[a-zA-Z0-9]+)' + _TESTS = [{ + 'url': 'http://iwara.tv/videos/amVwUl1EHpAD9RD', + 'md5': '1d53866b2c514b23ed69e4352fdc9839', + 'info_dict': { + 'id': 'amVwUl1EHpAD9RD', + 'ext': 'mp4', + 'title': '【MMD R-18】ガールフレンド carry_me_off', + 'age_limit': 18, + }, + }, { + 'url': 'http://ecchi.iwara.tv/videos/Vb4yf2yZspkzkBO', + 'md5': '7e5f1f359cd51a027ba4a7b7710a50f0', + 'info_dict': { + 'id': '0B1LvuHnL-sRFNXB1WHNqbGw4SXc', + 'ext': 'mp4', + 'title': '[3D Hentai] Kyonyu Ã\x97 Genkai Ã\x97 Emaki Shinobi Girls.mp4', + 'age_limit': 18, + }, + 'add_ie': ['GoogleDrive'], + }, { + 'url': 'http://www.iwara.tv/videos/nawkaumd6ilezzgq', + 'md5': '1d85f1e5217d2791626cff5ec83bb189', + 'info_dict': { + 'id': '6liAP9s2Ojc', + 'ext': 'mp4', + 'age_limit': 0, + 'title': '[MMD] Do It Again Ver.2 [1080p 60FPS] (Motion,Camera,Wav+DL)', + 'description': 'md5:590c12c0df1443d833fbebe05da8c47a', + 'upload_date': '20160910', + 'uploader': 'aMMDsork', + 'uploader_id': 'UCVOFyOSCyFkXTYYHITtqB7A', + }, + 'add_ie': ['Youtube'], + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage, urlh = self._download_webpage_handle(url, video_id) + + hostname = compat_urllib_parse_urlparse(urlh.geturl()).hostname + # ecchi is 'sexy' in Japanese + age_limit = 18 if hostname.split('.')[0] == 'ecchi' else 0 + + entries = self._parse_html5_media_entries(url, webpage, video_id) + + if not entries: + iframe_url = self._html_search_regex( + r']+src=([\'"])(?P[^\'"]+)\1', + webpage, 'iframe URL', group='url') + return { + '_type': 'url_transparent', + 'url': iframe_url, + 'age_limit': age_limit, + } + + title = remove_end(self._html_search_regex( + r'([^<]+)', webpage, 'title'), ' | Iwara') + + info_dict = entries[0] + info_dict.update({ + 'id': video_id, + 'title': title, + 'age_limit': age_limit, + }) + + return info_dict diff --git a/youtube_dl/extractor/trollvids.py b/youtube_dl/extractor/trollvids.py deleted file mode 100644 index 657705623..000000000 --- a/youtube_dl/extractor/trollvids.py +++ /dev/null @@ -1,36 +0,0 @@ -# encoding: utf-8 -from __future__ import unicode_literals - -import re - -from .nuevo import NuevoBaseIE - - -class TrollvidsIE(NuevoBaseIE): - _VALID_URL = r'https?://(?:www\.)?trollvids\.com/video/(?P\d+)/(?P[^/?#&]+)' - IE_NAME = 'trollvids' - _TEST = { - 'url': 'http://trollvids.com/video/2349002/%E3%80%90MMD-R-18%E3%80%91%E3%82%AC%E3%83%BC%E3%83%AB%E3%83%95%E3%83%AC%E3%83%B3%E3%83%89-carrymeoff', - 'md5': '1d53866b2c514b23ed69e4352fdc9839', - 'info_dict': { - 'id': '2349002', - 'ext': 'mp4', - 'title': '【MMD R-18】ガールフレンド carry_me_off', - 'age_limit': 18, - 'duration': 216.78, - }, - } - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - display_id = mobj.group('display_id') - - info = self._extract_nuevo( - 'http://trollvids.com/nuevo/player/config.php?v=%s' % video_id, - video_id) - info.update({ - 'display_id': display_id, - 'age_limit': 18 - }) - return info From bfcda07a2710738c32f63fdb4e09e177acc53df3 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sun, 11 Sep 2016 04:06:00 +0800 Subject: [PATCH 0108/1571] [abc:iview] Skip the test. They are removed soon --- youtube_dl/extractor/abc.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/youtube_dl/extractor/abc.py b/youtube_dl/extractor/abc.py index c7b6df7d0..3792bd232 100644 --- a/youtube_dl/extractor/abc.py +++ b/youtube_dl/extractor/abc.py @@ -100,6 +100,7 @@ class ABCIViewIE(InfoExtractor): IE_NAME = 'abc.net.au:iview' _VALID_URL = r'https?://iview\.abc\.net\.au/programs/[^/]+/(?P[^/?#]+)' + # ABC iview programs are normally available for 14 days only. _TESTS = [{ 'url': 'http://iview.abc.net.au/programs/gardening-australia/FA1505V024S00', 'md5': '979d10b2939101f0d27a06b79edad536', @@ -112,6 +113,7 @@ class ABCIViewIE(InfoExtractor): 'uploader_id': 'abc1', 'timestamp': 1471719600, }, + 'skip': 'Video gone', }] def _real_extract(self, url): From 2cb93afcd8a8a1f086a97ef3791fa033ddc1610a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 11 Sep 2016 14:59:14 +0700 Subject: [PATCH 0109/1571] [viafree] Improve video id extraction (Closes #10615) --- youtube_dl/extractor/tvplay.py | 36 +++++++++++++++++++++++++++++----- 1 file changed, 31 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/tvplay.py b/youtube_dl/extractor/tvplay.py index c0fec2594..5548ff2ac 100644 --- a/youtube_dl/extractor/tvplay.py +++ b/youtube_dl/extractor/tvplay.py @@ -16,6 +16,7 @@ from ..utils import ( parse_iso8601, qualities, try_get, + js_to_json, update_url_query, ) @@ -367,6 +368,10 @@ class ViafreeIE(InfoExtractor): 'skip_download': True, }, 'add_ie': [TVPlayIE.ie_key()], + }, { + # Different og:image URL schema + 'url': 'www.viafree.se/program/reality/sommaren-med-youtube-stjarnorna/sasong-1/avsnitt-2', + 'only_matching': True, }, { 'url': 'http://www.viafree.no/programmer/underholdning/det-beste-vorspielet/sesong-2/episode-1', 'only_matching': True, @@ -384,14 +389,35 @@ class ViafreeIE(InfoExtractor): webpage = self._download_webpage(url, video_id) + data = self._parse_json( + self._search_regex( + r'(?s)window\.App\s*=\s*({.+?})\s*;\s* Date: Sun, 11 Sep 2016 18:32:45 +0800 Subject: [PATCH 0110/1571] [foxnews] Support Fox News Articles (closes #10598) --- ChangeLog | 1 + youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/foxnews.py | 40 +++++++++++++++++++++++++++--- 3 files changed, 39 insertions(+), 3 deletions(-) diff --git a/ChangeLog b/ChangeLog index 387dc7bf6..a73a35e88 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,7 @@ version Extractors ++ [foxnews] Support Fox News articles (#10598) * [iwara] Fix extraction after relaunch (#10462, #3215) * [newgrounds] Fix uploader extraction (#10584) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 2e795260e..e9027fb69 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -292,6 +292,7 @@ from .fourtube import FourTubeIE from .fox import FOXIE from .foxgay import FoxgayIE from .foxnews import ( + FoxNewsVideoIE, FoxNewsIE, FoxNewsInsiderIE, ) diff --git a/youtube_dl/extractor/foxnews.py b/youtube_dl/extractor/foxnews.py index 5c7acd795..3e9a6a08c 100644 --- a/youtube_dl/extractor/foxnews.py +++ b/youtube_dl/extractor/foxnews.py @@ -6,7 +6,8 @@ from .amp import AMPIE from .common import InfoExtractor -class FoxNewsIE(AMPIE): +class FoxNewsVideoIE(AMPIE): + IE_NAME = 'foxnews:video' IE_DESC = 'Fox News and Fox Business Video' _VALID_URL = r'https?://(?Pvideo\.(?:insider\.)?fox(?:news|business)\.com)/v/(?:video-embed\.html\?video_id=)?(?P\d+)' _TESTS = [ @@ -66,6 +67,35 @@ class FoxNewsIE(AMPIE): return info +class FoxNewsIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?foxnews\.com/(?!v)([^/]+/)+(?P[a-z-]+)' + IE_NAME = 'foxnews' + + _TEST = { + 'url': 'http://www.foxnews.com/politics/2016/09/08/buzz-about-bud-clinton-camp-denies-claims-wore-earpiece-at-forum.html', + 'md5': '62aa5a781b308fdee212ebb6f33ae7ef', + 'info_dict': { + 'id': '5116295019001', + 'ext': 'mp4', + 'title': 'Trump and Clinton asked to defend positions on Iraq War', + 'description': 'Veterans react on \'The Kelly File\'', + 'timestamp': 1473299755, + 'upload_date': '20160908', + }, + } + + def _real_extract(self, url): + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + + video_id = self._html_search_regex( + r'data-video-id=([\'"])(?P[^\'"]+)\1', + webpage, 'video ID', group='id') + return self.url_result( + 'http://video.foxnews.com/v/' + video_id, + FoxNewsVideoIE.ie_key()) + + class FoxNewsInsiderIE(InfoExtractor): _VALID_URL = r'https?://insider\.foxnews\.com/([^/]+/)+(?P[a-z-]+)' IE_NAME = 'foxnews:insider' @@ -83,7 +113,11 @@ class FoxNewsInsiderIE(InfoExtractor): 'upload_date': '20160825', 'thumbnail': 're:^https?://.*\.jpg$', }, - 'add_ie': [FoxNewsIE.ie_key()], + 'params': { + # m3u8 download + 'skip_download': True, + }, + 'add_ie': [FoxNewsVideoIE.ie_key()], } def _real_extract(self, url): @@ -98,7 +132,7 @@ class FoxNewsInsiderIE(InfoExtractor): return { '_type': 'url_transparent', - 'ie_key': FoxNewsIE.ie_key(), + 'ie_key': FoxNewsVideoIE.ie_key(), 'url': embed_url, 'display_id': display_id, 'title': title, From f01115c933bdf6a3d741bb2f306d26b4df943a40 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sun, 11 Sep 2016 18:36:59 +0800 Subject: [PATCH 0111/1571] [openload] Temporary fix (#10408) --- youtube_dl/extractor/openload.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index 03baf8e32..76316ca2f 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -60,7 +60,7 @@ class OpenloadIE(InfoExtractor): if j >= 33 and j <= 126: j = ((j + 14) % 94) + 33 if idx == len(enc_data) - 1: - j += 1 + j += 3 video_url_chars += compat_chr(j) video_url = 'https://openload.co/stream/%s?mime=true' % ''.join(video_url_chars) From fea74acad8e8ebc1fda1d24a10c085c6771a71be Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sun, 11 Sep 2016 18:53:05 +0800 Subject: [PATCH 0112/1571] [foxnews] Revert to old extractor names --- youtube_dl/extractor/extractors.py | 2 +- youtube_dl/extractor/foxnews.py | 14 +++++++------- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index e9027fb69..a3cd9c289 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -292,8 +292,8 @@ from .fourtube import FourTubeIE from .fox import FOXIE from .foxgay import FoxgayIE from .foxnews import ( - FoxNewsVideoIE, FoxNewsIE, + FoxNewsArticleIE, FoxNewsInsiderIE, ) from .foxsports import FoxSportsIE diff --git a/youtube_dl/extractor/foxnews.py b/youtube_dl/extractor/foxnews.py index 3e9a6a08c..229bcb175 100644 --- a/youtube_dl/extractor/foxnews.py +++ b/youtube_dl/extractor/foxnews.py @@ -6,8 +6,8 @@ from .amp import AMPIE from .common import InfoExtractor -class FoxNewsVideoIE(AMPIE): - IE_NAME = 'foxnews:video' +class FoxNewsIE(AMPIE): + IE_NAME = 'foxnews' IE_DESC = 'Fox News and Fox Business Video' _VALID_URL = r'https?://(?Pvideo\.(?:insider\.)?fox(?:news|business)\.com)/v/(?:video-embed\.html\?video_id=)?(?P\d+)' _TESTS = [ @@ -67,9 +67,9 @@ class FoxNewsVideoIE(AMPIE): return info -class FoxNewsIE(InfoExtractor): +class FoxNewsArticleIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?foxnews\.com/(?!v)([^/]+/)+(?P[a-z-]+)' - IE_NAME = 'foxnews' + IE_NAME = 'foxnews:article' _TEST = { 'url': 'http://www.foxnews.com/politics/2016/09/08/buzz-about-bud-clinton-camp-denies-claims-wore-earpiece-at-forum.html', @@ -93,7 +93,7 @@ class FoxNewsIE(InfoExtractor): webpage, 'video ID', group='id') return self.url_result( 'http://video.foxnews.com/v/' + video_id, - FoxNewsVideoIE.ie_key()) + FoxNewsIE.ie_key()) class FoxNewsInsiderIE(InfoExtractor): @@ -117,7 +117,7 @@ class FoxNewsInsiderIE(InfoExtractor): # m3u8 download 'skip_download': True, }, - 'add_ie': [FoxNewsVideoIE.ie_key()], + 'add_ie': [FoxNewsIE.ie_key()], } def _real_extract(self, url): @@ -132,7 +132,7 @@ class FoxNewsInsiderIE(InfoExtractor): return { '_type': 'url_transparent', - 'ie_key': FoxNewsVideoIE.ie_key(), + 'ie_key': FoxNewsIE.ie_key(), 'url': embed_url, 'display_id': display_id, 'title': title, From 6bb05b32a990b8fb961971fcb8110d292cf953e7 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sun, 11 Sep 2016 19:22:51 +0800 Subject: [PATCH 0113/1571] [pornhub] Extract categories and tags (closes #10499) --- ChangeLog | 1 + youtube_dl/extractor/pornhub.py | 16 ++++++++++++++++ 2 files changed, 17 insertions(+) diff --git a/ChangeLog b/ChangeLog index a73a35e88..5d6609987 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,7 @@ version Extractors ++ [pornhub] Extract categories and tags (#10499) + [foxnews] Support Fox News articles (#10598) * [iwara] Fix extraction after relaunch (#10462, #3215) * [newgrounds] Fix uploader extraction (#10584) diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py index 20976c101..0724efc09 100644 --- a/youtube_dl/extractor/pornhub.py +++ b/youtube_dl/extractor/pornhub.py @@ -15,6 +15,7 @@ from ..compat import ( from ..utils import ( ExtractorError, int_or_none, + js_to_json, orderedSet, sanitized_Request, str_to_int, @@ -48,6 +49,8 @@ class PornHubIE(InfoExtractor): 'dislike_count': int, 'comment_count': int, 'age_limit': 18, + 'tags': list, + 'categories': list, }, }, { # non-ASCII title @@ -63,6 +66,8 @@ class PornHubIE(InfoExtractor): 'dislike_count': int, 'comment_count': int, 'age_limit': 18, + 'tags': list, + 'categories': list, }, 'params': { 'skip_download': True, @@ -183,6 +188,15 @@ class PornHubIE(InfoExtractor): }) self._sort_formats(formats) + page_params = self._parse_json(self._search_regex( + r'page_params\.zoneDetails\[([\'"])[^\'"]+\1\]\s*=\s*(?P{[^}]+})', + webpage, 'page parameters', group='data', default='{}'), + video_id, transform_source=js_to_json, fatal=False) + tags = categories = None + if page_params: + tags = page_params.get('tags', '').split(',') + categories = page_params.get('categories', '').split(',') + return { 'id': video_id, 'uploader': video_uploader, @@ -195,6 +209,8 @@ class PornHubIE(InfoExtractor): 'comment_count': comment_count, 'formats': formats, 'age_limit': 18, + 'tags': tags, + 'categories': categories, } From 6599c72527ca8434589c010c48164494ab4c2469 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 11 Sep 2016 22:50:36 +0700 Subject: [PATCH 0114/1571] [tube8] Extract categories and tags (Closes #10579) --- youtube_dl/extractor/tube8.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/youtube_dl/extractor/tube8.py b/youtube_dl/extractor/tube8.py index 4053f6c21..e937b2396 100644 --- a/youtube_dl/extractor/tube8.py +++ b/youtube_dl/extractor/tube8.py @@ -1,5 +1,7 @@ from __future__ import unicode_literals +import re + from ..utils import ( int_or_none, str_to_int, @@ -21,7 +23,13 @@ class Tube8IE(KeezMoviesIE): 'title': 'Kasia music video', 'age_limit': 18, 'duration': 230, + 'categories': ['Teen'], + 'tags': ['dancing'], + }, + 'params': { + 'proxy': '127.0.0.1:8118', } + }, { 'url': 'http://www.tube8.com/shemale/teen/blonde-cd-gets-kidnapped-by-two-blacks-and-punished-for-being-a-slutty-girl/19569151/', 'only_matching': True, @@ -51,6 +59,17 @@ class Tube8IE(KeezMoviesIE): r'(\d+)', webpage, 'comment count', fatal=False)) + category = self._search_regex( + r'Category:\s*\s*]+href=[^>]+>([^<]+)', + webpage, 'category', fatal=False) + categories = [category] if category else None + + tags_str = self._search_regex( + r'(?s)Tags:\s*(.+?)]+href=[^>]+>([^<]+)', tags_str)] if tags_str else None + info.update({ 'description': description, 'uploader': uploader, @@ -58,6 +77,8 @@ class Tube8IE(KeezMoviesIE): 'like_count': like_count, 'dislike_count': dislike_count, 'comment_count': comment_count, + 'categories': categories, + 'tags': tags, }) return info From bc9186c8822db456dae93d053a34e60b7887405a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 11 Sep 2016 22:51:12 +0700 Subject: [PATCH 0115/1571] [tvplay] Remove unused import --- youtube_dl/extractor/tvplay.py | 1 - 1 file changed, 1 deletion(-) diff --git a/youtube_dl/extractor/tvplay.py b/youtube_dl/extractor/tvplay.py index 5548ff2ac..58ffc0e6f 100644 --- a/youtube_dl/extractor/tvplay.py +++ b/youtube_dl/extractor/tvplay.py @@ -16,7 +16,6 @@ from ..utils import ( parse_iso8601, qualities, try_get, - js_to_json, update_url_query, ) From 1c81476cbb167776e7b1454bf135fb7ebf62547f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 11 Sep 2016 23:20:09 +0700 Subject: [PATCH 0116/1571] release 2016.09.11 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 9 +++++++-- youtube_dl/version.py | 2 +- 4 files changed, 12 insertions(+), 7 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index a983bf432..d7195712b 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.09.08*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.09.08** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.09.11*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.09.11** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.09.08 +[debug] youtube-dl version 2016.09.11 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 5d6609987..21d9f6275 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2016.09.11 Extractors + [pornhub] Extract categories and tags (#10499) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index e6be746a8..7a7b268d3 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -247,7 +247,8 @@ - **Formula1** - **FOX** - **Foxgay** - - **FoxNews**: Fox News and Fox Business Video + - **foxnews**: Fox News and Fox Business Video + - **foxnews:article** - **foxnews:insider** - **FoxSports** - **france2.fr:generation-quoi** @@ -326,6 +327,7 @@ - **ivi**: ivi.ru - **ivi:compilation**: ivi.ru compilations - **ivideon**: Ivideon TV + - **Iwara** - **Izlesene** - **JeuxVideo** - **Jove** @@ -339,6 +341,7 @@ - **KarriereVideos** - **keek** - **KeezMovies** + - **Ketnet** - **KhanAcademy** - **KickStarter** - **KonserthusetPlay** @@ -540,6 +543,7 @@ - **podomatic** - **Pokemon** - **PolskieRadio** + - **PolskieRadioCategory** - **PornCom** - **PornHd** - **PornHub**: PornHub and Thumbzilla @@ -701,9 +705,11 @@ - **Telecinco**: telecinco.es, cuatro.com and mediaset.es - **Telegraaf** - **TeleMB** + - **TeleQuebec** - **TeleTask** - **Telewebion** - **TF1** + - **TFO** - **TheIntercept** - **ThePlatform** - **ThePlatformFeed** @@ -725,7 +731,6 @@ - **ToypicsUser**: Toypics user profile - **TrailerAddict** (Currently broken) - **Trilulilu** - - **trollvids** - **TruTV** - **Tube8** - **TubiTv** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 941ffb3f6..5f572391c 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.09.08' +__version__ = '2016.09.11' From eb87d4545a58be369723eddf5433b4198d64d367 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 11 Sep 2016 23:29:25 +0700 Subject: [PATCH 0117/1571] [devscripts/release.sh] Add ChangeLog reminder prompt --- devscripts/release.sh | 3 +++ 1 file changed, 3 insertions(+) diff --git a/devscripts/release.sh b/devscripts/release.sh index ca6ae1b49..981d37ca7 100755 --- a/devscripts/release.sh +++ b/devscripts/release.sh @@ -60,6 +60,9 @@ if ! type pandoc >/dev/null 2>/dev/null; then echo 'ERROR: pandoc is missing'; e if ! python3 -c 'import rsa' 2>/dev/null; then echo 'ERROR: python3-rsa is missing'; exit 1; fi if ! python3 -c 'import wheel' 2>/dev/null; then echo 'ERROR: wheel is missing'; exit 1; fi +read -p "Is ChangeLog up to date? (y/n) " -n 1 +if [[ ! $REPLY =~ ^[Yy]$ ]]; then exit 1; + /bin/echo -e "\n### First of all, testing..." make clean if $skip_tests ; then From d667ab7fad8d04a318b54e95d7a764e1667d80bd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 11 Sep 2016 23:30:18 +0700 Subject: [PATCH 0118/1571] [ChangeLog] Actualize --- ChangeLog | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/ChangeLog b/ChangeLog index 21d9f6275..9183f29e8 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,10 +1,22 @@ -version 2016.09.11 +version Extractors ++ [tube8] Extract categories and tags (#10579) + [pornhub] Extract categories and tags (#10499) -+ [foxnews] Support Fox News articles (#10598) +* [openload] Temporary fix (#10408) ++ [foxnews] Add support Fox News articles (#10598) +* [viafree] Improve video id extraction (#10615) * [iwara] Fix extraction after relaunch (#10462, #3215) ++ [tfo] Add extractor for tfo.org +* [lrt] Fix audio extraction (#10566) +* [9now] Fix extraction (#10561) ++ [canalplus] Add support for c8.fr (#10577) * [newgrounds] Fix uploader extraction (#10584) ++ [polskieradio:category] Add support for category lists (#10576) ++ [ketnet] Add extractor for ketnet.be (#10343) ++ [canvas] Add support for een.be (#10605) ++ [telequebec] Add extractor for telequebec.tv (#1999) +* [parliamentliveuk] Fix extraction (#9137) version 2016.09.08 From fc150cba1d6763ab115319c5726b5081b0f49106 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 11 Sep 2016 23:32:01 +0700 Subject: [PATCH 0119/1571] [devscripts/release.sh] Add missing fi --- devscripts/release.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/devscripts/release.sh b/devscripts/release.sh index 981d37ca7..1af61aa0b 100755 --- a/devscripts/release.sh +++ b/devscripts/release.sh @@ -61,7 +61,7 @@ if ! python3 -c 'import rsa' 2>/dev/null; then echo 'ERROR: python3-rsa is missi if ! python3 -c 'import wheel' 2>/dev/null; then echo 'ERROR: wheel is missing'; exit 1; fi read -p "Is ChangeLog up to date? (y/n) " -n 1 -if [[ ! $REPLY =~ ^[Yy]$ ]]; then exit 1; +if [[ ! $REPLY =~ ^[Yy]$ ]]; then exit 1; fi /bin/echo -e "\n### First of all, testing..." make clean From 0307d6fba6d3b793acac5785b2cee39e3dfbffcc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 11 Sep 2016 23:33:20 +0700 Subject: [PATCH 0120/1571] release 2016.09.11.1 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- youtube_dl/version.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index d7195712b..e87fed573 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.09.11*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.09.11** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.09.11.1*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.09.11.1** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.09.11 +[debug] youtube-dl version 2016.09.11.1 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 9183f29e8..669544815 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2016.09.11.1 Extractors + [tube8] Extract categories and tags (#10579) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 5f572391c..903aede58 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.09.11' +__version__ = '2016.09.11.1' From ee7e672eb0eca7a916845b359511262935f9ef1b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 11 Sep 2016 23:44:22 +0700 Subject: [PATCH 0121/1571] [tube8] Remove proxy settings from test --- youtube_dl/extractor/tube8.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/youtube_dl/extractor/tube8.py b/youtube_dl/extractor/tube8.py index e937b2396..1853a1104 100644 --- a/youtube_dl/extractor/tube8.py +++ b/youtube_dl/extractor/tube8.py @@ -26,10 +26,6 @@ class Tube8IE(KeezMoviesIE): 'categories': ['Teen'], 'tags': ['dancing'], }, - 'params': { - 'proxy': '127.0.0.1:8118', - } - }, { 'url': 'http://www.tube8.com/shemale/teen/blonde-cd-gets-kidnapped-by-two-blacks-and-punished-for-being-a-slutty-girl/19569151/', 'only_matching': True, From be457302267b456412fb9848bcb8ce36874d8d7e Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Mon, 12 Sep 2016 02:55:15 +0800 Subject: [PATCH 0122/1571] [nbc] Add new extractor for NBC Olympics (#10295, #10361) --- ChangeLog | 6 +++++ youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/nbc.py | 40 ++++++++++++++++++++++++++++++ 3 files changed, 47 insertions(+) diff --git a/ChangeLog b/ChangeLog index 669544815..46eea0626 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +version + +Extractors ++ [nbc] Add support for NBC Olympics (#10361) + + version 2016.09.11.1 Extractors diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index a3cd9c289..522691de1 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -534,6 +534,7 @@ from .nbc import ( CSNNEIE, NBCIE, NBCNewsIE, + NBCOlympicsIE, NBCSportsIE, NBCSportsVPlayerIE, ) diff --git a/youtube_dl/extractor/nbc.py b/youtube_dl/extractor/nbc.py index f694e210b..f37bf2f30 100644 --- a/youtube_dl/extractor/nbc.py +++ b/youtube_dl/extractor/nbc.py @@ -335,3 +335,43 @@ class NBCNewsIE(ThePlatformIE): 'url': 'http://feed.theplatform.com/f/2E2eJC/nnd_NBCNews?byId=%s' % video_id, 'ie_key': 'ThePlatformFeed', } + + +class NBCOlympicsIE(InfoExtractor): + _VALID_URL = r'https?://www\.nbcolympics\.com/video/(?P[a-z-]+)' + + _TEST = { + # Geo-restricted to US + 'url': 'http://www.nbcolympics.com/video/justin-roses-son-leo-was-tears-after-his-dad-won-gold', + 'md5': '54fecf846d05429fbaa18af557ee523a', + 'info_dict': { + 'id': 'WjTBzDXx5AUq', + 'display_id': 'justin-roses-son-leo-was-tears-after-his-dad-won-gold', + 'ext': 'mp4', + 'title': 'Rose\'s son Leo was in tears after his dad won gold', + 'description': 'Olympic gold medalist Justin Rose gets emotional talking to the impact his win in men\'s golf has already had on his children.', + 'timestamp': 1471274964, + 'upload_date': '20160815', + 'uploader': 'NBCU-SPORTS', + }, + } + + def _real_extract(self, url): + display_id = self._match_id(url) + + webpage = self._download_webpage(url, display_id) + + drupal_settings = self._parse_json(self._search_regex( + r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);', + webpage, 'drupal settings'), display_id) + + iframe_url = drupal_settings['vod']['iframe_url'] + theplatform_url = iframe_url.replace( + 'vplayer.nbcolympics.com', 'player.theplatform.com') + + return { + '_type': 'url_transparent', + 'url': theplatform_url, + 'ie_key': ThePlatformIE.ie_key(), + 'display_id': display_id, + } From 546edb2efabb18f9eb0eecb2f8719fcb777e99a7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 12 Sep 2016 21:01:31 +0700 Subject: [PATCH 0123/1571] [ISSUE_TEMPLATE_tmpl.md] Fix typo --- .github/ISSUE_TEMPLATE_tmpl.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/ISSUE_TEMPLATE_tmpl.md b/.github/ISSUE_TEMPLATE_tmpl.md index a5e6a4233..4112f53bb 100644 --- a/.github/ISSUE_TEMPLATE_tmpl.md +++ b/.github/ISSUE_TEMPLATE_tmpl.md @@ -55,4 +55,4 @@ $ youtube-dl -v ### Description of your *issue*, suggested solution and other information Explanation of your *issue* in arbitrary form goes here. Please make sure the [description is worded well enough to be understood](https://github.com/rg3/youtube-dl#is-the-description-of-the-issue-itself-sufficient). Provide as much context and examples as possible. -If work on your *issue* required an account credentials please provide them or explain how one can obtain them. +If work on your *issue* requires an account credentials please provide them or explain how one can obtain them. From d002e919863c910e52c623ee544e93fe41af4665 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 12 Sep 2016 21:48:45 +0700 Subject: [PATCH 0124/1571] [vimeo:ondemand] Pass Referer along with embed URL (#10624) --- youtube_dl/extractor/vimeo.py | 28 +++++++++++++++++++++++++--- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index 7e854f326..50aacc6ac 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -350,6 +350,10 @@ class VimeoIE(VimeoBaseInfoExtractor): } ] + @staticmethod + def _smuggle_referrer(url, referrer_url): + return smuggle_url(url, {'http_headers': {'Referer': referrer_url}}) + @staticmethod def _extract_vimeo_url(url, webpage): # Look for embedded (iframe) Vimeo player @@ -357,8 +361,7 @@ class VimeoIE(VimeoBaseInfoExtractor): r']+?src=(["\'])(?P(?:https?:)?//player\.vimeo\.com/video/.+?)\1', webpage) if mobj: player_url = unescapeHTML(mobj.group('url')) - surl = smuggle_url(player_url, {'http_headers': {'Referer': url}}) - return surl + return VimeoIE._smuggle_referrer(player_url, url) # Look for embedded (swf embed) Vimeo player mobj = re.search( r']+?src="((?:https?:)?//(?:www\.)?vimeo\.com/moogaloop\.swf.+?)"', webpage) @@ -585,6 +588,20 @@ class VimeoOndemandIE(VimeoBaseInfoExtractor): 'uploader_url': 're:https?://(?:www\.)?vimeo\.com/gumfilms', 'uploader_id': 'gumfilms', }, + }, { + # requires Referer to be passed along with og:video:url + 'url': 'https://vimeo.com/ondemand/36938/126682985', + 'info_dict': { + 'id': '126682985', + 'ext': 'mp4', + 'title': 'Rävlock, rätt läte på rätt plats', + 'uploader': 'Lindroth & Norin', + 'uploader_url': 're:https?://(?:www\.)?vimeo\.com/user14430847', + 'uploader_id': 'user14430847', + }, + 'params': { + 'skip_download': True, + }, }, { 'url': 'https://vimeo.com/ondemand/nazmaalik', 'only_matching': True, @@ -599,7 +616,12 @@ class VimeoOndemandIE(VimeoBaseInfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - return self.url_result(self._og_search_video_url(webpage), VimeoIE.ie_key()) + return self.url_result( + # Some videos require Referer to be passed along with og:video:url + # similarly to generic vimeo embeds (e.g. + # https://vimeo.com/ondemand/36938/126682985). + VimeoIE._smuggle_referrer(self._og_search_video_url(webpage), url), + VimeoIE.ie_key()) class VimeoChannelIE(VimeoBaseInfoExtractor): From a5ff05df1af97613c979f85ab2f6f610f60be910 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 12 Sep 2016 21:49:31 +0700 Subject: [PATCH 0125/1571] [extractor/generic] Add vimeo embed that requires Referer passed --- youtube_dl/extractor/generic.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 24b217715..2e46ca179 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -1369,6 +1369,11 @@ class GenericIE(InfoExtractor): }, 'add_ie': ['Vimeo'], }, + { + # generic vimeo embed that requires original URL passed as Referer + 'url': 'http://racing4everyone.eu/2016/07/30/formula-1-2016-round12-germany/', + 'only_matching': True, + }, { 'url': 'https://support.arkena.com/display/PLAY/Ways+to+embed+your+video', 'md5': 'b96f2f71b359a8ecd05ce4e1daa72365', From e8bcd982ccee87e45a5cc8b116cc4452c81b0453 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 12 Sep 2016 22:33:00 +0700 Subject: [PATCH 0126/1571] [kaltura] Skip chun format --- youtube_dl/extractor/kaltura.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/youtube_dl/extractor/kaltura.py b/youtube_dl/extractor/kaltura.py index 6a8464998..22a06e4ae 100644 --- a/youtube_dl/extractor/kaltura.py +++ b/youtube_dl/extractor/kaltura.py @@ -262,6 +262,10 @@ class KalturaIE(InfoExtractor): # Continue if asset is not ready if f.get('status') != 2: continue + # Original format that's not available (e.g. kaltura:1926081:0_c03e1b5g) + # skip for now. + if f.get('fileExt') == 'chun': + continue video_url = sign_url( '%s/flavorId/%s' % (data_url, f['id'])) formats.append({ From 1d16035bb4ec516d25326ce5ff35affb4ff1f13c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 12 Sep 2016 22:43:45 +0700 Subject: [PATCH 0127/1571] [kaltura] Improve audio detection --- youtube_dl/extractor/kaltura.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/kaltura.py b/youtube_dl/extractor/kaltura.py index 22a06e4ae..5a8403777 100644 --- a/youtube_dl/extractor/kaltura.py +++ b/youtube_dl/extractor/kaltura.py @@ -268,6 +268,10 @@ class KalturaIE(InfoExtractor): continue video_url = sign_url( '%s/flavorId/%s' % (data_url, f['id'])) + # audio-only has no videoCodecId (e.g. kaltura:1926081:0_c03e1b5g + # -f mp4-56) + vcodec = 'none' if 'videoCodecId' not in f and f.get( + 'frameRate') == 0 else f.get('videoCodecId') formats.append({ 'format_id': '%(fileExt)s-%(bitrate)s' % f, 'ext': f.get('fileExt'), @@ -275,7 +279,7 @@ class KalturaIE(InfoExtractor): 'fps': int_or_none(f.get('frameRate')), 'filesize_approx': int_or_none(f.get('size'), invscale=1024), 'container': f.get('containerFormat'), - 'vcodec': f.get('videoCodecId'), + 'vcodec': vcodec, 'height': int_or_none(f.get('height')), 'width': int_or_none(f.get('width')), 'url': video_url, From a6ccc3e518eabf61cc41575e52361d5ea79e3796 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 12 Sep 2016 23:05:52 +0700 Subject: [PATCH 0128/1571] [safari] Improve ids regexes (#10617) --- youtube_dl/extractor/safari.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/safari.py b/youtube_dl/extractor/safari.py index 08ddbe3c4..eabe41efe 100644 --- a/youtube_dl/extractor/safari.py +++ b/youtube_dl/extractor/safari.py @@ -103,13 +103,13 @@ class SafariIE(SafariBaseIE): webpage = self._download_webpage(url, video_id) reference_id = self._search_regex( - r'data-reference-id=(["\'])(?P.+?)\1', + r'data-reference-id=(["\'])(?P(?:(?!\1).)+)\1', webpage, 'kaltura reference id', group='id') partner_id = self._search_regex( - r'data-partner-id=(["\'])(?P.+?)\1', + r'data-partner-id=(["\'])(?P(?:(?!\1).)+)\1', webpage, 'kaltura widget id', group='id') ui_id = self._search_regex( - r'data-ui-id=(["\'])(?P.+?)\1', + r'data-ui-id=(["\'])(?P(?:(?!\1).)+)\1', webpage, 'kaltura uiconf id', group='id') query = { From fcba157e8049350c5386cc3b850626320d9ff7eb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 12 Sep 2016 23:29:43 +0700 Subject: [PATCH 0129/1571] [ISSUE_TEMPLATE_tmpl.md] Fix typo --- .github/ISSUE_TEMPLATE_tmpl.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/ISSUE_TEMPLATE_tmpl.md b/.github/ISSUE_TEMPLATE_tmpl.md index 4112f53bb..ab9968129 100644 --- a/.github/ISSUE_TEMPLATE_tmpl.md +++ b/.github/ISSUE_TEMPLATE_tmpl.md @@ -55,4 +55,4 @@ $ youtube-dl -v ### Description of your *issue*, suggested solution and other information Explanation of your *issue* in arbitrary form goes here. Please make sure the [description is worded well enough to be understood](https://github.com/rg3/youtube-dl#is-the-description-of-the-issue-itself-sufficient). Provide as much context and examples as possible. -If work on your *issue* requires an account credentials please provide them or explain how one can obtain them. +If work on your *issue* requires account credentials please provide them or explain how one can obtain them. From 7a7309219cae70e14f58e904591a77360bfbc985 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Mon, 12 Sep 2016 23:39:11 +0100 Subject: [PATCH 0130/1571] [adobepass] add an option to specify mso_id and support for ROGERS TV Provider(closes #10606) --- youtube_dl/YoutubeDL.py | 1 + youtube_dl/__init__.py | 1 + youtube_dl/extractor/adobepass.py | 49 +++++++++++++++++++++++-------- youtube_dl/options.py | 4 +++ 4 files changed, 42 insertions(+), 13 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 805733fb7..f70d5f49a 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -131,6 +131,7 @@ class YoutubeDL(object): username: Username for authentication purposes. password: Password for authentication purposes. videopassword: Password for accessing a video. + ap_mso_id Adobe Pass Multiple-system operator Identifier. usenetrc: Use netrc for authentication instead. verbose: Print additional info to stdout. quiet: Do not print messages to stdout. diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 42128272a..2b1b841c9 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -293,6 +293,7 @@ def _real_main(argv=None): 'password': opts.password, 'twofactor': opts.twofactor, 'videopassword': opts.videopassword, + 'ap_mso_id': opts.ap_mso_id, 'quiet': (opts.quiet or any_getting or any_printing), 'no_warnings': opts.no_warnings, 'forceurl': opts.geturl, diff --git a/youtube_dl/extractor/adobepass.py b/youtube_dl/extractor/adobepass.py index 68ec37e00..454a6af8d 100644 --- a/youtube_dl/extractor/adobepass.py +++ b/youtube_dl/extractor/adobepass.py @@ -6,10 +6,12 @@ import time import xml.etree.ElementTree as etree from .common import InfoExtractor +from ..compat import compat_urlparse from ..utils import ( unescapeHTML, urlencode_postdata, unified_timestamp, + ExtractorError, ) @@ -41,6 +43,11 @@ class AdobePassIE(InfoExtractor): token_expires = unified_timestamp(re.sub(r'[_ ]GMT', '', xml_text(token, date_ele))) return token_expires and token_expires <= int(time.time()) + def raise_mvpd_required(): + raise ExtractorError('This video is only available for users of participating TV providers. ' + 'Use --ap-mso-id to specify Adobe Pass Multiple-system operator Identifier ' + 'and --netrc to provide account credentials.', expected=True) + mvpd_headers = { 'ap_42': 'anonymous', 'ap_11': 'Linux i686', @@ -55,19 +62,26 @@ class AdobePassIE(InfoExtractor): authn_token = None if not authn_token: # TODO add support for other TV Providers - mso_id = 'DTV' + mso_id = self._downloader.params.get('ap_mso_id') + if not mso_id: + raise_mvpd_required() username, password = self._get_netrc_login_info(mso_id) if not username or not password: - return '' + return raise_mvpd_required() - def post_form(form_page, note, data={}): + def post_form(form_page_res, note, data={}): + form_page, urlh = form_page_res post_url = self._html_search_regex(r']+action=(["\'])(?P.+?)\1', form_page, 'post url', group='url') - return self._download_webpage( - post_url, video_id, note, data=urlencode_postdata(data or self._hidden_inputs(form_page)), headers={ + if not re.match(r'https?://', post_url): + post_url = compat_urlparse.urljoin(urlh.geturl(), post_url) + form_data = self._hidden_inputs(form_page) + form_data.update(data) + return self._download_webpage_handle( + post_url, video_id, note, data=urlencode_postdata(form_data), headers={ 'Content-Type': 'application/x-www-form-urlencoded', }) - provider_redirect_page = self._download_webpage( + provider_redirect_page_res = self._download_webpage_handle( self._SERVICE_PROVIDER_TEMPLATE % 'authenticate/saml', video_id, 'Downloading Provider Redirect Page', query={ 'noflash': 'true', @@ -77,13 +91,22 @@ class AdobePassIE(InfoExtractor): 'domain_name': 'adobe.com', 'redirect_url': url, }) - provider_login_page = post_form( - provider_redirect_page, 'Downloading Provider Login Page') - mvpd_confirm_page = post_form(provider_login_page, 'Logging in', { - 'username': username, - 'password': password, - }) - post_form(mvpd_confirm_page, 'Confirming Login') + provider_login_page_res = post_form( + provider_redirect_page_res, 'Downloading Provider Login Page') + login_data = {} + if mso_id == 'DTV': + login_data = { + 'username': username, + 'password': password, + } + elif mso_id == 'Rogers': + login_data = { + 'UserName': username, + 'UserPassword': password, + } + mvpd_confirm_page_res = post_form(provider_login_page_res, 'Logging in', login_data) + if mso_id == 'DTV': + post_form(mvpd_confirm_page_res, 'Confirming Login') session = self._download_webpage( self._SERVICE_PROVIDER_TEMPLATE % 'session', video_id, diff --git a/youtube_dl/options.py b/youtube_dl/options.py index 56f312f57..c4057ce59 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -350,6 +350,10 @@ def parseOpts(overrideArguments=None): '--video-password', dest='videopassword', metavar='PASSWORD', help='Video password (vimeo, smotri, youku)') + authentication.add_option( + '--ap-mso-id', + dest='ap_mso_id', metavar='APMSOID', + help='Adobe Pass Multiple-system operator Identifier(DTV, Rogers)') video_format = optparse.OptionGroup(parser, 'Video Format Options') video_format.add_option( From 45396dd2ed3bc7ab9ac6f9b5a5f51179b629abb7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 13 Sep 2016 23:20:25 +0700 Subject: [PATCH 0131/1571] [nhk] Fix extraction (Closes #10633) --- youtube_dl/extractor/nhk.py | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/youtube_dl/extractor/nhk.py b/youtube_dl/extractor/nhk.py index 691bdfa4e..5c8cd76dc 100644 --- a/youtube_dl/extractor/nhk.py +++ b/youtube_dl/extractor/nhk.py @@ -1,14 +1,15 @@ from __future__ import unicode_literals from .common import InfoExtractor +from ..utils import ExtractorError class NhkVodIE(InfoExtractor): - _VALID_URL = r'https?://www3\.nhk\.or\.jp/nhkworld/en/vod/(?P.+?)\.html' + _VALID_URL = r'https?://www3\.nhk\.or\.jp/nhkworld/en/vod/(?P[^/]+/[^/?#&]+)' _TEST = { # Videos available only for a limited period of time. Visit # http://www3.nhk.or.jp/nhkworld/en/vod/ for working samples. - 'url': 'http://www3.nhk.or.jp/nhkworld/en/vod/tokyofashion/20160815.html', + 'url': 'http://www3.nhk.or.jp/nhkworld/en/vod/tokyofashion/20160815', 'info_dict': { 'id': 'A1bnNiNTE6nY3jLllS-BIISfcC_PpvF5', 'ext': 'flv', @@ -19,25 +20,25 @@ class NhkVodIE(InfoExtractor): }, 'skip': 'Videos available only for a limited period of time', } + _API_URL = 'http://api.nhk.or.jp/nhkworld/vodesdlist/v1/all/all/all.json?apikey=EJfK8jdS57GqlupFgAfAAwr573q01y6k' def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) + data = self._download_json(self._API_URL, video_id) - embed_code = self._search_regex( - r'nw_vod_ooplayer\([^,]+,\s*(["\'])(?P(?:(?!\1).)+)\1', - webpage, 'ooyala embed code', group='id') + try: + episode = next( + e for e in data['data']['episodes'] + if e.get('url') and video_id in e['url']) + except StopIteration: + raise ExtractorError('Unable to find episode') - title = self._search_regex( - r']+class=["\']episode-detail["\']>\s*([^<]+)', - webpage, 'title', default=None) - description = self._html_search_regex( - r'(?s)]+class=["\']description["\'][^>]*>(.+?)

', - webpage, 'description', default=None) - series = self._search_regex( - r']+class=["\']detail-top-player-title[^>]+>]+>([^<]+)', - webpage, 'series', default=None) + embed_code = episode['vod_id'] + + title = episode.get('sub_title_clean') or episode['sub_title'] + description = episode.get('description_clean') or episode.get('description') + series = episode.get('title_clean') or episode.get('title') return { '_type': 'url_transparent', From 8414c2da31a5ff3cc5ba84fdd537d714d04949f8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 13 Sep 2016 23:22:16 +0700 Subject: [PATCH 0132/1571] [adobepass] PEP 8 --- youtube_dl/extractor/adobepass.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/adobepass.py b/youtube_dl/extractor/adobepass.py index 454a6af8d..50a208085 100644 --- a/youtube_dl/extractor/adobepass.py +++ b/youtube_dl/extractor/adobepass.py @@ -44,7 +44,8 @@ class AdobePassIE(InfoExtractor): return token_expires and token_expires <= int(time.time()) def raise_mvpd_required(): - raise ExtractorError('This video is only available for users of participating TV providers. ' + raise ExtractorError( + 'This video is only available for users of participating TV providers. ' 'Use --ap-mso-id to specify Adobe Pass Multiple-system operator Identifier ' 'and --netrc to provide account credentials.', expected=True) From 1b6712ab2378b2e8eb59f372fb51193f8d3bdc97 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Tue, 13 Sep 2016 22:16:01 +0100 Subject: [PATCH 0133/1571] [adobepass] add specific options for adobe pass authentication - add --ap-username and --ap-password option to specify TV provider username and password in the cmd line - add --ap-retries option to limit the number of retries - add --list-ap-msi-ids to list the supported TV Providers --- youtube_dl/YoutubeDL.py | 4 +- youtube_dl/__init__.py | 15 +++ youtube_dl/extractor/adobepass.py | 206 ++++++++++++++++-------------- youtube_dl/extractor/common.py | 10 +- youtube_dl/options.py | 24 +++- 5 files changed, 155 insertions(+), 104 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index f70d5f49a..9c2c26280 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -131,7 +131,9 @@ class YoutubeDL(object): username: Username for authentication purposes. password: Password for authentication purposes. videopassword: Password for accessing a video. - ap_mso_id Adobe Pass Multiple-system operator Identifier. + ap_mso_id: Adobe Pass Multiple-system operator Identifier. + ap_username: TV Provider username for authentication purposes. + ap_password: TV Provider password for authentication purposes. usenetrc: Use netrc for authentication instead. verbose: Print additional info to stdout. quiet: Do not print messages to stdout. diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 2b1b841c9..052f20ee7 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -34,12 +34,14 @@ from .utils import ( setproctitle, std_headers, write_string, + render_table, ) from .update import update_self from .downloader import ( FileDownloader, ) from .extractor import gen_extractors, list_extractors +from .extractor.adobepass import MSO_INFO from .YoutubeDL import YoutubeDL @@ -118,18 +120,26 @@ def _real_main(argv=None): desc += ' (Example: "%s%s:%s" )' % (ie.SEARCH_KEY, random.choice(_COUNTS), random.choice(_SEARCHES)) write_string(desc + '\n', out=sys.stdout) sys.exit(0) + if opts.list_ap_mso_ids: + table = [[mso_id, mso_info['name']] for mso_id, mso_info in MSO_INFO.items()] + write_string('Supported TV Providers:\n' + render_table(['mso id', 'mso name'], table) + '\n', out=sys.stdout) + sys.exit(0) # Conflicting, missing and erroneous options if opts.usenetrc and (opts.username is not None or opts.password is not None): parser.error('using .netrc conflicts with giving username/password') if opts.password is not None and opts.username is None: parser.error('account username missing\n') + if opts.ap_password is not None and opts.ap_username is None: + parser.error('TV Provider account username missing\n') if opts.outtmpl is not None and (opts.usetitle or opts.autonumber or opts.useid): parser.error('using output template conflicts with using title, video ID or auto number') if opts.usetitle and opts.useid: parser.error('using title conflicts with using video ID') if opts.username is not None and opts.password is None: opts.password = compat_getpass('Type account password and press [Return]: ') + if opts.ap_username is not None and opts.ap_password is None: + opts.ap_password = compat_getpass('Type TV provider account password and press [Return]: ') if opts.ratelimit is not None: numeric_limit = FileDownloader.parse_bytes(opts.ratelimit) if numeric_limit is None: @@ -169,6 +179,8 @@ def _real_main(argv=None): opts.retries = parse_retries(opts.retries) if opts.fragment_retries is not None: opts.fragment_retries = parse_retries(opts.fragment_retries) + if opts.ap_retries is not None: + opts.ap_retries = parse_retries(opts.ap_retries) if opts.buffersize is not None: numeric_buffersize = FileDownloader.parse_bytes(opts.buffersize) if numeric_buffersize is None: @@ -294,6 +306,9 @@ def _real_main(argv=None): 'twofactor': opts.twofactor, 'videopassword': opts.videopassword, 'ap_mso_id': opts.ap_mso_id, + 'ap_username': opts.ap_username, + 'ap_password': opts.ap_password, + 'ap_retries': opts.ap_retries, 'quiet': (opts.quiet or any_getting or any_printing), 'no_warnings': opts.no_warnings, 'forceurl': opts.geturl, diff --git a/youtube_dl/extractor/adobepass.py b/youtube_dl/extractor/adobepass.py index 50a208085..9add6c0f8 100644 --- a/youtube_dl/extractor/adobepass.py +++ b/youtube_dl/extractor/adobepass.py @@ -15,6 +15,20 @@ from ..utils import ( ) +MSO_INFO = { + 'DTV': { + 'name': 'DirecTV', + 'username_field': 'username', + 'password_field': 'password', + }, + 'Rogers': { + 'name': 'Rogers Cable', + 'username_field': 'UserName', + 'password_field': 'UserPassword', + }, +} + + class AdobePassIE(InfoExtractor): _SERVICE_PROVIDER_TEMPLATE = 'https://sp.auth.adobe.com/adobe-services/%s' _USER_AGENT = 'Mozilla/5.0 (X11; Linux i686; rv:47.0) Gecko/20100101 Firefox/47.0' @@ -43,6 +57,18 @@ class AdobePassIE(InfoExtractor): token_expires = unified_timestamp(re.sub(r'[_ ]GMT', '', xml_text(token, date_ele))) return token_expires and token_expires <= int(time.time()) + def post_form(form_page_res, note, data={}): + form_page, urlh = form_page_res + post_url = self._html_search_regex(r']+action=(["\'])(?P.+?)\1', form_page, 'post url', group='url') + if not re.match(r'https?://', post_url): + post_url = compat_urlparse.urljoin(urlh.geturl(), post_url) + form_data = self._hidden_inputs(form_page) + form_data.update(data) + return self._download_webpage_handle( + post_url, video_id, note, data=urlencode_postdata(form_data), headers={ + 'Content-Type': 'application/x-www-form-urlencoded', + }) + def raise_mvpd_required(): raise ExtractorError( 'This video is only available for users of participating TV providers. ' @@ -57,105 +83,95 @@ class AdobePassIE(InfoExtractor): } guid = xml_text(resource, 'guid') - requestor_info = self._downloader.cache.load('mvpd', requestor_id) or {} - authn_token = requestor_info.get('authn_token') - if authn_token and is_expired(authn_token, 'simpleTokenExpires'): - authn_token = None - if not authn_token: - # TODO add support for other TV Providers - mso_id = self._downloader.params.get('ap_mso_id') - if not mso_id: - raise_mvpd_required() - username, password = self._get_netrc_login_info(mso_id) - if not username or not password: - return raise_mvpd_required() + retries = self._downloader.params.get('ap_retries', 3) + count = 0 + while count < retries: + requestor_info = self._downloader.cache.load('mvpd', requestor_id) or {} + authn_token = requestor_info.get('authn_token') + if authn_token and is_expired(authn_token, 'simpleTokenExpires'): + authn_token = None + if not authn_token: + # TODO add support for other TV Providers + mso_id = self._downloader.params.get('ap_mso_id') + if not mso_id: + raise_mvpd_required() + if mso_id not in MSO_INFO: + raise ExtractorError( + 'Unsupported TV Provider, use --list-ap-mso-ids to get a list of supported TV Providers' % mso_id, expected=True) + username, password = self._get_login_info('ap_username', 'ap_password', mso_id) + if not username or not password: + raise_mvpd_required() + mso_info = MSO_INFO[mso_id] - def post_form(form_page_res, note, data={}): - form_page, urlh = form_page_res - post_url = self._html_search_regex(r']+action=(["\'])(?P.+?)\1', form_page, 'post url', group='url') - if not re.match(r'https?://', post_url): - post_url = compat_urlparse.urljoin(urlh.geturl(), post_url) - form_data = self._hidden_inputs(form_page) - form_data.update(data) - return self._download_webpage_handle( - post_url, video_id, note, data=urlencode_postdata(form_data), headers={ - 'Content-Type': 'application/x-www-form-urlencoded', + provider_redirect_page_res = self._download_webpage_handle( + self._SERVICE_PROVIDER_TEMPLATE % 'authenticate/saml', video_id, + 'Downloading Provider Redirect Page', query={ + 'noflash': 'true', + 'mso_id': mso_id, + 'requestor_id': requestor_id, + 'no_iframe': 'false', + 'domain_name': 'adobe.com', + 'redirect_url': url, }) - - provider_redirect_page_res = self._download_webpage_handle( - self._SERVICE_PROVIDER_TEMPLATE % 'authenticate/saml', video_id, - 'Downloading Provider Redirect Page', query={ - 'noflash': 'true', - 'mso_id': mso_id, - 'requestor_id': requestor_id, - 'no_iframe': 'false', - 'domain_name': 'adobe.com', - 'redirect_url': url, + provider_login_page_res = post_form( + provider_redirect_page_res, 'Downloading Provider Login Page') + mvpd_confirm_page_res = post_form(provider_login_page_res, 'Logging in', { + mso_info['username_field']: username, + mso_info['password_field']: password, }) - provider_login_page_res = post_form( - provider_redirect_page_res, 'Downloading Provider Login Page') - login_data = {} - if mso_id == 'DTV': - login_data = { - 'username': username, - 'password': password, - } - elif mso_id == 'Rogers': - login_data = { - 'UserName': username, - 'UserPassword': password, - } - mvpd_confirm_page_res = post_form(provider_login_page_res, 'Logging in', login_data) - if mso_id == 'DTV': - post_form(mvpd_confirm_page_res, 'Confirming Login') + if mso_id == 'DTV': + post_form(mvpd_confirm_page_res, 'Confirming Login') - session = self._download_webpage( - self._SERVICE_PROVIDER_TEMPLATE % 'session', video_id, - 'Retrieving Session', data=urlencode_postdata({ - '_method': 'GET', + session = self._download_webpage( + self._SERVICE_PROVIDER_TEMPLATE % 'session', video_id, + 'Retrieving Session', data=urlencode_postdata({ + '_method': 'GET', + 'requestor_id': requestor_id, + }), headers=mvpd_headers) + if '' + ('|'.join(re.escape(po) for po in PRIVATE_OPTS)) + ')=.+$') def _scrub_eq(o): @@ -350,10 +350,28 @@ def parseOpts(overrideArguments=None): '--video-password', dest='videopassword', metavar='PASSWORD', help='Video password (vimeo, smotri, youku)') - authentication.add_option( + + adobe_pass = optparse.OptionGroup(parser, 'Adobe Pass Options') + adobe_pass.add_option( '--ap-mso-id', dest='ap_mso_id', metavar='APMSOID', - help='Adobe Pass Multiple-system operator Identifier(DTV, Rogers)') + help='Adobe Pass Multiple-system operator Identifier') + adobe_pass.add_option( + '--ap-username', + dest='ap_username', metavar='APUSERNAME', + help='TV Provider Login with this account ID') + adobe_pass.add_option( + '--ap-password', + dest='ap_password', metavar='APPASSWORD', + help='TV Provider Account password. If this option is left out, youtube-dl will ask interactively.') + adobe_pass.add_option( + '--list-ap-mso-ids', + action='store_true', dest='list_ap_mso_ids', default=False, + help='List all supported TV Providers') + adobe_pass.add_option( + '--ap-retries', + dest='ap_retries', metavar='APRETRIES', default=3, + help='Number of retries for Adobe Pass Authorization requests') video_format = optparse.OptionGroup(parser, 'Video Format Options') video_format.add_option( From 4875ff68476ff7de9733c80effb652fc6ab07ea0 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Wed, 14 Sep 2016 22:01:31 +0800 Subject: [PATCH 0134/1571] [bilibili] Remove copyrighted test cases MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I can't find any English or Chinese material that claims BiliBili has bought legal redistribution permissions for copyrighted products from copyrighted holders. References for removed test cases: "刀语": https://en.wikipedia.org/wiki/Katanagatari, by White Fox "哆啦A梦": https://en.wikipedia.org/wiki/Doraemon, by Shin-Ei Animation "岳父岳母真难当": https://en.wikipedia.org/wiki/Serial_(Bad)_Weddings, by Les films du 24 "混沌武士": https://en.wikipedia.org/wiki/Samurai_Champloo, by Manglobe I shouldn't have added them to _TESTS --- youtube_dl/extractor/bilibili.py | 61 ++------------------------------ 1 file changed, 2 insertions(+), 59 deletions(-) diff --git a/youtube_dl/extractor/bilibili.py b/youtube_dl/extractor/bilibili.py index 8fa96d3a0..9f5c12ab9 100644 --- a/youtube_dl/extractor/bilibili.py +++ b/youtube_dl/extractor/bilibili.py @@ -17,7 +17,7 @@ from ..utils import ( class BiliBiliIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.|bangumi\.|)bilibili\.(?:tv|com)/(?:video/av|anime/v/)(?P\d+)' - _TESTS = [{ + _TEST = { 'url': 'http://www.bilibili.tv/video/av1074402/', 'md5': '9fa226fe2b8a9a4d5a69b4c6a183417e', 'info_dict': { @@ -32,64 +32,7 @@ class BiliBiliIE(InfoExtractor): 'uploader': '菊子桑', 'uploader_id': '156160', }, - }, { - 'url': 'http://www.bilibili.com/video/av1041170/', - 'info_dict': { - 'id': '1041170', - 'ext': 'mp4', - 'title': '【BD1080P】刀语【诸神&异域】', - 'description': '这是个神奇的故事~每个人不留弹幕不给走哦~切利哦!~', - 'duration': 3382.259, - 'timestamp': 1396530060, - 'upload_date': '20140403', - 'thumbnail': 're:^https?://.+\.jpg', - 'uploader': '枫叶逝去', - 'uploader_id': '520116', - }, - }, { - 'url': 'http://www.bilibili.com/video/av4808130/', - 'info_dict': { - 'id': '4808130', - 'ext': 'mp4', - 'title': '【长篇】哆啦A梦443【钉铛】', - 'description': '(2016.05.27)来组合客人的脸吧&amp;寻母六千里锭 抱歉,又轮到周日上班现在才到家 封面www.pixiv.net/member_illust.php?mode=medium&amp;illust_id=56912929', - 'duration': 1493.995, - 'timestamp': 1464564180, - 'upload_date': '20160529', - 'thumbnail': 're:^https?://.+\.jpg', - 'uploader': '喜欢拉面', - 'uploader_id': '151066', - }, - }, { - # Missing upload time - 'url': 'http://www.bilibili.com/video/av1867637/', - 'info_dict': { - 'id': '1867637', - 'ext': 'mp4', - 'title': '【HDTV】【喜剧】岳父岳母真难当 (2014)【法国票房冠军】', - 'description': '一个信奉天主教的法国旧式传统资产阶级家庭中有四个女儿。三个女儿却分别找了阿拉伯、犹太、中国丈夫,老夫老妻唯独期盼剩下未嫁的小女儿能找一个信奉天主教的法国白人,结果没想到小女儿找了一位非裔黑人……【这次应该不会跳帧了】', - 'duration': 5760.0, - 'uploader': '黑夜为猫', - 'uploader_id': '610729', - 'thumbnail': 're:^https?://.+\.jpg', - }, - 'params': { - # Just to test metadata extraction - 'skip_download': True, - }, - 'expected_warnings': ['upload time'], - }, { - 'url': 'http://bangumi.bilibili.com/anime/v/40068', - 'md5': '08d539a0884f3deb7b698fb13ba69696', - 'info_dict': { - 'id': '40068', - 'ext': 'mp4', - 'duration': 1402.357, - 'title': '混沌武士 : 第7集 四面楚歌 A Risky Racket', - 'description': 'md5:6a9622b911565794c11f25f81d6a97d2', - 'thumbnail': 're:^http?://.+\.jpg', - }, - }] + } _APP_KEY = '6f90a59ac58a4123' _BILIBILI_KEY = '0bfd84cc3940035173f35e6777508326' From 86d68f906e21a6674f9f8676b22a47414b6c9fd2 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Wed, 14 Sep 2016 22:11:49 +0800 Subject: [PATCH 0135/1571] [bilibili] Fix extraction for videos without backup_url (#10647) --- ChangeLog | 1 + youtube_dl/extractor/bilibili.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index 46eea0626..25c916eb2 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,7 @@ version Extractors +* [bilibili] Fix extraction for specific videos (#10647) + [nbc] Add support for NBC Olympics (#10361) diff --git a/youtube_dl/extractor/bilibili.py b/youtube_dl/extractor/bilibili.py index 9f5c12ab9..2d174e6f9 100644 --- a/youtube_dl/extractor/bilibili.py +++ b/youtube_dl/extractor/bilibili.py @@ -67,7 +67,7 @@ class BiliBiliIE(InfoExtractor): 'url': durl['url'], 'filesize': int_or_none(durl['size']), }] - for backup_url in durl['backup_url']: + for backup_url in durl.get('backup_url', []): formats.append({ 'url': backup_url, # backup URLs have lower priorities From 5712c0f42639cd183b0dfbc51482592e790e99d1 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Wed, 14 Sep 2016 16:36:42 +0100 Subject: [PATCH 0136/1571] [adobepass] remove unnecessary option --- youtube_dl/__init__.py | 3 --- youtube_dl/extractor/adobepass.py | 3 +-- youtube_dl/options.py | 4 ---- 3 files changed, 1 insertion(+), 9 deletions(-) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 052f20ee7..cdff3df65 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -179,8 +179,6 @@ def _real_main(argv=None): opts.retries = parse_retries(opts.retries) if opts.fragment_retries is not None: opts.fragment_retries = parse_retries(opts.fragment_retries) - if opts.ap_retries is not None: - opts.ap_retries = parse_retries(opts.ap_retries) if opts.buffersize is not None: numeric_buffersize = FileDownloader.parse_bytes(opts.buffersize) if numeric_buffersize is None: @@ -308,7 +306,6 @@ def _real_main(argv=None): 'ap_mso_id': opts.ap_mso_id, 'ap_username': opts.ap_username, 'ap_password': opts.ap_password, - 'ap_retries': opts.ap_retries, 'quiet': (opts.quiet or any_getting or any_printing), 'no_warnings': opts.no_warnings, 'forceurl': opts.geturl, diff --git a/youtube_dl/extractor/adobepass.py b/youtube_dl/extractor/adobepass.py index 9add6c0f8..913a817d2 100644 --- a/youtube_dl/extractor/adobepass.py +++ b/youtube_dl/extractor/adobepass.py @@ -83,9 +83,8 @@ class AdobePassIE(InfoExtractor): } guid = xml_text(resource, 'guid') - retries = self._downloader.params.get('ap_retries', 3) count = 0 - while count < retries: + while count < 2: requestor_info = self._downloader.cache.load('mvpd', requestor_id) or {} authn_token = requestor_info.get('authn_token') if authn_token and is_expired(authn_token, 'simpleTokenExpires'): diff --git a/youtube_dl/options.py b/youtube_dl/options.py index b99201a20..342ae3be3 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -368,10 +368,6 @@ def parseOpts(overrideArguments=None): '--list-ap-mso-ids', action='store_true', dest='list_ap_mso_ids', default=False, help='List all supported TV Providers') - adobe_pass.add_option( - '--ap-retries', - dest='ap_retries', metavar='APRETRIES', default=3, - help='Number of retries for Adobe Pass Authorization requests') video_format = optparse.OptionGroup(parser, 'Video Format Options') video_format.add_option( From b690ea15ebe7549854962f02987a8faaa6d41f53 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 14 Sep 2016 22:45:23 +0700 Subject: [PATCH 0137/1571] [viafree] Fix test --- youtube_dl/extractor/tvplay.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/tvplay.py b/youtube_dl/extractor/tvplay.py index 58ffc0e6f..3eda0a399 100644 --- a/youtube_dl/extractor/tvplay.py +++ b/youtube_dl/extractor/tvplay.py @@ -369,7 +369,7 @@ class ViafreeIE(InfoExtractor): 'add_ie': [TVPlayIE.ie_key()], }, { # Different og:image URL schema - 'url': 'www.viafree.se/program/reality/sommaren-med-youtube-stjarnorna/sasong-1/avsnitt-2', + 'url': 'http://www.viafree.se/program/reality/sommaren-med-youtube-stjarnorna/sasong-1/avsnitt-2', 'only_matching': True, }, { 'url': 'http://www.viafree.no/programmer/underholdning/det-beste-vorspielet/sesong-2/episode-1', From 925194022cd661747771e58bad41e5f7ae118999 Mon Sep 17 00:00:00 2001 From: stepshal Date: Thu, 8 Sep 2016 18:29:05 +0700 Subject: [PATCH 0138/1571] Improve some _VALID_URLs --- youtube_dl/extractor/abc.py | 2 +- youtube_dl/extractor/aljazeera.py | 2 +- youtube_dl/extractor/azubu.py | 2 +- youtube_dl/extractor/bbc.py | 2 +- youtube_dl/extractor/bpb.py | 2 +- youtube_dl/extractor/camdemy.py | 2 +- youtube_dl/extractor/cbssports.py | 2 +- youtube_dl/extractor/ceskatelevize.py | 2 +- youtube_dl/extractor/chirbit.py | 2 +- youtube_dl/extractor/cmt.py | 2 +- youtube_dl/extractor/criterion.py | 2 +- youtube_dl/extractor/dctp.py | 2 +- youtube_dl/extractor/democracynow.py | 2 +- youtube_dl/extractor/engadget.py | 2 +- youtube_dl/extractor/expotv.py | 2 +- youtube_dl/extractor/freespeech.py | 2 +- youtube_dl/extractor/gamestar.py | 2 +- youtube_dl/extractor/googleplus.py | 2 +- youtube_dl/extractor/goshgay.py | 2 +- youtube_dl/extractor/hark.py | 2 +- youtube_dl/extractor/hotnewhiphop.py | 2 +- youtube_dl/extractor/imdb.py | 2 +- youtube_dl/extractor/karaoketv.py | 2 +- youtube_dl/extractor/kickstarter.py | 2 +- youtube_dl/extractor/kuwo.py | 8 ++++---- youtube_dl/extractor/litv.py | 2 +- youtube_dl/extractor/lynda.py | 2 +- youtube_dl/extractor/macgamestore.py | 2 +- youtube_dl/extractor/metacritic.py | 2 +- youtube_dl/extractor/mgtv.py | 2 +- youtube_dl/extractor/ministrygrid.py | 2 +- youtube_dl/extractor/mitele.py | 2 +- youtube_dl/extractor/motorsport.py | 2 +- youtube_dl/extractor/moviezine.py | 2 +- youtube_dl/extractor/myspass.py | 2 +- youtube_dl/extractor/nbc.py | 6 +++--- youtube_dl/extractor/ndr.py | 8 ++++---- youtube_dl/extractor/nextmedia.py | 6 +++--- youtube_dl/extractor/niconico.py | 2 +- youtube_dl/extractor/oktoberfesttv.py | 2 +- youtube_dl/extractor/openload.py | 2 +- youtube_dl/extractor/periscope.py | 2 +- youtube_dl/extractor/playvid.py | 2 +- youtube_dl/extractor/qqmusic.py | 6 +++--- youtube_dl/extractor/rottentomatoes.py | 2 +- youtube_dl/extractor/roxwel.py | 2 +- youtube_dl/extractor/rtve.py | 6 +++--- youtube_dl/extractor/screenjunkies.py | 2 +- youtube_dl/extractor/senateisvp.py | 2 +- youtube_dl/extractor/slideshare.py | 2 +- youtube_dl/extractor/spiegel.py | 2 +- youtube_dl/extractor/syfy.py | 2 +- youtube_dl/extractor/teachingchannel.py | 2 +- youtube_dl/extractor/telecinco.py | 2 +- youtube_dl/extractor/telewebion.py | 2 +- youtube_dl/extractor/theintercept.py | 2 +- youtube_dl/extractor/thescene.py | 2 +- youtube_dl/extractor/tlc.py | 2 +- youtube_dl/extractor/udemy.py | 2 +- youtube_dl/extractor/ustream.py | 4 ++-- youtube_dl/extractor/vevo.py | 4 ++-- youtube_dl/extractor/videodetective.py | 2 +- youtube_dl/extractor/weiqitv.py | 2 +- youtube_dl/extractor/yam.py | 2 +- youtube_dl/extractor/youtube.py | 12 ++++++------ 65 files changed, 86 insertions(+), 86 deletions(-) diff --git a/youtube_dl/extractor/abc.py b/youtube_dl/extractor/abc.py index 3792bd232..465249bbf 100644 --- a/youtube_dl/extractor/abc.py +++ b/youtube_dl/extractor/abc.py @@ -13,7 +13,7 @@ from ..utils import ( class ABCIE(InfoExtractor): IE_NAME = 'abc.net.au' - _VALID_URL = r'https?://www\.abc\.net\.au/news/(?:[^/]+/){1,2}(?P\d+)' + _VALID_URL = r'https?://(?:www\.)?abc\.net\.au/news/(?:[^/]+/){1,2}(?P\d+)' _TESTS = [{ 'url': 'http://www.abc.net.au/news/2014-11-05/australia-to-staff-ebola-treatment-centre-in-sierra-leone/5868334', diff --git a/youtube_dl/extractor/aljazeera.py b/youtube_dl/extractor/aljazeera.py index b081695d8..388e578d5 100644 --- a/youtube_dl/extractor/aljazeera.py +++ b/youtube_dl/extractor/aljazeera.py @@ -4,7 +4,7 @@ from .common import InfoExtractor class AlJazeeraIE(InfoExtractor): - _VALID_URL = r'https?://www\.aljazeera\.com/programmes/.*?/(?P[^/]+)\.html' + _VALID_URL = r'https?://(?:www\.)?aljazeera\.com/programmes/.*?/(?P[^/]+)\.html' _TEST = { 'url': 'http://www.aljazeera.com/programmes/the-slum/2014/08/deliverance-201482883754237240.html', diff --git a/youtube_dl/extractor/azubu.py b/youtube_dl/extractor/azubu.py index a813eb429..72e1bd59d 100644 --- a/youtube_dl/extractor/azubu.py +++ b/youtube_dl/extractor/azubu.py @@ -103,7 +103,7 @@ class AzubuIE(InfoExtractor): class AzubuLiveIE(InfoExtractor): - _VALID_URL = r'https?://www.azubu.tv/(?P[^/]+)$' + _VALID_URL = r'https?://(?:www\.)?azubu\.tv/(?P[^/]+)$' _TEST = { 'url': 'http://www.azubu.tv/MarsTVMDLen', diff --git a/youtube_dl/extractor/bbc.py b/youtube_dl/extractor/bbc.py index deb9cc1c0..b17916137 100644 --- a/youtube_dl/extractor/bbc.py +++ b/youtube_dl/extractor/bbc.py @@ -1028,7 +1028,7 @@ class BBCIE(BBCCoUkIE): class BBCCoUkArticleIE(InfoExtractor): - _VALID_URL = r'https?://www.bbc.co.uk/programmes/articles/(?P[a-zA-Z0-9]+)' + _VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/programmes/articles/(?P[a-zA-Z0-9]+)' IE_NAME = 'bbc.co.uk:article' IE_DESC = 'BBC articles' diff --git a/youtube_dl/extractor/bpb.py b/youtube_dl/extractor/bpb.py index 6ad45a1e6..9661ade4f 100644 --- a/youtube_dl/extractor/bpb.py +++ b/youtube_dl/extractor/bpb.py @@ -12,7 +12,7 @@ from ..utils import ( class BpbIE(InfoExtractor): IE_DESC = 'Bundeszentrale für politische Bildung' - _VALID_URL = r'https?://www\.bpb\.de/mediathek/(?P[0-9]+)/' + _VALID_URL = r'https?://(?:www\.)?bpb\.de/mediathek/(?P[0-9]+)/' _TEST = { 'url': 'http://www.bpb.de/mediathek/297/joachim-gauck-zu-1989-und-die-erinnerung-an-die-ddr', diff --git a/youtube_dl/extractor/camdemy.py b/youtube_dl/extractor/camdemy.py index 268c34392..d4e6fbdce 100644 --- a/youtube_dl/extractor/camdemy.py +++ b/youtube_dl/extractor/camdemy.py @@ -112,7 +112,7 @@ class CamdemyIE(InfoExtractor): class CamdemyFolderIE(InfoExtractor): - _VALID_URL = r'https?://www.camdemy.com/folder/(?P\d+)' + _VALID_URL = r'https?://(?:www\.)?camdemy\.com/folder/(?P\d+)' _TESTS = [{ # links with trailing slash 'url': 'http://www.camdemy.com/folder/450', diff --git a/youtube_dl/extractor/cbssports.py b/youtube_dl/extractor/cbssports.py index bf7915626..3a62c840b 100644 --- a/youtube_dl/extractor/cbssports.py +++ b/youtube_dl/extractor/cbssports.py @@ -4,7 +4,7 @@ from .cbs import CBSBaseIE class CBSSportsIE(CBSBaseIE): - _VALID_URL = r'https?://www\.cbssports\.com/video/player/[^/]+/(?P\d+)' + _VALID_URL = r'https?://(?:www\.)?cbssports\.com/video/player/[^/]+/(?P\d+)' _TESTS = [{ 'url': 'http://www.cbssports.com/video/player/videos/708337219968/0/ben-simmons-the-next-lebron?-not-so-fast', diff --git a/youtube_dl/extractor/ceskatelevize.py b/youtube_dl/extractor/ceskatelevize.py index 5a58d1777..87c2e7089 100644 --- a/youtube_dl/extractor/ceskatelevize.py +++ b/youtube_dl/extractor/ceskatelevize.py @@ -17,7 +17,7 @@ from ..utils import ( class CeskaTelevizeIE(InfoExtractor): - _VALID_URL = r'https?://www\.ceskatelevize\.cz/(porady|ivysilani)/(?:[^/]+/)*(?P[^/#?]+)/*(?:[#?].*)?$' + _VALID_URL = r'https?://(?:www\.)?ceskatelevize\.cz/(porady|ivysilani)/(?:[^/]+/)*(?P[^/#?]+)/*(?:[#?].*)?$' _TESTS = [{ 'url': 'http://www.ceskatelevize.cz/ivysilani/ivysilani/10441294653-hyde-park-civilizace/214411058091220', 'info_dict': { diff --git a/youtube_dl/extractor/chirbit.py b/youtube_dl/extractor/chirbit.py index b43518652..61aed0167 100644 --- a/youtube_dl/extractor/chirbit.py +++ b/youtube_dl/extractor/chirbit.py @@ -65,7 +65,7 @@ class ChirbitIE(InfoExtractor): class ChirbitProfileIE(InfoExtractor): IE_NAME = 'chirbit:profile' - _VALID_URL = r'https?://(?:www\.)?chirbit.com/(?:rss/)?(?P[^/]+)' + _VALID_URL = r'https?://(?:www\.)?chirbit\.com/(?:rss/)?(?P[^/]+)' _TEST = { 'url': 'http://chirbit.com/ScarletBeauty', 'info_dict': { diff --git a/youtube_dl/extractor/cmt.py b/youtube_dl/extractor/cmt.py index f24568dcc..ac3bdfe8f 100644 --- a/youtube_dl/extractor/cmt.py +++ b/youtube_dl/extractor/cmt.py @@ -6,7 +6,7 @@ from ..utils import ExtractorError class CMTIE(MTVIE): IE_NAME = 'cmt.com' - _VALID_URL = r'https?://www\.cmt\.com/(?:videos|shows)/(?:[^/]+/)*(?P\d+)' + _VALID_URL = r'https?://(?:www\.)?cmt\.com/(?:videos|shows)/(?:[^/]+/)*(?P\d+)' _FEED_URL = 'http://www.cmt.com/sitewide/apps/player/embed/rss/' _TESTS = [{ diff --git a/youtube_dl/extractor/criterion.py b/youtube_dl/extractor/criterion.py index dedb810a0..ad32673a8 100644 --- a/youtube_dl/extractor/criterion.py +++ b/youtube_dl/extractor/criterion.py @@ -7,7 +7,7 @@ from .common import InfoExtractor class CriterionIE(InfoExtractor): - _VALID_URL = r'https?://www\.criterion\.com/films/(?P[0-9]+)-.+' + _VALID_URL = r'https?://(?:www\.)?criterion\.com/films/(?P[0-9]+)-.+' _TEST = { 'url': 'http://www.criterion.com/films/184-le-samourai', 'md5': 'bc51beba55685509883a9a7830919ec3', diff --git a/youtube_dl/extractor/dctp.py b/youtube_dl/extractor/dctp.py index 9099f5046..a47e04993 100644 --- a/youtube_dl/extractor/dctp.py +++ b/youtube_dl/extractor/dctp.py @@ -6,7 +6,7 @@ from ..compat import compat_str class DctpTvIE(InfoExtractor): - _VALID_URL = r'https?://www.dctp.tv/(#/)?filme/(?P.+?)/$' + _VALID_URL = r'https?://(?:www\.)?dctp\.tv/(#/)?filme/(?P.+?)/$' _TEST = { 'url': 'http://www.dctp.tv/filme/videoinstallation-fuer-eine-kaufhausfassade/', 'info_dict': { diff --git a/youtube_dl/extractor/democracynow.py b/youtube_dl/extractor/democracynow.py index 65a98d789..bdfe638b4 100644 --- a/youtube_dl/extractor/democracynow.py +++ b/youtube_dl/extractor/democracynow.py @@ -13,7 +13,7 @@ from ..utils import ( class DemocracynowIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?democracynow.org/(?P[^\?]*)' + _VALID_URL = r'https?://(?:www\.)?democracynow\.org/(?P[^\?]*)' IE_NAME = 'democracynow' _TESTS = [{ 'url': 'http://www.democracynow.org/shows/2015/7/3', diff --git a/youtube_dl/extractor/engadget.py b/youtube_dl/extractor/engadget.py index a39e9010d..65635c18b 100644 --- a/youtube_dl/extractor/engadget.py +++ b/youtube_dl/extractor/engadget.py @@ -4,7 +4,7 @@ from .common import InfoExtractor class EngadgetIE(InfoExtractor): - _VALID_URL = r'https?://www.engadget.com/video/(?P[^/?#]+)' + _VALID_URL = r'https?://(?:www\.)?engadget\.com/video/(?P[^/?#]+)' _TESTS = [{ # video with 5min ID diff --git a/youtube_dl/extractor/expotv.py b/youtube_dl/extractor/expotv.py index 971c918a4..ef11962f3 100644 --- a/youtube_dl/extractor/expotv.py +++ b/youtube_dl/extractor/expotv.py @@ -8,7 +8,7 @@ from ..utils import ( class ExpoTVIE(InfoExtractor): - _VALID_URL = r'https?://www\.expotv\.com/videos/[^?#]*/(?P[0-9]+)($|[?#])' + _VALID_URL = r'https?://(?:www\.)?expotv\.com/videos/[^?#]*/(?P[0-9]+)($|[?#])' _TEST = { 'url': 'http://www.expotv.com/videos/reviews/3/40/NYX-Butter-lipstick/667916', 'md5': 'fe1d728c3a813ff78f595bc8b7a707a8', diff --git a/youtube_dl/extractor/freespeech.py b/youtube_dl/extractor/freespeech.py index 1477708bb..0a70ca763 100644 --- a/youtube_dl/extractor/freespeech.py +++ b/youtube_dl/extractor/freespeech.py @@ -8,7 +8,7 @@ from .common import InfoExtractor class FreespeechIE(InfoExtractor): IE_NAME = 'freespeech.org' - _VALID_URL = r'https://www\.freespeech\.org/video/(?P.+)' + _VALID_URL = r'https?://(?:www\.)?freespeech\.org/video/(?P<title>.+)' _TEST = { 'add_ie': ['Youtube'], 'url': 'https://www.freespeech.org/video/obama-romney-campaign-colorado-ahead-debate-0', diff --git a/youtube_dl/extractor/gamestar.py b/youtube_dl/extractor/gamestar.py index 341e72733..55a34604a 100644 --- a/youtube_dl/extractor/gamestar.py +++ b/youtube_dl/extractor/gamestar.py @@ -9,7 +9,7 @@ from ..utils import ( class GameStarIE(InfoExtractor): - _VALID_URL = r'https?://www\.gamestar\.de/videos/.*,(?P<id>[0-9]+)\.html' + _VALID_URL = r'https?://(?:www\.)?gamestar\.de/videos/.*,(?P<id>[0-9]+)\.html' _TEST = { 'url': 'http://www.gamestar.de/videos/trailer,3/hobbit-3-die-schlacht-der-fuenf-heere,76110.html', 'md5': '96974ecbb7fd8d0d20fca5a00810cea7', diff --git a/youtube_dl/extractor/googleplus.py b/youtube_dl/extractor/googleplus.py index 731bacd67..427499b11 100644 --- a/youtube_dl/extractor/googleplus.py +++ b/youtube_dl/extractor/googleplus.py @@ -10,7 +10,7 @@ from ..utils import unified_strdate class GooglePlusIE(InfoExtractor): IE_DESC = 'Google Plus' - _VALID_URL = r'https://plus\.google\.com/(?:[^/]+/)*?posts/(?P<id>\w+)' + _VALID_URL = r'https?://plus\.google\.com/(?:[^/]+/)*?posts/(?P<id>\w+)' IE_NAME = 'plus.google' _TEST = { 'url': 'https://plus.google.com/u/0/108897254135232129896/posts/ZButuJc6CtH', diff --git a/youtube_dl/extractor/goshgay.py b/youtube_dl/extractor/goshgay.py index 0c015141f..a43abd154 100644 --- a/youtube_dl/extractor/goshgay.py +++ b/youtube_dl/extractor/goshgay.py @@ -11,7 +11,7 @@ from ..utils import ( class GoshgayIE(InfoExtractor): - _VALID_URL = r'https?://www\.goshgay\.com/video(?P<id>\d+?)($|/)' + _VALID_URL = r'https?://(?:www\.)?goshgay\.com/video(?P<id>\d+?)($|/)' _TEST = { 'url': 'http://www.goshgay.com/video299069/diesel_sfw_xxx_video', 'md5': '4b6db9a0a333142eb9f15913142b0ed1', diff --git a/youtube_dl/extractor/hark.py b/youtube_dl/extractor/hark.py index b6cc15b6f..749e9154f 100644 --- a/youtube_dl/extractor/hark.py +++ b/youtube_dl/extractor/hark.py @@ -5,7 +5,7 @@ from .common import InfoExtractor class HarkIE(InfoExtractor): - _VALID_URL = r'https?://www\.hark\.com/clips/(?P<id>.+?)-.+' + _VALID_URL = r'https?://(?:www\.)?hark\.com/clips/(?P<id>.+?)-.+' _TEST = { 'url': 'http://www.hark.com/clips/mmbzyhkgny-obama-beyond-the-afghan-theater-we-only-target-al-qaeda-on-may-23-2013', 'md5': '6783a58491b47b92c7c1af5a77d4cbee', diff --git a/youtube_dl/extractor/hotnewhiphop.py b/youtube_dl/extractor/hotnewhiphop.py index 9db565209..34163725f 100644 --- a/youtube_dl/extractor/hotnewhiphop.py +++ b/youtube_dl/extractor/hotnewhiphop.py @@ -12,7 +12,7 @@ from ..utils import ( class HotNewHipHopIE(InfoExtractor): - _VALID_URL = r'https?://www\.hotnewhiphop\.com/.*\.(?P<id>.*)\.html' + _VALID_URL = r'https?://(?:www\.)?hotnewhiphop\.com/.*\.(?P<id>.*)\.html' _TEST = { 'url': 'http://www.hotnewhiphop.com/freddie-gibbs-lay-it-down-song.1435540.html', 'md5': '2c2cd2f76ef11a9b3b581e8b232f3d96', diff --git a/youtube_dl/extractor/imdb.py b/youtube_dl/extractor/imdb.py index 3a6a6f5ad..f0fc8d49a 100644 --- a/youtube_dl/extractor/imdb.py +++ b/youtube_dl/extractor/imdb.py @@ -94,7 +94,7 @@ class ImdbIE(InfoExtractor): class ImdbListIE(InfoExtractor): IE_NAME = 'imdb:list' IE_DESC = 'Internet Movie Database lists' - _VALID_URL = r'https?://www\.imdb\.com/list/(?P<id>[\da-zA-Z_-]{11})' + _VALID_URL = r'https?://(?:www\.)?imdb\.com/list/(?P<id>[\da-zA-Z_-]{11})' _TEST = { 'url': 'http://www.imdb.com/list/JFs9NWw6XI0', 'info_dict': { diff --git a/youtube_dl/extractor/karaoketv.py b/youtube_dl/extractor/karaoketv.py index bad46005b..bfccf89b0 100644 --- a/youtube_dl/extractor/karaoketv.py +++ b/youtube_dl/extractor/karaoketv.py @@ -5,7 +5,7 @@ from .common import InfoExtractor class KaraoketvIE(InfoExtractor): - _VALID_URL = r'https?://www\.karaoketv\.co\.il/[^/]+/(?P<id>\d+)' + _VALID_URL = r'https?://(?:www\.)?karaoketv\.co\.il/[^/]+/(?P<id>\d+)' _TEST = { 'url': 'http://www.karaoketv.co.il/%D7%A9%D7%99%D7%A8%D7%99_%D7%A7%D7%A8%D7%99%D7%95%D7%A7%D7%99/58356/%D7%90%D7%99%D7%96%D7%95%D7%9F', 'info_dict': { diff --git a/youtube_dl/extractor/kickstarter.py b/youtube_dl/extractor/kickstarter.py index c61e78622..fbe499497 100644 --- a/youtube_dl/extractor/kickstarter.py +++ b/youtube_dl/extractor/kickstarter.py @@ -6,7 +6,7 @@ from ..utils import smuggle_url class KickStarterIE(InfoExtractor): - _VALID_URL = r'https?://www\.kickstarter\.com/projects/(?P<id>[^/]*)/.*' + _VALID_URL = r'https?://(?:www\.)?kickstarter\.com/projects/(?P<id>[^/]*)/.*' _TESTS = [{ 'url': 'https://www.kickstarter.com/projects/1404461844/intersection-the-story-of-josh-grant/description', 'md5': 'c81addca81327ffa66c642b5d8b08cab', diff --git a/youtube_dl/extractor/kuwo.py b/youtube_dl/extractor/kuwo.py index 0eeb9ffeb..ba621ca7b 100644 --- a/youtube_dl/extractor/kuwo.py +++ b/youtube_dl/extractor/kuwo.py @@ -59,7 +59,7 @@ class KuwoBaseIE(InfoExtractor): class KuwoIE(KuwoBaseIE): IE_NAME = 'kuwo:song' IE_DESC = '酷我音乐' - _VALID_URL = r'https?://www\.kuwo\.cn/yinyue/(?P<id>\d+)' + _VALID_URL = r'https?://(?:www\.)?kuwo\.cn/yinyue/(?P<id>\d+)' _TESTS = [{ 'url': 'http://www.kuwo.cn/yinyue/635632/', 'info_dict': { @@ -139,7 +139,7 @@ class KuwoIE(KuwoBaseIE): class KuwoAlbumIE(InfoExtractor): IE_NAME = 'kuwo:album' IE_DESC = '酷我音乐 - 专辑' - _VALID_URL = r'https?://www\.kuwo\.cn/album/(?P<id>\d+?)/' + _VALID_URL = r'https?://(?:www\.)?kuwo\.cn/album/(?P<id>\d+?)/' _TEST = { 'url': 'http://www.kuwo.cn/album/502294/', 'info_dict': { @@ -200,7 +200,7 @@ class KuwoChartIE(InfoExtractor): class KuwoSingerIE(InfoExtractor): IE_NAME = 'kuwo:singer' IE_DESC = '酷我音乐 - 歌手' - _VALID_URL = r'https?://www\.kuwo\.cn/mingxing/(?P<id>[^/]+)' + _VALID_URL = r'https?://(?:www\.)?kuwo\.cn/mingxing/(?P<id>[^/]+)' _TESTS = [{ 'url': 'http://www.kuwo.cn/mingxing/bruno+mars/', 'info_dict': { @@ -296,7 +296,7 @@ class KuwoCategoryIE(InfoExtractor): class KuwoMvIE(KuwoBaseIE): IE_NAME = 'kuwo:mv' IE_DESC = '酷我音乐 - MV' - _VALID_URL = r'https?://www\.kuwo\.cn/mv/(?P<id>\d+?)/' + _VALID_URL = r'https?://(?:www\.)?kuwo\.cn/mv/(?P<id>\d+?)/' _TEST = { 'url': 'http://www.kuwo.cn/mv/6480076/', 'info_dict': { diff --git a/youtube_dl/extractor/litv.py b/youtube_dl/extractor/litv.py index 05c6579f1..a3784e6c6 100644 --- a/youtube_dl/extractor/litv.py +++ b/youtube_dl/extractor/litv.py @@ -14,7 +14,7 @@ from ..utils import ( class LiTVIE(InfoExtractor): - _VALID_URL = r'https?://www\.litv\.tv/(?:vod|promo)/[^/]+/(?:content\.do)?\?.*?\b(?:content_)?id=(?P<id>[^&]+)' + _VALID_URL = r'https?://(?:www\.)?litv\.tv/(?:vod|promo)/[^/]+/(?:content\.do)?\?.*?\b(?:content_)?id=(?P<id>[^&]+)' _URL_TEMPLATE = 'https://www.litv.tv/vod/%s/content.do?id=%s' diff --git a/youtube_dl/extractor/lynda.py b/youtube_dl/extractor/lynda.py index a98c4c530..299873ecc 100644 --- a/youtube_dl/extractor/lynda.py +++ b/youtube_dl/extractor/lynda.py @@ -94,7 +94,7 @@ class LyndaBaseIE(InfoExtractor): class LyndaIE(LyndaBaseIE): IE_NAME = 'lynda' IE_DESC = 'lynda.com videos' - _VALID_URL = r'https?://www\.lynda\.com/(?:[^/]+/[^/]+/\d+|player/embed)/(?P<id>\d+)' + _VALID_URL = r'https?://(?:www\.)?lynda\.com/(?:[^/]+/[^/]+/\d+|player/embed)/(?P<id>\d+)' _TIMECODE_REGEX = r'\[(?P<timecode>\d+:\d+:\d+[\.,]\d+)\]' diff --git a/youtube_dl/extractor/macgamestore.py b/youtube_dl/extractor/macgamestore.py index 3cd4a3a19..43db9929c 100644 --- a/youtube_dl/extractor/macgamestore.py +++ b/youtube_dl/extractor/macgamestore.py @@ -7,7 +7,7 @@ from ..utils import ExtractorError class MacGameStoreIE(InfoExtractor): IE_NAME = 'macgamestore' IE_DESC = 'MacGameStore trailers' - _VALID_URL = r'https?://www\.macgamestore\.com/mediaviewer\.php\?trailer=(?P<id>\d+)' + _VALID_URL = r'https?://(?:www\.)?macgamestore\.com/mediaviewer\.php\?trailer=(?P<id>\d+)' _TEST = { 'url': 'http://www.macgamestore.com/mediaviewer.php?trailer=2450', diff --git a/youtube_dl/extractor/metacritic.py b/youtube_dl/extractor/metacritic.py index 444ec0310..7d468d78b 100644 --- a/youtube_dl/extractor/metacritic.py +++ b/youtube_dl/extractor/metacritic.py @@ -9,7 +9,7 @@ from ..utils import ( class MetacriticIE(InfoExtractor): - _VALID_URL = r'https?://www\.metacritic\.com/.+?/trailers/(?P<id>\d+)' + _VALID_URL = r'https?://(?:www\.)?metacritic\.com/.+?/trailers/(?P<id>\d+)' _TESTS = [{ 'url': 'http://www.metacritic.com/game/playstation-4/infamous-second-son/trailers/3698222', diff --git a/youtube_dl/extractor/mgtv.py b/youtube_dl/extractor/mgtv.py index 27bdff8b2..e0bb5d208 100644 --- a/youtube_dl/extractor/mgtv.py +++ b/youtube_dl/extractor/mgtv.py @@ -6,7 +6,7 @@ from ..utils import int_or_none class MGTVIE(InfoExtractor): - _VALID_URL = r'https?://www\.mgtv\.com/v/(?:[^/]+/)*(?P<id>\d+)\.html' + _VALID_URL = r'https?://(?:www\.)?mgtv\.com/v/(?:[^/]+/)*(?P<id>\d+)\.html' IE_DESC = '芒果TV' _TESTS = [{ diff --git a/youtube_dl/extractor/ministrygrid.py b/youtube_dl/extractor/ministrygrid.py index e48eba3fa..10190d5f6 100644 --- a/youtube_dl/extractor/ministrygrid.py +++ b/youtube_dl/extractor/ministrygrid.py @@ -8,7 +8,7 @@ from ..utils import ( class MinistryGridIE(InfoExtractor): - _VALID_URL = r'https?://www\.ministrygrid.com/([^/?#]*/)*(?P<id>[^/#?]+)/?(?:$|[?#])' + _VALID_URL = r'https?://(?:www\.)?ministrygrid\.com/([^/?#]*/)*(?P<id>[^/#?]+)/?(?:$|[?#])' _TEST = { 'url': 'http://www.ministrygrid.com/training-viewer/-/training/t4g-2014-conference/the-gospel-by-numbers-4/the-gospel-by-numbers', diff --git a/youtube_dl/extractor/mitele.py b/youtube_dl/extractor/mitele.py index cd169f361..2294745d4 100644 --- a/youtube_dl/extractor/mitele.py +++ b/youtube_dl/extractor/mitele.py @@ -74,7 +74,7 @@ class MiTeleBaseIE(InfoExtractor): class MiTeleIE(MiTeleBaseIE): IE_DESC = 'mitele.es' - _VALID_URL = r'https?://www\.mitele\.es/(?:[^/]+/){3}(?P<id>[^/]+)/' + _VALID_URL = r'https?://(?:www\.)?mitele\.es/(?:[^/]+/){3}(?P<id>[^/]+)/' _TESTS = [{ 'url': 'http://www.mitele.es/programas-tv/diario-de/la-redaccion/programa-144/', diff --git a/youtube_dl/extractor/motorsport.py b/youtube_dl/extractor/motorsport.py index 370328b36..c9d1ab64d 100644 --- a/youtube_dl/extractor/motorsport.py +++ b/youtube_dl/extractor/motorsport.py @@ -9,7 +9,7 @@ from ..compat import ( class MotorsportIE(InfoExtractor): IE_DESC = 'motorsport.com' - _VALID_URL = r'https?://www\.motorsport\.com/[^/?#]+/video/(?:[^/?#]+/)(?P<id>[^/]+)/?(?:$|[?#])' + _VALID_URL = r'https?://(?:www\.)?motorsport\.com/[^/?#]+/video/(?:[^/?#]+/)(?P<id>[^/]+)/?(?:$|[?#])' _TEST = { 'url': 'http://www.motorsport.com/f1/video/main-gallery/red-bull-racing-2014-rules-explained/', 'info_dict': { diff --git a/youtube_dl/extractor/moviezine.py b/youtube_dl/extractor/moviezine.py index f130b75c4..aa091a62c 100644 --- a/youtube_dl/extractor/moviezine.py +++ b/youtube_dl/extractor/moviezine.py @@ -7,7 +7,7 @@ from .common import InfoExtractor class MoviezineIE(InfoExtractor): - _VALID_URL = r'https?://www\.moviezine\.se/video/(?P<id>[^?#]+)' + _VALID_URL = r'https?://(?:www\.)?moviezine\.se/video/(?P<id>[^?#]+)' _TEST = { 'url': 'http://www.moviezine.se/video/205866', diff --git a/youtube_dl/extractor/myspass.py b/youtube_dl/extractor/myspass.py index 1ca7b1a9e..2afe535b5 100644 --- a/youtube_dl/extractor/myspass.py +++ b/youtube_dl/extractor/myspass.py @@ -11,7 +11,7 @@ from ..utils import ( class MySpassIE(InfoExtractor): - _VALID_URL = r'https?://www\.myspass\.de/.*' + _VALID_URL = r'https?://(?:www\.)?myspass\.de/.*' _TEST = { 'url': 'http://www.myspass.de/myspass/shows/tvshows/absolute-mehrheit/Absolute-Mehrheit-vom-17022013-Die-Highlights-Teil-2--/11741/', 'md5': '0b49f4844a068f8b33f4b7c88405862b', diff --git a/youtube_dl/extractor/nbc.py b/youtube_dl/extractor/nbc.py index f37bf2f30..7f1bd9229 100644 --- a/youtube_dl/extractor/nbc.py +++ b/youtube_dl/extractor/nbc.py @@ -13,7 +13,7 @@ from ..utils import ( class NBCIE(InfoExtractor): - _VALID_URL = r'https?://www\.nbc\.com/(?:[^/]+/)+(?P<id>n?\d+)' + _VALID_URL = r'https?://(?:www\.)?nbc\.com/(?:[^/]+/)+(?P<id>n?\d+)' _TESTS = [ { @@ -138,7 +138,7 @@ class NBCSportsVPlayerIE(InfoExtractor): class NBCSportsIE(InfoExtractor): # Does not include https because its certificate is invalid - _VALID_URL = r'https?://www\.nbcsports\.com//?(?:[^/]+/)+(?P<id>[0-9a-z-]+)' + _VALID_URL = r'https?://(?:www\.)?nbcsports\.com//?(?:[^/]+/)+(?P<id>[0-9a-z-]+)' _TEST = { 'url': 'http://www.nbcsports.com//college-basketball/ncaab/tom-izzo-michigan-st-has-so-much-respect-duke', @@ -161,7 +161,7 @@ class NBCSportsIE(InfoExtractor): class CSNNEIE(InfoExtractor): - _VALID_URL = r'https?://www\.csnne\.com/video/(?P<id>[0-9a-z-]+)' + _VALID_URL = r'https?://(?:www\.)?csnne\.com/video/(?P<id>[0-9a-z-]+)' _TEST = { 'url': 'http://www.csnne.com/video/snc-evening-update-wright-named-red-sox-no-5-starter', diff --git a/youtube_dl/extractor/ndr.py b/youtube_dl/extractor/ndr.py index 0cded6b5c..e3b0da2e9 100644 --- a/youtube_dl/extractor/ndr.py +++ b/youtube_dl/extractor/ndr.py @@ -23,7 +23,7 @@ class NDRBaseIE(InfoExtractor): class NDRIE(NDRBaseIE): IE_NAME = 'ndr' IE_DESC = 'NDR.de - Norddeutscher Rundfunk' - _VALID_URL = r'https?://www\.ndr\.de/(?:[^/]+/)*(?P<id>[^/?#]+),[\da-z]+\.html' + _VALID_URL = r'https?://(?:www\.)?ndr\.de/(?:[^/]+/)*(?P<id>[^/?#]+),[\da-z]+\.html' _TESTS = [{ # httpVideo, same content id 'url': 'http://www.ndr.de/fernsehen/Party-Poette-und-Parade,hafengeburtstag988.html', @@ -105,7 +105,7 @@ class NDRIE(NDRBaseIE): class NJoyIE(NDRBaseIE): IE_NAME = 'njoy' IE_DESC = 'N-JOY' - _VALID_URL = r'https?://www\.n-joy\.de/(?:[^/]+/)*(?:(?P<display_id>[^/?#]+),)?(?P<id>[\da-z]+)\.html' + _VALID_URL = r'https?://(?:www\.)?n-joy\.de/(?:[^/]+/)*(?:(?P<display_id>[^/?#]+),)?(?P<id>[\da-z]+)\.html' _TESTS = [{ # httpVideo, same content id 'url': 'http://www.n-joy.de/entertainment/comedy/comedy_contest/Benaissa-beim-NDR-Comedy-Contest,comedycontest2480.html', @@ -238,7 +238,7 @@ class NDREmbedBaseIE(InfoExtractor): class NDREmbedIE(NDREmbedBaseIE): IE_NAME = 'ndr:embed' - _VALID_URL = r'https?://www\.ndr\.de/(?:[^/]+/)*(?P<id>[\da-z]+)-(?:player|externalPlayer)\.html' + _VALID_URL = r'https?://(?:www\.)?ndr\.de/(?:[^/]+/)*(?P<id>[\da-z]+)-(?:player|externalPlayer)\.html' _TESTS = [{ 'url': 'http://www.ndr.de/fernsehen/sendungen/ndr_aktuell/ndraktuell28488-player.html', 'md5': '8b9306142fe65bbdefb5ce24edb6b0a9', @@ -332,7 +332,7 @@ class NDREmbedIE(NDREmbedBaseIE): class NJoyEmbedIE(NDREmbedBaseIE): IE_NAME = 'njoy:embed' - _VALID_URL = r'https?://www\.n-joy\.de/(?:[^/]+/)*(?P<id>[\da-z]+)-(?:player|externalPlayer)_[^/]+\.html' + _VALID_URL = r'https?://(?:www\.)?n-joy\.de/(?:[^/]+/)*(?P<id>[\da-z]+)-(?:player|externalPlayer)_[^/]+\.html' _TESTS = [{ # httpVideo 'url': 'http://www.n-joy.de/events/reeperbahnfestival/doku948-player_image-bc168e87-5263-4d6d-bd27-bb643005a6de_theme-n-joy.html', diff --git a/youtube_dl/extractor/nextmedia.py b/youtube_dl/extractor/nextmedia.py index aae7aeeeb..a08e48c4b 100644 --- a/youtube_dl/extractor/nextmedia.py +++ b/youtube_dl/extractor/nextmedia.py @@ -7,7 +7,7 @@ from ..utils import parse_iso8601 class NextMediaIE(InfoExtractor): IE_DESC = '蘋果日報' - _VALID_URL = r'https?://hk.apple.nextmedia.com/[^/]+/[^/]+/(?P<date>\d+)/(?P<id>\d+)' + _VALID_URL = r'https?://hk\.apple\.nextmedia\.com/[^/]+/[^/]+/(?P<date>\d+)/(?P<id>\d+)' _TESTS = [{ 'url': 'http://hk.apple.nextmedia.com/realtime/news/20141108/53109199', 'md5': 'dff9fad7009311c421176d1ac90bfe4f', @@ -68,7 +68,7 @@ class NextMediaIE(InfoExtractor): class NextMediaActionNewsIE(NextMediaIE): IE_DESC = '蘋果日報 - 動新聞' - _VALID_URL = r'https?://hk.dv.nextmedia.com/actionnews/[^/]+/(?P<date>\d+)/(?P<id>\d+)/\d+' + _VALID_URL = r'https?://hk\.dv\.nextmedia\.com/actionnews/[^/]+/(?P<date>\d+)/(?P<id>\d+)/\d+' _TESTS = [{ 'url': 'http://hk.dv.nextmedia.com/actionnews/hit/20150121/19009428/20061460', 'md5': '05fce8ffeed7a5e00665d4b7cf0f9201', @@ -93,7 +93,7 @@ class NextMediaActionNewsIE(NextMediaIE): class AppleDailyIE(NextMediaIE): IE_DESC = '臺灣蘋果日報' - _VALID_URL = r'https?://(www|ent).appledaily.com.tw/(?:animation|appledaily|enews|realtimenews)/[^/]+/[^/]+/(?P<date>\d+)/(?P<id>\d+)(/.*)?' + _VALID_URL = r'https?://(www|ent)\.appledaily\.com\.tw/(?:animation|appledaily|enews|realtimenews)/[^/]+/[^/]+/(?P<date>\d+)/(?P<id>\d+)(/.*)?' _TESTS = [{ 'url': 'http://ent.appledaily.com.tw/enews/article/entertainment/20150128/36354694', 'md5': 'a843ab23d150977cc55ef94f1e2c1e4d', diff --git a/youtube_dl/extractor/niconico.py b/youtube_dl/extractor/niconico.py index dd75a48af..6eaaa8416 100644 --- a/youtube_dl/extractor/niconico.py +++ b/youtube_dl/extractor/niconico.py @@ -252,7 +252,7 @@ class NiconicoIE(InfoExtractor): class NiconicoPlaylistIE(InfoExtractor): - _VALID_URL = r'https?://www\.nicovideo\.jp/mylist/(?P<id>\d+)' + _VALID_URL = r'https?://(?:www\.)?nicovideo\.jp/mylist/(?P<id>\d+)' _TEST = { 'url': 'http://www.nicovideo.jp/mylist/27411728', diff --git a/youtube_dl/extractor/oktoberfesttv.py b/youtube_dl/extractor/oktoberfesttv.py index 4a41c0542..f2ccc53dc 100644 --- a/youtube_dl/extractor/oktoberfesttv.py +++ b/youtube_dl/extractor/oktoberfesttv.py @@ -5,7 +5,7 @@ from .common import InfoExtractor class OktoberfestTVIE(InfoExtractor): - _VALID_URL = r'https?://www\.oktoberfest-tv\.de/[^/]+/[^/]+/video/(?P<id>[^/?#]+)' + _VALID_URL = r'https?://(?:www\.)?oktoberfest-tv\.de/[^/]+/[^/]+/video/(?P<id>[^/?#]+)' _TEST = { 'url': 'http://www.oktoberfest-tv.de/de/kameras/video/hb-zelt', diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index 76316ca2f..c261a7455 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -13,7 +13,7 @@ from ..utils import ( class OpenloadIE(InfoExtractor): - _VALID_URL = r'https://openload.(?:co|io)/(?:f|embed)/(?P<id>[a-zA-Z0-9-_]+)' + _VALID_URL = r'https?://openload\.(?:co|io)/(?:f|embed)/(?P<id>[a-zA-Z0-9-_]+)' _TESTS = [{ 'url': 'https://openload.co/f/kUEfGclsU9o', diff --git a/youtube_dl/extractor/periscope.py b/youtube_dl/extractor/periscope.py index 6c640089d..eb1aeba46 100644 --- a/youtube_dl/extractor/periscope.py +++ b/youtube_dl/extractor/periscope.py @@ -94,7 +94,7 @@ class PeriscopeIE(PeriscopeBaseIE): class PeriscopeUserIE(PeriscopeBaseIE): - _VALID_URL = r'https?://www\.periscope\.tv/(?P<id>[^/]+)/?$' + _VALID_URL = r'https?://(?:www\.)?periscope\.tv/(?P<id>[^/]+)/?$' IE_DESC = 'Periscope user videos' IE_NAME = 'periscope:user' diff --git a/youtube_dl/extractor/playvid.py b/youtube_dl/extractor/playvid.py index 78d219299..79c2db085 100644 --- a/youtube_dl/extractor/playvid.py +++ b/youtube_dl/extractor/playvid.py @@ -14,7 +14,7 @@ from ..utils import ( class PlayvidIE(InfoExtractor): - _VALID_URL = r'https?://www\.playvid\.com/watch(\?v=|/)(?P<id>.+?)(?:#|$)' + _VALID_URL = r'https?://(?:www\.)?playvid\.com/watch(\?v=|/)(?P<id>.+?)(?:#|$)' _TESTS = [{ 'url': 'http://www.playvid.com/watch/RnmBNgtrrJu', 'md5': 'ffa2f6b2119af359f544388d8c01eb6c', diff --git a/youtube_dl/extractor/qqmusic.py b/youtube_dl/extractor/qqmusic.py index ff0af9543..37cb9e2c9 100644 --- a/youtube_dl/extractor/qqmusic.py +++ b/youtube_dl/extractor/qqmusic.py @@ -18,7 +18,7 @@ from ..utils import ( class QQMusicIE(InfoExtractor): IE_NAME = 'qqmusic' IE_DESC = 'QQ音乐' - _VALID_URL = r'https?://y.qq.com/#type=song&mid=(?P<id>[0-9A-Za-z]+)' + _VALID_URL = r'https?://y\.qq\.com/#type=song&mid=(?P<id>[0-9A-Za-z]+)' _TESTS = [{ 'url': 'http://y.qq.com/#type=song&mid=004295Et37taLD', 'md5': '9ce1c1c8445f561506d2e3cfb0255705', @@ -172,7 +172,7 @@ class QQPlaylistBaseIE(InfoExtractor): class QQMusicSingerIE(QQPlaylistBaseIE): IE_NAME = 'qqmusic:singer' IE_DESC = 'QQ音乐 - 歌手' - _VALID_URL = r'https?://y.qq.com/#type=singer&mid=(?P<id>[0-9A-Za-z]+)' + _VALID_URL = r'https?://y\.qq\.com/#type=singer&mid=(?P<id>[0-9A-Za-z]+)' _TEST = { 'url': 'http://y.qq.com/#type=singer&mid=001BLpXF2DyJe2', 'info_dict': { @@ -217,7 +217,7 @@ class QQMusicSingerIE(QQPlaylistBaseIE): class QQMusicAlbumIE(QQPlaylistBaseIE): IE_NAME = 'qqmusic:album' IE_DESC = 'QQ音乐 - 专辑' - _VALID_URL = r'https?://y.qq.com/#type=album&mid=(?P<id>[0-9A-Za-z]+)' + _VALID_URL = r'https?://y\.qq\.com/#type=album&mid=(?P<id>[0-9A-Za-z]+)' _TESTS = [{ 'url': 'http://y.qq.com/#type=album&mid=000gXCTb2AhRR1', diff --git a/youtube_dl/extractor/rottentomatoes.py b/youtube_dl/extractor/rottentomatoes.py index 23abf7a27..1d404d20a 100644 --- a/youtube_dl/extractor/rottentomatoes.py +++ b/youtube_dl/extractor/rottentomatoes.py @@ -5,7 +5,7 @@ from .internetvideoarchive import InternetVideoArchiveIE class RottenTomatoesIE(InfoExtractor): - _VALID_URL = r'https?://www\.rottentomatoes\.com/m/[^/]+/trailers/(?P<id>\d+)' + _VALID_URL = r'https?://(?:www\.)?rottentomatoes\.com/m/[^/]+/trailers/(?P<id>\d+)' _TEST = { 'url': 'http://www.rottentomatoes.com/m/toy_story_3/trailers/11028566/', diff --git a/youtube_dl/extractor/roxwel.py b/youtube_dl/extractor/roxwel.py index 41638c1d0..65284643b 100644 --- a/youtube_dl/extractor/roxwel.py +++ b/youtube_dl/extractor/roxwel.py @@ -7,7 +7,7 @@ from ..utils import unified_strdate, determine_ext class RoxwelIE(InfoExtractor): - _VALID_URL = r'https?://www\.roxwel\.com/player/(?P<filename>.+?)(\.|\?|$)' + _VALID_URL = r'https?://(?:www\.)?roxwel\.com/player/(?P<filename>.+?)(\.|\?|$)' _TEST = { 'url': 'http://www.roxwel.com/player/passionpittakeawalklive.html', diff --git a/youtube_dl/extractor/rtve.py b/youtube_dl/extractor/rtve.py index 34f9c4a99..f1b92f6da 100644 --- a/youtube_dl/extractor/rtve.py +++ b/youtube_dl/extractor/rtve.py @@ -64,7 +64,7 @@ def _decrypt_url(png): class RTVEALaCartaIE(InfoExtractor): IE_NAME = 'rtve.es:alacarta' IE_DESC = 'RTVE a la carta' - _VALID_URL = r'https?://www\.rtve\.es/(m/)?(alacarta/videos|filmoteca)/[^/]+/[^/]+/(?P<id>\d+)' + _VALID_URL = r'https?://(?:www\.)?rtve\.es/(m/)?(alacarta/videos|filmoteca)/[^/]+/[^/]+/(?P<id>\d+)' _TESTS = [{ 'url': 'http://www.rtve.es/alacarta/videos/balonmano/o-swiss-cup-masculina-final-espana-suecia/2491869/', @@ -184,7 +184,7 @@ class RTVEInfantilIE(InfoExtractor): class RTVELiveIE(InfoExtractor): IE_NAME = 'rtve.es:live' IE_DESC = 'RTVE.es live streams' - _VALID_URL = r'https?://www\.rtve\.es/directo/(?P<id>[a-zA-Z0-9-]+)' + _VALID_URL = r'https?://(?:www\.)?rtve\.es/directo/(?P<id>[a-zA-Z0-9-]+)' _TESTS = [{ 'url': 'http://www.rtve.es/directo/la-1/', @@ -226,7 +226,7 @@ class RTVELiveIE(InfoExtractor): class RTVETelevisionIE(InfoExtractor): IE_NAME = 'rtve.es:television' - _VALID_URL = r'https?://www\.rtve\.es/television/[^/]+/[^/]+/(?P<id>\d+).shtml' + _VALID_URL = r'https?://(?:www\.)?rtve\.es/television/[^/]+/[^/]+/(?P<id>\d+).shtml' _TEST = { 'url': 'http://www.rtve.es/television/20160628/revolucion-del-movil/1364141.shtml', diff --git a/youtube_dl/extractor/screenjunkies.py b/youtube_dl/extractor/screenjunkies.py index dd0a6ba19..02e574cd8 100644 --- a/youtube_dl/extractor/screenjunkies.py +++ b/youtube_dl/extractor/screenjunkies.py @@ -11,7 +11,7 @@ from ..utils import ( class ScreenJunkiesIE(InfoExtractor): - _VALID_URL = r'https?://www.screenjunkies.com/video/(?P<display_id>[^/]+?)(?:-(?P<id>\d+))?(?:[/?#&]|$)' + _VALID_URL = r'https?://(?:www\.)?screenjunkies\.com/video/(?P<display_id>[^/]+?)(?:-(?P<id>\d+))?(?:[/?#&]|$)' _TESTS = [{ 'url': 'http://www.screenjunkies.com/video/best-quentin-tarantino-movie-2841915', 'md5': '5c2b686bec3d43de42bde9ec047536b0', diff --git a/youtube_dl/extractor/senateisvp.py b/youtube_dl/extractor/senateisvp.py index c5f474dd1..35540c082 100644 --- a/youtube_dl/extractor/senateisvp.py +++ b/youtube_dl/extractor/senateisvp.py @@ -48,7 +48,7 @@ class SenateISVPIE(InfoExtractor): ['arch', '', 'http://ussenate-f.akamaihd.net/'] ] _IE_NAME = 'senate.gov' - _VALID_URL = r'https?://www\.senate\.gov/isvp/?\?(?P<qs>.+)' + _VALID_URL = r'https?://(?:www\.)?senate\.gov/isvp/?\?(?P<qs>.+)' _TESTS = [{ 'url': 'http://www.senate.gov/isvp/?comm=judiciary&type=live&stt=&filename=judiciary031715&auto_play=false&wmode=transparent&poster=http%3A%2F%2Fwww.judiciary.senate.gov%2Fthemes%2Fjudiciary%2Fimages%2Fvideo-poster-flash-fit.png', 'info_dict': { diff --git a/youtube_dl/extractor/slideshare.py b/youtube_dl/extractor/slideshare.py index 4967c1b77..74a1dc672 100644 --- a/youtube_dl/extractor/slideshare.py +++ b/youtube_dl/extractor/slideshare.py @@ -14,7 +14,7 @@ from ..utils import ( class SlideshareIE(InfoExtractor): - _VALID_URL = r'https?://www\.slideshare\.net/[^/]+?/(?P<title>.+?)($|\?)' + _VALID_URL = r'https?://(?:www\.)?slideshare\.net/[^/]+?/(?P<title>.+?)($|\?)' _TEST = { 'url': 'http://www.slideshare.net/Dataversity/keynote-presentation-managing-scale-and-complexity', diff --git a/youtube_dl/extractor/spiegel.py b/youtube_dl/extractor/spiegel.py index 74cb3a08a..b41d9f59f 100644 --- a/youtube_dl/extractor/spiegel.py +++ b/youtube_dl/extractor/spiegel.py @@ -103,7 +103,7 @@ class SpiegelIE(InfoExtractor): class SpiegelArticleIE(InfoExtractor): - _VALID_URL = r'https?://www\.spiegel\.de/(?!video/)[^?#]*?-(?P<id>[0-9]+)\.html' + _VALID_URL = r'https?://(?:www\.)?spiegel\.de/(?!video/)[^?#]*?-(?P<id>[0-9]+)\.html' IE_NAME = 'Spiegel:Article' IE_DESC = 'Articles on spiegel.de' _TESTS = [{ diff --git a/youtube_dl/extractor/syfy.py b/youtube_dl/extractor/syfy.py index ab8bab5cd..def7e5a2c 100644 --- a/youtube_dl/extractor/syfy.py +++ b/youtube_dl/extractor/syfy.py @@ -8,7 +8,7 @@ from ..utils import ( class SyfyIE(AdobePassIE): - _VALID_URL = r'https?://www\.syfy\.com/(?:[^/]+/)?videos/(?P<id>[^/?#]+)' + _VALID_URL = r'https?://(?:www\.)?syfy\.com/(?:[^/]+/)?videos/(?P<id>[^/?#]+)' _TESTS = [{ 'url': 'http://www.syfy.com/theinternetruinedmylife/videos/the-internet-ruined-my-life-season-1-trailer', 'info_dict': { diff --git a/youtube_dl/extractor/teachingchannel.py b/youtube_dl/extractor/teachingchannel.py index d14d93e3a..e89759714 100644 --- a/youtube_dl/extractor/teachingchannel.py +++ b/youtube_dl/extractor/teachingchannel.py @@ -7,7 +7,7 @@ from .ooyala import OoyalaIE class TeachingChannelIE(InfoExtractor): - _VALID_URL = r'https?://www\.teachingchannel\.org/videos/(?P<title>.+)' + _VALID_URL = r'https?://(?:www\.)?teachingchannel\.org/videos/(?P<title>.+)' _TEST = { 'url': 'https://www.teachingchannel.org/videos/teacher-teaming-evolution', diff --git a/youtube_dl/extractor/telecinco.py b/youtube_dl/extractor/telecinco.py index 2ecfd0405..d5abfc9e4 100644 --- a/youtube_dl/extractor/telecinco.py +++ b/youtube_dl/extractor/telecinco.py @@ -6,7 +6,7 @@ from .mitele import MiTeleBaseIE class TelecincoIE(MiTeleBaseIE): IE_DESC = 'telecinco.es, cuatro.com and mediaset.es' - _VALID_URL = r'https?://www\.(?:telecinco\.es|cuatro\.com|mediaset\.es)/(?:[^/]+/)+(?P<id>.+?)\.html' + _VALID_URL = r'https?://(?:www\.)?(?:telecinco\.es|cuatro\.com|mediaset\.es)/(?:[^/]+/)+(?P<id>.+?)\.html' _TESTS = [{ 'url': 'http://www.telecinco.es/robinfood/temporada-01/t01xp14/Bacalao-cocochas-pil-pil_0_1876350223.html', diff --git a/youtube_dl/extractor/telewebion.py b/youtube_dl/extractor/telewebion.py index 77916c601..7786b2813 100644 --- a/youtube_dl/extractor/telewebion.py +++ b/youtube_dl/extractor/telewebion.py @@ -5,7 +5,7 @@ from .common import InfoExtractor class TelewebionIE(InfoExtractor): - _VALID_URL = r'https?://www\.telewebion\.com/#!/episode/(?P<id>\d+)' + _VALID_URL = r'https?://(?:www\.)?telewebion\.com/#!/episode/(?P<id>\d+)' _TEST = { 'url': 'http://www.telewebion.com/#!/episode/1263668/', diff --git a/youtube_dl/extractor/theintercept.py b/youtube_dl/extractor/theintercept.py index 8cb3c3669..ec6f4ecaa 100644 --- a/youtube_dl/extractor/theintercept.py +++ b/youtube_dl/extractor/theintercept.py @@ -11,7 +11,7 @@ from ..utils import ( class TheInterceptIE(InfoExtractor): - _VALID_URL = r'https://theintercept.com/fieldofvision/(?P<id>[^/?#]+)' + _VALID_URL = r'https?://theintercept\.com/fieldofvision/(?P<id>[^/?#]+)' _TESTS = [{ 'url': 'https://theintercept.com/fieldofvision/thisisacoup-episode-four-surrender-or-die/', 'md5': '145f28b41d44aab2f87c0a4ac8ec95bd', diff --git a/youtube_dl/extractor/thescene.py b/youtube_dl/extractor/thescene.py index 3e4e14031..ce1326c03 100644 --- a/youtube_dl/extractor/thescene.py +++ b/youtube_dl/extractor/thescene.py @@ -7,7 +7,7 @@ from ..utils import qualities class TheSceneIE(InfoExtractor): - _VALID_URL = r'https://thescene\.com/watch/[^/]+/(?P<id>[^/#?]+)' + _VALID_URL = r'https?://thescene\.com/watch/[^/]+/(?P<id>[^/#?]+)' _TEST = { 'url': 'https://thescene.com/watch/vogue/narciso-rodriguez-spring-2013-ready-to-wear', diff --git a/youtube_dl/extractor/tlc.py b/youtube_dl/extractor/tlc.py index 88eb83d74..ce4f91f46 100644 --- a/youtube_dl/extractor/tlc.py +++ b/youtube_dl/extractor/tlc.py @@ -13,7 +13,7 @@ from ..compat import ( class TlcDeIE(InfoExtractor): IE_NAME = 'tlc.de' - _VALID_URL = r'https?://www\.tlc\.de/(?:[^/]+/)*videos/(?P<title>[^/?#]+)?(?:.*#(?P<id>\d+))?' + _VALID_URL = r'https?://(?:www\.)?tlc\.de/(?:[^/]+/)*videos/(?P<title>[^/?#]+)?(?:.*#(?P<id>\d+))?' _TEST = { 'url': 'http://www.tlc.de/sendungen/breaking-amish/videos/#3235167922001', diff --git a/youtube_dl/extractor/udemy.py b/youtube_dl/extractor/udemy.py index 89b869559..c2f507233 100644 --- a/youtube_dl/extractor/udemy.py +++ b/youtube_dl/extractor/udemy.py @@ -307,7 +307,7 @@ class UdemyIE(InfoExtractor): class UdemyCourseIE(UdemyIE): IE_NAME = 'udemy:course' - _VALID_URL = r'https?://www\.udemy\.com/(?P<id>[^/?#&]+)' + _VALID_URL = r'https?://(?:www\.)?udemy\.com/(?P<id>[^/?#&]+)' _TESTS = [] @classmethod diff --git a/youtube_dl/extractor/ustream.py b/youtube_dl/extractor/ustream.py index 54605d863..a3dc9d33e 100644 --- a/youtube_dl/extractor/ustream.py +++ b/youtube_dl/extractor/ustream.py @@ -14,7 +14,7 @@ from ..utils import ( class UstreamIE(InfoExtractor): - _VALID_URL = r'https?://www\.ustream\.tv/(?P<type>recorded|embed|embed/recorded)/(?P<id>\d+)' + _VALID_URL = r'https?://(?:www\.)?ustream\.tv/(?P<type>recorded|embed|embed/recorded)/(?P<id>\d+)' IE_NAME = 'ustream' _TESTS = [{ 'url': 'http://www.ustream.tv/recorded/20274954', @@ -117,7 +117,7 @@ class UstreamIE(InfoExtractor): class UstreamChannelIE(InfoExtractor): - _VALID_URL = r'https?://www\.ustream\.tv/channel/(?P<slug>.+)' + _VALID_URL = r'https?://(?:www\.)?ustream\.tv/channel/(?P<slug>.+)' IE_NAME = 'ustream:channel' _TEST = { 'url': 'http://www.ustream.tv/channel/channeljapan', diff --git a/youtube_dl/extractor/vevo.py b/youtube_dl/extractor/vevo.py index 388b4debe..783efda7d 100644 --- a/youtube_dl/extractor/vevo.py +++ b/youtube_dl/extractor/vevo.py @@ -31,7 +31,7 @@ class VevoIE(VevoBaseIE): (currently used by MTVIE and MySpaceIE) ''' _VALID_URL = r'''(?x) - (?:https?://www\.vevo\.com/watch/(?!playlist|genre)(?:[^/]+/(?:[^/]+/)?)?| + (?:https?://(?:www\.)?vevo\.com/watch/(?!playlist|genre)(?:[^/]+/(?:[^/]+/)?)?| https?://cache\.vevo\.com/m/html/embed\.html\?video=| https?://videoplayer\.vevo\.com/embed/embedded\?videoId=| vevo:) @@ -374,7 +374,7 @@ class VevoIE(VevoBaseIE): class VevoPlaylistIE(VevoBaseIE): - _VALID_URL = r'https?://www\.vevo\.com/watch/(?P<kind>playlist|genre)/(?P<id>[^/?#&]+)' + _VALID_URL = r'https?://(?:www\.)?vevo\.com/watch/(?P<kind>playlist|genre)/(?P<id>[^/?#&]+)' _TESTS = [{ 'url': 'http://www.vevo.com/watch/playlist/dadbf4e7-b99f-4184-9670-6f0e547b6a29', diff --git a/youtube_dl/extractor/videodetective.py b/youtube_dl/extractor/videodetective.py index 2ed5d9643..a19411a05 100644 --- a/youtube_dl/extractor/videodetective.py +++ b/youtube_dl/extractor/videodetective.py @@ -6,7 +6,7 @@ from .internetvideoarchive import InternetVideoArchiveIE class VideoDetectiveIE(InfoExtractor): - _VALID_URL = r'https?://www\.videodetective\.com/[^/]+/[^/]+/(?P<id>\d+)' + _VALID_URL = r'https?://(?:www\.)?videodetective\.com/[^/]+/[^/]+/(?P<id>\d+)' _TEST = { 'url': 'http://www.videodetective.com/movies/kick-ass-2/194487', diff --git a/youtube_dl/extractor/weiqitv.py b/youtube_dl/extractor/weiqitv.py index 3dafbeec2..8e09156c2 100644 --- a/youtube_dl/extractor/weiqitv.py +++ b/youtube_dl/extractor/weiqitv.py @@ -6,7 +6,7 @@ from .common import InfoExtractor class WeiqiTVIE(InfoExtractor): IE_DESC = 'WQTV' - _VALID_URL = r'https?://www\.weiqitv\.com/index/video_play\?videoId=(?P<id>[A-Za-z0-9]+)' + _VALID_URL = r'https?://(?:www\.)?weiqitv\.com/index/video_play\?videoId=(?P<id>[A-Za-z0-9]+)' _TESTS = [{ 'url': 'http://www.weiqitv.com/index/video_play?videoId=53c744f09874f0e76a8b46f3', diff --git a/youtube_dl/extractor/yam.py b/youtube_dl/extractor/yam.py index 63bbc0634..ef5535547 100644 --- a/youtube_dl/extractor/yam.py +++ b/youtube_dl/extractor/yam.py @@ -15,7 +15,7 @@ from ..utils import ( class YamIE(InfoExtractor): IE_DESC = '蕃薯藤yam天空部落' - _VALID_URL = r'https?://mymedia.yam.com/m/(?P<id>\d+)' + _VALID_URL = r'https?://mymedia\.yam\.com/m/(?P<id>\d+)' _TESTS = [{ # An audio hosted on Yam diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 5082cb589..5ca903825 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -2302,7 +2302,7 @@ class YoutubeSearchURLIE(YoutubePlaylistBaseInfoExtractor): class YoutubeShowIE(YoutubePlaylistsBaseInfoExtractor): IE_DESC = 'YouTube.com (multi-season) shows' - _VALID_URL = r'https?://www\.youtube\.com/show/(?P<id>[^?#]*)' + _VALID_URL = r'https?://(?:www\.)?youtube\.com/show/(?P<id>[^?#]*)' IE_NAME = 'youtube:show' _TESTS = [{ 'url': 'https://www.youtube.com/show/airdisasters', @@ -2371,7 +2371,7 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor): class YoutubeWatchLaterIE(YoutubePlaylistIE): IE_NAME = 'youtube:watchlater' IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)' - _VALID_URL = r'https?://www\.youtube\.com/(?:feed/watch_later|(?:playlist|watch)\?(?:.+&)?list=WL)|:ytwatchlater' + _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:feed/watch_later|(?:playlist|watch)\?(?:.+&)?list=WL)|:ytwatchlater' _TESTS = [{ 'url': 'https://www.youtube.com/playlist?list=WL', @@ -2392,7 +2392,7 @@ class YoutubeWatchLaterIE(YoutubePlaylistIE): class YoutubeFavouritesIE(YoutubeBaseInfoExtractor): IE_NAME = 'youtube:favorites' IE_DESC = 'YouTube.com favourite videos, ":ytfav" for short (requires authentication)' - _VALID_URL = r'https?://www\.youtube\.com/my_favorites|:ytfav(?:ou?rites)?' + _VALID_URL = r'https?://(?:www\.)?youtube\.com/my_favorites|:ytfav(?:ou?rites)?' _LOGIN_REQUIRED = True def _real_extract(self, url): @@ -2403,21 +2403,21 @@ class YoutubeFavouritesIE(YoutubeBaseInfoExtractor): class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor): IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)' - _VALID_URL = r'https?://www\.youtube\.com/feed/recommended|:ytrec(?:ommended)?' + _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/recommended|:ytrec(?:ommended)?' _FEED_NAME = 'recommended' _PLAYLIST_TITLE = 'Youtube Recommended videos' class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor): IE_DESC = 'YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)' - _VALID_URL = r'https?://www\.youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?' + _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?' _FEED_NAME = 'subscriptions' _PLAYLIST_TITLE = 'Youtube Subscriptions' class YoutubeHistoryIE(YoutubeFeedsInfoExtractor): IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)' - _VALID_URL = r'https?://www\.youtube\.com/feed/history|:ythistory' + _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/history|:ythistory' _FEED_NAME = 'history' _PLAYLIST_TITLE = 'Youtube History' From 014b7e6b25be5583c772af054cd7a1e37a327088 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Wed, 14 Sep 2016 17:07:05 +0100 Subject: [PATCH 0139/1571] [go] add support for free full episodes(#10439) --- youtube_dl/extractor/go.py | 27 ++++++++++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/go.py b/youtube_dl/extractor/go.py index 6a437c54d..7925c1e22 100644 --- a/youtube_dl/extractor/go.py +++ b/youtube_dl/extractor/go.py @@ -8,6 +8,8 @@ from ..utils import ( int_or_none, determine_ext, parse_age_limit, + urlencode_postdata, + ExtractorError, ) @@ -19,7 +21,7 @@ class GoIE(InfoExtractor): 'watchdisneyjunior': '008', 'watchdisneyxd': '009', } - _VALID_URL = r'https?://(?:(?P<sub_domain>%s)\.)?go\.com/.*?vdka(?P<id>\w+)' % '|'.join(_BRANDS.keys()) + _VALID_URL = r'https?://(?:(?P<sub_domain>%s)\.)?go\.com/(?:[^/]+/)*(?:vdka(?P<id>\w+)|season-\d+/\d+-(?P<display_id>[^/?#]+))' % '|'.join(_BRANDS.keys()) _TESTS = [{ 'url': 'http://abc.go.com/shows/castle/video/most-recent/vdka0_g86w5onx', 'info_dict': { @@ -38,9 +40,13 @@ class GoIE(InfoExtractor): }] def _real_extract(self, url): - sub_domain, video_id = re.match(self._VALID_URL, url).groups() + sub_domain, video_id, display_id = re.match(self._VALID_URL, url).groups() + if not video_id: + webpage = self._download_webpage(url, display_id) + video_id = self._search_regex(r'data-video-id=["\']VDKA(\w+)', webpage, 'video id') + brand = self._BRANDS[sub_domain] video_data = self._download_json( - 'http://api.contents.watchabc.go.com/vp2/ws/contents/3000/videos/%s/001/-1/-1/-1/%s/-1/-1.json' % (self._BRANDS[sub_domain], video_id), + 'http://api.contents.watchabc.go.com/vp2/ws/contents/3000/videos/%s/001/-1/-1/-1/%s/-1/-1.json' % (brand, video_id), video_id)['video'][0] title = video_data['title'] @@ -52,6 +58,21 @@ class GoIE(InfoExtractor): format_id = asset.get('format') ext = determine_ext(asset_url) if ext == 'm3u8': + video_type = video_data.get('type') + if video_type == 'lf': + entitlement = self._download_json( + 'https://api.entitlement.watchabc.go.com/vp2/ws-secure/entitlement/2020/authorize.json', + video_id, data=urlencode_postdata({ + 'video_id': video_data['id'], + 'video_type': video_type, + 'brand': brand, + 'device': '001', + })) + errors = entitlement.get('errors', {}).get('errors', []) + if errors: + error_massege = ', '.join([error['message'] for error in errors]) + raise ExtractorError('%s said: %s' % (self.IE_NAME, error_massege), expected=True) + asset_url += '?' + entitlement['uplynkData']['sessionKey'] formats.extend(self._extract_m3u8_formats( asset_url, video_id, 'mp4', m3u8_id=format_id or 'hls', fatal=False)) else: From 353f340e11d7fc4a0a4973ddd85bc93b1061a487 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Wed, 14 Sep 2016 17:22:42 +0100 Subject: [PATCH 0140/1571] [go] fix typo --- youtube_dl/extractor/go.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/go.py b/youtube_dl/extractor/go.py index 7925c1e22..c7776b186 100644 --- a/youtube_dl/extractor/go.py +++ b/youtube_dl/extractor/go.py @@ -70,8 +70,8 @@ class GoIE(InfoExtractor): })) errors = entitlement.get('errors', {}).get('errors', []) if errors: - error_massege = ', '.join([error['message'] for error in errors]) - raise ExtractorError('%s said: %s' % (self.IE_NAME, error_massege), expected=True) + error_message = ', '.join([error['message'] for error in errors]) + raise ExtractorError('%s said: %s' % (self.IE_NAME, error_message), expected=True) asset_url += '?' + entitlement['uplynkData']['sessionKey'] formats.extend(self._extract_m3u8_formats( asset_url, video_id, 'mp4', m3u8_id=format_id or 'hls', fatal=False)) From 6db354a9f4c62c3cc47918adc13e1e4b63146c80 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Thu, 15 Sep 2016 00:53:04 +0800 Subject: [PATCH 0141/1571] [kuwo] Update _TESTS --- youtube_dl/extractor/kuwo.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/kuwo.py b/youtube_dl/extractor/kuwo.py index ba621ca7b..081af86f6 100644 --- a/youtube_dl/extractor/kuwo.py +++ b/youtube_dl/extractor/kuwo.py @@ -82,7 +82,7 @@ class KuwoIE(KuwoBaseIE): 'upload_date': '20150518', }, 'params': { - 'format': 'mp3-320' + 'format': 'mp3-320', }, }, { 'url': 'http://www.kuwo.cn/yinyue/3197154?catalog=yueku2016', @@ -181,7 +181,7 @@ class KuwoChartIE(InfoExtractor): 'info_dict': { 'id': '香港中文龙虎榜', }, - 'playlist_mincount': 10, + 'playlist_mincount': 7, } def _real_extract(self, url): @@ -303,7 +303,7 @@ class KuwoMvIE(KuwoBaseIE): 'id': '6480076', 'ext': 'mp4', 'title': 'My HouseMV', - 'creator': 'PM02:00', + 'creator': '2PM', }, # In this video, music URLs (anti.s) are blocked outside China and # USA, while the MV URL (mvurl) is available globally, so force the MV From 961516bfd1f3b514859f03766d282824ba8a76f5 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Thu, 15 Sep 2016 00:56:15 +0800 Subject: [PATCH 0142/1571] [kwuo:song] Improve error detection (closes #10650) --- ChangeLog | 1 + youtube_dl/extractor/kuwo.py | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/ChangeLog b/ChangeLog index 25c916eb2..c3c8bf037 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,7 @@ version <unreleased> Extractors +* [kwuo] Improve error detection (#10650) * [bilibili] Fix extraction for specific videos (#10647) + [nbc] Add support for NBC Olympics (#10361) diff --git a/youtube_dl/extractor/kuwo.py b/youtube_dl/extractor/kuwo.py index 081af86f6..63e10125e 100644 --- a/youtube_dl/extractor/kuwo.py +++ b/youtube_dl/extractor/kuwo.py @@ -91,10 +91,10 @@ class KuwoIE(KuwoBaseIE): def _real_extract(self, url): song_id = self._match_id(url) - webpage = self._download_webpage( + webpage, urlh = self._download_webpage_handle( url, song_id, note='Download song detail info', errnote='Unable to get song detail info') - if '对不起,该歌曲由于版权问题已被下线,将返回网站首页' in webpage: + if song_id not in urlh.geturl() or '对不起,该歌曲由于版权问题已被下线,将返回网站首页' in webpage: raise ExtractorError('this song has been offline because of copyright issues', expected=True) song_name = self._html_search_regex( From a942d6cb48994c5ff14ccef8773fb086a5544970 Mon Sep 17 00:00:00 2001 From: renalid <renaud.euvrard@MAC-1636.local> Date: Fri, 2 Sep 2016 18:31:52 +0200 Subject: [PATCH 0143/1571] [utils,franceinter] Add french months' names and fix extraction Update of the "FranceInter" radio extractor : webpages HTML structure had changed, the extractor didn't work. So I updated this extractor to get the mp3 URL and all details. --- youtube_dl/extractor/franceinter.py | 38 ++++++++++++++++------------- youtube_dl/utils.py | 13 ++++++++-- 2 files changed, 32 insertions(+), 19 deletions(-) diff --git a/youtube_dl/extractor/franceinter.py b/youtube_dl/extractor/franceinter.py index 2369f868d..6dad8d712 100644 --- a/youtube_dl/extractor/franceinter.py +++ b/youtube_dl/extractor/franceinter.py @@ -2,20 +2,24 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..utils import int_or_none +from ..utils import ( + unified_timestamp, + month_by_name, +) class FranceInterIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?franceinter\.fr/player/reecouter\?play=(?P<id>[0-9]+)' + _VALID_URL = r'https?://(?:www\.)?franceinter\.fr/emissions/(?P<id>[^?#]+)' + _TEST = { - 'url': 'http://www.franceinter.fr/player/reecouter?play=793962', + 'url': 'https://www.franceinter.fr/emissions/la-marche-de-l-histoire/la-marche-de-l-histoire-18-decembre-2013', 'md5': '4764932e466e6f6c79c317d2e74f6884', 'info_dict': { - 'id': '793962', + 'id': 'la-marche-de-l-histoire/la-marche-de-l-histoire-18-decembre-2013', 'ext': 'mp3', - 'title': 'L’Histoire dans les jeux vidéo', - 'description': 'md5:7e93ddb4451e7530022792240a3049c7', - 'timestamp': 1387369800, + 'title': 'L’Histoire dans les jeux vidéo du 18 décembre 2013 - France Inter', + 'description': 'L’Histoire dans les jeux vidéo du 18 décembre 2013 par Jean Lebrun en replay sur France Inter. Retrouvez l\'émission en réécoute gratuite et abonnez-vous au podcast !', + 'timestamp': 1387324800, 'upload_date': '20131218', }, } @@ -25,17 +29,17 @@ class FranceInterIE(InfoExtractor): webpage = self._download_webpage(url, video_id) - path = self._search_regex( - r'<a id="player".+?href="([^"]+)"', webpage, 'video url') - video_url = 'http://www.franceinter.fr/' + path + video_url = self._search_regex( + r'<button class="replay-button playable" data-is-aod="1" data-url="([^"]+)"', webpage, 'video url') - title = self._html_search_regex( - r'<span class="title-diffusion">(.+?)</span>', webpage, 'title') - description = self._html_search_regex( - r'<span class="description">(.*?)</span>', - webpage, 'description', fatal=False) - timestamp = int_or_none(self._search_regex( - r'data-date="(\d+)"', webpage, 'upload date', fatal=False)) + title = self._og_search_title(webpage) + description = self._og_search_description(webpage) + + extractdate = self._search_regex('(\d{2}-([a-zA-Z\s]+)-\d{4}$)', url, 'extractdate', fatal=False) + extractdate = extractdate.split('-') + extractdate = extractdate[2] + "," + str(month_by_name(extractdate[1], 'fr')) + "," + extractdate[0] + + timestamp = unified_timestamp(extractdate) return { 'id': video_id, diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index ed199c4ad..623ced625 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -91,6 +91,10 @@ ENGLISH_MONTH_NAMES = [ 'January', 'February', 'March', 'April', 'May', 'June', 'July', 'August', 'September', 'October', 'November', 'December'] +FRENCH_MONTH_NAMES = [ + 'janvier', 'fevrier', 'mars', 'avril', 'mai', 'juin', + 'juillet', 'aout', 'septembre', 'octobre', 'novembre', 'decembre'] + KNOWN_EXTENSIONS = ( 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac', 'flv', 'f4v', 'f4a', 'f4b', @@ -1587,11 +1591,16 @@ def parse_count(s): return lookup_unit_table(_UNIT_TABLE, s) -def month_by_name(name): +def month_by_name(name, lang='en'): """ Return the number of a month by (locale-independently) English name """ + name_list = ENGLISH_MONTH_NAMES + + if lang == 'fr': + name_list = FRENCH_MONTH_NAMES + try: - return ENGLISH_MONTH_NAMES.index(name) + 1 + return name_list.index(name) + 1 except ValueError: return None From f6717dec8abe7c0d34e704732b53665a9415fa2e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 14 Sep 2016 23:13:55 +0700 Subject: [PATCH 0144/1571] [utils] Improve month_by_name and add tests --- test/test_utils.py | 11 +++++++++++ youtube_dl/utils.py | 16 ++++++++-------- 2 files changed, 19 insertions(+), 8 deletions(-) diff --git a/test/test_utils.py b/test/test_utils.py index 405c5d351..4ebca8744 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -40,6 +40,7 @@ from youtube_dl.utils import ( js_to_json, limit_length, mimetype2ext, + month_by_name, ohdave_rsa_encrypt, OnDemandPagedList, orderedSet, @@ -634,6 +635,16 @@ class TestUtil(unittest.TestCase): self.assertEqual(mimetype2ext('text/vtt;charset=utf-8'), 'vtt') self.assertEqual(mimetype2ext('text/html; charset=utf-8'), 'html') + def test_month_by_name(self): + self.assertEqual(month_by_name(None), None) + self.assertEqual(month_by_name('December', 'en'), 12) + self.assertEqual(month_by_name('decembre', 'fr'), 12) + self.assertEqual(month_by_name('December'), 12) + self.assertEqual(month_by_name('decembre'), None) + self.assertEqual(month_by_name('Unknown', 'unknown'), None) + + def test_m + def test_parse_codecs(self): self.assertEqual(parse_codecs(''), {}) self.assertEqual(parse_codecs('avc1.77.30, mp4a.40.2'), { diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 623ced625..a4ef15908 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -91,9 +91,12 @@ ENGLISH_MONTH_NAMES = [ 'January', 'February', 'March', 'April', 'May', 'June', 'July', 'August', 'September', 'October', 'November', 'December'] -FRENCH_MONTH_NAMES = [ - 'janvier', 'fevrier', 'mars', 'avril', 'mai', 'juin', - 'juillet', 'aout', 'septembre', 'octobre', 'novembre', 'decembre'] +MONTH_NAMES = { + 'en': ENGLISH_MONTH_NAMES, + 'fr': [ + 'janvier', 'fevrier', 'mars', 'avril', 'mai', 'juin', + 'juillet', 'aout', 'septembre', 'octobre', 'novembre', 'decembre'], +} KNOWN_EXTENSIONS = ( 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac', @@ -1594,13 +1597,10 @@ def parse_count(s): def month_by_name(name, lang='en'): """ Return the number of a month by (locale-independently) English name """ - name_list = ENGLISH_MONTH_NAMES - - if lang == 'fr': - name_list = FRENCH_MONTH_NAMES + month_names = MONTH_NAMES.get(lang, MONTH_NAMES['en']) try: - return name_list.index(name) + 1 + return month_names.index(name) + 1 except ValueError: return None From 3e4185c3965579c2cc10922384694c2465be4557 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 14 Sep 2016 23:57:01 +0700 Subject: [PATCH 0145/1571] [utils] Use native french month names --- test/test_utils.py | 6 ++---- youtube_dl/utils.py | 4 ++-- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/test/test_utils.py b/test/test_utils.py index 4ebca8744..9789d8611 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -638,13 +638,11 @@ class TestUtil(unittest.TestCase): def test_month_by_name(self): self.assertEqual(month_by_name(None), None) self.assertEqual(month_by_name('December', 'en'), 12) - self.assertEqual(month_by_name('decembre', 'fr'), 12) + self.assertEqual(month_by_name('décembre', 'fr'), 12) self.assertEqual(month_by_name('December'), 12) - self.assertEqual(month_by_name('decembre'), None) + self.assertEqual(month_by_name('décembre'), None) self.assertEqual(month_by_name('Unknown', 'unknown'), None) - def test_m - def test_parse_codecs(self): self.assertEqual(parse_codecs(''), {}) self.assertEqual(parse_codecs('avc1.77.30, mp4a.40.2'), { diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index a4ef15908..69ca88c85 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -94,8 +94,8 @@ ENGLISH_MONTH_NAMES = [ MONTH_NAMES = { 'en': ENGLISH_MONTH_NAMES, 'fr': [ - 'janvier', 'fevrier', 'mars', 'avril', 'mai', 'juin', - 'juillet', 'aout', 'septembre', 'octobre', 'novembre', 'decembre'], + 'janvier', 'février', 'mars', 'avril', 'mai', 'juin', + 'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'], } KNOWN_EXTENSIONS = ( From 0002962f3feb86ec8c14429af7ecddc17815fa93 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 14 Sep 2016 23:59:13 +0700 Subject: [PATCH 0146/1571] [franceinter] Improve extraction (Closes #10538) --- youtube_dl/extractor/franceinter.py | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/youtube_dl/extractor/franceinter.py b/youtube_dl/extractor/franceinter.py index 6dad8d712..0d58f89c5 100644 --- a/youtube_dl/extractor/franceinter.py +++ b/youtube_dl/extractor/franceinter.py @@ -2,10 +2,8 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..utils import ( - unified_timestamp, - month_by_name, -) +from ..compat import compat_str +from ..utils import month_by_name class FranceInterIE(InfoExtractor): @@ -18,8 +16,7 @@ class FranceInterIE(InfoExtractor): 'id': 'la-marche-de-l-histoire/la-marche-de-l-histoire-18-decembre-2013', 'ext': 'mp3', 'title': 'L’Histoire dans les jeux vidéo du 18 décembre 2013 - France Inter', - 'description': 'L’Histoire dans les jeux vidéo du 18 décembre 2013 par Jean Lebrun en replay sur France Inter. Retrouvez l\'émission en réécoute gratuite et abonnez-vous au podcast !', - 'timestamp': 1387324800, + 'description': 'md5:7f2ce449894d1e585932273080fb410d', 'upload_date': '20131218', }, } @@ -30,22 +27,28 @@ class FranceInterIE(InfoExtractor): webpage = self._download_webpage(url, video_id) video_url = self._search_regex( - r'<button class="replay-button playable" data-is-aod="1" data-url="([^"]+)"', webpage, 'video url') + r'(?s)<div[^>]+class=["\']page-diffusion["\'][^>]*>.*?<button[^>]+data-url=(["\'])(?P<url>(?:(?!\1).)+)\1', + webpage, 'video url', group='url') title = self._og_search_title(webpage) description = self._og_search_description(webpage) - extractdate = self._search_regex('(\d{2}-([a-zA-Z\s]+)-\d{4}$)', url, 'extractdate', fatal=False) - extractdate = extractdate.split('-') - extractdate = extractdate[2] + "," + str(month_by_name(extractdate[1], 'fr')) + "," + extractdate[0] - - timestamp = unified_timestamp(extractdate) + upload_date_str = self._search_regex( + r'class=["\']cover-emission-period["\'][^>]*>[^<]+\s+(\d{1,2}\s+[^\s]+\s+\d{4})<', + webpage, 'upload date', fatal=False) + if upload_date_str: + upload_date_list = upload_date_str.split() + upload_date_list.reverse() + upload_date_list[1] = compat_str(month_by_name(upload_date_list[1], lang='fr')) + upload_date = ''.join(upload_date_list) + else: + upload_date = None return { 'id': video_id, 'title': title, 'description': description, - 'timestamp': timestamp, + 'upload_date': upload_date, 'formats': [{ 'url': video_url, 'vcodec': 'none', From 797c636bcb02d1199015b753d26430eec13c4b2b Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Wed, 14 Sep 2016 18:58:47 +0100 Subject: [PATCH 0147/1571] [ap] improve adobe pass names and parse error handling --- youtube_dl/YoutubeDL.py | 2 +- youtube_dl/__init__.py | 8 +++++--- youtube_dl/extractor/adobepass.py | 9 +++------ youtube_dl/options.py | 12 ++++++------ 4 files changed, 15 insertions(+), 16 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 9c2c26280..29d8517a3 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -131,7 +131,7 @@ class YoutubeDL(object): username: Username for authentication purposes. password: Password for authentication purposes. videopassword: Password for accessing a video. - ap_mso_id: Adobe Pass Multiple-system operator Identifier. + ap_mso: Adobe Pass Multiple-system operator Identifier. ap_username: TV Provider username for authentication purposes. ap_password: TV Provider password for authentication purposes. usenetrc: Use netrc for authentication instead. diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index cdff3df65..5614ef0fb 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -120,9 +120,9 @@ def _real_main(argv=None): desc += ' (Example: "%s%s:%s" )' % (ie.SEARCH_KEY, random.choice(_COUNTS), random.choice(_SEARCHES)) write_string(desc + '\n', out=sys.stdout) sys.exit(0) - if opts.list_ap_mso_ids: + if opts.ap_mso_list: table = [[mso_id, mso_info['name']] for mso_id, mso_info in MSO_INFO.items()] - write_string('Supported TV Providers:\n' + render_table(['mso id', 'mso name'], table) + '\n', out=sys.stdout) + write_string('Supported TV Providers:\n' + render_table(['mso', 'mso name'], table) + '\n', out=sys.stdout) sys.exit(0) # Conflicting, missing and erroneous options @@ -165,6 +165,8 @@ def _real_main(argv=None): parser.error('max sleep interval must be greater than or equal to min sleep interval') else: opts.max_sleep_interval = opts.sleep_interval + if opts.ap_mso and opts.ap_mso not in MSO_INFO: + parser.error('Unsupported TV Provider, use --ap-mso-list to get a list of supported TV Providers') def parse_retries(retries): if retries in ('inf', 'infinite'): @@ -303,7 +305,7 @@ def _real_main(argv=None): 'password': opts.password, 'twofactor': opts.twofactor, 'videopassword': opts.videopassword, - 'ap_mso_id': opts.ap_mso_id, + 'ap_mso': opts.ap_mso, 'ap_username': opts.ap_username, 'ap_password': opts.ap_password, 'quiet': (opts.quiet or any_getting or any_printing), diff --git a/youtube_dl/extractor/adobepass.py b/youtube_dl/extractor/adobepass.py index 913a817d2..8ef5a96ce 100644 --- a/youtube_dl/extractor/adobepass.py +++ b/youtube_dl/extractor/adobepass.py @@ -72,8 +72,8 @@ class AdobePassIE(InfoExtractor): def raise_mvpd_required(): raise ExtractorError( 'This video is only available for users of participating TV providers. ' - 'Use --ap-mso-id to specify Adobe Pass Multiple-system operator Identifier ' - 'and --netrc to provide account credentials.', expected=True) + 'Use --ap-mso to specify Adobe Pass Multiple-system operator Identifier ' + 'and --ap-username and --ap-password or --netrc to provide account credentials.', expected=True) mvpd_headers = { 'ap_42': 'anonymous', @@ -91,12 +91,9 @@ class AdobePassIE(InfoExtractor): authn_token = None if not authn_token: # TODO add support for other TV Providers - mso_id = self._downloader.params.get('ap_mso_id') + mso_id = self._downloader.params.get('ap_mso') if not mso_id: raise_mvpd_required() - if mso_id not in MSO_INFO: - raise ExtractorError( - 'Unsupported TV Provider, use --list-ap-mso-ids to get a list of supported TV Providers' % mso_id, expected=True) username, password = self._get_login_info('ap_username', 'ap_password', mso_id) if not username or not password: raise_mvpd_required() diff --git a/youtube_dl/options.py b/youtube_dl/options.py index 342ae3be3..46c326b3d 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -353,20 +353,20 @@ def parseOpts(overrideArguments=None): adobe_pass = optparse.OptionGroup(parser, 'Adobe Pass Options') adobe_pass.add_option( - '--ap-mso-id', - dest='ap_mso_id', metavar='APMSOID', + '--ap-mso', + dest='ap_mso', metavar='MSO', help='Adobe Pass Multiple-system operator Identifier') adobe_pass.add_option( '--ap-username', - dest='ap_username', metavar='APUSERNAME', + dest='ap_username', metavar='USERNAME', help='TV Provider Login with this account ID') adobe_pass.add_option( '--ap-password', - dest='ap_password', metavar='APPASSWORD', + dest='ap_password', metavar='PASSWORD', help='TV Provider Account password. If this option is left out, youtube-dl will ask interactively.') adobe_pass.add_option( - '--list-ap-mso-ids', - action='store_true', dest='list_ap_mso_ids', default=False, + '--ap-mso-list', + action='store_true', dest='ap_mso_list', default=False, help='List all supported TV Providers') video_format = optparse.OptionGroup(parser, 'Video Format Options') From 87148bb7110ed54ef50f0660dfe0a735cdede3ca Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Wed, 14 Sep 2016 20:21:09 +0100 Subject: [PATCH 0148/1571] [adobepass] rename --ap-mso-list option to --ap-list-mso --- youtube_dl/__init__.py | 4 ++-- youtube_dl/options.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 5614ef0fb..1cf3140a0 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -120,7 +120,7 @@ def _real_main(argv=None): desc += ' (Example: "%s%s:%s" )' % (ie.SEARCH_KEY, random.choice(_COUNTS), random.choice(_SEARCHES)) write_string(desc + '\n', out=sys.stdout) sys.exit(0) - if opts.ap_mso_list: + if opts.ap_list_mso: table = [[mso_id, mso_info['name']] for mso_id, mso_info in MSO_INFO.items()] write_string('Supported TV Providers:\n' + render_table(['mso', 'mso name'], table) + '\n', out=sys.stdout) sys.exit(0) @@ -166,7 +166,7 @@ def _real_main(argv=None): else: opts.max_sleep_interval = opts.sleep_interval if opts.ap_mso and opts.ap_mso not in MSO_INFO: - parser.error('Unsupported TV Provider, use --ap-mso-list to get a list of supported TV Providers') + parser.error('Unsupported TV Provider, use --ap-list-mso to get a list of supported TV Providers') def parse_retries(retries): if retries in ('inf', 'infinite'): diff --git a/youtube_dl/options.py b/youtube_dl/options.py index 46c326b3d..b2e863119 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -365,8 +365,8 @@ def parseOpts(overrideArguments=None): dest='ap_password', metavar='PASSWORD', help='TV Provider Account password. If this option is left out, youtube-dl will ask interactively.') adobe_pass.add_option( - '--ap-mso-list', - action='store_true', dest='ap_mso_list', default=False, + '--ap-list-mso', + action='store_true', dest='ap_list_mso', default=False, help='List all supported TV Providers') video_format = optparse.OptionGroup(parser, 'Video Format Options') From c035dba19e815eca4a21f17918e96c2e2bd55d6b Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Thu, 15 Sep 2016 08:12:12 +0100 Subject: [PATCH 0149/1571] [bellmedia] add support for more sites --- youtube_dl/extractor/{ctv.py => bellmedia.py} | 39 ++++++++++++++++--- youtube_dl/extractor/extractors.py | 2 +- 2 files changed, 35 insertions(+), 6 deletions(-) rename youtube_dl/extractor/{ctv.py => bellmedia.py} (54%) diff --git a/youtube_dl/extractor/ctv.py b/youtube_dl/extractor/bellmedia.py similarity index 54% rename from youtube_dl/extractor/ctv.py rename to youtube_dl/extractor/bellmedia.py index a1fe86316..32326ed9e 100644 --- a/youtube_dl/extractor/ctv.py +++ b/youtube_dl/extractor/bellmedia.py @@ -6,8 +6,25 @@ import re from .common import InfoExtractor -class CTVIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?(?P<domain>ctv|tsn|bnn|thecomedynetwork)\.ca/.*?(?:\bvid=|-vid|~|%7E)(?P<id>[0-9.]+)' +class BellMediaIE(InfoExtractor): + _VALID_URL = r'''(?x)https?://(?:www\.)? + (?P<domain> + (?: + ctv| + tsn| + bnn| + thecomedynetwork| + discovery| + discoveryvelocity| + sciencechannel| + investigationdiscovery| + animalplanet| + bravo| + mtv| + space + )\.ca| + much\.com + )/.*?(?:\bvid=|-vid|~|%7E|/(?:episode)?)(?P<id>[0-9]{6})''' _TESTS = [{ 'url': 'http://www.ctv.ca/video/player?vid=706966', 'md5': 'ff2ebbeae0aa2dcc32a830c3fd69b7b0', @@ -32,15 +49,27 @@ class CTVIE(InfoExtractor): }, { 'url': 'http://www.ctv.ca/YourMorning/Video/S1E6-Monday-August-29-2016-vid938009', 'only_matching': True, + }, { + 'url': 'http://www.much.com/shows/atmidnight/episode948007/tuesday-september-13-2016', + 'only_matching': True, + }, { + 'url': 'http://www.much.com/shows/the-almost-impossible-gameshow/928979/episode-6', + 'only_matching': True, }] + _DOMAINS = { + 'thecomedynetwork': 'comedy', + 'discoveryvelocity': 'discvel', + 'sciencechannel': 'discsci', + 'investigationdiscovery': 'invdisc', + 'animalplanet': 'aniplan', + } def _real_extract(self, url): domain, video_id = re.match(self._VALID_URL, url).groups() - if domain == 'thecomedynetwork': - domain = 'comedy' + domain = domain.split('.')[0] return { '_type': 'url_transparent', 'id': video_id, - 'url': '9c9media:%s_web:%s' % (domain, video_id), + 'url': '9c9media:%s_web:%s' % (self._DOMAINS.get(domain, domain), video_id), 'ie_key': 'NineCNineMedia', } diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 522691de1..dd0579425 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -93,6 +93,7 @@ from .bbc import ( ) from .beeg import BeegIE from .behindkink import BehindKinkIE +from .bellmedia import BellMediaIE from .beatportpro import BeatportProIE from .bet import BetIE from .bigflix import BigflixIE @@ -195,7 +196,6 @@ from .crunchyroll import ( ) from .cspan import CSpanIE from .ctsnews import CtsNewsIE -from .ctv import CTVIE from .ctvnews import CTVNewsIE from .cultureunplugged import CultureUnpluggedIE from .curiositystream import ( From 95be29e1c6b7a06ac444d5142582ebece79698ff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 15 Sep 2016 20:58:02 +0700 Subject: [PATCH 0150/1571] [twitch] Fix api calls (Closes #10654, closes #10660) --- youtube_dl/extractor/twitch.py | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py index 359a8859c..af6d890b0 100644 --- a/youtube_dl/extractor/twitch.py +++ b/youtube_dl/extractor/twitch.py @@ -32,6 +32,7 @@ class TwitchBaseIE(InfoExtractor): _API_BASE = 'https://api.twitch.tv' _USHER_BASE = 'https://usher.ttvnw.net' _LOGIN_URL = 'http://www.twitch.tv/login' + _CLIENT_ID = 'jzkbprff40iqj646a697cyrvl0zt2m6' _NETRC_MACHINE = 'twitch' def _handle_error(self, response): @@ -44,15 +45,9 @@ class TwitchBaseIE(InfoExtractor): expected=True) def _call_api(self, path, item_id, note): - headers = { - 'Referer': 'http://api.twitch.tv/crossdomain/receiver.html?v=2', - 'X-Requested-With': 'XMLHttpRequest', - } - for cookie in self._downloader.cookiejar: - if cookie.name == 'api_token': - headers['Twitch-Api-Token'] = cookie.value response = self._download_json( - '%s/%s' % (self._API_BASE, path), item_id, note) + '%s/%s' % (self._API_BASE, path), item_id, note, + headers={'Client-ID': self._CLIENT_ID}) self._handle_error(response) return response From eb5b1fc0211e89f386c4f5563cc1d5d4edeb3c55 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 15 Sep 2016 21:53:35 +0700 Subject: [PATCH 0151/1571] [crunchyroll] Fix authentication (Closes #10655) --- youtube_dl/extractor/crunchyroll.py | 47 +++++++++++++++++++++++------ 1 file changed, 38 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py index 6d3abb52f..1b69bd0b6 100644 --- a/youtube_dl/extractor/crunchyroll.py +++ b/youtube_dl/extractor/crunchyroll.py @@ -34,22 +34,51 @@ from ..aes import ( class CrunchyrollBaseIE(InfoExtractor): + _LOGIN_URL = 'https://www.crunchyroll.com/login' + _LOGIN_FORM = 'login_form' _NETRC_MACHINE = 'crunchyroll' def _login(self): (username, password) = self._get_login_info() if username is None: return - self.report_login() - login_url = 'https://www.crunchyroll.com/?a=formhandler' - data = urlencode_postdata({ - 'formname': 'RpcApiUser_Login', - 'name': username, - 'password': password, + + login_page = self._download_webpage( + self._LOGIN_URL, None, 'Downloading login page') + + login_form_str = self._search_regex( + r'(?P<form><form[^>]+?id=(["\'])%s\2[^>]*>)' % self._LOGIN_FORM, + login_page, 'login form', group='form') + + post_url = extract_attributes(login_form_str).get('action') + if not post_url: + post_url = self._LOGIN_URL + elif not post_url.startswith('http'): + post_url = compat_urlparse.urljoin(self._LOGIN_URL, post_url) + + login_form = self._form_hidden_inputs(self._LOGIN_FORM, login_page) + + login_form.update({ + 'login_form[name]': username, + 'login_form[password]': password, }) - login_request = sanitized_Request(login_url, data) - login_request.add_header('Content-Type', 'application/x-www-form-urlencoded') - self._download_webpage(login_request, None, False, 'Wrong login info') + + response = self._download_webpage( + post_url, None, 'Logging in', 'Wrong login info', + data=urlencode_postdata(login_form), + headers={'Content-Type': 'application/x-www-form-urlencoded'}) + + # Successful login + if '<title>Redirecting' in response: + return + + error = self._html_search_regex( + '(?s)<ul[^>]+class=["\']messages["\'][^>]*>(.+?)</ul>', + response, 'error message', default=None) + if error: + raise ExtractorError('Unable to login: %s' % error, expected=True) + + raise ExtractorError('Unable to log in') def _real_initialize(self): self._login() From c8498368549048a578d5f30773aaa9760454983c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 15 Sep 2016 21:54:48 +0700 Subject: [PATCH 0152/1571] [utils] Improve _hidden_inputs --- youtube_dl/extractor/common.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index ff19270ae..e413799f9 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -888,16 +888,16 @@ class InfoExtractor(object): def _hidden_inputs(html): html = re.sub(r'<!--(?:(?!<!--).)*-->', '', html) hidden_inputs = {} - for input in re.findall(r'(?i)<input([^>]+)>', html): - if not re.search(r'type=(["\'])(?:hidden|submit)\1', input): + for input in re.findall(r'(?i)(<input[^>]+>)', html): + attrs = extract_attributes(input) + if not input: continue - name = re.search(r'(?:name|id)=(["\'])(?P<value>.+?)\1', input) - if not name: + if attrs.get('type') not in ('hidden', 'submit'): continue - value = re.search(r'value=(["\'])(?P<value>.*?)\1', input) - if not value: - continue - hidden_inputs[name.group('value')] = value.group('value') + name = attrs.get('name') or attrs.get('id') + value = attrs.get('value') + if name and value is not None: + hidden_inputs[name] = value return hidden_inputs def _form_hidden_inputs(self, form_id, html): From 537f753399ed9fd07fcb9285a2a3330010394c85 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 15 Sep 2016 22:17:17 +0700 Subject: [PATCH 0153/1571] [options] Improve Adobe Pass wording --- youtube_dl/options.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/youtube_dl/options.py b/youtube_dl/options.py index b2e863119..100d21310 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -355,19 +355,19 @@ def parseOpts(overrideArguments=None): adobe_pass.add_option( '--ap-mso', dest='ap_mso', metavar='MSO', - help='Adobe Pass Multiple-system operator Identifier') + help='Adobe Pass multiple-system operator (TV provider) identifier, use --ap-list-mso for a list of available MSOs') adobe_pass.add_option( '--ap-username', dest='ap_username', metavar='USERNAME', - help='TV Provider Login with this account ID') + help='Multiple-system operator account login') adobe_pass.add_option( '--ap-password', dest='ap_password', metavar='PASSWORD', - help='TV Provider Account password. If this option is left out, youtube-dl will ask interactively.') + help='Multiple-system operator account password. If this option is left out, youtube-dl will ask interactively.') adobe_pass.add_option( '--ap-list-mso', action='store_true', dest='ap_list_mso', default=False, - help='List all supported TV Providers') + help='List all supported multiple-system operators') video_format = optparse.OptionGroup(parser, 'Video Format Options') video_format.add_option( From d2522b86ac7d1eff1f00e21bcd976a2616b6a6d2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 15 Sep 2016 22:18:31 +0700 Subject: [PATCH 0154/1571] [options] Actually print Adobe Pass options sections in --help --- youtube_dl/options.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/options.py b/youtube_dl/options.py index 100d21310..53497fbc6 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -831,6 +831,7 @@ def parseOpts(overrideArguments=None): parser.add_option_group(video_format) parser.add_option_group(subtitles) parser.add_option_group(authentication) + parser.add_option_group(adobe_pass) parser.add_option_group(postproc) if overrideArguments is not None: From 1da50aa34e9fa0fd927de8197dcf2884551dd800 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 15 Sep 2016 22:24:55 +0700 Subject: [PATCH 0155/1571] [YoutubeDL] Improve Adobe Pass options' wording --- youtube_dl/YoutubeDL.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 29d8517a3..442aa663b 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -131,9 +131,9 @@ class YoutubeDL(object): username: Username for authentication purposes. password: Password for authentication purposes. videopassword: Password for accessing a video. - ap_mso: Adobe Pass Multiple-system operator Identifier. - ap_username: TV Provider username for authentication purposes. - ap_password: TV Provider password for authentication purposes. + ap_mso: Adobe Pass multiple-system operator identifier. + ap_username: Multiple-system operator account username. + ap_password: Multiple-system operator account password. usenetrc: Use netrc for authentication instead. verbose: Print additional info to stdout. quiet: Do not print messages to stdout. From 2133565cec3646680600d314b93e535f6fa52339 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 15 Sep 2016 22:26:37 +0700 Subject: [PATCH 0156/1571] [extractor/common] Simplify _get_login_info --- youtube_dl/extractor/common.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index e413799f9..9627816b4 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -689,8 +689,6 @@ class InfoExtractor(object): if self._downloader is None: return (None, None) - username = None - password = None downloader_params = self._downloader.params # Attempt to use provided username and password or .netrc data @@ -700,7 +698,7 @@ class InfoExtractor(object): else: username, password = self._get_netrc_login_info(netrc_machine) - return (username, password) + return username, password def _get_tfa_info(self, note='two-factor verification code'): """ From 32443dd346594d64b579af714f4828287492c464 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 15 Sep 2016 22:34:29 +0700 Subject: [PATCH 0157/1571] [extractor/common] Update _get_login_info's comment --- youtube_dl/extractor/common.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 9627816b4..95ea3fca5 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -683,7 +683,10 @@ class InfoExtractor(object): def _get_login_info(self, username_option='username', password_option='password', netrc_machine=None): """ Get the login info as (username, password) - It will look in the netrc file using the _NETRC_MACHINE value + First look for the manually specified credentials using username_option + and password_option as keys in params dictionary. If no such credentials + available look in the netrc file using the netrc_machine or _NETRC_MACHINE + value. If there's no info available, return (None, None) """ if self._downloader is None: From dcce092e0aa92799f1e3a51ce5aae611af4d70d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 15 Sep 2016 22:35:12 +0700 Subject: [PATCH 0158/1571] [extractor/common] Simplify _get_netrc_login_info and carry long lines --- youtube_dl/extractor/common.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 95ea3fca5..4f738b9fc 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -674,11 +674,13 @@ class InfoExtractor(object): username = info[0] password = info[2] else: - raise netrc.NetrcParseError('No authenticators for %s' % netrc_machine) + raise netrc.NetrcParseError( + 'No authenticators for %s' % netrc_machine) except (IOError, netrc.NetrcParseError) as err: - self._downloader.report_warning('parsing .netrc: %s' % error_to_compat_str(err)) + self._downloader.report_warning( + 'parsing .netrc: %s' % error_to_compat_str(err)) - return (username, password) + return username, password def _get_login_info(self, username_option='username', password_option='password', netrc_machine=None): """ From 1dec2c8a0e00e8ed53ddd030347ce9225df9964e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 15 Sep 2016 22:47:45 +0700 Subject: [PATCH 0159/1571] [adobepass] Change mvpd cache section name In order to better emphasize it's relation to Adobe Pass --- youtube_dl/extractor/adobepass.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/adobepass.py b/youtube_dl/extractor/adobepass.py index 8ef5a96ce..01932e5e6 100644 --- a/youtube_dl/extractor/adobepass.py +++ b/youtube_dl/extractor/adobepass.py @@ -32,6 +32,7 @@ MSO_INFO = { class AdobePassIE(InfoExtractor): _SERVICE_PROVIDER_TEMPLATE = 'https://sp.auth.adobe.com/adobe-services/%s' _USER_AGENT = 'Mozilla/5.0 (X11; Linux i686; rv:47.0) Gecko/20100101 Firefox/47.0' + _MVPD_CACHE = 'ap-mvpd' @staticmethod def _get_mvpd_resource(provider_id, title, guid, rating): @@ -85,7 +86,7 @@ class AdobePassIE(InfoExtractor): guid = xml_text(resource, 'guid') count = 0 while count < 2: - requestor_info = self._downloader.cache.load('mvpd', requestor_id) or {} + requestor_info = self._downloader.cache.load(self._MVPD_CACHE, requestor_id) or {} authn_token = requestor_info.get('authn_token') if authn_token and is_expired(authn_token, 'simpleTokenExpires'): authn_token = None @@ -125,12 +126,12 @@ class AdobePassIE(InfoExtractor): 'requestor_id': requestor_id, }), headers=mvpd_headers) if '<pendingLogout' in session: - self._downloader.cache.store('mvpd', requestor_id, {}) + self._downloader.cache.store(self._MVPD_CACHE, requestor_id, {}) count += 1 continue authn_token = unescapeHTML(xml_text(session, 'authnToken')) requestor_info['authn_token'] = authn_token - self._downloader.cache.store('mvpd', requestor_id, requestor_info) + self._downloader.cache.store(self._MVPD_CACHE, requestor_id, requestor_info) authz_token = requestor_info.get(guid) if authz_token and is_expired(authz_token, 'simpleTokenTTL'): @@ -146,12 +147,12 @@ class AdobePassIE(InfoExtractor): 'userMeta': '1', }), headers=mvpd_headers) if '<pendingLogout' in authorize: - self._downloader.cache.store('mvpd', requestor_id, {}) + self._downloader.cache.store(self._MVPD_CACHE, requestor_id, {}) count += 1 continue authz_token = unescapeHTML(xml_text(authorize, 'authzToken')) requestor_info[guid] = authz_token - self._downloader.cache.store('mvpd', requestor_id, requestor_info) + self._downloader.cache.store(self._MVPD_CACHE, requestor_id, requestor_info) mvpd_headers.update({ 'ap_19': xml_text(authn_token, 'simpleSamlNameID'), @@ -167,7 +168,7 @@ class AdobePassIE(InfoExtractor): 'hashed_guid': 'false', }), headers=mvpd_headers) if '<pendingLogout' in short_authorize: - self._downloader.cache.store('mvpd', requestor_id, {}) + self._downloader.cache.store(self._MVPD_CACHE, requestor_id, {}) count += 1 continue return short_authorize From 490b755769a364ca0624390453e36321d5182d3e Mon Sep 17 00:00:00 2001 From: stepshal <nessento@openmailbox.org> Date: Wed, 14 Sep 2016 23:03:26 +0700 Subject: [PATCH 0160/1571] Improve some id regexes --- youtube_dl/extractor/canvas.py | 2 +- youtube_dl/extractor/nfl.py | 2 +- youtube_dl/extractor/npo.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/canvas.py b/youtube_dl/extractor/canvas.py index ef0691dcd..d183d5d52 100644 --- a/youtube_dl/extractor/canvas.py +++ b/youtube_dl/extractor/canvas.py @@ -71,7 +71,7 @@ class CanvasIE(InfoExtractor): webpage)).strip() video_id = self._html_search_regex( - r'data-video=(["\'])(?P<id>.+?)\1', webpage, 'video id', group='id') + r'data-video=(["\'])(?P<id>(?:(?!\1).)+)\1', webpage, 'video id', group='id') data = self._download_json( 'https://mediazone.vrt.be/api/v1/%s/assets/%s' diff --git a/youtube_dl/extractor/nfl.py b/youtube_dl/extractor/nfl.py index 200874d68..3930d16f1 100644 --- a/youtube_dl/extractor/nfl.py +++ b/youtube_dl/extractor/nfl.py @@ -165,7 +165,7 @@ class NFLIE(InfoExtractor): group='config')) # For articles, the id in the url is not the video id video_id = self._search_regex( - r'(?:<nflcs:avplayer[^>]+data-content[Ii]d\s*=\s*|content[Ii]d\s*:\s*)(["\'])(?P<id>.+?)\1', + r'(?:<nflcs:avplayer[^>]+data-content[Ii]d\s*=\s*|content[Ii]d\s*:\s*)(["\'])(?P<id>(?:(?!\1).)+)\1', webpage, 'video id', default=video_id, group='id') config = self._download_json(config_url, video_id, 'Downloading player config') url_template = NFLIE.prepend_host( diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py index 87f5675c7..3293bdb17 100644 --- a/youtube_dl/extractor/npo.py +++ b/youtube_dl/extractor/npo.py @@ -429,7 +429,7 @@ class SchoolTVIE(InfoExtractor): display_id = self._match_id(url) webpage = self._download_webpage(url, display_id) video_id = self._search_regex( - r'data-mid=(["\'])(?P<id>.+?)\1', webpage, 'video_id', group='id') + r'data-mid=(["\'])(?P<id>(?:(?!\1).)+)\1', webpage, 'video_id', group='id') return { '_type': 'url_transparent', 'ie_key': 'NPO', From e6bf3621e703a7cd0d62736a1765b0ccff5adfe6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 15 Sep 2016 23:31:16 +0700 Subject: [PATCH 0161/1571] [ChangeLog] Actualize --- ChangeLog | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index c3c8bf037..cd1f2fdf1 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,8 +1,26 @@ version <unreleased> +Core +* Improve _hidden_inputs ++ Introduce improved explicit Adobe Pass support ++ Add --ap-mso to provide multiple-system operator identifier ++ Add --ap-username to provide MSO account username ++ Add --ap-password to provide MSO account password ++ Add --ap-list-mso to list all supported MSOs ++ Add support for Rogers Cable multiple-system operator (#10606) + Extractors -* [kwuo] Improve error detection (#10650) +* [crunchyroll] Fix authentication (#10655) +* [twitch] Fix API calls (#10654, #10660) ++ [bellmedia] Add support for more Bell Media Television sites +* [franceinter] Fix extraction (#10538, #2105) +* [kuwo] Improve error detection (#10650) ++ [go] Add support for free full episodes (#10439) * [bilibili] Fix extraction for specific videos (#10647) +* [nhk] Fix extraction (#10633) +* [kaltura] Improve audio detection +* [kaltura] Skip chun format ++ [vimeo:ondemand] Pass Referer along with embed URL (#10624) + [nbc] Add support for NBC Olympics (#10361) From f5e008d134f5e69920829cfd7a5ce5ae57d275c1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 15 Sep 2016 23:46:11 +0700 Subject: [PATCH 0162/1571] release 2016.09.15 --- .github/ISSUE_TEMPLATE.md | 8 ++++---- ChangeLog | 2 +- README.md | 11 +++++++++++ docs/supportedsites.md | 3 ++- youtube_dl/version.py | 2 +- 5 files changed, 19 insertions(+), 7 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index e87fed573..61cea757c 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.09.11.1*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.09.11.1** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.09.15*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.09.15** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v <your command line> [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.09.11.1 +[debug] youtube-dl version 2016.09.15 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} @@ -55,4 +55,4 @@ $ youtube-dl -v <your command line> ### Description of your *issue*, suggested solution and other information Explanation of your *issue* in arbitrary form goes here. Please make sure the [description is worded well enough to be understood](https://github.com/rg3/youtube-dl#is-the-description-of-the-issue-itself-sufficient). Provide as much context and examples as possible. -If work on your *issue* required an account credentials please provide them or explain how one can obtain them. +If work on your *issue* requires account credentials please provide them or explain how one can obtain them. diff --git a/ChangeLog b/ChangeLog index cd1f2fdf1..4583537ac 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2016.09.15 Core * Improve _hidden_inputs diff --git a/README.md b/README.md index 7543f81ac..4debe15fe 100644 --- a/README.md +++ b/README.md @@ -358,6 +358,17 @@ which means you can modify it, redistribute it or use it however you like. -n, --netrc Use .netrc authentication data --video-password PASSWORD Video password (vimeo, smotri, youku) +## Adobe Pass Options: + --ap-mso MSO Adobe Pass multiple-system operator (TV + provider) identifier, use --ap-list-mso for + a list of available MSOs + --ap-username USERNAME Multiple-system operator account login + --ap-password PASSWORD Multiple-system operator account password. + If this option is left out, youtube-dl will + ask interactively. + --ap-list-mso List all supported multiple-system + operators + ## Post-processing Options: -x, --extract-audio Convert video files to audio-only files (requires ffmpeg or avconv and ffprobe or diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 7a7b268d3..fcb618561 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -89,6 +89,7 @@ - **BeatportPro** - **Beeg** - **BehindKink** + - **BellMedia** - **Bet** - **Bigflix** - **Bild**: Bild.de @@ -169,7 +170,6 @@ - **CSNNE** - **CSpan**: C-SPAN - **CtsNews**: 華視新聞 - - **CTV** - **CTVNews** - **culturebox.francetvinfo.fr** - **CultureUnplugged** @@ -445,6 +445,7 @@ - **NBA** - **NBC** - **NBCNews** + - **NBCOlympics** - **NBCSports** - **NBCSportsVPlayer** - **ndr**: NDR.de - Norddeutscher Rundfunk diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 903aede58..081fd6ef0 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.09.11.1' +__version__ = '2016.09.15' From 9d8985a165ebdc9fd8d72e7536253c42162b58a6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 16 Sep 2016 00:54:34 +0700 Subject: [PATCH 0163/1571] [tv4] Fix hls and hds formats (Closes #10659) --- youtube_dl/extractor/tv4.py | 49 ++++++++++++++++++++++++------------- 1 file changed, 32 insertions(+), 17 deletions(-) diff --git a/youtube_dl/extractor/tv4.py b/youtube_dl/extractor/tv4.py index 343edf206..5d2d8f132 100644 --- a/youtube_dl/extractor/tv4.py +++ b/youtube_dl/extractor/tv4.py @@ -2,9 +2,13 @@ from __future__ import unicode_literals from .common import InfoExtractor +from ..compat import compat_str from ..utils import ( ExtractorError, + int_or_none, parse_iso8601, + try_get, + update_url_query, ) @@ -65,36 +69,47 @@ class TV4IE(InfoExtractor): video_id = self._match_id(url) info = self._download_json( - 'http://www.tv4play.se/player/assets/%s.json' % video_id, video_id, 'Downloading video info JSON') + 'http://www.tv4play.se/player/assets/%s.json' % video_id, + video_id, 'Downloading video info JSON') # If is_geo_restricted is true, it doesn't necessarily mean we can't download it - if info['is_geo_restricted']: + if info.get('is_geo_restricted'): self.report_warning('This content might not be available in your country due to licensing restrictions.') - if info['requires_subscription']: + if info.get('requires_subscription'): raise ExtractorError('This content requires subscription.', expected=True) - sources_data = self._download_json( - 'https://prima.tv4play.se/api/web/asset/%s/play.json?protocol=http&videoFormat=MP4' % video_id, video_id, 'Downloading sources JSON') - sources = sources_data['playback'] + title = info['title'] formats = [] - for item in sources.get('items', {}).get('item', []): - ext, bitrate = item['mediaFormat'], item['bitrate'] - formats.append({ - 'format_id': '%s_%s' % (ext, bitrate), - 'tbr': bitrate, - 'ext': ext, - 'url': item['url'], - }) + # http formats are linked with unresolvable host + for kind in ('hls', ''): + data = self._download_json( + 'https://prima.tv4play.se/api/web/asset/%s/play.json' % video_id, + video_id, 'Downloading sources JSON', query={ + 'protocol': kind, + 'videoFormat': 'MP4+WEBVTTS+WEBVTT', + }) + item = try_get(data, lambda x: x['playback']['items']['item'], dict) + manifest_url = item.get('url') + if not isinstance(manifest_url, compat_str): + continue + if kind == 'hls': + formats.extend(self._extract_m3u8_formats( + manifest_url, video_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id=kind, fatal=False)) + else: + formats.extend(self._extract_f4m_formats( + update_url_query(manifest_url, {'hdcore': '3.8.0'}), + video_id, f4m_id='hds', fatal=False)) self._sort_formats(formats) return { 'id': video_id, - 'title': info['title'], + 'title': title, 'formats': formats, 'description': info.get('description'), 'timestamp': parse_iso8601(info.get('broadcast_date_time')), - 'duration': info.get('duration'), + 'duration': int_or_none(info.get('duration')), 'thumbnail': info.get('image'), - 'is_live': sources.get('live'), + 'is_live': info.get('is_live') is True, } From 52dc8a9b3f1af7abda6652a75b906d70809c475d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 16 Sep 2016 22:02:59 +0700 Subject: [PATCH 0164/1571] [franceinter] Fix upload date extraction --- youtube_dl/extractor/franceinter.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/franceinter.py b/youtube_dl/extractor/franceinter.py index 0d58f89c5..1a1232ade 100644 --- a/youtube_dl/extractor/franceinter.py +++ b/youtube_dl/extractor/franceinter.py @@ -10,14 +10,14 @@ class FranceInterIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?franceinter\.fr/emissions/(?P<id>[^?#]+)' _TEST = { - 'url': 'https://www.franceinter.fr/emissions/la-marche-de-l-histoire/la-marche-de-l-histoire-18-decembre-2013', - 'md5': '4764932e466e6f6c79c317d2e74f6884', + 'url': 'https://www.franceinter.fr/emissions/la-tete-au-carre/la-tete-au-carre-14-septembre-2016', + 'md5': '4e3aeb58fe0e83d7b0581fa213c409d0', 'info_dict': { - 'id': 'la-marche-de-l-histoire/la-marche-de-l-histoire-18-decembre-2013', + 'id': 'la-tete-au-carre/la-tete-au-carre-14-septembre-2016', 'ext': 'mp3', - 'title': 'L’Histoire dans les jeux vidéo du 18 décembre 2013 - France Inter', - 'description': 'md5:7f2ce449894d1e585932273080fb410d', - 'upload_date': '20131218', + 'title': 'Et si les rêves pouvaient nous aider à agir dans notre vie quotidienne ?', + 'description': 'md5:a245dd62cf5bf51de915f8d9956d180a', + 'upload_date': '20160914', }, } @@ -39,7 +39,7 @@ class FranceInterIE(InfoExtractor): if upload_date_str: upload_date_list = upload_date_str.split() upload_date_list.reverse() - upload_date_list[1] = compat_str(month_by_name(upload_date_list[1], lang='fr')) + upload_date_list[1] = '%02d' % (month_by_name(upload_date_list[1], lang='fr') or 0) upload_date = ''.join(upload_date_list) else: upload_date = None From 98b7506e96b5ac107a777d8bb8900623d832fba4 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Fri, 16 Sep 2016 17:36:22 +0100 Subject: [PATCH 0165/1571] [toutv] add support for authentication(closes #10669) --- youtube_dl/extractor/radiocanada.py | 55 ++++++++++++++++---------- youtube_dl/extractor/toutv.py | 60 ++++++++++++++++++++++++++++- 2 files changed, 92 insertions(+), 23 deletions(-) diff --git a/youtube_dl/extractor/radiocanada.py b/youtube_dl/extractor/radiocanada.py index 8ec402646..6751270ee 100644 --- a/youtube_dl/extractor/radiocanada.py +++ b/youtube_dl/extractor/radiocanada.py @@ -13,6 +13,7 @@ from ..utils import ( xpath_element, ExtractorError, determine_protocol, + unsmuggle_url, ) @@ -35,28 +36,51 @@ class RadioCanadaIE(InfoExtractor): } def _real_extract(self, url): + url, smuggled_data = unsmuggle_url(url, {}) app_code, video_id = re.match(self._VALID_URL, url).groups() - device_types = ['ipad', 'android'] + metadata = self._download_xml( + 'http://api.radio-canada.ca/metaMedia/v1/index.ashx', + video_id, note='Downloading metadata XML', query={ + 'appCode': app_code, + 'idMedia': video_id, + }) + + def get_meta(name): + el = find_xpath_attr(metadata, './/Meta', 'name', name) + return el.text if el is not None else None + + if get_meta('protectionType'): + raise ExtractorError('This video is DRM protected.', expected=True) + + device_types = ['ipad'] if app_code != 'toutv': device_types.append('flash') + if not smuggled_data: + device_types.append('android') formats = [] # TODO: extract f4m formats # f4m formats can be extracted using flashhd device_type but they produce unplayable file for device_type in device_types: - v_data = self._download_xml( - 'http://api.radio-canada.ca/validationMedia/v1/Validation.ashx', - video_id, note='Downloading %s XML' % device_type, query={ - 'appCode': app_code, - 'idMedia': video_id, - 'connectionType': 'broadband', - 'multibitrate': 'true', - 'deviceType': device_type, + validation_url = 'http://api.radio-canada.ca/validationMedia/v1/Validation.ashx' + query = { + 'appCode': app_code, + 'idMedia': video_id, + 'connectionType': 'broadband', + 'multibitrate': 'true', + 'deviceType': device_type, + } + if smuggled_data: + validation_url = 'https://services.radio-canada.ca/media/validation/v2/' + query.update(smuggled_data) + else: + query.update({ # paysJ391wsHjbOJwvCs26toz and bypasslock are used to bypass geo-restriction 'paysJ391wsHjbOJwvCs26toz': 'CA', 'bypasslock': 'NZt5K62gRqfc', - }, fatal=False) + }) + v_data = self._download_xml(validation_url, video_id, note='Downloading %s XML' % device_type, query=query, fatal=False) v_url = xpath_text(v_data, 'url') if not v_url: continue @@ -101,17 +125,6 @@ class RadioCanadaIE(InfoExtractor): f4m_id='hds', fatal=False)) self._sort_formats(formats) - metadata = self._download_xml( - 'http://api.radio-canada.ca/metaMedia/v1/index.ashx', - video_id, note='Downloading metadata XML', query={ - 'appCode': app_code, - 'idMedia': video_id, - }) - - def get_meta(name): - el = find_xpath_attr(metadata, './/Meta', 'name', name) - return el.text if el is not None else None - return { 'id': video_id, 'title': get_meta('Title'), diff --git a/youtube_dl/extractor/toutv.py b/youtube_dl/extractor/toutv.py index 54c2d0aa6..d2d5c1171 100644 --- a/youtube_dl/extractor/toutv.py +++ b/youtube_dl/extractor/toutv.py @@ -2,12 +2,22 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..utils import int_or_none +from ..utils import ( + int_or_none, + js_to_json, + ExtractorError, + urlencode_postdata, + extract_attributes, + smuggle_url, +) class TouTvIE(InfoExtractor): + _NETRC_MACHINE = 'toutv' IE_NAME = 'tou.tv' _VALID_URL = r'https?://ici\.tou\.tv/(?P<id>[a-zA-Z0-9_-]+/S[0-9]+E[0-9]+)' + _access_token = None + _claims = None _TEST = { 'url': 'http://ici.tou.tv/garfield-tout-court/S2015E17', @@ -22,18 +32,64 @@ class TouTvIE(InfoExtractor): # m3u8 download 'skip_download': True, }, + 'skip': '404 Not Found', } + def _real_initialize(self): + email, password = self._get_login_info() + if email is None: + return + state = 'http://ici.tou.tv//' + webpage = self._download_webpage(state, None, 'Downloading homepage') + toutvlogin = self._parse_json(self._search_regex( + r'(?s)toutvlogin\s*=\s*({.+?});', webpage, 'toutvlogin'), None, js_to_json) + authorize_url = toutvlogin['host'] + '/auth/oauth/v2/authorize' + login_webpage = self._download_webpage( + authorize_url, None, 'Downloading login page', query={ + 'client_id': toutvlogin['clientId'], + 'redirect_uri': 'https://ici.tou.tv/login/loginCallback', + 'response_type': 'token', + 'scope': 'media-drmt openid profile email id.write media-validation.read.privileged', + 'state': state, + }) + login_form = self._search_regex( + r'(?s)(<form[^>]+id="Form-login".+?</form>)', login_webpage, 'login form') + form_data = self._hidden_inputs(login_form) + form_data.update({ + 'login-email': email, + 'login-password': password, + }) + post_url = extract_attributes(login_form).get('action') or authorize_url + _, urlh = self._download_webpage_handle( + post_url, None, 'Logging in', data=urlencode_postdata(form_data)) + self._access_token = self._search_regex( + r'access_token=([\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})', + urlh.geturl(), 'access token') + self._claims = self._download_json( + 'https://services.radio-canada.ca/media/validation/v2/getClaims', + None, 'Extracting Claims', query={ + 'token': self._access_token, + 'access_token': self._access_token, + })['claims'] + def _real_extract(self, url): path = self._match_id(url) metadata = self._download_json('http://ici.tou.tv/presentation/%s' % path, path) + if metadata.get('IsDrm'): + raise ExtractorError('This video is DRM protected.', expected=True) video_id = metadata['IdMedia'] details = metadata['Details'] title = details['OriginalTitle'] + video_url = 'radiocanada:%s:%s' % (metadata.get('AppCode', 'toutv'), video_id) + if self._access_token and self._claims: + video_url = smuggle_url(video_url, { + 'access_token': self._access_token, + 'claims': self._claims, + }) return { '_type': 'url_transparent', - 'url': 'radiocanada:%s:%s' % (metadata.get('AppCode', 'toutv'), video_id), + 'url': video_url, 'id': video_id, 'title': title, 'thumbnail': details.get('ImageUrl'), From 6ad0219556cefe60239027633193cc9f1dc9fb1d Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Fri, 16 Sep 2016 19:30:38 +0100 Subject: [PATCH 0166/1571] [common] add helper method for Wowza Streaming Engine format extraction --- youtube_dl/extractor/common.py | 43 +++++++++++++++++++++++++++++ youtube_dl/extractor/vier.py | 4 +-- youtube_dl/extractor/vodplatform.py | 25 ++--------------- youtube_dl/extractor/vrt.py | 40 ++++++--------------------- 4 files changed, 55 insertions(+), 57 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 4f738b9fc..c00023458 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1795,6 +1795,49 @@ class InfoExtractor(object): m3u8_id='hls', fatal=False)) return formats + def _extract_wowza_formats(self, url, video_id, m3u8_entry_protocol='m3u8_native', skip_protocols=[]): + url = re.sub(r'/(?:manifest|playlist|jwplayer)\.(?:m3u8|f4m|mpd|smil)', '', url) + url_base = self._search_regex(r'(?:https?|rtmp|rtsp)(://[^?]+)', url, 'format url') + http_base_url = 'http' + url_base + formats = [] + if 'm3u8' not in skip_protocols: + formats.extend(self._extract_m3u8_formats( + http_base_url + '/playlist.m3u8', video_id, 'mp4', + m3u8_entry_protocol, m3u8_id='hls', fatal=False)) + if 'f4m' not in skip_protocols: + formats.extend(self._extract_f4m_formats( + http_base_url + '/manifest.f4m', + video_id, f4m_id='hds', fatal=False)) + if re.search(r'(?:/smil:|\.smil)', url_base): + if 'dash' not in skip_protocols: + formats.extend(self._extract_mpd_formats( + http_base_url + '/manifest.mpd', + video_id, mpd_id='dash', fatal=False)) + if 'smil' not in skip_protocols: + rtmp_formats = self._extract_smil_formats( + http_base_url + '/jwplayer.smil', + video_id, fatal=False) + for rtmp_format in rtmp_formats: + rtsp_format = rtmp_format.copy() + rtsp_format['url'] = '%s/%s' % (rtmp_format['url'], rtmp_format['play_path']) + del rtsp_format['play_path'] + del rtsp_format['ext'] + rtsp_format.update({ + 'url': rtsp_format['url'].replace('rtmp://', 'rtsp://'), + 'format_id': rtmp_format['format_id'].replace('rtmp', 'rtsp'), + 'protocol': 'rtsp', + }) + formats.extend([rtmp_format, rtsp_format]) + else: + for protocol in ('rtmp', 'rtsp'): + if protocol not in skip_protocols: + formats.append({ + 'url': protocol + url_base, + 'format_id': protocol, + 'protocol': protocol, + }) + return formats + def _live_title(self, name): """ Generate the title for a live video """ now = datetime.datetime.now() diff --git a/youtube_dl/extractor/vier.py b/youtube_dl/extractor/vier.py index 6645c6186..dc142a245 100644 --- a/youtube_dl/extractor/vier.py +++ b/youtube_dl/extractor/vier.py @@ -48,8 +48,8 @@ class VierIE(InfoExtractor): [r'data-filename="([^"]+)"', r'"filename"\s*:\s*"([^"]+)"'], webpage, 'filename') - playlist_url = 'http://vod.streamcloud.be/%s/mp4:_definst_/%s.mp4/playlist.m3u8' % (application, filename) - formats = self._extract_m3u8_formats(playlist_url, display_id, 'mp4') + playlist_url = 'http://vod.streamcloud.be/%s/_definst_/mp4:%s.mp4/playlist.m3u8' % (application, filename) + formats = self._extract_wowza_formats(playlist_url, display_id) self._sort_formats(formats) title = self._og_search_title(webpage, default=display_id) diff --git a/youtube_dl/extractor/vodplatform.py b/youtube_dl/extractor/vodplatform.py index 7bdd8b1dc..239644340 100644 --- a/youtube_dl/extractor/vodplatform.py +++ b/youtube_dl/extractor/vodplatform.py @@ -25,29 +25,8 @@ class VODPlatformIE(InfoExtractor): title = unescapeHTML(self._og_search_title(webpage)) hidden_inputs = self._hidden_inputs(webpage) - base_url = self._search_regex( - '(.*/)(?:playlist.m3u8|manifest.mpd)', - hidden_inputs.get('HiddenmyhHlsLink') or hidden_inputs['HiddenmyDashLink'], - 'base url') - formats = self._extract_m3u8_formats( - base_url + 'playlist.m3u8', video_id, 'mp4', - 'm3u8_native', m3u8_id='hls', fatal=False) - formats.extend(self._extract_mpd_formats( - base_url + 'manifest.mpd', video_id, - mpd_id='dash', fatal=False)) - rtmp_formats = self._extract_smil_formats( - base_url + 'jwplayer.smil', video_id, fatal=False) - for rtmp_format in rtmp_formats: - rtsp_format = rtmp_format.copy() - rtsp_format['url'] = '%s/%s' % (rtmp_format['url'], rtmp_format['play_path']) - del rtsp_format['play_path'] - del rtsp_format['ext'] - rtsp_format.update({ - 'url': rtsp_format['url'].replace('rtmp://', 'rtsp://'), - 'format_id': rtmp_format['format_id'].replace('rtmp', 'rtsp'), - 'protocol': 'rtsp', - }) - formats.extend([rtmp_format, rtsp_format]) + formats = self._extract_wowza_formats( + hidden_inputs.get('HiddenmyhHlsLink') or hidden_inputs['HiddenmyDashLink'], video_id, skip_protocols=['f4m', 'smil']) self._sort_formats(formats) return { diff --git a/youtube_dl/extractor/vrt.py b/youtube_dl/extractor/vrt.py index bec7ab327..00c72e346 100644 --- a/youtube_dl/extractor/vrt.py +++ b/youtube_dl/extractor/vrt.py @@ -5,7 +5,6 @@ import re from .common import InfoExtractor from ..utils import ( - determine_ext, float_or_none, ) @@ -75,7 +74,6 @@ class VRTIE(InfoExtractor): }, { 'url': 'http://cobra.canvas.be/cm/cobra/videozone/rubriek/film-videozone/1.2377055', - 'md5': '', 'info_dict': { 'id': '2377055', 'ext': 'mp4', @@ -119,39 +117,17 @@ class VRTIE(InfoExtractor): video_id, 'mp4', m3u8_id='hls', fatal=False)) if src: - if determine_ext(src) == 'm3u8': - formats.extend(self._extract_m3u8_formats( - src, video_id, 'mp4', entry_protocol='m3u8_native', - m3u8_id='hls', fatal=False)) - formats.extend(self._extract_f4m_formats( - src.replace('playlist.m3u8', 'manifest.f4m'), - video_id, f4m_id='hds', fatal=False)) - if 'data-video-geoblocking="true"' not in webpage: - rtmp_formats = self._extract_smil_formats( - src.replace('playlist.m3u8', 'jwplayer.smil'), - video_id, fatal=False) - formats.extend(rtmp_formats) - for rtmp_format in rtmp_formats: - rtmp_format_c = rtmp_format.copy() - rtmp_format_c['url'] = '%s/%s' % (rtmp_format['url'], rtmp_format['play_path']) - del rtmp_format_c['play_path'] - del rtmp_format_c['ext'] - http_format = rtmp_format_c.copy() + formats = self._extract_wowza_formats(src, video_id) + if 'data-video-geoblocking="true"' not in webpage: + for f in formats: + if f['url'].startswith('rtsp://'): + http_format = f.copy() http_format.update({ - 'url': rtmp_format_c['url'].replace('rtmp://', 'http://').replace('vod.', 'download.').replace('/_definst_/', '/').replace('mp4:', ''), - 'format_id': rtmp_format['format_id'].replace('rtmp', 'http'), + 'url': f['url'].replace('rtsp://', 'http://').replace('vod.', 'download.').replace('/_definst_/', '/').replace('mp4:', ''), + 'format_id': f['format_id'].replace('rtsp', 'http'), 'protocol': 'http', }) - rtsp_format = rtmp_format_c.copy() - rtsp_format.update({ - 'url': rtsp_format['url'].replace('rtmp://', 'rtsp://'), - 'format_id': rtmp_format['format_id'].replace('rtmp', 'rtsp'), - 'protocol': 'rtsp', - }) - formats.extend([http_format, rtsp_format]) - else: - formats.extend(self._extract_f4m_formats( - '%s/manifest.f4m' % src, video_id, f4m_id='hds', fatal=False)) + formats.append(http_format) if not formats and 'data-video-geoblocking="true"' in webpage: self.raise_geo_restricted('This video is only available in Belgium') From 7d273a387aade7665cd25eee69d94ee615d9a4b9 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Fri, 16 Sep 2016 19:31:39 +0100 Subject: [PATCH 0167/1571] [mangomolo] add support for Mangomolo embeds --- youtube_dl/extractor/awaan.py | 63 ++++++++++++------------------ youtube_dl/extractor/extractors.py | 4 ++ youtube_dl/extractor/generic.py | 29 ++++++++++++++ youtube_dl/extractor/mangomolo.py | 54 +++++++++++++++++++++++++ 4 files changed, 111 insertions(+), 39 deletions(-) create mode 100644 youtube_dl/extractor/mangomolo.py diff --git a/youtube_dl/extractor/awaan.py b/youtube_dl/extractor/awaan.py index bdf23c6a9..66d7515bc 100644 --- a/youtube_dl/extractor/awaan.py +++ b/youtube_dl/extractor/awaan.py @@ -50,25 +50,6 @@ class AWAANBaseIE(InfoExtractor): 'is_live': is_live, } - def _extract_video_formats(self, webpage, video_id, m3u8_entry_protocol): - formats = [] - format_url_base = 'http' + self._html_search_regex( - [ - r'file\s*:\s*"https?(://[^"]+)/playlist.m3u8', - r'<a[^>]+href="rtsp(://[^"]+)"' - ], webpage, 'format url') - formats.extend(self._extract_mpd_formats( - format_url_base + '/manifest.mpd', - video_id, mpd_id='dash', fatal=False)) - formats.extend(self._extract_m3u8_formats( - format_url_base + '/playlist.m3u8', video_id, 'mp4', - m3u8_entry_protocol, m3u8_id='hls', fatal=False)) - formats.extend(self._extract_f4m_formats( - format_url_base + '/manifest.f4m', - video_id, f4m_id='hds', fatal=False)) - self._sort_formats(formats) - return formats - class AWAANVideoIE(AWAANBaseIE): IE_NAME = 'awaan:video' @@ -99,16 +80,18 @@ class AWAANVideoIE(AWAANBaseIE): video_id, headers={'Origin': 'http://awaan.ae'}) info = self._parse_video_data(video_data, video_id, False) - webpage = self._download_webpage( - 'http://admin.mangomolo.com/analytics/index.php/customers/embed/video?' + - compat_urllib_parse_urlencode({ - 'id': video_data['id'], - 'user_id': video_data['user_id'], - 'signature': video_data['signature'], - 'countries': 'Q0M=', - 'filter': 'DENY', - }), video_id) - info['formats'] = self._extract_video_formats(webpage, video_id, 'm3u8_native') + embed_url = 'http://admin.mangomolo.com/analytics/index.php/customers/embed/video?' + compat_urllib_parse_urlencode({ + 'id': video_data['id'], + 'user_id': video_data['user_id'], + 'signature': video_data['signature'], + 'countries': 'Q0M=', + 'filter': 'DENY', + }) + info.update({ + '_type': 'url_transparent', + 'url': embed_url, + 'ie_key': 'MangomoloVideo', + }) return info @@ -138,16 +121,18 @@ class AWAANLiveIE(AWAANBaseIE): channel_id, headers={'Origin': 'http://awaan.ae'}) info = self._parse_video_data(channel_data, channel_id, True) - webpage = self._download_webpage( - 'http://admin.mangomolo.com/analytics/index.php/customers/embed/index?' + - compat_urllib_parse_urlencode({ - 'id': base64.b64encode(channel_data['user_id'].encode()).decode(), - 'channelid': base64.b64encode(channel_data['id'].encode()).decode(), - 'signature': channel_data['signature'], - 'countries': 'Q0M=', - 'filter': 'DENY', - }), channel_id) - info['formats'] = self._extract_video_formats(webpage, channel_id, 'm3u8') + embed_url = 'http://admin.mangomolo.com/analytics/index.php/customers/embed/index?' + compat_urllib_parse_urlencode({ + 'id': base64.b64encode(channel_data['user_id'].encode()).decode(), + 'channelid': base64.b64encode(channel_data['id'].encode()).decode(), + 'signature': channel_data['signature'], + 'countries': 'Q0M=', + 'filter': 'DENY', + }) + info.update({ + '_type': 'url_transparent', + 'url': embed_url, + 'ie_key': 'MangomoloLive', + }) return info diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index dd0579425..4baf4cd48 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -472,6 +472,10 @@ from .macgamestore import MacGameStoreIE from .mailru import MailRuIE from .makerschannel import MakersChannelIE from .makertv import MakerTVIE +from .mangomolo import ( + MangomoloVideoIE, + MangomoloLiveIE, +) from .matchtv import MatchTVIE from .mdr import MDRIE from .meta import METAIE diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 2e46ca179..e01305942 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -2254,6 +2254,35 @@ class GenericIE(InfoExtractor): return self.url_result( self._proto_relative_url(unescapeHTML(mobj.group('url'))), 'VODPlatform') + # Look for Mangomolo embeds + mobj = re.search( + r'''(?x)<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?admin\.mangomolo.com/analytics/index\.php/customers/embed/ + (?: + video\?.*?\bid=(?P<video_id>\d+)| + index\?.*?\bchannelid=(?P<channel_id>(?:[A-Za-z0-9+/=]|%2B|%2F|%3D)+) + ).+?)\1''', webpage) + if mobj is not None: + info = { + '_type': 'url_transparent', + 'url': self._proto_relative_url(unescapeHTML(mobj.group('url'))), + 'title': video_title, + 'description': video_description, + 'thumbnail': video_thumbnail, + 'uploader': video_uploader, + } + video_id = mobj.group('video_id') + if video_id: + info.update({ + 'ie_key': 'MangomoloVideo', + 'id': video_id, + }) + else: + info.update({ + 'ie_key': 'MangomoloLive', + 'id': mobj.group('channel_id'), + }) + return info + # Look for Instagram embeds instagram_embed_url = InstagramIE._extract_embed_url(webpage) if instagram_embed_url is not None: diff --git a/youtube_dl/extractor/mangomolo.py b/youtube_dl/extractor/mangomolo.py new file mode 100644 index 000000000..8cac8ace2 --- /dev/null +++ b/youtube_dl/extractor/mangomolo.py @@ -0,0 +1,54 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import base64 + +from .common import InfoExtractor +from ..compat import compat_urllib_parse_unquote +from ..utils import ( + int_or_none, +) + + +class MangomoloBaseIE(InfoExtractor): + def _get_real_id(self, page_id): + return page_id + + def _real_extract(self, url): + page_id = self._get_real_id(self._match_id(url)) + webpage = self._download_webpage(url, page_id) + hidden_inputs = self._hidden_inputs(webpage) + m3u8_entry_protocol = 'm3u8' if self._IS_LIVE else 'm3u8_native' + + format_url = self._html_search_regex( + [ + r'file\s*:\s*"(https?://[^"]+?/playlist.m3u8)', + r'<a[^>]+href="(rtsp://[^"]+)"' + ], webpage, 'format url') + formats = self._extract_wowza_formats( + format_url, page_id, m3u8_entry_protocol, ['smil']) + self._sort_formats(formats) + + return { + 'id': page_id, + 'title': self._live_title(page_id) if self._IS_LIVE else page_id, + 'uploader_id': hidden_inputs.get('userid'), + 'duration': int_or_none(hidden_inputs.get('duration')), + 'is_live': self._IS_LIVE, + 'formats': formats, + } + + +class MangomoloVideoIE(MangomoloBaseIE): + IENAME = 'mangomolo:video' + _VALID_URL = r'https?://admin\.mangomolo.com/analytics/index\.php/customers/embed/video\?.*?\bid=(?P<id>\d+)' + _IS_LIVE = False + + +class MangomoloLiveIE(MangomoloBaseIE): + IENAME = 'mangomolo:live' + _VALID_URL = r'https?://admin\.mangomolo.com/analytics/index\.php/customers/embed/index\?.*?\bchannelid=(?P<id>(?:[A-Za-z0-9+/=]|%2B|%2F|%3D)+)' + _IS_LIVE = True + + def _get_real_id(self, page_id): + return base64.b64decode(compat_urllib_parse_unquote(page_id).encode()).decode() From fc86d4eed0bf10f8f90326472811e5b4d4ad4bd9 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Fri, 16 Sep 2016 20:10:47 +0100 Subject: [PATCH 0168/1571] [mangomolo] fix typo --- youtube_dl/extractor/mangomolo.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/mangomolo.py b/youtube_dl/extractor/mangomolo.py index 8cac8ace2..2db503f2b 100644 --- a/youtube_dl/extractor/mangomolo.py +++ b/youtube_dl/extractor/mangomolo.py @@ -40,13 +40,13 @@ class MangomoloBaseIE(InfoExtractor): class MangomoloVideoIE(MangomoloBaseIE): - IENAME = 'mangomolo:video' + IE_NAME = 'mangomolo:video' _VALID_URL = r'https?://admin\.mangomolo.com/analytics/index\.php/customers/embed/video\?.*?\bid=(?P<id>\d+)' _IS_LIVE = False class MangomoloLiveIE(MangomoloBaseIE): - IENAME = 'mangomolo:live' + IE_NAME = 'mangomolo:live' _VALID_URL = r'https?://admin\.mangomolo.com/analytics/index\.php/customers/embed/index\?.*?\bchannelid=(?P<id>(?:[A-Za-z0-9+/=]|%2B|%2F|%3D)+)' _IS_LIVE = True From 30d9e20938fa91ece09c376b67030647215d48df Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Fri, 16 Sep 2016 22:06:55 +0100 Subject: [PATCH 0169/1571] [postprocessor/ffmpeg] apply FFmpegFixupM3u8PP only for videos with aac codec(#5591) --- youtube_dl/postprocessor/ffmpeg.py | 63 +++++++++++++++--------------- 1 file changed, 31 insertions(+), 32 deletions(-) diff --git a/youtube_dl/postprocessor/ffmpeg.py b/youtube_dl/postprocessor/ffmpeg.py index fa99b0c2a..8d1214ee2 100644 --- a/youtube_dl/postprocessor/ffmpeg.py +++ b/youtube_dl/postprocessor/ffmpeg.py @@ -139,6 +139,30 @@ class FFmpegPostProcessor(PostProcessor): def probe_executable(self): return self._paths[self.probe_basename] + def get_audio_codec(self, path): + if not self.probe_available: + raise PostProcessingError('ffprobe or avprobe not found. Please install one.') + try: + cmd = [ + encodeFilename(self.probe_executable, True), + encodeArgument('-show_streams'), + encodeFilename(self._ffmpeg_filename_argument(path), True)] + if self._downloader.params.get('verbose', False): + self._downloader.to_screen('[debug] %s command line: %s' % (self.basename, shell_quote(cmd))) + handle = subprocess.Popen(cmd, stderr=compat_subprocess_get_DEVNULL(), stdout=subprocess.PIPE, stdin=subprocess.PIPE) + output = handle.communicate()[0] + if handle.wait() != 0: + return None + except (IOError, OSError): + return None + audio_codec = None + for line in output.decode('ascii', 'ignore').split('\n'): + if line.startswith('codec_name='): + audio_codec = line.split('=')[1].strip() + elif line.strip() == 'codec_type=audio' and audio_codec is not None: + return audio_codec + return None + def run_ffmpeg_multiple_files(self, input_paths, out_path, opts): self.check_version() @@ -188,31 +212,6 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor): self._preferredquality = preferredquality self._nopostoverwrites = nopostoverwrites - def get_audio_codec(self, path): - - if not self.probe_available: - raise PostProcessingError('ffprobe or avprobe not found. Please install one.') - try: - cmd = [ - encodeFilename(self.probe_executable, True), - encodeArgument('-show_streams'), - encodeFilename(self._ffmpeg_filename_argument(path), True)] - if self._downloader.params.get('verbose', False): - self._downloader.to_screen('[debug] %s command line: %s' % (self.basename, shell_quote(cmd))) - handle = subprocess.Popen(cmd, stderr=compat_subprocess_get_DEVNULL(), stdout=subprocess.PIPE, stdin=subprocess.PIPE) - output = handle.communicate()[0] - if handle.wait() != 0: - return None - except (IOError, OSError): - return None - audio_codec = None - for line in output.decode('ascii', 'ignore').split('\n'): - if line.startswith('codec_name='): - audio_codec = line.split('=')[1].strip() - elif line.strip() == 'codec_type=audio' and audio_codec is not None: - return audio_codec - return None - def run_ffmpeg(self, path, out_path, codec, more_opts): if codec is None: acodec_opts = [] @@ -504,15 +503,15 @@ class FFmpegFixupM4aPP(FFmpegPostProcessor): class FFmpegFixupM3u8PP(FFmpegPostProcessor): def run(self, info): filename = info['filepath'] - temp_filename = prepend_extension(filename, 'temp') + if self.get_audio_codec(filename) == 'aac': + temp_filename = prepend_extension(filename, 'temp') - options = ['-c', 'copy', '-f', 'mp4', '-bsf:a', 'aac_adtstoasc'] - self._downloader.to_screen('[ffmpeg] Fixing malformated aac bitstream in "%s"' % filename) - self.run_ffmpeg(filename, temp_filename, options) - - os.remove(encodeFilename(filename)) - os.rename(encodeFilename(temp_filename), encodeFilename(filename)) + options = ['-c', 'copy', '-f', 'mp4', '-bsf:a', 'aac_adtstoasc'] + self._downloader.to_screen('[ffmpeg] Fixing malformated aac bitstream in "%s"' % filename) + self.run_ffmpeg(filename, temp_filename, options) + os.remove(encodeFilename(filename)) + os.rename(encodeFilename(temp_filename), encodeFilename(filename)) return [], info From d05ef09d9d94fa70335af5fbaab385b37b16d705 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Sat, 17 Sep 2016 08:11:01 +0100 Subject: [PATCH 0170/1571] [mangomolo] fix domain regex --- youtube_dl/extractor/generic.py | 2 +- youtube_dl/extractor/mangomolo.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index e01305942..92a6e5146 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -2256,7 +2256,7 @@ class GenericIE(InfoExtractor): # Look for Mangomolo embeds mobj = re.search( - r'''(?x)<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?admin\.mangomolo.com/analytics/index\.php/customers/embed/ + r'''(?x)<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?admin\.mangomolo\.com/analytics/index\.php/customers/embed/ (?: video\?.*?\bid=(?P<video_id>\d+)| index\?.*?\bchannelid=(?P<channel_id>(?:[A-Za-z0-9+/=]|%2B|%2F|%3D)+) diff --git a/youtube_dl/extractor/mangomolo.py b/youtube_dl/extractor/mangomolo.py index 2db503f2b..1885ac7df 100644 --- a/youtube_dl/extractor/mangomolo.py +++ b/youtube_dl/extractor/mangomolo.py @@ -41,13 +41,13 @@ class MangomoloBaseIE(InfoExtractor): class MangomoloVideoIE(MangomoloBaseIE): IE_NAME = 'mangomolo:video' - _VALID_URL = r'https?://admin\.mangomolo.com/analytics/index\.php/customers/embed/video\?.*?\bid=(?P<id>\d+)' + _VALID_URL = r'https?://admin\.mangomolo\.com/analytics/index\.php/customers/embed/video\?.*?\bid=(?P<id>\d+)' _IS_LIVE = False class MangomoloLiveIE(MangomoloBaseIE): IE_NAME = 'mangomolo:live' - _VALID_URL = r'https?://admin\.mangomolo.com/analytics/index\.php/customers/embed/index\?.*?\bchannelid=(?P<id>(?:[A-Za-z0-9+/=]|%2B|%2F|%3D)+)' + _VALID_URL = r'https?://admin\.mangomolo\.com/analytics/index\.php/customers/embed/index\?.*?\bchannelid=(?P<id>(?:[A-Za-z0-9+/=]|%2B|%2F|%3D)+)' _IS_LIVE = True def _get_real_id(self, page_id): From c51a7f0b2f2454bfe0b53f9d79567b3210e015b3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 17 Sep 2016 15:44:37 +0700 Subject: [PATCH 0171/1571] [franceinter] Fix upload date extraction --- youtube_dl/extractor/franceinter.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/franceinter.py b/youtube_dl/extractor/franceinter.py index 1a1232ade..707b9e00d 100644 --- a/youtube_dl/extractor/franceinter.py +++ b/youtube_dl/extractor/franceinter.py @@ -2,7 +2,6 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..compat import compat_str from ..utils import month_by_name @@ -10,14 +9,14 @@ class FranceInterIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?franceinter\.fr/emissions/(?P<id>[^?#]+)' _TEST = { - 'url': 'https://www.franceinter.fr/emissions/la-tete-au-carre/la-tete-au-carre-14-septembre-2016', - 'md5': '4e3aeb58fe0e83d7b0581fa213c409d0', + 'url': 'https://www.franceinter.fr/emissions/affaires-sensibles/affaires-sensibles-07-septembre-2016', + 'md5': '9e54d7bdb6fdc02a841007f8a975c094', 'info_dict': { - 'id': 'la-tete-au-carre/la-tete-au-carre-14-septembre-2016', + 'id': 'affaires-sensibles/affaires-sensibles-07-septembre-2016', 'ext': 'mp3', - 'title': 'Et si les rêves pouvaient nous aider à agir dans notre vie quotidienne ?', - 'description': 'md5:a245dd62cf5bf51de915f8d9956d180a', - 'upload_date': '20160914', + 'title': 'Affaire Cahuzac : le contentieux du compte en Suisse', + 'description': 'md5:401969c5d318c061f86bda1fa359292b', + 'upload_date': '20160907', }, } @@ -40,6 +39,7 @@ class FranceInterIE(InfoExtractor): upload_date_list = upload_date_str.split() upload_date_list.reverse() upload_date_list[1] = '%02d' % (month_by_name(upload_date_list[1], lang='fr') or 0) + upload_date_list[2] = '%02d' % int(upload_date_list[2]) upload_date = ''.join(upload_date_list) else: upload_date = None From e14c82bd6b6cfc1e904b067350d818657c911e07 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sat, 17 Sep 2016 18:45:08 +0800 Subject: [PATCH 0172/1571] [jwplatform] Use js_to_json to detect more JWPlayers --- ChangeLog | 6 ++++++ youtube_dl/extractor/jwplatform.py | 6 ++++-- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/ChangeLog b/ChangeLog index 4583537ac..a9f7cee53 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +version <unreleased> + +Extractors +* [jwplatform] Improve JWPlayer detection + + version 2016.09.15 Core diff --git a/youtube_dl/extractor/jwplatform.py b/youtube_dl/extractor/jwplatform.py index 7aaa65476..38199fcd0 100644 --- a/youtube_dl/extractor/jwplatform.py +++ b/youtube_dl/extractor/jwplatform.py @@ -9,6 +9,7 @@ from ..utils import ( determine_ext, float_or_none, int_or_none, + js_to_json, mimetype2ext, ) @@ -19,14 +20,15 @@ class JWPlatformBaseIE(InfoExtractor): # TODO: Merge this with JWPlayer-related codes in generic.py mobj = re.search( - 'jwplayer\((?P<quote>[\'"])[^\'" ]+(?P=quote)\)\.setup\((?P<options>[^)]+)\)', + r'jwplayer\((?P<quote>[\'"])[^\'" ]+(?P=quote)\)\.setup\s*\((?P<options>[^)]+)\)', webpage) if mobj: return mobj.group('options') def _extract_jwplayer_data(self, webpage, video_id, *args, **kwargs): jwplayer_data = self._parse_json( - self._find_jwplayer_data(webpage), video_id) + self._find_jwplayer_data(webpage), video_id, + transform_source=js_to_json) return self._parse_jwplayer_data( jwplayer_data, video_id, *args, **kwargs) From 584d6f3457205b547c8969f11eade117f871ec8f Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sat, 17 Sep 2016 18:46:43 +0800 Subject: [PATCH 0173/1571] [thisav] Recognize jwplayers (closes #10447) --- ChangeLog | 1 + youtube_dl/extractor/thisav.py | 39 ++++++++++++++++++++++++---------- 2 files changed, 29 insertions(+), 11 deletions(-) diff --git a/ChangeLog b/ChangeLog index a9f7cee53..b0a65bde2 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,7 @@ version <unreleased> Extractors ++ [thisav] Recognize HTML5 videos (#10447) * [jwplatform] Improve JWPlayer detection diff --git a/youtube_dl/extractor/thisav.py b/youtube_dl/extractor/thisav.py index 7f323c938..027a8e907 100644 --- a/youtube_dl/extractor/thisav.py +++ b/youtube_dl/extractor/thisav.py @@ -3,13 +3,12 @@ from __future__ import unicode_literals import re -from .common import InfoExtractor -from ..utils import determine_ext +from .jwplatform import JWPlatformBaseIE -class ThisAVIE(InfoExtractor): +class ThisAVIE(JWPlatformBaseIE): _VALID_URL = r'https?://(?:www\.)?thisav\.com/video/(?P<id>[0-9]+)/.*' - _TEST = { + _TESTS = [{ 'url': 'http://www.thisav.com/video/47734/%98%26sup1%3B%83%9E%83%82---just-fit.html', 'md5': '0480f1ef3932d901f0e0e719f188f19b', 'info_dict': { @@ -19,7 +18,17 @@ class ThisAVIE(InfoExtractor): 'uploader': 'dj7970', 'uploader_id': 'dj7970' } - } + }, { + 'url': 'http://www.thisav.com/video/242352/nerdy-18yo-big-ass-tattoos-and-glasses.html', + 'md5': 'ba90c076bd0f80203679e5b60bf523ee', + 'info_dict': { + 'id': '242352', + 'ext': 'mp4', + 'title': 'Nerdy 18yo Big Ass Tattoos and Glasses', + 'uploader': 'cybersluts', + 'uploader_id': 'cybersluts', + }, + }] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) @@ -28,20 +37,28 @@ class ThisAVIE(InfoExtractor): webpage = self._download_webpage(url, video_id) title = self._html_search_regex(r'<h1>([^<]*)</h1>', webpage, 'title') video_url = self._html_search_regex( - r"addVariable\('file','([^']+)'\);", webpage, 'video url') + r"addVariable\('file','([^']+)'\);", webpage, 'video url', default=None) + if video_url: + info_dict = { + 'formats': [{ + 'url': video_url, + }], + } + else: + info_dict = self._extract_jwplayer_data( + webpage, video_id, require_title=False) uploader = self._html_search_regex( r': <a href="http://www.thisav.com/user/[0-9]+/(?:[^"]+)">([^<]+)</a>', webpage, 'uploader name', fatal=False) uploader_id = self._html_search_regex( r': <a href="http://www.thisav.com/user/[0-9]+/([^"]+)">(?:[^<]+)</a>', webpage, 'uploader id', fatal=False) - ext = determine_ext(video_url) - return { + info_dict.update({ 'id': video_id, - 'url': video_url, 'uploader': uploader, 'uploader_id': uploader_id, 'title': title, - 'ext': ext, - } + }) + + return info_dict From a0d5077c8dfa9fa31ebf3e63fdb1b2a7a5182a81 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 6 Sep 2016 01:18:57 +0700 Subject: [PATCH 0174/1571] [extractor/common] Introduce fragments interface --- youtube_dl/extractor/common.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index c00023458..566ed7a4d 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -86,7 +86,9 @@ class InfoExtractor(object): from worst to best quality. Potential fields: - * url Mandatory. The URL of the video file + * url Mandatory. The URL of the video file or URL of + the manifest file in case of fragmented media + (DASH, hls, hds). * ext Will be calculated from URL if missing * format A human-readable description of the format ("mp4 container with h264/opus"). @@ -115,6 +117,11 @@ class InfoExtractor(object): download, lower-case. "http", "https", "rtsp", "rtmp", "rtmpe", "m3u8", "m3u8_native" or "http_dash_segments". + * fragments A list of fragments of the fragmented media, + with the following entries: + * "url" (mandatory) - fragment's URL + * "duration" (optional, int or float) + * "filesize" (optional, int) * preference Order number of this format. If this field is present and not None, the formats get sorted by this field, regardless of all other values. From b4c1d6e800a5b28accf4ba588b8fa3f0c420ce13 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 6 Sep 2016 01:21:57 +0700 Subject: [PATCH 0175/1571] [extractor/common] Expose fragments interface for dashsegments formats --- youtube_dl/extractor/common.py | 142 +++++++++++++++++++++++---------- 1 file changed, 99 insertions(+), 43 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 566ed7a4d..e637b33d5 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1551,42 +1551,52 @@ class InfoExtractor(object): def extract_multisegment_info(element, ms_parent_info): ms_info = ms_parent_info.copy() + + # As per [1, 5.3.9.2.2] SegmentList and SegmentTemplate share some + # common attributes and elements. We will only extract relevant + # for us. + def extract_common(source): + segment_timeline = source.find(_add_ns('SegmentTimeline')) + if segment_timeline is not None: + s_e = segment_timeline.findall(_add_ns('S')) + if s_e: + ms_info['total_number'] = 0 + ms_info['s'] = [] + for s in s_e: + r = int(s.get('r', 0)) + ms_info['total_number'] += 1 + r + ms_info['s'].append({ + 't': int(s.get('t', 0)), + # @d is mandatory (see [1, 5.3.9.6.2, Table 17, page 60]) + 'd': int(s.attrib['d']), + 'r': r, + }) + start_number = source.get('startNumber') + if start_number: + ms_info['start_number'] = int(start_number) + timescale = source.get('timescale') + if timescale: + ms_info['timescale'] = int(timescale) + segment_duration = source.get('duration') + if segment_duration: + ms_info['segment_duration'] = int(segment_duration) + + def extract_Initialization(source): + initialization = source.find(_add_ns('Initialization')) + if initialization is not None: + ms_info['initialization_url'] = initialization.attrib['sourceURL'] + segment_list = element.find(_add_ns('SegmentList')) if segment_list is not None: + extract_common(segment_list) + extract_Initialization(segment_list) segment_urls_e = segment_list.findall(_add_ns('SegmentURL')) if segment_urls_e: ms_info['segment_urls'] = [segment.attrib['media'] for segment in segment_urls_e] - initialization = segment_list.find(_add_ns('Initialization')) - if initialization is not None: - ms_info['initialization_url'] = initialization.attrib['sourceURL'] else: segment_template = element.find(_add_ns('SegmentTemplate')) if segment_template is not None: - start_number = segment_template.get('startNumber') - if start_number: - ms_info['start_number'] = int(start_number) - segment_timeline = segment_template.find(_add_ns('SegmentTimeline')) - if segment_timeline is not None: - s_e = segment_timeline.findall(_add_ns('S')) - if s_e: - ms_info['total_number'] = 0 - ms_info['s'] = [] - for s in s_e: - r = int(s.get('r', 0)) - ms_info['total_number'] += 1 + r - ms_info['s'].append({ - 't': int(s.get('t', 0)), - # @d is mandatory (see [1, 5.3.9.6.2, Table 17, page 60]) - 'd': int(s.attrib['d']), - 'r': r, - }) - else: - timescale = segment_template.get('timescale') - if timescale: - ms_info['timescale'] = int(timescale) - segment_duration = segment_template.get('duration') - if segment_duration: - ms_info['segment_duration'] = int(segment_duration) + extract_common(segment_template) media_template = segment_template.get('media') if media_template: ms_info['media_template'] = media_template @@ -1594,11 +1604,14 @@ class InfoExtractor(object): if initialization: ms_info['initialization_url'] = initialization else: - initialization = segment_template.find(_add_ns('Initialization')) - if initialization is not None: - ms_info['initialization_url'] = initialization.attrib['sourceURL'] + extract_Initialization(segment_template) return ms_info + def combine_url(base_url, target_url): + if re.match(r'^https?://', target_url): + return target_url + return '%s%s%s' % (base_url, '' if base_url.endswith('/') else '/', target_url) + mpd_duration = parse_duration(mpd_doc.get('mediaPresentationDuration')) formats = [] for period in mpd_doc.findall(_add_ns('Period')): @@ -1655,9 +1668,7 @@ class InfoExtractor(object): } representation_ms_info = extract_multisegment_info(representation, adaption_set_ms_info) if 'segment_urls' not in representation_ms_info and 'media_template' in representation_ms_info: - if 'total_number' not in representation_ms_info and 'segment_duration': - segment_duration = float(representation_ms_info['segment_duration']) / float(representation_ms_info['timescale']) - representation_ms_info['total_number'] = int(math.ceil(float(period_duration) / segment_duration)) + media_template = representation_ms_info['media_template'] media_template = media_template.replace('$RepresentationID$', representation_id) media_template = re.sub(r'\$(Number|Bandwidth|Time)\$', r'%(\1)d', media_template) @@ -1666,7 +1677,11 @@ class InfoExtractor(object): # As per [1, 5.3.9.4.4, Table 16, page 55] $Number$ and $Time$ # can't be used at the same time - if '%(Number' in media_template: + if '%(Number' in media_template and 's' not in representation_ms_info: + segment_duration = None + if 'total_number' not in representation_ms_info and 'segment_duration': + segment_duration = float_or_none(representation_ms_info['segment_duration'], representation_ms_info['timescale']) + representation_ms_info['total_number'] = int(math.ceil(float(period_duration) / segment_duration)) representation_ms_info['segment_urls'] = [ media_template % { 'Number': segment_number, @@ -1675,28 +1690,65 @@ class InfoExtractor(object): for segment_number in range( representation_ms_info['start_number'], representation_ms_info['total_number'] + representation_ms_info['start_number'])] + representation_ms_info['fragments'] = [{ + 'url': media_template % { + 'Number': segment_number, + 'Bandwidth': representation_attrib.get('bandwidth'), + }, + 'duration': segment_duration, + } for segment_number in range( + representation_ms_info['start_number'], + representation_ms_info['total_number'] + representation_ms_info['start_number'])] else: + # $Number*$ or $Time$ in media template with S list available + # Example $Number*$: http://www.svtplay.se/klipp/9023742/stopptid-om-bjorn-borg + # Example $Time$: https://play.arkena.com/embed/avp/v2/player/media/b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe/1/129411 representation_ms_info['segment_urls'] = [] + representation_ms_info['fragments'] = [] segment_time = 0 + segment_d = None + segment_number = representation_ms_info['start_number'] def add_segment_url(): - representation_ms_info['segment_urls'].append( - media_template % { - 'Time': segment_time, - 'Bandwidth': representation_attrib.get('bandwidth'), - } - ) + segment_url = media_template % { + 'Time': segment_time, + 'Bandwidth': representation_attrib.get('bandwidth'), + 'Number': segment_number, + } + representation_ms_info['segment_urls'].append(segment_url) + representation_ms_info['fragments'].append({ + 'url': segment_url, + 'duration': float_or_none(segment_d, representation_ms_info['timescale']), + }) for num, s in enumerate(representation_ms_info['s']): segment_time = s.get('t') or segment_time + segment_d = s['d'] add_segment_url() + segment_number += 1 for r in range(s.get('r', 0)): - segment_time += s['d'] + segment_time += segment_d add_segment_url() - segment_time += s['d'] + segment_number += 1 + segment_time += segment_d + elif 'segment_urls' in representation_ms_info and 's' in representation_ms_info: + # No media template + # Example: https://www.youtube.com/watch?v=iXZV5uAYMJI + # or any YouTube dashsegments video + fragments = [] + s_num = 0 + for segment_url in representation_ms_info['segment_urls']: + s = representation_ms_info['s'][s_num] + for r in range(s.get('r', 0) + 1): + fragments.append({ + 'url': segment_url, + 'duration': float_or_none(s['d'], representation_ms_info['timescale']), + }) + representation_ms_info['fragments'] = fragments if 'segment_urls' in representation_ms_info: f.update({ 'segment_urls': representation_ms_info['segment_urls'], + 'fragments': [], 'protocol': 'http_dash_segments', }) if 'initialization_url' in representation_ms_info: @@ -1706,6 +1758,10 @@ class InfoExtractor(object): }) if not f.get('url'): f['url'] = initialization_url + f['fragments'].append({'url': initialization_url}) + f['fragments'].extend(representation_ms_info['fragments']) + for fragment in f['fragments']: + fragment['url'] = combine_url(base_url, fragment['url']) try: existing_format = next( fo for fo in formats From 21d21b0c72a731d4ff5affa2182fbe1687c031a0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 17 Sep 2016 19:25:31 +0700 Subject: [PATCH 0176/1571] [svt] Fix DASH formats extraction --- youtube_dl/extractor/svt.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/svt.py b/youtube_dl/extractor/svt.py index 1c04dfb7b..fb0a4b24e 100644 --- a/youtube_dl/extractor/svt.py +++ b/youtube_dl/extractor/svt.py @@ -16,7 +16,7 @@ class SVTBaseIE(InfoExtractor): def _extract_video(self, video_info, video_id): formats = [] for vr in video_info['videoReferences']: - player_type = vr.get('playerType') + player_type = vr.get('playerType') or vr.get('format') vurl = vr['url'] ext = determine_ext(vurl) if ext == 'm3u8': From 86f4d14f817acaee1f1f544cd9b06d47bc2a5180 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 17 Sep 2016 20:35:22 +0700 Subject: [PATCH 0177/1571] Refactor fragments interface and dash segments downloader - Eliminate segment_urls and initialization_url + Introduce manifest_url (manifest may contain unfragmented data in this case url will be used for direct media URL and manifest_url for manifest itself correspondingly) * Rewrite dashsegments downloader to use fragments data * Improve generic mpd extraction --- youtube_dl/downloader/dash.py | 35 +++++++++++---------------------- youtube_dl/extractor/common.py | 31 +++++++++++------------------ youtube_dl/extractor/generic.py | 4 +++- 3 files changed, 26 insertions(+), 44 deletions(-) diff --git a/youtube_dl/downloader/dash.py b/youtube_dl/downloader/dash.py index 41fc9cfc2..8437dde30 100644 --- a/youtube_dl/downloader/dash.py +++ b/youtube_dl/downloader/dash.py @@ -1,7 +1,6 @@ from __future__ import unicode_literals import os -import re from .fragment import FragmentFD from ..compat import compat_urllib_error @@ -19,34 +18,32 @@ class DashSegmentsFD(FragmentFD): FD_NAME = 'dashsegments' def real_download(self, filename, info_dict): - base_url = info_dict['url'] - segment_urls = [info_dict['segment_urls'][0]] if self.params.get('test', False) else info_dict['segment_urls'] - initialization_url = info_dict.get('initialization_url') + segments = info_dict['fragments'][:1] if self.params.get( + 'test', False) else info_dict['fragments'] ctx = { 'filename': filename, - 'total_frags': len(segment_urls) + (1 if initialization_url else 0), + 'total_frags': len(segments), } self._prepare_and_start_frag_download(ctx) - def combine_url(base_url, target_url): - if re.match(r'^https?://', target_url): - return target_url - return '%s%s%s' % (base_url, '' if base_url.endswith('/') else '/', target_url) - segments_filenames = [] fragment_retries = self.params.get('fragment_retries', 0) skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True) - def process_segment(segment, tmp_filename, fatal): - target_url, segment_name = segment + def process_segment(segment, tmp_filename, num): + segment_url = segment['url'] + segment_name = 'Frag%d' % num target_filename = '%s-%s' % (tmp_filename, segment_name) + # In DASH, the first segment contains necessary headers to + # generate a valid MP4 file, so always abort for the first segment + fatal = num == 0 or not skip_unavailable_fragments count = 0 while count <= fragment_retries: try: - success = ctx['dl'].download(target_filename, {'url': combine_url(base_url, target_url)}) + success = ctx['dl'].download(target_filename, {'url': segment_url}) if not success: return False down, target_sanitized = sanitize_open(target_filename, 'rb') @@ -72,16 +69,8 @@ class DashSegmentsFD(FragmentFD): return False return True - segments_to_download = [(initialization_url, 'Init')] if initialization_url else [] - segments_to_download.extend([ - (segment_url, 'Seg%d' % i) - for i, segment_url in enumerate(segment_urls)]) - - for i, segment in enumerate(segments_to_download): - # In DASH, the first segment contains necessary headers to - # generate a valid MP4 file, so always abort for the first segment - fatal = i == 0 or not skip_unavailable_fragments - if not process_segment(segment, ctx['tmpfilename'], fatal): + for i, segment in enumerate(segments): + if not process_segment(segment, ctx['tmpfilename'], i): return False self._finish_frag_download(ctx) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index e637b33d5..f35311e7a 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -86,9 +86,10 @@ class InfoExtractor(object): from worst to best quality. Potential fields: - * url Mandatory. The URL of the video file or URL of - the manifest file in case of fragmented media - (DASH, hls, hds). + * url Mandatory. The URL of the video file + * manifest_url + The URL of the manifest file in case of + fragmented media (DASH, hls, hds) * ext Will be calculated from URL if missing * format A human-readable description of the format ("mp4 container with h264/opus"). @@ -1528,9 +1529,10 @@ class InfoExtractor(object): mpd_base_url = re.match(r'https?://.+/', urlh.geturl()).group() return self._parse_mpd_formats( - compat_etree_fromstring(mpd.encode('utf-8')), mpd_id, mpd_base_url, formats_dict=formats_dict) + compat_etree_fromstring(mpd.encode('utf-8')), mpd_id, mpd_base_url, + formats_dict=formats_dict, mpd_url=mpd_url) - def _parse_mpd_formats(self, mpd_doc, mpd_id=None, mpd_base_url='', formats_dict={}): + def _parse_mpd_formats(self, mpd_doc, mpd_id=None, mpd_base_url='', formats_dict={}, mpd_url=None): """ Parse formats from MPD manifest. References: @@ -1654,6 +1656,7 @@ class InfoExtractor(object): f = { 'format_id': '%s-%s' % (mpd_id, representation_id) if mpd_id else representation_id, 'url': base_url, + 'manifest_url': mpd_url, 'ext': mimetype2ext(mime_type), 'width': int_or_none(representation_attrib.get('width')), 'height': int_or_none(representation_attrib.get('height')), @@ -1682,14 +1685,6 @@ class InfoExtractor(object): if 'total_number' not in representation_ms_info and 'segment_duration': segment_duration = float_or_none(representation_ms_info['segment_duration'], representation_ms_info['timescale']) representation_ms_info['total_number'] = int(math.ceil(float(period_duration) / segment_duration)) - representation_ms_info['segment_urls'] = [ - media_template % { - 'Number': segment_number, - 'Bandwidth': representation_attrib.get('bandwidth'), - } - for segment_number in range( - representation_ms_info['start_number'], - representation_ms_info['total_number'] + representation_ms_info['start_number'])] representation_ms_info['fragments'] = [{ 'url': media_template % { 'Number': segment_number, @@ -1703,7 +1698,6 @@ class InfoExtractor(object): # $Number*$ or $Time$ in media template with S list available # Example $Number*$: http://www.svtplay.se/klipp/9023742/stopptid-om-bjorn-borg # Example $Time$: https://play.arkena.com/embed/avp/v2/player/media/b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe/1/129411 - representation_ms_info['segment_urls'] = [] representation_ms_info['fragments'] = [] segment_time = 0 segment_d = None @@ -1715,7 +1709,6 @@ class InfoExtractor(object): 'Bandwidth': representation_attrib.get('bandwidth'), 'Number': segment_number, } - representation_ms_info['segment_urls'].append(segment_url) representation_ms_info['fragments'].append({ 'url': segment_url, 'duration': float_or_none(segment_d, representation_ms_info['timescale']), @@ -1745,17 +1738,15 @@ class InfoExtractor(object): 'duration': float_or_none(s['d'], representation_ms_info['timescale']), }) representation_ms_info['fragments'] = fragments - if 'segment_urls' in representation_ms_info: + # NB: MPD manifest may contain direct URLs to unfragmented media. + # No fragments key is present in this case. + if 'fragments' in representation_ms_info: f.update({ - 'segment_urls': representation_ms_info['segment_urls'], 'fragments': [], 'protocol': 'http_dash_segments', }) if 'initialization_url' in representation_ms_info: initialization_url = representation_ms_info['initialization_url'].replace('$RepresentationID$', representation_id) - f.update({ - 'initialization_url': initialization_url, - }) if not f.get('url'): f['url'] = initialization_url f['fragments'].append({'url': initialization_url}) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 92a6e5146..c1792c534 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -1657,7 +1657,9 @@ class GenericIE(InfoExtractor): return self.playlist_result(self._parse_xspf(doc, video_id), video_id) elif re.match(r'(?i)^(?:{[^}]+})?MPD$', doc.tag): info_dict['formats'] = self._parse_mpd_formats( - doc, video_id, mpd_base_url=url.rpartition('/')[0]) + doc, video_id, + mpd_base_url=full_response.geturl().rpartition('/')[0], + mpd_url=url) self._sort_formats(info_dict['formats']) return info_dict elif re.match(r'^{http://ns\.adobe\.com/f4m/[12]\.0}manifest$', doc.tag): From 30d0b549be5696f24b87471a0e691f9afca4a9c8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 17 Sep 2016 21:33:38 +0700 Subject: [PATCH 0178/1571] [extractor/common] Add manifest_url for hls and hds formats --- youtube_dl/extractor/common.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index f35311e7a..9c8991542 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1150,6 +1150,7 @@ class InfoExtractor(object): formats.append({ 'format_id': format_id, 'url': manifest_url, + 'manifest_url': manifest_url, 'ext': 'flv' if bootstrap_info is not None else None, 'tbr': tbr, 'width': width, @@ -1255,9 +1256,11 @@ class InfoExtractor(object): # format_id intact. if not live: format_id.append(stream_name if stream_name else '%d' % (tbr if tbr else len(formats))) + manifest_url = format_url(line.strip()) f = { 'format_id': '-'.join(format_id), - 'url': format_url(line.strip()), + 'url': manifest_url, + 'manifest_url': manifest_url, 'tbr': tbr, 'ext': ext, 'fps': float_or_none(last_info.get('FRAME-RATE')), From 26394d021df1137301b1508bd00dd3478c15116c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 17 Sep 2016 23:34:10 +0700 Subject: [PATCH 0179/1571] [globo:article] Add support for multiple videos (Closes #10653) --- youtube_dl/extractor/globo.py | 39 +++++++++++++++++++++++++---------- 1 file changed, 28 insertions(+), 11 deletions(-) diff --git a/youtube_dl/extractor/globo.py b/youtube_dl/extractor/globo.py index 5638be48f..dc7b2661c 100644 --- a/youtube_dl/extractor/globo.py +++ b/youtube_dl/extractor/globo.py @@ -2,6 +2,7 @@ from __future__ import unicode_literals import random +import re import math from .common import InfoExtractor @@ -14,6 +15,7 @@ from ..utils import ( ExtractorError, float_or_none, int_or_none, + orderedSet, str_or_none, ) @@ -63,6 +65,9 @@ class GloboIE(InfoExtractor): }, { 'url': 'http://canaloff.globo.com/programas/desejar-profundo/videos/4518560.html', 'only_matching': True, + }, { + 'url': 'globo:3607726', + 'only_matching': True, }] class MD5(object): @@ -396,7 +401,7 @@ class GloboIE(InfoExtractor): class GloboArticleIE(InfoExtractor): - _VALID_URL = r'https?://.+?\.globo\.com/(?:[^/]+/)*(?P<id>[^/]+)(?:\.html)?' + _VALID_URL = r'https?://.+?\.globo\.com/(?:[^/]+/)*(?P<id>[^/.]+)(?:\.html)?' _VIDEOID_REGEXES = [ r'\bdata-video-id=["\'](\d{7,})', @@ -408,15 +413,20 @@ class GloboArticleIE(InfoExtractor): _TESTS = [{ 'url': 'http://g1.globo.com/jornal-nacional/noticia/2014/09/novidade-na-fiscalizacao-de-bagagem-pela-receita-provoca-discussoes.html', - 'md5': '307fdeae4390ccfe6ba1aa198cf6e72b', 'info_dict': { - 'id': '3652183', - 'ext': 'mp4', - 'title': 'Receita Federal explica como vai fiscalizar bagagens de quem retorna ao Brasil de avião', - 'duration': 110.711, - 'uploader': 'Rede Globo', - 'uploader_id': '196', - } + 'id': 'novidade-na-fiscalizacao-de-bagagem-pela-receita-provoca-discussoes', + 'title': 'Novidade na fiscalização de bagagem pela Receita provoca discussões', + 'description': 'md5:c3c4b4d4c30c32fce460040b1ac46b12', + }, + 'playlist_count': 1, + }, { + 'url': 'http://g1.globo.com/pr/parana/noticia/2016/09/mpf-denuncia-lula-marisa-e-mais-seis-na-operacao-lava-jato.html', + 'info_dict': { + 'id': 'mpf-denuncia-lula-marisa-e-mais-seis-na-operacao-lava-jato', + 'title': "Lula era o 'comandante máximo' do esquema da Lava Jato, diz MPF", + 'description': 'md5:8aa7cc8beda4dc71cc8553e00b77c54c', + }, + 'playlist_count': 6, }, { 'url': 'http://gq.globo.com/Prazeres/Poder/noticia/2015/10/all-o-desafio-assista-ao-segundo-capitulo-da-serie.html', 'only_matching': True, @@ -435,5 +445,12 @@ class GloboArticleIE(InfoExtractor): def _real_extract(self, url): display_id = self._match_id(url) webpage = self._download_webpage(url, display_id) - video_id = self._search_regex(self._VIDEOID_REGEXES, webpage, 'video id') - return self.url_result('globo:%s' % video_id, 'Globo') + video_ids = [] + for video_regex in self._VIDEOID_REGEXES: + video_ids.extend(re.findall(video_regex, webpage)) + entries = [ + self.url_result('globo:%s' % video_id, GloboIE.ie_key()) + for video_id in orderedSet(video_ids)] + title = self._og_search_title(webpage, fatal=False) + description = self._html_search_meta('description', webpage) + return self.playlist_result(entries, display_id, title, description) From 190d2027d0b6c785cf789edf6c1bdac2ef650a66 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 18 Sep 2016 07:22:06 +0700 Subject: [PATCH 0180/1571] [xfileshare] Add title regex for streamin.to and fallback to video id (Closes #10646) --- youtube_dl/extractor/xfileshare.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/xfileshare.py b/youtube_dl/extractor/xfileshare.py index 995aada0d..de344bad2 100644 --- a/youtube_dl/extractor/xfileshare.py +++ b/youtube_dl/extractor/xfileshare.py @@ -124,12 +124,14 @@ class XFileShareIE(InfoExtractor): webpage = self._download_webpage(req, video_id, 'Downloading video page') title = (self._search_regex( - [r'style="z-index: [0-9]+;">([^<]+)</span>', + (r'style="z-index: [0-9]+;">([^<]+)</span>', r'<td nowrap>([^<]+)</td>', r'h4-fine[^>]*>([^<]+)<', r'>Watch (.+) ', - r'<h2 class="video-page-head">([^<]+)</h2>'], - webpage, 'title', default=None) or self._og_search_title(webpage)).strip() + r'<h2 class="video-page-head">([^<]+)</h2>', + r'<h2 style="[^"]*color:#403f3d[^"]*"[^>]*>([^<]+)<'), # streamin.to + webpage, 'title', default=None) or self._og_search_title( + webpage, default=None) or video_id).strip() def extract_video_url(default=NO_DEFAULT): return self._search_regex( From 14ae11efab64baf4994688490474609554c1bf80 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 18 Sep 2016 16:56:40 +0700 Subject: [PATCH 0181/1571] [vyborymos] Add extractor (Closes #10692) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/vyborymos.py | 55 ++++++++++++++++++++++++++++++ 2 files changed, 56 insertions(+) create mode 100644 youtube_dl/extractor/vyborymos.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 4baf4cd48..8166fd4f9 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1069,6 +1069,7 @@ from .vporn import VpornIE from .vrt import VRTIE from .vube import VubeIE from .vuclip import VuClipIE +from .vyborymos import VyboryMosIE from .walla import WallaIE from .washingtonpost import ( WashingtonPostIE, diff --git a/youtube_dl/extractor/vyborymos.py b/youtube_dl/extractor/vyborymos.py new file mode 100644 index 000000000..884aecb71 --- /dev/null +++ b/youtube_dl/extractor/vyborymos.py @@ -0,0 +1,55 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor + + +class VyboryMosIE(InfoExtractor): + _VALID_URL = r'https?://vybory\.mos\.ru/(?:#precinct/|account/channels\?.*?\bstation_id=)(?P<id>\d+)' + _TESTS = [{ + 'url': 'http://vybory.mos.ru/#precinct/13636', + 'info_dict': { + 'id': '13636', + 'ext': 'mp4', + 'title': 're:^Участковая избирательная комиссия №2231 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', + 'description': 'Россия, Москва, улица Введенского, 32А', + 'is_live': True, + }, + 'params': { + 'skip_download': True, + } + }, { + 'url': 'http://vybory.mos.ru/account/channels?station_id=13636', + 'only_matching': True, + }] + + def _real_extract(self, url): + station_id = self._match_id(url) + + channels = self._download_json( + 'http://vybory.mos.ru/account/channels?station_id=%s' % station_id, + station_id) + + formats = [] + for cam_num, (sid, hosts, name, _) in enumerate(channels, 1): + for num, host in enumerate(hosts, 1): + formats.append({ + 'url': 'http://%s/master.m3u8?sid=%s' % (host, sid), + 'ext': 'mp4', + 'format_id': 'camera%d-host%d' % (cam_num, num), + 'format_note': '%s, %s' % (name, host), + }) + + info = self._download_json( + 'http://vybory.mos.ru/json/voting_stations/136/%s.json' % station_id, + station_id, 'Downloading station info') + + title = info['name'] + + return { + 'id': station_id, + 'title': self._live_title(title), + 'description': info.get('address'), + 'is_live': True, + 'formats': formats, + } From 9ca93b99d110f58ec9b280020fb5fede2441794e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 18 Sep 2016 17:15:22 +0700 Subject: [PATCH 0182/1571] [ChangeLog] Actualize --- ChangeLog | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/ChangeLog b/ChangeLog index b0a65bde2..dd11a17b9 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,8 +1,23 @@ version <unreleased> +Core ++ Introduce manifest_url and fragments fields in formats dictionary for + fragmented media ++ Provide manifest_url field for DASH segments, HLS and HDS ++ Provide fragments field for DASH segments +* Rework DASH segments downloader to use fragments field ++ Add helper method for Wowza Streaming Engine formats extraction + Extractors ++ [vyborymos] Add extractor for vybory.mos.ru (#10692) ++ [xfileshare] Add title regular expression for streamin.to (#10646) ++ [globo:article] Add support for multiple videos (#10653) + [thisav] Recognize HTML5 videos (#10447) * [jwplatform] Improve JWPlayer detection ++ [mangomolo] Add support for Mangomolo embeds ++ [toutv] Add support for authentication (#10669) +* [franceinter] Fix upload date extraction +* [tv4] Fix HLS and HDS formats extraction (#10659) version 2016.09.15 From 3acff9423df437dd4bd1530a69011fc9ddc74ad1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 18 Sep 2016 17:16:55 +0700 Subject: [PATCH 0183/1571] release 2016.09.18 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 3 +++ youtube_dl/version.py | 2 +- 4 files changed, 8 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 61cea757c..b9d8ebad7 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.09.15*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.09.15** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.09.18*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.09.18** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v <your command line> [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.09.15 +[debug] youtube-dl version 2016.09.18 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index dd11a17b9..a71fadfa7 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2016.09.18 Core + Introduce manifest_url and fragments fields in formats dictionary for diff --git a/docs/supportedsites.md b/docs/supportedsites.md index fcb618561..95a137393 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -388,6 +388,8 @@ - **mailru**: Видео@Mail.Ru - **MakersChannel** - **MakerTV** + - **mangomolo:live** + - **mangomolo:video** - **MatchTV** - **MDR**: MDR.DE and KiKA - **media.ccc.de** @@ -849,6 +851,7 @@ - **VRT** - **vube**: Vube.com - **VuClip** + - **VyboryMos** - **Walla** - **washingtonpost** - **washingtonpost:article** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 081fd6ef0..5ae6a72aa 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.09.15' +__version__ = '2016.09.18' From a1da888d0cc92fdf3506b30ee85ce241e9090408 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 18 Sep 2016 17:28:41 +0700 Subject: [PATCH 0184/1571] [vyborymos] Improve station info extraction --- youtube_dl/extractor/vyborymos.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/vyborymos.py b/youtube_dl/extractor/vyborymos.py index 884aecb71..9e703c4b6 100644 --- a/youtube_dl/extractor/vyborymos.py +++ b/youtube_dl/extractor/vyborymos.py @@ -2,6 +2,7 @@ from __future__ import unicode_literals from .common import InfoExtractor +from ..compat import compat_str class VyboryMosIE(InfoExtractor): @@ -28,7 +29,7 @@ class VyboryMosIE(InfoExtractor): channels = self._download_json( 'http://vybory.mos.ru/account/channels?station_id=%s' % station_id, - station_id) + station_id, 'Downloading channels JSON') formats = [] for cam_num, (sid, hosts, name, _) in enumerate(channels, 1): @@ -41,14 +42,13 @@ class VyboryMosIE(InfoExtractor): }) info = self._download_json( - 'http://vybory.mos.ru/json/voting_stations/136/%s.json' % station_id, - station_id, 'Downloading station info') - - title = info['name'] + 'http://vybory.mos.ru/json/voting_stations/%s/%s.json' + % (compat_str(station_id)[:3], station_id), + station_id, 'Downloading station JSON', fatal=False) return { 'id': station_id, - 'title': self._live_title(title), + 'title': self._live_title(info['name'] if info else station_id), 'description': info.get('address'), 'is_live': True, 'formats': formats, From d8dbf8707d4e45a939fc74c76bb919771007f8ba Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sun, 18 Sep 2016 18:33:54 +0800 Subject: [PATCH 0185/1571] [thisav] Improve title extraction (closes #10682) I didn't add a test case as the one in #10682 looks like a copyrighted product. --- ChangeLog | 6 ++++++ youtube_dl/extractor/thisav.py | 5 ++++- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index a71fadfa7..18f9fa861 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +version <unreleased> + +Extractors +* [thisav] Improve title extraction (#10682) + + version 2016.09.18 Core diff --git a/youtube_dl/extractor/thisav.py b/youtube_dl/extractor/thisav.py index 027a8e907..4473a3c77 100644 --- a/youtube_dl/extractor/thisav.py +++ b/youtube_dl/extractor/thisav.py @@ -4,6 +4,7 @@ from __future__ import unicode_literals import re from .jwplatform import JWPlatformBaseIE +from ..utils import remove_end class ThisAVIE(JWPlatformBaseIE): @@ -35,7 +36,9 @@ class ThisAVIE(JWPlatformBaseIE): video_id = mobj.group('id') webpage = self._download_webpage(url, video_id) - title = self._html_search_regex(r'<h1>([^<]*)</h1>', webpage, 'title') + title = remove_end(self._html_search_regex( + r'<title>([^<]+)', webpage, 'title'), + ' - 視頻 - ThisAV.com-世界第一中文成人娛樂網站') video_url = self._html_search_regex( r"addVariable\('file','([^']+)'\);", webpage, 'video url', default=None) if video_url: From cc764a6da8530248f9810397a22b20c972877a97 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 18 Sep 2016 19:10:18 +0700 Subject: [PATCH 0186/1571] [twitch:stream] Remove fallback to profile extraction when stream is offline Main page does not contain profile videos anymore --- youtube_dl/extractor/twitch.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py index af6d890b0..bc352391e 100644 --- a/youtube_dl/extractor/twitch.py +++ b/youtube_dl/extractor/twitch.py @@ -400,11 +400,8 @@ class TwitchStreamIE(TwitchBaseIE): 'kraken/streams/%s' % channel_id, channel_id, 'Downloading stream JSON').get('stream') - # Fallback on profile extraction if stream is offline if not stream: - return self.url_result( - 'http://www.twitch.tv/%s/profile' % channel_id, - 'TwitchProfile', channel_id) + raise ExtractorError('%s is offline' % channel_id, expected=True) # Channel name may be typed if different case than the original channel name # (e.g. http://www.twitch.tv/TWITCHPLAYSPOKEMON) that will lead to constructing From 70b4cf9b1b8a2c2935ca7384d7545463cfd4ea16 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 19 Sep 2016 02:50:06 +0700 Subject: [PATCH 0187/1571] [crunchyroll] Check if already logged in (Closes #10700) --- youtube_dl/extractor/crunchyroll.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py index 1b69bd0b6..e4c10ad24 100644 --- a/youtube_dl/extractor/crunchyroll.py +++ b/youtube_dl/extractor/crunchyroll.py @@ -46,6 +46,13 @@ class CrunchyrollBaseIE(InfoExtractor): login_page = self._download_webpage( self._LOGIN_URL, None, 'Downloading login page') + def is_logged(webpage): + return 'Redirecting' in webpage + + # Already logged in + if is_logged(login_page): + return + login_form_str = self._search_regex( r'(?P<form><form[^>]+?id=(["\'])%s\2[^>]*>)' % self._LOGIN_FORM, login_page, 'login form', group='form') @@ -69,7 +76,7 @@ class CrunchyrollBaseIE(InfoExtractor): headers={'Content-Type': 'application/x-www-form-urlencoded'}) # Successful login - if '<title>Redirecting' in response: + if is_logged(response): return error = self._html_search_regex( From 59fd8f931d274cc702a7e260e9ec996f8db7c9f0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 19 Sep 2016 02:57:14 +0700 Subject: [PATCH 0188/1571] [ChangeLog] Actualize --- ChangeLog | 3 +++ 1 file changed, 3 insertions(+) diff --git a/ChangeLog b/ChangeLog index 18f9fa861..c67d5f650 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,7 +1,10 @@ version <unreleased> Extractors ++ [crunchyroll] Check if already authenticated (#10700) +- [twitch:stream] Remove fallback to profile extraction when stream is offline * [thisav] Improve title extraction (#10682) +* [vyborymos] Improve station info extraction version 2016.09.18 From cb57386873a053b3328a78f48cf27f23ca6897d1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 19 Sep 2016 02:58:32 +0700 Subject: [PATCH 0189/1571] release 2016.09.19 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- youtube_dl/version.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index b9d8ebad7..8b28d784a 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.09.18*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.09.18** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.09.19*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.09.19** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v <your command line> [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.09.18 +[debug] youtube-dl version 2016.09.19 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index c67d5f650..24077c430 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2016.09.19 Extractors + [crunchyroll] Check if already authenticated (#10700) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 5ae6a72aa..9d3138181 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.09.18' +__version__ = '2016.09.19' From c38f06818df83f5f46cbdee1069bfaf53a537cc8 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Tue, 20 Sep 2016 11:55:30 +0100 Subject: [PATCH 0190/1571] add support for Adobe Pass auth in tbs,tnt and trutv extractors(fixes #10642)(closes #10222)(closes #10519) --- youtube_dl/extractor/adobepass.py | 2 +- youtube_dl/extractor/tbs.py | 13 +++++-------- youtube_dl/extractor/trutv.py | 12 ++++++++++++ youtube_dl/extractor/turner.py | 17 ++++++++++------- 4 files changed, 28 insertions(+), 16 deletions(-) diff --git a/youtube_dl/extractor/adobepass.py b/youtube_dl/extractor/adobepass.py index 01932e5e6..c787e0962 100644 --- a/youtube_dl/extractor/adobepass.py +++ b/youtube_dl/extractor/adobepass.py @@ -83,7 +83,7 @@ class AdobePassIE(InfoExtractor): 'User-Agent': self._USER_AGENT, } - guid = xml_text(resource, 'guid') + guid = xml_text(resource, 'guid') if '<' in resource else resource count = 0 while count < 2: requestor_info = self._downloader.cache.load(self._MVPD_CACHE, requestor_id) or {} diff --git a/youtube_dl/extractor/tbs.py b/youtube_dl/extractor/tbs.py index 0c351e045..bf93eb868 100644 --- a/youtube_dl/extractor/tbs.py +++ b/youtube_dl/extractor/tbs.py @@ -4,10 +4,7 @@ from __future__ import unicode_literals import re from .turner import TurnerBaseIE -from ..utils import ( - extract_attributes, - ExtractorError, -) +from ..utils import extract_attributes class TBSIE(TurnerBaseIE): @@ -37,10 +34,6 @@ class TBSIE(TurnerBaseIE): site = domain[:3] webpage = self._download_webpage(url, display_id) video_params = extract_attributes(self._search_regex(r'(<[^>]+id="page-video"[^>]*>)', webpage, 'video params')) - if video_params.get('isAuthRequired') == 'true': - raise ExtractorError( - 'This video is only available via cable service provider subscription that' - ' is not currently supported.', expected=True) query = None clip_id = video_params.get('clipid') if clip_id: @@ -56,4 +49,8 @@ class TBSIE(TurnerBaseIE): 'media_src': 'http://androidhls-secure.cdn.turner.com/%s/big' % site, 'tokenizer_src': 'http://www.%s.com/video/processors/services/token_ipadAdobe.do' % domain, }, + }, { + 'url': url, + 'site_name': site.upper(), + 'auth_required': video_params.get('isAuthRequired') != 'false', }) diff --git a/youtube_dl/extractor/trutv.py b/youtube_dl/extractor/trutv.py index e60d8a181..3a5782525 100644 --- a/youtube_dl/extractor/trutv.py +++ b/youtube_dl/extractor/trutv.py @@ -22,9 +22,17 @@ class TruTVIE(TurnerBaseIE): def _real_extract(self, url): path, video_id = re.match(self._VALID_URL, url).groups() + auth_required = False if path: data_src = 'http://www.trutv.com/video/cvp/v2/xml/content.xml?id=%s.xml' % path else: + webpage = self._download_webpage(url, video_id) + video_id = self._search_regex( + r"TTV\.TVE\.episodeId\s*=\s*'([^']+)';", + webpage, 'video id', default=video_id) + auth_required = self._search_regex( + r'TTV\.TVE\.authRequired\s*=\s*(true|false);', + webpage, 'auth required', default='false') == 'true' data_src = 'http://www.trutv.com/tveverywhere/services/cvpXML.do?titleId=' + video_id return self._extract_cvp_info( data_src, path, { @@ -32,4 +40,8 @@ class TruTVIE(TurnerBaseIE): 'media_src': 'http://androidhls-secure.cdn.turner.com/trutv/big', 'tokenizer_src': 'http://www.trutv.com/tveverywhere/processors/services/token_ipadAdobe.do', }, + }, { + 'url': url, + 'site_name': 'truTV', + 'auth_required': auth_required, }) diff --git a/youtube_dl/extractor/turner.py b/youtube_dl/extractor/turner.py index 4228c1ccc..57ffedb87 100644 --- a/youtube_dl/extractor/turner.py +++ b/youtube_dl/extractor/turner.py @@ -3,7 +3,7 @@ from __future__ import unicode_literals import re -from .common import InfoExtractor +from .adobepass import AdobePassIE from ..compat import compat_str from ..utils import ( xpath_text, @@ -16,11 +16,11 @@ from ..utils import ( ) -class TurnerBaseIE(InfoExtractor): +class TurnerBaseIE(AdobePassIE): def _extract_timestamp(self, video_data): return int_or_none(xpath_attr(video_data, 'dateCreated', 'uts')) - def _extract_cvp_info(self, data_src, video_id, path_data={}): + def _extract_cvp_info(self, data_src, video_id, path_data={}, ap_data={}): video_data = self._download_xml(data_src, video_id) video_id = video_data.attrib['id'] title = xpath_text(video_data, 'headline', fatal=True) @@ -70,11 +70,14 @@ class TurnerBaseIE(InfoExtractor): secure_path = self._search_regex(r'https?://[^/]+(.+/)', video_url, 'secure path') + '*' token = tokens.get(secure_path) if not token: + query = { + 'path': secure_path, + 'videoId': content_id, + } + if ap_data.get('auth_required'): + query['accessToken'] = self._extract_mvpd_auth(ap_data['url'], video_id, ap_data['site_name'], ap_data['site_name']) auth = self._download_xml( - secure_path_data['tokenizer_src'], video_id, query={ - 'path': secure_path, - 'videoId': content_id, - }) + secure_path_data['tokenizer_src'], video_id, query=query) error_msg = xpath_text(auth, 'error/msg') if error_msg: raise ExtractorError(error_msg, expected=True) From e33a7253b23e0adca9a3cb9a3856952c922a3357 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Tue, 20 Sep 2016 15:52:23 +0100 Subject: [PATCH 0191/1571] [fox] add support for Adobe Pass auth(closes #8584) --- youtube_dl/extractor/fox.py | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/fox.py b/youtube_dl/extractor/fox.py index 9f406b17e..9f2e5d065 100644 --- a/youtube_dl/extractor/fox.py +++ b/youtube_dl/extractor/fox.py @@ -1,14 +1,14 @@ # coding: utf-8 from __future__ import unicode_literals -from .common import InfoExtractor +from .adobepass import AdobePassIE from ..utils import ( smuggle_url, update_url_query, ) -class FOXIE(InfoExtractor): +class FOXIE(AdobePassIE): _VALID_URL = r'https?://(?:www\.)?fox\.com/watch/(?P<id>[0-9]+)' _TEST = { 'url': 'http://www.fox.com/watch/255180355939/7684182528', @@ -30,14 +30,26 @@ class FOXIE(InfoExtractor): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - release_url = self._parse_json(self._search_regex( - r'"fox_pdk_player"\s*:\s*({[^}]+?})', webpage, 'fox_pdk_player'), - video_id)['release_url'] + settings = self._parse_json(self._search_regex( + r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);', + webpage, 'drupal settings'), video_id) + fox_pdk_player = settings['fox_pdk_player'] + release_url = fox_pdk_player['release_url'] + query = { + 'mbr': 'true', + 'switch': 'http' + } + if fox_pdk_player.get('access') == 'locked': + ap_p = settings['foxAdobePassProvider'] + rating = ap_p.get('videoRating') + if rating == 'n/a': + rating = None + resource = self._get_mvpd_resource('fbc-fox', None, ap_p['videoGUID'], rating) + query['auth'] = self._extract_mvpd_auth(url, video_id, 'fbc-fox', resource) return { '_type': 'url_transparent', 'ie_key': 'ThePlatform', - 'url': smuggle_url(update_url_query( - release_url, {'switch': 'http'}), {'force_smil_url': True}), + 'url': smuggle_url(update_url_query(release_url, query), {'force_smil_url': True}), 'id': video_id, } From 4bfd294e2f83301921494c02e497cccf1a26cfd5 Mon Sep 17 00:00:00 2001 From: coolsa <noob.cloud@gmail.com> Date: Sun, 18 Sep 2016 03:53:05 -0600 Subject: [PATCH 0192/1571] [soundcloud] Extract license metadata --- youtube_dl/extractor/soundcloud.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index 9635c2b49..47b84809f 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -53,6 +53,7 @@ class SoundcloudIE(InfoExtractor): 'uploader': 'E.T. ExTerrestrial Music', 'title': 'Lostin Powers - She so Heavy (SneakPreview) Adrian Ackers Blueprint 1', 'duration': 143, + 'license': 'all-rights-reserved', } }, # not streamable song @@ -66,6 +67,7 @@ class SoundcloudIE(InfoExtractor): 'uploader': 'The Royal Concept', 'upload_date': '20120521', 'duration': 227, + 'license': 'all-rights-reserved', }, 'params': { # rtmp @@ -84,6 +86,7 @@ class SoundcloudIE(InfoExtractor): 'description': 'test chars: \"\'/\\ä↭', 'upload_date': '20131209', 'duration': 9, + 'license': 'all-rights-reserved', }, }, # private link (alt format) @@ -98,6 +101,7 @@ class SoundcloudIE(InfoExtractor): 'description': 'test chars: \"\'/\\ä↭', 'upload_date': '20131209', 'duration': 9, + 'license': 'all-rights-reserved', }, }, # downloadable song @@ -112,6 +116,7 @@ class SoundcloudIE(InfoExtractor): 'uploader': 'oddsamples', 'upload_date': '20140109', 'duration': 17, + 'license': 'cc-by-sa', }, }, ] @@ -138,8 +143,8 @@ class SoundcloudIE(InfoExtractor): name = full_title or track_id if quiet: self.report_extraction(name) - thumbnail = info['artwork_url'] + track_license = info['license'] if thumbnail is not None: thumbnail = thumbnail.replace('-large', '-t500x500') ext = 'mp3' @@ -152,6 +157,7 @@ class SoundcloudIE(InfoExtractor): 'thumbnail': thumbnail, 'duration': int_or_none(info.get('duration'), 1000), 'webpage_url': info.get('permalink_url'), + 'license': track_license, } formats = [] if info.get('downloadable', False): @@ -222,6 +228,7 @@ class SoundcloudIE(InfoExtractor): track_id = mobj.group('track_id') token = None + if track_id is not None: info_json_url = 'http://api.soundcloud.com/tracks/' + track_id + '.json?client_id=' + self._CLIENT_ID full_title = track_id From f62a77b99a73ed3acf8406efaa34d08c73682be3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 20 Sep 2016 21:55:57 +0700 Subject: [PATCH 0193/1571] [soundcloud] Modernize --- youtube_dl/extractor/soundcloud.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index 47b84809f..513c54829 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -143,21 +143,20 @@ class SoundcloudIE(InfoExtractor): name = full_title or track_id if quiet: self.report_extraction(name) - thumbnail = info['artwork_url'] - track_license = info['license'] - if thumbnail is not None: + thumbnail = info.get('artwork_url') + if isinstance(thumbnail, compat_str): thumbnail = thumbnail.replace('-large', '-t500x500') ext = 'mp3' result = { 'id': track_id, - 'uploader': info['user']['username'], - 'upload_date': unified_strdate(info['created_at']), + 'uploader': info.get('user', {}).get('username'), + 'upload_date': unified_strdate(info.get('created_at')), 'title': info['title'], - 'description': info['description'], + 'description': info.get('description'), 'thumbnail': thumbnail, 'duration': int_or_none(info.get('duration'), 1000), 'webpage_url': info.get('permalink_url'), - 'license': track_license, + 'license': info.get('license'), } formats = [] if info.get('downloadable', False): @@ -227,7 +226,6 @@ class SoundcloudIE(InfoExtractor): raise ExtractorError('Invalid URL: %s' % url) track_id = mobj.group('track_id') - token = None if track_id is not None: info_json_url = 'http://api.soundcloud.com/tracks/' + track_id + '.json?client_id=' + self._CLIENT_ID From 1ae0ae5db0bc9c388de970c71880e2f3dc400cc3 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Tue, 20 Sep 2016 18:51:29 +0100 Subject: [PATCH 0194/1571] [cartoonnetwork] add support Adobe Pass auth --- youtube_dl/extractor/cartoonnetwork.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/youtube_dl/extractor/cartoonnetwork.py b/youtube_dl/extractor/cartoonnetwork.py index 688a6375e..086ec90c9 100644 --- a/youtube_dl/extractor/cartoonnetwork.py +++ b/youtube_dl/extractor/cartoonnetwork.py @@ -33,4 +33,10 @@ class CartoonNetworkIE(TurnerBaseIE): 'media_src': 'http://androidhls-secure.cdn.turner.com/toon/big', 'tokenizer_src': 'http://www.cartoonnetwork.com/cntv/mvpd/processors/services/token_ipadAdobe.do', }, + }, { + 'url': url, + 'site_name': 'CartoonNetwork', + 'auth_required': self._search_regex( + r'_cnglobal\.cvpFullOrPreviewAuth\s*=\s*(true|false);', + webpage, 'auth required', default='false') == 'true', }) From 3a5a18705f2a7faf64a4b69665511ef5f0c6084d Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Wed, 21 Sep 2016 15:56:31 +0100 Subject: [PATCH 0195/1571] [adobepass] add support MSO that depend on watchTVeverywhere(closes #10709) --- youtube_dl/extractor/adobepass.py | 1264 ++++++++++++++++++++++++++++- 1 file changed, 1259 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/adobepass.py b/youtube_dl/extractor/adobepass.py index c787e0962..8f7ed6ef2 100644 --- a/youtube_dl/extractor/adobepass.py +++ b/youtube_dl/extractor/adobepass.py @@ -17,15 +17,1269 @@ from ..utils import ( MSO_INFO = { 'DTV': { - 'name': 'DirecTV', + 'name': 'DIRECTV', 'username_field': 'username', 'password_field': 'password', }, 'Rogers': { - 'name': 'Rogers Cable', + 'name': 'Rogers', 'username_field': 'UserName', 'password_field': 'UserPassword', }, + 'thr030': { + 'name': '3 Rivers Communications' + }, + 'com140': { + 'name': 'Access Montana' + }, + 'acecommunications': { + 'name': 'AcenTek' + }, + 'acm010': { + 'name': 'Acme Communications' + }, + 'ada020': { + 'name': 'Adams Cable Service' + }, + 'alb020': { + 'name': 'Albany Mutual Telephone' + }, + 'algona': { + 'name': 'Algona Municipal Utilities' + }, + 'allwest': { + 'name': 'All West Communications' + }, + 'all025': { + 'name': 'Allen\'s Communications' + }, + 'spl010': { + 'name': 'Alliance Communications' + }, + 'all070': { + 'name': 'ALLO Communications' + }, + 'alpine': { + 'name': 'Alpine Communications' + }, + 'hun015': { + 'name': 'American Broadband' + }, + 'nwc010': { + 'name': 'American Broadband Missouri' + }, + 'com130-02': { + 'name': 'American Community Networks' + }, + 'com130-01': { + 'name': 'American Warrior Networks' + }, + 'tom020': { + 'name': 'Amherst Telephone/Tomorrow Valley' + }, + 'tvc020': { + 'name': 'Andycable' + }, + 'arkwest': { + 'name': 'Arkwest Communications' + }, + 'art030': { + 'name': 'Arthur Mutual Telephone Company' + }, + 'arvig': { + 'name': 'Arvig' + }, + 'nttcash010': { + 'name': 'Ashland Home Net' + }, + 'astound': { + 'name': 'Astound (now Wave)' + }, + 'dix030': { + 'name': 'ATC Broadband' + }, + 'ara010': { + 'name': 'ATC Communications' + }, + 'she030-02': { + 'name': 'Ayersville Communications' + }, + 'baldwin': { + 'name': 'Baldwin Lightstream' + }, + 'bal040': { + 'name': 'Ballard TV' + }, + 'cit025': { + 'name': 'Bardstown Cable TV' + }, + 'bay030': { + 'name': 'Bay Country Communications' + }, + 'tel095': { + 'name': 'Beaver Creek Cooperative Telephone' + }, + 'bea020': { + 'name': 'Beaver Valley Cable' + }, + 'bee010': { + 'name': 'Bee Line Cable' + }, + 'wir030': { + 'name': 'Beehive Broadband' + }, + 'bra020': { + 'name': 'BELD' + }, + 'bel020': { + 'name': 'Bellevue Municipal Cable' + }, + 'vol040-01': { + 'name': 'Ben Lomand Connect / BLTV' + }, + 'bev010': { + 'name': 'BEVCOMM' + }, + 'big020': { + 'name': 'Big Sandy Broadband' + }, + 'ble020': { + 'name': 'Bledsoe Telephone Cooperative' + }, + 'bvt010': { + 'name': 'Blue Valley Tele-Communications' + }, + 'bra050': { + 'name': 'Brandenburg Telephone Co.' + }, + 'bte010': { + 'name': 'Bristol Tennessee Essential Services' + }, + 'annearundel': { + 'name': 'Broadstripe' + }, + 'btc010': { + 'name': 'BTC Communications' + }, + 'btc040': { + 'name': 'BTC Vision - Nahunta' + }, + 'bul010': { + 'name': 'Bulloch Telephone Cooperative' + }, + 'but010': { + 'name': 'Butler-Bremer Communications' + }, + 'tel160-csp': { + 'name': 'C Spire SNAP' + }, + 'csicable': { + 'name': 'Cable Services Inc.' + }, + 'cableamerica': { + 'name': 'CableAmerica' + }, + 'cab038': { + 'name': 'CableSouth Media 3' + }, + 'weh010-camtel': { + 'name': 'Cam-Tel Company' + }, + 'car030': { + 'name': 'Cameron Communications' + }, + 'canbytel': { + 'name': 'Canby Telcom' + }, + 'crt020': { + 'name': 'CapRock Tv' + }, + 'car050': { + 'name': 'Carnegie Cable' + }, + 'cas': { + 'name': 'CAS Cable' + }, + 'casscomm': { + 'name': 'CASSCOMM' + }, + 'mid180-02': { + 'name': 'Catalina Broadband Solutions' + }, + 'cccomm': { + 'name': 'CC Communications' + }, + 'nttccde010': { + 'name': 'CDE Lightband' + }, + 'cfunet': { + 'name': 'Cedar Falls Utilities' + }, + 'dem010-01': { + 'name': 'Celect-Bloomer Telephone Area' + }, + 'dem010-02': { + 'name': 'Celect-Bruce Telephone Area' + }, + 'dem010-03': { + 'name': 'Celect-Citizens Connected Area' + }, + 'dem010-04': { + 'name': 'Celect-Elmwood/Spring Valley Area' + }, + 'dem010-06': { + 'name': 'Celect-Mosaic Telecom' + }, + 'dem010-05': { + 'name': 'Celect-West WI Telephone Area' + }, + 'net010-02': { + 'name': 'Cellcom/Nsight Telservices' + }, + 'cen100': { + 'name': 'CentraCom' + }, + 'nttccst010': { + 'name': 'Central Scott / CSTV' + }, + 'cha035': { + 'name': 'Chaparral CableVision' + }, + 'cha050': { + 'name': 'Chariton Valley Communication Corporation, Inc.' + }, + 'cha060': { + 'name': 'Chatmoss Cablevision' + }, + 'nttcche010': { + 'name': 'Cherokee Communications' + }, + 'che050': { + 'name': 'Chesapeake Bay Communications' + }, + 'cimtel': { + 'name': 'Cim-Tel Cable, LLC.' + }, + 'cit180': { + 'name': 'Citizens Cablevision - Floyd, VA' + }, + 'cit210': { + 'name': 'Citizens Cablevision, Inc.' + }, + 'cit040': { + 'name': 'Citizens Fiber' + }, + 'cit250': { + 'name': 'Citizens Mutual' + }, + 'war040': { + 'name': 'Citizens Telephone Corporation' + }, + 'wat025': { + 'name': 'City Of Monroe' + }, + 'wadsworth': { + 'name': 'CityLink' + }, + 'nor100': { + 'name': 'CL Tel' + }, + 'cla010': { + 'name': 'Clarence Telephone and Cedar Communications' + }, + 'ser060': { + 'name': 'Clear Choice Communications' + }, + 'tac020': { + 'name': 'Click! Cable TV' + }, + 'war020': { + 'name': 'CLICK1.NET' + }, + 'cml010': { + 'name': 'CML Telephone Cooperative Association' + }, + 'cns': { + 'name': 'CNS' + }, + 'com160': { + 'name': 'Co-Mo Connect' + }, + 'coa020': { + 'name': 'Coast Communications' + }, + 'coa030': { + 'name': 'Coaxial Cable TV' + }, + 'mid055': { + 'name': 'Cobalt TV (Mid-State Community TV)' + }, + 'col070': { + 'name': 'Columbia Power & Water Systems' + }, + 'col080': { + 'name': 'Columbus Telephone' + }, + 'nor105': { + 'name': 'Communications 1 Cablevision, Inc.' + }, + 'com150': { + 'name': 'Community Cable & Broadband' + }, + 'com020': { + 'name': 'Community Communications Company' + }, + 'coy010': { + 'name': 'commZoom' + }, + 'com025': { + 'name': 'Complete Communication Services' + }, + 'cat020': { + 'name': 'Comporium' + }, + 'com071': { + 'name': 'ComSouth Telesys' + }, + 'consolidatedcable': { + 'name': 'Consolidated' + }, + 'conwaycorp': { + 'name': 'Conway Corporation' + }, + 'coo050': { + 'name': 'Coon Valley Telecommunications Inc' + }, + 'coo080': { + 'name': 'Cooperative Telephone Company' + }, + 'cpt010': { + 'name': 'CP-TEL' + }, + 'cra010': { + 'name': 'Craw-Kan Telephone' + }, + 'crestview': { + 'name': 'Crestview Cable Communications' + }, + 'cross': { + 'name': 'Cross TV' + }, + 'cro030': { + 'name': 'Crosslake Communications' + }, + 'ctc040': { + 'name': 'CTC - Brainerd MN' + }, + 'phe030': { + 'name': 'CTV-Beam - East Alabama' + }, + 'cun010': { + 'name': 'Cunningham Telephone & Cable' + }, + 'dpc010': { + 'name': 'D & P Communications' + }, + 'dak030': { + 'name': 'Dakota Central Telecommunications' + }, + 'nttcdel010': { + 'name': 'Delcambre Telephone LLC' + }, + 'tel160-del': { + 'name': 'Delta Telephone Company' + }, + 'sal040': { + 'name': 'DiamondNet' + }, + 'ind060-dc': { + 'name': 'Direct Communications' + }, + 'doy010': { + 'name': 'Doylestown Cable TV' + }, + 'dic010': { + 'name': 'DRN' + }, + 'dtc020': { + 'name': 'DTC' + }, + 'dtc010': { + 'name': 'DTC Cable (Delhi)' + }, + 'dum010': { + 'name': 'Dumont Telephone Company' + }, + 'dun010': { + 'name': 'Dunkerton Telephone Cooperative' + }, + 'cci010': { + 'name': 'Duo County Telecom' + }, + 'eagle': { + 'name': 'Eagle Communications' + }, + 'weh010-east': { + 'name': 'East Arkansas Cable TV' + }, + 'eatel': { + 'name': 'EATEL Video, LLC' + }, + 'ell010': { + 'name': 'ECTA' + }, + 'emerytelcom': { + 'name': 'Emery Telcom Video LLC' + }, + 'nor200': { + 'name': 'Empire Access' + }, + 'endeavor': { + 'name': 'Endeavor Communications' + }, + 'sun045': { + 'name': 'Enhanced Telecommunications Corporation' + }, + 'mid030': { + 'name': 'enTouch' + }, + 'epb020': { + 'name': 'EPB Smartnet' + }, + 'jea010': { + 'name': 'EPlus Broadband' + }, + 'com065': { + 'name': 'ETC' + }, + 'ete010': { + 'name': 'Etex Communications' + }, + 'fbc-tele': { + 'name': 'F&B Communications' + }, + 'fal010': { + 'name': 'Falcon Broadband' + }, + 'fam010': { + 'name': 'FamilyView CableVision' + }, + 'far020': { + 'name': 'Farmers Mutual Telephone Company' + }, + 'fay010': { + 'name': 'Fayetteville Public Utilities' + }, + 'sal060': { + 'name': 'fibrant' + }, + 'fid010': { + 'name': 'Fidelity Communications' + }, + 'for030': { + 'name': 'FJ Communications' + }, + 'fli020': { + 'name': 'Flint River Communications' + }, + 'far030': { + 'name': 'FMT - Jesup' + }, + 'foo010': { + 'name': 'Foothills Communications' + }, + 'for080': { + 'name': 'Forsyth CableNet' + }, + 'fbcomm': { + 'name': 'Frankfort Plant Board' + }, + 'tel160-fra': { + 'name': 'Franklin Telephone Company' + }, + 'nttcftc010': { + 'name': 'FTC' + }, + 'fullchannel': { + 'name': 'Full Channel, Inc.' + }, + 'gar040': { + 'name': 'Gardonville Cooperative Telephone Association' + }, + 'gbt010': { + 'name': 'GBT Communications, Inc.' + }, + 'tec010': { + 'name': 'Genuine Telecom' + }, + 'clr010': { + 'name': 'Giant Communications' + }, + 'gla010': { + 'name': 'Glasgow EPB' + }, + 'gle010': { + 'name': 'Glenwood Telecommunications' + }, + 'gra060': { + 'name': 'GLW Broadband Inc.' + }, + 'goldenwest': { + 'name': 'Golden West Cablevision' + }, + 'vis030': { + 'name': 'Grantsburg Telcom' + }, + 'gpcom': { + 'name': 'Great Plains Communications' + }, + 'gri010': { + 'name': 'Gridley Cable Inc' + }, + 'hbc010': { + 'name': 'H&B Cable Services' + }, + 'hae010': { + 'name': 'Haefele TV Inc.' + }, + 'htc010': { + 'name': 'Halstad Telephone Company' + }, + 'har005': { + 'name': 'Harlan Municipal Utilities' + }, + 'har020': { + 'name': 'Hart Communications' + }, + 'ced010': { + 'name': 'Hartelco TV' + }, + 'hea040': { + 'name': 'Heart of Iowa Communications Cooperative' + }, + 'htc020': { + 'name': 'Hickory Telephone Company' + }, + 'nttchig010': { + 'name': 'Highland Communication Services' + }, + 'hig030': { + 'name': 'Highland Media' + }, + 'spc010': { + 'name': 'Hilliary Communications' + }, + 'hin020': { + 'name': 'Hinton CATV Co.' + }, + 'hometel': { + 'name': 'HomeTel Entertainment, Inc.' + }, + 'hoodcanal': { + 'name': 'Hood Canal Communications' + }, + 'weh010-hope': { + 'name': 'Hope - Prescott Cable TV' + }, + 'horizoncable': { + 'name': 'Horizon Cable TV, Inc.' + }, + 'hor040': { + 'name': 'Horizon Chillicothe Telephone' + }, + 'htc030': { + 'name': 'HTC Communications Co. - IL' + }, + 'htccomm': { + 'name': 'HTC Communications, Inc. - IA' + }, + 'wal005': { + 'name': 'Huxley Communications' + }, + 'imon': { + 'name': 'ImOn Communications' + }, + 'ind040': { + 'name': 'Independence Telecommunications' + }, + 'rrc010': { + 'name': 'Inland Networks' + }, + 'stc020': { + 'name': 'Innovative Cable TV St Croix' + }, + 'car100': { + 'name': 'Innovative Cable TV St Thomas-St John' + }, + 'icc010': { + 'name': 'Inside Connect Cable' + }, + 'int100': { + 'name': 'Integra Telecom' + }, + 'int050': { + 'name': 'Interstate Telecommunications Coop' + }, + 'irv010': { + 'name': 'Irvine Cable' + }, + 'k2c010': { + 'name': 'K2 Communications' + }, + 'kal010': { + 'name': 'Kalida Telephone Company, Inc.' + }, + 'kal030': { + 'name': 'Kalona Cooperative Telephone Company' + }, + 'kmt010': { + 'name': 'KMTelecom' + }, + 'kpu010': { + 'name': 'KPU Telecommunications' + }, + 'kuh010': { + 'name': 'Kuhn Communications, Inc.' + }, + 'lak130': { + 'name': 'Lakeland Communications' + }, + 'lan010': { + 'name': 'Langco' + }, + 'lau020': { + 'name': 'Laurel Highland Total Communications, Inc.' + }, + 'leh010': { + 'name': 'Lehigh Valley Cooperative Telephone' + }, + 'bra010': { + 'name': 'Limestone Cable/Bracken Cable' + }, + 'loc020': { + 'name': 'LISCO' + }, + 'lit020': { + 'name': 'Litestream' + }, + 'tel140': { + 'name': 'LivCom' + }, + 'loc010': { + 'name': 'LocalTel Communications' + }, + 'weh010-longview': { + 'name': 'Longview - Kilgore Cable TV' + }, + 'lon030': { + 'name': 'Lonsdale Video Ventures, LLC' + }, + 'lns010': { + 'name': 'Lost Nation-Elwood Telephone Co.' + }, + 'nttclpc010': { + 'name': 'LPC Connect' + }, + 'lumos': { + 'name': 'Lumos Networks' + }, + 'madison': { + 'name': 'Madison Communications' + }, + 'mad030': { + 'name': 'Madison County Cable Inc.' + }, + 'nttcmah010': { + 'name': 'Mahaska Communication Group' + }, + 'mar010': { + 'name': 'Marne & Elk Horn Telephone Company' + }, + 'mcc040': { + 'name': 'McClure Telephone Co.' + }, + 'mctv': { + 'name': 'MCTV' + }, + 'merrimac': { + 'name': 'Merrimac Communications Ltd.' + }, + 'metronet': { + 'name': 'Metronet' + }, + 'mhtc': { + 'name': 'MHTC' + }, + 'midhudson': { + 'name': 'Mid-Hudson Cable' + }, + 'midrivers': { + 'name': 'Mid-Rivers Communications' + }, + 'mid045': { + 'name': 'Midstate Communications' + }, + 'mil080': { + 'name': 'Milford Communications' + }, + 'min030': { + 'name': 'MINET' + }, + 'nttcmin010': { + 'name': 'Minford TV' + }, + 'san040-02': { + 'name': 'Mitchell Telecom' + }, + 'mlg010': { + 'name': 'MLGC' + }, + 'mon060': { + 'name': 'Mon-Cre TVE' + }, + 'mou110': { + 'name': 'Mountain Telephone' + }, + 'mou050': { + 'name': 'Mountain Village Cable' + }, + 'mtacomm': { + 'name': 'MTA Communications, LLC' + }, + 'mtc010': { + 'name': 'MTC Cable' + }, + 'med040': { + 'name': 'MTC Technologies' + }, + 'man060': { + 'name': 'MTCC' + }, + 'mtc030': { + 'name': 'MTCO Communications' + }, + 'mul050': { + 'name': 'Mulberry Telecommunications' + }, + 'mur010': { + 'name': 'Murray Electric System' + }, + 'musfiber': { + 'name': 'MUS FiberNET' + }, + 'mpw': { + 'name': 'Muscatine Power & Water' + }, + 'nttcsli010': { + 'name': 'myEVTV.com' + }, + 'nor115': { + 'name': 'NCC' + }, + 'nor260': { + 'name': 'NDTC' + }, + 'nctc': { + 'name': 'Nebraska Central Telecom, Inc.' + }, + 'nel020': { + 'name': 'Nelsonville TV Cable' + }, + 'nem010': { + 'name': 'Nemont' + }, + 'new075': { + 'name': 'New Hope Telephone Cooperative' + }, + 'nor240': { + 'name': 'NICP' + }, + 'cic010': { + 'name': 'NineStar Connect' + }, + 'nktelco': { + 'name': 'NKTelco' + }, + 'nortex': { + 'name': 'Nortex Communications' + }, + 'nor140': { + 'name': 'North Central Telephone Cooperative' + }, + 'nor030': { + 'name': 'Northland Communications' + }, + 'nor075': { + 'name': 'Northwest Communications' + }, + 'nor125': { + 'name': 'Norwood Light Broadband' + }, + 'net010': { + 'name': 'Nsight Telservices' + }, + 'dur010': { + 'name': 'Ntec' + }, + 'nts010': { + 'name': 'NTS Communications' + }, + 'new045': { + 'name': 'NU-Telecom' + }, + 'nulink': { + 'name': 'NuLink' + }, + 'jam030': { + 'name': 'NVC' + }, + 'far035': { + 'name': 'OmniTel Communications' + }, + 'onesource': { + 'name': 'OneSource Communications' + }, + 'cit230': { + 'name': 'Opelika Power Services' + }, + 'daltonutilities': { + 'name': 'OptiLink' + }, + 'mid140': { + 'name': 'OPTURA' + }, + 'ote010': { + 'name': 'OTEC Communication Company' + }, + 'cci020': { + 'name': 'Packerland Broadband' + }, + 'pan010': { + 'name': 'Panora Telco/Guthrie Center Communications' + }, + 'otter': { + 'name': 'Park Region Telephone & Otter Tail Telcom' + }, + 'mid050': { + 'name': 'Partner Communications Cooperative' + }, + 'fib010': { + 'name': 'Pathway' + }, + 'paulbunyan': { + 'name': 'Paul Bunyan Communications' + }, + 'pem020': { + 'name': 'Pembroke Telephone Company' + }, + 'mck010': { + 'name': 'Peoples Rural Telephone Cooperative' + }, + 'pul010': { + 'name': 'PES Energize' + }, + 'phi010': { + 'name': 'Philippi Communications System' + }, + 'phonoscope': { + 'name': 'Phonoscope Cable' + }, + 'pin070': { + 'name': 'Pine Belt Communications, Inc.' + }, + 'weh010-pine': { + 'name': 'Pine Bluff Cable TV' + }, + 'pin060': { + 'name': 'Pineland Telephone Cooperative' + }, + 'cam010': { + 'name': 'Pinpoint Communications' + }, + 'pio060': { + 'name': 'Pioneer Broadband' + }, + 'pioncomm': { + 'name': 'Pioneer Communications' + }, + 'pioneer': { + 'name': 'Pioneer DTV' + }, + 'pla020': { + 'name': 'Plant TiftNet, Inc.' + }, + 'par010': { + 'name': 'PLWC' + }, + 'pro035': { + 'name': 'PMT' + }, + 'vik011': { + 'name': 'Polar Cablevision' + }, + 'pottawatomie': { + 'name': 'Pottawatomie Telephone Co.' + }, + 'premiercomm': { + 'name': 'Premier Communications' + }, + 'psc010': { + 'name': 'PSC' + }, + 'pan020': { + 'name': 'PTCI' + }, + 'qco010': { + 'name': 'QCOL' + }, + 'qua010': { + 'name': 'Quality Cablevision' + }, + 'rad010': { + 'name': 'Radcliffe Telephone Company' + }, + 'car040': { + 'name': 'Rainbow Communications' + }, + 'rai030': { + 'name': 'Rainier Connect' + }, + 'ral010': { + 'name': 'Ralls Technologies' + }, + 'rct010': { + 'name': 'RC Technologies' + }, + 'red040': { + 'name': 'Red River Communications' + }, + 'ree010': { + 'name': 'Reedsburg Utility Commission' + }, + 'mol010': { + 'name': 'Reliance Connects- Oregon' + }, + 'res020': { + 'name': 'Reserve Telecommunications' + }, + 'weh010-resort': { + 'name': 'Resort TV Cable' + }, + 'rld010': { + 'name': 'Richland Grant Telephone Cooperative, Inc.' + }, + 'riv030': { + 'name': 'River Valley Telecommunications Coop' + }, + 'rockportcable': { + 'name': 'Rock Port Cablevision' + }, + 'rsf010': { + 'name': 'RS Fiber' + }, + 'rtc': { + 'name': 'RTC Communication Corp' + }, + 'res040': { + 'name': 'RTC-Reservation Telephone Coop.' + }, + 'rte010': { + 'name': 'RTEC Communications' + }, + 'stc010': { + 'name': 'S&T' + }, + 'san020': { + 'name': 'San Bruno Cable TV' + }, + 'san040-01': { + 'name': 'Santel' + }, + 'sav010': { + 'name': 'SCI Broadband-Savage Communications Inc.' + }, + 'sco050': { + 'name': 'Scottsboro Electric Power Board' + }, + 'scr010': { + 'name': 'Scranton Telephone Company' + }, + 'selco': { + 'name': 'SELCO' + }, + 'she010': { + 'name': 'Shentel' + }, + 'she030': { + 'name': 'Sherwood Mutual Telephone Association, Inc.' + }, + 'ind060-ssc': { + 'name': 'Silver Star Communications' + }, + 'sjoberg': { + 'name': 'Sjoberg\'s Inc.' + }, + 'sou025': { + 'name': 'SKT' + }, + 'sky050': { + 'name': 'SkyBest TV' + }, + 'nttcsmi010': { + 'name': 'Smithville Communications' + }, + 'woo010': { + 'name': 'Solarus' + }, + 'sou075': { + 'name': 'South Central Rural Telephone Cooperative' + }, + 'sou065': { + 'name': 'South Holt Cablevision, Inc.' + }, + 'sou035': { + 'name': 'South Slope Cooperative Communications' + }, + 'spa020': { + 'name': 'Spanish Fork Community Network' + }, + 'spe010': { + 'name': 'Spencer Municipal Utilities' + }, + 'spi005': { + 'name': 'Spillway Communications, Inc.' + }, + 'srt010': { + 'name': 'SRT' + }, + 'cccsmc010': { + 'name': 'St. Maarten Cable TV' + }, + 'sta025': { + 'name': 'Star Communications' + }, + 'sco020': { + 'name': 'STE' + }, + 'uin010': { + 'name': 'STRATA Networks' + }, + 'sum010': { + 'name': 'Sumner Cable TV' + }, + 'pie010': { + 'name': 'Surry TV/PCSI TV' + }, + 'swa010': { + 'name': 'Swayzee Communications' + }, + 'sweetwater': { + 'name': 'Sweetwater Cable Television Co' + }, + 'weh010-talequah': { + 'name': 'Tahlequah Cable TV' + }, + 'tct': { + 'name': 'TCT' + }, + 'tel050': { + 'name': 'Tele-Media Company' + }, + 'com050': { + 'name': 'The Community Agency' + }, + 'thr020': { + 'name': 'Three River' + }, + 'cab140': { + 'name': 'Town & Country Technologies' + }, + 'tra010': { + 'name': 'Trans-Video' + }, + 'tre010': { + 'name': 'Trenton TV Cable Company' + }, + 'tcc': { + 'name': 'Tri County Communications Cooperative' + }, + 'tri025': { + 'name': 'TriCounty Telecom' + }, + 'tri110': { + 'name': 'TrioTel Communications, Inc.' + }, + 'tro010': { + 'name': 'Troy Cablevision, Inc.' + }, + 'tsc': { + 'name': 'TSC' + }, + 'cit220': { + 'name': 'Tullahoma Utilities Board' + }, + 'tvc030': { + 'name': 'TV Cable of Rensselaer' + }, + 'tvc015': { + 'name': 'TVC Cable' + }, + 'cab180': { + 'name': 'TVision' + }, + 'twi040': { + 'name': 'Twin Lakes' + }, + 'tvtinc': { + 'name': 'Twin Valley' + }, + 'uis010': { + 'name': 'Union Telephone Company' + }, + 'uni110': { + 'name': 'United Communications - TN' + }, + 'uni120': { + 'name': 'United Services' + }, + 'uss020': { + 'name': 'US Sonet' + }, + 'cab060': { + 'name': 'USA Communications' + }, + 'she005': { + 'name': 'USA Communications/Shellsburg, IA' + }, + 'val040': { + 'name': 'Valley TeleCom Group' + }, + 'val025': { + 'name': 'Valley Telecommunications' + }, + 'val030': { + 'name': 'Valparaiso Broadband' + }, + 'cla050': { + 'name': 'Vast Broadband' + }, + 'sul015': { + 'name': 'Venture Communications Cooperative, Inc.' + }, + 'ver025': { + 'name': 'Vernon Communications Co-op' + }, + 'weh010-vicksburg': { + 'name': 'Vicksburg Video' + }, + 'vis070': { + 'name': 'Vision Communications' + }, + 'volcanotel': { + 'name': 'Volcano Vision, Inc.' + }, + 'vol040-02': { + 'name': 'VolFirst / BLTV' + }, + 'ver070': { + 'name': 'VTel' + }, + 'nttcvtx010': { + 'name': 'VTX1' + }, + 'bci010-02': { + 'name': 'Vyve Broadband' + }, + 'wab020': { + 'name': 'Wabash Mutual Telephone' + }, + 'waitsfield': { + 'name': 'Waitsfield Cable' + }, + 'wal010': { + 'name': 'Walnut Communications' + }, + 'wavebroadband': { + 'name': 'Wave' + }, + 'wav030': { + 'name': 'Waverly Communications Utility' + }, + 'wbi010': { + 'name': 'WBI' + }, + 'web020': { + 'name': 'Webster-Calhoun Cooperative Telephone Association' + }, + 'wes005': { + 'name': 'West Alabama TV Cable' + }, + 'carolinata': { + 'name': 'West Carolina Communications' + }, + 'wct010': { + 'name': 'West Central Telephone Association' + }, + 'wes110': { + 'name': 'West River Cooperative Telephone Company' + }, + 'ani030': { + 'name': 'WesTel Systems' + }, + 'westianet': { + 'name': 'Western Iowa Networks' + }, + 'nttcwhi010': { + 'name': 'Whidbey Telecom' + }, + 'weh010-white': { + 'name': 'White County Cable TV' + }, + 'wes130': { + 'name': 'Wiatel' + }, + 'wik010': { + 'name': 'Wiktel' + }, + 'wil070': { + 'name': 'Wilkes Communications, Inc./RiverStreet Networks' + }, + 'wil015': { + 'name': 'Wilson Communications' + }, + 'win010': { + 'name': 'Windomnet/SMBS' + }, + 'win090': { + 'name': 'Windstream Cable TV' + }, + 'wcta': { + 'name': 'Winnebago Cooperative Telecom Association' + }, + 'wtc010': { + 'name': 'WTC' + }, + 'wil040': { + 'name': 'WTC Communications, Inc.' + }, + 'wya010': { + 'name': 'Wyandotte Cable' + }, + 'hin020-02': { + 'name': 'X-Stream Services' + }, + 'xit010': { + 'name': 'XIT Communications' + }, + 'yel010': { + 'name': 'Yelcot Communications' + }, + 'mid180-01': { + 'name': 'yondoo' + }, + 'cou060': { + 'name': 'Zito Media' + }, } @@ -113,10 +1367,10 @@ class AdobePassIE(InfoExtractor): provider_login_page_res = post_form( provider_redirect_page_res, 'Downloading Provider Login Page') mvpd_confirm_page_res = post_form(provider_login_page_res, 'Logging in', { - mso_info['username_field']: username, - mso_info['password_field']: password, + mso_info.get('username_field', 'username'): username, + mso_info.get('password_field', 'password'): password, }) - if mso_id == 'DTV': + if mso_id != 'Rogers': post_form(mvpd_confirm_page_res, 'Confirming Login') session = self._download_webpage( From 12f211d0cbd25554ff3116ee173ffc3f25d0e453 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 21 Sep 2016 22:51:36 +0700 Subject: [PATCH 0196/1571] [videomore] Fix embed regex --- youtube_dl/extractor/videomore.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/videomore.py b/youtube_dl/extractor/videomore.py index 328b5b7fb..8a11ff848 100644 --- a/youtube_dl/extractor/videomore.py +++ b/youtube_dl/extractor/videomore.py @@ -84,7 +84,7 @@ class VideomoreIE(InfoExtractor): @staticmethod def _extract_url(webpage): mobj = re.search( - r'<object[^>]+data=(["\'])https?://videomore.ru/player\.swf\?.*config=(?P<url>https?://videomore\.ru/(?:[^/]+/)+\d+\.xml).*\1', + r'<object[^>]+data=(["\'])https?://videomore\.ru/player\.swf\?.*config=(?P<url>https?://videomore\.ru/(?:[^/]+/)+\d+\.xml).*\1', webpage) if mobj: return mobj.group('url') From 1978540a5122c53012e17a78841f3da0df77fd34 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Wed, 21 Sep 2016 21:49:52 +0100 Subject: [PATCH 0197/1571] [ooyala] extract all hls formats --- youtube_dl/extractor/ooyala.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/ooyala.py b/youtube_dl/extractor/ooyala.py index 2038a6ba5..72ec20938 100644 --- a/youtube_dl/extractor/ooyala.py +++ b/youtube_dl/extractor/ooyala.py @@ -47,7 +47,7 @@ class OoyalaBaseIE(InfoExtractor): delivery_type = stream['delivery_type'] if delivery_type == 'hls' or ext == 'm3u8': formats.extend(self._extract_m3u8_formats( - s_url, embed_code, 'mp4', 'm3u8_native', + re.sub(r'/ip(?:ad|hone)/', '/all/', s_url), embed_code, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)) elif delivery_type == 'hds' or ext == 'f4m': formats.extend(self._extract_f4m_formats( From 0a439c5c4c1a6a2ee54465c5ad893ffb768539d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 22 Sep 2016 21:48:53 +0700 Subject: [PATCH 0198/1571] [udemy] Stringify video id --- youtube_dl/extractor/udemy.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/udemy.py b/youtube_dl/extractor/udemy.py index c2f507233..cce29c6e0 100644 --- a/youtube_dl/extractor/udemy.py +++ b/youtube_dl/extractor/udemy.py @@ -5,6 +5,7 @@ import re from .common import InfoExtractor from ..compat import ( compat_HTTPError, + compat_str, compat_urllib_request, compat_urlparse, ) @@ -207,7 +208,7 @@ class UdemyIE(InfoExtractor): if youtube_url: return self.url_result(youtube_url, 'Youtube') - video_id = asset['id'] + video_id = compat_str(asset['id']) thumbnail = asset.get('thumbnail_url') or asset.get('thumbnailUrl') duration = float_or_none(asset.get('data', {}).get('duration')) From e3d6bdc8fc48ddf0bea324c9196297e539669aaf Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Fri, 23 Sep 2016 01:11:13 +0800 Subject: [PATCH 0199/1571] [ustream] Support HLS streams (closes #10698) --- ChangeLog | 5 ++ youtube_dl/extractor/ustream.py | 122 +++++++++++++++++++++++++++++++- 2 files changed, 126 insertions(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index 24077c430..5122af4c0 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,8 @@ +version <unreleased> + +Extractors ++ [ustream] Support the new HLS streams (#10698) + version 2016.09.19 Extractors diff --git a/youtube_dl/extractor/ustream.py b/youtube_dl/extractor/ustream.py index a3dc9d33e..0c06bf36b 100644 --- a/youtube_dl/extractor/ustream.py +++ b/youtube_dl/extractor/ustream.py @@ -1,15 +1,20 @@ from __future__ import unicode_literals +import random import re from .common import InfoExtractor from ..compat import ( + compat_str, compat_urlparse, ) from ..utils import ( + encode_data_uri, ExtractorError, int_or_none, float_or_none, + mimetype2ext, + str_or_none, ) @@ -47,8 +52,108 @@ class UstreamIE(InfoExtractor): 'id': '10299409', }, 'playlist_count': 3, + }, { + 'url': 'http://www.ustream.tv/recorded/91343263', + 'info_dict': { + 'id': '91343263', + 'ext': 'mp4', + 'title': 'GitHub Universe - General Session - Day 1', + 'upload_date': '20160914', + 'description': 'GitHub Universe - General Session - Day 1', + 'timestamp': 1473872730, + 'uploader': 'wa0dnskeqkr', + 'uploader_id': '38977840', + }, + 'params': { + 'skip_download': True, # m3u8 download + }, }] + def _get_stream_info(self, url, video_id, app_id_ver, extra_note=None): + def num_to_hex(n): + return hex(n)[2:] + + rnd = random.randrange + + if not extra_note: + extra_note = '' + + conn_info = self._download_json( + 'http://r%d-1-%s-recorded-lp-live.ums.ustream.tv/1/ustream' % (rnd(1e8), video_id), + video_id, note='Downloading connection info' + extra_note, + query={ + 'type': 'viewer', + 'appId': app_id_ver[0], + 'appVersion': app_id_ver[1], + 'rsid': '%s:%s' % (num_to_hex(rnd(1e8)), num_to_hex(rnd(1e8))), + 'rpin': '_rpin.%d' % rnd(1e15), + 'referrer': url, + 'media': video_id, + 'application': 'recorded', + }) + host = conn_info[0]['args'][0]['host'] + connection_id = conn_info[0]['args'][0]['connectionId'] + + return self._download_json( + 'http://%s/1/ustream?connectionId=%s' % (host, connection_id), + video_id, note='Downloading stream info' + extra_note) + + def _get_streams(self, url, video_id, app_id_ver): + # Sometimes the return dict does not have 'stream' + for trial_count in range(3): + stream_info = self._get_stream_info( + url, video_id, app_id_ver, + extra_note=' (try %d)' % (trial_count + 1) if trial_count > 0 else '') + if 'stream' in stream_info[0]['args'][0]: + return stream_info[0]['args'][0]['stream'] + return [] + + def _parse_segmented_mp4(self, dash_stream_info): + def resolve_dash_template(template, idx, chunk_hash): + return template.replace('%', compat_str(idx), 1).replace('%', chunk_hash) + + formats = [] + for stream in dash_stream_info['streams']: + # Use only one provider to avoid too many formats + provider = dash_stream_info['providers'][0] + fragments = [{ + 'url': resolve_dash_template( + provider['url'] + stream['initUrl'], 0, dash_stream_info['hashes']['0']) + }] + for idx in range(dash_stream_info['videoLength'] // dash_stream_info['chunkTime']): + fragments.append({ + 'url': resolve_dash_template( + provider['url'] + stream['segmentUrl'], idx, + dash_stream_info['hashes'][compat_str(idx // 10 * 10)]) + }) + content_type = stream['contentType'] + kind = content_type.split('/')[0] + f = { + 'format_id': '-'.join(filter(None, [ + 'dash', kind, str_or_none(stream.get('bitrate'))])), + 'protocol': 'http_dash_segments', + # TODO: generate a MPD doc for external players? + 'url': encode_data_uri(b'<MPD/>', 'text/xml'), + 'ext': mimetype2ext(content_type), + 'height': stream.get('height'), + 'width': stream.get('width'), + 'fragments': fragments, + } + if kind == 'video': + f.update({ + 'vcodec': stream.get('codec'), + 'acodec': 'none', + 'vbr': stream.get('bitrate'), + }) + else: + f.update({ + 'vcodec': 'none', + 'acodec': stream.get('codec'), + 'abr': stream.get('bitrate'), + }) + formats.append(f) + return formats + def _real_extract(self, url): m = re.match(self._VALID_URL, url) video_id = m.group('id') @@ -86,7 +191,22 @@ class UstreamIE(InfoExtractor): 'url': video_url, 'ext': format_id, 'filesize': filesize, - } for format_id, video_url in video['media_urls'].items()] + } for format_id, video_url in video['media_urls'].items() if video_url] + + if not formats: + hls_streams = self._get_streams(url, video_id, app_id_ver=(11, 2)) + if hls_streams: + # m3u8_native leads to intermittent ContentTooShortError + formats.extend(self._extract_m3u8_formats( + hls_streams[0]['url'], video_id, ext='mp4', m3u8_id='hls')) + + ''' + # DASH streams handling is incomplete as 'url' is missing + dash_streams = self._get_streams(url, video_id, app_id_ver=(3, 1)) + if dash_streams: + formats.extend(self._parse_segmented_mp4(dash_streams)) + ''' + self._sort_formats(formats) description = video.get('description') From 628406db960c032eb68ef318ce9fecf6b8329834 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Fri, 23 Sep 2016 01:13:56 +0800 Subject: [PATCH 0200/1571] [Makefile] Cleanup files from fragment-based downloaders --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 354052c50..ac234fcb0 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ all: youtube-dl README.md CONTRIBUTING.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish supportedsites clean: - rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish youtube_dl/extractor/lazy_extractors.py *.dump *.part *.info.json *.mp4 *.m4a *.flv *.mp3 *.avi *.mkv *.webm *.jpg *.png CONTRIBUTING.md.tmp ISSUE_TEMPLATE.md.tmp youtube-dl youtube-dl.exe + rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish youtube_dl/extractor/lazy_extractors.py *.dump *.part* *.info.json *.mp4 *.m4a *.flv *.mp3 *.avi *.mkv *.webm *.jpg *.png CONTRIBUTING.md.tmp ISSUE_TEMPLATE.md.tmp youtube-dl youtube-dl.exe find . -name "*.pyc" -delete find . -name "*.class" -delete From 4ddcb5999d0323fb83c5b879127d31763f5d63e2 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Fri, 23 Sep 2016 01:47:01 +0800 Subject: [PATCH 0201/1571] [openload] Fix extraction (closes #10408, closes #10727) Thanks to @daniel100097 for providing a working version --- ChangeLog | 1 + youtube_dl/extractor/openload.py | 5 +++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/ChangeLog b/ChangeLog index 5122af4c0..6c72bae90 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,7 @@ version <unreleased> Extractors +* [openload] Fix extraction (#10408) + [ustream] Support the new HLS streams (#10698) version 2016.09.19 diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index c261a7455..b6e3ac250 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -51,7 +51,8 @@ class OpenloadIE(InfoExtractor): # declared to be freely used in youtube-dl # See https://github.com/rg3/youtube-dl/issues/10408 enc_data = self._html_search_regex( - r'<span[^>]+id="hiddenurl"[^>]*>([^<]+)</span>', webpage, 'encrypted data') + r'<span[^>]*>([^<]+)</span>\s*<span[^>]*>[^<]+</span>\s*<span[^>]+id="streamurl"', + webpage, 'encrypted data') video_url_chars = [] @@ -60,7 +61,7 @@ class OpenloadIE(InfoExtractor): if j >= 33 and j <= 126: j = ((j + 14) % 94) + 33 if idx == len(enc_data) - 1: - j += 3 + j += 2 video_url_chars += compat_chr(j) video_url = 'https://openload.co/stream/%s?mime=true' % ''.join(video_url_chars) From 45cae3b021828cc6f7a67c7a14645ae6f0806f59 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Thu, 22 Sep 2016 19:27:57 +0100 Subject: [PATCH 0202/1571] [cbs] extract info from thunder videoPlayerService(closes #10728) --- youtube_dl/extractor/cbs.py | 58 ++++++++++++++++++++++++++----------- 1 file changed, 41 insertions(+), 17 deletions(-) diff --git a/youtube_dl/extractor/cbs.py b/youtube_dl/extractor/cbs.py index 3f4dea40c..58f258c54 100644 --- a/youtube_dl/extractor/cbs.py +++ b/youtube_dl/extractor/cbs.py @@ -4,7 +4,9 @@ from .theplatform import ThePlatformFeedIE from ..utils import ( int_or_none, find_xpath_attr, - ExtractorError, + xpath_element, + xpath_text, + update_url_query, ) @@ -47,27 +49,49 @@ class CBSIE(CBSBaseIE): 'only_matching': True, }] - def _extract_video_info(self, guid): - path = 'dJ5BDC/media/guid/2198311517/' + guid - smil_url = 'http://link.theplatform.com/s/%s?mbr=true' % path - formats, subtitles = self._extract_theplatform_smil(smil_url + '&manifest=m3u', guid) - for r in ('OnceURL&formats=M3U', 'HLS&formats=M3U', 'RTMP', 'WIFI', '3G'): - try: - tp_formats, _ = self._extract_theplatform_smil(smil_url + '&assetTypes=' + r, guid, 'Downloading %s SMIL data' % r.split('&')[0]) - formats.extend(tp_formats) - except ExtractorError: + def _extract_video_info(self, content_id): + items_data = self._download_xml( + 'http://can.cbs.com/thunder/player/videoPlayerService.php', + content_id, query={'partner': 'cbs', 'contentId': content_id}) + video_data = xpath_element(items_data, './/item') + title = xpath_text(video_data, 'videoTitle', 'title', True) + tp_path = 'dJ5BDC/media/guid/2198311517/%s' % content_id + tp_release_url = 'http://link.theplatform.com/s/' + tp_path + + asset_types = [] + subtitles = {} + formats = [] + for item in items_data.findall('.//item'): + asset_type = xpath_text(item, 'assetType') + if not asset_type or asset_type in asset_types: continue + asset_types.append(asset_type) + query = { + 'mbr': 'true', + 'assetTypes': asset_type, + } + if asset_type.startswith('HLS') or asset_type in ('OnceURL', 'StreamPack'): + query['formats'] = 'MPEG4,M3U' + elif asset_type in ('RTMP', 'WIFI', '3G'): + query['formats'] = 'MPEG4,FLV' + tp_formats, tp_subtitles = self._extract_theplatform_smil( + update_url_query(tp_release_url, query), content_id, + 'Downloading %s SMIL data' % asset_type) + formats.extend(tp_formats) + subtitles = self._merge_subtitles(subtitles, tp_subtitles) self._sort_formats(formats) - metadata = self._download_theplatform_metadata(path, guid) - info = self._parse_theplatform_metadata(metadata) + + info = self._extract_theplatform_metadata(tp_path, content_id) info.update({ - 'id': guid, + 'id': content_id, + 'title': title, + 'series': xpath_text(video_data, 'seriesTitle'), + 'season_number': int_or_none(xpath_text(video_data, 'seasonNumber')), + 'episode_number': int_or_none(xpath_text(video_data, 'episodeNumber')), + 'duration': int_or_none(xpath_text(video_data, 'videoLength'), 1000), + 'thumbnail': xpath_text(video_data, 'previewImageURL'), 'formats': formats, 'subtitles': subtitles, - 'series': metadata.get('cbs$SeriesTitle'), - 'season_number': int_or_none(metadata.get('cbs$SeasonNumber')), - 'episode': metadata.get('cbs$EpisodeTitle'), - 'episode_number': int_or_none(metadata.get('cbs$EpisodeNumber')), }) return info From 71ad00c09fecd3ecc84784cf215537cad0a79595 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 23 Sep 2016 21:08:16 +0700 Subject: [PATCH 0203/1571] [prosiebensat1] Add support for kabeleinsdoku (Closes #10732) --- youtube_dl/extractor/prosiebensat1.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/prosiebensat1.py b/youtube_dl/extractor/prosiebensat1.py index 7335dc2af..5a29b844d 100644 --- a/youtube_dl/extractor/prosiebensat1.py +++ b/youtube_dl/extractor/prosiebensat1.py @@ -122,7 +122,7 @@ class ProSiebenSat1BaseIE(InfoExtractor): class ProSiebenSat1IE(ProSiebenSat1BaseIE): IE_NAME = 'prosiebensat1' IE_DESC = 'ProSiebenSat.1 Digital' - _VALID_URL = r'https?://(?:www\.)?(?:(?:prosieben|prosiebenmaxx|sixx|sat1|kabeleins|the-voice-of-germany|7tv)\.(?:de|at|ch)|ran\.de|fem\.com)/(?P<id>.+)' + _VALID_URL = r'https?://(?:www\.)?(?:(?:prosieben|prosiebenmaxx|sixx|sat1|kabeleins|the-voice-of-germany|7tv|kabeleinsdoku)\.(?:de|at|ch)|ran\.de|fem\.com)/(?P<id>.+)' _TESTS = [ { @@ -290,6 +290,11 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE): 'skip_download': True, }, }, + { + # geo restricted to Germany + 'url': 'http://www.kabeleinsdoku.de/tv/mayday-alarm-im-cockpit/video/102-notlandung-im-hudson-river-ganze-folge', + 'only_matching': True, + }, ] _TOKEN = 'prosieben' From 24628cf7db46ecce3fe56d387266c556cd9210ba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 24 Sep 2016 02:01:01 +0700 Subject: [PATCH 0204/1571] [soundcloud:playlist] Provide video id for playlist entries (Closes #10733) --- youtube_dl/extractor/soundcloud.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index 513c54829..496cc5d8e 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -477,7 +477,11 @@ class SoundcloudPlaylistIE(SoundcloudIE): data = self._download_json( base_url + data, playlist_id, 'Downloading playlist') - entries = [self.url_result(track['permalink_url'], 'Soundcloud') for track in data['tracks']] + entries = [ + self.url_result( + track['permalink_url'], SoundcloudIE.ie_key(), + video_id=compat_str(track['id']) if track.get('id') else None) + for track in data['tracks'] if track.get('permalink_url')] return { '_type': 'playlist', From 8eec691e8a89d0094b806b86111fbcfd0ade64c9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 24 Sep 2016 02:12:49 +0700 Subject: [PATCH 0205/1571] [ChangeLog] Actualize --- ChangeLog | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/ChangeLog b/ChangeLog index 6c72bae90..e0908aa30 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,8 +1,23 @@ version <unreleased> +Core ++ Add support for watchTVeverywhere.com authentication provider based MSOs for + Adobe Pass authentication (#10709) + Extractors ++ [soundcloud:playlist] Provide video id for early playlist entries (#10733) ++ [prosiebensat1] Add support for kabeleinsdoku (#10732) +* [cbs] Extract info from thunder videoPlayerService (#10728) * [openload] Fix extraction (#10408) + [ustream] Support the new HLS streams (#10698) ++ [ooyala] Extract all HLS formats ++ [cartoonnetwork] Add support for Adobe Pass authentication ++ [soundcloud] Extract license metadata ++ [fox] Add support for Adobe Pass authentication (#8584) ++ [tbs] Add support for Adobe Pass authentication (#10642, #10222) ++ [trutv] Add support for Adobe Pass authentication (#10519) ++ [turner] Add support for Adobe Pass authentication + version 2016.09.19 From e6332059ac66bfc91ed18e5b15d9238e4283ee7a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 24 Sep 2016 02:16:47 +0700 Subject: [PATCH 0206/1571] release 2016.09.24 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- youtube_dl/version.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 8b28d784a..7669ab9b7 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.09.19*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.09.19** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.09.24*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.09.24** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v <your command line> [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.09.19 +[debug] youtube-dl version 2016.09.24 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index e0908aa30..a1c4df479 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2016.09.24 Core + Add support for watchTVeverywhere.com authentication provider based MSOs for diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 9d3138181..2af6380b8 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.09.19' +__version__ = '2016.09.24' From 5968d7d2fe619e85eb424d6e47d000f0b295d4a2 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sat, 24 Sep 2016 14:20:42 +0800 Subject: [PATCH 0207/1571] [extractor/common] Improved support for HTML5 subtitles Ref: #10625 In a strict sense, <track>s with kind=captions are not subtitles. [1] openload misuses this attribute, and I guess there will be more examples, so I add it to common.py. Also allow extracting information for subtitles-only <video> or <audio> tags, which is the case of openload. [1] https://www.w3.org/TR/html5/embedded-content-0.html#attr-track-kind --- ChangeLog | 6 ++++++ youtube_dl/extractor/common.py | 4 ++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/ChangeLog b/ChangeLog index a1c4df479..ebe4ff0e8 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +vesion <unreleased> + +Core ++ Improved support for HTML5 subtitles + + version 2016.09.24 Core diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 9c8991542..5cb4479ec 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1828,7 +1828,7 @@ class InfoExtractor(object): for track_tag in re.findall(r'<track[^>]+>', media_content): track_attributes = extract_attributes(track_tag) kind = track_attributes.get('kind') - if not kind or kind == 'subtitles': + if not kind or kind in ('subtitles', 'captions'): src = track_attributes.get('src') if not src: continue @@ -1836,7 +1836,7 @@ class InfoExtractor(object): media_info['subtitles'].setdefault(lang, []).append({ 'url': absolute_url(src), }) - if media_info['formats']: + if media_info['formats'] or media_info['subtitles']: entries.append(media_info) return entries From 0711995bcac2f44e09a943521dceb1c54bf8ffb7 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sat, 24 Sep 2016 14:27:08 +0800 Subject: [PATCH 0208/1571] [openload] Support subtitles (closes #10625) --- ChangeLog | 3 +++ youtube_dl/extractor/openload.py | 24 +++++++++++++++++++++++- 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index ebe4ff0e8..766cc477b 100644 --- a/ChangeLog +++ b/ChangeLog @@ -3,6 +3,9 @@ vesion <unreleased> Core + Improved support for HTML5 subtitles +Extractors ++ [openload] Support subtitles (#10625) + version 2016.09.24 diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index b6e3ac250..4f5175136 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -24,6 +24,22 @@ class OpenloadIE(InfoExtractor): 'title': 'skyrim_no-audio_1080.mp4', 'thumbnail': 're:^https?://.*\.jpg$', }, + }, { + 'url': 'https://openload.co/embed/rjC09fkPLYs', + 'info_dict': { + 'id': 'rjC09fkPLYs', + 'ext': 'mp4', + 'title': 'movie.mp4', + 'thumbnail': 're:^https?://.*\.jpg$', + 'subtitles': { + 'en': [{ + 'ext': 'vtt', + }], + }, + }, + 'params': { + 'skip_download': True, # test subtitles only + }, }, { 'url': 'https://openload.co/embed/kUEfGclsU9o/skyrim_no-audio_1080.mp4', 'only_matching': True, @@ -71,11 +87,17 @@ class OpenloadIE(InfoExtractor): 'title', default=None) or self._html_search_meta( 'description', webpage, 'title', fatal=True) - return { + entries = self._parse_html5_media_entries(url, webpage, video_id) + subtitles = entries[0]['subtitles'] if entries else None + + info_dict = { 'id': video_id, 'title': title, 'thumbnail': self._og_search_thumbnail(webpage, default=None), 'url': video_url, # Seems all videos have extensions in their titles 'ext': determine_ext(title), + 'subtitles': subtitles, } + + return info_dict From 8add4bfecb73f44cffe3cbf33941fc409564149b Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Sat, 24 Sep 2016 10:41:38 +0100 Subject: [PATCH 0209/1571] [mtv] add support for new website urls(closes #8169)(closes #9808) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/mtv.py | 21 +++++++++++++++++++++ 2 files changed, 22 insertions(+) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 8166fd4f9..bf1f70885 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -516,6 +516,7 @@ from .movingimage import MovingImageIE from .msn import MSNIE from .mtv import ( MTVIE, + MTVVideoIE, MTVServicesEmbeddedIE, MTVDEIE, ) diff --git a/youtube_dl/extractor/mtv.py b/youtube_dl/extractor/mtv.py index bdda68819..84a2dcb62 100644 --- a/youtube_dl/extractor/mtv.py +++ b/youtube_dl/extractor/mtv.py @@ -270,6 +270,27 @@ class MTVServicesEmbeddedIE(MTVServicesInfoExtractor): class MTVIE(MTVServicesInfoExtractor): + _VALID_URL = r'(?x)https?://(?:www\.)?mtv\.com/(video-clips|full-episodes)/(?P<id>[^/?#.]+)' + _FEED_URL = 'http://www.mtv.com/feeds/mrss/' + + _TESTS = [{ + 'url': 'http://www.mtv.com/video-clips/vl8qof/unlocking-the-truth-trailer', + 'md5': '1edbcdf1e7628e414a8c5dcebca3d32b', + 'info_dict': { + 'id': '5e14040d-18a4-47c4-a582-43ff602de88e', + 'ext': 'mp4', + 'title': 'Unlocking The Truth|July 18, 2016|1|101|Trailer', + 'description': '"Unlocking the Truth" premieres August 17th at 11/10c.', + 'timestamp': 1468846800, + 'upload_date': '20160718', + }, + }, { + 'url': 'http://www.mtv.com/full-episodes/94tujl/unlocking-the-truth-gates-of-hell-season-1-ep-101', + 'only_matching': True, + }] + + +class MTVVideoIE(MTVServicesInfoExtractor): _VALID_URL = r'''(?x)^https?:// (?:(?:www\.)?mtv\.com/videos/.+?/(?P<videoid>[0-9]+)/[^/]+$| m\.mtv\.com/videos/video\.rbml\?.*?id=(?P<mgid>[^&]+))''' From a54ffb8aa778062901dd15b020576bc7d472ae40 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Sat, 24 Sep 2016 10:50:14 +0100 Subject: [PATCH 0210/1571] [mtv] add common IE_NAME prefix for MTVIE and MTVVideoIE --- youtube_dl/extractor/mtv.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/mtv.py b/youtube_dl/extractor/mtv.py index 84a2dcb62..2e9580b10 100644 --- a/youtube_dl/extractor/mtv.py +++ b/youtube_dl/extractor/mtv.py @@ -270,7 +270,8 @@ class MTVServicesEmbeddedIE(MTVServicesInfoExtractor): class MTVIE(MTVServicesInfoExtractor): - _VALID_URL = r'(?x)https?://(?:www\.)?mtv\.com/(video-clips|full-episodes)/(?P<id>[^/?#.]+)' + IE_NAME = 'mtv' + _VALID_URL = r'https?://(?:www\.)?mtv\.com/(video-clips|full-episodes)/(?P<id>[^/?#.]+)' _FEED_URL = 'http://www.mtv.com/feeds/mrss/' _TESTS = [{ @@ -291,6 +292,7 @@ class MTVIE(MTVServicesInfoExtractor): class MTVVideoIE(MTVServicesInfoExtractor): + IE_NAME = 'mtv:video' _VALID_URL = r'''(?x)^https?:// (?:(?:www\.)?mtv\.com/videos/.+?/(?P<videoid>[0-9]+)/[^/]+$| m\.mtv\.com/videos/video\.rbml\?.*?id=(?P<mgid>[^&]+))''' From f0bc5a8609786633d8b51ab4255c1f0fdb941f73 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sat, 24 Sep 2016 19:57:55 +0800 Subject: [PATCH 0211/1571] [twitter] Support Periscope embeds (closes #10737) Also update _TESTS --- ChangeLog | 1 + youtube_dl/extractor/periscope.py | 9 +++++++ youtube_dl/extractor/twitter.py | 45 +++++++++++++++++++++++++------ 3 files changed, 47 insertions(+), 8 deletions(-) diff --git a/ChangeLog b/ChangeLog index 766cc477b..5c96dc179 100644 --- a/ChangeLog +++ b/ChangeLog @@ -4,6 +4,7 @@ Core + Improved support for HTML5 subtitles Extractors ++ [twitter] Support Periscope embeds (#10737) + [openload] Support subtitles (#10625) diff --git a/youtube_dl/extractor/periscope.py b/youtube_dl/extractor/periscope.py index eb1aeba46..e8b2f11c6 100644 --- a/youtube_dl/extractor/periscope.py +++ b/youtube_dl/extractor/periscope.py @@ -1,6 +1,8 @@ # coding: utf-8 from __future__ import unicode_literals +import re + from .common import InfoExtractor from ..utils import ( parse_iso8601, @@ -41,6 +43,13 @@ class PeriscopeIE(PeriscopeBaseIE): 'only_matching': True, }] + @staticmethod + def _extract_url(webpage): + mobj = re.search( + r'<iframe[^>]+src=([\'"])(?P<url>(?:https?:)?//(?:www\.)?periscope\.tv/(?:(?!\1).)+)\1', webpage) + if mobj: + return mobj.group('url') + def _real_extract(self, url): token = self._match_id(url) diff --git a/youtube_dl/extractor/twitter.py b/youtube_dl/extractor/twitter.py index c5a5843b6..3411fcf7e 100644 --- a/youtube_dl/extractor/twitter.py +++ b/youtube_dl/extractor/twitter.py @@ -4,6 +4,7 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from ..compat import compat_urlparse from ..utils import ( determine_ext, float_or_none, @@ -13,6 +14,8 @@ from ..utils import ( ExtractorError, ) +from .periscope import PeriscopeIE + class TwitterBaseIE(InfoExtractor): def _get_vmap_video_url(self, vmap_url, video_id): @@ -48,12 +51,12 @@ class TwitterCardIE(TwitterBaseIE): }, { 'url': 'https://twitter.com/i/cards/tfw/v1/654001591733886977', - 'md5': 'd4724ffe6d2437886d004fa5de1043b3', + 'md5': 'b6d9683dd3f48e340ded81c0e917ad46', 'info_dict': { 'id': 'dq4Oj5quskI', 'ext': 'mp4', 'title': 'Ubuntu 11.10 Overview', - 'description': 'Take a quick peek at what\'s new and improved in Ubuntu 11.10.\n\nOnce installed take a look at 10 Things to Do After Installing: http://www.omgubuntu.co.uk/2011/10/10...', + 'description': 'md5:a831e97fa384863d6e26ce48d1c43376', 'upload_date': '20111013', 'uploader': 'OMG! Ubuntu!', 'uploader_id': 'omgubuntu', @@ -100,12 +103,17 @@ class TwitterCardIE(TwitterBaseIE): return self.url_result(iframe_url) config = self._parse_json(self._html_search_regex( - r'data-(?:player-)?config="([^"]+)"', webpage, 'data player config'), + r'data-(?:player-)?config="([^"]+)"', webpage, + 'data player config', default='{}'), video_id) if config.get('source_type') == 'vine': return self.url_result(config['player_url'], 'Vine') + periscope_url = PeriscopeIE._extract_url(webpage) + if periscope_url: + return self.url_result(periscope_url, PeriscopeIE.ie_key()) + def _search_dimensions_in_video_url(a_format, video_url): m = re.search(r'/(?P<width>\d+)x(?P<height>\d+)/', video_url) if m: @@ -244,10 +252,10 @@ class TwitterIE(InfoExtractor): 'info_dict': { 'id': '700207533655363584', 'ext': 'mp4', - 'title': 'Donte The Dumbass - BEAT PROD: @suhmeduh #Damndaniel', - 'description': 'Donte The Dumbass on Twitter: "BEAT PROD: @suhmeduh https://t.co/HBrQ4AfpvZ #Damndaniel https://t.co/byBooq2ejZ"', + 'title': 'JG - BEAT PROD: @suhmeduh #Damndaniel', + 'description': 'JG on Twitter: "BEAT PROD: @suhmeduh https://t.co/HBrQ4AfpvZ #Damndaniel https://t.co/byBooq2ejZ"', 'thumbnail': 're:^https?://.*\.jpg', - 'uploader': 'Donte The Dumbass', + 'uploader': 'JG', 'uploader_id': 'jaydingeer', }, 'params': { @@ -278,6 +286,18 @@ class TwitterIE(InfoExtractor): 'params': { 'skip_download': True, # requires ffmpeg }, + }, { + 'url': 'https://twitter.com/OPP_HSD/status/779210622571536384', + 'info_dict': { + 'id': '1zqKVVlkqLaKB', + 'ext': 'mp4', + 'title': 'Sgt Kerry Schmidt - Ontario Provincial Police - Road rage, mischief, assault, rollover and fire in one occurrence', + 'upload_date': '20160923', + 'uploader_id': 'OPP_HSD', + 'uploader': 'Sgt Kerry Schmidt - Ontario Provincial Police', + 'timestamp': 1474613214, + }, + 'add_ie': ['Periscope'], }] def _real_extract(self, url): @@ -328,13 +348,22 @@ class TwitterIE(InfoExtractor): }) return info + twitter_card_url = None if 'class="PlayableMedia' in webpage: + twitter_card_url = '%s//twitter.com/i/videos/tweet/%s' % (self.http_scheme(), twid) + else: + twitter_card_iframe_url = self._search_regex( + r'data-full-card-iframe-url=([\'"])(?P<url>(?:(?!\1).)+)\1', + webpage, 'Twitter card iframe URL', default=None, group='url') + if twitter_card_iframe_url: + twitter_card_url = compat_urlparse.urljoin(url, twitter_card_iframe_url) + + if twitter_card_url: info.update({ '_type': 'url_transparent', 'ie_key': 'TwitterCard', - 'url': '%s//twitter.com/i/videos/tweet/%s' % (self.http_scheme(), twid), + 'url': twitter_card_url, }) - return info raise ExtractorError('There\'s no video in this tweet.') From 8e45e1cc4d706e6b43dac8105acf3592fa3d4725 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 24 Sep 2016 19:18:01 +0700 Subject: [PATCH 0212/1571] [soundcloud] Generalize playlist entries extraction (#10733) --- youtube_dl/extractor/soundcloud.py | 42 ++++++++++++++++++------------ 1 file changed, 26 insertions(+), 16 deletions(-) diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index 496cc5d8e..f3cb35f77 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -260,7 +260,20 @@ class SoundcloudIE(InfoExtractor): return self._extract_info_dict(info, full_title, secret_token=token) -class SoundcloudSetIE(SoundcloudIE): +class SoundcloudBaseIE(SoundcloudIE): + @staticmethod + def _extract_id(e): + return compat_str(e['id']) if e.get('id') else None + + def _extract_track_entries(self, tracks): + return [ + self.url_result( + track['permalink_url'], SoundcloudIE.ie_key(), + video_id=self._extract_id(track)) + for track in tracks if track.get('permalink_url')] + + +class SoundcloudSetIE(SoundcloudBaseIE): _VALID_URL = r'https?://(?:(?:www|m)\.)?soundcloud\.com/(?P<uploader>[\w\d-]+)/sets/(?P<slug_title>[\w\d-]+)(?:/(?P<token>[^?/]+))?' IE_NAME = 'soundcloud:set' _TESTS = [{ @@ -299,7 +312,7 @@ class SoundcloudSetIE(SoundcloudIE): msgs = (compat_str(err['error_message']) for err in info['errors']) raise ExtractorError('unable to download video webpage: %s' % ','.join(msgs)) - entries = [self.url_result(track['permalink_url'], 'Soundcloud') for track in info['tracks']] + entries = self._extract_track_entries(info['tracks']) return { '_type': 'playlist', @@ -309,7 +322,7 @@ class SoundcloudSetIE(SoundcloudIE): } -class SoundcloudUserIE(SoundcloudIE): +class SoundcloudUserIE(SoundcloudBaseIE): _VALID_URL = r'''(?x) https?:// (?:(?:www|m)\.)?soundcloud\.com/ @@ -326,21 +339,21 @@ class SoundcloudUserIE(SoundcloudIE): 'id': '114582580', 'title': 'The Akashic Chronicler (All)', }, - 'playlist_mincount': 111, + 'playlist_mincount': 74, }, { 'url': 'https://soundcloud.com/the-akashic-chronicler/tracks', 'info_dict': { 'id': '114582580', 'title': 'The Akashic Chronicler (Tracks)', }, - 'playlist_mincount': 50, + 'playlist_mincount': 37, }, { 'url': 'https://soundcloud.com/the-akashic-chronicler/sets', 'info_dict': { 'id': '114582580', 'title': 'The Akashic Chronicler (Playlists)', }, - 'playlist_mincount': 3, + 'playlist_mincount': 2, }, { 'url': 'https://soundcloud.com/the-akashic-chronicler/reposts', 'info_dict': { @@ -359,7 +372,7 @@ class SoundcloudUserIE(SoundcloudIE): 'url': 'https://soundcloud.com/grynpyret/spotlight', 'info_dict': { 'id': '7098329', - 'title': 'Grynpyret (Spotlight)', + 'title': 'GRYNPYRET (Spotlight)', }, 'playlist_mincount': 1, }] @@ -421,13 +434,14 @@ class SoundcloudUserIE(SoundcloudIE): for cand in candidates: if isinstance(cand, dict): permalink_url = cand.get('permalink_url') + entry_id = self._extract_id(cand) if permalink_url and permalink_url.startswith('http'): - return permalink_url + return permalink_url, entry_id for e in collection: - permalink_url = resolve_permalink_url((e, e.get('track'), e.get('playlist'))) + permalink_url, entry_id = resolve_permalink_url((e, e.get('track'), e.get('playlist'))) if permalink_url: - entries.append(self.url_result(permalink_url)) + entries.append(self.url_result(permalink_url, video_id=entry_id)) next_href = response.get('next_href') if not next_href: @@ -447,7 +461,7 @@ class SoundcloudUserIE(SoundcloudIE): } -class SoundcloudPlaylistIE(SoundcloudIE): +class SoundcloudPlaylistIE(SoundcloudBaseIE): _VALID_URL = r'https?://api\.soundcloud\.com/playlists/(?P<id>[0-9]+)(?:/?\?secret_token=(?P<token>[^&]+?))?$' IE_NAME = 'soundcloud:playlist' _TESTS = [{ @@ -477,11 +491,7 @@ class SoundcloudPlaylistIE(SoundcloudIE): data = self._download_json( base_url + data, playlist_id, 'Downloading playlist') - entries = [ - self.url_result( - track['permalink_url'], SoundcloudIE.ie_key(), - video_id=compat_str(track['id']) if track.get('id') else None) - for track in data['tracks'] if track.get('permalink_url')] + entries = self._extract_track_entries(data['tracks']) return { '_type': 'playlist', From 7518a61d416133bff8b99c693dfca0b15c0d5b7e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 24 Sep 2016 19:29:49 +0700 Subject: [PATCH 0213/1571] [soundcloud] Fix typo in playlist base class name --- youtube_dl/extractor/soundcloud.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index f3cb35f77..1a8114aa7 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -260,7 +260,7 @@ class SoundcloudIE(InfoExtractor): return self._extract_info_dict(info, full_title, secret_token=token) -class SoundcloudBaseIE(SoundcloudIE): +class SoundcloudPlaylistBaseIE(SoundcloudIE): @staticmethod def _extract_id(e): return compat_str(e['id']) if e.get('id') else None @@ -273,7 +273,7 @@ class SoundcloudBaseIE(SoundcloudIE): for track in tracks if track.get('permalink_url')] -class SoundcloudSetIE(SoundcloudBaseIE): +class SoundcloudSetIE(SoundcloudPlaylistBaseIE): _VALID_URL = r'https?://(?:(?:www|m)\.)?soundcloud\.com/(?P<uploader>[\w\d-]+)/sets/(?P<slug_title>[\w\d-]+)(?:/(?P<token>[^?/]+))?' IE_NAME = 'soundcloud:set' _TESTS = [{ @@ -322,7 +322,7 @@ class SoundcloudSetIE(SoundcloudBaseIE): } -class SoundcloudUserIE(SoundcloudBaseIE): +class SoundcloudUserIE(SoundcloudPlaylistBaseIE): _VALID_URL = r'''(?x) https?:// (?:(?:www|m)\.)?soundcloud\.com/ @@ -461,7 +461,7 @@ class SoundcloudUserIE(SoundcloudBaseIE): } -class SoundcloudPlaylistIE(SoundcloudBaseIE): +class SoundcloudPlaylistIE(SoundcloudPlaylistBaseIE): _VALID_URL = r'https?://api\.soundcloud\.com/playlists/(?P<id>[0-9]+)(?:/?\?secret_token=(?P<token>[^&]+?))?$' IE_NAME = 'soundcloud:playlist' _TESTS = [{ From 6f126d903f46d976a380a5b4265084e5a21a3c09 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Sat, 24 Sep 2016 15:38:19 +0100 Subject: [PATCH 0214/1571] [download/hls] Delegate downloading to ffmpeg for live streams --- youtube_dl/downloader/hls.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/youtube_dl/downloader/hls.py b/youtube_dl/downloader/hls.py index 5d70abf62..541b92ee1 100644 --- a/youtube_dl/downloader/hls.py +++ b/youtube_dl/downloader/hls.py @@ -31,7 +31,7 @@ class HlsFD(FragmentFD): FD_NAME = 'hlsnative' @staticmethod - def can_download(manifest): + def can_download(manifest, info_dict): UNSUPPORTED_FEATURES = ( r'#EXT-X-KEY:METHOD=(?!NONE|AES-128)', # encrypted streams [1] r'#EXT-X-BYTERANGE', # playlists composed of byte ranges of media files [2] @@ -53,6 +53,7 @@ class HlsFD(FragmentFD): ) check_results = [not re.search(feature, manifest) for feature in UNSUPPORTED_FEATURES] check_results.append(can_decrypt_frag or '#EXT-X-KEY:METHOD=AES-128' not in manifest) + check_results.append(not info_dict.get('is_live')) return all(check_results) def real_download(self, filename, info_dict): @@ -62,7 +63,7 @@ class HlsFD(FragmentFD): s = manifest.decode('utf-8', 'ignore') - if not self.can_download(s): + if not self.can_download(s, info_dict): self.report_warning( 'hlsnative has detected features it does not support, ' 'extraction will be delegated to ffmpeg') From 27e99078d337cdc77a5a7228998d3b2fe722e7cb Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Sat, 24 Sep 2016 15:39:06 +0100 Subject: [PATCH 0215/1571] [brightcove:new] add support for live streams --- youtube_dl/extractor/brightcove.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py index aeb22be16..2ec55b185 100644 --- a/youtube_dl/extractor/brightcove.py +++ b/youtube_dl/extractor/brightcove.py @@ -621,15 +621,21 @@ class BrightcoveNewIE(InfoExtractor): 'url': text_track['src'], }) + is_live = False + duration = float_or_none(json_data.get('duration'), 1000) + if duration and duration < 0: + is_live = True + return { 'id': video_id, - 'title': title, + 'title': self._live_title(title) if is_live else title, 'description': clean_html(json_data.get('description')), 'thumbnail': json_data.get('thumbnail') or json_data.get('poster'), - 'duration': float_or_none(json_data.get('duration'), 1000), + 'duration': duration, 'timestamp': parse_iso8601(json_data.get('published_at')), 'uploader_id': account_id, 'formats': formats, 'subtitles': subtitles, 'tags': json_data.get('tags', []), + 'is_live': is_live, } From e71a450956c808d469b983e5ffde1a63aff24390 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Sat, 24 Sep 2016 21:55:53 +0100 Subject: [PATCH 0216/1571] [common] add hdcore sign to akamai f4m formats --- youtube_dl/extractor/common.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 5cb4479ec..1076b46da 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1842,10 +1842,15 @@ class InfoExtractor(object): def _extract_akamai_formats(self, manifest_url, video_id): formats = [] + hdcore_sign = 'hdcore=3.7.0' f4m_url = re.sub(r'(https?://.+?)/i/', r'\1/z/', manifest_url).replace('/master.m3u8', '/manifest.f4m') - formats.extend(self._extract_f4m_formats( - update_url_query(f4m_url, {'hdcore': '3.7.0'}), - video_id, f4m_id='hds', fatal=False)) + if 'hdcore=' not in f4m_url: + f4m_url += ('&' if '?' in f4m_url else '?') + hdcore_sign + f4m_formats = self._extract_f4m_formats( + f4m_url, video_id, f4m_id='hds', fatal=False) + for entry in f4m_formats: + entry.update({'extra_param_to_segment_url': hdcore_sign}) + formats.extend(f4m_formats) m3u8_url = re.sub(r'(https?://.+?)/z/', r'\1/i/', manifest_url).replace('/manifest.f4m', '/master.m3u8') formats.extend(self._extract_m3u8_formats( m3u8_url, video_id, 'mp4', 'm3u8_native', From 7fd57de6fb146ffca594e4ae632d7ff217926b52 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Sat, 24 Sep 2016 21:59:48 +0100 Subject: [PATCH 0217/1571] [cbsnews:livevideo] fix extraction and extract m3u8 formats --- youtube_dl/extractor/cbsnews.py | 31 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/youtube_dl/extractor/cbsnews.py b/youtube_dl/extractor/cbsnews.py index 4aa6917a0..216989230 100644 --- a/youtube_dl/extractor/cbsnews.py +++ b/youtube_dl/extractor/cbsnews.py @@ -9,6 +9,7 @@ from ..utils import ( class CBSNewsIE(CBSIE): + IE_NAME = 'cbsnews' IE_DESC = 'CBS News' _VALID_URL = r'https?://(?:www\.)?cbsnews\.com/(?:news|videos)/(?P<id>[\da-z_-]+)' @@ -68,15 +69,16 @@ class CBSNewsIE(CBSIE): class CBSNewsLiveVideoIE(InfoExtractor): + IE_NAME = 'cbsnews:livevideo' IE_DESC = 'CBS News Live Videos' - _VALID_URL = r'https?://(?:www\.)?cbsnews\.com/live/video/(?P<id>[\da-z_-]+)' + _VALID_URL = r'https?://(?:www\.)?cbsnews\.com/live/video/(?P<id>[^/?#]+)' # Live videos get deleted soon. See http://www.cbsnews.com/live/ for the latest examples _TEST = { 'url': 'http://www.cbsnews.com/live/video/clinton-sanders-prepare-to-face-off-in-nh/', 'info_dict': { 'id': 'clinton-sanders-prepare-to-face-off-in-nh', - 'ext': 'flv', + 'ext': 'mp4', 'title': 'Clinton, Sanders Prepare To Face Off In NH', 'duration': 334, }, @@ -84,25 +86,22 @@ class CBSNewsLiveVideoIE(InfoExtractor): } def _real_extract(self, url): - video_id = self._match_id(url) + display_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) + video_info = self._download_json( + 'http://feeds.cbsn.cbsnews.com/rundown/story', display_id, query={ + 'device': 'desktop', + 'dvr_slug': display_id, + }) - video_info = self._parse_json(self._html_search_regex( - r'data-story-obj=\'({.+?})\'', webpage, 'video JSON info'), video_id)['story'] - - hdcore_sign = 'hdcore=3.3.1' - f4m_formats = self._extract_f4m_formats(video_info['url'] + '&' + hdcore_sign, video_id) - if f4m_formats: - for entry in f4m_formats: - # URLs without the extra param induce an 404 error - entry.update({'extra_param_to_segment_url': hdcore_sign}) - self._sort_formats(f4m_formats) + formats = self._extract_akamai_formats(video_info['url'], display_id) + self._sort_formats(formats) return { - 'id': video_id, + 'id': display_id, + 'display_id': display_id, 'title': video_info['headline'], 'thumbnail': video_info.get('thumbnail_url_hd') or video_info.get('thumbnail_url_sd'), 'duration': parse_duration(video_info.get('segmentDur')), - 'formats': f4m_formats, + 'formats': formats, } From 63c583eb2c9a906ba1075da289afdde29b385fff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 25 Sep 2016 04:43:10 +0700 Subject: [PATCH 0218/1571] [prosiebensat1] Add support for sat1gold (#10745) --- youtube_dl/extractor/prosiebensat1.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/prosiebensat1.py b/youtube_dl/extractor/prosiebensat1.py index 5a29b844d..2f5aa530a 100644 --- a/youtube_dl/extractor/prosiebensat1.py +++ b/youtube_dl/extractor/prosiebensat1.py @@ -122,7 +122,17 @@ class ProSiebenSat1BaseIE(InfoExtractor): class ProSiebenSat1IE(ProSiebenSat1BaseIE): IE_NAME = 'prosiebensat1' IE_DESC = 'ProSiebenSat.1 Digital' - _VALID_URL = r'https?://(?:www\.)?(?:(?:prosieben|prosiebenmaxx|sixx|sat1|kabeleins|the-voice-of-germany|7tv|kabeleinsdoku)\.(?:de|at|ch)|ran\.de|fem\.com)/(?P<id>.+)' + _VALID_URL = r'''(?x) + https?:// + (?:www\.)? + (?: + (?: + prosieben|prosiebenmaxx|sixx|sat1(?:gold)?|kabeleins|the-voice-of-germany|7tv|kabeleinsdoku + )\.(?:de|at|ch)| + ran\.de|fem\.com + ) + /(?P<id>.+) + ''' _TESTS = [ { @@ -295,6 +305,11 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE): 'url': 'http://www.kabeleinsdoku.de/tv/mayday-alarm-im-cockpit/video/102-notlandung-im-hudson-river-ganze-folge', 'only_matching': True, }, + { + # geo restricted to Germany + 'url': 'http://www.sat1gold.de/tv/edel-starck/video/11-staffel-1-episode-1-partner-wider-willen-ganze-folge', + 'only_matching': True, + }, ] _TOKEN = 'prosieben' From ddde91952f4eec796b14eb258c0cb33dda3935bc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 25 Sep 2016 05:36:18 +0700 Subject: [PATCH 0219/1571] [prosiebensat1] Fix playlist support (Closes #10745) --- youtube_dl/extractor/prosiebensat1.py | 39 ++++++++++++++++++--------- 1 file changed, 26 insertions(+), 13 deletions(-) diff --git a/youtube_dl/extractor/prosiebensat1.py b/youtube_dl/extractor/prosiebensat1.py index 2f5aa530a..a064de05e 100644 --- a/youtube_dl/extractor/prosiebensat1.py +++ b/youtube_dl/extractor/prosiebensat1.py @@ -310,6 +310,10 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE): 'url': 'http://www.sat1gold.de/tv/edel-starck/video/11-staffel-1-episode-1-partner-wider-willen-ganze-folge', 'only_matching': True, }, + { + 'url': 'http://www.sat1gold.de/tv/edel-starck/playlist/die-gesamte-1-staffel', + 'only_matching': True, + }, ] _TOKEN = 'prosieben' @@ -381,19 +385,28 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE): def _extract_playlist(self, url, webpage): playlist_id = self._html_search_regex( self._PLAYLIST_ID_REGEXES, webpage, 'playlist id') - for regex in self._PLAYLIST_CLIP_REGEXES: - playlist_clips = re.findall(regex, webpage) - if playlist_clips: - title = self._html_search_regex( - self._TITLE_REGEXES, webpage, 'title') - description = self._html_search_regex( - self._DESCRIPTION_REGEXES, webpage, 'description', fatal=False) - entries = [ - self.url_result( - re.match('(.+?//.+?)/', url).group(1) + clip_path, - 'ProSiebenSat1') - for clip_path in playlist_clips] - return self.playlist_result(entries, playlist_id, title, description) + playlist = self._parse_json( + self._search_regex( + 'var\s+contentResources\s*=\s*(\[.+?\]);\s*</script', + webpage, 'playlist'), + playlist_id) + entries = [] + for item in playlist: + clip_id = item.get('id') or item.get('upc') + if not clip_id: + continue + info = self._extract_video_info(url, clip_id) + info.update({ + 'id': clip_id, + 'title': item.get('title') or item.get('teaser', {}).get('headline'), + 'description': item.get('teaser', {}).get('description'), + 'thumbnail': item.get('poster'), + 'duration': float_or_none(item.get('duration')), + 'series': item.get('tvShowTitle'), + 'uploader': item.get('broadcastPublisher'), + }) + entries.append(info) + return self.playlist_result(entries, playlist_id) def _real_extract(self, url): video_id = self._match_id(url) From f92bb612c69957c3803aaf14aea1d03a7d7d917f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 25 Sep 2016 06:14:32 +0700 Subject: [PATCH 0220/1571] [mwave] Relax _VALID_URLs (Closes #10735, closes #10748) --- youtube_dl/extractor/mwave.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/mwave.py b/youtube_dl/extractor/mwave.py index a103e0323..fea1caf47 100644 --- a/youtube_dl/extractor/mwave.py +++ b/youtube_dl/extractor/mwave.py @@ -9,9 +9,9 @@ from ..utils import ( class MwaveIE(InfoExtractor): - _VALID_URL = r'https?://mwave\.interest\.me/mnettv/videodetail\.m\?searchVideoDetailVO\.clip_id=(?P<id>[0-9]+)' + _VALID_URL = r'https?://mwave\.interest\.me/(?:[^/]+/)?mnettv/videodetail\.m\?searchVideoDetailVO\.clip_id=(?P<id>[0-9]+)' _URL_TEMPLATE = 'http://mwave.interest.me/mnettv/videodetail.m?searchVideoDetailVO.clip_id=%s' - _TEST = { + _TESTS = [{ 'url': 'http://mwave.interest.me/mnettv/videodetail.m?searchVideoDetailVO.clip_id=168859', # md5 is unstable 'info_dict': { @@ -23,7 +23,10 @@ class MwaveIE(InfoExtractor): 'duration': 206, 'view_count': int, } - } + }, { + 'url': 'http://mwave.interest.me/en/mnettv/videodetail.m?searchVideoDetailVO.clip_id=176199', + 'only_matching': True, + }] def _real_extract(self, url): video_id = self._match_id(url) @@ -60,8 +63,8 @@ class MwaveIE(InfoExtractor): class MwaveMeetGreetIE(InfoExtractor): - _VALID_URL = r'https?://mwave\.interest\.me/meetgreet/view/(?P<id>\d+)' - _TEST = { + _VALID_URL = r'https?://mwave\.interest\.me/(?:[^/]+/)?meetgreet/view/(?P<id>\d+)' + _TESTS = [{ 'url': 'http://mwave.interest.me/meetgreet/view/256', 'info_dict': { 'id': '173294', @@ -72,7 +75,10 @@ class MwaveMeetGreetIE(InfoExtractor): 'duration': 3634, 'view_count': int, } - } + }, { + 'url': 'http://mwave.interest.me/en/meetgreet/view/256', + 'only_matching': True, + }] def _real_extract(self, url): video_id = self._match_id(url) From 0a078550b9ac570cb357c2af74a39068d08ce1ee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 25 Sep 2016 06:19:17 +0700 Subject: [PATCH 0221/1571] [prosiebensat1] Improve _VALID_URL --- youtube_dl/extractor/prosiebensat1.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/prosiebensat1.py b/youtube_dl/extractor/prosiebensat1.py index a064de05e..84d04aa69 100644 --- a/youtube_dl/extractor/prosiebensat1.py +++ b/youtube_dl/extractor/prosiebensat1.py @@ -127,7 +127,7 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE): (?:www\.)? (?: (?: - prosieben|prosiebenmaxx|sixx|sat1(?:gold)?|kabeleins|the-voice-of-germany|7tv|kabeleinsdoku + prosieben(?:maxx)?|sixx|sat1(?:gold)?|kabeleins(?:doku)?|the-voice-of-germany|7tv )\.(?:de|at|ch)| ran\.de|fem\.com ) From 493353c7fd5d15fa35152915c10c7249277b5ed0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 25 Sep 2016 06:25:57 +0700 Subject: [PATCH 0222/1571] [prosiebensat1] Add support for advopedia --- youtube_dl/extractor/prosiebensat1.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/prosiebensat1.py b/youtube_dl/extractor/prosiebensat1.py index 84d04aa69..873d4f981 100644 --- a/youtube_dl/extractor/prosiebensat1.py +++ b/youtube_dl/extractor/prosiebensat1.py @@ -127,9 +127,9 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE): (?:www\.)? (?: (?: - prosieben(?:maxx)?|sixx|sat1(?:gold)?|kabeleins(?:doku)?|the-voice-of-germany|7tv + prosieben(?:maxx)?|sixx|sat1(?:gold)?|kabeleins(?:doku)?|the-voice-of-germany|7tv|advopedia )\.(?:de|at|ch)| - ran\.de|fem\.com + ran\.de|fem\.com|advopedia\.de ) /(?P<id>.+) ''' @@ -314,6 +314,10 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE): 'url': 'http://www.sat1gold.de/tv/edel-starck/playlist/die-gesamte-1-staffel', 'only_matching': True, }, + { + 'url': 'http://www.advopedia.de/videos/lenssen-klaert-auf/lenssen-klaert-auf-folge-8-staffel-3-feiertage-und-freie-tage', + 'only_matching': True, + }, ] _TOKEN = 'prosieben' From 2d3d29976b2c83e8daab62a0f2a61c232692a310 Mon Sep 17 00:00:00 2001 From: stepshal <nessento@openmailbox.org> Date: Sat, 17 Sep 2016 21:48:20 +0700 Subject: [PATCH 0223/1571] [youtube] Change test URLs from http to https --- youtube_dl/extractor/youtube.py | 34 ++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 5ca903825..f86823112 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -369,7 +369,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): IE_NAME = 'youtube' _TESTS = [ { - 'url': 'http://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9', + 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9', 'info_dict': { 'id': 'BaW_jenozKc', 'ext': 'mp4', @@ -389,7 +389,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): } }, { - 'url': 'http://www.youtube.com/watch?v=UxxajLWwzqY', + 'url': 'https://www.youtube.com/watch?v=UxxajLWwzqY', 'note': 'Test generic use_cipher_signature video (#897)', 'info_dict': { 'id': 'UxxajLWwzqY', @@ -443,7 +443,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): } }, { - 'url': 'http://www.youtube.com/watch?v=BaW_jenozKc&v=UxxajLWwzqY', + 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=UxxajLWwzqY', 'note': 'Use the first video ID in the URL', 'info_dict': { 'id': 'BaW_jenozKc', @@ -465,7 +465,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): }, }, { - 'url': 'http://www.youtube.com/watch?v=a9LDPn-MO4I', + 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I', 'note': '256k DASH audio (format 141) via DASH manifest', 'info_dict': { 'id': 'a9LDPn-MO4I', @@ -539,7 +539,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): }, # Normal age-gate video (No vevo, embed allowed) { - 'url': 'http://youtube.com/watch?v=HtVdAasjOgU', + 'url': 'https://youtube.com/watch?v=HtVdAasjOgU', 'info_dict': { 'id': 'HtVdAasjOgU', 'ext': 'mp4', @@ -555,7 +555,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): }, # Age-gate video with encrypted signature { - 'url': 'http://www.youtube.com/watch?v=6kLq3WMV1nU', + 'url': 'https://www.youtube.com/watch?v=6kLq3WMV1nU', 'info_dict': { 'id': '6kLq3WMV1nU', 'ext': 'mp4', @@ -748,11 +748,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'skip': 'Not multifeed anymore', }, { - 'url': 'http://vid.plus/FlRa-iH7PGw', + 'url': 'https://vid.plus/FlRa-iH7PGw', 'only_matching': True, }, { - 'url': 'http://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html', + 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html', 'only_matching': True, }, { @@ -1846,7 +1846,7 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor): 'playlist_count': 2, }, { 'note': 'embedded', - 'url': 'http://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu', + 'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu', 'playlist_count': 4, 'info_dict': { 'title': 'JODA15', @@ -1854,7 +1854,7 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor): } }, { 'note': 'Embedded SWF player', - 'url': 'http://www.youtube.com/p/YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ?hl=en_US&fs=1&rel=0', + 'url': 'https://www.youtube.com/p/YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ?hl=en_US&fs=1&rel=0', 'playlist_count': 4, 'info_dict': { 'title': 'JODA7', @@ -2156,7 +2156,7 @@ class YoutubeLiveIE(YoutubeBaseInfoExtractor): IE_NAME = 'youtube:live' _TESTS = [{ - 'url': 'http://www.youtube.com/user/TheYoungTurks/live', + 'url': 'https://www.youtube.com/user/TheYoungTurks/live', 'info_dict': { 'id': 'a48o2S1cPoo', 'ext': 'mp4', @@ -2176,7 +2176,7 @@ class YoutubeLiveIE(YoutubeBaseInfoExtractor): 'skip_download': True, }, }, { - 'url': 'http://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live', + 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live', 'only_matching': True, }] @@ -2201,7 +2201,7 @@ class YoutubePlaylistsIE(YoutubePlaylistsBaseInfoExtractor): IE_NAME = 'youtube:playlists' _TESTS = [{ - 'url': 'http://www.youtube.com/user/ThirstForScience/playlists', + 'url': 'https://www.youtube.com/user/ThirstForScience/playlists', 'playlist_mincount': 4, 'info_dict': { 'id': 'ThirstForScience', @@ -2209,7 +2209,7 @@ class YoutubePlaylistsIE(YoutubePlaylistsBaseInfoExtractor): }, }, { # with "Load more" button - 'url': 'http://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd', + 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd', 'playlist_mincount': 70, 'info_dict': { 'id': 'igorkle1', @@ -2442,10 +2442,10 @@ class YoutubeTruncatedURLIE(InfoExtractor): ''' _TESTS = [{ - 'url': 'http://www.youtube.com/watch?annotation_id=annotation_3951667041', + 'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041', 'only_matching': True, }, { - 'url': 'http://www.youtube.com/watch?', + 'url': 'https://www.youtube.com/watch?', 'only_matching': True, }, { 'url': 'https://www.youtube.com/watch?x-yt-cl=84503534', @@ -2466,7 +2466,7 @@ class YoutubeTruncatedURLIE(InfoExtractor): 'Did you forget to quote the URL? Remember that & is a meta ' 'character in most shells, so you want to put the URL in quotes, ' 'like youtube-dl ' - '"http://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" ' + '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" ' ' or simply youtube-dl BaW_jenozKc .', expected=True) From f3625cc4ca8d8683b900e070ad7acd58b1fac5c7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 25 Sep 2016 18:08:35 +0700 Subject: [PATCH 0224/1571] [PULL_REQUEST_TEMPLATE.md] Add Unlicense notice --- .github/PULL_REQUEST_TEMPLATE.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index f24bb4b09..3a168b7b1 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -10,6 +10,10 @@ - [ ] At least skimmed through [adding new extractor tutorial](https://github.com/rg3/youtube-dl#adding-support-for-a-new-site) and [youtube-dl coding conventions](https://github.com/rg3/youtube-dl#youtube-dl-coding-conventions) sections - [ ] [Searched](https://github.com/rg3/youtube-dl/search?q=is%3Apr&type=Issues) the bugtracker for similar pull requests +### In order to be accepted and merged into youtube-dl each piece of code must be in public domain or released under [Unlicense](http://unlicense.org/). Check one of the following options: +- [ ] I am the original original author of this code and I am willing to release it under [Unlicense](http://unlicense.org/) +- [ ] I am not the original author of this code but it is in public domain or released under [Unlicense](http://unlicense.org/) (provide reliable evidence) + ### What is the purpose of your *pull request*? - [ ] Bug fix - [ ] New extractor From e590b7ff9e8e408bb9ec4da58ab6847686d29dbc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 25 Sep 2016 18:09:46 +0700 Subject: [PATCH 0225/1571] [PULL_REQUEST_TEMPLATE.md] Add checkable Improvement options PR's purpose --- .github/PULL_REQUEST_TEMPLATE.md | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 3a168b7b1..89e8a3188 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -16,6 +16,7 @@ ### What is the purpose of your *pull request*? - [ ] Bug fix +- [ ] Improvement - [ ] New extractor - [ ] New feature From a3d8b3816802c76beffa48789eac5181e02db3dc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 25 Sep 2016 21:58:17 +0700 Subject: [PATCH 0226/1571] [npo] Generalize playlist extractors --- youtube_dl/extractor/npo.py | 63 +++++++++++++++---------------------- 1 file changed, 26 insertions(+), 37 deletions(-) diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py index 3293bdb17..f95867d58 100644 --- a/youtube_dl/extractor/npo.py +++ b/youtube_dl/extractor/npo.py @@ -438,9 +438,29 @@ class SchoolTVIE(InfoExtractor): } -class VPROIE(NPOIE): +class NPOPlaylistBaseIE(NPOIE): + def _real_extract(self, url): + playlist_id = self._match_id(url) + + webpage = self._download_webpage(url, playlist_id) + + entries = [ + self.url_result('npo:%s' % video_id if not video_id.startswith('http') else video_id) + for video_id in re.findall(self._PLAYLIST_ENTRY_RE, webpage) + ] + + playlist_title = self._html_search_regex( + self._PLAYLIST_TITLE_RE, webpage, 'playlist title', + default=None) or self._og_search_title(webpage) + + return self.playlist_result(entries, playlist_id, playlist_title) + + +class VPROIE(NPOPlaylistBaseIE): IE_NAME = 'vpro' _VALID_URL = r'https?://(?:www\.)?(?:tegenlicht\.)?vpro\.nl/(?:[^/]+/){2,}(?P<id>[^/]+)\.html' + _PLAYLIST_TITLE_RE = r'<title>\s*([^>]+?)\s*-\s*Teledoc\s*-\s*VPRO\s*' + _PLAYLIST_ENTRY_RE = r'data-media-id="([^"]+)"' _TESTS = [ { @@ -473,48 +493,17 @@ class VPROIE(NPOIE): } ] - def _real_extract(self, url): - playlist_id = self._match_id(url) - webpage = self._download_webpage(url, playlist_id) - - entries = [ - self.url_result('npo:%s' % video_id if not video_id.startswith('http') else video_id) - for video_id in re.findall(r'data-media-id="([^"]+)"', webpage) - ] - - playlist_title = self._search_regex( - r'\s*([^>]+?)\s*-\s*Teledoc\s*-\s*VPRO\s*', - webpage, 'playlist title', default=None) or self._og_search_title(webpage) - - return self.playlist_result(entries, playlist_id, playlist_title) - - -class WNLIE(InfoExtractor): +class WNLIE(NPOPlaylistBaseIE): _VALID_URL = r'https?://(?:www\.)?omroepwnl\.nl/video/detail/(?P[^/]+)__\d+' + _PLAYLIST_TITLE_RE = r'(?s)]+class="subject"[^>]*>(.+?)' + _PLAYLIST_ENTRY_RE = r']+href="([^"]+)"[^>]+class="js-mid"[^>]*>Deel \d+' - _TEST = { + _TESTS = [{ 'url': 'http://www.omroepwnl.nl/video/detail/vandaag-de-dag-6-mei__060515', 'info_dict': { 'id': 'vandaag-de-dag-6-mei', 'title': 'Vandaag de Dag 6 mei', }, 'playlist_count': 4, - } - - def _real_extract(self, url): - playlist_id = self._match_id(url) - - webpage = self._download_webpage(url, playlist_id) - - entries = [ - self.url_result('npo:%s' % video_id, 'NPO') - for video_id, part in re.findall( - r']+href="([^"]+)"[^>]+class="js-mid"[^>]*>(Deel \d+)', webpage) - ] - - playlist_title = self._html_search_regex( - r'(?s)]+class="subject"[^>]*>(.+?)', - webpage, 'playlist title') - - return self.playlist_result(entries, playlist_id, playlist_title) + }] From ddb19772d572ae2118664a22d083a8f31fc63d8d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 25 Sep 2016 22:03:18 +0700 Subject: [PATCH 0227/1571] [vpro] Fix playlist title extraction and update tests --- youtube_dl/extractor/npo.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py index f95867d58..ff02d0309 100644 --- a/youtube_dl/extractor/npo.py +++ b/youtube_dl/extractor/npo.py @@ -459,7 +459,7 @@ class NPOPlaylistBaseIE(NPOIE): class VPROIE(NPOPlaylistBaseIE): IE_NAME = 'vpro' _VALID_URL = r'https?://(?:www\.)?(?:tegenlicht\.)?vpro\.nl/(?:[^/]+/){2,}(?P[^/]+)\.html' - _PLAYLIST_TITLE_RE = r'\s*([^>]+?)\s*-\s*Teledoc\s*-\s*VPRO\s*' + _PLAYLIST_TITLE_RE = r']+class=["\'].*?\bmedia-platform-title\b.*?["\'][^>]*>([^<]+)' _PLAYLIST_ENTRY_RE = r'data-media-id="([^"]+)"' _TESTS = [ @@ -473,12 +473,13 @@ class VPROIE(NPOPlaylistBaseIE): 'description': 'md5:52cf4eefbc96fffcbdc06d024147abea', 'upload_date': '20130225', }, + 'skip': 'Video gone', }, { 'url': 'http://www.vpro.nl/programmas/2doc/2015/sergio-herman.html', 'info_dict': { 'id': 'sergio-herman', - 'title': 'Sergio Herman: Fucking perfect', + 'title': 'sergio herman: fucking perfect', }, 'playlist_count': 2, }, @@ -487,7 +488,7 @@ class VPROIE(NPOPlaylistBaseIE): 'url': 'http://www.vpro.nl/programmas/2doc/2015/education-education.html', 'info_dict': { 'id': 'education-education', - 'title': '2Doc', + 'title': 'education education', }, 'playlist_count': 2, } From 5742c18bc1ea3da5b0fd480e75fcdf099220e52f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 25 Sep 2016 22:19:00 +0700 Subject: [PATCH 0228/1571] [npo] Add support for anderetijden.nl (Closes #10754) --- youtube_dl/extractor/npo.py | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py index ff02d0309..66035a77c 100644 --- a/youtube_dl/extractor/npo.py +++ b/youtube_dl/extractor/npo.py @@ -5,6 +5,7 @@ import re from .common import InfoExtractor from ..utils import ( fix_xml_ampersands, + orderedSet, parse_duration, qualities, strip_jsonp, @@ -446,7 +447,7 @@ class NPOPlaylistBaseIE(NPOIE): entries = [ self.url_result('npo:%s' % video_id if not video_id.startswith('http') else video_id) - for video_id in re.findall(self._PLAYLIST_ENTRY_RE, webpage) + for video_id in orderedSet(re.findall(self._PLAYLIST_ENTRY_RE, webpage)) ] playlist_title = self._html_search_regex( @@ -508,3 +509,18 @@ class WNLIE(NPOPlaylistBaseIE): }, 'playlist_count': 4, }] + + +class AndereTijdenIE(NPOPlaylistBaseIE): + _VALID_URL = r'https?://(?:www\.)?anderetijden\.nl/programma/(?:[^/]+/)+(?P[^/?#&]+)' + _PLAYLIST_TITLE_RE = r'(?s)]+class=["\'].*?\bpage-title\b.*?["\'][^>]*>(.+?)' + _PLAYLIST_ENTRY_RE = r']+class=["\']episode-container episode-page["\'][^>]+data-prid=["\'](.+?)["\']' + + _TESTS = [{ + 'url': 'http://anderetijden.nl/programma/1/Andere-Tijden/aflevering/676/Duitse-soldaten-over-de-Slag-bij-Arnhem', + 'info_dict': { + 'id': 'Duitse-soldaten-over-de-Slag-bij-Arnhem', + 'title': 'Duitse soldaten over de Slag bij Arnhem', + }, + 'playlist_count': 3, + }] From f1ee462c82381d3c68673500c0491fe477030c3b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 25 Sep 2016 22:38:36 +0700 Subject: [PATCH 0229/1571] [PULL_REQUEST_TEMPLATE.md] Fix typo --- .github/PULL_REQUEST_TEMPLATE.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 89e8a3188..46fa26f02 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -11,7 +11,7 @@ - [ ] [Searched](https://github.com/rg3/youtube-dl/search?q=is%3Apr&type=Issues) the bugtracker for similar pull requests ### In order to be accepted and merged into youtube-dl each piece of code must be in public domain or released under [Unlicense](http://unlicense.org/). Check one of the following options: -- [ ] I am the original original author of this code and I am willing to release it under [Unlicense](http://unlicense.org/) +- [ ] I am the original author of this code and I am willing to release it under [Unlicense](http://unlicense.org/) - [ ] I am not the original author of this code but it is in public domain or released under [Unlicense](http://unlicense.org/) (provide reliable evidence) ### What is the purpose of your *pull request*? From 2d5b4af0070f8aa6f3f4eb8fdabef5d006f6429a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 25 Sep 2016 23:30:57 +0700 Subject: [PATCH 0230/1571] [extractors] Add import for anderetijden extractor --- youtube_dl/extractor/extractors.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index bf1f70885..23fd2a308 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -612,13 +612,14 @@ from .nowtv import ( ) from .noz import NozIE from .npo import ( + AndereTijdenIE, NPOIE, NPOLiveIE, NPORadioIE, NPORadioFragmentIE, SchoolTVIE, VPROIE, - WNLIE + WNLIE, ) from .npr import NprIE from .nrk import ( From d3c97bad6181e1d3dc0cb4eece041e1cfb0ba6bc Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Mon, 26 Sep 2016 14:14:37 +0800 Subject: [PATCH 0231/1571] Ignore and cleanup 3gp files --- .gitignore | 1 + Makefile | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index a802c75a1..002b700f5 100644 --- a/.gitignore +++ b/.gitignore @@ -29,6 +29,7 @@ updates_key.pem *.m4a *.m4v *.mp3 +*.3gp *.part *.swp test/testdata diff --git a/Makefile b/Makefile index ac234fcb0..a2763a664 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ all: youtube-dl README.md CONTRIBUTING.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish supportedsites clean: - rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish youtube_dl/extractor/lazy_extractors.py *.dump *.part* *.info.json *.mp4 *.m4a *.flv *.mp3 *.avi *.mkv *.webm *.jpg *.png CONTRIBUTING.md.tmp ISSUE_TEMPLATE.md.tmp youtube-dl youtube-dl.exe + rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish youtube_dl/extractor/lazy_extractors.py *.dump *.part* *.info.json *.mp4 *.m4a *.flv *.mp3 *.avi *.mkv *.webm *.3gp *.jpg *.png CONTRIBUTING.md.tmp ISSUE_TEMPLATE.md.tmp youtube-dl youtube-dl.exe find . -name "*.pyc" -delete find . -name "*.class" -delete From fffb9cff944cfab11f311900ee8138f28f7232d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 26 Sep 2016 22:15:58 +0700 Subject: [PATCH 0232/1571] [kaltura] Speed up embed regexes (#10764) --- youtube_dl/extractor/kaltura.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/kaltura.py b/youtube_dl/extractor/kaltura.py index 5a8403777..91bc3a0a7 100644 --- a/youtube_dl/extractor/kaltura.py +++ b/youtube_dl/extractor/kaltura.py @@ -105,20 +105,20 @@ class KalturaIE(InfoExtractor): kWidget\.(?:thumb)?[Ee]mbed\( \{.*? (?P['\"])wid(?P=q1)\s*:\s* - (?P['\"])_?(?P[^'\"]+)(?P=q2),.*? + (?P['\"])_?(?P(?:(?!(?P=q2)).)+)(?P=q2),.*? (?P['\"])entry_?[Ii]d(?P=q3)\s*:\s* - (?P['\"])(?P[^'\"]+)(?P=q4), + (?P['\"])(?P(?:(?!(?P=q4)).)+)(?P=q4), """, webpage) or re.search( r'''(?xs) (?P["\']) - (?:https?:)?//cdnapi(?:sec)?\.kaltura\.com/.*?(?:p|partner_id)/(?P\d+).*? + (?:https?:)?//cdnapi(?:sec)?\.kaltura\.com/(?:(?!(?P=q1)).)*(?:p|partner_id)/(?P\d+)(?:(?!(?P=q1)).)* (?P=q1).*? (?: entry_?[Ii]d| (?P["\'])entry_?[Ii]d(?P=q2) )\s*:\s* - (?P["\'])(?P.+?)(?P=q3) + (?P["\'])(?P(?:(?!(?P=q3)).)+)(?P=q3) ''', webpage)) if mobj: embed_info = mobj.groupdict() From d3dbb46330461c0c70c3aae47b69d27882cfc325 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ond=C5=99ej=20B=C3=A1rta?= Date: Sun, 25 Sep 2016 12:44:46 +0200 Subject: [PATCH 0233/1571] [promptfile] Fix extraction (Closes #10634) --- youtube_dl/extractor/promptfile.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/promptfile.py b/youtube_dl/extractor/promptfile.py index f93bd19ff..54c4aee13 100644 --- a/youtube_dl/extractor/promptfile.py +++ b/youtube_dl/extractor/promptfile.py @@ -15,12 +15,12 @@ from ..utils import ( class PromptFileIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?promptfile\.com/l/(?P[0-9A-Z\-]+)' _TEST = { - 'url': 'http://www.promptfile.com/l/D21B4746E9-F01462F0FF', - 'md5': 'd1451b6302da7215485837aaea882c4c', + 'url': 'http://www.promptfile.com/l/86D1CE8462-576CAAE416', + 'md5': '5a7e285a26e0d66d9a263fae91bc92ce', 'info_dict': { - 'id': 'D21B4746E9-F01462F0FF', + 'id': '86D1CE8462-576CAAE416', 'ext': 'mp4', - 'title': 'Birds.mp4', + 'title': 'oceans.mp4', 'thumbnail': 're:^https?://.*\.jpg$', } } @@ -33,14 +33,20 @@ class PromptFileIE(InfoExtractor): raise ExtractorError('Video %s does not exist' % video_id, expected=True) + chash_pattern = r'\$\("#chash"\)\.val\("(.+)"\+\$\("#chash"\)' + chash = self._html_search_regex(chash_pattern, webpage, "chash") fields = self._hidden_inputs(webpage) + k = list(fields)[0] + fields[k] = chash + fields[k] + post = urlencode_postdata(fields) req = sanitized_Request(url, post) req.add_header('Content-type', 'application/x-www-form-urlencoded') webpage = self._download_webpage( req, video_id, 'Downloading video page') - url = self._html_search_regex(r'url:\s*\'([^\']+)\'', webpage, 'URL') + url_pattern = r'', webpage, 'title') thumbnail = self._html_search_regex( From 72c3d02d294b04b35a19417b31ad497e7540caa3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 26 Sep 2016 23:39:54 +0700 Subject: [PATCH 0234/1571] [promptfile] Improve and modernize --- youtube_dl/extractor/promptfile.py | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/youtube_dl/extractor/promptfile.py b/youtube_dl/extractor/promptfile.py index 54c4aee13..d40cca06f 100644 --- a/youtube_dl/extractor/promptfile.py +++ b/youtube_dl/extractor/promptfile.py @@ -7,7 +7,6 @@ from .common import InfoExtractor from ..utils import ( determine_ext, ExtractorError, - sanitized_Request, urlencode_postdata, ) @@ -33,20 +32,23 @@ class PromptFileIE(InfoExtractor): raise ExtractorError('Video %s does not exist' % video_id, expected=True) - chash_pattern = r'\$\("#chash"\)\.val\("(.+)"\+\$\("#chash"\)' - chash = self._html_search_regex(chash_pattern, webpage, "chash") + chash = self._search_regex( + r'val\("([^"]*)"\s*\+\s*\$\("#chash"\)', webpage, 'chash') fields = self._hidden_inputs(webpage) - k = list(fields)[0] - fields[k] = chash + fields[k] + keys = list(fields.keys()) + chash_key = keys[0] if len(keys) == 1 else next( + key for key in keys if key.startswith('cha')) + fields[chash_key] = chash + fields[chash_key] - post = urlencode_postdata(fields) - req = sanitized_Request(url, post) - req.add_header('Content-type', 'application/x-www-form-urlencoded') webpage = self._download_webpage( - req, video_id, 'Downloading video page') + url, video_id, 'Downloading video page', + data=urlencode_postdata(fields), + headers={'Content-type': 'application/x-www-form-urlencoded'}) - url_pattern = r']+href=(["\'])(?P(?:(?!\1).)+)\1[^>]*>\s*Download File', + r']+href=(["\'])(?Phttps?://(?:www\.)?promptfile\.com/file/(?:(?!\1).)+)\1'), + webpage, 'video url', group='url') title = self._html_search_regex( r'', webpage, 'title') thumbnail = self._html_search_regex( @@ -55,7 +57,7 @@ class PromptFileIE(InfoExtractor): formats = [{ 'format_id': 'sd', - 'url': url, + 'url': video_url, 'ext': determine_ext(title), }] self._sort_formats(formats) From d75d9e343e91527c1fe34678e913ae16a0eafbdd Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Tue, 27 Sep 2016 14:38:41 +0800 Subject: [PATCH 0235/1571] [einthusan] Fix extraction (closes #10714) --- ChangeLog | 1 + youtube_dl/extractor/einthusan.py | 14 ++++++++------ 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/ChangeLog b/ChangeLog index 5c96dc179..fdebb89b9 100644 --- a/ChangeLog +++ b/ChangeLog @@ -4,6 +4,7 @@ Core + Improved support for HTML5 subtitles Extractors +* [einthusan] Fix extraction (#10714) + [twitter] Support Periscope embeds (#10737) + [openload] Support subtitles (#10625) diff --git a/youtube_dl/extractor/einthusan.py b/youtube_dl/extractor/einthusan.py index f7339702c..443865ad2 100644 --- a/youtube_dl/extractor/einthusan.py +++ b/youtube_dl/extractor/einthusan.py @@ -14,7 +14,7 @@ class EinthusanIE(InfoExtractor): _TESTS = [ { 'url': 'http://www.einthusan.com/movies/watch.php?id=2447', - 'md5': 'af244f4458cd667205e513d75da5b8b1', + 'md5': 'd71379996ff5b7f217eca034c34e3461', 'info_dict': { 'id': '2447', 'ext': 'mp4', @@ -25,13 +25,13 @@ class EinthusanIE(InfoExtractor): }, { 'url': 'http://www.einthusan.com/movies/watch.php?id=1671', - 'md5': 'ef63c7a803e22315880ed182c10d1c5c', + 'md5': 'b16a6fd3c67c06eb7c79c8a8615f4213', 'info_dict': { 'id': '1671', 'ext': 'mp4', 'title': 'Soodhu Kavvuum', 'thumbnail': 're:^https?://.*\.jpg$', - 'description': 'md5:05d8a0c0281a4240d86d76e14f2f4d51', + 'description': 'md5:b40f2bf7320b4f9414f3780817b2af8c', } }, ] @@ -50,9 +50,11 @@ class EinthusanIE(InfoExtractor): video_id = self._search_regex( r'data-movieid=["\'](\d+)', webpage, 'video id', default=video_id) - video_url = self._download_webpage( + m3u8_url = self._download_webpage( 'http://cdn.einthusan.com/geturl/%s/hd/London,Washington,Toronto,Dallas,San,Sydney/' - % video_id, video_id) + % video_id, video_id, headers={'Referer': url}) + formats = self._extract_m3u8_formats( + m3u8_url, video_id, ext='mp4', entry_protocol='m3u8_native') description = self._html_search_meta('description', webpage) thumbnail = self._html_search_regex( @@ -64,7 +66,7 @@ class EinthusanIE(InfoExtractor): return { 'id': video_id, 'title': title, - 'url': video_url, + 'formats': formats, 'thumbnail': thumbnail, 'description': description, } From 93933c9819fa1282081a5f0761cbeabc9fbea336 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Tue, 27 Sep 2016 15:28:37 +0100 Subject: [PATCH 0236/1571] [awaan:video] fix test(closes #10773) --- youtube_dl/extractor/awaan.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/awaan.py b/youtube_dl/extractor/awaan.py index 66d7515bc..a2603bbff 100644 --- a/youtube_dl/extractor/awaan.py +++ b/youtube_dl/extractor/awaan.py @@ -66,6 +66,7 @@ class AWAANVideoIE(AWAANBaseIE): 'duration': 2041, 'timestamp': 1227504126, 'upload_date': '20081124', + 'uploader_id': '71', }, }, { 'url': 'http://awaan.ae/video/26723981/%D8%AF%D8%A7%D8%B1-%D8%A7%D9%84%D8%B3%D9%84%D8%A7%D9%85:-%D8%AE%D9%8A%D8%B1-%D8%AF%D9%88%D8%B1-%D8%A7%D9%84%D8%A3%D9%86%D8%B5%D8%A7%D8%B1', From 2342733f850c979c6f23ea2e83dfcb176fb08fa5 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Tue, 27 Sep 2016 15:29:50 +0100 Subject: [PATCH 0237/1571] fix tests related to 1978540a5122c53012e17a78841f3da0df77fd34(closes #10774) --- youtube_dl/extractor/formula1.py | 6 +++++- youtube_dl/extractor/voxmedia.py | 10 ++++++++-- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/formula1.py b/youtube_dl/extractor/formula1.py index 8c417ab65..fecfc28ae 100644 --- a/youtube_dl/extractor/formula1.py +++ b/youtube_dl/extractor/formula1.py @@ -11,9 +11,13 @@ class Formula1IE(InfoExtractor): 'md5': '8c79e54be72078b26b89e0e111c0502b', 'info_dict': { 'id': 'JvYXJpMzE6pArfHWm5ARp5AiUmD-gibV', - 'ext': 'flv', + 'ext': 'mp4', 'title': 'Race highlights - Spain 2016', }, + 'params': { + # m3u8 download + 'skip_download': True, + }, 'add_ie': ['Ooyala'], }, { 'url': 'http://www.formula1.com/en/video/2016/5/Race_highlights_-_Spain_2016.html', diff --git a/youtube_dl/extractor/voxmedia.py b/youtube_dl/extractor/voxmedia.py index b1b32ad44..f8e331493 100644 --- a/youtube_dl/extractor/voxmedia.py +++ b/youtube_dl/extractor/voxmedia.py @@ -9,13 +9,16 @@ class VoxMediaIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?(?:theverge|vox|sbnation|eater|polygon|curbed|racked)\.com/(?:[^/]+/)*(?P[^/?]+)' _TESTS = [{ 'url': 'http://www.theverge.com/2014/6/27/5849272/material-world-how-google-discovered-what-software-is-made-of', - 'md5': '73856edf3e89a711e70d5cf7cb280b37', 'info_dict': { 'id': '11eXZobjrG8DCSTgrNjVinU-YmmdYjhe', 'ext': 'mp4', 'title': 'Google\'s new material design direction', 'description': 'md5:2f44f74c4d14a1f800ea73e1c6832ad2', }, + 'params': { + # m3u8 download + 'skip_download': True, + }, 'add_ie': ['Ooyala'], }, { # data-ooyala-id @@ -31,13 +34,16 @@ class VoxMediaIE(InfoExtractor): }, { # volume embed 'url': 'http://www.vox.com/2016/3/31/11336640/mississippi-lgbt-religious-freedom-bill', - 'md5': '375c483c5080ab8cd85c9c84cfc2d1e4', 'info_dict': { 'id': 'wydzk3dDpmRz7PQoXRsTIX6XTkPjYL0b', 'ext': 'mp4', 'title': 'The new frontier of LGBTQ civil rights, explained', 'description': 'md5:0dc58e94a465cbe91d02950f770eb93f', }, + 'params': { + # m3u8 download + 'skip_download': True, + }, 'add_ie': ['Ooyala'], }, { # youtube embed From f9dd86a112835e04e271e8d1d844f250e6ff0c5e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 27 Sep 2016 21:37:33 +0700 Subject: [PATCH 0238/1571] [npo] Clarify IE_NAMEs (Closes #10775) --- youtube_dl/extractor/npo.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py index 66035a77c..9c7cc777b 100644 --- a/youtube_dl/extractor/npo.py +++ b/youtube_dl/extractor/npo.py @@ -497,6 +497,7 @@ class VPROIE(NPOPlaylistBaseIE): class WNLIE(NPOPlaylistBaseIE): + IE_NAME = 'wnl' _VALID_URL = r'https?://(?:www\.)?omroepwnl\.nl/video/detail/(?P[^/]+)__\d+' _PLAYLIST_TITLE_RE = r'(?s)]+class="subject"[^>]*>(.+?)' _PLAYLIST_ENTRY_RE = r']+href="([^"]+)"[^>]+class="js-mid"[^>]*>Deel \d+' @@ -512,6 +513,7 @@ class WNLIE(NPOPlaylistBaseIE): class AndereTijdenIE(NPOPlaylistBaseIE): + IE_NAME = 'anderetijden' _VALID_URL = r'https?://(?:www\.)?anderetijden\.nl/programma/(?:[^/]+/)+(?P[^/?#&]+)' _PLAYLIST_TITLE_RE = r'(?s)]+class=["\'].*?\bpage-title\b.*?["\'][^>]*>(.+?)' _PLAYLIST_ENTRY_RE = r']+class=["\']episode-container episode-page["\'][^>]+data-prid=["\'](.+?)["\']' From 1a2fbe322ee2d711b474f32a7d3f331791fb1881 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 27 Sep 2016 21:55:51 +0700 Subject: [PATCH 0239/1571] [periscope] Treat timed_out state as finished stream --- youtube_dl/extractor/periscope.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/periscope.py b/youtube_dl/extractor/periscope.py index e8b2f11c6..61043cad5 100644 --- a/youtube_dl/extractor/periscope.py +++ b/youtube_dl/extractor/periscope.py @@ -87,7 +87,7 @@ class PeriscopeIE(PeriscopeBaseIE): 'ext': 'flv' if format_id == 'rtmp' else 'mp4', } if format_id != 'rtmp': - f['protocol'] = 'm3u8_native' if state == 'ended' else 'm3u8' + f['protocol'] = 'm3u8_native' if state in ('ended', 'timed_out') else 'm3u8' formats.append(f) self._sort_formats(formats) From e3845525906228091fdf446f2cf2e9a20e93f59f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kacper=20Michaj=C5=82ow?= Date: Sun, 25 Sep 2016 05:39:29 +0200 Subject: [PATCH 0240/1571] [vk] Add support for dailymotion embeds Fixes #10661 --- youtube_dl/extractor/vk.py | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py index cd22df25a..f26e0732c 100644 --- a/youtube_dl/extractor/vk.py +++ b/youtube_dl/extractor/vk.py @@ -23,8 +23,9 @@ from ..utils import ( unified_strdate, urlencode_postdata, ) -from .vimeo import VimeoIE +from .dailymotion import DailymotionIE from .pladform import PladformIE +from .vimeo import VimeoIE class VKBaseIE(InfoExtractor): @@ -210,6 +211,23 @@ class VKIE(VKBaseIE): 'view_count': int, }, }, + { + # dailymotion embed + 'url': 'https://vk.com/video-37468416_456239855', + 'info_dict': { + 'id': 'k3lz2cmXyRuJQSjGHUv', + 'ext': 'mp4', + 'title': 'md5:d52606645c20b0ddbb21655adaa4f56f', + 'description': 'md5:c651358f03c56f1150b555c26d90a0fd', + 'uploader': 'AniLibria.Tv', + 'upload_date': '20160914', + 'uploader_id': 'x1p5vl5', + 'timestamp': 1473877246, + }, + 'params': { + 'skip_download': True, + } + }, { # video key is extra_data not url\d+ 'url': 'http://vk.com/video-110305615_171782105', @@ -315,6 +333,10 @@ class VKIE(VKBaseIE): m_rutube.group(1).replace('\\', '')) return self.url_result(rutube_url) + dailymotion_urls = DailymotionIE._extract_urls(info_page) + if dailymotion_urls: + return self.url_result(dailymotion_urls[0], DailymotionIE.ie_key()) + m_opts = re.search(r'(?s)var\s+opts\s*=\s*({.+?});', info_page) if m_opts: m_opts_url = re.search(r"url\s*:\s*'((?!/\b)[^']+)", m_opts.group(1)) From cdfcc4ce95e351c3f560fa3c07ae7d4ab188ef25 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 27 Sep 2016 22:27:10 +0700 Subject: [PATCH 0241/1571] [mtv] Improve _VALID_URL --- youtube_dl/extractor/mtv.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/mtv.py b/youtube_dl/extractor/mtv.py index 2e9580b10..74a3a035e 100644 --- a/youtube_dl/extractor/mtv.py +++ b/youtube_dl/extractor/mtv.py @@ -271,7 +271,7 @@ class MTVServicesEmbeddedIE(MTVServicesInfoExtractor): class MTVIE(MTVServicesInfoExtractor): IE_NAME = 'mtv' - _VALID_URL = r'https?://(?:www\.)?mtv\.com/(video-clips|full-episodes)/(?P[^/?#.]+)' + _VALID_URL = r'https?://(?:www\.)?mtv\.com/(?:video-clips|full-episodes)/(?P[^/?#.]+)' _FEED_URL = 'http://www.mtv.com/feeds/mrss/' _TESTS = [{ From dd2cffeeec8feac8fe52924760b2cb368249396a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 27 Sep 2016 22:43:35 +0700 Subject: [PATCH 0242/1571] [ChangeLog] Actualize --- ChangeLog | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/ChangeLog b/ChangeLog index fdebb89b9..33c94ef55 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,9 +1,24 @@ vesion Core ++ Add hdcore query parameter to akamai f4m formats ++ Delegate HLS live streams downloading to ffmpeg + Improved support for HTML5 subtitles Extractors ++ [vk] Add support for dailymotion embeds (#10661) +* [einthusan] Fix extraction (#10714) +* [promptfile] Fix extraction (#10634) +* [kaltura] Speed up embed regular expressions (#10764) ++ [npo] Add support for anderetijden.nl (#10754) ++ [prosiebensat1] Add support for advopedia sites +* [mwave] Relax URL regular expression (#10735, #10748) +* [prosiebensat1] Fix playlist support (#10745) ++ [prosiebensat1] Add support for sat1gold sites (#10745) ++ [cbsnews:livevideo] Fix extraction and extract m3u8 formats ++ [brightcove:new] Add support for live streams +* [soundcloud] Generalize playlist entries extraction (#10733) ++ [mtv] Add support for new URL schema (#8169, #9808) * [einthusan] Fix extraction (#10714) + [twitter] Support Periscope embeds (#10737) + [openload] Support subtitles (#10625) From c8f45f763cac3c0d0e4ca35ba072d8d321957e85 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 27 Sep 2016 23:03:00 +0700 Subject: [PATCH 0243/1571] [ChangeLog] Remove duplicate --- ChangeLog | 1 - 1 file changed, 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index 33c94ef55..0d15b6a82 100644 --- a/ChangeLog +++ b/ChangeLog @@ -7,7 +7,6 @@ Core Extractors + [vk] Add support for dailymotion embeds (#10661) -* [einthusan] Fix extraction (#10714) * [promptfile] Fix extraction (#10634) * [kaltura] Speed up embed regular expressions (#10764) + [npo] Add support for anderetijden.nl (#10754) From 8f0cf20ab987019c3ba66c375450f80bb1cfe281 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 27 Sep 2016 23:09:46 +0700 Subject: [PATCH 0244/1571] release 2016.09.27 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 10 ++++++---- youtube_dl/version.py | 2 +- 4 files changed, 11 insertions(+), 9 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 7669ab9b7..273eb8c0b 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.09.24*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.09.24** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.09.27*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.09.27** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.09.24 +[debug] youtube-dl version 2016.09.27 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 0d15b6a82..f8149cc30 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -vesion +vesion 2016.09.27 Core + Add hdcore query parameter to akamai f4m formats diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 95a137393..26f275577 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -40,6 +40,7 @@ - **Allocine** - **AlphaPorno** - **AMCNetworks** + - **anderetijden**: npo.nl and ntr.nl - **AnimeOnDemand** - **anitube.se** - **AnySex** @@ -127,8 +128,8 @@ - **CBS** - **CBSInteractive** - **CBSLocal** - - **CBSNews**: CBS News - - **CBSNewsLiveVideo**: CBS News Live Videos + - **cbsnews**: CBS News + - **cbsnews:livevideo**: CBS News Live Videos - **CBSSports** - **CCTV** - **CDA** @@ -424,8 +425,9 @@ - **MPORA** - **MSN** - **mtg**: MTG services - - **MTV** + - **mtv** - **mtv.de** + - **mtv:video** - **mtvservices:embedded** - **MuenchenTV**: münchen.tv - **MusicPlayOn** @@ -865,7 +867,7 @@ - **wholecloud**: WholeCloud - **Wimp** - **Wistia** - - **WNL** + - **wnl**: npo.nl and ntr.nl - **WorldStarHipHop** - **wrzuta.pl** - **wrzuta.pl:playlist** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 2af6380b8..af0c2cfc4 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.09.24' +__version__ = '2016.09.27' From 8bfda726c20198b7e68a805967917ef1a79e9b91 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Wed, 28 Sep 2016 16:34:27 +0100 Subject: [PATCH 0245/1571] [limelight:media] improve http formats extraction --- youtube_dl/extractor/limelight.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/limelight.py b/youtube_dl/extractor/limelight.py index 6752ffee2..b7bfa7a6d 100644 --- a/youtube_dl/extractor/limelight.py +++ b/youtube_dl/extractor/limelight.py @@ -59,7 +59,7 @@ class LimelightBaseIE(InfoExtractor): format_id = 'rtmp' if stream.get('videoBitRate'): format_id += '-%d' % int_or_none(stream['videoBitRate']) - http_url = 'http://%s/%s' % (rtmp.group('host').replace('csl.', 'cpl.'), rtmp.group('playpath')[4:]) + http_url = 'http://cpl.delvenetworks.com/' + rtmp.group('playpath')[4:] urls.append(http_url) http_fmt = fmt.copy() http_fmt.update({ From f533490bb7b2d25b9c6fe7ccd381ebe2bef7d4f8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 28 Sep 2016 22:58:25 +0700 Subject: [PATCH 0246/1571] [ketnet] Extract mzsource formats (#10770) --- youtube_dl/extractor/ketnet.py | 26 +++++++++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/ketnet.py b/youtube_dl/extractor/ketnet.py index aaf3f807a..eb0a16008 100644 --- a/youtube_dl/extractor/ketnet.py +++ b/youtube_dl/extractor/ketnet.py @@ -21,6 +21,10 @@ class KetnetIE(InfoExtractor): }, { 'url': 'https://www.ketnet.be/achter-de-schermen/sien-repeteert-voor-stars-for-life', 'only_matching': True, + }, { + # mzsource, geo restricted to Belgium + 'url': 'https://www.ketnet.be/kijken/nachtwacht/de-bermadoe', + 'only_matching': True, }] def _real_extract(self, url): @@ -36,9 +40,25 @@ class KetnetIE(InfoExtractor): title = config['title'] - formats = self._extract_m3u8_formats( - config['source']['hls'], video_id, 'mp4', - entry_protocol='m3u8_native', m3u8_id='hls') + formats = [] + for source_key in ('', 'mz'): + source = config.get('%ssource' % source_key) + if not isinstance(source, dict): + continue + for format_id, format_url in source.items(): + if format_id == 'hls': + formats.extend(self._extract_m3u8_formats( + format_url, video_id, 'mp4', + entry_protocol='m3u8_native', m3u8_id=format_id, + fatal=False)) + elif format_id == 'hds': + formats.extend(self._extract_f4m_formats( + format_url, video_id, f4m_id=format_id, fatal=False)) + else: + formats.append({ + 'url': format_url, + 'format_id': format_id, + }) self._sort_formats(formats) return { From a56e74e2713ed45f4096735cf49d1d97b5e75389 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?D=C3=A9stin=20Reed?= Date: Wed, 28 Sep 2016 16:54:06 +0200 Subject: [PATCH 0247/1571] [Instagram] Extract comments --- youtube_dl/extractor/instagram.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/youtube_dl/extractor/instagram.py b/youtube_dl/extractor/instagram.py index 8f7f232be..5ebc30a10 100644 --- a/youtube_dl/extractor/instagram.py +++ b/youtube_dl/extractor/instagram.py @@ -29,6 +29,7 @@ class InstagramIE(InfoExtractor): 'uploader': 'Naomi Leonor Phan-Quang', 'like_count': int, 'comment_count': int, + 'comments': list, }, }, { # missing description @@ -44,6 +45,7 @@ class InstagramIE(InfoExtractor): 'uploader': 'Britney Spears', 'like_count': int, 'comment_count': int, + 'comments': list, }, 'params': { 'skip_download': True, @@ -101,6 +103,14 @@ class InstagramIE(InfoExtractor): uploader_id = media.get('owner', {}).get('username') like_count = int_or_none(media.get('likes', {}).get('count')) comment_count = int_or_none(media.get('comments', {}).get('count')) + comments = [{ + 'author': comment.get('user', {}).get('username'), + 'author_id': comment.get('user', {}).get('id'), + 'id': comment.get('id'), + 'text': comment.get('text'), + 'timestamp': int_or_none(comment.get('created_at')), + } for comment in media.get('comments', {}).get('nodes', []) + if comment.get('text')] if not video_url: video_url = self._og_search_video_url(webpage, secure=False) @@ -131,6 +141,7 @@ class InstagramIE(InfoExtractor): 'uploader': uploader, 'like_count': like_count, 'comment_count': comment_count, + 'comments': comments, } From 0d72ff9c51ecc84aae1717c05f8b73ad94199687 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Thu, 29 Sep 2016 21:39:35 +0800 Subject: [PATCH 0248/1571] [leeco] Recognize more Le Sports URLs (#10794) --- ChangeLog | 8 +++++++- youtube_dl/extractor/leeco.py | 8 +++++++- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/ChangeLog b/ChangeLog index f8149cc30..70da55c90 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,10 @@ -vesion 2016.09.27 +version + +Extractors ++ [leeco] Recognize more Le Sports URLs (#10794) + + +version 2016.09.27 Core + Add hdcore query parameter to akamai f4m formats diff --git a/youtube_dl/extractor/leeco.py b/youtube_dl/extractor/leeco.py index e9cc9aa59..c48a5aad1 100644 --- a/youtube_dl/extractor/leeco.py +++ b/youtube_dl/extractor/leeco.py @@ -29,7 +29,7 @@ from ..utils import ( class LeIE(InfoExtractor): IE_DESC = '乐视网' - _VALID_URL = r'https?://(?:www\.le\.com/ptv/vplay|sports\.le\.com/video)/(?P\d+)\.html' + _VALID_URL = r'https?://(?:www\.le\.com/ptv/vplay|(?:sports\.le|(?:www\.)?lesports)\.com/(?:match|video))/(?P\d+)\.html' _URL_TEMPLATE = 'http://www.le.com/ptv/vplay/%s.html' @@ -73,6 +73,12 @@ class LeIE(InfoExtractor): }, { 'url': 'http://sports.le.com/video/25737697.html', 'only_matching': True, + }, { + 'url': 'http://www.lesports.com/match/1023203003.html', + 'only_matching': True, + }, { + 'url': 'http://sports.le.com/match/1023203003.html', + 'only_matching': True, }] # ror() and calc_time_key() are reversed from a embedded swf file in KLetvPlayer.swf From 93aa0b631878b62f756c83e1069a14cd2d8775f1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 29 Sep 2016 23:04:10 +0700 Subject: [PATCH 0249/1571] [vk] Add support for finished live streams (#10799) --- youtube_dl/extractor/vk.py | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py index f26e0732c..1d089c9d7 100644 --- a/youtube_dl/extractor/vk.py +++ b/youtube_dl/extractor/vk.py @@ -226,7 +226,7 @@ class VKIE(VKBaseIE): }, 'params': { 'skip_download': True, - } + }, }, { # video key is extra_data not url\d+ @@ -241,6 +241,18 @@ class VKIE(VKBaseIE): 'view_count': int, }, }, + { + # finished live stream, live_mp4 + 'url': 'https://vk.com/videos-387766?z=video-387766_456242764%2Fpl_-387766_-2', + 'md5': '90d22d051fccbbe9becfccc615be6791', + 'info_dict': { + 'id': '456242764', + 'ext': 'mp4', + 'title': 'ИгроМир 2016 — день 1', + 'uploader': 'Игромания', + 'duration': 5239, + }, + }, { # removed video, just testing that we match the pattern 'url': 'http://vk.com/feed?z=video-43215063_166094326%2Fbb50cacd3177146d7a', @@ -366,7 +378,10 @@ class VKIE(VKBaseIE): formats = [] for k, v in data.items(): - if not k.startswith('url') and not k.startswith('cache') and k != 'extra_data' or not v: + if (not k.startswith('url') and not k.startswith('cache') + and k not in ('extra_data', 'live_mp4')): + continue + if not isinstance(v, compat_str) or not v.startswith('http'): continue height = int_or_none(self._search_regex( r'^(?:url|cache)(\d+)', k, 'height', default=None)) From 475f8a458099c64d367356471069bd0ff2bd1b0d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 29 Sep 2016 23:21:39 +0700 Subject: [PATCH 0250/1571] [vk] Add support for running live streams (Closes #10799) --- youtube_dl/extractor/vk.py | 47 ++++++++++++++++++++++++++------------ 1 file changed, 33 insertions(+), 14 deletions(-) diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py index 1d089c9d7..9f7a593ef 100644 --- a/youtube_dl/extractor/vk.py +++ b/youtube_dl/extractor/vk.py @@ -253,6 +253,12 @@ class VKIE(VKBaseIE): 'duration': 5239, }, }, + { + # live stream, hls and rtmp links,most likely already finished live + # stream by the time you are reading this comment + 'url': 'https://vk.com/video-140332_456239111', + 'only_matching': True, + }, { # removed video, just testing that we match the pattern 'url': 'http://vk.com/feed?z=video-43215063_166094326%2Fbb50cacd3177146d7a', @@ -361,6 +367,11 @@ class VKIE(VKBaseIE): data_json = self._search_regex(r'var\s+vars\s*=\s*({.+?});', info_page, 'vars') data = json.loads(data_json) + title = unescapeHTML(data['md_title']) + + if data.get('live') == 2: + title = self._live_title(title) + # Extract upload date upload_date = None mobj = re.search(r'id="mv_date(?:_views)?_wrap"[^>]*>([a-zA-Z]+ [0-9]+), ([0-9]+) at', info_page) @@ -377,25 +388,33 @@ class VKIE(VKBaseIE): r'([\d,.]+)', views, 'view count', fatal=False)) formats = [] - for k, v in data.items(): - if (not k.startswith('url') and not k.startswith('cache') - and k not in ('extra_data', 'live_mp4')): + for format_id, format_url in data.items(): + if not isinstance(format_url, compat_str) or not format_url.startswith(('http', '//', 'rtmp')): continue - if not isinstance(v, compat_str) or not v.startswith('http'): - continue - height = int_or_none(self._search_regex( - r'^(?:url|cache)(\d+)', k, 'height', default=None)) - formats.append({ - 'format_id': k, - 'url': v, - 'height': height, - }) + if format_id.startswith(('url', 'cache')) or format_id in ('extra_data', 'live_mp4'): + height = int_or_none(self._search_regex( + r'^(?:url|cache)(\d+)', format_id, 'height', default=None)) + formats.append({ + 'format_id': format_id, + 'url': format_url, + 'height': height, + }) + elif format_id == 'hls': + formats.extend(self._extract_m3u8_formats( + format_url, video_id, 'mp4', m3u8_id=format_id, + fatal=False, live=True)) + elif format_id == 'rtmp': + formats.append({ + 'format_id': format_id, + 'url': format_url, + 'ext': 'flv', + }) self._sort_formats(formats) return { - 'id': compat_str(data['vid']), + 'id': compat_str(data.get('vid') or video_id), 'formats': formats, - 'title': unescapeHTML(data['md_title']), + 'title': title, 'thumbnail': data.get('jpg'), 'uploader': data.get('md_author'), 'duration': data.get('duration'), From efa97bdcf1f1e90d1b51a09324d7869dcd70729b Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Fri, 30 Sep 2016 00:28:32 +0800 Subject: [PATCH 0251/1571] Move write_xattr to utils.py There are some other places that use xattr functions. It's better to move it to a common place so that others can use it. --- youtube_dl/postprocessor/xattrpp.py | 114 ++-------------------------- youtube_dl/utils.py | 99 ++++++++++++++++++++++++ 2 files changed, 106 insertions(+), 107 deletions(-) diff --git a/youtube_dl/postprocessor/xattrpp.py b/youtube_dl/postprocessor/xattrpp.py index e39ca60aa..fbdfa02ac 100644 --- a/youtube_dl/postprocessor/xattrpp.py +++ b/youtube_dl/postprocessor/xattrpp.py @@ -1,37 +1,15 @@ from __future__ import unicode_literals -import os -import subprocess -import sys -import errno - from .common import PostProcessor from ..compat import compat_os_name from ..utils import ( - check_executable, hyphenate_date, - version_tuple, - PostProcessingError, - encodeArgument, - encodeFilename, + write_xattr, + XAttrMetadataError, + XAttrUnavailableError, ) -class XAttrMetadataError(PostProcessingError): - def __init__(self, code=None, msg='Unknown error'): - super(XAttrMetadataError, self).__init__(msg) - self.code = code - - # Parsing code and msg - if (self.code in (errno.ENOSPC, errno.EDQUOT) or - 'No space left' in self.msg or 'Disk quota excedded' in self.msg): - self.reason = 'NO_SPACE' - elif self.code == errno.E2BIG or 'Argument list too long' in self.msg: - self.reason = 'VALUE_TOO_LONG' - else: - self.reason = 'NOT_SUPPORTED' - - class XAttrMetadataPP(PostProcessor): # @@ -48,88 +26,6 @@ class XAttrMetadataPP(PostProcessor): def run(self, info): """ Set extended attributes on downloaded file (if xattr support is found). """ - # This mess below finds the best xattr tool for the job and creates a - # "write_xattr" function. - try: - # try the pyxattr module... - import xattr - - # Unicode arguments are not supported in python-pyxattr until - # version 0.5.0 - # See https://github.com/rg3/youtube-dl/issues/5498 - pyxattr_required_version = '0.5.0' - if version_tuple(xattr.__version__) < version_tuple(pyxattr_required_version): - self._downloader.report_warning( - 'python-pyxattr is detected but is too old. ' - 'youtube-dl requires %s or above while your version is %s. ' - 'Falling back to other xattr implementations' % ( - pyxattr_required_version, xattr.__version__)) - - raise ImportError - - def write_xattr(path, key, value): - try: - xattr.set(path, key, value) - except EnvironmentError as e: - raise XAttrMetadataError(e.errno, e.strerror) - - except ImportError: - if compat_os_name == 'nt': - # Write xattrs to NTFS Alternate Data Streams: - # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29 - def write_xattr(path, key, value): - assert ':' not in key - assert os.path.exists(path) - - ads_fn = path + ':' + key - try: - with open(ads_fn, 'wb') as f: - f.write(value) - except EnvironmentError as e: - raise XAttrMetadataError(e.errno, e.strerror) - else: - user_has_setfattr = check_executable('setfattr', ['--version']) - user_has_xattr = check_executable('xattr', ['-h']) - - if user_has_setfattr or user_has_xattr: - - def write_xattr(path, key, value): - value = value.decode('utf-8') - if user_has_setfattr: - executable = 'setfattr' - opts = ['-n', key, '-v', value] - elif user_has_xattr: - executable = 'xattr' - opts = ['-w', key, value] - - cmd = ([encodeFilename(executable, True)] + - [encodeArgument(o) for o in opts] + - [encodeFilename(path, True)]) - - try: - p = subprocess.Popen( - cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE) - except EnvironmentError as e: - raise XAttrMetadataError(e.errno, e.strerror) - stdout, stderr = p.communicate() - stderr = stderr.decode('utf-8', 'replace') - if p.returncode != 0: - raise XAttrMetadataError(p.returncode, stderr) - - else: - # On Unix, and can't find pyxattr, setfattr, or xattr. - if sys.platform.startswith('linux'): - self._downloader.report_error( - "Couldn't find a tool to set the xattrs. " - "Install either the python 'pyxattr' or 'xattr' " - "modules, or the GNU 'attr' package " - "(which contains the 'setfattr' tool).") - else: - self._downloader.report_error( - "Couldn't find a tool to set the xattrs. " - "Install either the python 'xattr' module, " - "or the 'xattr' binary.") - # Write the metadata to the file's xattrs self._downloader.to_screen('[metadata] Writing metadata to file\'s xattrs') @@ -159,6 +55,10 @@ class XAttrMetadataPP(PostProcessor): return [], info + except XAttrUnavailableError as e: + self._downloader.report_error(str(e)) + return [], info + except XAttrMetadataError as e: if e.reason == 'NO_SPACE': self._downloader.report_warning( diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 69ca88c85..fcbfa0d76 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -42,6 +42,7 @@ from .compat import ( compat_html_entities_html5, compat_http_client, compat_kwargs, + compat_os_name, compat_parse_qs, compat_shlex_quote, compat_socket_create_connection, @@ -775,6 +776,25 @@ class ContentTooShortError(Exception): self.expected = expected +class XAttrMetadataError(Exception): + def __init__(self, code=None, msg='Unknown error'): + super(XAttrMetadataError, self).__init__(msg) + self.code = code + + # Parsing code and msg + if (self.code in (errno.ENOSPC, errno.EDQUOT) or + 'No space left' in self.msg or 'Disk quota excedded' in self.msg): + self.reason = 'NO_SPACE' + elif self.code == errno.E2BIG or 'Argument list too long' in self.msg: + self.reason = 'VALUE_TOO_LONG' + else: + self.reason = 'NOT_SUPPORTED' + + +class XAttrUnavailableError(Exception): + pass + + def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs): # Working around python 2 bug (see http://bugs.python.org/issue17849) by limiting # expected HTTP responses to meet HTTP/1.0 or later (see also @@ -3131,3 +3151,82 @@ def decode_png(png_data): current_row.append(color) return width, height, pixels + + +def write_xattr(path, key, value): + # This mess below finds the best xattr tool for the job + try: + # try the pyxattr module... + import xattr + + # Unicode arguments are not supported in python-pyxattr until + # version 0.5.0 + # See https://github.com/rg3/youtube-dl/issues/5498 + pyxattr_required_version = '0.5.0' + if version_tuple(xattr.__version__) < version_tuple(pyxattr_required_version): + # TODO: fallback to CLI tools + raise XAttrUnavailableError( + 'python-pyxattr is detected but is too old. ' + 'youtube-dl requires %s or above while your version is %s. ' + 'Falling back to other xattr implementations' % ( + pyxattr_required_version, xattr.__version__)) + + try: + xattr.set(path, key, value) + except EnvironmentError as e: + raise XAttrMetadataError(e.errno, e.strerror) + + except ImportError: + if compat_os_name == 'nt': + # Write xattrs to NTFS Alternate Data Streams: + # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29 + assert ':' not in key + assert os.path.exists(path) + + ads_fn = path + ':' + key + try: + with open(ads_fn, 'wb') as f: + f.write(value) + except EnvironmentError as e: + raise XAttrMetadataError(e.errno, e.strerror) + else: + user_has_setfattr = check_executable('setfattr', ['--version']) + user_has_xattr = check_executable('xattr', ['-h']) + + if user_has_setfattr or user_has_xattr: + + value = value.decode('utf-8') + if user_has_setfattr: + executable = 'setfattr' + opts = ['-n', key, '-v', value] + elif user_has_xattr: + executable = 'xattr' + opts = ['-w', key, value] + + cmd = ([encodeFilename(executable, True)] + + [encodeArgument(o) for o in opts] + + [encodeFilename(path, True)]) + + try: + p = subprocess.Popen( + cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE) + except EnvironmentError as e: + raise XAttrMetadataError(e.errno, e.strerror) + stdout, stderr = p.communicate() + stderr = stderr.decode('utf-8', 'replace') + if p.returncode != 0: + raise XAttrMetadataError(p.returncode, stderr) + + else: + # On Unix, and can't find pyxattr, setfattr, or xattr. + if sys.platform.startswith('linux'): + raise XAttrUnavailableError( + "Couldn't find a tool to set the xattrs. " + "Install either the python 'pyxattr' or 'xattr' " + "modules, or the GNU 'attr' package " + "(which contains the 'setfattr' tool).") + else: + raise XAttrUnavailableError( + "Couldn't find a tool to set the xattrs. " + "Install either the python 'xattr' module, " + "or the 'xattr' binary.") From 3aa3953d28dae68b87aa83682043b5eec0973ddc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kacper=20Michaj=C5=82ow?= Date: Sun, 25 Sep 2016 20:26:58 +0200 Subject: [PATCH 0252/1571] [vk] Fix date and view count extraction. --- youtube_dl/extractor/vk.py | 17 +++++------------ youtube_dl/utils.py | 2 ++ 2 files changed, 7 insertions(+), 12 deletions(-) diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py index 9f7a593ef..3cfbd97af 100644 --- a/youtube_dl/extractor/vk.py +++ b/youtube_dl/extractor/vk.py @@ -373,19 +373,12 @@ class VKIE(VKBaseIE): title = self._live_title(title) # Extract upload date - upload_date = None - mobj = re.search(r'id="mv_date(?:_views)?_wrap"[^>]*>([a-zA-Z]+ [0-9]+), ([0-9]+) at', info_page) - if mobj is not None: - mobj.group(1) + ' ' + mobj.group(2) - upload_date = unified_strdate(mobj.group(1) + ' ' + mobj.group(2)) + upload_date = unified_strdate(self._html_search_regex( + r'class="mv_info_date[^>]*>([^<]*)<', info_page, 'upload date', default=None)) - view_count = None - views = self._html_search_regex( - r'"mv_views_count_number"[^>]*>(.+?\bviews?)<', - info_page, 'view count', default=None) - if views: - view_count = str_to_int(self._search_regex( - r'([\d,.]+)', views, 'view count', fatal=False)) + view_count = str_to_int(self._html_search_regex( + r'class="mv_views_count[^>]*>([\d,.]+)', + info_page, 'view count', default=None)) formats = [] for format_id, format_url in data.items(): diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index fcbfa0d76..243d09034 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -124,6 +124,8 @@ DATE_FORMATS = ( '%d %b %Y', '%B %d %Y', '%b %d %Y', + '%b %d %Y at %H:%M', + '%b %d %Y at %H:%M:%S', '%b %dst %Y %I:%M', '%b %dnd %Y %I:%M', '%b %dth %Y %I:%M', From c6eed6b8c000672f0515d916dda54002c7fca356 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 29 Sep 2016 23:47:25 +0700 Subject: [PATCH 0253/1571] [utils] Lower priority for rare date formats and add tests --- test/test_utils.py | 2 ++ youtube_dl/utils.py | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/test/test_utils.py b/test/test_utils.py index 9789d8611..b1b2effca 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -292,6 +292,7 @@ class TestUtil(unittest.TestCase): self.assertEqual(unified_strdate('25-09-2014'), '20140925') self.assertEqual(unified_strdate('27.02.2016 17:30'), '20160227') self.assertEqual(unified_strdate('UNKNOWN DATE FORMAT'), None) + self.assertEqual(unified_strdate('Feb 7, 2016 at 6:35 pm'), '20160207') def test_unified_timestamps(self): self.assertEqual(unified_timestamp('December 21, 2010'), 1292889600) @@ -312,6 +313,7 @@ class TestUtil(unittest.TestCase): self.assertEqual(unified_timestamp('27.02.2016 17:30'), 1456594200) self.assertEqual(unified_timestamp('UNKNOWN DATE FORMAT'), None) self.assertEqual(unified_timestamp('May 16, 2016 11:15 PM'), 1463440500) + self.assertEqual(unified_timestamp('Feb 7, 2016 at 6:35 pm'), 1454870100) def test_determine_ext(self): self.assertEqual(determine_ext('http://example.com/foo/bar.mp4/?download'), 'mp4') diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 243d09034..d2dfa8013 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -124,8 +124,6 @@ DATE_FORMATS = ( '%d %b %Y', '%B %d %Y', '%b %d %Y', - '%b %d %Y at %H:%M', - '%b %d %Y at %H:%M:%S', '%b %dst %Y %I:%M', '%b %dnd %Y %I:%M', '%b %dth %Y %I:%M', @@ -144,6 +142,8 @@ DATE_FORMATS = ( '%Y-%m-%dT%H:%M:%S', '%Y-%m-%dT%H:%M:%S.%f', '%Y-%m-%dT%H:%M', + '%b %d %Y at %H:%M', + '%b %d %Y at %H:%M:%S', ) DATE_FORMATS_DAY_FIRST = list(DATE_FORMATS) From a7ee8a00f4af9853d06ed895c5023cc6b573fd57 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 29 Sep 2016 23:48:21 +0700 Subject: [PATCH 0254/1571] [vk] Extract timestamp (Closes #10760) --- youtube_dl/extractor/vk.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py index 3cfbd97af..77f5cebcf 100644 --- a/youtube_dl/extractor/vk.py +++ b/youtube_dl/extractor/vk.py @@ -20,7 +20,7 @@ from ..utils import ( remove_start, str_to_int, unescapeHTML, - unified_strdate, + unified_timestamp, urlencode_postdata, ) from .dailymotion import DailymotionIE @@ -106,6 +106,7 @@ class VKIE(VKBaseIE): 'title': 'ProtivoGunz - Хуёвая песня', 'uploader': 're:(?:Noize MC|Alexander Ilyashenko).*', 'duration': 195, + 'timestamp': 1329060660, 'upload_date': '20120212', 'view_count': int, }, @@ -119,6 +120,7 @@ class VKIE(VKBaseIE): 'uploader': 'Tom Cruise', 'title': 'No name', 'duration': 9, + 'timestamp': 1374374880, 'upload_date': '20130721', 'view_count': int, } @@ -195,6 +197,7 @@ class VKIE(VKBaseIE): 'upload_date': '20150709', 'view_count': int, }, + 'skip': 'Removed', }, { # youtube embed @@ -237,6 +240,7 @@ class VKIE(VKBaseIE): 'ext': 'mp4', 'title': 'S-Dance, репетиции к The way show', 'uploader': 'THE WAY SHOW | 17 апреля', + 'timestamp': 1454870100, 'upload_date': '20160207', 'view_count': int, }, @@ -373,8 +377,9 @@ class VKIE(VKBaseIE): title = self._live_title(title) # Extract upload date - upload_date = unified_strdate(self._html_search_regex( - r'class="mv_info_date[^>]*>([^<]*)<', info_page, 'upload date', default=None)) + timestamp = unified_timestamp(self._html_search_regex( + r'class=["\']mv_info_date[^>]*>([^<]+)(?:<|from)', info_page, + 'upload date', fatal=False)) view_count = str_to_int(self._html_search_regex( r'class="mv_views_count[^>]*>([\d,.]+)', @@ -411,7 +416,7 @@ class VKIE(VKBaseIE): 'thumbnail': data.get('jpg'), 'uploader': data.get('md_author'), 'duration': data.get('duration'), - 'upload_date': upload_date, + 'timestamp': timestamp, 'view_count': view_count, } From 70d7b323b6556eb693bec43a1eb10ded889184b0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 29 Sep 2016 23:51:52 +0700 Subject: [PATCH 0255/1571] [vk] Improve view count extraction --- youtube_dl/extractor/vk.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py index 77f5cebcf..58799d413 100644 --- a/youtube_dl/extractor/vk.py +++ b/youtube_dl/extractor/vk.py @@ -255,6 +255,7 @@ class VKIE(VKBaseIE): 'title': 'ИгроМир 2016 — день 1', 'uploader': 'Игромания', 'duration': 5239, + 'view_count': int, }, }, { @@ -376,14 +377,13 @@ class VKIE(VKBaseIE): if data.get('live') == 2: title = self._live_title(title) - # Extract upload date timestamp = unified_timestamp(self._html_search_regex( - r'class=["\']mv_info_date[^>]*>([^<]+)(?:<|from)', info_page, + r'class=["\']mv_info_date[^>]+>([^<]+)(?:<|from)', info_page, 'upload date', fatal=False)) - view_count = str_to_int(self._html_search_regex( - r'class="mv_views_count[^>]*>([\d,.]+)', - info_page, 'view count', default=None)) + view_count = str_to_int(self._search_regex( + r'class=["\']mv_views_count[^>]+>\s*([\d,.]+)', + info_page, 'view count', fatal=False)) formats = [] for format_id, format_url in data.items(): From af33dd8ee7da49b5daf1582b2870deaa5427444b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 30 Sep 2016 00:13:03 +0700 Subject: [PATCH 0256/1571] [aftonbladet] Remove extractor --- youtube_dl/extractor/aftonbladet.py | 64 ----------------------------- youtube_dl/extractor/extractors.py | 1 - 2 files changed, 65 deletions(-) delete mode 100644 youtube_dl/extractor/aftonbladet.py diff --git a/youtube_dl/extractor/aftonbladet.py b/youtube_dl/extractor/aftonbladet.py deleted file mode 100644 index 5766b4fe8..000000000 --- a/youtube_dl/extractor/aftonbladet.py +++ /dev/null @@ -1,64 +0,0 @@ -# encoding: utf-8 -from __future__ import unicode_literals - -from .common import InfoExtractor -from ..utils import int_or_none - - -class AftonbladetIE(InfoExtractor): - _VALID_URL = r'https?://tv\.aftonbladet\.se/abtv/articles/(?P[0-9]+)' - _TEST = { - 'url': 'http://tv.aftonbladet.se/abtv/articles/36015', - 'info_dict': { - 'id': '36015', - 'ext': 'mp4', - 'title': 'Vulkanutbrott i rymden - nu släpper NASA bilderna', - 'description': 'Jupiters måne mest aktiv av alla himlakroppar', - 'timestamp': 1394142732, - 'upload_date': '20140306', - }, - } - - def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - - # find internal video meta data - meta_url = 'http://aftonbladet-play-metadata.cdn.drvideo.aptoma.no/video/%s.json' - player_config = self._parse_json(self._html_search_regex( - r'data-player-config="([^"]+)"', webpage, 'player config'), video_id) - internal_meta_id = player_config['aptomaVideoId'] - internal_meta_url = meta_url % internal_meta_id - internal_meta_json = self._download_json( - internal_meta_url, video_id, 'Downloading video meta data') - - # find internal video formats - format_url = 'http://aftonbladet-play.videodata.drvideo.aptoma.no/actions/video/?id=%s' - internal_video_id = internal_meta_json['videoId'] - internal_formats_url = format_url % internal_video_id - internal_formats_json = self._download_json( - internal_formats_url, video_id, 'Downloading video formats') - - formats = [] - for fmt in internal_formats_json['formats']['http']['pseudostreaming']['mp4']: - p = fmt['paths'][0] - formats.append({ - 'url': 'http://%s:%d/%s/%s' % (p['address'], p['port'], p['path'], p['filename']), - 'ext': 'mp4', - 'width': int_or_none(fmt.get('width')), - 'height': int_or_none(fmt.get('height')), - 'tbr': int_or_none(fmt.get('bitrate')), - 'protocol': 'http', - }) - self._sort_formats(formats) - - return { - 'id': video_id, - 'title': internal_meta_json['title'], - 'formats': formats, - 'thumbnail': internal_meta_json.get('imageUrl'), - 'description': internal_meta_json.get('shortPreamble'), - 'timestamp': int_or_none(internal_meta_json.get('timePublished')), - 'duration': int_or_none(internal_meta_json.get('duration')), - 'view_count': int_or_none(internal_meta_json.get('views')), - } diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 23fd2a308..09b3b4942 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -31,7 +31,6 @@ from .aenetworks import ( HistoryTopicIE, ) from .afreecatv import AfreecaTVIE -from .aftonbladet import AftonbladetIE from .airmozilla import AirMozillaIE from .aljazeera import AlJazeeraIE from .alphaporno import AlphaPornoIE From b0582fc80615ec94c37e14015bd9bbfef6745aa4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 30 Sep 2016 00:15:09 +0700 Subject: [PATCH 0257/1571] [vgtv] Add support for tv.aftonbladet.se (Closes #10800) --- youtube_dl/extractor/vgtv.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/vgtv.py b/youtube_dl/extractor/vgtv.py index 185756301..3b38ac700 100644 --- a/youtube_dl/extractor/vgtv.py +++ b/youtube_dl/extractor/vgtv.py @@ -22,6 +22,7 @@ class VGTVIE(XstreamIE): 'fvn.no/fvntv': 'fvntv', 'aftenposten.no/webtv': 'aptv', 'ap.vgtv.no/webtv': 'aptv', + 'tv.aftonbladet.se/abtv': 'abtv', } _APP_NAME_TO_VENDOR = { @@ -30,6 +31,7 @@ class VGTVIE(XstreamIE): 'satv': 'sa', 'fvntv': 'fvn', 'aptv': 'ap', + 'abtv': 'ab', } _VALID_URL = r'''(?x) @@ -40,7 +42,8 @@ class VGTVIE(XstreamIE): /? (?: \#!/(?:video|live)/| - embed?.*id= + embed?.*id=| + articles/ )| (?P %s @@ -135,6 +138,14 @@ class VGTVIE(XstreamIE): 'url': 'http://www.vgtv.no/#!/video/127205/inside-the-mind-of-favela-funk', 'only_matching': True, }, + { + 'url': 'http://tv.aftonbladet.se/abtv/articles/36015', + 'only_matching': True, + }, + { + 'url': 'abtv:140026', + 'only_matching': True, + } ] def _real_extract(self, url): From de6babf92252ea5828a9c17d76766357cff3e440 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 30 Sep 2016 22:30:34 +0700 Subject: [PATCH 0258/1571] [tvland] Extend _VALID_URL (Closes #10812) --- youtube_dl/extractor/tvland.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/tvland.py b/youtube_dl/extractor/tvland.py index cb76a2a58..957cf1ea2 100644 --- a/youtube_dl/extractor/tvland.py +++ b/youtube_dl/extractor/tvland.py @@ -6,7 +6,7 @@ from .mtv import MTVServicesInfoExtractor class TVLandIE(MTVServicesInfoExtractor): IE_NAME = 'tvland.com' - _VALID_URL = r'https?://(?:www\.)?tvland\.com/(?:video-clips|episodes)/(?P[^/?#.]+)' + _VALID_URL = r'https?://(?:www\.)?tvland\.com/(?:video-clips|(?:full-)?episodes)/(?P[^/?#.]+)' _FEED_URL = 'http://www.tvland.com/feeds/mrss/' _TESTS = [{ # Geo-restricted. Without a proxy metadata are still there. With a @@ -28,4 +28,7 @@ class TVLandIE(MTVServicesInfoExtractor): 'upload_date': '20151228', 'timestamp': 1451289600, }, + }, { + 'url': 'http://www.tvland.com/full-episodes/iu0hz6/younger-a-kiss-is-just-a-kiss-season-3-ep-301', + 'only_matching': True, }] From 16097822582b839a3744b54af90f7b3fd7132d26 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?D=C3=A9stin=20Reed?= Date: Wed, 28 Sep 2016 17:28:16 +0200 Subject: [PATCH 0259/1571] [Instagram] Extract video dimensions --- youtube_dl/extractor/instagram.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/instagram.py b/youtube_dl/extractor/instagram.py index 5ebc30a10..dde435189 100644 --- a/youtube_dl/extractor/instagram.py +++ b/youtube_dl/extractor/instagram.py @@ -84,7 +84,7 @@ class InstagramIE(InfoExtractor): webpage = self._download_webpage(url, video_id) (video_url, description, thumbnail, timestamp, uploader, - uploader_id, like_count, comment_count) = [None] * 8 + uploader_id, like_count, comment_count, height, width) = [None] * 10 shared_data = self._parse_json( self._search_regex( @@ -96,6 +96,8 @@ class InstagramIE(InfoExtractor): shared_data, lambda x: x['entry_data']['PostPage'][0]['media'], dict) if media: video_url = media.get('video_url') + height = int_or_none(media.get('dimensions', {}).get('height')) + width = int_or_none(media.get('dimensions', {}).get('width')) description = media.get('caption') thumbnail = media.get('display_src') timestamp = int_or_none(media.get('date')) @@ -115,6 +117,12 @@ class InstagramIE(InfoExtractor): if not video_url: video_url = self._og_search_video_url(webpage, secure=False) + formats = [{ + 'url': video_url, + 'width': width, + 'height': height, + }] + if not uploader_id: uploader_id = self._search_regex( r'"owner"\s*:\s*{\s*"username"\s*:\s*"(.+?)"', @@ -131,7 +139,7 @@ class InstagramIE(InfoExtractor): return { 'id': video_id, - 'url': video_url, + 'formats': formats, 'ext': 'mp4', 'title': 'Video by %s' % uploader_id, 'description': description, From a1001f47fc19adf983859bb281f08a09bd7f7e9b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 1 Oct 2016 00:16:08 +0700 Subject: [PATCH 0260/1571] [instagram] PEP 8 --- youtube_dl/extractor/instagram.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/instagram.py b/youtube_dl/extractor/instagram.py index dde435189..196407b06 100644 --- a/youtube_dl/extractor/instagram.py +++ b/youtube_dl/extractor/instagram.py @@ -111,8 +111,8 @@ class InstagramIE(InfoExtractor): 'id': comment.get('id'), 'text': comment.get('text'), 'timestamp': int_or_none(comment.get('created_at')), - } for comment in media.get('comments', {}).get('nodes', []) - if comment.get('text')] + } for comment in media.get( + 'comments', {}).get('nodes', []) if comment.get('text')] if not video_url: video_url = self._og_search_video_url(webpage, secure=False) From eaf9b22f94f37487d75457423a9a293dee1b1d32 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?D=C3=A9stin=20Reed?= Date: Fri, 30 Sep 2016 20:03:25 +0200 Subject: [PATCH 0261/1571] [clubic] Rely on _match_id and _parse_json --- youtube_dl/extractor/clubic.py | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/clubic.py b/youtube_dl/extractor/clubic.py index 2fba93543..f7ee3a8f8 100644 --- a/youtube_dl/extractor/clubic.py +++ b/youtube_dl/extractor/clubic.py @@ -1,9 +1,6 @@ # coding: utf-8 from __future__ import unicode_literals -import json -import re - from .common import InfoExtractor from ..utils import ( clean_html, @@ -30,16 +27,14 @@ class ClubicIE(InfoExtractor): }] def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') + video_id = self._match_id(url) player_url = 'http://player.m6web.fr/v1/player/clubic/%s.html' % video_id player_page = self._download_webpage(player_url, video_id) - config_json = self._search_regex( + config = self._parse_json(self._search_regex( r'(?m)M6\.Player\.config\s*=\s*(\{.+?\});$', player_page, - 'configuration') - config = json.loads(config_json) + 'configuration'), video_id) video_info = config['videoInfo'] sources = config['sources'] From d7753d194803086d97ffe47f022c47c906ebcc71 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Fri, 30 Sep 2016 00:49:14 +0800 Subject: [PATCH 0262/1571] [downloader/http] Use write_xattr function for --xattr-set-filesize --- youtube_dl/__init__.py | 6 ------ youtube_dl/downloader/http.py | 8 +++++--- 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 1cf3140a0..72141b983 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -283,12 +283,6 @@ def _real_main(argv=None): 'key': 'ExecAfterDownload', 'exec_cmd': opts.exec_cmd, }) - if opts.xattr_set_filesize: - try: - import xattr - xattr # Confuse flake8 - except ImportError: - parser.error('setting filesize xattr requested but python-xattr is not available') external_downloader_args = None if opts.external_downloader_args: external_downloader_args = compat_shlex_split(opts.external_downloader_args) diff --git a/youtube_dl/downloader/http.py b/youtube_dl/downloader/http.py index f8b69d186..11294d106 100644 --- a/youtube_dl/downloader/http.py +++ b/youtube_dl/downloader/http.py @@ -13,6 +13,9 @@ from ..utils import ( encodeFilename, sanitize_open, sanitized_Request, + write_xattr, + XAttrMetadataError, + XAttrUnavailableError, ) @@ -179,9 +182,8 @@ class HttpFD(FileDownloader): if self.params.get('xattr_set_filesize', False) and data_len is not None: try: - import xattr - xattr.setxattr(tmpfilename, 'user.ytdl.filesize', str(data_len)) - except(OSError, IOError, ImportError) as err: + write_xattr(tmpfilename, 'user.ytdl.filesize', str(data_len)) + except (XAttrUnavailableError, XAttrMetadataError) as err: self.report_error('unable to set filesize xattr: %s' % str(err)) try: From e295618f9e1c1fc404d9baa4ccef961d3eb3ea88 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sat, 1 Oct 2016 15:22:48 +0800 Subject: [PATCH 0263/1571] [dctp] Fix extraction (closes #10734) --- ChangeLog | 1 + youtube_dl/extractor/dctp.py | 67 ++++++++++++++++-------------------- 2 files changed, 31 insertions(+), 37 deletions(-) diff --git a/ChangeLog b/ChangeLog index 70da55c90..efc3e494e 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,7 @@ version Extractors +* [dctp] Fix extraction (#10734) + [leeco] Recognize more Le Sports URLs (#10794) diff --git a/youtube_dl/extractor/dctp.py b/youtube_dl/extractor/dctp.py index a47e04993..14ba88715 100644 --- a/youtube_dl/extractor/dctp.py +++ b/youtube_dl/extractor/dctp.py @@ -1,61 +1,54 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals from .common import InfoExtractor -from ..compat import compat_str +from ..utils import unified_strdate class DctpTvIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?dctp\.tv/(#/)?filme/(?P.+?)/$' _TEST = { 'url': 'http://www.dctp.tv/filme/videoinstallation-fuer-eine-kaufhausfassade/', + 'md5': '174dd4a8a6225cf5655952f969cfbe24', 'info_dict': { - 'id': '1324', + 'id': '95eaa4f33dad413aa17b4ee613cccc6c', 'display_id': 'videoinstallation-fuer-eine-kaufhausfassade', - 'ext': 'flv', - 'title': 'Videoinstallation für eine Kaufhausfassade' + 'ext': 'mp4', + 'title': 'Videoinstallation für eine Kaufhausfassade', + 'description': 'Kurzfilm', + 'upload_date': '20110407', + 'thumbnail': 're:^https?://.*\.jpg$', }, - 'params': { - # rtmp download - 'skip_download': True, - } } def _real_extract(self, url): video_id = self._match_id(url) - base_url = 'http://dctp-ivms2-restapi.s3.amazonaws.com/' - version_json = self._download_json( - base_url + 'version.json', - video_id, note='Determining file version') - version = version_json['version_name'] - info_json = self._download_json( - '{0}{1}/restapi/slugs/{2}.json'.format(base_url, version, video_id), - video_id, note='Fetching object ID') - object_id = compat_str(info_json['object_id']) - meta_json = self._download_json( - '{0}{1}/restapi/media/{2}.json'.format(base_url, version, object_id), - video_id, note='Downloading metadata') - uuid = meta_json['uuid'] - title = meta_json['title'] - wide = meta_json['is_wide'] - if wide: - ratio = '16x9' - else: - ratio = '4x3' - play_path = 'mp4:{0}_dctp_0500_{1}.m4v'.format(uuid, ratio) + webpage = self._download_webpage(url, video_id) + + object_id = self._html_search_meta('DC.identifier', webpage) servers_json = self._download_json( - 'http://www.dctp.tv/streaming_servers/', + 'http://www.dctp.tv/elastic_streaming_client/get_streaming_server/', video_id, note='Downloading server list') - url = servers_json[0]['endpoint'] + server = servers_json[0]['server'] + m3u8_path = self._search_regex( + r'\'([^\'"]+/playlist\.m3u8)"', webpage, 'm3u8 path') + formats = self._extract_m3u8_formats( + 'http://%s%s' % (server, m3u8_path), video_id, ext='mp4', + entry_protocol='m3u8_native') + + title = self._og_search_title(webpage) + description = self._html_search_meta('DC.description', webpage) + upload_date = unified_strdate( + self._html_search_meta('DC.date.created', webpage)) + thumbnail = self._og_search_thumbnail(webpage) return { 'id': object_id, 'title': title, - 'format': 'rtmp', - 'url': url, - 'play_path': play_path, - 'rtmp_real_time': True, - 'ext': 'flv', - 'display_id': video_id + 'formats': formats, + 'display_id': video_id, + 'description': description, + 'upload_date': upload_date, + 'thumbnail': thumbnail, } From 9bd7bd0b8054231adbeb2a0eddd42a0b969fd6c4 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sat, 1 Oct 2016 16:37:49 +0800 Subject: [PATCH 0264/1571] [twitch] Skip a 404 test --- youtube_dl/extractor/twitch.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py index bc352391e..46c2cfe7b 100644 --- a/youtube_dl/extractor/twitch.py +++ b/youtube_dl/extractor/twitch.py @@ -247,6 +247,7 @@ class TwitchVodIE(TwitchItemBaseIE): # m3u8 download 'skip_download': True, }, + 'skip': 'HTTP Error 404: Not Found', }] def _real_extract(self, url): From 9c51a2464276f5eb26d1b571d32052df55d6ead9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?D=C3=A9stin=20Reed?= Date: Fri, 30 Sep 2016 20:06:08 +0200 Subject: [PATCH 0265/1571] [criterion] Rely on _match_id, improve regex and add thumbnail to test --- youtube_dl/extractor/criterion.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/criterion.py b/youtube_dl/extractor/criterion.py index ad32673a8..cf6a5d6cb 100644 --- a/youtube_dl/extractor/criterion.py +++ b/youtube_dl/extractor/criterion.py @@ -1,8 +1,6 @@ -# -*- coding: utf-8 -*- +# coding: utf-8 from __future__ import unicode_literals -import re - from .common import InfoExtractor @@ -16,20 +14,20 @@ class CriterionIE(InfoExtractor): 'ext': 'mp4', 'title': 'Le Samouraï', 'description': 'md5:a2b4b116326558149bef81f76dcbb93f', + 'thumbnail': 're:^https?://.*\.jpg$', } } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') + video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) final_url = self._search_regex( - r'so.addVariable\("videoURL", "(.+?)"\)\;', webpage, 'video url') + r'so\.addVariable\("videoURL", "(.+?)"\)\;', webpage, 'video url') title = self._og_search_title(webpage) description = self._html_search_meta('description', webpage) thumbnail = self._search_regex( - r'so.addVariable\("thumbnailURL", "(.+?)"\)\;', + r'so\.addVariable\("thumbnailURL", "(.+?)"\)\;', webpage, 'thumbnail url') return { From d54739a2e6a8dc089e7530afda0a1cfe355a6fef Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sat, 1 Oct 2016 19:58:13 +0800 Subject: [PATCH 0266/1571] [downloader/http] xattr values should be bytes --- youtube_dl/downloader/http.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/downloader/http.py b/youtube_dl/downloader/http.py index 11294d106..af405b950 100644 --- a/youtube_dl/downloader/http.py +++ b/youtube_dl/downloader/http.py @@ -182,7 +182,7 @@ class HttpFD(FileDownloader): if self.params.get('xattr_set_filesize', False) and data_len is not None: try: - write_xattr(tmpfilename, 'user.ytdl.filesize', str(data_len)) + write_xattr(tmpfilename, 'user.ytdl.filesize', str(data_len).encode('utf-8')) except (XAttrUnavailableError, XAttrMetadataError) as err: self.report_error('unable to set filesize xattr: %s' % str(err)) From 53a7e3d2879feac7b1b6f714692581057b9b5f6b Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sat, 1 Oct 2016 20:13:04 +0800 Subject: [PATCH 0267/1571] [utils] Support xattr as well as pyxattr Closes #9054 There are two xattr packages in Python, pyxattr [1] and xattr [2]. They have different APIs. In old days pyxattr supports Linux only and xattr supports Linux, Mac, FreeBSD and Solaris, and pyxattr supports Linux only. Recently pyxattr adds support for Mac OS X. [3] An old version of [2] is shipped with Mac OS X. However, some Linux distributions have pyxattr only, for example PLD-Linux [4] and old Arch Linux. [5] As a result, supporting both is the way to go. [1] https://github.com/iustin/pyxattr [2] https://github.com/xattr/xattr [3] https://github.com/iustin/pyxattr/pull/9 [4] https://github.com/rg3/youtube-dl/issues/5498 [5] https://git.archlinux.org/svntogit/community.git/commit/?id=427c4c76401e386d865ccddea4fbfdc74df80492 https://git.archlinux.org/svntogit/community.git/commit/?id=59b40da7b69622a6761d364a8b07909e9cccaa56 python-xattr is added on 2016/06/29 while pyxattr is there for more than 6 years --- ChangeLog | 4 ++++ youtube_dl/utils.py | 29 +++++++++++++++++------------ 2 files changed, 21 insertions(+), 12 deletions(-) diff --git a/ChangeLog b/ChangeLog index efc3e494e..8ef39cd63 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,9 @@ version +Core ++ Support pyxattr as well as python-xattr for --xattrs and + --xattr-set-filesize (#9054) + Extractors * [dctp] Fix extraction (#10734) + [leeco] Recognize more Le Sports URLs (#10794) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index d2dfa8013..c259f8bff 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -3161,20 +3161,25 @@ def write_xattr(path, key, value): # try the pyxattr module... import xattr - # Unicode arguments are not supported in python-pyxattr until - # version 0.5.0 - # See https://github.com/rg3/youtube-dl/issues/5498 - pyxattr_required_version = '0.5.0' - if version_tuple(xattr.__version__) < version_tuple(pyxattr_required_version): - # TODO: fallback to CLI tools - raise XAttrUnavailableError( - 'python-pyxattr is detected but is too old. ' - 'youtube-dl requires %s or above while your version is %s. ' - 'Falling back to other xattr implementations' % ( - pyxattr_required_version, xattr.__version__)) + if hasattr(xattr, 'set'): # pyxattr + # Unicode arguments are not supported in python-pyxattr until + # version 0.5.0 + # See https://github.com/rg3/youtube-dl/issues/5498 + pyxattr_required_version = '0.5.0' + if version_tuple(xattr.__version__) < version_tuple(pyxattr_required_version): + # TODO: fallback to CLI tools + raise XAttrUnavailableError( + 'python-pyxattr is detected but is too old. ' + 'youtube-dl requires %s or above while your version is %s. ' + 'Falling back to other xattr implementations' % ( + pyxattr_required_version, xattr.__version__)) + + setxattr = xattr.set + else: # xattr + setxattr = xattr.setxattr try: - xattr.set(path, key, value) + setxattr(path, key, value) except EnvironmentError as e: raise XAttrMetadataError(e.errno, e.strerror) From e1e97c2446ab6fcffcfae738e0c7f29ff58a9dec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 1 Oct 2016 22:50:47 +0700 Subject: [PATCH 0268/1571] [periscope:user] Fix extraction (Closes #10820) --- youtube_dl/extractor/periscope.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/periscope.py b/youtube_dl/extractor/periscope.py index 61043cad5..0e3623024 100644 --- a/youtube_dl/extractor/periscope.py +++ b/youtube_dl/extractor/periscope.py @@ -132,7 +132,7 @@ class PeriscopeUserIE(PeriscopeBaseIE): user = list(data_store['UserCache']['users'].values())[0]['user'] user_id = user['id'] - session_id = data_store['SessionToken']['broadcastHistory']['token']['session_id'] + session_id = data_store['SessionToken']['public']['broadcastHistory']['token']['session_id'] broadcasts = self._call_api( 'getUserBroadcastsPublic', From 4da4516973b56bcaa65794a8ae0856cf54740c54 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?D=C3=A9stin=20Reed?= Date: Fri, 30 Sep 2016 19:59:08 +0200 Subject: [PATCH 0269/1571] [byutv] Rely on _match_id and _parse_json --- youtube_dl/extractor/byutv.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/byutv.py b/youtube_dl/extractor/byutv.py index 3aec601f8..b2d25eec0 100644 --- a/youtube_dl/extractor/byutv.py +++ b/youtube_dl/extractor/byutv.py @@ -1,6 +1,5 @@ from __future__ import unicode_literals -import json import re from .common import InfoExtractor @@ -8,7 +7,7 @@ from ..utils import ExtractorError class BYUtvIE(InfoExtractor): - _VALID_URL = r'^https?://(?:www\.)?byutv.org/watch/[0-9a-f-]+/(?P[^/?#]+)' + _VALID_URL = r'^https?://(?:www\.)?byutv.org/watch/[0-9a-f-]+/(?P[^/?#]+)' _TEST = { 'url': 'http://www.byutv.org/watch/6587b9a3-89d2-42a6-a7f7-fd2f81840a7d/studio-c-season-5-episode-5', 'md5': '05850eb8c749e2ee05ad5a1c34668493', @@ -27,15 +26,15 @@ class BYUtvIE(InfoExtractor): } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('video_id') + video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) episode_code = self._search_regex( r'(?s)episode:(.*?\}),\s*\n', webpage, 'episode information') - episode_json = re.sub( - r'(\n\s+)([a-zA-Z]+):\s+\'(.*?)\'', r'\1"\2": "\3"', episode_code) - ep = json.loads(episode_json) + + ep = self._parse_json( + episode_code, video_id, transform_source=lambda s: + re.sub(r'(\n\s+)([a-zA-Z]+):\s+\'(.*?)\'', r'\1"\2": "\3"', s)) if ep['providerType'] == 'Ooyala': return { From 6d2549fb4f2a646b6b6898db5281cde669277626 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 2 Oct 2016 00:44:54 +0700 Subject: [PATCH 0270/1571] [byutv] Fix id and display id --- youtube_dl/extractor/byutv.py | 46 ++++++++++++++++++++--------------- 1 file changed, 26 insertions(+), 20 deletions(-) diff --git a/youtube_dl/extractor/byutv.py b/youtube_dl/extractor/byutv.py index b2d25eec0..084cc7ae2 100644 --- a/youtube_dl/extractor/byutv.py +++ b/youtube_dl/extractor/byutv.py @@ -7,15 +7,15 @@ from ..utils import ExtractorError class BYUtvIE(InfoExtractor): - _VALID_URL = r'^https?://(?:www\.)?byutv.org/watch/[0-9a-f-]+/(?P[^/?#]+)' - _TEST = { + _VALID_URL = r'https?://(?:www\.)?byutv.org/watch/(?P[0-9a-f-]+)(?:/(?P[^/?#&]+))?' + _TESTS = [{ 'url': 'http://www.byutv.org/watch/6587b9a3-89d2-42a6-a7f7-fd2f81840a7d/studio-c-season-5-episode-5', - 'md5': '05850eb8c749e2ee05ad5a1c34668493', 'info_dict': { - 'id': 'studio-c-season-5-episode-5', + 'id': '6587b9a3-89d2-42a6-a7f7-fd2f81840a7d', + 'display_id': 'studio-c-season-5-episode-5', 'ext': 'mp4', - 'description': 'md5:e07269172baff037f8e8bf9956bc9747', 'title': 'Season 5 Episode 5', + 'description': 'md5:e07269172baff037f8e8bf9956bc9747', 'thumbnail': 're:^https?://.*\.jpg$', 'duration': 1486.486, }, @@ -23,28 +23,34 @@ class BYUtvIE(InfoExtractor): 'skip_download': True, }, 'add_ie': ['Ooyala'], - } + }, { + 'url': 'http://www.byutv.org/watch/6587b9a3-89d2-42a6-a7f7-fd2f81840a7d', + 'only_matching': True, + }] def _real_extract(self, url): - video_id = self._match_id(url) + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + display_id = mobj.group('display_id') or video_id - webpage = self._download_webpage(url, video_id) + webpage = self._download_webpage(url, display_id) episode_code = self._search_regex( r'(?s)episode:(.*?\}),\s*\n', webpage, 'episode information') ep = self._parse_json( - episode_code, video_id, transform_source=lambda s: + episode_code, display_id, transform_source=lambda s: re.sub(r'(\n\s+)([a-zA-Z]+):\s+\'(.*?)\'', r'\1"\2": "\3"', s)) - if ep['providerType'] == 'Ooyala': - return { - '_type': 'url_transparent', - 'ie_key': 'Ooyala', - 'url': 'ooyala:%s' % ep['providerId'], - 'id': video_id, - 'title': ep['title'], - 'description': ep.get('description'), - 'thumbnail': ep.get('imageThumbnail'), - } - else: + if ep['providerType'] != 'Ooyala': raise ExtractorError('Unsupported provider %s' % ep['provider']) + + return { + '_type': 'url_transparent', + 'ie_key': 'Ooyala', + 'url': 'ooyala:%s' % ep['providerId'], + 'id': video_id, + 'display_id': display_id, + 'title': ep['title'], + 'description': ep.get('description'), + 'thumbnail': ep.get('imageThumbnail'), + } From f6ba581f89fc764e4eaf3045ff5b63e27ad66cbf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 2 Oct 2016 00:50:07 +0700 Subject: [PATCH 0271/1571] [byutv:event] Add extractor --- youtube_dl/extractor/byutv.py | 39 +++++++++++++++++++++++++++++- youtube_dl/extractor/extractors.py | 5 +++- 2 files changed, 42 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/byutv.py b/youtube_dl/extractor/byutv.py index 084cc7ae2..4be175d70 100644 --- a/youtube_dl/extractor/byutv.py +++ b/youtube_dl/extractor/byutv.py @@ -7,7 +7,7 @@ from ..utils import ExtractorError class BYUtvIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?byutv.org/watch/(?P[0-9a-f-]+)(?:/(?P[^/?#&]+))?' + _VALID_URL = r'https?://(?:www\.)?byutv\.org/watch/(?!event/)(?P[0-9a-f-]+)(?:/(?P[^/?#&]+))?' _TESTS = [{ 'url': 'http://www.byutv.org/watch/6587b9a3-89d2-42a6-a7f7-fd2f81840a7d/studio-c-season-5-episode-5', 'info_dict': { @@ -54,3 +54,40 @@ class BYUtvIE(InfoExtractor): 'description': ep.get('description'), 'thumbnail': ep.get('imageThumbnail'), } + + +class BYUtvEventIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?byutv\.org/watch/event/(?P[0-9a-f-]+)' + _TEST = { + 'url': 'http://www.byutv.org/watch/event/29941b9b-8bf6-48d2-aebf-7a87add9e34b', + 'info_dict': { + 'id': '29941b9b-8bf6-48d2-aebf-7a87add9e34b', + 'ext': 'mp4', + 'title': 'Toledo vs. BYU (9/30/16)', + }, + 'params': { + 'skip_download': True, + }, + 'add_ie': ['Ooyala'], + } + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + + ooyala_id = self._search_regex( + r'providerId\s*:\s*(["\'])(?P(?:(?!\1).)+)\1', + webpage, 'ooyala id', group='id') + + title = self._search_regex( + r'class=["\']description["\'][^>]*>\s*

([^<]+)

', webpage, + 'title').strip() + + return { + '_type': 'url_transparent', + 'ie_key': 'Ooyala', + 'url': 'ooyala:%s' % ooyala_id, + 'id': video_id, + 'title': title, + } diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 09b3b4942..e8928307c 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -116,7 +116,10 @@ from .brightcove import ( BrightcoveNewIE, ) from .buzzfeed import BuzzFeedIE -from .byutv import BYUtvIE +from .byutv import ( + BYUtvIE, + BYUtvEventIE, +) from .c56 import C56IE from .camdemy import ( CamdemyIE, From b19e275d99c8dfe121ba0dc3478e4eb9c83e4f9b Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sun, 2 Oct 2016 02:12:14 +0800 Subject: [PATCH 0272/1571] [__init__] Fix lost xattr if --embed-thumbnail used Reported at https://github.com/rg3/youtube-dl/issues/9054#issuecomment-250451823 --- ChangeLog | 1 + youtube_dl/__init__.py | 6 ++++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/ChangeLog b/ChangeLog index 8ef39cd63..acceb9d02 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,7 @@ version Core +* Fix possibly lost extended attributes + Support pyxattr as well as python-xattr for --xattrs and --xattr-set-filesize (#9054) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 72141b983..f84b866df 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -266,8 +266,6 @@ def _real_main(argv=None): postprocessors.append({ 'key': 'FFmpegEmbedSubtitle', }) - if opts.xattrs: - postprocessors.append({'key': 'XAttrMetadata'}) if opts.embedthumbnail: already_have_thumbnail = opts.writethumbnail or opts.write_all_thumbnails postprocessors.append({ @@ -276,6 +274,10 @@ def _real_main(argv=None): }) if not already_have_thumbnail: opts.writethumbnail = True + # XAttrMetadataPP should be run after post-processors that may change file + # contents + if opts.xattrs: + postprocessors.append({'key': 'XAttrMetadata'}) # Please keep ExecAfterDownload towards the bottom as it allows the user to modify the final file in any way. # So if the user is able to remove the file before your postprocessor runs it might cause a few problems. if opts.exec_cmd: From bd2644120526429783c55e885f7042633826d7da Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sun, 2 Oct 2016 03:03:41 +0800 Subject: [PATCH 0273/1571] [utils] Fix xattr error handling --- youtube_dl/extractor/generic.py | 16 ++++++++++++---- youtube_dl/utils.py | 1 + 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index c1792c534..489b3c7c1 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -2332,12 +2332,23 @@ class GenericIE(InfoExtractor): info_dict.update(json_ld) return info_dict + # Look for HTML5 media + entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls') + if entries: + for entry in entries: + entry.update({ + 'id': video_id, + 'title': video_title, + }) + self._sort_formats(entry['formats']) + return self.playlist_result(entries) + def check_video(vurl): if YoutubeIE.suitable(vurl): return True vpath = compat_urlparse.urlparse(vurl).path vext = determine_ext(vpath) - return '.' in vpath and vext not in ('swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml') + return '.' in vpath and vext not in ('swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml', 'js') def filter_video(urls): return list(filter(check_video, urls)) @@ -2387,9 +2398,6 @@ class GenericIE(InfoExtractor): # We only look in og:video if the MIME type is a video, don't try if it's a Flash player: if m_video_type is not None: found = filter_video(re.findall(r'.*?]*)?\s+src=["\'](.*?)["\']', webpage) if not found: REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)' found = re.search( diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index c259f8bff..044520037 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -782,6 +782,7 @@ class XAttrMetadataError(Exception): def __init__(self, code=None, msg='Unknown error'): super(XAttrMetadataError, self).__init__(msg) self.code = code + self.msg = msg # Parsing code and msg if (self.code in (errno.ENOSPC, errno.EDQUOT) or From fd152641726a3f5e47a6a5065f8e9b6fe2623c11 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sun, 2 Oct 2016 05:24:31 +0800 Subject: [PATCH 0274/1571] [jwplatform] Support old-style jwplayer playlists --- ChangeLog | 1 + youtube_dl/extractor/jwplatform.py | 6 ++++++ 2 files changed, 7 insertions(+) diff --git a/ChangeLog b/ChangeLog index acceb9d02..d48a09122 100644 --- a/ChangeLog +++ b/ChangeLog @@ -6,6 +6,7 @@ Core --xattr-set-filesize (#9054) Extractors +* [jwplatform] Improve JWPlayer handling * [dctp] Fix extraction (#10734) + [leeco] Recognize more Le Sports URLs (#10794) diff --git a/youtube_dl/extractor/jwplatform.py b/youtube_dl/extractor/jwplatform.py index 38199fcd0..e10f7e9f9 100644 --- a/youtube_dl/extractor/jwplatform.py +++ b/youtube_dl/extractor/jwplatform.py @@ -39,6 +39,12 @@ class JWPlatformBaseIE(InfoExtractor): jwplayer_data = {'playlist': [jwplayer_data]} entries = [] + + # JWPlayer backward compatibility: single playlist item + # https://github.com/jwplayer/jwplayer/blob/v7.7.0/src/js/playlist/playlist.js#L10 + if not isinstance(jwplayer_data['playlist'], list): + jwplayer_data['playlist'] = [jwplayer_data['playlist']] + for video_data in jwplayer_data['playlist']: # JWPlayer backward compatibility: flattened sources # https://github.com/jwplayer/jwplayer/blob/v7.4.3/src/js/playlist/item.js#L29-L35 From 99ed78c79e94c14ce24bc5bdccaf9573d4f83552 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sun, 2 Oct 2016 14:07:49 +0800 Subject: [PATCH 0275/1571] [jwplatform] Support DASH streams --- ChangeLog | 1 + youtube_dl/extractor/jwplatform.py | 6 +++++- youtube_dl/extractor/rudo.py | 2 +- 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/ChangeLog b/ChangeLog index d48a09122..f5172a864 100644 --- a/ChangeLog +++ b/ChangeLog @@ -6,6 +6,7 @@ Core --xattr-set-filesize (#9054) Extractors ++ [jwplatform] Support DASH streams in JWPlayer * [jwplatform] Improve JWPlayer handling * [dctp] Fix extraction (#10734) + [leeco] Recognize more Le Sports URLs (#10794) diff --git a/youtube_dl/extractor/jwplatform.py b/youtube_dl/extractor/jwplatform.py index e10f7e9f9..5d56e0a28 100644 --- a/youtube_dl/extractor/jwplatform.py +++ b/youtube_dl/extractor/jwplatform.py @@ -32,7 +32,8 @@ class JWPlatformBaseIE(InfoExtractor): return self._parse_jwplayer_data( jwplayer_data, video_id, *args, **kwargs) - def _parse_jwplayer_data(self, jwplayer_data, video_id=None, require_title=True, m3u8_id=None, rtmp_params=None, base_url=None): + def _parse_jwplayer_data(self, jwplayer_data, video_id=None, require_title=True, + m3u8_id=None, mpd_id=None, rtmp_params=None, base_url=None): # JWPlayer backward compatibility: flattened playlists # https://github.com/jwplayer/jwplayer/blob/v7.4.3/src/js/api/config.js#L81-L96 if 'playlist' not in jwplayer_data: @@ -63,6 +64,9 @@ class JWPlatformBaseIE(InfoExtractor): if source_type == 'hls' or ext == 'm3u8': formats.extend(self._extract_m3u8_formats( source_url, this_video_id, 'mp4', 'm3u8_native', m3u8_id=m3u8_id, fatal=False)) + elif ext == 'mpd': + formats.extend(self._extract_mpd_formats( + source_url, this_video_id, mpd_id=mpd_id, fatal=False)) # https://github.com/jwplayer/jwplayer/blob/master/src/js/providers/default.js#L67 elif source_type.startswith('audio') or ext in ('oga', 'aac', 'mp3', 'mpeg', 'vorbis'): formats.append({ diff --git a/youtube_dl/extractor/rudo.py b/youtube_dl/extractor/rudo.py index 38366b784..9a330c196 100644 --- a/youtube_dl/extractor/rudo.py +++ b/youtube_dl/extractor/rudo.py @@ -43,7 +43,7 @@ class RudoIE(JWPlatformBaseIE): transform_source=lambda s: js_to_json(re.sub(r'encodeURI\([^)]+\)', '""', s))) info_dict = self._parse_jwplayer_data( - jwplayer_data, video_id, require_title=False, m3u8_id='hls') + jwplayer_data, video_id, require_title=False, m3u8_id='hls', mpd_id='dash') info_dict.update({ 'title': self._og_search_title(webpage), From 703b3afa93326c96bc5faf753305ab95c4e98b10 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sun, 2 Oct 2016 14:25:06 +0800 Subject: [PATCH 0276/1571] [amcnetworks] Skip a restricted _TEST --- youtube_dl/extractor/amcnetworks.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/amcnetworks.py b/youtube_dl/extractor/amcnetworks.py index c739d2c99..d2b03b177 100644 --- a/youtube_dl/extractor/amcnetworks.py +++ b/youtube_dl/extractor/amcnetworks.py @@ -28,6 +28,7 @@ class AMCNetworksIE(ThePlatformIE): # m3u8 download 'skip_download': True, }, + 'skip': 'Requires TV provider accounts', }, { 'url': 'http://www.bbcamerica.com/shows/the-hunt/full-episodes/season-1/episode-01-the-hardest-challenge', 'only_matching': True, From 26406d33c7808bdff38ffcda36d2d6a4e5bb4f4d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 2 Oct 2016 15:56:33 +0700 Subject: [PATCH 0277/1571] [ChangeLog] Actualize --- ChangeLog | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/ChangeLog b/ChangeLog index f5172a864..277cc2ee1 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,15 +1,26 @@ version Core -* Fix possibly lost extended attributes +* Fix possibly lost extended attributes during post-processing + Support pyxattr as well as python-xattr for --xattrs and --xattr-set-filesize (#9054) Extractors + [jwplatform] Support DASH streams in JWPlayer -* [jwplatform] Improve JWPlayer handling ++ [jwplatform] Support old-style JWPlayer playlists ++ [byutv:event] Add extractor +* [periscope:user] Fix extraction (#10820) * [dctp] Fix extraction (#10734) ++ [instagram] Extract video dimensions (#10790) ++ [tvland] Extend URL regular expression (#10812) ++ [vgtv] Add support for tv.aftonbladet.se (#10800) +- [aftonbladet] Remove extractor +* [vk] Fix timestamp and view count extraction (#10760) ++ [vk] Add support for running and finished live streams (#10799) + [leeco] Recognize more Le Sports URLs (#10794) ++ [instagram] Extract comments (#10788) ++ [ketnet] Extract mzsource formats (#10770) +* [limelight:media] Improve HTTP formats extraction version 2016.09.27 From 6c152ce20f7bd5f1fbb786abe70c4aa3412aef26 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 2 Oct 2016 15:58:00 +0700 Subject: [PATCH 0278/1571] release 2016.10.02 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 2 +- youtube_dl/version.py | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 273eb8c0b..e813e4c59 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.09.27*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.09.27** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.10.02*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.10.02** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.09.27 +[debug] youtube-dl version 2016.10.02 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 277cc2ee1..4f64edabb 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2016.10.02 Core * Fix possibly lost extended attributes during post-processing diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 26f275577..828ed0ba9 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -34,7 +34,6 @@ - **AdultSwim** - **aenetworks**: A+E Networks: A&E, Lifetime, History.com, FYI Network - **AfreecaTV**: afreecatv.com - - **Aftonbladet** - **AirMozilla** - **AlJazeera** - **Allocine** @@ -112,6 +111,7 @@ - **bt:vestlendingen**: Bergens Tidende - Vestlendingen - **BuzzFeed** - **BYUtv** + - **BYUtvEvent** - **Camdemy** - **CamdemyFolder** - **CamWithHer** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index af0c2cfc4..161ba4391 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.09.27' +__version__ = '2016.10.02' From 567a5996cac5f3ba2d06748cbbfb295eab48074c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?D=C3=A9stin=20Reed?= Date: Sat, 1 Oct 2016 15:34:46 +0200 Subject: [PATCH 0279/1571] [pornoxo] Use JWPlatform to improve metadata extraction --- youtube_dl/extractor/pornoxo.py | 31 ++++++++++++------------------- 1 file changed, 12 insertions(+), 19 deletions(-) diff --git a/youtube_dl/extractor/pornoxo.py b/youtube_dl/extractor/pornoxo.py index 202f58673..3c9087f2d 100644 --- a/youtube_dl/extractor/pornoxo.py +++ b/youtube_dl/extractor/pornoxo.py @@ -2,13 +2,13 @@ from __future__ import unicode_literals import re -from .common import InfoExtractor +from .jwplatform import JWPlatformBaseIE from ..utils import ( str_to_int, ) -class PornoXOIE(InfoExtractor): +class PornoXOIE(JWPlatformBaseIE): _VALID_URL = r'https?://(?:www\.)?pornoxo\.com/videos/(?P\d+)/(?P[^/]+)\.html' _TEST = { 'url': 'http://www.pornoxo.com/videos/7564/striptease-from-sexy-secretary.html', @@ -17,7 +17,8 @@ class PornoXOIE(InfoExtractor): 'id': '7564', 'ext': 'flv', 'title': 'Striptease From Sexy Secretary!', - 'description': 'Striptease From Sexy Secretary!', + 'display_id': 'striptease-from-sexy-secretary', + 'description': 'md5:0ee35252b685b3883f4a1d38332f9980', 'categories': list, # NSFW 'thumbnail': 're:https?://.*\.jpg$', 'age_limit': 18, @@ -26,23 +27,14 @@ class PornoXOIE(InfoExtractor): def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') + video_id, display_id = mobj.groups() webpage = self._download_webpage(url, video_id) - - video_url = self._html_search_regex( - r'\'file\'\s*:\s*"([^"]+)"', webpage, 'video_url') + video_data = self._extract_jwplayer_data(webpage, video_id, require_title=False) title = self._html_search_regex( r'([^<]+)\s*-\s*PornoXO', webpage, 'title') - description = self._html_search_regex( - r'<meta name="description" content="([^"]+)\s*featuring', - webpage, 'description', fatal=False) - - thumbnail = self._html_search_regex( - r'\'image\'\s*:\s*"([^"]+)"', webpage, 'thumbnail', fatal=False) - view_count = str_to_int(self._html_search_regex( r'[vV]iews:\s*([0-9,]+)', webpage, 'view count', fatal=False)) @@ -53,13 +45,14 @@ class PornoXOIE(InfoExtractor): None if categories_str is None else categories_str.split(',')) - return { + video_data.update({ 'id': video_id, - 'url': video_url, 'title': title, - 'description': description, - 'thumbnail': thumbnail, + 'display_id': display_id, + 'description': self._html_search_meta('description', webpage), 'categories': categories, 'view_count': view_count, 'age_limit': 18, - } + }) + + return video_data From ee5de4e38e3629ffc5d6360e06fa5dcfd43cbeb3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 3 Oct 2016 00:54:02 +0700 Subject: [PATCH 0280/1571] [nhl] Add support for wch2016.com (Closes #10833) --- youtube_dl/extractor/nhl.py | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/nhl.py b/youtube_dl/extractor/nhl.py index b04d21113..26149c88f 100644 --- a/youtube_dl/extractor/nhl.py +++ b/youtube_dl/extractor/nhl.py @@ -245,7 +245,11 @@ class NHLVideocenterCategoryIE(NHLBaseInfoExtractor): class NHLIE(InfoExtractor): IE_NAME = 'nhl.com' - _VALID_URL = r'https?://(?:www\.)?nhl\.com/([^/]+/)*c-(?P<id>\d+)' + _VALID_URL = r'https?://(?:www\.)?(?P<site>nhl|wch2016)\.com/(?:[^/]+/)*c-(?P<id>\d+)' + _SITES_MAP = { + 'nhl': 'nhl', + 'wch2016': 'wch', + } _TESTS = [{ # type=video 'url': 'https://www.nhl.com/video/anisimov-cleans-up-mess/t-277752844/c-43663503', @@ -270,13 +274,20 @@ class NHLIE(InfoExtractor): 'upload_date': '20160204', 'timestamp': 1454544904, }, + }, { + 'url': 'https://www.wch2016.com/video/caneur-best-of-game-2-micd-up/t-281230378/c-44983703', + 'only_matching': True, + }, { + 'url': 'https://www.wch2016.com/news/3-stars-team-europe-vs-team-canada/c-282195068', + 'only_matching': True, }] def _real_extract(self, url): - tmp_id = self._match_id(url) + mobj = re.match(self._VALID_URL, url) + tmp_id, site = mobj.group('id'), mobj.group('site') video_data = self._download_json( - 'https://nhl.bamcontent.com/nhl/id/v1/%s/details/web-v1.json' % tmp_id, - tmp_id) + 'https://nhl.bamcontent.com/%s/id/v1/%s/details/web-v1.json' + % (self._SITES_MAP[site], tmp_id), tmp_id) if video_data.get('type') == 'article': video_data = video_data['media'] From c1084ddb0c87dac450d2b7c1b1cfef386d6f4481 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Mon, 3 Oct 2016 15:27:09 +0100 Subject: [PATCH 0281/1571] [thisoldhouse] Add new extractor(closes #10837) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/thisoldhouse.py | 32 ++++++++++++++++++++++++++++ 2 files changed, 33 insertions(+) create mode 100644 youtube_dl/extractor/thisoldhouse.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index e8928307c..dca4973d4 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -892,6 +892,7 @@ from .thesixtyone import TheSixtyOneIE from .thestar import TheStarIE from .thisamericanlife import ThisAmericanLifeIE from .thisav import ThisAVIE +from .thisoldhouse import ThisOldHouseIE from .threeqsdn import ThreeQSDNIE from .tinypic import TinyPicIE from .tlc import TlcDeIE diff --git a/youtube_dl/extractor/thisoldhouse.py b/youtube_dl/extractor/thisoldhouse.py new file mode 100644 index 000000000..7629f0d10 --- /dev/null +++ b/youtube_dl/extractor/thisoldhouse.py @@ -0,0 +1,32 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor + + +class ThisOldHouseIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?thisoldhouse\.com/(?:watch|how-to)/(?P<id>[^/?#]+)' + _TESTS = [{ + 'url': 'https://www.thisoldhouse.com/how-to/how-to-build-storage-bench', + 'md5': '568acf9ca25a639f0c4ff905826b662f', + 'info_dict': { + 'id': '2REGtUDQ', + 'ext': 'mp4', + 'title': 'How to Build a Storage Bench', + 'description': 'In the workshop, Tom Silva and Kevin O\'Connor build a storage bench for an entryway.', + 'timestamp': 1442548800, + 'upload_date': '20150918', + } + }, { + 'url': 'https://www.thisoldhouse.com/watch/arlington-arts-crafts-arts-and-crafts-class-begins', + 'only_matching': True, + }] + + def _real_extract(self, url): + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + drupal_settings = self._parse_json(self._search_regex( + r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);', + webpage, 'drupal settings'), display_id) + video_id = drupal_settings['jwplatform']['video_id'] + return self.url_result('jwplatform:' + video_id, 'JWPlatform', video_id) From dcdb292fddc82ae11f4c0b647815a45c88a6b6d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?D=C3=A9stin=20Reed?= <trox1972@users.noreply.github.com> Date: Sun, 2 Oct 2016 13:39:18 +0200 Subject: [PATCH 0282/1571] Unify coding cookie --- devscripts/lazy_load_template.py | 2 +- docs/conf.py | 2 +- setup.py | 2 +- youtube_dl/YoutubeDL.py | 2 +- youtube_dl/__init__.py | 2 +- youtube_dl/extractor/adobepass.py | 2 +- youtube_dl/extractor/allocine.py | 2 +- youtube_dl/extractor/arte.py | 2 +- youtube_dl/extractor/brightcove.py | 2 +- youtube_dl/extractor/canalplus.py | 2 +- youtube_dl/extractor/cbsnews.py | 2 +- youtube_dl/extractor/ceskatelevize.py | 2 +- youtube_dl/extractor/comcarcoff.py | 2 +- youtube_dl/extractor/crunchyroll.py | 2 +- youtube_dl/extractor/daum.py | 2 +- youtube_dl/extractor/dramafever.py | 2 +- youtube_dl/extractor/eitb.py | 2 +- youtube_dl/extractor/embedly.py | 2 +- youtube_dl/extractor/faz.py | 2 +- youtube_dl/extractor/firsttv.py | 2 +- youtube_dl/extractor/folketinget.py | 2 +- youtube_dl/extractor/francetv.py | 2 +- youtube_dl/extractor/generic.py | 2 +- youtube_dl/extractor/goshgay.py | 2 +- youtube_dl/extractor/hark.py | 2 +- youtube_dl/extractor/helsinki.py | 2 +- youtube_dl/extractor/ina.py | 2 +- youtube_dl/extractor/jpopsukitv.py | 2 +- youtube_dl/extractor/kickstarter.py | 2 +- youtube_dl/extractor/kontrtube.py | 2 +- youtube_dl/extractor/krasview.py | 2 +- youtube_dl/extractor/lifenews.py | 2 +- youtube_dl/extractor/m6.py | 2 +- youtube_dl/extractor/mailru.py | 2 +- youtube_dl/extractor/moviezine.py | 2 +- youtube_dl/extractor/musicplayon.py | 2 +- youtube_dl/extractor/myspace.py | 2 +- youtube_dl/extractor/naver.py | 2 +- youtube_dl/extractor/newstube.py | 2 +- youtube_dl/extractor/niconico.py | 2 +- youtube_dl/extractor/noco.py | 2 +- youtube_dl/extractor/normalboots.py | 2 +- youtube_dl/extractor/nova.py | 2 +- youtube_dl/extractor/nowness.py | 2 +- youtube_dl/extractor/nrk.py | 2 +- youtube_dl/extractor/ntvru.py | 2 +- youtube_dl/extractor/nuevo.py | 2 +- youtube_dl/extractor/oktoberfesttv.py | 2 +- youtube_dl/extractor/pandoratv.py | 2 +- youtube_dl/extractor/patreon.py | 2 +- youtube_dl/extractor/porn91.py | 2 +- youtube_dl/extractor/prosiebensat1.py | 2 +- youtube_dl/extractor/puls4.py | 2 +- youtube_dl/extractor/radiobremen.py | 2 +- youtube_dl/extractor/rmcdecouverte.py | 2 +- youtube_dl/extractor/rtl2.py | 2 +- youtube_dl/extractor/rtve.py | 2 +- youtube_dl/extractor/ruhd.py | 2 +- youtube_dl/extractor/rutube.py | 2 +- youtube_dl/extractor/rutv.py | 2 +- youtube_dl/extractor/safari.py | 2 +- youtube_dl/extractor/sapo.py | 2 +- youtube_dl/extractor/sbs.py | 2 +- youtube_dl/extractor/screencast.py | 2 +- youtube_dl/extractor/screenwavemedia.py | 2 +- youtube_dl/extractor/smotri.py | 2 +- youtube_dl/extractor/sohu.py | 2 +- youtube_dl/extractor/soundcloud.py | 2 +- youtube_dl/extractor/southpark.py | 2 +- youtube_dl/extractor/spiegel.py | 2 +- youtube_dl/extractor/srmediathek.py | 2 +- youtube_dl/extractor/streamcz.py | 2 +- youtube_dl/extractor/swrmediathek.py | 2 +- youtube_dl/extractor/sztvhu.py | 2 +- youtube_dl/extractor/tagesschau.py | 2 +- youtube_dl/extractor/tass.py | 2 +- youtube_dl/extractor/teachertube.py | 2 +- youtube_dl/extractor/teamcoco.py | 2 +- youtube_dl/extractor/theintercept.py | 2 +- youtube_dl/extractor/theplatform.py | 2 +- youtube_dl/extractor/tlc.py | 2 +- youtube_dl/extractor/toypics.py | 2 +- youtube_dl/extractor/tumblr.py | 2 +- youtube_dl/extractor/tv2.py | 2 +- youtube_dl/extractor/tvigle.py | 2 +- youtube_dl/extractor/vbox7.py | 2 +- youtube_dl/extractor/vesti.py | 2 +- youtube_dl/extractor/vimeo.py | 2 +- youtube_dl/extractor/vk.py | 2 +- youtube_dl/extractor/vodlocker.py | 2 +- youtube_dl/extractor/wdr.py | 2 +- youtube_dl/extractor/wrzuta.py | 2 +- youtube_dl/extractor/wsj.py | 2 +- youtube_dl/extractor/xboxclips.py | 2 +- youtube_dl/extractor/xnxx.py | 2 +- youtube_dl/extractor/xuite.py | 2 +- youtube_dl/extractor/zingmp3.py | 2 +- youtube_dl/postprocessor/embedthumbnail.py | 2 +- youtube_dl/utils.py | 2 +- 99 files changed, 99 insertions(+), 99 deletions(-) diff --git a/devscripts/lazy_load_template.py b/devscripts/lazy_load_template.py index 2e6e6641b..c4e5fc1f4 100644 --- a/devscripts/lazy_load_template.py +++ b/devscripts/lazy_load_template.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals import re diff --git a/docs/conf.py b/docs/conf.py index 594ca61a6..0aaf1b8fc 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -1,4 +1,4 @@ -# -*- coding: utf-8 -*- +# coding: utf-8 # # youtube-dl documentation build configuration file, created by # sphinx-quickstart on Fri Mar 14 21:05:43 2014. diff --git a/setup.py b/setup.py index 508b27f37..ce6dd1870 100644 --- a/setup.py +++ b/setup.py @@ -1,5 +1,5 @@ #!/usr/bin/env python -# -*- coding: utf-8 -*- +# coding: utf-8 from __future__ import print_function diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 442aa663b..99825e343 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1,5 +1,5 @@ #!/usr/bin/env python -# -*- coding: utf-8 -*- +# coding: utf-8 from __future__ import absolute_import, unicode_literals diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index f84b866df..643393558 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -1,5 +1,5 @@ #!/usr/bin/env python -# -*- coding: utf-8 -*- +# coding: utf-8 from __future__ import unicode_literals diff --git a/youtube_dl/extractor/adobepass.py b/youtube_dl/extractor/adobepass.py index 8f7ed6ef2..d62010cb2 100644 --- a/youtube_dl/extractor/adobepass.py +++ b/youtube_dl/extractor/adobepass.py @@ -1,4 +1,4 @@ -# -*- coding: utf-8 -*- +# coding: utf-8 from __future__ import unicode_literals import re diff --git a/youtube_dl/extractor/allocine.py b/youtube_dl/extractor/allocine.py index 190bc2cc8..7d280d871 100644 --- a/youtube_dl/extractor/allocine.py +++ b/youtube_dl/extractor/allocine.py @@ -1,4 +1,4 @@ -# -*- coding: utf-8 -*- +# coding: utf-8 from __future__ import unicode_literals import re diff --git a/youtube_dl/extractor/arte.py b/youtube_dl/extractor/arte.py index e0c5c1804..dbac24b18 100644 --- a/youtube_dl/extractor/arte.py +++ b/youtube_dl/extractor/arte.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals import re diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py index 2ec55b185..945cf19e8 100644 --- a/youtube_dl/extractor/brightcove.py +++ b/youtube_dl/extractor/brightcove.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals import re diff --git a/youtube_dl/extractor/canalplus.py b/youtube_dl/extractor/canalplus.py index 69e8f4f57..6dab226af 100644 --- a/youtube_dl/extractor/canalplus.py +++ b/youtube_dl/extractor/canalplus.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals import re diff --git a/youtube_dl/extractor/cbsnews.py b/youtube_dl/extractor/cbsnews.py index 216989230..91b0f5fa9 100644 --- a/youtube_dl/extractor/cbsnews.py +++ b/youtube_dl/extractor/cbsnews.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals from .common import InfoExtractor diff --git a/youtube_dl/extractor/ceskatelevize.py b/youtube_dl/extractor/ceskatelevize.py index 87c2e7089..4ec79d19d 100644 --- a/youtube_dl/extractor/ceskatelevize.py +++ b/youtube_dl/extractor/ceskatelevize.py @@ -1,4 +1,4 @@ -# -*- coding: utf-8 -*- +# coding: utf-8 from __future__ import unicode_literals import re diff --git a/youtube_dl/extractor/comcarcoff.py b/youtube_dl/extractor/comcarcoff.py index 747c245c8..588aad0d9 100644 --- a/youtube_dl/extractor/comcarcoff.py +++ b/youtube_dl/extractor/comcarcoff.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals from .common import InfoExtractor diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py index e4c10ad24..c38fd095a 100644 --- a/youtube_dl/extractor/crunchyroll.py +++ b/youtube_dl/extractor/crunchyroll.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals import re diff --git a/youtube_dl/extractor/daum.py b/youtube_dl/extractor/daum.py index b5c310ccb..732b4362a 100644 --- a/youtube_dl/extractor/daum.py +++ b/youtube_dl/extractor/daum.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals diff --git a/youtube_dl/extractor/dramafever.py b/youtube_dl/extractor/dramafever.py index 3b6529f4b..c11595612 100644 --- a/youtube_dl/extractor/dramafever.py +++ b/youtube_dl/extractor/dramafever.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals import itertools diff --git a/youtube_dl/extractor/eitb.py b/youtube_dl/extractor/eitb.py index 713cb7b32..ee5ead18b 100644 --- a/youtube_dl/extractor/eitb.py +++ b/youtube_dl/extractor/eitb.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals from .common import InfoExtractor diff --git a/youtube_dl/extractor/embedly.py b/youtube_dl/extractor/embedly.py index 1cdb11e34..a5820b21e 100644 --- a/youtube_dl/extractor/embedly.py +++ b/youtube_dl/extractor/embedly.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals from .common import InfoExtractor diff --git a/youtube_dl/extractor/faz.py b/youtube_dl/extractor/faz.py index fd535457d..4bc8fc512 100644 --- a/youtube_dl/extractor/faz.py +++ b/youtube_dl/extractor/faz.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals from .common import InfoExtractor diff --git a/youtube_dl/extractor/firsttv.py b/youtube_dl/extractor/firsttv.py index 332d12020..6b662cc3c 100644 --- a/youtube_dl/extractor/firsttv.py +++ b/youtube_dl/extractor/firsttv.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals from .common import InfoExtractor diff --git a/youtube_dl/extractor/folketinget.py b/youtube_dl/extractor/folketinget.py index 75399fa7d..b3df93f28 100644 --- a/youtube_dl/extractor/folketinget.py +++ b/youtube_dl/extractor/folketinget.py @@ -1,4 +1,4 @@ -# -*- coding: utf-8 -*- +# coding: utf-8 from __future__ import unicode_literals from .common import InfoExtractor diff --git a/youtube_dl/extractor/francetv.py b/youtube_dl/extractor/francetv.py index 3233f66d5..e7068d1ae 100644 --- a/youtube_dl/extractor/francetv.py +++ b/youtube_dl/extractor/francetv.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 489b3c7c1..9ea306e3a 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals diff --git a/youtube_dl/extractor/goshgay.py b/youtube_dl/extractor/goshgay.py index a43abd154..74e1720ee 100644 --- a/youtube_dl/extractor/goshgay.py +++ b/youtube_dl/extractor/goshgay.py @@ -1,4 +1,4 @@ -# -*- coding: utf-8 -*- +# coding: utf-8 from __future__ import unicode_literals from .common import InfoExtractor diff --git a/youtube_dl/extractor/hark.py b/youtube_dl/extractor/hark.py index 749e9154f..342a6130e 100644 --- a/youtube_dl/extractor/hark.py +++ b/youtube_dl/extractor/hark.py @@ -1,4 +1,4 @@ -# -*- coding: utf-8 -*- +# coding: utf-8 from __future__ import unicode_literals from .common import InfoExtractor diff --git a/youtube_dl/extractor/helsinki.py b/youtube_dl/extractor/helsinki.py index 93107b306..575fb332a 100644 --- a/youtube_dl/extractor/helsinki.py +++ b/youtube_dl/extractor/helsinki.py @@ -1,4 +1,4 @@ -# -*- coding: utf-8 -*- +# coding: utf-8 from __future__ import unicode_literals diff --git a/youtube_dl/extractor/ina.py b/youtube_dl/extractor/ina.py index 65712abc2..9544ff9d4 100644 --- a/youtube_dl/extractor/ina.py +++ b/youtube_dl/extractor/ina.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals import re diff --git a/youtube_dl/extractor/jpopsukitv.py b/youtube_dl/extractor/jpopsukitv.py index 122e2dd8c..4b5f346d1 100644 --- a/youtube_dl/extractor/jpopsukitv.py +++ b/youtube_dl/extractor/jpopsukitv.py @@ -1,4 +1,4 @@ -# coding=utf-8 +# coding: utf-8 from __future__ import unicode_literals from .common import InfoExtractor diff --git a/youtube_dl/extractor/kickstarter.py b/youtube_dl/extractor/kickstarter.py index fbe499497..d4da8f484 100644 --- a/youtube_dl/extractor/kickstarter.py +++ b/youtube_dl/extractor/kickstarter.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals from .common import InfoExtractor diff --git a/youtube_dl/extractor/kontrtube.py b/youtube_dl/extractor/kontrtube.py index 704bd7b34..1fda45107 100644 --- a/youtube_dl/extractor/kontrtube.py +++ b/youtube_dl/extractor/kontrtube.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals import re diff --git a/youtube_dl/extractor/krasview.py b/youtube_dl/extractor/krasview.py index 0ae8ebd68..cf8876fa1 100644 --- a/youtube_dl/extractor/krasview.py +++ b/youtube_dl/extractor/krasview.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals import json diff --git a/youtube_dl/extractor/lifenews.py b/youtube_dl/extractor/lifenews.py index 87120ecd1..afce2010e 100644 --- a/youtube_dl/extractor/lifenews.py +++ b/youtube_dl/extractor/lifenews.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals import re diff --git a/youtube_dl/extractor/m6.py b/youtube_dl/extractor/m6.py index 39d2742c8..9806875e8 100644 --- a/youtube_dl/extractor/m6.py +++ b/youtube_dl/extractor/m6.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals from .common import InfoExtractor diff --git a/youtube_dl/extractor/mailru.py b/youtube_dl/extractor/mailru.py index 9a7098c43..f7cc3c832 100644 --- a/youtube_dl/extractor/mailru.py +++ b/youtube_dl/extractor/mailru.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals import re diff --git a/youtube_dl/extractor/moviezine.py b/youtube_dl/extractor/moviezine.py index aa091a62c..478e39967 100644 --- a/youtube_dl/extractor/moviezine.py +++ b/youtube_dl/extractor/moviezine.py @@ -1,4 +1,4 @@ -# -*- coding: utf-8 -*- +# coding: utf-8 from __future__ import unicode_literals import re diff --git a/youtube_dl/extractor/musicplayon.py b/youtube_dl/extractor/musicplayon.py index 2174e5665..1854d59a5 100644 --- a/youtube_dl/extractor/musicplayon.py +++ b/youtube_dl/extractor/musicplayon.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals from .common import InfoExtractor diff --git a/youtube_dl/extractor/myspace.py b/youtube_dl/extractor/myspace.py index 0d5238d77..ab32e632e 100644 --- a/youtube_dl/extractor/myspace.py +++ b/youtube_dl/extractor/myspace.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals import re diff --git a/youtube_dl/extractor/naver.py b/youtube_dl/extractor/naver.py index 0891d2772..055070ff5 100644 --- a/youtube_dl/extractor/naver.py +++ b/youtube_dl/extractor/naver.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals import re diff --git a/youtube_dl/extractor/newstube.py b/youtube_dl/extractor/newstube.py index 0092b85ce..e3f35f1d8 100644 --- a/youtube_dl/extractor/newstube.py +++ b/youtube_dl/extractor/newstube.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals import re diff --git a/youtube_dl/extractor/niconico.py b/youtube_dl/extractor/niconico.py index 6eaaa8416..a104e33f8 100644 --- a/youtube_dl/extractor/niconico.py +++ b/youtube_dl/extractor/niconico.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals import re diff --git a/youtube_dl/extractor/noco.py b/youtube_dl/extractor/noco.py index 06f2bda07..70ff2ab36 100644 --- a/youtube_dl/extractor/noco.py +++ b/youtube_dl/extractor/noco.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals import re diff --git a/youtube_dl/extractor/normalboots.py b/youtube_dl/extractor/normalboots.py index af44c3bb5..6aa0895b8 100644 --- a/youtube_dl/extractor/normalboots.py +++ b/youtube_dl/extractor/normalboots.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals from .common import InfoExtractor diff --git a/youtube_dl/extractor/nova.py b/youtube_dl/extractor/nova.py index 17671ad39..103952345 100644 --- a/youtube_dl/extractor/nova.py +++ b/youtube_dl/extractor/nova.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals import re diff --git a/youtube_dl/extractor/nowness.py b/youtube_dl/extractor/nowness.py index 74860eb20..7e5346316 100644 --- a/youtube_dl/extractor/nowness.py +++ b/youtube_dl/extractor/nowness.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals from .brightcove import ( diff --git a/youtube_dl/extractor/nrk.py b/youtube_dl/extractor/nrk.py index ed42eb301..d471eb20c 100644 --- a/youtube_dl/extractor/nrk.py +++ b/youtube_dl/extractor/nrk.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals import re diff --git a/youtube_dl/extractor/ntvru.py b/youtube_dl/extractor/ntvru.py index e8702ebcd..7d7a785ab 100644 --- a/youtube_dl/extractor/ntvru.py +++ b/youtube_dl/extractor/ntvru.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals from .common import InfoExtractor diff --git a/youtube_dl/extractor/nuevo.py b/youtube_dl/extractor/nuevo.py index ef093dec2..87fb94d1f 100644 --- a/youtube_dl/extractor/nuevo.py +++ b/youtube_dl/extractor/nuevo.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals from .common import InfoExtractor diff --git a/youtube_dl/extractor/oktoberfesttv.py b/youtube_dl/extractor/oktoberfesttv.py index f2ccc53dc..50fbbc79c 100644 --- a/youtube_dl/extractor/oktoberfesttv.py +++ b/youtube_dl/extractor/oktoberfesttv.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals from .common import InfoExtractor diff --git a/youtube_dl/extractor/pandoratv.py b/youtube_dl/extractor/pandoratv.py index 8d49f5c4a..2b07958bb 100644 --- a/youtube_dl/extractor/pandoratv.py +++ b/youtube_dl/extractor/pandoratv.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals from .common import InfoExtractor diff --git a/youtube_dl/extractor/patreon.py b/youtube_dl/extractor/patreon.py index 229750665..a6a2c273f 100644 --- a/youtube_dl/extractor/patreon.py +++ b/youtube_dl/extractor/patreon.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals from .common import InfoExtractor diff --git a/youtube_dl/extractor/porn91.py b/youtube_dl/extractor/porn91.py index 9894f3262..073fc3e21 100644 --- a/youtube_dl/extractor/porn91.py +++ b/youtube_dl/extractor/porn91.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals from ..compat import ( diff --git a/youtube_dl/extractor/prosiebensat1.py b/youtube_dl/extractor/prosiebensat1.py index 873d4f981..7cc07a2ad 100644 --- a/youtube_dl/extractor/prosiebensat1.py +++ b/youtube_dl/extractor/prosiebensat1.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals import re diff --git a/youtube_dl/extractor/puls4.py b/youtube_dl/extractor/puls4.py index 9c2ccbe2d..1c54af002 100644 --- a/youtube_dl/extractor/puls4.py +++ b/youtube_dl/extractor/puls4.py @@ -1,4 +1,4 @@ -# -*- coding: utf-8 -*- +# coding: utf-8 from __future__ import unicode_literals from .prosiebensat1 import ProSiebenSat1BaseIE diff --git a/youtube_dl/extractor/radiobremen.py b/youtube_dl/extractor/radiobremen.py index 19a751da0..0aa8d059b 100644 --- a/youtube_dl/extractor/radiobremen.py +++ b/youtube_dl/extractor/radiobremen.py @@ -1,4 +1,4 @@ -# -*- coding: utf-8 -*- +# coding: utf-8 from __future__ import unicode_literals diff --git a/youtube_dl/extractor/rmcdecouverte.py b/youtube_dl/extractor/rmcdecouverte.py index f3bb4fa66..2340dae53 100644 --- a/youtube_dl/extractor/rmcdecouverte.py +++ b/youtube_dl/extractor/rmcdecouverte.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals from .common import InfoExtractor diff --git a/youtube_dl/extractor/rtl2.py b/youtube_dl/extractor/rtl2.py index de004671d..cb4ee8803 100644 --- a/youtube_dl/extractor/rtl2.py +++ b/youtube_dl/extractor/rtl2.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals import re diff --git a/youtube_dl/extractor/rtve.py b/youtube_dl/extractor/rtve.py index f1b92f6da..6a43b036e 100644 --- a/youtube_dl/extractor/rtve.py +++ b/youtube_dl/extractor/rtve.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals import base64 diff --git a/youtube_dl/extractor/ruhd.py b/youtube_dl/extractor/ruhd.py index 1f7c26299..ce631b46c 100644 --- a/youtube_dl/extractor/ruhd.py +++ b/youtube_dl/extractor/ruhd.py @@ -1,4 +1,4 @@ -# -*- coding: utf-8 -*- +# coding: utf-8 from __future__ import unicode_literals from .common import InfoExtractor diff --git a/youtube_dl/extractor/rutube.py b/youtube_dl/extractor/rutube.py index 5d0ace5bf..fd1df925b 100644 --- a/youtube_dl/extractor/rutube.py +++ b/youtube_dl/extractor/rutube.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals import re diff --git a/youtube_dl/extractor/rutv.py b/youtube_dl/extractor/rutv.py index a2379eb04..a5e672c0a 100644 --- a/youtube_dl/extractor/rutv.py +++ b/youtube_dl/extractor/rutv.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals import re diff --git a/youtube_dl/extractor/safari.py b/youtube_dl/extractor/safari.py index eabe41efe..8b35fd244 100644 --- a/youtube_dl/extractor/safari.py +++ b/youtube_dl/extractor/safari.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals import re diff --git a/youtube_dl/extractor/sapo.py b/youtube_dl/extractor/sapo.py index 172cc1275..49a9b313a 100644 --- a/youtube_dl/extractor/sapo.py +++ b/youtube_dl/extractor/sapo.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals import re diff --git a/youtube_dl/extractor/sbs.py b/youtube_dl/extractor/sbs.py index 96472fbc4..43131fb7e 100644 --- a/youtube_dl/extractor/sbs.py +++ b/youtube_dl/extractor/sbs.py @@ -1,4 +1,4 @@ -# -*- coding: utf-8 -*- +# coding: utf-8 from __future__ import unicode_literals from .common import InfoExtractor diff --git a/youtube_dl/extractor/screencast.py b/youtube_dl/extractor/screencast.py index 356631700..ed9de9648 100644 --- a/youtube_dl/extractor/screencast.py +++ b/youtube_dl/extractor/screencast.py @@ -1,4 +1,4 @@ -# -*- coding: utf-8 -*- +# coding: utf-8 from __future__ import unicode_literals from .common import InfoExtractor diff --git a/youtube_dl/extractor/screenwavemedia.py b/youtube_dl/extractor/screenwavemedia.py index 40333c825..7d77e8825 100644 --- a/youtube_dl/extractor/screenwavemedia.py +++ b/youtube_dl/extractor/screenwavemedia.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals import re diff --git a/youtube_dl/extractor/smotri.py b/youtube_dl/extractor/smotri.py index 114358786..def46abda 100644 --- a/youtube_dl/extractor/smotri.py +++ b/youtube_dl/extractor/smotri.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals import re diff --git a/youtube_dl/extractor/sohu.py b/youtube_dl/extractor/sohu.py index 48e2ba2dd..30760ca06 100644 --- a/youtube_dl/extractor/sohu.py +++ b/youtube_dl/extractor/sohu.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals import re diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index 1a8114aa7..3b7ecb3c3 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals import re diff --git a/youtube_dl/extractor/southpark.py b/youtube_dl/extractor/southpark.py index e2a9e45ac..08f8c5744 100644 --- a/youtube_dl/extractor/southpark.py +++ b/youtube_dl/extractor/southpark.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals from .mtv import MTVServicesInfoExtractor diff --git a/youtube_dl/extractor/spiegel.py b/youtube_dl/extractor/spiegel.py index b41d9f59f..ec1b60388 100644 --- a/youtube_dl/extractor/spiegel.py +++ b/youtube_dl/extractor/spiegel.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals import re diff --git a/youtube_dl/extractor/srmediathek.py b/youtube_dl/extractor/srmediathek.py index 409d50304..b03272f7a 100644 --- a/youtube_dl/extractor/srmediathek.py +++ b/youtube_dl/extractor/srmediathek.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals from .ard import ARDMediathekIE diff --git a/youtube_dl/extractor/streamcz.py b/youtube_dl/extractor/streamcz.py index d3d2b7eb7..9e533103c 100644 --- a/youtube_dl/extractor/streamcz.py +++ b/youtube_dl/extractor/streamcz.py @@ -1,4 +1,4 @@ -# -*- coding: utf-8 -*- +# coding: utf-8 from __future__ import unicode_literals import hashlib diff --git a/youtube_dl/extractor/swrmediathek.py b/youtube_dl/extractor/swrmediathek.py index 58073eefe..6d69f7686 100644 --- a/youtube_dl/extractor/swrmediathek.py +++ b/youtube_dl/extractor/swrmediathek.py @@ -1,4 +1,4 @@ -# -*- coding: utf-8 -*- +# coding: utf-8 from __future__ import unicode_literals import re diff --git a/youtube_dl/extractor/sztvhu.py b/youtube_dl/extractor/sztvhu.py index f562aa6d3..cfad33146 100644 --- a/youtube_dl/extractor/sztvhu.py +++ b/youtube_dl/extractor/sztvhu.py @@ -1,4 +1,4 @@ -# -*- coding: utf-8 -*- +# coding: utf-8 from __future__ import unicode_literals from .common import InfoExtractor diff --git a/youtube_dl/extractor/tagesschau.py b/youtube_dl/extractor/tagesschau.py index 136e18f96..8670cee28 100644 --- a/youtube_dl/extractor/tagesschau.py +++ b/youtube_dl/extractor/tagesschau.py @@ -1,4 +1,4 @@ -# -*- coding: utf-8 -*- +# coding: utf-8 from __future__ import unicode_literals import re diff --git a/youtube_dl/extractor/tass.py b/youtube_dl/extractor/tass.py index c4ef70778..5293393ef 100644 --- a/youtube_dl/extractor/tass.py +++ b/youtube_dl/extractor/tass.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals import json diff --git a/youtube_dl/extractor/teachertube.py b/youtube_dl/extractor/teachertube.py index 82675431f..df5d5556f 100644 --- a/youtube_dl/extractor/teachertube.py +++ b/youtube_dl/extractor/teachertube.py @@ -1,4 +1,4 @@ -# -*- coding: utf-8 -*- +# coding: utf-8 from __future__ import unicode_literals import re diff --git a/youtube_dl/extractor/teamcoco.py b/youtube_dl/extractor/teamcoco.py index 79a778920..75346393b 100644 --- a/youtube_dl/extractor/teamcoco.py +++ b/youtube_dl/extractor/teamcoco.py @@ -1,4 +1,4 @@ -# -*- coding: utf-8 -*- +# coding: utf-8 from __future__ import unicode_literals import base64 diff --git a/youtube_dl/extractor/theintercept.py b/youtube_dl/extractor/theintercept.py index ec6f4ecaa..f23b58713 100644 --- a/youtube_dl/extractor/theintercept.py +++ b/youtube_dl/extractor/theintercept.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals from .common import InfoExtractor diff --git a/youtube_dl/extractor/theplatform.py b/youtube_dl/extractor/theplatform.py index 6febf805b..cfbf7f4e1 100644 --- a/youtube_dl/extractor/theplatform.py +++ b/youtube_dl/extractor/theplatform.py @@ -1,4 +1,4 @@ -# -*- coding: utf-8 -*- +# coding: utf-8 from __future__ import unicode_literals import re diff --git a/youtube_dl/extractor/tlc.py b/youtube_dl/extractor/tlc.py index ce4f91f46..fd145ba42 100644 --- a/youtube_dl/extractor/tlc.py +++ b/youtube_dl/extractor/tlc.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals import re diff --git a/youtube_dl/extractor/toypics.py b/youtube_dl/extractor/toypics.py index 2579ba8c6..938e05076 100644 --- a/youtube_dl/extractor/toypics.py +++ b/youtube_dl/extractor/toypics.py @@ -1,4 +1,4 @@ -# -*- coding:utf-8 -*- +# coding: utf-8 from __future__ import unicode_literals from .common import InfoExtractor diff --git a/youtube_dl/extractor/tumblr.py b/youtube_dl/extractor/tumblr.py index 4d8b57111..ebe411e12 100644 --- a/youtube_dl/extractor/tumblr.py +++ b/youtube_dl/extractor/tumblr.py @@ -1,4 +1,4 @@ -# -*- coding: utf-8 -*- +# coding: utf-8 from __future__ import unicode_literals import re diff --git a/youtube_dl/extractor/tv2.py b/youtube_dl/extractor/tv2.py index f225ec684..bd28267b0 100644 --- a/youtube_dl/extractor/tv2.py +++ b/youtube_dl/extractor/tv2.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals import re diff --git a/youtube_dl/extractor/tvigle.py b/youtube_dl/extractor/tvigle.py index ead4c00c7..f3817ab28 100644 --- a/youtube_dl/extractor/tvigle.py +++ b/youtube_dl/extractor/tvigle.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals import re diff --git a/youtube_dl/extractor/vbox7.py b/youtube_dl/extractor/vbox7.py index e17988573..a1e0851b7 100644 --- a/youtube_dl/extractor/vbox7.py +++ b/youtube_dl/extractor/vbox7.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals import re diff --git a/youtube_dl/extractor/vesti.py b/youtube_dl/extractor/vesti.py index cb64ae0bd..5ab716880 100644 --- a/youtube_dl/extractor/vesti.py +++ b/youtube_dl/extractor/vesti.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals import re diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index 50aacc6ac..309a47bf0 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals import json diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py index 58799d413..ac77bc623 100644 --- a/youtube_dl/extractor/vk.py +++ b/youtube_dl/extractor/vk.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals import collections diff --git a/youtube_dl/extractor/vodlocker.py b/youtube_dl/extractor/vodlocker.py index a938a4007..c85b474d2 100644 --- a/youtube_dl/extractor/vodlocker.py +++ b/youtube_dl/extractor/vodlocker.py @@ -1,4 +1,4 @@ -# -*- coding: utf-8 -*- +# coding: utf-8 from __future__ import unicode_literals from .common import InfoExtractor diff --git a/youtube_dl/extractor/wdr.py b/youtube_dl/extractor/wdr.py index 390f9e830..f7e6360a3 100644 --- a/youtube_dl/extractor/wdr.py +++ b/youtube_dl/extractor/wdr.py @@ -1,4 +1,4 @@ -# -*- coding: utf-8 -*- +# coding: utf-8 from __future__ import unicode_literals import re diff --git a/youtube_dl/extractor/wrzuta.py b/youtube_dl/extractor/wrzuta.py index bdd7097ba..0f53f1bcb 100644 --- a/youtube_dl/extractor/wrzuta.py +++ b/youtube_dl/extractor/wrzuta.py @@ -1,4 +1,4 @@ -# -*- coding: utf-8 -*- +# coding: utf-8 from __future__ import unicode_literals import re diff --git a/youtube_dl/extractor/wsj.py b/youtube_dl/extractor/wsj.py index a83e68b17..deb7483ae 100644 --- a/youtube_dl/extractor/wsj.py +++ b/youtube_dl/extractor/wsj.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals from .common import InfoExtractor diff --git a/youtube_dl/extractor/xboxclips.py b/youtube_dl/extractor/xboxclips.py index b113ab1c4..d9c277bc3 100644 --- a/youtube_dl/extractor/xboxclips.py +++ b/youtube_dl/extractor/xboxclips.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals from .common import InfoExtractor diff --git a/youtube_dl/extractor/xnxx.py b/youtube_dl/extractor/xnxx.py index bcb140305..e0a6255dc 100644 --- a/youtube_dl/extractor/xnxx.py +++ b/youtube_dl/extractor/xnxx.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals from .common import InfoExtractor diff --git a/youtube_dl/extractor/xuite.py b/youtube_dl/extractor/xuite.py index a66daee46..4b9c1ee9c 100644 --- a/youtube_dl/extractor/xuite.py +++ b/youtube_dl/extractor/xuite.py @@ -1,4 +1,4 @@ -# -*- coding: utf-8 -*- +# coding: utf-8 from __future__ import unicode_literals import base64 diff --git a/youtube_dl/extractor/zingmp3.py b/youtube_dl/extractor/zingmp3.py index bd708b42c..0f0e9d0eb 100644 --- a/youtube_dl/extractor/zingmp3.py +++ b/youtube_dl/extractor/zingmp3.py @@ -1,4 +1,4 @@ -# coding=utf-8 +# coding: utf-8 from __future__ import unicode_literals import re diff --git a/youtube_dl/postprocessor/embedthumbnail.py b/youtube_dl/postprocessor/embedthumbnail.py index 3bad5a266..2e4789eb2 100644 --- a/youtube_dl/postprocessor/embedthumbnail.py +++ b/youtube_dl/postprocessor/embedthumbnail.py @@ -1,4 +1,4 @@ -# -*- coding: utf-8 -*- +# coding: utf-8 from __future__ import unicode_literals diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 044520037..0569d231c 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1,5 +1,5 @@ #!/usr/bin/env python -# -*- coding: utf-8 -*- +# coding: utf-8 from __future__ import unicode_literals From 215ff6e0f3b092aac9edc91b8026ffc7b55d8b70 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Mon, 3 Oct 2016 18:16:55 +0100 Subject: [PATCH 0283/1571] [theweatherchannel] Add new extractor(closes #7188) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/theweatherchannel.py | 79 +++++++++++++++++++++++ 2 files changed, 80 insertions(+) create mode 100644 youtube_dl/extractor/theweatherchannel.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index dca4973d4..f67e19526 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -890,6 +890,7 @@ from .theplatform import ( from .thescene import TheSceneIE from .thesixtyone import TheSixtyOneIE from .thestar import TheStarIE +from .theweatherchannel import TheWeatherChannelIE from .thisamericanlife import ThisAmericanLifeIE from .thisav import ThisAVIE from .thisoldhouse import ThisOldHouseIE diff --git a/youtube_dl/extractor/theweatherchannel.py b/youtube_dl/extractor/theweatherchannel.py new file mode 100644 index 000000000..c34a49d03 --- /dev/null +++ b/youtube_dl/extractor/theweatherchannel.py @@ -0,0 +1,79 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .theplatform import ThePlatformIE +from ..utils import ( + determine_ext, + parse_duration, +) + + +class TheWeatherChannelIE(ThePlatformIE): + _VALID_URL = r'https?://(?:www\.)?weather\.com/(?:[^/]+/)*video/(?P<id>[^/?#]+)' + _TESTS = [{ + 'url': 'https://weather.com/series/great-outdoors/video/ice-climber-is-in-for-a-shock', + 'md5': 'ab924ac9574e79689c24c6b95e957def', + 'info_dict': { + 'id': 'cc82397e-cc3f-4d11-9390-a785add090e8', + 'ext': 'mp4', + 'title': 'Ice Climber Is In For A Shock', + 'description': 'md5:55606ce1378d4c72e6545e160c9d9695', + 'uploader': 'TWC - Digital (No Distro)', + 'uploader_id': '6ccd5455-16bb-46f2-9c57-ff858bb9f62c', + } + }] + + def _real_extract(self, url): + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + drupal_settings = self._parse_json(self._search_regex( + r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);', + webpage, 'drupal settings'), display_id) + video_id = drupal_settings['twc']['contexts']['node']['uuid'] + video_data = self._download_json( + 'https://dsx.weather.com/cms/v4/asset-collection/en_US/' + video_id, video_id) + seo_meta = video_data.get('seometa', {}) + title = video_data.get('title') or seo_meta['title'] + + urls = [] + thumbnails = [] + formats = [] + for variant_id, variant_url in video_data.get('variants', []).items(): + variant_url = variant_url.strip() + if not variant_url or variant_url in urls: + continue + urls.append(variant_url) + ext = determine_ext(variant_url) + if ext == 'jpg': + thumbnails.append({ + 'url': variant_url, + 'id': variant_id, + }) + elif ThePlatformIE.suitable(variant_url): + tp_formats, _ = self._extract_theplatform_smil(variant_url, video_id) + formats.extend(tp_formats) + elif ext == 'm3u8': + formats.extend(self._extract_m3u8_formats( + variant_url, video_id, 'mp4', 'm3u8_native', + m3u8_id=variant_id, fatal=False)) + elif ext == 'f4m': + formats.extend(self._extract_f4m_formats( + variant_url, video_id, f4m_id=variant_id, fatal=False)) + else: + formats.append({ + 'url': variant_url, + 'format_id': variant_id, + }) + self._sort_formats(formats) + + return { + 'id': video_id, + 'display_id': display_id, + 'title': title, + 'description': video_data.get('description') or seo_meta.get('description') or seo_meta.get('og:description'), + 'duration': parse_duration(video_data.get('duration')), + 'uploader': video_data.get('providername'), + 'uploader_id': video_data.get('providerid'), + 'thumbnails': thumbnails, + 'formats': formats, + } From c1b2a0858cafc3362e5da73b9fb737f18cde4618 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 4 Oct 2016 02:10:23 +0700 Subject: [PATCH 0284/1571] [youtube:live] Extend _VALID_URL (Closes #10839) --- youtube_dl/extractor/youtube.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index f86823112..cb266eab6 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -2152,7 +2152,7 @@ class YoutubeUserIE(YoutubeChannelIE): class YoutubeLiveIE(YoutubeBaseInfoExtractor): IE_DESC = 'YouTube.com live streams' - _VALID_URL = r'(?P<base_url>https?://(?:\w+\.)?youtube\.com/(?:user|channel)/(?P<id>[^/]+))/live' + _VALID_URL = r'(?P<base_url>https?://(?:\w+\.)?youtube\.com/(?:user|channel|c)/(?P<id>[^/]+))/live' IE_NAME = 'youtube:live' _TESTS = [{ @@ -2178,6 +2178,9 @@ class YoutubeLiveIE(YoutubeBaseInfoExtractor): }, { 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live', 'only_matching': True, + }, { + 'url': 'https://www.youtube.com/c/CommanderVideoHq/live', + 'only_matching': True, }] def _real_extract(self, url): From 539c881bfc1380890a55a08dbf970900328f8ec5 Mon Sep 17 00:00:00 2001 From: Aleksander Nitecki <ixendr@itogi.re> Date: Mon, 3 Oct 2016 21:47:19 +0200 Subject: [PATCH 0285/1571] [techtalks] Allow URL-s with name part omitted. --- youtube_dl/extractor/techtalks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/techtalks.py b/youtube_dl/extractor/techtalks.py index 16e945d8e..0ec7b1273 100644 --- a/youtube_dl/extractor/techtalks.py +++ b/youtube_dl/extractor/techtalks.py @@ -10,7 +10,7 @@ from ..utils import ( class TechTalksIE(InfoExtractor): - _VALID_URL = r'https?://techtalks\.tv/talks/[^/]*/(?P<id>\d+)/' + _VALID_URL = r'https?://techtalks\.tv/talks/(?:[^/]*/)?(?P<id>\d+)/' _TEST = { 'url': 'http://techtalks.tv/talks/learning-topic-models-going-beyond-svd/57758/', From 6eb5503b12286ef9813ee22e95622f09dab2ebe5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 4 Oct 2016 02:54:36 +0700 Subject: [PATCH 0286/1571] [techtalks] Relax _VALID_URL --- youtube_dl/extractor/techtalks.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/techtalks.py b/youtube_dl/extractor/techtalks.py index 0ec7b1273..a5b62c717 100644 --- a/youtube_dl/extractor/techtalks.py +++ b/youtube_dl/extractor/techtalks.py @@ -10,9 +10,9 @@ from ..utils import ( class TechTalksIE(InfoExtractor): - _VALID_URL = r'https?://techtalks\.tv/talks/(?:[^/]*/)?(?P<id>\d+)/' + _VALID_URL = r'https?://techtalks\.tv/talks/(?:[^/]+/)?(?P<id>\d+)' - _TEST = { + _TESTS = [{ 'url': 'http://techtalks.tv/talks/learning-topic-models-going-beyond-svd/57758/', 'info_dict': { 'id': '57758', @@ -38,7 +38,10 @@ class TechTalksIE(InfoExtractor): # rtmp download 'skip_download': True, }, - } + }, { + 'url': 'http://techtalks.tv/talks/57758', + 'only_matching': True, + }] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) From 7232e54813481dc7b9b2ea9f70499a49badd75cc Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Tue, 4 Oct 2016 07:59:53 +0100 Subject: [PATCH 0287/1571] [tonline] Add new extractor(#10376) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/tonline.py | 59 ++++++++++++++++++++++++++++++ 2 files changed, 60 insertions(+) create mode 100644 youtube_dl/extractor/tonline.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index f67e19526..e73956923 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -908,6 +908,7 @@ from .tnaflix import ( MovieFapIE, ) from .toggle import ToggleIE +from .tonline import TOnlineIE from .toutv import TouTvIE from .toypics import ToypicsUserIE, ToypicsIE from .traileraddict import TrailerAddictIE diff --git a/youtube_dl/extractor/tonline.py b/youtube_dl/extractor/tonline.py new file mode 100644 index 000000000..cc11eae2a --- /dev/null +++ b/youtube_dl/extractor/tonline.py @@ -0,0 +1,59 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import int_or_none + + +class TOnlineIE(InfoExtractor): + IE_NAME = 't-online.de' + _VALID_URL = r'https?://(?:www\.)?t-online\.de/tv/(?:[^/]+/)*id_(?P<id>\d+)' + _TEST = { + 'url': 'http://www.t-online.de/tv/sport/fussball/id_79166266/drittes-remis-zidane-es-muss-etwas-passieren-.html', + 'md5': '7d94dbdde5f9d77c5accc73c39632c29', + 'info_dict': { + 'id': '79166266', + 'ext': 'mp4', + 'title': 'Drittes Remis! Zidane: "Es muss etwas passieren"', + 'description': 'Es läuft nicht rund bei Real Madrid. Das 1:1 gegen den SD Eibar war das dritte Unentschieden in Folge in der Liga.', + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + video_data = self._download_json( + 'http://www.t-online.de/tv/id_%s/tid_json_video' % video_id, video_id) + title = video_data['subtitle'] + + formats = [] + for asset in video_data.get('assets', []): + asset_source = asset.get('source') or asset.get('source2') + if not asset_source: + continue + formats_id = [] + for field_key in ('type', 'profile'): + field_value = asset.get(field_key) + if field_value: + formats_id.append(field_value) + formats.append({ + 'format_id': '-'.join(formats_id), + 'url': asset_source, + }) + + thumbnails = [] + for image in video_data.get('images', []): + image_source = image.get('source') + if not image_source: + continue + thumbnails.append({ + 'url': image_source, + }) + + return { + 'id': video_id, + 'title': title, + 'description': video_data.get('description'), + 'duration': int_or_none(video_data.get('duration')), + 'thumbnails': thumbnails, + 'formats': formats, + } From 185744f92f172a5cd1db317fbf87fee733cfdfe6 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Tue, 4 Oct 2016 10:30:57 +0100 Subject: [PATCH 0288/1571] [lego] Add new extractor(closes #10369) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/lego.py | 86 ++++++++++++++++++++++++++++++ 2 files changed, 87 insertions(+) create mode 100644 youtube_dl/extractor/lego.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index e73956923..feee06004 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -437,6 +437,7 @@ from .lcp import ( ) from .learnr import LearnrIE from .lecture2go import Lecture2GoIE +from .lego import LEGOIE from .lemonde import LemondeIE from .leeco import ( LeIE, diff --git a/youtube_dl/extractor/lego.py b/youtube_dl/extractor/lego.py new file mode 100644 index 000000000..5be7d622c --- /dev/null +++ b/youtube_dl/extractor/lego.py @@ -0,0 +1,86 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..compat import compat_str +from ..utils import ( + unescapeHTML, + int_or_none, +) + + +class LEGOIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?lego\.com/(?:[^/]+/)*videos/(?:[^/]+/)*[^/?#]+-(?P<id>[0-9a-f]+)' + _TEST = { + 'url': 'http://www.lego.com/en-us/videos/themes/club/blocumentary-kawaguchi-55492d823b1b4d5e985787fa8c2973b1', + 'md5': 'f34468f176cfd76488767fc162c405fa', + 'info_dict': { + 'id': '55492d823b1b4d5e985787fa8c2973b1', + 'ext': 'mp4', + 'title': 'Blocumentary Great Creations: Akiyuki Kawaguchi', + } + } + _BITRATES = [256, 512, 1024, 1536, 2560] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage( + 'http://www.lego.com/en-US/mediaplayer/video/' + video_id, video_id) + title = self._search_regex(r'<title>(.+?)', webpage, 'title') + video_data = self._parse_json(unescapeHTML(self._search_regex( + r"video='([^']+)'", webpage, 'video data')), video_id) + progressive_base = self._search_regex( + r'data-video-progressive-url="([^"]+)"', + webpage, 'progressive base', default='https://lc-mediaplayerns-live-s.legocdn.com/') + streaming_base = self._search_regex( + r'data-video-streaming-url="([^"]+)"', + webpage, 'streaming base', default='http://legoprod-f.akamaihd.net/') + item_id = video_data['ItemId'] + + net_storage_path = video_data.get('NetStoragePath') or '/'.join([item_id[:2], item_id[2:4]]) + base_path = '_'.join([item_id, video_data['VideoId'], video_data['Locale'], compat_str(video_data['VideoVersion'])]) + path = '/'.join([net_storage_path, base_path]) + streaming_path = ','.join(map(lambda bitrate: compat_str(bitrate), self._BITRATES)) + + formats = self._extract_akamai_formats( + '%si/s/public/%s_,%s,.mp4.csmil/master.m3u8' % (streaming_base, path, streaming_path), video_id) + m3u8_formats = list(filter( + lambda f: f.get('protocol') == 'm3u8_native' and f.get('vcodec') != 'none' and f.get('resolution') != 'multiple', + formats)) + if len(m3u8_formats) == len(self._BITRATES): + self._sort_formats(m3u8_formats) + for bitrate, m3u8_format in zip(self._BITRATES, m3u8_formats): + progressive_base_url = '%spublic/%s_%d.' % (progressive_base, path, bitrate) + mp4_f = m3u8_format.copy() + mp4_f.update({ + 'url': progressive_base_url + 'mp4', + 'format_id': m3u8_format['format_id'].replace('hls', 'mp4'), + 'protocol': 'http', + }) + web_f = { + 'url': progressive_base_url + 'webm', + 'format_id': m3u8_format['format_id'].replace('hls', 'webm'), + 'width': m3u8_format['width'], + 'height': m3u8_format['height'], + 'tbr': m3u8_format.get('tbr'), + 'ext': 'webm', + } + formats.extend([web_f, mp4_f]) + else: + for bitrate in self._BITRATES: + for ext in ('web', 'mp4'): + formats.append({ + 'format_id': '%s-%s' % (ext, bitrate), + 'url': '%spublic/%s_%d.%s' % (progressive_base, path, bitrate, ext), + 'tbr': bitrate, + 'ext': ext, + }) + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': title, + 'thumbnail': video_data.get('CoverImageUrl'), + 'duration': int_or_none(video_data.get('Length')), + 'formats': formats, + } From 0a33bb2cb2ca401ffe88e520d7bbd7482d976cbc Mon Sep 17 00:00:00 2001 From: Steffan Donal Date: Tue, 4 Oct 2016 09:52:02 +0100 Subject: [PATCH 0289/1571] Rename "Steffan 'Ruirize' James" to "Steffan Donal" Legal name change! --- AUTHORS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/AUTHORS b/AUTHORS index 937742c5d..b6456052d 100644 --- a/AUTHORS +++ b/AUTHORS @@ -26,7 +26,7 @@ Albert Kim Pierre Rudloff Huarong Huo Ismael Mejía -Steffan 'Ruirize' James +Steffan Donal Andras Elso Jelle van der Waa Marcin Cieślak From b1d798887e5bc26c938fe8c07ae5ccf382568f58 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 5 Oct 2016 23:43:08 +0700 Subject: [PATCH 0290/1571] [npo] Add support for 2doc.nl (Closes #10842) --- youtube_dl/extractor/npo.py | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py index 9c7cc777b..c3915ec6e 100644 --- a/youtube_dl/extractor/npo.py +++ b/youtube_dl/extractor/npo.py @@ -459,8 +459,9 @@ class NPOPlaylistBaseIE(NPOIE): class VPROIE(NPOPlaylistBaseIE): IE_NAME = 'vpro' - _VALID_URL = r'https?://(?:www\.)?(?:tegenlicht\.)?vpro\.nl/(?:[^/]+/){2,}(?P[^/]+)\.html' - _PLAYLIST_TITLE_RE = r']+class=["\'].*?\bmedia-platform-title\b.*?["\'][^>]*>([^<]+)' + _VALID_URL = r'https?://(?:www\.)?(?:(?:tegenlicht\.)?vpro|2doc)\.nl/(?:[^/]+/)*(?P[^/]+)\.html' + _PLAYLIST_TITLE_RE = (r']+class=["\'].*?\bmedia-platform-title\b.*?["\'][^>]*>([^<]+)', + r']+class=["\'].*?\bmedia-platform-subtitle\b.*?["\'][^>]*>([^<]+)') _PLAYLIST_ENTRY_RE = r'data-media-id="([^"]+)"' _TESTS = [ @@ -492,6 +493,27 @@ class VPROIE(NPOPlaylistBaseIE): 'title': 'education education', }, 'playlist_count': 2, + }, + { + 'url': 'http://www.2doc.nl/documentaires/series/2doc/2015/oktober/de-tegenprestatie.html', + 'info_dict': { + 'id': 'de-tegenprestatie', + 'title': 'De Tegenprestatie', + }, + 'playlist_count': 2, + }, { + 'url': 'http://www.2doc.nl/speel~VARA_101375237~mh17-het-verdriet-van-nederland~.html', + 'info_dict': { + 'id': 'VARA_101375237', + 'ext': 'm4v', + 'title': 'MH17: Het verdriet van Nederland', + 'description': 'md5:09e1a37c1fdb144621e22479691a9f18', + 'upload_date': '20150716', + }, + 'params': { + # Skip because of m3u8 download + 'skip_download': True + }, } ] From 017eb829343dfff9b70ab7f2278053f35cee953c Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Wed, 5 Oct 2016 18:27:02 +0100 Subject: [PATCH 0291/1571] [npo] detect geo restriction --- youtube_dl/extractor/npo.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py index c3915ec6e..c91f58461 100644 --- a/youtube_dl/extractor/npo.py +++ b/youtube_dl/extractor/npo.py @@ -3,6 +3,7 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from ..compat import compat_HTTPError from ..utils import ( fix_xml_ampersands, orderedSet, @@ -10,6 +11,7 @@ from ..utils import ( qualities, strip_jsonp, unified_strdate, + ExtractorError, ) @@ -181,9 +183,16 @@ class NPOIE(NPOBaseIE): continue streams = format_info.get('streams') if streams: - video_info = self._download_json( - streams[0] + '&type=json', - video_id, 'Downloading %s stream JSON' % format_id) + try: + video_info = self._download_json( + streams[0] + '&type=json', + video_id, 'Downloading %s stream JSON' % format_id) + except ExtractorError as ee: + if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 404: + error = (self._parse_json(ee.cause.read().decode(), video_id, fatal=False) or {}).get('errorstring') + if error: + raise ExtractorError(error, expected=True) + raise else: video_info = format_info video_url = video_info.get('url') From 33898fb19c1af161c503ebce8f9a4774fecee45e Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Thu, 6 Oct 2016 10:45:57 +0100 Subject: [PATCH 0292/1571] [nzz] Add new extractor(#4407) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/nzz.py | 36 ++++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+) create mode 100644 youtube_dl/extractor/nzz.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index feee06004..72bc4f57c 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -638,6 +638,7 @@ from .nytimes import ( NYTimesArticleIE, ) from .nuvid import NuvidIE +from .nzz import NZZIE from .odatv import OdaTVIE from .odnoklassniki import OdnoklassnikiIE from .oktoberfesttv import OktoberfestTVIE diff --git a/youtube_dl/extractor/nzz.py b/youtube_dl/extractor/nzz.py new file mode 100644 index 000000000..2d352f53f --- /dev/null +++ b/youtube_dl/extractor/nzz.py @@ -0,0 +1,36 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + extract_attributes, +) + + +class NZZIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?nzz\.ch/(?:[^/]+/)*[^/?#]+-ld\.(?P\d+)' + _TEST = { + 'url': 'http://www.nzz.ch/zuerich/gymizyte/gymizyte-schreiben-schueler-heute-noch-diktate-ld.9153', + 'info_dict': { + 'id': '9153', + }, + 'playlist_mincount': 6, + } + + def _real_extract(self, url): + page_id = self._match_id(url) + webpage = self._download_webpage(url, page_id) + + entries = [] + for player_element in re.findall(r'(<[^>]+class="kalturaPlayer"[^>]*>)', webpage): + player_params = extract_attributes(player_element) + if player_params.get('data-type') not in ('kaltura_singleArticle',): + self.report_warning('Unsupported player type') + continue + entry_id = player_params['data-id'] + entries.append(self.url_result( + 'kaltura:1750922:' + entry_id, 'Kaltura', entry_id)) + + return self.playlist_result(entries, page_id) From 09b9c45e242cb9e85beaa98b4783ec02065f1ec6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 6 Oct 2016 23:22:52 +0700 Subject: [PATCH 0293/1571] [generic] Add support for multiple vimeo embeds (Closes #10862) --- youtube_dl/extractor/generic.py | 6 +++--- youtube_dl/extractor/vimeo.py | 36 ++++++++++++++++++--------------- youtube_dl/extractor/vk.py | 2 +- 3 files changed, 24 insertions(+), 20 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 9ea306e3a..8ef8fb5f4 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -1754,9 +1754,9 @@ class GenericIE(InfoExtractor): if matches: return _playlist_from_matches(matches, ie='RtlNl') - vimeo_url = VimeoIE._extract_vimeo_url(url, webpage) - if vimeo_url is not None: - return self.url_result(vimeo_url) + vimeo_urls = VimeoIE._extract_urls(url, webpage) + if vimeo_urls: + return _playlist_from_matches(vimeo_urls, ie=VimeoIE.ie_key()) vid_me_embed_url = self._search_regex( r'src=[\'"](https?://vid\.me/[^\'"]+)[\'"]', diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index 309a47bf0..ea8fc5908 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -355,23 +355,27 @@ class VimeoIE(VimeoBaseInfoExtractor): return smuggle_url(url, {'http_headers': {'Referer': referrer_url}}) @staticmethod - def _extract_vimeo_url(url, webpage): + def _extract_urls(url, webpage): + urls = [] # Look for embedded (iframe) Vimeo player - mobj = re.search( - r']+?src=(["\'])(?P(?:https?:)?//player\.vimeo\.com/video/.+?)\1', webpage) - if mobj: - player_url = unescapeHTML(mobj.group('url')) - return VimeoIE._smuggle_referrer(player_url, url) - # Look for embedded (swf embed) Vimeo player - mobj = re.search( - r']+?src="((?:https?:)?//(?:www\.)?vimeo\.com/moogaloop\.swf.+?)"', webpage) - if mobj: - return mobj.group(1) - # Look more for non-standard embedded Vimeo player - mobj = re.search( - r']+src=(?P[\'"])(?P(?:https?:)?//(?:www\.)?vimeo\.com/[0-9]+)(?P=q1)', webpage) - if mobj: - return mobj.group('url') + for mobj in re.finditer( + r']+?src=(["\'])(?P(?:https?:)?//player\.vimeo\.com/video/.+?)\1', webpage): + urls.append(VimeoIE._smuggle_referrer(unescapeHTML(mobj.group('url')), url)) + PLAIN_EMBED_RE = ( + # Look for embedded (swf embed) Vimeo player + r']+?src=(["\'])(?P(?:https?:)?//(?:www\.)?vimeo\.com/moogaloop\.swf.+?)\1', + # Look more for non-standard embedded Vimeo player + r']+src=(["\'])(?P(?:https?:)?//(?:www\.)?vimeo\.com/[0-9]+)\1', + ) + for embed_re in PLAIN_EMBED_RE: + for mobj in re.finditer(embed_re, webpage): + urls.append(mobj.group('url')) + return urls + + @staticmethod + def _extract_url(url, webpage): + urls = VimeoIE._extract_urls(url, webpage) + return urls[0] if urls else None def _verify_player_video_password(self, url, video_id): password = self._downloader.params.get('videopassword') diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py index ac77bc623..df43ba867 100644 --- a/youtube_dl/extractor/vk.py +++ b/youtube_dl/extractor/vk.py @@ -341,7 +341,7 @@ class VKIE(VKBaseIE): if youtube_url: return self.url_result(youtube_url, 'Youtube') - vimeo_url = VimeoIE._extract_vimeo_url(url, info_page) + vimeo_url = VimeoIE._extract_url(url, info_page) if vimeo_url is not None: return self.url_result(vimeo_url) From 831a34caa2112a9b2d867e05f8a4debf965e8389 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Fri, 7 Oct 2016 03:28:41 +0800 Subject: [PATCH 0294/1571] [Makefilea] Fix for GNU make < 4 Closes #9387 The shell assignment operator != was introduced in GNU make 4.0, or specifically the commit in [1]. This fix removes such usages and fallback to a more portable syntax. Tested with: * GNU make 3.82 on CentOS 7.2 * bmake 20150910 on CentOS 7.2, source RPM from Fedora 24 [2] * GNU make 4.2.1 on Arch Linux (Arch official package) * bmake 20160926 on Arch Linux (Arch official package) * GNU make 3.82 on Arch Linux (Compiled from source) * Apple bsdmake-24 on macOS Sierra, binary package from Homebrew Thanks @bdeyal for the feedback of the first tests [1] http://git.savannah.gnu.org/cgit/make.git/commit/?id=b34438bee83ee906a23b881f257e684a0993b9b1 [2] http://koji.fedoraproject.org/koji/buildinfo?buildID=716769 --- ChangeLog | 6 ++++++ Makefile | 4 ++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/ChangeLog b/ChangeLog index 4f64edabb..be1cf90fb 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +version + +Core +* Support for GNU make < 4 is fixed (#9387) + + version 2016.10.02 Core diff --git a/Makefile b/Makefile index a2763a664..7393e3e1e 100644 --- a/Makefile +++ b/Makefile @@ -12,7 +12,7 @@ SHAREDIR ?= $(PREFIX)/share PYTHON ?= /usr/bin/env python # set SYSCONFDIR to /etc if PREFIX=/usr or PREFIX=/usr/local -SYSCONFDIR != if [ $(PREFIX) = /usr -o $(PREFIX) = /usr/local ]; then echo /etc; else echo $(PREFIX)/etc; fi +SYSCONFDIR = $$(if [ $(PREFIX) = /usr -o $(PREFIX) = /usr/local ]; then echo /etc; else echo $(PREFIX)/etc; fi) install: youtube-dl youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish install -d $(DESTDIR)$(BINDIR) @@ -90,7 +90,7 @@ fish-completion: youtube-dl.fish lazy-extractors: youtube_dl/extractor/lazy_extractors.py -_EXTRACTOR_FILES != find youtube_dl/extractor -iname '*.py' -and -not -iname 'lazy_extractors.py' +_EXTRACTOR_FILES = $$(find youtube_dl/extractor -iname '*.py' -and -not -iname 'lazy_extractors.py') youtube_dl/extractor/lazy_extractors.py: devscripts/make_lazy_extractors.py devscripts/lazy_load_template.py $(_EXTRACTOR_FILES) $(PYTHON) devscripts/make_lazy_extractors.py $@ From c0a7b9b348bb580d32fc94ee90c1b3b02b668a9e Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Fri, 7 Oct 2016 16:02:53 +0800 Subject: [PATCH 0295/1571] Revert "[Makefilea] Fix for GNU make < 4" This reverts commit 831a34caa2112a9b2d867e05f8a4debf965e8389. The reverted commit breaks lazy extractors. --- ChangeLog | 6 ------ Makefile | 4 ++-- 2 files changed, 2 insertions(+), 8 deletions(-) diff --git a/ChangeLog b/ChangeLog index be1cf90fb..4f64edabb 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,9 +1,3 @@ -version - -Core -* Support for GNU make < 4 is fixed (#9387) - - version 2016.10.02 Core diff --git a/Makefile b/Makefile index 7393e3e1e..a2763a664 100644 --- a/Makefile +++ b/Makefile @@ -12,7 +12,7 @@ SHAREDIR ?= $(PREFIX)/share PYTHON ?= /usr/bin/env python # set SYSCONFDIR to /etc if PREFIX=/usr or PREFIX=/usr/local -SYSCONFDIR = $$(if [ $(PREFIX) = /usr -o $(PREFIX) = /usr/local ]; then echo /etc; else echo $(PREFIX)/etc; fi) +SYSCONFDIR != if [ $(PREFIX) = /usr -o $(PREFIX) = /usr/local ]; then echo /etc; else echo $(PREFIX)/etc; fi install: youtube-dl youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish install -d $(DESTDIR)$(BINDIR) @@ -90,7 +90,7 @@ fish-completion: youtube-dl.fish lazy-extractors: youtube_dl/extractor/lazy_extractors.py -_EXTRACTOR_FILES = $$(find youtube_dl/extractor -iname '*.py' -and -not -iname 'lazy_extractors.py') +_EXTRACTOR_FILES != find youtube_dl/extractor -iname '*.py' -and -not -iname 'lazy_extractors.py' youtube_dl/extractor/lazy_extractors.py: devscripts/make_lazy_extractors.py devscripts/lazy_load_template.py $(_EXTRACTOR_FILES) $(PYTHON) devscripts/make_lazy_extractors.py $@ From 3d83a1ae924902a0421bea8e2e6cd57bb34ee299 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Fri, 7 Oct 2016 17:50:45 +0800 Subject: [PATCH 0296/1571] [generic] Support direct MMS links (closes #10838) --- ChangeLog | 6 ++++++ youtube_dl/extractor/generic.py | 19 +++++++++++++++++++ 2 files changed, 25 insertions(+) diff --git a/ChangeLog b/ChangeLog index 4f64edabb..55e60758d 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +version + +Extractors ++ [generic] Support direct MMS links (#10838) + + version 2016.10.02 Core diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 8ef8fb5f4..1f18cbfe9 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -1412,6 +1412,18 @@ class GenericIE(InfoExtractor): }, 'playlist_mincount': 3, }, + { + # Direct MMS link + 'url': 'mms://kentro.kaist.ac.kr/200907/MilesReid(0709).wmv', + 'info_dict': { + 'id': 'MilesReid(0709)', + 'ext': 'wmv', + 'title': 'MilesReid(0709)', + }, + 'params': { + 'skip_download': True, # rtsp downloads, requiring mplayer or mpv + }, + }, # { # # TODO: find another test # # http://schema.org/VideoObject @@ -1551,6 +1563,13 @@ class GenericIE(InfoExtractor): else: video_id = compat_urllib_parse_unquote(os.path.splitext(url.rstrip('/').split('/')[-1])[0]) + if parsed_url.scheme == 'mms': + return { + 'id': video_id, + 'title': video_id, + 'url': url, + } + self.to_screen('%s: Requesting header' % video_id) head_req = HEADRequest(url) From 98763ee354ffc13a57f28dbd006729affacb6d30 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Fri, 7 Oct 2016 19:20:53 +0800 Subject: [PATCH 0297/1571] [extractor/common] Add id and title helpers for generic IEs --- youtube_dl/extractor/common.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 1076b46da..da192728f 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -21,6 +21,7 @@ from ..compat import ( compat_os_name, compat_str, compat_urllib_error, + compat_urllib_parse_unquote, compat_urllib_parse_urlencode, compat_urllib_request, compat_urlparse, @@ -2020,6 +2021,12 @@ class InfoExtractor(object): headers['Ytdl-request-proxy'] = geo_verification_proxy return headers + def _generic_id(self, url): + return compat_urllib_parse_unquote(os.path.splitext(url.rstrip('/').split('/')[-1])[0]) + + def _generic_title(self, url): + return compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]) + class SearchInfoExtractor(InfoExtractor): """ From 9dcd6fd3aae77571116ee8b823b6b9224d0ef2ad Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Fri, 7 Oct 2016 19:22:30 +0800 Subject: [PATCH 0298/1571] [generic,commonprotocols] Move mms suuport from GenericIE And use _generic_* helpers in those extractors --- ChangeLog | 2 +- youtube_dl/extractor/commonprotocols.py | 36 ++++++++++++++++++++----- youtube_dl/extractor/generic.py | 24 ++--------------- 3 files changed, 33 insertions(+), 29 deletions(-) diff --git a/ChangeLog b/ChangeLog index 55e60758d..3aa4d67f5 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,7 +1,7 @@ version Extractors -+ [generic] Support direct MMS links (#10838) ++ [commonprotocols] Support direct MMS links (#10838) version 2016.10.02 diff --git a/youtube_dl/extractor/commonprotocols.py b/youtube_dl/extractor/commonprotocols.py index 5d130a170..d98331a4e 100644 --- a/youtube_dl/extractor/commonprotocols.py +++ b/youtube_dl/extractor/commonprotocols.py @@ -1,13 +1,9 @@ from __future__ import unicode_literals -import os - from .common import InfoExtractor from ..compat import ( - compat_urllib_parse_unquote, compat_urlparse, ) -from ..utils import url_basename class RtmpIE(InfoExtractor): @@ -23,8 +19,8 @@ class RtmpIE(InfoExtractor): }] def _real_extract(self, url): - video_id = compat_urllib_parse_unquote(os.path.splitext(url.rstrip('/').split('/')[-1])[0]) - title = compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]) + video_id = self._generic_id(url) + title = self._generic_title(url) return { 'id': video_id, 'title': title, @@ -34,3 +30,31 @@ class RtmpIE(InfoExtractor): 'format_id': compat_urlparse.urlparse(url).scheme, }], } + + +class MmsIE(InfoExtractor): + IE_DESC = False # Do not list + _VALID_URL = r'(?i)mms://.+' + + _TEST = { + # Direct MMS link + 'url': 'mms://kentro.kaist.ac.kr/200907/MilesReid(0709).wmv', + 'info_dict': { + 'id': 'MilesReid(0709)', + 'ext': 'wmv', + 'title': 'MilesReid(0709)', + }, + 'params': { + 'skip_download': True, # rtsp downloads, requiring mplayer or mpv + }, + } + + def _real_extract(self, url): + video_id = self._generic_id(url) + title = self._generic_title(url) + + return { + 'id': video_id, + 'title': title, + 'url': url, + } diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 1f18cbfe9..7b8a9cf9a 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -27,7 +27,6 @@ from ..utils import ( unified_strdate, unsmuggle_url, UnsupportedError, - url_basename, xpath_text, ) from .brightcove import ( @@ -1412,18 +1411,6 @@ class GenericIE(InfoExtractor): }, 'playlist_mincount': 3, }, - { - # Direct MMS link - 'url': 'mms://kentro.kaist.ac.kr/200907/MilesReid(0709).wmv', - 'info_dict': { - 'id': 'MilesReid(0709)', - 'ext': 'wmv', - 'title': 'MilesReid(0709)', - }, - 'params': { - 'skip_download': True, # rtsp downloads, requiring mplayer or mpv - }, - }, # { # # TODO: find another test # # http://schema.org/VideoObject @@ -1561,14 +1548,7 @@ class GenericIE(InfoExtractor): force_videoid = smuggled_data['force_videoid'] video_id = force_videoid else: - video_id = compat_urllib_parse_unquote(os.path.splitext(url.rstrip('/').split('/')[-1])[0]) - - if parsed_url.scheme == 'mms': - return { - 'id': video_id, - 'title': video_id, - 'url': url, - } + video_id = self._generic_id(url) self.to_screen('%s: Requesting header' % video_id) @@ -1597,7 +1577,7 @@ class GenericIE(InfoExtractor): info_dict = { 'id': video_id, - 'title': compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]), + 'title': self._generic_title(url), 'upload_date': unified_strdate(head_response.headers.get('Last-Modified')) } From 85bcdd081ce0009bcb7135d8d68192d34969e168 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Fri, 7 Oct 2016 19:31:26 +0800 Subject: [PATCH 0299/1571] [extractors] Add MmsIE --- youtube_dl/extractor/extractors.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 72bc4f57c..5c1d2abfb 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -186,7 +186,10 @@ from .comedycentral import ( ) from .comcarcoff import ComCarCoffIE from .commonmistakes import CommonMistakesIE, UnicodeBOMIE -from .commonprotocols import RtmpIE +from .commonprotocols import ( + MmsIE, + RtmpIE, +) from .condenast import CondeNastIE from .cracked import CrackedIE from .crackle import CrackleIE From 38588ab9770813cb92013b870edc15def4f9ac1c Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Fri, 7 Oct 2016 20:04:49 +0800 Subject: [PATCH 0300/1571] [facebook] Fix for new handleServerJS syntax (closes #10846) According to the dump file in #10846, handleServerJS() now accepts an optional second argument. It's a string from available dump files. --- ChangeLog | 1 + youtube_dl/extractor/facebook.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index 3aa4d67f5..7aa0787ca 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,7 @@ version Extractors +* [facebook] Fix video extraction (#10846) + [commonprotocols] Support direct MMS links (#10838) diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py index 3a220e995..801573459 100644 --- a/youtube_dl/extractor/facebook.py +++ b/youtube_dl/extractor/facebook.py @@ -258,7 +258,7 @@ class FacebookIE(InfoExtractor): if not video_data: server_js_data = self._parse_json(self._search_regex( - r'handleServerJS\(({.+})\);', webpage, 'server js data', default='{}'), video_id) + r'handleServerJS\(({.+})(?:\);|,")', webpage, 'server js data', default='{}'), video_id) for item in server_js_data.get('instances', []): if item[1][0] == 'VideoConfig': video_data = video_data_list2dict(item[2][0]['videoData']) From 3c6b3bf2217e91c0d01ca65fa2b013ffa132fdbc Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Fri, 7 Oct 2016 15:53:03 +0100 Subject: [PATCH 0301/1571] [iprima] detect geo restriction --- youtube_dl/extractor/iprima.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/youtube_dl/extractor/iprima.py b/youtube_dl/extractor/iprima.py index 788bbe0d5..da2cdc656 100644 --- a/youtube_dl/extractor/iprima.py +++ b/youtube_dl/extractor/iprima.py @@ -81,6 +81,9 @@ class IPrimaIE(InfoExtractor): for _, src in re.findall(r'src["\']\s*:\s*(["\'])(.+?)\1', playerpage): extract_formats(src) + if not formats and '>GEO_IP_NOT_ALLOWED<' in playerpage: + self.raise_geo_restricted() + self._sort_formats(formats) return { From f475e8812197027ba7770a421e7fc7094ee8ae0d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 7 Oct 2016 22:15:26 +0700 Subject: [PATCH 0302/1571] [vimeo] PEP 8 [ci skip] --- youtube_dl/extractor/vimeo.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index ea8fc5908..a46c5c282 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -359,7 +359,8 @@ class VimeoIE(VimeoBaseInfoExtractor): urls = [] # Look for embedded (iframe) Vimeo player for mobj in re.finditer( - r']+?src=(["\'])(?P(?:https?:)?//player\.vimeo\.com/video/.+?)\1', webpage): + r']+?src=(["\'])(?P(?:https?:)?//player\.vimeo\.com/video/.+?)\1', + webpage): urls.append(VimeoIE._smuggle_referrer(unescapeHTML(mobj.group('url')), url)) PLAIN_EMBED_RE = ( # Look for embedded (swf embed) Vimeo player From 888f8d6ba40f17d8f8a13ca6259e0312d9befc87 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 7 Oct 2016 22:23:16 +0700 Subject: [PATCH 0303/1571] [ChangeLog] Actualize --- ChangeLog | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/ChangeLog b/ChangeLog index 7aa0787ca..fb248f9e9 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,8 +1,21 @@ version Extractors ++ [iprima] Detect geo restriction * [facebook] Fix video extraction (#10846) + [commonprotocols] Support direct MMS links (#10838) ++ [generic] Add support for multiple vimeo embeds (#10862) ++ [nzz] Add support for nzz.ch (#4407) ++ [npo] Detect geo restriction ++ [npo] Add support for 2doc.nl (#10842) ++ [lego] Add support for lego.com (#10369) ++ [tonline] Add support for t-online.de (#10376) +* [techtalks] Relax URL regular expression (#10840) +* [youtube:live] Extend URL regular expression (#10839) ++ [theweatherchannel] Add support for weather.com (#7188) ++ [thisoldhouse] Add support for thisoldhouse.com (#10837) ++ [nhl] Add support for wch2016.com (#10833) +* [pornoxo] Use JWPlatform to improve metadata extraction version 2016.10.02 From dd4291f72984037286dfd1800fdc07204b0b621a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 7 Oct 2016 22:25:30 +0700 Subject: [PATCH 0304/1571] release 2016.10.07 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 5 +++++ youtube_dl/version.py | 2 +- 4 files changed, 10 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index e813e4c59..15a93776b 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.10.02*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.10.02** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.10.07*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.10.07** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.10.02 +[debug] youtube-dl version 2016.10.07 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index fb248f9e9..7e9b2b873 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2016.10.07 Extractors + [iprima] Detect geo restriction diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 828ed0ba9..5bbef0c41 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -364,6 +364,7 @@ - **Le**: 乐视网 - **Learnr** - **Lecture2Go** + - **LEGO** - **Lemonde** - **LePlaylist** - **LetvCloud**: 乐视云 @@ -507,6 +508,7 @@ - **Nuvid** - **NYTimes** - **NYTimesArticle** + - **NZZ** - **ocw.mit.edu** - **OdaTV** - **Odnoklassniki** @@ -692,6 +694,7 @@ - **SWRMediathek** - **Syfy** - **SztvHu** + - **t-online.de** - **Tagesschau** - **tagesschau:player** - **Tass** @@ -721,8 +724,10 @@ - **TheScene** - **TheSixtyOne** - **TheStar** + - **TheWeatherChannel** - **ThisAmericanLife** - **ThisAV** + - **ThisOldHouse** - **tinypic**: tinypic.com videos - **tlc.de** - **TMZ** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 161ba4391..ac0921b7a 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.10.02' +__version__ = '2016.10.07' From 1dd58e14d846a64a3c014531b1dc7a377648c73b Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sat, 8 Oct 2016 08:33:02 +0100 Subject: [PATCH 0305/1571] [lego] improve info extraction and bypass geo restriction(closes #10872) --- youtube_dl/extractor/lego.py | 88 ++++++++++++++++++++++++++---------- 1 file changed, 65 insertions(+), 23 deletions(-) diff --git a/youtube_dl/extractor/lego.py b/youtube_dl/extractor/lego.py index 5be7d622c..d3bca6435 100644 --- a/youtube_dl/extractor/lego.py +++ b/youtube_dl/extractor/lego.py @@ -1,45 +1,86 @@ # coding: utf-8 from __future__ import unicode_literals +import re + from .common import InfoExtractor from ..compat import compat_str from ..utils import ( unescapeHTML, - int_or_none, + parse_duration, + get_element_by_class, ) class LEGOIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?lego\.com/(?:[^/]+/)*videos/(?:[^/]+/)*[^/?#]+-(?P[0-9a-f]+)' - _TEST = { + _VALID_URL = r'https?://(?:www\.)?lego\.com/(?P[^/]+)/(?:[^/]+/)*videos/(?:[^/]+/)*[^/?#]+-(?P[0-9a-f]+)' + _TESTS = [{ 'url': 'http://www.lego.com/en-us/videos/themes/club/blocumentary-kawaguchi-55492d823b1b4d5e985787fa8c2973b1', 'md5': 'f34468f176cfd76488767fc162c405fa', 'info_dict': { 'id': '55492d823b1b4d5e985787fa8c2973b1', 'ext': 'mp4', 'title': 'Blocumentary Great Creations: Akiyuki Kawaguchi', - } - } + 'description': 'Blocumentary Great Creations: Akiyuki Kawaguchi', + }, + }, { + # geo-restricted but the contentUrl contain a valid url + 'url': 'http://www.lego.com/nl-nl/videos/themes/nexoknights/episode-20-kingdom-of-heroes-13bdc2299ab24d9685701a915b3d71e7##sp=399', + 'md5': '4c3fec48a12e40c6e5995abc3d36cc2e', + 'info_dict': { + 'id': '13bdc2299ab24d9685701a915b3d71e7', + 'ext': 'mp4', + 'title': 'Aflevering 20 - Helden van het koninkrijk', + 'description': 'md5:8ee499aac26d7fa8bcb0cedb7f9c3941', + }, + }, { + # special characters in title + 'url': 'http://www.lego.com/en-us/starwars/videos/lego-star-wars-force-surprise-9685ee9d12e84ff38e84b4e3d0db533d', + 'info_dict': { + 'id': '9685ee9d12e84ff38e84b4e3d0db533d', + 'ext': 'mp4', + 'title': 'Force Surprise – LEGO® Star Wars™ Microfighters', + 'description': 'md5:9c673c96ce6f6271b88563fe9dc56de3', + }, + 'params': { + 'skip_download': True, + }, + }] _BITRATES = [256, 512, 1024, 1536, 2560] def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage( - 'http://www.lego.com/en-US/mediaplayer/video/' + video_id, video_id) - title = self._search_regex(r'(.+?)', webpage, 'title') - video_data = self._parse_json(unescapeHTML(self._search_regex( - r"video='([^']+)'", webpage, 'video data')), video_id) - progressive_base = self._search_regex( - r'data-video-progressive-url="([^"]+)"', - webpage, 'progressive base', default='https://lc-mediaplayerns-live-s.legocdn.com/') - streaming_base = self._search_regex( - r'data-video-streaming-url="([^"]+)"', - webpage, 'streaming base', default='http://legoprod-f.akamaihd.net/') - item_id = video_data['ItemId'] + locale, video_id = re.match(self._VALID_URL, url).groups() + webpage = self._download_webpage(url, video_id) + title = get_element_by_class('video-header', webpage).strip() + progressive_base = 'https://lc-mediaplayerns-live-s.legocdn.com/' + streaming_base = 'http://legoprod-f.akamaihd.net/' + content_url = self._html_search_meta('contentUrl', webpage) + path = self._search_regex( + r'(?:https?:)?//[^/]+/(?:[iz]/s/)?public/(.+)_[0-9,]+\.(?:mp4|webm)', + content_url, 'video path', default=None) + if not path: + player_url = self._proto_relative_url(self._search_regex( + r']+src="((?:https?)?//(?:www\.)?lego\.com/[^/]+/mediaplayer/video/[^"]+)', + webpage, 'player url', default=None)) + if not player_url: + base_url = self._proto_relative_url(self._search_regex( + r'data-baseurl="([^"]+)"', webpage, 'base url', + default='http://www.lego.com/%s/mediaplayer/video/' % locale)) + player_url = base_url + video_id + player_webpage = self._download_webpage(player_url, video_id) + video_data = self._parse_json(unescapeHTML(self._search_regex( + r"video='([^']+)'", player_webpage, 'video data')), video_id) + progressive_base = self._search_regex( + r'data-video-progressive-url="([^"]+)"', + player_webpage, 'progressive base', default='https://lc-mediaplayerns-live-s.legocdn.com/') + streaming_base = self._search_regex( + r'data-video-streaming-url="([^"]+)"', + player_webpage, 'streaming base', default='http://legoprod-f.akamaihd.net/') + item_id = video_data['ItemId'] - net_storage_path = video_data.get('NetStoragePath') or '/'.join([item_id[:2], item_id[2:4]]) - base_path = '_'.join([item_id, video_data['VideoId'], video_data['Locale'], compat_str(video_data['VideoVersion'])]) - path = '/'.join([net_storage_path, base_path]) + net_storage_path = video_data.get('NetStoragePath') or '/'.join([item_id[:2], item_id[2:4]]) + base_path = '_'.join([item_id, video_data['VideoId'], video_data['Locale'], compat_str(video_data['VideoVersion'])]) + path = '/'.join([net_storage_path, base_path]) streaming_path = ','.join(map(lambda bitrate: compat_str(bitrate), self._BITRATES)) formats = self._extract_akamai_formats( @@ -80,7 +121,8 @@ class LEGOIE(InfoExtractor): return { 'id': video_id, 'title': title, - 'thumbnail': video_data.get('CoverImageUrl'), - 'duration': int_or_none(video_data.get('Length')), + 'description': self._html_search_meta('description', webpage), + 'thumbnail': self._html_search_meta('thumbnail', webpage), + 'duration': parse_duration(self._html_search_meta('duration', webpage)), 'formats': formats, } From 3adb9d119e049d2bbc92fe2b56f1a22f4a664892 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?D=C3=A9stin=20Reed?= Date: Fri, 30 Sep 2016 19:54:12 +0200 Subject: [PATCH 0306/1571] [reverbnation] Modernize --- youtube_dl/extractor/reverbnation.py | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/reverbnation.py b/youtube_dl/extractor/reverbnation.py index 3c6725aeb..52f18e231 100644 --- a/youtube_dl/extractor/reverbnation.py +++ b/youtube_dl/extractor/reverbnation.py @@ -1,7 +1,5 @@ from __future__ import unicode_literals -import re - from .common import InfoExtractor from ..utils import str_or_none @@ -10,20 +8,19 @@ class ReverbNationIE(InfoExtractor): _VALID_URL = r'^https?://(?:www\.)?reverbnation\.com/.*?/song/(?P\d+).*?$' _TESTS = [{ 'url': 'http://www.reverbnation.com/alkilados/song/16965047-mona-lisa', - 'md5': '3da12ebca28c67c111a7f8b262d3f7a7', + 'md5': 'c0aaf339bcee189495fdf5a8c8ba8645', 'info_dict': { 'id': '16965047', 'ext': 'mp3', 'title': 'MONA LISA', 'uploader': 'ALKILADOS', 'uploader_id': '216429', - 'thumbnail': 're:^https://gp1\.wac\.edgecastcdn\.net/.*?\.jpg$' + 'thumbnail': 're:^https?://.*\.jpg', }, }] def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - song_id = mobj.group('id') + song_id = self._match_id(url) api_res = self._download_json( 'https://api.reverbnation.com/song/%s' % song_id, @@ -31,14 +28,24 @@ class ReverbNationIE(InfoExtractor): note='Downloading information of song %s' % song_id ) + thumbnails = [] + if api_res.get('image'): + thumbnails.append({ + 'url': api_res.get('image'), + }) + if api_res.get('thumbnail'): + thumbnails.append({ + 'url': api_res.get('thumbnail'), + 'preference': -2, + }) + return { 'id': song_id, - 'title': api_res.get('name'), - 'url': api_res.get('url'), + 'title': api_res['name'], + 'url': api_res['url'], 'uploader': api_res.get('artist', {}).get('name'), 'uploader_id': str_or_none(api_res.get('artist', {}).get('id')), - 'thumbnail': self._proto_relative_url( - api_res.get('image', api_res.get('thumbnail'))), + 'thumbnails': thumbnails, 'ext': 'mp3', 'vcodec': 'none', } From f68901e50a9286aa4d82348cac0e85e26359c81c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 9 Oct 2016 01:02:35 +0700 Subject: [PATCH 0307/1571] [reverbnation] Eliminate code duplication in thumbnails extraction --- youtube_dl/extractor/reverbnation.py | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/reverbnation.py b/youtube_dl/extractor/reverbnation.py index 52f18e231..4875009e5 100644 --- a/youtube_dl/extractor/reverbnation.py +++ b/youtube_dl/extractor/reverbnation.py @@ -1,7 +1,10 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..utils import str_or_none +from ..utils import ( + qualities, + str_or_none, +) class ReverbNationIE(InfoExtractor): @@ -28,16 +31,15 @@ class ReverbNationIE(InfoExtractor): note='Downloading information of song %s' % song_id ) + THUMBNAILS = ('thumbnail', 'image') + quality = qualities(THUMBNAILS) thumbnails = [] - if api_res.get('image'): - thumbnails.append({ - 'url': api_res.get('image'), - }) - if api_res.get('thumbnail'): - thumbnails.append({ - 'url': api_res.get('thumbnail'), - 'preference': -2, - }) + for thumb_key in THUMBNAILS: + if api_res.get(thumb_key): + thumbnails.append({ + 'url': api_res[thumb_key], + 'preference': quality(thumb_key) + }) return { 'id': song_id, From 2b51dac1f9750f6eb4988f3c23b0e8f618136b6b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?D=C3=A9stin=20Reed?= Date: Sat, 1 Oct 2016 13:57:18 +0200 Subject: [PATCH 0308/1571] [slutload] Fix test and simplify --- youtube_dl/extractor/slutload.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/slutload.py b/youtube_dl/extractor/slutload.py index 7efb29f65..18cc7721e 100644 --- a/youtube_dl/extractor/slutload.py +++ b/youtube_dl/extractor/slutload.py @@ -1,7 +1,5 @@ from __future__ import unicode_literals -import re - from .common import InfoExtractor @@ -9,7 +7,7 @@ class SlutloadIE(InfoExtractor): _VALID_URL = r'^https?://(?:\w+\.)?slutload\.com/video/[^/]+/(?P[^/]+)/?$' _TEST = { 'url': 'http://www.slutload.com/video/virginie-baisee-en-cam/TD73btpBqSxc/', - 'md5': '0cf531ae8006b530bd9df947a6a0df77', + 'md5': '868309628ba00fd488cf516a113fd717', 'info_dict': { 'id': 'TD73btpBqSxc', 'ext': 'mp4', @@ -20,9 +18,7 @@ class SlutloadIE(InfoExtractor): } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - + video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) video_title = self._html_search_regex(r'

([^<]+)', From 8204c733523675d505a8c726ec65b65e15485ce1 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sun, 9 Oct 2016 18:22:55 +0800 Subject: [PATCH 0309/1571] [Makefile] Fix for GNU make < 4 (closes #9387) Shell assignment operator in BSD make != is ported to GNU make in version 4.0, so 3.x doesn't work. I choose to drop BSD make support as installing GNU make on *BSD systems is easier than installing newer GNU make. --- ChangeLog | 6 ++++++ Makefile | 4 ++-- README.md | 2 +- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/ChangeLog b/ChangeLog index 7e9b2b873..3d3473a4b 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +version + +Core +* [Makefile] Support for GNU make < 4 is fixed; BSD make dropped (#9387) + + version 2016.10.07 Extractors diff --git a/Makefile b/Makefile index a2763a664..8d66e48c9 100644 --- a/Makefile +++ b/Makefile @@ -12,7 +12,7 @@ SHAREDIR ?= $(PREFIX)/share PYTHON ?= /usr/bin/env python # set SYSCONFDIR to /etc if PREFIX=/usr or PREFIX=/usr/local -SYSCONFDIR != if [ $(PREFIX) = /usr -o $(PREFIX) = /usr/local ]; then echo /etc; else echo $(PREFIX)/etc; fi +SYSCONFDIR = $(shell if [ $(PREFIX) = /usr -o $(PREFIX) = /usr/local ]; then echo /etc; else echo $(PREFIX)/etc; fi) install: youtube-dl youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish install -d $(DESTDIR)$(BINDIR) @@ -90,7 +90,7 @@ fish-completion: youtube-dl.fish lazy-extractors: youtube_dl/extractor/lazy_extractors.py -_EXTRACTOR_FILES != find youtube_dl/extractor -iname '*.py' -and -not -iname 'lazy_extractors.py' +_EXTRACTOR_FILES = $(shell find youtube_dl/extractor -iname '*.py' -and -not -iname 'lazy_extractors.py') youtube_dl/extractor/lazy_extractors.py: devscripts/make_lazy_extractors.py devscripts/lazy_load_template.py $(_EXTRACTOR_FILES) $(PYTHON) devscripts/make_lazy_extractors.py $@ diff --git a/README.md b/README.md index 4debe15fe..1cb44b2cf 100644 --- a/README.md +++ b/README.md @@ -923,7 +923,7 @@ To run the test, simply invoke your favorite test runner, or execute a test file If you want to create a build of youtube-dl yourself, you'll need * python -* make (both GNU make and BSD make are supported) +* make (only GNU make is supported) * pandoc * zip * nosetests From b0082629a9cf65796d503786c45c144d992010e7 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sun, 9 Oct 2016 18:42:15 +0800 Subject: [PATCH 0310/1571] =?UTF-8?q?[nextmedia]=20Support=20action=20news?= =?UTF-8?q?=20(=E5=8B=95=E6=96=B0=E8=81=9E)=20on=20Apple=20Daily?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ChangeLog | 3 +++ youtube_dl/extractor/nextmedia.py | 5 ++++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index 3d3473a4b..f74c6b5a4 100644 --- a/ChangeLog +++ b/ChangeLog @@ -3,6 +3,9 @@ version Core * [Makefile] Support for GNU make < 4 is fixed; BSD make dropped (#9387) +Extractors ++ [nextmedia] Recognize action news on AppleDaily + version 2016.10.07 diff --git a/youtube_dl/extractor/nextmedia.py b/youtube_dl/extractor/nextmedia.py index a08e48c4b..dee9056d3 100644 --- a/youtube_dl/extractor/nextmedia.py +++ b/youtube_dl/extractor/nextmedia.py @@ -93,7 +93,7 @@ class NextMediaActionNewsIE(NextMediaIE): class AppleDailyIE(NextMediaIE): IE_DESC = '臺灣蘋果日報' - _VALID_URL = r'https?://(www|ent)\.appledaily\.com\.tw/(?:animation|appledaily|enews|realtimenews)/[^/]+/[^/]+/(?P\d+)/(?P\d+)(/.*)?' + _VALID_URL = r'https?://(www|ent)\.appledaily\.com\.tw/(?:animation|appledaily|enews|realtimenews|actionnews)/[^/]+/[^/]+/(?P\d+)/(?P\d+)(/.*)?' _TESTS = [{ 'url': 'http://ent.appledaily.com.tw/enews/article/entertainment/20150128/36354694', 'md5': 'a843ab23d150977cc55ef94f1e2c1e4d', @@ -154,6 +154,9 @@ class AppleDailyIE(NextMediaIE): 'description': 'md5:7b859991a6a4fedbdf3dd3b66545c748', 'upload_date': '20140417', }, + }, { + 'url': 'http://www.appledaily.com.tw/actionnews/appledaily/7/20161003/960588/', + 'only_matching': True, }] _URL_PATTERN = r'\{url: \'(.+)\'\}' From 65f4c1de3d442a49367597a80687fddcf3d142a2 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sun, 9 Oct 2016 18:58:15 +0800 Subject: [PATCH 0311/1571] [allocine] Fix extraction (closes #10860) I change the URL of the third test case, because now the original URL does not contain a video anymore, and there's no easy to get the real URL from the /film/ one. --- ChangeLog | 1 + youtube_dl/extractor/allocine.py | 57 ++++++++++++-------------------- 2 files changed, 22 insertions(+), 36 deletions(-) diff --git a/ChangeLog b/ChangeLog index f74c6b5a4..6c6053a2a 100644 --- a/ChangeLog +++ b/ChangeLog @@ -4,6 +4,7 @@ Core * [Makefile] Support for GNU make < 4 is fixed; BSD make dropped (#9387) Extractors +* [allocine] Fix extraction (#10860) + [nextmedia] Recognize action news on AppleDaily diff --git a/youtube_dl/extractor/allocine.py b/youtube_dl/extractor/allocine.py index 7d280d871..b292ffdd9 100644 --- a/youtube_dl/extractor/allocine.py +++ b/youtube_dl/extractor/allocine.py @@ -1,29 +1,25 @@ # coding: utf-8 from __future__ import unicode_literals -import re -import json - from .common import InfoExtractor -from ..compat import compat_str from ..utils import ( qualities, - unescapeHTML, - xpath_element, + url_basename, ) class AllocineIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?allocine\.fr/(?Particle|video|film)/(fichearticle_gen_carticle=|player_gen_cmedia=|fichefilm_gen_cfilm=|video-)(?P[0-9]+)(?:\.html)?' + _VALID_URL = r'https?://(?:www\.)?allocine\.fr/(?:article|video|film)/(?:fichearticle_gen_carticle=|player_gen_cmedia=|fichefilm_gen_cfilm=|video-)(?P[0-9]+)(?:\.html)?' _TESTS = [{ 'url': 'http://www.allocine.fr/article/fichearticle_gen_carticle=18635087.html', 'md5': '0c9fcf59a841f65635fa300ac43d8269', 'info_dict': { 'id': '19546517', + 'display_id': '18635087', 'ext': 'mp4', 'title': 'Astérix - Le Domaine des Dieux Teaser VF', - 'description': 'md5:abcd09ce503c6560512c14ebfdb720d2', + 'description': 'md5:4a754271d9c6f16c72629a8a993ee884', 'thumbnail': 're:http://.*\.jpg', }, }, { @@ -31,19 +27,21 @@ class AllocineIE(InfoExtractor): 'md5': 'd0cdce5d2b9522ce279fdfec07ff16e0', 'info_dict': { 'id': '19540403', + 'display_id': '19540403', 'ext': 'mp4', 'title': 'Planes 2 Bande-annonce VF', 'description': 'Regardez la bande annonce du film Planes 2 (Planes 2 Bande-annonce VF). Planes 2, un film de Roberts Gannaway', 'thumbnail': 're:http://.*\.jpg', }, }, { - 'url': 'http://www.allocine.fr/film/fichefilm_gen_cfilm=181290.html', + 'url': 'http://www.allocine.fr/video/player_gen_cmedia=19544709&cfilm=181290.html', 'md5': '101250fb127ef9ca3d73186ff22a47ce', 'info_dict': { 'id': '19544709', + 'display_id': '19544709', 'ext': 'mp4', 'title': 'Dragons 2 - Bande annonce finale VF', - 'description': 'md5:601d15393ac40f249648ef000720e7e3', + 'description': 'md5:6cdd2d7c2687d4c6aafe80a35e17267a', 'thumbnail': 're:http://.*\.jpg', }, }, { @@ -52,43 +50,30 @@ class AllocineIE(InfoExtractor): }] def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - typ = mobj.group('typ') - display_id = mobj.group('id') + display_id = self._match_id(url) webpage = self._download_webpage(url, display_id) - if typ == 'film': - video_id = self._search_regex(r'href="/video/player_gen_cmedia=([0-9]+).+"', webpage, 'video id') - else: - player = self._search_regex(r'data-player=\'([^\']+)\'>', webpage, 'data player', default=None) - if player: - player_data = json.loads(player) - video_id = compat_str(player_data['refMedia']) - else: - model = self._search_regex(r'data-model="([^"]+)">', webpage, 'data model') - model_data = self._parse_json(unescapeHTML(model), display_id) - video_id = compat_str(model_data['id']) + model = self._html_search_regex( + r'data-model="([^"]+)"', webpage, 'data model') + model_data = self._parse_json(model, display_id) - xml = self._download_xml('http://www.allocine.fr/ws/AcVisiondataV4.ashx?media=%s' % video_id, display_id) - - video = xpath_element(xml, './/AcVisionVideo').attrib quality = qualities(['ld', 'md', 'hd']) formats = [] - for k, v in video.items(): - if re.match(r'.+_path', k): - format_id = k.split('_')[0] - formats.append({ - 'format_id': format_id, - 'quality': quality(format_id), - 'url': v, - }) + for video_url in model_data['sources'].values(): + video_id, format_id = url_basename(video_url).split('_')[:2] + formats.append({ + 'format_id': format_id, + 'quality': quality(format_id), + 'url': video_url, + }) self._sort_formats(formats) return { 'id': video_id, - 'title': video['videoTitle'], + 'display_id': display_id, + 'title': model_data['title'], 'thumbnail': self._og_search_thumbnail(webpage), 'formats': formats, 'description': self._og_search_description(webpage), From 176006a1202cc6ef3d0a768ace41f97516c76c6d Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sun, 9 Oct 2016 19:41:44 +0800 Subject: [PATCH 0312/1571] [allocine] Fix for /video/ videos (closes #10860) --- youtube_dl/extractor/allocine.py | 58 ++++++++++++++++++++++++-------- 1 file changed, 44 insertions(+), 14 deletions(-) diff --git a/youtube_dl/extractor/allocine.py b/youtube_dl/extractor/allocine.py index b292ffdd9..517b06def 100644 --- a/youtube_dl/extractor/allocine.py +++ b/youtube_dl/extractor/allocine.py @@ -3,6 +3,7 @@ from __future__ import unicode_literals from .common import InfoExtractor from ..utils import ( + remove_end, qualities, url_basename, ) @@ -46,7 +47,14 @@ class AllocineIE(InfoExtractor): }, }, { 'url': 'http://www.allocine.fr/video/video-19550147/', - 'only_matching': True, + 'md5': '3566c0668c0235e2d224fd8edb389f67', + 'info_dict': { + 'id': '19550147', + 'ext': 'mp4', + 'title': 'Faux Raccord N°123 - Les gaffes de Cliffhanger', + 'description': 'md5:bc734b83ffa2d8a12188d9eb48bb6354', + 'thumbnail': 're:http://.*\.jpg', + }, }] def _real_extract(self, url): @@ -54,26 +62,48 @@ class AllocineIE(InfoExtractor): webpage = self._download_webpage(url, display_id) - model = self._html_search_regex( - r'data-model="([^"]+)"', webpage, 'data model') - model_data = self._parse_json(model, display_id) - + formats = [] quality = qualities(['ld', 'md', 'hd']) - formats = [] - for video_url in model_data['sources'].values(): - video_id, format_id = url_basename(video_url).split('_')[:2] - formats.append({ - 'format_id': format_id, - 'quality': quality(format_id), - 'url': video_url, - }) + model = self._html_search_regex( + r'data-model="([^"]+)"', webpage, 'data model', default=None) + if model: + model_data = self._parse_json(model, display_id) + + for video_url in model_data['sources'].values(): + video_id, format_id = url_basename(video_url).split('_')[:2] + formats.append({ + 'format_id': format_id, + 'quality': quality(format_id), + 'url': video_url, + }) + + title = model_data['title'] + else: + video_id = display_id + media_data = self._download_json( + 'http://www.allocine.fr/ws/AcVisiondataV5.ashx?media=%s' % video_id, display_id) + for key, value in media_data['video'].items(): + if not key.endswith('Path'): + continue + + format_id = key[:-len('Path')] + formats.append({ + 'format_id': format_id, + 'quality': quality(format_id), + 'url': value, + }) + + title = remove_end(self._html_search_regex( + r'(?s)(.+?)', webpage, 'title' + ).strip(), ' - AlloCiné') + self._sort_formats(formats) return { 'id': video_id, 'display_id': display_id, - 'title': model_data['title'], + 'title': title, 'thumbnail': self._og_search_thumbnail(webpage), 'formats': formats, 'description': self._og_search_description(webpage), From 71cdcb23316b55baf9330741ede4c77d08e4d77f Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Tue, 11 Oct 2016 12:30:35 +0800 Subject: [PATCH 0313/1571] [hbo] Support episode pages (closes #10892) --- ChangeLog | 1 + youtube_dl/extractor/hbo.py | 63 ++++++++++++++++++++++++++++--------- 2 files changed, 50 insertions(+), 14 deletions(-) diff --git a/ChangeLog b/ChangeLog index 6c6053a2a..d49682c8b 100644 --- a/ChangeLog +++ b/ChangeLog @@ -4,6 +4,7 @@ Core * [Makefile] Support for GNU make < 4 is fixed; BSD make dropped (#9387) Extractors ++ [hbo] Add support for episode pages (#10892) * [allocine] Fix extraction (#10860) + [nextmedia] Recognize action news on AppleDaily diff --git a/youtube_dl/extractor/hbo.py b/youtube_dl/extractor/hbo.py index dad0f3994..3606d64fd 100644 --- a/youtube_dl/extractor/hbo.py +++ b/youtube_dl/extractor/hbo.py @@ -12,17 +12,7 @@ from ..utils import ( ) -class HBOIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?hbo\.com/video/video\.html\?.*vid=(?P[0-9]+)' - _TEST = { - 'url': 'http://www.hbo.com/video/video.html?autoplay=true&g=u&vid=1437839', - 'md5': '1c33253f0c7782142c993c0ba62a8753', - 'info_dict': { - 'id': '1437839', - 'ext': 'mp4', - 'title': 'Ep. 64 Clip: Encryption', - } - } +class HBOBaseIE(InfoExtractor): _FORMATS_INFO = { '1920': { 'width': 1280, @@ -50,8 +40,7 @@ class HBOIE(InfoExtractor): }, } - def _real_extract(self, url): - video_id = self._match_id(url) + def _extract_from_id(self, video_id): video_data = self._download_xml( 'http://render.lv3.hbo.com/data/content/global/videos/data/%s.xml' % video_id, video_id) title = xpath_text(video_data, 'title', 'title', True) @@ -116,7 +105,53 @@ class HBOIE(InfoExtractor): return { 'id': video_id, 'title': title, - 'duration': parse_duration(xpath_element(video_data, 'duration/tv14')), + 'duration': parse_duration(xpath_text(video_data, 'duration/tv14')), 'formats': formats, 'thumbnails': thumbnails, } + + +class HBOIE(HBOBaseIE): + _VALID_URL = r'https?://(?:www\.)?hbo\.com/video/video\.html\?.*vid=(?P[0-9]+)' + _TEST = { + 'url': 'http://www.hbo.com/video/video.html?autoplay=true&g=u&vid=1437839', + 'md5': '1c33253f0c7782142c993c0ba62a8753', + 'info_dict': { + 'id': '1437839', + 'ext': 'mp4', + 'title': 'Ep. 64 Clip: Encryption', + 'thumbnail': 're:https?://.*\.jpg$', + 'duration': 1072, + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + return self._extract_from_id(video_id) + + +class HBOEpisodeIE(HBOBaseIE): + _VALID_URL = r'https?://(?:www\.)?hbo\.com/(?!video)([^/]+/)+video/(?P[0-9a-z-]+)\.html' + + _TESTS = [{ + 'url': 'http://www.hbo.com/girls/episodes/5/52-i-love-you-baby/video/ep-52-inside-the-episode.html?autoplay=true', + 'md5': '689132b253cc0ab7434237fc3a293210', + 'info_dict': { + 'id': '1439518', + 'ext': 'mp4', + 'title': 'Ep. 52: Inside the Episode', + 'thumbnail': 're:https?://.*\.jpg$', + 'duration': 240, + }, + }] + + def _real_extract(self, url): + display_id = self._match_id(url) + + webpage = self._download_webpage(url, display_id) + + video_id = self._search_regex( + r'(?P[\'"])videoId(?P=q1)\s*:\s*(?P[\'"])(?P\d+)(?P=q2)', + webpage, 'video ID', group='video_id') + + return self._extract_from_id(video_id) From 27b8d2ee9535e19cdaed69de7a08ba0e026700e0 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Tue, 11 Oct 2016 12:41:30 +0800 Subject: [PATCH 0314/1571] [hbo] Add display_id and another test (#10892) --- youtube_dl/extractor/hbo.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/hbo.py b/youtube_dl/extractor/hbo.py index 3606d64fd..cbf774377 100644 --- a/youtube_dl/extractor/hbo.py +++ b/youtube_dl/extractor/hbo.py @@ -138,11 +138,15 @@ class HBOEpisodeIE(HBOBaseIE): 'md5': '689132b253cc0ab7434237fc3a293210', 'info_dict': { 'id': '1439518', + 'display_id': 'ep-52-inside-the-episode', 'ext': 'mp4', 'title': 'Ep. 52: Inside the Episode', 'thumbnail': 're:https?://.*\.jpg$', 'duration': 240, }, + }, { + 'url': 'http://www.hbo.com/game-of-thrones/about/video/season-5-invitation-to-the-set.html?autoplay=true', + 'only_matching': True, }] def _real_extract(self, url): @@ -154,4 +158,7 @@ class HBOEpisodeIE(HBOBaseIE): r'(?P[\'"])videoId(?P=q1)\s*:\s*(?P[\'"])(?P\d+)(?P=q2)', webpage, 'video ID', group='video_id') - return self._extract_from_id(video_id) + info_dict = self._extract_from_id(video_id) + info_dict['display_id'] = display_id + + return info_dict From f165ca70eb4f7911949278e17751092a3cc8619f Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Tue, 11 Oct 2016 12:53:27 +0800 Subject: [PATCH 0315/1571] [abc.net.au:iview] Fix for non-series videos (closes #10895) --- ChangeLog | 1 + youtube_dl/extractor/abc.py | 22 +++++++++++----------- 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/ChangeLog b/ChangeLog index d49682c8b..26f01790a 100644 --- a/ChangeLog +++ b/ChangeLog @@ -4,6 +4,7 @@ Core * [Makefile] Support for GNU make < 4 is fixed; BSD make dropped (#9387) Extractors +* [abc.net.au:iview] Fix for standalone (non series) videos (#10895) + [hbo] Add support for episode pages (#10892) * [allocine] Fix extraction (#10860) + [nextmedia] Recognize action news on AppleDaily diff --git a/youtube_dl/extractor/abc.py b/youtube_dl/extractor/abc.py index 465249bbf..0247cabf9 100644 --- a/youtube_dl/extractor/abc.py +++ b/youtube_dl/extractor/abc.py @@ -102,16 +102,16 @@ class ABCIViewIE(InfoExtractor): # ABC iview programs are normally available for 14 days only. _TESTS = [{ - 'url': 'http://iview.abc.net.au/programs/gardening-australia/FA1505V024S00', - 'md5': '979d10b2939101f0d27a06b79edad536', + 'url': 'http://iview.abc.net.au/programs/diaries-of-a-broken-mind/ZX9735A001S00', + 'md5': 'cde42d728b3b7c2b32b1b94b4a548afc', 'info_dict': { - 'id': 'FA1505V024S00', + 'id': 'ZX9735A001S00', 'ext': 'mp4', - 'title': 'Series 27 Ep 24', - 'description': 'md5:b28baeae7504d1148e1d2f0e3ed3c15d', - 'upload_date': '20160820', - 'uploader_id': 'abc1', - 'timestamp': 1471719600, + 'title': 'Diaries Of A Broken Mind', + 'description': 'md5:7de3903874b7a1be279fe6b68718fc9e', + 'upload_date': '20161010', + 'uploader_id': 'abc2', + 'timestamp': 1476064920, }, 'skip': 'Video gone', }] @@ -121,7 +121,7 @@ class ABCIViewIE(InfoExtractor): webpage = self._download_webpage(url, video_id) video_params = self._parse_json(self._search_regex( r'videoParams\s*=\s*({.+?});', webpage, 'video params'), video_id) - title = video_params['title'] + title = video_params.get('title') or video_params['seriesTitle'] stream = next(s for s in video_params['playlist'] if s.get('type') == 'program') formats = self._extract_akamai_formats(stream['hds-unmetered'], video_id) @@ -144,8 +144,8 @@ class ABCIViewIE(InfoExtractor): 'timestamp': parse_iso8601(video_params.get('pubDate'), ' '), 'series': video_params.get('seriesTitle'), 'series_id': video_params.get('seriesHouseNumber') or video_id[:7], - 'episode_number': int_or_none(self._html_search_meta('episodeNumber', webpage)), - 'episode': self._html_search_meta('episode_title', webpage), + 'episode_number': int_or_none(self._html_search_meta('episodeNumber', webpage, default=None)), + 'episode': self._html_search_meta('episode_title', webpage, default=None), 'uploader_id': video_params.get('channel'), 'formats': formats, 'subtitles': subtitles, From 555787d717985531b3beba566cb976fd3f849aaa Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Tue, 11 Oct 2016 17:44:35 +0800 Subject: [PATCH 0316/1571] [streamable] Add helper for extracting embedded videos --- youtube_dl/extractor/streamable.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/youtube_dl/extractor/streamable.py b/youtube_dl/extractor/streamable.py index 1c61437a4..56b926448 100644 --- a/youtube_dl/extractor/streamable.py +++ b/youtube_dl/extractor/streamable.py @@ -1,6 +1,8 @@ # coding: utf-8 from __future__ import unicode_literals +import re + from .common import InfoExtractor from ..utils import ( ExtractorError, @@ -48,6 +50,15 @@ class StreamableIE(InfoExtractor): } ] + @staticmethod + def _extract_url(webpage): + print(webpage) + mobj = re.search( + r']+src=(?P[\'"])(?P(?:https?:)?//streamable\.com/(?:(?!\1).+))(?P=q1)', + webpage) + if mobj: + return mobj.group('src') + def _real_extract(self, url): video_id = self._match_id(url) From c452e69d3d3e6bbbec298a5d4b032cb502cef0ab Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Tue, 11 Oct 2016 17:46:13 +0800 Subject: [PATCH 0317/1571] [footyroom] Fix extraction and update _TESTS (closes #10810) --- ChangeLog | 1 + youtube_dl/extractor/footyroom.py | 32 ++++++++++++++++++------------- 2 files changed, 20 insertions(+), 13 deletions(-) diff --git a/ChangeLog b/ChangeLog index 26f01790a..76c446a6d 100644 --- a/ChangeLog +++ b/ChangeLog @@ -4,6 +4,7 @@ Core * [Makefile] Support for GNU make < 4 is fixed; BSD make dropped (#9387) Extractors +* [footyroom] Fix extraction (#10810) * [abc.net.au:iview] Fix for standalone (non series) videos (#10895) + [hbo] Add support for episode pages (#10892) * [allocine] Fix extraction (#10860) diff --git a/youtube_dl/extractor/footyroom.py b/youtube_dl/extractor/footyroom.py index d2503ae2e..118325b6d 100644 --- a/youtube_dl/extractor/footyroom.py +++ b/youtube_dl/extractor/footyroom.py @@ -2,25 +2,27 @@ from __future__ import unicode_literals from .common import InfoExtractor +from .streamable import StreamableIE class FootyRoomIE(InfoExtractor): - _VALID_URL = r'https?://footyroom\.com/(?P[^/]+)' + _VALID_URL = r'https?://footyroom\.com/matches/(?P\d+)' _TESTS = [{ - 'url': 'http://footyroom.com/schalke-04-0-2-real-madrid-2015-02/', + 'url': 'http://footyroom.com/matches/79922154/hull-city-vs-chelsea/review', 'info_dict': { - 'id': 'schalke-04-0-2-real-madrid-2015-02', - 'title': 'Schalke 04 0 – 2 Real Madrid', + 'id': '79922154', + 'title': 'VIDEO Hull City 0 - 2 Chelsea', }, - 'playlist_count': 3, - 'skip': 'Video for this match is not available', + 'playlist_count': 2, + 'add_ie': [StreamableIE.ie_key()], }, { - 'url': 'http://footyroom.com/georgia-0-2-germany-2015-03/', + 'url': 'http://footyroom.com/matches/75817984/georgia-vs-germany/review', 'info_dict': { - 'id': 'georgia-0-2-germany-2015-03', - 'title': 'Georgia 0 – 2 Germany', + 'id': '75817984', + 'title': 'VIDEO Georgia 0 - 2 Germany', }, 'playlist_count': 1, + 'add_ie': ['Playwire'] }] def _real_extract(self, url): @@ -28,9 +30,8 @@ class FootyRoomIE(InfoExtractor): webpage = self._download_webpage(url, playlist_id) - playlist = self._parse_json( - self._search_regex( - r'VideoSelector\.load\((\[.+?\])\);', webpage, 'video selector'), + playlist = self._parse_json(self._search_regex( + r'DataStore\.media\s*=\s*([^;]+)', webpage, 'media data'), playlist_id) playlist_title = self._og_search_title(webpage) @@ -40,11 +41,16 @@ class FootyRoomIE(InfoExtractor): payload = video.get('payload') if not payload: continue - playwire_url = self._search_regex( + playwire_url = self._html_search_regex( r'data-config="([^"]+)"', payload, 'playwire url', default=None) if playwire_url: entries.append(self.url_result(self._proto_relative_url( playwire_url, 'http:'), 'Playwire')) + streamable_url = StreamableIE._extract_url(payload) + if streamable_url: + entries.append(self.url_result( + streamable_url, StreamableIE.ie_key())) + return self.playlist_result(entries, playlist_id, playlist_title) From 3d643f4cec5ded2be9958d5cd0e31176b2074e37 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Tue, 11 Oct 2016 17:46:52 +0800 Subject: [PATCH 0318/1571] [hbo] Add HBOEpisodeIE (#10892) --- youtube_dl/extractor/extractors.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 5c1d2abfb..08bed8b0c 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -348,7 +348,10 @@ from .goshgay import GoshgayIE from .gputechconf import GPUTechConfIE from .groupon import GrouponIE from .hark import HarkIE -from .hbo import HBOIE +from .hbo import ( + HBOIE, + HBOEpisodeIE, +) from .hearthisat import HearThisAtIE from .heise import HeiseIE from .hellporno import HellPornoIE From 55642487f072565bea3b2826b836a1a3159a3807 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Tue, 11 Oct 2016 20:50:52 +0800 Subject: [PATCH 0319/1571] [nhl] Skip invalid m3u8 formats (closes #10713) --- ChangeLog | 1 + youtube_dl/extractor/nhl.py | 18 ++++++++++++++++-- 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/ChangeLog b/ChangeLog index 76c446a6d..9a7e7133b 100644 --- a/ChangeLog +++ b/ChangeLog @@ -4,6 +4,7 @@ Core * [Makefile] Support for GNU make < 4 is fixed; BSD make dropped (#9387) Extractors +* [nhl] Correctly handle invalid formats (#10713) * [footyroom] Fix extraction (#10810) * [abc.net.au:iview] Fix for standalone (non series) videos (#10895) + [hbo] Add support for episode pages (#10892) diff --git a/youtube_dl/extractor/nhl.py b/youtube_dl/extractor/nhl.py index 26149c88f..62ce800c0 100644 --- a/youtube_dl/extractor/nhl.py +++ b/youtube_dl/extractor/nhl.py @@ -274,6 +274,18 @@ class NHLIE(InfoExtractor): 'upload_date': '20160204', 'timestamp': 1454544904, }, + }, { + # Some m3u8 URLs are invalid (https://github.com/rg3/youtube-dl/issues/10713) + 'url': 'https://www.nhl.com/predators/video/poile-laviolette-on-subban-trade/t-277437416/c-44315003', + 'md5': '50b2bb47f405121484dda3ccbea25459', + 'info_dict': { + 'id': '44315003', + 'ext': 'mp4', + 'title': 'Poile, Laviolette on Subban trade', + 'description': 'General manager David Poile and head coach Peter Laviolette share their thoughts on acquiring P.K. Subban from Montreal (06/29/16)', + 'timestamp': 1467242866, + 'upload_date': '20160629', + }, }, { 'url': 'https://www.wch2016.com/video/caneur-best-of-game-2-micd-up/t-281230378/c-44983703', 'only_matching': True, @@ -301,9 +313,11 @@ class NHLIE(InfoExtractor): continue ext = determine_ext(playback_url) if ext == 'm3u8': - formats.extend(self._extract_m3u8_formats( + m3u8_formats = self._extract_m3u8_formats( playback_url, video_id, 'mp4', 'm3u8_native', - m3u8_id=playback.get('name', 'hls'), fatal=False)) + m3u8_id=playback.get('name', 'hls'), fatal=False) + self._check_formats(m3u8_formats, video_id) + formats.extend(m3u8_formats) else: height = int_or_none(playback.get('height')) formats.append({ From cea364f70c97dad933fa38698f3c9df1bdb485cf Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Wed, 12 Oct 2016 01:40:28 +0800 Subject: [PATCH 0320/1571] [extractor/common] Support HTML media elements without child nodes --- ChangeLog | 1 + youtube_dl/extractor/common.py | 6 +++++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index 9a7e7133b..49488c888 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,7 @@ version Core ++ Support HTML media elements without child nodes * [Makefile] Support for GNU make < 4 is fixed; BSD make dropped (#9387) Extractors diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index da192728f..431cef831 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1802,7 +1802,11 @@ class InfoExtractor(object): return is_plain_url, formats entries = [] - for media_tag, media_type, media_content in re.findall(r'(?s)(<(?Pvideo|audio)[^>]*>)(.*?)', webpage): + media_tags = [(media_tag, media_type, '') + for media_tag, media_type + in re.findall(r'(?s)(<(video|audio)[^>]*/>)', webpage)] + media_tags.extend(re.findall(r'(?s)(<(?Pvideo|audio)[^>]*>)(.*?)', webpage)) + for media_tag, media_type, media_content in media_tags: media_info = { 'formats': [], 'subtitles': {}, From 6f20b65e728ee30d9b987a39932a3355501f7f67 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Wed, 12 Oct 2016 01:41:41 +0800 Subject: [PATCH 0321/1571] [test/test_http] Update tests After switching to HTML5 extraction helpers in generic.py, the result info_dict is always a playlist. --- test/test_http.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/test_http.py b/test/test_http.py index fdc68ccb4..bb0a098e4 100644 --- a/test/test_http.py +++ b/test/test_http.py @@ -87,7 +87,7 @@ class TestHTTP(unittest.TestCase): ydl = YoutubeDL({'logger': FakeLogger()}) r = ydl.extract_info('http://localhost:%d/302' % self.port) - self.assertEqual(r['url'], 'http://localhost:%d/vid.mp4' % self.port) + self.assertEqual(r['entries'][0]['url'], 'http://localhost:%d/vid.mp4' % self.port) class TestHTTPS(unittest.TestCase): @@ -111,7 +111,7 @@ class TestHTTPS(unittest.TestCase): ydl = YoutubeDL({'logger': FakeLogger(), 'nocheckcertificate': True}) r = ydl.extract_info('https://localhost:%d/video.html' % self.port) - self.assertEqual(r['url'], 'https://localhost:%d/vid.mp4' % self.port) + self.assertEqual(r['entries'][0]['url'], 'https://localhost:%d/vid.mp4' % self.port) def _build_proxy_handler(name): From a093cfc78b584a6e5dbc4bbca525f9e40af9522d Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Wed, 12 Oct 2016 01:48:06 +0800 Subject: [PATCH 0322/1571] [vimeo:review] Fix extraction (#10900) Now Vimeo Review videos uses React. Thanks @davekaro for analyzing the problem! --- ChangeLog | 1 + youtube_dl/extractor/vimeo.py | 8 +++++--- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/ChangeLog b/ChangeLog index 49488c888..3e16a2cb3 100644 --- a/ChangeLog +++ b/ChangeLog @@ -5,6 +5,7 @@ Core * [Makefile] Support for GNU make < 4 is fixed; BSD make dropped (#9387) Extractors +* [vimeo:review] Fix extraction (#10900) * [nhl] Correctly handle invalid formats (#10713) * [footyroom] Fix extraction (#10810) * [abc.net.au:iview] Fix for standalone (non series) videos (#10895) diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index a46c5c282..b566241cc 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -837,6 +837,7 @@ class VimeoReviewIE(VimeoBaseInfoExtractor): 'params': { 'videopassword': 'holygrail', }, + 'skip': 'video gone', }] def _real_initialize(self): @@ -844,9 +845,10 @@ class VimeoReviewIE(VimeoBaseInfoExtractor): def _get_config_url(self, webpage_url, video_id, video_password_verified=False): webpage = self._download_webpage(webpage_url, video_id) - config_url = self._html_search_regex( - r'data-config-url="([^"]+)"', webpage, 'config URL', - default=NO_DEFAULT if video_password_verified else None) + data = self._parse_json(self._search_regex( + r'window\s*=\s*_extend\(window,\s*({.+?})\);', webpage, 'data', + default=NO_DEFAULT if video_password_verified else '{}'), video_id) + config_url = data.get('vimeo_esi', {}).get('config', {}).get('configUrl') if config_url is None: self._verify_video_password(webpage_url, video_id, webpage) config_url = self._get_config_url( From 9feb1c97318bbd575af6c2737dfe66412e1c0bb6 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Wed, 12 Oct 2016 21:45:49 +0800 Subject: [PATCH 0323/1571] [dailymotion] Fix extraction and update _TESTS Closes #10901 Seems all videos use player V5 syntax now --- ChangeLog | 1 + youtube_dl/extractor/dailymotion.py | 6 ++++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/ChangeLog b/ChangeLog index 3e16a2cb3..fd3e8c2fa 100644 --- a/ChangeLog +++ b/ChangeLog @@ -5,6 +5,7 @@ Core * [Makefile] Support for GNU make < 4 is fixed; BSD make dropped (#9387) Extractors +* [dailymotion] Fix extraction (#10901) * [vimeo:review] Fix extraction (#10900) * [nhl] Correctly handle invalid formats (#10713) * [footyroom] Fix extraction (#10810) diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py index 62b0747a5..4a3314ea7 100644 --- a/youtube_dl/extractor/dailymotion.py +++ b/youtube_dl/extractor/dailymotion.py @@ -94,7 +94,8 @@ class DailymotionIE(DailymotionBaseInfoExtractor): 'title': 'Leanna Decker - Cyber Girl Of The Year Desires Nude [Playboy Plus]', 'uploader': 'HotWaves1012', 'age_limit': 18, - } + }, + 'skip': 'video gone', }, # geo-restricted, player v5 { @@ -144,7 +145,8 @@ class DailymotionIE(DailymotionBaseInfoExtractor): player_v5 = self._search_regex( [r'buildPlayer\(({.+?})\);\n', # See https://github.com/rg3/youtube-dl/issues/7826 r'playerV5\s*=\s*dmp\.create\([^,]+?,\s*({.+?})\);', - r'buildPlayer\(({.+?})\);'], + r'buildPlayer\(({.+?})\);', + r'var\s+config\s*=\s*({.+?});'], webpage, 'player v5', default=None) if player_v5: player = self._parse_json(player_v5, video_id) From 591e384552f44fe5d77015d17fa7f71efa66f778 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 12 Oct 2016 21:22:12 +0700 Subject: [PATCH 0324/1571] [streamable] Remove debug output --- youtube_dl/extractor/streamable.py | 1 - 1 file changed, 1 deletion(-) diff --git a/youtube_dl/extractor/streamable.py b/youtube_dl/extractor/streamable.py index 56b926448..2c26fa689 100644 --- a/youtube_dl/extractor/streamable.py +++ b/youtube_dl/extractor/streamable.py @@ -52,7 +52,6 @@ class StreamableIE(InfoExtractor): @staticmethod def _extract_url(webpage): - print(webpage) mobj = re.search( r']+src=(?P[\'"])(?P(?:https?:)?//streamable\.com/(?:(?!\1).+))(?P=q1)', webpage) From bcd6276520e67f59b95dffdb280703328cab82de Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 12 Oct 2016 21:22:33 +0700 Subject: [PATCH 0325/1571] [downloader/common] Remove debug output --- youtube_dl/downloader/common.py | 1 - 1 file changed, 1 deletion(-) diff --git a/youtube_dl/downloader/common.py b/youtube_dl/downloader/common.py index 8482cbd84..3dc144b4e 100644 --- a/youtube_dl/downloader/common.py +++ b/youtube_dl/downloader/common.py @@ -346,7 +346,6 @@ class FileDownloader(object): min_sleep_interval = self.params.get('sleep_interval') if min_sleep_interval: max_sleep_interval = self.params.get('max_sleep_interval', min_sleep_interval) - print(min_sleep_interval, max_sleep_interval) sleep_interval = random.uniform(min_sleep_interval, max_sleep_interval) self.to_screen('[download] Sleeping %s seconds...' % sleep_interval) time.sleep(sleep_interval) From 7104ae799c18e36070d91d570d48c55d651cd4b2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 12 Oct 2016 21:25:04 +0700 Subject: [PATCH 0326/1571] [ChangeLog] Actualize --- ChangeLog | 1 + 1 file changed, 1 insertion(+) diff --git a/ChangeLog b/ChangeLog index fd3e8c2fa..cc526429a 100644 --- a/ChangeLog +++ b/ChangeLog @@ -13,6 +13,7 @@ Extractors + [hbo] Add support for episode pages (#10892) * [allocine] Fix extraction (#10860) + [nextmedia] Recognize action news on AppleDaily +* [lego] Improve info extraction and bypass geo restriction (#10872) version 2016.10.07 From 5c4bfd4da5d532bf8d5aaf1bb37396f7cfbc786b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 12 Oct 2016 21:30:05 +0700 Subject: [PATCH 0327/1571] release 2016.10.12 --- .github/ISSUE_TEMPLATE.md | 6 +++--- CONTRIBUTING.md | 2 +- ChangeLog | 2 +- docs/supportedsites.md | 1 + youtube_dl/version.py | 2 +- 5 files changed, 7 insertions(+), 6 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 15a93776b..865817681 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.10.07*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.10.07** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.10.12*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.10.12** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.10.07 +[debug] youtube-dl version 2016.10.12 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 95392030e..62acf9abd 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -85,7 +85,7 @@ To run the test, simply invoke your favorite test runner, or execute a test file If you want to create a build of youtube-dl yourself, you'll need * python -* make (both GNU make and BSD make are supported) +* make (only GNU make is supported) * pandoc * zip * nosetests diff --git a/ChangeLog b/ChangeLog index cc526429a..e3a733410 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2016.10.12 Core + Support HTML media elements without child nodes diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 5bbef0c41..9b540b3df 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -289,6 +289,7 @@ - **Groupon** - **Hark** - **HBO** + - **HBOEpisode** - **HearThisAt** - **Heise** - **HellPorno** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index ac0921b7a..44cc18828 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.10.07' +__version__ = '2016.10.12' From 580d41193169d004c94145ef03d5c53f06d5a57c Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Fri, 14 Oct 2016 00:44:28 +0800 Subject: [PATCH 0328/1571] [parliamentliveuk] Recognize lower case URLs Closes #10912 Seems parliamentliveuk matches URLs case-insentive. For example this URL also works: http://parliamentlive.tv/EvEnt/Index/3F24936f-130f-40bf-9a5d-b3d6479da6a4 --- ChangeLog | 6 ++++++ youtube_dl/extractor/parliamentliveuk.py | 9 ++++++--- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/ChangeLog b/ChangeLog index e3a733410..d2b78a489 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +version + +Extractors +* [parliamentliveuk] Lower case URLs are now recognized (#10912) + + version 2016.10.12 Core diff --git a/youtube_dl/extractor/parliamentliveuk.py b/youtube_dl/extractor/parliamentliveuk.py index 874aacc55..ebdab8db9 100644 --- a/youtube_dl/extractor/parliamentliveuk.py +++ b/youtube_dl/extractor/parliamentliveuk.py @@ -6,9 +6,9 @@ from .common import InfoExtractor class ParliamentLiveUKIE(InfoExtractor): IE_NAME = 'parliamentlive.tv' IE_DESC = 'UK parliament videos' - _VALID_URL = r'https?://(?:www\.)?parliamentlive\.tv/Event/Index/(?P[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})' + _VALID_URL = r'(?i)https?://(?:www\.)?parliamentlive\.tv/Event/Index/(?P[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})' - _TEST = { + _TESTS = [{ 'url': 'http://parliamentlive.tv/Event/Index/c1e9d44d-fd6c-4263-b50f-97ed26cc998b', 'info_dict': { 'id': 'c1e9d44d-fd6c-4263-b50f-97ed26cc998b', @@ -18,7 +18,10 @@ class ParliamentLiveUKIE(InfoExtractor): 'timestamp': 1422696664, 'upload_date': '20150131', }, - } + }, { + 'url': 'http://parliamentlive.tv/event/index/3f24936f-130f-40bf-9a5d-b3d6479da6a4', + 'only_matching': True, + }] def _real_extract(self, url): video_id = self._match_id(url) From b7f59a3bf69b5c935be085551d30ce4d0b8a97d4 Mon Sep 17 00:00:00 2001 From: Philip Xu Date: Thu, 13 Oct 2016 21:51:26 -0400 Subject: [PATCH 0329/1571] [huajiao] Add new extractor --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/huajiao.py | 50 ++++++++++++++++++++++++++++++ 2 files changed, 51 insertions(+) create mode 100644 youtube_dl/extractor/huajiao.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 08bed8b0c..75e16af4e 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -372,6 +372,7 @@ from .hrti import ( HRTiIE, HRTiPlaylistIE, ) +from .huajiao import HuajiaoIE from .huffpost import HuffPostIE from .hypem import HypemIE from .iconosquare import IconosquareIE diff --git a/youtube_dl/extractor/huajiao.py b/youtube_dl/extractor/huajiao.py new file mode 100644 index 000000000..352b48120 --- /dev/null +++ b/youtube_dl/extractor/huajiao.py @@ -0,0 +1,50 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from ..utils import parse_duration, parse_iso8601 +from .common import InfoExtractor + + +class HuajiaoIE(InfoExtractor): + IE_DESC = '花椒直播' + _VALID_URL = r'https?://(?:www\.)?huajiao\.com/l/(?P[0-9]+)' + _TEST = { + 'url': 'http://www.huajiao.com/l/38941232', + 'md5': 'd08bf9ac98787d24d1e4c0283f2d372d', + 'info_dict': { + 'id': '38941232', + 'ext': 'mp4', + 'title': '#新人求关注#', + 'description': 're:.*', + 'duration': 2424.0, + 'thumbnail': 're:^https?://.*\.jpg$', + 'timestamp': 1475866459, + 'upload_date': '20161007', + 'uploader': 'Penny_余姿昀', + 'uploader_id': '75206005', + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + feed_json = self._search_regex( + r'var\s*feed\s*=\s*({.*})', webpage, 'feed json str') + feed = self._parse_json(feed_json, video_id) + + description = self._html_search_meta( + 'description', webpage, 'description', fatal=False) + + return { + 'id': video_id, + 'title': feed['feed']['formated_title'], + 'description': description, + 'duration': parse_duration(feed['feed']['duration']), + 'thumbnail': feed['feed']['image'], + 'timestamp': parse_iso8601(feed['creatime'], ' '), + 'uploader': feed['author']['nickname'], + 'uploader_id': feed['author']['uid'], + 'formats': self._extract_m3u8_formats( + feed['feed']['m3u8'], video_id, 'mp4', 'm3u8_native'), + } From a5f847314582d4464e587120c6e696399ff121cb Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Fri, 14 Oct 2016 18:20:01 +0800 Subject: [PATCH 0330/1571] [cbsinteractive] Fix extraction for cnet.com --- ChangeLog | 1 + youtube_dl/extractor/cbsinteractive.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index d2b78a489..edd547811 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,7 @@ version Extractors +* [cbsinteractive] Fix extraction for cnet.com * [parliamentliveuk] Lower case URLs are now recognized (#10912) diff --git a/youtube_dl/extractor/cbsinteractive.py b/youtube_dl/extractor/cbsinteractive.py index 821db20b2..57b18e81d 100644 --- a/youtube_dl/extractor/cbsinteractive.py +++ b/youtube_dl/extractor/cbsinteractive.py @@ -63,7 +63,7 @@ class CBSInteractiveIE(ThePlatformIE): webpage = self._download_webpage(url, display_id) data_json = self._html_search_regex( - r"data-(?:cnet|zdnet)-video(?:-uvp)?-options='([^']+)'", + r"data-(?:cnet|zdnet)-video(?:-uvp(?:js)?)?-options='([^']+)'", webpage, 'data json') data = self._parse_json(data_json, display_id) vdata = data.get('video') or data['videos'][0] From e2004ccaf711ff9aa9c0b647c3d6219093fb6c2a Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Fri, 14 Oct 2016 20:26:12 +0800 Subject: [PATCH 0331/1571] [canalplus] Fix video_id and update _TESTS Some tests are gone, and some redirect to different videos --- ChangeLog | 1 + youtube_dl/extractor/canalplus.py | 64 +++++++++++++++---------------- 2 files changed, 33 insertions(+), 32 deletions(-) diff --git a/ChangeLog b/ChangeLog index edd547811..32390f227 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,7 @@ version Extractors +* [canalplus] Fix extraction for some videos * [cbsinteractive] Fix extraction for cnet.com * [parliamentliveuk] Lower case URLs are now recognized (#10912) diff --git a/youtube_dl/extractor/canalplus.py b/youtube_dl/extractor/canalplus.py index 6dab226af..1c3c41d26 100644 --- a/youtube_dl/extractor/canalplus.py +++ b/youtube_dl/extractor/canalplus.py @@ -6,11 +6,13 @@ import re from .common import InfoExtractor from ..compat import compat_urllib_parse_urlparse from ..utils import ( + dict_get, ExtractorError, HEADRequest, - unified_strdate, - qualities, int_or_none, + qualities, + remove_end, + unified_strdate, ) @@ -43,47 +45,46 @@ class CanalplusIE(InfoExtractor): _TESTS = [{ 'url': 'http://www.canalplus.fr/c-emissions/pid1830-c-zapping.html?vid=1192814', - 'md5': '41f438a4904f7664b91b4ed0dec969dc', 'info_dict': { - 'id': '1192814', + 'id': '1405510', + 'display_id': 'pid1830-c-zapping', 'ext': 'mp4', - 'title': "L'Année du Zapping 2014 - L'Année du Zapping 2014", - 'description': "Toute l'année 2014 dans un Zapping exceptionnel !", - 'upload_date': '20150105', + 'title': 'Zapping - 02/07/2016', + 'description': 'Le meilleur de toutes les chaînes, tous les jours', + 'upload_date': '20160702', }, }, { 'url': 'http://www.piwiplus.fr/videos-piwi/pid1405-le-labyrinthe-boing-super-ranger.html?vid=1108190', 'info_dict': { 'id': '1108190', - 'ext': 'flv', - 'title': 'Le labyrinthe - Boing super ranger', + 'display_id': 'pid1405-le-labyrinthe-boing-super-ranger', + 'ext': 'mp4', + 'title': 'BOING SUPER RANGER - Ep : Le labyrinthe', 'description': 'md5:4cea7a37153be42c1ba2c1d3064376ff', 'upload_date': '20140724', }, 'skip': 'Only works from France', }, { - 'url': 'http://www.d8.tv/d8-docs-mags/pid5198-d8-en-quete-d-actualite.html?vid=1390231', + 'url': 'http://www.c8.fr/c8-divertissement/ms-touche-pas-a-mon-poste/pid6318-videos-integrales.html', + 'md5': '4b47b12b4ee43002626b97fad8fb1de5', 'info_dict': { - 'id': '1390231', + 'id': '1420213', + 'display_id': 'pid6318-videos-integrales', 'ext': 'mp4', - 'title': "Vacances pas chères : prix discount ou grosses dépenses ? - En quête d'actualité", - 'description': 'md5:edb6cf1cb4a1e807b5dd089e1ac8bfc6', - 'upload_date': '20160512', - }, - 'params': { - 'skip_download': True, + 'title': 'TPMP ! Même le matin - Les 35H de Baba - 14/10/2016', + 'description': 'md5:f96736c1b0ffaa96fd5b9e60ad871799', + 'upload_date': '20161014', }, + 'skip': 'Only works from France', }, { - 'url': 'http://www.itele.fr/chroniques/invite-bruce-toussaint/thierry-solere-nicolas-sarkozy-officialisera-sa-candidature-a-la-primaire-quand-il-le-voudra-167224', + 'url': 'http://www.itele.fr/chroniques/invite-michael-darmon/rachida-dati-nicolas-sarkozy-est-le-plus-en-phase-avec-les-inquietudes-des-francais-171510', 'info_dict': { - 'id': '1398334', + 'id': '1420176', + 'display_id': 'rachida-dati-nicolas-sarkozy-est-le-plus-en-phase-avec-les-inquietudes-des-francais-171510', 'ext': 'mp4', - 'title': "L'invité de Bruce Toussaint du 07/06/2016 - ", - 'description': 'md5:40ac7c9ad0feaeb6f605bad986f61324', - 'upload_date': '20160607', - }, - 'params': { - 'skip_download': True, + 'title': 'L\'invité de Michaël Darmon du 14/10/2016 - ', + 'description': 'Chaque matin du lundi au vendredi, Michaël Darmon reçoit un invité politique à 8h25.', + 'upload_date': '20161014', }, }, { 'url': 'http://m.canalplus.fr/?vid=1398231', @@ -95,18 +96,17 @@ class CanalplusIE(InfoExtractor): def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) - video_id = mobj.groupdict().get('id') or mobj.groupdict().get('vid') site_id = self._SITE_ID_MAP[compat_urllib_parse_urlparse(url).netloc.rsplit('.', 2)[-2]] # Beware, some subclasses do not define an id group - display_id = mobj.group('display_id') or video_id + display_id = remove_end(dict_get(mobj.groupdict(), ('display_id', 'id', 'vid')), '.html') - if video_id is None: - webpage = self._download_webpage(url, display_id) - video_id = self._search_regex( - [r']+?videoId=(["\'])(?P\d+)', r'id=["\']canal_video_player(?P\d+)'], - webpage, 'video id', group='id') + webpage = self._download_webpage(url, display_id) + video_id = self._search_regex( + [r']+?videoId=(["\'])(?P\d+)', + r'id=["\']canal_video_player(?P\d+)'], + webpage, 'video id', group='id') info_url = self._VIDEO_INFO_TEMPLATE % (site_id, video_id) video_data = self._download_json(info_url, video_id, 'Downloading video JSON') From 146969e05bc2e2774aa96c62030cdb85ca5c7667 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Fri, 14 Oct 2016 23:42:11 +0800 Subject: [PATCH 0332/1571] [videomore] Support ' + PLAYER_REGEX = r'