From c4c9b8440cd19838a1ef283cc54ebf0630905698 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 4 Nov 2016 05:02:31 +0700 Subject: [PATCH 01/45] [extractor/common] Tolerate malformed RESOLUTION attribute in m3u8 manifests (closes #11113) --- youtube_dl/extractor/common.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 50841f0cf..5f4c984a9 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1280,9 +1280,10 @@ class InfoExtractor(object): } resolution = last_info.get('RESOLUTION') if resolution: - width_str, height_str = resolution.split('x') - f['width'] = int(width_str) - f['height'] = int(height_str) + mobj = re.search(r'(?P<width>\d+)[xX](?P<height>\d+)', resolution) + if mobj: + f['width'] = int(mobj.group('width')) + f['height'] = int(mobj.group('height')) # Unified Streaming Platform mobj = re.search( r'audio.*?(?:%3D|=)(\d+)(?:-video.*?(?:%3D|=)(\d+))?', f['url']) From f93ac1d17571d6ddf9cfb56f0bb51bdef6a04799 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 4 Nov 2016 21:17:56 +0700 Subject: [PATCH 02/45] [anvato] Extract more metadata --- youtube_dl/extractor/anvato.py | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/anvato.py b/youtube_dl/extractor/anvato.py index cb29cf111..46ca1899e 100644 --- a/youtube_dl/extractor/anvato.py +++ b/youtube_dl/extractor/anvato.py @@ -157,14 +157,7 @@ class AnvatoIE(InfoExtractor): video_data_url, video_id, transform_source=strip_jsonp, data=json.dumps(payload).encode('utf-8')) - def _extract_anvato_videos(self, webpage, video_id): - anvplayer_data = self._parse_json(self._html_search_regex( - r']+data-anvp=\'([^\']+)\'', webpage, - 'Anvato player data'), video_id) - - video_id = anvplayer_data['video'] - access_key = anvplayer_data['accessKey'] - + def _get_anvato_videos(self, access_key, video_id): 
video_data = self._get_video_json(access_key, video_id) formats = [] @@ -218,7 +211,19 @@ class AnvatoIE(InfoExtractor): 'formats': formats, 'title': video_data.get('def_title'), 'description': video_data.get('def_description'), + 'tags': video_data.get('def_tags', '').split(','), 'categories': video_data.get('categories'), 'thumbnail': video_data.get('thumbnail'), + 'timestamp': int_or_none(video_data.get( + 'ts_published') or video_data.get('ts_added')), + 'uploader': video_data.get('mcp_id'), + 'duration': int_or_none(video_data.get('duration')), 'subtitles': subtitles, } + + def _extract_anvato_videos(self, webpage, video_id): + anvplayer_data = self._parse_json(self._html_search_regex( + r']+data-anvp=\'([^\']+)\'', webpage, + 'Anvato player data'), video_id) + return self._get_anvato_videos( + anvplayer_data['accessKey'], anvplayer_data['video']) From f3c705f8ec3505240bdfbe622693c3cd2ce10857 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 4 Nov 2016 21:32:30 +0700 Subject: [PATCH 03/45] [fox9] Add extractor (closes #11110) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/fox9.py | 43 ++++++++++++++++++++++++++++++ 2 files changed, 44 insertions(+) create mode 100644 youtube_dl/extractor/fox9.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 499239a22..d7ad5b8fc 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -296,6 +296,7 @@ from .footyroom import FootyRoomIE from .formula1 import Formula1IE from .fourtube import FourTubeIE from .fox import FOXIE +from .fox9 import FOX9IE from .foxgay import FoxgayIE from .foxnews import ( FoxNewsIE, diff --git a/youtube_dl/extractor/fox9.py b/youtube_dl/extractor/fox9.py new file mode 100644 index 000000000..56d9975d0 --- /dev/null +++ b/youtube_dl/extractor/fox9.py @@ -0,0 +1,43 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .anvato import AnvatoIE +from ..utils import 
js_to_json + + +class FOX9IE(AnvatoIE): + _VALID_URL = r'https?://(?:www\.)?fox9\.com/(?:[^/]+/)+(?P<id>\d+)-story' + _TESTS = [{ + 'url': 'http://www.fox9.com/news/215123287-story', + 'md5': 'd6e1b2572c3bab8a849c9103615dd243', + 'info_dict': { + 'id': '314473', + 'ext': 'mp4', + 'title': 'Bear climbs tree in downtown Duluth', + 'description': 'md5:6a36bfb5073a411758a752455408ac90', + 'duration': 51, + 'timestamp': 1478123580, + 'upload_date': '20161102', + 'uploader': 'EPFOX', + 'categories': ['News', 'Sports'], + 'tags': ['news', 'video'], + }, + }, { + 'url': 'http://www.fox9.com/news/investigators/214070684-story', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + + video_id = self._parse_json( + self._search_regex( + r'AnvatoPlaylist\s*\(\s*(\[.+?\])\s*\)\s*;', + webpage, 'anvato playlist'), + video_id, transform_source=js_to_json)[0]['video'] + + return self._get_anvato_videos( + 'anvato_epfox_app_web_prod_b3373168e12f423f41504f207000188daf88251b', + video_id) From c897af8aacae08852c70fbcec9fa6c42e6e278f7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 4 Nov 2016 21:33:08 +0700 Subject: [PATCH 04/45] [cbslocal] Update test --- youtube_dl/extractor/cbslocal.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/youtube_dl/extractor/cbslocal.py b/youtube_dl/extractor/cbslocal.py index 4bcd104af..289709c97 100644 --- a/youtube_dl/extractor/cbslocal.py +++ b/youtube_dl/extractor/cbslocal.py @@ -22,6 +22,7 @@ class CBSLocalIE(AnvatoIE): 'thumbnail': 're:^https?://.*', 'timestamp': 1463440500, 'upload_date': '20160516', + 'uploader': 'CBS', 'subtitles': { 'en': 'mincount:5', }, @@ -35,6 +36,7 @@ class CBSLocalIE(AnvatoIE): 'Syndication\\Curb.tv', 'Content\\News' ], + 'tags': ['CBS 2 News Evening'], }, }, { # SendtoNews embed From 640aff1d0c3d008f1286bb49b559938a7b5cd65a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 4 
Nov 2016 21:45:24 +0700 Subject: [PATCH 05/45] [anvato] Improve formats extraction --- youtube_dl/extractor/anvato.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/anvato.py b/youtube_dl/extractor/anvato.py index 46ca1899e..623f44dce 100644 --- a/youtube_dl/extractor/anvato.py +++ b/youtube_dl/extractor/anvato.py @@ -163,9 +163,10 @@ class AnvatoIE(InfoExtractor): formats = [] for published_url in video_data['published_urls']: video_url = published_url['embed_url'] + media_format = published_url.get('format') ext = determine_ext(video_url) - if ext == 'smil': + if ext == 'smil' or media_format == 'smil': formats.extend(self._extract_smil_formats(video_url, video_id)) continue @@ -176,7 +177,7 @@ class AnvatoIE(InfoExtractor): 'tbr': tbr if tbr != 0 else None, } - if ext == 'm3u8': + if ext == 'm3u8' or media_format in ('m3u8', 'm3u8-variant'): # Not using _extract_m3u8_formats here as individual media # playlists are also included in published_urls. 
if tbr is None: @@ -187,7 +188,7 @@ class AnvatoIE(InfoExtractor): 'format_id': '-'.join(filter(None, ['hls', compat_str(tbr)])), 'ext': 'mp4', }) - elif ext == 'mp3': + elif ext == 'mp3' or media_format == 'mp3': a_format['vcodec'] = 'none' else: a_format.update({ From 09ffe34b001ed0af411f964305ccb7595a745a99 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 4 Nov 2016 21:59:42 +0700 Subject: [PATCH 06/45] [ChangeLog] Actualize --- ChangeLog | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/ChangeLog b/ChangeLog index ec26e0c8d..75a01388a 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,19 @@ version +Core +* [extractor/common] Tolerate malformed RESOLUTION attribute in m3u8 + manifests (#11113) +* [downloader/ism] Fix AVC Decoder Configuration Record + Extractors ++ [fox9] Add support for fox9.com (#11110) ++ [anvato] Extract more metadata and improve formats extraction +* [vodlocker] Improve removed videos detection (#11106) ++ [vzaar] Add support for vzaar.com (#11093) ++ [vice] Add support for uplynk preplay videos (#11101) +* [tubitv] Fix extraction (#11061) ++ [shahid] Add support for authentication (#11091) ++ [radiocanada] Add subtitles support (#11096) + [generic] Add support for ISM manifests From b30e4c275431124453ba4bc67b99d05bbc7ae8b4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 4 Nov 2016 22:07:54 +0700 Subject: [PATCH 07/45] release 2016.11.04 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 2 ++ youtube_dl/version.py | 2 +- 4 files changed, 7 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 975ea8700..f8b195fe7 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.11.02*. 
If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.11.02** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.11.04*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.11.04** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.11.02 +[debug] youtube-dl version 2016.11.04 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 75a01388a..8a98f6f7e 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2016.11.04 Core * [extractor/common] Tolerate malformed RESOLUTION attribute in m3u8 diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 7ed6b9006..e7e452d93 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -247,6 +247,7 @@ - **FootyRoom** - **Formula1** - **FOX** + - **FOX9** - **Foxgay** - **foxnews**: Fox News and Fox Business Video - **foxnews:article** @@ -870,6 +871,7 @@ - **vube**: Vube.com - **VuClip** - **VyboryMos** + - **Vzaar** - 
**Walla** - **washingtonpost** - **washingtonpost:article** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 7cdd94f29..7ae7f6279 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.11.02' +__version__ = '2016.11.04' From de328af36264c35a1af6037b1a39f42d5832887a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 5 Nov 2016 03:24:42 +0700 Subject: [PATCH 08/45] [toutv] Relax _VALID_URL (closes #11121) --- youtube_dl/extractor/toutv.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/toutv.py b/youtube_dl/extractor/toutv.py index d2d5c1171..573f2ff6b 100644 --- a/youtube_dl/extractor/toutv.py +++ b/youtube_dl/extractor/toutv.py @@ -15,11 +15,11 @@ from ..utils import ( class TouTvIE(InfoExtractor): _NETRC_MACHINE = 'toutv' IE_NAME = 'tou.tv' - _VALID_URL = r'https?://ici\.tou\.tv/(?P[a-zA-Z0-9_-]+/S[0-9]+E[0-9]+)' + _VALID_URL = r'https?://ici\.tou\.tv/(?P[a-zA-Z0-9_-]+(?:/S[0-9]+E[0-9]+)?)' _access_token = None _claims = None - _TEST = { + _TESTS = [{ 'url': 'http://ici.tou.tv/garfield-tout-court/S2015E17', 'info_dict': { 'id': '122017', @@ -33,7 +33,10 @@ class TouTvIE(InfoExtractor): 'skip_download': True, }, 'skip': '404 Not Found', - } + }, { + 'url': 'http://ici.tou.tv/hackers', + 'only_matching': True, + }] def _real_initialize(self): email, password = self._get_login_info() From f420902a3b144c94fba449537e474aca0e101112 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 6 Nov 2016 21:11:18 +0700 Subject: [PATCH 09/45] [yahoo] Add another content id regex (closes #11088) --- youtube_dl/extractor/yahoo.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/youtube_dl/extractor/yahoo.py b/youtube_dl/extractor/yahoo.py index 91f0a0dbb..ca92c60c3 100644 --- a/youtube_dl/extractor/yahoo.py +++ b/youtube_dl/extractor/yahoo.py @@ -201,6 +201,19 @@ class 
YahooIE(InfoExtractor): }, 'skip': 'redirect to https://www.yahoo.com/music', }, + { + # ytwnews://cavideo/ + 'url': 'https://tw.video.yahoo.com/movie-tw/單車天使-中文版預-092316541.html', + 'info_dict': { + 'id': 'ba133ff2-0793-3510-b636-59dfe9ff6cff', + 'ext': 'mp4', + 'title': '單車天使 - 中文版預', + 'description': '中文版預', + }, + 'params': { + 'skip_download': True, + }, + }, ] def _real_extract(self, url): @@ -270,6 +283,7 @@ class YahooIE(InfoExtractor): r'%s[^}]*"ccm_id"\s*:\s*"([^"]+)"' % re.escape(page_id), r']data-uuid=["\']([^"\']+)', r'yahoo://article/view\?.*\buuid=([^&"\']+)', + r']+["\']ytwnews://cavideo/(?:[^/]+/)+([\da-fA-F-]+)[&"\']', ] video_id = self._search_regex( CONTENT_ID_REGEXES, webpage, 'content ID') From b61cd51869d382d19dbd232cc74e010bb2b1ed12 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 6 Nov 2016 21:16:33 +0700 Subject: [PATCH 10/45] [yahoo] Add test and improve some content id regex --- youtube_dl/extractor/yahoo.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/yahoo.py b/youtube_dl/extractor/yahoo.py index ca92c60c3..4951414e9 100644 --- a/youtube_dl/extractor/yahoo.py +++ b/youtube_dl/extractor/yahoo.py @@ -201,6 +201,19 @@ class YahooIE(InfoExtractor): }, 'skip': 'redirect to https://www.yahoo.com/music', }, + { + # yahoo://article/ + 'url': 'https://www.yahoo.com/movies/video/true-story-trailer-173000497.html', + 'info_dict': { + 'id': '071c4013-ce30-3a93-a5b2-e0413cd4a9d1', + 'ext': 'mp4', + 'title': "'True Story' Trailer", + 'description': 'True Story', + }, + 'params': { + 'skip_download': True, + }, + }, { # ytwnews://cavideo/ 'url': 'https://tw.video.yahoo.com/movie-tw/單車天使-中文版預-092316541.html', @@ -282,7 +295,7 @@ class YahooIE(InfoExtractor): r'"first_videoid"\s*:\s*"([^"]+)"', r'%s[^}]*"ccm_id"\s*:\s*"([^"]+)"' % re.escape(page_id), r']data-uuid=["\']([^"\']+)', - r'yahoo://article/view\?.*\buuid=([^&"\']+)', + 
r']+yahoo://article/view\?.*\buuid=([^&"\']+)', r']+["\']ytwnews://cavideo/(?:[^/]+/)+([\da-fA-F-]+)[&"\']', ] video_id = self._search_regex( From 519d8970496125bca8a7067d841e5c5e5263c26d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 6 Nov 2016 21:28:51 +0700 Subject: [PATCH 11/45] [drtuber] Add support for embed URLs --- youtube_dl/extractor/drtuber.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/drtuber.py b/youtube_dl/extractor/drtuber.py index e8870c460..8baad18f6 100644 --- a/youtube_dl/extractor/drtuber.py +++ b/youtube_dl/extractor/drtuber.py @@ -10,8 +10,8 @@ from ..utils import ( class DrTuberIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?drtuber\.com/video/(?P<id>\d+)/(?P<display_id>[\w-]+)' - _TEST = { + _VALID_URL = r'https?://(?:www\.)?drtuber\.com/(?:video|embed)/(?P<id>\d+)(?:/(?P<display_id>[\w-]+))?' + _TESTS = [{ 'url': 'http://www.drtuber.com/video/1740434/hot-perky-blonde-naked-golf', 'md5': '93e680cf2536ad0dfb7e74d94a89facd', 'info_dict': { @@ -25,14 +25,18 @@ class DrTuberIE(InfoExtractor): 'thumbnail': 're:https?://.*\.jpg$', 'age_limit': 18, } - } + }, { + 'url': 'http://www.drtuber.com/embed/489939', + 'only_matching': True, + }] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') - display_id = mobj.group('display_id') + display_id = mobj.group('display_id') or video_id - webpage = self._download_webpage(url, display_id) + webpage = self._download_webpage( + 'http://www.drtuber.com/video/%s' % video_id, display_id) video_url = self._html_search_regex( r' Date: Sun, 6 Nov 2016 21:29:15 +0700 Subject: [PATCH 12/45] [drtuber] Fix title extraction --- youtube_dl/extractor/drtuber.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/drtuber.py b/youtube_dl/extractor/drtuber.py index 8baad18f6..95ecef660 100644 --- a/youtube_dl/extractor/drtuber.py +++ b/youtube_dl/extractor/drtuber.py @@ -42,7 +42,7 @@ 
class DrTuberIE(InfoExtractor): r']*>

([^<]+)<', + (r'class="title_watch"[^>]*><(?:p|h\d+)[^>]*>([^<]+)<', r']+class="title_substrate">([^<]+)

', r'([^<]+) - \d+'), webpage, 'title') From 37e7a71c6c777635c4ed02339339ff27ccb04f58 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 6 Nov 2016 21:33:51 +0700 Subject: [PATCH 13/45] [extractor/generic] Add support for drtuber embds (closes #11098) --- youtube_dl/extractor/drtuber.py | 6 ++++++ youtube_dl/extractor/generic.py | 6 ++++++ 2 files changed, 12 insertions(+) diff --git a/youtube_dl/extractor/drtuber.py b/youtube_dl/extractor/drtuber.py index 95ecef660..22da8e481 100644 --- a/youtube_dl/extractor/drtuber.py +++ b/youtube_dl/extractor/drtuber.py @@ -30,6 +30,12 @@ class DrTuberIE(InfoExtractor): 'only_matching': True, }] + @staticmethod + def _extract_urls(webpage): + return re.findall( + r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//(?:www\.)?drtuber\.com/embed/\d+)', + webpage) + def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index a0a45dce0..5c2782754 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -47,6 +47,7 @@ from .svt import SVTIE from .pornhub import PornHubIE from .xhamster import XHamsterEmbedIE from .tnaflix import TNAFlixNetworkEmbedIE +from .drtuber import DrTuberIE from .vimeo import VimeoIE from .dailymotion import ( DailymotionIE, @@ -1996,6 +1997,11 @@ class GenericIE(InfoExtractor): if tnaflix_urls: return _playlist_from_matches(tnaflix_urls, ie=TNAFlixNetworkEmbedIE.ie_key()) + # Look for embedded DrTuber player + drtuber_urls = DrTuberIE._extract_urls(webpage) + if drtuber_urls: + return _playlist_from_matches(drtuber_urls, ie=DrTuberIE.ie_key()) + # Look for embedded Tvigle player mobj = re.search( r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//cloud\.tvigle\.ru/video/.+?)\1', webpage) From 5021ca6c13e3d011dc24ecf38d326e3a59e726a1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 6 
Nov 2016 21:39:29 +0700 Subject: [PATCH 14/45] [redtube] Add support for embed URLs --- youtube_dl/extractor/redtube.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/redtube.py b/youtube_dl/extractor/redtube.py index 721fc3a9e..7d9285ffb 100644 --- a/youtube_dl/extractor/redtube.py +++ b/youtube_dl/extractor/redtube.py @@ -10,8 +10,8 @@ from ..utils import ( class RedTubeIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?redtube\.com/(?P<id>[0-9]+)' - _TEST = { + _VALID_URL = r'https?://(?:(?:www\.)?redtube\.com/|embed\.redtube\.com/\?.*?\bid=)(?P<id>[0-9]+)' + _TESTS = [{ 'url': 'http://www.redtube.com/66418', 'md5': '7b8c22b5e7098a3e1c09709df1126d2d', 'info_dict': { @@ -23,11 +23,15 @@ class RedTubeIE(InfoExtractor): 'view_count': int, 'age_limit': 18, } - } + }, { + 'url': 'http://embed.redtube.com/?bgcolor=000000&id=1443286', + 'only_matching': True, + }] def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) + webpage = self._download_webpage( + 'http://www.redtube.com/%s' % video_id, video_id) if any(s in webpage for s in ['video-deleted-info', '>This video has been removed']): raise ExtractorError('Video %s has been removed' % video_id, expected=True) From e28ed498e64545f02f2d3dbccf97ecf0e47aa82a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 6 Nov 2016 21:42:41 +0700 Subject: [PATCH 15/45] [extractor/generic] Add support for redtube embds (closes #11099) --- youtube_dl/extractor/generic.py | 6 ++++++ youtube_dl/extractor/redtube.py | 8 ++++++++ 2 files changed, 14 insertions(+) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 5c2782754..b1315a9c8 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -48,6 +48,7 @@ from .pornhub import PornHubIE from .xhamster import XHamsterEmbedIE from .tnaflix import TNAFlixNetworkEmbedIE from 
.drtuber import DrTuberIE +from .redtube import RedTubeIE from .vimeo import VimeoIE from .dailymotion import ( DailymotionIE, @@ -2002,6 +2003,11 @@ class GenericIE(InfoExtractor): if drtuber_urls: return _playlist_from_matches(drtuber_urls, ie=DrTuberIE.ie_key()) + # Look for embedded RedTube player + redtube_urls = RedTubeIE._extract_urls(webpage) + if redtube_urls: + return _playlist_from_matches(redtube_urls, ie=RedTubeIE.ie_key()) + # Look for embedded Tvigle player mobj = re.search( r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//cloud\.tvigle\.ru/video/.+?)\1', webpage) diff --git a/youtube_dl/extractor/redtube.py b/youtube_dl/extractor/redtube.py index 7d9285ffb..c367a6ae7 100644 --- a/youtube_dl/extractor/redtube.py +++ b/youtube_dl/extractor/redtube.py @@ -1,5 +1,7 @@ from __future__ import unicode_literals +import re + from .common import InfoExtractor from ..utils import ( ExtractorError, @@ -28,6 +30,12 @@ class RedTubeIE(InfoExtractor): 'only_matching': True, }] + @staticmethod + def _extract_urls(webpage): + return re.findall( + r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//embed\.redtube\.com/\?.*?\bid=\d+)', + webpage) + def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage( From b52c9ef1655042688a4822d241af398592b951f9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 6 Nov 2016 21:52:00 +0700 Subject: [PATCH 16/45] [extractor/generic] Improve support for pornhub embeds (closes #11100) --- youtube_dl/extractor/generic.py | 10 +++++----- youtube_dl/extractor/pornhub.py | 13 ++++++------- 2 files changed, 11 insertions(+), 12 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index b1315a9c8..bde65fa27 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -1983,11 +1983,6 @@ class GenericIE(InfoExtractor): if sportbox_urls: return _playlist_from_matches(sportbox_urls, ie='SportBoxEmbed') - # 
Look for embedded PornHub player - pornhub_url = PornHubIE._extract_url(webpage) - if pornhub_url: - return self.url_result(pornhub_url, 'PornHub') - # Look for embedded XHamster player xhamster_urls = XHamsterEmbedIE._extract_urls(webpage) if xhamster_urls: @@ -1998,6 +1993,11 @@ class GenericIE(InfoExtractor): if tnaflix_urls: return _playlist_from_matches(tnaflix_urls, ie=TNAFlixNetworkEmbedIE.ie_key()) + # Look for embedded PornHub player + pornhub_urls = PornHubIE._extract_urls(webpage) + if pornhub_urls: + return _playlist_from_matches(pornhub_urls, ie=PornHubIE.ie_key()) + # Look for embedded DrTuber player drtuber_urls = DrTuberIE._extract_urls(webpage) if drtuber_urls: diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py index 0724efc09..40dbe6967 100644 --- a/youtube_dl/extractor/pornhub.py +++ b/youtube_dl/extractor/pornhub.py @@ -33,7 +33,7 @@ class PornHubIE(InfoExtractor): (?:[a-z]+\.)?pornhub\.com/(?:view_video\.php\?viewkey=|embed/)| (?:www\.)?thumbzilla\.com/video/ ) - (?P<id>[0-9a-z]+) + (?P<id>[\da-z]+) ''' _TESTS = [{ 'url': 'http://www.pornhub.com/view_video.php?viewkey=648719015', @@ -96,12 +96,11 @@ class PornHubIE(InfoExtractor): 'only_matching': True, }] - @classmethod - def _extract_url(cls, webpage): - mobj = re.search( - r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?pornhub\.com/embed/\d+)\1', webpage) - if mobj: - return mobj.group('url') + @staticmethod + def _extract_urls(webpage): + return re.findall( + r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//(?:www\.)?pornhub\.com/embed/[\da-z]+)', + webpage) def _extract_count(self, pattern, webpage, name): return str_to_int(self._search_regex( From 98708e6cbdc8b94723b30341b714a94905c70c7c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 6 Nov 2016 23:20:15 +0700 Subject: [PATCH 17/45] [ard] Remove age restriction check (closes #11129) --- youtube_dl/extractor/ard.py | 2 -- 1 file changed, 2 deletions(-) diff 
--git a/youtube_dl/extractor/ard.py b/youtube_dl/extractor/ard.py index 95ada0274..35f3656f1 100644 --- a/youtube_dl/extractor/ard.py +++ b/youtube_dl/extractor/ard.py @@ -178,8 +178,6 @@ class ARDMediathekIE(InfoExtractor): ('>Leider liegt eine Störung vor.', 'Video %s is unavailable'), ('>Der gewünschte Beitrag ist nicht mehr verfügbar.<', 'Video %s is no longer available'), - ('Diese Sendung ist für Jugendliche unter 12 Jahren nicht geeignet. Der Clip ist deshalb nur von 20 bis 6 Uhr verfügbar.', - 'This program is only suitable for those aged 12 and older. Video %s is therefore only available between 8 pm and 6 am.'), ) for pattern, message in ERRORS: From cb882540e85ea01e83745f24454ebee733044d4f Mon Sep 17 00:00:00 2001 From: DarkZeros <mailszeros@gmail.com> Date: Tue, 4 Oct 2016 02:04:24 +0100 Subject: [PATCH 18/45] [mitele] Fix extraction after website redesign (fixes #10824) --- ChangeLog | 6 +++ youtube_dl/extractor/mitele.py | 93 +++++++++++++++++++--------------- youtube_dl/extractor/ooyala.py | 7 +-- 3 files changed, 63 insertions(+), 43 deletions(-) diff --git a/ChangeLog b/ChangeLog index 8a98f6f7e..7ca72b3d7 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +version <unreleased> + +Extractors +* [mitele] Fix extraction after website redesign (#10824) + + version 2016.11.04 Core diff --git a/youtube_dl/extractor/mitele.py b/youtube_dl/extractor/mitele.py index 2294745d4..c41ab1e91 100644 --- a/youtube_dl/extractor/mitele.py +++ b/youtube_dl/extractor/mitele.py @@ -1,19 +1,20 @@ # coding: utf-8 from __future__ import unicode_literals -import re +import uuid from .common import InfoExtractor from ..compat import ( + compat_str, compat_urllib_parse_urlencode, compat_urlparse, ) from ..utils import ( - get_element_by_attribute, int_or_none, - remove_start, extract_attributes, determine_ext, + smuggle_url, + parse_duration, ) @@ -72,16 +73,14 @@ class MiTeleBaseIE(InfoExtractor): } -class MiTeleIE(MiTeleBaseIE): +class MiTeleIE(InfoExtractor): IE_DESC 
= 'mitele.es' - _VALID_URL = r'https?://(?:www\.)?mitele\.es/(?:[^/]+/){3}(?P<id>[^/]+)/' + _VALID_URL = r'https?://(?:www\.)?mitele\.es/programas-tv/(?:[^/]+/)(?P<id>[^/]+)/player' _TESTS = [{ - 'url': 'http://www.mitele.es/programas-tv/diario-de/la-redaccion/programa-144/', - # MD5 is unstable + 'url': 'http://www.mitele.es/programas-tv/diario-de/57b0dfb9c715da65618b4afa/player', 'info_dict': { - 'id': '0NF1jJnxS1Wu3pHrmvFyw2', - 'display_id': 'programa-144', + 'id': '57b0dfb9c715da65618b4afa', 'ext': 'mp4', 'title': 'Tor, la web invisible', 'description': 'md5:3b6fce7eaa41b2d97358726378d9369f', @@ -91,57 +90,71 @@ class MiTeleIE(MiTeleBaseIE): 'thumbnail': 're:(?i)^https?://.*\.jpg$', 'duration': 2913, }, + 'add_ie': ['Ooyala'], }, { # no explicit title - 'url': 'http://www.mitele.es/programas-tv/cuarto-milenio/temporada-6/programa-226/', + 'url': 'http://www.mitele.es/programas-tv/cuarto-milenio/57b0de3dc915da14058b4876/player', 'info_dict': { - 'id': 'eLZSwoEd1S3pVyUm8lc6F', - 'display_id': 'programa-226', + 'id': '57b0de3dc915da14058b4876', 'ext': 'mp4', - 'title': 'Cuarto Milenio - Temporada 6 - Programa 226', - 'description': 'md5:50daf9fadefa4e62d9fc866d0c015701', + 'title': 'Cuarto Milenio Temporada 6 Programa 226', + 'description': 'md5:5ff132013f0cd968ffbf1f5f3538a65f', 'series': 'Cuarto Milenio', 'season': 'Temporada 6', 'episode': 'Programa 226', 'thumbnail': 're:(?i)^https?://.*\.jpg$', - 'duration': 7312, + 'duration': 7313, }, 'params': { 'skip_download': True, }, + 'add_ie': ['Ooyala'], }] def _real_extract(self, url): - display_id = self._match_id(url) + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) - webpage = self._download_webpage(url, display_id) + gigya_url = self._search_regex(r'<gigya-api>[^>]*</gigya-api>[^>]*<script\s*src="([^"]*)">[^>]*</script>', webpage, 'gigya', default=None) + gigya_sc = self._download_webpage(compat_urlparse.urljoin(r'http://www.mitele.es/', gigya_url), video_id, 'Downloading 
gigya script') + # Get a appKey/uuid for getting the session key + appKey_var = self._search_regex(r'value\("appGridApplicationKey",([0-9a-f]+)\)', gigya_sc, 'appKey variable') + appKey = self._search_regex(r'var %s="([0-9a-f]+)"' % appKey_var, gigya_sc, 'appKey') + uid = compat_str(uuid.uuid4()) + session_url = 'https://appgrid-api.cloud.accedo.tv/session?appKey=%s&uuid=%s' % (appKey, uid) + session_json = self._download_json(session_url, video_id, 'Downloading session keys') + sessionKey = compat_str(session_json['sessionKey']) - info = self._get_player_info(url, webpage) + paths_url = 'https://appgrid-api.cloud.accedo.tv/metadata/general_configuration,%20web_configuration?sessionKey=' + sessionKey + paths = self._download_json(paths_url, video_id, 'Downloading paths JSON') + ooyala_s = paths['general_configuration']['api_configuration']['ooyala_search'] + data_p = ( + 'http://' + ooyala_s['base_url'] + ooyala_s['full_path'] + ooyala_s['provider_id'] + + '/docs/' + video_id + '?include_titles=Series,Season&product_name=test&format=full') + data = self._download_json(data_p, video_id, 'Downloading data JSON') + source = data['hits']['hits'][0]['_source'] + embedCode = source['offers'][0]['embed_codes'][0] - title = self._search_regex( - r'class="Destacado-text"[^>]*>\s*<strong>([^<]+)</strong>', - webpage, 'title', default=None) + titles = source['localizable_titles'][0] + title = titles.get('title_medium') or titles['title_long'] + episode = titles['title_sort_name'] + description = titles['summary_long'] + titles_series = source['localizable_titles_series'][0] + series = titles_series['title_long'] + titles_season = source['localizable_titles_season'][0] + season = titles_season['title_medium'] + duration = parse_duration(source['videos'][0]['duration']) - mobj = re.search(r'''(?sx) - class="Destacado-text"[^>]*>.*?<h1>\s* - <span>(?P<series>[^<]+)</span>\s* - <span>(?P<season>[^<]+)</span>\s* - <span>(?P<episode>[^<]+)</span>''', webpage) - series, season, 
episode = mobj.groups() if mobj else [None] * 3 - - if not title: - if mobj: - title = '%s - %s - %s' % (series, season, episode) - else: - title = remove_start(self._search_regex( - r'<title>([^<]+)', webpage, 'title'), 'Ver online ') - - info.update({ - 'display_id': display_id, + return { + '_type': 'url_transparent', + # for some reason only HLS is supported + 'url': smuggle_url('ooyala:' + embedCode, {'supportedformats': 'm3u8'}), + 'id': video_id, 'title': title, - 'description': get_element_by_attribute('class', 'text', webpage), + 'description': description, 'series': series, 'season': season, 'episode': episode, - }) - return info + 'duration': duration, + 'thumbnail': source['images'][0]['url'], + } diff --git a/youtube_dl/extractor/ooyala.py b/youtube_dl/extractor/ooyala.py index 72ec20938..c2807d0f6 100644 --- a/youtube_dl/extractor/ooyala.py +++ b/youtube_dl/extractor/ooyala.py @@ -18,7 +18,7 @@ class OoyalaBaseIE(InfoExtractor): _CONTENT_TREE_BASE = _PLAYER_BASE + 'player_api/v1/content_tree/' _AUTHORIZATION_URL_TEMPLATE = _PLAYER_BASE + 'sas/player_api/v2/authorization/embed_code/%s/%s?' 
- def _extract(self, content_tree_url, video_id, domain='example.org'): + def _extract(self, content_tree_url, video_id, domain='example.org', supportedformats=None): content_tree = self._download_json(content_tree_url, video_id)['content_tree'] metadata = content_tree[list(content_tree)[0]] embed_code = metadata['embed_code'] @@ -29,7 +29,7 @@ class OoyalaBaseIE(InfoExtractor): self._AUTHORIZATION_URL_TEMPLATE % (pcode, embed_code) + compat_urllib_parse_urlencode({ 'domain': domain, - 'supportedFormats': 'mp4,rtmp,m3u8,hds', + 'supportedFormats': supportedformats or 'mp4,rtmp,m3u8,hds', }), video_id) cur_auth_data = auth_data['authorization_data'][embed_code] @@ -145,8 +145,9 @@ class OoyalaIE(OoyalaBaseIE): url, smuggled_data = unsmuggle_url(url, {}) embed_code = self._match_id(url) domain = smuggled_data.get('domain') + supportedformats = smuggled_data.get('supportedformats') content_tree_url = self._CONTENT_TREE_BASE + 'embed_code/%s/%s' % (embed_code, embed_code) - return self._extract(content_tree_url, embed_code, domain) + return self._extract(content_tree_url, embed_code, domain, supportedformats) class OoyalaExternalIE(OoyalaBaseIE): From 97726317ac8e905dc72e75c7c2a823280c51af00 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 7 Nov 2016 23:53:22 +0700 Subject: [PATCH 19/45] [README.md] Mention HTTP headers and alternative way to obtain cookies and headers in -g FAQ --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 0f4088adc..98e374420 100644 --- a/README.md +++ b/README.md @@ -758,7 +758,7 @@ Once the video is fully downloaded, use any video player, such as [mpv](https:// ### I extracted a video URL with `-g`, but it does not play on another machine / in my webbrowser. -It depends a lot on the service. In many cases, requests for the video (to download/play it) must come from the same IP address and with the same cookies. 
Use the `--cookies` option to write the required cookies into a file, and advise your downloader to read cookies from that file. Some sites also require a common user agent to be used, use `--dump-user-agent` to see the one in use by youtube-dl. +It depends a lot on the service. In many cases, requests for the video (to download/play it) must come from the same IP address and with the same cookies and/or HTTP headers. Use the `--cookies` option to write the required cookies into a file, and advise your downloader to read cookies from that file. Some sites also require a common user agent to be used, use `--dump-user-agent` to see the one in use by youtube-dl. You can also get necessary cookies and HTTP headers from JSON output obtained with `--dump-json`. It may be beneficial to use IPv6; in some cases, the restrictions are only applied to IPv4. Some services (sometimes only for a subset of videos) do not restrict the video URL by IP address, cookie, or user-agent, but these are the exception rather than the rule. 
From ebc7ab1e231483f189290608425a23590cae6af9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 8 Nov 2016 00:29:12 +0700 Subject: [PATCH 20/45] [espn] Fix extraction (closes #11041) --- youtube_dl/extractor/espn.py | 126 ++++++++++++++++++++++++++--------- 1 file changed, 94 insertions(+), 32 deletions(-) diff --git a/youtube_dl/extractor/espn.py b/youtube_dl/extractor/espn.py index 6d10f8e68..8795e0ddf 100644 --- a/youtube_dl/extractor/espn.py +++ b/youtube_dl/extractor/espn.py @@ -1,38 +1,117 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..utils import remove_end +from ..compat import compat_str +from ..utils import ( + determine_ext, + int_or_none, + unified_timestamp, +) class ESPNIE(InfoExtractor): - _VALID_URL = r'https?://(?:espn\.go|(?:www\.)?espn)\.com/(?:[^/]+/)*(?P[^/]+)' + _VALID_URL = r'https?://(?:espn\.go|(?:www\.)?espn)\.com/video/clip(?:\?.*?\bid=|/_/id/)(?P\d+)' _TESTS = [{ 'url': 'http://espn.go.com/video/clip?id=10365079', - 'md5': '60e5d097a523e767d06479335d1bdc58', 'info_dict': { - 'id': 'FkYWtmazr6Ed8xmvILvKLWjd4QvYZpzG', + 'id': '10365079', 'ext': 'mp4', 'title': '30 for 30 Shorts: Judging Jewell', - 'description': None, + 'description': 'md5:39370c2e016cb4ecf498ffe75bef7f0f', + 'timestamp': 1390936111, + 'upload_date': '20140128', }, 'params': { 'skip_download': True, }, - 'add_ie': ['OoyalaExternal'], }, { # intl video, from http://www.espnfc.us/video/mls-highlights/150/video/2743663/must-see-moments-best-of-the-mls-season 'url': 'http://espn.go.com/video/clip?id=2743663', - 'md5': 'f4ac89b59afc7e2d7dbb049523df6768', 'info_dict': { - 'id': '50NDFkeTqRHB0nXBOK-RGdSG5YQPuxHg', + 'id': '2743663', 'ext': 'mp4', 'title': 'Must-See Moments: Best of the MLS season', + 'description': 'md5:4c2d7232beaea572632bec41004f0aeb', + 'timestamp': 1449446454, + 'upload_date': '20151207', }, 'params': { 'skip_download': True, }, - 'add_ie': ['OoyalaExternal'], + 'expected_warnings': ['Unable to 
download f4m manifest'], }, { + 'url': 'http://www.espn.com/video/clip?id=10365079', + 'only_matching': True, + }, { + 'url': 'http://www.espn.com/video/clip/_/id/17989860', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + clip = self._download_json( + 'http://api-app.espn.com/v1/video/clips/%s' % video_id, + video_id)['videos'][0] + + title = clip['headline'] + + format_urls = set() + formats = [] + + def traverse_source(source, base_source_id=None): + for source_id, source in source.items(): + if isinstance(source, compat_str): + extract_source(source, base_source_id) + elif isinstance(source, dict): + traverse_source( + source, + '%s-%s' % (base_source_id, source_id) + if base_source_id else source_id) + + def extract_source(source_url, source_id=None): + if source_url in format_urls: + return + format_urls.add(source_url) + ext = determine_ext(source_url) + if ext == 'smil': + formats.extend(self._extract_smil_formats( + source_url, video_id, fatal=False)) + elif ext == 'f4m': + formats.extend(self._extract_f4m_formats( + source_url, video_id, f4m_id=source_id, fatal=False)) + elif ext == 'm3u8': + formats.extend(self._extract_m3u8_formats( + source_url, video_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id=source_id, fatal=False)) + else: + formats.append({ + 'url': source_url, + 'format_id': source_id, + }) + + traverse_source(clip['links']['source']) + self._sort_formats(formats) + + description = clip.get('caption') or clip.get('description') + thumbnail = clip.get('thumbnail') + duration = int_or_none(clip.get('duration')) + timestamp = unified_timestamp(clip.get('originalPublishDate')) + + return { + 'id': video_id, + 'title': title, + 'description': description, + 'thumbnail': thumbnail, + 'timestamp': timestamp, + 'duration': duration, + 'formats': formats, + } + + +class ESPNArticleIE(InfoExtractor): + _VALID_URL = r'https?://(?:espn\.go|(?:www\.)?espn)\.com/(?:[^/]+/)*(?P[^/]+)' + _TESTS = [{ 
'url': 'https://espn.go.com/video/iframe/twitter/?cms=espn&id=10365079', 'only_matching': True, }, { @@ -47,11 +126,12 @@ class ESPNIE(InfoExtractor): }, { 'url': 'http://espn.go.com/nba/playoffs/2015/story/_/id/12887571/john-wall-washington-wizards-no-swelling-left-hand-wrist-game-5-return', 'only_matching': True, - }, { - 'url': 'http://www.espn.com/video/clip?id=10365079', - 'only_matching': True, }] + @classmethod + def suitable(cls, url): + return False if ESPNIE.suitable(url) else super(ESPNArticleIE, cls).suitable(url) + def _real_extract(self, url): video_id = self._match_id(url) @@ -61,23 +141,5 @@ class ESPNIE(InfoExtractor): r'class=(["\']).*?video-play-button.*?\1[^>]+data-id=["\'](?P\d+)', webpage, 'video id', group='id') - cms = 'espn' - if 'data-source="intl"' in webpage: - cms = 'intl' - player_url = 'https://espn.go.com/video/iframe/twitter/?id=%s&cms=%s' % (video_id, cms) - player = self._download_webpage( - player_url, video_id) - - pcode = self._search_regex( - r'["\']pcode=([^"\']+)["\']', player, 'pcode') - - title = remove_end( - self._og_search_title(webpage), - '- ESPN Video').strip() - - return { - '_type': 'url_transparent', - 'url': 'ooyalaexternal:%s:%s:%s' % (cms, video_id, pcode), - 'ie_key': 'OoyalaExternal', - 'title': title, - } + return self.url_result( + 'http://espn.go.com/video/clip?id=%s' % video_id, ESPNIE.ie_key()) From 5d47b38cf5acb252e126ebdd81a21b5035256bed Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Tue, 8 Nov 2016 21:53:41 +0800 Subject: [PATCH 21/45] [tmz:article] Fix extraction (closes #11052) --- ChangeLog | 1 + youtube_dl/extractor/tmz.py | 16 ++++++++-------- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/ChangeLog b/ChangeLog index 7ca72b3d7..78c78afcd 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,7 @@ version Extractors +* [tmz:article] Fix extraction (#11052) * [mitele] Fix extraction after website redesign (#10824) diff --git a/youtube_dl/extractor/tmz.py b/youtube_dl/extractor/tmz.py 
index 979856e9a..419f9d92e 100644 --- a/youtube_dl/extractor/tmz.py +++ b/youtube_dl/extractor/tmz.py @@ -32,12 +32,15 @@ class TMZArticleIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?tmz\.com/\d{4}/\d{2}/\d{2}/(?P[^/]+)/?' _TEST = { 'url': 'http://www.tmz.com/2015/04/19/bobby-brown-bobbi-kristina-awake-video-concert', - 'md5': 'e482a414a38db73087450e3a6ce69d00', + 'md5': '3316ff838ae5bb7f642537825e1e90d2', 'info_dict': { 'id': '0_6snoelag', - 'ext': 'mp4', + 'ext': 'mov', 'title': 'Bobby Brown Tells Crowd ... Bobbi Kristina is Awake', 'description': 'Bobby Brown stunned his audience during a concert Saturday night, when he told the crowd, "Bobbi is awake. She\'s watching me."', + 'timestamp': 1429467813, + 'upload_date': '20150419', + 'uploader_id': 'batchUser', } } @@ -45,12 +48,9 @@ class TMZArticleIE(InfoExtractor): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - embedded_video_info_str = self._html_search_regex( - r'tmzVideoEmbedV2\("([^)]+)"\);', webpage, 'embedded video info') - - embedded_video_info = self._parse_json( - embedded_video_info_str, video_id, - transform_source=lambda s: s.replace('\\', '')) + embedded_video_info = self._parse_json(self._html_search_regex( + r'tmzVideoEmbed\(({.+?})\);', webpage, 'embedded video info'), + video_id) return self.url_result( 'http://www.tmz.com/videos/%s/' % embedded_video_info['id']) From f700afa24c802c2a157a67e00c874679678d5062 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 8 Nov 2016 22:09:03 +0700 Subject: [PATCH 22/45] [ChangeLog] Actualize --- ChangeLog | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/ChangeLog b/ChangeLog index 78c78afcd..e61585904 100644 --- a/ChangeLog +++ b/ChangeLog @@ -2,7 +2,16 @@ version Extractors * [tmz:article] Fix extraction (#11052) +* [espn] Fix extraction (#11041) * [mitele] Fix extraction after website redesign (#10824) +- [ard] Remove age restriction check (#11129) +* [generic] Improve support for 
pornhub.com embeds (#11100) ++ [generic] Add support for redtube.com embeds (#11099) ++ [generic] Add support for drtuber.com embeds (#11098) ++ [redtube] Add support for embed URLs ++ [drtuber] Add support for embed URLs ++ [yahoo] Improve content id extraction (#11088) +* [toutv] Relax URL regular expression (#11121) version 2016.11.04 From c58e07a7aae2af4f6fdd503cc0749d6ee9bdc908 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 8 Nov 2016 22:11:21 +0700 Subject: [PATCH 23/45] release 2016.11.08 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- youtube_dl/version.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index f8b195fe7..f871f37d9 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.11.04*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.11.04** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.11.08*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. 
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.11.08** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.11.04 +[debug] youtube-dl version 2016.11.08 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index e61585904..5f1564c81 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2016.11.08 Extractors * [tmz:article] Fix extraction (#11052) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 7ae7f6279..b2ee2f345 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.11.04' +__version__ = '2016.11.08' From 9946aa5ccfe0b944f36cf18f41fcf4db28da8a6f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 8 Nov 2016 22:26:33 +0700 Subject: [PATCH 24/45] [franceculture] Fix extraction (closes #11140) --- youtube_dl/extractor/franceculture.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/franceculture.py b/youtube_dl/extractor/franceculture.py index 186da0d3b..56048ffc2 100644 --- a/youtube_dl/extractor/franceculture.py +++ b/youtube_dl/extractor/franceculture.py @@ -29,7 +29,7 @@ class FranceCultureIE(InfoExtractor): webpage = self._download_webpage(url, display_id) video_url = self._search_regex( - 
r'(?s)]+class="[^"]*?title-zone-diffusion[^"]*?"[^>]*>.*?]+href="([^"]+)"', + r'(?s)]+class="[^"]*?title-zone-diffusion[^"]*?"[^>]*>.*?]+data-asset-source="([^"]+)"', webpage, 'video path') title = self._og_search_title(webpage) @@ -38,7 +38,7 @@ class FranceCultureIE(InfoExtractor): '(?s)]+class="date"[^>]*>.*?]+class="inner"[^>]*>([^<]+)<', webpage, 'upload date', fatal=False)) thumbnail = self._search_regex( - r'(?s)]+itemtype="https://schema.org/ImageObject"[^>]*>.*?]+data-pagespeed-(?:lazy|high-res)-src="([^"]+)"', + r'(?s)]+itemtype="https://schema.org/ImageObject"[^>]*>.*?]+data-dejavu-src="([^"]+)"', webpage, 'thumbnail', fatal=False) uploader = self._html_search_regex( r'(?s)
(.*?)', From 4719af097c47f4e28f4a16eb86275caf35552dfe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 8 Nov 2016 22:27:02 +0700 Subject: [PATCH 25/45] [extractors] Add forgotten import for espn:article --- youtube_dl/extractor/extractors.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index d7ad5b8fc..578359a5e 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -267,7 +267,10 @@ from .engadget import EngadgetIE from .eporner import EpornerIE from .eroprofile import EroProfileIE from .escapist import EscapistIE -from .espn import ESPNIE +from .espn import ( + ESPNIE, + ESPNArticleIE, +) from .esri import EsriVideoIE from .europa import EuropaIE from .everyonesmixtape import EveryonesMixtapeIE From 6590925c2759f4ea33ded5e5bcb76a0ad8f3d2d7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 8 Nov 2016 22:29:16 +0700 Subject: [PATCH 26/45] [ChangeLog] Actualize --- ChangeLog | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/ChangeLog b/ChangeLog index 5f1564c81..c33ab4ec6 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,10 @@ +version + +Extractors +* [espn:article] Fix support for espn.com articles +* [franceculture] Fix extraction (#11140) + + version 2016.11.08 Extractors From db3367f43ee607364da493191acd745da889e3ef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 8 Nov 2016 22:30:53 +0700 Subject: [PATCH 27/45] release 2016.11.08.1 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 1 + youtube_dl/version.py | 2 +- 4 files changed, 6 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index f871f37d9..bfae97ddd 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl 
--version` and ensure your version is *2016.11.08*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.11.08** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.11.08.1*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.11.08.1** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.11.08 +[debug] youtube-dl version 2016.11.08.1 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index c33ab4ec6..21b212e86 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2016.11.08.1 Extractors * [espn:article] Fix support for espn.com articles diff --git a/docs/supportedsites.md b/docs/supportedsites.md index e7e452d93..77832504a 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -225,6 +225,7 @@ - **EroProfile** - **Escapist** - **ESPN** + - **ESPNArticle** - **EsriVideo** - **Europa** - **EveryonesMixtape** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 
b2ee2f345..69df88c6e 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.11.08' +__version__ = '2016.11.08.1' From 3eaaa8abace00d22a85a8b0b2c8ae1d6c4d52781 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Thu, 10 Nov 2016 14:52:34 +0800 Subject: [PATCH 28/45] [audioboom] Recognize /posts/ URLs (closes #11149) --- ChangeLog | 6 ++++++ youtube_dl/extractor/audioboom.py | 9 ++++++--- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/ChangeLog b/ChangeLog index 21b212e86..d97156e20 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +version + +Extractors ++ [audioboom] Recognize /posts/ URLs (#11149) + + version 2016.11.08.1 Extractors diff --git a/youtube_dl/extractor/audioboom.py b/youtube_dl/extractor/audioboom.py index 2ec2d7092..d7d1c6306 100644 --- a/youtube_dl/extractor/audioboom.py +++ b/youtube_dl/extractor/audioboom.py @@ -6,8 +6,8 @@ from ..utils import float_or_none class AudioBoomIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?audioboom\.com/boos/(?P[0-9]+)' - _TEST = { + _VALID_URL = r'https?://(?:www\.)?audioboom\.com/(?:boos|posts)/(?P[0-9]+)' + _TESTS = [{ 'url': 'https://audioboom.com/boos/4279833-3-09-2016-czaban-hour-3?t=0', 'md5': '63a8d73a055c6ed0f1e51921a10a5a76', 'info_dict': { @@ -19,7 +19,10 @@ class AudioBoomIE(InfoExtractor): 'uploader': 'Steve Czaban', 'uploader_url': 're:https?://(?:www\.)?audioboom\.com/channel/steveczabanyahoosportsradio', } - } + }, { + 'url': 'https://audioboom.com/posts/4279833-3-09-2016-czaban-hour-3?t=0', + 'only_matching': True, + }] def _real_extract(self, url): video_id = self._match_id(url) From bc40b3a5ba44006c23daf7fe0ed872af5e33bdc5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 11 Nov 2016 03:26:29 +0700 Subject: [PATCH 29/45] [eagleplatform] Fix extraction (closes #11160) --- youtube_dl/extractor/eagleplatform.py | 53 ++++++++++++++++----------- 1 file changed, 32 
insertions(+), 21 deletions(-) diff --git a/youtube_dl/extractor/eagleplatform.py b/youtube_dl/extractor/eagleplatform.py index d4dfda8cd..c2f593eca 100644 --- a/youtube_dl/extractor/eagleplatform.py +++ b/youtube_dl/extractor/eagleplatform.py @@ -4,11 +4,13 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..compat import compat_HTTPError +from ..compat import ( + compat_HTTPError, + compat_str, +) from ..utils import ( ExtractorError, int_or_none, - url_basename, ) @@ -77,7 +79,7 @@ class EaglePlatformIE(InfoExtractor): if status != 200: raise ExtractorError(' '.join(response['errors']), expected=True) - def _download_json(self, url_or_request, video_id, note='Downloading JSON metadata'): + def _download_json(self, url_or_request, video_id, note='Downloading JSON metadata', *args, **kwargs): try: response = super(EaglePlatformIE, self)._download_json(url_or_request, video_id, note) except ExtractorError as ee: @@ -116,29 +118,38 @@ class EaglePlatformIE(InfoExtractor): m3u8_url = self._get_video_url(secure_m3u8, video_id, 'Downloading m3u8 JSON') m3u8_formats = self._extract_m3u8_formats( - m3u8_url, video_id, - 'mp4', entry_protocol='m3u8_native', m3u8_id='hls') + m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id='hls', fatal=False) formats.extend(m3u8_formats) - mp4_url = self._get_video_url( + m3u8_formats_dict = {} + for f in m3u8_formats: + if f.get('height') is not None: + m3u8_formats_dict[f['height']] = f + + mp4_data = self._download_json( # Secure mp4 URL is constructed according to Player.prototype.mp4 from # http://lentaru.media.eagleplatform.com/player/player.js - re.sub(r'm3u8|hlsvod|hls|f4m', 'mp4', secure_m3u8), - video_id, 'Downloading mp4 JSON') - mp4_url_basename = url_basename(mp4_url) - for m3u8_format in m3u8_formats: - mobj = re.search('/([^/]+)/index\.m3u8', m3u8_format['url']) - if mobj: - http_format = m3u8_format.copy() - video_url = mp4_url.replace(mp4_url_basename, 
mobj.group(1)) - if not self._is_valid_url(video_url, video_id): + re.sub(r'm3u8|hlsvod|hls|f4m', 'mp4s', secure_m3u8), + video_id, 'Downloading mp4 JSON', fatal=False) + if mp4_data: + for format_id, format_url in mp4_data.get('data', {}).items(): + if not isinstance(format_url, compat_str): continue - http_format.update({ - 'url': video_url, - 'format_id': m3u8_format['format_id'].replace('hls', 'http'), - 'protocol': 'http', - }) - formats.append(http_format) + height = int_or_none(format_id) + if height is not None and m3u8_formats_dict.get(height): + f = m3u8_formats_dict[height].copy() + f.update({ + 'format_id': f['format_id'].replace('hls', 'http'), + 'protocol': 'http', + }) + else: + f = { + 'format_id': 'http-%s' % format_id, + 'height': int_or_none(format_id), + } + f['url'] = format_url + formats.append(f) self._sort_formats(formats) From 8b1aeadc33cdb1eef8079e67d522d8a39676bb53 Mon Sep 17 00:00:00 2001 From: cpm Date: Fri, 11 Nov 2016 15:48:19 -0500 Subject: [PATCH 30/45] [plays] Fix extraction --- youtube_dl/extractor/plays.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/plays.py b/youtube_dl/extractor/plays.py index c3c38cf4a..5ac0292fa 100644 --- a/youtube_dl/extractor/plays.py +++ b/youtube_dl/extractor/plays.py @@ -10,12 +10,12 @@ from ..utils import int_or_none class PlaysTVIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?plays\.tv/video/(?P[0-9a-f]{18})' _TEST = { - 'url': 'http://plays.tv/video/56af17f56c95335490/when-you-outplay-the-azir-wall', + 'url': 'https://plays.tv/video/56af17f56c95335490/when-you-outplay-the-azir-wall', 'md5': 'dfeac1198506652b5257a62762cec7bc', 'info_dict': { 'id': '56af17f56c95335490', 'ext': 'mp4', - 'title': 'When you outplay the Azir wall', + 'title': 'Bjergsen - When you outplay the Azir wall', 'description': 'Posted by Bjergsen', } } @@ -24,14 +24,11 @@ class PlaysTVIE(InfoExtractor): video_id = self._match_id(url) webpage = self._download_webpage(url, 
video_id) - title = self._og_search_title(webpage) - content = self._parse_json( - self._search_regex( - r'R\.bindContent\(({.+?})\);', webpage, - 'content'), video_id)['content'] + content = self._search_json_ld(webpage, video_id) + title = content['title'] mpd_url, sources = re.search( r'(?s)]+data-mpd="([^"]+)"[^>]*>(.+?)', - content).groups() + webpage).groups() formats = self._extract_mpd_formats( self._proto_relative_url(mpd_url), video_id, mpd_id='DASH') for format_id, height, format_url in re.findall(r' Date: Sat, 12 Nov 2016 23:01:05 +0700 Subject: [PATCH 31/45] [extractor/common] Improve thumbnail extraction from JSON-LD --- youtube_dl/extractor/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 5f4c984a9..bc5d6a4c3 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -886,7 +886,7 @@ class InfoExtractor(object): 'url': e.get('contentUrl'), 'title': unescapeHTML(e.get('name')), 'description': unescapeHTML(e.get('description')), - 'thumbnail': e.get('thumbnailUrl'), + 'thumbnail': e.get('thumbnailUrl') or e.get('thumbnailURL'), 'duration': parse_duration(e.get('duration')), 'timestamp': unified_timestamp(e.get('uploadDate')), 'filesize': float_or_none(e.get('contentSize')), From 3d2729514f432ac4d80b8dffbacb893b603f6d68 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 12 Nov 2016 23:08:05 +0700 Subject: [PATCH 32/45] [plays] Improve extraction and add support for embed URLs --- youtube_dl/extractor/plays.py | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/plays.py b/youtube_dl/extractor/plays.py index 5ac0292fa..ddfc6f148 100644 --- a/youtube_dl/extractor/plays.py +++ b/youtube_dl/extractor/plays.py @@ -8,8 +8,8 @@ from ..utils import int_or_none class PlaysTVIE(InfoExtractor): - _VALID_URL = 
r'https?://(?:www\.)?plays\.tv/video/(?P[0-9a-f]{18})' - _TEST = { + _VALID_URL = r'https?://(?:www\.)?plays\.tv/(?:video|embeds)/(?P[0-9a-f]{18})' + _TESTS = [{ 'url': 'https://plays.tv/video/56af17f56c95335490/when-you-outplay-the-azir-wall', 'md5': 'dfeac1198506652b5257a62762cec7bc', 'info_dict': { @@ -18,14 +18,18 @@ class PlaysTVIE(InfoExtractor): 'title': 'Bjergsen - When you outplay the Azir wall', 'description': 'Posted by Bjergsen', } - } + }, { + 'url': 'https://plays.tv/embeds/56af17f56c95335490', + 'only_matching': True, + }] def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) + webpage = self._download_webpage( + 'https://plays.tv/video/%s' % video_id, video_id) + + info = self._search_json_ld(webpage, video_id,) - content = self._search_json_ld(webpage, video_id) - title = content['title'] mpd_url, sources = re.search( r'(?s)]+data-mpd="([^"]+)"[^>]*>(.+?)', webpage).groups() @@ -39,10 +43,11 @@ class PlaysTVIE(InfoExtractor): }) self._sort_formats(formats) - return { + info.update({ 'id': video_id, - 'title': title, 'description': self._og_search_description(webpage), - 'thumbnail': self._og_search_thumbnail(webpage), + 'thumbnail': info.get('thumbnail') or self._og_search_thumbnail(webpage), 'formats': formats, - } + }) + + return info From 577281b0c6b2fc774e03abf3465d47d909ba31e5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kacper=20Michaj=C5=82ow?= Date: Sun, 16 Oct 2016 03:04:17 +0200 Subject: [PATCH 33/45] [cda] Fix and improve extraction Fixes #10929 --- youtube_dl/extractor/cda.py | 65 ++++++++++++++++++++++++++++--------- 1 file changed, 49 insertions(+), 16 deletions(-) diff --git a/youtube_dl/extractor/cda.py b/youtube_dl/extractor/cda.py index 8af318703..e00bdaf66 100755 --- a/youtube_dl/extractor/cda.py +++ b/youtube_dl/extractor/cda.py @@ -5,14 +5,16 @@ import re from .common import InfoExtractor from ..utils import ( - decode_packed_codes, ExtractorError, - parse_duration + 
float_or_none, + int_or_none, + parse_duration, ) class CDAIE(InfoExtractor): _VALID_URL = r'https?://(?:(?:www\.)?cda\.pl/video|ebd\.cda\.pl/[0-9]+x[0-9]+)/(?P[0-9a-z]+)' + _BASE_URL = 'http://www.cda.pl/' _TESTS = [{ 'url': 'http://www.cda.pl/video/5749950c', 'md5': '6f844bf51b15f31fae165365707ae970', @@ -21,6 +23,9 @@ class CDAIE(InfoExtractor): 'ext': 'mp4', 'height': 720, 'title': 'Oto dlaczego przed zakrętem należy zwolnić.', + 'description': 'md5:269ccd135d550da90d1662651fcb9772', + 'thumbnail': 're:^https?://.*\.jpg$', + 'average_rating': float, 'duration': 39 } }, { @@ -30,6 +35,11 @@ class CDAIE(InfoExtractor): 'id': '57413289', 'ext': 'mp4', 'title': 'Lądowanie na lotnisku na Maderze', + 'description': 'md5:60d76b71186dcce4e0ba6d4bbdb13e1a', + 'thumbnail': 're:^https?://.*\.jpg$', + 'uploader': 'crash404', + 'view_count': int, + 'average_rating': float, 'duration': 137 } }, { @@ -39,31 +49,55 @@ class CDAIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage('http://ebd.cda.pl/0x0/' + video_id, video_id) + self._set_cookie('cda.pl', 'cda.player', 'html5') + webpage = self._download_webpage( + self._BASE_URL + '/video/' + video_id, video_id) if 'Ten film jest dostępny dla użytkowników premium' in webpage: raise ExtractorError('This video is only available for premium users.', expected=True) - title = self._html_search_regex(r'(.+?)', webpage, 'title') - formats = [] + uploader = self._search_regex(r'''(?x) + <(span|meta)[^>]+itemprop=(["\'])author\2[^>]*> + (?:<\1[^>]*>[^<]*|(?!)(?:.|\n))*? 
+ <(span|meta)[^>]+itemprop=(["\'])name\4[^>]*>(?P[^<]+) + ''', webpage, 'uploader', default=None, group='uploader') + view_count = self._search_regex( + r'Odsłony:(?:\s| )*([0-9]+)', webpage, + 'view_count', default=None) + average_rating = self._search_regex( + r'<(?:span|meta)[^>]+itemprop=(["\'])ratingValue\1[^>]*>(?P[0-9.]+)', + webpage, 'rating', fatal=False, group='rating_value') + info_dict = { 'id': video_id, - 'title': title, + 'title': self._og_search_title(webpage), + 'description': self._og_search_description(webpage), + 'uploader': uploader, + 'view_count': int_or_none(view_count), + 'average_rating': float_or_none(average_rating), + 'thumbnail': self._og_search_thumbnail(webpage), 'formats': formats, 'duration': None, } def extract_format(page, version): - unpacked = decode_packed_codes(page) - format_url = self._search_regex( - r"(?:file|url)\s*:\s*(\\?[\"'])(?Phttp.+?)\1", unpacked, - '%s url' % version, fatal=False, group='url') - if not format_url: + json_str = self._search_regex( + r'player_data=(\\?["\'])(?P.+?)\1', page, + '%s player_json' % version, fatal=False, group='player_data') + if not json_str: + return + player_data = self._parse_json( + json_str, '%s player_data' % version, fatal=False) + if not player_data: + return + video = player_data.get('video') + if not video or 'file' not in video: + self.report_warning('Unable to extract %s version information' % version) return f = { - 'url': format_url, + 'url': video['file'], } m = re.search( r']+data-quality="(?P[^"]+)"[^>]+href="[^"]+"[^>]+class="[^"]*quality-btn-active[^"]*">(?P[0-9]+)p', @@ -75,9 +109,7 @@ class CDAIE(InfoExtractor): }) info_dict['formats'].append(f) if not info_dict['duration']: - info_dict['duration'] = parse_duration(self._search_regex( - r"duration\s*:\s*(\\?[\"'])(?P.+?)\1", - unpacked, 'duration', fatal=False, group='duration')) + info_dict['duration'] = parse_duration(video.get('duration')) extract_format(webpage, 'default') @@ -85,7 +117,8 @@ class 
CDAIE(InfoExtractor): r']+data-quality="[^"]+"[^>]+href="([^"]+)"[^>]+class="quality-btn"[^>]*>([0-9]+p)', webpage): webpage = self._download_webpage( - href, video_id, 'Downloading %s version information' % resolution, fatal=False) + self._BASE_URL + href, video_id, + 'Downloading %s version information' % resolution, fatal=False) if not webpage: # Manually report warning because empty page is returned when # invalid version is requested. From 4ea4c0bb2248b4de7efc152a4ee91d104ac79bc5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 13 Nov 2016 05:43:34 +0700 Subject: [PATCH 34/45] [extractor/common] Fix Bandwidth substitution in media template (closes #11175) --- youtube_dl/extractor/common.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index bc5d6a4c3..05c51fac9 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1703,7 +1703,7 @@ class InfoExtractor(object): representation_ms_info['fragments'] = [{ 'url': media_template % { 'Number': segment_number, - 'Bandwidth': representation_attrib.get('bandwidth'), + 'Bandwidth': int_or_none(representation_attrib.get('bandwidth')), }, 'duration': segment_duration, } for segment_number in range( @@ -1721,7 +1721,7 @@ class InfoExtractor(object): def add_segment_url(): segment_url = media_template % { 'Time': segment_time, - 'Bandwidth': representation_attrib.get('bandwidth'), + 'Bandwidth': int_or_none(representation_attrib.get('bandwidth')), 'Number': segment_number, } representation_ms_info['fragments'].append({ From e58609b22ccf51859376ec20d657cba5365631d8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 13 Nov 2016 06:02:26 +0700 Subject: [PATCH 35/45] [afreecatv] Add support for vod.afreecatv.com (closes #11174) --- youtube_dl/extractor/afreecatv.py | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git 
a/youtube_dl/extractor/afreecatv.py b/youtube_dl/extractor/afreecatv.py index 518c61f67..75b366993 100644 --- a/youtube_dl/extractor/afreecatv.py +++ b/youtube_dl/extractor/afreecatv.py @@ -11,6 +11,7 @@ from ..compat import ( from ..utils import ( ExtractorError, int_or_none, + update_url_query, xpath_element, xpath_text, ) @@ -18,12 +19,18 @@ from ..utils import ( class AfreecaTVIE(InfoExtractor): IE_DESC = 'afreecatv.com' - _VALID_URL = r'''(?x)^ - https?://(?:(live|afbbs|www)\.)?afreeca(?:tv)?\.com(?::\d+)? - (?: - /app/(?:index|read_ucc_bbs)\.cgi| - /player/[Pp]layer\.(?:swf|html)) - \?.*?\bnTitleNo=(?P\d+)''' + _VALID_URL = r'''(?x) + https?:// + (?: + (?:(?:live|afbbs|www)\.)?afreeca(?:tv)?\.com(?::\d+)? + (?: + /app/(?:index|read_ucc_bbs)\.cgi| + /player/[Pp]layer\.(?:swf|html) + )\?.*?\bnTitleNo=| + vod\.afreecatv\.com/PLAYER/STATION/ + ) + (?P\d+) + ''' _TESTS = [{ 'url': 'http://live.afreecatv.com:8079/app/index.cgi?szType=read_ucc_bbs&szBjId=dailyapril&nStationNo=16711924&nBbsNo=18605867&nTitleNo=36164052&szSkin=', 'md5': 'f72c89fe7ecc14c1b5ce506c4996046e', @@ -66,6 +73,9 @@ class AfreecaTVIE(InfoExtractor): }, { 'url': 'http://www.afreecatv.com/player/Player.swf?szType=szBjId=djleegoon&nStationNo=11273158&nBbsNo=13161095&nTitleNo=36327652', 'only_matching': True, + }, { + 'url': 'http://vod.afreecatv.com/PLAYER/STATION/15055030', + 'only_matching': True, }] @staticmethod @@ -83,7 +93,9 @@ class AfreecaTVIE(InfoExtractor): info_url = compat_urlparse.urlunparse(parsed_url._replace( netloc='afbbs.afreecatv.com:8080', path='/api/video/get_video_info.php')) - video_xml = self._download_xml(info_url, video_id) + + video_xml = self._download_xml( + update_url_query(info_url, {'nTitleNo': video_id}), video_id) if xpath_element(video_xml, './track/video/file') is None: raise ExtractorError('Specified AfreecaTV video does not exist', From 754e6c8322705cd9953c5f1032ed9dae35c38b27 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 13 Nov 
2016 20:54:34 +0700 Subject: [PATCH 36/45] [nrk] Workaround geo restriction and improve error messages --- youtube_dl/extractor/nrk.py | 39 +++++++++++++++++++++++++++++++++---- 1 file changed, 35 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/nrk.py b/youtube_dl/extractor/nrk.py index 3700b7ab2..aed98141b 100644 --- a/youtube_dl/extractor/nrk.py +++ b/youtube_dl/extractor/nrk.py @@ -1,6 +1,7 @@ # coding: utf-8 from __future__ import unicode_literals +import random import re from .common import InfoExtractor @@ -14,6 +15,24 @@ from ..utils import ( class NRKBaseIE(InfoExtractor): + _faked_ip = None + + def _download_webpage(self, *args, **kwargs): + # NRK checks X-Forwarded-For HTTP header in order to figure out the + # origin of the client behind proxy. This allows to bypass geo + # restriction by faking this header's value to some Norway IP. + # We will do so once we encounter any geo restriction error. + if self._faked_ip: + kwargs.setdefault('headers', {})['X-Forwarded-For'] = self._faked_ip + return super(NRKBaseIE, self)._download_webpage(*args, **kwargs) + + def _fake_ip(self): + # Use fake IP from 37.191.128.0/17 in order to workaround geo + # restriction + def octet(lb=0, ub=255): + return random.randint(lb, ub) + self._faked_ip = '37.191.%d.%d' % (octet(128), octet()) + def _real_extract(self, url): video_id = self._match_id(url) @@ -70,10 +89,22 @@ class NRKBaseIE(InfoExtractor): }] if not entries: - if data.get('usageRights', {}).get('isGeoBlocked'): - raise ExtractorError( - 'NRK har ikke rettigheter til å vise dette programmet utenfor Norge', - expected=True) + message_type = data.get('messageType') + if message_type == 'ProgramIsGeoBlocked' and not self._faked_ip: + self.report_warning( + 'Video is geo restricted, trying to fake IP') + self._fake_ip() + return self._real_extract(url) + + MESSAGES = { + 'ProgramRightsAreNotReady': 'Du kan dessverre ikke se eller høre programmet', + 'ProgramRightsHasExpired': 'Programmet har gått ut', 
+ 'ProgramIsGeoBlocked': 'NRK har ikke rettigheter til å vise dette programmet utenfor Norge', + } + raise ExtractorError( + '%s said: %s' % (self.IE_NAME, MESSAGES.get( + message_type, message_type)), + expected=True) conviva = data.get('convivaStatistics') or {} series = conviva.get('seriesName') or data.get('seriesTitle') From 690355551c084a942db9820b4c83b65f73fb2d4c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 13 Nov 2016 22:06:16 +0700 Subject: [PATCH 37/45] [downoader/fragment,f4m,hls] Add internal support for custom HTTP headers --- youtube_dl/downloader/f4m.py | 8 ++++++-- youtube_dl/downloader/fragment.py | 5 +++++ youtube_dl/downloader/hls.py | 8 ++++++-- 3 files changed, 17 insertions(+), 4 deletions(-) diff --git a/youtube_dl/downloader/f4m.py b/youtube_dl/downloader/f4m.py index 80c21d40b..688e086eb 100644 --- a/youtube_dl/downloader/f4m.py +++ b/youtube_dl/downloader/f4m.py @@ -314,7 +314,8 @@ class F4mFD(FragmentFD): man_url = info_dict['url'] requested_bitrate = info_dict.get('tbr') self.to_screen('[%s] Downloading f4m manifest' % self.FD_NAME) - urlh = self.ydl.urlopen(man_url) + + urlh = self.ydl.urlopen(self._prepare_url(info_dict, man_url)) man_url = urlh.geturl() # Some manifests may be malformed, e.g. 
prosiebensat1 generated manifests # (see https://github.com/rg3/youtube-dl/issues/6215#issuecomment-121704244 @@ -387,7 +388,10 @@ class F4mFD(FragmentFD): url_parsed = base_url_parsed._replace(path=base_url_parsed.path + name, query='&'.join(query)) frag_filename = '%s-%s' % (ctx['tmpfilename'], name) try: - success = ctx['dl'].download(frag_filename, {'url': url_parsed.geturl()}) + success = ctx['dl'].download(frag_filename, { + 'url': url_parsed.geturl(), + 'http_headers': info_dict.get('http_headers'), + }) if not success: return False (down, frag_sanitized) = sanitize_open(frag_filename, 'rb') diff --git a/youtube_dl/downloader/fragment.py b/youtube_dl/downloader/fragment.py index 84aacf7db..60df627a6 100644 --- a/youtube_dl/downloader/fragment.py +++ b/youtube_dl/downloader/fragment.py @@ -9,6 +9,7 @@ from ..utils import ( error_to_compat_str, encodeFilename, sanitize_open, + sanitized_Request, ) @@ -37,6 +38,10 @@ class FragmentFD(FileDownloader): def report_skip_fragment(self, fragment_name): self.to_screen('[download] Skipping fragment %s...' 
% fragment_name) + def _prepare_url(self, info_dict, url): + headers = info_dict.get('http_headers') + return sanitized_Request(url, None, headers) if headers else url + def _prepare_and_start_frag_download(self, ctx): self._prepare_frag_download(ctx) self._start_frag_download(ctx) diff --git a/youtube_dl/downloader/hls.py b/youtube_dl/downloader/hls.py index 541b92ee1..7373ec05f 100644 --- a/youtube_dl/downloader/hls.py +++ b/youtube_dl/downloader/hls.py @@ -59,7 +59,8 @@ class HlsFD(FragmentFD): def real_download(self, filename, info_dict): man_url = info_dict['url'] self.to_screen('[%s] Downloading m3u8 manifest' % self.FD_NAME) - manifest = self.ydl.urlopen(man_url).read() + + manifest = self.ydl.urlopen(self._prepare_url(info_dict, man_url)).read() s = manifest.decode('utf-8', 'ignore') @@ -112,7 +113,10 @@ class HlsFD(FragmentFD): count = 0 while count <= fragment_retries: try: - success = ctx['dl'].download(frag_filename, {'url': frag_url}) + success = ctx['dl'].download(frag_filename, { + 'url': frag_url, + 'http_headers': info_dict.get('http_headers'), + }) if not success: return False down, frag_sanitized = sanitize_open(frag_filename, 'rb') From 7e08e2cab02b0284e72171b3ba6b946b49f12331 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 13 Nov 2016 22:28:29 +0700 Subject: [PATCH 38/45] [nrk] Add X-Forwarded-For HTTP header in info dict --- youtube_dl/extractor/nrk.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/nrk.py b/youtube_dl/extractor/nrk.py index aed98141b..1f2204833 100644 --- a/youtube_dl/extractor/nrk.py +++ b/youtube_dl/extractor/nrk.py @@ -17,14 +17,15 @@ from ..utils import ( class NRKBaseIE(InfoExtractor): _faked_ip = None - def _download_webpage(self, *args, **kwargs): + def _download_webpage_handle(self, *args, **kwargs): # NRK checks X-Forwarded-For HTTP header in order to figure out the # origin of the client behind proxy. 
This allows to bypass geo # restriction by faking this header's value to some Norway IP. # We will do so once we encounter any geo restriction error. if self._faked_ip: - kwargs.setdefault('headers', {})['X-Forwarded-For'] = self._faked_ip - return super(NRKBaseIE, self)._download_webpage(*args, **kwargs) + # NB: str is intentional + kwargs.setdefault(str('headers'), {})['X-Forwarded-For'] = self._faked_ip + return super(NRKBaseIE, self)._download_webpage_handle(*args, **kwargs) def _fake_ip(self): # Use fake IP from 37.191.128.0/17 in order to workaround geo @@ -43,6 +44,8 @@ class NRKBaseIE(InfoExtractor): title = data.get('fullTitle') or data.get('mainTitle') or data['title'] video_id = data.get('id') or video_id + http_headers = {'X-Forwarded-For': self._faked_ip} if self._faked_ip else {} + entries = [] media_assets = data.get('mediaAssets') @@ -73,6 +76,7 @@ class NRKBaseIE(InfoExtractor): 'duration': duration, 'subtitles': subtitles, 'formats': formats, + 'http_headers': http_headers, }) if not entries: From 50913b82414488bbf625a00f1844ca84dec094dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 13 Nov 2016 22:29:36 +0700 Subject: [PATCH 39/45] [nrk] Improve geo restriction detection --- youtube_dl/extractor/nrk.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/nrk.py b/youtube_dl/extractor/nrk.py index 1f2204833..c89aac63e 100644 --- a/youtube_dl/extractor/nrk.py +++ b/youtube_dl/extractor/nrk.py @@ -93,8 +93,9 @@ class NRKBaseIE(InfoExtractor): }] if not entries: - message_type = data.get('messageType') - if message_type == 'ProgramIsGeoBlocked' and not self._faked_ip: + message_type = data.get('messageType', '') + # Can be ProgramIsGeoBlocked or ChannelIsGeoBlocked* + if 'IsGeoBlocked' in message_type and not self._faked_ip: self.report_warning( 'Video is geo restricted, trying to fake IP') self._fake_ip() From dbffd00ba940164df6f144577902d2cd8cf27e71 Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 14 Nov 2016 02:37:21 +0700 Subject: [PATCH 40/45] [ChangeLog] Actualize --- ChangeLog | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/ChangeLog b/ChangeLog index d97156e20..1b3962d2c 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,18 @@ version +Core ++ [downoader/fragment,f4m,hls] Respect HTTP headers from info dict +* [extractor/common] Fix media templates with Bandwidth substitution pattern in + MPD manifests (#11175) +* [extractor/common] Improve thumbnail extraction from JSON-LD + Extractors ++ [nrk] Workaround geo restriction ++ [nrk] Improve error detection and messages ++ [afreecatv] Add support for vod.afreecatv.com (#11174) +* [cda] Fix and improve extraction (#10929, #10936) +* [plays] Fix extraction (#11165) +* [eagleplatform] Fix extraction (#11160) + [audioboom] Recognize /posts/ URLs (#11149) From b3d4bd05f9c3fff52efe669d7930b1647d6e2612 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 14 Nov 2016 02:39:50 +0700 Subject: [PATCH 41/45] release 2016.11.14 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- youtube_dl/version.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index bfae97ddd..f96b5672d 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.11.08.1*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.11.08.1** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.11.14*. 
If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.11.14** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.11.08.1 +[debug] youtube-dl version 2016.11.14 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 1b3962d2c..791ffb7b6 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2016.11.14 Core + [downoader/fragment,f4m,hls] Respect HTTP headers from info dict diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 69df88c6e..1f2b246e4 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.11.08.1' +__version__ = '2016.11.14' From 9f60134a9dd06f89961c34da1e6611d599ea6102 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 14 Nov 2016 02:46:12 +0700 Subject: [PATCH 42/45] [ChangeLog] Actualize --- ChangeLog | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index 791ffb7b6..1b3962d2c 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version 2016.11.14 +version Core + [downoader/fragment,f4m,hls] Respect HTTP headers from info dict From 
6b4dfa28197af9939ffa1cff90124300c46742f2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 14 Nov 2016 02:48:15 +0700 Subject: [PATCH 43/45] release 2016.11.14.1 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- youtube_dl/version.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index f96b5672d..fef9fc7a2 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.11.14*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.11.14** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.11.14.1*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. 
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.11.14.1** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.11.14 +[debug] youtube-dl version 2016.11.14.1 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 1b3962d2c..577709c44 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2016.11.14.1 Core + [downoader/fragment,f4m,hls] Respect HTTP headers from info dict diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 1f2b246e4..9557b2000 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.11.14' +__version__ = '2016.11.14.1' From 49b69ad91c4a638eb8b9bdb9846819fa9274a38e Mon Sep 17 00:00:00 2001 From: ping Date: Tue, 15 Nov 2016 23:07:17 +0800 Subject: [PATCH 44/45] [vlive] Prefer locale over language for subtitles id --- youtube_dl/extractor/vlive.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/vlive.py b/youtube_dl/extractor/vlive.py index 8d671cca7..c3aa57cd6 100644 --- a/youtube_dl/extractor/vlive.py +++ b/youtube_dl/extractor/vlive.py @@ -116,7 +116,7 @@ class VLiveIE(InfoExtractor): subtitles = {} for caption in playinfo.get('captions', {}).get('list', []): - lang = dict_get(caption, ('language', 'locale', 'country', 'label')) + lang = dict_get(caption, 
('locale', 'language', 'country', 'label')) if lang and caption.get('source'): subtitles[lang] = [{ 'ext': 'vtt', From 58355a3bf163349831b076c0fac4c09a286d5aa4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 15 Nov 2016 22:11:47 +0700 Subject: [PATCH 45/45] [vlive] Add test for #11203 --- youtube_dl/extractor/vlive.py | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/vlive.py b/youtube_dl/extractor/vlive.py index c3aa57cd6..acf9fda48 100644 --- a/youtube_dl/extractor/vlive.py +++ b/youtube_dl/extractor/vlive.py @@ -17,7 +17,7 @@ from ..compat import compat_urllib_parse_urlencode class VLiveIE(InfoExtractor): IE_NAME = 'vlive' _VALID_URL = r'https?://(?:(?:www|m)\.)?vlive\.tv/video/(?P[0-9]+)' - _TEST = { + _TESTS = [{ 'url': 'http://www.vlive.tv/video/1326', 'md5': 'cc7314812855ce56de70a06a27314983', 'info_dict': { @@ -27,7 +27,20 @@ class VLiveIE(InfoExtractor): 'creator': "Girl's Day", 'view_count': int, }, - } + }, { + 'url': 'http://www.vlive.tv/video/16937', + 'info_dict': { + 'id': '16937', + 'ext': 'mp4', + 'title': '[V LIVE] 첸백시 걍방', + 'creator': 'EXO', + 'view_count': int, + 'subtitles': 'mincount:12', + }, + 'params': { + 'skip_download': True, + }, + }] def _real_extract(self, url): video_id = self._match_id(url)