From 4eece8ba572dfd009ea2d980bfc36d0adacb16d0 Mon Sep 17 00:00:00 2001 From: Andy Savicki Date: Wed, 16 Nov 2016 02:37:28 +0300 Subject: [PATCH 01/48] [funnyordie] Improve extraction --- youtube_dl/extractor/funnyordie.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/funnyordie.py b/youtube_dl/extractor/funnyordie.py index 8c5ffc9e8..7664dd584 100644 --- a/youtube_dl/extractor/funnyordie.py +++ b/youtube_dl/extractor/funnyordie.py @@ -11,7 +11,7 @@ class FunnyOrDieIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?funnyordie\.com/(?Pembed|articles|videos)/(?P[0-9a-f]+)(?:$|[?#/])' _TESTS = [{ 'url': 'http://www.funnyordie.com/videos/0732f586d7/heart-shaped-box-literal-video-version', - 'md5': 'bcd81e0c4f26189ee09be362ad6e6ba9', + 'md5': 'c26b9ee0e1ca138c12071f59572ba9c7', 'info_dict': { 'id': '0732f586d7', 'ext': 'mp4', @@ -51,10 +51,7 @@ class FunnyOrDieIE(InfoExtractor): formats = [] - formats.extend(self._extract_m3u8_formats( - m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)) - - bitrates = [int(bitrate) for bitrate in re.findall(r'[,/]v(\d+)[,/]', m3u8_url)] + bitrates = [int(bitrate) for bitrate in re.findall(r'[,/]v(\d+)(?=[,/])', m3u8_url)] bitrates.sort() for bitrate in bitrates: @@ -65,6 +62,11 @@ class FunnyOrDieIE(InfoExtractor): 'vbr': bitrate, }) + self._check_formats(formats, video_id) + + formats.extend(self._extract_m3u8_formats( + m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)) + subtitles = {} for src, src_lang in re.findall(r' Date: Mon, 21 Nov 2016 23:46:55 +0700 Subject: [PATCH 02/48] [funnyordie] Copy formats' metadata from hls and sort formats --- youtube_dl/extractor/funnyordie.py | 47 +++++++++++++++++++++++------- 1 file changed, 37 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/funnyordie.py b/youtube_dl/extractor/funnyordie.py index 7664dd584..f2928b5fe 100644 --- a/youtube_dl/extractor/funnyordie.py +++ 
b/youtube_dl/extractor/funnyordie.py @@ -11,7 +11,7 @@ class FunnyOrDieIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?funnyordie\.com/(?Pembed|articles|videos)/(?P[0-9a-f]+)(?:$|[?#/])' _TESTS = [{ 'url': 'http://www.funnyordie.com/videos/0732f586d7/heart-shaped-box-literal-video-version', - 'md5': 'c26b9ee0e1ca138c12071f59572ba9c7', + 'md5': 'bcd81e0c4f26189ee09be362ad6e6ba9', 'info_dict': { 'id': '0732f586d7', 'ext': 'mp4', @@ -28,6 +28,9 @@ class FunnyOrDieIE(InfoExtractor): 'description': 'Please use this to sell something. www.jonlajoie.com', 'thumbnail': 're:^http:.*\.jpg$', }, + 'params': { + 'skip_download': True, + }, }, { 'url': 'http://www.funnyordie.com/articles/ebf5e34fc8/10-hours-of-walking-in-nyc-as-a-man', 'only_matching': True, @@ -51,21 +54,45 @@ class FunnyOrDieIE(InfoExtractor): formats = [] + m3u8_formats = self._extract_m3u8_formats( + m3u8_url, video_id, 'mp4', 'm3u8_native', + m3u8_id='hls', fatal=False) + source_formats = list(filter( + lambda f: f.get('vcodec') != 'none' and f.get('resolution') != 'multiple', + m3u8_formats)) + bitrates = [int(bitrate) for bitrate in re.findall(r'[,/]v(\d+)(?=[,/])', m3u8_url)] bitrates.sort() - for bitrate in bitrates: - for link in links: - formats.append({ - 'url': self._proto_relative_url('%s%d.%s' % (link[0], bitrate, link[1])), - 'format_id': '%s-%d' % (link[1], bitrate), - 'vbr': bitrate, - }) + if source_formats: + self._sort_formats(source_formats) + for bitrate, f in zip(bitrates, source_formats or [{}] * len(bitrates)): + for path, ext in links: + ff = f.copy() + if ff: + if ext != 'mp4': + ff = dict( + [(k, v) for k, v in ff.items() + if k in ('height', 'width', 'format_id')]) + ff.update({ + 'format_id': ff['format_id'].replace('hls', ext), + 'ext': ext, + 'protocol': 'http', + }) + else: + ff.update({ + 'format_id': '%s-%d' % (ext, bitrate), + 'vbr': bitrate, + }) + ff['url'] = self._proto_relative_url( + '%s%d.%s' % (path, bitrate, ext)) + formats.append(ff) 
self._check_formats(formats, video_id) - formats.extend(self._extract_m3u8_formats( - m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)) + formats.extend(m3u8_formats) + self._sort_formats( + formats, field_preference=('height', 'width', 'tbr', 'format_id')) subtitles = {} for src, src_lang in re.findall(r' Date: Tue, 22 Nov 2016 20:40:57 +0800 Subject: [PATCH 03/48] [amcnetworks] Recognize more BBC America URLs Closes #11263 --- ChangeLog | 1 + youtube_dl/extractor/amcnetworks.py | 5 ++++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index 0d8174408..9ed42315e 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,7 @@ version Extractors ++ [amcnetworks] Recognize more BBC America URLs (#11263) - [Crunchyroll] ScaledBorderAndShadow are removed from ASS subtitles (#8207, #9028) diff --git a/youtube_dl/extractor/amcnetworks.py b/youtube_dl/extractor/amcnetworks.py index d2b03b177..87c803e94 100644 --- a/youtube_dl/extractor/amcnetworks.py +++ b/youtube_dl/extractor/amcnetworks.py @@ -10,7 +10,7 @@ from ..utils import ( class AMCNetworksIE(ThePlatformIE): - _VALID_URL = r'https?://(?:www\.)?(?:amc|bbcamerica|ifc|wetv)\.com/(?:movies/|shows/[^/]+/(?:full-episodes/)?season-\d+/episode-\d+(?:-(?:[^/]+/)?|/))(?P[^/?#]+)' + _VALID_URL = r'https?://(?:www\.)?(?:amc|bbcamerica|ifc|wetv)\.com/(?:movies/|shows/[^/]+/(?:full-episodes/)?[^/]+/episode-\d+(?:-(?:[^/]+/)?|/))(?P[^/?#]+)' _TESTS = [{ 'url': 'http://www.ifc.com/shows/maron/season-04/episode-01/step-1', 'md5': '', @@ -41,6 +41,9 @@ class AMCNetworksIE(ThePlatformIE): }, { 'url': 'http://www.ifc.com/movies/chaos', 'only_matching': True, + }, { + 'url': 'http://www.bbcamerica.com/shows/doctor-who/full-episodes/the-power-of-the-daleks/episode-01-episode-1-color-version', + 'only_matching': True, }] def _real_extract(self, url): From 868630fbe5843ea9da5fd6fa826516f0dcbed20e Mon Sep 17 00:00:00 2001 From: Andy Savicki Date: Sun, 20 Nov 2016 02:12:22 +0300 Subject: 
[PATCH 04/48] [hellporno] Add support for hellporno.net and improve ext extraction --- youtube_dl/extractor/hellporno.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/hellporno.py b/youtube_dl/extractor/hellporno.py index 7a1c75b65..10da14067 100644 --- a/youtube_dl/extractor/hellporno.py +++ b/youtube_dl/extractor/hellporno.py @@ -6,12 +6,13 @@ from .common import InfoExtractor from ..utils import ( js_to_json, remove_end, + determine_ext, ) class HellPornoIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?hellporno\.com/videos/(?P[^/]+)' - _TEST = { + _VALID_URL = r'https?://(?:www\.)?hellporno\.(?:com/videos|net/v)/(?P[^/]+)' + _TESTS = [{ 'url': 'http://hellporno.com/videos/dixie-is-posing-with-naked-ass-very-erotic/', 'md5': '1fee339c610d2049699ef2aa699439f1', 'info_dict': { @@ -22,7 +23,10 @@ class HellPornoIE(InfoExtractor): 'thumbnail': 're:https?://.*\.jpg$', 'age_limit': 18, } - } + }, { + 'url': 'http://hellporno.net/v/186271/', + 'only_matching': True, + }] def _real_extract(self, url): display_id = self._match_id(url) @@ -38,7 +42,7 @@ class HellPornoIE(InfoExtractor): video_id = flashvars.get('video_id') thumbnail = flashvars.get('preview_url') - ext = flashvars.get('postfix', '.mp4')[1:] + ext = determine_ext(flashvars.get('postfix'), 'mp4') formats = [] for video_url_key in ['video_url', 'video_alt_url']: From c8f56741dd531685e61f0f4418107318663f5ff3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 22 Nov 2016 22:29:37 +0700 Subject: [PATCH 05/48] [ChangeLog] Actualize --- ChangeLog | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/ChangeLog b/ChangeLog index 9ed42315e..4127fd24f 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,9 +1,16 @@ version Extractors +* [hellporno] Fix video extension extraction (#11247) ++ [hellporno] Add support for hellporno.net (#11247) + [amcnetworks] Recognize more BBC America URLs (#11263) -- [Crunchyroll] 
ScaledBorderAndShadow are removed from ASS subtitles - (#8207, #9028) +* [funnyordie] Improve extraction (#11208) +* [extractor/generic] Improve limelight embeds support +- [crunchyroll] Remove ScaledBorderAndShadow from ASS subtitles (#8207, #9028) +* [bandcamp] Fix free downloads extraction and extract all formats (#11067) +* [twitter:card] Relax URL regular expression (#11225) ++ [tvanouvelles] Add support for tvanouvelles.ca (#10616) + version 2016.11.18 From 3b5daf07362e401e84a5c32482dc3c9416bdd000 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 22 Nov 2016 22:32:16 +0700 Subject: [PATCH 06/48] release 2016.11.22 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 2 ++ youtube_dl/version.py | 2 +- 4 files changed, 7 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 85ac137a1..b7fa566c8 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.11.18*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.11.18** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.11.22*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. 
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.11.22** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.11.18 +[debug] youtube-dl version 2016.11.22 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 4127fd24f..2b35952fe 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2016.11.22 Extractors * [hellporno] Fix video extension extraction (#11247) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 77832504a..7c485349d 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -771,6 +771,8 @@ - **TV2Article** - **TV3** - **TV4**: tv4.se and tv4play.se + - **TVANouvelles** + - **TVANouvellesArticle** - **TVC** - **TVCArticle** - **tvigle**: Интернет-телевидение Tvigle.ru diff --git a/youtube_dl/version.py b/youtube_dl/version.py index ef9ccc08a..3c746baac 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.11.18' +__version__ = '2016.11.22' From c867adc68c5dda0fafb2535c1a02ea32549b9d10 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 23 Nov 2016 23:28:32 +0700 Subject: [PATCH 07/48] [youtube:playlist] Pass disable_polymer in query (closes #11193, closes #11270) --- youtube_dl/extractor/youtube.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 7ccb875a5..bd24a2838 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1796,7 +1796,7 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor): | ((?:PL|LL|EC|UU|FL|RD|UL)[0-9A-Za-z-_]{10,}) )""" - _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s' + _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s&disable_polymer=true' _VIDEO_RE = r'href="\s*/watch\?v=(?P[0-9A-Za-z_-]{11})&[^"]*?index=(?P\d+)(?:[^>]+>(?P[^<]+))?' IE_NAME = 'youtube:playlist' _TESTS = [{ From 44444f0d3ba8e448cc824d7722d865794fb6d5d3 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Thu, 24 Nov 2016 20:32:17 +0800 Subject: [PATCH 08/48] [cbslocal] Support newyork.cbslocal.com Closes #11285 --- ChangeLog | 6 +++++ youtube_dl/extractor/cbslocal.py | 39 ++++++++++++++++++++++++++++---- 2 files changed, 41 insertions(+), 4 deletions(-) diff --git a/ChangeLog b/ChangeLog index 2b35952fe..7e784ed76 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +version <unreleased> + +Extractors ++ [cbslocal] Recognize New York site (#11285) + + version 2016.11.22 Extractors diff --git a/youtube_dl/extractor/cbslocal.py b/youtube_dl/extractor/cbslocal.py index 289709c97..8d5f11dd1 100644 --- a/youtube_dl/extractor/cbslocal.py +++ b/youtube_dl/extractor/cbslocal.py @@ -4,11 +4,14 @@ from __future__ import unicode_literals from .anvato import AnvatoIE from .sendtonews import SendtoNewsIE from ..compat import compat_urlparse -from ..utils import unified_timestamp +from ..utils import ( + parse_iso8601, + unified_timestamp, +) class CBSLocalIE(AnvatoIE): - _VALID_URL = r'https?://[a-z]+\.cbslocal\.com/\d+/\d+/\d+/(?P<id>[0-9a-z-]+)' + _VALID_URL = r'https?://[a-z]+\.cbslocal\.com/(?:\d+/\d+/\d+|video)/(?P<id>[0-9a-z-]+)' _TESTS = [{ # Anvato backend @@ -49,6 +52,31 @@ class CBSLocalIE(AnvatoIE): # m3u8 download 'skip_download': True, }, + }, { + 'url': 
'http://newyork.cbslocal.com/video/3580809-a-very-blue-anniversary/', + 'info_dict': { + 'id': '3580809', + 'ext': 'mp4', + 'title': 'A Very Blue Anniversary', + 'description': 'CBS2’s Cindy Hsu has more.', + 'thumbnail': 're:^https?://.*', + 'timestamp': 1479962220, + 'upload_date': '20161124', + 'uploader': 'CBS', + 'subtitles': { + 'en': 'mincount:5', + }, + 'categories': [ + 'Stations\\Spoken Word\\WCBSTV', + 'Syndication\\AOL', + 'Syndication\\MSN', + 'Syndication\\NDN', + 'Syndication\\Yahoo', + 'Content\\News', + 'Content\\News\\Local News', + ], + 'tags': ['CBS 2 News Weekends', 'Cindy Hsu', 'Blue Man Group'], + }, }] def _real_extract(self, url): @@ -64,8 +92,11 @@ class CBSLocalIE(AnvatoIE): info_dict = self._extract_anvato_videos(webpage, display_id) time_str = self._html_search_regex( - r'class="entry-date">([^<]+)<', webpage, 'released date', fatal=False) - timestamp = unified_timestamp(time_str) + r'class="entry-date">([^<]+)<', webpage, 'released date', default=None) + if time_str: + timestamp = unified_timestamp(time_str) + else: + timestamp = parse_iso8601(self._html_search_meta('uploadDate', webpage)) info_dict.update({ 'display_id': display_id, From b68599ed473c24477cefb3f09580e7a8cbb666d9 Mon Sep 17 00:00:00 2001 From: zurfyx <zurfyx@gmail.com> Date: Sat, 19 Nov 2016 19:23:49 +0100 Subject: [PATCH 09/48] [mitele] Relax _VALID_URL --- youtube_dl/extractor/mitele.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/mitele.py b/youtube_dl/extractor/mitele.py index c41ab1e91..48d94992c 100644 --- a/youtube_dl/extractor/mitele.py +++ b/youtube_dl/extractor/mitele.py @@ -75,7 +75,7 @@ class MiTeleBaseIE(InfoExtractor): class MiTeleIE(InfoExtractor): IE_DESC = 'mitele.es' - _VALID_URL = r'https?://(?:www\.)?mitele\.es/programas-tv/(?:[^/]+/)(?P<id>[^/]+)/player' + _VALID_URL = r'https?://(?:www\.)?mitele\.es/(?:[^/]+/)+(?P<id>[^/]+)/player' _TESTS = [{ 'url': 
'http://www.mitele.es/programas-tv/diario-de/57b0dfb9c715da65618b4afa/player', @@ -109,6 +109,9 @@ class MiTeleIE(InfoExtractor): 'skip_download': True, }, 'add_ie': ['Ooyala'], + }, { + 'url': 'http://www.mitele.es/series-online/la-que-se-avecina/57aac5c1c915da951a8b45ed/player', + 'only_matching': True, }] def _real_extract(self, url): From 8eb7b5c3f170d8791d37ae980cd5024eba1c83c1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 24 Nov 2016 22:43:02 +0700 Subject: [PATCH 10/48] [mitele] Modernize and extract more metadata --- youtube_dl/extractor/mitele.py | 90 +++++++++++++++++++++++++--------- 1 file changed, 66 insertions(+), 24 deletions(-) diff --git a/youtube_dl/extractor/mitele.py b/youtube_dl/extractor/mitele.py index 48d94992c..f577836be 100644 --- a/youtube_dl/extractor/mitele.py +++ b/youtube_dl/extractor/mitele.py @@ -86,7 +86,10 @@ class MiTeleIE(InfoExtractor): 'description': 'md5:3b6fce7eaa41b2d97358726378d9369f', 'series': 'Diario de', 'season': 'La redacción', + 'season_number': 14, + 'season_id': 'diario_de_t14_11981', 'episode': 'Programa 144', + 'episode_number': 3, 'thumbnail': 're:(?i)^https?://.*\.jpg$', 'duration': 2913, }, @@ -101,7 +104,10 @@ class MiTeleIE(InfoExtractor): 'description': 'md5:5ff132013f0cd968ffbf1f5f3538a65f', 'series': 'Cuarto Milenio', 'season': 'Temporada 6', + 'season_number': 6, + 'season_id': 'cuarto_milenio_t06_12715', 'episode': 'Programa 226', + 'episode_number': 24, 'thumbnail': 're:(?i)^https?://.*\.jpg$', 'duration': 7313, }, @@ -118,35 +124,68 @@ class MiTeleIE(InfoExtractor): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - gigya_url = self._search_regex(r'<gigya-api>[^>]*</gigya-api>[^>]*<script\s*src="([^"]*)">[^>]*</script>', webpage, 'gigya', default=None) - gigya_sc = self._download_webpage(compat_urlparse.urljoin(r'http://www.mitele.es/', gigya_url), video_id, 'Downloading gigya script') + gigya_url = self._search_regex( + 
r'<gigya-api>[^>]*</gigya-api>[^>]*<script\s+src="([^"]*)">[^>]*</script>', + webpage, 'gigya', default=None) + gigya_sc = self._download_webpage( + compat_urlparse.urljoin('http://www.mitele.es/', gigya_url), + video_id, 'Downloading gigya script') + # Get a appKey/uuid for getting the session key - appKey_var = self._search_regex(r'value\("appGridApplicationKey",([0-9a-f]+)\)', gigya_sc, 'appKey variable') - appKey = self._search_regex(r'var %s="([0-9a-f]+)"' % appKey_var, gigya_sc, 'appKey') - uid = compat_str(uuid.uuid4()) - session_url = 'https://appgrid-api.cloud.accedo.tv/session?appKey=%s&uuid=%s' % (appKey, uid) - session_json = self._download_json(session_url, video_id, 'Downloading session keys') - sessionKey = compat_str(session_json['sessionKey']) + appKey_var = self._search_regex( + r'value\s*\(\s*["\']appGridApplicationKey["\']\s*,\s*([0-9a-f]+)', + gigya_sc, 'appKey variable') + appKey = self._search_regex( + r'var\s+%s\s*=\s*["\']([0-9a-f]+)' % appKey_var, gigya_sc, 'appKey') + + session_json = self._download_json( + 'https://appgrid-api.cloud.accedo.tv/session', + video_id, 'Downloading session keys', query={ + 'appKey': appKey, + 'uuid': compat_str(uuid.uuid4()), + }) + + paths = self._download_json( + 'https://appgrid-api.cloud.accedo.tv/metadata/general_configuration,%20web_configuration', + video_id, 'Downloading paths JSON', + query={'sessionKey': compat_str(session_json['sessionKey'])}) - paths_url = 'https://appgrid-api.cloud.accedo.tv/metadata/general_configuration,%20web_configuration?sessionKey=' + sessionKey - paths = self._download_json(paths_url, video_id, 'Downloading paths JSON') ooyala_s = paths['general_configuration']['api_configuration']['ooyala_search'] - data_p = ( - 'http://' + ooyala_s['base_url'] + ooyala_s['full_path'] + ooyala_s['provider_id'] + - '/docs/' + video_id + '?include_titles=Series,Season&product_name=test&format=full') - data = self._download_json(data_p, video_id, 'Downloading data JSON') - source = 
data['hits']['hits'][0]['_source'] - embedCode = source['offers'][0]['embed_codes'][0] + source = self._download_json( + 'http://%s%s%s/docs/%s' % ( + ooyala_s['base_url'], ooyala_s['full_path'], + ooyala_s['provider_id'], video_id), + video_id, 'Downloading data JSON', query={ + 'include_titles': 'Series,Season', + 'product_name': 'test', + 'format': 'full', + })['hits']['hits'][0]['_source'] + embedCode = source['offers'][0]['embed_codes'][0] titles = source['localizable_titles'][0] + title = titles.get('title_medium') or titles['title_long'] - episode = titles['title_sort_name'] - description = titles['summary_long'] - titles_series = source['localizable_titles_series'][0] - series = titles_series['title_long'] - titles_season = source['localizable_titles_season'][0] - season = titles_season['title_medium'] - duration = parse_duration(source['videos'][0]['duration']) + + description = titles.get('summary_long') or titles.get('summary_medium') + + def get(key1, key2): + value1 = source.get(key1) + if not value1 or not isinstance(value1, list): + return + if not isinstance(value1[0], dict): + return + return value1[0].get(key2) + + series = get('localizable_titles_series', 'title_medium') + + season = get('localizable_titles_season', 'title_medium') + season_number = int_or_none(source.get('season_number')) + season_id = source.get('season_id') + + episode = titles.get('title_sort_name') + episode_number = int_or_none(source.get('episode_number')) + + duration = parse_duration(get('videos', 'duration')) return { '_type': 'url_transparent', @@ -157,7 +196,10 @@ class MiTeleIE(InfoExtractor): 'description': description, 'series': series, 'season': season, + 'season_number': season_number, + 'season_id': season_id, 'episode': episode, + 'episode_number': episode_number, 'duration': duration, - 'thumbnail': source['images'][0]['url'], + 'thumbnail': get('images', 'url'), } From 8b27d83e4e07064898c5ec842e916c84cf7a1826 Mon Sep 17 00:00:00 2001 From: "Andrew J. 
Erickson" <andy@bolt.me> Date: Wed, 9 Nov 2016 14:54:17 -0800 Subject: [PATCH 11/48] vevo: fixing naming when there are featured artists --- youtube_dl/extractor/vevo.py | 31 +++++++++++++++++++++++++++---- 1 file changed, 27 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/vevo.py b/youtube_dl/extractor/vevo.py index 783efda7d..ce607945f 100644 --- a/youtube_dl/extractor/vevo.py +++ b/youtube_dl/extractor/vevo.py @@ -91,14 +91,30 @@ class VevoIE(VevoBaseIE): 'info_dict': { 'id': 'USUV71503000', 'ext': 'mp4', - 'title': 'K Camp - Till I Die', + 'title': 'K Camp ft. T.I. - Till I Die', 'age_limit': 18, 'timestamp': 1449468000, 'upload_date': '20151207', 'uploader': 'K Camp', 'track': 'Till I Die', 'artist': 'K Camp', - 'genre': 'Rap/Hip-Hop', + 'genre': 'Hip-Hop', + }, + }, { + 'note': 'Featured test', + 'url': 'https://www.vevo.com/watch/lemaitre/Wait/USUV71402190', + 'md5': 'd28675e5e8805035d949dc5cf161071d', + 'info_dict': { + 'id': 'USUV71402190', + 'ext': 'mp4', + 'title': 'Lemaitre ft. 
LoLo - Wait', + 'age_limit': 0, + 'timestamp': 1413432000, + 'upload_date': '20141016', + 'uploader': 'Lemaitre', + 'track': 'Wait', + 'artist': 'Lemaitre', + 'genre': 'Electronic', }, }, { 'note': 'Only available via webpage', @@ -242,8 +258,15 @@ class VevoIE(VevoBaseIE): timestamp = parse_iso8601(video_info.get('releaseDate')) artists = video_info.get('artists') - if artists: - artist = uploader = artists[0]['name'] + for curr_artist in artists: + if 'role' in curr_artist: + if curr_artist['role'] == 'Featured': + featured_artist = curr_artist['name'] + elif curr_artist['role'] == 'Main': + artist = uploader = curr_artist['name'] + else: + artist = uploader = curr_artist['name'] + break view_count = int_or_none(video_info.get('views', {}).get('total')) for video_version in video_versions: From e94eeb1dd3e3171e6409313c619b248da0dd4886 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 24 Nov 2016 23:09:35 +0700 Subject: [PATCH 12/48] [vevo] Simplify artists extraction --- youtube_dl/extractor/vevo.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/vevo.py b/youtube_dl/extractor/vevo.py index ce607945f..5aa097885 100644 --- a/youtube_dl/extractor/vevo.py +++ b/youtube_dl/extractor/vevo.py @@ -259,14 +259,10 @@ class VevoIE(VevoBaseIE): timestamp = parse_iso8601(video_info.get('releaseDate')) artists = video_info.get('artists') for curr_artist in artists: - if 'role' in curr_artist: - if curr_artist['role'] == 'Featured': - featured_artist = curr_artist['name'] - elif curr_artist['role'] == 'Main': - artist = uploader = curr_artist['name'] + if curr_artist.get('role') == 'Featured': + featured_artist = curr_artist['name'] else: artist = uploader = curr_artist['name'] - break view_count = int_or_none(video_info.get('views', {}).get('total')) for video_version in video_versions: From 1db058466dfa8c0e647dbd57938b63f04a7a84c7 Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 24 Nov 2016 23:10:58 +0700 Subject: [PATCH 13/48] [vevo] Allow video info to fail in tests --- youtube_dl/extractor/vevo.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/vevo.py b/youtube_dl/extractor/vevo.py index 5aa097885..d82261e5e 100644 --- a/youtube_dl/extractor/vevo.py +++ b/youtube_dl/extractor/vevo.py @@ -51,7 +51,7 @@ class VevoIE(VevoBaseIE): 'artist': 'Hurts', 'genre': 'Pop', }, - 'expected_warnings': ['Unable to download SMIL file'], + 'expected_warnings': ['Unable to download SMIL file', 'Unable to download info'], }, { 'note': 'v3 SMIL format', 'url': 'http://www.vevo.com/watch/cassadee-pope/i-wish-i-could-break-your-heart/USUV71302923', @@ -67,7 +67,7 @@ class VevoIE(VevoBaseIE): 'artist': 'Cassadee Pope', 'genre': 'Country', }, - 'expected_warnings': ['Unable to download SMIL file'], + 'expected_warnings': ['Unable to download SMIL file', 'Unable to download info'], }, { 'note': 'Age-limited video', 'url': 'https://www.vevo.com/watch/justin-timberlake/tunnel-vision-explicit/USRV81300282', @@ -83,7 +83,7 @@ class VevoIE(VevoBaseIE): 'artist': 'Justin Timberlake', 'genre': 'Pop', }, - 'expected_warnings': ['Unable to download SMIL file'], + 'expected_warnings': ['Unable to download SMIL file', 'Unable to download info'], }, { 'note': 'No video_info', 'url': 'http://www.vevo.com/watch/k-camp-1/Till-I-Die/USUV71503000', @@ -100,6 +100,7 @@ class VevoIE(VevoBaseIE): 'artist': 'K Camp', 'genre': 'Hip-Hop', }, + 'expected_warnings': ['Unable to download SMIL file', 'Unable to download info'], }, { 'note': 'Featured test', 'url': 'https://www.vevo.com/watch/lemaitre/Wait/USUV71402190', @@ -116,6 +117,7 @@ class VevoIE(VevoBaseIE): 'artist': 'Lemaitre', 'genre': 'Electronic', }, + 'expected_warnings': ['Unable to download SMIL file', 'Unable to download info'], }, { 'note': 'Only available via webpage', 'url': 
'http://www.vevo.com/watch/GBUV71600656', From 74394b5e10c1a681022e99fe1955837fb9078f69 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 25 Nov 2016 23:37:32 +0700 Subject: [PATCH 14/48] [puls4] Relax _VALID_URL (closes #11267) --- youtube_dl/extractor/puls4.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/puls4.py b/youtube_dl/extractor/puls4.py index 1c54af002..80091b85f 100644 --- a/youtube_dl/extractor/puls4.py +++ b/youtube_dl/extractor/puls4.py @@ -10,7 +10,7 @@ from ..utils import ( class Puls4IE(ProSiebenSat1BaseIE): - _VALID_URL = r'https?://(?:www\.)?puls4\.com/(?P<id>(?:[^/]+/)*?videos/[^?#]+)' + _VALID_URL = r'https?://(?:www\.)?puls4\.com/(?P<id>[^?#&]+)' _TESTS = [{ 'url': 'http://www.puls4.com/2-minuten-2-millionen/staffel-3/videos/2min2miotalk/Tobias-Homberger-von-myclubs-im-2min2miotalk-118118', 'md5': 'fd3c6b0903ac72c9d004f04bc6bb3e03', @@ -22,6 +22,12 @@ class Puls4IE(ProSiebenSat1BaseIE): 'upload_date': '20160830', 'uploader': 'PULS_4', }, + }, { + 'url': 'http://www.puls4.com/pro-und-contra/wer-wird-prasident/Ganze-Folgen/Wer-wird-Praesident.-Norbert-Hofer', + 'only_matching': True, + }, { + 'url': 'http://www.puls4.com/pro-und-contra/wer-wird-prasident/Ganze-Folgen/Wer-wird-Praesident-Analyse-des-Interviews-mit-Norbert-Hofer-416598', + 'only_matching': True, }] _TOKEN = 'puls4' _SALT = '01!kaNgaiNgah1Ie4AeSha' From 9338a0eae34de9e81bc6b1cee5a000bc6ff9256c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 26 Nov 2016 00:13:46 +0700 Subject: [PATCH 15/48] [viki] Fix rtmp formats extraction (closes #11255) --- youtube_dl/extractor/viki.py | 29 ++++++++++++++++++++++++----- 1 file changed, 24 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/viki.py b/youtube_dl/extractor/viki.py index 4351ac457..cb8bfb348 100644 --- a/youtube_dl/extractor/viki.py +++ b/youtube_dl/extractor/viki.py @@ -1,11 +1,12 @@ # 
coding: utf-8 from __future__ import unicode_literals -import json -import time -import hmac import hashlib +import hmac import itertools +import json +import re +import time from .common import InfoExtractor from ..utils import ( @@ -276,9 +277,13 @@ class VikiIE(VikiBaseIE): height = int_or_none(self._search_regex( r'^(\d+)[pP]$', format_id, 'height', default=None)) for protocol, format_dict in stream_dict.items(): + # rtmps URLs does not seem to work + if protocol == 'rtmps': + continue + format_url = format_dict['url'] if format_id == 'm3u8': m3u8_formats = self._extract_m3u8_formats( - format_dict['url'], video_id, 'mp4', + format_url, video_id, 'mp4', entry_protocol='m3u8_native', preference=-1, m3u8_id='m3u8-%s' % protocol, fatal=False) # Despite CODECS metadata in m3u8 all video-only formats @@ -287,9 +292,23 @@ class VikiIE(VikiBaseIE): if f.get('acodec') == 'none' and f.get('vcodec') != 'none': f['acodec'] = None formats.extend(m3u8_formats) + elif format_url.startswith('rtmp'): + mobj = re.search( + r'^(?P<url>rtmp://[^/]+/(?P<app>.+?))/(?P<playpath>mp4:.+)$', + format_url) + if not mobj: + continue + formats.append({ + 'format_id': 'rtmp-%s' % format_id, + 'ext': 'flv', + 'url': mobj.group('url'), + 'play_path': mobj.group('playpath'), + 'app': mobj.group('app'), + 'page_url': url, + }) else: formats.append({ - 'url': format_dict['url'], + 'url': format_url, 'format_id': '%s-%s' % (format_id, protocol), 'height': height, }) From 560c8c6ec033b7b436c49b708d9d7362e7672aa1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 26 Nov 2016 00:14:09 +0700 Subject: [PATCH 16/48] [viki] Prefer hls --- youtube_dl/extractor/viki.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/viki.py b/youtube_dl/extractor/viki.py index cb8bfb348..9c48701c1 100644 --- a/youtube_dl/extractor/viki.py +++ b/youtube_dl/extractor/viki.py @@ -284,7 +284,7 @@ class VikiIE(VikiBaseIE): if format_id == 'm3u8': 
m3u8_formats = self._extract_m3u8_formats( format_url, video_id, 'mp4', - entry_protocol='m3u8_native', preference=-1, + entry_protocol='m3u8_native', m3u8_id='m3u8-%s' % protocol, fatal=False) # Despite CODECS metadata in m3u8 all video-only formats # are actually video+audio From 69016738688199f95e6f732e4a5c68c99988309c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 27 Nov 2016 15:40:28 +0700 Subject: [PATCH 17/48] [azubu] Add support for azubu.uol.com.br (closes #11305) --- youtube_dl/extractor/azubu.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/azubu.py b/youtube_dl/extractor/azubu.py index 72e1bd59d..1eebf5dfd 100644 --- a/youtube_dl/extractor/azubu.py +++ b/youtube_dl/extractor/azubu.py @@ -11,7 +11,7 @@ from ..utils import ( class AzubuIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?azubu\.tv/[^/]+#!/play/(?P<id>\d+)' + _VALID_URL = r'https?://(?:www\.)?azubu\.(?:tv|uol.com.br)/[^/]+#!/play/(?P<id>\d+)' _TESTS = [ { 'url': 'http://www.azubu.tv/GSL#!/play/15575/2014-hot6-cup-last-big-match-ro8-day-1', @@ -103,12 +103,15 @@ class AzubuIE(InfoExtractor): class AzubuLiveIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?azubu\.tv/(?P<id>[^/]+)$' + _VALID_URL = r'https?://(?:www\.)?azubu\.(?:tv|uol.com.br)/(?P<id>[^/]+)$' - _TEST = { + _TESTS = [{ 'url': 'http://www.azubu.tv/MarsTVMDLen', 'only_matching': True, - } + }, { + 'url': 'http://azubu.uol.com.br/adolfz', + 'only_matching': True, + }] def _real_extract(self, url): user = self._match_id(url) From f25e1c8d8c145ea4044b56786256cd71f861cf62 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 27 Nov 2016 19:54:59 +0700 Subject: [PATCH 18/48] [webcaster] Add support for webcaster.pro --- youtube_dl/extractor/extractors.py | 4 ++ youtube_dl/extractor/webcaster.py | 85 ++++++++++++++++++++++++++++++ 2 files changed, 89 insertions(+) create mode 100644 
youtube_dl/extractor/webcaster.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 9107f0b96..d71d01de3 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1121,6 +1121,10 @@ from .wdr import ( WDRIE, WDRMobileIE, ) +from .webcaster import ( + WebcasterIE, + WebcasterFeedIE, +) from .webofstories import ( WebOfStoriesIE, WebOfStoriesPlaylistIE, diff --git a/youtube_dl/extractor/webcaster.py b/youtube_dl/extractor/webcaster.py new file mode 100644 index 000000000..d366511a2 --- /dev/null +++ b/youtube_dl/extractor/webcaster.py @@ -0,0 +1,85 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + determine_ext, + xpath_text, +) + + +class WebcasterIE(InfoExtractor): + _VALID_URL = r'https?://bl\.webcaster\.pro/(?:quote|media)/start/free_(?P<id>[^/]+)' + _TESTS = [{ + # http://video.khl.ru/quotes/393859 + 'url': 'http://bl.webcaster.pro/quote/start/free_c8cefd240aa593681c8d068cff59f407_hd/q393859/eb173f99dd5f558674dae55f4ba6806d/1480289104?sr%3D105%26fa%3D1%26type_id%3D18', + 'md5': '0c162f67443f30916ff1c89425dcd4cd', + 'info_dict': { + 'id': 'c8cefd240aa593681c8d068cff59f407_hd', + 'ext': 'mp4', + 'title': 'Сибирь - Нефтехимик. 
Лучшие моменты первого периода', + 'thumbnail': 're:^https?://.*\.jpg$', + }, + }, { + 'url': 'http://bl.webcaster.pro/media/start/free_6246c7a4453ac4c42b4398f840d13100_hd/2_2991109016/e8d0d82587ef435480118f9f9c41db41/4635726126', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + video = self._download_xml(url, video_id) + + title = xpath_text(video, './/event_name', 'event name', fatal=True) + + def make_id(parts, separator): + return separator.join(filter(None, parts)) + + formats = [] + for format_id in (None, 'noise'): + track_tag = make_id(('track', format_id), '_') + for track in video.findall('.//iphone/%s' % track_tag): + track_url = track.text + if not track_url: + continue + if determine_ext(track_url) == 'm3u8': + m3u8_formats = self._extract_m3u8_formats( + track_url, video_id, 'mp4', + entry_protocol='m3u8_native', + m3u8_id=make_id(('hls', format_id), '-'), fatal=False) + for f in m3u8_formats: + f.update({ + 'source_preference': 0 if format_id == 'noise' else 1, + 'format_note': track.get('title'), + }) + formats.extend(m3u8_formats) + self._sort_formats(formats) + + thumbnail = xpath_text(video, './/image', 'thumbnail') + + return { + 'id': video_id, + 'title': title, + 'thumbnail': thumbnail, + 'formats': formats, + } + + +class WebcasterFeedIE(InfoExtractor): + _VALID_URL = r'https?://bl\.webcaster\.pro/feed/start/free_(?P<id>[^/]+)' + _TEST = { + 'url': 'http://bl.webcaster.pro/feed/start/free_c8cefd240aa593681c8d068cff59f407_hd/q393859/eb173f99dd5f558674dae55f4ba6806d/1480289104', + 'only_matching': True, + } + + def _real_extract(self, url): + video_id = self._match_id(url) + + feed = self._download_xml(url, video_id) + + video_url = xpath_text( + feed, ('video_hd', 'video'), 'video url', fatal=True) + + return self.url_result(video_url, WebcasterIE.ie_key()) From 83f1481baae72ca17364a12bec6ebcbe30234a3f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: 
Sun, 27 Nov 2016 19:56:32 +0700 Subject: [PATCH 19/48] [extractor/generic] Add support for webcaster.pro embeds --- youtube_dl/extractor/generic.py | 6 ++++++ youtube_dl/extractor/webcaster.py | 17 +++++++++++++++++ 2 files changed, 23 insertions(+) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index f9707c155..5aac65162 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -60,6 +60,7 @@ from .screenwavemedia import ScreenwaveMediaIE from .mtv import MTVServicesEmbeddedIE from .pladform import PladformIE from .videomore import VideomoreIE +from .webcaster import WebcasterFeedIE from .googledrive import GoogleDriveIE from .jwplatform import JWPlatformIE from .digiteka import DigitekaIE @@ -2140,6 +2141,11 @@ class GenericIE(InfoExtractor): if videomore_url: return self.url_result(videomore_url) + # Look for Webcaster embeds + webcaster_url = WebcasterFeedIE._extract_url(self, webpage) + if webcaster_url: + return self.url_result(webcaster_url, ie=WebcasterFeedIE.ie_key()) + # Look for Playwire embeds mobj = re.search( r'<script[^>]+data-config=(["\'])(?P<url>(?:https?:)?//config\.playwire\.com/.+?)\1', webpage) diff --git a/youtube_dl/extractor/webcaster.py b/youtube_dl/extractor/webcaster.py index d366511a2..7486cb347 100644 --- a/youtube_dl/extractor/webcaster.py +++ b/youtube_dl/extractor/webcaster.py @@ -74,6 +74,23 @@ class WebcasterFeedIE(InfoExtractor): 'only_matching': True, } + @staticmethod + def _extract_url(ie, webpage): + mobj = re.search( + r'<(?:object|a[^>]+class=["\']webcaster-player["\'])[^>]+data(?:-config)?=(["\']).*?config=(?P<url>https?://bl\.webcaster\.pro/feed/start/free_.*?)(?:[?&]|\1)', + webpage) + if mobj: + return mobj.group('url') + for secure in (True, False): + video_url = ie._og_search_video_url( + webpage, secure=secure, default=None) + if video_url: + mobj = re.search( + r'config=(?P<url>https?://bl\.webcaster\.pro/feed/start/free_[^?&=]+)', + video_url) + if mobj: + 
return mobj.group('url') + def _real_extract(self, url): video_id = self._match_id(url) From 294d4926d70d9b0bde38288c872a77ae5a95c6b3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 27 Nov 2016 20:04:03 +0700 Subject: [PATCH 20/48] [ChangeLog] Actualize --- ChangeLog | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/ChangeLog b/ChangeLog index 7e784ed76..bb07fef32 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,7 +1,15 @@ version <unreleased> Extractors ++ [webcaster] Add support for webcaster.pro ++ [azubu] Add support for azubu.uol.com.br (#11305) +* [viki] Prefer hls formats +* [viki] Fix rtmp formats extraction (#11255) +* [puls4] Relax URL regular expression (#11267) +* [vevo] Improve artist extraction (#10911) +* [mitele] Relax URL regular expression and extract more metadata (#11244) + [cbslocal] Recognize New York site (#11285) ++ [youtube:playlist] Pass disable_polymer in URL query (#11193) version 2016.11.22 From 2b380fc299adbea416b4bf81ea9a4c7d11c294f6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 27 Nov 2016 20:05:32 +0700 Subject: [PATCH 21/48] release 2016.11.27 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 2 ++ youtube_dl/version.py | 2 +- 4 files changed, 7 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index b7fa566c8..0d96f651f 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.11.22*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. 
-- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.11.22** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.11.27*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.11.27** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v <your command line> [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.11.22 +[debug] youtube-dl version 2016.11.27 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index bb07fef32..5515a08ff 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2016.11.27 Extractors + [webcaster] Add support for webcaster.pro diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 7c485349d..d9ad7bd1f 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -882,6 +882,8 @@ - **WatchIndianPorn**: Watch Indian Porn - **WDR** - **wdr:mobile** + - **Webcaster** + - **WebcasterFeed** - **WebOfStories** - **WebOfStoriesPlaylist** - **WeiqiTV**: WQTV diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 3c746baac..db7da3985 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = 
'2016.11.22' +__version__ = '2016.11.27' From 51b1378eeddb60bd99199741f2fcee29d8389142 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sun, 27 Nov 2016 22:01:07 +0800 Subject: [PATCH 22/48] Ignore and clean .swf files Some videos on NicoNico are swf --- .gitignore | 1 + Makefile | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 87754f90f..9ce4b5e2d 100644 --- a/.gitignore +++ b/.gitignore @@ -33,6 +33,7 @@ updates_key.pem *.wav *.ape *.mkv +*.swf *.part *.swp test/testdata diff --git a/Makefile b/Makefile index 68bbf5e96..9d1ddc9d1 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ all: youtube-dl README.md CONTRIBUTING.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish supportedsites clean: - rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish youtube_dl/extractor/lazy_extractors.py *.dump *.part* *.info.json *.mp4 *.m4a *.flv *.mp3 *.avi *.mkv *.webm *.3gp *.wav *.ape *.jpg *.png CONTRIBUTING.md.tmp ISSUE_TEMPLATE.md.tmp youtube-dl youtube-dl.exe + rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish youtube_dl/extractor/lazy_extractors.py *.dump *.part* *.info.json *.mp4 *.m4a *.flv *.mp3 *.avi *.mkv *.webm *.3gp *.wav *.ape *.swf *.jpg *.png CONTRIBUTING.md.tmp ISSUE_TEMPLATE.md.tmp youtube-dl youtube-dl.exe find . -name "*.pyc" -delete find . 
-name "*.class" -delete From 89533199160d484f94a9923016cb9a7921ae4956 Mon Sep 17 00:00:00 2001 From: felix <felix.von.s@posteo.de> Date: Mon, 28 Nov 2016 17:17:56 +0100 Subject: [PATCH 23/48] [screenwavemedia] Remove extractor Rewrite TeamFourStar and Normalboots extractors in terms of JWPlatform --- youtube_dl/extractor/extractors.py | 2 +- youtube_dl/extractor/generic.py | 16 --- youtube_dl/extractor/normalboots.py | 12 +- youtube_dl/extractor/screenwavemedia.py | 146 ------------------------ youtube_dl/extractor/teamfourstar.py | 48 ++++++++ 5 files changed, 54 insertions(+), 170 deletions(-) delete mode 100644 youtube_dl/extractor/screenwavemedia.py create mode 100644 youtube_dl/extractor/teamfourstar.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index d71d01de3..563457fcb 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -804,7 +804,6 @@ from .scivee import SciVeeIE from .screencast import ScreencastIE from .screencastomatic import ScreencastOMaticIE from .screenjunkies import ScreenJunkiesIE -from .screenwavemedia import ScreenwaveMediaIE, TeamFourIE from .seeker import SeekerIE from .senateisvp import SenateISVPIE from .sendtonews import SendtoNewsIE @@ -897,6 +896,7 @@ from .teachertube import ( ) from .teachingchannel import TeachingChannelIE from .teamcoco import TeamcocoIE +from .teamfourstar import TeamFourStarIE from .techtalks import TechTalksIE from .ted import TEDIE from .tele13 import Tele13IE diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 5aac65162..3949c8bf7 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -56,7 +56,6 @@ from .dailymotion import ( ) from .onionstudios import OnionStudiosIE from .viewlift import ViewLiftEmbedIE -from .screenwavemedia import ScreenwaveMediaIE from .mtv import MTVServicesEmbeddedIE from .pladform import PladformIE from .videomore import VideomoreIE @@ -1190,16 
+1189,6 @@ class GenericIE(InfoExtractor): 'duration': 248.667, }, }, - # ScreenwaveMedia embed - { - 'url': 'http://www.thecinemasnob.com/the-cinema-snob/a-nightmare-on-elm-street-2-freddys-revenge1', - 'md5': '24ace5baba0d35d55c6810b51f34e9e0', - 'info_dict': { - 'id': 'cinemasnob-55d26273809dd', - 'ext': 'mp4', - 'title': 'cinemasnob', - }, - }, # BrightcoveInPageEmbed embed { 'url': 'http://www.geekandsundry.com/tabletop-bonus-wils-final-thoughts-on-dread/', @@ -2212,11 +2201,6 @@ class GenericIE(InfoExtractor): if jwplatform_url: return self.url_result(jwplatform_url, 'JWPlatform') - # Look for ScreenwaveMedia embeds - mobj = re.search(ScreenwaveMediaIE.EMBED_PATTERN, webpage) - if mobj is not None: - return self.url_result(unescapeHTML(mobj.group('url')), 'ScreenwaveMedia') - # Look for Digiteka embeds digiteka_url = DigitekaIE._extract_url(webpage) if digiteka_url: diff --git a/youtube_dl/extractor/normalboots.py b/youtube_dl/extractor/normalboots.py index 6aa0895b8..61fe571df 100644 --- a/youtube_dl/extractor/normalboots.py +++ b/youtube_dl/extractor/normalboots.py @@ -2,7 +2,7 @@ from __future__ import unicode_literals from .common import InfoExtractor -from .screenwavemedia import ScreenwaveMediaIE +from .jwplatform import JWPlatformIE from ..utils import ( unified_strdate, @@ -25,7 +25,7 @@ class NormalbootsIE(InfoExtractor): # m3u8 download 'skip_download': True, }, - 'add_ie': ['ScreenwaveMedia'], + 'add_ie': ['JWPlatform'], } def _real_extract(self, url): @@ -39,15 +39,13 @@ class NormalbootsIE(InfoExtractor): r'<span style="text-transform:uppercase; font-size:inherit;">[A-Za-z]+, (?P<date>.*)</span>', webpage, 'date', fatal=False)) - screenwavemedia_url = self._html_search_regex( - ScreenwaveMediaIE.EMBED_PATTERN, webpage, 'screenwave URL', - group='url') + jwplatform_url = JWPlatformIE._extract_url(webpage) return { '_type': 'url_transparent', 'id': video_id, - 'url': screenwavemedia_url, - 'ie_key': ScreenwaveMediaIE.ie_key(), + 'url': 
jwplatform_url, + 'ie_key': JWPlatformIE.ie_key(), 'title': self._og_search_title(webpage), 'description': self._og_search_description(webpage), 'thumbnail': self._og_search_thumbnail(webpage), diff --git a/youtube_dl/extractor/screenwavemedia.py b/youtube_dl/extractor/screenwavemedia.py deleted file mode 100644 index 7d77e8825..000000000 --- a/youtube_dl/extractor/screenwavemedia.py +++ /dev/null @@ -1,146 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..utils import ( - int_or_none, - unified_strdate, - js_to_json, -) - - -class ScreenwaveMediaIE(InfoExtractor): - _VALID_URL = r'(?:https?:)?//player\d?\.screenwavemedia\.com/(?:play/)?[a-zA-Z]+\.php\?.*\bid=(?P<id>[A-Za-z0-9-]+)' - EMBED_PATTERN = r'src=(["\'])(?P<url>(?:https?:)?//player\d?\.screenwavemedia\.com/(?:play/)?[a-zA-Z]+\.php\?.*\bid=.+?)\1' - _TESTS = [{ - 'url': 'http://player.screenwavemedia.com/play/play.php?playerdiv=videoarea&companiondiv=squareAd&id=Cinemassacre-19911', - 'only_matching': True, - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - - playerdata = self._download_webpage( - 'http://player.screenwavemedia.com/player.php?id=%s' % video_id, - video_id, 'Downloading player webpage') - - vidtitle = self._search_regex( - r'\'vidtitle\'\s*:\s*"([^"]+)"', playerdata, 'vidtitle').replace('\\/', '/') - - playerconfig = self._download_webpage( - 'http://player.screenwavemedia.com/player.js', - video_id, 'Downloading playerconfig webpage') - - videoserver = self._search_regex(r'SWMServer\s*=\s*"([\d\.]+)"', playerdata, 'videoserver') - - sources = self._parse_json( - js_to_json( - re.sub( - r'(?s)/\*.*?\*/', '', - self._search_regex( - r'sources\s*:\s*(\[[^\]]+?\])', playerconfig, - 'sources', - ).replace( - "' + thisObj.options.videoserver + '", - videoserver - ).replace( - "' + playerVidId + '", - video_id - ) - ) - ), - video_id, fatal=False - ) - - # Fallback to hardcoded sources if JS changes 
again - if not sources: - self.report_warning('Falling back to a hardcoded list of streams') - sources = [{ - 'file': 'http://%s/vod/%s_%s.mp4' % (videoserver, video_id, format_id), - 'type': 'mp4', - 'label': format_label, - } for format_id, format_label in ( - ('low', '144p Low'), ('med', '160p Med'), ('high', '360p High'), ('hd1', '720p HD1'))] - sources.append({ - 'file': 'http://%s/vod/smil:%s.smil/playlist.m3u8' % (videoserver, video_id), - 'type': 'hls', - }) - - formats = [] - for source in sources: - file_ = source.get('file') - if not file_: - continue - if source.get('type') == 'hls': - formats.extend(self._extract_m3u8_formats(file_, video_id, ext='mp4')) - else: - format_id = self._search_regex( - r'_(.+?)\.[^.]+$', file_, 'format id', default=None) - if not self._is_valid_url(file_, video_id, format_id or 'video'): - continue - format_label = source.get('label') - height = int_or_none(self._search_regex( - r'^(\d+)[pP]', format_label, 'height', default=None)) - formats.append({ - 'url': file_, - 'format_id': format_id, - 'format': format_label, - 'ext': source.get('type'), - 'height': height, - }) - self._sort_formats(formats, field_preference=('height', 'width', 'tbr', 'format_id')) - - return { - 'id': video_id, - 'title': vidtitle, - 'formats': formats, - } - - -class TeamFourIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?teamfourstar\.com/video/(?P<id>[a-z0-9\-]+)/?' 
- _TEST = { - 'url': 'http://teamfourstar.com/video/a-moment-with-tfs-episode-4/', - 'info_dict': { - 'id': 'TeamFourStar-5292a02f20bfa', - 'ext': 'mp4', - 'upload_date': '20130401', - 'description': 'Check out this and more on our website: http://teamfourstar.com\nTFS Store: http://sharkrobot.com/team-four-star\nFollow on Twitter: http://twitter.com/teamfourstar\nLike on FB: http://facebook.com/teamfourstar', - 'title': 'A Moment With TFS Episode 4', - }, - 'params': { - # m3u8 download - 'skip_download': True, - }, - } - - def _real_extract(self, url): - display_id = self._match_id(url) - webpage = self._download_webpage(url, display_id) - - playerdata_url = self._search_regex( - r'src="(http://player\d?\.screenwavemedia\.com/(?:play/)?[a-zA-Z]+\.php\?[^"]*\bid=.+?)"', - webpage, 'player data URL') - - video_title = self._html_search_regex( - r'<div class="heroheadingtitle">(?P<title>.+?)</div>', - webpage, 'title') - video_date = unified_strdate(self._html_search_regex( - r'<div class="heroheadingdate">(?P<date>.+?)</div>', - webpage, 'date', fatal=False)) - video_description = self._html_search_regex( - r'(?s)<div class="postcontent">(?P<description>.+?)</div>', - webpage, 'description', fatal=False) - video_thumbnail = self._og_search_thumbnail(webpage) - - return { - '_type': 'url_transparent', - 'display_id': display_id, - 'title': video_title, - 'description': video_description, - 'upload_date': video_date, - 'thumbnail': video_thumbnail, - 'url': playerdata_url, - } diff --git a/youtube_dl/extractor/teamfourstar.py b/youtube_dl/extractor/teamfourstar.py new file mode 100644 index 000000000..a4db2ca98 --- /dev/null +++ b/youtube_dl/extractor/teamfourstar.py @@ -0,0 +1,48 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from .jwplatform import JWPlatformIE +from ..utils import unified_strdate + + +class TeamFourStarIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?teamfourstar\.com/(?P<id>[a-z0-9\-]+)/?' 
+ _TEST = { + 'url': 'http://teamfourstar.com/tfs-abridged-parody-episode-1-2/', + 'info_dict': { + 'id': '0WdZO31W', + 'title': 'TFS Abridged Parody Episode 1', + 'description': 'Episode 1: The Return of Raditz! … Wait…\nCast\nMasakoX – Goku, Roshi\nLanipator – Piccolo, Radditz, Krillin, Vegeta\nVegeta3986 – Radditz, Yamcha, Oolong, Gohan\nHbi2k – Farmer with Shotgun\nMegami33 – Bulma, Puar\nTakahata101 – Nappa\nKaiserNeko – SpacePod\nSongs\nMorgenstemning by Edvard Hagerup Grieg\nCha-La-Head-Cha-La by Kageyama Hiranobu\nWE DO NOT OWN DRAGONBALL. DragonBall is Owned by TOEI ANIMATION, Ltd. and Licensed by FUNimation Productions, Ltd.. All Rights Reserved. DragonBall, DragonBall Z, DragonBall GT and all logos, character names and distinctive likenesses thereof are trademarks of TOEI ANIMATION, Ltd.\nThis is nothing more than a Parody made for entertainment purposes only.', + 'ext': 'mp4', + 'timestamp': 1394168400, + 'upload_date': '20080508', + }, + } + + def _real_extract(self, url): + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + + jwplatform_url = JWPlatformIE._extract_url(webpage) + + video_title = self._html_search_regex( + r'<h1 class="entry-title">(?P<title>.+?)</h1>', + webpage, 'title') + video_date = unified_strdate(self._html_search_regex( + r'<span class="meta-date date updated">(?P<date>.+?)</span>', + webpage, 'date', fatal=False)) + video_description = self._html_search_regex( + r'(?s)<div class="content-inner">.*?(?P<description><p>.+?)</div>', + webpage, 'description', fatal=False) + video_thumbnail = self._og_search_thumbnail(webpage) + + return { + '_type': 'url_transparent', + 'display_id': display_id, + 'title': video_title, + 'description': video_description, + 'upload_date': video_date, + 'thumbnail': video_thumbnail, + 'url': jwplatform_url, + } From c2530d3319fd32adfc43cc349b9491040ee631d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 28 Nov 2016 
23:22:29 +0700 Subject: [PATCH 24/48] [teamfourstar] Simplify _VALID_URL and relax regexes --- youtube_dl/extractor/teamfourstar.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/teamfourstar.py b/youtube_dl/extractor/teamfourstar.py index a4db2ca98..a8c6ed7be 100644 --- a/youtube_dl/extractor/teamfourstar.py +++ b/youtube_dl/extractor/teamfourstar.py @@ -7,13 +7,13 @@ from ..utils import unified_strdate class TeamFourStarIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?teamfourstar\.com/(?P<id>[a-z0-9\-]+)/?' + _VALID_URL = r'https?://(?:www\.)?teamfourstar\.com/(?P<id>[a-z0-9\-]+)' _TEST = { 'url': 'http://teamfourstar.com/tfs-abridged-parody-episode-1-2/', 'info_dict': { 'id': '0WdZO31W', 'title': 'TFS Abridged Parody Episode 1', - 'description': 'Episode 1: The Return of Raditz! … Wait…\nCast\nMasakoX – Goku, Roshi\nLanipator – Piccolo, Radditz, Krillin, Vegeta\nVegeta3986 – Radditz, Yamcha, Oolong, Gohan\nHbi2k – Farmer with Shotgun\nMegami33 – Bulma, Puar\nTakahata101 – Nappa\nKaiserNeko – SpacePod\nSongs\nMorgenstemning by Edvard Hagerup Grieg\nCha-La-Head-Cha-La by Kageyama Hiranobu\nWE DO NOT OWN DRAGONBALL. DragonBall is Owned by TOEI ANIMATION, Ltd. and Licensed by FUNimation Productions, Ltd.. All Rights Reserved. 
DragonBall, DragonBall Z, DragonBall GT and all logos, character names and distinctive likenesses thereof are trademarks of TOEI ANIMATION, Ltd.\nThis is nothing more than a Parody made for entertainment purposes only.', + 'description': 'md5:d60bc389588ebab2ee7ad432bda953ae', 'ext': 'mp4', 'timestamp': 1394168400, 'upload_date': '20080508', @@ -27,13 +27,13 @@ class TeamFourStarIE(InfoExtractor): jwplatform_url = JWPlatformIE._extract_url(webpage) video_title = self._html_search_regex( - r'<h1 class="entry-title">(?P<title>.+?)</h1>', + r'<h1[^>]+class="entry-title"[^>]*>(?P<title>.+?)</h1>', webpage, 'title') video_date = unified_strdate(self._html_search_regex( - r'<span class="meta-date date updated">(?P<date>.+?)</span>', + r'<span[^>]+class="meta-date date updated"[^>]*>(?P<date>.+?)</span>', webpage, 'date', fatal=False)) video_description = self._html_search_regex( - r'(?s)<div class="content-inner">.*?(?P<description><p>.+?)</div>', + r'(?s)<div[^>]+class="content-inner"[^>]*>.*?(?P<description><p>.+?)</div>', webpage, 'description', fatal=False) video_thumbnail = self._og_search_thumbnail(webpage) From cc61fc3934bb3d130e814e2d2345fe6cda2ad9c3 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Tue, 29 Nov 2016 10:11:08 +0100 Subject: [PATCH 25/48] [comedycentral] Add new extractor for full-episodes CC seems to have added yet another indirection for full episodes - the mgid is now only in a linked feed. This may be a little brittle, but it's better than failing outright. 
Plus, the current The Daily Show episode now works :) --- youtube_dl/extractor/comedycentral.py | 33 ++++++++++++++++++++++++++- youtube_dl/extractor/extractors.py | 1 + 2 files changed, 33 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/comedycentral.py b/youtube_dl/extractor/comedycentral.py index 88346dde7..528ff7fa3 100644 --- a/youtube_dl/extractor/comedycentral.py +++ b/youtube_dl/extractor/comedycentral.py @@ -6,7 +6,7 @@ from .common import InfoExtractor class ComedyCentralIE(MTVServicesInfoExtractor): _VALID_URL = r'''(?x)https?://(?:www\.)?cc\.com/ - (video-clips|episodes|cc-studios|video-collections|full-episodes|shows) + (video-clips|episodes|cc-studios|video-collections|shows) /(?P<title>.*)''' _FEED_URL = 'http://comedycentral.com/feeds/mrss/' @@ -27,6 +27,37 @@ class ComedyCentralIE(MTVServicesInfoExtractor): }] +class ComedyCentralFullEpisodesIE(MTVServicesInfoExtractor): + _VALID_URL = r'''(?x)https?://(?:www\.)?cc\.com/ + (?:full-episodes) + /(?P<id>[^?]+)''' + _FEED_URL = 'http://comedycentral.com/feeds/mrss/' + + _TESTS = [{ + 'url': 'http://www.cc.com/full-episodes/pv391a/the-daily-show-with-trevor-noah-november-28--2016---ryan-speedo-green-season-22-ep-22028', + 'info_dict': { + 'description': 'Donald Trump is accused of exploiting his president-elect status for personal gain, Cuban leader Fidel Castro dies, and Ryan Speedo Green discusses "Sing for Your Life."', + 'title': 'November 28, 2016 - Ryan Speedo Green', + }, + 'playlist_count': 4, + }] + + def _real_extract(self, url): + playlist_id = self._match_id(url) + webpage = self._download_webpage(url, playlist_id) + + feed_json = self._search_regex(r'var triforceManifestFeed\s*=\s*(\{.+?\});\n', webpage, 'triforce feeed') + feed = self._parse_json(feed_json, playlist_id) + zones = feed['manifest']['zones'] + + video_zone = zones['t2_lc_promo1'] + feed = self._download_json(video_zone['feed'], playlist_id) + mgid = feed['result']['data']['id'] + + videos_info = 
self._get_videos_info(mgid) + return videos_info + + class ToshIE(MTVServicesInfoExtractor): IE_DESC = 'Tosh.0' _VALID_URL = r'^https?://tosh\.cc\.com/video-(?:clips|collections)/[^/]+/(?P<videotitle>[^/?#]+)' diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 563457fcb..46d007b7d 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -180,6 +180,7 @@ from .cnn import ( from .coub import CoubIE from .collegerama import CollegeRamaIE from .comedycentral import ( + ComedyCentralFullEpisodesIE, ComedyCentralIE, ComedyCentralShortnameIE, ComedyCentralTVIE, From 6303fc820417423585b681a4415b0020e0e8dd31 Mon Sep 17 00:00:00 2001 From: Mark Lee <malept@users.noreply.github.com> Date: Tue, 29 Nov 2016 08:06:01 -0800 Subject: [PATCH 26/48] [spike] Fix full episodes extraction --- youtube_dl/extractor/mtv.py | 5 +++-- youtube_dl/extractor/spike.py | 20 ++++++++++++++++++++ 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/mtv.py b/youtube_dl/extractor/mtv.py index 74a3a035e..03351917e 100644 --- a/youtube_dl/extractor/mtv.py +++ b/youtube_dl/extractor/mtv.py @@ -13,6 +13,7 @@ from ..utils import ( fix_xml_ampersands, float_or_none, HEADRequest, + NO_DEFAULT, RegexNotFoundError, sanitized_Request, strip_or_none, @@ -201,7 +202,7 @@ class MTVServicesInfoExtractor(InfoExtractor): [self._get_video_info(item) for item in idoc.findall('.//item')], playlist_title=title, playlist_description=description) - def _extract_mgid(self, webpage): + def _extract_mgid(self, webpage, default=NO_DEFAULT): try: # the url can be http://media.mtvnservices.com/fb/{mgid}.swf # or http://media.mtvnservices.com/{mgid} @@ -221,7 +222,7 @@ class MTVServicesInfoExtractor(InfoExtractor): sm4_embed = self._html_search_meta( 'sm4:video:embed', webpage, 'sm4 embed', default='') mgid = self._search_regex( - r'embed/(mgid:.+?)["\'&?/]', sm4_embed, 'mgid') + r'embed/(mgid:.+?)["\'&?/]', sm4_embed, 
'mgid', default=default) return mgid def _real_extract(self, url): diff --git a/youtube_dl/extractor/spike.py b/youtube_dl/extractor/spike.py index 218785ee4..abfee3ece 100644 --- a/youtube_dl/extractor/spike.py +++ b/youtube_dl/extractor/spike.py @@ -1,5 +1,7 @@ from __future__ import unicode_literals +import re + from .mtv import MTVServicesInfoExtractor @@ -16,6 +18,15 @@ class SpikeIE(MTVServicesInfoExtractor): 'timestamp': 1388120400, 'upload_date': '20131227', }, + }, { + 'url': 'http://www.spike.com/full-episodes/j830qm/lip-sync-battle-joel-mchale-vs-jim-rash-season-2-ep-209', + 'md5': 'b25c6f16418aefb9ad5a6cae2559321f', + 'info_dict': { + 'id': '37ace3a8-1df6-48be-85b8-38df8229e241', + 'ext': 'mp4', + 'title': 'Lip Sync Battle|April 28, 2016|2|209|Joel McHale Vs. Jim Rash|Act 1', + 'description': 'md5:a739ca8f978a7802f67f8016d27ce114', + }, }, { 'url': 'http://www.spike.com/video-clips/lhtu8m/', 'only_matching': True, @@ -32,3 +43,12 @@ class SpikeIE(MTVServicesInfoExtractor): _FEED_URL = 'http://www.spike.com/feeds/mrss/' _MOBILE_TEMPLATE = 'http://m.spike.com/videos/video.rbml?id=%s' + _CUSTOM_URL_REGEX = re.compile(r'spikenetworkapp://([^/]+/[-a-fA-F0-9]+)') + + def _extract_mgid(self, webpage): + mgid = super(SpikeIE, self)._extract_mgid(webpage, default=None) + if mgid is None: + url_parts = self._search_regex(self._CUSTOM_URL_REGEX, webpage, 'episode_id') + video_type, episode_id = url_parts.split('/', 1) + mgid = 'mgid:arc:{0}:spike.com:{1}'.format(video_type, episode_id) + return mgid From 3779d524dfd3cf72120847b235d4a3906e47a4f8 Mon Sep 17 00:00:00 2001 From: Varun <mailvarunest@gmail.com> Date: Tue, 29 Nov 2016 22:07:30 +0530 Subject: [PATCH 27/48] [liveleak] Add support for youtube embeds --- youtube_dl/extractor/liveleak.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/liveleak.py b/youtube_dl/extractor/liveleak.py index ea0565ac0..40fef9bb6 100644 --- a/youtube_dl/extractor/liveleak.py 
+++ b/youtube_dl/extractor/liveleak.py @@ -54,6 +54,19 @@ class LiveLeakIE(InfoExtractor): 'title': 'Crazy Hungarian tourist films close call waterspout in Croatia', 'thumbnail': 're:^https?://.*\.jpg$' } + }, { + # Covers https://github.com/rg3/youtube-dl/pull/10664#issuecomment-247439521 + 'url' : 'http://m.liveleak.com/view?i=763_1473349649', + 'add_ie': ['Youtube'], + 'info_dict': { + 'id': '763_1473349649', + 'ext': 'mp4', + 'title': 'Reporters and public officials ignore epidemic of black on asian violence in Sacramento | Colin Flaherty', + 'description': 'Colin being the warrior he is and showing the injustice Asians in Sacramento are being subjected to.', + 'uploader': 'Ziz', + 'upload_date': '20160908', + 'uploader_id': 'UCEbta5E_jqlZmEJsriTEtnw' + } }] @staticmethod @@ -87,7 +100,7 @@ class LiveLeakIE(InfoExtractor): else: # Maybe an embed? embed_url = self._search_regex( - r'<iframe[^>]+src="(http://www.prochan.com/embed\?[^"]+)"', + r'<iframe[^>]+src="((?:(?:http://www.prochan.com/embed\?)|(?:http://www.youtube.com/embed))[^"]+)"', webpage, 'embed URL') return { '_type': 'url_transparent', @@ -107,6 +120,7 @@ class LiveLeakIE(InfoExtractor): 'format_note': s.get('label'), 'url': s['file'], } for i, s in enumerate(sources)] + for i, s in enumerate(sources): # Removing '.h264_*.mp4' gives the raw video, which is essentially # the same video without the LiveLeak logo at the top (see From 8b0d3ee64ee20de35d0828b01ece98f59bb19e1d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 29 Nov 2016 23:42:19 +0700 Subject: [PATCH 28/48] [liveleak] Simplify and PEP 8 --- youtube_dl/extractor/liveleak.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/liveleak.py b/youtube_dl/extractor/liveleak.py index 40fef9bb6..b84e4dd6c 100644 --- a/youtube_dl/extractor/liveleak.py +++ b/youtube_dl/extractor/liveleak.py @@ -56,7 +56,7 @@ class LiveLeakIE(InfoExtractor): } }, { # Covers 
https://github.com/rg3/youtube-dl/pull/10664#issuecomment-247439521 - 'url' : 'http://m.liveleak.com/view?i=763_1473349649', + 'url': 'http://m.liveleak.com/view?i=763_1473349649', 'add_ie': ['Youtube'], 'info_dict': { 'id': '763_1473349649', @@ -66,7 +66,10 @@ class LiveLeakIE(InfoExtractor): 'uploader': 'Ziz', 'upload_date': '20160908', 'uploader_id': 'UCEbta5E_jqlZmEJsriTEtnw' - } + }, + 'params': { + 'skip_download': True, + }, }] @staticmethod @@ -100,7 +103,7 @@ class LiveLeakIE(InfoExtractor): else: # Maybe an embed? embed_url = self._search_regex( - r'<iframe[^>]+src="((?:(?:http://www.prochan.com/embed\?)|(?:http://www.youtube.com/embed))[^"]+)"', + r'<iframe[^>]+src="(https?://(?:www\.)?(?:prochan|youtube)\.com/embed[^"]+)"', webpage, 'embed URL') return { '_type': 'url_transparent', From db75f14d8a6eb998f08d2774f5d609a02ef13646 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 30 Nov 2016 04:19:38 +0700 Subject: [PATCH 29/48] [ruutu] Detect DRM videos --- youtube_dl/extractor/ruutu.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/youtube_dl/extractor/ruutu.py b/youtube_dl/extractor/ruutu.py index 2fce4e81b..6db3e3e93 100644 --- a/youtube_dl/extractor/ruutu.py +++ b/youtube_dl/extractor/ruutu.py @@ -5,6 +5,7 @@ from .common import InfoExtractor from ..compat import compat_urllib_parse_urlparse from ..utils import ( determine_ext, + ExtractorError, int_or_none, xpath_attr, xpath_text, @@ -101,6 +102,11 @@ class RuutuIE(InfoExtractor): }) extract_formats(video_xml.find('./Clip')) + + drm = xpath_text(video_xml, './Clip/DRM', default=None) + if not formats and drm: + raise ExtractorError('This video is DRM protected.', expected=True) + self._sort_formats(formats) return { From f882554815c42381e84af98860434b040b2d127c Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Wed, 30 Nov 2016 11:52:19 +0100 Subject: [PATCH 30/48] [comedcycentral] Give /shows/.+/full-episodes URLs to the 
COmedyCentralFullEpisodesIE --- youtube_dl/extractor/comedycentral.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/comedycentral.py b/youtube_dl/extractor/comedycentral.py index 528ff7fa3..0239dfd84 100644 --- a/youtube_dl/extractor/comedycentral.py +++ b/youtube_dl/extractor/comedycentral.py @@ -6,7 +6,7 @@ from .common import InfoExtractor class ComedyCentralIE(MTVServicesInfoExtractor): _VALID_URL = r'''(?x)https?://(?:www\.)?cc\.com/ - (video-clips|episodes|cc-studios|video-collections|shows) + (video-clips|episodes|cc-studios|video-collections|shows(?=/[^/]+/(?!full-episodes))) /(?P<title>.*)''' _FEED_URL = 'http://comedycentral.com/feeds/mrss/' @@ -29,7 +29,7 @@ class ComedyCentralIE(MTVServicesInfoExtractor): class ComedyCentralFullEpisodesIE(MTVServicesInfoExtractor): _VALID_URL = r'''(?x)https?://(?:www\.)?cc\.com/ - (?:full-episodes) + (?:full-episodes|shows(?=/[^/]+/full-episodes)) /(?P<id>[^?]+)''' _FEED_URL = 'http://comedycentral.com/feeds/mrss/' @@ -40,6 +40,9 @@ class ComedyCentralFullEpisodesIE(MTVServicesInfoExtractor): 'title': 'November 28, 2016 - Ryan Speedo Green', }, 'playlist_count': 4, + }, { + 'url': 'http://www.cc.com/shows/the-daily-show-with-trevor-noah/full-episodes', + 'only_matching': True, }] def _real_extract(self, url): From 4c4765dba23c40136d575ab58b26e410ec42212a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 30 Nov 2016 23:17:30 +0700 Subject: [PATCH 31/48] [soundcloud] Update client id (closes #11327) --- youtube_dl/extractor/soundcloud.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index 3b7ecb3c3..5a201eaa8 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -121,7 +121,7 @@ class SoundcloudIE(InfoExtractor): }, ] - _CLIENT_ID = '02gUJC0hH2ct1EGOcYXQIzRFU91c72Ea' + _CLIENT_ID = 
'fDoItMDbsbZz8dY16ZzARCZmzgHBPotA' _IPHONE_CLIENT_ID = '376f225bf427445fc4bfb6b99b72e0bf' @staticmethod From f150530f4d536ebf5375efe96b2362062e02797e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 1 Dec 2016 00:13:06 +0700 Subject: [PATCH 32/48] [ChangeLog] Actualize --- ChangeLog | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/ChangeLog b/ChangeLog index 5515a08ff..0d5ab2eb3 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,16 @@ +version <unreleased> + +Extractors +* [soundcloud] Update client id (#11327) +* [ruutu] Detect DRM protected videos ++ [liveleak] Add support for youtube embeds (#10688) +* [spike] Fix full episodes support (#11312) +* [comedycentral] Fix full episodes support +* [normalboots] Rewrite in terms of JWPlatform (#11184) +* [teamfourstar] Rewrite in terms of JWPlatform (#11184) +- [screenwavemedia] Remove extractor (#11184) + + version 2016.11.27 Extractors From 73ec479c7d787c58d249583f4bb00657c370a938 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 1 Dec 2016 00:15:12 +0700 Subject: [PATCH 33/48] release 2016.12.01 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 4 ++-- youtube_dl/version.py | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 0d96f651f..36559dd7b 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.11.27*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. 
-- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.11.27** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.12.01*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.12.01** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v <your command line> [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.11.27 +[debug] youtube-dl version 2016.12.01 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 0d5ab2eb3..a91de7b63 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2016.12.01 Extractors * [soundcloud] Update client id (#11327) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index d9ad7bd1f..edb76d9cc 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -158,6 +158,7 @@ - **CollegeRama** - **ComCarCoff** - **ComedyCentral** + - **ComedyCentralFullEpisodes** - **ComedyCentralShortname** - **ComedyCentralTV** - **CondeNast**: Condé Nast media group: Allure, Architectural Digest, Ars Technica, Bon Appétit, Brides, Condé Nast, Condé Nast Traveler, Details, Epicurious, GQ, Glamour, Golf Digest, SELF, Teen Vogue, The New Yorker, Vanity Fair, Vogue, W Magazine, 
WIRED @@ -643,7 +644,6 @@ - **Screencast** - **ScreencastOMatic** - **ScreenJunkies** - - **ScreenwaveMedia** - **Seeker** - **SenateISVP** - **SendtoNews** @@ -715,7 +715,7 @@ - **teachertube:user:collection**: teachertube.com user and collection videos - **TeachingChannel** - **Teamcoco** - - **TeamFour** + - **TeamFourStar** - **TechTalks** - **techtv.mit.edu** - **ted** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index db7da3985..1acb630af 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.11.27' +__version__ = '2016.12.01' From 98b08f94b15930e359fa0d85834d7e9651ce6445 Mon Sep 17 00:00:00 2001 From: Laneone <dude.1996@live.com> Date: Thu, 1 Dec 2016 00:01:21 +0530 Subject: [PATCH 34/48] [README.md] Fix typo Just a minor spelling mistake in the readme --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index ea9131c3a..840932298 100644 --- a/README.md +++ b/README.md @@ -664,7 +664,7 @@ $ youtube-dl -f 'bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best' # Download best format available but not better that 480p $ youtube-dl -f 'bestvideo[height<=480]+bestaudio/best[height<=480]' -# Download best video only format but no bigger that 50 MB +# Download best video only format but no bigger than 50 MB $ youtube-dl -f 'best[filesize<50M]' # Download best format available via direct link over HTTP/HTTPS protocol From d17bfe4095a10bc52402e17d088c66e86f5f0bde Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Thu, 1 Dec 2016 14:56:52 +0800 Subject: [PATCH 35/48] [thisoldhouse] Recognize /tv-episode/ URLs and update _TESTS Closes #11271 --- ChangeLog | 5 +++++ youtube_dl/extractor/thisoldhouse.py | 7 +++++-- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/ChangeLog b/ChangeLog index a91de7b63..bf5f26943 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,8 @@ +version <unreleased> 
+ +Extractors ++ [thisoldhouse] Recognize /tv-episode/ URLs (#11271) + version 2016.12.01 Extractors diff --git a/youtube_dl/extractor/thisoldhouse.py b/youtube_dl/extractor/thisoldhouse.py index 7629f0d10..197258df1 100644 --- a/youtube_dl/extractor/thisoldhouse.py +++ b/youtube_dl/extractor/thisoldhouse.py @@ -5,10 +5,10 @@ from .common import InfoExtractor class ThisOldHouseIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?thisoldhouse\.com/(?:watch|how-to)/(?P<id>[^/?#]+)' + _VALID_URL = r'https?://(?:www\.)?thisoldhouse\.com/(?:watch|how-to|tv-episode)/(?P<id>[^/?#]+)' _TESTS = [{ 'url': 'https://www.thisoldhouse.com/how-to/how-to-build-storage-bench', - 'md5': '568acf9ca25a639f0c4ff905826b662f', + 'md5': '946f05bbaa12a33f9ae35580d2dfcfe3', 'info_dict': { 'id': '2REGtUDQ', 'ext': 'mp4', @@ -20,6 +20,9 @@ class ThisOldHouseIE(InfoExtractor): }, { 'url': 'https://www.thisoldhouse.com/watch/arlington-arts-crafts-arts-and-crafts-class-begins', 'only_matching': True, + }, { + 'url': 'https://www.thisoldhouse.com/tv-episode/ask-toh-shelf-rough-electric', + 'only_matching': True, }] def _real_extract(self, url): From a94e7f4a0ca333aabf08adb1c329b4b5b8a5d897 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Thu, 1 Dec 2016 12:15:35 +0100 Subject: [PATCH 36/48] [aenetworks] extract more formats(closes #11321) --- youtube_dl/extractor/aenetworks.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/aenetworks.py b/youtube_dl/extractor/aenetworks.py index 6adb6d824..c5e079a40 100644 --- a/youtube_dl/extractor/aenetworks.py +++ b/youtube_dl/extractor/aenetworks.py @@ -26,7 +26,7 @@ class AENetworksIE(AENetworksBaseIE): _VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:history|aetv|mylifetime)\.com|fyi\.tv)/(?:shows/(?P<show_path>[^/]+(?:/[^/]+){0,2})|movies/(?P<movie_display_id>[^/]+)/full-movie)' _TESTS = [{ 'url': 'http://www.history.com/shows/mountain-men/season-1/episode-1', - 'md5': 
'8ff93eb073449f151d6b90c0ae1ef0c7', + 'md5': 'a97a65f7e823ae10e9244bc5433d5fe6', 'info_dict': { 'id': '22253814', 'ext': 'mp4', @@ -99,7 +99,7 @@ class AENetworksIE(AENetworksBaseIE): query = { 'mbr': 'true', - 'assetTypes': 'medium_video_s3' + 'assetTypes': 'high_video_s3' } video_id = self._html_search_meta('aetn:VideoID', webpage) media_url = self._search_regex( @@ -155,7 +155,7 @@ class HistoryTopicIE(AENetworksBaseIE): 'id': 'world-war-i-history', 'title': 'World War I History', }, - 'playlist_mincount': 24, + 'playlist_mincount': 23, }, { 'url': 'http://www.history.com/topics/world-war-i-history/videos', 'only_matching': True, @@ -193,7 +193,8 @@ class HistoryTopicIE(AENetworksBaseIE): return self.theplatform_url_result( release_url, video_id, { 'mbr': 'true', - 'switch': 'hls' + 'switch': 'hls', + 'assetTypes': 'high_video_ak', }) else: webpage = self._download_webpage(url, topic_id) @@ -203,6 +204,7 @@ class HistoryTopicIE(AENetworksBaseIE): entries.append(self.theplatform_url_result( video_attributes['data-release-url'], video_attributes['data-id'], { 'mbr': 'true', - 'switch': 'hls' + 'switch': 'hls', + 'assetTypes': 'high_video_ak', })) return self.playlist_result(entries, topic_id, get_element_by_attribute('class', 'show-title', webpage)) From 83442966194640d9bc00e7f3086aa5e8b25c4ae3 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sat, 3 Dec 2016 21:53:41 +0800 Subject: [PATCH 37/48] [socks] Fix error reporting (#11355) --- youtube_dl/socks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/socks.py b/youtube_dl/socks.py index 63d19b3a5..fece28062 100644 --- a/youtube_dl/socks.py +++ b/youtube_dl/socks.py @@ -60,7 +60,7 @@ class ProxyError(IOError): def __init__(self, code=None, msg=None): if code is not None and msg is None: - msg = self.CODES.get(code) and 'unknown error' + msg = self.CODES.get(code) or 'unknown error' super(ProxyError, self).__init__(code, msg) From 
9b5288c92ae43436a5d48775bbe1ee537588625f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 4 Dec 2016 23:20:14 +0700 Subject: [PATCH 38/48] [1tv] Improve extraction and add support for playlists (closes #11335) --- youtube_dl/extractor/firsttv.py | 105 +++++++++++++++++++++----------- 1 file changed, 70 insertions(+), 35 deletions(-) diff --git a/youtube_dl/extractor/firsttv.py b/youtube_dl/extractor/firsttv.py index 6b662cc3c..4463d3d20 100644 --- a/youtube_dl/extractor/firsttv.py +++ b/youtube_dl/extractor/firsttv.py @@ -2,7 +2,10 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..compat import compat_urlparse +from ..compat import ( + compat_str, + compat_urlparse, +) from ..utils import ( int_or_none, qualities, @@ -22,8 +25,7 @@ class FirstTVIE(InfoExtractor): 'info_dict': { 'id': '40049', 'ext': 'mp4', - 'title': 'Гость Людмила Сенчина. Наедине со всеми. Выпуск от 12.02.2015', - 'description': 'md5:36a39c1d19618fec57d12efe212a8370', + 'title': 'Гость Людмила Сенчина. Наедине со всеми. Выпуск от 12.02.2015', 'thumbnail': 're:^https?://.*\.(?:jpg|JPG)$', 'upload_date': '20150212', 'duration': 2694, @@ -34,8 +36,7 @@ class FirstTVIE(InfoExtractor): 'info_dict': { 'id': '364746', 'ext': 'mp4', - 'title': 'Весенняя аллергия. Доброе утро. Фрагмент выпуска от 07.04.2016', - 'description': 'md5:a242eea0031fd180a4497d52640a9572', + 'title': 'Весенняя аллергия. Доброе утро. Фрагмент выпуска от 07.04.2016', 'thumbnail': 're:^https?://.*\.(?:jpg|JPG)$', 'upload_date': '20160407', 'duration': 179, @@ -44,6 +45,17 @@ class FirstTVIE(InfoExtractor): 'params': { 'skip_download': True, }, + }, { + 'url': 'http://www.1tv.ru/news/issue/2016-12-01/14:00', + 'info_dict': { + 'id': '14:00', + 'title': 'Выпуск новостей в 14:00 1 декабря 2016 года. Новости. 
Первый канал', + 'description': 'md5:2e921b948f8c1ff93901da78ebdb1dfd', + }, + 'playlist_count': 13, + }, { + 'url': 'http://www.1tv.ru/shows/tochvtoch-supersezon/vystupleniya/evgeniy-dyatlov-vladimir-vysockiy-koni-priveredlivye-toch-v-toch-supersezon-fragment-vypuska-ot-06-11-2016', + 'only_matching': True, }] def _real_extract(self, url): @@ -51,43 +63,66 @@ class FirstTVIE(InfoExtractor): webpage = self._download_webpage(url, display_id) playlist_url = compat_urlparse.urljoin(url, self._search_regex( - r'data-playlist-url="([^"]+)', webpage, 'playlist url')) + r'data-playlist-url=(["\'])(?P<url>(?:(?!\1).)+)\1', + webpage, 'playlist url', group='url')) - item = self._download_json(playlist_url, display_id)[0] - video_id = item['id'] - quality = qualities(('ld', 'sd', 'hd', )) - formats = [] - for f in item.get('mbr', []): - src = f.get('src') - if not src: - continue - fname = f.get('name') - formats.append({ - 'url': src, - 'format_id': fname, - 'quality': quality(fname), + parsed_url = compat_urlparse.urlparse(playlist_url) + qs = compat_urlparse.parse_qs(parsed_url.query) + item_ids = qs.get('videos_ids[]') or qs.get('news_ids[]') + + items = self._download_json(playlist_url, display_id) + + if item_ids: + items = [ + item for item in items + if item.get('uid') and compat_str(item['uid']) in item_ids] + else: + items = [items[0]] + + entries = [] + QUALITIES = ('ld', 'sd', 'hd', ) + + for item in items: + title = item['title'] + quality = qualities(QUALITIES) + formats = [] + for f in item.get('mbr', []): + src = f.get('src') + if not src or not isinstance(src, compat_str): + continue + tbr = int_or_none(self._search_regex( + r'_(\d{3,})\.mp4', src, 'tbr', default=None)) + formats.append({ + 'url': src, + 'format_id': f.get('name'), + 'tbr': tbr, + 'quality': quality(f.get('name')), + }) + self._sort_formats(formats) + + thumbnail = item.get('poster') or self._og_search_thumbnail(webpage) + duration = int_or_none(item.get('duration') or 
self._html_search_meta( + 'video:duration', webpage, 'video duration', fatal=False)) + upload_date = unified_strdate(self._html_search_meta( + 'ya:ovs:upload_date', webpage, 'upload date', default=None)) + + entries.append({ + 'id': item.get('id') or uid, + 'thumbnail': thumbnail, + 'title': title, + 'upload_date': upload_date, + 'duration': int_or_none(duration), + 'formats': formats }) - self._sort_formats(formats) title = self._html_search_regex( (r'<div class="tv_translation">\s*<h1><a href="[^"]+">([^<]*)</a>', r"'title'\s*:\s*'([^']+)'"), - webpage, 'title', default=None) or item['title'] + webpage, 'title', default=None) or self._og_search_title( + webpage, default=None) description = self._html_search_regex( r'<div class="descr">\s*<div> </div>\s*<p>([^<]*)</p></div>', webpage, 'description', default=None) or self._html_search_meta( - 'description', webpage, 'description') - duration = int_or_none(self._html_search_meta( - 'video:duration', webpage, 'video duration', fatal=False)) - upload_date = unified_strdate(self._html_search_meta( - 'ya:ovs:upload_date', webpage, 'upload date', fatal=False)) + 'description', webpage, 'description', default=None) - return { - 'id': video_id, - 'thumbnail': item.get('poster') or self._og_search_thumbnail(webpage), - 'title': title, - 'description': description, - 'upload_date': upload_date, - 'duration': int_or_none(duration), - 'formats': formats - } + return self.playlist_result(entries, display_id, title, description) From 4bd7d9d4ae05319ebf6eb2aeffce7bde4fa7b6cf Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Mon, 5 Dec 2016 00:31:02 +0800 Subject: [PATCH 39/48] [socks] Refine exception model for better error handling 1. ProxyError now inherits from socket.error instead of IOError The only functions socks.py overrides are connect and connect_ex. In Python 2.x and Python <= 3.2, socket functions raises socket.error. In newer Python versions, those functions raises OSError instead. 
The name socket.error is preserved as an alias of OSError for backward compatibility. To keep socks.py compatible with Python's standard library, it should raise the same exception as raw sockets. See PEP 3151 (https://www.python.org/dev/peps/pep-3151/) for more information about the change in Python 3.3. 2. Raise EOFError instead of IOError when the socket receives less data than it expects There's no common convention, but both ftplib and telnetlib raise EOFError for similar situations. socks.py follows them. Closes #11355 In #11355, only Python 2 is affected. In Python 3, both socket.error and IOError are aliases of OSError, so AbstractHTTPHandler.do_open correctly catches the error and thus InfoExtractor._is_valid_url works fine. --- youtube_dl/socks.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/socks.py b/youtube_dl/socks.py index fece28062..0f5d7bdb2 100644 --- a/youtube_dl/socks.py +++ b/youtube_dl/socks.py @@ -55,7 +55,7 @@ class Socks5AddressType(object): ATYP_IPV6 = 0x04 -class ProxyError(IOError): +class ProxyError(socket.error): ERR_SUCCESS = 0x00 def __init__(self, code=None, msg=None): @@ -123,7 +123,7 @@ class sockssocket(socket.socket): while len(data) < cnt: cur = self.recv(cnt - len(data)) if not cur: - raise IOError('{0} bytes missing'.format(cnt - len(data))) + raise EOFError('{0} bytes missing'.format(cnt - len(data))) data += cur return data From 3ed81714d8db61ea6d1633184af15d239af0445c Mon Sep 17 00:00:00 2001 From: vordep <up201303880@fe.up.pt> Date: Sun, 4 Dec 2016 23:53:49 +0000 Subject: [PATCH 40/48] [fusion] Update ooyala id regex --- youtube_dl/extractor/fusion.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/fusion.py b/youtube_dl/extractor/fusion.py index b4ab4cbb7..ede729b52 100644 --- a/youtube_dl/extractor/fusion.py +++ b/youtube_dl/extractor/fusion.py @@ -29,7 +29,7 @@ class FusionIE(InfoExtractor): webpage = self._download_webpage(url, display_id)
ooyala_code = self._search_regex( - r'data-video-id=(["\'])(?P<code>.+?)\1', + r'data-ooyala-id=(["\'])(?P<code>(?:(?!\1).)+)\1', webpage, 'ooyala code', group='code') return OoyalaIE._build_url_result(ooyala_code) From 4afa4ff223365601603b6a1cc77eb9d96d8e629d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 5 Dec 2016 23:28:57 +0700 Subject: [PATCH 41/48] [1tv] Fix video id extraction --- youtube_dl/extractor/firsttv.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/firsttv.py b/youtube_dl/extractor/firsttv.py index 4463d3d20..47673e2d4 100644 --- a/youtube_dl/extractor/firsttv.py +++ b/youtube_dl/extractor/firsttv.py @@ -107,7 +107,7 @@ class FirstTVIE(InfoExtractor): 'ya:ovs:upload_date', webpage, 'upload date', default=None)) entries.append({ - 'id': item.get('id') or uid, + 'id': compat_str(item.get('id') or item['uid']), 'thumbnail': thumbnail, 'title': title, 'upload_date': upload_date, From 875ddd740902dd0de15d21939ef75fbfc2535f30 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 6 Dec 2016 00:41:03 +0700 Subject: [PATCH 42/48] [bloomberg] Add another video id regex (closes #11371) --- youtube_dl/extractor/bloomberg.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/bloomberg.py b/youtube_dl/extractor/bloomberg.py index 2a8cd64b9..c5e11e8eb 100644 --- a/youtube_dl/extractor/bloomberg.py +++ b/youtube_dl/extractor/bloomberg.py @@ -45,7 +45,8 @@ class BloombergIE(InfoExtractor): name = self._match_id(url) webpage = self._download_webpage(url, name) video_id = self._search_regex( - r'["\']bmmrId["\']\s*:\s*(["\'])(?P<url>.+?)\1', + (r'["\']bmmrId["\']\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', + r'videoId\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1'), webpage, 'id', group='url', default=None) if not video_id: bplayer_data = self._parse_json(self._search_regex( From 283d1c6a8bec0150a498c6909893179335f06f0f 
Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Tue, 6 Dec 2016 19:01:09 +0100 Subject: [PATCH 43/48] [telebruxelles] extract all formats and add support for emission urls --- youtube_dl/extractor/telebruxelles.py | 45 +++++++++++++-------------- 1 file changed, 21 insertions(+), 24 deletions(-) diff --git a/youtube_dl/extractor/telebruxelles.py b/youtube_dl/extractor/telebruxelles.py index eefecc490..5886e9c1b 100644 --- a/youtube_dl/extractor/telebruxelles.py +++ b/youtube_dl/extractor/telebruxelles.py @@ -7,33 +7,30 @@ from .common import InfoExtractor class TeleBruxellesIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?(?:telebruxelles|bx1)\.be/(news|sport|dernier-jt)/?(?P<id>[^/#?]+)' + _VALID_URL = r'https?://(?:www\.)?(?:telebruxelles|bx1)\.be/(news|sport|dernier-jt|emission)/?(?P<id>[^/#?]+)' _TESTS = [{ - 'url': 'http://www.telebruxelles.be/news/auditions-devant-parlement-francken-galant-tres-attendus/', - 'md5': '59439e568c9ee42fb77588b2096b214f', + 'url': 'http://bx1.be/news/que-risque-lauteur-dune-fausse-alerte-a-la-bombe/', + 'md5': 'a2a67a5b1c3e8c9d33109b902f474fd9', 'info_dict': { - 'id': '11942', - 'display_id': 'auditions-devant-parlement-francken-galant-tres-attendus', - 'ext': 'flv', - 'title': 'Parlement : Francken et Galant répondent aux interpellations de l’opposition', - 'description': 're:Les auditions des ministres se poursuivent*' - }, - 'params': { - 'skip_download': 'requires rtmpdump' + 'id': '158856', + 'display_id': 'que-risque-lauteur-dune-fausse-alerte-a-la-bombe', + 'ext': 'mp4', + 'title': 'Que risque l’auteur d’une fausse alerte à la bombe ?', + 'description': 'md5:3cf8df235d44ebc5426373050840e466', }, }, { - 'url': 'http://www.telebruxelles.be/sport/basket-brussels-bat-mons-80-74/', - 'md5': '181d3fbdcf20b909309e5aef5c6c6047', + 'url': 'http://bx1.be/sport/futsal-schaerbeek-sincline-5-3-a-thulin/', + 'md5': 'dfe07ecc9c153ceba8582ac912687675', 'info_dict': { - 'id': '10091', - 'display_id': 
'basket-brussels-bat-mons-80-74', - 'ext': 'flv', - 'title': 'Basket : le Brussels bat Mons 80-74', - 'description': 're:^Ils l\u2019on fait ! En basket, le B*', - }, - 'params': { - 'skip_download': 'requires rtmpdump' + 'id': '158433', + 'display_id': 'futsal-schaerbeek-sincline-5-3-a-thulin', + 'ext': 'mp4', + 'title': 'Futsal : Schaerbeek s’incline 5-3 à Thulin', + 'description': 'md5:fd013f1488d5e2dceb9cebe39e2d569b', }, + }, { + 'url': 'http://bx1.be/emission/bxenf1-gastronomie/', + 'only_matching': True, }] def _real_extract(self, url): @@ -50,13 +47,13 @@ class TeleBruxellesIE(InfoExtractor): r'file\s*:\s*"(rtmp://[^/]+/vod/mp4:"\s*\+\s*"[^"]+"\s*\+\s*".mp4)"', webpage, 'RTMP url') rtmp_url = re.sub(r'"\s*\+\s*"', '', rtmp_url) + formats = self._extract_wowza_formats(rtmp_url, article_id or display_id) + self._sort_formats(formats) return { 'id': article_id or display_id, 'display_id': display_id, 'title': title, 'description': description, - 'url': rtmp_url, - 'ext': 'flv', - 'rtmp_live': True # if rtmpdump is not called with "--live" argument, the download is blocked and can be completed + 'formats': formats, } From 7441915b1e53e2a26f4c78796c4755a36b9e1b8d Mon Sep 17 00:00:00 2001 From: Serkora <Serkora@users.noreply.github.com> Date: Thu, 8 Dec 2016 00:46:42 +0800 Subject: [PATCH 44/48] [pandoratv] Fix extraction (closes #11023) --- youtube_dl/extractor/pandoratv.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/youtube_dl/extractor/pandoratv.py b/youtube_dl/extractor/pandoratv.py index 2b07958bb..3e37ae01d 100644 --- a/youtube_dl/extractor/pandoratv.py +++ b/youtube_dl/extractor/pandoratv.py @@ -5,12 +5,14 @@ from .common import InfoExtractor from ..compat import ( compat_str, compat_urlparse, + compat_urllib_request, ) from ..utils import ( ExtractorError, float_or_none, parse_duration, str_to_int, + urlencode_postdata, ) @@ -56,6 +58,18 @@ class PandoraTVIE(InfoExtractor): r'^v(\d+)[Uu]rl$', format_id, 'height', default=None) if not 
height: continue + + post_data = {'prgid': video_id, 'runtime': info.get('runtime'), 'vod_url': format_url} + play_url = self._download_json('http://m.pandora.tv/?c=api&m=play_url', video_id, + data=urlencode_postdata(post_data), + headers={ + 'Origin': url, + 'Content-Type': 'application/x-www-form-urlencoded' + }) + format_url = play_url.get('url') + if not format_url: + continue + formats.append({ 'format_id': '%sp' % height, 'url': format_url, From f43795e56bc55b99e89c8fafee5613921cf1fffc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 7 Dec 2016 23:50:10 +0700 Subject: [PATCH 45/48] [pandoratv] PEP 8 and simplify --- youtube_dl/extractor/pandoratv.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/pandoratv.py b/youtube_dl/extractor/pandoratv.py index 3e37ae01d..cbb1968d3 100644 --- a/youtube_dl/extractor/pandoratv.py +++ b/youtube_dl/extractor/pandoratv.py @@ -5,7 +5,6 @@ from .common import InfoExtractor from ..compat import ( compat_str, compat_urlparse, - compat_urllib_request, ) from ..utils import ( ExtractorError, @@ -59,13 +58,17 @@ class PandoraTVIE(InfoExtractor): if not height: continue - post_data = {'prgid': video_id, 'runtime': info.get('runtime'), 'vod_url': format_url} - play_url = self._download_json('http://m.pandora.tv/?c=api&m=play_url', video_id, - data=urlencode_postdata(post_data), + play_url = self._download_json( + 'http://m.pandora.tv/?c=api&m=play_url', video_id, + data=urlencode_postdata({ + 'prgid': video_id, + 'runtime': info.get('runtime'), + 'vod_url': format_url, + }), headers={ 'Origin': url, - 'Content-Type': 'application/x-www-form-urlencoded' - }) + 'Content-Type': 'application/x-www-form-urlencoded', + }) format_url = play_url.get('url') if not format_url: continue From 6c20a0bb99e626db6870747b6329ad9c9064c123 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Fri, 9 Dec 2016 02:15:16 +0800 Subject: 
[PATCH 46/48] [openload] Fix extraction (closes #10408) --- ChangeLog | 1 + youtube_dl/extractor/openload.py | 90 +++++--------------------------- 2 files changed, 14 insertions(+), 77 deletions(-) diff --git a/ChangeLog b/ChangeLog index bf5f26943..9d7de1f95 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,7 @@ version <unreleased> Extractors +* [openload] Fix extraction (#10408) + [thisoldhouse] Recognize /tv-episode/ URLs (#11271) version 2016.12.01 diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index 7f19b1ba5..84aa12585 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -1,21 +1,12 @@ # coding: utf-8 -from __future__ import unicode_literals, division - -import re +from __future__ import unicode_literals from .common import InfoExtractor -from ..compat import ( - compat_chr, - compat_ord, -) +from ..compat import compat_chr from ..utils import ( determine_ext, ExtractorError, ) -from ..jsinterp import ( - JSInterpreter, - _NAME_RE -) class OpenloadIE(InfoExtractor): @@ -62,44 +53,6 @@ class OpenloadIE(InfoExtractor): 'only_matching': True, }] - def openload_decode(self, txt): - symbol_dict = { - '(゚Д゚) [゚Θ゚]': '_', - '(゚Д゚) [゚ω゚ノ]': 'a', - '(゚Д゚) [゚Θ゚ノ]': 'b', - '(゚Д゚) [\'c\']': 'c', - '(゚Д゚) [゚ー゚ノ]': 'd', - '(゚Д゚) [゚Д゚ノ]': 'e', - '(゚Д゚) [1]': 'f', - '(゚Д゚) [\'o\']': 'o', - '(o゚ー゚o)': 'u', - '(゚Д゚) [\'c\']': 'c', - '((゚ー゚) + (o^_^o))': '7', - '((o^_^o) +(o^_^o) +(c^_^o))': '6', - '((゚ー゚) + (゚Θ゚))': '5', - '(-~3)': '4', - '(-~-~1)': '3', - '(-~1)': '2', - '(-~0)': '1', - '((c^_^o)-(c^_^o))': '0', - } - delim = '(゚Д゚)[゚ε゚]+' - end_token = '(゚Д゚)[゚o゚]' - symbols = '|'.join(map(re.escape, symbol_dict.keys())) - txt = re.sub('(%s)\+\s?' 
% symbols, lambda m: symbol_dict[m.group(1)], txt) - ret = '' - for aacode in re.findall(r'{0}\+\s?{1}(.*?){0}'.format(re.escape(end_token), re.escape(delim)), txt): - for aachar in aacode.split(delim): - if aachar.isdigit(): - ret += compat_chr(int(aachar, 8)) - else: - m = re.match(r'^u([\da-f]{4})$', aachar) - if m: - ret += compat_chr(int(m.group(1), 16)) - else: - self.report_warning("Cannot decode: %s" % aachar) - return ret - def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage('https://openload.co/embed/%s/' % video_id, video_id) @@ -107,36 +60,20 @@ class OpenloadIE(InfoExtractor): if 'File not found' in webpage or 'deleted by the owner' in webpage: raise ExtractorError('File not found', expected=True) - # The following decryption algorithm is written by @yokrysty and - # declared to be freely used in youtube-dl - # See https://github.com/rg3/youtube-dl/issues/10408 - enc_data = self._html_search_regex( - r'<span[^>]*>([^<]+)</span>\s*<span[^>]*>[^<]+</span>\s*<span[^>]+id="streamurl"', - webpage, 'encrypted data') + ol_id = self._search_regex( + '<span[^>]+id="[a-zA-Z0-9]+x"[^>]*>([0-9]+)</span>', + webpage, 'openload ID') - enc_code = self._html_search_regex(r'<script[^>]+>(゚ω゚[^<]+)</script>', - webpage, 'encrypted code') + first_two_chars = int(float(ol_id[0:][:2])) + urlcode = '' + num = 2 - js_code = self.openload_decode(enc_code) - jsi = JSInterpreter(js_code) + while num < len(ol_id): + urlcode += compat_chr(int(float(ol_id[num:][:3])) - + first_two_chars * int(float(ol_id[num + 3:][:2]))) + num += 5 - m_offset_fun = self._search_regex(r'slice\(0\s*-\s*(%s)\(\)' % _NAME_RE, js_code, 'javascript offset function') - m_diff_fun = self._search_regex(r'charCodeAt\(0\)\s*\+\s*(%s)\(\)' % _NAME_RE, js_code, 'javascript diff function') - - offset = jsi.call_function(m_offset_fun) - diff = jsi.call_function(m_diff_fun) - - video_url_chars = [] - - for idx, c in enumerate(enc_data): - j = compat_ord(c) - if j >= 33 
and j <= 126: - j = ((j + 14) % 94) + 33 - if idx == len(enc_data) - offset: - j += diff - video_url_chars += compat_chr(j) - - video_url = 'https://openload.co/stream/%s?mime=true' % ''.join(video_url_chars) + video_url = 'https://openload.co/stream/' + urlcode title = self._og_search_title(webpage, default=None) or self._search_regex( r'<span[^>]+class=["\']title["\'][^>]*>([^<]+)', webpage, @@ -155,5 +92,4 @@ class OpenloadIE(InfoExtractor): 'ext': determine_ext(title), 'subtitles': subtitles, } - return info_dict From 9ed3495eaeefdbeec5b72bd0a6575c56bc6c01c9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 9 Dec 2016 02:41:49 +0700 Subject: [PATCH 47/48] [ChangeLog] Actualize --- ChangeLog | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/ChangeLog b/ChangeLog index 9d7de1f95..f1d76dcd4 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,7 +1,18 @@ version <unreleased> +Core +* [socks] Fix error reporting (#11355) + Extractors * [openload] Fix extraction (#10408) +* [pandoratv] Fix extraction (#11023) ++ [telebruxelles] Add support for emission URLs +* [telebruxelles] Extract all formats ++ [bloomberg] Add another video id regular expression (#11371) +* [fusion] Update ooyala id regular expression (#11364) ++ [1tv] Add support for playlists (#11335) +* [1tv] Improve extraction (#11335) ++ [aenetworks] Extract more formats (#11321) + [thisoldhouse] Recognize /tv-episode/ URLs (#11271) version 2016.12.01 From 18ece70c4df2a4de5c7582905aa007d1237008a7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 9 Dec 2016 02:46:18 +0700 Subject: [PATCH 48/48] release 2016.12.09 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- youtube_dl/version.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 36559dd7b..49ae3afb6 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 
+6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.12.01*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.12.01** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.12.09*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.12.09** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v <your command line> [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.12.01 +[debug] youtube-dl version 2016.12.09 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index f1d76dcd4..f906cad2b 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2016.12.09 Core * [socks] Fix error reporting (#11355) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 1acb630af..a8e299802 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.12.01' +__version__ = '2016.12.09'