From d20b1c6725fce956b44413cced449b7d09b12de2 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Thu, 20 Jul 2017 18:14:14 +0800 Subject: [PATCH 01/24] [dispeak] Recognize sevt subdomain (closes #13276) --- ChangeLog | 6 ++++++ youtube_dl/extractor/dispeak.py | 6 +++++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index 7d71fc5e1..8e442d159 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +version + +Extractors ++ [dispeak] Recognize sevt subdomain (#13276) + + version 2017.07.15 Core diff --git a/youtube_dl/extractor/dispeak.py b/youtube_dl/extractor/dispeak.py index a78cb8a2a..c05f601e2 100644 --- a/youtube_dl/extractor/dispeak.py +++ b/youtube_dl/extractor/dispeak.py @@ -13,7 +13,7 @@ from ..utils import ( class DigitallySpeakingIE(InfoExtractor): - _VALID_URL = r'https?://(?:evt\.dispeak|events\.digitallyspeaking)\.com/(?:[^/]+/)+xml/(?P[^.]+)\.xml' + _VALID_URL = r'https?://(?:s?evt\.dispeak|events\.digitallyspeaking)\.com/(?:[^/]+/)+xml/(?P[^.]+)\.xml' _TESTS = [{ # From http://gdcvault.com/play/1023460/Tenacious-Design-and-The-Interface @@ -28,6 +28,10 @@ class DigitallySpeakingIE(InfoExtractor): # From http://www.gdcvault.com/play/1014631/Classic-Game-Postmortem-PAC 'url': 'http://events.digitallyspeaking.com/gdc/sf11/xml/12396_1299111843500GMPX.xml', 'only_matching': True, + }, { + # From http://www.gdcvault.com/play/1013700/Advanced-Material + 'url': 'http://sevt.dispeak.com/ubm/gdc/eur10/xml/11256_1282118587281VNIT.xml', + 'only_matching': True, }] def _parse_mp4(self, metadata): From 85f5a74b6cf44b0c8b612c264c36eaabd958f501 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Thu, 20 Jul 2017 21:19:09 +0800 Subject: [PATCH 02/24] [tbs] Mark as broken and skip invalid tests --- youtube_dl/extractor/tbs.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/tbs.py b/youtube_dl/extractor/tbs.py index bf93eb868..e9474533f 100644 --- a/youtube_dl/extractor/tbs.py +++ 
b/youtube_dl/extractor/tbs.py @@ -8,6 +8,9 @@ from ..utils import extract_attributes class TBSIE(TurnerBaseIE): + # https://github.com/rg3/youtube-dl/issues/13658 + _WORKING = False + _VALID_URL = r'https?://(?:www\.)?(?Ptbs|tntdrama)\.com/videos/(?:[^/]+/)+(?P[^/?#]+)\.html' _TESTS = [{ 'url': 'http://www.tbs.com/videos/people-of-earth/season-1/extras/2007318/theatrical-trailer.html', @@ -17,7 +20,8 @@ class TBSIE(TurnerBaseIE): 'ext': 'mp4', 'title': 'Theatrical Trailer', 'description': 'Catch the latest comedy from TBS, People of Earth, premiering Halloween night--Monday, October 31, at 9/8c.', - } + }, + 'skip': 'TBS videos are deleted after a while', }, { 'url': 'http://www.tntdrama.com/videos/good-behavior/season-1/extras/1538823/you-better-run.html', 'md5': 'ce53c6ead5e9f3280b4ad2031a6fab56', @@ -26,7 +30,8 @@ class TBSIE(TurnerBaseIE): 'ext': 'mp4', 'title': 'You Better Run', 'description': 'Letty Raines must figure out what she\'s running toward while running away from her past. 
Good Behavior premieres November 15 at 9/8c.', - } + }, + 'skip': 'TBS videos are deleted after a while', }] def _real_extract(self, url): From fa63cf6c2301972b7d0ae76fb7a11c7d1a2786a9 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Thu, 20 Jul 2017 22:57:51 +0800 Subject: [PATCH 03/24] [youku:show] Fix playlist extraction (closes #13248) --- ChangeLog | 1 + youtube_dl/extractor/youku.py | 66 ++++++++++++++++++++--------------- 2 files changed, 39 insertions(+), 28 deletions(-) diff --git a/ChangeLog b/ChangeLog index 8e442d159..a83523cb9 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,7 @@ version Extractors +* [youku:show] Fix playlist extraction (#13248) + [dispeak] Recognize sevt subdomain (#13276) diff --git a/youtube_dl/extractor/youku.py b/youtube_dl/extractor/youku.py index dcce15d77..4ae9adb51 100644 --- a/youtube_dl/extractor/youku.py +++ b/youtube_dl/extractor/youku.py @@ -1,7 +1,6 @@ # coding: utf-8 from __future__ import unicode_literals -import itertools import random import re import string @@ -222,50 +221,61 @@ class YoukuShowIE(InfoExtractor): _VALID_URL = r'https?://list\.youku\.com/show/id_(?P[0-9a-z]+)\.html' IE_NAME = 'youku:show' - _TEST = { + _TESTS = [{ 'url': 'http://list.youku.com/show/id_zc7c670be07ff11e48b3f.html', 'info_dict': { 'id': 'zc7c670be07ff11e48b3f', - 'title': '花千骨 未删减版', + 'title': '花千骨 DVD版', 'description': 'md5:a1ae6f5618571bbeb5c9821f9c81b558', }, 'playlist_count': 50, - } + }, { + # Episode number not starting from 1 + 'url': 'http://list.youku.com/show/id_zefbfbd70efbfbd780bef.html', + 'info_dict': { + 'id': 'zefbfbd70efbfbd780bef', + 'title': '超级飞侠3', + 'description': 'md5:275715156abebe5ccc2a1992e9d56b98', + }, + 'playlist_count': 24, + }] - _PAGE_SIZE = 40 + def _extract_entries(self, playlist_data_url, show_id, idx, query, url): + query['callback'] = 'cb' + playlist_data = self._download_json( + playlist_data_url, show_id, query=query, + note='Downloading playlist data page %d' % (idx + 1), + 
transform_source=lambda s: js_to_json(strip_jsonp(s)))['html'] + drama_list = (get_element_by_class('p-drama-grid', playlist_data) or + get_element_by_class('p-drama-half-row', playlist_data)) + if drama_list is None: + raise ExtractorError('No episodes found') + video_urls = re.findall(r']+href="([^"]+)"', drama_list) + return playlist_data, [ + self.url_result(urljoin(url, video_url), YoukuIE.ie_key()) + for video_url in video_urls] def _real_extract(self, url): show_id = self._match_id(url) webpage = self._download_webpage(url, show_id) - entries = [] page_config = self._parse_json(self._search_regex( r'var\s+PageConfig\s*=\s*({.+});', webpage, 'page config'), show_id, transform_source=js_to_json) - for idx in itertools.count(0): - if idx == 0: - playlist_data_url = 'http://list.youku.com/show/module' - query = {'id': page_config['showid'], 'tab': 'point'} - else: - playlist_data_url = 'http://list.youku.com/show/point' - query = { + first_page, entries = self._extract_entries( + 'http://list.youku.com/show/module', show_id, 0, { + 'id': page_config['showid'], + 'tab': 'showInfo', + }, url) + # The first reload_id has the same items as first_page + reload_ids = re.findall(']+data-id="([^"]+)">', first_page)[1:] + for idx, reload_id in enumerate(reload_ids): + _, new_entries = self._extract_entries( + 'http://list.youku.com/show/episode', show_id, idx + 1, { 'id': page_config['showid'], - 'stage': 'reload_%d' % (self._PAGE_SIZE * idx + 1), - } - query['callback'] = 'cb' - playlist_data = self._download_json( - playlist_data_url, show_id, query=query, - note='Downloading playlist data page %d' % (idx + 1), - transform_source=lambda s: js_to_json(strip_jsonp(s)))['html'] - video_urls = re.findall( - r']+class="p-thumb"[^<]+]+href="([^"]+)"', - playlist_data) - new_entries = [ - self.url_result(urljoin(url, video_url), YoukuIE.ie_key()) - for video_url in video_urls] + 'stage': reload_id, + }, url) entries.extend(new_entries) - if len(new_entries) < self._PAGE_SIZE: 
- break desc = self._html_search_meta('description', webpage, fatal=False) playlist_title = desc.split(',')[0] if desc else None From 3fcf346ac16e6fe1963a3eab861d6bd9c32ce6db Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Thu, 20 Jul 2017 23:20:46 +0800 Subject: [PATCH 04/24] [youku:show] Refine playlist extraction Handle playlists that the initial page is not the first page --- youtube_dl/extractor/youku.py | 34 +++++++++++++++++++++++----------- 1 file changed, 23 insertions(+), 11 deletions(-) diff --git a/youtube_dl/extractor/youku.py b/youtube_dl/extractor/youku.py index 4ae9adb51..0c4bc2eda 100644 --- a/youtube_dl/extractor/youku.py +++ b/youtube_dl/extractor/youku.py @@ -13,7 +13,6 @@ from ..utils import ( js_to_json, str_or_none, strip_jsonp, - urljoin, ) @@ -238,13 +237,16 @@ class YoukuShowIE(InfoExtractor): 'description': 'md5:275715156abebe5ccc2a1992e9d56b98', }, 'playlist_count': 24, + }, { + # Ongoing playlist. The initial page is the last one + 'url': 'http://list.youku.com/show/id_za7c275ecd7b411e1a19e.html', + 'only_matchine': True, }] - def _extract_entries(self, playlist_data_url, show_id, idx, query, url): + def _extract_entries(self, playlist_data_url, show_id, note, query): query['callback'] = 'cb' playlist_data = self._download_json( - playlist_data_url, show_id, query=query, - note='Downloading playlist data page %d' % (idx + 1), + playlist_data_url, show_id, query=query, note=note, transform_source=lambda s: js_to_json(strip_jsonp(s)))['html'] drama_list = (get_element_by_class('p-drama-grid', playlist_data) or get_element_by_class('p-drama-half-row', playlist_data)) @@ -252,29 +254,39 @@ class YoukuShowIE(InfoExtractor): raise ExtractorError('No episodes found') video_urls = re.findall(r']+href="([^"]+)"', drama_list) return playlist_data, [ - self.url_result(urljoin(url, video_url), YoukuIE.ie_key()) + self.url_result(self._proto_relative_url(video_url, 'http:'), YoukuIE.ie_key()) for video_url in video_urls] def _real_extract(self, 
url): show_id = self._match_id(url) webpage = self._download_webpage(url, show_id) + entries = [] page_config = self._parse_json(self._search_regex( r'var\s+PageConfig\s*=\s*({.+});', webpage, 'page config'), show_id, transform_source=js_to_json) - first_page, entries = self._extract_entries( - 'http://list.youku.com/show/module', show_id, 0, { + first_page, initial_entries = self._extract_entries( + 'http://list.youku.com/show/module', show_id, + note='Downloading initial playlist data page', + query={ 'id': page_config['showid'], 'tab': 'showInfo', - }, url) + }) + first_page_reload_id = self._html_search_regex( + r']+id="(reload_\d+)', first_page, 'first page reload id') # The first reload_id has the same items as first_page - reload_ids = re.findall(']+data-id="([^"]+)">', first_page)[1:] + reload_ids = re.findall(']+data-id="([^"]+)">', first_page) for idx, reload_id in enumerate(reload_ids): + if reload_id == first_page_reload_id: + entries.extend(initial_entries) + continue _, new_entries = self._extract_entries( - 'http://list.youku.com/show/episode', show_id, idx + 1, { + 'http://list.youku.com/show/episode', show_id, + note='Downloading playlist data page %d' % (idx + 1), + query={ 'id': page_config['showid'], 'stage': reload_id, - }, url) + }) entries.extend(new_entries) desc = self._html_search_meta('description', webpage, fatal=False) From c653326a1425f4c271f387fde7a706bf4b52a7a3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 20 Jul 2017 22:49:52 +0700 Subject: [PATCH 05/24] [funnyordie] Extract more metadata (closes #13677) --- youtube_dl/extractor/funnyordie.py | 64 ++++++++++++++++++++++++++---- 1 file changed, 56 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/funnyordie.py b/youtube_dl/extractor/funnyordie.py index 49409369c..f85e7de14 100644 --- a/youtube_dl/extractor/funnyordie.py +++ b/youtube_dl/extractor/funnyordie.py @@ -1,10 +1,14 @@ from __future__ import unicode_literals -import json import re 
from .common import InfoExtractor -from ..utils import ExtractorError +from ..utils import ( + ExtractorError, + float_or_none, + int_or_none, + unified_timestamp, +) class FunnyOrDieIE(InfoExtractor): @@ -18,6 +22,10 @@ class FunnyOrDieIE(InfoExtractor): 'title': 'Heart-Shaped Box: Literal Video Version', 'description': 'md5:ea09a01bc9a1c46d9ab696c01747c338', 'thumbnail': r're:^http:.*\.jpg$', + 'uploader': 'DASjr', + 'timestamp': 1317904928, + 'upload_date': '20111006', + 'duration': 318.3, }, }, { 'url': 'http://www.funnyordie.com/embed/e402820827', @@ -27,6 +35,8 @@ class FunnyOrDieIE(InfoExtractor): 'title': 'Please Use This Song (Jon Lajoie)', 'description': 'Please use this to sell something. www.jonlajoie.com', 'thumbnail': r're:^http:.*\.jpg$', + 'timestamp': 1398988800, + 'upload_date': '20140502', }, 'params': { 'skip_download': True, @@ -100,15 +110,53 @@ class FunnyOrDieIE(InfoExtractor): 'url': 'http://www.funnyordie.com%s' % src, }] - post_json = self._search_regex( - r'fb_post\s*=\s*(\{.*?\});', webpage, 'post details') - post = json.loads(post_json) + timestamp = unified_timestamp(self._html_search_meta( + 'uploadDate', webpage, 'timestamp', default=None)) + + uploader = self._html_search_regex( + r']+\bclass=["\']channel-preview-name[^>]+>(.+?) 
Date: Thu, 20 Jul 2017 23:22:36 +0700 Subject: [PATCH 06/24] [egghead:lesson] Add extractor (#6635) --- youtube_dl/extractor/egghead.py | 49 ++++++++++++++++++++++++++++++ youtube_dl/extractor/extractors.py | 5 ++- 2 files changed, 53 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/egghead.py b/youtube_dl/extractor/egghead.py index c86f52319..e4a3046af 100644 --- a/youtube_dl/extractor/egghead.py +++ b/youtube_dl/extractor/egghead.py @@ -2,6 +2,11 @@ from __future__ import unicode_literals from .common import InfoExtractor +from ..utils import ( + int_or_none, + try_get, + unified_timestamp, +) class EggheadCourseIE(InfoExtractor): @@ -33,3 +38,47 @@ class EggheadCourseIE(InfoExtractor): return self.playlist_result( entries, playlist_id, course.get('title'), course.get('description')) + + +class EggheadLessonIE(InfoExtractor): + IE_DESC = 'egghead.io lesson' + IE_NAME = 'egghead:lesson' + _VALID_URL = r'https://egghead\.io/lessons/(?P[^/?#&]+)' + _TEST = { + 'url': 'https://egghead.io/lessons/javascript-linear-data-flow-with-container-style-types-box', + 'info_dict': { + 'id': 'fv5yotjxcg', + 'ext': 'mp4', + 'title': 'Create linear data flow with container style types (Box)', + 'description': 'md5:9aa2cdb6f9878ed4c39ec09e85a8150e', + 'thumbnail': r're:^https?:.*\.jpg$', + 'timestamp': 1481296768, + 'upload_date': '20161209', + 'duration': 304, + 'view_count': 0, + 'tags': ['javascript', 'free'], + }, + 'params': { + 'skip_download': True, + }, + } + + def _real_extract(self, url): + lesson_id = self._match_id(url) + + lesson = self._download_json( + 'https://egghead.io/api/v1/lessons/%s' % lesson_id, lesson_id) + + return { + '_type': 'url_transparent', + 'ie_key': 'Wistia', + 'url': 'wistia:%s' % lesson['wistia_id'], + 'id': lesson['wistia_id'], + 'title': lesson.get('title'), + 'description': lesson.get('summary'), + 'thumbnail': lesson.get('thumb_nail'), + 'timestamp': unified_timestamp(lesson.get('published_at')), + 'duration': 
int_or_none(lesson.get('duration')), + 'view_count': int_or_none(lesson.get('plays_count')), + 'tags': try_get(lesson, lambda x: x['tag_list'], list), + } diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index e8a066b83..db7616caa 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -298,7 +298,10 @@ from .dw import ( from .eagleplatform import EaglePlatformIE from .ebaumsworld import EbaumsWorldIE from .echomsk import EchoMskIE -from .egghead import EggheadCourseIE +from .egghead import ( + EggheadCourseIE, + EggheadLessonIE, +) from .ehow import EHowIE from .eighttracks import EightTracksIE from .einthusan import EinthusanIE From 0396806f671e5828c2abdeb8048acf8b654507b6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 21 Jul 2017 00:13:32 +0700 Subject: [PATCH 07/24] [YoutubeDL] Do not override id, extractor and extractor_key in url_transparent All these meta fields must be borrowed from final extractor that actually performs extraction. This commit fixes extractor id in download archives for url_transparent downloads. Previously, 'transparent' extractor was erroneously used for extractor archive id, e.g. 'eggheadlesson 4n8ugwwj5t' instead of 'wistia 4n8ugwwj5t'. 
--- test/test_YoutubeDL.py | 7 ++++++- youtube_dl/YoutubeDL.py | 2 +- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py index 70989e232..e0decb81c 100644 --- a/test/test_YoutubeDL.py +++ b/test/test_YoutubeDL.py @@ -41,6 +41,7 @@ def _make_result(formats, **kwargs): 'id': 'testid', 'title': 'testttitle', 'extractor': 'testex', + 'extractor_key': 'TestEx', } res.update(**kwargs) return res @@ -761,7 +762,8 @@ class TestYoutubeDL(unittest.TestCase): '_type': 'url_transparent', 'url': 'foo2:', 'ie_key': 'Foo2', - 'title': 'foo1 title' + 'title': 'foo1 title', + 'id': 'foo1_id', } class Foo2IE(InfoExtractor): @@ -787,6 +789,9 @@ class TestYoutubeDL(unittest.TestCase): downloaded = ydl.downloaded_info_dicts[0] self.assertEqual(downloaded['url'], TEST_URL) self.assertEqual(downloaded['title'], 'foo1 title') + self.assertEqual(downloaded['id'], 'testid') + self.assertEqual(downloaded['extractor'], 'testex') + self.assertEqual(downloaded['extractor_key'], 'TestEx') if __name__ == '__main__': diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 89c07be29..f94836d06 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -860,7 +860,7 @@ class YoutubeDL(object): force_properties = dict( (k, v) for k, v in ie_result.items() if v is not None) - for f in ('_type', 'url', 'ie_key'): + for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'): if f in force_properties: del force_properties[f] new_result = info.copy() From 7d9a1db1110b13e8e6b65613ebb3daf7f0ff3c4c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 22 Jul 2017 11:40:46 +0700 Subject: [PATCH 08/24] [dramafever] Remove video id from title (closes #13699) --- youtube_dl/extractor/dramafever.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/dramafever.py b/youtube_dl/extractor/dramafever.py index e7abc8889..03fa3aabc 100644 --- 
a/youtube_dl/extractor/dramafever.py +++ b/youtube_dl/extractor/dramafever.py @@ -12,6 +12,7 @@ from ..utils import ( ExtractorError, clean_html, int_or_none, + remove_end, sanitized_Request, urlencode_postdata ) @@ -73,7 +74,7 @@ class DramaFeverIE(DramaFeverBaseIE): 'info_dict': { 'id': '4512.1', 'ext': 'mp4', - 'title': 'Cooking with Shin 4512.1', + 'title': 'Cooking with Shin', 'description': 'md5:a8eec7942e1664a6896fcd5e1287bfd0', 'episode': 'Episode 1', 'episode_number': 1, @@ -91,7 +92,7 @@ class DramaFeverIE(DramaFeverBaseIE): 'info_dict': { 'id': '4826.4', 'ext': 'mp4', - 'title': 'Mnet Asian Music Awards 2015 4826.4', + 'title': 'Mnet Asian Music Awards 2015', 'description': 'md5:3ff2ee8fedaef86e076791c909cf2e91', 'episode': 'Mnet Asian Music Awards 2015 - Part 3', 'episode_number': 4, @@ -122,6 +123,10 @@ class DramaFeverIE(DramaFeverBaseIE): countries=self._GEO_COUNTRIES) raise + # title is postfixed with video id for some reason, removing + if info.get('title'): + info['title'] = remove_end(info['title'], video_id).strip() + series_id, episode_number = video_id.split('.') episode_info = self._download_json( # We only need a single episode info, so restricting page size to one episode From f76c02c87b479310b0e090216895879257b1062a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 22 Jul 2017 11:41:40 +0700 Subject: [PATCH 09/24] [dramafever] Fix tests --- youtube_dl/extractor/dramafever.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/dramafever.py b/youtube_dl/extractor/dramafever.py index 03fa3aabc..9a498d72a 100644 --- a/youtube_dl/extractor/dramafever.py +++ b/youtube_dl/extractor/dramafever.py @@ -73,7 +73,7 @@ class DramaFeverIE(DramaFeverBaseIE): 'url': 'http://www.dramafever.com/drama/4512/1/Cooking_with_Shin/', 'info_dict': { 'id': '4512.1', - 'ext': 'mp4', + 'ext': 'flv', 'title': 'Cooking with Shin', 'description': 'md5:a8eec7942e1664a6896fcd5e1287bfd0', 'episode': 
'Episode 1', @@ -81,7 +81,7 @@ class DramaFeverIE(DramaFeverBaseIE): 'thumbnail': r're:^https?://.*\.jpg', 'timestamp': 1404336058, 'upload_date': '20140702', - 'duration': 343, + 'duration': 344, }, 'params': { # m3u8 download @@ -91,7 +91,7 @@ class DramaFeverIE(DramaFeverBaseIE): 'url': 'http://www.dramafever.com/drama/4826/4/Mnet_Asian_Music_Awards_2015/?ap=1', 'info_dict': { 'id': '4826.4', - 'ext': 'mp4', + 'ext': 'flv', 'title': 'Mnet Asian Music Awards 2015', 'description': 'md5:3ff2ee8fedaef86e076791c909cf2e91', 'episode': 'Mnet Asian Music Awards 2015 - Part 3', @@ -99,7 +99,7 @@ class DramaFeverIE(DramaFeverBaseIE): 'thumbnail': r're:^https?://.*\.jpg', 'timestamp': 1450213200, 'upload_date': '20151215', - 'duration': 5602, + 'duration': 5359, }, 'params': { # m3u8 download From 359aa2fdd145d11a29a04f620fed95acbf142f66 Mon Sep 17 00:00:00 2001 From: dubber0 Date: Sat, 22 Jul 2017 14:15:55 +0200 Subject: [PATCH 10/24] [npo] Add support for npo3.nl URLs --- youtube_dl/extractor/npo.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py index 516b1e941..fa4ef20c5 100644 --- a/youtube_dl/extractor/npo.py +++ b/youtube_dl/extractor/npo.py @@ -28,7 +28,7 @@ class NPOBaseIE(InfoExtractor): class NPOIE(NPOBaseIE): IE_NAME = 'npo' - IE_DESC = 'npo.nl and ntr.nl' + IE_DESC = 'npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl' _VALID_URL = r'''(?x) (?: npo:| @@ -38,7 +38,7 @@ class NPOIE(NPOBaseIE): npo\.nl/(?!(?:live|radio)/)(?:[^/]+/){2}| ntr\.nl/(?:[^/]+/){2,}| omroepwnl\.nl/video/fragment/[^/]+__| - zapp\.nl/[^/]+/[^/]+/ + (?:zapp|npo3)\.nl/(?:[^/]+/){2} ) ) (?P[^/?#]+) @@ -146,6 +146,9 @@ class NPOIE(NPOBaseIE): }, { 'url': 'http://www.zapp.nl/beste-vrienden-quiz/extra-video-s/WO_NTR_1067990', 'only_matching': True, + }, { + 'url': 'https://www.npo3.nl/3onderzoekt/16-09-2015/VPWON_1239870', + 'only_matching': True, }, { # live stream 'url': 'npo:LI_NL1_4188102', From 
327c8364f11f23dd919e8009c6adb021c34054fa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 22 Jul 2017 21:35:14 +0700 Subject: [PATCH 11/24] [sportbox:embed] Fix extraction --- youtube_dl/extractor/sportbox.py | 61 ++++++++++++++++++-------------- 1 file changed, 34 insertions(+), 27 deletions(-) diff --git a/youtube_dl/extractor/sportbox.py b/youtube_dl/extractor/sportbox.py index e7bd5bf91..54497c880 100644 --- a/youtube_dl/extractor/sportbox.py +++ b/youtube_dl/extractor/sportbox.py @@ -4,7 +4,11 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..utils import js_to_json +from ..utils import ( + determine_ext, + int_or_none, + js_to_json, +) class SportBoxEmbedIE(InfoExtractor): @@ -14,8 +18,10 @@ class SportBoxEmbedIE(InfoExtractor): 'info_dict': { 'id': '211355', 'ext': 'mp4', - 'title': 'В Новороссийске прошел детский турнир «Поле славы боевой»', + 'title': '211355', 'thumbnail': r're:^https?://.*\.jpg$', + 'duration': 292, + 'view_count': int, }, 'params': { # m3u8 download @@ -24,6 +30,9 @@ class SportBoxEmbedIE(InfoExtractor): }, { 'url': 'http://news.sportbox.ru/vdl/player?nid=370908&only_player=1&autostart=false&playeri=2&height=340&width=580', 'only_matching': True, + }, { + 'url': 'https://news.sportbox.ru/vdl/player/media/193095', + 'only_matching': True, }] @staticmethod @@ -37,36 +46,34 @@ class SportBoxEmbedIE(InfoExtractor): webpage = self._download_webpage(url, video_id) + wjplayer_data = self._parse_json( + self._search_regex( + r'(?s)wjplayer\(({.+?})\);', webpage, 'wjplayer settings'), + video_id, transform_source=js_to_json) + formats = [] - - def cleanup_js(code): - # desktop_advert_config contains complex Javascripts and we don't need it - return js_to_json(re.sub(r'desktop_advert_config.*', '', code)) - - jwplayer_data = self._parse_json(self._search_regex( - r'(?s)player\.setup\(({.+?})\);', webpage, 'jwplayer settings'), video_id, - transform_source=cleanup_js) - - 
hls_url = jwplayer_data.get('hls_url') - if hls_url: - formats.extend(self._extract_m3u8_formats( - hls_url, video_id, ext='mp4', m3u8_id='hls')) - - rtsp_url = jwplayer_data.get('rtsp_url') - if rtsp_url: - formats.append({ - 'url': rtsp_url, - 'format_id': 'rtsp', - }) - + for source in wjplayer_data['sources']: + src = source.get('src') + if not src: + continue + if determine_ext(src) == 'm3u8': + formats.extend(self._extract_m3u8_formats( + src, video_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id='hls', fatal=False)) + else: + formats.append({ + 'url': src, + }) self._sort_formats(formats) - title = jwplayer_data['node_title'] - thumbnail = jwplayer_data.get('image_url') + view_count = int_or_none(self._search_regex( + r'Просмотров\s*:\s*(\d+)', webpage, 'view count', default=None)) return { 'id': video_id, - 'title': title, - 'thumbnail': thumbnail, + 'title': video_id, + 'thumbnail': wjplayer_data.get('poster'), + 'duration': int_or_none(wjplayer_data.get('duration')), + 'view_count': view_count, 'formats': formats, } From 0017d9ad6de831384e74db14a821e4c94020c9ac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 23 Jul 2017 00:12:01 +0700 Subject: [PATCH 12/24] [YoutubeDL] Improve default format specification (closes #13704) --- test/test_YoutubeDL.py | 11 +++++++++++ youtube_dl/YoutubeDL.py | 31 +++++++++++++++++++++++-------- 2 files changed, 34 insertions(+), 8 deletions(-) diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py index e0decb81c..4af14f9db 100644 --- a/test/test_YoutubeDL.py +++ b/test/test_YoutubeDL.py @@ -449,6 +449,17 @@ class TestFormatSelection(unittest.TestCase): pass self.assertEqual(ydl.downloaded_info_dicts, []) + def test_default_format_spec(self): + ydl = YDL({'simulate': True}) + self.assertEqual(ydl._default_format_spec({}), 'bestvideo+bestaudio/best') + + ydl = YDL({'outtmpl': '-'}) + self.assertEqual(ydl._default_format_spec({}), 'best') + + ydl = YDL({}) + 
self.assertEqual(ydl._default_format_spec({}, download=False), 'bestvideo+bestaudio/best') + self.assertEqual(ydl._default_format_spec({'is_live': True}), 'best') + class TestYoutubeDL(unittest.TestCase): def test_subtitles(self): diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index f94836d06..367ae3533 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1064,6 +1064,25 @@ class YoutubeDL(object): return op(actual_value, comparison_value) return _filter + def _default_format_spec(self, info_dict, download=True): + req_format_list = [] + + def can_have_partial_formats(): + if self.params.get('simulate', False): + return True + if not download: + return True + if self.params.get('outtmpl', DEFAULT_OUTTMPL) == '-': + return False + if info_dict.get('is_live'): + return False + merger = FFmpegMergerPP(self) + return merger.available and merger.can_merge() + if can_have_partial_formats(): + req_format_list.append('bestvideo+bestaudio') + req_format_list.append('best') + return '/'.join(req_format_list) + def build_format_selector(self, format_spec): def syntax_error(note, start): message = ( @@ -1534,14 +1553,10 @@ class YoutubeDL(object): req_format = self.params.get('format') if req_format is None: - req_format_list = [] - if (self.params.get('outtmpl', DEFAULT_OUTTMPL) != '-' and - not info_dict.get('is_live')): - merger = FFmpegMergerPP(self) - if merger.available and merger.can_merge(): - req_format_list.append('bestvideo+bestaudio') - req_format_list.append('best') - req_format = '/'.join(req_format_list) + req_format = self._default_format_spec(info_dict, download=download) + if self.params.get('verbose'): + self.to_stdout('[debug] Default format spec: %s' % req_format) + format_selector = self.build_format_selector(req_format) # While in format selection we may need to have an access to the original From e0f1fb0a27612c2398df59dd85194edfdf8cbc2a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 23 Jul 
2017 00:25:23 +0700 Subject: [PATCH 13/24] [mtv] Skip missing video parts (closes #13690) --- youtube_dl/extractor/mtv.py | 28 ++++++++++++++++++++++------ youtube_dl/extractor/vh1.py | 12 ++++++++---- 2 files changed, 30 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/mtv.py b/youtube_dl/extractor/mtv.py index 8acea1461..fc098cd13 100644 --- a/youtube_dl/extractor/mtv.py +++ b/youtube_dl/extractor/mtv.py @@ -83,7 +83,7 @@ class MTVServicesInfoExtractor(InfoExtractor): hls_url = rendition.find('./src').text formats.extend(self._extract_m3u8_formats( hls_url, video_id, ext='mp4', entry_protocol='m3u8_native', - m3u8_id='hls')) + m3u8_id='hls', fatal=False)) else: # fms try: @@ -106,7 +106,8 @@ class MTVServicesInfoExtractor(InfoExtractor): }]) except (KeyError, TypeError): raise ExtractorError('Invalid rendition field.') - self._sort_formats(formats) + if formats: + self._sort_formats(formats) return formats def _extract_subtitles(self, mdoc, mtvn_id): @@ -133,8 +134,11 @@ class MTVServicesInfoExtractor(InfoExtractor): mediagen_url += 'acceptMethods=' mediagen_url += 'hls' if use_hls else 'fms' - mediagen_doc = self._download_xml(mediagen_url, video_id, - 'Downloading video urls') + mediagen_doc = self._download_xml( + mediagen_url, video_id, 'Downloading video urls', fatal=False) + + if mediagen_doc is False: + return None item = mediagen_doc.find('./video/item') if item is not None and item.get('type') == 'text': @@ -174,6 +178,13 @@ class MTVServicesInfoExtractor(InfoExtractor): formats = self._extract_video_formats(mediagen_doc, mtvn_id, video_id) + # Some parts of complete video may be missing (e.g. 
missing Act 3 in + # http://www.southpark.de/alle-episoden/s14e01-sexual-healing) + if not formats: + return None + + self._sort_formats(formats) + return { 'title': title, 'formats': formats, @@ -205,9 +216,14 @@ class MTVServicesInfoExtractor(InfoExtractor): title = xpath_text(idoc, './channel/title') description = xpath_text(idoc, './channel/description') + entries = [] + for item in idoc.findall('.//item'): + info = self._get_video_info(item, use_hls) + if info: + entries.append(info) + return self.playlist_result( - [self._get_video_info(item, use_hls) for item in idoc.findall('.//item')], - playlist_title=title, playlist_description=description) + entries, playlist_title=title, playlist_description=description) def _extract_triforce_mgid(self, webpage, data_zone=None, video_id=None): triforce_feed = self._parse_json(self._search_regex( diff --git a/youtube_dl/extractor/vh1.py b/youtube_dl/extractor/vh1.py index 6be3774b7..570fa45ea 100644 --- a/youtube_dl/extractor/vh1.py +++ b/youtube_dl/extractor/vh1.py @@ -121,7 +121,11 @@ class VH1IE(MTVIE): idoc = self._download_xml( doc_url, video_id, 'Downloading info', transform_source=fix_xml_ampersands) - return self.playlist_result( - [self._get_video_info(item) for item in idoc.findall('.//item')], - playlist_id=video_id, - ) + + entries = [] + for item in idoc.findall('.//item'): + info = self._get_video_info(item) + if info: + entries.append(info) + + return self.playlist_result(entries, playlist_id=video_id) From 935d6c20c00536cf39cf2844295266e64492bb10 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 23 Jul 2017 00:44:50 +0700 Subject: [PATCH 14/24] [vidio] Make duration non fatal and fix typo --- youtube_dl/extractor/vidio.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/vidio.py b/youtube_dl/extractor/vidio.py index 701bb1d01..01da32f1c 100644 --- a/youtube_dl/extractor/vidio.py +++ b/youtube_dl/extractor/vidio.py @@ -56,7 +56,8 @@ class 
VidioIE(InfoExtractor): self._sort_formats(formats) duration = int_or_none(duration or self._search_regex( - r'data-video-duration=(["\'])(?P\d+)\1', webpage, 'duration')) + r'data-video-duration=(["\'])(?P\d+)\1', webpage, + 'duration', fatal=False, group='duration')) thumbnail = thumbnail or self._og_search_thumbnail(webpage) like_count = int_or_none(self._search_regex( From 71dde5eecf07ae3a8871e5d4a05a944097e17fb9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 23 Jul 2017 00:59:07 +0700 Subject: [PATCH 15/24] [itv] Fix production id extraction (closes #13671) --- youtube_dl/extractor/itv.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/itv.py b/youtube_dl/extractor/itv.py index f3156804d..26c48e4b8 100644 --- a/youtube_dl/extractor/itv.py +++ b/youtube_dl/extractor/itv.py @@ -59,12 +59,18 @@ class ITVIE(InfoExtractor): def _add_sub_element(element, name): return etree.SubElement(element, _add_ns(name)) + production_id = ( + params.get('data-video-autoplay-id') or + '%s#001' % ( + params.get('data-video-episode-id') or + video_id.replace('a', '/'))) + req_env = etree.Element(_add_ns('soapenv:Envelope')) _add_sub_element(req_env, 'soapenv:Header') body = _add_sub_element(req_env, 'soapenv:Body') get_playlist = _add_sub_element(body, ('tem:GetPlaylist')) request = _add_sub_element(get_playlist, 'tem:request') - _add_sub_element(request, 'itv:ProductionId').text = params['data-video-id'] + _add_sub_element(request, 'itv:ProductionId').text = production_id _add_sub_element(request, 'itv:RequestGuid').text = compat_str(uuid.uuid4()).upper() vodcrid = _add_sub_element(request, 'itv:Vodcrid') _add_sub_element(vodcrid, 'com:Id') From 425f41319aec6940195818e980005cef4946eb75 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 23 Jul 2017 01:06:08 +0700 Subject: [PATCH 16/24] [ChangeLog] Actualize --- ChangeLog | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff 
--git a/ChangeLog b/ChangeLog index a83523cb9..cd8a34a86 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,8 +1,28 @@ version +Core +* [YoutubeDL] Improve default format specification (#13704) +* [YoutubeDL] Do not override id, extractor and extractor_key for + url_transparent entities +* [extractor/common] Fix playlist_from_matches + Extractors +* [itv] Fix production id extraction (#13671, #13703) +* [vidio] Make duration non fatal and fix typo +* [mtv] Skip missing video parts (#13690) +* [sportbox:embed] Fix extraction ++ [npo] Add support for npo3.nl URLs (#13695) +* [dramafever] Remove video id from title (#13699) ++ [egghead:lesson] Add support for lessons (#6635) +* [funnyordie] Extract more metadata (#13677) * [youku:show] Fix playlist extraction (#13248) + [dispeak] Recognize sevt subdomain (#13276) +* [adn] Improve error reporting (#13663) +* [crunchyroll] Relax series and season regex (#13659) ++ [spiegel:article] Add support for nexx iframe embeds (#13029) ++ [nexx:embed] Add support for iframe embeds +* [nexx] Improve JS embed extraction ++ [pearvideo] Add support for pearvideo.com (#13031) version 2017.07.15 From 0db492c02a60dbfb44514833445bf267f5319ae2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 23 Jul 2017 01:09:09 +0700 Subject: [PATCH 17/24] release 2017.07.23 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 13 ++++++++----- youtube_dl/version.py | 2 +- 4 files changed, 13 insertions(+), 10 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 0f20d0485..37d09d796 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.07.15*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. 
-- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.07.15** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.07.23*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.07.23** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2017.07.15 +[debug] youtube-dl version 2017.07.23 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index cd8a34a86..302d32aab 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2017.07.23 Core * [YoutubeDL] Improve default format specification (#13704) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index d7304ba06..eb09c470c 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -42,7 +42,7 @@ - **Allocine** - **AlphaPorno** - **AMCNetworks** - - **anderetijden**: npo.nl and ntr.nl + - **anderetijden**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl - **AnimeOnDemand** - **anitube.se** - **Anvato** @@ -238,6 +238,7 @@ - **EbaumsWorld** - **EchoMsk** - **egghead:course**: egghead.io course + - **egghead:lesson**: egghead.io lesson - **eHow** - **Einthusan** - **eitb.tv** @@ -522,6 +523,7 @@ - 
**NextMediaActionNews**: 蘋果日報 - 動新聞 - **NextTV**: 壹電視 - **Nexx** + - **NexxEmbed** - **nfb**: National Film Board of Canada - **nfl.com** - **NhkVod** @@ -552,7 +554,7 @@ - **NowTVList** - **nowvideo**: NowVideo - **Noz** - - **npo**: npo.nl and ntr.nl + - **npo**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl - **npo.nl:live** - **npo.nl:radio** - **npo.nl:radio:fragment** @@ -596,6 +598,7 @@ - **Patreon** - **pbs**: Public Broadcasting Service (PBS) and member stations: PBS: Public Broadcasting Service, APT - Alabama Public Television (WBIQ), GPB/Georgia Public Broadcasting (WGTV), Mississippi Public Broadcasting (WMPN), Nashville Public Television (WNPT), WFSU-TV (WFSU), WSRE (WSRE), WTCI (WTCI), WPBA/Channel 30 (WPBA), Alaska Public Media (KAKM), Arizona PBS (KAET), KNME-TV/Channel 5 (KNME), Vegas PBS (KLVX), AETN/ARKANSAS ETV NETWORK (KETS), KET (WKLE), WKNO/Channel 10 (WKNO), LPB/LOUISIANA PUBLIC BROADCASTING (WLPB), OETA (KETA), Ozarks Public Television (KOZK), WSIU Public Broadcasting (WSIU), KEET TV (KEET), KIXE/Channel 9 (KIXE), KPBS San Diego (KPBS), KQED (KQED), KVIE Public Television (KVIE), PBS SoCal/KOCE (KOCE), ValleyPBS (KVPT), CONNECTICUT PUBLIC TELEVISION (WEDH), KNPB Channel 5 (KNPB), SOPTV (KSYS), Rocky Mountain PBS (KRMA), KENW-TV3 (KENW), KUED Channel 7 (KUED), Wyoming PBS (KCWC), Colorado Public Television / KBDI 12 (KBDI), KBYU-TV (KBYU), Thirteen/WNET New York (WNET), WGBH/Channel 2 (WGBH), WGBY (WGBY), NJTV Public Media NJ (WNJT), WLIW21 (WLIW), mpt/Maryland Public Television (WMPB), WETA Television and Radio (WETA), WHYY (WHYY), PBS 39 (WLVT), WVPT - Your Source for PBS and More! 
(WVPT), Howard University Television (WHUT), WEDU PBS (WEDU), WGCU Public Media (WGCU), WPBT2 (WPBT), WUCF TV (WUCF), WUFT/Channel 5 (WUFT), WXEL/Channel 42 (WXEL), WLRN/Channel 17 (WLRN), WUSF Public Broadcasting (WUSF), ETV (WRLK), UNC-TV (WUNC), PBS Hawaii - Oceanic Cable Channel 10 (KHET), Idaho Public Television (KAID), KSPS (KSPS), OPB (KOPB), KWSU/Channel 10 & KTNW/Channel 31 (KWSU), WILL-TV (WILL), Network Knowledge - WSEC/Springfield (WSEC), WTTW11 (WTTW), Iowa Public Television/IPTV (KDIN), Nine Network (KETC), PBS39 Fort Wayne (WFWA), WFYI Indianapolis (WFYI), Milwaukee Public Television (WMVS), WNIN (WNIN), WNIT Public Television (WNIT), WPT (WPNE), WVUT/Channel 22 (WVUT), WEIU/Channel 51 (WEIU), WQPT-TV (WQPT), WYCC PBS Chicago (WYCC), WIPB-TV (WIPB), WTIU (WTIU), CET (WCET), ThinkTVNetwork (WPTD), WBGU-TV (WBGU), WGVU TV (WGVU), NET1 (KUON), Pioneer Public Television (KWCM), SDPB Television (KUSD), TPT (KTCA), KSMQ (KSMQ), KPTS/Channel 8 (KPTS), KTWU/Channel 11 (KTWU), East Tennessee PBS (WSJK), WCTE-TV (WCTE), WLJT, Channel 11 (WLJT), WOSU TV (WOSU), WOUB/WOUC (WOUB), WVPB (WVPB), WKYU-PBS (WKYU), KERA 13 (KERA), MPBN (WCBB), Mountain Lake PBS (WCFE), NHPTV (WENH), Vermont PBS (WETK), witf (WITF), WQED Multimedia (WQED), WMHT Educational Telecommunications (WMHT), Q-TV (WDCQ), WTVS Detroit Public TV (WTVS), CMU Public Television (WCMU), WKAR-TV (WKAR), WNMU-TV Public TV 13 (WNMU), WDSE - WRPT (WDSE), WGTE TV (WGTE), Lakeland Public Television (KAWE), KMOS-TV - Channels 6.1, 6.2 and 6.3 (KMOS), MontanaPBS (KUSM), KRWG/Channel 22 (KRWG), KACV (KACV), KCOS/Channel 13 (KCOS), WCNY/Channel 24 (WCNY), WNED (WNED), WPBS (WPBS), WSKG Public TV (WSKG), WXXI (WXXI), WPSU (WPSU), WVIA Public Media Studios (WVIA), WTVI (WTVI), Western Reserve PBS (WNEO), WVIZ/PBS ideastream (WVIZ), KCTS 9 (KCTS), Basin PBS (KPBT), KUHT / Channel 8 (KUHT), KLRN (KLRN), KLRU (KLRU), WTJX Channel 12 (WTJX), WCVE PBS (WCVE), KBTC Public Television (KBTC) - **pcmag** + - 
**PearVideo** - **People** - **periscope**: Periscope - **periscope:user**: Periscope user videos @@ -772,7 +775,7 @@ - **tagesschau:player** - **Tass** - **TastyTrade** - - **TBS** + - **TBS** (Currently broken) - **TDSLifeway** - **teachertube**: teachertube.com videos - **teachertube:user:collection**: teachertube.com user and collection videos @@ -950,7 +953,7 @@ - **VoiceRepublic** - **VoxMedia** - **Vporn** - - **vpro**: npo.nl and ntr.nl + - **vpro**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl - **Vrak** - **VRT**: deredactie.be, sporza.be, cobra.be and cobra.canvas.be - **vrv** @@ -976,7 +979,7 @@ - **wholecloud**: WholeCloud - **Wimp** - **Wistia** - - **wnl**: npo.nl and ntr.nl + - **wnl**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl - **WorldStarHipHop** - **wrzuta.pl** - **wrzuta.pl:playlist** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 82e166fef..a8dbb93e3 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2017.07.15' +__version__ = '2017.07.23' From 905d18a7aa42263c66f311ac0cdf46b2caa2f4d6 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sun, 23 Jul 2017 16:21:35 +0800 Subject: [PATCH 18/24] [options] Correctly hide login info from debug outputs (#13696) Iterate over opts instead of PRIVATE_OPTS for both performance and correctness --- ChangeLog | 6 ++++++ test/test_options.py | 26 ++++++++++++++++++++++++++ youtube_dl/options.py | 38 ++++++++++++++++++-------------------- 3 files changed, 50 insertions(+), 20 deletions(-) create mode 100644 test/test_options.py diff --git a/ChangeLog b/ChangeLog index 302d32aab..8e63b5c11 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +version + +Core +* [options] Correctly hide login info from debug outputs (#13696) + + version 2017.07.23 Core diff --git a/test/test_options.py b/test/test_options.py new file mode 100644 index 000000000..785281fe3 --- /dev/null +++ b/test/test_options.py @@ 
-0,0 +1,26 @@ +# coding: utf-8 + +from __future__ import unicode_literals + +# Allow direct execution +import os +import sys +import unittest +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from youtube_dl.options import _hide_login_info + + +class TestOptions(unittest.TestCase): + def test_hide_login_inf(self): + self.assertEqual(_hide_login_info(['-u', 'foo', '-p', 'bar']), + ['-u', 'PRIVATE', '-p', 'PRIVATE']) + self.assertEqual(_hide_login_info(['-u']), ['-u']) + self.assertEqual(_hide_login_info(['-u', 'foo', '-u', 'bar']), + ['-u', 'PRIVATE', '-u', 'PRIVATE']) + self.assertEqual(_hide_login_info(['--username=foo']), + ['--username=PRIVATE']) + + +if __name__ == '__main__': + unittest.main() diff --git a/youtube_dl/options.py b/youtube_dl/options.py index 79e9fd12c..38439c971 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -20,6 +20,24 @@ from .utils import ( from .version import __version__ +def _hide_login_info(opts): + PRIVATE_OPTS = set(['-p', '--password', '-u', '--username', '--video-password', '--ap-password', '--ap-username']) + eqre = re.compile('^(?P<key>' + ('|'.join(re.escape(po) for po in PRIVATE_OPTS)) + ')=.+$') + + def _scrub_eq(o): + m = eqre.match(o) + if m: + return m.group('key') + '=PRIVATE' + else: + return o + + opts = list(map(_scrub_eq, opts)) + for idx, opt in enumerate(opts): + if opt in PRIVATE_OPTS and idx + 1 < len(opts): + opts[idx + 1] = 'PRIVATE' + return opts + + def parseOpts(overrideArguments=None): def _readOptions(filename_bytes, default=[]): try: @@ -93,26 +111,6 @@ def parseOpts(overrideArguments=None): def _comma_separated_values_options_callback(option, opt_str, value, parser): setattr(parser.values, option.dest, value.split(',')) - def _hide_login_info(opts): - PRIVATE_OPTS = ['-p', '--password', '-u', '--username', '--video-password', '--ap-password', '--ap-username'] - eqre = re.compile('^(?P<key>' + ('|'.join(re.escape(po) for po in PRIVATE_OPTS)) + ')=.+$') - - def
_scrub_eq(o): - m = eqre.match(o) - if m: - return m.group('key') + '=PRIVATE' - else: - return o - - opts = list(map(_scrub_eq, opts)) - for private_opt in PRIVATE_OPTS: - try: - i = opts.index(private_opt) - opts[i + 1] = 'PRIVATE' - except ValueError: - pass - return opts - # No need to wrap help messages if we're on a wide console columns = compat_get_terminal_size().columns max_width = columns if columns else 80 From 73095e013fb1bc4a1e676d7be77a103f0013a227 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sun, 23 Jul 2017 16:24:18 +0800 Subject: [PATCH 19/24] [options] Typo --- test/test_options.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_options.py b/test/test_options.py index 785281fe3..3a25a6ba3 100644 --- a/test/test_options.py +++ b/test/test_options.py @@ -12,7 +12,7 @@ from youtube_dl.options import _hide_login_info class TestOptions(unittest.TestCase): - def test_hide_login_inf(self): + def test_hide_login_info(self): self.assertEqual(_hide_login_info(['-u', 'foo', '-p', 'bar']), ['-u', 'PRIVATE', '-p', 'PRIVATE']) self.assertEqual(_hide_login_info(['-u']), ['-u']) From e3ce912c3d767fcb1a1225d05ac64da1acab94aa Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sun, 23 Jul 2017 16:25:17 +0800 Subject: [PATCH 20/24] [niconico] improve error reporting (#13696) --- youtube_dl/extractor/niconico.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/youtube_dl/extractor/niconico.py b/youtube_dl/extractor/niconico.py index 695e32e59..79b9952c3 100644 --- a/youtube_dl/extractor/niconico.py +++ b/youtube_dl/extractor/niconico.py @@ -147,6 +147,9 @@ class NiconicoIE(InfoExtractor): elif 'closed' in flv_info: raise ExtractorError('Niconico videos now require logging in', expected=True) + elif 'error' in flv_info: + raise ExtractorError('%s reports error: %s' % ( + self.IE_NAME, flv_info['error'][0]), expected=True) else: raise ExtractorError('Unable to find video URL') From 3150976669ef2ffc9f4eee9e99a6e70730bc22fb Mon Sep 17 
00:00:00 2001 From: nyuszika7h Date: Sun, 23 Jul 2017 15:33:18 +0200 Subject: [PATCH 21/24] [ISSUE_TEMPLATE_tmpl.md] Minor improvements --- .github/ISSUE_TEMPLATE_tmpl.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/ISSUE_TEMPLATE_tmpl.md b/.github/ISSUE_TEMPLATE_tmpl.md index df79503d3..26f61d3b4 100644 --- a/.github/ISSUE_TEMPLATE_tmpl.md +++ b/.github/ISSUE_TEMPLATE_tmpl.md @@ -1,16 +1,16 @@ ## Please follow the guide below - You will be asked some questions and requested to provide some information, please read them **carefully** and answer honestly -- Put an `x` into all the boxes [ ] relevant to your *issue* (like that [x]) -- Use *Preview* tab to see how your issue will actually look like +- Put an `x` into all the boxes [ ] relevant to your *issue* (like this: `[x]`) +- Use the *Preview* tab to see what your issue will actually look like --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *%(version)s*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *%(version)s*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. 
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **%(version)s** ### Before submitting an *issue* make sure you have: -- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections +- [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections - [ ] [Searched](https://github.com/rg3/youtube-dl/search?type=Issues) the bugtracker for similar issues including closed ones ### What is the purpose of your *issue*? @@ -28,9 +28,9 @@ ### If the purpose of this *issue* is a *bug report*, *site support request* or you are not completely sure provide the full verbose output as follows: -Add `-v` flag to **your command line** you run youtube-dl with, copy the **whole** output and insert it here. It should look similar to one below (replace it with **your** log inserted between triple ```): +Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl -v <your command line>`), copy the **whole** output and insert it here.
It should look similar to one below (replace it with **your** log inserted between triple ```): + ``` -$ youtube-dl -v <your command line> [debug] System config: [] [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] From f0e31e32c940d8529353f40bd2426163c3199216 Mon Sep 17 00:00:00 2001 From: nyuszika7h Date: Sun, 23 Jul 2017 15:40:04 +0200 Subject: [PATCH 22/24] [nick] Automate geo-restriction bypass (#13711) --- youtube_dl/extractor/nick.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/nick.py b/youtube_dl/extractor/nick.py index 08a75929e..1fa19cde4 100644 --- a/youtube_dl/extractor/nick.py +++ b/youtube_dl/extractor/nick.py @@ -12,6 +12,7 @@ class NickIE(MTVServicesInfoExtractor): IE_NAME = 'nick.com' _VALID_URL = r'https?://(?:(?:www|beta)\.)?nick(?:jr)?\.com/(?:[^/]+/)?(?:videos/clip|[^/]+/videos)/(?P<id>[^/?#.]+)' _FEED_URL = 'http://udat.mtvnservices.com/service1/dispatch.htm' + _GEO_COUNTRIES = ['US'] _TESTS = [{ 'url': 'http://www.nick.com/videos/clip/alvinnn-and-the-chipmunks-112-full-episode.html', 'playlist': [ From 70bfab0e9ac8ddb7da67d71633c8c4b0704054cf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 23 Jul 2017 21:00:19 +0700 Subject: [PATCH 23/24] [mtv] Improve thumbnal extraction --- youtube_dl/extractor/mtv.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/youtube_dl/extractor/mtv.py b/youtube_dl/extractor/mtv.py index fc098cd13..25af5ddfd 100644 --- a/youtube_dl/extractor/mtv.py +++ b/youtube_dl/extractor/mtv.py @@ -50,8 +50,7 @@ class MTVServicesInfoExtractor(InfoExtractor): thumb_node = itemdoc.find(search_path) if thumb_node is None: return None - else: - return thumb_node.attrib['url'] + return thumb_node.get('url') or thumb_node.text or None def _extract_mobile_video_formats(self, mtvn_id): webpage_url = self._MOBILE_TEMPLATE % mtvn_id From c99d6890cb46626870474e5c1092d9772096c4b6 Mon Sep 17 00:00:00 2001 From:
=?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 23 Jul 2017 21:00:56 +0700 Subject: [PATCH 24/24] [nickru] Add extractor --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/nick.py | 18 ++++++++++++++++++ 2 files changed, 19 insertions(+) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index db7616caa..2513f2587 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -673,6 +673,7 @@ from .nick import ( NickIE, NickDeIE, NickNightIE, + NickRuIE, ) from .niconico import NiconicoIE, NiconicoPlaylistIE from .ninecninemedia import ( diff --git a/youtube_dl/extractor/nick.py b/youtube_dl/extractor/nick.py index 1fa19cde4..b688637bc 100644 --- a/youtube_dl/extractor/nick.py +++ b/youtube_dl/extractor/nick.py @@ -125,3 +125,21 @@ class NickNightIE(NickDeIE): return self._search_regex( r'mrss\s*:\s*(["\'])(?P<url>http.+?)\1', webpage, 'mrss url', group='url') + + +class NickRuIE(MTVServicesInfoExtractor): + IE_NAME = 'nickelodeonru' + _VALID_URL = r'https?://(?:www\.)nickelodeon\.ru/(?:playlist|shows|videos)/(?:[^/]+/)*(?P<id>[^/?#&]+)' + _TESTS = [{ + 'url': 'http://www.nickelodeon.ru/shows/henrydanger/videos/episodes/3-sezon-15-seriya-licenziya-na-polyot/pmomfb#playlist/7airc6', + 'only_matching': True, + }, { + 'url': 'http://www.nickelodeon.ru/videos/smotri-na-nickelodeon-v-iyule/g9hvh7', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + mgid = self._extract_mgid(webpage) + return self.url_result('http://media.mtvnservices.com/embed/%s' % mgid)