From 9df6b03caf45e7144a288d31caae0bf0472a48f7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 21 Oct 2016 03:00:03 +0700 Subject: [PATCH 01/16] [pluralsight] Adapt to new API (closes #10972) --- youtube_dl/extractor/pluralsight.py | 49 +++++++++++++---------------- 1 file changed, 21 insertions(+), 28 deletions(-) diff --git a/youtube_dl/extractor/pluralsight.py b/youtube_dl/extractor/pluralsight.py index b66adfc00..039e6eeb0 100644 --- a/youtube_dl/extractor/pluralsight.py +++ b/youtube_dl/extractor/pluralsight.py @@ -23,7 +23,7 @@ from ..utils import ( class PluralsightBaseIE(InfoExtractor): - _API_BASE = 'http://app.pluralsight.com' + _API_BASE = 'https://app.pluralsight.com' class PluralsightIE(PluralsightBaseIE): @@ -102,7 +102,7 @@ class PluralsightIE(PluralsightBaseIE): 'm': name, } captions = self._download_json( - '%s/training/Player/Captions' % self._API_BASE, video_id, + '%s/player/retrieve-captions' % self._API_BASE, video_id, 'Downloading captions JSON', 'Unable to download captions JSON', fatal=False, data=json.dumps(captions_post).encode('utf-8'), headers={'Content-Type': 'application/json;charset=utf-8'}) @@ -147,28 +147,22 @@ class PluralsightIE(PluralsightBaseIE): author = qs.get('author', [None])[0] name = qs.get('name', [None])[0] clip_id = qs.get('clip', [None])[0] - course = qs.get('course', [None])[0] + course_name = qs.get('course', [None])[0] - if any(not f for f in (author, name, clip_id, course,)): + if any(not f for f in (author, name, clip_id, course_name,)): raise ExtractorError('Invalid URL', expected=True) display_id = '%s-%s' % (name, clip_id) - webpage = self._download_webpage(url, display_id) + parsed_url = compat_urlparse.urlparse(url) - modules = self._search_regex( - r'moduleCollection\s*:\s*new\s+ModuleCollection\((\[.+?\])\s*,\s*\$rootScope\)', - webpage, 'modules', default=None) + payload_url = compat_urlparse.urlunparse(parsed_url._replace( + netloc='app.pluralsight.com', path='player/api/v1/payload')) - if modules: - collection = self._parse_json(modules, display_id) - else: - # Webpage may be served in different layout (see - # https://github.com/rg3/youtube-dl/issues/7607) - collection = self._parse_json( - self._search_regex( - r'var\s+initialState\s*=\s*({.+?});\n', webpage, 'initial state'), - display_id)['course']['modules'] + course = self._download_json( + payload_url, display_id, headers={'Referer': url})['payload']['course'] + + collection = course['modules'] module, clip = None, None @@ -209,8 +203,7 @@ class PluralsightIE(PluralsightBaseIE): # Some courses also offer widescreen resolution for high quality (see # https://github.com/rg3/youtube-dl/issues/7766) - widescreen = True if re.search( - r'courseSupportsWidescreenVideoFormats\s*:\s*true', webpage) else False + widescreen = course.get('supportsWideScreenVideoFormats') is True best_quality = 'high-widescreen' if widescreen else 'high' if widescreen: for allowed_quality in ALLOWED_QUALITIES: @@ -239,18 +232,18 @@ class PluralsightIE(PluralsightBaseIE): for quality in qualities_: f = QUALITIES[quality].copy() clip_post = { - 'a': author, - 'cap': 'false', - 'cn': clip_id, - 'course': course, - 'lc': 'en', - 'm': name, - 'mt': ext, - 'q': '%dx%d' % (f['width'], f['height']), + 'author': author, + 'includeCaptions': False, + 'clipIndex': int(clip_id), + 'courseName': course_name, + 'locale': 'en', + 'moduleName': name, + 'mediaType': ext, + 'quality': '%dx%d' % (f['width'], f['height']), } format_id = '%s-%s' % (ext, quality) clip_url = self._download_webpage( - '%s/training/Player/ViewClip' % self._API_BASE, display_id, + '%s/video/clips/viewclip' % self._API_BASE, display_id, 'Downloading %s URL' % format_id, fatal=False, data=json.dumps(clip_post).encode('utf-8'), headers={'Content-Type': 'application/json;charset=utf-8'}) From 0ebb86bd182beaa349249b6672247101a85340ec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 21 Oct 2016 03:07:03 +0700 Subject: [PATCH 02/16] [ChangeLog] Actualize --- ChangeLog | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/ChangeLog b/ChangeLog index edfd711af..6a2e470a4 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,7 +1,13 @@ version +Core +- Disable thumbnails embedding in mkv ++ Add support for Comcast multiple-system operator (#10819) + Extractors +* [pluralsight] Adapt to new API (#10972) * [openload] Fix extraction (#10408, #10971) ++ [natgeo] Extract m3u8 formats (#10959) version 2016.10.19 From 9ce0077485b9484be0aec122f6d3d1e8b4858293 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 21 Oct 2016 03:08:42 +0700 Subject: [PATCH 03/16] release 2016.10.21 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- youtube_dl/version.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 2c9e1f370..9652100a3 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.10.19*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.10.19** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.10.21*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.10.21** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.10.19 +[debug] youtube-dl version 2016.10.21 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 6a2e470a4..f4d605a06 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2016.10.21 Core - Disable thumbnails embedding in mkv diff --git a/youtube_dl/version.py b/youtube_dl/version.py index b883dbdff..d5c0b701f 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.10.19' +__version__ = '2016.10.21' From f8ae2c7f307e4f1bd5f3057e5c15a2a154eeea05 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 21 Oct 2016 04:35:32 +0700 Subject: [PATCH 04/16] [pluralsight] Process all clip URLs (closes #10984) --- youtube_dl/extractor/pluralsight.py | 34 +++++++++++++++++++---------- 1 file changed, 23 insertions(+), 11 deletions(-) diff --git a/youtube_dl/extractor/pluralsight.py b/youtube_dl/extractor/pluralsight.py index 039e6eeb0..2683c0a72 100644 --- a/youtube_dl/extractor/pluralsight.py +++ b/youtube_dl/extractor/pluralsight.py @@ -4,7 +4,6 @@ import collections import json import os import random -import re from .common import InfoExtractor from ..compat import ( @@ -242,9 +241,9 @@ class PluralsightIE(PluralsightBaseIE): 'quality': '%dx%d' % (f['width'], f['height']), } format_id = '%s-%s' % (ext, quality) - clip_url = self._download_webpage( + viewclip = self._download_json( '%s/video/clips/viewclip' % self._API_BASE, display_id, - 'Downloading %s URL' % format_id, fatal=False, + 'Downloading %s viewclip JSON' % format_id, fatal=False, data=json.dumps(clip_post).encode('utf-8'), headers={'Content-Type': 'application/json;charset=utf-8'}) @@ -258,15 +257,28 @@ class PluralsightIE(PluralsightBaseIE): random.randint(2, 5), display_id, '%(video_id)s: Waiting for %(timeout)s seconds to avoid throttling') - if not clip_url: + if not viewclip: continue - f.update({ - 'url': clip_url, - 'ext': ext, - 'format_id': format_id, - 'quality': quality_key(quality), - }) - formats.append(f) + + clip_urls = viewclip.get('urls') + if not isinstance(clip_urls, list): + continue + + for clip_url_data in clip_urls: + clip_url = clip_url_data.get('url') + if not clip_url: + continue + cdn = clip_url_data.get('cdn') + clip_f = f.copy() + clip_f.update({ + 'url': clip_url, + 'ext': ext, + 'format_id': '%s-%s' % (format_id, cdn) if cdn else format_id, + 'quality': quality_key(quality), + 'source_preference': int_or_none(clip_url_data.get('rank')), + }) + formats.append(clip_f) + self._sort_formats(formats) duration = int_or_none( From 859447a28d3a787435df75470d8e39d2b078fdb6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 21 Oct 2016 04:38:14 +0700 Subject: [PATCH 05/16] [adobepass] PEP 8 --- youtube_dl/extractor/adobepass.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/adobepass.py b/youtube_dl/extractor/adobepass.py index b6d215a55..12eeab271 100644 --- a/youtube_dl/extractor/adobepass.py +++ b/youtube_dl/extractor/adobepass.py @@ -1376,19 +1376,23 @@ class AdobePassIE(InfoExtractor): provider_redirect_page, urlh = provider_redirect_page_res # Check for Comcast auto login if 'automatically signing you in' in provider_redirect_page: - oauth_redirect_url = self._html_search_regex(r'window\.location\s*=\s*[\'"]([^\'"]+)', + oauth_redirect_url = self._html_search_regex( + r'window\.location\s*=\s*[\'"]([^\'"]+)', provider_redirect_page, 'oauth redirect') # Just need to process the request. No useful data comes back - self._download_webpage(oauth_redirect_url, video_id, 'Confirming auto login') + self._download_webpage( + oauth_redirect_url, video_id, 'Confirming auto login') else: if '
Date: Fri, 21 Oct 2016 04:53:26 +0700 Subject: [PATCH 06/16] [ChangeLog] Actualize --- ChangeLog | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/ChangeLog b/ChangeLog index f4d605a06..6c67dcab5 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +version + +Extractors ++ [pluralsight] Process all clip URLs (#10984) + + version 2016.10.21 Core From 69c2d42bd730b4ea07fe5ba9015049423b71c8a9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 21 Oct 2016 04:57:28 +0700 Subject: [PATCH 07/16] release 2016.10.21.1 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- youtube_dl/version.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 9652100a3..0a051ad35 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.10.21*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.10.21** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.10.21.1*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.10.21.1** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.10.21 +[debug] youtube-dl version 2016.10.21.1 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 6c67dcab5..4987fb7ca 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2016.10.21.1 Extractors + [pluralsight] Process all clip URLs (#10984) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index d5c0b701f..583c82988 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.10.21' +__version__ = '2016.10.21.1' From 00ca7552317bb69ce8eb84582d658d5e52997394 Mon Sep 17 00:00:00 2001 From: John Hawkinson Date: Fri, 21 Oct 2016 20:44:49 -0400 Subject: [PATCH 08/16] [get_exe_version] Do version probes with <&- When doing version probes for ffmpeg, do the equivalent of calling it as: ffmpeg -version <&- Where <&- is shell syntax for closing stdin before calling the program. This is roughly equivalent to Date: Sat, 22 Oct 2016 13:04:05 +0800 Subject: [PATCH 09/16] [utils] Clarify for redirecting STDIN in get_exe_version() --- youtube_dl/utils.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index a89ff6908..2770c5f1c 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1818,6 +1818,9 @@ def get_exe_version(exe, args=['--version'], """ Returns the version of the specified executable, or False if the executable is not present """ try: + # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers + # SIGTTOU if youtube-dl is run in the background. + # See https://github.com/rg3/youtube-dl/issues/955#issuecomment-209789656 out, _ = subprocess.Popen( [encodeArgument(exe)] + args, stdin=subprocess.PIPE, From 5378f8ce0d59d0a948d2597b175b2b1cff3e8bb2 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sat, 22 Oct 2016 13:08:56 +0800 Subject: [PATCH 10/16] [ChangeLog] Update for #10996 --- ChangeLog | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/ChangeLog b/ChangeLog index 4987fb7ca..825e357a4 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +version + +Core +* Running youtube-dl in the background is fixed (#10996, #10706, #955) + + version 2016.10.21.1 Extractors From 425f3fdfcb4a559da836bbada82f2bb06d34b707 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 22 Oct 2016 21:15:39 +0700 Subject: [PATCH 11/16] [pluralsight] Fix subtitles conversion (closes #10990) --- youtube_dl/extractor/pluralsight.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/pluralsight.py b/youtube_dl/extractor/pluralsight.py index 2683c0a72..0ffd41ecd 100644 --- a/youtube_dl/extractor/pluralsight.py +++ b/youtube_dl/extractor/pluralsight.py @@ -11,6 +11,7 @@ from ..compat import ( compat_urlparse, ) from ..utils import ( + dict_get, ExtractorError, float_or_none, int_or_none, @@ -119,14 +120,17 @@ class PluralsightIE(PluralsightBaseIE): @staticmethod def _convert_subtitles(duration, subs): srt = '' + TIME_OFFSET_KEYS = ('displayTimeOffset', 'DisplayTimeOffset') + TEXT_KEYS = ('text', 'Text') for num, current in enumerate(subs): current = subs[num] - start, text = float_or_none( - current.get('DisplayTimeOffset')), current.get('Text') + start, text = ( + float_or_none(dict_get(current, TIME_OFFSET_KEYS)), + dict_get(current, TEXT_KEYS)) if start is None or text is None: continue end = duration if num == len(subs) - 1 else float_or_none( - subs[num + 1].get('DisplayTimeOffset')) + dict_get(subs[num + 1], TIME_OFFSET_KEYS)) if end is None: continue srt += os.linesep.join( From 9aa929d33778e9073e554421a53520f81af43eac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 23 Oct 2016 17:20:45 +0700 Subject: [PATCH 12/16] [twitch:stream] Add support for rebroadcasts (closes #10995) --- youtube_dl/extractor/twitch.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py index 46c2cfe7b..77414a242 100644 --- a/youtube_dl/extractor/twitch.py +++ b/youtube_dl/extractor/twitch.py @@ -398,7 +398,7 @@ class TwitchStreamIE(TwitchBaseIE): channel_id = self._match_id(url) stream = self._call_api( - 'kraken/streams/%s' % channel_id, channel_id, + 'kraken/streams/%s?stream_type=all' % channel_id, channel_id, 'Downloading stream JSON').get('stream') if not stream: @@ -417,6 +417,7 @@ class TwitchStreamIE(TwitchBaseIE): query = { 'allow_source': 'true', 'allow_audio_only': 'true', + 'allow_spectre': 'true', 'p': random.randint(1000000, 10000000), 'player': 'twitchweb', 'segment_preference': '4', From 9dc13a67807fd38070d95cb23deca3761476e9de Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 23 Oct 2016 18:07:56 +0700 Subject: [PATCH 13/16] [vivo] Fix extraction (closes #11003) --- youtube_dl/extractor/extractors.py | 5 +- youtube_dl/extractor/shared.py | 110 +++++++++++++++++------------ 2 files changed, 70 insertions(+), 45 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index a693f8c56..6f7d9b65b 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -796,7 +796,10 @@ from .sendtonews import SendtoNewsIE from .servingsys import ServingSysIE from .sexu import SexuIE from .shahid import ShahidIE -from .shared import SharedIE +from .shared import ( + SharedIE, + VivoIE, +) from .sharesix import ShareSixIE from .sina import SinaIE from .sixplay import SixPlayIE diff --git a/youtube_dl/extractor/shared.py b/youtube_dl/extractor/shared.py index d592dfeb8..89e19e927 100644 --- a/youtube_dl/extractor/shared.py +++ b/youtube_dl/extractor/shared.py @@ -10,11 +10,38 @@ from ..utils import ( ) -class SharedIE(InfoExtractor): - IE_DESC = 'shared.sx and vivo.sx' - _VALID_URL = r'https?://(?:shared|vivo)\.sx/(?P[\da-z]{10})' +class SharedBaseIE(InfoExtractor): + def _real_extract(self, url): + video_id = self._match_id(url) - _TESTS = [{ + webpage, urlh = self._download_webpage_handle(url, video_id) + + if self._FILE_NOT_FOUND in webpage: + raise ExtractorError( + 'Video %s does not exist' % video_id, expected=True) + + video_url = self._extract_video_url(webpage, video_id, url) + + title = base64.b64decode(self._html_search_meta( + 'full:title', webpage, 'title').encode('utf-8')).decode('utf-8') + filesize = int_or_none(self._html_search_meta( + 'full:size', webpage, 'file size', fatal=False)) + + return { + 'id': video_id, + 'url': video_url, + 'ext': 'mp4', + 'filesize': filesize, + 'title': title, + } + + +class SharedIE(SharedBaseIE): + IE_DESC = 'shared.sx' + _VALID_URL = r'https?://shared\.sx/(?P[\da-z]{10})' + _FILE_NOT_FOUND = '>File does not exist<' + + _TEST = { 'url': 'http://shared.sx/0060718775', 'md5': '106fefed92a8a2adb8c98e6a0652f49b', 'info_dict': { @@ -23,7 +50,32 @@ class SharedIE(InfoExtractor): 'title': 'Bmp4', 'filesize': 1720110, }, - }, { + } + + def _extract_video_url(self, webpage, video_id, url): + download_form = self._hidden_inputs(webpage) + + video_page = self._download_webpage( + url, video_id, 'Downloading video page', + data=urlencode_postdata(download_form), + headers={ + 'Content-Type': 'application/x-www-form-urlencoded', + 'Referer': url, + }) + + video_url = self._html_search_regex( + r'data-url=(["\'])(?P(?:(?!\1).)+)\1', + video_page, 'video URL', group='url') + + return video_url + + +class VivoIE(SharedBaseIE): + IE_DESC = 'vivo.sx' + _VALID_URL = r'https?://vivo\.sx/(?P[\da-z]{10})' + _FILE_NOT_FOUND = '>The file you have requested does not exists or has been removed' + + _TEST = { 'url': 'http://vivo.sx/d7ddda0e78', 'md5': '15b3af41be0b4fe01f4df075c2678b2c', 'info_dict': { @@ -32,43 +84,13 @@ class SharedIE(InfoExtractor): 'title': 'Chicken', 'filesize': 528031, }, - }] + } - def _real_extract(self, url): - video_id = self._match_id(url) - - webpage, urlh = self._download_webpage_handle(url, video_id) - - if '>File does not exist<' in webpage: - raise ExtractorError( - 'Video %s does not exist' % video_id, expected=True) - - download_form = self._hidden_inputs(webpage) - - video_page = self._download_webpage( - urlh.geturl(), video_id, 'Downloading video page', - data=urlencode_postdata(download_form), - headers={ - 'Content-Type': 'application/x-www-form-urlencoded', - 'Referer': urlh.geturl(), - }) - - video_url = self._html_search_regex( - r'data-url=(["\'])(?P(?:(?!\1).)+)\1', - video_page, 'video URL', group='url') - title = base64.b64decode(self._html_search_meta( - 'full:title', webpage, 'title').encode('utf-8')).decode('utf-8') - filesize = int_or_none(self._html_search_meta( - 'full:size', webpage, 'file size', fatal=False)) - thumbnail = self._html_search_regex( - r'data-poster=(["\'])(?P(?:(?!\1).)+)\1', - video_page, 'thumbnail', default=None, group='url') - - return { - 'id': video_id, - 'url': video_url, - 'ext': 'mp4', - 'filesize': filesize, - 'title': title, - 'thumbnail': thumbnail, - } + def _extract_video_url(self, webpage, video_id, *args): + return self._parse_json( + self._search_regex( + r'InitializeStream\s*\(\s*(["\'])(?P(?:(?!\1).)+)\1', + webpage, 'stream', group='url'), + video_id, + transform_source=lambda x: base64.b64decode( + x.encode('ascii')).decode('utf-8'))[0] From f16f8505b18eb618aa830f21d6b97152e602dfb9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 23 Oct 2016 18:48:50 +0700 Subject: [PATCH 14/16] [vimeo] Delegate ondemand redirects to ondemand extractor (closes #10994) --- youtube_dl/extractor/vimeo.py | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index b566241cc..51c69a80c 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -322,6 +322,22 @@ class VimeoIE(VimeoBaseInfoExtractor): }, 'expected_warnings': ['Unable to download JSON metadata'], }, + { + # redirects to ondemand extractor and should be passed throught it + # for successful extraction + 'url': 'https://vimeo.com/73445910', + 'info_dict': { + 'id': '73445910', + 'ext': 'mp4', + 'title': 'The Reluctant Revolutionary', + 'uploader': '10Ft Films', + 'uploader_url': 're:https?://(?:www\.)?vimeo\.com/tenfootfilms', + 'uploader_id': 'tenfootfilms', + }, + 'params': { + 'skip_download': True, + }, + }, { 'url': 'http://vimeo.com/moogaloop.swf?clip_id=2539741', 'only_matching': True, @@ -414,7 +430,12 @@ class VimeoIE(VimeoBaseInfoExtractor): # Retrieve video webpage to extract further information request = sanitized_Request(url, headers=headers) try: - webpage = self._download_webpage(request, video_id) + webpage, urlh = self._download_webpage_handle(request, video_id) + # Some URLs redirect to ondemand can't be extracted with + # this extractor right away thus should be passed through + # ondemand extractor (e.g. https://vimeo.com/73445910) + if VimeoOndemandIE.suitable(urlh.geturl()): + return self.url_result(urlh.geturl(), VimeoOndemandIE.ie_key()) except ExtractorError as ee: if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 403: errmsg = ee.cause.read() From 9dde0e04e6d952977ecfd85ceac883106e7ac1ee Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sun, 23 Oct 2016 23:22:09 +0800 Subject: [PATCH 15/16] [litv] Fix extraction (#11006) --- ChangeLog | 3 +++ youtube_dl/extractor/litv.py | 39 ++++++++++++++++-------------------- 2 files changed, 20 insertions(+), 22 deletions(-) diff --git a/ChangeLog b/ChangeLog index 825e357a4..7dabde861 100644 --- a/ChangeLog +++ b/ChangeLog @@ -3,6 +3,9 @@ version Core * Running youtube-dl in the background is fixed (#10996, #10706, #955) +Extractors +* [litv] Fix extraction + version 2016.10.21.1 diff --git a/youtube_dl/extractor/litv.py b/youtube_dl/extractor/litv.py index a3784e6c6..ded717cf2 100644 --- a/youtube_dl/extractor/litv.py +++ b/youtube_dl/extractor/litv.py @@ -2,7 +2,6 @@ from __future__ import unicode_literals import json -import re from .common import InfoExtractor from ..utils import ( @@ -52,8 +51,8 @@ class LiTVIE(InfoExtractor): 'skip': 'Georestricted to Taiwan', }] - def _extract_playlist(self, season_list, video_id, vod_data, view_data, prompt=True): - episode_title = view_data['title'] + def _extract_playlist(self, season_list, video_id, program_info, prompt=True): + episode_title = program_info['title'] content_id = season_list['contentId'] if prompt: @@ -61,7 +60,7 @@ class LiTVIE(InfoExtractor): all_episodes = [ self.url_result(smuggle_url( - self._URL_TEMPLATE % (view_data['contentType'], episode['contentId']), + self._URL_TEMPLATE % (program_info['contentType'], episode['contentId']), {'force_noplaylist': True})) # To prevent infinite recursion for episode in season_list['episode']] @@ -80,19 +79,15 @@ class LiTVIE(InfoExtractor): webpage = self._download_webpage(url, video_id) - view_data = dict(map(lambda t: (t[0], t[2]), re.findall( - r'viewData\.([a-zA-Z]+)\s*=\s*(["\'])([^"\']+)\2', - webpage))) - - vod_data = self._parse_json(self._search_regex( - 'var\s+vod\s*=\s*([^;]+)', webpage, 'VOD data', default='{}'), + program_info = self._parse_json(self._search_regex( + 'var\s+programInfo\s*=\s*([^;]+)', webpage, 'VOD data', default='{}'), video_id) - season_list = list(vod_data.get('seasonList', {}).values()) + season_list = list(program_info.get('seasonList', {}).values()) if season_list: if not noplaylist: return self._extract_playlist( - season_list[0], video_id, vod_data, view_data, + season_list[0], video_id, program_info, prompt=noplaylist_prompt) if noplaylist_prompt: @@ -102,8 +97,8 @@ class LiTVIE(InfoExtractor): # endpoint gives the same result as the data embedded in the webpage. # If georestricted, there are no embedded data, so an extra request is # necessary to get the error code - if 'assetId' not in view_data: - view_data = self._download_json( + if 'assetId' not in program_info: + program_info = self._download_json( 'https://www.litv.tv/vod/ajax/getProgramInfo', video_id, query={'contentId': video_id}, headers={'Accept': 'application/json'}) @@ -112,9 +107,9 @@ class LiTVIE(InfoExtractor): webpage, 'video data', default='{}'), video_id) if not video_data: payload = { - 'assetId': view_data['assetId'], - 'watchDevices': view_data['watchDevices'], - 'contentType': view_data['contentType'], + 'assetId': program_info['assetId'], + 'watchDevices': program_info['watchDevices'], + 'contentType': program_info['contentType'], } video_data = self._download_json( 'https://www.litv.tv/vod/getMainUrl', video_id, @@ -136,11 +131,11 @@ class LiTVIE(InfoExtractor): # LiTV HLS segments doesn't like compressions a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = True - title = view_data['title'] + view_data.get('secondaryMark', '') - description = view_data.get('description') - thumbnail = view_data.get('imageFile') - categories = [item['name'] for item in vod_data.get('category', [])] - episode = int_or_none(view_data.get('episode')) + title = program_info['title'] + program_info.get('secondaryMark', '') + description = program_info.get('description') + thumbnail = program_info.get('imageFile') + categories = [item['name'] for item in program_info.get('category', [])] + episode = int_or_none(program_info.get('episode')) return { 'id': video_id, From 5ace137bf4af2dda4ee17e72716d78783700b07d Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Mon, 24 Oct 2016 15:13:17 +0800 Subject: [PATCH 16/16] [dotsub] Support vimeo embed (closes #10964) --- ChangeLog | 1 + youtube_dl/extractor/dotsub.py | 46 ++++++++++++++++++++++++++++------ 2 files changed, 39 insertions(+), 8 deletions(-) diff --git a/ChangeLog b/ChangeLog index 7dabde861..f64dcbc48 100644 --- a/ChangeLog +++ b/ChangeLog @@ -4,6 +4,7 @@ Core * Running youtube-dl in the background is fixed (#10996, #10706, #955) Extractors ++ [dotsub] Support Vimeo embed (#10964) * [litv] Fix extraction diff --git a/youtube_dl/extractor/dotsub.py b/youtube_dl/extractor/dotsub.py index fd64d1a7f..1f75352ca 100644 --- a/youtube_dl/extractor/dotsub.py +++ b/youtube_dl/extractor/dotsub.py @@ -9,7 +9,7 @@ from ..utils import ( class DotsubIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?dotsub\.com/view/(?P[^/]+)' - _TEST = { + _TESTS = [{ 'url': 'https://dotsub.com/view/9c63db2a-fa95-4838-8e6e-13deafe47f09', 'md5': '21c7ff600f545358134fea762a6d42b6', 'info_dict': { @@ -24,7 +24,24 @@ class DotsubIE(InfoExtractor): 'upload_date': '20131130', 'view_count': int, } - } + }, { + 'url': 'https://dotsub.com/view/747bcf58-bd59-45b7-8c8c-ac312d084ee6', + 'md5': '2bb4a83896434d5c26be868c609429a3', + 'info_dict': { + 'id': '168006778', + 'ext': 'mp4', + 'title': 'Apartments and flats in Raipur the white symphony', + 'description': 'md5:784d0639e6b7d1bc29530878508e38fe', + 'thumbnail': 're:^https?://dotsub.com/media/747bcf58-bd59-45b7-8c8c-ac312d084ee6/p', + 'duration': 290, + 'timestamp': 1476767794.2809999, + 'upload_date': '20160525', + 'uploader': 'parthivi001', + 'uploader_id': 'user52596202', + 'view_count': int, + }, + 'add_ie': ['Vimeo'], + }] def _real_extract(self, url): video_id = self._match_id(url) @@ -37,12 +54,23 @@ class DotsubIE(InfoExtractor): webpage = self._download_webpage(url, video_id) video_url = self._search_regex( [r']+src="([^"]+)"', r'"file"\s*:\s*\'([^\']+)'], - webpage, 'video url') + webpage, 'video url', default=None) + info_dict = { + 'id': video_id, + 'url': video_url, + 'ext': 'flv', + } - return { - 'id': video_id, - 'url': video_url, - 'ext': 'flv', + if not video_url: + setup_data = self._parse_json(self._html_search_regex( + r'(?s)data-setup=([\'"])(?P(?!\1).+?)\1', + webpage, 'setup data', group='content'), video_id) + info_dict = { + '_type': 'url_transparent', + 'url': setup_data['src'], + } + + info_dict.update({ 'title': info['title'], 'description': info.get('description'), 'thumbnail': info.get('screenshotURI'), @@ -50,4 +78,6 @@ class DotsubIE(InfoExtractor): 'uploader': info.get('user'), 'timestamp': float_or_none(info.get('dateCreated'), 1000), 'view_count': int_or_none(info.get('numberOfViews')), - } + }) + + return info_dict