From 05dee6c520eb959316a2e58203cbd5d30e908bc3 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Mon, 13 Nov 2017 19:15:16 +0100 Subject: [PATCH 001/177] [crunchyroll] extract old rtmp formats --- youtube_dl/extractor/crunchyroll.py | 154 ++++++++++++++++------------ 1 file changed, 91 insertions(+), 63 deletions(-) diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py index 18ef3da10..b53f2d705 100644 --- a/youtube_dl/extractor/crunchyroll.py +++ b/youtube_dl/extractor/crunchyroll.py @@ -38,6 +38,16 @@ class CrunchyrollBaseIE(InfoExtractor): _LOGIN_FORM = 'login_form' _NETRC_MACHINE = 'crunchyroll' + def _call_rpc_api(self, method, video_id, note=None, data=None): + data = data or {} + data['req'] = 'RpcApi' + method + data = compat_urllib_parse_urlencode(data).encode('utf-8') + return self._download_xml( + 'http://www.crunchyroll.com/xml/', + video_id, note, fatal=False, data=data, headers={ + 'Content-Type': 'application/x-www-form-urlencoded', + }) + def _login(self): (username, password) = self._get_login_info() if username is None: @@ -377,15 +387,19 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text def _get_subtitles(self, video_id, webpage): subtitles = {} for sub_id, sub_name in re.findall(r'\bssid=([0-9]+)"[^>]+?\btitle="([^"]+)', webpage): - sub_page = self._download_webpage( - 'http://www.crunchyroll.com/xml/?req=RpcApiSubtitle_GetXml&subtitle_script_id=' + sub_id, - video_id, note='Downloading subtitles for ' + sub_name) - id = self._search_regex(r'id=\'([0-9]+)', sub_page, 'subtitle_id', fatal=False) - iv = self._search_regex(r'([^<]+)', sub_page, 'subtitle_iv', fatal=False) - data = self._search_regex(r'([^<]+)', sub_page, 'subtitle_data', fatal=False) - if not id or not iv or not data: + sub_doc = self._call_rpc_api( + 'Subtitle_GetXml', video_id, + 'Downloading subtitles for ' + sub_name, data={ + 'subtitle_script_id': sub_id, + }) + if not sub_doc: continue - subtitle = self._decrypt_subtitles(data, iv, id).decode('utf-8') + sid = sub_doc.get('id') + iv = xpath_text(sub_doc, 'iv', 'subtitle iv') + data = xpath_text(sub_doc, 'data', 'subtitle data') + if not sid or not iv or not data: + continue + subtitle = self._decrypt_subtitles(data, iv, sid).decode('utf-8') lang_code = self._search_regex(r'lang_code=["\']([^"\']+)', subtitle, 'subtitle_lang_code', fatal=False) if not lang_code: continue @@ -456,65 +470,79 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text for fmt in available_fmts: stream_quality, stream_format = self._FORMAT_IDS[fmt] video_format = fmt + 'p' - streamdata_req = sanitized_Request( - 'http://www.crunchyroll.com/xml/?req=RpcApiVideoPlayer_GetStandardConfig&media_id=%s&video_format=%s&video_quality=%s' - % (video_id, stream_format, stream_quality), - compat_urllib_parse_urlencode({'current_page': url}).encode('utf-8')) - streamdata_req.add_header('Content-Type', 'application/x-www-form-urlencoded') - streamdata = self._download_xml( - streamdata_req, video_id, - note='Downloading media info for %s' % video_format) - stream_info = streamdata.find('./{default}preload/stream_info') - video_encode_id = xpath_text(stream_info, './video_encode_id') - if video_encode_id in video_encode_ids: - continue - video_encode_ids.append(video_encode_id) + stream_infos = [] + streamdata = self._call_rpc_api( + 'VideoPlayer_GetStandardConfig', video_id, + 'Downloading media info for %s' % video_format, data={ + 'media_id': video_id, + 'video_format': stream_format, + 'video_quality': stream_quality, + 'current_page': url, + }) + if streamdata: + stream_info = streamdata.find('./{default}preload/stream_info') + if stream_info: + stream_infos.append(stream_info) + stream_info = self._call_rpc_api( + 'VideoEncode_GetStreamInfo', video_id, + 'Downloading stream info for %s' % video_format, data={ + 'media_id': video_id, + 'video_format': stream_format, + 'video_encode_quality': stream_quality, + }) + if stream_info: + stream_infos.append(stream_info) + for stream_info in stream_infos: + video_encode_id = xpath_text(stream_info, './video_encode_id') + if video_encode_id in video_encode_ids: + continue + video_encode_ids.append(video_encode_id) - video_file = xpath_text(stream_info, './file') - if not video_file: - continue - if video_file.startswith('http'): - formats.extend(self._extract_m3u8_formats( - video_file, video_id, 'mp4', entry_protocol='m3u8_native', - m3u8_id='hls', fatal=False)) - continue - - video_url = xpath_text(stream_info, './host') - if not video_url: - continue - metadata = stream_info.find('./metadata') - format_info = { - 'format': video_format, - 'format_id': video_format, - 'height': int_or_none(xpath_text(metadata, './height')), - 'width': int_or_none(xpath_text(metadata, './width')), - } - - if '.fplive.net/' in video_url: - video_url = re.sub(r'^rtmpe?://', 'http://', video_url.strip()) - parsed_video_url = compat_urlparse.urlparse(video_url) - direct_video_url = compat_urlparse.urlunparse(parsed_video_url._replace( - netloc='v.lvlt.crcdn.net', - path='%s/%s' % (remove_end(parsed_video_url.path, '/'), video_file.split(':')[-1]))) - if self._is_valid_url(direct_video_url, video_id, video_format): - format_info.update({ - 'url': direct_video_url, - }) - formats.append(format_info) + video_file = xpath_text(stream_info, './file') + if not video_file: + continue + if video_file.startswith('http'): + formats.extend(self._extract_m3u8_formats( + video_file, video_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id='hls', fatal=False)) continue - format_info.update({ - 'url': video_url, - 'play_path': video_file, - 'ext': 'flv', - }) - formats.append(format_info) - self._sort_formats(formats) + video_url = xpath_text(stream_info, './host') + if not video_url: + continue + metadata = stream_info.find('./metadata') + format_info = { + 'format': video_format, + 'height': int_or_none(xpath_text(metadata, './height')), + 'width': int_or_none(xpath_text(metadata, './width')), + } - metadata = self._download_xml( - 'http://www.crunchyroll.com/xml', video_id, - note='Downloading media info', query={ - 'req': 'RpcApiVideoPlayer_GetMediaMetadata', + if '.fplive.net/' in video_url: + video_url = re.sub(r'^rtmpe?://', 'http://', video_url.strip()) + parsed_video_url = compat_urlparse.urlparse(video_url) + direct_video_url = compat_urlparse.urlunparse(parsed_video_url._replace( + netloc='v.lvlt.crcdn.net', + path='%s/%s' % (remove_end(parsed_video_url.path, '/'), video_file.split(':')[-1]))) + if self._is_valid_url(direct_video_url, video_id, video_format): + format_info.update({ + 'format_id': 'http-' + video_format, + 'url': direct_video_url, + }) + formats.append(format_info) + continue + + format_info.update({ + 'format_id': 'rtmp-' + video_format, + 'url': video_url, + 'play_path': video_file, + 'ext': 'flv', + }) + formats.append(format_info) + self._sort_formats(formats, ('height', 'width', 'tbr', 'fps')) + + metadata = self._call_rpc_api( + 'VideoPlayer_GetMediaMetadata', video_id, + note='Downloading media info', data={ 'media_id': video_id, }) From 5871ebac473e723376722a37baecf51d6ae7d781 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 14 Nov 2017 01:43:20 +0700 Subject: [PATCH 002/177] [YoutubeDL] Fix playlist range optimization for --playlist-items (closes #14740) --- youtube_dl/YoutubeDL.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 342d6b47c..68721e9ab 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -948,7 +948,8 @@ class YoutubeDL(object): report_download(n_entries) else: # iterable if playlistitems: - entries = make_playlistitems_entries(list(ie_entries)) + entries = make_playlistitems_entries(list(itertools.islice( + ie_entries, 0, max(playlistitems)))) else: entries = list(itertools.islice( ie_entries, playliststart, playlistend)) From 0987f2ddb27a27506c697ad9dae2ccbf24fc786d Mon Sep 17 00:00:00 2001 From: Timendum Date: Tue, 14 Nov 2017 16:34:45 +0100 Subject: [PATCH 003/177] [vshare] Fix extraction (closes #14473) --- youtube_dl/extractor/generic.py | 16 +++++++++++++++ youtube_dl/extractor/vshare.py | 36 +++++++++++++++++++++++++-------- 2 files changed, 44 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 2a9c3e2de..31564e550 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -102,6 +102,7 @@ from .joj import JojIE from .megaphone import MegaphoneIE from .vzaar import VzaarIE from .channel9 import Channel9IE +from .vshare import VShareIE class GenericIE(InfoExtractor): @@ -1921,6 +1922,16 @@ class GenericIE(InfoExtractor): 'title': 'Rescue Kit 14 Free Edition - Getting started', }, 'playlist_count': 4, + }, + { + # vshare embed + 'url': 'https://youtube-dl-demo.neocities.org/vshare.html', + 'md5': '17b39f55b5497ae8b59f5fbce8e35886', + 'info_dict': { + 'id': '0f64ce6', + 'title': 'vl14062007715967', + 'ext': 'mp4', + } } # { # # TODO: find another test @@ -2879,6 +2890,11 @@ class GenericIE(InfoExtractor): return self.playlist_from_matches( channel9_urls, video_id, video_title, ie=Channel9IE.ie_key()) + vshare_urls = VShareIE._extract_urls(webpage) + if vshare_urls: + return self.playlist_from_matches( + vshare_urls, video_id, video_title, ie=VShareIE.ie_key()) + def merge_dicts(dict1, dict2): merged = {} for k, v in dict1.items(): diff --git a/youtube_dl/extractor/vshare.py b/youtube_dl/extractor/vshare.py index 5addbc280..ea39a9051 100644 --- a/youtube_dl/extractor/vshare.py +++ b/youtube_dl/extractor/vshare.py @@ -1,14 +1,18 @@ # coding: utf-8 from __future__ import unicode_literals +import re + from .common import InfoExtractor +from ..compat import compat_chr +from ..utils import decode_packed_codes class VShareIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?vshare\.io/[dv]/(?P[^/?#&]+)' _TESTS = [{ 'url': 'https://vshare.io/d/0f64ce6', - 'md5': '16d7b8fef58846db47419199ff1ab3e7', + 'md5': '17b39f55b5497ae8b59f5fbce8e35886', 'info_dict': { 'id': '0f64ce6', 'title': 'vl14062007715967', @@ -19,20 +23,36 @@ class VShareIE(InfoExtractor): 'only_matching': True, }] + def _extract_packed(self, webpage): + packed = self._search_regex(r'(eval\(function.+)', webpage, 'packed code') + unpacked = decode_packed_codes(packed) + digits = self._search_regex(r'\[((?:\d+,?)+)\]', unpacked, 'digits') + digits = digits.split(',') + digits = [int(digit) for digit in digits] + key_digit = self._search_regex(r'fromCharCode\(.+?(\d+)\)}', unpacked, 'key digit') + chars = [compat_chr(d - int(key_digit)) for d in digits] + return ''.join(chars) + def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage( - 'https://vshare.io/d/%s' % video_id, video_id) + 'https://vshare.io/v/%s/width-650/height-430/1' % video_id, video_id) - title = self._html_search_regex( - r'(?s)
(.+?)
', webpage, 'title') - video_url = self._search_regex( - r']+href=(["\'])(?P(?:https?:)?//.+?)\1[^>]*>[Cc]lick\s+here', - webpage, 'video url', group='url') + title = self._html_search_regex(r'([^<]+)', webpage, 'title') + title = title.split(' - ')[0] + unpacked = self._extract_packed(webpage) + video_urls = re.findall(r']+?src=["\'](?P(?:https?:)?//(?:www\.)?vshare\.io/v/[^/?#&]+)', + webpage) From ff31f2d5c3750364b013a9bf59b85cebd0cee1fd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 14 Nov 2017 22:39:54 +0700 Subject: [PATCH 004/177] [vshare] Capture and output error message --- youtube_dl/extractor/vshare.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/vshare.py b/youtube_dl/extractor/vshare.py index ea39a9051..20ce22e16 100644 --- a/youtube_dl/extractor/vshare.py +++ b/youtube_dl/extractor/vshare.py @@ -5,7 +5,10 @@ import re from .common import InfoExtractor from ..compat import compat_chr -from ..utils import decode_packed_codes +from ..utils import ( + decode_packed_codes, + ExtractorError, +) class VShareIE(InfoExtractor): @@ -42,6 +45,12 @@ class VShareIE(InfoExtractor): title = self._html_search_regex(r'([^<]+)', webpage, 'title') title = title.split(' - ')[0] + error = self._html_search_regex( + r'(?s)]+\bclass=["\']xxx-error[^>]+>(.+?) Date: Tue, 14 Nov 2017 22:49:25 +0700 Subject: [PATCH 005/177] [vshare] Improve extraction, fix formats sorting and carry long lines --- youtube_dl/extractor/vshare.py | 41 ++++++++++++++++++++-------------- 1 file changed, 24 insertions(+), 17 deletions(-) diff --git a/youtube_dl/extractor/vshare.py b/youtube_dl/extractor/vshare.py index 20ce22e16..e4ec77889 100644 --- a/youtube_dl/extractor/vshare.py +++ b/youtube_dl/extractor/vshare.py @@ -26,13 +26,20 @@ class VShareIE(InfoExtractor): 'only_matching': True, }] + @staticmethod + def _extract_urls(webpage): + return re.findall( + r']+?src=["\'](?P(?:https?:)?//(?:www\.)?vshare\.io/v/[^/?#&]+)', + webpage) + def _extract_packed(self, webpage): - packed = self._search_regex(r'(eval\(function.+)', webpage, 'packed code') + packed = self._search_regex( + r'(eval\(function.+)', webpage, 'packed code') unpacked = decode_packed_codes(packed) digits = self._search_regex(r'\[((?:\d+,?)+)\]', unpacked, 'digits') - digits = digits.split(',') - digits = [int(digit) for digit in digits] - key_digit = self._search_regex(r'fromCharCode\(.+?(\d+)\)}', unpacked, 'key digit') + digits = [int(digit) for digit in digits.split(',')] + key_digit = self._search_regex( + r'fromCharCode\(.+?(\d+)\)}', unpacked, 'key digit') chars = [compat_chr(d - int(key_digit)) for d in digits] return ''.join(chars) @@ -40,9 +47,11 @@ class VShareIE(InfoExtractor): video_id = self._match_id(url) webpage = self._download_webpage( - 'https://vshare.io/v/%s/width-650/height-430/1' % video_id, video_id) + 'https://vshare.io/v/%s/width-650/height-430/1' % video_id, + video_id) - title = self._html_search_regex(r'([^<]+)', webpage, 'title') + title = self._html_search_regex( + r'([^<]+)', webpage, 'title') title = title.split(' - ')[0] error = self._html_search_regex( @@ -51,17 +60,15 @@ class VShareIE(InfoExtractor): if error: raise ExtractorError(error, expected=True) - unpacked = self._extract_packed(webpage) - video_urls = re.findall(r'%s' % self._extract_packed(webpage), + video_id)[0] + + self._sort_formats(info['formats']) + + info.update({ 'id': video_id, 'title': title, - 'formats': formats, - } + }) - @staticmethod - def _extract_urls(webpage): - return re.findall( - r']+?src=["\'](?P(?:https?:)?//(?:www\.)?vshare\.io/v/[^/?#&]+)', - webpage) + return info From ea2295842f79c9efff3a9abce1d0eee7de4953d6 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Tue, 14 Nov 2017 17:41:30 +0100 Subject: [PATCH 006/177] [common] skip Apple FairPlay m3u8 manifests(closes #14741) --- youtube_dl/extractor/common.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index e2d9f52b0..a9d68fc0c 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1356,6 +1356,9 @@ class InfoExtractor(object): if '#EXT-X-FAXS-CM:' in m3u8_doc: # Adobe Flash Access return [] + if re.search(r'#EXT-X-SESSION-KEY:.*?URI="skd://', m3u8_doc): # Apple FairPlay + return [] + formats = [] format_url = lambda u: ( From fae0eb42ec4309fe7fb8476d30621ba1d60fa168 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 14 Nov 2017 23:59:30 +0700 Subject: [PATCH 007/177] [ChangeLog] Actualize --- ChangeLog | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/ChangeLog b/ChangeLog index cedab4723..6ed0f011f 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,7 +1,25 @@ version +Core +* [common] Skip Apple FairPlay m3u8 manifests (#14741) +* [YoutubeDL] Fix playlist range optimization for --playlist-items (#14740) + Extractors +* [vshare] Capture and output error message +* [vshare] Fix extraction (#14473) +* [crunchyroll] Extract old RTMP formats +* [tva] Fix extraction (#14736) +* [gamespot] Lower preference of HTTP formats (#14652) +* [instagram:user] Fix extraction (#14699) +* [ccma] Fix typo (#14730) +- Remove sensitive data from logging in messages +* [instagram:user] Fix extraction (#14699) ++ [gamespot] Add support for article URLs (#14652) +* [gamespot] Skip Brightcove Once HTTP formats (#14652) +* [cartoonnetwork] Update tokenizer_src (#14666) + [wsj] Recognize another URL pattern (#14704) +* [pandatv] Update API URL and sign format URLs (#14693) +* [crunchyroll] Use old login method (#11572) version 2017.11.06 From 08e45b39e76419f63aa43d5008257789d8a30bf8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 15 Nov 2017 00:15:42 +0700 Subject: [PATCH 008/177] release 2017.11.15 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- youtube_dl/version.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index be6e6ddab..4dd1a6e59 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.11.06*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.11.06** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.11.15*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.11.15** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2017.11.06 +[debug] youtube-dl version 2017.11.15 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 6ed0f011f..6b4befb8f 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2017.11.15 Core * [common] Skip Apple FairPlay m3u8 manifests (#14741) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 8b67d23fe..1c3cbefeb 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2017.11.06' +__version__ = '2017.11.15' From 9cbd4dda10ad248a5268ec1e0e563cf97024a8b9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 15 Nov 2017 22:14:54 +0700 Subject: [PATCH 009/177] [instagram] Fix description, timestamp and counters extraction (closes #14755) --- youtube_dl/extractor/instagram.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/instagram.py b/youtube_dl/extractor/instagram.py index 20db31f86..a77f619d2 100644 --- a/youtube_dl/extractor/instagram.py +++ b/youtube_dl/extractor/instagram.py @@ -130,13 +130,21 @@ class InstagramIE(InfoExtractor): video_url = media.get('video_url') height = int_or_none(media.get('dimensions', {}).get('height')) width = int_or_none(media.get('dimensions', {}).get('width')) - description = media.get('caption') + description = try_get( + media, lambda x: x['edge_media_to_caption']['edges'][0]['node']['text'], + compat_str) or media.get('caption') thumbnail = media.get('display_src') - timestamp = int_or_none(media.get('date')) + timestamp = int_or_none(media.get('taken_at_timestamp') or media.get('date')) uploader = media.get('owner', {}).get('full_name') uploader_id = media.get('owner', {}).get('username') - like_count = int_or_none(media.get('likes', {}).get('count')) - comment_count = int_or_none(media.get('comments', {}).get('count')) + + def get_count(key, kind): + return int_or_none(try_get( + media, (lambda x: x['edge_media_%s' % key]['count'], + lambda x: x['%ss' % kind]['count']))) + like_count = get_count('preview_like', 'like') + comment_count = get_count('to_comment', 'comment') + comments = [{ 'author': comment.get('user', {}).get('username'), 'author_id': comment.get('user', {}).get('id'), From 3192d4bc7a063983c3a82bc4320c16d65679307a Mon Sep 17 00:00:00 2001 From: Windom Date: Thu, 16 Nov 2017 20:05:04 +0200 Subject: [PATCH 010/177] [spankbang] Add support for mobile URLs and fix test --- youtube_dl/extractor/spankbang.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/spankbang.py b/youtube_dl/extractor/spankbang.py index 2863e53b5..e6c2dcfc4 100644 --- a/youtube_dl/extractor/spankbang.py +++ b/youtube_dl/extractor/spankbang.py @@ -7,7 +7,7 @@ from ..utils import ExtractorError class SpankBangIE(InfoExtractor): - _VALID_URL = r'https?://(?:(?:www|[a-z]{2})\.)?spankbang\.com/(?P[\da-z]+)/video' + _VALID_URL = r'https?://(?:(?:www|m|[a-z]{2})\.)?spankbang\.com/(?P[\da-z]+)/video' _TESTS = [{ 'url': 'http://spankbang.com/3vvn/video/fantasy+solo', 'md5': '1cc433e1d6aa14bc376535b8679302f7', @@ -15,7 +15,7 @@ class SpankBangIE(InfoExtractor): 'id': '3vvn', 'ext': 'mp4', 'title': 'fantasy solo', - 'description': 'Watch fantasy solo free HD porn video - 05 minutes - dillion harper masturbates on a bed free adult movies.', + 'description': 'Watch fantasy solo free HD porn video - 05 minutes - Babe,Masturbation,Solo,Toy - dillion harper masturbates on a bed free adult movies sexy clips.', 'thumbnail': r're:^https?://.*\.jpg$', 'uploader': 'silly2587', 'age_limit': 18, @@ -28,6 +28,10 @@ class SpankBangIE(InfoExtractor): # no uploader 'url': 'http://spankbang.com/lklg/video/sex+with+anyone+wedding+edition+2', 'only_matching': True, + }, { + # mobile page + 'url': 'http://m.spankbang.com/1o2de/video/can+t+remember+her+name', + 'only_matching': True, }] def _real_extract(self, url): From 38db52adf35c2134444e5b6b601e9567797e9195 Mon Sep 17 00:00:00 2001 From: Windom Date: Thu, 16 Nov 2017 20:50:07 +0200 Subject: [PATCH 011/177] [drtuber] Add support for mobile URLs --- youtube_dl/extractor/drtuber.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/drtuber.py b/youtube_dl/extractor/drtuber.py index c5d56a9ad..c88b3126b 100644 --- a/youtube_dl/extractor/drtuber.py +++ b/youtube_dl/extractor/drtuber.py @@ -10,7 +10,7 @@ from ..utils import ( class DrTuberIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?drtuber\.com/(?:video|embed)/(?P\d+)(?:/(?P[\w-]+))?' + _VALID_URL = r'https?://(?:(?:www|m)\.)?drtuber\.com/(?:video|embed)/(?P\d+)(?:/(?P[\w-]+))?' _TESTS = [{ 'url': 'http://www.drtuber.com/video/1740434/hot-perky-blonde-naked-golf', 'md5': '93e680cf2536ad0dfb7e74d94a89facd', @@ -28,6 +28,9 @@ class DrTuberIE(InfoExtractor): }, { 'url': 'http://www.drtuber.com/embed/489939', 'only_matching': True, + }, { + 'url': 'http://m.drtuber.com/video/3893529/lingerie-blowjob-from-beautiful-teen', + 'only_matching': True, }] @staticmethod From f610dbb05f8d17cc95437958835a437c3777b38c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 18 Nov 2017 19:02:56 +0700 Subject: [PATCH 012/177] [extractor/common] Use final URL when dumping request (closes #14769) --- youtube_dl/extractor/common.py | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index a9d68fc0c..8e4ee0deb 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -592,19 +592,11 @@ class InfoExtractor(object): if not encoding: encoding = self._guess_encoding_from_content(content_type, webpage_bytes) if self._downloader.params.get('dump_intermediate_pages', False): - try: - url = url_or_request.get_full_url() - except AttributeError: - url = url_or_request - self.to_screen('Dumping request to ' + url) + self.to_screen('Dumping request to ' + urlh.geturl()) dump = base64.b64encode(webpage_bytes).decode('ascii') self._downloader.to_screen(dump) if self._downloader.params.get('write_pages', False): - try: - url = url_or_request.get_full_url() - except AttributeError: - url = url_or_request - basen = '%s_%s' % (video_id, url) + basen = '%s_%s' % (video_id, urlh.geturl()) if len(basen) > 240: h = '___' + hashlib.md5(basen.encode('utf-8')).hexdigest() basen = basen[:240 - len(h)] + h From a9efdf3d4a18ec5657ea50f31715e1b88a945820 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sun, 19 Nov 2017 12:59:31 +0100 Subject: [PATCH 013/177] [livestream] make smil extraction non fatal(fixes #14792) --- youtube_dl/extractor/livestream.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/livestream.py b/youtube_dl/extractor/livestream.py index 317ebbc4e..c4776bbf3 100644 --- a/youtube_dl/extractor/livestream.py +++ b/youtube_dl/extractor/livestream.py @@ -114,7 +114,7 @@ class LivestreamIE(InfoExtractor): smil_url = video_data.get('smil_url') if smil_url: - formats.extend(self._extract_smil_formats(smil_url, video_id)) + formats.extend(self._extract_smil_formats(smil_url, video_id, fatal=False)) m3u8_url = video_data.get('m3u8_url') if m3u8_url: From 8f639411042d35cd3be6eeff485e3015bafce4d7 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Wed, 22 Nov 2017 22:49:48 +0800 Subject: [PATCH 014/177] [youku] Fix extraction; update ccode (closes #14815) --- ChangeLog | 6 ++++++ youtube_dl/extractor/youku.py | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index 6b4befb8f..e3b7750f6 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +version + +Extractors +* [youku] Fix extraction; update ccode (#14815) + + version 2017.11.15 Core diff --git a/youtube_dl/extractor/youku.py b/youtube_dl/extractor/youku.py index 0c4bc2eda..6822a30bc 100644 --- a/youtube_dl/extractor/youku.py +++ b/youtube_dl/extractor/youku.py @@ -154,7 +154,7 @@ class YoukuIE(InfoExtractor): # request basic data basic_data_params = { 'vid': video_id, - 'ccode': '0402' if 'tudou.com' in url else '0401', + 'ccode': '0502', 'client_ip': '192.168.1.1', 'utid': cna, 'client_ts': time.time() / 1000, From 2688664762f406b1ba2913af25ee3a2d2ba58038 Mon Sep 17 00:00:00 2001 From: enigmaquip Date: Wed, 22 Nov 2017 16:39:11 -0700 Subject: [PATCH 015/177] [culturebox] Fix extraction (closes #14827) --- youtube_dl/extractor/francetv.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/francetv.py b/youtube_dl/extractor/francetv.py index 2bcbb3e39..037e538cc 100644 --- a/youtube_dl/extractor/francetv.py +++ b/youtube_dl/extractor/francetv.py @@ -363,6 +363,6 @@ class CultureboxIE(FranceTVBaseInfoExtractor): raise ExtractorError('Video %s is not available' % name, expected=True) video_id, catalogue = self._search_regex( - r'"http://videos\.francetv\.fr/video/([^@]+@[^"]+)"', webpage, 'video id').split('@') + r'"https?://videos\.francetv\.fr/video/([^@]+@[^"]+)"', webpage, 'video id').split('@') return self._extract_video(video_id, catalogue) From 939be9adfe810ada7dbd5e9032bcfec19fafa14b Mon Sep 17 00:00:00 2001 From: John Hawkinson Date: Wed, 22 Nov 2017 11:47:02 -0500 Subject: [PATCH 016/177] [JWPlatform] Support iframes Support content.jwplatform... src attributes inside