From 0e6f914b3b40ef2ca78d82051a194faaad64dd9e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20H=C3=B6pfl?= Date: Wed, 13 Feb 2019 16:29:43 +0100 Subject: [PATCH 01/17] [vivo] Fix extraction (closes #18906) --- youtube_dl/extractor/shared.py | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/shared.py b/youtube_dl/extractor/shared.py index 931a0f70e..eade8fd9e 100644 --- a/youtube_dl/extractor/shared.py +++ b/youtube_dl/extractor/shared.py @@ -1,5 +1,7 @@ from __future__ import unicode_literals +import re + from .common import InfoExtractor from ..compat import compat_b64decode from ..utils import ( @@ -7,6 +9,7 @@ from ..utils import ( int_or_none, url_or_none, urlencode_postdata, + unescapeHTML, ) @@ -22,8 +25,7 @@ class SharedBaseIE(InfoExtractor): video_url = self._extract_video_url(webpage, video_id, url) - title = compat_b64decode(self._html_search_meta( - 'full:title', webpage, 'title')).decode('utf-8') + title = self._extract_title(webpage) filesize = int_or_none(self._html_search_meta( 'full:size', webpage, 'file size', fatal=False)) @@ -35,6 +37,10 @@ class SharedBaseIE(InfoExtractor): 'title': title, } + def _extract_title(self, webpage): + return compat_b64decode(self._html_search_meta( + 'full:title', webpage, 'title')).decode('utf-8') + class SharedIE(SharedBaseIE): IE_DESC = 'shared.sx' @@ -86,6 +92,14 @@ class VivoIE(SharedBaseIE): }, } + def _extract_title(self, webpage): + data_title = self._search_regex( + r'data-name\s*=\s*(["\'])(?P<title>(?:(?!\1).)+)\1', webpage, + 'title', default=None, group='title') + if data_title: + return unescapeHTML(re.sub(r"\.[a-z0-9]{3,4}$", "", data_title)) + return self._og_search_title(webpage) + def _extract_video_url(self, webpage, video_id, *args): def decode_url(encoded_url): return compat_b64decode(encoded_url).decode('utf-8') From e438e8146965d2c650c1575dc97809bcc9504f88 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: 
Thu, 23 May 2019 03:04:58 +0700 Subject: [PATCH 02/17] [vivo] Improve extraction (closes #19217) --- youtube_dl/extractor/shared.py | 31 +++++++++++++++++++++---------- 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/shared.py b/youtube_dl/extractor/shared.py index eade8fd9e..ff575f592 100644 --- a/youtube_dl/extractor/shared.py +++ b/youtube_dl/extractor/shared.py @@ -1,15 +1,15 @@ from __future__ import unicode_literals -import re - from .common import InfoExtractor from ..compat import compat_b64decode from ..utils import ( + determine_ext, ExtractorError, int_or_none, + KNOWN_EXTENSIONS, + parse_filesize, url_or_none, urlencode_postdata, - unescapeHTML, ) @@ -26,8 +26,7 @@ class SharedBaseIE(InfoExtractor): video_url = self._extract_video_url(webpage, video_id, url) title = self._extract_title(webpage) - filesize = int_or_none(self._html_search_meta( - 'full:size', webpage, 'file size', fatal=False)) + filesize = int_or_none(self._extract_filesize(webpage)) return { 'id': video_id, @@ -41,6 +40,10 @@ class SharedBaseIE(InfoExtractor): return compat_b64decode(self._html_search_meta( 'full:title', webpage, 'title')).decode('utf-8') + def _extract_filesize(self, webpage): + return self._html_search_meta( + 'full:size', webpage, 'file size', fatal=False) + class SharedIE(SharedBaseIE): IE_DESC = 'shared.sx' @@ -88,19 +91,27 @@ class VivoIE(SharedBaseIE): 'id': 'd7ddda0e78', 'ext': 'mp4', 'title': 'Chicken', - 'filesize': 528031, + 'filesize': 515659, }, } def _extract_title(self, webpage): - data_title = self._search_regex( + title = self._html_search_regex( r'data-name\s*=\s*(["\'])(?P<title>(?:(?!\1).)+)\1', webpage, 'title', default=None, group='title') - if data_title: - return unescapeHTML(re.sub(r"\.[a-z0-9]{3,4}$", "", data_title)) + if title: + ext = determine_ext(title) + if ext.lower() in KNOWN_EXTENSIONS: + title = title.rpartition('.' 
+ ext)[0] + return title return self._og_search_title(webpage) - def _extract_video_url(self, webpage, video_id, *args): + def _extract_filesize(self, webpage): + return parse_filesize(self._search_regex( + r'data-type=["\']video["\'][^>]*>Watch.*?<strong>\s*\((.+?)\)', + webpage, 'filesize', fatal=False)) + + def _extract_video_url(self, webpage, video_id, url): def decode_url(encoded_url): return compat_b64decode(encoded_url).decode('utf-8') From ea7538209468f630075d08d44ef7b0119f78d2eb Mon Sep 17 00:00:00 2001 From: smed79 <1873139+smed79@users.noreply.github.com> Date: Wed, 22 May 2019 21:30:17 +0100 Subject: [PATCH 03/17] [openload] Add support for oload.press (#21135) --- youtube_dl/extractor/openload.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index a8e906858..b96be6f64 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -244,7 +244,7 @@ class PhantomJSwrapper(object): class OpenloadIE(InfoExtractor): - _DOMAINS = r'(?:openload\.(?:co|io|link|pw)|oload\.(?:tv|stream|site|xyz|win|download|cloud|cc|icu|fun|club|info|pw|live|space|services)|oladblock\.(?:services|xyz|me)|openloed\.co)' + _DOMAINS = r'(?:openload\.(?:co|io|link|pw)|oload\.(?:tv|stream|site|xyz|win|download|cloud|cc|icu|fun|club|info|press|pw|live|space|services)|oladblock\.(?:services|xyz|me)|openloed\.co)' _VALID_URL = r'''(?x) https?:// (?P<host> @@ -357,6 +357,9 @@ class OpenloadIE(InfoExtractor): }, { 'url': 'https://oload.services/embed/bs1NWj1dCag/', 'only_matching': True, + }, { + 'url': 'https://oload.press/embed/drTBl1aOTvk/', + 'only_matching': True, }, { 'url': 'https://oladblock.services/f/b8NWEgkqNLI/', 'only_matching': True, From 612300a686fd83d475b7fddc17cb2ccd8ca0b5ef Mon Sep 17 00:00:00 2001 From: ealgase <mostdigitsofpi@gmail.com> Date: Wed, 22 May 2019 16:38:48 -0400 Subject: [PATCH 04/17] [novamov] Remove extractors (#21077) Sites no longer 
exist --- youtube_dl/extractor/extractors.py | 7 - youtube_dl/extractor/generic.py | 13 -- youtube_dl/extractor/novamov.py | 212 ----------------------------- 3 files changed, 232 deletions(-) delete mode 100644 youtube_dl/extractor/novamov.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 3037b5a45..e5aee96c2 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -772,13 +772,6 @@ from .nova import ( NovaEmbedIE, NovaIE, ) -from .novamov import ( - AuroraVidIE, - CloudTimeIE, - NowVideoIE, - VideoWeedIE, - WholeCloudIE, -) from .nowness import ( NownessIE, NownessPlaylistIE, diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 3a13c62eb..eeb0d25f6 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -2583,19 +2583,6 @@ class GenericIE(InfoExtractor): if mobj is not None: return self.url_result(mobj.group(1), 'Mpora') - # Look for embedded NovaMov-based player - mobj = re.search( - r'''(?x)<(?:pagespeed_)?iframe[^>]+?src=(["\']) - (?P<url>http://(?:(?:embed|www)\.)? 
- (?:novamov\.com| - nowvideo\.(?:ch|sx|eu|at|ag|co)| - videoweed\.(?:es|com)| - movshare\.(?:net|sx|ag)| - divxstage\.(?:eu|net|ch|co|at|ag)) - /embed\.php.+?)\1''', webpage) - if mobj is not None: - return self.url_result(mobj.group('url')) - # Look for embedded Facebook player facebook_urls = FacebookIE._extract_urls(webpage) if facebook_urls: diff --git a/youtube_dl/extractor/novamov.py b/youtube_dl/extractor/novamov.py deleted file mode 100644 index 829c71960..000000000 --- a/youtube_dl/extractor/novamov.py +++ /dev/null @@ -1,212 +0,0 @@ -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..compat import compat_urlparse -from ..utils import ( - ExtractorError, - NO_DEFAULT, - sanitized_Request, - urlencode_postdata, -) - - -class NovaMovIE(InfoExtractor): - IE_NAME = 'novamov' - IE_DESC = 'NovaMov' - - _VALID_URL_TEMPLATE = r'''(?x) - http:// - (?: - (?:www\.)?%(host)s/(?:file|video|mobile/\#/videos)/| - (?:(?:embed|www)\.)%(host)s/embed(?:\.php|/)?\?(?:.*?&)?\bv= - ) - (?P<id>[a-z\d]{13}) - ''' - _VALID_URL = _VALID_URL_TEMPLATE % {'host': r'novamov\.com'} - - _HOST = 'www.novamov.com' - - _FILE_DELETED_REGEX = r'This file no longer exists on our servers!</h2>' - _FILEKEY_REGEX = r'flashvars\.filekey=(?P<filekey>"?[^"]+"?);' - _TITLE_REGEX = r'(?s)<div class="v_tab blockborder rounded5" id="v_tab1">\s*<h3>([^<]+)</h3>' - _DESCRIPTION_REGEX = r'(?s)<div class="v_tab blockborder rounded5" id="v_tab1">\s*<h3>[^<]+</h3><p>([^<]+)</p>' - _URL_TEMPLATE = 'http://%s/video/%s' - - _TEST = None - - def _check_existence(self, webpage, video_id): - if re.search(self._FILE_DELETED_REGEX, webpage) is not None: - raise ExtractorError('Video %s does not exist' % video_id, expected=True) - - def _real_extract(self, url): - video_id = self._match_id(url) - - url = self._URL_TEMPLATE % (self._HOST, video_id) - - webpage = self._download_webpage( - url, video_id, 'Downloading video page') - - self._check_existence(webpage, video_id) 
- - def extract_filekey(default=NO_DEFAULT): - filekey = self._search_regex( - self._FILEKEY_REGEX, webpage, 'filekey', default=default) - if filekey is not default and (filekey[0] != '"' or filekey[-1] != '"'): - return self._search_regex( - r'var\s+%s\s*=\s*"([^"]+)"' % re.escape(filekey), webpage, 'filekey', default=default) - else: - return filekey - - filekey = extract_filekey(default=None) - - if not filekey: - fields = self._hidden_inputs(webpage) - post_url = self._search_regex( - r'<form[^>]+action=(["\'])(?P<url>.+?)\1', webpage, - 'post url', default=url, group='url') - if not post_url.startswith('http'): - post_url = compat_urlparse.urljoin(url, post_url) - request = sanitized_Request( - post_url, urlencode_postdata(fields)) - request.add_header('Content-Type', 'application/x-www-form-urlencoded') - request.add_header('Referer', post_url) - webpage = self._download_webpage( - request, video_id, 'Downloading continue to the video page') - self._check_existence(webpage, video_id) - - filekey = extract_filekey() - - title = self._html_search_regex(self._TITLE_REGEX, webpage, 'title') - description = self._html_search_regex(self._DESCRIPTION_REGEX, webpage, 'description', default='', fatal=False) - - api_response = self._download_webpage( - 'http://%s/api/player.api.php?key=%s&file=%s' % (self._HOST, filekey, video_id), video_id, - 'Downloading video api response') - - response = compat_urlparse.parse_qs(api_response) - - if 'error_msg' in response: - raise ExtractorError('%s returned error: %s' % (self.IE_NAME, response['error_msg'][0]), expected=True) - - video_url = response['url'][0] - - return { - 'id': video_id, - 'url': video_url, - 'title': title, - 'description': description - } - - -class WholeCloudIE(NovaMovIE): - IE_NAME = 'wholecloud' - IE_DESC = 'WholeCloud' - - _VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': r'(?:wholecloud\.net|movshare\.(?:net|sx|ag))'} - - _HOST = 'www.wholecloud.net' - - _FILE_DELETED_REGEX = r'>This file no longer 
exists on our servers.<' - _TITLE_REGEX = r'<strong>Title:</strong> ([^<]+)</p>' - _DESCRIPTION_REGEX = r'<strong>Description:</strong> ([^<]+)</p>' - - _TEST = { - 'url': 'http://www.wholecloud.net/video/559e28be54d96', - 'md5': 'abd31a2132947262c50429e1d16c1bfd', - 'info_dict': { - 'id': '559e28be54d96', - 'ext': 'flv', - 'title': 'dissapeared image', - 'description': 'optical illusion dissapeared image magic illusion', - } - } - - -class NowVideoIE(NovaMovIE): - IE_NAME = 'nowvideo' - IE_DESC = 'NowVideo' - - _VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': r'nowvideo\.(?:to|ch|ec|sx|eu|at|ag|co|li)'} - - _HOST = 'www.nowvideo.to' - - _FILE_DELETED_REGEX = r'>This file no longer exists on our servers.<' - _TITLE_REGEX = r'<h4>([^<]+)</h4>' - _DESCRIPTION_REGEX = r'</h4>\s*<p>([^<]+)</p>' - - _TEST = { - 'url': 'http://www.nowvideo.sx/video/f1d6fce9a968b', - 'md5': '12c82cad4f2084881d8bc60ee29df092', - 'info_dict': { - 'id': 'f1d6fce9a968b', - 'ext': 'flv', - 'title': 'youtubedl test video BaWjenozKc', - 'description': 'Description', - }, - } - - -class VideoWeedIE(NovaMovIE): - IE_NAME = 'videoweed' - IE_DESC = 'VideoWeed' - - _VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': r'videoweed\.(?:es|com)'} - - _HOST = 'www.videoweed.es' - - _FILE_DELETED_REGEX = r'>This file no longer exists on our servers.<' - _TITLE_REGEX = r'<h1 class="text_shadow">([^<]+)</h1>' - _URL_TEMPLATE = 'http://%s/file/%s' - - _TEST = { - 'url': 'http://www.videoweed.es/file/b42178afbea14', - 'md5': 'abd31a2132947262c50429e1d16c1bfd', - 'info_dict': { - 'id': 'b42178afbea14', - 'ext': 'flv', - 'title': 'optical illusion dissapeared image magic illusion', - 'description': '' - }, - } - - -class CloudTimeIE(NovaMovIE): - IE_NAME = 'cloudtime' - IE_DESC = 'CloudTime' - - _VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': r'cloudtime\.to'} - - _HOST = 'www.cloudtime.to' - - _FILE_DELETED_REGEX = r'>This file no longer exists on our servers.<' - _TITLE_REGEX = 
r'<div[^>]+class=["\']video_det["\'][^>]*>\s*<strong>([^<]+)</strong>' - - _TEST = None - - -class AuroraVidIE(NovaMovIE): - IE_NAME = 'auroravid' - IE_DESC = 'AuroraVid' - - _VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': r'auroravid\.to'} - - _HOST = 'www.auroravid.to' - - _FILE_DELETED_REGEX = r'This file no longer exists on our servers!<' - - _TESTS = [{ - 'url': 'http://www.auroravid.to/video/4rurhn9x446jj', - 'md5': '7205f346a52bbeba427603ba10d4b935', - 'info_dict': { - 'id': '4rurhn9x446jj', - 'ext': 'flv', - 'title': 'search engine optimization', - 'description': 'search engine optimization is used to rank the web page in the google search engine' - }, - 'skip': '"Invalid token" errors abound (in web interface as well as youtube-dl, there is nothing we can do about it.)' - }, { - 'url': 'http://www.auroravid.to/embed/?v=4rurhn9x446jj', - 'only_matching': True, - }] From 186d185b6ecdee102866777121d6abe9ed7f59ba Mon Sep 17 00:00:00 2001 From: Malte Kiefer <malte.kiefer@mailgermania.de> Date: Wed, 22 May 2019 22:46:20 +0200 Subject: [PATCH 05/17] [streamcloud] Reduce waiting time to 6 seconds (#21092) --- youtube_dl/extractor/streamcloud.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/streamcloud.py b/youtube_dl/extractor/streamcloud.py index 4a410611d..b97bb4374 100644 --- a/youtube_dl/extractor/streamcloud.py +++ b/youtube_dl/extractor/streamcloud.py @@ -45,7 +45,7 @@ class StreamcloudIE(InfoExtractor): value="([^"]*)" ''', orig_webpage) - self._sleep(12, video_id) + self._sleep(6, video_id) webpage = self._download_webpage( url, video_id, data=urlencode_postdata(fields), headers={ From bbf1defe586f4b4cb7b35aa3da67c5dc786d9a2c Mon Sep 17 00:00:00 2001 From: Georgi Saev <georgi.saev@gmail.com> Date: Wed, 22 May 2019 23:51:50 +0300 Subject: [PATCH 06/17] [bitchute] Fix uploader extraction (#21076) --- youtube_dl/extractor/bitchute.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git 
a/youtube_dl/extractor/bitchute.py b/youtube_dl/extractor/bitchute.py index 4f39424f5..1d69dafbd 100644 --- a/youtube_dl/extractor/bitchute.py +++ b/youtube_dl/extractor/bitchute.py @@ -65,8 +65,9 @@ class BitChuteIE(InfoExtractor): webpage, default=None) or self._html_search_meta( 'twitter:image:src', webpage, 'thumbnail') uploader = self._html_search_regex( - r'(?s)<p\b[^>]+\bclass=["\']video-author[^>]+>(.+?)</p>', webpage, - 'uploader', fatal=False) + (r'(?s)<div class=["\']channel-banner.*?<p\b[^>]+\bclass=["\']name[^>]+>(.+?)</p>', + r'(?s)<p\b[^>]+\bclass=["\']video-author[^>]+>(.+?)</p>'), + webpage, 'uploader', fatal=False) return { 'id': video_id, From 2c53c0ebc63b7fbb36d05491d5d3796d3e511e26 Mon Sep 17 00:00:00 2001 From: NRTICN <50528161+NRTICN@users.noreply.github.com> Date: Wed, 22 May 2019 20:56:54 +0000 Subject: [PATCH 07/17] [pornhub] Use https (#21061) --- youtube_dl/extractor/pornhub.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py index bf8f0be88..cb59d526f 100644 --- a/youtube_dl/extractor/pornhub.py +++ b/youtube_dl/extractor/pornhub.py @@ -170,7 +170,7 @@ class PornHubIE(PornHubBaseIE): def dl_webpage(platform): self._set_cookie(host, 'platform', platform) return self._download_webpage( - 'http://www.%s/view_video.php?viewkey=%s' % (host, video_id), + 'https://www.%s/view_video.php?viewkey=%s' % (host, video_id), video_id, 'Downloading %s webpage' % platform) webpage = dl_webpage('pc') From afd4985f72a6641907aee1cd0b4b42da524b0ff4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 23 May 2019 06:06:49 +0700 Subject: [PATCH 08/17] [travis] Force dist to Ubuntu Trusty by default According to https://blog.travis-ci.com/2019-04-15-xenial-default-build-environment Ubuntu Xenial is now default, but it lacks python 2.6, 3.2 and 3.3 support needed by tests --- .travis.yml | 1 + 1 file changed, 1 insertion(+) diff --git 
a/.travis.yml b/.travis.yml index 82e81d078..6d16c2955 100644 --- a/.travis.yml +++ b/.travis.yml @@ -9,6 +9,7 @@ python: - "3.6" - "pypy" - "pypy3" +dist: trusty env: - YTDL_TEST_SET=core - YTDL_TEST_SET=download From 9c5f2988b91609d49f7010ac580376f42e01d4f1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 23 May 2019 23:38:01 +0700 Subject: [PATCH 09/17] [criterion] Remove extractor (closes #21195) --- youtube_dl/extractor/criterion.py | 39 ------------------------------ youtube_dl/extractor/extractors.py | 1 - 2 files changed, 40 deletions(-) delete mode 100644 youtube_dl/extractor/criterion.py diff --git a/youtube_dl/extractor/criterion.py b/youtube_dl/extractor/criterion.py deleted file mode 100644 index f7815b905..000000000 --- a/youtube_dl/extractor/criterion.py +++ /dev/null @@ -1,39 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -from .common import InfoExtractor - - -class CriterionIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?criterion\.com/films/(?P<id>[0-9]+)-.+' - _TEST = { - 'url': 'http://www.criterion.com/films/184-le-samourai', - 'md5': 'bc51beba55685509883a9a7830919ec3', - 'info_dict': { - 'id': '184', - 'ext': 'mp4', - 'title': 'Le Samouraï', - 'description': 'md5:a2b4b116326558149bef81f76dcbb93f', - 'thumbnail': r're:^https?://.*\.jpg$', - } - } - - def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - - final_url = self._search_regex( - r'so\.addVariable\("videoURL", "(.+?)"\)\;', webpage, 'video url') - title = self._og_search_title(webpage) - description = self._html_search_meta('description', webpage) - thumbnail = self._search_regex( - r'so\.addVariable\("thumbnailURL", "(.+?)"\)\;', - webpage, 'thumbnail url') - - return { - 'id': video_id, - 'url': final_url, - 'title': title, - 'description': description, - 'thumbnail': thumbnail, - } diff --git a/youtube_dl/extractor/extractors.py 
b/youtube_dl/extractor/extractors.py index e5aee96c2..7705f9bdd 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -240,7 +240,6 @@ from .condenast import CondeNastIE from .corus import CorusIE from .cracked import CrackedIE from .crackle import CrackleIE -from .criterion import CriterionIE from .crooksandliars import CrooksAndLiarsIE from .crunchyroll import ( CrunchyrollIE, From 8af49fc276b2cf2154b9342de4b4cd66f9d17af9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 23 May 2019 23:48:06 +0700 Subject: [PATCH 10/17] [pornflip] Remove extractor --- youtube_dl/extractor/extractors.py | 1 - youtube_dl/extractor/pornflip.py | 101 ----------------------------- 2 files changed, 102 deletions(-) delete mode 100644 youtube_dl/extractor/pornflip.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 7705f9bdd..eb5efd1e8 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -888,7 +888,6 @@ from .polskieradio import ( from .popcorntv import PopcornTVIE from .porn91 import Porn91IE from .porncom import PornComIE -from .pornflip import PornFlipIE from .pornhd import PornHdIE from .pornhub import ( PornHubIE, diff --git a/youtube_dl/extractor/pornflip.py b/youtube_dl/extractor/pornflip.py deleted file mode 100644 index 025985fbc..000000000 --- a/youtube_dl/extractor/pornflip.py +++ /dev/null @@ -1,101 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -from .common import InfoExtractor -from ..compat import ( - compat_parse_qs, - compat_str, -) -from ..utils import ( - int_or_none, - try_get, - unified_timestamp, -) - - -class PornFlipIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?pornflip\.com/(?:v|embed)/(?P<id>[^/?#&]+)' - _TESTS = [{ - 'url': 'https://www.pornflip.com/v/wz7DfNhMmep', - 'md5': '98c46639849145ae1fd77af532a9278c', - 'info_dict': { - 'id': 'wz7DfNhMmep', - 'ext': 'mp4', - 
'title': '2 Amateurs swallow make his dream cumshots true', - 'thumbnail': r're:^https?://.*\.jpg$', - 'duration': 112, - 'timestamp': 1481655502, - 'upload_date': '20161213', - 'uploader_id': '106786', - 'uploader': 'figifoto', - 'view_count': int, - 'age_limit': 18, - } - }, { - 'url': 'https://www.pornflip.com/embed/wz7DfNhMmep', - 'only_matching': True, - }, { - 'url': 'https://www.pornflip.com/v/EkRD6-vS2-s', - 'only_matching': True, - }, { - 'url': 'https://www.pornflip.com/embed/EkRD6-vS2-s', - 'only_matching': True, - }, { - 'url': 'https://www.pornflip.com/v/NG9q6Pb_iK8', - 'only_matching': True, - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - - webpage = self._download_webpage( - 'https://www.pornflip.com/v/%s' % video_id, video_id) - - flashvars = compat_parse_qs(self._search_regex( - r'<embed[^>]+flashvars=(["\'])(?P<flashvars>(?:(?!\1).)+)\1', - webpage, 'flashvars', group='flashvars')) - - title = flashvars['video_vars[title]'][0] - - def flashvar(kind): - return try_get( - flashvars, lambda x: x['video_vars[%s]' % kind][0], compat_str) - - formats = [] - for key, value in flashvars.items(): - if not (value and isinstance(value, list)): - continue - format_url = value[0] - if key == 'video_vars[hds_manifest]': - formats.extend(self._extract_mpd_formats( - format_url, video_id, mpd_id='dash', fatal=False)) - continue - height = self._search_regex( - r'video_vars\[video_urls\]\[(\d+)', key, 'height', default=None) - if not height: - continue - formats.append({ - 'url': format_url, - 'format_id': 'http-%s' % height, - 'height': int_or_none(height), - }) - self._sort_formats(formats) - - uploader = self._html_search_regex( - (r'<span[^>]+class="name"[^>]*>\s*<a[^>]+>\s*<strong>(?P<uploader>[^<]+)', - r'<meta[^>]+content=(["\'])[^>]*\buploaded by (?P<uploader>.+?)\1'), - webpage, 'uploader', fatal=False, group='uploader') - - return { - 'id': video_id, - 'formats': formats, - 'title': title, - 'thumbnail': flashvar('big_thumb'), - 
'duration': int_or_none(flashvar('duration')), - 'timestamp': unified_timestamp(self._html_search_meta( - 'uploadDate', webpage, 'timestamp')), - 'uploader_id': flashvar('author_id'), - 'uploader': uploader, - 'view_count': int_or_none(flashvar('views')), - 'age_limit': 18, - } From f856816b940d95e995c2b9995d01097ab144a1af Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 23 May 2019 23:52:11 +0700 Subject: [PATCH 11/17] [extractor/common] Strip src attribute for HTML5 entries code (closes #18485, closes #21169) --- youtube_dl/extractor/common.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 937237b3f..9c3e9eec6 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -67,6 +67,7 @@ from ..utils import ( sanitized_Request, sanitize_filename, str_or_none, + strip_or_none, unescapeHTML, unified_strdate, unified_timestamp, @@ -2480,7 +2481,7 @@ class InfoExtractor(object): 'subtitles': {}, } media_attributes = extract_attributes(media_tag) - src = media_attributes.get('src') + src = strip_or_none(media_attributes.get('src')) if src: _, formats = _media_formats(src, media_type) media_info['formats'].extend(formats) @@ -2490,7 +2491,7 @@ class InfoExtractor(object): s_attr = extract_attributes(source_tag) # data-video-src and data-src are non standard but seen # several times in the wild - src = dict_get(s_attr, ('src', 'data-video-src', 'data-src')) + src = strip_or_none(dict_get(s_attr, ('src', 'data-video-src', 'data-src'))) if not src: continue f = parse_content_type(s_attr.get('type')) @@ -2533,7 +2534,7 @@ class InfoExtractor(object): track_attributes = extract_attributes(track_tag) kind = track_attributes.get('kind') if not kind or kind in ('subtitles', 'captions'): - src = track_attributes.get('src') + src = strip_or_none(track_attributes.get('src')) if not src: continue lang = 
track_attributes.get('srclang') or track_attributes.get('lang') or track_attributes.get('label') From 53cd37bac50e7a927deba5b67a4412301b96230d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 23 May 2019 23:58:35 +0700 Subject: [PATCH 12/17] [utils] Improve strip_or_none --- test/test_utils.py | 13 +++++++++++++ youtube_dl/utils.py | 4 ++-- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/test/test_utils.py b/test/test_utils.py index 9ef0e422b..71980b3fc 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -73,6 +73,7 @@ from youtube_dl.utils import ( smuggle_url, str_to_int, strip_jsonp, + strip_or_none, timeconvert, unescapeHTML, unified_strdate, @@ -752,6 +753,18 @@ class TestUtil(unittest.TestCase): d = json.loads(stripped) self.assertEqual(d, {'status': 'success'}) + def test_strip_or_none(self): + self.assertEqual(strip_or_none(' abc'), 'abc') + self.assertEqual(strip_or_none('abc '), 'abc') + self.assertEqual(strip_or_none(' abc '), 'abc') + self.assertEqual(strip_or_none('\tabc\t'), 'abc') + self.assertEqual(strip_or_none('\n\tabc\n\t'), 'abc') + self.assertEqual(strip_or_none('abc'), 'abc') + self.assertEqual(strip_or_none(''), '') + self.assertEqual(strip_or_none(None), None) + self.assertEqual(strip_or_none(42), None) + self.assertEqual(strip_or_none([]), None) + def test_uppercase_escape(self): self.assertEqual(uppercase_escape('aä'), 'aä') self.assertEqual(uppercase_escape('\\U0001d550'), '𝕐') diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 9be9b2e76..ead9bd862 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1951,8 +1951,8 @@ def bool_or_none(v, default=None): return v if isinstance(v, bool) else default -def strip_or_none(v): - return None if v is None else v.strip() +def strip_or_none(v, default=None): + return v.strip() if isinstance(v, compat_str) else default def url_or_none(url): From 11ec06de7f01bced1f4f4b6484e5190cbb9ed9e3 Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 24 May 2019 00:35:46 +0700 Subject: [PATCH 13/17] [24video] Add support for 24video.site (closes #21193) --- youtube_dl/extractor/twentyfourvideo.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/twentyfourvideo.py b/youtube_dl/extractor/twentyfourvideo.py index 4b3b3e705..d16f55500 100644 --- a/youtube_dl/extractor/twentyfourvideo.py +++ b/youtube_dl/extractor/twentyfourvideo.py @@ -14,7 +14,7 @@ from ..utils import ( class TwentyFourVideoIE(InfoExtractor): IE_NAME = '24video' - _VALID_URL = r'https?://(?P<host>(?:www\.)?24video\.(?:net|me|xxx|sexy?|tube|adult))/(?:video/(?:view|xml)/|player/new24_play\.swf\?id=)(?P<id>\d+)' + _VALID_URL = r'https?://(?P<host>(?:www\.)?24video\.(?:net|me|xxx|sexy?|tube|adult|site))/(?:video/(?:view|xml)/|player/new24_play\.swf\?id=)(?P<id>\d+)' _TESTS = [{ 'url': 'http://www.24video.net/video/view/1044982', @@ -42,6 +42,9 @@ class TwentyFourVideoIE(InfoExtractor): }, { 'url': 'http://www.24video.tube/video/view/2363750', 'only_matching': True, + }, { + 'url': 'https://www.24video.site/video/view/2640421', + 'only_matching': True, }] def _real_extract(self, url): From f4cc2ca503239aadd5cb75f83cb873bc5816dfdf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 24 May 2019 00:38:06 +0700 Subject: [PATCH 14/17] [24video] Add support for porno.24video.net (closes #21194) --- youtube_dl/extractor/twentyfourvideo.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/twentyfourvideo.py b/youtube_dl/extractor/twentyfourvideo.py index d16f55500..1d66eeaff 100644 --- a/youtube_dl/extractor/twentyfourvideo.py +++ b/youtube_dl/extractor/twentyfourvideo.py @@ -14,7 +14,18 @@ from ..utils import ( class TwentyFourVideoIE(InfoExtractor): IE_NAME = '24video' - _VALID_URL = 
r'https?://(?P<host>(?:www\.)?24video\.(?:net|me|xxx|sexy?|tube|adult|site))/(?:video/(?:view|xml)/|player/new24_play\.swf\?id=)(?P<id>\d+)' + _VALID_URL = r'''(?x) + https?:// + (?P<host> + (?:(?:www|porno)\.)?24video\. + (?:net|me|xxx|sexy?|tube|adult|site) + )/ + (?: + video/(?:(?:view|xml)/)?| + player/new24_play\.swf\?id= + ) + (?P<id>\d+) + ''' _TESTS = [{ 'url': 'http://www.24video.net/video/view/1044982', @@ -45,6 +56,9 @@ class TwentyFourVideoIE(InfoExtractor): }, { 'url': 'https://www.24video.site/video/view/2640421', 'only_matching': True, + }, { + 'url': 'https://porno.24video.net/video/2640421-vsya-takaya-gibkaya-i-v-masle', + 'only_matching': True, }] def _real_extract(self, url): From 3fe774722b0855d572b6885ff20fb6f4f96f3e08 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stefan=20P=C3=B6schel?= <github@basicmaster.de> Date: Sun, 19 May 2019 18:11:17 +0200 Subject: [PATCH 15/17] [srgssrplay] Add support for popupvideoplayer URLs --- youtube_dl/extractor/srgssr.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/srgssr.py b/youtube_dl/extractor/srgssr.py index bb73eb1d5..ff1b67876 100644 --- a/youtube_dl/extractor/srgssr.py +++ b/youtube_dl/extractor/srgssr.py @@ -106,7 +106,7 @@ class SRGSSRIE(InfoExtractor): class SRGSSRPlayIE(InfoExtractor): IE_DESC = 'srf.ch, rts.ch, rsi.ch, rtr.ch and swissinfo.ch play sites' - _VALID_URL = r'https?://(?:(?:www|play)\.)?(?P<bu>srf|rts|rsi|rtr|swissinfo)\.ch/play/(?:tv|radio)/[^/]+/(?P<type>video|audio)/[^?]+\?id=(?P<id>[0-9a-f\-]{36}|\d+)' + _VALID_URL = r'https?://(?:(?:www|play)\.)?(?P<bu>srf|rts|rsi|rtr|swissinfo)\.ch/play/(?:tv|radio)/(?:[^/]+/|popup)(?P<type>video|audio)(?:/[^?]+|player)\?id=(?P<id>[0-9a-f\-]{36}|\d+)' _TESTS = [{ 'url': 'http://www.srf.ch/play/tv/10vor10/video/snowden-beantragt-asyl-in-russland?id=28e1a57d-5b76-4399-8ab3-9097f071e6c5', @@ -163,6 +163,17 @@ class SRGSSRPlayIE(InfoExtractor): # m3u8 download 'skip_download': True, } + }, { + 'url': 
'https://www.srf.ch/play/tv/popupvideoplayer?id=c4dba0ca-e75b-43b2-a34f-f708a4932e01', + 'md5': 'f6247aa7c905b81c9ba7f50fb22e2fbd', + 'info_dict': { + 'id': 'c4dba0ca-e75b-43b2-a34f-f708a4932e01', + 'ext': 'mp4', + 'upload_date': '20190122', + 'title': 'Erster Selfie-Stick (1983)', + 'description': 'md5:23a6b40024e583137e4137f5946543c1', + 'timestamp': 1548155133, + } }] def _real_extract(self, url): From 25b83c2a0e29c75372f0ce26d2b4ecf493e8b28c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 24 May 2019 00:43:22 +0700 Subject: [PATCH 16/17] [srgssrplay] Improve _VALID_URL (closes #21155) --- youtube_dl/extractor/srgssr.py | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/youtube_dl/extractor/srgssr.py b/youtube_dl/extractor/srgssr.py index ff1b67876..170dce87f 100644 --- a/youtube_dl/extractor/srgssr.py +++ b/youtube_dl/extractor/srgssr.py @@ -106,7 +106,16 @@ class SRGSSRIE(InfoExtractor): class SRGSSRPlayIE(InfoExtractor): IE_DESC = 'srf.ch, rts.ch, rsi.ch, rtr.ch and swissinfo.ch play sites' - _VALID_URL = r'https?://(?:(?:www|play)\.)?(?P<bu>srf|rts|rsi|rtr|swissinfo)\.ch/play/(?:tv|radio)/(?:[^/]+/|popup)(?P<type>video|audio)(?:/[^?]+|player)\?id=(?P<id>[0-9a-f\-]{36}|\d+)' + _VALID_URL = r'''(?x) + https?:// + (?:(?:www|play)\.)? 
+ (?P<bu>srf|rts|rsi|rtr|swissinfo)\.ch/play/(?:tv|radio)/ + (?: + [^/]+/(?P<type>video|audio)/[^?]+| + popup(?P<type_2>video|audio)player + ) + \?id=(?P<id>[0-9a-f\-]{36}|\d+) + ''' _TESTS = [{ 'url': 'http://www.srf.ch/play/tv/10vor10/video/snowden-beantragt-asyl-in-russland?id=28e1a57d-5b76-4399-8ab3-9097f071e6c5', @@ -165,18 +174,13 @@ class SRGSSRPlayIE(InfoExtractor): } }, { 'url': 'https://www.srf.ch/play/tv/popupvideoplayer?id=c4dba0ca-e75b-43b2-a34f-f708a4932e01', - 'md5': 'f6247aa7c905b81c9ba7f50fb22e2fbd', - 'info_dict': { - 'id': 'c4dba0ca-e75b-43b2-a34f-f708a4932e01', - 'ext': 'mp4', - 'upload_date': '20190122', - 'title': 'Erster Selfie-Stick (1983)', - 'description': 'md5:23a6b40024e583137e4137f5946543c1', - 'timestamp': 1548155133, - } + 'only_matching': True, }] def _real_extract(self, url): - bu, media_type, media_id = re.match(self._VALID_URL, url).groups() + mobj = re.match(self._VALID_URL, url) + bu = mobj.group('bu') + media_type = mobj.group('type') or mobj.group('type_2') + media_id = mobj.group('id') # other info can be extracted from url + '&layout=json' return self.url_result('srgssr:%s:%s:%s' % (bu[:3], media_type, media_id), 'SRGSSR') From 0d297518904f8537e8103052d19f0940a272413d Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Sat, 25 May 2019 23:14:47 +0100 Subject: [PATCH 17/17] [youtube] improve DRM protected videos detection(#1774) --- youtube_dl/extractor/youtube.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 06005f8d2..5f1957a59 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1789,9 +1789,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor): raise ExtractorError( 'YouTube said: %s' % unavailable_message, expected=True, video_id=video_id) - if video_info.get('license_info'): - raise ExtractorError('This video is DRM protected.', expected=True) - video_details = try_get( 
player_response, lambda x: x['videoDetails'], dict) or {} @@ -1927,7 +1924,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): formats = [] for url_data_str in encoded_url_map.split(','): url_data = compat_parse_qs(url_data_str) - if 'itag' not in url_data or 'url' not in url_data: + if 'itag' not in url_data or 'url' not in url_data or url_data.get('drm_families'): continue stream_type = int_or_none(try_get(url_data, lambda x: x['stream_type'][0])) # Unsupported FORMAT_STREAM_TYPE_OTF @@ -2323,6 +2320,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor): '"token" parameter not in video info for unknown reason', video_id=video_id) + if not formats and (video_info.get('license_info') or try_get(player_response, lambda x: x['streamingData']['licenseInfos'])): + raise ExtractorError('This video is DRM protected.', expected=True) + self._sort_formats(formats) self.mark_watched(video_id, video_info, player_response)