From 1ec0696196fc23a2484b4687d54e73e704036672 Mon Sep 17 00:00:00 2001 From: JChris246 Date: Tue, 5 Feb 2019 14:46:52 -0400 Subject: [PATCH] [SpankBangPlaylist] Add new extractor --- youtube_dl/extractor/extractors.py | 5 ++++- youtube_dl/extractor/spankbang.py | 33 +++++++++++++++++++++++++++++- 2 files changed, 36 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 693c16e49..d7685cd87 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1058,7 +1058,10 @@ from .southpark import ( SouthParkEsIE, SouthParkNlIE ) -from .spankbang import SpankBangIE +from .spankbang import ( + SpankBangIE, + SpankBangPlaylistIE, +) from .spankwire import SpankwireIE from .spiegel import SpiegelIE, SpiegelArticleIE from .spiegeltv import SpiegeltvIE diff --git a/youtube_dl/extractor/spankbang.py b/youtube_dl/extractor/spankbang.py index 67500b69c..067c702ef 100644 --- a/youtube_dl/extractor/spankbang.py +++ b/youtube_dl/extractor/spankbang.py @@ -12,7 +12,7 @@ from ..utils import ( class SpankBangIE(InfoExtractor): - _VALID_URL = r'https?://(?:(?:www|m|[a-z]{2})\.)?spankbang\.com/(?P<id>[\da-z]+)/video' + _VALID_URL = r'https?://(?:(?:www|m|[a-z]{2})\.)?spankbang\.com/(?P<id>[\da-z-]+)/(?:video|playlist)' _TESTS = [{ 'url': 'http://spankbang.com/3vvn/video/fantasy+solo', 'md5': '1cc433e1d6aa14bc376535b8679302f7', @@ -94,3 +94,34 @@ class SpankBangIE(InfoExtractor): 'formats': formats, 'age_limit': age_limit, } + + +class SpankBangPlaylistIE(InfoExtractor): + _VALID_URL = r'https?://(?:(?:www|m|[a-z]{2})\.)?spankbang\.com/(?P<id>[\da-z]+)/playlist' + _TEST = { + 'url': 'https://spankbang.com/ug0k/playlist/big+ass+titties', + 'info_dict': { + 'id': 'ug0k', + 'title': 'Big Ass Titties playlist', + }, + 'playlist_mincount': 2, + } + + def _extract_entries(self, webpage): + return [ + self.url_result( + 'http://www.%s/%s' % ('spankbang.com', video_url), + SpankBangIE.ie_key()) + for video_url 
in re.findall( + r'href="/?([\da-z-]+/playlist/[^"]+)', webpage) + ] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + playlist_id = mobj.group('id') + webpage = self._download_webpage(url, playlist_id) + + entries = self._extract_entries(webpage) + title = self._search_regex(r'<h1>(.+)</h1>', webpage, 'playlist_title') + + return self.playlist_result(entries, playlist_id, title)