[SpankBangPlaylistIE] change single video regex, use urljoin, eliminate duplicate video capture, make playlist title non-fatal, use _match_id
This commit is contained in:
parent
1ec0696196
commit
1a7f6314b6
@ -8,11 +8,12 @@ from ..utils import (
|
|||||||
parse_duration,
|
parse_duration,
|
||||||
parse_resolution,
|
parse_resolution,
|
||||||
str_to_int,
|
str_to_int,
|
||||||
|
urljoin,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class SpankBangIE(InfoExtractor):
|
class SpankBangIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:(?:www|m|[a-z]{2})\.)?spankbang\.com/(?P<id>[\da-z-]+)/(?:video|playlist)'
|
_VALID_URL = r'https?://(?:(?:www|m|[a-z]{2})\.)?spankbang\.com/(?P<id>[\da-z]+)/video'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://spankbang.com/3vvn/video/fantasy+solo',
|
'url': 'http://spankbang.com/3vvn/video/fantasy+solo',
|
||||||
'md5': '1cc433e1d6aa14bc376535b8679302f7',
|
'md5': '1cc433e1d6aa14bc376535b8679302f7',
|
||||||
@ -107,21 +108,27 @@ class SpankBangPlaylistIE(InfoExtractor):
|
|||||||
'playlist_mincount': 2,
|
'playlist_mincount': 2,
|
||||||
}
|
}
|
||||||
|
|
||||||
def _extract_entries(self, webpage):
|
def _extract_entries(self, webpage, id):
|
||||||
return [
|
video_items = re.findall(r'<div[^>]+class=[\'"].*?video-item[^>]*>\s*(.+?)>', webpage)
|
||||||
self.url_result(
|
|
||||||
'http://www.%s/%s' % ('spankbang.com', video_url),
|
entries = []
|
||||||
SpankBangIE.ie_key())
|
if video_items:
|
||||||
for video_url in re.findall(
|
for div in video_items:
|
||||||
r'href="/?([\da-z-]+/playlist/[^"]+)', webpage)
|
page_url = self._search_regex(
|
||||||
]
|
r'href="/?(' + id + '-[\da-z]+/playlist/[^"]+)', div, 'page url', default=None)
|
||||||
|
|
||||||
|
if page_url:
|
||||||
|
page = self._download_webpage(urljoin('http://spankbang.com', page_url), id)
|
||||||
|
canonical_url = self._search_regex(
|
||||||
|
r'link rel="canonical" href="(.+?)"', page, 'canonical_url')
|
||||||
|
entries.append(self.url_result(canonical_url, SpankBangIE.ie_key()))
|
||||||
|
return entries
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
playlist_id = self._match_id(url)
|
||||||
playlist_id = mobj.group('id')
|
|
||||||
webpage = self._download_webpage(url, playlist_id)
|
webpage = self._download_webpage(url, playlist_id)
|
||||||
|
|
||||||
entries = self._extract_entries(webpage)
|
entries = self._extract_entries(webpage, playlist_id)
|
||||||
title = self._search_regex(r'<h1>(.+)</h1>', webpage, 'playlist_title')
|
title = self._search_regex(r'<h1>(.+)</h1>', webpage, 'playlist_title', fatal=False)
|
||||||
|
|
||||||
return self.playlist_result(entries, playlist_id, title)
|
return self.playlist_result(entries, playlist_id, title)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user