Fix ruleporn, fixes #15344

The video file URL is now in the page HTML, which simplifies the extractor a lot.
This might be part of the solution for #20323.
This commit is contained in:
charon2019 2019-03-12 15:56:24 +01:00
parent e7e3ec828b
commit 57b48c246f

View File

@ -1,9 +1,9 @@
from __future__ import unicode_literals from __future__ import unicode_literals
from .nuevo import NuevoBaseIE from .common import InfoExtractor
class RulePornIE(NuevoBaseIE): class RulePornIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?ruleporn\.com/(?:[^/?#&]+/)*(?P<id>[^/?#&]+)' _VALID_URL = r'https?://(?:www\.)?ruleporn\.com/(?:[^/?#&]+/)*(?P<id>[^/?#&]+)'
_TEST = { _TEST = {
'url': 'http://ruleporn.com/brunette-nympho-chick-takes-her-boyfriend-in-every-angle/', 'url': 'http://ruleporn.com/brunette-nympho-chick-takes-her-boyfriend-in-every-angle/',
@ -24,21 +24,19 @@ class RulePornIE(NuevoBaseIE):
webpage = self._download_webpage(url, display_id) webpage = self._download_webpage(url, display_id)
video_id = self._search_regex( url = self._search_regex(
r'lovehomeporn\.com/embed/(\d+)', webpage, 'video id') r'<source[^>]+src="(https?://media.ruleporn.com/media/videos/[a-zA-Z0-9/]+\.mp4)[^>]+>',
webpage, 'url')
title = self._search_regex( title = self._search_regex(
r'<h2[^>]+title=(["\'])(?P<url>.+?)\1', r'<h1>(.+?)</h1>',
webpage, 'title', group='url') webpage, 'title')
description = self._html_search_meta('description', webpage) description = self._html_search_meta('description', webpage)
info = self._extract_nuevo( return {
'http://lovehomeporn.com/media/nuevo/econfig.php?key=%s&rp=true' % video_id, 'id': display_id,
video_id)
info.update({
'display_id': display_id,
'title': title, 'title': title,
'description': description, 'description': description,
'age_limit': 18 'age_limit': 18,
}) 'url': url
return info }