[Tubepatrol] Add new extractor

This commit is contained in:
Parmjit Virk 2017-06-05 03:26:36 -05:00
parent 537191826f
commit 0358596360
2 changed files with 89 additions and 1 deletions

View File

@ -1020,6 +1020,7 @@ from .traileraddict import TrailerAddictIE
from .trilulilu import TriluliluIE from .trilulilu import TriluliluIE
from .trutv import TruTVIE from .trutv import TruTVIE
from .tube8 import Tube8IE from .tube8 import Tube8IE
from .tubepatrol import TubepatrolIE
from .tubitv import TubiTvIE from .tubitv import TubiTvIE
from .tumblr import TumblrIE from .tumblr import TumblrIE
from .tunein import ( from .tunein import (

View File

@ -0,0 +1,87 @@
# coding: utf-8
from __future__ import unicode_literals
from ..utils import ExtractorError
from .common import InfoExtractor
import re
class TubepatrolIE(InfoExtractor):
# i.e. http://tubepatrol.sex/to/767066/plump-asian-loves-fucking-and-sucking.html
_VALID_URL = r'http?://(?:www\.)?tubepatrol\.sex/[^/]+/(?P<id>\d+)/(?P<display_id>[^/]+)\.html'
_TESTS = [
{
# MPEG-4 video format
'url': 'http://tubepatrol.sex/to/555439/ani-black-fox-new-czech-anal-slut-legalporno-trailer.html',
'info_dict': {
'id': '555439',
'display_id': 'ani-black-fox-new-czech-anal-slut-legalporno-trailer',
'ext': 'mp4',
'title': 'Ani Black Fox New Czech Anal Slut [legalporno Trailer]',
},
},
{
# Flash video format
'url': 'http://tubepatrol.sex/to/3934608/ad4x-video-dp-de-kelly-lee-trailer-hd-porn-quebec.html',
'info_dict': {
'id': '3934608',
'display_id': 'ad4x-video-dp-de-kelly-lee-trailer-hd-porn-quebec',
'ext': 'flv',
'title': 'AD4X Video - DP De Kelly Lee Trailer HD - Porn Quebec',
},
},
]
def _real_extract(self, url):
# Basic extractor implementation - Video ID, Display ID, Title, URL
# IDs
mobj = re.match(self._VALID_URL, url)
try:
video_id = mobj.group('id')
display_id = mobj.group('display_id')
except Exception as inst:
raise ExtractorError('%s said: %s' % (self.IE_NAME, "The video or display ID could not be extracted: %s" % inst), expected=True)
# get the webpage source code
webpage = self._download_webpage(url, video_id)
# Title
# first try the generic header text
video_title = self._html_search_regex(r'<h1>(.+?)</h1>', webpage, 'video_title', default=None)
if video_title is None:
# fallback to the link text provided for embed
video_title = self._html_search_regex(r'<a\shref="%s">(.+?)</a>' % url, webpage, 'video_title', default=None)
if video_title is None:
raise ExtractorError('%s said: %s' % (self.IE_NAME, "The video title could not be extracted"), expected=True)
# the URL for the video file is contained in a seperate link as:
# https://borfos.com/kt_player/player.php?id=<video_id>
flashvars_webpage = self._download_webpage('https://borfos.com/kt_player/player.php?id=%s' % video_id, video_id)
flashvars_data = self._search_regex(r'(?s)flashvars\s*=\s*({.+?})', flashvars_webpage, 'flashvars_data', default=None)
if flashvars_data is None:
raise ExtractorError('%s said: %s' % (self.IE_NAME, "The flash player data could not be extracted"), expected=True)
# URL
# yes, we are going to use a regex to extract the video URL instead of using the JSON approach
#
# this is done because a bunch of extraneous fields in the flash data contain wonky characters
# that screw up the call to _parse_json() and we do not care for these fields anyway, so ...
# first try the generic url
video_url = self._search_regex(r'(?s)video_url:\s"(.+?)",\s*', flashvars_data, 'video_url', default=None)
if video_url is None:
# fallback to the HTML5 url
video_url = self._search_regex(r'(?s)video_html5_url:\s"(.+?)",\s*', flashvars_data, 'video_url', default=None)
if video_url is None:
raise ExtractorError('%s said: %s' % (self.IE_NAME, "The video URL could not be extracted"), expected=True)
return {
'id': video_id,
'display_id': display_id,
'title': video_title,
'url': video_url
}