[porn] Added new extractor for porn.com

2016-08-07 09:49:29 -05:00 · 2016-08-07 09:49:29 -05:00 · a1447448af
commit a1447448af
parent 6bb0fbf9fb
2 changed files with 55 additions and 0 deletions
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@ -643,6 +643,8 @@ from .pornhub import (
    PornHubPlaylistIE,
    PornHubUserVideosIE,
 )
+
+from .porn import PornIE
 from .pornotube import PornotubeIE
 from .pornovoisines import PornoVoisinesIE
 from .pornoxo import PornoXOIE
--- a/youtube_dl/extractor/porn.py
+++ b/youtube_dl/extractor/porn.py
@ -0,0 +1,53 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+import re
+
+
+class PornIE(InfoExtractor):
+    """Extract the title and MP4 formats of a porn.com video page."""
+
+    # An optional slug precedes the numeric id, e.g. some-title-2658067.
+    _VALID_URL = r'https?://(?:www\.)?porn\.com/videos/(?:[^/?#&]+-)?(?P<id>\d+)'
+    _TEST = {
+        'url': 'http://www.porn.com/videos/marsha-may-rides-seth-on-top-of-his-thick-cock-2658067',
+        'info_dict': {
+            'id': '2658067',
+            'ext': 'mp4',
+            'title': 'Marsha May rides Seth on top of his thick cock',
+        },
+    }
+
+    def _real_extract(self, url):
+        """Return the info dict (``id``, ``title``, ``formats``) for ``url``."""
+        # The id is the trailing number of the slug, captured by _VALID_URL.
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+
+        # Every double-quoted string containing "mp4" is taken as a media URL.
+        video_urls = re.findall(r'"([^"]*(?=mp4).*?)"', webpage)
+        # Take everything up to the closing quote (titles may start with any letter).
+        title = self._search_regex(r'title:"([^"]+)"', webpage, 'video_title')
+
+        formats = []
+        for video_url in video_urls:
+            a_format = {'url': video_url}
+            # A ``_<digits>.mp4`` suffix is read as the height; 4 digits covers 1080.
+            match = re.search(r'_(\d{3,4})\.mp4', video_url)
+            if match:
+                label = match.group(1) + 'p'
+                a_format.update({
+                    'format_id': label,
+                    'resolution': label,
+                    'height': int(match.group(1)),
+                })
+            formats.append(a_format)
+        # Let the base class order the formats (youtube-dl expects worst to best).
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'formats': formats,
+        }