Fixed extractor

2019-02-02 13:13:48 -04:00 · 2019-02-02 13:13:48 -04:00 · d472ea4192
commit d472ea4192
parent 7c5307f4c4
1 changed files with 19 additions and 29 deletions
--- a/youtube_dl/extractor/vporn.py
+++ b/youtube_dl/extractor/vporn.py
@@ -6,8 +6,8 @@ from .common import InfoExtractor
 from ..utils import (
    ExtractorError,
    parse_duration,
+    parse_resolution,
    str_to_int,
-    urljoin,
 )


@@ -64,47 +64,37 @@ class VpornIE(InfoExtractor):

        title = self._html_search_regex(
            r'videoname\s*=\s*\'([^\']+)\'', webpage, 'title').strip()
-        description = self._html_search_regex(
-            r'class="(?:descr|description_txt)">(.*?)</div>',
-            webpage, 'description', fatal=False)
-        thumbnail = urljoin('http://www.vporn.com', self._html_search_regex(
-            r'flashvars\.imageUrl\s*=\s*"([^"]+)"', webpage, 'description',
-            default=None))

-        uploader = self._html_search_regex(
-            r'(?s)Uploaded by:.*?<a href="/user/[^"]+"[^>]*>(.+?)</a>',
-            webpage, 'uploader', fatal=False)
+        description = self._search_regex(r'[^>]*class="(?:sidebar-box)"[^>]*>[\n]<p>(.*?)</p>',
+                                         webpage, 'description', fatal=False)

-        categories = re.findall(r'<a href="/cat/[^"]+"[^>]*>([^<]+)</a>', webpage)
+        thumbnail = self._search_regex(r'<video[^>]+poster="([^"])"', webpage, 'thumbnail', default=None) or self._search_regex(r'posterurl\s=\s\'([^\']+)', webpage, 'thumbnail', fatal=False)
+
+        uploader = self._search_regex(r'class="avatarname">(.*?)</span>',
+                                      webpage, 'uploader', fatal=False)
+
+        categories = re.findall(r'<a[^>]*class="tags links"[^>]*>([^<]+)</a>', webpage)

        duration = parse_duration(self._search_regex(
-            r'Runtime:\s*</span>\s*(\d+ min \d+ sec)',
+            r'class="durat-img"[^>]*>\s*(\d+ min \d+ sec)',
            webpage, 'duration', fatal=False))

        view_count = str_to_int(self._search_regex(
-            r'class="views">([\d,\.]+) [Vv]iews<',
+            r'class="view-count">[\n]([\d,\.]+) [Vv]iews[\n]<',
            webpage, 'view count', fatal=False))
+
        comment_count = str_to_int(self._html_search_regex(
            r"'Comments \(([\d,\.]+)\)'",
            webpage, 'comment count', default=None))

        formats = []
-
-        for video in re.findall(r'flashvars\.videoUrl([^=]+?)\s*=\s*"(https?://[^"]+)"', webpage):
-            video_url = video[1]
-            fmt = {
-                'url': video_url,
-                'format_id': video[0],
-            }
-            m = re.search(r'_(?P<width>\d+)x(?P<height>\d+)_(?P<vbr>\d+)k\.mp4$', video_url)
-            if m:
-                fmt.update({
-                    'width': int(m.group('width')),
-                    'height': int(m.group('height')),
-                    'vbr': int(m.group('vbr')),
-                })
-            formats.append(fmt)
-
+        for mobj in re.finditer(r'<source[^>]+src="([^"]+)"[^>]+label="([^"]+)[^>]*>', webpage):
+            f = parse_resolution(mobj.group(2))
+            f.update({
+                'url': mobj.group(1),
+                'format_id': mobj.group(2),
+            })
+            formats.append(f)
        self._sort_formats(formats)

        return {