From 97b6e3011370a851b942bca144afb7cb08a57f5d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 22 Jun 2017 00:20:45 +0700 Subject: [PATCH 1/3] [youporn] Fix title extraction (closes #13456) --- youtube_dl/extractor/youporn.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/youporn.py b/youtube_dl/extractor/youporn.py index 34ab878a4..7bc2cefc7 100644 --- a/youtube_dl/extractor/youporn.py +++ b/youtube_dl/extractor/youporn.py @@ -68,9 +68,12 @@ class YouPornIE(InfoExtractor): webpage = self._download_webpage(request, display_id) title = self._search_regex( - [r'(?:video_titles|videoTitle)\s*[:=]\s*(["\'])(?P<title>.+?)\1', - r'<h1[^>]+class=["\']heading\d?["\'][^>]*>([^<])<'], - webpage, 'title', group='title') + [r'(?:video_titles|videoTitle|title)\s*[:=]\s*(["\'])(?P<title>(?:(?!\1).)+)\1', + r'<h1[^>]+class=["\']heading\d?["\'][^>]*>(?P<title>[^<]+)<'], + webpage, 'title', group='title', + default=None) or self._og_search_title( + webpage, default=None) or self._html_search_meta( + 'title', webpage, fatal=True) links = [] From d4893e764bbda206b6194884e67b0acaf231d0d6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 22 Jun 2017 00:40:15 +0700 Subject: [PATCH 2/3] [youporn] Improve formats extraction --- youtube_dl/extractor/youporn.py | 32 ++++++++++++++++++++++++-------- 1 file changed, 24 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/youporn.py b/youtube_dl/extractor/youporn.py index 7bc2cefc7..64e3c3c94 100644 --- a/youtube_dl/extractor/youporn.py +++ b/youtube_dl/extractor/youporn.py @@ -3,6 +3,7 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from ..compat import compat_str from ..utils import ( int_or_none, sanitized_Request, @@ -68,7 +69,7 @@ class YouPornIE(InfoExtractor): webpage = self._download_webpage(request, display_id) title = self._search_regex( - 
[r'(?:video_titles|videoTitle|title)\s*[:=]\s*(["\'])(?P<title>(?:(?!\1).)+)\1', + [r'(?:video_titles|videoTitle)\s*[:=]\s*(["\'])(?P<title>(?:(?!\1).)+)\1', r'<h1[^>]+class=["\']heading\d?["\'][^>]*>(?P<title>[^<]+)<'], webpage, 'title', group='title', default=None) or self._og_search_title( @@ -77,22 +78,37 @@ class YouPornIE(InfoExtractor): links = [] + # Main source + definitions = self._parse_json( + self._search_regex( + r'mediaDefinition\s*=\s*(\[.+?\]);', webpage, + 'media definitions', default='[]'), + video_id, fatal=False) + if definitions: + for definition in definitions: + if not isinstance(definition, dict): + continue + video_url = definition.get('videoUrl') + if isinstance(video_url, compat_str) and video_url: + links.append(video_url) + + # Fallback #1, this also contains extra low quality 180p format + for _, link in re.findall(r'<a[^>]+href=(["\'])(http.+?)\1[^>]+title=["\']Download [Vv]ideo', webpage): + links.append(link) + + # Fallback #2 (unavailable as at 22.06.2017) sources = self._search_regex( r'(?s)sources\s*:\s*({.+?})', webpage, 'sources', default=None) if sources: for _, link in re.findall(r'[^:]+\s*:\s*(["\'])(http.+?)\1', sources): links.append(link) - # Fallback #1 + # Fallback #3 (unavailable as at 22.06.2017) for _, link in re.findall( - r'(?:videoUrl|videoSrc|videoIpadUrl|html5PlayerSrc)\s*[:=]\s*(["\'])(http.+?)\1', webpage): + r'(?:videoSrc|videoIpadUrl|html5PlayerSrc)\s*[:=]\s*(["\'])(http.+?)\1', webpage): links.append(link) - # Fallback #2, this also contains extra low quality 180p format - for _, link in re.findall(r'<a[^>]+href=(["\'])(http.+?)\1[^>]+title=["\']Download [Vv]ideo', webpage): - links.append(link) - - # Fallback #3, encrypted links + # Fallback #4, encrypted links (unavailable as at 22.06.2017) for _, encrypted_link in re.findall( r'encryptedQuality\d{3,4}URL\s*=\s*(["\'])([\da-zA-Z+/=]+)\1', webpage): links.append(aes_decrypt_text(encrypted_link, title, 32).decode('utf-8')) From 
18166bb8e8db6bbeb1f279e236b9808e7d197dd8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 22 Jun 2017 00:47:02 +0700 Subject: [PATCH 3/3] [youporn] Fix upload date extraction --- youtube_dl/extractor/youporn.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/youporn.py b/youtube_dl/extractor/youporn.py index 64e3c3c94..547adefeb 100644 --- a/youtube_dl/extractor/youporn.py +++ b/youtube_dl/extractor/youporn.py @@ -27,7 +27,7 @@ class YouPornIE(InfoExtractor): 'description': 'Love & Sex Answers: http://bit.ly/DanAndJenn -- Is It Unhealthy To Masturbate Daily?', 'thumbnail': r're:^https?://.*\.jpg$', 'uploader': 'Ask Dan And Jennifer', - 'upload_date': '20101221', + 'upload_date': '20101217', 'average_rating': int, 'view_count': int, 'comment_count': int, @@ -46,7 +46,7 @@ class YouPornIE(InfoExtractor): 'description': 'http://sweetlivegirls.com Big Tits Awesome Brunette On amazing webcam show.mp4', 'thumbnail': r're:^https?://.*\.jpg$', 'uploader': 'Unknown', - 'upload_date': '20111125', + 'upload_date': '20110418', 'average_rating': int, 'view_count': int, 'comment_count': int, @@ -143,7 +143,8 @@ class YouPornIE(InfoExtractor): r'(?s)<div[^>]+class=["\']submitByLink["\'][^>]*>(.+?)</div>', webpage, 'uploader', fatal=False) upload_date = unified_strdate(self._html_search_regex( - r'(?s)<div[^>]+class=["\']videoInfo(?:Date|Time)["\'][^>]*>(.+?)</div>', + [r'Date\s+[Aa]dded:\s*<span>([^<]+)', + r'(?s)<div[^>]+class=["\']videoInfo(?:Date|Time)["\'][^>]*>(.+?)</div>'], webpage, 'upload date', fatal=False)) age_limit = self._rta_search(webpage)