From c3b0894baf12a2eaa7d892f307e329ee93aeaf8e Mon Sep 17 00:00:00 2001 From: Patrick Jattke Date: Wed, 8 Nov 2017 13:58:51 +0100 Subject: [PATCH 01/12] - Added template for extractor vidlii.py --- youtube_dl/extractor/extractors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 92f7e9027..1a39f8214 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1195,6 +1195,7 @@ from .vidme import ( VidmeUserIE, VidmeUserLikesIE, ) +from .vidlii import VidliiIE from .vidzi import VidziIE from .vier import VierIE, VierVideosIE from .viewlift import ( From 7d1b38c97204c79a7b82fe931fb2e7b601ce733a Mon Sep 17 00:00:00 2001 From: Patrick Jattke Date: Wed, 8 Nov 2017 14:09:57 +0100 Subject: [PATCH 02/12] - Added template for extractor vidlii.py, last commit only added vidlii extractor into extractors.py --- youtube_dl/extractor/vidlii.py | 38 ++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) create mode 100644 youtube_dl/extractor/vidlii.py diff --git a/youtube_dl/extractor/vidlii.py b/youtube_dl/extractor/vidlii.py new file mode 100644 index 000000000..acdf0a687 --- /dev/null +++ b/youtube_dl/extractor/vidlii.py @@ -0,0 +1,38 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor + + +class VidliiIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?yourextractor\.com/watch/(?P[0-9]+)' + _TEST = { + 'url': 'https://yourextractor.com/watch/42', + 'md5': 'TODO: md5 sum of the first 10241 bytes of the video file (use --test)', + 'info_dict': { + 'id': '42', + 'ext': 'mp4', + 'title': 'Video title goes here', + 'thumbnail': r're:^https?://.*\.jpg$', + # TODO more properties, either as: + # * A value + # * MD5 checksum; start the string with md5: + # * A regular expression; start the string with re: + # * Any Python type (for example int or float) + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + # TODO more code goes here, for example ... + title = self._html_search_regex(r'

(.+?)

', webpage, 'title') + + return { + 'id': video_id, + 'title': title, + 'description': self._og_search_description(webpage), + 'uploader': self._search_regex(r']+id="uploader"[^>]*>([^<]+)<', webpage, 'uploader', fatal=False), + # TODO more properties (see youtube_dl/extractor/common.py) + } \ No newline at end of file From f4d55fbe1313c3dc9b018cd09734fad248990642 Mon Sep 17 00:00:00 2001 From: Patrick Jattke Date: Thu, 9 Nov 2017 09:38:47 +0100 Subject: [PATCH 03/12] - Implemented basic extraction for vidlii but requires improvement --- youtube_dl/extractor/vidlii.py | 55 +++++++++++++++++++++++++++++----- 1 file changed, 48 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/vidlii.py b/youtube_dl/extractor/vidlii.py index acdf0a687..c69d44a7c 100644 --- a/youtube_dl/extractor/vidlii.py +++ b/youtube_dl/extractor/vidlii.py @@ -1,11 +1,16 @@ # coding: utf-8 from __future__ import unicode_literals +import re + +from ..utils import ( + int_or_none, + get_element_by_id) from .common import InfoExtractor class VidliiIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?yourextractor\.com/watch/(?P[0-9]+)' + _VALID_URL = r'(?:https*?:\/\/)*(?:www\.)*vidlii.com\/watch\?v=(?P[^?\s]{11})' _TEST = { 'url': 'https://yourextractor.com/watch/42', 'md5': 'TODO: md5 sum of the first 10241 bytes of the video file (use --test)', @@ -23,16 +28,52 @@ class VidliiIE(InfoExtractor): } def _real_extract(self, url): + # get required video properties video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - - # TODO more code goes here, for example ... title = self._html_search_regex(r'

(.+?)

', webpage, 'title') + description = get_element_by_id('des_text', webpage).strip() + uploader = self._html_search_regex( + r']+class="wt_person"[^>]*>(?:[^<]+)Category:\s*<\/div>[\s\r]*
[\s\r]*]*>[\s]*([^<]*)', webpage) + duration = int_or_none(self._html_search_meta('video:duration', webpage, 'duration', default=False)) + view_count = int_or_none( + self._html_search_regex(r']+class="w_views"[^>]*>([^<]+?)<\/strong>', webpage, + 'view_count')) + comment_count = int_or_none(self._html_search_regex(r']+id="cmt_num"[^>]*>([^<]+?)<\/span>', webpage, + 'comment_count')) + average_rating = int_or_none( + self._html_search_regex(r'{[\s\r]*\$\("#rateYo"\).rateYo\({[^}]*rating:\s*([0-9]*?),[^}]*}', + webpage, 'average_rating')) + thumbnail_link = self._html_search_regex(r'videoInfo[\s]*=[\s]*{[^}]*img:[\s]*(?:"|\')([^"]*?)(?:"|\')', + webpage, 'thumbnail') + thumbnail = 'https://www.vidlii.com%s' % thumbnail_link + type = self._og_search_property('type', webpage, 'type') + + # use youtube-dl --print-json to show extracted metadata or debugger (watch value) return { 'id': video_id, 'title': title, - 'description': self._og_search_description(webpage), - 'uploader': self._search_regex(r']+id="uploader"[^>]*>([^<]+)<', webpage, 'uploader', fatal=False), - # TODO more properties (see youtube_dl/extractor/common.py) - } \ No newline at end of file + 'description': description, + 'uploader': uploader, + 'url': url, + 'uploader_url': uploader_url, + 'upload_date': upload_date, # should we use release_date instead? + 'categories': categories, + 'tags': tags, + 'duration': duration, + 'view_count': view_count, + 'comment_count': comment_count, + 'average_rating': average_rating, + 'thumbnail': thumbnail, + 'type': type + } From f6c8c65c145d8aa05fdebad6b717e1c4ea2df482 Mon Sep 17 00:00:00 2001 From: Patrick Jattke Date: Wed, 15 Nov 2017 22:55:56 +0100 Subject: [PATCH 04/12] - Improved extraction for Vidlii (added fallbacks, simplified RegExes) - Added first test --- youtube_dl/extractor/vidlii.py | 128 ++++++++++++++++++++++++--------- 1 file changed, 93 insertions(+), 35 deletions(-) diff --git a/youtube_dl/extractor/vidlii.py b/youtube_dl/extractor/vidlii.py index c69d44a7c..f7a213a23 100644 --- a/youtube_dl/extractor/vidlii.py +++ b/youtube_dl/extractor/vidlii.py @@ -3,23 +3,37 @@ from __future__ import unicode_literals import re +from .common import InfoExtractor from ..utils import ( int_or_none, - get_element_by_id) -from .common import InfoExtractor + get_element_by_id, str_or_none, get_element_by_class, strip_or_none, + float_or_none) class VidliiIE(InfoExtractor): _VALID_URL = r'(?:https*?:\/\/)*(?:www\.)*vidlii.com\/watch\?v=(?P[^?\s]{11})' _TEST = { - 'url': 'https://yourextractor.com/watch/42', - 'md5': 'TODO: md5 sum of the first 10241 bytes of the video file (use --test)', + 'url': 'https://www.vidlii.com/watch?v=tJluaH4BJ3v', + 'md5': '9bf7d1e005dfa909b6efb0a1ff5175e2', 'info_dict': { - 'id': '42', - 'ext': 'mp4', - 'title': 'Video title goes here', - 'thumbnail': r're:^https?://.*\.jpg$', - # TODO more properties, either as: + 'id': 'tJluaH4BJ3v', + 'title': 'Vidlii is against me', + 'description': 'I have HAD it. Vidlii does not like me. I have tried to uplaod videos and submit them to the contest and no ne of my videos show up so maybe it is broken for everyone else but this one was trying to submit it because I wanted to submit to the contest :) Tanks I hope the website is fixed PS: Jan you are cool please add my video', + 'thumbnail': 'https://www.vidlii.com/usfi/thmp/tJluaH4BJ3v.jpg', + 'uploader': 'APPle5auc31995', + 'url': 'https://cdn.vidlii.com/videos/tJluaH4BJ3v.mp4', + 'uploader_url': 'https://www.vidlii.com/user/APPle5auc31995', + 'upload_date': '20171107', + 'categories': 'News & Politics', + 'tags': ['Vidlii', 'Jan', 'Videogames'], + 'duration': 212, + # TODO this might change in future, how to handle? + 'view_count': 230, + # TODO this might change in future, how to handle? + 'comment_count': 13, + 'average_rating': 1.8571428571429, + 'type': 'video', + 'ext': 'mp4' # * A value # * MD5 checksum; start the string with md5: # * A regular expression; start the string with re: @@ -30,35 +44,79 @@ class VidliiIE(InfoExtractor): def _real_extract(self, url): # get required video properties video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) - title = self._html_search_regex(r'

(.+?)

', webpage, 'title') - description = get_element_by_id('des_text', webpage).strip() - uploader = self._html_search_regex( - r']+class="wt_person"[^>]*>(?:[^<]+)(.+?)', webpage, + 'title', default=None)) or str_or_none( + self._html_search_regex(r'([^<]+?)', webpage, + 'title', default=None)) or str_or_none( + self._html_search_meta('twitter:title', webpage, 'title', + default=False)) + description = strip_or_none( + get_element_by_id('des_text', webpage).strip()) + + uploader = str_or_none( + self._html_search_regex( + r']+class="wt_person"[^>]*>(?:[^<]+)]+?class=["\']avt2\s*["\'][^>]+?alt=["\']([^"\']+?)["\']', + webpage, 'uploader', default=None)) + + url = self._html_search_regex( + r'videoInfo[\s]*=[\s]*{[^}]*src:[\s]*(?:"|\')([^"]*?)(?:"|\')', + webpage, 'url', default=None) # get additional properties uploader_url = "https://www.vidlii.com/user/%s" % uploader - upload_date = self._html_search_meta('datePublished', webpage, 'upload_date', default=False).replace('-', '') - categories = self._html_search_regex( - r'
Category:\s*<\/div>[\s\r]*
[\s\r]*]*>[\s]*([^<]*)', webpage) - duration = int_or_none(self._html_search_meta('video:duration', webpage, 'duration', default=False)) - view_count = int_or_none( - self._html_search_regex(r']+class="w_views"[^>]*>([^<]+?)<\/strong>', webpage, - 'view_count')) - comment_count = int_or_none(self._html_search_regex(r']+id="cmt_num"[^>]*>([^<]+?)<\/span>', webpage, - 'comment_count')) - average_rating = int_or_none( - self._html_search_regex(r'{[\s\r]*\$\("#rateYo"\).rateYo\({[^}]*rating:\s*([0-9]*?),[^}]*}', - webpage, 'average_rating')) - thumbnail_link = self._html_search_regex(r'videoInfo[\s]*=[\s]*{[^}]*img:[\s]*(?:"|\')([^"]*?)(?:"|\')', - webpage, 'thumbnail') - thumbnail = 'https://www.vidlii.com%s' % thumbnail_link - type = self._og_search_property('type', webpage, 'type') - # use youtube-dl --print-json to show extracted metadata or debugger (watch value) + upload_date = str_or_none( + self._html_search_meta('datePublished', webpage, 'upload_date', + default=False).replace("-", + "")) or str_or_none( + self._html_search_regex(r'(.+?)', webpage, + 'upload_date', default="").replace("-", + "")) + categories = self._html_search_regex( + r'
Category:\s*<\/div>[\s\r]*
[\s\r]*]*>[\s]*([^<]*)', + webpage) or None + duration = int_or_none( + self._html_search_meta('video:duration', webpage, 'duration', + default=False)) or int_or_none( + self._html_search_regex( + r'videoInfo[^=]*=[^{]*{[^}]*dur:([^,}]*?),', webpage, + 'duration', default=None)) + view_count_fallback = re.findall(r'([^<]*?)', + get_element_by_class("w_views", + webpage)) + view_count_fallback = view_count_fallback[ + 0] if view_count_fallback else None + view_count = int_or_none(self._html_search_regex( + r'Views:[^<]*([^<]*?)<\/strong>', webpage, + 'view_count', default=None)) or int_or_none( + view_count_fallback) + + comment_count = int_or_none(self._html_search_regex( + r'Comments:[^<]*([^<]*?)<\/strong>', webpage, + 'comment_count', default=None)) or int_or_none( + self._html_search_regex( + r']+id="cmt_num"[^>]*>([^<]+?)<\/span>', webpage, + 'comment_count', default=None)) + average_rating = float_or_none( + self._html_search_regex( + r'{[\s\r]*\$\("#rateYo"\).rateYo\({[^}]*rating:\s*([^,]*?),[^}.]*}', + webpage, 'average_rating', default=None)) + thumbnail_link = self._html_search_regex( + r'videoInfo[\s]*=[\s]*{[^}]*img:[\s]*(?:"|\')([^"]*?)(?:"|\')', + webpage, 'thumbnail', default=None) + thumbnail = 'https://www.vidlii.com%s' % thumbnail_link + video_type = self._og_search_property('type', webpage, 'type') return { 'id': video_id, @@ -67,7 +125,7 @@ class VidliiIE(InfoExtractor): 'uploader': uploader, 'url': url, 'uploader_url': uploader_url, - 'upload_date': upload_date, # should we use release_date instead? + 'upload_date': upload_date, 'categories': categories, 'tags': tags, 'duration': duration, @@ -75,5 +133,5 @@ class VidliiIE(InfoExtractor): 'comment_count': comment_count, 'average_rating': average_rating, 'thumbnail': thumbnail, - 'type': type + 'type': video_type } From 7a07ca1b5a013a6d8a32c6e311d7647d54c7d2a9 Mon Sep 17 00:00:00 2001 From: Manuel Date: Fri, 17 Nov 2017 10:00:17 +0100 Subject: [PATCH 05/12] Test 2 und 3 hinzugefuegt --- youtube_dl/extractor/vidlii.py | 42 +++++++++++++++++++++++++++++++--- 1 file changed, 39 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/vidlii.py b/youtube_dl/extractor/vidlii.py index f7a213a23..0cce7b2ce 100644 --- a/youtube_dl/extractor/vidlii.py +++ b/youtube_dl/extractor/vidlii.py @@ -12,7 +12,7 @@ from ..utils import ( class VidliiIE(InfoExtractor): _VALID_URL = r'(?:https*?:\/\/)*(?:www\.)*vidlii.com\/watch\?v=(?P[^?\s]{11})' - _TEST = { + _TESTS = [{ 'url': 'https://www.vidlii.com/watch?v=tJluaH4BJ3v', 'md5': '9bf7d1e005dfa909b6efb0a1ff5175e2', 'info_dict': { @@ -28,7 +28,7 @@ class VidliiIE(InfoExtractor): 'tags': ['Vidlii', 'Jan', 'Videogames'], 'duration': 212, # TODO this might change in future, how to handle? - 'view_count': 230, + 'view_count': 233, # TODO this might change in future, how to handle? 'comment_count': 13, 'average_rating': 1.8571428571429, @@ -39,7 +39,43 @@ class VidliiIE(InfoExtractor): # * A regular expression; start the string with re: # * Any Python type (for example int or float) } - } + }, { + 'url': 'https://www.vidlii.com/watch?v=vBo2IcrwOkO', + 'md5': 'b42640a596b4dc986702567d49268963', + 'info_dict': { + 'id': 'vBo2IcrwOkO', + 'ext': 'mp4', + 'title': '(OLD VIDEO) i like youtube!!', + 'thumbnail': 'https://www.vidlii.com/usfi/thmp/vBo2IcrwOkO.jpg', + 'upload_date': '20171011', + 'description':'Original upload date:
\nMarch 10th 2011
\nCredit goes to people who own content in the video', + 'uploader': 'MyEditedVideoSpartan' + # TODO more properties, either as: + # * A value + # * MD5 checksum; start the string with md5: + # * A regular expression; start the string with re: + # * Any Python type (for example int or float) + + } + + }, { + 'url': 'https://www.vidlii.com/watch?v=E8SeUE3J5EV', + 'md5': 'f202427f9b31171f0fdd0ddeacb24720', + 'info_dict': { + 'id': 'E8SeUE3J5EV', + 'ext': 'mp4', + 'title': 'Games make you violent', + 'thumbnail': 'https://www.vidlii.com/usfi/thmp/E8SeUE3J5EV.jpg', + 'upload_date': '20171116', + 'description':'Games are made by the communistic feminist fbi cia jews and they control your mind and make you want to kill', + 'uploader': 'APPle5auc31995' + # TODO more properties, either as: + # * A value + # * MD5 checksum; start the string with md5: + # * A regular expression; start the string with re: + # * Any Python type (for example int or float) + } + }] def _real_extract(self, url): # get required video properties From b7ae646eed3a64745b0f547a37b41bf53181a292 Mon Sep 17 00:00:00 2001 From: Patrick Jattke Date: Fri, 17 Nov 2017 11:07:24 +0100 Subject: [PATCH 06/12] - Added tests for extractor vidlii - Tested fallbacks by use of assertions --- youtube_dl/extractor/vidlii.py | 116 ++++++++++++++++----------------- 1 file changed, 56 insertions(+), 60 deletions(-) diff --git a/youtube_dl/extractor/vidlii.py b/youtube_dl/extractor/vidlii.py index 0cce7b2ce..c2fe392c2 100644 --- a/youtube_dl/extractor/vidlii.py +++ b/youtube_dl/extractor/vidlii.py @@ -27,53 +27,32 @@ class VidliiIE(InfoExtractor): 'categories': 'News & Politics', 'tags': ['Vidlii', 'Jan', 'Videogames'], 'duration': 212, - # TODO this might change in future, how to handle? - 'view_count': 233, - # TODO this might change in future, how to handle? - 'comment_count': 13, - 'average_rating': 1.8571428571429, + 'view_count': int, + 'comment_count': int, + 'average_rating': float, 'type': 'video', 'ext': 'mp4' - # * A value - # * MD5 checksum; start the string with md5: - # * A regular expression; start the string with re: - # * Any Python type (for example int or float) } - }, { + }, { 'url': 'https://www.vidlii.com/watch?v=vBo2IcrwOkO', 'md5': 'b42640a596b4dc986702567d49268963', 'info_dict': { 'id': 'vBo2IcrwOkO', - 'ext': 'mp4', 'title': '(OLD VIDEO) i like youtube!!', + 'description': 'Original upload date:
\nMarch 10th 2011
\nCredit goes to people who own content in the video', 'thumbnail': 'https://www.vidlii.com/usfi/thmp/vBo2IcrwOkO.jpg', + 'uploader': 'MyEditedVideoSpartan', + 'url': 'https://cdn.vidlii.com/videos/vBo2IcrwOkO.mp4', + 'uploader_url': 'https://www.vidlii.com/user/MyEditedVideoSpartan', 'upload_date': '20171011', - 'description':'Original upload date:
\nMarch 10th 2011
\nCredit goes to people who own content in the video', - 'uploader': 'MyEditedVideoSpartan' - # TODO more properties, either as: - # * A value - # * MD5 checksum; start the string with md5: - # * A regular expression; start the string with re: - # * Any Python type (for example int or float) - - } - - }, { - 'url': 'https://www.vidlii.com/watch?v=E8SeUE3J5EV', - 'md5': 'f202427f9b31171f0fdd0ddeacb24720', - 'info_dict': { - 'id': 'E8SeUE3J5EV', - 'ext': 'mp4', - 'title': 'Games make you violent', - 'thumbnail': 'https://www.vidlii.com/usfi/thmp/E8SeUE3J5EV.jpg', - 'upload_date': '20171116', - 'description':'Games are made by the communistic feminist fbi cia jews and they control your mind and make you want to kill', - 'uploader': 'APPle5auc31995' - # TODO more properties, either as: - # * A value - # * MD5 checksum; start the string with md5: - # * A regular expression; start the string with re: - # * Any Python type (for example int or float) + 'categories': 'Film & Animation', + 'tags': None, + 'duration': 34, + 'view_count': int, + 'comment_count': int, + 'average_rating': float, + 'type': 'video', + 'ext': 'mp4' } }] @@ -83,24 +62,33 @@ class VidliiIE(InfoExtractor): webpage = self._download_webpage(url, video_id) - title = str_or_none( + title_1 = str_or_none( self._html_search_regex(r'

(.+?)

', webpage, - 'title', default=None)) or str_or_none( + 'title', default=None)) + title_2 = str_or_none( self._html_search_regex(r'([^<]+?)', webpage, - 'title', default=None)) or str_or_none( + 'title', default=None)).replace( + " - VidLii", "") + title_3 = str_or_none( self._html_search_meta('twitter:title', webpage, 'title', - default=False)) + default=False)).replace(" - VidLii", "") + # assert title_1 == title_2 == title_3, "TITLE fallback is not working" + title = title_1 or title_2 or title_3 + description = strip_or_none( get_element_by_id('des_text', webpage).strip()) - uploader = str_or_none( + uploader_1 = str_or_none( self._html_search_regex( r']+class="wt_person"[^>]*>(?:[^<]+)]+?class=["\']avt2\s*["\'][^>]+?alt=["\']([^"\']+?)["\']', webpage, 'uploader', default=None)) + # assert uploader_1 == uploader_2, "UPLOADER fallback is not working" + uploader = uploader_1 or uploader_2 url = self._html_search_regex( r'videoInfo[\s]*=[\s]*{[^}]*src:[\s]*(?:"|\')([^"]*?)(?:"|\')', @@ -109,41 +97,49 @@ class VidliiIE(InfoExtractor): # get additional properties uploader_url = "https://www.vidlii.com/user/%s" % uploader + # returns date as YYYYMMDD upload_date = str_or_none( self._html_search_meta('datePublished', webpage, 'upload_date', default=False).replace("-", - "")) or str_or_none( - self._html_search_regex(r'(.+?)', webpage, - 'upload_date', default="").replace("-", - "")) + "")) + categories = self._html_search_regex( r'
Category:\s*<\/div>[\s\r]*
[\s\r]*]*>[\s]*([^<]*)', webpage) or None - duration = int_or_none( + duration_1 = int_or_none( self._html_search_meta('video:duration', webpage, 'duration', - default=False)) or int_or_none( + default=False)) + duration_2 = int_or_none( self._html_search_regex( r'videoInfo[^=]*=[^{]*{[^}]*dur:([^,}]*?),', webpage, 'duration', default=None)) - view_count_fallback = re.findall(r'([^<]*?)', - get_element_by_class("w_views", - webpage)) - view_count_fallback = view_count_fallback[ - 0] if view_count_fallback else None - view_count = int_or_none(self._html_search_regex( - r'Views:[^<]*([^<]*?)<\/strong>', webpage, - 'view_count', default=None)) or int_or_none( - view_count_fallback) + # assert duration_1 == duration_2, "DURATION fallback is not working" + duration = duration_1 or duration_2 - comment_count = int_or_none(self._html_search_regex( + view_count_1 = int_or_none(self._html_search_regex( + r'Views:[^<]*([^<]*?)<\/strong>', webpage, + 'view_count', default=None)) + view_count_2 = re.findall(r'([^<]*?)', + get_element_by_class("w_views", + webpage)) + view_count_2 = int_or_none(view_count_2[ + 0]) if view_count_2 else None + # assert view_count_1 == view_count_2, "VIEW COUNT fallback is not working" + view_count = view_count_1 or view_count_2 + + comment_count_1 = int_or_none(self._html_search_regex( r'Comments:[^<]*([^<]*?)<\/strong>', webpage, - 'comment_count', default=None)) or int_or_none( + 'comment_count', default=None)) + comment_count_2 = int_or_none( self._html_search_regex( r']+id="cmt_num"[^>]*>([^<]+?)<\/span>', webpage, 'comment_count', default=None)) + # assert comment_count_1 == comment_count_2, "COMMENT COUNT fallback is not working" + comment_count = comment_count_1 or comment_count_2 + average_rating = float_or_none( self._html_search_regex( r'{[\s\r]*\$\("#rateYo"\).rateYo\({[^}]*rating:\s*([^,]*?),[^}.]*}', From a334c0cbd02c487bd9d2e9b1b9ea4ee7ff7f73ea Mon Sep 17 00:00:00 2001 From: Patrick Jattke Date: Fri, 17 Nov 2017 11:10:58 +0100 Subject: [PATCH 07/12] [vidlii] Add new extractor --- youtube_dl/extractor/vidlii.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/youtube_dl/extractor/vidlii.py b/youtube_dl/extractor/vidlii.py index c2fe392c2..cfe168da7 100644 --- a/youtube_dl/extractor/vidlii.py +++ b/youtube_dl/extractor/vidlii.py @@ -57,9 +57,7 @@ class VidliiIE(InfoExtractor): }] def _real_extract(self, url): - # get required video properties video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) title_1 = str_or_none( From 68aff22f474999fc2a7877d26c9dd0a0c285b38e Mon Sep 17 00:00:00 2001 From: Patrick Jattke Date: Fri, 17 Nov 2017 11:10:58 +0100 Subject: [PATCH 08/12] [vidlii] Add new extractor, see #14472 --- youtube_dl/extractor/vidlii.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/vidlii.py b/youtube_dl/extractor/vidlii.py index c2fe392c2..7079cdcd8 100644 --- a/youtube_dl/extractor/vidlii.py +++ b/youtube_dl/extractor/vidlii.py @@ -57,11 +57,10 @@ class VidliiIE(InfoExtractor): }] def _real_extract(self, url): - # get required video properties video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) + # extract basic properties of video title_1 = str_or_none( self._html_search_regex(r'

(.+?)

', webpage, 'title', default=None)) @@ -125,8 +124,7 @@ class VidliiIE(InfoExtractor): view_count_2 = re.findall(r'([^<]*?)', get_element_by_class("w_views", webpage)) - view_count_2 = int_or_none(view_count_2[ - 0]) if view_count_2 else None + view_count_2 = int_or_none(view_count_2[0]) if view_count_2 else None # assert view_count_1 == view_count_2, "VIEW COUNT fallback is not working" view_count = view_count_1 or view_count_2 From 4a79e5ce791ceb33c0eed500964fa104d3dadc3f Mon Sep 17 00:00:00 2001 From: Patrick Jattke Date: Fri, 17 Nov 2017 11:25:31 +0100 Subject: [PATCH 09/12] [vidlii] Add new extractor. See issue #14472 --- youtube_dl/extractor/vidlii.py | 45 ++++++++++++---------------------- 1 file changed, 15 insertions(+), 30 deletions(-) diff --git a/youtube_dl/extractor/vidlii.py b/youtube_dl/extractor/vidlii.py index 7079cdcd8..b71160396 100644 --- a/youtube_dl/extractor/vidlii.py +++ b/youtube_dl/extractor/vidlii.py @@ -61,33 +61,26 @@ class VidliiIE(InfoExtractor): webpage = self._download_webpage(url, video_id) # extract basic properties of video - title_1 = str_or_none( + title = str_or_none( self._html_search_regex(r'

(.+?)

', webpage, - 'title', default=None)) - title_2 = str_or_none( + 'title', default=None)) or str_or_none( self._html_search_regex(r'([^<]+?)', webpage, 'title', default=None)).replace( - " - VidLii", "") - title_3 = str_or_none( + " - VidLii", "") or str_or_none( self._html_search_meta('twitter:title', webpage, 'title', default=False)).replace(" - VidLii", "") - # assert title_1 == title_2 == title_3, "TITLE fallback is not working" - title = title_1 or title_2 or title_3 description = strip_or_none( get_element_by_id('des_text', webpage).strip()) - uploader_1 = str_or_none( + uploader = str_or_none( self._html_search_regex( r']+class="wt_person"[^>]*>(?:[^<]+)]+?class=["\']avt2\s*["\'][^>]+?alt=["\']([^"\']+?)["\']', webpage, 'uploader', default=None)) - # assert uploader_1 == uploader_2, "UPLOADER fallback is not working" - uploader = uploader_1 or uploader_2 url = self._html_search_regex( r'videoInfo[\s]*=[\s]*{[^}]*src:[\s]*(?:"|\')([^"]*?)(?:"|\')', @@ -108,35 +101,27 @@ class VidliiIE(InfoExtractor): 'categories', default=None) tags = re.findall(r'([^<]*?)
', + get_element_by_class("w_views", + webpage)) + view_count_fb = view_count_fb[0] if view_count_fb else None + view_count = int_or_none(self._html_search_regex( r'Views:[^<]*([^<]*?)<\/strong>', webpage, - 'view_count', default=None)) - view_count_2 = re.findall(r'([^<]*?)', - get_element_by_class("w_views", - webpage)) - view_count_2 = int_or_none(view_count_2[0]) if view_count_2 else None - # assert view_count_1 == view_count_2, "VIEW COUNT fallback is not working" - view_count = view_count_1 or view_count_2 + 'view_count', default=None)) or int_or_none(view_count_fb) - comment_count_1 = int_or_none(self._html_search_regex( + comment_count = int_or_none(self._html_search_regex( r'Comments:[^<]*([^<]*?)<\/strong>', webpage, - 'comment_count', default=None)) - comment_count_2 = int_or_none( + 'comment_count', default=None)) or int_or_none( self._html_search_regex( r']+id="cmt_num"[^>]*>([^<]+?)<\/span>', webpage, 'comment_count', default=None)) - # assert comment_count_1 == comment_count_2, "COMMENT COUNT fallback is not working" - comment_count = comment_count_1 or comment_count_2 average_rating = float_or_none( self._html_search_regex( From 0d3ea70cf3b8c12f39714273aee0755fef1b0328 Mon Sep 17 00:00:00 2001 From: Patrick Jattke Date: Wed, 22 Nov 2017 13:44:08 +0100 Subject: [PATCH 10/12] [vidlii] Fixed suggestions for new extractor (see pull request for issue #14779) --- youtube_dl/extractor/vidlii.py | 95 +++++++++++++--------------------- 1 file changed, 36 insertions(+), 59 deletions(-) diff --git a/youtube_dl/extractor/vidlii.py b/youtube_dl/extractor/vidlii.py index b71160396..6be0fb374 100644 --- a/youtube_dl/extractor/vidlii.py +++ b/youtube_dl/extractor/vidlii.py @@ -6,8 +6,8 @@ import re from .common import InfoExtractor from ..utils import ( int_or_none, - get_element_by_id, str_or_none, get_element_by_class, strip_or_none, - float_or_none) + get_element_by_id, get_element_by_class, strip_or_none, + float_or_none, urljoin) class VidliiIE(InfoExtractor): @@ -18,7 +18,10 @@ class VidliiIE(InfoExtractor): 'info_dict': { 'id': 'tJluaH4BJ3v', 'title': 'Vidlii is against me', - 'description': 'I have HAD it. Vidlii does not like me. I have tried to uplaod videos and submit them to the contest and no ne of my videos show up so maybe it is broken for everyone else but this one was trying to submit it because I wanted to submit to the contest :) Tanks I hope the website is fixed PS: Jan you are cool please add my video', + 'description': 'I have HAD it. Vidlii does not like me. I have tried to uplaod videos and submit them to the ' + 'contest and no ne of my videos show up so maybe it is broken for everyone else but this one was ' + 'trying to submit it because I wanted to submit to the contest :) Tanks I hope the website is ' + 'fixed PS: Jan you are cool please add my video', 'thumbnail': 'https://www.vidlii.com/usfi/thmp/tJluaH4BJ3v.jpg', 'uploader': 'APPle5auc31995', 'url': 'https://cdn.vidlii.com/videos/tJluaH4BJ3v.mp4', @@ -61,76 +64,50 @@ class VidliiIE(InfoExtractor): webpage = self._download_webpage(url, video_id) # extract basic properties of video - title = str_or_none( - self._html_search_regex(r'

(.+?)

', webpage, - 'title', default=None)) or str_or_none( - self._html_search_regex(r'([^<]+?)', webpage, - 'title', default=None)).replace( - " - VidLii", "") or str_or_none( - self._html_search_meta('twitter:title', webpage, 'title', - default=False)).replace(" - VidLii", "") + title = self._html_search_regex(r'

(.+?)

', webpage, 'title', default=None) or self._html_search_regex( + r'([^<]+?)', webpage, 'title', default="").replace( + " - VidLii", "") or self._html_search_meta('twitter:title', webpage, 'title', + default="").replace(" - VidLii", "") - description = strip_or_none( - get_element_by_id('des_text', webpage).strip()) + description = strip_or_none(get_element_by_id('des_text', webpage)) - uploader = str_or_none( - self._html_search_regex( - r']+class="wt_person"[^>]*>(?:[^<]+)
]+?class=["\']avt2\s*["\'][^>]+?alt=["\']([^"\']+?)["\']', - webpage, 'uploader', default=None)) + uploader = self._html_search_regex(r']+class="wt_person"[^>]*>(?:[^<]+)]+?class=["\']avt2\s*["\'][^>]+?alt=["\']([^"\']+?)["\']', + webpage, 'uploader', default=None) - url = self._html_search_regex( - r'videoInfo[\s]*=[\s]*{[^}]*src:[\s]*(?:"|\')([^"]*?)(?:"|\')', - webpage, 'url', default=None) + url = self._html_search_regex(r'videoInfo[\s]*=[\s]*{[^}]*src:[\s]*(?:"|\')([^"]*?)(?:"|\')', webpage, 'url', + default=None) # get additional properties - uploader_url = "https://www.vidlii.com/user/%s" % uploader + uploader_url = urljoin("https://www.vidlii.com/user/", uploader) # returns date as YYYYMMDD - upload_date = str_or_none( - self._html_search_meta('datePublished', webpage, 'upload_date', - default=False).replace("-", - "")) + upload_date = self._html_search_meta('datePublished', webpage, 'upload_date', default="").replace("-", "") categories = self._html_search_regex( - r'
Category:\s*<\/div>[\s\r]*
[\s\r]*]*>[\s]*([^<]*)', - webpage) or None - duration = int_or_none( - self._html_search_meta('video:duration', webpage, 'duration', - default=False)) or int_or_none( - self._html_search_regex( - r'videoInfo[^=]*=[^{]*{[^}]*dur:([^,}]*?),', webpage, - 'duration', default=None)) + r'
Category:\s*<\/div>[\s\r]*
[\s\r]*]*>[\s]*([^<]*)', webpage) or None + duration = int_or_none(self._html_search_meta('video:duration', webpage, 'duration', default=False)) or int_or_none( + self._html_search_regex(r'videoInfo[^=]*=[^{]*{[^}]*dur:([^,}]*?),', webpage, 'duration', default=None)) - view_count_fb = re.findall(r'([^<]*?)', - get_element_by_class("w_views", - webpage)) + view_count_fb = re.findall(r'([^<]*?)', get_element_by_class("w_views", webpage)) view_count_fb = view_count_fb[0] if view_count_fb else None - view_count = int_or_none(self._html_search_regex( - r'Views:[^<]*([^<]*?)<\/strong>', webpage, - 'view_count', default=None)) or int_or_none(view_count_fb) + view_count = int_or_none(self._html_search_regex(r'Views:[^<]*([^<]*?)<\/strong>', webpage, 'view_count', + default=None)) or int_or_none(view_count_fb) - comment_count = int_or_none(self._html_search_regex( - r'Comments:[^<]*([^<]*?)<\/strong>', webpage, - 'comment_count', default=None)) or int_or_none( - self._html_search_regex( - r']+id="cmt_num"[^>]*>([^<]+?)<\/span>', webpage, - 'comment_count', default=None)) + comment_count = int_or_none( + self._html_search_regex(r'Comments:[^<]*([^<]*?)<\/strong>', webpage, 'comment_count', + default=None) or + self._html_search_regex(r']+id="cmt_num"[^>]*>([^<]+?)<\/span>', webpage, 'comment_count', default=None)) average_rating = float_or_none( - self._html_search_regex( - r'{[\s\r]*\$\("#rateYo"\).rateYo\({[^}]*rating:\s*([^,]*?),[^}.]*}', - webpage, 'average_rating', default=None)) - thumbnail_link = self._html_search_regex( - r'videoInfo[\s]*=[\s]*{[^}]*img:[\s]*(?:"|\')([^"]*?)(?:"|\')', - webpage, 'thumbnail', default=None) - thumbnail = 'https://www.vidlii.com%s' % thumbnail_link + self._html_search_regex(r'{[\s\r]*\$\("#rateYo"\).rateYo\({[^}]*rating:\s*([^,]*?),[^}.]*}', + webpage, 'average_rating', default=None)) + thumbnail_link = self._html_search_regex(r'videoInfo[\s]*=[\s]*{[^}]*img:[\s]*(?:"|\')([^"]*?)(?:"|\')', webpage, + 'thumbnail', default=None) + thumbnail = urljoin("https://www.vidlii.com/", thumbnail_link) video_type = self._og_search_property('type', webpage, 'type') return { From 5463f0abd41c7740fdbd2d21034032b30ba6d482 Mon Sep 17 00:00:00 2001 From: Patrick Jattke Date: Wed, 22 Nov 2017 20:35:02 +0100 Subject: [PATCH 11/12] [vidlii] Fixed suggestions for new extractor (see pull request for issue #14779) - improved regular expressions - implemented use of helper functions js_to_json, unified_strdate --- youtube_dl/extractor/vidlii.py | 59 +++++++++++++++++++--------------- 1 file changed, 33 insertions(+), 26 deletions(-) diff --git a/youtube_dl/extractor/vidlii.py b/youtube_dl/extractor/vidlii.py index 6be0fb374..e7638d562 100644 --- a/youtube_dl/extractor/vidlii.py +++ b/youtube_dl/extractor/vidlii.py @@ -7,7 +7,7 @@ from .common import InfoExtractor from ..utils import ( int_or_none, get_element_by_id, get_element_by_class, strip_or_none, - float_or_none, urljoin) + float_or_none, urljoin, js_to_json, unified_strdate) class VidliiIE(InfoExtractor): @@ -63,51 +63,58 @@ class VidliiIE(InfoExtractor): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) + # extract videoInfo variable for further use + videoInfo_dict = self._parse_json(js_to_json(self._html_search_regex(r'var videoInfo\s*=\s*({[^}]*})', webpage, + 'videoInfo', fatal=False)), video_id) + # extract basic properties of video - title = self._html_search_regex(r'

(.+?)

', webpage, 'title', default=None) or self._html_search_regex( - r'([^<]+?)', webpage, 'title', default="").replace( - " - VidLii", "") or self._html_search_meta('twitter:title', webpage, 'title', - default="").replace(" - VidLii", "") + title = (self._html_search_regex(r'([^<]+?)', webpage, 'title', default='') or + self._html_search_meta('twitter:title', webpage, 'title', default='')).replace(" - VidLii", + "") or self._html_search_regex( + r'

(.+?)

', webpage, 'title', default=None) description = strip_or_none(get_element_by_id('des_text', webpage)) - uploader = self._html_search_regex(r']+class="wt_person"[^>]*>(?:[^<]+)]+?class=["\']avt2\s*["\'][^>]+?alt=["\']([^"\']+?)["\']', - webpage, 'uploader', default=None) + uploader = self._html_search_regex( + r']+class="wt_person"[^>]*>(?:[^<]+)]+?class=(' + r'?:"avt2\s*"|\'avt2\s*\')[^>]+?alt=(?:"([^"]+?)"|\'([^\']+?)\')>', webpage, 'uploader', default=None, + fatal=False) - url = self._html_search_regex(r'videoInfo[\s]*=[\s]*{[^}]*src:[\s]*(?:"|\')([^"]*?)(?:"|\')', webpage, 'url', - default=None) + video_url = videoInfo_dict.get("src") # get additional properties - uploader_url = urljoin("https://www.vidlii.com/user/", uploader) + uploader_url = urljoin('https://www.vidlii.com/user/', uploader) # returns date as YYYYMMDD - upload_date = self._html_search_meta('datePublished', webpage, 'upload_date', default="").replace("-", "") + upload_date = unified_strdate( + self._html_search_meta('datePublished', webpage, 'upload_date', default=None, + fatal=False) or self._html_search_regex(r'(['r'^<]*?)', webpage, + 'upload_date', + default=None, fatal=False)) categories = self._html_search_regex( r'
Category:\s*<\/div>[\s\r]*
[\s\r]*]*>[\s]*([^<]*)', webpage) or None - duration = int_or_none(self._html_search_meta('video:duration', webpage, 'duration', default=False)) or int_or_none( - self._html_search_regex(r'videoInfo[^=]*=[^{]*{[^}]*dur:([^,}]*?),', webpage, 'duration', default=None)) + default=None, fatal=False) + tags = re.findall(r'([^<]*?)', get_element_by_class("w_views", webpage)) view_count_fb = view_count_fb[0] if view_count_fb else None view_count = int_or_none(self._html_search_regex(r'Views:[^<]*([^<]*?)<\/strong>', webpage, 'view_count', - default=None)) or int_or_none(view_count_fb) + default=None, fatal=False)) or int_or_none(view_count_fb) comment_count = int_or_none( - self._html_search_regex(r'Comments:[^<]*([^<]*?)<\/strong>', webpage, 'comment_count', - default=None) or - self._html_search_regex(r']+id="cmt_num"[^>]*>([^<]+?)<\/span>', webpage, 'comment_count', default=None)) + self._html_search_regex(r'Comments:[^<]*([^<]*?)<\/strong>|]+id="cmt_num"[^>]*>([' + r'^<]+?)<\/span>', webpage, 'comment_count', + default=None, fatal=False)) average_rating = float_or_none( self._html_search_regex(r'{[\s\r]*\$\("#rateYo"\).rateYo\({[^}]*rating:\s*([^,]*?),[^}.]*}', - webpage, 'average_rating', default=None)) - thumbnail_link = self._html_search_regex(r'videoInfo[\s]*=[\s]*{[^}]*img:[\s]*(?:"|\')([^"]*?)(?:"|\')', webpage, - 'thumbnail', default=None) - thumbnail = urljoin("https://www.vidlii.com/", thumbnail_link) + webpage, 'average_rating', default=None, fatal=False)) + thumbnail_link = videoInfo_dict.get("img") + thumbnail = urljoin('https://www.vidlii.com/', thumbnail_link) video_type = self._og_search_property('type', webpage, 'type') return { @@ -115,7 +122,7 @@ class VidliiIE(InfoExtractor): 'title': title, 'description': description, 'uploader': uploader, - 'url': url, + 'url': video_url, 'uploader_url': uploader_url, 'upload_date': upload_date, 'categories': categories, From d362c2b98e712d84345209e251c5706ab65bdf5e Mon Sep 17 00:00:00 2001 From: Patrick Jattke Date: Thu, 28 Dec 2017 14:03:14 +0100 Subject: [PATCH 12/12] [vidlii] Fixed suggestions for new extractor (see pull request for issue #14779) - improved regular expressions - improved tests - improved code style --- youtube_dl/extractor/vidlii.py | 57 ++++++++++++++-------------------- 1 file changed, 24 insertions(+), 33 deletions(-) diff --git a/youtube_dl/extractor/vidlii.py b/youtube_dl/extractor/vidlii.py index e7638d562..08be2d032 100644 --- a/youtube_dl/extractor/vidlii.py +++ b/youtube_dl/extractor/vidlii.py @@ -11,20 +11,16 @@ from ..utils import ( class VidliiIE(InfoExtractor): - _VALID_URL = r'(?:https*?:\/\/)*(?:www\.)*vidlii.com\/watch\?v=(?P[^?\s]{11})' + _VALID_URL = r'https?://(?:www\.)?vidlii.com/watch\?v=(?P.{11})' _TESTS = [{ 'url': 'https://www.vidlii.com/watch?v=tJluaH4BJ3v', 'md5': '9bf7d1e005dfa909b6efb0a1ff5175e2', 'info_dict': { 'id': 'tJluaH4BJ3v', 'title': 'Vidlii is against me', - 'description': 'I have HAD it. Vidlii does not like me. I have tried to uplaod videos and submit them to the ' - 'contest and no ne of my videos show up so maybe it is broken for everyone else but this one was ' - 'trying to submit it because I wanted to submit to the contest :) Tanks I hope the website is ' - 'fixed PS: Jan you are cool please add my video', - 'thumbnail': 'https://www.vidlii.com/usfi/thmp/tJluaH4BJ3v.jpg', + 'description': 'md5:de24ab8a9a310976d66bebb824aa2420', + 'thumbnail': 're:https://.*.jpg', 'uploader': 'APPle5auc31995', - 'url': 'https://cdn.vidlii.com/videos/tJluaH4BJ3v.mp4', 'uploader_url': 'https://www.vidlii.com/user/APPle5auc31995', 'upload_date': '20171107', 'categories': 'News & Politics', @@ -33,7 +29,6 @@ class VidliiIE(InfoExtractor): 'view_count': int, 'comment_count': int, 'average_rating': float, - 'type': 'video', 'ext': 'mp4' } }, { @@ -43,9 +38,8 @@ class VidliiIE(InfoExtractor): 'id': 'vBo2IcrwOkO', 'title': '(OLD VIDEO) i like youtube!!', 'description': 'Original upload date:
\nMarch 10th 2011
\nCredit goes to people who own content in the video', - 'thumbnail': 'https://www.vidlii.com/usfi/thmp/vBo2IcrwOkO.jpg', + 'thumbnail': 're:https://.*.jpg', 'uploader': 'MyEditedVideoSpartan', - 'url': 'https://cdn.vidlii.com/videos/vBo2IcrwOkO.mp4', 'uploader_url': 'https://www.vidlii.com/user/MyEditedVideoSpartan', 'upload_date': '20171011', 'categories': 'Film & Animation', @@ -54,7 +48,6 @@ class VidliiIE(InfoExtractor): 'view_count': int, 'comment_count': int, 'average_rating': float, - 'type': 'video', 'ext': 'mp4' } }] @@ -64,23 +57,23 @@ class VidliiIE(InfoExtractor): webpage = self._download_webpage(url, video_id) # extract videoInfo variable for further use - videoInfo_dict = self._parse_json(js_to_json(self._html_search_regex(r'var videoInfo\s*=\s*({[^}]*})', webpage, - 'videoInfo', fatal=False)), video_id) + videoInfo_dict = self._parse_json(js_to_json(self._html_search_regex(r'var\s*videoInfo\s*=\s*({[^}]*})', webpage, + 'videoInfo', fatal=True)), video_id) # extract basic properties of video - title = (self._html_search_regex(r'([^<]+?)', webpage, 'title', default='') or - self._html_search_meta('twitter:title', webpage, 'title', default='')).replace(" - VidLii", - "") or self._html_search_regex( - r'

(.+?)

', webpage, 'title', default=None) + title = (self._html_search_regex(r'([^<]+?)', webpage, 'title', default='', fatal=True) or + self._html_search_meta('twitter:title', webpage, 'title', default='', fatal=True)).replace(' - VidLii', '') \ + or self._html_search_regex(r'

(.+?)

', webpage, 'title', default=None, fatal=True) description = strip_or_none(get_element_by_id('des_text', webpage)) - uploader = self._html_search_regex( - r']+class="wt_person"[^>]*>(?:[^<]+)
]+?class=(' - r'?:"avt2\s*"|\'avt2\s*\')[^>]+?alt=(?:"([^"]+?)"|\'([^\']+?)\')>', webpage, 'uploader', default=None, - fatal=False) + uploader_patterns = [r']+class="wt_person"[^>]*>(?:[^<]+)]+?class=(?:"avt2\s*"', + r'\'avt2\s*\')[^>]+?alt=(?:"([^"]+?)"', + r'\'([^\']+?)\')>'] + uploader = self._html_search_regex(uploader_patterns, webpage, 'uploader', fatal=False) - video_url = videoInfo_dict.get("src") + video_url = videoInfo_dict.get('src') # get additional properties uploader_url = urljoin('https://www.vidlii.com/user/', uploader) @@ -93,29 +86,28 @@ class VidliiIE(InfoExtractor): default=None, fatal=False)) categories = self._html_search_regex( - r'
Category:\s*<\/div>[\s\r]*
[\s\r]*]*>([^<]*?)', webpage, 'categories', default=None, fatal=False) tags = re.findall(r'([^<]*?)', get_element_by_class("w_views", webpage)) + view_count_fb = re.findall(r'([^<]*?)', get_element_by_class('w_views', webpage) or '') view_count_fb = view_count_fb[0] if view_count_fb else None - view_count = int_or_none(self._html_search_regex(r'Views:[^<]*([^<]*?)<\/strong>', webpage, 'view_count', + view_count = int_or_none(self._html_search_regex(r'Views:[^<]*([^<]*?)', webpage, 'view_count', default=None, fatal=False)) or int_or_none(view_count_fb) + comment_count_patterns = [r'Comments:[^<]*([^<]*?)', + r']+id="cmt_num"[^>]*>([^<]+?)'] comment_count = int_or_none( - self._html_search_regex(r'Comments:[^<]*([^<]*?)<\/strong>|]+id="cmt_num"[^>]*>([' - r'^<]+?)<\/span>', webpage, 'comment_count', - default=None, fatal=False)) + self._html_search_regex(comment_count_patterns, webpage, 'comment_count', default=None, fatal=False)) average_rating = float_or_none( - self._html_search_regex(r'{[\s\r]*\$\("#rateYo"\).rateYo\({[^}]*rating:\s*([^,]*?),[^}.]*}', + self._html_search_regex(r'rating:\s*([^,]*),', webpage, 'average_rating', default=None, fatal=False)) - thumbnail_link = videoInfo_dict.get("img") + thumbnail_link = videoInfo_dict.get('img') thumbnail = urljoin('https://www.vidlii.com/', thumbnail_link) - video_type = self._og_search_property('type', webpage, 'type') return { 'id': video_id, @@ -132,5 +124,4 @@ class VidliiIE(InfoExtractor): 'comment_count': comment_count, 'average_rating': average_rating, 'thumbnail': thumbnail, - 'type': video_type }