diff --git a/youtube_dl/extractor/vidlii.py b/youtube_dl/extractor/vidlii.py index 6be0fb374..e7638d562 100644 --- a/youtube_dl/extractor/vidlii.py +++ b/youtube_dl/extractor/vidlii.py @@ -7,7 +7,7 @@ from .common import InfoExtractor from ..utils import ( int_or_none, get_element_by_id, get_element_by_class, strip_or_none, - float_or_none, urljoin) + float_or_none, urljoin, js_to_json, unified_strdate) class VidliiIE(InfoExtractor): @@ -63,51 +63,58 @@ class VidliiIE(InfoExtractor): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) + # extract videoInfo variable for further use + videoInfo_dict = self._parse_json(js_to_json(self._html_search_regex(r'var videoInfo\s*=\s*({[^}]*})', webpage, + 'videoInfo', fatal=False)), video_id) + # extract basic properties of video - title = self._html_search_regex(r'

(.+?)

', webpage, 'title', default=None) or self._html_search_regex( - r'([^<]+?)', webpage, 'title', default="").replace( - " - VidLii", "") or self._html_search_meta('twitter:title', webpage, 'title', - default="").replace(" - VidLii", "") + title = (self._html_search_regex(r'([^<]+?)', webpage, 'title', default='') or + self._html_search_meta('twitter:title', webpage, 'title', default='')).replace(" - VidLii", + "") or self._html_search_regex( + r'

(.+?)

', webpage, 'title', default=None) description = strip_or_none(get_element_by_id('des_text', webpage)) - uploader = self._html_search_regex(r']+class="wt_person"[^>]*>(?:[^<]+)]+?class=["\']avt2\s*["\'][^>]+?alt=["\']([^"\']+?)["\']', - webpage, 'uploader', default=None) + uploader = self._html_search_regex( + r']+class="wt_person"[^>]*>(?:[^<]+)]+?class=(' + r'?:"avt2\s*"|\'avt2\s*\')[^>]+?alt=(?:"([^"]+?)"|\'([^\']+?)\')>', webpage, 'uploader', default=None, + fatal=False) - url = self._html_search_regex(r'videoInfo[\s]*=[\s]*{[^}]*src:[\s]*(?:"|\')([^"]*?)(?:"|\')', webpage, 'url', - default=None) + video_url = videoInfo_dict.get("src") # get additional properties - uploader_url = urljoin("https://www.vidlii.com/user/", uploader) + uploader_url = urljoin('https://www.vidlii.com/user/', uploader) # returns date as YYYYMMDD - upload_date = self._html_search_meta('datePublished', webpage, 'upload_date', default="").replace("-", "") + upload_date = unified_strdate( + self._html_search_meta('datePublished', webpage, 'upload_date', default=None, + fatal=False) or self._html_search_regex(r'(['r'^<]*?)', webpage, + 'upload_date', + default=None, fatal=False)) categories = self._html_search_regex( r'
Category:\s*<\/div>[\s\r]*
[\s\r]*]*>[\s]*([^<]*)', webpage) or None - duration = int_or_none(self._html_search_meta('video:duration', webpage, 'duration', default=False)) or int_or_none( - self._html_search_regex(r'videoInfo[^=]*=[^{]*{[^}]*dur:([^,}]*?),', webpage, 'duration', default=None)) + default=None, fatal=False) + tags = re.findall(r'([^<]*?)', get_element_by_class("w_views", webpage)) view_count_fb = view_count_fb[0] if view_count_fb else None view_count = int_or_none(self._html_search_regex(r'Views:[^<]*([^<]*?)<\/strong>', webpage, 'view_count', - default=None)) or int_or_none(view_count_fb) + default=None, fatal=False)) or int_or_none(view_count_fb) comment_count = int_or_none( - self._html_search_regex(r'Comments:[^<]*([^<]*?)<\/strong>', webpage, 'comment_count', - default=None) or - self._html_search_regex(r']+id="cmt_num"[^>]*>([^<]+?)<\/span>', webpage, 'comment_count', default=None)) + self._html_search_regex(r'Comments:[^<]*([^<]*?)<\/strong>|]+id="cmt_num"[^>]*>([' + r'^<]+?)<\/span>', webpage, 'comment_count', + default=None, fatal=False)) average_rating = float_or_none( self._html_search_regex(r'{[\s\r]*\$\("#rateYo"\).rateYo\({[^}]*rating:\s*([^,]*?),[^}.]*}', - webpage, 'average_rating', default=None)) - thumbnail_link = self._html_search_regex(r'videoInfo[\s]*=[\s]*{[^}]*img:[\s]*(?:"|\')([^"]*?)(?:"|\')', webpage, - 'thumbnail', default=None) - thumbnail = urljoin("https://www.vidlii.com/", thumbnail_link) + webpage, 'average_rating', default=None, fatal=False)) + thumbnail_link = videoInfo_dict.get("img") + thumbnail = urljoin('https://www.vidlii.com/', thumbnail_link) video_type = self._og_search_property('type', webpage, 'type') return { @@ -115,7 +122,7 @@ class VidliiIE(InfoExtractor): 'title': title, 'description': description, 'uploader': uploader, - 'url': url, + 'url': video_url, 'uploader_url': uploader_url, 'upload_date': upload_date, 'categories': categories,