diff --git a/youtube_dl/extractor/vidlii.py b/youtube_dl/extractor/vidlii.py index acdf0a687..c69d44a7c 100644 --- a/youtube_dl/extractor/vidlii.py +++ b/youtube_dl/extractor/vidlii.py @@ -1,11 +1,16 @@ # coding: utf-8 from __future__ import unicode_literals +import re + +from ..utils import ( + int_or_none, + get_element_by_id) from .common import InfoExtractor class VidliiIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?yourextractor\.com/watch/(?P<id>[0-9]+)' + _VALID_URL = r'(?:https*?:\/\/)*(?:www\.)*vidlii.com\/watch\?v=(?P<id>[^?\s]{11})' _TEST = { 'url': 'https://yourextractor.com/watch/42', 'md5': 'TODO: md5 sum of the first 10241 bytes of the video file (use --test)', @@ -23,16 +28,52 @@ class VidliiIE(InfoExtractor): } def _real_extract(self, url): + # get required video properties video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - - # TODO more code goes here, for example ... title = self._html_search_regex(r'<h1>(.+?)</h1>', webpage, 'title') + description = get_element_by_id('des_text', webpage).strip() + uploader = self._html_search_regex( + r']+class="wt_person"[^>]*>(?:[^<]+)Category:\s*<\/div>[\s\r]*
[\s\r]*]*>[\s]*([^<]*)', webpage) + duration = int_or_none(self._html_search_meta('video:duration', webpage, 'duration', default=False)) + view_count = int_or_none( + self._html_search_regex(r'<strong[^>]+class="w_views"[^>]*>([^<]+?)<\/strong>', webpage, + 'view_count')) + comment_count = int_or_none(self._html_search_regex(r'<span[^>]+id="cmt_num"[^>]*>([^<]+?)<\/span>', webpage, + 'comment_count')) + average_rating = int_or_none( + self._html_search_regex(r'{[\s\r]*\$\("#rateYo"\).rateYo\({[^}]*rating:\s*([0-9]*?),[^}]*}', + webpage, 'average_rating')) + thumbnail_link = self._html_search_regex(r'videoInfo[\s]*=[\s]*{[^}]*img:[\s]*(?:"|\')([^"]*?)(?:"|\')', + webpage, 'thumbnail') + thumbnail = 'https://www.vidlii.com%s' % thumbnail_link + type = self._og_search_property('type', webpage, 'type') + + # use youtube-dl --print-json to show extracted metadata or debugger (watch value) return { 'id': video_id, 'title': title, - 'description': self._og_search_description(webpage), - 'uploader': self._search_regex(r'<div[^>]+id="uploader"[^>]*>([^<]+)<', webpage, 'uploader', fatal=False), - # TODO more properties (see youtube_dl/extractor/common.py) - } \ No newline at end of file + 'description': description, + 'uploader': uploader, + 'url': url, + 'uploader_url': uploader_url, + 'upload_date': upload_date, # should we use release_date instead? + 'categories': categories, + 'tags': tags, + 'duration': duration, + 'view_count': view_count, + 'comment_count': comment_count, + 'average_rating': average_rating, + 'thumbnail': thumbnail, + 'type': type + }