# coding: utf-8
from __future__ import unicode_literals

try:
    from urllib.parse import parse_qs, urlencode, urlparse
except ImportError:  # Python 2
    from urllib import urlencode
    from urlparse import parse_qs, urlparse

from .common import InfoExtractor


class GenVideosIE(InfoExtractor):
    """Information extractor for genvideos.org watch pages.

    The watch page supplies the title. The media URLs come from a separate
    JSON endpoint (``/video_info/iframe``) that maps a quality label
    ("360", "720", "1080") to a player embed URL whose ``url`` query
    parameter holds the actual media location.
    """

    # Matches only the basic URL format, e.g.
    #   https://genvideos.org/watch?v=kMjlhMWE5OT
    # TODO: check for other possible URL formats as well, for example
    #   * http://genvideos.com/watch_kMjlhMWE5OT.html#video=tBa-Q-WkbPqwzs34b7ArqU7VomQMb2n-RAlARWKWKTI
    #   * http://genvideos.org/watch_kMjlhMWE5OT.html#video=tBa-Q-WkbPqwzs34b7ArqU7VomQMb2n-RAlARWKWKTI
    _VALID_URL = r'https?://(?:www\.)?genvideos\.org/watch\?v=(?P<id>\w+)'
    _TEST = {
        'url': 'http://genvideos.org/watch?v=kMjlhMWE5OT',
        'md5': 'TODO: md5 sum of the first 10241 bytes of the video file (use --test)',
        'info_dict': {
            'id': 'kMjlhMWE5OT',
            'ext': 'mp4',
            'title': 'The Hunger Games (2012) - HD 1080p',
            # TODO: thumbnail ('re:^https?://.*\.jpg$') and more properties,
            # either as:
            # * A value
            # * MD5 checksum; start the string with md5:
            # * A regular expression; start the string with re:
            # * Any Python type (for example int or float)
        }
    }

    def _real_extract(self, url):
        """Extract the title and every advertised format for *url*.

        Returns an info dict with ``id``, ``title`` and ``formats``.
        ``_sort_formats`` reports an extraction error when the endpoint
        yields no usable media URL.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        title = self._html_search_regex(r'<h1>(.+?)</h1>', webpage, 'title')

        # The endpoint answers a form-encoded POST with JSON such as
        #   {"360": "//html5player.org/embed?url=https%3A%2F%2F...%3Dm18",
        #    "720": "...%3Dm22", "1080": "...%3Dm37"}
        # Going through the extractor's own downloader (instead of a
        # third-party HTTP client) keeps proxy/cookie/error handling
        # consistent with the rest of the program.
        players = self._download_json(
            'https://genvideos.org/video_info/iframe', video_id,
            note='Downloading video URLs',
            data=urlencode({'v': video_id}).encode('ascii'),
            headers={'Referer': 'https://genvideos.org/'})
        if not isinstance(players, dict):
            players = {}

        formats = []
        for quality, player_url in players.items():
            if not player_url:
                continue
            # parse_qs maps every parameter to a *list* of values; the media
            # URL is the first (and only expected) 'url' value.
            media_urls = parse_qs(urlparse(player_url).query).get('url')
            if not media_urls:
                continue
            formats.append({
                'url': media_urls[0],
                'format_id': '%sp' % quality,
                'height': int(quality) if quality.isdigit() else None,
            })
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            # TODO: description, uploader and more properties
            # (see youtube_dl/extractor/common.py)
        }