diff --git a/youtube_dl/extractor/go90.py b/youtube_dl/extractor/go90.py index cf51d7308..c4322cdc7 100644 --- a/youtube_dl/extractor/go90.py +++ b/youtube_dl/extractor/go90.py @@ -1,10 +1,9 @@ # coding: utf-8 from __future__ import unicode_literals -import re -import urllib #DEBUG - from .common import InfoExtractor +from .uplynk import UplynkPreplayIE +from ..utils import sanitize_url class Go90IE(InfoExtractor): @@ -16,7 +15,7 @@ class Go90IE(InfoExtractor): 'id': '07d47f43a7b04eb5b693252f2bd1086b', 'ext': 'mp4', 'title': 't@gged S1:E1 #shotgun', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', # TODO more properties, either as: # * A value # * MD5 checksum; start the string with md5: @@ -29,52 +28,31 @@ class Go90IE(InfoExtractor): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - # TODO more code goes here, for example ... - #title = self._html_search_regex(r'

(.+?)

', webpage, 'title') - - series_title = self._html_search_regex(r']* data-reactid="90">(.+?)', webpage, 'series_title') - season_episode_numbers = self._html_search_regex(r'(.+?)', webpage, 'season_episode_numbers') - episode_title = self._html_search_regex(r'(.+?)', webpage, 'episode_title') - - title = series_title + " " + season_episode_numbers + " " + episode_title - #print "[!!!] " + title - - #page_data_json = self._search_regex(r']*>window\.__data=(.+?);\s*', webpage, 'page_data', flags=re.DOTALL) - #print self.transform_source(page_data_json) - #page_data = self._parse_json(page_data_json, video_id, transform_source=self.transform_source) - - - - video_api = "https://www.go90.com/api/metadata/video/" + video_id - - video_api_data = self._download_json(video_api, video_id) #TODO: overwrite `note=` to output better explanation - #print "[!!!] " + video_api_data['url'] - - video_token_url = re.sub(r'^//', 'https://', video_api_data['url']) #TODO: use utils.sanitize_url() - #print "[!!!] " + video_token_url - - video_token_data = self._download_json(video_token_url, video_id) #TODO: overwrite `note=` to output better explanation - #print "[!!!] " + video_token_data['playURL'] - - m3u8_url = video_token_data['playURL'] - - #DEBUG - testfile = urllib.URLopener() - testfile.retrieve(m3u8_url, video_id + ".m3u8") - #/DEBUG - - formats = [] - formats.extend(self._extract_m3u8_formats( - m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)) - return { - 'id': video_id, - 'title': title, - 'description': self._og_search_description(webpage), - 'formats': formats, - #'uploader': self._search_regex(r']+id="uploader"[^>]*>([^<]+)<', webpage, 'uploader', fatal=False), - # TODO more properties (see youtube_dl/extractor/common.py) - } - - #def transform_source(self, json_string): - # return re.sub(re.sub(r':function.*?},([\[{"])', ':"",\g<1>', json_string, flags=re.DOTALL) \ No newline at end of file + # scrape data from webpage + page_data = {} + self.to_screen("Scrape data from webpage") + + page_data['id'] = video_id + + video_title = self._html_search_regex( + r']*>\s*(.*)\s*', webpage, 'title') + page_data['title'] = video_title + self.to_screen("Title: " + page_data['title']) + + + # retrieve upLynk data + video_api = "https://www.go90.com/api/metadata/video/" + video_id + video_api_data = self._download_json(video_api, video_id) #TODO: overwrite `note=` to output better explanation + video_token_url = sanitize_url(video_api_data['url']) + + uplynk_preplay = UplynkPreplayIE(self._downloader) + uplynk_data = uplynk_preplay.extract(video_token_url) + + + # merge data + video_data = uplynk_data.copy() + video_data.update(page_data) + # TODO more properties (see youtube_dl/extractor/common.py) + + return video_data