From 4b0f34d56761edf5b619c2a0553cb821cc17fa29 Mon Sep 17 00:00:00 2001
From: "M.Yasoob Khalid"
Date: Fri, 28 Jun 2013 10:34:01 +0500
Subject: [PATCH] Added an IE for gamespot. Although gamespot allows downloading but it is only available to registered users. With this IE no registration is required.

---
Review notes (text between "---" and the first "diff --git" is not part of
the commit):
- gamespot.py was revised in review, so the blob hash on its index line
  below is still that of the originally submitted file.
- Angle-bracket text appears to be missing from this copy of the patch: the
  From: header has no address and four _search_regex patterns look
  truncated. They are kept as received and flagged in the code.

 youtube_dl/extractor/__init__.py |  2 ++
 youtube_dl/extractor/gamespot.py | 57 ++++++++++++++++++++++++++++++++
 2 files changed, 59 insertions(+)
 create mode 100644 youtube_dl/extractor/gamespot.py

diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index a9aa7e506..1032dd1d4 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -15,6 +15,7 @@ from .escapist import EscapistIE
 from .facebook import FacebookIE
 from .flickr import FlickrIE
 from .funnyordie import FunnyOrDieIE
+from .gamespot import GameSpotIE
 from .gametrailers import GametrailersIE
 from .generic import GenericIE
 from .googleplus import GooglePlusIE
@@ -140,6 +141,7 @@ def gen_extractors():
         WimpIE(),
         HotNewHipHopIE(),
         AUEngineIE(),
+        GameSpotIE(),
         GenericIE()
     ]
 
diff --git a/youtube_dl/extractor/gamespot.py b/youtube_dl/extractor/gamespot.py
new file mode 100644
index 000000000..37d7df8f5
--- /dev/null
+++ b/youtube_dl/extractor/gamespot.py
@@ -0,0 +1,57 @@
+import re
+
+from .common import InfoExtractor
+
+
+class GameSpotIE(InfoExtractor):
+    """Information extractor for gamespot.com video pages.
+
+    The media URL is looked up through the site's video player XML
+    endpoint, so no registered account is needed to download.
+    """
+
+    # NOTE(review): "[^/d]" excludes "/" and the letter "d"; it looks like
+    # a typo for "\d" (numeric id) -- confirm before tightening the pattern.
+    _VALID_URL = r'(?:http://)?(?:www\.)?gamespot\.com/([^/]+)/videos/([^/]+)-([^/d]+)/'
+
+    def _real_extract(self, url):
+        """Return a one-element list holding the info dict for url."""
+        mobj = re.match(self._VALID_URL, url)
+        # The id is the last dash-separated token of the third URL group.
+        video_id = mobj.group(3).split("-")[-1]
+        webpage = self._download_webpage(url, video_id)
+
+        # NOTE(review): the r'(.+?)' and r'' patterns in this method look
+        # truncated (r'' has no capture group at all); they are kept exactly
+        # as received -- restore them from the upstream commit before use.
+        title = self._search_regex(r'(.+?)',
+            webpage, 'video title')
+        # Drop the site suffix together with the whitespace it leaves behind.
+        title = title.replace('- GameSpot.com', '').strip()
+        upload_date = self._search_regex(r"'publish_date':'([^/d]+)','edid'",
+            webpage, 'upload date')
+        description = self._search_regex(r'',
+            webpage, 'video Description')
+
+        # The direct media and thumbnail URLs are searched for in the
+        # video_player/xml.php response for this id.
+        info_url = "http://www.gamespot.com/pages/video_player/xml.php?id=" + video_id
+        info_webpage = self._download_webpage(info_url, video_id, "Downloading info webpage")
+        final_url = self._search_regex(r"(.+?)",
+            info_webpage, 'download url')
+        # Label this lookup as the thumbnail; the original reused
+        # 'download url' for both searches.
+        thumbnail_url = self._search_regex(r'(.+?)',
+            info_webpage, 'thumbnail url')
+
+        # The extension is whatever follows the last dot of the media URL.
+        ext = final_url.split('.')[-1]
+        return [{
+            'id': video_id,
+            'url': final_url,
+            'ext': ext,
+            'title': title,
+            'thumbnail': thumbnail_url,
+            'upload_date': upload_date,
+            'description': description,
+        }]