From 8e73a370f98e51c92a75475261b7c1421d8ca0af Mon Sep 17 00:00:00 2001 From: Jeff Buchbinder Date: Wed, 4 Mar 2015 15:02:13 -0500 Subject: [PATCH 1/4] [Primesharetv] Add primeshare.tv extractor, still need test data --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/primesharetv.py | 46 ++++++++++++++++++++++++++++ 2 files changed, 47 insertions(+) create mode 100644 youtube_dl/extractor/primesharetv.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index b7add0f89..faa93d4c2 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -373,6 +373,7 @@ from .pornhub import ( ) from .pornotube import PornotubeIE from .pornoxo import PornoXOIE +from .primesharetv import PrimesharetvIE from .promptfile import PromptFileIE from .prosiebensat1 import ProSiebenSat1IE from .puls4 import Puls4IE diff --git a/youtube_dl/extractor/primesharetv.py b/youtube_dl/extractor/primesharetv.py new file mode 100644 index 000000000..967125abc --- /dev/null +++ b/youtube_dl/extractor/primesharetv.py @@ -0,0 +1,46 @@ +# encoding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + int_or_none, + parse_filesize, + unified_strdate, + urlencode_postdata, +) +from ..compat import ( + compat_urllib_request, +) + +class PrimesharetvIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?primeshare\.tv/download/(?P.*)(?:.*)' + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + self._sleep(9, video_id) + + hashtoken = self._search_regex(r' name="hash" value="(.*?)" ', webpage, 'hash token') + data = urlencode_postdata({ + 'hash': hashtoken, + }) + headers = { + 'Referer': url, + 'Content-Type': 'application/x-www-form-urlencoded', + } + video_page_request = compat_urllib_request.Request(url, data, headers=headers) + video_page = self._download_webpage(video_page_request, None, False, '') + + video_url = self._html_search_regex( + r'url: \'(http://l\.primeshare\.tv[^\']+)\',', video_page, 'video url') + + title = self._html_search_regex( + r'

Watch [^\(]+\(([^/)]+)\) ', video_page, 'title') + + return { + 'id': video_id, + 'url': video_url, + 'title': title, + 'ext': 'mp4', + } From 6980f235682388311ea65a891a422e39f339da2a Mon Sep 17 00:00:00 2001 From: Jeff Buchbinder Date: Wed, 4 Mar 2015 15:18:06 -0500 Subject: [PATCH 2/4] [Primesharetv] Add public domain example video --- youtube_dl/extractor/primesharetv.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/primesharetv.py b/youtube_dl/extractor/primesharetv.py index 967125abc..7c545761b 100644 --- a/youtube_dl/extractor/primesharetv.py +++ b/youtube_dl/extractor/primesharetv.py @@ -15,6 +15,19 @@ from ..compat import ( class PrimesharetvIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?primeshare\.tv/download/(?P.*)(?:.*)' + _TESTS = [ + { + 'url': 'http://primeshare.tv/download/238790B611', + 'md5': 'bb41f9f6c0dd434c729f04ce5b677192', + 'info_dict': { + 'id': '238790B611', + 'ext': 'mp4', + "title": "Public Domain - 1960s Commercial - Crest Toothpaste-YKsuFona [...]", + "duration": 10, + }, + } + ] + def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) @@ -33,7 +46,7 @@ class PrimesharetvIE(InfoExtractor): video_page = self._download_webpage(video_page_request, None, False, '') video_url = self._html_search_regex( - r'url: \'(http://l\.primeshare\.tv[^\']+)\',', video_page, 'video url') + r'url: \'(http://[a-z0-9]+\.primeshare\.tv:443/file/get/[^\']+)\',', video_page, 'video url') title = self._html_search_regex( r'

Watch [^\(]+\(([^/)]+)\) ', video_page, 'title') From 6aab6ef34c395a424533ed35837a613038b20b9a Mon Sep 17 00:00:00 2001 From: Jeff Buchbinder Date: Wed, 4 Mar 2015 15:27:51 -0500 Subject: [PATCH 3/4] [Primevideotv] Fix dirty branch (other module) --- youtube_dl/extractor/__init__.py | 1 - youtube_dl/extractor/megavideozeu.py | 39 ---------------------------- 2 files changed, 40 deletions(-) delete mode 100644 youtube_dl/extractor/megavideozeu.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index faa93d4c2..79ca8a0fe 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -262,7 +262,6 @@ from .macgamestore import MacGameStoreIE from .mailru import MailRuIE from .malemotion import MalemotionIE from .mdr import MDRIE -from .megavideozeu import MegavideozeuIE from .metacafe import MetacafeIE from .metacritic import MetacriticIE from .mgoon import MgoonIE diff --git a/youtube_dl/extractor/megavideozeu.py b/youtube_dl/extractor/megavideozeu.py deleted file mode 100644 index e77b5f734..000000000 --- a/youtube_dl/extractor/megavideozeu.py +++ /dev/null @@ -1,39 +0,0 @@ -# encoding: utf-8 -from __future__ import unicode_literals - -from .common import InfoExtractor -from ..utils import ( - int_or_none, - parse_filesize, - unified_strdate, -) - - -class MegavideozeuIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?megavideoz\.eu/video/(?P.*)(?:.*)' - - def _real_extract(self, url): - tmp_video_id = self._match_id(url) - - webpage = self._download_webpage(url, tmp_video_id) - - config_php = self._html_search_regex( - r'var cnf = \'([^\']+)\'', webpage, 'config.php url') - - configpage = self._download_webpage(config_php, tmp_video_id) - - video_id = self._html_search_regex( - r'([^<]+)', configpage, 'video id') - video_url = self._html_search_regex( - r'([^<]+)', configpage, 'video URL') - title = self._html_search_regex( - r'<!\[CDATA\[([^\]]+)', configpage, 'title') - duration = int_or_none(self._html_search_regex( - r'<duration>([0-9]+)', configpage, 'duration', fatal=False)) - - return { - 'id': video_id, - 'url': video_url, - 'title': title, - 'duration': duration - } From 576d4a4d8ad0d1d6ea3b74cde4e46871bae234c8 Mon Sep 17 00:00:00 2001 From: Jeff Buchbinder <jeff@ourexchange.net> Date: Wed, 4 Mar 2015 17:38:21 -0500 Subject: [PATCH 4/4] [Primesharetv] Handle file not existing properly. --- youtube_dl/extractor/primesharetv.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/primesharetv.py b/youtube_dl/extractor/primesharetv.py index 7c545761b..570fd2210 100644 --- a/youtube_dl/extractor/primesharetv.py +++ b/youtube_dl/extractor/primesharetv.py @@ -1,8 +1,11 @@ # encoding: utf-8 from __future__ import unicode_literals +import re + from .common import InfoExtractor from ..utils import ( + ExtractorError, int_or_none, parse_filesize, unified_strdate, @@ -31,10 +34,12 @@ class PrimesharetvIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) + if re.search(r'<h1>File not exist</h1>', webpage) is not None: + raise ExtractorError('The file does not exist', expected=True) + hashtoken = self._search_regex(r' name="hash" value="(.*?)" ', webpage, 'hash token') self._sleep(9, video_id) - hashtoken = self._search_regex(r' name="hash" value="(.*?)" ', webpage, 'hash token') data = urlencode_postdata({ 'hash': hashtoken, }) @@ -44,7 +49,6 @@ class PrimesharetvIE(InfoExtractor): } video_page_request = compat_urllib_request.Request(url, data, headers=headers) video_page = self._download_webpage(video_page_request, None, False, '') - video_url = self._html_search_regex( r'url: \'(http://[a-z0-9]+\.primeshare\.tv:443/file/get/[^\']+)\',', video_page, 'video url') @@ -57,3 +61,8 @@ class PrimesharetvIE(InfoExtractor): 'title': title, 'ext': 'mp4', } + + def _debug_print(self, txt): + if self._downloader.params.get('verbose'): + self.to_screen('[debug] %s' % txt) +