From 6403be9611ba31176760cab959652a5b88dbfaae Mon Sep 17 00:00:00 2001
From: Hormoz K
Date: Sat, 4 Aug 2018 10:32:24 -0400
Subject: [PATCH] add mp3 support for RJ

---
 docs/supportedsites.md             |  3 +-
 youtube_dl/extractor/extractors.py |  5 +-
 youtube_dl/extractor/radiojavan.py | 84 ++++++++++++++++++++----------
 3 files changed, 62 insertions(+), 30 deletions(-)

diff --git a/docs/supportedsites.md b/docs/supportedsites.md
index 4bf2ec81b..c597ba2d4 100644
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@@ -690,7 +690,8 @@
  - **radiocanada**
  - **RadioCanadaAudioVideo**
  - **radiofrance**
- - **RadioJavan**
+ - **RadioJavanMp3**
+ - **RadioJavanVideo**
  - **Rai**
  - **RaiPlay**
  - **RaiPlayLive**
diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index c7a91a986..7810d3acf 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -886,7 +886,10 @@ from .radiocanada import (
     RadioCanadaAudioVideoIE,
 )
 from .radiode import RadioDeIE
-from .radiojavan import RadioJavanIE
+from .radiojavan import (
+    RadioJavanVideoIE,
+    RadioJavanMp3IE,
+)
 from .radiobremen import RadioBremenIE
 from .radiofrance import RadioFranceIE
 from .rai import (
diff --git a/youtube_dl/extractor/radiojavan.py b/youtube_dl/extractor/radiojavan.py
index 4124bcd45..f9b48f4b7 100644
--- a/youtube_dl/extractor/radiojavan.py
+++ b/youtube_dl/extractor/radiojavan.py
@@ -10,44 +10,23 @@ from ..utils import (
 )
 
 
-class RadioJavanIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?radiojavan\.com/videos/video/(?P<id>[^/]+)/?'
-    _HOST_TRACKER_URL = 'https://www.radiojavan.com/videos/video_host'
-    _TEST = {
-        'url': 'http://www.radiojavan.com/videos/video/chaartaar-ashoobam',
-        'md5': 'e85208ffa3ca8b83534fca9fe19af95b',
-        'info_dict': {
-            'id': 'chaartaar-ashoobam',
-            'ext': 'mp4',
-            'title': 'Chaartaar - Ashoobam',
-            'thumbnail': r're:^https?://.*\.jpe?g$',
-            'upload_date': '20150215',
-            'view_count': int,
-            'like_count': int,
-            'dislike_count': int,
-        }
-    }
-
+class RadioJavanBaseIE(InfoExtractor):
     def _real_extract(self, url):
-        video_id = self._match_id(url)
+        media_id = self._match_id(url)
 
-        webpage = self._download_webpage(url, video_id)
+        webpage = self._download_webpage(url, media_id)
 
         download_host = self._download_json(
             self._HOST_TRACKER_URL,
-            video_id,
-            data=urlencode_postdata({'id': video_id}),
+            media_id,
+            data=urlencode_postdata({'id': media_id}),
             headers={
                 'Content-Type': 'application/x-www-form-urlencoded',
                 'Referer': url,
             }
         )['host']
 
-        formats = [{
-            'url': '%s/%s' % (download_host, video_path),
-            'format_id': '%sp' % height,
-            'height': int(height),
-        } for height, video_path in re.findall(r"RJ\.video(\d+)p\s*=\s*'/?([^']+)'", webpage)]
+        formats = self.get_formats(webpage, download_host)
         self._sort_formats(formats)
 
         title = self._og_search_title(webpage)
@@ -68,7 +47,7 @@ class RadioJavanIE(InfoExtractor):
             webpage, 'dislike count', fatal=False))
 
         return {
-            'id': video_id,
+            'id': media_id,
             'title': title,
             'thumbnail': thumbnail,
             'upload_date': upload_date,
@@ -77,3 +56,52 @@ class RadioJavanIE(InfoExtractor):
             'dislike_count': dislike_count,
             'formats': formats,
         }
+
+
+class RadioJavanVideoIE(RadioJavanBaseIE):
+    _VALID_URL = r'https?://(?:www\.)?radiojavan\.com/videos/video/(?P<id>[^/]+)/?'
+    _HOST_TRACKER_URL = 'https://www.radiojavan.com/videos/video_host'
+    _TEST = {
+        'url': 'http://www.radiojavan.com/videos/video/chaartaar-ashoobam',
+        'md5': 'e85208ffa3ca8b83534fca9fe19af95b',
+        'info_dict': {
+            'id': 'chaartaar-ashoobam',
+            'ext': 'mp4',
+            'title': 'Chaartaar - Ashoobam',
+            'thumbnail': r're:^https?://.*\.jpe?g$',
+            'upload_date': '20150215',
+            'view_count': int,
+            'like_count': int,
+            'dislike_count': int,
+        }
+    }
+
+    def get_formats(self, webpage, download_host):
+        return [{
+            'url': '%s/%s' % (download_host, video_path),
+            'format_id': '%sp' % height,
+            'height': int(height),
+        } for height, video_path in re.findall(r"RJ\.video(\d+)p\s*=\s*'/?([^']+)'", webpage)]
+
+
+class RadioJavanMp3IE(RadioJavanBaseIE):
+    _VALID_URL = r'https?://(?:www\.)?radiojavan\.com/mp3s/mp3/(?P<id>[^/?]+)/?'
+    _HOST_TRACKER_URL = 'https://www.radiojavan.com/mp3s/mp3_host'
+    _TEST = {
+        'url': 'https://www.radiojavan.com/mp3s/mp3/Mazyar-Fallahi-Baran-Fallahi-Begoo-Boro',
+        'md5': '9601a5a94ced3a2f772f8d18170a8920',
+        'info_dict': {
+            'id': 'Mazyar-Fallahi-Baran-Fallahi-Begoo-Boro',
+            'ext': 'mp3',
+            'title': 'Mazyar Fallahi & Baran Fallahi - Begoo Boro',
+            'thumbnail': r're:^https?://.*\.jpe?g$',
+            'upload_date': '20180729',
+            'view_count': int,
+            'like_count': int,
+            'dislike_count': int,
+        }
+    }
+
+    def get_formats(self, webpage, download_host):
+        mp3_path = re.findall(r"RJ\.currentMP3Url\s*=\s*'/?([^']+)'", webpage)[0]
+        return [{'url': '%s/media/%s.mp3' % (download_host, mp3_path)}]