From 2190b6aaa1a65ad172f2e34382045b5753402cdc Mon Sep 17 00:00:00 2001 From: ealgase Date: Tue, 20 Nov 2018 17:16:19 -0500 Subject: [PATCH 1/5] [narando] Add new extractor --- youtube_dl/extractor/extractors.py | 3 +++ youtube_dl/extractor/narando.py | 42 ++++++++++++++++++++++++++++++ 2 files changed, 45 insertions(+) create mode 100644 youtube_dl/extractor/narando.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 87c7d8b0c..ee54ea1b3 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1480,3 +1480,6 @@ from .zattoo import ( from .zdf import ZDFIE, ZDFChannelIE from .zingmp3 import ZingMp3IE from .zype import ZypeIE + + +from .narando import NarandoIE diff --git a/youtube_dl/extractor/narando.py b/youtube_dl/extractor/narando.py new file mode 100644 index 000000000..a492c7b3f --- /dev/null +++ b/youtube_dl/extractor/narando.py @@ -0,0 +1,42 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor + +class NarandoIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?narando\.com/articles/(?P([a-zA-Z]|-)+)' + _TEST = { + 'url': 'https://narando.com/articles/an-ihrem-selbstlob-erkennt-man-sie', + 'md5': 'd20f671f0395bab8f8285d1f6e8f965e', + 'info_dict': { +# 'id': 'b2t4t789kxgy9g7ms4rwjvvw', was being used as id previously, is internal video id + 'id': 'an-ihrem-selbstlob-erkennt-man-sie', + 'ext': 'mp3', + 'title': 'An ihrem Selbstlob erkennt man sie', + 'url': 'https://static.narando.com/sounds/10492/original.mp3', + # TODO more properties, either as: + # * A value + # * MD5 checksum; start the string with md5: + # * A regular expression; start the string with re: + # * Any Python type (for example int or float) + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) +# webpage = self._download_webpage(url,"?") +# print(url) +# print('https://narando.com/articles/'+video_id) + webpage = 
self._download_webpage('https://narando.com/articles/'+video_id+"?", video_id)#for some reason, this absolutely refused to work, so I'm negating the video_id and just adding it directly + # TODO more code goes here, for example ... + title = self._html_search_regex(r'

(.+?)

', webpage, 'title') +# print(title) + player_id = self._html_search_regex(" ".join(r'[\n\r].*https:\/\/narando.com\/r\/\s*([^"]*)'.split()), webpage, 'player_id') + player_page = self._download_webpage('https://narando.com/widget?r='+player_id+'&',player_id)#same as above + download_url = self._html_search_regex(r'.
\s*([^?]*)', player_page, 'mp3_ddl') + return { + 'id': video_id, + 'title': title, + 'url': download_url, + # TODO more properties (see youtube_dl/extractor/common.py) + } From a729d43d9e5c98035f7200182a4805e8aa4087fd Mon Sep 17 00:00:00 2001 From: ealgase Date: Tue, 20 Nov 2018 18:28:29 -0500 Subject: [PATCH 2/5] [narando] Add description support and improve code to meet youtube-dl's standards --- youtube_dl/extractor/narando.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/narando.py b/youtube_dl/extractor/narando.py index a492c7b3f..66a733597 100644 --- a/youtube_dl/extractor/narando.py +++ b/youtube_dl/extractor/narando.py @@ -14,6 +14,7 @@ class NarandoIE(InfoExtractor): 'ext': 'mp3', 'title': 'An ihrem Selbstlob erkennt man sie', 'url': 'https://static.narando.com/sounds/10492/original.mp3', + 'description': u'omnisophie.com: Kaum eine Woche vergeht, dass nicht jemand mir gegenüber seine Mathematik-Unkenntnisse tränenlos beweint. „In Mathe war ich niemals gut.“ Diese Leute sagen mir das wohl, weil ich Mathematiker bin, und da gehört so ein fröhliches „Understatement“ zum Small Talk. So wie wenn ich selbst bedauernd-entschuldigend auf meine grauen Haare zeige. Ich kann eben auch nicht alles bieten... „Mathe kann ich nicht“, „Ich habe kein Internet“ oder „Ich will auch bewusst nicht alles können“ wird fast wie Eigenlob vorgetragen.', # TODO more properties, either as: # * A value # * MD5 checksum; start the string with md5: @@ -27,16 +28,18 @@ class NarandoIE(InfoExtractor): # webpage = self._download_webpage(url,"?") # print(url) # print('https://narando.com/articles/'+video_id) - webpage = self._download_webpage('https://narando.com/articles/'+video_id+"?", video_id)#for some reason, this absolutely refused to work, so I'm negating the video_id and just adding it directly + webpage = self._download_webpage('https://narando.com/articles/'+video_id, video_id) # TODO more code goes here, for example ... 
title = self._html_search_regex(r'

(.+?)

', webpage, 'title') # print(title) player_id = self._html_search_regex(" ".join(r'[\n\r].*https:\/\/narando.com\/r\/\s*([^"]*)'.split()), webpage, 'player_id') - player_page = self._download_webpage('https://narando.com/widget?r='+player_id+'&',player_id)#same as above - download_url = self._html_search_regex(r'.
\s*([^?]*)', player_page, 'mp3_ddl') + player_page = self._download_webpage('https://narando.com/widget?r='+player_id, player_id) + download_url = self._html_search_regex(r'.
\s*([^?]*)', player_page, 'download_url') + description = self._html_search_regex(ur'', webpage, 'description') return { 'id': video_id, 'title': title, 'url': download_url, + 'description': description, # TODO more properties (see youtube_dl/extractor/common.py) } From fed1f5ee0f9dccc2bc71a251f0c5e7cd33c80c92 Mon Sep 17 00:00:00 2001 From: ealgase Date: Tue, 20 Nov 2018 18:38:08 -0500 Subject: [PATCH 3/5] [narando] fix flake8 issues --- youtube_dl/extractor/narando.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/narando.py b/youtube_dl/extractor/narando.py index 66a733597..bdb36f3e9 100644 --- a/youtube_dl/extractor/narando.py +++ b/youtube_dl/extractor/narando.py @@ -3,13 +3,13 @@ from __future__ import unicode_literals from .common import InfoExtractor + class NarandoIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?narando\.com/articles/(?P([a-zA-Z]|-)+)' _TEST = { 'url': 'https://narando.com/articles/an-ihrem-selbstlob-erkennt-man-sie', 'md5': 'd20f671f0395bab8f8285d1f6e8f965e', 'info_dict': { -# 'id': 'b2t4t789kxgy9g7ms4rwjvvw', was being used as id previously, is internal video id 'id': 'an-ihrem-selbstlob-erkennt-man-sie', 'ext': 'mp3', 'title': 'An ihrem Selbstlob erkennt man sie', @@ -28,14 +28,14 @@ class NarandoIE(InfoExtractor): # webpage = self._download_webpage(url,"?") # print(url) # print('https://narando.com/articles/'+video_id) - webpage = self._download_webpage('https://narando.com/articles/'+video_id, video_id) + webpage = self._download_webpage('https://narando.com/articles/' + video_id, video_id) # TODO more code goes here, for example ... title = self._html_search_regex(r'

(.+?)

', webpage, 'title') # print(title) player_id = self._html_search_regex(" ".join(r'[\n\r].*https:\/\/narando.com\/r\/\s*([^"]*)'.split()), webpage, 'player_id') - player_page = self._download_webpage('https://narando.com/widget?r='+player_id, player_id) + player_page = self._download_webpage('https://narando.com/widget?r=' + player_id, player_id) download_url = self._html_search_regex(r'.
\s*([^?]*)', player_page, 'download_url') - description = self._html_search_regex(ur'', webpage, 'description') + description = self._html_search_regex(r'', webpage, 'description') return { 'id': video_id, 'title': title, From d33506b6d754807abaa11566287114cebe9109d9 Mon Sep 17 00:00:00 2001 From: ealgase Date: Tue, 20 Nov 2018 20:39:07 -0500 Subject: [PATCH 4/5] [narando] Fix bad method of extracting player_id --- youtube_dl/extractor/narando.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/narando.py b/youtube_dl/extractor/narando.py index bdb36f3e9..78282e1b8 100644 --- a/youtube_dl/extractor/narando.py +++ b/youtube_dl/extractor/narando.py @@ -32,7 +32,7 @@ class NarandoIE(InfoExtractor): # TODO more code goes here, for example ... title = self._html_search_regex(r'

(.+?)

', webpage, 'title') # print(title) - player_id = self._html_search_regex(" ".join(r'[\n\r].*https:\/\/narando.com\/r\/\s*([^"]*)'.split()), webpage, 'player_id') + player_id = self._html_search_regex(r'[\n\r].*https:\/\/narando.com\/r\/\s*([^"]*)', webpage, 'player_id') player_page = self._download_webpage('https://narando.com/widget?r=' + player_id, player_id) download_url = self._html_search_regex(r'.
\s*([^?]*)', player_page, 'download_url') description = self._html_search_regex(r'', webpage, 'description') From 92ae267c88520d95cc56198eb8f6f389778c32a0 Mon Sep 17 00:00:00 2001 From: ealgase Date: Tue, 20 Nov 2018 22:16:41 -0500 Subject: [PATCH 5/5] [narando] separate [narando:player] extractor, improve code readability --- youtube_dl/extractor/extractors.py | 2 +- youtube_dl/extractor/narando.py | 48 ++++++++++++++++++++++-------- 2 files changed, 36 insertions(+), 14 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index ee54ea1b3..b7cca0c25 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1482,4 +1482,4 @@ from .zingmp3 import ZingMp3IE from .zype import ZypeIE -from .narando import NarandoIE +from .narando import NarandoIE, NarandoPlayerIE diff --git a/youtube_dl/extractor/narando.py b/youtube_dl/extractor/narando.py index 78282e1b8..6673b8007 100644 --- a/youtube_dl/extractor/narando.py +++ b/youtube_dl/extractor/narando.py @@ -5,6 +5,7 @@ from .common import InfoExtractor class NarandoIE(InfoExtractor): + IE_NAME = "narando" _VALID_URL = r'https?://(?:www\.)?narando\.com/articles/(?P([a-zA-Z]|-)+)' _TEST = { 'url': 'https://narando.com/articles/an-ihrem-selbstlob-erkennt-man-sie', @@ -15,31 +16,52 @@ class NarandoIE(InfoExtractor): 'title': 'An ihrem Selbstlob erkennt man sie', 'url': 'https://static.narando.com/sounds/10492/original.mp3', 'description': u'omnisophie.com: Kaum eine Woche vergeht, dass nicht jemand mir gegenüber seine Mathematik-Unkenntnisse tränenlos beweint. „In Mathe war ich niemals gut.“ Diese Leute sagen mir das wohl, weil ich Mathematiker bin, und da gehört so ein fröhliches „Understatement“ zum Small Talk. So wie wenn ich selbst bedauernd-entschuldigend auf meine grauen Haare zeige. Ich kann eben auch nicht alles bieten... 
„Mathe kann ich nicht“, „Ich habe kein Internet“ oder „Ich will auch bewusst nicht alles können“ wird fast wie Eigenlob vorgetragen.', - # TODO more properties, either as: - # * A value - # * MD5 checksum; start the string with md5: - # * A regular expression; start the string with re: - # * Any Python type (for example int or float) } } def _real_extract(self, url): video_id = self._match_id(url) -# webpage = self._download_webpage(url,"?") -# print(url) -# print('https://narando.com/articles/'+video_id) + webpage = self._download_webpage('https://narando.com/articles/' + video_id, video_id) - # TODO more code goes here, for example ... + title = self._html_search_regex(r'

(.+?)

', webpage, 'title') -# print(title) + player_id = self._html_search_regex(r'[\n\r].*https:\/\/narando.com\/r\/\s*([^"]*)', webpage, 'player_id') - player_page = self._download_webpage('https://narando.com/widget?r=' + player_id, player_id) - download_url = self._html_search_regex(r'.
\s*([^?]*)', player_page, 'download_url') + mobj = NarandoPlayerIE() + download_url = mobj._real_extract("https://narando.com/widget?r=" + player_id)['url'] description = self._html_search_regex(r'', webpage, 'description') return { 'id': video_id, 'title': title, 'url': download_url, 'description': description, - # TODO more properties (see youtube_dl/extractor/common.py) + } + + +class NarandoPlayerIE(InfoExtractor): + IE_NAME = "narando:player" + _VALID_URL = r'https://narando.com/widget\?r=(?P\w+)' + _TEST = { + 'url': 'https://narando.com/widget?r=b2t4t789kxgy9g7ms4rwjvvw', + 'md5': 'd20f671f0395bab8f8285d1f6e8f965e', + 'info_dict': { + 'id': 'b2t4t789kxgy9g7ms4rwjvvw', + 'ext': 'mp3', + 'title': 'An ihrem Selbstlob erkennt man sie', + 'url': 'https://static.narando.com/sounds/10492/original.mp3', + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + print(video_id) + webpage = self._download_webpage('https://narando.com/widget?r=' + video_id, video_id) + print(webpage) + title = self._html_search_regex(r'narando \| (.+?)', webpage, 'title') + + download_url = self._html_search_regex(r'.
\s*([^?]*)', webpage, 'download_url') + return { + 'id': video_id, + 'title': title, + 'url': download_url, }