From 45f72f2cd0b38da22e59bcfa54bcf3d122b9b31d Mon Sep 17 00:00:00 2001 From: Ashwin Dhakaita Date: Sun, 4 Nov 2018 07:55:51 +0530 Subject: [PATCH 1/4] Added extractor for News18.com --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/news18.py | 23 +++++++++++++++++++++++ 2 files changed, 24 insertions(+) create mode 100644 youtube_dl/extractor/news18.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 17b576df3..917dd7240 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -708,6 +708,7 @@ from .newgrounds import ( NewgroundsIE, NewgroundsPlaylistIE, ) +from .news18 import News18IE from .newstube import NewstubeIE from .nextmedia import ( NextMediaIE, diff --git a/youtube_dl/extractor/news18.py b/youtube_dl/extractor/news18.py new file mode 100644 index 000000000..0f0e3b94e --- /dev/null +++ b/youtube_dl/extractor/news18.py @@ -0,0 +1,23 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor + +class News18IE(InfoExtractor): + _VALID_URL = r'''https?:\/\/www\.news18\.com[a-zA-Z0-9_\/-]+-(?P<id>\d+)\.html''' + + def _real_extract(self, url): + IE_NAME = 'News18' + video_id = self._match_id(url) + webpage = self._download_webpage(url,video_id) + video_url = self._search_regex(r'(?P<url>https?:\/\/vodpd\.news18\.com[\/\w_-]+\.mp4)', webpage, 'video URL',default='') + title = self._og_search_title(webpage) + + return { + 'url': video_url, + 'id': video_id, + 'title': title, + 'ext': '.mp4' + } + From f3bffc9ec623583bd52454d9e73109c3ba88720e Mon Sep 17 00:00:00 2001 From: Ashwin Dhakaita Date: Sun, 4 Nov 2018 09:38:58 +0530 Subject: [PATCH 2/4] [news18] Add new extractor --- youtube_dl/extractor/news18.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/news18.py b/youtube_dl/extractor/news18.py index 0f0e3b94e..241f066c0 100644 --- a/youtube_dl/extractor/news18.py +++ 
b/youtube_dl/extractor/news18.py @@ -6,6 +6,15 @@ from .common import InfoExtractor class News18IE(InfoExtractor): _VALID_URL = r'''https?:\/\/www\.news18\.com[a-zA-Z0-9_\/-]+-(?P<id>\d+)\.html''' + _TEST = { + 'url' : 'https://www.news18.com/news/ivideos/inside-naxal-bastion-news18-visits-the-villages-voting-first-time-ever-1928149.html', + 'md5' : 'cb5a78310f3e583da5ba0de38b450938', + 'info_dict': { + 'id': '1928149', + 'ext': 'mp4', + 'title': 'Inside Naxal Bastion: News18 Visits The Villages Voting First Time Ever', + } + } def _real_extract(self, url): IE_NAME = 'News18' @@ -14,10 +23,13 @@ class News18IE(InfoExtractor): video_url = self._search_regex(r'(?P<url>https?:\/\/vodpd\.news18\.com[\/\w_-]+\.mp4)', webpage, 'video URL',default='') title = self._og_search_title(webpage) + print(video_url) + print(video_id) + print(title) return { 'url': video_url, 'id': video_id, 'title': title, - 'ext': '.mp4' + 'ext': 'mp4' } From 3819e62a61d8029ae78a42af4a453395e65b0c6d Mon Sep 17 00:00:00 2001 From: Ashwin Dhakaita Date: Sun, 4 Nov 2018 09:48:36 +0530 Subject: [PATCH 3/4] [news18] Add new extractor --- youtube_dl/extractor/news18.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/youtube_dl/extractor/news18.py b/youtube_dl/extractor/news18.py index 241f066c0..22f8dfc0e 100644 --- a/youtube_dl/extractor/news18.py +++ b/youtube_dl/extractor/news18.py @@ -23,9 +23,6 @@ class News18IE(InfoExtractor): video_url = self._search_regex(r'(?P<url>https?:\/\/vodpd\.news18\.com[\/\w_-]+\.mp4)', webpage, 'video URL',default='') title = self._og_search_title(webpage) - print(video_url) - print(video_id) - print(title) return { 'url': video_url, 'id': video_id, From 8b85d2d0195c36cd1cb4aa732401c9171a73b1cd Mon Sep 17 00:00:00 2001 From: Ashwin Dhakaita Date: Sun, 4 Nov 2018 10:13:07 +0530 Subject: [PATCH 4/4] [news18] Add new extractor --- youtube_dl/extractor/news18.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/news18.py 
b/youtube_dl/extractor/news18.py index 22f8dfc0e..4776c444b 100644 --- a/youtube_dl/extractor/news18.py +++ b/youtube_dl/extractor/news18.py @@ -1,14 +1,13 @@ from __future__ import unicode_literals -import re - from .common import InfoExtractor + class News18IE(InfoExtractor): _VALID_URL = r'''https?:\/\/www\.news18\.com[a-zA-Z0-9_\/-]+-(?P<id>\d+)\.html''' _TEST = { - 'url' : 'https://www.news18.com/news/ivideos/inside-naxal-bastion-news18-visits-the-villages-voting-first-time-ever-1928149.html', - 'md5' : 'cb5a78310f3e583da5ba0de38b450938', + 'url': 'https://www.news18.com/news/ivideos/inside-naxal-bastion-news18-visits-the-villages-voting-first-time-ever-1928149.html', + 'md5': 'cb5a78310f3e583da5ba0de38b450938', 'info_dict': { 'id': '1928149', 'ext': 'mp4', @@ -17,10 +16,9 @@ class News18IE(InfoExtractor): } def _real_extract(self, url): - IE_NAME = 'News18' video_id = self._match_id(url) - webpage = self._download_webpage(url,video_id) - video_url = self._search_regex(r'(?P<url>https?:\/\/vodpd\.news18\.com[\/\w_-]+\.mp4)', webpage, 'video URL',default='') + webpage = self._download_webpage(url, video_id) + video_url = self._search_regex(r'(?P<url>https?:\/\/vodpd\.news18\.com[\/\w_-]+\.mp4)', webpage, 'video URL', default='') title = self._og_search_title(webpage) return { @@ -29,4 +27,3 @@ class News18IE(InfoExtractor): 'title': title, 'ext': 'mp4' } -