From 828043d57b822def29ad8ac228433f0f694ee478 Mon Sep 17 00:00:00 2001 From: Urgau Date: Tue, 12 Jun 2018 17:23:34 +0200 Subject: [PATCH 1/4] [foxnews:article] Fix video ID extraction Fix #15810 --- youtube_dl/extractor/foxnews.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/foxnews.py b/youtube_dl/extractor/foxnews.py index dc0662f74..e407b3d53 100644 --- a/youtube_dl/extractor/foxnews.py +++ b/youtube_dl/extractor/foxnews.py @@ -89,7 +89,7 @@ class FoxNewsArticleIE(InfoExtractor): webpage = self._download_webpage(url, display_id) video_id = self._html_search_regex( - r'data-video-id=([\'"])(?P<id>[^\'"]+)\1', + (r'data-video-id=([\'"])(?P<id>[^\'"]+)\1', r'https?.*foxnews.*video_id=(?P<id>[\d]+)'), webpage, 'video ID', group='id') return self.url_result( 'http://video.foxnews.com/v/' + video_id, From 379d44c19754f6d3e91a092576ce94adbcf33047 Mon Sep 17 00:00:00 2001 From: Urgau Date: Tue, 12 Jun 2018 17:28:27 +0200 Subject: [PATCH 2/4] Add new test --- youtube_dl/extractor/foxnews.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/youtube_dl/extractor/foxnews.py b/youtube_dl/extractor/foxnews.py index e407b3d53..a6b5c083a 100644 --- a/youtube_dl/extractor/foxnews.py +++ b/youtube_dl/extractor/foxnews.py @@ -43,6 +43,17 @@ class FoxNewsIE(AMPIE): 'skip_download': True, }, }, + { + 'url': 'http://www.foxnews.com/us/2018/03/09/parkland-survivor-kyle-kashuv-on-meeting-trump-his-app-to-prevent-another-school-shooting.amp.html?__twitter_impression=true', + 'info_dict': { + 'id': '5748266721001', + 'ext': 'mp4', + 'title': "Kyle Kashuv has a positive message for the Trump White House", + 'description': "Marjory Stoneman Douglas student disagrees with classmates.", + 'duration': 229, + 'thumbnail': r're:^https?://.*\.jpg$', + }, + }, { 'url': 'http://video.foxnews.com/v/video-embed.html?video_id=3937480&d=video.foxnews.com', 'only_matching': True, From cd6a4939ef1a7ac9005cc04a87493ef92693249a Mon Sep 17 00:00:00 2001 From: 
Urgau Date: Tue, 12 Jun 2018 17:39:47 +0200 Subject: [PATCH 3/4] Fix test --- youtube_dl/extractor/foxnews.py | 50 +++++++++++++++++---------------- 1 file changed, 26 insertions(+), 24 deletions(-) diff --git a/youtube_dl/extractor/foxnews.py b/youtube_dl/extractor/foxnews.py index a6b5c083a..a8e983ce6 100644 --- a/youtube_dl/extractor/foxnews.py +++ b/youtube_dl/extractor/foxnews.py @@ -31,8 +31,8 @@ class FoxNewsIE(AMPIE): 'info_dict': { 'id': '3922535568001', 'ext': 'mp4', - 'title': "Rep. Luis Gutierrez on if Obama's immigration plan is legal", - 'description': "Congressman discusses president's plan", + 'title': 'Rep. Luis Gutierrez on if Obama\'s immigration plan is legal', + 'description': 'Congressman discusses president\'s plan', 'duration': 292, 'timestamp': 1417662047, 'upload_date': '20141204', @@ -43,17 +43,6 @@ class FoxNewsIE(AMPIE): 'skip_download': True, }, }, - { - 'url': 'http://www.foxnews.com/us/2018/03/09/parkland-survivor-kyle-kashuv-on-meeting-trump-his-app-to-prevent-another-school-shooting.amp.html?__twitter_impression=true', - 'info_dict': { - 'id': '5748266721001', - 'ext': 'mp4', - 'title': "Kyle Kashuv has a positive message for the Trump White House", - 'description': "Marjory Stoneman Douglas student disagrees with classmates.", - 'duration': 229, - 'thumbnail': r're:^https?://.*\.jpg$', - }, - }, { 'url': 'http://video.foxnews.com/v/video-embed.html?video_id=3937480&d=video.foxnews.com', 'only_matching': True, @@ -82,18 +71,31 @@ class FoxNewsArticleIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?foxnews\.com/(?!v)([^/]+/)+(?P<id>[a-z-]+)' IE_NAME = 'foxnews:article' - _TEST = { - 'url': 'http://www.foxnews.com/politics/2016/09/08/buzz-about-bud-clinton-camp-denies-claims-wore-earpiece-at-forum.html', - 'md5': '62aa5a781b308fdee212ebb6f33ae7ef', - 'info_dict': { - 'id': '5116295019001', - 'ext': 'mp4', - 'title': 'Trump and Clinton asked to defend positions on Iraq War', - 'description': 'Veterans react on \'The Kelly File\'', - 
'timestamp': 1473299755, - 'upload_date': '20160908', + _TESTS = [ + { + 'url': 'http://www.foxnews.com/politics/2016/09/08/buzz-about-bud-clinton-camp-denies-claims-wore-earpiece-at-forum.html', + 'md5': '62aa5a781b308fdee212ebb6f33ae7ef', + 'info_dict': { + 'id': '5116295019001', + 'ext': 'mp4', + 'title': 'Trump and Clinton asked to defend positions on Iraq War', + 'description': 'Veterans react on \'The Kelly File\'', + 'timestamp': 1473299755, + 'upload_date': '20160908', + }, }, - } + { + 'url': 'http://www.foxnews.com/us/2018/03/09/parkland-survivor-kyle-kashuv-on-meeting-trump-his-app-to-prevent-another-school-shooting.amp.html?__twitter_impression=true', + 'info_dict': { + 'id': '5748266721001', + 'ext': 'mp4', + 'title': 'Kyle Kashuv has a positive message for the Trump White House', + 'description': 'Marjory Stoneman Douglas student disagrees with classmates.', + 'duration': 229, + 'thumbnail': r're:^https?://.*\.jpg$', + }, + }, + ] def _real_extract(self, url): display_id = self._match_id(url) From ce19545c464103c32a936a273e9b94c2516548ee Mon Sep 17 00:00:00 2001 From: Urgau Date: Tue, 12 Jun 2018 22:41:48 +0200 Subject: [PATCH 4/4] Improve regex --- youtube_dl/extractor/foxnews.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/foxnews.py b/youtube_dl/extractor/foxnews.py index a8e983ce6..835eb9fb9 100644 --- a/youtube_dl/extractor/foxnews.py +++ b/youtube_dl/extractor/foxnews.py @@ -31,8 +31,8 @@ class FoxNewsIE(AMPIE): 'info_dict': { 'id': '3922535568001', 'ext': 'mp4', - 'title': 'Rep. Luis Gutierrez on if Obama\'s immigration plan is legal', - 'description': 'Congressman discusses president\'s plan', + 'title': "Rep. 
Luis Gutierrez on if Obama's immigration plan is legal", + 'description': "Congressman discusses president's plan", 'duration': 292, 'timestamp': 1417662047, 'upload_date': '20141204', @@ -102,7 +102,7 @@ class FoxNewsArticleIE(InfoExtractor): webpage = self._download_webpage(url, display_id) video_id = self._html_search_regex( - (r'data-video-id=([\'"])(?P<id>[^\'"]+)\1', r'https?.*foxnews.*video_id=(?P<id>[\d]+)'), + (r'data-video-id=([\'"])(?P<id>[^\'"]+)\1', r'[\d]+).*>'), webpage, 'video ID', group='id') return self.url_result( 'http://video.foxnews.com/v/' + video_id,