From 57182dca509a476825754682a9107c8bfb8f7f08 Mon Sep 17 00:00:00 2001 From: Bastian de Groot Date: Sat, 14 Apr 2018 17:14:51 +0200 Subject: [PATCH 1/7] [generic] prefer enclosures over following links --- youtube_dl/extractor/generic.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index e3cb5c5ce..994a6511b 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -2009,13 +2009,14 @@ class GenericIE(InfoExtractor): entries = [] for it in doc.findall('./channel/item'): - next_url = xpath_text(it, 'link', fatal=False) - if not next_url: - enclosure_nodes = it.findall('./enclosure') - for e in enclosure_nodes: - next_url = e.attrib.get('url') - if next_url: - break + enclosure_nodes = it.findall('./enclosure') + for e in enclosure_nodes: + next_url = e.attrib.get('url') + if next_url: + break + + if not enclosure_nodes: + next_url = xpath_text(it, 'link', fatal=False) if not next_url: continue From 4f8ed0968b1a20546946069f9efc60ce41b13ac3 Mon Sep 17 00:00:00 2001 From: Bastian de Groot Date: Sat, 14 Apr 2018 18:28:49 +0200 Subject: [PATCH 2/7] [generic] prevent reference before assignment errors --- youtube_dl/extractor/generic.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 994a6511b..98c9f9015 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -2009,6 +2009,7 @@ class GenericIE(InfoExtractor): entries = [] for it in doc.findall('./channel/item'): + next_url = None enclosure_nodes = it.findall('./enclosure') for e in enclosure_nodes: next_url = e.attrib.get('url') @@ -2018,7 +2019,7 @@ class GenericIE(InfoExtractor): if not enclosure_nodes: next_url = xpath_text(it, 'link', fatal=False) - if not next_url: + if next_url is None: continue entries.append({ From 0aa4e2a2be0c78f25c2a358e9da53ef314868575 Mon Sep 17 00:00:00 2001 From: Bastian de Groot Date: Sat, 14 Apr 2018 19:49:08 +0200 Subject: [PATCH 3/7] [generic] account for empty strings --- youtube_dl/extractor/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 98c9f9015..178422437 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -2019,7 +2019,7 @@ class GenericIE(InfoExtractor): if not enclosure_nodes: next_url = xpath_text(it, 'link', fatal=False) - if next_url is None: + if not next_url: continue entries.append({ From 199025c7d2fff07eb9d6da5bbda9e3277a574fb5 Mon Sep 17 00:00:00 2001 From: Bastian de Groot Date: Sat, 14 Apr 2018 22:09:36 +0200 Subject: [PATCH 4/7] [generic] follow rss link when enclosure url is empty --- youtube_dl/extractor/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 178422437..2de74a16b 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -2016,7 +2016,7 @@ class GenericIE(InfoExtractor): if next_url: break - if not enclosure_nodes: + if not next_url: next_url = xpath_text(it, 'link', fatal=False) if not next_url: From 31749dd02423f6a8594bfd439259e868a5c7cef0 Mon Sep 17 00:00:00 2001 From: Bastian de Groot Date: Sat, 28 Apr 2018 13:08:16 +0200 Subject: [PATCH 5/7] Add test for RSS enclosure prioritization --- youtube_dl/extractor/generic.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 2de74a16b..19c24395a 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -190,6 +190,27 @@ class GenericIE(InfoExtractor): 'title': 'pdv_maddow_netcast_m4v-02-27-2015-201624', } }, + # RSS feed with enclosures and unsupported rss urls + { + 'url': 'http://www.hellointernet.fm/podcast?format=rss', + 'info_dict': { + 'id': 'http://www.hellointernet.fm/podcast?format=rss', + 'description': 'CGP Grey and Brady Haran talk about YouTube, life, work, whatever.', + 'title': 'Hello Internet', + }, + 'playlist': [{ + 'info_dict': { + 'id': '101', + 'ext': 'mp3', + 'upload_date': '20180426', + 'title': u' \u200d \u200d ', + }, + }], + 'playlist_mincount': 99, + 'params': { + 'skip_download': True, + }, + }, # SMIL from http://videolectures.net/promogram_igor_mekjavic_eng { 'url': 'http://videolectures.net/promogram_igor_mekjavic_eng/video/1/smil.xml', From f7d767f2722d00852106f43c263ca90567b4e939 Mon Sep 17 00:00:00 2001 From: Bastian de Groot Date: Sat, 28 Apr 2018 13:19:57 +0200 Subject: [PATCH 6/7] Remove unicode prefix --- youtube_dl/extractor/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 19c24395a..7dc7dfc40 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -203,7 +203,7 @@ class GenericIE(InfoExtractor): 'id': '101', 'ext': 'mp3', 'upload_date': '20180426', - 'title': u' \u200d \u200d ', + 'title': ' \u200d \u200d ', }, }], 'playlist_mincount': 99, From 0a0ca99ab414961d584c39e62fcabf0fb5657817 Mon Sep 17 00:00:00 2001 From: Sergey M Date: Sun, 29 Apr 2018 22:12:31 +0700 Subject: [PATCH 7/7] Update generic.py --- youtube_dl/extractor/generic.py | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 7dc7dfc40..c548a1649 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -190,7 +190,7 @@ class GenericIE(InfoExtractor): 'title': 'pdv_maddow_netcast_m4v-02-27-2015-201624', } }, - # RSS feed with enclosures and unsupported rss urls + # RSS feed with enclosures and unsupported link URLs { 'url': 'http://www.hellointernet.fm/podcast?format=rss', 'info_dict': { @@ -198,18 +198,7 @@ class GenericIE(InfoExtractor): 'description': 'CGP Grey and Brady Haran talk about YouTube, life, work, whatever.', 'title': 'Hello Internet', }, - 'playlist': [{ - 'info_dict': { - 'id': '101', - 'ext': 'mp3', - 'upload_date': '20180426', - 'title': ' \u200d \u200d ', - }, - }], - 'playlist_mincount': 99, - 'params': { - 'skip_download': True, - }, + 'playlist_mincount': 100, }, # SMIL from http://videolectures.net/promogram_igor_mekjavic_eng {