From 52828f4ca50be0e0a036097ad3d7d0ddaacfb416 Mon Sep 17 00:00:00 2001 From: sollicitudin Date: Thu, 17 Jan 2019 11:14:04 +0700 Subject: [PATCH 1/4] Convert archiveorg thumbnail url to absolute url --- youtube_dl/extractor/archiveorg.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/archiveorg.py b/youtube_dl/extractor/archiveorg.py index c79c58e82..0d69d9ad5 100644 --- a/youtube_dl/extractor/archiveorg.py +++ b/youtube_dl/extractor/archiveorg.py @@ -48,6 +48,10 @@ class ArchiveOrgIE(InfoExtractor): def get_optional(metadata, field): return metadata.get(field, [None])[0] + + def convert_relative_to_absolute_thumbnail(metadata): + if not metadata['thumbnail'].startswith('http'): + metadata.update({'thumbnail': ''.join(('http://archive.org', metadata.get('thumbnail')))}) metadata = self._download_json( 'http://archive.org/details/' + video_id, video_id, query={ @@ -62,4 +66,8 @@ class ArchiveOrgIE(InfoExtractor): 'uploader': get_optional(metadata, 'creator'), 'upload_date': unified_strdate(get_optional(metadata, 'date')), }) - return info + convert_relative_to_absolute_thumbnail(info) + else: + for entry in info['entries']: + convert_relative_to_absolute_thumbnail(entry) + return info \ No newline at end of file From 5bd55c857265dcb31221c02d862fb3481a6dd579 Mon Sep 17 00:00:00 2001 From: sollicitudin Date: Thu, 17 Jan 2019 11:19:35 +0700 Subject: [PATCH 2/4] Minor formatting fix --- youtube_dl/extractor/archiveorg.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/archiveorg.py b/youtube_dl/extractor/archiveorg.py index 0d69d9ad5..c6f50ff8c 100644 --- a/youtube_dl/extractor/archiveorg.py +++ b/youtube_dl/extractor/archiveorg.py @@ -48,7 +48,7 @@ class ArchiveOrgIE(InfoExtractor): def get_optional(metadata, field): return metadata.get(field, [None])[0] - + def convert_relative_to_absolute_thumbnail(metadata): if not metadata['thumbnail'].startswith('http'): 
metadata.update({'thumbnail': ''.join(('http://archive.org', metadata.get('thumbnail')))}) @@ -70,4 +70,4 @@ class ArchiveOrgIE(InfoExtractor): else: for entry in info['entries']: convert_relative_to_absolute_thumbnail(entry) - return info \ No newline at end of file + return info From 0c1ac73227b8ab8fbbc71f90547b3cb8da3599ad Mon Sep 17 00:00:00 2001 From: sollicitudin Date: Thu, 17 Jan 2019 11:33:17 +0700 Subject: [PATCH 3/4] Switch to utils.urljoin --- youtube_dl/extractor/archiveorg.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/archiveorg.py b/youtube_dl/extractor/archiveorg.py index c6f50ff8c..f04eefe9f 100644 --- a/youtube_dl/extractor/archiveorg.py +++ b/youtube_dl/extractor/archiveorg.py @@ -4,6 +4,7 @@ from .common import InfoExtractor from ..utils import ( unified_strdate, clean_html, + urljoin, ) @@ -51,7 +52,7 @@ class ArchiveOrgIE(InfoExtractor): def convert_relative_to_absolute_thumbnail(metadata): if not metadata['thumbnail'].startswith('http'): - metadata.update({'thumbnail': ''.join(('http://archive.org', metadata.get('thumbnail')))}) + metadata.update({'thumbnail': urljoin('http://archive.org', metadata.get('thumbnail'))}) metadata = self._download_json( 'http://archive.org/details/' + video_id, video_id, query={ From 55f381cc99ac5fbb45bffeb7c011f82cb57b7317 Mon Sep 17 00:00:00 2001 From: sollicitudin Date: Thu, 17 Jan 2019 11:36:03 +0700 Subject: [PATCH 4/4] Remove redundant startswith('http') check before urljoin --- youtube_dl/extractor/archiveorg.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/youtube_dl/extractor/archiveorg.py b/youtube_dl/extractor/archiveorg.py index f04eefe9f..f34f32b7e 100644 --- a/youtube_dl/extractor/archiveorg.py +++ b/youtube_dl/extractor/archiveorg.py @@ -51,8 +51,7 @@ class ArchiveOrgIE(InfoExtractor): return metadata.get(field, [None])[0] def convert_relative_to_absolute_thumbnail(metadata): - if not metadata['thumbnail'].startswith('http'): - metadata.update({'thumbnail': 
urljoin('http://archive.org', metadata.get('thumbnail'))}) + metadata.update({'thumbnail': urljoin('http://archive.org', metadata.get('thumbnail'))}) metadata = self._download_json( 'http://archive.org/details/' + video_id, video_id, query={