From 8df0c2c7a598fa429df55b0e7d826d1daf502b36 Mon Sep 17 00:00:00 2001 From: TinyToweringTree <54483833+TinyToweringTree@users.noreply.github.com> Date: Fri, 24 Jan 2020 15:03:48 +0100 Subject: [PATCH] [archiveorg] Fix extraction (closes #21330, closes #23586, closes #23700) --- youtube_dl/extractor/archiveorg.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/archiveorg.py b/youtube_dl/extractor/archiveorg.py index c79c58e82..a65fdd7d0 100644 --- a/youtube_dl/extractor/archiveorg.py +++ b/youtube_dl/extractor/archiveorg.py @@ -40,9 +40,12 @@ class ArchiveOrgIE(InfoExtractor): video_id = self._match_id(url) webpage = self._download_webpage( 'http://archive.org/embed/' + video_id, video_id) + input_element_with_playlist = self._search_regex( + r'(<\s*input.*\s*class\s*=\s*[\'"].*\s*js-play8-playlist\s*.*[\'"]\s*.*>)', + webpage, 'jwplayer playlist') jwplayer_playlist = self._parse_json(self._search_regex( - r"(?s)Play\('[^']+'\s*,\s*(\[.+\])\s*,\s*{.*?}\)", - webpage, 'jwplayer playlist'), video_id) + r'.*\s+value\s*=\s*[\'"](.+)[\'"][\s/]', + input_element_with_playlist, 'playlist data'), video_id) info = self._parse_jwplayer_data( {'playlist': jwplayer_playlist}, video_id, base_url=url) @@ -52,7 +55,7 @@ class ArchiveOrgIE(InfoExtractor): metadata = self._download_json( 'http://archive.org/details/' + video_id, video_id, query={ 'output': 'json', - })['metadata'] + }).get('metadata', {}) info.update({ 'title': get_optional(metadata, 'title') or info.get('title'), 'description': clean_html(get_optional(metadata, 'description')),