From 796a691e95ce86bf4db6edc13f8aac2cc6290536 Mon Sep 17 00:00:00 2001 From: Crypto90 Date: Thu, 2 Jul 2020 21:59:11 +0200 Subject: [PATCH] Now also extracting the YouTube playlist video count and returning it as the duration for playlist searches. Updated the _VIDEO_RE regex of the YoutubeSearchBaseInfoExtractor class to extract the video count for YouTube playlist results and pass it to the "duration" output field. So "duration" shows the duration in "HH:MM:SS" for video results, or the number of videos in the playlist for playlist results, e.g. "1,234" or "24". --- youtube_dl/extractor/youtube.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 43a233ddb..9271aea1c 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -332,7 +332,7 @@ class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor): yield self.url_result(video_id, 'Youtube', video_id, video_title, video_duration) elif len(video_id) > 11: # Youtube playlist id found - yield self.url_result('https://www.youtube.com/playlist?list=%s' % video_id, 'YoutubePlaylist', video_id, video_title) + yield self.url_result('https://www.youtube.com/playlist?list=%s' % video_id, 'YoutubePlaylist', video_id, video_title, video_duration) def extract_videos_from_page_impl(self, video_re, page, ids_in_page, titles_in_page, durations_in_page): for mobj in re.finditer(video_re, page): @@ -349,6 +349,9 @@ class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor): if video_title == '► Play all': video_title = None video_duration = mobj.group('duration') if 'duration' in mobj.groupdict() else None + playlist_count = mobj.group('plcounter') if 'plcounter' in mobj.groupdict() else None + if playlist_id is not None and playlist_count is not None: + video_duration = playlist_count if video_duration: video_duration = video_duration.strip() try: @@ -3165,7 +3168,7 @@ class 
YoutubePlaylistsIE(YoutubePlaylistsBaseInfoExtractor): class YoutubeSearchBaseInfoExtractor(YoutubePlaylistBaseInfoExtractor): - _VIDEO_RE = r'href="\s*/watch\?v=(?P[0-9A-Za-z_-]{11})(&list=(?P[0-9A-Za-z_-]+))?(?:[^"]*"[^>]+\btitle="(?P[^"]+))?(.*Duration:\s*(?P<duration>([0-1]?[0-9]|2[0-3]):[0-5][0-9]))?' + _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})(&list=(?P<plid>[0-9A-Za-z_-]+))?(?:[^"]*"[^>]+\btitle="(?P<title>[^"]+))?(?:((?!formatted-video-count-label)[\s\S])*[^\d]+(?P<plcounter>[0-9,.]+))?(.*Duration:\s*(?P<duration>([0-1]?[0-9]|2[0-3]):[0-5][0-9]))?' class YoutubeSearchIE(SearchInfoExtractor, YoutubeSearchBaseInfoExtractor):