Also extract the video count of YouTube playlists and return it as the duration for playlist search results.

Updated the _VIDEO_RE regex of the YoutubeSearchBaseInfoExtractor class to extract the video count for YouTube playlist results and pass it to the "duration" output field.
As a result, "duration" shows the duration in "HH:MM:SS" for video results, or the number of videos in the playlist for playlist results, e.g. "1,234" or "24".
This commit is contained in:
Crypto90 2020-07-02 21:59:11 +02:00 committed by GitHub
parent 0289ffc4f4
commit 796a691e95
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -332,7 +332,7 @@ class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
yield self.url_result(video_id, 'Youtube', video_id, video_title, video_duration) yield self.url_result(video_id, 'Youtube', video_id, video_title, video_duration)
elif len(video_id) > 11: elif len(video_id) > 11:
# Youtube playlist id found # Youtube playlist id found
yield self.url_result('https://www.youtube.com/playlist?list=%s' % video_id, 'YoutubePlaylist', video_id, video_title) yield self.url_result('https://www.youtube.com/playlist?list=%s' % video_id, 'YoutubePlaylist', video_id, video_title, video_duration)
def extract_videos_from_page_impl(self, video_re, page, ids_in_page, titles_in_page, durations_in_page): def extract_videos_from_page_impl(self, video_re, page, ids_in_page, titles_in_page, durations_in_page):
for mobj in re.finditer(video_re, page): for mobj in re.finditer(video_re, page):
@ -349,6 +349,9 @@ class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
if video_title == '► Play all': if video_title == '► Play all':
video_title = None video_title = None
video_duration = mobj.group('duration') if 'duration' in mobj.groupdict() else None video_duration = mobj.group('duration') if 'duration' in mobj.groupdict() else None
playlist_count = mobj.group('plcounter') if 'plcounter' in mobj.groupdict() else None
if playlist_id is not None and playlist_count is not None:
video_duration = playlist_count
if video_duration: if video_duration:
video_duration = video_duration.strip() video_duration = video_duration.strip()
try: try:
@ -3165,7 +3168,7 @@ class YoutubePlaylistsIE(YoutubePlaylistsBaseInfoExtractor):
class YoutubeSearchBaseInfoExtractor(YoutubePlaylistBaseInfoExtractor): class YoutubeSearchBaseInfoExtractor(YoutubePlaylistBaseInfoExtractor):
_VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})(&amp;list=(?P<plid>[0-9A-Za-z_-]+))?(?:[^"]*"[^>]+\btitle="(?P<title>[^"]+))?(.*Duration:\s*(?P<duration>([0-1]?[0-9]|2[0-3]):[0-5][0-9]))?' _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})(&amp;list=(?P<plid>[0-9A-Za-z_-]+))?(?:[^"]*"[^>]+\btitle="(?P<title>[^"]+))?(?:((?!formatted-video-count-label)[\s\S])*[^\d]+(?P<plcounter>[0-9,.]+))?(.*Duration:\s*(?P<duration>([0-1]?[0-9]|2[0-3]):[0-5][0-9]))?'
class YoutubeSearchIE(SearchInfoExtractor, YoutubeSearchBaseInfoExtractor): class YoutubeSearchIE(SearchInfoExtractor, YoutubeSearchBaseInfoExtractor):