Fix playlist order

The "index" attribute does not seem to correspond to the playlist
order (at least, not exactly).

An OrderedDict would really help here, but it is not available in
Python 2.6.  If support for Python 2.6 is ever dropped...  :)
This commit is contained in:
Adam Porter 2015-09-28 02:04:24 -05:00
parent b5b683bc51
commit edb0e97b2e
2 changed files with 27 additions and 13 deletions

View File

@@ -58,12 +58,21 @@ class TestYoutubeLists(unittest.TestCase):
# Save generator output
playlist = [v for v in result['entries']]
# Find videos in playlist
for video in videos:
matching_videos = [v for v in playlist if v['id'] == video['id']]
self.assertEqual(len(matching_videos), 1)
self.assertEqual(matching_videos[0]['title'], video['title'])
# TODO: It would be good to check that the videos are returned
# in the correct order (not necessarily back-to-back), which,
# of course, requires creating the test data in the correct
# order. The reason is that simple mistakes (like forgetting
# that dicts don't keep insertion order) can result in the
# order being wrong. This could be in a separate test, or it
# could go here.
def test_youtube_playlist_noplaylist(self):
dl = FakeYDL()
dl.params['noplaylist'] = True

View File

@@ -1586,11 +1586,19 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
matches = self._VIDEO_RE.finditer(content_html)
# Get videos from current page. Using OrderedDict to
# avoid duplicates would make this much simpler.
# avoid duplicates would make this much
# simpler. Lacking that, we store the order of the
# videos as video_num so we can sort the dict, keeping
# the order of the playlist. We have to avoid
# duplicates because it seems that every video in the
# playlist shows up in the HTML/JSON twice: once
# without a title, and once with a title. Maybe using
# something like bs4 instead of regexps would also be a
# good idea.
new_videos = {}
num = 0
for m in matches:
video_index = m.group('index')
if video_index == '0':
if m.group('index') == '0':
# Ignore link with index 0
continue
@@ -1602,21 +1610,18 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
video_title = None
if video_id in new_videos:
# Duplicate video
# Video is already in dict
if video_title and not new_videos[video_id]['title']:
# Set missing title
new_videos[video_id]['title'] = video_title
new_videos[video_id]['index'] = video_index
else:
# New video
new_videos[video_id] = {'index': int(video_index),
'title': video_title}
# Video not in dict
new_videos[video_id] = {'num': num, 'title': video_title}
# Sort videos by index
new_videos = sorted(new_videos.iteritems(), key=lambda v: v[1]['index'])
num += 1
# Sort videos by playlist order
new_videos = sorted(new_videos.iteritems(), key=lambda v: v[1]['num'])
# Yield current list of videos
for video in new_videos: