From bd1340d29483f52bddf27f11adbc3ad488cb0968 Mon Sep 17 00:00:00 2001
From: Glenn Slayden <5589855+glenn-slayden@users.noreply.github.com>
Date: Wed, 24 Jun 2020 23:02:06 -0700
Subject: [PATCH] Prevent HTTP 301 for YouTube playlist continuations
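When a YouTube playlist or channel listing has more than one page of videos, the extractor requests each continuation page from `youtube.com` instead of `www.youtube.com`. YouTube answers every such request with an HTTP 301 redirect to `www.youtube.com`, so each continuation page costs an unnecessary extra HTTP round-trip. Building the continuation URL with the `www.youtube.com` host avoids the redirect.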
**Example**
youtube-dl -s --print-traffic https://www.youtube.com/channel/UCBR8-60-B28hp2BmDPdntcQ
**Before**
GET /playlist?list=UUBR8-60-B28hp2BmDPdntcQ&disable_polymer=true
Host: www.youtube.com
HTTP/1.1 200 OK
GET /browse_ajax?action_continuation=1&continuation=4qmFsgIsEhpWTFVVQlI4LTYwLUIyOGhwMkJtRFBkbnRjURoOZWdaUVZEcERSMUUlM0Q%253D&disable_polymer=true
Host: youtube.com
HTTP/1.1 301 Moved Permanently
Location: https://www.youtube.com/browse_ajax?action_continuation=1&continuation=4qmFsgIsEhpWTFVVQlI4LTYwLUIyOGhwMkJtRFBkbnRjURoOZWdaUVZEcERSMUUlM0Q%253D&disable_polymer=true
GET /browse_ajax?action_continuation=1&continuation=4qmFsgIsEhpWTFVVQlI4LTYwLUIyOGhwMkJtRFBkbnRjURoOZWdaUVZEcERSMUUlM0Q%253D&disable_polymer=true
Host: www.youtube.com
HTTP/1.1 200 OK
GET /browse_ajax?action_continuation=1&continuation=4qmFsgIqEhpWTFVVQlI4LTYwLUIyOGhwMkJtRFBkbnRjURoMZWdkUVZEcERUV2RD&disable_polymer=true
Host: youtube.com
HTTP/1.1 301 Moved Permanently
Location: https://www.youtube.com/browse_ajax?action_continuation=1&continuation=4qmFsgIqEhpWTFVVQlI4LTYwLUIyOGhwMkJtRFBkbnRjURoMZWdkUVZEcERUV2RD&disable_polymer=true
GET /browse_ajax?action_continuation=1&continuation=4qmFsgIqEhpWTFVVQlI4LTYwLUIyOGhwMkJtRFBkbnRjURoMZWdkUVZEcERUV2RD&disable_polymer=true
Host: www.youtube.com
HTTP/1.1 200 OK
GET /browse_ajax?action_continuation=1&continuation=4qmFsgIqEhpWTFVVQlI4LTYwLUIyOGhwMkJtRFBkbnRjURoMZWdkUVZEcERTM2RE&disable_polymer=true
Host: youtube.com
HTTP/1.1 301 Moved Permanently
Location: https://www.youtube.com/browse_ajax?action_continuation=1&continuation=4qmFsgIqEhpWTFVVQlI4LTYwLUIyOGhwMkJtRFBkbnRjURoMZWdkUVZEcERTM2RE&disable_polymer=true
GET /browse_ajax?action_continuation=1&continuation=4qmFsgIqEhpWTFVVQlI4LTYwLUIyOGhwMkJtRFBkbnRjURoMZWdkUVZEcERTM2RE&disable_polymer=true
Host: www.youtube.com
HTTP/1.1 200 OK
**After**
GET /playlist?list=UUBR8-60-B28hp2BmDPdntcQ&disable_polymer=true
Host: www.youtube.com
HTTP/1.1 200 OK
GET /browse_ajax?action_continuation=1&continuation=4qmFsgIsEhpWTFVVQlI4LTYwLUIyOGhwMkJtRFBkbnRjURoOZWdaUVZEcERSMUUlM0Q%253D&disable_polymer=true
Host: www.youtube.com
HTTP/1.1 200 OK
GET /browse_ajax?action_continuation=1&continuation=4qmFsgIqEhpWTFVVQlI4LTYwLUIyOGhwMkJtRFBkbnRjURoMZWdkUVZEcERUV2RD&disable_polymer=true
Host: www.youtube.com
HTTP/1.1 200 OK
---
youtube_dl/extractor/youtube.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 1bc79e014..638c6617e 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -303,7 +303,7 @@ class YoutubeEntryListBaseInfoExtractor(YoutubeBaseInfoExtractor):
# Downloading page may result in intermittent 5xx HTTP error
# that is usually worked around with a retry
more = self._download_json(
- 'https://youtube.com/%s' % mobj.group('more'), playlist_id,
+ 'https://www.youtube.com/%s' % mobj.group('more'), playlist_id,
'Downloading page #%s%s'
% (page_num, ' (retry #%d)' % count if count else ''),
transform_source=uppercase_escape,