Add files via upload
Fixed flake8 issues.
This commit is contained in:
parent
5b6d1d5561
commit
6439a77527
@ -29,6 +29,7 @@ from ..compat import (
|
||||
from ..utils import (
|
||||
bool_or_none,
|
||||
clean_html,
|
||||
dict_get,
|
||||
error_to_compat_str,
|
||||
extract_attributes,
|
||||
ExtractorError,
|
||||
@ -70,14 +71,9 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
||||
|
||||
_PLAYLIST_ID_RE = r'(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}'
|
||||
|
||||
_YOUTUBE_CLIENT_HEADERS = {
|
||||
'x-youtube-client-name': '1',
|
||||
'x-youtube-client-version': '1.20200609.04.02',
|
||||
}
|
||||
|
||||
def _set_language(self):
|
||||
self._set_cookie(
|
||||
'.youtube.com', 'PREF', 'f1=50000000&f6=8&hl=en',
|
||||
'.youtube.com', 'PREF', 'f1=50000000&hl=en',
|
||||
# YouTube sets the expire time to about two months
|
||||
expire_time=time.time() + 2 * 30 * 24 * 3600)
|
||||
|
||||
@ -306,8 +302,7 @@ class YoutubeEntryListBaseInfoExtractor(YoutubeBaseInfoExtractor):
|
||||
'https://youtube.com/%s' % mobj.group('more'), playlist_id,
|
||||
'Downloading page #%s%s'
|
||||
% (page_num, ' (retry #%d)' % count if count else ''),
|
||||
transform_source=uppercase_escape,
|
||||
headers=self._YOUTUBE_CLIENT_HEADERS)
|
||||
transform_source=uppercase_escape)
|
||||
break
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503):
|
||||
@ -328,10 +323,10 @@ class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
|
||||
def _process_page(self, content):
|
||||
for video_id, video_title, video_duration in self.extract_videos_from_page(content):
|
||||
if len(video_id) == 11:
|
||||
#youtube video id found
|
||||
# Youtube video id found
|
||||
yield self.url_result(video_id, 'Youtube', video_id, video_title)
|
||||
elif len(video_id) > 11:
|
||||
#youtube playlist id found
|
||||
# Youtube playlist id found
|
||||
yield self.url_result('https://www.youtube.com/playlist?list=%s' % video_id, 'YoutubePlaylist', video_id, video_title)
|
||||
|
||||
def extract_videos_from_page_impl(self, video_re, page, ids_in_page, titles_in_page, durations_in_page):
|
||||
@ -341,7 +336,7 @@ class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
|
||||
continue
|
||||
video_id = mobj.group('id')
|
||||
playlist_id = mobj.group('plid') if 'plid' in mobj.groupdict() else None
|
||||
if playlist_id != None:
|
||||
if playlist_id is not None:
|
||||
video_id = playlist_id
|
||||
video_title = unescapeHTML(mobj.group('title')) if 'title' in mobj.groupdict() else None
|
||||
if video_title:
|
||||
@ -353,18 +348,14 @@ class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
|
||||
video_duration = video_duration.strip()
|
||||
try:
|
||||
idx = ids_in_page.index(video_id)
|
||||
|
||||
if video_title and not titles_in_page[idx]:
|
||||
titles_in_page[idx] = video_title
|
||||
|
||||
if video_duration and not durations_in_page[idx]:
|
||||
durations_in_page[idx] = video_duration
|
||||
|
||||
except ValueError:
|
||||
ids_in_page.append(video_id)
|
||||
titles_in_page.append(video_title)
|
||||
durations_in_page.append(video_duration)
|
||||
|
||||
def extract_videos_from_page(self, page):
|
||||
ids_in_page = []
|
||||
titles_in_page = []
|
||||
@ -410,15 +401,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
(?:www\.)?invidious\.drycat\.fr/|
|
||||
(?:www\.)?tube\.poal\.co/|
|
||||
(?:www\.)?vid\.wxzm\.sx/|
|
||||
(?:www\.)?yewtu\.be/|
|
||||
(?:www\.)?yt\.elukerio\.org/|
|
||||
(?:www\.)?yt\.lelux\.fi/|
|
||||
(?:www\.)?invidious\.ggc-project\.de/|
|
||||
(?:www\.)?yt\.maisputain\.ovh/|
|
||||
(?:www\.)?invidious\.13ad\.de/|
|
||||
(?:www\.)?invidious\.toot\.koeln/|
|
||||
(?:www\.)?invidious\.fdn\.fr/|
|
||||
(?:www\.)?watch\.nettohikari\.com/|
|
||||
(?:www\.)?kgg2m7yk5aybusll\.onion/|
|
||||
(?:www\.)?qklhadlycap4cnod\.onion/|
|
||||
(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion/|
|
||||
@ -426,7 +410,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion/|
|
||||
(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion/|
|
||||
(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p/|
|
||||
(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion/|
|
||||
youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains
|
||||
(?:.*?\#/)? # handle anchor (#/) redirect urls
|
||||
(?: # the various things that can precede the ID:
|
||||
@ -456,10 +439,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
(?(1).+)? # if we found the ID, everything can follow
|
||||
$""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
|
||||
_NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
|
||||
_PLAYER_INFO_RE = (
|
||||
r'/(?P<id>[a-zA-Z0-9_-]{8,})/player_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?/base\.(?P<ext>[a-z]+)$',
|
||||
r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.(?P<ext>[a-z]+)$',
|
||||
)
|
||||
_formats = {
|
||||
'5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
|
||||
'6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
|
||||
@ -603,7 +582,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
'upload_date': '20120506',
|
||||
'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]',
|
||||
'alt_title': 'I Love It (feat. Charli XCX)',
|
||||
'description': 'md5:19a2f98d9032b9311e686ed039564f63',
|
||||
'description': 'md5:f3ceb5ef83a08d95b9d146f973157cc8',
|
||||
'tags': ['Icona Pop i love it', 'sweden', 'pop music', 'big beat records', 'big beat', 'charli',
|
||||
'xcx', 'charli xcx', 'girls', 'hbo', 'i love it', "i don't care", 'icona', 'pop',
|
||||
'iconic ep', 'iconic', 'love', 'it'],
|
||||
@ -718,11 +697,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
'id': 'nfWlot6h_JM',
|
||||
'ext': 'm4a',
|
||||
'title': 'Taylor Swift - Shake It Off',
|
||||
'description': 'md5:307195cd21ff7fa352270fe884570ef0',
|
||||
'description': 'md5:bec2185232c05479482cb5a9b82719bf',
|
||||
'duration': 242,
|
||||
'uploader': 'TaylorSwiftVEVO',
|
||||
'uploader_id': 'TaylorSwiftVEVO',
|
||||
'upload_date': '20140818',
|
||||
'creator': 'Taylor Swift',
|
||||
},
|
||||
'params': {
|
||||
'youtube_include_dash_manifest': True,
|
||||
@ -787,11 +767,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
'upload_date': '20100430',
|
||||
'uploader_id': 'deadmau5',
|
||||
'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
|
||||
'creator': 'Dada Life, deadmau5',
|
||||
'creator': 'deadmau5',
|
||||
'description': 'md5:12c56784b8032162bb936a5f76d55360',
|
||||
'uploader': 'deadmau5',
|
||||
'title': 'Deadmau5 - Some Chords (HD)',
|
||||
'alt_title': 'This Machine Kills Some Chords',
|
||||
'alt_title': 'Some Chords',
|
||||
},
|
||||
'expected_warnings': [
|
||||
'DASH manifest missing',
|
||||
@ -1167,7 +1147,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
'skip_download': True,
|
||||
'youtube_include_dash_manifest': False,
|
||||
},
|
||||
'skip': 'not actual anymore',
|
||||
},
|
||||
{
|
||||
# Youtube Music Auto-generated description
|
||||
@ -1178,8 +1157,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
'title': 'Voyeur Girl',
|
||||
'description': 'md5:7ae382a65843d6df2685993e90a8628f',
|
||||
'upload_date': '20190312',
|
||||
'uploader': 'Stephen - Topic',
|
||||
'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
|
||||
'uploader': 'Various Artists - Topic',
|
||||
'uploader_id': 'UCVWKBi1ELZn0QX2CBLSkiyw',
|
||||
'artist': 'Stephen',
|
||||
'track': 'Voyeur Girl',
|
||||
'album': 'it\'s too much love to know my dear',
|
||||
@ -1243,7 +1222,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
'id': '-hcAI0g-f5M',
|
||||
'ext': 'mp4',
|
||||
'title': 'Put It On Me',
|
||||
'description': 'md5:f6422397c07c4c907c6638e1fee380a5',
|
||||
'description': 'md5:93c55acc682ae7b0c668f2e34e1c069e',
|
||||
'upload_date': '20180426',
|
||||
'uploader': 'Matt Maeson - Topic',
|
||||
'uploader_id': 'UCnEkIGqtGcQMLk73Kp-Q5LQ',
|
||||
@ -1261,26 +1240,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
# invalid -> valid video id redirection
|
||||
'url': 'DJztXj2GPfl',
|
||||
'info_dict': {
|
||||
'id': 'DJztXj2GPfk',
|
||||
'ext': 'mp4',
|
||||
'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
|
||||
'description': 'md5:bf577a41da97918e94fa9798d9228825',
|
||||
'upload_date': '20090125',
|
||||
'uploader': 'Prochorowka',
|
||||
'uploader_id': 'Prochorowka',
|
||||
'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
|
||||
'artist': 'Panjabi MC',
|
||||
'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
|
||||
'album': 'Beware of the Boys (Mundian To Bach Ke)',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
]
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
@ -1307,18 +1266,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
""" Return a string representation of a signature """
|
||||
return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
|
||||
|
||||
@classmethod
|
||||
def _extract_player_info(cls, player_url):
|
||||
for player_re in cls._PLAYER_INFO_RE:
|
||||
id_m = re.search(player_re, player_url)
|
||||
if id_m:
|
||||
break
|
||||
else:
|
||||
raise ExtractorError('Cannot identify player %r' % player_url)
|
||||
return id_m.group('ext'), id_m.group('id')
|
||||
|
||||
def _extract_signature_function(self, video_id, player_url, example_sig):
|
||||
player_type, player_id = self._extract_player_info(player_url)
|
||||
id_m = re.match(
|
||||
r'.*?-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player(?:-new)?|(?:/[a-z]{2,3}_[A-Z]{2})?/base)?\.(?P<ext>[a-z]+)$',
|
||||
player_url)
|
||||
if not id_m:
|
||||
raise ExtractorError('Cannot identify player %r' % player_url)
|
||||
player_type = id_m.group('ext')
|
||||
player_id = id_m.group('id')
|
||||
|
||||
# Read from filesystem cache
|
||||
func_id = '%s_%s_%s' % (
|
||||
@ -1400,7 +1355,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
funcname = self._search_regex(
|
||||
(r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
|
||||
r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
|
||||
r'\b(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
|
||||
r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
|
||||
# Obsolete patterns
|
||||
r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
|
||||
@ -1674,63 +1628,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
video_id = mobj.group(2)
|
||||
return video_id
|
||||
|
||||
def _extract_chapters_from_json(self, webpage, video_id, duration):
|
||||
if not webpage:
|
||||
return
|
||||
player = self._parse_json(
|
||||
self._search_regex(
|
||||
r'RELATED_PLAYER_ARGS["\']\s*:\s*({.+})\s*,?\s*\n', webpage,
|
||||
'player args', default='{}'),
|
||||
video_id, fatal=False)
|
||||
if not player or not isinstance(player, dict):
|
||||
return
|
||||
watch_next_response = player.get('watch_next_response')
|
||||
if not isinstance(watch_next_response, compat_str):
|
||||
return
|
||||
response = self._parse_json(watch_next_response, video_id, fatal=False)
|
||||
if not response or not isinstance(response, dict):
|
||||
return
|
||||
chapters_list = try_get(
|
||||
response,
|
||||
lambda x: x['playerOverlays']
|
||||
['playerOverlayRenderer']
|
||||
['decoratedPlayerBarRenderer']
|
||||
['decoratedPlayerBarRenderer']
|
||||
['playerBar']
|
||||
['chapteredPlayerBarRenderer']
|
||||
['chapters'],
|
||||
list)
|
||||
if not chapters_list:
|
||||
return
|
||||
|
||||
def chapter_time(chapter):
|
||||
return float_or_none(
|
||||
try_get(
|
||||
chapter,
|
||||
lambda x: x['chapterRenderer']['timeRangeStartMillis'],
|
||||
int),
|
||||
scale=1000)
|
||||
chapters = []
|
||||
for next_num, chapter in enumerate(chapters_list, start=1):
|
||||
start_time = chapter_time(chapter)
|
||||
if start_time is None:
|
||||
continue
|
||||
end_time = (chapter_time(chapters_list[next_num])
|
||||
if next_num < len(chapters_list) else duration)
|
||||
if end_time is None:
|
||||
continue
|
||||
title = try_get(
|
||||
chapter, lambda x: x['chapterRenderer']['title']['simpleText'],
|
||||
compat_str)
|
||||
chapters.append({
|
||||
'start_time': start_time,
|
||||
'end_time': end_time,
|
||||
'title': title,
|
||||
})
|
||||
return chapters
|
||||
|
||||
@staticmethod
|
||||
def _extract_chapters_from_description(description, duration):
|
||||
def _extract_chapters(description, duration):
|
||||
if not description:
|
||||
return None
|
||||
chapter_lines = re.findall(
|
||||
@ -1764,10 +1663,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
})
|
||||
return chapters
|
||||
|
||||
def _extract_chapters(self, webpage, description, video_id, duration):
|
||||
return (self._extract_chapters_from_json(webpage, video_id, duration)
|
||||
or self._extract_chapters_from_description(description, duration))
|
||||
|
||||
def _real_extract(self, url):
|
||||
url, smuggled_data = unsmuggle_url(url, {})
|
||||
|
||||
@ -1795,10 +1690,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
|
||||
# Get video webpage
|
||||
url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id
|
||||
video_webpage, urlh = self._download_webpage_handle(url, video_id)
|
||||
|
||||
qs = compat_parse_qs(compat_urllib_parse_urlparse(urlh.geturl()).query)
|
||||
video_id = qs.get('v', [None])[0] or video_id
|
||||
video_webpage = self._download_webpage(url, video_id)
|
||||
|
||||
# Attempt to extract SWF player URL
|
||||
mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
|
||||
@ -1827,6 +1719,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
def extract_view_count(v_info):
|
||||
return int_or_none(try_get(v_info, lambda x: x['view_count'][0]))
|
||||
|
||||
def extract_token(v_info):
|
||||
return dict_get(v_info, ('account_playback_token', 'accountPlaybackToken', 'token'))
|
||||
|
||||
def extract_player_response(player_response, video_id):
|
||||
pl_response = str_or_none(player_response)
|
||||
if not pl_response:
|
||||
@ -1839,7 +1734,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
player_response = {}
|
||||
|
||||
# Get video info
|
||||
video_info = {}
|
||||
embed_webpage = None
|
||||
if re.search(r'player-age-gate-content">', video_webpage) is not None:
|
||||
age_gate = True
|
||||
@ -1854,21 +1748,19 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
r'"sts"\s*:\s*(\d+)', embed_webpage, 'sts', default=''),
|
||||
})
|
||||
video_info_url = proto + '://www.youtube.com/get_video_info?' + data
|
||||
try:
|
||||
video_info_webpage = self._download_webpage(
|
||||
video_info_url, video_id,
|
||||
note='Refetching age-gated info webpage',
|
||||
errnote='unable to download video info webpage')
|
||||
except ExtractorError:
|
||||
video_info_webpage = None
|
||||
if video_info_webpage:
|
||||
video_info = compat_parse_qs(video_info_webpage)
|
||||
pl_response = video_info.get('player_response', [None])[0]
|
||||
player_response = extract_player_response(pl_response, video_id)
|
||||
add_dash_mpd(video_info)
|
||||
view_count = extract_view_count(video_info)
|
||||
video_info_webpage = self._download_webpage(
|
||||
video_info_url, video_id,
|
||||
note='Refetching age-gated info webpage',
|
||||
errnote='unable to download video info webpage')
|
||||
video_info = compat_parse_qs(video_info_webpage)
|
||||
pl_response = video_info.get('player_response', [None])[0]
|
||||
player_response = extract_player_response(pl_response, video_id)
|
||||
add_dash_mpd(video_info)
|
||||
view_count = extract_view_count(video_info)
|
||||
else:
|
||||
age_gate = False
|
||||
video_info = None
|
||||
sts = None
|
||||
# Try looking directly into the video webpage
|
||||
ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
|
||||
if ytplayer_config:
|
||||
@ -1885,10 +1777,61 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid'])
|
||||
if args.get('livestream') == '1' or args.get('live_playback') == 1:
|
||||
is_live = True
|
||||
sts = ytplayer_config.get('sts')
|
||||
if not player_response:
|
||||
player_response = extract_player_response(args.get('player_response'), video_id)
|
||||
if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
|
||||
add_dash_mpd_pr(player_response)
|
||||
# We also try looking in get_video_info since it may contain different dashmpd
|
||||
# URL that points to a DASH manifest with possibly different itag set (some itags
|
||||
# are missing from DASH manifest pointed by webpage's dashmpd, some - from DASH
|
||||
# manifest pointed by get_video_info's dashmpd).
|
||||
# The general idea is to take a union of itags of both DASH manifests (for example
|
||||
# video with such 'manifest behavior' see https://github.com/ytdl-org/youtube-dl/issues/6093)
|
||||
self.report_video_info_webpage_download(video_id)
|
||||
for el in ('embedded', 'detailpage', 'vevo', ''):
|
||||
query = {
|
||||
'video_id': video_id,
|
||||
'ps': 'default',
|
||||
'eurl': '',
|
||||
'gl': 'US',
|
||||
'hl': 'en',
|
||||
}
|
||||
if el:
|
||||
query['el'] = el
|
||||
if sts:
|
||||
query['sts'] = sts
|
||||
video_info_webpage = self._download_webpage(
|
||||
'%s://www.youtube.com/get_video_info' % proto,
|
||||
video_id, note=False,
|
||||
errnote='unable to download video info webpage',
|
||||
fatal=False, query=query)
|
||||
if not video_info_webpage:
|
||||
continue
|
||||
get_video_info = compat_parse_qs(video_info_webpage)
|
||||
if not player_response:
|
||||
pl_response = get_video_info.get('player_response', [None])[0]
|
||||
player_response = extract_player_response(pl_response, video_id)
|
||||
add_dash_mpd(get_video_info)
|
||||
if view_count is None:
|
||||
view_count = extract_view_count(get_video_info)
|
||||
if not video_info:
|
||||
video_info = get_video_info
|
||||
get_token = extract_token(get_video_info)
|
||||
if get_token:
|
||||
# Different get_video_info requests may report different results, e.g.
|
||||
# some may report video unavailability, but some may serve it without
|
||||
# any complaint (see https://github.com/ytdl-org/youtube-dl/issues/7362,
|
||||
# the original webpage as well as el=info and el=embedded get_video_info
|
||||
# requests report video unavailability due to geo restriction while
|
||||
# el=detailpage succeeds and returns valid data). This is probably
|
||||
# due to YouTube measures against IP ranges of hosting providers.
|
||||
# Working around by preferring the first succeeded video_info containing
|
||||
# the token if no such video_info yet was found.
|
||||
token = extract_token(video_info)
|
||||
if not token:
|
||||
video_info = get_video_info
|
||||
break
|
||||
|
||||
def extract_unavailable_message():
|
||||
messages = []
|
||||
@ -1901,22 +1844,16 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
if messages:
|
||||
return '\n'.join(messages)
|
||||
|
||||
if not video_info and not player_response:
|
||||
if not video_info:
|
||||
unavailable_message = extract_unavailable_message()
|
||||
if not unavailable_message:
|
||||
unavailable_message = 'Unable to extract video data'
|
||||
raise ExtractorError(
|
||||
'YouTube said: %s' % unavailable_message, expected=True, video_id=video_id)
|
||||
|
||||
if not isinstance(video_info, dict):
|
||||
video_info = {}
|
||||
|
||||
video_details = try_get(
|
||||
player_response, lambda x: x['videoDetails'], dict) or {}
|
||||
|
||||
microformat = try_get(
|
||||
player_response, lambda x: x['microformat']['playerMicroformatRenderer'], dict) or {}
|
||||
|
||||
video_title = video_info.get('title', [None])[0] or video_details.get('title')
|
||||
if not video_title:
|
||||
self._downloader.report_warning('Unable to extract video title')
|
||||
@ -1963,26 +1900,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
# fields may contain comma as well (see
|
||||
# https://github.com/ytdl-org/youtube-dl/issues/8536)
|
||||
feed_data = compat_parse_qs(compat_urllib_parse_unquote_plus(feed))
|
||||
|
||||
def feed_entry(name):
|
||||
return try_get(feed_data, lambda x: x[name][0], compat_str)
|
||||
|
||||
feed_id = feed_entry('id')
|
||||
if not feed_id:
|
||||
continue
|
||||
feed_title = feed_entry('title')
|
||||
title = video_title
|
||||
if feed_title:
|
||||
title += ' (%s)' % feed_title
|
||||
entries.append({
|
||||
'_type': 'url_transparent',
|
||||
'ie_key': 'Youtube',
|
||||
'url': smuggle_url(
|
||||
'%s://www.youtube.com/watch?v=%s' % (proto, feed_data['id'][0]),
|
||||
{'force_singlefeed': True}),
|
||||
'title': title,
|
||||
'title': '%s (%s)' % (video_title, feed_data['title'][0]),
|
||||
})
|
||||
feed_ids.append(feed_id)
|
||||
feed_ids.append(feed_data['id'][0])
|
||||
self.to_screen(
|
||||
'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
|
||||
% (', '.join(feed_ids), video_id))
|
||||
@ -1994,8 +1920,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
view_count = extract_view_count(video_info)
|
||||
if view_count is None and video_details:
|
||||
view_count = int_or_none(video_details.get('viewCount'))
|
||||
if view_count is None and microformat:
|
||||
view_count = int_or_none(microformat.get('viewCount'))
|
||||
|
||||
if is_live is None:
|
||||
is_live = bool_or_none(video_details.get('isLive'))
|
||||
@ -2055,12 +1979,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
}
|
||||
|
||||
for fmt in streaming_formats:
|
||||
if fmt.get('drmFamilies') or fmt.get('drm_families'):
|
||||
if fmt.get('drm_families'):
|
||||
continue
|
||||
url = url_or_none(fmt.get('url'))
|
||||
|
||||
if not url:
|
||||
cipher = fmt.get('cipher') or fmt.get('signatureCipher')
|
||||
cipher = fmt.get('cipher')
|
||||
if not cipher:
|
||||
continue
|
||||
url_data = compat_parse_qs(cipher)
|
||||
@ -2111,10 +2035,22 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
|
||||
if self._downloader.params.get('verbose'):
|
||||
if player_url is None:
|
||||
player_version = 'unknown'
|
||||
player_desc = 'unknown'
|
||||
else:
|
||||
player_type, player_version = self._extract_player_info(player_url)
|
||||
player_desc = '%s player %s' % ('flash' if player_type == 'swf' else 'html5', player_version)
|
||||
if player_url.endswith('swf'):
|
||||
player_version = self._search_regex(
|
||||
r'-(.+?)(?:/watch_as3)?\.swf$', player_url,
|
||||
'flash player', fatal=False)
|
||||
player_desc = 'flash player %s' % player_version
|
||||
else:
|
||||
player_version = self._search_regex(
|
||||
[r'html5player-([^/]+?)(?:/html5player(?:-new)?)?\.js',
|
||||
r'(?:www|player(?:_ias)?)-([^/]+)(?:/[a-z]{2,3}_[A-Z]{2})?/base\.js'],
|
||||
player_url,
|
||||
'html5 player', fatal=False)
|
||||
player_desc = 'html5 player %s' % player_version
|
||||
|
||||
parts_sizes = self._signature_cache_id(encrypted_sig)
|
||||
self.to_screen('{%s} signature length %s, %s' %
|
||||
(format_id, parts_sizes, player_desc))
|
||||
@ -2247,12 +2183,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
video_uploader_id = mobj.group('uploader_id')
|
||||
video_uploader_url = mobj.group('uploader_url')
|
||||
else:
|
||||
owner_profile_url = url_or_none(microformat.get('ownerProfileUrl'))
|
||||
if owner_profile_url:
|
||||
video_uploader_id = self._search_regex(
|
||||
r'(?:user|channel)/([^/]+)', owner_profile_url, 'uploader id',
|
||||
default=None)
|
||||
video_uploader_url = owner_profile_url
|
||||
self._downloader.report_warning('unable to extract uploader nickname')
|
||||
|
||||
channel_id = (
|
||||
str_or_none(video_details.get('channelId'))
|
||||
@ -2263,33 +2194,17 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
video_webpage, 'channel id', default=None, group='id'))
|
||||
channel_url = 'http://www.youtube.com/channel/%s' % channel_id if channel_id else None
|
||||
|
||||
thumbnails = []
|
||||
thumbnails_list = try_get(
|
||||
video_details, lambda x: x['thumbnail']['thumbnails'], list) or []
|
||||
for t in thumbnails_list:
|
||||
if not isinstance(t, dict):
|
||||
continue
|
||||
thumbnail_url = url_or_none(t.get('url'))
|
||||
if not thumbnail_url:
|
||||
continue
|
||||
thumbnails.append({
|
||||
'url': thumbnail_url,
|
||||
'width': int_or_none(t.get('width')),
|
||||
'height': int_or_none(t.get('height')),
|
||||
})
|
||||
|
||||
if not thumbnails:
|
||||
# thumbnail image
|
||||
# We try first to get a high quality image:
|
||||
m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
|
||||
video_webpage, re.DOTALL)
|
||||
if m_thumb is not None:
|
||||
video_thumbnail = m_thumb.group(1)
|
||||
elif 'thumbnail_url' not in video_info:
|
||||
self._downloader.report_warning('unable to extract video thumbnail')
|
||||
video_thumbnail = None
|
||||
# We try first to get a high quality image:
|
||||
m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
|
||||
video_webpage, re.DOTALL)
|
||||
if m_thumb is not None:
|
||||
video_thumbnail = m_thumb.group(1)
|
||||
thumbnail_url = try_get(video_info, lambda x: x['thumbnail_url'][0], compat_str)
|
||||
if thumbnail_url:
|
||||
video_thumbnail = compat_urllib_parse_unquote_plus(thumbnail_url)
|
||||
if video_thumbnail:
|
||||
thumbnails.append({'url': video_thumbnail})
|
||||
else: # don't panic if we can't find it
|
||||
video_thumbnail = compat_urllib_parse_unquote_plus(video_info['thumbnail_url'][0])
|
||||
|
||||
# upload date
|
||||
upload_date = self._html_search_meta(
|
||||
@ -2299,8 +2214,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
[r'(?s)id="eow-date.*?>(.*?)</span>',
|
||||
r'(?:id="watch-uploader-info".*?>.*?|["\']simpleText["\']\s*:\s*["\'])(?:Published|Uploaded|Streamed live|Started) on (.+?)[<"\']'],
|
||||
video_webpage, 'upload date', default=None)
|
||||
if not upload_date:
|
||||
upload_date = microformat.get('publishDate') or microformat.get('uploadDate')
|
||||
upload_date = unified_strdate(upload_date)
|
||||
|
||||
video_license = self._html_search_regex(
|
||||
@ -2372,21 +2285,17 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
m_cat_container = self._search_regex(
|
||||
r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
|
||||
video_webpage, 'categories', default=None)
|
||||
category = None
|
||||
if m_cat_container:
|
||||
category = self._html_search_regex(
|
||||
r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
|
||||
default=None)
|
||||
if not category:
|
||||
category = try_get(
|
||||
microformat, lambda x: x['category'], compat_str)
|
||||
video_categories = None if category is None else [category]
|
||||
video_categories = None if category is None else [category]
|
||||
else:
|
||||
video_categories = None
|
||||
|
||||
video_tags = [
|
||||
unescapeHTML(m.group('content'))
|
||||
for m in re.finditer(self._meta_regex('og:video:tag'), video_webpage)]
|
||||
if not video_tags:
|
||||
video_tags = try_get(video_details, lambda x: x['keywords'], list)
|
||||
|
||||
def _extract_count(count_name):
|
||||
return str_to_int(self._search_regex(
|
||||
@ -2437,7 +2346,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
errnote='Unable to download video annotations', fatal=False,
|
||||
data=urlencode_postdata({xsrf_field_name: xsrf_token}))
|
||||
|
||||
chapters = self._extract_chapters(video_webpage, description_original, video_id, video_duration)
|
||||
chapters = self._extract_chapters(description_original, video_duration)
|
||||
|
||||
# Look for the DASH manifest
|
||||
if self._downloader.params.get('youtube_include_dash_manifest', True):
|
||||
@ -2494,23 +2403,30 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
f['stretched_ratio'] = ratio
|
||||
|
||||
if not formats:
|
||||
if 'reason' in video_info:
|
||||
if 'The uploader has not made this video available in your country.' in video_info['reason']:
|
||||
regions_allowed = self._html_search_meta(
|
||||
'regionsAllowed', video_webpage, default=None)
|
||||
countries = regions_allowed.split(',') if regions_allowed else None
|
||||
self.raise_geo_restricted(
|
||||
msg=video_info['reason'][0], countries=countries)
|
||||
reason = video_info['reason'][0]
|
||||
if 'Invalid parameters' in reason:
|
||||
unavailable_message = extract_unavailable_message()
|
||||
if unavailable_message:
|
||||
reason = unavailable_message
|
||||
raise ExtractorError(
|
||||
'YouTube said: %s' % reason,
|
||||
expected=True, video_id=video_id)
|
||||
if video_info.get('license_info') or try_get(player_response, lambda x: x['streamingData']['licenseInfos']):
|
||||
raise ExtractorError('This video is DRM protected.', expected=True)
|
||||
token = extract_token(video_info)
|
||||
if not token:
|
||||
if 'reason' in video_info:
|
||||
if 'The uploader has not made this video available in your country.' in video_info['reason']:
|
||||
regions_allowed = self._html_search_meta(
|
||||
'regionsAllowed', video_webpage, default=None)
|
||||
countries = regions_allowed.split(',') if regions_allowed else None
|
||||
self.raise_geo_restricted(
|
||||
msg=video_info['reason'][0], countries=countries)
|
||||
reason = video_info['reason'][0]
|
||||
if 'Invalid parameters' in reason:
|
||||
unavailable_message = extract_unavailable_message()
|
||||
if unavailable_message:
|
||||
reason = unavailable_message
|
||||
raise ExtractorError(
|
||||
'YouTube said: %s' % reason,
|
||||
expected=True, video_id=video_id)
|
||||
else:
|
||||
raise ExtractorError(
|
||||
'"token" parameter not in video info for unknown reason',
|
||||
video_id=video_id)
|
||||
|
||||
if not formats and (video_info.get('license_info') or try_get(player_response, lambda x: x['streamingData']['licenseInfos'])):
|
||||
raise ExtractorError('This video is DRM protected.', expected=True)
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
@ -2528,7 +2444,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
'creator': video_creator or artist,
|
||||
'title': video_title,
|
||||
'alt_title': video_alt_title or track,
|
||||
'thumbnails': thumbnails,
|
||||
'thumbnail': video_thumbnail,
|
||||
'description': video_description,
|
||||
'categories': video_categories,
|
||||
'tags': video_tags,
|
||||
@ -2590,23 +2506,20 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
|
||||
_VIDEO_RE = _VIDEO_RE_TPL % r'(?P<id>[0-9A-Za-z_-]{11})'
|
||||
IE_NAME = 'youtube:playlist'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
|
||||
'url': 'https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
|
||||
'info_dict': {
|
||||
'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
|
||||
'uploader': 'Sergey M.',
|
||||
'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
|
||||
'title': 'youtube-dl public playlist',
|
||||
'title': 'ytdl test PL',
|
||||
'id': 'PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
|
||||
},
|
||||
'playlist_count': 1,
|
||||
'playlist_count': 3,
|
||||
}, {
|
||||
'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
|
||||
'url': 'https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
|
||||
'info_dict': {
|
||||
'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
|
||||
'uploader': 'Sergey M.',
|
||||
'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
|
||||
'title': 'youtube-dl empty playlist',
|
||||
'id': 'PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
|
||||
'title': 'YDL_Empty_List',
|
||||
},
|
||||
'playlist_count': 0,
|
||||
'skip': 'This playlist is private',
|
||||
}, {
|
||||
'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
|
||||
'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
|
||||
@ -2616,7 +2529,7 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
|
||||
'uploader': 'Christiaan008',
|
||||
'uploader_id': 'ChRiStIaAn008',
|
||||
},
|
||||
'playlist_count': 96,
|
||||
'playlist_count': 95,
|
||||
}, {
|
||||
'note': 'issue #673',
|
||||
'url': 'PLBB231211A4F62143',
|
||||
@ -3132,7 +3045,7 @@ class YoutubeLiveIE(YoutubeBaseInfoExtractor):
|
||||
|
||||
class YoutubePlaylistsIE(YoutubePlaylistsBaseInfoExtractor):
|
||||
IE_DESC = 'YouTube.com user/channel playlists'
|
||||
_VALID_URL = r'https?://(?:\w+\.)?youtube\.com/(?:user|channel|c)/(?P<id>[^/]+)/playlists'
|
||||
_VALID_URL = r'https?://(?:\w+\.)?youtube\.com/(?:user|channel)/(?P<id>[^/]+)/playlists'
|
||||
IE_NAME = 'youtube:playlists'
|
||||
|
||||
_TESTS = [{
|
||||
@ -3158,9 +3071,6 @@ class YoutubePlaylistsIE(YoutubePlaylistsBaseInfoExtractor):
|
||||
'title': 'Chem Player',
|
||||
},
|
||||
'skip': 'Blocked',
|
||||
}, {
|
||||
'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
|
||||
@ -3197,7 +3107,7 @@ class YoutubeSearchIE(SearchInfoExtractor, YoutubeSearchBaseInfoExtractor):
|
||||
errnote='Unable to download API page',
|
||||
query={'spf': 'navigate'})
|
||||
html_content = data[1]['body']['content']
|
||||
|
||||
|
||||
if 'class="search-message' in html_content:
|
||||
raise ExtractorError(
|
||||
'[youtube] No video results', expected=True)
|
||||
@ -3307,8 +3217,7 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
|
||||
more = self._download_json(
|
||||
'https://youtube.com/%s' % mobj.group('more'), self._PLAYLIST_TITLE,
|
||||
'Downloading page #%s' % page_num,
|
||||
transform_source=uppercase_escape,
|
||||
headers=self._YOUTUBE_CLIENT_HEADERS)
|
||||
transform_source=uppercase_escape)
|
||||
content_html = more['content_html']
|
||||
more_widget_html = more['load_more_widget_html']
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user