From f1c051009bcf52525df22eb3a59797076551a579 Mon Sep 17 00:00:00 2001 From: Viktor Szakats Date: Tue, 27 Jun 2017 09:20:18 -0500 Subject: [PATCH 1/4] [soundcloud] Switch to https for API requests --- youtube_dl/extractor/soundcloud.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index 0ee4a8ff8..994c86145 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -136,7 +136,7 @@ class SoundcloudIE(InfoExtractor): @classmethod def _resolv_url(cls, url): - return 'http://api.soundcloud.com/resolve.json?url=' + url + '&client_id=' + cls._CLIENT_ID + return 'https://api.soundcloud.com/resolve.json?url=' + url + '&client_id=' + cls._CLIENT_ID def _extract_info_dict(self, info, full_title=None, quiet=False, secret_token=None): track_id = compat_str(info['id']) @@ -174,7 +174,7 @@ class SoundcloudIE(InfoExtractor): # We have to retrieve the url format_dict = self._download_json( - 'http://api.soundcloud.com/i1/tracks/%s/streams' % track_id, + 'https://api.soundcloud.com/i1/tracks/%s/streams' % track_id, track_id, 'Downloading track url', query={ 'client_id': self._CLIENT_ID, 'secret_token': secret_token, @@ -236,7 +236,7 @@ class SoundcloudIE(InfoExtractor): track_id = mobj.group('track_id') if track_id is not None: - info_json_url = 'http://api.soundcloud.com/tracks/' + track_id + '.json?client_id=' + self._CLIENT_ID + info_json_url = 'https://api.soundcloud.com/tracks/' + track_id + '.json?client_id=' + self._CLIENT_ID full_title = track_id token = mobj.group('secret_token') if token: @@ -261,7 +261,7 @@ class SoundcloudIE(InfoExtractor): self.report_resolve(full_title) - url = 'http://soundcloud.com/%s' % resolve_title + url = 'https://soundcloud.com/%s' % resolve_title info_json_url = self._resolv_url(url) info = self._download_json(info_json_url, full_title, 'Downloading info JSON') @@ -304,7 +304,7 @@ class SoundcloudSetIE(SoundcloudPlaylistBaseIE): # extract simple title (uploader + slug of song title) slug_title = mobj.group('slug_title') full_title = '%s/sets/%s' % (uploader, slug_title) - url = 'http://soundcloud.com/%s/sets/%s' % (uploader, slug_title) + url = 'https://soundcloud.com/%s/sets/%s' % (uploader, slug_title) token = mobj.group('token') if token: @@ -410,7 +410,7 @@ class SoundcloudUserIE(SoundcloudPlaylistBaseIE): mobj = re.match(self._VALID_URL, url) uploader = mobj.group('user') - url = 'http://soundcloud.com/%s/' % uploader + url = 'https://soundcloud.com/%s/' % uploader resolv_url = self._resolv_url(url) user = self._download_json( resolv_url, uploader, 'Downloading user info') @@ -473,7 +473,7 @@ class SoundcloudPlaylistIE(SoundcloudPlaylistBaseIE): _VALID_URL = r'https?://api\.soundcloud\.com/playlists/(?P[0-9]+)(?:/?\?secret_token=(?P[^&]+?))?$' IE_NAME = 'soundcloud:playlist' _TESTS = [{ - 'url': 'http://api.soundcloud.com/playlists/4110309', + 'url': 'https://api.soundcloud.com/playlists/4110309', 'info_dict': { 'id': '4110309', 'title': 'TILT Brass - Bowery Poetry Club, August \'03 [Non-Site SCR 02]', From bf2dc9cc6e1ca38955f646b67f0f6ec40aec139d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 27 Jun 2017 21:26:46 +0700 Subject: [PATCH 2/4] [soundcloud] Fix tests --- youtube_dl/extractor/soundcloud.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index 994c86145..3f1a46bb2 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -290,7 +290,7 @@ class SoundcloudSetIE(SoundcloudPlaylistBaseIE): 'id': '2284613', 'title': 'The Royal Concept EP', }, - 'playlist_mincount': 6, + 'playlist_mincount': 5, }, { 'url': 'https://soundcloud.com/the-concept-band/sets/the-royal-concept-ep/token', 'only_matching': True, @@ -380,7 +380,7 @@ class SoundcloudUserIE(SoundcloudPlaylistBaseIE): 'url': 'https://soundcloud.com/grynpyret/spotlight', 'info_dict': { 'id': '7098329', - 'title': 'GRYNPYRET (Spotlight)', + 'title': 'Grynpyret (Spotlight)', }, 'playlist_mincount': 1, }] From 0646e34c7d511a02d8d93e840bceaa3521c2204e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 27 Jun 2017 22:25:34 +0700 Subject: [PATCH 3/4] [facebook] Add support for plugin video embeds and multiple embeds (closes #13493) --- youtube_dl/extractor/buzzfeed.py | 7 ++++--- youtube_dl/extractor/facebook.py | 20 ++++++++++---------- youtube_dl/extractor/generic.py | 6 +++--- 3 files changed, 17 insertions(+), 16 deletions(-) diff --git a/youtube_dl/extractor/buzzfeed.py b/youtube_dl/extractor/buzzfeed.py index 75fa92d7c..ec411091e 100644 --- a/youtube_dl/extractor/buzzfeed.py +++ b/youtube_dl/extractor/buzzfeed.py @@ -84,9 +84,10 @@ class BuzzFeedIE(InfoExtractor): continue entries.append(self.url_result(video['url'])) - facebook_url = FacebookIE._extract_url(webpage) - if facebook_url: - entries.append(self.url_result(facebook_url)) + facebook_urls = FacebookIE._extract_urls(webpage) + entries.extend([ + self.url_result(facebook_url) + for facebook_url in facebook_urls]) return { '_type': 'playlist', diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py index b69c1ede0..4b3f6cc86 100644 --- a/youtube_dl/extractor/facebook.py +++ b/youtube_dl/extractor/facebook.py @@ -203,19 +203,19 @@ class FacebookIE(InfoExtractor): }] @staticmethod - def _extract_url(webpage): - mobj = re.search( - r']+?src=(["\'])(?Phttps://www\.facebook\.com/video/embed.+?)\1', webpage) - if mobj is not None: - return mobj.group('url') - + def _extract_urls(webpage): + urls = [] + for mobj in re.finditer( + r']+?src=(["\'])(?Phttps?://www\.facebook\.com/(?:video/embed|plugins/video\.php).+?)\1', + webpage): + urls.append(mobj.group('url')) # Facebook API embed # see https://developers.facebook.com/docs/plugins/embedded-video-player - mobj = re.search(r'''(?x)]+ + for mobj in re.finditer(r'''(?x)]+ class=(?P[\'"])[^\'"]*\bfb-(?:video|post)\b[^\'"]*(?P=q1)[^>]+ - data-href=(?P[\'"])(?P(?:https?:)?//(?:www\.)?facebook.com/.+?)(?P=q2)''', webpage) - if mobj is not None: - return mobj.group('url') + data-href=(?P[\'"])(?P(?:https?:)?//(?:www\.)?facebook.com/.+?)(?P=q2)''', webpage): + urls.append(mobj.group('url')) + return urls def _login(self): (useremail, password) = self._get_login_info() diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 8ef1a2980..760a7f9c2 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -2222,9 +2222,9 @@ class GenericIE(InfoExtractor): return self.url_result(mobj.group('url')) # Look for embedded Facebook player - facebook_url = FacebookIE._extract_url(webpage) - if facebook_url is not None: - return self.url_result(facebook_url, 'Facebook') + facebook_urls = FacebookIE._extract_urls(webpage) + if facebook_urls: + return self.playlist_from_matches(facebook_urls, video_id, video_title) # Look for embedded VK player mobj = re.search(r']+?src=(["\'])(?Phttps?://vk\.com/video_ext\.php.+?)\1', webpage) From fd1c5fba6bee4de36cc3276d86c84c0abc054a2f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 27 Jun 2017 22:36:54 +0700 Subject: [PATCH 4/4] [facebook] Add test for plugin video embed (#13493) --- youtube_dl/extractor/generic.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 760a7f9c2..2792ea3cf 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -1522,6 +1522,21 @@ class GenericIE(InfoExtractor): 'title': 'Facebook video #599637780109885', }, }, + # Facebook