diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 37d09d796..0421de755 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -1,16 +1,16 @@ ## Please follow the guide below - You will be asked some questions and requested to provide some information, please read them **carefully** and answer honestly -- Put an `x` into all the boxes [ ] relevant to your *issue* (like that [x]) -- Use *Preview* tab to see how your issue will actually look like +- Put an `x` into all the boxes [ ] relevant to your *issue* (like this: `[x]`) +- Use the *Preview* tab to see what your issue will actually look like --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.07.23*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.07.23** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.07.30.1*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.07.30.1** ### Before submitting an *issue* make sure you have: -- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections +- [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections - [ ] [Searched](https://github.com/rg3/youtube-dl/search?type=Issues) the bugtracker for similar issues including closed ones ### What is the purpose of your *issue*? @@ -28,14 +28,14 @@ ### If the purpose of this *issue* is a *bug report*, *site support request* or you are not completely sure provide the full verbose output as follows: -Add `-v` flag to **your command line** you run youtube-dl with, copy the **whole** output and insert it here. It should look similar to one below (replace it with **your** log inserted between triple ```): +Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl -v `), copy the **whole** output and insert it here. It should look similar to one below (replace it with **your** log inserted between triple ```): + ``` -$ youtube-dl -v [debug] System config: [] [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2017.07.23 +[debug] youtube-dl version 2017.07.30.1 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 8e63b5c11..4f03ef064 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,8 +1,28 @@ -version +version 2017.07.30.1 Core +* [downloader/hls] Use redirect URL as manifest base (#13755) * [options] Correctly hide login info from debug outputs (#13696) +Extractors ++ [watchbox] Add support for watchbox.de (#13739) +- [clipfish] Remove extractor ++ [youjizz] Fix extraction (#13744) ++ [generic] Add support for another ooyala embed pattern (#13727) ++ [ard] Add support for lives (#13771) +* [soundcloud] Update client id ++ [soundcloud:trackstation] Add support for track stations (#13733) +* [svtplay] Use geo verification proxy for API request +* [svtplay] Update API URL (#13767) ++ [yandexdisk] Add support for yadi.sk (#13755) ++ [megaphone] Add support for megaphone.fm +* [amcnetworks] Make rating optional (#12453) +* [cloudy] Fix extraction (#13737) ++ [nickru] Add support for nickelodeon.ru +* [mtv] Improve thumbnal extraction +* [nick] Automate geo-restriction bypass (#13711) +* [niconico] Improve error reporting (#13696) + version 2017.07.23 diff --git a/docs/supportedsites.md b/docs/supportedsites.md index eb09c470c..77aac8249 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -155,7 +155,6 @@ - **chirbit:profile** - **Cinchcast** - **CJSW** - - **Clipfish** - **cliphunter** - **ClipRs** - **Clipsyndicate** @@ -440,6 +439,7 @@ - **Medialaan** - **Mediaset** - **Medici** + - **megaphone.fm**: megaphone.fm embedded players - **Meipai**: 美拍 - **MelonVOD** - **META** @@ -533,6 +533,7 @@ - **nhl.com:videocenter:category**: NHL videocenter category - **nick.com** - **nick.de** + - **nickelodeonru** - **nicknight** - **niconico**: ニコニコ動画 - **NiconicoPlaylist** @@ -734,6 +735,7 @@ - **soundcloud:playlist** - **soundcloud:search**: Soundcloud search - **soundcloud:set** + - **soundcloud:trackstation** - **soundcloud:user** - **soundgasm** - **soundgasm:profile** @@ -968,6 +970,7 @@ - **washingtonpost** - **washingtonpost:article** - **wat.tv** + - **WatchBox** - **WatchIndianPorn**: Watch Indian Porn - **WDR** - **wdr:mobile** @@ -1003,6 +1006,7 @@ - **XVideos** - **XXXYMovies** - **Yahoo**: Yahoo screen and movies + - **YandexDisk** - **yandexmusic:album**: Яндекс.Музыка - Альбом - **yandexmusic:playlist**: Яндекс.Музыка - Плейлист - **yandexmusic:track**: Яндекс.Музыка - Трек diff --git a/youtube_dl/downloader/hls.py b/youtube_dl/downloader/hls.py index 0e29c8a2a..46308cf07 100644 --- a/youtube_dl/downloader/hls.py +++ b/youtube_dl/downloader/hls.py @@ -59,9 +59,9 @@ class HlsFD(FragmentFD): man_url = info_dict['url'] self.to_screen('[%s] Downloading m3u8 manifest' % self.FD_NAME) - manifest = self.ydl.urlopen(self._prepare_url(info_dict, man_url)).read() - - s = manifest.decode('utf-8', 'ignore') + urlh = self.ydl.urlopen(self._prepare_url(info_dict, man_url)) + man_url = urlh.geturl() + s = urlh.read().decode('utf-8', 'ignore') if not self.can_download(s, info_dict): if info_dict.get('extra_param_to_segment_url'): diff --git a/youtube_dl/extractor/ard.py b/youtube_dl/extractor/ard.py index 2d5599456..3f248b147 100644 --- a/youtube_dl/extractor/ard.py +++ b/youtube_dl/extractor/ard.py @@ -93,6 +93,7 @@ class ARDMediathekIE(InfoExtractor): duration = int_or_none(media_info.get('_duration')) thumbnail = media_info.get('_previewImage') + is_live = media_info.get('_isLive') is True subtitles = {} subtitle_url = media_info.get('_subtitleUrl') @@ -106,6 +107,7 @@ class ARDMediathekIE(InfoExtractor): 'id': video_id, 'duration': duration, 'thumbnail': thumbnail, + 'is_live': is_live, 'formats': formats, 'subtitles': subtitles, } @@ -166,9 +168,11 @@ class ARDMediathekIE(InfoExtractor): # determine video id from url m = re.match(self._VALID_URL, url) + document_id = None + numid = re.search(r'documentId=([0-9]+)', url) if numid: - video_id = numid.group(1) + document_id = video_id = numid.group(1) else: video_id = m.group('video_id') @@ -228,12 +232,16 @@ class ARDMediathekIE(InfoExtractor): 'formats': formats, } else: # request JSON file + if not document_id: + video_id = self._search_regex( + r'/play/(?:config|media)/(\d+)', webpage, 'media id') info = self._extract_media_info( - 'http://www.ardmediathek.de/play/media/%s' % video_id, webpage, video_id) + 'http://www.ardmediathek.de/play/media/%s' % video_id, + webpage, video_id) info.update({ 'id': video_id, - 'title': title, + 'title': self._live_title(title) if info.get('is_live') else title, 'description': description, 'thumbnail': thumbnail, }) diff --git a/youtube_dl/extractor/clipfish.py b/youtube_dl/extractor/clipfish.py deleted file mode 100644 index 0920f6219..000000000 --- a/youtube_dl/extractor/clipfish.py +++ /dev/null @@ -1,67 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -from .common import InfoExtractor -from ..utils import ( - int_or_none, - unified_strdate, -) - - -class ClipfishIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?clipfish\.de/(?:[^/]+/)+video/(?P[0-9]+)' - _TEST = { - 'url': 'http://www.clipfish.de/special/ugly-americans/video/4343170/s01-e01-ugly-americans-date-in-der-hoelle/', - 'md5': 'b9a5dc46294154c1193e2d10e0c95693', - 'info_dict': { - 'id': '4343170', - 'ext': 'mp4', - 'title': 'S01 E01 - Ugly Americans - Date in der Hölle', - 'description': 'Mark Lilly arbeitet im Sozialdienst der Stadt New York und soll Immigranten bei ihrer Einbürgerung in die USA zur Seite stehen.', - 'upload_date': '20161005', - 'duration': 1291, - 'view_count': int, - } - } - - def _real_extract(self, url): - video_id = self._match_id(url) - - video_info = self._download_json( - 'http://www.clipfish.de/devapi/id/%s?format=json&apikey=hbbtv' % video_id, - video_id)['items'][0] - - formats = [] - - m3u8_url = video_info.get('media_videourl_hls') - if m3u8_url: - formats.append({ - 'url': m3u8_url.replace('de.hls.fra.clipfish.de', 'hls.fra.clipfish.de'), - 'ext': 'mp4', - 'format_id': 'hls', - }) - - mp4_url = video_info.get('media_videourl') - if mp4_url: - formats.append({ - 'url': mp4_url, - 'format_id': 'mp4', - 'width': int_or_none(video_info.get('width')), - 'height': int_or_none(video_info.get('height')), - 'tbr': int_or_none(video_info.get('bitrate')), - }) - - descr = video_info.get('descr') - if descr: - descr = descr.strip() - - return { - 'id': video_id, - 'title': video_info['title'], - 'description': descr, - 'formats': formats, - 'thumbnail': video_info.get('media_content_thumbnail_large') or video_info.get('media_thumbnail'), - 'duration': int_or_none(video_info.get('media_length')), - 'upload_date': unified_strdate(video_info.get('pubDate')), - 'view_count': int_or_none(video_info.get('media_views')) - } diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 2513f2587..3489e86f0 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -186,7 +186,6 @@ from .chirbit import ( ) from .cinchcast import CinchcastIE from .cjsw import CJSWIE -from .clipfish import ClipfishIE from .cliphunter import CliphunterIE from .cliprs import ClipRsIE from .clipsyndicate import ClipsyndicateIE @@ -558,6 +557,7 @@ from .matchtv import MatchTVIE from .mdr import MDRIE from .mediaset import MediasetIE from .medici import MediciIE +from .megaphone import MegaphoneIE from .meipai import MeipaiIE from .melonvod import MelonVODIE from .meta import METAIE @@ -934,8 +934,9 @@ from .soundcloud import ( SoundcloudIE, SoundcloudSetIE, SoundcloudUserIE, + SoundcloudTrackStationIE, SoundcloudPlaylistIE, - SoundcloudSearchIE + SoundcloudSearchIE, ) from .soundgasm import ( SoundgasmIE, @@ -1243,6 +1244,7 @@ from .washingtonpost import ( WashingtonPostArticleIE, ) from .wat import WatIE +from .watchbox import WatchBoxIE from .watchindianporn import WatchIndianPornIE from .wdr import ( WDRIE, @@ -1297,6 +1299,7 @@ from .yandexmusic import ( YandexMusicAlbumIE, YandexMusicPlaylistIE, ) +from .yandexdisk import YandexDiskIE from .yesjapan import YesJapanIE from .yinyuetai import YinYueTaiIE from .ynet import YnetIE diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 36c81eda9..34e814988 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -97,6 +97,7 @@ from .washingtonpost import WashingtonPostIE from .wistia import WistiaIE from .mediaset import MediasetIE from .joj import JojIE +from .megaphone import MegaphoneIE class GenericIE(InfoExtractor): @@ -574,6 +575,19 @@ class GenericIE(InfoExtractor): }, 'skip': 'movie expired', }, + # ooyala video embedded with http://player.ooyala.com/static/v4/production/latest/core.min.js + { + 'url': 'http://wnep.com/2017/07/22/steampunk-fest-comes-to-honesdale/', + 'info_dict': { + 'id': 'lwYWYxYzE6V5uJMjNGyKtwwiw9ZJD7t2', + 'ext': 'mp4', + 'title': 'Steampunk Fest Comes to Honesdale', + 'duration': 43.276, + }, + 'params': { + 'skip_download': True, + } + }, # embed.ly video { 'url': 'http://www.tested.com/science/weird/460206-tested-grinding-coffee-2000-frames-second/', @@ -2292,6 +2306,7 @@ class GenericIE(InfoExtractor): # Look for Ooyala videos mobj = (re.search(r'player\.ooyala\.com/[^"?]+[?#][^"]*?(?:embedCode|ec)=(?P[^"&]+)', webpage) or re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P.{32})[\'"]', webpage) or + re.search(r'OO\.Player\.create\.apply\(\s*OO\.Player\s*,\s*op\(\s*\[\s*[\'"][^\'"]*[\'"]\s*,\s*[\'"](?P.{32})[\'"]', webpage) or re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P.{32})[\'"]\)', webpage) or re.search(r'data-ooyala-video-id\s*=\s*[\'"](?P.{32})[\'"]', webpage)) if mobj is not None: @@ -2790,6 +2805,12 @@ class GenericIE(InfoExtractor): return self.playlist_from_matches( joj_urls, video_id, video_title, ie=JojIE.ie_key()) + # Look for megaphone.fm embeds + mpfn_urls = MegaphoneIE._extract_urls(webpage) + if mpfn_urls: + return self.playlist_from_matches( + mpfn_urls, video_id, video_title, ie=MegaphoneIE.ie_key()) + def merge_dicts(dict1, dict2): merged = {} for k, v in dict1.items(): diff --git a/youtube_dl/extractor/megaphone.py b/youtube_dl/extractor/megaphone.py new file mode 100644 index 000000000..60e3caf0d --- /dev/null +++ b/youtube_dl/extractor/megaphone.py @@ -0,0 +1,55 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import js_to_json + + +class MegaphoneIE(InfoExtractor): + IE_NAME = 'megaphone.fm' + IE_DESC = 'megaphone.fm embedded players' + _VALID_URL = r'https://player\.megaphone\.fm/(?P[A-Z0-9]+)' + _TEST = { + 'url': 'https://player.megaphone.fm/GLT9749789991?"', + 'md5': '4816a0de523eb3e972dc0dda2c191f96', + 'info_dict': { + 'id': 'GLT9749789991', + 'ext': 'mp3', + 'title': '#97 What Kind Of Idiot Gets Phished?', + 'thumbnail': 're:^https://.*\.png.*$', + 'duration': 1776.26375, + 'author': 'Reply All', + }, + } + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + title = self._og_search_property('audio:title', webpage) + author = self._og_search_property('audio:artist', webpage) + thumbnail = self._og_search_thumbnail(webpage) + + episode_json = self._search_regex(r'(?s)var\s+episode\s*=\s*(\{.+?\});', webpage, 'episode JSON') + episode_data = self._parse_json(episode_json, video_id, js_to_json) + video_url = self._proto_relative_url(episode_data['mediaUrl'], 'https:') + + formats = [{ + 'url': video_url, + }] + + return { + 'id': video_id, + 'thumbnail': thumbnail, + 'title': title, + 'author': author, + 'duration': episode_data['duration'], + 'formats': formats, + } + + @classmethod + def _extract_urls(cls, webpage): + return [m[0] for m in re.findall( + r']*?\ssrc=["\'](%s)' % cls._VALID_URL, webpage)] diff --git a/youtube_dl/extractor/nrk.py b/youtube_dl/extractor/nrk.py index 3b4f51f61..18ead9426 100644 --- a/youtube_dl/extractor/nrk.py +++ b/youtube_dl/extractor/nrk.py @@ -237,7 +237,7 @@ class NRKTVIE(NRKBaseIE): (?:/\d{2}-\d{2}-\d{4})? (?:\#del=(?P\d+))? ''' % _EPISODE_RE - _API_HOST = 'psapi-we.nrk.no' + _API_HOST = 'psapi-ne.nrk.no' _TESTS = [{ 'url': 'https://tv.nrk.no/serie/20-spoersmaal-tv/MUHH48000314/23-05-2014', diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index 3f1a46bb2..2e52e092b 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -31,6 +31,7 @@ class SoundcloudIE(InfoExtractor): _VALID_URL = r'''(?x)^(?:https?://)? (?:(?:(?:www\.|m\.)?soundcloud\.com/ + (?!stations/track) (?P[\w\d-]+)/ (?!(?:tracks|sets(?:/.+?)?|reposts|likes|spotlight)/?(?:$|[?#])) (?P[\w\d-]+)/? @@ -121,7 +122,7 @@ class SoundcloudIE(InfoExtractor): }, ] - _CLIENT_ID = '2t9loNQH90kzJcsFCODdigxfp325aq4z' + _CLIENT_ID = 'JlZIsxg2hY5WnBgtn3jfS0UYCl0K8DOg' _IPHONE_CLIENT_ID = '376f225bf427445fc4bfb6b99b72e0bf' @staticmethod @@ -330,7 +331,63 @@ class SoundcloudSetIE(SoundcloudPlaylistBaseIE): } -class SoundcloudUserIE(SoundcloudPlaylistBaseIE): +class SoundcloudPagedPlaylistBaseIE(SoundcloudPlaylistBaseIE): + _API_BASE = 'https://api.soundcloud.com' + _API_V2_BASE = 'https://api-v2.soundcloud.com' + + def _extract_playlist(self, base_url, playlist_id, playlist_title): + COMMON_QUERY = { + 'limit': 50, + 'client_id': self._CLIENT_ID, + 'linked_partitioning': '1', + } + + query = COMMON_QUERY.copy() + query['offset'] = 0 + + next_href = base_url + '?' + compat_urllib_parse_urlencode(query) + + entries = [] + for i in itertools.count(): + response = self._download_json( + next_href, playlist_id, 'Downloading track page %s' % (i + 1)) + + collection = response['collection'] + if not collection: + break + + def resolve_permalink_url(candidates): + for cand in candidates: + if isinstance(cand, dict): + permalink_url = cand.get('permalink_url') + entry_id = self._extract_id(cand) + if permalink_url and permalink_url.startswith('http'): + return permalink_url, entry_id + + for e in collection: + permalink_url, entry_id = resolve_permalink_url((e, e.get('track'), e.get('playlist'))) + if permalink_url: + entries.append(self.url_result(permalink_url, video_id=entry_id)) + + next_href = response.get('next_href') + if not next_href: + break + + parsed_next_href = compat_urlparse.urlparse(response['next_href']) + qs = compat_urlparse.parse_qs(parsed_next_href.query) + qs.update(COMMON_QUERY) + next_href = compat_urlparse.urlunparse( + parsed_next_href._replace(query=compat_urllib_parse_urlencode(qs, True))) + + return { + '_type': 'playlist', + 'id': playlist_id, + 'title': playlist_title, + 'entries': entries, + } + + +class SoundcloudUserIE(SoundcloudPagedPlaylistBaseIE): _VALID_URL = r'''(?x) https?:// (?:(?:www|m)\.)?soundcloud\.com/ @@ -385,16 +442,13 @@ class SoundcloudUserIE(SoundcloudPlaylistBaseIE): 'playlist_mincount': 1, }] - _API_BASE = 'https://api.soundcloud.com' - _API_V2_BASE = 'https://api-v2.soundcloud.com' - _BASE_URL_MAP = { - 'all': '%s/profile/soundcloud:users:%%s' % _API_V2_BASE, - 'tracks': '%s/users/%%s/tracks' % _API_BASE, - 'sets': '%s/users/%%s/playlists' % _API_V2_BASE, - 'reposts': '%s/profile/soundcloud:users:%%s/reposts' % _API_V2_BASE, - 'likes': '%s/users/%%s/likes' % _API_V2_BASE, - 'spotlight': '%s/users/%%s/spotlight' % _API_V2_BASE, + 'all': '%s/profile/soundcloud:users:%%s' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE, + 'tracks': '%s/users/%%s/tracks' % SoundcloudPagedPlaylistBaseIE._API_BASE, + 'sets': '%s/users/%%s/playlists' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE, + 'reposts': '%s/profile/soundcloud:users:%%s/reposts' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE, + 'likes': '%s/users/%%s/likes' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE, + 'spotlight': '%s/users/%%s/spotlight' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE, } _TITLE_MAP = { @@ -416,57 +470,36 @@ class SoundcloudUserIE(SoundcloudPlaylistBaseIE): resolv_url, uploader, 'Downloading user info') resource = mobj.group('rsrc') or 'all' - base_url = self._BASE_URL_MAP[resource] % user['id'] - COMMON_QUERY = { - 'limit': 50, - 'client_id': self._CLIENT_ID, - 'linked_partitioning': '1', - } + return self._extract_playlist( + self._BASE_URL_MAP[resource] % user['id'], compat_str(user['id']), + '%s (%s)' % (user['username'], self._TITLE_MAP[resource])) - query = COMMON_QUERY.copy() - query['offset'] = 0 - next_href = base_url + '?' + compat_urllib_parse_urlencode(query) +class SoundcloudTrackStationIE(SoundcloudPagedPlaylistBaseIE): + _VALID_URL = r'https?://(?:(?:www|m)\.)?soundcloud\.com/stations/track/[^/]+/(?P<id>[^/?#&]+)' + IE_NAME = 'soundcloud:trackstation' + _TESTS = [{ + 'url': 'https://soundcloud.com/stations/track/officialsundial/your-text', + 'info_dict': { + 'id': '286017854', + 'title': 'Track station: your-text', + }, + 'playlist_mincount': 47, + }] - entries = [] - for i in itertools.count(): - response = self._download_json( - next_href, uploader, 'Downloading track page %s' % (i + 1)) + def _real_extract(self, url): + track_name = self._match_id(url) - collection = response['collection'] - if not collection: - break + webpage = self._download_webpage(url, track_name) - def resolve_permalink_url(candidates): - for cand in candidates: - if isinstance(cand, dict): - permalink_url = cand.get('permalink_url') - entry_id = self._extract_id(cand) - if permalink_url and permalink_url.startswith('http'): - return permalink_url, entry_id + track_id = self._search_regex( + r'soundcloud:track-stations:(\d+)', webpage, 'track id') - for e in collection: - permalink_url, entry_id = resolve_permalink_url((e, e.get('track'), e.get('playlist'))) - if permalink_url: - entries.append(self.url_result(permalink_url, video_id=entry_id)) - - next_href = response.get('next_href') - if not next_href: - break - - parsed_next_href = compat_urlparse.urlparse(response['next_href']) - qs = compat_urlparse.parse_qs(parsed_next_href.query) - qs.update(COMMON_QUERY) - next_href = compat_urlparse.urlunparse( - parsed_next_href._replace(query=compat_urllib_parse_urlencode(qs, True))) - - return { - '_type': 'playlist', - 'id': compat_str(user['id']), - 'title': '%s (%s)' % (user['username'], self._TITLE_MAP[resource]), - 'entries': entries, - } + return self._extract_playlist( + '%s/stations/soundcloud:track-stations:%s/tracks' + % (self._API_V2_BASE, track_id), + track_id, 'Track station: %s' % track_name) class SoundcloudPlaylistIE(SoundcloudPlaylistBaseIE): diff --git a/youtube_dl/extractor/svt.py b/youtube_dl/extractor/svt.py index 1b5afb73e..48bc4529e 100644 --- a/youtube_dl/extractor/svt.py +++ b/youtube_dl/extractor/svt.py @@ -181,7 +181,8 @@ class SVTPlayIE(SVTBaseIE): if video_id: data = self._download_json( - 'http://www.svt.se/videoplayer-api/video/%s' % video_id, video_id) + 'https://api.svt.se/videoplayer-api/video/%s' % video_id, + video_id, headers=self.geo_verification_headers()) info_dict = self._extract_video(data, video_id) if not info_dict.get('title'): info_dict['title'] = re.sub( diff --git a/youtube_dl/extractor/watchbox.py b/youtube_dl/extractor/watchbox.py new file mode 100644 index 000000000..b382338fa --- /dev/null +++ b/youtube_dl/extractor/watchbox.py @@ -0,0 +1,151 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..compat import compat_str +from ..utils import ( + int_or_none, + js_to_json, + strip_or_none, + try_get, + unified_timestamp, +) + + +class WatchBoxIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?watchbox\.de/(?P<kind>serien|filme)/(?:[^/]+/)*[^/]+-(?P<id>\d+)' + _TESTS = [{ + # film + 'url': 'https://www.watchbox.de/filme/free-jimmy-12325.html', + 'info_dict': { + 'id': '341368', + 'ext': 'mp4', + 'title': 'Free Jimmy', + 'description': 'md5:bcd8bafbbf9dc0ef98063d344d7cc5f6', + 'thumbnail': r're:^https?://.*\.jpg$', + 'duration': 4890, + 'age_limit': 16, + 'release_year': 2009, + }, + 'params': { + 'format': 'bestvideo', + 'skip_download': True, + }, + 'expected_warnings': ['Failed to download m3u8 information'], + }, { + # episode + 'url': 'https://www.watchbox.de/serien/ugly-americans-12231/staffel-1/date-in-der-hoelle-328286.html', + 'info_dict': { + 'id': '328286', + 'ext': 'mp4', + 'title': 'S01 E01 - Date in der Hölle', + 'description': 'md5:2f31c74a8186899f33cb5114491dae2b', + 'thumbnail': r're:^https?://.*\.jpg$', + 'duration': 1291, + 'age_limit': 12, + 'release_year': 2010, + 'series': 'Ugly Americans', + 'season_number': 1, + 'episode': 'Date in der Hölle', + 'episode_number': 1, + }, + 'params': { + 'format': 'bestvideo', + 'skip_download': True, + }, + 'expected_warnings': ['Failed to download m3u8 information'], + }, { + 'url': 'https://www.watchbox.de/serien/ugly-americans-12231/staffel-2/der-ring-des-powers-328270', + 'only_matching': True, + }] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + kind, video_id = mobj.group('kind', 'id') + + webpage = self._download_webpage(url, video_id) + + source = self._parse_json( + self._search_regex( + r'(?s)source\s*:\s*({.+?})\s*,\s*\n', webpage, 'source', + default='{}'), + video_id, transform_source=js_to_json, fatal=False) or {} + + video_id = compat_str(source.get('videoId') or video_id) + + devapi = self._download_json( + 'http://api.watchbox.de/devapi/id/%s' % video_id, video_id, query={ + 'format': 'json', + 'apikey': 'hbbtv', + }, fatal=False) + + item = try_get(devapi, lambda x: x['items'][0], dict) or {} + + title = item.get('title') or try_get( + item, lambda x: x['movie']['headline_movie'], + compat_str) or source['title'] + + formats = [] + hls_url = item.get('media_videourl_hls') or source.get('hls') + if hls_url: + formats.extend(self._extract_m3u8_formats( + hls_url, video_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id='hls', fatal=False)) + dash_url = item.get('media_videourl_wv') or source.get('dash') + if dash_url: + formats.extend(self._extract_mpd_formats( + dash_url, video_id, mpd_id='dash', fatal=False)) + mp4_url = item.get('media_videourl') + if mp4_url: + formats.append({ + 'url': mp4_url, + 'format_id': 'mp4', + 'width': int_or_none(item.get('width')), + 'height': int_or_none(item.get('height')), + 'tbr': int_or_none(item.get('bitrate')), + }) + self._sort_formats(formats) + + description = strip_or_none(item.get('descr')) + thumbnail = item.get('media_content_thumbnail_large') or source.get('poster') or item.get('media_thumbnail') + duration = int_or_none(item.get('media_length') or source.get('length')) + timestamp = unified_timestamp(item.get('pubDate')) + view_count = int_or_none(item.get('media_views')) + age_limit = int_or_none(try_get(item, lambda x: x['movie']['fsk'])) + release_year = int_or_none(try_get(item, lambda x: x['movie']['rel_year'])) + + info = { + 'id': video_id, + 'title': title, + 'description': description, + 'thumbnail': thumbnail, + 'duration': duration, + 'timestamp': timestamp, + 'view_count': view_count, + 'age_limit': age_limit, + 'release_year': release_year, + 'formats': formats, + } + + if kind.lower() == 'serien': + series = try_get( + item, lambda x: x['special']['title'], + compat_str) or source.get('format') + season_number = int_or_none(self._search_regex( + r'^S(\d{1,2})\s*E\d{1,2}', title, 'season number', + default=None) or self._search_regex( + r'/staffel-(\d+)/', url, 'season number', default=None)) + episode = source.get('title') + episode_number = int_or_none(self._search_regex( + r'^S\d{1,2}\s*E(\d{1,2})', title, 'episode number', + default=None)) + info.update({ + 'series': series, + 'season_number': season_number, + 'episode': episode, + 'episode_number': episode_number, + }) + + return info diff --git a/youtube_dl/extractor/yandexdisk.py b/youtube_dl/extractor/yandexdisk.py new file mode 100644 index 000000000..11729f0f7 --- /dev/null +++ b/youtube_dl/extractor/yandexdisk.py @@ -0,0 +1,115 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..compat import compat_str +from ..utils import ( + determine_ext, + float_or_none, + int_or_none, + try_get, + urlencode_postdata, +) + + +class YandexDiskIE(InfoExtractor): + _VALID_URL = r'https?://yadi\.sk/i/(?P<id>[^/?#&]+)' + + _TEST = { + 'url': 'https://yadi.sk/i/VdOeDou8eZs6Y', + 'md5': '33955d7ae052f15853dc41f35f17581c', + 'info_dict': { + 'id': 'VdOeDou8eZs6Y', + 'ext': 'mp4', + 'title': '4.mp4', + 'duration': 168.6, + 'uploader': 'y.botova', + 'uploader_id': '300043621', + 'view_count': int, + }, + } + + def _real_extract(self, url): + video_id = self._match_id(url) + + status = self._download_webpage( + 'https://disk.yandex.com/auth/status', video_id, query={ + 'urlOrigin': url, + 'source': 'public', + 'md5': 'false', + }) + + sk = self._search_regex( + r'(["\'])sk(?:External)?\1\s*:\s*(["\'])(?P<value>(?:(?!\2).)+)\2', + status, 'sk', group='value') + + webpage = self._download_webpage(url, video_id) + + models = self._parse_json( + self._search_regex( + r'<script[^>]+id=["\']models-client[^>]+>\s*(\[.+?\])\s*</script', + webpage, 'video JSON'), + video_id) + + data = next( + model['data'] for model in models + if model.get('model') == 'resource') + + video_hash = data['id'] + title = data['name'] + + models = self._download_json( + 'https://disk.yandex.com/models/', video_id, + data=urlencode_postdata({ + '_model.0': 'videoInfo', + 'id.0': video_hash, + '_model.1': 'do-get-resource-url', + 'id.1': video_hash, + 'version': '13.6', + 'sk': sk, + }), query={'_m': 'videoInfo'})['models'] + + videos = try_get(models, lambda x: x[0]['data']['videos'], list) or [] + source_url = try_get( + models, lambda x: x[1]['data']['file'], compat_str) + + formats = [] + if source_url: + formats.append({ + 'url': source_url, + 'format_id': 'source', + 'ext': determine_ext(title, 'mp4'), + 'quality': 1, + }) + for video in videos: + format_url = video.get('url') + if not format_url: + continue + if determine_ext(format_url) == 'm3u8': + formats.extend(self._extract_m3u8_formats( + format_url, video_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id='hls', fatal=False)) + else: + formats.append({ + 'url': format_url, + }) + self._sort_formats(formats) + + duration = float_or_none(try_get( + models, lambda x: x[0]['data']['duration']), 1000) + uploader = try_get( + data, lambda x: x['user']['display_name'], compat_str) + uploader_id = try_get( + data, lambda x: x['user']['uid'], compat_str) + view_count = int_or_none(try_get( + data, lambda x: x['meta']['views_counter'])) + + return { + 'id': video_id, + 'title': title, + 'duration': duration, + 'uploader': uploader, + 'uploader_id': uploader_id, + 'view_count': view_count, + 'formats': formats, + } diff --git a/youtube_dl/extractor/youjizz.py b/youtube_dl/extractor/youjizz.py index b50f34e9b..f33fabe19 100644 --- a/youtube_dl/extractor/youjizz.py +++ b/youtube_dl/extractor/youjizz.py @@ -1,39 +1,95 @@ from __future__ import unicode_literals +import re + from .common import InfoExtractor +from ..compat import compat_str +from ..utils import ( + determine_ext, + int_or_none, + parse_duration, +) class YouJizzIE(InfoExtractor): - _VALID_URL = r'https?://(?:\w+\.)?youjizz\.com/videos/(?:[^/#?]+)?-(?P<id>[0-9]+)\.html(?:$|[?#])' + _VALID_URL = r'https?://(?:\w+\.)?youjizz\.com/videos/(?:[^/#?]*-(?P<id>\d+)\.html|embed/(?P<embed_id>\d+))' _TESTS = [{ 'url': 'http://www.youjizz.com/videos/zeichentrick-1-2189178.html', - 'md5': '78fc1901148284c69af12640e01c6310', + 'md5': 'b1e1dfaa8bb9537d8b84eeda9cf4acf4', 'info_dict': { 'id': '2189178', 'ext': 'mp4', 'title': 'Zeichentrick 1', 'age_limit': 18, + 'duration': 2874, } }, { 'url': 'http://www.youjizz.com/videos/-2189178.html', 'only_matching': True, + }, { + 'url': 'https://www.youjizz.com/videos/embed/31991001', + 'only_matching': True, }] def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - # YouJizz's HTML5 player has invalid HTML - webpage = webpage.replace('"controls', '" controls') - age_limit = self._rta_search(webpage) - video_title = self._html_search_regex( - r'<title>\s*(.*)\s*', webpage, 'title') + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') or mobj.group('embed_id') - info_dict = self._parse_html5_media_entries(url, webpage, video_id)[0] + webpage = self._download_webpage(url, video_id) + + title = self._html_search_regex( + r'(.+?)', webpage, 'title') + + formats = [] + + encodings = self._parse_json( + self._search_regex( + r'encodings\s*=\s*(\[.+?\]);\n', webpage, 'encodings', + default='[]'), + video_id, fatal=False) + for encoding in encodings: + if not isinstance(encoding, dict): + continue + format_url = encoding.get('filename') + if not isinstance(format_url, compat_str): + continue + if determine_ext(format_url) == 'm3u8': + formats.extend(self._extract_m3u8_formats( + format_url, video_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id='hls', fatal=False)) + else: + format_id = encoding.get('name') or encoding.get('quality') + height = int_or_none(self._search_regex( + r'^(\d+)[pP]', format_id, 'height', default=None)) + formats.append({ + 'url': format_url, + 'format_id': format_id, + 'height': height, + }) + + if formats: + info_dict = { + 'formats': formats, + } + else: + # YouJizz's HTML5 player has invalid HTML + webpage = webpage.replace('"controls', '" controls') + info_dict = self._parse_html5_media_entries( + url, webpage, video_id)[0] + + duration = parse_duration(self._search_regex( + r'Runtime:([^<]+)', webpage, 'duration', + default=None)) + uploader = self._search_regex( + r'Uploaded By:.*?]*>([^<]+)', webpage, 'uploader', + default=None) info_dict.update({ 'id': video_id, - 'title': video_title, - 'age_limit': age_limit, + 'title': title, + 'age_limit': self._rta_search(webpage), + 'duration': duration, + 'uploader': uploader, }) return info_dict diff --git a/youtube_dl/version.py b/youtube_dl/version.py index a8dbb93e3..38162157d 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2017.07.23' +__version__ = '2017.07.30.1'