diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index aefed163a..905576364 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.10.29*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.10.29** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.11.18*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.11.18** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -36,7 +36,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2018.10.29 +[debug] youtube-dl version 2018.11.18 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 333acee80..bbcb78808 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -296,5 +296,26 @@ title = self._search_regex( ### Use safe conversion functions -Wrap all extracted numeric data into safe functions from `utils`: `int_or_none`, `float_or_none`. Use them for string to number conversions as well. +Wrap all extracted numeric data into safe functions from [`youtube_dl/utils.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/utils.py): `int_or_none`, `float_or_none`. Use them for string to number conversions as well. + +Use `url_or_none` for safe URL processing. + +Use `try_get` for safe metadata extraction from parsed JSON. + +Explore [`youtube_dl/utils.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/utils.py) for more useful convenience functions. + +#### More examples + +##### Safely extract optional description from parsed JSON +```python +description = try_get(response, lambda x: x['result']['video'][0]['summary'], compat_str) +``` + +##### Safely extract more optional metadata +```python +video = try_get(response, lambda x: x['result']['video'][0], dict) or {} +description = video.get('summary') +duration = float_or_none(video.get('durationMs'), scale=1000) +view_count = int_or_none(video.get('views')) +``` diff --git a/ChangeLog b/ChangeLog index 57dbde12d..0083c4631 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,54 @@ +version 2018.11.18 + +Extractors ++ [wwe] Extract subtitles ++ [wwe] Add support for playlistst (#14781) ++ [wwe] Add support for wwe.com (#14781, #17450) +* [vk] Detect geo restriction (#17767) +* [openload] Use original host during extraction (#18211) +* [atvat] Fix extraction (#18041) ++ [rte] Add support for new API endpoint (#18206) +* [tnaflixnetwork:embed] Fix extraction (#18205) +* [picarto] Use API and add token support (#16518) ++ [zype] Add support for player.zype.com (#18143) +* [vivo] Fix extraction (#18139) +* [ruutu] Update API endpoint (#18138) + + +version 2018.11.07 + +Extractors ++ [youtube] Add another JS signature function name regex (#18091, #18093, + #18094) +* [facebook] Fix tahoe request (#17171) +* [cliphunter] Fix extraction (#18083) ++ [youtube:playlist] Add support for invidio.us (#18077) +* [zattoo] Arrange API hosts for derived extractors (#18035) ++ [youtube] Add fallback metadata extraction from videoDetails (#18052) + + +version 2018.11.03 + +Core +* [extractor/common] Ensure response handle is not prematurely closed before + it can be read if it matches expected_status (#17195, #17846, #17447) + +Extractors +* [laola1tv:embed] Set correct stream access URL scheme (#16341) ++ [ehftv] Add support for ehftv.com (#15408) +* [azmedien] Adopt to major site redesign (#17745, #17746) ++ [twitcasting] Add support for twitcasting.tv (#17981) +* [orf:tvthek] Fix extraction (#17737, #17956, #18024) ++ [openload] Add support for oload.fun (#18045) +* [njpwworld] Fix authentication (#17427) ++ [linkedin:learning] Add support for linkedin.com/learning (#13545) +* [theplatform] Improve error detection (#13222) +* [cnbc] Simplify extraction (#14280, #17110) ++ [cbnc] Add support for new URL schema (#14193) +* [aparat] Improve extraction and extract more metadata (#17445, #18008) +* [aparat] Fix extraction + + version 2018.10.29 Core diff --git a/README.md b/README.md index fdd115c9b..35c3de512 100644 --- a/README.md +++ b/README.md @@ -1168,7 +1168,28 @@ title = self._search_regex( ### Use safe conversion functions -Wrap all extracted numeric data into safe functions from `utils`: `int_or_none`, `float_or_none`. Use them for string to number conversions as well. +Wrap all extracted numeric data into safe functions from [`youtube_dl/utils.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/utils.py): `int_or_none`, `float_or_none`. Use them for string to number conversions as well. + +Use `url_or_none` for safe URL processing. + +Use `try_get` for safe metadata extraction from parsed JSON. + +Explore [`youtube_dl/utils.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/utils.py) for more useful convenience functions. + +#### More examples + +##### Safely extract optional description from parsed JSON +```python +description = try_get(response, lambda x: x['result']['video'][0]['summary'], compat_str) +``` + +##### Safely extract more optional metadata +```python +video = try_get(response, lambda x: x['result']['video'][0], dict) or {} +description = video.get('summary') +duration = float_or_none(video.get('durationMs'), scale=1000) +view_count = int_or_none(video.get('views')) +``` # EMBEDDING YOUTUBE-DL diff --git a/docs/supportedsites.md b/docs/supportedsites.md index e5a6879bc..9009f7e9e 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -84,8 +84,6 @@ - **awaan:season** - **awaan:video** - **AZMedien**: AZ Medien videos - - **AZMedienPlaylist**: AZ Medien playlists - - **AZMedienShowPlaylist**: AZ Medien show playlists - **BaiduVideo**: 百度视频 - **bambuser** - **bambuser:channel** @@ -178,6 +176,7 @@ - **Clyp** - **cmt.com** - **CNBC** + - **CNBCVideo** - **CNN** - **CNNArticle** - **CNNBlogs** @@ -251,6 +250,7 @@ - **EchoMsk** - **egghead:course**: egghead.io course - **egghead:lesson**: egghead.io lesson + - **ehftv** - **eHow** - **EinsUndEinsTV** - **Einthusan** @@ -445,6 +445,8 @@ - **limelight:channel** - **limelight:channel_list** - **LineTV** + - **linkedin:learning** + - **linkedin:learning:course** - **LiTV** - **LiveLeak** - **LiveLeakEmbed** @@ -930,6 +932,7 @@ - **TVPlayer** - **TVPlayHome** - **Tweakers** + - **TwitCasting** - **twitch:chapter** - **twitch:clips** - **twitch:profile** @@ -1077,6 +1080,7 @@ - **wrzuta.pl:playlist** - **WSJ**: Wall Street Journal - **WSJArticle** + - **WWE** - **XBef** - **XboxClips** - **XFileShare**: XFileShare based sites: DaClips, FileHoot, GorillaVid, MovPod, PowerWatch, Rapidvideo.ws, TheVideoBee, Vidto, Streamin.To, XVIDSTAGE, Vid ABC, VidBom, vidlo, RapidVideo.TV, FastVideo.me @@ -1136,3 +1140,4 @@ - **ZDF** - **ZDFChannel** - **zingmp3**: mp3.zing.vn + - **Zype** diff --git a/test/helper.py b/test/helper.py index dfee217a9..aa9a1c9b2 100644 --- a/test/helper.py +++ b/test/helper.py @@ -7,6 +7,7 @@ import json import os.path import re import types +import ssl import sys import youtube_dl.extractor @@ -244,3 +245,12 @@ def expect_warnings(ydl, warnings_re): real_warning(w) ydl.report_warning = _report_warning + + +def http_server_port(httpd): + if os.name == 'java' and isinstance(httpd.socket, ssl.SSLSocket): + # In Jython SSLSocket is not a subclass of socket.socket + sock = httpd.socket.sock + else: + sock = httpd.socket + return sock.getsockname()[1] diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py index 4833396a5..06be72616 100644 --- a/test/test_InfoExtractor.py +++ b/test/test_InfoExtractor.py @@ -9,11 +9,30 @@ import sys import unittest sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) -from test.helper import FakeYDL, expect_dict, expect_value -from youtube_dl.compat import compat_etree_fromstring +from test.helper import FakeYDL, expect_dict, expect_value, http_server_port +from youtube_dl.compat import compat_etree_fromstring, compat_http_server from youtube_dl.extractor.common import InfoExtractor from youtube_dl.extractor import YoutubeIE, get_info_extractor from youtube_dl.utils import encode_data_uri, strip_jsonp, ExtractorError, RegexNotFoundError +import threading + + +TEAPOT_RESPONSE_STATUS = 418 +TEAPOT_RESPONSE_BODY = "

418 I'm a teapot

" + + +class InfoExtractorTestRequestHandler(compat_http_server.BaseHTTPRequestHandler): + def log_message(self, format, *args): + pass + + def do_GET(self): + if self.path == '/teapot': + self.send_response(TEAPOT_RESPONSE_STATUS) + self.send_header('Content-Type', 'text/html; charset=utf-8') + self.end_headers() + self.wfile.write(TEAPOT_RESPONSE_BODY.encode()) + else: + assert False class TestIE(InfoExtractor): @@ -743,6 +762,25 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/ for i in range(len(entries)): expect_dict(self, entries[i], expected_entries[i]) + def test_response_with_expected_status_returns_content(self): + # Checks for mitigations against the effects of + # that affect Python 3.4.1+, which + # manifest as `_download_webpage`, `_download_xml`, `_download_json`, + # or the underlying `_download_webpage_handle` returning no content + # when a response matches `expected_status`. + + httpd = compat_http_server.HTTPServer( + ('127.0.0.1', 0), InfoExtractorTestRequestHandler) + port = http_server_port(httpd) + server_thread = threading.Thread(target=httpd.serve_forever) + server_thread.daemon = True + server_thread.start() + + (content, urlh) = self.ie._download_webpage_handle( + 'http://127.0.0.1:%d/teapot' % port, None, + expected_status=TEAPOT_RESPONSE_STATUS) + self.assertEqual(content, TEAPOT_RESPONSE_BODY) + if __name__ == '__main__': unittest.main() diff --git a/test/test_downloader_http.py b/test/test_downloader_http.py index 5cf2bf1a5..750472281 100644 --- a/test/test_downloader_http.py +++ b/test/test_downloader_http.py @@ -9,26 +9,16 @@ import sys import unittest sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) -from test.helper import try_rm +from test.helper import http_server_port, try_rm from youtube_dl import YoutubeDL from youtube_dl.compat import compat_http_server from youtube_dl.downloader.http import HttpFD from youtube_dl.utils import encodeFilename -import ssl import threading TEST_DIR = os.path.dirname(os.path.abspath(__file__)) -def http_server_port(httpd): - if os.name == 'java' and isinstance(httpd.socket, ssl.SSLSocket): - # In Jython SSLSocket is not a subclass of socket.socket - sock = httpd.socket.sock - else: - sock = httpd.socket - return sock.getsockname()[1] - - TEST_SIZE = 10 * 1024 diff --git a/test/test_http.py b/test/test_http.py index 409fec9c8..3ee0a5dda 100644 --- a/test/test_http.py +++ b/test/test_http.py @@ -8,6 +8,7 @@ import sys import unittest sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +from test.helper import http_server_port from youtube_dl import YoutubeDL from youtube_dl.compat import compat_http_server, compat_urllib_request import ssl @@ -16,15 +17,6 @@ import threading TEST_DIR = os.path.dirname(os.path.abspath(__file__)) -def http_server_port(httpd): - if os.name == 'java' and isinstance(httpd.socket, ssl.SSLSocket): - # In Jython SSLSocket is not a subclass of socket.socket - sock = httpd.socket.sock - else: - sock = httpd.socket - return sock.getsockname()[1] - - class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler): def log_message(self, format, *args): pass diff --git a/youtube_dl/extractor/atvat.py b/youtube_dl/extractor/atvat.py index 1584d53fc..95e572d70 100644 --- a/youtube_dl/extractor/atvat.py +++ b/youtube_dl/extractor/atvat.py @@ -28,8 +28,10 @@ class ATVAtIE(InfoExtractor): display_id = self._match_id(url) webpage = self._download_webpage(url, display_id) video_data = self._parse_json(unescapeHTML(self._search_regex( - r'class="[^"]*jsb_video/FlashPlayer[^"]*"[^>]+data-jsb="([^"]+)"', - webpage, 'player data')), display_id)['config']['initial_video'] + [r'flashPlayerOptions\s*=\s*(["\'])(?P(?:(?!\1).)+)\1', + r'class="[^"]*jsb_video/FlashPlayer[^"]*"[^>]+data-jsb="(?P[^"]+)"'], + webpage, 'player data', group='json')), + display_id)['config']['initial_video'] video_id = video_data['id'] video_title = video_data['title'] diff --git a/youtube_dl/extractor/azmedien.py b/youtube_dl/extractor/azmedien.py index 68f26e2ca..a57a5f114 100644 --- a/youtube_dl/extractor/azmedien.py +++ b/youtube_dl/extractor/azmedien.py @@ -1,213 +1,90 @@ # coding: utf-8 from __future__ import unicode_literals +import json import re from .common import InfoExtractor from .kaltura import KalturaIE -from ..utils import ( - get_element_by_class, - get_element_by_id, - strip_or_none, - urljoin, -) -class AZMedienBaseIE(InfoExtractor): - def _kaltura_video(self, partner_id, entry_id): - return self.url_result( - 'kaltura:%s:%s' % (partner_id, entry_id), ie=KalturaIE.ie_key(), - video_id=entry_id) - - -class AZMedienIE(AZMedienBaseIE): +class AZMedienIE(InfoExtractor): IE_DESC = 'AZ Medien videos' _VALID_URL = r'''(?x) https?:// (?:www\.)? - (?: + (?P telezueri\.ch| telebaern\.tv| telem1\.ch )/ - [0-9]+-show-[^/\#]+ - (?: - /[0-9]+-episode-[^/\#]+ - (?: - /[0-9]+-segment-(?:[^/\#]+\#)?| - \# - )| - \# + [^/]+/ + (?P + [^/]+-(?P\d+) ) - (?P[^\#]+) + (?: + \#video= + (?P + [_0-9a-z]+ + ) + )? ''' _TESTS = [{ - # URL with 'segment' - 'url': 'http://www.telezueri.ch/62-show-zuerinews/13772-episode-sonntag-18-dezember-2016/32419-segment-massenabweisungen-beim-hiltl-club-wegen-pelzboom', + 'url': 'https://www.telezueri.ch/sonntalk/bundesrats-vakanzen-eu-rahmenabkommen-133214569', 'info_dict': { - 'id': '1_2444peh4', + 'id': '1_anruz3wy', 'ext': 'mp4', - 'title': 'Massenabweisungen beim Hiltl Club wegen Pelzboom', - 'description': 'md5:9ea9dd1b159ad65b36ddcf7f0d7c76a8', - 'uploader_id': 'TeleZ?ri', - 'upload_date': '20161218', - 'timestamp': 1482084490, + 'title': 'Bundesrats-Vakanzen / EU-Rahmenabkommen', + 'description': 'md5:dd9f96751ec9c35e409a698a328402f3', + 'uploader_id': 'TVOnline', + 'upload_date': '20180930', + 'timestamp': 1538328802, }, 'params': { 'skip_download': True, }, }, { - # URL with 'segment' and fragment: - 'url': 'http://www.telebaern.tv/118-show-news/14240-episode-dienstag-17-januar-2017/33666-segment-achtung-gefahr#zu-wenig-pflegerinnen-und-pfleger', - 'only_matching': True - }, { - # URL with 'episode' and fragment: - 'url': 'http://www.telem1.ch/47-show-sonntalk/13986-episode-soldaten-fuer-grenzschutz-energiestrategie-obama-bilanz#soldaten-fuer-grenzschutz-energiestrategie-obama-bilanz', - 'only_matching': True - }, { - # URL with 'show' and fragment: - 'url': 'http://www.telezueri.ch/66-show-sonntalk#burka-plakate-trump-putin-china-besuch', + 'url': 'https://www.telebaern.tv/telebaern-news/montag-1-oktober-2018-ganze-sendung-133531189#video=0_7xjo9lf1', 'only_matching': True }] - def _real_extract(self, url): - video_id = self._match_id(url) - - webpage = self._download_webpage(url, video_id) - - partner_id = self._search_regex( - r']+src=["\'](?:https?:)?//(?:[^/]+\.)?kaltura\.com(?:/[^/]+)*/(?:p|partner_id)/([0-9]+)', - webpage, 'kaltura partner id') - entry_id = self._html_search_regex( - r']+data-id=(["\'])(?P(?:(?!\1).)+)\1[^>]+data-slug=["\']%s' - % re.escape(video_id), webpage, 'kaltura entry id', group='id') - - return self._kaltura_video(partner_id, entry_id) - - -class AZMedienPlaylistIE(AZMedienBaseIE): - IE_DESC = 'AZ Medien playlists' - _VALID_URL = r'''(?x) - https?:// - (?:www\.)? - (?: - telezueri\.ch| - telebaern\.tv| - telem1\.ch - )/ - (?P[0-9]+- - (?: - show| - topic| - themen - )-[^/\#]+ - (?: - /[0-9]+-episode-[^/\#]+ - )? - )$ - ''' - - _TESTS = [{ - # URL with 'episode' - 'url': 'http://www.telebaern.tv/118-show-news/13735-episode-donnerstag-15-dezember-2016', - 'info_dict': { - 'id': '118-show-news/13735-episode-donnerstag-15-dezember-2016', - 'title': 'News - Donnerstag, 15. Dezember 2016', - }, - 'playlist_count': 9, - }, { - # URL with 'themen' - 'url': 'http://www.telem1.ch/258-themen-tele-m1-classics', - 'info_dict': { - 'id': '258-themen-tele-m1-classics', - 'title': 'Tele M1 Classics', - }, - 'playlist_mincount': 15, - }, { - # URL with 'topic', contains nested playlists - 'url': 'http://www.telezueri.ch/219-topic-aera-trump-hat-offiziell-begonnen', - 'only_matching': True, - }, { - # URL with 'show' only - 'url': 'http://www.telezueri.ch/86-show-talktaeglich', - 'only_matching': True - }] + _PARTNER_ID = '1719221' def _real_extract(self, url): - show_id = self._match_id(url) - webpage = self._download_webpage(url, show_id) + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + entry_id = mobj.group('kaltura_id') - entries = [] + if not entry_id: + webpage = self._download_webpage(url, video_id) + api_path = self._search_regex( + r'["\']apiPath["\']\s*:\s*["\']([^"^\']+)["\']', + webpage, 'api path') + api_url = 'https://www.%s%s' % (mobj.group('host'), api_path) + payload = { + 'query': '''query VideoContext($articleId: ID!) { + article: node(id: $articleId) { + ... on Article { + mainAssetRelation { + asset { + ... on VideoAsset { + kalturaId + } + } + } + } + } + }''', + 'variables': {'articleId': 'Article:%s' % mobj.group('article_id')}, + } + json_data = self._download_json( + api_url, video_id, headers={ + 'Content-Type': 'application/json', + }, + data=json.dumps(payload).encode()) + entry_id = json_data['data']['article']['mainAssetRelation']['asset']['kalturaId'] - partner_id = self._search_regex( - r'src=["\'](?:https?:)?//(?:[^/]+\.)kaltura\.com/(?:[^/]+/)*(?:p|partner_id)/(\d+)', - webpage, 'kaltura partner id', default=None) - - if partner_id: - entries = [ - self._kaltura_video(partner_id, m.group('id')) - for m in re.finditer( - r'data-id=(["\'])(?P(?:(?!\1).)+)\1', webpage)] - - if not entries: - entries = [ - self.url_result(m.group('url'), ie=AZMedienIE.ie_key()) - for m in re.finditer( - r']+data-real=(["\'])(?Phttp.+?)\1', webpage)] - - if not entries: - entries = [ - # May contain nested playlists (e.g. [1]) thus no explicit - # ie_key - # 1. http://www.telezueri.ch/219-topic-aera-trump-hat-offiziell-begonnen) - self.url_result(urljoin(url, m.group('url'))) - for m in re.finditer( - r']+name=[^>]+href=(["\'])(?P/.+?)\1', webpage)] - - title = self._search_regex( - r'episodeShareTitle\s*=\s*(["\'])(?P(?:(?!\1).)+)\1', - webpage, 'title', - default=strip_or_none(get_element_by_id( - 'video-title', webpage)), group='title') - - return self.playlist_result(entries, show_id, title) - - -class AZMedienShowPlaylistIE(AZMedienBaseIE): - IE_DESC = 'AZ Medien show playlists' - _VALID_URL = r'''(?x) - https?:// - (?:www\.)? - (?: - telezueri\.ch| - telebaern\.tv| - telem1\.ch - )/ - (?: - all-episodes| - alle-episoden - )/ - (?P<id>[^/?#&]+) - ''' - - _TEST = { - 'url': 'http://www.telezueri.ch/all-episodes/astrotalk', - 'info_dict': { - 'id': 'astrotalk', - 'title': 'TeleZüri: AstroTalk - alle episoden', - 'description': 'md5:4c0f7e7d741d906004266e295ceb4a26', - }, - 'playlist_mincount': 13, - } - - def _real_extract(self, url): - playlist_id = self._match_id(url) - webpage = self._download_webpage(url, playlist_id) - episodes = get_element_by_class('search-mobile-box', webpage) - entries = [self.url_result( - urljoin(url, m.group('url'))) for m in re.finditer( - r'<a[^>]+href=(["\'])(?P<url>(?:(?!\1).)+)\1', episodes)] - title = self._og_search_title(webpage, fatal=False) - description = self._og_search_description(webpage) - return self.playlist_result(entries, playlist_id, title, description) + return self.url_result( + 'kaltura:%s:%s' % (self._PARTNER_ID, entry_id), + ie=KalturaIE.ie_key(), video_id=entry_id) diff --git a/youtube_dl/extractor/cliphunter.py b/youtube_dl/extractor/cliphunter.py index ab651d1c8..f2ca7a337 100644 --- a/youtube_dl/extractor/cliphunter.py +++ b/youtube_dl/extractor/cliphunter.py @@ -1,19 +1,10 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..utils import int_or_none - - -_translation_table = { - 'a': 'h', 'd': 'e', 'e': 'v', 'f': 'o', 'g': 'f', 'i': 'd', 'l': 'n', - 'm': 'a', 'n': 'm', 'p': 'u', 'q': 't', 'r': 's', 'v': 'p', 'x': 'r', - 'y': 'l', 'z': 'i', - '$': ':', '&': '.', '(': '=', '^': '&', '=': '/', -} - - -def _decode(s): - return ''.join(_translation_table.get(c, c) for c in s) +from ..utils import ( + int_or_none, + url_or_none, +) class CliphunterIE(InfoExtractor): @@ -60,14 +51,14 @@ class CliphunterIE(InfoExtractor): formats = [] for format_id, f in gexo_files.items(): - video_url = f.get('url') + video_url = url_or_none(f.get('url')) if not video_url: continue fmt = f.get('fmt') height = f.get('h') format_id = '%s_%sp' % (fmt, height) if fmt and height else format_id formats.append({ - 'url': _decode(video_url), + 'url': video_url, 'format_id': format_id, 'width': int_or_none(f.get('w')), 'height': int_or_none(height), diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 8452125c8..e5f8136fc 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -606,6 +606,11 @@ class InfoExtractor(object): except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: if isinstance(err, compat_urllib_error.HTTPError): if self.__can_accept_status_code(err, expected_status): + # Retain reference to error to prevent file object from + # being closed before it can be read. Works around the + # effects of <https://bugs.python.org/issue15002> + # introduced in Python 3.4.1. + err.fp._error = err return err.fp if errnote is False: diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 8879f5d90..87c7d8b0c 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -88,11 +88,7 @@ from .awaan import ( AWAANLiveIE, AWAANSeasonIE, ) -from .azmedien import ( - AZMedienIE, - AZMedienPlaylistIE, - AZMedienShowPlaylistIE, -) +from .azmedien import AZMedienIE from .baidu import BaiduVideoIE from .bambuser import BambuserIE, BambuserChannelIE from .bandcamp import BandcampIE, BandcampAlbumIE, BandcampWeeklyIE @@ -543,6 +539,7 @@ from .la7 import LA7IE from .laola1tv import ( Laola1TvEmbedIE, Laola1TvIE, + EHFTVIE, ITTFIE, ) from .lci import LCIIE @@ -1196,6 +1193,7 @@ from .tweakers import TweakersIE from .twentyfourvideo import TwentyFourVideoIE from .twentymin import TwentyMinutenIE from .twentythreevideo import TwentyThreeVideoIE +from .twitcasting import TwitCastingIE from .twitch import ( TwitchVideoIE, TwitchChapterIE, @@ -1388,6 +1386,7 @@ from .wsj import ( WSJIE, WSJArticleIE, ) +from .wwe import WWEIE from .xbef import XBefIE from .xboxclips import XboxClipsIE from .xfileshare import XFileShareIE @@ -1480,3 +1479,4 @@ from .zattoo import ( ) from .zdf import ZDFIE, ZDFChannelIE from .zingmp3 import ZingMp3IE +from .zype import ZypeIE diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py index 97cfe0fc3..74954049d 100644 --- a/youtube_dl/extractor/facebook.py +++ b/youtube_dl/extractor/facebook.py @@ -57,7 +57,7 @@ class FacebookIE(InfoExtractor): _CHROME_USER_AGENT = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.97 Safari/537.36' _VIDEO_PAGE_TEMPLATE = 'https://www.facebook.com/video/video.php?v=%s' - _VIDEO_PAGE_TAHOE_TEMPLATE = 'https://www.facebook.com/video/tahoe/async/%s/?chain=true&isvideo=true' + _VIDEO_PAGE_TAHOE_TEMPLATE = 'https://www.facebook.com/video/tahoe/async/%s/?chain=true&isvideo=true&payloadtype=primary' _TESTS = [{ 'url': 'https://www.facebook.com/video.php?v=637842556329505&fref=nf', diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 545e03371..59cf03faf 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -114,6 +114,7 @@ from .apa import APAIE from .foxnews import FoxNewsIE from .viqeo import ViqeoIE from .expressen import ExpressenIE +from .zype import ZypeIE class GenericIE(InfoExtractor): @@ -2070,6 +2071,20 @@ class GenericIE(InfoExtractor): }, 'playlist_count': 6, }, + { + # Zype embed + 'url': 'https://www.cookscountry.com/episode/554-smoky-barbecue-favorites', + 'info_dict': { + 'id': '5b400b834b32992a310622b9', + 'ext': 'mp4', + 'title': 'Smoky Barbecue Favorites', + 'thumbnail': r're:^https?://.*\.jpe?g', + }, + 'add_ie': [ZypeIE.ie_key()], + 'params': { + 'skip_download': True, + }, + }, { # videojs embed 'url': 'https://video.sibnet.ru/shell.php?videoid=3422904', @@ -3129,6 +3144,11 @@ class GenericIE(InfoExtractor): return self.playlist_from_matches( expressen_urls, video_id, video_title, ie=ExpressenIE.ie_key()) + zype_urls = ZypeIE._extract_urls(webpage) + if zype_urls: + return self.playlist_from_matches( + zype_urls, video_id, video_title, ie=ZypeIE.ie_key()) + # Look for HTML5 media entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls') if entries: diff --git a/youtube_dl/extractor/laola1tv.py b/youtube_dl/extractor/laola1tv.py index c7f813370..fa217365a 100644 --- a/youtube_dl/extractor/laola1tv.py +++ b/youtube_dl/extractor/laola1tv.py @@ -2,6 +2,7 @@ from __future__ import unicode_literals import json +import re from .common import InfoExtractor from ..utils import ( @@ -32,7 +33,8 @@ class Laola1TvEmbedIE(InfoExtractor): def _extract_token_url(self, stream_access_url, video_id, data): return self._download_json( - stream_access_url, video_id, headers={ + self._proto_relative_url(stream_access_url, 'https:'), video_id, + headers={ 'Content-Type': 'application/json', }, data=json.dumps(data).encode())['data']['stream-access'][0] @@ -119,9 +121,59 @@ class Laola1TvEmbedIE(InfoExtractor): } -class Laola1TvIE(Laola1TvEmbedIE): +class Laola1TvBaseIE(Laola1TvEmbedIE): + def _extract_video(self, url): + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + + if 'Dieser Livestream ist bereits beendet.' in webpage: + raise ExtractorError('This live stream has already finished.', expected=True) + + conf = self._parse_json(self._search_regex( + r'(?s)conf\s*=\s*({.+?});', webpage, 'conf'), + display_id, + transform_source=lambda s: js_to_json(re.sub(r'shareurl:.+,', '', s))) + video_id = conf['videoid'] + + config = self._download_json(conf['configUrl'], video_id, query={ + 'videoid': video_id, + 'partnerid': conf['partnerid'], + 'language': conf.get('language', ''), + 'portal': conf.get('portalid', ''), + }) + error = config.get('error') + if error: + raise ExtractorError('%s said: %s' % (self.IE_NAME, error), expected=True) + + video_data = config['video'] + title = video_data['title'] + is_live = video_data.get('isLivestream') and video_data.get('isLive') + meta = video_data.get('metaInformation') + sports = meta.get('sports') + categories = sports.split(',') if sports else [] + + token_url = self._extract_token_url( + video_data['streamAccess'], video_id, + video_data['abo']['required']) + + formats = self._extract_formats(token_url, video_id) + + return { + 'id': video_id, + 'display_id': display_id, + 'title': self._live_title(title) if is_live else title, + 'description': video_data.get('description'), + 'thumbnail': video_data.get('image'), + 'categories': categories, + 'formats': formats, + 'is_live': is_live, + } + + +class Laola1TvIE(Laola1TvBaseIE): IE_NAME = 'laola1tv' _VALID_URL = r'https?://(?:www\.)?laola1\.tv/[a-z]+-[a-z]+/[^/]+/(?P<id>[^/?#&]+)' + _TESTS = [{ 'url': 'http://www.laola1.tv/de-de/video/straubing-tigers-koelner-haie/227883.html', 'info_dict': { @@ -169,52 +221,30 @@ class Laola1TvIE(Laola1TvEmbedIE): }] def _real_extract(self, url): - display_id = self._match_id(url) + return self._extract_video(url) - webpage = self._download_webpage(url, display_id) - if 'Dieser Livestream ist bereits beendet.' in webpage: - raise ExtractorError('This live stream has already finished.', expected=True) +class EHFTVIE(Laola1TvBaseIE): + IE_NAME = 'ehftv' + _VALID_URL = r'https?://(?:www\.)?ehftv\.com/[a-z]+(?:-[a-z]+)?/[^/]+/(?P<id>[^/?#&]+)' - conf = self._parse_json(self._search_regex( - r'(?s)conf\s*=\s*({.+?});', webpage, 'conf'), - display_id, js_to_json) + _TESTS = [{ + 'url': 'https://www.ehftv.com/int/video/paris-saint-germain-handball-pge-vive-kielce/1166761', + 'info_dict': { + 'id': '1166761', + 'display_id': 'paris-saint-germain-handball-pge-vive-kielce', + 'ext': 'mp4', + 'title': 'Paris Saint-Germain Handball - PGE Vive Kielce', + 'is_live': False, + 'categories': ['Handball'], + }, + 'params': { + 'skip_download': True, + }, + }] - video_id = conf['videoid'] - - config = self._download_json(conf['configUrl'], video_id, query={ - 'videoid': video_id, - 'partnerid': conf['partnerid'], - 'language': conf.get('language', ''), - 'portal': conf.get('portalid', ''), - }) - error = config.get('error') - if error: - raise ExtractorError('%s said: %s' % (self.IE_NAME, error), expected=True) - - video_data = config['video'] - title = video_data['title'] - is_live = video_data.get('isLivestream') and video_data.get('isLive') - meta = video_data.get('metaInformation') - sports = meta.get('sports') - categories = sports.split(',') if sports else [] - - token_url = self._extract_token_url( - video_data['streamAccess'], video_id, - video_data['abo']['required']) - - formats = self._extract_formats(token_url, video_id) - - return { - 'id': video_id, - 'display_id': display_id, - 'title': self._live_title(title) if is_live else title, - 'description': video_data.get('description'), - 'thumbnail': video_data.get('image'), - 'categories': categories, - 'formats': formats, - 'is_live': is_live, - } + def _real_extract(self, url): + return self._extract_video(url) class ITTFIE(InfoExtractor): diff --git a/youtube_dl/extractor/nova.py b/youtube_dl/extractor/nova.py index 80186ec50..901f44b54 100644 --- a/youtube_dl/extractor/nova.py +++ b/youtube_dl/extractor/nova.py @@ -35,7 +35,7 @@ class NovaEmbedIE(InfoExtractor): bitrates = self._parse_json( self._search_regex( - r'(?s)bitrates\s*=\s*({.+?})\s*;', webpage, 'formats'), + r'(?s)(?:src|bitrates)\s*=\s*({.+?})\s*;', webpage, 'formats'), video_id, transform_source=js_to_json) QUALITIES = ('lq', 'mq', 'hq', 'hd') diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index a91f29f5c..cf51e4770 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -243,7 +243,18 @@ class PhantomJSwrapper(object): class OpenloadIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?(?:openload\.(?:co|io|link)|oload\.(?:tv|stream|site|xyz|win|download|cloud|cc|icu))/(?:f|embed)/(?P<id>[a-zA-Z0-9-_]+)' + _VALID_URL = r'''(?x) + https?:// + (?P<host> + (?:www\.)? + (?: + openload\.(?:co|io|link)| + oload\.(?:tv|stream|site|xyz|win|download|cloud|cc|icu|fun) + ) + )/ + (?:f|embed)/ + (?P<id>[a-zA-Z0-9-_]+) + ''' _TESTS = [{ 'url': 'https://openload.co/f/kUEfGclsU9o', @@ -319,7 +330,10 @@ class OpenloadIE(InfoExtractor): 'only_matching': True, }, { 'url': 'https://oload.icu/f/-_i4y_F_Hs8', - 'only_matching': True + 'only_matching': True, + }, { + 'url': 'https://oload.fun/f/gb6G1H4sHXY', + 'only_matching': True, }] _USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36' @@ -331,8 +345,11 @@ class OpenloadIE(InfoExtractor): webpage) def _real_extract(self, url): - video_id = self._match_id(url) - url_pattern = 'https://openload.co/%%s/%s/' % video_id + mobj = re.match(self._VALID_URL, url) + host = mobj.group('host') + video_id = mobj.group('id') + + url_pattern = 'https://%s/%%s/%s/' % (host, video_id) headers = { 'User-Agent': self._USER_AGENT, } @@ -365,7 +382,7 @@ class OpenloadIE(InfoExtractor): r'>\s*([\w~-]+~[a-f0-9:]+~[\w~-]+)'), webpage, 'stream URL')) - video_url = 'https://openload.co/stream/%s?mime=true' % decoded_id + video_url = 'https://%s/stream/%s?mime=true' % (host, decoded_id) title = self._og_search_title(webpage, default=None) or self._search_regex( r'<span[^>]+class=["\']title["\'][^>]*>([^<]+)', webpage, @@ -376,7 +393,7 @@ class OpenloadIE(InfoExtractor): entry = entries[0] if entries else {} subtitles = entry.get('subtitles') - info_dict = { + return { 'id': video_id, 'title': title, 'thumbnail': entry.get('thumbnail') or self._og_search_thumbnail(webpage, default=None), @@ -385,4 +402,3 @@ class OpenloadIE(InfoExtractor): 'subtitles': subtitles, 'http_headers': headers, } - return info_dict diff --git a/youtube_dl/extractor/orf.py b/youtube_dl/extractor/orf.py index c1fb580ca..d432e3449 100644 --- a/youtube_dl/extractor/orf.py +++ b/youtube_dl/extractor/orf.py @@ -15,6 +15,7 @@ from ..utils import ( strip_jsonp, unescapeHTML, unified_strdate, + url_or_none, ) @@ -68,26 +69,35 @@ class ORFTVthekIE(InfoExtractor): webpage, 'playlist', group='json'), playlist_id, transform_source=unescapeHTML)['playlist']['videos'] - def quality_to_int(s): - m = re.search('([0-9]+)', s) - if m is None: - return -1 - return int(m.group(1)) - entries = [] for sd in data_jsb: video_id, title = sd.get('id'), sd.get('title') if not video_id or not title: continue video_id = compat_str(video_id) - formats = [{ - 'preference': -10 if fd['delivery'] == 'hls' else None, - 'format_id': '%s-%s-%s' % ( - fd['delivery'], fd['quality'], fd['quality_string']), - 'url': fd['src'], - 'protocol': fd['protocol'], - 'quality': quality_to_int(fd['quality']), - } for fd in sd['sources']] + formats = [] + for fd in sd['sources']: + src = url_or_none(fd.get('src')) + if not src: + continue + format_id_list = [] + for key in ('delivery', 'quality', 'quality_string'): + value = fd.get(key) + if value: + format_id_list.append(value) + format_id = '-'.join(format_id_list) + if determine_ext(fd['src']) == 'm3u8': + formats.extend(self._extract_m3u8_formats( + fd['src'], video_id, 'mp4', m3u8_id=format_id)) + elif determine_ext(fd['src']) == 'f4m': + formats.extend(self._extract_f4m_formats( + fd['src'], video_id, f4m_id=format_id)) + else: + formats.append({ + 'format_id': format_id, + 'url': src, + 'protocol': fd.get('protocol'), + }) # Check for geoblocking. # There is a property is_geoprotection, but that's always false diff --git a/youtube_dl/extractor/picarto.py b/youtube_dl/extractor/picarto.py index 2366dfb34..8099ef1d6 100644 --- a/youtube_dl/extractor/picarto.py +++ b/youtube_dl/extractor/picarto.py @@ -1,6 +1,7 @@ # coding: utf-8 from __future__ import unicode_literals +import re import time from .common import InfoExtractor @@ -15,7 +16,7 @@ from ..utils import ( class PicartoIE(InfoExtractor): - _VALID_URL = r'https?://(?:www.)?picarto\.tv/(?P<id>[a-zA-Z0-9]+)' + _VALID_URL = r'https?://(?:www.)?picarto\.tv/(?P<id>[a-zA-Z0-9]+)(?:/(?P<token>[a-zA-Z0-9]+))?' _TEST = { 'url': 'https://picarto.tv/Setz', 'info_dict': { @@ -33,20 +34,14 @@ class PicartoIE(InfoExtractor): return False if PicartoVodIE.suitable(url) else super(PicartoIE, cls).suitable(url) def _real_extract(self, url): - channel_id = self._match_id(url) - stream_page = self._download_webpage(url, channel_id) + mobj = re.match(self._VALID_URL, url) + channel_id = mobj.group('id') - if '>This channel does not exist' in stream_page: - raise ExtractorError( - 'Channel %s does not exist' % channel_id, expected=True) + metadata = self._download_json( + 'https://api.picarto.tv/v1/channel/name/' + channel_id, + channel_id) - player = self._parse_json( - self._search_regex( - r'(?s)playerSettings\[\d+\]\s*=\s*(\{.+?\}\s*\n)', stream_page, - 'player settings'), - channel_id, transform_source=js_to_json) - - if player.get('online') is False: + if metadata.get('online') is False: raise ExtractorError('Stream is offline', expected=True) cdn_data = self._download_json( @@ -54,20 +49,13 @@ class PicartoIE(InfoExtractor): data=urlencode_postdata({'loadbalancinginfo': channel_id}), note='Downloading load balancing info') - def get_event(key): - return try_get(player, lambda x: x['event'][key], compat_str) or '' - + token = mobj.group('token') or 'public' params = { - 'token': player.get('token') or '', - 'ticket': get_event('ticket'), 'con': int(time.time() * 1000), - 'type': get_event('ticket'), - 'scope': get_event('scope'), + 'token': token, } prefered_edge = cdn_data.get('preferedEdge') - default_tech = player.get('defaultTech') - formats = [] for edge in cdn_data['edges']: @@ -81,8 +69,6 @@ class PicartoIE(InfoExtractor): preference = 0 if edge_id == prefered_edge: preference += 1 - if tech_type == default_tech: - preference += 1 format_id = [] if edge_id: format_id.append(edge_id) @@ -109,7 +95,7 @@ class PicartoIE(InfoExtractor): continue self._sort_formats(formats) - mature = player.get('mature') + mature = metadata.get('adult') if mature is None: age_limit = None else: @@ -117,9 +103,11 @@ class PicartoIE(InfoExtractor): return { 'id': channel_id, - 'title': self._live_title(channel_id), + 'title': self._live_title(metadata.get('title') or channel_id), 'is_live': True, - 'thumbnail': player.get('vodThumb'), + 'thumbnail': try_get(metadata, lambda x: x['thumbnails']['web']), + 'channel': channel_id, + 'channel_url': 'https://picarto.tv/%s' % channel_id, 'age_limit': age_limit, 'formats': formats, } diff --git a/youtube_dl/extractor/rte.py b/youtube_dl/extractor/rte.py index a6fac6c35..1fbc72915 100644 --- a/youtube_dl/extractor/rte.py +++ b/youtube_dl/extractor/rte.py @@ -8,7 +8,10 @@ from ..compat import compat_HTTPError from ..utils import ( float_or_none, parse_iso8601, + str_or_none, + try_get, unescapeHTML, + url_or_none, ExtractorError, ) @@ -17,65 +20,87 @@ class RteBaseIE(InfoExtractor): def _real_extract(self, url): item_id = self._match_id(url) - try: - json_string = self._download_json( - 'http://www.rte.ie/rteavgen/getplaylist/?type=web&format=json&id=' + item_id, - item_id) - except ExtractorError as ee: - if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 404: - error_info = self._parse_json(ee.cause.read().decode(), item_id, fatal=False) - if error_info: - raise ExtractorError( - '%s said: %s' % (self.IE_NAME, error_info['message']), - expected=True) - raise - - # NB the string values in the JSON are stored using XML escaping(!) - show = json_string['shows'][0] - title = unescapeHTML(show['title']) - description = unescapeHTML(show.get('description')) - thumbnail = show.get('thumbnail') - duration = float_or_none(show.get('duration'), 1000) - timestamp = parse_iso8601(show.get('published')) - - mg = show['media:group'][0] - + info_dict = {} formats = [] - if mg.get('url'): - m = re.match(r'(?P<url>rtmpe?://[^/]+)/(?P<app>.+)/(?P<playpath>mp4:.*)', mg['url']) - if m: - m = m.groupdict() - formats.append({ - 'url': m['url'] + '/' + m['app'], - 'app': m['app'], - 'play_path': m['playpath'], - 'player_url': url, - 'ext': 'flv', - 'format_id': 'rtmp', - }) + ENDPOINTS = ( + 'https://feeds.rasset.ie/rteavgen/player/playlist?type=iptv&format=json&showId=', + 'http://www.rte.ie/rteavgen/getplaylist/?type=web&format=json&id=', + ) - if mg.get('hls_server') and mg.get('hls_url'): - formats.extend(self._extract_m3u8_formats( - mg['hls_server'] + mg['hls_url'], item_id, 'mp4', - entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)) + for num, ep_url in enumerate(ENDPOINTS, start=1): + try: + data = self._download_json(ep_url + item_id, item_id) + except ExtractorError as ee: + if num < len(ENDPOINTS) or formats: + continue + if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 404: + error_info = self._parse_json(ee.cause.read().decode(), item_id, fatal=False) + if error_info: + raise ExtractorError( + '%s said: %s' % (self.IE_NAME, error_info['message']), + expected=True) + raise - if mg.get('hds_server') and mg.get('hds_url'): - formats.extend(self._extract_f4m_formats( - mg['hds_server'] + mg['hds_url'], item_id, - f4m_id='hds', fatal=False)) + # NB the string values in the JSON are stored using XML escaping(!) + show = try_get(data, lambda x: x['shows'][0], dict) + if not show: + continue + + if not info_dict: + title = unescapeHTML(show['title']) + description = unescapeHTML(show.get('description')) + thumbnail = show.get('thumbnail') + duration = float_or_none(show.get('duration'), 1000) + timestamp = parse_iso8601(show.get('published')) + info_dict = { + 'id': item_id, + 'title': title, + 'description': description, + 'thumbnail': thumbnail, + 'timestamp': timestamp, + 'duration': duration, + } + + mg = try_get(show, lambda x: x['media:group'][0], dict) + if not mg: + continue + + if mg.get('url'): + m = re.match(r'(?P<url>rtmpe?://[^/]+)/(?P<app>.+)/(?P<playpath>mp4:.*)', mg['url']) + if m: + m = m.groupdict() + formats.append({ + 'url': m['url'] + '/' + m['app'], + 'app': m['app'], + 'play_path': m['playpath'], + 'player_url': url, + 'ext': 'flv', + 'format_id': 'rtmp', + }) + + if mg.get('hls_server') and mg.get('hls_url'): + formats.extend(self._extract_m3u8_formats( + mg['hls_server'] + mg['hls_url'], item_id, 'mp4', + entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)) + + if mg.get('hds_server') and mg.get('hds_url'): + formats.extend(self._extract_f4m_formats( + mg['hds_server'] + mg['hds_url'], item_id, + f4m_id='hds', fatal=False)) + + mg_rte_server = str_or_none(mg.get('rte:server')) + mg_url = str_or_none(mg.get('url')) + if mg_rte_server and mg_url: + hds_url = url_or_none(mg_rte_server + mg_url) + if hds_url: + formats.extend(self._extract_f4m_formats( + hds_url, item_id, f4m_id='hds', fatal=False)) self._sort_formats(formats) - return { - 'id': item_id, - 'title': title, - 'description': description, - 'thumbnail': thumbnail, - 'timestamp': timestamp, - 'duration': duration, - 'formats': formats, - } + info_dict['formats'] = formats + return info_dict class RteIE(RteBaseIE): diff --git a/youtube_dl/extractor/ruutu.py b/youtube_dl/extractor/ruutu.py index 9fa8688f8..f530f0083 100644 --- a/youtube_dl/extractor/ruutu.py +++ b/youtube_dl/extractor/ruutu.py @@ -65,7 +65,8 @@ class RuutuIE(InfoExtractor): video_id = self._match_id(url) video_xml = self._download_xml( - 'http://gatling.ruutu.fi/media-xml-cache?id=%s' % video_id, video_id) + 'https://gatling.nelonenmedia.fi/media-xml-cache', video_id, + query={'id': video_id}) formats = [] processed_urls = [] diff --git a/youtube_dl/extractor/shared.py b/youtube_dl/extractor/shared.py index b2250afdd..931a0f70e 100644 --- a/youtube_dl/extractor/shared.py +++ b/youtube_dl/extractor/shared.py @@ -5,6 +5,7 @@ from ..compat import compat_b64decode from ..utils import ( ExtractorError, int_or_none, + url_or_none, urlencode_postdata, ) @@ -86,9 +87,16 @@ class VivoIE(SharedBaseIE): } def _extract_video_url(self, webpage, video_id, *args): + def decode_url(encoded_url): + return compat_b64decode(encoded_url).decode('utf-8') + + stream_url = url_or_none(decode_url(self._search_regex( + r'data-stream\s*=\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage, + 'stream url', default=None, group='url'))) + if stream_url: + return stream_url return self._parse_json( self._search_regex( r'InitializeStream\s*\(\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage, 'stream', group='url'), - video_id, - transform_source=lambda x: compat_b64decode(x).decode('utf-8'))[0] + video_id, transform_source=decode_url)[0] diff --git a/youtube_dl/extractor/tnaflix.py b/youtube_dl/extractor/tnaflix.py index 0c2f8f119..6798ef4c3 100644 --- a/youtube_dl/extractor/tnaflix.py +++ b/youtube_dl/extractor/tnaflix.py @@ -18,8 +18,9 @@ from ..utils import ( class TNAFlixNetworkBaseIE(InfoExtractor): # May be overridden in descendants if necessary _CONFIG_REGEX = [ - r'flashvars\.config\s*=\s*escape\("([^"]+)"', - r'<input[^>]+name="config\d?" value="([^"]+)"', + r'flashvars\.config\s*=\s*escape\("(?P<url>[^"]+)"', + r'<input[^>]+name="config\d?" value="(?P<url>[^"]+)"', + r'config\s*=\s*(["\'])(?P<url>(?:https?:)?//(?:(?!\1).)+)\1', ] _HOST = 'tna' _VKEY_SUFFIX = '' @@ -85,7 +86,8 @@ class TNAFlixNetworkBaseIE(InfoExtractor): webpage = self._download_webpage(url, display_id) cfg_url = self._proto_relative_url(self._html_search_regex( - self._CONFIG_REGEX, webpage, 'flashvars.config', default=None), 'http:') + self._CONFIG_REGEX, webpage, 'flashvars.config', default=None, + group='url'), 'http:') if not cfg_url: inputs = self._hidden_inputs(webpage) diff --git a/youtube_dl/extractor/twitcasting.py b/youtube_dl/extractor/twitcasting.py new file mode 100644 index 000000000..05f8aa9ce --- /dev/null +++ b/youtube_dl/extractor/twitcasting.py @@ -0,0 +1,60 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor + +import re + + +class TwitCastingIE(InfoExtractor): + _VALID_URL = r'https?://(?:[^/]+\.)?twitcasting\.tv/(?P<uploader_id>[^/]+)/movie/(?P<id>\d+)' + _TEST = { + 'url': 'https://twitcasting.tv/ivetesangalo/movie/2357609', + 'md5': '745243cad58c4681dc752490f7540d7f', + 'info_dict': { + 'id': '2357609', + 'ext': 'mp4', + 'title': 'Recorded Live #2357609', + 'uploader_id': 'ivetesangalo', + 'description': "Moi! I'm live on TwitCasting from my iPhone.", + 'thumbnail': r're:^https?://.*\.jpg$', + }, + 'params': { + 'skip_download': True, + }, + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + uploader_id = mobj.group('uploader_id') + + webpage = self._download_webpage(url, video_id) + + title = self._html_search_regex( + r'(?s)<[^>]+id=["\']movietitle[^>]+>(.+?)</', + webpage, 'title', default=None) or self._html_search_meta( + 'twitter:title', webpage, fatal=True) + + m3u8_url = self._search_regex( + (r'data-movie-url=(["\'])(?P<url>(?:(?!\1).)+)\1', + r'(["\'])(?P<url>http.+?\.m3u8.*?)\1'), + webpage, 'm3u8 url', group='url') + + formats = self._extract_m3u8_formats( + m3u8_url, video_id, ext='mp4', entry_protocol='m3u8_native', + m3u8_id='hls') + + thumbnail = self._og_search_thumbnail(webpage) + description = self._og_search_description( + webpage, default=None) or self._html_search_meta( + 'twitter:description', webpage) + + return { + 'id': video_id, + 'title': title, + 'description': description, + 'thumbnail': thumbnail, + 'uploader_id': uploader_id, + 'formats': formats, + } diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py index ef8b9bcb7..b52d15ac6 100644 --- a/youtube_dl/extractor/vk.py +++ b/youtube_dl/extractor/vk.py @@ -293,8 +293,12 @@ class VKIE(VKBaseIE): # This video is no longer available, because its author has been blocked. 'url': 'https://vk.com/video-10639516_456240611', 'only_matching': True, - } - ] + }, + { + # The video is not available in your region. + 'url': 'https://vk.com/video-51812607_171445436', + 'only_matching': True, + }] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) @@ -354,6 +358,9 @@ class VKIE(VKBaseIE): r'<!>This video is no longer available, because it has been deleted.': 'Video %s is no longer available, because it has been deleted.', + + r'<!>The video .+? is not available in your region.': + 'Video %s is not available in your region.', } for error_re, error_msg in ERRORS.items(): diff --git a/youtube_dl/extractor/wwe.py b/youtube_dl/extractor/wwe.py new file mode 100644 index 000000000..bebc77bb5 --- /dev/null +++ b/youtube_dl/extractor/wwe.py @@ -0,0 +1,140 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..compat import compat_str +from ..utils import ( + try_get, + unescapeHTML, + url_or_none, + urljoin, +) + + +class WWEBaseIE(InfoExtractor): + _SUBTITLE_LANGS = { + 'English': 'en', + 'Deutsch': 'de', + } + + def _extract_entry(self, data, url, video_id=None): + video_id = compat_str(video_id or data['nid']) + title = data['title'] + + formats = self._extract_m3u8_formats( + data['file'], video_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id='hls') + + description = data.get('description') + thumbnail = urljoin(url, data.get('image')) + series = data.get('show_name') + episode = data.get('episode_name') + + subtitles = {} + tracks = data.get('tracks') + if isinstance(tracks, list): + for track in tracks: + if not isinstance(track, dict): + continue + if track.get('kind') != 'captions': + continue + track_file = url_or_none(track.get('file')) + if not track_file: + continue + label = track.get('label') + lang = self._SUBTITLE_LANGS.get(label, label) or 'en' + subtitles.setdefault(lang, []).append({ + 'url': track_file, + }) + + return { + 'id': video_id, + 'title': title, + 'description': description, + 'thumbnail': thumbnail, + 'series': series, + 'episode': episode, + 'formats': formats, + 'subtitles': subtitles, + } + + +class WWEIE(WWEBaseIE): + _VALID_URL = r'https?://(?:[^/]+\.)?wwe\.com/(?:[^/]+/)*videos/(?P<id>[^/?#&]+)' + _TESTS = [{ + 'url': 'https://www.wwe.com/videos/daniel-bryan-vs-andrade-cien-almas-smackdown-live-sept-4-2018', + 'md5': '92811c6a14bfc206f7a6a9c5d9140184', + 'info_dict': { + 'id': '40048199', + 'ext': 'mp4', + 'title': 'Daniel Bryan vs. Andrade "Cien" Almas: SmackDown LIVE, Sept. 4, 2018', + 'description': 'md5:2d7424dbc6755c61a0e649d2a8677f67', + 'thumbnail': r're:^https?://.*\.jpg$', + } + }, { + 'url': 'https://de.wwe.com/videos/gran-metalik-vs-tony-nese-wwe-205-live-sept-4-2018', + 'only_matching': True, + }] + + def _real_extract(self, url): + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + + landing = self._parse_json( + self._html_search_regex( + r'(?s)Drupal\.settings\s*,\s*({.+?})\s*\)\s*;', + webpage, 'drupal settings'), + display_id)['WWEVideoLanding'] + + data = landing['initialVideo']['playlist'][0] + video_id = landing.get('initialVideoId') + + info = self._extract_entry(data, url, video_id) + info['display_id'] = display_id + return info + + +class WWEPlaylistIE(WWEBaseIE): + _VALID_URL = r'https?://(?:[^/]+\.)?wwe\.com/(?:[^/]+/)*(?P<id>[^/?#&]+)' + _TESTS = [{ + 'url': 'https://www.wwe.com/shows/raw/2018-11-12', + 'info_dict': { + 'id': '2018-11-12', + }, + 'playlist_mincount': 11, + }, { + 'url': 'http://www.wwe.com/article/walk-the-prank-wwe-edition', + 'only_matching': True, + }, { + 'url': 'https://www.wwe.com/shows/wwenxt/article/matt-riddle-interview', + 'only_matching': True, + }] + + @classmethod + def suitable(cls, url): + return False if WWEIE.suitable(url) else super(WWEPlaylistIE, cls).suitable(url) + + def _real_extract(self, url): + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + + entries = [] + for mobj in re.finditer( + r'data-video\s*=\s*(["\'])(?P<data>{.+?})\1', webpage): + video = self._parse_json( + mobj.group('data'), display_id, transform_source=unescapeHTML, + fatal=False) + if not video: + continue + data = try_get(video, lambda x: x['playlist'][0], dict) + if not data: + continue + try: + entry = self._extract_entry(data, url) + except Exception: + continue + entry['extractor_key'] = WWEIE.ie_key() + entries.append(entry) + + return self.playlist_result(entries, display_id) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 78203ef84..3f49f3889 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -41,6 +41,7 @@ from ..utils import ( remove_quotes, remove_start, smuggle_url, + str_or_none, str_to_int, try_get, unescapeHTML, @@ -501,6 +502,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'categories': ['Science & Technology'], 'tags': ['youtube-dl'], 'duration': 10, + 'view_count': int, 'like_count': int, 'dislike_count': int, 'start_time': 1, @@ -583,6 +585,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'categories': ['Science & Technology'], 'tags': ['youtube-dl'], 'duration': 10, + 'view_count': int, 'like_count': int, 'dislike_count': int, }, @@ -1189,7 +1192,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): (r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(', r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(', r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*c\s*&&\s*d\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(', - r'\bc\s*&&\s*d\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\('), + r'\bc\s*&&\s*d\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(', + r'\bc\s*&&\s*d\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('), jscode, 'Initial JS player signature function name', group='sig') jsi = JSInterpreter(jscode) @@ -1538,6 +1542,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): def extract_view_count(v_info): return int_or_none(try_get(v_info, lambda x: x['view_count'][0])) + player_response = {} + # Get video info embed_webpage = None if re.search(r'player-age-gate-content">', video_webpage) is not None: @@ -1580,6 +1586,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor): if args.get('livestream') == '1' or args.get('live_playback') == 1: is_live = True sts = ytplayer_config.get('sts') + if not player_response: + pl_response = str_or_none(args.get('player_response')) + if pl_response: + pl_response = self._parse_json(pl_response, video_id, fatal=False) + if isinstance(pl_response, dict): + player_response = pl_response if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True): # We also try looking in get_video_info since it may contain different dashmpd # URL that points to a DASH manifest with possibly different itag set (some itags @@ -1608,6 +1620,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor): if not video_info_webpage: continue get_video_info = compat_parse_qs(video_info_webpage) + if not player_response: + pl_response = get_video_info.get('player_response', [None])[0] + if isinstance(pl_response, dict): + player_response = pl_response add_dash_mpd(get_video_info) if view_count is None: view_count = extract_view_count(get_video_info) @@ -1653,9 +1669,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor): '"token" parameter not in video info for unknown reason', video_id=video_id) + video_details = try_get( + player_response, lambda x: x['videoDetails'], dict) or {} + # title if 'title' in video_info: video_title = video_info['title'][0] + elif 'title' in player_response: + video_title = video_details['title'] else: self._downloader.report_warning('Unable to extract video title') video_title = '_' @@ -1718,6 +1739,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): if view_count is None: view_count = extract_view_count(video_info) + if view_count is None and video_details: + view_count = int_or_none(video_details.get('viewCount')) # Check for "rental" videos if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info: @@ -1898,7 +1921,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor): raise ExtractorError('no conn, hlsvp or url_encoded_fmt_stream_map information found in video info') # uploader - video_uploader = try_get(video_info, lambda x: x['author'][0], compat_str) + video_uploader = try_get( + video_info, lambda x: x['author'][0], + compat_str) or str_or_none(video_details.get('author')) if video_uploader: video_uploader = compat_urllib_parse_unquote_plus(video_uploader) else: @@ -2011,12 +2036,19 @@ class YoutubeIE(YoutubeBaseInfoExtractor): like_count = _extract_count('like') dislike_count = _extract_count('dislike') + if view_count is None: + view_count = str_to_int(self._search_regex( + r'<[^>]+class=["\']watch-view-count[^>]+>\s*([\d,\s]+)', video_webpage, + 'view count', default=None)) + # subtitles video_subtitles = self.extract_subtitles(video_id, video_webpage) automatic_captions = self.extract_automatic_captions(video_id, video_webpage) video_duration = try_get( video_info, lambda x: int_or_none(x['length_seconds'][0])) + if not video_duration: + video_duration = int_or_none(video_details.get('lengthSeconds')) if not video_duration: video_duration = parse_duration(self._html_search_meta( 'duration', video_webpage, 'video duration')) @@ -2131,7 +2163,11 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor): (?:https?://)? (?:\w+\.)? (?: - youtube\.com/ + (?: + youtube\.com| + invidio\.us + ) + / (?: (?:course|view_play_list|my_playlists|artist|playlist|watch|embed/(?:videoseries|[0-9A-Za-z_-]{11})) \? (?:.*?[&;])*? (?:p|a|list)= @@ -2244,6 +2280,7 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor): 'description': 'md5:507cdcb5a49ac0da37a920ece610be80', 'categories': ['People & Blogs'], 'tags': list, + 'view_count': int, 'like_count': int, 'dislike_count': int, }, @@ -2282,6 +2319,9 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor): # music album playlist 'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM', 'only_matching': True, + }, { + 'url': 'https://invidio.us/playlist?list=PLDIoUOhQQPlXr63I_vwF9GD8sAKh77dWU', + 'only_matching': True, }] def _real_initialize(self): diff --git a/youtube_dl/extractor/zattoo.py b/youtube_dl/extractor/zattoo.py index bbe0aecb6..896276301 100644 --- a/youtube_dl/extractor/zattoo.py +++ b/youtube_dl/extractor/zattoo.py @@ -22,7 +22,7 @@ class ZattooPlatformBaseIE(InfoExtractor): _power_guide_hash = None def _host_url(self): - return 'https://%s' % self._HOST + return 'https://%s' % (self._API_HOST if hasattr(self, '_API_HOST') else self._HOST) def _login(self): username, password = self._get_login_info() @@ -286,6 +286,7 @@ class ZattooLiveIE(ZattooBaseIE): class NetPlusIE(ZattooIE): _NETRC_MACHINE = 'netplus' _HOST = 'netplus.tv' + _API_HOST = 'www.%s' % _HOST _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST) _TESTS = [{ @@ -300,7 +301,7 @@ class MNetTVIE(ZattooIE): _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST) _TESTS = [{ - 'url': 'https://www.tvplus.m-net.de/watch/abc/123-abc', + 'url': 'https://tvplus.m-net.de/watch/abc/123-abc', 'only_matching': True, }] @@ -311,7 +312,7 @@ class WalyTVIE(ZattooIE): _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST) _TESTS = [{ - 'url': 'https://www.player.waly.tv/watch/abc/123-abc', + 'url': 'https://player.waly.tv/watch/abc/123-abc', 'only_matching': True, }] @@ -319,6 +320,7 @@ class WalyTVIE(ZattooIE): class BBVTVIE(ZattooIE): _NETRC_MACHINE = 'bbvtv' _HOST = 'bbv-tv.net' + _API_HOST = 'www.%s' % _HOST _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST) _TESTS = [{ @@ -330,6 +332,7 @@ class BBVTVIE(ZattooIE): class VTXTVIE(ZattooIE): _NETRC_MACHINE = 'vtxtv' _HOST = 'vtxtv.ch' + _API_HOST = 'www.%s' % _HOST _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST) _TESTS = [{ @@ -341,6 +344,7 @@ class VTXTVIE(ZattooIE): class MyVisionTVIE(ZattooIE): _NETRC_MACHINE = 'myvisiontv' _HOST = 'myvisiontv.ch' + _API_HOST = 'www.%s' % _HOST _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST) _TESTS = [{ @@ -355,7 +359,7 @@ class GlattvisionTVIE(ZattooIE): _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST) _TESTS = [{ - 'url': 'https://www.iptv.glattvision.ch/watch/abc/123-abc', + 'url': 'https://iptv.glattvision.ch/watch/abc/123-abc', 'only_matching': True, }] @@ -363,6 +367,7 @@ class GlattvisionTVIE(ZattooIE): class SAKTVIE(ZattooIE): _NETRC_MACHINE = 'saktv' _HOST = 'saktv.ch' + _API_HOST = 'www.%s' % _HOST _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST) _TESTS = [{ @@ -377,7 +382,7 @@ class EWETVIE(ZattooIE): _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST) _TESTS = [{ - 'url': 'https://www.tvonline.ewe.de/watch/abc/123-abc', + 'url': 'https://tvonline.ewe.de/watch/abc/123-abc', 'only_matching': True, }] @@ -385,6 +390,7 @@ class EWETVIE(ZattooIE): class QuantumTVIE(ZattooIE): _NETRC_MACHINE = 'quantumtv' _HOST = 'quantum-tv.com' + _API_HOST = 'www.%s' % _HOST _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST) _TESTS = [{ @@ -395,11 +401,11 @@ class QuantumTVIE(ZattooIE): class OsnatelTVIE(ZattooIE): _NETRC_MACHINE = 'osnateltv' - _HOST = 'onlinetv.osnatel.de' + _HOST = 'tvonline.osnatel.de' _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST) _TESTS = [{ - 'url': 'https://www.onlinetv.osnatel.de/watch/abc/123-abc', + 'url': 'https://tvonline.osnatel.de/watch/abc/123-abc', 'only_matching': True, }] @@ -407,6 +413,7 @@ class OsnatelTVIE(ZattooIE): class EinsUndEinsTVIE(ZattooIE): _NETRC_MACHINE = '1und1tv' _HOST = '1und1.tv' + _API_HOST = 'www.%s' % _HOST _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST) _TESTS = [{ diff --git a/youtube_dl/extractor/zype.py b/youtube_dl/extractor/zype.py new file mode 100644 index 000000000..3b16e703b --- /dev/null +++ b/youtube_dl/extractor/zype.py @@ -0,0 +1,57 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor + + +class ZypeIE(InfoExtractor): + _VALID_URL = r'https?://player\.zype\.com/embed/(?P<id>[\da-fA-F]+)\.js\?.*?api_key=[^&]+' + _TEST = { + 'url': 'https://player.zype.com/embed/5b400b834b32992a310622b9.js?api_key=jZ9GUhRmxcPvX7M3SlfejB6Hle9jyHTdk2jVxG7wOHPLODgncEKVdPYBhuz9iWXQ&autoplay=false&controls=true&da=false', + 'md5': 'eaee31d474c76a955bdaba02a505c595', + 'info_dict': { + 'id': '5b400b834b32992a310622b9', + 'ext': 'mp4', + 'title': 'Smoky Barbecue Favorites', + 'thumbnail': r're:^https?://.*\.jpe?g', + }, + } + + @staticmethod + def _extract_urls(webpage): + return [ + mobj.group('url') + for mobj in re.finditer( + r'<script[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//player\.zype\.com/embed/[\da-fA-F]+\.js\?.*?api_key=.+?)\1', + webpage)] + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + + title = self._search_regex( + r'video_title\s*[:=]\s*(["\'])(?P<value>(?:(?!\1).)+)\1', webpage, + 'title', group='value') + + m3u8_url = self._search_regex( + r'(["\'])(?P<url>(?:(?!\1).)+\.m3u8(?:(?!\1).)*)\1', webpage, + 'm3u8 url', group='url') + + formats = self._extract_m3u8_formats( + m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id='hls') + self._sort_formats(formats) + + thumbnail = self._search_regex( + r'poster\s*[:=]\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage, 'thumbnail', + default=False, group='url') + + return { + 'id': video_id, + 'title': title, + 'thumbnail': thumbnail, + 'formats': formats, + } diff --git a/youtube_dl/version.py b/youtube_dl/version.py index ae9a77966..7f5ad7bf4 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2018.10.29' +__version__ = '2018.11.18'