From 702ccf2dc08603fed98d2672f86af1a0e300d83e Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Tue, 10 May 2016 15:58:25 +0800 Subject: [PATCH 01/45] [compat] Rename shlex_quote and remove unused subprocess_check_output --- youtube_dl/compat.py | 19 +++---------------- youtube_dl/postprocessor/execafterdownload.py | 4 ++-- youtube_dl/utils.py | 4 ++-- 3 files changed, 7 insertions(+), 20 deletions(-) diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py index e48c761a6..1392361a1 100644 --- a/youtube_dl/compat.py +++ b/youtube_dl/compat.py @@ -341,9 +341,9 @@ except ImportError: # Python 2 return parsed_result try: - from shlex import quote as shlex_quote + from shlex import quote as compat_shlex_quote except ImportError: # Python < 3.3 - def shlex_quote(s): + def compat_shlex_quote(s): if re.match(r'^[-_\w./]+$', s): return s else: @@ -466,18 +466,6 @@ else: print(s) -try: - subprocess_check_output = subprocess.check_output -except AttributeError: - def subprocess_check_output(*args, **kwargs): - assert 'input' not in kwargs - p = subprocess.Popen(*args, stdout=subprocess.PIPE, **kwargs) - output, _ = p.communicate() - ret = p.poll() - if ret: - raise subprocess.CalledProcessError(ret, p.args, output=output) - return output - if sys.version_info < (3, 0) and sys.platform == 'win32': def compat_getpass(prompt, *args, **kwargs): if isinstance(prompt, compat_str): @@ -635,6 +623,7 @@ __all__ = [ 'compat_parse_qs', 'compat_print', 'compat_setenv', + 'compat_shlex_quote', 'compat_shlex_split', 'compat_socket_create_connection', 'compat_str', @@ -656,7 +645,5 @@ __all__ = [ 'compat_urlretrieve', 'compat_xml_parse_error', 'compat_xpath', - 'shlex_quote', - 'subprocess_check_output', 'workaround_optparse_bug9161', ] diff --git a/youtube_dl/postprocessor/execafterdownload.py b/youtube_dl/postprocessor/execafterdownload.py index 74f66d669..90630c2d7 100644 --- a/youtube_dl/postprocessor/execafterdownload.py +++ b/youtube_dl/postprocessor/execafterdownload.py @@ -3,7 +3,7 @@ from __future__ import unicode_literals import subprocess from .common import PostProcessor -from ..compat import shlex_quote +from ..compat import compat_shlex_quote from ..utils import PostProcessingError @@ -17,7 +17,7 @@ class ExecAfterDownloadPP(PostProcessor): if '{}' not in cmd: cmd += ' {}' - cmd = cmd.replace('{}', shlex_quote(information['filepath'])) + cmd = cmd.replace('{}', compat_shlex_quote(information['filepath'])) self._downloader.to_screen('[exec] Executing command: %s' % cmd) retCode = subprocess.call(cmd, shell=True) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index dbac38b55..e8b09e9db 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -42,6 +42,7 @@ from .compat import ( compat_http_client, compat_kwargs, compat_parse_qs, + compat_shlex_quote, compat_socket_create_connection, compat_str, compat_struct_pack, @@ -52,7 +53,6 @@ from .compat import ( compat_urllib_request, compat_urlparse, compat_xpath, - shlex_quote, ) from .socks import ( @@ -1977,7 +1977,7 @@ def ytdl_is_updateable(): def args_to_str(args): # Get a short string representation for a subprocess command - return ' '.join(shlex_quote(a) for a in args) + return ' '.join(compat_shlex_quote(a) for a in args) def error_to_compat_str(err): From e73b9c65e279f283b28d14be5b7173eae46d4364 Mon Sep 17 00:00:00 2001 From: teemuy Date: Wed, 11 May 2016 18:10:30 +0300 Subject: [PATCH 02/45] Bugfix: Allow colons in custom HTTP header values. 
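A minimal sketch of the failure this fixes (the Referer value below is only an illustration; any header whose value itself contains a colon, e.g. a URL passed via --add-header, triggered it):

    h = 'Referer:http://example.com/page'
    key, value = h.split(':', 1)    # ('Referer', 'http://example.com/page')
    # the previous h.split(':', 2) yields three items for this input, so the
    # key/value unpacking raised ValueError and the header was never applied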
--- youtube_dl/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index cbd84c3af..740a1904b 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -69,7 +69,7 @@ def _real_main(argv=None): for h in opts.headers: if h.find(':', 1) < 0: parser.error('wrong header formatting, it should be key:value, not "%s"' % h) - key, value = h.split(':', 2) + key, value = h.split(':', 1) if opts.verbose: write_string('[debug] Adding header from command line option %s:%s\n' % (key, value)) std_headers[key] = value From e0741fd4496c85ef447e72df935cb6edd1af53ed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 11 May 2016 22:03:30 +0600 Subject: [PATCH 03/45] [__init__] Simplify colon presence check --- youtube_dl/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 740a1904b..5df965191 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -67,7 +67,7 @@ def _real_main(argv=None): # Custom HTTP headers if opts.headers is not None: for h in opts.headers: - if h.find(':', 1) < 0: + if ':' not in h: parser.error('wrong header formatting, it should be key:value, not "%s"' % h) key, value = h.split(':', 1) if opts.verbose: From 4540515cb3daa0716fa94e54cacb566ef1461ab3 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Thu, 12 May 2016 18:48:27 +0800 Subject: [PATCH 04/45] [iqiyi] Fix 1080P extraction (closes #9446) --- youtube_dl/extractor/iqiyi.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/iqiyi.py b/youtube_dl/extractor/iqiyi.py index ffb8008ce..ddcb3c916 100644 --- a/youtube_dl/extractor/iqiyi.py +++ b/youtube_dl/extractor/iqiyi.py @@ -505,7 +505,10 @@ class IqiyiIE(InfoExtractor): 'enc': md5_text(enc_key + tail), 'qyid': _uuid, 'tn': random.random(), - 'um': 0, + # In iQiyi's flash player, um is set to 1 if there's a logged user + # Some 1080P formats are only available with a logged user. 
+ # Here force um=1 to trick the iQiyi server + 'um': 1, 'authkey': md5_text(md5_text('') + tail), 'k_tag': 1, } From 778a1ccca7d6cce06faf17867f20b87883d84e98 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Thu, 12 May 2016 19:48:48 +0800 Subject: [PATCH 05/45] =?UTF-8?q?[utils]=20Add=20=C5=92=20and=20=C5=93=20f?= =?UTF-8?q?ound=20in=20French=20to=20ACCENT=5FCHARS?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes #9463 --- test/test_utils.py | 4 ++-- youtube_dl/utils.py | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/test/test_utils.py b/test/test_utils.py index 5702ffa97..ca254779f 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -155,8 +155,8 @@ class TestUtil(unittest.TestCase): self.assertTrue(sanitize_filename(':', restricted=True) != '') self.assertEqual(sanitize_filename( - 'ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ', restricted=True), - 'AAAAAAAECEEEEIIIIDNOOOOOOUUUUYPssaaaaaaaeceeeeiiiionoooooouuuuypy') + 'ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØŒÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõöøœùúûüýþÿ', restricted=True), + 'AAAAAAAECEEEEIIIIDNOOOOOOOEUUUUYPssaaaaaaaeceeeeiiiionoooooooeuuuuypy') def test_sanitize_ids(self): self.assertEqual(sanitize_filename('_n_cd26wFpw', is_id=True), '_n_cd26wFpw') diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index e8b09e9db..6592c8ec2 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -104,9 +104,9 @@ KNOWN_EXTENSIONS = ( 'f4f', 'f4m', 'm3u8', 'smil') # needed for sanitizing filenames in restricted mode -ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ', - itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOUUUUYP', ['ss'], - 'aaaaaa', ['ae'], 'ceeeeiiiionoooooouuuuypy'))) +ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØŒÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõöøœùúûüýþÿ', + itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOO', ['OE'], 'UUUUYP', ['ss'], + 'aaaaaa', ['ae'], 'ceeeeiiiionoooooo', ['oe'], 'uuuuypy'))) def preferredencoding(): From 7e8ddca1bb10068356d1ec43cf66e7627b76fce7 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Thu, 12 May 2016 19:56:58 +0800 Subject: [PATCH 06/45] [vevo] Delay the georestriction check to prevent false alerts Fixes #9408 --- youtube_dl/extractor/vevo.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/vevo.py b/youtube_dl/extractor/vevo.py index c0632cd6a..388b4debe 100644 --- a/youtube_dl/extractor/vevo.py +++ b/youtube_dl/extractor/vevo.py @@ -213,19 +213,17 @@ class VevoIE(VevoBaseIE): formats = [] if not video_info: - if response and response.get('statusCode') != 909: + try: + self._initialize_api(video_id) + except ExtractorError: ytid = response.get('errorInfo', {}).get('ytid') if ytid: self.report_warning( 'Video is geoblocked, trying with the YouTube video %s' % ytid) return self.url_result(ytid, 'Youtube', ytid) - if 'statusMessage' in response: - raise ExtractorError('%s said: %s' % ( - self.IE_NAME, response['statusMessage']), expected=True) - raise ExtractorError('Unable to extract videos') + raise - self._initialize_api(video_id) video_info = self._call_api( 'video/%s' % video_id, video_id, 'Downloading api video info', 'Failed to download video info') From 1b405bb47d91119cc612a90d26f27f2b93f7c7b4 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Tue, 3 May 2016 18:06:50 +0800 Subject: [PATCH 07/45] [downloader/f4m] Tolerate truncate segments when testing Replaces #9216 Fixes #9214 and test_Bloomberg partially --- 
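A minimal sketch of the failure mode the new read_bytes()/DataTruncatedError path makes explicit (buffer contents are illustrative; the exact struct.error wording differs between Python versions):

    import io
    import struct

    reader = io.BytesIO(b'\x00\x00')   # a 4-byte UI32 field of which only 2 bytes arrived
    data = reader.read(4)              # BytesIO.read() silently returns the short buffer
    struct.unpack('!I', data)          # raises struct.error

With this patch FlvReader.read_bytes() reports how many bytes were expected and how many were actually read, and the F4M downloader catches DataTruncatedError in test mode to write the truncated segment as is.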
youtube_dl/downloader/f4m.py | 42 +++++++++++++++++++++++++++--------- 1 file changed, 32 insertions(+), 10 deletions(-) diff --git a/youtube_dl/downloader/f4m.py b/youtube_dl/downloader/f4m.py index 3d9337afa..314def4cb 100644 --- a/youtube_dl/downloader/f4m.py +++ b/youtube_dl/downloader/f4m.py @@ -23,26 +23,38 @@ from ..utils import ( ) +class DataTruncatedError(Exception): + pass + + class FlvReader(io.BytesIO): """ Reader for Flv files The file format is documented in https://www.adobe.com/devnet/f4v.html """ + def read_bytes(self, n): + data = self.read(n) + if len(data) < n: + raise DataTruncatedError( + 'FlvReader error: need %d bytes while only %d bytes got' % ( + n, len(data))) + return data + # Utility functions for reading numbers and strings def read_unsigned_long_long(self): - return compat_struct_unpack('!Q', self.read(8))[0] + return compat_struct_unpack('!Q', self.read_bytes(8))[0] def read_unsigned_int(self): - return compat_struct_unpack('!I', self.read(4))[0] + return compat_struct_unpack('!I', self.read_bytes(4))[0] def read_unsigned_char(self): - return compat_struct_unpack('!B', self.read(1))[0] + return compat_struct_unpack('!B', self.read_bytes(1))[0] def read_string(self): res = b'' while True: - char = self.read(1) + char = self.read_bytes(1) if char == b'\x00': break res += char @@ -53,18 +65,18 @@ class FlvReader(io.BytesIO): Read a box and return the info as a tuple: (box_size, box_type, box_data) """ real_size = size = self.read_unsigned_int() - box_type = self.read(4) + box_type = self.read_bytes(4) header_end = 8 if size == 1: real_size = self.read_unsigned_long_long() header_end = 16 - return real_size, box_type, self.read(real_size - header_end) + return real_size, box_type, self.read_bytes(real_size - header_end) def read_asrt(self): # version self.read_unsigned_char() # flags - self.read(3) + self.read_bytes(3) quality_entry_count = self.read_unsigned_char() # QualityEntryCount for i in range(quality_entry_count): @@ -85,7 +97,7 @@ class FlvReader(io.BytesIO): # version self.read_unsigned_char() # flags - self.read(3) + self.read_bytes(3) # time scale self.read_unsigned_int() @@ -119,7 +131,7 @@ class FlvReader(io.BytesIO): # version self.read_unsigned_char() # flags - self.read(3) + self.read_bytes(3) self.read_unsigned_int() # BootstrapinfoVersion # Profile,Live,Update,Reserved @@ -374,7 +386,17 @@ class F4mFD(FragmentFD): down.close() reader = FlvReader(down_data) while True: - _, box_type, box_data = reader.read_box_info() + try: + _, box_type, box_data = reader.read_box_info() + except DataTruncatedError: + if test: + # In tests, segments may be truncated, and thus + # FlvReader may not be able to parse the whole + # chunk. If so, write the segment as is + # See https://github.com/rg3/youtube-dl/issues/9214 + dest_stream.write(down_data) + break + raise if box_type == b'mdat': dest_stream.write(box_data) break From a3fa6024d676ec20a06fe618f5c3d6e064f49336 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Thu, 12 May 2016 20:05:43 +0800 Subject: [PATCH 08/45] [bloomberg] Fix test_Bloomberg In this test case, sometimes HLS is the best format while sometimes HDS is. To prevent occasional test failures, force HDS to be the best format. In the past, testing against HDS formats causes the same error as #9214, which is fixed as #9377 landed. 
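best[format_id^=hds] selects the best of the formats whose format_id starts with "hds", i.e. only the HDS renditions. A rough embedding equivalent of what the test now requests (format selector and URL are taken from this extractor's tests, the rest is illustrative):

    import youtube_dl

    ydl_opts = {'format': 'best[format_id^=hds]'}
    with youtube_dl.YoutubeDL(ydl_opts) as ydl:
        ydl.download(['http://www.bloomberg.com/news/articles/2015-11-12/five-strange-things-that-have-been-happening-in-financial-markets'])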
--- youtube_dl/extractor/bloomberg.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/youtube_dl/extractor/bloomberg.py b/youtube_dl/extractor/bloomberg.py index 13343bc25..bd538be50 100644 --- a/youtube_dl/extractor/bloomberg.py +++ b/youtube_dl/extractor/bloomberg.py @@ -17,6 +17,9 @@ class BloombergIE(InfoExtractor): 'title': 'Shah\'s Presentation on Foreign-Exchange Strategies', 'description': 'md5:a8ba0302912d03d246979735c17d2761', }, + 'params': { + 'format': 'best[format_id^=hds]', + }, }, { 'url': 'http://www.bloomberg.com/news/articles/2015-11-12/five-strange-things-that-have-been-happening-in-financial-markets', 'only_matching': True, From f388f616c1f2ad9d2c906c4183cf996c845b2858 Mon Sep 17 00:00:00 2001 From: TRox1972 Date: Thu, 12 May 2016 16:48:12 +0200 Subject: [PATCH 09/45] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 4ef6b6d5a..a2febab2c 100644 --- a/README.md +++ b/README.md @@ -417,7 +417,7 @@ which means you can modify it, redistribute it or use it however you like. # CONFIGURATION -You can configure youtube-dl by placing any supported command line option to a configuration file. On Linux, the system wide configuration file is located at `/etc/youtube-dl.conf` and the user wide configuration file at `~/.config/youtube-dl/config`. On Windows, the user wide configuration file locations are `%APPDATA%\youtube-dl\config.txt` or `C:\Users\\youtube-dl.conf`. +You can configure youtube-dl by placing any supported command line option to a configuration file. On Linux and OS X, the system wide configuration file is located at `/etc/youtube-dl.conf` and the user wide configuration file at `~/.config/youtube-dl/config`. On Windows, the user wide configuration file locations are `%APPDATA%\youtube-dl\config.txt` or `C:\Users\\youtube-dl.conf`. 
For example, with the following configuration file youtube-dl will always extract the audio, not copy the mtime, use a proxy and save all videos under `Movies` directory in your home directory: ``` From 7581bfc958c8de77adbf8a502564d2263d17479d Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Thu, 12 May 2016 18:57:53 +0800 Subject: [PATCH 10/45] [utils] Unquote crendentials passed to SOCKS proxies Fixes #9450 --- youtube_dl/utils.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 6592c8ec2..d6f94f8cd 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -50,6 +50,7 @@ from .compat import ( compat_urllib_parse, compat_urllib_parse_urlencode, compat_urllib_parse_urlparse, + compat_urllib_parse_unquote_plus, compat_urllib_request, compat_urlparse, compat_xpath, @@ -886,7 +887,8 @@ def make_socks_conn_class(base_class, socks_proxy): socks_type, url_components.hostname, url_components.port or 1080, True, # Remote DNS - url_components.username, url_components.password + compat_urllib_parse_unquote_plus(url_components.username), + compat_urllib_parse_unquote_plus(url_components.password), ) class SocksConnection(base_class): From 0db3a66162cf1059dbfccd60db350596f7c5b469 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 12 May 2016 23:57:52 +0600 Subject: [PATCH 11/45] [twitch] Skip dead tests --- youtube_dl/extractor/twitch.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py index 36ee1adff..68f50487b 100644 --- a/youtube_dl/extractor/twitch.py +++ b/youtube_dl/extractor/twitch.py @@ -171,6 +171,7 @@ class TwitchVideoIE(TwitchItemBaseIE): 'title': 'Worlds Semifinals - Star Horn Royal Club vs. 
OMG', }, 'playlist_mincount': 12, + 'skip': 'HTTP Error 404: Not Found', } @@ -187,6 +188,7 @@ class TwitchChapterIE(TwitchItemBaseIE): 'title': 'ACRL Off Season - Sports Cars @ Nordschleife', }, 'playlist_mincount': 3, + 'skip': 'HTTP Error 404: Not Found', }, { 'url': 'http://www.twitch.tv/tsm_theoddone/c/2349361', 'only_matching': True, @@ -368,6 +370,7 @@ class TwitchBookmarksIE(TwitchPlaylistBaseIE): 'title': 'Ognos', }, 'playlist_mincount': 3, + 'skip': 'HTTP Error 404: Not Found', } def _extract_playlist_page(self, response): From 0df79d552a6d528ac5bb1a9cce99199aafe79144 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 13 May 2016 00:14:30 +0600 Subject: [PATCH 12/45] [twitch:bookmarks] Remove extractor Bookmarks no longer available --- youtube_dl/extractor/extractors.py | 1 - youtube_dl/extractor/twitch.py | 26 -------------------------- 2 files changed, 27 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index a0bb3d4c2..f2bd4fe97 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -834,7 +834,6 @@ from .twitch import ( TwitchVodIE, TwitchProfileIE, TwitchPastBroadcastsIE, - TwitchBookmarksIE, TwitchStreamIE, ) from .twitter import ( diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py index 68f50487b..f7b98e190 100644 --- a/youtube_dl/extractor/twitch.py +++ b/youtube_dl/extractor/twitch.py @@ -357,32 +357,6 @@ class TwitchPastBroadcastsIE(TwitchPlaylistBaseIE): } -class TwitchBookmarksIE(TwitchPlaylistBaseIE): - IE_NAME = 'twitch:bookmarks' - _VALID_URL = r'%s/(?P[^/]+)/profile/bookmarks/?(?:\#.*)?$' % TwitchBaseIE._VALID_URL_BASE - _PLAYLIST_URL = '%s/api/bookmark/?user=%%s&offset=%%d&limit=%%d' % TwitchBaseIE._API_BASE - _PLAYLIST_TYPE = 'bookmarks' - - _TEST = { - 'url': 'http://www.twitch.tv/ognos/profile/bookmarks', - 'info_dict': { - 'id': 'ognos', - 'title': 'Ognos', - }, - 'playlist_mincount': 3, - 'skip': 'HTTP Error 404: Not Found', - } - - def _extract_playlist_page(self, response): - entries = [] - for bookmark in response.get('bookmarks', []): - video = bookmark.get('video') - if not video: - continue - entries.append(video['url']) - return entries - - class TwitchStreamIE(TwitchBaseIE): IE_NAME = 'twitch:stream' _VALID_URL = r'%s/(?P[^/#?]+)/?(?:\#.*)?$' % TwitchBaseIE._VALID_URL_BASE From d8d540cf0d11dbf7b3d9de611470fc7114c8d1ca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 13 May 2016 02:07:12 +0600 Subject: [PATCH 13/45] [nrk] Rework extractor (Closes #9470) --- youtube_dl/extractor/nrk.py | 435 ++++++++++++++++-------------------- 1 file changed, 196 insertions(+), 239 deletions(-) diff --git a/youtube_dl/extractor/nrk.py b/youtube_dl/extractor/nrk.py index 51dfc27ac..f0fbdd8be 100644 --- a/youtube_dl/extractor/nrk.py +++ b/youtube_dl/extractor/nrk.py @@ -4,91 +4,224 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..compat import ( - compat_urlparse, - compat_urllib_parse_unquote, -) +from ..compat import compat_urllib_parse_unquote from ..utils import ( - determine_ext, ExtractorError, - float_or_none, + int_or_none, + parse_age_limit, parse_duration, - unified_strdate, ) -class NRKIE(InfoExtractor): - _VALID_URL = r'(?:nrk:|https?://(?:www\.)?nrk\.no/video/PS\*)(?P\d+)' - - _TESTS = [ - { - 'url': 'http://www.nrk.no/video/PS*150533', - # MD5 is unstable - 'info_dict': { - 'id': '150533', - 'ext': 'flv', - 'title': 'Dompap og andre fugler i Piip-Show', 
- 'description': 'md5:d9261ba34c43b61c812cb6b0269a5c8f', - 'duration': 263, - } - }, - { - 'url': 'http://www.nrk.no/video/PS*154915', - # MD5 is unstable - 'info_dict': { - 'id': '154915', - 'ext': 'flv', - 'title': 'Slik høres internett ut når du er blind', - 'description': 'md5:a621f5cc1bd75c8d5104cb048c6b8568', - 'duration': 20, - } - }, - ] +class NRKBaseIE(InfoExtractor): + def _extract_formats(self, manifest_url, video_id, fatal=True): + return self._extract_f4m_formats( + manifest_url + '?hdcore=3.5.0&plugin=aasp-3.5.0.151.81', + video_id, f4m_id='hds', fatal=fatal) def _real_extract(self, url): video_id = self._match_id(url) data = self._download_json( - 'http://v8.psapi.nrk.no/mediaelement/%s' % video_id, - video_id, 'Downloading media JSON') + 'http://%s/mediaelement/%s' % (self._API_HOST, video_id), + video_id, 'Downloading mediaelement JSON') - media_url = data.get('mediaUrl') + title = data.get('fullTitle') or data.get('mainTitle') or data['title'] + video_id = data.get('id') or video_id - if not media_url: - if data['usageRights']['isGeoBlocked']: + entries = [] + + media_assets = data.get('mediaAssets') + if media_assets and isinstance(media_assets, list): + def video_id_and_title(idx): + return ((video_id, title) if len(media_assets) == 1 + else ('%s-%d' % (video_id, idx), '%s (Part %d)' % (title, idx))) + for num, asset in enumerate(media_assets, 1): + asset_url = asset.get('url') + if not asset_url: + continue + formats = self._extract_formats(asset_url, video_id, fatal=False) + if not formats: + continue + self._sort_formats(formats) + entry_id, entry_title = video_id_and_title(num) + duration = parse_duration(asset.get('duration')) + subtitles = {} + for subtitle in ('webVtt', 'timedText'): + subtitle_url = asset.get('%sSubtitlesUrl' % subtitle) + if subtitle_url: + subtitles.setdefault('no', []).append({'url': subtitle_url}) + entries.append({ + 'id': asset.get('carrierId') or entry_id, + 'title': entry_title, + 'duration': duration, + 'subtitles': subtitles, + 'formats': formats, + }) + + if not entries: + media_url = data.get('mediaUrl') + if media_url: + formats = self._extract_formats(media_url, video_id) + self._sort_formats(formats) + duration = parse_duration(data.get('duration')) + entries = [{ + 'id': video_id, + 'title': title, + 'duration': duration, + 'formats': formats, + }] + + if not entries: + if data.get('usageRights', {}).get('isGeoBlocked'): raise ExtractorError( 'NRK har ikke rettigheter til å vise dette programmet utenfor Norge', expected=True) - if determine_ext(media_url) == 'f4m': - formats = self._extract_f4m_formats( - media_url + '?hdcore=3.5.0&plugin=aasp-3.5.0.151.81', video_id, f4m_id='hds') - self._sort_formats(formats) - else: - formats = [{ - 'url': media_url, - 'ext': 'flv', - }] - - duration = parse_duration(data.get('duration')) + conviva = data.get('convivaStatistics') or {} + series = conviva.get('seriesName') or data.get('seriesTitle') + episode = conviva.get('episodeName') or data.get('episodeNumberOrDate') + thumbnails = None images = data.get('images') - if images: - thumbnails = images['webImages'] - thumbnails.sort(key=lambda image: image['pixelWidth']) - thumbnail = thumbnails[-1]['imageUrl'] - else: - thumbnail = None + if images and isinstance(images, dict): + web_images = images.get('webImages') + if isinstance(web_images, list): + thumbnails = [{ + 'url': image['imageUrl'], + 'width': int_or_none(image.get('width')), + 'height': int_or_none(image.get('height')), + } for image in web_images if image.get('imageUrl')] - 
return { - 'id': video_id, - 'title': data['title'], - 'description': data['description'], - 'duration': duration, - 'thumbnail': thumbnail, - 'formats': formats, + description = data.get('description') + + common_info = { + 'description': description, + 'series': series, + 'episode': episode, + 'age_limit': parse_age_limit(data.get('legalAge')), + 'thumbnails': thumbnails, } + vcodec = 'none' if data.get('mediaType') == 'Audio' else None + + # TODO: extract chapters when https://github.com/rg3/youtube-dl/pull/9409 is merged + + for entry in entries: + entry.update(common_info) + for f in entry['formats']: + f['vcodec'] = vcodec + + return self.playlist_result(entries, video_id, title, description) + + +class NRKIE(NRKBaseIE): + _VALID_URL = r'(?:nrk:|https?://(?:www\.)?nrk\.no/video/PS\*)(?P\d+)' + _API_HOST = 'v8.psapi.nrk.no' + _TESTS = [{ + # video + 'url': 'http://www.nrk.no/video/PS*150533', + # MD5 is unstable + 'info_dict': { + 'id': '150533', + 'ext': 'flv', + 'title': 'Dompap og andre fugler i Piip-Show', + 'description': 'md5:d9261ba34c43b61c812cb6b0269a5c8f', + 'duration': 263, + } + }, { + # audio + 'url': 'http://www.nrk.no/video/PS*154915', + # MD5 is unstable + 'info_dict': { + 'id': '154915', + 'ext': 'flv', + 'title': 'Slik høres internett ut når du er blind', + 'description': 'md5:a621f5cc1bd75c8d5104cb048c6b8568', + 'duration': 20, + } + }] + + +class NRKTVIE(NRKBaseIE): + IE_DESC = 'NRK TV and NRK Radio' + _VALID_URL = r'https?://(?:tv|radio)\.nrk(?:super)?\.no/(?:serie/[^/]+|program)/(?P[a-zA-Z]{4}\d{8})(?:/\d{2}-\d{2}-\d{4})?(?:#del=(?P\d+))?' + _API_HOST = 'psapi-we.nrk.no' + + _TESTS = [{ + 'url': 'https://tv.nrk.no/serie/20-spoersmaal-tv/MUHH48000314/23-05-2014', + 'info_dict': { + 'id': 'MUHH48000314', + 'ext': 'mp4', + 'title': '20 spørsmål', + 'description': 'md5:bdea103bc35494c143c6a9acdd84887a', + 'upload_date': '20140523', + 'duration': 1741.52, + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + }, { + 'url': 'https://tv.nrk.no/program/mdfp15000514', + 'info_dict': { + 'id': 'mdfp15000514', + 'ext': 'mp4', + 'title': 'Grunnlovsjubiléet - Stor ståhei for ingenting', + 'description': 'md5:654c12511f035aed1e42bdf5db3b206a', + 'upload_date': '20140524', + 'duration': 4605.08, + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + }, { + # single playlist video + 'url': 'https://tv.nrk.no/serie/tour-de-ski/MSPO40010515/06-01-2015#del=2', + 'md5': 'adbd1dbd813edaf532b0a253780719c2', + 'info_dict': { + 'id': 'MSPO40010515-part2', + 'ext': 'flv', + 'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn 06.01.2015 (del 2:2)', + 'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26', + 'upload_date': '20150106', + }, + 'skip': 'Only works from Norway', + }, { + 'url': 'https://tv.nrk.no/serie/tour-de-ski/MSPO40010515/06-01-2015', + 'playlist': [{ + 'md5': '9480285eff92d64f06e02a5367970a7a', + 'info_dict': { + 'id': 'MSPO40010515-part1', + 'ext': 'flv', + 'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn 06.01.2015 (del 1:2)', + 'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26', + 'upload_date': '20150106', + }, + }, { + 'md5': 'adbd1dbd813edaf532b0a253780719c2', + 'info_dict': { + 'id': 'MSPO40010515-part2', + 'ext': 'flv', + 'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn 06.01.2015 (del 2:2)', + 'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26', + 'upload_date': '20150106', + }, + }], + 'info_dict': { + 'id': 'MSPO40010515', + 'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn', + 'description': 
'md5:238b67b97a4ac7d7b4bf0edf8cc57d26', + 'upload_date': '20150106', + 'duration': 6947.52, + }, + 'skip': 'Only works from Norway', + }, { + 'url': 'https://radio.nrk.no/serie/dagsnytt/NPUB21019315/12-07-2015#', + 'only_matching': True, + }] + class NRKPlaylistIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?nrk\.no/(?!video|skole)(?:[^/]+/)+(?P[^/]+)' @@ -159,179 +292,3 @@ class NRKSkoleIE(InfoExtractor): nrk_id = self._search_regex(r'data-nrk-id=["\'](\d+)', webpage, 'nrk id') return self.url_result('nrk:%s' % nrk_id) - - -class NRKTVIE(InfoExtractor): - IE_DESC = 'NRK TV and NRK Radio' - _VALID_URL = r'(?Phttps?://(?:tv|radio)\.nrk(?:super)?\.no/)(?:serie/[^/]+|program)/(?P[a-zA-Z]{4}\d{8})(?:/\d{2}-\d{2}-\d{4})?(?:#del=(?P\d+))?' - - _TESTS = [ - { - 'url': 'https://tv.nrk.no/serie/20-spoersmaal-tv/MUHH48000314/23-05-2014', - 'info_dict': { - 'id': 'MUHH48000314', - 'ext': 'mp4', - 'title': '20 spørsmål', - 'description': 'md5:bdea103bc35494c143c6a9acdd84887a', - 'upload_date': '20140523', - 'duration': 1741.52, - }, - 'params': { - # m3u8 download - 'skip_download': True, - }, - }, - { - 'url': 'https://tv.nrk.no/program/mdfp15000514', - 'info_dict': { - 'id': 'mdfp15000514', - 'ext': 'mp4', - 'title': 'Grunnlovsjubiléet - Stor ståhei for ingenting', - 'description': 'md5:654c12511f035aed1e42bdf5db3b206a', - 'upload_date': '20140524', - 'duration': 4605.08, - }, - 'params': { - # m3u8 download - 'skip_download': True, - }, - }, - { - # single playlist video - 'url': 'https://tv.nrk.no/serie/tour-de-ski/MSPO40010515/06-01-2015#del=2', - 'md5': 'adbd1dbd813edaf532b0a253780719c2', - 'info_dict': { - 'id': 'MSPO40010515-part2', - 'ext': 'flv', - 'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn 06.01.2015 (del 2:2)', - 'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26', - 'upload_date': '20150106', - }, - 'skip': 'Only works from Norway', - }, - { - 'url': 'https://tv.nrk.no/serie/tour-de-ski/MSPO40010515/06-01-2015', - 'playlist': [ - { - 'md5': '9480285eff92d64f06e02a5367970a7a', - 'info_dict': { - 'id': 'MSPO40010515-part1', - 'ext': 'flv', - 'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn 06.01.2015 (del 1:2)', - 'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26', - 'upload_date': '20150106', - }, - }, - { - 'md5': 'adbd1dbd813edaf532b0a253780719c2', - 'info_dict': { - 'id': 'MSPO40010515-part2', - 'ext': 'flv', - 'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn 06.01.2015 (del 2:2)', - 'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26', - 'upload_date': '20150106', - }, - }, - ], - 'info_dict': { - 'id': 'MSPO40010515', - 'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn', - 'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26', - 'upload_date': '20150106', - 'duration': 6947.5199999999995, - }, - 'skip': 'Only works from Norway', - }, - { - 'url': 'https://radio.nrk.no/serie/dagsnytt/NPUB21019315/12-07-2015#', - 'only_matching': True, - } - ] - - def _extract_f4m(self, manifest_url, video_id): - return self._extract_f4m_formats( - manifest_url + '?hdcore=3.1.1&plugin=aasp-3.1.1.69.124', video_id, f4m_id='hds') - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - part_id = mobj.group('part_id') - base_url = mobj.group('baseurl') - - webpage = self._download_webpage(url, video_id) - - title = self._html_search_meta( - 'title', webpage, 'title') - description = self._html_search_meta( - 'description', webpage, 'description') - - thumbnail = self._html_search_regex( - 
r'data-posterimage="([^"]+)"', - webpage, 'thumbnail', fatal=False) - upload_date = unified_strdate(self._html_search_meta( - 'rightsfrom', webpage, 'upload date', fatal=False)) - duration = float_or_none(self._html_search_regex( - r'data-duration="([^"]+)"', - webpage, 'duration', fatal=False)) - - # playlist - parts = re.findall( - r']+data-argument="([^"]+)">([^<]+)', webpage) - if parts: - entries = [] - for current_part_id, stream_url, part_title in parts: - if part_id and current_part_id != part_id: - continue - video_part_id = '%s-part%s' % (video_id, current_part_id) - formats = self._extract_f4m(stream_url, video_part_id) - entries.append({ - 'id': video_part_id, - 'title': part_title, - 'description': description, - 'thumbnail': thumbnail, - 'upload_date': upload_date, - 'formats': formats, - }) - if part_id: - if entries: - return entries[0] - else: - playlist = self.playlist_result(entries, video_id, title, description) - playlist.update({ - 'thumbnail': thumbnail, - 'upload_date': upload_date, - 'duration': duration, - }) - return playlist - - formats = [] - - f4m_url = re.search(r'data-media="([^"]+)"', webpage) - if f4m_url: - formats.extend(self._extract_f4m(f4m_url.group(1), video_id)) - - m3u8_url = re.search(r'data-hls-media="([^"]+)"', webpage) - if m3u8_url: - formats.extend(self._extract_m3u8_formats(m3u8_url.group(1), video_id, 'mp4', m3u8_id='hls')) - self._sort_formats(formats) - - subtitles_url = self._html_search_regex( - r'data-subtitlesurl\s*=\s*(["\'])(?P.+?)\1', - webpage, 'subtitle URL', default=None, group='url') - subtitles = {} - if subtitles_url: - subtitles['no'] = [{ - 'ext': 'ttml', - 'url': compat_urlparse.urljoin(base_url, subtitles_url), - }] - - return { - 'id': video_id, - 'title': title, - 'description': description, - 'thumbnail': thumbnail, - 'upload_date': upload_date, - 'duration': duration, - 'formats': formats, - 'subtitles': subtitles, - } From b9e7bc55da1c1275737b356efadc06435b8bfa2c Mon Sep 17 00:00:00 2001 From: remitamine Date: Thu, 12 May 2016 22:45:54 +0100 Subject: [PATCH 14/45] [mgtv] extract http formats --- youtube_dl/extractor/mgtv.py | 43 ++++++++++++++++++------------------ 1 file changed, 22 insertions(+), 21 deletions(-) diff --git a/youtube_dl/extractor/mgtv.py b/youtube_dl/extractor/mgtv.py index a14d176a5..9fbc74f5d 100644 --- a/youtube_dl/extractor/mgtv.py +++ b/youtube_dl/extractor/mgtv.py @@ -11,7 +11,7 @@ class MGTVIE(InfoExtractor): _TEST = { 'url': 'http://www.mgtv.com/v/1/290525/f/3116640.html', - 'md5': '', + 'md5': '1bdadcf760a0b90946ca68ee9a2db41a', 'info_dict': { 'id': '3116640', 'ext': 'mp4', @@ -20,15 +20,6 @@ class MGTVIE(InfoExtractor): 'duration': 7461, 'thumbnail': 're:^https?://.*\.jpg$', }, - 'params': { - 'skip_download': True, # m3u8 download - }, - } - - _FORMAT_MAP = { - '标清': ('Standard', 0), - '高清': ('High', 1), - '超清': ('SuperHigh', 2), } def _real_extract(self, url): @@ -40,17 +31,27 @@ class MGTVIE(InfoExtractor): formats = [] for idx, stream in enumerate(api_data['stream']): - format_name = stream.get('name') - format_id, preference = self._FORMAT_MAP.get(format_name, (None, None)) - format_info = self._download_json( - stream['url'], video_id, - note='Download video info for format %s' % format_id or '#%d' % idx) - formats.append({ - 'format_id': format_id, - 'url': format_info['info'], - 'ext': 'mp4', # These are m3u8 playlists - 'preference': preference, - }) + stream_url = stream.get('url') + if not stream_url: + continue + tbr = int_or_none(self._search_regex( + r'(\d+)\.mp4', 
stream_url, 'tbr', default=None)) + + def extract_format(stream_url, format_id, idx, query={}): + format_info = self._download_json( + stream_url, video_id, + note='Download video info for format %s' % format_id or '#%d' % idx, query=query) + return { + 'format_id': format_id, + 'url': format_info['info'], + 'ext': 'mp4', + 'tbr': tbr, + } + + formats.append(extract_format( + stream_url, 'hls-%d' % tbr if tbr else None, idx * 2)) + formats.append(extract_format(stream_url.replace( + '/playlist.m3u8', ''), 'http-%d' % tbr if tbr else None, idx * 2 + 1, {'pno': 1031})) self._sort_formats(formats) return { From 99d79b8692ae8981aff91cf5b1475516b60eb765 Mon Sep 17 00:00:00 2001 From: remitamine Date: Fri, 13 May 2016 05:21:45 +0100 Subject: [PATCH 15/45] [ustudio] add support ustudio app/embed urls --- youtube_dl/extractor/ustudio.py | 66 +++++++++++++++++++++++++++++++-- 1 file changed, 62 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/ustudio.py b/youtube_dl/extractor/ustudio.py index cafc082b6..3484a2046 100644 --- a/youtube_dl/extractor/ustudio.py +++ b/youtube_dl/extractor/ustudio.py @@ -6,10 +6,12 @@ from .common import InfoExtractor from ..utils import ( int_or_none, unified_strdate, + unescapeHTML, ) class UstudioIE(InfoExtractor): + IE_NAME = 'ustudio' _VALID_URL = r'https?://(?:(?:www|v1)\.)?ustudio\.com/video/(?P[^/]+)/(?P[^/?#&]+)' _TEST = { 'url': 'http://ustudio.com/video/Uxu2my9bgSph/san_francisco_golden_gate_bridge', @@ -27,9 +29,7 @@ class UstudioIE(InfoExtractor): } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - display_id = mobj.group('display_id') + video_id, display_id = re.match(self._VALID_URL, url).groups() config = self._download_xml( 'http://v1.ustudio.com/embed/%s/ustudio/config.xml' % video_id, @@ -37,7 +37,7 @@ class UstudioIE(InfoExtractor): def extract(kind): return [{ - 'url': item.attrib['url'], + 'url': unescapeHTML(item.attrib['url']), 'width': int_or_none(item.get('width')), 'height': int_or_none(item.get('height')), } for item in config.findall('./qualities/quality/%s' % kind) if item.get('url')] @@ -65,3 +65,61 @@ class UstudioIE(InfoExtractor): 'uploader': uploader, 'formats': formats, } + + +class UstudioEmbedIE(InfoExtractor): + IE_NAME = 'ustudio:embed' + _VALID_URL = r'https?://(?:(?:app|embed)\.)?ustudio\.com/embed/(?P[^/]+)/(?P[^/]+)' + _TEST = { + 'url': 'http://app.ustudio.com/embed/DeN7VdYRDKhP/Uw7G1kMCe65T', + 'md5': '47c0be52a09b23a7f40de9469cec58f4', + 'info_dict': { + 'id': 'Uw7G1kMCe65T', + 'ext': 'mp4', + 'title': '5 Things IT Should Know About Video', + 'description': 'md5:93d32650884b500115e158c5677d25ad', + 'uploader_id': 'DeN7VdYRDKhP', + } + } + + def _real_extract(self, url): + uploader_id, video_id = re.match(self._VALID_URL, url).groups() + video_data = self._download_json( + 'http://app.ustudio.com/embed/%s/%s/config.json' % (uploader_id, video_id), + video_id)['videos'][0] + title = video_data['name'] + + formats = [] + for ext, qualities in video_data.get('transcodes', {}).items(): + for quality in qualities: + quality_url = quality.get('url') + if not quality_url: + continue + height = int_or_none(quality.get('height')) + formats.append({ + 'format_id': '%s-%dp' % (ext, height) if height else ext, + 'url': quality_url, + 'width': int_or_none(quality.get('width')), + 'height': height, + }) + self._sort_formats(formats) + + thumbnails = [] + for image in video_data.get('images', []): + image_url = image.get('url') + if not image_url: + continue + 
thumbnails.append({ + 'url': image_url, + }) + + return { + 'id': video_id, + 'title': title, + 'description': video_data.get('description'), + 'duration': int_or_none(video_data.get('duration')), + 'uploader_id': uploader_id, + 'tags': video_data.get('keywords'), + 'thumbnails': thumbnails, + 'formats': formats, + } From cdf32ff15d6fc9d1902bfb3ed10a582070d20cd9 Mon Sep 17 00:00:00 2001 From: remitamine Date: Fri, 13 May 2016 05:25:32 +0100 Subject: [PATCH 16/45] [extractors] add import for UstudioEmbedIE --- youtube_dl/extractor/extractors.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index f2bd4fe97..50d2204f2 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -851,7 +851,10 @@ from .unistra import UnistraIE from .urort import UrortIE from .usatoday import USATodayIE from .ustream import UstreamIE, UstreamChannelIE -from .ustudio import UstudioIE +from .ustudio import ( + UstudioIE, + UstudioEmbedIE, +) from .varzesh3 import Varzesh3IE from .vbox7 import Vbox7IE from .veehd import VeeHDIE From 18cf6381f6b140431f3a747fc2d222be08ab2e23 Mon Sep 17 00:00:00 2001 From: remitamine Date: Fri, 13 May 2016 08:05:28 +0100 Subject: [PATCH 17/45] [nrk] extract m3u8 formats --- youtube_dl/extractor/nrk.py | 39 +++++++++++++++---------------------- 1 file changed, 16 insertions(+), 23 deletions(-) diff --git a/youtube_dl/extractor/nrk.py b/youtube_dl/extractor/nrk.py index f0fbdd8be..7532f40c1 100644 --- a/youtube_dl/extractor/nrk.py +++ b/youtube_dl/extractor/nrk.py @@ -15,9 +15,14 @@ from ..utils import ( class NRKBaseIE(InfoExtractor): def _extract_formats(self, manifest_url, video_id, fatal=True): - return self._extract_f4m_formats( + formats = [] + formats.extend(self._extract_f4m_formats( manifest_url + '?hdcore=3.5.0&plugin=aasp-3.5.0.151.81', - video_id, f4m_id='hds', fatal=fatal) + video_id, f4m_id='hds', fatal=fatal)) + formats.extend(self._extract_m3u8_formats(manifest_url.replace( + 'akamaihd.net/z/', 'akamaihd.net/i/').replace('/manifest.f4m', '/master.m3u8'), + video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=fatal)) + return formats def _real_extract(self, url): video_id = self._match_id(url) @@ -121,10 +126,10 @@ class NRKIE(NRKBaseIE): _TESTS = [{ # video 'url': 'http://www.nrk.no/video/PS*150533', - # MD5 is unstable + 'md5': '2f7f6eeb2aacdd99885f355428715cfa', 'info_dict': { 'id': '150533', - 'ext': 'flv', + 'ext': 'mp4', 'title': 'Dompap og andre fugler i Piip-Show', 'description': 'md5:d9261ba34c43b61c812cb6b0269a5c8f', 'duration': 263, @@ -150,32 +155,24 @@ class NRKTVIE(NRKBaseIE): _TESTS = [{ 'url': 'https://tv.nrk.no/serie/20-spoersmaal-tv/MUHH48000314/23-05-2014', + 'md5': '4e9ca6629f09e588ed240fb11619922a', 'info_dict': { - 'id': 'MUHH48000314', + 'id': 'MUHH48000314AA', 'ext': 'mp4', - 'title': '20 spørsmål', + 'title': '20 spørsmål 23.05.2014', 'description': 'md5:bdea103bc35494c143c6a9acdd84887a', - 'upload_date': '20140523', 'duration': 1741.52, }, - 'params': { - # m3u8 download - 'skip_download': True, - }, }, { 'url': 'https://tv.nrk.no/program/mdfp15000514', + 'md5': '43d0be26663d380603a9cf0c24366531', 'info_dict': { - 'id': 'mdfp15000514', + 'id': 'MDFP15000514CA', 'ext': 'mp4', - 'title': 'Grunnlovsjubiléet - Stor ståhei for ingenting', - 'description': 'md5:654c12511f035aed1e42bdf5db3b206a', - 'upload_date': '20140524', + 'title': 'Grunnlovsjubiléet - Stor ståhei for ingenting 24.05.2014', + 'description': 
'md5:89290c5ccde1b3a24bb8050ab67fe1db', 'duration': 4605.08, }, - 'params': { - # m3u8 download - 'skip_download': True, - }, }, { # single playlist video 'url': 'https://tv.nrk.no/serie/tour-de-ski/MSPO40010515/06-01-2015#del=2', @@ -185,7 +182,6 @@ class NRKTVIE(NRKBaseIE): 'ext': 'flv', 'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn 06.01.2015 (del 2:2)', 'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26', - 'upload_date': '20150106', }, 'skip': 'Only works from Norway', }, { @@ -197,7 +193,6 @@ class NRKTVIE(NRKBaseIE): 'ext': 'flv', 'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn 06.01.2015 (del 1:2)', 'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26', - 'upload_date': '20150106', }, }, { 'md5': 'adbd1dbd813edaf532b0a253780719c2', @@ -206,14 +201,12 @@ class NRKTVIE(NRKBaseIE): 'ext': 'flv', 'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn 06.01.2015 (del 2:2)', 'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26', - 'upload_date': '20150106', }, }], 'info_dict': { 'id': 'MSPO40010515', 'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn', 'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26', - 'upload_date': '20150106', 'duration': 6947.52, }, 'skip': 'Only works from Norway', From ad55e101651edc732acac22cfb25d276d6c8bdca Mon Sep 17 00:00:00 2001 From: remitamine Date: Fri, 13 May 2016 08:35:38 +0100 Subject: [PATCH 18/45] [brightcove] change the protocol for m3u8 formats to m3u8_native --- youtube_dl/extractor/brightcove.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py index f0781fc27..fc7fc5b16 100644 --- a/youtube_dl/extractor/brightcove.py +++ b/youtube_dl/extractor/brightcove.py @@ -307,9 +307,10 @@ class BrightcoveLegacyIE(InfoExtractor): playlist_title=playlist_info['mediaCollectionDTO']['displayName']) def _extract_video_info(self, video_info): + video_id = compat_str(video_info['id']) publisher_id = video_info.get('publisherId') info = { - 'id': compat_str(video_info['id']), + 'id': video_id, 'title': video_info['displayName'].strip(), 'description': video_info.get('shortDescription'), 'thumbnail': video_info.get('videoStillURL') or video_info.get('thumbnailURL'), @@ -331,7 +332,8 @@ class BrightcoveLegacyIE(InfoExtractor): url_comp = compat_urllib_parse_urlparse(url) if url_comp.path.endswith('.m3u8'): formats.extend( - self._extract_m3u8_formats(url, info['id'], 'mp4')) + self._extract_m3u8_formats( + url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)) continue elif 'akamaihd.net' in url_comp.netloc: # This type of renditions are served through @@ -365,7 +367,7 @@ class BrightcoveLegacyIE(InfoExtractor): a_format.update({ 'format_id': 'hls%s' % ('-%s' % tbr if tbr else ''), 'ext': 'mp4', - 'protocol': 'm3u8', + 'protocol': 'm3u8_native', }) formats.append(a_format) @@ -395,7 +397,7 @@ class BrightcoveLegacyIE(InfoExtractor): return ad_info if 'url' not in info and not info.get('formats'): - raise ExtractorError('Unable to extract video url for %s' % info['id']) + raise ExtractorError('Unable to extract video url for %s' % video_id) return info @@ -527,7 +529,7 @@ class BrightcoveNewIE(InfoExtractor): if not src: continue formats.extend(self._extract_m3u8_formats( - src, video_id, 'mp4', m3u8_id='hls', fatal=False)) + src, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)) elif source_type == 'application/dash+xml': if not src: continue From cc1028aa6d27aeec39617d1ff8d2edcf1ee989d7 Mon Sep 17 
00:00:00 2001 From: Yen Chi Hsuan Date: Fri, 13 May 2016 18:11:08 +0800 Subject: [PATCH 19/45] [openload] Fix extraction (closes #9472) --- youtube_dl/extractor/openload.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index 456561bcc..5049b870e 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -100,7 +100,7 @@ class OpenloadIE(InfoExtractor): raise ExtractorError('File not found', expected=True) code = self._search_regex( - r']+>\s*]+>([^<]+)', + r'\s*\s*]+>([^<]+)', webpage, 'JS code') decoded = self.openload_decode(code) From f196508f7b872963d13bcff94c0105d743322f71 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 13 May 2016 22:19:00 +0600 Subject: [PATCH 20/45] [imdb] Relax _VALID_URL (Closes #9481) --- youtube_dl/extractor/imdb.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/imdb.py b/youtube_dl/extractor/imdb.py index 8bed8ccd0..203156229 100644 --- a/youtube_dl/extractor/imdb.py +++ b/youtube_dl/extractor/imdb.py @@ -12,9 +12,9 @@ from ..utils import ( class ImdbIE(InfoExtractor): IE_NAME = 'imdb' IE_DESC = 'Internet Movie Database trailers' - _VALID_URL = r'https?://(?:www|m)\.imdb\.com/video/imdb/vi(?P\d+)' + _VALID_URL = r'https?://(?:www|m)\.imdb\.com/video/[^/]+/vi(?P\d+)' - _TEST = { + _TESTS = [{ 'url': 'http://www.imdb.com/video/imdb/vi2524815897', 'info_dict': { 'id': '2524815897', @@ -22,7 +22,10 @@ class ImdbIE(InfoExtractor): 'title': 'Ice Age: Continental Drift Trailer (No. 2) - IMDb', 'description': 'md5:9061c2219254e5d14e03c25c98e96a81', } - } + }, { + 'url': 'http://www.imdb.com/video/_/vi2524815897', + 'only_matching': True, + }] def _real_extract(self, url): video_id = self._match_id(url) From 96c2e3e909171d103beafd1fd88e9d6e215681c6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 13 May 2016 23:25:05 +0600 Subject: [PATCH 21/45] [imdb] Improve extraction --- youtube_dl/extractor/imdb.py | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/imdb.py b/youtube_dl/extractor/imdb.py index 203156229..3a2b7cec5 100644 --- a/youtube_dl/extractor/imdb.py +++ b/youtube_dl/extractor/imdb.py @@ -1,10 +1,10 @@ from __future__ import unicode_literals import re -import json from .common import InfoExtractor from ..utils import ( + mimetype2ext, qualities, ) @@ -51,13 +51,27 @@ class ImdbIE(InfoExtractor): json_data = self._search_regex( r']+class="imdb-player-data"[^>]*?>(.*?)', format_page, 'json data', flags=re.DOTALL) - info = json.loads(json_data) - format_info = info['videoPlayerObject']['video'] - f_id = format_info['ffname'] + info = self._parse_json(json_data, video_id, fatal=False) + if not info: + continue + format_info = info.get('videoPlayerObject', {}).get('video', {}) + if not format_info: + continue + video_info_list = format_info.get('videoInfoList') + if not video_info_list or not isinstance(video_info_list, list): + continue + video_info = video_info_list[0] + if not video_info or not isinstance(video_info, dict): + continue + video_url = video_info.get('videoUrl') + if not video_url: + continue + format_id = format_info.get('ffname') formats.append({ - 'format_id': f_id, - 'url': format_info['videoInfoList'][0]['videoUrl'], - 'quality': quality(f_id), + 'format_id': format_id, + 'url': video_url, + 'ext': mimetype2ext(video_info.get('videoMimeType')), + 'quality': 
quality(format_id), }) self._sort_formats(formats) From 0730be9022b415738e917c4cf72c2347ff0008e0 Mon Sep 17 00:00:00 2001 From: remitamine Date: Fri, 13 May 2016 20:24:36 +0100 Subject: [PATCH 22/45] [sina] fix extraction(fixes #1146) --- youtube_dl/extractor/sina.py | 124 ++++++++++++++++++++++++----------- 1 file changed, 84 insertions(+), 40 deletions(-) diff --git a/youtube_dl/extractor/sina.py b/youtube_dl/extractor/sina.py index d03f1b1d4..8fc66732a 100644 --- a/youtube_dl/extractor/sina.py +++ b/youtube_dl/extractor/sina.py @@ -4,28 +4,35 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..compat import compat_urllib_parse_urlencode -from ..utils import sanitized_Request +from ..utils import ( + HEADRequest, + ExtractorError, + int_or_none, + update_url_query, + qualities, + get_element_by_attribute, + clean_html, +) class SinaIE(InfoExtractor): - _VALID_URL = r'''(?x)https?://(.*?\.)?video\.sina\.com\.cn/ - ( - (.+?/(((?P\d+).html)|(.*?(\#|(vid=)|b/)(?P\d+?)($|&|\-)))) - | + _VALID_URL = r'''(?x)https?://(?:.*?\.)?video\.sina\.com\.cn/ + (?: + (?:view/|.*\#)(?P\d+)| + .+?/(?P[^/?#]+)(?:\.s?html)| # This is used by external sites like Weibo - (api/sinawebApi/outplay.php/(?P.+?)\.swf) + api/sinawebApi/outplay.php/(?P.+?)\.swf ) ''' _TESTS = [ { - 'url': 'http://video.sina.com.cn/news/vlist/zt/chczlj2013/?opsubject_id=top12#110028898', - 'md5': 'd65dd22ddcf44e38ce2bf58a10c3e71f', + 'url': 'http://video.sina.com.cn/news/spj/topvideoes20160504/?opsubject_id=top1#250576622', + 'md5': 'd38433e2fc886007729735650ae4b3e9', 'info_dict': { - 'id': '110028898', - 'ext': 'flv', - 'title': '《中国新闻》 朝鲜要求巴拿马立即释放被扣船员', + 'id': '250576622', + 'ext': 'mp4', + 'title': '现场:克鲁兹宣布退选 特朗普将稳获提名', } }, { @@ -35,37 +42,74 @@ class SinaIE(InfoExtractor): 'ext': 'flv', 'title': '军方提高对朝情报监视级别', }, + 'skip': 'the page does not exist or has been deleted', + }, + { + 'url': 'http://video.sina.com.cn/view/250587748.html', + 'md5': '3d1807a25c775092aab3bc157fff49b4', + 'info_dict': { + 'id': '250587748', + 'ext': 'mp4', + 'title': '瞬间泪目:8年前汶川地震珍贵视频首曝光', + }, }, ] - def _extract_video(self, video_id): - data = compat_urllib_parse_urlencode({'vid': video_id}) - url_doc = self._download_xml('http://v.iask.com/v_play.php?%s' % data, - video_id, 'Downloading video url') - image_page = self._download_webpage( - 'http://interface.video.sina.com.cn/interface/common/getVideoImage.php?%s' % data, - video_id, 'Downloading thumbnail info') - - return {'id': video_id, - 'url': url_doc.find('./durl/url').text, - 'ext': 'flv', - 'title': url_doc.find('./vname').text, - 'thumbnail': image_page.split('=')[1], - } - def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - if mobj.group('token') is not None: - # The video id is in the redirected url - self.to_screen('Getting video id') - request = sanitized_Request(url) - request.get_method = lambda: 'HEAD' - (_, urlh) = self._download_webpage_handle(request, 'NA', False) - return self._real_extract(urlh.geturl()) - elif video_id is None: - pseudo_id = mobj.group('pseudo_id') - webpage = self._download_webpage(url, pseudo_id) - video_id = self._search_regex(r'vid:\'(\d+?)\'', webpage, 'video id') - return self._extract_video(video_id) + video_id = mobj.group('video_id') + if not video_id: + if mobj.group('token') is not None: + # The video id is in the redirected url + self.to_screen('Getting video id') + request = HEADRequest(url) + (_, urlh) = self._download_webpage_handle(request, 'NA', False) + 
return self._real_extract(urlh.geturl()) + else: + pseudo_id = mobj.group('pseudo_id') + webpage = self._download_webpage(url, pseudo_id) + error = get_element_by_attribute('class', 'errtitle', webpage) + if error: + raise ExtractorError('%s said: %s' % ( + self.IE_NAME, clean_html(error)), expected=True) + video_id = self._search_regex( + r"video_id\s*:\s*'(\d+)'", webpage, 'video id') + + video_data = self._download_json( + 'http://s.video.sina.com.cn/video/h5play', + video_id, query={'video_id': video_id}) + if video_data['code'] != 1: + raise ExtractorError('%s said: %s' % ( + self.IE_NAME, video_data['message']), expected=True) + else: + video_data = video_data['data'] + title = video_data['title'] + description = video_data.get('description') + if description: + description = description.strip() + + preference = qualities(['cif', 'sd', 'hd', 'fhd', 'ffd']) + formats = [] + for quality_id, quality in video_data.get('videos', {}).get('mp4', {}).items(): + file_api = quality.get('file_api') + file_id = quality.get('file_id') + if not file_api or not file_id: + continue + formats.append({ + 'format_id': quality_id, + 'url': update_url_query(file_api, {'vid': file_id}), + 'preference': preference(quality_id), + 'ext': 'mp4', + }) + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': title, + 'description': description, + 'thumbnail': video_data.get('image'), + 'duration': int_or_none(video_data.get('length')), + 'timestamp': int_or_none(video_data.get('create_time')), + 'formats': formats, + } From 134c6ea856be472f253bffbe99b88546fe417806 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 14 May 2016 04:46:38 +0600 Subject: [PATCH 23/45] [YoutubeDL] Sanitize url for url and url_transparent extraction results --- youtube_dl/YoutubeDL.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 34eeb77c5..03a6a1890 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -720,6 +720,7 @@ class YoutubeDL(object): result_type = ie_result.get('_type', 'video') if result_type in ('url', 'url_transparent'): + ie_result['url'] = sanitize_url(ie_result['url']) extract_flat = self.params.get('extract_flat', False) if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or extract_flat is True): From b5abf8614898cc728488d7ecc7a55a4c5c92758f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 14 May 2016 04:53:14 +0600 Subject: [PATCH 24/45] [cinemassacre] Remove extractor (Closes #9457) It now uses jwplatform --- youtube_dl/extractor/cinemassacre.py | 119 --------------------------- youtube_dl/extractor/extractors.py | 1 - 2 files changed, 120 deletions(-) delete mode 100644 youtube_dl/extractor/cinemassacre.py diff --git a/youtube_dl/extractor/cinemassacre.py b/youtube_dl/extractor/cinemassacre.py deleted file mode 100644 index 042c4f2f1..000000000 --- a/youtube_dl/extractor/cinemassacre.py +++ /dev/null @@ -1,119 +0,0 @@ -# encoding: utf-8 -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..utils import ExtractorError -from .screenwavemedia import ScreenwaveMediaIE - - -class CinemassacreIE(InfoExtractor): - _VALID_URL = 'https?://(?:www\.)?cinemassacre\.com/(?P[0-9]{4})/(?P[0-9]{2})/(?P[0-9]{2})/(?P[^?#/]+)' - _TESTS = [ - { - 'url': 'http://cinemassacre.com/2012/11/10/avgn-the-movie-trailer/', - 'md5': 'fde81fbafaee331785f58cd6c0d46190', - 'info_dict': { - 'id': 'Cinemassacre-19911', - 'ext': 'mp4', - 'upload_date': 
'20121110', - 'title': '“Angry Video Game Nerd: The Movie” – Trailer', - 'description': 'md5:fb87405fcb42a331742a0dce2708560b', - }, - 'params': { - # m3u8 download - 'skip_download': True, - }, - }, - { - 'url': 'http://cinemassacre.com/2013/10/02/the-mummys-hand-1940', - 'md5': 'd72f10cd39eac4215048f62ab477a511', - 'info_dict': { - 'id': 'Cinemassacre-521be8ef82b16', - 'ext': 'mp4', - 'upload_date': '20131002', - 'title': 'The Mummy’s Hand (1940)', - }, - 'params': { - # m3u8 download - 'skip_download': True, - }, - }, - { - # Youtube embedded video - 'url': 'http://cinemassacre.com/2006/12/07/chronologically-confused-about-bad-movie-and-video-game-sequel-titles/', - 'md5': 'ec9838a5520ef5409b3e4e42fcb0a3b9', - 'info_dict': { - 'id': 'OEVzPCY2T-g', - 'ext': 'webm', - 'title': 'AVGN: Chronologically Confused about Bad Movie and Video Game Sequel Titles', - 'upload_date': '20061207', - 'uploader': 'Cinemassacre', - 'uploader_id': 'JamesNintendoNerd', - 'description': 'md5:784734696c2b8b7f4b8625cc799e07f6', - } - }, - { - # Youtube embedded video - 'url': 'http://cinemassacre.com/2006/09/01/mckids/', - 'md5': '7393c4e0f54602ad110c793eb7a6513a', - 'info_dict': { - 'id': 'FnxsNhuikpo', - 'ext': 'webm', - 'upload_date': '20060901', - 'uploader': 'Cinemassacre Extra', - 'description': 'md5:de9b751efa9e45fbaafd9c8a1123ed53', - 'uploader_id': 'Cinemassacre', - 'title': 'AVGN: McKids', - } - }, - { - 'url': 'http://cinemassacre.com/2015/05/25/mario-kart-64-nintendo-64-james-mike-mondays/', - 'md5': '1376908e49572389e7b06251a53cdd08', - 'info_dict': { - 'id': 'Cinemassacre-555779690c440', - 'ext': 'mp4', - 'description': 'Let’s Play Mario Kart 64 !! Mario Kart 64 is a classic go-kart racing game released for the Nintendo 64 (N64). Today James & Mike do 4 player Battle Mode with Kyle and Bootsy!', - 'title': 'Mario Kart 64 (Nintendo 64) James & Mike Mondays', - 'upload_date': '20150525', - }, - 'params': { - # m3u8 download - 'skip_download': True, - }, - } - ] - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - display_id = mobj.group('display_id') - video_date = mobj.group('date_y') + mobj.group('date_m') + mobj.group('date_d') - - webpage = self._download_webpage(url, display_id) - - playerdata_url = self._search_regex( - [ - ScreenwaveMediaIE.EMBED_PATTERN, - r']+src="(?P(?:https?:)?//(?:[^.]+\.)?youtube\.com/.+?)"', - ], - webpage, 'player data URL', default=None, group='url') - if not playerdata_url: - raise ExtractorError('Unable to find player data') - - video_title = self._html_search_regex( - r'(?P<title>.+?)\|', webpage, 'title') - video_description = self._html_search_regex( - r'<div class="entry-content">(?P<description>.+?)</div>', - webpage, 'description', flags=re.DOTALL, fatal=False) - video_thumbnail = self._og_search_thumbnail(webpage) - - return { - '_type': 'url_transparent', - 'display_id': display_id, - 'title': video_title, - 'description': video_description, - 'upload_date': video_date, - 'thumbnail': video_thumbnail, - 'url': playerdata_url, - } diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 50d2204f2..b6f4ccc5d 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -124,7 +124,6 @@ from .chirbit import ( ChirbitProfileIE, ) from .cinchcast import CinchcastIE -from .cinemassacre import CinemassacreIE from .cliprs import ClipRsIE from .clipfish import ClipfishIE from .cliphunter import CliphunterIE From 98d560f205e6aeddc767844d142b00525a9eaff9 Mon Sep 17 00:00:00 2001 
From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sat, 14 May 2016 18:48:36 +0800 Subject: [PATCH 25/45] [test/test_socks] Skip SOCKS tests They occasional trigger errors or blocks (https://travis-ci.org/rg3/youtube-dl/jobs/130184883) --- test/test_socks.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/test/test_socks.py b/test/test_socks.py index d07003ceb..1e68eb0da 100644 --- a/test/test_socks.py +++ b/test/test_socks.py @@ -77,17 +77,28 @@ class TestMultipleSocks(unittest.TestCase): class TestSocks(unittest.TestCase): + _SKIP_SOCKS_TEST = True + def setUp(self): + if self._SKIP_SOCKS_TEST: + return + self.port = random.randint(20000, 30000) self.server_process = subprocess.Popen([ 'srelay', '-f', '-i', '127.0.0.1:%d' % self.port], stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE) def tearDown(self): + if self._SKIP_SOCKS_TEST: + return + self.server_process.terminate() self.server_process.communicate() def _get_ip(self, protocol): + if self._SKIP_SOCKS_TEST: + return '127.0.0.1' + ydl = FakeYDL({ 'proxy': '%s://127.0.0.1:%d' % (protocol, self.port), }) From 791ff52f753ee123426766aaa5320eb63a874b7b Mon Sep 17 00:00:00 2001 From: Jakub Wilk <jwilk@jwilk.net> Date: Sat, 14 May 2016 13:19:54 +0200 Subject: [PATCH 26/45] [teamcoco] Fix base64 regexp --- youtube_dl/extractor/teamcoco.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/teamcoco.py b/youtube_dl/extractor/teamcoco.py index b49ab5f5b..79a778920 100644 --- a/youtube_dl/extractor/teamcoco.py +++ b/youtube_dl/extractor/teamcoco.py @@ -88,7 +88,7 @@ class TeamcocoIE(InfoExtractor): preload_codes = self._html_search_regex( r'(function.+)setTimeout\(function\(\)\{playlist', webpage, 'preload codes') - base64_fragments = re.findall(r'"([a-zA-z0-9+/=]+)"', preload_codes) + base64_fragments = re.findall(r'"([a-zA-Z0-9+/=]+)"', preload_codes) base64_fragments.remove('init') def _check_sequence(cur_fragments): From 66e7ace17a36ed0f761ae620801e9e27d5c3cb3f Mon Sep 17 00:00:00 2001 From: Jakub Wilk <jwilk@jwilk.net> Date: Sat, 14 May 2016 13:41:41 +0200 Subject: [PATCH 27/45] Don't hardcode errno constant The value of ENOENT is architecture-dependent, so don't assume it's always 2. --- youtube_dl/YoutubeDL.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 03a6a1890..3917ca9dc 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -326,7 +326,7 @@ class YoutubeDL(object): ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs) self._output_channel = os.fdopen(master, 'rb') except OSError as ose: - if ose.errno == 2: + if ose.errno == errno.ENOENT: self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.') else: raise From bd1e484448c84904ce0d99fe05c3721053aa3c00 Mon Sep 17 00:00:00 2001 From: felix <felix.von.s@posteo.de> Date: Sun, 13 Mar 2016 12:29:15 +0100 Subject: [PATCH 28/45] [utils] js_to_json: various improvements now JS object literals like { /* " */ 0: ",]\xaa<\/p>", } will be correctly converted to JSON. 
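As a rough illustration of the new behaviour (a sketch mirroring the test cases added below; it assumes the patched youtube_dl.utils is importable):

    import json
    from youtube_dl.utils import js_to_json

    # comments, trailing commas, \x escapes and escaped slashes are now handled
    print(json.loads(js_to_json('{ 0: /* " \n */ ",]" , }')))  # {'0': ',]'}
    print(json.loads(js_to_json(r'["<p>x<\/p>"]')))            # ['<p>x</p>']
    print(json.loads(js_to_json(r'["\xaa"]')))                 # ['\u00aa']
    print(json.loads(js_to_json("['a\\\nb']")))                # ['ab']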
--- test/test_utils.py | 12 ++++++++++++ youtube_dl/utils.py | 30 ++++++++++++++++-------------- 2 files changed, 28 insertions(+), 14 deletions(-) diff --git a/test/test_utils.py b/test/test_utils.py index ca254779f..ab2842f3b 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -640,6 +640,18 @@ class TestUtil(unittest.TestCase): on = js_to_json('{"abc": "def",}') self.assertEqual(json.loads(on), {'abc': 'def'}) + on = js_to_json('{ 0: /* " \n */ ",]" , }') + self.assertEqual(json.loads(on), {'0': ',]'}) + + on = js_to_json(r'["<p>x<\/p>"]') + self.assertEqual(json.loads(on), ['<p>x</p>']) + + on = js_to_json(r'["\xaa"]') + self.assertEqual(json.loads(on), ['\u00aa']) + + on = js_to_json("['a\\\nb']") + self.assertEqual(json.loads(on), ['ab']) + def test_extract_attributes(self): self.assertEqual(extract_attributes('<e x="y">'), {'x': 'y'}) self.assertEqual(extract_attributes("<e x='y'>"), {'x': 'y'}) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index d6f94f8cd..52a20632f 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1914,24 +1914,26 @@ def js_to_json(code): v = m.group(0) if v in ('true', 'false', 'null'): return v - if v.startswith('"'): - v = re.sub(r"\\'", "'", v[1:-1]) - elif v.startswith("'"): - v = v[1:-1] - v = re.sub(r"\\\\|\\'|\"", lambda m: { - '\\\\': '\\\\', - "\\'": "'", + elif v.startswith('/*') or v == ',': + return "" + + if v[0] in ("'", '"'): + v = re.sub(r'(?s)\\.|"', lambda m: { '"': '\\"', - }[m.group(0)], v) + "\\'": "'", + '\\\n': '', + '\\x': '\\u00', + }.get(m.group(0), m.group(0)), v[1:-1]) + return '"%s"' % v - res = re.sub(r'''(?x) - "(?:[^"\\]*(?:\\\\|\\['"nu]))*[^"\\]*"| - '(?:[^'\\]*(?:\\\\|\\['"nu]))*[^'\\]*'| - [a-zA-Z_][.a-zA-Z_0-9]* + return re.sub(r'''(?sx) + "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"| + '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'| + /\*.*?\*/|,(?=\s*[\]}])| + [a-zA-Z_][.a-zA-Z_0-9]*| + [0-9]+(?=\s*:) ''', fix_kv, code) - res = re.sub(r',(\s*[\]}])', lambda m: m.group(1), res) - return res def qualities(quality_ids): From 640eea0a0cf7ae589126f7762e1cfc7bdd2250d9 Mon Sep 17 00:00:00 2001 From: felix <felix.von.s@posteo.de> Date: Sun, 20 Mar 2016 12:17:57 +0100 Subject: [PATCH 29/45] [ora] minimise fragile regex shenanigans; recognise unsafespeech.com URLs --- youtube_dl/extractor/ora.py | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/ora.py b/youtube_dl/extractor/ora.py index 8545fb1b8..cfae71bcc 100644 --- a/youtube_dl/extractor/ora.py +++ b/youtube_dl/extractor/ora.py @@ -6,13 +6,14 @@ from .common import InfoExtractor from ..compat import compat_urlparse from ..utils import ( get_element_by_attribute, + js_to_json, qualities, unescapeHTML, ) class OraTVIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?ora\.tv/([^/]+/)*(?P<id>[^/\?#]+)' + _VALID_URL = r'https?://(?:www\.)?(ora\.tv|unsafespeech\.com)/([^/]+/)*(?P<id>[^/\?#]+)' _TEST = { 'url': 'https://www.ora.tv/larrykingnow/2015/12/16/vine-youtube-stars-zach-king-king-bach-on-their-viral-videos-0_36jupg6090pq', 'md5': 'fa33717591c631ec93b04b0e330df786', @@ -28,10 +29,13 @@ class OraTVIE(InfoExtractor): display_id = self._match_id(url) webpage = self._download_webpage(url, display_id) - video_data = self._search_regex( - r'"(?:video|current)"\s*:\s*({[^}]+?})', webpage, 'current video') - m3u8_url = self._search_regex( - r'hls_stream"?\s*:\s*"([^"]+)', video_data, 'm3u8 url', None) + ora_meta = self._parse_json(self._search_regex( + r'(?s);\s*ora_meta = ({.*?});</script>', 
webpage, 'ora_meta'), display_id, + transform_source=lambda data: js_to_json(re.sub('":(document|\().*?(:false|\(\)),', '":null,', data))) + + video_data = ora_meta.get('video', ora_meta.get('current')) + m3u8_url = video_data['hls_stream'] + if m3u8_url: formats = self._extract_m3u8_formats( m3u8_url, display_id, 'mp4', 'm3u8_native', @@ -60,13 +64,11 @@ class OraTVIE(InfoExtractor): r'"youtube_id"\s*:\s*"([^"]+)', webpage, 'youtube id'), 'Youtube') return { - 'id': self._search_regex( - r'"id"\s*:\s*(\d+)', video_data, 'video id', default=display_id), + 'id': video_data.get('id', display_id), 'display_id': display_id, 'title': unescapeHTML(self._og_search_title(webpage)), 'description': get_element_by_attribute( 'class', 'video_txt_decription', webpage), - 'thumbnail': self._proto_relative_url(self._search_regex( - r'"thumb"\s*:\s*"([^"]+)', video_data, 'thumbnail', None)), + 'thumbnail': self._proto_relative_url(video_data.get('thumb')), 'formats': formats, } From 89ac4a19e658203db85c6a1d4b267a2eeb47a38e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 14 May 2016 20:39:58 +0600 Subject: [PATCH 30/45] [utils] Process non-base 10 integers in js_to_json --- test/test_utils.py | 19 +++++++++++++++++++ youtube_dl/utils.py | 12 ++++++++++++ 2 files changed, 31 insertions(+) diff --git a/test/test_utils.py b/test/test_utils.py index ab2842f3b..26f66bff6 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -617,6 +617,15 @@ class TestUtil(unittest.TestCase): json_code = js_to_json(inp) self.assertEqual(json.loads(json_code), json.loads(inp)) + inp = '''{ + 0:{src:'skipped', type: 'application/dash+xml'}, + 1:{src:'skipped', type: 'application/vnd.apple.mpegURL'}, + }''' + self.assertEqual(js_to_json(inp), '''{ + "0":{"src":"skipped", "type": "application/dash+xml"}, + "1":{"src":"skipped", "type": "application/vnd.apple.mpegURL"} + }''') + def test_js_to_json_edgecases(self): on = js_to_json("{abc_def:'1\\'\\\\2\\\\\\'3\"4'}") self.assertEqual(json.loads(on), {"abc_def": "1'\\2\\'3\"4"}) @@ -652,6 +661,16 @@ class TestUtil(unittest.TestCase): on = js_to_json("['a\\\nb']") self.assertEqual(json.loads(on), ['ab']) + on = js_to_json('{0xff:0xff}') + self.assertEqual(json.loads(on), {'255': 255}) + + on = js_to_json('{077:077}') + self.assertEqual(json.loads(on), {'63': 63}) + + on = js_to_json('{42:42}') + self.assertEqual(json.loads(on), {'42': 42}) + + def test_extract_attributes(self): self.assertEqual(extract_attributes('<e x="y">'), {'x': 'y'}) self.assertEqual(extract_attributes("<e x='y'>"), {'x': 'y'}) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 52a20632f..25a9f33c0 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1925,6 +1925,17 @@ def js_to_json(code): '\\x': '\\u00', }.get(m.group(0), m.group(0)), v[1:-1]) + INTEGER_TABLE = ( + (r'^(0[xX][0-9a-fA-F]+)', 16), + (r'^(0+[0-7]+)', 8), + ) + + for regex, base in INTEGER_TABLE: + im = re.match(regex, v) + if im: + i = int(im.group(1), base) + return '"%d":' % i if v.endswith(':') else '%d' % i + return '"%s"' % v return re.sub(r'''(?sx) @@ -1932,6 +1943,7 @@ def js_to_json(code): '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'| /\*.*?\*/|,(?=\s*[\]}])| [a-zA-Z_][.a-zA-Z_0-9]*| + (?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:\s*:)?| [0-9]+(?=\s*:) ''', fix_kv, code) From ca950f49e909baf6672034ffc2c1c2ee7133cf23 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 14 May 2016 20:45:18 +0600 Subject: [PATCH 31/45] [ora] Revert extraction 
to regexes It's less fragile than using js_to_json with ora js --- youtube_dl/extractor/ora.py | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/youtube_dl/extractor/ora.py b/youtube_dl/extractor/ora.py index cfae71bcc..1d42be39b 100644 --- a/youtube_dl/extractor/ora.py +++ b/youtube_dl/extractor/ora.py @@ -6,15 +6,14 @@ from .common import InfoExtractor from ..compat import compat_urlparse from ..utils import ( get_element_by_attribute, - js_to_json, qualities, unescapeHTML, ) class OraTVIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?(ora\.tv|unsafespeech\.com)/([^/]+/)*(?P<id>[^/\?#]+)' - _TEST = { + _VALID_URL = r'https?://(?:www\.)?(?:ora\.tv|unsafespeech\.com)/([^/]+/)*(?P<id>[^/\?#]+)' + _TESTS = [{ 'url': 'https://www.ora.tv/larrykingnow/2015/12/16/vine-youtube-stars-zach-king-king-bach-on-their-viral-videos-0_36jupg6090pq', 'md5': 'fa33717591c631ec93b04b0e330df786', 'info_dict': { @@ -23,19 +22,19 @@ class OraTVIE(InfoExtractor): 'title': 'Vine & YouTube Stars Zach King & King Bach On Their Viral Videos!', 'description': 'md5:ebbc5b1424dd5dba7be7538148287ac1', } - } + }, { + 'url': 'http://www.unsafespeech.com/video/2016/5/10/student-self-censorship-and-the-thought-police-on-university-campuses-0_6622bnkppw4d', + 'only_matching': True, + }] def _real_extract(self, url): display_id = self._match_id(url) webpage = self._download_webpage(url, display_id) - ora_meta = self._parse_json(self._search_regex( - r'(?s);\s*ora_meta = ({.*?});</script>', webpage, 'ora_meta'), display_id, - transform_source=lambda data: js_to_json(re.sub('":(document|\().*?(:false|\(\)),', '":null,', data))) - - video_data = ora_meta.get('video', ora_meta.get('current')) - m3u8_url = video_data['hls_stream'] - + video_data = self._search_regex( + r'"(?:video|current)"\s*:\s*({[^}]+?})', webpage, 'current video') + m3u8_url = self._search_regex( + r'hls_stream"?\s*:\s*"([^"]+)', video_data, 'm3u8 url', None) if m3u8_url: formats = self._extract_m3u8_formats( m3u8_url, display_id, 'mp4', 'm3u8_native', @@ -64,11 +63,13 @@ class OraTVIE(InfoExtractor): r'"youtube_id"\s*:\s*"([^"]+)', webpage, 'youtube id'), 'Youtube') return { - 'id': video_data.get('id', display_id), + 'id': self._search_regex( + r'"id"\s*:\s*(\d+)', video_data, 'video id', default=display_id), 'display_id': display_id, 'title': unescapeHTML(self._og_search_title(webpage)), 'description': get_element_by_attribute( 'class', 'video_txt_decription', webpage), - 'thumbnail': self._proto_relative_url(video_data.get('thumb')), + 'thumbnail': self._proto_relative_url(self._search_regex( + r'"thumb"\s*:\s*"([^"]+)', video_data, 'thumbnail', None)), 'formats': formats, } From 364cf465dd53e8006f5523c348f127f8df657bc3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 14 May 2016 20:46:33 +0600 Subject: [PATCH 32/45] [test_utils] PEP 8 --- test/test_utils.py | 1 - 1 file changed, 1 deletion(-) diff --git a/test/test_utils.py b/test/test_utils.py index 26f66bff6..520d32ff5 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -670,7 +670,6 @@ class TestUtil(unittest.TestCase): on = js_to_json('{42:42}') self.assertEqual(json.loads(on), {'42': 42}) - def test_extract_attributes(self): self.assertEqual(extract_attributes('<e x="y">'), {'x': 'y'}) self.assertEqual(extract_attributes("<e x='y'>"), {'x': 'y'}) From 5c86bfe70ff0048e59c6e890af14a055522fd3fd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 14 May 2016 
23:35:03 +0600 Subject: [PATCH 33/45] [3qsdn] Add extractor --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/threeqsdn.py | 132 +++++++++++++++++++++++++++++ 2 files changed, 133 insertions(+) create mode 100644 youtube_dl/extractor/threeqsdn.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index b6f4ccc5d..2db3b3c3f 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -766,6 +766,7 @@ from .thesixtyone import TheSixtyOneIE from .thestar import TheStarIE from .thisamericanlife import ThisAmericanLifeIE from .thisav import ThisAVIE +from .threeqsdn import ThreeQSDNIE from .tinypic import TinyPicIE from .tlc import TlcDeIE from .tmz import ( diff --git a/youtube_dl/extractor/threeqsdn.py b/youtube_dl/extractor/threeqsdn.py new file mode 100644 index 000000000..27a3de5c4 --- /dev/null +++ b/youtube_dl/extractor/threeqsdn.py @@ -0,0 +1,132 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + determine_ext, + js_to_json, + mimetype2ext, +) + + +class ThreeQSDNIE(InfoExtractor): + IE_NAME = '3qsdn' + IE_DESC = '3Q SDN' + _VALID_URL = r'https?://playout\.3qsdn\.com/(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})' + _TESTS = [{ + # ondemand from http://www.philharmonie.tv/veranstaltung/26/ + 'url': 'http://playout.3qsdn.com/0280d6b9-1215-11e6-b427-0cc47a188158?protocol=http', + 'md5': 'ab040e37bcfa2e0c079f92cb1dd7f6cd', + 'info_dict': { + 'id': '0280d6b9-1215-11e6-b427-0cc47a188158', + 'ext': 'mp4', + 'title': '0280d6b9-1215-11e6-b427-0cc47a188158', + 'is_live': False, + }, + 'expected_warnings': ['Failed to download MPD manifest'], + }, { + # live video stream + 'url': 'https://playout.3qsdn.com/d755d94b-4ab9-11e3-9162-0025907ad44f?js=true', + 'info_dict': { + 'id': 'd755d94b-4ab9-11e3-9162-0025907ad44f', + 'ext': 'mp4', + 'title': 'd755d94b-4ab9-11e3-9162-0025907ad44f', + 'is_live': False, + }, + }, { + # live audio stream + 'url': 'http://playout.3qsdn.com/9edf36e0-6bf2-11e2-a16a-9acf09e2db48', + 'only_matching': True, + }, { + # live audio stream with some 404 URLs + 'url': 'http://playout.3qsdn.com/ac5c3186-777a-11e2-9c30-9acf09e2db48', + 'only_matching': True, + }, { + # geo restricted with 'This content is not available in your country' + 'url': 'http://playout.3qsdn.com/d63a3ffe-75e8-11e2-9c30-9acf09e2db48', + 'only_matching': True, + }, { + # geo restricted with 'playout.3qsdn.com/forbidden' + 'url': 'http://playout.3qsdn.com/8e330f26-6ae2-11e2-a16a-9acf09e2db48', + 'only_matching': True, + }, { + # live video with rtmp link + 'url': 'https://playout.3qsdn.com/6092bb9e-8f72-11e4-a173-002590c750be', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + js = self._download_webpage( + 'http://playout.3qsdn.com/%s' % video_id, video_id, + query={'js': 'true'}) + + if any(p in js for p in ( + '>This content is not available in your country', + 'playout.3qsdn.com/forbidden')): + self.raise_geo_restricted() + + stream_content = self._search_regex( + r'streamContent\s*:\s*(["\'])(?P<content>.+?)\1', js, + 'stream content', default='demand', group='content') + + live = stream_content == 'live' + + stream_type = self._search_regex( + r'streamType\s*:\s*(["\'])(?P<type>audio|video)\1', js, + 'stream type', default='video', group='type') + + formats = [] + urls = set() + + def extract_formats(item_url, item={}): + if not item_url or item_url in urls: + return + urls.add(item_url) 
+ type_ = item.get('type') + ext = determine_ext(item_url, default_ext=None) + if type_ == 'application/dash+xml' or ext == 'mpd': + formats.extend(self._extract_mpd_formats( + item_url, video_id, mpd_id='mpd', fatal=False)) + elif type_ in ('application/vnd.apple.mpegURL', 'application/x-mpegurl') or ext == 'm3u8': + formats.extend(self._extract_m3u8_formats( + item_url, video_id, 'mp4', + entry_protocol='m3u8' if live else 'm3u8_native', + m3u8_id='hls', fatal=False)) + elif ext == 'f4m': + formats.extend(self._extract_f4m_formats( + item_url, video_id, f4m_id='hds', fatal=False)) + else: + if not self._is_valid_url(item_url, video_id): + return + formats.append({ + 'url': item_url, + 'format_id': item.get('quality'), + 'ext': 'mp4' if item_url.startswith('rtsp') else mimetype2ext(type_) or ext, + 'vcodec': 'none' if stream_type == 'audio' else None, + }) + + for item_js in re.findall(r'({.*?\b(?:src|source)\s*:\s*["\'].+?})', js): + f = self._parse_json( + item_js, video_id, transform_source=js_to_json, fatal=False) + if not f: + continue + extract_formats(f.get('src'), f) + + # More relaxed version to collect additional URLs and acting + # as a future-proof fallback + for _, src in re.findall(r'\b(?:src|source)\s*:\s*(["\'])((?:https?|rtsp)://.+?)\1', js): + extract_formats(src) + + self._sort_formats(formats) + + title = self._live_title(video_id) if live else video_id + + return { + 'id': video_id, + 'title': title, + 'is_live': live, + 'formats': formats, + } From 5d39176f6de8bab1e019ead7cd497659f3fc1a94 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 14 May 2016 23:40:34 +0600 Subject: [PATCH 34/45] [extractor/generic:3qsdn] Add support for embeds --- youtube_dl/extractor/generic.py | 6 ++++++ youtube_dl/extractor/threeqsdn.py | 7 +++++++ 2 files changed, 13 insertions(+) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 0f1eb7fa6..b48ccfc97 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -61,6 +61,7 @@ from .jwplatform import JWPlatformIE from .digiteka import DigitekaIE from .instagram import InstagramIE from .liveleak import LiveLeakIE +from .threeqsdn import ThreeQSDNIE class GenericIE(InfoExtractor): @@ -1983,6 +1984,11 @@ class GenericIE(InfoExtractor): if liveleak_url: return self.url_result(liveleak_url, 'LiveLeak') + # Look for 3Q SDN embeds + threeqsdn_url = ThreeQSDNIE._extract_url(webpage) + if threeqsdn_url: + return self.url_result(self._proto_relative_url(threeqsdn_url), ThreeQSDNIE.ie_key()) + def check_video(vurl): if YoutubeIE.suitable(vurl): return True diff --git a/youtube_dl/extractor/threeqsdn.py b/youtube_dl/extractor/threeqsdn.py index 27a3de5c4..c77a07989 100644 --- a/youtube_dl/extractor/threeqsdn.py +++ b/youtube_dl/extractor/threeqsdn.py @@ -56,6 +56,13 @@ class ThreeQSDNIE(InfoExtractor): 'only_matching': True, }] + @staticmethod + def _extract_url(webpage): + mobj = re.search( + r'<iframe[^>]+\b(?:data-)?src=(["\'])(?P<url>%s.*?)\1' % ThreeQSDNIE._VALID_URL, webpage) + if mobj: + return mobj.group('url') + def _real_extract(self, url): video_id = self._match_id(url) From cda6d47aad106a825f837c7a583fffc783c4b63b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 14 May 2016 23:41:57 +0600 Subject: [PATCH 35/45] [utils] Simplify integer conversion in js_to_json --- youtube_dl/utils.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py 
index 25a9f33c0..a637563cb 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1926,14 +1926,14 @@ def js_to_json(code): }.get(m.group(0), m.group(0)), v[1:-1]) INTEGER_TABLE = ( - (r'^(0[xX][0-9a-fA-F]+)', 16), - (r'^(0+[0-7]+)', 8), + (r'^0[xX][0-9a-fA-F]+', 16), + (r'^0+[0-7]+', 8), ) for regex, base in INTEGER_TABLE: im = re.match(regex, v) if im: - i = int(im.group(1), base) + i = int(im.group(0), base) return '"%d":' % i if v.endswith(':') else '%d' % i return '"%s"' % v From 6f41b2bcf16899f8c3f0ea705b2914cf1ae668a3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 14 May 2016 23:58:25 +0600 Subject: [PATCH 36/45] [extractor/generic] Improve 3qsdn embeds support (Closes #9453) --- youtube_dl/extractor/generic.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index b48ccfc97..a6b1e23e3 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -1428,7 +1428,8 @@ class GenericIE(InfoExtractor): # Site Name | Video Title # Video Title - Tagline | Site Name # and so on and so forth; it's just not practical - video_title = self._html_search_regex( + video_title = self._og_search_title( + webpage, default=None) or self._html_search_regex( r'(?s)<title>(.*?)', webpage, 'video title', default='video') @@ -1446,6 +1447,9 @@ class GenericIE(InfoExtractor): video_uploader = self._search_regex( r'^(?:https?://)?([^/]*)/.*', url, 'video uploader') + video_description = self._og_search_description(webpage, default=None) + video_thumbnail = self._og_search_thumbnail(webpage, default=None) + # Helper method def _playlist_from_matches(matches, getter=None, ie=None): urlrs = orderedSet( @@ -1987,7 +1991,15 @@ class GenericIE(InfoExtractor): # Look for 3Q SDN embeds threeqsdn_url = ThreeQSDNIE._extract_url(webpage) if threeqsdn_url: - return self.url_result(self._proto_relative_url(threeqsdn_url), ThreeQSDNIE.ie_key()) + return { + '_type': 'url_transparent', + 'ie_key': ThreeQSDNIE.ie_key(), + 'url': self._proto_relative_url(threeqsdn_url), + 'title': video_title, + 'description': video_description, + 'thumbnail': video_thumbnail, + 'uploader': video_uploader, + } def check_video(vurl): if YoutubeIE.suitable(vurl): From ed56f260399728f1975dd30f4c8ee110cf106d84 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 15 May 2016 03:34:35 +0600 Subject: [PATCH 37/45] [extractor/common] Improve name extraction for m3u8 formats --- youtube_dl/extractor/common.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 0843d89af..8a8c07226 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1139,11 +1139,14 @@ class InfoExtractor(object): if m3u8_id: format_id.append(m3u8_id) last_media_name = last_media.get('NAME') if last_media and last_media.get('TYPE') != 'SUBTITLES' else None + # Despite specification does not mention NAME attribute for + # EXT-X-STREAM-INF it still sometimes may be present + stream_name = last_info.get('NAME') or last_media_name # Bandwidth of live streams may differ over time thus making # format_id unpredictable. So it's better to keep provided # format_id intact. 
if not live: - format_id.append(last_media_name if last_media_name else '%d' % (tbr if tbr else len(formats))) + format_id.append(stream_name if stream_name else '%d' % (tbr if tbr else len(formats))) f = { 'format_id': '-'.join(format_id), 'url': format_url(line.strip()), From 69c9cc2716a4d076b023096c23b6f7646627824a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 15 May 2016 03:38:04 +0600 Subject: [PATCH 38/45] [xvideos] Extract html5 player formats (Closes #9495) --- youtube_dl/extractor/xvideos.py | 43 ++++++++++++++++----------------- 1 file changed, 21 insertions(+), 22 deletions(-) diff --git a/youtube_dl/extractor/xvideos.py b/youtube_dl/extractor/xvideos.py index 710ad5041..1dfe031ca 100644 --- a/youtube_dl/extractor/xvideos.py +++ b/youtube_dl/extractor/xvideos.py @@ -8,7 +8,6 @@ from ..utils import ( clean_html, ExtractorError, determine_ext, - sanitized_Request, ) @@ -25,8 +24,6 @@ class XVideosIE(InfoExtractor): } } - _ANDROID_USER_AGENT = 'Mozilla/5.0 (Linux; Android 4.0.4; Galaxy Nexus Build/IMM76B) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.133 Mobile Safari/535.19' - def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) @@ -35,31 +32,34 @@ class XVideosIE(InfoExtractor): if mobj: raise ExtractorError('%s said: %s' % (self.IE_NAME, clean_html(mobj.group(1))), expected=True) - video_url = compat_urllib_parse_unquote( - self._search_regex(r'flv_url=(.+?)&', webpage, 'video URL')) video_title = self._html_search_regex( r'(.*?)\s+-\s+XVID', webpage, 'title') video_thumbnail = self._search_regex( r'url_bigthumb=(.+?)&', webpage, 'thumbnail', fatal=False) - formats = [{ - 'url': video_url, - }] + formats = [] - android_req = sanitized_Request(url) - android_req.add_header('User-Agent', self._ANDROID_USER_AGENT) - android_webpage = self._download_webpage(android_req, video_id, fatal=False) + video_url = compat_urllib_parse_unquote(self._search_regex( + r'flv_url=(.+?)&', webpage, 'video URL', default='')) + if video_url: + formats.append({'url': video_url}) - if android_webpage is not None: - player_params_str = self._search_regex( - 'mobileReplacePlayerDivTwoQual\(([^)]+)\)', - android_webpage, 'player parameters', default='') - player_params = list(map(lambda s: s.strip(' \''), player_params_str.split(','))) - if player_params: - formats.extend([{ - 'url': param, - 'preference': -10, - } for param in player_params if determine_ext(param) == 'mp4']) + player_args = self._search_regex( + r'(?s)new\s+HTML5Player\((.+?)\)', webpage, ' html5 player', default=None) + if player_args: + for arg in player_args.split(','): + format_url = self._search_regex( + r'(["\'])(?P<url>https?://.+?)\1', arg, 'url', + default=None, group='url') + if not format_url: + continue + ext = determine_ext(format_url) + if ext == 'mp4': + formats.append({'url': format_url}) + elif ext == 'm3u8': + formats.extend(self._extract_m3u8_formats( + format_url, video_id, 'mp4', + entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)) self._sort_formats(formats) @@ -67,7 +67,6 @@ class XVideosIE(InfoExtractor): 'id': video_id, 'formats': formats, 'title': video_title, - 'ext': 'flv', 'thumbnail': video_thumbnail, 'age_limit': 18, } From 79298173c5a957456cb17b2b26338a657f1aae1e Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sun, 15 May 2016 15:32:54 +0800 Subject: [PATCH 39/45] [utils] Fix getheader in urlhandle_detect_ext Fixes #7049, related to #9440 --- youtube_dl/utils.py | 6 +----- 1 file 
changed, 1 insertion(+), 5 deletions(-) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index a637563cb..24e74428b 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -2031,11 +2031,7 @@ def mimetype2ext(mt): def urlhandle_detect_ext(url_handle): - try: - url_handle.headers - getheader = lambda h: url_handle.headers[h] - except AttributeError: # Python < 3 - getheader = url_handle.info().getheader + getheader = url_handle.headers.get cd = getheader('Content-Disposition') if cd: From cec9727c7f6a0dad8b10a51f0a6581ac5a1dbe86 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sun, 15 May 2016 15:35:31 +0800 Subject: [PATCH 40/45] [hearthisat] Detect invalid download links (fixes #9440) --- youtube_dl/extractor/hearthisat.py | 38 +++++++++++++++++++++++------- 1 file changed, 29 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/hearthisat.py b/youtube_dl/extractor/hearthisat.py index 7d8698655..ac42ef414 100644 --- a/youtube_dl/extractor/hearthisat.py +++ b/youtube_dl/extractor/hearthisat.py @@ -7,6 +7,7 @@ from .common import InfoExtractor from ..compat import compat_urlparse from ..utils import ( HEADRequest, + KNOWN_EXTENSIONS, sanitized_Request, str_to_int, urlencode_postdata, @@ -17,7 +18,7 @@ from ..utils import ( class HearThisAtIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?hearthis\.at/(?P<artist>[^/]+)/(?P<title>[A-Za-z0-9\-]+)/?$' _PLAYLIST_URL = 'https://hearthis.at/playlist.php' - _TEST = { + _TESTS = [{ 'url': 'https://hearthis.at/moofi/dr-kreep', 'md5': 'ab6ec33c8fed6556029337c7885eb4e0', 'info_dict': { @@ -34,7 +35,25 @@ class HearThisAtIE(InfoExtractor): 'duration': 71, 'categories': ['Experimental'], } - } + }, { + # 'download' link redirects to the original webpage + 'url': 'https://hearthis.at/twitchsf/dj-jim-hopkins-totally-bitchin-80s-dance-mix/', + 'md5': '5980ceb7c461605d30f1f039df160c6e', + 'info_dict': { + 'id': '811296', + 'ext': 'mp3', + 'title': 'TwitchSF - DJ Jim Hopkins - Totally Bitchin\' 80\'s Dance Mix!', + 'description': 'Listen to DJ Jim Hopkins - Totally Bitchin\' 80\'s Dance Mix! 
by TwitchSF on hearthis.at - Dance', + 'upload_date': '20160328', + 'timestamp': 1459186146, + 'thumbnail': 're:^https?://.*\.jpg$', + 'comment_count': int, + 'view_count': int, + 'like_count': int, + 'duration': 4360, + 'categories': ['Dance'], + }, + }] def _real_extract(self, url): m = re.match(self._VALID_URL, url) @@ -90,13 +109,14 @@ class HearThisAtIE(InfoExtractor): ext_handle = self._request_webpage( ext_req, display_id, note='Determining extension') ext = urlhandle_detect_ext(ext_handle) - formats.append({ - 'format_id': 'download', - 'vcodec': 'none', - 'ext': ext, - 'url': download_url, - 'preference': 2, # Usually better quality - }) + if ext in KNOWN_EXTENSIONS: + formats.append({ + 'format_id': 'download', + 'vcodec': 'none', + 'ext': ext, + 'url': download_url, + 'preference': 2, # Usually better quality + }) self._sort_formats(formats) return { From 5572d598a537998615c760ca06bd8d3894150c6a Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sun, 15 May 2016 15:44:04 +0800 Subject: [PATCH 41/45] [hearthisat] Update the first test --- youtube_dl/extractor/hearthisat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/hearthisat.py b/youtube_dl/extractor/hearthisat.py index ac42ef414..256453882 100644 --- a/youtube_dl/extractor/hearthisat.py +++ b/youtube_dl/extractor/hearthisat.py @@ -27,7 +27,7 @@ class HearThisAtIE(InfoExtractor): 'title': 'Moofi - Dr. Kreep', 'thumbnail': 're:^https?://.*\.jpg$', 'timestamp': 1421564134, - 'description': 'Creepy Patch. Mutable Instruments Braids Vowel + Formant Mode.', + 'description': 'Listen to Dr. Kreep by Moofi on hearthis.at - Modular, Eurorack, Mutable Intruments Braids, Valhalla-DSP', 'upload_date': '20150118', 'comment_count': int, 'view_count': int, From a0a81918f18252805b161e4f7d0dc4924b672948 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 15 May 2016 22:07:51 +0600 Subject: [PATCH 42/45] [collegehumor] Remove extractor It now uses brightcove --- youtube_dl/extractor/collegehumor.py | 101 --------------------------- youtube_dl/extractor/extractors.py | 1 - 2 files changed, 102 deletions(-) delete mode 100644 youtube_dl/extractor/collegehumor.py diff --git a/youtube_dl/extractor/collegehumor.py b/youtube_dl/extractor/collegehumor.py deleted file mode 100644 index 002b24037..000000000 --- a/youtube_dl/extractor/collegehumor.py +++ /dev/null @@ -1,101 +0,0 @@ -from __future__ import unicode_literals - -import json -import re - -from .common import InfoExtractor -from ..utils import int_or_none - - -class CollegeHumorIE(InfoExtractor): - _VALID_URL = r'^(?:https?://)?(?:www\.)?collegehumor\.com/(video|embed|e)/(?P<videoid>[0-9]+)/?(?P<shorttitle>.*)$' - - _TESTS = [ - { - 'url': 'http://www.collegehumor.com/video/6902724/comic-con-cosplay-catastrophe', - 'md5': 'dcc0f5c1c8be98dc33889a191f4c26bd', - 'info_dict': { - 'id': '6902724', - 'ext': 'mp4', - 'title': 'Comic-Con Cosplay Catastrophe', - 'description': "Fans get creative this year at San Diego. Too creative. 
And yes, that's really Joss Whedon.", - 'age_limit': 13, - 'duration': 187, - }, - }, { - 'url': 'http://www.collegehumor.com/video/3505939/font-conference', - 'md5': '72fa701d8ef38664a4dbb9e2ab721816', - 'info_dict': { - 'id': '3505939', - 'ext': 'mp4', - 'title': 'Font Conference', - 'description': "This video wasn't long enough, so we made it double-spaced.", - 'age_limit': 10, - 'duration': 179, - }, - }, { - # embedded youtube video - 'url': 'http://www.collegehumor.com/embed/6950306', - 'info_dict': { - 'id': 'Z-bao9fg6Yc', - 'ext': 'mp4', - 'title': 'Young Americans Think President John F. Kennedy Died THIS MORNING IN A CAR ACCIDENT!!!', - 'uploader': 'Mark Dice', - 'uploader_id': 'MarkDice', - 'description': 'md5:62c3dab9351fac7bb44b53b69511d87f', - 'upload_date': '20140127', - }, - 'params': { - 'skip_download': True, - }, - 'add_ie': ['Youtube'], - }, - ] - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('videoid') - - jsonUrl = 'http://www.collegehumor.com/moogaloop/video/' + video_id + '.json' - data = json.loads(self._download_webpage( - jsonUrl, video_id, 'Downloading info JSON')) - vdata = data['video'] - if vdata.get('youtubeId') is not None: - return { - '_type': 'url', - 'url': vdata['youtubeId'], - 'ie_key': 'Youtube', - } - - AGE_LIMITS = {'nc17': 18, 'r': 18, 'pg13': 13, 'pg': 10, 'g': 0} - rating = vdata.get('rating') - if rating: - age_limit = AGE_LIMITS.get(rating.lower()) - else: - age_limit = None # None = No idea - - PREFS = {'high_quality': 2, 'low_quality': 0} - formats = [] - for format_key in ('mp4', 'webm'): - for qname, qurl in vdata.get(format_key, {}).items(): - formats.append({ - 'format_id': format_key + '_' + qname, - 'url': qurl, - 'format': format_key, - 'preference': PREFS.get(qname), - }) - self._sort_formats(formats) - - duration = int_or_none(vdata.get('duration'), 1000) - like_count = int_or_none(vdata.get('likes')) - - return { - 'id': video_id, - 'title': vdata['title'], - 'description': vdata.get('description'), - 'thumbnail': vdata.get('thumbnail'), - 'formats': formats, - 'age_limit': age_limit, - 'duration': duration, - 'like_count': like_count, - } diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 2db3b3c3f..ca9d85e33 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -138,7 +138,6 @@ from .cnn import ( CNNBlogsIE, CNNArticleIE, ) -from .collegehumor import CollegeHumorIE from .collegerama import CollegeRamaIE from .comedycentral import ComedyCentralIE, ComedyCentralShowsIE from .comcarcoff import ComCarCoffIE From f7199423e542580cf8c30991d122673276113497 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 16 May 2016 00:30:13 +0600 Subject: [PATCH 43/45] [groupon] Add support for Youtube embeds (Closes #9508) --- youtube_dl/extractor/groupon.py | 31 ++++++++++++++++++++++--------- 1 file changed, 22 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/groupon.py b/youtube_dl/extractor/groupon.py index f6b69662b..1dd0a81cc 100644 --- a/youtube_dl/extractor/groupon.py +++ b/youtube_dl/extractor/groupon.py @@ -4,7 +4,7 @@ from .common import InfoExtractor class GrouponIE(InfoExtractor): - _VALID_URL = r'https?://www\.groupon\.com/deals/(?P<id>[^?#]+)' + _VALID_URL = r'https?://(?:www\.)?groupon\.com/deals/(?P<id>[^/?#&]+)' _TEST = { 'url': 'https://www.groupon.com/deals/bikram-yoga-huntington-beach-2#ooid=tubGNycTo_9Uxg82uESj4i61EYX8nyuf', @@ -15,18 +15,26 @@ 
class GrouponIE(InfoExtractor): }, 'playlist': [{ 'info_dict': { - 'id': 'tubGNycTo_9Uxg82uESj4i61EYX8nyuf', - 'ext': 'flv', - 'title': 'Bikram Yoga Huntington Beach | Orange County', + 'id': 'fk6OhWpXgIQ', + 'ext': 'mp4', + 'title': 'Bikram Yoga Huntington Beach | Orange County !tubGNycTo@9Uxg82uESj4i61EYX8nyuf', 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e', - 'duration': 44.961, + 'duration': 45, + 'upload_date': '20160405', + 'uploader_id': 'groupon', + 'uploader': 'Groupon', }, }], 'params': { - 'skip_download': 'HDS', + 'skip_download': True, } } + _PROVIDERS = { + 'ooyala': ('ooyala:%s', 'Ooyala'), + 'youtube': ('%s', 'Youtube'), + } + def _real_extract(self, url): playlist_id = self._match_id(url) webpage = self._download_webpage(url, playlist_id) @@ -36,12 +44,17 @@ class GrouponIE(InfoExtractor): videos = payload['carousel'].get('dealVideos', []) entries = [] for v in videos: - if v.get('provider') != 'OOYALA': + provider = v.get('provider') + video_id = v.get('media') or v.get('id') or v.get('baseURL') + if not provider or not video_id: + continue + url_pattern, ie_key = self._PROVIDERS.get(provider.lower()) + if not url_pattern: self.report_warning( '%s: Unsupported video provider %s, skipping video' % - (playlist_id, v.get('provider'))) + (playlist_id, provider)) continue - entries.append(self.url_result('ooyala:%s' % v['media'])) + entries.append(self.url_result(url_pattern % video_id, ie_key)) return { '_type': 'playlist', From 36755d9d694f818ce8f367ce7eb41374f194893d Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Mon, 16 May 2016 17:25:47 +0200 Subject: [PATCH 44/45] release 2016.05.16 --- .github/ISSUE_TEMPLATE.md | 6 +++--- docs/supportedsites.md | 7 +++---- youtube_dl/version.py | 2 +- 3 files changed, 7 insertions(+), 8 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 1fb878b59..7024fc729 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.05.10*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.05.10** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.05.16*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. 
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.05.16** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v <your command line> [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.05.10 +[debug] youtube-dl version 2016.05.16 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/docs/supportedsites.md b/docs/supportedsites.md index de84e5c84..29db13883 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -6,6 +6,7 @@ - **22tracks:genre** - **22tracks:track** - **24video** + - **3qsdn**: 3Q SDN - **3sat** - **4tube** - **56.com** @@ -114,7 +115,6 @@ - **chirbit** - **chirbit:profile** - **Cinchcast** - - **Cinemassacre** - **Clipfish** - **cliphunter** - **ClipRs** @@ -128,7 +128,6 @@ - **CNN** - **CNNArticle** - **CNNBlogs** - - **CollegeHumor** - **CollegeRama** - **ComCarCoff** - **ComedyCentral** @@ -680,7 +679,6 @@ - **tvp.pl:Series** - **TVPlay**: TV3Play and related services - **Tweakers** - - **twitch:bookmarks** - **twitch:chapter** - **twitch:past_broadcasts** - **twitch:profile** @@ -698,7 +696,8 @@ - **USAToday** - **ustream** - **ustream:channel** - - **Ustudio** + - **ustudio** + - **ustudio:embed** - **Varzesh3** - **Vbox7** - **VeeHD** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 45e40c0d1..5a0fdd6ce 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.05.10' +__version__ = '2016.05.16' From cdd94c2eae6c6f0a627d457c3a73894a62eb86c5 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Tue, 17 May 2016 14:38:15 +0800 Subject: [PATCH 45/45] [utils] Check for None values in SOCKS proxy Originally reported at https://github.com/rg3/youtube-dl/pull/9287#issuecomment-219617864 --- youtube_dl/utils.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 24e74428b..ac60ba18c 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -883,12 +883,17 @@ def make_socks_conn_class(base_class, socks_proxy): elif url_components.scheme.lower() == 'socks4a': socks_type = ProxyType.SOCKS4A + def unquote_if_non_empty(s): + if not s: + return s + return compat_urllib_parse_unquote_plus(s) + proxy_args = ( socks_type, url_components.hostname, url_components.port or 1080, True, # Remote DNS - compat_urllib_parse_unquote_plus(url_components.username), - compat_urllib_parse_unquote_plus(url_components.password), + unquote_if_non_empty(url_components.username), + unquote_if_non_empty(url_components.password), ) class SocksConnection(base_class):
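For reference, a minimal sketch of the case this patch guards against (an illustrative example, not taken from the patch itself): a SOCKS proxy URL given without credentials parses to a username and password of None, which must not be passed to compat_urllib_parse_unquote_plus.

    from youtube_dl.compat import compat_urllib_parse_unquote_plus, compat_urlparse

    components = compat_urlparse.urlparse('socks5://127.0.0.1:1080')
    print(components.username, components.password)  # None None

    def unquote_if_non_empty(s):
        # same helper as added above: leave None/'' untouched
        if not s:
            return s
        return compat_urllib_parse_unquote_plus(s)

    print(unquote_if_non_empty(components.username))  # None, instead of a crash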