From f206126df090d78f30426321473ebd566c3b7866 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 23 Jan 2018 21:53:01 +0700 Subject: [PATCH 01/17] [compat] Add compat_b64decode --- youtube_dl/compat.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py index 41ca9adf1..646c9d79c 100644 --- a/youtube_dl/compat.py +++ b/youtube_dl/compat.py @@ -1,6 +1,7 @@ # coding: utf-8 from __future__ import unicode_literals +import base64 import binascii import collections import ctypes @@ -2908,6 +2909,16 @@ except ImportError: # not 2.6+ or is 3.x except ImportError: compat_zip = zip + +if sys.version_info < (3, 3): + def compat_b64decode(s, *args, **kwargs): + if isinstance(s, compat_str): + s = s.encode('ascii') + return base64.b64decode(s, *args, **kwargs) +else: + compat_b64decode = base64.b64decode + + if platform.python_implementation() == 'PyPy' and sys.pypy_version_info < (5, 4, 0): # PyPy2 prior to version 5.4.0 expects byte strings as Windows function # names, see the original PyPy issue [1] and the youtube-dl one [2]. 
@@ -2930,6 +2941,7 @@ __all__ = [ 'compat_HTMLParseError', 'compat_HTMLParser', 'compat_HTTPError', + 'compat_b64decode', 'compat_basestring', 'compat_chr', 'compat_cookiejar', From 5d7d805ca90992cac1cdffbe5d3df3d894d2b979 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 23 Jan 2018 21:53:45 +0700 Subject: [PATCH 02/17] [mixcloud] Use compat_b64decode (closes #15394) --- youtube_dl/extractor/mixcloud.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/mixcloud.py b/youtube_dl/extractor/mixcloud.py index 7b2bb6e20..785b99bc3 100644 --- a/youtube_dl/extractor/mixcloud.py +++ b/youtube_dl/extractor/mixcloud.py @@ -7,6 +7,7 @@ import re from .common import InfoExtractor from ..compat import ( + compat_b64decode, compat_chr, compat_ord, compat_str, @@ -79,7 +80,7 @@ class MixcloudIE(InfoExtractor): if encrypted_play_info is not None: # Decode - encrypted_play_info = base64.b64decode(encrypted_play_info) + encrypted_play_info = compat_b64decode(encrypted_play_info) else: # New path full_info_json = self._parse_json(self._html_search_regex( @@ -109,7 +110,7 @@ class MixcloudIE(InfoExtractor): kpa_target = encrypted_play_info else: kps = ['https://', 'http://'] - kpa_target = base64.b64decode(info_json['streamInfo']['url']) + kpa_target = compat_b64decode(info_json['streamInfo']['url']) for kp in kps: partial_key = self._decrypt_xor_cipher(kpa_target, kp) for quote in ["'", '"']: @@ -165,7 +166,7 @@ class MixcloudIE(InfoExtractor): format_url = stream_info.get(url_key) if not format_url: continue - decrypted = self._decrypt_xor_cipher(key, base64.b64decode(format_url)) + decrypted = self._decrypt_xor_cipher(key, compat_b64decode(format_url)) if not decrypted: continue if url_key == 'hlsUrl': From cf2820710d61742818a906af07f6d6c9669d58a9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 23 Jan 2018 22:23:12 +0700 Subject: [PATCH 03/17] Switch codebase to use compat_b64decode --- 
youtube_dl/aes.py | 4 ++-- youtube_dl/downloader/f4m.py | 6 +++--- youtube_dl/extractor/adn.py | 10 ++++++---- youtube_dl/extractor/bigflix.py | 10 ++++++---- youtube_dl/extractor/chilloutzone.py | 4 ++-- youtube_dl/extractor/chirbit.py | 5 ++--- youtube_dl/extractor/crunchyroll.py | 6 +++--- youtube_dl/extractor/daisuki.py | 3 ++- youtube_dl/extractor/dumpert.py | 4 ++-- youtube_dl/extractor/einthusan.py | 8 ++++---- youtube_dl/extractor/hotnewhiphop.py | 5 ++--- youtube_dl/extractor/infoq.py | 5 ++--- youtube_dl/extractor/leeco.py | 4 ++-- youtube_dl/extractor/mangomolo.py | 11 +++++------ youtube_dl/extractor/mixcloud.py | 1 - youtube_dl/extractor/ooyala.py | 11 +++++++---- youtube_dl/extractor/rtl2.py | 8 ++++---- youtube_dl/extractor/rtve.py | 3 ++- youtube_dl/extractor/shared.py | 10 ++++------ youtube_dl/extractor/teamcoco.py | 8 +++++--- youtube_dl/extractor/tutv.py | 9 +++++---- 21 files changed, 70 insertions(+), 65 deletions(-) diff --git a/youtube_dl/aes.py b/youtube_dl/aes.py index c5bb3c4ef..461bb6d41 100644 --- a/youtube_dl/aes.py +++ b/youtube_dl/aes.py @@ -1,8 +1,8 @@ from __future__ import unicode_literals -import base64 from math import ceil +from .compat import compat_b64decode from .utils import bytes_to_intlist, intlist_to_bytes BLOCK_SIZE_BYTES = 16 @@ -180,7 +180,7 @@ def aes_decrypt_text(data, password, key_size_bytes): """ NONCE_LENGTH_BYTES = 8 - data = bytes_to_intlist(base64.b64decode(data.encode('utf-8'))) + data = bytes_to_intlist(compat_b64decode(data)) password = bytes_to_intlist(password.encode('utf-8')) key = password[:key_size_bytes] + [0] * (key_size_bytes - len(password)) diff --git a/youtube_dl/downloader/f4m.py b/youtube_dl/downloader/f4m.py index fdb80f42a..15e71be9a 100644 --- a/youtube_dl/downloader/f4m.py +++ b/youtube_dl/downloader/f4m.py @@ -1,12 +1,12 @@ from __future__ import division, unicode_literals -import base64 import io import itertools import time from .fragment import FragmentFD from ..compat import ( + 
compat_b64decode, compat_etree_fromstring, compat_urlparse, compat_urllib_error, @@ -312,7 +312,7 @@ class F4mFD(FragmentFD): boot_info = self._get_bootstrap_from_url(bootstrap_url) else: bootstrap_url = None - bootstrap = base64.b64decode(node.text.encode('ascii')) + bootstrap = compat_b64decode(node.text) boot_info = read_bootstrap_info(bootstrap) return boot_info, bootstrap_url @@ -349,7 +349,7 @@ class F4mFD(FragmentFD): live = boot_info['live'] metadata_node = media.find(_add_ns('metadata')) if metadata_node is not None: - metadata = base64.b64decode(metadata_node.text.encode('ascii')) + metadata = compat_b64decode(metadata_node.text) else: metadata = None diff --git a/youtube_dl/extractor/adn.py b/youtube_dl/extractor/adn.py index cffdab6ca..64fb755da 100644 --- a/youtube_dl/extractor/adn.py +++ b/youtube_dl/extractor/adn.py @@ -1,13 +1,15 @@ # coding: utf-8 from __future__ import unicode_literals -import base64 import json import os from .common import InfoExtractor from ..aes import aes_cbc_decrypt -from ..compat import compat_ord +from ..compat import ( + compat_b64decode, + compat_ord, +) from ..utils import ( bytes_to_intlist, ExtractorError, @@ -48,9 +50,9 @@ class ADNIE(InfoExtractor): # http://animedigitalnetwork.fr/components/com_vodvideo/videojs/adn-vjs.min.js dec_subtitles = intlist_to_bytes(aes_cbc_decrypt( - bytes_to_intlist(base64.b64decode(enc_subtitles[24:])), + bytes_to_intlist(compat_b64decode(enc_subtitles[24:])), bytes_to_intlist(b'\x1b\xe0\x29\x61\x38\x94\x24\x00\x12\xbd\xc5\x80\xac\xce\xbe\xb0'), - bytes_to_intlist(base64.b64decode(enc_subtitles[:24])) + bytes_to_intlist(compat_b64decode(enc_subtitles[:24])) )) subtitles_json = self._parse_json( dec_subtitles[:-compat_ord(dec_subtitles[-1])].decode(), diff --git a/youtube_dl/extractor/bigflix.py b/youtube_dl/extractor/bigflix.py index b4ce767af..28e3e59f6 100644 --- a/youtube_dl/extractor/bigflix.py +++ b/youtube_dl/extractor/bigflix.py @@ -1,11 +1,13 @@ # coding: utf-8 from __future__ 
import unicode_literals -import base64 import re from .common import InfoExtractor -from ..compat import compat_urllib_parse_unquote +from ..compat import ( + compat_b64decode, + compat_urllib_parse_unquote, +) class BigflixIE(InfoExtractor): @@ -39,8 +41,8 @@ class BigflixIE(InfoExtractor): webpage, 'title') def decode_url(quoted_b64_url): - return base64.b64decode(compat_urllib_parse_unquote( - quoted_b64_url).encode('ascii')).decode('utf-8') + return compat_b64decode(compat_urllib_parse_unquote( + quoted_b64_url)).decode('utf-8') formats = [] for height, encoded_url in re.findall( diff --git a/youtube_dl/extractor/chilloutzone.py b/youtube_dl/extractor/chilloutzone.py index d4769da75..5aac21299 100644 --- a/youtube_dl/extractor/chilloutzone.py +++ b/youtube_dl/extractor/chilloutzone.py @@ -1,11 +1,11 @@ from __future__ import unicode_literals import re -import base64 import json from .common import InfoExtractor from .youtube import YoutubeIE +from ..compat import compat_b64decode from ..utils import ( clean_html, ExtractorError @@ -58,7 +58,7 @@ class ChilloutzoneIE(InfoExtractor): base64_video_info = self._html_search_regex( r'var cozVidData = "(.+?)";', webpage, 'video data') - decoded_video_info = base64.b64decode(base64_video_info.encode('utf-8')).decode('utf-8') + decoded_video_info = compat_b64decode(base64_video_info).decode('utf-8') video_info_dict = json.loads(decoded_video_info) # get video information from dict diff --git a/youtube_dl/extractor/chirbit.py b/youtube_dl/extractor/chirbit.py index 4815b34be..8d75cdf19 100644 --- a/youtube_dl/extractor/chirbit.py +++ b/youtube_dl/extractor/chirbit.py @@ -1,10 +1,10 @@ # coding: utf-8 from __future__ import unicode_literals -import base64 import re from .common import InfoExtractor +from ..compat import compat_b64decode from ..utils import parse_duration @@ -44,8 +44,7 @@ class ChirbitIE(InfoExtractor): # Reverse engineered from https://chirb.it/js/chirbit.player.js (look # for soundURL) - audio_url = 
base64.b64decode( - data_fd[::-1].encode('ascii')).decode('utf-8') + audio_url = compat_b64decode(data_fd[::-1]).decode('utf-8') title = self._search_regex( r'class=["\']chirbit-title["\'][^>]*>([^<]+)', webpage, 'title') diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py index b92f25447..3efdc8c21 100644 --- a/youtube_dl/extractor/crunchyroll.py +++ b/youtube_dl/extractor/crunchyroll.py @@ -3,13 +3,13 @@ from __future__ import unicode_literals import re import json -import base64 import zlib from hashlib import sha1 from math import pow, sqrt, floor from .common import InfoExtractor from ..compat import ( + compat_b64decode, compat_etree_fromstring, compat_urllib_parse_urlencode, compat_urllib_request, @@ -272,8 +272,8 @@ class CrunchyrollIE(CrunchyrollBaseIE): } def _decrypt_subtitles(self, data, iv, id): - data = bytes_to_intlist(base64.b64decode(data.encode('utf-8'))) - iv = bytes_to_intlist(base64.b64decode(iv.encode('utf-8'))) + data = bytes_to_intlist(compat_b64decode(data)) + iv = bytes_to_intlist(compat_b64decode(iv)) id = int(id) def obfuscate_key_aux(count, modulo, start): diff --git a/youtube_dl/extractor/daisuki.py b/youtube_dl/extractor/daisuki.py index 5c9ac68a0..dbc1aa5d4 100644 --- a/youtube_dl/extractor/daisuki.py +++ b/youtube_dl/extractor/daisuki.py @@ -10,6 +10,7 @@ from ..aes import ( aes_cbc_decrypt, aes_cbc_encrypt, ) +from ..compat import compat_b64decode from ..utils import ( bytes_to_intlist, bytes_to_long, @@ -93,7 +94,7 @@ class DaisukiMottoIE(InfoExtractor): rtn = self._parse_json( intlist_to_bytes(aes_cbc_decrypt(bytes_to_intlist( - base64.b64decode(encrypted_rtn)), + compat_b64decode(encrypted_rtn)), aes_key, iv)).decode('utf-8').rstrip('\0'), video_id) diff --git a/youtube_dl/extractor/dumpert.py b/youtube_dl/extractor/dumpert.py index c9fc9b5a9..be2e3d378 100644 --- a/youtube_dl/extractor/dumpert.py +++ b/youtube_dl/extractor/dumpert.py @@ -1,10 +1,10 @@ # coding: utf-8 from __future__ import 
unicode_literals -import base64 import re from .common import InfoExtractor +from ..compat import compat_b64decode from ..utils import ( qualities, sanitized_Request, @@ -42,7 +42,7 @@ class DumpertIE(InfoExtractor): r'data-files="([^"]+)"', webpage, 'data files') files = self._parse_json( - base64.b64decode(files_base64.encode('utf-8')).decode('utf-8'), + compat_b64decode(files_base64).decode('utf-8'), video_id) quality = qualities(['flv', 'mobile', 'tablet', '720p']) diff --git a/youtube_dl/extractor/einthusan.py b/youtube_dl/extractor/einthusan.py index 3f6268637..4485bf8c1 100644 --- a/youtube_dl/extractor/einthusan.py +++ b/youtube_dl/extractor/einthusan.py @@ -1,13 +1,13 @@ # coding: utf-8 from __future__ import unicode_literals -import base64 import json from .common import InfoExtractor from ..compat import ( - compat_urlparse, + compat_b64decode, compat_str, + compat_urlparse, ) from ..utils import ( extract_attributes, @@ -36,9 +36,9 @@ class EinthusanIE(InfoExtractor): # reversed from jsoncrypto.prototype.decrypt() in einthusan-PGMovieWatcher.js def _decrypt(self, encrypted_data, video_id): - return self._parse_json(base64.b64decode(( + return self._parse_json(compat_b64decode(( encrypted_data[:10] + encrypted_data[-1] + encrypted_data[12:-1] - ).encode('ascii')).decode('utf-8'), video_id) + )).decode('utf-8'), video_id) def _real_extract(self, url): video_id = self._match_id(url) diff --git a/youtube_dl/extractor/hotnewhiphop.py b/youtube_dl/extractor/hotnewhiphop.py index 34163725f..4703e1894 100644 --- a/youtube_dl/extractor/hotnewhiphop.py +++ b/youtube_dl/extractor/hotnewhiphop.py @@ -1,8 +1,7 @@ from __future__ import unicode_literals -import base64 - from .common import InfoExtractor +from ..compat import compat_b64decode from ..utils import ( ExtractorError, HEADRequest, @@ -48,7 +47,7 @@ class HotNewHipHopIE(InfoExtractor): if 'mediaKey' not in mkd: raise ExtractorError('Did not get a media key') - redirect_url = 
base64.b64decode(video_url_base64).decode('utf-8') + redirect_url = compat_b64decode(video_url_base64).decode('utf-8') redirect_req = HEADRequest(redirect_url) req = self._request_webpage( redirect_req, video_id, diff --git a/youtube_dl/extractor/infoq.py b/youtube_dl/extractor/infoq.py index c3e892feb..391c2f5d0 100644 --- a/youtube_dl/extractor/infoq.py +++ b/youtube_dl/extractor/infoq.py @@ -2,9 +2,8 @@ from __future__ import unicode_literals -import base64 - from ..compat import ( + compat_b64decode, compat_urllib_parse_unquote, compat_urlparse, ) @@ -61,7 +60,7 @@ class InfoQIE(BokeCCBaseIE): encoded_id = self._search_regex( r"jsclassref\s*=\s*'([^']*)'", webpage, 'encoded id', default=None) - real_id = compat_urllib_parse_unquote(base64.b64decode(encoded_id.encode('ascii')).decode('utf-8')) + real_id = compat_urllib_parse_unquote(compat_b64decode(encoded_id).decode('utf-8')) playpath = 'mp4:' + real_id return [{ diff --git a/youtube_dl/extractor/leeco.py b/youtube_dl/extractor/leeco.py index 0a07c1320..ffe10154b 100644 --- a/youtube_dl/extractor/leeco.py +++ b/youtube_dl/extractor/leeco.py @@ -1,7 +1,6 @@ # coding: utf-8 from __future__ import unicode_literals -import base64 import datetime import hashlib import re @@ -9,6 +8,7 @@ import time from .common import InfoExtractor from ..compat import ( + compat_b64decode, compat_ord, compat_str, compat_urllib_parse_urlencode, @@ -329,7 +329,7 @@ class LetvCloudIE(InfoExtractor): raise ExtractorError('Letv cloud returned an unknwon error') def b64decode(s): - return base64.b64decode(s.encode('utf-8')).decode('utf-8') + return compat_b64decode(s).decode('utf-8') formats = [] for media in play_json['data']['video_info']['media'].values(): diff --git a/youtube_dl/extractor/mangomolo.py b/youtube_dl/extractor/mangomolo.py index dbd761a67..482175a34 100644 --- a/youtube_dl/extractor/mangomolo.py +++ b/youtube_dl/extractor/mangomolo.py @@ -1,13 +1,12 @@ # coding: utf-8 from __future__ import unicode_literals -import 
base64 - from .common import InfoExtractor -from ..compat import compat_urllib_parse_unquote -from ..utils import ( - int_or_none, +from ..compat import ( + compat_b64decode, + compat_urllib_parse_unquote, ) +from ..utils import int_or_none class MangomoloBaseIE(InfoExtractor): @@ -51,4 +50,4 @@ class MangomoloLiveIE(MangomoloBaseIE): _IS_LIVE = True def _get_real_id(self, page_id): - return base64.b64decode(compat_urllib_parse_unquote(page_id).encode()).decode() + return compat_b64decode(compat_urllib_parse_unquote(page_id)).decode() diff --git a/youtube_dl/extractor/mixcloud.py b/youtube_dl/extractor/mixcloud.py index 785b99bc3..a56b7690f 100644 --- a/youtube_dl/extractor/mixcloud.py +++ b/youtube_dl/extractor/mixcloud.py @@ -1,6 +1,5 @@ from __future__ import unicode_literals -import base64 import functools import itertools import re diff --git a/youtube_dl/extractor/ooyala.py b/youtube_dl/extractor/ooyala.py index 52580baed..ad8bf03f8 100644 --- a/youtube_dl/extractor/ooyala.py +++ b/youtube_dl/extractor/ooyala.py @@ -1,9 +1,13 @@ from __future__ import unicode_literals + import re -import base64 from .common import InfoExtractor -from ..compat import compat_str +from ..compat import ( + compat_b64decode, + compat_str, + compat_urllib_parse_urlencode, +) from ..utils import ( determine_ext, ExtractorError, @@ -12,7 +16,6 @@ from ..utils import ( try_get, unsmuggle_url, ) -from ..compat import compat_urllib_parse_urlencode class OoyalaBaseIE(InfoExtractor): @@ -44,7 +47,7 @@ class OoyalaBaseIE(InfoExtractor): url_data = try_get(stream, lambda x: x['url']['data'], compat_str) if not url_data: continue - s_url = base64.b64decode(url_data.encode('ascii')).decode('utf-8') + s_url = compat_b64decode(url_data).decode('utf-8') if not s_url or s_url in urls: continue urls.append(s_url) diff --git a/youtube_dl/extractor/rtl2.py b/youtube_dl/extractor/rtl2.py index 666e90e90..18a327d81 100644 --- a/youtube_dl/extractor/rtl2.py +++ b/youtube_dl/extractor/rtl2.py @@ -1,12 
+1,12 @@ # coding: utf-8 from __future__ import unicode_literals -import base64 import re from .common import InfoExtractor from ..aes import aes_cbc_decrypt from ..compat import ( + compat_b64decode, compat_ord, compat_str, ) @@ -142,11 +142,11 @@ class RTL2YouIE(RTL2YouBaseIE): stream_data = self._download_json( self._BACKWERK_BASE_URL + 'stream/video/' + video_id, video_id) - data, iv = base64.b64decode(stream_data['streamUrl']).decode().split(':') + data, iv = compat_b64decode(stream_data['streamUrl']).decode().split(':') stream_url = intlist_to_bytes(aes_cbc_decrypt( - bytes_to_intlist(base64.b64decode(data)), + bytes_to_intlist(compat_b64decode(data)), bytes_to_intlist(self._AES_KEY), - bytes_to_intlist(base64.b64decode(iv)) + bytes_to_intlist(compat_b64decode(iv)) )) if b'rtl2_you_video_not_found' in stream_url: raise ExtractorError('video not found', expected=True) diff --git a/youtube_dl/extractor/rtve.py b/youtube_dl/extractor/rtve.py index fa60ffd5e..ce9db0629 100644 --- a/youtube_dl/extractor/rtve.py +++ b/youtube_dl/extractor/rtve.py @@ -7,6 +7,7 @@ import time from .common import InfoExtractor from ..compat import ( + compat_b64decode, compat_struct_unpack, ) from ..utils import ( @@ -21,7 +22,7 @@ from ..utils import ( def _decrypt_url(png): - encrypted_data = base64.b64decode(png.encode('utf-8')) + encrypted_data = compat_b64decode(png) text_index = encrypted_data.find(b'tEXt') text_chunk = encrypted_data[text_index - 4:] length = compat_struct_unpack('!I', text_chunk[:4])[0] diff --git a/youtube_dl/extractor/shared.py b/youtube_dl/extractor/shared.py index 89e19e927..b2250afdd 100644 --- a/youtube_dl/extractor/shared.py +++ b/youtube_dl/extractor/shared.py @@ -1,8 +1,7 @@ from __future__ import unicode_literals -import base64 - from .common import InfoExtractor +from ..compat import compat_b64decode from ..utils import ( ExtractorError, int_or_none, @@ -22,8 +21,8 @@ class SharedBaseIE(InfoExtractor): video_url = self._extract_video_url(webpage, 
video_id, url) - title = base64.b64decode(self._html_search_meta( - 'full:title', webpage, 'title').encode('utf-8')).decode('utf-8') + title = compat_b64decode(self._html_search_meta( + 'full:title', webpage, 'title')).decode('utf-8') filesize = int_or_none(self._html_search_meta( 'full:size', webpage, 'file size', fatal=False)) @@ -92,5 +91,4 @@ class VivoIE(SharedBaseIE): r'InitializeStream\s*\(\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage, 'stream', group='url'), video_id, - transform_source=lambda x: base64.b64decode( - x.encode('ascii')).decode('utf-8'))[0] + transform_source=lambda x: compat_b64decode(x).decode('utf-8'))[0] diff --git a/youtube_dl/extractor/teamcoco.py b/youtube_dl/extractor/teamcoco.py index 75346393b..9056c8cbc 100644 --- a/youtube_dl/extractor/teamcoco.py +++ b/youtube_dl/extractor/teamcoco.py @@ -1,18 +1,20 @@ # coding: utf-8 from __future__ import unicode_literals -import base64 import binascii import re import json from .common import InfoExtractor +from ..compat import ( + compat_b64decode, + compat_ord, +) from ..utils import ( ExtractorError, qualities, determine_ext, ) -from ..compat import compat_ord class TeamcocoIE(InfoExtractor): @@ -97,7 +99,7 @@ class TeamcocoIE(InfoExtractor): for i in range(len(cur_fragments)): cur_sequence = (''.join(cur_fragments[i:] + cur_fragments[:i])).encode('ascii') try: - raw_data = base64.b64decode(cur_sequence) + raw_data = compat_b64decode(cur_sequence) if compat_ord(raw_data[0]) == compat_ord('{'): return json.loads(raw_data.decode('utf-8')) except (TypeError, binascii.Error, UnicodeDecodeError, ValueError): diff --git a/youtube_dl/extractor/tutv.py b/youtube_dl/extractor/tutv.py index 822372ea1..362318b24 100644 --- a/youtube_dl/extractor/tutv.py +++ b/youtube_dl/extractor/tutv.py @@ -1,9 +1,10 @@ from __future__ import unicode_literals -import base64 - from .common import InfoExtractor -from ..compat import compat_parse_qs +from ..compat import ( + compat_b64decode, + compat_parse_qs, +) class 
TutvIE(InfoExtractor): @@ -26,7 +27,7 @@ class TutvIE(InfoExtractor): data_content = self._download_webpage( 'http://tu.tv/flvurl.php?codVideo=%s' % internal_id, video_id, 'Downloading video info') - video_url = base64.b64decode(compat_parse_qs(data_content)['kpt'][0].encode('utf-8')).decode('utf-8') + video_url = compat_b64decode(compat_parse_qs(data_content)['kpt'][0]).decode('utf-8') return { 'id': internal_id, From dc400ed6a2f79977cda7968b626b1ead35523b37 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Tue, 23 Jan 2018 19:06:46 +0100 Subject: [PATCH 04/17] [tbs] update tokenizer url(fixes #15395) --- youtube_dl/extractor/tbs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/tbs.py b/youtube_dl/extractor/tbs.py index eab22c38f..edc31729d 100644 --- a/youtube_dl/extractor/tbs.py +++ b/youtube_dl/extractor/tbs.py @@ -58,7 +58,7 @@ class TBSIE(TurnerBaseIE): continue if stream_data.get('playlistProtection') == 'spe': m3u8_url = self._add_akamai_spe_token( - 'http://www.%s.com/service/token_spe' % site, + 'http://token.vgtf.net/token/token_spe', m3u8_url, media_id, { 'url': url, 'site_name': site[:3].upper(), From 967ebbdb6cdb655815f73482763ed8f6eeff5c96 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Tue, 23 Jan 2018 19:22:44 +0100 Subject: [PATCH 05/17] [prosiebensat1] add another clip ID regexp(fixes #15378) --- youtube_dl/extractor/prosiebensat1.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/prosiebensat1.py b/youtube_dl/extractor/prosiebensat1.py index 7e680a728..48757fd4f 100644 --- a/youtube_dl/extractor/prosiebensat1.py +++ b/youtube_dl/extractor/prosiebensat1.py @@ -345,6 +345,7 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE): r'clip[iI]d\s*=\s*["\'](\d+)', r"'itemImageUrl'\s*:\s*'/dynamic/thumbnails/full/\d+/(\d+)", r'proMamsId"\s*:\s*"(\d+)', + r'proMamsId&quot;\s*:\s*&quot;(\d+)', ] _TITLE_REGEXES = [ r'

<h2 class="subtitle" itemprop="name">\s*(.+?)</h2>

', From 837b0617100f20b40a4b831439263c131a59aadb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 24 Jan 2018 22:41:25 +0700 Subject: [PATCH 06/17] [teachertube] Fix and relax thumbnail extraction (closes #15403) --- youtube_dl/extractor/teachertube.py | 31 ++++++++++------------------- 1 file changed, 11 insertions(+), 20 deletions(-) diff --git a/youtube_dl/extractor/teachertube.py b/youtube_dl/extractor/teachertube.py index f14713a78..0f3231a91 100644 --- a/youtube_dl/extractor/teachertube.py +++ b/youtube_dl/extractor/teachertube.py @@ -17,6 +17,7 @@ class TeacherTubeIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?teachertube\.com/(viewVideo\.php\?video_id=|music\.php\?music_id=|video/(?:[\da-z-]+-)?|audio/)(?P<id>\d+)' _TESTS = [{ + # flowplayer 'url': 'http://www.teachertube.com/viewVideo.php?video_id=339997', 'md5': 'f9434ef992fd65936d72999951ee254c', 'info_dict': { @@ -24,19 +25,10 @@ class TeacherTubeIE(InfoExtractor): 'ext': 'mp4', 'title': 'Measures of dispersion from a frequency table', 'description': 'Measures of dispersion from a frequency table', - 'thumbnail': r're:http://.*\.jpg', - }, - }, { - 'url': 'http://www.teachertube.com/viewVideo.php?video_id=340064', - 'md5': '0d625ec6bc9bf50f70170942ad580676', - 'info_dict': { - 'id': '340064', - 'ext': 'mp4', - 'title': 'How to Make Paper Dolls _ Paper Art Projects', - 'description': 'Learn how to make paper dolls in this simple', - 'thumbnail': r're:http://.*\.jpg', + 'thumbnail': r're:https?://.*\.(?:jpg|png)', }, }, { + # jwplayer 'url': 'http://www.teachertube.com/music.php?music_id=8805', 'md5': '01e8352006c65757caf7b961f6050e21', 'info_dict': { @@ -46,14 +38,9 @@ class TeacherTubeIE(InfoExtractor): 'description': 'RADIJSKA EMISIJA ZRAKOPLOVNE TEHNI?KE ?KOLE P', }, }, { + # unavailable video 'url': 'http://www.teachertube.com/video/intro-video-schleicher-297790', - 'md5': '9c79fbb2dd7154823996fc28d4a26998', - 'info_dict': { - 'id': '297790', - 'ext': 'mp4', - 'title': 
'Intro Video - Schleicher', - 'description': 'Intro Video - Why to flip, how flipping will', - }, + 'only_matching': True, }] def _real_extract(self, url): @@ -84,12 +71,16 @@ class TeacherTubeIE(InfoExtractor): self._sort_formats(formats) + thumbnail = self._og_search_thumbnail( + webpage, default=None) or self._html_search_meta( + 'thumbnail', webpage) + return { 'id': video_id, 'title': title, - 'thumbnail': self._html_search_regex(r'\'image\'\s*:\s*["\']([^"\']+)["\']', webpage, 'thumbnail'), - 'formats': formats, 'description': description, + 'thumbnail': thumbnail, + 'formats': formats, } From 9d6458a206b44db72fa810af80fb742fb647ff16 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 24 Jan 2018 22:46:04 +0700 Subject: [PATCH 07/17] [teachertube] Capture and output error message --- youtube_dl/extractor/teachertube.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/teachertube.py b/youtube_dl/extractor/teachertube.py index 0f3231a91..1272078c5 100644 --- a/youtube_dl/extractor/teachertube.py +++ b/youtube_dl/extractor/teachertube.py @@ -5,8 +5,9 @@ import re from .common import InfoExtractor from ..utils import ( - qualities, determine_ext, + ExtractorError, + qualities, ) @@ -47,6 +48,12 @@ class TeacherTubeIE(InfoExtractor): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) + error = self._search_regex( + r'<div\b[^>]+\bclass=["\']msgBox error[^>]+>([^<]+)', webpage, + 'error', default=None) + if error: + raise ExtractorError('%s said: %s' % (self.IE_NAME, error), expected=True) + title = self._html_search_meta('title', webpage, 'title', fatal=True) TITLE_SUFFIX = ' - TeacherTube' if title.endswith(TITLE_SUFFIX): From bbb7c3f7e92111d0d5060297db007ecb1047c2c8 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Thu, 25 Jan 2018 22:30:33 +0800 Subject: [PATCH 08/17] [youtube] Extract precise error messages (closes #15284) --- ChangeLog | 7 +++++++ 
youtube_dl/extractor/youtube.py | 17 +++++++++++++---- 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/ChangeLog b/ChangeLog index 65a01fcc7..4ee10ca7c 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,10 @@ +version <unreleased> + +Extractors + +* [youtube] Extract precise error messages (#15284) + + version 2018.01.21 Core diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index f698a5627..43051512b 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1596,6 +1596,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor): if 'token' not in video_info: video_info = get_video_info break + + def extract_unavailable_message(): + return self._html_search_regex( + r'(?s)<h1[^>]+id="unavailable-message"[^>]*>(.+?)</h1>', + video_webpage, 'unavailable message', default=None) + if 'token' not in video_info: if 'reason' in video_info: if 'The uploader has not made this video available in your country.' in video_info['reason']: @@ -1604,8 +1610,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor): countries = regions_allowed.split(',') if regions_allowed else None self.raise_geo_restricted( msg=video_info['reason'][0], countries=countries) + reason = video_info['reason'][0] + if 'Invalid parameters' in reason: + unavailable_message = extract_unavailable_message() + if unavailable_message: + reason = unavailable_message raise ExtractorError( - 'YouTube said: %s' % video_info['reason'][0], + 'YouTube said: %s' % reason, expected=True, video_id=video_id) else: raise ExtractorError( @@ -1953,9 +1964,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True' formats.append(a_format) else: - unavailable_message = self._html_search_regex( - r'(?s)<h1[^>]+id="unavailable-message"[^>]*>(.+?)</h1>', - video_webpage, 'unavailable message', default=None) + unavailable_message = extract_unavailable_message() if unavailable_message: raise ExtractorError(unavailable_message, expected=True) raise 
ExtractorError('no conn, hlsvp or url_encoded_fmt_stream_map information found in video info') From 864a4576b70bfe9abc3c4f72b0b5e8173b686875 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 26 Jan 2018 23:49:47 +0700 Subject: [PATCH 09/17] [dplay] Add support for disco-api videos (closes #15396) --- youtube_dl/extractor/dplay.py | 100 ++++++++++++++++++++++++++++++++-- 1 file changed, 96 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/dplay.py b/youtube_dl/extractor/dplay.py index 76e784105..2840636e5 100644 --- a/youtube_dl/extractor/dplay.py +++ b/youtube_dl/extractor/dplay.py @@ -12,25 +12,28 @@ from ..compat import ( compat_urlparse, ) from ..utils import ( + determine_ext, ExtractorError, + float_or_none, int_or_none, remove_end, try_get, unified_strdate, + unified_timestamp, update_url_query, USER_AGENTS, ) class DPlayIE(InfoExtractor): - _VALID_URL = r'https?://(?P<domain>www\.dplay\.(?:dk|se|no))/[^/]+/(?P<id>[^/?#]+)' + _VALID_URL = r'https?://(?P<domain>www\.(?P<host>dplay\.(?:dk|se|no)))/(?:videoer/)?(?P<id>[^/]+/[^/?#]+)' _TESTS = [{ # non geo restricted, via secure api, unsigned download hls URL 'url': 'http://www.dplay.se/nugammalt-77-handelser-som-format-sverige/season-1-svensken-lar-sig-njuta-av-livet/', 'info_dict': { 'id': '3172', - 'display_id': 'season-1-svensken-lar-sig-njuta-av-livet', + 'display_id': 'nugammalt-77-handelser-som-format-sverige/season-1-svensken-lar-sig-njuta-av-livet', 'ext': 'mp4', 'title': 'Svensken lär sig njuta av livet', 'description': 'md5:d3819c9bccffd0fe458ca42451dd50d8', @@ -48,7 +51,7 @@ class DPlayIE(InfoExtractor): 'url': 'http://www.dplay.dk/mig-og-min-mor/season-6-episode-12/', 'info_dict': { 'id': '70816', - 'display_id': 'season-6-episode-12', + 'display_id': 'mig-og-min-mor/season-6-episode-12', 'ext': 'mp4', 'title': 'Episode 12', 'description': 'md5:9c86e51a93f8a4401fc9641ef9894c90', @@ -65,6 +68,26 @@ class DPlayIE(InfoExtractor): # geo restricted, via direct unsigned hls URL 'url': 
'http://www.dplay.no/pga-tour/season-1-hoydepunkter-18-21-februar/', 'only_matching': True, + }, { + # disco-api + 'url': 'https://www.dplay.no/videoer/i-kongens-klr/sesong-1-episode-7', + 'info_dict': { + 'id': '40206', + 'display_id': 'i-kongens-klr/sesong-1-episode-7', + 'ext': 'mp4', + 'title': 'Episode 7', + 'description': 'md5:e3e1411b2b9aebeea36a6ec5d50c60cf', + 'duration': 2611.16, + 'timestamp': 1516726800, + 'upload_date': '20180123', + 'series': 'I kongens klær', + 'season_number': 1, + 'episode_number': 7, + }, + 'params': { + 'format': 'bestvideo', + 'skip_download': True, + }, }] def _real_extract(self, url): @@ -75,7 +98,76 @@ class DPlayIE(InfoExtractor): webpage = self._download_webpage(url, display_id) video_id = self._search_regex( - r'data-video-id=["\'](\d+)', webpage, 'video id') + r'data-video-id=["\'](\d+)', webpage, 'video id', default=None) + + if not video_id: + host = mobj.group('host') + disco_base = 'https://disco-api.%s' % host + self._download_json( + '%s/token' % disco_base, display_id, 'Downloading token', + query={ + 'realm': host.replace('.', ''), + }) + video = self._download_json( + '%s/content/videos/%s' % (disco_base, display_id), display_id, + headers={ + 'Referer': url, + 'x-disco-client': 'WEB:UNKNOWN:dplay-client:0.0.1', + }, query={ + 'include': 'show' + }) + video_id = video['data']['id'] + info = video['data']['attributes'] + title = info['name'] + formats = [] + for format_id, format_dict in self._download_json( + '%s/playback/videoPlaybackInfo/%s' % (disco_base, video_id), + display_id)['data']['attributes']['streaming'].items(): + if not isinstance(format_dict, dict): + continue + format_url = format_dict.get('url') + if not format_url: + continue + ext = determine_ext(format_url) + if format_id == 'dash' or ext == 'mpd': + formats.extend(self._extract_mpd_formats( + format_url, display_id, mpd_id='dash', fatal=False)) + elif format_id == 'hls' or ext == 'm3u8': + formats.extend(self._extract_m3u8_formats( + 
format_url, display_id, 'mp4', + entry_protocol='m3u8_native', m3u8_id='hls', + fatal=False)) + else: + formats.append({ + 'url': format_url, + 'format_id': format_id, + }) + self._sort_formats(formats) + + series = None + try: + included = video.get('included') + if isinstance(included, list): + show = next(e for e in included if e.get('type') == 'show') + series = try_get( + show, lambda x: x['attributes']['name'], compat_str) + except StopIteration: + pass + + return { + 'id': video_id, + 'display_id': display_id, + 'title': title, + 'description': info.get('description'), + 'duration': float_or_none( + info.get('videoDuration'), scale=1000), + 'timestamp': unified_timestamp(info.get('publishStart')), + 'series': series, + 'season_number': int_or_none(info.get('seasonNumber')), + 'episode_number': int_or_none(info.get('episodeNumber')), + 'age_limit': int_or_none(info.get('minimum_age')), + 'formats': formats, + } info = self._download_json( 'http://%s/api/v2/ajax/videos?video_id=%s' % (domain, video_id), From a0ee342b50f8fac4663a1d4d3822f5879caf398a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 26 Jan 2018 23:56:31 +0700 Subject: [PATCH 10/17] [dplay] Bypass geo restriction --- youtube_dl/extractor/dplay.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/dplay.py b/youtube_dl/extractor/dplay.py index 2840636e5..a08dace43 100644 --- a/youtube_dl/extractor/dplay.py +++ b/youtube_dl/extractor/dplay.py @@ -26,7 +26,7 @@ from ..utils import ( class DPlayIE(InfoExtractor): - _VALID_URL = r'https?://(?P<domain>www\.(?P<host>dplay\.(?:dk|se|no)))/(?:videoer/)?(?P<id>[^/]+/[^/?#]+)' + _VALID_URL = r'https?://(?P<domain>www\.(?P<host>dplay\.(?P<country>dk|se|no)))/(?:videoer/)?(?P<id>[^/]+/[^/?#]+)' _TESTS = [{ # non geo restricted, via secure api, unsigned download hls URL @@ -88,6 +88,10 @@ class DPlayIE(InfoExtractor): 'format': 'bestvideo', 'skip_download': True, }, + }, { + # geo restricted, bypassable via X-Forwarded-For + 'url': 
'https://www.dplay.dk/videoer/singleliv/season-5-episode-3', + 'only_matching': True, }] def _real_extract(self, url): @@ -95,6 +99,8 @@ class DPlayIE(InfoExtractor): display_id = mobj.group('id') domain = mobj.group('domain') + self._initialize_geo_bypass([mobj.group('country').upper()]) + webpage = self._download_webpage(url, display_id) video_id = self._search_regex( From 3931b84597c3de013ab9ec3455dbcdaf568871e2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 27 Jan 2018 23:23:36 +0700 Subject: [PATCH 11/17] [extractor/common] Improve _json_ld for articles --- youtube_dl/extractor/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index a072e9bc9..deafb4850 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1027,7 +1027,7 @@ class InfoExtractor(object): part_of_series = e.get('partOfSeries') or e.get('partOfTVSeries') if isinstance(part_of_series, dict) and part_of_series.get('@type') in ('TVSeries', 'Series', 'CreativeWorkSeries'): info['series'] = unescapeHTML(part_of_series.get('name')) - elif item_type == 'Article': + elif item_type in ('Article', 'NewsArticle'): info.update({ 'timestamp': parse_iso8601(e.get('datePublished')), 'title': unescapeHTML(e.get('headline')), From 27940ca09c51af23f7fcc6609ab75974576d5b19 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Petr=20Nov=C3=A1k?= Date: Sat, 27 Jan 2018 17:34:31 +0100 Subject: [PATCH 12/17] [seznamzpravy] Add extractor (closes #14102) --- youtube_dl/extractor/extractors.py | 4 + youtube_dl/extractor/seznamzpravy.py | 169 +++++++++++++++++++++++++++ 2 files changed, 173 insertions(+) create mode 100644 youtube_dl/extractor/seznamzpravy.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 57e74ba62..b442256fe 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -933,6 +933,10 @@ from 
.servingsys import ServingSysIE from .servus import ServusIE from .sevenplus import SevenPlusIE from .sexu import SexuIE +from .seznamzpravy import ( + SeznamZpravyIE, + SeznamZpravyArticleIE, +) from .shahid import ( ShahidIE, ShahidShowIE, diff --git a/youtube_dl/extractor/seznamzpravy.py b/youtube_dl/extractor/seznamzpravy.py new file mode 100644 index 000000000..0d1e7668e --- /dev/null +++ b/youtube_dl/extractor/seznamzpravy.py @@ -0,0 +1,169 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..compat import ( + compat_parse_qs, + compat_str, + compat_urllib_parse_urlparse, +) +from ..utils import ( + urljoin, + int_or_none, + try_get, + update_url_query, +) + + +def _raw_id(src_url): + return compat_urllib_parse_urlparse(src_url).path.split('/')[-1] + + +class SeznamZpravyIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?(?:seznam\.cz/zpravy|seznamzpravy\.cz)/iframe/player\?.*\bsrc=' + _TESTS = [{ + 'url': r'https://www.seznamzpravy.cz/iframe/player?duration=241&serviceSlug=zpravy&src=https%3A%2F%2Fv39-a.sdn.szn.cz%2Fv_39%2Fvmd%2F5999c902ea707c67d8e267a9%3Ffl%3Dmdk%2C432f65a0%7C&itemType=video&autoPlay=false&title=Sv%C4%9Bt%20bez%20obalu%3A%20%C4%8Ce%C5%A1t%C3%AD%20voj%C3%A1ci%20na%20mis%C3%ADch%20(kr%C3%A1tk%C3%A1%20verze)&series=Sv%C4%9Bt%20bez%20obalu&serviceName=Seznam%20Zpr%C3%A1vy&poster=%2F%2Fd39-a.sdn.szn.cz%2Fd_39%2Fc_img_F_I%2FR5puJ.jpeg%3Ffl%3Dcro%2C0%2C0%2C1920%2C1080%7Cres%2C1200%2C%2C1%7Cjpg%2C80%2C%2C1&width=1920&height=1080&cutFrom=0&cutTo=0&splVersion=VOD&contentId=170889&contextId=35990&showAdvert=true&collocation=&autoplayPossible=true&embed=&isVideoTooShortForPreroll=false&isVideoTooLongForPostroll=true&videoCommentOpKey=&videoCommentId=&version=4.0.76&dotService=zpravy&gemiusPrismIdentifier=bVc1ZIb_Qax4W2v5xOPGpMeCP31kFfrTzj0SqPTLh_b.Z7&zoneIdPreroll=seznam.pack.videospot&skipOffsetPreroll=5&sectionPrefixPreroll=%2Fzpravy', + 'params': {'skip_download': True}, # 'file_minsize': 1586 
seems to get killed in test_download.py + 'info_dict': { + 'id': '170889', + 'ext': 'mp4', + 'title': 'Svět bez obalu: Čeští vojáci na misích (krátká verze)', + } + }] + + def _extract_sdn_formats(self, sdn_url, video_id): + sdn_data = self._download_json(sdn_url, video_id) + formats = [] + mp4_formats = try_get(sdn_data, lambda x: x['data']['mp4'], dict) or {} + for fmt, fmtdata in mp4_formats.items(): + relative_url = fmtdata.get('url') + if not relative_url: + continue + + try: + width, height = fmtdata.get('resolution') + except (TypeError, ValueError): + width, height = None, None + + formats.append({ + 'format_id': fmt, + 'width': int_or_none(width), + 'height': int_or_none(height), + 'url': urljoin(sdn_url, relative_url), + 'vcodec': fmtdata.get('codec'), + 'tbr': int_or_none(fmtdata.get('bandwidth'), scale=1000), + 'duration': int_or_none(fmtdata.get('duration'), scale=1000), + }) + + playlists = sdn_data.get('pls', {}) + dash_rel_url = try_get(playlists, lambda x: x['dash']['url'], compat_str) + if dash_rel_url: + formats.extend(self._extract_mpd_formats(urljoin(sdn_url, dash_rel_url), video_id, mpd_id='dash', fatal=False)) + + hls_rel_url = try_get(playlists, lambda x: x['hls']['url'], compat_str) + if hls_rel_url: + formats.extend(self._extract_m3u8_formats(urljoin(sdn_url, hls_rel_url), video_id, ext='mp4', m3u8_id='hls', fatal=False)) + + self._sort_formats(formats) + return formats + + def _real_extract(self, url): + params = compat_parse_qs(compat_urllib_parse_urlparse(url).query) + src = params['src'][0] + video_id = params.get('contentId', [_raw_id(src)])[0] + + return { + 'id': video_id, + 'title': params['title'][0], + 'formats': self._extract_sdn_formats(src + 'spl2,2,VOD', video_id), + } + + +class SeznamZpravyArticleIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?(?:seznam\.cz/zpravy|seznamzpravy\.cz)/clanek/(?:[-a-z0-9]+)-(?P<id>[0-9]+)' + _API_URL = 'https://apizpravy.seznam.cz/' + + _TESTS = [{ + # two videos on one page, with SDN URL + 
'url': 'https://www.seznamzpravy.cz/clanek/jejich-svet-na-nas-utoci-je-lepsi-branit-se-na-jejich-pisecku-rika-reziser-a-major-v-zaloze-marhoul-35990', + 'params': {'skip_download': True}, + # ^ this is here instead of 'file_minsize': 1586, which does not work because + # test_download.py forces expected_minsize to at least 10k when test is running + 'info_dict': { + 'id': '170889', + 'ext': 'mp4', + 'title': 'Svět bez obalu: Čeští vojáci na misích (krátká verze)', + } + }, { + # video with live stream URL + 'url': 'https://www.seznam.cz/zpravy/clanek/znovu-do-vlady-s-ano-pavel-belobradek-ve-volebnim-specialu-seznamu-38489', + 'info_dict': { + 'id': '185688', + 'ext': 'mp4', + 'title': 'Předseda KDU-ČSL Pavel Bělobrádek ve volební Výzvě Seznamu', + } + }] + + def _extract_caption(self, api_data, article_id): + title = api_data.get('title') or api_data.get('captionTitle') + caption = api_data.get('caption') + if not title or not caption: + return {} + + if 'sdn' in caption.get('video', {}): + src_url = caption['video']['sdn'] + elif 'liveStreamUrl' in caption: + src_url = self._download_json(caption['liveStreamUrl'], article_id)['Location'] + else: + return {} + + return { + 'id': caption.get('uid'), + 'title': caption.get('title'), + 'src': src_url, + } + + def _extract_content(self, api_data): + entries = [] + for item in api_data.get('content', []): + media = item.get('properties', {}).get('media', {}) + src_url = media.get('video', {}).get('sdn') + title = media.get('title') + if not src_url or not title: + continue + + entries.append({ + 'id': media.get('uid'), + 'title': title, + 'src': src_url, + }) + + return entries + + def _iframe_result(self, info_dict): + video_id = info_dict['id'] or _raw_id(info_dict['src']) + url = update_url_query('https://www.seznam.cz/zpravy/iframe/player', { + 'src': info_dict['src'], + 'title': info_dict['title'], + 'contentId': video_id, + 'serviceName': 'Seznam Zprávy', + }) + return self.url_result(url, ie='SeznamZpravy', 
video_id=video_id, video_title=info_dict['title']) + + def _real_extract(self, url): + article_id = self._match_id(url) + api_data = self._download_json(self._API_URL + 'v1/documents/' + article_id, article_id) + + caption = self._extract_caption(api_data, article_id) + content = self._extract_content(api_data) + + if caption and not content: + return self._iframe_result(caption) + else: + if caption: + content.insert(0, caption) + return self.playlist_result( + [self._iframe_result(x) for x in content], + playlist_id=article_id, + playlist_title=api_data.get('title') or caption.get('title') + ) From 3c3a07ee0bbd1305af155ae028afba36c74d4b0d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 27 Jan 2018 23:36:44 +0700 Subject: [PATCH 13/17] [seznamzpravy] Improve and simplify (closes #14616) --- youtube_dl/extractor/seznamzpravy.py | 187 ++++++++++++++------------- 1 file changed, 94 insertions(+), 93 deletions(-) diff --git a/youtube_dl/extractor/seznamzpravy.py b/youtube_dl/extractor/seznamzpravy.py index 0d1e7668e..cf32d1e0c 100644 --- a/youtube_dl/extractor/seznamzpravy.py +++ b/youtube_dl/extractor/seznamzpravy.py @@ -1,6 +1,8 @@ # coding: utf-8 from __future__ import unicode_literals +import re + from .common import InfoExtractor from ..compat import ( compat_parse_qs, @@ -10,8 +12,8 @@ from ..compat import ( from ..utils import ( urljoin, int_or_none, + parse_codecs, try_get, - update_url_query, ) @@ -20,150 +22,149 @@ def _raw_id(src_url): class SeznamZpravyIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?(?:seznam\.cz/zpravy|seznamzpravy\.cz)/iframe/player\?.*\bsrc=' + _VALID_URL = r'https?://(?:www\.)?seznamzpravy\.cz/iframe/player\?.*\bsrc=' _TESTS = [{ - 'url': 
r'https://www.seznamzpravy.cz/iframe/player?duration=241&serviceSlug=zpravy&src=https%3A%2F%2Fv39-a.sdn.szn.cz%2Fv_39%2Fvmd%2F5999c902ea707c67d8e267a9%3Ffl%3Dmdk%2C432f65a0%7C&itemType=video&autoPlay=false&title=Sv%C4%9Bt%20bez%20obalu%3A%20%C4%8Ce%C5%A1t%C3%AD%20voj%C3%A1ci%20na%20mis%C3%ADch%20(kr%C3%A1tk%C3%A1%20verze)&series=Sv%C4%9Bt%20bez%20obalu&serviceName=Seznam%20Zpr%C3%A1vy&poster=%2F%2Fd39-a.sdn.szn.cz%2Fd_39%2Fc_img_F_I%2FR5puJ.jpeg%3Ffl%3Dcro%2C0%2C0%2C1920%2C1080%7Cres%2C1200%2C%2C1%7Cjpg%2C80%2C%2C1&width=1920&height=1080&cutFrom=0&cutTo=0&splVersion=VOD&contentId=170889&contextId=35990&showAdvert=true&collocation=&autoplayPossible=true&embed=&isVideoTooShortForPreroll=false&isVideoTooLongForPostroll=true&videoCommentOpKey=&videoCommentId=&version=4.0.76&dotService=zpravy&gemiusPrismIdentifier=bVc1ZIb_Qax4W2v5xOPGpMeCP31kFfrTzj0SqPTLh_b.Z7&zoneIdPreroll=seznam.pack.videospot&skipOffsetPreroll=5&sectionPrefixPreroll=%2Fzpravy', - 'params': {'skip_download': True}, # 'file_minsize': 1586 seems to get killed in test_download.py + 'url': 
'https://www.seznamzpravy.cz/iframe/player?duration=241&serviceSlug=zpravy&src=https%3A%2F%2Fv39-a.sdn.szn.cz%2Fv_39%2Fvmd%2F5999c902ea707c67d8e267a9%3Ffl%3Dmdk%2C432f65a0%7C&itemType=video&autoPlay=false&title=Sv%C4%9Bt%20bez%20obalu%3A%20%C4%8Ce%C5%A1t%C3%AD%20voj%C3%A1ci%20na%20mis%C3%ADch%20(kr%C3%A1tk%C3%A1%20verze)&series=Sv%C4%9Bt%20bez%20obalu&serviceName=Seznam%20Zpr%C3%A1vy&poster=%2F%2Fd39-a.sdn.szn.cz%2Fd_39%2Fc_img_F_I%2FR5puJ.jpeg%3Ffl%3Dcro%2C0%2C0%2C1920%2C1080%7Cres%2C1200%2C%2C1%7Cjpg%2C80%2C%2C1&width=1920&height=1080&cutFrom=0&cutTo=0&splVersion=VOD&contentId=170889&contextId=35990&showAdvert=true&collocation=&autoplayPossible=true&embed=&isVideoTooShortForPreroll=false&isVideoTooLongForPostroll=true&videoCommentOpKey=&videoCommentId=&version=4.0.76&dotService=zpravy&gemiusPrismIdentifier=bVc1ZIb_Qax4W2v5xOPGpMeCP31kFfrTzj0SqPTLh_b.Z7&zoneIdPreroll=seznam.pack.videospot&skipOffsetPreroll=5&sectionPrefixPreroll=%2Fzpravy', 'info_dict': { 'id': '170889', 'ext': 'mp4', 'title': 'Svět bez obalu: Čeští vojáci na misích (krátká verze)', - } + 'thumbnail': r're:^https?://.*\.jpe?g', + 'duration': 241, + 'series': 'Svět bez obalu', + }, + 'params': { + 'skip_download': True, + }, + }, { + # with Location key + 'url': 
'https://www.seznamzpravy.cz/iframe/player?duration=null&serviceSlug=zpravy&src=https%3A%2F%2Flive-a.sdn.szn.cz%2Fv_39%2F59e468fe454f8472a96af9fa%3Ffl%3Dmdk%2C5c1e2840%7C&itemType=livevod&autoPlay=false&title=P%C5%99edseda%20KDU-%C4%8CSL%20Pavel%20B%C4%9Blobr%C3%A1dek%20ve%20volebn%C3%AD%20V%C3%BDzv%C4%9B%20Seznamu&series=V%C3%BDzva&serviceName=Seznam%20Zpr%C3%A1vy&poster=%2F%2Fd39-a.sdn.szn.cz%2Fd_39%2Fc_img_G_J%2FjTBCs.jpeg%3Ffl%3Dcro%2C0%2C0%2C1280%2C720%7Cres%2C1200%2C%2C1%7Cjpg%2C80%2C%2C1&width=16&height=9&cutFrom=0&cutTo=0&splVersion=VOD&contentId=185688&contextId=38489&showAdvert=true&collocation=&hideFullScreen=false&hideSubtitles=false&embed=&isVideoTooShortForPreroll=false&isVideoTooShortForPreroll2=false&isVideoTooLongForPostroll=false&fakePostrollZoneID=seznam.clanky.zpravy.preroll&fakePrerollZoneID=seznam.clanky.zpravy.preroll&videoCommentId=&trim=default_16x9&noPrerollVideoLength=30&noPreroll2VideoLength=undefined&noMidrollVideoLength=0&noPostrollVideoLength=999999&autoplayPossible=true&version=5.0.41&dotService=zpravy&gemiusPrismIdentifier=zD3g7byfW5ekpXmxTVLaq5Srjw5i4hsYo0HY1aBwIe..27&zoneIdPreroll=seznam.pack.videospot&skipOffsetPreroll=5&sectionPrefixPreroll=%2Fzpravy%2Fvyzva&zoneIdPostroll=seznam.pack.videospot&skipOffsetPostroll=5&sectionPrefixPostroll=%2Fzpravy%2Fvyzva&regression=false', + 'info_dict': { + 'id': '185688', + 'ext': 'mp4', + 'title': 'Předseda KDU-ČSL Pavel Bělobrádek ve volební Výzvě Seznamu', + 'thumbnail': r're:^https?://.*\.jpe?g', + 'series': 'Výzva', + }, + 'params': { + 'skip_download': True, + }, }] + @staticmethod + def _extract_urls(webpage): + return [ + mobj.group('url') for mobj in re.finditer( + r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//(?:www\.)?seznamzpravy\.cz/iframe/player\?.*?)\1', + webpage)] + def _extract_sdn_formats(self, sdn_url, video_id): sdn_data = self._download_json(sdn_url, video_id) + + if sdn_data.get('Location'): + sdn_url = sdn_data['Location'] + sdn_data = self._download_json(sdn_url, video_id) + formats = [] 
mp4_formats = try_get(sdn_data, lambda x: x['data']['mp4'], dict) or {} - for fmt, fmtdata in mp4_formats.items(): - relative_url = fmtdata.get('url') + for format_id, format_data in mp4_formats.items(): + relative_url = format_data.get('url') if not relative_url: continue try: - width, height = fmtdata.get('resolution') + width, height = format_data.get('resolution') except (TypeError, ValueError): width, height = None, None - formats.append({ - 'format_id': fmt, + f = { + 'url': urljoin(sdn_url, relative_url), + 'format_id': 'http-%s' % format_id, + 'tbr': int_or_none(format_data.get('bandwidth'), scale=1000), 'width': int_or_none(width), 'height': int_or_none(height), - 'url': urljoin(sdn_url, relative_url), - 'vcodec': fmtdata.get('codec'), - 'tbr': int_or_none(fmtdata.get('bandwidth'), scale=1000), - 'duration': int_or_none(fmtdata.get('duration'), scale=1000), - }) + } + f.update(parse_codecs(format_data.get('codec'))) + formats.append(f) - playlists = sdn_data.get('pls', {}) - dash_rel_url = try_get(playlists, lambda x: x['dash']['url'], compat_str) + pls = sdn_data.get('pls', {}) + + def get_url(format_id): + return try_get(pls, lambda x: x[format_id]['url'], compat_str) + + dash_rel_url = get_url('dash') if dash_rel_url: - formats.extend(self._extract_mpd_formats(urljoin(sdn_url, dash_rel_url), video_id, mpd_id='dash', fatal=False)) + formats.extend(self._extract_mpd_formats( + urljoin(sdn_url, dash_rel_url), video_id, mpd_id='dash', + fatal=False)) - hls_rel_url = try_get(playlists, lambda x: x['hls']['url'], compat_str) + hls_rel_url = get_url('hls') if hls_rel_url: - formats.extend(self._extract_m3u8_formats(urljoin(sdn_url, hls_rel_url), video_id, ext='mp4', m3u8_id='hls', fatal=False)) + formats.extend(self._extract_m3u8_formats( + urljoin(sdn_url, hls_rel_url), video_id, ext='mp4', + m3u8_id='hls', fatal=False)) self._sort_formats(formats) return formats def _real_extract(self, url): params = compat_parse_qs(compat_urllib_parse_urlparse(url).query) + 
src = params['src'][0] + title = params['title'][0] video_id = params.get('contentId', [_raw_id(src)])[0] + formats = self._extract_sdn_formats(src + 'spl2,2,VOD', video_id) + + duration = int_or_none(params.get('duration', [None])[0]) + series = params.get('series', [None])[0] + thumbnail = params.get('poster', [None])[0] return { 'id': video_id, - 'title': params['title'][0], - 'formats': self._extract_sdn_formats(src + 'spl2,2,VOD', video_id), + 'title': title, + 'thumbnail': thumbnail, + 'duration': duration, + 'series': series, + 'formats': formats, } class SeznamZpravyArticleIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?(?:seznam\.cz/zpravy|seznamzpravy\.cz)/clanek/(?:[-a-z0-9]+)-(?P<id>[0-9]+)' + _VALID_URL = r'https?://(?:www\.)?(?:seznam\.cz/zpravy|seznamzpravy\.cz)/clanek/(?:[^/?#&]+)-(?P<id>\d+)' _API_URL = 'https://apizpravy.seznam.cz/' _TESTS = [{ # two videos on one page, with SDN URL 'url': 'https://www.seznamzpravy.cz/clanek/jejich-svet-na-nas-utoci-je-lepsi-branit-se-na-jejich-pisecku-rika-reziser-a-major-v-zaloze-marhoul-35990', - 'params': {'skip_download': True}, - # ^ this is here instead of 'file_minsize': 1586, which does not work because - # test_download.py forces expected_minsize to at least 10k when test is running 'info_dict': { - 'id': '170889', - 'ext': 'mp4', - 'title': 'Svět bez obalu: Čeští vojáci na misích (krátká verze)', - } + 'id': '35990', + 'title': 'md5:6011c877a36905f28f271fcd8dcdb0f2', + 'description': 'md5:933f7b06fa337a814ba199d3596d27ba', + }, + 'playlist_count': 2, }, { # video with live stream URL 'url': 'https://www.seznam.cz/zpravy/clanek/znovu-do-vlady-s-ano-pavel-belobradek-ve-volebnim-specialu-seznamu-38489', 'info_dict': { - 'id': '185688', - 'ext': 'mp4', - 'title': 'Předseda KDU-ČSL Pavel Bělobrádek ve volební Výzvě Seznamu', - } + 'id': '38489', + 'title': 'md5:8fa1afdc36fd378cf0eba2b74c5aca60', + 'description': 'md5:428e7926a1a81986ec7eb23078004fb4', + }, + 'playlist_count': 1, }] - def 
_extract_caption(self, api_data, article_id): - title = api_data.get('title') or api_data.get('captionTitle') - caption = api_data.get('caption') - if not title or not caption: - return {} - - if 'sdn' in caption.get('video', {}): - src_url = caption['video']['sdn'] - elif 'liveStreamUrl' in caption: - src_url = self._download_json(caption['liveStreamUrl'], article_id)['Location'] - else: - return {} - - return { - 'id': caption.get('uid'), - 'title': caption.get('title'), - 'src': src_url, - } - - def _extract_content(self, api_data): - entries = [] - for item in api_data.get('content', []): - media = item.get('properties', {}).get('media', {}) - src_url = media.get('video', {}).get('sdn') - title = media.get('title') - if not src_url or not title: - continue - - entries.append({ - 'id': media.get('uid'), - 'title': title, - 'src': src_url, - }) - - return entries - - def _iframe_result(self, info_dict): - video_id = info_dict['id'] or _raw_id(info_dict['src']) - url = update_url_query('https://www.seznam.cz/zpravy/iframe/player', { - 'src': info_dict['src'], - 'title': info_dict['title'], - 'contentId': video_id, - 'serviceName': 'Seznam Zprávy', - }) - return self.url_result(url, ie='SeznamZpravy', video_id=video_id, video_title=info_dict['title']) - def _real_extract(self, url): article_id = self._match_id(url) - api_data = self._download_json(self._API_URL + 'v1/documents/' + article_id, article_id) - caption = self._extract_caption(api_data, article_id) - content = self._extract_content(api_data) + webpage = self._download_webpage(url, article_id) - if caption and not content: - return self._iframe_result(caption) - else: - if caption: - content.insert(0, caption) - return self.playlist_result( - [self._iframe_result(x) for x in content], - playlist_id=article_id, - playlist_title=api_data.get('title') or caption.get('title') - ) + info = self._search_json_ld(webpage, article_id, default={}) + print(info) + + title = info.get('title') or 
self._og_search_title(webpage, fatal=False) + description = info.get('description') or self._og_search_description(webpage) + + return self.playlist_result([ + self.url_result(url, ie=SeznamZpravyIE.ie_key()) + for url in SeznamZpravyIE._extract_urls(webpage)], + article_id, title, description) From 07e56e6df7f9739fbd0726369fd5c1cbba19b838 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 27 Jan 2018 23:41:25 +0700 Subject: [PATCH 14/17] [ChangeLog] Actualize --- ChangeLog | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index 4ee10ca7c..e0be3a026 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,8 +1,21 @@ version -Extractors +Core +* [extractor/common] Improve _json_ld for articles +* Switch codebase to use compat_b64decode ++ [compat] Add compat_b64decode +Extractors ++ [seznamzpravy] Add support for seznam.cz and seznamzpravy.cz (#14102, #14616) +* [dplay] Bypass geo restriction ++ [dplay] Add support for disco-api videos (#15396) * [youtube] Extract precise error messages (#15284) +* [teachertube] Capture and output error message +* [teachertube] Fix and relax thumbnail extraction (#15403) ++ [prosiebensat1] Add another clip id regular expression (#15378) +* [tbs] Update tokenizer url (#15395) +* [mixcloud] Use compat_b64decode (#15394) +- [thesixtyone] Remove extractor (#15341) version 2018.01.21 From eee1692ff3d811101fbfa996e118ee397ddeb248 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 27 Jan 2018 23:44:28 +0700 Subject: [PATCH 15/17] release 2018.01.27 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 3 ++- youtube_dl/version.py | 2 +- 4 files changed, 7 insertions(+), 6 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 145c3ff83..c5eff009c 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run 
`youtube-dl --version` and ensure your version is *2018.01.21*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.01.21** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.01.27*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.01.27** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2018.01.21 +[debug] youtube-dl version 2018.01.27 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index e0be3a026..00c5c9c6b 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2018.01.27 Core * [extractor/common] Improve _json_ld for articles diff --git a/docs/supportedsites.md b/docs/supportedsites.md index b0825c58b..c15b5eec5 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -732,6 +732,8 @@ - **ServingSys** - **Servus** - **Sexu** + - **SeznamZpravy** + - **SeznamZpravyArticle** - **Shahid** - 
**ShahidShow** - **Shared**: shared.sx @@ -822,7 +824,6 @@ - **ThePlatform** - **ThePlatformFeed** - **TheScene** - - **TheSixtyOne** - **TheStar** - **TheSun** - **TheWeatherChannel** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 11e82f433..8a2b57ffb 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2018.01.21' +__version__ = '2018.01.27' From c989bdbef8fdcfd38d51b987a4c745479d02e2f2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 28 Jan 2018 05:14:40 +0700 Subject: [PATCH 16/17] [downloader/ism] Fix Python 3.2 support --- youtube_dl/downloader/ism.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/downloader/ism.py b/youtube_dl/downloader/ism.py index 9b001ecff..138564267 100644 --- a/youtube_dl/downloader/ism.py +++ b/youtube_dl/downloader/ism.py @@ -139,7 +139,7 @@ def write_piff_header(stream, params): sample_entry_payload += u16.pack(0x18) # depth sample_entry_payload += s16.pack(-1) # pre defined - codec_private_data = binascii.unhexlify(params['codec_private_data']) + codec_private_data = binascii.unhexlify(params['codec_private_data'].encode('utf-8')) if fourcc in ('H264', 'AVC1'): sps, pps = codec_private_data.split(u32.pack(1))[1:] avcc_payload = u8.pack(1) # configuration version From 65220c3bd6bfcb9023af904634ce1e76592cfe3e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 28 Jan 2018 03:04:39 +0700 Subject: [PATCH 17/17] Add support for IronPython --- youtube_dl/compat.py | 16 ++++++++++++++++ youtube_dl/downloader/ism.py | 26 ++++++++++++++------------ youtube_dl/utils.py | 4 ++-- 3 files changed, 32 insertions(+), 14 deletions(-) diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py index 646c9d79c..27ece2d29 100644 --- a/youtube_dl/compat.py +++ b/youtube_dl/compat.py @@ -2897,9 +2897,24 @@ except TypeError: if isinstance(spec, compat_str): spec = 
spec.encode('ascii') return struct.unpack(spec, *args) + + class compat_Struct(struct.Struct): + def __init__(self, fmt): + if isinstance(fmt, compat_str): + fmt = fmt.encode('ascii') + super(compat_Struct, self).__init__(fmt) else: compat_struct_pack = struct.pack compat_struct_unpack = struct.unpack + if platform.python_implementation() == 'IronPython' and sys.version_info < (2, 7, 8): + class compat_Struct(struct.Struct): + def unpack(self, string): + if not isinstance(string, buffer): + string = buffer(string) + return super(compat_Struct, self).unpack(string) + else: + compat_Struct = struct.Struct + try: from future_builtins import zip as compat_zip @@ -2941,6 +2956,7 @@ __all__ = [ 'compat_HTMLParseError', 'compat_HTMLParser', 'compat_HTTPError', + 'compat_Struct', 'compat_b64decode', 'compat_basestring', 'compat_chr', diff --git a/youtube_dl/downloader/ism.py b/youtube_dl/downloader/ism.py index 138564267..063fcf444 100644 --- a/youtube_dl/downloader/ism.py +++ b/youtube_dl/downloader/ism.py @@ -1,25 +1,27 @@ from __future__ import unicode_literals import time -import struct import binascii import io from .fragment import FragmentFD -from ..compat import compat_urllib_error +from ..compat import ( + compat_Struct, + compat_urllib_error, +) -u8 = struct.Struct(b'>B') -u88 = struct.Struct(b'>Bx') -u16 = struct.Struct(b'>H') -u1616 = struct.Struct(b'>Hxx') -u32 = struct.Struct(b'>I') -u64 = struct.Struct(b'>Q') +u8 = compat_Struct('>B') +u88 = compat_Struct('>Bx') +u16 = compat_Struct('>H') +u1616 = compat_Struct('>Hxx') +u32 = compat_Struct('>I') +u64 = compat_Struct('>Q') -s88 = struct.Struct(b'>bx') -s16 = struct.Struct(b'>h') -s1616 = struct.Struct(b'>hxx') -s32 = struct.Struct(b'>i') +s88 = compat_Struct('>bx') +s16 = compat_Struct('>h') +s1616 = compat_Struct('>hxx') +s32 = compat_Struct('>i') unity_matrix = (s32.pack(0x10000) + s32.pack(0) * 3) * 2 + s32.pack(0x40000000) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 2fe9cf585..ef44b99a5 
100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -866,8 +866,8 @@ def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs): # expected HTTP responses to meet HTTP/1.0 or later (see also # https://github.com/rg3/youtube-dl/issues/6727) if sys.version_info < (3, 0): - kwargs[b'strict'] = True - hc = http_class(*args, **kwargs) + kwargs['strict'] = True + hc = http_class(*args, **compat_kwargs(kwargs)) source_address = ydl_handler._params.get('source_address') if source_address is not None: sa = (source_address, 0)