From e5a088dc4be4fdcc96927a9f1b7284d4cd49c415 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 31 Oct 2016 23:32:08 +0700 Subject: [PATCH 01/25] [utils] Fix --match-filter for int-like strings (closes #11082) --- test/test_YoutubeDL.py | 6 ++++++ youtube_dl/utils.py | 12 +++++++++--- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py index 0dfe25c00..8bf00bea9 100644 --- a/test/test_YoutubeDL.py +++ b/test/test_YoutubeDL.py @@ -605,6 +605,7 @@ class TestYoutubeDL(unittest.TestCase): 'extractor': 'TEST', 'duration': 30, 'filesize': 10 * 1024, + 'playlist_id': '42', } second = { 'id': '2', @@ -614,6 +615,7 @@ class TestYoutubeDL(unittest.TestCase): 'duration': 10, 'description': 'foo', 'filesize': 5 * 1024, + 'playlist_id': '43', } videos = [first, second] @@ -650,6 +652,10 @@ class TestYoutubeDL(unittest.TestCase): res = get_videos(f) self.assertEqual(res, ['1']) + f = match_filter_func('playlist_id = 42') + res = get_videos(f) + self.assertEqual(res, ['1']) + def test_playlist_items_selection(self): entries = [{ 'id': compat_str(i), diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 2770c5f1c..1a5ce8688 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -2345,11 +2345,18 @@ def _match_one(filter_part, dct): m = operator_rex.search(filter_part) if m: op = COMPARISON_OPERATORS[m.group('op')] - if m.group('strval') is not None: + actual_value = dct.get(m.group('key')) + if (m.group('strval') is not None or + # If the original field is a string and matching comparisonvalue is + # a number we should respect the origin of the original field + # and process comparison value as a string (see + # https://github.com/rg3/youtube-dl/issues/11082). + actual_value is not None and m.group('intval') is not None and + isinstance(actual_value, compat_str)): if m.group('op') not in ('=', '!='): raise ValueError( 'Operator %s does not support string values!' 
% m.group('op')) - comparison_value = m.group('strval') + comparison_value = m.group('strval') or m.group('intval') else: try: comparison_value = int(m.group('intval')) @@ -2361,7 +2368,6 @@ def _match_one(filter_part, dct): raise ValueError( 'Invalid integer value %r in filter part %r' % ( m.group('intval'), filter_part)) - actual_value = dct.get(m.group('key')) if actual_value is None: return m.group('none_inclusive') return op(actual_value, comparison_value) From b82c33dd67c24cd6db94a9793b2f62a5db412795 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 1 Nov 2016 01:15:46 +0700 Subject: [PATCH 02/25] [extractor/common] Improve mpd base URL extraction (closes #10909, closes #11079) --- youtube_dl/extractor/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 415dc84c8..68b325fca 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1539,7 +1539,7 @@ class InfoExtractor(object): if res is False: return [] mpd, urlh = res - mpd_base_url = re.match(r'https?://.+/', urlh.geturl()).group() + mpd_base_url = re.match(r'https?://[^?#&]+/', urlh.geturl()).group() return self._parse_mpd_formats( compat_etree_fromstring(mpd.encode('utf-8')), mpd_id, mpd_base_url, From e3577722b0bdb16e786eba8029d09ccc1983e0ce Mon Sep 17 00:00:00 2001 From: NeroBurner Date: Mon, 26 Sep 2016 22:45:02 +0200 Subject: [PATCH 03/25] [nicknight] Add extractor --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/nick.py | 27 ++++++++++++++++++++++++++- 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index dea97920b..913ffe29a 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -596,6 +596,7 @@ from .nhl import ( from .nick import ( NickIE, NickDeIE, + NickNightAtIE, ) from .niconico import NiconicoIE, NiconicoPlaylistIE from .ninecninemedia import ( diff --git a/youtube_dl/extractor/nick.py b/youtube_dl/extractor/nick.py index 57cf1ce8e..a96bb0ef5 100644 --- a/youtube_dl/extractor/nick.py +++ b/youtube_dl/extractor/nick.py @@ -69,7 +69,7 @@ class NickIE(MTVServicesInfoExtractor): class NickDeIE(MTVServicesInfoExtractor): IE_NAME = 'nick.de' - _VALID_URL = r'https?://(?:www\.)?(?:nick\.de|nickelodeon\.nl)/(?:playlist|shows)/(?:[^/]+/)*(?P[^/?#&]+)' + _VALID_URL = r'https?://(?:www\.)?(?:nick\.de|nickelodeon\.(?:nl|at))/(?:playlist|shows)/(?:[^/]+/)*(?P[^/?#&]+)' _TESTS = [{ 'url': 'http://www.nick.de/playlist/3773-top-videos/videos/episode/17306-zu-wasser-und-zu-land-rauchende-erdnusse', 'only_matching': True, @@ -91,3 +91,28 @@ class NickDeIE(MTVServicesInfoExtractor): {'siteKey': 'nick.de'}) return self._get_videos_info_from_url(mrss_url, video_id) + + +class NickNightAtIE(MTVServicesInfoExtractor): + IE_NAME = 'nicknight.de' + _VALID_URL = r'https?://(?:www\.)nicknight\.(?:de|at|tv)/(?:playlist|shows)/(?:[^/]+/)*(?P[^/?#&]+)' + _TESTS = [{ + 'url': 'http://www.nicknight.at/shows/977-awkward/videos/85987-nimmer-beste-freunde', + 'only_matching': True, + }, { + 'url': 'http://www.nicknight.at/shows/977-awkward', + 'only_matching': True, + }, { + 'url': 'http://www.nicknight.at/shows/1900-faking-it', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + + mrss_url = self._search_regex( + r'mrss: (["\'])(?Phttp.+?)\1', webpage, 'mrss url', 
group='url')
+
+        return self._get_videos_info_from_url(mrss_url, video_id)

From 9c82bba05d5495d29be2ee20fc9cb690b37fcdce Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?=
Date: Wed, 2 Nov 2016 01:29:05 +0700
Subject: [PATCH 04/25] [nickde] Improve extraction

---
 youtube_dl/extractor/nick.py | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/youtube_dl/extractor/nick.py b/youtube_dl/extractor/nick.py
index a96bb0ef5..36364bee9 100644
--- a/youtube_dl/extractor/nick.py
+++ b/youtube_dl/extractor/nick.py
@@ -1,6 +1,8 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
+import re
+
 from .mtv import MTVServicesInfoExtractor
 from ..utils import update_url_query
 
@@ -69,7 +71,7 @@ class NickIE(MTVServicesInfoExtractor):
 
 class NickDeIE(MTVServicesInfoExtractor):
     IE_NAME = 'nick.de'
-    _VALID_URL = r'https?://(?:www\.)?(?:nick\.de|nickelodeon\.(?:nl|at))/(?:playlist|shows)/(?:[^/]+/)*(?P<id>[^/?#&]+)'
+    _VALID_URL = r'https?://(?:www\.)?(?P<host>nick\.de|nickelodeon\.(?:nl|at))/(?:playlist|shows)/(?:[^/]+/)*(?P<id>[^/?#&]+)'
     _TESTS = [{
         'url': 'http://www.nick.de/playlist/3773-top-videos/videos/episode/17306-zu-wasser-und-zu-land-rauchende-erdnusse',
         'only_matching': True,
@@ -79,16 +81,21 @@ class NickDeIE(MTVServicesInfoExtractor):
     }, {
         'url': 'http://www.nickelodeon.nl/shows/474-spongebob/videos/17403-een-kijkje-in-de-keuken-met-sandy-van-binnenuit',
         'only_matching': True,
+    }, {
+        'url': 'http://www.nickelodeon.at/playlist/3773-top-videos/videos/episode/77993-das-letzte-gefecht',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
-        video_id = self._match_id(url)
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        host = mobj.group('host')
 
         webpage = self._download_webpage(url, video_id)
 
         mrss_url = update_url_query(self._search_regex(
             r'data-mrss=(["\'])(?P<url>http.+?)\1', webpage, 'mrss url', group='url'),
-            {'siteKey': 'nick.de'})
+            {'siteKey': host})
 
         return self._get_videos_info_from_url(mrss_url, video_id)

From f449c061d04b37f70dafa8c01a2e1fb7a47a9a5f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?=
Date: Wed, 2 Nov 2016 01:35:53 +0700
Subject: [PATCH 05/25] [nicknight] Improve extraction (closes #10769)

---
 youtube_dl/extractor/extractors.py | 2 +-
 youtube_dl/extractor/nick.py | 28 +++++++++++++---------------
 2 files changed, 14 insertions(+), 16 deletions(-)

diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index 913ffe29a..f30ac5aaf 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -596,7 +596,7 @@ from .nhl import (
 from .nick import (
     NickIE,
     NickDeIE,
-    NickNightAtIE,
+    NickNightIE,
 )
 from .niconico import NiconicoIE, NiconicoPlaylistIE
 from .ninecninemedia import (
diff --git a/youtube_dl/extractor/nick.py b/youtube_dl/extractor/nick.py
index 36364bee9..7672845bf 100644
--- a/youtube_dl/extractor/nick.py
+++ b/youtube_dl/extractor/nick.py
@@ -86,6 +86,11 @@ class NickDeIE(MTVServicesInfoExtractor):
         'only_matching': True,
     }]
 
+    def _extract_mrss_url(self, webpage, host):
+        return update_url_query(self._search_regex(
+            r'data-mrss=(["\'])(?P<url>http.+?)\1', webpage, 'mrss url', group='url'),
+            {'siteKey': host})
+
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         video_id = mobj.group('id')
@@ -93,16 +98,14 @@ class NickDeIE(MTVServicesInfoExtractor):
 
         webpage = self._download_webpage(url, video_id)
 
-        mrss_url = update_url_query(self._search_regex(
-            r'data-mrss=(["\'])(?P<url>http.+?)\1', webpage, 'mrss url', group='url'),
-
{'siteKey': host}) + mrss_url = self._extract_mrss_url(webpage, host) return self._get_videos_info_from_url(mrss_url, video_id) -class NickNightAtIE(MTVServicesInfoExtractor): - IE_NAME = 'nicknight.de' - _VALID_URL = r'https?://(?:www\.)nicknight\.(?:de|at|tv)/(?:playlist|shows)/(?:[^/]+/)*(?P[^/?#&]+)' +class NickNightIE(NickDeIE): + IE_NAME = 'nicknight' + _VALID_URL = r'https?://(?:www\.)(?Pnicknight\.(?:de|at|tv))/(?:playlist|shows)/(?:[^/]+/)*(?P[^/?#&]+)' _TESTS = [{ 'url': 'http://www.nicknight.at/shows/977-awkward/videos/85987-nimmer-beste-freunde', 'only_matching': True, @@ -114,12 +117,7 @@ class NickNightAtIE(MTVServicesInfoExtractor): 'only_matching': True, }] - def _real_extract(self, url): - video_id = self._match_id(url) - - webpage = self._download_webpage(url, video_id) - - mrss_url = self._search_regex( - r'mrss: (["\'])(?Phttp.+?)\1', webpage, 'mrss url', group='url') - - return self._get_videos_info_from_url(mrss_url, video_id) + def _extract_mrss_url(self, webpage, *args): + return self._search_regex( + r'mrss\s*:\s*(["\'])(?Phttp.+?)\1', webpage, + 'mrss url', group='url') From b2758123c5e759fdb0c7d23d380e4dd9e245cd4a Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Wed, 19 Oct 2016 16:22:40 +0100 Subject: [PATCH 06/25] add Basic support for Smooth Streaming protocol(#8118) --- youtube_dl/YoutubeDL.py | 2 +- youtube_dl/downloader/__init__.py | 2 + youtube_dl/downloader/ism.py | 273 ++++++++++++++++++++++++++++++ youtube_dl/extractor/common.py | 101 +++++++++++ 4 files changed, 377 insertions(+), 1 deletion(-) create mode 100644 youtube_dl/downloader/ism.py diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 99825e343..53f20ac2c 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1658,7 +1658,7 @@ class YoutubeDL(object): video_ext, audio_ext = audio.get('ext'), video.get('ext') if video_ext and audio_ext: COMPATIBLE_EXTS = ( - ('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v'), + ('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma'), ('webm') ) for exts in COMPATIBLE_EXTS: diff --git a/youtube_dl/downloader/__init__.py b/youtube_dl/downloader/__init__.py index 817591d97..16952e359 100644 --- a/youtube_dl/downloader/__init__.py +++ b/youtube_dl/downloader/__init__.py @@ -7,6 +7,7 @@ from .http import HttpFD from .rtmp import RtmpFD from .dash import DashSegmentsFD from .rtsp import RtspFD +from .ism import IsmFD from .external import ( get_external_downloader, FFmpegFD, @@ -24,6 +25,7 @@ PROTOCOL_MAP = { 'rtsp': RtspFD, 'f4m': F4mFD, 'http_dash_segments': DashSegmentsFD, + 'ism': IsmFD, } diff --git a/youtube_dl/downloader/ism.py b/youtube_dl/downloader/ism.py new file mode 100644 index 000000000..9f8f14b66 --- /dev/null +++ b/youtube_dl/downloader/ism.py @@ -0,0 +1,273 @@ +from __future__ import unicode_literals + +import os +import time +import struct +import binascii +import io + +from .fragment import FragmentFD +from ..compat import compat_urllib_error +from ..utils import ( + sanitize_open, + encodeFilename, +) + + +u8 = struct.Struct(b'>B') +u88 = struct.Struct(b'>Bx') +u16 = struct.Struct(b'>H') +u1616 = struct.Struct(b'>Hxx') +u32 = struct.Struct(b'>I') +u64 = struct.Struct(b'>Q') + +s88 = struct.Struct(b'>bx') +s16 = struct.Struct(b'>h') +s1616 = struct.Struct(b'>hxx') +s32 = struct.Struct(b'>i') + +unity_matrix = (s32.pack(0x10000) + s32.pack(0) * 3) * 2 + s32.pack(0x40000000) + +TRACK_ENABLED = 0x1 +TRACK_IN_MOVIE = 0x2 +TRACK_IN_PREVIEW = 0x4 + +SELF_CONTAINED = 0x1 + + +def box(box_type, payload): + return 
u32.pack(8 + len(payload)) + box_type + payload + + +def full_box(box_type, version, flags, payload): + return box(box_type, u8.pack(version) + u32.pack(flags)[1:] + payload) + + +def write_piff_header(stream, params): + track_id = params['track_id'] + fourcc = params['fourcc'] + duration = params['duration'] + timescale = params.get('timescale', 10000000) + language = params.get('language', 'und') + height = params.get('height', 0) + width = params.get('width', 0) + is_audio = width == 0 and height == 0 + creation_time = modification_time = int(time.time()) + + ftyp_payload = b'isml' # major brand + ftyp_payload += u32.pack(1) # minor version + ftyp_payload += b'piff' + b'iso2' # compatible brands + stream.write(box(b'ftyp', ftyp_payload)) # File Type Box + + mvhd_payload = u64.pack(creation_time) + mvhd_payload += u64.pack(modification_time) + mvhd_payload += u32.pack(timescale) + mvhd_payload += u64.pack(duration) + mvhd_payload += s1616.pack(1) # rate + mvhd_payload += s88.pack(1) # volume + mvhd_payload += u16.pack(0) # reserved + mvhd_payload += u32.pack(0) * 2 # reserved + mvhd_payload += unity_matrix + mvhd_payload += u32.pack(0) * 6 # pre defined + mvhd_payload += u32.pack(0xffffffff) # next track id + moov_payload = full_box(b'mvhd', 1, 0, mvhd_payload) # Movie Header Box + + tkhd_payload = u64.pack(creation_time) + tkhd_payload += u64.pack(modification_time) + tkhd_payload += u32.pack(track_id) # track id + tkhd_payload += u32.pack(0) # reserved + tkhd_payload += u64.pack(duration) + tkhd_payload += u32.pack(0) * 2 # reserved + tkhd_payload += s16.pack(0) # layer + tkhd_payload += s16.pack(0) # alternate group + tkhd_payload += s88.pack(1 if is_audio else 0) # volume + tkhd_payload += u16.pack(0) # reserved + tkhd_payload += unity_matrix + tkhd_payload += u1616.pack(width) + tkhd_payload += u1616.pack(height) + trak_payload = full_box(b'tkhd', 1, TRACK_ENABLED | TRACK_IN_MOVIE | TRACK_IN_PREVIEW, tkhd_payload) # Track Header Box + + mdhd_payload = u64.pack(creation_time) + mdhd_payload += u64.pack(modification_time) + mdhd_payload += u32.pack(timescale) + mdhd_payload += u64.pack(duration) + mdhd_payload += u16.pack(((ord(language[0]) - 0x60) << 10) | ((ord(language[1]) - 0x60) << 5) | (ord(language[2]) - 0x60)) + mdhd_payload += u16.pack(0) # pre defined + mdia_payload = full_box(b'mdhd', 1, 0, mdhd_payload) # Media Header Box + + hdlr_payload = u32.pack(0) # pre defined + hdlr_payload += b'soun' if is_audio else b'vide' # handler type + hdlr_payload += u32.pack(0) * 3 # reserved + hdlr_payload += (b'Sound' if is_audio else b'Video') + b'Handler\0' # name + mdia_payload += full_box(b'hdlr', 0, 0, hdlr_payload) # Handler Reference Box + + if is_audio: + smhd_payload = s88.pack(0) # balance + smhd_payload = u16.pack(0) # reserved + media_header_box = full_box(b'smhd', 0, 0, smhd_payload) # Sound Media Header + else: + vmhd_payload = u16.pack(0) # graphics mode + vmhd_payload += u16.pack(0) * 3 # opcolor + media_header_box = full_box(b'vmhd', 0, 1, vmhd_payload) # Video Media Header + minf_payload = media_header_box + + dref_payload = u32.pack(1) # entry count + dref_payload += full_box(b'url ', 0, SELF_CONTAINED, b'') # Data Entry URL Box + dinf_payload = full_box(b'dref', 0, 0, dref_payload) # Data Reference Box + minf_payload += box(b'dinf', dinf_payload) # Data Information Box + + stsd_payload = u32.pack(1) # entry count + + sample_entry_payload = u8.pack(0) * 6 # reserved + sample_entry_payload += u16.pack(1) # data reference index + if is_audio: + sample_entry_payload += 
u32.pack(0) * 2 # reserved + sample_entry_payload += u16.pack(params.get('channels', 2)) + sample_entry_payload += u16.pack(params.get('bits_per_sample', 16)) + sample_entry_payload += u16.pack(0) # pre defined + sample_entry_payload += u16.pack(0) # reserved + sample_entry_payload += u1616.pack(params['sampling_rate']) + + if fourcc == 'AACL': + smaple_entry_box = box(b'mp4a', sample_entry_payload) + else: + sample_entry_payload = sample_entry_payload + sample_entry_payload += u16.pack(0) # pre defined + sample_entry_payload += u16.pack(0) # reserved + sample_entry_payload += u32.pack(0) * 3 # pre defined + sample_entry_payload += u16.pack(width) + sample_entry_payload += u16.pack(height) + sample_entry_payload += u1616.pack(0x48) # horiz resolution 72 dpi + sample_entry_payload += u1616.pack(0x48) # vert resolution 72 dpi + sample_entry_payload += u32.pack(0) # reserved + sample_entry_payload += u16.pack(1) # frame count + sample_entry_payload += u8.pack(0) * 32 # compressor name + sample_entry_payload += u16.pack(0x18) # depth + sample_entry_payload += s16.pack(-1) # pre defined + + codec_private_data = binascii.unhexlify(params['codec_private_data']) + if fourcc in ('H264', 'AVC1'): + sps, pps = codec_private_data.split(u32.pack(1))[1:] + avcc_payload = u8.pack(1) # configuration version + avcc_payload += sps[1] # avc profile indication + avcc_payload += sps[2] # profile compatibility + avcc_payload += sps[3] # avc level indication + avcc_payload += u8.pack(0xfc | (params.get('nal_unit_length_field', 4) - 1)) # complete represenation (1) + reserved (11111) + length size minus one + avcc_payload += u8.pack(1) # reserved (0) + number of sps (0000001) + avcc_payload += u16.pack(len(sps)) + avcc_payload += sps + avcc_payload += u8.pack(1) # number of pps + avcc_payload += u16.pack(len(pps)) + avcc_payload += pps + sample_entry_payload += box(b'avcC', avcc_payload) # AVC Decoder Configuration Record + smaple_entry_box = box(b'avc1', sample_entry_payload) # AVC Simple Entry + stsd_payload += smaple_entry_box + + stbl_payload = full_box(b'stsd', 0, 0, stsd_payload) # Sample Description Box + + stts_payload = u32.pack(0) # entry count + stbl_payload += full_box(b'stts', 0, 0, stts_payload) # Decoding Time to Sample Box + + stsc_payload = u32.pack(0) # entry count + stbl_payload += full_box(b'stsc', 0, 0, stsc_payload) # Sample To Chunk Box + + stco_payload = u32.pack(0) # entry count + stbl_payload += full_box(b'stco', 0, 0, stco_payload) # Chunk Offset Box + + minf_payload += box(b'stbl', stbl_payload) # Sample Table Box + + mdia_payload += box(b'minf', minf_payload) # Media Information Box + + trak_payload += box(b'mdia', mdia_payload) # Media Box + + moov_payload += box(b'trak', trak_payload) # Track Box + + mehd_payload = u64.pack(duration) + mvex_payload = full_box(b'mehd', 1, 0, mehd_payload) # Movie Extends Header Box + + trex_payload = u32.pack(track_id) # track id + trex_payload += u32.pack(1) # default sample description index + trex_payload += u32.pack(0) # default sample duration + trex_payload += u32.pack(0) # default sample size + trex_payload += u32.pack(0) # default sample flags + mvex_payload += full_box(b'trex', 0, 0, trex_payload) # Track Extends Box + + moov_payload += box(b'mvex', mvex_payload) # Movie Extends Box + stream.write(box(b'moov', moov_payload)) # Movie Box + + +def extract_box_data(data, box_sequence): + data_reader = io.BytesIO(data) + while True: + box_size = u32.unpack(data_reader.read(4))[0] + box_type = data_reader.read(4) + if box_type == box_sequence[0]: 
+ box_data = data_reader.read(box_size - 8) + if len(box_sequence) == 1: + return box_data + return extract_box_data(box_data, box_sequence[1:]) + data_reader.seek(box_size - 8, 1) + + +class IsmFD(FragmentFD): + """ + Download segments in a ISM manifest + """ + + FD_NAME = 'ism' + + def real_download(self, filename, info_dict): + segments = info_dict['fragments'][:1] if self.params.get( + 'test', False) else info_dict['fragments'] + + ctx = { + 'filename': filename, + 'total_frags': len(segments), + } + + self._prepare_and_start_frag_download(ctx) + + segments_filenames = [] + + fragment_retries = self.params.get('fragment_retries', 0) + skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True) + + track_written = False + for i, segment in enumerate(segments): + segment_url = segment['url'] + segment_name = 'Frag%d' % i + target_filename = '%s-%s' % (ctx['tmpfilename'], segment_name) + count = 0 + while count <= fragment_retries: + try: + success = ctx['dl'].download(target_filename, {'url': segment_url}) + if not success: + return False + down, target_sanitized = sanitize_open(target_filename, 'rb') + down_data = down.read() + if not track_written: + tfhd_data = extract_box_data(down_data, [b'moof', b'traf', b'tfhd']) + info_dict['_download_params']['track_id'] = u32.unpack(tfhd_data[4:8])[0] + write_piff_header(ctx['dest_stream'], info_dict['_download_params']) + track_written = True + ctx['dest_stream'].write(down_data) + down.close() + segments_filenames.append(target_sanitized) + break + except compat_urllib_error.HTTPError as err: + count += 1 + if count <= fragment_retries: + self.report_retry_fragment(err, segment_name, count, fragment_retries) + if count > fragment_retries: + if skip_unavailable_fragments: + self.report_skip_fragment(segment_name) + continue + self.report_error('giving up after %s fragment retries' % fragment_retries) + return False + + self._finish_frag_download(ctx) + + for segment_file in segments_filenames: + os.remove(encodeFilename(segment_file)) + + return True diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 68b325fca..2e9f05ae3 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1780,6 +1780,107 @@ class InfoExtractor(object): self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type) return formats + def _extract_ism_formats(self, ism_url, video_id, ism_id=None, note=None, errnote=None, fatal=True): + res = self._download_webpage_handle( + ism_url, video_id, + note=note or 'Downloading ISM manifest', + errnote=errnote or 'Failed to download ISM manifest', + fatal=fatal) + if res is False: + return [] + ism, urlh = res + + return self._parse_ism_formats( + compat_etree_fromstring(ism.encode('utf-8')), urlh.geturl(), ism_id) + + def _parse_ism_formats(self, ism_doc, ism_url, ism_id=None): + if ism_doc.get('IsLive') == 'TRUE' or ism_doc.find('Protection') is not None: + return [] + + ism_base_url = re.match(r'https?://.+/', ism_url).group() + + duration = int(ism_doc.attrib['Duration']) + timescale = int_or_none(ism_doc.get('TimeScale')) or 10000000 + + formats = [] + for stream in ism_doc.findall('StreamIndex'): + stream_type = stream.get('Type') + if stream_type not in ('video', 'audio'): + continue + url_pattern = stream.attrib['Url'] + stream_timescale = int_or_none(stream.get('TimeScale')) or timescale + stream_name = stream.get('Name') + for track in stream.findall('QualityLevel'): + fourcc = track.get('FourCC') + # TODO: add support for WVC1 and 
WMAP + if fourcc not in ('H264', 'AVC1', 'AACL'): + self.report_warning('%s is not a supported codec' % fourcc) + continue + tbr = int(track.attrib['Bitrate']) // 1000 + width = int_or_none(track.get('MaxWidth')) + height = int_or_none(track.get('MaxHeight')) + sampling_rate = int_or_none(track.get('SamplingRate')) + + track_url_pattern = re.sub(r'{[Bb]itrate}', track.attrib['Bitrate'], url_pattern) + track_url_pattern = compat_urlparse.urljoin(ism_url, track_url_pattern) + + fragments = [] + fragment_ctx = { + 'time': 0, + } + stream_fragments = stream.findall('c') + for stream_fragment_index, stream_fragment in enumerate(stream_fragments): + fragment_ctx['time'] = int_or_none(stream_fragment.get('t')) or fragment_ctx['time'] + fragment_repeat = int_or_none(stream_fragment.get('r')) or 1 + fragment_ctx['duration'] = int_or_none(stream_fragment.get('d')) + if not fragment_ctx['duration']: + try: + next_fragment_time = int(stream_fragment[stream_fragment_index + 1].attrib['t']) + except IndexError: + next_fragment_time = duration + fragment_ctx['duration'] = (next_fragment_time - frgament_time) / fragment_repeat + for _ in range(fragment_repeat): + fragments.append({ + 'url': re.sub(r'{start[ _]time}', str(fragment_ctx['time']), track_url_pattern), + 'duration': fragment_ctx['duration'] / stream_timescale, + }) + fragment_ctx['time'] += fragment_ctx['duration'] + + format_id = [] + if ism_id: + format_id.append(ism_id) + if stream_name: + format_id.append(stream_name) + format_id.append(compat_str(tbr)) + + formats.append({ + 'format_id': '-'.join(format_id), + 'url': ism_url, + 'manifest_url': ism_url, + 'ext': 'ismv' if stream_type == 'video' else 'isma', + 'width': width, + 'height': height, + 'tbr': tbr, + 'asr': sampling_rate, + 'vcodec': 'none' if stream_type == 'audio' else fourcc, + 'acodec': 'none' if stream_type == 'video' else fourcc, + 'protocol': 'ism', + 'fragments': fragments, + '_download_params': { + 'duration': duration, + 'timescale': stream_timescale, + 'width': width or 0, + 'height': height or 0, + 'fourcc': fourcc, + 'codec_private_data': track.get('CodecPrivateData'), + 'sampling_rate': sampling_rate, + 'channels': int_or_none(track.get('Channels', 2)), + 'bits_per_sample': int_or_none(track.get('BitsPerSample', 16)), + 'nal_unit_length_field': int_or_none(track.get('NALUnitLengthField', 4)), + }, + }) + return formats + def _parse_html5_media_entries(self, base_url, webpage, video_id, m3u8_id=None, m3u8_entry_protocol='m3u8'): def absolute_url(video_url): return compat_urlparse.urljoin(base_url, video_url) From 639e3b5c9985aacf7c0dc018c211a78161bbafd2 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Wed, 19 Oct 2016 16:24:43 +0100 Subject: [PATCH 07/25] extract ISM formats in some of the extractors --- youtube_dl/extractor/microsoftvirtualacademy.py | 7 +++++-- youtube_dl/extractor/msn.py | 5 ++--- youtube_dl/extractor/onet.py | 4 ++-- youtube_dl/extractor/tvp.py | 3 +++ 4 files changed, 12 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/microsoftvirtualacademy.py b/youtube_dl/extractor/microsoftvirtualacademy.py index afd3e98ec..8e0aee0e6 100644 --- a/youtube_dl/extractor/microsoftvirtualacademy.py +++ b/youtube_dl/extractor/microsoftvirtualacademy.py @@ -71,12 +71,15 @@ class MicrosoftVirtualAcademyIE(MicrosoftVirtualAcademyBaseIE): formats = [] for sources in settings.findall(compat_xpath('.//MediaSources')): - if sources.get('videoType') == 'smoothstreaming': - continue + sources_type = sources.get('videoType') for source in 
sources.findall(compat_xpath('./MediaSource')): video_url = source.text if not video_url or not video_url.startswith('http'): continue + if sources_type == 'smoothstreaming': + formats.extend(self._extract_ism_formats( + video_url, video_id, 'mss', fatal=False)) + continue video_mode = source.get('videoMode') height = int_or_none(self._search_regex( r'^(\d+)[pP]$', video_mode or '', 'height', default=None)) diff --git a/youtube_dl/extractor/msn.py b/youtube_dl/extractor/msn.py index 1ec8e0f50..d75ce8b3b 100644 --- a/youtube_dl/extractor/msn.py +++ b/youtube_dl/extractor/msn.py @@ -69,10 +69,9 @@ class MSNIE(InfoExtractor): if not format_url: continue ext = determine_ext(format_url) - # .ism is not yet supported (see - # https://github.com/rg3/youtube-dl/issues/8118) if ext == 'ism': - continue + formats.extend(self._extract_ism_formats( + format_url + '/Manifest', display_id, 'mss', fatal=False)) if 'm3u8' in format_url: # m3u8_native should not be used here until # https://github.com/rg3/youtube-dl/issues/9913 is fixed diff --git a/youtube_dl/extractor/onet.py b/youtube_dl/extractor/onet.py index 9cbc7c2e2..0a501b3e5 100644 --- a/youtube_dl/extractor/onet.py +++ b/youtube_dl/extractor/onet.py @@ -56,8 +56,8 @@ class OnetBaseIE(InfoExtractor): continue ext = determine_ext(video_url) if format_id == 'ism': - # TODO: Support Microsoft Smooth Streaming - continue + formats.extend(self._extract_ism_formats( + video_url, video_id, 'mss', fatal=False)) elif ext == 'mpd': formats.extend(self._extract_mpd_formats( video_url, video_id, mpd_id='dash', fatal=False)) diff --git a/youtube_dl/extractor/tvp.py b/youtube_dl/extractor/tvp.py index 2dbbc2ca7..06ea2b40a 100644 --- a/youtube_dl/extractor/tvp.py +++ b/youtube_dl/extractor/tvp.py @@ -139,6 +139,9 @@ class TVPEmbedIE(InfoExtractor): # formats.extend(self._extract_mpd_formats( # video_url_base + '.ism/video.mpd', # video_id, mpd_id='dash', fatal=False)) + formats.extend(self._extract_ism_formats( + video_url_base + '.ism/Manifest', + video_id, 'mss', fatal=False)) formats.extend(self._extract_f4m_formats( video_url_base + '.ism/video.f4m', video_id, f4m_id='hds', fatal=False)) From 02dc0a36b72b7312996d59b9ec96768f925cb4a4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 2 Nov 2016 02:14:01 +0700 Subject: [PATCH 08/25] [utils] Introduce base_url --- test/test_utils.py | 8 ++++++++ youtube_dl/extractor/common.py | 5 +++-- youtube_dl/utils.py | 4 ++++ 3 files changed, 15 insertions(+), 2 deletions(-) diff --git a/test/test_utils.py b/test/test_utils.py index b1b2effca..cb75ca53e 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -69,6 +69,7 @@ from youtube_dl.utils import ( uppercase_escape, lowercase_escape, url_basename, + base_url, urlencode_postdata, urshift, update_url_query, @@ -437,6 +438,13 @@ class TestUtil(unittest.TestCase): url_basename('http://media.w3.org/2010/05/sintel/trailer.mp4'), 'trailer.mp4') + def test_base_url(self): + self.assertEqual(base_url('http://foo.de/'), 'http://foo.de/') + self.assertEqual(base_url('http://foo.de/bar'), 'http://foo.de/') + self.assertEqual(base_url('http://foo.de/bar/'), 'http://foo.de/bar/') + self.assertEqual(base_url('http://foo.de/bar/baz'), 'http://foo.de/bar/') + self.assertEqual(base_url('http://foo.de/bar/baz?x=z/x/c'), 'http://foo.de/bar/') + def test_parse_age_limit(self): self.assertEqual(parse_age_limit(None), None) self.assertEqual(parse_age_limit(False), None) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 
2e9f05ae3..140ccf234 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -30,6 +30,7 @@ from ..downloader.f4m import remove_encrypted_media from ..utils import ( NO_DEFAULT, age_restricted, + base_url, bug_reports_message, clean_html, compiled_regex_type, @@ -1539,7 +1540,7 @@ class InfoExtractor(object): if res is False: return [] mpd, urlh = res - mpd_base_url = re.match(r'https?://[^?#&]+/', urlh.geturl()).group() + mpd_base_url = base_url(urlh.geturl()) return self._parse_mpd_formats( compat_etree_fromstring(mpd.encode('utf-8')), mpd_id, mpd_base_url, @@ -1797,7 +1798,7 @@ class InfoExtractor(object): if ism_doc.get('IsLive') == 'TRUE' or ism_doc.find('Protection') is not None: return [] - ism_base_url = re.match(r'https?://.+/', ism_url).group() + ism_base_url = base_url(ism_url) duration = int(ism_doc.attrib['Duration']) timescale = int_or_none(ism_doc.get('TimeScale')) or 10000000 diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 1a5ce8688..9595bcf9f 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1691,6 +1691,10 @@ def url_basename(url): return path.strip('/').split('/')[-1] +def base_url(url): + return re.match(r'https?://[^?#&]+/', url).group() + + class HEADRequest(compat_urllib_request.Request): def get_method(self): return 'HEAD' From 1616f9b4525b8db229c162c21681e3c75abe4ce3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 2 Nov 2016 02:21:43 +0700 Subject: [PATCH 09/25] [extractor/common] Fix typo --- youtube_dl/extractor/common.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 140ccf234..7e01c5fbb 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1839,10 +1839,10 @@ class InfoExtractor(object): next_fragment_time = int(stream_fragment[stream_fragment_index + 1].attrib['t']) except IndexError: next_fragment_time = duration - fragment_ctx['duration'] = (next_fragment_time - frgament_time) / fragment_repeat + fragment_ctx['duration'] = (next_fragment_time - fragment_ctx['time']) / fragment_repeat for _ in range(fragment_repeat): fragments.append({ - 'url': re.sub(r'{start[ _]time}', str(fragment_ctx['time']), track_url_pattern), + 'url': re.sub(r'{start[ _]time}', compat_str(fragment_ctx['time']), track_url_pattern), 'duration': fragment_ctx['duration'] / stream_timescale, }) fragment_ctx['time'] += fragment_ctx['duration'] From a18aeee8030a8c4eb4c69107d181195b17d08fa2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 2 Nov 2016 02:33:17 +0700 Subject: [PATCH 10/25] [ChangeLog] Actualize --- ChangeLog | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/ChangeLog b/ChangeLog index f36e04d13..6f2a946d1 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,18 @@ +version + +Core ++ Add basic support for Smooth Streaming protocol (#8118, #10969) +* Improve MPD manifest base URL extraction (#10909, #11079) +* Fix --match-filter for int-like strings (#11082) + +Extractors ++ [mva] Add support for ISM formats ++ [msn] Add support for ISM formats ++ [onet] Add support for ISM formats ++ [tvp] Add support for ISM formats ++ [nicknight] Add support for nicknight sites (#10769) + + version 2016.10.30 Extractors From 3365ea8929e53955fa1dd46b2b30492619c17055 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 2 Nov 2016 02:34:23 +0700 Subject: [PATCH 11/25] [extractor/common] Remove unused code --- 
youtube_dl/extractor/common.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 7e01c5fbb..50841f0cf 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1798,8 +1798,6 @@ class InfoExtractor(object): if ism_doc.get('IsLive') == 'TRUE' or ism_doc.find('Protection') is not None: return [] - ism_base_url = base_url(ism_url) - duration = int(ism_doc.attrib['Duration']) timescale = int_or_none(ism_doc.get('TimeScale')) or 10000000 From 8956d6608a02f8745a8d619b0f697a95e98981c0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 2 Nov 2016 02:39:36 +0700 Subject: [PATCH 12/25] release 2016.11.02 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 1 + youtube_dl/version.py | 2 +- 4 files changed, 6 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 3e020524b..975ea8700 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.10.31*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.10.31** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.11.02*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.11.02** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.10.31 +[debug] youtube-dl version 2016.11.02 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 6f2a946d1..c80828512 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2016.11.02 Core + Add basic support for Smooth Streaming protocol (#8118, #10969) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index e04fb6be2..7ed6b9006 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -483,6 +483,7 @@ - **nhl.com:videocenter:category**: NHL videocenter category - **nick.com** - **nick.de** + - **nicknight** - **niconico**: ニコニコ動画 - **NiconicoPlaylist** - **Nintendo** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 16274c22d..7cdd94f29 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.10.31' +__version__ = '2016.11.02' From cc99a77ac1c5fb5859d75589d49bb25361a1fb2b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 2 Nov 2016 03:01:13 +0700 Subject: [PATCH 
13/25] [extractor/generic] Add support for ISM manifests --- ChangeLog | 6 ++++++ youtube_dl/extractor/generic.py | 6 ++++++ 2 files changed, 12 insertions(+) diff --git a/ChangeLog b/ChangeLog index c80828512..ec26e0c8d 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +version + +Extractors ++ [generic] Add support for ISM manifests + + version 2016.11.02 Core diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 15d1c0225..fc3d01eed 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -1634,6 +1634,10 @@ class GenericIE(InfoExtractor): doc = compat_etree_fromstring(webpage.encode('utf-8')) if doc.tag == 'rss': return self._extract_rss(url, video_id, doc) + elif doc.tag == 'SmoothStreamingMedia': + info_dict['formats'] = self._parse_ism_formats(doc, url) + self._sort_formats(info_dict['formats']) + return info_dict elif re.match(r'^(?:{[^}]+})?smil$', doc.tag): smil = self._parse_smil(doc, url, video_id) self._sort_formats(smil['formats']) @@ -2449,6 +2453,8 @@ class GenericIE(InfoExtractor): entry_info_dict['formats'] = self._extract_mpd_formats(video_url, video_id) elif ext == 'f4m': entry_info_dict['formats'] = self._extract_f4m_formats(video_url, video_id) + elif re.search(r'(?i)\.ism/manifest', video_url): + entry_info_dict['formats'] = self._extract_ism_formats(video_url, video_id) else: entry_info_dict['url'] = video_url From 4f9cd4d36fa88758cdff822f03879c6e0b6aa42d Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Wed, 2 Nov 2016 13:55:40 +0100 Subject: [PATCH 14/25] [radiocanada] extract subtitle(closes #11096) --- youtube_dl/extractor/radiocanada.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/youtube_dl/extractor/radiocanada.py b/youtube_dl/extractor/radiocanada.py index 6751270ee..321917ad0 100644 --- a/youtube_dl/extractor/radiocanada.py +++ b/youtube_dl/extractor/radiocanada.py @@ -125,6 +125,14 @@ class RadioCanadaIE(InfoExtractor): f4m_id='hds', fatal=False)) self._sort_formats(formats) + subtitles = {} + closed_caption_url = get_meta('closedCaption') or get_meta('closedCaptionHTML5') + if closed_caption_url: + subtitles['fr'] = [{ + 'url': closed_caption_url, + 'ext': determine_ext(closed_caption_url, 'vtt'), + }] + return { 'id': video_id, 'title': get_meta('Title'), @@ -135,6 +143,7 @@ class RadioCanadaIE(InfoExtractor): 'season_number': int_or_none('SrcSaison'), 'episode_number': int_or_none('SrcEpisode'), 'upload_date': unified_strdate(get_meta('Date')), + 'subtitles': subtitles, 'formats': formats, } From 26aae566902251f9674593a2b0f0ca7477b96a56 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 2 Nov 2016 23:34:37 +0700 Subject: [PATCH 15/25] [extractor/generic] Improve ISM extraction --- youtube_dl/extractor/generic.py | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index fc3d01eed..0bb263ce7 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -2453,8 +2453,21 @@ class GenericIE(InfoExtractor): entry_info_dict['formats'] = self._extract_mpd_formats(video_url, video_id) elif ext == 'f4m': entry_info_dict['formats'] = self._extract_f4m_formats(video_url, video_id) - elif re.search(r'(?i)\.ism/manifest', video_url): - entry_info_dict['formats'] = self._extract_ism_formats(video_url, video_id) + elif re.search(r'(?i)\.(?:ism|smil)/manifest', video_url): + # Just matching .ism/manifest is not enough to be reliably sure + # whether 
it's actually an ISM manifest or some other streaming + # manifest since there are various streaming URL formats + # possible (see [1]) as well as some other shenanigans like + # .smil/manifest URLs that actually serve an ISM (see [2]) and + # so on. + # Thus the most reasonable way to solve this is to delegate + # to generic extractor in order to look into the contents of + # the manifest itself. + # 1. https://azure.microsoft.com/en-us/documentation/articles/media-services-deliver-content-overview/#streaming-url-formats + # 2. https://svs.itworkscdn.net/lbcivod/smil:itwfcdn/lbci/170976.smil/Manifest + entry_info_dict = self.url_result( + smuggle_url(video_url, {'to_generic': True}), + GenericIE.ie_key()) else: entry_info_dict['url'] = video_url From 4119a96ce57b11437efd329a8c2602ee7fa7ea2c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 2 Nov 2016 23:43:41 +0700 Subject: [PATCH 16/25] [extractor/generic] Skip URLs we came from when delegating ISM extraction --- youtube_dl/extractor/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 0bb263ce7..a0a45dce0 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -2453,7 +2453,7 @@ class GenericIE(InfoExtractor): entry_info_dict['formats'] = self._extract_mpd_formats(video_url, video_id) elif ext == 'f4m': entry_info_dict['formats'] = self._extract_f4m_formats(video_url, video_id) - elif re.search(r'(?i)\.(?:ism|smil)/manifest', video_url): + elif re.search(r'(?i)\.(?:ism|smil)/manifest', video_url) and video_url != url: # Just matching .ism/manifest is not enough to be reliably sure # whether it's actually an ISM manifest or some other streaming # manifest since there are various streaming URL formats From 3b4b66b50c6605a7c878364e023776aa8ac7b8b8 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Thu, 3 Nov 2016 00:43:33 +0100 Subject: [PATCH 17/25] [shahid] add support for authentication(closes #11091) --- youtube_dl/extractor/shahid.py | 74 +++++++++++++++++++++++++++------- 1 file changed, 60 insertions(+), 14 deletions(-) diff --git a/youtube_dl/extractor/shahid.py b/youtube_dl/extractor/shahid.py index ca286abb1..62d41e88a 100644 --- a/youtube_dl/extractor/shahid.py +++ b/youtube_dl/extractor/shahid.py @@ -1,17 +1,24 @@ # coding: utf-8 from __future__ import unicode_literals +import re +import json + from .common import InfoExtractor +from ..compat import compat_HTTPError from ..utils import ( ExtractorError, int_or_none, parse_iso8601, str_or_none, + urlencode_postdata, + clean_html, ) class ShahidIE(InfoExtractor): - _VALID_URL = r'https?://shahid\.mbc\.net/ar/episode/(?P\d+)/?' 
+ _NETRC_MACHINE = 'shahid' + _VALID_URL = r'https?://shahid\.mbc\.net/ar/(?Pepisode|movie)/(?P\d+)' _TESTS = [{ 'url': 'https://shahid.mbc.net/ar/episode/90574/%D8%A7%D9%84%D9%85%D9%84%D9%83-%D8%B9%D8%A8%D8%AF%D8%A7%D9%84%D9%84%D9%87-%D8%A7%D9%84%D8%A5%D9%86%D8%B3%D8%A7%D9%86-%D8%A7%D9%84%D9%85%D9%88%D8%B3%D9%85-1-%D9%83%D9%84%D9%8A%D8%A8-3.html', 'info_dict': { @@ -27,18 +34,54 @@ class ShahidIE(InfoExtractor): # m3u8 download 'skip_download': True, } + }, { + 'url': 'https://shahid.mbc.net/ar/movie/151746/%D8%A7%D9%84%D9%82%D9%86%D8%A7%D8%B5%D8%A9.html', + 'only_matching': True }, { # shahid plus subscriber only 'url': 'https://shahid.mbc.net/ar/episode/90511/%D9%85%D8%B1%D8%A7%D9%8A%D8%A7-2011-%D8%A7%D9%84%D9%85%D9%88%D8%B3%D9%85-1-%D8%A7%D9%84%D8%AD%D9%84%D9%82%D8%A9-1.html', 'only_matching': True }] - def _call_api(self, path, video_id, note): - data = self._download_json( - 'http://api.shahid.net/api/v1_1/' + path, video_id, note, query={ - 'apiKey': 'sh@hid0nlin3', - 'hash': 'b2wMCTHpSmyxGqQjJFOycRmLSex+BpTK/ooxy6vHaqs=', - }).get('data', {}) + def _real_initialize(self): + email, password = self._get_login_info() + if email is None: + return + + try: + user_data = self._download_json( + 'https://shahid.mbc.net/wd/service/users/login', + None, 'Logging in', data=json.dumps({ + 'email': email, + 'password': password, + 'basic': 'false', + }).encode('utf-8'), headers={ + 'Content-Type': 'application/json; charset=UTF-8', + })['user'] + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError): + fail_data = self._parse_json( + e.cause.read().decode('utf-8'), None, fatal=False) + if fail_data: + faults = fail_data.get('faults', []) + faults_message = ', '.join([clean_html(fault['userMessage']) for fault in faults if fault.get('userMessage')]) + if faults_message: + raise ExtractorError(faults_message, expected=True) + raise + + self._download_webpage( + 'https://shahid.mbc.net/populateContext', + None, 'Populate Context', data=urlencode_postdata({ + 'firstName': user_data['firstName'], + 'lastName': user_data['lastName'], + 'userName': user_data['email'], + 'csg_user_name': user_data['email'], + 'subscriberId': user_data['id'], + 'sessionId': user_data['sessionId'], + })) + + def _get_api_data(self, response): + data = response.get('data', {}) error = data.get('error') if error: @@ -49,11 +92,11 @@ class ShahidIE(InfoExtractor): return data def _real_extract(self, url): - video_id = self._match_id(url) + page_type, video_id = re.match(self._VALID_URL, url).groups() - player = self._call_api( - 'Content/Episode/%s' % video_id, - video_id, 'Downloading player JSON') + player = self._get_api_data(self._download_json( + 'https://shahid.mbc.net/arContent/getPlayerContent-param-.id-%s.type-player.html' % video_id, + video_id, 'Downloading player JSON')) if player.get('drm'): raise ExtractorError('This video is DRM protected.', expected=True) @@ -61,9 +104,12 @@ class ShahidIE(InfoExtractor): formats = self._extract_m3u8_formats(player['url'], video_id, 'mp4') self._sort_formats(formats) - video = self._call_api( - 'episode/%s' % video_id, video_id, - 'Downloading video JSON')['episode'] + video = self._get_api_data(self._download_json( + 'http://api.shahid.net/api/v1_1/%s/%s' % (page_type, video_id), + video_id, 'Downloading video JSON', query={ + 'apiKey': 'sh@hid0nlin3', + 'hash': 'b2wMCTHpSmyxGqQjJFOycRmLSex+BpTK/ooxy6vHaqs=', + }))[page_type] title = video['title'] categories = [ From f4dfa9a5ed0756c882a947bd455cc2488d51ffd7 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: 
Thu, 3 Nov 2016 09:04:20 +0100 Subject: [PATCH 18/25] [tubitv] fix extraction(closes #11061) --- youtube_dl/extractor/tubitv.py | 39 ++++++++++++++++++---------------- 1 file changed, 21 insertions(+), 18 deletions(-) diff --git a/youtube_dl/extractor/tubitv.py b/youtube_dl/extractor/tubitv.py index c6572defb..3a37df2e8 100644 --- a/youtube_dl/extractor/tubitv.py +++ b/youtube_dl/extractor/tubitv.py @@ -9,7 +9,6 @@ from ..utils import ( int_or_none, sanitized_Request, urlencode_postdata, - parse_iso8601, ) @@ -19,17 +18,13 @@ class TubiTvIE(InfoExtractor): _NETRC_MACHINE = 'tubitv' _TEST = { 'url': 'http://tubitv.com/video/283829/the_comedian_at_the_friday', + 'md5': '43ac06be9326f41912dc64ccf7a80320', 'info_dict': { 'id': '283829', 'ext': 'mp4', 'title': 'The Comedian at The Friday', 'description': 'A stand up comedian is forced to look at the decisions in his life while on a one week trip to the west coast.', - 'uploader': 'Indie Rights Films', - 'upload_date': '20160111', - 'timestamp': 1452555979, - }, - 'params': { - 'skip_download': 'HLS download', + 'uploader_id': 'bc168bee0d18dd1cb3b86c68706ab434', }, } @@ -58,19 +53,28 @@ class TubiTvIE(InfoExtractor): video_id = self._match_id(url) video_data = self._download_json( 'http://tubitv.com/oz/videos/%s/content' % video_id, video_id) - title = video_data['n'] + title = video_data['title'] formats = self._extract_m3u8_formats( - video_data['mh'], video_id, 'mp4', 'm3u8_native') + self._proto_relative_url(video_data['url']), + video_id, 'mp4', 'm3u8_native') self._sort_formats(formats) + thumbnails = [] + for thumbnail_url in video_data.get('thumbnails', []): + if not thumbnail_url: + continue + thumbnails.append({ + 'url': self._proto_relative_url(thumbnail_url), + }) + subtitles = {} - for sub in video_data.get('sb', []): - sub_url = sub.get('u') + for sub in video_data.get('subtitles', []): + sub_url = sub.get('url') if not sub_url: continue - subtitles.setdefault(sub.get('l', 'en'), []).append({ - 'url': sub_url, + subtitles.setdefault(sub.get('lang', 'English'), []).append({ + 'url': self._proto_relative_url(sub_url), }) return { @@ -78,9 +82,8 @@ class TubiTvIE(InfoExtractor): 'title': title, 'formats': formats, 'subtitles': subtitles, - 'thumbnail': video_data.get('ph'), - 'description': video_data.get('d'), - 'duration': int_or_none(video_data.get('s')), - 'timestamp': parse_iso8601(video_data.get('u')), - 'uploader': video_data.get('on'), + 'thumbnails': thumbnails, + 'description': video_data.get('description'), + 'duration': int_or_none(video_data.get('duration')), + 'uploader_id': video_data.get('publisher_id'), } From b811b4c93bd21c5a053f7deddd3c0a3fe2486184 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Thu, 3 Nov 2016 10:37:07 +0100 Subject: [PATCH 19/25] [vice] add support for uplynk preplay videos(#11101) --- youtube_dl/extractor/vice.py | 124 +++++++++++++++++++++++++++---- youtube_dl/extractor/viceland.py | 82 +------------------- 2 files changed, 114 insertions(+), 92 deletions(-) diff --git a/youtube_dl/extractor/vice.py b/youtube_dl/extractor/vice.py index e2b2ce098..065c1fa82 100644 --- a/youtube_dl/extractor/vice.py +++ b/youtube_dl/extractor/vice.py @@ -1,12 +1,92 @@ from __future__ import unicode_literals import re +import time +import hashlib +import json +from .adobepass import AdobePassIE from .common import InfoExtractor -from ..utils import ExtractorError +from ..compat import compat_HTTPError +from ..utils import ( + int_or_none, + parse_age_limit, + str_or_none, + parse_duration, + ExtractorError, + 
extract_attributes,
+)
 
 
-class ViceIE(InfoExtractor):
+class ViceBaseIE(AdobePassIE):
+    def _extract_preplay_video(self, url, webpage):
+        watch_hub_data = extract_attributes(self._search_regex(
+            r'(?s)(<watch-hub\s*.+?</watch-hub>)', webpage, 'watch hub'))
+        video_id = watch_hub_data['vms-id']
+        title = watch_hub_data['video-title']
+
+        query = {}
+        is_locked = watch_hub_data.get('video-locked') == '1'
+        if is_locked:
+            resource = self._get_mvpd_resource(
+                'VICELAND', title, video_id,
+                watch_hub_data.get('video-rating'))
+            query['tvetoken'] = self._extract_mvpd_auth(url, video_id, 'VICELAND', resource)
+
+        # signature generation algorithm is reverse engineered from signatureGenerator in
+        # webpack:///../shared/~/vice-player/dist/js/vice-player.js in
+        # https://www.viceland.com/assets/common/js/web.vendor.bundle.js
+        exp = int(time.time()) + 14400
+        query.update({
+            'exp': exp,
+            'sign': hashlib.sha512(('%s:GET:%d' % (video_id, exp)).encode()).hexdigest(),
+        })
+
+        try:
+            host = 'www.viceland' if is_locked else self._PREPLAY_HOST
+            preplay = self._download_json('https://%s.com/en_us/preplay/%s' % (host, video_id), video_id, query=query)
+        except ExtractorError as e:
+            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400:
+                error = json.loads(e.cause.read().decode())
+                raise ExtractorError('%s said: %s' % (self.IE_NAME, error['details']), expected=True)
+            raise
+
+        video_data = preplay['video']
+        base = video_data['base']
+        uplynk_preplay_url = preplay['preplayURL']
+        episode = video_data.get('episode', {})
+        channel = video_data.get('channel', {})
+
+        subtitles = {}
+        cc_url = preplay.get('ccURL')
+        if cc_url:
+            subtitles['en'] = [{
+                'url': cc_url,
+            }]
+
+        return {
+            '_type': 'url_transparent',
+            'url': uplynk_preplay_url,
+            'id': video_id,
+            'title': title,
+            'description': base.get('body'),
+            'thumbnail': watch_hub_data.get('cover-image') or watch_hub_data.get('thumbnail'),
+            'duration': parse_duration(video_data.get('video_duration') or watch_hub_data.get('video-duration')),
+            'timestamp': int_or_none(video_data.get('created_at')),
+            'age_limit': parse_age_limit(video_data.get('video_rating')),
+            'series': video_data.get('show_title') or watch_hub_data.get('show-title'),
+            'episode_number': int_or_none(episode.get('episode_number') or watch_hub_data.get('episode')),
+            'episode_id': str_or_none(episode.get('id') or video_data.get('episode_id')),
+            'season_number': int_or_none(watch_hub_data.get('season')),
+            'season_id': str_or_none(episode.get('season_id')),
+            'uploader': channel.get('base', {}).get('title') or watch_hub_data.get('channel-title'),
+            'uploader_id': str_or_none(channel.get('id')),
+            'subtitles': subtitles,
+            'ie_key': 'UplynkPreplay',
+        }
+
+
+class ViceIE(ViceBaseIE):
     _VALID_URL = r'https?://(?:.+?\.)?vice\.com/(?:[^/]+/)?videos?/(?P<id>[^/?#&]+)'
 
     _TESTS = [{
@@ -21,7 +101,7 @@ class ViceIE(InfoExtractor):
         'add_ie': ['Ooyala'],
     }, {
         'url': 'http://www.vice.com/video/how-to-hack-a-car',
-        'md5': '6fb2989a3fed069fb8eab3401fc2d3c9',
+        'md5': 'a7ecf64ee4fa19b916c16f4b56184ae2',
         'info_dict': {
             'id': '3jstaBeXgAs',
             'ext': 'mp4',
@@ -32,6 +112,22 @@ class ViceIE(InfoExtractor):
             'upload_date': '20140529',
         },
         'add_ie': ['Youtube'],
+    }, {
+        'url': 'https://video.vice.com/en_us/video/the-signal-from-tolva/5816510690b70e6c5fd39a56',
+        'md5': '',
+        'info_dict': {
+            'id': '5816510690b70e6c5fd39a56',
+            'ext': 'mp4',
+            'uploader': 'Waypoint',
+            'title': 'The Signal From Tölva',
+            'uploader_id': '57f7d621e05ca860fa9ccaf9',
+            'timestamp': 1477941983938,
+        },
+        'params': {
+            # m3u8 download
+
'skip_download': True, + }, + 'add_ie': ['UplynkPreplay'], }, { 'url': 'https://news.vice.com/video/experimenting-on-animals-inside-the-monkey-lab', 'only_matching': True, @@ -42,21 +138,21 @@ class ViceIE(InfoExtractor): 'url': 'https://munchies.vice.com/en/videos/watch-the-trailer-for-our-new-series-the-pizza-show', 'only_matching': True, }] + _PREPLAY_HOST = 'video.vice' def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - try: - embed_code = self._search_regex( - r'embedCode=([^&\'"]+)', webpage, - 'ooyala embed code', default=None) - if embed_code: - return self.url_result('ooyala:%s' % embed_code, 'Ooyala') - youtube_id = self._search_regex( - r'data-youtube-id="([^"]+)"', webpage, 'youtube id') + webpage, urlh = self._download_webpage_handle(url, video_id) + embed_code = self._search_regex( + r'embedCode=([^&\'"]+)', webpage, + 'ooyala embed code', default=None) + if embed_code: + return self.url_result('ooyala:%s' % embed_code, 'Ooyala') + youtube_id = self._search_regex( + r'data-youtube-id="([^"]+)"', webpage, 'youtube id', default=None) + if youtube_id: return self.url_result(youtube_id, 'Youtube') - except ExtractorError: - raise ExtractorError('The page doesn\'t contain a video', expected=True) + return self._extract_preplay_video(urlh.geturl(), webpage) class ViceShowIE(InfoExtractor): diff --git a/youtube_dl/extractor/viceland.py b/youtube_dl/extractor/viceland.py index 8742b607a..0eff055a6 100644 --- a/youtube_dl/extractor/viceland.py +++ b/youtube_dl/extractor/viceland.py @@ -1,23 +1,10 @@ # coding: utf-8 from __future__ import unicode_literals -import time -import hashlib -import json - -from .adobepass import AdobePassIE -from ..compat import compat_HTTPError -from ..utils import ( - int_or_none, - parse_age_limit, - str_or_none, - parse_duration, - ExtractorError, - extract_attributes, -) +from .vice import ViceBaseIE -class VicelandIE(AdobePassIE): +class VicelandIE(ViceBaseIE): _VALID_URL = r'https?://(?:www\.)?viceland\.com/[^/]+/video/[^/]+/(?P[a-f0-9]+)' _TEST = { 'url': 'https://www.viceland.com/en_us/video/cyberwar-trailer/57608447973ee7705f6fbd4e', @@ -38,70 +25,9 @@ class VicelandIE(AdobePassIE): }, 'add_ie': ['UplynkPreplay'], } + _PREPLAY_HOST = 'www.viceland' def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - watch_hub_data = extract_attributes(self._search_regex( - r'(?s)()', webpage, 'watch hub')) - video_id = watch_hub_data['vms-id'] - title = watch_hub_data['video-title'] - - query = {} - if watch_hub_data.get('video-locked') == '1': - resource = self._get_mvpd_resource( - 'VICELAND', title, video_id, - watch_hub_data.get('video-rating')) - query['tvetoken'] = self._extract_mvpd_auth(url, video_id, 'VICELAND', resource) - - # signature generation algorithm is reverse engineered from signatureGenerator in - # webpack:///../shared/~/vice-player/dist/js/vice-player.js in - # https://www.viceland.com/assets/common/js/web.vendor.bundle.js - exp = int(time.time()) + 14400 - query.update({ - 'exp': exp, - 'sign': hashlib.sha512(('%s:GET:%d' % (video_id, exp)).encode()).hexdigest(), - }) - - try: - preplay = self._download_json('https://www.viceland.com/en_us/preplay/%s' % video_id, video_id, query=query) - except ExtractorError as e: - if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400: - error = json.loads(e.cause.read().decode()) - raise ExtractorError('%s said: %s' % (self.IE_NAME, error['details']), expected=True) - raise - 
From 3a86b2c51e508c11502fcf7b74c470c68fd5bcd6 Mon Sep 17 00:00:00 2001
From: Yen Chi Hsuan
Date: Thu, 3 Nov 2016 18:55:55 +0800
Subject: [PATCH 20/25] Ignore and clean .wav files

---
 .gitignore | 1 +
 Makefile   | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/.gitignore b/.gitignore
index 002b700f5..354505d66 100644
--- a/.gitignore
+++ b/.gitignore
@@ -30,6 +30,7 @@ updates_key.pem
 *.m4v
 *.mp3
 *.3gp
+*.wav
 *.part
 *.swp
 test/testdata
diff --git a/Makefile b/Makefile
index 8d66e48c9..b7cec1666 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 all: youtube-dl README.md CONTRIBUTING.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish supportedsites
 
 clean:
-	rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish youtube_dl/extractor/lazy_extractors.py *.dump *.part* *.info.json *.mp4 *.m4a *.flv *.mp3 *.avi *.mkv *.webm *.3gp *.jpg *.png CONTRIBUTING.md.tmp ISSUE_TEMPLATE.md.tmp youtube-dl youtube-dl.exe
+	rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish youtube_dl/extractor/lazy_extractors.py *.dump *.part* *.info.json *.mp4 *.m4a *.flv *.mp3 *.avi *.mkv *.webm *.3gp *.wav *.jpg *.png CONTRIBUTING.md.tmp ISSUE_TEMPLATE.md.tmp youtube-dl youtube-dl.exe
 	find . -name "*.pyc" -delete
 	find . -name "*.class" -delete
From b47ecd0b74010cdd4b16049c33ef048b72cf6207 Mon Sep 17 00:00:00 2001
From: Remita Amine
Date: Thu, 3 Nov 2016 12:50:25 +0100
Subject: [PATCH 21/25] [vzaar] Add new extractor (closes #11093)

---
 youtube_dl/extractor/extractors.py |  1 +
 youtube_dl/extractor/vzaar.py      | 55 ++++++++++++++++++++++++++++++
 2 files changed, 56 insertions(+)
 create mode 100644 youtube_dl/extractor/vzaar.py

diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index f30ac5aaf..499239a22 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -1101,6 +1101,7 @@ from .vrt import VRTIE
 from .vube import VubeIE
 from .vuclip import VuClipIE
 from .vyborymos import VyboryMosIE
+from .vzaar import VzaarIE
 from .walla import WallaIE
 from .washingtonpost import (
     WashingtonPostIE,
diff --git a/youtube_dl/extractor/vzaar.py b/youtube_dl/extractor/vzaar.py
new file mode 100644
index 000000000..b270f08d1
--- /dev/null
+++ b/youtube_dl/extractor/vzaar.py
@@ -0,0 +1,55 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    int_or_none,
+    float_or_none,
+)
+
+
+class VzaarIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:(?:www|view)\.)?vzaar\.com/(?:videos/)?(?P<id>\d+)'
+    _TESTS = [{
+        'url': 'https://vzaar.com/videos/1152805',
+        'md5': 'bde5ddfeb104a6c56a93a06b04901dbf',
+        'info_dict': {
+            'id': '1152805',
+            'ext': 'mp4',
+            'title': 'sample video (public)',
+        },
+    }, {
+        'url': 'https://view.vzaar.com/27272/player',
+        'md5': '3b50012ac9bbce7f445550d54e0508f2',
+        'info_dict': {
+            'id': '27272',
+            'ext': 'mp3',
+            'title': 'MP3',
+        },
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        video_data = self._download_json(
+            'http://view.vzaar.com/v2/%s/video' % video_id, video_id)
+        source_url = video_data['sourceUrl']
+
+        info = {
+            'id': video_id,
+            'title': video_data['videoTitle'],
+            'url': source_url,
+            'thumbnail': self._proto_relative_url(video_data.get('poster')),
+            'duration': float_or_none(video_data.get('videoDuration')),
+        }
+        if 'audio' in source_url:
+            info.update({
+                'vcodec': 'none',
+                'ext': 'mp3',
+            })
+        else:
+            info.update({
+                'width': int_or_none(video_data.get('width')),
+                'height': int_or_none(video_data.get('height')),
+                'ext': 'mp4',
+            })
+        return info
From 22979993e7afd0b8792011ead3d64d3945703ce8 Mon Sep 17 00:00:00 2001
From: Remita Amine
Date: Thu, 3 Nov 2016 16:07:22 +0100
Subject: [PATCH 22/25] [vice] add coding cookie

---
 youtube_dl/extractor/vice.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/youtube_dl/extractor/vice.py b/youtube_dl/extractor/vice.py
index 065c1fa82..8a00c8fee 100644
--- a/youtube_dl/extractor/vice.py
+++ b/youtube_dl/extractor/vice.py
@@ -1,3 +1,4 @@
+# coding: utf-8
 from __future__ import unicode_literals
 
 import re
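Reviewer note on PATCH 21/25: the new _VALID_URL accepts both the public video page and the embed/player URL, and the captured numeric id is what feeds the view.vzaar.com/v2/<id>/video JSON request; the extractor then branches on whether sourceUrl points at audio. A quick standalone check of the pattern, using only the standard library and the URLs from the extractor's own tests:

    import re

    VALID_URL = r'https?://(?:(?:www|view)\.)?vzaar\.com/(?:videos/)?(?P<id>\d+)'

    for url in ('https://vzaar.com/videos/1152805',
                'https://view.vzaar.com/27272/player'):
        # prints 1152805, then 27272
        print(re.match(VALID_URL, url).group('id'))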
From 10380e55def817f568b3e1b8e16a2133ded19124 Mon Sep 17 00:00:00 2001
From: Remita Amine
Date: Thu, 3 Nov 2016 16:08:57 +0100
Subject: [PATCH 23/25] [downloader/ism] fix AVC Decoder Configuration Record creation in python 3

---
 youtube_dl/downloader/ism.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/youtube_dl/downloader/ism.py b/youtube_dl/downloader/ism.py
index 9f8f14b66..53f319cae 100644
--- a/youtube_dl/downloader/ism.py
+++ b/youtube_dl/downloader/ism.py
@@ -149,9 +149,7 @@ def write_piff_header(stream, params):
         if fourcc in ('H264', 'AVC1'):
             sps, pps = codec_private_data.split(u32.pack(1))[1:]
             avcc_payload = u8.pack(1)  # configuration version
-            avcc_payload += sps[1]  # avc profile indication
-            avcc_payload += sps[2]  # profile compatibility
-            avcc_payload += sps[3]  # avc level indication
+            avcc_payload += sps[1:4]  # avc profile indication + profile compatibility + avc level indication
             avcc_payload += u8.pack(0xfc | (params.get('nal_unit_length_field', 4) - 1))  # complete represenation (1) + reserved (11111) + length size minus one
             avcc_payload += u8.pack(1)  # reserved (0) + number of sps (0000001)
             avcc_payload += u16.pack(len(sps))
From 9d64e1dcdc610491217a3197f97e5bd8120d1974 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?=
Date: Thu, 3 Nov 2016 22:15:09 +0700
Subject: [PATCH 24/25] [downloader/ism] Fix typo

---
 youtube_dl/downloader/ism.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/youtube_dl/downloader/ism.py b/youtube_dl/downloader/ism.py
index 53f319cae..93cac5e98 100644
--- a/youtube_dl/downloader/ism.py
+++ b/youtube_dl/downloader/ism.py
@@ -129,7 +129,7 @@ def write_piff_header(stream, params):
             sample_entry_payload += u1616.pack(params['sampling_rate'])
 
             if fourcc == 'AACL':
-                smaple_entry_box = box(b'mp4a', sample_entry_payload)
+                sample_entry_box = box(b'mp4a', sample_entry_payload)
         else:
             sample_entry_payload = sample_entry_payload
             sample_entry_payload += u16.pack(0)  # pre defined
@@ -158,8 +158,8 @@ def write_piff_header(stream, params):
             avcc_payload += u16.pack(len(pps))
             avcc_payload += pps
             sample_entry_payload += box(b'avcC', avcc_payload)  # AVC Decoder Configuration Record
-            smaple_entry_box = box(b'avc1', sample_entry_payload)  # AVC Simple Entry
-            stsd_payload += smaple_entry_box
+            sample_entry_box = box(b'avc1', sample_entry_payload)  # AVC Simple Entry
+            stsd_payload += sample_entry_box
 
     stbl_payload = full_box(b'stsd', 0, 0, stsd_payload)  # Sample Description Box
From 32f2627aed68ed2981f438067252b9921c4a39fe Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?=
Date: Thu, 3 Nov 2016 22:22:40 +0700
Subject: [PATCH 25/25] [vodlocker] Add another removed file pattern (closes #11106)

---
 youtube_dl/extractor/vodlocker.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/youtube_dl/extractor/vodlocker.py b/youtube_dl/extractor/vodlocker.py
index c85b474d2..bbfa6e5f2 100644
--- a/youtube_dl/extractor/vodlocker.py
+++ b/youtube_dl/extractor/vodlocker.py
@@ -31,7 +31,8 @@ class VodlockerIE(InfoExtractor):
         if any(p in webpage for p in (
                 '>THIS FILE WAS DELETED<',
                 '>File Not Found<',
-                'The file you were looking for could not be found, sorry for any inconvenience.<')):
+                'The file you were looking for could not be found, sorry for any inconvenience.<',
+                '>The file was removed')):
             raise ExtractorError('Video %s does not exist' % video_id, expected=True)
 
         fields = self._hidden_inputs(webpage)
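Reviewer note on PATCH 23/25: the AVC hunk switches from indexing to slicing because indexing a bytes object returns an int on Python 3 (but a one-character str on Python 2), so the old `avcc_payload += sps[1]` concatenation breaks under Python 3, while a slice stays a bytes object on both. A minimal illustration, independent of the downloader code (the SPS bytes below are made up; only the layout matters):

    sps = b'\x67\x64\x00\x1f\xac'  # made-up SPS payload, first bytes only

    # Python 3: sps[1] == 100 (an int), so b'' + sps[1] raises TypeError.
    # Python 2: sps[1] == 'd' (a one-char str), so the old concatenation worked.
    # Slicing returns bytes on both, which is what the patched line relies on.
    profile_compat_level = sps[1:4]
    assert isinstance(profile_compat_level, bytes) and len(profile_compat_level) == 3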