Merge branch 'master' into adobepass

raleeper 2016-11-03 08:36:40 -07:00
commit fbd6c321ee
27 changed files with 760 additions and 147 deletions


@@ -6,8 +6,8 @@
---
-### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.10.31*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
+### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.11.02*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
-- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.10.31**
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.11.02**
### Before submitting an *issue* make sure you have:
- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
@@ -35,7 +35,7 @@ $ youtube-dl -v <your command line>
[debug] User config: []
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
-[debug] youtube-dl version 2016.10.31
+[debug] youtube-dl version 2016.11.02
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
[debug] Proxy map: {}

.gitignore

@@ -30,6 +30,7 @@ updates_key.pem
*.m4v
*.mp3
*.3gp
+*.wav
*.part
*.swp
test/testdata


@@ -1,3 +1,24 @@
version <unreleased>
Extractors
+ [generic] Add support for ISM manifests
version 2016.11.02
Core
+ Add basic support for Smooth Streaming protocol (#8118, #10969)
* Improve MPD manifest base URL extraction (#10909, #11079)
* Fix --match-filter for int-like strings (#11082)
Extractors
+ [mva] Add support for ISM formats
+ [msn] Add support for ISM formats
+ [onet] Add support for ISM formats
+ [tvp] Add support for ISM formats
+ [nicknight] Add support for nicknight sites (#10769)
version 2016.10.30
Extractors


@@ -1,7 +1,7 @@
all: youtube-dl README.md CONTRIBUTING.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish supportedsites
clean:
-rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish youtube_dl/extractor/lazy_extractors.py *.dump *.part* *.info.json *.mp4 *.m4a *.flv *.mp3 *.avi *.mkv *.webm *.3gp *.jpg *.png CONTRIBUTING.md.tmp ISSUE_TEMPLATE.md.tmp youtube-dl youtube-dl.exe
+rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish youtube_dl/extractor/lazy_extractors.py *.dump *.part* *.info.json *.mp4 *.m4a *.flv *.mp3 *.avi *.mkv *.webm *.3gp *.wav *.jpg *.png CONTRIBUTING.md.tmp ISSUE_TEMPLATE.md.tmp youtube-dl youtube-dl.exe
find . -name "*.pyc" -delete
find . -name "*.class" -delete


@@ -483,6 +483,7 @@
- **nhl.com:videocenter:category**: NHL videocenter category
- **nick.com**
- **nick.de**
+ - **nicknight**
- **niconico**: ニコニコ動画
- **NiconicoPlaylist**
- **Nintendo**


@@ -605,6 +605,7 @@ class TestYoutubeDL(unittest.TestCase):
'extractor': 'TEST',
'duration': 30,
'filesize': 10 * 1024,
+'playlist_id': '42',
}
second = {
'id': '2',
@@ -614,6 +615,7 @@ class TestYoutubeDL(unittest.TestCase):
'duration': 10,
'description': 'foo',
'filesize': 5 * 1024,
+'playlist_id': '43',
}
videos = [first, second]
@@ -650,6 +652,10 @@ class TestYoutubeDL(unittest.TestCase):
res = get_videos(f)
self.assertEqual(res, ['1'])
+f = match_filter_func('playlist_id = 42')
+res = get_videos(f)
+self.assertEqual(res, ['1'])
def test_playlist_items_selection(self):
entries = [{
'id': compat_str(i),


@@ -69,6 +69,7 @@ from youtube_dl.utils import (
uppercase_escape,
lowercase_escape,
url_basename,
+base_url,
urlencode_postdata,
urshift,
update_url_query,
@@ -437,6 +438,13 @@ class TestUtil(unittest.TestCase):
url_basename('http://media.w3.org/2010/05/sintel/trailer.mp4'),
'trailer.mp4')
+def test_base_url(self):
+self.assertEqual(base_url('http://foo.de/'), 'http://foo.de/')
+self.assertEqual(base_url('http://foo.de/bar'), 'http://foo.de/')
+self.assertEqual(base_url('http://foo.de/bar/'), 'http://foo.de/bar/')
+self.assertEqual(base_url('http://foo.de/bar/baz'), 'http://foo.de/bar/')
+self.assertEqual(base_url('http://foo.de/bar/baz?x=z/x/c'), 'http://foo.de/bar/')
def test_parse_age_limit(self):
self.assertEqual(parse_age_limit(None), None)
self.assertEqual(parse_age_limit(False), None)


@@ -1658,7 +1658,7 @@ class YoutubeDL(object):
video_ext, audio_ext = audio.get('ext'), video.get('ext')
if video_ext and audio_ext:
COMPATIBLE_EXTS = (
-('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v'),
+('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma'),
('webm')
)
for exts in COMPATIBLE_EXTS:


@@ -7,6 +7,7 @@ from .http import HttpFD
from .rtmp import RtmpFD
from .dash import DashSegmentsFD
from .rtsp import RtspFD
+from .ism import IsmFD
from .external import (
get_external_downloader,
FFmpegFD,
@@ -24,6 +25,7 @@ PROTOCOL_MAP = {
'rtsp': RtspFD,
'f4m': F4mFD,
'http_dash_segments': DashSegmentsFD,
+'ism': IsmFD,
}


@@ -0,0 +1,271 @@
from __future__ import unicode_literals
import os
import time
import struct
import binascii
import io
from .fragment import FragmentFD
from ..compat import compat_urllib_error
from ..utils import (
sanitize_open,
encodeFilename,
)
u8 = struct.Struct(b'>B')
u88 = struct.Struct(b'>Bx')
u16 = struct.Struct(b'>H')
u1616 = struct.Struct(b'>Hxx')
u32 = struct.Struct(b'>I')
u64 = struct.Struct(b'>Q')
s88 = struct.Struct(b'>bx')
s16 = struct.Struct(b'>h')
s1616 = struct.Struct(b'>hxx')
s32 = struct.Struct(b'>i')
unity_matrix = (s32.pack(0x10000) + s32.pack(0) * 3) * 2 + s32.pack(0x40000000)
TRACK_ENABLED = 0x1
TRACK_IN_MOVIE = 0x2
TRACK_IN_PREVIEW = 0x4
SELF_CONTAINED = 0x1
def box(box_type, payload):
return u32.pack(8 + len(payload)) + box_type + payload
def full_box(box_type, version, flags, payload):
return box(box_type, u8.pack(version) + u32.pack(flags)[1:] + payload)
def write_piff_header(stream, params):
track_id = params['track_id']
fourcc = params['fourcc']
duration = params['duration']
timescale = params.get('timescale', 10000000)
language = params.get('language', 'und')
height = params.get('height', 0)
width = params.get('width', 0)
is_audio = width == 0 and height == 0
creation_time = modification_time = int(time.time())
ftyp_payload = b'isml' # major brand
ftyp_payload += u32.pack(1) # minor version
ftyp_payload += b'piff' + b'iso2' # compatible brands
stream.write(box(b'ftyp', ftyp_payload)) # File Type Box
mvhd_payload = u64.pack(creation_time)
mvhd_payload += u64.pack(modification_time)
mvhd_payload += u32.pack(timescale)
mvhd_payload += u64.pack(duration)
mvhd_payload += s1616.pack(1) # rate
mvhd_payload += s88.pack(1) # volume
mvhd_payload += u16.pack(0) # reserved
mvhd_payload += u32.pack(0) * 2 # reserved
mvhd_payload += unity_matrix
mvhd_payload += u32.pack(0) * 6 # pre defined
mvhd_payload += u32.pack(0xffffffff) # next track id
moov_payload = full_box(b'mvhd', 1, 0, mvhd_payload) # Movie Header Box
tkhd_payload = u64.pack(creation_time)
tkhd_payload += u64.pack(modification_time)
tkhd_payload += u32.pack(track_id) # track id
tkhd_payload += u32.pack(0) # reserved
tkhd_payload += u64.pack(duration)
tkhd_payload += u32.pack(0) * 2 # reserved
tkhd_payload += s16.pack(0) # layer
tkhd_payload += s16.pack(0) # alternate group
tkhd_payload += s88.pack(1 if is_audio else 0) # volume
tkhd_payload += u16.pack(0) # reserved
tkhd_payload += unity_matrix
tkhd_payload += u1616.pack(width)
tkhd_payload += u1616.pack(height)
trak_payload = full_box(b'tkhd', 1, TRACK_ENABLED | TRACK_IN_MOVIE | TRACK_IN_PREVIEW, tkhd_payload) # Track Header Box
mdhd_payload = u64.pack(creation_time)
mdhd_payload += u64.pack(modification_time)
mdhd_payload += u32.pack(timescale)
mdhd_payload += u64.pack(duration)
mdhd_payload += u16.pack(((ord(language[0]) - 0x60) << 10) | ((ord(language[1]) - 0x60) << 5) | (ord(language[2]) - 0x60))
mdhd_payload += u16.pack(0) # pre defined
mdia_payload = full_box(b'mdhd', 1, 0, mdhd_payload) # Media Header Box
hdlr_payload = u32.pack(0) # pre defined
hdlr_payload += b'soun' if is_audio else b'vide' # handler type
hdlr_payload += u32.pack(0) * 3 # reserved
hdlr_payload += (b'Sound' if is_audio else b'Video') + b'Handler\0' # name
mdia_payload += full_box(b'hdlr', 0, 0, hdlr_payload) # Handler Reference Box
if is_audio:
smhd_payload = s88.pack(0) # balance
smhd_payload = u16.pack(0) # reserved
media_header_box = full_box(b'smhd', 0, 0, smhd_payload) # Sound Media Header
else:
vmhd_payload = u16.pack(0) # graphics mode
vmhd_payload += u16.pack(0) * 3 # opcolor
media_header_box = full_box(b'vmhd', 0, 1, vmhd_payload) # Video Media Header
minf_payload = media_header_box
dref_payload = u32.pack(1) # entry count
dref_payload += full_box(b'url ', 0, SELF_CONTAINED, b'') # Data Entry URL Box
dinf_payload = full_box(b'dref', 0, 0, dref_payload) # Data Reference Box
minf_payload += box(b'dinf', dinf_payload) # Data Information Box
stsd_payload = u32.pack(1) # entry count
sample_entry_payload = u8.pack(0) * 6 # reserved
sample_entry_payload += u16.pack(1) # data reference index
if is_audio:
sample_entry_payload += u32.pack(0) * 2 # reserved
sample_entry_payload += u16.pack(params.get('channels', 2))
sample_entry_payload += u16.pack(params.get('bits_per_sample', 16))
sample_entry_payload += u16.pack(0) # pre defined
sample_entry_payload += u16.pack(0) # reserved
sample_entry_payload += u1616.pack(params['sampling_rate'])
if fourcc == 'AACL':
sample_entry_box = box(b'mp4a', sample_entry_payload)
else:
sample_entry_payload = sample_entry_payload
sample_entry_payload += u16.pack(0) # pre defined
sample_entry_payload += u16.pack(0) # reserved
sample_entry_payload += u32.pack(0) * 3 # pre defined
sample_entry_payload += u16.pack(width)
sample_entry_payload += u16.pack(height)
sample_entry_payload += u1616.pack(0x48) # horiz resolution 72 dpi
sample_entry_payload += u1616.pack(0x48) # vert resolution 72 dpi
sample_entry_payload += u32.pack(0) # reserved
sample_entry_payload += u16.pack(1) # frame count
sample_entry_payload += u8.pack(0) * 32 # compressor name
sample_entry_payload += u16.pack(0x18) # depth
sample_entry_payload += s16.pack(-1) # pre defined
codec_private_data = binascii.unhexlify(params['codec_private_data'])
if fourcc in ('H264', 'AVC1'):
sps, pps = codec_private_data.split(u32.pack(1))[1:]
avcc_payload = u8.pack(1) # configuration version
avcc_payload += sps[1:4] # avc profile indication + profile compatibility + avc level indication
avcc_payload += u8.pack(0xfc | (params.get('nal_unit_length_field', 4) - 1)) # complete represenation (1) + reserved (11111) + length size minus one
avcc_payload += u8.pack(1) # reserved (0) + number of sps (0000001)
avcc_payload += u16.pack(len(sps))
avcc_payload += sps
avcc_payload += u8.pack(1) # number of pps
avcc_payload += u16.pack(len(pps))
avcc_payload += pps
sample_entry_payload += box(b'avcC', avcc_payload) # AVC Decoder Configuration Record
sample_entry_box = box(b'avc1', sample_entry_payload) # AVC Simple Entry
stsd_payload += sample_entry_box
stbl_payload = full_box(b'stsd', 0, 0, stsd_payload) # Sample Description Box
stts_payload = u32.pack(0) # entry count
stbl_payload += full_box(b'stts', 0, 0, stts_payload) # Decoding Time to Sample Box
stsc_payload = u32.pack(0) # entry count
stbl_payload += full_box(b'stsc', 0, 0, stsc_payload) # Sample To Chunk Box
stco_payload = u32.pack(0) # entry count
stbl_payload += full_box(b'stco', 0, 0, stco_payload) # Chunk Offset Box
minf_payload += box(b'stbl', stbl_payload) # Sample Table Box
mdia_payload += box(b'minf', minf_payload) # Media Information Box
trak_payload += box(b'mdia', mdia_payload) # Media Box
moov_payload += box(b'trak', trak_payload) # Track Box
mehd_payload = u64.pack(duration)
mvex_payload = full_box(b'mehd', 1, 0, mehd_payload) # Movie Extends Header Box
trex_payload = u32.pack(track_id) # track id
trex_payload += u32.pack(1) # default sample description index
trex_payload += u32.pack(0) # default sample duration
trex_payload += u32.pack(0) # default sample size
trex_payload += u32.pack(0) # default sample flags
mvex_payload += full_box(b'trex', 0, 0, trex_payload) # Track Extends Box
moov_payload += box(b'mvex', mvex_payload) # Movie Extends Box
stream.write(box(b'moov', moov_payload)) # Movie Box
def extract_box_data(data, box_sequence):
data_reader = io.BytesIO(data)
while True:
box_size = u32.unpack(data_reader.read(4))[0]
box_type = data_reader.read(4)
if box_type == box_sequence[0]:
box_data = data_reader.read(box_size - 8)
if len(box_sequence) == 1:
return box_data
return extract_box_data(box_data, box_sequence[1:])
data_reader.seek(box_size - 8, 1)
class IsmFD(FragmentFD):
"""
Download segments in a ISM manifest
"""
FD_NAME = 'ism'
def real_download(self, filename, info_dict):
segments = info_dict['fragments'][:1] if self.params.get(
'test', False) else info_dict['fragments']
ctx = {
'filename': filename,
'total_frags': len(segments),
}
self._prepare_and_start_frag_download(ctx)
segments_filenames = []
fragment_retries = self.params.get('fragment_retries', 0)
skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True)
track_written = False
for i, segment in enumerate(segments):
segment_url = segment['url']
segment_name = 'Frag%d' % i
target_filename = '%s-%s' % (ctx['tmpfilename'], segment_name)
count = 0
while count <= fragment_retries:
try:
success = ctx['dl'].download(target_filename, {'url': segment_url})
if not success:
return False
down, target_sanitized = sanitize_open(target_filename, 'rb')
down_data = down.read()
if not track_written:
tfhd_data = extract_box_data(down_data, [b'moof', b'traf', b'tfhd'])
info_dict['_download_params']['track_id'] = u32.unpack(tfhd_data[4:8])[0]
write_piff_header(ctx['dest_stream'], info_dict['_download_params'])
track_written = True
ctx['dest_stream'].write(down_data)
down.close()
segments_filenames.append(target_sanitized)
break
except compat_urllib_error.HTTPError as err:
count += 1
if count <= fragment_retries:
self.report_retry_fragment(err, segment_name, count, fragment_retries)
if count > fragment_retries:
if skip_unavailable_fragments:
self.report_skip_fragment(segment_name)
continue
self.report_error('giving up after %s fragment retries' % fragment_retries)
return False
self._finish_frag_download(ctx)
for segment_file in segments_filenames:
os.remove(encodeFilename(segment_file))
return True
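Editor's note (not part of the commit): the new downloader rebuilds a PIFF/fMP4 initialization header from the manifest parameters because Smooth Streaming servers only return raw moof/mdat fragments. As an illustrative sketch only, the snippet below reuses the box()/full_box() helpers defined above to build the same ftyp box that write_piff_header emits, showing the 4-byte big-endian size + 4-byte type framing of ISO BMFF boxes.

import struct

u8 = struct.Struct(b'>B')
u32 = struct.Struct(b'>I')

def box(box_type, payload):
    # the size field counts the 8 header bytes (size + type) plus the payload
    return u32.pack(8 + len(payload)) + box_type + payload

def full_box(box_type, version, flags, payload):
    # a "full box" adds a 1-byte version and 3-byte flags before the payload
    return box(box_type, u8.pack(version) + u32.pack(flags)[1:] + payload)

ftyp = box(b'ftyp', b'isml' + u32.pack(1) + b'piff' + b'iso2')
assert u32.unpack(ftyp[:4])[0] == len(ftyp) == 24 and ftyp[4:8] == b'ftyp'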


@@ -30,6 +30,7 @@ from ..downloader.f4m import remove_encrypted_media
from ..utils import (
NO_DEFAULT,
age_restricted,
+base_url,
bug_reports_message,
clean_html,
compiled_regex_type,
@@ -1539,7 +1540,7 @@ class InfoExtractor(object):
if res is False:
return []
mpd, urlh = res
-mpd_base_url = re.match(r'https?://.+/', urlh.geturl()).group()
+mpd_base_url = base_url(urlh.geturl())
return self._parse_mpd_formats(
compat_etree_fromstring(mpd.encode('utf-8')), mpd_id, mpd_base_url,
@@ -1780,6 +1781,105 @@ class InfoExtractor(object):
self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type)
return formats
def _extract_ism_formats(self, ism_url, video_id, ism_id=None, note=None, errnote=None, fatal=True):
res = self._download_webpage_handle(
ism_url, video_id,
note=note or 'Downloading ISM manifest',
errnote=errnote or 'Failed to download ISM manifest',
fatal=fatal)
if res is False:
return []
ism, urlh = res
return self._parse_ism_formats(
compat_etree_fromstring(ism.encode('utf-8')), urlh.geturl(), ism_id)
def _parse_ism_formats(self, ism_doc, ism_url, ism_id=None):
if ism_doc.get('IsLive') == 'TRUE' or ism_doc.find('Protection') is not None:
return []
duration = int(ism_doc.attrib['Duration'])
timescale = int_or_none(ism_doc.get('TimeScale')) or 10000000
formats = []
for stream in ism_doc.findall('StreamIndex'):
stream_type = stream.get('Type')
if stream_type not in ('video', 'audio'):
continue
url_pattern = stream.attrib['Url']
stream_timescale = int_or_none(stream.get('TimeScale')) or timescale
stream_name = stream.get('Name')
for track in stream.findall('QualityLevel'):
fourcc = track.get('FourCC')
# TODO: add support for WVC1 and WMAP
if fourcc not in ('H264', 'AVC1', 'AACL'):
self.report_warning('%s is not a supported codec' % fourcc)
continue
tbr = int(track.attrib['Bitrate']) // 1000
width = int_or_none(track.get('MaxWidth'))
height = int_or_none(track.get('MaxHeight'))
sampling_rate = int_or_none(track.get('SamplingRate'))
track_url_pattern = re.sub(r'{[Bb]itrate}', track.attrib['Bitrate'], url_pattern)
track_url_pattern = compat_urlparse.urljoin(ism_url, track_url_pattern)
fragments = []
fragment_ctx = {
'time': 0,
}
stream_fragments = stream.findall('c')
for stream_fragment_index, stream_fragment in enumerate(stream_fragments):
fragment_ctx['time'] = int_or_none(stream_fragment.get('t')) or fragment_ctx['time']
fragment_repeat = int_or_none(stream_fragment.get('r')) or 1
fragment_ctx['duration'] = int_or_none(stream_fragment.get('d'))
if not fragment_ctx['duration']:
try:
next_fragment_time = int(stream_fragment[stream_fragment_index + 1].attrib['t'])
except IndexError:
next_fragment_time = duration
fragment_ctx['duration'] = (next_fragment_time - fragment_ctx['time']) / fragment_repeat
for _ in range(fragment_repeat):
fragments.append({
'url': re.sub(r'{start[ _]time}', compat_str(fragment_ctx['time']), track_url_pattern),
'duration': fragment_ctx['duration'] / stream_timescale,
})
fragment_ctx['time'] += fragment_ctx['duration']
format_id = []
if ism_id:
format_id.append(ism_id)
if stream_name:
format_id.append(stream_name)
format_id.append(compat_str(tbr))
formats.append({
'format_id': '-'.join(format_id),
'url': ism_url,
'manifest_url': ism_url,
'ext': 'ismv' if stream_type == 'video' else 'isma',
'width': width,
'height': height,
'tbr': tbr,
'asr': sampling_rate,
'vcodec': 'none' if stream_type == 'audio' else fourcc,
'acodec': 'none' if stream_type == 'video' else fourcc,
'protocol': 'ism',
'fragments': fragments,
'_download_params': {
'duration': duration,
'timescale': stream_timescale,
'width': width or 0,
'height': height or 0,
'fourcc': fourcc,
'codec_private_data': track.get('CodecPrivateData'),
'sampling_rate': sampling_rate,
'channels': int_or_none(track.get('Channels', 2)),
'bits_per_sample': int_or_none(track.get('BitsPerSample', 16)),
'nal_unit_length_field': int_or_none(track.get('NALUnitLengthField', 4)),
},
})
return formats
def _parse_html5_media_entries(self, base_url, webpage, video_id, m3u8_id=None, m3u8_entry_protocol='m3u8'):
def absolute_url(video_url):
return compat_urlparse.urljoin(base_url, video_url)
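Editor's note (not part of the commit): _parse_ism_formats above builds the fragment list from the manifest's c (chunk) elements, where t is an optional start time, d an optional duration and r a repeat count, falling back to the gap up to the next chunk (or the stream duration) when d is missing. A rough standalone sketch of that expansion, with made-up sample values:

def expand_chunks(chunks, total_duration, timescale):
    # chunks is a list of (t, d, r) tuples taken from the <c> elements
    fragments = []
    time = 0
    for i, (t, d, r) in enumerate(chunks):
        time = t if t is not None else time
        repeat = r or 1
        duration = d
        if not duration:
            # no explicit duration: span the gap to the next chunk or the stream end
            next_time = chunks[i + 1][0] if i + 1 < len(chunks) else total_duration
            duration = (next_time - time) / repeat
        for _ in range(repeat):
            fragments.append({'start': time / timescale, 'duration': duration / timescale})
            time += duration
    return fragments

# 10 MHz timescale, 8 s stream: a 2 s chunk, a 2 s chunk repeated twice, one open-ended chunk
print(expand_chunks([(0, 20000000, None), (None, 20000000, 2), (None, None, None)],
                    80000000, 10000000))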


@@ -596,6 +596,7 @@ from .nhl import (
from .nick import (
NickIE,
NickDeIE,
+NickNightIE,
)
from .niconico import NiconicoIE, NiconicoPlaylistIE
from .ninecninemedia import (
@@ -1100,6 +1101,7 @@ from .vrt import VRTIE
from .vube import VubeIE
from .vuclip import VuClipIE
from .vyborymos import VyboryMosIE
+from .vzaar import VzaarIE
from .walla import WallaIE
from .washingtonpost import (
WashingtonPostIE,


@@ -1634,6 +1634,10 @@ class GenericIE(InfoExtractor):
doc = compat_etree_fromstring(webpage.encode('utf-8'))
if doc.tag == 'rss':
return self._extract_rss(url, video_id, doc)
+elif doc.tag == 'SmoothStreamingMedia':
+info_dict['formats'] = self._parse_ism_formats(doc, url)
+self._sort_formats(info_dict['formats'])
+return info_dict
elif re.match(r'^(?:{[^}]+})?smil$', doc.tag):
smil = self._parse_smil(doc, url, video_id)
self._sort_formats(smil['formats'])
@@ -2449,6 +2453,21 @@ class GenericIE(InfoExtractor):
entry_info_dict['formats'] = self._extract_mpd_formats(video_url, video_id)
elif ext == 'f4m':
entry_info_dict['formats'] = self._extract_f4m_formats(video_url, video_id)
elif re.search(r'(?i)\.(?:ism|smil)/manifest', video_url) and video_url != url:
# Just matching .ism/manifest is not enough to be reliably sure
# whether it's actually an ISM manifest or some other streaming
# manifest since there are various streaming URL formats
# possible (see [1]) as well as some other shenanigans like
# .smil/manifest URLs that actually serve an ISM (see [2]) and
# so on.
# Thus the most reasonable way to solve this is to delegate
# to generic extractor in order to look into the contents of
# the manifest itself.
# 1. https://azure.microsoft.com/en-us/documentation/articles/media-services-deliver-content-overview/#streaming-url-formats
# 2. https://svs.itworkscdn.net/lbcivod/smil:itwfcdn/lbci/170976.smil/Manifest
entry_info_dict = self.url_result(
smuggle_url(video_url, {'to_generic': True}),
GenericIE.ie_key())
else:
entry_info_dict['url'] = video_url


@@ -71,12 +71,15 @@ class MicrosoftVirtualAcademyIE(MicrosoftVirtualAcademyBaseIE):
formats = []
for sources in settings.findall(compat_xpath('.//MediaSources')):
-if sources.get('videoType') == 'smoothstreaming':
-continue
+sources_type = sources.get('videoType')
for source in sources.findall(compat_xpath('./MediaSource')):
video_url = source.text
if not video_url or not video_url.startswith('http'):
continue
+if sources_type == 'smoothstreaming':
+formats.extend(self._extract_ism_formats(
+video_url, video_id, 'mss', fatal=False))
+continue
video_mode = source.get('videoMode')
height = int_or_none(self._search_regex(
r'^(\d+)[pP]$', video_mode or '', 'height', default=None))


@@ -69,10 +69,9 @@ class MSNIE(InfoExtractor):
if not format_url:
continue
ext = determine_ext(format_url)
-# .ism is not yet supported (see
-# https://github.com/rg3/youtube-dl/issues/8118)
if ext == 'ism':
-continue
+formats.extend(self._extract_ism_formats(
+format_url + '/Manifest', display_id, 'mss', fatal=False))
if 'm3u8' in format_url:
# m3u8_native should not be used here until
# https://github.com/rg3/youtube-dl/issues/9913 is fixed


@@ -1,6 +1,8 @@
# coding: utf-8
from __future__ import unicode_literals
+import re
from .mtv import MTVServicesInfoExtractor
from ..utils import update_url_query
@@ -69,7 +71,7 @@ class NickIE(MTVServicesInfoExtractor):
class NickDeIE(MTVServicesInfoExtractor):
IE_NAME = 'nick.de'
-_VALID_URL = r'https?://(?:www\.)?(?:nick\.de|nickelodeon\.nl)/(?:playlist|shows)/(?:[^/]+/)*(?P<id>[^/?#&]+)'
+_VALID_URL = r'https?://(?:www\.)?(?P<host>nick\.de|nickelodeon\.(?:nl|at))/(?:playlist|shows)/(?:[^/]+/)*(?P<id>[^/?#&]+)'
_TESTS = [{
'url': 'http://www.nick.de/playlist/3773-top-videos/videos/episode/17306-zu-wasser-und-zu-land-rauchende-erdnusse',
'only_matching': True,
@@ -79,15 +81,43 @@ class NickDeIE(MTVServicesInfoExtractor):
}, {
'url': 'http://www.nickelodeon.nl/shows/474-spongebob/videos/17403-een-kijkje-in-de-keuken-met-sandy-van-binnenuit',
'only_matching': True,
+}, {
+'url': 'http://www.nickelodeon.at/playlist/3773-top-videos/videos/episode/77993-das-letzte-gefecht',
+'only_matching': True,
}]
+def _extract_mrss_url(self, webpage, host):
+return update_url_query(self._search_regex(
+r'data-mrss=(["\'])(?P<url>http.+?)\1', webpage, 'mrss url', group='url'),
+{'siteKey': host})
def _real_extract(self, url):
-video_id = self._match_id(url)
+mobj = re.match(self._VALID_URL, url)
+video_id = mobj.group('id')
+host = mobj.group('host')
webpage = self._download_webpage(url, video_id)
-mrss_url = update_url_query(self._search_regex(
-r'data-mrss=(["\'])(?P<url>http.+?)\1', webpage, 'mrss url', group='url'),
-{'siteKey': 'nick.de'})
+mrss_url = self._extract_mrss_url(webpage, host)
return self._get_videos_info_from_url(mrss_url, video_id)
class NickNightIE(NickDeIE):
IE_NAME = 'nicknight'
_VALID_URL = r'https?://(?:www\.)(?P<host>nicknight\.(?:de|at|tv))/(?:playlist|shows)/(?:[^/]+/)*(?P<id>[^/?#&]+)'
_TESTS = [{
'url': 'http://www.nicknight.at/shows/977-awkward/videos/85987-nimmer-beste-freunde',
'only_matching': True,
}, {
'url': 'http://www.nicknight.at/shows/977-awkward',
'only_matching': True,
}, {
'url': 'http://www.nicknight.at/shows/1900-faking-it',
'only_matching': True,
}]
def _extract_mrss_url(self, webpage, *args):
return self._search_regex(
r'mrss\s*:\s*(["\'])(?P<url>http.+?)\1', webpage,
'mrss url', group='url')


@@ -56,8 +56,8 @@ class OnetBaseIE(InfoExtractor):
continue
ext = determine_ext(video_url)
if format_id == 'ism':
-# TODO: Support Microsoft Smooth Streaming
-continue
+formats.extend(self._extract_ism_formats(
+video_url, video_id, 'mss', fatal=False))
elif ext == 'mpd':
formats.extend(self._extract_mpd_formats(
video_url, video_id, mpd_id='dash', fatal=False))


@@ -125,6 +125,14 @@ class RadioCanadaIE(InfoExtractor):
f4m_id='hds', fatal=False))
self._sort_formats(formats)
+subtitles = {}
+closed_caption_url = get_meta('closedCaption') or get_meta('closedCaptionHTML5')
+if closed_caption_url:
+subtitles['fr'] = [{
+'url': closed_caption_url,
+'ext': determine_ext(closed_caption_url, 'vtt'),
+}]
return {
'id': video_id,
'title': get_meta('Title'),
@@ -135,6 +143,7 @@ class RadioCanadaIE(InfoExtractor):
'season_number': int_or_none('SrcSaison'),
'episode_number': int_or_none('SrcEpisode'),
'upload_date': unified_strdate(get_meta('Date')),
+'subtitles': subtitles,
'formats': formats,
}


@@ -1,17 +1,24 @@
# coding: utf-8
from __future__ import unicode_literals
+import re
+import json
from .common import InfoExtractor
+from ..compat import compat_HTTPError
from ..utils import (
ExtractorError,
int_or_none,
parse_iso8601,
str_or_none,
+urlencode_postdata,
+clean_html,
)
class ShahidIE(InfoExtractor):
-_VALID_URL = r'https?://shahid\.mbc\.net/ar/episode/(?P<id>\d+)/?'
+_NETRC_MACHINE = 'shahid'
+_VALID_URL = r'https?://shahid\.mbc\.net/ar/(?P<type>episode|movie)/(?P<id>\d+)'
_TESTS = [{
'url': 'https://shahid.mbc.net/ar/episode/90574/%D8%A7%D9%84%D9%85%D9%84%D9%83-%D8%B9%D8%A8%D8%AF%D8%A7%D9%84%D9%84%D9%87-%D8%A7%D9%84%D8%A5%D9%86%D8%B3%D8%A7%D9%86-%D8%A7%D9%84%D9%85%D9%88%D8%B3%D9%85-1-%D9%83%D9%84%D9%8A%D8%A8-3.html',
'info_dict': {
@@ -27,18 +34,54 @@ class ShahidIE(InfoExtractor):
# m3u8 download
'skip_download': True,
}
+}, {
+'url': 'https://shahid.mbc.net/ar/movie/151746/%D8%A7%D9%84%D9%82%D9%86%D8%A7%D8%B5%D8%A9.html',
+'only_matching': True
}, {
# shahid plus subscriber only
'url': 'https://shahid.mbc.net/ar/episode/90511/%D9%85%D8%B1%D8%A7%D9%8A%D8%A7-2011-%D8%A7%D9%84%D9%85%D9%88%D8%B3%D9%85-1-%D8%A7%D9%84%D8%AD%D9%84%D9%82%D8%A9-1.html',
'only_matching': True
}]
-def _call_api(self, path, video_id, note):
-data = self._download_json(
-'http://api.shahid.net/api/v1_1/' + path, video_id, note, query={
-'apiKey': 'sh@hid0nlin3',
-'hash': 'b2wMCTHpSmyxGqQjJFOycRmLSex+BpTK/ooxy6vHaqs=',
-}).get('data', {})
+def _real_initialize(self):
+email, password = self._get_login_info()
+if email is None:
+return
+try:
user_data = self._download_json(
'https://shahid.mbc.net/wd/service/users/login',
None, 'Logging in', data=json.dumps({
'email': email,
'password': password,
'basic': 'false',
}).encode('utf-8'), headers={
'Content-Type': 'application/json; charset=UTF-8',
})['user']
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError):
fail_data = self._parse_json(
e.cause.read().decode('utf-8'), None, fatal=False)
if fail_data:
faults = fail_data.get('faults', [])
faults_message = ', '.join([clean_html(fault['userMessage']) for fault in faults if fault.get('userMessage')])
if faults_message:
raise ExtractorError(faults_message, expected=True)
raise
self._download_webpage(
'https://shahid.mbc.net/populateContext',
None, 'Populate Context', data=urlencode_postdata({
'firstName': user_data['firstName'],
'lastName': user_data['lastName'],
'userName': user_data['email'],
'csg_user_name': user_data['email'],
'subscriberId': user_data['id'],
'sessionId': user_data['sessionId'],
}))
+def _get_api_data(self, response):
+data = response.get('data', {})
error = data.get('error')
if error:
@@ -49,11 +92,11 @@ class ShahidIE(InfoExtractor):
return data
def _real_extract(self, url):
-video_id = self._match_id(url)
+page_type, video_id = re.match(self._VALID_URL, url).groups()
-player = self._call_api(
-'Content/Episode/%s' % video_id,
-video_id, 'Downloading player JSON')
+player = self._get_api_data(self._download_json(
+'https://shahid.mbc.net/arContent/getPlayerContent-param-.id-%s.type-player.html' % video_id,
+video_id, 'Downloading player JSON'))
if player.get('drm'):
raise ExtractorError('This video is DRM protected.', expected=True)
@@ -61,9 +104,12 @@ class ShahidIE(InfoExtractor):
formats = self._extract_m3u8_formats(player['url'], video_id, 'mp4')
self._sort_formats(formats)
-video = self._call_api(
-'episode/%s' % video_id, video_id,
-'Downloading video JSON')['episode']
+video = self._get_api_data(self._download_json(
+'http://api.shahid.net/api/v1_1/%s/%s' % (page_type, video_id),
+video_id, 'Downloading video JSON', query={
+'apiKey': 'sh@hid0nlin3',
+'hash': 'b2wMCTHpSmyxGqQjJFOycRmLSex+BpTK/ooxy6vHaqs=',
+}))[page_type]
title = video['title']
categories = [


@@ -9,7 +9,6 @@ from ..utils import (
int_or_none,
sanitized_Request,
urlencode_postdata,
-parse_iso8601,
)
@@ -19,17 +18,13 @@ class TubiTvIE(InfoExtractor):
_NETRC_MACHINE = 'tubitv'
_TEST = {
'url': 'http://tubitv.com/video/283829/the_comedian_at_the_friday',
-'md5': '43ac06be9326f41912dc64ccf7a80320',
'info_dict': {
'id': '283829',
'ext': 'mp4',
'title': 'The Comedian at The Friday',
'description': 'A stand up comedian is forced to look at the decisions in his life while on a one week trip to the west coast.',
-'uploader': 'Indie Rights Films',
-'upload_date': '20160111',
-'timestamp': 1452555979,
-},
-'params': {
-'skip_download': 'HLS download',
+'uploader_id': 'bc168bee0d18dd1cb3b86c68706ab434',
},
}
@@ -58,19 +53,28 @@ class TubiTvIE(InfoExtractor):
video_id = self._match_id(url)
video_data = self._download_json(
'http://tubitv.com/oz/videos/%s/content' % video_id, video_id)
-title = video_data['n']
+title = video_data['title']
formats = self._extract_m3u8_formats(
-video_data['mh'], video_id, 'mp4', 'm3u8_native')
+self._proto_relative_url(video_data['url']),
+video_id, 'mp4', 'm3u8_native')
self._sort_formats(formats)
+thumbnails = []
+for thumbnail_url in video_data.get('thumbnails', []):
+if not thumbnail_url:
+continue
+thumbnails.append({
+'url': self._proto_relative_url(thumbnail_url),
+})
subtitles = {}
-for sub in video_data.get('sb', []):
-sub_url = sub.get('u')
+for sub in video_data.get('subtitles', []):
+sub_url = sub.get('url')
if not sub_url:
continue
-subtitles.setdefault(sub.get('l', 'en'), []).append({
-'url': sub_url,
+subtitles.setdefault(sub.get('lang', 'English'), []).append({
+'url': self._proto_relative_url(sub_url),
})
return {
@@ -78,9 +82,8 @@ class TubiTvIE(InfoExtractor):
'title': title,
'formats': formats,
'subtitles': subtitles,
-'thumbnail': video_data.get('ph'),
-'description': video_data.get('d'),
-'duration': int_or_none(video_data.get('s')),
-'timestamp': parse_iso8601(video_data.get('u')),
-'uploader': video_data.get('on'),
+'thumbnails': thumbnails,
+'description': video_data.get('description'),
+'duration': int_or_none(video_data.get('duration')),
+'uploader_id': video_data.get('publisher_id'),
}


@@ -139,6 +139,9 @@ class TVPEmbedIE(InfoExtractor):
# formats.extend(self._extract_mpd_formats(
# video_url_base + '.ism/video.mpd',
# video_id, mpd_id='dash', fatal=False))
+formats.extend(self._extract_ism_formats(
+video_url_base + '.ism/Manifest',
+video_id, 'mss', fatal=False))
formats.extend(self._extract_f4m_formats(
video_url_base + '.ism/video.f4m',
video_id, f4m_id='hds', fatal=False))


@@ -1,12 +1,93 @@
+# coding: utf-8
from __future__ import unicode_literals
import re
+import time
+import hashlib
+import json
+from .adobepass import AdobePassIE
from .common import InfoExtractor
-from ..utils import ExtractorError
+from ..compat import compat_HTTPError
+from ..utils import (
+int_or_none,
+parse_age_limit,
+str_or_none,
+parse_duration,
+ExtractorError,
+extract_attributes,
+)
-class ViceIE(InfoExtractor):
+class ViceBaseIE(AdobePassIE):
def _extract_preplay_video(self, url, webpage):
watch_hub_data = extract_attributes(self._search_regex(
r'(?s)(<watch-hub\s*.+?</watch-hub>)', webpage, 'watch hub'))
video_id = watch_hub_data['vms-id']
title = watch_hub_data['video-title']
query = {}
is_locked = watch_hub_data.get('video-locked') == '1'
if is_locked:
resource = self._get_mvpd_resource(
'VICELAND', title, video_id,
watch_hub_data.get('video-rating'))
query['tvetoken'] = self._extract_mvpd_auth(url, video_id, 'VICELAND', resource)
# signature generation algorithm is reverse engineered from signatureGenerator in
# webpack:///../shared/~/vice-player/dist/js/vice-player.js in
# https://www.viceland.com/assets/common/js/web.vendor.bundle.js
exp = int(time.time()) + 14400
query.update({
'exp': exp,
'sign': hashlib.sha512(('%s:GET:%d' % (video_id, exp)).encode()).hexdigest(),
})
try:
host = 'www.viceland' if is_locked else self._PREPLAY_HOST
preplay = self._download_json('https://%s.com/en_us/preplay/%s' % (host, video_id), video_id, query=query)
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400:
error = json.loads(e.cause.read().decode())
raise ExtractorError('%s said: %s' % (self.IE_NAME, error['details']), expected=True)
raise
video_data = preplay['video']
base = video_data['base']
uplynk_preplay_url = preplay['preplayURL']
episode = video_data.get('episode', {})
channel = video_data.get('channel', {})
subtitles = {}
cc_url = preplay.get('ccURL')
if cc_url:
subtitles['en'] = [{
'url': cc_url,
}]
return {
'_type': 'url_transparent',
'url': uplynk_preplay_url,
'id': video_id,
'title': title,
'description': base.get('body'),
'thumbnail': watch_hub_data.get('cover-image') or watch_hub_data.get('thumbnail'),
'duration': parse_duration(video_data.get('video_duration') or watch_hub_data.get('video-duration')),
'timestamp': int_or_none(video_data.get('created_at')),
'age_limit': parse_age_limit(video_data.get('video_rating')),
'series': video_data.get('show_title') or watch_hub_data.get('show-title'),
'episode_number': int_or_none(episode.get('episode_number') or watch_hub_data.get('episode')),
'episode_id': str_or_none(episode.get('id') or video_data.get('episode_id')),
'season_number': int_or_none(watch_hub_data.get('season')),
'season_id': str_or_none(episode.get('season_id')),
'uploader': channel.get('base', {}).get('title') or watch_hub_data.get('channel-title'),
'uploader_id': str_or_none(channel.get('id')),
'subtitles': subtitles,
'ie_key': 'UplynkPreplay',
}
class ViceIE(ViceBaseIE):
_VALID_URL = r'https?://(?:.+?\.)?vice\.com/(?:[^/]+/)?videos?/(?P<id>[^/?#&]+)'
_TESTS = [{
@@ -21,7 +102,7 @@ class ViceIE(InfoExtractor):
'add_ie': ['Ooyala'],
}, {
'url': 'http://www.vice.com/video/how-to-hack-a-car',
-'md5': '6fb2989a3fed069fb8eab3401fc2d3c9',
+'md5': 'a7ecf64ee4fa19b916c16f4b56184ae2',
'info_dict': {
'id': '3jstaBeXgAs',
'ext': 'mp4',
@@ -32,6 +113,22 @@ class ViceIE(InfoExtractor):
'upload_date': '20140529',
},
'add_ie': ['Youtube'],
}, {
'url': 'https://video.vice.com/en_us/video/the-signal-from-tolva/5816510690b70e6c5fd39a56',
'md5': '',
'info_dict': {
'id': '5816510690b70e6c5fd39a56',
'ext': 'mp4',
'uploader': 'Waypoint',
'title': 'The Signal From Tölva',
'uploader_id': '57f7d621e05ca860fa9ccaf9',
'timestamp': 1477941983938,
},
'params': {
# m3u8 download
'skip_download': True,
},
'add_ie': ['UplynkPreplay'],
}, {
'url': 'https://news.vice.com/video/experimenting-on-animals-inside-the-monkey-lab',
'only_matching': True,
@@ -42,21 +139,21 @@ class ViceIE(InfoExtractor):
'url': 'https://munchies.vice.com/en/videos/watch-the-trailer-for-our-new-series-the-pizza-show',
'only_matching': True,
}]
+_PREPLAY_HOST = 'video.vice'
def _real_extract(self, url):
video_id = self._match_id(url)
-webpage = self._download_webpage(url, video_id)
-try:
-embed_code = self._search_regex(
-r'embedCode=([^&\'"]+)', webpage,
-'ooyala embed code', default=None)
-if embed_code:
-return self.url_result('ooyala:%s' % embed_code, 'Ooyala')
-youtube_id = self._search_regex(
-r'data-youtube-id="([^"]+)"', webpage, 'youtube id')
-return self.url_result(youtube_id, 'Youtube')
-except ExtractorError:
-raise ExtractorError('The page doesn\'t contain a video', expected=True)
+webpage, urlh = self._download_webpage_handle(url, video_id)
+embed_code = self._search_regex(
+r'embedCode=([^&\'"]+)', webpage,
+'ooyala embed code', default=None)
+if embed_code:
+return self.url_result('ooyala:%s' % embed_code, 'Ooyala')
+youtube_id = self._search_regex(
+r'data-youtube-id="([^"]+)"', webpage, 'youtube id', default=None)
+if youtube_id:
+return self.url_result(youtube_id, 'Youtube')
+return self._extract_preplay_video(urlh.geturl(), webpage)
class ViceShowIE(InfoExtractor):
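Editor's note (not part of the commit): as the reverse-engineered comment in ViceBaseIE._extract_preplay_video above describes, the preplay request is authorized with an expiry timestamp plus a SHA-512 digest over '<video id>:GET:<expiry>'. A minimal sketch of just that signing step; the id below is the one from the new test case, everything else is illustrative.

import hashlib
import time

video_id = '5816510690b70e6c5fd39a56'
exp = int(time.time()) + 14400  # four-hour validity window, as in the extractor
query = {
    'exp': exp,
    'sign': hashlib.sha512(('%s:GET:%d' % (video_id, exp)).encode()).hexdigest(),
}
print(query)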


@@ -1,23 +1,10 @@
# coding: utf-8
from __future__ import unicode_literals
-import time
-import hashlib
-import json
-from .adobepass import AdobePassIE
-from ..compat import compat_HTTPError
-from ..utils import (
-int_or_none,
-parse_age_limit,
-str_or_none,
-parse_duration,
-ExtractorError,
-extract_attributes,
-)
+from .vice import ViceBaseIE
-class VicelandIE(AdobePassIE):
+class VicelandIE(ViceBaseIE):
_VALID_URL = r'https?://(?:www\.)?viceland\.com/[^/]+/video/[^/]+/(?P<id>[a-f0-9]+)'
_TEST = {
'url': 'https://www.viceland.com/en_us/video/cyberwar-trailer/57608447973ee7705f6fbd4e',
@@ -38,70 +25,9 @@ class VicelandIE(AdobePassIE):
},
'add_ie': ['UplynkPreplay'],
}
+_PREPLAY_HOST = 'www.viceland'
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
-watch_hub_data = extract_attributes(self._search_regex(
-r'(?s)(<watch-hub\s*.+?</watch-hub>)', webpage, 'watch hub'))
-video_id = watch_hub_data['vms-id']
-title = watch_hub_data['video-title']
-query = {}
-if watch_hub_data.get('video-locked') == '1':
-resource = self._get_mvpd_resource(
-'VICELAND', title, video_id,
-watch_hub_data.get('video-rating'))
-query['tvetoken'] = self._extract_mvpd_auth(url, video_id, 'VICELAND', resource)
-# signature generation algorithm is reverse engineered from signatureGenerator in
-# webpack:///../shared/~/vice-player/dist/js/vice-player.js in
-# https://www.viceland.com/assets/common/js/web.vendor.bundle.js
-exp = int(time.time()) + 14400
-query.update({
-'exp': exp,
-'sign': hashlib.sha512(('%s:GET:%d' % (video_id, exp)).encode()).hexdigest(),
-})
-try:
-preplay = self._download_json('https://www.viceland.com/en_us/preplay/%s' % video_id, video_id, query=query)
-except ExtractorError as e:
-if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400:
-error = json.loads(e.cause.read().decode())
-raise ExtractorError('%s said: %s' % (self.IE_NAME, error['details']), expected=True)
-raise
-video_data = preplay['video']
-base = video_data['base']
-uplynk_preplay_url = preplay['preplayURL']
-episode = video_data.get('episode', {})
-channel = video_data.get('channel', {})
-subtitles = {}
-cc_url = preplay.get('ccURL')
-if cc_url:
-subtitles['en'] = [{
-'url': cc_url,
-}]
-return {
-'_type': 'url_transparent',
-'url': uplynk_preplay_url,
-'id': video_id,
-'title': title,
-'description': base.get('body'),
-'thumbnail': watch_hub_data.get('cover-image') or watch_hub_data.get('thumbnail'),
-'duration': parse_duration(video_data.get('video_duration') or watch_hub_data.get('video-duration')),
-'timestamp': int_or_none(video_data.get('created_at')),
-'age_limit': parse_age_limit(video_data.get('video_rating')),
-'series': video_data.get('show_title') or watch_hub_data.get('show-title'),
-'episode_number': int_or_none(episode.get('episode_number') or watch_hub_data.get('episode')),
-'episode_id': str_or_none(episode.get('id') or video_data.get('episode_id')),
-'season_number': int_or_none(watch_hub_data.get('season')),
-'season_id': str_or_none(episode.get('season_id')),
-'uploader': channel.get('base', {}).get('title') or watch_hub_data.get('channel-title'),
-'uploader_id': str_or_none(channel.get('id')),
-'subtitles': subtitles,
-'ie_key': 'UplynkPreplay',
-}
+return self._extract_preplay_video(url, webpage)


@@ -31,7 +31,8 @@ class VodlockerIE(InfoExtractor):
if any(p in webpage for p in (
'>THIS FILE WAS DELETED<',
'>File Not Found<',
-'The file you were looking for could not be found, sorry for any inconvenience.<')):
+'The file you were looking for could not be found, sorry for any inconvenience.<',
+'>The file was removed')):
raise ExtractorError('Video %s does not exist' % video_id, expected=True)
fields = self._hidden_inputs(webpage)


@@ -0,0 +1,55 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
int_or_none,
float_or_none,
)
class VzaarIE(InfoExtractor):
_VALID_URL = r'https?://(?:(?:www|view)\.)?vzaar\.com/(?:videos/)?(?P<id>\d+)'
_TESTS = [{
'url': 'https://vzaar.com/videos/1152805',
'md5': 'bde5ddfeb104a6c56a93a06b04901dbf',
'info_dict': {
'id': '1152805',
'ext': 'mp4',
'title': 'sample video (public)',
},
}, {
'url': 'https://view.vzaar.com/27272/player',
'md5': '3b50012ac9bbce7f445550d54e0508f2',
'info_dict': {
'id': '27272',
'ext': 'mp3',
'title': 'MP3',
},
}]
def _real_extract(self, url):
video_id = self._match_id(url)
video_data = self._download_json(
'http://view.vzaar.com/v2/%s/video' % video_id, video_id)
source_url = video_data['sourceUrl']
info = {
'id': video_id,
'title': video_data['videoTitle'],
'url': source_url,
'thumbnail': self._proto_relative_url(video_data.get('poster')),
'duration': float_or_none(video_data.get('videoDuration')),
}
if 'audio' in source_url:
info.update({
'vcodec': 'none',
'ext': 'mp3',
})
else:
info.update({
'width': int_or_none(video_data.get('width')),
'height': int_or_none(video_data.get('height')),
'ext': 'mp4',
})
return info


@@ -1691,6 +1691,10 @@ def url_basename(url):
return path.strip('/').split('/')[-1]
+def base_url(url):
+return re.match(r'https?://[^?#&]+/', url).group()
class HEADRequest(compat_urllib_request.Request):
def get_method(self):
return 'HEAD'
@@ -2345,11 +2349,18 @@ def _match_one(filter_part, dct):
m = operator_rex.search(filter_part)
if m:
op = COMPARISON_OPERATORS[m.group('op')]
-if m.group('strval') is not None:
+actual_value = dct.get(m.group('key'))
+if (m.group('strval') is not None or
+# If the original field is a string and matching comparisonvalue is
+# a number we should respect the origin of the original field
+# and process comparison value as a string (see
+# https://github.com/rg3/youtube-dl/issues/11082).
+actual_value is not None and m.group('intval') is not None and
+isinstance(actual_value, compat_str)):
if m.group('op') not in ('=', '!='):
raise ValueError(
'Operator %s does not support string values!' % m.group('op'))
-comparison_value = m.group('strval')
+comparison_value = m.group('strval') or m.group('intval')
else:
try:
comparison_value = int(m.group('intval'))
@@ -2361,7 +2372,6 @@ def _match_one(filter_part, dct):
raise ValueError(
'Invalid integer value %r in filter part %r' % (
m.group('intval'), filter_part))
-actual_value = dct.get(m.group('key'))
if actual_value is None:
return m.group('none_inclusive')
return op(actual_value, comparison_value)
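Editor's note (not part of the commit): the _match_one change above is what fixes --match-filter for int-like strings (#11082): when the field stored in the info dict is a string, an unquoted numeric comparison value is now compared as a string rather than cast to int. A minimal sketch, assuming youtube_dl is importable, mirroring the new test in test_YoutubeDL.py:

from youtube_dl.utils import match_filter_func

f = match_filter_func('playlist_id = 42')
print(f({'playlist_id': '42'}))  # None: the entry passes the filter
print(f({'playlist_id': '43'}))  # a "does not pass filter" message: the entry is skipped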


@@ -1,3 +1,3 @@
from __future__ import unicode_literals
-__version__ = '2016.10.31'
+__version__ = '2016.11.02'