This commit is contained in:
Gilles Habran 2016-04-22 16:38:40 +02:00
commit f896739541
24 changed files with 444 additions and 360 deletions

View File

@ -413,6 +413,7 @@ class TestUtil(unittest.TestCase):
self.assertEqual(parse_duration('01:02:03:04'), 93784)
self.assertEqual(parse_duration('1 hour 3 minutes'), 3780)
self.assertEqual(parse_duration('87 Min.'), 5220)
self.assertEqual(parse_duration('PT1H0.040S'), 3600.04)
def test_fix_xml_ampersands(self):
self.assertEqual(

View File

@ -260,7 +260,9 @@ class YoutubeDL(object):
The following options determine which downloader is picked:
external_downloader: Executable of the external downloader to call.
None or unset for standard (built-in) downloader.
hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv.
hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv
if True, otherwise use ffmpeg/avconv if False, otherwise
use downloader suggested by extractor if None.
The following parameters are not used by YoutubeDL itself, they are used by
the downloader (see youtube_dl/downloader/common.py):

View File

@ -41,9 +41,12 @@ def get_suitable_downloader(info_dict, params={}):
if ed.can_download(info_dict):
return ed
if protocol == 'm3u8' and params.get('hls_prefer_native'):
if protocol == 'm3u8' and params.get('hls_prefer_native') is True:
return HlsFD
if protocol == 'm3u8_native' and params.get('hls_prefer_native') is False:
return FFmpegFD
return PROTOCOL_MAP.get(protocol, HttpFD)

View File

@ -382,7 +382,7 @@ class InfoExtractor(object):
else:
if query:
url_or_request = update_url_query(url_or_request, query)
if data or headers:
if data is not None or headers:
url_or_request = sanitized_Request(url_or_request, data, headers)
try:
return self._downloader.urlopen(url_or_request)

View File

@ -0,0 +1,114 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
int_or_none,
parse_duration,
remove_end,
xpath_element,
xpath_text,
)
class DigitallySpeakingIE(InfoExtractor):
_VALID_URL = r'https?://(?:evt\.dispeak|events\.digitallyspeaking)\.com/(?:[^/]+/)+xml/(?P<id>[^.]+)\.xml'
_TESTS = [{
# From http://gdcvault.com/play/1023460/Tenacious-Design-and-The-Interface
'url': 'http://evt.dispeak.com/ubm/gdc/sf16/xml/840376_BQRC.xml',
'md5': 'a8efb6c31ed06ca8739294960b2dbabd',
'info_dict': {
'id': '840376_BQRC',
'ext': 'mp4',
'title': 'Tenacious Design and The Interface of \'Destiny\'',
},
}, {
# From http://www.gdcvault.com/play/1014631/Classic-Game-Postmortem-PAC
'url': 'http://events.digitallyspeaking.com/gdc/sf11/xml/12396_1299111843500GMPX.xml',
'only_matching': True,
}]
def _parse_mp4(self, metadata):
video_formats = []
video_root = None
mp4_video = xpath_text(metadata, './mp4video', default=None)
if mp4_video is not None:
mobj = re.match(r'(?P<root>https?://.*?/).*', mp4_video)
video_root = mobj.group('root')
if video_root is None:
http_host = xpath_text(metadata, 'httpHost', default=None)
if http_host:
video_root = 'http://%s/' % http_host
if video_root is None:
# Hard-coded in http://evt.dispeak.com/ubm/gdc/sf16/custom/player2.js
# Works for GPUTechConf, too
video_root = 'http://s3-2u.digitallyspeaking.com/'
formats = metadata.findall('./MBRVideos/MBRVideo')
if not formats:
return None
for a_format in formats:
stream_name = xpath_text(a_format, 'streamName', fatal=True)
video_path = re.match(r'mp4\:(?P<path>.*)', stream_name).group('path')
url = video_root + video_path
vbr = xpath_text(a_format, 'bitrate')
video_formats.append({
'url': url,
'vbr': int_or_none(vbr),
})
return video_formats
def _parse_flv(self, metadata):
formats = []
akamai_url = xpath_text(metadata, './akamaiHost', fatal=True)
audios = metadata.findall('./audios/audio')
for audio in audios:
formats.append({
'url': 'rtmp://%s/ondemand?ovpfv=1.1' % akamai_url,
'play_path': remove_end(audio.get('url'), '.flv'),
'ext': 'flv',
'vcodec': 'none',
'format_id': audio.get('code'),
})
slide_video_path = xpath_text(metadata, './slideVideo', fatal=True)
formats.append({
'url': 'rtmp://%s/ondemand?ovpfv=1.1' % akamai_url,
'play_path': remove_end(slide_video_path, '.flv'),
'ext': 'flv',
'format_note': 'slide deck video',
'quality': -2,
'preference': -2,
'format_id': 'slides',
})
speaker_video_path = xpath_text(metadata, './speakerVideo', fatal=True)
formats.append({
'url': 'rtmp://%s/ondemand?ovpfv=1.1' % akamai_url,
'play_path': remove_end(speaker_video_path, '.flv'),
'ext': 'flv',
'format_note': 'speaker video',
'quality': -1,
'preference': -1,
'format_id': 'speaker',
})
return formats
def _real_extract(self, url):
video_id = self._match_id(url)
xml_description = self._download_xml(url, video_id)
metadata = xpath_element(xml_description, 'metadata')
video_formats = self._parse_mp4(metadata)
if video_formats is None:
video_formats = self._parse_flv(metadata)
return {
'id': video_id,
'formats': video_formats,
'title': xpath_text(metadata, 'title', fatal=True),
'duration': parse_duration(xpath_text(metadata, 'endTime')),
'creator': xpath_text(metadata, 'speaker'),
}

View File

@ -1,39 +0,0 @@
# encoding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
class DumpIE(InfoExtractor):
_VALID_URL = r'^https?://(?:www\.)?dump\.com/(?P<id>[a-zA-Z0-9]+)/'
_TEST = {
'url': 'http://www.dump.com/oneus/',
'md5': 'ad71704d1e67dfd9e81e3e8b42d69d99',
'info_dict': {
'id': 'oneus',
'ext': 'flv',
'title': "He's one of us.",
'thumbnail': 're:^https?://.*\.jpg$',
},
}
def _real_extract(self, url):
m = re.match(self._VALID_URL, url)
video_id = m.group('id')
webpage = self._download_webpage(url, video_id)
video_url = self._search_regex(
r's1.addVariable\("file",\s*"([^"]+)"', webpage, 'video URL')
title = self._og_search_title(webpage)
thumbnail = self._og_search_thumbnail(webpage)
return {
'id': video_id,
'title': title,
'url': video_url,
'thumbnail': thumbnail,
}

View File

@ -8,6 +8,7 @@ from ..compat import compat_HTTPError
from ..utils import (
ExtractorError,
int_or_none,
url_basename,
)
@ -22,7 +23,7 @@ class EaglePlatformIE(InfoExtractor):
_TESTS = [{
# http://lenta.ru/news/2015/03/06/navalny/
'url': 'http://lentaru.media.eagleplatform.com/index/player?player=new&record_id=227304&player_template_id=5201',
'md5': '70f5187fb620f2c1d503b3b22fd4efe3',
'md5': '881ee8460e1b7735a8be938e2ffb362b',
'info_dict': {
'id': '227304',
'ext': 'mp4',
@ -37,7 +38,7 @@ class EaglePlatformIE(InfoExtractor):
# http://muz-tv.ru/play/7129/
# http://media.clipyou.ru/index/player?record_id=12820&width=730&height=415&autoplay=true
'url': 'eagleplatform:media.clipyou.ru:12820',
'md5': '90b26344ba442c8e44aa4cf8f301164a',
'md5': '358597369cf8ba56675c1df15e7af624',
'info_dict': {
'id': '12820',
'ext': 'mp4',
@ -90,17 +91,30 @@ class EaglePlatformIE(InfoExtractor):
secure_m3u8 = self._proto_relative_url(media['sources']['secure_m3u8']['auto'], 'http:')
formats = []
m3u8_url = self._get_video_url(secure_m3u8, video_id, 'Downloading m3u8 JSON')
formats = self._extract_m3u8_formats(
m3u8_formats = self._extract_m3u8_formats(
m3u8_url, video_id,
'mp4', entry_protocol='m3u8_native', m3u8_id='hls')
formats.extend(m3u8_formats)
mp4_url = self._get_video_url(
# Secure mp4 URL is constructed according to Player.prototype.mp4 from
# http://lentaru.media.eagleplatform.com/player/player.js
re.sub(r'm3u8|hlsvod|hls|f4m', 'mp4', secure_m3u8),
video_id, 'Downloading mp4 JSON')
formats.append({'url': mp4_url, 'format_id': 'mp4'})
mp4_url_basename = url_basename(mp4_url)
for m3u8_format in m3u8_formats:
mobj = re.search('/([^/]+)/index\.m3u8', m3u8_format['url'])
if mobj:
http_format = m3u8_format.copy()
http_format.update({
'url': mp4_url.replace(mp4_url_basename, mobj.group(1)),
'format_id': m3u8_format['format_id'].replace('hls', 'http'),
'protocol': 'http',
})
formats.append(http_format)
self._sort_formats(formats)

View File

@ -193,10 +193,10 @@ from .drbonanza import DRBonanzaIE
from .drtuber import DrTuberIE
from .drtv import DRTVIE
from .dvtv import DVTVIE
from .dump import DumpIE
from .dumpert import DumpertIE
from .defense import DefenseGouvFrIE
from .discovery import DiscoveryIE
from .dispeak import DigitallySpeakingIE
from .dropbox import DropboxIE
from .dw import (
DWIE,
@ -406,6 +406,7 @@ from .mdr import MDRIE
from .metacafe import MetacafeIE
from .metacritic import MetacriticIE
from .mgoon import MgoonIE
from .mgtv import MGTVIE
from .minhateca import MinhatecaIE
from .ministrygrid import MinistryGridIE
from .minoto import MinotoIE
@ -559,12 +560,12 @@ from .pandoratv import PandoraTVIE
from .parliamentliveuk import ParliamentLiveUKIE
from .patreon import PatreonIE
from .pbs import PBSIE
from .people import PeopleIE
from .periscope import PeriscopeIE
from .philharmoniedeparis import PhilharmonieDeParisIE
from .phoenix import PhoenixIE
from .photobucket import PhotobucketIE
from .pinkbike import PinkbikeIE
from .planetaplay import PlanetaPlayIE
from .pladform import PladformIE
from .played import PlayedIE
from .playfm import PlayFMIE
@ -600,7 +601,6 @@ from .qqmusic import (
QQMusicToplistIE,
QQMusicPlaylistIE,
)
from .quickvid import QuickVidIE
from .r7 import R7IE
from .radiode import RadioDeIE
from .radiojavan import RadioJavanIE

View File

@ -24,15 +24,12 @@ class GazetaIE(InfoExtractor):
'only_matching': True,
}, {
'url': 'http://www.gazeta.ru/video/main/main/2015/06/22/platit_ili_ne_platit_po_isku_yukosa.shtml',
'md5': '37f19f78355eb2f4256ee1688359f24c',
'info_dict': {
'id': '252048',
'ext': 'mp4',
'title': '"Если по иску ЮКОСа придется платить, это будет большой удар по бюджету"',
},
'params': {
# m3u8 download
'skip_download': True,
},
'add_ie': ['EaglePlatform'],
}]

View File

@ -4,7 +4,6 @@ import re
from .common import InfoExtractor
from ..utils import (
remove_end,
HEADRequest,
sanitized_Request,
urlencode_postdata,
@ -51,63 +50,33 @@ class GDCVaultIE(InfoExtractor):
{
'url': 'http://gdcvault.com/play/1020791/',
'only_matching': True,
}
},
{
# Hard-coded hostname
'url': 'http://gdcvault.com/play/1023460/Tenacious-Design-and-The-Interface',
'md5': 'a8efb6c31ed06ca8739294960b2dbabd',
'info_dict': {
'id': '1023460',
'ext': 'mp4',
'display_id': 'Tenacious-Design-and-The-Interface',
'title': 'Tenacious Design and The Interface of \'Destiny\'',
},
},
{
# Multiple audios
'url': 'http://www.gdcvault.com/play/1014631/Classic-Game-Postmortem-PAC',
'info_dict': {
'id': '1014631',
'ext': 'flv',
'title': 'How to Create a Good Game - From My Experience of Designing Pac-Man',
},
'params': {
'skip_download': True, # Requires rtmpdump
'format': 'jp', # The japanese audio
}
},
]
def _parse_mp4(self, xml_description):
video_formats = []
mp4_video = xml_description.find('./metadata/mp4video')
if mp4_video is None:
return None
mobj = re.match(r'(?P<root>https?://.*?/).*', mp4_video.text)
video_root = mobj.group('root')
formats = xml_description.findall('./metadata/MBRVideos/MBRVideo')
for format in formats:
mobj = re.match(r'mp4\:(?P<path>.*)', format.find('streamName').text)
url = video_root + mobj.group('path')
vbr = format.find('bitrate').text
video_formats.append({
'url': url,
'vbr': int(vbr),
})
return video_formats
def _parse_flv(self, xml_description):
formats = []
akamai_url = xml_description.find('./metadata/akamaiHost').text
audios = xml_description.find('./metadata/audios')
if audios is not None:
for audio in audios:
formats.append({
'url': 'rtmp://%s/ondemand?ovpfv=1.1' % akamai_url,
'play_path': remove_end(audio.get('url'), '.flv'),
'ext': 'flv',
'vcodec': 'none',
'format_id': audio.get('code'),
})
slide_video_path = xml_description.find('./metadata/slideVideo').text
formats.append({
'url': 'rtmp://%s/ondemand?ovpfv=1.1' % akamai_url,
'play_path': remove_end(slide_video_path, '.flv'),
'ext': 'flv',
'format_note': 'slide deck video',
'quality': -2,
'preference': -2,
'format_id': 'slides',
})
speaker_video_path = xml_description.find('./metadata/speakerVideo').text
formats.append({
'url': 'rtmp://%s/ondemand?ovpfv=1.1' % akamai_url,
'play_path': remove_end(speaker_video_path, '.flv'),
'ext': 'flv',
'format_note': 'speaker video',
'quality': -1,
'preference': -1,
'format_id': 'speaker',
})
return formats
def _login(self, webpage_url, display_id):
(username, password) = self._get_login_info()
if username is None or password is None:
@ -183,17 +152,10 @@ class GDCVaultIE(InfoExtractor):
r'<iframe src=".*?\?xmlURL=xml/(?P<xml_file>.+?\.xml).*?".*?</iframe>',
start_page, 'xml filename')
xml_description = self._download_xml(
'%s/xml/%s' % (xml_root, xml_name), display_id)
video_title = xml_description.find('./metadata/title').text
video_formats = self._parse_mp4(xml_description)
if video_formats is None:
video_formats = self._parse_flv(xml_description)
return {
'_type': 'url_transparent',
'id': video_id,
'display_id': display_id,
'title': video_title,
'formats': video_formats,
'url': '%s/xml/%s' % (xml_root, xml_name),
'ie_key': 'DigitallySpeaking',
}

View File

@ -2,12 +2,6 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
xpath_element,
xpath_text,
int_or_none,
parse_duration,
)
class GPUTechConfIE(InfoExtractor):
@ -27,29 +21,15 @@ class GPUTechConfIE(InfoExtractor):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
root_path = self._search_regex(r'var\s+rootPath\s*=\s*"([^"]+)', webpage, 'root path', 'http://evt.dispeak.com/nvidia/events/gtc15/')
xml_file_id = self._search_regex(r'var\s+xmlFileId\s*=\s*"([^"]+)', webpage, 'xml file id')
doc = self._download_xml('%sxml/%s.xml' % (root_path, xml_file_id), video_id)
metadata = xpath_element(doc, 'metadata')
http_host = xpath_text(metadata, 'httpHost', 'http host', True)
mbr_videos = xpath_element(metadata, 'MBRVideos')
formats = []
for mbr_video in mbr_videos.findall('MBRVideo'):
stream_name = xpath_text(mbr_video, 'streamName')
if stream_name:
formats.append({
'url': 'http://%s/%s' % (http_host, stream_name.replace('mp4:', '')),
'tbr': int_or_none(xpath_text(mbr_video, 'bitrate')),
})
self._sort_formats(formats)
root_path = self._search_regex(
r'var\s+rootPath\s*=\s*"([^"]+)', webpage, 'root path',
default='http://evt.dispeak.com/nvidia/events/gtc15/')
xml_file_id = self._search_regex(
r'var\s+xmlFileId\s*=\s*"([^"]+)', webpage, 'xml file id')
return {
'_type': 'url_transparent',
'id': video_id,
'title': xpath_text(metadata, 'title'),
'duration': parse_duration(xpath_text(metadata, 'endTime')),
'creator': xpath_text(metadata, 'speaker'),
'formats': formats,
'url': '%sxml/%s.xml' % (root_path, xml_file_id),
'ie_key': 'DigitallySpeaking',
}

View File

@ -0,0 +1,63 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import int_or_none
class MGTVIE(InfoExtractor):
_VALID_URL = r'https?://www\.mgtv\.com/v/(?:[^/]+/)*(?P<id>\d+)\.html'
IE_DESC = '芒果TV'
_TEST = {
'url': 'http://www.mgtv.com/v/1/290525/f/3116640.html',
'md5': '',
'info_dict': {
'id': '3116640',
'ext': 'mp4',
'title': '我是歌手第四季双年巅峰会:韩红李玟“双王”领军对抗',
'description': '我是歌手第四季双年巅峰会',
'duration': 7461,
'thumbnail': 're:^https?://.*\.jpg$',
},
'params': {
'skip_download': True, # m3u8 download
},
}
_FORMAT_MAP = {
'标清': ('Standard', 0),
'高清': ('High', 1),
'超清': ('SuperHigh', 2),
}
def _real_extract(self, url):
video_id = self._match_id(url)
api_data = self._download_json(
'http://v.api.mgtv.com/player/video', video_id,
query={'video_id': video_id})['data']
info = api_data['info']
formats = []
for idx, stream in enumerate(api_data['stream']):
format_name = stream.get('name')
format_id, preference = self._FORMAT_MAP.get(format_name, (None, None))
format_info = self._download_json(
stream['url'], video_id,
note='Download video info for format %s' % format_id or '#%d' % idx)
formats.append({
'format_id': format_id,
'url': format_info['info'],
'ext': 'mp4', # These are m3u8 playlists
'preference': preference,
})
self._sort_formats(formats)
return {
'id': video_id,
'title': info['title'].strip(),
'formats': formats,
'description': info.get('desc'),
'duration': int_or_none(info.get('duration')),
'thumbnail': info.get('thumb'),
}

View File

@ -4,24 +4,24 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import ExtractorError
from ..utils import (
ExtractorError,
int_or_none,
)
class NewstubeIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?newstube\.ru/media/(?P<id>.+)'
_TEST = {
'url': 'http://www.newstube.ru/media/telekanal-cnn-peremestil-gorod-slavyansk-v-krym',
'md5': '801eef0c2a9f4089fa04e4fe3533abdc',
'info_dict': {
'id': '728e0ef2-e187-4012-bac0-5a081fdcb1f6',
'ext': 'flv',
'ext': 'mp4',
'title': 'Телеканал CNN переместил город Славянск в Крым',
'description': 'md5:419a8c9f03442bc0b0a794d689360335',
'duration': 31.05,
},
'params': {
# rtmp download
'skip_download': True,
},
}
def _real_extract(self, url):
@ -62,7 +62,6 @@ class NewstubeIE(InfoExtractor):
server = media_location.find(ns('./Server')).text
app = media_location.find(ns('./App')).text
media_id = stream_info.find(ns('./Id')).text
quality_id = stream_info.find(ns('./QualityId')).text
name = stream_info.find(ns('./Name')).text
width = int(stream_info.find(ns('./Width')).text)
height = int(stream_info.find(ns('./Height')).text)
@ -74,12 +73,38 @@ class NewstubeIE(InfoExtractor):
'rtmp_conn': ['S:%s' % session_id, 'S:%s' % media_id, 'S:n2'],
'page_url': url,
'ext': 'flv',
'format_id': quality_id,
'format_note': name,
'format_id': 'rtmp' + ('-%s' % name if name else ''),
'width': width,
'height': height,
})
sources_data = self._download_json(
'http://www.newstube.ru/player2/getsources?guid=%s' % video_guid,
video_guid, fatal=False)
if sources_data:
for source in sources_data.get('Sources', []):
source_url = source.get('Src')
if not source_url:
continue
height = int_or_none(source.get('Height'))
f = {
'format_id': 'http' + ('-%dp' % height if height else ''),
'url': source_url,
'width': int_or_none(source.get('Width')),
'height': height,
}
source_type = source.get('Type')
if source_type:
mobj = re.search(r'codecs="([^,]+),\s*([^"]+)"', source_type)
if mobj:
vcodec, acodec = mobj.groups()
f.update({
'vcodec': vcodec,
'acodec': acodec,
})
formats.append(f)
self._check_formats(formats, video_guid)
self._sort_formats(formats)
return {

View File

@ -0,0 +1,32 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
class PeopleIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?people\.com/people/videos/0,,(?P<id>\d+),00\.html'
_TEST = {
'url': 'http://www.people.com/people/videos/0,,20995451,00.html',
'info_dict': {
'id': 'ref:20995451',
'ext': 'mp4',
'title': 'Astronaut Love Triangle Victim Speaks Out: “The Crime in 2007 Hasnt Defined Us”',
'description': 'Colleen Shipman speaks to PEOPLE for the first time about life after the attack',
'thumbnail': 're:^https?://.*\.jpg',
'duration': 246.318,
'timestamp': 1458720585,
'upload_date': '20160323',
'uploader_id': '416418724',
},
'params': {
'skip_download': True,
},
'add_ie': ['BrightcoveNew'],
}
def _real_extract(self, url):
return self.url_result(
'http://players.brightcove.net/416418724/default_default/index.html?videoId=ref:%s'
% self._match_id(url), 'BrightcoveNew')

View File

@ -1,61 +0,0 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import ExtractorError
class PlanetaPlayIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?planetaplay\.com/\?sng=(?P<id>[0-9]+)'
_API_URL = 'http://planetaplay.com/action/playlist/?sng={0:}'
_THUMBNAIL_URL = 'http://planetaplay.com/img/thumb/{thumb:}'
_TEST = {
'url': 'http://planetaplay.com/?sng=3586',
'md5': '9d569dceb7251a4e01355d5aea60f9db',
'info_dict': {
'id': '3586',
'ext': 'flv',
'title': 'md5:e829428ee28b1deed00de90de49d1da1',
},
'skip': 'Not accessible from Travis CI server',
}
_SONG_FORMATS = {
'lq': (0, 'http://www.planetaplay.com/videoplayback/{med_hash:}'),
'hq': (1, 'http://www.planetaplay.com/videoplayback/hi/{med_hash:}'),
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
response = self._download_json(
self._API_URL.format(video_id), video_id)['response']
try:
data = response.get('data')[0]
except IndexError:
raise ExtractorError(
'%s: failed to get the playlist' % self.IE_NAME, expected=True)
title = '{song_artists:} - {sng_name:}'.format(**data)
thumbnail = self._THUMBNAIL_URL.format(**data)
formats = []
for format_id, (quality, url_template) in self._SONG_FORMATS.items():
formats.append({
'format_id': format_id,
'url': url_template.format(**data),
'quality': quality,
'ext': 'flv',
})
self._sort_formats(formats)
return {
'id': video_id,
'title': title,
'formats': formats,
'thumbnail': thumbnail,
}

View File

@ -1,54 +0,0 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import (
compat_urlparse,
)
from ..utils import (
determine_ext,
int_or_none,
)
class QuickVidIE(InfoExtractor):
_VALID_URL = r'https?://(www\.)?quickvid\.org/watch\.php\?v=(?P<id>[a-zA-Z_0-9-]+)'
_TEST = {
'url': 'http://quickvid.org/watch.php?v=sUQT3RCG8dx',
'md5': 'c0c72dd473f260c06c808a05d19acdc5',
'info_dict': {
'id': 'sUQT3RCG8dx',
'ext': 'mp4',
'title': 'Nick Offerman\'s Summer Reading Recap',
'thumbnail': 're:^https?://.*\.(?:png|jpg|gif)$',
'view_count': int,
},
'skip': 'Not accessible from Travis CI server',
}
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
title = self._html_search_regex(r'<h2>(.*?)</h2>', webpage, 'title')
view_count = int_or_none(self._html_search_regex(
r'(?s)<div id="views">(.*?)</div>',
webpage, 'view count', fatal=False))
video_code = self._search_regex(
r'(?s)<video id="video"[^>]*>(.*?)</video>', webpage, 'video code')
formats = [
{
'url': compat_urlparse.urljoin(url, src),
'format_id': determine_ext(src, None),
} for src in re.findall('<source\s+src="([^"]+)"', video_code)
]
self._sort_formats(formats)
return {
'id': video_id,
'title': title,
'formats': formats,
'thumbnail': self._og_search_thumbnail(webpage),
'view_count': view_count,
}

View File

@ -4,12 +4,18 @@ from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
int_or_none,
unescapeHTML,
ExtractorError,
)
class RTBFIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?rtbf\.be/(?:video/[^?]+\?.*\bid=|ouftivi/(?:[^/]+/)*[^?]+\?.*\bvideoId=)(?P<id>\d+)'
_VALID_URL = r'''(?x)
https?://(?:www\.)?rtbf\.be/
(?:
video/[^?]+\?.*\bid=|
ouftivi/(?:[^/]+/)*[^?]+\?.*\bvideoId=|
auvio/[^/]+\?.*id=
)(?P<id>\d+)'''
_TESTS = [{
'url': 'https://www.rtbf.be/video/detail_les-diables-au-coeur-episode-2?id=1921274',
'md5': '799f334ddf2c0a582ba80c44655be570',
@ -17,7 +23,11 @@ class RTBFIE(InfoExtractor):
'id': '1921274',
'ext': 'mp4',
'title': 'Les Diables au coeur (épisode 2)',
'description': 'Football - Diables Rouges',
'duration': 3099,
'upload_date': '20140425',
'timestamp': 1398456336,
'uploader': 'rtbfsport',
}
}, {
# geo restricted
@ -26,45 +36,63 @@ class RTBFIE(InfoExtractor):
}, {
'url': 'http://www.rtbf.be/ouftivi/niouzz?videoId=2055858',
'only_matching': True,
}, {
'url': 'http://www.rtbf.be/auvio/detail_jeudi-en-prime-siegfried-bracke?id=2102996',
'only_matching': True,
}]
_IMAGE_HOST = 'http://ds1.ds.static.rtbf.be'
_PROVIDERS = {
'YOUTUBE': 'Youtube',
'DAILYMOTION': 'Dailymotion',
'VIMEO': 'Vimeo',
}
_QUALITIES = [
('mobile', 'mobile'),
('web', 'SD'),
('url', 'MD'),
('mobile', 'SD'),
('web', 'MD'),
('high', 'HD'),
]
def _real_extract(self, url):
video_id = self._match_id(url)
data = self._download_json(
'http://www.rtbf.be/api/media/video?method=getVideoDetail&args[]=%s' % video_id, video_id)
webpage = self._download_webpage(
'http://www.rtbf.be/video/embed?id=%s' % video_id, video_id)
error = data.get('error')
if error:
raise ExtractorError('%s said: %s' % (self.IE_NAME, error), expected=True)
data = self._parse_json(
unescapeHTML(self._search_regex(
r'data-media="([^"]+)"', webpage, 'data video')),
video_id)
data = data['data']
provider = data.get('provider')
if provider in self._PROVIDERS:
return self.url_result(data['url'], self._PROVIDERS[provider])
if data.get('provider').lower() == 'youtube':
video_url = data.get('downloadUrl') or data.get('url')
return self.url_result(video_url, 'Youtube')
formats = []
for key, format_id in self._QUALITIES:
format_url = data['sources'].get(key)
format_url = data.get(key + 'Url')
if format_url:
formats.append({
'format_id': format_id,
'url': format_url,
})
thumbnails = []
for thumbnail_id, thumbnail_url in data.get('thumbnail', {}).items():
if thumbnail_id != 'default':
thumbnails.append({
'url': self._IMAGE_HOST + thumbnail_url,
'id': thumbnail_id,
})
return {
'id': video_id,
'formats': formats,
'title': data['title'],
'description': data.get('description') or data.get('subtitle'),
'thumbnail': data.get('thumbnail'),
'thumbnails': thumbnails,
'duration': data.get('duration') or data.get('realDuration'),
'timestamp': int_or_none(data.get('created')),
'view_count': int_or_none(data.get('viewCount')),
'uploader': data.get('channel'),
'tags': data.get('tags'),
}

View File

@ -14,7 +14,6 @@ class StreetVoiceIE(InfoExtractor):
'info_dict': {
'id': '94440',
'ext': 'mp3',
'filesize': 4167053,
'title': '',
'description': 'Crispy脆樂團 - 輸',
'thumbnail': 're:^https?://.*\.jpg$',
@ -32,20 +31,19 @@ class StreetVoiceIE(InfoExtractor):
song_id = self._match_id(url)
song = self._download_json(
'http://streetvoice.com/music/api/song/%s' % song_id, song_id)
'https://streetvoice.com/api/v1/public/song/%s/' % song_id, song_id, data=b'')
title = song['name']
author = song['musician']['name']
author = song['user']['nickname']
return {
'id': song_id,
'url': song['file'],
'filesize': song.get('size'),
'title': title,
'description': '%s - %s' % (author, title),
'thumbnail': self._proto_relative_url(song.get('image'), 'http:'),
'duration': song.get('length'),
'upload_date': unified_strdate(song.get('created_at')),
'uploader': author,
'uploader_id': compat_str(song['musician']['id']),
'uploader_id': compat_str(song['user']['id']),
}

View File

@ -1,7 +1,6 @@
# coding: utf-8
from __future__ import unicode_literals
import codecs
import re
from .common import InfoExtractor
@ -10,22 +9,24 @@ from ..utils import (
int_or_none,
sanitized_Request,
urlencode_postdata,
parse_iso8601,
)
class TubiTvIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?tubitv\.com/video\?id=(?P<id>[0-9]+)'
_VALID_URL = r'https?://(?:www\.)?tubitv\.com/video/(?P<id>[0-9]+)'
_LOGIN_URL = 'http://tubitv.com/login'
_NETRC_MACHINE = 'tubitv'
_TEST = {
'url': 'http://tubitv.com/video?id=54411&title=The_Kitchen_Musical_-_EP01',
'url': 'http://tubitv.com/video/283829/the_comedian_at_the_friday',
'info_dict': {
'id': '54411',
'id': '283829',
'ext': 'mp4',
'title': 'The Kitchen Musical - EP01',
'thumbnail': 're:^https?://.*\.png$',
'description': 'md5:37532716166069b353e8866e71fefae7',
'duration': 2407,
'title': 'The Comedian at The Friday',
'description': 'A stand up comedian is forced to look at the decisions in his life while on a one week trip to the west coast.',
'uploader': 'Indie Rights Films',
'upload_date': '20160111',
'timestamp': 1452555979,
},
'params': {
'skip_download': 'HLS download',
@ -55,27 +56,31 @@ class TubiTvIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
video_data = self._download_json(
'http://tubitv.com/oz/videos/%s/content' % video_id, video_id)
title = video_data['n']
webpage = self._download_webpage(url, video_id)
if re.search(r"<(?:DIV|div) class='login-required-screen'>", webpage):
self.raise_login_required('This video requires login')
title = self._og_search_title(webpage)
description = self._og_search_description(webpage)
thumbnail = self._og_search_thumbnail(webpage)
duration = int_or_none(self._html_search_meta(
'video:duration', webpage, 'duration'))
apu = self._search_regex(r"apu='([^']+)'", webpage, 'apu')
m3u8_url = codecs.decode(apu, 'rot_13')[::-1]
formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4')
formats = self._extract_m3u8_formats(
video_data['mh'], video_id, 'mp4', 'm3u8_native')
self._sort_formats(formats)
subtitles = {}
for sub in video_data.get('sb', []):
sub_url = sub.get('u')
if not sub_url:
continue
subtitles.setdefault(sub.get('l', 'en'), []).append({
'url': sub_url,
})
return {
'id': video_id,
'title': title,
'formats': formats,
'thumbnail': thumbnail,
'description': description,
'duration': duration,
'subtitles': subtitles,
'thumbnail': video_data.get('ph'),
'description': video_data.get('d'),
'duration': int_or_none(video_data.get('s')),
'timestamp': parse_iso8601(video_data.get('u')),
'uploader': video_data.get('on'),
}

View File

@ -58,7 +58,9 @@ class TvigleIE(InfoExtractor):
if not video_id:
webpage = self._download_webpage(url, display_id)
video_id = self._html_search_regex(
r'class="video-preview current_playing" id="(\d+)">',
(r'<div[^>]+class=["\']player["\'][^>]+id=["\'](\d+)',
r'var\s+cloudId\s*=\s*["\'](\d+)',
r'class="video-preview current_playing" id="(\d+)"'),
webpage, 'video id')
video_data = self._download_json(
@ -81,10 +83,10 @@ class TvigleIE(InfoExtractor):
formats = []
for vcodec, fmts in item['videos'].items():
if vcodec == 'hls':
continue
for format_id, video_url in fmts.items():
if format_id == 'm3u8':
formats.extend(self._extract_m3u8_formats(
video_url, video_id, 'mp4', m3u8_id=vcodec))
continue
height = self._search_regex(
r'^(\d+)[pP]$', format_id, 'height', default=None)

View File

@ -282,10 +282,10 @@ class VimeoIE(VimeoBaseInfoExtractor):
pass_url = url + '/check-password'
password_request = sanitized_Request(pass_url, data)
password_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
password_request.add_header('Referer', url)
return self._download_json(
password_request, video_id,
'Verifying the password',
'Wrong password')
'Verifying the password', 'Wrong password')
def _real_initialize(self):
self._login()

View File

@ -125,6 +125,12 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
if login_results is False:
return False
error_msg = self._html_search_regex(
r'<[^>]+id="errormsg_0_Passwd"[^>]*>([^<]+)<',
login_results, 'error message', default=None)
if error_msg:
raise ExtractorError('Unable to login: %s' % error_msg, expected=True)
if re.search(r'id="errormsg_0_Passwd"', login_results) is not None:
raise ExtractorError('Please use your account password and a two-factor code instead of an application-specific password.', expected=True)

View File

@ -425,8 +425,12 @@ def parseOpts(overrideArguments=None):
help='Set file xattribute ytdl.filesize with expected filesize (experimental)')
downloader.add_option(
'--hls-prefer-native',
dest='hls_prefer_native', action='store_true',
help='Use the native HLS downloader instead of ffmpeg (experimental)')
dest='hls_prefer_native', action='store_true', default=None,
help='Use the native HLS downloader instead of ffmpeg')
downloader.add_option(
'--hls-prefer-ffmpeg',
dest='hls_prefer_native', action='store_false', default=None,
help='Use ffmpeg instead of the native HLS downloader')
downloader.add_option(
'--hls-use-mpegts',
dest='hls_use_mpegts', action='store_true',

View File

@ -1540,44 +1540,46 @@ def parse_duration(s):
s = s.strip()
m = re.match(
r'''(?ix)(?:P?T)?
(?:
(?P<only_mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*|
(?P<only_hours>[0-9.]+)\s*(?:hours?)|
\s*(?P<hours_reversed>[0-9]+)\s*(?:[:h]|hours?)\s*(?P<mins_reversed>[0-9]+)\s*(?:[:m]|mins?\.?|minutes?)\s*|
(?:
days, hours, mins, secs, ms = [None] * 5
m = re.match(r'(?:(?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?$', s)
if m:
days, hours, mins, secs, ms = m.groups()
else:
m = re.match(
r'''(?ix)(?:P?T)?
(?:
(?:(?P<days>[0-9]+)\s*(?:[:d]|days?)\s*)?
(?P<hours>[0-9]+)\s*(?:[:h]|hours?)\s*
(?P<days>[0-9]+)\s*d(?:ays?)?\s*
)?
(?P<mins>[0-9]+)\s*(?:[:m]|mins?|minutes?)\s*
)?
(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*(?:s|secs?|seconds?)?
)$''', s)
if not m:
return None
res = 0
if m.group('only_mins'):
return float_or_none(m.group('only_mins'), invscale=60)
if m.group('only_hours'):
return float_or_none(m.group('only_hours'), invscale=60 * 60)
if m.group('secs'):
res += int(m.group('secs'))
if m.group('mins_reversed'):
res += int(m.group('mins_reversed')) * 60
if m.group('mins'):
res += int(m.group('mins')) * 60
if m.group('hours'):
res += int(m.group('hours')) * 60 * 60
if m.group('hours_reversed'):
res += int(m.group('hours_reversed')) * 60 * 60
if m.group('days'):
res += int(m.group('days')) * 24 * 60 * 60
if m.group('ms'):
res += float(m.group('ms'))
return res
(?:
(?P<hours>[0-9]+)\s*h(?:ours?)?\s*
)?
(?:
(?P<mins>[0-9]+)\s*m(?:in(?:ute)?s?)?\s*
)?
(?:
(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s*
)?$''', s)
if m:
days, hours, mins, secs, ms = m.groups()
else:
m = re.match(r'(?i)(?:(?P<hours>[0-9.]+)\s*(?:hours?)|(?P<mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)$', s)
if m:
hours, mins = m.groups()
else:
return None
duration = 0
if secs:
duration += float(secs)
if mins:
duration += float(mins) * 60
if hours:
duration += float(hours) * 60 * 60
if days:
duration += float(days) * 24 * 60 * 60
if ms:
duration += float(ms)
return duration
def prepend_extension(filename, ext, expected_real_ext=None):