commit d742440e82
Author: Gilles Habran
Date: 2016-04-18 14:05:18 +02:00
31 changed files with 297 additions and 398 deletions

View File

@@ -44,7 +44,7 @@ class TestYoutubeLists(unittest.TestCase):
         ie = YoutubePlaylistIE(dl)
         result = ie.extract('https://www.youtube.com/watch?v=W01L70IGBgE&index=2&list=RDOQpdSVF_k_w')
         entries = result['entries']
-        self.assertTrue(len(entries) >= 20)
+        self.assertTrue(len(entries) >= 50)
         original_video = entries[0]
         self.assertEqual(original_video['id'], 'OQpdSVF_k_w')

View File

@@ -225,7 +225,7 @@ class FFmpegFD(ExternalFD):
         args += ['-i', url, '-c', 'copy']
         if protocol == 'm3u8':
-            if self.params.get('hls_use_mpegts', False):
+            if self.params.get('hls_use_mpegts', False) or tmpfilename == '-':
                 args += ['-f', 'mpegts']
             else:
                 args += ['-f', 'mp4', '-bsf:a', 'aac_adtstoasc']
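Why the extra `tmpfilename == '-'` check matters: MP4 needs a seekable output to finalize its index, so anything streamed to stdout has to fall back to MPEG-TS. A minimal sketch of that decision outside youtube-dl (the helper name and arguments are invented for illustration):

```python
def hls_container_args(tmpfilename, hls_use_mpegts=False):
    # MP4 cannot be muxed to a non-seekable target such as stdout ('-'),
    # so streaming output falls back to the MPEG-TS container.
    if hls_use_mpegts or tmpfilename == '-':
        return ['-f', 'mpegts']
    return ['-f', 'mp4', '-bsf:a', 'aac_adtstoasc']


# e.g. piping to a player: youtube-dl -o - URL | mpv -
print(hls_container_args('-'))        # ['-f', 'mpegts']
print(hls_container_args('out.mp4'))  # ['-f', 'mp4', '-bsf:a', 'aac_adtstoasc']
```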

View File

@@ -30,14 +30,14 @@ class AudiomackIE(InfoExtractor):
         # audiomack wrapper around soundcloud song
         {
             'add_ie': ['Soundcloud'],
-            'url': 'http://www.audiomack.com/song/xclusiveszone/take-kare',
+            'url': 'http://www.audiomack.com/song/hip-hop-daily/black-mamba-freestyle',
             'info_dict': {
-                'id': '172419696',
+                'id': '258901379',
                 'ext': 'mp3',
-                'description': 'md5:1fc3272ed7a635cce5be1568c2822997',
-                'title': 'Young Thug ft Lil Wayne - Take Kare',
-                'uploader': 'Young Thug World',
-                'upload_date': '20141016',
+                'description': 'mamba day freestyle for the legend Kobe Bryant ',
+                'title': 'Black Mamba Freestyle [Prod. By Danny Wolf]',
+                'uploader': 'ILOVEMAKONNEN',
+                'upload_date': '20160414',
             }
         },
     ]

View File

@@ -671,6 +671,7 @@ class BBCIE(BBCCoUkIE):
         'info_dict': {
             'id': '34475836',
             'title': 'Jurgen Klopp: Furious football from a witty and winning coach',
+            'description': 'Fast-paced football, wit, wisdom and a ready smile - why Liverpool fans should come to love new boss Jurgen Klopp.',
         },
         'playlist_count': 3,
     }, {

View File

@@ -5,7 +5,6 @@ from ..utils import (
     xpath_text,
     xpath_element,
     int_or_none,
-    ExtractorError,
     find_xpath_attr,
 )
@@ -64,7 +63,7 @@ class CBSIE(CBSBaseIE):
         'url': 'http://www.colbertlateshow.com/podcasts/dYSwjqPs_X1tvbV_P2FcPWRa_qT6akTC/in-the-bad-room-with-stephen/',
         'only_matching': True,
     }]
-    TP_RELEASE_URL_TEMPLATE = 'http://link.theplatform.com/s/dJ5BDC/%s?manifest=m3u&mbr=true'
+    TP_RELEASE_URL_TEMPLATE = 'http://link.theplatform.com/s/dJ5BDC/%s?mbr=true'

     def _real_extract(self, url):
         display_id = self._match_id(url)
@@ -84,11 +83,11 @@ class CBSIE(CBSBaseIE):
             pid = xpath_text(item, 'pid')
             if not pid:
                 continue
-            try:
-                tp_formats, tp_subtitles = self._extract_theplatform_smil(
-                    self.TP_RELEASE_URL_TEMPLATE % pid, content_id, 'Downloading %s SMIL data' % pid)
-            except ExtractorError:
-                continue
+            tp_release_url = self.TP_RELEASE_URL_TEMPLATE % pid
+            if '.m3u8' in xpath_text(item, 'contentUrl', default=''):
+                tp_release_url += '&manifest=m3u'
+            tp_formats, tp_subtitles = self._extract_theplatform_smil(
+                tp_release_url, content_id, 'Downloading %s SMIL data' % pid)
             formats.extend(tp_formats)
             subtitles = self._merge_subtitles(subtitles, tp_subtitles)
         self._sort_formats(formats)

View File

@@ -4,6 +4,7 @@ from __future__ import unicode_literals
 import re

 from .common import InfoExtractor
+from ..compat import compat_HTTPError
 from ..utils import (
     ExtractorError,
     int_or_none,
@@ -55,8 +56,13 @@ class EaglePlatformIE(InfoExtractor):
         raise ExtractorError(' '.join(response['errors']), expected=True)

     def _download_json(self, url_or_request, video_id, note='Downloading JSON metadata'):
-        response = super(EaglePlatformIE, self)._download_json(url_or_request, video_id, note)
-        self._handle_error(response)
+        try:
+            response = super(EaglePlatformIE, self)._download_json(url_or_request, video_id, note)
+        except ExtractorError as ee:
+            if isinstance(ee.cause, compat_HTTPError):
+                response = self._parse_json(ee.cause.read().decode('utf-8'), video_id)
+                self._handle_error(response)
+            raise
         return response

     def _get_video_url(self, url_or_request, video_id, note='Downloading JSON metadata'):
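The EaglePlatform hunk re-parses the body of a failed HTTP response, because the API reports its error text as JSON even on 4xx answers. Roughly the same pattern with only the standard library (URL and error format are hypothetical):

```python
import json
import urllib.request
from urllib.error import HTTPError


def fetch_api_json(url):
    try:
        with urllib.request.urlopen(url) as resp:
            return json.loads(resp.read().decode('utf-8'))
    except HTTPError as err:
        # The server explains the failure inside the JSON error body,
        # so surface that message instead of a bare status code.
        payload = json.loads(err.read().decode('utf-8'))
        if payload.get('errors'):
            raise RuntimeError(' '.join(payload['errors']))
        raise
```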

View File

@@ -425,7 +425,6 @@ from .moevideo import MoeVideoIE
 from .mofosex import MofosexIE
 from .mojvideo import MojvideoIE
 from .moniker import MonikerIE
-from .mooshare import MooshareIE
 from .morningstar import MorningstarIE
 from .motherless import MotherlessIE
 from .motorsport import MotorsportIE
@@ -470,7 +469,6 @@ from .ndr import (
 from .ndtv import NDTVIE
 from .netzkino import NetzkinoIE
 from .nerdcubed import NerdCubedFeedIE
-from .nerdist import NerdistIE
 from .neteasemusic import (
     NetEaseMusicIE,
     NetEaseMusicAlbumIE,
@@ -753,7 +751,6 @@ from .teletask import TeleTaskIE
 from .testurl import TestURLIE
 from .tf1 import TF1IE
 from .theintercept import TheInterceptIE
-from .theonion import TheOnionIE
 from .theplatform import (
     ThePlatformIE,
     ThePlatformFeedIE,

View File

@@ -7,7 +7,7 @@ from .common import InfoExtractor
 class GazetaIE(InfoExtractor):
-    _VALID_URL = r'(?P<url>https?://(?:www\.)?gazeta\.ru/(?:[^/]+/)?video/(?:(?:main|\d{4}/\d{2}/\d{2})/)?(?P<id>[A-Za-z0-9-_.]+)\.s?html)'
+    _VALID_URL = r'(?P<url>https?://(?:www\.)?gazeta\.ru/(?:[^/]+/)?video/(?:main/)*(?:\d{4}/\d{2}/\d{2}/)?(?P<id>[A-Za-z0-9-_.]+)\.s?html)'
     _TESTS = [{
         'url': 'http://www.gazeta.ru/video/main/zadaite_vopros_vladislavu_yurevichu.shtml',
         'md5': 'd49c9bdc6e5a7888f27475dc215ee789',
@@ -18,9 +18,22 @@ class GazetaIE(InfoExtractor):
             'description': 'md5:38617526050bd17b234728e7f9620a71',
             'thumbnail': 're:^https?://.*\.jpg',
         },
+        'skip': 'video not found',
     }, {
         'url': 'http://www.gazeta.ru/lifestyle/video/2015/03/08/master-klass_krasivoi_byt._delaem_vesennii_makiyazh.shtml',
         'only_matching': True,
+    }, {
+        'url': 'http://www.gazeta.ru/video/main/main/2015/06/22/platit_ili_ne_platit_po_isku_yukosa.shtml',
+        'info_dict': {
+            'id': '252048',
+            'ext': 'mp4',
+            'title': '"Если по иску ЮКОСа придется платить, это будет большой удар по бюджету"',
+        },
+        'params': {
+            # m3u8 download
+            'skip_download': True,
+        },
+        'add_ie': ['EaglePlatform'],
     }]

     def _real_extract(self, url):

View File

@@ -105,7 +105,8 @@ class GenericIE(InfoExtractor):
                 'skip_download': True,  # infinite live stream
             },
             'expected_warnings': [
-                r'501.*Not Implemented'
+                r'501.*Not Implemented',
+                r'400.*Bad Request',
             ],
         },
         # Direct link with incorrect MIME type
@@ -1955,7 +1956,8 @@ class GenericIE(InfoExtractor):
         # Look for Instagram embeds
         instagram_embed_url = InstagramIE._extract_embed_url(webpage)
         if instagram_embed_url is not None:
-            return self.url_result(instagram_embed_url, InstagramIE.ie_key())
+            return self.url_result(
+                self._proto_relative_url(instagram_embed_url), InstagramIE.ie_key())

         # Look for LiveLeak embeds
         liveleak_url = LiveLeakIE._extract_url(webpage)

View File

@@ -69,7 +69,7 @@ class HuffPostIE(InfoExtractor):
                 formats.extend(self._extract_m3u8_formats(
                     url, video_id, ext='mp4', m3u8_id='hls', fatal=False))
             elif ext == 'f4m':
-                formats.extend(self._extract_f4m_formatsa(
+                formats.extend(self._extract_f4m_formats(
                     url + '?hdcore=2.9.5', video_id, f4m_id='hds', fatal=False))
             else:
                 formats.append({

View File

@@ -12,7 +12,7 @@ from ..utils import (
 class InstagramIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?instagram\.com/p/(?P<id>[^/?#&]+)'
+    _VALID_URL = r'(?P<url>https?://(?:www\.)?instagram\.com/p/(?P<id>[^/?#&]+))'
     _TESTS = [{
         'url': 'https://instagram.com/p/aye83DjauH/?foo=bar#abc',
         'md5': '0d2da106a9d2631273e192b372806516',
@@ -38,10 +38,19 @@ class InstagramIE(InfoExtractor):
     }, {
         'url': 'https://instagram.com/p/-Cmh1cukG2/',
         'only_matching': True,
+    }, {
+        'url': 'http://instagram.com/p/9o6LshA7zy/embed/',
+        'only_matching': True,
     }]

     @staticmethod
     def _extract_embed_url(webpage):
+        mobj = re.search(
+            r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?instagram\.com/p/[^/]+/embed.*?)\1',
+            webpage)
+        if mobj:
+            return mobj.group('url')
+
         blockquote_el = get_element_by_attribute(
             'class', 'instagram-media', webpage)
         if blockquote_el is None:
@@ -53,7 +62,9 @@ class InstagramIE(InfoExtractor):
         return mobj.group('link')

     def _real_extract(self, url):
-        video_id = self._match_id(url)
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        url = mobj.group('url')

         webpage = self._download_webpage(url, video_id)
         uploader_id = self._search_regex(r'"owner":{"username":"(.+?)"',
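Together with the generic.py change earlier in this commit, embed URLs taken from an iframe may be protocol-relative (`//www.instagram.com/...`). A small standalone sketch of the two steps, finding the iframe src and normalizing the scheme (regex simplified, sample markup invented):

```python
import re


def extract_instagram_embed(webpage, scheme='http:'):
    # Find the iframe pointing at instagram.com/p/<shortcode>/embed
    mobj = re.search(
        r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?instagram\.com/p/[^/]+/embed[^"\']*)\1',
        webpage)
    if not mobj:
        return None
    url = mobj.group('url')
    # A protocol-relative URL inherits the scheme of the embedding page.
    return scheme + url if url.startswith('//') else url


html = '<iframe src="//www.instagram.com/p/aye83DjauH/embed/"></iframe>'
print(extract_instagram_embed(html))  # http://www.instagram.com/p/aye83DjauH/embed/
```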

View File

@@ -165,7 +165,7 @@ class IqiyiIE(InfoExtractor):
     IE_NAME = 'iqiyi'
     IE_DESC = '爱奇艺'
-    _VALID_URL = r'https?://(?:[^.]+\.)?iqiyi\.com/.+\.html'
+    _VALID_URL = r'https?://(?:(?:[^.]+\.)?iqiyi\.com|www\.pps\.tv)/.+\.html'
     _NETRC_MACHINE = 'iqiyi'
@@ -273,6 +273,9 @@ class IqiyiIE(InfoExtractor):
             'title': '灌篮高手 国语版',
         },
         'playlist_count': 101,
+    }, {
+        'url': 'http://www.pps.tv/w_19rrbav0ph.html',
+        'only_matching': True,
     }]

     _FORMATS_MAP = [

View File

@@ -2,39 +2,63 @@
 from __future__ import unicode_literals

 from .common import InfoExtractor
-from ..compat import compat_urllib_parse_unquote_plus
-from ..utils import (
-    js_to_json,
-)


 class KaraoketvIE(InfoExtractor):
-    _VALID_URL = r'https?://karaoketv\.co\.il/\?container=songs&id=(?P<id>[0-9]+)'
+    _VALID_URL = r'http://www.karaoketv.co.il/[^/]+/(?P<id>\d+)'
     _TEST = {
-        'url': 'http://karaoketv.co.il/?container=songs&id=171568',
+        'url': 'http://www.karaoketv.co.il/%D7%A9%D7%99%D7%A8%D7%99_%D7%A7%D7%A8%D7%99%D7%95%D7%A7%D7%99/58356/%D7%90%D7%99%D7%96%D7%95%D7%9F',
         'info_dict': {
-            'id': '171568',
-            'ext': 'mp4',
-            'title': 'אל העולם שלך - רותם כהן - שרים קריוקי',
+            'id': '58356',
+            'ext': 'flv',
+            'title': 'קריוקי של איזון',
+        },
+        'params': {
+            # rtmp download
+            'skip_download': True,
         }
     }

     def _real_extract(self, url):
         video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)

-        page_video_url = self._og_search_video_url(webpage, video_id)
-        config_json = compat_urllib_parse_unquote_plus(self._search_regex(
-            r'config=(.*)', page_video_url, 'configuration'))
-
-        urls_info_json = self._download_json(
-            config_json, video_id, 'Downloading configuration',
-            transform_source=js_to_json)
-
-        url = urls_info_json['playlist'][0]['url']
+        api_page_url = self._search_regex(
+            r'<iframe[^>]+src=(["\'])(?P<url>https?://www\.karaoke\.co\.il/api_play\.php\?.+?)\1',
+            webpage, 'API play URL', group='url')
+
+        api_page = self._download_webpage(api_page_url, video_id)
+        video_cdn_url = self._search_regex(
+            r'<iframe[^>]+src=(["\'])(?P<url>https?://www\.video-cdn\.com/embed/iframe/.+?)\1',
+            api_page, 'video cdn URL', group='url')
+
+        video_cdn = self._download_webpage(video_cdn_url, video_id)
+        play_path = self._parse_json(
+            self._search_regex(
+                r'var\s+options\s*=\s*({.+?});', video_cdn, 'options'),
+            video_id)['clip']['url']
+
+        settings = self._parse_json(
+            self._search_regex(
+                r'var\s+settings\s*=\s*({.+?});', video_cdn, 'servers', default='{}'),
+            video_id, fatal=False) or {}
+
+        servers = settings.get('servers')
+        if not servers or not isinstance(servers, list):
+            servers = ('wowzail.video-cdn.com:80/vodcdn', )
+
+        formats = [{
+            'url': 'rtmp://%s' % server if not server.startswith('rtmp') else server,
+            'play_path': play_path,
+            'app': 'vodcdn',
+            'page_url': video_cdn_url,
+            'player_url': 'http://www.video-cdn.com/assets/flowplayer/flowplayer.commercial-3.2.18.swf',
+            'rtmp_real_time': True,
+            'ext': 'flv',
+        } for server in servers]

         return {
             'id': video_id,
             'title': self._og_search_title(webpage),
-            'url': url,
+            'formats': formats,
         }

View File

@@ -268,7 +268,7 @@ class KuwoCategoryIE(InfoExtractor):
             'title': '八十年代精选',
             'description': '这些都是属于八十年代的回忆!',
         },
-        'playlist_count': 30,
+        'playlist_count': 24,
     }

     def _real_extract(self, url):

View File

@@ -49,8 +49,8 @@ class MDRIE(InfoExtractor):
             'ext': 'mp4',
             'title': 'Beutolomäus und der geheime Weihnachtswunsch',
             'description': 'md5:b69d32d7b2c55cbe86945ab309d39bbd',
-            'timestamp': 1419047100,
-            'upload_date': '20141220',
+            'timestamp': 1450950000,
+            'upload_date': '20151224',
             'duration': 4628,
             'uploader': 'KIKA',
         },
@@ -71,8 +71,8 @@ class MDRIE(InfoExtractor):
         webpage = self._download_webpage(url, video_id)

         data_url = self._search_regex(
-            r'(?:dataURL|playerXml(?:["\'])?)\s*:\s*(["\'])(?P<url>\\?/.+/(?:video|audio)-?[0-9]+-avCustom\.xml)\1',
-            webpage, 'data url', default=None, group='url').replace('\/', '/')
+            r'(?:dataURL|playerXml(?:["\'])?)\s*:\s*(["\'])(?P<url>.+/(?:video|audio)-?[0-9]+-avCustom\.xml)\1',
+            webpage, 'data url', group='url').replace('\/', '/')

         doc = self._download_xml(
             compat_urlparse.urljoin(url, data_url), video_id)

View File

@@ -1,110 +0,0 @@
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from ..utils import (
-    ExtractorError,
-    sanitized_Request,
-    urlencode_postdata,
-)
-
-
-class MooshareIE(InfoExtractor):
-    IE_NAME = 'mooshare'
-    IE_DESC = 'Mooshare.biz'
-    _VALID_URL = r'https?://(?:www\.)?mooshare\.biz/(?P<id>[\da-z]{12})'
-
-    _TESTS = [
-        {
-            'url': 'http://mooshare.biz/8dqtk4bjbp8g',
-            'md5': '4e14f9562928aecd2e42c6f341c8feba',
-            'info_dict': {
-                'id': '8dqtk4bjbp8g',
-                'ext': 'mp4',
-                'title': 'Comedy Football 2011 - (part 1-2)',
-                'duration': 893,
-            },
-        },
-        {
-            'url': 'http://mooshare.biz/aipjtoc4g95j',
-            'info_dict': {
-                'id': 'aipjtoc4g95j',
-                'ext': 'mp4',
-                'title': 'Orange Caramel Dashing Through the Snow',
-                'duration': 212,
-            },
-            'params': {
-                # rtmp download
-                'skip_download': True,
-            }
-        }
-    ]
-
-    def _real_extract(self, url):
-        video_id = self._match_id(url)
-
-        page = self._download_webpage(url, video_id, 'Downloading page')
-
-        if re.search(r'>Video Not Found or Deleted<', page) is not None:
-            raise ExtractorError('Video %s does not exist' % video_id, expected=True)
-
-        hash_key = self._html_search_regex(r'<input type="hidden" name="hash" value="([^"]+)">', page, 'hash')
-        title = self._html_search_regex(r'(?m)<div class="blockTitle">\s*<h2>Watch ([^<]+)</h2>', page, 'title')
-
-        download_form = {
-            'op': 'download1',
-            'id': video_id,
-            'hash': hash_key,
-        }
-
-        request = sanitized_Request(
-            'http://mooshare.biz/%s' % video_id, urlencode_postdata(download_form))
-        request.add_header('Content-Type', 'application/x-www-form-urlencoded')
-
-        self._sleep(5, video_id)
-
-        video_page = self._download_webpage(request, video_id, 'Downloading video page')
-
-        thumbnail = self._html_search_regex(r'image:\s*"([^"]+)",', video_page, 'thumbnail', fatal=False)
-        duration_str = self._html_search_regex(r'duration:\s*"(\d+)",', video_page, 'duration', fatal=False)
-        duration = int(duration_str) if duration_str is not None else None
-
-        formats = []
-
-        # SD video
-        mobj = re.search(r'(?m)file:\s*"(?P<url>[^"]+)",\s*provider:', video_page)
-        if mobj is not None:
-            formats.append({
-                'url': mobj.group('url'),
-                'format_id': 'sd',
-                'format': 'SD',
-            })
-
-        # HD video
-        mobj = re.search(r'\'hd-2\': { file: \'(?P<url>[^\']+)\' },', video_page)
-        if mobj is not None:
-            formats.append({
-                'url': mobj.group('url'),
-                'format_id': 'hd',
-                'format': 'HD',
-            })
-
-        # rtmp video
-        mobj = re.search(r'(?m)file: "(?P<playpath>[^"]+)",\s*streamer: "(?P<rtmpurl>rtmp://[^"]+)",', video_page)
-        if mobj is not None:
-            formats.append({
-                'url': mobj.group('rtmpurl'),
-                'play_path': mobj.group('playpath'),
-                'rtmp_live': False,
-                'ext': 'mp4',
-                'format_id': 'rtmp',
-                'format': 'HD',
-            })
-
-        return {
-            'id': video_id,
-            'title': title,
-            'thumbnail': thumbnail,
-            'duration': duration,
-            'formats': formats,
-        }

View File

@@ -1,17 +1,21 @@
 # encoding: utf-8
 from __future__ import unicode_literals

-import re
-
 from .common import InfoExtractor
-from ..utils import int_or_none
+from ..compat import compat_urlparse
+from ..utils import (
+    int_or_none,
+    js_to_json,
+    mimetype2ext,
+)


 class MusicPlayOnIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:.+?\.)?musicplayon\.com/play(?:-touch)?\?(?:v|pl=100&play)=(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:.+?\.)?musicplayon\.com/play(?:-touch)?\?(?:v|pl=\d+&play)=(?P<id>\d+)'

-    _TEST = {
+    _TESTS = [{
         'url': 'http://en.musicplayon.com/play?v=433377',
+        'md5': '00cdcdea1726abdf500d1e7fd6dd59bb',
         'info_dict': {
             'id': '433377',
             'ext': 'mp4',
@@ -20,15 +24,16 @@ class MusicPlayOnIE(InfoExtractor):
             'duration': 342,
             'uploader': 'ultrafish',
         },
-        'params': {
-            # m3u8 download
-            'skip_download': True,
-        },
-    }
+    }, {
+        'url': 'http://en.musicplayon.com/play?pl=102&play=442629',
+        'only_matching': True,
+    }]
+
+    _URL_TEMPLATE = 'http://en.musicplayon.com/play?v=%s'

     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = self._match_id(url)
+        url = self._URL_TEMPLATE % video_id

         page = self._download_webpage(url, video_id)
@@ -40,28 +45,14 @@ class MusicPlayOnIE(InfoExtractor):
         uploader = self._html_search_regex(
             r'<div>by&nbsp;<a href="[^"]+" class="purple">([^<]+)</a></div>', page, 'uploader', fatal=False)

-        formats = [
-            {
-                'url': 'http://media0-eu-nl.musicplayon.com/stream-mobile?id=%s&type=.mp4' % video_id,
-                'ext': 'mp4',
-            }
-        ]
-
-        manifest = self._download_webpage(
-            'http://en.musicplayon.com/manifest.m3u8?v=%s' % video_id, video_id, 'Downloading manifest')
-
-        for entry in manifest.split('#')[1:]:
-            if entry.startswith('EXT-X-STREAM-INF:'):
-                meta, url, _ = entry.split('\n')
-                params = dict(param.split('=') for param in meta.split(',')[1:])
-                formats.append({
-                    'url': url,
-                    'ext': 'mp4',
-                    'tbr': int(params['BANDWIDTH']),
-                    'width': int(params['RESOLUTION'].split('x')[1]),
-                    'height': int(params['RESOLUTION'].split('x')[-1]),
-                    'format_note': params['NAME'].replace('"', '').strip(),
-                })
+        sources = self._parse_json(
+            self._search_regex(r'setup\[\'_sources\'\]\s*=\s*([^;]+);', page, 'video sources'),
+            video_id, transform_source=js_to_json)
+        formats = [{
+            'url': compat_urlparse.urljoin(url, source['src']),
+            'ext': mimetype2ext(source.get('type')),
+            'format_note': source.get('data-res'),
+        } for source in sources]

         return {
             'id': video_id,

View File

@@ -1,80 +0,0 @@
-# encoding: utf-8
-from __future__ import unicode_literals
-
-from .common import InfoExtractor
-from ..utils import (
-    determine_ext,
-    parse_iso8601,
-    xpath_text,
-)
-
-
-class NerdistIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?nerdist\.com/vepisode/(?P<id>[^/?#]+)'
-    _TEST = {
-        'url': 'http://www.nerdist.com/vepisode/exclusive-which-dc-characters-w',
-        'md5': '3698ed582931b90d9e81e02e26e89f23',
-        'info_dict': {
-            'display_id': 'exclusive-which-dc-characters-w',
-            'id': 'RPHpvJyr',
-            'ext': 'mp4',
-            'title': 'Your TEEN TITANS Revealed! Who\'s on the show?',
-            'thumbnail': 're:^https?://.*/thumbs/.*\.jpg$',
-            'description': 'Exclusive: Find out which DC Comics superheroes will star in TEEN TITANS Live-Action TV Show on Nerdist News with Jessica Chobot!',
-            'uploader': 'Eric Diaz',
-            'upload_date': '20150202',
-            'timestamp': 1422892808,
-        }
-    }
-
-    def _real_extract(self, url):
-        display_id = self._match_id(url)
-
-        webpage = self._download_webpage(url, display_id)
-
-        video_id = self._search_regex(
-            r'''(?x)<script\s+(?:type="text/javascript"\s+)?
-                src="https?://content\.nerdist\.com/players/([a-zA-Z0-9_]+)-''',
-            webpage, 'video ID')
-
-        timestamp = parse_iso8601(self._html_search_meta(
-            'shareaholic:article_published_time', webpage, 'upload date'))
-
-        uploader = self._html_search_meta(
-            'shareaholic:article_author_name', webpage, 'article author')
-
-        doc = self._download_xml(
-            'http://content.nerdist.com/jw6/%s.xml' % video_id, video_id)
-        video_info = doc.find('.//item')
-        title = xpath_text(video_info, './title', fatal=True)
-
-        description = xpath_text(video_info, './description')
-        thumbnail = xpath_text(
-            video_info, './{http://rss.jwpcdn.com/}image', 'thumbnail')
-
-        formats = []
-        for source in video_info.findall('./{http://rss.jwpcdn.com/}source'):
-            vurl = source.attrib['file']
-            ext = determine_ext(vurl)
-            if ext == 'm3u8':
-                formats.extend(self._extract_m3u8_formats(
-                    vurl, video_id, entry_protocol='m3u8_native', ext='mp4',
-                    preference=0))
-            elif ext == 'smil':
-                formats.extend(self._extract_smil_formats(
-                    vurl, video_id, fatal=False
-                ))
-            else:
-                formats.append({
-                    'format_id': ext,
-                    'url': vurl,
-                })
-        self._sort_formats(formats)
-
-        return {
-            'id': video_id,
-            'display_id': display_id,
-            'title': title,
-            'description': description,
-            'thumbnail': thumbnail,
-            'timestamp': timestamp,
-            'formats': formats,
-            'uploader': uploader,
-        }

View File

@@ -7,8 +7,8 @@ from .common import InfoExtractor
 class NewgroundsIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?newgrounds\.com/audio/listen/(?P<id>[0-9]+)'
-    _TEST = {
+    _VALID_URL = r'https?://(?:www\.)?newgrounds\.com/(?:audio/listen|portal/view)/(?P<id>[0-9]+)'
+    _TESTS = [{
         'url': 'http://www.newgrounds.com/audio/listen/549479',
         'md5': 'fe6033d297591288fa1c1f780386f07a',
         'info_dict': {
@@ -17,7 +17,16 @@ class NewgroundsIE(InfoExtractor):
             'title': 'B7 - BusMode',
             'uploader': 'Burn7',
         }
-    }
+    }, {
+        'url': 'http://www.newgrounds.com/portal/view/673111',
+        'md5': '3394735822aab2478c31b1004fe5e5bc',
+        'info_dict': {
+            'id': '673111',
+            'ext': 'mp4',
+            'title': 'Dancin',
+            'uploader': 'Squirrelman82',
+        },
+    }]

     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
@@ -25,9 +34,11 @@ class NewgroundsIE(InfoExtractor):
         webpage = self._download_webpage(url, music_id)

         title = self._html_search_regex(
-            r',"name":"([^"]+)",', webpage, 'music title')
+            r'<title>([^>]+)</title>', webpage, 'title')

         uploader = self._html_search_regex(
-            r',"artist":"([^"]+)",', webpage, 'music uploader')
+            [r',"artist":"([^"]+)",', r'[\'"]owner[\'"]\s*:\s*[\'"]([^\'"]+)[\'"],'],
+            webpage, 'uploader')

         music_url_json_string = self._html_search_regex(
             r'({"url":"[^"]+"),', webpage, 'music url') + '}'

View File

@@ -4,7 +4,10 @@ from __future__ import unicode_literals
 import re

 from .common import InfoExtractor
-from ..utils import determine_ext
+from ..utils import (
+    determine_ext,
+    int_or_none,
+)


 class OnionStudiosIE(InfoExtractor):
@@ -17,7 +20,7 @@ class OnionStudiosIE(InfoExtractor):
             'id': '2937',
             'ext': 'mp4',
             'title': 'Hannibal charges forward, stops for a cocktail',
-            'description': 'md5:545299bda6abf87e5ec666548c6a9448',
+            'description': 'md5:e786add7f280b7f0fe237b64cc73df76',
             'thumbnail': 're:^https?://.*\.jpg$',
             'uploader': 'The A.V. Club',
             'uploader_id': 'TheAVClub',
@@ -42,9 +45,19 @@ class OnionStudiosIE(InfoExtractor):
         formats = []
         for src in re.findall(r'<source[^>]+src="([^"]+)"', webpage):
-            if determine_ext(src) != 'm3u8':  # m3u8 always results in 403
+            ext = determine_ext(src)
+            if ext == 'm3u8':
+                formats.extend(self._extract_m3u8_formats(
+                    src, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
+            else:
+                height = int_or_none(self._search_regex(
+                    r'/(\d+)\.%s' % ext, src, 'height', default=None))
                 formats.append({
+                    'format_id': ext + ('-%sp' % height if height else ''),
                     'url': src,
+                    'height': height,
+                    'ext': ext,
+                    'preference': 1,
                 })
         self._sort_formats(formats)
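The OnionStudios hunk infers the format height from source URLs whose file name is just the height (e.g. `.../720.mp4`). The same idea in isolation (the sample URL is made up):

```python
import re


def format_from_src(src):
    ext = src.rpartition('.')[2]
    # URLs are assumed to end in '<height>.<ext>'; otherwise height stays None.
    mobj = re.search(r'/(\d+)\.%s' % re.escape(ext), src)
    height = int(mobj.group(1)) if mobj else None
    return {
        'format_id': ext + ('-%sp' % height if height else ''),
        'url': src,
        'height': height,
        'ext': ext,
    }


print(format_from_src('https://example.com/videos/2937/720.mp4'))
# {'format_id': 'mp4-720p', 'url': '...', 'height': 720, 'ext': 'mp4'}
```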

View File

@@ -40,7 +40,7 @@ class Puls4IE(InfoExtractor):
         webpage = self._download_webpage(url, video_id)

         error_message = self._html_search_regex(
-            r'<div class="message-error">(.+?)</div>',
+            r'<div[^>]+class="message-error"[^>]*>(.+?)</div>',
             webpage, 'error message', default=None)
         if error_message:
             raise ExtractorError(

View File

@@ -6,6 +6,7 @@ import re
 from .common import InfoExtractor
 from ..compat import compat_urlparse
 from ..utils import (
+    js_to_json,
     unified_strdate,
 )
@@ -94,19 +95,32 @@ class SportBoxEmbedIE(InfoExtractor):
         webpage = self._download_webpage(url, video_id)

-        hls = self._search_regex(
-            r"sportboxPlayer\.jwplayer_common_params\.file\s*=\s*['\"]([^'\"]+)['\"]",
-            webpage, 'hls file')
-
-        formats = self._extract_m3u8_formats(hls, video_id, 'mp4')
+        formats = []
+
+        def cleanup_js(code):
+            # desktop_advert_config contains complex Javascripts and we don't need it
+            return js_to_json(re.sub(r'desktop_advert_config.*', '', code))
+
+        jwplayer_data = self._parse_json(self._search_regex(
+            r'(?s)player\.setup\(({.+?})\);', webpage, 'jwplayer settings'), video_id,
+            transform_source=cleanup_js)
+
+        hls_url = jwplayer_data.get('hls_url')
+        if hls_url:
+            formats.extend(self._extract_m3u8_formats(
+                hls_url, video_id, ext='mp4', m3u8_id='hls'))
+
+        rtsp_url = jwplayer_data.get('rtsp_url')
+        if rtsp_url:
+            formats.append({
+                'url': rtsp_url,
+                'format_id': 'rtsp',
+            })
+
         self._sort_formats(formats)

-        title = self._search_regex(
-            r'sportboxPlayer\.node_title\s*=\s*"([^"]+)"', webpage, 'title')
-        thumbnail = self._search_regex(
-            r'sportboxPlayer\.jwplayer_common_params\.image\s*=\s*"([^"]+)"',
-            webpage, 'thumbnail', default=None)
+        title = jwplayer_data['node_title']
+        thumbnail = jwplayer_data.get('image_url')

         return {
             'id': video_id,
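The SportBox rewrite pulls the object passed to `player.setup({...})` out of the page and parses it as data, after stripping the advert blob that is plain JavaScript. A simplified, runnable version of the idea using pure-JSON sample markup (the page snippet and keys are invented):

```python
import json
import re

PAGE = '''
<script>
player.setup({
    "node_title": "Sample video",
    "hls_url": "https://example.com/master.m3u8",
    "rtsp_url": "rtsp://example.com/stream"
});
</script>
'''

# Grab the object literal handed to player.setup(...); the real page would
# first be run through js_to_json and stripped of desktop_advert_config.
settings = json.loads(re.search(r'(?s)player\.setup\((\{.+?\})\);', PAGE).group(1))

formats = []
if settings.get('hls_url'):
    formats.append({'url': settings['hls_url'], 'format_id': 'hls'})
if settings.get('rtsp_url'):
    formats.append({'url': settings['rtsp_url'], 'format_id': 'rtsp'})
print(settings['node_title'], formats)
```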

View File

@@ -1,63 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-
-
-class TheOnionIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?theonion\.com/video/[^,]+,(?P<id>[0-9]+)/?'
-    _TEST = {
-        'url': 'http://www.theonion.com/video/man-wearing-mm-jacket-gods-image,36918/',
-        'md5': '19eaa9a39cf9b9804d982e654dc791ee',
-        'info_dict': {
-            'id': '2133',
-            'ext': 'mp4',
-            'title': 'Man Wearing M&M Jacket Apparently Made In God\'s Image',
-            'description': 'md5:cc12448686b5600baae9261d3e180910',
-            'thumbnail': 're:^https?://.*\.jpg\?\d+$',
-        }
-    }
-
-    def _real_extract(self, url):
-        display_id = self._match_id(url)
-        webpage = self._download_webpage(url, display_id)
-
-        video_id = self._search_regex(
-            r'"videoId":\s(\d+),', webpage, 'video ID')
-        title = self._og_search_title(webpage)
-        description = self._og_search_description(webpage)
-        thumbnail = self._og_search_thumbnail(webpage)
-
-        sources = re.findall(r'<source src="([^"]+)" type="([^"]+)"', webpage)
-        formats = []
-        for src, type_ in sources:
-            if type_ == 'video/mp4':
-                formats.append({
-                    'format_id': 'mp4_sd',
-                    'preference': 1,
-                    'url': src,
-                })
-            elif type_ == 'video/webm':
-                formats.append({
-                    'format_id': 'webm_sd',
-                    'preference': 0,
-                    'url': src,
-                })
-            elif type_ == 'application/x-mpegURL':
-                formats.extend(
-                    self._extract_m3u8_formats(src, display_id, preference=-1))
-            else:
-                self.report_warning(
-                    'Encountered unexpected format: %s' % type_)
-        self._sort_formats(formats)
-
-        return {
-            'id': video_id,
-            'display_id': display_id,
-            'title': title,
-            'formats': formats,
-            'thumbnail': thumbnail,
-            'description': description,
-        }

View File

@@ -50,8 +50,6 @@ class ThePlatformBaseIE(OnceIE):
             else:
                 formats.append(_format)

-        self._sort_formats(formats)
-
         subtitles = self._parse_smil_subtitles(meta, default_ns)

         return formats, subtitles
@@ -241,6 +239,7 @@ class ThePlatformIE(ThePlatformBaseIE):
             smil_url = self._sign_url(smil_url, sig['key'], sig['secret'])

         formats, subtitles = self._extract_theplatform_smil(smil_url, video_id)
+        self._sort_formats(formats)

         ret = self.get_metadata(path, video_id)
         combined_subtitles = self._merge_subtitles(ret.get('subtitles', {}), subtitles)

View File

@@ -65,6 +65,9 @@ class TudouIE(InfoExtractor):
         if quality:
             info_url += '&hd' + quality
         xml_data = self._download_xml(info_url, video_id, 'Opening the info XML page')
+        error = xml_data.attrib.get('error')
+        if error is not None:
+            raise ExtractorError('Tudou said: %s' % error, expected=True)
         final_url = xml_data.text
         return final_url

View File

@@ -260,6 +260,17 @@ class TwitterIE(InfoExtractor):
             'upload_date': '20140615',
         },
         'add_ie': ['Vine'],
+    }, {
+        'url': 'https://twitter.com/captainamerica/status/719944021058060289',
+        # md5 constantly changes
+        'info_dict': {
+            'id': '719944021058060289',
+            'ext': 'mp4',
+            'title': 'Captain America - @King0fNerd Are you sure you made the right choice? Find out in theaters.',
+            'description': 'Captain America on Twitter: "@King0fNerd Are you sure you made the right choice? Find out in theaters. https://t.co/GpgYi9xMJI"',
+            'uploader_id': 'captainamerica',
+            'uploader': 'Captain America',
+        },
     }]

     def _real_extract(self, url):
@@ -284,17 +295,6 @@ class TwitterIE(InfoExtractor):
             'title': username + ' - ' + title,
         }

-        card_id = self._search_regex(
-            r'["\']/i/cards/tfw/v1/(\d+)', webpage, 'twitter card url', default=None)
-        if card_id:
-            card_url = 'https://twitter.com/i/cards/tfw/v1/' + card_id
-            info.update({
-                '_type': 'url_transparent',
-                'ie_key': 'TwitterCard',
-                'url': card_url,
-            })
-            return info
-
         mobj = re.search(r'''(?x)
             <video[^>]+class="animated-gif"(?P<more_info>[^>]+)>\s*
                 <source[^>]+video-src="(?P<url>[^"]+)"

View File

@@ -41,6 +41,12 @@ class UstreamIE(InfoExtractor):
             'uploader': 'sportscanadatv',
         },
         'skip': 'This Pro Broadcaster has chosen to remove this video from the ustream.tv site.',
+    }, {
+        'url': 'http://www.ustream.tv/embed/10299409',
+        'info_dict': {
+            'id': '10299409',
+        },
+        'playlist_count': 3,
     }]

     def _real_extract(self, url):
@@ -55,10 +61,12 @@ class UstreamIE(InfoExtractor):
         if m.group('type') == 'embed':
             video_id = m.group('id')
             webpage = self._download_webpage(url, video_id)
-            desktop_video_id = self._html_search_regex(
-                r'ContentVideoIds=\["([^"]*?)"\]', webpage, 'desktop_video_id')
-            desktop_url = 'http://www.ustream.tv/recorded/' + desktop_video_id
-            return self.url_result(desktop_url, 'Ustream')
+            content_video_ids = self._parse_json(self._search_regex(
+                r'ustream\.vars\.offAirContentVideoIds=([^;]+);', webpage,
+                'content video IDs'), video_id)
+            return self.playlist_result(
+                map(lambda u: self.url_result('http://www.ustream.tv/recorded/' + u, 'Ustream'), content_video_ids),
+                video_id)

         params = self._download_json(
             'https://api.ustream.tv/videos/%s.json' % video_id, video_id)

View File

@@ -2,11 +2,19 @@
 from __future__ import unicode_literals

 from .common import InfoExtractor
+from ..compat import (
+    compat_urllib_parse_urlparse,
+    compat_parse_qs,
+)
+from ..utils import (
+    clean_html,
+    remove_start,
+)


 class Varzesh3IE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?video\.varzesh3\.com/(?:[^/]+/)+(?P<id>[^/]+)/?'
-    _TEST = {
+    _TESTS = [{
         'url': 'http://video.varzesh3.com/germany/bundesliga/5-%D9%88%D8%A7%DA%A9%D9%86%D8%B4-%D8%A8%D8%B1%D8%AA%D8%B1-%D8%AF%D8%B1%D9%88%D8%A7%D8%B2%D9%87%E2%80%8C%D8%A8%D8%A7%D9%86%D8%A7%D9%86%D8%9B%D9%87%D9%81%D8%AA%D9%87-26-%D8%A8%D9%88%D9%86%D8%AF%D8%B3/',
         'md5': '2a933874cb7dce4366075281eb49e855',
         'info_dict': {
@@ -15,8 +23,19 @@ class Varzesh3IE(InfoExtractor):
             'title': '۵ واکنش برتر دروازه‌بانان؛هفته ۲۶ بوندسلیگا',
             'description': 'فصل ۲۰۱۵-۲۰۱۴',
             'thumbnail': 're:^https?://.*\.jpg$',
-        }
-    }
+        },
+        'skip': 'HTTP 404 Error',
+    }, {
+        'url': 'http://video.varzesh3.com/video/112785/%D8%AF%D9%84%D9%87-%D8%B9%D9%84%DB%8C%D8%9B-%D8%B3%D8%AA%D8%A7%D8%B1%D9%87-%D9%86%D9%88%D8%B8%D9%87%D9%88%D8%B1-%D9%84%DB%8C%DA%AF-%D8%A8%D8%B1%D8%AA%D8%B1-%D8%AC%D8%B2%DB%8C%D8%B1%D9%87',
+        'md5': '841b7cd3afbc76e61708d94e53a4a4e7',
+        'info_dict': {
+            'id': '112785',
+            'ext': 'mp4',
+            'title': 'دله علی؛ ستاره نوظهور لیگ برتر جزیره',
+            'description': 'فوتبال 120',
+        },
+        'expected_warnings': ['description'],
+    }]

     def _real_extract(self, url):
         display_id = self._match_id(url)
@@ -26,15 +45,30 @@ class Varzesh3IE(InfoExtractor):
         video_url = self._search_regex(
             r'<source[^>]+src="([^"]+)"', webpage, 'video url')

-        title = self._og_search_title(webpage)
+        title = remove_start(self._html_search_regex(
+            r'<title>([^<]+)</title>', webpage, 'title'), 'ویدیو ورزش 3 | ')
+
         description = self._html_search_regex(
             r'(?s)<div class="matn">(.+?)</div>',
-            webpage, 'description', fatal=False)
-        thumbnail = self._og_search_thumbnail(webpage)
+            webpage, 'description', default=None)
+        if description is None:
+            description = clean_html(self._html_search_meta('description', webpage))
+
+        thumbnail = self._og_search_thumbnail(webpage, default=None)
+        if thumbnail is None:
+            fb_sharer_url = self._search_regex(
+                r'<a[^>]+href="(https?://www\.facebook\.com/sharer/sharer\.php?[^"]+)"',
+                webpage, 'facebook sharer URL', fatal=False)
+            sharer_params = compat_parse_qs(compat_urllib_parse_urlparse(fb_sharer_url).query)
+            thumbnail = sharer_params.get('p[images][0]', [None])[0]

         video_id = self._search_regex(
             r"<link[^>]+rel='(?:canonical|shortlink)'[^>]+href='/\?p=([^']+)'",
-            webpage, display_id, default=display_id)
+            webpage, display_id, default=None)
+        if video_id is None:
+            video_id = self._search_regex(
+                'var\s+VideoId\s*=\s*(\d+);', webpage, 'video id',
+                default=display_id)

         return {
             'url': video_url,
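The Varzesh3 changes add fallbacks: the description falls back to the page's meta description tag, and the thumbnail falls back to the `p[images][0]` query parameter of the Facebook sharer link. The sharer trick in isolation (sample markup invented):

```python
import re
from urllib.parse import parse_qs, urlparse

html = ('<a href="https://www.facebook.com/sharer/sharer.php?u=https%3A%2F%2Fexample.com%2Fv%2F1'
        '&p[images][0]=https%3A%2F%2Fexample.com%2Fthumb.jpg">share</a>')

sharer_url = re.search(
    r'<a[^>]+href="(https?://www\.facebook\.com/sharer/sharer\.php\?[^"]+)"', html).group(1)
# The sharer link carries the preview image as a url-encoded query parameter.
params = parse_qs(urlparse(sharer_url).query)
print(params.get('p[images][0]', [None])[0])  # https://example.com/thumb.jpg
```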

View File

@@ -3,7 +3,6 @@ from __future__ import unicode_literals
 import re

 from .common import InfoExtractor
-from .ooyala import OoyalaIE
 from ..utils import ExtractorError
@@ -14,13 +13,21 @@ class ViceIE(InfoExtractor):
         'url': 'http://www.vice.com/video/cowboy-capitalists-part-1',
         'info_dict': {
             'id': '43cW1mYzpia9IlestBjVpd23Yu3afAfp',
-            'ext': 'mp4',
+            'ext': 'flv',
             'title': 'VICE_COWBOYCAPITALISTS_PART01_v1_VICE_WM_1080p.mov',
             'duration': 725.983,
         },
-        'params': {
-            # Requires ffmpeg (m3u8 manifest)
-            'skip_download': True,
+    }, {
+        'url': 'http://www.vice.com/video/how-to-hack-a-car',
+        'md5': '6fb2989a3fed069fb8eab3401fc2d3c9',
+        'info_dict': {
+            'id': '3jstaBeXgAs',
+            'ext': 'mp4',
+            'title': 'How to Hack a Car: Phreaked Out (Episode 2)',
+            'description': 'md5:ee95453f7ff495db8efe14ae8bf56f30',
+            'uploader_id': 'MotherboardTV',
+            'uploader': 'Motherboard',
+            'upload_date': '20140529',
         },
     }, {
         'url': 'https://news.vice.com/video/experimenting-on-animals-inside-the-monkey-lab',
@@ -39,11 +46,14 @@ class ViceIE(InfoExtractor):
         try:
             embed_code = self._search_regex(
                 r'embedCode=([^&\'"]+)', webpage,
-                'ooyala embed code')
-            ooyala_url = OoyalaIE._url_for_embed_code(embed_code)
+                'ooyala embed code', default=None)
+            if embed_code:
+                return self.url_result('ooyala:%s' % embed_code, 'Ooyala')
+            youtube_id = self._search_regex(
+                r'data-youtube-id="([^"]+)"', webpage, 'youtube id')
+            return self.url_result(youtube_id, 'Youtube')
         except ExtractorError:
             raise ExtractorError('The page doesn\'t contain a video', expected=True)
-        return self.url_result(ooyala_url, ie='Ooyala')


 class ViceShowIE(InfoExtractor):

View File

@@ -1818,20 +1818,32 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
     def _extract_mix(self, playlist_id):
         # The mixes are generated from a single video
         # the id of the playlist is just 'RD' + video_id
-        url = 'https://youtube.com/watch?v=%s&list=%s' % (playlist_id[-11:], playlist_id)
-        webpage = self._download_webpage(
-            url, playlist_id, 'Downloading Youtube mix')
+        ids = []
+        last_id = playlist_id[-11:]
+        for n in itertools.count(1):
+            url = 'https://youtube.com/watch?v=%s&list=%s' % (last_id, playlist_id)
+            webpage = self._download_webpage(
+                url, playlist_id, 'Downloading page {0} of Youtube mix'.format(n))
+            new_ids = orderedSet(re.findall(
+                r'''(?xs)data-video-username=".*?".*?
+                           href="/watch\?v=([0-9A-Za-z_-]{11})&amp;[^"]*?list=%s''' % re.escape(playlist_id),
+                webpage))
+            # Fetch new pages until all the videos are repeated, it seems that
+            # there are always 51 unique videos.
+            new_ids = [_id for _id in new_ids if _id not in ids]
+            if not new_ids:
+                break
+            ids.extend(new_ids)
+            last_id = ids[-1]
+
+        url_results = self._ids_to_results(ids)

         search_title = lambda class_name: get_element_by_attribute('class', class_name, webpage)
         title_span = (
             search_title('playlist-title') or
             search_title('title long-title') or
             search_title('title'))
         title = clean_html(title_span)
-        ids = orderedSet(re.findall(
-            r'''(?xs)data-video-username=".*?".*?
-                   href="/watch\?v=([0-9A-Za-z_-]{11})&amp;[^"]*?list=%s''' % re.escape(playlist_id),
-            webpage))
-        url_results = self._ids_to_results(ids)

         return self.playlist_result(url_results, playlist_id, title)
@@ -1987,8 +1999,8 @@ class YoutubeUserIE(YoutubeChannelIE):
     def suitable(cls, url):
         # Don't return True if the url can be extracted with other youtube
         # extractor, the regex would is too permissive and it would match.
-        other_ies = iter(klass for (name, klass) in globals().items() if name.endswith('IE') and klass is not cls)
-        if any(ie.suitable(url) for ie in other_ies):
+        other_yt_ies = iter(klass for (name, klass) in globals().items() if name.startswith('Youtube') and name.endswith('IE') and klass is not cls)
+        if any(ie.suitable(url) for ie in other_yt_ies):
             return False
         else:
             return super(YoutubeUserIE, cls).suitable(url)
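The `_extract_mix` rewrite keeps requesting the watch page, seeded with the last video collected so far, until a page yields no IDs it has not already seen (mixes wrap around after roughly 51 unique entries, which is also why the test now expects `>= 50`). The control flow, stripped of the YouTube specifics (`fetch_page` stands in for the real download-and-scrape step):

```python
import itertools


def collect_mix_ids(seed, fetch_page):
    """fetch_page(seed) -> ordered list of video ids visible on that page."""
    ids = []
    last_id = seed
    for page_num in itertools.count(1):
        new_ids = [i for i in fetch_page(last_id) if i not in ids]
        if not new_ids:       # every id already seen -> the mix has wrapped around
            break
        ids.extend(new_ids)
        last_id = ids[-1]     # continue the mix from the newest video
    return ids


# toy fetcher: each "page" shows the seed plus the next two videos of a 6-item mix
print(collect_mix_ids(0, lambda seed: [seed, (seed + 1) % 6, (seed + 2) % 6]))
# [0, 1, 2, 3, 4, 5]
```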

View File

@@ -175,7 +175,8 @@ class FFmpegPostProcessor(PostProcessor):
         # Always use 'file:' because the filename may contain ':' (ffmpeg
         # interprets that as a protocol) or can start with '-' (-- is broken in
         # ffmpeg, see https://ffmpeg.org/trac/ffmpeg/ticket/2127 for details)
-        return 'file:' + fn
+        # Also leave '-' intact in order not to break streaming to stdout.
+        return 'file:' + fn if fn != '-' else fn


 class FFmpegExtractAudioPP(FFmpegPostProcessor):
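The post-processor change leaves '-' untouched so ffmpeg still writes to stdout, while every real path keeps the `file:` prefix that protects names containing ':' or starting with '-'. As a standalone helper (the function name is this example's, not youtube-dl's):

```python
def ffmpeg_output_argument(fn):
    # 'file:' stops ffmpeg from treating 'C:...' as a protocol or '-foo' as an
    # option, but prefixing '-' would break streaming to stdout.
    return 'file:' + fn if fn != '-' else fn


assert ffmpeg_output_argument('video.mp4') == 'file:video.mp4'
assert ffmpeg_output_argument('-') == '-'
```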