Merge branch 'master' into BlenderCloud-issue-13282

This commit is contained in:
Parmjit Virk 2017-07-09 18:20:58 -05:00
commit 6e15af0db5
18 changed files with 470 additions and 83 deletions

View File

@ -6,8 +6,8 @@
--- ---
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.07.02*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. ### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.07.09*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.07.02** - [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.07.09**
### Before submitting an *issue* make sure you have: ### Before submitting an *issue* make sure you have:
- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
@ -35,7 +35,7 @@ $ youtube-dl -v <your command line>
[debug] User config: [] [debug] User config: []
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
[debug] youtube-dl version 2017.07.02 [debug] youtube-dl version 2017.07.09
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
[debug] Proxy map: {} [debug] Proxy map: {}

View File

@ -1,7 +1,27 @@
version <unreleased> version 2017.07.09
Core
+ [extractor/common] Add support for AMP tags in _parse_html5_media_entries
+ [utils] Support attributes with no values in get_elements_by_attribute
Extractors Extractors
+ [dailymail] Add support for embeds
+ [joj] Add support for joj.sk (#13268)
* [abc.net.au:iview] Extract more formats (#13492, #13489)
* [egghead:course] Fix extraction (#6635, #13370)
+ [cjsw] Add support for cjsw.com (#13525)
+ [eagleplatform] Add support for referrer protected videos (#13557)
+ [eagleplatform] Add support for another embed pattern (#13557)
* [veoh] Extend URL regular expression (#13601)
* [npo:live] Fix live stream id extraction (#13568, #13605)
* [googledrive] Fix height extraction (#13603)
+ [dailymotion] Add support for new layout (#13580)
- [yam] Remove extractor - [yam] Remove extractor
* [xhamster] Extract all formats and fix duration extraction (#13593)
+ [xhamster] Add support for new URL schema (#13593)
* [espn] Extend URL regular expression (#13244, #13549)
* [kaltura] Fix typo in subtitles extraction (#13569)
* [vier] Adapt extraction to redesign (#13575)
version 2017.07.02 version 2017.07.02

View File

@ -154,6 +154,7 @@
- **chirbit** - **chirbit**
- **chirbit:profile** - **chirbit:profile**
- **Cinchcast** - **Cinchcast**
- **CJSW**
- **Clipfish** - **Clipfish**
- **cliphunter** - **cliphunter**
- **ClipRs** - **ClipRs**
@ -369,6 +370,7 @@
- **Jamendo** - **Jamendo**
- **JamendoAlbum** - **JamendoAlbum**
- **JeuxVideo** - **JeuxVideo**
- **Joj**
- **Jove** - **Jove**
- **jpopsuki.tv** - **jpopsuki.tv**
- **JWPlatform** - **JWPlatform**
@ -996,7 +998,6 @@
- **XVideos** - **XVideos**
- **XXXYMovies** - **XXXYMovies**
- **Yahoo**: Yahoo screen and movies - **Yahoo**: Yahoo screen and movies
- **Yam**: 蕃薯藤yam天空部落
- **yandexmusic:album**: Яндекс.Музыка - Альбом - **yandexmusic:album**: Яндекс.Музыка - Альбом
- **yandexmusic:playlist**: Яндекс.Музыка - Плейлист - **yandexmusic:playlist**: Яндекс.Музыка - Плейлист
- **yandexmusic:track**: Яндекс.Музыка - Трек - **yandexmusic:track**: Яндекс.Музыка - Трек

View File

@ -1890,7 +1890,7 @@ class YoutubeDL(object):
info_dict.get('protocol') == 'm3u8' and info_dict.get('protocol') == 'm3u8' and
self.params.get('hls_prefer_native')): self.params.get('hls_prefer_native')):
if fixup_policy == 'warn': if fixup_policy == 'warn':
self.report_warning('%s: malformated aac bitstream.' % ( self.report_warning('%s: malformed AAC bitstream detected.' % (
info_dict['id'])) info_dict['id']))
elif fixup_policy == 'detect_or_warn': elif fixup_policy == 'detect_or_warn':
fixup_pp = FFmpegFixupM3u8PP(self) fixup_pp = FFmpegFixupM3u8PP(self)
@ -1899,7 +1899,7 @@ class YoutubeDL(object):
info_dict['__postprocessors'].append(fixup_pp) info_dict['__postprocessors'].append(fixup_pp)
else: else:
self.report_warning( self.report_warning(
'%s: malformated aac bitstream. %s' '%s: malformed AAC bitstream detected. %s'
% (info_dict['id'], INSTALL_FFMPEG_MESSAGE)) % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
else: else:
assert fixup_policy in ('ignore', 'never') assert fixup_policy in ('ignore', 'never')

View File

@ -3,11 +3,13 @@ from __future__ import unicode_literals
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_str
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
js_to_json, js_to_json,
int_or_none, int_or_none,
parse_iso8601, parse_iso8601,
try_get,
) )
@ -124,7 +126,20 @@ class ABCIViewIE(InfoExtractor):
title = video_params.get('title') or video_params['seriesTitle'] title = video_params.get('title') or video_params['seriesTitle']
stream = next(s for s in video_params['playlist'] if s.get('type') == 'program') stream = next(s for s in video_params['playlist'] if s.get('type') == 'program')
formats = self._extract_akamai_formats(stream['hds-unmetered'], video_id) format_urls = [
try_get(stream, lambda x: x['hds-unmetered'], compat_str)]
# May have higher quality video
sd_url = try_get(
stream, lambda x: x['streams']['hds']['sd'], compat_str)
if sd_url:
format_urls.append(sd_url.replace('metered', 'um'))
formats = []
for format_url in format_urls:
if format_url:
formats.extend(
self._extract_akamai_formats(format_url, video_id))
self._sort_formats(formats) self._sort_formats(formats)
subtitles = {} subtitles = {}

View File

@ -0,0 +1,72 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
determine_ext,
unescapeHTML,
)
class CJSWIE(InfoExtractor):
    """Extractor for single programme episodes hosted on cjsw.com."""

    _VALID_URL = r'https?://(?:www\.)?cjsw\.com/program/(?P<program>[^/]+)/episode/(?P<id>\d+)'
    _TESTS = [{
        'url': 'http://cjsw.com/program/freshly-squeezed/episode/20170620',
        'md5': 'cee14d40f1e9433632c56e3d14977120',
        'info_dict': {
            'id': '91d9f016-a2e7-46c5-8dcb-7cbcd7437c41',
            'ext': 'mp3',
            'title': 'Freshly Squeezed Episode June 20, 2017',
            'description': 'md5:c967d63366c3898a80d0c7b0ff337202',
            'series': 'Freshly Squeezed',
            'episode_id': '20170620',
        },
    }, {
        # no description
        'url': 'http://cjsw.com/program/road-pops/episode/20170707/',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Download the episode page and build the info dict for its audio.

        The program slug and episode id are taken from the URL; title and
        audio URL are scraped from the page.
        """
        url_match = re.match(self._VALID_URL, url)
        program = url_match.group('program')
        episode_id = url_match.group('id')

        webpage = self._download_webpage(url, episode_id)

        # Title: prefer the episode header, fall back to the player attribute.
        title_patterns = (
            r'<h1[^>]+class=["\']episode-header__title["\'][^>]*>(?P<title>[^<]+)',
            r'data-audio-title=(["\'])(?P<title>(?:(?!\1).)+)\1',
        )
        raw_title = self._search_regex(
            title_patterns, webpage, 'title', group='title')
        title = unescapeHTML(raw_title)

        audio_url = self._search_regex(
            r'<button[^>]+data-audio-src=(["\'])(?P<url>(?:(?!\1).)+)\1',
            webpage, 'audio url', group='url')

        # Use the UUID embedded in the mp3 file name as the id when present,
        # otherwise fall back to "<program>/<episode id>".
        fallback_id = '%s/%s' % (program, episode_id)
        audio_id = self._search_regex(
            r'/([\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})\.mp3',
            audio_url, 'audio id', default=fallback_id)

        audio_format = {
            'url': audio_url,
            'ext': determine_ext(audio_url, 'mp3'),
            'vcodec': 'none',
        }

        # Both of these are optional; the defaults keep extraction going.
        description = self._html_search_regex(
            r'<p>(?P<description>.+?)</p>', webpage, 'description',
            default=None)
        series = self._search_regex(
            r'data-showname=(["\'])(?P<name>(?:(?!\1).)+)\1', webpage,
            'series', default=program, group='name')

        return {
            'id': audio_id,
            'title': title,
            'description': description,
            'formats': [audio_format],
            'series': series,
            'episode_id': episode_id,
        }

View File

@ -2132,15 +2132,18 @@ class InfoExtractor(object):
return is_plain_url, formats return is_plain_url, formats
entries = [] entries = []
# amp-video and amp-audio are very similar to their HTML5 counterparts
# so we will include them right here (see
# https://www.ampproject.org/docs/reference/components/amp-video)
media_tags = [(media_tag, media_type, '') media_tags = [(media_tag, media_type, '')
for media_tag, media_type for media_tag, media_type
in re.findall(r'(?s)(<(video|audio)[^>]*/>)', webpage)] in re.findall(r'(?s)(<(?:amp-)?(video|audio)[^>]*/>)', webpage)]
media_tags.extend(re.findall( media_tags.extend(re.findall(
# We only allow video|audio followed by a whitespace or '>'. # We only allow video|audio followed by a whitespace or '>'.
# Allowing more characters may end up in significant slow down (see # Allowing more characters may end up in significant slow down (see
# https://github.com/rg3/youtube-dl/issues/11979, example URL: # https://github.com/rg3/youtube-dl/issues/11979, example URL:
# http://www.porntrex.com/maps/videositemap.xml). # http://www.porntrex.com/maps/videositemap.xml).
r'(?s)(<(?P<tag>video|audio)(?:\s+[^>]*)?>)(.*?)</(?P=tag)>', webpage)) r'(?s)(<(?P<tag>(?:amp-)?(?:video|audio))(?:\s+[^>]*)?>)(.*?)</(?P=tag)>', webpage))
for media_tag, media_type, media_content in media_tags: for media_tag, media_type, media_content in media_tags:
media_info = { media_info = {
'formats': [], 'formats': [],

View File

@ -1,6 +1,8 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_str from ..compat import compat_str
from ..utils import ( from ..utils import (
@ -12,8 +14,8 @@ from ..utils import (
class DailyMailIE(InfoExtractor): class DailyMailIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?dailymail\.co\.uk/video/[^/]+/video-(?P<id>[0-9]+)' _VALID_URL = r'https?://(?:www\.)?dailymail\.co\.uk/(?:video/[^/]+/video-|embed/video/)(?P<id>[0-9]+)'
_TEST = { _TESTS = [{
'url': 'http://www.dailymail.co.uk/video/tvshowbiz/video-1295863/The-Mountain-appears-sparkling-water-ad-Heavy-Bubbles.html', 'url': 'http://www.dailymail.co.uk/video/tvshowbiz/video-1295863/The-Mountain-appears-sparkling-water-ad-Heavy-Bubbles.html',
'md5': 'f6129624562251f628296c3a9ffde124', 'md5': 'f6129624562251f628296c3a9ffde124',
'info_dict': { 'info_dict': {
@ -22,7 +24,16 @@ class DailyMailIE(InfoExtractor):
'title': 'The Mountain appears in sparkling water ad for \'Heavy Bubbles\'', 'title': 'The Mountain appears in sparkling water ad for \'Heavy Bubbles\'',
'description': 'md5:a93d74b6da172dd5dc4d973e0b766a84', 'description': 'md5:a93d74b6da172dd5dc4d973e0b766a84',
} }
} }, {
'url': 'http://www.dailymail.co.uk/embed/video/1295863.html',
'only_matching': True,
}]
@staticmethod
def _extract_urls(webpage):
    """Return the DailyMail embed URLs found in ``<iframe src=...>`` tags.

    The result is the list of strings captured by the ``url`` group, one per
    matching iframe (empty when there is none).
    """
    embed_iframe_re = r'<iframe\b[^>]+\bsrc=["\'](?P<url>(?:https?:)?//(?:www\.)?dailymail\.co\.uk/embed/video/\d+\.html)'
    return re.findall(embed_iframe_re, webpage)
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)

View File

@ -11,6 +11,7 @@ from ..compat import (
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
int_or_none, int_or_none,
unsmuggle_url,
) )
@ -50,6 +51,10 @@ class EaglePlatformIE(InfoExtractor):
'view_count': int, 'view_count': int,
}, },
'skip': 'Georestricted', 'skip': 'Georestricted',
}, {
# referrer protected video (https://tvrain.ru/lite/teleshow/kak_vse_nachinalos/namin-418921/)
'url': 'eagleplatform:tvrainru.media.eagleplatform.com:582306',
'only_matching': True,
}] }]
@staticmethod @staticmethod
@ -60,16 +65,40 @@ class EaglePlatformIE(InfoExtractor):
webpage) webpage)
if mobj is not None: if mobj is not None:
return mobj.group('url') return mobj.group('url')
# Basic usage embedding (see http://dultonmedia.github.io/eplayer/) PLAYER_JS_RE = r'''
<script[^>]+
src=(?P<qjs>["\'])(?:https?:)?//(?P<host>(?:(?!(?P=qjs)).)+\.media\.eagleplatform\.com)/player/player\.js(?P=qjs)
.+?
'''
# "Basic usage" embedding (see http://dultonmedia.github.io/eplayer/)
mobj = re.search( mobj = re.search(
r'''(?xs) r'''(?xs)
<script[^>]+ %s
src=(?P<q1>["\'])(?:https?:)?//(?P<host>.+?\.media\.eagleplatform\.com)/player/player\.js(?P=q1)
.+?
<div[^>]+ <div[^>]+
class=(?P<q2>["\'])eagleplayer(?P=q2)[^>]+ class=(?P<qclass>["\'])eagleplayer(?P=qclass)[^>]+
data-id=["\'](?P<id>\d+) data-id=["\'](?P<id>\d+)
''', webpage) ''' % PLAYER_JS_RE, webpage)
if mobj is not None:
return 'eagleplatform:%(host)s:%(id)s' % mobj.groupdict()
# Generalization of "Javascript code usage", "Combined usage" and
# "Usage without attaching to DOM" embeddings (see
# http://dultonmedia.github.io/eplayer/)
mobj = re.search(
r'''(?xs)
%s
<script>
.+?
new\s+EaglePlayer\(
(?:[^,]+\s*,\s*)?
{
.+?
\bid\s*:\s*["\']?(?P<id>\d+)
.+?
}
\s*\)
.+?
</script>
''' % PLAYER_JS_RE, webpage)
if mobj is not None: if mobj is not None:
return 'eagleplatform:%(host)s:%(id)s' % mobj.groupdict() return 'eagleplatform:%(host)s:%(id)s' % mobj.groupdict()
@ -79,9 +108,10 @@ class EaglePlatformIE(InfoExtractor):
if status != 200: if status != 200:
raise ExtractorError(' '.join(response['errors']), expected=True) raise ExtractorError(' '.join(response['errors']), expected=True)
def _download_json(self, url_or_request, video_id, note='Downloading JSON metadata', *args, **kwargs): def _download_json(self, url_or_request, video_id, *args, **kwargs):
try: try:
response = super(EaglePlatformIE, self)._download_json(url_or_request, video_id, note) response = super(EaglePlatformIE, self)._download_json(
url_or_request, video_id, *args, **kwargs)
except ExtractorError as ee: except ExtractorError as ee:
if isinstance(ee.cause, compat_HTTPError): if isinstance(ee.cause, compat_HTTPError):
response = self._parse_json(ee.cause.read().decode('utf-8'), video_id) response = self._parse_json(ee.cause.read().decode('utf-8'), video_id)
@ -93,11 +123,24 @@ class EaglePlatformIE(InfoExtractor):
return self._download_json(url_or_request, video_id, note)['data'][0] return self._download_json(url_or_request, video_id, note)['data'][0]
def _real_extract(self, url): def _real_extract(self, url):
url, smuggled_data = unsmuggle_url(url, {})
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
host, video_id = mobj.group('custom_host') or mobj.group('host'), mobj.group('id') host, video_id = mobj.group('custom_host') or mobj.group('host'), mobj.group('id')
headers = {}
query = {
'id': video_id,
}
referrer = smuggled_data.get('referrer')
if referrer:
headers['Referer'] = referrer
query['referrer'] = referrer
player_data = self._download_json( player_data = self._download_json(
'http://%s/api/player_data?id=%s' % (host, video_id), video_id) 'http://%s/api/player_data' % host, video_id,
headers=headers, query=query)
media = player_data['data']['playlist']['viewports'][0]['medialist'][0] media = player_data['data']['playlist']['viewports'][0]['medialist'][0]

View File

@ -1,15 +1,13 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import re
from .common import InfoExtractor from .common import InfoExtractor
class EggheadCourseIE(InfoExtractor): class EggheadCourseIE(InfoExtractor):
IE_DESC = 'egghead.io course' IE_DESC = 'egghead.io course'
IE_NAME = 'egghead:course' IE_NAME = 'egghead:course'
_VALID_URL = r'https://egghead\.io/courses/(?P<id>[a-zA-Z_0-9-]+)' _VALID_URL = r'https://egghead\.io/courses/(?P<id>[^/?#&]+)'
_TEST = { _TEST = {
'url': 'https://egghead.io/courses/professor-frisby-introduces-composable-functional-javascript', 'url': 'https://egghead.io/courses/professor-frisby-introduces-composable-functional-javascript',
'playlist_count': 29, 'playlist_count': 29,
@ -22,18 +20,16 @@ class EggheadCourseIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
playlist_id = self._match_id(url) playlist_id = self._match_id(url)
webpage = self._download_webpage(url, playlist_id)
title = self._html_search_regex(r'<h1 class="title">([^<]+)</h1>', webpage, 'title') course = self._download_json(
ul = self._search_regex(r'(?s)<ul class="series-lessons-list">(.*?)</ul>', webpage, 'session list') 'https://egghead.io/api/v1/series/%s' % playlist_id, playlist_id)
found = re.findall(r'(?s)<a class="[^"]*"\s*href="([^"]+)">\s*<li class="item', ul) entries = [
entries = [self.url_result(m) for m in found] self.url_result(
'wistia:%s' % lesson['wistia_id'], ie='Wistia',
video_id=lesson['wistia_id'], video_title=lesson.get('title'))
for lesson in course['lessons'] if lesson.get('wistia_id')]
return { return self.playlist_result(
'_type': 'playlist', entries, playlist_id, course.get('title'),
'id': playlist_id, course.get('description'))
'title': title,
'description': self._og_search_description(webpage),
'entries': entries,
}

View File

@ -189,6 +189,7 @@ from .chirbit import (
ChirbitProfileIE, ChirbitProfileIE,
) )
from .cinchcast import CinchcastIE from .cinchcast import CinchcastIE
from .cjsw import CJSWIE
from .clipfish import ClipfishIE from .clipfish import ClipfishIE
from .cliphunter import CliphunterIE from .cliphunter import CliphunterIE
from .cliprs import ClipRsIE from .cliprs import ClipRsIE
@ -473,6 +474,7 @@ from .jamendo import (
) )
from .jeuxvideo import JeuxVideoIE from .jeuxvideo import JeuxVideoIE
from .jove import JoveIE from .jove import JoveIE
from .joj import JojIE
from .jwplatform import JWPlatformIE from .jwplatform import JWPlatformIE
from .jpopsukitv import JpopsukiIE from .jpopsukitv import JpopsukiIE
from .kaltura import KalturaIE from .kaltura import KalturaIE
@ -1208,7 +1210,8 @@ from .vk import (
) )
from .vlive import ( from .vlive import (
VLiveIE, VLiveIE,
VLiveChannelIE VLiveChannelIE,
VLivePlaylistIE
) )
from .vodlocker import VodlockerIE from .vodlocker import VodlockerIE
from .vodpl import VODPlIE from .vodpl import VODPlIE

View File

@ -57,6 +57,7 @@ from .dailymotion import (
DailymotionIE, DailymotionIE,
DailymotionCloudIE, DailymotionCloudIE,
) )
from .dailymail import DailyMailIE
from .onionstudios import OnionStudiosIE from .onionstudios import OnionStudiosIE
from .viewlift import ViewLiftEmbedIE from .viewlift import ViewLiftEmbedIE
from .mtv import MTVServicesEmbeddedIE from .mtv import MTVServicesEmbeddedIE
@ -91,6 +92,7 @@ from .anvato import AnvatoIE
from .washingtonpost import WashingtonPostIE from .washingtonpost import WashingtonPostIE
from .wistia import WistiaIE from .wistia import WistiaIE
from .mediaset import MediasetIE from .mediaset import MediasetIE
from .joj import JojIE
class GenericIE(InfoExtractor): class GenericIE(InfoExtractor):
@ -759,6 +761,20 @@ class GenericIE(InfoExtractor):
}, },
'add_ie': ['Dailymotion'], 'add_ie': ['Dailymotion'],
}, },
# DailyMail embed
{
'url': 'http://www.bumm.sk/krimi/2017/07/05/biztonsagi-kamera-buktatta-le-az-agg-ferfit-utlegelo-apolot',
'info_dict': {
'id': '1495629',
'ext': 'mp4',
'title': 'Care worker punches elderly dementia patient in head 11 times',
'description': 'md5:3a743dee84e57e48ec68bf67113199a5',
},
'add_ie': ['DailyMail'],
'params': {
'skip_download': True,
},
},
# YouTube embed # YouTube embed
{ {
'url': 'http://www.badzine.de/ansicht/datum/2014/06/09/so-funktioniert-die-neue-englische-badminton-liga.html', 'url': 'http://www.badzine.de/ansicht/datum/2014/06/09/so-funktioniert-die-neue-englische-badminton-liga.html',
@ -1185,7 +1201,7 @@ class GenericIE(InfoExtractor):
}, },
'add_ie': ['Kaltura'], 'add_ie': ['Kaltura'],
}, },
# Eagle.Platform embed (generic URL) # EaglePlatform embed (generic URL)
{ {
'url': 'http://lenta.ru/news/2015/03/06/navalny/', 'url': 'http://lenta.ru/news/2015/03/06/navalny/',
# Not checking MD5 as sometimes the direct HTTP link results in 404 and HLS is used # Not checking MD5 as sometimes the direct HTTP link results in 404 and HLS is used
@ -1199,8 +1215,26 @@ class GenericIE(InfoExtractor):
'view_count': int, 'view_count': int,
'age_limit': 0, 'age_limit': 0,
}, },
'params': {
'skip_download': True,
},
}, },
# ClipYou (Eagle.Platform) embed (custom URL) # referrer protected EaglePlatform embed
{
'url': 'https://tvrain.ru/lite/teleshow/kak_vse_nachinalos/namin-418921/',
'info_dict': {
'id': '582306',
'ext': 'mp4',
'title': 'Стас Намин: «Мы нарушили девственность Кремля»',
'thumbnail': r're:^https?://.*\.jpg$',
'duration': 3382,
'view_count': int,
},
'params': {
'skip_download': True,
},
},
# ClipYou (EaglePlatform) embed (custom URL)
{ {
'url': 'http://muz-tv.ru/play/7129/', 'url': 'http://muz-tv.ru/play/7129/',
# Not checking MD5 as sometimes the direct HTTP link results in 404 and HLS is used # Not checking MD5 as sometimes the direct HTTP link results in 404 and HLS is used
@ -1212,6 +1246,9 @@ class GenericIE(InfoExtractor):
'duration': 216, 'duration': 216,
'view_count': int, 'view_count': int,
}, },
'params': {
'skip_download': True,
},
}, },
# Pladform embed # Pladform embed
{ {
@ -1749,6 +1786,26 @@ class GenericIE(InfoExtractor):
}, },
'add_ie': [MediasetIE.ie_key()], 'add_ie': [MediasetIE.ie_key()],
}, },
{
# JOJ.sk embeds
'url': 'https://www.noviny.sk/slovensko/238543-slovenskom-sa-prehnala-vlna-silnych-burok',
'info_dict': {
'id': '238543-slovenskom-sa-prehnala-vlna-silnych-burok',
'title': 'Slovenskom sa prehnala vlna silných búrok',
},
'playlist_mincount': 5,
'add_ie': [JojIE.ie_key()],
},
{
# AMP embed (see https://www.ampproject.org/docs/reference/components/amp-video)
'url': 'https://tvrain.ru/amp/418921/',
'md5': 'cc00413936695987e8de148b67d14f1d',
'info_dict': {
'id': '418921',
'ext': 'mp4',
'title': 'Стас Намин: «Мы нарушили девственность Кремля»',
},
},
# { # {
# # TODO: find another test # # TODO: find another test
# # http://schema.org/VideoObject # # http://schema.org/VideoObject
@ -2148,6 +2205,12 @@ class GenericIE(InfoExtractor):
return self.playlist_from_matches( return self.playlist_from_matches(
playlists, video_id, video_title, lambda p: '//dailymotion.com/playlist/%s' % p) playlists, video_id, video_title, lambda p: '//dailymotion.com/playlist/%s' % p)
# Look for DailyMail embeds
dailymail_urls = DailyMailIE._extract_urls(webpage)
if dailymail_urls:
return self.playlist_from_matches(
dailymail_urls, video_id, video_title, ie=DailyMailIE.ie_key())
# Look for embedded Wistia player # Look for embedded Wistia player
wistia_url = WistiaIE._extract_url(webpage) wistia_url = WistiaIE._extract_url(webpage)
if wistia_url: if wistia_url:
@ -2443,12 +2506,12 @@ class GenericIE(InfoExtractor):
if kaltura_url: if kaltura_url:
return self.url_result(smuggle_url(kaltura_url, {'source_url': url}), KalturaIE.ie_key()) return self.url_result(smuggle_url(kaltura_url, {'source_url': url}), KalturaIE.ie_key())
# Look for Eagle.Platform embeds # Look for EaglePlatform embeds
eagleplatform_url = EaglePlatformIE._extract_url(webpage) eagleplatform_url = EaglePlatformIE._extract_url(webpage)
if eagleplatform_url: if eagleplatform_url:
return self.url_result(eagleplatform_url, EaglePlatformIE.ie_key()) return self.url_result(smuggle_url(eagleplatform_url, {'referrer': url}), EaglePlatformIE.ie_key())
# Look for ClipYou (uses Eagle.Platform) embeds # Look for ClipYou (uses EaglePlatform) embeds
mobj = re.search( mobj = re.search(
r'<iframe[^>]+src="https?://(?P<host>media\.clipyou\.ru)/index/player\?.*\brecord_id=(?P<id>\d+).*"', webpage) r'<iframe[^>]+src="https?://(?P<host>media\.clipyou\.ru)/index/player\?.*\brecord_id=(?P<id>\d+).*"', webpage)
if mobj is not None: if mobj is not None:
@ -2691,6 +2754,12 @@ class GenericIE(InfoExtractor):
return self.playlist_from_matches( return self.playlist_from_matches(
mediaset_urls, video_id, video_title, ie=MediasetIE.ie_key()) mediaset_urls, video_id, video_title, ie=MediasetIE.ie_key())
# Look for JOJ.sk embeds
joj_urls = JojIE._extract_urls(webpage)
if joj_urls:
return self.playlist_from_matches(
joj_urls, video_id, video_title, ie=JojIE.ie_key())
def merge_dicts(dict1, dict2): def merge_dicts(dict1, dict2):
merged = {} merged = {}
for k, v in dict1.items(): for k, v in dict1.items():

100
youtube_dl/extractor/joj.py Executable file
View File

@ -0,0 +1,100 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
int_or_none,
js_to_json,
try_get,
)
class JojIE(InfoExtractor):
    """Extractor for videos served by the media.joj.sk embed player.

    Accepts either the ``joj:<uuid>`` shorthand or a
    ``media.joj.sk/embed/<uuid>`` URL.
    """

    _VALID_URL = r'''(?x)
                    (?:
                        joj:|
                        https?://media\.joj\.sk/embed/
                    )
                    (?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})
                '''
    _TESTS = [{
        'url': 'https://media.joj.sk/embed/a388ec4c-6019-4a4a-9312-b1bee194e932',
        'info_dict': {
            'id': 'a388ec4c-6019-4a4a-9312-b1bee194e932',
            'ext': 'mp4',
            'title': 'NOVÉ BÝVANIE',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 3118,
        }
    }, {
        'url': 'joj:a388ec4c-6019-4a4a-9312-b1bee194e932',
        'only_matching': True,
    }]

    @staticmethod
    def _extract_urls(webpage):
        """Return the media.joj.sk embed URLs found in iframe ``src`` attributes."""
        return re.findall(
            r'<iframe\b[^>]+\bsrc=["\'](?P<url>(?:https?:)?//media\.joj\.sk/embed/[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})',
            webpage)

    def _real_extract(self, url):
        """Scrape the embed page for ``url`` and return its info dict.

        Formats come from the inline ``bitrates`` object of the embed page;
        when that yields nothing, the Video.php XML service is queried
        instead.
        """
        video_id = self._match_id(url)

        # Always fetch the canonical embed page, whatever form the URL had.
        webpage = self._download_webpage(
            'https://media.joj.sk/embed/%s' % video_id, video_id)

        title = self._search_regex(
            (r'videoTitle\s*:\s*(["\'])(?P<title>(?:(?!\1).)+)\1',
             r'<title>(?P<title>[^<]+)'), webpage, 'title',
            default=None, group='title') or self._og_search_title(webpage)

        # Missing or unparsable bitrates are tolerated (default='{}',
        # fatal=False); the XML fallback below covers that case.
        bitrates = self._parse_json(
            self._search_regex(
                r'(?s)bitrates\s*=\s*({.+?});', webpage, 'bitrates',
                default='{}'),
            video_id, transform_source=js_to_json, fatal=False)

        formats = []
        for format_url in try_get(bitrates, lambda x: x['mp4'], list) or []:
            if isinstance(format_url, compat_str):
                height = self._search_regex(
                    r'(\d+)[pP]\.', format_url, 'height', default=None)
                formats.append({
                    'url': format_url,
                    'format_id': '%sp' % height if height else None,
                    # height is None when the URL carries no "<N>p." marker;
                    # a plain int() would raise TypeError in that case.
                    'height': int_or_none(height),
                })
        if not formats:
            playlist = self._download_xml(
                'https://media.joj.sk/services/Video.php?clip=%s' % video_id,
                video_id)
            for file_el in playlist.findall('./files/file'):
                path = file_el.get('path')
                if not path:
                    continue
                format_id = file_el.get('id') or file_el.get('label')
                formats.append({
                    # Only the first "dat/" occurrence is dropped from the path.
                    'url': 'http://n16.joj.sk/storage/%s' % path.replace(
                        'dat/', '', 1),
                    'format_id': format_id,
                    'height': int_or_none(self._search_regex(
                        r'(\d+)[pP]', format_id or path, 'height',
                        default=None)),
                })
        self._sort_formats(formats)

        thumbnail = self._og_search_thumbnail(webpage)

        duration = int_or_none(self._search_regex(
            r'videoDuration\s*:\s*(\d+)', webpage, 'duration', fatal=False))

        return {
            'id': video_id,
            'title': title,
            'thumbnail': thumbnail,
            'duration': duration,
            'formats': formats,
        }

View File

@ -341,7 +341,7 @@ class NPOLiveIE(NPOBaseIE):
webpage = self._download_webpage(url, display_id) webpage = self._download_webpage(url, display_id)
live_id = self._search_regex( live_id = self._search_regex(
r'data-prid="([^"]+)"', webpage, 'live id') [r'media-id="([^"]+)"', r'data-prid="([^"]+)"'], webpage, 'live id')
return { return {
'_type': 'url_transparent', '_type': 'url_transparent',

View File

@ -12,47 +12,46 @@ from ..utils import (
class VeohIE(InfoExtractor): class VeohIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?veoh\.com/(?:watch|iphone/#_Watch)/(?P<id>(?:v|yapi-)[\da-zA-Z]+)' _VALID_URL = r'https?://(?:www\.)?veoh\.com/(?:watch|iphone/#_Watch)/(?P<id>(?:v|e|yapi-)[\da-zA-Z]+)'
_TESTS = [ _TESTS = [{
{ 'url': 'http://www.veoh.com/watch/v56314296nk7Zdmz3',
'url': 'http://www.veoh.com/watch/v56314296nk7Zdmz3', 'md5': '620e68e6a3cff80086df3348426c9ca3',
'md5': '620e68e6a3cff80086df3348426c9ca3', 'info_dict': {
'info_dict': { 'id': '56314296',
'id': '56314296', 'ext': 'mp4',
'ext': 'mp4', 'title': 'Straight Backs Are Stronger',
'title': 'Straight Backs Are Stronger', 'uploader': 'LUMOback',
'uploader': 'LUMOback', 'description': 'At LUMOback, we believe straight backs are stronger. The LUMOback Posture & Movement Sensor: It gently vibrates when you slouch, inspiring improved posture and mobility. Use the app to track your data and improve your posture over time. ',
'description': 'At LUMOback, we believe straight backs are stronger. The LUMOback Posture & Movement Sensor: It gently vibrates when you slouch, inspiring improved posture and mobility. Use the app to track your data and improve your posture over time. ',
},
}, },
{ }, {
'url': 'http://www.veoh.com/watch/v27701988pbTc4wzN?h1=Chile+workers+cover+up+to+avoid+skin+damage', 'url': 'http://www.veoh.com/watch/v27701988pbTc4wzN?h1=Chile+workers+cover+up+to+avoid+skin+damage',
'md5': '4a6ff84b87d536a6a71e6aa6c0ad07fa', 'md5': '4a6ff84b87d536a6a71e6aa6c0ad07fa',
'info_dict': { 'info_dict': {
'id': '27701988', 'id': '27701988',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Chile workers cover up to avoid skin damage', 'title': 'Chile workers cover up to avoid skin damage',
'description': 'md5:2bd151625a60a32822873efc246ba20d', 'description': 'md5:2bd151625a60a32822873efc246ba20d',
'uploader': 'afp-news', 'uploader': 'afp-news',
'duration': 123, 'duration': 123,
},
'skip': 'This video has been deleted.',
}, },
{ 'skip': 'This video has been deleted.',
'url': 'http://www.veoh.com/watch/v69525809F6Nc4frX', }, {
'md5': '4fde7b9e33577bab2f2f8f260e30e979', 'url': 'http://www.veoh.com/watch/v69525809F6Nc4frX',
'note': 'Embedded ooyala video', 'md5': '4fde7b9e33577bab2f2f8f260e30e979',
'info_dict': { 'note': 'Embedded ooyala video',
'id': '69525809', 'info_dict': {
'ext': 'mp4', 'id': '69525809',
'title': 'Doctors Alter Plan For Preteen\'s Weight Loss Surgery', 'ext': 'mp4',
'description': 'md5:f5a11c51f8fb51d2315bca0937526891', 'title': 'Doctors Alter Plan For Preteen\'s Weight Loss Surgery',
'uploader': 'newsy-videos', 'description': 'md5:f5a11c51f8fb51d2315bca0937526891',
}, 'uploader': 'newsy-videos',
'skip': 'This video has been deleted.',
}, },
] 'skip': 'This video has been deleted.',
}, {
'url': 'http://www.veoh.com/watch/e152215AJxZktGS',
'only_matching': True,
}]
def _extract_formats(self, source): def _extract_formats(self, source):
formats = [] formats = []

View File

@ -49,6 +49,10 @@ class VLiveIE(InfoExtractor):
}, },
}] }]
@classmethod
def suitable(cls, url):
    """Decline URLs that VLivePlaylistIE accepts; otherwise defer to the base check."""
    if VLivePlaylistIE.suitable(url):
        return False
    return super(VLiveIE, cls).suitable(url)
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
@ -261,3 +265,54 @@ class VLiveChannelIE(InfoExtractor):
return self.playlist_result( return self.playlist_result(
entries, channel_code, channel_name) entries, channel_code, channel_name)
class VLivePlaylistIE(InfoExtractor):
    """Extractor for vlive.tv playlist pages (``/video/<id>/playlist/<id>``)."""

    IE_NAME = 'vlive:playlist'
    _VALID_URL = r'https?://(?:(?:www|m)\.)?vlive\.tv/video/(?P<video_id>[0-9]+)/playlist/(?P<id>[0-9]+)'
    _TEST = {
        'url': 'http://www.vlive.tv/video/22867/playlist/22912',
        'info_dict': {
            'id': '22912',
            'title': 'Valentine Day Message from TWICE'
        },
        'playlist_mincount': 9
    }

    def _real_extract(self, url):
        """Return the playlist, or only its anchor video when 'noplaylist' is set."""
        url_match = re.match(self._VALID_URL, url)
        video_id = url_match.group('video_id')
        playlist_id = url_match.group('id')

        VIDEO_URL_TEMPLATE = 'http://www.vlive.tv/video/%s'

        # With the 'noplaylist' param set, hand the single video to VLiveIE.
        if self._downloader.params.get('noplaylist'):
            self.to_screen(
                'Downloading just video %s because of --no-playlist' % video_id)
            single_video_url = VIDEO_URL_TEMPLATE % video_id
            return self.url_result(
                single_video_url, ie=VLiveIE.ie_key(), video_id=video_id)

        self.to_screen(
            'Downloading playlist %s - add --no-playlist to just download video'
            % playlist_id)

        playlist_url = 'http://www.vlive.tv/video/%s/playlist/%s' % (
            video_id, playlist_id)
        webpage = self._download_webpage(playlist_url, playlist_id)

        # The page embeds the member video ids as a JSON array literal.
        seqs_json = self._search_regex(
            r'playlistVideoSeqs\s*=\s*(\[[^]]+\])', webpage,
            'playlist video seqs')
        item_ids = self._parse_json(seqs_json, playlist_id)

        entries = []
        for item_id in item_ids:
            entries.append(self.url_result(
                VIDEO_URL_TEMPLATE % item_id, ie=VLiveIE.ie_key(),
                video_id=compat_str(item_id)))

        # The title is optional: a missing heading must not abort extraction.
        playlist_name = self._html_search_regex(
            r'<div[^>]+class="[^"]*multicam_playlist[^>]*>\s*<h3[^>]+>([^<]+)',
            webpage, 'playlist title', fatal=False)

        return self.playlist_result(entries, playlist_id, playlist_name)

View File

@ -542,7 +542,7 @@ class FFmpegFixupM3u8PP(FFmpegPostProcessor):
temp_filename = prepend_extension(filename, 'temp') temp_filename = prepend_extension(filename, 'temp')
options = ['-c', 'copy', '-f', 'mp4', '-bsf:a', 'aac_adtstoasc'] options = ['-c', 'copy', '-f', 'mp4', '-bsf:a', 'aac_adtstoasc']
self._downloader.to_screen('[ffmpeg] Fixing malformated aac bitstream in "%s"' % filename) self._downloader.to_screen('[ffmpeg] Fixing malformed AAC bitstream in "%s"' % filename)
self.run_ffmpeg(filename, temp_filename, options) self.run_ffmpeg(filename, temp_filename, options)
os.remove(encodeFilename(filename)) os.remove(encodeFilename(filename))

View File

@ -1,3 +1,3 @@
from __future__ import unicode_literals from __future__ import unicode_literals
__version__ = '2017.07.02' __version__ = '2017.07.09'