Merge remote-tracking branch 'upstream/master'

Petr Novak 2017-11-25 02:19:39 +01:00
commit 3c211cff37
44 changed files with 635 additions and 269 deletions

.github/ISSUE_TEMPLATE.md

@@ -6,8 +6,8 @@
 ---
-### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.10.29*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
+### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.11.15*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
-- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.10.29**
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.11.15**
 ### Before submitting an *issue* make sure you have:
 - [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
@@ -35,7 +35,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl
 [debug] User config: []
 [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
 [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
-[debug] youtube-dl version 2017.10.29
+[debug] youtube-dl version 2017.11.15
 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
 [debug] Proxy map: {}

ChangeLog

@@ -1,3 +1,49 @@
+version <unreleased>
+
+Extractors
+* [youku] Fix extraction; update ccode (#14815)
++ [jwplatform] Support iframes (#14828)
+
+
+version 2017.11.15
+
+Core
+* [common] Skip Apple FairPlay m3u8 manifests (#14741)
+* [YoutubeDL] Fix playlist range optimization for --playlist-items (#14740)
+
+Extractors
+* [vshare] Capture and output error message
+* [vshare] Fix extraction (#14473)
+* [crunchyroll] Extract old RTMP formats
+* [tva] Fix extraction (#14736)
+* [gamespot] Lower preference of HTTP formats (#14652)
+* [instagram:user] Fix extraction (#14699)
+* [ccma] Fix typo (#14730)
+- Remove sensitive data from logging in messages
+* [instagram:user] Fix extraction (#14699)
++ [gamespot] Add support for article URLs (#14652)
+* [gamespot] Skip Brightcove Once HTTP formats (#14652)
+* [cartoonnetwork] Update tokenizer_src (#14666)
++ [wsj] Recognize another URL pattern (#14704)
+* [pandatv] Update API URL and sign format URLs (#14693)
+* [crunchyroll] Use old login method (#11572)
+
+
+version 2017.11.06
+
+Core
++ [extractor/common] Add protocol for f4m formats
+* [f4m] Prefer baseURL for relative URLs (#14660)
+* [extractor/common] Respect URL query in _extract_wowza_formats (#14645)
+
+Extractors
++ [hotstar:playlist] Add support for playlists (#12465)
+* [hotstar] Bypass geo restriction (#14672)
+- [22tracks] Remove extractor (#11024, #14628)
++ [skysports] Add support ooyala videos protected with embed_token (#14641)
+* [gamespot] Extract formats referenced with new data fields (#14652)
+* [spankbang] Detect unavailable videos (#14644)
+
+
 version 2017.10.29

 Core

docs/supportedsites.md

@@ -3,8 +3,6 @@
 - **1up.com**
 - **20min**
 - **220.ro**
-- **22tracks:genre**
-- **22tracks:track**
 - **24video**
 - **3qsdn**: 3Q SDN
 - **3sat**
@@ -342,6 +340,7 @@
 - **HornBunny**
 - **HotNewHipHop**
 - **HotStar**
+- **hotstar:playlist**
 - **Howcast**
 - **HowStuffWorks**
 - **HRTi**

youtube_dl/YoutubeDL.py

@@ -948,7 +948,8 @@ class YoutubeDL(object):
                 report_download(n_entries)
             else:  # iterable
                 if playlistitems:
-                    entries = make_playlistitems_entries(list(ie_entries))
+                    entries = make_playlistitems_entries(list(itertools.islice(
+                        ie_entries, 0, max(playlistitems))))
                 else:
                     entries = list(itertools.islice(
                         ie_entries, playliststart, playlistend))
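The fix above matters when ie_entries is a lazy generator: list(ie_entries) would fetch every entry even when --playlist-items asks for only a few, while slicing up to max(playlistitems) bounds the work. A minimal sketch of the idea; the fetch() generator and item numbers are made up for illustration:

    import itertools

    def fetch():
        # Stand-in for a lazy extractor generator; in the real code each
        # item can cost a network round trip.
        for n in itertools.count(1):
            print('fetching entry %d' % n)
            yield n

    playlistitems = [2, 5]  # e.g. --playlist-items 2,5
    # Only the first max(playlistitems) = 5 entries are ever produced.
    entries = list(itertools.islice(fetch(), 0, max(playlistitems)))
    print([entries[i - 1] for i in playlistitems])  # [2, 5]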

youtube_dl/extractor/animeondemand.py

@@ -78,7 +78,7 @@ class AnimeOnDemandIE(InfoExtractor):
             post_url = urljoin(self._LOGIN_URL, post_url)

         response = self._download_webpage(
-            post_url, None, 'Logging in as %s' % username,
+            post_url, None, 'Logging in',
             data=urlencode_postdata(login_form), headers={
                 'Referer': self._LOGIN_URL,
             })

youtube_dl/extractor/atresplayer.py

@@ -87,7 +87,7 @@ class AtresPlayerIE(InfoExtractor):
             self._LOGIN_URL, urlencode_postdata(login_form))
         request.add_header('Content-Type', 'application/x-www-form-urlencoded')
         response = self._download_webpage(
-            request, None, 'Logging in as %s' % username)
+            request, None, 'Logging in')

         error = self._html_search_regex(
             r'(?s)<ul[^>]+class="[^"]*\blist_error\b[^"]*">(.+?)</ul>',

youtube_dl/extractor/bambuser.py

@@ -59,7 +59,7 @@ class BambuserIE(InfoExtractor):
             self._LOGIN_URL, urlencode_postdata(login_form))
         request.add_header('Referer', self._LOGIN_URL)
         response = self._download_webpage(
-            request, None, 'Logging in as %s' % username)
+            request, None, 'Logging in')

         login_error = self._html_search_regex(
             r'(?s)<div class="messages error">(.+?)</div>',

youtube_dl/extractor/cartoonnetwork.py

@@ -31,7 +31,7 @@ class CartoonNetworkIE(TurnerBaseIE):
             'http://www.cartoonnetwork.com/video-seo-svc/episodeservices/getCvpPlaylist?networkName=CN2&' + query, video_id, {
                 'secure': {
                     'media_src': 'http://androidhls-secure.cdn.turner.com/toon/big',
-                    'tokenizer_src': 'http://www.cartoonnetwork.com/cntv/mvpd/processors/services/token_ipadAdobe.do',
+                    'tokenizer_src': 'https://token.vgtf.net/token/token_mobile',
                 },
             }, {
                 'url': url,

youtube_dl/extractor/ccma.py

@@ -93,7 +93,7 @@ class CCMAIE(InfoExtractor):
             'description': clean_html(informacio.get('descripcio')),
             'duration': duration,
             'timestamp': timestamp,
-            'thumnails': thumbnails,
+            'thumbnails': thumbnails,
             'subtitles': subtitles,
             'formats': formats,
         }

youtube_dl/extractor/common.py

@@ -592,19 +592,11 @@ class InfoExtractor(object):
         if not encoding:
             encoding = self._guess_encoding_from_content(content_type, webpage_bytes)
         if self._downloader.params.get('dump_intermediate_pages', False):
-            try:
-                url = url_or_request.get_full_url()
-            except AttributeError:
-                url = url_or_request
-            self.to_screen('Dumping request to ' + url)
+            self.to_screen('Dumping request to ' + urlh.geturl())
             dump = base64.b64encode(webpage_bytes).decode('ascii')
             self._downloader.to_screen(dump)
         if self._downloader.params.get('write_pages', False):
-            try:
-                url = url_or_request.get_full_url()
-            except AttributeError:
-                url = url_or_request
-            basen = '%s_%s' % (video_id, url)
+            basen = '%s_%s' % (video_id, urlh.geturl())
             if len(basen) > 240:
                 h = '___' + hashlib.md5(basen.encode('utf-8')).hexdigest()
                 basen = basen[:240 - len(h)] + h
@@ -1356,6 +1348,9 @@ class InfoExtractor(object):
         if '#EXT-X-FAXS-CM:' in m3u8_doc:  # Adobe Flash Access
             return []

+        if re.search(r'#EXT-X-SESSION-KEY:.*?URI="skd://', m3u8_doc):  # Apple FairPlay
+            return []
+
         formats = []

         format_url = lambda u: (
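The new check bails out of m3u8 parsing when the master playlist advertises a FairPlay session key, since such streams cannot be downloaded anyway. A standalone illustration of the detection; the manifest excerpt is a made-up example of the skd:// key URI form:

    import re

    # Hypothetical master-playlist excerpt advertising Apple FairPlay DRM.
    m3u8_doc = '''#EXTM3U
    #EXT-X-SESSION-KEY:METHOD=SAMPLE-AES,URI="skd://example-key-id",KEYFORMAT="com.apple.streamingkeydelivery"
    #EXT-X-STREAM-INF:BANDWIDTH=1280000
    low/index.m3u8'''

    if re.search(r'#EXT-X-SESSION-KEY:.*?URI="skd://', m3u8_doc):  # Apple FairPlay
        print('DRM-protected stream, returning no formats')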

youtube_dl/extractor/crunchyroll.py

@@ -38,11 +38,32 @@ class CrunchyrollBaseIE(InfoExtractor):
     _LOGIN_FORM = 'login_form'
     _NETRC_MACHINE = 'crunchyroll'

+    def _call_rpc_api(self, method, video_id, note=None, data=None):
+        data = data or {}
+        data['req'] = 'RpcApi' + method
+        data = compat_urllib_parse_urlencode(data).encode('utf-8')
+        return self._download_xml(
+            'http://www.crunchyroll.com/xml/',
+            video_id, note, fatal=False, data=data, headers={
+                'Content-Type': 'application/x-www-form-urlencoded',
+            })
+
     def _login(self):
         (username, password) = self._get_login_info()
         if username is None:
             return

+        self._download_webpage(
+            'https://www.crunchyroll.com/?a=formhandler',
+            None, 'Logging in', 'Wrong login info',
+            data=urlencode_postdata({
+                'formname': 'RpcApiUser_Login',
+                'next_url': 'https://www.crunchyroll.com/acct/membership',
+                'name': username,
+                'password': password,
+            }))
+
+        '''
         login_page = self._download_webpage(
             self._LOGIN_URL, None, 'Downloading login page')

@@ -86,6 +107,7 @@ class CrunchyrollBaseIE(InfoExtractor):
             raise ExtractorError('Unable to login: %s' % error, expected=True)

         raise ExtractorError('Unable to log in')
+        '''

     def _real_initialize(self):
         self._login()

@@ -365,15 +387,19 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
     def _get_subtitles(self, video_id, webpage):
         subtitles = {}
         for sub_id, sub_name in re.findall(r'\bssid=([0-9]+)"[^>]+?\btitle="([^"]+)', webpage):
-            sub_page = self._download_webpage(
-                'http://www.crunchyroll.com/xml/?req=RpcApiSubtitle_GetXml&subtitle_script_id=' + sub_id,
-                video_id, note='Downloading subtitles for ' + sub_name)
-            id = self._search_regex(r'id=\'([0-9]+)', sub_page, 'subtitle_id', fatal=False)
-            iv = self._search_regex(r'<iv>([^<]+)', sub_page, 'subtitle_iv', fatal=False)
-            data = self._search_regex(r'<data>([^<]+)', sub_page, 'subtitle_data', fatal=False)
-            if not id or not iv or not data:
+            sub_doc = self._call_rpc_api(
+                'Subtitle_GetXml', video_id,
+                'Downloading subtitles for ' + sub_name, data={
+                    'subtitle_script_id': sub_id,
+                })
+            if not sub_doc:
                 continue
-            subtitle = self._decrypt_subtitles(data, iv, id).decode('utf-8')
+            sid = sub_doc.get('id')
+            iv = xpath_text(sub_doc, 'iv', 'subtitle iv')
+            data = xpath_text(sub_doc, 'data', 'subtitle data')
+            if not sid or not iv or not data:
+                continue
+            subtitle = self._decrypt_subtitles(data, iv, sid).decode('utf-8')
             lang_code = self._search_regex(r'lang_code=["\']([^"\']+)', subtitle, 'subtitle_lang_code', fatal=False)
             if not lang_code:
                 continue
@@ -444,15 +470,29 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
         for fmt in available_fmts:
             stream_quality, stream_format = self._FORMAT_IDS[fmt]
             video_format = fmt + 'p'
-            streamdata_req = sanitized_Request(
-                'http://www.crunchyroll.com/xml/?req=RpcApiVideoPlayer_GetStandardConfig&media_id=%s&video_format=%s&video_quality=%s'
-                % (video_id, stream_format, stream_quality),
-                compat_urllib_parse_urlencode({'current_page': url}).encode('utf-8'))
-            streamdata_req.add_header('Content-Type', 'application/x-www-form-urlencoded')
-            streamdata = self._download_xml(
-                streamdata_req, video_id,
-                note='Downloading media info for %s' % video_format)
-            stream_info = streamdata.find('./{default}preload/stream_info')
+            stream_infos = []
+            streamdata = self._call_rpc_api(
+                'VideoPlayer_GetStandardConfig', video_id,
+                'Downloading media info for %s' % video_format, data={
+                    'media_id': video_id,
+                    'video_format': stream_format,
+                    'video_quality': stream_quality,
+                    'current_page': url,
+                })
+            if streamdata:
+                stream_info = streamdata.find('./{default}preload/stream_info')
+                if stream_info:
+                    stream_infos.append(stream_info)
+            stream_info = self._call_rpc_api(
+                'VideoEncode_GetStreamInfo', video_id,
+                'Downloading stream info for %s' % video_format, data={
+                    'media_id': video_id,
+                    'video_format': stream_format,
+                    'video_encode_quality': stream_quality,
+                })
+            if stream_info:
+                stream_infos.append(stream_info)
+            for stream_info in stream_infos:
                 video_encode_id = xpath_text(stream_info, './video_encode_id')
                 if video_encode_id in video_encode_ids:
                     continue
@@ -473,7 +513,6 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
                 metadata = stream_info.find('./metadata')
                 format_info = {
                     'format': video_format,
-                    'format_id': video_format,
                     'height': int_or_none(xpath_text(metadata, './height')),
                     'width': int_or_none(xpath_text(metadata, './width')),
                 }
@@ -486,23 +525,24 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
                         path='%s/%s' % (remove_end(parsed_video_url.path, '/'), video_file.split(':')[-1])))
                 if self._is_valid_url(direct_video_url, video_id, video_format):
                     format_info.update({
+                        'format_id': 'http-' + video_format,
                         'url': direct_video_url,
                     })
                     formats.append(format_info)
                     continue

                 format_info.update({
+                    'format_id': 'rtmp-' + video_format,
                     'url': video_url,
                     'play_path': video_file,
                     'ext': 'flv',
                 })
                 formats.append(format_info)
-        self._sort_formats(formats)
+        self._sort_formats(formats, ('height', 'width', 'tbr', 'fps'))

-        metadata = self._download_xml(
-            'http://www.crunchyroll.com/xml', video_id,
-            note='Downloading media info', query={
-                'req': 'RpcApiVideoPlayer_GetMediaMetadata',
+        metadata = self._call_rpc_api(
+            'VideoPlayer_GetMediaMetadata', video_id,
+            note='Downloading media info', data={
                 'media_id': video_id,
             })
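The new _call_rpc_api helper centralizes what the call sites above have in common: every Crunchyroll RPC method is a form-encoded POST to the same /xml/ endpoint, with the method name carried in the req field. A rough standalone sketch of the request construction only; no request is sent and the parameter values are illustrative:

    try:
        from urllib.parse import urlencode  # Python 3
    except ImportError:
        from urllib import urlencode  # Python 2

    def build_rpc_request(method, data=None):
        # Same shape as _call_rpc_api above: the RPC method rides in 'req'.
        data = dict(data or {})
        data['req'] = 'RpcApi' + method
        body = urlencode(data).encode('utf-8')
        headers = {'Content-Type': 'application/x-www-form-urlencoded'}
        return 'http://www.crunchyroll.com/xml/', body, headers

    print(build_rpc_request('Subtitle_GetXml', {'subtitle_script_id': '123'}))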

youtube_dl/extractor/dramafever.py

@@ -54,7 +54,7 @@ class DramaFeverBaseIE(AMPIE):
         request = sanitized_Request(
             self._LOGIN_URL, urlencode_postdata(login_form))
         response = self._download_webpage(
-            request, None, 'Logging in as %s' % username)
+            request, None, 'Logging in')

         if all(logout_pattern not in response
                for logout_pattern in ['href="/accounts/logout/"', '>Log out<']):

youtube_dl/extractor/drtuber.py

@@ -10,7 +10,7 @@ from ..utils import (


 class DrTuberIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?drtuber\.com/(?:video|embed)/(?P<id>\d+)(?:/(?P<display_id>[\w-]+))?'
+    _VALID_URL = r'https?://(?:(?:www|m)\.)?drtuber\.com/(?:video|embed)/(?P<id>\d+)(?:/(?P<display_id>[\w-]+))?'
     _TESTS = [{
         'url': 'http://www.drtuber.com/video/1740434/hot-perky-blonde-naked-golf',
         'md5': '93e680cf2536ad0dfb7e74d94a89facd',
@@ -28,6 +28,9 @@ class DrTuberIE(InfoExtractor):
     }, {
         'url': 'http://www.drtuber.com/embed/489939',
         'only_matching': True,
+    }, {
+        'url': 'http://m.drtuber.com/video/3893529/lingerie-blowjob-from-beautiful-teen',
+        'only_matching': True,
     }]

     @staticmethod

youtube_dl/extractor/extractors.py

@@ -348,7 +348,6 @@ from .firstpost import FirstpostIE
 from .firsttv import FirstTVIE
 from .fivemin import FiveMinIE
 from .fivetv import FiveTVIE
-from .fktv import FKTVIE
 from .flickr import FlickrIE
 from .flipagram import FlipagramIE
 from .folketinget import FolketingetIE
@@ -432,7 +431,10 @@ from .hitbox import HitboxIE, HitboxLiveIE
 from .hitrecord import HitRecordIE
 from .hornbunny import HornBunnyIE
 from .hotnewhiphop import HotNewHipHopIE
-from .hotstar import HotStarIE
+from .hotstar import (
+    HotStarIE,
+    HotStarPlaylistIE,
+)
 from .howcast import HowcastIE
 from .howstuffworks import HowStuffWorksIE
 from .hrti import (
@@ -569,6 +571,7 @@ from .mangomolo import (
     MangomoloLiveIE,
 )
 from .manyvids import ManyVidsIE
+from .massengeschmacktv import MassengeschmackTVIE
 from .matchtv import MatchTVIE
 from .mdr import MDRIE
 from .mediaset import MediasetIE
@@ -786,6 +789,7 @@ from .patreon import PatreonIE
 from .pbs import PBSIE
 from .pearvideo import PearVideoIE
 from .people import PeopleIE
+from .performgroup import PerformGroupIE
 from .periscope import (
     PeriscopeIE,
     PeriscopeUserIE,

youtube_dl/extractor/faz.py

@@ -1,7 +1,10 @@
 # coding: utf-8
 from __future__ import unicode_literals

+import re
+
 from .common import InfoExtractor
+from ..compat import compat_etree_fromstring
 from ..utils import (
     xpath_element,
     xpath_text,
@@ -43,10 +46,15 @@ class FazIE(InfoExtractor):
         webpage = self._download_webpage(url, video_id)

         description = self._og_search_description(webpage)
-        config_xml_url = self._search_regex(
-            r'videoXMLURL\s*=\s*"([^"]+)', webpage, 'config xml url')
-        config = self._download_xml(
-            config_xml_url, video_id, 'Downloading config xml')
+        media = self._html_search_regex(
+            r"data-videojs-media='([^']+)",
+            webpage, 'media')
+        if media == 'extern':
+            perform_url = self._search_regex(
+                r"<iframe[^>]+?src='((?:http:)?//player\.performgroup\.com/eplayer/eplayer\.html#/?[0-9a-f]{26}\.[0-9a-z]{26})",
+                webpage, 'perform url')
+            return self.url_result(perform_url)
+        config = compat_etree_fromstring(media)

         encodings = xpath_element(config, 'ENCODINGS', 'encodings', True)
         formats = []
@@ -55,12 +63,24 @@ class FazIE(InfoExtractor):
             if encoding is not None:
                 encoding_url = xpath_text(encoding, 'FILENAME')
                 if encoding_url:
-                    formats.append({
+                    tbr = xpath_text(encoding, 'AVERAGEBITRATE', 1000)
+                    if tbr:
+                        tbr = int_or_none(tbr.replace(',', '.'))
+                    f = {
                         'url': encoding_url,
                         'format_id': code.lower(),
                         'quality': pref,
-                        'tbr': int_or_none(xpath_text(encoding, 'AVERAGEBITRATE')),
+                        'tbr': tbr,
+                        'vcodec': xpath_text(encoding, 'CODEC'),
+                    }
+                    mobj = re.search(r'(\d+)x(\d+)_(\d+)\.mp4', encoding_url)
+                    if mobj:
+                        f.update({
+                            'width': int(mobj.group(1)),
+                            'height': int(mobj.group(2)),
+                            'tbr': tbr or int(mobj.group(3)),
                         })
+                    formats.append(f)
         self._sort_formats(formats)

         return {
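The filename fallback works because, judging from the regex above, FAZ encoding URLs carry resolution and bitrate in the basename. A tiny check with a made-up URL in that inferred shape:

    import re

    encoding_url = 'http://example.com/media/clip_640x360_1200.mp4'  # illustrative
    mobj = re.search(r'(\d+)x(\d+)_(\d+)\.mp4', encoding_url)
    if mobj:
        width, height, tbr = (int(mobj.group(i)) for i in (1, 2, 3))
        print(width, height, tbr)  # 640 360 1200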

youtube_dl/extractor/fktv.py

@@ -1,51 +0,0 @@
-from __future__ import unicode_literals
-
-from .common import InfoExtractor
-from ..utils import (
-    clean_html,
-    determine_ext,
-    js_to_json,
-)
-
-
-class FKTVIE(InfoExtractor):
-    IE_NAME = 'fernsehkritik.tv'
-    _VALID_URL = r'https?://(?:www\.)?fernsehkritik\.tv/folge-(?P<id>[0-9]+)(?:/.*)?'
-
-    _TEST = {
-        'url': 'http://fernsehkritik.tv/folge-1',
-        'md5': '21f0b0c99bce7d5b524eb1b17b1c6d79',
-        'info_dict': {
-            'id': '1',
-            'ext': 'mp4',
-            'title': 'Folge 1 vom 10. April 2007',
-            'thumbnail': r're:^https?://.*\.jpg$',
-        },
-    }
-
-    def _real_extract(self, url):
-        episode = self._match_id(url)
-
-        webpage = self._download_webpage(
-            'http://fernsehkritik.tv/folge-%s/play' % episode, episode)
-        title = clean_html(self._html_search_regex(
-            '<h3>([^<]+)</h3>', webpage, 'title'))
-        thumbnail = self._search_regex(r'POSTER\s*=\s*"([^"]+)', webpage, 'thumbnail', fatal=False)
-        sources = self._parse_json(self._search_regex(r'(?s)MEDIA\s*=\s*(\[.+?\]);', webpage, 'media'), episode, js_to_json)
-
-        formats = []
-        for source in sources:
-            furl = source.get('src')
-            if furl:
-                formats.append({
-                    'url': furl,
-                    'format_id': determine_ext(furl),
-                })
-        self._sort_formats(formats)
-
-        return {
-            'id': episode,
-            'title': title,
-            'formats': formats,
-            'thumbnail': thumbnail,
-        }

youtube_dl/extractor/fox9.py

@@ -2,7 +2,6 @@
 from __future__ import unicode_literals

 from .anvato import AnvatoIE
-from ..utils import js_to_json


 class FOX9IE(AnvatoIE):
@@ -34,9 +33,9 @@ class FOX9IE(AnvatoIE):

         video_id = self._parse_json(
             self._search_regex(
-                r'AnvatoPlaylist\s*\(\s*(\[.+?\])\s*\)\s*;',
+                r"this\.videosJson\s*=\s*'(\[.+?\])';",
                 webpage, 'anvato playlist'),
-            video_id, transform_source=js_to_json)[0]['video']
+            video_id)[0]['video']

         return self._get_anvato_videos(
             'anvato_epfox_app_web_prod_b3373168e12f423f41504f207000188daf88251b',

youtube_dl/extractor/francetv.py

@@ -363,6 +363,6 @@ class CultureboxIE(FranceTVBaseInfoExtractor):
             raise ExtractorError('Video %s is not available' % name, expected=True)

         video_id, catalogue = self._search_regex(
-            r'"http://videos\.francetv\.fr/video/([^@]+@[^"]+)"', webpage, 'video id').split('@')
+            r'"https?://videos\.francetv\.fr/video/([^@]+@[^"]+)"', webpage, 'video id').split('@')

         return self._extract_video(video_id, catalogue)

youtube_dl/extractor/funimation.py

@@ -57,7 +57,7 @@ class FunimationIE(InfoExtractor):
         try:
             data = self._download_json(
                 'https://prod-api-funimationnow.dadcdigital.com/api/auth/login/',
-                None, 'Logging in as %s' % username, data=urlencode_postdata({
+                None, 'Logging in', data=urlencode_postdata({
                     'username': username,
                     'password': password,
                 }))

youtube_dl/extractor/gamespot.py

@@ -14,7 +14,7 @@ from ..utils import (


 class GameSpotIE(OnceIE):
-    _VALID_URL = r'https?://(?:www\.)?gamespot\.com/videos/(?:[^/]+/\d+-|embed/)(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:www\.)?gamespot\.com/(?:video|article)s/(?:[^/]+/\d+-|embed/)(?P<id>\d+)'
     _TESTS = [{
         'url': 'http://www.gamespot.com/videos/arma-3-community-guide-sitrep-i/2300-6410818/',
         'md5': 'b2a30deaa8654fcccd43713a6b6a4825',
@@ -38,6 +38,9 @@ class GameSpotIE(OnceIE):
     }, {
         'url': 'https://www.gamespot.com/videos/embed/6439218/',
         'only_matching': True,
+    }, {
+        'url': 'https://www.gamespot.com/articles/the-last-of-us-2-receives-new-ps4-trailer/1100-6454469/',
+        'only_matching': True,
     }]

     def _real_extract(self, url):
@@ -108,7 +111,8 @@ class GameSpotIE(OnceIE):
         onceux_url = self._parse_json(unescapeHTML(onceux_json), page_id).get('metadataUri')
         if onceux_url:
             formats.extend(self._extract_once_formats(re.sub(
-                r'https?://[^/]+', 'http://once.unicornmedia.com', onceux_url)))
+                r'https?://[^/]+', 'http://once.unicornmedia.com', onceux_url),
+                http_formats_preference=-1))

         if not formats:
             for quality in ['sd', 'hd']:

youtube_dl/extractor/generic.py

@@ -102,6 +102,7 @@ from .joj import JojIE
 from .megaphone import MegaphoneIE
 from .vzaar import VzaarIE
 from .channel9 import Channel9IE
+from .vshare import VShareIE


 class GenericIE(InfoExtractor):
@@ -1098,9 +1099,9 @@ class GenericIE(InfoExtractor):
         },
         # jwplayer rtmp
         {
-            'url': 'http://www.suffolk.edu/sjc/',
+            'url': 'http://www.suffolk.edu/sjc/live.php',
             'info_dict': {
-                'id': 'sjclive',
+                'id': 'live',
                 'ext': 'flv',
                 'title': 'Massachusetts Supreme Judicial Court Oral Arguments',
                 'uploader': 'www.suffolk.edu',
@@ -1108,7 +1109,7 @@ class GenericIE(InfoExtractor):
             'params': {
                 'skip_download': True,
             },
-            'skip': 'does not contain a video anymore',
+            'skip': 'Only has video a few mornings per month, see http://www.suffolk.edu/sjc/',
         },
         # Complex jwplayer
         {
@@ -1135,6 +1136,19 @@ class GenericIE(InfoExtractor):
                 'skip_download': True,
             }
         },
+        {
+            # JWPlatform iframe
+            'url': 'https://www.mediaite.com/tv/dem-senator-claims-gary-cohn-faked-a-bad-connection-during-trump-call-to-get-him-off-the-phone/',
+            'md5': 'ca00a040364b5b439230e7ebfd02c4e9',
+            'info_dict': {
+                'id': 'O0c5JcKT',
+                'ext': 'mp4',
+                'upload_date': '20171122',
+                'timestamp': 1511366290,
+                'title': 'Dem Senator Claims Gary Cohn Faked a Bad Connection During Trump Call to Get Him Off the Phone',
+            },
+            'add_ie': [JWPlatformIE.ie_key()],
+        },
         {
             # Video.js embed, multiple formats
             'url': 'http://ortcam.com/solidworks-урок-6-настройка-чертежа_33f9b7351.html',
@@ -1921,6 +1935,16 @@ class GenericIE(InfoExtractor):
                 'title': 'Rescue Kit 14 Free Edition - Getting started',
             },
             'playlist_count': 4,
+        },
+        {
+            # vshare embed
+            'url': 'https://youtube-dl-demo.neocities.org/vshare.html',
+            'md5': '17b39f55b5497ae8b59f5fbce8e35886',
+            'info_dict': {
+                'id': '0f64ce6',
+                'title': 'vl14062007715967',
+                'ext': 'mp4',
+            }
         }
         # {
         #     # TODO: find another test
@@ -2879,6 +2903,11 @@ class GenericIE(InfoExtractor):
             return self.playlist_from_matches(
                 channel9_urls, video_id, video_title, ie=Channel9IE.ie_key())

+        vshare_urls = VShareIE._extract_urls(webpage)
+        if vshare_urls:
+            return self.playlist_from_matches(
+                vshare_urls, video_id, video_title, ie=VShareIE.ie_key())
+
         def merge_dicts(dict1, dict2):
             merged = {}
             for k, v in dict1.items():

youtube_dl/extractor/hotstar.py

@@ -1,22 +1,47 @@
 # coding: utf-8
 from __future__ import unicode_literals

+import re
+
 from .common import InfoExtractor
+from ..compat import compat_str
 from ..utils import (
-    ExtractorError,
     determine_ext,
+    ExtractorError,
     int_or_none,
 )


-class HotStarIE(InfoExtractor):
+class HotStarBaseIE(InfoExtractor):
+    _GEO_COUNTRIES = ['IN']
+
+    def _download_json(self, *args, **kwargs):
+        response = super(HotStarBaseIE, self)._download_json(*args, **kwargs)
+        if response['resultCode'] != 'OK':
+            if kwargs.get('fatal'):
+                raise ExtractorError(
+                    response['errorDescription'], expected=True)
+            return None
+        return response['resultObj']
+
+    def _download_content_info(self, content_id):
+        return self._download_json(
+            'https://account.hotstar.com/AVS/besc', content_id, query={
+                'action': 'GetAggregatedContentDetails',
+                'appVersion': '5.0.40',
+                'channel': 'PCTV',
+                'contentId': content_id,
+            })['contentInfo'][0]
+
+
+class HotStarIE(HotStarBaseIE):
     _VALID_URL = r'https?://(?:www\.)?hotstar\.com/(?:.+?[/-])?(?P<id>\d{10})'
     _TESTS = [{
         'url': 'http://www.hotstar.com/on-air-with-aib--english-1000076273',
         'info_dict': {
             'id': '1000076273',
             'ext': 'mp4',
-            'title': 'On Air With AIB - English',
+            'title': 'On Air With AIB',
             'description': 'md5:c957d8868e9bc793ccb813691cc4c434',
             'timestamp': 1447227000,
             'upload_date': '20151111',
@@ -34,23 +59,11 @@ class HotStarIE(InfoExtractor):
         'only_matching': True,
     }]

-    def _download_json(self, url_or_request, video_id, note='Downloading JSON metadata', fatal=True, query=None):
-        json_data = super(HotStarIE, self)._download_json(
-            url_or_request, video_id, note, fatal=fatal, query=query)
-        if json_data['resultCode'] != 'OK':
-            if fatal:
-                raise ExtractorError(json_data['errorDescription'])
-            return None
-        return json_data['resultObj']
-
     def _real_extract(self, url):
         video_id = self._match_id(url)
-        video_data = self._download_json(
-            'http://account.hotstar.com/AVS/besc', video_id, query={
-                'action': 'GetAggregatedContentDetails',
-                'channel': 'PCTV',
-                'contentId': video_id,
-            })['contentInfo'][0]
+
+        video_data = self._download_content_info(video_id)

         title = video_data['episodeTitle']

         if video_data.get('encrypted') == 'Y':
@@ -99,3 +112,51 @@ class HotStarIE(InfoExtractor):
             'episode_number': int_or_none(video_data.get('episodeNumber')),
             'series': video_data.get('contentTitle'),
         }
+
+
+class HotStarPlaylistIE(HotStarBaseIE):
+    IE_NAME = 'hotstar:playlist'
+    _VALID_URL = r'(?P<url>https?://(?:www\.)?hotstar\.com/tv/[^/]+/(?P<content_id>\d+))/(?P<type>[^/]+)/(?P<id>\d+)'
+    _TESTS = [{
+        'url': 'http://www.hotstar.com/tv/pratidaan/14982/episodes/14812/9993',
+        'info_dict': {
+            'id': '14812',
+        },
+        'playlist_mincount': 75,
+    }, {
+        'url': 'http://www.hotstar.com/tv/pratidaan/14982/popular-clips/9998/9998',
+        'only_matching': True,
+    }]

+    _ITEM_TYPES = {
+        'episodes': 'EPISODE',
+        'popular-clips': 'CLIPS',
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        base_url = mobj.group('url')
+        content_id = mobj.group('content_id')
+        playlist_type = mobj.group('type')
+
+        content_info = self._download_content_info(content_id)
+        playlist_id = compat_str(content_info['categoryId'])
+
+        collection = self._download_json(
+            'https://search.hotstar.com/AVS/besc', playlist_id, query={
+                'action': 'SearchContents',
+                'appVersion': '5.0.40',
+                'channel': 'PCTV',
+                'moreFilters': 'series:%s;' % playlist_id,
+                'query': '*',
+                'searchOrder': 'last_broadcast_date desc,year desc,title asc',
+                'type': self._ITEM_TYPES.get(playlist_type, 'EPISODE'),
+            })
+
+        entries = [
+            self.url_result(
+                '%s/_/%s' % (base_url, video['contentId']),
+                ie=HotStarIE.ie_key(), video_id=video['contentId'])
+            for video in collection['response']['docs']
+            if video.get('contentId')]
+
+        return self.playlist_result(entries, playlist_id)
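Both extractors now funnel every API call through the overridden _download_json, which unwraps the resultObj envelope once and surfaces the service's own errorDescription on failure. The gate itself reduces to the following; the payloads are toy values and no network call is made:

    def unwrap(payload, fatal=True):
        # resultCode gate as in HotStarBaseIE._download_json above.
        if payload.get('resultCode') != 'OK':
            if fatal:
                raise ValueError(payload.get('errorDescription', 'unknown error'))
            return None
        return payload['resultObj']

    print(unwrap({'resultCode': 'OK',
                  'resultObj': {'contentInfo': [{'episodeTitle': 'demo'}]}}))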

youtube_dl/extractor/instagram.py

@@ -1,5 +1,6 @@
 from __future__ import unicode_literals

+import itertools
 import re

 from .common import InfoExtractor
@@ -7,7 +8,6 @@ from ..compat import compat_str
 from ..utils import (
     get_element_by_attribute,
     int_or_none,
-    limit_length,
     lowercase_escape,
     try_get,
 )
@@ -130,13 +130,21 @@ class InstagramIE(InfoExtractor):
         video_url = media.get('video_url')
         height = int_or_none(media.get('dimensions', {}).get('height'))
         width = int_or_none(media.get('dimensions', {}).get('width'))
-        description = media.get('caption')
+        description = try_get(
+            media, lambda x: x['edge_media_to_caption']['edges'][0]['node']['text'],
+            compat_str) or media.get('caption')
         thumbnail = media.get('display_src')
-        timestamp = int_or_none(media.get('date'))
+        timestamp = int_or_none(media.get('taken_at_timestamp') or media.get('date'))
         uploader = media.get('owner', {}).get('full_name')
         uploader_id = media.get('owner', {}).get('username')
-        like_count = int_or_none(media.get('likes', {}).get('count'))
-        comment_count = int_or_none(media.get('comments', {}).get('count'))
+
+        def get_count(key, kind):
+            return int_or_none(try_get(
+                media, (lambda x: x['edge_media_%s' % key]['count'],
+                        lambda x: x['%ss' % kind]['count'])))
+        like_count = get_count('preview_like', 'like')
+        comment_count = get_count('to_comment', 'comment')
+
         comments = [{
             'author': comment.get('user', {}).get('username'),
             'author_id': comment.get('user', {}).get('id'),
@@ -212,7 +220,7 @@ class InstagramIE(InfoExtractor):
 class InstagramUserIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?instagram\.com/(?P<username>[^/]{2,})/?(?:$|[?#])'
+    _VALID_URL = r'https?://(?:www\.)?instagram\.com/(?P<id>[^/]{2,})/?(?:$|[?#])'
     IE_DESC = 'Instagram user profile'
     IE_NAME = 'instagram:user'
     _TEST = {
@@ -221,82 +229,79 @@ class InstagramUserIE(InfoExtractor):
             'id': 'porsche',
             'title': 'porsche',
         },
-        'playlist_mincount': 2,
-        'playlist': [{
-            'info_dict': {
-                'id': '614605558512799803_462752227',
-                'ext': 'mp4',
-                'title': '#Porsche Intelligent Performance.',
-                'thumbnail': r're:^https?://.*\.jpg',
-                'uploader': 'Porsche',
-                'uploader_id': 'porsche',
-                'timestamp': 1387486713,
-                'upload_date': '20131219',
-            },
-        }],
+        'playlist_count': 5,
         'params': {
             'extract_flat': True,
             'skip_download': True,
+            'playlistend': 5,
         }
     }

-    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        uploader_id = mobj.group('username')
-
-        entries = []
-        page_count = 0
-        media_url = 'http://instagram.com/%s/media' % uploader_id
-        while True:
-            page = self._download_json(
-                media_url, uploader_id,
-                note='Downloading page %d ' % (page_count + 1),
-            )
-            page_count += 1
-
-            for it in page['items']:
-                if it.get('type') != 'video':
-                    continue
-                like_count = int_or_none(it.get('likes', {}).get('count'))
-                user = it.get('user', {})
-
-                formats = [{
-                    'format_id': k,
-                    'height': v.get('height'),
-                    'width': v.get('width'),
-                    'url': v['url'],
-                } for k, v in it['videos'].items()]
-                self._sort_formats(formats)
-
-                thumbnails_el = it.get('images', {})
-                thumbnail = thumbnails_el.get('thumbnail', {}).get('url')
-
-                # In some cases caption is null, which corresponds to None
-                # in python. As a result, it.get('caption', {}) gives None
-                title = (it.get('caption') or {}).get('text', it['id'])
-
-                entries.append({
-                    'id': it['id'],
-                    'title': limit_length(title, 80),
-                    'formats': formats,
-                    'thumbnail': thumbnail,
-                    'webpage_url': it.get('link'),
-                    'uploader': user.get('full_name'),
-                    'uploader_id': user.get('username'),
-                    'like_count': like_count,
-                    'timestamp': int_or_none(it.get('created_time')),
-                })
-
-            if not page['items']:
-                break
-            max_id = page['items'][-1]['id'].split('_')[0]
-            media_url = (
-                'http://instagram.com/%s/media?max_id=%s' % (
-                    uploader_id, max_id))
-
-        return {
-            '_type': 'playlist',
-            'entries': entries,
-            'id': uploader_id,
-            'title': uploader_id,
-        }
+    def _entries(self, uploader_id):
+        query = {
+            '__a': 1,
+        }
+
+        def get_count(kind):
+            return int_or_none(try_get(
+                node, lambda x: x['%ss' % kind]['count']))
+
+        for page_num in itertools.count(1):
+            page = self._download_json(
+                'https://instagram.com/%s/' % uploader_id, uploader_id,
+                note='Downloading page %d' % page_num,
+                fatal=False, query=query)
+            if not page:
+                break
+
+            nodes = try_get(page, lambda x: x['user']['media']['nodes'], list)
+            if not nodes:
+                break
+
+            max_id = None
+
+            for node in nodes:
+                node_id = node.get('id')
+                if node_id:
+                    max_id = node_id
+
+                if node.get('__typename') != 'GraphVideo' and node.get('is_video') is not True:
+                    continue
+                video_id = node.get('code')
+                if not video_id:
+                    continue
+
+                info = self.url_result(
+                    'https://instagram.com/p/%s/' % video_id,
+                    ie=InstagramIE.ie_key(), video_id=video_id)
+
+                description = try_get(
+                    node, [lambda x: x['caption'], lambda x: x['text']['id']],
+                    compat_str)
+                thumbnail = node.get('thumbnail_src') or node.get('display_src')
+                timestamp = int_or_none(node.get('date'))
+
+                comment_count = get_count('comment')
+                like_count = get_count('like')
+                view_count = int_or_none(node.get('video_views'))
+
+                info.update({
+                    'description': description,
+                    'thumbnail': thumbnail,
+                    'timestamp': timestamp,
+                    'comment_count': comment_count,
+                    'like_count': like_count,
+                    'view_count': view_count,
+                })
+
+                yield info
+
+            if not max_id:
+                break
+
+            query['max_id'] = max_id
+
+    def _real_extract(self, url):
+        uploader_id = self._match_id(url)
+        return self.playlist_result(
+            self._entries(uploader_id), uploader_id, uploader_id)
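The rewritten _entries generator uses cursor pagination: each page yields nodes, and the id of the last node seen becomes the max_id query parameter for the next request. The pattern in isolation, with a made-up in-memory feed instead of the Instagram endpoint:

    import itertools

    def paginate(fetch_page):
        # Cursor pagination as in _entries above: the last seen node id
        # becomes the cursor for the next fetch.
        max_id = None
        for page_num in itertools.count(1):
            nodes = fetch_page(max_id)
            if not nodes:
                break
            for node in nodes:
                yield node
            max_id = nodes[-1]['id']

    # Toy feed: pages keyed by cursor value.
    pages = {None: [{'id': 'a'}, {'id': 'b'}], 'b': [{'id': 'c'}], 'c': []}
    print(list(paginate(lambda cursor: pages.get(cursor, []))))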

youtube_dl/extractor/jwplatform.py

@@ -24,7 +24,7 @@ class JWPlatformIE(InfoExtractor):
     @staticmethod
     def _extract_url(webpage):
         mobj = re.search(
-            r'<script[^>]+?src=["\'](?P<url>(?:https?:)?//content.jwplatform.com/players/[a-zA-Z0-9]{8})',
+            r'<(?:script|iframe)[^>]+?src=["\'](?P<url>(?:https?:)?//content.jwplatform.com/players/[a-zA-Z0-9]{8})',
             webpage)
         if mobj:
             return mobj.group('url')
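The widened pattern now matches JW Platform players embedded via <iframe> as well as <script>. A quick check of both shapes; the HTML samples are fabricated for illustration:

    import re

    JWPLATFORM_RE = (r'<(?:script|iframe)[^>]+?src=["\']'
                     r'(?P<url>(?:https?:)?//content.jwplatform.com/players/[a-zA-Z0-9]{8})')

    samples = [
        '<script src="https://content.jwplatform.com/players/O0c5JcKT-abcd1234.js"></script>',
        '<iframe src="//content.jwplatform.com/players/O0c5JcKT-abcd1234.html"></iframe>',
    ]
    for html in samples:
        mobj = re.search(JWPLATFORM_RE, html)
        print(mobj.group('url') if mobj else None)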

youtube_dl/extractor/livestream.py

@@ -114,7 +114,7 @@ class LivestreamIE(InfoExtractor):

         smil_url = video_data.get('smil_url')
         if smil_url:
-            formats.extend(self._extract_smil_formats(smil_url, video_id))
+            formats.extend(self._extract_smil_formats(smil_url, video_id, fatal=False))

         m3u8_url = video_data.get('m3u8_url')
         if m3u8_url:

youtube_dl/extractor/massengeschmacktv.py

@@ -0,0 +1,77 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    clean_html,
+    determine_ext,
+    int_or_none,
+    js_to_json,
+    mimetype2ext,
+    parse_filesize,
+)
+
+
+class MassengeschmackTVIE(InfoExtractor):
+    IE_NAME = 'massengeschmack.tv'
+    _VALID_URL = r'https?://(?:www\.)?massengeschmack\.tv/play/(?P<id>[^?&#]+)'
+
+    _TEST = {
+        'url': 'https://massengeschmack.tv/play/fktv202',
+        'md5': 'a9e054db9c2b5a08f0a0527cc201e8d3',
+        'info_dict': {
+            'id': 'fktv202',
+            'ext': 'mp4',
+            'title': 'Fernsehkritik-TV - Folge 202',
+        },
+    }
+
+    def _real_extract(self, url):
+        episode = self._match_id(url)
+
+        webpage = self._download_webpage(url, episode)
+        title = clean_html(self._html_search_regex(
+            '<h3>([^<]+)</h3>', webpage, 'title'))
+        thumbnail = self._search_regex(r'POSTER\s*=\s*"([^"]+)', webpage, 'thumbnail', fatal=False)
+        sources = self._parse_json(self._search_regex(r'(?s)MEDIA\s*=\s*(\[.+?\]);', webpage, 'media'), episode, js_to_json)
+
+        formats = []
+        for source in sources:
+            furl = source.get('src')
+            if not furl:
+                continue
+            furl = self._proto_relative_url(furl)
+            ext = determine_ext(furl) or mimetype2ext(source.get('type'))
+            if ext == 'm3u8':
+                formats.extend(self._extract_m3u8_formats(
+                    furl, episode, 'mp4', 'm3u8_native',
+                    m3u8_id='hls', fatal=False))
+            else:
+                formats.append({
+                    'url': furl,
+                    'format_id': determine_ext(furl),
+                })
+
+        for (durl, format_id, width, height, filesize) in re.findall(r'''(?x)
+                <a[^>]+?href="(?P<url>(?:https:)?//[^"]+)".*?
+                <strong>(?P<format_id>.+?)</strong>.*?
+                <small>(?:(?P<width>\d+)x(?P<height>\d+))?\s+?\((?P<filesize>[\d,]+\s*[GM]iB)\)</small>
+            ''', webpage):
+            formats.append({
+                'url': durl,
+                'format_id': format_id,
+                'width': int_or_none(width),
+                'height': int_or_none(height),
+                'filesize': parse_filesize(filesize),
+                'vcodec': 'none' if format_id.startswith('Audio') else None,
+            })
+
+        self._sort_formats(formats, ('width', 'height', 'filesize', 'tbr'))
+
+        return {
+            'id': episode,
+            'title': title,
+            'formats': formats,
+            'thumbnail': thumbnail,
+        }
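The second loop scrapes the page's direct-download table with one verbose regex, pulling URL, label, dimensions, and a human-readable file size in a single pass. How that findall behaves on a single-line sample row (the markup is invented to fit the regex, not copied from the real site):

    import re

    html = ('<a href="//example.com/dl/fktv202.mp4">'
            '<strong>HD</strong> <small>1280x720 (1,1 GiB)</small></a>')

    pattern = r'''(?x)
        <a[^>]+?href="(?P<url>(?:https:)?//[^"]+)".*?
        <strong>(?P<format_id>.+?)</strong>.*?
        <small>(?:(?P<width>\d+)x(?P<height>\d+))?\s+?\((?P<filesize>[\d,]+\s*[GM]iB)\)</small>
    '''
    print(re.findall(pattern, html))
    # [('//example.com/dl/fktv202.mp4', 'HD', '1280', '720', '1,1 GiB')]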

youtube_dl/extractor/noco.py

@@ -70,7 +70,7 @@ class NocoIE(InfoExtractor):
             return

         login = self._download_json(
-            self._LOGIN_URL, None, 'Logging in as %s' % username,
+            self._LOGIN_URL, None, 'Logging in',
             data=urlencode_postdata({
                 'a': 'login',
                 'cookie': '1',

youtube_dl/extractor/once.py

@@ -11,7 +11,7 @@ class OnceIE(InfoExtractor):
     ADAPTIVE_URL_TEMPLATE = 'http://once.unicornmedia.com/now/master/playlist/%s/%s/%s/content.m3u8'
     PROGRESSIVE_URL_TEMPLATE = 'http://once.unicornmedia.com/now/media/progressive/%s/%s/%s/%s/content.mp4'

-    def _extract_once_formats(self, url):
+    def _extract_once_formats(self, url, http_formats_preference=None):
         domain_id, application_id, media_item_id = re.match(
             OnceIE._VALID_URL, url).groups()
         formats = self._extract_m3u8_formats(
@@ -35,6 +35,7 @@ class OnceIE(InfoExtractor):
                 'format_id': adaptive_format['format_id'].replace(
                     'hls', 'http'),
                 'protocol': 'http',
+                'preference': http_formats_preference,
             })
             progressive_formats.append(progressive_format)
         self._check_formats(progressive_formats, media_item_id)

youtube_dl/extractor/pandatv.py

@@ -33,7 +33,7 @@ class PandaTVIE(InfoExtractor):
         video_id = self._match_id(url)

         config = self._download_json(
-            'https://www.panda.tv/api_room?roomid=%s' % video_id, video_id)
+            'https://www.panda.tv/api_room_v2?roomid=%s' % video_id, video_id)

         error_code = config.get('errno', 0)
         if error_code is not 0:
@@ -66,6 +66,11 @@ class PandaTVIE(InfoExtractor):
             plflag1 = '4'
         live_panda = 'live_panda' if plflag0 < 1 else ''

+        plflag_auth = self._parse_json(video_info['plflag_list'], video_id)
+        sign = plflag_auth['auth']['sign']
+        ts = plflag_auth['auth']['time']
+        rid = plflag_auth['auth']['rid']
+
         quality_key = qualities(['OD', 'HD', 'SD'])
         suffix = ['_small', '_mid', '']
         formats = []
@@ -77,8 +82,8 @@ class PandaTVIE(InfoExtractor):
                 continue
             for pref, (ext, pl) in enumerate((('m3u8', '-hls'), ('flv', ''))):
                 formats.append({
-                    'url': 'https://pl%s%s.live.panda.tv/live_panda/%s%s%s.%s'
-                    % (pl, plflag1, room_key, live_panda, suffix[quality], ext),
+                    'url': 'https://pl%s%s.live.panda.tv/live_panda/%s%s%s.%s?sign=%s&ts=%s&rid=%s'
+                    % (pl, plflag1, room_key, live_panda, suffix[quality], ext, sign, ts, rid),
                     'format_id': '%s-%s' % (k, ext),
                     'quality': quality,
                     'source_preference': pref,
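The gist of the change: stream URLs must now carry the sign, ts, and rid tokens pulled out of the plflag_list auth blob. A minimal sketch of that step, with an invented payload and placeholder room values:

    import json

    # Hypothetical plflag_list value in the shape the code above expects.
    plflag_list = '{"auth": {"sign": "abc123", "time": "1511366290", "rid": "42"}}'
    auth = json.loads(plflag_list)['auth']

    url = ('https://pl%s%s.live.panda.tv/live_panda/%s%s%s.%s?sign=%s&ts=%s&rid=%s'
           % ('', '3', 'room_key', 'live_panda', '', 'flv',
              auth['sign'], auth['time'], auth['rid']))
    print(url)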

youtube_dl/extractor/patreon.py

@@ -67,7 +67,7 @@ class PatreonIE(InfoExtractor):
             'https://www.patreon.com/processLogin',
             compat_urllib_parse_urlencode(login_form).encode('utf-8')
         )
-        login_page = self._download_webpage(request, None, note='Logging in as %s' % username)
+        login_page = self._download_webpage(request, None, note='Logging in')

         if re.search(r'onLoginFailed', login_page):
             raise ExtractorError('Unable to login, incorrect username and/or password', expected=True)

youtube_dl/extractor/performgroup.py

@@ -0,0 +1,83 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import int_or_none
+
+
+class PerformGroupIE(InfoExtractor):
+    _VALID_URL = r'https?://player\.performgroup\.com/eplayer(?:/eplayer\.html|\.js)#/?(?P<id>[0-9a-f]{26})\.(?P<auth_token>[0-9a-z]{26})'
+    _TESTS = [{
+        # http://www.faz.net/aktuell/sport/fussball/wm-2018-playoffs-schweiz-besiegt-nordirland-1-0-15286104.html
+        'url': 'http://player.performgroup.com/eplayer/eplayer.html#d478c41c5d192f56b9aa859de8.1w4crrej5w14e1ed4s1ce4ykab',
+        'md5': '259cb03d142e2e52471e8837ecacb29f',
+        'info_dict': {
+            'id': 'xgrwobuzumes1lwjxtcdpwgxd',
+            'ext': 'mp4',
+            'title': 'Liga MX: Keine Einsicht nach Horrorfoul',
+            'description': 'md5:7cd3b459c82725b021e046ab10bf1c5b',
+            'timestamp': 1511533477,
+            'upload_date': '20171124',
+        }
+    }]
+
+    def _call_api(self, service, auth_token, content_id, referer_url):
+        return self._download_json(
+            'http://ep3.performfeeds.com/ep%s/%s/%s/' % (service, auth_token, content_id),
+            content_id, headers={
+                'Referer': referer_url,
+                'Origin': 'http://player.performgroup.com',
+            }, query={
+                '_fmt': 'json',
+            })
+
+    def _real_extract(self, url):
+        player_id, auth_token = re.search(self._VALID_URL, url).groups()
+        bootstrap = self._call_api('bootstrap', auth_token, player_id, url)
+        video = bootstrap['config']['dataSource']['sourceItems'][0]['videos'][0]
+        video_id = video['uuid']
+        vod = self._call_api('vod', auth_token, video_id, url)
+        media = vod['videos']['video'][0]['media']
+
+        formats = []
+        hls_url = media.get('hls', {}).get('url')
+        if hls_url:
+            formats.extend(self._extract_m3u8_formats(hls_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
+
+        hds_url = media.get('hds', {}).get('url')
+        if hds_url:
+            formats.extend(self._extract_f4m_formats(hds_url + '?hdcore', video_id, f4m_id='hds', fatal=False))
+
+        for c in media.get('content', []):
+            c_url = c.get('url')
+            if not c_url:
+                continue
+            tbr = int_or_none(c.get('bitrate'), 1000)
+            format_id = 'http'
+            if tbr:
+                format_id += '-%d' % tbr
+            formats.append({
+                'format_id': format_id,
+                'url': c_url,
+                'tbr': tbr,
+                'width': int_or_none(c.get('width')),
+                'height': int_or_none(c.get('height')),
+                'filesize': int_or_none(c.get('fileSize')),
+                'vcodec': c.get('type'),
+                'fps': int_or_none(c.get('videoFrameRate')),
+                'vbr': int_or_none(c.get('videoRate'), 1000),
+                'abr': int_or_none(c.get('audioRate'), 1000),
+            })
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': video['title'],
+            'description': video.get('description'),
+            'thumbnail': video.get('poster'),
+            'duration': int_or_none(video.get('duration')),
+            'timestamp': int_or_none(video.get('publishedTime'), 1000),
+            'formats': formats,
+        }

youtube_dl/extractor/pluralsight.py

@@ -116,7 +116,7 @@ class PluralsightIE(PluralsightBaseIE):
             post_url = compat_urlparse.urljoin(self._LOGIN_URL, post_url)

         response = self._download_webpage(
-            post_url, None, 'Logging in as %s' % username,
+            post_url, None, 'Logging in',
             data=urlencode_postdata(login_form),
             headers={'Content-Type': 'application/x-www-form-urlencoded'})

youtube_dl/extractor/roosterteeth.py

@@ -68,7 +68,7 @@ class RoosterTeethIE(InfoExtractor):

         login_request = self._download_webpage(
             self._LOGIN_URL, None,
-            note='Logging in as %s' % username,
+            note='Logging in',
             data=urlencode_postdata(login_form),
             headers={
                 'Referer': self._LOGIN_URL,

youtube_dl/extractor/safari.py

@@ -61,7 +61,7 @@ class SafariBaseIE(InfoExtractor):
         request = sanitized_Request(
             self._LOGIN_URL, urlencode_postdata(login_form), headers=headers)
         login_page = self._download_webpage(
-            request, None, 'Logging in as %s' % username)
+            request, None, 'Logging in')

         if not is_logged(login_page):
             raise ExtractorError(

youtube_dl/extractor/spankbang.py

@@ -7,7 +7,7 @@ from ..utils import ExtractorError


 class SpankBangIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:(?:www|[a-z]{2})\.)?spankbang\.com/(?P<id>[\da-z]+)/video'
+    _VALID_URL = r'https?://(?:(?:www|m|[a-z]{2})\.)?spankbang\.com/(?P<id>[\da-z]+)/video'
     _TESTS = [{
         'url': 'http://spankbang.com/3vvn/video/fantasy+solo',
         'md5': '1cc433e1d6aa14bc376535b8679302f7',
@@ -15,7 +15,7 @@ class SpankBangIE(InfoExtractor):
             'id': '3vvn',
             'ext': 'mp4',
             'title': 'fantasy solo',
-            'description': 'Watch fantasy solo free HD porn video - 05 minutes - dillion harper masturbates on a bed free adult movies.',
+            'description': 'Watch fantasy solo free HD porn video - 05 minutes - Babe,Masturbation,Solo,Toy - dillion harper masturbates on a bed free adult movies sexy clips.',
             'thumbnail': r're:^https?://.*\.jpg$',
             'uploader': 'silly2587',
             'age_limit': 18,
@@ -28,6 +28,10 @@ class SpankBangIE(InfoExtractor):
         # no uploader
         'url': 'http://spankbang.com/lklg/video/sex+with+anyone+wedding+edition+2',
         'only_matching': True,
+    }, {
+        # mobile page
+        'url': 'http://m.spankbang.com/1o2de/video/can+t+remember+her+name',
+        'only_matching': True,
     }]

     def _real_extract(self, url):

youtube_dl/extractor/tva.py

@@ -32,6 +32,8 @@ class TVAIE(InfoExtractor):
         video_data = self._download_json(
             'https://videos.tva.ca/proxy/item/_' + video_id, video_id, headers={
                 'Accept': 'application/json',
+            }, query={
+                'appId': '5955fc5f23eec60006c951f1',
             })

         def get_attribute(key):

youtube_dl/extractor/twitch.py

@@ -101,7 +101,7 @@ class TwitchBaseIE(InfoExtractor):
             fail(clean_html(login_page))

         redirect_page, handle = login_step(
-            login_page, handle, 'Logging in as %s' % username, {
+            login_page, handle, 'Logging in', {
                 'username': username,
                 'password': password,
             })

youtube_dl/extractor/udemy.py

@@ -164,7 +164,7 @@ class UdemyIE(InfoExtractor):
         })

         response = self._download_webpage(
-            self._LOGIN_URL, None, 'Logging in as %s' % username,
+            self._LOGIN_URL, None, 'Logging in',
             data=urlencode_postdata(login_form),
             headers={
                 'Referer': self._ORIGIN_URL,

youtube_dl/extractor/viki.py

@@ -99,7 +99,7 @@ class VikiBaseIE(InfoExtractor):

         login = self._call_api(
             'sessions.json', None,
-            'Logging in as %s' % username, post_data=login_form)
+            'Logging in', post_data=login_form)

         self._token = login.get('token')
         if not self._token:

youtube_dl/extractor/vk.py

@@ -67,7 +67,7 @@ class VKBaseIE(InfoExtractor):

         login_page = self._download_webpage(
             'https://login.vk.com/?act=login', None,
-            note='Logging in as %s' % username,
+            note='Logging in',
             data=urlencode_postdata(login_form))

         if re.search(r'onLoginFailed', login_page):

youtube_dl/extractor/vshare.py

@@ -1,14 +1,21 @@
 # coding: utf-8
 from __future__ import unicode_literals

+import re
+
 from .common import InfoExtractor
+from ..compat import compat_chr
+from ..utils import (
+    decode_packed_codes,
+    ExtractorError,
+)


 class VShareIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?vshare\.io/[dv]/(?P<id>[^/?#&]+)'
     _TESTS = [{
         'url': 'https://vshare.io/d/0f64ce6',
-        'md5': '16d7b8fef58846db47419199ff1ab3e7',
+        'md5': '17b39f55b5497ae8b59f5fbce8e35886',
         'info_dict': {
             'id': '0f64ce6',
             'title': 'vl14062007715967',
@@ -19,20 +26,49 @@ class VShareIE(InfoExtractor):
         'only_matching': True,
     }]

+    @staticmethod
+    def _extract_urls(webpage):
+        return re.findall(
+            r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//(?:www\.)?vshare\.io/v/[^/?#&]+)',
+            webpage)
+
+    def _extract_packed(self, webpage):
+        packed = self._search_regex(
+            r'(eval\(function.+)', webpage, 'packed code')
+        unpacked = decode_packed_codes(packed)
+        digits = self._search_regex(r'\[((?:\d+,?)+)\]', unpacked, 'digits')
+        digits = [int(digit) for digit in digits.split(',')]
+        key_digit = self._search_regex(
+            r'fromCharCode\(.+?(\d+)\)}', unpacked, 'key digit')
+        chars = [compat_chr(d - int(key_digit)) for d in digits]
+        return ''.join(chars)
+
     def _real_extract(self, url):
         video_id = self._match_id(url)

         webpage = self._download_webpage(
-            'https://vshare.io/d/%s' % video_id, video_id)
+            'https://vshare.io/v/%s/width-650/height-430/1' % video_id,
+            video_id)

         title = self._html_search_regex(
-            r'(?s)<div id="root-container">(.+?)<br/>', webpage, 'title')
-        video_url = self._search_regex(
-            r'<a[^>]+href=(["\'])(?P<url>(?:https?:)?//.+?)\1[^>]*>[Cc]lick\s+here',
-            webpage, 'video url', group='url')
+            r'<title>([^<]+)</title>', webpage, 'title')
+        title = title.split(' - ')[0]

-        return {
+        error = self._html_search_regex(
+            r'(?s)<div[^>]+\bclass=["\']xxx-error[^>]+>(.+?)</div', webpage,
+            'error', default=None)
+        if error:
+            raise ExtractorError(error, expected=True)
+
+        info = self._parse_html5_media_entries(
+            url, '<video>%s</video>' % self._extract_packed(webpage),
+            video_id)[0]
+
+        self._sort_formats(info['formats'])
+
+        info.update({
+            'id': video_id,
+            'title': title,
+        })
+
+        return info
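_extract_packed first unpacks the page's eval(function...) obfuscation with decode_packed_codes, then reverses a simple character-offset scheme: the unpacked JS holds an array of digits and a key, and each character is chr(digit - key). A toy decode of the same scheme; the digit array and key below are made up:

    key_digit = 3
    digits = [107, 119, 119, 115]  # would be scraped from the unpacked JS
    print(''.join(chr(d - key_digit) for d in digits))  # -> 'http'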

youtube_dl/extractor/wsj.py

@@ -13,7 +13,7 @@ class WSJIE(InfoExtractor):
     _VALID_URL = r'''(?x)
                     (?:
                         https?://video-api\.wsj\.com/api-video/player/iframe\.html\?.*?\bguid=|
-                        https?://(?:www\.)?(?:wsj|barrons)\.com/video/[^/]+/|
+                        https?://(?:www\.)?(?:wsj|barrons)\.com/video/(?:[^/]+/)+|
                         wsj:
                     )
                     (?P<id>[a-fA-F0-9-]{36})
@@ -38,6 +38,9 @@ class WSJIE(InfoExtractor):
     }, {
         'url': 'http://www.barrons.com/video/capitalism-deserves-more-respect-from-millennials/F301217E-6F46-43AE-B8D2-B7180D642EE9.html',
         'only_matching': True,
+    }, {
+        'url': 'https://www.wsj.com/video/series/a-brief-history-of/the-modern-cell-carrier-how-we-got-here/980E2187-401D-48A1-B82B-1486CEE06CB9',
+        'only_matching': True,
     }]

     def _real_extract(self, url):

youtube_dl/extractor/youku.py

@@ -154,7 +154,7 @@ class YoukuIE(InfoExtractor):
         # request basic data
         basic_data_params = {
             'vid': video_id,
-            'ccode': '0402' if 'tudou.com' in url else '0401',
+            'ccode': '0502',
             'client_ip': '192.168.1.1',
             'utid': cna,
             'client_ts': time.time() / 1000,

youtube_dl/version.py

@@ -1,3 +1,3 @@
 from __future__ import unicode_literals

-__version__ = '2017.10.29'
+__version__ = '2017.11.15'