Merge branch 'master' of https://github.com/rg3/youtube-dl
This commit is contained in:
commit
8ddff4c81a
6
.github/ISSUE_TEMPLATE.md
vendored
6
.github/ISSUE_TEMPLATE.md
vendored
@ -6,8 +6,8 @@
|
|||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.07.17*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
|
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.07.28*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
|
||||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.07.17**
|
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.07.28**
|
||||||
|
|
||||||
### Before submitting an *issue* make sure you have:
|
### Before submitting an *issue* make sure you have:
|
||||||
- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
|
- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
|
||||||
@ -35,7 +35,7 @@ $ youtube-dl -v <your command line>
|
|||||||
[debug] User config: []
|
[debug] User config: []
|
||||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||||
[debug] youtube-dl version 2016.07.17
|
[debug] youtube-dl version 2016.07.28
|
||||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
||||||
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
||||||
[debug] Proxy map: {}
|
[debug] Proxy map: {}
|
||||||
|
1
AUTHORS
1
AUTHORS
@ -178,3 +178,4 @@ Artur Krysiak
|
|||||||
Jakub Adam Wieczorek
|
Jakub Adam Wieczorek
|
||||||
Aleksandar Topuzović
|
Aleksandar Topuzović
|
||||||
Nehal Patel
|
Nehal Patel
|
||||||
|
Rob van Bekkum
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import itertools
|
||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
@ -21,10 +22,15 @@ def format_size(bytes):
|
|||||||
|
|
||||||
total_bytes = 0
|
total_bytes = 0
|
||||||
|
|
||||||
releases = json.loads(compat_urllib_request.urlopen(
|
for page in itertools.count(1):
|
||||||
'https://api.github.com/repos/rg3/youtube-dl/releases').read().decode('utf-8'))
|
releases = json.loads(compat_urllib_request.urlopen(
|
||||||
|
'https://api.github.com/repos/rg3/youtube-dl/releases?page=%s' % page
|
||||||
|
).read().decode('utf-8'))
|
||||||
|
|
||||||
for release in releases:
|
if not releases:
|
||||||
|
break
|
||||||
|
|
||||||
|
for release in releases:
|
||||||
compat_print(release['name'])
|
compat_print(release['name'])
|
||||||
for asset in release['assets']:
|
for asset in release['assets']:
|
||||||
asset_name = asset['name']
|
asset_name = asset['name']
|
||||||
|
@ -46,6 +46,7 @@
|
|||||||
- **archive.org**: archive.org videos
|
- **archive.org**: archive.org videos
|
||||||
- **ARD**
|
- **ARD**
|
||||||
- **ARD:mediathek**
|
- **ARD:mediathek**
|
||||||
|
- **Arkena**
|
||||||
- **arte.tv**
|
- **arte.tv**
|
||||||
- **arte.tv:+7**
|
- **arte.tv:+7**
|
||||||
- **arte.tv:cinema**
|
- **arte.tv:cinema**
|
||||||
@ -141,7 +142,7 @@
|
|||||||
- **CollegeRama**
|
- **CollegeRama**
|
||||||
- **ComCarCoff**
|
- **ComCarCoff**
|
||||||
- **ComedyCentral**
|
- **ComedyCentral**
|
||||||
- **ComedyCentralShows**: The Daily Show / The Colbert Report
|
- **ComedyCentralTV**
|
||||||
- **CondeNast**: Condé Nast media group: Allure, Architectural Digest, Ars Technica, Bon Appétit, Brides, Condé Nast, Condé Nast Traveler, Details, Epicurious, GQ, Glamour, Golf Digest, SELF, Teen Vogue, The New Yorker, Vanity Fair, Vogue, W Magazine, WIRED
|
- **CondeNast**: Condé Nast media group: Allure, Architectural Digest, Ars Technica, Bon Appétit, Brides, Condé Nast, Condé Nast Traveler, Details, Epicurious, GQ, Glamour, Golf Digest, SELF, Teen Vogue, The New Yorker, Vanity Fair, Vogue, W Magazine, WIRED
|
||||||
- **Coub**
|
- **Coub**
|
||||||
- **Cracked**
|
- **Cracked**
|
||||||
@ -336,6 +337,8 @@
|
|||||||
- **kuwo:song**: 酷我音乐
|
- **kuwo:song**: 酷我音乐
|
||||||
- **la7.it**
|
- **la7.it**
|
||||||
- **Laola1Tv**
|
- **Laola1Tv**
|
||||||
|
- **Lcp**
|
||||||
|
- **LcpPlay**
|
||||||
- **Le**: 乐视网
|
- **Le**: 乐视网
|
||||||
- **Learnr**
|
- **Learnr**
|
||||||
- **Lecture2Go**
|
- **Lecture2Go**
|
||||||
@ -397,7 +400,6 @@
|
|||||||
- **MSN**
|
- **MSN**
|
||||||
- **MTV**
|
- **MTV**
|
||||||
- **mtv.de**
|
- **mtv.de**
|
||||||
- **mtviggy.com**
|
|
||||||
- **mtvservices:embedded**
|
- **mtvservices:embedded**
|
||||||
- **MuenchenTV**: münchen.tv
|
- **MuenchenTV**: münchen.tv
|
||||||
- **MusicPlayOn**
|
- **MusicPlayOn**
|
||||||
@ -437,7 +439,6 @@
|
|||||||
- **Newstube**
|
- **Newstube**
|
||||||
- **NextMedia**: 蘋果日報
|
- **NextMedia**: 蘋果日報
|
||||||
- **NextMediaActionNews**: 蘋果日報 - 動新聞
|
- **NextMediaActionNews**: 蘋果日報 - 動新聞
|
||||||
- **nextmovie.com**
|
|
||||||
- **nfb**: National Film Board of Canada
|
- **nfb**: National Film Board of Canada
|
||||||
- **nfl.com**
|
- **nfl.com**
|
||||||
- **nhl.com**
|
- **nhl.com**
|
||||||
@ -477,6 +478,7 @@
|
|||||||
- **NYTimes**
|
- **NYTimes**
|
||||||
- **NYTimesArticle**
|
- **NYTimesArticle**
|
||||||
- **ocw.mit.edu**
|
- **ocw.mit.edu**
|
||||||
|
- **OdaTV**
|
||||||
- **Odnoklassniki**
|
- **Odnoklassniki**
|
||||||
- **OktoberfestTV**
|
- **OktoberfestTV**
|
||||||
- **on.aol.com**
|
- **on.aol.com**
|
||||||
@ -694,6 +696,7 @@
|
|||||||
- **TNAFlix**
|
- **TNAFlix**
|
||||||
- **TNAFlixNetworkEmbed**
|
- **TNAFlixNetworkEmbed**
|
||||||
- **toggle**
|
- **toggle**
|
||||||
|
- **Tosh**: Tosh.0
|
||||||
- **tou.tv**
|
- **tou.tv**
|
||||||
- **Toypics**: Toypics user profile
|
- **Toypics**: Toypics user profile
|
||||||
- **ToypicsUser**: Toypics user profile
|
- **ToypicsUser**: Toypics user profile
|
||||||
|
@ -101,8 +101,6 @@ class TestAllURLsMatching(unittest.TestCase):
|
|||||||
self.assertMatch(':ytsubs', ['youtube:subscriptions'])
|
self.assertMatch(':ytsubs', ['youtube:subscriptions'])
|
||||||
self.assertMatch(':ytsubscriptions', ['youtube:subscriptions'])
|
self.assertMatch(':ytsubscriptions', ['youtube:subscriptions'])
|
||||||
self.assertMatch(':ythistory', ['youtube:history'])
|
self.assertMatch(':ythistory', ['youtube:history'])
|
||||||
self.assertMatch(':thedailyshow', ['ComedyCentralShows'])
|
|
||||||
self.assertMatch(':tds', ['ComedyCentralShows'])
|
|
||||||
|
|
||||||
def test_vimeo_matching(self):
|
def test_vimeo_matching(self):
|
||||||
self.assertMatch('https://vimeo.com/channels/tributes', ['vimeo:channel'])
|
self.assertMatch('https://vimeo.com/channels/tributes', ['vimeo:channel'])
|
||||||
|
@ -73,6 +73,7 @@ class ARDMediathekIE(InfoExtractor):
|
|||||||
'description': 'md5:c0c1c8048514deaed2a73b3a60eecacb',
|
'description': 'md5:c0c1c8048514deaed2a73b3a60eecacb',
|
||||||
'duration': 3287,
|
'duration': 3287,
|
||||||
},
|
},
|
||||||
|
'skip': 'Video is no longer available',
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _extract_media_info(self, media_info_url, webpage, video_id):
|
def _extract_media_info(self, media_info_url, webpage, video_id):
|
||||||
|
115
youtube_dl/extractor/arkena.py
Normal file
115
youtube_dl/extractor/arkena.py
Normal file
@ -0,0 +1,115 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
determine_ext,
|
||||||
|
float_or_none,
|
||||||
|
int_or_none,
|
||||||
|
mimetype2ext,
|
||||||
|
parse_iso8601,
|
||||||
|
strip_jsonp,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class ArkenaIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://play\.arkena\.com/(?:config|embed)/avp/v\d/player/media/(?P<id>[^/]+)/[^/]+/(?P<account_id>\d+)'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://play.arkena.com/embed/avp/v2/player/media/b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe/1/129411',
|
||||||
|
'md5': 'b96f2f71b359a8ecd05ce4e1daa72365',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Big Buck Bunny',
|
||||||
|
'description': 'Royalty free test video',
|
||||||
|
'timestamp': 1432816365,
|
||||||
|
'upload_date': '20150528',
|
||||||
|
'is_live': False,
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'https://play.arkena.com/config/avp/v2/player/media/b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe/1/129411/?callbackMethod=jQuery1111023664739129262213_1469227693893',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://play.arkena.com/config/avp/v1/player/media/327336/darkmatter/131064/?callbackMethod=jQuery1111002221189684892677_1469227595972',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://play.arkena.com/embed/avp/v1/player/media/327336/darkmatter/131064/',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _extract_url(webpage):
|
||||||
|
# See https://support.arkena.com/display/PLAY/Ways+to+embed+your+video
|
||||||
|
mobj = re.search(
|
||||||
|
r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//play\.arkena\.com/embed/avp/.+?)\1',
|
||||||
|
webpage)
|
||||||
|
if mobj:
|
||||||
|
return mobj.group('url')
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
video_id = mobj.group('id')
|
||||||
|
account_id = mobj.group('account_id')
|
||||||
|
|
||||||
|
playlist = self._download_json(
|
||||||
|
'https://play.arkena.com/config/avp/v2/player/media/%s/0/%s/?callbackMethod=_'
|
||||||
|
% (video_id, account_id),
|
||||||
|
video_id, transform_source=strip_jsonp)['Playlist'][0]
|
||||||
|
|
||||||
|
media_info = playlist['MediaInfo']
|
||||||
|
title = media_info['Title']
|
||||||
|
media_files = playlist['MediaFiles']
|
||||||
|
|
||||||
|
is_live = False
|
||||||
|
formats = []
|
||||||
|
for kind_case, kind_formats in media_files.items():
|
||||||
|
kind = kind_case.lower()
|
||||||
|
for f in kind_formats:
|
||||||
|
f_url = f.get('Url')
|
||||||
|
if not f_url:
|
||||||
|
continue
|
||||||
|
is_live = f.get('Live') == 'true'
|
||||||
|
exts = (mimetype2ext(f.get('Type')), determine_ext(f_url, None))
|
||||||
|
if kind == 'm3u8' or 'm3u8' in exts:
|
||||||
|
formats.extend(self._extract_m3u8_formats(
|
||||||
|
f_url, video_id, 'mp4',
|
||||||
|
entry_protocol='m3u8' if is_live else 'm3u8_native',
|
||||||
|
m3u8_id=kind, fatal=False, live=is_live))
|
||||||
|
elif kind == 'flash' or 'f4m' in exts:
|
||||||
|
formats.extend(self._extract_f4m_formats(
|
||||||
|
f_url, video_id, f4m_id=kind, fatal=False))
|
||||||
|
elif kind == 'dash' or 'mpd' in exts:
|
||||||
|
formats.extend(self._extract_mpd_formats(
|
||||||
|
f_url, video_id, mpd_id=kind, fatal=False))
|
||||||
|
elif kind == 'silverlight':
|
||||||
|
# TODO: process when ism is supported (see
|
||||||
|
# https://github.com/rg3/youtube-dl/issues/8118)
|
||||||
|
continue
|
||||||
|
else:
|
||||||
|
tbr = float_or_none(f.get('Bitrate'), 1000)
|
||||||
|
formats.append({
|
||||||
|
'url': f_url,
|
||||||
|
'format_id': '%s-%d' % (kind, tbr) if tbr else kind,
|
||||||
|
'tbr': tbr,
|
||||||
|
})
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
description = media_info.get('Description')
|
||||||
|
video_id = media_info.get('VideoId') or video_id
|
||||||
|
timestamp = parse_iso8601(media_info.get('PublishDate'))
|
||||||
|
thumbnails = [{
|
||||||
|
'url': thumbnail['Url'],
|
||||||
|
'width': int_or_none(thumbnail.get('Size')),
|
||||||
|
} for thumbnail in (media_info.get('Poster') or []) if thumbnail.get('Url')]
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'title': title,
|
||||||
|
'description': description,
|
||||||
|
'timestamp': timestamp,
|
||||||
|
'is_live': is_live,
|
||||||
|
'thumbnails': thumbnails,
|
||||||
|
'formats': formats,
|
||||||
|
}
|
@ -12,7 +12,7 @@ class BigflixIE(InfoExtractor):
|
|||||||
_VALID_URL = r'https?://(?:www\.)?bigflix\.com/.+/(?P<id>[0-9]+)'
|
_VALID_URL = r'https?://(?:www\.)?bigflix\.com/.+/(?P<id>[0-9]+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.bigflix.com/Hindi-movies/Action-movies/Singham-Returns/16537',
|
'url': 'http://www.bigflix.com/Hindi-movies/Action-movies/Singham-Returns/16537',
|
||||||
'md5': 'ec76aa9b1129e2e5b301a474e54fab74',
|
'md5': 'dc1b4aebb46e3a7077ecc0d9f43f61e3',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '16537',
|
'id': '16537',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
@ -26,7 +26,7 @@ class BigflixIE(InfoExtractor):
|
|||||||
'id': '16070',
|
'id': '16070',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Madarasapatinam',
|
'title': 'Madarasapatinam',
|
||||||
'description': 'md5:63b9b8ed79189c6f0418c26d9a3452ca',
|
'description': 'md5:9f0470b26a4ba8e824c823b5d95c2f6b',
|
||||||
'formats': 'mincount:2',
|
'formats': 'mincount:2',
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
|
@ -1,7 +1,6 @@
|
|||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import datetime
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
@ -10,8 +9,10 @@ from ..compat import (
|
|||||||
compat_urlparse,
|
compat_urlparse,
|
||||||
)
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
parse_iso8601,
|
clean_html,
|
||||||
|
parse_duration,
|
||||||
str_to_int,
|
str_to_int,
|
||||||
|
unified_strdate,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -26,14 +27,14 @@ class CamdemyIE(InfoExtractor):
|
|||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Ch1-1 Introduction, Signals (02-23-2012)',
|
'title': 'Ch1-1 Introduction, Signals (02-23-2012)',
|
||||||
'thumbnail': 're:^https?://.*\.jpg$',
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
'description': '',
|
|
||||||
'creator': 'ss11spring',
|
'creator': 'ss11spring',
|
||||||
|
'duration': 1591,
|
||||||
'upload_date': '20130114',
|
'upload_date': '20130114',
|
||||||
'timestamp': 1358154556,
|
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
}
|
}
|
||||||
}, {
|
}, {
|
||||||
# With non-empty description
|
# With non-empty description
|
||||||
|
# webpage returns "No permission or not login"
|
||||||
'url': 'http://www.camdemy.com/media/13885',
|
'url': 'http://www.camdemy.com/media/13885',
|
||||||
'md5': '4576a3bb2581f86c61044822adbd1249',
|
'md5': '4576a3bb2581f86c61044822adbd1249',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@ -41,64 +42,71 @@ class CamdemyIE(InfoExtractor):
|
|||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'EverCam + Camdemy QuickStart',
|
'title': 'EverCam + Camdemy QuickStart',
|
||||||
'thumbnail': 're:^https?://.*\.jpg$',
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
'description': 'md5:050b62f71ed62928f8a35f1a41e186c9',
|
'description': 'md5:2a9f989c2b153a2342acee579c6e7db6',
|
||||||
'creator': 'evercam',
|
'creator': 'evercam',
|
||||||
'upload_date': '20140620',
|
'duration': 318,
|
||||||
'timestamp': 1403271569,
|
|
||||||
}
|
}
|
||||||
}, {
|
}, {
|
||||||
# External source
|
# External source (YouTube)
|
||||||
'url': 'http://www.camdemy.com/media/14842',
|
'url': 'http://www.camdemy.com/media/14842',
|
||||||
'md5': '50e1c3c3aa233d3d7b7daa2fa10b1cf7',
|
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '2vsYQzNIsJo',
|
'id': '2vsYQzNIsJo',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
|
'title': 'Excel 2013 Tutorial - How to add Password Protection',
|
||||||
|
'description': 'Excel 2013 Tutorial for Beginners - How to add Password Protection',
|
||||||
'upload_date': '20130211',
|
'upload_date': '20130211',
|
||||||
'uploader': 'Hun Kim',
|
'uploader': 'Hun Kim',
|
||||||
'description': 'Excel 2013 Tutorial for Beginners - How to add Password Protection',
|
|
||||||
'uploader_id': 'hunkimtutorials',
|
'uploader_id': 'hunkimtutorials',
|
||||||
'title': 'Excel 2013 Tutorial - How to add Password Protection',
|
},
|
||||||
}
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
page = self._download_webpage(url, video_id)
|
|
||||||
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
src_from = self._html_search_regex(
|
src_from = self._html_search_regex(
|
||||||
r"<div class='srcFrom'>Source: <a title='([^']+)'", page,
|
r"class=['\"]srcFrom['\"][^>]*>Sources?(?:\s+from)?\s*:\s*<a[^>]+(?:href|title)=(['\"])(?P<url>(?:(?!\1).)+)\1",
|
||||||
'external source', default=None)
|
webpage, 'external source', default=None, group='url')
|
||||||
if src_from:
|
if src_from:
|
||||||
return self.url_result(src_from)
|
return self.url_result(src_from)
|
||||||
|
|
||||||
oembed_obj = self._download_json(
|
oembed_obj = self._download_json(
|
||||||
'http://www.camdemy.com/oembed/?format=json&url=' + url, video_id)
|
'http://www.camdemy.com/oembed/?format=json&url=' + url, video_id)
|
||||||
|
|
||||||
|
title = oembed_obj['title']
|
||||||
thumb_url = oembed_obj['thumbnail_url']
|
thumb_url = oembed_obj['thumbnail_url']
|
||||||
video_folder = compat_urlparse.urljoin(thumb_url, 'video/')
|
video_folder = compat_urlparse.urljoin(thumb_url, 'video/')
|
||||||
file_list_doc = self._download_xml(
|
file_list_doc = self._download_xml(
|
||||||
compat_urlparse.urljoin(video_folder, 'fileList.xml'),
|
compat_urlparse.urljoin(video_folder, 'fileList.xml'),
|
||||||
video_id, 'Filelist XML')
|
video_id, 'Downloading filelist XML')
|
||||||
file_name = file_list_doc.find('./video/item/fileName').text
|
file_name = file_list_doc.find('./video/item/fileName').text
|
||||||
video_url = compat_urlparse.urljoin(video_folder, file_name)
|
video_url = compat_urlparse.urljoin(video_folder, file_name)
|
||||||
|
|
||||||
timestamp = parse_iso8601(self._html_search_regex(
|
# Some URLs return "No permission or not login" in a webpage despite being
|
||||||
r"<div class='title'>Posted\s*:</div>\s*<div class='value'>([^<>]+)<",
|
# freely available via oembed JSON URL (e.g. http://www.camdemy.com/media/13885)
|
||||||
page, 'creation time', fatal=False),
|
upload_date = unified_strdate(self._search_regex(
|
||||||
delimiter=' ', timezone=datetime.timedelta(hours=8))
|
r'>published on ([^<]+)<', webpage,
|
||||||
view_count = str_to_int(self._html_search_regex(
|
'upload date', default=None))
|
||||||
r"<div class='title'>Views\s*:</div>\s*<div class='value'>([^<>]+)<",
|
view_count = str_to_int(self._search_regex(
|
||||||
page, 'view count', fatal=False))
|
r'role=["\']viewCnt["\'][^>]*>([\d,.]+) views',
|
||||||
|
webpage, 'view count', default=None))
|
||||||
|
description = self._html_search_meta(
|
||||||
|
'description', webpage, default=None) or clean_html(
|
||||||
|
oembed_obj.get('description'))
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'url': video_url,
|
'url': video_url,
|
||||||
'title': oembed_obj['title'],
|
'title': title,
|
||||||
'thumbnail': thumb_url,
|
'thumbnail': thumb_url,
|
||||||
'description': self._html_search_meta('description', page),
|
'description': description,
|
||||||
'creator': oembed_obj['author_name'],
|
'creator': oembed_obj.get('author_name'),
|
||||||
'duration': oembed_obj['duration'],
|
'duration': parse_duration(oembed_obj.get('duration')),
|
||||||
'timestamp': timestamp,
|
'upload_date': upload_date,
|
||||||
'view_count': view_count,
|
'view_count': view_count,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -4,9 +4,11 @@ from __future__ import unicode_literals
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from ..compat import compat_str
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
js_to_json,
|
js_to_json,
|
||||||
smuggle_url,
|
smuggle_url,
|
||||||
|
try_get,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -27,7 +29,20 @@ class CBCIE(InfoExtractor):
|
|||||||
},
|
},
|
||||||
'skip': 'Geo-restricted to Canada',
|
'skip': 'Geo-restricted to Canada',
|
||||||
}, {
|
}, {
|
||||||
# with clipId
|
# with clipId, feed available via tpfeed.cbc.ca and feed.theplatform.com
|
||||||
|
'url': 'http://www.cbc.ca/22minutes/videos/22-minutes-update/22-minutes-update-episode-4',
|
||||||
|
'md5': '162adfa070274b144f4fdc3c3b8207db',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '2414435309',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': '22 Minutes Update: What Not To Wear Quebec',
|
||||||
|
'description': "This week's latest Canadian top political story is What Not To Wear Quebec.",
|
||||||
|
'upload_date': '20131025',
|
||||||
|
'uploader': 'CBCC-NEW',
|
||||||
|
'timestamp': 1382717907,
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
# with clipId, feed only available via tpfeed.cbc.ca
|
||||||
'url': 'http://www.cbc.ca/archives/entry/1978-robin-williams-freestyles-on-90-minutes-live',
|
'url': 'http://www.cbc.ca/archives/entry/1978-robin-williams-freestyles-on-90-minutes-live',
|
||||||
'md5': '0274a90b51a9b4971fe005c63f592f12',
|
'md5': '0274a90b51a9b4971fe005c63f592f12',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@ -83,6 +98,12 @@ class CBCIE(InfoExtractor):
|
|||||||
media_id = player_info.get('mediaId')
|
media_id = player_info.get('mediaId')
|
||||||
if not media_id:
|
if not media_id:
|
||||||
clip_id = player_info['clipId']
|
clip_id = player_info['clipId']
|
||||||
|
feed = self._download_json(
|
||||||
|
'http://tpfeed.cbc.ca/f/ExhSPC/vms_5akSXx4Ng_Zn?byCustomValue={:mpsReleases}{%s}' % clip_id,
|
||||||
|
clip_id, fatal=False)
|
||||||
|
if feed:
|
||||||
|
media_id = try_get(feed, lambda x: x['entries'][0]['guid'], compat_str)
|
||||||
|
if not media_id:
|
||||||
media_id = self._download_json(
|
media_id = self._download_json(
|
||||||
'http://feed.theplatform.com/f/h9dtGB/punlNGjMlc1F?fields=id&byContent=byReleases%3DbyId%253D' + clip_id,
|
'http://feed.theplatform.com/f/h9dtGB/punlNGjMlc1F?fields=id&byContent=byReleases%3DbyId%253D' + clip_id,
|
||||||
clip_id)['entries'][0]['id'].split('/')[-1]
|
clip_id)['entries'][0]['id'].split('/')[-1]
|
||||||
|
@ -1,5 +1,7 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
from .mtv import MTVIE
|
from .mtv import MTVIE
|
||||||
|
from ..utils import ExtractorError
|
||||||
|
|
||||||
|
|
||||||
class CMTIE(MTVIE):
|
class CMTIE(MTVIE):
|
||||||
@ -16,7 +18,27 @@ class CMTIE(MTVIE):
|
|||||||
'title': 'Garth Brooks - "The Call (featuring Trisha Yearwood)"',
|
'title': 'Garth Brooks - "The Call (featuring Trisha Yearwood)"',
|
||||||
'description': 'Blame It All On My Roots',
|
'description': 'Blame It All On My Roots',
|
||||||
},
|
},
|
||||||
|
'skip': 'Video not available',
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.cmt.com/videos/misc/1504699/still-the-king-ep-109-in-3-minutes.jhtml#id=1739908',
|
||||||
|
'md5': 'e61a801ca4a183a466c08bd98dccbb1c',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '1504699',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Still The King Ep. 109 in 3 Minutes',
|
||||||
|
'description': 'Relive or catch up with Still The King by watching this recap of season 1, episode 9. New episodes Sundays 9/8c.',
|
||||||
|
'timestamp': 1469421000.0,
|
||||||
|
'upload_date': '20160725',
|
||||||
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.cmt.com/shows/party-down-south/party-down-south-ep-407-gone-girl/1738172/playlist/#id=1738172',
|
'url': 'http://www.cmt.com/shows/party-down-south/party-down-south-ep-407-gone-girl/1738172/playlist/#id=1738172',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def _transform_rtmp_url(cls, rtmp_video_url):
|
||||||
|
if 'error_not_available.swf' in rtmp_video_url:
|
||||||
|
raise ExtractorError(
|
||||||
|
'%s said: video is not available' % cls.IE_NAME, expected=True)
|
||||||
|
|
||||||
|
return super(CMTIE, cls)._transform_rtmp_url(rtmp_video_url)
|
||||||
|
@ -1,17 +1,6 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
|
||||||
|
|
||||||
from .mtv import MTVServicesInfoExtractor
|
from .mtv import MTVServicesInfoExtractor
|
||||||
from ..compat import (
|
|
||||||
compat_str,
|
|
||||||
compat_urllib_parse_urlencode,
|
|
||||||
)
|
|
||||||
from ..utils import (
|
|
||||||
ExtractorError,
|
|
||||||
float_or_none,
|
|
||||||
unified_strdate,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class ComedyCentralIE(MTVServicesInfoExtractor):
|
class ComedyCentralIE(MTVServicesInfoExtractor):
|
||||||
@ -26,8 +15,10 @@ class ComedyCentralIE(MTVServicesInfoExtractor):
|
|||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'cef0cbb3-e776-4bc9-b62e-8016deccb354',
|
'id': 'cef0cbb3-e776-4bc9-b62e-8016deccb354',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'CC:Stand-Up|Greg Fitzsimmons: Life on Stage|Uncensored - Too Good of a Mother',
|
'title': 'CC:Stand-Up|August 18, 2013|1|0101|Uncensored - Too Good of a Mother',
|
||||||
'description': 'After a certain point, breastfeeding becomes c**kblocking.',
|
'description': 'After a certain point, breastfeeding becomes c**kblocking.',
|
||||||
|
'timestamp': 1376798400,
|
||||||
|
'upload_date': '20130818',
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.cc.com/shows/the-daily-show-with-trevor-noah/interviews/6yx39d/exclusive-rand-paul-extended-interview',
|
'url': 'http://www.cc.com/shows/the-daily-show-with-trevor-noah/interviews/6yx39d/exclusive-rand-paul-extended-interview',
|
||||||
@ -35,244 +26,43 @@ class ComedyCentralIE(MTVServicesInfoExtractor):
|
|||||||
}]
|
}]
|
||||||
|
|
||||||
|
|
||||||
class ComedyCentralShowsIE(MTVServicesInfoExtractor):
|
class ToshIE(MTVServicesInfoExtractor):
|
||||||
IE_DESC = 'The Daily Show / The Colbert Report'
|
IE_DESC = 'Tosh.0'
|
||||||
# urls can be abbreviations like :thedailyshow
|
_VALID_URL = r'^https?://tosh\.cc\.com/video-(?:clips|collections)/[^/]+/(?P<videotitle>[^/?#]+)'
|
||||||
# urls for episodes like:
|
_FEED_URL = 'http://tosh.cc.com/feeds/mrss'
|
||||||
# or urls for clips like: http://www.thedailyshow.com/watch/mon-december-10-2012/any-given-gun-day
|
|
||||||
# or: http://www.colbertnation.com/the-colbert-report-videos/421667/november-29-2012/moon-shattering-news
|
|
||||||
# or: http://www.colbertnation.com/the-colbert-report-collections/422008/festival-of-lights/79524
|
|
||||||
_VALID_URL = r'''(?x)^(:(?P<shortname>tds|thedailyshow)
|
|
||||||
|https?://(:www\.)?
|
|
||||||
(?P<showname>thedailyshow|thecolbertreport|tosh)\.(?:cc\.)?com/
|
|
||||||
((?:full-)?episodes/(?:[0-9a-z]{6}/)?(?P<episode>.*)|
|
|
||||||
(?P<clip>
|
|
||||||
(?:(?:guests/[^/]+|videos|video-(?:clips|playlists)|special-editions|news-team/[^/]+)/[^/]+/(?P<videotitle>[^/?#]+))
|
|
||||||
|(the-colbert-report-(videos|collections)/(?P<clipID>[0-9]+)/[^/]*/(?P<cntitle>.*?))
|
|
||||||
|(watch/(?P<date>[^/]*)/(?P<tdstitle>.*))
|
|
||||||
)|
|
|
||||||
(?P<interview>
|
|
||||||
extended-interviews/(?P<interID>[0-9a-z]+)/
|
|
||||||
(?:playlist_tds_extended_)?(?P<interview_title>[^/?#]*?)
|
|
||||||
(?:/[^/?#]?|[?#]|$))))
|
|
||||||
'''
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://thedailyshow.cc.com/watch/thu-december-13-2012/kristen-stewart',
|
|
||||||
'md5': '4e2f5cb088a83cd8cdb7756132f9739d',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'ab9ab3e7-5a98-4dbe-8b21-551dc0523d55',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'upload_date': '20121213',
|
|
||||||
'description': 'Kristen Stewart learns to let loose in "On the Road."',
|
|
||||||
'uploader': 'thedailyshow',
|
|
||||||
'title': 'thedailyshow kristen-stewart part 1',
|
|
||||||
}
|
|
||||||
}, {
|
|
||||||
'url': 'http://thedailyshow.cc.com/extended-interviews/b6364d/sarah-chayes-extended-interview',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'sarah-chayes-extended-interview',
|
|
||||||
'description': 'Carnegie Endowment Senior Associate Sarah Chayes discusses how corrupt institutions function throughout the world in her book "Thieves of State: Why Corruption Threatens Global Security."',
|
|
||||||
'title': 'thedailyshow Sarah Chayes Extended Interview',
|
|
||||||
},
|
|
||||||
'playlist': [
|
|
||||||
{
|
|
||||||
'info_dict': {
|
|
||||||
'id': '0baad492-cbec-4ec1-9e50-ad91c291127f',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'upload_date': '20150129',
|
|
||||||
'description': 'Carnegie Endowment Senior Associate Sarah Chayes discusses how corrupt institutions function throughout the world in her book "Thieves of State: Why Corruption Threatens Global Security."',
|
|
||||||
'uploader': 'thedailyshow',
|
|
||||||
'title': 'thedailyshow sarah-chayes-extended-interview part 1',
|
|
||||||
},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
'info_dict': {
|
|
||||||
'id': '1e4fb91b-8ce7-4277-bd7c-98c9f1bbd283',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'upload_date': '20150129',
|
|
||||||
'description': 'Carnegie Endowment Senior Associate Sarah Chayes discusses how corrupt institutions function throughout the world in her book "Thieves of State: Why Corruption Threatens Global Security."',
|
|
||||||
'uploader': 'thedailyshow',
|
|
||||||
'title': 'thedailyshow sarah-chayes-extended-interview part 2',
|
|
||||||
},
|
|
||||||
},
|
|
||||||
],
|
|
||||||
'params': {
|
|
||||||
'skip_download': True,
|
|
||||||
},
|
|
||||||
}, {
|
|
||||||
'url': 'http://thedailyshow.cc.com/extended-interviews/xm3fnq/andrew-napolitano-extended-interview',
|
|
||||||
'only_matching': True,
|
|
||||||
}, {
|
|
||||||
'url': 'http://thecolbertreport.cc.com/videos/29w6fx/-realhumanpraise-for-fox-news',
|
|
||||||
'only_matching': True,
|
|
||||||
}, {
|
|
||||||
'url': 'http://thecolbertreport.cc.com/videos/gh6urb/neil-degrasse-tyson-pt--1?xrs=eml_col_031114',
|
|
||||||
'only_matching': True,
|
|
||||||
}, {
|
|
||||||
'url': 'http://thedailyshow.cc.com/guests/michael-lewis/3efna8/exclusive---michael-lewis-extended-interview-pt--3',
|
|
||||||
'only_matching': True,
|
|
||||||
}, {
|
|
||||||
'url': 'http://thedailyshow.cc.com/episodes/sy7yv0/april-8--2014---denis-leary',
|
|
||||||
'only_matching': True,
|
|
||||||
}, {
|
|
||||||
'url': 'http://thecolbertreport.cc.com/episodes/8ase07/april-8--2014---jane-goodall',
|
|
||||||
'only_matching': True,
|
|
||||||
}, {
|
|
||||||
'url': 'http://thedailyshow.cc.com/video-playlists/npde3s/the-daily-show-19088-highlights',
|
|
||||||
'only_matching': True,
|
|
||||||
}, {
|
|
||||||
'url': 'http://thedailyshow.cc.com/video-playlists/t6d9sg/the-daily-show-20038-highlights/be3cwo',
|
|
||||||
'only_matching': True,
|
|
||||||
}, {
|
|
||||||
'url': 'http://thedailyshow.cc.com/special-editions/2l8fdb/special-edition---a-look-back-at-food',
|
|
||||||
'only_matching': True,
|
|
||||||
}, {
|
|
||||||
'url': 'http://thedailyshow.cc.com/news-team/michael-che/7wnfel/we-need-to-talk-about-israel',
|
|
||||||
'only_matching': True,
|
|
||||||
}, {
|
|
||||||
'url': 'http://tosh.cc.com/video-clips/68g93d/twitter-users-share-summer-plans',
|
'url': 'http://tosh.cc.com/video-clips/68g93d/twitter-users-share-summer-plans',
|
||||||
|
'info_dict': {
|
||||||
|
'description': 'Tosh asked fans to share their summer plans.',
|
||||||
|
'title': 'Twitter Users Share Summer Plans',
|
||||||
|
},
|
||||||
|
'playlist': [{
|
||||||
|
'md5': 'f269e88114c1805bb6d7653fecea9e06',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '90498ec2-ed00-11e0-aca6-0026b9414f30',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Tosh.0|June 9, 2077|2|211|Twitter Users Share Summer Plans',
|
||||||
|
'description': 'Tosh asked fans to share their summer plans.',
|
||||||
|
'thumbnail': 're:^https?://.*\.jpg',
|
||||||
|
# It's really reported to be published on year 2077
|
||||||
|
'upload_date': '20770610',
|
||||||
|
'timestamp': 3390510600,
|
||||||
|
'subtitles': {
|
||||||
|
'en': 'mincount:3',
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}]
|
||||||
|
}, {
|
||||||
|
'url': 'http://tosh.cc.com/video-collections/x2iz7k/just-plain-foul/m5q4fp',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
_available_formats = ['3500', '2200', '1700', '1200', '750', '400']
|
@classmethod
|
||||||
|
def _transform_rtmp_url(cls, rtmp_video_url):
|
||||||
_video_extensions = {
|
new_urls = super(ToshIE, cls)._transform_rtmp_url(rtmp_video_url)
|
||||||
'3500': 'mp4',
|
new_urls['rtmp'] = rtmp_video_url.replace('viacomccstrm', 'viacommtvstrm')
|
||||||
'2200': 'mp4',
|
return new_urls
|
||||||
'1700': 'mp4',
|
|
||||||
'1200': 'mp4',
|
|
||||||
'750': 'mp4',
|
|
||||||
'400': 'mp4',
|
|
||||||
}
|
|
||||||
_video_dimensions = {
|
|
||||||
'3500': (1280, 720),
|
|
||||||
'2200': (960, 540),
|
|
||||||
'1700': (768, 432),
|
|
||||||
'1200': (640, 360),
|
|
||||||
'750': (512, 288),
|
|
||||||
'400': (384, 216),
|
|
||||||
}
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
mobj = re.match(self._VALID_URL, url)
|
|
||||||
|
|
||||||
if mobj.group('shortname'):
|
|
||||||
return self.url_result('http://www.cc.com/shows/the-daily-show-with-trevor-noah/full-episodes')
|
|
||||||
|
|
||||||
if mobj.group('clip'):
|
|
||||||
if mobj.group('videotitle'):
|
|
||||||
epTitle = mobj.group('videotitle')
|
|
||||||
elif mobj.group('showname') == 'thedailyshow':
|
|
||||||
epTitle = mobj.group('tdstitle')
|
|
||||||
else:
|
|
||||||
epTitle = mobj.group('cntitle')
|
|
||||||
dlNewest = False
|
|
||||||
elif mobj.group('interview'):
|
|
||||||
epTitle = mobj.group('interview_title')
|
|
||||||
dlNewest = False
|
|
||||||
else:
|
|
||||||
dlNewest = not mobj.group('episode')
|
|
||||||
if dlNewest:
|
|
||||||
epTitle = mobj.group('showname')
|
|
||||||
else:
|
|
||||||
epTitle = mobj.group('episode')
|
|
||||||
show_name = mobj.group('showname')
|
|
||||||
|
|
||||||
webpage, htmlHandle = self._download_webpage_handle(url, epTitle)
|
|
||||||
if dlNewest:
|
|
||||||
url = htmlHandle.geturl()
|
|
||||||
mobj = re.match(self._VALID_URL, url, re.VERBOSE)
|
|
||||||
if mobj is None:
|
|
||||||
raise ExtractorError('Invalid redirected URL: ' + url)
|
|
||||||
if mobj.group('episode') == '':
|
|
||||||
raise ExtractorError('Redirected URL is still not specific: ' + url)
|
|
||||||
epTitle = (mobj.group('episode') or mobj.group('videotitle')).rpartition('/')[-1]
|
|
||||||
|
|
||||||
mMovieParams = re.findall('(?:<param name="movie" value="|var url = ")(http://media.mtvnservices.com/([^"]*(?:episode|video).*?:.*?))"', webpage)
|
|
||||||
if len(mMovieParams) == 0:
|
|
||||||
# The Colbert Report embeds the information in a without
|
|
||||||
# a URL prefix; so extract the alternate reference
|
|
||||||
# and then add the URL prefix manually.
|
|
||||||
|
|
||||||
altMovieParams = re.findall('data-mgid="([^"]*(?:episode|video|playlist).*?:.*?)"', webpage)
|
|
||||||
if len(altMovieParams) == 0:
|
|
||||||
raise ExtractorError('unable to find Flash URL in webpage ' + url)
|
|
||||||
else:
|
|
||||||
mMovieParams = [('http://media.mtvnservices.com/' + altMovieParams[0], altMovieParams[0])]
|
|
||||||
|
|
||||||
uri = mMovieParams[0][1]
|
|
||||||
# Correct cc.com in uri
|
|
||||||
uri = re.sub(r'(episode:[^.]+)(\.cc)?\.com', r'\1.com', uri)
|
|
||||||
|
|
||||||
index_url = 'http://%s.cc.com/feeds/mrss?%s' % (show_name, compat_urllib_parse_urlencode({'uri': uri}))
|
|
||||||
idoc = self._download_xml(
|
|
||||||
index_url, epTitle,
|
|
||||||
'Downloading show index', 'Unable to download episode index')
|
|
||||||
|
|
||||||
title = idoc.find('./channel/title').text
|
|
||||||
description = idoc.find('./channel/description').text
|
|
||||||
|
|
||||||
entries = []
|
|
||||||
item_els = idoc.findall('.//item')
|
|
||||||
for part_num, itemEl in enumerate(item_els):
|
|
||||||
upload_date = unified_strdate(itemEl.findall('./pubDate')[0].text)
|
|
||||||
thumbnail = itemEl.find('.//{http://search.yahoo.com/mrss/}thumbnail').attrib.get('url')
|
|
||||||
|
|
||||||
content = itemEl.find('.//{http://search.yahoo.com/mrss/}content')
|
|
||||||
duration = float_or_none(content.attrib.get('duration'))
|
|
||||||
mediagen_url = content.attrib['url']
|
|
||||||
guid = itemEl.find('./guid').text.rpartition(':')[-1]
|
|
||||||
|
|
||||||
cdoc = self._download_xml(
|
|
||||||
mediagen_url, epTitle,
|
|
||||||
'Downloading configuration for segment %d / %d' % (part_num + 1, len(item_els)))
|
|
||||||
|
|
||||||
turls = []
|
|
||||||
for rendition in cdoc.findall('.//rendition'):
|
|
||||||
finfo = (rendition.attrib['bitrate'], rendition.findall('./src')[0].text)
|
|
||||||
turls.append(finfo)
|
|
||||||
|
|
||||||
formats = []
|
|
||||||
for format, rtmp_video_url in turls:
|
|
||||||
w, h = self._video_dimensions.get(format, (None, None))
|
|
||||||
formats.append({
|
|
||||||
'format_id': 'vhttp-%s' % format,
|
|
||||||
'url': self._transform_rtmp_url(rtmp_video_url),
|
|
||||||
'ext': self._video_extensions.get(format, 'mp4'),
|
|
||||||
'height': h,
|
|
||||||
'width': w,
|
|
||||||
})
|
|
||||||
formats.append({
|
|
||||||
'format_id': 'rtmp-%s' % format,
|
|
||||||
'url': rtmp_video_url.replace('viacomccstrm', 'viacommtvstrm'),
|
|
||||||
'ext': self._video_extensions.get(format, 'mp4'),
|
|
||||||
'height': h,
|
|
||||||
'width': w,
|
|
||||||
})
|
|
||||||
self._sort_formats(formats)
|
|
||||||
|
|
||||||
subtitles = self._extract_subtitles(cdoc, guid)
|
|
||||||
|
|
||||||
virtual_id = show_name + ' ' + epTitle + ' part ' + compat_str(part_num + 1)
|
|
||||||
entries.append({
|
|
||||||
'id': guid,
|
|
||||||
'title': virtual_id,
|
|
||||||
'formats': formats,
|
|
||||||
'uploader': show_name,
|
|
||||||
'upload_date': upload_date,
|
|
||||||
'duration': duration,
|
|
||||||
'thumbnail': thumbnail,
|
|
||||||
'description': description,
|
|
||||||
'subtitles': subtitles,
|
|
||||||
})
|
|
||||||
|
|
||||||
return {
|
|
||||||
'_type': 'playlist',
|
|
||||||
'id': epTitle,
|
|
||||||
'entries': entries,
|
|
||||||
'title': show_name + ' ' + title,
|
|
||||||
'description': description,
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
class ComedyCentralTVIE(MTVServicesInfoExtractor):
|
class ComedyCentralTVIE(MTVServicesInfoExtractor):
|
||||||
|
@ -1481,6 +1481,13 @@ class InfoExtractor(object):
|
|||||||
compat_etree_fromstring(mpd.encode('utf-8')), mpd_id, mpd_base_url, formats_dict=formats_dict)
|
compat_etree_fromstring(mpd.encode('utf-8')), mpd_id, mpd_base_url, formats_dict=formats_dict)
|
||||||
|
|
||||||
def _parse_mpd_formats(self, mpd_doc, mpd_id=None, mpd_base_url='', formats_dict={}):
|
def _parse_mpd_formats(self, mpd_doc, mpd_id=None, mpd_base_url='', formats_dict={}):
|
||||||
|
"""
|
||||||
|
Parse formats from MPD manifest.
|
||||||
|
References:
|
||||||
|
1. MPEG-DASH Standard, ISO/IEC 23009-1:2014(E),
|
||||||
|
http://standards.iso.org/ittf/PubliclyAvailableStandards/c065274_ISO_IEC_23009-1_2014.zip
|
||||||
|
2. https://en.wikipedia.org/wiki/Dynamic_Adaptive_Streaming_over_HTTP
|
||||||
|
"""
|
||||||
if mpd_doc.get('type') == 'dynamic':
|
if mpd_doc.get('type') == 'dynamic':
|
||||||
return []
|
return []
|
||||||
|
|
||||||
@ -1513,8 +1520,16 @@ class InfoExtractor(object):
|
|||||||
s_e = segment_timeline.findall(_add_ns('S'))
|
s_e = segment_timeline.findall(_add_ns('S'))
|
||||||
if s_e:
|
if s_e:
|
||||||
ms_info['total_number'] = 0
|
ms_info['total_number'] = 0
|
||||||
|
ms_info['s'] = []
|
||||||
for s in s_e:
|
for s in s_e:
|
||||||
ms_info['total_number'] += 1 + int(s.get('r', '0'))
|
r = int(s.get('r', 0))
|
||||||
|
ms_info['total_number'] += 1 + r
|
||||||
|
ms_info['s'].append({
|
||||||
|
't': int(s.get('t', 0)),
|
||||||
|
# @d is mandatory (see [1, 5.3.9.6.2, Table 17, page 60])
|
||||||
|
'd': int(s.attrib['d']),
|
||||||
|
'r': r,
|
||||||
|
})
|
||||||
else:
|
else:
|
||||||
timescale = segment_template.get('timescale')
|
timescale = segment_template.get('timescale')
|
||||||
if timescale:
|
if timescale:
|
||||||
@ -1551,7 +1566,7 @@ class InfoExtractor(object):
|
|||||||
continue
|
continue
|
||||||
representation_attrib = adaptation_set.attrib.copy()
|
representation_attrib = adaptation_set.attrib.copy()
|
||||||
representation_attrib.update(representation.attrib)
|
representation_attrib.update(representation.attrib)
|
||||||
# According to page 41 of ISO/IEC 29001-1:2014, @mimeType is mandatory
|
# According to [1, 5.3.7.2, Table 9, page 41], @mimeType is mandatory
|
||||||
mime_type = representation_attrib['mimeType']
|
mime_type = representation_attrib['mimeType']
|
||||||
content_type = mime_type.split('/')[0]
|
content_type = mime_type.split('/')[0]
|
||||||
if content_type == 'text':
|
if content_type == 'text':
|
||||||
@ -1595,16 +1610,40 @@ class InfoExtractor(object):
|
|||||||
representation_ms_info['total_number'] = int(math.ceil(float(period_duration) / segment_duration))
|
representation_ms_info['total_number'] = int(math.ceil(float(period_duration) / segment_duration))
|
||||||
media_template = representation_ms_info['media_template']
|
media_template = representation_ms_info['media_template']
|
||||||
media_template = media_template.replace('$RepresentationID$', representation_id)
|
media_template = media_template.replace('$RepresentationID$', representation_id)
|
||||||
media_template = re.sub(r'\$(Number|Bandwidth)\$', r'%(\1)d', media_template)
|
media_template = re.sub(r'\$(Number|Bandwidth|Time)\$', r'%(\1)d', media_template)
|
||||||
media_template = re.sub(r'\$(Number|Bandwidth)%([^$]+)\$', r'%(\1)\2', media_template)
|
media_template = re.sub(r'\$(Number|Bandwidth|Time)%([^$]+)\$', r'%(\1)\2', media_template)
|
||||||
media_template.replace('$$', '$')
|
media_template.replace('$$', '$')
|
||||||
|
|
||||||
|
# As per [1, 5.3.9.4.4, Table 16, page 55] $Number$ and $Time$
|
||||||
|
# can't be used at the same time
|
||||||
|
if '%(Number' in media_template:
|
||||||
representation_ms_info['segment_urls'] = [
|
representation_ms_info['segment_urls'] = [
|
||||||
media_template % {
|
media_template % {
|
||||||
'Number': segment_number,
|
'Number': segment_number,
|
||||||
'Bandwidth': representation_attrib.get('bandwidth')}
|
'Bandwidth': representation_attrib.get('bandwidth'),
|
||||||
|
}
|
||||||
for segment_number in range(
|
for segment_number in range(
|
||||||
representation_ms_info['start_number'],
|
representation_ms_info['start_number'],
|
||||||
representation_ms_info['total_number'] + representation_ms_info['start_number'])]
|
representation_ms_info['total_number'] + representation_ms_info['start_number'])]
|
||||||
|
else:
|
||||||
|
representation_ms_info['segment_urls'] = []
|
||||||
|
segment_time = 0
|
||||||
|
|
||||||
|
def add_segment_url():
|
||||||
|
representation_ms_info['segment_urls'].append(
|
||||||
|
media_template % {
|
||||||
|
'Time': segment_time,
|
||||||
|
'Bandwidth': representation_attrib.get('bandwidth'),
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
for num, s in enumerate(representation_ms_info['s']):
|
||||||
|
segment_time = s.get('t') or segment_time
|
||||||
|
add_segment_url()
|
||||||
|
for r in range(s.get('r', 0)):
|
||||||
|
segment_time += s['d']
|
||||||
|
add_segment_url()
|
||||||
|
segment_time += s['d']
|
||||||
if 'segment_urls' in representation_ms_info:
|
if 'segment_urls' in representation_ms_info:
|
||||||
f.update({
|
f.update({
|
||||||
'segment_urls': representation_ms_info['segment_urls'],
|
'segment_urls': representation_ms_info['segment_urls'],
|
||||||
@ -1747,7 +1786,7 @@ class InfoExtractor(object):
|
|||||||
|
|
||||||
any_restricted = False
|
any_restricted = False
|
||||||
for tc in self.get_testcases(include_onlymatching=False):
|
for tc in self.get_testcases(include_onlymatching=False):
|
||||||
if 'playlist' in tc:
|
if tc.get('playlist', []):
|
||||||
tc = tc['playlist'][0]
|
tc = tc['playlist'][0]
|
||||||
is_restricted = age_restricted(
|
is_restricted = age_restricted(
|
||||||
tc.get('info_dict', {}).get('age_limit'), age_limit)
|
tc.get('info_dict', {}).get('age_limit'), age_limit)
|
||||||
|
@ -5,19 +5,20 @@ from .common import InfoExtractor
|
|||||||
from ..utils import (
|
from ..utils import (
|
||||||
int_or_none,
|
int_or_none,
|
||||||
determine_protocol,
|
determine_protocol,
|
||||||
|
unescapeHTML,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class DailyMailIE(InfoExtractor):
|
class DailyMailIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?dailymail\.co\.uk/video/[^/]+/video-(?P<id>[0-9]+)'
|
_VALID_URL = r'https?://(?:www\.)?dailymail\.co\.uk/video/[^/]+/video-(?P<id>[0-9]+)'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://www.dailymail.co.uk/video/sciencetech/video-1288527/Turn-video-impressionist-masterpiece.html',
|
'url': 'http://www.dailymail.co.uk/video/tvshowbiz/video-1295863/The-Mountain-appears-sparkling-water-ad-Heavy-Bubbles.html',
|
||||||
'md5': '2f639d446394f53f3a33658b518b6615',
|
'md5': 'f6129624562251f628296c3a9ffde124',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '1288527',
|
'id': '1295863',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Turn any video into an impressionist masterpiece',
|
'title': 'The Mountain appears in sparkling water ad for \'Heavy Bubbles\'',
|
||||||
'description': 'md5:88ddbcb504367987b2708bb38677c9d2',
|
'description': 'md5:a93d74b6da172dd5dc4d973e0b766a84',
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -26,7 +27,7 @@ class DailyMailIE(InfoExtractor):
|
|||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
video_data = self._parse_json(self._search_regex(
|
video_data = self._parse_json(self._search_regex(
|
||||||
r"data-opts='({.+?})'", webpage, 'video data'), video_id)
|
r"data-opts='({.+?})'", webpage, 'video data'), video_id)
|
||||||
title = video_data['title']
|
title = unescapeHTML(video_data['title'])
|
||||||
video_sources = self._download_json(video_data.get(
|
video_sources = self._download_json(video_data.get(
|
||||||
'sources', {}).get('url') or 'http://www.dailymail.co.uk/api/player/%s/video-sources.json' % video_id, video_id)
|
'sources', {}).get('url') or 'http://www.dailymail.co.uk/api/player/%s/video-sources.json' % video_id, video_id)
|
||||||
|
|
||||||
@ -55,7 +56,7 @@ class DailyMailIE(InfoExtractor):
|
|||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
'description': video_data.get('descr'),
|
'description': unescapeHTML(video_data.get('descr')),
|
||||||
'thumbnail': video_data.get('poster') or video_data.get('thumbnail'),
|
'thumbnail': video_data.get('poster') or video_data.get('thumbnail'),
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
}
|
}
|
||||||
|
@ -62,11 +62,9 @@ class DCNBaseIE(InfoExtractor):
|
|||||||
r'file\s*:\s*"https?(://[^"]+)/playlist.m3u8',
|
r'file\s*:\s*"https?(://[^"]+)/playlist.m3u8',
|
||||||
r'<a[^>]+href="rtsp(://[^"]+)"'
|
r'<a[^>]+href="rtsp(://[^"]+)"'
|
||||||
], webpage, 'format url')
|
], webpage, 'format url')
|
||||||
# TODO: Current DASH formats are broken - $Time$ pattern in
|
formats.extend(self._extract_mpd_formats(
|
||||||
# <SegmentTemplate> not implemented yet
|
format_url_base + '/manifest.mpd',
|
||||||
# formats.extend(self._extract_mpd_formats(
|
video_id, mpd_id='dash', fatal=False))
|
||||||
# format_url_base + '/manifest.mpd',
|
|
||||||
# video_id, mpd_id='dash', fatal=False))
|
|
||||||
formats.extend(self._extract_m3u8_formats(
|
formats.extend(self._extract_m3u8_formats(
|
||||||
format_url_base + '/playlist.m3u8', video_id, 'mp4',
|
format_url_base + '/playlist.m3u8', video_id, 'mp4',
|
||||||
m3u8_entry_protocol, m3u8_id='hls', fatal=False))
|
m3u8_entry_protocol, m3u8_id='hls', fatal=False))
|
||||||
|
@ -4,19 +4,23 @@ from __future__ import unicode_literals
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from ..compat import compat_str
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
|
encode_base_n,
|
||||||
|
ExtractorError,
|
||||||
|
int_or_none,
|
||||||
parse_duration,
|
parse_duration,
|
||||||
str_to_int,
|
str_to_int,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class EpornerIE(InfoExtractor):
|
class EpornerIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?eporner\.com/hd-porn/(?P<id>\w+)/(?P<display_id>[\w-]+)'
|
_VALID_URL = r'https?://(?:www\.)?eporner\.com/hd-porn/(?P<id>\w+)(?:/(?P<display_id>[\w-]+))?'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.eporner.com/hd-porn/95008/Infamous-Tiffany-Teen-Strip-Tease-Video/',
|
'url': 'http://www.eporner.com/hd-porn/95008/Infamous-Tiffany-Teen-Strip-Tease-Video/',
|
||||||
'md5': '39d486f046212d8e1b911c52ab4691f8',
|
'md5': '39d486f046212d8e1b911c52ab4691f8',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '95008',
|
'id': 'qlDUmNsj6VS',
|
||||||
'display_id': 'Infamous-Tiffany-Teen-Strip-Tease-Video',
|
'display_id': 'Infamous-Tiffany-Teen-Strip-Tease-Video',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Infamous Tiffany Teen Strip Tease Video',
|
'title': 'Infamous Tiffany Teen Strip Tease Video',
|
||||||
@ -28,34 +32,72 @@ class EpornerIE(InfoExtractor):
|
|||||||
# New (May 2016) URL layout
|
# New (May 2016) URL layout
|
||||||
'url': 'http://www.eporner.com/hd-porn/3YRUtzMcWn0/Star-Wars-XXX-Parody/',
|
'url': 'http://www.eporner.com/hd-porn/3YRUtzMcWn0/Star-Wars-XXX-Parody/',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.eporner.com/hd-porn/3YRUtzMcWn0',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
video_id = mobj.group('id')
|
video_id = mobj.group('id')
|
||||||
display_id = mobj.group('display_id')
|
display_id = mobj.group('display_id') or video_id
|
||||||
|
|
||||||
webpage = self._download_webpage(url, display_id)
|
webpage, urlh = self._download_webpage_handle(url, display_id)
|
||||||
title = self._html_search_regex(
|
|
||||||
r'<title>(.*?) - EPORNER', webpage, 'title')
|
|
||||||
|
|
||||||
redirect_url = 'http://www.eporner.com/config5/%s' % video_id
|
video_id = self._match_id(compat_str(urlh.geturl()))
|
||||||
player_code = self._download_webpage(
|
|
||||||
redirect_url, display_id, note='Downloading player config')
|
|
||||||
|
|
||||||
sources = self._search_regex(
|
hash = self._search_regex(
|
||||||
r'(?s)sources\s*:\s*\[\s*({.+?})\s*\]', player_code, 'sources')
|
r'hash\s*:\s*["\']([\da-f]{32})', webpage, 'hash')
|
||||||
|
|
||||||
|
title = self._og_search_title(webpage, default=None) or self._html_search_regex(
|
||||||
|
r'<title>(.+?) - EPORNER', webpage, 'title')
|
||||||
|
|
||||||
|
# Reverse engineered from vjs.js
|
||||||
|
def calc_hash(s):
|
||||||
|
return ''.join((encode_base_n(int(s[lb:lb + 8], 16), 36) for lb in range(0, 32, 8)))
|
||||||
|
|
||||||
|
video = self._download_json(
|
||||||
|
'http://www.eporner.com/xhr/video/%s' % video_id,
|
||||||
|
display_id, note='Downloading video JSON',
|
||||||
|
query={
|
||||||
|
'hash': calc_hash(hash),
|
||||||
|
'device': 'generic',
|
||||||
|
'domain': 'www.eporner.com',
|
||||||
|
'fallback': 'false',
|
||||||
|
})
|
||||||
|
|
||||||
|
if video.get('available') is False:
|
||||||
|
raise ExtractorError(
|
||||||
|
'%s said: %s' % (self.IE_NAME, video['message']), expected=True)
|
||||||
|
|
||||||
|
sources = video['sources']
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
for video_url, format_id in re.findall(r'file\s*:\s*"([^"]+)",\s*label\s*:\s*"([^"]+)"', sources):
|
for kind, formats_dict in sources.items():
|
||||||
fmt = {
|
if not isinstance(formats_dict, dict):
|
||||||
'url': video_url,
|
continue
|
||||||
|
for format_id, format_dict in formats_dict.items():
|
||||||
|
if not isinstance(format_dict, dict):
|
||||||
|
continue
|
||||||
|
src = format_dict.get('src')
|
||||||
|
if not isinstance(src, compat_str) or not src.startswith('http'):
|
||||||
|
continue
|
||||||
|
if kind == 'hls':
|
||||||
|
formats.extend(self._extract_m3u8_formats(
|
||||||
|
src, display_id, 'mp4', entry_protocol='m3u8_native',
|
||||||
|
m3u8_id=kind, fatal=False))
|
||||||
|
else:
|
||||||
|
height = int_or_none(self._search_regex(
|
||||||
|
r'(\d+)[pP]', format_id, 'height', default=None))
|
||||||
|
fps = int_or_none(self._search_regex(
|
||||||
|
r'(\d+)fps', format_id, 'fps', default=None))
|
||||||
|
|
||||||
|
formats.append({
|
||||||
|
'url': src,
|
||||||
'format_id': format_id,
|
'format_id': format_id,
|
||||||
}
|
'height': height,
|
||||||
m = re.search(r'^(\d+)', format_id)
|
'fps': fps,
|
||||||
if m:
|
})
|
||||||
fmt['height'] = int(m.group(1))
|
|
||||||
formats.append(fmt)
|
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
duration = parse_duration(self._html_search_meta('duration', webpage))
|
duration = parse_duration(self._html_search_meta('duration', webpage))
|
||||||
|
@ -44,6 +44,7 @@ from .appletrailers import (
|
|||||||
AppleTrailersSectionIE,
|
AppleTrailersSectionIE,
|
||||||
)
|
)
|
||||||
from .archiveorg import ArchiveOrgIE
|
from .archiveorg import ArchiveOrgIE
|
||||||
|
from .arkena import ArkenaIE
|
||||||
from .ard import (
|
from .ard import (
|
||||||
ARDIE,
|
ARDIE,
|
||||||
ARDMediathekIE,
|
ARDMediathekIE,
|
||||||
@ -158,8 +159,8 @@ from .coub import CoubIE
|
|||||||
from .collegerama import CollegeRamaIE
|
from .collegerama import CollegeRamaIE
|
||||||
from .comedycentral import (
|
from .comedycentral import (
|
||||||
ComedyCentralIE,
|
ComedyCentralIE,
|
||||||
ComedyCentralShowsIE,
|
|
||||||
ComedyCentralTVIE,
|
ComedyCentralTVIE,
|
||||||
|
ToshIE,
|
||||||
)
|
)
|
||||||
from .comcarcoff import ComCarCoffIE
|
from .comcarcoff import ComCarCoffIE
|
||||||
from .commonmistakes import CommonMistakesIE, UnicodeBOMIE
|
from .commonmistakes import CommonMistakesIE, UnicodeBOMIE
|
||||||
@ -397,6 +398,10 @@ from .kuwo import (
|
|||||||
)
|
)
|
||||||
from .la7 import LA7IE
|
from .la7 import LA7IE
|
||||||
from .laola1tv import Laola1TvIE
|
from .laola1tv import Laola1TvIE
|
||||||
|
from .lcp import (
|
||||||
|
LcpPlayIE,
|
||||||
|
LcpIE,
|
||||||
|
)
|
||||||
from .learnr import LearnrIE
|
from .learnr import LearnrIE
|
||||||
from .lecture2go import Lecture2GoIE
|
from .lecture2go import Lecture2GoIE
|
||||||
from .lemonde import LemondeIE
|
from .lemonde import LemondeIE
|
||||||
@ -475,7 +480,6 @@ from .msn import MSNIE
|
|||||||
from .mtv import (
|
from .mtv import (
|
||||||
MTVIE,
|
MTVIE,
|
||||||
MTVServicesEmbeddedIE,
|
MTVServicesEmbeddedIE,
|
||||||
MTVIggyIE,
|
|
||||||
MTVDEIE,
|
MTVDEIE,
|
||||||
)
|
)
|
||||||
from .muenchentv import MuenchenTVIE
|
from .muenchentv import MuenchenTVIE
|
||||||
@ -525,7 +529,6 @@ from .nextmedia import (
|
|||||||
NextMediaActionNewsIE,
|
NextMediaActionNewsIE,
|
||||||
AppleDailyIE,
|
AppleDailyIE,
|
||||||
)
|
)
|
||||||
from .nextmovie import NextMovieIE
|
|
||||||
from .nfb import NFBIE
|
from .nfb import NFBIE
|
||||||
from .nfl import NFLIE
|
from .nfl import NFLIE
|
||||||
from .nhl import (
|
from .nhl import (
|
||||||
|
@ -27,7 +27,7 @@ class FacebookIE(InfoExtractor):
|
|||||||
_VALID_URL = r'''(?x)
|
_VALID_URL = r'''(?x)
|
||||||
(?:
|
(?:
|
||||||
https?://
|
https?://
|
||||||
(?:\w+\.)?facebook\.com/
|
(?:[\w-]+\.)?facebook\.com/
|
||||||
(?:[^#]*?\#!/)?
|
(?:[^#]*?\#!/)?
|
||||||
(?:
|
(?:
|
||||||
(?:
|
(?:
|
||||||
@ -127,6 +127,9 @@ class FacebookIE(InfoExtractor):
|
|||||||
}, {
|
}, {
|
||||||
'url': 'https://www.facebook.com/groups/164828000315060/permalink/764967300301124/',
|
'url': 'https://www.facebook.com/groups/164828000315060/permalink/764967300301124/',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://zh-hk.facebook.com/peoplespower/videos/1135894589806027/',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
|
@ -62,6 +62,7 @@ from .videomore import VideomoreIE
|
|||||||
from .googledrive import GoogleDriveIE
|
from .googledrive import GoogleDriveIE
|
||||||
from .jwplatform import JWPlatformIE
|
from .jwplatform import JWPlatformIE
|
||||||
from .digiteka import DigitekaIE
|
from .digiteka import DigitekaIE
|
||||||
|
from .arkena import ArkenaIE
|
||||||
from .instagram import InstagramIE
|
from .instagram import InstagramIE
|
||||||
from .liveleak import LiveLeakIE
|
from .liveleak import LiveLeakIE
|
||||||
from .threeqsdn import ThreeQSDNIE
|
from .threeqsdn import ThreeQSDNIE
|
||||||
@ -70,6 +71,7 @@ from .vessel import VesselIE
|
|||||||
from .kaltura import KalturaIE
|
from .kaltura import KalturaIE
|
||||||
from .eagleplatform import EaglePlatformIE
|
from .eagleplatform import EaglePlatformIE
|
||||||
from .facebook import FacebookIE
|
from .facebook import FacebookIE
|
||||||
|
from .soundcloud import SoundcloudIE
|
||||||
|
|
||||||
|
|
||||||
class GenericIE(InfoExtractor):
|
class GenericIE(InfoExtractor):
|
||||||
@ -473,7 +475,7 @@ class GenericIE(InfoExtractor):
|
|||||||
'url': 'http://www.vestifinance.ru/articles/25753',
|
'url': 'http://www.vestifinance.ru/articles/25753',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '25753',
|
'id': '25753',
|
||||||
'title': 'Вести Экономика ― Прямые трансляции с Форума-выставки "Госзаказ-2013"',
|
'title': 'Прямые трансляции с Форума-выставки "Госзаказ-2013"',
|
||||||
},
|
},
|
||||||
'playlist': [{
|
'playlist': [{
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@ -640,6 +642,8 @@ class GenericIE(InfoExtractor):
|
|||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Key and Peele|October 10, 2012|2|203|Liam Neesons - Uncensored',
|
'title': 'Key and Peele|October 10, 2012|2|203|Liam Neesons - Uncensored',
|
||||||
'description': 'Two valets share their love for movie star Liam Neesons.',
|
'description': 'Two valets share their love for movie star Liam Neesons.',
|
||||||
|
'timestamp': 1349922600,
|
||||||
|
'upload_date': '20121011',
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
# YouTube embed via <data-embed-url="">
|
# YouTube embed via <data-embed-url="">
|
||||||
@ -781,6 +785,15 @@ class GenericIE(InfoExtractor):
|
|||||||
'upload_date': '20141029',
|
'upload_date': '20141029',
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
# Soundcloud multiple embeds
|
||||||
|
{
|
||||||
|
'url': 'http://www.guitarplayer.com/lessons/1014/legato-workout-one-hour-to-more-fluid-performance---tab/52809',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '52809',
|
||||||
|
'title': 'Guitar Essentials: Legato Workout—One-Hour to Fluid Performance | TAB + AUDIO',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 7,
|
||||||
|
},
|
||||||
# Livestream embed
|
# Livestream embed
|
||||||
{
|
{
|
||||||
'url': 'http://www.esa.int/Our_Activities/Space_Science/Rosetta/Philae_comet_touch-down_webcast',
|
'url': 'http://www.esa.int/Our_Activities/Space_Science/Rosetta/Philae_comet_touch-down_webcast',
|
||||||
@ -856,6 +869,7 @@ class GenericIE(InfoExtractor):
|
|||||||
'description': 'md5:601cb790edd05908957dae8aaa866465',
|
'description': 'md5:601cb790edd05908957dae8aaa866465',
|
||||||
'upload_date': '20150220',
|
'upload_date': '20150220',
|
||||||
},
|
},
|
||||||
|
'skip': 'All The Daily Show URLs now redirect to http://www.cc.com/shows/',
|
||||||
},
|
},
|
||||||
# jwplayer YouTube
|
# jwplayer YouTube
|
||||||
{
|
{
|
||||||
@ -1342,6 +1356,23 @@ class GenericIE(InfoExtractor):
|
|||||||
},
|
},
|
||||||
'add_ie': ['Vimeo'],
|
'add_ie': ['Vimeo'],
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
'url': 'https://support.arkena.com/display/PLAY/Ways+to+embed+your+video',
|
||||||
|
'md5': 'b96f2f71b359a8ecd05ce4e1daa72365',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Big Buck Bunny',
|
||||||
|
'description': 'Royalty free test video',
|
||||||
|
'timestamp': 1432816365,
|
||||||
|
'upload_date': '20150528',
|
||||||
|
'is_live': False,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
'add_ie': [ArkenaIE.ie_key()],
|
||||||
|
},
|
||||||
# {
|
# {
|
||||||
# # TODO: find another test
|
# # TODO: find another test
|
||||||
# # http://schema.org/VideoObject
|
# # http://schema.org/VideoObject
|
||||||
@ -1978,12 +2009,9 @@ class GenericIE(InfoExtractor):
|
|||||||
return self.url_result(myvi_url)
|
return self.url_result(myvi_url)
|
||||||
|
|
||||||
# Look for embedded soundcloud player
|
# Look for embedded soundcloud player
|
||||||
mobj = re.search(
|
soundcloud_urls = SoundcloudIE._extract_urls(webpage)
|
||||||
r'<iframe\s+(?:[a-zA-Z0-9_-]+="[^"]+"\s+)*src="(?P<url>https?://(?:w\.)?soundcloud\.com/player[^"]+)"',
|
if soundcloud_urls:
|
||||||
webpage)
|
return _playlist_from_matches(soundcloud_urls, getter=unescapeHTML, ie=SoundcloudIE.ie_key())
|
||||||
if mobj is not None:
|
|
||||||
url = unescapeHTML(mobj.group('url'))
|
|
||||||
return self.url_result(url)
|
|
||||||
|
|
||||||
# Look for embedded mtvservices player
|
# Look for embedded mtvservices player
|
||||||
mtvservices_url = MTVServicesEmbeddedIE._extract_url(webpage)
|
mtvservices_url = MTVServicesEmbeddedIE._extract_url(webpage)
|
||||||
@ -2146,6 +2174,11 @@ class GenericIE(InfoExtractor):
|
|||||||
if digiteka_url:
|
if digiteka_url:
|
||||||
return self.url_result(self._proto_relative_url(digiteka_url), DigitekaIE.ie_key())
|
return self.url_result(self._proto_relative_url(digiteka_url), DigitekaIE.ie_key())
|
||||||
|
|
||||||
|
# Look for Arkena embeds
|
||||||
|
arkena_url = ArkenaIE._extract_url(webpage)
|
||||||
|
if arkena_url:
|
||||||
|
return self.url_result(arkena_url, ArkenaIE.ie_key())
|
||||||
|
|
||||||
# Look for Limelight embeds
|
# Look for Limelight embeds
|
||||||
mobj = re.search(r'LimelightPlayer\.doLoad(Media|Channel|ChannelList)\(["\'](?P<id>[a-z0-9]{32})', webpage)
|
mobj = re.search(r'LimelightPlayer\.doLoad(Media|Channel|ChannelList)\(["\'](?P<id>[a-z0-9]{32})', webpage)
|
||||||
if mobj:
|
if mobj:
|
||||||
|
@ -36,7 +36,6 @@ class InstagramIE(InfoExtractor):
|
|||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'BA-pQFBG8HZ',
|
'id': 'BA-pQFBG8HZ',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'uploader_id': 'britneyspears',
|
|
||||||
'title': 'Video by britneyspears',
|
'title': 'Video by britneyspears',
|
||||||
'thumbnail': 're:^https?://.*\.jpg',
|
'thumbnail': 're:^https?://.*\.jpg',
|
||||||
'timestamp': 1453760977,
|
'timestamp': 1453760977,
|
||||||
|
90
youtube_dl/extractor/lcp.py
Normal file
90
youtube_dl/extractor/lcp.py
Normal file
@ -0,0 +1,90 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from .arkena import ArkenaIE
|
||||||
|
|
||||||
|
|
||||||
|
class LcpPlayIE(ArkenaIE):
|
||||||
|
_VALID_URL = r'https?://play\.lcp\.fr/embed/(?P<id>[^/]+)/(?P<account_id>[^/]+)/[^/]+/[^/]+'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'http://play.lcp.fr/embed/327336/131064/darkmatter/0',
|
||||||
|
'md5': 'b8bd9298542929c06c1c15788b1f277a',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '327336',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': '327336',
|
||||||
|
'timestamp': 1456391602,
|
||||||
|
'upload_date': '20160225',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
}]
|
||||||
|
|
||||||
|
|
||||||
|
class LcpIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?lcp\.fr/(?:[^/]+/)*(?P<id>[^/]+)'
|
||||||
|
|
||||||
|
_TESTS = [{
|
||||||
|
# arkena embed
|
||||||
|
'url': 'http://www.lcp.fr/la-politique-en-video/schwartzenberg-prg-preconise-francois-hollande-de-participer-une-primaire',
|
||||||
|
'md5': 'b8bd9298542929c06c1c15788b1f277a',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'd56d03e9',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Schwartzenberg (PRG) préconise à François Hollande de participer à une primaire à gauche',
|
||||||
|
'description': 'md5:96ad55009548da9dea19f4120c6c16a8',
|
||||||
|
'timestamp': 1456488895,
|
||||||
|
'upload_date': '20160226',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
# dailymotion live stream
|
||||||
|
'url': 'http://www.lcp.fr/le-direct',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'xji3qy',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'La Chaine Parlementaire (LCP), Live TNT',
|
||||||
|
'description': 'md5:5c69593f2de0f38bd9a949f2c95e870b',
|
||||||
|
'uploader': 'LCP',
|
||||||
|
'uploader_id': 'xbz33d',
|
||||||
|
'timestamp': 1308923058,
|
||||||
|
'upload_date': '20110624',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
# m3u8 live stream
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.lcp.fr/emissions/277792-les-volontaires',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
display_id = self._match_id(url)
|
||||||
|
|
||||||
|
webpage = self._download_webpage(url, display_id)
|
||||||
|
|
||||||
|
play_url = self._search_regex(
|
||||||
|
r'<iframe[^>]+src=(["\'])(?P<url>%s?(?:(?!\1).)*)\1' % LcpPlayIE._VALID_URL,
|
||||||
|
webpage, 'play iframe', default=None, group='url')
|
||||||
|
|
||||||
|
if not play_url:
|
||||||
|
return self.url_result(url, 'Generic')
|
||||||
|
|
||||||
|
title = self._og_search_title(webpage, default=None) or self._html_search_meta(
|
||||||
|
'twitter:title', webpage, fatal=True)
|
||||||
|
description = self._html_search_meta(
|
||||||
|
('description', 'twitter:description'), webpage)
|
||||||
|
|
||||||
|
return {
|
||||||
|
'_type': 'url_transparent',
|
||||||
|
'ie_key': LcpPlayIE.ie_key(),
|
||||||
|
'url': play_url,
|
||||||
|
'display_id': display_id,
|
||||||
|
'title': title,
|
||||||
|
'description': description,
|
||||||
|
}
|
@ -9,7 +9,7 @@ class MGTVIE(InfoExtractor):
|
|||||||
_VALID_URL = r'https?://www\.mgtv\.com/v/(?:[^/]+/)*(?P<id>\d+)\.html'
|
_VALID_URL = r'https?://www\.mgtv\.com/v/(?:[^/]+/)*(?P<id>\d+)\.html'
|
||||||
IE_DESC = '芒果TV'
|
IE_DESC = '芒果TV'
|
||||||
|
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
'url': 'http://www.mgtv.com/v/1/290525/f/3116640.html',
|
'url': 'http://www.mgtv.com/v/1/290525/f/3116640.html',
|
||||||
'md5': '1bdadcf760a0b90946ca68ee9a2db41a',
|
'md5': '1bdadcf760a0b90946ca68ee9a2db41a',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@ -20,7 +20,11 @@ class MGTVIE(InfoExtractor):
|
|||||||
'duration': 7461,
|
'duration': 7461,
|
||||||
'thumbnail': 're:^https?://.*\.jpg$',
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
},
|
},
|
||||||
}
|
}, {
|
||||||
|
# no tbr extracted from stream_url
|
||||||
|
'url': 'http://www.mgtv.com/v/1/1/f/3324755.html',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
@ -41,7 +45,8 @@ class MGTVIE(InfoExtractor):
|
|||||||
def extract_format(stream_url, format_id, idx, query={}):
|
def extract_format(stream_url, format_id, idx, query={}):
|
||||||
format_info = self._download_json(
|
format_info = self._download_json(
|
||||||
stream_url, video_id,
|
stream_url, video_id,
|
||||||
note='Download video info for format %s' % format_id or '#%d' % idx, query=query)
|
note='Download video info for format %s' % (format_id or '#%d' % idx),
|
||||||
|
query=query)
|
||||||
return {
|
return {
|
||||||
'format_id': format_id,
|
'format_id': format_id,
|
||||||
'url': format_info['info'],
|
'url': format_info['info'],
|
||||||
|
@ -16,6 +16,7 @@ from ..utils import (
|
|||||||
HEADRequest,
|
HEADRequest,
|
||||||
sanitized_Request,
|
sanitized_Request,
|
||||||
strip_or_none,
|
strip_or_none,
|
||||||
|
timeconvert,
|
||||||
unescapeHTML,
|
unescapeHTML,
|
||||||
url_basename,
|
url_basename,
|
||||||
RegexNotFoundError,
|
RegexNotFoundError,
|
||||||
@ -36,13 +37,13 @@ class MTVServicesInfoExtractor(InfoExtractor):
|
|||||||
return uri.split(':')[-1]
|
return uri.split(':')[-1]
|
||||||
|
|
||||||
# This was originally implemented for ComedyCentral, but it also works here
|
# This was originally implemented for ComedyCentral, but it also works here
|
||||||
@staticmethod
|
@classmethod
|
||||||
def _transform_rtmp_url(rtmp_video_url):
|
def _transform_rtmp_url(cls, rtmp_video_url):
|
||||||
m = re.match(r'^rtmpe?://.*?/(?P<finalid>gsp\..+?/.*)$', rtmp_video_url)
|
m = re.match(r'^rtmpe?://.*?/(?P<finalid>gsp\..+?/.*)$', rtmp_video_url)
|
||||||
if not m:
|
if not m:
|
||||||
return rtmp_video_url
|
return {'rtmp': rtmp_video_url}
|
||||||
base = 'http://viacommtvstrmfs.fplive.net/'
|
base = 'http://viacommtvstrmfs.fplive.net/'
|
||||||
return base + m.group('finalid')
|
return {'http': base + m.group('finalid')}
|
||||||
|
|
||||||
def _get_feed_url(self, uri):
|
def _get_feed_url(self, uri):
|
||||||
return self._FEED_URL
|
return self._FEED_URL
|
||||||
@ -86,14 +87,14 @@ class MTVServicesInfoExtractor(InfoExtractor):
|
|||||||
rtmp_video_url = rendition.find('./src').text
|
rtmp_video_url = rendition.find('./src').text
|
||||||
if rtmp_video_url.endswith('siteunavail.png'):
|
if rtmp_video_url.endswith('siteunavail.png'):
|
||||||
continue
|
continue
|
||||||
new_url = self._transform_rtmp_url(rtmp_video_url)
|
new_urls = self._transform_rtmp_url(rtmp_video_url)
|
||||||
formats.append({
|
formats.extend([{
|
||||||
'ext': 'flv' if new_url.startswith('rtmp') else ext,
|
'ext': 'flv' if new_url.startswith('rtmp') else ext,
|
||||||
'url': new_url,
|
'url': new_url,
|
||||||
'format_id': rendition.get('bitrate'),
|
'format_id': '-'.join(filter(None, [kind, rendition.get('bitrate')])),
|
||||||
'width': int(rendition.get('width')),
|
'width': int(rendition.get('width')),
|
||||||
'height': int(rendition.get('height')),
|
'height': int(rendition.get('height')),
|
||||||
})
|
} for kind, new_url in new_urls.items()])
|
||||||
except (KeyError, TypeError):
|
except (KeyError, TypeError):
|
||||||
raise ExtractorError('Invalid rendition field.')
|
raise ExtractorError('Invalid rendition field.')
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
@ -136,6 +137,8 @@ class MTVServicesInfoExtractor(InfoExtractor):
|
|||||||
|
|
||||||
description = strip_or_none(xpath_text(itemdoc, 'description'))
|
description = strip_or_none(xpath_text(itemdoc, 'description'))
|
||||||
|
|
||||||
|
timestamp = timeconvert(xpath_text(itemdoc, 'pubDate'))
|
||||||
|
|
||||||
title_el = None
|
title_el = None
|
||||||
if title_el is None:
|
if title_el is None:
|
||||||
title_el = find_xpath_attr(
|
title_el = find_xpath_attr(
|
||||||
@ -168,6 +171,7 @@ class MTVServicesInfoExtractor(InfoExtractor):
|
|||||||
'thumbnail': self._get_thumbnail_url(uri, itemdoc),
|
'thumbnail': self._get_thumbnail_url(uri, itemdoc),
|
||||||
'description': description,
|
'description': description,
|
||||||
'duration': float_or_none(content_el.attrib.get('duration')),
|
'duration': float_or_none(content_el.attrib.get('duration')),
|
||||||
|
'timestamp': timestamp,
|
||||||
}
|
}
|
||||||
|
|
||||||
def _get_feed_query(self, uri):
|
def _get_feed_query(self, uri):
|
||||||
@ -186,8 +190,13 @@ class MTVServicesInfoExtractor(InfoExtractor):
|
|||||||
idoc = self._download_xml(
|
idoc = self._download_xml(
|
||||||
url, video_id,
|
url, video_id,
|
||||||
'Downloading info', transform_source=fix_xml_ampersands)
|
'Downloading info', transform_source=fix_xml_ampersands)
|
||||||
|
|
||||||
|
title = xpath_text(idoc, './channel/title')
|
||||||
|
description = xpath_text(idoc, './channel/description')
|
||||||
|
|
||||||
return self.playlist_result(
|
return self.playlist_result(
|
||||||
[self._get_video_info(item) for item in idoc.findall('.//item')])
|
[self._get_video_info(item) for item in idoc.findall('.//item')],
|
||||||
|
playlist_title=title, playlist_description=description)
|
||||||
|
|
||||||
def _extract_mgid(self, webpage):
|
def _extract_mgid(self, webpage):
|
||||||
try:
|
try:
|
||||||
@ -233,6 +242,8 @@ class MTVServicesEmbeddedIE(MTVServicesInfoExtractor):
|
|||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Peter Dinklage Sums Up \'Game Of Thrones\' In 45 Seconds',
|
'title': 'Peter Dinklage Sums Up \'Game Of Thrones\' In 45 Seconds',
|
||||||
'description': '"Sexy sexy sexy, stabby stabby stabby, beautiful language," says Peter Dinklage as he tries summarizing "Game of Thrones" in under a minute.',
|
'description': '"Sexy sexy sexy, stabby stabby stabby, beautiful language," says Peter Dinklage as he tries summarizing "Game of Thrones" in under a minute.',
|
||||||
|
'timestamp': 1400126400,
|
||||||
|
'upload_date': '20140515',
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -275,6 +286,8 @@ class MTVIE(MTVServicesInfoExtractor):
|
|||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Taylor Swift - "Ours (VH1 Storytellers)"',
|
'title': 'Taylor Swift - "Ours (VH1 Storytellers)"',
|
||||||
'description': 'Album: Taylor Swift performs "Ours" for VH1 Storytellers at Harvey Mudd College.',
|
'description': 'Album: Taylor Swift performs "Ours" for VH1 Storytellers at Harvey Mudd College.',
|
||||||
|
'timestamp': 1352610000,
|
||||||
|
'upload_date': '20121111',
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
]
|
]
|
||||||
@ -301,20 +314,6 @@ class MTVIE(MTVServicesInfoExtractor):
|
|||||||
return self._get_videos_info(uri)
|
return self._get_videos_info(uri)
|
||||||
|
|
||||||
|
|
||||||
class MTVIggyIE(MTVServicesInfoExtractor):
|
|
||||||
IE_NAME = 'mtviggy.com'
|
|
||||||
_VALID_URL = r'https?://www\.mtviggy\.com/videos/.+'
|
|
||||||
_TEST = {
|
|
||||||
'url': 'http://www.mtviggy.com/videos/arcade-fire-behind-the-scenes-at-the-biggest-music-experiment-yet/',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '984696',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'Arcade Fire: Behind the Scenes at the Biggest Music Experiment Yet',
|
|
||||||
}
|
|
||||||
}
|
|
||||||
_FEED_URL = 'http://all.mtvworldverticals.com/feed-xml/'
|
|
||||||
|
|
||||||
|
|
||||||
class MTVDEIE(MTVServicesInfoExtractor):
|
class MTVDEIE(MTVServicesInfoExtractor):
|
||||||
IE_NAME = 'mtv.de'
|
IE_NAME = 'mtv.de'
|
||||||
_VALID_URL = r'https?://(?:www\.)?mtv\.de/(?:artists|shows|news)/(?:[^/]+/)*(?P<id>\d+)-[^/#?]+/*(?:[#?].*)?$'
|
_VALID_URL = r'https?://(?:www\.)?mtv\.de/(?:artists|shows|news)/(?:[^/]+/)*(?P<id>\d+)-[^/#?]+/*(?:[#?].*)?$'
|
||||||
@ -322,7 +321,7 @@ class MTVDEIE(MTVServicesInfoExtractor):
|
|||||||
'url': 'http://www.mtv.de/artists/10571-cro/videos/61131-traum',
|
'url': 'http://www.mtv.de/artists/10571-cro/videos/61131-traum',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'music_video-a50bc5f0b3aa4b3190aa',
|
'id': 'music_video-a50bc5f0b3aa4b3190aa',
|
||||||
'ext': 'mp4',
|
'ext': 'flv',
|
||||||
'title': 'MusicVideo_cro-traum',
|
'title': 'MusicVideo_cro-traum',
|
||||||
'description': 'Cro - Traum',
|
'description': 'Cro - Traum',
|
||||||
},
|
},
|
||||||
@ -330,20 +329,21 @@ class MTVDEIE(MTVServicesInfoExtractor):
|
|||||||
# rtmp download
|
# rtmp download
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
|
'skip': 'Blocked at Travis CI',
|
||||||
}, {
|
}, {
|
||||||
# mediagen URL without query (e.g. http://videos.mtvnn.com/mediagen/e865da714c166d18d6f80893195fcb97)
|
# mediagen URL without query (e.g. http://videos.mtvnn.com/mediagen/e865da714c166d18d6f80893195fcb97)
|
||||||
'url': 'http://www.mtv.de/shows/933-teen-mom-2/staffeln/5353/folgen/63565-enthullungen',
|
'url': 'http://www.mtv.de/shows/933-teen-mom-2/staffeln/5353/folgen/63565-enthullungen',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'local_playlist-f5ae778b9832cc837189',
|
'id': 'local_playlist-f5ae778b9832cc837189',
|
||||||
'ext': 'mp4',
|
'ext': 'flv',
|
||||||
'title': 'Episode_teen-mom-2_shows_season-5_episode-1_full-episode_part1',
|
'title': 'Episode_teen-mom-2_shows_season-5_episode-1_full-episode_part1',
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
# rtmp download
|
# rtmp download
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
|
'skip': 'Blocked at Travis CI',
|
||||||
}, {
|
}, {
|
||||||
# single video in pagePlaylist with different id
|
|
||||||
'url': 'http://www.mtv.de/news/77491-mtv-movies-spotlight-pixels-teil-3',
|
'url': 'http://www.mtv.de/news/77491-mtv-movies-spotlight-pixels-teil-3',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'local_playlist-4e760566473c4c8c5344',
|
'id': 'local_playlist-4e760566473c4c8c5344',
|
||||||
@ -355,6 +355,7 @@ class MTVDEIE(MTVServicesInfoExtractor):
|
|||||||
# rtmp download
|
# rtmp download
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
|
'skip': 'Das Video kann zur Zeit nicht abgespielt werden.',
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
@ -367,11 +368,14 @@ class MTVDEIE(MTVServicesInfoExtractor):
|
|||||||
r'window\.pagePlaylist\s*=\s*(\[.+?\]);\n', webpage, 'page playlist'),
|
r'window\.pagePlaylist\s*=\s*(\[.+?\]);\n', webpage, 'page playlist'),
|
||||||
video_id)
|
video_id)
|
||||||
|
|
||||||
|
def _mrss_url(item):
|
||||||
|
return item['mrss'] + item.get('mrssvars', '')
|
||||||
|
|
||||||
# news pages contain single video in playlist with different id
|
# news pages contain single video in playlist with different id
|
||||||
if len(playlist) == 1:
|
if len(playlist) == 1:
|
||||||
return self._get_videos_info_from_url(playlist[0]['mrss'], video_id)
|
return self._get_videos_info_from_url(_mrss_url(playlist[0]), video_id)
|
||||||
|
|
||||||
for item in playlist:
|
for item in playlist:
|
||||||
item_id = item.get('id')
|
item_id = item.get('id')
|
||||||
if item_id and compat_str(item_id) == video_id:
|
if item_id and compat_str(item_id) == video_id:
|
||||||
return self._get_videos_info_from_url(item['mrss'], video_id)
|
return self._get_videos_info_from_url(_mrss_url(item), video_id)
|
||||||
|
@ -1,30 +0,0 @@
|
|||||||
# coding: utf-8
|
|
||||||
from __future__ import unicode_literals
|
|
||||||
|
|
||||||
from .mtv import MTVServicesInfoExtractor
|
|
||||||
from ..compat import compat_urllib_parse_urlencode
|
|
||||||
|
|
||||||
|
|
||||||
class NextMovieIE(MTVServicesInfoExtractor):
|
|
||||||
IE_NAME = 'nextmovie.com'
|
|
||||||
_VALID_URL = r'https?://(?:www\.)?nextmovie\.com/shows/[^/]+/\d{4}-\d{2}-\d{2}/(?P<id>[^/?#]+)'
|
|
||||||
_FEED_URL = 'http://lite.dextr.mtvi.com/service1/dispatch.htm'
|
|
||||||
_TESTS = [{
|
|
||||||
'url': 'http://www.nextmovie.com/shows/exclusives/2013-03-10/mgid:uma:videolist:nextmovie.com:1715019/',
|
|
||||||
'md5': '09a9199f2f11f10107d04fcb153218aa',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '961726',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'The Muppets\' Gravity',
|
|
||||||
},
|
|
||||||
}]
|
|
||||||
|
|
||||||
def _get_feed_query(self, uri):
|
|
||||||
return compat_urllib_parse_urlencode({
|
|
||||||
'feed': '1505',
|
|
||||||
'mgid': uri,
|
|
||||||
})
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
mgid = self._match_id(url)
|
|
||||||
return self._get_videos_info(mgid)
|
|
@ -7,6 +7,7 @@ from ..utils import update_url_query
|
|||||||
|
|
||||||
|
|
||||||
class NickIE(MTVServicesInfoExtractor):
|
class NickIE(MTVServicesInfoExtractor):
|
||||||
|
# None of videos on the website are still alive?
|
||||||
IE_NAME = 'nick.com'
|
IE_NAME = 'nick.com'
|
||||||
_VALID_URL = r'https?://(?:www\.)?nick(?:jr)?\.com/(?:videos/clip|[^/]+/videos)/(?P<id>[^/?#.]+)'
|
_VALID_URL = r'https?://(?:www\.)?nick(?:jr)?\.com/(?:videos/clip|[^/]+/videos)/(?P<id>[^/?#.]+)'
|
||||||
_FEED_URL = 'http://udat.mtvnservices.com/service1/dispatch.htm'
|
_FEED_URL = 'http://udat.mtvnservices.com/service1/dispatch.htm'
|
||||||
|
@ -59,11 +59,8 @@ class OnetBaseIE(InfoExtractor):
|
|||||||
# TODO: Support Microsoft Smooth Streaming
|
# TODO: Support Microsoft Smooth Streaming
|
||||||
continue
|
continue
|
||||||
elif ext == 'mpd':
|
elif ext == 'mpd':
|
||||||
# TODO: Current DASH formats are broken - $Time$ pattern in
|
formats.extend(self._extract_mpd_formats(
|
||||||
# <SegmentTemplate> not implemented yet
|
video_url, video_id, mpd_id='dash', fatal=False))
|
||||||
# formats.extend(self._extract_mpd_formats(
|
|
||||||
# video_url, video_id, mpd_id='dash', fatal=False))
|
|
||||||
continue
|
|
||||||
else:
|
else:
|
||||||
formats.append({
|
formats.append({
|
||||||
'url': video_url,
|
'url': video_url,
|
||||||
|
@ -137,13 +137,16 @@ class ORFTVthekIE(InfoExtractor):
|
|||||||
class ORFOE1IE(InfoExtractor):
|
class ORFOE1IE(InfoExtractor):
|
||||||
IE_NAME = 'orf:oe1'
|
IE_NAME = 'orf:oe1'
|
||||||
IE_DESC = 'Radio Österreich 1'
|
IE_DESC = 'Radio Österreich 1'
|
||||||
_VALID_URL = r'https?://oe1\.orf\.at/(?:programm/|konsole.*?#\?track_id=)(?P<id>[0-9]+)'
|
_VALID_URL = r'https?://oe1\.orf\.at/(?:programm/|konsole\?.*?\btrack_id=)(?P<id>[0-9]+)'
|
||||||
|
|
||||||
# Audios on ORF radio are only available for 7 days, so we can't add tests.
|
# Audios on ORF radio are only available for 7 days, so we can't add tests.
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
'url': 'http://oe1.orf.at/konsole?show=on_demand#?track_id=394211',
|
'url': 'http://oe1.orf.at/konsole?show=on_demand#?track_id=394211',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}
|
}, {
|
||||||
|
'url': 'http://oe1.orf.at/konsole?show=ondemand&track_id=443608&load_day=/programm/konsole/tag/20160726',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
show_id = self._match_id(url)
|
show_id = self._match_id(url)
|
||||||
|
@ -111,7 +111,7 @@ class PornHubIE(InfoExtractor):
|
|||||||
webpage = self._download_webpage(req, video_id)
|
webpage = self._download_webpage(req, video_id)
|
||||||
|
|
||||||
error_msg = self._html_search_regex(
|
error_msg = self._html_search_regex(
|
||||||
r'(?s)<div[^>]+class=(["\']).*?\b(?:removed|userMessageSection)\b.*?\1[^>]*>(?P<error>.+?)</div>',
|
r'(?s)<div[^>]+class=(["\'])(?:(?!\1).)*\b(?:removed|userMessageSection)\b(?:(?!\1).)*\1[^>]*>(?P<error>.+?)</div>',
|
||||||
webpage, 'error message', default=None, group='error')
|
webpage, 'error message', default=None, group='error')
|
||||||
if error_msg:
|
if error_msg:
|
||||||
error_msg = re.sub(r'\s+', ' ', error_msg)
|
error_msg = re.sub(r'\s+', ' ', error_msg)
|
||||||
|
@ -6,7 +6,6 @@ from .common import InfoExtractor
|
|||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
sanitized_Request,
|
|
||||||
urlencode_postdata,
|
urlencode_postdata,
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -37,28 +36,33 @@ class SharedIE(InfoExtractor):
|
|||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(url, video_id)
|
|
||||||
|
webpage, urlh = self._download_webpage_handle(url, video_id)
|
||||||
|
|
||||||
if '>File does not exist<' in webpage:
|
if '>File does not exist<' in webpage:
|
||||||
raise ExtractorError(
|
raise ExtractorError(
|
||||||
'Video %s does not exist' % video_id, expected=True)
|
'Video %s does not exist' % video_id, expected=True)
|
||||||
|
|
||||||
download_form = self._hidden_inputs(webpage)
|
download_form = self._hidden_inputs(webpage)
|
||||||
request = sanitized_Request(
|
|
||||||
url, urlencode_postdata(download_form))
|
|
||||||
request.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
|
||||||
|
|
||||||
video_page = self._download_webpage(
|
video_page = self._download_webpage(
|
||||||
request, video_id, 'Downloading video page')
|
urlh.geturl(), video_id, 'Downloading video page',
|
||||||
|
data=urlencode_postdata(download_form),
|
||||||
|
headers={
|
||||||
|
'Content-Type': 'application/x-www-form-urlencoded',
|
||||||
|
'Referer': urlh.geturl(),
|
||||||
|
})
|
||||||
|
|
||||||
video_url = self._html_search_regex(
|
video_url = self._html_search_regex(
|
||||||
r'data-url="([^"]+)"', video_page, 'video URL')
|
r'data-url=(["\'])(?P<url>(?:(?!\1).)+)\1',
|
||||||
|
video_page, 'video URL', group='url')
|
||||||
title = base64.b64decode(self._html_search_meta(
|
title = base64.b64decode(self._html_search_meta(
|
||||||
'full:title', webpage, 'title').encode('utf-8')).decode('utf-8')
|
'full:title', webpage, 'title').encode('utf-8')).decode('utf-8')
|
||||||
filesize = int_or_none(self._html_search_meta(
|
filesize = int_or_none(self._html_search_meta(
|
||||||
'full:size', webpage, 'file size', fatal=False))
|
'full:size', webpage, 'file size', fatal=False))
|
||||||
thumbnail = self._html_search_regex(
|
thumbnail = self._html_search_regex(
|
||||||
r'data-poster="([^"]+)"', video_page, 'thumbnail', default=None)
|
r'data-poster=(["\'])(?P<url>(?:(?!\1).)+)\1',
|
||||||
|
video_page, 'thumbnail', default=None, group='url')
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
|
@ -13,20 +13,21 @@ from ..utils import (
|
|||||||
sanitized_Request,
|
sanitized_Request,
|
||||||
unified_strdate,
|
unified_strdate,
|
||||||
urlencode_postdata,
|
urlencode_postdata,
|
||||||
|
xpath_text,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class SmotriIE(InfoExtractor):
|
class SmotriIE(InfoExtractor):
|
||||||
IE_DESC = 'Smotri.com'
|
IE_DESC = 'Smotri.com'
|
||||||
IE_NAME = 'smotri'
|
IE_NAME = 'smotri'
|
||||||
_VALID_URL = r'^https?://(?:www\.)?(?:smotri\.com/video/view/\?id=|pics\.smotri\.com/(?:player|scrubber_custom8)\.swf\?file=)(?P<id>v(?P<realvideoid>[0-9]+)[a-z0-9]{4})'
|
_VALID_URL = r'https?://(?:www\.)?(?:smotri\.com/video/view/\?id=|pics\.smotri\.com/(?:player|scrubber_custom8)\.swf\?file=)(?P<id>v(?P<realvideoid>[0-9]+)[a-z0-9]{4})'
|
||||||
_NETRC_MACHINE = 'smotri'
|
_NETRC_MACHINE = 'smotri'
|
||||||
|
|
||||||
_TESTS = [
|
_TESTS = [
|
||||||
# real video id 2610366
|
# real video id 2610366
|
||||||
{
|
{
|
||||||
'url': 'http://smotri.com/video/view/?id=v261036632ab',
|
'url': 'http://smotri.com/video/view/?id=v261036632ab',
|
||||||
'md5': '2a7b08249e6f5636557579c368040eb9',
|
'md5': '02c0dfab2102984e9c5bb585cc7cc321',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'v261036632ab',
|
'id': 'v261036632ab',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
@ -174,11 +175,11 @@ class SmotriIE(InfoExtractor):
|
|||||||
if video_password:
|
if video_password:
|
||||||
video_form['pass'] = hashlib.md5(video_password.encode('utf-8')).hexdigest()
|
video_form['pass'] = hashlib.md5(video_password.encode('utf-8')).hexdigest()
|
||||||
|
|
||||||
request = sanitized_Request(
|
video = self._download_json(
|
||||||
'http://smotri.com/video/view/url/bot/', urlencode_postdata(video_form))
|
'http://smotri.com/video/view/url/bot/',
|
||||||
request.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
video_id, 'Downloading video JSON',
|
||||||
|
data=urlencode_postdata(video_form),
|
||||||
video = self._download_json(request, video_id, 'Downloading video JSON')
|
headers={'Content-Type': 'application/x-www-form-urlencoded'})
|
||||||
|
|
||||||
video_url = video.get('_vidURL') or video.get('_vidURL_mp4')
|
video_url = video.get('_vidURL') or video.get('_vidURL_mp4')
|
||||||
|
|
||||||
@ -196,11 +197,11 @@ class SmotriIE(InfoExtractor):
|
|||||||
raise ExtractorError(msg, expected=True)
|
raise ExtractorError(msg, expected=True)
|
||||||
|
|
||||||
title = video['title']
|
title = video['title']
|
||||||
thumbnail = video['_imgURL']
|
thumbnail = video.get('_imgURL')
|
||||||
upload_date = unified_strdate(video['added'])
|
upload_date = unified_strdate(video.get('added'))
|
||||||
uploader = video['userNick']
|
uploader = video.get('userNick')
|
||||||
uploader_id = video['userLogin']
|
uploader_id = video.get('userLogin')
|
||||||
duration = int_or_none(video['duration'])
|
duration = int_or_none(video.get('duration'))
|
||||||
|
|
||||||
# Video JSON does not provide enough meta data
|
# Video JSON does not provide enough meta data
|
||||||
# We will extract some from the video web page instead
|
# We will extract some from the video web page instead
|
||||||
@ -209,7 +210,7 @@ class SmotriIE(InfoExtractor):
|
|||||||
|
|
||||||
# Warning if video is unavailable
|
# Warning if video is unavailable
|
||||||
warning = self._html_search_regex(
|
warning = self._html_search_regex(
|
||||||
r'<div class="videoUnModer">(.*?)</div>', webpage,
|
r'<div[^>]+class="videoUnModer"[^>]*>(.+?)</div>', webpage,
|
||||||
'warning message', default=None)
|
'warning message', default=None)
|
||||||
if warning is not None:
|
if warning is not None:
|
||||||
self._downloader.report_warning(
|
self._downloader.report_warning(
|
||||||
@ -217,20 +218,22 @@ class SmotriIE(InfoExtractor):
|
|||||||
(video_id, warning))
|
(video_id, warning))
|
||||||
|
|
||||||
# Adult content
|
# Adult content
|
||||||
if re.search('EroConfirmText">', webpage) is not None:
|
if 'EroConfirmText">' in webpage:
|
||||||
self.report_age_confirmation()
|
self.report_age_confirmation()
|
||||||
confirm_string = self._html_search_regex(
|
confirm_string = self._html_search_regex(
|
||||||
r'<a href="/video/view/\?id=%s&confirm=([^"]+)" title="[^"]+">' % video_id,
|
r'<a[^>]+href="/video/view/\?id=%s&confirm=([^"]+)"' % video_id,
|
||||||
webpage, 'confirm string')
|
webpage, 'confirm string')
|
||||||
confirm_url = webpage_url + '&confirm=%s' % confirm_string
|
confirm_url = webpage_url + '&confirm=%s' % confirm_string
|
||||||
webpage = self._download_webpage(confirm_url, video_id, 'Downloading video page (age confirmed)')
|
webpage = self._download_webpage(
|
||||||
|
confirm_url, video_id,
|
||||||
|
'Downloading video page (age confirmed)')
|
||||||
adult_content = True
|
adult_content = True
|
||||||
else:
|
else:
|
||||||
adult_content = False
|
adult_content = False
|
||||||
|
|
||||||
view_count = self._html_search_regex(
|
view_count = self._html_search_regex(
|
||||||
'Общее количество просмотров.*?<span class="Number">(\\d+)</span>',
|
r'(?s)Общее количество просмотров.*?<span class="Number">(\d+)</span>',
|
||||||
webpage, 'view count', fatal=False, flags=re.MULTILINE | re.DOTALL)
|
webpage, 'view count', fatal=False)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
@ -249,37 +252,33 @@ class SmotriIE(InfoExtractor):
|
|||||||
class SmotriCommunityIE(InfoExtractor):
|
class SmotriCommunityIE(InfoExtractor):
|
||||||
IE_DESC = 'Smotri.com community videos'
|
IE_DESC = 'Smotri.com community videos'
|
||||||
IE_NAME = 'smotri:community'
|
IE_NAME = 'smotri:community'
|
||||||
_VALID_URL = r'^https?://(?:www\.)?smotri\.com/community/video/(?P<communityid>[0-9A-Za-z_\'-]+)'
|
_VALID_URL = r'https?://(?:www\.)?smotri\.com/community/video/(?P<id>[0-9A-Za-z_\'-]+)'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://smotri.com/community/video/kommuna',
|
'url': 'http://smotri.com/community/video/kommuna',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'kommuna',
|
'id': 'kommuna',
|
||||||
'title': 'КПРФ',
|
|
||||||
},
|
},
|
||||||
'playlist_mincount': 4,
|
'playlist_mincount': 4,
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
community_id = self._match_id(url)
|
||||||
community_id = mobj.group('communityid')
|
|
||||||
|
|
||||||
url = 'http://smotri.com/export/rss/video/by/community/-/%s/video.xml' % community_id
|
rss = self._download_xml(
|
||||||
rss = self._download_xml(url, community_id, 'Downloading community RSS')
|
'http://smotri.com/export/rss/video/by/community/-/%s/video.xml' % community_id,
|
||||||
|
community_id, 'Downloading community RSS')
|
||||||
|
|
||||||
entries = [self.url_result(video_url.text, 'Smotri')
|
entries = [
|
||||||
|
self.url_result(video_url.text, SmotriIE.ie_key())
|
||||||
for video_url in rss.findall('./channel/item/link')]
|
for video_url in rss.findall('./channel/item/link')]
|
||||||
|
|
||||||
description_text = rss.find('./channel/description').text
|
return self.playlist_result(entries, community_id)
|
||||||
community_title = self._html_search_regex(
|
|
||||||
'^Видео сообщества "([^"]+)"$', description_text, 'community title')
|
|
||||||
|
|
||||||
return self.playlist_result(entries, community_id, community_title)
|
|
||||||
|
|
||||||
|
|
||||||
class SmotriUserIE(InfoExtractor):
|
class SmotriUserIE(InfoExtractor):
|
||||||
IE_DESC = 'Smotri.com user videos'
|
IE_DESC = 'Smotri.com user videos'
|
||||||
IE_NAME = 'smotri:user'
|
IE_NAME = 'smotri:user'
|
||||||
_VALID_URL = r'^https?://(?:www\.)?smotri\.com/user/(?P<userid>[0-9A-Za-z_\'-]+)'
|
_VALID_URL = r'https?://(?:www\.)?smotri\.com/user/(?P<id>[0-9A-Za-z_\'-]+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://smotri.com/user/inspector',
|
'url': 'http://smotri.com/user/inspector',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@ -290,19 +289,19 @@ class SmotriUserIE(InfoExtractor):
|
|||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
user_id = self._match_id(url)
|
||||||
user_id = mobj.group('userid')
|
|
||||||
|
|
||||||
url = 'http://smotri.com/export/rss/user/video/-/%s/video.xml' % user_id
|
rss = self._download_xml(
|
||||||
rss = self._download_xml(url, user_id, 'Downloading user RSS')
|
'http://smotri.com/export/rss/user/video/-/%s/video.xml' % user_id,
|
||||||
|
user_id, 'Downloading user RSS')
|
||||||
|
|
||||||
entries = [self.url_result(video_url.text, 'Smotri')
|
entries = [self.url_result(video_url.text, 'Smotri')
|
||||||
for video_url in rss.findall('./channel/item/link')]
|
for video_url in rss.findall('./channel/item/link')]
|
||||||
|
|
||||||
description_text = rss.find('./channel/description').text
|
description_text = xpath_text(rss, './channel/description') or ''
|
||||||
user_nickname = self._html_search_regex(
|
user_nickname = self._search_regex(
|
||||||
'^Видео режиссера (.*)$', description_text,
|
'^Видео режиссера (.+)$', description_text,
|
||||||
'user nickname')
|
'user nickname', fatal=False)
|
||||||
|
|
||||||
return self.playlist_result(entries, user_id, user_nickname)
|
return self.playlist_result(entries, user_id, user_nickname)
|
||||||
|
|
||||||
@ -310,11 +309,11 @@ class SmotriUserIE(InfoExtractor):
|
|||||||
class SmotriBroadcastIE(InfoExtractor):
|
class SmotriBroadcastIE(InfoExtractor):
|
||||||
IE_DESC = 'Smotri.com broadcasts'
|
IE_DESC = 'Smotri.com broadcasts'
|
||||||
IE_NAME = 'smotri:broadcast'
|
IE_NAME = 'smotri:broadcast'
|
||||||
_VALID_URL = r'^https?://(?:www\.)?(?P<url>smotri\.com/live/(?P<broadcastid>[^/]+))/?.*'
|
_VALID_URL = r'https?://(?:www\.)?(?P<url>smotri\.com/live/(?P<id>[^/]+))/?.*'
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
broadcast_id = mobj.group('broadcastid')
|
broadcast_id = mobj.group('id')
|
||||||
|
|
||||||
broadcast_url = 'http://' + mobj.group('url')
|
broadcast_url = 'http://' + mobj.group('url')
|
||||||
broadcast_page = self._download_webpage(broadcast_url, broadcast_id, 'Downloading broadcast page')
|
broadcast_page = self._download_webpage(broadcast_url, broadcast_id, 'Downloading broadcast page')
|
||||||
@ -328,7 +327,8 @@ class SmotriBroadcastIE(InfoExtractor):
|
|||||||
|
|
||||||
(username, password) = self._get_login_info()
|
(username, password) = self._get_login_info()
|
||||||
if username is None:
|
if username is None:
|
||||||
self.raise_login_required('Erotic broadcasts allowed only for registered users')
|
self.raise_login_required(
|
||||||
|
'Erotic broadcasts allowed only for registered users')
|
||||||
|
|
||||||
login_form = {
|
login_form = {
|
||||||
'login-hint53': '1',
|
'login-hint53': '1',
|
||||||
@ -343,8 +343,9 @@ class SmotriBroadcastIE(InfoExtractor):
|
|||||||
broadcast_page = self._download_webpage(
|
broadcast_page = self._download_webpage(
|
||||||
request, broadcast_id, 'Logging in and confirming age')
|
request, broadcast_id, 'Logging in and confirming age')
|
||||||
|
|
||||||
if re.search('>Неверный логин или пароль<', broadcast_page) is not None:
|
if '>Неверный логин или пароль<' in broadcast_page:
|
||||||
raise ExtractorError('Unable to log in: bad username or password', expected=True)
|
raise ExtractorError(
|
||||||
|
'Unable to log in: bad username or password', expected=True)
|
||||||
|
|
||||||
adult_content = True
|
adult_content = True
|
||||||
else:
|
else:
|
||||||
@ -383,11 +384,11 @@ class SmotriBroadcastIE(InfoExtractor):
|
|||||||
|
|
||||||
broadcast_playpath = broadcast_json['_streamName']
|
broadcast_playpath = broadcast_json['_streamName']
|
||||||
broadcast_app = '%s/%s' % (mobj.group('app'), broadcast_json['_vidURL'])
|
broadcast_app = '%s/%s' % (mobj.group('app'), broadcast_json['_vidURL'])
|
||||||
broadcast_thumbnail = broadcast_json['_imgURL']
|
broadcast_thumbnail = broadcast_json.get('_imgURL')
|
||||||
broadcast_title = self._live_title(broadcast_json['title'])
|
broadcast_title = self._live_title(broadcast_json['title'])
|
||||||
broadcast_description = broadcast_json['description']
|
broadcast_description = broadcast_json.get('description')
|
||||||
broadcaster_nick = broadcast_json['nick']
|
broadcaster_nick = broadcast_json.get('nick')
|
||||||
broadcaster_login = broadcast_json['login']
|
broadcaster_login = broadcast_json.get('login')
|
||||||
rtmp_conn = 'S:%s' % uuid.uuid4().hex
|
rtmp_conn = 'S:%s' % uuid.uuid4().hex
|
||||||
except KeyError:
|
except KeyError:
|
||||||
if protected_broadcast:
|
if protected_broadcast:
|
||||||
|
@ -119,6 +119,12 @@ class SoundcloudIE(InfoExtractor):
|
|||||||
_CLIENT_ID = '02gUJC0hH2ct1EGOcYXQIzRFU91c72Ea'
|
_CLIENT_ID = '02gUJC0hH2ct1EGOcYXQIzRFU91c72Ea'
|
||||||
_IPHONE_CLIENT_ID = '376f225bf427445fc4bfb6b99b72e0bf'
|
_IPHONE_CLIENT_ID = '376f225bf427445fc4bfb6b99b72e0bf'
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _extract_urls(webpage):
|
||||||
|
return [m.group('url') for m in re.finditer(
|
||||||
|
r'<iframe[^>]+src=(["\'])(?P<url>(?:https?://)?(?:w\.)?soundcloud\.com/player.+?)\1',
|
||||||
|
webpage)]
|
||||||
|
|
||||||
def report_resolve(self, video_id):
|
def report_resolve(self, video_id):
|
||||||
"""Report information extraction."""
|
"""Report information extraction."""
|
||||||
self.to_screen('%s: Resolving id' % video_id)
|
self.to_screen('%s: Resolving id' % video_id)
|
||||||
|
@ -17,6 +17,8 @@ class SouthParkIE(MTVServicesInfoExtractor):
|
|||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'South Park|Bat Daded',
|
'title': 'South Park|Bat Daded',
|
||||||
'description': 'Randy disqualifies South Park by getting into a fight with Bat Dad.',
|
'description': 'Randy disqualifies South Park by getting into a fight with Bat Dad.',
|
||||||
|
'timestamp': 1112760000,
|
||||||
|
'upload_date': '20050406',
|
||||||
},
|
},
|
||||||
}]
|
}]
|
||||||
|
|
||||||
@ -28,6 +30,10 @@ class SouthParkEsIE(SouthParkIE):
|
|||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://southpark.cc.com/episodios-en-espanol/s01e01-cartman-consigue-una-sonda-anal#source=351c1323-0b96-402d-a8b9-40d01b2e9bde&position=1&sort=!airdate',
|
'url': 'http://southpark.cc.com/episodios-en-espanol/s01e01-cartman-consigue-una-sonda-anal#source=351c1323-0b96-402d-a8b9-40d01b2e9bde&position=1&sort=!airdate',
|
||||||
|
'info_dict': {
|
||||||
|
'title': 'Cartman Consigue Una Sonda Anal',
|
||||||
|
'description': 'Cartman Consigue Una Sonda Anal',
|
||||||
|
},
|
||||||
'playlist_count': 4,
|
'playlist_count': 4,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
@ -42,17 +48,27 @@ class SouthParkDeIE(SouthParkIE):
|
|||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '85487c96-b3b9-4e39-9127-ad88583d9bf2',
|
'id': '85487c96-b3b9-4e39-9127-ad88583d9bf2',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'The Government Won\'t Respect My Privacy',
|
'title': 'South Park|The Government Won\'t Respect My Privacy',
|
||||||
'description': 'Cartman explains the benefits of "Shitter" to Stan, Kyle and Craig.',
|
'description': 'Cartman explains the benefits of "Shitter" to Stan, Kyle and Craig.',
|
||||||
|
'timestamp': 1380160800,
|
||||||
|
'upload_date': '20130926',
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
# non-ASCII characters in initial URL
|
# non-ASCII characters in initial URL
|
||||||
'url': 'http://www.southpark.de/alle-episoden/s18e09-hashtag-aufwärmen',
|
'url': 'http://www.southpark.de/alle-episoden/s18e09-hashtag-aufwärmen',
|
||||||
'playlist_count': 4,
|
'info_dict': {
|
||||||
|
'title': 'Hashtag „Aufwärmen“',
|
||||||
|
'description': 'Kyle will mit seinem kleinen Bruder Ike Videospiele spielen. Als der nicht mehr mit ihm spielen will, hat Kyle Angst, dass er die Kids von heute nicht mehr versteht.',
|
||||||
|
},
|
||||||
|
'playlist_count': 3,
|
||||||
}, {
|
}, {
|
||||||
# non-ASCII characters in redirect URL
|
# non-ASCII characters in redirect URL
|
||||||
'url': 'http://www.southpark.de/alle-episoden/s18e09',
|
'url': 'http://www.southpark.de/alle-episoden/s18e09',
|
||||||
'playlist_count': 4,
|
'info_dict': {
|
||||||
|
'title': 'Hashtag „Aufwärmen“',
|
||||||
|
'description': 'Kyle will mit seinem kleinen Bruder Ike Videospiele spielen. Als der nicht mehr mit ihm spielen will, hat Kyle Angst, dass er die Kids von heute nicht mehr versteht.',
|
||||||
|
},
|
||||||
|
'playlist_count': 3,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
|
|
||||||
@ -63,7 +79,11 @@ class SouthParkNlIE(SouthParkIE):
|
|||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.southpark.nl/full-episodes/s18e06-freemium-isnt-free',
|
'url': 'http://www.southpark.nl/full-episodes/s18e06-freemium-isnt-free',
|
||||||
'playlist_count': 4,
|
'info_dict': {
|
||||||
|
'title': 'Freemium Isn\'t Free',
|
||||||
|
'description': 'Stan is addicted to the new Terrance and Phillip mobile game.',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 3,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
|
|
||||||
@ -74,5 +94,9 @@ class SouthParkDkIE(SouthParkIE):
|
|||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.southparkstudios.dk/full-episodes/s18e07-grounded-vindaloop',
|
'url': 'http://www.southparkstudios.dk/full-episodes/s18e07-grounded-vindaloop',
|
||||||
'playlist_count': 4,
|
'info_dict': {
|
||||||
|
'title': 'Grounded Vindaloop',
|
||||||
|
'description': 'Butters is convinced he\'s living in a virtual reality.',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 3,
|
||||||
}]
|
}]
|
||||||
|
@ -11,8 +11,10 @@ class SpikeIE(MTVServicesInfoExtractor):
|
|||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'b9c8221a-4e50-479a-b86d-3333323e38ba',
|
'id': 'b9c8221a-4e50-479a-b86d-3333323e38ba',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Auction Hunters|Can Allen Ride A Hundred Year-Old Motorcycle?',
|
'title': 'Auction Hunters|December 27, 2013|4|414|Can Allen Ride A Hundred Year-Old Motorcycle?',
|
||||||
'description': 'md5:fbed7e82ed5fad493615b3094a9499cb',
|
'description': 'md5:fbed7e82ed5fad493615b3094a9499cb',
|
||||||
|
'timestamp': 1388120400,
|
||||||
|
'upload_date': '20131227',
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.spike.com/video-clips/lhtu8m/',
|
'url': 'http://www.spike.com/video-clips/lhtu8m/',
|
||||||
|
@ -47,11 +47,10 @@ class TelegraafIE(InfoExtractor):
|
|||||||
ext = determine_ext(manifest_url)
|
ext = determine_ext(manifest_url)
|
||||||
if ext == 'm3u8':
|
if ext == 'm3u8':
|
||||||
formats.extend(self._extract_m3u8_formats(
|
formats.extend(self._extract_m3u8_formats(
|
||||||
manifest_url, video_id, ext='mp4', m3u8_id='hls'))
|
manifest_url, video_id, ext='mp4', m3u8_id='hls', fatal=False))
|
||||||
elif ext == 'mpd':
|
elif ext == 'mpd':
|
||||||
# TODO: Current DASH formats are broken - $Time$ pattern in
|
formats.extend(self._extract_mpd_formats(
|
||||||
# <SegmentTemplate> not implemented yet
|
manifest_url, video_id, mpd_id='dash', fatal=False))
|
||||||
continue
|
|
||||||
else:
|
else:
|
||||||
self.report_warning('Unknown adaptive format %s' % ext)
|
self.report_warning('Unknown adaptive format %s' % ext)
|
||||||
for location in locations.get('progressive', []):
|
for location in locations.get('progressive', []):
|
||||||
|
@ -9,56 +9,23 @@ class TVLandIE(MTVServicesInfoExtractor):
|
|||||||
_VALID_URL = r'https?://(?:www\.)?tvland\.com/(?:video-clips|episodes)/(?P<id>[^/?#.]+)'
|
_VALID_URL = r'https?://(?:www\.)?tvland\.com/(?:video-clips|episodes)/(?P<id>[^/?#.]+)'
|
||||||
_FEED_URL = 'http://www.tvland.com/feeds/mrss/'
|
_FEED_URL = 'http://www.tvland.com/feeds/mrss/'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
|
# Geo-restricted. Without a proxy metadata are still there. With a
|
||||||
|
# proxy it redirects to http://m.tvland.com/app/
|
||||||
'url': 'http://www.tvland.com/episodes/hqhps2/everybody-loves-raymond-the-invasion-ep-048',
|
'url': 'http://www.tvland.com/episodes/hqhps2/everybody-loves-raymond-the-invasion-ep-048',
|
||||||
'playlist': [
|
|
||||||
{
|
|
||||||
'md5': '227e9723b9669c05bf51098b10287aa7',
|
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'bcbd3a83-3aca-4dca-809b-f78a87dcccdd',
|
'description': 'md5:80973e81b916a324e05c14a3fb506d29',
|
||||||
'ext': 'mp4',
|
'title': 'The Invasion',
|
||||||
'title': 'Everybody Loves Raymond|Everybody Loves Raymond 048 HD, Part 1 of 5',
|
|
||||||
}
|
|
||||||
},
|
},
|
||||||
{
|
'playlist': [],
|
||||||
'md5': '9fa2b764ec0e8194fb3ebb01a83df88b',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'f4279548-6e13-40dd-92e8-860d27289197',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'Everybody Loves Raymond|Everybody Loves Raymond 048 HD, Part 2 of 5',
|
|
||||||
}
|
|
||||||
},
|
|
||||||
{
|
|
||||||
'md5': 'fde4c3bccd7cc7e3576b338734153cec',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '664e4a38-53ef-4115-9bc9-d0f789ec6334',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'Everybody Loves Raymond|Everybody Loves Raymond 048 HD, Part 3 of 5',
|
|
||||||
}
|
|
||||||
},
|
|
||||||
{
|
|
||||||
'md5': '247f6780cda6891f2e49b8ae2b10e017',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '9146ecf5-b15a-4d78-879c-6679b77f4960',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'Everybody Loves Raymond|Everybody Loves Raymond 048 HD, Part 4 of 5',
|
|
||||||
}
|
|
||||||
},
|
|
||||||
{
|
|
||||||
'md5': 'fd269f33256e47bad5eb6c40de089ff6',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '04334a2e-9a47-4214-a8c2-ae5792e2fab7',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'Everybody Loves Raymond|Everybody Loves Raymond 048 HD, Part 5 of 5',
|
|
||||||
}
|
|
||||||
}
|
|
||||||
],
|
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.tvland.com/video-clips/zea2ev/younger-younger--hilary-duff---little-lies',
|
'url': 'http://www.tvland.com/video-clips/zea2ev/younger-younger--hilary-duff---little-lies',
|
||||||
'md5': 'e2c6389401cf485df26c79c247b08713',
|
'md5': 'e2c6389401cf485df26c79c247b08713',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'b8697515-4bbe-4e01-83d5-fa705ce5fa88',
|
'id': 'b8697515-4bbe-4e01-83d5-fa705ce5fa88',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Younger|Younger: Hilary Duff - Little Lies',
|
'title': 'Younger|December 28, 2015|2|NO-EPISODE#|Younger: Hilary Duff - Little Lies',
|
||||||
'description': 'md5:7d192f56ca8d958645c83f0de8ef0269'
|
'description': 'md5:7d192f56ca8d958645c83f0de8ef0269',
|
||||||
|
'upload_date': '20151228',
|
||||||
|
'timestamp': 1451289600,
|
||||||
},
|
},
|
||||||
}]
|
}]
|
||||||
|
@ -89,8 +89,8 @@ class TVPIE(InfoExtractor):
|
|||||||
r'(https?://.+?/video)(?:\.(?:ism|f4m|m3u8)|-\d+\.mp4)',
|
r'(https?://.+?/video)(?:\.(?:ism|f4m|m3u8)|-\d+\.mp4)',
|
||||||
video_url, 'video base url', default=None)
|
video_url, 'video base url', default=None)
|
||||||
if video_url_base:
|
if video_url_base:
|
||||||
# TODO: Current DASH formats are broken - $Time$ pattern in
|
# TODO: <Group> found instead of <AdaptationSet> in MPD manifest.
|
||||||
# <SegmentTemplate> not implemented yet
|
# It's not mentioned in MPEG-DASH standard. Figure that out.
|
||||||
# formats.extend(self._extract_mpd_formats(
|
# formats.extend(self._extract_mpd_formats(
|
||||||
# video_url_base + '.ism/video.mpd',
|
# video_url_base + '.ism/video.mpd',
|
||||||
# video_id, mpd_id='dash', fatal=False))
|
# video_id, mpd_id='dash', fatal=False))
|
||||||
|
@ -461,7 +461,7 @@ class TwitchClipsIE(InfoExtractor):
|
|||||||
IE_NAME = 'twitch:clips'
|
IE_NAME = 'twitch:clips'
|
||||||
_VALID_URL = r'https?://clips\.twitch\.tv/(?:[^/]+/)*(?P<id>[^/?#&]+)'
|
_VALID_URL = r'https?://clips\.twitch\.tv/(?:[^/]+/)*(?P<id>[^/?#&]+)'
|
||||||
|
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
'url': 'https://clips.twitch.tv/ea/AggressiveCobraPoooound',
|
'url': 'https://clips.twitch.tv/ea/AggressiveCobraPoooound',
|
||||||
'md5': '761769e1eafce0ffebfb4089cb3847cd',
|
'md5': '761769e1eafce0ffebfb4089cb3847cd',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@ -473,7 +473,11 @@ class TwitchClipsIE(InfoExtractor):
|
|||||||
'uploader': 'stereotype_',
|
'uploader': 'stereotype_',
|
||||||
'uploader_id': 'stereotype_',
|
'uploader_id': 'stereotype_',
|
||||||
},
|
},
|
||||||
}
|
}, {
|
||||||
|
# multiple formats
|
||||||
|
'url': 'https://clips.twitch.tv/rflegendary/UninterestedBeeDAESuppy',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
@ -485,15 +489,27 @@ class TwitchClipsIE(InfoExtractor):
|
|||||||
r'(?s)clipInfo\s*=\s*({.+?});', webpage, 'clip info'),
|
r'(?s)clipInfo\s*=\s*({.+?});', webpage, 'clip info'),
|
||||||
video_id, transform_source=js_to_json)
|
video_id, transform_source=js_to_json)
|
||||||
|
|
||||||
video_url = clip['clip_video_url']
|
title = clip.get('channel_title') or self._og_search_title(webpage)
|
||||||
title = clip['channel_title']
|
|
||||||
|
formats = [{
|
||||||
|
'url': option['source'],
|
||||||
|
'format_id': option.get('quality'),
|
||||||
|
'height': int_or_none(option.get('quality')),
|
||||||
|
} for option in clip.get('quality_options', []) if option.get('source')]
|
||||||
|
|
||||||
|
if not formats:
|
||||||
|
formats = [{
|
||||||
|
'url': clip['clip_video_url'],
|
||||||
|
}]
|
||||||
|
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'url': video_url,
|
|
||||||
'title': title,
|
'title': title,
|
||||||
'thumbnail': self._og_search_thumbnail(webpage),
|
'thumbnail': self._og_search_thumbnail(webpage),
|
||||||
'creator': clip.get('broadcaster_display_name') or clip.get('broadcaster_login'),
|
'creator': clip.get('broadcaster_display_name') or clip.get('broadcaster_login'),
|
||||||
'uploader': clip.get('curator_login'),
|
'uploader': clip.get('curator_login'),
|
||||||
'uploader_id': clip.get('curator_display_name'),
|
'uploader_id': clip.get('curator_display_name'),
|
||||||
|
'formats': formats,
|
||||||
}
|
}
|
||||||
|
@ -53,6 +53,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
|||||||
"""Provide base functions for Youtube extractors"""
|
"""Provide base functions for Youtube extractors"""
|
||||||
_LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
|
_LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
|
||||||
_TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'
|
_TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'
|
||||||
|
_PASSWORD_CHALLENGE_URL = 'https://accounts.google.com/signin/challenge/sl/password'
|
||||||
_NETRC_MACHINE = 'youtube'
|
_NETRC_MACHINE = 'youtube'
|
||||||
# If True it will raise an error if no login info is provided
|
# If True it will raise an error if no login info is provided
|
||||||
_LOGIN_REQUIRED = False
|
_LOGIN_REQUIRED = False
|
||||||
@ -116,12 +117,10 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
|||||||
'hl': 'en_US',
|
'hl': 'en_US',
|
||||||
}
|
}
|
||||||
|
|
||||||
login_data = urlencode_postdata(login_form_strs)
|
|
||||||
|
|
||||||
req = sanitized_Request(self._LOGIN_URL, login_data)
|
|
||||||
login_results = self._download_webpage(
|
login_results = self._download_webpage(
|
||||||
req, None,
|
self._PASSWORD_CHALLENGE_URL, None,
|
||||||
note='Logging in', errnote='unable to log in', fatal=False)
|
note='Logging in', errnote='unable to log in', fatal=False,
|
||||||
|
data=urlencode_postdata(login_form_strs))
|
||||||
if login_results is False:
|
if login_results is False:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
@ -2123,6 +2123,7 @@ def mimetype2ext(mt):
|
|||||||
'dash+xml': 'mpd',
|
'dash+xml': 'mpd',
|
||||||
'f4m': 'f4m',
|
'f4m': 'f4m',
|
||||||
'f4m+xml': 'f4m',
|
'f4m+xml': 'f4m',
|
||||||
|
'hds+xml': 'f4m',
|
||||||
'vnd.ms-sstr+xml': 'ism',
|
'vnd.ms-sstr+xml': 'ism',
|
||||||
}.get(res, res)
|
}.get(res, res)
|
||||||
|
|
||||||
|
@ -1,3 +1,3 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
__version__ = '2016.07.17'
|
__version__ = '2016.07.28'
|
||||||
|
Loading…
x
Reference in New Issue
Block a user