Gilles Habran 2016-04-15 15:46:32 +02:00
commit 458af683d8
22 changed files with 412 additions and 182 deletions

View File

@ -6,8 +6,8 @@
---
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.04.06*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with an outdated version will be rejected.
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.04.06**
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.04.13*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with an outdated version will be rejected.
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.04.13**
### Before submitting an *issue* make sure you have:
- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
@ -35,7 +35,7 @@ $ youtube-dl -v <your command line>
[debug] User config: []
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
[debug] youtube-dl version 2016.04.06
[debug] youtube-dl version 2016.04.13
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
[debug] Proxy map: {}

View File

@ -140,14 +140,14 @@ After you have ensured this site is distributing its content legally, you can f
# TODO more properties (see youtube_dl/extractor/common.py)
}
```
5. Add an import in [`youtube_dl/extractor/__init__.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/__init__.py).
5. Add an import in [`youtube_dl/extractor/extractors.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/extractors.py).
6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc.
7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/58525c94d547be1c8167d16c298bdd75506db328/youtube_dl/extractor/common.py#L68-L226). Add tests and code for as many as you want.
8. Keep in mind that the only mandatory fields in the info dict for a successful extraction are `id`, `title` and either `url` or `formats`, i.e. the critical data without which extraction makes no sense. This means that [any field](https://github.com/rg3/youtube-dl/blob/58525c94d547be1c8167d16c298bdd75506db328/youtube_dl/extractor/common.py#L138-L226) apart from the aforementioned mandatory ones should be treated **as optional**, and extraction should be **tolerant** of situations where the sources for these fields may be unavailable (even if they are always available at the moment) and **future-proof** so as not to break the extraction of the general-purpose mandatory fields. For example, if you have some intermediate dict `meta` that is a source of metadata and it has a key `summary` that you want to extract and put into the resulting info dict as `description`, you should be prepared for this key to be missing from the `meta` dict, i.e. you should extract it as `meta.get('summary')` and not `meta['summary']`. Similarly, you should pass `fatal=False` when extracting data from a webpage with `_search_regex`/`_html_search_regex` (a minimal sketch of this pattern follows the command listing after step 10).
9. Check the code with [flake8](https://pypi.python.org/pypi/flake8).
10. When the tests pass, [add](http://git-scm.com/docs/git-add) the new files and [commit](http://git-scm.com/docs/git-commit) them and [push](http://git-scm.com/docs/git-push) the result, like this:
$ git add youtube_dl/extractor/__init__.py
$ git add youtube_dl/extractor/extractors.py
$ git add youtube_dl/extractor/yourextractor.py
$ git commit -m '[yourextractor] Add new extractor'
$ git push origin yourextractor
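
A minimal sketch of the tolerant-extraction pattern from step 8, under stated assumptions: `YourExtractorIE`, the `window.__INITIAL_DATA__` blob and its keys (`title`, `videoUrl`, `summary`, `durationSeconds`) are hypothetical illustrations, not any real site's layout; only the `InfoExtractor` helpers (`_match_id`, `_download_webpage`, `_parse_json`, `_search_regex`, `_html_search_regex`) and the `int_or_none` utility come from youtube-dl itself.

```python
from __future__ import unicode_literals

from .common import InfoExtractor
from ..utils import int_or_none


class YourExtractorIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?yourextractor\.com/watch/(?P<id>[0-9]+)'

    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # Hypothetical JSON blob embedded in the page
        meta = self._parse_json(self._search_regex(
            r'window\.__INITIAL_DATA__\s*=\s*({.+?})\s*;',
            webpage, 'metadata'), video_id)

        return {
            # Mandatory fields: fail loudly if they are missing
            'id': video_id,
            'title': meta['title'],
            'url': meta['videoUrl'],
            # Optional fields: tolerate missing sources
            'description': meta.get('summary'),
            'duration': int_or_none(meta.get('durationSeconds')),
            'thumbnail': self._html_search_regex(
                r'<meta[^>]+property="og:image"[^>]+content="([^"]+)"',
                webpage, 'thumbnail', fatal=False),
        }
```

If the site later drops or renames one of the optional keys, only those fields degrade to `None`; the mandatory `id`, `title` and `url` keep failing loudly, which is exactly what step 8 asks for.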

View File

@ -115,6 +115,7 @@
- **Cinemassacre**
- **Clipfish**
- **cliphunter**
- **ClipRs**
- **Clipsyndicate**
- **cloudtime**: CloudTime
- **Cloudy**
@ -286,7 +287,6 @@
- **ivi:compilation**: ivi.ru compilations
- **ivideon**: Ivideon TV
- **Izlesene**
- **JadoreCettePub**
- **JeuxVideo**
- **Jove**
- **jpopsuki.tv**
@ -484,6 +484,7 @@
- **Pornotube**
- **PornoVoisines**
- **PornoXO**
- **PressTV**
- **PrimeShareTV**
- **PromptFile**
- **prosiebensat1**: ProSiebenSat.1 Digital

View File

@ -27,6 +27,8 @@ class RtspFD(FileDownloader):
self.report_error('MMS or RTSP download detected but neither "mplayer" nor "mpv" could be run. Please install any.')
return False
self._debug_cmd(args)
retval = subprocess.call(args)
if retval == 0:
fsize = os.path.getsize(encodeFilename(tmpfilename))

View File

@ -12,9 +12,10 @@ from ..utils import (
class AolIE(InfoExtractor):
IE_NAME = 'on.aol.com'
_VALID_URL = r'(?:aol-video:|https?://on\.aol\.com/video/.*-)(?P<id>[^/?-]+)'
_VALID_URL = r'(?:aol-video:|https?://on\.aol\.com/.*-)(?P<id>[^/?-]+)'
_TESTS = [{
# video with 5min ID
'url': 'http://on.aol.com/video/u-s--official-warns-of-largest-ever-irs-phone-scam-518167793?icid=OnHomepageC2Wide_MustSee_Img',
'md5': '18ef68f48740e86ae94b98da815eec42',
'info_dict': {
@ -31,6 +32,7 @@ class AolIE(InfoExtractor):
'skip_download': True,
}
}, {
# video with vidible ID
'url': 'http://on.aol.com/video/netflix-is-raising-rates-5707d6b8e4b090497b04f706?context=PC:homepage:PL1944:1460189336183',
'info_dict': {
'id': '5707d6b8e4b090497b04f706',
@ -45,6 +47,12 @@ class AolIE(InfoExtractor):
# m3u8 download
'skip_download': True,
}
}, {
'url': 'http://on.aol.com/partners/abc-551438d309eab105804dbfe8/sneak-peek-was-haley-really-framed-570eaebee4b0448640a5c944',
'only_matching': True,
}, {
'url': 'http://on.aol.com/shows/park-bench-shw518173474-559a1b9be4b0c3bfad3357a7?context=SH:SHW518173474:PL4327:1460619712763',
'only_matching': True,
}]
def _real_extract(self, url):

View File

@ -210,7 +210,7 @@ class ArteTVPlus7IE(InfoExtractor):
# It also uses the arte_vp_url url from the webpage to extract the information
class ArteTVCreativeIE(ArteTVPlus7IE):
IE_NAME = 'arte.tv:creative'
_VALID_URL = r'https?://creative\.arte\.tv/(?P<lang>fr|de|en|es)/(?:magazine?/)?(?P<id>[^/?#&]+)'
_VALID_URL = r'https?://creative\.arte\.tv/(?P<lang>fr|de|en|es)/(?:[^/]+/)*(?P<id>[^/?#&]+)'
_TESTS = [{
'url': 'http://creative.arte.tv/de/magazin/agentur-amateur-corporate-design',
@ -229,9 +229,27 @@ class ArteTVCreativeIE(ArteTVPlus7IE):
'description': 'Événement ! Quarante-cinq ans après leurs premiers succès, les légendaires Monty Python remontent sur scène.\n',
'upload_date': '20140805',
}
}, {
'url': 'http://creative.arte.tv/de/episode/agentur-amateur-4-der-erste-kunde',
'only_matching': True,
}]
class ArteTVInfoIE(ArteTVPlus7IE):
IE_NAME = 'arte.tv:info'
_VALID_URL = r'https?://info\.arte\.tv/(?P<lang>fr|de|en|es)/(?:[^/]+/)*(?P<id>[^/?#&]+)'
_TEST = {
'url': 'http://info.arte.tv/fr/service-civique-un-cache-misere',
'info_dict': {
'id': '067528-000-A',
'ext': 'mp4',
'title': 'Service civique, un cache misère ?',
'upload_date': '20160403',
},
}
class ArteTVFutureIE(ArteTVPlus7IE):
IE_NAME = 'arte.tv:future'
_VALID_URL = r'https?://future\.arte\.tv/(?P<lang>fr|de|en|es)/(?P<id>[^/?#&]+)'

View File

@ -340,7 +340,7 @@ class BrightcoveLegacyIE(InfoExtractor):
ext = 'flv'
if ext is None:
ext = determine_ext(url)
tbr = int_or_none(rend.get('encodingRate'), 1000),
tbr = int_or_none(rend.get('encodingRate'), 1000)
a_format = {
'format_id': 'http%s' % ('-%s' % tbr if tbr else ''),
'url': url,

View File

@ -46,6 +46,7 @@ from .arte import (
ArteTVPlus7IE,
ArteTVCreativeIE,
ArteTVConcertIE,
ArteTVInfoIE,
ArteTVFutureIE,
ArteTVCinemaIE,
ArteTVDDCIE,
@ -411,7 +412,12 @@ from .minoto import MinotoIE
from .miomio import MioMioIE
from .mit import TechTVMITIE, MITIE, OCWMITIE
from .mitele import MiTeleIE
from .mixcloud import MixcloudIE
from .mixcloud import (
MixcloudIE,
MixcloudUserIE,
MixcloudPlaylistIE,
MixcloudStreamIE,
)
from .mlb import MLBIE
from .mnet import MnetIE
from .mpora import MporaIE
@ -729,6 +735,7 @@ from .sztvhu import SztvHuIE
from .tagesschau import TagesschauIE
from .tapely import TapelyIE
from .tass import TassIE
from .tdslifeway import TDSLifewayIE
from .teachertube import (
TeacherTubeIE,
TeacherTubeUserIE,
@ -831,7 +838,6 @@ from .twitter import (
TwitterIE,
TwitterAmplifyIE,
)
from .ubu import UbuIE
from .udemy import (
UdemyIE,
UdemyCourseIE
@ -916,7 +922,6 @@ from .vulture import VultureIE
from .walla import WallaIE
from .washingtonpost import WashingtonPostIE
from .wat import WatIE
from .wayofthemaster import WayOfTheMasterIE
from .wdr import (
WDRIE,
WDRMobileIE,

View File

@ -60,6 +60,7 @@ from .googledrive import GoogleDriveIE
from .jwplatform import JWPlatformIE
from .digiteka import DigitekaIE
from .instagram import InstagramIE
from .liveleak import LiveLeakIE
class GenericIE(InfoExtractor):
@ -1140,6 +1141,18 @@ class GenericIE(InfoExtractor):
'upload_date': '20160409',
},
},
# LiveLeak embed
{
'url': 'http://www.wykop.pl/link/3088787/',
'md5': 'ace83b9ed19b21f68e1b50e844fdf95d',
'info_dict': {
'id': '874_1459135191',
'ext': 'mp4',
'title': 'Man shows poor quality of new apartment building',
'description': 'The wall is like a sand pile.',
'uploader': 'Lake8737',
}
},
]
def report_following_redirect(self, new_url):
@ -1944,6 +1957,11 @@ class GenericIE(InfoExtractor):
if instagram_embed_url is not None:
return self.url_result(instagram_embed_url, InstagramIE.ie_key())
# Look for LiveLeak embeds
liveleak_url = LiveLeakIE._extract_url(webpage)
if liveleak_url:
return self.url_result(liveleak_url, 'LiveLeak')
def check_video(vurl):
if YoutubeIE.suitable(vurl):
return True

View File

@ -4,6 +4,7 @@ import re
from .common import InfoExtractor
from ..utils import (
determine_ext,
parse_duration,
unified_strdate,
)
@ -29,7 +30,12 @@ class HuffPostIE(InfoExtractor):
'description': 'This week on Legalese It, Mike talks to David Bosco about his new book on the ICC, "Rough Justice," he also discusses the Virginia AG\'s historic stance on gay marriage, the execution of Edgar Tamayo, the ICC\'s delay of Kenya\'s President and more. ',
'duration': 1549,
'upload_date': '20140124',
}
},
'params': {
# m3u8 download
'skip_download': True,
},
'expected_warnings': ['HTTP Error 404: Not Found'],
}
def _real_extract(self, url):
@ -45,7 +51,7 @@ class HuffPostIE(InfoExtractor):
description = data.get('description')
thumbnails = []
for url in data['images'].values():
for url in filter(None, data['images'].values()):
m = re.match('.*-([0-9]+x[0-9]+)\.', url)
if not m:
continue
@ -54,13 +60,25 @@ class HuffPostIE(InfoExtractor):
'resolution': m.group(1),
})
formats = [{
'format': key,
'format_id': key.replace('/', '.'),
'ext': 'mp4',
'url': url,
'vcodec': 'none' if key.startswith('audio/') else None,
} for key, url in data.get('sources', {}).get('live', {}).items()]
formats = []
sources = data.get('sources', {})
live_sources = list(sources.get('live', {}).items()) + list(sources.get('live_again', {}).items())
for key, url in live_sources:
ext = determine_ext(url)
if ext == 'm3u8':
formats.extend(self._extract_m3u8_formats(
url, video_id, ext='mp4', m3u8_id='hls', fatal=False))
elif ext == 'f4m':
formats.extend(self._extract_f4m_formats(
url + '?hdcore=2.9.5', video_id, f4m_id='hds', fatal=False))
else:
formats.append({
'format': key,
'format_id': key.replace('/', '.'),
'ext': 'mp4',
'url': url,
'vcodec': 'none' if key.startswith('audio/') else None,
})
if not formats and data.get('fivemin_id'):
return self.url_result('5min:%s' % data['fivemin_id'])

View File

@ -52,9 +52,12 @@ class KarriereVideosIE(InfoExtractor):
video_id = self._search_regex(
r'/config/video/(.+?)\.xml', webpage, 'video id')
# Server returns malformed headers
# Force Accept-Encoding: * to prevent gzipped results
playlist = self._download_xml(
'http://www.karrierevideos.at/player-playlist.xml.php?p=%s' % video_id,
video_id, transform_source=fix_xml_ampersands)
video_id, transform_source=fix_xml_ampersands,
headers={'Accept-Encoding': '*'})
NS_MAP = {
'jwplayer': 'http://developer.longtailvideo.com/trac/wiki/FlashFormats'

View File

@ -63,6 +63,7 @@ class Laola1TvIE(InfoExtractor):
'params': {
'skip_download': True,
},
'skip': 'This live stream has already finished.',
}]
def _real_extract(self, url):
@ -74,6 +75,9 @@ class Laola1TvIE(InfoExtractor):
webpage = self._download_webpage(url, display_id)
if 'Dieser Livestream ist bereits beendet.' in webpage:
raise ExtractorError('This live stream has already finished.', expected=True)
iframe_url = self._search_regex(
r'<iframe[^>]*?id="videoplayer"[^>]*?src="([^"]+)"',
webpage, 'iframe url')

View File

@ -6,6 +6,7 @@ import re
from .common import InfoExtractor
from ..utils import (
determine_ext,
determine_protocol,
parse_duration,
int_or_none,
)
@ -18,10 +19,14 @@ class Lecture2GoIE(InfoExtractor):
'md5': 'ac02b570883020d208d405d5a3fd2f7f',
'info_dict': {
'id': '17473',
'ext': 'flv',
'ext': 'mp4',
'title': '2 - Endliche Automaten und reguläre Sprachen',
'creator': 'Frank Heitmann',
'duration': 5220,
},
'params': {
# m3u8 download
'skip_download': True,
}
}
@ -32,14 +37,18 @@ class Lecture2GoIE(InfoExtractor):
title = self._html_search_regex(r'<em[^>]+class="title">(.+)</em>', webpage, 'title')
formats = []
for url in set(re.findall(r'"src","([^"]+)"', webpage)):
for url in set(re.findall(r'var\s+playerUri\d+\s*=\s*"([^"]+)"', webpage)):
ext = determine_ext(url)
protocol = determine_protocol({'url': url})
if ext == 'f4m':
formats.extend(self._extract_f4m_formats(url, video_id))
formats.extend(self._extract_f4m_formats(url, video_id, f4m_id='hds'))
elif ext == 'm3u8':
formats.extend(self._extract_m3u8_formats(url, video_id))
formats.extend(self._extract_m3u8_formats(url, video_id, ext='mp4', m3u8_id='hls'))
else:
if protocol == 'rtmp':
continue # XXX: currently broken
formats.append({
'format_id': protocol,
'url': url,
})

View File

@ -53,6 +53,14 @@ class LiveLeakIE(InfoExtractor):
}
}]
@staticmethod
def _extract_url(webpage):
mobj = re.search(
r'<iframe[^>]+src="https?://(?:\w+\.)?liveleak\.com/ll_embed\?(?:.*?)i=(?P<id>[\w_]+)(?:.*)',
webpage)
if mobj:
return 'http://www.liveleak.com/view?i=%s' % mobj.group('id')
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)

View File

@ -11,7 +11,7 @@ from ..utils import (
class MetacriticIE(InfoExtractor):
_VALID_URL = r'https?://www\.metacritic\.com/.+?/trailers/(?P<id>\d+)'
_TEST = {
_TESTS = [{
'url': 'http://www.metacritic.com/game/playstation-4/infamous-second-son/trailers/3698222',
'info_dict': {
'id': '3698222',
@ -20,7 +20,17 @@ class MetacriticIE(InfoExtractor):
'description': 'Take a peak behind-the-scenes to see how Sucker Punch brings smoke into the universe of inFAMOUS Second Son on the PS4.',
'duration': 221,
},
}
'skip': 'Not providing trailers anymore',
}, {
'url': 'http://www.metacritic.com/game/playstation-4/tales-from-the-borderlands-a-telltale-game-series/trailers/5740315',
'info_dict': {
'id': '5740315',
'ext': 'mp4',
'title': 'Tales from the Borderlands - Finale: The Vault of the Traveler',
'description': 'In the final episode of the season, all hell breaks loose. Jack is now in control of Helios\' systems, and he\'s ready to reclaim his rightful place as king of Hyperion (with or without you).',
'duration': 114,
},
}]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)

View File

@ -1,8 +1,5 @@
from __future__ import unicode_literals
import json
import re
from .common import InfoExtractor
from ..utils import (
ExtractorError,
@ -20,21 +17,28 @@ class MinistryGridIE(InfoExtractor):
'id': '3453494717001',
'ext': 'mp4',
'title': 'The Gospel by Numbers',
'thumbnail': 're:^https?://.*\.jpg',
'upload_date': '20140410',
'description': 'Coming soon from T4G 2014!',
'uploader': 'LifeWay Christian Resources (MG)',
'uploader_id': '2034960640001',
'timestamp': 1397145591,
},
'params': {
# m3u8 download
'skip_download': True,
},
'add_ie': ['TDSLifeway'],
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
portlets_json = self._search_regex(
r'Liferay\.Portlet\.list=(\[.+?\])', webpage, 'portlet list')
portlets = json.loads(portlets_json)
portlets = self._parse_json(self._search_regex(
r'Liferay\.Portlet\.list=(\[.+?\])', webpage, 'portlet list'),
video_id)
pl_id = self._search_regex(
r'<!--\s*p_l_id - ([0-9]+)<br>', webpage, 'p_l_id')
r'getPlid:function\(\){return"(\d+)"}', webpage, 'p_l_id')
for i, portlet in enumerate(portlets):
portlet_url = 'http://www.ministrygrid.com/c/portal/render_portlet?p_l_id=%s&p_p_id=%s' % (pl_id, portlet)
@ -46,12 +50,8 @@ class MinistryGridIE(InfoExtractor):
r'<iframe.*?src="([^"]+)"', portlet_code, 'video iframe',
default=None)
if video_iframe_url:
surl = smuggle_url(
video_iframe_url, {'force_videoid': video_id})
return {
'_type': 'url',
'id': video_id,
'url': surl,
}
return self.url_result(
smuggle_url(video_iframe_url, {'force_videoid': video_id}),
video_id=video_id)
raise ExtractorError('Could not find video iframe in any portlets')

View File

@ -1,27 +1,35 @@
from __future__ import unicode_literals
import base64
import functools
import itertools
import re
from .common import InfoExtractor
from ..compat import compat_urllib_parse_unquote
from ..compat import (
compat_chr,
compat_ord,
compat_urllib_parse_unquote,
compat_urlparse,
)
from ..utils import (
clean_html,
ExtractorError,
HEADRequest,
NO_DEFAULT,
OnDemandPagedList,
parse_count,
str_to_int,
)
class MixcloudIE(InfoExtractor):
_VALID_URL = r'^(?:https?://)?(?:www\.)?mixcloud\.com/([^/]+)/([^/]+)'
_VALID_URL = r'^(?:https?://)?(?:www\.)?mixcloud\.com/([^/]+)/(?!stream|uploads|favorites|listens|playlists)([^/]+)'
IE_NAME = 'mixcloud'
_TESTS = [{
'url': 'http://www.mixcloud.com/dholbach/cryptkeeper/',
'info_dict': {
'id': 'dholbach-cryptkeeper',
'ext': 'mp3',
'ext': 'm4a',
'title': 'Cryptkeeper',
'description': 'After quite a long silence from myself, finally another Drum\'n\'Bass mix with my favourite current dance floor bangers.',
'uploader': 'Daniel Holbach',
@ -39,22 +47,22 @@ class MixcloudIE(InfoExtractor):
'description': 'md5:2b8aec6adce69f9d41724647c65875e8',
'uploader': 'Gilles Peterson Worldwide',
'uploader_id': 'gillespeterson',
'thumbnail': 're:https?://.*/images/',
'thumbnail': 're:https?://.*',
'view_count': int,
'like_count': int,
},
}]
def _check_url(self, url, track_id, ext):
try:
# We only want to know if the request succeeds
# don't download the whole file
self._request_webpage(
HEADRequest(url), track_id,
'Trying %s URL' % ext)
return True
except ExtractorError:
return False
# See https://www.mixcloud.com/media/js2/www_js_2.9e23256562c080482435196ca3975ab5.js
@staticmethod
def _decrypt_play_info(play_info):
KEY = 'pleasedontdownloadourmusictheartistswontgetpaid'
play_info = base64.b64decode(play_info.encode('ascii'))
return ''.join([
compat_chr(compat_ord(ch) ^ compat_ord(KEY[idx % len(KEY)]))
for idx, ch in enumerate(play_info)])
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
@ -68,19 +76,15 @@ class MixcloudIE(InfoExtractor):
r'(?s)<div[^>]+class="global-message cloudcast-disabled-notice-light"[^>]*>(.+?)<(?:a|/div)',
webpage, 'error message', default=None)
preview_url = self._search_regex(
r'\s(?:data-preview-url|m-preview)="([^"]+)"',
webpage, 'preview url', default=None if message else NO_DEFAULT)
encrypted_play_info = self._search_regex(
r'm-play-info="([^"]+)"', webpage, 'play info')
play_info = self._parse_json(
self._decrypt_play_info(encrypted_play_info), track_id)
if message:
if message and 'stream_url' not in play_info:
raise ExtractorError('%s said: %s' % (self.IE_NAME, message), expected=True)
song_url = re.sub(r'audiocdn(\d+)', r'stream\1', preview_url)
song_url = song_url.replace('/previews/', '/c/originals/')
if not self._check_url(song_url, track_id, 'mp3'):
song_url = song_url.replace('.mp3', '.m4a').replace('originals/', 'm4a/64/')
if not self._check_url(song_url, track_id, 'm4a'):
raise ExtractorError('Unable to extract track url')
song_url = play_info['stream_url']
PREFIX = (
r'm-play-on-spacebar[^>]+'
@ -115,3 +119,201 @@ class MixcloudIE(InfoExtractor):
'view_count': view_count,
'like_count': like_count,
}
class MixcloudPlaylistBaseIE(InfoExtractor):
_PAGE_SIZE = 24
def _find_urls_in_page(self, page):
for url in re.findall(r'm-play-button m-url="(?P<url>[^"]+)"', page):
yield self.url_result(
compat_urlparse.urljoin('https://www.mixcloud.com', clean_html(url)),
MixcloudIE.ie_key())
def _fetch_tracks_page(self, path, video_id, page_name, current_page, real_page_number=None):
real_page_number = real_page_number or current_page + 1
return self._download_webpage(
'https://www.mixcloud.com/%s/' % path, video_id,
note='Download %s (page %d)' % (page_name, current_page + 1),
errnote='Unable to download %s' % page_name,
query={'page': real_page_number, 'list': 'main', '_ajax': '1'},
headers={'X-Requested-With': 'XMLHttpRequest'})
def _tracks_page_func(self, page, video_id, page_name, current_page):
resp = self._fetch_tracks_page(page, video_id, page_name, current_page)
for item in self._find_urls_in_page(resp):
yield item
def _get_user_description(self, page_content):
return self._html_search_regex(
r'<div[^>]+class="description-text"[^>]*>(.+?)</div>',
page_content, 'user description', fatal=False)
class MixcloudUserIE(MixcloudPlaylistBaseIE):
_VALID_URL = r'^(?:https?://)?(?:www\.)?mixcloud\.com/(?P<user>[^/]+)/(?P<type>uploads|favorites|listens)?/?$'
IE_NAME = 'mixcloud:user'
_TESTS = [{
'url': 'http://www.mixcloud.com/dholbach/',
'info_dict': {
'id': 'dholbach_uploads',
'title': 'Daniel Holbach (uploads)',
'description': 'md5:327af72d1efeb404a8216c27240d1370',
},
'playlist_mincount': 11,
}, {
'url': 'http://www.mixcloud.com/dholbach/uploads/',
'info_dict': {
'id': 'dholbach_uploads',
'title': 'Daniel Holbach (uploads)',
'description': 'md5:327af72d1efeb404a8216c27240d1370',
},
'playlist_mincount': 11,
}, {
'url': 'http://www.mixcloud.com/dholbach/favorites/',
'info_dict': {
'id': 'dholbach_favorites',
'title': 'Daniel Holbach (favorites)',
'description': 'md5:327af72d1efeb404a8216c27240d1370',
},
'params': {
'playlist_items': '1-100',
},
'playlist_mincount': 100,
}, {
'url': 'http://www.mixcloud.com/dholbach/listens/',
'info_dict': {
'id': 'dholbach_listens',
'title': 'Daniel Holbach (listens)',
'description': 'md5:327af72d1efeb404a8216c27240d1370',
},
'params': {
'playlist_items': '1-100',
},
'playlist_mincount': 100,
}]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
user_id = mobj.group('user')
list_type = mobj.group('type')
# if only a profile URL was supplied, default to downloading all uploads
if list_type is None:
list_type = 'uploads'
video_id = '%s_%s' % (user_id, list_type)
profile = self._download_webpage(
'https://www.mixcloud.com/%s/' % user_id, video_id,
note='Downloading user profile',
errnote='Unable to download user profile')
username = self._og_search_title(profile)
description = self._get_user_description(profile)
entries = OnDemandPagedList(
functools.partial(
self._tracks_page_func,
'%s/%s' % (user_id, list_type), video_id, 'list of %s' % list_type),
self._PAGE_SIZE, use_cache=True)
return self.playlist_result(
entries, video_id, '%s (%s)' % (username, list_type), description)
class MixcloudPlaylistIE(MixcloudPlaylistBaseIE):
_VALID_URL = r'^(?:https?://)?(?:www\.)?mixcloud\.com/(?P<user>[^/]+)/playlists/(?P<playlist>[^/]+)/?$'
IE_NAME = 'mixcloud:playlist'
_TESTS = [{
'url': 'https://www.mixcloud.com/RedBullThre3style/playlists/tokyo-finalists-2015/',
'info_dict': {
'id': 'RedBullThre3style_tokyo-finalists-2015',
'title': 'National Champions 2015',
'description': 'md5:6ff5fb01ac76a31abc9b3939c16243a3',
},
'playlist_mincount': 16,
}, {
'url': 'https://www.mixcloud.com/maxvibes/playlists/jazzcat-on-ness-radio/',
'info_dict': {
'id': 'maxvibes_jazzcat-on-ness-radio',
'title': 'Jazzcat on Ness Radio',
'description': 'md5:7bbbf0d6359a0b8cda85224be0f8f263',
},
'playlist_mincount': 23
}]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
user_id = mobj.group('user')
playlist_id = mobj.group('playlist')
video_id = '%s_%s' % (user_id, playlist_id)
profile = self._download_webpage(
url, user_id,
note='Downloading playlist page',
errnote='Unable to download playlist page')
description = self._get_user_description(profile)
playlist_title = self._html_search_regex(
r'<span[^>]+class="[^"]*list-playlist-title[^"]*"[^>]*>(.*?)</span>',
profile, 'playlist title')
entries = OnDemandPagedList(
functools.partial(
self._tracks_page_func,
'%s/playlists/%s' % (user_id, playlist_id), video_id, 'tracklist'),
self._PAGE_SIZE)
return self.playlist_result(entries, video_id, playlist_title, description)
class MixcloudStreamIE(MixcloudPlaylistBaseIE):
_VALID_URL = r'^(?:https?://)?(?:www\.)?mixcloud\.com/(?P<id>[^/]+)/stream/?$'
IE_NAME = 'mixcloud:stream'
_TEST = {
'url': 'https://www.mixcloud.com/FirstEar/stream/',
'info_dict': {
'id': 'FirstEar',
'title': 'First Ear',
'description': 'Curators of good music\nfirstearmusic.com',
},
'playlist_mincount': 192,
}
def _real_extract(self, url):
user_id = self._match_id(url)
webpage = self._download_webpage(url, user_id)
entries = []
prev_page_url = None
def _handle_page(page):
entries.extend(self._find_urls_in_page(page))
return self._search_regex(
r'm-next-page-url="([^"]+)"', page,
'next page URL', default=None)
next_page_url = _handle_page(webpage)
for idx in itertools.count(0):
if not next_page_url or prev_page_url == next_page_url:
break
prev_page_url = next_page_url
current_page = int(self._search_regex(
r'\?page=(\d+)', next_page_url, 'next page number'))
next_page_url = _handle_page(self._fetch_tracks_page(
'%s/stream' % user_id, user_id, 'stream', idx,
real_page_number=current_page))
username = self._og_search_title(webpage)
description = self._get_user_description(webpage)
return self.playlist_result(entries, user_id, username, description)

View File

@ -0,0 +1,33 @@
from __future__ import unicode_literals
from .common import InfoExtractor
class TDSLifewayIE(InfoExtractor):
_VALID_URL = r'https?://tds\.lifeway\.com/v1/trainingdeliverysystem/courses/(?P<id>\d+)/index\.html'
_TEST = {
# From http://www.ministrygrid.com/training-viewer/-/training/t4g-2014-conference/the-gospel-by-numbers-4/the-gospel-by-numbers
'url': 'http://tds.lifeway.com/v1/trainingdeliverysystem/courses/3453494717001/index.html?externalRegistration=AssetId%7C34F466F1-78F3-4619-B2AB-A8EFFA55E9E9%21InstanceId%7C0%21UserId%7Caaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa&grouping=http%3A%2F%2Flifeway.com%2Fvideo%2F3453494717001&activity_id=http%3A%2F%2Flifeway.com%2Fvideo%2F3453494717001&content_endpoint=http%3A%2F%2Ftds.lifeway.com%2Fv1%2Ftrainingdeliverysystem%2FScormEngineInterface%2FTCAPI%2Fcontent%2F&actor=%7B%22name%22%3A%5B%22Guest%20Guest%22%5D%2C%22account%22%3A%5B%7B%22accountServiceHomePage%22%3A%22http%3A%2F%2Fscorm.lifeway.com%2F%22%2C%22accountName%22%3A%22aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa%22%7D%5D%2C%22objectType%22%3A%22Agent%22%7D&content_token=462a50b2-b6f9-4970-99b1-930882c499fb&registration=93d6ec8e-7f7b-4ed3-bbc8-a857913c0b2a&externalConfiguration=access%7CFREE%21adLength%7C-1%21assignOrgId%7C4AE36F78-299A-425D-91EF-E14A899B725F%21assignOrgParentId%7C%21courseId%7C%21isAnonymous%7Cfalse%21previewAsset%7Cfalse%21previewLength%7C-1%21previewMode%7Cfalse%21royalty%7CFREE%21sessionId%7C671422F9-8E79-48D4-9C2C-4EE6111EA1CD%21trackId%7C&auth=Basic%20OjhmZjk5MDBmLTBlYTMtNDJhYS04YjFlLWE4MWQ3NGNkOGRjYw%3D%3D&endpoint=http%3A%2F%2Ftds.lifeway.com%2Fv1%2Ftrainingdeliverysystem%2FScormEngineInterface%2FTCAPI%2F',
'info_dict': {
'id': '3453494717001',
'ext': 'mp4',
'title': 'The Gospel by Numbers',
'thumbnail': 're:^https?://.*\.jpg',
'upload_date': '20140410',
'description': 'Coming soon from T4G 2014!',
'uploader_id': '2034960640001',
'timestamp': 1397145591,
},
'params': {
# m3u8 download
'skip_download': True,
},
'add_ie': ['BrightcoveNew'],
}
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/2034960640001/default_default/index.html?videoId=%s'
def _real_extract(self, url):
brightcove_id = self._match_id(url)
return self.url_result(self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, 'BrightcoveNew', brightcove_id)

View File

@ -1,57 +0,0 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
int_or_none,
qualities,
)
class UbuIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?ubu\.com/film/(?P<id>[\da-z_-]+)\.html'
_TEST = {
'url': 'http://ubu.com/film/her_noise.html',
'md5': '138d5652618bf0f03878978db9bef1ee',
'info_dict': {
'id': 'her_noise',
'ext': 'm4v',
'title': 'Her Noise - The Making Of (2007)',
'duration': 3600,
},
}
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
title = self._html_search_regex(
r'<title>.+?Film &amp; Video: ([^<]+)</title>', webpage, 'title')
duration = int_or_none(self._html_search_regex(
r'Duration: (\d+) minutes', webpage, 'duration', fatal=False),
invscale=60)
formats = []
FORMAT_REGEXES = [
('sq', r"'flashvars'\s*,\s*'file=([^']+)'"),
('hq', r'href="(http://ubumexico\.centro\.org\.mx/video/[^"]+)"'),
]
preference = qualities([fid for fid, _ in FORMAT_REGEXES])
for format_id, format_regex in FORMAT_REGEXES:
m = re.search(format_regex, webpage)
if m:
formats.append({
'url': m.group(1),
'format_id': format_id,
'preference': preference(format_id),
})
self._sort_formats(formats)
return {
'id': video_id,
'title': title,
'duration': duration,
'formats': formats,
}

View File

@ -1,52 +0,0 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
class WayOfTheMasterIE(InfoExtractor):
_VALID_URL = r'https?://www\.wayofthemaster\.com/([^/?#]*/)*(?P<id>[^/?#]+)\.s?html(?:$|[?#])'
_TEST = {
'url': 'http://www.wayofthemaster.com/hbks.shtml',
'md5': '5316b57487ada8480606a93cb3d18d24',
'info_dict': {
'id': 'hbks',
'ext': 'mp4',
'title': 'Intelligent Design vs. Evolution',
},
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id)
title = self._search_regex(
r'<img src="images/title_[^"]+".*?alt="([^"]+)"',
webpage, 'title', default=None)
if title is None:
title = self._html_search_regex(
r'<title>(.*?)</title>', webpage, 'page title')
url_base = self._search_regex(
r'<param\s+name="?movie"?\s+value=".*?/wotm_videoplayer_highlow[0-9]*\.swf\?vid=([^"]+)"',
webpage, 'URL base')
formats = [{
'format_id': 'low',
'quality': 1,
'url': url_base + '_low.mp4',
}, {
'format_id': 'high',
'quality': 2,
'url': url_base + '_high.mp4',
}]
self._sort_formats(formats)
return {
'id': video_id,
'title': title,
'formats': formats,
}

View File

@ -12,7 +12,7 @@ from ..utils import (
class XboxClipsIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?xboxclips\.com/(?:video\.php\?.*vid=|[^/]+/)(?P<id>[\w-]{36})'
_TEST = {
'url': 'https://xboxclips.com/video.php?uid=2533274823424419&gamertag=Iabdulelah&vid=074a69a9-5faf-46aa-b93b-9909c1720325',
'url': 'http://xboxclips.com/video.php?uid=2533274823424419&gamertag=Iabdulelah&vid=074a69a9-5faf-46aa-b93b-9909c1720325',
'md5': 'fbe1ec805e920aeb8eced3c3e657df5d',
'info_dict': {
'id': '074a69a9-5faf-46aa-b93b-9909c1720325',

View File

@ -1,3 +1,3 @@
from __future__ import unicode_literals
__version__ = '2016.04.06'
__version__ = '2016.04.13'