Merge branch 'master' of https://github.com/rg3/youtube-dl into multipart_videos

This commit is contained in:
Mark Lee 2014-04-04 08:56:49 -07:00
commit 7d33af60cd
25 changed files with 664 additions and 131 deletions

View File

@ -3,5 +3,4 @@ include test/*.py
include test/*.json include test/*.json
include youtube-dl.bash-completion include youtube-dl.bash-completion
include youtube-dl.1 include youtube-dl.1
recursive-include docs * recursive-include docs Makefile conf.py *.rst
prune docs/_build

View File

@ -371,7 +371,67 @@ If you want to create a build of youtube-dl yourself, you'll need
### Adding support for a new site ### Adding support for a new site
If you want to add support for a new site, copy *any* [recently modified](https://github.com/rg3/youtube-dl/commits/master/youtube_dl/extractor) file in `youtube_dl/extractor`, add an import in [`youtube_dl/extractor/__init__.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/__init__.py). Have a look at [`youtube_dl/common/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L38). Don't forget to run the tests with `python test/test_download.py TestDownload.test_YourExtractor`! For a detailed tutorial, refer to [this blog post](http://filippo.io/add-support-for-a-new-video-site-to-youtube-dl/). If you want to add support for a new site, you can follow this quick list (assuming your service is called `yourextractor`):
1. [Fork this repository](https://github.com/rg3/youtube-dl/fork)
2. Check out the source code with `git clone git@github.com:YOUR_GITHUB_USERNAME/youtube-dl.git`
3. Start a new git branch with `cd youtube-dl; git checkout -b yourextractor`
4. Start with this simple template and save it to `youtube_dl/extractor/yourextractor.py`:
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
class YourExtractorIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?yourextractor\.com/watch/(?P<id>[0-9]+)'
_TEST = {
'url': 'http://yourextractor.com/watch/42',
'md5': 'TODO: md5 sum of the first 10KiB of the video file',
'info_dict': {
'id': '42',
'ext': 'mp4',
'title': 'Video title goes here',
# TODO more properties, either as:
# * A value
# * MD5 checksum; start the string with md5:
# * A regular expression; start the string with re:
# * Any Python type (for example int or float)
}
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
# TODO more code goes here, for example ...
webpage = self._download_webpage(url, video_id)
title = self._html_search_regex(r'<h1>(.*?)</h1>', webpage, 'title')
return {
'id': video_id,
'title': title,
# TODO more properties (see youtube_dl/extractor/common.py)
}
5. Add an import in [`youtube_dl/extractor/__init__.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/__init__.py).
6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done.
7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L38). Add tests and code for as many of those fields as you want.
8. If you can, check the code with [pyflakes](https://pypi.python.org/pypi/pyflakes) (a good idea) and [pep8](https://pypi.python.org/pypi/pep8) (optional, ignore E501).
9. When the tests pass, [add](https://www.kernel.org/pub/software/scm/git/docs/git-add.html) the new files and [commit](https://www.kernel.org/pub/software/scm/git/docs/git-commit.html) them and [push](https://www.kernel.org/pub/software/scm/git/docs/git-push.html) the result, like this:
$ git add youtube_dl/extractor/__init__.py
$ git add youtube_dl/extractor/yourextractor.py
$ git commit -m '[yourextractor] Add new extractor'
$ git push origin yourextractor
10. Finally, [create a pull request](https://help.github.com/articles/creating-a-pull-request). We'll then review and merge it.
In any case, thank you very much for your contributions!
# BUGS # BUGS

View File

@ -26,16 +26,27 @@ class YDL(FakeYDL):
self.msgs.append(msg) self.msgs.append(msg)
def _make_result(formats, **kwargs):
res = {
'formats': formats,
'id': 'testid',
'title': 'testttitle',
'extractor': 'testex',
}
res.update(**kwargs)
return res
class TestFormatSelection(unittest.TestCase): class TestFormatSelection(unittest.TestCase):
def test_prefer_free_formats(self): def test_prefer_free_formats(self):
# Same resolution => download webm # Same resolution => download webm
ydl = YDL() ydl = YDL()
ydl.params['prefer_free_formats'] = True ydl.params['prefer_free_formats'] = True
formats = [ formats = [
{'ext': 'webm', 'height': 460}, {'ext': 'webm', 'height': 460, 'url': 'x'},
{'ext': 'mp4', 'height': 460}, {'ext': 'mp4', 'height': 460, 'url': 'y'},
] ]
info_dict = {'formats': formats, 'extractor': 'test'} info_dict = _make_result(formats)
yie = YoutubeIE(ydl) yie = YoutubeIE(ydl)
yie._sort_formats(info_dict['formats']) yie._sort_formats(info_dict['formats'])
ydl.process_ie_result(info_dict) ydl.process_ie_result(info_dict)
@ -46,8 +57,8 @@ class TestFormatSelection(unittest.TestCase):
ydl = YDL() ydl = YDL()
ydl.params['prefer_free_formats'] = True ydl.params['prefer_free_formats'] = True
formats = [ formats = [
{'ext': 'webm', 'height': 720}, {'ext': 'webm', 'height': 720, 'url': 'a'},
{'ext': 'mp4', 'height': 1080}, {'ext': 'mp4', 'height': 1080, 'url': 'b'},
] ]
info_dict['formats'] = formats info_dict['formats'] = formats
yie = YoutubeIE(ydl) yie = YoutubeIE(ydl)
@ -60,9 +71,9 @@ class TestFormatSelection(unittest.TestCase):
ydl = YDL() ydl = YDL()
ydl.params['prefer_free_formats'] = False ydl.params['prefer_free_formats'] = False
formats = [ formats = [
{'ext': 'webm', 'height': 720}, {'ext': 'webm', 'height': 720, 'url': '_'},
{'ext': 'mp4', 'height': 720}, {'ext': 'mp4', 'height': 720, 'url': '_'},
{'ext': 'flv', 'height': 720}, {'ext': 'flv', 'height': 720, 'url': '_'},
] ]
info_dict['formats'] = formats info_dict['formats'] = formats
yie = YoutubeIE(ydl) yie = YoutubeIE(ydl)
@ -74,8 +85,8 @@ class TestFormatSelection(unittest.TestCase):
ydl = YDL() ydl = YDL()
ydl.params['prefer_free_formats'] = False ydl.params['prefer_free_formats'] = False
formats = [ formats = [
{'ext': 'flv', 'height': 720}, {'ext': 'flv', 'height': 720, 'url': '_'},
{'ext': 'webm', 'height': 720}, {'ext': 'webm', 'height': 720, 'url': '_'},
] ]
info_dict['formats'] = formats info_dict['formats'] = formats
yie = YoutubeIE(ydl) yie = YoutubeIE(ydl)
@ -91,8 +102,7 @@ class TestFormatSelection(unittest.TestCase):
{'format_id': 'great', 'url': 'http://example.com/great', 'preference': 3}, {'format_id': 'great', 'url': 'http://example.com/great', 'preference': 3},
{'format_id': 'excellent', 'url': 'http://example.com/exc', 'preference': 4}, {'format_id': 'excellent', 'url': 'http://example.com/exc', 'preference': 4},
] ]
info_dict = { info_dict = _make_result(formats)
'formats': formats, 'extractor': 'test', 'id': 'testvid'}
ydl = YDL() ydl = YDL()
ydl.process_ie_result(info_dict) ydl.process_ie_result(info_dict)
@ -120,12 +130,12 @@ class TestFormatSelection(unittest.TestCase):
def test_format_selection(self): def test_format_selection(self):
formats = [ formats = [
{'format_id': '35', 'ext': 'mp4', 'preference': 1}, {'format_id': '35', 'ext': 'mp4', 'preference': 1, 'url': '_'},
{'format_id': '45', 'ext': 'webm', 'preference': 2}, {'format_id': '45', 'ext': 'webm', 'preference': 2, 'url': '_'},
{'format_id': '47', 'ext': 'webm', 'preference': 3}, {'format_id': '47', 'ext': 'webm', 'preference': 3, 'url': '_'},
{'format_id': '2', 'ext': 'flv', 'preference': 4}, {'format_id': '2', 'ext': 'flv', 'preference': 4, 'url': '_'},
] ]
info_dict = {'formats': formats, 'extractor': 'test'} info_dict = _make_result(formats)
ydl = YDL({'format': '20/47'}) ydl = YDL({'format': '20/47'})
ydl.process_ie_result(info_dict.copy()) ydl.process_ie_result(info_dict.copy())
@ -154,12 +164,12 @@ class TestFormatSelection(unittest.TestCase):
def test_format_selection_audio(self): def test_format_selection_audio(self):
formats = [ formats = [
{'format_id': 'audio-low', 'ext': 'webm', 'preference': 1, 'vcodec': 'none'}, {'format_id': 'audio-low', 'ext': 'webm', 'preference': 1, 'vcodec': 'none', 'url': '_'},
{'format_id': 'audio-mid', 'ext': 'webm', 'preference': 2, 'vcodec': 'none'}, {'format_id': 'audio-mid', 'ext': 'webm', 'preference': 2, 'vcodec': 'none', 'url': '_'},
{'format_id': 'audio-high', 'ext': 'flv', 'preference': 3, 'vcodec': 'none'}, {'format_id': 'audio-high', 'ext': 'flv', 'preference': 3, 'vcodec': 'none', 'url': '_'},
{'format_id': 'vid', 'ext': 'mp4', 'preference': 4}, {'format_id': 'vid', 'ext': 'mp4', 'preference': 4, 'url': '_'},
] ]
info_dict = {'formats': formats, 'extractor': 'test'} info_dict = _make_result(formats)
ydl = YDL({'format': 'bestaudio'}) ydl = YDL({'format': 'bestaudio'})
ydl.process_ie_result(info_dict.copy()) ydl.process_ie_result(info_dict.copy())
@ -172,10 +182,10 @@ class TestFormatSelection(unittest.TestCase):
self.assertEqual(downloaded['format_id'], 'audio-low') self.assertEqual(downloaded['format_id'], 'audio-low')
formats = [ formats = [
{'format_id': 'vid-low', 'ext': 'mp4', 'preference': 1}, {'format_id': 'vid-low', 'ext': 'mp4', 'preference': 1, 'url': '_'},
{'format_id': 'vid-high', 'ext': 'mp4', 'preference': 2}, {'format_id': 'vid-high', 'ext': 'mp4', 'preference': 2, 'url': '_'},
] ]
info_dict = {'formats': formats, 'extractor': 'test'} info_dict = _make_result(formats)
ydl = YDL({'format': 'bestaudio/worstaudio/best'}) ydl = YDL({'format': 'bestaudio/worstaudio/best'})
ydl.process_ie_result(info_dict.copy()) ydl.process_ie_result(info_dict.copy())
@ -184,11 +194,11 @@ class TestFormatSelection(unittest.TestCase):
def test_format_selection_video(self): def test_format_selection_video(self):
formats = [ formats = [
{'format_id': 'dash-video-low', 'ext': 'mp4', 'preference': 1, 'acodec': 'none'}, {'format_id': 'dash-video-low', 'ext': 'mp4', 'preference': 1, 'acodec': 'none', 'url': '_'},
{'format_id': 'dash-video-high', 'ext': 'mp4', 'preference': 2, 'acodec': 'none'}, {'format_id': 'dash-video-high', 'ext': 'mp4', 'preference': 2, 'acodec': 'none', 'url': '_'},
{'format_id': 'vid', 'ext': 'mp4', 'preference': 3}, {'format_id': 'vid', 'ext': 'mp4', 'preference': 3, 'url': '_'},
] ]
info_dict = {'formats': formats, 'extractor': 'test'} info_dict = _make_result(formats)
ydl = YDL({'format': 'bestvideo'}) ydl = YDL({'format': 'bestvideo'})
ydl.process_ie_result(info_dict.copy()) ydl.process_ie_result(info_dict.copy())
@ -217,10 +227,12 @@ class TestFormatSelection(unittest.TestCase):
for f1id, f2id in zip(order, order[1:]): for f1id, f2id in zip(order, order[1:]):
f1 = YoutubeIE._formats[f1id].copy() f1 = YoutubeIE._formats[f1id].copy()
f1['format_id'] = f1id f1['format_id'] = f1id
f1['url'] = 'url:' + f1id
f2 = YoutubeIE._formats[f2id].copy() f2 = YoutubeIE._formats[f2id].copy()
f2['format_id'] = f2id f2['format_id'] = f2id
f2['url'] = 'url:' + f2id
info_dict = {'formats': [f1, f2], 'extractor': 'youtube'} info_dict = _make_result([f1, f2], extractor='youtube')
ydl = YDL() ydl = YDL()
yie = YoutubeIE(ydl) yie = YoutubeIE(ydl)
yie._sort_formats(info_dict['formats']) yie._sort_formats(info_dict['formats'])
@ -228,7 +240,7 @@ class TestFormatSelection(unittest.TestCase):
downloaded = ydl.downloaded_info_dicts[0] downloaded = ydl.downloaded_info_dicts[0]
self.assertEqual(downloaded['format_id'], f1id) self.assertEqual(downloaded['format_id'], f1id)
info_dict = {'formats': [f2, f1], 'extractor': 'youtube'} info_dict = _make_result([f2, f1], extractor='youtube')
ydl = YDL() ydl = YDL()
yie = YoutubeIE(ydl) yie = YoutubeIE(ydl)
yie._sort_formats(info_dict['formats']) yie._sort_formats(info_dict['formats'])

View File

@ -153,6 +153,9 @@ class TestAllURLsMatching(unittest.TestCase):
self.assertMatch( self.assertMatch(
'http://thecolbertreport.cc.com/videos/gh6urb/neil-degrasse-tyson-pt--1?xrs=eml_col_031114', 'http://thecolbertreport.cc.com/videos/gh6urb/neil-degrasse-tyson-pt--1?xrs=eml_col_031114',
['ComedyCentralShows']) ['ComedyCentralShows'])
self.assertMatch(
'http://thedailyshow.cc.com/guests/michael-lewis/3efna8/exclusive---michael-lewis-extended-interview-pt--3',
['ComedyCentralShows'])
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()

View File

@ -42,6 +42,7 @@ from youtube_dl.extractor import (
ToypicsUserIE, ToypicsUserIE,
XTubeUserIE, XTubeUserIE,
InstagramUserIE, InstagramUserIE,
CSpanIE,
) )
@ -314,6 +315,19 @@ class TestPlaylists(unittest.TestCase):
} }
expect_info_dict(self, EXPECTED, test_video) expect_info_dict(self, EXPECTED, test_video)
def test_CSpan_playlist(self):
dl = FakeYDL()
ie = CSpanIE(dl)
result = ie.extract(
'http://www.c-span.org/video/?318608-1/gm-ignition-switch-recall')
self.assertIsPlaylist(result)
self.assertEqual(result['id'], '342759')
self.assertEqual(
result['title'], 'General Motors Ignition Switch Recall')
self.assertEqual(len(result['entries']), 9)
whole_duration = sum(e['duration'] for e in result['entries'])
self.assertEqual(whole_duration, 14855)
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()

View File

@ -703,6 +703,11 @@ class YoutubeDL(object):
def process_video_result(self, info_dict, download=True): def process_video_result(self, info_dict, download=True):
assert info_dict.get('_type', 'video') == 'video' assert info_dict.get('_type', 'video') == 'video'
if 'id' not in info_dict:
raise ExtractorError('Missing "id" field in extractor result')
if 'title' not in info_dict:
raise ExtractorError('Missing "title" field in extractor result')
if 'playlist' not in info_dict: if 'playlist' not in info_dict:
# It isn't part of a playlist # It isn't part of a playlist
info_dict['playlist'] = None info_dict['playlist'] = None
@ -734,6 +739,9 @@ class YoutubeDL(object):
# We check that all the formats have the format and format_id fields # We check that all the formats have the format and format_id fields
for i, format in enumerate(formats): for i, format in enumerate(formats):
if 'url' not in format:
raise ExtractorError('Missing "url" key in result (index %d)' % i)
if format.get('format_id') is None: if format.get('format_id') is None:
format['format_id'] = compat_str(i) format['format_id'] = compat_str(i)
if format.get('format') is None: if format.get('format') is None:
@ -744,7 +752,7 @@ class YoutubeDL(object):
) )
# Automatically determine file extension if missing # Automatically determine file extension if missing
if 'ext' not in format: if 'ext' not in format:
format['ext'] = determine_ext(format['url']) format['ext'] = determine_ext(format['url']).lower()
format_limit = self.params.get('format_limit', None) format_limit = self.params.get('format_limit', None)
if format_limit: if format_limit:
@ -869,7 +877,7 @@ class YoutubeDL(object):
try: try:
dn = os.path.dirname(encodeFilename(filename)) dn = os.path.dirname(encodeFilename(filename))
if dn != '' and not os.path.exists(dn): if dn and not os.path.exists(dn):
os.makedirs(dn) os.makedirs(dn)
except (OSError, IOError) as err: except (OSError, IOError) as err:
self.report_error('unable to create directory ' + compat_str(err)) self.report_error('unable to create directory ' + compat_str(err))

View File

@ -4,9 +4,10 @@ import sys
import time import time
from ..utils import ( from ..utils import (
compat_str,
encodeFilename, encodeFilename,
timeconvert,
format_bytes, format_bytes,
timeconvert,
) )
@ -173,7 +174,7 @@ class FileDownloader(object):
return return
os.rename(encodeFilename(old_filename), encodeFilename(new_filename)) os.rename(encodeFilename(old_filename), encodeFilename(new_filename))
except (IOError, OSError) as err: except (IOError, OSError) as err:
self.report_error(u'unable to rename file: %s' % str(err)) self.report_error(u'unable to rename file: %s' % compat_str(err))
def try_utime(self, filename, last_modified_hdr): def try_utime(self, filename, last_modified_hdr):
"""Try to set the last-modified time of the given file.""" """Try to set the last-modified time of the given file."""

View File

@ -297,6 +297,7 @@ class F4mFD(FileDownloader):
break break
frags_filenames.append(frag_filename) frags_filenames.append(frag_filename)
dest_stream.close()
self.report_finish(format_bytes(state['downloaded_bytes']), time.time() - start) self.report_finish(format_bytes(state['downloaded_bytes']), time.time() - start)
self.try_rename(tmpfilename, filename) self.try_rename(tmpfilename, filename)

View File

@ -40,6 +40,7 @@ from .clipfish import ClipfishIE
from .cliphunter import CliphunterIE from .cliphunter import CliphunterIE
from .clipsyndicate import ClipsyndicateIE from .clipsyndicate import ClipsyndicateIE
from .cmt import CMTIE from .cmt import CMTIE
from .cnet import CNETIE
from .cnn import ( from .cnn import (
CNNIE, CNNIE,
CNNBlogsIE, CNNBlogsIE,
@ -83,6 +84,7 @@ from .fktv import (
) )
from .flickr import FlickrIE from .flickr import FlickrIE
from .fourtube import FourTubeIE from .fourtube import FourTubeIE
from .franceculture import FranceCultureIE
from .franceinter import FranceInterIE from .franceinter import FranceInterIE
from .francetv import ( from .francetv import (
PluzzIE, PluzzIE,
@ -152,6 +154,8 @@ from .mixcloud import MixcloudIE
from .mpora import MporaIE from .mpora import MporaIE
from .mofosex import MofosexIE from .mofosex import MofosexIE
from .mooshare import MooshareIE from .mooshare import MooshareIE
from .morningstar import MorningstarIE
from .motorsport import MotorsportIE
from .mtv import ( from .mtv import (
MTVIE, MTVIE,
MTVIggyIE, MTVIggyIE,

View File

@ -2,39 +2,46 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import re import re
import json
from .common import InfoExtractor from .common import InfoExtractor
class C56IE(InfoExtractor): class C56IE(InfoExtractor):
_VALID_URL = r'https?://((www|player)\.)?56\.com/(.+?/)?(v_|(play_album.+-))(?P<textid>.+?)\.(html|swf)' _VALID_URL = r'https?://(?:(?:www|player)\.)?56\.com/(?:.+?/)?(?:v_|(?:play_album.+-))(?P<textid>.+?)\.(?:html|swf)'
IE_NAME = '56.com' IE_NAME = '56.com'
_TEST = { _TEST = {
'url': 'http://www.56.com/u39/v_OTM0NDA3MTY.html', 'url': 'http://www.56.com/u39/v_OTM0NDA3MTY.html',
'file': '93440716.flv',
'md5': 'e59995ac63d0457783ea05f93f12a866', 'md5': 'e59995ac63d0457783ea05f93f12a866',
'info_dict': { 'info_dict': {
'id': '93440716',
'ext': 'flv',
'title': '网事知多少 第32期车怒', 'title': '网事知多少 第32期车怒',
'duration': 283.813,
}, },
} }
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url, flags=re.VERBOSE) mobj = re.match(self._VALID_URL, url, flags=re.VERBOSE)
text_id = mobj.group('textid') text_id = mobj.group('textid')
info_page = self._download_webpage('http://vxml.56.com/json/%s/' % text_id,
text_id, 'Downloading video info') page = self._download_json(
info = json.loads(info_page)['info'] 'http://vxml.56.com/json/%s/' % text_id, text_id, 'Downloading video info')
formats = [{
info = page['info']
formats = [
{
'format_id': f['type'], 'format_id': f['type'],
'filesize': int(f['filesize']), 'filesize': int(f['filesize']),
'url': f['url'] 'url': f['url']
} for f in info['rfiles']] } for f in info['rfiles']
]
self._sort_formats(formats) self._sort_formats(formats)
return { return {
'id': info['vid'], 'id': info['vid'],
'title': info['Subject'], 'title': info['Subject'],
'duration': int(info['duration']) / 1000.0,
'formats': formats, 'formats': formats,
'thumbnail': info.get('bimg') or info.get('img'), 'thumbnail': info.get('bimg') or info.get('img'),
} }

View File

@ -0,0 +1,75 @@
# coding: utf-8
from __future__ import unicode_literals
import json
import re
from .common import InfoExtractor
from ..utils import (
ExtractorError,
int_or_none,
)
class CNETIE(InfoExtractor):
    # Matches cnet.com video pages; the path segment after /videos/ is
    # captured as the (human-readable) display id.
    _VALID_URL = r'https?://(?:www\.)?cnet\.com/videos/(?P<id>[^/]+)/'
    _TEST = {
        'url': 'http://www.cnet.com/videos/hands-on-with-microsofts-windows-8-1-update/',
        'md5': '041233212a0d06b179c87cbcca1577b8',
        'info_dict': {
            'id': '56f4ea68-bd21-4852-b08c-4de5b8354c60',
            'ext': 'mp4',
            'title': 'Hands-on with Microsoft Windows 8.1 Update',
            'description': 'The new update to the Windows 8 OS brings improved performance for mouse and keyboard users.',
            'thumbnail': 're:^http://.*/flmswindows8.jpg$',
            'uploader_id': 'sarah.mitroff@cbsinteractive.com',
            'uploader': 'Sarah Mitroff',
        }
    }

    def _real_extract(self, url):
        """Extract video metadata and formats from a cnet.com video page.

        The page embeds a JSON blob in the ``data-cnet-video-options``
        attribute of the player element; all information is read from it.

        Raises ExtractorError if the JSON contains neither a usable
        'video' entry nor a non-empty 'videos' list.
        """
        mobj = re.match(self._VALID_URL, url)
        display_id = mobj.group('id')

        webpage = self._download_webpage(url, display_id)
        data_json = self._html_search_regex(
            r"<div class=\"cnetVideoPlayer\" data-cnet-video-options='([^']+)'",
            webpage, 'data json')
        data = json.loads(data_json)

        # Prefer the single 'video' entry and fall back to the first item of
        # 'videos'. Use .get() so a missing key reaches the fallback and the
        # explicit error below instead of dying with a KeyError/IndexError.
        vdata = data.get('video')
        if not vdata:
            videos = data.get('videos')
            vdata = videos[0] if videos else None
        if not vdata:
            raise ExtractorError('Cannot find video data')

        video_id = vdata['id']
        title = vdata['headline']
        description = vdata.get('dek')
        # 'image' may be absent or null; treat both as "no thumbnail".
        thumbnail = (vdata.get('image') or {}).get('path')
        author = vdata.get('author')
        if author:
            uploader = '%s %s' % (author['firstName'], author['lastName'])
            uploader_id = author.get('email')
        else:
            uploader = None
            uploader_id = None

        formats = [{
            # The bitrate is scaled down by 1000 for both the id and 'tbr';
            # a missing bitrate yields an empty suffix in the format id.
            'format_id': '%s-%s-%s' % (
                f['type'], f['format'],
                int_or_none(f.get('bitrate'), 1000, default='')),
            'url': f['uri'],
            'tbr': int_or_none(f.get('bitrate'), 1000),
        } for f in vdata['files']['data']]
        self._sort_formats(formats)

        return {
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'formats': formats,
            'description': description,
            'uploader': uploader,
            'uploader_id': uploader_id,
            'thumbnail': thumbnail,
        }

View File

@ -41,9 +41,9 @@ class ComedyCentralShowsIE(InfoExtractor):
_VALID_URL = r'''(?x)^(:(?P<shortname>tds|thedailyshow|cr|colbert|colbertnation|colbertreport) _VALID_URL = r'''(?x)^(:(?P<shortname>tds|thedailyshow|cr|colbert|colbertnation|colbertreport)
|https?://(:www\.)? |https?://(:www\.)?
(?P<showname>thedailyshow|thecolbertreport)\.(?:cc\.)?com/ (?P<showname>thedailyshow|thecolbertreport)\.(?:cc\.)?com/
(full-episodes/(?P<episode>.*)| (full-episodes/(?:[0-9a-z]{6}/)?(?P<episode>.*)|
(?P<clip> (?P<clip>
(?:videos/[^/]+/(?P<videotitle>[^/?#]+)) (?:(?:guests/[^/]+|videos)/[^/]+/(?P<videotitle>[^/?#]+))
|(the-colbert-report-(videos|collections)/(?P<clipID>[0-9]+)/[^/]*/(?P<cntitle>.*?)) |(the-colbert-report-(videos|collections)/(?P<clipID>[0-9]+)/[^/]*/(?P<cntitle>.*?))
|(watch/(?P<date>[^/]*)/(?P<tdstitle>.*)) |(watch/(?P<date>[^/]*)/(?P<tdstitle>.*))
)| )|

View File

@ -256,6 +256,17 @@ class InfoExtractor(object):
outf.write(webpage_bytes) outf.write(webpage_bytes)
content = webpage_bytes.decode(encoding, 'replace') content = webpage_bytes.decode(encoding, 'replace')
if (u'<title>Access to this site is blocked</title>' in content and
u'Websense' in content[:512]):
msg = u'Access to this webpage has been blocked by Websense filtering software in your network.'
blocked_iframe = self._html_search_regex(
r'<iframe src="([^"]+)"', content,
u'Websense information URL', default=None)
if blocked_iframe:
msg += u' Visit %s for more details' % blocked_iframe
raise ExtractorError(msg, expected=True)
return (content, urlh) return (content, urlh)
def _download_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True): def _download_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True):

View File

@ -4,6 +4,7 @@ import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
int_or_none,
unescapeHTML, unescapeHTML,
find_xpath_attr, find_xpath_attr,
) )
@ -54,18 +55,29 @@ class CSpanIE(InfoExtractor):
info_url = 'http://c-spanvideo.org/videoLibrary/assets/player/ajax-player.php?os=android&html5=program&id=' + video_id info_url = 'http://c-spanvideo.org/videoLibrary/assets/player/ajax-player.php?os=android&html5=program&id=' + video_id
data = self._download_json(info_url, video_id) data = self._download_json(info_url, video_id)
url = unescapeHTML(data['video']['files'][0]['path']['#text']) doc = self._download_xml(
'http://www.c-span.org/common/services/flashXml.php?programid=' + video_id,
doc = self._download_xml('http://www.c-span.org/common/services/flashXml.php?programid=' + video_id,
video_id) video_id)
def find_string(s): title = find_xpath_attr(doc, './/string', 'name', 'title').text
return find_xpath_attr(doc, './/string', 'name', s).text thumbnail = find_xpath_attr(doc, './/string', 'name', 'poster').text
files = data['video']['files']
entries = [{
'id': '%s_%d' % (video_id, partnum + 1),
'title': (
title if len(files) == 1 else
'%s part %d' % (title, partnum + 1)),
'url': unescapeHTML(f['path']['#text']),
'description': description,
'thumbnail': thumbnail,
'duration': int_or_none(f.get('length', {}).get('#text')),
} for partnum, f in enumerate(files)]
return { return {
'_type': 'playlist',
'entries': entries,
'title': title,
'id': video_id, 'id': video_id,
'title': find_string('title'),
'url': url,
'description': description,
'thumbnail': find_string('poster'),
} }

View File

@ -180,7 +180,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
class DailymotionPlaylistIE(DailymotionBaseInfoExtractor): class DailymotionPlaylistIE(DailymotionBaseInfoExtractor):
IE_NAME = u'dailymotion:playlist' IE_NAME = u'dailymotion:playlist'
_VALID_URL = r'(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/playlist/(?P<id>.+?)/' _VALID_URL = r'(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/playlist/(?P<id>.+?)/'
_MORE_PAGES_INDICATOR = r'<div class="next">.*?<a.*?href="/playlist/.+?".*?>.*?</a>.*?</div>' _MORE_PAGES_INDICATOR = r'(?s)<div class="pages[^"]*">.*?<a\s+class="[^"]*?icon-arrow_right[^"]*?"'
_PAGE_TEMPLATE = 'https://www.dailymotion.com/playlist/%s/%s' _PAGE_TEMPLATE = 'https://www.dailymotion.com/playlist/%s/%s'
def _extract_entries(self, id): def _extract_entries(self, id):
@ -190,10 +190,9 @@ class DailymotionPlaylistIE(DailymotionBaseInfoExtractor):
webpage = self._download_webpage(request, webpage = self._download_webpage(request,
id, u'Downloading page %s' % pagenum) id, u'Downloading page %s' % pagenum)
playlist_el = get_element_by_attribute(u'class', u'row video_list', webpage) video_ids.extend(re.findall(r'data-id="(.+?)"', webpage))
video_ids.extend(re.findall(r'data-id="(.+?)"', playlist_el))
if re.search(self._MORE_PAGES_INDICATOR, webpage, re.DOTALL) is None: if re.search(self._MORE_PAGES_INDICATOR, webpage) is None:
break break
return [self.url_result('http://www.dailymotion.com/video/%s' % video_id, 'Dailymotion') return [self.url_result('http://www.dailymotion.com/video/%s' % video_id, 'Dailymotion')
for video_id in orderedSet(video_ids)] for video_id in orderedSet(video_ids)]
@ -212,8 +211,7 @@ class DailymotionPlaylistIE(DailymotionBaseInfoExtractor):
class DailymotionUserIE(DailymotionPlaylistIE): class DailymotionUserIE(DailymotionPlaylistIE):
IE_NAME = u'dailymotion:user' IE_NAME = u'dailymotion:user'
_VALID_URL = r'(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/user/(?P<user>[^/]+)' _VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/user/(?P<user>[^/]+)'
_MORE_PAGES_INDICATOR = r'<div class="next">.*?<a.*?href="/user/.+?".*?>.*?</a>.*?</div>'
_PAGE_TEMPLATE = 'http://www.dailymotion.com/user/%s/%s' _PAGE_TEMPLATE = 'http://www.dailymotion.com/user/%s/%s'
def _real_extract(self, url): def _real_extract(self, url):

View File

@ -0,0 +1,77 @@
# coding: utf-8
from __future__ import unicode_literals
import json
import re
from .common import InfoExtractor
from ..utils import (
compat_parse_qs,
compat_urlparse,
)
class FranceCultureIE(InfoExtractor):
    # 'baseurl' captures the site root so that relative media and image
    # paths found in the page can be resolved against it.
    _VALID_URL = r'(?P<baseurl>http://(?:www\.)?franceculture\.fr/)player/reecouter\?play=(?P<id>[0-9]+)'
    _TEST = {
        'url': 'http://www.franceculture.fr/player/reecouter?play=4795174',
        'info_dict': {
            'id': '4795174',
            'ext': 'mp3',
            'title': 'Rendez-vous au pays des geeks',
            'vcodec': 'none',
            'uploader': 'Colette Fellous',
            'upload_date': '20140301',
            'duration': 3601,
            'thumbnail': r're:^http://www\.franceculture\.fr/.*/images/player/Carnet-nomade\.jpg$',
            'description': 'Avec :Jean-Baptiste Péretié pour son documentaire sur Arte "La revanche des « geeks », une enquête menée aux Etats-Unis dans la S ...',
        }
    }

    def _real_extract(self, url):
        """Extract the media URL and metadata from a franceculture.fr player page.

        The media location and an 'infoData' JSON blob are read from the
        query string passed to the Flash loader in the page; the remaining
        metadata is scraped from the HTML. Only the title and the loader
        parameters are mandatory; uploader, thumbnail and description are
        optional and yield None when they cannot be found.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        baseurl = mobj.group('baseurl')

        webpage = self._download_webpage(url, video_id)
        params_code = self._search_regex(
            r"<param name='movie' value='/sites/all/modules/rf/rf_player/swf/loader.swf\?([^']+)' />",
            webpage, 'parameter code')
        params = compat_parse_qs(params_code)
        video_url = compat_urlparse.urljoin(baseurl, params['urlAOD'][0])

        title = self._html_search_regex(
            r'<h1 class="title[^"]+">(.+?)</h1>', webpage, 'title')
        uploader = self._html_search_regex(
            r'(?s)<div id="emission".*?<span class="author">(.*?)</span>',
            webpage, 'uploader', fatal=False)
        thumbnail_part = self._html_search_regex(
            r'(?s)<div id="emission".*?<img src="([^"]+)"', webpage,
            'thumbnail', fatal=False)
        if thumbnail_part is None:
            thumbnail = None
        else:
            thumbnail = compat_urlparse.urljoin(baseurl, thumbnail_part)
        # The description is optional metadata: like uploader and thumbnail,
        # a missing one must not abort the whole extraction.
        description = self._html_search_regex(
            r'(?s)<p class="desc">(.*?)</p>', webpage, 'description',
            fatal=False)

        info = json.loads(params['infoData'][0])[0]
        duration = info.get('media_length')
        # Only accept the candidate when it is exactly eight digits;
        # anything else is reported as "no upload date".
        upload_date_candidate = info.get('media_section5')
        upload_date = (
            upload_date_candidate
            if (upload_date_candidate is not None and
                re.match(r'[0-9]{8}$', upload_date_candidate))
            else None)

        return {
            'id': video_id,
            'url': video_url,
            # An .mp3 URL is audio-only, which is signalled by vcodec 'none'.
            'vcodec': 'none' if video_url.lower().endswith('.mp3') else None,
            'duration': duration,
            'uploader': uploader,
            'upload_date': upload_date,
            'title': title,
            'thumbnail': thumbnail,
            'description': description,
        }

View File

@ -114,20 +114,6 @@ class GenericIE(InfoExtractor):
'title': '2cc213299525360.mov', # that's what we get 'title': '2cc213299525360.mov', # that's what we get
}, },
}, },
# second style of embedded ooyala videos
{
'url': 'http://www.smh.com.au/tv/business/show/financial-review-sunday/behind-the-scenes-financial-review-sunday--4350201.html',
'info_dict': {
'id': '13djJjYjptA1XpPx8r9kuzPyj3UZH0Uk',
'ext': 'mp4',
'title': 'Behind-the-scenes: Financial Review Sunday ',
'description': 'Step inside Channel Nine studios for an exclusive tour of its upcoming financial business show.',
},
'params': {
# m3u8 download
'skip_download': True,
},
},
# google redirect # google redirect
{ {
'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE', 'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE',

View File

@ -0,0 +1,55 @@
# coding: utf-8
from __future__ import unicode_literals
import hashlib
import json
import re
import time
from .common import InfoExtractor
from ..utils import (
compat_parse_qs,
compat_str,
int_or_none,
)
class MorningstarIE(InfoExtractor):
    IE_DESC = 'morningstar.com'
    # The numeric 'id' query parameter of the video center page is used
    # directly as the video id.
    _VALID_URL = r'https?://(?:www\.)?morningstar\.com/cover/videocenter\.aspx\?id=(?P<id>[0-9]+)'
    _TEST = {
        'url': 'http://www.morningstar.com/cover/videocenter.aspx?id=615869',
        'md5': '6c0acface7a787aadc8391e4bbf7b0f5',
        'info_dict': {
            'id': '615869',
            'ext': 'mp4',
            'title': 'Get Ahead of the Curve on 2013 Taxes',
            'description': "Vanguard's Joel Dickson on managing higher tax rates for high-income earners and fund capital-gain distributions in 2013.",
            'thumbnail': r're:^https?://.*m(?:orning)?star\.com/.+thumb\.jpg$'
        }
    }

    def _real_extract(self, url):
        """Extract a single video from a morningstar.com video center page.

        Title and video URL are mandatory and are scraped from the page
        HTML; thumbnail and description are optional and yield None when
        they cannot be found.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        webpage = self._download_webpage(url, video_id)
        title = self._html_search_regex(
            r'<h1 id="titleLink">(.*?)</h1>', webpage, 'title')
        # Media URL and snapshot are exposed through hidden form inputs.
        video_url = self._html_search_regex(
            r'<input type="hidden" id="hidVideoUrl" value="([^"]+)"',
            webpage, 'video URL')
        thumbnail = self._html_search_regex(
            r'<input type="hidden" id="hidSnapshot" value="([^"]+)"',
            webpage, 'thumbnail', fatal=False)
        description = self._html_search_regex(
            r'<div id="mstarDeck".*?>(.*?)</div>',
            webpage, 'description', fatal=False)

        return {
            'id': video_id,
            'title': title,
            'url': video_url,
            'thumbnail': thumbnail,
            'description': description,
        }

View File

@ -0,0 +1,63 @@
# coding: utf-8
from __future__ import unicode_literals
import hashlib
import json
import re
import time
from .common import InfoExtractor
from ..utils import (
compat_parse_qs,
compat_str,
int_or_none,
)
class MotorsportIE(InfoExtractor):
    """Information extractor for motorsport.com video pages."""

    IE_DESC = 'motorsport.com'
    _VALID_URL = r'http://www\.motorsport\.com/[^/?#]+/video/(?:[^/?#]+/)(?P<id>[^/]+)/(?:$|[?#])'
    _TEST = {
        'url': 'http://www.motorsport.com/f1/video/main-gallery/red-bull-racing-2014-rules-explained/',
        'md5': '5592cb7c5005d9b2c163df5ac3dc04e4',
        'info_dict': {
            'id': '7063',
            'ext': 'mp4',
            'title': 'Red Bull Racing: 2014 Rules Explained',
            'duration': 207,
            'description': 'A new clip from Red Bull sees Daniel Ricciardo and Sebastian Vettel explain the 2014 Formula One regulations which are arguably the most complex the sport has ever seen.',
            'uploader': 'rainiere',
            'thumbnail': r're:^http://.*motorsport\.com/.+\.jpg$'
        }
    }

    def _real_extract(self, url):
        """Return the info dict for the motorsport.com video page at *url*.

        The metadata comes from the JSON blob stored in the ``parameters``
        entry of the player's ``flashvars`` attribute. The media URL is the
        ``location`` from that blob plus an expiry timestamp (``e``) and an
        MD5 digest (``h``).
        """
        mobj = re.match(self._VALID_URL, url)
        display_id = mobj.group('id')

        webpage = self._download_webpage(url, display_id)
        flashvars_code = self._html_search_regex(
            r'<embed id="player".*?flashvars="([^"]+)"', webpage, 'flashvars')
        flashvars = compat_parse_qs(flashvars_code)
        params = json.loads(flashvars['parameters'][0])

        # Expiry value: current time plus 24 hours, as a string.
        e = compat_str(int(time.time()) + 24 * 60 * 60)
        base_video_url = params['location'] + '?e=' + e
        s = 'h3hg713fh32'
        # hashlib only accepts bytes; with unicode_literals both operands are
        # text, so encode before hashing (otherwise Python 3 raises TypeError).
        h = hashlib.md5((s + base_video_url).encode('utf-8')).hexdigest()
        video_url = base_video_url + '&h=' + h

        uploader = self._html_search_regex(
            r'(?s)<span class="label">Video by: </span>(.*?)</a>', webpage,
            'uploader', fatal=False)

        return {
            'id': params['video_id'],
            'display_id': display_id,
            'title': params['title'],
            'url': video_url,
            'description': params.get('description'),
            'thumbnail': params.get('main_thumb'),
            'duration': int_or_none(params.get('duration')),
            'uploader': uploader,
        }

View File

@ -1,44 +1,81 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import re import re
import json
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import compat_urllib_parse from ..utils import int_or_none
class PornHdIE(InfoExtractor): class PornHdIE(InfoExtractor):
_VALID_URL = r'(?:http://)?(?:www\.)?pornhd\.com/(?:[a-z]{2,4}/)?videos/(?P<video_id>[0-9]+)/(?P<video_title>.+)' _VALID_URL = r'http://(?:www\.)?pornhd\.com/(?:[a-z]{2,4}/)?videos/(?P<id>\d+)'
_TEST = { _TEST = {
'url': 'http://www.pornhd.com/videos/1962/sierra-day-gets-his-cum-all-over-herself-hd-porn-video', 'url': 'http://www.pornhd.com/videos/1962/sierra-day-gets-his-cum-all-over-herself-hd-porn-video',
'file': '1962.flv', 'md5': '956b8ca569f7f4d8ec563e2c41598441',
'md5': '35272469887dca97abd30abecc6cdf75',
'info_dict': { 'info_dict': {
"title": "sierra-day-gets-his-cum-all-over-herself-hd-porn-video", 'id': '1962',
"age_limit": 18, 'ext': 'mp4',
'title': 'Sierra loves doing laundry',
'description': 'md5:8ff0523848ac2b8f9b065ba781ccf294',
'age_limit': 18,
} }
} }
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
video_id = mobj.group('video_id')
video_title = mobj.group('video_title')
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
next_url = self._html_search_regex( title = self._og_search_title(webpage)
r'&hd=(http.+?)&', webpage, 'video URL') TITLE_SUFFIX = ' porn HD Video | PornHD.com '
next_url = compat_urllib_parse.unquote(next_url) if title.endswith(TITLE_SUFFIX):
title = title[:-len(TITLE_SUFFIX)]
video_url = self._download_webpage( description = self._html_search_regex(
next_url, video_id, note='Retrieving video URL', r'<div class="description">([^<]+)</div>', webpage, 'description', fatal=False)
errnote='Could not retrieve video URL') view_count = int_or_none(self._html_search_regex(
age_limit = 18 r'(\d+) views </span>', webpage, 'view count', fatal=False))
formats = [
{
'url': url,
'ext': format.lower(),
'format_id': '%s-%s' % (format.lower(), quality.lower()),
'quality': 1 if quality.lower() == 'high' else 0,
} for format, quality, url in re.findall(
r'var __video([\da-zA-Z]+?)(Low|High)StreamUrl = \'(http://.+?)\?noProxy=1\'', webpage)
]
mobj = re.search(r'flashVars = (?P<flashvars>{.+?});', webpage)
if mobj:
flashvars = json.loads(mobj.group('flashvars'))
formats.extend([
{
'url': flashvars['hashlink'].replace('?noProxy=1', ''),
'ext': 'flv',
'format_id': 'flv-low',
'quality': 0,
},
{
'url': flashvars['hd'].replace('?noProxy=1', ''),
'ext': 'flv',
'format_id': 'flv-high',
'quality': 1,
}
])
thumbnail = flashvars['urlWallpaper']
else:
thumbnail = self._og_search_thumbnail(webpage)
self._sort_formats(formats)
return { return {
'id': video_id, 'id': video_id,
'url': video_url, 'title': title,
'ext': 'flv', 'description': description,
'title': video_title, 'thumbnail': thumbnail,
'age_limit': age_limit, 'view_count': view_count,
'formats': formats,
'age_limit': 18,
} }

View File

@ -9,14 +9,16 @@ from ..utils import (
parse_duration, parse_duration,
parse_iso8601, parse_iso8601,
unescapeHTML, unescapeHTML,
compat_str,
) )
class RTSIE(InfoExtractor): class RTSIE(InfoExtractor):
IE_DESC = 'RTS.ch' IE_DESC = 'RTS.ch'
_VALID_URL = r'^https?://(?:www\.)?rts\.ch/archives/tv/[^/]+/(?P<id>[0-9]+)-.*?\.html' _VALID_URL = r'^https?://(?:www\.)?rts\.ch/(?:[^/]+/){2,}(?P<id>[0-9]+)-.*?\.html'
_TEST = { _TESTS = [
{
'url': 'http://www.rts.ch/archives/tv/divers/3449373-les-enfants-terribles.html', 'url': 'http://www.rts.ch/archives/tv/divers/3449373-les-enfants-terribles.html',
'md5': '753b877968ad8afaeddccc374d4256a5', 'md5': '753b877968ad8afaeddccc374d4256a5',
'info_dict': { 'info_dict': {
@ -30,25 +32,110 @@ class RTSIE(InfoExtractor):
'timestamp': -40280400, 'timestamp': -40280400,
'thumbnail': 're:^https?://.*\.image' 'thumbnail': 're:^https?://.*\.image'
}, },
} },
{
'url': 'http://www.rts.ch/emissions/passe-moi-les-jumelles/5624067-entre-ciel-et-mer.html',
'md5': 'c197f0b2421995c63a64cc73d800f42e',
'info_dict': {
'id': '5738317',
'ext': 'mp4',
'duration': 55,
'title': 'Bande de lancement de Passe-moi les jumelles',
'description': '',
'uploader': 'Passe-moi les jumelles',
'upload_date': '20140404',
'timestamp': 1396635300,
'thumbnail': 're:^https?://.*\.image'
},
},
{
'url': 'http://www.rts.ch/video/sport/hockey/5745975-1-2-kloten-fribourg-5-2-second-but-pour-gotteron-par-kwiatowski.html',
'md5': 'b4326fecd3eb64a458ba73c73e91299d',
'info_dict': {
'id': '5745975',
'ext': 'mp4',
'duration': 48,
'title': '1/2, Kloten - Fribourg (5-2): second but pour Gottéron par Kwiatowski',
'description': 'Hockey - Playoff',
'uploader': 'Hockey',
'upload_date': '20140403',
'timestamp': 1396556882,
'thumbnail': 're:^https?://.*\.image'
},
'skip': 'Blocked outside Switzerland',
},
{
'url': 'http://www.rts.ch/video/info/journal-continu/5745356-londres-cachee-par-un-epais-smog.html',
'md5': '9bb06503773c07ce83d3cbd793cebb91',
'info_dict': {
'id': '5745356',
'ext': 'mp4',
'duration': 33,
'title': 'Londres cachée par un épais smog',
'description': 'Un important voile de smog recouvre Londres depuis mercredi, provoqué par la pollution et du sable du Sahara.',
'uploader': 'Le Journal en continu',
'upload_date': '20140403',
'timestamp': 1396537322,
'thumbnail': 're:^https?://.*\.image'
},
},
{
'url': 'http://www.rts.ch/audio/couleur3/programmes/la-belle-video-de-stephane-laurenceau/5706148-urban-hippie-de-damien-krisl-03-04-2014.html',
'md5': 'dd8ef6a22dff163d063e2a52bc8adcae',
'info_dict': {
'id': '5706148',
'ext': 'mp3',
'duration': 123,
'title': '"Urban Hippie", de Damien Krisl',
'description': 'Des Hippies super glam.',
'upload_date': '20140403',
'timestamp': 1396551600,
},
},
]
def _real_extract(self, url): def _real_extract(self, url):
m = re.match(self._VALID_URL, url) m = re.match(self._VALID_URL, url)
video_id = m.group('id') video_id = m.group('id')
all_info = self._download_json( def download_json(video_id):
return self._download_json(
'http://www.rts.ch/a/%s.html?f=json/article' % video_id, video_id) 'http://www.rts.ch/a/%s.html?f=json/article' % video_id, video_id)
info = all_info['video']['JSONinfo']
all_info = download_json(video_id)
# video_id extracted out of URL is not always a real id
if 'video' not in all_info and 'audio' not in all_info:
page = self._download_webpage(url, video_id)
video_id = self._html_search_regex(r'<(?:video|audio) data-id="(\d+)"', page, 'video id')
all_info = download_json(video_id)
info = all_info['video']['JSONinfo'] if 'video' in all_info else all_info['audio']
upload_timestamp = parse_iso8601(info.get('broadcast_date')) upload_timestamp = parse_iso8601(info.get('broadcast_date'))
duration = parse_duration(info.get('duration')) duration = info.get('duration') or info.get('cutout') or info.get('cutduration')
if isinstance(duration, compat_str):
duration = parse_duration(duration)
view_count = info.get('plays')
thumbnail = unescapeHTML(info.get('preview_image_url')) thumbnail = unescapeHTML(info.get('preview_image_url'))
def extract_bitrate(url):
return int_or_none(self._search_regex(
r'-([0-9]+)k\.', url, 'bitrate', default=None))
formats = [{ formats = [{
'format_id': fid, 'format_id': fid,
'url': furl, 'url': furl,
'tbr': int_or_none(self._search_regex( 'tbr': extract_bitrate(furl),
r'-([0-9]+)k\.', furl, 'bitrate', default=None)),
} for fid, furl in info['streams'].items()] } for fid, furl in info['streams'].items()]
if 'media' in info:
formats.extend([{
'format_id': '%s-%sk' % (media['ext'], media['rate']),
'url': 'http://download-video.rts.ch/%s' % media['url'],
'tbr': media['rate'] or extract_bitrate(media['url']),
} for media in info['media'] if media.get('rate')])
self._sort_formats(formats) self._sort_formats(formats)
return { return {
@ -57,6 +144,7 @@ class RTSIE(InfoExtractor):
'title': info['title'], 'title': info['title'],
'description': info.get('intro'), 'description': info.get('intro'),
'duration': duration, 'duration': duration,
'view_count': view_count,
'uploader': info.get('programName'), 'uploader': info.get('programName'),
'timestamp': upload_timestamp, 'timestamp': upload_timestamp,
'thumbnail': thumbnail, 'thumbnail': thumbnail,

View File

@ -3,11 +3,12 @@ from __future__ import unicode_literals
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from .youtube import YoutubeIE
class WimpIE(InfoExtractor): class WimpIE(InfoExtractor):
_VALID_URL = r'http://(?:www\.)?wimp\.com/([^/]+)/' _VALID_URL = r'http://(?:www\.)?wimp\.com/([^/]+)/'
_TEST = { _TESTS = [{
'url': 'http://www.wimp.com/maruexhausted/', 'url': 'http://www.wimp.com/maruexhausted/',
'md5': 'f1acced123ecb28d9bb79f2479f2b6a1', 'md5': 'f1acced123ecb28d9bb79f2479f2b6a1',
'info_dict': { 'info_dict': {
@ -16,7 +17,20 @@ class WimpIE(InfoExtractor):
'title': 'Maru is exhausted.', 'title': 'Maru is exhausted.',
'description': 'md5:57e099e857c0a4ea312542b684a869b8', 'description': 'md5:57e099e857c0a4ea312542b684a869b8',
} }
} }, {
# youtube video
'url': 'http://www.wimp.com/clowncar/',
'info_dict': {
'id': 'cG4CEr2aiSg',
'ext': 'mp4',
'title': 'Basset hound clown car...incredible!',
'description': 'md5:8d228485e0719898c017203f900b3a35',
'uploader': 'Gretchen Hoey',
'uploader_id': 'gretchenandjeff1',
'upload_date': '20140303',
},
'add_ie': ['Youtube'],
}]
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
@ -24,6 +38,13 @@ class WimpIE(InfoExtractor):
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
video_url = self._search_regex( video_url = self._search_regex(
r's1\.addVariable\("file",\s*"([^"]+)"\);', webpage, 'video URL') r's1\.addVariable\("file",\s*"([^"]+)"\);', webpage, 'video URL')
if YoutubeIE.suitable(video_url):
self.to_screen('Found YouTube video')
return {
'_type': 'url',
'url': video_url,
'ie_key': YoutubeIE.ie_key(),
}
return { return {
'id': video_id, 'id': video_id,

View File

@ -1453,7 +1453,8 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
more_widget_html = more['load_more_widget_html'] more_widget_html = more['load_more_widget_html']
playlist_title = self._html_search_regex( playlist_title = self._html_search_regex(
r'<h1 class="pl-header-title">\s*(.*?)\s*</h1>', page, u'title') r'(?s)<h1 class="pl-header-title[^"]*">\s*(.*?)\s*</h1>',
page, u'title')
url_results = self._ids_to_results(ids) url_results = self._ids_to_results(ids)
return self.playlist_result(url_results, playlist_id, playlist_title) return self.playlist_result(url_results, playlist_id, playlist_title)
@ -1753,7 +1754,7 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
return self.playlist_result(feed_entries, playlist_title=self._PLAYLIST_TITLE) return self.playlist_result(feed_entries, playlist_title=self._PLAYLIST_TITLE)
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor): class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
IE_DESC = u'YouTube.com subscriptions feed, "ytsubs" keyword(requires authentication)' IE_DESC = u'YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)'
_VALID_URL = r'https?://www\.youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?' _VALID_URL = r'https?://www\.youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
_FEED_NAME = 'subscriptions' _FEED_NAME = 'subscriptions'
_PLAYLIST_TITLE = u'Youtube Subscriptions' _PLAYLIST_TITLE = u'Youtube Subscriptions'

View File

@ -1180,12 +1180,12 @@ class HEADRequest(compat_urllib_request.Request):
return "HEAD" return "HEAD"
def int_or_none(v, scale=1): def int_or_none(v, scale=1, default=None):
return v if v is None else (int(v) // scale) return default if v is None else (int(v) // scale)
def float_or_none(v, scale=1): def float_or_none(v, scale=1, default=None):
return v if v is None else (float(v) / scale) return default if v is None else (float(v) / scale)
def parse_duration(s): def parse_duration(s):

View File

@ -1,2 +1,2 @@
__version__ = '2014.04.02' __version__ = '2014.04.04.2'