Merge branch 'master' of https://github.com/rg3/youtube-dl into multipart_videos
This commit is contained in:
commit
7d33af60cd
@ -3,5 +3,4 @@ include test/*.py
|
||||
include test/*.json
|
||||
include youtube-dl.bash-completion
|
||||
include youtube-dl.1
|
||||
recursive-include docs *
|
||||
prune docs/_build
|
||||
recursive-include docs Makefile conf.py *.rst
|
||||
|
62
README.md
62
README.md
@ -371,7 +371,67 @@ If you want to create a build of youtube-dl yourself, you'll need
|
||||
|
||||
### Adding support for a new site
|
||||
|
||||
If you want to add support for a new site, copy *any* [recently modified](https://github.com/rg3/youtube-dl/commits/master/youtube_dl/extractor) file in `youtube_dl/extractor`, add an import in [`youtube_dl/extractor/__init__.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/__init__.py). Have a look at [`youtube_dl/common/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L38). Don't forget to run the tests with `python test/test_download.py TestDownload.test_YourExtractor`! For a detailed tutorial, refer to [this blog post](http://filippo.io/add-support-for-a-new-video-site-to-youtube-dl/).
|
||||
If you want to add support for a new site, you can follow this quick list (assuming your service is called `yourextractor`):
|
||||
|
||||
1. [Fork this repository](https://github.com/rg3/youtube-dl/fork)
|
||||
2. Check out the source code with `git clone git@github.com:YOUR_GITHUB_USERNAME/youtube-dl.git`
|
||||
3. Start a new git branch with `cd youtube-dl; git checkout -b yourextractor`
|
||||
4. Start with this simple template and save it to `youtube_dl/extractor/yourextractor.py`:
|
||||
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class YourExtractorIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?yourextractor\.com/watch/(?P<id>[0-9]+)'
|
||||
_TEST = {
|
||||
'url': 'http://yourextractor.com/watch/42',
|
||||
'md5': 'TODO: md5 sum of the first 10KiB of the video file',
|
||||
'info_dict': {
|
||||
'id': '42',
|
||||
'ext': 'mp4',
|
||||
'title': 'Video title goes here',
|
||||
# TODO more properties, either as:
|
||||
# * A value
|
||||
# * MD5 checksum; start the string with md5:
|
||||
# * A regular expression; start the string with re:
|
||||
# * Any Python type (for example int or float)
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
|
||||
# TODO more code goes here, for example ...
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
title = self._html_search_regex(r'<h1>(.*?)</h1>', webpage, 'title')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
# TODO more properties (see youtube_dl/extractor/common.py)
|
||||
}
|
||||
|
||||
|
||||
5. Add an import in [`youtube_dl/extractor/__init__.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/__init__.py).
|
||||
6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done.
|
||||
7. Have a look at [`youtube_dl/common/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L38). Add tests and code for as many as you want.
|
||||
8. If you can, check the code with [pyflakes](https://pypi.python.org/pypi/pyflakes) (a good idea) and [pep8](https://pypi.python.org/pypi/pep8) (optional, ignore E501).
|
||||
9. When the tests pass, [add](https://www.kernel.org/pub/software/scm/git/docs/git-add.html) the new files and [commit](https://www.kernel.org/pub/software/scm/git/docs/git-commit.html) them and [push](https://www.kernel.org/pub/software/scm/git/docs/git-push.html) the result, like this:
|
||||
|
||||
$ git add youtube_dl/extractor/__init__.py
|
||||
$ git add youtube_dl/extractor/yourextractor.py
|
||||
$ git commit -m '[yourextractor] Add new extractor'
|
||||
$ git push origin yourextractor
|
||||
|
||||
10. Finally, [create a pull request](https://help.github.com/articles/creating-a-pull-request). We'll then review and merge it.
|
||||
|
||||
In any case, thank you very much for your contributions!
|
||||
|
||||
# BUGS
|
||||
|
||||
|
@ -26,16 +26,27 @@ class YDL(FakeYDL):
|
||||
self.msgs.append(msg)
|
||||
|
||||
|
||||
def _make_result(formats, **kwargs):
|
||||
res = {
|
||||
'formats': formats,
|
||||
'id': 'testid',
|
||||
'title': 'testttitle',
|
||||
'extractor': 'testex',
|
||||
}
|
||||
res.update(**kwargs)
|
||||
return res
|
||||
|
||||
|
||||
class TestFormatSelection(unittest.TestCase):
|
||||
def test_prefer_free_formats(self):
|
||||
# Same resolution => download webm
|
||||
ydl = YDL()
|
||||
ydl.params['prefer_free_formats'] = True
|
||||
formats = [
|
||||
{'ext': 'webm', 'height': 460},
|
||||
{'ext': 'mp4', 'height': 460},
|
||||
{'ext': 'webm', 'height': 460, 'url': 'x'},
|
||||
{'ext': 'mp4', 'height': 460, 'url': 'y'},
|
||||
]
|
||||
info_dict = {'formats': formats, 'extractor': 'test'}
|
||||
info_dict = _make_result(formats)
|
||||
yie = YoutubeIE(ydl)
|
||||
yie._sort_formats(info_dict['formats'])
|
||||
ydl.process_ie_result(info_dict)
|
||||
@ -46,8 +57,8 @@ class TestFormatSelection(unittest.TestCase):
|
||||
ydl = YDL()
|
||||
ydl.params['prefer_free_formats'] = True
|
||||
formats = [
|
||||
{'ext': 'webm', 'height': 720},
|
||||
{'ext': 'mp4', 'height': 1080},
|
||||
{'ext': 'webm', 'height': 720, 'url': 'a'},
|
||||
{'ext': 'mp4', 'height': 1080, 'url': 'b'},
|
||||
]
|
||||
info_dict['formats'] = formats
|
||||
yie = YoutubeIE(ydl)
|
||||
@ -60,9 +71,9 @@ class TestFormatSelection(unittest.TestCase):
|
||||
ydl = YDL()
|
||||
ydl.params['prefer_free_formats'] = False
|
||||
formats = [
|
||||
{'ext': 'webm', 'height': 720},
|
||||
{'ext': 'mp4', 'height': 720},
|
||||
{'ext': 'flv', 'height': 720},
|
||||
{'ext': 'webm', 'height': 720, 'url': '_'},
|
||||
{'ext': 'mp4', 'height': 720, 'url': '_'},
|
||||
{'ext': 'flv', 'height': 720, 'url': '_'},
|
||||
]
|
||||
info_dict['formats'] = formats
|
||||
yie = YoutubeIE(ydl)
|
||||
@ -74,8 +85,8 @@ class TestFormatSelection(unittest.TestCase):
|
||||
ydl = YDL()
|
||||
ydl.params['prefer_free_formats'] = False
|
||||
formats = [
|
||||
{'ext': 'flv', 'height': 720},
|
||||
{'ext': 'webm', 'height': 720},
|
||||
{'ext': 'flv', 'height': 720, 'url': '_'},
|
||||
{'ext': 'webm', 'height': 720, 'url': '_'},
|
||||
]
|
||||
info_dict['formats'] = formats
|
||||
yie = YoutubeIE(ydl)
|
||||
@ -91,8 +102,7 @@ class TestFormatSelection(unittest.TestCase):
|
||||
{'format_id': 'great', 'url': 'http://example.com/great', 'preference': 3},
|
||||
{'format_id': 'excellent', 'url': 'http://example.com/exc', 'preference': 4},
|
||||
]
|
||||
info_dict = {
|
||||
'formats': formats, 'extractor': 'test', 'id': 'testvid'}
|
||||
info_dict = _make_result(formats)
|
||||
|
||||
ydl = YDL()
|
||||
ydl.process_ie_result(info_dict)
|
||||
@ -120,12 +130,12 @@ class TestFormatSelection(unittest.TestCase):
|
||||
|
||||
def test_format_selection(self):
|
||||
formats = [
|
||||
{'format_id': '35', 'ext': 'mp4', 'preference': 1},
|
||||
{'format_id': '45', 'ext': 'webm', 'preference': 2},
|
||||
{'format_id': '47', 'ext': 'webm', 'preference': 3},
|
||||
{'format_id': '2', 'ext': 'flv', 'preference': 4},
|
||||
{'format_id': '35', 'ext': 'mp4', 'preference': 1, 'url': '_'},
|
||||
{'format_id': '45', 'ext': 'webm', 'preference': 2, 'url': '_'},
|
||||
{'format_id': '47', 'ext': 'webm', 'preference': 3, 'url': '_'},
|
||||
{'format_id': '2', 'ext': 'flv', 'preference': 4, 'url': '_'},
|
||||
]
|
||||
info_dict = {'formats': formats, 'extractor': 'test'}
|
||||
info_dict = _make_result(formats)
|
||||
|
||||
ydl = YDL({'format': '20/47'})
|
||||
ydl.process_ie_result(info_dict.copy())
|
||||
@ -154,12 +164,12 @@ class TestFormatSelection(unittest.TestCase):
|
||||
|
||||
def test_format_selection_audio(self):
|
||||
formats = [
|
||||
{'format_id': 'audio-low', 'ext': 'webm', 'preference': 1, 'vcodec': 'none'},
|
||||
{'format_id': 'audio-mid', 'ext': 'webm', 'preference': 2, 'vcodec': 'none'},
|
||||
{'format_id': 'audio-high', 'ext': 'flv', 'preference': 3, 'vcodec': 'none'},
|
||||
{'format_id': 'vid', 'ext': 'mp4', 'preference': 4},
|
||||
{'format_id': 'audio-low', 'ext': 'webm', 'preference': 1, 'vcodec': 'none', 'url': '_'},
|
||||
{'format_id': 'audio-mid', 'ext': 'webm', 'preference': 2, 'vcodec': 'none', 'url': '_'},
|
||||
{'format_id': 'audio-high', 'ext': 'flv', 'preference': 3, 'vcodec': 'none', 'url': '_'},
|
||||
{'format_id': 'vid', 'ext': 'mp4', 'preference': 4, 'url': '_'},
|
||||
]
|
||||
info_dict = {'formats': formats, 'extractor': 'test'}
|
||||
info_dict = _make_result(formats)
|
||||
|
||||
ydl = YDL({'format': 'bestaudio'})
|
||||
ydl.process_ie_result(info_dict.copy())
|
||||
@ -172,10 +182,10 @@ class TestFormatSelection(unittest.TestCase):
|
||||
self.assertEqual(downloaded['format_id'], 'audio-low')
|
||||
|
||||
formats = [
|
||||
{'format_id': 'vid-low', 'ext': 'mp4', 'preference': 1},
|
||||
{'format_id': 'vid-high', 'ext': 'mp4', 'preference': 2},
|
||||
{'format_id': 'vid-low', 'ext': 'mp4', 'preference': 1, 'url': '_'},
|
||||
{'format_id': 'vid-high', 'ext': 'mp4', 'preference': 2, 'url': '_'},
|
||||
]
|
||||
info_dict = {'formats': formats, 'extractor': 'test'}
|
||||
info_dict = _make_result(formats)
|
||||
|
||||
ydl = YDL({'format': 'bestaudio/worstaudio/best'})
|
||||
ydl.process_ie_result(info_dict.copy())
|
||||
@ -184,11 +194,11 @@ class TestFormatSelection(unittest.TestCase):
|
||||
|
||||
def test_format_selection_video(self):
|
||||
formats = [
|
||||
{'format_id': 'dash-video-low', 'ext': 'mp4', 'preference': 1, 'acodec': 'none'},
|
||||
{'format_id': 'dash-video-high', 'ext': 'mp4', 'preference': 2, 'acodec': 'none'},
|
||||
{'format_id': 'vid', 'ext': 'mp4', 'preference': 3},
|
||||
{'format_id': 'dash-video-low', 'ext': 'mp4', 'preference': 1, 'acodec': 'none', 'url': '_'},
|
||||
{'format_id': 'dash-video-high', 'ext': 'mp4', 'preference': 2, 'acodec': 'none', 'url': '_'},
|
||||
{'format_id': 'vid', 'ext': 'mp4', 'preference': 3, 'url': '_'},
|
||||
]
|
||||
info_dict = {'formats': formats, 'extractor': 'test'}
|
||||
info_dict = _make_result(formats)
|
||||
|
||||
ydl = YDL({'format': 'bestvideo'})
|
||||
ydl.process_ie_result(info_dict.copy())
|
||||
@ -217,10 +227,12 @@ class TestFormatSelection(unittest.TestCase):
|
||||
for f1id, f2id in zip(order, order[1:]):
|
||||
f1 = YoutubeIE._formats[f1id].copy()
|
||||
f1['format_id'] = f1id
|
||||
f1['url'] = 'url:' + f1id
|
||||
f2 = YoutubeIE._formats[f2id].copy()
|
||||
f2['format_id'] = f2id
|
||||
f2['url'] = 'url:' + f2id
|
||||
|
||||
info_dict = {'formats': [f1, f2], 'extractor': 'youtube'}
|
||||
info_dict = _make_result([f1, f2], extractor='youtube')
|
||||
ydl = YDL()
|
||||
yie = YoutubeIE(ydl)
|
||||
yie._sort_formats(info_dict['formats'])
|
||||
@ -228,7 +240,7 @@ class TestFormatSelection(unittest.TestCase):
|
||||
downloaded = ydl.downloaded_info_dicts[0]
|
||||
self.assertEqual(downloaded['format_id'], f1id)
|
||||
|
||||
info_dict = {'formats': [f2, f1], 'extractor': 'youtube'}
|
||||
info_dict = _make_result([f2, f1], extractor='youtube')
|
||||
ydl = YDL()
|
||||
yie = YoutubeIE(ydl)
|
||||
yie._sort_formats(info_dict['formats'])
|
||||
|
@ -153,6 +153,9 @@ class TestAllURLsMatching(unittest.TestCase):
|
||||
self.assertMatch(
|
||||
'http://thecolbertreport.cc.com/videos/gh6urb/neil-degrasse-tyson-pt--1?xrs=eml_col_031114',
|
||||
['ComedyCentralShows'])
|
||||
self.assertMatch(
|
||||
'http://thedailyshow.cc.com/guests/michael-lewis/3efna8/exclusive---michael-lewis-extended-interview-pt--3',
|
||||
['ComedyCentralShows'])
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
@ -42,6 +42,7 @@ from youtube_dl.extractor import (
|
||||
ToypicsUserIE,
|
||||
XTubeUserIE,
|
||||
InstagramUserIE,
|
||||
CSpanIE,
|
||||
)
|
||||
|
||||
|
||||
@ -314,6 +315,19 @@ class TestPlaylists(unittest.TestCase):
|
||||
}
|
||||
expect_info_dict(self, EXPECTED, test_video)
|
||||
|
||||
def test_CSpan_playlist(self):
|
||||
dl = FakeYDL()
|
||||
ie = CSpanIE(dl)
|
||||
result = ie.extract(
|
||||
'http://www.c-span.org/video/?318608-1/gm-ignition-switch-recall')
|
||||
self.assertIsPlaylist(result)
|
||||
self.assertEqual(result['id'], '342759')
|
||||
self.assertEqual(
|
||||
result['title'], 'General Motors Ignition Switch Recall')
|
||||
self.assertEqual(len(result['entries']), 9)
|
||||
whole_duration = sum(e['duration'] for e in result['entries'])
|
||||
self.assertEqual(whole_duration, 14855)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
@ -703,6 +703,11 @@ class YoutubeDL(object):
|
||||
def process_video_result(self, info_dict, download=True):
|
||||
assert info_dict.get('_type', 'video') == 'video'
|
||||
|
||||
if 'id' not in info_dict:
|
||||
raise ExtractorError('Missing "id" field in extractor result')
|
||||
if 'title' not in info_dict:
|
||||
raise ExtractorError('Missing "title" field in extractor result')
|
||||
|
||||
if 'playlist' not in info_dict:
|
||||
# It isn't part of a playlist
|
||||
info_dict['playlist'] = None
|
||||
@ -734,6 +739,9 @@ class YoutubeDL(object):
|
||||
|
||||
# We check that all the formats have the format and format_id fields
|
||||
for i, format in enumerate(formats):
|
||||
if 'url' not in format:
|
||||
raise ExtractorError('Missing "url" key in result (index %d)' % i)
|
||||
|
||||
if format.get('format_id') is None:
|
||||
format['format_id'] = compat_str(i)
|
||||
if format.get('format') is None:
|
||||
@ -744,7 +752,7 @@ class YoutubeDL(object):
|
||||
)
|
||||
# Automatically determine file extension if missing
|
||||
if 'ext' not in format:
|
||||
format['ext'] = determine_ext(format['url'])
|
||||
format['ext'] = determine_ext(format['url']).lower()
|
||||
|
||||
format_limit = self.params.get('format_limit', None)
|
||||
if format_limit:
|
||||
@ -869,7 +877,7 @@ class YoutubeDL(object):
|
||||
|
||||
try:
|
||||
dn = os.path.dirname(encodeFilename(filename))
|
||||
if dn != '' and not os.path.exists(dn):
|
||||
if dn and not os.path.exists(dn):
|
||||
os.makedirs(dn)
|
||||
except (OSError, IOError) as err:
|
||||
self.report_error('unable to create directory ' + compat_str(err))
|
||||
|
@ -4,9 +4,10 @@ import sys
|
||||
import time
|
||||
|
||||
from ..utils import (
|
||||
compat_str,
|
||||
encodeFilename,
|
||||
timeconvert,
|
||||
format_bytes,
|
||||
timeconvert,
|
||||
)
|
||||
|
||||
|
||||
@ -173,7 +174,7 @@ class FileDownloader(object):
|
||||
return
|
||||
os.rename(encodeFilename(old_filename), encodeFilename(new_filename))
|
||||
except (IOError, OSError) as err:
|
||||
self.report_error(u'unable to rename file: %s' % str(err))
|
||||
self.report_error(u'unable to rename file: %s' % compat_str(err))
|
||||
|
||||
def try_utime(self, filename, last_modified_hdr):
|
||||
"""Try to set the last-modified time of the given file."""
|
||||
|
@ -297,6 +297,7 @@ class F4mFD(FileDownloader):
|
||||
break
|
||||
frags_filenames.append(frag_filename)
|
||||
|
||||
dest_stream.close()
|
||||
self.report_finish(format_bytes(state['downloaded_bytes']), time.time() - start)
|
||||
|
||||
self.try_rename(tmpfilename, filename)
|
||||
|
@ -40,6 +40,7 @@ from .clipfish import ClipfishIE
|
||||
from .cliphunter import CliphunterIE
|
||||
from .clipsyndicate import ClipsyndicateIE
|
||||
from .cmt import CMTIE
|
||||
from .cnet import CNETIE
|
||||
from .cnn import (
|
||||
CNNIE,
|
||||
CNNBlogsIE,
|
||||
@ -83,6 +84,7 @@ from .fktv import (
|
||||
)
|
||||
from .flickr import FlickrIE
|
||||
from .fourtube import FourTubeIE
|
||||
from .franceculture import FranceCultureIE
|
||||
from .franceinter import FranceInterIE
|
||||
from .francetv import (
|
||||
PluzzIE,
|
||||
@ -152,6 +154,8 @@ from .mixcloud import MixcloudIE
|
||||
from .mpora import MporaIE
|
||||
from .mofosex import MofosexIE
|
||||
from .mooshare import MooshareIE
|
||||
from .morningstar import MorningstarIE
|
||||
from .motorsport import MotorsportIE
|
||||
from .mtv import (
|
||||
MTVIE,
|
||||
MTVIggyIE,
|
||||
|
@ -2,39 +2,46 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class C56IE(InfoExtractor):
|
||||
_VALID_URL = r'https?://((www|player)\.)?56\.com/(.+?/)?(v_|(play_album.+-))(?P<textid>.+?)\.(html|swf)'
|
||||
_VALID_URL = r'https?://(?:(?:www|player)\.)?56\.com/(?:.+?/)?(?:v_|(?:play_album.+-))(?P<textid>.+?)\.(?:html|swf)'
|
||||
IE_NAME = '56.com'
|
||||
_TEST = {
|
||||
'url': 'http://www.56.com/u39/v_OTM0NDA3MTY.html',
|
||||
'file': '93440716.flv',
|
||||
'md5': 'e59995ac63d0457783ea05f93f12a866',
|
||||
'info_dict': {
|
||||
'id': '93440716',
|
||||
'ext': 'flv',
|
||||
'title': '网事知多少 第32期:车怒',
|
||||
'duration': 283.813,
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url, flags=re.VERBOSE)
|
||||
text_id = mobj.group('textid')
|
||||
info_page = self._download_webpage('http://vxml.56.com/json/%s/' % text_id,
|
||||
text_id, 'Downloading video info')
|
||||
info = json.loads(info_page)['info']
|
||||
formats = [{
|
||||
'format_id': f['type'],
|
||||
'filesize': int(f['filesize']),
|
||||
'url': f['url']
|
||||
} for f in info['rfiles']]
|
||||
|
||||
page = self._download_json(
|
||||
'http://vxml.56.com/json/%s/' % text_id, text_id, 'Downloading video info')
|
||||
|
||||
info = page['info']
|
||||
|
||||
formats = [
|
||||
{
|
||||
'format_id': f['type'],
|
||||
'filesize': int(f['filesize']),
|
||||
'url': f['url']
|
||||
} for f in info['rfiles']
|
||||
]
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': info['vid'],
|
||||
'title': info['Subject'],
|
||||
'duration': int(info['duration']) / 1000.0,
|
||||
'formats': formats,
|
||||
'thumbnail': info.get('bimg') or info.get('img'),
|
||||
}
|
||||
|
75
youtube_dl/extractor/cnet.py
Normal file
75
youtube_dl/extractor/cnet.py
Normal file
@ -0,0 +1,75 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
||||
class CNETIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?cnet\.com/videos/(?P<id>[^/]+)/'
|
||||
_TEST = {
|
||||
'url': 'http://www.cnet.com/videos/hands-on-with-microsofts-windows-8-1-update/',
|
||||
'md5': '041233212a0d06b179c87cbcca1577b8',
|
||||
'info_dict': {
|
||||
'id': '56f4ea68-bd21-4852-b08c-4de5b8354c60',
|
||||
'ext': 'mp4',
|
||||
'title': 'Hands-on with Microsoft Windows 8.1 Update',
|
||||
'description': 'The new update to the Windows 8 OS brings improved performance for mouse and keyboard users.',
|
||||
'thumbnail': 're:^http://.*/flmswindows8.jpg$',
|
||||
'uploader_id': 'sarah.mitroff@cbsinteractive.com',
|
||||
'uploader': 'Sarah Mitroff',
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
display_id = mobj.group('id')
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
data_json = self._html_search_regex(
|
||||
r"<div class=\"cnetVideoPlayer\" data-cnet-video-options='([^']+)'",
|
||||
webpage, 'data json')
|
||||
data = json.loads(data_json)
|
||||
vdata = data['video']
|
||||
if not vdata:
|
||||
vdata = data['videos'][0]
|
||||
if not vdata:
|
||||
raise ExtractorError('Cannot find video data')
|
||||
|
||||
video_id = vdata['id']
|
||||
title = vdata['headline']
|
||||
description = vdata.get('dek')
|
||||
thumbnail = vdata.get('image', {}).get('path')
|
||||
author = vdata.get('author')
|
||||
if author:
|
||||
uploader = '%s %s' % (author['firstName'], author['lastName'])
|
||||
uploader_id = author.get('email')
|
||||
else:
|
||||
uploader = None
|
||||
uploader_id = None
|
||||
|
||||
formats = [{
|
||||
'format_id': '%s-%s-%s' % (
|
||||
f['type'], f['format'],
|
||||
int_or_none(f.get('bitrate'), 1000, default='')),
|
||||
'url': f['uri'],
|
||||
'tbr': int_or_none(f.get('bitrate'), 1000),
|
||||
} for f in vdata['files']['data']]
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'description': description,
|
||||
'uploader': uploader,
|
||||
'uploader_id': uploader_id,
|
||||
'thumbnail': thumbnail,
|
||||
}
|
@ -41,9 +41,9 @@ class ComedyCentralShowsIE(InfoExtractor):
|
||||
_VALID_URL = r'''(?x)^(:(?P<shortname>tds|thedailyshow|cr|colbert|colbertnation|colbertreport)
|
||||
|https?://(:www\.)?
|
||||
(?P<showname>thedailyshow|thecolbertreport)\.(?:cc\.)?com/
|
||||
(full-episodes/(?P<episode>.*)|
|
||||
(full-episodes/(?:[0-9a-z]{6}/)?(?P<episode>.*)|
|
||||
(?P<clip>
|
||||
(?:videos/[^/]+/(?P<videotitle>[^/?#]+))
|
||||
(?:(?:guests/[^/]+|videos)/[^/]+/(?P<videotitle>[^/?#]+))
|
||||
|(the-colbert-report-(videos|collections)/(?P<clipID>[0-9]+)/[^/]*/(?P<cntitle>.*?))
|
||||
|(watch/(?P<date>[^/]*)/(?P<tdstitle>.*))
|
||||
)|
|
||||
|
@ -256,6 +256,17 @@ class InfoExtractor(object):
|
||||
outf.write(webpage_bytes)
|
||||
|
||||
content = webpage_bytes.decode(encoding, 'replace')
|
||||
|
||||
if (u'<title>Access to this site is blocked</title>' in content and
|
||||
u'Websense' in content[:512]):
|
||||
msg = u'Access to this webpage has been blocked by Websense filtering software in your network.'
|
||||
blocked_iframe = self._html_search_regex(
|
||||
r'<iframe src="([^"]+)"', content,
|
||||
u'Websense information URL', default=None)
|
||||
if blocked_iframe:
|
||||
msg += u' Visit %s for more details' % blocked_iframe
|
||||
raise ExtractorError(msg, expected=True)
|
||||
|
||||
return (content, urlh)
|
||||
|
||||
def _download_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True):
|
||||
|
@ -4,6 +4,7 @@ import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
unescapeHTML,
|
||||
find_xpath_attr,
|
||||
)
|
||||
@ -54,18 +55,29 @@ class CSpanIE(InfoExtractor):
|
||||
info_url = 'http://c-spanvideo.org/videoLibrary/assets/player/ajax-player.php?os=android&html5=program&id=' + video_id
|
||||
data = self._download_json(info_url, video_id)
|
||||
|
||||
url = unescapeHTML(data['video']['files'][0]['path']['#text'])
|
||||
|
||||
doc = self._download_xml('http://www.c-span.org/common/services/flashXml.php?programid=' + video_id,
|
||||
doc = self._download_xml(
|
||||
'http://www.c-span.org/common/services/flashXml.php?programid=' + video_id,
|
||||
video_id)
|
||||
|
||||
def find_string(s):
|
||||
return find_xpath_attr(doc, './/string', 'name', s).text
|
||||
title = find_xpath_attr(doc, './/string', 'name', 'title').text
|
||||
thumbnail = find_xpath_attr(doc, './/string', 'name', 'poster').text
|
||||
|
||||
files = data['video']['files']
|
||||
|
||||
entries = [{
|
||||
'id': '%s_%d' % (video_id, partnum + 1),
|
||||
'title': (
|
||||
title if len(files) == 1 else
|
||||
'%s part %d' % (title, partnum + 1)),
|
||||
'url': unescapeHTML(f['path']['#text']),
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'duration': int_or_none(f.get('length', {}).get('#text')),
|
||||
} for partnum, f in enumerate(files)]
|
||||
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
'entries': entries,
|
||||
'title': title,
|
||||
'id': video_id,
|
||||
'title': find_string('title'),
|
||||
'url': url,
|
||||
'description': description,
|
||||
'thumbnail': find_string('poster'),
|
||||
}
|
||||
|
@ -180,7 +180,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
class DailymotionPlaylistIE(DailymotionBaseInfoExtractor):
|
||||
IE_NAME = u'dailymotion:playlist'
|
||||
_VALID_URL = r'(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/playlist/(?P<id>.+?)/'
|
||||
_MORE_PAGES_INDICATOR = r'<div class="next">.*?<a.*?href="/playlist/.+?".*?>.*?</a>.*?</div>'
|
||||
_MORE_PAGES_INDICATOR = r'(?s)<div class="pages[^"]*">.*?<a\s+class="[^"]*?icon-arrow_right[^"]*?"'
|
||||
_PAGE_TEMPLATE = 'https://www.dailymotion.com/playlist/%s/%s'
|
||||
|
||||
def _extract_entries(self, id):
|
||||
@ -190,10 +190,9 @@ class DailymotionPlaylistIE(DailymotionBaseInfoExtractor):
|
||||
webpage = self._download_webpage(request,
|
||||
id, u'Downloading page %s' % pagenum)
|
||||
|
||||
playlist_el = get_element_by_attribute(u'class', u'row video_list', webpage)
|
||||
video_ids.extend(re.findall(r'data-id="(.+?)"', playlist_el))
|
||||
video_ids.extend(re.findall(r'data-id="(.+?)"', webpage))
|
||||
|
||||
if re.search(self._MORE_PAGES_INDICATOR, webpage, re.DOTALL) is None:
|
||||
if re.search(self._MORE_PAGES_INDICATOR, webpage) is None:
|
||||
break
|
||||
return [self.url_result('http://www.dailymotion.com/video/%s' % video_id, 'Dailymotion')
|
||||
for video_id in orderedSet(video_ids)]
|
||||
@ -212,8 +211,7 @@ class DailymotionPlaylistIE(DailymotionBaseInfoExtractor):
|
||||
|
||||
class DailymotionUserIE(DailymotionPlaylistIE):
|
||||
IE_NAME = u'dailymotion:user'
|
||||
_VALID_URL = r'(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/user/(?P<user>[^/]+)'
|
||||
_MORE_PAGES_INDICATOR = r'<div class="next">.*?<a.*?href="/user/.+?".*?>.*?</a>.*?</div>'
|
||||
_VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/user/(?P<user>[^/]+)'
|
||||
_PAGE_TEMPLATE = 'http://www.dailymotion.com/user/%s/%s'
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
77
youtube_dl/extractor/franceculture.py
Normal file
77
youtube_dl/extractor/franceculture.py
Normal file
@ -0,0 +1,77 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_parse_qs,
|
||||
compat_urlparse,
|
||||
)
|
||||
|
||||
|
||||
class FranceCultureIE(InfoExtractor):
|
||||
_VALID_URL = r'(?P<baseurl>http://(?:www\.)?franceculture\.fr/)player/reecouter\?play=(?P<id>[0-9]+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.franceculture.fr/player/reecouter?play=4795174',
|
||||
'info_dict': {
|
||||
'id': '4795174',
|
||||
'ext': 'mp3',
|
||||
'title': 'Rendez-vous au pays des geeks',
|
||||
'vcodec': 'none',
|
||||
'uploader': 'Colette Fellous',
|
||||
'upload_date': '20140301',
|
||||
'duration': 3601,
|
||||
'thumbnail': r're:^http://www\.franceculture\.fr/.*/images/player/Carnet-nomade\.jpg$',
|
||||
'description': 'Avec :Jean-Baptiste Péretié pour son documentaire sur Arte "La revanche des « geeks », une enquête menée aux Etats-Unis dans la S ...',
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
baseurl = mobj.group('baseurl')
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
params_code = self._search_regex(
|
||||
r"<param name='movie' value='/sites/all/modules/rf/rf_player/swf/loader.swf\?([^']+)' />",
|
||||
webpage, 'parameter code')
|
||||
params = compat_parse_qs(params_code)
|
||||
video_url = compat_urlparse.urljoin(baseurl, params['urlAOD'][0])
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'<h1 class="title[^"]+">(.+?)</h1>', webpage, 'title')
|
||||
uploader = self._html_search_regex(
|
||||
r'(?s)<div id="emission".*?<span class="author">(.*?)</span>',
|
||||
webpage, 'uploader', fatal=False)
|
||||
thumbnail_part = self._html_search_regex(
|
||||
r'(?s)<div id="emission".*?<img src="([^"]+)"', webpage,
|
||||
'thumbnail', fatal=False)
|
||||
if thumbnail_part is None:
|
||||
thumbnail = None
|
||||
else:
|
||||
thumbnail = compat_urlparse.urljoin(baseurl, thumbnail_part)
|
||||
description = self._html_search_regex(
|
||||
r'(?s)<p class="desc">(.*?)</p>', webpage, 'description')
|
||||
|
||||
info = json.loads(params['infoData'][0])[0]
|
||||
duration = info.get('media_length')
|
||||
upload_date_candidate = info.get('media_section5')
|
||||
upload_date = (
|
||||
upload_date_candidate
|
||||
if (upload_date_candidate is not None and
|
||||
re.match(r'[0-9]{8}$', upload_date_candidate))
|
||||
else None)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'vcodec': 'none' if video_url.lower().endswith('.mp3') else None,
|
||||
'duration': duration,
|
||||
'uploader': uploader,
|
||||
'upload_date': upload_date,
|
||||
'title': title,
|
||||
'thumbnail': thumbnail,
|
||||
'description': description,
|
||||
}
|
@ -114,20 +114,6 @@ class GenericIE(InfoExtractor):
|
||||
'title': '2cc213299525360.mov', # that's what we get
|
||||
},
|
||||
},
|
||||
# second style of embedded ooyala videos
|
||||
{
|
||||
'url': 'http://www.smh.com.au/tv/business/show/financial-review-sunday/behind-the-scenes-financial-review-sunday--4350201.html',
|
||||
'info_dict': {
|
||||
'id': '13djJjYjptA1XpPx8r9kuzPyj3UZH0Uk',
|
||||
'ext': 'mp4',
|
||||
'title': 'Behind-the-scenes: Financial Review Sunday ',
|
||||
'description': 'Step inside Channel Nine studios for an exclusive tour of its upcoming financial business show.',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
# google redirect
|
||||
{
|
||||
'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE',
|
||||
|
55
youtube_dl/extractor/morningstar.py
Normal file
55
youtube_dl/extractor/morningstar.py
Normal file
@ -0,0 +1,55 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import hashlib
|
||||
import json
|
||||
import re
|
||||
import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_parse_qs,
|
||||
compat_str,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
||||
class MorningstarIE(InfoExtractor):
|
||||
IE_DESC = 'morningstar.com'
|
||||
_VALID_URL = r'https?://(?:www\.)?morningstar\.com/cover/videocenter\.aspx\?id=(?P<id>[0-9]+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.morningstar.com/cover/videocenter.aspx?id=615869',
|
||||
'md5': '6c0acface7a787aadc8391e4bbf7b0f5',
|
||||
'info_dict': {
|
||||
'id': '615869',
|
||||
'ext': 'mp4',
|
||||
'title': 'Get Ahead of the Curve on 2013 Taxes',
|
||||
'description': "Vanguard's Joel Dickson on managing higher tax rates for high-income earners and fund capital-gain distributions in 2013.",
|
||||
'thumbnail': r're:^https?://.*m(?:orning)?star\.com/.+thumb\.jpg$'
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
title = self._html_search_regex(
|
||||
r'<h1 id="titleLink">(.*?)</h1>', webpage, 'title')
|
||||
video_url = self._html_search_regex(
|
||||
r'<input type="hidden" id="hidVideoUrl" value="([^"]+)"',
|
||||
webpage, 'video URL')
|
||||
thumbnail = self._html_search_regex(
|
||||
r'<input type="hidden" id="hidSnapshot" value="([^"]+)"',
|
||||
webpage, 'thumbnail', fatal=False)
|
||||
description = self._html_search_regex(
|
||||
r'<div id="mstarDeck".*?>(.*?)</div>',
|
||||
webpage, 'description', fatal=False)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'url': video_url,
|
||||
'thumbnail': thumbnail,
|
||||
'description': description,
|
||||
}
|
63
youtube_dl/extractor/motorsport.py
Normal file
63
youtube_dl/extractor/motorsport.py
Normal file
@ -0,0 +1,63 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import hashlib
|
||||
import json
|
||||
import re
|
||||
import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_parse_qs,
|
||||
compat_str,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
||||
class MotorsportIE(InfoExtractor):
|
||||
IE_DESC = 'motorsport.com'
|
||||
_VALID_URL = r'http://www\.motorsport\.com/[^/?#]+/video/(?:[^/?#]+/)(?P<id>[^/]+)/(?:$|[?#])'
|
||||
_TEST = {
|
||||
'url': 'http://www.motorsport.com/f1/video/main-gallery/red-bull-racing-2014-rules-explained/',
|
||||
'md5': '5592cb7c5005d9b2c163df5ac3dc04e4',
|
||||
'info_dict': {
|
||||
'id': '7063',
|
||||
'ext': 'mp4',
|
||||
'title': 'Red Bull Racing: 2014 Rules Explained',
|
||||
'duration': 207,
|
||||
'description': 'A new clip from Red Bull sees Daniel Ricciardo and Sebastian Vettel explain the 2014 Formula One regulations – which are arguably the most complex the sport has ever seen.',
|
||||
'uploader': 'rainiere',
|
||||
'thumbnail': r're:^http://.*motorsport\.com/.+\.jpg$'
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
display_id = mobj.group('id')
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
flashvars_code = self._html_search_regex(
|
||||
r'<embed id="player".*?flashvars="([^"]+)"', webpage, 'flashvars')
|
||||
flashvars = compat_parse_qs(flashvars_code)
|
||||
params = json.loads(flashvars['parameters'][0])
|
||||
|
||||
e = compat_str(int(time.time()) + 24 * 60 * 60)
|
||||
base_video_url = params['location'] + '?e=' + e
|
||||
s = 'h3hg713fh32'
|
||||
h = hashlib.md5(s + base_video_url).hexdigest()
|
||||
video_url = base_video_url + '&h=' + h
|
||||
|
||||
uploader = self._html_search_regex(
|
||||
r'(?s)<span class="label">Video by: </span>(.*?)</a>', webpage,
|
||||
'uploader', fatal=False)
|
||||
|
||||
return {
|
||||
'id': params['video_id'],
|
||||
'display_id': display_id,
|
||||
'title': params['title'],
|
||||
'url': video_url,
|
||||
'description': params.get('description'),
|
||||
'thumbnail': params.get('main_thumb'),
|
||||
'duration': int_or_none(params.get('duration')),
|
||||
'uploader': uploader,
|
||||
}
|
@ -1,44 +1,81 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import compat_urllib_parse
|
||||
from ..utils import int_or_none
|
||||
|
||||
|
||||
class PornHdIE(InfoExtractor):
|
||||
_VALID_URL = r'(?:http://)?(?:www\.)?pornhd\.com/(?:[a-z]{2,4}/)?videos/(?P<video_id>[0-9]+)/(?P<video_title>.+)'
|
||||
_VALID_URL = r'http://(?:www\.)?pornhd\.com/(?:[a-z]{2,4}/)?videos/(?P<id>\d+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.pornhd.com/videos/1962/sierra-day-gets-his-cum-all-over-herself-hd-porn-video',
|
||||
'file': '1962.flv',
|
||||
'md5': '35272469887dca97abd30abecc6cdf75',
|
||||
'md5': '956b8ca569f7f4d8ec563e2c41598441',
|
||||
'info_dict': {
|
||||
"title": "sierra-day-gets-his-cum-all-over-herself-hd-porn-video",
|
||||
"age_limit": 18,
|
||||
'id': '1962',
|
||||
'ext': 'mp4',
|
||||
'title': 'Sierra loves doing laundry',
|
||||
'description': 'md5:8ff0523848ac2b8f9b065ba781ccf294',
|
||||
'age_limit': 18,
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
|
||||
video_id = mobj.group('video_id')
|
||||
video_title = mobj.group('video_title')
|
||||
video_id = mobj.group('id')
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
next_url = self._html_search_regex(
|
||||
r'&hd=(http.+?)&', webpage, 'video URL')
|
||||
next_url = compat_urllib_parse.unquote(next_url)
|
||||
title = self._og_search_title(webpage)
|
||||
TITLE_SUFFIX = ' porn HD Video | PornHD.com '
|
||||
if title.endswith(TITLE_SUFFIX):
|
||||
title = title[:-len(TITLE_SUFFIX)]
|
||||
|
||||
video_url = self._download_webpage(
|
||||
next_url, video_id, note='Retrieving video URL',
|
||||
errnote='Could not retrieve video URL')
|
||||
age_limit = 18
|
||||
description = self._html_search_regex(
|
||||
r'<div class="description">([^<]+)</div>', webpage, 'description', fatal=False)
|
||||
view_count = int_or_none(self._html_search_regex(
|
||||
r'(\d+) views </span>', webpage, 'view count', fatal=False))
|
||||
|
||||
formats = [
|
||||
{
|
||||
'url': url,
|
||||
'ext': format.lower(),
|
||||
'format_id': '%s-%s' % (format.lower(), quality.lower()),
|
||||
'quality': 1 if quality.lower() == 'high' else 0,
|
||||
} for format, quality, url in re.findall(
|
||||
r'var __video([\da-zA-Z]+?)(Low|High)StreamUrl = \'(http://.+?)\?noProxy=1\'', webpage)
|
||||
]
|
||||
|
||||
mobj = re.search(r'flashVars = (?P<flashvars>{.+?});', webpage)
|
||||
if mobj:
|
||||
flashvars = json.loads(mobj.group('flashvars'))
|
||||
formats.extend([
|
||||
{
|
||||
'url': flashvars['hashlink'].replace('?noProxy=1', ''),
|
||||
'ext': 'flv',
|
||||
'format_id': 'flv-low',
|
||||
'quality': 0,
|
||||
},
|
||||
{
|
||||
'url': flashvars['hd'].replace('?noProxy=1', ''),
|
||||
'ext': 'flv',
|
||||
'format_id': 'flv-high',
|
||||
'quality': 1,
|
||||
}
|
||||
])
|
||||
thumbnail = flashvars['urlWallpaper']
|
||||
else:
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'ext': 'flv',
|
||||
'title': video_title,
|
||||
'age_limit': age_limit,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'view_count': view_count,
|
||||
'formats': formats,
|
||||
'age_limit': 18,
|
||||
}
|
||||
|
@ -9,46 +9,133 @@ from ..utils import (
|
||||
parse_duration,
|
||||
parse_iso8601,
|
||||
unescapeHTML,
|
||||
compat_str,
|
||||
)
|
||||
|
||||
|
||||
class RTSIE(InfoExtractor):
|
||||
IE_DESC = 'RTS.ch'
|
||||
_VALID_URL = r'^https?://(?:www\.)?rts\.ch/archives/tv/[^/]+/(?P<id>[0-9]+)-.*?\.html'
|
||||
_VALID_URL = r'^https?://(?:www\.)?rts\.ch/(?:[^/]+/){2,}(?P<id>[0-9]+)-.*?\.html'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.rts.ch/archives/tv/divers/3449373-les-enfants-terribles.html',
|
||||
'md5': '753b877968ad8afaeddccc374d4256a5',
|
||||
'info_dict': {
|
||||
'id': '3449373',
|
||||
'ext': 'mp4',
|
||||
'duration': 1488,
|
||||
'title': 'Les Enfants Terribles',
|
||||
'description': 'France Pommier et sa soeur Luce Feral, les deux filles de ce groupe de 5.',
|
||||
'uploader': 'Divers',
|
||||
'upload_date': '19680921',
|
||||
'timestamp': -40280400,
|
||||
'thumbnail': 're:^https?://.*\.image'
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://www.rts.ch/archives/tv/divers/3449373-les-enfants-terribles.html',
|
||||
'md5': '753b877968ad8afaeddccc374d4256a5',
|
||||
'info_dict': {
|
||||
'id': '3449373',
|
||||
'ext': 'mp4',
|
||||
'duration': 1488,
|
||||
'title': 'Les Enfants Terribles',
|
||||
'description': 'France Pommier et sa soeur Luce Feral, les deux filles de ce groupe de 5.',
|
||||
'uploader': 'Divers',
|
||||
'upload_date': '19680921',
|
||||
'timestamp': -40280400,
|
||||
'thumbnail': 're:^https?://.*\.image'
|
||||
},
|
||||
},
|
||||
}
|
||||
{
|
||||
'url': 'http://www.rts.ch/emissions/passe-moi-les-jumelles/5624067-entre-ciel-et-mer.html',
|
||||
'md5': 'c197f0b2421995c63a64cc73d800f42e',
|
||||
'info_dict': {
|
||||
'id': '5738317',
|
||||
'ext': 'mp4',
|
||||
'duration': 55,
|
||||
'title': 'Bande de lancement de Passe-moi les jumelles',
|
||||
'description': '',
|
||||
'uploader': 'Passe-moi les jumelles',
|
||||
'upload_date': '20140404',
|
||||
'timestamp': 1396635300,
|
||||
'thumbnail': 're:^https?://.*\.image'
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://www.rts.ch/video/sport/hockey/5745975-1-2-kloten-fribourg-5-2-second-but-pour-gotteron-par-kwiatowski.html',
|
||||
'md5': 'b4326fecd3eb64a458ba73c73e91299d',
|
||||
'info_dict': {
|
||||
'id': '5745975',
|
||||
'ext': 'mp4',
|
||||
'duration': 48,
|
||||
'title': '1/2, Kloten - Fribourg (5-2): second but pour Gottéron par Kwiatowski',
|
||||
'description': 'Hockey - Playoff',
|
||||
'uploader': 'Hockey',
|
||||
'upload_date': '20140403',
|
||||
'timestamp': 1396556882,
|
||||
'thumbnail': 're:^https?://.*\.image'
|
||||
},
|
||||
'skip': 'Blocked outside Switzerland',
|
||||
},
|
||||
{
|
||||
'url': 'http://www.rts.ch/video/info/journal-continu/5745356-londres-cachee-par-un-epais-smog.html',
|
||||
'md5': '9bb06503773c07ce83d3cbd793cebb91',
|
||||
'info_dict': {
|
||||
'id': '5745356',
|
||||
'ext': 'mp4',
|
||||
'duration': 33,
|
||||
'title': 'Londres cachée par un épais smog',
|
||||
'description': 'Un important voile de smog recouvre Londres depuis mercredi, provoqué par la pollution et du sable du Sahara.',
|
||||
'uploader': 'Le Journal en continu',
|
||||
'upload_date': '20140403',
|
||||
'timestamp': 1396537322,
|
||||
'thumbnail': 're:^https?://.*\.image'
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://www.rts.ch/audio/couleur3/programmes/la-belle-video-de-stephane-laurenceau/5706148-urban-hippie-de-damien-krisl-03-04-2014.html',
|
||||
'md5': 'dd8ef6a22dff163d063e2a52bc8adcae',
|
||||
'info_dict': {
|
||||
'id': '5706148',
|
||||
'ext': 'mp3',
|
||||
'duration': 123,
|
||||
'title': '"Urban Hippie", de Damien Krisl',
|
||||
'description': 'Des Hippies super glam.',
|
||||
'upload_date': '20140403',
|
||||
'timestamp': 1396551600,
|
||||
},
|
||||
},
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
m = re.match(self._VALID_URL, url)
|
||||
video_id = m.group('id')
|
||||
|
||||
all_info = self._download_json(
|
||||
'http://www.rts.ch/a/%s.html?f=json/article' % video_id, video_id)
|
||||
info = all_info['video']['JSONinfo']
|
||||
def download_json(video_id):
|
||||
return self._download_json(
|
||||
'http://www.rts.ch/a/%s.html?f=json/article' % video_id, video_id)
|
||||
|
||||
all_info = download_json(video_id)
|
||||
|
||||
# video_id extracted out of URL is not always a real id
|
||||
if 'video' not in all_info and 'audio' not in all_info:
|
||||
page = self._download_webpage(url, video_id)
|
||||
video_id = self._html_search_regex(r'<(?:video|audio) data-id="(\d+)"', page, 'video id')
|
||||
all_info = download_json(video_id)
|
||||
|
||||
info = all_info['video']['JSONinfo'] if 'video' in all_info else all_info['audio']
|
||||
|
||||
upload_timestamp = parse_iso8601(info.get('broadcast_date'))
|
||||
duration = parse_duration(info.get('duration'))
|
||||
duration = info.get('duration') or info.get('cutout') or info.get('cutduration')
|
||||
if isinstance(duration, compat_str):
|
||||
duration = parse_duration(duration)
|
||||
view_count = info.get('plays')
|
||||
thumbnail = unescapeHTML(info.get('preview_image_url'))
|
||||
|
||||
def extract_bitrate(url):
|
||||
return int_or_none(self._search_regex(
|
||||
r'-([0-9]+)k\.', url, 'bitrate', default=None))
|
||||
|
||||
formats = [{
|
||||
'format_id': fid,
|
||||
'url': furl,
|
||||
'tbr': int_or_none(self._search_regex(
|
||||
r'-([0-9]+)k\.', furl, 'bitrate', default=None)),
|
||||
'tbr': extract_bitrate(furl),
|
||||
} for fid, furl in info['streams'].items()]
|
||||
|
||||
if 'media' in info:
|
||||
formats.extend([{
|
||||
'format_id': '%s-%sk' % (media['ext'], media['rate']),
|
||||
'url': 'http://download-video.rts.ch/%s' % media['url'],
|
||||
'tbr': media['rate'] or extract_bitrate(media['url']),
|
||||
} for media in info['media'] if media.get('rate')])
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
@ -57,6 +144,7 @@ class RTSIE(InfoExtractor):
|
||||
'title': info['title'],
|
||||
'description': info.get('intro'),
|
||||
'duration': duration,
|
||||
'view_count': view_count,
|
||||
'uploader': info.get('programName'),
|
||||
'timestamp': upload_timestamp,
|
||||
'thumbnail': thumbnail,
|
||||
|
@ -3,11 +3,12 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .youtube import YoutubeIE
|
||||
|
||||
|
||||
class WimpIE(InfoExtractor):
|
||||
_VALID_URL = r'http://(?:www\.)?wimp\.com/([^/]+)/'
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://www.wimp.com/maruexhausted/',
|
||||
'md5': 'f1acced123ecb28d9bb79f2479f2b6a1',
|
||||
'info_dict': {
|
||||
@ -16,7 +17,20 @@ class WimpIE(InfoExtractor):
|
||||
'title': 'Maru is exhausted.',
|
||||
'description': 'md5:57e099e857c0a4ea312542b684a869b8',
|
||||
}
|
||||
}
|
||||
}, {
|
||||
# youtube video
|
||||
'url': 'http://www.wimp.com/clowncar/',
|
||||
'info_dict': {
|
||||
'id': 'cG4CEr2aiSg',
|
||||
'ext': 'mp4',
|
||||
'title': 'Basset hound clown car...incredible!',
|
||||
'description': 'md5:8d228485e0719898c017203f900b3a35',
|
||||
'uploader': 'Gretchen Hoey',
|
||||
'uploader_id': 'gretchenandjeff1',
|
||||
'upload_date': '20140303',
|
||||
},
|
||||
'add_ie': ['Youtube'],
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
@ -24,6 +38,13 @@ class WimpIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
video_url = self._search_regex(
|
||||
r's1\.addVariable\("file",\s*"([^"]+)"\);', webpage, 'video URL')
|
||||
if YoutubeIE.suitable(video_url):
|
||||
self.to_screen('Found YouTube video')
|
||||
return {
|
||||
'_type': 'url',
|
||||
'url': video_url,
|
||||
'ie_key': YoutubeIE.ie_key(),
|
||||
}
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
@ -31,4 +52,4 @@ class WimpIE(InfoExtractor):
|
||||
'title': self._og_search_title(webpage),
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
'description': self._og_search_description(webpage),
|
||||
}
|
||||
}
|
||||
|
@ -1453,7 +1453,8 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
|
||||
more_widget_html = more['load_more_widget_html']
|
||||
|
||||
playlist_title = self._html_search_regex(
|
||||
r'<h1 class="pl-header-title">\s*(.*?)\s*</h1>', page, u'title')
|
||||
r'(?s)<h1 class="pl-header-title[^"]*">\s*(.*?)\s*</h1>',
|
||||
page, u'title')
|
||||
|
||||
url_results = self._ids_to_results(ids)
|
||||
return self.playlist_result(url_results, playlist_id, playlist_title)
|
||||
@ -1753,7 +1754,7 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
|
||||
return self.playlist_result(feed_entries, playlist_title=self._PLAYLIST_TITLE)
|
||||
|
||||
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
|
||||
IE_DESC = u'YouTube.com subscriptions feed, "ytsubs" keyword(requires authentication)'
|
||||
IE_DESC = u'YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)'
|
||||
_VALID_URL = r'https?://www\.youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
|
||||
_FEED_NAME = 'subscriptions'
|
||||
_PLAYLIST_TITLE = u'Youtube Subscriptions'
|
||||
|
@ -1180,12 +1180,12 @@ class HEADRequest(compat_urllib_request.Request):
|
||||
return "HEAD"
|
||||
|
||||
|
||||
def int_or_none(v, scale=1):
|
||||
return v if v is None else (int(v) // scale)
|
||||
def int_or_none(v, scale=1, default=None):
|
||||
return default if v is None else (int(v) // scale)
|
||||
|
||||
|
||||
def float_or_none(v, scale=1):
|
||||
return v if v is None else (float(v) / scale)
|
||||
def float_or_none(v, scale=1, default=None):
|
||||
return default if v is None else (float(v) / scale)
|
||||
|
||||
|
||||
def parse_duration(s):
|
||||
|
@ -1,2 +1,2 @@
|
||||
|
||||
__version__ = '2014.04.02'
|
||||
__version__ = '2014.04.04.2'
|
||||
|
Loading…
x
Reference in New Issue
Block a user