Merge pull request #1 from rg3/master

Update
This commit is contained in:
Alex Snet 2014-07-14 14:15:37 +04:00
commit 588316f5f1
46 changed files with 1153 additions and 232 deletions

View File

@ -70,8 +70,9 @@ which means you can modify it, redistribute it or use it however you like.
--default-search PREFIX Use this prefix for unqualified URLs. For --default-search PREFIX Use this prefix for unqualified URLs. For
example "gvsearch2:" downloads two videos example "gvsearch2:" downloads two videos
from google videos for youtube-dl "large from google videos for youtube-dl "large
apple". By default (with value "auto") apple". Use the value "auto" to let
youtube-dl guesses. youtube-dl guess. The default value "error"
just throws an error.
--ignore-config Do not read configuration files. When given --ignore-config Do not read configuration files. When given
in the global configuration file /etc in the global configuration file /etc
/youtube-dl.conf: do not read the user /youtube-dl.conf: do not read the user
@ -254,7 +255,7 @@ which means you can modify it, redistribute it or use it however you like.
128K (default 5) 128K (default 5)
--recode-video FORMAT Encode the video to another format if --recode-video FORMAT Encode the video to another format if
necessary (currently supported: necessary (currently supported:
mp4|flv|ogg|webm) mp4|flv|ogg|webm|mkv)
-k, --keep-video keeps the video file on disk after the -k, --keep-video keeps the video file on disk after the
post-processing; the video is erased by post-processing; the video is erased by
default default

View File

@ -69,9 +69,6 @@ class TestAllURLsMatching(unittest.TestCase):
def test_youtube_show_matching(self): def test_youtube_show_matching(self):
self.assertMatch('http://www.youtube.com/show/airdisasters', ['youtube:show']) self.assertMatch('http://www.youtube.com/show/airdisasters', ['youtube:show'])
def test_youtube_truncated(self):
self.assertMatch('http://www.youtube.com/watch?', ['youtube:truncated_url'])
def test_youtube_search_matching(self): def test_youtube_search_matching(self):
self.assertMatch('http://www.youtube.com/results?search_query=making+mustard', ['youtube:search_url']) self.assertMatch('http://www.youtube.com/results?search_query=making+mustard', ['youtube:search_url'])
self.assertMatch('https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video', ['youtube:search_url']) self.assertMatch('https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video', ['youtube:search_url'])

View File

@ -28,7 +28,7 @@ from youtube_dl.extractor import (
SoundcloudSetIE, SoundcloudSetIE,
SoundcloudUserIE, SoundcloudUserIE,
SoundcloudPlaylistIE, SoundcloudPlaylistIE,
TeacherTubeClassroomIE, TeacherTubeUserIE,
LivestreamIE, LivestreamIE,
LivestreamOriginalIE, LivestreamOriginalIE,
NHLVideocenterIE, NHLVideocenterIE,
@ -111,7 +111,7 @@ class TestPlaylists(unittest.TestCase):
ie = VineUserIE(dl) ie = VineUserIE(dl)
result = ie.extract('https://vine.co/Visa') result = ie.extract('https://vine.co/Visa')
self.assertIsPlaylist(result) self.assertIsPlaylist(result)
self.assertTrue(len(result['entries']) >= 50) self.assertTrue(len(result['entries']) >= 47)
def test_ustream_channel(self): def test_ustream_channel(self):
dl = FakeYDL() dl = FakeYDL()
@ -137,6 +137,14 @@ class TestPlaylists(unittest.TestCase):
self.assertEqual(result['id'], '9615865') self.assertEqual(result['id'], '9615865')
self.assertTrue(len(result['entries']) >= 12) self.assertTrue(len(result['entries']) >= 12)
def test_soundcloud_likes(self):
dl = FakeYDL()
ie = SoundcloudUserIE(dl)
result = ie.extract('https://soundcloud.com/the-concept-band/likes')
self.assertIsPlaylist(result)
self.assertEqual(result['id'], '9615865')
self.assertTrue(len(result['entries']) >= 1)
def test_soundcloud_playlist(self): def test_soundcloud_playlist(self):
dl = FakeYDL() dl = FakeYDL()
ie = SoundcloudPlaylistIE(dl) ie = SoundcloudPlaylistIE(dl)
@ -379,13 +387,13 @@ class TestPlaylists(unittest.TestCase):
result['title'], 'Brace Yourself - Today\'s Weirdest News') result['title'], 'Brace Yourself - Today\'s Weirdest News')
self.assertTrue(len(result['entries']) >= 10) self.assertTrue(len(result['entries']) >= 10)
def test_TeacherTubeClassroom(self): def test_TeacherTubeUser(self):
dl = FakeYDL() dl = FakeYDL()
ie = TeacherTubeClassroomIE(dl) ie = TeacherTubeUserIE(dl)
result = ie.extract('http://www.teachertube.com/view_classroom.php?user=rbhagwati2') result = ie.extract('http://www.teachertube.com/user/profile/rbhagwati2')
self.assertIsPlaylist(result) self.assertIsPlaylist(result)
self.assertEqual(result['id'], 'rbhagwati2') self.assertEqual(result['id'], 'rbhagwati2')
self.assertTrue(len(result['entries']) >= 20) self.assertTrue(len(result['entries']) >= 179)
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()

View File

@ -87,7 +87,7 @@ class TestYoutubeSubtitles(BaseTestSubtitles):
def test_youtube_nosubtitles(self): def test_youtube_nosubtitles(self):
self.DL.expect_warning(u'video doesn\'t have subtitles') self.DL.expect_warning(u'video doesn\'t have subtitles')
self.url = 'sAjKT8FhjI8' self.url = 'n5BB19UTcdA'
self.DL.params['writesubtitles'] = True self.DL.params['writesubtitles'] = True
self.DL.params['allsubtitles'] = True self.DL.params['allsubtitles'] = True
subtitles = self.getSubtitles() subtitles = self.getSubtitles()

View File

@ -33,6 +33,12 @@ _TESTS = [
90, 90,
u']\\[@?>=<;:/.-,+*)(\'&%$#"hZYXWVUTSRQPONMLKJIHGFEDCBAzyxwvutsrqponmlkjiagfedcb39876', u']\\[@?>=<;:/.-,+*)(\'&%$#"hZYXWVUTSRQPONMLKJIHGFEDCBAzyxwvutsrqponmlkjiagfedcb39876',
), ),
(
u'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflXGBaUN.js',
u'js',
u'2ACFC7A61CA478CD21425E5A57EBD73DDC78E22A.2094302436B2D377D14A3BBA23022D023B8BC25AA',
u'A52CB8B320D22032ABB3A41D773D2B6342034902.A22E87CDD37DBE75A5E52412DC874AC16A7CFCA2',
),
] ]
@ -44,7 +50,7 @@ class TestSignature(unittest.TestCase):
os.mkdir(self.TESTDATA_DIR) os.mkdir(self.TESTDATA_DIR)
def make_tfunc(url, stype, sig_length, expected_sig): def make_tfunc(url, stype, sig_input, expected_sig):
basename = url.rpartition('/')[2] basename = url.rpartition('/')[2]
m = re.match(r'.*-([a-zA-Z0-9_-]+)\.[a-z]+$', basename) m = re.match(r'.*-([a-zA-Z0-9_-]+)\.[a-z]+$', basename)
assert m, '%r should follow URL format' % basename assert m, '%r should follow URL format' % basename
@ -66,7 +72,9 @@ def make_tfunc(url, stype, sig_length, expected_sig):
with open(fn, 'rb') as testf: with open(fn, 'rb') as testf:
swfcode = testf.read() swfcode = testf.read()
func = ie._parse_sig_swf(swfcode) func = ie._parse_sig_swf(swfcode)
src_sig = compat_str(string.printable[:sig_length]) src_sig = (
compat_str(string.printable[:sig_input])
if isinstance(sig_input, int) else sig_input)
got_sig = func(src_sig) got_sig = func(src_sig)
self.assertEqual(got_sig, expected_sig) self.assertEqual(got_sig, expected_sig)

View File

@ -993,6 +993,8 @@ class YoutubeDL(object):
fd = get_suitable_downloader(info)(self, self.params) fd = get_suitable_downloader(info)(self, self.params)
for ph in self._progress_hooks: for ph in self._progress_hooks:
fd.add_progress_hook(ph) fd.add_progress_hook(ph)
if self.params.get('verbose'):
self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
return fd.download(name, info) return fd.download(name, info)
if info_dict.get('requested_formats') is not None: if info_dict.get('requested_formats') is not None:
downloaded = [] downloaded = []

View File

@ -59,6 +59,11 @@ __authors__ = (
'Adam Thalhammer', 'Adam Thalhammer',
'Georg Jähnig', 'Georg Jähnig',
'Ralf Haring', 'Ralf Haring',
'Koki Takahashi',
'Ariset Llerena',
'Adam Malcontenti-Wilson',
'Tobias Bell',
'Naglis Jonaitis',
) )
__license__ = 'Public Domain' __license__ = 'Public Domain'
@ -269,7 +274,7 @@ def parseOpts(overrideArguments=None):
general.add_option( general.add_option(
'--default-search', '--default-search',
dest='default_search', metavar='PREFIX', dest='default_search', metavar='PREFIX',
help='Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for youtube-dl "large apple". By default (with value "auto") youtube-dl guesses.') help='Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for youtube-dl "large apple". Use the value "auto" to let youtube-dl guess. The default value "error" just throws an error.')
general.add_option( general.add_option(
'--ignore-config', '--ignore-config',
action='store_true', action='store_true',
@ -505,7 +510,7 @@ def parseOpts(overrideArguments=None):
postproc.add_option('--audio-quality', metavar='QUALITY', dest='audioquality', default='5', postproc.add_option('--audio-quality', metavar='QUALITY', dest='audioquality', default='5',
help='ffmpeg/avconv audio quality specification, insert a value between 0 (better) and 9 (worse) for VBR or a specific bitrate like 128K (default 5)') help='ffmpeg/avconv audio quality specification, insert a value between 0 (better) and 9 (worse) for VBR or a specific bitrate like 128K (default 5)')
postproc.add_option('--recode-video', metavar='FORMAT', dest='recodevideo', default=None, postproc.add_option('--recode-video', metavar='FORMAT', dest='recodevideo', default=None,
help='Encode the video to another format if necessary (currently supported: mp4|flv|ogg|webm)') help='Encode the video to another format if necessary (currently supported: mp4|flv|ogg|webm|mkv)')
postproc.add_option('-k', '--keep-video', action='store_true', dest='keepvideo', default=False, postproc.add_option('-k', '--keep-video', action='store_true', dest='keepvideo', default=False,
help='keeps the video file on disk after the post-processing; the video is erased by default') help='keeps the video file on disk after the post-processing; the video is erased by default')
postproc.add_option('--no-post-overwrites', action='store_true', dest='nopostoverwrites', default=False, postproc.add_option('--no-post-overwrites', action='store_true', dest='nopostoverwrites', default=False,

View File

@ -3,6 +3,7 @@ from .addanime import AddAnimeIE
from .aftonbladet import AftonbladetIE from .aftonbladet import AftonbladetIE
from .anitube import AnitubeIE from .anitube import AnitubeIE
from .aol import AolIE from .aol import AolIE
from .allocine import AllocineIE
from .aparat import AparatIE from .aparat import AparatIE
from .appletrailers import AppleTrailersIE from .appletrailers import AppleTrailersIE
from .archiveorg import ArchiveOrgIE from .archiveorg import ArchiveOrgIE
@ -82,6 +83,7 @@ from .extremetube import ExtremeTubeIE
from .facebook import FacebookIE from .facebook import FacebookIE
from .faz import FazIE from .faz import FazIE
from .fc2 import FC2IE from .fc2 import FC2IE
from .firedrive import FiredriveIE
from .firstpost import FirstpostIE from .firstpost import FirstpostIE
from .firsttv import FirstTVIE from .firsttv import FirstTVIE
from .fivemin import FiveMinIE from .fivemin import FiveMinIE
@ -104,6 +106,7 @@ from .freesound import FreesoundIE
from .freespeech import FreespeechIE from .freespeech import FreespeechIE
from .funnyordie import FunnyOrDieIE from .funnyordie import FunnyOrDieIE
from .gamekings import GamekingsIE from .gamekings import GamekingsIE
from .gameone import GameOneIE
from .gamespot import GameSpotIE from .gamespot import GameSpotIE
from .gametrailers import GametrailersIE from .gametrailers import GametrailersIE
from .gdcvault import GDCVaultIE from .gdcvault import GDCVaultIE
@ -111,6 +114,7 @@ from .generic import GenericIE
from .googleplus import GooglePlusIE from .googleplus import GooglePlusIE
from .googlesearch import GoogleSearchIE from .googlesearch import GoogleSearchIE
from .gorillavid import GorillaVidIE from .gorillavid import GorillaVidIE
from .goshgay import GoshgayIE
from .hark import HarkIE from .hark import HarkIE
from .helsinki import HelsinkiIE from .helsinki import HelsinkiIE
from .hentaistigma import HentaiStigmaIE from .hentaistigma import HentaiStigmaIE
@ -228,6 +232,7 @@ from .radiofrance import RadioFranceIE
from .rai import RaiIE from .rai import RaiIE
from .rbmaradio import RBMARadioIE from .rbmaradio import RBMARadioIE
from .redtube import RedTubeIE from .redtube import RedTubeIE
from .reverbnation import ReverbNationIE
from .ringtv import RingTVIE from .ringtv import RingTVIE
from .ro220 import Ro220IE from .ro220 import Ro220IE
from .rottentomatoes import RottenTomatoesIE from .rottentomatoes import RottenTomatoesIE
@ -236,6 +241,7 @@ from .rtbf import RTBFIE
from .rtlnow import RTLnowIE from .rtlnow import RTLnowIE
from .rts import RTSIE from .rts import RTSIE
from .rtve import RTVEALaCartaIE from .rtve import RTVEALaCartaIE
from .ruhd import RUHDIE
from .rutube import ( from .rutube import (
RutubeIE, RutubeIE,
RutubeChannelIE, RutubeChannelIE,
@ -245,6 +251,7 @@ from .rutube import (
from .rutv import RUTVIE from .rutv import RUTVIE
from .savefrom import SaveFromIE from .savefrom import SaveFromIE
from .scivee import SciVeeIE from .scivee import SciVeeIE
from .screencast import ScreencastIE
from .servingsys import ServingSysIE from .servingsys import ServingSysIE
from .sina import SinaIE from .sina import SinaIE
from .slideshare import SlideshareIE from .slideshare import SlideshareIE
@ -263,8 +270,8 @@ from .soundcloud import (
SoundcloudPlaylistIE SoundcloudPlaylistIE
) )
from .soundgasm import SoundgasmIE from .soundgasm import SoundgasmIE
from .southparkstudios import ( from .southpark import (
SouthParkStudiosIE, SouthParkIE,
SouthparkDeIE, SouthparkDeIE,
) )
from .space import SpaceIE from .space import SpaceIE
@ -282,12 +289,13 @@ from .sztvhu import SztvHuIE
from .tagesschau import TagesschauIE from .tagesschau import TagesschauIE
from .teachertube import ( from .teachertube import (
TeacherTubeIE, TeacherTubeIE,
TeacherTubeClassroomIE, TeacherTubeUserIE,
) )
from .teachingchannel import TeachingChannelIE from .teachingchannel import TeachingChannelIE
from .teamcoco import TeamcocoIE from .teamcoco import TeamcocoIE
from .techtalks import TechTalksIE from .techtalks import TechTalksIE
from .ted import TEDIE from .ted import TEDIE
from .tenplay import TenPlayIE
from .testurl import TestURLIE from .testurl import TestURLIE
from .tf1 import TF1IE from .tf1 import TF1IE
from .theplatform import ThePlatformIE from .theplatform import ThePlatformIE
@ -335,12 +343,14 @@ from .vimeo import (
VimeoReviewIE, VimeoReviewIE,
VimeoWatchLaterIE, VimeoWatchLaterIE,
) )
from .vimple import VimpleIE
from .vine import ( from .vine import (
VineIE, VineIE,
VineUserIE, VineUserIE,
) )
from .viki import VikiIE from .viki import VikiIE
from .vk import VKIE from .vk import VKIE
from .vodlocker import VodlockerIE
from .vube import VubeIE from .vube import VubeIE
from .vuclip import VuClipIE from .vuclip import VuClipIE
from .vulture import VultureIE from .vulture import VultureIE

View File

@ -0,0 +1,89 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
import re
import json
from .common import InfoExtractor
from ..utils import (
compat_str,
qualities,
determine_ext,
)
class AllocineIE(InfoExtractor):
    """Information extractor for allocine.fr article, video and film pages.

    The page type is taken from the URL.  Film pages link to the player
    page of a trailer, whereas article and video pages embed a JSON blob in
    a ``data-player`` attribute that carries the media reference.  The
    media id is then resolved to the available renditions through the
    ``AcVisiondataV4`` XML web service.
    """

    _VALID_URL = r'https?://(?:www\.)?allocine\.fr/(?P<typ>article|video|film)/(fichearticle_gen_carticle=|player_gen_cmedia=|fichefilm_gen_cfilm=)(?P<id>[0-9]+)(?:\.html)?'

    # NOTE: the thumbnail patterns are raw strings so that ``\.`` reaches the
    # regex engine untouched instead of being an invalid string escape.
    _TESTS = [{
        'url': 'http://www.allocine.fr/article/fichearticle_gen_carticle=18635087.html',
        'md5': '0c9fcf59a841f65635fa300ac43d8269',
        'info_dict': {
            'id': '19546517',
            'ext': 'mp4',
            'title': 'Astérix - Le Domaine des Dieux Teaser VF',
            'description': 'md5:4a754271d9c6f16c72629a8a993ee884',
            'thumbnail': r're:http://.*\.jpg',
        },
    }, {
        'url': 'http://www.allocine.fr/video/player_gen_cmedia=19540403&cfilm=222257.html',
        'md5': 'd0cdce5d2b9522ce279fdfec07ff16e0',
        'info_dict': {
            'id': '19540403',
            'ext': 'mp4',
            'title': 'Planes 2 Bande-annonce VF',
            'description': 'md5:c4b1f7bd682a91de6491ada267ec0f4d',
            'thumbnail': r're:http://.*\.jpg',
        },
    }, {
        'url': 'http://www.allocine.fr/film/fichefilm_gen_cfilm=181290.html',
        'md5': '101250fb127ef9ca3d73186ff22a47ce',
        'info_dict': {
            'id': '19544709',
            'ext': 'mp4',
            'title': 'Dragons 2 - Bande annonce finale VF',
            'description': 'md5:e74a4dc750894bac300ece46c7036490',
            'thumbnail': r're:http://.*\.jpg',
        },
    }]

    def _real_extract(self, url):
        """Return the info dict (id, title, thumbnail, formats, description) for *url*."""
        mobj = re.match(self._VALID_URL, url)
        typ = mobj.group('typ')
        # The id in the URL names the page, not necessarily the media item,
        # so it is only used for display/logging purposes.
        display_id = mobj.group('id')

        webpage = self._download_webpage(url, display_id)

        if typ == 'film':
            # Film pages only link to the player page of the video.
            video_id = self._search_regex(
                r'href="/video/player_gen_cmedia=([0-9]+).+"',
                webpage, 'video id')
        else:
            # Article/video pages embed the player configuration as JSON.
            player = self._search_regex(
                r'data-player=\'([^\']+)\'>', webpage, 'data player')
            player_data = json.loads(player)
            video_id = compat_str(player_data['refMedia'])

        xml = self._download_xml(
            'http://www.allocine.fr/ws/AcVisiondataV4.ashx?media=%s' % video_id,
            display_id)

        video = xml.find('.//AcVisionVideo').attrib
        quality = qualities(['ld', 'md', 'hd'])

        # Every "<name>_path" attribute is one rendition; the part before the
        # first underscore is used as the format id and ranked by `quality`.
        formats = []
        for k, v in video.items():
            if re.match(r'.+_path', k):
                format_id = k.split('_')[0]
                formats.append({
                    'format_id': format_id,
                    'quality': quality(format_id),
                    'url': v,
                    'ext': determine_ext(v),
                })

        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': video['videoTitle'],
            'thumbnail': self._og_search_thumbnail(webpage),
            'formats': formats,
            'description': self._og_search_description(webpage),
        }

View File

@ -1,22 +1,24 @@
from __future__ import unicode_literals
import re import re
from .common import InfoExtractor from .common import InfoExtractor
class AnitubeIE(InfoExtractor): class AnitubeIE(InfoExtractor):
IE_NAME = u'anitube.se' IE_NAME = 'anitube.se'
_VALID_URL = r'https?://(?:www\.)?anitube\.se/video/(?P<id>\d+)' _VALID_URL = r'https?://(?:www\.)?anitube\.se/video/(?P<id>\d+)'
_TEST = { _TEST = {
u'url': u'http://www.anitube.se/video/36621', 'url': 'http://www.anitube.se/video/36621',
u'md5': u'59d0eeae28ea0bc8c05e7af429998d43', 'md5': '59d0eeae28ea0bc8c05e7af429998d43',
u'file': u'36621.mp4', 'info_dict': {
u'info_dict': { 'id': '36621',
u'id': u'36621', 'ext': 'mp4',
u'ext': u'mp4', 'title': 'Recorder to Randoseru 01',
u'title': u'Recorder to Randoseru 01', 'duration': 180.19,
}, },
u'skip': u'Blocked in the US', 'skip': 'Blocked in the US',
} }
def _real_extract(self, url): def _real_extract(self, url):
@ -24,13 +26,15 @@ class AnitubeIE(InfoExtractor):
video_id = mobj.group('id') video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
key = self._html_search_regex(r'http://www\.anitube\.se/embed/([A-Za-z0-9_-]*)', key = self._html_search_regex(
webpage, u'key') r'http://www\.anitube\.se/embed/([A-Za-z0-9_-]*)', webpage, 'key')
config_xml = self._download_xml('http://www.anitube.se/nuevo/econfig.php?key=%s' % key, config_xml = self._download_xml(
key) 'http://www.anitube.se/nuevo/econfig.php?key=%s' % key, key)
video_title = config_xml.find('title').text video_title = config_xml.find('title').text
thumbnail = config_xml.find('image').text
duration = float(config_xml.find('duration').text)
formats = [] formats = []
video_url = config_xml.find('file') video_url = config_xml.find('file')
@ -49,5 +53,7 @@ class AnitubeIE(InfoExtractor):
return { return {
'id': video_id, 'id': video_id,
'title': video_title, 'title': video_title,
'thumbnail': thumbnail,
'duration': duration,
'formats': formats 'formats': formats
} }

View File

@ -39,7 +39,10 @@ class ArteTvIE(InfoExtractor):
formats = [{ formats = [{
'forma_id': q.attrib['quality'], 'forma_id': q.attrib['quality'],
'url': q.text, # The playpath starts at 'mp4:', if we don't manually
# split the url, rtmpdump will incorrectly parse them
'url': q.text.split('mp4:', 1)[0],
'play_path': 'mp4:' + q.text.split('mp4:', 1)[1],
'ext': 'flv', 'ext': 'flv',
'quality': 2 if q.attrib['quality'] == 'hd' else 1, 'quality': 2 if q.attrib['quality'] == 'hd' else 1,
} for q in config.findall('./urls/url')] } for q in config.findall('./urls/url')]

View File

@ -1,11 +1,12 @@
import base64 import base64
import hashlib import hashlib
import json import json
import netrc
import os import os
import re import re
import socket import socket
import sys import sys
import netrc import time
import xml.etree.ElementTree import xml.etree.ElementTree
from ..utils import ( from ..utils import (
@ -462,14 +463,14 @@ class InfoExtractor(object):
def _og_search_url(self, html, **kargs): def _og_search_url(self, html, **kargs):
return self._og_search_property('url', html, **kargs) return self._og_search_property('url', html, **kargs)
def _html_search_meta(self, name, html, display_name=None, fatal=False): def _html_search_meta(self, name, html, display_name=None, fatal=False, **kwargs):
if display_name is None: if display_name is None:
display_name = name display_name = name
return self._html_search_regex( return self._html_search_regex(
r'''(?ix)<meta r'''(?ix)<meta
(?=[^>]+(?:itemprop|name|property)=["\']%s["\']) (?=[^>]+(?:itemprop|name|property)=["\']%s["\'])
[^>]+content=["\']([^"\']+)["\']''' % re.escape(name), [^>]+content=["\']([^"\']+)["\']''' % re.escape(name),
html, display_name, fatal=fatal) html, display_name, fatal=fatal, **kwargs)
def _dc_search_uploader(self, html): def _dc_search_uploader(self, html):
return self._html_search_meta('dc.creator', html, 'uploader') return self._html_search_meta('dc.creator', html, 'uploader')
@ -575,6 +576,13 @@ class InfoExtractor(object):
else: else:
return url return url
def _sleep(self, timeout, video_id, msg_template=None):
if msg_template is None:
msg_template = u'%(video_id)s: Waiting for %(timeout)s seconds'
msg = msg_template % {'video_id': video_id, 'timeout': timeout}
self.to_screen(msg)
time.sleep(timeout)
class SearchInfoExtractor(InfoExtractor): class SearchInfoExtractor(InfoExtractor):
""" """
@ -618,4 +626,3 @@ class SearchInfoExtractor(InfoExtractor):
@property @property
def SEARCH_KEY(self): def SEARCH_KEY(self):
return self._SEARCH_KEY return self._SEARCH_KEY

View File

@ -1,40 +1,43 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
from __future__ import unicode_literals
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import determine_ext
class CriterionIE(InfoExtractor): class CriterionIE(InfoExtractor):
_VALID_URL = r'https?://www\.criterion\.com/films/(\d*)-.+' _VALID_URL = r'https?://www\.criterion\.com/films/(?P<id>[0-9]+)-.+'
_TEST = { _TEST = {
u'url': u'http://www.criterion.com/films/184-le-samourai', 'url': 'http://www.criterion.com/films/184-le-samourai',
u'file': u'184.mp4', 'md5': 'bc51beba55685509883a9a7830919ec3',
u'md5': u'bc51beba55685509883a9a7830919ec3', 'info_dict': {
u'info_dict': { 'id': '184',
u"title": u"Le Samouraï", 'ext': 'mp4',
u"description" : u'md5:a2b4b116326558149bef81f76dcbb93f', 'title': 'Le Samouraï',
'description': 'md5:a2b4b116326558149bef81f76dcbb93f',
} }
} }
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
video_id = mobj.group(1) video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
final_url = self._search_regex(r'so.addVariable\("videoURL", "(.+?)"\)\;', final_url = self._search_regex(
webpage, 'video url') r'so.addVariable\("videoURL", "(.+?)"\)\;', webpage, 'video url')
title = self._html_search_regex(r'<meta content="(.+?)" property="og:title" />', title = self._og_search_title(webpage)
webpage, 'video title') description = self._html_search_regex(
description = self._html_search_regex(r'<meta name="description" content="(.+?)" />', r'<meta name="description" content="(.+?)" />',
webpage, 'video description') webpage, 'video description')
thumbnail = self._search_regex(r'so.addVariable\("thumbnailURL", "(.+?)"\)\;', thumbnail = self._search_regex(
webpage, 'thumbnail url') r'so.addVariable\("thumbnailURL", "(.+?)"\)\;',
webpage, 'thumbnail url')
return {'id': video_id, return {
'url' : final_url, 'id': video_id,
'title': title, 'url': final_url,
'ext': determine_ext(final_url), 'title': title,
'description': description, 'description': description,
'thumbnail': thumbnail, 'thumbnail': thumbnail,
} }

View File

@ -0,0 +1,83 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
ExtractorError,
compat_urllib_parse,
compat_urllib_request,
determine_ext,
)
class FiredriveIE(InfoExtractor):
    """Information extractor for firedrive.com file and embed pages.

    The landing page contains a confirmation form made of hidden inputs.
    Those fields are posted back to the same URL to obtain the page that
    carries the title, thumbnail and media URL.
    """

    _VALID_URL = r'https?://(?:www\.)?firedrive\.com/' + \
        '(?:file|embed)/(?P<id>[0-9a-zA-Z]+)'
    _FILE_DELETED_REGEX = r'<div class="removed_file_image">'

    _TESTS = [{
        'url': 'https://www.firedrive.com/file/FEB892FA160EBD01',
        'md5': 'd5d4252f80ebeab4dc2d5ceaed1b7970',
        'info_dict': {
            'id': 'FEB892FA160EBD01',
            'ext': 'flv',
            'title': 'bbb_theora_486kbit.flv',
            # Raw string: ``\.`` is not a valid escape in a plain literal.
            'thumbnail': r're:^http://.*\.jpg$',
        },
    }]

    def _real_extract(self, url):
        """Return the info dict (id, title, thumbnail, formats) for *url*.

        Raises ExtractorError (expected) when the page shows the
        removed-file marker.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        # Normalise embed URLs to the canonical file page.
        url = 'http://firedrive.com/file/%s' % video_id

        webpage = self._download_webpage(url, video_id)

        if re.search(self._FILE_DELETED_REGEX, webpage) is not None:
            raise ExtractorError('Video %s does not exist' % video_id,
                                 expected=True)

        # Collect the hidden inputs of the confirmation form.
        fields = dict(re.findall(r'''(?x)<input\s+
            type="hidden"\s+
            name="([^"]+)"\s+
            (?:id="[^"]+"\s+)?
            value="([^"]*)"
            ''', webpage))

        # urlencode() yields percent-encoded ASCII text.  Python 3's
        # urllib.request rejects str POST data, so send bytes explicitly
        # (a no-op on Python 2 byte strings).
        post = compat_urllib_parse.urlencode(fields).encode('ascii')
        req = compat_urllib_request.Request(url, post)
        req.add_header('Content-type', 'application/x-www-form-urlencoded')

        # Apparently, this header is required for confirmation to work.
        req.add_header('Host', 'www.firedrive.com')

        webpage = self._download_webpage(req, video_id,
                                         'Downloading video page')

        title = self._search_regex(r'class="external_title_left">(.+)</div>',
                                   webpage, 'title')
        thumbnail = self._search_regex(r'image:\s?"(//[^\"]+)', webpage,
                                       'thumbnail', fatal=False)
        if thumbnail is not None:
            # The page uses a protocol-relative URL.
            thumbnail = 'http:' + thumbnail

        ext = self._search_regex(r'type:\s?\'([^\']+)\',',
                                 webpage, 'extension', fatal=False)
        video_url = self._search_regex(
            r'file:\s?\'(http[^\']+)\',', webpage, 'file url')

        formats = [{
            'format_id': 'sd',
            'url': video_url,
            'ext': ext,
        }]

        return {
            'id': video_id,
            'title': title,
            'thumbnail': thumbnail,
            'formats': formats,
        }

View File

@ -0,0 +1,90 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
xpath_with_ns,
parse_iso8601
)
# XML namespace prefixes used to build namespaced XPath expressions
# (passed to xpath_with_ns when looking up media:content in the MRSS feed).
NAMESPACE_MAP = {
'media': 'http://search.yahoo.com/mrss/',
}
# URL prefix to download the mp4 files directly instead of streaming via rtmp
# Credits go to XBox-Maniac
# http://board.jdownloader.org/showpost.php?p=185835&postcount=31
RAW_MP4_URL = 'http://cdn.riptide-mtvn.com/'
class GameOneIE(InfoExtractor):
    """Information extractor for gameone.de TV episodes.

    The episode page exposes an og:video URL whose ``mrss`` query parameter
    points at an MRSS feed.  The feed provides title, thumbnail and
    publication date, plus a ``media:content`` URL that lists the
    renditions.  Rendition URLs are rewritten onto RAW_MP4_URL so the mp4
    files are downloaded directly.
    """

    _VALID_URL = r'https?://(?:www\.)?gameone\.de/tv/(?P<id>\d+)'
    _TEST = {
        'url': 'http://www.gameone.de/tv/288',
        'md5': '136656b7fb4c9cb4a8e2d500651c499b',
        'info_dict': {
            'id': '288',
            'ext': 'mp4',
            'title': 'Game One - Folge 288',
            'duration': 1238,
            'thumbnail': 'http://s3.gameone.de/gameone/assets/video_metas/teaser_images/000/643/636/big/640x360.jpg',
            'description': 'FIFA-Pressepokal 2014, Star Citizen, Kingdom Come: Deliverance, Project Cars, Schöner Trants Nerdquiz Folge 2 Runde 1',
            'age_limit': 16,
            'upload_date': '20140513',
            'timestamp': 1399980122,
        }
    }

    def _real_extract(self, url):
        """Return the info dict for the episode at *url*."""
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        webpage = self._download_webpage(url, video_id)
        og_video = self._og_search_video_url(webpage, secure=False)
        description = self._html_search_meta('description', webpage)

        # The meta lookup is non-fatal, so it may not yield a string when the
        # tag is absent; fall back to '' so the regex default ('0') applies
        # instead of the search being run on a non-string.
        age_label = self._html_search_meta('age-de-meta-label', webpage) or ''
        age_limit = int(
            self._search_regex(r'age=(\d+)', age_label, 'age_limit', '0'))

        mrss_url = self._search_regex(r'mrss=([^&]+)', og_video, 'mrss')

        mrss = self._download_xml(mrss_url, video_id, 'Downloading mrss')
        title = mrss.find('.//item/title').text
        thumbnail = mrss.find('.//item/image').get('url')
        timestamp = parse_iso8601(mrss.find('.//pubDate').text, delimiter=' ')

        # The media:content element only references a second XML document
        # that holds the actual renditions.
        content = mrss.find(xpath_with_ns('.//media:content', NAMESPACE_MAP))
        content_url = content.get('url')

        content = self._download_xml(
            content_url,
            video_id,
            'Downloading media:content')
        rendition_items = content.findall('.//rendition')
        # The duration is read from the first rendition.
        duration = int(rendition_items[0].get('duration'))

        formats = [
            {
                # Replace everything up to the "r2" path component with the
                # direct-download prefix.
                'url': re.sub(r'.*/(r2)', RAW_MP4_URL + r'\1', r.find('./src').text),
                'width': int(r.get('width')),
                'height': int(r.get('height')),
                'tbr': int(r.get('bitrate')),
            }
            for r in rendition_items
        ]
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'thumbnail': thumbnail,
            'duration': duration,
            'formats': formats,
            'description': description,
            'age_limit': age_limit,
            'timestamp': timestamp,
        }

View File

@ -383,7 +383,7 @@ class GenericIE(InfoExtractor):
if not parsed_url.scheme: if not parsed_url.scheme:
default_search = self._downloader.params.get('default_search') default_search = self._downloader.params.get('default_search')
if default_search is None: if default_search is None:
default_search = 'auto_warning' default_search = 'error'
if default_search in ('auto', 'auto_warning'): if default_search in ('auto', 'auto_warning'):
if '/' in url: if '/' in url:
@ -397,8 +397,13 @@ class GenericIE(InfoExtractor):
expected=True) expected=True)
else: else:
self._downloader.report_warning( self._downloader.report_warning(
'Falling back to youtube search for %s . Set --default-search to "auto" to suppress this warning.' % url) 'Falling back to youtube search for %s . Set --default-search "auto" to suppress this warning.' % url)
return self.url_result('ytsearch:' + url) return self.url_result('ytsearch:' + url)
elif default_search == 'error':
raise ExtractorError(
('%r is not a valid URL. '
'Set --default-search "ytsearch" (or run youtube-dl "ytsearch:%s" ) to search YouTube'
) % (url, url), expected=True)
else: else:
assert ':' in default_search assert ':' in default_search
return self.url_result(default_search + url) return self.url_result(default_search + url)
@ -620,6 +625,11 @@ class GenericIE(InfoExtractor):
if mobj is not None: if mobj is not None:
return self.url_result(mobj.group('url'), 'VK') return self.url_result(mobj.group('url'), 'VK')
# Look for embedded ivi player
mobj = re.search(r'<embed[^>]+?src=(["\'])(?P<url>https?://(?:www\.)?ivi\.ru/video/player.+?)\1', webpage)
if mobj is not None:
return self.url_result(mobj.group('url'), 'Ivi')
# Look for embedded Huffington Post player # Look for embedded Huffington Post player
mobj = re.search( mobj = re.search(
r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed\.live\.huffingtonpost\.com/.+?)\1', webpage) r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed\.live\.huffingtonpost\.com/.+?)\1', webpage)

View File

@ -52,8 +52,7 @@ class GooglePlusIE(InfoExtractor):
# Extract title # Extract title
# Get the first line for title # Get the first line for title
video_title = self._html_search_regex(r'<meta name\=\"Description\" content\=\"(.*?)[\n<"]', video_title = self._og_search_description(webpage).splitlines()[0]
webpage, 'title', default='NA')
# Step 2, Simulate clicking the image box to launch video # Step 2, Simulate clicking the image box to launch video
DOMAIN = 'https://plus.google.com/' DOMAIN = 'https://plus.google.com/'

View File

@ -12,7 +12,12 @@ from ..utils import (
class GorillaVidIE(InfoExtractor): class GorillaVidIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?gorillavid\.in/(?:embed-)?(?P<id>[0-9a-zA-Z]+)(?:-[0-9]+x[0-9]+\.html)?' IE_DESC = 'GorillaVid.in and daclips.in'
_VALID_URL = r'''(?x)
https?://(?P<host>(?:www\.)?
(?:daclips\.in|gorillavid\.in))/
(?:embed-)?(?P<id>[0-9a-zA-Z]+)(?:-[0-9]+x[0-9]+\.html)?
'''
_TESTS = [{ _TESTS = [{
'url': 'http://gorillavid.in/06y9juieqpmi', 'url': 'http://gorillavid.in/06y9juieqpmi',
@ -32,15 +37,22 @@ class GorillaVidIE(InfoExtractor):
'title': 'Say something nice', 'title': 'Say something nice',
'thumbnail': 're:http://.*\.jpg', 'thumbnail': 're:http://.*\.jpg',
}, },
}, {
'url': 'http://daclips.in/3rso4kdn6f9m',
'md5': '1ad8fd39bb976eeb66004d3a4895f106',
'info_dict': {
'id': '3rso4kdn6f9m',
'ext': 'mp4',
'title': 'Micro Pig piglets ready on 16th July 2009',
'thumbnail': 're:http://.*\.jpg',
},
}] }]
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id') video_id = mobj.group('id')
url = 'http://gorillavid.in/%s' % video_id webpage = self._download_webpage('http://%s/%s' % (mobj.group('host'), video_id), video_id)
webpage = self._download_webpage(url, video_id)
fields = dict(re.findall(r'''(?x)<input\s+ fields = dict(re.findall(r'''(?x)<input\s+
type="hidden"\s+ type="hidden"\s+

View File

@ -0,0 +1,73 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
compat_urlparse,
str_to_int,
ExtractorError,
)
import json
class GoshgayIE(InfoExtractor):
    """Extractor for single videos on www.goshgay.com.

    The watch page carries a ``jwplayer("player").setup({...})`` call whose
    ``config`` entry points to an XML document; that document holds the
    media URL (``file``) and an optional preview picture (``image``).
    """

    # The dots of the host name are escaped so that only the literal
    # "www.goshgay.com" is accepted ("." would otherwise match any character).
    _VALID_URL = r'^https?://www\.goshgay\.com/video(?P<id>\d+?)($|/)'
    _TEST = {
        'url': 'http://www.goshgay.com/video4116282',
        'md5': '268b9f3c3229105c57859e166dd72b03',
        'info_dict': {
            'id': '4116282',
            'ext': 'flv',
            'title': 'md5:089833a4790b5e103285a07337f245bf',
            'thumbnail': 're:http://.*\.jpg',
            'age_limit': 18,
        }
    }

    def _real_extract(self, url):
        """Build the info dict for the video page at *url*.

        Raises ExtractorError when the player setup has no ``config`` entry,
        when the downloaded XML is missing or its root is not ``config``, or
        when it does not contain exactly one ``file`` element.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        webpage = self._download_webpage(url, video_id)
        title = self._search_regex(r'class="video-title"><h1>(.+?)<', webpage, 'title')

        player_config = self._search_regex(
            r'(?s)jwplayer\("player"\)\.setup\(({.+?})\)', webpage, 'config settings')
        # The setup argument is handed to the JSON parser after turning every
        # single quote into a double quote.
        player_vars = json.loads(player_config.replace("'", '"'))
        width = str_to_int(player_vars.get('width'))
        height = str_to_int(player_vars.get('height'))
        config_uri = player_vars.get('config')

        if config_uri is None:
            raise ExtractorError('Missing config URI')
        node = self._download_xml(config_uri, video_id, 'Downloading player config XML',
                                  errnote='Unable to download XML')
        if node is None:
            raise ExtractorError('Missing config XML')
        if node.tag != 'config':
            raise ExtractorError('Missing config attribute')

        fns = node.findall('file')
        imgs = node.findall('image')
        if len(fns) != 1:
            raise ExtractorError('Missing media URI')
        video_url = fns[0].text
        # The preview image is optional; use the first one when present.
        thumbnail = imgs[0].text if imgs else None

        # Referer is the page URL reduced to scheme, host and path.
        url_comp = compat_urlparse.urlparse(url)
        ref = "%s://%s%s" % (url_comp[0], url_comp[1], url_comp[2])

        return {
            'id': video_id,
            'url': video_url,
            'title': title,
            'width': width,
            'height': height,
            'thumbnail': thumbnail,
            'http_referer': ref,
            'age_limit': 18,
        }

View File

@ -14,7 +14,7 @@ from ..utils import (
class IviIE(InfoExtractor): class IviIE(InfoExtractor):
IE_DESC = 'ivi.ru' IE_DESC = 'ivi.ru'
IE_NAME = 'ivi' IE_NAME = 'ivi'
_VALID_URL = r'https?://(?:www\.)?ivi\.ru/watch(?:/(?P<compilationid>[^/]+))?/(?P<videoid>\d+)' _VALID_URL = r'https?://(?:www\.)?ivi\.ru/(?:watch/(?:[^/]+/)?|video/player\?.*?videoId=)(?P<videoid>\d+)'
_TESTS = [ _TESTS = [
# Single movie # Single movie

View File

@ -28,7 +28,7 @@ class MporaIE(InfoExtractor):
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
data_json = self._search_regex( data_json = self._search_regex(
r"new FM\.Player\('[^']+',\s*(\{.*?)\);\n", webpage, 'json') r"new FM\.Player\('[^']+',\s*(\{.*?)\).player;", webpage, 'json')
data = json.loads(data_json) data = json.loads(data_json)

View File

@ -158,6 +158,9 @@ class MTVServicesInfoExtractor(InfoExtractor):
if mgid.endswith('.swf'): if mgid.endswith('.swf'):
mgid = mgid[:-4] mgid = mgid[:-4]
except RegexNotFoundError: except RegexNotFoundError:
mgid = None
if mgid is None or ':' not in mgid:
mgid = self._search_regex( mgid = self._search_regex(
[r'data-mgid="(.*?)"', r'swfobject.embedSWF\(".*?(mgid:.*?)"'], [r'data-mgid="(.*?)"', r'swfobject.embedSWF\(".*?(mgid:.*?)"'],
webpage, u'mgid') webpage, u'mgid')

View File

@ -18,15 +18,15 @@ class NDRIE(InfoExtractor):
_TESTS = [ _TESTS = [
{ {
'url': 'http://www.ndr.de/fernsehen/sendungen/markt/markt7959.html', 'url': 'http://www.ndr.de/fernsehen/media/dienordreportage325.html',
'md5': 'e7a6079ca39d3568f4996cb858dd6708', 'md5': '4a4eeafd17c3058b65f0c8f091355855',
'note': 'Video file', 'note': 'Video file',
'info_dict': { 'info_dict': {
'id': '7959', 'id': '325',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Markt - die ganze Sendung', 'title': 'Blaue Bohnen aus Blocken',
'description': 'md5:af9179cf07f67c5c12dc6d9997e05725', 'description': 'md5:190d71ba2ccddc805ed01547718963bc',
'duration': 2655, 'duration': 1715,
}, },
}, },
{ {

View File

@ -4,18 +4,19 @@ from __future__ import unicode_literals
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ExtractorError
class NewstubeIE(InfoExtractor): class NewstubeIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?newstube\.ru/media/(?P<id>.+)' _VALID_URL = r'https?://(?:www\.)?newstube\.ru/media/(?P<id>.+)'
_TEST = { _TEST = {
'url': 'http://newstube.ru/media/na-korable-progress-prodolzhaetsya-testirovanie-sistemy-kurs', 'url': 'http://www.newstube.ru/media/telekanal-cnn-peremestil-gorod-slavyansk-v-krym',
'info_dict': { 'info_dict': {
'id': 'd156a237-a6e9-4111-a682-039995f721f1', 'id': '728e0ef2-e187-4012-bac0-5a081fdcb1f6',
'ext': 'flv', 'ext': 'flv',
'title': 'На корабле «Прогресс» продолжается тестирование системы «Курс»', 'title': 'Телеканал CNN переместил город Славянск в Крым',
'description': 'md5:d0cbe7b4a6f600552617e48548d5dc77', 'description': 'md5:419a8c9f03442bc0b0a794d689360335',
'duration': 20.04, 'duration': 31.05,
}, },
'params': { 'params': {
# rtmp download # rtmp download
@ -40,6 +41,10 @@ class NewstubeIE(InfoExtractor):
def ns(s): def ns(s):
return s.replace('/', '/%(ns)s') % {'ns': '{http://app1.newstube.ru/N2SiteWS/player.asmx}'} return s.replace('/', '/%(ns)s') % {'ns': '{http://app1.newstube.ru/N2SiteWS/player.asmx}'}
error_message = player.find(ns('./ErrorMessage'))
if error_message is not None:
raise ExtractorError('%s returned error: %s' % (self.IE_NAME, error_message.text), expected=True)
session_id = player.find(ns('./SessionId')).text session_id = player.find(ns('./SessionId')).text
media_info = player.find(ns('./Medias/MediaInfo')) media_info = player.find(ns('./Medias/MediaInfo'))
title = media_info.find(ns('./Name')).text title = media_info.find(ns('./Name')).text

View File

@ -8,10 +8,9 @@ from ..utils import (
compat_urllib_parse, compat_urllib_parse,
compat_urllib_request, compat_urllib_request,
compat_urlparse, compat_urlparse,
compat_str,
ExtractorError,
unified_strdate, unified_strdate,
parse_duration,
int_or_none,
) )
@ -30,6 +29,7 @@ class NiconicoIE(InfoExtractor):
'uploader_id': '2698420', 'uploader_id': '2698420',
'upload_date': '20131123', 'upload_date': '20131123',
'description': '(c) copyright 2008, Blender Foundation / www.bigbuckbunny.org', 'description': '(c) copyright 2008, Blender Foundation / www.bigbuckbunny.org',
'duration': 33,
}, },
'params': { 'params': {
'username': 'ydl.niconico@gmail.com', 'username': 'ydl.niconico@gmail.com',
@ -37,17 +37,20 @@ class NiconicoIE(InfoExtractor):
}, },
} }
_VALID_URL = r'^https?://(?:www\.|secure\.)?nicovideo\.jp/watch/([a-z][a-z][0-9]+)(?:.*)$' _VALID_URL = r'https?://(?:www\.|secure\.)?nicovideo\.jp/watch/((?:[a-z]{2})?[0-9]+)'
_NETRC_MACHINE = 'niconico' _NETRC_MACHINE = 'niconico'
# Determine whether the downloader uses authentication to download video
_AUTHENTICATE = False
def _real_initialize(self): def _real_initialize(self):
self._login() if self._downloader.params.get('username', None) is not None:
self._AUTHENTICATE = True
if self._AUTHENTICATE:
self._login()
def _login(self): def _login(self):
(username, password) = self._get_login_info() (username, password) = self._get_login_info()
if username is None:
# Login is required
raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
# Log in # Log in
login_form_strs = { login_form_strs = {
@ -79,44 +82,66 @@ class NiconicoIE(InfoExtractor):
'http://ext.nicovideo.jp/api/getthumbinfo/' + video_id, video_id, 'http://ext.nicovideo.jp/api/getthumbinfo/' + video_id, video_id,
note='Downloading video info page') note='Downloading video info page')
# Get flv info if self._AUTHENTICATE:
flv_info_webpage = self._download_webpage( # Get flv info
'http://flapi.nicovideo.jp/api/getflv?v=' + video_id, flv_info_webpage = self._download_webpage(
video_id, 'Downloading flv info') 'http://flapi.nicovideo.jp/api/getflv?v=' + video_id,
video_id, 'Downloading flv info')
else:
# Get external player info
ext_player_info = self._download_webpage(
'http://ext.nicovideo.jp/thumb_watch/' + video_id, video_id)
thumb_play_key = self._search_regex(
r'\'thumbPlayKey\'\s*:\s*\'(.*?)\'', ext_player_info, 'thumbPlayKey')
# Get flv info
flv_info_data = compat_urllib_parse.urlencode({
'k': thumb_play_key,
'v': video_id
})
flv_info_request = compat_urllib_request.Request(
'http://ext.nicovideo.jp/thumb_watch', flv_info_data,
{'Content-Type': 'application/x-www-form-urlencoded'})
flv_info_webpage = self._download_webpage(
flv_info_request, video_id,
note='Downloading flv info', errnote='Unable to download flv info')
video_real_url = compat_urlparse.parse_qs(flv_info_webpage)['url'][0] video_real_url = compat_urlparse.parse_qs(flv_info_webpage)['url'][0]
# Start extracting information # Start extracting information
video_title = video_info.find('.//title').text title = video_info.find('.//title').text
video_extension = video_info.find('.//movie_type').text extension = video_info.find('.//movie_type').text
video_format = video_extension.upper() video_format = extension.upper()
video_thumbnail = video_info.find('.//thumbnail_url').text thumbnail = video_info.find('.//thumbnail_url').text
video_description = video_info.find('.//description').text description = video_info.find('.//description').text
video_uploader_id = video_info.find('.//user_id').text upload_date = unified_strdate(video_info.find('.//first_retrieve').text.split('+')[0])
video_upload_date = unified_strdate(video_info.find('.//first_retrieve').text.split('+')[0]) view_count = int_or_none(video_info.find('.//view_counter').text)
video_view_count = video_info.find('.//view_counter').text comment_count = int_or_none(video_info.find('.//comment_num').text)
video_webpage_url = video_info.find('.//watch_url').text duration = parse_duration(video_info.find('.//length').text)
webpage_url = video_info.find('.//watch_url').text
# uploader if video_info.find('.//ch_id') is not None:
video_uploader = video_uploader_id uploader_id = video_info.find('.//ch_id').text
url = 'http://seiga.nicovideo.jp/api/user/info?id=' + video_uploader_id uploader = video_info.find('.//ch_name').text
try: elif video_info.find('.//user_id') is not None:
user_info = self._download_xml( uploader_id = video_info.find('.//user_id').text
url, video_id, note='Downloading user information') uploader = video_info.find('.//user_nickname').text
video_uploader = user_info.find('.//nickname').text else:
except ExtractorError as err: uploader_id = uploader = None
self._downloader.report_warning('Unable to download user info webpage: %s' % compat_str(err))
return { return {
'id': video_id, 'id': video_id,
'url': video_real_url, 'url': video_real_url,
'title': video_title, 'title': title,
'ext': video_extension, 'ext': extension,
'format': video_format, 'format': video_format,
'thumbnail': video_thumbnail, 'thumbnail': thumbnail,
'description': video_description, 'description': description,
'uploader': video_uploader, 'uploader': uploader,
'upload_date': video_upload_date, 'upload_date': upload_date,
'uploader_id': video_uploader_id, 'uploader_id': uploader_id,
'view_count': video_view_count, 'view_count': view_count,
'webpage_url': video_webpage_url, 'comment_count': comment_count,
'duration': duration,
'webpage_url': webpage_url,
} }

View File

@ -47,7 +47,7 @@ class NineGagIE(InfoExtractor):
webpage = self._download_webpage(url, display_id) webpage = self._download_webpage(url, display_id)
post_view = json.loads(self._html_search_regex( post_view = json.loads(self._html_search_regex(
r'var postView = new app\.PostView\({\s*post:\s*({.+?}),', webpage, 'post view')) r'var postView = new app\.PostView\({\s*post:\s*({.+?}),\s*posts:\s*prefetchedCurrentPost', webpage, 'post view'))
youtube_id = post_view['videoExternalId'] youtube_id = post_view['videoExternalId']
title = post_view['title'] title = post_view['title']

View File

@ -46,7 +46,7 @@ class PyvideoIE(InfoExtractor):
return self.url_result(m_youtube.group(1), 'Youtube') return self.url_result(m_youtube.group(1), 'Youtube')
title = self._html_search_regex( title = self._html_search_regex(
r'<div class="section">.*?<h3(?:\s+class="[^"]*")?>([^>]+?)</h3>', r'<div class="section">\s*<h3(?:\s+class="[^"]*"[^>]*)?>([^>]+?)</h3>',
webpage, 'title', flags=re.DOTALL) webpage, 'title', flags=re.DOTALL)
video_url = self._search_regex( video_url = self._search_regex(
[r'<source src="(.*?)"', r'<dt>Download</dt>.*?<a href="(.+?)"'], [r'<source src="(.*?)"', r'<dt>Download</dt>.*?<a href="(.+?)"'],

View File

@ -35,7 +35,8 @@ class RaiIE(SubtitlesInfoExtractor):
'description': '', 'description': '',
'upload_date': '20140612', 'upload_date': '20140612',
'duration': 1758, 'duration': 1758,
} },
'skip': 'Error 404',
}, },
{ {
'url': 'http://www.rainews.it/dl/rainews/media/state-of-the-net-Antonella-La-Carpia-regole-virali-7aafdea9-0e5d-49d5-88a6-7e65da67ae13.html', 'url': 'http://www.rainews.it/dl/rainews/media/state-of-the-net-Antonella-La-Carpia-regole-virali-7aafdea9-0e5d-49d5-88a6-7e65da67ae13.html',

View File

@ -0,0 +1,45 @@
from __future__ import unicode_literals
import re
import time
from .common import InfoExtractor
from ..utils import strip_jsonp
class ReverbNationIE(InfoExtractor):
    """Extractor for single songs on reverbnation.com.

    Song metadata is fetched from the api.reverbnation.com JSONP endpoint
    and unwrapped with ``strip_jsonp``.
    """

    _VALID_URL = r'^https?://(?:www\.)?reverbnation\.com/.*?/song/(?P<id>\d+).*?$'
    _TESTS = [{
        'url': 'http://www.reverbnation.com/alkilados/song/16965047-mona-lisa',
        'file': '16965047.mp3',
        'md5': '3da12ebca28c67c111a7f8b262d3f7a7',
        'info_dict': {
            "title": "MONA LISA",
            "uploader": "ALKILADOS",
            "uploader_id": 216429,
            "thumbnail": "//gp1.wac.edgecastcdn.net/802892/production_public/Photo/13761700/image/1366002176_AVATAR_MONA_LISA.jpg"
        },
    }]

    def _real_extract(self, url):
        """Return the info dict (audio only, mp3) for the song at *url*."""
        song_id = re.match(self._VALID_URL, url).group('id')

        # The "_" query parameter carries the current time in milliseconds.
        now_ms = int(time.time() * 1000)
        api_url = (
            'https://api.reverbnation.com/song/%s?callback=api_response_5&_=%d'
            % (song_id, now_ms))
        api_res = self._download_json(
            api_url,
            song_id,
            transform_source=strip_jsonp,
            note='Downloading information of song %s' % song_id
        )

        artist = api_res.get('artist', {})
        # Prefer the "image" entry, falling back to "thumbnail".
        cover = api_res.get('image', api_res.get('thumbnail'))

        return {
            'id': song_id,
            'title': api_res.get('name'),
            'url': api_res.get('url'),
            'uploader': artist.get('name'),
            'uploader_id': artist.get('id'),
            'thumbnail': cover,
            'ext': 'mp3',
            'vcodec': 'none',
        }

View File

@ -0,0 +1,46 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
import re
from .common import InfoExtractor
class RUHDIE(InfoExtractor):
    """Extractor for ruhd.ru ``play.php?vid=<id>`` pages."""

    _VALID_URL = r'http://(?:www\.)?ruhd\.ru/play\.php\?vid=(?P<id>\d+)'
    _TEST = {
        'url': 'http://www.ruhd.ru/play.php?vid=207',
        'md5': 'd1a9ec4edf8598e3fbd92bb16072ba83',
        'info_dict': {
            'id': '207',
            'ext': 'divx',
            'title': 'КОТ бааааам',
            'description': 'классный кот)',
            'thumbnail': 're:^http://.*\.jpg$',
        }
    }

    def _real_extract(self, url):
        """Scrape the media URL, title, description and thumbnail from the page."""
        video_id = re.match(self._VALID_URL, url).group('id')
        webpage = self._download_webpage(url, video_id)

        # The media URL and title are mandatory; the remaining fields are
        # looked up with fatal=False.
        video_url = self._html_search_regex(
            r'<param name="src" value="([^"]+)"', webpage, 'video url')
        title = self._html_search_regex(
            r'<title>([^<]+)&nbsp;&nbsp; RUHD.ru - Видео Высокого качества №1 в России!</title>', webpage, 'title')
        description = self._html_search_regex(
            r'(?s)<div id="longdesc">(.+?)<span id="showlink">', webpage, 'description', fatal=False)
        thumbnail_path = self._html_search_regex(
            r'<param name="previewImage" value="([^"]+)"', webpage, 'thumbnail', fatal=False)

        # A non-empty preview value is prefixed with the site origin.
        thumbnail = ('http://www.ruhd.ru' + thumbnail_path) if thumbnail_path else thumbnail_path

        return {
            'id': video_id,
            'url': video_url,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
        }

View File

@ -0,0 +1,112 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
ExtractorError,
compat_parse_qs,
compat_urllib_request,
)
class ScreencastIE(InfoExtractor):
    """Extractor for screencast.com ``/t/<id>`` pages.

    The media URL is looked for in three places, in order: an
    ``<embed name="Video">`` tag, the ``flashVars``/``initParams`` player
    parameters, and the ``og:video`` meta tag.
    """

    _VALID_URL = r'https?://www\.screencast\.com/t/(?P<id>[a-zA-Z0-9]+)'
    _TESTS = [{
        'url': 'http://www.screencast.com/t/3ZEjQXlT',
        'md5': '917df1c13798a3e96211dd1561fded83',
        'info_dict': {
            'id': '3ZEjQXlT',
            'ext': 'm4v',
            'title': 'Color Measurement with Ocean Optics Spectrometers',
            'description': 'md5:240369cde69d8bed61349a199c5fb153',
            'thumbnail': 're:^https?://.*\.(?:gif|jpg)$',
        }
    }, {
        'url': 'http://www.screencast.com/t/V2uXehPJa1ZI',
        'md5': 'e8e4b375a7660a9e7e35c33973410d34',
        'info_dict': {
            'id': 'V2uXehPJa1ZI',
            'ext': 'mov',
            'title': 'The Amadeus Spectrometer',
            'description': 're:^In this video, our friends at.*To learn more about Amadeus, visit',
            'thumbnail': 're:^https?://.*\.(?:gif|jpg)$',
        }
    }, {
        'url': 'http://www.screencast.com/t/aAB3iowa',
        'md5': 'dedb2734ed00c9755761ccaee88527cd',
        'info_dict': {
            'id': 'aAB3iowa',
            'ext': 'mp4',
            'title': 'Google Earth Export',
            'description': 'Provides a demo of a CommunityViz export to Google Earth, one of the 3D viewing options.',
            'thumbnail': 're:^https?://.*\.(?:gif|jpg)$',
        }
    }, {
        'url': 'http://www.screencast.com/t/X3ddTrYh',
        'md5': '669ee55ff9c51988b4ebc0877cc8b159',
        'info_dict': {
            'id': 'X3ddTrYh',
            'ext': 'wmv',
            'title': 'Toolkit 6 User Group Webinar (2014-03-04) - Default Judgment and First Impression',
            'description': 'md5:7b9f393bc92af02326a5c5889639eab0',
            'thumbnail': 're:^https?://.*\.(?:gif|jpg)$',
        }
    },
    ]

    def _find_video_url(self, webpage):
        """Return the media URL found in *webpage*, or None when no source matches."""
        # 1. Plain <embed name="Video"> tag.
        embed_src = self._html_search_regex(
            r'<embed name="Video".*?src="([^"]+)"', webpage,
            'QuickTime embed', default=None)
        if embed_src is not None:
            return embed_src

        # 2. Player parameters: flashVars first, then initParams, whose
        #    commas are turned into "&" so it parses as a query string.
        player_params = self._html_search_regex(
            r'<param name="flashVars" value="([^"]+)"', webpage, 'flash vars',
            default=None)
        if not player_params:
            player_params = self._html_search_regex(
                r'<param name="initParams" value="([^"]+)"', webpage, 'flash vars',
                default=None)
            if player_params:
                player_params = player_params.replace(',', '&')
        if player_params:
            flash_vars = compat_parse_qs(player_params)
            quoted = compat_urllib_request.quote(
                flash_vars['content'][0])
            # Quoting also escapes the scheme separator; restore it.
            return quoted.replace('http%3A', 'http:')

        # 3. og:video meta tag, from which the "src" value is taken.
        video_meta = self._html_search_meta(
            'og:video', webpage, default=None)
        if video_meta:
            return self._search_regex(
                r'src=(.*?)(?:$|&)', video_meta,
                'meta tag video URL', default=None)
        return None

    def _real_extract(self, url):
        """Return the info dict for *url*; raises ExtractorError if no media URL is found."""
        video_id = re.match(self._VALID_URL, url).group('id')
        webpage = self._download_webpage(url, video_id)

        video_url = self._find_video_url(webpage)
        if video_url is None:
            raise ExtractorError('Cannot find video')

        # Title: OpenGraph first, then two page-markup fallbacks.
        title = self._og_search_title(webpage, default=None)
        if title is None:
            title = self._html_search_regex(
                [r'<b>Title:</b> ([^<]*)</div>',
                 r'class="tabSeperator">></span><span class="tabText">(.*?)<'],
                webpage, 'title')

        thumbnail = self._og_search_thumbnail(webpage)

        # Description: OpenGraph first, then the plain "description" meta tag.
        description = self._og_search_description(webpage, default=None)
        if description is None:
            description = self._html_search_meta('description', webpage)

        return {
            'id': video_id,
            'url': video_url,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
        }

View File

@ -255,7 +255,7 @@ class SoundcloudSetIE(SoundcloudIE):
class SoundcloudUserIE(SoundcloudIE): class SoundcloudUserIE(SoundcloudIE):
_VALID_URL = r'https?://(www\.)?soundcloud\.com/(?P<user>[^/]+)(/?(tracks/)?)?(\?.*)?$' _VALID_URL = r'https?://(www\.)?soundcloud\.com/(?P<user>[^/]+)/?((?P<rsrc>tracks|likes)/?)?(\?.*)?$'
IE_NAME = 'soundcloud:user' IE_NAME = 'soundcloud:user'
# it's in tests/test_playlists.py # it's in tests/test_playlists.py
@ -264,24 +264,31 @@ class SoundcloudUserIE(SoundcloudIE):
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
uploader = mobj.group('user') uploader = mobj.group('user')
resource = mobj.group('rsrc')
if resource is None:
resource = 'tracks'
elif resource == 'likes':
resource = 'favorites'
url = 'http://soundcloud.com/%s/' % uploader url = 'http://soundcloud.com/%s/' % uploader
resolv_url = self._resolv_url(url) resolv_url = self._resolv_url(url)
user = self._download_json( user = self._download_json(
resolv_url, uploader, 'Downloading user info') resolv_url, uploader, 'Downloading user info')
base_url = 'http://api.soundcloud.com/users/%s/tracks.json?' % uploader base_url = 'http://api.soundcloud.com/users/%s/%s.json?' % (uploader, resource)
entries = [] entries = []
for i in itertools.count(): for i in itertools.count():
data = compat_urllib_parse.urlencode({ data = compat_urllib_parse.urlencode({
'offset': i * 50, 'offset': i * 50,
'limit': 50,
'client_id': self._CLIENT_ID, 'client_id': self._CLIENT_ID,
}) })
new_entries = self._download_json( new_entries = self._download_json(
base_url + data, uploader, 'Downloading track page %s' % (i + 1)) base_url + data, uploader, 'Downloading track page %s' % (i + 1))
entries.extend(self._extract_info_dict(e, quiet=True) for e in new_entries) if len(new_entries) == 0:
if len(new_entries) < 50: self.to_screen('%s: End page received' % uploader)
break break
entries.extend(self._extract_info_dict(e, quiet=True) for e in new_entries)
return { return {
'_type': 'playlist', '_type': 'playlist',

View File

@ -3,24 +3,24 @@ from __future__ import unicode_literals
from .mtv import MTVServicesInfoExtractor from .mtv import MTVServicesInfoExtractor
class SouthParkStudiosIE(MTVServicesInfoExtractor): class SouthParkIE(MTVServicesInfoExtractor):
IE_NAME = 'southparkstudios.com' IE_NAME = 'southpark.cc.com'
_VALID_URL = r'https?://(www\.)?(?P<url>southparkstudios\.com/(clips|full-episodes)/(?P<id>.+?)(\?|#|$))' _VALID_URL = r'https?://(www\.)?(?P<url>southpark\.cc\.com/(clips|full-episodes)/(?P<id>.+?)(\?|#|$))'
_FEED_URL = 'http://www.southparkstudios.com/feeds/video-player/mrss' _FEED_URL = 'http://www.southparkstudios.com/feeds/video-player/mrss'
_TESTS = [{ _TESTS = [{
'url': 'http://www.southparkstudios.com/clips/104437/bat-daded#tab=featured', 'url': 'http://southpark.cc.com/clips/104437/bat-daded#tab=featured',
'info_dict': { 'info_dict': {
'id': 'a7bff6c2-ed00-11e0-aca6-0026b9414f30', 'id': 'a7bff6c2-ed00-11e0-aca6-0026b9414f30',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Bat Daded', 'title': 'South Park|Bat Daded',
'description': 'Randy disqualifies South Park by getting into a fight with Bat Dad.', 'description': 'Randy disqualifies South Park by getting into a fight with Bat Dad.',
}, },
}] }]
class SouthparkDeIE(SouthParkStudiosIE): class SouthparkDeIE(SouthParkIE):
IE_NAME = 'southpark.de' IE_NAME = 'southpark.de'
_VALID_URL = r'https?://(www\.)?(?P<url>southpark\.de/(clips|alle-episoden)/(?P<id>.+?)(\?|#|$))' _VALID_URL = r'https?://(www\.)?(?P<url>southpark\.de/(clips|alle-episoden)/(?P<id>.+?)(\?|#|$))'
_FEED_URL = 'http://www.southpark.de/feeds/video-player/mrss/' _FEED_URL = 'http://www.southpark.de/feeds/video-player/mrss/'

View File

@ -20,13 +20,13 @@ class TagesschauIE(InfoExtractor):
'thumbnail': 're:^http:.*\.jpg$', 'thumbnail': 're:^http:.*\.jpg$',
}, },
}, { }, {
'url': 'http://www.tagesschau.de/multimedia/video/video-196.html', 'url': 'http://www.tagesschau.de/multimedia/video/video-5964.html',
'md5': '8aaa8bf3ae1ca2652309718c03019128', 'md5': '66652566900963a3f962333579eeffcf',
'info_dict': { 'info_dict': {
'id': '196', 'id': '5964',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Ukraine-Konflikt: Klitschko in Kiew als Bürgermeister vereidigt', 'title': 'Nahost-Konflikt: Israel bombadiert Ziele im Gazastreifen und Westjordanland',
'description': 'md5:f22e4af75821d174fa6c977349682691', 'description': 'md5:07bfc78c48eec3145ed4805299a1900a',
'thumbnail': 're:http://.*\.jpg', 'thumbnail': 're:http://.*\.jpg',
}, },
}] }]

View File

@ -14,7 +14,7 @@ class TeacherTubeIE(InfoExtractor):
IE_NAME = 'teachertube' IE_NAME = 'teachertube'
IE_DESC = 'teachertube.com videos' IE_DESC = 'teachertube.com videos'
_VALID_URL = r'https?://(?:www\.)?teachertube\.com/(viewVideo\.php\?video_id=|music\.php\?music_id=|video/|audio/)(?P<id>\d+)' _VALID_URL = r'https?://(?:www\.)?teachertube\.com/(viewVideo\.php\?video_id=|music\.php\?music_id=|video/(?:[\da-z-]+-)?|audio/)(?P<id>\d+)'
_TESTS = [{ _TESTS = [{
'url': 'http://www.teachertube.com/viewVideo.php?video_id=339997', 'url': 'http://www.teachertube.com/viewVideo.php?video_id=339997',
@ -45,6 +45,15 @@ class TeacherTubeIE(InfoExtractor):
'title': 'PER ASPERA AD ASTRA', 'title': 'PER ASPERA AD ASTRA',
'description': 'RADIJSKA EMISIJA ZRAKOPLOVNE TEHNI?KE ?KOLE P', 'description': 'RADIJSKA EMISIJA ZRAKOPLOVNE TEHNI?KE ?KOLE P',
}, },
}, {
'url': 'http://www.teachertube.com/video/intro-video-schleicher-297790',
'md5': '9c79fbb2dd7154823996fc28d4a26998',
'info_dict': {
'id': '297790',
'ext': 'mp4',
'title': 'Intro Video - Schleicher',
'description': 'Intro Video - Why to flip, how flipping will',
},
}] }]
def _real_extract(self, url): def _real_extract(self, url):
@ -86,22 +95,30 @@ class TeacherTubeIE(InfoExtractor):
} }
class TeacherTubeClassroomIE(InfoExtractor): class TeacherTubeUserIE(InfoExtractor):
IE_NAME = 'teachertube:classroom' IE_NAME = 'teachertube:user:collection'
IE_DESC = 'teachertube.com online classrooms' IE_DESC = 'teachertube.com user and collection videos'
_VALID_URL = r'https?://(?:www\.)?teachertube\.com/view_classroom\.php\?user=(?P<user>[0-9a-zA-Z]+)' _VALID_URL = r'https?://(?:www\.)?teachertube\.com/(user/profile|collection)/(?P<user>[0-9a-zA-Z]+)/?'
_MEDIA_RE = r'(?s)"sidebar_thumb_time">[0-9:]+</div>.+?<a href="(https?://(?:www\.)?teachertube\.com/(?:video|audio)/[^"]+)">'
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
user_id = mobj.group('user') user_id = mobj.group('user')
rss = self._download_xml( urls = []
'http://www.teachertube.com/rssclassroom.php?mode=user&username=%s' % user_id, webpage = self._download_webpage(url, user_id)
user_id, 'Downloading classroom RSS') urls.extend(re.findall(self._MEDIA_RE, webpage))
pages = re.findall(r'/ajax-user/user-videos/%s\?page=([0-9]+)' % user_id, webpage)[1:-1]
for p in pages:
more = 'http://www.teachertube.com/ajax-user/user-videos/%s?page=%s' % (user_id, p)
webpage = self._download_webpage(more, user_id, 'Downloading page %s/%s' % (p, len(pages) + 1))
urls.extend(re.findall(self._MEDIA_RE, webpage))
entries = [] entries = []
for url in rss.findall('.//{http://search.yahoo.com/mrss/}player'): for url in urls:
entries.append(self.url_result(url.attrib['url'], 'TeacherTube')) entries.append(self.url_result(url, 'TeacherTube'))
return self.playlist_result(entries, user_id) return self.playlist_result(entries, user_id)

View File

@ -0,0 +1,84 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
class TenPlayIE(InfoExtractor):
    """Extractor for ten.com.au / tenplay.com.au pages.

    The page exposes a video id and an API token; both are used to query
    the Brightcove ``find_video_by_id`` endpoint, whose renditions become
    the available formats.
    """

    _VALID_URL = r'https?://(?:www\.)?ten(play)?\.com\.au/.+'
    _TEST = {
        'url': 'http://tenplay.com.au/ten-insider/extra/season-2013/tenplay-tv-your-way',
        #'md5': 'd68703d9f73dc8fccf3320ab34202590',
        'info_dict': {
            'id': '2695695426001',
            'ext': 'flv',
            'title': 'TENplay: TV your way',
            'description': 'Welcome to a new TV experience. Enjoy a taste of the TENplay benefits.',
            'timestamp': 1380150606.889,
            'upload_date': '20130925',
            'uploader': 'TENplay',
        },
        'params': {
            'skip_download': True,  # Requires rtmpdump
        }
    }

    # Fields requested from the API through the video_fields query parameter.
    _video_fields = [
        "id", "name", "shortDescription", "longDescription", "creationDate",
        "publishedDate", "lastModifiedDate", "customFields", "videoStillURL",
        "thumbnailURL", "referenceId", "length", "playsTotal",
        "playsTrailingWeek", "renditions", "captioning", "startDate", "endDate"]

    def _real_extract(self, url):
        """Return the info dict, with one format per rendition, for *url*."""
        # No id is known before the page is fetched, so the URL itself is
        # passed as the identifier argument.
        webpage = self._download_webpage(url, url)
        video_id = self._html_search_regex(
            r'videoID: "(\d+?)"', webpage, 'video_id')
        api_token = self._html_search_regex(
            r'apiToken: "([a-zA-Z0-9-_\.]+?)"', webpage, 'api_token')
        title = self._html_search_regex(
            r'<meta property="og:title" content="\s*(.*?)\s*"\s*/?\s*>',
            webpage, 'title')

        video_info = self._download_json(
            'https://api.brightcove.com/services/library?command=find_video_by_id&video_id=%s&token=%s&video_fields=%s' % (video_id, api_token, ','.join(self._video_fields)),
            title)

        formats = []
        for rendition in video_info['renditions']:
            # A separate name keeps the page URL parameter intact.
            media_url = rendition['remoteUrl'] or rendition['url']
            protocol = 'rtmp' if media_url.startswith('rtmp') else 'http'
            container = rendition['videoContainer'].lower()
            vcodec = rendition['videoCodec'].lower()
            ext = 'flv' if protocol == 'rtmp' else container

            if protocol == 'rtmp':
                media_url = media_url.replace('&mp4:', '')

            formats.append({
                # Label the format with the protocol actually used, so HTTP
                # renditions are no longer reported as "rtmp_...".
                'format_id': '_'.join([protocol, container, vcodec]),
                'width': rendition['frameWidth'],
                'height': rendition['frameHeight'],
                'tbr': rendition['encodingRate'] / 1024,
                'filesize': rendition['size'],
                'protocol': protocol,
                'ext': ext,
                'vcodec': vcodec,
                'container': container,
                'url': media_url,
            })

        custom_fields = video_info['customFields']
        return {
            'id': video_id,
            'display_id': video_info['referenceId'],
            'title': video_info['name'],
            'description': video_info['shortDescription'] or video_info['longDescription'],
            'formats': formats,
            'thumbnails': [{
                'url': video_info['videoStillURL']
            }, {
                'url': video_info['thumbnailURL']
            }],
            'thumbnail': video_info['videoStillURL'],
            # "length" and "creationDate" are divided by 1000 — presumably
            # milliseconds; TODO confirm against the API reference.
            'duration': video_info['length'] / 1000,
            'timestamp': float(video_info['creationDate']) / 1000,
            'uploader': custom_fields.get('production_company_distributor', 'TENplay'),
            'view_count': video_info['playsTotal']
        }

View File

@ -5,6 +5,7 @@ import re
from .common import InfoExtractor from .common import InfoExtractor
from .brightcove import BrightcoveIE from .brightcove import BrightcoveIE
from .discovery import DiscoveryIE from .discovery import DiscoveryIE
from ..utils import compat_urlparse
class TlcIE(DiscoveryIE): class TlcIE(DiscoveryIE):
@ -51,6 +52,10 @@ class TlcDeIE(InfoExtractor):
# Otherwise we don't get the correct 'BrightcoveExperience' element, # Otherwise we don't get the correct 'BrightcoveExperience' element,
# example: http://www.tlc.de/sendungen/cake-boss/videos/cake-boss-cannoli-drama/ # example: http://www.tlc.de/sendungen/cake-boss/videos/cake-boss-cannoli-drama/
iframe_url = iframe_url.replace('.htm?', '.php?') iframe_url = iframe_url.replace('.htm?', '.php?')
url_fragment = compat_urlparse.urlparse(url).fragment
if url_fragment:
# Since the fragment is not sent to the server, we always get the same iframe
iframe_url = re.sub(r'playlist=(\d+)', 'playlist=%s' % url_fragment, iframe_url)
iframe = self._download_webpage(iframe_url, title) iframe = self._download_webpage(iframe_url, title)
return { return {

View File

@ -49,6 +49,7 @@ class VeohIE(InfoExtractor):
'description': 'md5:f5a11c51f8fb51d2315bca0937526891', 'description': 'md5:f5a11c51f8fb51d2315bca0937526891',
'uploader': 'newsy-videos', 'uploader': 'newsy-videos',
}, },
'skip': 'This video has been deleted.',
}, },
] ]

View File

@ -0,0 +1,86 @@
# coding: utf-8
from __future__ import unicode_literals
import base64
import re
import xml.etree.ElementTree
import zlib
from .common import InfoExtractor
from ..utils import int_or_none
class VimpleIE(InfoExtractor):
    """Extractor for vimple.ru videos (main page and player iframe URLs).

    The video description is an XML document that is base64-encoded and
    stored in pieces inside the compressed flash player referenced by the
    iframe page.
    """

    IE_DESC = 'Vimple.ru'
    # Host dots are escaped so that only the literal host names match.
    _VALID_URL = r'https?://(?:player\.vimple\.ru/iframe|vimple\.ru)/(?P<id>[a-f0-9]{10,})'
    _TESTS = [
        # Quality: Large, from iframe
        {
            'url': 'http://player.vimple.ru/iframe/b132bdfd71b546d3972f9ab9a25f201c',
            'info_dict': {
                'id': 'b132bdfd71b546d3972f9ab9a25f201c',
                'title': 'great-escape-minecraft.flv',
                'ext': 'mp4',
                'duration': 352,
                'webpage_url': 'http://vimple.ru/b132bdfd71b546d3972f9ab9a25f201c',
            },
        },
        # Quality: Medium, from mainpage
        {
            'url': 'http://vimple.ru/a15950562888453b8e6f9572dc8600cd',
            'info_dict': {
                'id': 'a15950562888453b8e6f9572dc8600cd',
                'title': 'DB 01',
                'ext': 'flv',
                'duration': 1484,
                'webpage_url': 'http://vimple.ru/a15950562888453b8e6f9572dc8600cd',
            }
        },
    ]

    def _real_extract(self, url):
        """Return the info dict (single format) for the video at *url*."""
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        # Both URL flavours are resolved through the player iframe page.
        iframe_url = 'http://player.vimple.ru/iframe/%s' % video_id
        iframe = self._download_webpage(
            iframe_url, video_id,
            note='Downloading iframe', errnote='unable to fetch iframe')

        player_url = self._html_search_regex(
            r'"(http://player\.vimple\.ru/flash/.+?)"', iframe, 'player url')
        player = self._request_webpage(
            player_url, video_id, note='Downloading swf player').read()

        # The first 8 bytes are skipped before inflating — presumably the
        # header of a zlib-compressed SWF; TODO confirm.
        player = zlib.decompress(player[8:])

        # Collect the 500-character base64-alphabet runs, drop the first and
        # last character of each, and decode the concatenation into XML.
        xml_pieces = re.findall(b'([a-zA-Z0-9 =+/]{500})', player)
        xml_pieces = [piece[1:-1] for piece in xml_pieces]
        xml_data = b''.join(xml_pieces)
        xml_data = base64.b64decode(xml_data)
        xml_data = xml.etree.ElementTree.fromstring(xml_data)

        video = xml_data.find('Video')
        quality = video.get('quality')
        # The child element named after the capitalised quality value holds
        # the stream attributes.
        q_tag = video.find(quality.capitalize())

        formats = [
            {
                'url': q_tag.get('url'),
                # Missing attributes yield None instead of a TypeError,
                # matching how the duration is handled below.
                'tbr': int_or_none(q_tag.get('bitrate')),
                'filesize': int_or_none(q_tag.get('filesize')),
                'format_id': quality,
            },
        ]

        return {
            'id': video_id,
            'title': video.find('Title').text,
            'formats': formats,
            'thumbnail': video.find('Poster').get('url'),
            'duration': int_or_none(video.get('duration')),
            'webpage_url': video.find('Share').get('videoPageUrl'),
        }

View File

@ -16,7 +16,7 @@ from ..utils import (
class VKIE(InfoExtractor): class VKIE(InfoExtractor):
IE_NAME = 'vk.com' IE_NAME = 'vk.com'
_VALID_URL = r'https?://vk\.com/(?:video_ext\.php\?.*?\boid=(?P<oid>-?\d+).*?\bid=(?P<id>\d+)|(?:videos.*?\?.*?z=)?video(?P<videoid>.*?)(?:\?|%2F|$))' _VALID_URL = r'https?://(?:m\.)?vk\.com/(?:video_ext\.php\?.*?\boid=(?P<oid>-?\d+).*?\bid=(?P<id>\d+)|(?:.+?\?.*?z=)?video(?P<videoid>.*?)(?:\?|%2F|$))'
_NETRC_MACHINE = 'vk' _NETRC_MACHINE = 'vk'
_TESTS = [ _TESTS = [
@ -62,11 +62,47 @@ class VKIE(InfoExtractor):
'id': '164049491', 'id': '164049491',
'ext': 'mp4', 'ext': 'mp4',
'uploader': 'Триллеры', 'uploader': 'Триллеры',
'title': '► Бойцовский клуб / Fight Club 1999 [HD 720]\u00a0', 'title': '► Бойцовский клуб / Fight Club 1999 [HD 720]',
'duration': 8352, 'duration': 8352,
}, },
'skip': 'Requires vk account credentials', 'skip': 'Requires vk account credentials',
}, },
{
'url': 'http://vk.com/feed?z=video-43215063_166094326%2Fbb50cacd3177146d7a',
'md5': 'd82c22e449f036282d1d3f7f4d276869',
'info_dict': {
'id': '166094326',
'ext': 'mp4',
'uploader': 'Киномания - лучшее из мира кино',
'title': 'Запах женщины (1992)',
'duration': 9392,
},
'skip': 'Requires vk account credentials',
},
{
'url': 'http://vk.com/hd_kino_mania?z=video-43215063_168067957%2F15c66b9b533119788d',
'md5': '4d7a5ef8cf114dfa09577e57b2993202',
'info_dict': {
'id': '168067957',
'ext': 'mp4',
'uploader': 'Киномания - лучшее из мира кино',
'title': ' ',
'duration': 7291,
},
'skip': 'Requires vk account credentials',
},
{
'url': 'http://m.vk.com/video-43215063_169084319?list=125c627d1aa1cebb83&from=wall-43215063_2566540',
'md5': '0c45586baa71b7cb1d0784ee3f4e00a6',
'note': 'ivi.ru embed',
'info_dict': {
'id': '60690',
'ext': 'mp4',
'title': 'Книга Илая',
'duration': 6771,
},
'skip': 'Only works from Russia',
},
] ]
def _login(self): def _login(self):
@ -110,6 +146,16 @@ class VKIE(InfoExtractor):
if m_yt is not None: if m_yt is not None:
self.to_screen('Youtube video detected') self.to_screen('Youtube video detected')
return self.url_result(m_yt.group(1), 'Youtube') return self.url_result(m_yt.group(1), 'Youtube')
m_opts = re.search(r'(?s)var\s+opts\s*=\s*({.*?});', info_page)
if m_opts:
m_opts_url = re.search(r"url\s*:\s*'([^']+)", m_opts.group(1))
if m_opts_url:
opts_url = m_opts_url.group(1)
if opts_url.startswith('//'):
opts_url = 'http:' + opts_url
return self.url_result(opts_url)
data_json = self._search_regex(r'var vars = ({.*?});', info_page, 'vars') data_json = self._search_regex(r'var vars = ({.*?});', info_page, 'vars')
data = json.loads(data_json) data = json.loads(data_json)

View File

@ -0,0 +1,63 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
compat_urllib_parse,
compat_urllib_request,
)
class VodlockerIE(InfoExtractor):
    """Extractor for videos hosted on vodlocker.com.

    The landing page may contain an intermediate form (``op`` ==
    ``download1``) whose hidden fields have to be posted back before the
    page with the actual player configuration is served.
    """

    # The dot in the host name is escaped so it only matches a literal '.'.
    _VALID_URL = r'https?://(?:www\.)?vodlocker\.com/(?P<id>[0-9a-zA-Z]+)(?:\..*?)?'

    _TESTS = [{
        'url': 'http://vodlocker.com/e8wvyzz4sl42',
        'md5': 'ce0c2d18fa0735f1bd91b69b0e54aacf',
        'info_dict': {
            'id': 'e8wvyzz4sl42',
            'ext': 'mp4',
            'title': 'Germany vs Brazil',
            # Raw string: '\.' is not a valid escape in a normal literal.
            'thumbnail': r're:http://.*\.jpg',
        },
    }]

    def _real_extract(self, url):
        """Return the info dict (id, title, thumbnail, formats) for ``url``."""
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        webpage = self._download_webpage(url, video_id)

        # Collect every hidden form input as a name -> value mapping.
        fields = dict(re.findall(r'''(?x)<input\s+
            type="hidden"\s+
            name="([^"]+)"\s+
            (?:id="[^"]+"\s+)?
            value="([^"]*)"
            ''', webpage))

        # .get() so a page without an 'op' field falls through to the
        # regex searches below instead of dying with a bare KeyError.
        if fields.get('op') == 'download1':
            self._sleep(3, video_id)  # they do detect when requests happen too fast!
            post = compat_urllib_parse.urlencode(fields)
            req = compat_urllib_request.Request(url, post)
            req.add_header('Content-type', 'application/x-www-form-urlencoded')
            webpage = self._download_webpage(
                req, video_id, 'Downloading video page')

        title = self._search_regex(
            r'id="file_title".*?>\s*(.*?)\s*<span', webpage, 'title')
        thumbnail = self._search_regex(
            r'image:\s*"(http[^\"]+)",', webpage, 'thumbnail')
        # Kept under its own name so the page URL argument is not clobbered.
        video_url = self._search_regex(
            r'file:\s*"(http[^\"]+)",', webpage, 'file url')

        formats = [{
            'format_id': 'sd',
            'url': video_url,
        }]

        return {
            'id': video_id,
            'title': title,
            'thumbnail': thumbnail,
            'formats': formats,
        }

View File

@ -1,3 +1,4 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals from __future__ import unicode_literals
import re import re
@ -54,14 +55,14 @@ class WDRIE(InfoExtractor):
}, },
}, },
{ {
'url': 'http://www.funkhauseuropa.de/av/audiogrenzenlosleckerbaklava101-audioplayer.html', 'url': 'http://www.funkhauseuropa.de/av/audiosuepersongsoulbossanova100-audioplayer.html',
'md5': 'cfff440d4ee64114083ac44676df5d15', 'md5': '24e83813e832badb0a8d7d1ef9ef0691',
'info_dict': { 'info_dict': {
'id': 'mdb-363068', 'id': 'mdb-463528',
'ext': 'mp3', 'ext': 'mp3',
'title': 'Grenzenlos lecker - Baklava', 'title': 'Süpersong: Soul Bossa Nova',
'description': 'md5:7b29e97e10dfb6e265238b32fa35b23a', 'description': 'md5:7b29e97e10dfb6e265238b32fa35b23a',
'upload_date': '20140311', 'upload_date': '20140630',
}, },
}, },
] ]
@ -127,9 +128,10 @@ class WDRMobileIE(InfoExtractor):
'info_dict': { 'info_dict': {
'title': '4283021', 'title': '4283021',
'id': '421735', 'id': '421735',
'ext': 'mp4',
'age_limit': 0, 'age_limit': 0,
}, },
'_skip': 'Will be depublicized shortly' 'skip': 'Problems with loading data.'
} }
def _real_extract(self, url): def _real_extract(self, url):
@ -139,6 +141,7 @@ class WDRMobileIE(InfoExtractor):
'title': mobj.group('title'), 'title': mobj.group('title'),
'age_limit': int(mobj.group('age_limit')), 'age_limit': int(mobj.group('age_limit')),
'url': url, 'url': url,
'ext': determine_ext(url),
'user_agent': 'mobile', 'user_agent': 'mobile',
} }

View File

@ -865,71 +865,26 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
def _decrypt_signature(self, s, video_id, player_url, age_gate=False): def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
"""Turn the encrypted s field into a working signature""" """Turn the encrypted s field into a working signature"""
if player_url is not None: if player_url is None:
if player_url.startswith(u'//'): raise ExtractorError(u'Cannot decrypt signature without player_url')
player_url = u'https:' + player_url
try:
player_id = (player_url, len(s))
if player_id not in self._player_cache:
func = self._extract_signature_function(
video_id, player_url, len(s)
)
self._player_cache[player_id] = func
func = self._player_cache[player_id]
if self._downloader.params.get('youtube_print_sig_code'):
self._print_sig_code(func, len(s))
return func(s)
except Exception:
tb = traceback.format_exc()
self._downloader.report_warning(
u'Automatic signature extraction failed: ' + tb)
self._downloader.report_warning( if player_url.startswith(u'//'):
u'Warning: Falling back to static signature algorithm') player_url = u'https:' + player_url
try:
return self._static_decrypt_signature( player_id = (player_url, len(s))
s, video_id, player_url, age_gate) if player_id not in self._player_cache:
func = self._extract_signature_function(
def _static_decrypt_signature(self, s, video_id, player_url, age_gate): video_id, player_url, len(s)
if age_gate: )
# The videos with age protection use another player, so the self._player_cache[player_id] = func
# algorithms can be different. func = self._player_cache[player_id]
if len(s) == 86: if self._downloader.params.get('youtube_print_sig_code'):
return s[2:63] + s[82] + s[64:82] + s[63] self._print_sig_code(func, len(s))
return func(s)
if len(s) == 93: except Exception as e:
return s[86:29:-1] + s[88] + s[28:5:-1] tb = traceback.format_exc()
elif len(s) == 92: raise ExtractorError(
return s[25] + s[3:25] + s[0] + s[26:42] + s[79] + s[43:79] + s[91] + s[80:83] u'Automatic signature extraction failed: ' + tb, cause=e)
elif len(s) == 91:
return s[84:27:-1] + s[86] + s[26:5:-1]
elif len(s) == 90:
return s[25] + s[3:25] + s[2] + s[26:40] + s[77] + s[41:77] + s[89] + s[78:81]
elif len(s) == 89:
return s[84:78:-1] + s[87] + s[77:60:-1] + s[0] + s[59:3:-1]
elif len(s) == 88:
return s[7:28] + s[87] + s[29:45] + s[55] + s[46:55] + s[2] + s[56:87] + s[28]
elif len(s) == 87:
return s[6:27] + s[4] + s[28:39] + s[27] + s[40:59] + s[2] + s[60:]
elif len(s) == 86:
return s[80:72:-1] + s[16] + s[71:39:-1] + s[72] + s[38:16:-1] + s[82] + s[15::-1]
elif len(s) == 85:
return s[3:11] + s[0] + s[12:55] + s[84] + s[56:84]
elif len(s) == 84:
return s[78:70:-1] + s[14] + s[69:37:-1] + s[70] + s[36:14:-1] + s[80] + s[:14][::-1]
elif len(s) == 83:
return s[80:63:-1] + s[0] + s[62:0:-1] + s[63]
elif len(s) == 82:
return s[80:37:-1] + s[7] + s[36:7:-1] + s[0] + s[6:0:-1] + s[37]
elif len(s) == 81:
return s[56] + s[79:56:-1] + s[41] + s[55:41:-1] + s[80] + s[40:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9]
elif len(s) == 80:
return s[1:19] + s[0] + s[20:68] + s[19] + s[69:80]
elif len(s) == 79:
return s[54] + s[77:54:-1] + s[39] + s[53:39:-1] + s[78] + s[38:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9]
else:
raise ExtractorError(u'Unable to decrypt signature, key length %d not supported; retrying might work' % (len(s)))
def _get_available_subtitles(self, video_id, webpage): def _get_available_subtitles(self, video_id, webpage):
try: try:
@ -1698,14 +1653,14 @@ class YoutubeSearchURLIE(InfoExtractor):
webpage = self._download_webpage(url, query) webpage = self._download_webpage(url, query)
result_code = self._search_regex( result_code = self._search_regex(
r'(?s)<ol id="search-results"(.*?)</ol>', webpage, u'result HTML') r'(?s)<ol class="item-section"(.*?)</ol>', webpage, u'result HTML')
part_codes = re.findall( part_codes = re.findall(
r'(?s)<h3 class="yt-lockup-title">(.*?)</h3>', result_code) r'(?s)<h3 class="yt-lockup-title">(.*?)</h3>', result_code)
entries = [] entries = []
for part_code in part_codes: for part_code in part_codes:
part_title = self._html_search_regex( part_title = self._html_search_regex(
r'(?s)title="([^"]+)"', part_code, 'item title', fatal=False) [r'(?s)title="([^"]+)"', r'>([^<]+)</a>'], part_code, 'item title', fatal=False)
part_url_snippet = self._html_search_regex( part_url_snippet = self._html_search_regex(
r'(?s)href="([^"]+)"', part_code, 'item URL') r'(?s)href="([^"]+)"', part_code, 'item URL')
part_url = compat_urlparse.urljoin( part_url = compat_urlparse.urljoin(
@ -1825,10 +1780,21 @@ class YoutubeTruncatedURLIE(InfoExtractor):
IE_NAME = 'youtube:truncated_url' IE_NAME = 'youtube:truncated_url'
IE_DESC = False # Do not list IE_DESC = False # Do not list
_VALID_URL = r'''(?x) _VALID_URL = r'''(?x)
(?:https?://)?[^/]+/watch\?(?:feature=[a-z_]+)?$| (?:https?://)?[^/]+/watch\?(?:
feature=[a-z_]+|
annotation_id=annotation_[^&]+
)?$|
(?:https?://)?(?:www\.)?youtube\.com/attribution_link\?a=[^&]+$ (?:https?://)?(?:www\.)?youtube\.com/attribution_link\?a=[^&]+$
''' '''
_TESTS = [{
'url': 'http://www.youtube.com/watch?annotation_id=annotation_3951667041',
'only_matching': True,
}, {
'url': 'http://www.youtube.com/watch?',
'only_matching': True,
}]
def _real_extract(self, url): def _real_extract(self, url):
raise ExtractorError( raise ExtractorError(
u'Did you forget to quote the URL? Remember that & is a meta ' u'Did you forget to quote the URL? Remember that & is a meta '

View File

@ -59,7 +59,7 @@ class JSInterpreter(object):
if member == 'split("")': if member == 'split("")':
return list(val) return list(val)
if member == 'join("")': if member == 'join("")':
return u''.join(val) return ''.join(val)
if member == 'length': if member == 'length':
return len(val) return len(val)
if member == 'reverse()': if member == 'reverse()':
@ -99,7 +99,7 @@ class JSInterpreter(object):
def extract_function(self, funcname): def extract_function(self, funcname):
func_m = re.search( func_m = re.search(
(r'(?:function %s|%s\s*=\s*function)' % ( (r'(?:function %s|[{;]%s\s*=\s*function)' % (
re.escape(funcname), re.escape(funcname))) + re.escape(funcname), re.escape(funcname))) +
r'\((?P<args>[a-z,]+)\){(?P<code>[^}]+)}', r'\((?P<args>[a-z,]+)\){(?P<code>[^}]+)}',
self.code) self.code)

View File

@ -775,7 +775,7 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
https_response = http_response https_response = http_response
def parse_iso8601(date_str): def parse_iso8601(date_str, delimiter='T'):
""" Return a UNIX timestamp from the given date """ """ Return a UNIX timestamp from the given date """
if date_str is None: if date_str is None:
@ -795,8 +795,8 @@ def parse_iso8601(date_str):
timezone = datetime.timedelta( timezone = datetime.timedelta(
hours=sign * int(m.group('hours')), hours=sign * int(m.group('hours')),
minutes=sign * int(m.group('minutes'))) minutes=sign * int(m.group('minutes')))
date_format = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter)
dt = datetime.datetime.strptime(date_str, '%Y-%m-%dT%H:%M:%S') - timezone dt = datetime.datetime.strptime(date_str, date_format) - timezone
return calendar.timegm(dt.timetuple()) return calendar.timegm(dt.timetuple())
@ -1428,7 +1428,7 @@ US_RATINGS = {
def strip_jsonp(code): def strip_jsonp(code):
return re.sub(r'(?s)^[a-zA-Z_]+\s*\(\s*(.*)\);\s*?\s*$', r'\1', code) return re.sub(r'(?s)^[a-zA-Z0-9_]+\s*\(\s*(.*)\);?\s*?\s*$', r'\1', code)
def qualities(quality_ids): def qualities(quality_ids):

View File

@ -1,2 +1,2 @@
__version__ = '2014.06.26' __version__ = '2014.07.11.3'