Merge branch 'master' into use-other-downloaders

This commit is contained in:
Rogério Brito 2013-08-07 23:34:38 -03:00
commit bac1eb8b36
24 changed files with 568 additions and 155 deletions

View File

@ -17,18 +17,18 @@ tests = [
# 87 - vflART1Nf 2013/07/24 # 87 - vflART1Nf 2013/07/24
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$^&*()_-+={[]}|:;?/>.<", ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$^&*()_-+={[]}|:;?/>.<",
"tyuioplkjhgfdsazxcv<nm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$^&*()_-+={[]}|:;?/>"), "tyuioplkjhgfdsazxcv<nm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$^&*()_-+={[]}|:;?/>"),
# 86 - vfl_ymO4Z 2013/06/27 # 86 - vflm_D8eE 2013/07/31
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[|};?/>.<", ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[|};?/>.<",
"ertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!/#$%^&*()_-+={[|};?@"), ">.1}|[{=+-_)(*&^%$#@!MNBVCXZASDFGHJK<POIUYTREW509876L432/mnbvcxzasdfghjklpoiuytre"),
# 85 - vflSAFCP9 2013/07/19 # 85 - vflSAFCP9 2013/07/19
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?/>.<", ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?/>.<",
"ertyuiqplkjhgfdsazx$vbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#<%^&*()_-+={[};?/c"), "ertyuiqplkjhgfdsazx$vbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#<%^&*()_-+={[};?/c"),
# 84 # 84
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?>.<", ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?>.<",
"<.>?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWe098765432rmnbvcxzasdfghjklpoiuyt1"), "<.>?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWe098765432rmnbvcxzasdfghjklpoiuyt1"),
# 83 - vflcaqGO8 2013/07/11 # 83 - vflTWC9KW 2013/08/01
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!#$%^&*()_+={[};?/>.<", ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!#$%^&*()_+={[};?/>.<",
"urty8ioplkjhgfdsazxcvbqm1234567S90QWERTYUIOPLKJHGFDnAZXCVBNM!#$%^&*()_+={[};?/>.<"), "qwertyuioplkjhg>dsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!#$%^&*()_+={[};?/f"),
# 82 # 82
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/>.<", ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/>.<",
"Q>/?;}[{=+-(*<^%$#@!MNBVCXZASDFGHKLPOIUY8REWT0q&7654321mnbvcxzasdfghjklpoiuytrew9"), "Q>/?;}[{=+-(*<^%$#@!MNBVCXZASDFGHKLPOIUY8REWT0q&7654321mnbvcxzasdfghjklpoiuytrew9"),
@ -40,6 +40,12 @@ tests = [
"Z?;}[{=+-(*&^%$#@!MNBVCXRASDFGHKLPOIUYT/EWQ0q87659321mnbvcxzasdfghjkl4oiuytrewp"), "Z?;}[{=+-(*&^%$#@!MNBVCXRASDFGHKLPOIUYT/EWQ0q87659321mnbvcxzasdfghjkl4oiuytrewp"),
] ]
tests_age_gate = [
# 86 - vflqinMWD
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[|};?/>.<",
"ertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!/#$%^&*()_-+={[|};?@"),
]
def find_matching(wrong, right): def find_matching(wrong, right):
idxs = [wrong.index(c) for c in right] idxs = [wrong.index(c) for c in right]
return compress(idxs) return compress(idxs)
@ -90,6 +96,8 @@ def genall(tests):
def main(): def main():
print(genall(tests)) print(genall(tests))
print(u' Age gate:')
print(genall(tests_age_gate))
if __name__ == '__main__': if __name__ == '__main__':
main() main()

38
test/test_playlists.py Normal file
View File

@ -0,0 +1,38 @@
#!/usr/bin/env python
import sys
import unittest
import json
# Allow direct execution
import os
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from youtube_dl.extractor import DailymotionPlaylistIE, VimeoChannelIE
from youtube_dl.utils import *
from helper import FakeYDL
class TestPlaylists(unittest.TestCase):
    """Run the playlist-style extractors on known URLs and check the result shape."""

    def assertIsPlaylist(self, info):
        """Assert that *info* carries the 'playlist' marker in its '_type' key."""
        self.assertEqual(info['_type'], 'playlist')

    def test_dailymotion_playlist(self):
        # Build the extractor on top of the fake downloader and run it once.
        extractor = DailymotionPlaylistIE(FakeYDL())
        playlist = extractor.extract('http://www.dailymotion.com/playlist/xv4bw_nqtv_sport/1#video=xl8v3q')
        self.assertIsPlaylist(playlist)
        self.assertEqual(playlist['title'], u'SPORT')
        entry_count = len(playlist['entries'])
        self.assertTrue(entry_count > 20)

    def test_vimeo_channel(self):
        # Same checks as above, for a Vimeo channel.
        extractor = VimeoChannelIE(FakeYDL())
        playlist = extractor.extract('http://vimeo.com/channels/tributes')
        self.assertIsPlaylist(playlist)
        self.assertEqual(playlist['title'], u'Vimeo Tributes')
        entry_count = len(playlist['entries'])
        self.assertTrue(entry_count > 24)
if __name__ == '__main__':
unittest.main()

View File

@ -1,72 +0,0 @@
#!/usr/bin/env python
import unittest
import sys
# Allow direct execution
import os
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from youtube_dl.extractor.youtube import YoutubeIE
from helper import FakeYDL
sig = YoutubeIE(FakeYDL())._decrypt_signature
class TestYoutubeSig(unittest.TestCase):
    """Check ``sig`` against known (scrambled, expected) signature pairs."""

    def _check(self, scrambled, expected):
        """Assert that decrypting *scrambled* with ``sig`` yields *expected*."""
        self.assertEqual(sig(scrambled), expected)

    def test_92(self):
        self._check(
            "F9F9B6E6FD47029957AB911A964CC20D95A181A5D37A2DBEFD67D403DB0E8BE4F4910053E4E8A79.0B70B.0B80B8",
            "69B6E6FD47029957AB911A9F4CC20D95A181A5D3.A2DBEFD67D403DB0E8BE4F4910053E4E8A7980B7")

    def test_90(self):
        self._check(
            "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<'`",
            "mrtyuioplkjhgfdsazxcvbne1234567890QWER[YUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={`]}|")

    def test_88(self):
        self._check(
            "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<",
            "J:|}][{=+-_)(*&;%$#@>MNBVCXZASDFGH^KLPOIUYTREWQ0987654321mnbvcxzasdfghrklpoiuytej")

    def test_87(self):
        self._check(
            "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$^&*()_-+={[]}|:;?/>.<",
            "tyuioplkjhgfdsazxcv<nm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$^&*()_-+={[]}|:;?/>")

    def test_86(self):
        self._check(
            "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[|};?/>.<",
            "ertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!/#$%^&*()_-+={[|};?@")

    def test_85(self):
        self._check(
            "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?/>.<",
            "ertyuiqplkjhgfdsazx$vbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#<%^&*()_-+={[};?/c")

    def test_84(self):
        self._check(
            "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?>.<",
            "<.>?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWe098765432rmnbvcxzasdfghjklpoiuyt1")

    def test_83(self):
        self._check(
            "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!#$%^&*()_+={[};?/>.<",
            "urty8ioplkjhgfdsazxcvbqm1234567S90QWERTYUIOPLKJHGFDnAZXCVBNM!#$%^&*()_+={[};?/>.<")

    def test_82(self):
        self._check(
            "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/>.<",
            "Q>/?;}[{=+-(*<^%$#@!MNBVCXZASDFGHKLPOIUY8REWT0q&7654321mnbvcxzasdfghjklpoiuytrew9")

    def test_81(self):
        self._check(
            "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/>.",
            "C>/?;}[{=+-(*&^%$#@!MNBVYXZASDFGHKLPOIU.TREWQ0q87659321mnbvcxzasdfghjkl4oiuytrewp")

    def test_79(self):
        self._check(
            "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/",
            "Z?;}[{=+-(*&^%$#@!MNBVCXRASDFGHKLPOIUYT/EWQ0q87659321mnbvcxzasdfghjkl4oiuytrewp")
if __name__ == '__main__':
unittest.main()

View File

@ -386,6 +386,35 @@ class FileDownloader(object):
self.report_error(u'mplayer exited with code %d' % retval) self.report_error(u'mplayer exited with code %d' % retval)
return False return False
def _download_m3u8_with_ffmpeg(self, filename, url):
    """Download the m3u8 stream at *url* into *filename* by running ffmpeg.

    ffmpeg writes to the temporary name returned by ``self.temp_name`` and
    the result is renamed to *filename* once ffmpeg exits successfully; a
    'finished' progress hook is then fired with the resulting file size.

    Returns True when ffmpeg exits with status 0.  Returns False (after
    reporting an error) when ffmpeg cannot be started or exits with a
    non-zero status.
    """
    self.report_destination(filename)
    tmpfilename = self.temp_name(filename)

    args = ['ffmpeg', '-y', '-i', url, '-f', 'mp4', tmpfilename]
    # Check for ffmpeg first
    try:
        # The probe output is discarded; the devnull handle is closed as
        # soon as the probe returns (or fails) instead of being leaked.
        with open(os.path.devnull, 'w') as devnull:
            subprocess.call(['ffmpeg', '-h'], stdout=devnull, stderr=subprocess.STDOUT)
    except (OSError, IOError):
        self.report_error(u'm3u8 download detected but "%s" could not be run' % args[0])
        return False

    retval = subprocess.call(args)
    if retval != 0:
        self.to_stderr(u"\n")
        self.report_error(u'ffmpeg exited with code %d' % retval)
        return False

    fsize = os.path.getsize(encodeFilename(tmpfilename))
    self.to_screen(u'\r[%s] %s bytes' % (args[0], fsize))
    self.try_rename(tmpfilename, filename)
    self._hook_progress({
        'downloaded_bytes': fsize,
        'total_bytes': fsize,
        'filename': filename,
        'status': 'finished',
    })
    return True
def _do_download(self, filename, info_dict): def _do_download(self, filename, info_dict):
url = info_dict['url'] url = info_dict['url']
@ -411,6 +440,10 @@ class FileDownloader(object):
if url.startswith('mms') or url.startswith('rtsp'): if url.startswith('mms') or url.startswith('rtsp'):
return self._download_with_mplayer(filename, url) return self._download_with_mplayer(filename, url)
# m3u8 manifests are downloaded with ffmpeg
if determine_ext(url) == u'm3u8':
return self._download_m3u8_with_ffmpeg(filename, url)
tmpfilename = self.temp_name(filename) tmpfilename = self.temp_name(filename)
stream = None stream = None

View File

@ -100,7 +100,8 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor):
self._nopostoverwrites = nopostoverwrites self._nopostoverwrites = nopostoverwrites
def get_audio_codec(self, path): def get_audio_codec(self, path):
if not self._exes['ffprobe'] and not self._exes['avprobe']: return None if not self._exes['ffprobe'] and not self._exes['avprobe']:
raise PostProcessingError(u'ffprobe or avprobe not found. Please install one.')
try: try:
cmd = [self._exes['avprobe'] or self._exes['ffprobe'], '-show_streams', encodeFilename(self._ffmpeg_filename_argument(path))] cmd = [self._exes['avprobe'] or self._exes['ffprobe'], '-show_streams', encodeFilename(self._ffmpeg_filename_argument(path))]
handle = subprocess.Popen(cmd, stderr=compat_subprocess_get_DEVNULL(), stdout=subprocess.PIPE) handle = subprocess.Popen(cmd, stderr=compat_subprocess_get_DEVNULL(), stdout=subprocess.PIPE)
@ -208,7 +209,7 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor):
try: try:
os.utime(encodeFilename(new_path), (time.time(), information['filetime'])) os.utime(encodeFilename(new_path), (time.time(), information['filetime']))
except: except:
self._downloader.to_stderr(u'WARNING: Cannot update utime of audio file') self._downloader.report_warning(u'Cannot update utime of audio file')
information['filepath'] = new_path information['filepath'] = new_path
return self._nopostoverwrites,information return self._nopostoverwrites,information

View File

@ -594,7 +594,7 @@ class YoutubeDL(object):
# No clear decision yet, let IE decide # No clear decision yet, let IE decide
keep_video = keep_video_wish keep_video = keep_video_wish
except PostProcessingError as e: except PostProcessingError as e:
self.to_stderr(u'ERROR: ' + e.msg) self.report_error(e.msg)
if keep_video is False and not self.params.get('keepvideo', False): if keep_video is False and not self.params.get('keepvideo', False):
try: try:
self.to_screen(u'Deleting original file %s (pass -k to keep)' % filename) self.to_screen(u'Deleting original file %s (pass -k to keep)' % filename)

View File

@ -402,6 +402,8 @@ def _real_main(argv=None):
batchurls = batchfd.readlines() batchurls = batchfd.readlines()
batchurls = [x.strip() for x in batchurls] batchurls = [x.strip() for x in batchurls]
batchurls = [x for x in batchurls if len(x) > 0 and not re.search(r'^[#/;]', x)] batchurls = [x for x in batchurls if len(x) > 0 and not re.search(r'^[#/;]', x)]
if opts.verbose:
sys.stderr.write(u'[debug] Batch file urls: ' + repr(batchurls) + u'\n')
except IOError: except IOError:
sys.exit(u'ERROR: batch file could not be read') sys.exit(u'ERROR: batch file could not be read')
all_urls = batchurls + args all_urls = batchurls + args

View File

@ -12,7 +12,7 @@ from .comedycentral import ComedyCentralIE
from .condenast import CondeNastIE from .condenast import CondeNastIE
from .criterion import CriterionIE from .criterion import CriterionIE
from .cspan import CSpanIE from .cspan import CSpanIE
from .dailymotion import DailymotionIE from .dailymotion import DailymotionIE, DailymotionPlaylistIE
from .depositfiles import DepositFilesIE from .depositfiles import DepositFilesIE
from .dotsub import DotsubIE from .dotsub import DotsubIE
from .dreisat import DreiSatIE from .dreisat import DreiSatIE
@ -38,15 +38,18 @@ from .infoq import InfoQIE
from .instagram import InstagramIE from .instagram import InstagramIE
from .jukebox import JukeboxIE from .jukebox import JukeboxIE
from .justintv import JustinTVIE from .justintv import JustinTVIE
from .kankan import KankanIE
from .keek import KeekIE from .keek import KeekIE
from .liveleak import LiveLeakIE from .liveleak import LiveLeakIE
from .livestream import LivestreamIE from .livestream import LivestreamIE
from .metacafe import MetacafeIE from .metacafe import MetacafeIE
from .mixcloud import MixcloudIE from .mixcloud import MixcloudIE
from .mtv import MTVIE from .mtv import MTVIE
from .muzu import MuzuTVIE
from .myspass import MySpassIE from .myspass import MySpassIE
from .myvideo import MyVideoIE from .myvideo import MyVideoIE
from .nba import NBAIE from .nba import NBAIE
from .ooyala import OoyalaIE
from .photobucket import PhotobucketIE from .photobucket import PhotobucketIE
from .pornotube import PornotubeIE from .pornotube import PornotubeIE
from .rbmaradio import RBMARadioIE from .rbmaradio import RBMARadioIE
@ -71,7 +74,8 @@ from .ustream import UstreamIE
from .vbox7 import Vbox7IE from .vbox7 import Vbox7IE
from .veoh import VeohIE from .veoh import VeohIE
from .vevo import VevoIE from .vevo import VevoIE
from .vimeo import VimeoIE from .videofyme import VideofyMeIE
from .vimeo import VimeoIE, VimeoChannelIE
from .vine import VineIE from .vine import VineIE
from .c56 import C56IE from .c56 import C56IE
from .wat import WatIE from .wat import WatIE

View File

@ -17,13 +17,14 @@ class ArteTvIE(InfoExtractor):
""" """
_EMISSION_URL = r'(?:http://)?www\.arte.tv/guide/(?P<lang>fr|de)/(?:(?:sendungen|emissions)/)?(?P<id>.*?)/(?P<name>.*?)(\?.*)?' _EMISSION_URL = r'(?:http://)?www\.arte.tv/guide/(?P<lang>fr|de)/(?:(?:sendungen|emissions)/)?(?P<id>.*?)/(?P<name>.*?)(\?.*)?'
_VIDEOS_URL = r'(?:http://)?videos.arte.tv/(?P<lang>fr|de)/.*-(?P<id>.*?).html' _VIDEOS_URL = r'(?:http://)?videos.arte.tv/(?P<lang>fr|de)/.*-(?P<id>.*?).html'
_LIVEWEB_URL = r'(?:http://)?liveweb.arte.tv/(?P<lang>fr|de)/(?P<subpage>.+?)/(?P<name>.+)'
_LIVE_URL = r'index-[0-9]+\.html$' _LIVE_URL = r'index-[0-9]+\.html$'
IE_NAME = u'arte.tv' IE_NAME = u'arte.tv'
@classmethod @classmethod
def suitable(cls, url): def suitable(cls, url):
return any(re.match(regex, url) for regex in (cls._EMISSION_URL, cls._VIDEOS_URL)) return any(re.match(regex, url) for regex in (cls._EMISSION_URL, cls._VIDEOS_URL, cls._LIVEWEB_URL))
# TODO implement Live Stream # TODO implement Live Stream
# from ..utils import compat_urllib_parse # from ..utils import compat_urllib_parse
@ -68,6 +69,12 @@ class ArteTvIE(InfoExtractor):
lang = mobj.group('lang') lang = mobj.group('lang')
return self._extract_video(url, id, lang) return self._extract_video(url, id, lang)
mobj = re.match(self._LIVEWEB_URL, url)
if mobj is not None:
name = mobj.group('name')
lang = mobj.group('lang')
return self._extract_liveweb(url, name, lang)
if re.search(self._LIVE_URL, video_id) is not None: if re.search(self._LIVE_URL, video_id) is not None:
raise ExtractorError(u'Arte live streams are not yet supported, sorry') raise ExtractorError(u'Arte live streams are not yet supported, sorry')
# self.extractLiveStream(url) # self.extractLiveStream(url)
@ -85,7 +92,7 @@ class ArteTvIE(InfoExtractor):
info_dict = {'id': player_info['VID'], info_dict = {'id': player_info['VID'],
'title': player_info['VTI'], 'title': player_info['VTI'],
'description': player_info['VDE'], 'description': player_info.get('VDE'),
'upload_date': unified_strdate(player_info['VDA'].split(' ')[0]), 'upload_date': unified_strdate(player_info['VDA'].split(' ')[0]),
'thumbnail': player_info['programImage'], 'thumbnail': player_info['programImage'],
'ext': 'flv', 'ext': 'flv',
@ -98,12 +105,14 @@ class ArteTvIE(InfoExtractor):
l = 'F' l = 'F'
elif lang == 'de': elif lang == 'de':
l = 'A' l = 'A'
regexes = [r'VO?%s' % l, r'V%s-ST.' % l] regexes = [r'VO?%s' % l, r'VO?.-ST%s' % l]
return any(re.match(r, f['versionCode']) for r in regexes) return any(re.match(r, f['versionCode']) for r in regexes)
# Some formats may not be in the same language as the url # Some formats may not be in the same language as the url
formats = filter(_match_lang, formats) formats = filter(_match_lang, formats)
# We order the formats by quality # We order the formats by quality
formats = sorted(formats, key=lambda f: int(f['height'])) formats = sorted(formats, key=lambda f: int(f['height']))
# Prefer videos without subtitles in the same language
formats = sorted(formats, key=lambda f: re.match(r'VO(F|A)-STM\1', f['versionCode']) is None)
# Pick the best quality # Pick the best quality
format_info = formats[-1] format_info = formats[-1]
if format_info['mediaType'] == u'rtmp': if format_info['mediaType'] == u'rtmp':
@ -144,3 +153,22 @@ class ArteTvIE(InfoExtractor):
'url': video_url, 'url': video_url,
'ext': 'flv', 'ext': 'flv',
} }
def _extract_liveweb(self, url, name, lang):
    """Extract from http://liveweb.arte.tv/

    Downloads the page at *url*, reads the numeric event id from it, then
    fetches the event XML description and picks the video URL from it.

    Args:
        url: page URL of the liveweb event.
        name: identifier passed to ``_download_webpage`` for the first request.
        lang: language code; its capitalized form selects the ``name<Lang>``
            element used as the title.

    Returns:
        A dict with 'id', 'title', 'url', 'ext' and 'thumbnail' keys.
    """
    webpage = self._download_webpage(url, name)
    video_id = self._search_regex(r'eventId=(\d+?)("|&)', webpage, u'event id')
    config_xml = self._download_webpage('http://download.liveweb.arte.tv/o21/liveweb/events/event-%s.xml' % video_id,
                                        video_id, u'Downloading information')
    config_doc = xml.etree.ElementTree.fromstring(config_xml.encode('utf-8'))
    event_doc = config_doc.find('event')
    video_doc = event_doc.find('video')
    # Prefer the HD url and fall back to the SD one when HD is not listed
    url_node = video_doc.find('urlHd')
    if url_node is None:
        url_node = video_doc.find('urlSd')

    return {'id': video_id,
            'title': event_doc.find('name%s' % lang.capitalize()).text,
            'url': url_node.text.replace('MP4', 'mp4'),
            'ext': 'flv',
            'thumbnail': self._og_search_thumbnail(webpage),
            }

View File

@ -10,7 +10,7 @@ from ..utils import (
class CollegeHumorIE(InfoExtractor): class CollegeHumorIE(InfoExtractor):
_VALID_URL = r'^(?:https?://)?(?:www\.)?collegehumor\.com/(video|embed|e)/(?P<videoid>[0-9]+)/(?P<shorttitle>.*)$' _VALID_URL = r'^(?:https?://)?(?:www\.)?collegehumor\.com/(video|embed|e)/(?P<videoid>[0-9]+)/?(?P<shorttitle>.*)$'
_TEST = { _TEST = {
u'url': u'http://www.collegehumor.com/video/6902724/comic-con-cosplay-catastrophe', u'url': u'http://www.collegehumor.com/video/6902724/comic-con-cosplay-catastrophe',

View File

@ -1,9 +1,12 @@
import re import re
import json import json
import itertools
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
compat_urllib_request, compat_urllib_request,
get_element_by_attribute,
get_element_by_id,
ExtractorError, ExtractorError,
) )
@ -77,3 +80,31 @@ class DailymotionIE(InfoExtractor):
'ext': video_extension, 'ext': video_extension,
'thumbnail': info['thumbnail_url'] 'thumbnail': info['thumbnail_url']
}] }]
class DailymotionPlaylistIE(InfoExtractor):
    """Extractor that turns a Dailymotion playlist URL into a playlist result."""
    _VALID_URL = r'(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/playlist/(?P<id>.+?)/'
    _MORE_PAGES_INDICATOR = r'<div class="next">.*?<a.*?href="/playlist/.+?".*?>.*?</a>.*?</div>'

    def _real_extract(self, url):
        """Walk the playlist pages and return a playlist dict of video url results.

        Pages are requested one after another, starting at 1, until a page
        no longer matches ``_MORE_PAGES_INDICATOR``.  The playlist title is
        read from the last page downloaded.
        """
        playlist_id = re.match(self._VALID_URL, url).group('id')
        collected_ids = []

        pagenum = 1
        while True:
            page_url = 'https://www.dailymotion.com/playlist/%s/%s' % (playlist_id, pagenum)
            webpage = self._download_webpage(page_url, playlist_id,
                                             u'Downloading page %s' % pagenum)

            listing = get_element_by_attribute(u'class', u'video_list', webpage)
            collected_ids += re.findall(r'data-id="(.+?)" data-ext-id', listing)

            has_next = re.search(self._MORE_PAGES_INDICATOR, webpage, re.DOTALL) is not None
            if not has_next:
                break
            pagenum += 1

        entries = []
        for video_id in collected_ids:
            entries.append(self.url_result('http://www.dailymotion.com/video/%s' % video_id, 'Dailymotion'))

        playlist_title = get_element_by_id(u'playlist_name', webpage)
        return {'_type': 'playlist',
                'id': playlist_id,
                'title': playlist_title,
                'entries': entries,
                }

View File

@ -0,0 +1,37 @@
import re
from .common import InfoExtractor
from ..utils import determine_ext
class KankanIE(InfoExtractor):
    """Extractor for kankan.com video pages ending in ``<id>.shtml``."""
    _VALID_URL = r'https?://(?:.*?\.)?kankan\.com/.+?/(?P<id>\d+)\.shtml'

    _TEST = {
        u'url': u'http://yinyue.kankan.com/vod/48/48863.shtml',
        u'file': u'48863.flv',
        u'md5': u'29aca1e47ae68fc28804aca89f29507e',
        u'info_dict': {
            u'title': u'Ready To Go',
        },
    }

    def _real_extract(self, url):
        """Return the info dict ('id', 'title', 'url', 'ext') for the page at *url*.

        The title and the gcid are read from the video page; the gcid is
        then sent to the getCdnresource_flv endpoint, whose answer supplies
        the ip and path that make up the final video url.
        """
        video_id = re.match(self._VALID_URL, url).group('id')
        page = self._download_webpage(url, video_id)

        title = self._search_regex(r'G_TITLE=[\'"](.+?)[\'"]', page, u'video title')
        gcid = self._search_regex(r'lurl:[\'"]http://.+?/.+?/(.+?)/', page, u'gcid')

        cdn_query = 'http://p2s.cl.kankan.com/getCdnresource_flv?gcid=%s' % gcid
        cdn_info = self._download_webpage(cdn_query, video_id,
                                          u'Downloading video url info')
        host = self._search_regex(r'ip:"(.+?)"', cdn_info, u'video url ip')
        url_path = self._search_regex(r'path:"(.+?)"', cdn_info, u'video url path')
        video_url = 'http://%s%s' % (host, url_path)

        result = {'id': video_id,
                  'title': title,
                  'url': video_url,
                  'ext': determine_ext(video_url),
                  }
        return result

View File

@ -0,0 +1,64 @@
import re
import json
from .common import InfoExtractor
from ..utils import (
compat_urllib_parse,
determine_ext,
)
class MuzuTVIE(InfoExtractor):
    """Information extractor for muzu.tv video pages."""
    # The dots of the host name are escaped so that they only match a
    # literal '.' and unrelated hosts are not accepted.
    _VALID_URL = r'https?://www\.muzu\.tv/(.+?)/(.+?)/(?P<id>\d+)'
    IE_NAME = u'muzu.tv'

    _TEST = {
        u'url': u'http://www.muzu.tv/defected/marcashken-featuring-sos-cat-walk-original-mix-music-video/1981454/',
        u'file': u'1981454.mp4',
        u'md5': u'98f8b2c7bc50578d6a0364fff2bfb000',
        u'info_dict': {
            u'title': u'Cat Walk (Original Mix)',
            u'description': u'md5:90e868994de201b2570e4e5854e19420',
            u'uploader': u'MarcAshken featuring SOS',
        },
    }

    def _real_extract(self, url):
        """Return the info dict for the muzu.tv video at *url*.

        Three requests are made: the oembed API (title, thumbnail,
        description, author), the player init endpoint (which qualities
        exist) and the requestVideo endpoint (the final video url).

        Returns:
            A dict with 'id', 'title', 'url', 'ext', 'thumbnail',
            'description' and 'uploader' keys.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        info_data = compat_urllib_parse.urlencode({'format': 'json',
                                                   'url': url,
                                                   })
        video_info_page = self._download_webpage('http://www.muzu.tv/api/oembed/?%s' % info_data,
                                                 video_id, u'Downloading video info')
        info = json.loads(video_info_page)

        player_info_page = self._download_webpage('http://player.muzu.tv/player/playerInit?ai=%s' % video_id,
                                                  video_id, u'Downloading player info')
        video_info = json.loads(player_info_page)['videos'][0]
        # Take the first quality, best first, that the player reports;
        # when none is reported the loop ends with '360' as the fallback.
        for quality in ['1080', '720', '480', '360']:
            if video_info.get('v%s' % quality):
                break

        data = compat_urllib_parse.urlencode({'ai': video_id,
                                              # Even if each time you watch a video the hash changes,
                                              # it seems to work for different videos, and it will work
                                              # even if you use any non empty string as a hash
                                              'viewhash': 'VBNff6djeV4HV5TRPW5kOHub2k',
                                              'device': 'web',
                                              'qv': quality,
                                              })
        video_url_page = self._download_webpage('http://player.muzu.tv/player/requestVideo?%s' % data,
                                                video_id, u'Downloading video url')
        video_url_info = json.loads(video_url_page)
        video_url = video_url_info['url']

        return {'id': video_id,
                'title': info['title'],
                'url': video_url,
                'ext': determine_ext(video_url),
                'thumbnail': info['thumbnail_url'],
                'description': info['description'],
                'uploader': info['author_name'],
                }

View File

@ -2,11 +2,13 @@ import binascii
import base64 import base64
import hashlib import hashlib
import re import re
import json
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
compat_ord, compat_ord,
compat_urllib_parse, compat_urllib_parse,
compat_urllib_request,
ExtractorError, ExtractorError,
) )
@ -16,7 +18,7 @@ from ..utils import (
class MyVideoIE(InfoExtractor): class MyVideoIE(InfoExtractor):
"""Information Extractor for myvideo.de.""" """Information Extractor for myvideo.de."""
_VALID_URL = r'(?:http://)?(?:www\.)?myvideo\.de/watch/([0-9]+)/([^?/]+).*' _VALID_URL = r'(?:http://)?(?:www\.)?myvideo\.de/(?:[^/]+/)?watch/([0-9]+)/([^?/]+).*'
IE_NAME = u'myvideo' IE_NAME = u'myvideo'
_TEST = { _TEST = {
u'url': u'http://www.myvideo.de/watch/8229274/bowling_fail_or_win', u'url': u'http://www.myvideo.de/watch/8229274/bowling_fail_or_win',
@ -85,6 +87,20 @@ class MyVideoIE(InfoExtractor):
'ext': video_ext, 'ext': video_ext,
}] }]
mobj = re.search(r'data-video-service="/service/data/video/%s/config' % video_id, webpage)
if mobj is not None:
request = compat_urllib_request.Request('http://www.myvideo.de/service/data/video/%s/config' % video_id, '')
response = self._download_webpage(request, video_id,
u'Downloading video info')
info = json.loads(base64.b64decode(response).decode('utf-8'))
return {'id': video_id,
'title': info['title'],
'url': info['streaming_url'].replace('rtmpe', 'rtmpt'),
'play_path': info['filename'],
'ext': 'flv',
'thumbnail': info['thumbnail'][0]['url'],
}
# try encxml # try encxml
mobj = re.search('var flashvars={(.+?)}', webpage) mobj = re.search('var flashvars={(.+?)}', webpage)
if mobj is None: if mobj is None:

View File

@ -0,0 +1,52 @@
import re
import json
from .common import InfoExtractor
from ..utils import unescapeHTML
class OoyalaIE(InfoExtractor):
    """Extractor for ooyala.com player URLs identified by an ``embedCode``."""
    _VALID_URL = r'https?://.+?\.ooyala\.com/.*?embedCode=(?P<id>.+?)(&|$)'

    _TEST = {
        # From http://it.slashdot.org/story/13/04/25/178216/recovering-data-from-broken-hard-drives-and-ssds-video
        u'url': u'http://player.ooyala.com/player.js?embedCode=pxczE2YjpfHfn1f3M-ykG_AmJRRn0PD8',
        u'file': u'pxczE2YjpfHfn1f3M-ykG_AmJRRn0PD8.mp4',
        u'md5': u'3f5cceb3a7bf461d6c29dc466cf8033c',
        u'info_dict': {
            u'title': u'Explaining Data Recovery from Hard Drives and SSDs',
            u'description': u'How badly damaged does a drive have to be to defeat Russell and his crew? Apparently, smashed to bits.',
        },
    }

    def _extract_result(self, info, more_info):
        """Build one video info dict from a stream entry and its extra metadata."""
        embed_code = info['embedCode']
        title = unescapeHTML(info['title'])
        media_url = info['url']
        description = unescapeHTML(more_info['description'])
        thumbnail = more_info['promo']
        return {'id': embed_code,
                'ext': 'mp4',
                'title': title,
                'url': media_url,
                'description': description,
                'thumbnail': thumbnail,
                }

    def _real_extract(self, url):
        """Return a single video dict, or a playlist when a 'lineup' is present."""
        embedCode = re.match(self._VALID_URL, url).group('id')

        # The player script embeds the url of the mobile player page.
        player_page = self._download_webpage(
            'http://player.ooyala.com/player.js?embedCode=%s' % embedCode, embedCode)
        mobile_url = self._search_regex(r'mobile_player_url="(.+?)&device="',
                                        player_page, u'mobile player url')
        mobile_page = self._download_webpage(mobile_url, embedCode)

        # Both JSON payloads sit inside eval("...") calls with escaped quotes.
        raw_streams = self._search_regex(r'eval\("\((\[{.*?stream_redirect.*?}\])\)"\);', mobile_page, u'info').replace('\\"','"')
        raw_details = self._search_regex(r'eval\("\(({.*?\\"promo\\".*?})\)"', mobile_page, u'more info').replace('\\"','"')
        streams = json.loads(raw_streams)
        details = json.loads(raw_details)

        if not details.get('lineup'):
            return self._extract_result(streams[0], details)

        entries = []
        for stream, stream_details in zip(streams, details['lineup']):
            entries.append(self._extract_result(stream, stream_details))
        return {'_type': 'playlist',
                'id': embedCode,
                'title': unescapeHTML(details['title']),
                'entries': entries,
                }

View File

@ -6,20 +6,17 @@ import re
from .common import InfoExtractor from .common import InfoExtractor
class TF1IE(InfoExtractor): class TF1IE(InfoExtractor):
""" """TF1 uses the wat.tv player."""
TF1 uses the wat.tv player, currently it can only download videos with the
html5 player enabled, it cannot download HD videos.
"""
_WORKING = False
_VALID_URL = r'http://videos.tf1.fr/.*-(.*?).html' _VALID_URL = r'http://videos.tf1.fr/.*-(.*?).html'
_TEST = { _TEST = {
u'url': u'http://videos.tf1.fr/auto-moto/citroen-grand-c4-picasso-2013-presentation-officielle-8062060.html', u'url': u'http://videos.tf1.fr/auto-moto/citroen-grand-c4-picasso-2013-presentation-officielle-8062060.html',
u'file': u'10635995.mp4', u'file': u'10635995.mp4',
u'md5': u'66789d3e91278d332f75e1feb7aea327', u'md5': u'2e378cc28b9957607d5e88f274e637d8',
u'info_dict': { u'info_dict': {
u'title': u'Citroën Grand C4 Picasso 2013 : présentation officielle', u'title': u'Citroën Grand C4 Picasso 2013 : présentation officielle',
u'description': u'Vidéo officielle du nouveau Citroën Grand C4 Picasso, lancé à l\'automne 2013.', u'description': u'Vidéo officielle du nouveau Citroën Grand C4 Picasso, lancé à l\'automne 2013.',
} },
u'skip': u'Sometimes wat serves the whole file with the --test option',
} }
def _real_extract(self, url): def _real_extract(self, url):

View File

@ -35,12 +35,12 @@ class VevoIE(InfoExtractor):
self.report_extraction(video_id) self.report_extraction(video_id)
video_info = json.loads(info_json) video_info = json.loads(info_json)
m_urls = list(re.finditer(r'<video src="(?P<ext>.*?):(?P<url>.*?)"', links_webpage)) m_urls = list(re.finditer(r'<video src="(?P<ext>.*?):/?(?P<url>.*?)"', links_webpage))
if m_urls is None or len(m_urls) == 0: if m_urls is None or len(m_urls) == 0:
raise ExtractorError(u'Unable to extract video url') raise ExtractorError(u'Unable to extract video url')
# They are sorted from worst to best quality # They are sorted from worst to best quality
m_url = m_urls[-1] m_url = m_urls[-1]
video_url = base_url + m_url.group('url') video_url = base_url + '/' + m_url.group('url')
ext = m_url.group('ext') ext = m_url.group('ext')
return {'url': video_url, return {'url': video_url,

View File

@ -0,0 +1,49 @@
import re
import xml.etree.ElementTree
from .common import InfoExtractor
from ..utils import (
find_xpath_attr,
determine_ext,
)
class VideofyMeIE(InfoExtractor):
    """Information extractor for videofy.me video pages."""
    # The dots of both host names are escaped so that they only match a
    # literal '.' and unrelated hosts are not accepted.
    _VALID_URL = r'https?://(www\.videofy\.me/.+?|p\.videofy\.me/v)/(?P<id>\d+)(&|#|$)'
    IE_NAME = u'videofy.me'

    _TEST = {
        u'url': u'http://www.videofy.me/thisisvideofyme/1100701',
        u'file': u'1100701.mp4',
        u'md5': u'2046dd5758541d630bfa93e741e2fd79',
        u'info_dict': {
            u'title': u'This is VideofyMe',
            u'description': None,
            u'uploader': u'VideofyMe',
            u'uploader_id': u'thisisvideofyme',
        },
    }

    def _real_extract(self, url):
        """Return the info dict for the videofy.me video at *url*.

        The XML configuration for the video id is downloaded from
        sunshine.videofy.me and every field is read from it.

        Returns:
            A dict with 'id', 'title', 'url', 'ext', 'thumbnail',
            'description', 'uploader', 'uploader_id' and 'view_count'
            keys; 'view_count' is the first run of digits of the
            ``views`` element, as a string.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        config_xml = self._download_webpage('http://sunshine.videofy.me/?videoId=%s' % video_id,
                                            video_id)
        config = xml.etree.ElementTree.fromstring(config_xml.encode('utf-8'))
        video = config.find('video')
        sources = video.find('sources')
        # Prefer the 'HQ on' source and fall back to 'HQ off'
        url_node = find_xpath_attr(sources, 'source', 'id', 'HQ on')
        if url_node is None:
            url_node = find_xpath_attr(sources, 'source', 'id', 'HQ off')
        video_url = url_node.find('url').text

        return {'id': video_id,
                'title': video.find('title').text,
                'url': video_url,
                'ext': determine_ext(video_url),
                'thumbnail': video.find('thumb').text,
                'description': video.find('description').text,
                'uploader': config.find('blog/name').text,
                'uploader_id': video.find('identifier').text,
                'view_count': re.search(r'\d+', video.find('views').text).group(),
                }

View File

@ -1,5 +1,6 @@
import json import json
import re import re
import itertools
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
@ -171,3 +172,31 @@ class VimeoIE(InfoExtractor):
'thumbnail': video_thumbnail, 'thumbnail': video_thumbnail,
'description': video_description, 'description': video_description,
}] }]
class VimeoChannelIE(InfoExtractor):
    """Extractor that turns a Vimeo channel URL into a playlist result."""
    IE_NAME = u'vimeo:channel'
    # The backslash belongs in front of the dot: 'vimeo.\com' left the dot
    # unescaped and contained '\c', which is not a valid escape sequence.
    _VALID_URL = r'(?:https?://)?vimeo\.com/channels/(?P<id>[^/]+)'
    _MORE_PAGES_INDICATOR = r'<a.+?rel="next"'

    def _real_extract(self, url):
        """Walk the channel pages and return a playlist dict of video url results.

        Pages are requested one after another, starting at 1, until a page
        no longer matches ``_MORE_PAGES_INDICATOR``.  The channel title is
        read from the last page downloaded.

        Returns:
            A dict with '_type' set to 'playlist' plus 'id', 'title' and
            'entries' keys.
        """
        mobj = re.match(self._VALID_URL, url)
        channel_id = mobj.group('id')
        video_ids = []

        for pagenum in itertools.count(1):
            webpage = self._download_webpage('http://vimeo.com/channels/%s/videos/page:%d' % (channel_id, pagenum),
                                             channel_id, u'Downloading page %s' % pagenum)
            video_ids.extend(re.findall(r'id="clip_(\d+?)"', webpage))
            if re.search(self._MORE_PAGES_INDICATOR, webpage, re.DOTALL) is None:
                break

        entries = [self.url_result('http://vimeo.com/%s' % video_id, 'Vimeo')
                   for video_id in video_ids]
        channel_title = self._html_search_regex(r'<a href="/channels/%s">(.*?)</a>' % channel_id,
                                                webpage, u'channel title')
        return {'_type': 'playlist',
                'id': channel_id,
                'title': channel_title,
                'entries': entries,
                }

View File

@ -12,17 +12,17 @@ from ..utils import (
class WatIE(InfoExtractor): class WatIE(InfoExtractor):
_WORKING = False
_VALID_URL=r'http://www.wat.tv/.*-(?P<shortID>.*?)_.*?.html' _VALID_URL=r'http://www.wat.tv/.*-(?P<shortID>.*?)_.*?.html'
IE_NAME = 'wat.tv' IE_NAME = 'wat.tv'
_TEST = { _TEST = {
u'url': u'http://www.wat.tv/video/world-war-philadelphia-vost-6bv55_2fjr7_.html', u'url': u'http://www.wat.tv/video/world-war-philadelphia-vost-6bv55_2fjr7_.html',
u'file': u'10631273.mp4', u'file': u'10631273.mp4',
u'md5': u'0a4fe7870f31eaeabb5e25fd8da8414a', u'md5': u'd8b2231e1e333acd12aad94b80937e19',
u'info_dict': { u'info_dict': {
u'title': u'World War Z - Philadelphia VOST', u'title': u'World War Z - Philadelphia VOST',
u'description': u'La menace est partout. Que se passe-t-il à Philadelphia ?\r\nWORLD WAR Z, avec Brad Pitt, au cinéma le 3 juillet.\r\nhttp://www.worldwarz.fr', u'description': u'La menace est partout. Que se passe-t-il à Philadelphia ?\r\nWORLD WAR Z, avec Brad Pitt, au cinéma le 3 juillet.\r\nhttp://www.worldwarz.fr',
} },
u'skip': u'Sometimes wat serves the whole file with the --test option',
} }
def download_video_info(self, real_id): def download_video_info(self, real_id):
@ -59,20 +59,8 @@ class WatIE(InfoExtractor):
# Otherwise we can continue and extract just one part, we have to use # Otherwise we can continue and extract just one part, we have to use
# the short id for getting the video url # the short id for getting the video url
player_data = compat_urllib_parse.urlencode({'shortVideoId': short_id,
'html5': '1'})
player_info = self._download_webpage('http://www.wat.tv/player?' + player_data,
real_id, u'Downloading player info')
player = json.loads(player_info)['player']
html5_player = self._html_search_regex(r'iframe src="(.*?)"', player,
'html5 player')
player_webpage = self._download_webpage(html5_player, real_id,
u'Downloading player webpage')
video_url = self._search_regex(r'urlhtml5 : "(.*?)"', player_webpage,
'video url')
info = {'id': real_id, info = {'id': real_id,
'url': video_url, 'url': 'http://wat.tv/get/android5/%s.mp4' % real_id,
'ext': 'mp4', 'ext': 'mp4',
'title': first_chapter['title'], 'title': first_chapter['title'],
'thumbnail': first_chapter['preview'], 'thumbnail': first_chapter['preview'],

View File

@ -21,6 +21,13 @@ class WorldStarHipHopIE(InfoExtractor):
webpage_src = self._download_webpage(url, video_id) webpage_src = self._download_webpage(url, video_id)
m_vevo_id = re.search(r'videoId=(.*?)&amp?',
webpage_src)
if m_vevo_id is not None:
self.to_screen(u'Vevo video detected:')
return self.url_result('vevo:%s' % m_vevo_id.group(1), ie='Vevo')
video_url = self._search_regex(r'so\.addVariable\("file","(.*?)"\)', video_url = self._search_regex(r'so\.addVariable\("file","(.*?)"\)',
webpage_src, u'video URL') webpage_src, u'video URL')

View File

@ -153,8 +153,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
$""" $"""
_NEXT_URL_RE = r'[\?&]next_url=([^&]+)' _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
# Listed in order of quality # Listed in order of quality
_available_formats = ['38', '37', '46', '22', '45', '35', '44', '34', '18', '43', '6', '5', '17', '13'] _available_formats = ['38', '37', '46', '22', '45', '35', '44', '34', '18', '43', '6', '5', '17', '13',
_available_formats_prefer_free = ['38', '46', '37', '45', '22', '44', '35', '43', '34', '18', '6', '5', '17', '13'] '95', '94', '93', '92', '132', '151',
'85', '84', '102', '83', '101', '82', '100',
]
_available_formats_prefer_free = ['38', '46', '37', '45', '22', '44', '35', '43', '34', '18', '6', '5', '17', '13',
'95', '94', '93', '92', '132', '151',
'85', '102', '84', '101', '83', '100', '82',
]
_video_extensions = { _video_extensions = {
'13': '3gp', '13': '3gp',
'17': 'mp4', '17': 'mp4',
@ -166,6 +172,24 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'44': 'webm', '44': 'webm',
'45': 'webm', '45': 'webm',
'46': 'webm', '46': 'webm',
# 3d videos
'82': 'mp4',
'83': 'mp4',
'84': 'mp4',
'85': 'mp4',
'100': 'webm',
'101': 'webm',
'102': 'webm',
# videos that use m3u8
'92': 'mp4',
'93': 'mp4',
'94': 'mp4',
'95': 'mp4',
'96': 'mp4',
'132': 'mp4',
'151': 'mp4',
} }
_video_dimensions = { _video_dimensions = {
'5': '240x400', '5': '240x400',
@ -182,7 +206,22 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'44': '480x854', '44': '480x854',
'45': '720x1280', '45': '720x1280',
'46': '1080x1920', '46': '1080x1920',
'82': '360p',
'83': '480p',
'84': '720p',
'85': '1080p',
'92': '240p',
'93': '360p',
'94': '480p',
'95': '720p',
'96': '1080p',
'100': '360p',
'101': '480p',
'102': '720p',
'132': '240p',
'151': '72p',
} }
_3d_itags = ['85', '84', '102', '83', '101', '82', '100']
IE_NAME = u'youtube' IE_NAME = u'youtube'
_TESTS = [ _TESTS = [
{ {
@ -232,6 +271,21 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
u"uploader_id": u"justintimberlakeVEVO" u"uploader_id": u"justintimberlakeVEVO"
} }
}, },
{
u'url': u'https://www.youtube.com/watch?v=TGi3HqYrWHE',
u'file': u'TGi3HqYrWHE.mp4',
u'note': u'm3u8 video',
u'info_dict': {
u'title': u'Triathlon - Men - London 2012 Olympic Games',
u'description': u'- Men - TR02 - Triathlon - 07 August 2012 - London 2012 Olympic Games',
u'uploader': u'olympic',
u'upload_date': u'20120807',
u'uploader_id': u'olympic',
},
u'params': {
u'skip_download': True,
},
},
] ]
@ -284,15 +338,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
elif len(s) == 88: elif len(s) == 88:
return s[48] + s[81:67:-1] + s[82] + s[66:62:-1] + s[85] + s[61:48:-1] + s[67] + s[47:12:-1] + s[3] + s[11:3:-1] + s[2] + s[12] return s[48] + s[81:67:-1] + s[82] + s[66:62:-1] + s[85] + s[61:48:-1] + s[67] + s[47:12:-1] + s[3] + s[11:3:-1] + s[2] + s[12]
elif len(s) == 87: elif len(s) == 87:
return s[4:23] + s[86] + s[24:85] return s[83:53:-1] + s[3] + s[52:40:-1] + s[86] + s[39:10:-1] + s[0] + s[9:3:-1] + s[53]
elif len(s) == 86: elif len(s) == 86:
return s[2:63] + s[82] + s[64:82] + s[63] return s[83:85] + s[26] + s[79:46:-1] + s[85] + s[45:36:-1] + s[30] + s[35:30:-1] + s[46] + s[29:26:-1] + s[82] + s[25:1:-1]
elif len(s) == 85: elif len(s) == 85:
return s[2:8] + s[0] + s[9:21] + s[65] + s[22:65] + s[84] + s[66:82] + s[21] return s[2:8] + s[0] + s[9:21] + s[65] + s[22:65] + s[84] + s[66:82] + s[21]
elif len(s) == 84: elif len(s) == 84:
return s[83:36:-1] + s[2] + s[35:26:-1] + s[3] + s[25:3:-1] + s[26] return s[83:27:-1] + s[0] + s[26:5:-1] + s[2:0:-1] + s[27]
elif len(s) == 83: elif len(s) == 83:
return s[6] + s[3:6] + s[33] + s[7:24] + s[0] + s[25:33] + s[53] + s[34:53] + s[24] + s[54:] return s[:15] + s[80] + s[16:80] + s[15]
elif len(s) == 82: elif len(s) == 82:
return s[36] + s[79:67:-1] + s[81] + s[66:40:-1] + s[33] + s[39:36:-1] + s[40] + s[35] + s[0] + s[67] + s[32:0:-1] + s[34] return s[36] + s[79:67:-1] + s[81] + s[66:40:-1] + s[33] + s[39:36:-1] + s[40] + s[35] + s[0] + s[67] + s[32:0:-1] + s[34]
elif len(s) == 81: elif len(s) == 81:
@ -303,6 +357,16 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
else: else:
raise ExtractorError(u'Unable to decrypt signature, key length %d not supported; retrying might work' % (len(s))) raise ExtractorError(u'Unable to decrypt signature, key length %d not supported; retrying might work' % (len(s)))
def _decrypt_signature_age_gate(self, s):
# The videos with age protection use another player, so the algorithms
# can be different.
if len(s) == 86:
return s[2:63] + s[82] + s[64:82] + s[63]
else:
# Fallback to the other algortihms
return self._decrypt_signature(s)
def _get_available_subtitles(self, video_id): def _get_available_subtitles(self, video_id):
self.report_video_subtitles_download(video_id) self.report_video_subtitles_download(video_id)
request = compat_urllib_request.Request('http://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id) request = compat_urllib_request.Request('http://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id)
@ -404,7 +468,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
def _print_formats(self, formats): def _print_formats(self, formats):
print('Available formats:') print('Available formats:')
for x in formats: for x in formats:
print('%s\t:\t%s\t[%s]' %(x, self._video_extensions.get(x, 'flv'), self._video_dimensions.get(x, '???'))) print('%s\t:\t%s\t[%s]%s' %(x, self._video_extensions.get(x, 'flv'),
self._video_dimensions.get(x, '???'),
' (3D)' if x in self._3d_itags else ''))
def _extract_id(self, url): def _extract_id(self, url):
mobj = re.match(self._VALID_URL, url, re.VERBOSE) mobj = re.match(self._VALID_URL, url, re.VERBOSE)
@ -413,6 +479,57 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
video_id = mobj.group(2) video_id = mobj.group(2)
return video_id return video_id
def _get_video_url_list(self, url_map):
"""
Transform a dictionary in the format {itag:url} to a list of (itag, url)
with the requested formats.
"""
req_format = self._downloader.params.get('format', None)
format_limit = self._downloader.params.get('format_limit', None)
available_formats = self._available_formats_prefer_free if self._downloader.params.get('prefer_free_formats', False) else self._available_formats
if format_limit is not None and format_limit in available_formats:
format_list = available_formats[available_formats.index(format_limit):]
else:
format_list = available_formats
existing_formats = [x for x in format_list if x in url_map]
if len(existing_formats) == 0:
raise ExtractorError(u'no known formats available for video')
if self._downloader.params.get('listformats', None):
self._print_formats(existing_formats)
return
if req_format is None or req_format == 'best':
video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality
elif req_format == 'worst':
video_url_list = [(existing_formats[-1], url_map[existing_formats[-1]])] # worst quality
elif req_format in ('-1', 'all'):
video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
else:
# Specific formats. We pick the first in a slash-delimeted sequence.
# For example, if '1/2/3/4' is requested and '2' and '4' are available, we pick '2'.
req_formats = req_format.split('/')
video_url_list = None
for rf in req_formats:
if rf in url_map:
video_url_list = [(rf, url_map[rf])]
break
if video_url_list is None:
raise ExtractorError(u'requested format not available')
return video_url_list
def _extract_from_m3u8(self, manifest_url, video_id):
url_map = {}
def _get_urls(_manifest):
lines = _manifest.split('\n')
urls = filter(lambda l: l and not l.startswith('#'),
lines)
return urls
manifest = self._download_webpage(manifest_url, video_id, u'Downloading formats manifest')
formats_urls = _get_urls(manifest)
for format_url in formats_urls:
itag = self._search_regex(r'itag/(\d+?)/', format_url, 'itag')
url_map[itag] = format_url
return url_map
def _real_extract(self, url): def _real_extract(self, url):
if re.match(r'(?:https?://)?[^/]+/watch\?feature=[a-z_]+$', url): if re.match(r'(?:https?://)?[^/]+/watch\?feature=[a-z_]+$', url):
self._downloader.report_warning(u'Did you forget to quote the URL? Remember that & is a meta-character in most shells, so you want to put the URL in quotes, like youtube-dl \'http://www.youtube.com/watch?feature=foo&v=BaW_jenozKc\' (or simply youtube-dl BaW_jenozKc ).') self._downloader.report_warning(u'Did you forget to quote the URL? Remember that & is a meta-character in most shells, so you want to put the URL in quotes, like youtube-dl \'http://www.youtube.com/watch?feature=foo&v=BaW_jenozKc\' (or simply youtube-dl BaW_jenozKc ).')
@ -567,7 +684,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
video_duration = compat_urllib_parse.unquote_plus(video_info['length_seconds'][0]) video_duration = compat_urllib_parse.unquote_plus(video_info['length_seconds'][0])
# Decide which formats to download # Decide which formats to download
req_format = self._downloader.params.get('format', None)
try: try:
mobj = re.search(r';ytplayer.config = ({.*?});', video_webpage) mobj = re.search(r';ytplayer.config = ({.*?});', video_webpage)
@ -602,8 +718,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
s = url_data['s'][0] s = url_data['s'][0]
if age_gate: if age_gate:
player_version = self._search_regex(r'ad3-(.+?)\.swf', player_version = self._search_regex(r'ad3-(.+?)\.swf',
video_info['ad3_module'][0], 'flash player', video_info['ad3_module'][0] if 'ad3_module' in video_info else 'NOT FOUND',
fatal=False) 'flash player', fatal=False)
player = 'flash player %s' % player_version player = 'flash player %s' % player_version
else: else:
player = u'html5 player %s' % self._search_regex(r'html5player-(.+?)\.js', video_webpage, player = u'html5 player %s' % self._search_regex(r'html5player-(.+?)\.js', video_webpage,
@ -611,41 +727,25 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
parts_sizes = u'.'.join(compat_str(len(part)) for part in s.split('.')) parts_sizes = u'.'.join(compat_str(len(part)) for part in s.split('.'))
self.to_screen(u'encrypted signature length %d (%s), itag %s, %s' % self.to_screen(u'encrypted signature length %d (%s), itag %s, %s' %
(len(s), parts_sizes, url_data['itag'][0], player)) (len(s), parts_sizes, url_data['itag'][0], player))
signature = self._decrypt_signature(url_data['s'][0]) encrypted_sig = url_data['s'][0]
if age_gate:
signature = self._decrypt_signature_age_gate(encrypted_sig)
else:
signature = self._decrypt_signature(encrypted_sig)
url += '&signature=' + signature url += '&signature=' + signature
if 'ratebypass' not in url: if 'ratebypass' not in url:
url += '&ratebypass=yes' url += '&ratebypass=yes'
url_map[url_data['itag'][0]] = url url_map[url_data['itag'][0]] = url
video_url_list = self._get_video_url_list(url_map)
format_limit = self._downloader.params.get('format_limit', None) if not video_url_list:
available_formats = self._available_formats_prefer_free if self._downloader.params.get('prefer_free_formats', False) else self._available_formats
if format_limit is not None and format_limit in available_formats:
format_list = available_formats[available_formats.index(format_limit):]
else:
format_list = available_formats
existing_formats = [x for x in format_list if x in url_map]
if len(existing_formats) == 0:
raise ExtractorError(u'no known formats available for video')
if self._downloader.params.get('listformats', None):
self._print_formats(existing_formats)
return return
if req_format is None or req_format == 'best': elif video_info.get('hlsvp'):
video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality manifest_url = video_info['hlsvp'][0]
elif req_format == 'worst': url_map = self._extract_from_m3u8(manifest_url, video_id)
video_url_list = [(existing_formats[-1], url_map[existing_formats[-1]])] # worst quality video_url_list = self._get_video_url_list(url_map)
elif req_format in ('-1', 'all'): if not video_url_list:
video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats return
else:
# Specific formats. We pick the first in a slash-delimeted sequence.
# For example, if '1/2/3/4' is requested and '2' and '4' are available, we pick '2'.
req_formats = req_format.split('/')
video_url_list = None
for rf in req_formats:
if rf in url_map:
video_url_list = [(rf, url_map[rf])]
break
if video_url_list is None:
raise ExtractorError(u'requested format not available')
else: else:
raise ExtractorError(u'no conn or url_encoded_fmt_stream_map information found in video info') raise ExtractorError(u'no conn or url_encoded_fmt_stream_map information found in video info')
@ -654,8 +754,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
# Extension # Extension
video_extension = self._video_extensions.get(format_param, 'flv') video_extension = self._video_extensions.get(format_param, 'flv')
video_format = '{0} - {1}'.format(format_param if format_param else video_extension, video_format = '{0} - {1}{2}'.format(format_param if format_param else video_extension,
self._video_dimensions.get(format_param, '???')) self._video_dimensions.get(format_param, '???'),
' (3D)' if format_param in self._3d_itags else '')
results.append({ results.append({
'id': video_id, 'id': video_id,

View File

@ -207,7 +207,7 @@ if sys.version_info >= (2,7):
def find_xpath_attr(node, xpath, key, val): def find_xpath_attr(node, xpath, key, val):
""" Find the xpath xpath[@key=val] """ """ Find the xpath xpath[@key=val] """
assert re.match(r'^[a-zA-Z]+$', key) assert re.match(r'^[a-zA-Z]+$', key)
assert re.match(r'^[a-zA-Z@]*$', val) assert re.match(r'^[a-zA-Z@\s]*$', val)
expr = xpath + u"[@%s='%s']" % (key, val) expr = xpath + u"[@%s='%s']" % (key, val)
return node.find(expr) return node.find(expr)
else: else:

View File

@ -1,2 +1,2 @@
__version__ = '2013.07.25.2' __version__ = '2013.08.08'