diff --git a/devscripts/youtube_genalgo.py b/devscripts/youtube_genalgo.py index 22977ccd9..31d6ec952 100644 --- a/devscripts/youtube_genalgo.py +++ b/devscripts/youtube_genalgo.py @@ -17,18 +17,18 @@ tests = [ # 87 - vflART1Nf 2013/07/24 ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$^&*()_-+={[]}|:;?/>.<", "tyuioplkjhgfdsazxcv"), - # 86 - vfl_ymO4Z 2013/06/27 + # 86 - vflm_D8eE 2013/07/31 ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[|};?/>.<", - "ertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!/#$%^&*()_-+={[|};?@"), + ">.1}|[{=+-_)(*&^%$#@!MNBVCXZASDFGHJK.<", "ertyuiqplkjhgfdsazx$vbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#<%^&*()_-+={[};?/c"), # 84 ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?>.<", "<.>?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWe098765432rmnbvcxzasdfghjklpoiuyt1"), - # 83 - vflcaqGO8 2013/07/11 + # 83 - vflTWC9KW 2013/08/01 ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!#$%^&*()_+={[};?/>.<", - "urty8ioplkjhgfdsazxcvbqm1234567S90QWERTYUIOPLKJHGFDnAZXCVBNM!#$%^&*()_+={[};?/>.<"), + "qwertyuioplkjhg>dsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!#$%^&*()_+={[};?/f"), # 82 ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/>.<", "Q>/?;}[{=+-(*<^%$#@!MNBVCXZASDFGHKLPOIUY8REWT0q&7654321mnbvcxzasdfghjklpoiuytrew9"), @@ -40,6 +40,12 @@ tests = [ "Z?;}[{=+-(*&^%$#@!MNBVCXRASDFGHKLPOIUYT/EWQ0q87659321mnbvcxzasdfghjkl4oiuytrewp"), ] +tests_age_gate = [ + # 86 - vflqinMWD + ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[|};?/>.<", + "ertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!/#$%^&*()_-+={[|};?@"), +] + def find_matching(wrong, right): idxs = [wrong.index(c) for c in right] return compress(idxs) @@ -90,6 +96,8 @@ def genall(tests): def main(): print(genall(tests)) + print(u' Age gate:') + print(genall(tests_age_gate)) if __name__ == '__main__': main() diff --git a/test/test_playlists.py b/test/test_playlists.py new file mode 100644 index 000000000..65de3a55c --- /dev/null +++ b/test/test_playlists.py @@ -0,0 +1,38 @@ +#!/usr/bin/env python + +import sys +import unittest +import json + +# Allow direct execution +import os +sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from youtube_dl.extractor import DailymotionPlaylistIE, VimeoChannelIE +from youtube_dl.utils import * + +from helper import FakeYDL + +class TestPlaylists(unittest.TestCase): + def assertIsPlaylist(self, info): + """Make sure the info has '_type' set to 'playlist'""" + self.assertEqual(info['_type'], 'playlist') + + def test_dailymotion_playlist(self): + dl = FakeYDL() + ie = DailymotionPlaylistIE(dl) + result = ie.extract('http://www.dailymotion.com/playlist/xv4bw_nqtv_sport/1#video=xl8v3q') + self.assertIsPlaylist(result) + self.assertEqual(result['title'], u'SPORT') + self.assertTrue(len(result['entries']) > 20) + + def test_vimeo_channel(self): + dl = FakeYDL() + ie = VimeoChannelIE(dl) + result = ie.extract('http://vimeo.com/channels/tributes') + self.assertIsPlaylist(result) + self.assertEqual(result['title'], u'Vimeo Tributes') + self.assertTrue(len(result['entries']) > 24) + +if __name__ == '__main__': + unittest.main() diff --git a/test/test_youtube_sig.py b/test/test_youtube_sig.py deleted file mode 100644 index 4d45a0e08..000000000 --- a/test/test_youtube_sig.py +++ /dev/null @@ -1,72 +0,0 @@ -#!/usr/bin/env python - -import unittest -import sys - -# Allow direct execution -import os -sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) - -from youtube_dl.extractor.youtube import YoutubeIE -from helper import FakeYDL - -sig = YoutubeIE(FakeYDL())._decrypt_signature - -class TestYoutubeSig(unittest.TestCase): - def test_92(self): - wrong = "F9F9B6E6FD47029957AB911A964CC20D95A181A5D37A2DBEFD67D403DB0E8BE4F4910053E4E8A79.0B70B.0B80B8" - right = "69B6E6FD47029957AB911A9F4CC20D95A181A5D3.A2DBEFD67D403DB0E8BE4F4910053E4E8A7980B7" - self.assertEqual(sig(wrong), right) - - def test_90(self): - wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<'`" - right = "mrtyuioplkjhgfdsazxcvbne1234567890QWER[YUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={`]}|" - self.assertEqual(sig(wrong), right) - - def test_88(self): - wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<" - right = "J:|}][{=+-_)(*&;%$#@>MNBVCXZASDFGH^KLPOIUYTREWQ0987654321mnbvcxzasdfghrklpoiuytej" - self.assertEqual(sig(wrong), right) - - def test_87(self): - wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$^&*()_-+={[]}|:;?/>.<" - right = "tyuioplkjhgfdsazxcv" - self.assertEqual(sig(wrong), right) - - def test_86(self): - wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[|};?/>.<" - right = "ertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!/#$%^&*()_-+={[|};?@" - self.assertEqual(sig(wrong), right) - - def test_85(self): - wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?/>.<" - right = "ertyuiqplkjhgfdsazx$vbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#<%^&*()_-+={[};?/c" - self.assertEqual(sig(wrong), right) - - def test_84(self): - wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?>.<" - right = "<.>?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWe098765432rmnbvcxzasdfghjklpoiuyt1" - self.assertEqual(sig(wrong), right) - - def test_83(self): - wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!#$%^&*()_+={[};?/>.<" - right = "urty8ioplkjhgfdsazxcvbqm1234567S90QWERTYUIOPLKJHGFDnAZXCVBNM!#$%^&*()_+={[};?/>.<" - self.assertEqual(sig(wrong), right) - - def test_82(self): - wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/>.<" - right = "Q>/?;}[{=+-(*<^%$#@!MNBVCXZASDFGHKLPOIUY8REWT0q&7654321mnbvcxzasdfghjklpoiuytrew9" - self.assertEqual(sig(wrong), right) - - def test_81(self): - wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/>." - right = "C>/?;}[{=+-(*&^%$#@!MNBVYXZASDFGHKLPOIU.TREWQ0q87659321mnbvcxzasdfghjkl4oiuytrewp" - self.assertEqual(sig(wrong), right) - - def test_79(self): - wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/" - right = "Z?;}[{=+-(*&^%$#@!MNBVCXRASDFGHKLPOIUYT/EWQ0q87659321mnbvcxzasdfghjkl4oiuytrewp" - self.assertEqual(sig(wrong), right) - -if __name__ == '__main__': - unittest.main() diff --git a/youtube_dl/FileDownloader.py b/youtube_dl/FileDownloader.py index f748df428..1f9588825 100644 --- a/youtube_dl/FileDownloader.py +++ b/youtube_dl/FileDownloader.py @@ -386,6 +386,35 @@ class FileDownloader(object): self.report_error(u'mplayer exited with code %d' % retval) return False + def _download_m3u8_with_ffmpeg(self, filename, url): + self.report_destination(filename) + tmpfilename = self.temp_name(filename) + + args = ['ffmpeg', '-y', '-i', url, '-f', 'mp4', tmpfilename] + # Check for ffmpeg first + try: + subprocess.call(['ffmpeg', '-h'], stdout=(open(os.path.devnull, 'w')), stderr=subprocess.STDOUT) + except (OSError, IOError): + self.report_error(u'm3u8 download detected but "%s" could not be run' % args[0] ) + return False + + retval = subprocess.call(args) + if retval == 0: + fsize = os.path.getsize(encodeFilename(tmpfilename)) + self.to_screen(u'\r[%s] %s bytes' % (args[0], fsize)) + self.try_rename(tmpfilename, filename) + self._hook_progress({ + 'downloaded_bytes': fsize, + 'total_bytes': fsize, + 'filename': filename, + 'status': 'finished', + }) + return True + else: + self.to_stderr(u"\n") + self.report_error(u'ffmpeg exited with code %d' % retval) + return False + def _do_download(self, filename, info_dict): url = info_dict['url'] @@ -411,6 +440,10 @@ class FileDownloader(object): if url.startswith('mms') or url.startswith('rtsp'): return self._download_with_mplayer(filename, url) + # m3u8 manifest are downloaded with ffmpeg + if determine_ext(url) == u'm3u8': + return self._download_m3u8_with_ffmpeg(filename, url) + tmpfilename = self.temp_name(filename) stream = None diff --git a/youtube_dl/PostProcessor.py b/youtube_dl/PostProcessor.py index 8c5e53991..fddf58606 100644 --- a/youtube_dl/PostProcessor.py +++ b/youtube_dl/PostProcessor.py @@ -100,7 +100,8 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor): self._nopostoverwrites = nopostoverwrites def get_audio_codec(self, path): - if not self._exes['ffprobe'] and not self._exes['avprobe']: return None + if not self._exes['ffprobe'] and not self._exes['avprobe']: + raise PostProcessingError(u'ffprobe or avprobe not found. Please install one.') try: cmd = [self._exes['avprobe'] or self._exes['ffprobe'], '-show_streams', encodeFilename(self._ffmpeg_filename_argument(path))] handle = subprocess.Popen(cmd, stderr=compat_subprocess_get_DEVNULL(), stdout=subprocess.PIPE) @@ -208,7 +209,7 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor): try: os.utime(encodeFilename(new_path), (time.time(), information['filetime'])) except: - self._downloader.to_stderr(u'WARNING: Cannot update utime of audio file') + self._downloader.report_warning(u'Cannot update utime of audio file') information['filepath'] = new_path return self._nopostoverwrites,information diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index c76f1118e..496866900 100644 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -594,7 +594,7 @@ class YoutubeDL(object): # No clear decision yet, let IE decide keep_video = keep_video_wish except PostProcessingError as e: - self.to_stderr(u'ERROR: ' + e.msg) + self.report_error(e.msg) if keep_video is False and not self.params.get('keepvideo', False): try: self.to_screen(u'Deleting original file %s (pass -k to keep)' % filename) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 59584e2c5..e8ac09a8f 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -402,6 +402,8 @@ def _real_main(argv=None): batchurls = batchfd.readlines() batchurls = [x.strip() for x in batchurls] batchurls = [x for x in batchurls if len(x) > 0 and not re.search(r'^[#/;]', x)] + if opts.verbose: + sys.stderr.write(u'[debug] Batch file urls: ' + repr(batchurls) + u'\n') except IOError: sys.exit(u'ERROR: batch file could not be read') all_urls = batchurls + args diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 7cfb292d9..84c02c2ed 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -12,7 +12,7 @@ from .comedycentral import ComedyCentralIE from .condenast import CondeNastIE from .criterion import CriterionIE from .cspan import CSpanIE -from .dailymotion import DailymotionIE +from .dailymotion import DailymotionIE, DailymotionPlaylistIE from .depositfiles import DepositFilesIE from .dotsub import DotsubIE from .dreisat import DreiSatIE @@ -38,15 +38,18 @@ from .infoq import InfoQIE from .instagram import InstagramIE from .jukebox import JukeboxIE from .justintv import JustinTVIE +from .kankan import KankanIE from .keek import KeekIE from .liveleak import LiveLeakIE from .livestream import LivestreamIE from .metacafe import MetacafeIE from .mixcloud import MixcloudIE from .mtv import MTVIE +from .muzu import MuzuTVIE from .myspass import MySpassIE from .myvideo import MyVideoIE from .nba import NBAIE +from .ooyala import OoyalaIE from .photobucket import PhotobucketIE from .pornotube import PornotubeIE from .rbmaradio import RBMARadioIE @@ -71,7 +74,8 @@ from .ustream import UstreamIE from .vbox7 import Vbox7IE from .veoh import VeohIE from .vevo import VevoIE -from .vimeo import VimeoIE +from .videofyme import VideofyMeIE +from .vimeo import VimeoIE, VimeoChannelIE from .vine import VineIE from .c56 import C56IE from .wat import WatIE diff --git a/youtube_dl/extractor/arte.py b/youtube_dl/extractor/arte.py index 993e30f7a..69b3b0ad7 100644 --- a/youtube_dl/extractor/arte.py +++ b/youtube_dl/extractor/arte.py @@ -17,13 +17,14 @@ class ArteTvIE(InfoExtractor): """ _EMISSION_URL = r'(?:http://)?www\.arte.tv/guide/(?Pfr|de)/(?:(?:sendungen|emissions)/)?(?P.*?)/(?P.*?)(\?.*)?' _VIDEOS_URL = r'(?:http://)?videos.arte.tv/(?Pfr|de)/.*-(?P.*?).html' + _LIVEWEB_URL = r'(?:http://)?liveweb.arte.tv/(?Pfr|de)/(?P.+?)/(?P.+)' _LIVE_URL = r'index-[0-9]+\.html$' IE_NAME = u'arte.tv' @classmethod def suitable(cls, url): - return any(re.match(regex, url) for regex in (cls._EMISSION_URL, cls._VIDEOS_URL)) + return any(re.match(regex, url) for regex in (cls._EMISSION_URL, cls._VIDEOS_URL, cls._LIVEWEB_URL)) # TODO implement Live Stream # from ..utils import compat_urllib_parse @@ -68,6 +69,12 @@ class ArteTvIE(InfoExtractor): lang = mobj.group('lang') return self._extract_video(url, id, lang) + mobj = re.match(self._LIVEWEB_URL, url) + if mobj is not None: + name = mobj.group('name') + lang = mobj.group('lang') + return self._extract_liveweb(url, name, lang) + if re.search(self._LIVE_URL, video_id) is not None: raise ExtractorError(u'Arte live streams are not yet supported, sorry') # self.extractLiveStream(url) @@ -85,7 +92,7 @@ class ArteTvIE(InfoExtractor): info_dict = {'id': player_info['VID'], 'title': player_info['VTI'], - 'description': player_info['VDE'], + 'description': player_info.get('VDE'), 'upload_date': unified_strdate(player_info['VDA'].split(' ')[0]), 'thumbnail': player_info['programImage'], 'ext': 'flv', @@ -98,12 +105,14 @@ class ArteTvIE(InfoExtractor): l = 'F' elif lang == 'de': l = 'A' - regexes = [r'VO?%s' % l, r'V%s-ST.' % l] + regexes = [r'VO?%s' % l, r'VO?.-ST%s' % l] return any(re.match(r, f['versionCode']) for r in regexes) # Some formats may not be in the same language as the url formats = filter(_match_lang, formats) # We order the formats by quality formats = sorted(formats, key=lambda f: int(f['height'])) + # Prefer videos without subtitles in the same language + formats = sorted(formats, key=lambda f: re.match(r'VO(F|A)-STM\1', f['versionCode']) is None) # Pick the best quality format_info = formats[-1] if format_info['mediaType'] == u'rtmp': @@ -144,3 +153,22 @@ class ArteTvIE(InfoExtractor): 'url': video_url, 'ext': 'flv', } + + def _extract_liveweb(self, url, name, lang): + """Extract form http://liveweb.arte.tv/""" + webpage = self._download_webpage(url, name) + video_id = self._search_regex(r'eventId=(\d+?)("|&)', webpage, u'event id') + config_xml = self._download_webpage('http://download.liveweb.arte.tv/o21/liveweb/events/event-%s.xml' % video_id, + video_id, u'Downloading information') + config_doc = xml.etree.ElementTree.fromstring(config_xml.encode('utf-8')) + event_doc = config_doc.find('event') + url_node = event_doc.find('video').find('urlHd') + if url_node is None: + url_node = video_doc.find('urlSd') + + return {'id': video_id, + 'title': event_doc.find('name%s' % lang.capitalize()).text, + 'url': url_node.text.replace('MP4', 'mp4'), + 'ext': 'flv', + 'thumbnail': self._og_search_thumbnail(webpage), + } diff --git a/youtube_dl/extractor/collegehumor.py b/youtube_dl/extractor/collegehumor.py index 5badde03a..30b9c7549 100644 --- a/youtube_dl/extractor/collegehumor.py +++ b/youtube_dl/extractor/collegehumor.py @@ -10,7 +10,7 @@ from ..utils import ( class CollegeHumorIE(InfoExtractor): - _VALID_URL = r'^(?:https?://)?(?:www\.)?collegehumor\.com/(video|embed|e)/(?P[0-9]+)/(?P.*)$' + _VALID_URL = r'^(?:https?://)?(?:www\.)?collegehumor\.com/(video|embed|e)/(?P[0-9]+)/?(?P.*)$' _TEST = { u'url': u'http://www.collegehumor.com/video/6902724/comic-con-cosplay-catastrophe', diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py index 9bf7a28ca..fa8c630d0 100644 --- a/youtube_dl/extractor/dailymotion.py +++ b/youtube_dl/extractor/dailymotion.py @@ -1,9 +1,12 @@ import re import json +import itertools from .common import InfoExtractor from ..utils import ( compat_urllib_request, + get_element_by_attribute, + get_element_by_id, ExtractorError, ) @@ -77,3 +80,31 @@ class DailymotionIE(InfoExtractor): 'ext': video_extension, 'thumbnail': info['thumbnail_url'] }] + + +class DailymotionPlaylistIE(InfoExtractor): + _VALID_URL = r'(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/playlist/(?P.+?)/' + _MORE_PAGES_INDICATOR = r'' + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + playlist_id = mobj.group('id') + video_ids = [] + + for pagenum in itertools.count(1): + webpage = self._download_webpage('https://www.dailymotion.com/playlist/%s/%s' % (playlist_id, pagenum), + playlist_id, u'Downloading page %s' % pagenum) + + playlist_el = get_element_by_attribute(u'class', u'video_list', webpage) + video_ids.extend(re.findall(r'data-id="(.+?)" data-ext-id', playlist_el)) + + if re.search(self._MORE_PAGES_INDICATOR, webpage, re.DOTALL) is None: + break + + entries = [self.url_result('http://www.dailymotion.com/video/%s' % video_id, 'Dailymotion') + for video_id in video_ids] + return {'_type': 'playlist', + 'id': playlist_id, + 'title': get_element_by_id(u'playlist_name', webpage), + 'entries': entries, + } diff --git a/youtube_dl/extractor/kankan.py b/youtube_dl/extractor/kankan.py new file mode 100644 index 000000000..8537ba584 --- /dev/null +++ b/youtube_dl/extractor/kankan.py @@ -0,0 +1,37 @@ +import re + +from .common import InfoExtractor +from ..utils import determine_ext + + +class KankanIE(InfoExtractor): + _VALID_URL = r'https?://(?:.*?\.)?kankan\.com/.+?/(?P\d+)\.shtml' + + _TEST = { + u'url': u'http://yinyue.kankan.com/vod/48/48863.shtml', + u'file': u'48863.flv', + u'md5': u'29aca1e47ae68fc28804aca89f29507e', + u'info_dict': { + u'title': u'Ready To Go', + }, + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + webpage = self._download_webpage(url, video_id) + + title = self._search_regex(r'G_TITLE=[\'"](.+?)[\'"]', webpage, u'video title') + gcid = self._search_regex(r'lurl:[\'"]http://.+?/.+?/(.+?)/', webpage, u'gcid') + + video_info_page = self._download_webpage('http://p2s.cl.kankan.com/getCdnresource_flv?gcid=%s' % gcid, + video_id, u'Downloading video url info') + ip = self._search_regex(r'ip:"(.+?)"', video_info_page, u'video url ip') + path = self._search_regex(r'path:"(.+?)"', video_info_page, u'video url path') + video_url = 'http://%s%s' % (ip, path) + + return {'id': video_id, + 'title': title, + 'url': video_url, + 'ext': determine_ext(video_url), + } diff --git a/youtube_dl/extractor/muzu.py b/youtube_dl/extractor/muzu.py new file mode 100644 index 000000000..03e31ea1c --- /dev/null +++ b/youtube_dl/extractor/muzu.py @@ -0,0 +1,64 @@ +import re +import json + +from .common import InfoExtractor +from ..utils import ( + compat_urllib_parse, + determine_ext, +) + + +class MuzuTVIE(InfoExtractor): + _VALID_URL = r'https?://www.muzu.tv/(.+?)/(.+?)/(?P\d+)' + IE_NAME = u'muzu.tv' + + _TEST = { + u'url': u'http://www.muzu.tv/defected/marcashken-featuring-sos-cat-walk-original-mix-music-video/1981454/', + u'file': u'1981454.mp4', + u'md5': u'98f8b2c7bc50578d6a0364fff2bfb000', + u'info_dict': { + u'title': u'Cat Walk (Original Mix)', + u'description': u'md5:90e868994de201b2570e4e5854e19420', + u'uploader': u'MarcAshken featuring SOS', + }, + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + + info_data = compat_urllib_parse.urlencode({'format': 'json', + 'url': url, + }) + video_info_page = self._download_webpage('http://www.muzu.tv/api/oembed/?%s' % info_data, + video_id, u'Downloading video info') + info = json.loads(video_info_page) + + player_info_page = self._download_webpage('http://player.muzu.tv/player/playerInit?ai=%s' % video_id, + video_id, u'Downloading player info') + video_info = json.loads(player_info_page)['videos'][0] + for quality in ['1080' , '720', '480', '360']: + if video_info.get('v%s' % quality): + break + + data = compat_urllib_parse.urlencode({'ai': video_id, + # Even if each time you watch a video the hash changes, + # it seems to work for different videos, and it will work + # even if you use any non empty string as a hash + 'viewhash': 'VBNff6djeV4HV5TRPW5kOHub2k', + 'device': 'web', + 'qv': quality, + }) + video_url_page = self._download_webpage('http://player.muzu.tv/player/requestVideo?%s' % data, + video_id, u'Downloading video url') + video_url_info = json.loads(video_url_page) + video_url = video_url_info['url'] + + return {'id': video_id, + 'title': info['title'], + 'url': video_url, + 'ext': determine_ext(video_url), + 'thumbnail': info['thumbnail_url'], + 'description': info['description'], + 'uploader': info['author_name'], + } diff --git a/youtube_dl/extractor/myvideo.py b/youtube_dl/extractor/myvideo.py index b2a7b1df0..0404e6e43 100644 --- a/youtube_dl/extractor/myvideo.py +++ b/youtube_dl/extractor/myvideo.py @@ -2,11 +2,13 @@ import binascii import base64 import hashlib import re +import json from .common import InfoExtractor from ..utils import ( compat_ord, compat_urllib_parse, + compat_urllib_request, ExtractorError, ) @@ -16,7 +18,7 @@ from ..utils import ( class MyVideoIE(InfoExtractor): """Information Extractor for myvideo.de.""" - _VALID_URL = r'(?:http://)?(?:www\.)?myvideo\.de/watch/([0-9]+)/([^?/]+).*' + _VALID_URL = r'(?:http://)?(?:www\.)?myvideo\.de/(?:[^/]+/)?watch/([0-9]+)/([^?/]+).*' IE_NAME = u'myvideo' _TEST = { u'url': u'http://www.myvideo.de/watch/8229274/bowling_fail_or_win', @@ -85,6 +87,20 @@ class MyVideoIE(InfoExtractor): 'ext': video_ext, }] + mobj = re.search(r'data-video-service="/service/data/video/%s/config' % video_id, webpage) + if mobj is not None: + request = compat_urllib_request.Request('http://www.myvideo.de/service/data/video/%s/config' % video_id, '') + response = self._download_webpage(request, video_id, + u'Downloading video info') + info = json.loads(base64.b64decode(response).decode('utf-8')) + return {'id': video_id, + 'title': info['title'], + 'url': info['streaming_url'].replace('rtmpe', 'rtmpt'), + 'play_path': info['filename'], + 'ext': 'flv', + 'thumbnail': info['thumbnail'][0]['url'], + } + # try encxml mobj = re.search('var flashvars={(.+?)}', webpage) if mobj is None: diff --git a/youtube_dl/extractor/ooyala.py b/youtube_dl/extractor/ooyala.py new file mode 100644 index 000000000..b734722d0 --- /dev/null +++ b/youtube_dl/extractor/ooyala.py @@ -0,0 +1,52 @@ +import re +import json + +from .common import InfoExtractor +from ..utils import unescapeHTML + +class OoyalaIE(InfoExtractor): + _VALID_URL = r'https?://.+?\.ooyala\.com/.*?embedCode=(?P.+?)(&|$)' + + _TEST = { + # From http://it.slashdot.org/story/13/04/25/178216/recovering-data-from-broken-hard-drives-and-ssds-video + u'url': u'http://player.ooyala.com/player.js?embedCode=pxczE2YjpfHfn1f3M-ykG_AmJRRn0PD8', + u'file': u'pxczE2YjpfHfn1f3M-ykG_AmJRRn0PD8.mp4', + u'md5': u'3f5cceb3a7bf461d6c29dc466cf8033c', + u'info_dict': { + u'title': u'Explaining Data Recovery from Hard Drives and SSDs', + u'description': u'How badly damaged does a drive have to be to defeat Russell and his crew? Apparently, smashed to bits.', + }, + } + + def _extract_result(self, info, more_info): + return {'id': info['embedCode'], + 'ext': 'mp4', + 'title': unescapeHTML(info['title']), + 'url': info['url'], + 'description': unescapeHTML(more_info['description']), + 'thumbnail': more_info['promo'], + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + embedCode = mobj.group('id') + player_url = 'http://player.ooyala.com/player.js?embedCode=%s' % embedCode + player = self._download_webpage(player_url, embedCode) + mobile_url = self._search_regex(r'mobile_player_url="(.+?)&device="', + player, u'mobile player url') + mobile_player = self._download_webpage(mobile_url, embedCode) + videos_info = self._search_regex(r'eval\("\((\[{.*?stream_redirect.*?}\])\)"\);', mobile_player, u'info').replace('\\"','"') + videos_more_info = self._search_regex(r'eval\("\(({.*?\\"promo\\".*?})\)"', mobile_player, u'more info').replace('\\"','"') + videos_info = json.loads(videos_info) + videos_more_info =json.loads(videos_more_info) + + if videos_more_info.get('lineup'): + videos = [self._extract_result(info, more_info) for (info, more_info) in zip(videos_info, videos_more_info['lineup'])] + return {'_type': 'playlist', + 'id': embedCode, + 'title': unescapeHTML(videos_more_info['title']), + 'entries': videos, + } + else: + return self._extract_result(videos_info[0], videos_more_info) + diff --git a/youtube_dl/extractor/tf1.py b/youtube_dl/extractor/tf1.py index a8af89f83..772134a12 100644 --- a/youtube_dl/extractor/tf1.py +++ b/youtube_dl/extractor/tf1.py @@ -6,20 +6,17 @@ import re from .common import InfoExtractor class TF1IE(InfoExtractor): - """ - TF1 uses the wat.tv player, currently it can only download videos with the - html5 player enabled, it cannot download HD videos. - """ - _WORKING = False + """TF1 uses the wat.tv player.""" _VALID_URL = r'http://videos.tf1.fr/.*-(.*?).html' _TEST = { u'url': u'http://videos.tf1.fr/auto-moto/citroen-grand-c4-picasso-2013-presentation-officielle-8062060.html', u'file': u'10635995.mp4', - u'md5': u'66789d3e91278d332f75e1feb7aea327', + u'md5': u'2e378cc28b9957607d5e88f274e637d8', u'info_dict': { u'title': u'Citroën Grand C4 Picasso 2013 : présentation officielle', u'description': u'Vidéo officielle du nouveau Citroën Grand C4 Picasso, lancé à l\'automne 2013.', - } + }, + u'skip': u'Sometimes wat serves the whole file with the --test option', } def _real_extract(self, url): diff --git a/youtube_dl/extractor/vevo.py b/youtube_dl/extractor/vevo.py index 3b16dcfbc..67537eae5 100644 --- a/youtube_dl/extractor/vevo.py +++ b/youtube_dl/extractor/vevo.py @@ -35,12 +35,12 @@ class VevoIE(InfoExtractor): self.report_extraction(video_id) video_info = json.loads(info_json) - m_urls = list(re.finditer(r'