diff --git a/devscripts/youtube_genalgo.py b/devscripts/youtube_genalgo.py
index 22977ccd9..31d6ec952 100644
--- a/devscripts/youtube_genalgo.py
+++ b/devscripts/youtube_genalgo.py
@@ -17,18 +17,18 @@ tests = [
# 87 - vflART1Nf 2013/07/24
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$^&*()_-+={[]}|:;?/>.<",
"tyuioplkjhgfdsazxcv"),
- # 86 - vfl_ymO4Z 2013/06/27
+ # 86 - vflm_D8eE 2013/07/31
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[|};?/>.<",
- "ertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!/#$%^&*()_-+={[|};?@"),
+ ">.1}|[{=+-_)(*&^%$#@!MNBVCXZASDFGHJK.<",
"ertyuiqplkjhgfdsazx$vbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#<%^&*()_-+={[};?/c"),
# 84
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?>.<",
"<.>?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWe098765432rmnbvcxzasdfghjklpoiuyt1"),
- # 83 - vflcaqGO8 2013/07/11
+ # 83 - vflTWC9KW 2013/08/01
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!#$%^&*()_+={[};?/>.<",
- "urty8ioplkjhgfdsazxcvbqm1234567S90QWERTYUIOPLKJHGFDnAZXCVBNM!#$%^&*()_+={[};?/>.<"),
+ "qwertyuioplkjhg>dsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!#$%^&*()_+={[};?/f"),
# 82
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/>.<",
"Q>/?;}[{=+-(*<^%$#@!MNBVCXZASDFGHKLPOIUY8REWT0q&7654321mnbvcxzasdfghjklpoiuytrew9"),
@@ -40,6 +40,12 @@ tests = [
"Z?;}[{=+-(*&^%$#@!MNBVCXRASDFGHKLPOIUYT/EWQ0q87659321mnbvcxzasdfghjkl4oiuytrewp"),
]
+tests_age_gate = [
+ # 86 - vflqinMWD
+ ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[|};?/>.<",
+ "ertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!/#$%^&*()_-+={[|};?@"),
+]
+
def find_matching(wrong, right):
idxs = [wrong.index(c) for c in right]
return compress(idxs)
@@ -90,6 +96,8 @@ def genall(tests):
def main():
print(genall(tests))
+ print(u' Age gate:')
+ print(genall(tests_age_gate))
if __name__ == '__main__':
main()
diff --git a/test/test_playlists.py b/test/test_playlists.py
new file mode 100644
index 000000000..65de3a55c
--- /dev/null
+++ b/test/test_playlists.py
@@ -0,0 +1,38 @@
+#!/usr/bin/env python
+
+import sys
+import unittest
+import json
+
+# Allow direct execution
+import os
+sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from youtube_dl.extractor import DailymotionPlaylistIE, VimeoChannelIE
+from youtube_dl.utils import *
+
+from helper import FakeYDL
+
+class TestPlaylists(unittest.TestCase):
+    def assertIsPlaylist(self, info):
+        """Check that the extracted info dict is tagged with '_type' == 'playlist'."""
+        self.assertEqual(info['_type'], 'playlist')
+
+    def test_dailymotion_playlist(self):
+        # The 'SPORT' playlist must come back as a playlist with more than 20 entries
+        extractor = DailymotionPlaylistIE(FakeYDL())
+        playlist = extractor.extract('http://www.dailymotion.com/playlist/xv4bw_nqtv_sport/1#video=xl8v3q')
+        self.assertIsPlaylist(playlist)
+        self.assertEqual(playlist['title'], u'SPORT')
+        self.assertTrue(len(playlist['entries']) > 20)
+
+    def test_vimeo_channel(self):
+        # The 'Vimeo Tributes' channel must come back as a playlist with more than 24 entries
+        extractor = VimeoChannelIE(FakeYDL())
+        channel = extractor.extract('http://vimeo.com/channels/tributes')
+        self.assertIsPlaylist(channel)
+        self.assertEqual(channel['title'], u'Vimeo Tributes')
+        self.assertTrue(len(channel['entries']) > 24)
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/test/test_youtube_sig.py b/test/test_youtube_sig.py
deleted file mode 100644
index 4d45a0e08..000000000
--- a/test/test_youtube_sig.py
+++ /dev/null
@@ -1,72 +0,0 @@
-#!/usr/bin/env python
-
-import unittest
-import sys
-
-# Allow direct execution
-import os
-sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-
-from youtube_dl.extractor.youtube import YoutubeIE
-from helper import FakeYDL
-
-sig = YoutubeIE(FakeYDL())._decrypt_signature
-
-class TestYoutubeSig(unittest.TestCase):
- def test_92(self):
- wrong = "F9F9B6E6FD47029957AB911A964CC20D95A181A5D37A2DBEFD67D403DB0E8BE4F4910053E4E8A79.0B70B.0B80B8"
- right = "69B6E6FD47029957AB911A9F4CC20D95A181A5D3.A2DBEFD67D403DB0E8BE4F4910053E4E8A7980B7"
- self.assertEqual(sig(wrong), right)
-
- def test_90(self):
- wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<'`"
- right = "mrtyuioplkjhgfdsazxcvbne1234567890QWER[YUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={`]}|"
- self.assertEqual(sig(wrong), right)
-
- def test_88(self):
- wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<"
- right = "J:|}][{=+-_)(*&;%$#@>MNBVCXZASDFGH^KLPOIUYTREWQ0987654321mnbvcxzasdfghrklpoiuytej"
- self.assertEqual(sig(wrong), right)
-
- def test_87(self):
- wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$^&*()_-+={[]}|:;?/>.<"
- right = "tyuioplkjhgfdsazxcv"
- self.assertEqual(sig(wrong), right)
-
- def test_86(self):
- wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[|};?/>.<"
- right = "ertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!/#$%^&*()_-+={[|};?@"
- self.assertEqual(sig(wrong), right)
-
- def test_85(self):
- wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?/>.<"
- right = "ertyuiqplkjhgfdsazx$vbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#<%^&*()_-+={[};?/c"
- self.assertEqual(sig(wrong), right)
-
- def test_84(self):
- wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?>.<"
- right = "<.>?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWe098765432rmnbvcxzasdfghjklpoiuyt1"
- self.assertEqual(sig(wrong), right)
-
- def test_83(self):
- wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!#$%^&*()_+={[};?/>.<"
- right = "urty8ioplkjhgfdsazxcvbqm1234567S90QWERTYUIOPLKJHGFDnAZXCVBNM!#$%^&*()_+={[};?/>.<"
- self.assertEqual(sig(wrong), right)
-
- def test_82(self):
- wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/>.<"
- right = "Q>/?;}[{=+-(*<^%$#@!MNBVCXZASDFGHKLPOIUY8REWT0q&7654321mnbvcxzasdfghjklpoiuytrew9"
- self.assertEqual(sig(wrong), right)
-
- def test_81(self):
- wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/>."
- right = "C>/?;}[{=+-(*&^%$#@!MNBVYXZASDFGHKLPOIU.TREWQ0q87659321mnbvcxzasdfghjkl4oiuytrewp"
- self.assertEqual(sig(wrong), right)
-
- def test_79(self):
- wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/"
- right = "Z?;}[{=+-(*&^%$#@!MNBVCXRASDFGHKLPOIUYT/EWQ0q87659321mnbvcxzasdfghjkl4oiuytrewp"
- self.assertEqual(sig(wrong), right)
-
-if __name__ == '__main__':
- unittest.main()
diff --git a/youtube_dl/FileDownloader.py b/youtube_dl/FileDownloader.py
index f748df428..1f9588825 100644
--- a/youtube_dl/FileDownloader.py
+++ b/youtube_dl/FileDownloader.py
@@ -386,6 +386,35 @@ class FileDownloader(object):
self.report_error(u'mplayer exited with code %d' % retval)
return False
+    def _download_m3u8_with_ffmpeg(self, filename, url):
+        """Download the m3u8 stream at url into filename with ffmpeg; return True on success."""
+        self.report_destination(filename)
+        tmpfilename = self.temp_name(filename)
+        args = ['ffmpeg', '-y', '-i', url, '-f', 'mp4', tmpfilename]
+        # Check for ffmpeg first; the devnull handle is closed instead of being leaked
+        try:
+            with open(os.path.devnull, 'w') as devnull:
+                subprocess.call(['ffmpeg', '-h'], stdout=devnull, stderr=subprocess.STDOUT)
+        except (OSError, IOError):
+            self.report_error(u'm3u8 download detected but "%s" could not be run' % args[0])
+            return False
+
+        retval = subprocess.call(args)
+        if retval != 0:
+            self.to_stderr(u"\n")
+            self.report_error(u'ffmpeg exited with code %d' % retval)
+            return False
+        fsize = os.path.getsize(encodeFilename(tmpfilename))
+        self.to_screen(u'\r[%s] %s bytes' % (args[0], fsize))
+        self.try_rename(tmpfilename, filename)
+        self._hook_progress({
+            'downloaded_bytes': fsize,
+            'total_bytes': fsize,
+            'filename': filename,
+            'status': 'finished',
+        })
+        return True
+
def _do_download(self, filename, info_dict):
url = info_dict['url']
@@ -411,6 +440,10 @@ class FileDownloader(object):
if url.startswith('mms') or url.startswith('rtsp'):
return self._download_with_mplayer(filename, url)
+ # m3u8 manifests are downloaded with ffmpeg
+ if determine_ext(url) == u'm3u8':
+ return self._download_m3u8_with_ffmpeg(filename, url)
+
tmpfilename = self.temp_name(filename)
stream = None
diff --git a/youtube_dl/PostProcessor.py b/youtube_dl/PostProcessor.py
index 8c5e53991..fddf58606 100644
--- a/youtube_dl/PostProcessor.py
+++ b/youtube_dl/PostProcessor.py
@@ -100,7 +100,8 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor):
self._nopostoverwrites = nopostoverwrites
def get_audio_codec(self, path):
- if not self._exes['ffprobe'] and not self._exes['avprobe']: return None
+ if not self._exes['ffprobe'] and not self._exes['avprobe']:
+ raise PostProcessingError(u'ffprobe or avprobe not found. Please install one.')
try:
cmd = [self._exes['avprobe'] or self._exes['ffprobe'], '-show_streams', encodeFilename(self._ffmpeg_filename_argument(path))]
handle = subprocess.Popen(cmd, stderr=compat_subprocess_get_DEVNULL(), stdout=subprocess.PIPE)
@@ -208,7 +209,7 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor):
try:
os.utime(encodeFilename(new_path), (time.time(), information['filetime']))
except:
- self._downloader.to_stderr(u'WARNING: Cannot update utime of audio file')
+ self._downloader.report_warning(u'Cannot update utime of audio file')
information['filepath'] = new_path
return self._nopostoverwrites,information
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index c76f1118e..496866900 100644
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -594,7 +594,7 @@ class YoutubeDL(object):
# No clear decision yet, let IE decide
keep_video = keep_video_wish
except PostProcessingError as e:
- self.to_stderr(u'ERROR: ' + e.msg)
+ self.report_error(e.msg)
if keep_video is False and not self.params.get('keepvideo', False):
try:
self.to_screen(u'Deleting original file %s (pass -k to keep)' % filename)
diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py
index 59584e2c5..e8ac09a8f 100644
--- a/youtube_dl/__init__.py
+++ b/youtube_dl/__init__.py
@@ -402,6 +402,8 @@ def _real_main(argv=None):
batchurls = batchfd.readlines()
batchurls = [x.strip() for x in batchurls]
batchurls = [x for x in batchurls if len(x) > 0 and not re.search(r'^[#/;]', x)]
+ if opts.verbose:
+ sys.stderr.write(u'[debug] Batch file urls: ' + repr(batchurls) + u'\n')
except IOError:
sys.exit(u'ERROR: batch file could not be read')
all_urls = batchurls + args
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index 7cfb292d9..84c02c2ed 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -12,7 +12,7 @@ from .comedycentral import ComedyCentralIE
from .condenast import CondeNastIE
from .criterion import CriterionIE
from .cspan import CSpanIE
-from .dailymotion import DailymotionIE
+from .dailymotion import DailymotionIE, DailymotionPlaylistIE
from .depositfiles import DepositFilesIE
from .dotsub import DotsubIE
from .dreisat import DreiSatIE
@@ -38,15 +38,18 @@ from .infoq import InfoQIE
from .instagram import InstagramIE
from .jukebox import JukeboxIE
from .justintv import JustinTVIE
+from .kankan import KankanIE
from .keek import KeekIE
from .liveleak import LiveLeakIE
from .livestream import LivestreamIE
from .metacafe import MetacafeIE
from .mixcloud import MixcloudIE
from .mtv import MTVIE
+from .muzu import MuzuTVIE
from .myspass import MySpassIE
from .myvideo import MyVideoIE
from .nba import NBAIE
+from .ooyala import OoyalaIE
from .photobucket import PhotobucketIE
from .pornotube import PornotubeIE
from .rbmaradio import RBMARadioIE
@@ -71,7 +74,8 @@ from .ustream import UstreamIE
from .vbox7 import Vbox7IE
from .veoh import VeohIE
from .vevo import VevoIE
-from .vimeo import VimeoIE
+from .videofyme import VideofyMeIE
+from .vimeo import VimeoIE, VimeoChannelIE
from .vine import VineIE
from .c56 import C56IE
from .wat import WatIE
diff --git a/youtube_dl/extractor/arte.py b/youtube_dl/extractor/arte.py
index 993e30f7a..69b3b0ad7 100644
--- a/youtube_dl/extractor/arte.py
+++ b/youtube_dl/extractor/arte.py
@@ -17,13 +17,14 @@ class ArteTvIE(InfoExtractor):
"""
_EMISSION_URL = r'(?:http://)?www\.arte.tv/guide/(?Pfr|de)/(?:(?:sendungen|emissions)/)?(?P.*?)/(?P.*?)(\?.*)?'
_VIDEOS_URL = r'(?:http://)?videos.arte.tv/(?Pfr|de)/.*-(?P.*?).html'
+ _LIVEWEB_URL = r'(?:http://)?liveweb.arte.tv/(?Pfr|de)/(?P.+?)/(?P.+)'
_LIVE_URL = r'index-[0-9]+\.html$'
IE_NAME = u'arte.tv'
@classmethod
def suitable(cls, url):
- return any(re.match(regex, url) for regex in (cls._EMISSION_URL, cls._VIDEOS_URL))
+ return any(re.match(regex, url) for regex in (cls._EMISSION_URL, cls._VIDEOS_URL, cls._LIVEWEB_URL))
# TODO implement Live Stream
# from ..utils import compat_urllib_parse
@@ -68,6 +69,12 @@ class ArteTvIE(InfoExtractor):
lang = mobj.group('lang')
return self._extract_video(url, id, lang)
+ mobj = re.match(self._LIVEWEB_URL, url)
+ if mobj is not None:
+ name = mobj.group('name')
+ lang = mobj.group('lang')
+ return self._extract_liveweb(url, name, lang)
+
if re.search(self._LIVE_URL, video_id) is not None:
raise ExtractorError(u'Arte live streams are not yet supported, sorry')
# self.extractLiveStream(url)
@@ -85,7 +92,7 @@ class ArteTvIE(InfoExtractor):
info_dict = {'id': player_info['VID'],
'title': player_info['VTI'],
- 'description': player_info['VDE'],
+ 'description': player_info.get('VDE'),
'upload_date': unified_strdate(player_info['VDA'].split(' ')[0]),
'thumbnail': player_info['programImage'],
'ext': 'flv',
@@ -98,12 +105,14 @@ class ArteTvIE(InfoExtractor):
l = 'F'
elif lang == 'de':
l = 'A'
- regexes = [r'VO?%s' % l, r'V%s-ST.' % l]
+ regexes = [r'VO?%s' % l, r'VO?.-ST%s' % l]
return any(re.match(r, f['versionCode']) for r in regexes)
# Some formats may not be in the same language as the url
formats = filter(_match_lang, formats)
# We order the formats by quality
formats = sorted(formats, key=lambda f: int(f['height']))
+ # Prefer videos without subtitles in the same language
+ formats = sorted(formats, key=lambda f: re.match(r'VO(F|A)-STM\1', f['versionCode']) is None)
# Pick the best quality
format_info = formats[-1]
if format_info['mediaType'] == u'rtmp':
@@ -144,3 +153,22 @@ class ArteTvIE(InfoExtractor):
'url': video_url,
'ext': 'flv',
}
+
+    def _extract_liveweb(self, url, name, lang):
+        """Extract from http://liveweb.arte.tv/ (uses the HD stream url, else the SD one)"""
+        webpage = self._download_webpage(url, name)
+        video_id = self._search_regex(r'eventId=(\d+?)("|&)', webpage, u'event id')
+        config_xml = self._download_webpage('http://download.liveweb.arte.tv/o21/liveweb/events/event-%s.xml' % video_id,
+                                            video_id, u'Downloading information')
+        config_doc = xml.etree.ElementTree.fromstring(config_xml.encode('utf-8'))
+        event_doc = config_doc.find('event')
+        video_doc = event_doc.find('video')
+        url_node = video_doc.find('urlHd')
+        if url_node is None:
+            url_node = video_doc.find('urlSd')
+
+        return {'id': video_id,
+                'title': event_doc.find('name%s' % lang.capitalize()).text,
+                'url': url_node.text.replace('MP4', 'mp4'),
+                'ext': 'flv',
+                'thumbnail': self._og_search_thumbnail(webpage)}
diff --git a/youtube_dl/extractor/collegehumor.py b/youtube_dl/extractor/collegehumor.py
index 5badde03a..30b9c7549 100644
--- a/youtube_dl/extractor/collegehumor.py
+++ b/youtube_dl/extractor/collegehumor.py
@@ -10,7 +10,7 @@ from ..utils import (
class CollegeHumorIE(InfoExtractor):
- _VALID_URL = r'^(?:https?://)?(?:www\.)?collegehumor\.com/(video|embed|e)/(?P[0-9]+)/(?P.*)$'
+ _VALID_URL = r'^(?:https?://)?(?:www\.)?collegehumor\.com/(video|embed|e)/(?P[0-9]+)/?(?P.*)$'
_TEST = {
u'url': u'http://www.collegehumor.com/video/6902724/comic-con-cosplay-catastrophe',
diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py
index 9bf7a28ca..fa8c630d0 100644
--- a/youtube_dl/extractor/dailymotion.py
+++ b/youtube_dl/extractor/dailymotion.py
@@ -1,9 +1,12 @@
import re
import json
+import itertools
from .common import InfoExtractor
from ..utils import (
compat_urllib_request,
+ get_element_by_attribute,
+ get_element_by_id,
ExtractorError,
)
@@ -77,3 +80,31 @@ class DailymotionIE(InfoExtractor):
'ext': video_extension,
'thumbnail': info['thumbnail_url']
}]
+
+
+class DailymotionPlaylistIE(InfoExtractor):
+    """Extract the videos of a Dailymotion playlist by walking its numbered pages."""
+    _VALID_URL = r'(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/playlist/(?P<id>.+?)/'
+    # NOTE(review): an empty pattern matches every page, so the paging loop below
+    # never breaks; the real "next page" marker has to be restored here.
+    _MORE_PAGES_INDICATOR = r''
+
+    def _real_extract(self, url):
+        """Return a playlist result with one url entry per video id found on the pages."""
+        playlist_id = re.match(self._VALID_URL, url).group('id')
+        video_ids = []
+        for pagenum in itertools.count(1):
+            webpage = self._download_webpage('https://www.dailymotion.com/playlist/%s/%s' % (playlist_id, pagenum),
+                                             playlist_id, u'Downloading page %s' % pagenum)
+            playlist_el = get_element_by_attribute(u'class', u'video_list', webpage)
+            video_ids.extend(re.findall(r'data-id="(.+?)" data-ext-id', playlist_el))
+            if re.search(self._MORE_PAGES_INDICATOR, webpage, re.DOTALL) is None:
+                break
+
+        entries = [self.url_result('http://www.dailymotion.com/video/%s' % video_id, 'Dailymotion')
+                   for video_id in video_ids]
+        return {'_type': 'playlist',
+                'id': playlist_id,
+                'title': get_element_by_id(u'playlist_name', webpage),
+                'entries': entries,
+                }
diff --git a/youtube_dl/extractor/kankan.py b/youtube_dl/extractor/kankan.py
new file mode 100644
index 000000000..8537ba584
--- /dev/null
+++ b/youtube_dl/extractor/kankan.py
@@ -0,0 +1,37 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import determine_ext
+
+
+class KankanIE(InfoExtractor):
+    """Information extractor for kankan.com video pages (urls ending in <id>.shtml)."""
+    _VALID_URL = r'https?://(?:.*?\.)?kankan\.com/.+?/(?P<id>\d+)\.shtml'
+
+    _TEST = {
+        u'url': u'http://yinyue.kankan.com/vod/48/48863.shtml',
+        u'file': u'48863.flv',
+        u'md5': u'29aca1e47ae68fc28804aca89f29507e',
+        u'info_dict': {
+            u'title': u'Ready To Go',
+        },
+    }
+
+    def _real_extract(self, url):
+        """Return the info dict (id, title, url, ext) for the video page at url."""
+        video_id = re.match(self._VALID_URL, url).group('id')
+        webpage = self._download_webpage(url, video_id)
+
+        title = self._search_regex(r'G_TITLE=[\'"](.+?)[\'"]', webpage, u'video title')
+        gcid = self._search_regex(r'lurl:[\'"]http://.+?/.+?/(.+?)/', webpage, u'gcid')
+        video_info_page = self._download_webpage('http://p2s.cl.kankan.com/getCdnresource_flv?gcid=%s' % gcid,
+                                                 video_id, u'Downloading video url info')
+        ip = self._search_regex(r'ip:"(.+?)"', video_info_page, u'video url ip')
+        path = self._search_regex(r'path:"(.+?)"', video_info_page, u'video url path')
+        video_url = 'http://%s%s' % (ip, path)
+
+        return {'id': video_id,
+                'title': title,
+                'url': video_url,
+                'ext': determine_ext(video_url),
+                }
diff --git a/youtube_dl/extractor/muzu.py b/youtube_dl/extractor/muzu.py
new file mode 100644
index 000000000..03e31ea1c
--- /dev/null
+++ b/youtube_dl/extractor/muzu.py
@@ -0,0 +1,64 @@
+import re
+import json
+
+from .common import InfoExtractor
+from ..utils import (
+ compat_urllib_parse,
+ determine_ext,
+)
+
+
+class MuzuTVIE(InfoExtractor):
+    """Information extractor for muzu.tv video pages."""
+    _VALID_URL = r'https?://www.muzu.tv/(.+?)/(.+?)/(?P<id>\d+)'
+    IE_NAME = u'muzu.tv'
+
+    _TEST = {
+        u'url': u'http://www.muzu.tv/defected/marcashken-featuring-sos-cat-walk-original-mix-music-video/1981454/',
+        u'file': u'1981454.mp4',
+        u'md5': u'98f8b2c7bc50578d6a0364fff2bfb000',
+        u'info_dict': {
+            u'title': u'Cat Walk (Original Mix)',
+            u'description': u'md5:90e868994de201b2570e4e5854e19420',
+            u'uploader': u'MarcAshken featuring SOS',
+        },
+    }
+
+    def _real_extract(self, url):
+        """Return the info dict for url, built from the oembed and player endpoints."""
+        video_id = re.match(self._VALID_URL, url).group('id')
+
+        info_data = compat_urllib_parse.urlencode({'format': 'json', 'url': url})
+        video_info_page = self._download_webpage('http://www.muzu.tv/api/oembed/?%s' % info_data,
+                                                 video_id, u'Downloading video info')
+        info = json.loads(video_info_page)
+
+        player_info_page = self._download_webpage('http://player.muzu.tv/player/playerInit?ai=%s' % video_id,
+                                                  video_id, u'Downloading player info')
+        video_info = json.loads(player_info_page)['videos'][0]
+        # Take the first quality flagged in the player info; '360' remains if none is
+        for quality in ['1080', '720', '480', '360']:
+            if video_info.get('v%s' % quality):
+                break
+
+        data = compat_urllib_parse.urlencode({'ai': video_id,
+                                              # Even if each time you watch a video the hash changes,
+                                              # it seems to work for different videos, and it will work
+                                              # even if you use any non empty string as a hash
+                                              'viewhash': 'VBNff6djeV4HV5TRPW5kOHub2k',
+                                              'device': 'web',
+                                              'qv': quality,
+                                              })
+        video_url_page = self._download_webpage('http://player.muzu.tv/player/requestVideo?%s' % data,
+                                                video_id, u'Downloading video url')
+        video_url_info = json.loads(video_url_page)
+        video_url = video_url_info['url']
+
+        return {'id': video_id,
+                'title': info['title'],
+                'url': video_url,
+                'ext': determine_ext(video_url),
+                'thumbnail': info['thumbnail_url'],
+                'description': info['description'],
+                'uploader': info['author_name'],
+                }
diff --git a/youtube_dl/extractor/myvideo.py b/youtube_dl/extractor/myvideo.py
index b2a7b1df0..0404e6e43 100644
--- a/youtube_dl/extractor/myvideo.py
+++ b/youtube_dl/extractor/myvideo.py
@@ -2,11 +2,13 @@ import binascii
import base64
import hashlib
import re
+import json
from .common import InfoExtractor
from ..utils import (
compat_ord,
compat_urllib_parse,
+ compat_urllib_request,
ExtractorError,
)
@@ -16,7 +18,7 @@ from ..utils import (
class MyVideoIE(InfoExtractor):
"""Information Extractor for myvideo.de."""
- _VALID_URL = r'(?:http://)?(?:www\.)?myvideo\.de/watch/([0-9]+)/([^?/]+).*'
+ _VALID_URL = r'(?:http://)?(?:www\.)?myvideo\.de/(?:[^/]+/)?watch/([0-9]+)/([^?/]+).*'
IE_NAME = u'myvideo'
_TEST = {
u'url': u'http://www.myvideo.de/watch/8229274/bowling_fail_or_win',
@@ -85,6 +87,20 @@ class MyVideoIE(InfoExtractor):
'ext': video_ext,
}]
+ mobj = re.search(r'data-video-service="/service/data/video/%s/config' % video_id, webpage)
+ if mobj is not None:
+ request = compat_urllib_request.Request('http://www.myvideo.de/service/data/video/%s/config' % video_id, '')
+ response = self._download_webpage(request, video_id,
+ u'Downloading video info')
+ info = json.loads(base64.b64decode(response).decode('utf-8'))
+ return {'id': video_id,
+ 'title': info['title'],
+ 'url': info['streaming_url'].replace('rtmpe', 'rtmpt'),
+ 'play_path': info['filename'],
+ 'ext': 'flv',
+ 'thumbnail': info['thumbnail'][0]['url'],
+ }
+
# try encxml
mobj = re.search('var flashvars={(.+?)}', webpage)
if mobj is None:
diff --git a/youtube_dl/extractor/ooyala.py b/youtube_dl/extractor/ooyala.py
new file mode 100644
index 000000000..b734722d0
--- /dev/null
+++ b/youtube_dl/extractor/ooyala.py
@@ -0,0 +1,52 @@
+import re
+import json
+
+from .common import InfoExtractor
+from ..utils import unescapeHTML
+
+class OoyalaIE(InfoExtractor):
+    """Information extractor for ooyala.com player urls that carry an embedCode."""
+    _VALID_URL = r'https?://.+?\.ooyala\.com/.*?embedCode=(?P<id>.+?)(&|$)'
+
+    _TEST = {
+        # From http://it.slashdot.org/story/13/04/25/178216/recovering-data-from-broken-hard-drives-and-ssds-video
+        u'url': u'http://player.ooyala.com/player.js?embedCode=pxczE2YjpfHfn1f3M-ykG_AmJRRn0PD8',
+        u'file': u'pxczE2YjpfHfn1f3M-ykG_AmJRRn0PD8.mp4',
+        u'md5': u'3f5cceb3a7bf461d6c29dc466cf8033c',
+        u'info_dict': {
+            u'title': u'Explaining Data Recovery from Hard Drives and SSDs',
+            u'description': u'How badly damaged does a drive have to be to defeat Russell and his crew? Apparently, smashed to bits.',
+        },
+    }
+
+    def _extract_result(self, info, more_info):
+        """Build one video info dict from a stream entry and its metadata entry."""
+        return {'id': info['embedCode'],
+                'ext': 'mp4',
+                'title': unescapeHTML(info['title']),
+                'url': info['url'],
+                'description': unescapeHTML(more_info['description']),
+                'thumbnail': more_info['promo'],
+                }
+
+    def _real_extract(self, url):
+        """Return a playlist when the metadata has a 'lineup', else a single video."""
+        embedCode = re.match(self._VALID_URL, url).group('id')
+        player_url = 'http://player.ooyala.com/player.js?embedCode=%s' % embedCode
+        player = self._download_webpage(player_url, embedCode)
+        mobile_url = self._search_regex(r'mobile_player_url="(.+?)&device="', player, u'mobile player url')
+        mobile_player = self._download_webpage(mobile_url, embedCode)
+        videos_info = self._search_regex(r'eval\("\((\[{.*?stream_redirect.*?}\])\)"\);', mobile_player, u'info').replace('\\"','"')
+        videos_more_info = self._search_regex(r'eval\("\(({.*?\\"promo\\".*?})\)"', mobile_player, u'more info').replace('\\"','"')
+        videos_info = json.loads(videos_info)
+        videos_more_info = json.loads(videos_more_info)
+
+        if videos_more_info.get('lineup'):
+            videos = [self._extract_result(info, more_info) for (info, more_info) in zip(videos_info, videos_more_info['lineup'])]
+            return {'_type': 'playlist',
+                    'id': embedCode,
+                    'title': unescapeHTML(videos_more_info['title']),
+                    'entries': videos,
+                    }
+        return self._extract_result(videos_info[0], videos_more_info)
+
diff --git a/youtube_dl/extractor/tf1.py b/youtube_dl/extractor/tf1.py
index a8af89f83..772134a12 100644
--- a/youtube_dl/extractor/tf1.py
+++ b/youtube_dl/extractor/tf1.py
@@ -6,20 +6,17 @@ import re
from .common import InfoExtractor
class TF1IE(InfoExtractor):
- """
- TF1 uses the wat.tv player, currently it can only download videos with the
- html5 player enabled, it cannot download HD videos.
- """
- _WORKING = False
+ """TF1 uses the wat.tv player."""
_VALID_URL = r'http://videos.tf1.fr/.*-(.*?).html'
_TEST = {
u'url': u'http://videos.tf1.fr/auto-moto/citroen-grand-c4-picasso-2013-presentation-officielle-8062060.html',
u'file': u'10635995.mp4',
- u'md5': u'66789d3e91278d332f75e1feb7aea327',
+ u'md5': u'2e378cc28b9957607d5e88f274e637d8',
u'info_dict': {
u'title': u'Citroën Grand C4 Picasso 2013 : présentation officielle',
u'description': u'Vidéo officielle du nouveau Citroën Grand C4 Picasso, lancé à l\'automne 2013.',
- }
+ },
+ u'skip': u'Sometimes wat serves the whole file with the --test option',
}
def _real_extract(self, url):
diff --git a/youtube_dl/extractor/vevo.py b/youtube_dl/extractor/vevo.py
index 3b16dcfbc..67537eae5 100644
--- a/youtube_dl/extractor/vevo.py
+++ b/youtube_dl/extractor/vevo.py
@@ -35,12 +35,12 @@ class VevoIE(InfoExtractor):
self.report_extraction(video_id)
video_info = json.loads(info_json)
- m_urls = list(re.finditer(r'