diff --git a/README.md b/README.md index 7d190249d..ceb85fef1 100644 --- a/README.md +++ b/README.md @@ -116,12 +116,14 @@ which means you can modify it, redistribute it or use it however you like. -F, --list-formats list all available formats (currently youtube only) --write-sub write subtitle file (currently youtube only) + --write-auto-sub write automatic subtitle file (currently youtube + only) --only-sub [deprecated] alias of --skip-download --all-subs downloads all the available subtitles of the video (currently youtube only) --list-subs lists all available subtitles for the video (currently youtube only) - --sub-format FORMAT subtitle format [srt/sbv] (default=srt) + --sub-format FORMAT subtitle format [srt/sbv/vtt] (default=srt) (currently youtube only) --sub-lang LANG language of the subtitles to download (optional) use IETF language tags like 'en' @@ -166,7 +168,7 @@ The `-o` option allows users to indicate a template for the output file names. T - `playlist`: The name or the id of the playlist that contains the video. - `playlist_index`: The index of the video in the playlist, a five-digit number. -The current default template is `%(id)s.%(ext)s`, but that will be switchted to `%(title)s-%(id)s.%(ext)s` (which can be requested with `-t` at the moment). +The current default template is `%(title)s-%(id)s.%(ext)s`. In some cases, you don't want special characters such as 中, spaces, or &, such as when transferring the downloaded filename to a Windows system or the filename through an 8bit-unsafe channel. In these cases, add the `--restrict-filenames` flag to get a shorter title: diff --git a/devscripts/youtube_genalgo.py b/devscripts/youtube_genalgo.py new file mode 100644 index 000000000..c3d69e6f4 --- /dev/null +++ b/devscripts/youtube_genalgo.py @@ -0,0 +1,83 @@ +#!/usr/bin/env python + +# Generate youtube signature algorithm from test cases + +import sys + +tests = [ + # 88 + ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<", + "J:|}][{=+-_)(*&;%$#@>MNBVCXZASDFGH^KLPOIUYTREWQ0987654321mnbvcxzasdfghrklpoiuytej"), + # 87 + ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$^&*()_-+={[]}|:;?/>.<", + "!?;:|}][{=+-_)(*&^$#@/MNBVCXZASqFGHJKLPOIUYTREWQ0987654321mnbvcxzasdfghjklpoiuytr"), + # 86 - vfl_ymO4Z 2013/06/27 + ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[|};?/>.<", + "ertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!/#$%^&*()_-+={[|};?@"), + # 85 + ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?/>.<", + "{>/?;}[.=+-_)(*&^%$#@!MqBVCXZASDFwHJKLPOIUYTREWQ0987654321mnbvcxzasdfghjklpoiuytr"), + # 84 + ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?>.<", + "<.>?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWe098765432rmnbvcxzasdfghjklpoiuyt1"), + # 83 + ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!#$%^&*()_+={[};?/>.<", + "D.>/?;}[{=+_)(*&^%$#!MNBVCXeAS.<", + "Q>/?;}[{=+-(*<^%$#@!MNBVCXZASDFGHKLPOIUY8REWT0q&7654321mnbvcxzasdfghjklpoiuytrew9"), +] + +def find_matching(wrong, right): + idxs = [wrong.index(c) for c in right] + return compress(idxs) + return ('s[%d]' % i for i in idxs) + +def compress(idxs): + def _genslice(start, end, step): + starts = '' if start == 0 else str(start) + ends = ':%d' % (end+step) + steps = '' if step == 1 else (':%d' % step) + return 's[%s%s%s]' % (starts, ends, steps) + + step = None + for i, prev in zip(idxs[1:], idxs[:-1]): + if step is not None: + if i - prev == step: + continue + yield _genslice(start, prev, step) + step = None + continue + if i - prev in [-1, 1]: + step = i - prev + start = prev + continue + else: + yield 's[%d]' % prev + if step is None: + yield 's[%d]' % i + else: + yield _genslice(start, i, step) + +def _assert_compress(inp, exp): + res = list(compress(inp)) + if res != exp: + print('Got %r, expected %r' % (res, exp)) + assert res == exp +_assert_compress([0,2,4,6], ['s[0]', 's[2]', 's[4]', 's[6]']) +_assert_compress([0,1,2,4,6,7], ['s[:3]', 's[4]', 's[6:8]']) +_assert_compress([8,0,1,2,4,7,6,9], ['s[8]', 's[:3]', 's[4]', 's[7:5:-1]', 's[9]']) + +def gen(wrong, right, indent): + code = ' + '.join(find_matching(wrong, right)) + return 'if len(s) == %d:\n%s return %s\n' % (len(wrong), indent, code) + +def genall(tests): + indent = ' ' * 8 + return indent + (indent + 'el').join(gen(wrong, right, indent) for wrong,right in tests) + +def main(): + print(genall(tests)) + +if __name__ == '__main__': + main() diff --git a/test/helper.py b/test/helper.py new file mode 100644 index 000000000..a2b468b50 --- /dev/null +++ b/test/helper.py @@ -0,0 +1,44 @@ +import io +import json +import os.path + +import youtube_dl.extractor +from youtube_dl import YoutubeDL, YoutubeDLHandler +from youtube_dl.utils import ( + compat_cookiejar, + compat_urllib_request, +) + +# General configuration (from __init__, not very elegant...) +jar = compat_cookiejar.CookieJar() +cookie_processor = compat_urllib_request.HTTPCookieProcessor(jar) +proxy_handler = compat_urllib_request.ProxyHandler() +opener = compat_urllib_request.build_opener(proxy_handler, cookie_processor, YoutubeDLHandler()) +compat_urllib_request.install_opener(opener) + +PARAMETERS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), "parameters.json") +with io.open(PARAMETERS_FILE, encoding='utf-8') as pf: + parameters = json.load(pf) + +class FakeYDL(YoutubeDL): + def __init__(self): + self.result = [] + # Different instances of the downloader can't share the same dictionary + # some test set the "sublang" parameter, which would break the md5 checks. + self.params = dict(parameters) + def to_screen(self, s): + print(s) + def trouble(self, s, tb=None): + raise Exception(s) + def download(self, x): + self.result.append(x) + +def get_testcases(): + for ie in youtube_dl.extractor.gen_extractors(): + t = getattr(ie, '_TEST', None) + if t: + t['name'] = type(ie).__name__[:-len('IE')] + yield t + for t in getattr(ie, '_TESTS', []): + t['name'] = type(ie).__name__[:-len('IE')] + yield t diff --git a/test/test_all_urls.py b/test/test_all_urls.py index d3ee296be..39a5ee33a 100644 --- a/test/test_all_urls.py +++ b/test/test_all_urls.py @@ -7,7 +7,8 @@ import unittest import os sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) -from youtube_dl.extractor import YoutubeIE, YoutubePlaylistIE, YoutubeChannelIE, JustinTVIE +from youtube_dl.extractor import YoutubeIE, YoutubePlaylistIE, YoutubeChannelIE, JustinTVIE, gen_extractors +from helper import get_testcases class TestAllURLsMatching(unittest.TestCase): def test_youtube_playlist_matching(self): @@ -50,5 +51,16 @@ class TestAllURLsMatching(unittest.TestCase): self.assertEqual(YoutubeIE()._extract_id('https://www.youtube.com/watch?&v=BaW_jenozKc'), 'BaW_jenozKc') self.assertEqual(YoutubeIE()._extract_id('https://www.youtube.com/watch?feature=player_embedded&v=BaW_jenozKc'), 'BaW_jenozKc') + def test_no_duplicates(self): + ies = gen_extractors() + for tc in get_testcases(): + url = tc['url'] + for ie in ies: + if type(ie).__name__ in ['GenericIE', tc['name'] + 'IE']: + self.assertTrue(ie.suitable(url), '%s should match URL %r' % (type(ie).__name__, url)) + else: + self.assertFalse(ie.suitable(url), '%s should not match URL %r' % (type(ie).__name__, url)) + + if __name__ == '__main__': unittest.main() diff --git a/test/test_download.py b/test/test_download.py index 067bde4bb..21cb2e694 100644 --- a/test/test_download.py +++ b/test/test_download.py @@ -14,10 +14,8 @@ import binascii sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) import youtube_dl.YoutubeDL -import youtube_dl.extractor from youtube_dl.utils import * -DEF_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'tests.json') PARAMETERS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), "parameters.json") RETRIES = 3 @@ -56,8 +54,9 @@ def _file_md5(fn): with open(fn, 'rb') as f: return hashlib.md5(f.read()).hexdigest() -with io.open(DEF_FILE, encoding='utf-8') as deff: - defs = json.load(deff) +from helper import get_testcases +defs = get_testcases() + with io.open(PARAMETERS_FILE, encoding='utf-8') as pf: parameters = json.load(pf) @@ -73,22 +72,23 @@ def generator(test_case): def test_template(self): ie = youtube_dl.extractor.get_info_extractor(test_case['name']) + def print_skipping(reason): + print('Skipping %s: %s' % (test_case['name'], reason)) if not ie._WORKING: - print('Skipping: IE marked as not _WORKING') + print_skipping('IE marked as not _WORKING') return if 'playlist' not in test_case and not test_case['file']: - print('Skipping: No output file specified') + print_skipping('No output file specified') return if 'skip' in test_case: - print('Skipping: {0}'.format(test_case['skip'])) + print_skipping(test_case['skip']) return params = self.parameters.copy() params.update(test_case.get('params', {})) ydl = YoutubeDL(params) - for ie in youtube_dl.extractor.gen_extractors(): - ydl.add_info_extractor(ie) + ydl.add_default_info_extractors() finished_hook_called = set() def _hook(status): if status['status'] == 'finished': @@ -155,9 +155,12 @@ def generator(test_case): ### And add them to TestDownload for n, test_case in enumerate(defs): test_method = generator(test_case) - test_method.__name__ = "test_{0}".format(test_case["name"]) - if getattr(TestDownload, test_method.__name__, False): - test_method.__name__ = "test_{0}_{1}".format(test_case["name"], n) + tname = 'test_' + str(test_case['name']) + i = 1 + while hasattr(TestDownload, tname): + tname = 'test_' + str(test_case['name']) + '_' + str(i) + i += 1 + test_method.__name__ = tname setattr(TestDownload, test_method.__name__, test_method) del test_method diff --git a/test/test_youtube_lists.py b/test/test_youtube_lists.py index 320b44082..4486b7eb0 100644 --- a/test/test_youtube_lists.py +++ b/test/test_youtube_lists.py @@ -10,30 +10,8 @@ sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) from youtube_dl.extractor import YoutubeUserIE, YoutubePlaylistIE, YoutubeIE, YoutubeChannelIE from youtube_dl.utils import * -from youtube_dl import YoutubeDL -PARAMETERS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), "parameters.json") -with io.open(PARAMETERS_FILE, encoding='utf-8') as pf: - parameters = json.load(pf) - -# General configuration (from __init__, not very elegant...) -jar = compat_cookiejar.CookieJar() -cookie_processor = compat_urllib_request.HTTPCookieProcessor(jar) -proxy_handler = compat_urllib_request.ProxyHandler() -opener = compat_urllib_request.build_opener(proxy_handler, cookie_processor, YoutubeDLHandler()) -compat_urllib_request.install_opener(opener) - -class FakeYDL(YoutubeDL): - def __init__(self): - self.result = [] - self.params = parameters - def to_screen(self, s): - print(s) - def trouble(self, s, tb=None): - raise Exception(s) - def extract_info(self, url): - self.result.append(url) - return url +from helper import FakeYDL class TestYoutubeLists(unittest.TestCase): def assertIsPlaylist(self,info): diff --git a/test/test_youtube_sig.py b/test/test_youtube_sig.py new file mode 100755 index 000000000..e87b6259b --- /dev/null +++ b/test/test_youtube_sig.py @@ -0,0 +1,57 @@ +#!/usr/bin/env python + +import unittest +import sys + +# Allow direct execution +import os +sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from youtube_dl.extractor.youtube import YoutubeIE +from helper import FakeYDL + +sig = YoutubeIE(FakeYDL())._decrypt_signature + +class TestYoutubeSig(unittest.TestCase): + def test_43_43(self): + wrong = '5AEEAE0EC39677BC65FD9021CCD115F1F2DBD5A59E4.C0B243A3E2DED6769199AF3461781E75122AE135135' + right = '931EA22157E1871643FA9519676DED253A342B0C.4E95A5DBD2F1F511DCC1209DF56CB77693CE0EAE' + self.assertEqual(sig(wrong), right) + + def test_88(self): + wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<" + right = "J:|}][{=+-_)(*&;%$#@>MNBVCXZASDFGH^KLPOIUYTREWQ0987654321mnbvcxzasdfghrklpoiuytej" + self.assertEqual(sig(wrong), right) + + def test_87(self): + wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$^&*()_-+={[]}|:;?/>.<" + right = "!?;:|}][{=+-_)(*&^$#@/MNBVCXZASqFGHJKLPOIUYTREWQ0987654321mnbvcxzasdfghjklpoiuytr" + self.assertEqual(sig(wrong), right) + + def test_86(self): + wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[|};?/>.<" + right = "ertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!/#$%^&*()_-+={[|};?@" + self.assertEqual(sig(wrong), right) + + def test_85(self): + wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?/>.<" + right = "{>/?;}[.=+-_)(*&^%$#@!MqBVCXZASDFwHJKLPOIUYTREWQ0987654321mnbvcxzasdfghjklpoiuytr" + self.assertEqual(sig(wrong), right) + + def test_84(self): + wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?>.<" + right = "<.>?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWe098765432rmnbvcxzasdfghjklpoiuyt1" + self.assertEqual(sig(wrong), right) + + def test_83(self): + wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!#$%^&*()_+={[};?/>.<" + right = "D.>/?;}[{=+_)(*&^%$#!MNBVCXeAS>>>S<<<<<< -1" - } - }, - { - "name": "YouPorn", - "md5": "c37ddbaaa39058c76a7e86c6813423c1", - "url": "http://www.youporn.com/watch/505835/sex-ed-is-it-safe-to-masturbate-daily/", - "file": "505835.mp4", - "info_dict": { - "upload_date": "20101221", - "description": "Love & Sex Answers: http://bit.ly/DanAndJenn -- Is It Unhealthy To Masturbate Daily?", - "uploader": "Ask Dan And Jennifer", - "title": "Sex Ed: Is It Safe To Masturbate Daily?" - } - }, - { - "name": "Pornotube", - "md5": "374dd6dcedd24234453b295209aa69b6", - "url": "http://pornotube.com/c/173/m/1689755/Marilyn-Monroe-Bathing", - "file": "1689755.flv", - "info_dict": { - "upload_date": "20090708", - "title": "Marilyn-Monroe-Bathing" - } - }, - { - "name": "YouJizz", - "md5": "07e15fa469ba384c7693fd246905547c", - "url": "http://www.youjizz.com/videos/zeichentrick-1-2189178.html", - "file": "2189178.flv", - "info_dict": { - "title": "Zeichentrick 1" - } - }, - { - "name": "Vimeo", - "md5": "8879b6cc097e987f02484baf890129e5", - "url": "http://vimeo.com/56015672", - "file": "56015672.mp4", - "info_dict": { - "title": "youtube-dl test video - ★ \" ' 幸 / \\ ä ↭ 𝕐", - "uploader": "Filippo Valsorda", - "uploader_id": "user7108434", - "upload_date": "20121220", - "description": "This is a test case for youtube-dl.\nFor more information, see github.com/rg3/youtube-dl\nTest chars: ★ \" ' 幸 / \\ ä ↭ 𝕐" - } - }, - { - "name": "Soundcloud", - "md5": "ebef0a451b909710ed1d7787dddbf0d7", - "url": "http://soundcloud.com/ethmusic/lostin-powers-she-so-heavy", - "file": "62986583.mp3", - "info_dict": { - "upload_date": "20121011", - "description": "No Downloads untill we record the finished version this weekend, i was too pumped n i had to post it , earl is prolly gonna b hella p.o'd", - "uploader": "E.T. ExTerrestrial Music", - "title": "Lostin Powers - She so Heavy (SneakPreview) Adrian Ackers Blueprint 1" - } - }, - { - "name": "StanfordOpenClassroom", - "md5": "544a9468546059d4e80d76265b0443b8", - "url": "http://openclassroom.stanford.edu/MainFolder/VideoPage.php?course=PracticalUnix&video=intro-environment&speed=100", - "file": "PracticalUnix_intro-environment.mp4", - "info_dict": { - "title": "Intro Environment" - } - }, - { - "name": "XNXX", - "md5": "0831677e2b4761795f68d417e0b7b445", - "url": "http://video.xnxx.com/video1135332/lida_naked_funny_actress_5_", - "file": "1135332.flv", - "info_dict": { - "title": "lida » Naked Funny Actress (5)" - } - }, - { - "name": "Youku", - "url": "http://v.youku.com/v_show/id_XNDgyMDQ2NTQw.html", - "file": "XNDgyMDQ2NTQw_part00.flv", - "md5": "ffe3f2e435663dc2d1eea34faeff5b5b", - "params": { "test": false }, - "info_dict": { - "title": "youtube-dl test video \"'/\\ä↭𝕐" - } - }, - { - "name": "NBA", - "url": "http://www.nba.com/video/games/nets/2012/12/04/0021200253-okc-bkn-recap.nba/index.html", - "file": "0021200253-okc-bkn-recap.nba.mp4", - "md5": "c0edcfc37607344e2ff8f13c378c88a4", - "info_dict": { - "description": "Kevin Durant scores 32 points and dishes out six assists as the Thunder beat the Nets in Brooklyn.", - "title": "Thunder vs. Nets" - } - }, - { - "name": "JustinTV", - "url": "http://www.twitch.tv/thegamedevhub/b/296128360", - "file": "296128360.flv", - "md5": "ecaa8a790c22a40770901460af191c9a", - "info_dict": { - "upload_date": "20110927", - "uploader_id": 25114803, - "uploader": "thegamedevhub", - "title": "Beginner Series - Scripting With Python Pt.1" - } - }, - { - "name": "MyVideo", - "url": "http://www.myvideo.de/watch/8229274/bowling_fail_or_win", - "file": "8229274.flv", - "md5": "2d2753e8130479ba2cb7e0a37002053e", - "info_dict": { - "title": "bowling-fail-or-win" - } - }, - { - "name": "Escapist", - "url": "http://www.escapistmagazine.com/videos/view/the-escapist-presents/6618-Breaking-Down-Baldurs-Gate", - "file": "6618-Breaking-Down-Baldurs-Gate.mp4", - "md5": "c6793dbda81388f4264c1ba18684a74d", - "info_dict": { - "description": "Baldur's Gate: Original, Modded or Enhanced Edition? I'll break down what you can expect from the new Baldur's Gate: Enhanced Edition.", - "uploader": "the-escapist-presents", - "title": "Breaking Down Baldur's Gate" - } - }, - { - "name": "GooglePlus", - "url": "https://plus.google.com/u/0/108897254135232129896/posts/ZButuJc6CtH", - "file": "ZButuJc6CtH.flv", - "info_dict": { - "upload_date": "20120613", - "uploader": "井上ヨシマサ", - "title": "嘆きの天使 降臨" - } - }, - { - "name": "FunnyOrDie", - "url": "http://www.funnyordie.com/videos/0732f586d7/heart-shaped-box-literal-video-version", - "file": "0732f586d7.mp4", - "md5": "f647e9e90064b53b6e046e75d0241fbd", - "info_dict": { - "description": "Lyrics changed to match the video. Spoken cameo by Obscurus Lupa (from ThatGuyWithTheGlasses.com). Based on a concept by Dustin McLean (DustFilms.com). Performed, edited, and written by David A. Scott.", - "title": "Heart-Shaped Box: Literal Video Version" - } - }, - { - "name": "Steam", - "url": "http://store.steampowered.com/video/105600/", - "playlist": [ - { - "file": "81300.flv", - "md5": "f870007cee7065d7c76b88f0a45ecc07", - "info_dict": { - "title": "Terraria 1.1 Trailer" - } - }, - { - "file": "80859.flv", - "md5": "61aaf31a5c5c3041afb58fb83cbb5751", - "info_dict": { - "title": "Terraria Trailer" - } - } - ] - }, - { - "name": "Ustream", - "url": "http://www.ustream.tv/recorded/20274954", - "file": "20274954.flv", - "md5": "088f151799e8f572f84eb62f17d73e5c", - "info_dict": { - "title": "Young Americans for Liberty February 7, 2012 2:28 AM", - "uploader": "Young Americans for Liberty" - } - }, - { - "name": "InfoQ", - "url": "http://www.infoq.com/presentations/A-Few-of-My-Favorite-Python-Things", - "file": "12-jan-pythonthings.mp4", - "info_dict": { - "description": "Mike Pirnat presents some tips and tricks, standard libraries and third party packages that make programming in Python a richer experience.", - "title": "A Few of My Favorite [Python] Things" - }, - "params": { - "skip_download": true - } - }, - { - "name": "ComedyCentral", - "url": "http://www.thedailyshow.com/watch/thu-december-13-2012/kristen-stewart", - "file": "422212.mp4", - "md5": "4e2f5cb088a83cd8cdb7756132f9739d", - "info_dict": { - "upload_date": "20121214", - "description": "Kristen Stewart", - "uploader": "thedailyshow", - "title": "thedailyshow-kristen-stewart part 1" - } - }, - { - "name": "RBMARadio", - "url": "http://www.rbmaradio.com/shows/ford-lopatin-live-at-primavera-sound-2011", - "file": "ford-lopatin-live-at-primavera-sound-2011.mp3", - "md5": "6bc6f9bcb18994b4c983bc3bf4384d95", - "info_dict": { - "title": "Live at Primavera Sound 2011", - "description": "Joel Ford and Daniel \u2019Oneohtrix Point Never\u2019 Lopatin fly their midified pop extravaganza to Spain. Live at Primavera Sound 2011.", - "uploader": "Ford & Lopatin", - "uploader_id": "ford-lopatin", - "location": "Spain" - } - }, - { - "name": "Facebook", - "url": "https://www.facebook.com/photo.php?v=120708114770723", - "file": "120708114770723.mp4", - "md5": "48975a41ccc4b7a581abd68651c1a5a8", - "info_dict": { - "title": "PEOPLE ARE AWESOME 2013", - "duration": 279 - } - }, - { - "name": "EightTracks", - "url": "http://8tracks.com/ytdl/youtube-dl-test-tracks-a", - "playlist": [ - { - "file": "11885610.m4a", - "md5": "96ce57f24389fc8734ce47f4c1abcc55", - "info_dict": { - "title": "youtue-dl project<>\"' - youtube-dl test track 1 \"'/\\\u00e4\u21ad", - "uploader_id": "ytdl" - } - }, - { - "file": "11885608.m4a", - "md5": "4ab26f05c1f7291ea460a3920be8021f", - "info_dict": { - "title": "youtube-dl project - youtube-dl test track 2 \"'/\\\u00e4\u21ad", - "uploader_id": "ytdl" - - } - }, - { - "file": "11885679.m4a", - "md5": "d30b5b5f74217410f4689605c35d1fd7", - "info_dict": { - "title": "youtube-dl project as well - youtube-dl test track 3 \"'/\\\u00e4\u21ad", - "uploader_id": "ytdl" - } - }, - { - "file": "11885680.m4a", - "md5": "4eb0a669317cd725f6bbd336a29f923a", - "info_dict": { - "title": "youtube-dl project as well - youtube-dl test track 4 \"'/\\\u00e4\u21ad", - "uploader_id": "ytdl" - } - }, - { - "file": "11885682.m4a", - "md5": "1893e872e263a2705558d1d319ad19e8", - "info_dict": { - "title": "PH - youtube-dl test track 5 \"'/\\\u00e4\u21ad", - "uploader_id": "ytdl" - } - }, - { - "file": "11885683.m4a", - "md5": "b673c46f47a216ab1741ae8836af5899", - "info_dict": { - "title": "PH - youtube-dl test track 6 \"'/\\\u00e4\u21ad", - "uploader_id": "ytdl" - } - }, - { - "file": "11885684.m4a", - "md5": "1d74534e95df54986da7f5abf7d842b7", - "info_dict": { - "title": "phihag - youtube-dl test track 7 \"'/\\\u00e4\u21ad", - "uploader_id": "ytdl" - } - }, - { - "file": "11885685.m4a", - "md5": "f081f47af8f6ae782ed131d38b9cd1c0", - "info_dict": { - "title": "phihag - youtube-dl test track 8 \"'/\\\u00e4\u21ad", - "uploader_id": "ytdl" - } - } - ] - }, - { - "name": "Keek", - "url": "http://www.keek.com/ytdl/keeks/NODfbab", - "file": "NODfbab.mp4", - "md5": "9b0636f8c0f7614afa4ea5e4c6e57e83", - "info_dict": { - "uploader": "ytdl", - "title": "test chars: \"'/\\ä<>This is a test video for youtube-dl.For more information, contact phihag@phihag.de ." - } - }, - { - "name": "TED", - "url": "http://www.ted.com/talks/dan_dennett_on_our_consciousness.html", - "file": "102.mp4", - "md5": "8cd9dfa41ee000ce658fd48fb5d89a61", - "info_dict": { - "title": "Dan Dennett: The illusion of consciousness", - "description": "md5:c6fa72e6eedbd938c9caf6b2702f5922" - } - }, - { - "name": "MySpass", - "url": "http://www.myspass.de/myspass/shows/tvshows/absolute-mehrheit/Absolute-Mehrheit-vom-17022013-Die-Highlights-Teil-2--/11741/", - "file": "11741.mp4", - "md5": "0b49f4844a068f8b33f4b7c88405862b", - "info_dict": { - "description": "Wer kann in die Fußstapfen von Wolfgang Kubicki treten und die Mehrheit der Zuschauer hinter sich versammeln? Wird vielleicht sogar die Absolute Mehrheit geknackt und der Jackpot von 200.000 Euro mit nach Hause genommen?", - "title": "Absolute Mehrheit vom 17.02.2013 - Die Highlights, Teil 2" - } - }, - { - "name": "Generic", - "url": "http://www.hodiho.fr/2013/02/regis-plante-sa-jeep.html", - "file": "13601338388002.mp4", - "md5": "85b90ccc9d73b4acd9138d3af4c27f89", - "info_dict": { - "uploader": "www.hodiho.fr", - "title": "Régis plante sa Jeep" - } - }, - { - "name": "Spiegel", - "url": "http://www.spiegel.de/video/vulkan-tungurahua-in-ecuador-ist-wieder-aktiv-video-1259285.html", - "file": "1259285.mp4", - "md5": "2c2754212136f35fb4b19767d242f66e", - "info_dict": { - "title": "Vulkanausbruch in Ecuador: Der \"Feuerschlund\" ist wieder aktiv" - } - }, - { - "name": "LiveLeak", - "md5": "0813c2430bea7a46bf13acf3406992f4", - "url": "http://www.liveleak.com/view?i=757_1364311680", - "file": "757_1364311680.mp4", - "info_dict": { - "title": "Most unlucky car accident", - "description": "extremely bad day for this guy..!", - "uploader": "ljfriel2" - } - }, - { - "name": "WorldStarHipHop", - "url": "http://www.worldstarhiphop.com/videos/video.php?v=wshh6a7q1ny0G34ZwuIO", - "file": "wshh6a7q1ny0G34ZwuIO.mp4", - "md5": "9d04de741161603bf7071bbf4e883186", - "info_dict": { - "title": "Video: KO Of The Week: MMA Fighter Gets Knocked Out By Swift Head Kick!" - } - }, - { - "name": "ARD", - "url": "http://www.ardmediathek.de/das-erste/tagesschau-in-100-sek?documentId=14077640", - "file": "14077640.mp4", - "md5": "6ca8824255460c787376353f9e20bbd8", - "info_dict": { - "title": "11.04.2013 09:23 Uhr - Tagesschau in 100 Sekunden" - }, - "skip": "Requires rtmpdump" - }, - { - "name": "Tumblr", - "url": "http://resigno.tumblr.com/post/53364321212/e-de-extrema-importancia-que-esse-video-seja", - "file": "53364321212.mp4", - "md5": "0716d3dd51baf68a28b40fdf1251494e", - "info_dict": { - "title": "Rafael Lemos | Tumblr" - } - }, - { - "name": "SoundcloudSet", - "url":"https://soundcloud.com/the-concept-band/sets/the-royal-concept-ep", - "playlist":[ - { - "file":"30510138.mp3", - "md5":"f9136bf103901728f29e419d2c70f55d", - "info_dict": { - "upload_date": "20111213", - "description": "The Royal Concept from Stockholm\r\nFilip / Povel / David / Magnus\r\nwww.royalconceptband.com", - "uploader": "The Royal Concept", - "title": "D-D-Dance" - } - }, - { - "file":"47127625.mp3", - "md5":"09b6758a018470570f8fd423c9453dd8", - "info_dict": { - "upload_date": "20120521", - "description": "The Royal Concept from Stockholm\r\nFilip / Povel / David / Magnus\r\nwww.royalconceptband.com", - "uploader": "The Royal Concept", - "title": "The Royal Concept - Gimme Twice" - } - }, - { - "file":"47127627.mp3", - "md5":"154abd4e418cea19c3b901f1e1306d9c", - "info_dict": { - "upload_date": "20120521", - "uploader": "The Royal Concept", - "title": "Goldrushed" - } - }, - { - "file":"47127629.mp3", - "md5":"2f5471edc79ad3f33a683153e96a79c1", - "info_dict": { - "upload_date": "20120521", - "description": "The Royal Concept from Stockholm\r\nFilip / Povel / David / Magnus\r\nwww.royalconceptband.com", - "uploader": "The Royal Concept", - "title": "In the End" - } - }, - { - "file":"47127631.mp3", - "md5":"f9ba87aa940af7213f98949254f1c6e2", - "info_dict": { - "upload_date": "20120521", - "description": "The Royal Concept from Stockholm\r\nFilip / David / Povel / Magnus\r\nwww.theroyalconceptband.com", - "uploader": "The Royal Concept", - "title": "Knocked Up" - } - }, - { - "file":"75206121.mp3", - "md5":"f9d1fe9406717e302980c30de4af9353", - "info_dict": { - "upload_date": "20130116", - "description": "The unreleased track World on Fire premiered on the CW's hit show Arrow (8pm/7pm central). \r\nAs a gift to our fans we would like to offer you a free download of the track! ", - "uploader": "The Royal Concept", - "title": "World On Fire" - } - } - ] - }, - { - "name":"Bandcamp", - "url":"http://youtube-dl.bandcamp.com/track/youtube-dl-test-song", - "file":"1812978515.mp3", - "md5":"cdeb30cdae1921719a3cbcab696ef53c", - "info_dict": { - "title":"youtube-dl test song \"'/\\ä↭" - }, - "skip": "There is a limit of 200 free downloads / month for the test song" - }, - { - "name": "RedTube", - "url": "http://www.redtube.com/66418", - "file": "66418.mp4", - "md5": "7b8c22b5e7098a3e1c09709df1126d2d", - "info_dict":{ - "title":"Sucked on a toilet" - } - }, - { - "name": "Photobucket", - "url": "http://media.photobucket.com/user/rachaneronas/media/TiredofLinkBuildingTryBacklinkMyDomaincom_zpsc0c3b9fa.mp4.html?filters[term]=search&filters[primary]=videos&filters[secondary]=images&sort=1&o=0", - "file": "zpsc0c3b9fa.mp4", - "md5": "7dabfb92b0a31f6c16cebc0f8e60ff99", - "info_dict": { - "upload_date": "20130504", - "uploader": "rachaneronas", - "title": "Tired of Link Building? Try BacklinkMyDomain.com!" - } - }, - { - "name": "Ina", - "url": "www.ina.fr/video/I12055569/francois-hollande-je-crois-que-c-est-clair-video.html", - "file": "I12055569.mp4", - "md5": "a667021bf2b41f8dc6049479d9bb38a3", - "info_dict":{ - "title":"François Hollande \"Je crois que c'est clair\"" - } - }, - { - "name": "Yahoo", - "url": "http://screen.yahoo.com/julian-smith-travis-legg-watch-214727115.html", - "file": "214727115.flv", - "md5": "2e717f169c1be93d84d3794a00d4a325", - "info_dict": { - "title": "Julian Smith & Travis Legg Watch Julian Smith" - }, - "skip": "Requires rtmpdump" - }, - { - "name": "Howcast", - "url": "http://www.howcast.com/videos/390161-How-to-Tie-a-Square-Knot-Properly", - "file": "390161.mp4", - "md5": "1d7ba54e2c9d7dc6935ef39e00529138", - "info_dict":{ - "title":"How to Tie a Square Knot Properly", - "description":"The square knot, also known as the reef knot, is one of the oldest, most basic knots to tie, and can be used in many different ways. Here's the proper way to tie a square knot." - } - }, - { - "name": "Vine", - "url": "https://vine.co/v/b9KOOWX7HUx", - "file": "b9KOOWX7HUx.mp4", - "md5": "2f36fed6235b16da96ce9b4dc890940d", - "info_dict":{ - "title": "Chicken.", - "uploader": "Jack Dorsey" - } - }, - { - "name": "Flickr", - "url": "http://www.flickr.com/photos/forestwander-nature-pictures/5645318632/in/photostream/", - "file": "5645318632.mp4", - "md5": "6fdc01adbc89d72fc9c4f15b4a4ba87b", - "info_dict":{ - "title": "Dark Hollow Waterfalls", - "uploader_id": "forestwander-nature-pictures", - "description": "Waterfalls in the Springtime at Dark Hollow Waterfalls. These are located just off of Skyline Drive in Virginia. They are only about 6/10 of a mile hike but it is a pretty steep hill and a good climb back up." - } - }, - { - "name": "Teamcoco", - "url": "http://teamcoco.com/video/louis-ck-interview-george-w-bush", - "file": "19705.mp4", - "md5": "27b6f7527da5acf534b15f21b032656e", - "info_dict":{ - "title": "Louis C.K. Interview Pt. 1 11/3/11", - "description": "Louis C.K. got starstruck by George W. Bush, so what? Part one." - } - }, - { - "name": "XHamster", - "url": "http://xhamster.com/movies/1509445/femaleagent_shy_beauty_takes_the_bait.html", - "file": "1509445.flv", - "md5": "9f48e0e8d58e3076bb236ff412ab62fa", - "info_dict": { - "upload_date": "20121014", - "uploader_id": "Ruseful2011", - "title": "FemaleAgent Shy beauty takes the bait" - } - }, - { - "name": "Hypem", - "url": "http://hypem.com/track/1v6ga/BODYWORK+-+TAME", - "file": "1v6ga.mp3", - "md5": "b9cc91b5af8995e9f0c1cee04c575828", - "info_dict":{ - "title":"Tame" - } - }, - { - "name": "Vbox7", - "url": "http://vbox7.com/play:249bb972c2", - "file": "249bb972c2.flv", - "md5": "9c70d6d956f888bdc08c124acc120cfe", - "info_dict":{ - "title":"Смях! Чудо - чист за секунди - Скрита камера" - } - }, - { - "name": "Gametrailers", - "url": "http://www.gametrailers.com/videos/zbvr8i/mirror-s-edge-2-e3-2013--debut-trailer", - "file": "zbvr8i.flv", - "md5": "c3edbc995ab4081976e16779bd96a878", - "info_dict": { - "title": "E3 2013: Debut Trailer" - }, - "skip": "Requires rtmpdump" - }, - { - "name": "Statigram", - "url": "http://statigr.am/p/484091715184808010_284179915", - "file": "484091715184808010_284179915.mp4", - "md5": "deda4ff333abe2e118740321e992605b", - "info_dict": { - "uploader_id": "videoseconds", - "title": "Instagram photo by @videoseconds (Videos)" - } - }, - { - "name": "Break", - "url": "http://www.break.com/video/when-girls-act-like-guys-2468056", - "file": "2468056.mp4", - "md5": "a3513fb1547fba4fb6cfac1bffc6c46b", - "info_dict": { - "title": "When Girls Act Like D-Bags" - } - }, - { - "name": "Vevo", - "url": "http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280", - "file": "GB1101300280.mp4", - "md5": "06bea460acb744eab74a9d7dcb4bfd61", - "info_dict": { - "title": "Somebody To Die For", - "upload_date": "20130624", - "uploader": "Hurts" - } - }, - { - "name": "Tudou", - "url": "http://www.tudou.com/listplay/zzdE77v6Mmo/2xN2duXMxmw.html", - "file": "159447792.f4v", - "md5": "ad7c358a01541e926a1e413612c6b10a", - "info_dict": { - "title": "卡马乔国足开大脚长传冲吊集锦" - } - } -] diff --git a/youtube_dl/FileDownloader.py b/youtube_dl/FileDownloader.py index 57d8b139f..f748df428 100644 --- a/youtube_dl/FileDownloader.py +++ b/youtube_dl/FileDownloader.py @@ -137,7 +137,7 @@ class FileDownloader(object): self.ydl.report_warning(*args, **kargs) def report_error(self, *args, **kargs): - self.ydl.error(*args, **kargs) + self.ydl.report_error(*args, **kargs) def slow_down(self, start_time, byte_counter): """Sleep if the download speed is over the rate limit.""" diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index e9a24a95a..d3281fed2 100644 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -13,7 +13,7 @@ import time import traceback from .utils import * -from .extractor import get_info_extractor +from .extractor import get_info_extractor, gen_extractors from .FileDownloader import FileDownloader @@ -75,7 +75,7 @@ class YoutubeDL(object): writeautomaticsub: Write the automatic subtitles to a file allsubtitles: Downloads all the subtitles of the video listsubtitles: Lists all available subtitles for the video - subtitlesformat: Subtitle format [sbv/srt] (default=srt) + subtitlesformat: Subtitle format [srt/sbv/vtt] (default=srt) subtitleslang: Language of the subtitles to download keepvideo: Keep the video file after post-processing daterange: A DateRange object, download only if the upload_date is in the range. @@ -113,6 +113,13 @@ class YoutubeDL(object): self._ies.append(ie) ie.set_downloader(self) + def add_default_info_extractors(self): + """ + Add the InfoExtractors returned by gen_extractors to the end of the list + """ + for ie in gen_extractors(): + self.add_info_extractor(ie) + def add_post_processor(self, pp): """Add a PostProcessor object to the end of the chain.""" self._pps.append(pp) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 3fd70a750..77432a9a6 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -26,7 +26,8 @@ __authors__ = ( 'Julien Fraichard', 'Johny Mo Swag', 'Axel Noack', - ) + 'Albert Kim', +) __license__ = 'Public Domain' @@ -205,7 +206,7 @@ def parseOpts(overrideArguments=None): help='lists all available subtitles for the video (currently youtube only)', default=False) video_format.add_option('--sub-format', action='store', dest='subtitlesformat', metavar='FORMAT', - help='subtitle format [srt/sbv] (default=srt) (currently youtube only)', default='srt') + help='subtitle format [srt/sbv/vtt] (default=srt) (currently youtube only)', default='srt') video_format.add_option('--sub-lang', '--srt-lang', action='store', dest='subtitleslang', metavar='LANG', help='language of the subtitles to download (optional) use IETF language tags like \'en\'') @@ -576,8 +577,7 @@ def _real_main(argv=None): ydl.to_screen(u'[debug] Python version %s - %s' %(platform.python_version(), platform.platform())) ydl.to_screen(u'[debug] Proxy map: ' + str(proxy_handler.proxies)) - for extractor in extractors: - ydl.add_info_extractor(extractor) + ydl.add_default_info_extractors() # PostProcessors if opts.extractaudio: diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 0ea990860..98733e394 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -1,11 +1,13 @@ from .ard import ARDIE from .arte import ArteTvIE +from .auengine import AUEngineIE from .bandcamp import BandcampIE from .bliptv import BlipTVIE, BlipTVUserIE from .breakcom import BreakIE from .collegehumor import CollegeHumorIE from .comedycentral import ComedyCentralIE +from .cspan import CSpanIE from .dailymotion import DailymotionIE from .depositfiles import DepositFilesIE from .eighttracks import EightTracksIE @@ -13,10 +15,12 @@ from .escapist import EscapistIE from .facebook import FacebookIE from .flickr import FlickrIE from .funnyordie import FunnyOrDieIE +from .gamespot import GameSpotIE from .gametrailers import GametrailersIE from .generic import GenericIE from .googleplus import GooglePlusIE from .googlesearch import GoogleSearchIE +from .hotnewhiphop import HotNewHipHopIE from .howcast import HowcastIE from .hypem import HypemIE from .ina import InaIE @@ -35,6 +39,7 @@ from .photobucket import PhotobucketIE from .pornotube import PornotubeIE from .rbmaradio import RBMARadioIE from .redtube import RedTubeIE +from .ringtv import RingTVIE from .soundcloud import SoundcloudIE, SoundcloudSetIE from .spiegel import SpiegelIE from .stanfordoc import StanfordOpenClassroomIE @@ -42,13 +47,17 @@ from .statigram import StatigramIE from .steam import SteamIE from .teamcoco import TeamcocoIE from .ted import TEDIE +from .tf1 import TF1IE from .tudou import TudouIE from .tumblr import TumblrIE +from .tutv import TutvIE from .ustream import UstreamIE from .vbox7 import Vbox7IE from .vevo import VevoIE from .vimeo import VimeoIE from .vine import VineIE +from .wat import WatIE +from .wimp import WimpIE from .worldstarhiphop import WorldStarHipHopIE from .xhamster import XHamsterIE from .xnxx import XNXXIE @@ -61,79 +70,18 @@ from .youtube import YoutubeIE, YoutubePlaylistIE, YoutubeSearchIE, YoutubeUserI from .zdf import ZDFIE +_ALL_CLASSES = [ + klass + for name, klass in globals().items() + if name.endswith('IE') and name != 'GenericIE' +] +_ALL_CLASSES.append(GenericIE) + def gen_extractors(): """ Return a list of an instance of every supported extractor. The order does matter; the first extractor matched is the one handling the URL. """ - return [ - YoutubePlaylistIE(), - YoutubeChannelIE(), - YoutubeUserIE(), - YoutubeSearchIE(), - YoutubeIE(), - MetacafeIE(), - DailymotionIE(), - GoogleSearchIE(), - PhotobucketIE(), - YahooIE(), - YahooSearchIE(), - DepositFilesIE(), - FacebookIE(), - BlipTVIE(), - BlipTVUserIE(), - VimeoIE(), - MyVideoIE(), - ComedyCentralIE(), - EscapistIE(), - CollegeHumorIE(), - XVideosIE(), - SoundcloudSetIE(), - SoundcloudIE(), - InfoQIE(), - MixcloudIE(), - StanfordOpenClassroomIE(), - MTVIE(), - YoukuIE(), - XNXXIE(), - YouJizzIE(), - PornotubeIE(), - YouPornIE(), - GooglePlusIE(), - ArteTvIE(), - NBAIE(), - WorldStarHipHopIE(), - JustinTVIE(), - FunnyOrDieIE(), - SteamIE(), - UstreamIE(), - RBMARadioIE(), - EightTracksIE(), - KeekIE(), - TEDIE(), - MySpassIE(), - SpiegelIE(), - LiveLeakIE(), - ARDIE(), - ZDFIE(), - TumblrIE(), - BandcampIE(), - RedTubeIE(), - InaIE(), - HowcastIE(), - VineIE(), - FlickrIE(), - TeamcocoIE(), - XHamsterIE(), - HypemIE(), - Vbox7IE(), - GametrailersIE(), - StatigramIE(), - BreakIE(), - VevoIE(), - JukeboxIE(), - TudouIE(), - GenericIE() - ] + return [klass() for klass in _ALL_CLASSES] def get_info_extractor(ie_name): """Returns the info extractor class with the given ie_name""" diff --git a/youtube_dl/extractor/ard.py b/youtube_dl/extractor/ard.py index e1ecdf4d3..5793a4129 100644 --- a/youtube_dl/extractor/ard.py +++ b/youtube_dl/extractor/ard.py @@ -9,6 +9,15 @@ class ARDIE(InfoExtractor): _VALID_URL = r'^(?:https?://)?(?:(?:www\.)?ardmediathek\.de|mediathek\.daserste\.de)/(?:.*/)(?P[^/\?]+)(?:\?.*)?' _TITLE = r'(?P.*)</h1>' _MEDIA_STREAM = r'mediaCollection\.addMediaStream\((?P<media_type>\d+), (?P<quality>\d+), "(?P<rtmp_url>[^"]*)", "(?P<video_url>[^"]*)", "[^"]*"\)' + _TEST = { + u'url': u'http://www.ardmediathek.de/das-erste/tagesschau-in-100-sek?documentId=14077640', + u'file': u'14077640.mp4', + u'md5': u'6ca8824255460c787376353f9e20bbd8', + u'info_dict': { + u"title": u"11.04.2013 09:23 Uhr - Tagesschau in 100 Sekunden" + }, + u'skip': u'Requires rtmpdump' + } def _real_extract(self, url): # determine video id from url diff --git a/youtube_dl/extractor/arte.py b/youtube_dl/extractor/arte.py index 82e3ffe04..b061b9566 100644 --- a/youtube_dl/extractor/arte.py +++ b/youtube_dl/extractor/arte.py @@ -1,53 +1,21 @@ import re -import socket +import json from .common import InfoExtractor from ..utils import ( - compat_http_client, - compat_str, - compat_urllib_error, + # This is used by the not implemented extractLiveStream method compat_urllib_parse, - compat_urllib_request, ExtractorError, unified_strdate, ) class ArteTvIE(InfoExtractor): - """arte.tv information extractor.""" - - _VALID_URL = r'(?:http://)?videos\.arte\.tv/(?:fr|de)/videos/.*' + _VALID_URL = r'(?:http://)?www\.arte.tv/guide/(?:fr|de)/(?:(?:sendungen|emissions)/)?(?P<id>.*?)/(?P<name>.*?)(\?.*)?' _LIVE_URL = r'index-[0-9]+\.html$' IE_NAME = u'arte.tv' - def fetch_webpage(self, url): - request = compat_urllib_request.Request(url) - try: - self.report_download_webpage(url) - webpage = compat_urllib_request.urlopen(request).read() - except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: - raise ExtractorError(u'Unable to retrieve video webpage: %s' % compat_str(err)) - except ValueError as err: - raise ExtractorError(u'Invalid URL: %s' % url) - return webpage - - def grep_webpage(self, url, regex, regexFlags, matchTuples): - page = self.fetch_webpage(url) - mobj = re.search(regex, page, regexFlags) - info = {} - - if mobj is None: - raise ExtractorError(u'Invalid URL: %s' % url) - - for (i, key, err) in matchTuples: - if mobj.group(i) is None: - raise ExtractorError(err) - else: - info[key] = mobj.group(i) - - return info - # TODO implement Live Stream # def extractLiveStream(self, url): # video_lang = url.split('/')[-4] @@ -75,62 +43,44 @@ class ArteTvIE(InfoExtractor): # ) # video_url = u'%s/%s' % (info.get('url'), info.get('path')) - def extractPlus7Stream(self, url): - video_lang = url.split('/')[-3] - info = self.grep_webpage( - url, - r'param name="movie".*?videorefFileUrl=(http[^\'"&]*)', - 0, - [ - (1, 'url', u'Invalid URL: %s' % url) - ] - ) - next_url = compat_urllib_parse.unquote(info.get('url')) - info = self.grep_webpage( - next_url, - r'<video lang="%s" ref="(http[^\'"&]*)' % video_lang, - 0, - [ - (1, 'url', u'Could not find <video> tag: %s' % url) - ] - ) - next_url = compat_urllib_parse.unquote(info.get('url')) - - info = self.grep_webpage( - next_url, - r'<video id="(.*?)".*?>.*?' + - '<name>(.*?)</name>.*?' + - '<dateVideo>(.*?)</dateVideo>.*?' + - '<url quality="hd">(.*?)</url>', - re.DOTALL, - [ - (1, 'id', u'could not extract video id: %s' % url), - (2, 'title', u'could not extract video title: %s' % url), - (3, 'date', u'could not extract video date: %s' % url), - (4, 'url', u'could not extract video url: %s' % url) - ] - ) - - return { - 'id': info.get('id'), - 'url': compat_urllib_parse.unquote(info.get('url')), - 'uploader': u'arte.tv', - 'upload_date': unified_strdate(info.get('date')), - 'title': info.get('title').decode('utf-8'), - 'ext': u'mp4', - 'format': u'NA', - 'player_url': None, - } - def _real_extract(self, url): - video_id = url.split('/')[-1] - self.report_extraction(video_id) + mobj = re.match(self._VALID_URL, url) + name = mobj.group('name') + # This is not a real id, it can be for example AJT for the news + # http://www.arte.tv/guide/fr/emissions/AJT/arte-journal + video_id = mobj.group('id') if re.search(self._LIVE_URL, video_id) is not None: raise ExtractorError(u'Arte live streams are not yet supported, sorry') # self.extractLiveStream(url) # return - else: - info = self.extractPlus7Stream(url) - return [info] + webpage = self._download_webpage(url, video_id) + json_url = self._html_search_regex(r'arte_vp_url="(.*?)"', webpage, 'json url') + + json_info = self._download_webpage(json_url, video_id, 'Downloading info json') + self.report_extraction(video_id) + info = json.loads(json_info) + player_info = info['videoJsonPlayer'] + + info_dict = {'id': player_info['VID'], + 'title': player_info['VTI'], + 'description': player_info['VDE'], + 'upload_date': unified_strdate(player_info['VDA'].split(' ')[0]), + 'thumbnail': player_info['programImage'], + } + + formats = player_info['VSR'].values() + # We order the formats by quality + formats = sorted(formats, key=lambda f: int(f['height'])) + # Pick the best quality + format_info = formats[-1] + if format_info['mediaType'] == u'rtmp': + info_dict['url'] = format_info['streamer'] + info_dict['play_path'] = 'mp4:' + format_info['url'] + info_dict['ext'] = 'mp4' + else: + info_dict['url'] = format_info['url'] + info_dict['ext'] = 'mp4' + + return info_dict diff --git a/youtube_dl/extractor/auengine.py b/youtube_dl/extractor/auengine.py new file mode 100644 index 000000000..3b4ade3bf --- /dev/null +++ b/youtube_dl/extractor/auengine.py @@ -0,0 +1,38 @@ +import os.path +import re + +from .common import InfoExtractor +from ..utils import ( + compat_urllib_parse, + compat_urllib_parse_urlparse, +) + +class AUEngineIE(InfoExtractor): + _VALID_URL = r'(?:http://)?(?:www\.)?auengine\.com/embed.php\?.*?file=([^&]+).*?' + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group(1) + webpage = self._download_webpage(url, video_id) + title = self._html_search_regex(r'<title>(?P<title>.+?)', + webpage, u'title') + title = title.strip() + links = re.findall(r'[^A-Za-z0-9]?(?:file|url):\s*["\'](http[^\'"&]*)', webpage) + links = [compat_urllib_parse.unquote(l) for l in links] + for link in links: + root, pathext = os.path.splitext(compat_urllib_parse_urlparse(link).path) + if pathext == '.png': + thumbnail = link + elif pathext == '.mp4': + url = link + ext = pathext + if ext == title[-len(ext):]: + title = title[:-len(ext)] + ext = ext[1:] + return [{ + 'id': video_id, + 'url': url, + 'ext': ext, + 'title': title, + 'thumbnail': thumbnail, + }] diff --git a/youtube_dl/extractor/bandcamp.py b/youtube_dl/extractor/bandcamp.py index dcf6721ee..129a20f44 100644 --- a/youtube_dl/extractor/bandcamp.py +++ b/youtube_dl/extractor/bandcamp.py @@ -9,6 +9,15 @@ from ..utils import ( class BandcampIE(InfoExtractor): _VALID_URL = r'http://.*?\.bandcamp\.com/track/(?P.*)' + _TEST = { + u'url': u'http://youtube-dl.bandcamp.com/track/youtube-dl-test-song', + u'file': u'1812978515.mp3', + u'md5': u'cdeb30cdae1921719a3cbcab696ef53c', + u'info_dict': { + u"title": u"youtube-dl test song \"'/\\\u00e4\u21ad" + }, + u'skip': u'There is a limit of 200 free downloads / month for the test song' + } def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) diff --git a/youtube_dl/extractor/bliptv.py b/youtube_dl/extractor/bliptv.py index df2ad4be2..37141e6a0 100644 --- a/youtube_dl/extractor/bliptv.py +++ b/youtube_dl/extractor/bliptv.py @@ -24,6 +24,17 @@ class BlipTVIE(InfoExtractor): _VALID_URL = r'^(?:https?://)?(?:\w+\.)?blip\.tv/((.+/)|(play/)|(api\.swf#))(.+)$' _URL_EXT = r'^.*\.([a-z0-9]+)$' IE_NAME = u'blip.tv' + _TEST = { + u'url': u'http://blip.tv/cbr/cbr-exclusive-gotham-city-imposters-bats-vs-jokerz-short-3-5796352', + u'file': u'5779306.m4v', + u'md5': u'b2d849efcf7ee18917e4b4d9ff37cafe', + u'info_dict': { + u"upload_date": u"20111205", + u"description": u"md5:9bc31f227219cde65e47eeec8d2dc596", + u"uploader": u"Comic Book Resources - CBR TV", + u"title": u"CBR EXCLUSIVE: \"Gotham City Imposters\" Bats VS Jokerz Short 3" + } + } def report_direct_download(self, title): """Report information extraction.""" diff --git a/youtube_dl/extractor/breakcom.py b/youtube_dl/extractor/breakcom.py index 1f6620d91..34f555e89 100644 --- a/youtube_dl/extractor/breakcom.py +++ b/youtube_dl/extractor/breakcom.py @@ -5,6 +5,14 @@ from .common import InfoExtractor class BreakIE(InfoExtractor): _VALID_URL = r'(?:http://)?(?:www\.)?break\.com/video/([^/]+)' + _TEST = { + u'url': u'http://www.break.com/video/when-girls-act-like-guys-2468056', + u'file': u'2468056.mp4', + u'md5': u'a3513fb1547fba4fb6cfac1bffc6c46b', + u'info_dict': { + u"title": u"When Girls Act Like D-Bags" + } + } def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) diff --git a/youtube_dl/extractor/comedycentral.py b/youtube_dl/extractor/comedycentral.py index 1bb359046..d9337f8a1 100644 --- a/youtube_dl/extractor/comedycentral.py +++ b/youtube_dl/extractor/comedycentral.py @@ -27,6 +27,17 @@ class ComedyCentralIE(InfoExtractor): (the-colbert-report-(videos|collections)/(?P<clipID>[0-9]+)/[^/]*/(?P<cntitle>.*?)) |(watch/(?P<date>[^/]*)/(?P<tdstitle>.*))))) $""" + _TEST = { + u'url': u'http://www.thedailyshow.com/watch/thu-december-13-2012/kristen-stewart', + u'file': u'422212.mp4', + u'md5': u'4e2f5cb088a83cd8cdb7756132f9739d', + u'info_dict': { + u"upload_date": u"20121214", + u"description": u"Kristen Stewart", + u"uploader": u"thedailyshow", + u"title": u"thedailyshow-kristen-stewart part 1" + } + } _available_formats = ['3500', '2200', '1700', '1200', '750', '400'] @@ -172,7 +183,7 @@ class ComedyCentralIE(InfoExtractor): 'ext': 'mp4', 'format': format, 'thumbnail': None, - 'description': officialTitle, + 'description': compat_str(officialTitle), } results.append(info) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 64d63e109..5c6fd7945 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -44,6 +44,7 @@ class InfoExtractor(object): location: Physical location of the video. player_url: SWF Player URL (used for rtmpdump). subtitles: The subtitle file contents. + view_count: How many users have watched the video on the platform. urlhandle: [internal] The urlHandle to be used to download the file, like returned by urllib.request.urlopen diff --git a/youtube_dl/extractor/cspan.py b/youtube_dl/extractor/cspan.py new file mode 100644 index 000000000..a4853279b --- /dev/null +++ b/youtube_dl/extractor/cspan.py @@ -0,0 +1,53 @@ +import re + +from .common import InfoExtractor +from ..utils import ( + compat_urllib_parse, +) + +class CSpanIE(InfoExtractor): + _VALID_URL = r'http://www.c-spanvideo.org/program/(.*)' + _TEST = { + u'url': u'http://www.c-spanvideo.org/program/HolderonV', + u'file': u'315139.flv', + u'md5': u'74a623266956f69e4df0068ab6c80fe4', + u'info_dict': { + u"title": u"Attorney General Eric Holder on Voting Rights Act Decision" + }, + u'skip': u'Requires rtmpdump' + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + prog_name = mobj.group(1) + webpage = self._download_webpage(url, prog_name) + video_id = self._search_regex(r'programid=(.*?)&', webpage, 'video id') + data = compat_urllib_parse.urlencode({'programid': video_id, + 'dynamic':'1'}) + info_url = 'http://www.c-spanvideo.org/common/services/flashXml.php?' + data + video_info = self._download_webpage(info_url, video_id, u'Downloading video info') + + self.report_extraction(video_id) + + title = self._html_search_regex(r'<string name="title">(.*?)</string>', + video_info, 'title') + description = self._html_search_regex(r'<meta (?:property="og:|name=")description" content="(.*?)"', + webpage, 'description', + flags=re.MULTILINE|re.DOTALL) + thumbnail = self._html_search_regex(r'<meta property="og:image" content="(.*?)"', + webpage, 'thumbnail') + + url = self._search_regex(r'<string name="URL">(.*?)</string>', + video_info, 'video url') + url = url.replace('$(protocol)', 'rtmp').replace('$(port)', '443') + path = self._search_regex(r'<string name="path">(.*?)</string>', + video_info, 'rtmp play path') + + return {'id': video_id, + 'title': title, + 'ext': 'flv', + 'url': url, + 'play_path': path, + 'description': description, + 'thumbnail': thumbnail, + } diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py index 34306b073..3297a8549 100644 --- a/youtube_dl/extractor/dailymotion.py +++ b/youtube_dl/extractor/dailymotion.py @@ -14,6 +14,15 @@ class DailymotionIE(InfoExtractor): _VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/video/([^/]+)' IE_NAME = u'dailymotion' + _TEST = { + u'url': u'http://www.dailymotion.com/video/x33vw9_tutoriel-de-youtubeur-dl-des-video_tech', + u'file': u'x33vw9.mp4', + u'md5': u'392c4b85a60a90dc4792da41ce3144eb', + u'info_dict': { + u"uploader": u"Alex and Van .", + u"title": u"Tutoriel de Youtubeur\"DL DES VIDEO DE YOUTUBE\"" + } + } def _real_extract(self, url): # Extract id and simplified title from URL diff --git a/youtube_dl/extractor/eighttracks.py b/youtube_dl/extractor/eighttracks.py index c3d4343f4..cced06811 100644 --- a/youtube_dl/extractor/eighttracks.py +++ b/youtube_dl/extractor/eighttracks.py @@ -12,6 +12,77 @@ from ..utils import ( class EightTracksIE(InfoExtractor): IE_NAME = '8tracks' _VALID_URL = r'https?://8tracks.com/(?P<user>[^/]+)/(?P<id>[^/#]+)(?:#.*)?$' + _TEST = { + u"name": u"EightTracks", + u"url": u"http://8tracks.com/ytdl/youtube-dl-test-tracks-a", + u"playlist": [ + { + u"file": u"11885610.m4a", + u"md5": u"96ce57f24389fc8734ce47f4c1abcc55", + u"info_dict": { + u"title": u"youtue-dl project<>\"' - youtube-dl test track 1 \"'/\\\u00e4\u21ad", + u"uploader_id": u"ytdl" + } + }, + { + u"file": u"11885608.m4a", + u"md5": u"4ab26f05c1f7291ea460a3920be8021f", + u"info_dict": { + u"title": u"youtube-dl project - youtube-dl test track 2 \"'/\\\u00e4\u21ad", + u"uploader_id": u"ytdl" + } + }, + { + u"file": u"11885679.m4a", + u"md5": u"d30b5b5f74217410f4689605c35d1fd7", + u"info_dict": { + u"title": u"youtube-dl project as well - youtube-dl test track 3 \"'/\\\u00e4\u21ad", + u"uploader_id": u"ytdl" + } + }, + { + u"file": u"11885680.m4a", + u"md5": u"4eb0a669317cd725f6bbd336a29f923a", + u"info_dict": { + u"title": u"youtube-dl project as well - youtube-dl test track 4 \"'/\\\u00e4\u21ad", + u"uploader_id": u"ytdl" + } + }, + { + u"file": u"11885682.m4a", + u"md5": u"1893e872e263a2705558d1d319ad19e8", + u"info_dict": { + u"title": u"PH - youtube-dl test track 5 \"'/\\\u00e4\u21ad", + u"uploader_id": u"ytdl" + } + }, + { + u"file": u"11885683.m4a", + u"md5": u"b673c46f47a216ab1741ae8836af5899", + u"info_dict": { + u"title": u"PH - youtube-dl test track 6 \"'/\\\u00e4\u21ad", + u"uploader_id": u"ytdl" + } + }, + { + u"file": u"11885684.m4a", + u"md5": u"1d74534e95df54986da7f5abf7d842b7", + u"info_dict": { + u"title": u"phihag - youtube-dl test track 7 \"'/\\\u00e4\u21ad", + u"uploader_id": u"ytdl" + } + }, + { + u"file": u"11885685.m4a", + u"md5": u"f081f47af8f6ae782ed131d38b9cd1c0", + u"info_dict": { + u"title": u"phihag - youtube-dl test track 8 \"'/\\\u00e4\u21ad", + u"uploader_id": u"ytdl" + } + } + ] + } + def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) diff --git a/youtube_dl/extractor/escapist.py b/youtube_dl/extractor/escapist.py index 86b145bca..794460e84 100644 --- a/youtube_dl/extractor/escapist.py +++ b/youtube_dl/extractor/escapist.py @@ -12,6 +12,16 @@ from ..utils import ( class EscapistIE(InfoExtractor): _VALID_URL = r'^(https?://)?(www\.)?escapistmagazine\.com/videos/view/(?P<showname>[^/]+)/(?P<episode>[^/?]+)[/?]?.*$' + _TEST = { + u'url': u'http://www.escapistmagazine.com/videos/view/the-escapist-presents/6618-Breaking-Down-Baldurs-Gate', + u'file': u'6618-Breaking-Down-Baldurs-Gate.mp4', + u'md5': u'c6793dbda81388f4264c1ba18684a74d', + u'info_dict': { + u"description": u"Baldur's Gate: Original, Modded or Enhanced Edition? I'll break down what you can expect from the new Baldur's Gate: Enhanced Edition.", + u"uploader": u"the-escapist-presents", + u"title": u"Breaking Down Baldur's Gate" + } + } def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py index c694f9adb..beaa5b4bd 100644 --- a/youtube_dl/extractor/facebook.py +++ b/youtube_dl/extractor/facebook.py @@ -22,6 +22,15 @@ class FacebookIE(InfoExtractor): _LOGIN_URL = 'https://login.facebook.com/login.php?m&next=http%3A%2F%2Fm.facebook.com%2Fhome.php&' _NETRC_MACHINE = 'facebook' IE_NAME = u'facebook' + _TEST = { + u'url': u'https://www.facebook.com/photo.php?v=120708114770723', + u'file': u'120708114770723.mp4', + u'md5': u'48975a41ccc4b7a581abd68651c1a5a8', + u'info_dict': { + u"duration": 279, + u"title": u"PEOPLE ARE AWESOME 2013" + } + } def report_login(self): """Report attempt to log in.""" diff --git a/youtube_dl/extractor/flickr.py b/youtube_dl/extractor/flickr.py index 791d5b61d..bd97bff9a 100644 --- a/youtube_dl/extractor/flickr.py +++ b/youtube_dl/extractor/flickr.py @@ -10,6 +10,16 @@ from ..utils import ( class FlickrIE(InfoExtractor): """Information Extractor for Flickr videos""" _VALID_URL = r'(?:https?://)?(?:www\.)?flickr\.com/photos/(?P<uploader_id>[\w\-_@]+)/(?P<id>\d+).*' + _TEST = { + u'url': u'http://www.flickr.com/photos/forestwander-nature-pictures/5645318632/in/photostream/', + u'file': u'5645318632.mp4', + u'md5': u'6fdc01adbc89d72fc9c4f15b4a4ba87b', + u'info_dict': { + u"description": u"Waterfalls in the Springtime at Dark Hollow Waterfalls. These are located just off of Skyline Drive in Virginia. They are only about 6/10 of a mile hike but it is a pretty steep hill and a good climb back up.", + u"uploader_id": u"forestwander-nature-pictures", + u"title": u"Dark Hollow Waterfalls" + } + } def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) diff --git a/youtube_dl/extractor/funnyordie.py b/youtube_dl/extractor/funnyordie.py index 3045978f1..388aacf2f 100644 --- a/youtube_dl/extractor/funnyordie.py +++ b/youtube_dl/extractor/funnyordie.py @@ -5,6 +5,15 @@ from .common import InfoExtractor class FunnyOrDieIE(InfoExtractor): _VALID_URL = r'^(?:https?://)?(?:www\.)?funnyordie\.com/videos/(?P<id>[0-9a-f]+)/.*$' + _TEST = { + u'url': u'http://www.funnyordie.com/videos/0732f586d7/heart-shaped-box-literal-video-version', + u'file': u'0732f586d7.mp4', + u'md5': u'f647e9e90064b53b6e046e75d0241fbd', + u'info_dict': { + u"description": u"Lyrics changed to match the video. Spoken cameo by Obscurus Lupa (from ThatGuyWithTheGlasses.com). Based on a concept by Dustin McLean (DustFilms.com). Performed, edited, and written by David A. Scott.", + u"title": u"Heart-Shaped Box: Literal Video Version" + } + } def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) diff --git a/youtube_dl/extractor/gamespot.py b/youtube_dl/extractor/gamespot.py new file mode 100644 index 000000000..cec3b7ac8 --- /dev/null +++ b/youtube_dl/extractor/gamespot.py @@ -0,0 +1,45 @@ +import re +import xml.etree.ElementTree + +from .common import InfoExtractor +from ..utils import ( + unified_strdate, +) + +class GameSpotIE(InfoExtractor): + _VALID_URL = r'(?:http://)?(?:www\.)?gamespot\.com/([^/]+)/videos/([^/]+)-([^/d]+)/' + _TEST = { + u"url": u"http://www.gamespot.com/arma-iii/videos/arma-iii-community-guide-sitrep-i-6410818/", + u"file": u"6410818.mp4", + u"md5": u"5569d64ca98db01f0177c934fe8c1e9b", + u"info_dict": { + u"title": u"Arma III - Community Guide: SITREP I", + u"upload_date": u"20130627", + } + } + + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group(3).split("-")[-1] + info_url = "http://www.gamespot.com/pages/video_player/xml.php?id="+str(video_id) + info_xml = self._download_webpage(info_url, video_id) + doc = xml.etree.ElementTree.fromstring(info_xml) + clip_el = doc.find('./playList/clip') + + video_url = clip_el.find('./URI').text + title = clip_el.find('./title').text + ext = video_url.rpartition('.')[2] + thumbnail_url = clip_el.find('./screenGrabURI').text + view_count = int(clip_el.find('./views').text) + upload_date = unified_strdate(clip_el.find('./postDate').text) + + return [{ + 'id' : video_id, + 'url' : video_url, + 'ext' : ext, + 'title' : title, + 'thumbnail' : thumbnail_url, + 'upload_date' : upload_date, + 'view_count' : view_count, + }] diff --git a/youtube_dl/extractor/gametrailers.py b/youtube_dl/extractor/gametrailers.py index 33e59e82c..3ce93b492 100644 --- a/youtube_dl/extractor/gametrailers.py +++ b/youtube_dl/extractor/gametrailers.py @@ -9,6 +9,15 @@ from ..utils import ( class GametrailersIE(InfoExtractor): _VALID_URL = r'http://www.gametrailers.com/(?P<type>videos|reviews|full-episodes)/(?P<id>.*?)/(?P<title>.*)' + _TEST = { + u'url': u'http://www.gametrailers.com/videos/zbvr8i/mirror-s-edge-2-e3-2013--debut-trailer', + u'file': u'zbvr8i.flv', + u'md5': u'c3edbc995ab4081976e16779bd96a878', + u'info_dict': { + u"title": u"E3 2013: Debut Trailer" + }, + u'skip': u'Requires rtmpdump' + } def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 7a877b3bc..019bbe6e9 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -15,6 +15,15 @@ class GenericIE(InfoExtractor): _VALID_URL = r'.*' IE_NAME = u'generic' + _TEST = { + u'url': u'http://www.hodiho.fr/2013/02/regis-plante-sa-jeep.html', + u'file': u'13601338388002.mp4', + u'md5': u'85b90ccc9d73b4acd9138d3af4c27f89', + u'info_dict': { + u"uploader": u"www.hodiho.fr", + u"title": u"R\u00e9gis plante sa Jeep" + } + } def report_download_webpage(self, video_id): """Report webpage download.""" @@ -102,7 +111,7 @@ class GenericIE(InfoExtractor): mobj = re.search(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage) if mobj is None: # Broaden the search a little bit: JWPlayer JS loader - mobj = re.search(r'[^A-Za-z0-9]?file:\s*["\'](http[^\'"&]*)', webpage) + mobj = re.search(r'[^A-Za-z0-9]?file["\']?:\s*["\'](http[^\'"&]*)', webpage) if mobj is None: # Try to find twitter cards info mobj = re.search(r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage) @@ -135,7 +144,7 @@ class GenericIE(InfoExtractor): # Video Title - Tagline | Site Name # and so on and so forth; it's just not practical video_title = self._html_search_regex(r'<title>(.*)', - webpage, u'video title') + webpage, u'video title', default=u'video') # video uploader is domain name video_uploader = self._search_regex(r'(?:https?://)?([^/]*)/.*', diff --git a/youtube_dl/extractor/googleplus.py b/youtube_dl/extractor/googleplus.py index ff2cdeebb..a8f171afd 100644 --- a/youtube_dl/extractor/googleplus.py +++ b/youtube_dl/extractor/googleplus.py @@ -1,3 +1,5 @@ +# coding: utf-8 + import datetime import re @@ -12,6 +14,15 @@ class GooglePlusIE(InfoExtractor): _VALID_URL = r'(?:https://)?plus\.google\.com/(?:[^/]+/)*?posts/(\w+)' IE_NAME = u'plus.google' + _TEST = { + u"url": u"https://plus.google.com/u/0/108897254135232129896/posts/ZButuJc6CtH", + u"file": u"ZButuJc6CtH.flv", + u"info_dict": { + u"upload_date": u"20120613", + u"uploader": u"井上ヨシマサ", + u"title": u"嘆きの天使 降臨" + } + } def _real_extract(self, url): # Extract id from URL diff --git a/youtube_dl/extractor/hotnewhiphop.py b/youtube_dl/extractor/hotnewhiphop.py new file mode 100644 index 000000000..ca3abb7d7 --- /dev/null +++ b/youtube_dl/extractor/hotnewhiphop.py @@ -0,0 +1,48 @@ +import re +import base64 + +from .common import InfoExtractor + + +class HotNewHipHopIE(InfoExtractor): + _VALID_URL = r'http://www\.hotnewhiphop.com/.*\.(?P.*)\.html' + _TEST = { + u'url': u"http://www.hotnewhiphop.com/freddie-gibbs-lay-it-down-song.1435540.html'", + u'file': u'1435540.mp3', + u'md5': u'2c2cd2f76ef11a9b3b581e8b232f3d96', + u'info_dict': { + u"title": u"Freddie Gibbs Songs - Lay It Down" + } + } + + def _real_extract(self, url): + m = re.match(self._VALID_URL, url) + video_id = m.group('id') + + webpage_src = self._download_webpage(url, video_id) + + video_url_base64 = self._search_regex(r'data-path="(.*?)"', + webpage_src, u'video URL', fatal=False) + + if video_url_base64 == None: + video_url = self._search_regex(r'"contentUrl" content="(.*?)"', webpage_src, + u'video URL') + return self.url_result(video_url, ie='Youtube') + + video_url = base64.b64decode(video_url_base64).decode('utf-8') + + video_title = self._html_search_regex(r"(.*)", + webpage_src, u'title') + + # Getting thumbnail and if not thumbnail sets correct title for WSHH candy video. + thumbnail = self._html_search_regex(r'"og:image" content="(.*)"', + webpage_src, u'thumbnail', fatal=False) + + results = [{ + 'id': video_id, + 'url' : video_url, + 'title' : video_title, + 'thumbnail' : thumbnail, + 'ext' : 'mp3', + }] + return results \ No newline at end of file diff --git a/youtube_dl/extractor/howcast.py b/youtube_dl/extractor/howcast.py index 7b94f85ad..6104c4b5e 100644 --- a/youtube_dl/extractor/howcast.py +++ b/youtube_dl/extractor/howcast.py @@ -5,6 +5,15 @@ from .common import InfoExtractor class HowcastIE(InfoExtractor): _VALID_URL = r'(?:https?://)?(?:www\.)?howcast\.com/videos/(?P\d+)' + _TEST = { + u'url': u'http://www.howcast.com/videos/390161-How-to-Tie-a-Square-Knot-Properly', + u'file': u'390161.mp4', + u'md5': u'1d7ba54e2c9d7dc6935ef39e00529138', + u'info_dict': { + u"description": u"The square knot, also known as the reef knot, is one of the oldest, most basic knots to tie, and can be used in many different ways. Here's the proper way to tie a square knot.", + u"title": u"How to Tie a Square Knot Properly" + } + } def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) diff --git a/youtube_dl/extractor/hypem.py b/youtube_dl/extractor/hypem.py index ceec4f616..ab2b59103 100644 --- a/youtube_dl/extractor/hypem.py +++ b/youtube_dl/extractor/hypem.py @@ -15,6 +15,14 @@ from ..utils import ( class HypemIE(InfoExtractor): """Information Extractor for hypem""" _VALID_URL = r'(?:http://)?(?:www\.)?hypem\.com/track/([^/]+)/([^/]+)' + _TEST = { + u'url': u'http://hypem.com/track/1v6ga/BODYWORK+-+TAME', + u'file': u'1v6ga.mp3', + u'md5': u'b9cc91b5af8995e9f0c1cee04c575828', + u'info_dict': { + u"title": u"Tame" + } + } def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) diff --git a/youtube_dl/extractor/ina.py b/youtube_dl/extractor/ina.py index c19b95659..962c59214 100644 --- a/youtube_dl/extractor/ina.py +++ b/youtube_dl/extractor/ina.py @@ -6,6 +6,14 @@ from .common import InfoExtractor class InaIE(InfoExtractor): """Information Extractor for Ina.fr""" _VALID_URL = r'(?:http://)?(?:www\.)?ina\.fr/video/(?PI[0-9]+)/.*' + _TEST = { + u'url': u'www.ina.fr/video/I12055569/francois-hollande-je-crois-que-c-est-clair-video.html', + u'file': u'I12055569.mp4', + u'md5': u'a667021bf2b41f8dc6049479d9bb38a3', + u'info_dict': { + u"title": u"Fran\u00e7ois Hollande \"Je crois que c'est clair\"" + } + } def _real_extract(self,url): mobj = re.match(self._VALID_URL, url) diff --git a/youtube_dl/extractor/infoq.py b/youtube_dl/extractor/infoq.py index 905674282..c79c589c7 100644 --- a/youtube_dl/extractor/infoq.py +++ b/youtube_dl/extractor/infoq.py @@ -11,6 +11,18 @@ from ..utils import ( class InfoQIE(InfoExtractor): _VALID_URL = r'^(?:https?://)?(?:www\.)?infoq\.com/[^/]+/[^/]+$' + _TEST = { + u"name": u"InfoQ", + u"url": u"http://www.infoq.com/presentations/A-Few-of-My-Favorite-Python-Things", + u"file": u"12-jan-pythonthings.mp4", + u"info_dict": { + u"description": u"Mike Pirnat presents some tips and tricks, standard libraries and third party packages that make programming in Python a richer experience.", + u"title": u"A Few of My Favorite [Python] Things" + }, + u"params": { + u"skip_download": True + } + } def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) diff --git a/youtube_dl/extractor/justintv.py b/youtube_dl/extractor/justintv.py index b2006e334..f60017992 100644 --- a/youtube_dl/extractor/justintv.py +++ b/youtube_dl/extractor/justintv.py @@ -26,6 +26,17 @@ class JustinTVIE(InfoExtractor): """ _JUSTIN_PAGE_LIMIT = 100 IE_NAME = u'justin.tv' + _TEST = { + u'url': u'http://www.twitch.tv/thegamedevhub/b/296128360', + u'file': u'296128360.flv', + u'md5': u'ecaa8a790c22a40770901460af191c9a', + u'info_dict': { + u"upload_date": u"20110927", + u"uploader_id": 25114803, + u"uploader": u"thegamedevhub", + u"title": u"Beginner Series - Scripting With Python Pt.1" + } + } def report_download_page(self, channel, offset): """Report attempt to download a single page of videos.""" diff --git a/youtube_dl/extractor/keek.py b/youtube_dl/extractor/keek.py index e2093a0be..72ad6a3d0 100644 --- a/youtube_dl/extractor/keek.py +++ b/youtube_dl/extractor/keek.py @@ -6,6 +6,15 @@ from .common import InfoExtractor class KeekIE(InfoExtractor): _VALID_URL = r'http://(?:www\.)?keek\.com/(?:!|\w+/keeks/)(?P\w+)' IE_NAME = u'keek' + _TEST = { + u'url': u'http://www.keek.com/ytdl/keeks/NODfbab', + u'file': u'NODfbab.mp4', + u'md5': u'9b0636f8c0f7614afa4ea5e4c6e57e83', + u'info_dict': { + u"uploader": u"ytdl", + u"title": u"test chars: \"'/\\\u00e4<>This is a test video for youtube-dl.For more information, contact phihag@phihag.de ." + } + } def _real_extract(self, url): m = re.match(self._VALID_URL, url) diff --git a/youtube_dl/extractor/liveleak.py b/youtube_dl/extractor/liveleak.py index d4b142ea0..cf8a2c931 100644 --- a/youtube_dl/extractor/liveleak.py +++ b/youtube_dl/extractor/liveleak.py @@ -10,6 +10,16 @@ class LiveLeakIE(InfoExtractor): _VALID_URL = r'^(?:http?://)?(?:\w+\.)?liveleak\.com/view\?(?:.*?)i=(?P[\w_]+)(?:.*)' IE_NAME = u'liveleak' + _TEST = { + u'url': u'http://www.liveleak.com/view?i=757_1364311680', + u'file': u'757_1364311680.mp4', + u'md5': u'0813c2430bea7a46bf13acf3406992f4', + u'info_dict': { + u"description": u"extremely bad day for this guy..!", + u"uploader": u"ljfriel2", + u"title": u"Most unlucky car accident" + } + } def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) diff --git a/youtube_dl/extractor/metacafe.py b/youtube_dl/extractor/metacafe.py index 66d6554fe..4c3f81b98 100644 --- a/youtube_dl/extractor/metacafe.py +++ b/youtube_dl/extractor/metacafe.py @@ -20,6 +20,19 @@ class MetacafeIE(InfoExtractor): _DISCLAIMER = 'http://www.metacafe.com/family_filter/' _FILTER_POST = 'http://www.metacafe.com/f/index.php?inputType=filter&controllerGroup=user' IE_NAME = u'metacafe' + _TEST = { + u"add_ie": ["Youtube"], + u"url": u"http://metacafe.com/watch/yt-_aUehQsCQtM/the_electric_company_short_i_pbs_kids_go/", + u"file": u"_aUehQsCQtM.flv", + u"info_dict": { + u"upload_date": u"20090102", + u"title": u"The Electric Company | \"Short I\" | PBS KIDS GO!", + u"description": u"md5:2439a8ef6d5a70e380c22f5ad323e5a8", + u"uploader": u"PBS", + u"uploader_id": u"PBS" + } + } + def report_disclaimer(self): """Report disclaimer retrieval.""" diff --git a/youtube_dl/extractor/myspass.py b/youtube_dl/extractor/myspass.py index 7b016bb86..107665d15 100644 --- a/youtube_dl/extractor/myspass.py +++ b/youtube_dl/extractor/myspass.py @@ -11,6 +11,15 @@ from ..utils import ( class MySpassIE(InfoExtractor): _VALID_URL = r'http://www.myspass.de/.*' + _TEST = { + u'url': u'http://www.myspass.de/myspass/shows/tvshows/absolute-mehrheit/Absolute-Mehrheit-vom-17022013-Die-Highlights-Teil-2--/11741/', + u'file': u'11741.mp4', + u'md5': u'0b49f4844a068f8b33f4b7c88405862b', + u'info_dict': { + u"description": u"Wer kann in die Fu\u00dfstapfen von Wolfgang Kubicki treten und die Mehrheit der Zuschauer hinter sich versammeln? Wird vielleicht sogar die Absolute Mehrheit geknackt und der Jackpot von 200.000 Euro mit nach Hause genommen?", + u"title": u"Absolute Mehrheit vom 17.02.2013 - Die Highlights, Teil 2" + } + } def _real_extract(self, url): META_DATA_URL_TEMPLATE = 'http://www.myspass.de/myspass/includes/apps/video/getvideometadataxml.php?id=%s' diff --git a/youtube_dl/extractor/myvideo.py b/youtube_dl/extractor/myvideo.py index 47a44e3e0..b2a7b1df0 100644 --- a/youtube_dl/extractor/myvideo.py +++ b/youtube_dl/extractor/myvideo.py @@ -18,6 +18,14 @@ class MyVideoIE(InfoExtractor): _VALID_URL = r'(?:http://)?(?:www\.)?myvideo\.de/watch/([0-9]+)/([^?/]+).*' IE_NAME = u'myvideo' + _TEST = { + u'url': u'http://www.myvideo.de/watch/8229274/bowling_fail_or_win', + u'file': u'8229274.flv', + u'md5': u'2d2753e8130479ba2cb7e0a37002053e', + u'info_dict': { + u"title": u"bowling-fail-or-win" + } + } # Original Code from: https://github.com/dersphere/plugin.video.myvideo_de.git # Released into the Public Domain by Tristan Fischer on 2013-05-19 diff --git a/youtube_dl/extractor/nba.py b/youtube_dl/extractor/nba.py index 296d4cd36..122b7dd26 100644 --- a/youtube_dl/extractor/nba.py +++ b/youtube_dl/extractor/nba.py @@ -8,6 +8,15 @@ from ..utils import ( class NBAIE(InfoExtractor): _VALID_URL = r'^(?:https?://)?(?:watch\.|www\.)?nba\.com/(?:nba/)?video(/[^?]*?)(?:/index\.html)?(?:\?.*)?$' + _TEST = { + u'url': u'http://www.nba.com/video/games/nets/2012/12/04/0021200253-okc-bkn-recap.nba/index.html', + u'file': u'0021200253-okc-bkn-recap.nba.mp4', + u'md5': u'c0edcfc37607344e2ff8f13c378c88a4', + u'info_dict': { + u"description": u"Kevin Durant scores 32 points and dishes out six assists as the Thunder beat the Nets in Brooklyn.", + u"title": u"Thunder vs. Nets" + } + } def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) diff --git a/youtube_dl/extractor/photobucket.py b/youtube_dl/extractor/photobucket.py index cd7fe6f52..305b79773 100644 --- a/youtube_dl/extractor/photobucket.py +++ b/youtube_dl/extractor/photobucket.py @@ -16,6 +16,16 @@ class PhotobucketIE(InfoExtractor): # Check if it's necessary to keep the old extracion process _VALID_URL = r'(?:http://)?(?:[a-z0-9]+\.)?photobucket\.com/.*(([\?\&]current=)|_)(?P.*)\.(?P(flv)|(mp4))' IE_NAME = u'photobucket' + _TEST = { + u'url': u'http://media.photobucket.com/user/rachaneronas/media/TiredofLinkBuildingTryBacklinkMyDomaincom_zpsc0c3b9fa.mp4.html?filters[term]=search&filters[primary]=videos&filters[secondary]=images&sort=1&o=0', + u'file': u'zpsc0c3b9fa.mp4', + u'md5': u'7dabfb92b0a31f6c16cebc0f8e60ff99', + u'info_dict': { + u"upload_date": u"20130504", + u"uploader": u"rachaneronas", + u"title": u"Tired of Link Building? Try BacklinkMyDomain.com!" + } + } def _real_extract(self, url): # Extract id from URL diff --git a/youtube_dl/extractor/pornotube.py b/youtube_dl/extractor/pornotube.py index 0adb40db0..add76a11e 100644 --- a/youtube_dl/extractor/pornotube.py +++ b/youtube_dl/extractor/pornotube.py @@ -10,6 +10,15 @@ from ..utils import ( class PornotubeIE(InfoExtractor): _VALID_URL = r'^(?:https?://)?(?:\w+\.)?pornotube\.com(/c/(?P[0-9]+))?(/m/(?P[0-9]+))(/(?P.+))$' + _TEST = { + u'url': u'http://pornotube.com/c/173/m/1689755/Marilyn-Monroe-Bathing', + u'file': u'1689755.flv', + u'md5': u'374dd6dcedd24234453b295209aa69b6', + u'info_dict': { + u"upload_date": u"20090708", + u"title": u"Marilyn-Monroe-Bathing" + } + } def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) diff --git a/youtube_dl/extractor/rbmaradio.py b/youtube_dl/extractor/rbmaradio.py index 0c75eee2a..4b6147a73 100644 --- a/youtube_dl/extractor/rbmaradio.py +++ b/youtube_dl/extractor/rbmaradio.py @@ -11,6 +11,18 @@ from ..utils import ( class RBMARadioIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?rbmaradio\.com/shows/(?P<videoID>[^/]+)$' + _TEST = { + u'url': u'http://www.rbmaradio.com/shows/ford-lopatin-live-at-primavera-sound-2011', + u'file': u'ford-lopatin-live-at-primavera-sound-2011.mp3', + u'md5': u'6bc6f9bcb18994b4c983bc3bf4384d95', + u'info_dict': { + u"uploader_id": u"ford-lopatin", + u"location": u"Spain", + u"description": u"Joel Ford and Daniel \u2019Oneohtrix Point Never\u2019 Lopatin fly their midified pop extravaganza to Spain. Live at Primavera Sound 2011.", + u"uploader": u"Ford & Lopatin", + u"title": u"Live at Primavera Sound 2011" + } + } def _real_extract(self, url): m = re.match(self._VALID_URL, url) diff --git a/youtube_dl/extractor/redtube.py b/youtube_dl/extractor/redtube.py index ebc4e2326..1d2cf1f56 100644 --- a/youtube_dl/extractor/redtube.py +++ b/youtube_dl/extractor/redtube.py @@ -5,6 +5,14 @@ from .common import InfoExtractor class RedTubeIE(InfoExtractor): _VALID_URL = r'(?:http://)?(?:www\.)?redtube\.com/(?P<id>[0-9]+)' + _TEST = { + u'url': u'http://www.redtube.com/66418', + u'file': u'66418.mp4', + u'md5': u'7b8c22b5e7098a3e1c09709df1126d2d', + u'info_dict': { + u"title": u"Sucked on a toilet" + } + } def _real_extract(self,url): mobj = re.match(self._VALID_URL, url) diff --git a/youtube_dl/extractor/ringtv.py b/youtube_dl/extractor/ringtv.py new file mode 100644 index 000000000..1b08c3167 --- /dev/null +++ b/youtube_dl/extractor/ringtv.py @@ -0,0 +1,37 @@ +import re + +from .common import InfoExtractor + + +class RingTVIE(InfoExtractor): + _VALID_URL = r'(?:http://)?(?:www\.)?ringtv\.craveonline\.com/videos/video/([^/]+)' + _TEST = { + u"url": u"http://ringtv.craveonline.com/videos/video/746619-canelo-alvarez-talks-about-mayweather-showdown", + u"file": u"746619.mp4", + u"md5": u"7c46b4057d22de32e0a539f017e64ad3", + u"info_dict": { + u"title": u"Canelo Alvarez talks about Mayweather showdown", + u"description": u"Saul \\\"Canelo\\\" Alvarez spoke to the media about his Sept. 14 showdown with Floyd Mayweather after their kick-off presser in NYC. Canelo is motivated and confident that he will have the speed and gameplan to beat the pound-for-pound king." + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group(1).split('-')[0] + webpage = self._download_webpage(url, video_id) + title = self._search_regex(r'<title>(.+?)', + webpage, 'video title').replace(' | RingTV','') + description = self._search_regex(r'
(.+?)
', + webpage, 'Description') + final_url = "http://ringtv.craveonline.springboardplatform.com/storage/ringtv.craveonline.com/conversion/%s.mp4" %(str(video_id)) + thumbnail_url = "http://ringtv.craveonline.springboardplatform.com/storage/ringtv.craveonline.com/snapshots/%s.jpg" %(str(video_id)) + ext = final_url.split('.')[-1] + return [{ + 'id' : video_id, + 'url' : final_url, + 'ext' : ext, + 'title' : title, + 'thumbnail' : thumbnail_url, + 'description' : description, + }] + diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index 80d7e1b54..d47c49c03 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -19,8 +19,19 @@ class SoundcloudIE(InfoExtractor): of the stream token and uid """ - _VALID_URL = r'^(?:https?://)?(?:www\.)?soundcloud\.com/([\w\d-]+)/([\w\d-]+)' + _VALID_URL = r'^(?:https?://)?(?:www\.)?soundcloud\.com/([\w\d-]+)/([\w\d-]+)(?:[?].*)?$' IE_NAME = u'soundcloud' + _TEST = { + u'url': u'http://soundcloud.com/ethmusic/lostin-powers-she-so-heavy', + u'file': u'62986583.mp3', + u'md5': u'ebef0a451b909710ed1d7787dddbf0d7', + u'info_dict': { + u"upload_date": u"20121011", + u"description": u"No Downloads untill we record the finished version this weekend, i was too pumped n i had to post it , earl is prolly gonna b hella p.o'd", + u"uploader": u"E.T. ExTerrestrial Music", + u"title": u"Lostin Powers - She so Heavy (SneakPreview) Adrian Ackers Blueprint 1" + } + } def report_resolve(self, video_id): """Report information extraction.""" @@ -75,8 +86,72 @@ class SoundcloudSetIE(InfoExtractor): of the stream token and uid """ - _VALID_URL = r'^(?:https?://)?(?:www\.)?soundcloud\.com/([\w\d-]+)/sets/([\w\d-]+)' + _VALID_URL = r'^(?:https?://)?(?:www\.)?soundcloud\.com/([\w\d-]+)/sets/([\w\d-]+)(?:[?].*)?$' IE_NAME = u'soundcloud:set' + _TEST = { + u"url":"https://soundcloud.com/the-concept-band/sets/the-royal-concept-ep", + u"playlist": [ + { + u"file":"30510138.mp3", + u"md5":"f9136bf103901728f29e419d2c70f55d", + u"info_dict": { + u"upload_date": u"20111213", + u"description": u"The Royal Concept from Stockholm\r\nFilip / Povel / David / Magnus\r\nwww.royalconceptband.com", + u"uploader": u"The Royal Concept", + u"title": u"D-D-Dance" + } + }, + { + u"file":"47127625.mp3", + u"md5":"09b6758a018470570f8fd423c9453dd8", + u"info_dict": { + u"upload_date": u"20120521", + u"description": u"The Royal Concept from Stockholm\r\nFilip / Povel / David / Magnus\r\nwww.royalconceptband.com", + u"uploader": u"The Royal Concept", + u"title": u"The Royal Concept - Gimme Twice" + } + }, + { + u"file":"47127627.mp3", + u"md5":"154abd4e418cea19c3b901f1e1306d9c", + u"info_dict": { + u"upload_date": u"20120521", + u"uploader": u"The Royal Concept", + u"title": u"Goldrushed" + } + }, + { + u"file":"47127629.mp3", + u"md5":"2f5471edc79ad3f33a683153e96a79c1", + u"info_dict": { + u"upload_date": u"20120521", + u"description": u"The Royal Concept from Stockholm\r\nFilip / Povel / David / Magnus\r\nwww.royalconceptband.com", + u"uploader": u"The Royal Concept", + u"title": u"In the End" + } + }, + { + u"file":"47127631.mp3", + u"md5":"f9ba87aa940af7213f98949254f1c6e2", + u"info_dict": { + u"upload_date": u"20120521", + u"description": u"The Royal Concept from Stockholm\r\nFilip / David / Povel / Magnus\r\nwww.theroyalconceptband.com", + u"uploader": u"The Royal Concept", + u"title": u"Knocked Up" + } + }, + { + u"file":"75206121.mp3", + u"md5":"f9d1fe9406717e302980c30de4af9353", + u"info_dict": { + u"upload_date": u"20130116", + u"description": u"The unreleased track World on Fire premiered on the CW's hit show Arrow (8pm/7pm central). \r\nAs a gift to our fans we would like to offer you a free download of the track! ", + u"uploader": u"The Royal Concept", + u"title": u"World On Fire" + } + } + ] + } def report_resolve(self, video_id): """Report information extraction.""" diff --git a/youtube_dl/extractor/spiegel.py b/youtube_dl/extractor/spiegel.py index 98a65b78b..13c86401c 100644 --- a/youtube_dl/extractor/spiegel.py +++ b/youtube_dl/extractor/spiegel.py @@ -6,6 +6,14 @@ from .common import InfoExtractor class SpiegelIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?spiegel\.de/video/[^/]*-(?P[0-9]+)(?:\.html)?(?:#.*)?$' + _TEST = { + u'url': u'http://www.spiegel.de/video/vulkan-tungurahua-in-ecuador-ist-wieder-aktiv-video-1259285.html', + u'file': u'1259285.mp4', + u'md5': u'2c2754212136f35fb4b19767d242f66e', + u'info_dict': { + u"title": u"Vulkanausbruch in Ecuador: Der \"Feuerschlund\" ist wieder aktiv" + } + } def _real_extract(self, url): m = re.match(self._VALID_URL, url) diff --git a/youtube_dl/extractor/stanfordoc.py b/youtube_dl/extractor/stanfordoc.py index 8d3e32ab9..25a0d09f7 100644 --- a/youtube_dl/extractor/stanfordoc.py +++ b/youtube_dl/extractor/stanfordoc.py @@ -20,6 +20,14 @@ class StanfordOpenClassroomIE(InfoExtractor): _VALID_URL = r'^(?:https?://)?openclassroom.stanford.edu(?P/?|(/MainFolder/(?:HomePage|CoursePage|VideoPage)\.php([?]course=(?P[^&]+)(&video=(?P