Merge branch 'master' into use-other-downloaders

This commit is contained in:
Rogério Brito 2013-06-30 12:15:17 -03:00
commit 0f58da6725
78 changed files with 1402 additions and 947 deletions

View File

@ -116,12 +116,14 @@ which means you can modify it, redistribute it or use it however you like.
-F, --list-formats list all available formats (currently youtube -F, --list-formats list all available formats (currently youtube
only) only)
--write-sub write subtitle file (currently youtube only) --write-sub write subtitle file (currently youtube only)
--write-auto-sub write automatic subtitle file (currently youtube
only)
--only-sub [deprecated] alias of --skip-download --only-sub [deprecated] alias of --skip-download
--all-subs downloads all the available subtitles of the --all-subs downloads all the available subtitles of the
video (currently youtube only) video (currently youtube only)
--list-subs lists all available subtitles for the video --list-subs lists all available subtitles for the video
(currently youtube only) (currently youtube only)
--sub-format FORMAT subtitle format [srt/sbv] (default=srt) --sub-format FORMAT subtitle format [srt/sbv/vtt] (default=srt)
(currently youtube only) (currently youtube only)
--sub-lang LANG language of the subtitles to download (optional) --sub-lang LANG language of the subtitles to download (optional)
use IETF language tags like 'en' use IETF language tags like 'en'
@ -166,7 +168,7 @@ The `-o` option allows users to indicate a template for the output file names. T
- `playlist`: The name or the id of the playlist that contains the video. - `playlist`: The name or the id of the playlist that contains the video.
- `playlist_index`: The index of the video in the playlist, a five-digit number. - `playlist_index`: The index of the video in the playlist, a five-digit number.
The current default template is `%(id)s.%(ext)s`, but that will be switchted to `%(title)s-%(id)s.%(ext)s` (which can be requested with `-t` at the moment). The current default template is `%(title)s-%(id)s.%(ext)s`.
In some cases, you don't want special characters such as 中, spaces, or &, such as when transferring the downloaded filename to a Windows system or the filename through an 8bit-unsafe channel. In these cases, add the `--restrict-filenames` flag to get a shorter title: In some cases, you don't want special characters such as 中, spaces, or &, such as when transferring the downloaded filename to a Windows system or the filename through an 8bit-unsafe channel. In these cases, add the `--restrict-filenames` flag to get a shorter title:

View File

@ -0,0 +1,83 @@
#!/usr/bin/env python
# Generate youtube signature algorithm from test cases
import sys
# Pairs of (input signature, expected output signature) consumed by genall().
# find_matching() locates every character of the second string inside the
# first one, so each character of the second string must occur in the first.
# NOTE(review): the "# NN" labels look like the length of the first string of
# each pair (gen() emits len(wrong) in the generated check) - confirm.
tests = [
# 88
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<",
"J:|}][{=+-_)(*&;%$#@>MNBVCXZASDFGH^KLPOIUYTREWQ0987654321mnbvcxzasdfghrklpoiuytej"),
# 87
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$^&*()_-+={[]}|:;?/>.<",
"!?;:|}][{=+-_)(*&^$#@/MNBVCXZASqFGHJKLPOIUYTREWQ0987654321mnbvcxzasdfghjklpoiuytr"),
# 86 - vfl_ymO4Z 2013/06/27
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[|};?/>.<",
"ertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!/#$%^&*()_-+={[|};?@"),
# 85
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?/>.<",
"{>/?;}[.=+-_)(*&^%$#@!MqBVCXZASDFwHJKLPOIUYTREWQ0987654321mnbvcxzasdfghjklpoiuytr"),
# 84
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?>.<",
"<.>?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWe098765432rmnbvcxzasdfghjklpoiuyt1"),
# 83
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!#$%^&*()_+={[};?/>.<",
"D.>/?;}[{=+_)(*&^%$#!MNBVCXeAS<FGHJKLPOIUYTREWZ0987654321mnbvcxzasdfghjklpoiuytrQ"),
# 82
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/>.<",
"Q>/?;}[{=+-(*<^%$#@!MNBVCXZASDFGHKLPOIUY8REWT0q&7654321mnbvcxzasdfghjklpoiuytrew9"),
]
def find_matching(wrong, right):
    """Return the index/slice expressions on ``s`` that rebuild *right* from *wrong*.

    Every character of *right* is looked up in *wrong* (first occurrence,
    via ``str.index``) and the resulting index list is handed to
    ``compress()``, whose generator of expression strings is returned.

    Raises ValueError (from ``str.index``) if a character of *right* does
    not occur in *wrong*.
    """
    idxs = [wrong.index(c) for c in right]
    # The original had a second, unreachable ``return`` yielding one
    # ``s[i]`` per character; it was dead code and has been dropped.
    return compress(idxs)
def compress(idxs):
    """Yield Python expressions on ``s`` that select the characters at *idxs*.

    Runs of consecutive indices (ascending or descending by one) are merged
    into a single slice expression such as ``s[:3]`` or ``s[7:5:-1]``; every
    other index becomes a plain subscript such as ``s[4]``.

    *idxs* is a sequence of non-negative integers (it is sliced, so it must
    not be a one-shot iterator).  An empty sequence yields nothing and a
    single index yields one subscript expression.
    """
    def _genslice(start, end, step):
        # Render the inclusive run start..end (moving by step) as a slice.
        starts = '' if start == 0 else str(start)
        stop = end + step
        # A descending run that ends at index 0 would get a stop of -1, which
        # Python reads as "the last element" and makes the slice empty.
        # Leaving the stop out lets the slice run down to index 0 instead.
        ends = ':' if stop < 0 else ':%d' % stop
        steps = '' if step == 1 else (':%d' % step)
        return 's[%s%s%s]' % (starts, ends, steps)

    if not idxs:
        return

    step = None   # direction of the run being collected, None outside a run
    start = None  # first index of the run being collected
    i = idxs[0]   # keeps the final flush valid when there is only one index
    for i, prev in zip(idxs[1:], idxs[:-1]):
        if step is not None:
            if i - prev == step:
                continue
            # The run ended at prev; i is handled by the next iteration
            # (as its prev) or by the final flush below.
            yield _genslice(start, prev, step)
            step = None
            continue
        if i - prev in (-1, 1):
            step = i - prev
            start = prev
        else:
            yield 's[%d]' % prev
    # Flush whatever is still pending: the last single index or an open run.
    if step is None:
        yield 's[%d]' % i
    else:
        yield _genslice(start, i, step)
def _assert_compress(inp, exp):
    """Check that compress(inp) produces exactly the expressions in *exp*.

    On a mismatch the actual and expected lists are printed before the
    assertion fails.
    """
    actual = list(compress(inp))
    if actual == exp:
        return
    print('Got %r, expected %r' % (actual, exp))
    assert actual == exp
# Self-checks for compress(), executed every time this module is loaded:
# isolated indices, ascending runs, and a descending run mixed with singles.
_assert_compress([0,2,4,6], ['s[0]', 's[2]', 's[4]', 's[6]'])
_assert_compress([0,1,2,4,6,7], ['s[:3]', 's[4]', 's[6:8]'])
_assert_compress([8,0,1,2,4,7,6,9], ['s[8]', 's[:3]', 's[4]', 's[7:5:-1]', 's[9]'])
def gen(wrong, right, indent):
    """Return the source of one ``if len(s) == N:`` branch for a test pair.

    The branch returns the ' + '-joined expressions from find_matching();
    *indent* is inserted in front of the return line.
    """
    expression = ' + '.join(find_matching(wrong, right))
    template = 'if len(s) == %d:\n%s return %s\n'
    return template % (len(wrong), indent, expression)
def genall(tests):
    """Return the generated branches for all (wrong, right) pairs in *tests*.

    The first branch starts with ``if``; each following branch is prefixed
    with the indent plus ``el``, which turns its ``if`` into ``elif``.
    """
    indent = ' ' * 8
    branches = [gen(wrong, right, indent) for wrong, right in tests]
    separator = indent + 'el'
    return indent + separator.join(branches)
def main():
    """Print the code generated from the module-level ``tests`` pairs."""
    generated = genall(tests)
    print(generated)
if __name__ == '__main__':
main()

44
test/helper.py Normal file
View File

@ -0,0 +1,44 @@
import io
import json
import os.path
import youtube_dl.extractor
from youtube_dl import YoutubeDL, YoutubeDLHandler
from youtube_dl.utils import (
compat_cookiejar,
compat_urllib_request,
)
# General configuration (from __init__, not very elegant...)
# Runs when this helper is imported: builds an opener from a proxy handler,
# a cookie processor backed by a fresh cookie jar, and YoutubeDLHandler, then
# installs it through install_opener().
jar = compat_cookiejar.CookieJar()
cookie_processor = compat_urllib_request.HTTPCookieProcessor(jar)
proxy_handler = compat_urllib_request.ProxyHandler()
opener = compat_urllib_request.build_opener(proxy_handler, cookie_processor, YoutubeDLHandler())
compat_urllib_request.install_opener(opener)
# parameters.json is looked up in the directory that contains this module and
# is parsed once, at import time, into the module-level ``parameters``.
PARAMETERS_FILE = os.path.join(
    os.path.dirname(os.path.abspath(__file__)),
    "parameters.json"
)
with io.open(PARAMETERS_FILE, encoding='utf-8') as pf:
    parameters = json.load(pf)
class FakeYDL(YoutubeDL):
    """YoutubeDL subclass for tests that records instead of downloading.

    YoutubeDL.__init__ is not called; only ``result`` and ``params`` are set.
    """

    def __init__(self):
        # Different instances of the downloader can't share the same dictionary
        # some test set the "sublang" parameter, which would break the md5 checks.
        self.params = dict(parameters)
        # Everything passed to download() is collected here.
        self.result = []

    def to_screen(self, s):
        # Messages go straight to stdout.
        print(s)

    def trouble(self, s, tb=None):
        # Any reported problem is raised as an exception; tb is ignored.
        raise Exception(s)

    def download(self, x):
        # Nothing is fetched; the argument is only recorded.
        self.result.append(x)
def get_testcases():
    """Yield the test definitions declared on the extractors.

    For every extractor returned by youtube_dl.extractor.gen_extractors(),
    a truthy ``_TEST`` attribute is yielded first, followed by each entry of
    ``_TESTS``.  Each yielded dict gets a ``'name'`` key (written in place)
    holding the extractor's class name without its last two characters.
    """
    for extractor in youtube_dl.extractor.gen_extractors():
        single = getattr(extractor, '_TEST', None)
        if single:
            single['name'] = type(extractor).__name__[:-len('IE')]
            yield single
        for case in getattr(extractor, '_TESTS', []):
            case['name'] = type(extractor).__name__[:-len('IE')]
            yield case

View File

@ -7,7 +7,8 @@ import unittest
import os import os
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from youtube_dl.extractor import YoutubeIE, YoutubePlaylistIE, YoutubeChannelIE, JustinTVIE from youtube_dl.extractor import YoutubeIE, YoutubePlaylistIE, YoutubeChannelIE, JustinTVIE, gen_extractors
from helper import get_testcases
class TestAllURLsMatching(unittest.TestCase): class TestAllURLsMatching(unittest.TestCase):
def test_youtube_playlist_matching(self): def test_youtube_playlist_matching(self):
@ -50,5 +51,16 @@ class TestAllURLsMatching(unittest.TestCase):
self.assertEqual(YoutubeIE()._extract_id('https://www.youtube.com/watch?&v=BaW_jenozKc'), 'BaW_jenozKc') self.assertEqual(YoutubeIE()._extract_id('https://www.youtube.com/watch?&v=BaW_jenozKc'), 'BaW_jenozKc')
self.assertEqual(YoutubeIE()._extract_id('https://www.youtube.com/watch?feature=player_embedded&v=BaW_jenozKc'), 'BaW_jenozKc') self.assertEqual(YoutubeIE()._extract_id('https://www.youtube.com/watch?feature=player_embedded&v=BaW_jenozKc'), 'BaW_jenozKc')
def test_no_duplicates(self):
ies = gen_extractors()
for tc in get_testcases():
url = tc['url']
for ie in ies:
if type(ie).__name__ in ['GenericIE', tc['name'] + 'IE']:
self.assertTrue(ie.suitable(url), '%s should match URL %r' % (type(ie).__name__, url))
else:
self.assertFalse(ie.suitable(url), '%s should not match URL %r' % (type(ie).__name__, url))
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()

View File

@ -14,10 +14,8 @@ import binascii
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import youtube_dl.YoutubeDL import youtube_dl.YoutubeDL
import youtube_dl.extractor
from youtube_dl.utils import * from youtube_dl.utils import *
DEF_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'tests.json')
PARAMETERS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), "parameters.json") PARAMETERS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), "parameters.json")
RETRIES = 3 RETRIES = 3
@ -56,8 +54,9 @@ def _file_md5(fn):
with open(fn, 'rb') as f: with open(fn, 'rb') as f:
return hashlib.md5(f.read()).hexdigest() return hashlib.md5(f.read()).hexdigest()
with io.open(DEF_FILE, encoding='utf-8') as deff: from helper import get_testcases
defs = json.load(deff) defs = get_testcases()
with io.open(PARAMETERS_FILE, encoding='utf-8') as pf: with io.open(PARAMETERS_FILE, encoding='utf-8') as pf:
parameters = json.load(pf) parameters = json.load(pf)
@ -73,22 +72,23 @@ def generator(test_case):
def test_template(self): def test_template(self):
ie = youtube_dl.extractor.get_info_extractor(test_case['name']) ie = youtube_dl.extractor.get_info_extractor(test_case['name'])
def print_skipping(reason):
print('Skipping %s: %s' % (test_case['name'], reason))
if not ie._WORKING: if not ie._WORKING:
print('Skipping: IE marked as not _WORKING') print_skipping('IE marked as not _WORKING')
return return
if 'playlist' not in test_case and not test_case['file']: if 'playlist' not in test_case and not test_case['file']:
print('Skipping: No output file specified') print_skipping('No output file specified')
return return
if 'skip' in test_case: if 'skip' in test_case:
print('Skipping: {0}'.format(test_case['skip'])) print_skipping(test_case['skip'])
return return
params = self.parameters.copy() params = self.parameters.copy()
params.update(test_case.get('params', {})) params.update(test_case.get('params', {}))
ydl = YoutubeDL(params) ydl = YoutubeDL(params)
for ie in youtube_dl.extractor.gen_extractors(): ydl.add_default_info_extractors()
ydl.add_info_extractor(ie)
finished_hook_called = set() finished_hook_called = set()
def _hook(status): def _hook(status):
if status['status'] == 'finished': if status['status'] == 'finished':
@ -155,9 +155,12 @@ def generator(test_case):
### And add them to TestDownload ### And add them to TestDownload
for n, test_case in enumerate(defs): for n, test_case in enumerate(defs):
test_method = generator(test_case) test_method = generator(test_case)
test_method.__name__ = "test_{0}".format(test_case["name"]) tname = 'test_' + str(test_case['name'])
if getattr(TestDownload, test_method.__name__, False): i = 1
test_method.__name__ = "test_{0}_{1}".format(test_case["name"], n) while hasattr(TestDownload, tname):
tname = 'test_' + str(test_case['name']) + '_' + str(i)
i += 1
test_method.__name__ = tname
setattr(TestDownload, test_method.__name__, test_method) setattr(TestDownload, test_method.__name__, test_method)
del test_method del test_method

View File

@ -10,30 +10,8 @@ sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from youtube_dl.extractor import YoutubeUserIE, YoutubePlaylistIE, YoutubeIE, YoutubeChannelIE from youtube_dl.extractor import YoutubeUserIE, YoutubePlaylistIE, YoutubeIE, YoutubeChannelIE
from youtube_dl.utils import * from youtube_dl.utils import *
from youtube_dl import YoutubeDL
PARAMETERS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), "parameters.json") from helper import FakeYDL
with io.open(PARAMETERS_FILE, encoding='utf-8') as pf:
parameters = json.load(pf)
# General configuration (from __init__, not very elegant...)
jar = compat_cookiejar.CookieJar()
cookie_processor = compat_urllib_request.HTTPCookieProcessor(jar)
proxy_handler = compat_urllib_request.ProxyHandler()
opener = compat_urllib_request.build_opener(proxy_handler, cookie_processor, YoutubeDLHandler())
compat_urllib_request.install_opener(opener)
class FakeYDL(YoutubeDL):
def __init__(self):
self.result = []
self.params = parameters
def to_screen(self, s):
print(s)
def trouble(self, s, tb=None):
raise Exception(s)
def extract_info(self, url):
self.result.append(url)
return url
class TestYoutubeLists(unittest.TestCase): class TestYoutubeLists(unittest.TestCase):
def assertIsPlaylist(self,info): def assertIsPlaylist(self,info):

57
test/test_youtube_sig.py Executable file
View File

@ -0,0 +1,57 @@
#!/usr/bin/env python
import unittest
import sys
# Allow direct execution
import os
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from youtube_dl.extractor.youtube import YoutubeIE
from helper import FakeYDL
# Bound _decrypt_signature method of a YoutubeIE built on the fake downloader;
# created once at import time and called by every test in this module.
sig = YoutubeIE(FakeYDL())._decrypt_signature
class TestYoutubeSig(unittest.TestCase):
    """Checks sig() against one known input/output pair per test method."""

    def test_43_43(self):
        scrambled = '5AEEAE0EC39677BC65FD9021CCD115F1F2DBD5A59E4.C0B243A3E2DED6769199AF3461781E75122AE135135'
        expected = '931EA22157E1871643FA9519676DED253A342B0C.4E95A5DBD2F1F511DCC1209DF56CB77693CE0EAE'
        self.assertEqual(sig(scrambled), expected)

    def test_88(self):
        scrambled = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<"
        expected = "J:|}][{=+-_)(*&;%$#@>MNBVCXZASDFGH^KLPOIUYTREWQ0987654321mnbvcxzasdfghrklpoiuytej"
        self.assertEqual(sig(scrambled), expected)

    def test_87(self):
        scrambled = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$^&*()_-+={[]}|:;?/>.<"
        expected = "!?;:|}][{=+-_)(*&^$#@/MNBVCXZASqFGHJKLPOIUYTREWQ0987654321mnbvcxzasdfghjklpoiuytr"
        self.assertEqual(sig(scrambled), expected)

    def test_86(self):
        scrambled = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[|};?/>.<"
        expected = "ertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!/#$%^&*()_-+={[|};?@"
        self.assertEqual(sig(scrambled), expected)

    def test_85(self):
        scrambled = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?/>.<"
        expected = "{>/?;}[.=+-_)(*&^%$#@!MqBVCXZASDFwHJKLPOIUYTREWQ0987654321mnbvcxzasdfghjklpoiuytr"
        self.assertEqual(sig(scrambled), expected)

    def test_84(self):
        scrambled = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?>.<"
        expected = "<.>?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWe098765432rmnbvcxzasdfghjklpoiuyt1"
        self.assertEqual(sig(scrambled), expected)

    def test_83(self):
        scrambled = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!#$%^&*()_+={[};?/>.<"
        expected = "D.>/?;}[{=+_)(*&^%$#!MNBVCXeAS<FGHJKLPOIUYTREWZ0987654321mnbvcxzasdfghjklpoiuytrQ"
        self.assertEqual(sig(scrambled), expected)

    def test_82(self):
        scrambled = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/>.<"
        expected = "Q>/?;}[{=+-(*<^%$#@!MNBVCXZASDFGHKLPOIUY8REWT0q&7654321mnbvcxzasdfghjklpoiuytrew9"
        self.assertEqual(sig(scrambled), expected)
if __name__ == '__main__':
unittest.main()

View File

@ -12,31 +12,7 @@ sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from youtube_dl.extractor import YoutubeIE from youtube_dl.extractor import YoutubeIE
from youtube_dl.utils import * from youtube_dl.utils import *
from youtube_dl import YoutubeDL from helper import FakeYDL
PARAMETERS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), "parameters.json")
with io.open(PARAMETERS_FILE, encoding='utf-8') as pf:
parameters = json.load(pf)
# General configuration (from __init__, not very elegant...)
jar = compat_cookiejar.CookieJar()
cookie_processor = compat_urllib_request.HTTPCookieProcessor(jar)
proxy_handler = compat_urllib_request.ProxyHandler()
opener = compat_urllib_request.build_opener(proxy_handler, cookie_processor, YoutubeDLHandler())
compat_urllib_request.install_opener(opener)
class FakeYDL(YoutubeDL):
def __init__(self):
self.result = []
# Different instances of the downloader can't share the same dictionary
# some test set the "sublang" parameter, which would break the md5 checks.
self.params = dict(parameters)
def to_screen(self, s):
print(s)
def trouble(self, s, tb=None):
raise Exception(s)
def download(self, x):
self.result.append(x)
md5 = lambda s: hashlib.md5(s.encode('utf-8')).hexdigest() md5 = lambda s: hashlib.md5(s.encode('utf-8')).hexdigest()
@ -84,7 +60,7 @@ class TestYoutubeSubtitles(unittest.TestCase):
info_dict = IE.extract('QRS8MkLhQmM') info_dict = IE.extract('QRS8MkLhQmM')
subtitles = info_dict[0]['subtitles'] subtitles = info_dict[0]['subtitles']
self.assertEqual(len(subtitles), 13) self.assertEqual(len(subtitles), 13)
def test_youtube_subtitles_format(self): def test_youtube_subtitles_sbv_format(self):
DL = FakeYDL() DL = FakeYDL()
DL.params['writesubtitles'] = True DL.params['writesubtitles'] = True
DL.params['subtitlesformat'] = 'sbv' DL.params['subtitlesformat'] = 'sbv'
@ -92,6 +68,14 @@ class TestYoutubeSubtitles(unittest.TestCase):
info_dict = IE.extract('QRS8MkLhQmM') info_dict = IE.extract('QRS8MkLhQmM')
sub = info_dict[0]['subtitles'][0] sub = info_dict[0]['subtitles'][0]
self.assertEqual(md5(sub[2]), '13aeaa0c245a8bed9a451cb643e3ad8b') self.assertEqual(md5(sub[2]), '13aeaa0c245a8bed9a451cb643e3ad8b')
def test_youtube_subtitles_vtt_format(self):
DL = FakeYDL()
DL.params['writesubtitles'] = True
DL.params['subtitlesformat'] = 'vtt'
IE = YoutubeIE(DL)
info_dict = IE.extract('QRS8MkLhQmM')
sub = info_dict[0]['subtitles'][0]
self.assertEqual(md5(sub[2]), '356cdc577fde0c6783b9b822e7206ff7')
def test_youtube_list_subtitles(self): def test_youtube_list_subtitles(self):
DL = FakeYDL() DL = FakeYDL()
DL.params['listsubtitles'] = True DL.params['listsubtitles'] = True

View File

@ -1,699 +0,0 @@
[
{
"name": "Youtube",
"url": "http://www.youtube.com/watch?v=BaW_jenozKc",
"file": "BaW_jenozKc.mp4",
"info_dict": {
"title": "youtube-dl test video \"'/\\ä↭𝕐",
"uploader": "Philipp Hagemeister",
"uploader_id": "phihag",
"upload_date": "20121002",
"description": "test chars: \"'/\\ä↭𝕐\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de ."
}
},
{
"name": "Youtube",
"url": "http://www.youtube.com/watch?v=1ltcDfZMA3U",
"file": "1ltcDfZMA3U.flv",
"note": "Test VEVO video (#897)",
"info_dict": {
"upload_date": "20070518",
"title": "Maps - It Will Find You",
"description": "Music video by Maps performing It Will Find You.",
"uploader": "MuteUSA",
"uploader_id": "MuteUSA"
}
},
{
"name": "Youtube",
"url": "http://www.youtube.com/watch?v=UxxajLWwzqY",
"file": "UxxajLWwzqY.mp4",
"note": "Test generic use_cipher_signature video (#897)",
"info_dict": {
"upload_date": "20120506",
"title": "Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]",
"description": "md5:b085c9804f5ab69f4adea963a2dceb3c",
"uploader": "IconaPop",
"uploader_id": "IconaPop"
}
},
{
"name": "Dailymotion",
"md5": "392c4b85a60a90dc4792da41ce3144eb",
"url": "http://www.dailymotion.com/video/x33vw9_tutoriel-de-youtubeur-dl-des-video_tech",
"file": "x33vw9.mp4",
"info_dict": {
"uploader": "Alex and Van .",
"title": "Tutoriel de Youtubeur\"DL DES VIDEO DE YOUTUBE\""
}
},
{
"name": "Metacafe",
"add_ie": ["Youtube"],
"url": "http://metacafe.com/watch/yt-_aUehQsCQtM/the_electric_company_short_i_pbs_kids_go/",
"file": "_aUehQsCQtM.flv",
"info_dict": {
"upload_date": "20090102",
"title": "The Electric Company | \"Short I\" | PBS KIDS GO!",
"description": "md5:2439a8ef6d5a70e380c22f5ad323e5a8",
"uploader": "PBS",
"uploader_id": "PBS"
}
},
{
"name": "BlipTV",
"md5": "b2d849efcf7ee18917e4b4d9ff37cafe",
"url": "http://blip.tv/cbr/cbr-exclusive-gotham-city-imposters-bats-vs-jokerz-short-3-5796352",
"file": "5779306.m4v",
"info_dict": {
"upload_date": "20111205",
"description": "md5:9bc31f227219cde65e47eeec8d2dc596",
"uploader": "Comic Book Resources - CBR TV",
"title": "CBR EXCLUSIVE: \"Gotham City Imposters\" Bats VS Jokerz Short 3"
}
},
{
"name": "XVideos",
"md5": "1d0c835822f0a71a7bf011855db929d0",
"url": "http://www.xvideos.com/video939581/funny_porns_by_s_-1",
"file": "939581.flv",
"info_dict": {
"title": "Funny Porns By >>>>S<<<<<< -1"
}
},
{
"name": "YouPorn",
"md5": "c37ddbaaa39058c76a7e86c6813423c1",
"url": "http://www.youporn.com/watch/505835/sex-ed-is-it-safe-to-masturbate-daily/",
"file": "505835.mp4",
"info_dict": {
"upload_date": "20101221",
"description": "Love & Sex Answers: http://bit.ly/DanAndJenn -- Is It Unhealthy To Masturbate Daily?",
"uploader": "Ask Dan And Jennifer",
"title": "Sex Ed: Is It Safe To Masturbate Daily?"
}
},
{
"name": "Pornotube",
"md5": "374dd6dcedd24234453b295209aa69b6",
"url": "http://pornotube.com/c/173/m/1689755/Marilyn-Monroe-Bathing",
"file": "1689755.flv",
"info_dict": {
"upload_date": "20090708",
"title": "Marilyn-Monroe-Bathing"
}
},
{
"name": "YouJizz",
"md5": "07e15fa469ba384c7693fd246905547c",
"url": "http://www.youjizz.com/videos/zeichentrick-1-2189178.html",
"file": "2189178.flv",
"info_dict": {
"title": "Zeichentrick 1"
}
},
{
"name": "Vimeo",
"md5": "8879b6cc097e987f02484baf890129e5",
"url": "http://vimeo.com/56015672",
"file": "56015672.mp4",
"info_dict": {
"title": "youtube-dl test video - ★ \" ' 幸 / \\ ä ↭ 𝕐",
"uploader": "Filippo Valsorda",
"uploader_id": "user7108434",
"upload_date": "20121220",
"description": "This is a test case for youtube-dl.\nFor more information, see github.com/rg3/youtube-dl\nTest chars: ★ \" ' 幸 / \\ ä ↭ 𝕐"
}
},
{
"name": "Soundcloud",
"md5": "ebef0a451b909710ed1d7787dddbf0d7",
"url": "http://soundcloud.com/ethmusic/lostin-powers-she-so-heavy",
"file": "62986583.mp3",
"info_dict": {
"upload_date": "20121011",
"description": "No Downloads untill we record the finished version this weekend, i was too pumped n i had to post it , earl is prolly gonna b hella p.o'd",
"uploader": "E.T. ExTerrestrial Music",
"title": "Lostin Powers - She so Heavy (SneakPreview) Adrian Ackers Blueprint 1"
}
},
{
"name": "StanfordOpenClassroom",
"md5": "544a9468546059d4e80d76265b0443b8",
"url": "http://openclassroom.stanford.edu/MainFolder/VideoPage.php?course=PracticalUnix&video=intro-environment&speed=100",
"file": "PracticalUnix_intro-environment.mp4",
"info_dict": {
"title": "Intro Environment"
}
},
{
"name": "XNXX",
"md5": "0831677e2b4761795f68d417e0b7b445",
"url": "http://video.xnxx.com/video1135332/lida_naked_funny_actress_5_",
"file": "1135332.flv",
"info_dict": {
"title": "lida » Naked Funny Actress (5)"
}
},
{
"name": "Youku",
"url": "http://v.youku.com/v_show/id_XNDgyMDQ2NTQw.html",
"file": "XNDgyMDQ2NTQw_part00.flv",
"md5": "ffe3f2e435663dc2d1eea34faeff5b5b",
"params": { "test": false },
"info_dict": {
"title": "youtube-dl test video \"'/\\ä↭𝕐"
}
},
{
"name": "NBA",
"url": "http://www.nba.com/video/games/nets/2012/12/04/0021200253-okc-bkn-recap.nba/index.html",
"file": "0021200253-okc-bkn-recap.nba.mp4",
"md5": "c0edcfc37607344e2ff8f13c378c88a4",
"info_dict": {
"description": "Kevin Durant scores 32 points and dishes out six assists as the Thunder beat the Nets in Brooklyn.",
"title": "Thunder vs. Nets"
}
},
{
"name": "JustinTV",
"url": "http://www.twitch.tv/thegamedevhub/b/296128360",
"file": "296128360.flv",
"md5": "ecaa8a790c22a40770901460af191c9a",
"info_dict": {
"upload_date": "20110927",
"uploader_id": 25114803,
"uploader": "thegamedevhub",
"title": "Beginner Series - Scripting With Python Pt.1"
}
},
{
"name": "MyVideo",
"url": "http://www.myvideo.de/watch/8229274/bowling_fail_or_win",
"file": "8229274.flv",
"md5": "2d2753e8130479ba2cb7e0a37002053e",
"info_dict": {
"title": "bowling-fail-or-win"
}
},
{
"name": "Escapist",
"url": "http://www.escapistmagazine.com/videos/view/the-escapist-presents/6618-Breaking-Down-Baldurs-Gate",
"file": "6618-Breaking-Down-Baldurs-Gate.mp4",
"md5": "c6793dbda81388f4264c1ba18684a74d",
"info_dict": {
"description": "Baldur's Gate: Original, Modded or Enhanced Edition? I'll break down what you can expect from the new Baldur's Gate: Enhanced Edition.",
"uploader": "the-escapist-presents",
"title": "Breaking Down Baldur's Gate"
}
},
{
"name": "GooglePlus",
"url": "https://plus.google.com/u/0/108897254135232129896/posts/ZButuJc6CtH",
"file": "ZButuJc6CtH.flv",
"info_dict": {
"upload_date": "20120613",
"uploader": "井上ヨシマサ",
"title": "嘆きの天使 降臨"
}
},
{
"name": "FunnyOrDie",
"url": "http://www.funnyordie.com/videos/0732f586d7/heart-shaped-box-literal-video-version",
"file": "0732f586d7.mp4",
"md5": "f647e9e90064b53b6e046e75d0241fbd",
"info_dict": {
"description": "Lyrics changed to match the video. Spoken cameo by Obscurus Lupa (from ThatGuyWithTheGlasses.com). Based on a concept by Dustin McLean (DustFilms.com). Performed, edited, and written by David A. Scott.",
"title": "Heart-Shaped Box: Literal Video Version"
}
},
{
"name": "Steam",
"url": "http://store.steampowered.com/video/105600/",
"playlist": [
{
"file": "81300.flv",
"md5": "f870007cee7065d7c76b88f0a45ecc07",
"info_dict": {
"title": "Terraria 1.1 Trailer"
}
},
{
"file": "80859.flv",
"md5": "61aaf31a5c5c3041afb58fb83cbb5751",
"info_dict": {
"title": "Terraria Trailer"
}
}
]
},
{
"name": "Ustream",
"url": "http://www.ustream.tv/recorded/20274954",
"file": "20274954.flv",
"md5": "088f151799e8f572f84eb62f17d73e5c",
"info_dict": {
"title": "Young Americans for Liberty February 7, 2012 2:28 AM",
"uploader": "Young Americans for Liberty"
}
},
{
"name": "InfoQ",
"url": "http://www.infoq.com/presentations/A-Few-of-My-Favorite-Python-Things",
"file": "12-jan-pythonthings.mp4",
"info_dict": {
"description": "Mike Pirnat presents some tips and tricks, standard libraries and third party packages that make programming in Python a richer experience.",
"title": "A Few of My Favorite [Python] Things"
},
"params": {
"skip_download": true
}
},
{
"name": "ComedyCentral",
"url": "http://www.thedailyshow.com/watch/thu-december-13-2012/kristen-stewart",
"file": "422212.mp4",
"md5": "4e2f5cb088a83cd8cdb7756132f9739d",
"info_dict": {
"upload_date": "20121214",
"description": "Kristen Stewart",
"uploader": "thedailyshow",
"title": "thedailyshow-kristen-stewart part 1"
}
},
{
"name": "RBMARadio",
"url": "http://www.rbmaradio.com/shows/ford-lopatin-live-at-primavera-sound-2011",
"file": "ford-lopatin-live-at-primavera-sound-2011.mp3",
"md5": "6bc6f9bcb18994b4c983bc3bf4384d95",
"info_dict": {
"title": "Live at Primavera Sound 2011",
"description": "Joel Ford and Daniel \u2019Oneohtrix Point Never\u2019 Lopatin fly their midified pop extravaganza to Spain. Live at Primavera Sound 2011.",
"uploader": "Ford & Lopatin",
"uploader_id": "ford-lopatin",
"location": "Spain"
}
},
{
"name": "Facebook",
"url": "https://www.facebook.com/photo.php?v=120708114770723",
"file": "120708114770723.mp4",
"md5": "48975a41ccc4b7a581abd68651c1a5a8",
"info_dict": {
"title": "PEOPLE ARE AWESOME 2013",
"duration": 279
}
},
{
"name": "EightTracks",
"url": "http://8tracks.com/ytdl/youtube-dl-test-tracks-a",
"playlist": [
{
"file": "11885610.m4a",
"md5": "96ce57f24389fc8734ce47f4c1abcc55",
"info_dict": {
"title": "youtue-dl project<>\"' - youtube-dl test track 1 \"'/\\\u00e4\u21ad",
"uploader_id": "ytdl"
}
},
{
"file": "11885608.m4a",
"md5": "4ab26f05c1f7291ea460a3920be8021f",
"info_dict": {
"title": "youtube-dl project - youtube-dl test track 2 \"'/\\\u00e4\u21ad",
"uploader_id": "ytdl"
}
},
{
"file": "11885679.m4a",
"md5": "d30b5b5f74217410f4689605c35d1fd7",
"info_dict": {
"title": "youtube-dl project as well - youtube-dl test track 3 \"'/\\\u00e4\u21ad",
"uploader_id": "ytdl"
}
},
{
"file": "11885680.m4a",
"md5": "4eb0a669317cd725f6bbd336a29f923a",
"info_dict": {
"title": "youtube-dl project as well - youtube-dl test track 4 \"'/\\\u00e4\u21ad",
"uploader_id": "ytdl"
}
},
{
"file": "11885682.m4a",
"md5": "1893e872e263a2705558d1d319ad19e8",
"info_dict": {
"title": "PH - youtube-dl test track 5 \"'/\\\u00e4\u21ad",
"uploader_id": "ytdl"
}
},
{
"file": "11885683.m4a",
"md5": "b673c46f47a216ab1741ae8836af5899",
"info_dict": {
"title": "PH - youtube-dl test track 6 \"'/\\\u00e4\u21ad",
"uploader_id": "ytdl"
}
},
{
"file": "11885684.m4a",
"md5": "1d74534e95df54986da7f5abf7d842b7",
"info_dict": {
"title": "phihag - youtube-dl test track 7 \"'/\\\u00e4\u21ad",
"uploader_id": "ytdl"
}
},
{
"file": "11885685.m4a",
"md5": "f081f47af8f6ae782ed131d38b9cd1c0",
"info_dict": {
"title": "phihag - youtube-dl test track 8 \"'/\\\u00e4\u21ad",
"uploader_id": "ytdl"
}
}
]
},
{
"name": "Keek",
"url": "http://www.keek.com/ytdl/keeks/NODfbab",
"file": "NODfbab.mp4",
"md5": "9b0636f8c0f7614afa4ea5e4c6e57e83",
"info_dict": {
"uploader": "ytdl",
"title": "test chars: \"'/\\ä<>This is a test video for youtube-dl.For more information, contact phihag@phihag.de ."
}
},
{
"name": "TED",
"url": "http://www.ted.com/talks/dan_dennett_on_our_consciousness.html",
"file": "102.mp4",
"md5": "8cd9dfa41ee000ce658fd48fb5d89a61",
"info_dict": {
"title": "Dan Dennett: The illusion of consciousness",
"description": "md5:c6fa72e6eedbd938c9caf6b2702f5922"
}
},
{
"name": "MySpass",
"url": "http://www.myspass.de/myspass/shows/tvshows/absolute-mehrheit/Absolute-Mehrheit-vom-17022013-Die-Highlights-Teil-2--/11741/",
"file": "11741.mp4",
"md5": "0b49f4844a068f8b33f4b7c88405862b",
"info_dict": {
"description": "Wer kann in die Fußstapfen von Wolfgang Kubicki treten und die Mehrheit der Zuschauer hinter sich versammeln? Wird vielleicht sogar die Absolute Mehrheit geknackt und der Jackpot von 200.000 Euro mit nach Hause genommen?",
"title": "Absolute Mehrheit vom 17.02.2013 - Die Highlights, Teil 2"
}
},
{
"name": "Generic",
"url": "http://www.hodiho.fr/2013/02/regis-plante-sa-jeep.html",
"file": "13601338388002.mp4",
"md5": "85b90ccc9d73b4acd9138d3af4c27f89",
"info_dict": {
"uploader": "www.hodiho.fr",
"title": "Régis plante sa Jeep"
}
},
{
"name": "Spiegel",
"url": "http://www.spiegel.de/video/vulkan-tungurahua-in-ecuador-ist-wieder-aktiv-video-1259285.html",
"file": "1259285.mp4",
"md5": "2c2754212136f35fb4b19767d242f66e",
"info_dict": {
"title": "Vulkanausbruch in Ecuador: Der \"Feuerschlund\" ist wieder aktiv"
}
},
{
"name": "LiveLeak",
"md5": "0813c2430bea7a46bf13acf3406992f4",
"url": "http://www.liveleak.com/view?i=757_1364311680",
"file": "757_1364311680.mp4",
"info_dict": {
"title": "Most unlucky car accident",
"description": "extremely bad day for this guy..!",
"uploader": "ljfriel2"
}
},
{
"name": "WorldStarHipHop",
"url": "http://www.worldstarhiphop.com/videos/video.php?v=wshh6a7q1ny0G34ZwuIO",
"file": "wshh6a7q1ny0G34ZwuIO.mp4",
"md5": "9d04de741161603bf7071bbf4e883186",
"info_dict": {
"title": "Video: KO Of The Week: MMA Fighter Gets Knocked Out By Swift Head Kick!"
}
},
{
"name": "ARD",
"url": "http://www.ardmediathek.de/das-erste/tagesschau-in-100-sek?documentId=14077640",
"file": "14077640.mp4",
"md5": "6ca8824255460c787376353f9e20bbd8",
"info_dict": {
"title": "11.04.2013 09:23 Uhr - Tagesschau in 100 Sekunden"
},
"skip": "Requires rtmpdump"
},
{
"name": "Tumblr",
"url": "http://resigno.tumblr.com/post/53364321212/e-de-extrema-importancia-que-esse-video-seja",
"file": "53364321212.mp4",
"md5": "0716d3dd51baf68a28b40fdf1251494e",
"info_dict": {
"title": "Rafael Lemos | Tumblr"
}
},
{
"name": "SoundcloudSet",
"url":"https://soundcloud.com/the-concept-band/sets/the-royal-concept-ep",
"playlist":[
{
"file":"30510138.mp3",
"md5":"f9136bf103901728f29e419d2c70f55d",
"info_dict": {
"upload_date": "20111213",
"description": "The Royal Concept from Stockholm\r\nFilip / Povel / David / Magnus\r\nwww.royalconceptband.com",
"uploader": "The Royal Concept",
"title": "D-D-Dance"
}
},
{
"file":"47127625.mp3",
"md5":"09b6758a018470570f8fd423c9453dd8",
"info_dict": {
"upload_date": "20120521",
"description": "The Royal Concept from Stockholm\r\nFilip / Povel / David / Magnus\r\nwww.royalconceptband.com",
"uploader": "The Royal Concept",
"title": "The Royal Concept - Gimme Twice"
}
},
{
"file":"47127627.mp3",
"md5":"154abd4e418cea19c3b901f1e1306d9c",
"info_dict": {
"upload_date": "20120521",
"uploader": "The Royal Concept",
"title": "Goldrushed"
}
},
{
"file":"47127629.mp3",
"md5":"2f5471edc79ad3f33a683153e96a79c1",
"info_dict": {
"upload_date": "20120521",
"description": "The Royal Concept from Stockholm\r\nFilip / Povel / David / Magnus\r\nwww.royalconceptband.com",
"uploader": "The Royal Concept",
"title": "In the End"
}
},
{
"file":"47127631.mp3",
"md5":"f9ba87aa940af7213f98949254f1c6e2",
"info_dict": {
"upload_date": "20120521",
"description": "The Royal Concept from Stockholm\r\nFilip / David / Povel / Magnus\r\nwww.theroyalconceptband.com",
"uploader": "The Royal Concept",
"title": "Knocked Up"
}
},
{
"file":"75206121.mp3",
"md5":"f9d1fe9406717e302980c30de4af9353",
"info_dict": {
"upload_date": "20130116",
"description": "The unreleased track World on Fire premiered on the CW's hit show Arrow (8pm/7pm central). \r\nAs a gift to our fans we would like to offer you a free download of the track! ",
"uploader": "The Royal Concept",
"title": "World On Fire"
}
}
]
},
{
"name":"Bandcamp",
"url":"http://youtube-dl.bandcamp.com/track/youtube-dl-test-song",
"file":"1812978515.mp3",
"md5":"cdeb30cdae1921719a3cbcab696ef53c",
"info_dict": {
"title":"youtube-dl test song \"'/\\ä↭"
},
"skip": "There is a limit of 200 free downloads / month for the test song"
},
{
"name": "RedTube",
"url": "http://www.redtube.com/66418",
"file": "66418.mp4",
"md5": "7b8c22b5e7098a3e1c09709df1126d2d",
"info_dict":{
"title":"Sucked on a toilet"
}
},
{
"name": "Photobucket",
"url": "http://media.photobucket.com/user/rachaneronas/media/TiredofLinkBuildingTryBacklinkMyDomaincom_zpsc0c3b9fa.mp4.html?filters[term]=search&filters[primary]=videos&filters[secondary]=images&sort=1&o=0",
"file": "zpsc0c3b9fa.mp4",
"md5": "7dabfb92b0a31f6c16cebc0f8e60ff99",
"info_dict": {
"upload_date": "20130504",
"uploader": "rachaneronas",
"title": "Tired of Link Building? Try BacklinkMyDomain.com!"
}
},
{
"name": "Ina",
"url": "www.ina.fr/video/I12055569/francois-hollande-je-crois-que-c-est-clair-video.html",
"file": "I12055569.mp4",
"md5": "a667021bf2b41f8dc6049479d9bb38a3",
"info_dict":{
"title":"François Hollande \"Je crois que c'est clair\""
}
},
{
"name": "Yahoo",
"url": "http://screen.yahoo.com/julian-smith-travis-legg-watch-214727115.html",
"file": "214727115.flv",
"md5": "2e717f169c1be93d84d3794a00d4a325",
"info_dict": {
"title": "Julian Smith & Travis Legg Watch Julian Smith"
},
"skip": "Requires rtmpdump"
},
{
"name": "Howcast",
"url": "http://www.howcast.com/videos/390161-How-to-Tie-a-Square-Knot-Properly",
"file": "390161.mp4",
"md5": "1d7ba54e2c9d7dc6935ef39e00529138",
"info_dict":{
"title":"How to Tie a Square Knot Properly",
"description":"The square knot, also known as the reef knot, is one of the oldest, most basic knots to tie, and can be used in many different ways. Here's the proper way to tie a square knot."
}
},
{
"name": "Vine",
"url": "https://vine.co/v/b9KOOWX7HUx",
"file": "b9KOOWX7HUx.mp4",
"md5": "2f36fed6235b16da96ce9b4dc890940d",
"info_dict":{
"title": "Chicken.",
"uploader": "Jack Dorsey"
}
},
{
"name": "Flickr",
"url": "http://www.flickr.com/photos/forestwander-nature-pictures/5645318632/in/photostream/",
"file": "5645318632.mp4",
"md5": "6fdc01adbc89d72fc9c4f15b4a4ba87b",
"info_dict":{
"title": "Dark Hollow Waterfalls",
"uploader_id": "forestwander-nature-pictures",
"description": "Waterfalls in the Springtime at Dark Hollow Waterfalls. These are located just off of Skyline Drive in Virginia. They are only about 6/10 of a mile hike but it is a pretty steep hill and a good climb back up."
}
},
{
"name": "Teamcoco",
"url": "http://teamcoco.com/video/louis-ck-interview-george-w-bush",
"file": "19705.mp4",
"md5": "27b6f7527da5acf534b15f21b032656e",
"info_dict":{
"title": "Louis C.K. Interview Pt. 1 11/3/11",
"description": "Louis C.K. got starstruck by George W. Bush, so what? Part one."
}
},
{
"name": "XHamster",
"url": "http://xhamster.com/movies/1509445/femaleagent_shy_beauty_takes_the_bait.html",
"file": "1509445.flv",
"md5": "9f48e0e8d58e3076bb236ff412ab62fa",
"info_dict": {
"upload_date": "20121014",
"uploader_id": "Ruseful2011",
"title": "FemaleAgent Shy beauty takes the bait"
}
},
{
"name": "Hypem",
"url": "http://hypem.com/track/1v6ga/BODYWORK+-+TAME",
"file": "1v6ga.mp3",
"md5": "b9cc91b5af8995e9f0c1cee04c575828",
"info_dict":{
"title":"Tame"
}
},
{
"name": "Vbox7",
"url": "http://vbox7.com/play:249bb972c2",
"file": "249bb972c2.flv",
"md5": "9c70d6d956f888bdc08c124acc120cfe",
"info_dict":{
"title":"Смях! Чудо - чист за секунди - Скрита камера"
}
},
{
"name": "Gametrailers",
"url": "http://www.gametrailers.com/videos/zbvr8i/mirror-s-edge-2-e3-2013--debut-trailer",
"file": "zbvr8i.flv",
"md5": "c3edbc995ab4081976e16779bd96a878",
"info_dict": {
"title": "E3 2013: Debut Trailer"
},
"skip": "Requires rtmpdump"
},
{
"name": "Statigram",
"url": "http://statigr.am/p/484091715184808010_284179915",
"file": "484091715184808010_284179915.mp4",
"md5": "deda4ff333abe2e118740321e992605b",
"info_dict": {
"uploader_id": "videoseconds",
"title": "Instagram photo by @videoseconds (Videos)"
}
},
{
"name": "Break",
"url": "http://www.break.com/video/when-girls-act-like-guys-2468056",
"file": "2468056.mp4",
"md5": "a3513fb1547fba4fb6cfac1bffc6c46b",
"info_dict": {
"title": "When Girls Act Like D-Bags"
}
},
{
"name": "Vevo",
"url": "http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280",
"file": "GB1101300280.mp4",
"md5": "06bea460acb744eab74a9d7dcb4bfd61",
"info_dict": {
"title": "Somebody To Die For",
"upload_date": "20130624",
"uploader": "Hurts"
}
},
{
"name": "Tudou",
"url": "http://www.tudou.com/listplay/zzdE77v6Mmo/2xN2duXMxmw.html",
"file": "159447792.f4v",
"md5": "ad7c358a01541e926a1e413612c6b10a",
"info_dict": {
"title": "卡马乔国足开大脚长传冲吊集锦"
}
}
]

View File

@ -137,7 +137,7 @@ class FileDownloader(object):
self.ydl.report_warning(*args, **kargs) self.ydl.report_warning(*args, **kargs)
def report_error(self, *args, **kargs): def report_error(self, *args, **kargs):
self.ydl.error(*args, **kargs) self.ydl.report_error(*args, **kargs)
def slow_down(self, start_time, byte_counter): def slow_down(self, start_time, byte_counter):
"""Sleep if the download speed is over the rate limit.""" """Sleep if the download speed is over the rate limit."""

View File

@ -13,7 +13,7 @@ import time
import traceback import traceback
from .utils import * from .utils import *
from .extractor import get_info_extractor from .extractor import get_info_extractor, gen_extractors
from .FileDownloader import FileDownloader from .FileDownloader import FileDownloader
@ -75,7 +75,7 @@ class YoutubeDL(object):
writeautomaticsub: Write the automatic subtitles to a file writeautomaticsub: Write the automatic subtitles to a file
allsubtitles: Downloads all the subtitles of the video allsubtitles: Downloads all the subtitles of the video
listsubtitles: Lists all available subtitles for the video listsubtitles: Lists all available subtitles for the video
subtitlesformat: Subtitle format [sbv/srt] (default=srt) subtitlesformat: Subtitle format [srt/sbv/vtt] (default=srt)
subtitleslang: Language of the subtitles to download subtitleslang: Language of the subtitles to download
keepvideo: Keep the video file after post-processing keepvideo: Keep the video file after post-processing
daterange: A DateRange object, download only if the upload_date is in the range. daterange: A DateRange object, download only if the upload_date is in the range.
@ -113,6 +113,13 @@ class YoutubeDL(object):
self._ies.append(ie) self._ies.append(ie)
ie.set_downloader(self) ie.set_downloader(self)
def add_default_info_extractors(self):
    """Register every extractor produced by gen_extractors().

    Each instance is handed to add_info_extractor() in the order
    gen_extractors() yields it, so the extractors end up appended
    after any that were registered earlier.
    """
    register = self.add_info_extractor
    for extractor in gen_extractors():
        register(extractor)
def add_post_processor(self, pp): def add_post_processor(self, pp):
"""Add a PostProcessor object to the end of the chain.""" """Add a PostProcessor object to the end of the chain."""
self._pps.append(pp) self._pps.append(pp)

View File

@ -26,7 +26,8 @@ __authors__ = (
'Julien Fraichard', 'Julien Fraichard',
'Johny Mo Swag', 'Johny Mo Swag',
'Axel Noack', 'Axel Noack',
) 'Albert Kim',
)
__license__ = 'Public Domain' __license__ = 'Public Domain'
@ -205,7 +206,7 @@ def parseOpts(overrideArguments=None):
help='lists all available subtitles for the video (currently youtube only)', default=False) help='lists all available subtitles for the video (currently youtube only)', default=False)
video_format.add_option('--sub-format', video_format.add_option('--sub-format',
action='store', dest='subtitlesformat', metavar='FORMAT', action='store', dest='subtitlesformat', metavar='FORMAT',
help='subtitle format [srt/sbv] (default=srt) (currently youtube only)', default='srt') help='subtitle format [srt/sbv/vtt] (default=srt) (currently youtube only)', default='srt')
video_format.add_option('--sub-lang', '--srt-lang', video_format.add_option('--sub-lang', '--srt-lang',
action='store', dest='subtitleslang', metavar='LANG', action='store', dest='subtitleslang', metavar='LANG',
help='language of the subtitles to download (optional) use IETF language tags like \'en\'') help='language of the subtitles to download (optional) use IETF language tags like \'en\'')
@ -576,8 +577,7 @@ def _real_main(argv=None):
ydl.to_screen(u'[debug] Python version %s - %s' %(platform.python_version(), platform.platform())) ydl.to_screen(u'[debug] Python version %s - %s' %(platform.python_version(), platform.platform()))
ydl.to_screen(u'[debug] Proxy map: ' + str(proxy_handler.proxies)) ydl.to_screen(u'[debug] Proxy map: ' + str(proxy_handler.proxies))
for extractor in extractors: ydl.add_default_info_extractors()
ydl.add_info_extractor(extractor)
# PostProcessors # PostProcessors
if opts.extractaudio: if opts.extractaudio:

View File

@ -1,11 +1,13 @@
from .ard import ARDIE from .ard import ARDIE
from .arte import ArteTvIE from .arte import ArteTvIE
from .auengine import AUEngineIE
from .bandcamp import BandcampIE from .bandcamp import BandcampIE
from .bliptv import BlipTVIE, BlipTVUserIE from .bliptv import BlipTVIE, BlipTVUserIE
from .breakcom import BreakIE from .breakcom import BreakIE
from .collegehumor import CollegeHumorIE from .collegehumor import CollegeHumorIE
from .comedycentral import ComedyCentralIE from .comedycentral import ComedyCentralIE
from .cspan import CSpanIE
from .dailymotion import DailymotionIE from .dailymotion import DailymotionIE
from .depositfiles import DepositFilesIE from .depositfiles import DepositFilesIE
from .eighttracks import EightTracksIE from .eighttracks import EightTracksIE
@ -13,10 +15,12 @@ from .escapist import EscapistIE
from .facebook import FacebookIE from .facebook import FacebookIE
from .flickr import FlickrIE from .flickr import FlickrIE
from .funnyordie import FunnyOrDieIE from .funnyordie import FunnyOrDieIE
from .gamespot import GameSpotIE
from .gametrailers import GametrailersIE from .gametrailers import GametrailersIE
from .generic import GenericIE from .generic import GenericIE
from .googleplus import GooglePlusIE from .googleplus import GooglePlusIE
from .googlesearch import GoogleSearchIE from .googlesearch import GoogleSearchIE
from .hotnewhiphop import HotNewHipHopIE
from .howcast import HowcastIE from .howcast import HowcastIE
from .hypem import HypemIE from .hypem import HypemIE
from .ina import InaIE from .ina import InaIE
@ -35,6 +39,7 @@ from .photobucket import PhotobucketIE
from .pornotube import PornotubeIE from .pornotube import PornotubeIE
from .rbmaradio import RBMARadioIE from .rbmaradio import RBMARadioIE
from .redtube import RedTubeIE from .redtube import RedTubeIE
from .ringtv import RingTVIE
from .soundcloud import SoundcloudIE, SoundcloudSetIE from .soundcloud import SoundcloudIE, SoundcloudSetIE
from .spiegel import SpiegelIE from .spiegel import SpiegelIE
from .stanfordoc import StanfordOpenClassroomIE from .stanfordoc import StanfordOpenClassroomIE
@ -42,13 +47,17 @@ from .statigram import StatigramIE
from .steam import SteamIE from .steam import SteamIE
from .teamcoco import TeamcocoIE from .teamcoco import TeamcocoIE
from .ted import TEDIE from .ted import TEDIE
from .tf1 import TF1IE
from .tudou import TudouIE from .tudou import TudouIE
from .tumblr import TumblrIE from .tumblr import TumblrIE
from .tutv import TutvIE
from .ustream import UstreamIE from .ustream import UstreamIE
from .vbox7 import Vbox7IE from .vbox7 import Vbox7IE
from .vevo import VevoIE from .vevo import VevoIE
from .vimeo import VimeoIE from .vimeo import VimeoIE
from .vine import VineIE from .vine import VineIE
from .wat import WatIE
from .wimp import WimpIE
from .worldstarhiphop import WorldStarHipHopIE from .worldstarhiphop import WorldStarHipHopIE
from .xhamster import XHamsterIE from .xhamster import XHamsterIE
from .xnxx import XNXXIE from .xnxx import XNXXIE
@ -61,79 +70,18 @@ from .youtube import YoutubeIE, YoutubePlaylistIE, YoutubeSearchIE, YoutubeUserI
from .zdf import ZDFIE from .zdf import ZDFIE
# Build the list of extractor classes by scanning this module's globals for
# every name ending in 'IE' (presumably only the extractor classes imported
# above). GenericIE is excluded from the scan and appended explicitly
# afterwards so that it is always the last entry.
# NOTE(review): the non-generic classes come out in globals() iteration
# order, not in a hand-written priority order -- confirm that no two
# extractors depend on being tried in a particular relative order.
_ALL_CLASSES = [
klass
for name, klass in globals().items()
if name.endswith('IE') and name != 'GenericIE'
]
_ALL_CLASSES.append(GenericIE)
def gen_extractors(): def gen_extractors():
""" Return a list of an instance of every supported extractor. """ Return a list of an instance of every supported extractor.
The order does matter; the first extractor matched is the one handling the URL. The order does matter; the first extractor matched is the one handling the URL.
""" """
return [ return [klass() for klass in _ALL_CLASSES]
YoutubePlaylistIE(),
YoutubeChannelIE(),
YoutubeUserIE(),
YoutubeSearchIE(),
YoutubeIE(),
MetacafeIE(),
DailymotionIE(),
GoogleSearchIE(),
PhotobucketIE(),
YahooIE(),
YahooSearchIE(),
DepositFilesIE(),
FacebookIE(),
BlipTVIE(),
BlipTVUserIE(),
VimeoIE(),
MyVideoIE(),
ComedyCentralIE(),
EscapistIE(),
CollegeHumorIE(),
XVideosIE(),
SoundcloudSetIE(),
SoundcloudIE(),
InfoQIE(),
MixcloudIE(),
StanfordOpenClassroomIE(),
MTVIE(),
YoukuIE(),
XNXXIE(),
YouJizzIE(),
PornotubeIE(),
YouPornIE(),
GooglePlusIE(),
ArteTvIE(),
NBAIE(),
WorldStarHipHopIE(),
JustinTVIE(),
FunnyOrDieIE(),
SteamIE(),
UstreamIE(),
RBMARadioIE(),
EightTracksIE(),
KeekIE(),
TEDIE(),
MySpassIE(),
SpiegelIE(),
LiveLeakIE(),
ARDIE(),
ZDFIE(),
TumblrIE(),
BandcampIE(),
RedTubeIE(),
InaIE(),
HowcastIE(),
VineIE(),
FlickrIE(),
TeamcocoIE(),
XHamsterIE(),
HypemIE(),
Vbox7IE(),
GametrailersIE(),
StatigramIE(),
BreakIE(),
VevoIE(),
JukeboxIE(),
TudouIE(),
GenericIE()
]
def get_info_extractor(ie_name): def get_info_extractor(ie_name):
"""Returns the info extractor class with the given ie_name""" """Returns the info extractor class with the given ie_name"""

View File

@ -9,6 +9,15 @@ class ARDIE(InfoExtractor):
_VALID_URL = r'^(?:https?://)?(?:(?:www\.)?ardmediathek\.de|mediathek\.daserste\.de)/(?:.*/)(?P<video_id>[^/\?]+)(?:\?.*)?' _VALID_URL = r'^(?:https?://)?(?:(?:www\.)?ardmediathek\.de|mediathek\.daserste\.de)/(?:.*/)(?P<video_id>[^/\?]+)(?:\?.*)?'
_TITLE = r'<h1(?: class="boxTopHeadline")?>(?P<title>.*)</h1>' _TITLE = r'<h1(?: class="boxTopHeadline")?>(?P<title>.*)</h1>'
_MEDIA_STREAM = r'mediaCollection\.addMediaStream\((?P<media_type>\d+), (?P<quality>\d+), "(?P<rtmp_url>[^"]*)", "(?P<video_url>[^"]*)", "[^"]*"\)' _MEDIA_STREAM = r'mediaCollection\.addMediaStream\((?P<media_type>\d+), (?P<quality>\d+), "(?P<rtmp_url>[^"]*)", "(?P<video_url>[^"]*)", "[^"]*"\)'
_TEST = {
u'url': u'http://www.ardmediathek.de/das-erste/tagesschau-in-100-sek?documentId=14077640',
u'file': u'14077640.mp4',
u'md5': u'6ca8824255460c787376353f9e20bbd8',
u'info_dict': {
u"title": u"11.04.2013 09:23 Uhr - Tagesschau in 100 Sekunden"
},
u'skip': u'Requires rtmpdump'
}
def _real_extract(self, url): def _real_extract(self, url):
# determine video id from url # determine video id from url

View File

@ -1,53 +1,21 @@
import re import re
import socket import json
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
compat_http_client, # This is used by the not implemented extractLiveStream method
compat_str,
compat_urllib_error,
compat_urllib_parse, compat_urllib_parse,
compat_urllib_request,
ExtractorError, ExtractorError,
unified_strdate, unified_strdate,
) )
class ArteTvIE(InfoExtractor): class ArteTvIE(InfoExtractor):
"""arte.tv information extractor.""" _VALID_URL = r'(?:http://)?www\.arte.tv/guide/(?:fr|de)/(?:(?:sendungen|emissions)/)?(?P<id>.*?)/(?P<name>.*?)(\?.*)?'
_VALID_URL = r'(?:http://)?videos\.arte\.tv/(?:fr|de)/videos/.*'
_LIVE_URL = r'index-[0-9]+\.html$' _LIVE_URL = r'index-[0-9]+\.html$'
IE_NAME = u'arte.tv' IE_NAME = u'arte.tv'
def fetch_webpage(self, url):
request = compat_urllib_request.Request(url)
try:
self.report_download_webpage(url)
webpage = compat_urllib_request.urlopen(request).read()
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
raise ExtractorError(u'Unable to retrieve video webpage: %s' % compat_str(err))
except ValueError as err:
raise ExtractorError(u'Invalid URL: %s' % url)
return webpage
def grep_webpage(self, url, regex, regexFlags, matchTuples):
page = self.fetch_webpage(url)
mobj = re.search(regex, page, regexFlags)
info = {}
if mobj is None:
raise ExtractorError(u'Invalid URL: %s' % url)
for (i, key, err) in matchTuples:
if mobj.group(i) is None:
raise ExtractorError(err)
else:
info[key] = mobj.group(i)
return info
# TODO implement Live Stream # TODO implement Live Stream
# def extractLiveStream(self, url): # def extractLiveStream(self, url):
# video_lang = url.split('/')[-4] # video_lang = url.split('/')[-4]
@ -75,62 +43,44 @@ class ArteTvIE(InfoExtractor):
# ) # )
# video_url = u'%s/%s' % (info.get('url'), info.get('path')) # video_url = u'%s/%s' % (info.get('url'), info.get('path'))
def extractPlus7Stream(self, url):
video_lang = url.split('/')[-3]
info = self.grep_webpage(
url,
r'param name="movie".*?videorefFileUrl=(http[^\'"&]*)',
0,
[
(1, 'url', u'Invalid URL: %s' % url)
]
)
next_url = compat_urllib_parse.unquote(info.get('url'))
info = self.grep_webpage(
next_url,
r'<video lang="%s" ref="(http[^\'"&]*)' % video_lang,
0,
[
(1, 'url', u'Could not find <video> tag: %s' % url)
]
)
next_url = compat_urllib_parse.unquote(info.get('url'))
info = self.grep_webpage(
next_url,
r'<video id="(.*?)".*?>.*?' +
'<name>(.*?)</name>.*?' +
'<dateVideo>(.*?)</dateVideo>.*?' +
'<url quality="hd">(.*?)</url>',
re.DOTALL,
[
(1, 'id', u'could not extract video id: %s' % url),
(2, 'title', u'could not extract video title: %s' % url),
(3, 'date', u'could not extract video date: %s' % url),
(4, 'url', u'could not extract video url: %s' % url)
]
)
return {
'id': info.get('id'),
'url': compat_urllib_parse.unquote(info.get('url')),
'uploader': u'arte.tv',
'upload_date': unified_strdate(info.get('date')),
'title': info.get('title').decode('utf-8'),
'ext': u'mp4',
'format': u'NA',
'player_url': None,
}
def _real_extract(self, url): def _real_extract(self, url):
video_id = url.split('/')[-1] mobj = re.match(self._VALID_URL, url)
self.report_extraction(video_id) name = mobj.group('name')
# This is not a real id, it can be for example AJT for the news
# http://www.arte.tv/guide/fr/emissions/AJT/arte-journal
video_id = mobj.group('id')
if re.search(self._LIVE_URL, video_id) is not None: if re.search(self._LIVE_URL, video_id) is not None:
raise ExtractorError(u'Arte live streams are not yet supported, sorry') raise ExtractorError(u'Arte live streams are not yet supported, sorry')
# self.extractLiveStream(url) # self.extractLiveStream(url)
# return # return
else:
info = self.extractPlus7Stream(url)
return [info] webpage = self._download_webpage(url, video_id)
json_url = self._html_search_regex(r'arte_vp_url="(.*?)"', webpage, 'json url')
json_info = self._download_webpage(json_url, video_id, 'Downloading info json')
self.report_extraction(video_id)
info = json.loads(json_info)
player_info = info['videoJsonPlayer']
info_dict = {'id': player_info['VID'],
'title': player_info['VTI'],
'description': player_info['VDE'],
'upload_date': unified_strdate(player_info['VDA'].split(' ')[0]),
'thumbnail': player_info['programImage'],
}
formats = player_info['VSR'].values()
# We order the formats by quality
formats = sorted(formats, key=lambda f: int(f['height']))
# Pick the best quality
format_info = formats[-1]
if format_info['mediaType'] == u'rtmp':
info_dict['url'] = format_info['streamer']
info_dict['play_path'] = 'mp4:' + format_info['url']
info_dict['ext'] = 'mp4'
else:
info_dict['url'] = format_info['url']
info_dict['ext'] = 'mp4'
return info_dict

View File

@ -0,0 +1,38 @@
import os.path
import re
from .common import InfoExtractor
from ..utils import (
compat_urllib_parse,
compat_urllib_parse_urlparse,
)
class AUEngineIE(InfoExtractor):
    """Information extractor for videos embedded through auengine.com."""

    # Group 1 captures the value of the ``file`` query parameter, which is
    # used as the video id.  The dot in ``embed.php`` is escaped so that it
    # only matches a literal dot.
    _VALID_URL = r'(?:http://)?(?:www\.)?auengine\.com/embed\.php\?.*?file=([^&]+).*?'

    def _real_extract(self, url):
        """Extract the media URL, title and thumbnail from an embed page.

        The page is scanned for ``file:``/``url:`` entries; an entry whose
        path ends in ``.png`` is taken as the thumbnail and one ending in
        ``.mp4`` as the video itself (the last match of each kind wins).

        Returns a single-element list holding the info dictionary.
        Raises ExtractorError if the page references no ``.mp4`` file.
        """
        # Local import: the module-level import list of this file does not
        # pull in ExtractorError.
        from ..utils import ExtractorError

        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group(1)
        webpage = self._download_webpage(url, video_id)
        title = self._html_search_regex(r'<title>(?P<title>.+?)</title>',
                                        webpage, u'title')
        title = title.strip()

        links = re.findall(r'[^A-Za-z0-9]?(?:file|url):\s*["\'](http[^\'"&]*)', webpage)
        links = [compat_urllib_parse.unquote(l) for l in links]

        # Start from explicit "not found" values so that a page without a
        # thumbnail or without a video link cannot trigger UnboundLocalError.
        thumbnail = None
        video_url = None
        ext = None
        for link in links:
            pathext = os.path.splitext(compat_urllib_parse_urlparse(link).path)[1]
            if pathext == '.png':
                thumbnail = link
            elif pathext == '.mp4':
                video_url = link
                ext = pathext

        if video_url is None:
            raise ExtractorError(u'Unable to find an mp4 link for video %s' % video_id)

        # The page title may carry the file extension; drop it if present.
        if title.endswith(ext):
            title = title[:-len(ext)]
        # Strip the leading dot: the info dict expects a bare extension.
        ext = ext[1:]

        return [{
            'id': video_id,
            'url': video_url,
            'ext': ext,
            'title': title,
            'thumbnail': thumbnail,
        }]

View File

@ -9,6 +9,15 @@ from ..utils import (
class BandcampIE(InfoExtractor): class BandcampIE(InfoExtractor):
_VALID_URL = r'http://.*?\.bandcamp\.com/track/(?P<title>.*)' _VALID_URL = r'http://.*?\.bandcamp\.com/track/(?P<title>.*)'
_TEST = {
u'url': u'http://youtube-dl.bandcamp.com/track/youtube-dl-test-song',
u'file': u'1812978515.mp3',
u'md5': u'cdeb30cdae1921719a3cbcab696ef53c',
u'info_dict': {
u"title": u"youtube-dl test song \"'/\\\u00e4\u21ad"
},
u'skip': u'There is a limit of 200 free downloads / month for the test song'
}
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)

View File

@ -24,6 +24,17 @@ class BlipTVIE(InfoExtractor):
_VALID_URL = r'^(?:https?://)?(?:\w+\.)?blip\.tv/((.+/)|(play/)|(api\.swf#))(.+)$' _VALID_URL = r'^(?:https?://)?(?:\w+\.)?blip\.tv/((.+/)|(play/)|(api\.swf#))(.+)$'
_URL_EXT = r'^.*\.([a-z0-9]+)$' _URL_EXT = r'^.*\.([a-z0-9]+)$'
IE_NAME = u'blip.tv' IE_NAME = u'blip.tv'
_TEST = {
u'url': u'http://blip.tv/cbr/cbr-exclusive-gotham-city-imposters-bats-vs-jokerz-short-3-5796352',
u'file': u'5779306.m4v',
u'md5': u'b2d849efcf7ee18917e4b4d9ff37cafe',
u'info_dict': {
u"upload_date": u"20111205",
u"description": u"md5:9bc31f227219cde65e47eeec8d2dc596",
u"uploader": u"Comic Book Resources - CBR TV",
u"title": u"CBR EXCLUSIVE: \"Gotham City Imposters\" Bats VS Jokerz Short 3"
}
}
def report_direct_download(self, title): def report_direct_download(self, title):
"""Report information extraction.""" """Report information extraction."""

View File

@ -5,6 +5,14 @@ from .common import InfoExtractor
class BreakIE(InfoExtractor): class BreakIE(InfoExtractor):
_VALID_URL = r'(?:http://)?(?:www\.)?break\.com/video/([^/]+)' _VALID_URL = r'(?:http://)?(?:www\.)?break\.com/video/([^/]+)'
_TEST = {
u'url': u'http://www.break.com/video/when-girls-act-like-guys-2468056',
u'file': u'2468056.mp4',
u'md5': u'a3513fb1547fba4fb6cfac1bffc6c46b',
u'info_dict': {
u"title": u"When Girls Act Like D-Bags"
}
}
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)

View File

@ -27,6 +27,17 @@ class ComedyCentralIE(InfoExtractor):
(the-colbert-report-(videos|collections)/(?P<clipID>[0-9]+)/[^/]*/(?P<cntitle>.*?)) (the-colbert-report-(videos|collections)/(?P<clipID>[0-9]+)/[^/]*/(?P<cntitle>.*?))
|(watch/(?P<date>[^/]*)/(?P<tdstitle>.*))))) |(watch/(?P<date>[^/]*)/(?P<tdstitle>.*)))))
$""" $"""
_TEST = {
u'url': u'http://www.thedailyshow.com/watch/thu-december-13-2012/kristen-stewart',
u'file': u'422212.mp4',
u'md5': u'4e2f5cb088a83cd8cdb7756132f9739d',
u'info_dict': {
u"upload_date": u"20121214",
u"description": u"Kristen Stewart",
u"uploader": u"thedailyshow",
u"title": u"thedailyshow-kristen-stewart part 1"
}
}
_available_formats = ['3500', '2200', '1700', '1200', '750', '400'] _available_formats = ['3500', '2200', '1700', '1200', '750', '400']
@ -172,7 +183,7 @@ class ComedyCentralIE(InfoExtractor):
'ext': 'mp4', 'ext': 'mp4',
'format': format, 'format': format,
'thumbnail': None, 'thumbnail': None,
'description': officialTitle, 'description': compat_str(officialTitle),
} }
results.append(info) results.append(info)

View File

@ -44,6 +44,7 @@ class InfoExtractor(object):
location: Physical location of the video. location: Physical location of the video.
player_url: SWF Player URL (used for rtmpdump). player_url: SWF Player URL (used for rtmpdump).
subtitles: The subtitle file contents. subtitles: The subtitle file contents.
view_count: How many users have watched the video on the platform.
urlhandle: [internal] The urlHandle to be used to download the file, urlhandle: [internal] The urlHandle to be used to download the file,
like returned by urllib.request.urlopen like returned by urllib.request.urlopen

View File

@ -0,0 +1,53 @@
import re
from .common import InfoExtractor
from ..utils import (
compat_urllib_parse,
)
class CSpanIE(InfoExtractor):
    """Information extractor for c-spanvideo.org program pages."""

    _VALID_URL = r'http://www.c-spanvideo.org/program/(.*)'
    _TEST = {
        u'url': u'http://www.c-spanvideo.org/program/HolderonV',
        u'file': u'315139.flv',
        u'md5': u'74a623266956f69e4df0068ab6c80fe4',
        u'info_dict': {
            u"title": u"Attorney General Eric Holder on Voting Rights Act Decision"
        },
        u'skip': u'Requires rtmpdump'
    }

    def _real_extract(self, url):
        """Resolve a program page into an RTMP stream description.

        The program page supplies the numeric program id, the description
        and the thumbnail; the flashXml service queried with that id
        supplies the title, the stream URL template and the play path.
        Returns a single info dictionary.
        """
        prog_name = re.match(self._VALID_URL, url).group(1)
        webpage = self._download_webpage(url, prog_name)
        video_id = self._search_regex(r'programid=(.*?)&', webpage, 'video id')

        query = compat_urllib_parse.urlencode({'programid': video_id,
                                               'dynamic':'1'})
        info_url = 'http://www.c-spanvideo.org/common/services/flashXml.php?' + query
        video_info = self._download_webpage(info_url, video_id, u'Downloading video info')

        self.report_extraction(video_id)

        title = self._html_search_regex(
            r'<string name="title">(.*?)</string>', video_info, 'title')
        description = self._html_search_regex(
            r'<meta (?:property="og:|name=")description" content="(.*?)"',
            webpage, 'description', flags=re.MULTILINE|re.DOTALL)
        thumbnail = self._html_search_regex(
            r'<meta property="og:image" content="(.*?)"', webpage, 'thumbnail')

        # The service returns a URL template; fill in protocol and port.
        url_template = self._search_regex(
            r'<string name="URL">(.*?)</string>', video_info, 'video url')
        rtmp_url = url_template.replace('$(protocol)', 'rtmp')
        rtmp_url = rtmp_url.replace('$(port)', '443')
        play_path = self._search_regex(
            r'<string name="path">(.*?)</string>', video_info, 'rtmp play path')

        return {
            'id': video_id,
            'title': title,
            'ext': 'flv',
            'url': rtmp_url,
            'play_path': play_path,
            'description': description,
            'thumbnail': thumbnail,
        }

View File

@ -14,6 +14,15 @@ class DailymotionIE(InfoExtractor):
_VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/video/([^/]+)' _VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/video/([^/]+)'
IE_NAME = u'dailymotion' IE_NAME = u'dailymotion'
_TEST = {
u'url': u'http://www.dailymotion.com/video/x33vw9_tutoriel-de-youtubeur-dl-des-video_tech',
u'file': u'x33vw9.mp4',
u'md5': u'392c4b85a60a90dc4792da41ce3144eb',
u'info_dict': {
u"uploader": u"Alex and Van .",
u"title": u"Tutoriel de Youtubeur\"DL DES VIDEO DE YOUTUBE\""
}
}
def _real_extract(self, url): def _real_extract(self, url):
# Extract id and simplified title from URL # Extract id and simplified title from URL

View File

@ -12,6 +12,77 @@ from ..utils import (
class EightTracksIE(InfoExtractor): class EightTracksIE(InfoExtractor):
IE_NAME = '8tracks' IE_NAME = '8tracks'
_VALID_URL = r'https?://8tracks.com/(?P<user>[^/]+)/(?P<id>[^/#]+)(?:#.*)?$' _VALID_URL = r'https?://8tracks.com/(?P<user>[^/]+)/(?P<id>[^/#]+)(?:#.*)?$'
_TEST = {
u"name": u"EightTracks",
u"url": u"http://8tracks.com/ytdl/youtube-dl-test-tracks-a",
u"playlist": [
{
u"file": u"11885610.m4a",
u"md5": u"96ce57f24389fc8734ce47f4c1abcc55",
u"info_dict": {
u"title": u"youtue-dl project<>\"' - youtube-dl test track 1 \"'/\\\u00e4\u21ad",
u"uploader_id": u"ytdl"
}
},
{
u"file": u"11885608.m4a",
u"md5": u"4ab26f05c1f7291ea460a3920be8021f",
u"info_dict": {
u"title": u"youtube-dl project - youtube-dl test track 2 \"'/\\\u00e4\u21ad",
u"uploader_id": u"ytdl"
}
},
{
u"file": u"11885679.m4a",
u"md5": u"d30b5b5f74217410f4689605c35d1fd7",
u"info_dict": {
u"title": u"youtube-dl project as well - youtube-dl test track 3 \"'/\\\u00e4\u21ad",
u"uploader_id": u"ytdl"
}
},
{
u"file": u"11885680.m4a",
u"md5": u"4eb0a669317cd725f6bbd336a29f923a",
u"info_dict": {
u"title": u"youtube-dl project as well - youtube-dl test track 4 \"'/\\\u00e4\u21ad",
u"uploader_id": u"ytdl"
}
},
{
u"file": u"11885682.m4a",
u"md5": u"1893e872e263a2705558d1d319ad19e8",
u"info_dict": {
u"title": u"PH - youtube-dl test track 5 \"'/\\\u00e4\u21ad",
u"uploader_id": u"ytdl"
}
},
{
u"file": u"11885683.m4a",
u"md5": u"b673c46f47a216ab1741ae8836af5899",
u"info_dict": {
u"title": u"PH - youtube-dl test track 6 \"'/\\\u00e4\u21ad",
u"uploader_id": u"ytdl"
}
},
{
u"file": u"11885684.m4a",
u"md5": u"1d74534e95df54986da7f5abf7d842b7",
u"info_dict": {
u"title": u"phihag - youtube-dl test track 7 \"'/\\\u00e4\u21ad",
u"uploader_id": u"ytdl"
}
},
{
u"file": u"11885685.m4a",
u"md5": u"f081f47af8f6ae782ed131d38b9cd1c0",
u"info_dict": {
u"title": u"phihag - youtube-dl test track 8 \"'/\\\u00e4\u21ad",
u"uploader_id": u"ytdl"
}
}
]
}
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)

View File

@ -12,6 +12,16 @@ from ..utils import (
class EscapistIE(InfoExtractor): class EscapistIE(InfoExtractor):
_VALID_URL = r'^(https?://)?(www\.)?escapistmagazine\.com/videos/view/(?P<showname>[^/]+)/(?P<episode>[^/?]+)[/?]?.*$' _VALID_URL = r'^(https?://)?(www\.)?escapistmagazine\.com/videos/view/(?P<showname>[^/]+)/(?P<episode>[^/?]+)[/?]?.*$'
_TEST = {
u'url': u'http://www.escapistmagazine.com/videos/view/the-escapist-presents/6618-Breaking-Down-Baldurs-Gate',
u'file': u'6618-Breaking-Down-Baldurs-Gate.mp4',
u'md5': u'c6793dbda81388f4264c1ba18684a74d',
u'info_dict': {
u"description": u"Baldur's Gate: Original, Modded or Enhanced Edition? I'll break down what you can expect from the new Baldur's Gate: Enhanced Edition.",
u"uploader": u"the-escapist-presents",
u"title": u"Breaking Down Baldur's Gate"
}
}
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)

View File

@ -22,6 +22,15 @@ class FacebookIE(InfoExtractor):
_LOGIN_URL = 'https://login.facebook.com/login.php?m&next=http%3A%2F%2Fm.facebook.com%2Fhome.php&' _LOGIN_URL = 'https://login.facebook.com/login.php?m&next=http%3A%2F%2Fm.facebook.com%2Fhome.php&'
_NETRC_MACHINE = 'facebook' _NETRC_MACHINE = 'facebook'
IE_NAME = u'facebook' IE_NAME = u'facebook'
_TEST = {
u'url': u'https://www.facebook.com/photo.php?v=120708114770723',
u'file': u'120708114770723.mp4',
u'md5': u'48975a41ccc4b7a581abd68651c1a5a8',
u'info_dict': {
u"duration": 279,
u"title": u"PEOPLE ARE AWESOME 2013"
}
}
def report_login(self): def report_login(self):
"""Report attempt to log in.""" """Report attempt to log in."""

View File

@ -10,6 +10,16 @@ from ..utils import (
class FlickrIE(InfoExtractor): class FlickrIE(InfoExtractor):
"""Information Extractor for Flickr videos""" """Information Extractor for Flickr videos"""
_VALID_URL = r'(?:https?://)?(?:www\.)?flickr\.com/photos/(?P<uploader_id>[\w\-_@]+)/(?P<id>\d+).*' _VALID_URL = r'(?:https?://)?(?:www\.)?flickr\.com/photos/(?P<uploader_id>[\w\-_@]+)/(?P<id>\d+).*'
_TEST = {
u'url': u'http://www.flickr.com/photos/forestwander-nature-pictures/5645318632/in/photostream/',
u'file': u'5645318632.mp4',
u'md5': u'6fdc01adbc89d72fc9c4f15b4a4ba87b',
u'info_dict': {
u"description": u"Waterfalls in the Springtime at Dark Hollow Waterfalls. These are located just off of Skyline Drive in Virginia. They are only about 6/10 of a mile hike but it is a pretty steep hill and a good climb back up.",
u"uploader_id": u"forestwander-nature-pictures",
u"title": u"Dark Hollow Waterfalls"
}
}
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)

View File

@ -5,6 +5,15 @@ from .common import InfoExtractor
class FunnyOrDieIE(InfoExtractor): class FunnyOrDieIE(InfoExtractor):
_VALID_URL = r'^(?:https?://)?(?:www\.)?funnyordie\.com/videos/(?P<id>[0-9a-f]+)/.*$' _VALID_URL = r'^(?:https?://)?(?:www\.)?funnyordie\.com/videos/(?P<id>[0-9a-f]+)/.*$'
_TEST = {
u'url': u'http://www.funnyordie.com/videos/0732f586d7/heart-shaped-box-literal-video-version',
u'file': u'0732f586d7.mp4',
u'md5': u'f647e9e90064b53b6e046e75d0241fbd',
u'info_dict': {
u"description": u"Lyrics changed to match the video. Spoken cameo by Obscurus Lupa (from ThatGuyWithTheGlasses.com). Based on a concept by Dustin McLean (DustFilms.com). Performed, edited, and written by David A. Scott.",
u"title": u"Heart-Shaped Box: Literal Video Version"
}
}
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)

View File

@ -0,0 +1,45 @@
import re
import xml.etree.ElementTree
from .common import InfoExtractor
from ..utils import (
unified_strdate,
)
class GameSpotIE(InfoExtractor):
    """Information extractor for video pages on gamespot.com."""

    # NOTE(review): the last group uses `[^/d]`, which also excludes the
    # letter "d"; this looks like a typo for a digit class -- confirm before
    # changing, since it affects which URLs are accepted.
    _VALID_URL = r'(?:http://)?(?:www\.)?gamespot\.com/([^/]+)/videos/([^/]+)-([^/d]+)/'
    _TEST = {
        u"url": u"http://www.gamespot.com/arma-iii/videos/arma-iii-community-guide-sitrep-i-6410818/",
        u"file": u"6410818.mp4",
        u"md5": u"5569d64ca98db01f0177c934fe8c1e9b",
        u"info_dict": {
            u"title": u"Arma III - Community Guide: SITREP I",
            u"upload_date": u"20130627",
        }
    }

    def _real_extract(self, url):
        """Return a one-element list describing the video behind *url*.

        The video id is the part after the last "-" of the third URL group.
        It is used to fetch the player XML, whose ``playList/clip`` element
        supplies the media URL, title, thumbnail, view count and post date.
        """
        url_match = re.match(self._VALID_URL, url)
        video_id = url_match.group(3).split("-")[-1]

        player_xml = self._download_webpage(
            "http://www.gamespot.com/pages/video_player/xml.php?id=" + str(video_id),
            video_id)
        clip = xml.etree.ElementTree.fromstring(player_xml).find('./playList/clip')

        def clip_text(path):
            # Text content of the child element of the clip found at *path*.
            return clip.find(path).text

        media_url = clip_text('./URI')
        clip_title = clip_text('./title')
        # The extension is whatever follows the last dot of the media URL.
        extension = media_url.rpartition('.')[2]
        screen_grab = clip_text('./screenGrabURI')
        views = int(clip_text('./views'))
        posted = unified_strdate(clip_text('./postDate'))

        entry = {
            'id': video_id,
            'url': media_url,
            'ext': extension,
            'title': clip_title,
            'thumbnail': screen_grab,
            'upload_date': posted,
            'view_count': views,
        }
        return [entry]

View File

@ -9,6 +9,15 @@ from ..utils import (
class GametrailersIE(InfoExtractor): class GametrailersIE(InfoExtractor):
_VALID_URL = r'http://www.gametrailers.com/(?P<type>videos|reviews|full-episodes)/(?P<id>.*?)/(?P<title>.*)' _VALID_URL = r'http://www.gametrailers.com/(?P<type>videos|reviews|full-episodes)/(?P<id>.*?)/(?P<title>.*)'
_TEST = {
u'url': u'http://www.gametrailers.com/videos/zbvr8i/mirror-s-edge-2-e3-2013--debut-trailer',
u'file': u'zbvr8i.flv',
u'md5': u'c3edbc995ab4081976e16779bd96a878',
u'info_dict': {
u"title": u"E3 2013: Debut Trailer"
},
u'skip': u'Requires rtmpdump'
}
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)

View File

@ -15,6 +15,15 @@ class GenericIE(InfoExtractor):
_VALID_URL = r'.*' _VALID_URL = r'.*'
IE_NAME = u'generic' IE_NAME = u'generic'
_TEST = {
u'url': u'http://www.hodiho.fr/2013/02/regis-plante-sa-jeep.html',
u'file': u'13601338388002.mp4',
u'md5': u'85b90ccc9d73b4acd9138d3af4c27f89',
u'info_dict': {
u"uploader": u"www.hodiho.fr",
u"title": u"R\u00e9gis plante sa Jeep"
}
}
def report_download_webpage(self, video_id): def report_download_webpage(self, video_id):
"""Report webpage download.""" """Report webpage download."""
@ -102,7 +111,7 @@ class GenericIE(InfoExtractor):
mobj = re.search(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage) mobj = re.search(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage)
if mobj is None: if mobj is None:
# Broaden the search a little bit: JWPlayer JS loader # Broaden the search a little bit: JWPlayer JS loader
mobj = re.search(r'[^A-Za-z0-9]?file:\s*["\'](http[^\'"&]*)', webpage) mobj = re.search(r'[^A-Za-z0-9]?file["\']?:\s*["\'](http[^\'"&]*)', webpage)
if mobj is None: if mobj is None:
# Try to find twitter cards info # Try to find twitter cards info
mobj = re.search(r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage) mobj = re.search(r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage)
@ -135,7 +144,7 @@ class GenericIE(InfoExtractor):
# Video Title - Tagline | Site Name # Video Title - Tagline | Site Name
# and so on and so forth; it's just not practical # and so on and so forth; it's just not practical
video_title = self._html_search_regex(r'<title>(.*)</title>', video_title = self._html_search_regex(r'<title>(.*)</title>',
webpage, u'video title') webpage, u'video title', default=u'video')
# video uploader is domain name # video uploader is domain name
video_uploader = self._search_regex(r'(?:https?://)?([^/]*)/.*', video_uploader = self._search_regex(r'(?:https?://)?([^/]*)/.*',

View File

@ -1,3 +1,5 @@
# coding: utf-8
import datetime import datetime
import re import re
@ -12,6 +14,15 @@ class GooglePlusIE(InfoExtractor):
_VALID_URL = r'(?:https://)?plus\.google\.com/(?:[^/]+/)*?posts/(\w+)' _VALID_URL = r'(?:https://)?plus\.google\.com/(?:[^/]+/)*?posts/(\w+)'
IE_NAME = u'plus.google' IE_NAME = u'plus.google'
_TEST = {
u"url": u"https://plus.google.com/u/0/108897254135232129896/posts/ZButuJc6CtH",
u"file": u"ZButuJc6CtH.flv",
u"info_dict": {
u"upload_date": u"20120613",
u"uploader": u"井上ヨシマサ",
u"title": u"嘆きの天使 降臨"
}
}
def _real_extract(self, url): def _real_extract(self, url):
# Extract id from URL # Extract id from URL

View File

@ -0,0 +1,48 @@
import re
import base64
from .common import InfoExtractor
class HotNewHipHopIE(InfoExtractor):
    """Information extractor for song pages on hotnewhiphop.com."""

    # Both dots of the host name are escaped so that they only match a
    # literal ".".
    _VALID_URL = r'http://www\.hotnewhiphop\.com/.*\.(?P<id>.*)\.html'
    _TEST = {
        # The URL previously ended with a stray apostrophe inside the literal.
        u'url': u"http://www.hotnewhiphop.com/freddie-gibbs-lay-it-down-song.1435540.html",
        u'file': u'1435540.mp3',
        u'md5': u'2c2cd2f76ef11a9b3b581e8b232f3d96',
        u'info_dict': {
            u"title": u"Freddie Gibbs Songs - Lay It Down"
        }
    }

    def _real_extract(self, url):
        """Extract the audio behind *url*.

        The page is searched for a base64-encoded ``data-path`` attribute.
        When it is absent, the ``contentUrl`` value found in the page is
        handed over to the Youtube extractor instead. Otherwise a
        one-element list with the info dictionary of the mp3 is returned.
        """
        m = re.match(self._VALID_URL, url)
        video_id = m.group('id')

        webpage_src = self._download_webpage(url, video_id)

        # Non-fatal search: a missing attribute is handled by the fallback
        # branch below rather than aborting the extraction.
        video_url_base64 = self._search_regex(r'data-path="(.*?)"',
            webpage_src, u'video URL', fatal=False)

        if video_url_base64 is None:
            video_url = self._search_regex(r'"contentUrl" content="(.*?)"', webpage_src,
                u'video URL')
            return self.url_result(video_url, ie='Youtube')

        video_url = base64.b64decode(video_url_base64).decode('utf-8')

        video_title = self._html_search_regex(r"<title>(.*)</title>",
            webpage_src, u'title')

        # The thumbnail is optional, so the search is non-fatal.
        thumbnail = self._html_search_regex(r'"og:image" content="(.*)"',
            webpage_src, u'thumbnail', fatal=False)

        results = [{
            'id': video_id,
            'url' : video_url,
            'title' : video_title,
            'thumbnail' : thumbnail,
            'ext' : 'mp3',
        }]
        return results

View File

@ -5,6 +5,15 @@ from .common import InfoExtractor
class HowcastIE(InfoExtractor): class HowcastIE(InfoExtractor):
_VALID_URL = r'(?:https?://)?(?:www\.)?howcast\.com/videos/(?P<id>\d+)' _VALID_URL = r'(?:https?://)?(?:www\.)?howcast\.com/videos/(?P<id>\d+)'
_TEST = {
u'url': u'http://www.howcast.com/videos/390161-How-to-Tie-a-Square-Knot-Properly',
u'file': u'390161.mp4',
u'md5': u'1d7ba54e2c9d7dc6935ef39e00529138',
u'info_dict': {
u"description": u"The square knot, also known as the reef knot, is one of the oldest, most basic knots to tie, and can be used in many different ways. Here's the proper way to tie a square knot.",
u"title": u"How to Tie a Square Knot Properly"
}
}
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)

View File

@ -15,6 +15,14 @@ from ..utils import (
class HypemIE(InfoExtractor): class HypemIE(InfoExtractor):
"""Information Extractor for hypem""" """Information Extractor for hypem"""
_VALID_URL = r'(?:http://)?(?:www\.)?hypem\.com/track/([^/]+)/([^/]+)' _VALID_URL = r'(?:http://)?(?:www\.)?hypem\.com/track/([^/]+)/([^/]+)'
_TEST = {
u'url': u'http://hypem.com/track/1v6ga/BODYWORK+-+TAME',
u'file': u'1v6ga.mp3',
u'md5': u'b9cc91b5af8995e9f0c1cee04c575828',
u'info_dict': {
u"title": u"Tame"
}
}
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)

View File

@ -6,6 +6,14 @@ from .common import InfoExtractor
class InaIE(InfoExtractor): class InaIE(InfoExtractor):
"""Information Extractor for Ina.fr""" """Information Extractor for Ina.fr"""
_VALID_URL = r'(?:http://)?(?:www\.)?ina\.fr/video/(?P<id>I[0-9]+)/.*' _VALID_URL = r'(?:http://)?(?:www\.)?ina\.fr/video/(?P<id>I[0-9]+)/.*'
_TEST = {
u'url': u'www.ina.fr/video/I12055569/francois-hollande-je-crois-que-c-est-clair-video.html',
u'file': u'I12055569.mp4',
u'md5': u'a667021bf2b41f8dc6049479d9bb38a3',
u'info_dict': {
u"title": u"Fran\u00e7ois Hollande \"Je crois que c'est clair\""
}
}
def _real_extract(self,url): def _real_extract(self,url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)

View File

@ -11,6 +11,18 @@ from ..utils import (
class InfoQIE(InfoExtractor): class InfoQIE(InfoExtractor):
_VALID_URL = r'^(?:https?://)?(?:www\.)?infoq\.com/[^/]+/[^/]+$' _VALID_URL = r'^(?:https?://)?(?:www\.)?infoq\.com/[^/]+/[^/]+$'
_TEST = {
u"name": u"InfoQ",
u"url": u"http://www.infoq.com/presentations/A-Few-of-My-Favorite-Python-Things",
u"file": u"12-jan-pythonthings.mp4",
u"info_dict": {
u"description": u"Mike Pirnat presents some tips and tricks, standard libraries and third party packages that make programming in Python a richer experience.",
u"title": u"A Few of My Favorite [Python] Things"
},
u"params": {
u"skip_download": True
}
}
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)

View File

@ -26,6 +26,17 @@ class JustinTVIE(InfoExtractor):
""" """
_JUSTIN_PAGE_LIMIT = 100 _JUSTIN_PAGE_LIMIT = 100
IE_NAME = u'justin.tv' IE_NAME = u'justin.tv'
_TEST = {
u'url': u'http://www.twitch.tv/thegamedevhub/b/296128360',
u'file': u'296128360.flv',
u'md5': u'ecaa8a790c22a40770901460af191c9a',
u'info_dict': {
u"upload_date": u"20110927",
u"uploader_id": 25114803,
u"uploader": u"thegamedevhub",
u"title": u"Beginner Series - Scripting With Python Pt.1"
}
}
def report_download_page(self, channel, offset): def report_download_page(self, channel, offset):
"""Report attempt to download a single page of videos.""" """Report attempt to download a single page of videos."""

View File

@ -6,6 +6,15 @@ from .common import InfoExtractor
class KeekIE(InfoExtractor): class KeekIE(InfoExtractor):
_VALID_URL = r'http://(?:www\.)?keek\.com/(?:!|\w+/keeks/)(?P<videoID>\w+)' _VALID_URL = r'http://(?:www\.)?keek\.com/(?:!|\w+/keeks/)(?P<videoID>\w+)'
IE_NAME = u'keek' IE_NAME = u'keek'
_TEST = {
u'url': u'http://www.keek.com/ytdl/keeks/NODfbab',
u'file': u'NODfbab.mp4',
u'md5': u'9b0636f8c0f7614afa4ea5e4c6e57e83',
u'info_dict': {
u"uploader": u"ytdl",
u"title": u"test chars: \"'/\\\u00e4<>This is a test video for youtube-dl.For more information, contact phihag@phihag.de ."
}
}
def _real_extract(self, url): def _real_extract(self, url):
m = re.match(self._VALID_URL, url) m = re.match(self._VALID_URL, url)

View File

@ -10,6 +10,16 @@ class LiveLeakIE(InfoExtractor):
_VALID_URL = r'^(?:http?://)?(?:\w+\.)?liveleak\.com/view\?(?:.*?)i=(?P<video_id>[\w_]+)(?:.*)' _VALID_URL = r'^(?:http?://)?(?:\w+\.)?liveleak\.com/view\?(?:.*?)i=(?P<video_id>[\w_]+)(?:.*)'
IE_NAME = u'liveleak' IE_NAME = u'liveleak'
_TEST = {
u'url': u'http://www.liveleak.com/view?i=757_1364311680',
u'file': u'757_1364311680.mp4',
u'md5': u'0813c2430bea7a46bf13acf3406992f4',
u'info_dict': {
u"description": u"extremely bad day for this guy..!",
u"uploader": u"ljfriel2",
u"title": u"Most unlucky car accident"
}
}
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)

View File

@ -20,6 +20,19 @@ class MetacafeIE(InfoExtractor):
_DISCLAIMER = 'http://www.metacafe.com/family_filter/' _DISCLAIMER = 'http://www.metacafe.com/family_filter/'
_FILTER_POST = 'http://www.metacafe.com/f/index.php?inputType=filter&controllerGroup=user' _FILTER_POST = 'http://www.metacafe.com/f/index.php?inputType=filter&controllerGroup=user'
IE_NAME = u'metacafe' IE_NAME = u'metacafe'
_TEST = {
u"add_ie": ["Youtube"],
u"url": u"http://metacafe.com/watch/yt-_aUehQsCQtM/the_electric_company_short_i_pbs_kids_go/",
u"file": u"_aUehQsCQtM.flv",
u"info_dict": {
u"upload_date": u"20090102",
u"title": u"The Electric Company | \"Short I\" | PBS KIDS GO!",
u"description": u"md5:2439a8ef6d5a70e380c22f5ad323e5a8",
u"uploader": u"PBS",
u"uploader_id": u"PBS"
}
}
def report_disclaimer(self): def report_disclaimer(self):
"""Report disclaimer retrieval.""" """Report disclaimer retrieval."""

View File

@ -11,6 +11,15 @@ from ..utils import (
class MySpassIE(InfoExtractor): class MySpassIE(InfoExtractor):
_VALID_URL = r'http://www.myspass.de/.*' _VALID_URL = r'http://www.myspass.de/.*'
_TEST = {
u'url': u'http://www.myspass.de/myspass/shows/tvshows/absolute-mehrheit/Absolute-Mehrheit-vom-17022013-Die-Highlights-Teil-2--/11741/',
u'file': u'11741.mp4',
u'md5': u'0b49f4844a068f8b33f4b7c88405862b',
u'info_dict': {
u"description": u"Wer kann in die Fu\u00dfstapfen von Wolfgang Kubicki treten und die Mehrheit der Zuschauer hinter sich versammeln? Wird vielleicht sogar die Absolute Mehrheit geknackt und der Jackpot von 200.000 Euro mit nach Hause genommen?",
u"title": u"Absolute Mehrheit vom 17.02.2013 - Die Highlights, Teil 2"
}
}
def _real_extract(self, url): def _real_extract(self, url):
META_DATA_URL_TEMPLATE = 'http://www.myspass.de/myspass/includes/apps/video/getvideometadataxml.php?id=%s' META_DATA_URL_TEMPLATE = 'http://www.myspass.de/myspass/includes/apps/video/getvideometadataxml.php?id=%s'

View File

@ -18,6 +18,14 @@ class MyVideoIE(InfoExtractor):
_VALID_URL = r'(?:http://)?(?:www\.)?myvideo\.de/watch/([0-9]+)/([^?/]+).*' _VALID_URL = r'(?:http://)?(?:www\.)?myvideo\.de/watch/([0-9]+)/([^?/]+).*'
IE_NAME = u'myvideo' IE_NAME = u'myvideo'
_TEST = {
u'url': u'http://www.myvideo.de/watch/8229274/bowling_fail_or_win',
u'file': u'8229274.flv',
u'md5': u'2d2753e8130479ba2cb7e0a37002053e',
u'info_dict': {
u"title": u"bowling-fail-or-win"
}
}
# Original Code from: https://github.com/dersphere/plugin.video.myvideo_de.git # Original Code from: https://github.com/dersphere/plugin.video.myvideo_de.git
# Released into the Public Domain by Tristan Fischer on 2013-05-19 # Released into the Public Domain by Tristan Fischer on 2013-05-19

View File

@ -8,6 +8,15 @@ from ..utils import (
class NBAIE(InfoExtractor): class NBAIE(InfoExtractor):
_VALID_URL = r'^(?:https?://)?(?:watch\.|www\.)?nba\.com/(?:nba/)?video(/[^?]*?)(?:/index\.html)?(?:\?.*)?$' _VALID_URL = r'^(?:https?://)?(?:watch\.|www\.)?nba\.com/(?:nba/)?video(/[^?]*?)(?:/index\.html)?(?:\?.*)?$'
_TEST = {
u'url': u'http://www.nba.com/video/games/nets/2012/12/04/0021200253-okc-bkn-recap.nba/index.html',
u'file': u'0021200253-okc-bkn-recap.nba.mp4',
u'md5': u'c0edcfc37607344e2ff8f13c378c88a4',
u'info_dict': {
u"description": u"Kevin Durant scores 32 points and dishes out six assists as the Thunder beat the Nets in Brooklyn.",
u"title": u"Thunder vs. Nets"
}
}
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)

View File

@ -16,6 +16,16 @@ class PhotobucketIE(InfoExtractor):
# Check if it's necessary to keep the old extraction process # Check if it's necessary to keep the old extraction process
_VALID_URL = r'(?:http://)?(?:[a-z0-9]+\.)?photobucket\.com/.*(([\?\&]current=)|_)(?P<id>.*)\.(?P<ext>(flv)|(mp4))' _VALID_URL = r'(?:http://)?(?:[a-z0-9]+\.)?photobucket\.com/.*(([\?\&]current=)|_)(?P<id>.*)\.(?P<ext>(flv)|(mp4))'
IE_NAME = u'photobucket' IE_NAME = u'photobucket'
_TEST = {
u'url': u'http://media.photobucket.com/user/rachaneronas/media/TiredofLinkBuildingTryBacklinkMyDomaincom_zpsc0c3b9fa.mp4.html?filters[term]=search&filters[primary]=videos&filters[secondary]=images&sort=1&o=0',
u'file': u'zpsc0c3b9fa.mp4',
u'md5': u'7dabfb92b0a31f6c16cebc0f8e60ff99',
u'info_dict': {
u"upload_date": u"20130504",
u"uploader": u"rachaneronas",
u"title": u"Tired of Link Building? Try BacklinkMyDomain.com!"
}
}
def _real_extract(self, url): def _real_extract(self, url):
# Extract id from URL # Extract id from URL

View File

@ -10,6 +10,15 @@ from ..utils import (
class PornotubeIE(InfoExtractor): class PornotubeIE(InfoExtractor):
_VALID_URL = r'^(?:https?://)?(?:\w+\.)?pornotube\.com(/c/(?P<channel>[0-9]+))?(/m/(?P<videoid>[0-9]+))(/(?P<title>.+))$' _VALID_URL = r'^(?:https?://)?(?:\w+\.)?pornotube\.com(/c/(?P<channel>[0-9]+))?(/m/(?P<videoid>[0-9]+))(/(?P<title>.+))$'
_TEST = {
u'url': u'http://pornotube.com/c/173/m/1689755/Marilyn-Monroe-Bathing',
u'file': u'1689755.flv',
u'md5': u'374dd6dcedd24234453b295209aa69b6',
u'info_dict': {
u"upload_date": u"20090708",
u"title": u"Marilyn-Monroe-Bathing"
}
}
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)

View File

@ -11,6 +11,18 @@ from ..utils import (
class RBMARadioIE(InfoExtractor): class RBMARadioIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?rbmaradio\.com/shows/(?P<videoID>[^/]+)$' _VALID_URL = r'https?://(?:www\.)?rbmaradio\.com/shows/(?P<videoID>[^/]+)$'
_TEST = {
u'url': u'http://www.rbmaradio.com/shows/ford-lopatin-live-at-primavera-sound-2011',
u'file': u'ford-lopatin-live-at-primavera-sound-2011.mp3',
u'md5': u'6bc6f9bcb18994b4c983bc3bf4384d95',
u'info_dict': {
u"uploader_id": u"ford-lopatin",
u"location": u"Spain",
u"description": u"Joel Ford and Daniel \u2019Oneohtrix Point Never\u2019 Lopatin fly their midified pop extravaganza to Spain. Live at Primavera Sound 2011.",
u"uploader": u"Ford & Lopatin",
u"title": u"Live at Primavera Sound 2011"
}
}
def _real_extract(self, url): def _real_extract(self, url):
m = re.match(self._VALID_URL, url) m = re.match(self._VALID_URL, url)

View File

@ -5,6 +5,14 @@ from .common import InfoExtractor
class RedTubeIE(InfoExtractor): class RedTubeIE(InfoExtractor):
_VALID_URL = r'(?:http://)?(?:www\.)?redtube\.com/(?P<id>[0-9]+)' _VALID_URL = r'(?:http://)?(?:www\.)?redtube\.com/(?P<id>[0-9]+)'
_TEST = {
u'url': u'http://www.redtube.com/66418',
u'file': u'66418.mp4',
u'md5': u'7b8c22b5e7098a3e1c09709df1126d2d',
u'info_dict': {
u"title": u"Sucked on a toilet"
}
}
def _real_extract(self,url): def _real_extract(self,url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)

View File

@ -0,0 +1,37 @@
import re
from .common import InfoExtractor
class RingTVIE(InfoExtractor):
    """Information extractor for ringtv.craveonline.com video pages."""

    _VALID_URL = r'(?:http://)?(?:www\.)?ringtv\.craveonline\.com/videos/video/([^/]+)'
    _TEST = {
        u"url": u"http://ringtv.craveonline.com/videos/video/746619-canelo-alvarez-talks-about-mayweather-showdown",
        u"file": u"746619.mp4",
        u"md5": u"7c46b4057d22de32e0a539f017e64ad3",
        u"info_dict": {
            u"title": u"Canelo Alvarez talks about Mayweather showdown",
            u"description": u"Saul \\\"Canelo\\\" Alvarez spoke to the media about his Sept. 14 showdown with Floyd Mayweather after their kick-off presser in NYC. Canelo is motivated and confident that he will have the speed and gameplan to beat the pound-for-pound king."
        }
    }

    def _real_extract(self, url):
        """Return a one-element list describing the video behind *url*.

        The id is the part of the URL slug before its first "-". Title and
        description are scraped from the page, while the media and thumbnail
        URLs are built from the id alone.
        """
        slug = re.match(self._VALID_URL, url).group(1)
        video_id = slug.split('-')[0]
        webpage = self._download_webpage(url, video_id)

        raw_title = self._search_regex(r'<title>(.+?)</title>',
            webpage, 'video title')
        # Drop the site suffix that the page appends to its <title>.
        title = raw_title.replace(' | RingTV','')
        description = self._search_regex(r'<div class="blurb">(.+?)</div>',
            webpage, 'Description')

        final_url = "http://ringtv.craveonline.springboardplatform.com/storage/ringtv.craveonline.com/conversion/%s.mp4" % (str(video_id),)
        thumbnail_url = "http://ringtv.craveonline.springboardplatform.com/storage/ringtv.craveonline.com/snapshots/%s.jpg" % (str(video_id),)

        info = {
            'id': video_id,
            'url': final_url,
            # Whatever follows the last dot of the media URL.
            'ext': final_url.rpartition('.')[2],
            'title': title,
            'thumbnail': thumbnail_url,
            'description': description,
        }
        return [info]

View File

@ -19,8 +19,19 @@ class SoundcloudIE(InfoExtractor):
of the stream token and uid of the stream token and uid
""" """
_VALID_URL = r'^(?:https?://)?(?:www\.)?soundcloud\.com/([\w\d-]+)/([\w\d-]+)' _VALID_URL = r'^(?:https?://)?(?:www\.)?soundcloud\.com/([\w\d-]+)/([\w\d-]+)(?:[?].*)?$'
IE_NAME = u'soundcloud' IE_NAME = u'soundcloud'
_TEST = {
u'url': u'http://soundcloud.com/ethmusic/lostin-powers-she-so-heavy',
u'file': u'62986583.mp3',
u'md5': u'ebef0a451b909710ed1d7787dddbf0d7',
u'info_dict': {
u"upload_date": u"20121011",
u"description": u"No Downloads untill we record the finished version this weekend, i was too pumped n i had to post it , earl is prolly gonna b hella p.o'd",
u"uploader": u"E.T. ExTerrestrial Music",
u"title": u"Lostin Powers - She so Heavy (SneakPreview) Adrian Ackers Blueprint 1"
}
}
def report_resolve(self, video_id): def report_resolve(self, video_id):
"""Report information extraction.""" """Report information extraction."""
@ -75,8 +86,72 @@ class SoundcloudSetIE(InfoExtractor):
of the stream token and uid of the stream token and uid
""" """
_VALID_URL = r'^(?:https?://)?(?:www\.)?soundcloud\.com/([\w\d-]+)/sets/([\w\d-]+)' _VALID_URL = r'^(?:https?://)?(?:www\.)?soundcloud\.com/([\w\d-]+)/sets/([\w\d-]+)(?:[?].*)?$'
IE_NAME = u'soundcloud:set' IE_NAME = u'soundcloud:set'
_TEST = {
u"url":"https://soundcloud.com/the-concept-band/sets/the-royal-concept-ep",
u"playlist": [
{
u"file":"30510138.mp3",
u"md5":"f9136bf103901728f29e419d2c70f55d",
u"info_dict": {
u"upload_date": u"20111213",
u"description": u"The Royal Concept from Stockholm\r\nFilip / Povel / David / Magnus\r\nwww.royalconceptband.com",
u"uploader": u"The Royal Concept",
u"title": u"D-D-Dance"
}
},
{
u"file":"47127625.mp3",
u"md5":"09b6758a018470570f8fd423c9453dd8",
u"info_dict": {
u"upload_date": u"20120521",
u"description": u"The Royal Concept from Stockholm\r\nFilip / Povel / David / Magnus\r\nwww.royalconceptband.com",
u"uploader": u"The Royal Concept",
u"title": u"The Royal Concept - Gimme Twice"
}
},
{
u"file":"47127627.mp3",
u"md5":"154abd4e418cea19c3b901f1e1306d9c",
u"info_dict": {
u"upload_date": u"20120521",
u"uploader": u"The Royal Concept",
u"title": u"Goldrushed"
}
},
{
u"file":"47127629.mp3",
u"md5":"2f5471edc79ad3f33a683153e96a79c1",
u"info_dict": {
u"upload_date": u"20120521",
u"description": u"The Royal Concept from Stockholm\r\nFilip / Povel / David / Magnus\r\nwww.royalconceptband.com",
u"uploader": u"The Royal Concept",
u"title": u"In the End"
}
},
{
u"file":"47127631.mp3",
u"md5":"f9ba87aa940af7213f98949254f1c6e2",
u"info_dict": {
u"upload_date": u"20120521",
u"description": u"The Royal Concept from Stockholm\r\nFilip / David / Povel / Magnus\r\nwww.theroyalconceptband.com",
u"uploader": u"The Royal Concept",
u"title": u"Knocked Up"
}
},
{
u"file":"75206121.mp3",
u"md5":"f9d1fe9406717e302980c30de4af9353",
u"info_dict": {
u"upload_date": u"20130116",
u"description": u"The unreleased track World on Fire premiered on the CW's hit show Arrow (8pm/7pm central). \r\nAs a gift to our fans we would like to offer you a free download of the track! ",
u"uploader": u"The Royal Concept",
u"title": u"World On Fire"
}
}
]
}
def report_resolve(self, video_id): def report_resolve(self, video_id):
"""Report information extraction.""" """Report information extraction."""

View File

@ -6,6 +6,14 @@ from .common import InfoExtractor
class SpiegelIE(InfoExtractor): class SpiegelIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?spiegel\.de/video/[^/]*-(?P<videoID>[0-9]+)(?:\.html)?(?:#.*)?$' _VALID_URL = r'https?://(?:www\.)?spiegel\.de/video/[^/]*-(?P<videoID>[0-9]+)(?:\.html)?(?:#.*)?$'
_TEST = {
u'url': u'http://www.spiegel.de/video/vulkan-tungurahua-in-ecuador-ist-wieder-aktiv-video-1259285.html',
u'file': u'1259285.mp4',
u'md5': u'2c2754212136f35fb4b19767d242f66e',
u'info_dict': {
u"title": u"Vulkanausbruch in Ecuador: Der \"Feuerschlund\" ist wieder aktiv"
}
}
def _real_extract(self, url): def _real_extract(self, url):
m = re.match(self._VALID_URL, url) m = re.match(self._VALID_URL, url)

View File

@ -20,6 +20,14 @@ class StanfordOpenClassroomIE(InfoExtractor):
_VALID_URL = r'^(?:https?://)?openclassroom.stanford.edu(?P<path>/?|(/MainFolder/(?:HomePage|CoursePage|VideoPage)\.php([?]course=(?P<course>[^&]+)(&video=(?P<video>[^&]+))?(&.*)?)?))$' _VALID_URL = r'^(?:https?://)?openclassroom.stanford.edu(?P<path>/?|(/MainFolder/(?:HomePage|CoursePage|VideoPage)\.php([?]course=(?P<course>[^&]+)(&video=(?P<video>[^&]+))?(&.*)?)?))$'
IE_NAME = u'stanfordoc' IE_NAME = u'stanfordoc'
_TEST = {
u'url': u'http://openclassroom.stanford.edu/MainFolder/VideoPage.php?course=PracticalUnix&video=intro-environment&speed=100',
u'file': u'PracticalUnix_intro-environment.mp4',
u'md5': u'544a9468546059d4e80d76265b0443b8',
u'info_dict': {
u"title": u"Intro Environment"
}
}
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)

View File

@ -4,6 +4,15 @@ from .common import InfoExtractor
class StatigramIE(InfoExtractor): class StatigramIE(InfoExtractor):
_VALID_URL = r'(?:http://)?(?:www\.)?statigr\.am/p/([^/]+)' _VALID_URL = r'(?:http://)?(?:www\.)?statigr\.am/p/([^/]+)'
_TEST = {
u'url': u'http://statigr.am/p/484091715184808010_284179915',
u'file': u'484091715184808010_284179915.mp4',
u'md5': u'deda4ff333abe2e118740321e992605b',
u'info_dict': {
u"uploader_id": u"videoseconds",
u"title": u"Instagram photo by @videoseconds (Videos)"
}
}
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)

View File

@ -16,6 +16,26 @@ class SteamIE(InfoExtractor):
""" """
_VIDEO_PAGE_TEMPLATE = 'http://store.steampowered.com/video/%s/' _VIDEO_PAGE_TEMPLATE = 'http://store.steampowered.com/video/%s/'
_AGECHECK_TEMPLATE = 'http://store.steampowered.com/agecheck/video/%s/?snr=1_agecheck_agecheck__age-gate&ageDay=1&ageMonth=January&ageYear=1970' _AGECHECK_TEMPLATE = 'http://store.steampowered.com/agecheck/video/%s/?snr=1_agecheck_agecheck__age-gate&ageDay=1&ageMonth=January&ageYear=1970'
_TEST = {
u"url": u"http://store.steampowered.com/video/105600/",
u"playlist": [
{
u"file": u"81300.flv",
u"md5": u"f870007cee7065d7c76b88f0a45ecc07",
u"info_dict": {
u"title": u"Terraria 1.1 Trailer"
}
},
{
u"file": u"80859.flv",
u"md5": u"61aaf31a5c5c3041afb58fb83cbb5751",
u"info_dict": {
u"title": u"Terraria Trailer"
}
}
]
}
@classmethod @classmethod
def suitable(cls, url): def suitable(cls, url):

View File

@ -8,6 +8,15 @@ from ..utils import (
class TeamcocoIE(InfoExtractor): class TeamcocoIE(InfoExtractor):
_VALID_URL = r'http://teamcoco\.com/video/(?P<url_title>.*)' _VALID_URL = r'http://teamcoco\.com/video/(?P<url_title>.*)'
_TEST = {
u'url': u'http://teamcoco.com/video/louis-ck-interview-george-w-bush',
u'file': u'19705.mp4',
u'md5': u'27b6f7527da5acf534b15f21b032656e',
u'info_dict': {
u"description": u"Louis C.K. got starstruck by George W. Bush, so what? Part one.",
u"title": u"Louis C.K. Interview Pt. 1 11/3/11"
}
}
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)

View File

@ -14,6 +14,15 @@ class TEDIE(InfoExtractor):
(/lang/(.*?))? # The url may contain the language (/lang/(.*?))? # The url may contain the language
/(?P<name>\w+) # Here goes the name and then ".html" /(?P<name>\w+) # Here goes the name and then ".html"
''' '''
_TEST = {
u'url': u'http://www.ted.com/talks/dan_dennett_on_our_consciousness.html',
u'file': u'102.mp4',
u'md5': u'2d76ee1576672e0bd8f187513267adf6',
u'info_dict': {
u"description": u"md5:c6fa72e6eedbd938c9caf6b2702f5922",
u"title": u"Dan Dennett: The illusion of consciousness"
}
}
@classmethod @classmethod
def suitable(cls, url): def suitable(cls, url):

View File

@ -0,0 +1,35 @@
# coding: utf-8
import json
import re
from .common import InfoExtractor
class TF1IE(InfoExtractor):
    """
    TF1 uses the wat.tv player, currently it can only download videos with the
    html5 player enabled, it cannot download HD videos.
    """
    # Dots are escaped so they only match a literal "." (host name and the
    # ".html" suffix).
    _VALID_URL = r'http://videos\.tf1\.fr/.*-(.*?)\.html'
    _TEST = {
        u'url': u'http://videos.tf1.fr/auto-moto/citroen-grand-c4-picasso-2013-presentation-officielle-8062060.html',
        u'file': u'10635995.mp4',
        u'md5': u'66789d3e91278d332f75e1feb7aea327',
        u'info_dict': {
            u'title': u'Citroën Grand C4 Picasso 2013 : présentation officielle',
            u'description': u'Vidéo officielle du nouveau Citroën Grand C4 Picasso, lancé à l\'automne 2013.',
        }
    }

    def _real_extract(self, url):
        """Resolve a TF1 video page to its wat.tv URL.

        The page embeds a wat.tv frame; the frame page contains a ``UVID``
        value that is used to query the wat.tv content interface. The media
        URL found there is delegated to the 'Wat' extractor via
        ``url_result``.
        """
        mobj = re.match(self._VALID_URL, url)
        # Named video_id rather than id to avoid shadowing the builtin.
        video_id = mobj.group(1)
        webpage = self._download_webpage(url, video_id)
        embed_url = self._html_search_regex(r'"(https://www\.wat\.tv/embedframe/.*?)"',
            webpage, 'embed url')
        embed_page = self._download_webpage(embed_url, video_id, u'Downloading embed player page')
        wat_id = self._search_regex(r'UVID=(.*?)&', embed_page, 'wat id')
        wat_info = self._download_webpage('http://www.wat.tv/interface/contentv3/%s' % wat_id, video_id, u'Downloading Wat info')
        # Only the "media" object of the JSON answer is needed.
        wat_info = json.loads(wat_info)['media']
        wat_url = wat_info['url']
        return self.url_result(wat_url, 'Wat')

View File

@ -5,6 +5,14 @@ from .common import InfoExtractor
class TudouIE(InfoExtractor): class TudouIE(InfoExtractor):
_VALID_URL = r'(?:http://)?(?:www\.)?tudou\.com/(?:listplay|programs)/(?:view|(.+?))/(?:([^/]+)|([^/]+)\.html)' _VALID_URL = r'(?:http://)?(?:www\.)?tudou\.com/(?:listplay|programs)/(?:view|(.+?))/(?:([^/]+)|([^/]+)\.html)'
_TEST = {
u'url': u'http://www.tudou.com/listplay/zzdE77v6Mmo/2xN2duXMxmw.html',
u'file': u'159447792.f4v',
u'md5': u'ad7c358a01541e926a1e413612c6b10a',
u'info_dict': {
u"title": u"\u5361\u9a6c\u4e54\u56fd\u8db3\u5f00\u5927\u811a\u957f\u4f20\u51b2\u540a\u96c6\u9526"
}
}
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)

View File

@ -8,6 +8,14 @@ from ..utils import (
class TumblrIE(InfoExtractor): class TumblrIE(InfoExtractor):
_VALID_URL = r'http://(?P<blog_name>.*?)\.tumblr\.com/((post)|(video))/(?P<id>\d*)/(.*?)' _VALID_URL = r'http://(?P<blog_name>.*?)\.tumblr\.com/((post)|(video))/(?P<id>\d*)/(.*?)'
_TEST = {
u'url': u'http://resigno.tumblr.com/post/53364321212/e-de-extrema-importancia-que-esse-video-seja',
u'file': u'53364321212.mp4',
u'md5': u'0716d3dd51baf68a28b40fdf1251494e',
u'info_dict': {
u"title": u"Rafael Lemos"
}
}
def _real_extract(self, url): def _real_extract(self, url):
m_url = re.match(self._VALID_URL, url) m_url = re.match(self._VALID_URL, url)
@ -30,7 +38,7 @@ class TumblrIE(InfoExtractor):
# The only place where you can get a title, it's not complete, # The only place where you can get a title, it's not complete,
# but searching in other places doesn't work for all videos # but searching in other places doesn't work for all videos
video_title = self._html_search_regex(r'<title>(?P<title>.*?)</title>', video_title = self._html_search_regex(r'<title>(?P<title>.*?)(?: \| Tumblr)?</title>',
webpage, u'title', flags=re.DOTALL) webpage, u'title', flags=re.DOTALL)
return [{'id': video_id, return [{'id': video_id,

View File

@ -0,0 +1,41 @@
import base64
import re
from .common import InfoExtractor
from ..utils import (
compat_parse_qs,
)
class TutvIE(InfoExtractor):
    """Information extractor for tu.tv video pages."""

    _VALID_URL = r'https?://(?:www\.)?tu\.tv/videos/(?P<id>[^/?]+)'
    _TEST = {
        u'url': u'http://tu.tv/videos/noah-en-pabellon-cuahutemoc',
        u'file': u'2742556.flv',
        u'md5': u'5eb766671f69b82e528dc1e7769c5cb2',
        u'info_dict': {
            u"title": u"Noah en pabellon cuahutemoc"
        }
    }

    def _real_extract(self, url):
        """Return a one-element list holding the info dict for *url*."""
        slug = re.match(self._VALID_URL, url).group('id')
        page = self._download_webpage(url, slug)

        # Title comes from the og:title meta tag; the numeric codVideo value
        # is used as the video id in the result.
        title = self._html_search_regex(
            r'<meta property="og:title" content="(.*?)">', page, u'title')
        internal_id = self._search_regex(
            r'codVideo=([0-9]+)', page, u'internal video ID')

        # The flvurl.php answer is parsed as a query string; its 'kpt' field
        # is base64-decoded to obtain the media URL.
        flv_info = self._download_webpage(
            u'http://tu.tv/flvurl.php?codVideo=' + str(internal_id),
            slug, note=u'Downloading video info')
        encoded_url = compat_parse_qs(flv_info)['kpt'][0]
        video_url = base64.b64decode(encoded_url).decode('utf-8')

        # Extension: text after the last dot of the URL, query string removed.
        url_without_query = video_url.partition(u'?')[0]
        extension = url_without_query.rpartition(u'.')[2]

        return [{
            'id': internal_id,
            'url': video_url,
            'ext': extension,
            'title': title,
        }]

View File

@ -6,6 +6,15 @@ from .common import InfoExtractor
class UstreamIE(InfoExtractor): class UstreamIE(InfoExtractor):
_VALID_URL = r'https?://www\.ustream\.tv/recorded/(?P<videoID>\d+)' _VALID_URL = r'https?://www\.ustream\.tv/recorded/(?P<videoID>\d+)'
IE_NAME = u'ustream' IE_NAME = u'ustream'
_TEST = {
u'url': u'http://www.ustream.tv/recorded/20274954',
u'file': u'20274954.flv',
u'md5': u'088f151799e8f572f84eb62f17d73e5c',
u'info_dict': {
u"uploader": u"Young Americans for Liberty",
u"title": u"Young Americans for Liberty February 7, 2012 2:28 AM"
}
}
def _real_extract(self, url): def _real_extract(self, url):
m = re.match(self._VALID_URL, url) m = re.match(self._VALID_URL, url)

View File

@ -12,6 +12,14 @@ from ..utils import (
class Vbox7IE(InfoExtractor): class Vbox7IE(InfoExtractor):
"""Information Extractor for Vbox7""" """Information Extractor for Vbox7"""
_VALID_URL = r'(?:http://)?(?:www\.)?vbox7\.com/play:([^/]+)' _VALID_URL = r'(?:http://)?(?:www\.)?vbox7\.com/play:([^/]+)'
_TEST = {
u'url': u'http://vbox7.com/play:249bb972c2',
u'file': u'249bb972c2.flv',
u'md5': u'9c70d6d956f888bdc08c124acc120cfe',
u'info_dict': {
u"title": u"\u0421\u043c\u044f\u0445! \u0427\u0443\u0434\u043e - \u0447\u0438\u0441\u0442 \u0437\u0430 \u0441\u0435\u043a\u0443\u043d\u0434\u0438 - \u0421\u043a\u0440\u0438\u0442\u0430 \u043a\u0430\u043c\u0435\u0440\u0430"
}
}
def _real_extract(self,url): def _real_extract(self,url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)

View File

@ -3,7 +3,6 @@ import json
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
unified_strdate,
ExtractorError, ExtractorError,
) )
@ -13,6 +12,16 @@ class VevoIE(InfoExtractor):
(currently used by MTVIE) (currently used by MTVIE)
""" """
_VALID_URL = r'((http://www.vevo.com/watch/.*?/.*?/)|(vevo:))(?P<id>.*)$' _VALID_URL = r'((http://www.vevo.com/watch/.*?/.*?/)|(vevo:))(?P<id>.*)$'
_TEST = {
u'url': u'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280',
u'file': u'GB1101300280.mp4',
u'md5': u'06bea460acb744eab74a9d7dcb4bfd61',
u'info_dict': {
u"upload_date": u"20130624",
u"uploader": u"Hurts",
u"title": u"Somebody To Die For"
}
}
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)

View File

@ -16,8 +16,20 @@ class VimeoIE(InfoExtractor):
"""Information extractor for vimeo.com.""" """Information extractor for vimeo.com."""
# _VALID_URL matches Vimeo URLs # _VALID_URL matches Vimeo URLs
_VALID_URL = r'(?P<proto>https?://)?(?:(?:www|player)\.)?vimeo(?P<pro>pro)?\.com/(?:(?:(?:groups|album)/[^/]+)|(?:.*?)/)?(?P<direct_link>play_redirect_hls\?clip_id=)?(?:videos?/)?(?P<id>[0-9]+)' _VALID_URL = r'(?P<proto>https?://)?(?:(?:www|player)\.)?vimeo(?P<pro>pro)?\.com/(?:(?:(?:groups|album)/[^/]+)|(?:.*?)/)?(?P<direct_link>play_redirect_hls\?clip_id=)?(?:videos?/)?(?P<id>[0-9]+)(?:[?].*)?$'
IE_NAME = u'vimeo' IE_NAME = u'vimeo'
_TEST = {
u'url': u'http://vimeo.com/56015672',
u'file': u'56015672.mp4',
u'md5': u'8879b6cc097e987f02484baf890129e5',
u'info_dict': {
u"upload_date": u"20121220",
u"description": u"This is a test case for youtube-dl.\nFor more information, see github.com/rg3/youtube-dl\nTest chars: \u2605 \" ' \u5e78 / \\ \u00e4 \u21ad \U0001d550",
u"uploader_id": u"user7108434",
u"uploader": u"Filippo Valsorda",
u"title": u"youtube-dl test video - \u2605 \" ' \u5e78 / \\ \u00e4 \u21ad \U0001d550"
}
}
def _verify_video_password(self, url, video_id, webpage): def _verify_video_password(self, url, video_id, webpage):
password = self._downloader.params.get('videopassword', None) password = self._downloader.params.get('videopassword', None)

View File

@ -5,6 +5,15 @@ from .common import InfoExtractor
class VineIE(InfoExtractor): class VineIE(InfoExtractor):
_VALID_URL = r'(?:https?://)?(?:www\.)?vine\.co/v/(?P<id>\w+)' _VALID_URL = r'(?:https?://)?(?:www\.)?vine\.co/v/(?P<id>\w+)'
_TEST = {
u'url': u'https://vine.co/v/b9KOOWX7HUx',
u'file': u'b9KOOWX7HUx.mp4',
u'md5': u'2f36fed6235b16da96ce9b4dc890940d',
u'info_dict': {
u"uploader": u"Jack Dorsey",
u"title": u"Chicken."
}
}
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)

View File

@ -0,0 +1,84 @@
# coding: utf-8
import json
import re
from .common import InfoExtractor
from ..utils import (
compat_urllib_parse,
unified_strdate,
)
class WatIE(InfoExtractor):
    """Information extractor for wat.tv videos, single or multipart."""

    # The dots of the host name and of the ".html" suffix are escaped so that
    # they only match literal dots.
    _VALID_URL = r'http://www\.wat\.tv/.*-(?P<shortID>.*?)_.*?\.html'
    IE_NAME = 'wat.tv'
    _TEST = {
        u'url': u'http://www.wat.tv/video/world-war-philadelphia-vost-6bv55_2fjr7_.html',
        u'file': u'10631273.mp4',
        u'md5': u'0a4fe7870f31eaeabb5e25fd8da8414a',
        u'info_dict': {
            u'title': u'World War Z - Philadelphia VOST',
            u'description': u'La menace est partout. Que se passe-t-il à Philadelphia ?\r\nWORLD WAR Z, avec Brad Pitt, au cinéma le 3 juillet.\r\nhttp://www.worldwarz.fr',
        }
    }

    def download_video_info(self, real_id):
        """Download the contentv3 record for *real_id* and return its 'media' entry."""
        # 'contentv4' is used in the website, but it also returns the related
        # videos, we don't need them
        info = self._download_webpage(
            'http://www.wat.tv/interface/contentv3/' + real_id, real_id,
            'Downloading video info')
        return json.loads(info)['media']

    def _real_extract(self, url):
        """Extract the video (or the playlist of its parts) found at *url*.

        When the first chapter's id differs from the id found in the page,
        the video is treated as multipart and a playlist result with one
        entry per chapter URL is returned.  Otherwise a single info dict is
        returned, with 'upload_date' set only if the first chapter carries
        a 'date_diffusion' field.
        """
        def real_id_for_chapter(chapter):
            # The id is the part of 'tc_start' that precedes the first dash.
            return chapter['tc_start'].split('-')[0]

        mobj = re.match(self._VALID_URL, url)
        short_id = mobj.group('shortID')
        webpage = self._download_webpage(url, short_id)
        real_id = self._search_regex(r'xtpage = ".*-(.*?)";', webpage, 'real id')

        video_info = self.download_video_info(real_id)
        chapters = video_info['chapters']
        first_chapter = chapters[0]

        if real_id_for_chapter(first_chapter) != real_id:
            self.to_screen('Multipart video detected')
            # Yes, when each chapter is later processed by WatIE, its info
            # will be downloaded again
            chapter_urls = [
                self.download_video_info(real_id_for_chapter(chapter))['url']
                for chapter in chapters]
            entries = [self.url_result(chapter_url) for chapter_url in chapter_urls]
            return self.playlist_result(entries, real_id, video_info['title'])

        # Otherwise we can continue and extract just one part, we have to use
        # the short id for getting the video url
        player_data = compat_urllib_parse.urlencode({'shortVideoId': short_id,
                                                     'html5': '1'})
        player_info = self._download_webpage(
            'http://www.wat.tv/player?' + player_data,
            real_id, u'Downloading player info')
        player = json.loads(player_info)['player']
        html5_player = self._html_search_regex(r'iframe src="(.*?)"', player,
                                               'html5 player')
        player_webpage = self._download_webpage(html5_player, real_id,
                                                u'Downloading player webpage')

        video_url = self._search_regex(r'urlhtml5 : "(.*?)"', player_webpage,
                                       'video url')
        info = {'id': real_id,
                'url': video_url,
                'ext': 'mp4',
                'title': first_chapter['title'],
                'thumbnail': first_chapter['preview'],
                'description': first_chapter['description'],
                'view_count': video_info['views'],
                }
        if 'date_diffusion' in first_chapter:
            info['upload_date'] = unified_strdate(first_chapter['date_diffusion'])

        return info

View File

@ -0,0 +1,36 @@
import re
import base64
from .common import InfoExtractor
class WimpIE(InfoExtractor):
    """Information extractor for wimp.com video pages."""

    _VALID_URL = r'(?:http://)?(?:www\.)?wimp\.com/([^/]+)/'
    _TEST = {
        u'url': u'http://www.wimp.com/deerfence/',
        u'file': u'deerfence.flv',
        u'md5': u'8b215e2e0168c6081a1cf84b2846a2b5',
        u'info_dict': {
            u"title": u"Watch Till End: Herd of deer jump over a fence."
        }
    }

    def _real_extract(self, url):
        """Return a one-element list holding the info dict for *url*."""
        video_id = re.match(self._VALID_URL, url).group(1)
        page = self._download_webpage(url, video_id)

        # Title and thumbnail are read from the page's meta tags.
        title = self._search_regex(
            r'<meta name="description" content="(.+?)" />', page, 'video title')
        thumbnail_url = self._search_regex(
            r'<meta property="og\:image" content="(.+?)" />', page,
            'video thumbnail')

        # The page's "googleCode" value is base64; the decoded ASCII text
        # contains the final media URL as a quoted field.
        encoded_code = self._search_regex(
            "googleCode = '(.*?)'", page, 'file url')
        decoded_code = base64.b64decode(encoded_code).decode('ascii')
        final_url = self._search_regex(
            '","(.*?)"', decoded_code, 'final video url')

        # Extension: whatever follows the last dot of the media URL.
        extension = final_url.rpartition(u'.')[2]

        return [{
            'id': video_id,
            'url': final_url,
            'ext': extension,
            'title': title,
            'thumbnail': thumbnail_url,
        }]

View File

@ -5,7 +5,15 @@ from .common import InfoExtractor
class WorldStarHipHopIE(InfoExtractor): class WorldStarHipHopIE(InfoExtractor):
_VALID_URL = r'https?://(?:www|m)\.worldstar(?:candy|hiphop)\.com/videos/video\.php\?v=(?P<id>.*)' _VALID_URL = r'https?://(?:www|m)\.worldstar(?:candy|hiphop)\.com/videos/video\.php\?v=(?P<id>.*)'
IE_NAME = u'WorldStarHipHop' _TEST = {
"url": "http://www.worldstarhiphop.com/videos/video.php?v=wshh6a7q1ny0G34ZwuIO",
"file": "wshh6a7q1ny0G34ZwuIO.mp4",
"md5": "9d04de741161603bf7071bbf4e883186",
"info_dict": {
"title": "Video: KO Of The Week: MMA Fighter Gets Knocked Out By Swift Head Kick!"
}
}
def _real_extract(self, url): def _real_extract(self, url):
m = re.match(self._VALID_URL, url) m = re.match(self._VALID_URL, url)

View File

@ -11,6 +11,16 @@ from ..utils import (
class XHamsterIE(InfoExtractor): class XHamsterIE(InfoExtractor):
"""Information Extractor for xHamster""" """Information Extractor for xHamster"""
_VALID_URL = r'(?:http://)?(?:www.)?xhamster\.com/movies/(?P<id>[0-9]+)/.*\.html' _VALID_URL = r'(?:http://)?(?:www.)?xhamster\.com/movies/(?P<id>[0-9]+)/.*\.html'
_TEST = {
u'url': u'http://xhamster.com/movies/1509445/femaleagent_shy_beauty_takes_the_bait.html',
u'file': u'1509445.flv',
u'md5': u'9f48e0e8d58e3076bb236ff412ab62fa',
u'info_dict': {
u"upload_date": u"20121014",
u"uploader_id": u"Ruseful2011",
u"title": u"FemaleAgent Shy beauty takes the bait"
}
}
def _real_extract(self,url): def _real_extract(self,url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)

View File

@ -13,6 +13,14 @@ class XNXXIE(InfoExtractor):
VIDEO_URL_RE = r'flv_url=(.*?)&amp;' VIDEO_URL_RE = r'flv_url=(.*?)&amp;'
VIDEO_TITLE_RE = r'<title>(.*?)\s+-\s+XNXX.COM' VIDEO_TITLE_RE = r'<title>(.*?)\s+-\s+XNXX.COM'
VIDEO_THUMB_RE = r'url_bigthumb=(.*?)&amp;' VIDEO_THUMB_RE = r'url_bigthumb=(.*?)&amp;'
_TEST = {
u'url': u'http://video.xnxx.com/video1135332/lida_naked_funny_actress_5_',
u'file': u'1135332.flv',
u'md5': u'0831677e2b4761795f68d417e0b7b445',
u'info_dict': {
u"title": u"lida \u00bb Naked Funny Actress (5)"
}
}
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)

View File

@ -8,6 +8,14 @@ from ..utils import (
class XVideosIE(InfoExtractor): class XVideosIE(InfoExtractor):
_VALID_URL = r'^(?:https?://)?(?:www\.)?xvideos\.com/video([0-9]+)(?:.*)' _VALID_URL = r'^(?:https?://)?(?:www\.)?xvideos\.com/video([0-9]+)(?:.*)'
_TEST = {
u'url': u'http://www.xvideos.com/video939581/funny_porns_by_s_-1',
u'file': u'939581.flv',
u'md5': u'1d0c835822f0a71a7bf011855db929d0',
u'info_dict': {
u"title": u"Funny Porns By >>>>S<<<<<< -1"
}
}
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)

View File

@ -13,6 +13,15 @@ from ..utils import (
class YahooIE(InfoExtractor): class YahooIE(InfoExtractor):
"""Information extractor for screen.yahoo.com.""" """Information extractor for screen.yahoo.com."""
_VALID_URL = r'http://screen\.yahoo\.com/.*?-(?P<id>\d*?)\.html' _VALID_URL = r'http://screen\.yahoo\.com/.*?-(?P<id>\d*?)\.html'
_TEST = {
u'url': u'http://screen.yahoo.com/julian-smith-travis-legg-watch-214727115.html',
u'file': u'214727115.flv',
u'md5': u'2e717f169c1be93d84d3794a00d4a325',
u'info_dict': {
u"title": u"Julian Smith & Travis Legg Watch Julian Smith"
},
u'skip': u'Requires rtmpdump'
}
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)

View File

@ -8,6 +8,14 @@ from ..utils import (
class YouJizzIE(InfoExtractor): class YouJizzIE(InfoExtractor):
_VALID_URL = r'^(?:https?://)?(?:\w+\.)?youjizz\.com/videos/(?P<videoid>[^.]+).html$' _VALID_URL = r'^(?:https?://)?(?:\w+\.)?youjizz\.com/videos/(?P<videoid>[^.]+).html$'
_TEST = {
u'url': u'http://www.youjizz.com/videos/zeichentrick-1-2189178.html',
u'file': u'2189178.flv',
u'md5': u'07e15fa469ba384c7693fd246905547c',
u'info_dict': {
u"title": u"Zeichentrick 1"
}
}
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)

View File

@ -1,3 +1,5 @@
# coding: utf-8
import json import json
import math import math
import random import random
@ -12,6 +14,16 @@ from ..utils import (
class YoukuIE(InfoExtractor): class YoukuIE(InfoExtractor):
_VALID_URL = r'(?:http://)?v\.youku\.com/v_show/id_(?P<ID>[A-Za-z0-9]+)\.html' _VALID_URL = r'(?:http://)?v\.youku\.com/v_show/id_(?P<ID>[A-Za-z0-9]+)\.html'
_TEST = {
u"url": u"http://v.youku.com/v_show/id_XNDgyMDQ2NTQw.html",
u"file": u"XNDgyMDQ2NTQw_part00.flv",
u"md5": u"ffe3f2e435663dc2d1eea34faeff5b5b",
u"params": { u"test": False },
u"info_dict": {
u"title": u"youtube-dl test video \"'/\\ä↭𝕐"
}
}
def _gen_sid(self): def _gen_sid(self):
nowTime = int(time.time() * 1000) nowTime = int(time.time() * 1000)

View File

@ -16,6 +16,17 @@ from ..utils import (
class YouPornIE(InfoExtractor): class YouPornIE(InfoExtractor):
_VALID_URL = r'^(?:https?://)?(?:\w+\.)?youporn\.com/watch/(?P<videoid>[0-9]+)/(?P<title>[^/]+)' _VALID_URL = r'^(?:https?://)?(?:\w+\.)?youporn\.com/watch/(?P<videoid>[0-9]+)/(?P<title>[^/]+)'
_TEST = {
u'url': u'http://www.youporn.com/watch/505835/sex-ed-is-it-safe-to-masturbate-daily/',
u'file': u'505835.mp4',
u'md5': u'c37ddbaaa39058c76a7e86c6813423c1',
u'info_dict': {
u"upload_date": u"20101221",
u"description": u"Love & Sex Answers: http://bit.ly/DanAndJenn -- Is It Unhealthy To Masturbate Daily?",
u"uploader": u"Ask Dan And Jennifer",
u"title": u"Sex Ed: Is It Safe To Masturbate Daily?"
}
}
def _print_formats(self, formats): def _print_formats(self, formats):
"""Print all available formats""" """Print all available formats"""

View File

@ -81,6 +81,44 @@ class YoutubeIE(InfoExtractor):
'46': '1080x1920', '46': '1080x1920',
} }
IE_NAME = u'youtube' IE_NAME = u'youtube'
_TESTS = [
{
u"url": u"http://www.youtube.com/watch?v=BaW_jenozKc",
u"file": u"BaW_jenozKc.mp4",
u"info_dict": {
u"title": u"youtube-dl test video \"'/\\ä↭𝕐",
u"uploader": u"Philipp Hagemeister",
u"uploader_id": u"phihag",
u"upload_date": u"20121002",
u"description": u"test chars: \"'/\\ä↭𝕐\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de ."
}
},
{
u"url": u"http://www.youtube.com/watch?v=1ltcDfZMA3U",
u"file": u"1ltcDfZMA3U.flv",
u"note": u"Test VEVO video (#897)",
u"info_dict": {
u"upload_date": u"20070518",
u"title": u"Maps - It Will Find You",
u"description": u"Music video by Maps performing It Will Find You.",
u"uploader": u"MuteUSA",
u"uploader_id": u"MuteUSA"
}
},
{
u"url": u"http://www.youtube.com/watch?v=UxxajLWwzqY",
u"file": u"UxxajLWwzqY.mp4",
u"note": u"Test generic use_cipher_signature video (#897)",
u"info_dict": {
u"upload_date": u"20120506",
u"title": u"Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]",
u"description": u"md5:b085c9804f5ab69f4adea963a2dceb3c",
u"uploader": u"IconaPop",
u"uploader_id": u"IconaPop"
}
}
]
@classmethod @classmethod
def suitable(cls, url): def suitable(cls, url):
@ -130,16 +168,25 @@ class YoutubeIE(InfoExtractor):
self.to_screen(u'RTMP download detected') self.to_screen(u'RTMP download detected')
def _decrypt_signature(self, s): def _decrypt_signature(self, s):
"""Decrypt the key the two subkeys must have a length of 43""" """Turn the encrypted s field into a working signature"""
(a,b) = s.split('.')
if len(a) != 43 or len(b) != 43: if len(s) == 88:
raise ExtractorError(u'Unable to decrypt signature, subkeys lengths %d.%d not supported; retrying might work' % (len(a), len(b))) return s[48] + s[81:67:-1] + s[82] + s[66:62:-1] + s[85] + s[61:48:-1] + s[67] + s[47:12:-1] + s[3] + s[11:3:-1] + s[2] + s[12]
if self._downloader.params.get('verbose'): elif len(s) == 87:
self.to_screen('encrypted signature length %d.%d' % (len(a), len(b))) return s[62] + s[82:62:-1] + s[83] + s[61:52:-1] + s[0] + s[51:2:-1]
b = ''.join([b[:8],a[0],b[9:18],b[-4],b[19:39], b[18]])[0:40] elif len(s) == 86:
a = a[-40:] return s[2:63] + s[82] + s[64:82] + s[63]
s_dec = '.'.join((a,b))[::-1] elif len(s) == 85:
return s_dec return s[76] + s[82:76:-1] + s[83] + s[75:60:-1] + s[0] + s[59:50:-1] + s[1] + s[49:2:-1]
elif len(s) == 84:
return s[83:36:-1] + s[2] + s[35:26:-1] + s[3] + s[25:3:-1] + s[26]
elif len(s) == 83:
return s[52] + s[81:55:-1] + s[2] + s[54:52:-1] + s[82] + s[51:36:-1] + s[55] + s[35:2:-1] + s[36]
elif len(s) == 82:
return s[36] + s[79:67:-1] + s[81] + s[66:40:-1] + s[33] + s[39:36:-1] + s[40] + s[35] + s[0] + s[67] + s[32:0:-1] + s[34]
else:
raise ExtractorError(u'Unable to decrypt signature, key length %d not supported; retrying might work' % (len(s)))
def _get_available_subtitles(self, video_id): def _get_available_subtitles(self, video_id):
self.report_video_subtitles_download(video_id) self.report_video_subtitles_download(video_id)
@ -509,6 +556,12 @@ class YoutubeIE(InfoExtractor):
if 'sig' in url_data: if 'sig' in url_data:
url += '&signature=' + url_data['sig'][0] url += '&signature=' + url_data['sig'][0]
elif 's' in url_data: elif 's' in url_data:
if self._downloader.params.get('verbose'):
s = url_data['s'][0]
player = self._search_regex(r'html5player-(.+?)\.js', video_webpage,
'html5 player', fatal=False)
self.to_screen('encrypted signature length %d (%d.%d), itag %s, html5 player %s' %
(len(s), len(s.split('.')[0]), len(s.split('.')[1]), url_data['itag'][0], player))
signature = self._decrypt_signature(url_data['s'][0]) signature = self._decrypt_signature(url_data['s'][0])
url += '&signature=' + signature url += '&signature=' + signature
if 'ratebypass' not in url: if 'ratebypass' not in url:

View File

@ -474,7 +474,7 @@ class ExtractorError(Exception):
""" tb, if given, is the original traceback (so that it can be printed out). """ """ tb, if given, is the original traceback (so that it can be printed out). """
if not sys.exc_info()[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError): if not sys.exc_info()[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError):
msg = msg + u'; please report this issue on http://yt-dl.org/bug' msg = msg + u'; please report this issue on https://yt-dl.org/bug . Be sure to call youtube-dl with the --verbose flag and include its complete output.'
super(ExtractorError, self).__init__(msg) super(ExtractorError, self).__init__(msg)
self.traceback = tb self.traceback = tb

View File

@ -1,2 +1,2 @@
__version__ = '2013.06.33' __version__ = '2013.06.34.4'