Merge branch 'master' into use-other-downloaders

Commit 7212863883 by Rogério Brito, 2013-10-17 13:59:09 -03:00
47 changed files with 1446 additions and 264 deletions

Makefile

@@ -13,13 +13,13 @@ PYTHON=/usr/bin/env python
 # set SYSCONFDIR to /etc if PREFIX=/usr or PREFIX=/usr/local
 ifeq ($(PREFIX),/usr)
 	SYSCONFDIR=/etc
 else
 	ifeq ($(PREFIX),/usr/local)
 		SYSCONFDIR=/etc
 	else
 		SYSCONFDIR=$(PREFIX)/etc
 	endif
 endif

 install: youtube-dl youtube-dl.1 youtube-dl.bash-completion
@@ -71,6 +71,7 @@ youtube-dl.tar.gz: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash-
 		--exclude '*~' \
 		--exclude '__pycache' \
 		--exclude '.git' \
+		--exclude 'testdata' \
 		-- \
 		bin devscripts test youtube_dl \
 		CHANGELOG LICENSE README.md README.txt \

README.md

@@ -57,9 +57,10 @@ which means you can modify it, redistribute it or use it however you like.
                                file. Record all downloaded videos in it.

 ## Download Options:
-    -r, --rate-limit LIMIT     maximum download rate (e.g. 50k or 44.6m)
+    -r, --rate-limit LIMIT     maximum download rate in bytes per second (e.g.
+                               50K or 4.2M)
     -R, --retries RETRIES      number of retries (default is 10)
-    --buffer-size SIZE         size of download buffer (e.g. 1024 or 16k)
+    --buffer-size SIZE         size of download buffer (e.g. 1024 or 16K)
                                (default is 1024)
     --no-resize-buffer         do not automatically adjust the buffer size. By
                                default, the buffer size is automatically resized
@@ -100,6 +101,7 @@ which means you can modify it, redistribute it or use it however you like.
                                file modification time
     --write-description        write video description to a .description file
     --write-info-json          write video metadata to a .info.json file
+    --write-annotations        write video annotations to a .annotation file
     --write-thumbnail          write thumbnail image to disk

 ## Verbosity / Simulation Options:
@@ -166,6 +168,7 @@ which means you can modify it, redistribute it or use it however you like.
                                processed files are overwritten by default
     --embed-subs               embed subtitles in the video (only for mp4
                                videos)
+    --add-metadata             add metadata to the files

 # CONFIGURATION

devscripts/gh-pages/update-sites.py

@@ -16,10 +16,11 @@ def main():
     ie_htmls = []
     for ie in sorted(youtube_dl.gen_extractors(), key=lambda i: i.IE_NAME.lower()):
         ie_html = '<b>{}</b>'.format(ie.IE_NAME)
-        try:
+        ie_desc = getattr(ie, 'IE_DESC', None)
+        if ie_desc is False:
+            continue
+        elif ie_desc is not None:
             ie_html += ': {}'.format(ie.IE_DESC)
-        except AttributeError:
-            pass
         if ie.working() == False:
             ie_html += ' (Currently broken)'
         ie_htmls.append('<li>{}</li>'.format(ie_html))
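
The try/except attribute probe is replaced with a tri-state getattr. A minimal sketch of the convention the new code relies on (class names here are illustrative, not from the commit):

import youtube_dl  # only needed if you want to probe real extractors

class ExampleSearchIE(object):
    IE_NAME = 'example:search'
    IE_DESC = False               # False: leave this extractor off the supported-sites page

class ExampleIE(object):
    IE_NAME = 'example'
    IE_DESC = 'Example video portal'  # a string is rendered as "name: description"

# Extractors that never set IE_DESC fall back to None and are listed by name only:
print(getattr(ExampleIE, 'IE_DESC', None))        # 'Example video portal'
print(getattr(ExampleSearchIE, 'IE_DESC', None))  # False -> skipped by the loop above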

test/__init__.py (new, empty file)

test/helper.py

@@ -1,22 +1,27 @@
 import errno
 import io
+import hashlib
 import json
 import os.path
 import re
 import types

 import youtube_dl.extractor
-from youtube_dl import YoutubeDL, YoutubeDLHandler
-from youtube_dl.utils import (
-    compat_cookiejar,
-    compat_urllib_request,
-)
-
-youtube_dl._setup_opener(timeout=10)
-
-PARAMETERS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), "parameters.json")
-with io.open(PARAMETERS_FILE, encoding='utf-8') as pf:
-    parameters = json.load(pf)
+from youtube_dl import YoutubeDL
+
+
+def global_setup():
+    youtube_dl._setup_opener(timeout=10)
+
+
+def get_params(override=None):
+    PARAMETERS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)),
+                                   "parameters.json")
+    with io.open(PARAMETERS_FILE, encoding='utf-8') as pf:
+        parameters = json.load(pf)
+    if override:
+        parameters.update(override)
+    return parameters


 def try_rm(filename):
@@ -32,7 +37,7 @@ class FakeYDL(YoutubeDL):
     def __init__(self):
         # Different instances of the downloader can't share the same dictionary
         # some test set the "sublang" parameter, which would break the md5 checks.
-        params = dict(parameters)
+        params = get_params()
         super(FakeYDL, self).__init__(params)
         self.result = []
@@ -62,3 +67,6 @@ def get_testcases():
         for t in getattr(ie, '_TESTS', []):
             t['name'] = type(ie).__name__[:-len('IE')]
             yield t
+
+
+md5 = lambda s: hashlib.md5(s.encode('utf-8')).hexdigest()
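
Net effect: importing the helper module no longer has side effects, and each test opts in explicitly. A sketch of how a test module consumes the new helpers (mirroring the test files updated below):

from test.helper import FakeYDL, get_params, global_setup, md5

global_setup()                                # install the shared opener once per process
params = get_params({'skip_download': True})  # parameters.json plus per-test overrides
ydl = FakeYDL()                               # picks up get_params() internally
print(md5(u'subtitle body'))                  # md5 hexdigest of the UTF-8 encoded text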

test/test_age_restriction.py

@@ -1,14 +1,16 @@
 #!/usr/bin/env python
-import sys
-import unittest

 # Allow direct execution
 import os
-sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+import sys
+import unittest
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from test.helper import global_setup, try_rm
+global_setup()

 from youtube_dl import YoutubeDL
-from helper import try_rm


 def _download_restricted(url, filename, age):

test/test_all_urls.py

@@ -1,14 +1,20 @@
 #!/usr/bin/env python
-import sys
-import unittest

 # Allow direct execution
 import os
-sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+import sys
+import unittest
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from test.helper import get_testcases
+
+from youtube_dl.extractor import (
+    gen_extractors,
+    JustinTVIE,
+    YoutubeIE,
+)

-from youtube_dl.extractor import YoutubeIE, YoutubePlaylistIE, YoutubeChannelIE, JustinTVIE, gen_extractors
-from helper import get_testcases

 class TestAllURLsMatching(unittest.TestCase):
     def setUp(self):

test/test_dailymotion_subtitles.py

@@ -1,18 +1,16 @@
 #!/usr/bin/env python
-import sys
-import unittest
-import hashlib

 # Allow direct execution
 import os
-sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+import sys
+import unittest
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from test.helper import FakeYDL, global_setup, md5
+global_setup()

 from youtube_dl.extractor import DailymotionIE
-from youtube_dl.utils import *
-from helper import FakeYDL
-
-md5 = lambda s: hashlib.md5(s.encode('utf-8')).hexdigest()

 class TestDailymotionSubtitles(unittest.TestCase):
     def setUp(self):

test/test_download.py

@@ -1,26 +1,31 @@
 #!/usr/bin/env python
+
+# Allow direct execution
+import os
+import sys
+import unittest
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from test.helper import get_params, get_testcases, global_setup, try_rm, md5
+global_setup()
+

 import hashlib
 import io
-import os
 import json
-import unittest
-import sys
 import socket
-import binascii
-
-# Allow direct execution
-sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

 import youtube_dl.YoutubeDL
-from youtube_dl.utils import *
-
-PARAMETERS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), "parameters.json")
+from youtube_dl.utils import (
+    compat_str,
+    compat_urllib_error,
+    DownloadError,
+    ExtractorError,
+    UnavailableVideoError,
+)

 RETRIES = 3
-md5 = lambda s: hashlib.md5(s.encode('utf-8')).hexdigest()

 class YoutubeDL(youtube_dl.YoutubeDL):
     def __init__(self, *args, **kwargs):
         self.to_stderr = self.to_screen
@@ -37,18 +42,12 @@ def _file_md5(fn):
     with open(fn, 'rb') as f:
         return hashlib.md5(f.read()).hexdigest()

-import helper
-from helper import get_testcases, try_rm
+# Set up remaining global configuration
 defs = get_testcases()
-
-with io.open(PARAMETERS_FILE, encoding='utf-8') as pf:
-    parameters = json.load(pf)


 class TestDownload(unittest.TestCase):
     maxDiff = None
     def setUp(self):
-        self.parameters = parameters
         self.defs = defs

 ### Dynamically generate tests
@@ -68,8 +67,7 @@ def generator(test_case):
             print_skipping(test_case['skip'])
             return

-        params = self.parameters.copy()
-        params.update(test_case.get('params', {}))
+        params = get_params(test_case.get('params', {}))

         ydl = YoutubeDL(params)
         ydl.add_default_info_extractors()

test/test_playlists.py

@@ -1,13 +1,16 @@
 #!/usr/bin/env python
 # encoding: utf-8
-import sys
-import unittest
-import json

 # Allow direct execution
 import os
-sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+import sys
+import unittest
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from test.helper import FakeYDL, global_setup
+global_setup()

 from youtube_dl.extractor import (
     DailymotionPlaylistIE,
@@ -16,10 +19,9 @@ from youtube_dl.extractor import (
     UstreamChannelIE,
     SoundcloudUserIE,
     LivestreamIE,
+    NHLVideocenterIE,
 )
-from youtube_dl.utils import *
-from helper import FakeYDL


 class TestPlaylists(unittest.TestCase):
     def assertIsPlaylist(self, info):
@@ -74,5 +76,14 @@ class TestPlaylists(unittest.TestCase):
         self.assertEqual(result['title'], u'TEDCity2.0 (English)')
         self.assertTrue(len(result['entries']) >= 4)

+    def test_nhl_videocenter(self):
+        dl = FakeYDL()
+        ie = NHLVideocenterIE(dl)
+        result = ie.extract('http://video.canucks.nhl.com/videocenter/console?catid=999')
+        self.assertIsPlaylist(result)
+        self.assertEqual(result['id'], u'999')
+        self.assertEqual(result['title'], u'Highlights')
+        self.assertEqual(len(result['entries']), 12)
+
 if __name__ == '__main__':
     unittest.main()

test/test_utils.py

@@ -1,14 +1,15 @@
 #!/usr/bin/env python
-# coding: utf-8
-
-# Various small unit tests
-import sys
-import unittest
-import xml.etree.ElementTree

 # Allow direct execution
 import os
-sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+import sys
+import unittest
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+# Various small unit tests
+import xml.etree.ElementTree

 #from youtube_dl.utils import htmlentity_transform
 from youtube_dl.utils import (
@@ -20,6 +21,9 @@ from youtube_dl.utils import (
     unified_strdate,
     find_xpath_attr,
     get_meta_content,
+    xpath_with_ns,
+    smuggle_url,
+    unsmuggle_url,
 )

 if sys.version_info < (3, 0):
@@ -141,5 +145,31 @@ class TestUtil(unittest.TestCase):
         self.assertEqual(get_meta('description'), u'foo & bar')
         self.assertEqual(get_meta('author'), 'Plato')

+    def test_xpath_with_ns(self):
+        testxml = u'''<root xmlns:media="http://example.com/">
+            <media:song>
+                <media:author>The Author</media:author>
+                <url>http://server.com/download.mp3</url>
+            </media:song>
+        </root>'''
+        doc = xml.etree.ElementTree.fromstring(testxml)
+        find = lambda p: doc.find(xpath_with_ns(p, {'media': 'http://example.com/'}))
+        self.assertTrue(find('media:song') is not None)
+        self.assertEqual(find('media:song/media:author').text, u'The Author')
+        self.assertEqual(find('media:song/url').text, u'http://server.com/download.mp3')
+
+    def test_smuggle_url(self):
+        data = {u"ö": u"ö", u"abc": [3]}
+        url = 'https://foo.bar/baz?x=y#a'
+        smug_url = smuggle_url(url, data)
+        unsmug_url, unsmug_data = unsmuggle_url(smug_url)
+        self.assertEqual(url, unsmug_url)
+        self.assertEqual(data, unsmug_data)
+
+        res_url, res_data = unsmuggle_url(url)
+        self.assertEqual(res_url, url)
+        self.assertEqual(res_data, None)
+
 if __name__ == '__main__':
     unittest.main()
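
The new tests pin down three utils helpers. Minimal implementations consistent with what the assertions require (sketches, not the actual youtube_dl.utils code) could look like this:

import json

try:
    from urllib.parse import urlencode, parse_qs   # Python 3
except ImportError:
    from urllib import urlencode                   # Python 2
    from urlparse import parse_qs


def xpath_with_ns(path, ns_map):
    # 'media:song/url' -> '{http://example.com/}song/url'
    out = []
    for component in path.split('/'):
        if ':' not in component:
            out.append(component)
        else:
            ns, tag = component.split(':')
            out.append('{%s}%s' % (ns_map[ns], tag))
    return '/'.join(out)


def smuggle_url(url, data):
    # hide extra data for internal use in the URL fragment
    sdata = urlencode({'__youtubedl_smuggle': json.dumps(data)})
    return url + '#' + sdata


def unsmuggle_url(smug_url):
    if '#__youtubedl_smuggle' not in smug_url:
        return smug_url, None
    url, _, sdata = smug_url.rpartition('#')
    data = json.loads(parse_qs(sdata)['__youtubedl_smuggle'][0])
    return url, data

Round-tripping through the fragment keeps the original URL (including its own fragment) intact, which is exactly what test_smuggle_url checks.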

test/test_write_annotations.py (new file)

@@ -0,0 +1,80 @@
+#!/usr/bin/env python
+# coding: utf-8
+
+# Allow direct execution
+import os
+import sys
+import unittest
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from test.helper import get_params, global_setup, try_rm
+global_setup()
+
+
+import io
+import xml.etree.ElementTree
+
+import youtube_dl.YoutubeDL
+import youtube_dl.extractor
+
+
+class YoutubeDL(youtube_dl.YoutubeDL):
+    def __init__(self, *args, **kwargs):
+        super(YoutubeDL, self).__init__(*args, **kwargs)
+        self.to_stderr = self.to_screen
+
+params = get_params({
+    'writeannotations': True,
+    'skip_download': True,
+    'writeinfojson': False,
+    'format': 'flv',
+})
+
+TEST_ID = 'gr51aVj-mLg'
+ANNOTATIONS_FILE = TEST_ID + '.flv.annotations.xml'
+EXPECTED_ANNOTATIONS = ['Speech bubble', 'Note', 'Title', 'Spotlight', 'Label']
+
+
+class TestAnnotations(unittest.TestCase):
+    def setUp(self):
+        # Clear old files
+        self.tearDown()
+
+    def test_info_json(self):
+        expected = list(EXPECTED_ANNOTATIONS)  #Two annotations could have the same text.
+        ie = youtube_dl.extractor.YoutubeIE()
+        ydl = YoutubeDL(params)
+        ydl.add_info_extractor(ie)
+        ydl.download([TEST_ID])
+        self.assertTrue(os.path.exists(ANNOTATIONS_FILE))
+        annoxml = None
+        with io.open(ANNOTATIONS_FILE, 'r', encoding='utf-8') as annof:
+            annoxml = xml.etree.ElementTree.parse(annof)
+        self.assertTrue(annoxml is not None, 'Failed to parse annotations XML')
+        root = annoxml.getroot()
+        self.assertEqual(root.tag, 'document')
+        annotationsTag = root.find('annotations')
+        self.assertEqual(annotationsTag.tag, 'annotations')
+        annotations = annotationsTag.findall('annotation')
+        #Not all the annotations have TEXT children and the annotations are returned unsorted.
+        for a in annotations:
+            self.assertEqual(a.tag, 'annotation')
+            if a.get('type') == 'text':
+                textTag = a.find('TEXT')
+                text = textTag.text
+                self.assertTrue(text in expected)  #assertIn only added in python 2.7
+                #remove the first occurance, there could be more than one annotation with the same text
+                expected.remove(text)
+        #We should have seen (and removed) all the expected annotation texts.
+        self.assertEqual(len(expected), 0, 'Not all expected annotations were found.')
+
+    def tearDown(self):
+        try_rm(ANNOTATIONS_FILE)
+
+if __name__ == '__main__':
+    unittest.main()

test/test_write_info_json.py

@@ -1,37 +1,34 @@
 #!/usr/bin/env python
 # coding: utf-8
-import json

+# Allow direct execution
 import os
 import sys
 import unittest
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

-# Allow direct execution
-sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+from test.helper import get_params, global_setup
+global_setup()
+
+
+import io
+import json

 import youtube_dl.YoutubeDL
 import youtube_dl.extractor
-from youtube_dl.utils import *
-
-PARAMETERS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), "parameters.json")
-
-# General configuration (from __init__, not very elegant...)
-jar = compat_cookiejar.CookieJar()
-cookie_processor = compat_urllib_request.HTTPCookieProcessor(jar)
-proxy_handler = compat_urllib_request.ProxyHandler()
-opener = compat_urllib_request.build_opener(proxy_handler, cookie_processor, YoutubeDLHandler())
-compat_urllib_request.install_opener(opener)


 class YoutubeDL(youtube_dl.YoutubeDL):
     def __init__(self, *args, **kwargs):
         super(YoutubeDL, self).__init__(*args, **kwargs)
         self.to_stderr = self.to_screen

-with io.open(PARAMETERS_FILE, encoding='utf-8') as pf:
-    params = json.load(pf)
-params['writeinfojson'] = True
-params['skip_download'] = True
-params['writedescription'] = True
+params = get_params({
+    'writeinfojson': True,
+    'skip_download': True,
+    'writedescription': True,
+})

 TEST_ID = 'BaW_jenozKc'
 INFO_JSON_FILE = TEST_ID + '.mp4.info.json'
@@ -42,6 +39,7 @@ This is a test video for youtube-dl.
 For more information, contact phihag@phihag.de .'''

+
 class TestInfoJSON(unittest.TestCase):
     def setUp(self):
         # Clear old files

test/test_youtube_lists.py

@@ -1,20 +1,26 @@
 #!/usr/bin/env python
-import sys
-import unittest
-import json

 # Allow direct execution
 import os
-sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+import sys
+import unittest
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

-from youtube_dl.extractor import YoutubeUserIE, YoutubePlaylistIE, YoutubeIE, YoutubeChannelIE, YoutubeShowIE
-from youtube_dl.utils import *
-from helper import FakeYDL
+from test.helper import FakeYDL, global_setup
+global_setup()
+
+from youtube_dl.extractor import (
+    YoutubeUserIE,
+    YoutubePlaylistIE,
+    YoutubeIE,
+    YoutubeChannelIE,
+    YoutubeShowIE,
+)


 class TestYoutubeLists(unittest.TestCase):
-    def assertIsPlaylist(self,info):
+    def assertIsPlaylist(self, info):
         """Make sure the info has '_type' set to 'playlist'"""
         self.assertEqual(info['_type'], 'playlist')
@@ -100,7 +106,7 @@ class TestYoutubeLists(unittest.TestCase):
         dl = FakeYDL()
         ie = YoutubeShowIE(dl)
         result = ie.extract('http://www.youtube.com/show/airdisasters')
-        self.assertTrue(len(result) >= 4)
+        self.assertTrue(len(result) >= 3)

 if __name__ == '__main__':
     unittest.main()

test/test_youtube_signature.py

@@ -1,14 +1,18 @@
 #!/usr/bin/env python

+# Allow direct execution
+import os
+import sys
+import unittest
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from test.helper import global_setup
+global_setup()
+
+
 import io
 import re
 import string
-import sys
-import unittest
-
-# Allow direct execution
-import os
-sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

 from youtube_dl.extractor import YoutubeIE
 from youtube_dl.utils import compat_str, compat_urlretrieve

test/test_youtube_subtitles.py

@@ -1,69 +1,79 @@
 #!/usr/bin/env python
-import sys
-import unittest
-import hashlib

 # Allow direct execution
 import os
-sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+import sys
+import unittest
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from test.helper import FakeYDL, global_setup, md5
+global_setup()

 from youtube_dl.extractor import YoutubeIE
-from youtube_dl.utils import *
-from helper import FakeYDL
-
-md5 = lambda s: hashlib.md5(s.encode('utf-8')).hexdigest()

 class TestYoutubeSubtitles(unittest.TestCase):
     def setUp(self):
         self.DL = FakeYDL()
         self.url = 'QRS8MkLhQmM'
+
     def getInfoDict(self):
         IE = YoutubeIE(self.DL)
         info_dict = IE.extract(self.url)
         return info_dict
+
     def getSubtitles(self):
         info_dict = self.getInfoDict()
         return info_dict[0]['subtitles']
+
     def test_youtube_no_writesubtitles(self):
         self.DL.params['writesubtitles'] = False
         subtitles = self.getSubtitles()
         self.assertEqual(subtitles, None)
+
     def test_youtube_subtitles(self):
         self.DL.params['writesubtitles'] = True
         subtitles = self.getSubtitles()
         self.assertEqual(md5(subtitles['en']), '4cd9278a35ba2305f47354ee13472260')
+
     def test_youtube_subtitles_lang(self):
         self.DL.params['writesubtitles'] = True
         self.DL.params['subtitleslangs'] = ['it']
         subtitles = self.getSubtitles()
         self.assertEqual(md5(subtitles['it']), '164a51f16f260476a05b50fe4c2f161d')
+
     def test_youtube_allsubtitles(self):
         self.DL.params['writesubtitles'] = True
         self.DL.params['allsubtitles'] = True
         subtitles = self.getSubtitles()
         self.assertEqual(len(subtitles.keys()), 13)
+
     def test_youtube_subtitles_sbv_format(self):
         self.DL.params['writesubtitles'] = True
         self.DL.params['subtitlesformat'] = 'sbv'
         subtitles = self.getSubtitles()
         self.assertEqual(md5(subtitles['en']), '13aeaa0c245a8bed9a451cb643e3ad8b')
+
     def test_youtube_subtitles_vtt_format(self):
         self.DL.params['writesubtitles'] = True
         self.DL.params['subtitlesformat'] = 'vtt'
         subtitles = self.getSubtitles()
         self.assertEqual(md5(subtitles['en']), '356cdc577fde0c6783b9b822e7206ff7')
+
     def test_youtube_list_subtitles(self):
         self.DL.expect_warning(u'Video doesn\'t have automatic captions')
         self.DL.params['listsubtitles'] = True
         info_dict = self.getInfoDict()
         self.assertEqual(info_dict, None)
+
     def test_youtube_automatic_captions(self):
         self.url = '8YoUxe5ncPo'
         self.DL.params['writeautomaticsub'] = True
         self.DL.params['subtitleslangs'] = ['it']
         subtitles = self.getSubtitles()
         self.assertTrue(subtitles['it'] is not None)
+
     def test_youtube_nosubtitles(self):
         self.DL.expect_warning(u'video doesn\'t have subtitles')
         self.url = 'sAjKT8FhjI8'
@@ -71,6 +81,7 @@ class TestYoutubeSubtitles(unittest.TestCase):
         self.DL.params['allsubtitles'] = True
         subtitles = self.getSubtitles()
         self.assertEqual(len(subtitles), 0)
+
     def test_youtube_multiple_langs(self):
         self.url = 'QRS8MkLhQmM'
         self.DL.params['writesubtitles'] = True

tox.ini

@@ -1,5 +1,8 @@
 [tox]
 envlist = py26,py27,py33
 [testenv]
-deps = nose
-commands = nosetests --with-coverage --cover-package=youtube_dl --cover-html --verbose test
+deps =
+   nose
+   coverage
+commands = nosetests --verbose {posargs:test}  # --with-coverage --cover-package=youtube_dl --cover-html
+# test.test_download:TestDownload.test_NowVideo
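
The {posargs:test} placeholder keeps plain "tox" running the whole test directory while letting a single test be selected from the command line, e.g. "tox -- test.test_download:TestDownload.test_NowVideo" (the selector kept in the trailing comment); the coverage flags now have to be passed explicitly when wanted.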

youtube_dl/FileDownloader.py

@@ -270,6 +270,7 @@ class FileDownloader(object):
     def _download_with_rtmpdump(self, filename, url, player_url, page_url, play_path, tc_url):
         self.report_destination(filename)
         tmpfilename = self.temp_name(filename)
+        test = self.params.get('test', False)

         # Check for rtmpdump first
         try:
@@ -291,6 +292,8 @@ class FileDownloader(object):
             basic_args += ['--playpath', play_path]
         if tc_url is not None:
             basic_args += ['--tcUrl', url]
+        if test:
+            basic_args += ['--stop', '1']
         args = basic_args + [[], ['--resume', '--skip', '1']][self.params.get('continuedl', False)]
         if self.params.get('verbose', False):
             try:
@@ -300,7 +303,7 @@ class FileDownloader(object):
                 shell_quote = repr
             self.to_screen(u'[debug] rtmpdump command line: ' + shell_quote(args))
         retval = subprocess.call(args)
-        while retval == 2 or retval == 1:
+        while (retval == 2 or retval == 1) and not test:
             prevsize = os.path.getsize(encodeFilename(tmpfilename))
             self.to_screen(u'\r[rtmpdump] %s bytes' % prevsize, skip_eol=True)
             time.sleep(5.0) # This seems to be needed
@@ -313,7 +316,7 @@ class FileDownloader(object):
             self.to_screen(u'\r[rtmpdump] Could not download the whole video. This can happen for some advertisements.')
             retval = 0
             break
-        if retval == 0:
+        if retval == 0 or (test and retval == 2):
             fsize = os.path.getsize(encodeFilename(tmpfilename))
             self.to_screen(u'\r[rtmpdump] %s bytes' % fsize)
             self.try_rename(tmpfilename, filename)
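
Taken together, these hunks give rtmpdump a test mode: with the pre-existing 'test' parameter set, rtmpdump is told to stop after one second of stream, the resume loop is skipped, and exit status 2 (stream interrupted by --stop) counts as success. A sketch of triggering it through the embedding API (the URL is hypothetical):

import youtube_dl

ydl = youtube_dl.YoutubeDL({
    'test': True,                  # the flag the test suite uses to avoid full downloads
    'outtmpl': u'%(id)s.%(ext)s',
})
ydl.add_default_info_extractors()
# For an RTMP video this now runs rtmpdump with --stop 1 and accepts retval 2.
ydl.download(['http://example.com/some-rtmp-page'])  # hypothetical URL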

youtube_dl/PostProcessor.py

@@ -3,7 +3,14 @@ import subprocess
 import sys
 import time

-from .utils import *
+from .utils import (
+    compat_subprocess_get_DEVNULL,
+    encodeFilename,
+    PostProcessingError,
+    shell_quote,
+    subtitles_filename,
+)


 class PostProcessor(object):
@@ -82,6 +89,8 @@ class FFmpegPostProcessor(PostProcessor):
                + opts +
                [encodeFilename(self._ffmpeg_filename_argument(out_path))])

+        if self._downloader.params.get('verbose', False):
+            self._downloader.to_screen(u'[debug] ffmpeg command line: %s' % shell_quote(cmd))
         p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
         stdout,stderr = p.communicate()
         if p.returncode != 0:
@@ -177,7 +186,8 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor):
             extension = self._preferredcodec
             more_opts = []
             if self._preferredquality is not None:
-                if int(self._preferredquality) < 10:
+                # The opus codec doesn't support the -aq option
+                if int(self._preferredquality) < 10 and extension != 'opus':
                     more_opts += [self._exes['avconv'] and '-q:a' or '-aq', self._preferredquality]
                 else:
                     more_opts += [self._exes['avconv'] and '-b:a' or '-ab', self._preferredquality + 'k']
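
For reference, the branch maps --audio-quality onto different ffmpeg/avconv flags, and the new guard exists because the opus encoder rejects the VBR (-aq/-q:a) option. A sketch of the mapping:

def audio_quality_opts(preferredquality, extension, use_avconv=True):
    # mirrors the branch above: small values are VBR levels, larger ones bitrates
    if int(preferredquality) < 10 and extension != 'opus':
        return [use_avconv and '-q:a' or '-aq', preferredquality]
    return [use_avconv and '-b:a' or '-ab', preferredquality + 'k']

print(audio_quality_opts('5', 'mp3'))    # ['-q:a', '5']   VBR quality scale
print(audio_quality_opts('5', 'opus'))   # ['-b:a', '5k']  opus always takes the bitrate path now
print(audio_quality_opts('128', 'mp3'))  # ['-b:a', '128k']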
@@ -467,3 +477,35 @@ class FFmpegEmbedSubtitlePP(FFmpegPostProcessor):
         os.rename(encodeFilename(temp_filename), encodeFilename(filename))

         return True, information
+
+
+class FFmpegMetadataPP(FFmpegPostProcessor):
+    def run(self, info):
+        metadata = {}
+        if info.get('title') is not None:
+            metadata['title'] = info['title']
+        if info.get('upload_date') is not None:
+            metadata['date'] = info['upload_date']
+        if info.get('uploader') is not None:
+            metadata['artist'] = info['uploader']
+        elif info.get('uploader_id') is not None:
+            metadata['artist'] = info['uploader_id']
+
+        if not metadata:
+            self._downloader.to_screen(u'[ffmpeg] There isn\'t any metadata to add')
+            return True, info
+
+        filename = info['filepath']
+        ext = os.path.splitext(filename)[1][1:]
+        temp_filename = filename + u'.temp'
+
+        options = ['-c', 'copy']
+        for (name, value) in metadata.items():
+            options.extend(['-metadata', '%s="%s"' % (name, value)])
+        options.extend(['-f', ext])
+
+        self._downloader.to_screen(u'[ffmpeg] Adding metadata to \'%s\'' % filename)
+        self.run_ffmpeg(filename, temp_filename, options)
+        os.remove(encodeFilename(filename))
+        os.rename(encodeFilename(temp_filename), encodeFilename(filename))
+        return True, info
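
A post processor's run() returns a (keep_file, info) tuple, and the new class rewrites the file in place via a .temp copy. A sketch of wiring it up by hand (the --add-metadata plumbing follows in __init__.py below):

import youtube_dl
from youtube_dl.PostProcessor import FFmpegMetadataPP

ydl = youtube_dl.YoutubeDL({'outtmpl': u'%(id)s.%(ext)s'})
ydl.add_default_info_extractors()
ydl.add_post_processor(FFmpegMetadataPP())  # runs ffmpeg -c copy with -metadata pairs
ydl.download(['BaW_jenozKc'])               # video id borrowed from the info-json test above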

youtube_dl/YoutubeDL.py

@@ -71,6 +71,7 @@ class YoutubeDL(object):
     logtostderr:       Log messages to stderr instead of stdout.
     writedescription: Write the video description to a .description file
     writeinfojson:     Write the video description to a .info.json file
+    writeannotations:  Write the video annotations to a .annotations.xml file
     writethumbnail:    Write the thumbnail image to a file
     writesubtitles:    Write the video subtitles to a file
     writeautomaticsub: Write the automatic subtitles to a file
@@ -119,7 +120,7 @@ class YoutubeDL(object):
                 and not params['restrictfilenames']):
             # On Python 3, the Unicode filesystem API will throw errors (#1474)
             self.report_warning(
-                u'Assuming --restrict-filenames isnce file system encoding '
+                u'Assuming --restrict-filenames since file system encoding '
                 u'cannot encode all charactes. '
                 u'Set the LC_ALL environment variable to fix this.')
             params['restrictfilenames'] = True
@@ -258,6 +259,10 @@ class YoutubeDL(object):
         """ Report that the metadata file has been written """
         self.to_screen(u'[info] Video description metadata as JSON to: ' + infofn)

+    def report_writeannotations(self, annofn):
+        """ Report that the annotations file has been written. """
+        self.to_screen(u'[info] Writing video annotations to: ' + annofn)
+
     def report_file_already_downloaded(self, file_name):
         """Report file has already been fully downloaded."""
         try:
@@ -522,6 +527,18 @@ class YoutubeDL(object):
                 self.report_error(u'Cannot write description file ' + descfn)
                 return

+        if self.params.get('writeannotations', False):
+            try:
+                annofn = filename + u'.annotations.xml'
+                self.report_writeannotations(annofn)
+                with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
+                    annofile.write(info_dict['annotations'])
+            except (KeyError, TypeError):
+                self.report_warning(u'There are no annotations to write.')
+            except (OSError, IOError):
+                self.report_error(u'Cannot write annotations file: ' + annofn)
+                return
+
         subtitles_are_requested = any([self.params.get('writesubtitles', False),
                                        self.params.get('writeautomaticsub')])
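
End to end, the new option looks like this from the embedding API (video id borrowed from test_write_annotations.py; the extractor has to put an 'annotations' key into the info dict, otherwise only a warning is printed):

import youtube_dl

ydl = youtube_dl.YoutubeDL({
    'writeannotations': True,
    'skip_download': True,         # metadata only; the annotations file is still written
    'outtmpl': u'%(id)s.%(ext)s',
})
ydl.add_default_info_extractors()
ydl.download(['gr51aVj-mLg'])      # -> gr51aVj-mLg.<ext>.annotations.xml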

youtube_dl/__init__.py

@@ -31,6 +31,7 @@ __authors__ = (
     'Huarong Huo',
     'Ismael Mejía',
     'Steffan \'Ruirize\' James',
+    'Andras Elso',
 )

 __license__ = 'Public Domain'
@@ -46,17 +47,43 @@ import shlex
 import socket
 import subprocess
 import sys
-import warnings
+import traceback
 import platform

-from .utils import *
+from .utils import (
+    compat_cookiejar,
+    compat_print,
+    compat_str,
+    compat_urllib_request,
+    DateRange,
+    decodeOption,
+    determine_ext,
+    DownloadError,
+    get_cachedir,
+    make_HTTPS_handler,
+    MaxDownloadsReached,
+    platform_name,
+    preferredencoding,
+    SameFileError,
+    std_headers,
+    write_string,
+    YoutubeDLHandler,
+)
 from .update import update_self
 from .version import __version__
-from .FileDownloader import *
+from .FileDownloader import (
+    FileDownloader,
+)
 from .extractor import gen_extractors
 from .YoutubeDL import YoutubeDL
-from .PostProcessor import *
+from .PostProcessor import (
+    FFmpegMetadataPP,
+    FFmpegVideoConvertor,
+    FFmpegExtractAudioPP,
+    FFmpegEmbedSubtitlePP,
+)


 def parseOpts(overrideArguments=None):
     def _readOptions(filename_bytes):
@@ -240,11 +267,11 @@ def parseOpts(overrideArguments=None):
             help='languages of the subtitles to download (optional) separated by commas, use IETF language tags like \'en,pt\'')

     downloader.add_option('-r', '--rate-limit',
-            dest='ratelimit', metavar='LIMIT', help='maximum download rate (e.g. 50k or 44.6m)')
+            dest='ratelimit', metavar='LIMIT', help='maximum download rate in bytes per second (e.g. 50K or 4.2M)')
     downloader.add_option('-R', '--retries',
             dest='retries', metavar='RETRIES', help='number of retries (default is %default)', default=10)
     downloader.add_option('--buffer-size',
-            dest='buffersize', metavar='SIZE', help='size of download buffer (e.g. 1024 or 16k) (default is %default)', default="1024")
+            dest='buffersize', metavar='SIZE', help='size of download buffer (e.g. 1024 or 16K) (default is %default)', default="1024")
     downloader.add_option('--no-resize-buffer',
             action='store_true', dest='noresizebuffer',
             help='do not automatically adjust the buffer size. By default, the buffer size is automatically resized from an initial value of SIZE.', default=False)
@@ -339,6 +366,9 @@ def parseOpts(overrideArguments=None):
     filesystem.add_option('--write-info-json',
             action='store_true', dest='writeinfojson',
             help='write video metadata to a .info.json file', default=False)
+    filesystem.add_option('--write-annotations',
+            action='store_true', dest='writeannotations',
+            help='write video annotations to a .annotation file', default=False)
     filesystem.add_option('--write-thumbnail',
             action='store_true', dest='writethumbnail',
             help='write thumbnail image to disk', default=False)
@@ -358,6 +388,8 @@ def parseOpts(overrideArguments=None):
             help='do not overwrite post-processed files; the post-processed files are overwritten by default')
     postproc.add_option('--embed-subs', action='store_true', dest='embedsubtitles', default=False,
             help='embed subtitles in the video (only for mp4 videos)')
+    postproc.add_option('--add-metadata', action='store_true', dest='addmetadata', default=False,
+            help='add metadata to the files')

     parser.add_option_group(general)
@@ -603,6 +635,7 @@ def _real_main(argv=None):
         'nopart': opts.nopart,
         'updatetime': opts.updatetime,
         'writedescription': opts.writedescription,
+        'writeannotations': opts.writeannotations,
         'writeinfojson': opts.writeinfojson,
         'writethumbnail': opts.writethumbnail,
         'writesubtitles': opts.writesubtitles,
@@ -655,6 +688,9 @@ def _real_main(argv=None):
     ydl.add_default_info_extractors()

     # PostProcessors
+    # Add the metadata pp first, the other pps will copy it
+    if opts.addmetadata:
+        ydl.add_post_processor(FFmpegMetadataPP())
     if opts.extractaudio:
         ydl.add_post_processor(FFmpegExtractAudioPP(preferredcodec=opts.audioformat, preferredquality=opts.audioquality, nopostoverwrites=opts.nopostoverwrites))
     if opts.recodevideo:
@@ -683,7 +719,7 @@ def _real_main(argv=None):
     if opts.cookiefile is not None:
         try:
             jar.save()
-        except (IOError, OSError) as err:
+        except (IOError, OSError):
             sys.exit(u'ERROR: unable to save cookie jar')

     sys.exit(retcode)

youtube_dl/extractor/__init__.py

@@ -2,7 +2,12 @@ from .appletrailers import AppleTrailersIE
 from .addanime import AddAnimeIE
 from .archiveorg import ArchiveOrgIE
 from .ard import ARDIE
-from .arte import ArteTvIE
+from .arte import (
+    ArteTvIE,
+    ArteTVPlus7IE,
+    ArteTVCreativeIE,
+    ArteTVFutureIE,
+)
 from .auengine import AUEngineIE
 from .bandcamp import BandcampIE
 from .bliptv import BlipTVIE, BlipTVUserIE
@@ -12,6 +17,7 @@ from .brightcove import BrightcoveIE
 from .c56 import C56IE
 from .canalplus import CanalplusIE
 from .canalc2 import Canalc2IE
+from .cinemassacre import CinemassacreIE
 from .cnn import CNNIE
 from .collegehumor import CollegeHumorIE
 from .comedycentral import ComedyCentralIE
@@ -34,6 +40,7 @@ from .eighttracks import EightTracksIE
 from .escapist import EscapistIE
 from .exfm import ExfmIE
 from .facebook import FacebookIE
+from .faz import FazIE
 from .fktv import (
     FKTVIE,
     FKTVPosteckeIE,
@@ -60,6 +67,7 @@ from .ign import IGNIE, OneUPIE
 from .ina import InaIE
 from .infoq import InfoQIE
 from .instagram import InstagramIE
+from .internetvideoarchive import InternetVideoArchiveIE
 from .jeuxvideo import JeuxVideoIE
 from .jukebox import JukeboxIE
 from .justintv import JustinTVIE
@@ -80,6 +88,8 @@ from .naver import NaverIE
 from .nba import NBAIE
 from .nbc import NBCNewsIE
 from .newgrounds import NewgroundsIE
+from .nhl import NHLIE, NHLVideocenterIE
+from .nowvideo import NowVideoIE
 from .ooyala import OoyalaIE
 from .orf import ORFIE
 from .pbs import PBSIE
@@ -89,8 +99,10 @@ from .rbmaradio import RBMARadioIE
 from .redtube import RedTubeIE
 from .ringtv import RingTVIE
 from .ro220 import Ro220IE
+from .rottentomatoes import RottenTomatoesIE
 from .roxwel import RoxwelIE
 from .rtlnow import RTLnowIE
+from .rutube import RutubeIE
 from .sina import SinaIE
 from .slashdot import SlashdotIE
 from .slideshare import SlideshareIE
@@ -101,7 +113,9 @@ from .spiegel import SpiegelIE
 from .stanfordoc import StanfordOpenClassroomIE
 from .statigram import StatigramIE
 from .steam import SteamIE
+from .sztvhu import SztvHuIE
 from .teamcoco import TeamcocoIE
+from .techtalks import TechTalksIE
 from .ted import TEDIE
 from .tf1 import TF1IE
 from .thisav import ThisAVIE
@@ -118,10 +132,13 @@ from .veoh import VeohIE
 from .vevo import VevoIE
 from .vice import ViceIE
 from .viddler import ViddlerIE
+from .videodetective import VideoDetectiveIE
 from .videofyme import VideofyMeIE
+from .videopremium import VideoPremiumIE
 from .vimeo import VimeoIE, VimeoChannelIE
 from .vine import VineIE
 from .wat import WatIE
+from .websurg import WeBSurgIE
 from .weibo import WeiboIE
 from .wimp import WimpIE
 from .worldstarhiphop import WorldStarHipHopIE
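
A quick smoke check that the new registrations took effect (IE_NAME values taken from the extractor files in this commit):

from youtube_dl.extractor import gen_extractors

names = set(ie.IE_NAME for ie in gen_extractors())
for expected in (u'faz.net', u'arte.tv:+7', u'arte.tv:creative', u'arte.tv:future'):
    assert expected in names, expected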

youtube_dl/extractor/arte.py

@@ -1,3 +1,4 @@
+# encoding: utf-8
 import re
 import json
 import xml.etree.ElementTree
@@ -7,15 +8,15 @@ from ..utils import (
     ExtractorError,
     find_xpath_attr,
     unified_strdate,
+    determine_ext,
+    get_element_by_id,
 )

+# There are different sources of video in arte.tv, the extraction process
+# is different for each one. The videos usually expire in 7 days, so we can't
+# add tests.
 class ArteTvIE(InfoExtractor):
-    """
-    There are two sources of video in arte.tv: videos.arte.tv and
-    www.arte.tv/guide, the extraction process is different for each one.
-    The videos expire in 7 days, so we can't add tests.
-    """
-    _EMISSION_URL = r'(?:http://)?www\.arte.tv/guide/(?P<lang>fr|de)/(?:(?:sendungen|emissions)/)?(?P<id>.*?)/(?P<name>.*?)(\?.*)?'
     _VIDEOS_URL = r'(?:http://)?videos.arte.tv/(?P<lang>fr|de)/.*-(?P<id>.*?).html'
     _LIVEWEB_URL = r'(?:http://)?liveweb.arte.tv/(?P<lang>fr|de)/(?P<subpage>.+?)/(?P<name>.+)'
     _LIVE_URL = r'index-[0-9]+\.html$'
@@ -24,7 +25,7 @@ class ArteTvIE(InfoExtractor):

     @classmethod
     def suitable(cls, url):
-        return any(re.match(regex, url) for regex in (cls._EMISSION_URL, cls._VIDEOS_URL, cls._LIVEWEB_URL))
+        return any(re.match(regex, url) for regex in (cls._VIDEOS_URL, cls._LIVEWEB_URL))

     # TODO implement Live Stream
     # from ..utils import compat_urllib_parse
@@ -55,14 +56,6 @@ class ArteTvIE(InfoExtractor):
     #     video_url = u'%s/%s' % (info.get('url'), info.get('path'))

     def _real_extract(self, url):
-        mobj = re.match(self._EMISSION_URL, url)
-        if mobj is not None:
-            lang = mobj.group('lang')
-            # This is not a real id, it can be for example AJT for the news
-            # http://www.arte.tv/guide/fr/emissions/AJT/arte-journal
-            video_id = mobj.group('id')
-            return self._extract_emission(url, video_id, lang)
-
         mobj = re.match(self._VIDEOS_URL, url)
         if mobj is not None:
             id = mobj.group('id')
@@ -80,49 +73,6 @@ class ArteTvIE(InfoExtractor):
         # self.extractLiveStream(url)
         # return

-    def _extract_emission(self, url, video_id, lang):
-        """Extract from www.arte.tv/guide"""
-        webpage = self._download_webpage(url, video_id)
-        json_url = self._html_search_regex(r'arte_vp_url="(.*?)"', webpage, 'json url')
-
-        json_info = self._download_webpage(json_url, video_id, 'Downloading info json')
-        self.report_extraction(video_id)
-        info = json.loads(json_info)
-        player_info = info['videoJsonPlayer']
-
-        info_dict = {'id': player_info['VID'],
-                     'title': player_info['VTI'],
-                     'description': player_info.get('VDE'),
-                     'upload_date': unified_strdate(player_info['VDA'].split(' ')[0]),
-                     'thumbnail': player_info['programImage'],
-                     'ext': 'flv',
-                     }
-
-        formats = player_info['VSR'].values()
-        def _match_lang(f):
-            # Return true if that format is in the language of the url
-            if lang == 'fr':
-                l = 'F'
-            elif lang == 'de':
-                l = 'A'
-            regexes = [r'VO?%s' % l, r'VO?.-ST%s' % l]
-            return any(re.match(r, f['versionCode']) for r in regexes)
-        # Some formats may not be in the same language as the url
-        formats = filter(_match_lang, formats)
-        # We order the formats by quality
-        formats = sorted(formats, key=lambda f: int(f['height']))
-        # Prefer videos without subtitles in the same language
-        formats = sorted(formats, key=lambda f: re.match(r'VO(F|A)-STM\1', f['versionCode']) is None)
-        # Pick the best quality
-        format_info = formats[-1]
-        if format_info['mediaType'] == u'rtmp':
-            info_dict['url'] = format_info['streamer']
-            info_dict['play_path'] = 'mp4:' + format_info['url']
-        else:
-            info_dict['url'] = format_info['url']
-
-        return info_dict
-
     def _extract_video(self, url, video_id, lang):
         """Extract from videos.arte.tv"""
         ref_xml_url = url.replace('/videos/', '/do_delegate/videos/')
@@ -172,3 +122,110 @@ class ArteTvIE(InfoExtractor):
             'ext': 'flv',
             'thumbnail': self._og_search_thumbnail(webpage),
         }
+
+
+class ArteTVPlus7IE(InfoExtractor):
+    IE_NAME = u'arte.tv:+7'
+    _VALID_URL = r'https?://www\.arte.tv/guide/(?P<lang>fr|de)/(?:(?:sendungen|emissions)/)?(?P<id>.*?)/(?P<name>.*?)(\?.*)?'
+
+    @classmethod
+    def _extract_url_info(cls, url):
+        mobj = re.match(cls._VALID_URL, url)
+        lang = mobj.group('lang')
+        # This is not a real id, it can be for example AJT for the news
+        # http://www.arte.tv/guide/fr/emissions/AJT/arte-journal
+        video_id = mobj.group('id')
+        return video_id, lang
+
+    def _real_extract(self, url):
+        video_id, lang = self._extract_url_info(url)
+        webpage = self._download_webpage(url, video_id)
+        return self._extract_from_webpage(webpage, video_id, lang)
+
+    def _extract_from_webpage(self, webpage, video_id, lang):
+        json_url = self._html_search_regex(r'arte_vp_url="(.*?)"', webpage, 'json url')
+
+        json_info = self._download_webpage(json_url, video_id, 'Downloading info json')
+        self.report_extraction(video_id)
+        info = json.loads(json_info)
+        player_info = info['videoJsonPlayer']
+
+        info_dict = {
+            'id': player_info['VID'],
+            'title': player_info['VTI'],
+            'description': player_info.get('VDE'),
+            'upload_date': unified_strdate(player_info.get('VDA', '').split(' ')[0]),
+            'thumbnail': player_info.get('programImage') or player_info.get('VTU', {}).get('IUR'),
+        }
+
+        formats = player_info['VSR'].values()
+        def _match_lang(f):
+            if f.get('versionCode') is None:
+                return True
+            # Return true if that format is in the language of the url
+            if lang == 'fr':
+                l = 'F'
+            elif lang == 'de':
+                l = 'A'
+            regexes = [r'VO?%s' % l, r'VO?.-ST%s' % l]
+            return any(re.match(r, f['versionCode']) for r in regexes)
+        # Some formats may not be in the same language as the url
+        formats = filter(_match_lang, formats)
+        # Some formats use the m3u8 protocol
+        formats = filter(lambda f: f.get('videoFormat') != 'M3U8', formats)
+        # We order the formats by quality
+        formats = sorted(formats, key=lambda f: int(f.get('height',-1)))
+        # Prefer videos without subtitles in the same language
+        formats = sorted(formats, key=lambda f: re.match(r'VO(F|A)-STM\1', f.get('versionCode', '')) is None)
+        # Pick the best quality
+        def _format(format_info):
+            info = {
+                'width': format_info.get('width'),
+                'height': format_info.get('height'),
+            }
+            if format_info['mediaType'] == u'rtmp':
+                info['url'] = format_info['streamer']
+                info['play_path'] = 'mp4:' + format_info['url']
+                info['ext'] = 'flv'
+            else:
+                info['url'] = format_info['url']
+                info['ext'] = determine_ext(info['url'])
+            return info
+        info_dict['formats'] = [_format(f) for f in formats]
+
+        # TODO: Remove when #980 has been merged
+        info_dict.update(info_dict['formats'][-1])
+        return info_dict
+
+
+# It also uses the arte_vp_url url from the webpage to extract the information
+class ArteTVCreativeIE(ArteTVPlus7IE):
+    IE_NAME = u'arte.tv:creative'
+    _VALID_URL = r'https?://creative\.arte\.tv/(?P<lang>fr|de)/magazine?/(?P<id>.+)'
+
+    _TEST = {
+        u'url': u'http://creative.arte.tv/de/magazin/agentur-amateur-corporate-design',
+        u'file': u'050489-002.mp4',
+        u'info_dict': {
+            u'title': u'Agentur Amateur #2 - Corporate Design',
+        },
+    }
+
+
+class ArteTVFutureIE(ArteTVPlus7IE):
+    IE_NAME = u'arte.tv:future'
+    _VALID_URL = r'https?://future\.arte\.tv/(?P<lang>fr|de)/(thema|sujet)/.*?#article-anchor-(?P<id>\d+)'
+
+    _TEST = {
+        u'url': u'http://future.arte.tv/fr/sujet/info-sciences#article-anchor-7081',
+        u'file': u'050940-003.mp4',
+        u'info_dict': {
+            u'title': u'Les champignons au secours de la planète',
+        },
+    }
+
+    def _real_extract(self, url):
+        anchor_id, lang = self._extract_url_info(url)
+        webpage = self._download_webpage(url, anchor_id)
+        row = get_element_by_id(anchor_id, webpage)
+        return self._extract_from_webpage(row, anchor_id, lang)
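
Note the two consecutive sorted() calls in _extract_from_webpage: Python's sort is stable, so the second pass makes "no same-language subtitles" the primary key while heights stay ordered within each group, and formats[-1] picks the winner. A small worked example:

import re

formats = [
    {'height': 720, 'versionCode': 'VOF-STMF'},  # HD, French subtitles in a French version
    {'height': 406, 'versionCode': 'VOF'},       # SD, no subtitles
]
formats = sorted(formats, key=lambda f: int(f.get('height', -1)))
formats = sorted(formats, key=lambda f: re.match(r'VO(F|A)-STM\1', f.get('versionCode', '')) is None)
print(formats[-1]['versionCode'])  # 'VOF': the subtitle-free version wins even at lower height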

youtube_dl/extractor/brightcove.py

@@ -53,6 +53,8 @@ class BrightcoveIE(InfoExtractor):
         # Fix up some stupid HTML, see https://github.com/rg3/youtube-dl/issues/1553
         object_str = re.sub(r'(<param name="[^"]+" value="[^"]+")>',
                             lambda m: m.group(1) + '/>', object_str)
+        # Fix up some stupid XML, see https://github.com/rg3/youtube-dl/issues/1608
+        object_str = object_str.replace(u'<--', u'<!--')

         object_doc = xml.etree.ElementTree.fromstring(object_str)
         assert u'BrightcoveExperience' in object_doc.attrib['class']
@@ -96,7 +98,10 @@ class BrightcoveIE(InfoExtractor):
         playlist_info = self._download_webpage(self._PLAYLIST_URL_TEMPLATE % player_key,
                                                player_key, u'Downloading playlist information')

-        playlist_info = json.loads(playlist_info)['videoList']
+        json_data = json.loads(playlist_info)
+        if 'videoList' not in json_data:
+            raise ExtractorError(u'Empty playlist')
+        playlist_info = json_data['videoList']
         videos = [self._extract_video_info(video_info) for video_info in playlist_info['mediaCollectionDTO']['videoDTOs']]

         return self.playlist_result(videos, playlist_id=playlist_info['id'],

youtube_dl/extractor/cinemassacre.py (new file)

@@ -0,0 +1,91 @@
+# encoding: utf-8
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    ExtractorError,
+)
+
+
+class CinemassacreIE(InfoExtractor):
+    _VALID_URL = r'(?:http://)?(?:www\.)?(?P<url>cinemassacre\.com/(?P<date_Y>[0-9]{4})/(?P<date_m>[0-9]{2})/(?P<date_d>[0-9]{2})/.+?)(?:[/?].*)?'
+    _TESTS = [{
+        u'url': u'http://cinemassacre.com/2012/11/10/avgn-the-movie-trailer/',
+        u'file': u'19911.flv',
+        u'info_dict': {
+            u'upload_date': u'20121110',
+            u'title': u'“Angry Video Game Nerd: The Movie” Trailer',
+            u'description': u'md5:fb87405fcb42a331742a0dce2708560b',
+        },
+        u'params': {
+            # rtmp download
+            u'skip_download': True,
+        },
+    },
+    {
+        u'url': u'http://cinemassacre.com/2013/10/02/the-mummys-hand-1940',
+        u'file': u'521be8ef82b16.flv',
+        u'info_dict': {
+            u'upload_date': u'20131002',
+            u'title': u'The Mummys Hand (1940)',
+        },
+        u'params': {
+            # rtmp download
+            u'skip_download': True,
+        },
+    }]
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+
+        webpage_url = u'http://' + mobj.group('url')
+        webpage = self._download_webpage(webpage_url, None)  # Don't know video id yet
+        video_date = mobj.group('date_Y') + mobj.group('date_m') + mobj.group('date_d')
+        mobj = re.search(r'src="(?P<embed_url>http://player\.screenwavemedia\.com/play/(?:embed|player)\.php\?id=(?:Cinemassacre-)?(?P<video_id>.+?))"', webpage)
+        if not mobj:
+            raise ExtractorError(u'Can\'t extract embed url and video id')
+        playerdata_url = mobj.group(u'embed_url')
+        video_id = mobj.group(u'video_id')
+
+        video_title = self._html_search_regex(r'<title>(?P<title>.+?)\|',
+            webpage, u'title')
+        video_description = self._html_search_regex(r'<div class="entry-content">(?P<description>.+?)</div>',
+            webpage, u'description', flags=re.DOTALL, fatal=False)
+        if len(video_description) == 0:
+            video_description = None
+
+        playerdata = self._download_webpage(playerdata_url, video_id)
+        base_url = self._html_search_regex(r'\'streamer\': \'(?P<base_url>rtmp://.*?)/(?:vod|Cinemassacre)\'',
+            playerdata, u'base_url')
+        base_url += '/Cinemassacre/'
+        # Important: The file names in playerdata are not used by the player and even wrong for some videos
+        sd_file = 'Cinemassacre-%s_high.mp4' % video_id
+        hd_file = 'Cinemassacre-%s.mp4' % video_id
+        video_thumbnail = 'http://image.screenwavemedia.com/Cinemassacre/Cinemassacre-%s_thumb_640x360.jpg' % video_id
+
+        formats = [
+            {
+                'url': base_url + sd_file,
+                'ext': 'flv',
+                'format': 'sd',
+                'format_id': 'sd',
+            },
+            {
+                'url': base_url + hd_file,
+                'ext': 'flv',
+                'format': 'hd',
+                'format_id': 'hd',
+            },
+        ]
+
+        info = {
+            'id': video_id,
+            'title': video_title,
+            'formats': formats,
+            'description': video_description,
+            'upload_date': video_date,
+            'thumbnail': video_thumbnail,
+        }
+        # TODO: Remove when #980 has been merged
+        info.update(formats[-1])
+        return info

youtube_dl/extractor/faz.py (new file)

@ -0,0 +1,60 @@
# encoding: utf-8
import re
import xml.etree.ElementTree
from .common import InfoExtractor
from ..utils import (
determine_ext,
clean_html,
get_element_by_attribute,
)
class FazIE(InfoExtractor):
IE_NAME = u'faz.net'
_VALID_URL = r'https?://www\.faz\.net/multimedia/videos/.*?-(?P<id>\d+).html'
_TEST = {
u'url': u'http://www.faz.net/multimedia/videos/stockholm-chemie-nobelpreis-fuer-drei-amerikanische-forscher-12610585.html',
u'file': u'12610585.mp4',
u'info_dict': {
u'title': u'Stockholm: Chemie-Nobelpreis für drei amerikanische Forscher',
u'description': u'md5:1453fbf9a0d041d985a47306192ea253',
},
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id)
config_xml_url = self._search_regex(r'writeFLV\(\'(.+?)\',', webpage,
u'config xml url')
config_xml = self._download_webpage(config_xml_url, video_id,
u'Downloading config xml')
config = xml.etree.ElementTree.fromstring(config_xml.encode('utf-8'))
encodings = config.find('ENCODINGS')
formats = []
for code in ['LOW', 'HIGH', 'HQ']:
encoding = encodings.find(code)
if encoding is None:
continue
encoding_url = encoding.find('FILENAME').text
formats.append({
'url': encoding_url,
'ext': determine_ext(encoding_url),
'format_id': code.lower(),
})
descr_html = get_element_by_attribute('class', 'Content Copy', webpage)
info = {
'id': video_id,
'title': self._og_search_title(webpage),
'formats': formats,
'description': clean_html(descr_html),
'thumbnail': config.find('STILL/STILL_BIG').text,
}
# TODO: Remove when #980 has been merged
info.update(formats[-1])
return info

View File

@ -1,55 +1,59 @@
import re
import json

from .common import InfoExtractor
from ..utils import (
    compat_urllib_parse,
    compat_urlparse,
    unescapeHTML,
    get_meta_content,
)


class GameSpotIE(InfoExtractor):
    _VALID_URL = r'(?:http://)?(?:www\.)?gamespot\.com/.*-(?P<page_id>\d+)/?'
    _TEST = {
        u"url": u"http://www.gamespot.com/arma-iii/videos/arma-iii-community-guide-sitrep-i-6410818/",
        u"file": u"gs-2300-6410818.mp4",
        u"md5": u"b2a30deaa8654fcccd43713a6b6a4825",
        u"info_dict": {
            u"title": u"Arma 3 - Community Guide: SITREP I",
            u'description': u'Check out this video where some of the basics of Arma 3 is explained.',
        }
    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        page_id = video_id = mobj.group('page_id')
        webpage = self._download_webpage(url, page_id)
        data_video_json = self._search_regex(r'data-video=\'(.*?)\'', webpage, u'data video')
        data_video = json.loads(unescapeHTML(data_video_json))

        # Transform the manifest url to a link to the mp4 files
        # they are used in mobile devices.
        f4m_url = data_video['videoStreams']['f4m_stream']
        f4m_path = compat_urlparse.urlparse(f4m_url).path
        QUALITIES_RE = r'((,\d+)+,?)'
        qualities = self._search_regex(QUALITIES_RE, f4m_path, u'qualities').strip(',').split(',')
        http_path = f4m_path[1:].split('/', 1)[1]
        http_template = re.sub(QUALITIES_RE, r'%s', http_path)
        http_template = http_template.replace('.csmil/manifest.f4m', '')
        http_template = compat_urlparse.urljoin('http://video.gamespotcdn.com/', http_template)
        formats = []
        for q in qualities:
            formats.append({
                'url': http_template % q,
                'ext': 'mp4',
                'format_id': q,
            })

        info = {
            'id': data_video['guid'],
            'title': compat_urllib_parse.unquote(data_video['title']),
            'formats': formats,
            'description': get_meta_content('description', webpage),
            'thumbnail': self._og_search_thumbnail(webpage),
        }
        # TODO: Remove when #980 has been merged
        info.update(formats[-1])
        return info
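
To make the manifest rewrite above easier to follow, here is a standalone worked example (Python 3 stdlib in place of the `compat_*` wrappers; the manifest URL is invented for illustration):

import re
from urllib.parse import urlparse, urljoin

f4m_url = 'http://example.gamespotcdn.net/z/d5/2013/06/gs-2300-6410818_,320,480,720,.mp4.csmil/manifest.f4m'
f4m_path = urlparse(f4m_url).path
QUALITIES_RE = r'((,\d+)+,?)'
qualities = re.search(QUALITIES_RE, f4m_path).group().strip(',').split(',')  # ['320', '480', '720']
http_path = f4m_path[1:].split('/', 1)[1]             # drop the leading '/z/' segment
http_template = re.sub(QUALITIES_RE, r'%s', http_path)
http_template = http_template.replace('.csmil/manifest.f4m', '')
http_template = urljoin('http://video.gamespotcdn.com/', http_template)
print(http_template % qualities[-1])
# http://video.gamespotcdn.com/d5/2013/06/gs-2300-6410818_720.mp4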

View File

@ -11,6 +11,8 @@ from ..utils import (
    compat_urlparse,
    ExtractorError,
    smuggle_url,
    unescapeHTML,
)

from .brightcove import BrightcoveIE

@ -29,6 +31,17 @@ class GenericIE(InfoExtractor):
            u"title": u"R\u00e9gis plante sa Jeep"
        }
    },
    # embedded vimeo video
    {
        u'url': u'http://skillsmatter.com/podcast/home/move-semanticsperfect-forwarding-and-rvalue-references',
        u'file': u'22444065.mp4',
        u'md5': u'2903896e23df39722c33f015af0666e2',
        u'info_dict': {
            u'title': u'ACCU 2011: Move Semantics,Perfect Forwarding, and Rvalue references- Scott Meyers- 13/04/2011',
            u"uploader_id": u"skillsmatter",
            u"uploader": u"Skills Matter",
        }
    }
    ]

    def report_download_webpage(self, video_id):

@ -121,12 +134,20 @@ class GenericIE(InfoExtractor):
        self.report_extraction(video_id)

        # Look for BrightCove:
        m_brightcove = re.search(r'<object[^>]+?class=([\'"])[^>]*?BrightcoveExperience.*?\1.+?</object>', webpage, re.DOTALL)
        if m_brightcove is not None:
            self.to_screen(u'Brightcove video detected.')
            bc_url = BrightcoveIE._build_brighcove_url(m_brightcove.group())
            return self.url_result(bc_url, 'Brightcove')

        # Look for embedded Vimeo player
        mobj = re.search(
            r'<iframe\s+src="(https?://player.vimeo.com/video/.*?)"', webpage)
        if mobj:
            player_url = unescapeHTML(mobj.group(1))
            surl = smuggle_url(player_url, {'Referer': url})
            return self.url_result(surl, 'Vimeo')

        # Start with something easy: JW Player in SWFObject
        mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
        if mobj is None:

View File

@ -0,0 +1,87 @@
import re
import xml.etree.ElementTree
from .common import InfoExtractor
from ..utils import (
compat_urlparse,
compat_urllib_parse,
xpath_with_ns,
determine_ext,
)
class InternetVideoArchiveIE(InfoExtractor):
_VALID_URL = r'https?://video\.internetvideoarchive\.net/flash/players/.*?\?.*?publishedid.*?'
_TEST = {
u'url': u'http://video.internetvideoarchive.net/flash/players/flashconfiguration.aspx?customerid=69249&publishedid=452693&playerid=247',
u'file': u'452693.mp4',
u'info_dict': {
u'title': u'SKYFALL',
u'description': u'In SKYFALL, Bond\'s loyalty to M is tested as her past comes back to haunt her. As MI6 comes under attack, 007 must track down and destroy the threat, no matter how personal the cost.',
u'duration': 156,
},
}
@staticmethod
def _build_url(query):
return 'http://video.internetvideoarchive.net/flash/players/flashconfiguration.aspx?' + query
@staticmethod
def _clean_query(query):
NEEDED_ARGS = ['publishedid', 'customerid']
query_dic = compat_urlparse.parse_qs(query)
cleaned_dic = dict((k,v[0]) for (k,v) in query_dic.items() if k in NEEDED_ARGS)
# Other player ids return m3u8 urls
cleaned_dic['playerid'] = '247'
cleaned_dic['videokbrate'] = '100000'
return compat_urllib_parse.urlencode(cleaned_dic)
def _real_extract(self, url):
query = compat_urlparse.urlparse(url).query
query_dic = compat_urlparse.parse_qs(query)
video_id = query_dic['publishedid'][0]
url = self._build_url(query)
flashconfiguration_xml = self._download_webpage(url, video_id,
u'Downloading flash configuration')
flashconfiguration = xml.etree.ElementTree.fromstring(flashconfiguration_xml.encode('utf-8'))
file_url = flashconfiguration.find('file').text
file_url = file_url.replace('/playlist.aspx', '/mrssplaylist.aspx')
# Replace some of the parameters in the query to get the best quality
# and http links (no m3u8 manifests)
file_url = re.sub(r'(?<=\?)(.+)$',
lambda m: self._clean_query(m.group()),
file_url)
info_xml = self._download_webpage(file_url, video_id,
u'Downloading video info')
info = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8'))
item = info.find('channel/item')
def _bp(p):
return xpath_with_ns(p,
{'media': 'http://search.yahoo.com/mrss/',
'jwplayer': 'http://developer.longtailvideo.com/trac/wiki/FlashFormats'})
formats = []
for content in item.findall(_bp('media:group/media:content')):
attr = content.attrib
f_url = attr['url']
formats.append({
'url': f_url,
'ext': determine_ext(f_url),
'width': int(attr['width']),
'bitrate': int(attr['bitrate']),
})
formats = sorted(formats, key=lambda f: f['bitrate'])
info = {
'id': video_id,
'title': item.find('title').text,
'formats': formats,
'thumbnail': item.find(_bp('media:thumbnail')).attrib['url'],
'description': item.find('description').text,
'duration': int(attr['duration']),
}
# TODO: Remove when #980 has been merged
info.update(formats[-1])
return info
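
A quick sketch of what `_clean_query` does to the embed URL's query string (Python 3 stdlib instead of the `compat_*` wrappers; the input values are invented):

from urllib.parse import parse_qs, urlencode

query = 'customerid=69249&publishedid=452693&playerid=999&e=1'
query_dic = parse_qs(query)
cleaned_dic = dict((k, v[0]) for (k, v) in query_dic.items()
                   if k in ('publishedid', 'customerid'))
cleaned_dic['playerid'] = '247'        # other player ids return m3u8 urls
cleaned_dic['videokbrate'] = '100000'  # request the highest bitrate
print(urlencode(cleaned_dic))
# e.g. publishedid=452693&customerid=69249&playerid=247&videokbrate=100000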

View File

@ -87,7 +87,7 @@ class MTVIE(InfoExtractor):
        description_node = itemdoc.find('description')
        if description_node is not None:
            description = description_node.text.strip()
        else:
            description = None

youtube_dl/extractor/nhl.py (new file, 120 lines)
View File

@ -0,0 +1,120 @@
import re
import json
import xml.etree.ElementTree
from .common import InfoExtractor
from ..utils import (
compat_urlparse,
compat_urllib_parse,
determine_ext,
unified_strdate,
)
class NHLBaseInfoExtractor(InfoExtractor):
@staticmethod
def _fix_json(json_string):
return json_string.replace('\\\'', '\'')
def _extract_video(self, info):
video_id = info['id']
self.report_extraction(video_id)
initial_video_url = info['publishPoint']
data = compat_urllib_parse.urlencode({
'type': 'fvod',
'path': initial_video_url.replace('.mp4', '_sd.mp4'),
})
path_url = 'http://video.nhl.com/videocenter/servlets/encryptvideopath?' + data
path_response = self._download_webpage(path_url, video_id,
u'Downloading final video url')
path_doc = xml.etree.ElementTree.fromstring(path_response)
video_url = path_doc.find('path').text
join = compat_urlparse.urljoin
return {
'id': video_id,
'title': info['name'],
'url': video_url,
'ext': determine_ext(video_url),
'description': info['description'],
'duration': int(info['duration']),
'thumbnail': join(join(video_url, '/u/'), info['bigImage']),
'upload_date': unified_strdate(info['releaseDate'].split('.')[0]),
}
class NHLIE(NHLBaseInfoExtractor):
IE_NAME = u'nhl.com'
_VALID_URL = r'https?://video(?P<team>\.[^.]*)?\.nhl\.com/videocenter/console\?.*?(?<=[?&])id=(?P<id>\d+)'
_TEST = {
u'url': u'http://video.canucks.nhl.com/videocenter/console?catid=6?id=453614',
u'file': u'453614.mp4',
u'info_dict': {
u'title': u'Quick clip: Weise 4-3 goal vs Flames',
u'description': u'Dale Weise scores his first of the season to put the Canucks up 4-3.',
u'duration': 18,
u'upload_date': u'20131006',
},
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
json_url = 'http://video.nhl.com/videocenter/servlets/playlist?ids=%s&format=json' % video_id
info_json = self._download_webpage(json_url, video_id,
u'Downloading info json')
info_json = self._fix_json(info_json)
info = json.loads(info_json)[0]
return self._extract_video(info)
class NHLVideocenterIE(NHLBaseInfoExtractor):
IE_NAME = u'nhl.com:videocenter'
IE_DESC = u'Download the first 12 videos from a videocenter category'
_VALID_URL = r'https?://video\.(?P<team>[^.]*)\.nhl\.com/videocenter/(console\?.*?catid=(?P<catid>[^&]+))?'
@classmethod
def suitable(cls, url):
if NHLIE.suitable(url):
return False
return super(NHLVideocenterIE, cls).suitable(url)
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
team = mobj.group('team')
webpage = self._download_webpage(url, team)
cat_id = self._search_regex(
[r'var defaultCatId = "(.+?)";',
r'{statusIndex:0,index:0,.*?id:(.*?),'],
webpage, u'category id')
playlist_title = self._html_search_regex(
r'\?catid=%s">(.*?)</a>' % cat_id,
webpage, u'playlist title', flags=re.DOTALL)
data = compat_urllib_parse.urlencode({
'cid': cat_id,
# This is the default value
'count': 12,
'ptrs': 3,
'format': 'json',
})
path = '/videocenter/servlets/browse?' + data
request_url = compat_urlparse.urljoin(url, path)
response = self._download_webpage(request_url, playlist_title)
response = self._fix_json(response)
if not response.strip():
            self._downloader.report_warning(u'Got an empty response, trying to '
                                            u'add the "newvideos" parameter')
response = self._download_webpage(request_url + '&newvideos=true',
playlist_title)
response = self._fix_json(response)
videos = json.loads(response)
return {
'_type': 'playlist',
'title': playlist_title,
'id': cat_id,
'entries': [self._extract_video(i) for i in videos],
}

View File

@ -0,0 +1,43 @@
import re
from .common import InfoExtractor
from ..utils import compat_urlparse
class NowVideoIE(InfoExtractor):
_VALID_URL = r'(?:https?://)?(?:www\.)?nowvideo\.ch/video/(?P<id>\w+)'
_TEST = {
u'url': u'http://www.nowvideo.ch/video/0mw0yow7b6dxa',
u'file': u'0mw0yow7b6dxa.flv',
u'md5': u'f8fbbc8add72bd95b7850c6a02fc8817',
u'info_dict': {
u"title": u"youtubedl test video _BaW_jenozKc.mp4"
}
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
webpage_url = 'http://www.nowvideo.ch/video/' + video_id
webpage = self._download_webpage(webpage_url, video_id)
self.report_extraction(video_id)
video_title = self._html_search_regex(r'<h4>(.*)</h4>',
webpage, u'video title')
video_key = self._search_regex(r'var fkzd="(.*)";',
webpage, u'video key')
api_call = "http://www.nowvideo.ch/api/player.api.php?file={0}&numOfErrors=0&cid=1&key={1}".format(video_id, video_key)
api_response = self._download_webpage(api_call, video_id,
u'Downloading API page')
video_url = compat_urlparse.parse_qs(api_response)[u'url'][0]
return [{
'id': video_id,
'url': video_url,
'ext': 'flv',
'title': video_title,
}]
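
The player API answers with a url-encoded key/value string rather than JSON, which is why `parse_qs` is used above. A hypothetical response and how it is picked apart (Python 3 stdlib shown in place of the `compat_*` wrapper):

from urllib.parse import parse_qs

api_response = 'url=http%3A%2F%2Fexample.com%2Fvideo.flv&title=test&error=0'
video_url = parse_qs(api_response)['url'][0]
print(video_url)  # http://example.com/video.flv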

View File

@ -0,0 +1,16 @@
from .videodetective import VideoDetectiveIE
# It just uses the same method as videodetective.com:
# the internetvideoarchive.com url is extracted from the og:video property
class RottenTomatoesIE(VideoDetectiveIE):
_VALID_URL = r'https?://www\.rottentomatoes\.com/m/[^/]+/trailers/(?P<id>\d+)'
_TEST = {
u'url': u'http://www.rottentomatoes.com/m/toy_story_3/trailers/11028566/',
        u'file': u'613340.mp4',
u'info_dict': {
u'title': u'TOY STORY 3',
u'description': u'From the creators of the beloved TOY STORY films, comes a story that will reunite the gang in a whole new way.',
},
}

View File

@ -0,0 +1,58 @@
# encoding: utf-8
import re
import json
from .common import InfoExtractor
from ..utils import (
compat_urlparse,
compat_str,
ExtractorError,
)
class RutubeIE(InfoExtractor):
_VALID_URL = r'https?://rutube.ru/video/(?P<long_id>\w+)'
_TEST = {
u'url': u'http://rutube.ru/video/3eac3b4561676c17df9132a9a1e62e3e/',
u'file': u'3eac3b4561676c17df9132a9a1e62e3e.mp4',
u'info_dict': {
u'title': u'Раненный кенгуру забежал в аптеку',
u'uploader': u'NTDRussian',
u'uploader_id': u'29790',
},
u'params': {
# It requires ffmpeg (m3u8 download)
u'skip_download': True,
},
}
def _get_api_response(self, short_id, subpath):
api_url = 'http://rutube.ru/api/play/%s/%s/?format=json' % (subpath, short_id)
response_json = self._download_webpage(api_url, short_id,
u'Downloading %s json' % subpath)
return json.loads(response_json)
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
long_id = mobj.group('long_id')
webpage = self._download_webpage(url, long_id)
og_video = self._og_search_video_url(webpage)
short_id = compat_urlparse.urlparse(og_video).path[1:]
options = self._get_api_response(short_id, 'options')
trackinfo = self._get_api_response(short_id, 'trackinfo')
# Some videos don't have the author field
author = trackinfo.get('author') or {}
m3u8_url = trackinfo['video_balancer'].get('m3u8')
if m3u8_url is None:
raise ExtractorError(u'Couldn\'t find m3u8 manifest url')
return {
'id': trackinfo['id'],
'title': trackinfo['title'],
'url': m3u8_url,
'ext': 'mp4',
'thumbnail': options['thumbnail_url'],
'uploader': author.get('name'),
'uploader_id': compat_str(author['id']) if author else None,
}

View File

@ -0,0 +1,44 @@
# -*- coding: utf-8 -*-
import re
from .common import InfoExtractor
from ..utils import determine_ext
class SztvHuIE(InfoExtractor):
_VALID_URL = r'(?:http://)?(?:(?:www\.)?sztv\.hu|www\.tvszombathely\.hu)/(?:[^/]+)/.+-(?P<id>[0-9]+)'
_TEST = {
u'url': u'http://sztv.hu/hirek/cserkeszek-nepszerusitettek-a-kornyezettudatos-eletmodot-a-savaria-teren-20130909',
u'file': u'20130909.mp4',
u'md5': u'a6df607b11fb07d0e9f2ad94613375cb',
u'info_dict': {
u"title": u"Cserkészek népszerűsítették a környezettudatos életmódot a Savaria téren",
u"description": u'A zöld nap játékos ismeretterjesztő programjait a Magyar Cserkész Szövetség szervezte, akik az ország nyolc városában adják át tudásukat az érdeklődőknek. A PET...',
}
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id)
video_file = self._search_regex(
r'file: "...:(.*?)",', webpage, 'video file')
title = self._html_search_regex(
r'<meta name="title" content="([^"]*?) - [^-]*? - [^-]*?"',
webpage, 'video title')
description = self._html_search_regex(
r'<meta name="description" content="([^"]*)"/>',
webpage, 'video description', fatal=False)
thumbnail = self._og_search_thumbnail(webpage)
video_url = 'http://media.sztv.hu/vod/' + video_file
return {
'id': video_id,
'url': video_url,
'title': title,
'ext': determine_ext(video_url),
'description': description,
'thumbnail': thumbnail,
}

View File

@ -0,0 +1,65 @@
import re
from .common import InfoExtractor
from ..utils import (
get_element_by_attribute,
clean_html,
)
class TechTalksIE(InfoExtractor):
_VALID_URL = r'https?://techtalks\.tv/talks/[^/]*/(?P<id>\d+)/'
_TEST = {
u'url': u'http://techtalks.tv/talks/learning-topic-models-going-beyond-svd/57758/',
u'playlist': [
{
u'file': u'57758.flv',
u'info_dict': {
u'title': u'Learning Topic Models --- Going beyond SVD',
},
},
{
u'file': u'57758-slides.flv',
u'info_dict': {
u'title': u'Learning Topic Models --- Going beyond SVD',
},
},
],
u'params': {
# rtmp download
u'skip_download': True,
},
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
talk_id = mobj.group('id')
webpage = self._download_webpage(url, talk_id)
rtmp_url = self._search_regex(r'netConnectionUrl: \'(.*?)\'', webpage,
u'rtmp url')
play_path = self._search_regex(r'href=\'(.*?)\' [^>]*id="flowplayer_presenter"',
webpage, u'presenter play path')
title = clean_html(get_element_by_attribute('class', 'title', webpage))
video_info = {
'id': talk_id,
'title': title,
'url': rtmp_url,
'play_path': play_path,
'ext': 'flv',
}
m_slides = re.search(r'<a class="slides" href=\'(.*?)\'', webpage)
if m_slides is None:
return video_info
else:
return [
video_info,
# The slides video
{
'id': talk_id + '-slides',
'title': title,
'url': rtmp_url,
'play_path': m_slides.group(1),
'ext': 'flv',
},
]

View File

@ -7,15 +7,25 @@ from .common import InfoExtractor
class TudouIE(InfoExtractor):
    _VALID_URL = r'(?:http://)?(?:www\.)?tudou\.com/(?:listplay|programs|albumplay)/(?:view|(.+?))/(?:([^/]+)|([^/]+))(?:\.html)?'
    _TESTS = [{
        u'url': u'http://www.tudou.com/listplay/zzdE77v6Mmo/2xN2duXMxmw.html',
        u'file': u'159448201.f4v',
        u'md5': u'140a49ed444bd22f93330985d8475fcb',
        u'info_dict': {
            u"title": u"卡马乔国足开大脚长传冲吊集锦"
        }
    },
    {
        u'url': u'http://www.tudou.com/albumplay/TenTw_JgiPM/PzsAs5usU9A.html',
        u'file': u'todo.mp4',
        u'md5': u'todo.mp4',
        u'info_dict': {
            u'title': u'todo.mp4',
        },
        u'add_ie': [u'Youku'],
        u'skip': u'Only works from China'
    }]

    def _url_for_id(self, id, quality=None):
        info_url = "http://v2.tudou.com/f?id=" + str(id)

@ -29,14 +39,18 @@ class TudouIE(InfoExtractor):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group(2)
        webpage = self._download_webpage(url, video_id)

        m = re.search(r'vcode:\s*[\'"](.+?)[\'"]', webpage)
        if m and m.group(1):
            return {
                '_type': 'url',
                'url': u'youku:' + m.group(1),
                'ie_key': 'Youku'
            }

        title = self._search_regex(r",kw:['\"](.+?)[\"']", webpage, u'title')
        thumbnail_url = self._search_regex(
            r",pic:\s*[\"'](.+?)[\"']", webpage, u'thumbnail URL', fatal=False)

        segs_json = self._search_regex(r'segs: \'(.*)\'', webpage, 'segments')
        segments = json.loads(segs_json)
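
For clarity, the new Youku hand-off in isolation: if the page carries a `vcode`, the extractor returns a url-type result whose `youku:` pseudo-URL is matched by the updated `YoukuIE._VALID_URL` later in this merge (page fragment and vcode invented):

import re

webpage = "...,vcode: 'XMTIzNDU2Nzg5',..."   # invented page fragment
m = re.search(r'vcode:\s*[\'"](.+?)[\'"]', webpage)
if m and m.group(1):
    result = {
        '_type': 'url',
        'url': u'youku:' + m.group(1),   # handled by YoukuIE
        'ie_key': 'Youku',
    }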

View File

@ -1,11 +1,15 @@
import re
import json
import xml.etree.ElementTree
import datetime

from .common import InfoExtractor
from ..utils import (
    determine_ext,
    ExtractorError,
)


class VevoIE(InfoExtractor):
    """
    Accepts urls from vevo.com or in the format 'vevo:{id}'

@ -15,11 +19,11 @@ class VevoIE(InfoExtractor):
    _TEST = {
        u'url': u'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280',
        u'file': u'GB1101300280.mp4',
        u'md5': u'06bea460acb744eab74a9d7dcb4bfd61',
        u'info_dict': {
            u"upload_date": u"20130624",
            u"uploader": u"Hurts",
            u"title": u"Somebody to Die For",
            u'duration': 230,
        }
    }

@ -27,27 +31,47 @@ class VevoIE(InfoExtractor):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        json_url = 'http://videoplayer.vevo.com/VideoService/AuthenticateVideo?isrc=%s' % video_id
        info_json = self._download_webpage(json_url, video_id, u'Downloading json info')

        self.report_extraction(video_id)
        video_info = json.loads(info_json)['video']
        last_version = {'version': -1}
        for version in video_info['videoVersions']:
            # These are the HTTP downloads, other types are for different manifests
            if version['sourceType'] == 2:
                if version['version'] > last_version['version']:
                    last_version = version
        if last_version['version'] == -1:
            raise ExtractorError(u'Unable to extract last version of the video')

        renditions = xml.etree.ElementTree.fromstring(last_version['data'])
        formats = []
        # Already sorted from worst to best quality
        for rend in renditions.findall('rendition'):
            attr = rend.attrib
            f_url = attr['url']
            formats.append({
                'url': f_url,
                'ext': determine_ext(f_url),
                'height': int(attr['frameheight']),
                'width': int(attr['frameWidth']),
            })

        date_epoch = int(self._search_regex(
            r'/Date\((\d+)\)/', video_info['launchDate'], u'launch date')) / 1000
        upload_date = datetime.datetime.fromtimestamp(date_epoch)
        info = {
            'id': video_id,
            'title': video_info['title'],
            'formats': formats,
            'thumbnail': video_info['imageUrl'],
            'upload_date': upload_date.strftime('%Y%m%d'),
            'uploader': video_info['mainArtists'][0]['artistName'],
            'duration': video_info['duration'],
        }
        # TODO: Remove when #980 has been merged
        info.update(formats[-1])
        return info
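
The `launchDate` field uses the Microsoft JSON date format, `/Date(<milliseconds>)/`. A standalone example of the conversion above (timestamp invented; `utcfromtimestamp` is used here so the output does not depend on the local timezone, whereas the extractor uses `fromtimestamp`):

import re
import datetime

launch_date = u'/Date(1372032000000)/'
date_epoch = int(re.search(r'/Date\((\d+)\)/', launch_date).group(1)) / 1000
print(datetime.datetime.utcfromtimestamp(date_epoch).strftime('%Y%m%d'))
# 20130624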

View File

@ -0,0 +1,30 @@
import re
from .common import InfoExtractor
from .internetvideoarchive import InternetVideoArchiveIE
from ..utils import (
compat_urlparse,
)
class VideoDetectiveIE(InfoExtractor):
_VALID_URL = r'https?://www\.videodetective\.com/[^/]+/[^/]+/(?P<id>\d+)'
_TEST = {
u'url': u'http://www.videodetective.com/movies/kick-ass-2/194487',
u'file': u'194487.mp4',
u'info_dict': {
u'title': u'KICK-ASS 2',
u'description': u'md5:65ba37ad619165afac7d432eaded6013',
u'duration': 138,
},
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id)
og_video = self._og_search_video_url(webpage)
query = compat_urlparse.urlparse(og_video).query
return self.url_result(InternetVideoArchiveIE._build_url(query),
ie=InternetVideoArchiveIE.ie_key())

View File

@ -0,0 +1,40 @@
import re
import random
from .common import InfoExtractor
class VideoPremiumIE(InfoExtractor):
_VALID_URL = r'(?:https?://)?(?:www\.)?videopremium\.tv/(?P<id>\w+)(?:/.*)?'
_TEST = {
u'url': u'http://videopremium.tv/4w7oadjsf156',
u'file': u'4w7oadjsf156.f4v',
u'info_dict': {
u"title": u"youtube-dl_test_video____a_________-BaW_jenozKc.mp4.mp4"
},
u'params': {
u'skip_download': True,
},
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
webpage_url = 'http://videopremium.tv/' + video_id
webpage = self._download_webpage(webpage_url, video_id)
self.report_extraction(video_id)
video_title = self._html_search_regex(r'<h2(?:.*?)>\s*(.+?)\s*<',
webpage, u'video title')
return [{
'id': video_id,
'url': "rtmp://e%d.md.iplay.md/play" % random.randint(1, 16),
'play_path': "mp4:%s.f4v" % video_id,
'page_url': "http://videopremium.tv/" + video_id,
'player_url': "http://videopremium.tv/uplayer/uppod.swf",
'ext': 'f4v',
'title': video_title,
}]

View File

@ -11,6 +11,7 @@ from ..utils import (
    get_element_by_attribute,
    ExtractorError,
    std_headers,
    unsmuggle_url,
)

class VimeoIE(InfoExtractor):

@ -53,7 +54,7 @@ class VimeoIE(InfoExtractor):
            u'title': u'Kathy Sierra: Building the minimum Badass User, Business of Software',
            u'uploader': u'The BLN & Business of Software',
        },
    }
    ]

    def _login(self):

@ -98,6 +99,12 @@ class VimeoIE(InfoExtractor):
        self._login()

    def _real_extract(self, url, new_video=True):
        url, data = unsmuggle_url(url)
        headers = std_headers
        if data is not None:
            headers = headers.copy()
            headers.update(data)

        # Extract ID from URL
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:

@ -112,7 +119,7 @@ class VimeoIE(InfoExtractor):
            url = 'https://vimeo.com/' + video_id

        # Retrieve video webpage to extract further information
        request = compat_urllib_request.Request(url, None, headers)
        webpage = self._download_webpage(request, video_id)

        # Now we begin extracting as much information as we can from what we

View File

@ -0,0 +1,59 @@
# coding: utf-8
import re
from ..utils import (
compat_urllib_request,
compat_urllib_parse
)
from .common import InfoExtractor
class WeBSurgIE(InfoExtractor):
IE_NAME = u'websurg.com'
_VALID_URL = r'http://.*?\.websurg\.com/MEDIA/\?noheader=1&doi=(.*)'
_TEST = {
u'url': u'http://www.websurg.com/MEDIA/?noheader=1&doi=vd01en4012',
u'file': u'vd01en4012.mp4',
u'params': {
u'skip_download': True,
},
u'skip': u'Requires login information',
}
_LOGIN_URL = 'http://www.websurg.com/inc/login/login_div.ajax.php?login=1'
def _real_initialize(self):
login_form = {
'username': self._downloader.params['username'],
'password': self._downloader.params['password'],
'Submit': 1
}
request = compat_urllib_request.Request(
self._LOGIN_URL, compat_urllib_parse.urlencode(login_form))
request.add_header(
'Content-Type', 'application/x-www-form-urlencoded;charset=utf-8')
compat_urllib_request.urlopen(request).info()
webpage = self._download_webpage(self._LOGIN_URL, '', 'Logging in')
if webpage != 'OK':
self._downloader.report_error(
u'Unable to log in: bad username/password')
def _real_extract(self, url):
video_id = re.match(self._VALID_URL, url).group(1)
webpage = self._download_webpage(url, video_id)
url_info = re.search(r'streamer="(.*?)" src="(.*?)"', webpage)
return {'id': video_id,
'title': self._og_search_title(webpage),
'description': self._og_search_description(webpage),
'ext' : 'mp4',
'url' : url_info.group(1) + '/' + url_info.group(2),
'thumbnail': self._og_search_thumbnail(webpage)
}

View File

@ -50,6 +50,21 @@ class YahooIE(InfoExtractor):
            webpage, u'items', flags=re.MULTILINE)
        items = json.loads(items_json)
        info = items['mediaItems']['query']['results']['mediaObj'][0]
        # The 'meta' field is not always in the video webpage; we request it
        # from another page
        long_id = info['id']
        query = ('SELECT * FROM yahoo.media.video.streams WHERE id="%s"'
                 ' AND plrs="86Gj0vCaSzV_Iuf6hNylf2"' % long_id)
        data = compat_urllib_parse.urlencode({
            'q': query,
            'env': 'prod',
            'format': 'json',
        })
        query_result_json = self._download_webpage(
            'http://video.query.yahoo.com/v1/public/yql?' + data,
            video_id, u'Downloading video info')
        query_result = json.loads(query_result_json)
        info = query_result['query']['results']['mediaObj'][0]
        meta = info['meta']

        formats = []

View File

@ -13,7 +13,7 @@ from ..utils import (
class YoukuIE(InfoExtractor):
    _VALID_URL = r'(?:(?:http://)?(?:v|player)\.youku\.com/(?:v_show/id_|player\.php/sid/)|youku:)(?P<ID>[A-Za-z0-9]+)(?:\.html|/v\.swf|)'
    _TEST = {
        u"url": u"http://v.youku.com/v_show/id_XNDgyMDQ2NTQw.html",
        u"file": u"XNDgyMDQ2NTQw_part00.flv",

View File

@ -1116,6 +1116,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
                'lang': lang,
                'v': video_id,
                'fmt': self._downloader.params.get('subtitlesformat'),
                'name': l[0],
            })
            url = u'http://www.youtube.com/api/timedtext?' + params
            sub_lang_list[lang] = url

@ -1149,7 +1150,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
        list_page = self._download_webpage(list_url, video_id)
        caption_list = xml.etree.ElementTree.fromstring(list_page.encode('utf-8'))
        original_lang_node = caption_list.find('track')
        if original_lang_node is None or original_lang_node.attrib.get('kind') != 'asr':
            self._downloader.report_warning(u'Video doesn\'t have automatic captions')
            return {}
        original_lang = original_lang_node.attrib['lang_code']

@ -1249,6 +1250,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
            url_map[itag] = format_url
        return url_map

    def _extract_annotations(self, video_id):
        url = 'https://www.youtube.com/annotations_invideo?features=1&legacy=1&video_id=%s' % video_id
        return self._download_webpage(url, video_id, note=u'Searching for annotations.', errnote=u'Unable to download video annotations.')

    def _real_extract(self, url):
        # Extract original video URL from URL with redirection, like age verification, using next_url parameter
        mobj = re.search(self._NEXT_URL_RE, url)

@ -1381,6 +1386,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
        else:
            video_duration = compat_urllib_parse.unquote_plus(video_info['length_seconds'][0])

        # annotations
        video_annotations = None
        if self._downloader.params.get('writeannotations', False):
            video_annotations = self._extract_annotations(video_id)

        # Decide which formats to download
        try:

@ -1494,6 +1504,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
                'subtitles': video_subtitles,
                'duration': video_duration,
                'age_limit': 18 if age_gate else 0,
                'annotations': video_annotations
            })
        return results

@ -1634,7 +1645,7 @@ class YoutubeChannelIE(InfoExtractor):

class YoutubeUserIE(InfoExtractor):
    IE_DESC = u'YouTube.com user videos (URL or "ytuser" keyword)'
    _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?(?!(?:attribution_link|watch)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)([A-Za-z0-9_-]+)'
    _TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s'
    _GDATA_PAGE_SIZE = 50
    _GDATA_URL = 'http://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d&alt=json'

View File

@ -9,6 +9,7 @@ import io
import json
import locale
import os
import pipes
import platform
import re
import socket

@ -229,6 +230,19 @@ else:
        return f
    return None


# On python2.6 the xml.etree.ElementTree.Element methods don't support
# the namespace parameter
def xpath_with_ns(path, ns_map):
    components = [c.split(':') for c in path.split('/')]
    replaced = []
    for c in components:
        if len(c) == 1:
            replaced.append(c[0])
        else:
            ns, tag = c
            replaced.append('{%s}%s' % (ns_map[ns], tag))
    return '/'.join(replaced)
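
# Editor's illustration (not part of utils.py): expanding an MRSS path the way
# the internetvideoarchive extractor above does:
#     xpath_with_ns('media:group/media:content',
#                   {'media': 'http://search.yahoo.com/mrss/'})
# returns '{http://search.yahoo.com/mrss/}group/{http://search.yahoo.com/mrss/}content'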
def htmlentity_transform(matchobj):
    """Transforms an HTML entity to a character.

@ -715,6 +729,7 @@ def unified_strdate(date_str):
        '%Y/%m/%d %H:%M:%S',
        '%d.%m.%Y %H:%M',
        '%Y-%m-%dT%H:%M:%SZ',
        '%Y-%m-%dT%H:%M:%S',
    ]
    for expression in format_expressions:
        try:
@ -926,3 +941,24 @@ class locked_file(object):
    def read(self, *args):
        return self.f.read(*args)


def shell_quote(args):
    return ' '.join(map(pipes.quote, args))


def smuggle_url(url, data):
    """ Pass additional data in a URL for internal use. """
    sdata = compat_urllib_parse.urlencode(
        {u'__youtubedl_smuggle': json.dumps(data)})
    return url + u'#' + sdata


def unsmuggle_url(smug_url):
    if '#__youtubedl_smuggle' not in smug_url:
        return smug_url, None
    url, _, sdata = smug_url.rpartition(u'#')
    jsond = compat_parse_qs(sdata)[u'__youtubedl_smuggle'][0]
    data = json.loads(jsond)
    return url, data
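
A quick round trip shows how the two helpers cooperate (this is how GenericIE passes a Referer to VimeoIE above); the URL and payload here are invented:

url = smuggle_url(u'http://player.vimeo.com/video/22444065',
                  {u'Referer': u'http://example.com/page'})
# url is now 'http://player.vimeo.com/video/22444065#__youtubedl_smuggle=...'
clean_url, data = unsmuggle_url(url)
assert clean_url == u'http://player.vimeo.com/video/22444065'
assert data == {u'Referer': u'http://example.com/page'}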

View File

@ -1,2 +1,2 @@
__version__ = '2013.10.17'
__version__ = '2013.10.07' __version__ = '2013.10.17'