commit 07516b8835
Author: Mark Lee
Date:   2015-02-28 16:13:21 -08:00

    Merge branch 'master' into multipart_videos

    Conflicts:
    	youtube_dl/extractor/mtv.py

74 changed files with 2068 additions and 804 deletions

AUTHORS

@@ -111,3 +111,5 @@ Paul Hartmann
 Frans de Jonge
 Robin de Rooij
 Ryan Schmidt
+Leslie P. Polzer
+Duncan Keall

Makefile

@@ -2,6 +2,7 @@ all: youtube-dl README.md CONTRIBUTING.md README.txt youtube-dl.1 youtube-dl.bas
 clean:
 	rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish *.dump *.part *.info.json *.mp4 *.flv *.mp3 *.avi CONTRIBUTING.md.tmp youtube-dl youtube-dl.exe
+	find -name "*.pyc" -delete
 
 PREFIX ?= /usr/local
 BINDIR ?= $(PREFIX)/bin
@@ -43,7 +44,7 @@ test:
 ot: offlinetest
 
 offlinetest: codetest
-	nosetests --verbose test --exclude test_download --exclude test_age_restriction --exclude test_subtitles --exclude test_write_annotations --exclude test_youtube_lists
+	nosetests --verbose test --exclude test_download.py --exclude test_age_restriction.py --exclude test_subtitles.py --exclude test_write_annotations.py --exclude test_youtube_lists.py
 
 tar: youtube-dl.tar.gz

README.md

@@ -139,6 +139,8 @@ which means you can modify it, redistribute it or use it however you like.
                                      dislike_count <? 50 & description" .
     --no-playlist                    If the URL refers to a video and a
                                      playlist, download only the video.
+    --yes-playlist                   If the URL refers to a video and a
+                                     playlist, download the playlist.
     --age-limit YEARS                download only videos suitable for the given
                                      age
     --download-archive FILE          Download only videos not listed in the
@@ -351,8 +353,8 @@ which means you can modify it, redistribute it or use it however you like.
     --all-subs                       downloads all the available subtitles of
                                      the video
     --list-subs                      lists all available subtitles for the video
-    --sub-format FORMAT              subtitle format (default=srt) ([sbv/vtt]
-                                     youtube only)
+    --sub-format FORMAT              subtitle format, accepts formats
+                                     preference, for example: "ass/srt/best"
     --sub-lang LANGS                 languages of the subtitles to download
                                      (optional) separated by commas, use IETF
                                      language tags like 'en,pt'
@@ -406,6 +408,8 @@ which means you can modify it, redistribute it or use it however you like.
                                      downloading, similar to find's -exec
                                      syntax. Example: --exec 'adb push {}
                                      /sdcard/Music/ && rm {}'
+    --convert-subtitles FORMAT       Convert the subtitles to other format
+                                     (currently supported: srt|ass|vtt)
 
 # CONFIGURATION
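The two subtitle options touched above can also be exercised through the embedding API. A minimal sketch, not part of this commit; the video ID is the one the test suite uses, and all parameter keys appear in the diffs below:

```python
import youtube_dl

ydl_opts = {
    'writesubtitles': True,
    # New preference syntax from this change: try ass, fall back to srt,
    # then to whatever the extractor lists as best.
    'subtitlesformat': 'ass/srt/best',
    'subtitleslangs': ['en', 'pt'],
    'postprocessors': [{
        # Library-side equivalent of --convert-subtitles srt
        'key': 'FFmpegSubtitlesConvertor',
        'format': 'srt',
    }],
}

with youtube_dl.YoutubeDL(ydl_opts) as ydl:
    ydl.download(['https://www.youtube.com/watch?v=QRS8MkLhQmM'])
```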

devscripts/check-porn.py

@@ -45,12 +45,12 @@ for test in get_testcases():
     RESULT = ('.' + domain + '\n' in LIST or '\n' + domain + '\n' in LIST)
 
-    if RESULT and ('info_dict' not in test or 'age_limit' not in test['info_dict']
-            or test['info_dict']['age_limit'] != 18):
+    if RESULT and ('info_dict' not in test or 'age_limit' not in test['info_dict'] or
+                   test['info_dict']['age_limit'] != 18):
         print('\nPotential missing age_limit check: {0}'.format(test['name']))
 
-    elif not RESULT and ('info_dict' in test and 'age_limit' in test['info_dict']
-            and test['info_dict']['age_limit'] == 18):
+    elif not RESULT and ('info_dict' in test and 'age_limit' in test['info_dict'] and
+                         test['info_dict']['age_limit'] == 18):
         print('\nPotential false negative: {0}'.format(test['name']))
 
     else:

docs/supportedsites.md

@@ -17,6 +17,7 @@
 - **AdultSwim**
 - **Aftenposten**
 - **Aftonbladet**
+- **AirMozilla**
 - **AlJazeera**
 - **Allocine**
 - **AlphaPorno**
@@ -72,6 +73,8 @@
 - **CeskaTelevize**
 - **channel9**: Channel 9
 - **Chilloutzone**
+- **chirbit**
+- **chirbit:profile**
 - **Cinchcast**
 - **Cinemassacre**
 - **clipfish**
@@ -207,6 +210,7 @@
 - **Jove**
 - **jpopsuki.tv**
 - **Jukebox**
+- **Kaltura**
 - **Kankan**
 - **Karaoketv**
 - **keek**
@@ -218,6 +222,9 @@
 - **Ku6**
 - **la7.tv**
 - **Laola1Tv**
+- **Letv**
+- **LetvPlaylist**
+- **LetvTv**
 - **lifenews**: LIFE | NEWS
 - **LiveLeak**
 - **livestream**
@@ -302,6 +309,7 @@
 - **Nuvid**
 - **NYTimes**
 - **ocw.mit.edu**
+- **Odnoklassniki**
 - **OktoberfestTV**
 - **on.aol.com**
 - **Ooyala**
@@ -328,8 +336,10 @@
 - **PornoXO**
 - **PromptFile**
 - **prosiebensat1**: ProSiebenSat.1 Digital
+- **Puls4**
 - **Pyvideo**
 - **QuickVid**
+- **R7**
 - **radio.de**
 - **radiobremen**
 - **radiofrance**
@@ -385,7 +395,8 @@
 - **soundcloud:playlist**
 - **soundcloud:set**
 - **soundcloud:user**
-- **Soundgasm**
+- **soundgasm**
+- **soundgasm:profile**
 - **southpark.cc.com**
 - **southpark.de**
 - **Space**
@@ -404,7 +415,7 @@
 - **StreamCZ**
 - **StreetVoice**
 - **SunPorno**
-- **SVTPlay**
+- **SVTPlay**: SVT Play and Öppet arkiv
 - **SWRMediathek**
 - **Syfy**
 - **SztvHu**
@@ -559,6 +570,7 @@
 - **youtube:subscriptions**: YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)
 - **youtube:user**: YouTube.com user videos (URL or "ytuser" keyword)
 - **youtube:watch_later**: Youtube watch later list, ":ytwatchlater" for short (requires authentication)
+- **Zapiks**
 - **ZDF**
 - **ZDFChannel**
 - **zingmp3:album**: mp3.zing.vn albums

test/parameters.json

@@ -28,7 +28,7 @@
     "retries": 10,
     "simulate": false,
     "subtitleslang": null,
-    "subtitlesformat": "srt",
+    "subtitlesformat": "best",
     "test": true,
     "updatetime": true,
     "usenetrc": false,

test/test_YoutubeDL.py

@@ -337,6 +337,65 @@ class TestFormatSelection(unittest.TestCase):
         downloaded = ydl.downloaded_info_dicts[0]
         self.assertEqual(downloaded['format_id'], 'G')
 
+    def test_subtitles(self):
+        def s_formats(lang, autocaption=False):
+            return [{
+                'ext': ext,
+                'url': 'http://localhost/video.%s.%s' % (lang, ext),
+                '_auto': autocaption,
+            } for ext in ['vtt', 'srt', 'ass']]
+        subtitles = dict((l, s_formats(l)) for l in ['en', 'fr', 'es'])
+        auto_captions = dict((l, s_formats(l, True)) for l in ['it', 'pt', 'es'])
+        info_dict = {
+            'id': 'test',
+            'title': 'Test',
+            'url': 'http://localhost/video.mp4',
+            'subtitles': subtitles,
+            'automatic_captions': auto_captions,
+            'extractor': 'TEST',
+        }
+
+        def get_info(params={}):
+            params.setdefault('simulate', True)
+            ydl = YDL(params)
+            ydl.report_warning = lambda *args, **kargs: None
+            return ydl.process_video_result(info_dict, download=False)
+
+        result = get_info()
+        self.assertFalse(result.get('requested_subtitles'))
+        self.assertEqual(result['subtitles'], subtitles)
+        self.assertEqual(result['automatic_captions'], auto_captions)
+
+        result = get_info({'writesubtitles': True})
+        subs = result['requested_subtitles']
+        self.assertTrue(subs)
+        self.assertEqual(set(subs.keys()), set(['en']))
+        self.assertTrue(subs['en'].get('data') is None)
+        self.assertEqual(subs['en']['ext'], 'ass')
+
+        result = get_info({'writesubtitles': True, 'subtitlesformat': 'foo/srt'})
+        subs = result['requested_subtitles']
+        self.assertEqual(subs['en']['ext'], 'srt')
+
+        result = get_info({'writesubtitles': True, 'subtitleslangs': ['es', 'fr', 'it']})
+        subs = result['requested_subtitles']
+        self.assertTrue(subs)
+        self.assertEqual(set(subs.keys()), set(['es', 'fr']))
+
+        result = get_info({'writesubtitles': True, 'writeautomaticsub': True, 'subtitleslangs': ['es', 'pt']})
+        subs = result['requested_subtitles']
+        self.assertTrue(subs)
+        self.assertEqual(set(subs.keys()), set(['es', 'pt']))
+        self.assertFalse(subs['es']['_auto'])
+        self.assertTrue(subs['pt']['_auto'])
+
+        result = get_info({'writeautomaticsub': True, 'subtitleslangs': ['es', 'pt']})
+        subs = result['requested_subtitles']
+        self.assertTrue(subs)
+        self.assertEqual(set(subs.keys()), set(['es', 'pt']))
+        self.assertTrue(subs['es']['_auto'])
+        self.assertTrue(subs['pt']['_auto'])
+
     def test_add_extra_info(self):
         test_dict = {
             'extractor': 'Foo',

test/test_subtitles.py

@@ -18,6 +18,14 @@ from youtube_dl.extractor import (
     VimeoIE,
     WallaIE,
     CeskaTelevizeIE,
+    LyndaIE,
+    NPOIE,
+    ComedyCentralIE,
+    NRKTVIE,
+    RaiIE,
+    VikiIE,
+    ThePlatformIE,
+    RTVEALaCartaIE,
 )
@@ -27,42 +35,38 @@ class BaseTestSubtitles(unittest.TestCase):
     def setUp(self):
         self.DL = FakeYDL()
-        self.ie = self.IE(self.DL)
+        self.ie = self.IE()
+        self.DL.add_info_extractor(self.ie)
 
     def getInfoDict(self):
-        info_dict = self.ie.extract(self.url)
+        info_dict = self.DL.extract_info(self.url, download=False)
         return info_dict
 
     def getSubtitles(self):
         info_dict = self.getInfoDict()
-        return info_dict['subtitles']
+        subtitles = info_dict['requested_subtitles']
+        if not subtitles:
+            return subtitles
+        for sub_info in subtitles.values():
+            if sub_info.get('data') is None:
+                uf = self.DL.urlopen(sub_info['url'])
+                sub_info['data'] = uf.read().decode('utf-8')
+        return dict((l, sub_info['data']) for l, sub_info in subtitles.items())
 
 
 class TestYoutubeSubtitles(BaseTestSubtitles):
     url = 'QRS8MkLhQmM'
     IE = YoutubeIE
 
-    def test_youtube_no_writesubtitles(self):
-        self.DL.params['writesubtitles'] = False
-        subtitles = self.getSubtitles()
-        self.assertEqual(subtitles, None)
-
-    def test_youtube_subtitles(self):
-        self.DL.params['writesubtitles'] = True
-        subtitles = self.getSubtitles()
-        self.assertEqual(md5(subtitles['en']), '4cd9278a35ba2305f47354ee13472260')
-
-    def test_youtube_subtitles_lang(self):
-        self.DL.params['writesubtitles'] = True
-        self.DL.params['subtitleslangs'] = ['it']
-        subtitles = self.getSubtitles()
-        self.assertEqual(md5(subtitles['it']), '164a51f16f260476a05b50fe4c2f161d')
-
     def test_youtube_allsubtitles(self):
         self.DL.params['writesubtitles'] = True
         self.DL.params['allsubtitles'] = True
         subtitles = self.getSubtitles()
         self.assertEqual(len(subtitles.keys()), 13)
+        self.assertEqual(md5(subtitles['en']), '4cd9278a35ba2305f47354ee13472260')
+        self.assertEqual(md5(subtitles['it']), '164a51f16f260476a05b50fe4c2f161d')
+        for lang in ['it', 'fr', 'de']:
+            self.assertTrue(subtitles.get(lang) is not None, 'Subtitles for \'%s\' not extracted' % lang)
 
     def test_youtube_subtitles_sbv_format(self):
         self.DL.params['writesubtitles'] = True
@@ -76,12 +80,6 @@ class TestYoutubeSubtitles(BaseTestSubtitles):
         subtitles = self.getSubtitles()
         self.assertEqual(md5(subtitles['en']), '3cb210999d3e021bd6c7f0ea751eab06')
 
-    def test_youtube_list_subtitles(self):
-        self.DL.expect_warning('Video doesn\'t have automatic captions')
-        self.DL.params['listsubtitles'] = True
-        info_dict = self.getInfoDict()
-        self.assertEqual(info_dict, None)
-
     def test_youtube_automatic_captions(self):
         self.url = '8YoUxe5ncPo'
         self.DL.params['writeautomaticsub'] = True
@@ -103,55 +101,22 @@ class TestYoutubeSubtitles(BaseTestSubtitles):
         self.DL.params['writesubtitles'] = True
         self.DL.params['allsubtitles'] = True
         subtitles = self.getSubtitles()
-        self.assertEqual(len(subtitles), 0)
-
-    def test_youtube_multiple_langs(self):
-        self.url = 'QRS8MkLhQmM'
-        self.DL.params['writesubtitles'] = True
-        langs = ['it', 'fr', 'de']
-        self.DL.params['subtitleslangs'] = langs
-        subtitles = self.getSubtitles()
-        for lang in langs:
-            self.assertTrue(subtitles.get(lang) is not None, 'Subtitles for \'%s\' not extracted' % lang)
+        self.assertFalse(subtitles)
 
 
 class TestDailymotionSubtitles(BaseTestSubtitles):
     url = 'http://www.dailymotion.com/video/xczg00'
     IE = DailymotionIE
 
-    def test_no_writesubtitles(self):
-        subtitles = self.getSubtitles()
-        self.assertEqual(subtitles, None)
-
-    def test_subtitles(self):
-        self.DL.params['writesubtitles'] = True
-        subtitles = self.getSubtitles()
-        self.assertEqual(md5(subtitles['en']), '976553874490cba125086bbfea3ff76f')
-
-    def test_subtitles_lang(self):
-        self.DL.params['writesubtitles'] = True
-        self.DL.params['subtitleslangs'] = ['fr']
-        subtitles = self.getSubtitles()
-        self.assertEqual(md5(subtitles['fr']), '594564ec7d588942e384e920e5341792')
-
     def test_allsubtitles(self):
         self.DL.params['writesubtitles'] = True
         self.DL.params['allsubtitles'] = True
         subtitles = self.getSubtitles()
         self.assertTrue(len(subtitles.keys()) >= 6)
-
-    def test_list_subtitles(self):
-        self.DL.expect_warning('Automatic Captions not supported by this server')
-        self.DL.params['listsubtitles'] = True
-        info_dict = self.getInfoDict()
-        self.assertEqual(info_dict, None)
-
-    def test_automatic_captions(self):
-        self.DL.expect_warning('Automatic Captions not supported by this server')
-        self.DL.params['writeautomaticsub'] = True
-        self.DL.params['subtitleslang'] = ['en']
-        subtitles = self.getSubtitles()
-        self.assertTrue(len(subtitles.keys()) == 0)
+        self.assertEqual(md5(subtitles['en']), '976553874490cba125086bbfea3ff76f')
+        self.assertEqual(md5(subtitles['fr']), '594564ec7d588942e384e920e5341792')
+        for lang in ['es', 'fr', 'de']:
+            self.assertTrue(subtitles.get(lang) is not None, 'Subtitles for \'%s\' not extracted' % lang)
 
     def test_nosubtitles(self):
         self.DL.expect_warning('video doesn\'t have subtitles')
@@ -159,61 +124,21 @@ class TestDailymotionSubtitles(BaseTestSubtitles):
         self.DL.params['writesubtitles'] = True
         self.DL.params['allsubtitles'] = True
         subtitles = self.getSubtitles()
-        self.assertEqual(len(subtitles), 0)
-
-    def test_multiple_langs(self):
-        self.DL.params['writesubtitles'] = True
-        langs = ['es', 'fr', 'de']
-        self.DL.params['subtitleslangs'] = langs
-        subtitles = self.getSubtitles()
-        for lang in langs:
-            self.assertTrue(subtitles.get(lang) is not None, 'Subtitles for \'%s\' not extracted' % lang)
+        self.assertFalse(subtitles)
 
 
 class TestTedSubtitles(BaseTestSubtitles):
     url = 'http://www.ted.com/talks/dan_dennett_on_our_consciousness.html'
     IE = TEDIE
 
-    def test_no_writesubtitles(self):
-        subtitles = self.getSubtitles()
-        self.assertEqual(subtitles, None)
-
-    def test_subtitles(self):
-        self.DL.params['writesubtitles'] = True
-        subtitles = self.getSubtitles()
-        self.assertEqual(md5(subtitles['en']), '4262c1665ff928a2dada178f62cb8d14')
-
-    def test_subtitles_lang(self):
-        self.DL.params['writesubtitles'] = True
-        self.DL.params['subtitleslangs'] = ['fr']
-        subtitles = self.getSubtitles()
-        self.assertEqual(md5(subtitles['fr']), '66a63f7f42c97a50f8c0e90bc7797bb5')
-
     def test_allsubtitles(self):
         self.DL.params['writesubtitles'] = True
         self.DL.params['allsubtitles'] = True
         subtitles = self.getSubtitles()
         self.assertTrue(len(subtitles.keys()) >= 28)
-
-    def test_list_subtitles(self):
-        self.DL.expect_warning('Automatic Captions not supported by this server')
-        self.DL.params['listsubtitles'] = True
-        info_dict = self.getInfoDict()
-        self.assertEqual(info_dict, None)
-
-    def test_automatic_captions(self):
-        self.DL.expect_warning('Automatic Captions not supported by this server')
-        self.DL.params['writeautomaticsub'] = True
-        self.DL.params['subtitleslang'] = ['en']
-        subtitles = self.getSubtitles()
-        self.assertTrue(len(subtitles.keys()) == 0)
-
-    def test_multiple_langs(self):
-        self.DL.params['writesubtitles'] = True
-        langs = ['es', 'fr', 'de']
-        self.DL.params['subtitleslangs'] = langs
-        subtitles = self.getSubtitles()
-        for lang in langs:
-            self.assertTrue(subtitles.get(lang) is not None, 'Subtitles for \'%s\' not extracted' % lang)
+        self.assertEqual(md5(subtitles['en']), '4262c1665ff928a2dada178f62cb8d14')
+        self.assertEqual(md5(subtitles['fr']), '66a63f7f42c97a50f8c0e90bc7797bb5')
+        for lang in ['es', 'fr', 'de']:
+            self.assertTrue(subtitles.get(lang) is not None, 'Subtitles for \'%s\' not extracted' % lang)
@@ -221,14 +146,7 @@ class TestBlipTVSubtitles(BaseTestSubtitles):
     url = 'http://blip.tv/a/a-6603250'
     IE = BlipTVIE
 
-    def test_list_subtitles(self):
-        self.DL.expect_warning('Automatic Captions not supported by this server')
-        self.DL.params['listsubtitles'] = True
-        info_dict = self.getInfoDict()
-        self.assertEqual(info_dict, None)
-
     def test_allsubtitles(self):
-        self.DL.expect_warning('Automatic Captions not supported by this server')
         self.DL.params['writesubtitles'] = True
         self.DL.params['allsubtitles'] = True
         subtitles = self.getSubtitles()
@@ -240,39 +158,13 @@ class TestVimeoSubtitles(BaseTestSubtitles):
     url = 'http://vimeo.com/76979871'
     IE = VimeoIE
 
-    def test_no_writesubtitles(self):
-        subtitles = self.getSubtitles()
-        self.assertEqual(subtitles, None)
-
-    def test_subtitles(self):
-        self.DL.params['writesubtitles'] = True
-        subtitles = self.getSubtitles()
-        self.assertEqual(md5(subtitles['en']), '8062383cf4dec168fc40a088aa6d5888')
-
-    def test_subtitles_lang(self):
-        self.DL.params['writesubtitles'] = True
-        self.DL.params['subtitleslangs'] = ['fr']
-        subtitles = self.getSubtitles()
-        self.assertEqual(md5(subtitles['fr']), 'b6191146a6c5d3a452244d853fde6dc8')
-
     def test_allsubtitles(self):
         self.DL.params['writesubtitles'] = True
         self.DL.params['allsubtitles'] = True
         subtitles = self.getSubtitles()
         self.assertEqual(set(subtitles.keys()), set(['de', 'en', 'es', 'fr']))
-
-    def test_list_subtitles(self):
-        self.DL.expect_warning('Automatic Captions not supported by this server')
-        self.DL.params['listsubtitles'] = True
-        info_dict = self.getInfoDict()
-        self.assertEqual(info_dict, None)
-
-    def test_automatic_captions(self):
-        self.DL.expect_warning('Automatic Captions not supported by this server')
-        self.DL.params['writeautomaticsub'] = True
-        self.DL.params['subtitleslang'] = ['en']
-        subtitles = self.getSubtitles()
-        self.assertTrue(len(subtitles.keys()) == 0)
+        self.assertEqual(md5(subtitles['en']), '8062383cf4dec168fc40a088aa6d5888')
+        self.assertEqual(md5(subtitles['fr']), 'b6191146a6c5d3a452244d853fde6dc8')
 
     def test_nosubtitles(self):
         self.DL.expect_warning('video doesn\'t have subtitles')
@@ -280,27 +172,13 @@ class TestVimeoSubtitles(BaseTestSubtitles):
         self.DL.params['writesubtitles'] = True
         self.DL.params['allsubtitles'] = True
         subtitles = self.getSubtitles()
-        self.assertEqual(len(subtitles), 0)
-
-    def test_multiple_langs(self):
-        self.DL.params['writesubtitles'] = True
-        langs = ['es', 'fr', 'de']
-        self.DL.params['subtitleslangs'] = langs
-        subtitles = self.getSubtitles()
-        for lang in langs:
-            self.assertTrue(subtitles.get(lang) is not None, 'Subtitles for \'%s\' not extracted' % lang)
+        self.assertFalse(subtitles)
 
 
 class TestWallaSubtitles(BaseTestSubtitles):
     url = 'http://vod.walla.co.il/movie/2705958/the-yes-men'
     IE = WallaIE
 
-    def test_list_subtitles(self):
-        self.DL.expect_warning('Automatic Captions not supported by this server')
-        self.DL.params['listsubtitles'] = True
-        info_dict = self.getInfoDict()
-        self.assertEqual(info_dict, None)
-
     def test_allsubtitles(self):
         self.DL.expect_warning('Automatic Captions not supported by this server')
         self.DL.params['writesubtitles'] = True
@@ -315,19 +193,13 @@ class TestWallaSubtitles(BaseTestSubtitles):
         self.DL.params['writesubtitles'] = True
         self.DL.params['allsubtitles'] = True
         subtitles = self.getSubtitles()
-        self.assertEqual(len(subtitles), 0)
+        self.assertFalse(subtitles)
 
 
 class TestCeskaTelevizeSubtitles(BaseTestSubtitles):
     url = 'http://www.ceskatelevize.cz/ivysilani/10600540290-u6-uzasny-svet-techniky'
     IE = CeskaTelevizeIE
 
-    def test_list_subtitles(self):
-        self.DL.expect_warning('Automatic Captions not supported by this server')
-        self.DL.params['listsubtitles'] = True
-        info_dict = self.getInfoDict()
-        self.assertEqual(info_dict, None)
-
     def test_allsubtitles(self):
         self.DL.expect_warning('Automatic Captions not supported by this server')
         self.DL.params['writesubtitles'] = True
@@ -342,7 +214,110 @@ class TestCeskaTelevizeSubtitles(BaseTestSubtitles):
         self.DL.params['writesubtitles'] = True
         self.DL.params['allsubtitles'] = True
         subtitles = self.getSubtitles()
-        self.assertEqual(len(subtitles), 0)
+        self.assertFalse(subtitles)
+
+
+class TestLyndaSubtitles(BaseTestSubtitles):
+    url = 'http://www.lynda.com/Bootstrap-tutorials/Using-exercise-files/110885/114408-4.html'
+    IE = LyndaIE
+
+    def test_allsubtitles(self):
+        self.DL.params['writesubtitles'] = True
+        self.DL.params['allsubtitles'] = True
+        subtitles = self.getSubtitles()
+        self.assertEqual(set(subtitles.keys()), set(['en']))
+        self.assertEqual(md5(subtitles['en']), '09bbe67222259bed60deaa26997d73a7')
+
+
+class TestNPOSubtitles(BaseTestSubtitles):
+    url = 'http://www.npo.nl/nos-journaal/28-08-2014/POW_00722860'
+    IE = NPOIE
+
+    def test_allsubtitles(self):
+        self.DL.params['writesubtitles'] = True
+        self.DL.params['allsubtitles'] = True
+        subtitles = self.getSubtitles()
+        self.assertEqual(set(subtitles.keys()), set(['nl']))
+        self.assertEqual(md5(subtitles['nl']), 'fc6435027572b63fb4ab143abd5ad3f4')
+
+
+class TestMTVSubtitles(BaseTestSubtitles):
+    url = 'http://www.cc.com/video-clips/kllhuv/stand-up-greg-fitzsimmons--uncensored---too-good-of-a-mother'
+    IE = ComedyCentralIE
+
+    def getInfoDict(self):
+        return super(TestMTVSubtitles, self).getInfoDict()['entries'][0]
+
+    def test_allsubtitles(self):
+        self.DL.params['writesubtitles'] = True
+        self.DL.params['allsubtitles'] = True
+        subtitles = self.getSubtitles()
+        self.assertEqual(set(subtitles.keys()), set(['en']))
+        self.assertEqual(md5(subtitles['en']), 'b9f6ca22a6acf597ec76f61749765e65')
+
+
+class TestNRKSubtitles(BaseTestSubtitles):
+    url = 'http://tv.nrk.no/serie/ikke-gjoer-dette-hjemme/DMPV73000411/sesong-2/episode-1'
+    IE = NRKTVIE
+
+    def test_allsubtitles(self):
+        self.DL.params['writesubtitles'] = True
+        self.DL.params['allsubtitles'] = True
+        subtitles = self.getSubtitles()
+        self.assertEqual(set(subtitles.keys()), set(['no']))
+        self.assertEqual(md5(subtitles['no']), '1d221e6458c95c5494dcd38e6a1f129a')
+
+
+class TestRaiSubtitles(BaseTestSubtitles):
+    url = 'http://www.rai.tv/dl/RaiTV/programmi/media/ContentItem-cb27157f-9dd0-4aee-b788-b1f67643a391.html'
+    IE = RaiIE
+
+    def test_allsubtitles(self):
+        self.DL.params['writesubtitles'] = True
+        self.DL.params['allsubtitles'] = True
+        subtitles = self.getSubtitles()
+        self.assertEqual(set(subtitles.keys()), set(['it']))
+        self.assertEqual(md5(subtitles['it']), 'b1d90a98755126b61e667567a1f6680a')
+
+
+class TestVikiSubtitles(BaseTestSubtitles):
+    url = 'http://www.viki.com/videos/1060846v-punch-episode-18'
+    IE = VikiIE
+
+    def test_allsubtitles(self):
+        self.DL.params['writesubtitles'] = True
+        self.DL.params['allsubtitles'] = True
+        subtitles = self.getSubtitles()
+        self.assertEqual(set(subtitles.keys()), set(['en']))
+        self.assertEqual(md5(subtitles['en']), '53cb083a5914b2d84ef1ab67b880d18a')
+
+
+class TestThePlatformSubtitles(BaseTestSubtitles):
+    # from http://www.3playmedia.com/services-features/tools/integrations/theplatform/
+    # (see http://theplatform.com/about/partners/type/subtitles-closed-captioning/)
+    url = 'theplatform:JFUjUE1_ehvq'
+    IE = ThePlatformIE
+
+    def test_allsubtitles(self):
+        self.DL.params['writesubtitles'] = True
+        self.DL.params['allsubtitles'] = True
+        subtitles = self.getSubtitles()
+        self.assertEqual(set(subtitles.keys()), set(['en']))
+        self.assertEqual(md5(subtitles['en']), '97e7670cbae3c4d26ae8bcc7fdd78d4b')
+
+
+class TestRtveSubtitles(BaseTestSubtitles):
+    url = 'http://www.rtve.es/alacarta/videos/los-misterios-de-laura/misterios-laura-capitulo-32-misterio-del-numero-17-2-parte/2428621/'
+    IE = RTVEALaCartaIE
+
+    def test_allsubtitles(self):
+        print('Skipping, only available from Spain')
+        return
+        self.DL.params['writesubtitles'] = True
+        self.DL.params['allsubtitles'] = True
+        subtitles = self.getSubtitles()
+        self.assertEqual(set(subtitles.keys()), set(['es']))
+        self.assertEqual(md5(subtitles['es']), '69e70cae2d40574fb7316f31d6eb7fca')
 
 
 if __name__ == '__main__':

test/test_swfinterp.py

@@ -34,8 +34,8 @@ def _make_testfunc(testfile):
     def test_func(self):
         as_file = os.path.join(TEST_DIR, testfile)
         swf_file = os.path.join(TEST_DIR, test_id + '.swf')
-        if ((not os.path.exists(swf_file))
-                or os.path.getmtime(swf_file) < os.path.getmtime(as_file)):
+        if ((not os.path.exists(swf_file)) or
+                os.path.getmtime(swf_file) < os.path.getmtime(as_file)):
             # Recompile
             try:
                 subprocess.check_call([

test/test_utils.py

@@ -85,6 +85,8 @@ class TestUtil(unittest.TestCase):
         self.assertEqual(
             sanitize_filename('New World record at 0:12:34'),
             'New World record at 0_12_34')
+        self.assertEqual(sanitize_filename('--gasdgf'), '_-gasdgf')
+        self.assertEqual(sanitize_filename('--gasdgf', is_id=True), '--gasdgf')
 
         forbidden = '"\0\\/'
         for fc in forbidden:
@@ -244,6 +246,7 @@ class TestUtil(unittest.TestCase):
         self.assertEqual(parse_duration('2.5 hours'), 9000)
         self.assertEqual(parse_duration('02:03:04'), 7384)
         self.assertEqual(parse_duration('01:02:03:04'), 93784)
+        self.assertEqual(parse_duration('1 hour 3 minutes'), 3780)
 
     def test_fix_xml_ampersands(self):
         self.assertEqual(
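The new `parse_duration` case can be checked interactively; a quick sketch, assuming a checkout of this branch on the path:

```python
from youtube_dl.utils import parse_duration

# The spelled-out form covered by the new test above: 1*3600 + 3*60 = 3780.
print(parse_duration('1 hour 3 minutes'))  # 3780
print(parse_duration('02:03:04'))          # 7384, the existing colon form
```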

youtube_dl/YoutubeDL.py

@@ -28,6 +28,7 @@ from .compat import (
     compat_basestring,
     compat_cookiejar,
     compat_expanduser,
+    compat_get_terminal_size,
     compat_http_client,
     compat_kwargs,
     compat_str,
@@ -46,7 +47,6 @@ from .utils import (
     ExtractorError,
     format_bytes,
     formatSeconds,
-    get_term_width,
     locked_file,
     make_HTTPS_handler,
     MaxDownloadsReached,
@@ -155,7 +155,7 @@ class YoutubeDL(object):
     allsubtitles:      Downloads all the subtitles of the video
                        (requires writesubtitles or writeautomaticsub)
     listsubtitles:     Lists all available subtitles for the video
-    subtitlesformat:   Subtitle format [srt/sbv/vtt] (default=srt)
+    subtitlesformat:   The format code for subtitles
     subtitleslangs:    List of languages of the subtitles to download
     keepvideo:         Keep the video file after post-processing
     daterange:         A DateRange object, download only if the upload_date is in the range.
@@ -285,7 +285,7 @@ class YoutubeDL(object):
             try:
                 import pty
                 master, slave = pty.openpty()
-                width = get_term_width()
+                width = compat_get_terminal_size().columns
                 if width is None:
                     width_args = []
                 else:
@@ -309,8 +309,8 @@
                 raise
 
         if (sys.version_info >= (3,) and sys.platform != 'win32' and
-                sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
-                and not params.get('restrictfilenames', False)):
+                sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] and
+                not params.get('restrictfilenames', False)):
             # On Python 3, the Unicode filesystem API will throw errors (#1474)
             self.report_warning(
                 'Assuming --restrict-filenames since file system encoding '
@@ -1009,6 +1009,15 @@
                     info_dict['timestamp'])
             info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
 
+        if self.params.get('listsubtitles', False):
+            if 'automatic_captions' in info_dict:
+                self.list_subtitles(info_dict['id'], info_dict.get('automatic_captions'), 'automatic captions')
+            self.list_subtitles(info_dict['id'], info_dict.get('subtitles'), 'subtitles')
+            return
+        info_dict['requested_subtitles'] = self.process_subtitles(
+            info_dict['id'], info_dict.get('subtitles'),
+            info_dict.get('automatic_captions'))
+
         # This extractors handle format selection themselves
         if info_dict['extractor'] in ['Youku']:
             if download:
@@ -1146,6 +1155,55 @@
             info_dict.update(formats_to_download[-1])
         return info_dict
 
+    def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
+        """Select the requested subtitles and their format"""
+        available_subs = {}
+        if normal_subtitles and self.params.get('writesubtitles'):
+            available_subs.update(normal_subtitles)
+        if automatic_captions and self.params.get('writeautomaticsub'):
+            for lang, cap_info in automatic_captions.items():
+                if lang not in available_subs:
+                    available_subs[lang] = cap_info
+
+        if (not self.params.get('writesubtitles') and not
+                self.params.get('writeautomaticsub') or not
+                available_subs):
+            return None
+
+        if self.params.get('allsubtitles', False):
+            requested_langs = available_subs.keys()
+        else:
+            if self.params.get('subtitleslangs', False):
+                requested_langs = self.params.get('subtitleslangs')
+            elif 'en' in available_subs:
+                requested_langs = ['en']
+            else:
+                requested_langs = [list(available_subs.keys())[0]]
+
+        formats_query = self.params.get('subtitlesformat', 'best')
+        formats_preference = formats_query.split('/') if formats_query else []
+        subs = {}
+        for lang in requested_langs:
+            formats = available_subs.get(lang)
+            if formats is None:
+                self.report_warning('%s subtitles not available for %s' % (lang, video_id))
+                continue
+            for ext in formats_preference:
+                if ext == 'best':
+                    f = formats[-1]
+                    break
+                matches = list(filter(lambda f: f['ext'] == ext, formats))
+                if matches:
+                    f = matches[-1]
+                    break
+            else:
+                f = formats[-1]
+                self.report_warning(
+                    'No subtitle format found matching "%s" for language %s, '
+                    'using %s' % (formats_query, lang, f['ext']))
+            subs[lang] = f
+        return subs
+
     def process_info(self, info_dict):
         """Process a single resolved IE result."""
@@ -1248,14 +1306,22 @@
         subtitles_are_requested = any([self.params.get('writesubtitles', False),
                                        self.params.get('writeautomaticsub')])
 
-        if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
+        if subtitles_are_requested and info_dict.get('requested_subtitles'):
             # subtitles download errors are already managed as troubles in relevant IE
             # that way it will silently go on when used with unsupporting IE
-            subtitles = info_dict['subtitles']
-            sub_format = self.params.get('subtitlesformat', 'srt')
-            for sub_lang in subtitles.keys():
-                sub = subtitles[sub_lang]
-                if sub is None:
-                    continue
+            subtitles = info_dict['requested_subtitles']
+            ie = self.get_info_extractor(info_dict['extractor_key'])
+            for sub_lang, sub_info in subtitles.items():
+                sub_format = sub_info['ext']
+                if sub_info.get('data') is not None:
+                    sub_data = sub_info['data']
+                else:
+                    try:
+                        sub_data = ie._download_webpage(
+                            sub_info['url'], info_dict['id'], note=False)
+                    except ExtractorError as err:
+                        self.report_warning('Unable to download subtitle for "%s": %s' %
+                                            (sub_lang, compat_str(err.cause)))
+                        continue
                 try:
                     sub_filename = subtitles_filename(filename, sub_lang, sub_format)
@@ -1264,7 +1330,7 @@
                 else:
                     self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
                     with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
-                        subfile.write(sub)
+                        subfile.write(sub_data)
                 except (OSError, IOError):
                     self.report_error('Cannot write subtitles file ' + sub_filename)
                     return
@@ -1395,8 +1461,8 @@
         """Download a given list of URLs."""
         outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
         if (len(url_list) > 1 and
-                '%' not in outtmpl
-                and self.params.get('max_downloads') != 1):
+                '%' not in outtmpl and
+                self.params.get('max_downloads') != 1):
             raise SameFileError(outtmpl)
 
         for url in url_list:
@@ -1593,6 +1659,17 @@
             ['ID', 'width', 'height', 'URL'],
             [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
 
+    def list_subtitles(self, video_id, subtitles, name='subtitles'):
+        if not subtitles:
+            self.to_screen('%s has no %s' % (video_id, name))
+            return
+        self.to_screen(
+            'Available %s for %s:' % (name, video_id))
+        self.to_screen(render_table(
+            ['Language', 'formats'],
+            [[lang, ', '.join(f['ext'] for f in reversed(formats))]
+                for lang, formats in subtitles.items()]))
+
     def urlopen(self, req):
         """ Start an HTTP download """

youtube_dl/__init__.py

@@ -170,6 +170,9 @@ def _real_main(argv=None):
     if opts.recodevideo is not None:
         if opts.recodevideo not in ['mp4', 'flv', 'webm', 'ogg', 'mkv']:
             parser.error('invalid video recode format specified')
+    if opts.convertsubtitles is not None:
+        if opts.convertsubtitles not in ['srt', 'vtt', 'ass']:
+            parser.error('invalid subtitle format specified')
 
     if opts.date is not None:
         date = DateRange.day(opts.date)
@@ -189,14 +192,14 @@ def _real_main(argv=None):
     # In Python 2, sys.argv is a bytestring (also note http://bugs.python.org/issue2128 for Windows systems)
     if opts.outtmpl is not None:
         opts.outtmpl = opts.outtmpl.decode(preferredencoding())
-    outtmpl = ((opts.outtmpl is not None and opts.outtmpl)
-               or (opts.format == '-1' and opts.usetitle and '%(title)s-%(id)s-%(format)s.%(ext)s')
-               or (opts.format == '-1' and '%(id)s-%(format)s.%(ext)s')
-               or (opts.usetitle and opts.autonumber and '%(autonumber)s-%(title)s-%(id)s.%(ext)s')
-               or (opts.usetitle and '%(title)s-%(id)s.%(ext)s')
-               or (opts.useid and '%(id)s.%(ext)s')
-               or (opts.autonumber and '%(autonumber)s-%(id)s.%(ext)s')
-               or DEFAULT_OUTTMPL)
+    outtmpl = ((opts.outtmpl is not None and opts.outtmpl) or
+               (opts.format == '-1' and opts.usetitle and '%(title)s-%(id)s-%(format)s.%(ext)s') or
+               (opts.format == '-1' and '%(id)s-%(format)s.%(ext)s') or
+               (opts.usetitle and opts.autonumber and '%(autonumber)s-%(title)s-%(id)s.%(ext)s') or
+               (opts.usetitle and '%(title)s-%(id)s.%(ext)s') or
+               (opts.useid and '%(id)s.%(ext)s') or
+               (opts.autonumber and '%(autonumber)s-%(id)s.%(ext)s') or
+               DEFAULT_OUTTMPL)
     if not os.path.splitext(outtmpl)[1] and opts.extractaudio:
         parser.error('Cannot download a video and extract audio into the same'
                      ' file! Use "{0}.%(ext)s" instead of "{0}" as the output'
@@ -223,10 +226,14 @@ def _real_main(argv=None):
             'key': 'FFmpegVideoConvertor',
             'preferedformat': opts.recodevideo,
         })
+    if opts.convertsubtitles:
+        postprocessors.append({
+            'key': 'FFmpegSubtitlesConvertor',
+            'format': opts.convertsubtitles,
+        })
     if opts.embedsubtitles:
         postprocessors.append({
             'key': 'FFmpegEmbedSubtitle',
-            'subtitlesformat': opts.subtitlesformat,
         })
     if opts.xattrs:
         postprocessors.append({'key': 'XAttrMetadata'})

youtube_dl/compat.py

@@ -1,9 +1,11 @@
 from __future__ import unicode_literals
 
+import collections
 import getpass
 import optparse
 import os
 import re
+import shutil
 import socket
 import subprocess
 import sys
@@ -364,6 +366,33 @@ def workaround_optparse_bug9161():
         return real_add_option(self, *bargs, **bkwargs)
     optparse.OptionGroup.add_option = _compat_add_option
 
+if hasattr(shutil, 'get_terminal_size'):  # Python >= 3.3
+    compat_get_terminal_size = shutil.get_terminal_size
+else:
+    _terminal_size = collections.namedtuple('terminal_size', ['columns', 'lines'])
+
+    def compat_get_terminal_size():
+        columns = compat_getenv('COLUMNS', None)
+        if columns:
+            columns = int(columns)
+        else:
+            columns = None
+        lines = compat_getenv('LINES', None)
+        if lines:
+            lines = int(lines)
+        else:
+            lines = None
+
+        try:
+            sp = subprocess.Popen(
+                ['stty', 'size'],
+                stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+            out, err = sp.communicate()
+            lines, columns = map(int, out.split())
+        except:
+            pass
+        return _terminal_size(columns, lines)
+
 
 __all__ = [
     'compat_HTTPError',
@@ -371,6 +400,7 @@ __all__ = [
     'compat_chr',
     'compat_cookiejar',
     'compat_expanduser',
+    'compat_get_terminal_size',
     'compat_getenv',
     'compat_getpass',
     'compat_html_entities',
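A usage sketch of the shim added above, assuming a checkout of this branch on the path; on Python 3.3+ the name is simply an alias for `shutil.get_terminal_size`:

```python
from youtube_dl.compat import compat_get_terminal_size

size = compat_get_terminal_size()
# On the fallback path either field may be None if COLUMNS/LINES are unset
# and `stty size` fails (e.g. when there is no controlling terminal).
print('%s columns x %s lines' % (size.columns, size.lines))
```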

youtube_dl/downloader/common.py

@@ -311,14 +311,14 @@ class FileDownloader(object):
         """
 
         nooverwrites_and_exists = (
-            self.params.get('nooverwrites', False)
-            and os.path.exists(encodeFilename(filename))
+            self.params.get('nooverwrites', False) and
+            os.path.exists(encodeFilename(filename))
         )
 
         continuedl_and_exists = (
-            self.params.get('continuedl', False)
-            and os.path.isfile(encodeFilename(filename))
-            and not self.params.get('nopart', False)
+            self.params.get('continuedl', False) and
+            os.path.isfile(encodeFilename(filename)) and
+            not self.params.get('nopart', False)
         )
 
         # Check file already present

youtube_dl/downloader/f4m.py

@@ -11,6 +11,7 @@ from .common import FileDownloader
 from .http import HttpFD
 from ..compat import (
     compat_urlparse,
+    compat_urllib_error,
 )
 from ..utils import (
     struct_pack,
@@ -121,7 +122,8 @@ class FlvReader(io.BytesIO):
         self.read_unsigned_int()  # BootstrapinfoVersion
         # Profile,Live,Update,Reserved
-        self.read(1)
+        flags = self.read_unsigned_char()
+        live = flags & 0x20 != 0
         # time scale
         self.read_unsigned_int()
         # CurrentMediaTime
@@ -160,6 +162,7 @@ class FlvReader(io.BytesIO):
         return {
             'segments': segments,
             'fragments': fragments,
+            'live': live,
         }
 
     def read_bootstrap_info(self):
@@ -182,6 +185,10 @@ def build_fragments_list(boot_info):
     for segment, fragments_count in segment_run_table['segment_run']:
         for _ in range(fragments_count):
             res.append((segment, next(fragments_counter)))
+
+    if boot_info['live']:
+        res = res[-2:]
+
     return res
@@ -246,6 +253,38 @@ class F4mFD(FileDownloader):
             self.report_error('Unsupported DRM')
         return media
 
+    def _get_bootstrap_from_url(self, bootstrap_url):
+        bootstrap = self.ydl.urlopen(bootstrap_url).read()
+        return read_bootstrap_info(bootstrap)
+
+    def _update_live_fragments(self, bootstrap_url, latest_fragment):
+        fragments_list = []
+        retries = 30
+        while (not fragments_list) and (retries > 0):
+            boot_info = self._get_bootstrap_from_url(bootstrap_url)
+            fragments_list = build_fragments_list(boot_info)
+            fragments_list = [f for f in fragments_list if f[1] > latest_fragment]
+            if not fragments_list:
+                # Retry after a while
+                time.sleep(5.0)
+                retries -= 1
+
+        if not fragments_list:
+            self.report_error('Failed to update fragments')
+
+        return fragments_list
+
+    def _parse_bootstrap_node(self, node, base_url):
+        if node.text is None:
+            bootstrap_url = compat_urlparse.urljoin(
+                base_url, node.attrib['url'])
+            boot_info = self._get_bootstrap_from_url(bootstrap_url)
+        else:
+            bootstrap_url = None
+            bootstrap = base64.b64decode(node.text)
+            boot_info = read_bootstrap_info(bootstrap)
+        return (boot_info, bootstrap_url)
+
     def real_download(self, filename, info_dict):
         man_url = info_dict['url']
         requested_bitrate = info_dict.get('tbr')
@@ -265,18 +304,13 @@ class F4mFD(FileDownloader):
         base_url = compat_urlparse.urljoin(man_url, media.attrib['url'])
 
         bootstrap_node = doc.find(_add_ns('bootstrapInfo'))
-        if bootstrap_node.text is None:
-            bootstrap_url = compat_urlparse.urljoin(
-                base_url, bootstrap_node.attrib['url'])
-            bootstrap = self.ydl.urlopen(bootstrap_url).read()
-        else:
-            bootstrap = base64.b64decode(bootstrap_node.text)
+        boot_info, bootstrap_url = self._parse_bootstrap_node(bootstrap_node, base_url)
+        live = boot_info['live']
         metadata_node = media.find(_add_ns('metadata'))
         if metadata_node is not None:
             metadata = base64.b64decode(metadata_node.text)
         else:
             metadata = None
-        boot_info = read_bootstrap_info(bootstrap)
 
         fragments_list = build_fragments_list(boot_info)
         if self.params.get('test', False):
@@ -301,6 +335,7 @@ class F4mFD(FileDownloader):
         (dest_stream, tmpfilename) = sanitize_open(tmpfilename, 'wb')
 
         write_flv_header(dest_stream)
-        write_metadata_tag(dest_stream, metadata)
+        if not live:
+            write_metadata_tag(dest_stream, metadata)
 
         # This dict stores the download progress, it's updated by the progress
@@ -325,8 +360,8 @@ class F4mFD(FileDownloader):
                     state['frag_index'] += 1
 
                     estimated_size = (
-                        (state['downloaded_bytes'] + frag_total_bytes)
-                        / (state['frag_index'] + 1) * total_frags)
+                        (state['downloaded_bytes'] + frag_total_bytes) /
+                        (state['frag_index'] + 1) * total_frags)
                     time_now = time.time()
                     state['total_bytes_estimate'] = estimated_size
                     state['elapsed'] = time_now - start
@@ -348,12 +383,14 @@ class F4mFD(FileDownloader):
         http_dl.add_progress_hook(frag_progress_hook)
 
         frags_filenames = []
-        for (seg_i, frag_i) in fragments_list:
+        while fragments_list:
+            seg_i, frag_i = fragments_list.pop(0)
             name = 'Seg%d-Frag%d' % (seg_i, frag_i)
             url = base_url + name
             if akamai_pv:
                 url += '?' + akamai_pv.strip(';')
             frag_filename = '%s-%s' % (tmpfilename, name)
+            try:
                 success = http_dl.download(frag_filename, {'url': url})
                 if not success:
                     return False
@@ -365,7 +402,26 @@ class F4mFD(FileDownloader):
                     if box_type == b'mdat':
                         dest_stream.write(box_data)
                         break
+                if live:
+                    os.remove(frag_filename)
+                else:
                     frags_filenames.append(frag_filename)
+            except (compat_urllib_error.HTTPError, ) as err:
+                if live and (err.code == 404 or err.code == 410):
+                    # We didn't keep up with the live window. Continue
+                    # with the next available fragment.
+                    msg = 'Fragment %d unavailable' % frag_i
+                    self.report_warning(msg)
+                    fragments_list = []
+                else:
+                    raise
+
+            if not fragments_list and live and bootstrap_url:
+                fragments_list = self._update_live_fragments(bootstrap_url, frag_i)
+                total_frags += len(fragments_list)
+                if fragments_list and (fragments_list[0][1] > frag_i + 1):
+                    msg = 'Missed %d fragments' % (fragments_list[0][1] - (frag_i + 1))
+                    self.report_warning(msg)
 
         dest_stream.close()
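The live-window handling added above boils down to: start two fragments behind the live edge, and when the list runs dry, re-read the bootstrap and keep only fragments newer than the last one downloaded. A plain-Python illustration with invented fragment numbers, not library code:

```python
# (segment, fragment) pairs as produced by build_fragments_list()
fragments_list = [(1, i) for i in range(1, 11)]

live = True
if live:
    fragments_list = fragments_list[-2:]  # begin near the live edge

latest = fragments_list[-1][1]
# A later bootstrap refresh returns an overlapping window; keep only the
# fragments not yet downloaded, as _update_live_fragments() does.
refreshed = [(1, i) for i in range(8, 14)]
fragments_list = [f for f in refreshed if f[1] > latest]
print(fragments_list)  # [(1, 11), (1, 12), (1, 13)]
```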

youtube_dl/downloader/rtmp.py

@@ -119,7 +119,9 @@ class RtmpFD(FileDownloader):
         # Download using rtmpdump. rtmpdump returns exit code 2 when
         # the connection was interrumpted and resuming appears to be
         # possible. This is part of rtmpdump's normal usage, AFAIK.
-        basic_args = ['rtmpdump', '--verbose', '-r', url, '-o', tmpfilename]
+        basic_args = [
+            'rtmpdump', '--verbose', '-r', url,
+            '-o', encodeFilename(tmpfilename, True)]
         if player_url is not None:
             basic_args += ['--swfVfy', player_url]
         if page_url is not None:

youtube_dl/extractor/__init__.py

@@ -8,6 +8,7 @@ from .adobetv import AdobeTVIE
 from .adultswim import AdultSwimIE
 from .aftenposten import AftenpostenIE
 from .aftonbladet import AftonbladetIE
+from .airmozilla import AirMozillaIE
 from .aljazeera import AlJazeeraIE
 from .alphaporno import AlphaPornoIE
 from .anitube import AnitubeIE
@@ -63,6 +64,10 @@ from .ccc import CCCIE
 from .ceskatelevize import CeskaTelevizeIE
 from .channel9 import Channel9IE
 from .chilloutzone import ChilloutzoneIE
+from .chirbit import (
+    ChirbitIE,
+    ChirbitProfileIE,
+)
 from .cinchcast import CinchcastIE
 from .clipfish import ClipfishIE
 from .cliphunter import CliphunterIE
@@ -222,6 +227,7 @@ from .jeuxvideo import JeuxVideoIE
 from .jove import JoveIE
 from .jukebox import JukeboxIE
 from .jpopsukitv import JpopsukiIE
+from .kaltura import KalturaIE
 from .kankan import KankanIE
 from .karaoketv import KaraoketvIE
 from .keezmovies import KeezMoviesIE
@@ -233,6 +239,11 @@ from .krasview import KrasViewIE
 from .ku6 import Ku6IE
 from .la7 import LA7IE
 from .laola1tv import Laola1TvIE
+from .letv import (
+    LetvIE,
+    LetvTvIE,
+    LetvPlaylistIE
+)
 from .lifenews import LifeNewsIE
 from .liveleak import LiveLeakIE
 from .livestream import (
@@ -335,6 +346,7 @@ from .ntvde import NTVDeIE
 from .ntvru import NTVRuIE
 from .nytimes import NYTimesIE
 from .nuvid import NuvidIE
+from .odnoklassniki import OdnoklassnikiIE
 from .oktoberfesttv import OktoberfestTVIE
 from .ooyala import OoyalaIE
 from .openfilm import OpenFilmIE
@@ -362,8 +374,10 @@ from .pornotube import PornotubeIE
 from .pornoxo import PornoXOIE
 from .promptfile import PromptFileIE
 from .prosiebensat1 import ProSiebenSat1IE
+from .puls4 import Puls4IE
 from .pyvideo import PyvideoIE
 from .quickvid import QuickVidIE
+from .r7 import R7IE
 from .radiode import RadioDeIE
 from .radiobremen import RadioBremenIE
 from .radiofrance import RadioFranceIE
@@ -424,7 +438,10 @@ from .soundcloud import (
     SoundcloudUserIE,
     SoundcloudPlaylistIE
 )
-from .soundgasm import SoundgasmIE
+from .soundgasm import (
+    SoundgasmIE,
+    SoundgasmProfileIE
+)
 from .southpark import (
     SouthParkIE,
     SouthparkDeIE,
@@ -612,6 +629,7 @@ from .youtube import (
     YoutubeUserIE,
     YoutubeWatchLaterIE,
 )
+from .zapiks import ZapiksIE
 from .zdf import ZDFIE, ZDFChannelIE
 from .zingmp3 import (
     ZingMp3SongIE,

youtube_dl/extractor/adobetv.py

@@ -28,7 +28,6 @@ class AdobeTVIE(InfoExtractor):
     def _real_extract(self, url):
         video_id = self._match_id(url)
-
         webpage = self._download_webpage(url, video_id)
 
         player = self._parse_json(
@@ -44,8 +43,10 @@ class AdobeTVIE(InfoExtractor):
             self._html_search_meta('datepublished', webpage, 'upload date'))
 
         duration = parse_duration(
-            self._html_search_meta('duration', webpage, 'duration')
-            or self._search_regex(r'Runtime:\s*(\d{2}:\d{2}:\d{2})', webpage, 'duration'))
+            self._html_search_meta('duration', webpage, 'duration') or
+            self._search_regex(
+                r'Runtime:\s*(\d{2}:\d{2}:\d{2})',
+                webpage, 'duration', fatal=False))
 
         view_count = str_to_int(self._search_regex(
             r'<div class="views">\s*Views?:\s*([\d,.]+)\s*</div>',

youtube_dl/extractor/airmozilla.py

@@ -0,0 +1,74 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    int_or_none,
+    parse_duration,
+    parse_iso8601,
+)
+
+
+class AirMozillaIE(InfoExtractor):
+    _VALID_URL = r'https?://air\.mozilla\.org/(?P<id>[0-9a-z-]+)/?'
+    _TEST = {
+        'url': 'https://air.mozilla.org/privacy-lab-a-meetup-for-privacy-minded-people-in-san-francisco/',
+        'md5': '2e3e7486ba5d180e829d453875b9b8bf',
+        'info_dict': {
+            'id': '6x4q2w',
+            'ext': 'mp4',
+            'title': 'Privacy Lab - a meetup for privacy minded people in San Francisco',
+            'thumbnail': 're:https://\w+\.cloudfront\.net/6x4q2w/poster\.jpg\?t=\d+',
+            'description': 'Brings together privacy professionals and others interested in privacy at for-profits, non-profits, and NGOs in an effort to contribute to the state of the ecosystem...',
+            'timestamp': 1422487800,
+            'upload_date': '20150128',
+            'location': 'SFO Commons',
+            'duration': 3780,
+            'view_count': int,
+            'categories': ['Main'],
+        }
+    }
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+        webpage = self._download_webpage(url, display_id)
+        video_id = self._html_search_regex(r'//vid.ly/(.*?)/embed', webpage, 'id')
+
+        embed_script = self._download_webpage('https://vid.ly/{0}/embed'.format(video_id), video_id)
+        jwconfig = self._search_regex(r'\svar jwconfig = (\{.*?\});\s', embed_script, 'metadata')
+        metadata = self._parse_json(jwconfig, video_id)
+
+        formats = [{
+            'url': source['file'],
+            'ext': source['type'],
+            'format_id': self._search_regex(r'&format=(.*)$', source['file'], 'video format'),
+            'format': source['label'],
+            'height': int(source['label'].rstrip('p')),
+        } for source in metadata['playlist'][0]['sources']]
+        self._sort_formats(formats)
+
+        view_count = int_or_none(self._html_search_regex(
+            r'Views since archived: ([0-9]+)',
+            webpage, 'view count', fatal=False))
+        timestamp = parse_iso8601(self._html_search_regex(
+            r'<time datetime="(.*?)"', webpage, 'timestamp', fatal=False))
+        duration = parse_duration(self._search_regex(
+            r'Duration:\s*(\d+\s*hours?\s*\d+\s*minutes?)',
+            webpage, 'duration', fatal=False))
+
+        return {
+            'id': video_id,
+            'title': self._og_search_title(webpage),
+            'formats': formats,
+            'url': self._og_search_url(webpage),
+            'display_id': display_id,
+            'thumbnail': metadata['playlist'][0].get('image'),
+            'description': self._og_search_description(webpage),
+            'timestamp': timestamp,
+            'location': self._html_search_regex(r'Location: (.*)', webpage, 'location', default=None),
+            'duration': duration,
+            'view_count': view_count,
+            'categories': re.findall(r'<a href=".*?" class="channel">(.*?)</a>', webpage),
+        }


@@ -11,8 +11,8 @@ from ..utils import (
class AppleTrailersIE(InfoExtractor): class AppleTrailersIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?trailers\.apple\.com/trailers/(?P<company>[^/]+)/(?P<movie>[^/]+)' _VALID_URL = r'https?://(?:www\.)?trailers\.apple\.com/(?:trailers|ca)/(?P<company>[^/]+)/(?P<movie>[^/]+)'
_TEST = { _TESTS = [{
"url": "http://trailers.apple.com/trailers/wb/manofsteel/", "url": "http://trailers.apple.com/trailers/wb/manofsteel/",
'info_dict': { 'info_dict': {
'id': 'manofsteel', 'id': 'manofsteel',
@@ -63,7 +63,10 @@ class AppleTrailersIE(InfoExtractor):
}, },
}, },
] ]
} }, {
'url': 'http://trailers.apple.com/ca/metropole/autrui/',
'only_matching': True,
}]
_JSON_RE = r'iTunes.playURL\((.*?)\);' _JSON_RE = r'iTunes.playURL\((.*?)\);'


@@ -3,7 +3,7 @@ from __future__ import unicode_literals
import time import time
import hmac import hmac
from .subtitles import SubtitlesInfoExtractor from .common import InfoExtractor
from ..compat import ( from ..compat import (
compat_str, compat_str,
compat_urllib_parse, compat_urllib_parse,
@@ -17,7 +17,7 @@ from ..utils import (
) )
class AtresPlayerIE(SubtitlesInfoExtractor): class AtresPlayerIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?atresplayer\.com/television/[^/]+/[^/]+/[^/]+/(?P<id>.+?)_\d+\.html' _VALID_URL = r'https?://(?:www\.)?atresplayer\.com/television/[^/]+/[^/]+/[^/]+/(?P<id>.+?)_\d+\.html'
_TESTS = [ _TESTS = [
{ {
@@ -144,13 +144,12 @@ class AtresPlayerIE(SubtitlesInfoExtractor):
thumbnail = xpath_text(episode, './media/asset/files/background', 'thumbnail') thumbnail = xpath_text(episode, './media/asset/files/background', 'thumbnail')
subtitles = {} subtitles = {}
subtitle = xpath_text(episode, './media/asset/files/subtitle', 'subtitle') subtitle_url = xpath_text(episode, './media/asset/files/subtitle', 'subtitle')
if subtitle: if subtitle_url:
subtitles['es'] = subtitle subtitles['es'] = [{
'ext': 'srt',
if self._downloader.params.get('listsubtitles', False): 'url': subtitle_url,
self._list_available_subtitles(video_id, subtitles) }]
return
return { return {
'id': video_id, 'id': video_id,
@@ -159,5 +158,5 @@ class AtresPlayerIE(SubtitlesInfoExtractor):
'thumbnail': thumbnail, 'thumbnail': thumbnail,
'duration': duration, 'duration': duration,
'formats': formats, 'formats': formats,
'subtitles': self.extract_subtitles(video_id, subtitles), 'subtitles': subtitles,
} }


@@ -2,12 +2,12 @@ from __future__ import unicode_literals
import xml.etree.ElementTree import xml.etree.ElementTree
from .subtitles import SubtitlesInfoExtractor from .common import InfoExtractor
from ..utils import ExtractorError from ..utils import ExtractorError
from ..compat import compat_HTTPError from ..compat import compat_HTTPError
class BBCCoUkIE(SubtitlesInfoExtractor): class BBCCoUkIE(InfoExtractor):
IE_NAME = 'bbc.co.uk' IE_NAME = 'bbc.co.uk'
IE_DESC = 'BBC iPlayer' IE_DESC = 'BBC iPlayer'
_VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:(?:(?:programmes|iplayer(?:/[^/]+)?/(?:episode|playlist))/)|music/clips[/#])(?P<id>[\da-z]{8})' _VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:(?:(?:programmes|iplayer(?:/[^/]+)?/(?:episode|playlist))/)|music/clips[/#])(?P<id>[\da-z]{8})'
@@ -215,17 +215,32 @@ class BBCCoUkIE(SubtitlesInfoExtractor):
formats.extend(conn_formats) formats.extend(conn_formats)
return formats return formats
def _extract_captions(self, media, programme_id): def _get_subtitles(self, media, programme_id):
subtitles = {} subtitles = {}
for connection in self._extract_connections(media): for connection in self._extract_connections(media):
captions = self._download_xml(connection.get('href'), programme_id, 'Downloading captions') captions = self._download_xml(connection.get('href'), programme_id, 'Downloading captions')
lang = captions.get('{http://www.w3.org/XML/1998/namespace}lang', 'en') lang = captions.get('{http://www.w3.org/XML/1998/namespace}lang', 'en')
ps = captions.findall('./{0}body/{0}div/{0}p'.format('{http://www.w3.org/2006/10/ttaf1}')) ps = captions.findall('./{0}body/{0}div/{0}p'.format('{http://www.w3.org/2006/10/ttaf1}'))
srt = '' srt = ''
def _extract_text(p):
if p.text is not None:
stripped_text = p.text.strip()
if stripped_text:
return stripped_text
return ' '.join(span.text.strip() for span in p.findall('{http://www.w3.org/2006/10/ttaf1}span'))
for pos, p in enumerate(ps): for pos, p in enumerate(ps):
srt += '%s\r\n%s --> %s\r\n%s\r\n\r\n' % (str(pos), p.get('begin'), p.get('end'), srt += '%s\r\n%s --> %s\r\n%s\r\n\r\n' % (str(pos), p.get('begin'), p.get('end'), _extract_text(p))
p.text.strip() if p.text is not None else '') subtitles[lang] = [
subtitles[lang] = srt {
'url': connection.get('href'),
'ext': 'ttml',
},
{
'data': srt,
'ext': 'srt',
},
]
return subtitles return subtitles
def _download_media_selector(self, programme_id): def _download_media_selector(self, programme_id):
@@ -249,7 +264,7 @@ class BBCCoUkIE(SubtitlesInfoExtractor):
elif kind == 'video': elif kind == 'video':
formats.extend(self._extract_video(media, programme_id)) formats.extend(self._extract_video(media, programme_id))
elif kind == 'captions': elif kind == 'captions':
subtitles = self._extract_captions(media, programme_id) subtitles = self.extract_subtitles(media, programme_id)
return formats, subtitles return formats, subtitles
@@ -324,10 +339,6 @@ class BBCCoUkIE(SubtitlesInfoExtractor):
else: else:
programme_id, title, description, duration, formats, subtitles = self._download_playlist(group_id) programme_id, title, description, duration, formats, subtitles = self._download_playlist(group_id)
if self._downloader.params.get('listsubtitles', False):
self._list_available_subtitles(programme_id, subtitles)
return
self._sort_formats(formats) self._sort_formats(formats)
return { return {


@@ -3,7 +3,6 @@ from __future__ import unicode_literals
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from .subtitles import SubtitlesInfoExtractor
from ..compat import ( from ..compat import (
compat_str, compat_str,
@@ -18,7 +17,7 @@ from ..utils import (
) )
class BlipTVIE(SubtitlesInfoExtractor): class BlipTVIE(InfoExtractor):
_VALID_URL = r'https?://(?:\w+\.)?blip\.tv/(?:(?:.+-|rss/flash/)(?P<id>\d+)|((?:play/|api\.swf#)(?P<lookup_id>[\da-zA-Z+_]+)))' _VALID_URL = r'https?://(?:\w+\.)?blip\.tv/(?:(?:.+-|rss/flash/)(?P<id>\d+)|((?:play/|api\.swf#)(?P<lookup_id>[\da-zA-Z+_]+)))'
_TESTS = [ _TESTS = [
@@ -143,7 +142,7 @@ class BlipTVIE(SubtitlesInfoExtractor):
categories = [category.text for category in item.findall('category')] categories = [category.text for category in item.findall('category')]
formats = [] formats = []
subtitles = {} subtitles_urls = {}
media_group = item.find(media('group')) media_group = item.find(media('group'))
for media_content in media_group.findall(media('content')): for media_content in media_group.findall(media('content')):
@@ -161,7 +160,7 @@ class BlipTVIE(SubtitlesInfoExtractor):
} }
lang = role.rpartition('-')[-1].strip().lower() lang = role.rpartition('-')[-1].strip().lower()
langcode = LANGS.get(lang, lang) langcode = LANGS.get(lang, lang)
subtitles[langcode] = url subtitles_urls[langcode] = url
elif media_type.startswith('video/'): elif media_type.startswith('video/'):
formats.append({ formats.append({
'url': real_url, 'url': real_url,
@@ -175,11 +174,7 @@ class BlipTVIE(SubtitlesInfoExtractor):
}) })
self._sort_formats(formats) self._sort_formats(formats)
# subtitles subtitles = self.extract_subtitles(video_id, subtitles_urls)
video_subtitles = self.extract_subtitles(video_id, subtitles)
if self._downloader.params.get('listsubtitles', False):
self._list_available_subtitles(video_id, subtitles)
return
return { return {
'id': video_id, 'id': video_id,
@@ -192,15 +187,22 @@ class BlipTVIE(SubtitlesInfoExtractor):
'thumbnail': thumbnail, 'thumbnail': thumbnail,
'categories': categories, 'categories': categories,
'formats': formats, 'formats': formats,
'subtitles': video_subtitles, 'subtitles': subtitles,
} }
def _download_subtitle_url(self, sub_lang, url): def _get_subtitles(self, video_id, subtitles_urls):
subtitles = {}
for lang, url in subtitles_urls.items():
# For some weird reason, blip.tv serves a video instead of subtitles # For some weird reason, blip.tv serves a video instead of subtitles
# when we request with a common UA # when we request with a common UA
req = compat_urllib_request.Request(url) req = compat_urllib_request.Request(url)
req.add_header('User-Agent', 'youtube-dl') req.add_header('User-Agent', 'youtube-dl')
return self._download_webpage(req, None, note=False) subtitles[lang] = [{
# The extension is 'srt' but it's actually an 'ass' file
'ext': 'ass',
'data': self._download_webpage(req, None, note=False),
}]
return subtitles
class BlipTVUserIE(InfoExtractor): class BlipTVUserIE(InfoExtractor):


@@ -6,7 +6,7 @@ from .common import InfoExtractor
class BloombergIE(InfoExtractor): class BloombergIE(InfoExtractor):
_VALID_URL = r'https?://www\.bloomberg\.com/video/(?P<name>.+?)\.html' _VALID_URL = r'https?://www\.bloomberg\.com/video/(?P<id>.+?)\.html'
_TEST = { _TEST = {
'url': 'http://www.bloomberg.com/video/shah-s-presentation-on-foreign-exchange-strategies-qurhIVlJSB6hzkVi229d8g.html', 'url': 'http://www.bloomberg.com/video/shah-s-presentation-on-foreign-exchange-strategies-qurhIVlJSB6hzkVi229d8g.html',
@@ -20,9 +20,9 @@ class BloombergIE(InfoExtractor):
} }
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) name = self._match_id(url)
name = mobj.group('name')
webpage = self._download_webpage(url, name) webpage = self._download_webpage(url, name)
f4m_url = self._search_regex( f4m_url = self._search_regex(
r'<source src="(https?://[^"]+\.f4m.*?)"', webpage, r'<source src="(https?://[^"]+\.f4m.*?)"', webpage,
'f4m url') 'f4m url')


@@ -3,7 +3,7 @@ from __future__ import unicode_literals
import re import re
from .subtitles import SubtitlesInfoExtractor from .common import InfoExtractor
from ..compat import ( from ..compat import (
compat_urllib_request, compat_urllib_request,
compat_urllib_parse, compat_urllib_parse,
@@ -15,7 +15,7 @@ from ..utils import (
) )
class CeskaTelevizeIE(SubtitlesInfoExtractor): class CeskaTelevizeIE(InfoExtractor):
_VALID_URL = r'https?://www\.ceskatelevize\.cz/(porady|ivysilani)/(.+/)?(?P<id>[^?#]+)' _VALID_URL = r'https?://www\.ceskatelevize\.cz/(porady|ivysilani)/(.+/)?(?P<id>[^?#]+)'
_TESTS = [ _TESTS = [
@@ -107,13 +107,7 @@ class CeskaTelevizeIE(SubtitlesInfoExtractor):
subtitles = {} subtitles = {}
subs = item.get('subtitles') subs = item.get('subtitles')
if subs: if subs:
subtitles['cs'] = subs[0]['url'] subtitles = self.extract_subtitles(episode_id, subs)
if self._downloader.params.get('listsubtitles', False):
self._list_available_subtitles(video_id, subtitles)
return
subtitles = self._fix_subtitles(self.extract_subtitles(video_id, subtitles))
return { return {
'id': episode_id, 'id': episode_id,
@@ -125,11 +119,20 @@ class CeskaTelevizeIE(SubtitlesInfoExtractor):
'subtitles': subtitles, 'subtitles': subtitles,
} }
def _get_subtitles(self, episode_id, subs):
original_subtitles = self._download_webpage(
subs[0]['url'], episode_id, 'Downloading subtitles')
srt_subs = self._fix_subtitles(original_subtitles)
return {
'cs': [{
'ext': 'srt',
'data': srt_subs,
}]
}
@staticmethod @staticmethod
def _fix_subtitles(subtitles): def _fix_subtitles(subtitles):
""" Convert millisecond-based subtitles to SRT """ """ Convert millisecond-based subtitles to SRT """
if subtitles is None:
return subtitles # subtitles not requested
def _msectotimecode(msec): def _msectotimecode(msec):
""" Helper utility to convert milliseconds to timecode """ """ Helper utility to convert milliseconds to timecode """
@@ -149,7 +152,4 @@ class CeskaTelevizeIE(SubtitlesInfoExtractor):
else: else:
yield line yield line
fixed_subtitles = {} return "\r\n".join(_fix_subtitle(subtitles))
for k, v in subtitles.items():
fixed_subtitles[k] = "\r\n".join(_fix_subtitle(v))
return fixed_subtitles
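
Note: the body of _msectotimecode is elided from the hunk above. A helper of the documented shape might look like this (a sketch for illustration, not the committed implementation):

    def _msectotimecode(msec):
        # e.g. 90000 ms -> '00:01:30,000' (SRT separates milliseconds with a comma)
        msec, ms = divmod(msec, 1000)
        msec, secs = divmod(msec, 60)
        hours, mins = divmod(msec, 60)
        return '%02d:%02d:%02d,%03d' % (hours, mins, secs, ms)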


@@ -0,0 +1,84 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
parse_duration,
int_or_none,
)
class ChirbitIE(InfoExtractor):
IE_NAME = 'chirbit'
_VALID_URL = r'https?://(?:www\.)?chirb\.it/(?:(?:wp|pl)/|fb_chirbit_player\.swf\?key=)?(?P<id>[\da-zA-Z]+)'
_TESTS = [{
'url': 'http://chirb.it/PrIPv5',
'md5': '9847b0dad6ac3e074568bf2cfb197de8',
'info_dict': {
'id': 'PrIPv5',
'ext': 'mp3',
'title': 'Фасадстрой',
'duration': 52,
'view_count': int,
'comment_count': int,
}
}, {
'url': 'https://chirb.it/fb_chirbit_player.swf?key=PrIPv5',
'only_matching': True,
}]
def _real_extract(self, url):
audio_id = self._match_id(url)
webpage = self._download_webpage(
'http://chirb.it/%s' % audio_id, audio_id)
audio_url = self._search_regex(
r'"setFile"\s*,\s*"([^"]+)"', webpage, 'audio url')
title = self._search_regex(
r'itemprop="name">([^<]+)', webpage, 'title')
duration = parse_duration(self._html_search_meta(
'duration', webpage, 'duration', fatal=False))
view_count = int_or_none(self._search_regex(
r'itemprop="playCount"\s*>(\d+)', webpage,
'listen count', fatal=False))
comment_count = int_or_none(self._search_regex(
r'>(\d+) Comments?:', webpage,
'comment count', fatal=False))
return {
'id': audio_id,
'url': audio_url,
'title': title,
'duration': duration,
'view_count': view_count,
'comment_count': comment_count,
}
class ChirbitProfileIE(InfoExtractor):
IE_NAME = 'chirbit:profile'
_VALID_URL = r'https?://(?:www\.)?chirbit.com/(?:rss/)?(?P<id>[^/]+)'
_TEST = {
'url': 'http://chirbit.com/ScarletBeauty',
'info_dict': {
'id': 'ScarletBeauty',
'title': 'Chirbits by ScarletBeauty',
},
'playlist_mincount': 3,
}
def _real_extract(self, url):
profile_id = self._match_id(url)
rss = self._download_xml(
'http://chirbit.com/rss/%s' % profile_id, profile_id)
entries = [
self.url_result(audio_url.text, 'Chirbit')
for audio_url in rss.findall('./channel/item/link')]
title = rss.find('./channel/title').text
return self.playlist_result(entries, profile_id, title)


@@ -250,6 +250,8 @@ class ComedyCentralShowsIE(MTVServicesInfoExtractor):
}) })
self._sort_formats(formats) self._sort_formats(formats)
subtitles = self._extract_subtitles(cdoc, guid)
virtual_id = show_name + ' ' + epTitle + ' part ' + compat_str(part_num + 1) virtual_id = show_name + ' ' + epTitle + ' part ' + compat_str(part_num + 1)
entries.append({ entries.append({
'id': guid, 'id': guid,
@@ -260,6 +262,7 @@ class ComedyCentralShowsIE(MTVServicesInfoExtractor):
'duration': duration, 'duration': duration,
'thumbnail': thumbnail, 'thumbnail': thumbnail,
'description': description, 'description': description,
'subtitles': subtitles,
}) })
playlist_title = show_name + ' ' + title playlist_title = show_name + ' ' + title


@@ -151,8 +151,14 @@ class InfoExtractor(object):
If not explicitly set, calculated from timestamp. If not explicitly set, calculated from timestamp.
uploader_id: Nickname or id of the video uploader. uploader_id: Nickname or id of the video uploader.
location: Physical location where the video was filmed. location: Physical location where the video was filmed.
subtitles: The subtitle file contents as a dictionary in the format subtitles: The available subtitles as a dictionary in the format
{language: subtitles}. {language: subformats}. "subformats" is a list sorted from
lower to higher preference, each element is a dictionary
with the "ext" entry and one of:
* "data": The subtitles file contents
* "url": A url pointing to the subtitles file
automatic_captions: Like 'subtitles', used by the YoutubeIE for
automatically generated captions
duration: Length of the video in seconds, as an integer. duration: Length of the video in seconds, as an integer.
view_count: How many users have watched the video on the platform. view_count: How many users have watched the video on the platform.
like_count: Number of positive ratings of the video like_count: Number of positive ratings of the video
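
Note: under the new contract, a concrete subtitles value might look like this (language code, URL and cue text invented for illustration):

    subtitles = {
        'en': [
            # lower preference: a URL the downloader fetches itself
            {'ext': 'ttml', 'url': 'http://example.com/subs.en.ttml'},
            # higher preference: the file contents, already in memory
            {'ext': 'srt', 'data': '1\r\n00:00:00,000 --> 00:00:01,000\r\nHi\r\n\r\n'},
        ],
    }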
@@ -395,6 +401,16 @@ class InfoExtractor(object):
if blocked_iframe: if blocked_iframe:
msg += ' Visit %s for more details' % blocked_iframe msg += ' Visit %s for more details' % blocked_iframe
raise ExtractorError(msg, expected=True) raise ExtractorError(msg, expected=True)
if '<title>The URL you requested has been blocked</title>' in content[:512]:
msg = (
'Access to this webpage has been blocked by Indian censorship. '
'Use a VPN or proxy server (with --proxy) to route around it.')
block_msg = self._html_search_regex(
r'</h1><p>(.*?)</p>',
content, 'block message', default=None)
if block_msg:
msg += ' (Message: "%s")' % block_msg.replace('\n', ' ')
raise ExtractorError(msg, expected=True)
return content return content
@@ -825,8 +841,8 @@ class InfoExtractor(object):
media_nodes = manifest.findall('{http://ns.adobe.com/f4m/2.0}media') media_nodes = manifest.findall('{http://ns.adobe.com/f4m/2.0}media')
for i, media_el in enumerate(media_nodes): for i, media_el in enumerate(media_nodes):
if manifest_version == '2.0': if manifest_version == '2.0':
manifest_url = ('/'.join(manifest_url.split('/')[:-1]) + '/' manifest_url = ('/'.join(manifest_url.split('/')[:-1]) + '/' +
+ (media_el.attrib.get('href') or media_el.attrib.get('url'))) (media_el.attrib.get('href') or media_el.attrib.get('url')))
tbr = int_or_none(media_el.attrib.get('bitrate')) tbr = int_or_none(media_el.attrib.get('bitrate'))
formats.append({ formats.append({
'format_id': '-'.join(filter(None, [f4m_id, 'f4m-%d' % (i if tbr is None else tbr)])), 'format_id': '-'.join(filter(None, [f4m_id, 'f4m-%d' % (i if tbr is None else tbr)])),
@@ -850,7 +866,7 @@ class InfoExtractor(object):
'url': m3u8_url, 'url': m3u8_url,
'ext': ext, 'ext': ext,
'protocol': 'm3u8', 'protocol': 'm3u8',
'preference': -1, 'preference': preference - 1 if preference else -1,
'resolution': 'multiple', 'resolution': 'multiple',
'format_note': 'Quality selection URL', 'format_note': 'Quality selection URL',
}] }]
@@ -932,10 +948,23 @@ class InfoExtractor(object):
formats = [] formats = []
rtmp_count = 0 rtmp_count = 0
if smil.findall('./body/seq/video'):
video = smil.findall('./body/seq/video')[0]
fmts, rtmp_count = self._parse_smil_video(video, video_id, base, rtmp_count)
formats.extend(fmts)
else:
for video in smil.findall('./body/switch/video'): for video in smil.findall('./body/switch/video'):
fmts, rtmp_count = self._parse_smil_video(video, video_id, base, rtmp_count)
formats.extend(fmts)
self._sort_formats(formats)
return formats
def _parse_smil_video(self, video, video_id, base, rtmp_count):
src = video.get('src') src = video.get('src')
if not src: if not src:
continue return ([], rtmp_count)
bitrate = int_or_none(video.get('system-bitrate') or video.get('systemBitrate'), 1000) bitrate = int_or_none(video.get('system-bitrate') or video.get('systemBitrate'), 1000)
width = int_or_none(video.get('width')) width = int_or_none(video.get('width'))
height = int_or_none(video.get('height')) height = int_or_none(video.get('height'))
@@ -948,11 +977,11 @@ class InfoExtractor(object):
proto = 'http' proto = 'http'
ext = video.get('ext') ext = video.get('ext')
if proto == 'm3u8': if proto == 'm3u8':
formats.extend(self._extract_m3u8_formats(src, video_id, ext)) return (self._extract_m3u8_formats(src, video_id, ext), rtmp_count)
elif proto == 'rtmp': elif proto == 'rtmp':
rtmp_count += 1 rtmp_count += 1
streamer = video.get('streamer') or base streamer = video.get('streamer') or base
formats.append({ return ([{
'url': streamer, 'url': streamer,
'play_path': src, 'play_path': src,
'ext': 'flv', 'ext': 'flv',
@@ -960,10 +989,15 @@ class InfoExtractor(object):
'tbr': bitrate, 'tbr': bitrate,
'width': width, 'width': width,
'height': height, 'height': height,
}) }], rtmp_count)
self._sort_formats(formats) elif proto.startswith('http'):
return ([{
return formats 'url': base + src,
'ext': ext or 'flv',
'tbr': bitrate,
'width': width,
'height': height,
}], rtmp_count)
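
Note: every branch of _parse_smil_video now returns a (formats, rtmp_count) pair; an unusable node returns ([], rtmp_count) unchanged, so callers simply thread the counter through the loop:

    fmts, rtmp_count = self._parse_smil_video(video, video_id, base, rtmp_count)
    formats.extend(fmts)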
def _live_title(self, name): def _live_title(self, name):
""" Generate the title for a live video """ """ Generate the title for a live video """
@@ -1028,6 +1062,24 @@ class InfoExtractor(object):
any_restricted = any_restricted or is_restricted any_restricted = any_restricted or is_restricted
return not any_restricted return not any_restricted
def extract_subtitles(self, *args, **kwargs):
if (self._downloader.params.get('writesubtitles', False) or
self._downloader.params.get('listsubtitles')):
return self._get_subtitles(*args, **kwargs)
return {}
def _get_subtitles(self, *args, **kwargs):
raise NotImplementedError("This method must be implemented by subclasses")
def extract_automatic_captions(self, *args, **kwargs):
if (self._downloader.params.get('writeautomaticsub', False) or
self._downloader.params.get('listsubtitles')):
return self._get_automatic_captions(*args, **kwargs)
return {}
def _get_automatic_captions(self, *args, **kwargs):
raise NotImplementedError("This method must be implemented by subclasses")
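
Note: extractors now override the private hook instead of checking listsubtitles themselves. A minimal sketch of the pattern (the extractor name and URLs are hypothetical):

    class ExampleIE(InfoExtractor):
        def _get_subtitles(self, video_id):
            # only reached when --write-sub or --list-subs was given;
            # otherwise extract_subtitles() short-circuits to {}
            return {'en': [{'ext': 'vtt', 'url': 'http://example.com/%s.vtt' % video_id}]}

        def _real_extract(self, url):
            video_id = self._match_id(url)
            return {
                'id': video_id,
                'title': video_id,
                'url': 'http://example.com/%s.mp4' % video_id,
                'subtitles': self.extract_subtitles(video_id),
            }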
class SearchInfoExtractor(InfoExtractor): class SearchInfoExtractor(InfoExtractor):
""" """


@@ -9,7 +9,7 @@ import xml.etree.ElementTree
from hashlib import sha1 from hashlib import sha1
from math import pow, sqrt, floor from math import pow, sqrt, floor
from .subtitles import SubtitlesInfoExtractor from .common import InfoExtractor
from ..compat import ( from ..compat import (
compat_urllib_parse, compat_urllib_parse,
compat_urllib_request, compat_urllib_request,
@@ -25,10 +25,9 @@ from ..aes import (
aes_cbc_decrypt, aes_cbc_decrypt,
inc, inc,
) )
from .common import InfoExtractor
class CrunchyrollIE(SubtitlesInfoExtractor): class CrunchyrollIE(InfoExtractor):
_VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.(?:com|fr)/(?:[^/]*/[^/?&]*?|media/\?id=)(?P<video_id>[0-9]+))(?:[/?&]|$)' _VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.(?:com|fr)/(?:[^/]*/[^/?&]*?|media/\?id=)(?P<video_id>[0-9]+))(?:[/?&]|$)'
_TESTS = [{ _TESTS = [{
'url': 'http://www.crunchyroll.com/wanna-be-the-strongest-in-the-world/episode-1-an-idol-wrestler-is-born-645513', 'url': 'http://www.crunchyroll.com/wanna-be-the-strongest-in-the-world/episode-1-an-idol-wrestler-is-born-645513',
@@ -187,6 +186,38 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
return output return output
def _get_subtitles(self, video_id, webpage):
subtitles = {}
for sub_id, sub_name in re.findall(r'\?ssid=([0-9]+)" title="([^"]+)', webpage):
sub_page = self._download_webpage(
'http://www.crunchyroll.com/xml/?req=RpcApiSubtitle_GetXml&subtitle_script_id=' + sub_id,
video_id, note='Downloading subtitles for ' + sub_name)
id = self._search_regex(r'id=\'([0-9]+)', sub_page, 'subtitle_id', fatal=False)
iv = self._search_regex(r'<iv>([^<]+)', sub_page, 'subtitle_iv', fatal=False)
data = self._search_regex(r'<data>([^<]+)', sub_page, 'subtitle_data', fatal=False)
if not id or not iv or not data:
continue
id = int(id)
iv = base64.b64decode(iv)
data = base64.b64decode(data)
subtitle = self._decrypt_subtitles(data, iv, id).decode('utf-8')
lang_code = self._search_regex(r'lang_code=["\']([^"\']+)', subtitle, 'subtitle_lang_code', fatal=False)
if not lang_code:
continue
sub_root = xml.etree.ElementTree.fromstring(subtitle)
subtitles[lang_code] = [
{
'ext': 'srt',
'data': self._convert_subtitles_to_srt(sub_root),
},
{
'ext': 'ass',
'data': self._convert_subtitles_to_ass(sub_root),
},
]
return subtitles
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('video_id') video_id = mobj.group('video_id')
@@ -249,34 +280,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
'format_id': video_format, 'format_id': video_format,
}) })
subtitles = {} subtitles = self.extract_subtitles(video_id, webpage)
sub_format = self._downloader.params.get('subtitlesformat', 'srt')
for sub_id, sub_name in re.findall(r'\?ssid=([0-9]+)" title="([^"]+)', webpage):
sub_page = self._download_webpage(
'http://www.crunchyroll.com/xml/?req=RpcApiSubtitle_GetXml&subtitle_script_id=' + sub_id,
video_id, note='Downloading subtitles for ' + sub_name)
id = self._search_regex(r'id=\'([0-9]+)', sub_page, 'subtitle_id', fatal=False)
iv = self._search_regex(r'<iv>([^<]+)', sub_page, 'subtitle_iv', fatal=False)
data = self._search_regex(r'<data>([^<]+)', sub_page, 'subtitle_data', fatal=False)
if not id or not iv or not data:
continue
id = int(id)
iv = base64.b64decode(iv)
data = base64.b64decode(data)
subtitle = self._decrypt_subtitles(data, iv, id).decode('utf-8')
lang_code = self._search_regex(r'lang_code=["\']([^"\']+)', subtitle, 'subtitle_lang_code', fatal=False)
if not lang_code:
continue
sub_root = xml.etree.ElementTree.fromstring(subtitle)
if sub_format == 'ass':
subtitles[lang_code] = self._convert_subtitles_to_ass(sub_root)
else:
subtitles[lang_code] = self._convert_subtitles_to_srt(sub_root)
if self._downloader.params.get('listsubtitles', False):
self._list_available_subtitles(video_id, subtitles)
return
return { return {
'id': video_id, 'id': video_id,


@@ -6,7 +6,6 @@ import json
import itertools import itertools
from .common import InfoExtractor from .common import InfoExtractor
from .subtitles import SubtitlesInfoExtractor
from ..compat import ( from ..compat import (
compat_str, compat_str,
@@ -31,7 +30,7 @@ class DailymotionBaseInfoExtractor(InfoExtractor):
return request return request
class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor): class DailymotionIE(DailymotionBaseInfoExtractor):
"""Information Extractor for Dailymotion""" """Information Extractor for Dailymotion"""
_VALID_URL = r'(?i)(?:https?://)?(?:(www|touch)\.)?dailymotion\.[a-z]{2,3}/(?:(embed|#)/)?video/(?P<id>[^/?_]+)' _VALID_URL = r'(?i)(?:https?://)?(?:(www|touch)\.)?dailymotion\.[a-z]{2,3}/(?:(embed|#)/)?video/(?P<id>[^/?_]+)'
@@ -143,9 +142,6 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
# subtitles # subtitles
video_subtitles = self.extract_subtitles(video_id, webpage) video_subtitles = self.extract_subtitles(video_id, webpage)
if self._downloader.params.get('listsubtitles', False):
self._list_available_subtitles(video_id, webpage)
return
view_count = str_to_int(self._search_regex( view_count = str_to_int(self._search_regex(
r'video_views_count[^>]+>\s+([\d\.,]+)', r'video_views_count[^>]+>\s+([\d\.,]+)',
@@ -169,7 +165,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
'view_count': view_count, 'view_count': view_count,
} }
def _get_available_subtitles(self, video_id, webpage): def _get_subtitles(self, video_id, webpage):
try: try:
sub_list = self._download_webpage( sub_list = self._download_webpage(
'https://api.dailymotion.com/video/%s/subtitles?fields=id,language,url' % video_id, 'https://api.dailymotion.com/video/%s/subtitles?fields=id,language,url' % video_id,
@@ -179,7 +175,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
return {} return {}
info = json.loads(sub_list) info = json.loads(sub_list)
if (info['total'] > 0): if (info['total'] > 0):
sub_lang_list = dict((l['language'], l['url']) for l in info['list']) sub_lang_list = dict((l['language'], [{'url': l['url'], 'ext': 'srt'}]) for l in info['list'])
return sub_lang_list return sub_lang_list
self._downloader.report_warning('video doesn\'t have subtitles') self._downloader.report_warning('video doesn\'t have subtitles')
return {} return {}


@@ -25,8 +25,9 @@ class DefenseGouvFrIE(InfoExtractor):
r"flashvars.pvg_id=\"(\d+)\";", r"flashvars.pvg_id=\"(\d+)\";",
webpage, 'ID') webpage, 'ID')
json_url = ('http://static.videos.gouv.fr/brightcovehub/export/json/' json_url = (
+ video_id) 'http://static.videos.gouv.fr/brightcovehub/export/json/%s' %
video_id)
info = self._download_json(json_url, title, 'Downloading JSON config') info = self._download_json(json_url, title, 'Downloading JSON config')
video_url = info['renditions'][0]['url'] video_url = info['renditions'][0]['url']


@@ -1,11 +1,10 @@
from __future__ import unicode_literals from __future__ import unicode_literals
from .subtitles import SubtitlesInfoExtractor from .common import InfoExtractor, ExtractorError
from .common import ExtractorError
from ..utils import parse_iso8601 from ..utils import parse_iso8601
class DRTVIE(SubtitlesInfoExtractor): class DRTVIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?dr\.dk/tv/se/(?:[^/]+/)*(?P<id>[\da-z-]+)(?:[/#?]|$)' _VALID_URL = r'https?://(?:www\.)?dr\.dk/tv/se/(?:[^/]+/)*(?P<id>[\da-z-]+)(?:[/#?]|$)'
_TEST = { _TEST = {
@@ -76,7 +75,7 @@ class DRTVIE(SubtitlesInfoExtractor):
} }
for subs in subtitles_list: for subs in subtitles_list:
lang = subs['Language'] lang = subs['Language']
subtitles[LANGS.get(lang, lang)] = subs['Uri'] subtitles[LANGS.get(lang, lang)] = [{'url': subs['Uri'], 'ext': 'vtt'}]
if not formats and restricted_to_denmark: if not formats and restricted_to_denmark:
raise ExtractorError( raise ExtractorError(
@@ -84,10 +83,6 @@ class DRTVIE(SubtitlesInfoExtractor):
self._sort_formats(formats) self._sort_formats(formats)
if self._downloader.params.get('listsubtitles', False):
self._list_available_subtitles(video_id, subtitles)
return
return { return {
'id': video_id, 'id': video_id,
'title': title, 'title': title,
@@ -96,5 +91,5 @@ class DRTVIE(SubtitlesInfoExtractor):
'timestamp': timestamp, 'timestamp': timestamp,
'duration': duration, 'duration': duration,
'formats': formats, 'formats': formats,
'subtitles': self.extract_subtitles(video_id, subtitles), 'subtitles': subtitles,
} }


@@ -35,10 +35,7 @@ class EpornerIE(InfoExtractor):
title = self._html_search_regex( title = self._html_search_regex(
r'<title>(.*?) - EPORNER', webpage, 'title') r'<title>(.*?) - EPORNER', webpage, 'title')
redirect_code = self._html_search_regex( redirect_url = 'http://www.eporner.com/config5/%s' % video_id
r'<script type="text/javascript" src="/config5/%s/([a-f\d]+)/">' % video_id,
webpage, 'redirect_code')
redirect_url = 'http://www.eporner.com/config5/%s/%s' % (video_id, redirect_code)
player_code = self._download_webpage( player_code = self._download_webpage(
redirect_url, display_id, note='Downloading player config') redirect_url, display_id, note='Downloading player config')
@@ -69,5 +66,5 @@ class EpornerIE(InfoExtractor):
'duration': duration, 'duration': duration,
'view_count': view_count, 'view_count': view_count,
'formats': formats, 'formats': formats,
'age_limit': self._rta_search(webpage), 'age_limit': 18,
} }


@@ -3,15 +3,18 @@ from __future__ import unicode_literals
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import ( from ..compat import (
compat_urllib_parse, compat_urllib_parse,
compat_urllib_request,
) )
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
js_to_json, js_to_json,
parse_duration,
) )
class EscapistIE(InfoExtractor): class EscapistIE(InfoExtractor):
_VALID_URL = r'https?://?(www\.)?escapistmagazine\.com/videos/view/[^/?#]+/(?P<id>[0-9]+)-[^/?#]*(?:$|[?#])' _VALID_URL = r'https?://?(www\.)?escapistmagazine\.com/videos/view/[^/?#]+/(?P<id>[0-9]+)-[^/?#]*(?:$|[?#])'
_USER_AGENT = 'Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko'
_TEST = { _TEST = {
'url': 'http://www.escapistmagazine.com/videos/view/the-escapist-presents/6618-Breaking-Down-Baldurs-Gate', 'url': 'http://www.escapistmagazine.com/videos/view/the-escapist-presents/6618-Breaking-Down-Baldurs-Gate',
'md5': 'ab3a706c681efca53f0a35f1415cf0d1', 'md5': 'ab3a706c681efca53f0a35f1415cf0d1',
@@ -23,44 +26,66 @@ class EscapistIE(InfoExtractor):
'uploader': 'The Escapist Presents', 'uploader': 'The Escapist Presents',
'title': "Breaking Down Baldur's Gate", 'title': "Breaking Down Baldur's Gate",
'thumbnail': 're:^https?://.*\.jpg$', 'thumbnail': 're:^https?://.*\.jpg$',
'duration': 264,
} }
} }
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id) webpage_req = compat_urllib_request.Request(url)
webpage_req.add_header('User-Agent', self._USER_AGENT)
webpage = self._download_webpage(webpage_req, video_id)
uploader_id = self._html_search_regex( uploader_id = self._html_search_regex(
r"<h1 class='headline'><a href='/videos/view/(.*?)'", r"<h1\s+class='headline'>\s*<a\s+href='/videos/view/(.*?)'",
webpage, 'uploader ID', fatal=False) webpage, 'uploader ID', fatal=False)
uploader = self._html_search_regex( uploader = self._html_search_regex(
r"<h1 class='headline'>(.*?)</a>", r"<h1\s+class='headline'>(.*?)</a>",
webpage, 'uploader', fatal=False) webpage, 'uploader', fatal=False)
description = self._html_search_meta('description', webpage) description = self._html_search_meta('description', webpage)
duration = parse_duration(self._html_search_meta('duration', webpage))
raw_title = self._html_search_meta('title', webpage, fatal=True) raw_title = self._html_search_meta('title', webpage, fatal=True)
title = raw_title.partition(' : ')[2] title = raw_title.partition(' : ')[2]
config_url = compat_urllib_parse.unquote(self._html_search_regex( config_url = compat_urllib_parse.unquote(self._html_search_regex(
r'<param name="flashvars" value="config=([^"&]+)', webpage, 'config URL')) r'''(?x)
(?:
<param\s+name="flashvars".*?\s+value="config=|
flashvars=&quot;config=
)
(https?://[^"&]+)
''',
webpage, 'config URL'))
formats = [] formats = []
ad_formats = []
def _add_format(name, cfgurl, quality): def _add_format(name, cfg_url, quality):
cfg_req = compat_urllib_request.Request(cfg_url)
cfg_req.add_header('User-Agent', self._USER_AGENT)
config = self._download_json( config = self._download_json(
cfgurl, video_id, cfg_req, video_id,
'Downloading ' + name + ' configuration', 'Downloading ' + name + ' configuration',
'Unable to download ' + name + ' configuration', 'Unable to download ' + name + ' configuration',
transform_source=js_to_json) transform_source=js_to_json)
playlist = config['playlist'] playlist = config['playlist']
video_url = next( for p in playlist:
p['url'] for p in playlist if p.get('eventCategory') == 'Video':
if p.get('eventCategory') == 'Video') ar = formats
formats.append({ elif p.get('eventCategory') == 'Video Postroll':
'url': video_url, ar = ad_formats
else:
continue
ar.append({
'url': p['url'],
'format_id': name, 'format_id': name,
'quality': quality, 'quality': quality,
'http_headers': {
'User-Agent': self._USER_AGENT,
},
}) })
_add_format('normal', config_url, quality=0) _add_format('normal', config_url, quality=0)
@@ -70,10 +95,12 @@ class EscapistIE(InfoExtractor):
_add_format('hq', hq_url, quality=1) _add_format('hq', hq_url, quality=1)
except ExtractorError: except ExtractorError:
pass # That's fine, we'll just use normal quality pass # That's fine, we'll just use normal quality
self._sort_formats(formats) self._sort_formats(formats)
return { if '/escapist/sales-marketing/' in formats[-1]['url']:
raise ExtractorError('This IP address has been blocked by The Escapist', expected=True)
res = {
'id': video_id, 'id': video_id,
'formats': formats, 'formats': formats,
'uploader': uploader, 'uploader': uploader,
@@ -81,4 +108,21 @@ class EscapistIE(InfoExtractor):
'title': title, 'title': title,
'thumbnail': self._og_search_thumbnail(webpage), 'thumbnail': self._og_search_thumbnail(webpage),
'description': description, 'description': description,
'duration': duration,
} }
if self._downloader.params.get('include_ads') and ad_formats:
self._sort_formats(ad_formats)
ad_res = {
'id': '%s-ad' % video_id,
'title': '%s (Postroll)' % title,
'formats': ad_formats,
}
return {
'_type': 'playlist',
'entries': [res, ad_res],
'title': title,
'id': video_id,
}
return res


@@ -126,11 +126,17 @@ class FacebookIE(InfoExtractor):
params_raw = compat_urllib_parse.unquote(data['params']) params_raw = compat_urllib_parse.unquote(data['params'])
params = json.loads(params_raw) params = json.loads(params_raw)
video_data = params['video_data'][0] video_data = params['video_data'][0]
video_url = video_data.get('hd_src')
if not video_url: formats = []
video_url = video_data['sd_src'] for quality in ['sd', 'hd']:
if not video_url: src = video_data.get('%s_src' % quality)
raise ExtractorError('Cannot find video URL') if src is not None:
formats.append({
'format_id': quality,
'url': src,
})
if not formats:
raise ExtractorError('Cannot find video formats')
video_title = self._html_search_regex( video_title = self._html_search_regex(
r'<h2 class="uiHeaderTitle">([^<]*)</h2>', webpage, 'title', r'<h2 class="uiHeaderTitle">([^<]*)</h2>', webpage, 'title',
@@ -146,7 +152,7 @@ class FacebookIE(InfoExtractor):
return { return {
'id': video_id, 'id': video_id,
'title': video_title, 'title': video_title,
'url': video_url, 'formats': formats,
'duration': int_or_none(video_data.get('video_duration')), 'duration': int_or_none(video_data.get('video_duration')),
'thumbnail': video_data.get('thumbnail_src'), 'thumbnail': video_data.get('thumbnail_src'),
} }
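
Note: when a page exposes both renditions, the loop above yields a list the format selector can rank (URLs invented for illustration):

    formats = [
        {'format_id': 'sd', 'url': 'https://video.example.com/sd.mp4'},
        {'format_id': 'hd', 'url': 'https://video.example.com/hd.mp4'},
    ]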


@@ -1,6 +1,8 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
int_or_none, int_or_none,
@@ -31,7 +33,7 @@ class GameStarIE(InfoExtractor):
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
og_title = self._og_search_title(webpage) og_title = self._og_search_title(webpage)
title = og_title.replace(' - Video bei GameStar.de', '').strip() title = re.sub(r'\s*- Video (bei|-) GameStar\.de$', '', og_title)
url = 'http://gamestar.de/_misc/videos/portal/getVideoUrl.cfm?premium=0&videoId=' + video_id url = 'http://gamestar.de/_misc/videos/portal/getVideoUrl.cfm?premium=0&videoId=' + video_id


@@ -7,6 +7,7 @@ from ..compat import (
compat_urllib_parse, compat_urllib_parse,
compat_urllib_request, compat_urllib_request,
) )
from ..utils import remove_end
class GDCVaultIE(InfoExtractor): class GDCVaultIE(InfoExtractor):
@@ -65,10 +66,12 @@ class GDCVaultIE(InfoExtractor):
def _parse_flv(self, xml_description): def _parse_flv(self, xml_description):
video_formats = [] video_formats = []
akami_url = xml_description.find('./metadata/akamaiHost').text akamai_url = xml_description.find('./metadata/akamaiHost').text
slide_video_path = xml_description.find('./metadata/slideVideo').text slide_video_path = xml_description.find('./metadata/slideVideo').text
video_formats.append({ video_formats.append({
'url': 'rtmp://' + akami_url + '/' + slide_video_path, 'url': 'rtmp://%s/ondemand?ovpfv=1.1' % akamai_url,
'play_path': remove_end(slide_video_path, '.flv'),
'ext': 'flv',
'format_note': 'slide deck video', 'format_note': 'slide deck video',
'quality': -2, 'quality': -2,
'preference': -2, 'preference': -2,
@@ -76,7 +79,9 @@ class GDCVaultIE(InfoExtractor):
}) })
speaker_video_path = xml_description.find('./metadata/speakerVideo').text speaker_video_path = xml_description.find('./metadata/speakerVideo').text
video_formats.append({ video_formats.append({
'url': 'rtmp://' + akami_url + '/' + speaker_video_path, 'url': 'rtmp://%s/ondemand?ovpfv=1.1' % akamai_url,
'play_path': remove_end(speaker_video_path, '.flv'),
'ext': 'flv',
'format_note': 'speaker video', 'format_note': 'speaker video',
'quality': -1, 'quality': -1,
'preference': -1, 'preference': -1,


@@ -547,7 +547,28 @@ class GenericIE(InfoExtractor):
'id': 'aanslagen-kopenhagen', 'id': 'aanslagen-kopenhagen',
'title': 'Aanslagen Kopenhagen | RTL Nieuws', 'title': 'Aanslagen Kopenhagen | RTL Nieuws',
} }
},
# Zapiks embed
{
'url': 'http://www.skipass.com/news/116090-bon-appetit-s5ep3-baqueira-mi-cor.html',
'info_dict': {
'id': '118046',
'ext': 'mp4',
'title': 'EP3S5 - Bon Appétit - Baqueira Mi Corazon !',
} }
},
# Kaltura embed
{
'url': 'http://www.monumentalnetwork.com/videos/john-carlson-postgame-2-25-15',
'info_dict': {
'id': '1_eergr3h1',
'ext': 'mp4',
'upload_date': '20150226',
'uploader_id': 'MonumentalSports-Kaltura@perfectsensedigital.com',
'timestamp': int,
'title': 'John Carlson Postgame 2/25/15',
},
},
] ]
def report_following_redirect(self, new_url): def report_following_redirect(self, new_url):
@@ -1098,6 +1119,18 @@ class GenericIE(InfoExtractor):
if mobj is not None: if mobj is not None:
return self.url_result(mobj.group('url'), 'Livestream') return self.url_result(mobj.group('url'), 'Livestream')
# Look for Zapiks embed
mobj = re.search(
r'<iframe[^>]+src="(?P<url>https?://(?:www\.)?zapiks\.fr/index\.php\?.+?)"', webpage)
if mobj is not None:
return self.url_result(mobj.group('url'), 'Zapiks')
# Look for Kaltura embeds
mobj = re.search(
r"(?s)kWidget\.(?:thumb)?[Ee]mbed\(\{.*?'wid'\s*:\s*'_?(?P<partner_id>[^']+)',.*?'entry_id'\s*:\s*'(?P<id>[^']+)',", webpage)
if mobj is not None:
return self.url_result('kaltura:%(partner_id)s:%(id)s' % mobj.groupdict(), 'Kaltura')
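
Note: the handoff uses the kaltura: pseudo-URL form accepted by the new KalturaIE's _VALID_URL (see below); with the IDs from KalturaIE's first test case it would resolve as:

    return self.url_result('kaltura:269692:1_1jc2y3e4', 'Kaltura')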
def check_video(vurl): def check_video(vurl):
if YoutubeIE.suitable(vurl): if YoutubeIE.suitable(vurl):
return True return True
@@ -1193,6 +1226,8 @@ class GenericIE(InfoExtractor):
return entries[0] return entries[0]
else: else:
for num, e in enumerate(entries, start=1): for num, e in enumerate(entries, start=1):
# 'url' results don't have a title
if e.get('title') is not None:
e['title'] = '%s (%d)' % (e['title'], num) e['title'] = '%s (%d)' % (e['title'], num)
return { return {
'_type': 'playlist', '_type': 'playlist',


@@ -0,0 +1,138 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import compat_urllib_parse
from ..utils import (
ExtractorError,
int_or_none,
)
class KalturaIE(InfoExtractor):
_VALID_URL = r'''(?x)
(?:kaltura:|
https?://(:?(?:www|cdnapisec)\.)?kaltura\.com/index\.php/kwidget/(?:[^/]+/)*?wid/_
)(?P<partner_id>\d+)
(?::|
/(?:[^/]+/)*?entry_id/
)(?P<id>[0-9a-z_]+)'''
_API_BASE = 'http://cdnapi.kaltura.com/api_v3/index.php?'
_TESTS = [
{
'url': 'kaltura:269692:1_1jc2y3e4',
'md5': '3adcbdb3dcc02d647539e53f284ba171',
'info_dict': {
'id': '1_1jc2y3e4',
'ext': 'mp4',
'title': 'Track 4',
'upload_date': '20131219',
'uploader_id': 'mlundberg@wolfgangsvault.com',
'description': 'The Allman Brothers Band, 12/16/1981',
'thumbnail': 're:^https?://.*/thumbnail/.*',
'timestamp': int,
},
},
{
'url': 'http://www.kaltura.com/index.php/kwidget/cache_st/1300318621/wid/_269692/uiconf_id/3873291/entry_id/1_1jc2y3e4',
'only_matching': True,
},
{
'url': 'https://cdnapisec.kaltura.com/index.php/kwidget/wid/_557781/uiconf_id/22845202/entry_id/1_plr1syf3',
'only_matching': True,
},
]
def _kaltura_api_call(self, video_id, actions, *args, **kwargs):
params = actions[0]
if len(actions) > 1:
for i, a in enumerate(actions[1:], start=1):
for k, v in a.items():
params['%d:%s' % (i, k)] = v
query = compat_urllib_parse.urlencode(params)
url = self._API_BASE + query
data = self._download_json(url, video_id, *args, **kwargs)
status = data if len(actions) == 1 else data[0]
if status.get('objectType') == 'KalturaAPIException':
raise ExtractorError(
'%s said: %s' % (self.IE_NAME, status['message']))
return data
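
Note: the flattening above prefixes every parameter of the i-th follow-up action with "i:", so a two-action multirequest serializes roughly as follows (values abridged):

    actions = [
        {'service': 'multirequest', 'format': 1, 'ks': 'SIGNATURE'},
        {'service': 'baseentry', 'action': 'get', 'entryId': '1_1jc2y3e4'},
    ]
    # -> service=multirequest&format=1&ks=SIGNATURE
    #    &1:service=baseentry&1:action=get&1:entryId=1_1jc2y3e4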
def _get_kaltura_signature(self, video_id, partner_id):
actions = [{
'apiVersion': '3.1',
'expiry': 86400,
'format': 1,
'service': 'session',
'action': 'startWidgetSession',
'widgetId': '_%s' % partner_id,
}]
return self._kaltura_api_call(
video_id, actions, note='Downloading Kaltura signature')['ks']
def _get_video_info(self, video_id, partner_id):
signature = self._get_kaltura_signature(video_id, partner_id)
actions = [
{
'action': 'null',
'apiVersion': '3.1.5',
'clientTag': 'kdp:v3.8.5',
'format': 1, # JSON, 2 = XML, 3 = PHP
'service': 'multirequest',
'ks': signature,
},
{
'action': 'get',
'entryId': video_id,
'service': 'baseentry',
'version': '-1',
},
{
'action': 'getContextData',
'contextDataParams:objectType': 'KalturaEntryContextDataParams',
'contextDataParams:referrer': 'http://www.kaltura.com/',
'contextDataParams:streamerType': 'http',
'entryId': video_id,
'service': 'baseentry',
},
]
return self._kaltura_api_call(
video_id, actions, note='Downloading video info JSON')
def _real_extract(self, url):
video_id = self._match_id(url)
mobj = re.match(self._VALID_URL, url)
partner_id, entry_id = mobj.group('partner_id'), mobj.group('id')
info, source_data = self._get_video_info(entry_id, partner_id)
formats = [{
'format_id': '%(fileExt)s-%(bitrate)s' % f,
'ext': f['fileExt'],
'tbr': f['bitrate'],
'fps': f.get('frameRate'),
'filesize_approx': int_or_none(f.get('size'), invscale=1024),
'container': f.get('containerFormat'),
'vcodec': f.get('videoCodecId'),
'height': f.get('height'),
'width': f.get('width'),
'url': '%s/flavorId/%s' % (info['dataUrl'], f['id']),
} for f in source_data['flavorAssets']]
self._sort_formats(formats)
return {
'id': video_id,
'title': info['name'],
'formats': formats,
'description': info.get('description'),
'thumbnail': info.get('thumbnailUrl'),
'duration': info.get('duration'),
'timestamp': info.get('createdAt'),
'uploader_id': info.get('userId'),
'view_count': info.get('plays'),
}


@@ -1,31 +1,32 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals from __future__ import unicode_literals
import random import random
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ExtractorError from ..utils import (
ExtractorError,
xpath_text,
)
class Laola1TvIE(InfoExtractor): class Laola1TvIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?laola1\.tv/(?P<lang>[a-z]+)-(?P<portal>[a-z]+)/.*?/(?P<id>[0-9]+)\.html' _VALID_URL = r'https?://(?:www\.)?laola1\.tv/(?P<lang>[a-z]+)-(?P<portal>[a-z]+)/.*?/(?P<id>[0-9]+)\.html'
_TEST = { _TEST = {
'url': 'http://www.laola1.tv/de-de/live/bwf-bitburger-open-grand-prix-gold-court-1/250019.html', 'url': 'http://www.laola1.tv/de-de/video/straubing-tigers-koelner-haie/227883.html',
'info_dict': { 'info_dict': {
'id': '250019', 'id': '227883',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Bitburger Open Grand Prix Gold - Court 1', 'title': 'Straubing Tigers - Kölner Haie',
'categories': ['Badminton'], 'categories': ['Eishockey'],
'uploader': 'BWF - Badminton World Federation', 'is_live': False,
'is_live': True,
}, },
'params': { 'params': {
'skip_download': True, 'skip_download': True,
} }
} }
_BROKEN = True # Not really - extractor works fine, but f4m downloader does not support live streams yet.
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id') video_id = mobj.group('id')
@@ -43,15 +44,22 @@ class Laola1TvIE(InfoExtractor):
r'flashvars\.([_a-zA-Z0-9]+)\s*=\s*"([^"]*)";', iframe) r'flashvars\.([_a-zA-Z0-9]+)\s*=\s*"([^"]*)";', iframe)
flashvars = dict((m[0], m[1]) for m in flashvars_m) flashvars = dict((m[0], m[1]) for m in flashvars_m)
partner_id = self._search_regex(
r'partnerid\s*:\s*"([^"]+)"', iframe, 'partner id')
xml_url = ('http://www.laola1.tv/server/hd_video.php?' + xml_url = ('http://www.laola1.tv/server/hd_video.php?' +
'play=%s&partner=1&portal=%s&v5ident=&lang=%s' % ( 'play=%s&partner=%s&portal=%s&v5ident=&lang=%s' % (
video_id, portal, lang)) video_id, partner_id, portal, lang))
hd_doc = self._download_xml(xml_url, video_id) hd_doc = self._download_xml(xml_url, video_id)
title = hd_doc.find('.//video/title').text title = xpath_text(hd_doc, './/video/title', fatal=True)
flash_url = hd_doc.find('.//video/url').text flash_url = xpath_text(hd_doc, './/video/url', fatal=True)
categories = hd_doc.find('.//video/meta_sports').text.split(',') uploader = xpath_text(hd_doc, './/video/meta_organistation')
uploader = hd_doc.find('.//video/meta_organistation').text is_live = xpath_text(hd_doc, './/video/islive') == 'true'
categories = xpath_text(hd_doc, './/video/meta_sports')
if categories:
categories = categories.split(',')
ident = random.randint(10000000, 99999999) ident = random.randint(10000000, 99999999)
token_url = '%s&ident=%s&klub=0&unikey=0&timestamp=%s&auth=%s' % ( token_url = '%s&ident=%s&klub=0&unikey=0&timestamp=%s&auth=%s' % (
@@ -60,15 +68,16 @@ class Laola1TvIE(InfoExtractor):
token_doc = self._download_xml( token_doc = self._download_xml(
token_url, video_id, note='Downloading token') token_url, video_id, note='Downloading token')
token_attrib = token_doc.find('.//token').attrib token_attrib = token_doc.find('.//token').attrib
if token_attrib.get('auth') == 'blocked': if token_attrib.get('auth') in ('blocked', 'restricted'):
raise ExtractorError('Token error: ' % token_attrib.get('comment')) raise ExtractorError(
'Token error: %s' % token_attrib.get('comment'), expected=True)
video_url = '%s?hdnea=%s&hdcore=3.2.0' % ( video_url = '%s?hdnea=%s&hdcore=3.2.0' % (
token_attrib['url'], token_attrib['auth']) token_attrib['url'], token_attrib['auth'])
return { return {
'id': video_id, 'id': video_id,
'is_live': True, 'is_live': is_live,
'title': title, 'title': title,
'url': video_url, 'url': video_url,
'uploader': uploader, 'uploader': uploader,


@@ -0,0 +1,190 @@
# coding: utf-8
from __future__ import unicode_literals
import datetime
import re
import time
from .common import InfoExtractor
from ..compat import (
compat_urlparse,
compat_urllib_parse,
)
from ..utils import (
determine_ext,
ExtractorError,
parse_iso8601,
)
class LetvIE(InfoExtractor):
_VALID_URL = r'http://www\.letv\.com/ptv/vplay/(?P<id>\d+).html'
_TESTS = [{
'url': 'http://www.letv.com/ptv/vplay/22005890.html',
'md5': 'cab23bd68d5a8db9be31c9a222c1e8df',
'info_dict': {
'id': '22005890',
'ext': 'mp4',
'title': '第87届奥斯卡颁奖礼完美落幕 《鸟人》成最大赢家',
'timestamp': 1424747397,
'upload_date': '20150224',
'description': 'md5:a9cb175fd753e2962176b7beca21a47c',
}
}, {
'url': 'http://www.letv.com/ptv/vplay/1415246.html',
'info_dict': {
'id': '1415246',
'ext': 'mp4',
'title': '美人天下01',
'description': 'md5:f88573d9d7225ada1359eaf0dbf8bcda',
},
'expected_warnings': [
'publish time'
]
}]
# http://www.letv.com/ptv/vplay/1118082.html
# This video is available only in Mainland China
@staticmethod
def urshift(val, n):
return val >> n if val >= 0 else (val + 0x100000000) >> n
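
Note: urshift emulates ActionScript's unsigned right shift (>>>) for negative 32-bit values, which Python's plain >> does not, e.g.:

    >>> LetvIE.urshift(-2, 1)  # (-2 + 0x100000000) >> 1
    2147483647                 # i.e. 0x7fffffff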
# ror() and calc_time_key() are reversed from an embedded swf file in KLetvPlayer.swf
def ror(self, param1, param2):
_loc3_ = 0
while _loc3_ < param2:
param1 = self.urshift(param1, 1) + ((param1 & 1) << 31)
_loc3_ += 1
return param1
def calc_time_key(self, param1):
_loc2_ = 773625421
_loc3_ = self.ror(param1, _loc2_ % 13)
_loc3_ = _loc3_ ^ _loc2_
_loc3_ = self.ror(_loc3_, _loc2_ % 17)
return _loc3_
def _real_extract(self, url):
media_id = self._match_id(url)
page = self._download_webpage(url, media_id)
params = {
'id': media_id,
'platid': 1,
'splatid': 101,
'format': 1,
'tkey': self.calc_time_key(int(time.time())),
'domain': 'www.letv.com'
}
play_json = self._download_json(
'http://api.letv.com/mms/out/video/playJson?' + compat_urllib_parse.urlencode(params),
media_id, 'playJson data')
# Check for errors
playstatus = play_json['playstatus']
if playstatus['status'] == 0:
flag = playstatus['flag']
if flag == 1:
msg = 'Country %s auth error' % playstatus['country']
else:
msg = 'Generic error. flag = %d' % flag
raise ExtractorError(msg, expected=True)
playurl = play_json['playurl']
formats = ['350', '1000', '1300', '720p', '1080p']
dispatch = playurl['dispatch']
urls = []
for format_id in formats:
if format_id in dispatch:
media_url = playurl['domain'][0] + dispatch[format_id][0]
# Mimic what flvxz.com does
url_parts = list(compat_urlparse.urlparse(media_url))
qs = dict(compat_urlparse.parse_qs(url_parts[4]))
qs.update({
'platid': '14',
'splatid': '1401',
'tss': 'no',
'retry': 1
})
url_parts[4] = compat_urllib_parse.urlencode(qs)
media_url = compat_urlparse.urlunparse(url_parts)
url_info_dict = {
'url': media_url,
'ext': determine_ext(dispatch[format_id][1])
}
if format_id[-1:] == 'p':
url_info_dict['height'] = format_id[:-1]
urls.append(url_info_dict)
publish_time = parse_iso8601(self._html_search_regex(
r'发布时间&nbsp;([^<>]+) ', page, 'publish time', fatal=False),
delimiter=' ', timezone=datetime.timedelta(hours=8))
description = self._html_search_meta('description', page, fatal=False)
return {
'id': media_id,
'formats': urls,
'title': playurl['title'],
'thumbnail': playurl['pic'],
'description': description,
'timestamp': publish_time,
}
class LetvTvIE(InfoExtractor):
_VALID_URL = r'http://www.letv.com/tv/(?P<id>\d+).html'
_TESTS = [{
'url': 'http://www.letv.com/tv/46177.html',
'info_dict': {
'id': '46177',
'title': '美人天下',
'description': 'md5:395666ff41b44080396e59570dbac01c'
},
'playlist_count': 35
}]
def _real_extract(self, url):
playlist_id = self._match_id(url)
page = self._download_webpage(url, playlist_id)
media_urls = list(set(re.findall(
r'http://www.letv.com/ptv/vplay/\d+.html', page)))
entries = [self.url_result(media_url, ie='Letv')
for media_url in media_urls]
title = self._html_search_meta('keywords', page,
fatal=False).split('，')[0]
description = self._html_search_meta('description', page, fatal=False)
return self.playlist_result(entries, playlist_id, playlist_title=title,
playlist_description=description)
class LetvPlaylistIE(LetvTvIE):
_VALID_URL = r'http://tv.letv.com/[a-z]+/(?P<id>[a-z]+)/index.s?html'
_TESTS = [{
'url': 'http://tv.letv.com/izt/wuzetian/index.html',
'info_dict': {
'id': 'wuzetian',
'title': '武媚娘传奇',
'description': 'md5:e12499475ab3d50219e5bba00b3cb248'
},
# This playlist contains some extra videos other than the drama itself
'playlist_mincount': 96
}, {
'url': 'http://tv.letv.com/pzt/lswjzzjc/index.shtml',
'info_dict': {
'id': 'lswjzzjc',
# The title should be "劲舞青春", but I can't find a simple way to
# determine the playlist title
'title': '乐视午间自制剧场',
'description': 'md5:b1eef244f45589a7b5b1af9ff25a4489'
},
'playlist_mincount': 7
}]


@@ -3,7 +3,6 @@ from __future__ import unicode_literals
import re import re
import json import json
from .subtitles import SubtitlesInfoExtractor
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import ( from ..compat import (
compat_str, compat_str,
@@ -16,10 +15,10 @@ from ..utils import (
) )
class LyndaIE(SubtitlesInfoExtractor): class LyndaIE(InfoExtractor):
IE_NAME = 'lynda' IE_NAME = 'lynda'
IE_DESC = 'lynda.com videos' IE_DESC = 'lynda.com videos'
_VALID_URL = r'https?://www\.lynda\.com/[^/]+/[^/]+/\d+/(\d+)-\d\.html' _VALID_URL = r'https?://www\.lynda\.com/(?:[^/]+/[^/]+/\d+|player/embed)/(\d+)'
_LOGIN_URL = 'https://www.lynda.com/login/login.aspx' _LOGIN_URL = 'https://www.lynda.com/login/login.aspx'
_NETRC_MACHINE = 'lynda' _NETRC_MACHINE = 'lynda'
@@ -28,7 +27,7 @@ class LyndaIE(SubtitlesInfoExtractor):
ACCOUNT_CREDENTIALS_HINT = 'Use --username and --password options to provide lynda.com account credentials.' ACCOUNT_CREDENTIALS_HINT = 'Use --username and --password options to provide lynda.com account credentials.'
_TEST = { _TESTS = [{
'url': 'http://www.lynda.com/Bootstrap-tutorials/Using-exercise-files/110885/114408-4.html', 'url': 'http://www.lynda.com/Bootstrap-tutorials/Using-exercise-files/110885/114408-4.html',
'md5': 'ecfc6862da89489161fb9cd5f5a6fac1', 'md5': 'ecfc6862da89489161fb9cd5f5a6fac1',
'info_dict': { 'info_dict': {
@@ -37,7 +36,10 @@ class LyndaIE(SubtitlesInfoExtractor):
'title': 'Using the exercise files', 'title': 'Using the exercise files',
'duration': 68 'duration': 68
} }
} }, {
'url': 'https://www.lynda.com/player/embed/133770?tr=foo=1;bar=g;fizz=rt&fs=0',
'only_matching': True,
}]
def _real_initialize(self): def _real_initialize(self):
self._login() self._login()
@@ -88,11 +90,7 @@ class LyndaIE(SubtitlesInfoExtractor):
self._check_formats(formats, video_id) self._check_formats(formats, video_id)
self._sort_formats(formats) self._sort_formats(formats)
if self._downloader.params.get('listsubtitles', False): subtitles = self.extract_subtitles(video_id, page)
self._list_available_subtitles(video_id, page)
return
subtitles = self._fix_subtitles(self.extract_subtitles(video_id, page))
return { return {
'id': video_id, 'id': video_id,
@ -144,15 +142,7 @@ class LyndaIE(SubtitlesInfoExtractor):
if re.search(self._SUCCESSFUL_LOGIN_REGEX, login_page) is None: if re.search(self._SUCCESSFUL_LOGIN_REGEX, login_page) is None:
raise ExtractorError('Unable to log in') raise ExtractorError('Unable to log in')
def _fix_subtitles(self, subtitles): def _fix_subtitles(self, subs):
if subtitles is None:
return subtitles # subtitles not requested
fixed_subtitles = {}
for k, v in subtitles.items():
subs = json.loads(v)
if len(subs) == 0:
continue
srt = '' srt = ''
for pos in range(0, len(subs) - 1): for pos in range(0, len(subs) - 1):
seq_current = subs[pos] seq_current = subs[pos]
@ -165,17 +155,18 @@ class LyndaIE(SubtitlesInfoExtractor):
continue continue
appear_time = m_current.group('timecode') appear_time = m_current.group('timecode')
disappear_time = m_next.group('timecode') disappear_time = m_next.group('timecode')
text = seq_current['Caption'] text = seq_current['Caption'].lstrip()
srt += '%s\r\n%s --> %s\r\n%s' % (str(pos), appear_time, disappear_time, text) srt += '%s\r\n%s --> %s\r\n%s' % (str(pos), appear_time, disappear_time, text)
if srt: if srt:
fixed_subtitles[k] = srt return srt
return fixed_subtitles
def _get_available_subtitles(self, video_id, webpage): def _get_subtitles(self, video_id, webpage):
url = 'http://www.lynda.com/ajax/player?videoId=%s&type=transcript' % video_id url = 'http://www.lynda.com/ajax/player?videoId=%s&type=transcript' % video_id
sub = self._download_webpage(url, None, False) subs = self._download_json(url, None, False)
sub_json = json.loads(sub) if subs:
return {'en': url} if len(sub_json) > 0 else {} return {'en': [{'ext': 'srt', 'data': self._fix_subtitles(subs)}]}
else:
return {}
class LyndaCourseIE(InfoExtractor): class LyndaCourseIE(InfoExtractor):
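The Lynda change is the pattern repeated through the rest of this commit: SubtitlesInfoExtractor goes away, extractors subclass InfoExtractor and implement _get_subtitles(), and subtitles become a dict mapping a language code to a list of rendition dicts, each carrying an 'ext' plus either a 'url' to fetch or inline 'data'. A minimal sketch of the shape (values invented):

    subtitles = {
        'en': [
            # inline payload, as Lynda returns after _fix_subtitles
            {'ext': 'srt', 'data': '0\r\n00:00:00,000 --> 00:00:01,000\r\nHi'},
        ],
        'nl': [
            # URL form, as the NPO change below returns
            {'ext': 'vtt', 'url': 'http://e.omroep.nl/tt888/VIDEO_ID'},
        ],
    }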

View File

@ -5,9 +5,6 @@ import json
from .common import InfoExtractor from .common import InfoExtractor
from .youtube import YoutubeIE from .youtube import YoutubeIE
from ..compat import (
compat_urlparse,
)
from ..utils import ( from ..utils import (
clean_html, clean_html,
ExtractorError, ExtractorError,
@ -108,7 +105,6 @@ class OCWMITIE(InfoExtractor):
'upload_date': '20121109', 'upload_date': '20121109',
'uploader_id': 'MIT', 'uploader_id': 'MIT',
'uploader': 'MIT OpenCourseWare', 'uploader': 'MIT OpenCourseWare',
# 'subtitles': 'http://ocw.mit.edu/courses/electrical-engineering-and-computer-science/6-041-probabilistic-systems-analysis-and-applied-probability-fall-2010/video-lectures/lecture-7-multiple-variables-expectations-independence/MIT6_041F11_lec07_300k.mp4.srt'
} }
}, },
{ {
@ -121,7 +117,6 @@ class OCWMITIE(InfoExtractor):
'uploader_id': 'MIT', 'uploader_id': 'MIT',
'uploader': 'MIT OpenCourseWare', 'uploader': 'MIT OpenCourseWare',
'description': 'This section contains lecture video excerpts, lecture notes, an interactive mathlet with supporting documents, and problem solving videos.', 'description': 'This section contains lecture video excerpts, lecture notes, an interactive mathlet with supporting documents, and problem solving videos.',
# 'subtitles': 'http://ocw.mit.edu//courses/mathematics/18-01sc-single-variable-calculus-fall-2010/ocw-18.01-f07-lec01_300k.SRT'
} }
} }
] ]
@ -140,7 +135,6 @@ class OCWMITIE(InfoExtractor):
metadata = re.sub(r'[\'"]', '', embed_chapter_media.group(1)) metadata = re.sub(r'[\'"]', '', embed_chapter_media.group(1))
metadata = re.split(r', ?', metadata) metadata = re.split(r', ?', metadata)
yt = metadata[1] yt = metadata[1]
subs = compat_urlparse.urljoin(self._BASE_URL, metadata[7])
else: else:
# search for call to ocw_embed_chapter_media(container_id, media_url, provider, page_url, image_url, captions_file) # search for call to ocw_embed_chapter_media(container_id, media_url, provider, page_url, image_url, captions_file)
embed_media = re.search(r'ocw_embed_media\((.+?)\)', webpage) embed_media = re.search(r'ocw_embed_media\((.+?)\)', webpage)
@ -148,7 +142,6 @@ class OCWMITIE(InfoExtractor):
metadata = re.sub(r'[\'"]', '', embed_media.group(1)) metadata = re.sub(r'[\'"]', '', embed_media.group(1))
metadata = re.split(r', ?', metadata) metadata = re.split(r', ?', metadata)
yt = metadata[1] yt = metadata[1]
subs = compat_urlparse.urljoin(self._BASE_URL, metadata[5])
else: else:
raise ExtractorError('Unable to find embedded YouTube video.') raise ExtractorError('Unable to find embedded YouTube video.')
video_id = YoutubeIE.extract_id(yt) video_id = YoutubeIE.extract_id(yt)
@ -159,7 +152,5 @@ class OCWMITIE(InfoExtractor):
'title': title, 'title': title,
'description': description, 'description': description,
'url': yt, 'url': yt,
'url_transparent'
'subtitles': subs,
'ie_key': 'Youtube', 'ie_key': 'Youtube',
} }
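The deleted 'url_transparent' line was itself broken: with the comma missing, Python's implicit string-literal concatenation fuses the two keys, so the dict never carried a real 'subtitles' entry. A standalone demonstration (placeholder value):

    d = {
        'url_transparent'
        'subtitles': 'http://example.com/lec.srt',  # placeholder
    }
    # the two adjacent literals silently merge into one bogus key
    assert list(d) == ['url_transparentsubtitles']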

View File

@ -18,7 +18,7 @@ class MiTeleIE(InfoExtractor):
IE_NAME = 'mitele.es' IE_NAME = 'mitele.es'
_VALID_URL = r'http://www\.mitele\.es/[^/]+/[^/]+/[^/]+/(?P<id>[^/]+)/' _VALID_URL = r'http://www\.mitele\.es/[^/]+/[^/]+/[^/]+/(?P<id>[^/]+)/'
_TEST = { _TESTS = [{
'url': 'http://www.mitele.es/programas-tv/diario-de/la-redaccion/programa-144/', 'url': 'http://www.mitele.es/programas-tv/diario-de/la-redaccion/programa-144/',
'md5': '6a75fe9d0d3275bead0cb683c616fddb', 'md5': '6a75fe9d0d3275bead0cb683c616fddb',
'info_dict': { 'info_dict': {
@ -29,7 +29,7 @@ class MiTeleIE(InfoExtractor):
'display_id': 'programa-144', 'display_id': 'programa-144',
'duration': 2913, 'duration': 2913,
}, },
} }]
def _real_extract(self, url): def _real_extract(self, url):
episode = self._match_id(url) episode = self._match_id(url)

View File

@ -5,7 +5,7 @@ from ..utils import int_or_none
class MporaIE(InfoExtractor): class MporaIE(InfoExtractor):
_VALID_URL = r'https?://(www\.)?mpora\.(?:com|de)/videos/(?P<id>[^?#/]+)' _VALID_URL = r'https?://(?:www\.)?mpora\.(?:com|de)/videos/(?P<id>[^?#/]+)'
IE_NAME = 'MPORA' IE_NAME = 'MPORA'
_TEST = { _TEST = {
@ -25,7 +25,9 @@ class MporaIE(InfoExtractor):
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
data_json = self._search_regex( data_json = self._search_regex(
r"new FM\.Player\('[^']+',\s*(\{.*?)\).player;", webpage, 'json') [r"new FM\.Player\('[^']+',\s*(\{.*?)\).player;",
r"new\s+FM\.Kaltura\.Player\('[^']+'\s*,\s*({.+?})\);"],
webpage, 'json')
data = self._parse_json(data_json, video_id) data = self._parse_json(data_json, video_id)
uploader = data['info_overlay'].get('username') uploader = data['info_overlay'].get('username')
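The MPORA fix passes a list of patterns instead of a single one; _search_regex tries them in order and returns the first hit. A plain-re sketch of that first-match-wins behaviour (search_first is illustrative, not the real helper):

    import re

    def search_first(patterns, text):
        # try each pattern in turn, as _search_regex does for a list
        for pattern in patterns:
            mobj = re.search(pattern, text)
            if mobj:
                return mobj.group(1)
        return None

    html = "new FM.Kaltura.Player('abc', {\"id\": 1});"
    assert search_first(
        [r"new FM\.Player\('[^']+',\s*(\{.*?)\).player;",
         r"new\s+FM\.Kaltura\.Player\('[^']+'\s*,\s*({.+?})\);"],
        html) == '{"id": 1}'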

View File

@ -2,7 +2,7 @@ from __future__ import unicode_literals
import re import re
from .subtitles import SubtitlesInfoExtractor from .common import InfoExtractor
from ..compat import ( from ..compat import (
compat_urllib_parse, compat_urllib_parse,
compat_urllib_request, compat_urllib_request,
@ -23,7 +23,7 @@ def _media_xml_tag(tag):
return '{http://search.yahoo.com/mrss/}%s' % tag return '{http://search.yahoo.com/mrss/}%s' % tag
class MTVServicesInfoExtractor(SubtitlesInfoExtractor): class MTVServicesInfoExtractor(InfoExtractor):
_MOBILE_TEMPLATE = None _MOBILE_TEMPLATE = None
@staticmethod @staticmethod
@ -95,25 +95,15 @@ class MTVServicesInfoExtractor(SubtitlesInfoExtractor):
def _extract_subtitles(self, mdoc, mtvn_id): def _extract_subtitles(self, mdoc, mtvn_id):
subtitles = {} subtitles = {}
FORMATS = {
'scc': 'cea-608',
'eia-608': 'cea-608',
'xml': 'ttml',
}
subtitles_format = FORMATS.get(
self._downloader.params.get('subtitlesformat'), 'ttml')
for transcript in mdoc.findall('.//transcript'): for transcript in mdoc.findall('.//transcript'):
if transcript.get('kind') != 'captions': if transcript.get('kind') != 'captions':
continue continue
lang = transcript.get('srclang') lang = transcript.get('srclang')
for typographic in transcript.findall('./typographic'): subtitles[lang] = [{
captions_format = typographic.get('format') 'url': compat_str(typographic.get('src')),
if captions_format == subtitles_format: 'ext': typographic.get('format')
subtitles[lang] = compat_str(typographic.get('src')) } for typographic in transcript.findall('./typographic')]
break return subtitles
if self._downloader.params.get('listsubtitles', False):
self._list_available_subtitles(mtvn_id, subtitles)
return self.extract_subtitles(mtvn_id, subtitles)
def _get_video_info(self, itemdoc): def _get_video_info(self, itemdoc):
uri = itemdoc.find('guid').text uri = itemdoc.find('guid').text
@ -195,8 +185,6 @@ class MTVServicesInfoExtractor(SubtitlesInfoExtractor):
[r'data-mgid="(.*?)"', r'swfobject.embedSWF\(".*?(mgid:.*?)"'], [r'data-mgid="(.*?)"', r'swfobject.embedSWF\(".*?(mgid:.*?)"'],
webpage, 'mgid') webpage, 'mgid')
videos_info = self._get_videos_info(mgid) videos_info = self._get_videos_info(mgid)
if self._downloader.params.get('listsubtitles', False):
return
if self._downloader.params.get('joinparts'): if self._downloader.params.get('joinparts'):
show_name = self._html_search_regex( show_name = self._html_search_regex(
r'<h1.*?class="[^"]*title[^"]*".*?>(.*?)</h1>', r'<h1.*?class="[^"]*title[^"]*".*?>(.*?)</h1>',
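After this change the MTV helper no longer pre-filters by --sub-format; it exposes every <typographic> rendition per language and lets the downloader pick. A self-contained sketch against an invented mRSS fragment:

    import xml.etree.ElementTree as ET

    mdoc = ET.fromstring(
        '<rss><transcript kind="captions" srclang="en">'
        '<typographic format="ttml" src="http://example.com/caps.ttml"/>'
        '<typographic format="cea-608" src="http://example.com/caps.scc"/>'
        '</transcript></rss>')

    subtitles = {}
    for transcript in mdoc.findall('.//transcript'):
        if transcript.get('kind') != 'captions':
            continue
        subtitles[transcript.get('srclang')] = [{
            'url': typographic.get('src'),
            'ext': typographic.get('format'),
        } for typographic in transcript.findall('./typographic')]

    assert [s['ext'] for s in subtitles['en']] == ['ttml', 'cea-608']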

View File

@ -3,17 +3,13 @@ from __future__ import unicode_literals
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import (
parse_duration,
unified_strdate,
)
class MusicVaultIE(InfoExtractor): class MusicVaultIE(InfoExtractor):
_VALID_URL = r'https?://www\.musicvault\.com/(?P<uploader_id>[^/?#]*)/video/(?P<display_id>[^/?#]*)_(?P<id>[0-9]+)\.html' _VALID_URL = r'https?://www\.musicvault\.com/(?P<uploader_id>[^/?#]*)/video/(?P<display_id>[^/?#]*)_(?P<id>[0-9]+)\.html'
_TEST = { _TEST = {
'url': 'http://www.musicvault.com/the-allman-brothers-band/video/straight-from-the-heart_1010863.html', 'url': 'http://www.musicvault.com/the-allman-brothers-band/video/straight-from-the-heart_1010863.html',
'md5': '2cdbb3ae75f7fb3519821507d2fb3c15', 'md5': '3adcbdb3dcc02d647539e53f284ba171',
'info_dict': { 'info_dict': {
'id': '1010863', 'id': '1010863',
'ext': 'mp4', 'ext': 'mp4',
@ -22,9 +18,10 @@ class MusicVaultIE(InfoExtractor):
'duration': 244, 'duration': 244,
'uploader': 'The Allman Brothers Band', 'uploader': 'The Allman Brothers Band',
'thumbnail': 're:^https?://.*/thumbnail/.*', 'thumbnail': 're:^https?://.*/thumbnail/.*',
'upload_date': '19811216', 'upload_date': '20131219',
'location': 'Capitol Theatre (Passaic, NJ)', 'location': 'Capitol Theatre (Passaic, NJ)',
'description': 'Listen to The Allman Brothers Band perform Straight from the Heart at Capitol Theatre (Passaic, NJ) on Dec 16, 1981', 'description': 'Listen to The Allman Brothers Band perform Straight from the Heart at Capitol Theatre (Passaic, NJ) on Dec 16, 1981',
'timestamp': int,
} }
} }
@ -43,34 +40,24 @@ class MusicVaultIE(InfoExtractor):
r'<h1.*?>(.*?)</h1>', data_div, 'uploader', fatal=False) r'<h1.*?>(.*?)</h1>', data_div, 'uploader', fatal=False)
title = self._html_search_regex( title = self._html_search_regex(
r'<h2.*?>(.*?)</h2>', data_div, 'title') r'<h2.*?>(.*?)</h2>', data_div, 'title')
upload_date = unified_strdate(self._html_search_regex(
r'<h3.*?>(.*?)</h3>', data_div, 'uploader', fatal=False))
location = self._html_search_regex( location = self._html_search_regex(
r'<h4.*?>(.*?)</h4>', data_div, 'location', fatal=False) r'<h4.*?>(.*?)</h4>', data_div, 'location', fatal=False)
duration = parse_duration(self._html_search_meta('duration', webpage))
VIDEO_URL_TEMPLATE = 'http://cdnapi.kaltura.com/p/%(uid)s/sp/%(wid)s/playManifest/entryId/%(entry_id)s/format/url/protocol/http'
kaltura_id = self._search_regex( kaltura_id = self._search_regex(
r'<div id="video-detail-player" data-kaltura-id="([^"]+)"', r'<div id="video-detail-player" data-kaltura-id="([^"]+)"',
webpage, 'kaltura ID') webpage, 'kaltura ID')
video_url = VIDEO_URL_TEMPLATE % { wid = self._search_regex(r'/wid/_([0-9]+)/', webpage, 'wid')
'entry_id': kaltura_id,
'wid': self._search_regex(r'/wid/_([0-9]+)/', webpage, 'wid'),
'uid': self._search_regex(r'uiconf_id/([0-9]+)/', webpage, 'uid'),
}
return { return {
'id': mobj.group('id'), 'id': mobj.group('id'),
'url': video_url, '_type': 'url_transparent',
'ext': 'mp4', 'url': 'kaltura:%s:%s' % (wid, kaltura_id),
'ie_key': 'Kaltura',
'display_id': display_id, 'display_id': display_id,
'uploader_id': mobj.group('uploader_id'), 'uploader_id': mobj.group('uploader_id'),
'thumbnail': thumbnail, 'thumbnail': thumbnail,
'description': self._html_search_meta('description', webpage), 'description': self._html_search_meta('description', webpage),
'upload_date': upload_date,
'location': location, 'location': location,
'title': title, 'title': title,
'uploader': uploader, 'uploader': uploader,
'duration': duration,
} }
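Instead of templating a Kaltura playManifest URL by hand, MusicVault now emits a url_transparent result: youtube-dl resolves the kaltura:<wid>:<entry_id> URL with the Kaltura extractor and merges the fields given here on top of its result. A sketch of the shape (ids invented):

    wid, kaltura_id = '12345', '0_deadbeef'  # hypothetical partner/entry ids
    result = {
        '_type': 'url_transparent',
        'url': 'kaltura:%s:%s' % (wid, kaltura_id),
        'ie_key': 'Kaltura',
        # fields scraped from the MusicVault page override Kaltura's
        'title': 'Straight from the Heart',
        'location': 'Capitol Theatre (Passaic, NJ)',
    }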

View File

@ -1,6 +1,5 @@
from __future__ import unicode_literals from __future__ import unicode_literals
from .subtitles import SubtitlesInfoExtractor
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
fix_xml_ampersands, fix_xml_ampersands,
@ -12,7 +11,7 @@ from ..utils import (
) )
class NPOBaseIE(SubtitlesInfoExtractor): class NPOBaseIE(InfoExtractor):
def _get_token(self, video_id): def _get_token(self, video_id):
token_page = self._download_webpage( token_page = self._download_webpage(
'http://ida.omroep.nl/npoplayer/i.js', 'http://ida.omroep.nl/npoplayer/i.js',
@ -164,13 +163,10 @@ class NPOIE(NPOBaseIE):
subtitles = {} subtitles = {}
if metadata.get('tt888') == 'ja': if metadata.get('tt888') == 'ja':
subtitles['nl'] = 'http://e.omroep.nl/tt888/%s' % video_id subtitles['nl'] = [{
'ext': 'vtt',
if self._downloader.params.get('listsubtitles', False): 'url': 'http://e.omroep.nl/tt888/%s' % video_id,
self._list_available_subtitles(video_id, subtitles) }]
return
subtitles = self.extract_subtitles(video_id, subtitles)
return { return {
'id': video_id, 'id': video_id,

View File

@ -4,13 +4,13 @@ from __future__ import unicode_literals
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_str
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
float_or_none, float_or_none,
parse_duration, parse_duration,
unified_strdate, unified_strdate,
) )
from .subtitles import SubtitlesInfoExtractor
class NRKIE(InfoExtractor): class NRKIE(InfoExtractor):
@ -73,7 +73,7 @@ class NRKIE(InfoExtractor):
} }
class NRKTVIE(SubtitlesInfoExtractor): class NRKTVIE(InfoExtractor):
_VALID_URL = r'(?P<baseurl>http://tv\.nrk(?:super)?\.no/)(?:serie/[^/]+|program)/(?P<id>[a-zA-Z]{4}\d{8})(?:/\d{2}-\d{2}-\d{4})?(?:#del=(?P<part_id>\d+))?' _VALID_URL = r'(?P<baseurl>http://tv\.nrk(?:super)?\.no/)(?:serie/[^/]+|program)/(?P<id>[a-zA-Z]{4}\d{8})(?:/\d{2}-\d{2}-\d{4})?(?:#del=(?P<part_id>\d+))?'
_TESTS = [ _TESTS = [
@ -156,10 +156,12 @@ class NRKTVIE(SubtitlesInfoExtractor):
if self._downloader.params.get('verbose', False): if self._downloader.params.get('verbose', False):
self.to_screen('[debug] %s' % txt) self.to_screen('[debug] %s' % txt)
def _extract_captions(self, subtitlesurl, video_id, baseurl): def _get_subtitles(self, subtitlesurl, video_id, baseurl):
url = "%s%s" % (baseurl, subtitlesurl) url = "%s%s" % (baseurl, subtitlesurl)
self._debug_print('%s: Subtitle url: %s' % (video_id, url)) self._debug_print('%s: Subtitle url: %s' % (video_id, url))
captions = self._download_xml(url, video_id, 'Downloading subtitles') captions = self._download_xml(
url, video_id, 'Downloading subtitles',
transform_source=lambda s: s.replace(r'<br />', '\r\n'))
lang = captions.get('lang', 'no') lang = captions.get('lang', 'no')
ps = captions.findall('./{0}body/{0}div/{0}p'.format('{http://www.w3.org/ns/ttml}')) ps = captions.findall('./{0}body/{0}div/{0}p'.format('{http://www.w3.org/ns/ttml}'))
srt = '' srt = ''
@ -168,9 +170,11 @@ class NRKTVIE(SubtitlesInfoExtractor):
duration = parse_duration(p.get('dur')) duration = parse_duration(p.get('dur'))
starttime = self._seconds2str(begin) starttime = self._seconds2str(begin)
endtime = self._seconds2str(begin + duration) endtime = self._seconds2str(begin + duration)
text = '\n'.join(p.itertext()) srt += '%s\r\n%s --> %s\r\n%s\r\n\r\n' % (compat_str(pos), starttime, endtime, p.text)
srt += '%s\r\n%s --> %s\r\n%s\r\n\r\n' % (str(pos), starttime, endtime, text) return {lang: [
return {lang: srt} {'ext': 'ttml', 'url': url},
{'ext': 'srt', 'data': srt},
]}
def _extract_f4m(self, manifest_url, video_id): def _extract_f4m(self, manifest_url, video_id):
return self._extract_f4m_formats(manifest_url + '?hdcore=3.1.1&plugin=aasp-3.1.1.69.124', video_id) return self._extract_f4m_formats(manifest_url + '?hdcore=3.1.1&plugin=aasp-3.1.1.69.124', video_id)
@ -243,10 +247,7 @@ class NRKTVIE(SubtitlesInfoExtractor):
webpage, 'subtitle URL', default=None) webpage, 'subtitle URL', default=None)
subtitles = None subtitles = None
if subtitles_url: if subtitles_url:
subtitles = self._extract_captions(subtitles_url, video_id, baseurl) subtitles = self.extract_subtitles(subtitles_url, video_id, baseurl)
if self._downloader.params.get('listsubtitles', False):
self._list_available_subtitles(video_id, subtitles)
return
return { return {
'id': video_id, 'id': video_id,
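The SRT string NRK assembles follows the usual cue layout: index, "start --> end" timecodes, text, blank line. A standalone sketch with an assumed seconds-to-timecode helper (the extractor's own _seconds2str is not shown in this hunk):

    def seconds2str(sec):
        # assumed formatting: HH:MM:SS,mmm as SRT expects
        ms = int(round(sec * 1000))
        hours, ms = divmod(ms, 3600000)
        minutes, ms = divmod(ms, 60000)
        seconds, ms = divmod(ms, 1000)
        return '%02d:%02d:%02d,%03d' % (hours, minutes, seconds, ms)

    srt = ''
    cues = [(0.0, 2.5, 'Hei'), (2.5, 1.5, 'Ha det')]  # (begin, dur, text), invented
    for pos, (begin, dur, text) in enumerate(cues):
        srt += '%s\r\n%s --> %s\r\n%s\r\n\r\n' % (
            pos, seconds2str(begin), seconds2str(begin + dur), text)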

View File

@ -0,0 +1,85 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
unified_strdate,
int_or_none,
qualities,
)
class OdnoklassnikiIE(InfoExtractor):
_VALID_URL = r'https?://(?:odnoklassniki|ok)\.ru/(?:video|web-api/video/moviePlayer)/(?P<id>\d+)'
_TESTS = [{
'url': 'http://ok.ru/video/20079905452',
'md5': '8e24ad2da6f387948e7a7d44eb8668fe',
'info_dict': {
'id': '20079905452',
'ext': 'mp4',
'title': 'Культура меняет нас (прекрасный ролик!))',
'duration': 100,
'upload_date': '20141207',
'uploader_id': '330537914540',
'uploader': 'Виталий Добровольский',
'like_count': int,
'age_limit': 0,
},
}, {
'url': 'http://ok.ru/web-api/video/moviePlayer/20079905452',
'only_matching': True,
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
player = self._parse_json(
self._search_regex(
r"OKVideo\.start\(({.+?})\s*,\s*'VideoAutoplay_player'", webpage, 'player'),
video_id)
metadata = self._parse_json(player['flashvars']['metadata'], video_id)
movie = metadata['movie']
title = movie['title']
thumbnail = movie.get('poster')
duration = int_or_none(movie.get('duration'))
author = metadata.get('author', {})
uploader_id = author.get('id')
uploader = author.get('name')
upload_date = unified_strdate(self._html_search_meta(
'ya:ovs:upload_date', webpage, 'upload date'))
age_limit = None
adult = self._html_search_meta(
'ya:ovs:adult', webpage, 'age limit')
if adult:
age_limit = 18 if adult == 'true' else 0
like_count = int_or_none(metadata.get('likeCount'))
quality = qualities(('mobile', 'lowest', 'low', 'sd', 'hd'))
formats = [{
'url': f['url'],
'ext': 'mp4',
'format_id': f['name'],
'quality': quality(f['name']),
} for f in metadata['videos']]
return {
'id': video_id,
'title': title,
'thumbnail': thumbnail,
'duration': duration,
'upload_date': upload_date,
'uploader': uploader,
'uploader_id': uploader_id,
'like_count': like_count,
'age_limit': age_limit,
'formats': formats,
}
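The qualities() helper used above turns an ordered tuple into a ranking function: a later position means a higher-preference format, and unknown ids rank below everything. A sketch of the idea, matching what utils.qualities provides:

    def qualities(quality_ids):
        def q(qid):
            try:
                return quality_ids.index(qid)
            except ValueError:
                return -1
        return q

    quality = qualities(('mobile', 'lowest', 'low', 'sd', 'hd'))
    assert quality('hd') > quality('sd') > quality('mobile')
    assert quality('unknown') == -1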

View File

@ -0,0 +1,88 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
ExtractorError,
unified_strdate,
int_or_none,
)
class Puls4IE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?puls4\.com/video/[^/]+/play/(?P<id>[0-9]+)'
_TESTS = [{
'url': 'http://www.puls4.com/video/pro-und-contra/play/2716816',
'md5': '49f6a6629747eeec43cef6a46b5df81d',
'info_dict': {
'id': '2716816',
'ext': 'mp4',
'title': 'Pro und Contra vom 23.02.2015',
'description': 'md5:293e44634d9477a67122489994675db6',
'duration': 2989,
'upload_date': '20150224',
'uploader': 'PULS_4',
},
'skip': 'Only works from Germany',
}, {
'url': 'http://www.puls4.com/video/kult-spielfilme/play/1298106',
'md5': '6a48316c8903ece8dab9b9a7bf7a59ec',
'info_dict': {
'id': '1298106',
'ext': 'mp4',
'title': 'Lucky Fritz',
},
'skip': 'Only works from Germany',
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
error_message = self._html_search_regex(
r'<div class="message-error">(.+?)</div>',
webpage, 'error message', default=None)
if error_message:
raise ExtractorError(
'%s returned error: %s' % (self.IE_NAME, error_message), expected=True)
real_url = self._html_search_regex(
r'\"fsk-button\".+?href=\"([^"]+)',
webpage, 'fsk_button', default=None)
if real_url:
webpage = self._download_webpage(real_url, video_id)
player = self._search_regex(
r'p4_video_player(?:_iframe)?\("video_\d+_container"\s*,(.+?)\);\s*\}',
webpage, 'player')
player_json = self._parse_json(
'[%s]' % player, video_id,
transform_source=lambda s: s.replace('undefined,', ''))
formats = None
result = None
for v in player_json:
if isinstance(v, list) and not formats:
formats = [{
'url': f['url'],
'format': 'hd' if f.get('hd') else 'sd',
'width': int_or_none(f.get('size_x')),
'height': int_or_none(f.get('size_y')),
'tbr': int_or_none(f.get('bitrate')),
} for f in v]
self._sort_formats(formats)
elif isinstance(v, dict) and not result:
result = {
'id': video_id,
'title': v['videopartname'].strip(),
'description': v.get('videotitle'),
'duration': int_or_none(v.get('videoduration') or v.get('episodeduration')),
'upload_date': unified_strdate(v.get('clipreleasetime')),
'uploader': v.get('channel'),
}
result['formats'] = formats
return result
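The captured player blob is a raw JavaScript argument list, so the extractor brackets it into a JSON array and strips the leading 'undefined,' placeholder before parsing. A standalone sketch with an invented payload:

    import json

    player = ('undefined,'
              '[{"url": "http://example.com/v.mp4", "hd": true, "bitrate": "1500"}],'
              '{"videopartname": "Lucky Fritz ", "channel": "PULS 4"}')
    player_json = json.loads('[%s]' % player.replace('undefined,', ''))

    formats = [v for v in player_json if isinstance(v, list)][0]
    meta = [v for v in player_json if isinstance(v, dict)][0]
    assert meta['videopartname'].strip() == 'Lucky Fritz'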

View File

@ -0,0 +1,88 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
js_to_json,
unescapeHTML,
int_or_none,
)
class R7IE(InfoExtractor):
_VALID_URL = r'''(?x)https?://
(?:
(?:[a-zA-Z]+)\.r7\.com(?:/[^/]+)+/idmedia/|
noticias\.r7\.com(?:/[^/]+)+/[^/]+-|
player\.r7\.com/video/i/
)
(?P<id>[\da-f]{24})
'''
_TESTS = [{
'url': 'http://videos.r7.com/policiais-humilham-suspeito-a-beira-da-morte-morre-com-dignidade-/idmedia/54e7050b0cf2ff57e0279389.html',
'md5': '403c4e393617e8e8ddc748978ee8efde',
'info_dict': {
'id': '54e7050b0cf2ff57e0279389',
'ext': 'mp4',
'title': 'Policiais humilham suspeito à beira da morte: "Morre com dignidade"',
'thumbnail': 're:^https?://.*\.jpg$',
'duration': 98,
'like_count': int,
'view_count': int,
},
}, {
'url': 'http://esportes.r7.com/videos/cigano-manda-recado-aos-fas/idmedia/4e176727b51a048ee6646a1b.html',
'only_matching': True,
}, {
'url': 'http://noticias.r7.com/record-news/video/representante-do-instituto-sou-da-paz-fala-sobre-fim-do-estatuto-do-desarmamento-5480fc580cf2285b117f438d/',
'only_matching': True,
}, {
'url': 'http://player.r7.com/video/i/54e7050b0cf2ff57e0279389?play=true&video=http://vsh.r7.com/54e7050b0cf2ff57e0279389/ER7_RE_BG_MORTE_JOVENS_570kbps_2015-02-2009f17818-cc82-4c8f-86dc-89a66934e633-ATOS_copy.mp4&linkCallback=http://videos.r7.com/policiais-humilham-suspeito-a-beira-da-morte-morre-com-dignidade-/idmedia/54e7050b0cf2ff57e0279389.html&thumbnail=http://vtb.r7.com/ER7_RE_BG_MORTE_JOVENS_570kbps_2015-02-2009f17818-cc82-4c8f-86dc-89a66934e633-thumb.jpg&idCategory=192&share=true&layout=full&full=true',
'only_matching': True,
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(
'http://player.r7.com/video/i/%s' % video_id, video_id)
item = self._parse_json(js_to_json(self._search_regex(
r'(?s)var\s+item\s*=\s*({.+?});', webpage, 'player')), video_id)
title = unescapeHTML(item['title'])
thumbnail = item.get('init', {}).get('thumbUri')
duration = None
statistics = item.get('statistics', {})
like_count = int_or_none(statistics.get('likes'))
view_count = int_or_none(statistics.get('views'))
formats = []
for format_key, format_dict in item['playlist'][0].items():
src = format_dict.get('src')
if not src:
continue
format_id = format_dict.get('format') or format_key
if duration is None:
duration = format_dict.get('duration')
if '.f4m' in src:
formats.extend(self._extract_f4m_formats(src, video_id, preference=-1))
elif src.endswith('.m3u8'):
formats.extend(self._extract_m3u8_formats(src, video_id, 'mp4', preference=-2))
else:
formats.append({
'url': src,
'format_id': format_id,
})
self._sort_formats(formats)
return {
'id': video_id,
'title': title,
'thumbnail': thumbnail,
'duration': duration,
'like_count': like_count,
'view_count': view_count,
'formats': formats,
}

View File

@ -2,7 +2,7 @@ from __future__ import unicode_literals
import re import re
from .subtitles import SubtitlesInfoExtractor from .common import InfoExtractor
from ..compat import ( from ..compat import (
compat_urllib_parse, compat_urllib_parse,
) )
@ -12,7 +12,7 @@ from ..utils import (
) )
class RaiIE(SubtitlesInfoExtractor): class RaiIE(InfoExtractor):
_VALID_URL = r'(?P<url>http://(?:.+?\.)?(?:rai\.it|rai\.tv|rainews\.it)/dl/.+?-(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})(?:-.+?)?\.html)' _VALID_URL = r'(?P<url>http://(?:.+?\.)?(?:rai\.it|rai\.tv|rainews\.it)/dl/.+?-(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})(?:-.+?)?\.html)'
_TESTS = [ _TESTS = [
{ {
@ -89,15 +89,7 @@ class RaiIE(SubtitlesInfoExtractor):
'ext': 'mp4', 'ext': 'mp4',
}) })
if self._downloader.params.get('listsubtitles', False): subtitles = self.extract_subtitles(video_id, url)
page = self._download_webpage(url, video_id)
self._list_available_subtitles(video_id, page)
return
subtitles = {}
if self._have_to_download_any_subtitles:
page = self._download_webpage(url, video_id)
subtitles = self.extract_subtitles(video_id, page)
return { return {
'id': video_id, 'id': video_id,
@ -111,7 +103,8 @@ class RaiIE(SubtitlesInfoExtractor):
'subtitles': subtitles, 'subtitles': subtitles,
} }
def _get_available_subtitles(self, video_id, webpage): def _get_subtitles(self, video_id, url):
webpage = self._download_webpage(url, video_id)
subtitles = {} subtitles = {}
m = re.search(r'<meta name="closedcaption" content="(?P<captions>[^"]+)"', webpage) m = re.search(r'<meta name="closedcaption" content="(?P<captions>[^"]+)"', webpage)
if m: if m:
@ -120,5 +113,8 @@ class RaiIE(SubtitlesInfoExtractor):
SRT_EXT = '.srt' SRT_EXT = '.srt'
if captions.endswith(STL_EXT): if captions.endswith(STL_EXT):
captions = captions[:-len(STL_EXT)] + SRT_EXT captions = captions[:-len(STL_EXT)] + SRT_EXT
subtitles['it'] = 'http://www.rai.tv%s' % compat_urllib_parse.quote(captions) subtitles['it'] = [{
'ext': 'srt',
'url': 'http://www.rai.tv%s' % compat_urllib_parse.quote(captions),
}]
return subtitles return subtitles

View File

@ -146,7 +146,7 @@ class RTLnowIE(InfoExtractor):
mobj = re.search(r'.*/(?P<hoster>[^/]+)/videos/(?P<play_path>.+)\.f4m', filename.text) mobj = re.search(r'.*/(?P<hoster>[^/]+)/videos/(?P<play_path>.+)\.f4m', filename.text)
if mobj: if mobj:
fmt = { fmt = {
'url': 'rtmpe://fmspay-fra2.rtl.de/' + mobj.group('hoster'), 'url': 'rtmpe://fms.rtl.de/' + mobj.group('hoster'),
'play_path': 'mp4:' + mobj.group('play_path'), 'play_path': 'mp4:' + mobj.group('play_path'),
'page_url': url, 'page_url': url,
'player_url': video_page_url + 'includes/vodplayer.swf', 'player_url': video_page_url + 'includes/vodplayer.swf',

View File

@ -6,9 +6,11 @@ import re
import time import time
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_urlparse
from ..utils import ( from ..utils import (
struct_unpack, float_or_none,
remove_end, remove_end,
struct_unpack,
) )
@ -66,6 +68,7 @@ class RTVEALaCartaIE(InfoExtractor):
'id': '2491869', 'id': '2491869',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Balonmano - Swiss Cup masculina. Final: España-Suecia', 'title': 'Balonmano - Swiss Cup masculina. Final: España-Suecia',
'duration': 5024.566,
}, },
}, { }, {
'note': 'Live stream', 'note': 'Live stream',
@ -96,12 +99,14 @@ class RTVEALaCartaIE(InfoExtractor):
).replace('.net.rtve', '.multimedia.cdn.rtve') ).replace('.net.rtve', '.multimedia.cdn.rtve')
video_path = self._download_webpage( video_path = self._download_webpage(
auth_url, video_id, 'Getting video url') auth_url, video_id, 'Getting video url')
# Use mvod.akcdn instead of flash.akamaihd.multimedia.cdn to get # Use mvod1.akcdn instead of flash.akamaihd.multimedia.cdn to get
# the right Content-Length header and the mp4 format # the right Content-Length header and the mp4 format
video_url = ( video_url = compat_urlparse.urljoin(
'http://mvod.akcdn.rtve.es/{0}&v=2.6.8' 'http://mvod1.akcdn.rtve.es/', video_path)
'&fp=MAC%2016,0,0,296&r=MRUGG&g=OEOJWFXNFGCP'.format(video_path)
) subtitles = None
if info.get('sbtFile') is not None:
subtitles = self.extract_subtitles(video_id, info['sbtFile'])
return { return {
'id': video_id, 'id': video_id,
@ -109,8 +114,18 @@ class RTVEALaCartaIE(InfoExtractor):
'url': video_url, 'url': video_url,
'thumbnail': info.get('image'), 'thumbnail': info.get('image'),
'page_url': url, 'page_url': url,
'subtitles': subtitles,
'duration': float_or_none(info.get('duration'), scale=1000),
} }
def _get_subtitles(self, video_id, sub_file):
subs = self._download_json(
sub_file + '.json', video_id,
'Downloading subtitles info')['page']['items']
return dict(
(s['lang'], [{'ext': 'vtt', 'url': s['src']}])
for s in subs)
class RTVELiveIE(InfoExtractor): class RTVELiveIE(InfoExtractor):
IE_NAME = 'rtve.es:live' IE_NAME = 'rtve.es:live'

View File

@ -7,6 +7,7 @@ from .common import InfoExtractor
class SoundgasmIE(InfoExtractor): class SoundgasmIE(InfoExtractor):
IE_NAME = 'soundgasm'
_VALID_URL = r'https?://(?:www\.)?soundgasm\.net/u/(?P<user>[0-9a-zA-Z_\-]+)/(?P<title>[0-9a-zA-Z_\-]+)' _VALID_URL = r'https?://(?:www\.)?soundgasm\.net/u/(?P<user>[0-9a-zA-Z_\-]+)/(?P<title>[0-9a-zA-Z_\-]+)'
_TEST = { _TEST = {
'url': 'http://soundgasm.net/u/ytdl/Piano-sample', 'url': 'http://soundgasm.net/u/ytdl/Piano-sample',
@ -38,3 +39,26 @@ class SoundgasmIE(InfoExtractor):
'title': audio_title, 'title': audio_title,
'description': description 'description': description
} }
class SoundgasmProfileIE(InfoExtractor):
IE_NAME = 'soundgasm:profile'
_VALID_URL = r'https?://(?:www\.)?soundgasm\.net/u/(?P<id>[^/]+)/?(?:\#.*)?$'
_TEST = {
'url': 'http://soundgasm.net/u/ytdl',
'info_dict': {
'id': 'ytdl',
},
'playlist_count': 1,
}
def _real_extract(self, url):
profile_id = self._match_id(url)
webpage = self._download_webpage(url, profile_id)
entries = [
self.url_result(audio_url, 'Soundgasm')
for audio_url in re.findall(r'href="([^"]+/u/%s/[^"]+)' % profile_id, webpage)]
return self.playlist_result(entries, profile_id)

View File

@ -1,99 +0,0 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
ExtractorError,
)
class SubtitlesInfoExtractor(InfoExtractor):
@property
def _have_to_download_any_subtitles(self):
return any([self._downloader.params.get('writesubtitles', False),
self._downloader.params.get('writeautomaticsub')])
def _list_available_subtitles(self, video_id, webpage):
""" outputs the available subtitles for the video """
sub_lang_list = self._get_available_subtitles(video_id, webpage)
auto_captions_list = self._get_available_automatic_caption(video_id, webpage)
sub_lang = ",".join(list(sub_lang_list.keys()))
self.to_screen('%s: Available subtitles for video: %s' %
(video_id, sub_lang))
auto_lang = ",".join(auto_captions_list.keys())
self.to_screen('%s: Available automatic captions for video: %s' %
(video_id, auto_lang))
def extract_subtitles(self, video_id, webpage):
"""
returns {sub_lang: sub} ,{} if subtitles not found or None if the
subtitles aren't requested.
"""
if not self._have_to_download_any_subtitles:
return None
available_subs_list = {}
if self._downloader.params.get('writeautomaticsub', False):
available_subs_list.update(self._get_available_automatic_caption(video_id, webpage))
if self._downloader.params.get('writesubtitles', False):
available_subs_list.update(self._get_available_subtitles(video_id, webpage))
if not available_subs_list: # error, it didn't get the available subtitles
return {}
if self._downloader.params.get('allsubtitles', False):
sub_lang_list = available_subs_list
else:
if self._downloader.params.get('subtitleslangs', False):
requested_langs = self._downloader.params.get('subtitleslangs')
elif 'en' in available_subs_list:
requested_langs = ['en']
else:
requested_langs = [list(available_subs_list.keys())[0]]
sub_lang_list = {}
for sub_lang in requested_langs:
if sub_lang not in available_subs_list:
self._downloader.report_warning('no closed captions found in the specified language "%s"' % sub_lang)
continue
sub_lang_list[sub_lang] = available_subs_list[sub_lang]
subtitles = {}
for sub_lang, url in sub_lang_list.items():
subtitle = self._request_subtitle_url(sub_lang, url)
if subtitle:
subtitles[sub_lang] = subtitle
return subtitles
def _download_subtitle_url(self, sub_lang, url):
return self._download_webpage(url, None, note=False)
def _request_subtitle_url(self, sub_lang, url):
""" makes the http request for the subtitle """
try:
sub = self._download_subtitle_url(sub_lang, url)
except ExtractorError as err:
self._downloader.report_warning('unable to download video subtitles for %s: %s' % (sub_lang, compat_str(err)))
return
if not sub:
self._downloader.report_warning('Did not fetch video subtitles')
return
return sub
def _get_available_subtitles(self, video_id, webpage):
"""
returns {sub_lang: url} or {} if not available
Must be redefined by the subclasses
"""
# By default, allow implementations to simply pass in the result
assert isinstance(webpage, dict), \
'_get_available_subtitles not implemented'
return webpage
def _get_available_automatic_caption(self, video_id, webpage):
"""
returns {sub_lang: url} or {} if not available
Must be redefined by the subclasses that support automatic captions,
otherwise it will return {}
"""
self._downloader.report_warning('Automatic Captions not supported by this server')
return {}
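With this file deleted, subtitle listing and language selection move out of the extractors entirely; each extractor now only implements _get_subtitles (and, where applicable, _get_automatic_captions). A hedged sketch of the replacement hooks this commit adds to common.InfoExtractor (simplified; the real class carries much more):

    class InfoExtractor(object):
        def extract_subtitles(self, *args, **kwargs):
            # only do network work when the user actually asked for subtitles
            if (self._downloader.params.get('writesubtitles', False) or
                    self._downloader.params.get('listsubtitles')):
                return self._get_subtitles(*args, **kwargs)
            return {}

        def _get_subtitles(self, *args, **kwargs):
            raise NotImplementedError('This method must be implemented by subclasses')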

View File

@ -1,6 +1,8 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
determine_ext, determine_ext,
@ -8,23 +10,40 @@ from ..utils import (
class SVTPlayIE(InfoExtractor): class SVTPlayIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?svtplay\.se/video/(?P<id>[0-9]+)' IE_DESC = 'SVT Play and Öppet arkiv'
_TEST = { _VALID_URL = r'https?://(?:www\.)?(?P<host>svtplay|oppetarkiv)\.se/video/(?P<id>[0-9]+)'
_TESTS = [{
'url': 'http://www.svtplay.se/video/2609989/sm-veckan/sm-veckan-rally-final-sasong-1-sm-veckan-rally-final', 'url': 'http://www.svtplay.se/video/2609989/sm-veckan/sm-veckan-rally-final-sasong-1-sm-veckan-rally-final',
'md5': 'f4a184968bc9c802a9b41316657aaa80', 'md5': 'ade3def0643fa1c40587a422f98edfd9',
'info_dict': { 'info_dict': {
'id': '2609989', 'id': '2609989',
'ext': 'mp4', 'ext': 'flv',
'title': 'SM veckan vinter, Örebro - Rally, final', 'title': 'SM veckan vinter, Örebro - Rally, final',
'duration': 4500, 'duration': 4500,
'thumbnail': 're:^https?://.*[\.-]jpg$', 'thumbnail': 're:^https?://.*[\.-]jpg$',
'age_limit': 0,
}, },
} }, {
'url': 'http://www.oppetarkiv.se/video/1058509/rederiet-sasong-1-avsnitt-1-av-318',
'md5': 'c3101a17ce9634f4c1f9800f0746c187',
'info_dict': {
'id': '1058509',
'ext': 'flv',
'title': 'Farlig kryssning',
'duration': 2566,
'thumbnail': 're:^https?://.*[\.-]jpg$',
'age_limit': 0,
},
'skip': 'Only works from Sweden',
}]
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
host = mobj.group('host')
info = self._download_json( info = self._download_json(
'http://www.svtplay.se/video/%s?output=json' % video_id, video_id) 'http://www.%s.se/video/%s?output=json' % (host, video_id), video_id)
title = info['context']['title'] title = info['context']['title']
thumbnail = info['context'].get('thumbnailImage') thumbnail = info['context'].get('thumbnailImage')
@ -33,11 +52,16 @@ class SVTPlayIE(InfoExtractor):
formats = [] formats = []
for vr in video_info['videoReferences']: for vr in video_info['videoReferences']:
vurl = vr['url'] vurl = vr['url']
if determine_ext(vurl) == 'm3u8': ext = determine_ext(vurl)
if ext == 'm3u8':
formats.extend(self._extract_m3u8_formats( formats.extend(self._extract_m3u8_formats(
vurl, video_id, vurl, video_id,
ext='mp4', entry_protocol='m3u8_native', ext='mp4', entry_protocol='m3u8_native',
m3u8_id=vr.get('playerType'))) m3u8_id=vr.get('playerType')))
elif ext == 'f4m':
formats.extend(self._extract_f4m_formats(
vurl + '?hdcore=3.3.0', video_id,
f4m_id=vr.get('playerType')))
else: else:
formats.append({ formats.append({
'format_id': vr.get('playerType'), 'format_id': vr.get('playerType'),
@ -46,6 +70,7 @@ class SVTPlayIE(InfoExtractor):
self._sort_formats(formats) self._sort_formats(formats)
duration = video_info.get('materialLength') duration = video_info.get('materialLength')
age_limit = 18 if video_info.get('inappropriateForChildren') else 0
return { return {
'id': video_id, 'id': video_id,
@ -53,4 +78,5 @@ class SVTPlayIE(InfoExtractor):
'formats': formats, 'formats': formats,
'thumbnail': thumbnail, 'thumbnail': thumbnail,
'duration': duration, 'duration': duration,
'age_limit': age_limit,
} }

View File

@ -1,8 +1,10 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import base64
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import qualities
class TeamcocoIE(InfoExtractor): class TeamcocoIE(InfoExtractor):
@ -24,8 +26,8 @@ class TeamcocoIE(InfoExtractor):
'info_dict': { 'info_dict': {
'id': '19705', 'id': '19705',
'ext': 'mp4', 'ext': 'mp4',
"description": "Louis C.K. got starstruck by George W. Bush, so what? Part one.", 'description': 'Louis C.K. got starstruck by George W. Bush, so what? Part one.',
"title": "Louis C.K. Interview Pt. 1 11/3/11", 'title': 'Louis C.K. Interview Pt. 1 11/3/11',
'age_limit': 0, 'age_limit': 0,
} }
} }
@ -42,42 +44,39 @@ class TeamcocoIE(InfoExtractor):
display_id = mobj.group('display_id') display_id = mobj.group('display_id')
webpage = self._download_webpage(url, display_id) webpage = self._download_webpage(url, display_id)
video_id = mobj.group("video_id") video_id = mobj.group('video_id')
if not video_id: if not video_id:
video_id = self._html_search_regex( video_id = self._html_search_regex(
self._VIDEO_ID_REGEXES, webpage, 'video id') self._VIDEO_ID_REGEXES, webpage, 'video id')
data_url = 'http://teamcoco.com/cvp/2.0/%s.xml' % video_id embed_url = 'http://teamcoco.com/embed/v/%s' % video_id
data = self._download_xml( embed = self._download_webpage(
data_url, display_id, 'Downloading data webpage') embed_url, video_id, 'Downloading embed page')
encoded_data = self._search_regex(
r'"preload"\s*:\s*"([^"]+)"', embed, 'encoded data')
data = self._parse_json(
base64.b64decode(encoded_data.encode('ascii')).decode('utf-8'), video_id)
qualities = ['500k', '480p', '1000k', '720p', '1080p']
formats = [] formats = []
for filed in data.findall('files/file'): get_quality = qualities(['500k', '480p', '1000k', '720p', '1080p'])
if filed.attrib.get('playmode') == 'all': for filed in data['files']:
# it just duplicates one of the entries m_format = re.search(r'(\d+(k|p))\.mp4', filed['url'])
break
file_url = filed.text
m_format = re.search(r'(\d+(k|p))\.mp4', file_url)
if m_format is not None: if m_format is not None:
format_id = m_format.group(1) format_id = m_format.group(1)
else: else:
format_id = filed.attrib['bitrate'] format_id = filed['bitrate']
tbr = ( tbr = (
int(filed.attrib['bitrate']) int(filed['bitrate'])
if filed.attrib['bitrate'].isdigit() if filed['bitrate'].isdigit()
else None) else None)
try:
quality = qualities.index(format_id)
except ValueError:
quality = -1
formats.append({ formats.append({
'url': file_url, 'url': filed['url'],
'ext': 'mp4', 'ext': 'mp4',
'tbr': tbr, 'tbr': tbr,
'format_id': format_id, 'format_id': format_id,
'quality': quality, 'quality': get_quality(format_id),
}) })
self._sort_formats(formats) self._sort_formats(formats)
@ -86,8 +85,8 @@ class TeamcocoIE(InfoExtractor):
'id': video_id, 'id': video_id,
'display_id': display_id, 'display_id': display_id,
'formats': formats, 'formats': formats,
'title': self._og_search_title(webpage), 'title': data['title'],
'thumbnail': self._og_search_thumbnail(webpage), 'thumbnail': data.get('thumb', {}).get('href'),
'description': self._og_search_description(webpage), 'description': data.get('teaser'),
'age_limit': self._family_friendly_search(webpage), 'age_limit': self._family_friendly_search(webpage),
} }
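The Teamcoco embed page now ships the clip metadata as a base64-encoded JSON "preload" blob rather than a separate XML document. A round-trip sketch of the decode step (payload invented):

    import base64
    import json

    payload = {'title': 'Louis C.K. Interview Pt. 1 11/3/11', 'files': []}
    encoded_data = base64.b64encode(
        json.dumps(payload).encode('ascii')).decode('ascii')

    # mirrors the extractor's decode line above
    data = json.loads(base64.b64decode(encoded_data.encode('ascii')).decode('utf-8'))
    assert data['title'].startswith('Louis C.K.')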

View File

@ -3,14 +3,14 @@ from __future__ import unicode_literals
import json import json
import re import re
from .subtitles import SubtitlesInfoExtractor from .common import InfoExtractor
from ..compat import ( from ..compat import (
compat_str, compat_str,
) )
class TEDIE(SubtitlesInfoExtractor): class TEDIE(InfoExtractor):
_VALID_URL = r'''(?x) _VALID_URL = r'''(?x)
(?P<proto>https?://) (?P<proto>https?://)
(?P<type>www|embed(?:-ssl)?)(?P<urlmain>\.ted\.com/ (?P<type>www|embed(?:-ssl)?)(?P<urlmain>\.ted\.com/
@ -184,11 +184,6 @@ class TEDIE(SubtitlesInfoExtractor):
self._sort_formats(formats) self._sort_formats(formats)
video_id = compat_str(talk_info['id']) video_id = compat_str(talk_info['id'])
# subtitles
video_subtitles = self.extract_subtitles(video_id, talk_info)
if self._downloader.params.get('listsubtitles', False):
self._list_available_subtitles(video_id, talk_info)
return
thumbnail = talk_info['thumb'] thumbnail = talk_info['thumb']
if not thumbnail.startswith('http'): if not thumbnail.startswith('http'):
@ -199,21 +194,25 @@ class TEDIE(SubtitlesInfoExtractor):
'uploader': talk_info['speaker'], 'uploader': talk_info['speaker'],
'thumbnail': thumbnail, 'thumbnail': thumbnail,
'description': self._og_search_description(webpage), 'description': self._og_search_description(webpage),
'subtitles': video_subtitles, 'subtitles': self._get_subtitles(video_id, talk_info),
'formats': formats, 'formats': formats,
'duration': talk_info.get('duration'), 'duration': talk_info.get('duration'),
} }
def _get_available_subtitles(self, video_id, talk_info): def _get_subtitles(self, video_id, talk_info):
languages = [lang['languageCode'] for lang in talk_info.get('languages', [])] languages = [lang['languageCode'] for lang in talk_info.get('languages', [])]
if languages: if languages:
sub_lang_list = {} sub_lang_list = {}
for l in languages: for l in languages:
url = 'http://www.ted.com/talks/subtitles/id/%s/lang/%s/format/srt' % (video_id, l) sub_lang_list[l] = [
sub_lang_list[l] = url {
'url': 'http://www.ted.com/talks/subtitles/id/%s/lang/%s/format/%s' % (video_id, l, ext),
'ext': ext,
}
for ext in ['ted', 'srt']
]
return sub_lang_list return sub_lang_list
else: else:
self._downloader.report_warning('video doesn\'t have subtitles')
return {} return {}
def _watch_info(self, url, name): def _watch_info(self, url, name):

View File

@ -6,9 +6,9 @@ from .mitele import MiTeleIE
class TelecincoIE(MiTeleIE): class TelecincoIE(MiTeleIE):
IE_NAME = 'telecinco.es' IE_NAME = 'telecinco.es'
_VALID_URL = r'https?://www\.telecinco\.es/[^/]+/[^/]+/[^/]+/(?P<id>.*?)\.html' _VALID_URL = r'https?://www\.telecinco\.es/[^/]+/[^/]+/(?:[^/]+/)?(?P<id>.*?)\.html'
_TEST = { _TESTS = [{
'url': 'http://www.telecinco.es/robinfood/temporada-01/t01xp14/Bacalao-cocochas-pil-pil_0_1876350223.html', 'url': 'http://www.telecinco.es/robinfood/temporada-01/t01xp14/Bacalao-cocochas-pil-pil_0_1876350223.html',
'info_dict': { 'info_dict': {
'id': 'MDSVID20141015_0058', 'id': 'MDSVID20141015_0058',
@ -16,4 +16,7 @@ class TelecincoIE(MiTeleIE):
'title': 'Con Martín Berasategui, hacer un bacalao al ...', 'title': 'Con Martín Berasategui, hacer un bacalao al ...',
'duration': 662, 'duration': 662,
}, },
} }, {
'url': 'http://www.telecinco.es/informativos/nacional/Pablo_Iglesias-Informativos_Telecinco-entrevista-Pedro_Piqueras_2_1945155182.html',
'only_matching': True,
}]

View File

@ -8,7 +8,7 @@ import binascii
import hashlib import hashlib
from .subtitles import SubtitlesInfoExtractor from .common import InfoExtractor
from ..compat import ( from ..compat import (
compat_str, compat_str,
) )
@ -22,7 +22,7 @@ from ..utils import (
_x = lambda p: xpath_with_ns(p, {'smil': 'http://www.w3.org/2005/SMIL21/Language'}) _x = lambda p: xpath_with_ns(p, {'smil': 'http://www.w3.org/2005/SMIL21/Language'})
class ThePlatformIE(SubtitlesInfoExtractor): class ThePlatformIE(InfoExtractor):
_VALID_URL = r'''(?x) _VALID_URL = r'''(?x)
(?:https?://(?:link|player)\.theplatform\.com/[sp]/(?P<provider_id>[^/]+)/ (?:https?://(?:link|player)\.theplatform\.com/[sp]/(?P<provider_id>[^/]+)/
(?P<config>(?:[^/\?]+/(?:swf|config)|onsite)/select/)? (?P<config>(?:[^/\?]+/(?:swf|config)|onsite)/select/)?
@ -106,15 +106,11 @@ class ThePlatformIE(SubtitlesInfoExtractor):
captions = info.get('captions') captions = info.get('captions')
if isinstance(captions, list): if isinstance(captions, list):
for caption in captions: for caption in captions:
lang, src = caption.get('lang'), caption.get('src') lang, src, mime = caption.get('lang', 'en'), caption.get('src'), caption.get('type')
if lang and src: subtitles[lang] = [{
subtitles[lang] = src 'ext': 'srt' if mime == 'text/srt' else 'ttml',
'url': src,
if self._downloader.params.get('listsubtitles', False): }]
self._list_available_subtitles(video_id, subtitles)
return
subtitles = self.extract_subtitles(video_id, subtitles)
head = meta.find(_x('smil:head')) head = meta.find(_x('smil:head'))
body = meta.find(_x('smil:body')) body = meta.find(_x('smil:body'))

View File

@ -34,7 +34,15 @@ class TwitchBaseIE(InfoExtractor):
expected=True) expected=True)
def _download_json(self, url, video_id, note='Downloading JSON metadata'): def _download_json(self, url, video_id, note='Downloading JSON metadata'):
response = super(TwitchBaseIE, self)._download_json(url, video_id, note) headers = {
'Referer': 'http://api.twitch.tv/crossdomain/receiver.html?v=2',
'X-Requested-With': 'XMLHttpRequest',
}
for cookie in self._downloader.cookiejar:
if cookie.name == 'api_token':
headers['Twitch-Api-Token'] = cookie.value
request = compat_urllib_request.Request(url, headers=headers)
response = super(TwitchBaseIE, self)._download_json(request, video_id, note)
self._handle_error(response) self._handle_error(response)
return response return response

View File

@ -2,16 +2,17 @@ from __future__ import unicode_literals
import re import re
from ..compat import compat_urlparse
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
unescapeHTML, unescapeHTML,
unified_strdate, unified_strdate,
US_RATINGS, US_RATINGS,
) )
from .subtitles import SubtitlesInfoExtractor from .common import InfoExtractor
class VikiIE(SubtitlesInfoExtractor): class VikiIE(InfoExtractor):
IE_NAME = 'viki' IE_NAME = 'viki'
_VALID_URL = r'^https?://(?:www\.)?viki\.com/videos/(?P<id>[0-9]+v)' _VALID_URL = r'^https?://(?:www\.)?viki\.com/videos/(?P<id>[0-9]+v)'
@ -69,9 +70,6 @@ class VikiIE(SubtitlesInfoExtractor):
# subtitles # subtitles
video_subtitles = self.extract_subtitles(video_id, info_webpage) video_subtitles = self.extract_subtitles(video_id, info_webpage)
if self._downloader.params.get('listsubtitles', False):
self._list_available_subtitles(video_id, info_webpage)
return
return { return {
'id': video_id, 'id': video_id,
@ -85,12 +83,15 @@ class VikiIE(SubtitlesInfoExtractor):
'upload_date': upload_date, 'upload_date': upload_date,
} }
def _get_available_subtitles(self, video_id, info_webpage): def _get_subtitles(self, video_id, info_webpage):
res = {} res = {}
for sturl_html in re.findall(r'<track src="([^"]+)"/>', info_webpage): for sturl_html in re.findall(r'<track src="([^"]+)"', info_webpage):
sturl = unescapeHTML(sturl_html) sturl = unescapeHTML(sturl_html)
m = re.search(r'/(?P<lang>[a-z]+)\.vtt', sturl) m = re.search(r'/(?P<lang>[a-z]+)\.vtt', sturl)
if not m: if not m:
continue continue
res[m.group('lang')] = sturl res[m.group('lang')] = [{
'url': compat_urlparse.urljoin('http://www.viki.com', sturl),
'ext': 'vtt',
}]
return res return res

View File

@ -4,9 +4,9 @@ from __future__ import unicode_literals
import json import json
import re import re
import itertools import itertools
import hashlib
from .common import InfoExtractor from .common import InfoExtractor
from .subtitles import SubtitlesInfoExtractor
from ..compat import ( from ..compat import (
compat_HTTPError, compat_HTTPError,
compat_urllib_parse, compat_urllib_parse,
@ -52,7 +52,7 @@ class VimeoBaseInfoExtractor(InfoExtractor):
self._download_webpage(login_request, None, False, 'Wrong login info') self._download_webpage(login_request, None, False, 'Wrong login info')
class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor): class VimeoIE(VimeoBaseInfoExtractor):
"""Information extractor for vimeo.com.""" """Information extractor for vimeo.com."""
# _VALID_URL matches Vimeo URLs # _VALID_URL matches Vimeo URLs
@ -225,6 +225,11 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor):
if mobj.group('pro') or mobj.group('player'): if mobj.group('pro') or mobj.group('player'):
url = 'http://player.vimeo.com/video/' + video_id url = 'http://player.vimeo.com/video/' + video_id
password = self._downloader.params.get('videopassword', None)
if password:
headers['Cookie'] = '%s_password=%s' % (
video_id, hashlib.md5(password.encode('utf-8')).hexdigest())
# Retrieve video webpage to extract further information # Retrieve video webpage to extract further information
request = compat_urllib_request.Request(url, None, headers) request = compat_urllib_request.Request(url, None, headers)
try: try:
@ -372,12 +377,10 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor):
text_tracks = config['request'].get('text_tracks') text_tracks = config['request'].get('text_tracks')
if text_tracks: if text_tracks:
for tt in text_tracks: for tt in text_tracks:
subtitles[tt['lang']] = 'http://vimeo.com' + tt['url'] subtitles[tt['lang']] = [{
'ext': 'vtt',
video_subtitles = self.extract_subtitles(video_id, subtitles) 'url': 'http://vimeo.com' + tt['url'],
if self._downloader.params.get('listsubtitles', False): }]
self._list_available_subtitles(video_id, subtitles)
return
return { return {
'id': video_id, 'id': video_id,
@ -393,7 +396,7 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor):
'view_count': view_count, 'view_count': view_count,
'like_count': like_count, 'like_count': like_count,
'comment_count': comment_count, 'comment_count': comment_count,
'subtitles': video_subtitles, 'subtitles': subtitles,
} }
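The new --video-password handling pre-seeds the page request with the cookie Vimeo sets after a successful password prompt; the value is simply the md5 hex digest of the password. Sketch (id and password invented):

    import hashlib

    video_id, password = '123456789', 'hunter2'  # placeholders
    headers = {
        'Cookie': '%s_password=%s' % (
            video_id, hashlib.md5(password.encode('utf-8')).hexdigest()),
    }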

View File

@ -3,14 +3,14 @@ from __future__ import unicode_literals
import re import re
from .subtitles import SubtitlesInfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
xpath_text, xpath_text,
int_or_none, int_or_none,
) )
class WallaIE(SubtitlesInfoExtractor): class WallaIE(InfoExtractor):
_VALID_URL = r'http://vod\.walla\.co\.il/[^/]+/(?P<id>\d+)/(?P<display_id>.+)' _VALID_URL = r'http://vod\.walla\.co\.il/[^/]+/(?P<id>\d+)/(?P<display_id>.+)'
_TEST = { _TEST = {
'url': 'http://vod.walla.co.il/movie/2642630/one-direction-all-for-one', 'url': 'http://vod.walla.co.il/movie/2642630/one-direction-all-for-one',
@ -52,13 +52,10 @@ class WallaIE(SubtitlesInfoExtractor):
subtitles = {} subtitles = {}
for subtitle in item.findall('./subtitles/subtitle'): for subtitle in item.findall('./subtitles/subtitle'):
lang = xpath_text(subtitle, './title') lang = xpath_text(subtitle, './title')
subtitles[self._SUBTITLE_LANGS.get(lang, lang)] = xpath_text(subtitle, './src') subtitles[self._SUBTITLE_LANGS.get(lang, lang)] = [{
'ext': 'srt',
if self._downloader.params.get('listsubtitles', False): 'url': xpath_text(subtitle, './src'),
self._list_available_subtitles(video_id, subtitles) }]
return
subtitles = self.extract_subtitles(video_id, subtitles)
formats = [] formats = []
for quality in item.findall('./qualities/quality'): for quality in item.findall('./qualities/quality'):

View File

@ -28,6 +28,7 @@ class WDRIE(InfoExtractor):
'title': 'Servicezeit', 'title': 'Servicezeit',
'description': 'md5:c8f43e5e815eeb54d0b96df2fba906cb', 'description': 'md5:c8f43e5e815eeb54d0b96df2fba906cb',
'upload_date': '20140310', 'upload_date': '20140310',
'is_live': False
}, },
'params': { 'params': {
'skip_download': True, 'skip_download': True,
@ -41,6 +42,7 @@ class WDRIE(InfoExtractor):
'title': 'Marga Spiegel ist tot', 'title': 'Marga Spiegel ist tot',
'description': 'md5:2309992a6716c347891c045be50992e4', 'description': 'md5:2309992a6716c347891c045be50992e4',
'upload_date': '20140311', 'upload_date': '20140311',
'is_live': False
}, },
'params': { 'params': {
'skip_download': True, 'skip_download': True,
@ -55,6 +57,7 @@ class WDRIE(InfoExtractor):
'title': 'Erlebte Geschichten: Marga Spiegel (29.11.2009)', 'title': 'Erlebte Geschichten: Marga Spiegel (29.11.2009)',
'description': 'md5:2309992a6716c347891c045be50992e4', 'description': 'md5:2309992a6716c347891c045be50992e4',
'upload_date': '20091129', 'upload_date': '20091129',
'is_live': False
}, },
}, },
{ {
@ -66,6 +69,7 @@ class WDRIE(InfoExtractor):
'title': 'Flavia Coelho: Amar é Amar', 'title': 'Flavia Coelho: Amar é Amar',
'description': 'md5:7b29e97e10dfb6e265238b32fa35b23a', 'description': 'md5:7b29e97e10dfb6e265238b32fa35b23a',
'upload_date': '20140717', 'upload_date': '20140717',
'is_live': False
}, },
}, },
{ {
@ -74,6 +78,20 @@ class WDRIE(InfoExtractor):
'info_dict': { 'info_dict': {
'id': 'mediathek/video/sendungen/quarks_und_co/filterseite-quarks-und-co100', 'id': 'mediathek/video/sendungen/quarks_und_co/filterseite-quarks-und-co100',
} }
},
{
'url': 'http://www1.wdr.de/mediathek/video/livestream/index.html',
'info_dict': {
'id': 'mdb-103364',
'title': 're:^WDR Fernsehen [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
'description': 'md5:ae2ff888510623bf8d4b115f95a9b7c9',
'ext': 'flv',
'upload_date': '20150212',
'is_live': True
},
'params': {
'skip_download': True,
},
} }
] ]
@ -119,6 +137,10 @@ class WDRIE(InfoExtractor):
video_url = flashvars['dslSrc'][0] video_url = flashvars['dslSrc'][0]
title = flashvars['trackerClipTitle'][0] title = flashvars['trackerClipTitle'][0]
thumbnail = flashvars['startPicture'][0] if 'startPicture' in flashvars else None thumbnail = flashvars['startPicture'][0] if 'startPicture' in flashvars else None
is_live = flashvars.get('isLive', ['0'])[0] == '1'
if is_live:
title = self._live_title(title)
if 'trackerClipAirTime' in flashvars: if 'trackerClipAirTime' in flashvars:
upload_date = flashvars['trackerClipAirTime'][0] upload_date = flashvars['trackerClipAirTime'][0]
@ -131,6 +153,13 @@ class WDRIE(InfoExtractor):
if video_url.endswith('.f4m'): if video_url.endswith('.f4m'):
video_url += '?hdcore=3.2.0&plugin=aasp-3.2.0.77.18' video_url += '?hdcore=3.2.0&plugin=aasp-3.2.0.77.18'
ext = 'flv' ext = 'flv'
elif video_url.endswith('.smil'):
fmt = self._extract_smil_formats(video_url, page_id)[0]
video_url = fmt['url']
sep = '&' if '?' in video_url else '?'
video_url += sep
video_url += 'hdcore=3.3.0&plugin=aasp-3.3.0.99.43'
ext = fmt['ext']
else: else:
ext = determine_ext(video_url) ext = determine_ext(video_url)
@ -144,6 +173,7 @@ class WDRIE(InfoExtractor):
'description': description, 'description': description,
'thumbnail': thumbnail, 'thumbnail': thumbnail,
'upload_date': upload_date, 'upload_date': upload_date,
'is_live': is_live
} }
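_live_title, applied in the new livestream branch, appends the current timestamp so live recordings get distinct names; that is what the 're:^WDR Fernsehen ...' expectation in the new test above matches. A sketch of the assumed helper (live_title stands in for InfoExtractor._live_title):

    import datetime

    def live_title(name):
        # assumed format: append "YYYY-MM-DD HH:MM" to the clip title
        return name + ' ' + datetime.datetime.now().strftime('%Y-%m-%d %H:%M')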

View File

@ -11,7 +11,6 @@ import time
import traceback import traceback
from .common import InfoExtractor, SearchInfoExtractor from .common import InfoExtractor, SearchInfoExtractor
from .subtitles import SubtitlesInfoExtractor
from ..jsinterp import JSInterpreter from ..jsinterp import JSInterpreter
from ..swfinterp import SWFInterpreter from ..swfinterp import SWFInterpreter
from ..compat import ( from ..compat import (
@ -185,7 +184,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
return return
class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): class YoutubeIE(YoutubeBaseInfoExtractor):
IE_DESC = 'YouTube.com' IE_DESC = 'YouTube.com'
_VALID_URL = r"""(?x)^ _VALID_URL = r"""(?x)^
( (
@ -648,7 +647,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
raise ExtractorError( raise ExtractorError(
'Signature extraction failed: ' + tb, cause=e) 'Signature extraction failed: ' + tb, cause=e)
def _get_available_subtitles(self, video_id, webpage): def _get_subtitles(self, video_id, webpage):
try: try:
subs_doc = self._download_xml( subs_doc = self._download_xml(
'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id, 'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
@@ -662,23 +661,27 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
             lang = track.attrib['lang_code']
             if lang in sub_lang_list:
                 continue
-            params = compat_urllib_parse.urlencode({
-                'lang': lang,
-                'v': video_id,
-                'fmt': self._downloader.params.get('subtitlesformat', 'srt'),
-                'name': track.attrib['name'].encode('utf-8'),
-            })
-            url = 'https://www.youtube.com/api/timedtext?' + params
-            sub_lang_list[lang] = url
+            sub_formats = []
+            for ext in ['sbv', 'vtt', 'srt']:
+                params = compat_urllib_parse.urlencode({
+                    'lang': lang,
+                    'v': video_id,
+                    'fmt': ext,
+                    'name': track.attrib['name'].encode('utf-8'),
+                })
+                sub_formats.append({
+                    'url': 'https://www.youtube.com/api/timedtext?' + params,
+                    'ext': ext,
+                })
+            sub_lang_list[lang] = sub_formats
         if not sub_lang_list:
             self._downloader.report_warning('video doesn\'t have subtitles')
             return {}
         return sub_lang_list
 
-    def _get_available_automatic_caption(self, video_id, webpage):
+    def _get_automatic_captions(self, video_id, webpage):
         """We need the webpage for getting the captions url, pass it as an
         argument to speed up the process."""
-        sub_format = self._downloader.params.get('subtitlesformat', 'srt')
         self.to_screen('%s: Looking for automatic captions' % video_id)
         mobj = re.search(r';ytplayer.config = ({.*?});', webpage)
         err_msg = 'Couldn\'t find automatic captions for %s' % video_id
@@ -708,14 +711,20 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
             sub_lang_list = {}
             for lang_node in caption_list.findall('target'):
                 sub_lang = lang_node.attrib['lang_code']
-                params = compat_urllib_parse.urlencode({
-                    'lang': original_lang,
-                    'tlang': sub_lang,
-                    'fmt': sub_format,
-                    'ts': timestamp,
-                    'kind': caption_kind,
-                })
-                sub_lang_list[sub_lang] = caption_url + '&' + params
+                sub_formats = []
+                for ext in ['sbv', 'vtt', 'srt']:
+                    params = compat_urllib_parse.urlencode({
+                        'lang': original_lang,
+                        'tlang': sub_lang,
+                        'fmt': ext,
+                        'ts': timestamp,
+                        'kind': caption_kind,
+                    })
+                    sub_formats.append({
+                        'url': caption_url + '&' + params,
+                        'ext': ext,
+                    })
+                sub_lang_list[sub_lang] = sub_formats
             return sub_lang_list
+        # An extractor error can be raised by the download process if there are
+        # no automatic captions but there are subtitles
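After this rework each language maps to a list of candidate formats instead of a single URL, so format selection can happen later against the --sub-format preference. A sketch of the returned shape (URLs abridged, values hypothetical):

    subtitles = {
        'en': [
            {'url': 'https://www.youtube.com/api/timedtext?...&fmt=sbv', 'ext': 'sbv'},
            {'url': 'https://www.youtube.com/api/timedtext?...&fmt=vtt', 'ext': 'vtt'},
            {'url': 'https://www.youtube.com/api/timedtext?...&fmt=srt', 'ext': 'srt'},
        ],
    }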
@@ -970,10 +979,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
         # subtitles
         video_subtitles = self.extract_subtitles(video_id, video_webpage)
-
-        if self._downloader.params.get('listsubtitles', False):
-            self._list_available_subtitles(video_id, video_webpage)
-            return
+        automatic_captions = self.extract_automatic_captions(video_id, video_webpage)
 
         if 'length_seconds' not in video_info:
             self._downloader.report_warning('unable to extract video duration')
@@ -1122,6 +1128,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
             'description': video_description,
             'categories': video_categories,
             'subtitles': video_subtitles,
+            'automatic_captions': automatic_captions,
             'duration': video_duration,
             'age_limit': 18 if age_gate else 0,
             'annotations': video_annotations,
@@ -1146,13 +1153,13 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
                         | p/
                         )
                         (
-                            (?:PL|LL|EC|UU|FL|RD)?[0-9A-Za-z-_]{10,}
+                            (?:PL|LL|EC|UU|FL|RD|UL)?[0-9A-Za-z-_]{10,}
                             # Top tracks, they can also include dots
                             |(?:MC)[\w\.]*
                         )
                         .*
                      |
-                        ((?:PL|LL|EC|UU|FL|RD)[0-9A-Za-z-_]{10,})
+                        ((?:PL|LL|EC|UU|FL|RD|UL)[0-9A-Za-z-_]{10,})
                      )"""
     _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
     _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&amp;[^"]*?index=(?P<index>\d+)'
@@ -1237,7 +1244,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
                 for vid_id in ids]
 
     def _extract_mix(self, playlist_id):
-        # The mixes are generated from a a single video
+        # The mixes are generated from a single video
         # the id of the playlist is just 'RD' + video_id
         url = 'https://youtube.com/watch?v=%s&list=%s' % (playlist_id[-11:], playlist_id)
         webpage = self._download_webpage(
@@ -1273,7 +1280,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
             else:
                 self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
 
-        if playlist_id.startswith('RD'):
+        if playlist_id.startswith('RD') or playlist_id.startswith('UL'):
            # Mixes require a custom extraction process
             return self._extract_mix(playlist_id)
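Both the widened _VALID_URL pattern and the mix branch now accept the 'UL' prefix alongside 'RD'; a quick consistency check with a hypothetical list id:

    import re

    playlist_id = 'ULdQw4w9WgXcQxx'  # hypothetical auto-generated list id
    assert re.match(r'(?:PL|LL|EC|UU|FL|RD|UL)[0-9A-Za-z-_]{10,}', playlist_id)
    assert playlist_id.startswith('RD') or playlist_id.startswith('UL')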

@@ -0,0 +1,110 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    parse_duration,
+    parse_iso8601,
+    xpath_with_ns,
+    xpath_text,
+    int_or_none,
+)
+
+
+class ZapiksIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?zapiks\.(?:fr|com)/(?:(?:[a-z]{2}/)?(?P<display_id>.+?)\.html|index\.php\?.*\bmedia_id=(?P<id>\d+))'
+    _TESTS = [
+        {
+            'url': 'http://www.zapiks.fr/ep2s3-bon-appetit-eh-be-viva.html',
+            'md5': 'aeb3c473b2d564b2d46d664d28d5f050',
+            'info_dict': {
+                'id': '80798',
+                'ext': 'mp4',
+                'title': 'EP2S3 - Bon Appétit - Eh bé viva les pyrénées con!',
+                'description': 'md5:7054d6f6f620c6519be1fe710d4da847',
+                'thumbnail': 're:^https?://.*\.jpg$',
+                'duration': 528,
+                'timestamp': 1359044972,
+                'upload_date': '20130124',
+                'view_count': int,
+                'comment_count': int,
+            },
+        },
+        {
+            'url': 'http://www.zapiks.com/ep3s5-bon-appetit-baqueira-m-1.html',
+            'only_matching': True,
+        },
+        {
+            'url': 'http://www.zapiks.com/nl/ep3s5-bon-appetit-baqueira-m-1.html',
+            'only_matching': True,
+        },
+        {
+            'url': 'http://www.zapiks.fr/index.php?action=playerIframe&media_id=118046&width=640&height=360&autoStart=false&language=fr',
+            'only_matching': True,
+        },
+    ]
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        display_id = mobj.group('display_id') or video_id
+
+        webpage = self._download_webpage(url, display_id)
+
+        if not video_id:
+            video_id = self._search_regex(
+                r'data-media-id="(\d+)"', webpage, 'video id')
+
+        playlist = self._download_xml(
+            'http://www.zapiks.fr/view/index.php?action=playlist&media_id=%s&lang=en' % video_id,
+            display_id)
+
+        NS_MAP = {
+            'jwplayer': 'http://rss.jwpcdn.com/'
+        }
+
+        def ns(path):
+            return xpath_with_ns(path, NS_MAP)
+
+        item = playlist.find('./channel/item')
+
+        title = xpath_text(item, 'title', 'title') or self._og_search_title(webpage)
+        description = self._og_search_description(webpage, default=None)
+        thumbnail = xpath_text(
+            item, ns('./jwplayer:image'), 'thumbnail') or self._og_search_thumbnail(webpage, default=None)
+        duration = parse_duration(self._html_search_meta(
+            'duration', webpage, 'duration', default=None))
+        timestamp = parse_iso8601(self._html_search_meta(
+            'uploadDate', webpage, 'upload date', default=None), ' ')
+
+        view_count = int_or_none(self._search_regex(
+            r'UserPlays:(\d+)', webpage, 'view count', default=None))
+        comment_count = int_or_none(self._search_regex(
+            r'UserComments:(\d+)', webpage, 'comment count', default=None))
+
+        formats = []
+        for source in item.findall(ns('./jwplayer:source')):
+            format_id = source.attrib['label']
+            f = {
+                'url': source.attrib['file'],
+                'format_id': format_id,
+            }
+            m = re.search(r'^(?P<height>\d+)[pP]', format_id)
+            if m:
+                f['height'] = int(m.group('height'))
+            formats.append(f)
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'duration': duration,
+            'timestamp': timestamp,
+            'view_count': view_count,
+            'comment_count': comment_count,
+            'formats': formats,
+        }
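A minimal way to exercise the new extractor through the Python API, reusing the first test URL above (metadata only, no download):

    import youtube_dl

    with youtube_dl.YoutubeDL({'skip_download': True}) as ydl:
        info = ydl.extract_info(
            'http://www.zapiks.fr/ep2s3-bon-appetit-eh-be-viva.html')
        assert info['id'] == '80798'  # per the info_dict in the test above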

@@ -8,11 +8,11 @@ import sys
 from .downloader.external import list_external_downloaders
 from .compat import (
     compat_expanduser,
+    compat_get_terminal_size,
     compat_getenv,
     compat_kwargs,
 )
 from .utils import (
-    get_term_width,
     write_string,
 )
 from .version import __version__
@@ -100,7 +100,7 @@ def parseOpts(overrideArguments=None):
         return opts
 
     # No need to wrap help messages if we're on a wide console
-    columns = get_term_width()
+    columns = compat_get_terminal_size().columns
     max_width = columns if columns else 80
     max_help_position = 80
@@ -272,6 +272,10 @@ def parseOpts(overrideArguments=None):
         '--no-playlist',
         action='store_true', dest='noplaylist', default=False,
         help='If the URL refers to a video and a playlist, download only the video.')
+    selection.add_option(
+        '--yes-playlist',
+        action='store_false', dest='noplaylist', default=False,
+        help='If the URL refers to a video and a playlist, download the playlist.')
     selection.add_option(
         '--age-limit',
         metavar='YEARS', dest='age_limit', default=None, type=int,
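Because --yes-playlist stores False into the same noplaylist destination, it simply undoes an earlier --no-playlist; the API equivalent is the noplaylist parameter:

    import youtube_dl

    # --no-playlist  -> noplaylist=True; --yes-playlist -> noplaylist=False
    ydl = youtube_dl.YoutubeDL({'noplaylist': False})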
@@ -387,8 +391,8 @@ def parseOpts(overrideArguments=None):
         help='lists all available subtitles for the video')
     subtitles.add_option(
         '--sub-format',
-        action='store', dest='subtitlesformat', metavar='FORMAT', default='srt',
-        help='subtitle format (default=srt) ([sbv/vtt] youtube only)')
+        action='store', dest='subtitlesformat', metavar='FORMAT', default='best',
+        help='subtitle format, accepts formats preference, for example: "ass/srt/best"')
     subtitles.add_option(
         '--sub-lang', '--sub-langs', '--srt-lang',
         action='callback', dest='subtitleslangs', metavar='LANGS', type='str',
@@ -751,6 +755,10 @@ def parseOpts(overrideArguments=None):
         '--exec',
         metavar='CMD', dest='exec_cmd',
         help='Execute a command on the file after downloading, similar to find\'s -exec syntax. Example: --exec \'adb push {} /sdcard/Music/ && rm {}\'')
+    postproc.add_option(
+        '--convert-subtitles', '--convert-subs',
+        metavar='FORMAT', dest='convertsubtitles', default=None,
+        help='Convert the subtitles to other format (currently supported: srt|ass|vtt)')
 
     parser.add_option_group(general)
     parser.add_option_group(network)
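A sketch of the equivalent API flow: the flag's dest is wired to the new FFmpegSubtitlesConvertorPP postprocessor (see the ffmpeg changes below), which can also be attached by hand:

    import youtube_dl
    from youtube_dl.postprocessor import FFmpegSubtitlesConvertorPP

    ydl = youtube_dl.YoutubeDL({'writesubtitles': True, 'skip_download': True})
    # Rough equivalent of --convert-subtitles srt, added manually here
    ydl.add_post_processor(FFmpegSubtitlesConvertorPP(ydl, format='srt'))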

@@ -12,6 +12,7 @@ from .ffmpeg import (
     FFmpegMergerPP,
     FFmpegMetadataPP,
     FFmpegVideoConvertorPP,
+    FFmpegSubtitlesConvertorPP,
 )
 from .xattrpp import XAttrMetadataPP
 from .execafterdownload import ExecAfterDownloadPP
@@ -33,6 +34,7 @@ __all__ = [
     'FFmpegMergerPP',
     'FFmpegMetadataPP',
     'FFmpegPostProcessor',
+    'FFmpegSubtitlesConvertorPP',
     'FFmpegVideoConvertorPP',
     'XAttrMetadataPP',
 ]

@@ -1,5 +1,6 @@
 from __future__ import unicode_literals
 
+import io
 import os
 import subprocess
 import sys
@@ -500,10 +501,6 @@ class FFmpegEmbedSubtitlePP(FFmpegPostProcessor):
         'zu': 'zul',
     }
 
-    def __init__(self, downloader=None, subtitlesformat='srt'):
-        super(FFmpegEmbedSubtitlePP, self).__init__(downloader)
-        self._subformat = subtitlesformat
-
     @classmethod
     def _conver_lang_code(cls, code):
         """Convert language code from ISO 639-1 to ISO 639-2/T"""
@@ -513,13 +510,14 @@ class FFmpegEmbedSubtitlePP(FFmpegPostProcessor):
         if information['ext'] != 'mp4':
             self._downloader.to_screen('[ffmpeg] Subtitles can only be embedded in mp4 files')
             return True, information
-        if not information.get('subtitles'):
+        subtitles = information.get('requested_subtitles')
+        if not subtitles:
             self._downloader.to_screen('[ffmpeg] There aren\'t any subtitles to embed')
             return True, information
 
-        sub_langs = [key for key in information['subtitles']]
+        sub_langs = list(subtitles.keys())
         filename = information['filepath']
-        input_files = [filename] + [subtitles_filename(filename, lang, self._subformat) for lang in sub_langs]
+        input_files = [filename] + [subtitles_filename(filename, lang, sub_info['ext']) for lang, sub_info in subtitles.items()]
 
         opts = [
             '-map', '0',
@@ -666,3 +664,40 @@ class FFmpegFixupM4aPP(FFmpegPostProcessor):
         os.rename(encodeFilename(temp_filename), encodeFilename(filename))
 
         return True, info
+
+
+class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor):
+    def __init__(self, downloader=None, format=None):
+        super(FFmpegSubtitlesConvertorPP, self).__init__(downloader)
+        self.format = format
+
+    def run(self, info):
+        subs = info.get('requested_subtitles')
+        filename = info['filepath']
+        new_ext = self.format
+        new_format = new_ext
+        if new_format == 'vtt':
+            new_format = 'webvtt'
+        if subs is None:
+            self._downloader.to_screen('[ffmpeg] There aren\'t any subtitles to convert')
+            return True, info
+        self._downloader.to_screen('[ffmpeg] Converting subtitles')
+        for lang, sub in subs.items():
+            ext = sub['ext']
+            if ext == new_ext:
+                self._downloader.to_screen(
+                    '[ffmpeg] Subtitle file for %s is already in the requested '
+                    'format' % new_ext)
+                continue
+            new_file = subtitles_filename(filename, lang, new_ext)
+            self.run_ffmpeg(
+                subtitles_filename(filename, lang, ext),
+                new_file, ['-f', new_format])
+
+            with io.open(new_file, 'rt', encoding='utf-8') as f:
+                subs[lang] = {
+                    'ext': new_ext,
+                    'data': f.read(),
+                }
+
+        return True, info
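A minimal sketch of driving the converter directly, with a hypothetical info dict of the shape run() expects (ffmpeg must be installed and clip.en.vtt present on disk):

    from youtube_dl import YoutubeDL
    from youtube_dl.postprocessor import FFmpegSubtitlesConvertorPP

    pp = FFmpegSubtitlesConvertorPP(YoutubeDL(), format='srt')
    info = {
        'filepath': 'clip.mp4',  # hypothetical downloaded file
        'requested_subtitles': {'en': {'ext': 'vtt'}},  # expects clip.en.vtt
    }
    pp.run(info)  # writes clip.en.srt and stores its text back into info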

@@ -35,7 +35,6 @@ import zlib
 from .compat import (
     compat_basestring,
     compat_chr,
-    compat_getenv,
     compat_html_entities,
     compat_http_client,
     compat_parse_qs,
@@ -54,7 +53,7 @@ from .compat import (
 compiled_regex_type = type(re.compile(''))
 
 std_headers = {
-    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20100101 Firefox/10.0 (Chrome)',
+    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20150101 Firefox/20.0 (Chrome)',
     'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
     'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
     'Accept-Encoding': 'gzip, deflate',
@@ -304,6 +303,8 @@ def sanitize_filename(s, restricted=False, is_id=False):
     # Common case of "Foreign band name - English song title"
     if restricted and result.startswith('-_'):
         result = result[2:]
+    if result.startswith('-'):
+        result = '_' + result[len('-'):]
     if not result:
         result = '_'
     return result
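The added guard keeps sanitized names from beginning with '-', which downstream shell or ffmpeg invocations could mistake for an option; for example:

    from youtube_dl.utils import sanitize_filename

    assert sanitize_filename('-abc') == '_abc'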
@@ -904,8 +905,8 @@ def _windows_write_string(s, out):
     def not_a_console(handle):
         if handle == INVALID_HANDLE_VALUE or handle is None:
             return True
-        return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR
-                or GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)
+        return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR or
+                GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)
 
     if not_a_console(h):
         return False
@@ -1175,22 +1176,6 @@ def parse_filesize(s):
     return int(float(num_str) * mult)
 
 
-def get_term_width():
-    columns = compat_getenv('COLUMNS', None)
-    if columns:
-        return int(columns)
-
-    try:
-        sp = subprocess.Popen(
-            ['stty', 'size'],
-            stdout=subprocess.PIPE, stderr=subprocess.PIPE)
-        out, err = sp.communicate()
-        return int(out.split()[1])
-    except:
-        pass
-    return None
-
-
 def month_by_name(name):
     """ Return the number of a month by (locale-independently) English name """
@@ -1292,6 +1277,7 @@ def parse_duration(s):
         (?P<only_mins>[0-9.]+)\s*(?:mins?|minutes?)\s*|
         (?P<only_hours>[0-9.]+)\s*(?:hours?)|
 
+        \s*(?P<hours_reversed>[0-9]+)\s*(?:[:h]|hours?)\s*(?P<mins_reversed>[0-9]+)\s*(?:[:m]|mins?|minutes?)\s*|
         (?:
             (?:
                 (?:(?P<days>[0-9]+)\s*(?:[:d]|days?)\s*)?
@@ -1310,10 +1296,14 @@ def parse_duration(s):
         return float_or_none(m.group('only_hours'), invscale=60 * 60)
     if m.group('secs'):
         res += int(m.group('secs'))
+    if m.group('mins_reversed'):
+        res += int(m.group('mins_reversed')) * 60
     if m.group('mins'):
         res += int(m.group('mins')) * 60
     if m.group('hours'):
         res += int(m.group('hours')) * 60 * 60
+    if m.group('hours_reversed'):
+        res += int(m.group('hours_reversed')) * 60 * 60
     if m.group('days'):
         res += int(m.group('days')) * 24 * 60 * 60
     if m.group('ms'):
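The two reversed groups let hour-plus-minute strings with no seconds part parse; for example:

    from youtube_dl.utils import parse_duration

    assert parse_duration('2h 32m') == 9120  # 2 * 3600 + 32 * 60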

@@ -1,3 +1,3 @@
 from __future__ import unicode_literals
 
-__version__ = '2015.02.20'
+__version__ = '2015.02.28'