Merge branch 'master' into multipart_videos

Conflicts:
	youtube_dl/extractor/mtv.py
This commit is contained in:
Mark Lee 2015-02-28 16:13:21 -08:00
commit 07516b8835
74 changed files with 2068 additions and 804 deletions

View File

@ -111,3 +111,5 @@ Paul Hartmann
Frans de Jonge
Robin de Rooij
Ryan Schmidt
Leslie P. Polzer
Duncan Keall

View File

@ -2,6 +2,7 @@ all: youtube-dl README.md CONTRIBUTING.md README.txt youtube-dl.1 youtube-dl.bas
clean:
rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish *.dump *.part *.info.json *.mp4 *.flv *.mp3 *.avi CONTRIBUTING.md.tmp youtube-dl youtube-dl.exe
find -name "*.pyc" -delete
PREFIX ?= /usr/local
BINDIR ?= $(PREFIX)/bin
@ -43,7 +44,7 @@ test:
ot: offlinetest
offlinetest: codetest
nosetests --verbose test --exclude test_download --exclude test_age_restriction --exclude test_subtitles --exclude test_write_annotations --exclude test_youtube_lists
nosetests --verbose test --exclude test_download.py --exclude test_age_restriction.py --exclude test_subtitles.py --exclude test_write_annotations.py --exclude test_youtube_lists.py
tar: youtube-dl.tar.gz

View File

@ -139,6 +139,8 @@ which means you can modify it, redistribute it or use it however you like.
dislike_count <? 50 & description" .
--no-playlist If the URL refers to a video and a
playlist, download only the video.
--yes-playlist If the URL refers to a video and a
playlist, download the playlist.
--age-limit YEARS download only videos suitable for the given
age
--download-archive FILE Download only videos not listed in the
@ -351,8 +353,8 @@ which means you can modify it, redistribute it or use it however you like.
--all-subs downloads all the available subtitles of
the video
--list-subs lists all available subtitles for the video
--sub-format FORMAT subtitle format (default=srt) ([sbv/vtt]
youtube only)
--sub-format FORMAT subtitle format, accepts formats
preference, for example: "ass/srt/best"
--sub-lang LANGS languages of the subtitles to download
(optional) separated by commas, use IETF
language tags like 'en,pt'
@ -406,6 +408,8 @@ which means you can modify it, redistribute it or use it however you like.
downloading, similar to find's -exec
syntax. Example: --exec 'adb push {}
/sdcard/Music/ && rm {}'
--convert-subtitles FORMAT Convert the subtitles to other format
(currently supported: srt|ass|vtt)
# CONFIGURATION

View File

@ -45,12 +45,12 @@ for test in get_testcases():
RESULT = ('.' + domain + '\n' in LIST or '\n' + domain + '\n' in LIST)
if RESULT and ('info_dict' not in test or 'age_limit' not in test['info_dict']
or test['info_dict']['age_limit'] != 18):
if RESULT and ('info_dict' not in test or 'age_limit' not in test['info_dict'] or
test['info_dict']['age_limit'] != 18):
print('\nPotential missing age_limit check: {0}'.format(test['name']))
elif not RESULT and ('info_dict' in test and 'age_limit' in test['info_dict']
and test['info_dict']['age_limit'] == 18):
elif not RESULT and ('info_dict' in test and 'age_limit' in test['info_dict'] and
test['info_dict']['age_limit'] == 18):
print('\nPotential false negative: {0}'.format(test['name']))
else:

View File

@ -17,6 +17,7 @@
- **AdultSwim**
- **Aftenposten**
- **Aftonbladet**
- **AirMozilla**
- **AlJazeera**
- **Allocine**
- **AlphaPorno**
@ -72,6 +73,8 @@
- **CeskaTelevize**
- **channel9**: Channel 9
- **Chilloutzone**
- **chirbit**
- **chirbit:profile**
- **Cinchcast**
- **Cinemassacre**
- **clipfish**
@ -207,6 +210,7 @@
- **Jove**
- **jpopsuki.tv**
- **Jukebox**
- **Kaltura**
- **Kankan**
- **Karaoketv**
- **keek**
@ -218,6 +222,9 @@
- **Ku6**
- **la7.tv**
- **Laola1Tv**
- **Letv**
- **LetvPlaylist**
- **LetvTv**
- **lifenews**: LIFE | NEWS
- **LiveLeak**
- **livestream**
@ -302,6 +309,7 @@
- **Nuvid**
- **NYTimes**
- **ocw.mit.edu**
- **Odnoklassniki**
- **OktoberfestTV**
- **on.aol.com**
- **Ooyala**
@ -328,8 +336,10 @@
- **PornoXO**
- **PromptFile**
- **prosiebensat1**: ProSiebenSat.1 Digital
- **Puls4**
- **Pyvideo**
- **QuickVid**
- **R7**
- **radio.de**
- **radiobremen**
- **radiofrance**
@ -385,7 +395,8 @@
- **soundcloud:playlist**
- **soundcloud:set**
- **soundcloud:user**
- **Soundgasm**
- **soundgasm**
- **soundgasm:profile**
- **southpark.cc.com**
- **southpark.de**
- **Space**
@ -404,7 +415,7 @@
- **StreamCZ**
- **StreetVoice**
- **SunPorno**
- **SVTPlay**
- **SVTPlay**: SVT Play and Öppet arkiv
- **SWRMediathek**
- **Syfy**
- **SztvHu**
@ -559,6 +570,7 @@
- **youtube:subscriptions**: YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)
- **youtube:user**: YouTube.com user videos (URL or "ytuser" keyword)
- **youtube:watch_later**: Youtube watch later list, ":ytwatchlater" for short (requires authentication)
- **Zapiks**
- **ZDF**
- **ZDFChannel**
- **zingmp3:album**: mp3.zing.vn albums

View File

@ -28,7 +28,7 @@
"retries": 10,
"simulate": false,
"subtitleslang": null,
"subtitlesformat": "srt",
"subtitlesformat": "best",
"test": true,
"updatetime": true,
"usenetrc": false,

View File

@ -337,6 +337,65 @@ class TestFormatSelection(unittest.TestCase):
downloaded = ydl.downloaded_info_dicts[0]
self.assertEqual(downloaded['format_id'], 'G')
def test_subtitles(self):
def s_formats(lang, autocaption=False):
return [{
'ext': ext,
'url': 'http://localhost/video.%s.%s' % (lang, ext),
'_auto': autocaption,
} for ext in ['vtt', 'srt', 'ass']]
subtitles = dict((l, s_formats(l)) for l in ['en', 'fr', 'es'])
auto_captions = dict((l, s_formats(l, True)) for l in ['it', 'pt', 'es'])
info_dict = {
'id': 'test',
'title': 'Test',
'url': 'http://localhost/video.mp4',
'subtitles': subtitles,
'automatic_captions': auto_captions,
'extractor': 'TEST',
}
def get_info(params={}):
params.setdefault('simulate', True)
ydl = YDL(params)
ydl.report_warning = lambda *args, **kargs: None
return ydl.process_video_result(info_dict, download=False)
result = get_info()
self.assertFalse(result.get('requested_subtitles'))
self.assertEqual(result['subtitles'], subtitles)
self.assertEqual(result['automatic_captions'], auto_captions)
result = get_info({'writesubtitles': True})
subs = result['requested_subtitles']
self.assertTrue(subs)
self.assertEqual(set(subs.keys()), set(['en']))
self.assertTrue(subs['en'].get('data') is None)
self.assertEqual(subs['en']['ext'], 'ass')
result = get_info({'writesubtitles': True, 'subtitlesformat': 'foo/srt'})
subs = result['requested_subtitles']
self.assertEqual(subs['en']['ext'], 'srt')
result = get_info({'writesubtitles': True, 'subtitleslangs': ['es', 'fr', 'it']})
subs = result['requested_subtitles']
self.assertTrue(subs)
self.assertEqual(set(subs.keys()), set(['es', 'fr']))
result = get_info({'writesubtitles': True, 'writeautomaticsub': True, 'subtitleslangs': ['es', 'pt']})
subs = result['requested_subtitles']
self.assertTrue(subs)
self.assertEqual(set(subs.keys()), set(['es', 'pt']))
self.assertFalse(subs['es']['_auto'])
self.assertTrue(subs['pt']['_auto'])
result = get_info({'writeautomaticsub': True, 'subtitleslangs': ['es', 'pt']})
subs = result['requested_subtitles']
self.assertTrue(subs)
self.assertEqual(set(subs.keys()), set(['es', 'pt']))
self.assertTrue(subs['es']['_auto'])
self.assertTrue(subs['pt']['_auto'])
def test_add_extra_info(self):
test_dict = {
'extractor': 'Foo',

View File

@ -18,6 +18,14 @@ from youtube_dl.extractor import (
VimeoIE,
WallaIE,
CeskaTelevizeIE,
LyndaIE,
NPOIE,
ComedyCentralIE,
NRKTVIE,
RaiIE,
VikiIE,
ThePlatformIE,
RTVEALaCartaIE,
)
@ -27,42 +35,38 @@ class BaseTestSubtitles(unittest.TestCase):
def setUp(self):
self.DL = FakeYDL()
self.ie = self.IE(self.DL)
self.ie = self.IE()
self.DL.add_info_extractor(self.ie)
def getInfoDict(self):
info_dict = self.ie.extract(self.url)
info_dict = self.DL.extract_info(self.url, download=False)
return info_dict
def getSubtitles(self):
info_dict = self.getInfoDict()
return info_dict['subtitles']
subtitles = info_dict['requested_subtitles']
if not subtitles:
return subtitles
for sub_info in subtitles.values():
if sub_info.get('data') is None:
uf = self.DL.urlopen(sub_info['url'])
sub_info['data'] = uf.read().decode('utf-8')
return dict((l, sub_info['data']) for l, sub_info in subtitles.items())
class TestYoutubeSubtitles(BaseTestSubtitles):
url = 'QRS8MkLhQmM'
IE = YoutubeIE
def test_youtube_no_writesubtitles(self):
self.DL.params['writesubtitles'] = False
subtitles = self.getSubtitles()
self.assertEqual(subtitles, None)
def test_youtube_subtitles(self):
self.DL.params['writesubtitles'] = True
subtitles = self.getSubtitles()
self.assertEqual(md5(subtitles['en']), '4cd9278a35ba2305f47354ee13472260')
def test_youtube_subtitles_lang(self):
self.DL.params['writesubtitles'] = True
self.DL.params['subtitleslangs'] = ['it']
subtitles = self.getSubtitles()
self.assertEqual(md5(subtitles['it']), '164a51f16f260476a05b50fe4c2f161d')
def test_youtube_allsubtitles(self):
self.DL.params['writesubtitles'] = True
self.DL.params['allsubtitles'] = True
subtitles = self.getSubtitles()
self.assertEqual(len(subtitles.keys()), 13)
self.assertEqual(md5(subtitles['en']), '4cd9278a35ba2305f47354ee13472260')
self.assertEqual(md5(subtitles['it']), '164a51f16f260476a05b50fe4c2f161d')
for lang in ['it', 'fr', 'de']:
self.assertTrue(subtitles.get(lang) is not None, 'Subtitles for \'%s\' not extracted' % lang)
def test_youtube_subtitles_sbv_format(self):
self.DL.params['writesubtitles'] = True
@ -76,12 +80,6 @@ class TestYoutubeSubtitles(BaseTestSubtitles):
subtitles = self.getSubtitles()
self.assertEqual(md5(subtitles['en']), '3cb210999d3e021bd6c7f0ea751eab06')
def test_youtube_list_subtitles(self):
self.DL.expect_warning('Video doesn\'t have automatic captions')
self.DL.params['listsubtitles'] = True
info_dict = self.getInfoDict()
self.assertEqual(info_dict, None)
def test_youtube_automatic_captions(self):
self.url = '8YoUxe5ncPo'
self.DL.params['writeautomaticsub'] = True
@ -103,55 +101,22 @@ class TestYoutubeSubtitles(BaseTestSubtitles):
self.DL.params['writesubtitles'] = True
self.DL.params['allsubtitles'] = True
subtitles = self.getSubtitles()
self.assertEqual(len(subtitles), 0)
def test_youtube_multiple_langs(self):
self.url = 'QRS8MkLhQmM'
self.DL.params['writesubtitles'] = True
langs = ['it', 'fr', 'de']
self.DL.params['subtitleslangs'] = langs
subtitles = self.getSubtitles()
for lang in langs:
self.assertTrue(subtitles.get(lang) is not None, 'Subtitles for \'%s\' not extracted' % lang)
self.assertFalse(subtitles)
class TestDailymotionSubtitles(BaseTestSubtitles):
url = 'http://www.dailymotion.com/video/xczg00'
IE = DailymotionIE
def test_no_writesubtitles(self):
subtitles = self.getSubtitles()
self.assertEqual(subtitles, None)
def test_subtitles(self):
self.DL.params['writesubtitles'] = True
subtitles = self.getSubtitles()
self.assertEqual(md5(subtitles['en']), '976553874490cba125086bbfea3ff76f')
def test_subtitles_lang(self):
self.DL.params['writesubtitles'] = True
self.DL.params['subtitleslangs'] = ['fr']
subtitles = self.getSubtitles()
self.assertEqual(md5(subtitles['fr']), '594564ec7d588942e384e920e5341792')
def test_allsubtitles(self):
self.DL.params['writesubtitles'] = True
self.DL.params['allsubtitles'] = True
subtitles = self.getSubtitles()
self.assertTrue(len(subtitles.keys()) >= 6)
def test_list_subtitles(self):
self.DL.expect_warning('Automatic Captions not supported by this server')
self.DL.params['listsubtitles'] = True
info_dict = self.getInfoDict()
self.assertEqual(info_dict, None)
def test_automatic_captions(self):
self.DL.expect_warning('Automatic Captions not supported by this server')
self.DL.params['writeautomaticsub'] = True
self.DL.params['subtitleslang'] = ['en']
subtitles = self.getSubtitles()
self.assertTrue(len(subtitles.keys()) == 0)
self.assertEqual(md5(subtitles['en']), '976553874490cba125086bbfea3ff76f')
self.assertEqual(md5(subtitles['fr']), '594564ec7d588942e384e920e5341792')
for lang in ['es', 'fr', 'de']:
self.assertTrue(subtitles.get(lang) is not None, 'Subtitles for \'%s\' not extracted' % lang)
def test_nosubtitles(self):
self.DL.expect_warning('video doesn\'t have subtitles')
@ -159,61 +124,21 @@ class TestDailymotionSubtitles(BaseTestSubtitles):
self.DL.params['writesubtitles'] = True
self.DL.params['allsubtitles'] = True
subtitles = self.getSubtitles()
self.assertEqual(len(subtitles), 0)
def test_multiple_langs(self):
self.DL.params['writesubtitles'] = True
langs = ['es', 'fr', 'de']
self.DL.params['subtitleslangs'] = langs
subtitles = self.getSubtitles()
for lang in langs:
self.assertTrue(subtitles.get(lang) is not None, 'Subtitles for \'%s\' not extracted' % lang)
self.assertFalse(subtitles)
class TestTedSubtitles(BaseTestSubtitles):
url = 'http://www.ted.com/talks/dan_dennett_on_our_consciousness.html'
IE = TEDIE
def test_no_writesubtitles(self):
subtitles = self.getSubtitles()
self.assertEqual(subtitles, None)
def test_subtitles(self):
self.DL.params['writesubtitles'] = True
subtitles = self.getSubtitles()
self.assertEqual(md5(subtitles['en']), '4262c1665ff928a2dada178f62cb8d14')
def test_subtitles_lang(self):
self.DL.params['writesubtitles'] = True
self.DL.params['subtitleslangs'] = ['fr']
subtitles = self.getSubtitles()
self.assertEqual(md5(subtitles['fr']), '66a63f7f42c97a50f8c0e90bc7797bb5')
def test_allsubtitles(self):
self.DL.params['writesubtitles'] = True
self.DL.params['allsubtitles'] = True
subtitles = self.getSubtitles()
self.assertTrue(len(subtitles.keys()) >= 28)
def test_list_subtitles(self):
self.DL.expect_warning('Automatic Captions not supported by this server')
self.DL.params['listsubtitles'] = True
info_dict = self.getInfoDict()
self.assertEqual(info_dict, None)
def test_automatic_captions(self):
self.DL.expect_warning('Automatic Captions not supported by this server')
self.DL.params['writeautomaticsub'] = True
self.DL.params['subtitleslang'] = ['en']
subtitles = self.getSubtitles()
self.assertTrue(len(subtitles.keys()) == 0)
def test_multiple_langs(self):
self.DL.params['writesubtitles'] = True
langs = ['es', 'fr', 'de']
self.DL.params['subtitleslangs'] = langs
subtitles = self.getSubtitles()
for lang in langs:
self.assertEqual(md5(subtitles['en']), '4262c1665ff928a2dada178f62cb8d14')
self.assertEqual(md5(subtitles['fr']), '66a63f7f42c97a50f8c0e90bc7797bb5')
for lang in ['es', 'fr', 'de']:
self.assertTrue(subtitles.get(lang) is not None, 'Subtitles for \'%s\' not extracted' % lang)
@ -221,14 +146,7 @@ class TestBlipTVSubtitles(BaseTestSubtitles):
url = 'http://blip.tv/a/a-6603250'
IE = BlipTVIE
def test_list_subtitles(self):
self.DL.expect_warning('Automatic Captions not supported by this server')
self.DL.params['listsubtitles'] = True
info_dict = self.getInfoDict()
self.assertEqual(info_dict, None)
def test_allsubtitles(self):
self.DL.expect_warning('Automatic Captions not supported by this server')
self.DL.params['writesubtitles'] = True
self.DL.params['allsubtitles'] = True
subtitles = self.getSubtitles()
@ -240,39 +158,13 @@ class TestVimeoSubtitles(BaseTestSubtitles):
url = 'http://vimeo.com/76979871'
IE = VimeoIE
def test_no_writesubtitles(self):
subtitles = self.getSubtitles()
self.assertEqual(subtitles, None)
def test_subtitles(self):
self.DL.params['writesubtitles'] = True
subtitles = self.getSubtitles()
self.assertEqual(md5(subtitles['en']), '8062383cf4dec168fc40a088aa6d5888')
def test_subtitles_lang(self):
self.DL.params['writesubtitles'] = True
self.DL.params['subtitleslangs'] = ['fr']
subtitles = self.getSubtitles()
self.assertEqual(md5(subtitles['fr']), 'b6191146a6c5d3a452244d853fde6dc8')
def test_allsubtitles(self):
self.DL.params['writesubtitles'] = True
self.DL.params['allsubtitles'] = True
subtitles = self.getSubtitles()
self.assertEqual(set(subtitles.keys()), set(['de', 'en', 'es', 'fr']))
def test_list_subtitles(self):
self.DL.expect_warning('Automatic Captions not supported by this server')
self.DL.params['listsubtitles'] = True
info_dict = self.getInfoDict()
self.assertEqual(info_dict, None)
def test_automatic_captions(self):
self.DL.expect_warning('Automatic Captions not supported by this server')
self.DL.params['writeautomaticsub'] = True
self.DL.params['subtitleslang'] = ['en']
subtitles = self.getSubtitles()
self.assertTrue(len(subtitles.keys()) == 0)
self.assertEqual(md5(subtitles['en']), '8062383cf4dec168fc40a088aa6d5888')
self.assertEqual(md5(subtitles['fr']), 'b6191146a6c5d3a452244d853fde6dc8')
def test_nosubtitles(self):
self.DL.expect_warning('video doesn\'t have subtitles')
@ -280,27 +172,13 @@ class TestVimeoSubtitles(BaseTestSubtitles):
self.DL.params['writesubtitles'] = True
self.DL.params['allsubtitles'] = True
subtitles = self.getSubtitles()
self.assertEqual(len(subtitles), 0)
def test_multiple_langs(self):
self.DL.params['writesubtitles'] = True
langs = ['es', 'fr', 'de']
self.DL.params['subtitleslangs'] = langs
subtitles = self.getSubtitles()
for lang in langs:
self.assertTrue(subtitles.get(lang) is not None, 'Subtitles for \'%s\' not extracted' % lang)
self.assertFalse(subtitles)
class TestWallaSubtitles(BaseTestSubtitles):
url = 'http://vod.walla.co.il/movie/2705958/the-yes-men'
IE = WallaIE
def test_list_subtitles(self):
self.DL.expect_warning('Automatic Captions not supported by this server')
self.DL.params['listsubtitles'] = True
info_dict = self.getInfoDict()
self.assertEqual(info_dict, None)
def test_allsubtitles(self):
self.DL.expect_warning('Automatic Captions not supported by this server')
self.DL.params['writesubtitles'] = True
@ -315,19 +193,13 @@ class TestWallaSubtitles(BaseTestSubtitles):
self.DL.params['writesubtitles'] = True
self.DL.params['allsubtitles'] = True
subtitles = self.getSubtitles()
self.assertEqual(len(subtitles), 0)
self.assertFalse(subtitles)
class TestCeskaTelevizeSubtitles(BaseTestSubtitles):
url = 'http://www.ceskatelevize.cz/ivysilani/10600540290-u6-uzasny-svet-techniky'
IE = CeskaTelevizeIE
def test_list_subtitles(self):
self.DL.expect_warning('Automatic Captions not supported by this server')
self.DL.params['listsubtitles'] = True
info_dict = self.getInfoDict()
self.assertEqual(info_dict, None)
def test_allsubtitles(self):
self.DL.expect_warning('Automatic Captions not supported by this server')
self.DL.params['writesubtitles'] = True
@ -342,7 +214,110 @@ class TestCeskaTelevizeSubtitles(BaseTestSubtitles):
self.DL.params['writesubtitles'] = True
self.DL.params['allsubtitles'] = True
subtitles = self.getSubtitles()
self.assertEqual(len(subtitles), 0)
self.assertFalse(subtitles)
class TestLyndaSubtitles(BaseTestSubtitles):
url = 'http://www.lynda.com/Bootstrap-tutorials/Using-exercise-files/110885/114408-4.html'
IE = LyndaIE
def test_allsubtitles(self):
self.DL.params['writesubtitles'] = True
self.DL.params['allsubtitles'] = True
subtitles = self.getSubtitles()
self.assertEqual(set(subtitles.keys()), set(['en']))
self.assertEqual(md5(subtitles['en']), '09bbe67222259bed60deaa26997d73a7')
class TestNPOSubtitles(BaseTestSubtitles):
url = 'http://www.npo.nl/nos-journaal/28-08-2014/POW_00722860'
IE = NPOIE
def test_allsubtitles(self):
self.DL.params['writesubtitles'] = True
self.DL.params['allsubtitles'] = True
subtitles = self.getSubtitles()
self.assertEqual(set(subtitles.keys()), set(['nl']))
self.assertEqual(md5(subtitles['nl']), 'fc6435027572b63fb4ab143abd5ad3f4')
class TestMTVSubtitles(BaseTestSubtitles):
url = 'http://www.cc.com/video-clips/kllhuv/stand-up-greg-fitzsimmons--uncensored---too-good-of-a-mother'
IE = ComedyCentralIE
def getInfoDict(self):
return super(TestMTVSubtitles, self).getInfoDict()['entries'][0]
def test_allsubtitles(self):
self.DL.params['writesubtitles'] = True
self.DL.params['allsubtitles'] = True
subtitles = self.getSubtitles()
self.assertEqual(set(subtitles.keys()), set(['en']))
self.assertEqual(md5(subtitles['en']), 'b9f6ca22a6acf597ec76f61749765e65')
class TestNRKSubtitles(BaseTestSubtitles):
url = 'http://tv.nrk.no/serie/ikke-gjoer-dette-hjemme/DMPV73000411/sesong-2/episode-1'
IE = NRKTVIE
def test_allsubtitles(self):
self.DL.params['writesubtitles'] = True
self.DL.params['allsubtitles'] = True
subtitles = self.getSubtitles()
self.assertEqual(set(subtitles.keys()), set(['no']))
self.assertEqual(md5(subtitles['no']), '1d221e6458c95c5494dcd38e6a1f129a')
class TestRaiSubtitles(BaseTestSubtitles):
url = 'http://www.rai.tv/dl/RaiTV/programmi/media/ContentItem-cb27157f-9dd0-4aee-b788-b1f67643a391.html'
IE = RaiIE
def test_allsubtitles(self):
self.DL.params['writesubtitles'] = True
self.DL.params['allsubtitles'] = True
subtitles = self.getSubtitles()
self.assertEqual(set(subtitles.keys()), set(['it']))
self.assertEqual(md5(subtitles['it']), 'b1d90a98755126b61e667567a1f6680a')
class TestVikiSubtitles(BaseTestSubtitles):
url = 'http://www.viki.com/videos/1060846v-punch-episode-18'
IE = VikiIE
def test_allsubtitles(self):
self.DL.params['writesubtitles'] = True
self.DL.params['allsubtitles'] = True
subtitles = self.getSubtitles()
self.assertEqual(set(subtitles.keys()), set(['en']))
self.assertEqual(md5(subtitles['en']), '53cb083a5914b2d84ef1ab67b880d18a')
class TestThePlatformSubtitles(BaseTestSubtitles):
# from http://www.3playmedia.com/services-features/tools/integrations/theplatform/
# (see http://theplatform.com/about/partners/type/subtitles-closed-captioning/)
url = 'theplatform:JFUjUE1_ehvq'
IE = ThePlatformIE
def test_allsubtitles(self):
self.DL.params['writesubtitles'] = True
self.DL.params['allsubtitles'] = True
subtitles = self.getSubtitles()
self.assertEqual(set(subtitles.keys()), set(['en']))
self.assertEqual(md5(subtitles['en']), '97e7670cbae3c4d26ae8bcc7fdd78d4b')
class TestRtveSubtitles(BaseTestSubtitles):
url = 'http://www.rtve.es/alacarta/videos/los-misterios-de-laura/misterios-laura-capitulo-32-misterio-del-numero-17-2-parte/2428621/'
IE = RTVEALaCartaIE
def test_allsubtitles(self):
print('Skipping, only available from Spain')
return
self.DL.params['writesubtitles'] = True
self.DL.params['allsubtitles'] = True
subtitles = self.getSubtitles()
self.assertEqual(set(subtitles.keys()), set(['es']))
self.assertEqual(md5(subtitles['es']), '69e70cae2d40574fb7316f31d6eb7fca')
if __name__ == '__main__':

View File

@ -34,8 +34,8 @@ def _make_testfunc(testfile):
def test_func(self):
as_file = os.path.join(TEST_DIR, testfile)
swf_file = os.path.join(TEST_DIR, test_id + '.swf')
if ((not os.path.exists(swf_file))
or os.path.getmtime(swf_file) < os.path.getmtime(as_file)):
if ((not os.path.exists(swf_file)) or
os.path.getmtime(swf_file) < os.path.getmtime(as_file)):
# Recompile
try:
subprocess.check_call([

View File

@ -85,6 +85,8 @@ class TestUtil(unittest.TestCase):
self.assertEqual(
sanitize_filename('New World record at 0:12:34'),
'New World record at 0_12_34')
self.assertEqual(sanitize_filename('--gasdgf'), '_-gasdgf')
self.assertEqual(sanitize_filename('--gasdgf', is_id=True), '--gasdgf')
forbidden = '"\0\\/'
for fc in forbidden:
@ -244,6 +246,7 @@ class TestUtil(unittest.TestCase):
self.assertEqual(parse_duration('2.5 hours'), 9000)
self.assertEqual(parse_duration('02:03:04'), 7384)
self.assertEqual(parse_duration('01:02:03:04'), 93784)
self.assertEqual(parse_duration('1 hour 3 minutes'), 3780)
def test_fix_xml_ampersands(self):
self.assertEqual(

View File

@ -28,6 +28,7 @@ from .compat import (
compat_basestring,
compat_cookiejar,
compat_expanduser,
compat_get_terminal_size,
compat_http_client,
compat_kwargs,
compat_str,
@ -46,7 +47,6 @@ from .utils import (
ExtractorError,
format_bytes,
formatSeconds,
get_term_width,
locked_file,
make_HTTPS_handler,
MaxDownloadsReached,
@ -155,7 +155,7 @@ class YoutubeDL(object):
allsubtitles: Downloads all the subtitles of the video
(requires writesubtitles or writeautomaticsub)
listsubtitles: Lists all available subtitles for the video
subtitlesformat: Subtitle format [srt/sbv/vtt] (default=srt)
subtitlesformat: The format code for subtitles
subtitleslangs: List of languages of the subtitles to download
keepvideo: Keep the video file after post-processing
daterange: A DateRange object, download only if the upload_date is in the range.
@ -285,7 +285,7 @@ class YoutubeDL(object):
try:
import pty
master, slave = pty.openpty()
width = get_term_width()
width = compat_get_terminal_size().columns
if width is None:
width_args = []
else:
@ -309,8 +309,8 @@ class YoutubeDL(object):
raise
if (sys.version_info >= (3,) and sys.platform != 'win32' and
sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
and not params.get('restrictfilenames', False)):
sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] and
not params.get('restrictfilenames', False)):
# On Python 3, the Unicode filesystem API will throw errors (#1474)
self.report_warning(
'Assuming --restrict-filenames since file system encoding '
@ -1009,6 +1009,15 @@ class YoutubeDL(object):
info_dict['timestamp'])
info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
if self.params.get('listsubtitles', False):
if 'automatic_captions' in info_dict:
self.list_subtitles(info_dict['id'], info_dict.get('automatic_captions'), 'automatic captions')
self.list_subtitles(info_dict['id'], info_dict.get('subtitles'), 'subtitles')
return
info_dict['requested_subtitles'] = self.process_subtitles(
info_dict['id'], info_dict.get('subtitles'),
info_dict.get('automatic_captions'))
# This extractors handle format selection themselves
if info_dict['extractor'] in ['Youku']:
if download:
@ -1146,6 +1155,55 @@ class YoutubeDL(object):
info_dict.update(formats_to_download[-1])
return info_dict
def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
"""Select the requested subtitles and their format"""
available_subs = {}
if normal_subtitles and self.params.get('writesubtitles'):
available_subs.update(normal_subtitles)
if automatic_captions and self.params.get('writeautomaticsub'):
for lang, cap_info in automatic_captions.items():
if lang not in available_subs:
available_subs[lang] = cap_info
if (not self.params.get('writesubtitles') and not
self.params.get('writeautomaticsub') or not
available_subs):
return None
if self.params.get('allsubtitles', False):
requested_langs = available_subs.keys()
else:
if self.params.get('subtitleslangs', False):
requested_langs = self.params.get('subtitleslangs')
elif 'en' in available_subs:
requested_langs = ['en']
else:
requested_langs = [list(available_subs.keys())[0]]
formats_query = self.params.get('subtitlesformat', 'best')
formats_preference = formats_query.split('/') if formats_query else []
subs = {}
for lang in requested_langs:
formats = available_subs.get(lang)
if formats is None:
self.report_warning('%s subtitles not available for %s' % (lang, video_id))
continue
for ext in formats_preference:
if ext == 'best':
f = formats[-1]
break
matches = list(filter(lambda f: f['ext'] == ext, formats))
if matches:
f = matches[-1]
break
else:
f = formats[-1]
self.report_warning(
'No subtitle format found matching "%s" for language %s, '
'using %s' % (formats_query, lang, f['ext']))
subs[lang] = f
return subs
def process_info(self, info_dict):
"""Process a single resolved IE result."""
@ -1248,15 +1306,23 @@ class YoutubeDL(object):
subtitles_are_requested = any([self.params.get('writesubtitles', False),
self.params.get('writeautomaticsub')])
if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
if subtitles_are_requested and info_dict.get('requested_subtitles'):
# subtitles download errors are already managed as troubles in relevant IE
# that way it will silently go on when used with unsupporting IE
subtitles = info_dict['subtitles']
sub_format = self.params.get('subtitlesformat', 'srt')
for sub_lang in subtitles.keys():
sub = subtitles[sub_lang]
if sub is None:
continue
subtitles = info_dict['requested_subtitles']
ie = self.get_info_extractor(info_dict['extractor_key'])
for sub_lang, sub_info in subtitles.items():
sub_format = sub_info['ext']
if sub_info.get('data') is not None:
sub_data = sub_info['data']
else:
try:
sub_data = ie._download_webpage(
sub_info['url'], info_dict['id'], note=False)
except ExtractorError as err:
self.report_warning('Unable to download subtitle for "%s": %s' %
(sub_lang, compat_str(err.cause)))
continue
try:
sub_filename = subtitles_filename(filename, sub_lang, sub_format)
if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
@ -1264,7 +1330,7 @@ class YoutubeDL(object):
else:
self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
subfile.write(sub)
subfile.write(sub_data)
except (OSError, IOError):
self.report_error('Cannot write subtitles file ' + sub_filename)
return
@ -1395,8 +1461,8 @@ class YoutubeDL(object):
"""Download a given list of URLs."""
outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
if (len(url_list) > 1 and
'%' not in outtmpl
and self.params.get('max_downloads') != 1):
'%' not in outtmpl and
self.params.get('max_downloads') != 1):
raise SameFileError(outtmpl)
for url in url_list:
@ -1593,6 +1659,17 @@ class YoutubeDL(object):
['ID', 'width', 'height', 'URL'],
[[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
def list_subtitles(self, video_id, subtitles, name='subtitles'):
if not subtitles:
self.to_screen('%s has no %s' % (video_id, name))
return
self.to_screen(
'Available %s for %s:' % (name, video_id))
self.to_screen(render_table(
['Language', 'formats'],
[[lang, ', '.join(f['ext'] for f in reversed(formats))]
for lang, formats in subtitles.items()]))
def urlopen(self, req):
""" Start an HTTP download """

View File

@ -170,6 +170,9 @@ def _real_main(argv=None):
if opts.recodevideo is not None:
if opts.recodevideo not in ['mp4', 'flv', 'webm', 'ogg', 'mkv']:
parser.error('invalid video recode format specified')
if opts.convertsubtitles is not None:
if opts.convertsubtitles not in ['srt', 'vtt', 'ass']:
parser.error('invalid subtitle format specified')
if opts.date is not None:
date = DateRange.day(opts.date)
@ -189,14 +192,14 @@ def _real_main(argv=None):
# In Python 2, sys.argv is a bytestring (also note http://bugs.python.org/issue2128 for Windows systems)
if opts.outtmpl is not None:
opts.outtmpl = opts.outtmpl.decode(preferredencoding())
outtmpl = ((opts.outtmpl is not None and opts.outtmpl)
or (opts.format == '-1' and opts.usetitle and '%(title)s-%(id)s-%(format)s.%(ext)s')
or (opts.format == '-1' and '%(id)s-%(format)s.%(ext)s')
or (opts.usetitle and opts.autonumber and '%(autonumber)s-%(title)s-%(id)s.%(ext)s')
or (opts.usetitle and '%(title)s-%(id)s.%(ext)s')
or (opts.useid and '%(id)s.%(ext)s')
or (opts.autonumber and '%(autonumber)s-%(id)s.%(ext)s')
or DEFAULT_OUTTMPL)
outtmpl = ((opts.outtmpl is not None and opts.outtmpl) or
(opts.format == '-1' and opts.usetitle and '%(title)s-%(id)s-%(format)s.%(ext)s') or
(opts.format == '-1' and '%(id)s-%(format)s.%(ext)s') or
(opts.usetitle and opts.autonumber and '%(autonumber)s-%(title)s-%(id)s.%(ext)s') or
(opts.usetitle and '%(title)s-%(id)s.%(ext)s') or
(opts.useid and '%(id)s.%(ext)s') or
(opts.autonumber and '%(autonumber)s-%(id)s.%(ext)s') or
DEFAULT_OUTTMPL)
if not os.path.splitext(outtmpl)[1] and opts.extractaudio:
parser.error('Cannot download a video and extract audio into the same'
' file! Use "{0}.%(ext)s" instead of "{0}" as the output'
@ -223,10 +226,14 @@ def _real_main(argv=None):
'key': 'FFmpegVideoConvertor',
'preferedformat': opts.recodevideo,
})
if opts.convertsubtitles:
postprocessors.append({
'key': 'FFmpegSubtitlesConvertor',
'format': opts.convertsubtitles,
})
if opts.embedsubtitles:
postprocessors.append({
'key': 'FFmpegEmbedSubtitle',
'subtitlesformat': opts.subtitlesformat,
})
if opts.xattrs:
postprocessors.append({'key': 'XAttrMetadata'})

View File

@ -1,9 +1,11 @@
from __future__ import unicode_literals
import collections
import getpass
import optparse
import os
import re
import shutil
import socket
import subprocess
import sys
@ -364,6 +366,33 @@ def workaround_optparse_bug9161():
return real_add_option(self, *bargs, **bkwargs)
optparse.OptionGroup.add_option = _compat_add_option
if hasattr(shutil, 'get_terminal_size'): # Python >= 3.3
compat_get_terminal_size = shutil.get_terminal_size
else:
_terminal_size = collections.namedtuple('terminal_size', ['columns', 'lines'])
def compat_get_terminal_size():
columns = compat_getenv('COLUMNS', None)
if columns:
columns = int(columns)
else:
columns = None
lines = compat_getenv('LINES', None)
if lines:
lines = int(lines)
else:
lines = None
try:
sp = subprocess.Popen(
['stty', 'size'],
stdout=subprocess.PIPE, stderr=subprocess.PIPE)
out, err = sp.communicate()
lines, columns = map(int, out.split())
except:
pass
return _terminal_size(columns, lines)
__all__ = [
'compat_HTTPError',
@ -371,6 +400,7 @@ __all__ = [
'compat_chr',
'compat_cookiejar',
'compat_expanduser',
'compat_get_terminal_size',
'compat_getenv',
'compat_getpass',
'compat_html_entities',

View File

@ -311,14 +311,14 @@ class FileDownloader(object):
"""
nooverwrites_and_exists = (
self.params.get('nooverwrites', False)
and os.path.exists(encodeFilename(filename))
self.params.get('nooverwrites', False) and
os.path.exists(encodeFilename(filename))
)
continuedl_and_exists = (
self.params.get('continuedl', False)
and os.path.isfile(encodeFilename(filename))
and not self.params.get('nopart', False)
self.params.get('continuedl', False) and
os.path.isfile(encodeFilename(filename)) and
not self.params.get('nopart', False)
)
# Check file already present

View File

@ -11,6 +11,7 @@ from .common import FileDownloader
from .http import HttpFD
from ..compat import (
compat_urlparse,
compat_urllib_error,
)
from ..utils import (
struct_pack,
@ -121,7 +122,8 @@ class FlvReader(io.BytesIO):
self.read_unsigned_int() # BootstrapinfoVersion
# Profile,Live,Update,Reserved
self.read(1)
flags = self.read_unsigned_char()
live = flags & 0x20 != 0
# time scale
self.read_unsigned_int()
# CurrentMediaTime
@ -160,6 +162,7 @@ class FlvReader(io.BytesIO):
return {
'segments': segments,
'fragments': fragments,
'live': live,
}
def read_bootstrap_info(self):
@ -182,6 +185,10 @@ def build_fragments_list(boot_info):
for segment, fragments_count in segment_run_table['segment_run']:
for _ in range(fragments_count):
res.append((segment, next(fragments_counter)))
if boot_info['live']:
res = res[-2:]
return res
@ -246,6 +253,38 @@ class F4mFD(FileDownloader):
self.report_error('Unsupported DRM')
return media
def _get_bootstrap_from_url(self, bootstrap_url):
bootstrap = self.ydl.urlopen(bootstrap_url).read()
return read_bootstrap_info(bootstrap)
def _update_live_fragments(self, bootstrap_url, latest_fragment):
fragments_list = []
retries = 30
while (not fragments_list) and (retries > 0):
boot_info = self._get_bootstrap_from_url(bootstrap_url)
fragments_list = build_fragments_list(boot_info)
fragments_list = [f for f in fragments_list if f[1] > latest_fragment]
if not fragments_list:
# Retry after a while
time.sleep(5.0)
retries -= 1
if not fragments_list:
self.report_error('Failed to update fragments')
return fragments_list
def _parse_bootstrap_node(self, node, base_url):
if node.text is None:
bootstrap_url = compat_urlparse.urljoin(
base_url, node.attrib['url'])
boot_info = self._get_bootstrap_from_url(bootstrap_url)
else:
bootstrap_url = None
bootstrap = base64.b64decode(node.text)
boot_info = read_bootstrap_info(bootstrap)
return (boot_info, bootstrap_url)
def real_download(self, filename, info_dict):
man_url = info_dict['url']
requested_bitrate = info_dict.get('tbr')
@ -265,18 +304,13 @@ class F4mFD(FileDownloader):
base_url = compat_urlparse.urljoin(man_url, media.attrib['url'])
bootstrap_node = doc.find(_add_ns('bootstrapInfo'))
if bootstrap_node.text is None:
bootstrap_url = compat_urlparse.urljoin(
base_url, bootstrap_node.attrib['url'])
bootstrap = self.ydl.urlopen(bootstrap_url).read()
else:
bootstrap = base64.b64decode(bootstrap_node.text)
boot_info, bootstrap_url = self._parse_bootstrap_node(bootstrap_node, base_url)
live = boot_info['live']
metadata_node = media.find(_add_ns('metadata'))
if metadata_node is not None:
metadata = base64.b64decode(metadata_node.text)
else:
metadata = None
boot_info = read_bootstrap_info(bootstrap)
fragments_list = build_fragments_list(boot_info)
if self.params.get('test', False):
@ -301,7 +335,8 @@ class F4mFD(FileDownloader):
(dest_stream, tmpfilename) = sanitize_open(tmpfilename, 'wb')
write_flv_header(dest_stream)
write_metadata_tag(dest_stream, metadata)
if not live:
write_metadata_tag(dest_stream, metadata)
# This dict stores the download progress, it's updated by the progress
# hook
@ -325,8 +360,8 @@ class F4mFD(FileDownloader):
state['frag_index'] += 1
estimated_size = (
(state['downloaded_bytes'] + frag_total_bytes)
/ (state['frag_index'] + 1) * total_frags)
(state['downloaded_bytes'] + frag_total_bytes) /
(state['frag_index'] + 1) * total_frags)
time_now = time.time()
state['total_bytes_estimate'] = estimated_size
state['elapsed'] = time_now - start
@ -348,24 +383,45 @@ class F4mFD(FileDownloader):
http_dl.add_progress_hook(frag_progress_hook)
frags_filenames = []
for (seg_i, frag_i) in fragments_list:
while fragments_list:
seg_i, frag_i = fragments_list.pop(0)
name = 'Seg%d-Frag%d' % (seg_i, frag_i)
url = base_url + name
if akamai_pv:
url += '?' + akamai_pv.strip(';')
frag_filename = '%s-%s' % (tmpfilename, name)
success = http_dl.download(frag_filename, {'url': url})
if not success:
return False
with open(frag_filename, 'rb') as down:
down_data = down.read()
reader = FlvReader(down_data)
while True:
_, box_type, box_data = reader.read_box_info()
if box_type == b'mdat':
dest_stream.write(box_data)
break
frags_filenames.append(frag_filename)
try:
success = http_dl.download(frag_filename, {'url': url})
if not success:
return False
with open(frag_filename, 'rb') as down:
down_data = down.read()
reader = FlvReader(down_data)
while True:
_, box_type, box_data = reader.read_box_info()
if box_type == b'mdat':
dest_stream.write(box_data)
break
if live:
os.remove(frag_filename)
else:
frags_filenames.append(frag_filename)
except (compat_urllib_error.HTTPError, ) as err:
if live and (err.code == 404 or err.code == 410):
# We didn't keep up with the live window. Continue
# with the next available fragment.
msg = 'Fragment %d unavailable' % frag_i
self.report_warning(msg)
fragments_list = []
else:
raise
if not fragments_list and live and bootstrap_url:
fragments_list = self._update_live_fragments(bootstrap_url, frag_i)
total_frags += len(fragments_list)
if fragments_list and (fragments_list[0][1] > frag_i + 1):
msg = 'Missed %d fragments' % (fragments_list[0][1] - (frag_i + 1))
self.report_warning(msg)
dest_stream.close()

View File

@ -119,7 +119,9 @@ class RtmpFD(FileDownloader):
# Download using rtmpdump. rtmpdump returns exit code 2 when
# the connection was interrumpted and resuming appears to be
# possible. This is part of rtmpdump's normal usage, AFAIK.
basic_args = ['rtmpdump', '--verbose', '-r', url, '-o', tmpfilename]
basic_args = [
'rtmpdump', '--verbose', '-r', url,
'-o', encodeFilename(tmpfilename, True)]
if player_url is not None:
basic_args += ['--swfVfy', player_url]
if page_url is not None:

View File

@ -8,6 +8,7 @@ from .adobetv import AdobeTVIE
from .adultswim import AdultSwimIE
from .aftenposten import AftenpostenIE
from .aftonbladet import AftonbladetIE
from .airmozilla import AirMozillaIE
from .aljazeera import AlJazeeraIE
from .alphaporno import AlphaPornoIE
from .anitube import AnitubeIE
@ -63,6 +64,10 @@ from .ccc import CCCIE
from .ceskatelevize import CeskaTelevizeIE
from .channel9 import Channel9IE
from .chilloutzone import ChilloutzoneIE
from .chirbit import (
ChirbitIE,
ChirbitProfileIE,
)
from .cinchcast import CinchcastIE
from .clipfish import ClipfishIE
from .cliphunter import CliphunterIE
@ -222,6 +227,7 @@ from .jeuxvideo import JeuxVideoIE
from .jove import JoveIE
from .jukebox import JukeboxIE
from .jpopsukitv import JpopsukiIE
from .kaltura import KalturaIE
from .kankan import KankanIE
from .karaoketv import KaraoketvIE
from .keezmovies import KeezMoviesIE
@ -233,6 +239,11 @@ from .krasview import KrasViewIE
from .ku6 import Ku6IE
from .la7 import LA7IE
from .laola1tv import Laola1TvIE
from .letv import (
LetvIE,
LetvTvIE,
LetvPlaylistIE
)
from .lifenews import LifeNewsIE
from .liveleak import LiveLeakIE
from .livestream import (
@ -335,6 +346,7 @@ from .ntvde import NTVDeIE
from .ntvru import NTVRuIE
from .nytimes import NYTimesIE
from .nuvid import NuvidIE
from .odnoklassniki import OdnoklassnikiIE
from .oktoberfesttv import OktoberfestTVIE
from .ooyala import OoyalaIE
from .openfilm import OpenFilmIE
@ -362,8 +374,10 @@ from .pornotube import PornotubeIE
from .pornoxo import PornoXOIE
from .promptfile import PromptFileIE
from .prosiebensat1 import ProSiebenSat1IE
from .puls4 import Puls4IE
from .pyvideo import PyvideoIE
from .quickvid import QuickVidIE
from .r7 import R7IE
from .radiode import RadioDeIE
from .radiobremen import RadioBremenIE
from .radiofrance import RadioFranceIE
@ -424,7 +438,10 @@ from .soundcloud import (
SoundcloudUserIE,
SoundcloudPlaylistIE
)
from .soundgasm import SoundgasmIE
from .soundgasm import (
SoundgasmIE,
SoundgasmProfileIE
)
from .southpark import (
SouthParkIE,
SouthparkDeIE,
@ -612,6 +629,7 @@ from .youtube import (
YoutubeUserIE,
YoutubeWatchLaterIE,
)
from .zapiks import ZapiksIE
from .zdf import ZDFIE, ZDFChannelIE
from .zingmp3 import (
ZingMp3SongIE,

View File

@ -28,7 +28,6 @@ class AdobeTVIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
player = self._parse_json(
@ -44,8 +43,10 @@ class AdobeTVIE(InfoExtractor):
self._html_search_meta('datepublished', webpage, 'upload date'))
duration = parse_duration(
self._html_search_meta('duration', webpage, 'duration')
or self._search_regex(r'Runtime:\s*(\d{2}:\d{2}:\d{2})', webpage, 'duration'))
self._html_search_meta('duration', webpage, 'duration') or
self._search_regex(
r'Runtime:\s*(\d{2}:\d{2}:\d{2})',
webpage, 'duration', fatal=False))
view_count = str_to_int(self._search_regex(
r'<div class="views">\s*Views?:\s*([\d,.]+)\s*</div>',

View File

@ -0,0 +1,74 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
int_or_none,
parse_duration,
parse_iso8601,
)
class AirMozillaIE(InfoExtractor):
_VALID_URL = r'https?://air\.mozilla\.org/(?P<id>[0-9a-z-]+)/?'
_TEST = {
'url': 'https://air.mozilla.org/privacy-lab-a-meetup-for-privacy-minded-people-in-san-francisco/',
'md5': '2e3e7486ba5d180e829d453875b9b8bf',
'info_dict': {
'id': '6x4q2w',
'ext': 'mp4',
'title': 'Privacy Lab - a meetup for privacy minded people in San Francisco',
'thumbnail': 're:https://\w+\.cloudfront\.net/6x4q2w/poster\.jpg\?t=\d+',
'description': 'Brings together privacy professionals and others interested in privacy at for-profits, non-profits, and NGOs in an effort to contribute to the state of the ecosystem...',
'timestamp': 1422487800,
'upload_date': '20150128',
'location': 'SFO Commons',
'duration': 3780,
'view_count': int,
'categories': ['Main'],
}
}
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
video_id = self._html_search_regex(r'//vid.ly/(.*?)/embed', webpage, 'id')
embed_script = self._download_webpage('https://vid.ly/{0}/embed'.format(video_id), video_id)
jwconfig = self._search_regex(r'\svar jwconfig = (\{.*?\});\s', embed_script, 'metadata')
metadata = self._parse_json(jwconfig, video_id)
formats = [{
'url': source['file'],
'ext': source['type'],
'format_id': self._search_regex(r'&format=(.*)$', source['file'], 'video format'),
'format': source['label'],
'height': int(source['label'].rstrip('p')),
} for source in metadata['playlist'][0]['sources']]
self._sort_formats(formats)
view_count = int_or_none(self._html_search_regex(
r'Views since archived: ([0-9]+)',
webpage, 'view count', fatal=False))
timestamp = parse_iso8601(self._html_search_regex(
r'<time datetime="(.*?)"', webpage, 'timestamp', fatal=False))
duration = parse_duration(self._search_regex(
r'Duration:\s*(\d+\s*hours?\s*\d+\s*minutes?)',
webpage, 'duration', fatal=False))
return {
'id': video_id,
'title': self._og_search_title(webpage),
'formats': formats,
'url': self._og_search_url(webpage),
'display_id': display_id,
'thumbnail': metadata['playlist'][0].get('image'),
'description': self._og_search_description(webpage),
'timestamp': timestamp,
'location': self._html_search_regex(r'Location: (.*)', webpage, 'location', default=None),
'duration': duration,
'view_count': view_count,
'categories': re.findall(r'<a href=".*?" class="channel">(.*?)</a>', webpage),
}

View File

@ -11,8 +11,8 @@ from ..utils import (
class AppleTrailersIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?trailers\.apple\.com/trailers/(?P<company>[^/]+)/(?P<movie>[^/]+)'
_TEST = {
_VALID_URL = r'https?://(?:www\.)?trailers\.apple\.com/(?:trailers|ca)/(?P<company>[^/]+)/(?P<movie>[^/]+)'
_TESTS = [{
"url": "http://trailers.apple.com/trailers/wb/manofsteel/",
'info_dict': {
'id': 'manofsteel',
@ -63,7 +63,10 @@ class AppleTrailersIE(InfoExtractor):
},
},
]
}
}, {
'url': 'http://trailers.apple.com/ca/metropole/autrui/',
'only_matching': True,
}]
_JSON_RE = r'iTunes.playURL\((.*?)\);'

View File

@ -3,7 +3,7 @@ from __future__ import unicode_literals
import time
import hmac
from .subtitles import SubtitlesInfoExtractor
from .common import InfoExtractor
from ..compat import (
compat_str,
compat_urllib_parse,
@ -17,7 +17,7 @@ from ..utils import (
)
class AtresPlayerIE(SubtitlesInfoExtractor):
class AtresPlayerIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?atresplayer\.com/television/[^/]+/[^/]+/[^/]+/(?P<id>.+?)_\d+\.html'
_TESTS = [
{
@ -144,13 +144,12 @@ class AtresPlayerIE(SubtitlesInfoExtractor):
thumbnail = xpath_text(episode, './media/asset/files/background', 'thumbnail')
subtitles = {}
subtitle = xpath_text(episode, './media/asset/files/subtitle', 'subtitle')
if subtitle:
subtitles['es'] = subtitle
if self._downloader.params.get('listsubtitles', False):
self._list_available_subtitles(video_id, subtitles)
return
subtitle_url = xpath_text(episode, './media/asset/files/subtitle', 'subtitle')
if subtitle_url:
subtitles['es'] = [{
'ext': 'srt',
'url': subtitle_url,
}]
return {
'id': video_id,
@ -159,5 +158,5 @@ class AtresPlayerIE(SubtitlesInfoExtractor):
'thumbnail': thumbnail,
'duration': duration,
'formats': formats,
'subtitles': self.extract_subtitles(video_id, subtitles),
'subtitles': subtitles,
}

View File

@ -2,12 +2,12 @@ from __future__ import unicode_literals
import xml.etree.ElementTree
from .subtitles import SubtitlesInfoExtractor
from .common import InfoExtractor
from ..utils import ExtractorError
from ..compat import compat_HTTPError
class BBCCoUkIE(SubtitlesInfoExtractor):
class BBCCoUkIE(InfoExtractor):
IE_NAME = 'bbc.co.uk'
IE_DESC = 'BBC iPlayer'
_VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:(?:(?:programmes|iplayer(?:/[^/]+)?/(?:episode|playlist))/)|music/clips[/#])(?P<id>[\da-z]{8})'
@ -215,17 +215,32 @@ class BBCCoUkIE(SubtitlesInfoExtractor):
formats.extend(conn_formats)
return formats
def _extract_captions(self, media, programme_id):
def _get_subtitles(self, media, programme_id):
subtitles = {}
for connection in self._extract_connections(media):
captions = self._download_xml(connection.get('href'), programme_id, 'Downloading captions')
lang = captions.get('{http://www.w3.org/XML/1998/namespace}lang', 'en')
ps = captions.findall('./{0}body/{0}div/{0}p'.format('{http://www.w3.org/2006/10/ttaf1}'))
srt = ''
def _extract_text(p):
if p.text is not None:
stripped_text = p.text.strip()
if stripped_text:
return stripped_text
return ' '.join(span.text.strip() for span in p.findall('{http://www.w3.org/2006/10/ttaf1}span'))
for pos, p in enumerate(ps):
srt += '%s\r\n%s --> %s\r\n%s\r\n\r\n' % (str(pos), p.get('begin'), p.get('end'),
p.text.strip() if p.text is not None else '')
subtitles[lang] = srt
srt += '%s\r\n%s --> %s\r\n%s\r\n\r\n' % (str(pos), p.get('begin'), p.get('end'), _extract_text(p))
subtitles[lang] = [
{
'url': connection.get('href'),
'ext': 'ttml',
},
{
'data': srt,
'ext': 'srt',
},
]
return subtitles
def _download_media_selector(self, programme_id):
@ -249,7 +264,7 @@ class BBCCoUkIE(SubtitlesInfoExtractor):
elif kind == 'video':
formats.extend(self._extract_video(media, programme_id))
elif kind == 'captions':
subtitles = self._extract_captions(media, programme_id)
subtitles = self.extract_subtitles(media, programme_id)
return formats, subtitles
@ -324,10 +339,6 @@ class BBCCoUkIE(SubtitlesInfoExtractor):
else:
programme_id, title, description, duration, formats, subtitles = self._download_playlist(group_id)
if self._downloader.params.get('listsubtitles', False):
self._list_available_subtitles(programme_id, subtitles)
return
self._sort_formats(formats)
return {

View File

@ -3,7 +3,6 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from .subtitles import SubtitlesInfoExtractor
from ..compat import (
compat_str,
@ -18,7 +17,7 @@ from ..utils import (
)
class BlipTVIE(SubtitlesInfoExtractor):
class BlipTVIE(InfoExtractor):
_VALID_URL = r'https?://(?:\w+\.)?blip\.tv/(?:(?:.+-|rss/flash/)(?P<id>\d+)|((?:play/|api\.swf#)(?P<lookup_id>[\da-zA-Z+_]+)))'
_TESTS = [
@ -143,7 +142,7 @@ class BlipTVIE(SubtitlesInfoExtractor):
categories = [category.text for category in item.findall('category')]
formats = []
subtitles = {}
subtitles_urls = {}
media_group = item.find(media('group'))
for media_content in media_group.findall(media('content')):
@ -161,7 +160,7 @@ class BlipTVIE(SubtitlesInfoExtractor):
}
lang = role.rpartition('-')[-1].strip().lower()
langcode = LANGS.get(lang, lang)
subtitles[langcode] = url
subtitles_urls[langcode] = url
elif media_type.startswith('video/'):
formats.append({
'url': real_url,
@ -175,11 +174,7 @@ class BlipTVIE(SubtitlesInfoExtractor):
})
self._sort_formats(formats)
# subtitles
video_subtitles = self.extract_subtitles(video_id, subtitles)
if self._downloader.params.get('listsubtitles', False):
self._list_available_subtitles(video_id, subtitles)
return
subtitles = self.extract_subtitles(video_id, subtitles_urls)
return {
'id': video_id,
@ -192,15 +187,22 @@ class BlipTVIE(SubtitlesInfoExtractor):
'thumbnail': thumbnail,
'categories': categories,
'formats': formats,
'subtitles': video_subtitles,
'subtitles': subtitles,
}
def _download_subtitle_url(self, sub_lang, url):
# For some weird reason, blip.tv serves a video instead of subtitles
# when we request with a common UA
req = compat_urllib_request.Request(url)
req.add_header('User-Agent', 'youtube-dl')
return self._download_webpage(req, None, note=False)
def _get_subtitles(self, video_id, subtitles_urls):
subtitles = {}
for lang, url in subtitles_urls.items():
# For some weird reason, blip.tv serves a video instead of subtitles
# when we request with a common UA
req = compat_urllib_request.Request(url)
req.add_header('User-Agent', 'youtube-dl')
subtitles[lang] = [{
# The extension is 'srt' but it's actually an 'ass' file
'ext': 'ass',
'data': self._download_webpage(req, None, note=False),
}]
return subtitles
class BlipTVUserIE(InfoExtractor):

View File

@ -6,7 +6,7 @@ from .common import InfoExtractor
class BloombergIE(InfoExtractor):
_VALID_URL = r'https?://www\.bloomberg\.com/video/(?P<name>.+?)\.html'
_VALID_URL = r'https?://www\.bloomberg\.com/video/(?P<id>.+?)\.html'
_TEST = {
'url': 'http://www.bloomberg.com/video/shah-s-presentation-on-foreign-exchange-strategies-qurhIVlJSB6hzkVi229d8g.html',
@ -20,9 +20,9 @@ class BloombergIE(InfoExtractor):
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
name = mobj.group('name')
name = self._match_id(url)
webpage = self._download_webpage(url, name)
f4m_url = self._search_regex(
r'<source src="(https?://[^"]+\.f4m.*?)"', webpage,
'f4m url')

View File

@ -3,7 +3,7 @@ from __future__ import unicode_literals
import re
from .subtitles import SubtitlesInfoExtractor
from .common import InfoExtractor
from ..compat import (
compat_urllib_request,
compat_urllib_parse,
@ -15,7 +15,7 @@ from ..utils import (
)
class CeskaTelevizeIE(SubtitlesInfoExtractor):
class CeskaTelevizeIE(InfoExtractor):
_VALID_URL = r'https?://www\.ceskatelevize\.cz/(porady|ivysilani)/(.+/)?(?P<id>[^?#]+)'
_TESTS = [
@ -107,13 +107,7 @@ class CeskaTelevizeIE(SubtitlesInfoExtractor):
subtitles = {}
subs = item.get('subtitles')
if subs:
subtitles['cs'] = subs[0]['url']
if self._downloader.params.get('listsubtitles', False):
self._list_available_subtitles(video_id, subtitles)
return
subtitles = self._fix_subtitles(self.extract_subtitles(video_id, subtitles))
subtitles = self.extract_subtitles(episode_id, subs)
return {
'id': episode_id,
@ -125,11 +119,20 @@ class CeskaTelevizeIE(SubtitlesInfoExtractor):
'subtitles': subtitles,
}
def _get_subtitles(self, episode_id, subs):
original_subtitles = self._download_webpage(
subs[0]['url'], episode_id, 'Downloading subtitles')
srt_subs = self._fix_subtitles(original_subtitles)
return {
'cs': [{
'ext': 'srt',
'data': srt_subs,
}]
}
@staticmethod
def _fix_subtitles(subtitles):
""" Convert millisecond-based subtitles to SRT """
if subtitles is None:
return subtitles # subtitles not requested
def _msectotimecode(msec):
""" Helper utility to convert milliseconds to timecode """
@ -149,7 +152,4 @@ class CeskaTelevizeIE(SubtitlesInfoExtractor):
else:
yield line
fixed_subtitles = {}
for k, v in subtitles.items():
fixed_subtitles[k] = "\r\n".join(_fix_subtitle(v))
return fixed_subtitles
return "\r\n".join(_fix_subtitle(subtitles))

View File

@ -0,0 +1,84 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
parse_duration,
int_or_none,
)
class ChirbitIE(InfoExtractor):
IE_NAME = 'chirbit'
_VALID_URL = r'https?://(?:www\.)?chirb\.it/(?:(?:wp|pl)/|fb_chirbit_player\.swf\?key=)?(?P<id>[\da-zA-Z]+)'
_TESTS = [{
'url': 'http://chirb.it/PrIPv5',
'md5': '9847b0dad6ac3e074568bf2cfb197de8',
'info_dict': {
'id': 'PrIPv5',
'ext': 'mp3',
'title': 'Фасадстрой',
'duration': 52,
'view_count': int,
'comment_count': int,
}
}, {
'url': 'https://chirb.it/fb_chirbit_player.swf?key=PrIPv5',
'only_matching': True,
}]
def _real_extract(self, url):
audio_id = self._match_id(url)
webpage = self._download_webpage(
'http://chirb.it/%s' % audio_id, audio_id)
audio_url = self._search_regex(
r'"setFile"\s*,\s*"([^"]+)"', webpage, 'audio url')
title = self._search_regex(
r'itemprop="name">([^<]+)', webpage, 'title')
duration = parse_duration(self._html_search_meta(
'duration', webpage, 'duration', fatal=False))
view_count = int_or_none(self._search_regex(
r'itemprop="playCount"\s*>(\d+)', webpage,
'listen count', fatal=False))
comment_count = int_or_none(self._search_regex(
r'>(\d+) Comments?:', webpage,
'comment count', fatal=False))
return {
'id': audio_id,
'url': audio_url,
'title': title,
'duration': duration,
'view_count': view_count,
'comment_count': comment_count,
}
class ChirbitProfileIE(InfoExtractor):
IE_NAME = 'chirbit:profile'
_VALID_URL = r'https?://(?:www\.)?chirbit.com/(?:rss/)?(?P<id>[^/]+)'
_TEST = {
'url': 'http://chirbit.com/ScarletBeauty',
'info_dict': {
'id': 'ScarletBeauty',
'title': 'Chirbits by ScarletBeauty',
},
'playlist_mincount': 3,
}
def _real_extract(self, url):
profile_id = self._match_id(url)
rss = self._download_xml(
'http://chirbit.com/rss/%s' % profile_id, profile_id)
entries = [
self.url_result(audio_url.text, 'Chirbit')
for audio_url in rss.findall('./channel/item/link')]
title = rss.find('./channel/title').text
return self.playlist_result(entries, profile_id, title)

View File

@ -250,6 +250,8 @@ class ComedyCentralShowsIE(MTVServicesInfoExtractor):
})
self._sort_formats(formats)
subtitles = self._extract_subtitles(cdoc, guid)
virtual_id = show_name + ' ' + epTitle + ' part ' + compat_str(part_num + 1)
entries.append({
'id': guid,
@ -260,6 +262,7 @@ class ComedyCentralShowsIE(MTVServicesInfoExtractor):
'duration': duration,
'thumbnail': thumbnail,
'description': description,
'subtitles': subtitles,
})
playlist_title = show_name + ' ' + title

View File

@ -151,8 +151,14 @@ class InfoExtractor(object):
If not explicitly set, calculated from timestamp.
uploader_id: Nickname or id of the video uploader.
location: Physical location where the video was filmed.
subtitles: The subtitle file contents as a dictionary in the format
{language: subtitles}.
subtitles: The available subtitles as a dictionary in the format
{language: subformats}. "subformats" is a list sorted from
lower to higher preference, each element is a dictionary
with the "ext" entry and one of:
* "data": The subtitles file contents
* "url": A url pointing to the subtitles file
automatic_captions: Like 'subtitles', used by the YoutubeIE for
automatically generated captions
duration: Length of the video in seconds, as an integer.
view_count: How many users have watched the video on the platform.
like_count: Number of positive ratings of the video
@ -395,6 +401,16 @@ class InfoExtractor(object):
if blocked_iframe:
msg += ' Visit %s for more details' % blocked_iframe
raise ExtractorError(msg, expected=True)
if '<title>The URL you requested has been blocked</title>' in content[:512]:
msg = (
'Access to this webpage has been blocked by Indian censorship. '
'Use a VPN or proxy server (with --proxy) to route around it.')
block_msg = self._html_search_regex(
r'</h1><p>(.*?)</p>',
content, 'block message', default=None)
if block_msg:
msg += ' (Message: "%s")' % block_msg.replace('\n', ' ')
raise ExtractorError(msg, expected=True)
return content
@ -825,8 +841,8 @@ class InfoExtractor(object):
media_nodes = manifest.findall('{http://ns.adobe.com/f4m/2.0}media')
for i, media_el in enumerate(media_nodes):
if manifest_version == '2.0':
manifest_url = ('/'.join(manifest_url.split('/')[:-1]) + '/'
+ (media_el.attrib.get('href') or media_el.attrib.get('url')))
manifest_url = ('/'.join(manifest_url.split('/')[:-1]) + '/' +
(media_el.attrib.get('href') or media_el.attrib.get('url')))
tbr = int_or_none(media_el.attrib.get('bitrate'))
formats.append({
'format_id': '-'.join(filter(None, [f4m_id, 'f4m-%d' % (i if tbr is None else tbr)])),
@ -850,7 +866,7 @@ class InfoExtractor(object):
'url': m3u8_url,
'ext': ext,
'protocol': 'm3u8',
'preference': -1,
'preference': preference - 1 if preference else -1,
'resolution': 'multiple',
'format_note': 'Quality selection URL',
}]
@ -932,39 +948,57 @@ class InfoExtractor(object):
formats = []
rtmp_count = 0
for video in smil.findall('./body/switch/video'):
src = video.get('src')
if not src:
continue
bitrate = int_or_none(video.get('system-bitrate') or video.get('systemBitrate'), 1000)
width = int_or_none(video.get('width'))
height = int_or_none(video.get('height'))
proto = video.get('proto')
if not proto:
if base:
if base.startswith('rtmp'):
proto = 'rtmp'
elif base.startswith('http'):
proto = 'http'
ext = video.get('ext')
if proto == 'm3u8':
formats.extend(self._extract_m3u8_formats(src, video_id, ext))
elif proto == 'rtmp':
rtmp_count += 1
streamer = video.get('streamer') or base
formats.append({
'url': streamer,
'play_path': src,
'ext': 'flv',
'format_id': 'rtmp-%d' % (rtmp_count if bitrate is None else bitrate),
'tbr': bitrate,
'width': width,
'height': height,
})
if smil.findall('./body/seq/video'):
video = smil.findall('./body/seq/video')[0]
fmts, rtmp_count = self._parse_smil_video(video, video_id, base, rtmp_count)
formats.extend(fmts)
else:
for video in smil.findall('./body/switch/video'):
fmts, rtmp_count = self._parse_smil_video(video, video_id, base, rtmp_count)
formats.extend(fmts)
self._sort_formats(formats)
return formats
def _parse_smil_video(self, video, video_id, base, rtmp_count):
src = video.get('src')
if not src:
return ([], rtmp_count)
bitrate = int_or_none(video.get('system-bitrate') or video.get('systemBitrate'), 1000)
width = int_or_none(video.get('width'))
height = int_or_none(video.get('height'))
proto = video.get('proto')
if not proto:
if base:
if base.startswith('rtmp'):
proto = 'rtmp'
elif base.startswith('http'):
proto = 'http'
ext = video.get('ext')
if proto == 'm3u8':
return (self._extract_m3u8_formats(src, video_id, ext), rtmp_count)
elif proto == 'rtmp':
rtmp_count += 1
streamer = video.get('streamer') or base
return ([{
'url': streamer,
'play_path': src,
'ext': 'flv',
'format_id': 'rtmp-%d' % (rtmp_count if bitrate is None else bitrate),
'tbr': bitrate,
'width': width,
'height': height,
}], rtmp_count)
elif proto.startswith('http'):
return ([{
'url': base + src,
'ext': ext or 'flv',
'tbr': bitrate,
'width': width,
'height': height,
}], rtmp_count)
def _live_title(self, name):
""" Generate the title for a live video """
now = datetime.datetime.now()
@ -1028,6 +1062,24 @@ class InfoExtractor(object):
any_restricted = any_restricted or is_restricted
return not any_restricted
def extract_subtitles(self, *args, **kwargs):
if (self._downloader.params.get('writesubtitles', False) or
self._downloader.params.get('listsubtitles')):
return self._get_subtitles(*args, **kwargs)
return {}
def _get_subtitles(self, *args, **kwargs):
raise NotImplementedError("This method must be implemented by subclasses")
def extract_automatic_captions(self, *args, **kwargs):
if (self._downloader.params.get('writeautomaticsub', False) or
self._downloader.params.get('listsubtitles')):
return self._get_automatic_captions(*args, **kwargs)
return {}
def _get_automatic_captions(self, *args, **kwargs):
raise NotImplementedError("This method must be implemented by subclasses")
class SearchInfoExtractor(InfoExtractor):
"""

View File

@ -9,7 +9,7 @@ import xml.etree.ElementTree
from hashlib import sha1
from math import pow, sqrt, floor
from .subtitles import SubtitlesInfoExtractor
from .common import InfoExtractor
from ..compat import (
compat_urllib_parse,
compat_urllib_request,
@ -25,10 +25,9 @@ from ..aes import (
aes_cbc_decrypt,
inc,
)
from .common import InfoExtractor
class CrunchyrollIE(SubtitlesInfoExtractor):
class CrunchyrollIE(InfoExtractor):
_VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.(?:com|fr)/(?:[^/]*/[^/?&]*?|media/\?id=)(?P<video_id>[0-9]+))(?:[/?&]|$)'
_TESTS = [{
'url': 'http://www.crunchyroll.com/wanna-be-the-strongest-in-the-world/episode-1-an-idol-wrestler-is-born-645513',
@ -187,6 +186,38 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
return output
def _get_subtitles(self, video_id, webpage):
subtitles = {}
for sub_id, sub_name in re.findall(r'\?ssid=([0-9]+)" title="([^"]+)', webpage):
sub_page = self._download_webpage(
'http://www.crunchyroll.com/xml/?req=RpcApiSubtitle_GetXml&subtitle_script_id=' + sub_id,
video_id, note='Downloading subtitles for ' + sub_name)
id = self._search_regex(r'id=\'([0-9]+)', sub_page, 'subtitle_id', fatal=False)
iv = self._search_regex(r'<iv>([^<]+)', sub_page, 'subtitle_iv', fatal=False)
data = self._search_regex(r'<data>([^<]+)', sub_page, 'subtitle_data', fatal=False)
if not id or not iv or not data:
continue
id = int(id)
iv = base64.b64decode(iv)
data = base64.b64decode(data)
subtitle = self._decrypt_subtitles(data, iv, id).decode('utf-8')
lang_code = self._search_regex(r'lang_code=["\']([^"\']+)', subtitle, 'subtitle_lang_code', fatal=False)
if not lang_code:
continue
sub_root = xml.etree.ElementTree.fromstring(subtitle)
subtitles[lang_code] = [
{
'ext': 'srt',
'data': self._convert_subtitles_to_srt(sub_root),
},
{
'ext': 'ass',
'data': self._convert_subtitles_to_ass(sub_root),
},
]
return subtitles
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('video_id')
@ -249,34 +280,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
'format_id': video_format,
})
subtitles = {}
sub_format = self._downloader.params.get('subtitlesformat', 'srt')
for sub_id, sub_name in re.findall(r'\?ssid=([0-9]+)" title="([^"]+)', webpage):
sub_page = self._download_webpage(
'http://www.crunchyroll.com/xml/?req=RpcApiSubtitle_GetXml&subtitle_script_id=' + sub_id,
video_id, note='Downloading subtitles for ' + sub_name)
id = self._search_regex(r'id=\'([0-9]+)', sub_page, 'subtitle_id', fatal=False)
iv = self._search_regex(r'<iv>([^<]+)', sub_page, 'subtitle_iv', fatal=False)
data = self._search_regex(r'<data>([^<]+)', sub_page, 'subtitle_data', fatal=False)
if not id or not iv or not data:
continue
id = int(id)
iv = base64.b64decode(iv)
data = base64.b64decode(data)
subtitle = self._decrypt_subtitles(data, iv, id).decode('utf-8')
lang_code = self._search_regex(r'lang_code=["\']([^"\']+)', subtitle, 'subtitle_lang_code', fatal=False)
if not lang_code:
continue
sub_root = xml.etree.ElementTree.fromstring(subtitle)
if sub_format == 'ass':
subtitles[lang_code] = self._convert_subtitles_to_ass(sub_root)
else:
subtitles[lang_code] = self._convert_subtitles_to_srt(sub_root)
if self._downloader.params.get('listsubtitles', False):
self._list_available_subtitles(video_id, subtitles)
return
subtitles = self.extract_subtitles(video_id, webpage)
return {
'id': video_id,

View File

@ -6,7 +6,6 @@ import json
import itertools
from .common import InfoExtractor
from .subtitles import SubtitlesInfoExtractor
from ..compat import (
compat_str,
@ -31,7 +30,7 @@ class DailymotionBaseInfoExtractor(InfoExtractor):
return request
class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
class DailymotionIE(DailymotionBaseInfoExtractor):
"""Information Extractor for Dailymotion"""
_VALID_URL = r'(?i)(?:https?://)?(?:(www|touch)\.)?dailymotion\.[a-z]{2,3}/(?:(embed|#)/)?video/(?P<id>[^/?_]+)'
@ -143,9 +142,6 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
# subtitles
video_subtitles = self.extract_subtitles(video_id, webpage)
if self._downloader.params.get('listsubtitles', False):
self._list_available_subtitles(video_id, webpage)
return
view_count = str_to_int(self._search_regex(
r'video_views_count[^>]+>\s+([\d\.,]+)',
@ -169,7 +165,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
'view_count': view_count,
}
def _get_available_subtitles(self, video_id, webpage):
def _get_subtitles(self, video_id, webpage):
try:
sub_list = self._download_webpage(
'https://api.dailymotion.com/video/%s/subtitles?fields=id,language,url' % video_id,
@ -179,7 +175,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
return {}
info = json.loads(sub_list)
if (info['total'] > 0):
sub_lang_list = dict((l['language'], l['url']) for l in info['list'])
sub_lang_list = dict((l['language'], [{'url': l['url'], 'ext': 'srt'}]) for l in info['list'])
return sub_lang_list
self._downloader.report_warning('video doesn\'t have subtitles')
return {}

View File

@ -25,8 +25,9 @@ class DefenseGouvFrIE(InfoExtractor):
r"flashvars.pvg_id=\"(\d+)\";",
webpage, 'ID')
json_url = ('http://static.videos.gouv.fr/brightcovehub/export/json/'
+ video_id)
json_url = (
'http://static.videos.gouv.fr/brightcovehub/export/json/%s' %
video_id)
info = self._download_json(json_url, title, 'Downloading JSON config')
video_url = info['renditions'][0]['url']

View File

@ -1,11 +1,10 @@
from __future__ import unicode_literals
from .subtitles import SubtitlesInfoExtractor
from .common import ExtractorError
from .common import InfoExtractor, ExtractorError
from ..utils import parse_iso8601
class DRTVIE(SubtitlesInfoExtractor):
class DRTVIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?dr\.dk/tv/se/(?:[^/]+/)*(?P<id>[\da-z-]+)(?:[/#?]|$)'
_TEST = {
@ -76,7 +75,7 @@ class DRTVIE(SubtitlesInfoExtractor):
}
for subs in subtitles_list:
lang = subs['Language']
subtitles[LANGS.get(lang, lang)] = subs['Uri']
subtitles[LANGS.get(lang, lang)] = [{'url': subs['Uri'], 'ext': 'vtt'}]
if not formats and restricted_to_denmark:
raise ExtractorError(
@ -84,10 +83,6 @@ class DRTVIE(SubtitlesInfoExtractor):
self._sort_formats(formats)
if self._downloader.params.get('listsubtitles', False):
self._list_available_subtitles(video_id, subtitles)
return
return {
'id': video_id,
'title': title,
@ -96,5 +91,5 @@ class DRTVIE(SubtitlesInfoExtractor):
'timestamp': timestamp,
'duration': duration,
'formats': formats,
'subtitles': self.extract_subtitles(video_id, subtitles),
'subtitles': subtitles,
}

View File

@ -35,10 +35,7 @@ class EpornerIE(InfoExtractor):
title = self._html_search_regex(
r'<title>(.*?) - EPORNER', webpage, 'title')
redirect_code = self._html_search_regex(
r'<script type="text/javascript" src="/config5/%s/([a-f\d]+)/">' % video_id,
webpage, 'redirect_code')
redirect_url = 'http://www.eporner.com/config5/%s/%s' % (video_id, redirect_code)
redirect_url = 'http://www.eporner.com/config5/%s' % video_id
player_code = self._download_webpage(
redirect_url, display_id, note='Downloading player config')
@ -69,5 +66,5 @@ class EpornerIE(InfoExtractor):
'duration': duration,
'view_count': view_count,
'formats': formats,
'age_limit': self._rta_search(webpage),
'age_limit': 18,
}

View File

@ -3,15 +3,18 @@ from __future__ import unicode_literals
from .common import InfoExtractor
from ..compat import (
compat_urllib_parse,
compat_urllib_request,
)
from ..utils import (
ExtractorError,
js_to_json,
parse_duration,
)
class EscapistIE(InfoExtractor):
_VALID_URL = r'https?://?(www\.)?escapistmagazine\.com/videos/view/[^/?#]+/(?P<id>[0-9]+)-[^/?#]*(?:$|[?#])'
_USER_AGENT = 'Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko'
_TEST = {
'url': 'http://www.escapistmagazine.com/videos/view/the-escapist-presents/6618-Breaking-Down-Baldurs-Gate',
'md5': 'ab3a706c681efca53f0a35f1415cf0d1',
@ -23,45 +26,67 @@ class EscapistIE(InfoExtractor):
'uploader': 'The Escapist Presents',
'title': "Breaking Down Baldur's Gate",
'thumbnail': 're:^https?://.*\.jpg$',
'duration': 264,
}
}
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
webpage_req = compat_urllib_request.Request(url)
webpage_req.add_header('User-Agent', self._USER_AGENT)
webpage = self._download_webpage(webpage_req, video_id)
uploader_id = self._html_search_regex(
r"<h1 class='headline'><a href='/videos/view/(.*?)'",
r"<h1\s+class='headline'>\s*<a\s+href='/videos/view/(.*?)'",
webpage, 'uploader ID', fatal=False)
uploader = self._html_search_regex(
r"<h1 class='headline'>(.*?)</a>",
r"<h1\s+class='headline'>(.*?)</a>",
webpage, 'uploader', fatal=False)
description = self._html_search_meta('description', webpage)
duration = parse_duration(self._html_search_meta('duration', webpage))
raw_title = self._html_search_meta('title', webpage, fatal=True)
title = raw_title.partition(' : ')[2]
config_url = compat_urllib_parse.unquote(self._html_search_regex(
r'<param name="flashvars" value="config=([^"&]+)', webpage, 'config URL'))
r'''(?x)
(?:
<param\s+name="flashvars".*?\s+value="config=|
flashvars=&quot;config=
)
(https?://[^"&]+)
''',
webpage, 'config URL'))
formats = []
ad_formats = []
def _add_format(name, cfgurl, quality):
def _add_format(name, cfg_url, quality):
cfg_req = compat_urllib_request.Request(cfg_url)
cfg_req.add_header('User-Agent', self._USER_AGENT)
config = self._download_json(
cfgurl, video_id,
cfg_req, video_id,
'Downloading ' + name + ' configuration',
'Unable to download ' + name + ' configuration',
transform_source=js_to_json)
playlist = config['playlist']
video_url = next(
p['url'] for p in playlist
if p.get('eventCategory') == 'Video')
formats.append({
'url': video_url,
'format_id': name,
'quality': quality,
})
for p in playlist:
if p.get('eventCategory') == 'Video':
ar = formats
elif p.get('eventCategory') == 'Video Postroll':
ar = ad_formats
else:
continue
ar.append({
'url': p['url'],
'format_id': name,
'quality': quality,
'http_headers': {
'User-Agent': self._USER_AGENT,
},
})
_add_format('normal', config_url, quality=0)
hq_url = (config_url +
@ -70,10 +95,12 @@ class EscapistIE(InfoExtractor):
_add_format('hq', hq_url, quality=1)
except ExtractorError:
pass # That's fine, we'll just use normal quality
self._sort_formats(formats)
return {
if '/escapist/sales-marketing/' in formats[-1]['url']:
raise ExtractorError('This IP address has been blocked by The Escapist', expected=True)
res = {
'id': video_id,
'formats': formats,
'uploader': uploader,
@ -81,4 +108,21 @@ class EscapistIE(InfoExtractor):
'title': title,
'thumbnail': self._og_search_thumbnail(webpage),
'description': description,
'duration': duration,
}
if self._downloader.params.get('include_ads') and ad_formats:
self._sort_formats(ad_formats)
ad_res = {
'id': '%s-ad' % video_id,
'title': '%s (Postroll)' % title,
'formats': ad_formats,
}
return {
'_type': 'playlist',
'entries': [res, ad_res],
'title': title,
'id': video_id,
}
return res

View File

@ -126,11 +126,17 @@ class FacebookIE(InfoExtractor):
params_raw = compat_urllib_parse.unquote(data['params'])
params = json.loads(params_raw)
video_data = params['video_data'][0]
video_url = video_data.get('hd_src')
if not video_url:
video_url = video_data['sd_src']
if not video_url:
raise ExtractorError('Cannot find video URL')
formats = []
for quality in ['sd', 'hd']:
src = video_data.get('%s_src' % quality)
if src is not None:
formats.append({
'format_id': quality,
'url': src,
})
if not formats:
raise ExtractorError('Cannot find video formats')
video_title = self._html_search_regex(
r'<h2 class="uiHeaderTitle">([^<]*)</h2>', webpage, 'title',
@ -146,7 +152,7 @@ class FacebookIE(InfoExtractor):
return {
'id': video_id,
'title': video_title,
'url': video_url,
'formats': formats,
'duration': int_or_none(video_data.get('video_duration')),
'thumbnail': video_data.get('thumbnail_src'),
}

View File

@ -1,6 +1,8 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
int_or_none,
@ -31,7 +33,7 @@ class GameStarIE(InfoExtractor):
webpage = self._download_webpage(url, video_id)
og_title = self._og_search_title(webpage)
title = og_title.replace(' - Video bei GameStar.de', '').strip()
title = re.sub(r'\s*- Video (bei|-) GameStar\.de$', '', og_title)
url = 'http://gamestar.de/_misc/videos/portal/getVideoUrl.cfm?premium=0&videoId=' + video_id

View File

@ -7,6 +7,7 @@ from ..compat import (
compat_urllib_parse,
compat_urllib_request,
)
from ..utils import remove_end
class GDCVaultIE(InfoExtractor):
@ -65,10 +66,12 @@ class GDCVaultIE(InfoExtractor):
def _parse_flv(self, xml_description):
video_formats = []
akami_url = xml_description.find('./metadata/akamaiHost').text
akamai_url = xml_description.find('./metadata/akamaiHost').text
slide_video_path = xml_description.find('./metadata/slideVideo').text
video_formats.append({
'url': 'rtmp://' + akami_url + '/' + slide_video_path,
'url': 'rtmp://%s/ondemand?ovpfv=1.1' % akamai_url,
'play_path': remove_end(slide_video_path, '.flv'),
'ext': 'flv',
'format_note': 'slide deck video',
'quality': -2,
'preference': -2,
@ -76,7 +79,9 @@ class GDCVaultIE(InfoExtractor):
})
speaker_video_path = xml_description.find('./metadata/speakerVideo').text
video_formats.append({
'url': 'rtmp://' + akami_url + '/' + speaker_video_path,
'url': 'rtmp://%s/ondemand?ovpfv=1.1' % akamai_url,
'play_path': remove_end(speaker_video_path, '.flv'),
'ext': 'flv',
'format_note': 'speaker video',
'quality': -1,
'preference': -1,

View File

@ -547,7 +547,28 @@ class GenericIE(InfoExtractor):
'id': 'aanslagen-kopenhagen',
'title': 'Aanslagen Kopenhagen | RTL Nieuws',
}
}
},
# Zapiks embed
{
'url': 'http://www.skipass.com/news/116090-bon-appetit-s5ep3-baqueira-mi-cor.html',
'info_dict': {
'id': '118046',
'ext': 'mp4',
'title': 'EP3S5 - Bon Appétit - Baqueira Mi Corazon !',
}
},
# Kaltura embed
{
'url': 'http://www.monumentalnetwork.com/videos/john-carlson-postgame-2-25-15',
'info_dict': {
'id': '1_eergr3h1',
'ext': 'mp4',
'upload_date': '20150226',
'uploader_id': 'MonumentalSports-Kaltura@perfectsensedigital.com',
'timestamp': int,
'title': 'John Carlson Postgame 2/25/15',
},
},
]
def report_following_redirect(self, new_url):
@ -1098,6 +1119,18 @@ class GenericIE(InfoExtractor):
if mobj is not None:
return self.url_result(mobj.group('url'), 'Livestream')
# Look for Zapiks embed
mobj = re.search(
r'<iframe[^>]+src="(?P<url>https?://(?:www\.)?zapiks\.fr/index\.php\?.+?)"', webpage)
if mobj is not None:
return self.url_result(mobj.group('url'), 'Zapiks')
# Look for Kaltura embeds
mobj = re.search(
r"(?s)kWidget\.(?:thumb)?[Ee]mbed\(\{.*?'wid'\s*:\s*'_?(?P<partner_id>[^']+)',.*?'entry_id'\s*:\s*'(?P<id>[^']+)',", webpage)
if mobj is not None:
return self.url_result('kaltura:%(partner_id)s:%(id)s' % mobj.groupdict(), 'Kaltura')
def check_video(vurl):
if YoutubeIE.suitable(vurl):
return True
@ -1193,7 +1226,9 @@ class GenericIE(InfoExtractor):
return entries[0]
else:
for num, e in enumerate(entries, start=1):
e['title'] = '%s (%d)' % (e['title'], num)
# 'url' results don't have a title
if e.get('title') is not None:
e['title'] = '%s (%d)' % (e['title'], num)
return {
'_type': 'playlist',
'entries': entries,

View File

@ -0,0 +1,138 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import compat_urllib_parse
from ..utils import (
ExtractorError,
int_or_none,
)
class KalturaIE(InfoExtractor):
_VALID_URL = r'''(?x)
(?:kaltura:|
https?://(:?(?:www|cdnapisec)\.)?kaltura\.com/index\.php/kwidget/(?:[^/]+/)*?wid/_
)(?P<partner_id>\d+)
(?::|
/(?:[^/]+/)*?entry_id/
)(?P<id>[0-9a-z_]+)'''
_API_BASE = 'http://cdnapi.kaltura.com/api_v3/index.php?'
_TESTS = [
{
'url': 'kaltura:269692:1_1jc2y3e4',
'md5': '3adcbdb3dcc02d647539e53f284ba171',
'info_dict': {
'id': '1_1jc2y3e4',
'ext': 'mp4',
'title': 'Track 4',
'upload_date': '20131219',
'uploader_id': 'mlundberg@wolfgangsvault.com',
'description': 'The Allman Brothers Band, 12/16/1981',
'thumbnail': 're:^https?://.*/thumbnail/.*',
'timestamp': int,
},
},
{
'url': 'http://www.kaltura.com/index.php/kwidget/cache_st/1300318621/wid/_269692/uiconf_id/3873291/entry_id/1_1jc2y3e4',
'only_matching': True,
},
{
'url': 'https://cdnapisec.kaltura.com/index.php/kwidget/wid/_557781/uiconf_id/22845202/entry_id/1_plr1syf3',
'only_matching': True,
},
]
def _kaltura_api_call(self, video_id, actions, *args, **kwargs):
params = actions[0]
if len(actions) > 1:
for i, a in enumerate(actions[1:], start=1):
for k, v in a.items():
params['%d:%s' % (i, k)] = v
query = compat_urllib_parse.urlencode(params)
url = self._API_BASE + query
data = self._download_json(url, video_id, *args, **kwargs)
status = data if len(actions) == 1 else data[0]
if status.get('objectType') == 'KalturaAPIException':
raise ExtractorError(
'%s said: %s' % (self.IE_NAME, status['message']))
return data
def _get_kaltura_signature(self, video_id, partner_id):
actions = [{
'apiVersion': '3.1',
'expiry': 86400,
'format': 1,
'service': 'session',
'action': 'startWidgetSession',
'widgetId': '_%s' % partner_id,
}]
return self._kaltura_api_call(
video_id, actions, note='Downloading Kaltura signature')['ks']
def _get_video_info(self, video_id, partner_id):
signature = self._get_kaltura_signature(video_id, partner_id)
actions = [
{
'action': 'null',
'apiVersion': '3.1.5',
'clientTag': 'kdp:v3.8.5',
'format': 1, # JSON, 2 = XML, 3 = PHP
'service': 'multirequest',
'ks': signature,
},
{
'action': 'get',
'entryId': video_id,
'service': 'baseentry',
'version': '-1',
},
{
'action': 'getContextData',
'contextDataParams:objectType': 'KalturaEntryContextDataParams',
'contextDataParams:referrer': 'http://www.kaltura.com/',
'contextDataParams:streamerType': 'http',
'entryId': video_id,
'service': 'baseentry',
},
]
return self._kaltura_api_call(
video_id, actions, note='Downloading video info JSON')
def _real_extract(self, url):
video_id = self._match_id(url)
mobj = re.match(self._VALID_URL, url)
partner_id, entry_id = mobj.group('partner_id'), mobj.group('id')
info, source_data = self._get_video_info(entry_id, partner_id)
formats = [{
'format_id': '%(fileExt)s-%(bitrate)s' % f,
'ext': f['fileExt'],
'tbr': f['bitrate'],
'fps': f.get('frameRate'),
'filesize_approx': int_or_none(f.get('size'), invscale=1024),
'container': f.get('containerFormat'),
'vcodec': f.get('videoCodecId'),
'height': f.get('height'),
'width': f.get('width'),
'url': '%s/flavorId/%s' % (info['dataUrl'], f['id']),
} for f in source_data['flavorAssets']]
self._sort_formats(formats)
return {
'id': video_id,
'title': info['name'],
'formats': formats,
'description': info.get('description'),
'thumbnail': info.get('thumbnailUrl'),
'duration': info.get('duration'),
'timestamp': info.get('createdAt'),
'uploader_id': info.get('userId'),
'view_count': info.get('plays'),
}

View File

@ -1,31 +1,32 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
import random
import re
from .common import InfoExtractor
from ..utils import ExtractorError
from ..utils import (
ExtractorError,
xpath_text,
)
class Laola1TvIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?laola1\.tv/(?P<lang>[a-z]+)-(?P<portal>[a-z]+)/.*?/(?P<id>[0-9]+)\.html'
_TEST = {
'url': 'http://www.laola1.tv/de-de/live/bwf-bitburger-open-grand-prix-gold-court-1/250019.html',
'url': 'http://www.laola1.tv/de-de/video/straubing-tigers-koelner-haie/227883.html',
'info_dict': {
'id': '250019',
'id': '227883',
'ext': 'mp4',
'title': 'Bitburger Open Grand Prix Gold - Court 1',
'categories': ['Badminton'],
'uploader': 'BWF - Badminton World Federation',
'is_live': True,
'title': 'Straubing Tigers - Kölner Haie',
'categories': ['Eishockey'],
'is_live': False,
},
'params': {
'skip_download': True,
}
}
_BROKEN = True # Not really - extractor works fine, but f4m downloader does not support live streams yet.
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
@ -43,15 +44,22 @@ class Laola1TvIE(InfoExtractor):
r'flashvars\.([_a-zA-Z0-9]+)\s*=\s*"([^"]*)";', iframe)
flashvars = dict((m[0], m[1]) for m in flashvars_m)
partner_id = self._search_regex(
r'partnerid\s*:\s*"([^"]+)"', iframe, 'partner id')
xml_url = ('http://www.laola1.tv/server/hd_video.php?' +
'play=%s&partner=1&portal=%s&v5ident=&lang=%s' % (
video_id, portal, lang))
'play=%s&partner=%s&portal=%s&v5ident=&lang=%s' % (
video_id, partner_id, portal, lang))
hd_doc = self._download_xml(xml_url, video_id)
title = hd_doc.find('.//video/title').text
flash_url = hd_doc.find('.//video/url').text
categories = hd_doc.find('.//video/meta_sports').text.split(',')
uploader = hd_doc.find('.//video/meta_organistation').text
title = xpath_text(hd_doc, './/video/title', fatal=True)
flash_url = xpath_text(hd_doc, './/video/url', fatal=True)
uploader = xpath_text(hd_doc, './/video/meta_organistation')
is_live = xpath_text(hd_doc, './/video/islive') == 'true'
categories = xpath_text(hd_doc, './/video/meta_sports')
if categories:
categories = categories.split(',')
ident = random.randint(10000000, 99999999)
token_url = '%s&ident=%s&klub=0&unikey=0&timestamp=%s&auth=%s' % (
@ -60,15 +68,16 @@ class Laola1TvIE(InfoExtractor):
token_doc = self._download_xml(
token_url, video_id, note='Downloading token')
token_attrib = token_doc.find('.//token').attrib
if token_attrib.get('auth') == 'blocked':
raise ExtractorError('Token error: ' % token_attrib.get('comment'))
if token_attrib.get('auth') in ('blocked', 'restricted'):
raise ExtractorError(
'Token error: %s' % token_attrib.get('comment'), expected=True)
video_url = '%s?hdnea=%s&hdcore=3.2.0' % (
token_attrib['url'], token_attrib['auth'])
return {
'id': video_id,
'is_live': True,
'is_live': is_live,
'title': title,
'url': video_url,
'uploader': uploader,

View File

@ -0,0 +1,190 @@
# coding: utf-8
from __future__ import unicode_literals
import datetime
import re
import time
from .common import InfoExtractor
from ..compat import (
compat_urlparse,
compat_urllib_parse,
)
from ..utils import (
determine_ext,
ExtractorError,
parse_iso8601,
)
class LetvIE(InfoExtractor):
_VALID_URL = r'http://www\.letv\.com/ptv/vplay/(?P<id>\d+).html'
_TESTS = [{
'url': 'http://www.letv.com/ptv/vplay/22005890.html',
'md5': 'cab23bd68d5a8db9be31c9a222c1e8df',
'info_dict': {
'id': '22005890',
'ext': 'mp4',
'title': '第87届奥斯卡颁奖礼完美落幕 《鸟人》成最大赢家',
'timestamp': 1424747397,
'upload_date': '20150224',
'description': 'md5:a9cb175fd753e2962176b7beca21a47c',
}
}, {
'url': 'http://www.letv.com/ptv/vplay/1415246.html',
'info_dict': {
'id': '1415246',
'ext': 'mp4',
'title': '美人天下01',
'description': 'md5:f88573d9d7225ada1359eaf0dbf8bcda',
},
'expected_warnings': [
'publish time'
]
}]
# http://www.letv.com/ptv/vplay/1118082.html
# This video is available only in Mainland China
@staticmethod
def urshift(val, n):
return val >> n if val >= 0 else (val + 0x100000000) >> n
# ror() and calc_time_key() are reversed from a embedded swf file in KLetvPlayer.swf
def ror(self, param1, param2):
_loc3_ = 0
while _loc3_ < param2:
param1 = self.urshift(param1, 1) + ((param1 & 1) << 31)
_loc3_ += 1
return param1
def calc_time_key(self, param1):
_loc2_ = 773625421
_loc3_ = self.ror(param1, _loc2_ % 13)
_loc3_ = _loc3_ ^ _loc2_
_loc3_ = self.ror(_loc3_, _loc2_ % 17)
return _loc3_
def _real_extract(self, url):
media_id = self._match_id(url)
page = self._download_webpage(url, media_id)
params = {
'id': media_id,
'platid': 1,
'splatid': 101,
'format': 1,
'tkey': self.calc_time_key(int(time.time())),
'domain': 'www.letv.com'
}
play_json = self._download_json(
'http://api.letv.com/mms/out/video/playJson?' + compat_urllib_parse.urlencode(params),
media_id, 'playJson data')
# Check for errors
playstatus = play_json['playstatus']
if playstatus['status'] == 0:
flag = playstatus['flag']
if flag == 1:
msg = 'Country %s auth error' % playstatus['country']
else:
msg = 'Generic error. flag = %d' % flag
raise ExtractorError(msg, expected=True)
playurl = play_json['playurl']
formats = ['350', '1000', '1300', '720p', '1080p']
dispatch = playurl['dispatch']
urls = []
for format_id in formats:
if format_id in dispatch:
media_url = playurl['domain'][0] + dispatch[format_id][0]
# Mimic what flvxz.com do
url_parts = list(compat_urlparse.urlparse(media_url))
qs = dict(compat_urlparse.parse_qs(url_parts[4]))
qs.update({
'platid': '14',
'splatid': '1401',
'tss': 'no',
'retry': 1
})
url_parts[4] = compat_urllib_parse.urlencode(qs)
media_url = compat_urlparse.urlunparse(url_parts)
url_info_dict = {
'url': media_url,
'ext': determine_ext(dispatch[format_id][1])
}
if format_id[-1:] == 'p':
url_info_dict['height'] = format_id[:-1]
urls.append(url_info_dict)
publish_time = parse_iso8601(self._html_search_regex(
r'发布时间&nbsp;([^<>]+) ', page, 'publish time', fatal=False),
delimiter=' ', timezone=datetime.timedelta(hours=8))
description = self._html_search_meta('description', page, fatal=False)
return {
'id': media_id,
'formats': urls,
'title': playurl['title'],
'thumbnail': playurl['pic'],
'description': description,
'timestamp': publish_time,
}
class LetvTvIE(InfoExtractor):
_VALID_URL = r'http://www.letv.com/tv/(?P<id>\d+).html'
_TESTS = [{
'url': 'http://www.letv.com/tv/46177.html',
'info_dict': {
'id': '46177',
'title': '美人天下',
'description': 'md5:395666ff41b44080396e59570dbac01c'
},
'playlist_count': 35
}]
def _real_extract(self, url):
playlist_id = self._match_id(url)
page = self._download_webpage(url, playlist_id)
media_urls = list(set(re.findall(
r'http://www.letv.com/ptv/vplay/\d+.html', page)))
entries = [self.url_result(media_url, ie='Letv')
for media_url in media_urls]
title = self._html_search_meta('keywords', page,
fatal=False).split('')[0]
description = self._html_search_meta('description', page, fatal=False)
return self.playlist_result(entries, playlist_id, playlist_title=title,
playlist_description=description)
class LetvPlaylistIE(LetvTvIE):
_VALID_URL = r'http://tv.letv.com/[a-z]+/(?P<id>[a-z]+)/index.s?html'
_TESTS = [{
'url': 'http://tv.letv.com/izt/wuzetian/index.html',
'info_dict': {
'id': 'wuzetian',
'title': '武媚娘传奇',
'description': 'md5:e12499475ab3d50219e5bba00b3cb248'
},
# This playlist contains some extra videos other than the drama itself
'playlist_mincount': 96
}, {
'url': 'http://tv.letv.com/pzt/lswjzzjc/index.shtml',
'info_dict': {
'id': 'lswjzzjc',
# The title should be "劲舞青春", but I can't find a simple way to
# determine the playlist title
'title': '乐视午间自制剧场',
'description': 'md5:b1eef244f45589a7b5b1af9ff25a4489'
},
'playlist_mincount': 7
}]

View File

@ -3,7 +3,6 @@ from __future__ import unicode_literals
import re
import json
from .subtitles import SubtitlesInfoExtractor
from .common import InfoExtractor
from ..compat import (
compat_str,
@ -16,10 +15,10 @@ from ..utils import (
)
class LyndaIE(SubtitlesInfoExtractor):
class LyndaIE(InfoExtractor):
IE_NAME = 'lynda'
IE_DESC = 'lynda.com videos'
_VALID_URL = r'https?://www\.lynda\.com/[^/]+/[^/]+/\d+/(\d+)-\d\.html'
_VALID_URL = r'https?://www\.lynda\.com/(?:[^/]+/[^/]+/\d+|player/embed)/(\d+)'
_LOGIN_URL = 'https://www.lynda.com/login/login.aspx'
_NETRC_MACHINE = 'lynda'
@ -28,7 +27,7 @@ class LyndaIE(SubtitlesInfoExtractor):
ACCOUNT_CREDENTIALS_HINT = 'Use --username and --password options to provide lynda.com account credentials.'
_TEST = {
_TESTS = [{
'url': 'http://www.lynda.com/Bootstrap-tutorials/Using-exercise-files/110885/114408-4.html',
'md5': 'ecfc6862da89489161fb9cd5f5a6fac1',
'info_dict': {
@ -37,7 +36,10 @@ class LyndaIE(SubtitlesInfoExtractor):
'title': 'Using the exercise files',
'duration': 68
}
}
}, {
'url': 'https://www.lynda.com/player/embed/133770?tr=foo=1;bar=g;fizz=rt&fs=0',
'only_matching': True,
}]
def _real_initialize(self):
self._login()
@ -88,11 +90,7 @@ class LyndaIE(SubtitlesInfoExtractor):
self._check_formats(formats, video_id)
self._sort_formats(formats)
if self._downloader.params.get('listsubtitles', False):
self._list_available_subtitles(video_id, page)
return
subtitles = self._fix_subtitles(self.extract_subtitles(video_id, page))
subtitles = self.extract_subtitles(video_id, page)
return {
'id': video_id,
@ -144,38 +142,31 @@ class LyndaIE(SubtitlesInfoExtractor):
if re.search(self._SUCCESSFUL_LOGIN_REGEX, login_page) is None:
raise ExtractorError('Unable to log in')
def _fix_subtitles(self, subtitles):
if subtitles is None:
return subtitles # subtitles not requested
fixed_subtitles = {}
for k, v in subtitles.items():
subs = json.loads(v)
if len(subs) == 0:
def _fix_subtitles(self, subs):
srt = ''
for pos in range(0, len(subs) - 1):
seq_current = subs[pos]
m_current = re.match(self._TIMECODE_REGEX, seq_current['Timecode'])
if m_current is None:
continue
srt = ''
for pos in range(0, len(subs) - 1):
seq_current = subs[pos]
m_current = re.match(self._TIMECODE_REGEX, seq_current['Timecode'])
if m_current is None:
continue
seq_next = subs[pos + 1]
m_next = re.match(self._TIMECODE_REGEX, seq_next['Timecode'])
if m_next is None:
continue
appear_time = m_current.group('timecode')
disappear_time = m_next.group('timecode')
text = seq_current['Caption']
srt += '%s\r\n%s --> %s\r\n%s' % (str(pos), appear_time, disappear_time, text)
if srt:
fixed_subtitles[k] = srt
return fixed_subtitles
seq_next = subs[pos + 1]
m_next = re.match(self._TIMECODE_REGEX, seq_next['Timecode'])
if m_next is None:
continue
appear_time = m_current.group('timecode')
disappear_time = m_next.group('timecode')
text = seq_current['Caption'].lstrip()
srt += '%s\r\n%s --> %s\r\n%s' % (str(pos), appear_time, disappear_time, text)
if srt:
return srt
def _get_available_subtitles(self, video_id, webpage):
def _get_subtitles(self, video_id, webpage):
url = 'http://www.lynda.com/ajax/player?videoId=%s&type=transcript' % video_id
sub = self._download_webpage(url, None, False)
sub_json = json.loads(sub)
return {'en': url} if len(sub_json) > 0 else {}
subs = self._download_json(url, None, False)
if subs:
return {'en': [{'ext': 'srt', 'data': self._fix_subtitles(subs)}]}
else:
return {}
class LyndaCourseIE(InfoExtractor):

View File

@ -5,9 +5,6 @@ import json
from .common import InfoExtractor
from .youtube import YoutubeIE
from ..compat import (
compat_urlparse,
)
from ..utils import (
clean_html,
ExtractorError,
@ -108,7 +105,6 @@ class OCWMITIE(InfoExtractor):
'upload_date': '20121109',
'uploader_id': 'MIT',
'uploader': 'MIT OpenCourseWare',
# 'subtitles': 'http://ocw.mit.edu/courses/electrical-engineering-and-computer-science/6-041-probabilistic-systems-analysis-and-applied-probability-fall-2010/video-lectures/lecture-7-multiple-variables-expectations-independence/MIT6_041F11_lec07_300k.mp4.srt'
}
},
{
@ -121,7 +117,6 @@ class OCWMITIE(InfoExtractor):
'uploader_id': 'MIT',
'uploader': 'MIT OpenCourseWare',
'description': 'This section contains lecture video excerpts, lecture notes, an interactive mathlet with supporting documents, and problem solving videos.',
# 'subtitles': 'http://ocw.mit.edu//courses/mathematics/18-01sc-single-variable-calculus-fall-2010/ocw-18.01-f07-lec01_300k.SRT'
}
}
]
@ -140,7 +135,6 @@ class OCWMITIE(InfoExtractor):
metadata = re.sub(r'[\'"]', '', embed_chapter_media.group(1))
metadata = re.split(r', ?', metadata)
yt = metadata[1]
subs = compat_urlparse.urljoin(self._BASE_URL, metadata[7])
else:
# search for call to ocw_embed_chapter_media(container_id, media_url, provider, page_url, image_url, captions_file)
embed_media = re.search(r'ocw_embed_media\((.+?)\)', webpage)
@ -148,7 +142,6 @@ class OCWMITIE(InfoExtractor):
metadata = re.sub(r'[\'"]', '', embed_media.group(1))
metadata = re.split(r', ?', metadata)
yt = metadata[1]
subs = compat_urlparse.urljoin(self._BASE_URL, metadata[5])
else:
raise ExtractorError('Unable to find embedded YouTube video.')
video_id = YoutubeIE.extract_id(yt)
@ -159,7 +152,5 @@ class OCWMITIE(InfoExtractor):
'title': title,
'description': description,
'url': yt,
'url_transparent'
'subtitles': subs,
'ie_key': 'Youtube',
}

View File

@ -18,7 +18,7 @@ class MiTeleIE(InfoExtractor):
IE_NAME = 'mitele.es'
_VALID_URL = r'http://www\.mitele\.es/[^/]+/[^/]+/[^/]+/(?P<id>[^/]+)/'
_TEST = {
_TESTS = [{
'url': 'http://www.mitele.es/programas-tv/diario-de/la-redaccion/programa-144/',
'md5': '6a75fe9d0d3275bead0cb683c616fddb',
'info_dict': {
@ -29,7 +29,7 @@ class MiTeleIE(InfoExtractor):
'display_id': 'programa-144',
'duration': 2913,
},
}
}]
def _real_extract(self, url):
episode = self._match_id(url)

View File

@ -5,7 +5,7 @@ from ..utils import int_or_none
class MporaIE(InfoExtractor):
_VALID_URL = r'https?://(www\.)?mpora\.(?:com|de)/videos/(?P<id>[^?#/]+)'
_VALID_URL = r'https?://(?:www\.)?mpora\.(?:com|de)/videos/(?P<id>[^?#/]+)'
IE_NAME = 'MPORA'
_TEST = {
@ -25,7 +25,9 @@ class MporaIE(InfoExtractor):
webpage = self._download_webpage(url, video_id)
data_json = self._search_regex(
r"new FM\.Player\('[^']+',\s*(\{.*?)\).player;", webpage, 'json')
[r"new FM\.Player\('[^']+',\s*(\{.*?)\).player;",
r"new\s+FM\.Kaltura\.Player\('[^']+'\s*,\s*({.+?})\);"],
webpage, 'json')
data = self._parse_json(data_json, video_id)
uploader = data['info_overlay'].get('username')

View File

@ -2,7 +2,7 @@ from __future__ import unicode_literals
import re
from .subtitles import SubtitlesInfoExtractor
from .common import InfoExtractor
from ..compat import (
compat_urllib_parse,
compat_urllib_request,
@ -23,7 +23,7 @@ def _media_xml_tag(tag):
return '{http://search.yahoo.com/mrss/}%s' % tag
class MTVServicesInfoExtractor(SubtitlesInfoExtractor):
class MTVServicesInfoExtractor(InfoExtractor):
_MOBILE_TEMPLATE = None
@staticmethod
@ -95,25 +95,15 @@ class MTVServicesInfoExtractor(SubtitlesInfoExtractor):
def _extract_subtitles(self, mdoc, mtvn_id):
subtitles = {}
FORMATS = {
'scc': 'cea-608',
'eia-608': 'cea-608',
'xml': 'ttml',
}
subtitles_format = FORMATS.get(
self._downloader.params.get('subtitlesformat'), 'ttml')
for transcript in mdoc.findall('.//transcript'):
if transcript.get('kind') != 'captions':
continue
lang = transcript.get('srclang')
for typographic in transcript.findall('./typographic'):
captions_format = typographic.get('format')
if captions_format == subtitles_format:
subtitles[lang] = compat_str(typographic.get('src'))
break
if self._downloader.params.get('listsubtitles', False):
self._list_available_subtitles(mtvn_id, subtitles)
return self.extract_subtitles(mtvn_id, subtitles)
subtitles[lang] = [{
'url': compat_str(typographic.get('src')),
'ext': typographic.get('format')
} for typographic in transcript.findall('./typographic')]
return subtitles
def _get_video_info(self, itemdoc):
uri = itemdoc.find('guid').text
@ -195,8 +185,6 @@ class MTVServicesInfoExtractor(SubtitlesInfoExtractor):
[r'data-mgid="(.*?)"', r'swfobject.embedSWF\(".*?(mgid:.*?)"'],
webpage, 'mgid')
videos_info = self._get_videos_info(mgid)
if self._downloader.params.get('listsubtitles', False):
return
if self._downloader.params.get('joinparts'):
show_name = self._html_search_regex(
r'<h1.*?class="[^"]*title[^"]*".*?>(.*?)</h1>',

View File

@ -3,17 +3,13 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
parse_duration,
unified_strdate,
)
class MusicVaultIE(InfoExtractor):
_VALID_URL = r'https?://www\.musicvault\.com/(?P<uploader_id>[^/?#]*)/video/(?P<display_id>[^/?#]*)_(?P<id>[0-9]+)\.html'
_TEST = {
'url': 'http://www.musicvault.com/the-allman-brothers-band/video/straight-from-the-heart_1010863.html',
'md5': '2cdbb3ae75f7fb3519821507d2fb3c15',
'md5': '3adcbdb3dcc02d647539e53f284ba171',
'info_dict': {
'id': '1010863',
'ext': 'mp4',
@ -22,9 +18,10 @@ class MusicVaultIE(InfoExtractor):
'duration': 244,
'uploader': 'The Allman Brothers Band',
'thumbnail': 're:^https?://.*/thumbnail/.*',
'upload_date': '19811216',
'upload_date': '20131219',
'location': 'Capitol Theatre (Passaic, NJ)',
'description': 'Listen to The Allman Brothers Band perform Straight from the Heart at Capitol Theatre (Passaic, NJ) on Dec 16, 1981',
'timestamp': int,
}
}
@ -43,34 +40,24 @@ class MusicVaultIE(InfoExtractor):
r'<h1.*?>(.*?)</h1>', data_div, 'uploader', fatal=False)
title = self._html_search_regex(
r'<h2.*?>(.*?)</h2>', data_div, 'title')
upload_date = unified_strdate(self._html_search_regex(
r'<h3.*?>(.*?)</h3>', data_div, 'uploader', fatal=False))
location = self._html_search_regex(
r'<h4.*?>(.*?)</h4>', data_div, 'location', fatal=False)
duration = parse_duration(self._html_search_meta('duration', webpage))
VIDEO_URL_TEMPLATE = 'http://cdnapi.kaltura.com/p/%(uid)s/sp/%(wid)s/playManifest/entryId/%(entry_id)s/format/url/protocol/http'
kaltura_id = self._search_regex(
r'<div id="video-detail-player" data-kaltura-id="([^"]+)"',
webpage, 'kaltura ID')
video_url = VIDEO_URL_TEMPLATE % {
'entry_id': kaltura_id,
'wid': self._search_regex(r'/wid/_([0-9]+)/', webpage, 'wid'),
'uid': self._search_regex(r'uiconf_id/([0-9]+)/', webpage, 'uid'),
}
wid = self._search_regex(r'/wid/_([0-9]+)/', webpage, 'wid')
return {
'id': mobj.group('id'),
'url': video_url,
'ext': 'mp4',
'_type': 'url_transparent',
'url': 'kaltura:%s:%s' % (wid, kaltura_id),
'ie_key': 'Kaltura',
'display_id': display_id,
'uploader_id': mobj.group('uploader_id'),
'thumbnail': thumbnail,
'description': self._html_search_meta('description', webpage),
'upload_date': upload_date,
'location': location,
'title': title,
'uploader': uploader,
'duration': duration,
}

View File

@ -1,6 +1,5 @@
from __future__ import unicode_literals
from .subtitles import SubtitlesInfoExtractor
from .common import InfoExtractor
from ..utils import (
fix_xml_ampersands,
@ -12,7 +11,7 @@ from ..utils import (
)
class NPOBaseIE(SubtitlesInfoExtractor):
class NPOBaseIE(InfoExtractor):
def _get_token(self, video_id):
token_page = self._download_webpage(
'http://ida.omroep.nl/npoplayer/i.js',
@ -164,13 +163,10 @@ class NPOIE(NPOBaseIE):
subtitles = {}
if metadata.get('tt888') == 'ja':
subtitles['nl'] = 'http://e.omroep.nl/tt888/%s' % video_id
if self._downloader.params.get('listsubtitles', False):
self._list_available_subtitles(video_id, subtitles)
return
subtitles = self.extract_subtitles(video_id, subtitles)
subtitles['nl'] = [{
'ext': 'vtt',
'url': 'http://e.omroep.nl/tt888/%s' % video_id,
}]
return {
'id': video_id,

View File

@ -4,13 +4,13 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
ExtractorError,
float_or_none,
parse_duration,
unified_strdate,
)
from .subtitles import SubtitlesInfoExtractor
class NRKIE(InfoExtractor):
@ -73,7 +73,7 @@ class NRKIE(InfoExtractor):
}
class NRKTVIE(SubtitlesInfoExtractor):
class NRKTVIE(InfoExtractor):
_VALID_URL = r'(?P<baseurl>http://tv\.nrk(?:super)?\.no/)(?:serie/[^/]+|program)/(?P<id>[a-zA-Z]{4}\d{8})(?:/\d{2}-\d{2}-\d{4})?(?:#del=(?P<part_id>\d+))?'
_TESTS = [
@ -156,10 +156,12 @@ class NRKTVIE(SubtitlesInfoExtractor):
if self._downloader.params.get('verbose', False):
self.to_screen('[debug] %s' % txt)
def _extract_captions(self, subtitlesurl, video_id, baseurl):
def _get_subtitles(self, subtitlesurl, video_id, baseurl):
url = "%s%s" % (baseurl, subtitlesurl)
self._debug_print('%s: Subtitle url: %s' % (video_id, url))
captions = self._download_xml(url, video_id, 'Downloading subtitles')
captions = self._download_xml(
url, video_id, 'Downloading subtitles',
transform_source=lambda s: s.replace(r'<br />', '\r\n'))
lang = captions.get('lang', 'no')
ps = captions.findall('./{0}body/{0}div/{0}p'.format('{http://www.w3.org/ns/ttml}'))
srt = ''
@ -168,9 +170,11 @@ class NRKTVIE(SubtitlesInfoExtractor):
duration = parse_duration(p.get('dur'))
starttime = self._seconds2str(begin)
endtime = self._seconds2str(begin + duration)
text = '\n'.join(p.itertext())
srt += '%s\r\n%s --> %s\r\n%s\r\n\r\n' % (str(pos), starttime, endtime, text)
return {lang: srt}
srt += '%s\r\n%s --> %s\r\n%s\r\n\r\n' % (compat_str(pos), starttime, endtime, p.text)
return {lang: [
{'ext': 'ttml', 'url': url},
{'ext': 'srt', 'data': srt},
]}
def _extract_f4m(self, manifest_url, video_id):
return self._extract_f4m_formats(manifest_url + '?hdcore=3.1.1&plugin=aasp-3.1.1.69.124', video_id)
@ -243,10 +247,7 @@ class NRKTVIE(SubtitlesInfoExtractor):
webpage, 'subtitle URL', default=None)
subtitles = None
if subtitles_url:
subtitles = self._extract_captions(subtitles_url, video_id, baseurl)
if self._downloader.params.get('listsubtitles', False):
self._list_available_subtitles(video_id, subtitles)
return
subtitles = self.extract_subtitles(subtitles_url, video_id, baseurl)
return {
'id': video_id,

View File

@ -0,0 +1,85 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
unified_strdate,
int_or_none,
qualities,
)
class OdnoklassnikiIE(InfoExtractor):
_VALID_URL = r'https?://(?:odnoklassniki|ok)\.ru/(?:video|web-api/video/moviePlayer)/(?P<id>\d+)'
_TESTS = [{
'url': 'http://ok.ru/video/20079905452',
'md5': '8e24ad2da6f387948e7a7d44eb8668fe',
'info_dict': {
'id': '20079905452',
'ext': 'mp4',
'title': 'Культура меняет нас (прекрасный ролик!))',
'duration': 100,
'upload_date': '20141207',
'uploader_id': '330537914540',
'uploader': 'Виталий Добровольский',
'like_count': int,
'age_limit': 0,
},
}, {
'url': 'http://ok.ru/web-api/video/moviePlayer/20079905452',
'only_matching': True,
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
player = self._parse_json(
self._search_regex(
r"OKVideo\.start\(({.+?})\s*,\s*'VideoAutoplay_player'", webpage, 'player'),
video_id)
metadata = self._parse_json(player['flashvars']['metadata'], video_id)
movie = metadata['movie']
title = movie['title']
thumbnail = movie.get('poster')
duration = int_or_none(movie.get('duration'))
author = metadata.get('author', {})
uploader_id = author.get('id')
uploader = author.get('name')
upload_date = unified_strdate(self._html_search_meta(
'ya:ovs:upload_date', webpage, 'upload date'))
age_limit = None
adult = self._html_search_meta(
'ya:ovs:adult', webpage, 'age limit')
if adult:
age_limit = 18 if adult == 'true' else 0
like_count = int_or_none(metadata.get('likeCount'))
quality = qualities(('mobile', 'lowest', 'low', 'sd', 'hd'))
formats = [{
'url': f['url'],
'ext': 'mp4',
'format_id': f['name'],
'quality': quality(f['name']),
} for f in metadata['videos']]
return {
'id': video_id,
'title': title,
'thumbnail': thumbnail,
'duration': duration,
'upload_date': upload_date,
'uploader': uploader,
'uploader_id': uploader_id,
'like_count': like_count,
'age_limit': age_limit,
'formats': formats,
}

View File

@ -0,0 +1,88 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
ExtractorError,
unified_strdate,
int_or_none,
)
class Puls4IE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?puls4\.com/video/[^/]+/play/(?P<id>[0-9]+)'
_TESTS = [{
'url': 'http://www.puls4.com/video/pro-und-contra/play/2716816',
'md5': '49f6a6629747eeec43cef6a46b5df81d',
'info_dict': {
'id': '2716816',
'ext': 'mp4',
'title': 'Pro und Contra vom 23.02.2015',
'description': 'md5:293e44634d9477a67122489994675db6',
'duration': 2989,
'upload_date': '20150224',
'uploader': 'PULS_4',
},
'skip': 'Only works from Germany',
}, {
'url': 'http://www.puls4.com/video/kult-spielfilme/play/1298106',
'md5': '6a48316c8903ece8dab9b9a7bf7a59ec',
'info_dict': {
'id': '1298106',
'ext': 'mp4',
'title': 'Lucky Fritz',
},
'skip': 'Only works from Germany',
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
error_message = self._html_search_regex(
r'<div class="message-error">(.+?)</div>',
webpage, 'error message', default=None)
if error_message:
raise ExtractorError(
'%s returned error: %s' % (self.IE_NAME, error_message), expected=True)
real_url = self._html_search_regex(
r'\"fsk-button\".+?href=\"([^"]+)',
webpage, 'fsk_button', default=None)
if real_url:
webpage = self._download_webpage(real_url, video_id)
player = self._search_regex(
r'p4_video_player(?:_iframe)?\("video_\d+_container"\s*,(.+?)\);\s*\}',
webpage, 'player')
player_json = self._parse_json(
'[%s]' % player, video_id,
transform_source=lambda s: s.replace('undefined,', ''))
formats = None
result = None
for v in player_json:
if isinstance(v, list) and not formats:
formats = [{
'url': f['url'],
'format': 'hd' if f.get('hd') else 'sd',
'width': int_or_none(f.get('size_x')),
'height': int_or_none(f.get('size_y')),
'tbr': int_or_none(f.get('bitrate')),
} for f in v]
self._sort_formats(formats)
elif isinstance(v, dict) and not result:
result = {
'id': video_id,
'title': v['videopartname'].strip(),
'description': v.get('videotitle'),
'duration': int_or_none(v.get('videoduration') or v.get('episodeduration')),
'upload_date': unified_strdate(v.get('clipreleasetime')),
'uploader': v.get('channel'),
}
result['formats'] = formats
return result

View File

@ -0,0 +1,88 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
js_to_json,
unescapeHTML,
int_or_none,
)
class R7IE(InfoExtractor):
_VALID_URL = r'''(?x)https?://
(?:
(?:[a-zA-Z]+)\.r7\.com(?:/[^/]+)+/idmedia/|
noticias\.r7\.com(?:/[^/]+)+/[^/]+-|
player\.r7\.com/video/i/
)
(?P<id>[\da-f]{24})
'''
_TESTS = [{
'url': 'http://videos.r7.com/policiais-humilham-suspeito-a-beira-da-morte-morre-com-dignidade-/idmedia/54e7050b0cf2ff57e0279389.html',
'md5': '403c4e393617e8e8ddc748978ee8efde',
'info_dict': {
'id': '54e7050b0cf2ff57e0279389',
'ext': 'mp4',
'title': 'Policiais humilham suspeito à beira da morte: "Morre com dignidade"',
'thumbnail': 're:^https?://.*\.jpg$',
'duration': 98,
'like_count': int,
'view_count': int,
},
}, {
'url': 'http://esportes.r7.com/videos/cigano-manda-recado-aos-fas/idmedia/4e176727b51a048ee6646a1b.html',
'only_matching': True,
}, {
'url': 'http://noticias.r7.com/record-news/video/representante-do-instituto-sou-da-paz-fala-sobre-fim-do-estatuto-do-desarmamento-5480fc580cf2285b117f438d/',
'only_matching': True,
}, {
'url': 'http://player.r7.com/video/i/54e7050b0cf2ff57e0279389?play=true&video=http://vsh.r7.com/54e7050b0cf2ff57e0279389/ER7_RE_BG_MORTE_JOVENS_570kbps_2015-02-2009f17818-cc82-4c8f-86dc-89a66934e633-ATOS_copy.mp4&linkCallback=http://videos.r7.com/policiais-humilham-suspeito-a-beira-da-morte-morre-com-dignidade-/idmedia/54e7050b0cf2ff57e0279389.html&thumbnail=http://vtb.r7.com/ER7_RE_BG_MORTE_JOVENS_570kbps_2015-02-2009f17818-cc82-4c8f-86dc-89a66934e633-thumb.jpg&idCategory=192&share=true&layout=full&full=true',
'only_matching': True,
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(
'http://player.r7.com/video/i/%s' % video_id, video_id)
item = self._parse_json(js_to_json(self._search_regex(
r'(?s)var\s+item\s*=\s*({.+?});', webpage, 'player')), video_id)
title = unescapeHTML(item['title'])
thumbnail = item.get('init', {}).get('thumbUri')
duration = None
statistics = item.get('statistics', {})
like_count = int_or_none(statistics.get('likes'))
view_count = int_or_none(statistics.get('views'))
formats = []
for format_key, format_dict in item['playlist'][0].items():
src = format_dict.get('src')
if not src:
continue
format_id = format_dict.get('format') or format_key
if duration is None:
duration = format_dict.get('duration')
if '.f4m' in src:
formats.extend(self._extract_f4m_formats(src, video_id, preference=-1))
elif src.endswith('.m3u8'):
formats.extend(self._extract_m3u8_formats(src, video_id, 'mp4', preference=-2))
else:
formats.append({
'url': src,
'format_id': format_id,
})
self._sort_formats(formats)
return {
'id': video_id,
'title': title,
'thumbnail': thumbnail,
'duration': duration,
'like_count': like_count,
'view_count': view_count,
'formats': formats,
}

View File

@ -2,7 +2,7 @@ from __future__ import unicode_literals
import re
from .subtitles import SubtitlesInfoExtractor
from .common import InfoExtractor
from ..compat import (
compat_urllib_parse,
)
@ -12,7 +12,7 @@ from ..utils import (
)
class RaiIE(SubtitlesInfoExtractor):
class RaiIE(InfoExtractor):
_VALID_URL = r'(?P<url>http://(?:.+?\.)?(?:rai\.it|rai\.tv|rainews\.it)/dl/.+?-(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})(?:-.+?)?\.html)'
_TESTS = [
{
@ -89,15 +89,7 @@ class RaiIE(SubtitlesInfoExtractor):
'ext': 'mp4',
})
if self._downloader.params.get('listsubtitles', False):
page = self._download_webpage(url, video_id)
self._list_available_subtitles(video_id, page)
return
subtitles = {}
if self._have_to_download_any_subtitles:
page = self._download_webpage(url, video_id)
subtitles = self.extract_subtitles(video_id, page)
subtitles = self.extract_subtitles(video_id, url)
return {
'id': video_id,
@ -111,7 +103,8 @@ class RaiIE(SubtitlesInfoExtractor):
'subtitles': subtitles,
}
def _get_available_subtitles(self, video_id, webpage):
def _get_subtitles(self, video_id, url):
webpage = self._download_webpage(url, video_id)
subtitles = {}
m = re.search(r'<meta name="closedcaption" content="(?P<captions>[^"]+)"', webpage)
if m:
@ -120,5 +113,8 @@ class RaiIE(SubtitlesInfoExtractor):
SRT_EXT = '.srt'
if captions.endswith(STL_EXT):
captions = captions[:-len(STL_EXT)] + SRT_EXT
subtitles['it'] = 'http://www.rai.tv%s' % compat_urllib_parse.quote(captions)
subtitles['it'] = [{
'ext': 'srt',
'url': 'http://www.rai.tv%s' % compat_urllib_parse.quote(captions),
}]
return subtitles

View File

@ -146,7 +146,7 @@ class RTLnowIE(InfoExtractor):
mobj = re.search(r'.*/(?P<hoster>[^/]+)/videos/(?P<play_path>.+)\.f4m', filename.text)
if mobj:
fmt = {
'url': 'rtmpe://fmspay-fra2.rtl.de/' + mobj.group('hoster'),
'url': 'rtmpe://fms.rtl.de/' + mobj.group('hoster'),
'play_path': 'mp4:' + mobj.group('play_path'),
'page_url': url,
'player_url': video_page_url + 'includes/vodplayer.swf',

View File

@ -6,9 +6,11 @@ import re
import time
from .common import InfoExtractor
from ..compat import compat_urlparse
from ..utils import (
struct_unpack,
float_or_none,
remove_end,
struct_unpack,
)
@ -66,6 +68,7 @@ class RTVEALaCartaIE(InfoExtractor):
'id': '2491869',
'ext': 'mp4',
'title': 'Balonmano - Swiss Cup masculina. Final: España-Suecia',
'duration': 5024.566,
},
}, {
'note': 'Live stream',
@ -96,12 +99,14 @@ class RTVEALaCartaIE(InfoExtractor):
).replace('.net.rtve', '.multimedia.cdn.rtve')
video_path = self._download_webpage(
auth_url, video_id, 'Getting video url')
# Use mvod.akcdn instead of flash.akamaihd.multimedia.cdn to get
# Use mvod1.akcdn instead of flash.akamaihd.multimedia.cdn to get
# the right Content-Length header and the mp4 format
video_url = (
'http://mvod.akcdn.rtve.es/{0}&v=2.6.8'
'&fp=MAC%2016,0,0,296&r=MRUGG&g=OEOJWFXNFGCP'.format(video_path)
)
video_url = compat_urlparse.urljoin(
'http://mvod1.akcdn.rtve.es/', video_path)
subtitles = None
if info.get('sbtFile') is not None:
subtitles = self.extract_subtitles(video_id, info['sbtFile'])
return {
'id': video_id,
@ -109,8 +114,18 @@ class RTVEALaCartaIE(InfoExtractor):
'url': video_url,
'thumbnail': info.get('image'),
'page_url': url,
'subtitles': subtitles,
'duration': float_or_none(info.get('duration'), scale=1000),
}
def _get_subtitles(self, video_id, sub_file):
subs = self._download_json(
sub_file + '.json', video_id,
'Downloading subtitles info')['page']['items']
return dict(
(s['lang'], [{'ext': 'vtt', 'url': s['src']}])
for s in subs)
class RTVELiveIE(InfoExtractor):
IE_NAME = 'rtve.es:live'

View File

@ -7,6 +7,7 @@ from .common import InfoExtractor
class SoundgasmIE(InfoExtractor):
IE_NAME = 'soundgasm'
_VALID_URL = r'https?://(?:www\.)?soundgasm\.net/u/(?P<user>[0-9a-zA-Z_\-]+)/(?P<title>[0-9a-zA-Z_\-]+)'
_TEST = {
'url': 'http://soundgasm.net/u/ytdl/Piano-sample',
@ -38,3 +39,26 @@ class SoundgasmIE(InfoExtractor):
'title': audio_title,
'description': description
}
class SoundgasmProfileIE(InfoExtractor):
IE_NAME = 'soundgasm:profile'
_VALID_URL = r'https?://(?:www\.)?soundgasm\.net/u/(?P<id>[^/]+)/?(?:\#.*)?$'
_TEST = {
'url': 'http://soundgasm.net/u/ytdl',
'info_dict': {
'id': 'ytdl',
},
'playlist_count': 1,
}
def _real_extract(self, url):
profile_id = self._match_id(url)
webpage = self._download_webpage(url, profile_id)
entries = [
self.url_result(audio_url, 'Soundgasm')
for audio_url in re.findall(r'href="([^"]+/u/%s/[^"]+)' % profile_id, webpage)]
return self.playlist_result(entries, profile_id)

View File

@ -1,99 +0,0 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
ExtractorError,
)
class SubtitlesInfoExtractor(InfoExtractor):
@property
def _have_to_download_any_subtitles(self):
return any([self._downloader.params.get('writesubtitles', False),
self._downloader.params.get('writeautomaticsub')])
def _list_available_subtitles(self, video_id, webpage):
""" outputs the available subtitles for the video """
sub_lang_list = self._get_available_subtitles(video_id, webpage)
auto_captions_list = self._get_available_automatic_caption(video_id, webpage)
sub_lang = ",".join(list(sub_lang_list.keys()))
self.to_screen('%s: Available subtitles for video: %s' %
(video_id, sub_lang))
auto_lang = ",".join(auto_captions_list.keys())
self.to_screen('%s: Available automatic captions for video: %s' %
(video_id, auto_lang))
def extract_subtitles(self, video_id, webpage):
"""
returns {sub_lang: sub} ,{} if subtitles not found or None if the
subtitles aren't requested.
"""
if not self._have_to_download_any_subtitles:
return None
available_subs_list = {}
if self._downloader.params.get('writeautomaticsub', False):
available_subs_list.update(self._get_available_automatic_caption(video_id, webpage))
if self._downloader.params.get('writesubtitles', False):
available_subs_list.update(self._get_available_subtitles(video_id, webpage))
if not available_subs_list: # error, it didn't get the available subtitles
return {}
if self._downloader.params.get('allsubtitles', False):
sub_lang_list = available_subs_list
else:
if self._downloader.params.get('subtitleslangs', False):
requested_langs = self._downloader.params.get('subtitleslangs')
elif 'en' in available_subs_list:
requested_langs = ['en']
else:
requested_langs = [list(available_subs_list.keys())[0]]
sub_lang_list = {}
for sub_lang in requested_langs:
if sub_lang not in available_subs_list:
self._downloader.report_warning('no closed captions found in the specified language "%s"' % sub_lang)
continue
sub_lang_list[sub_lang] = available_subs_list[sub_lang]
subtitles = {}
for sub_lang, url in sub_lang_list.items():
subtitle = self._request_subtitle_url(sub_lang, url)
if subtitle:
subtitles[sub_lang] = subtitle
return subtitles
def _download_subtitle_url(self, sub_lang, url):
return self._download_webpage(url, None, note=False)
def _request_subtitle_url(self, sub_lang, url):
""" makes the http request for the subtitle """
try:
sub = self._download_subtitle_url(sub_lang, url)
except ExtractorError as err:
self._downloader.report_warning('unable to download video subtitles for %s: %s' % (sub_lang, compat_str(err)))
return
if not sub:
self._downloader.report_warning('Did not fetch video subtitles')
return
return sub
def _get_available_subtitles(self, video_id, webpage):
"""
returns {sub_lang: url} or {} if not available
Must be redefined by the subclasses
"""
# By default, allow implementations to simply pass in the result
assert isinstance(webpage, dict), \
'_get_available_subtitles not implemented'
return webpage
def _get_available_automatic_caption(self, video_id, webpage):
"""
returns {sub_lang: url} or {} if not available
Must be redefined by the subclasses that support automatic captions,
otherwise it will return {}
"""
self._downloader.report_warning('Automatic Captions not supported by this server')
return {}

View File

@ -1,6 +1,8 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
determine_ext,
@ -8,23 +10,40 @@ from ..utils import (
class SVTPlayIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?svtplay\.se/video/(?P<id>[0-9]+)'
_TEST = {
IE_DESC = 'SVT Play and Öppet arkiv'
_VALID_URL = r'https?://(?:www\.)?(?P<host>svtplay|oppetarkiv)\.se/video/(?P<id>[0-9]+)'
_TESTS = [{
'url': 'http://www.svtplay.se/video/2609989/sm-veckan/sm-veckan-rally-final-sasong-1-sm-veckan-rally-final',
'md5': 'f4a184968bc9c802a9b41316657aaa80',
'md5': 'ade3def0643fa1c40587a422f98edfd9',
'info_dict': {
'id': '2609989',
'ext': 'mp4',
'ext': 'flv',
'title': 'SM veckan vinter, Örebro - Rally, final',
'duration': 4500,
'thumbnail': 're:^https?://.*[\.-]jpg$',
'age_limit': 0,
},
}
}, {
'url': 'http://www.oppetarkiv.se/video/1058509/rederiet-sasong-1-avsnitt-1-av-318',
'md5': 'c3101a17ce9634f4c1f9800f0746c187',
'info_dict': {
'id': '1058509',
'ext': 'flv',
'title': 'Farlig kryssning',
'duration': 2566,
'thumbnail': 're:^https?://.*[\.-]jpg$',
'age_limit': 0,
},
'skip': 'Only works from Sweden',
}]
def _real_extract(self, url):
video_id = self._match_id(url)
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
host = mobj.group('host')
info = self._download_json(
'http://www.svtplay.se/video/%s?output=json' % video_id, video_id)
'http://www.%s.se/video/%s?output=json' % (host, video_id), video_id)
title = info['context']['title']
thumbnail = info['context'].get('thumbnailImage')
@ -33,11 +52,16 @@ class SVTPlayIE(InfoExtractor):
formats = []
for vr in video_info['videoReferences']:
vurl = vr['url']
if determine_ext(vurl) == 'm3u8':
ext = determine_ext(vurl)
if ext == 'm3u8':
formats.extend(self._extract_m3u8_formats(
vurl, video_id,
ext='mp4', entry_protocol='m3u8_native',
m3u8_id=vr.get('playerType')))
elif ext == 'f4m':
formats.extend(self._extract_f4m_formats(
vurl + '?hdcore=3.3.0', video_id,
f4m_id=vr.get('playerType')))
else:
formats.append({
'format_id': vr.get('playerType'),
@ -46,6 +70,7 @@ class SVTPlayIE(InfoExtractor):
self._sort_formats(formats)
duration = video_info.get('materialLength')
age_limit = 18 if video_info.get('inappropriateForChildren') else 0
return {
'id': video_id,
@ -53,4 +78,5 @@ class SVTPlayIE(InfoExtractor):
'formats': formats,
'thumbnail': thumbnail,
'duration': duration,
'age_limit': age_limit,
}

View File

@ -1,8 +1,10 @@
from __future__ import unicode_literals
import base64
import re
from .common import InfoExtractor
from ..utils import qualities
class TeamcocoIE(InfoExtractor):
@ -24,8 +26,8 @@ class TeamcocoIE(InfoExtractor):
'info_dict': {
'id': '19705',
'ext': 'mp4',
"description": "Louis C.K. got starstruck by George W. Bush, so what? Part one.",
"title": "Louis C.K. Interview Pt. 1 11/3/11",
'description': 'Louis C.K. got starstruck by George W. Bush, so what? Part one.',
'title': 'Louis C.K. Interview Pt. 1 11/3/11',
'age_limit': 0,
}
}
@ -42,42 +44,39 @@ class TeamcocoIE(InfoExtractor):
display_id = mobj.group('display_id')
webpage = self._download_webpage(url, display_id)
video_id = mobj.group("video_id")
video_id = mobj.group('video_id')
if not video_id:
video_id = self._html_search_regex(
self._VIDEO_ID_REGEXES, webpage, 'video id')
data_url = 'http://teamcoco.com/cvp/2.0/%s.xml' % video_id
data = self._download_xml(
data_url, display_id, 'Downloading data webpage')
embed_url = 'http://teamcoco.com/embed/v/%s' % video_id
embed = self._download_webpage(
embed_url, video_id, 'Downloading embed page')
encoded_data = self._search_regex(
r'"preload"\s*:\s*"([^"]+)"', embed, 'encoded data')
data = self._parse_json(
base64.b64decode(encoded_data.encode('ascii')).decode('utf-8'), video_id)
qualities = ['500k', '480p', '1000k', '720p', '1080p']
formats = []
for filed in data.findall('files/file'):
if filed.attrib.get('playmode') == 'all':
# it just duplicates one of the entries
break
file_url = filed.text
m_format = re.search(r'(\d+(k|p))\.mp4', file_url)
get_quality = qualities(['500k', '480p', '1000k', '720p', '1080p'])
for filed in data['files']:
m_format = re.search(r'(\d+(k|p))\.mp4', filed['url'])
if m_format is not None:
format_id = m_format.group(1)
else:
format_id = filed.attrib['bitrate']
format_id = filed['bitrate']
tbr = (
int(filed.attrib['bitrate'])
if filed.attrib['bitrate'].isdigit()
int(filed['bitrate'])
if filed['bitrate'].isdigit()
else None)
try:
quality = qualities.index(format_id)
except ValueError:
quality = -1
formats.append({
'url': file_url,
'url': filed['url'],
'ext': 'mp4',
'tbr': tbr,
'format_id': format_id,
'quality': quality,
'quality': get_quality(format_id),
})
self._sort_formats(formats)
@ -86,8 +85,8 @@ class TeamcocoIE(InfoExtractor):
'id': video_id,
'display_id': display_id,
'formats': formats,
'title': self._og_search_title(webpage),
'thumbnail': self._og_search_thumbnail(webpage),
'description': self._og_search_description(webpage),
'title': data['title'],
'thumbnail': data.get('thumb', {}).get('href'),
'description': data.get('teaser'),
'age_limit': self._family_friendly_search(webpage),
}

View File

@ -3,14 +3,14 @@ from __future__ import unicode_literals
import json
import re
from .subtitles import SubtitlesInfoExtractor
from .common import InfoExtractor
from ..compat import (
compat_str,
)
class TEDIE(SubtitlesInfoExtractor):
class TEDIE(InfoExtractor):
_VALID_URL = r'''(?x)
(?P<proto>https?://)
(?P<type>www|embed(?:-ssl)?)(?P<urlmain>\.ted\.com/
@ -184,11 +184,6 @@ class TEDIE(SubtitlesInfoExtractor):
self._sort_formats(formats)
video_id = compat_str(talk_info['id'])
# subtitles
video_subtitles = self.extract_subtitles(video_id, talk_info)
if self._downloader.params.get('listsubtitles', False):
self._list_available_subtitles(video_id, talk_info)
return
thumbnail = talk_info['thumb']
if not thumbnail.startswith('http'):
@ -199,21 +194,25 @@ class TEDIE(SubtitlesInfoExtractor):
'uploader': talk_info['speaker'],
'thumbnail': thumbnail,
'description': self._og_search_description(webpage),
'subtitles': video_subtitles,
'subtitles': self._get_subtitles(video_id, talk_info),
'formats': formats,
'duration': talk_info.get('duration'),
}
def _get_available_subtitles(self, video_id, talk_info):
def _get_subtitles(self, video_id, talk_info):
languages = [lang['languageCode'] for lang in talk_info.get('languages', [])]
if languages:
sub_lang_list = {}
for l in languages:
url = 'http://www.ted.com/talks/subtitles/id/%s/lang/%s/format/srt' % (video_id, l)
sub_lang_list[l] = url
sub_lang_list[l] = [
{
'url': 'http://www.ted.com/talks/subtitles/id/%s/lang/%s/format/%s' % (video_id, l, ext),
'ext': ext,
}
for ext in ['ted', 'srt']
]
return sub_lang_list
else:
self._downloader.report_warning('video doesn\'t have subtitles')
return {}
def _watch_info(self, url, name):

View File

@ -6,9 +6,9 @@ from .mitele import MiTeleIE
class TelecincoIE(MiTeleIE):
IE_NAME = 'telecinco.es'
_VALID_URL = r'https?://www\.telecinco\.es/[^/]+/[^/]+/[^/]+/(?P<id>.*?)\.html'
_VALID_URL = r'https?://www\.telecinco\.es/[^/]+/[^/]+/(?:[^/]+/)?(?P<id>.*?)\.html'
_TEST = {
_TESTS = [{
'url': 'http://www.telecinco.es/robinfood/temporada-01/t01xp14/Bacalao-cocochas-pil-pil_0_1876350223.html',
'info_dict': {
'id': 'MDSVID20141015_0058',
@ -16,4 +16,7 @@ class TelecincoIE(MiTeleIE):
'title': 'Con Martín Berasategui, hacer un bacalao al ...',
'duration': 662,
},
}
}, {
'url': 'http://www.telecinco.es/informativos/nacional/Pablo_Iglesias-Informativos_Telecinco-entrevista-Pedro_Piqueras_2_1945155182.html',
'only_matching': True,
}]

View File

@ -8,7 +8,7 @@ import binascii
import hashlib
from .subtitles import SubtitlesInfoExtractor
from .common import InfoExtractor
from ..compat import (
compat_str,
)
@ -22,7 +22,7 @@ from ..utils import (
_x = lambda p: xpath_with_ns(p, {'smil': 'http://www.w3.org/2005/SMIL21/Language'})
class ThePlatformIE(SubtitlesInfoExtractor):
class ThePlatformIE(InfoExtractor):
_VALID_URL = r'''(?x)
(?:https?://(?:link|player)\.theplatform\.com/[sp]/(?P<provider_id>[^/]+)/
(?P<config>(?:[^/\?]+/(?:swf|config)|onsite)/select/)?
@ -106,15 +106,11 @@ class ThePlatformIE(SubtitlesInfoExtractor):
captions = info.get('captions')
if isinstance(captions, list):
for caption in captions:
lang, src = caption.get('lang'), caption.get('src')
if lang and src:
subtitles[lang] = src
if self._downloader.params.get('listsubtitles', False):
self._list_available_subtitles(video_id, subtitles)
return
subtitles = self.extract_subtitles(video_id, subtitles)
lang, src, mime = caption.get('lang', 'en'), caption.get('src'), caption.get('type')
subtitles[lang] = [{
'ext': 'srt' if mime == 'text/srt' else 'ttml',
'url': src,
}]
head = meta.find(_x('smil:head'))
body = meta.find(_x('smil:body'))

View File

@ -34,7 +34,15 @@ class TwitchBaseIE(InfoExtractor):
expected=True)
def _download_json(self, url, video_id, note='Downloading JSON metadata'):
response = super(TwitchBaseIE, self)._download_json(url, video_id, note)
headers = {
'Referer': 'http://api.twitch.tv/crossdomain/receiver.html?v=2',
'X-Requested-With': 'XMLHttpRequest',
}
for cookie in self._downloader.cookiejar:
if cookie.name == 'api_token':
headers['Twitch-Api-Token'] = cookie.value
request = compat_urllib_request.Request(url, headers=headers)
response = super(TwitchBaseIE, self)._download_json(request, video_id, note)
self._handle_error(response)
return response

View File

@ -2,16 +2,17 @@ from __future__ import unicode_literals
import re
from ..compat import compat_urlparse
from ..utils import (
ExtractorError,
unescapeHTML,
unified_strdate,
US_RATINGS,
)
from .subtitles import SubtitlesInfoExtractor
from .common import InfoExtractor
class VikiIE(SubtitlesInfoExtractor):
class VikiIE(InfoExtractor):
IE_NAME = 'viki'
_VALID_URL = r'^https?://(?:www\.)?viki\.com/videos/(?P<id>[0-9]+v)'
@ -69,9 +70,6 @@ class VikiIE(SubtitlesInfoExtractor):
# subtitles
video_subtitles = self.extract_subtitles(video_id, info_webpage)
if self._downloader.params.get('listsubtitles', False):
self._list_available_subtitles(video_id, info_webpage)
return
return {
'id': video_id,
@ -85,12 +83,15 @@ class VikiIE(SubtitlesInfoExtractor):
'upload_date': upload_date,
}
def _get_available_subtitles(self, video_id, info_webpage):
def _get_subtitles(self, video_id, info_webpage):
res = {}
for sturl_html in re.findall(r'<track src="([^"]+)"/>', info_webpage):
for sturl_html in re.findall(r'<track src="([^"]+)"', info_webpage):
sturl = unescapeHTML(sturl_html)
m = re.search(r'/(?P<lang>[a-z]+)\.vtt', sturl)
if not m:
continue
res[m.group('lang')] = sturl
res[m.group('lang')] = [{
'url': compat_urlparse.urljoin('http://www.viki.com', sturl),
'ext': 'vtt',
}]
return res

View File

@ -4,9 +4,9 @@ from __future__ import unicode_literals
import json
import re
import itertools
import hashlib
from .common import InfoExtractor
from .subtitles import SubtitlesInfoExtractor
from ..compat import (
compat_HTTPError,
compat_urllib_parse,
@ -52,7 +52,7 @@ class VimeoBaseInfoExtractor(InfoExtractor):
self._download_webpage(login_request, None, False, 'Wrong login info')
class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor):
class VimeoIE(VimeoBaseInfoExtractor):
"""Information extractor for vimeo.com."""
# _VALID_URL matches Vimeo URLs
@ -225,6 +225,11 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor):
if mobj.group('pro') or mobj.group('player'):
url = 'http://player.vimeo.com/video/' + video_id
password = self._downloader.params.get('videopassword', None)
if password:
headers['Cookie'] = '%s_password=%s' % (
video_id, hashlib.md5(password.encode('utf-8')).hexdigest())
# Retrieve video webpage to extract further information
request = compat_urllib_request.Request(url, None, headers)
try:
@ -372,12 +377,10 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor):
text_tracks = config['request'].get('text_tracks')
if text_tracks:
for tt in text_tracks:
subtitles[tt['lang']] = 'http://vimeo.com' + tt['url']
video_subtitles = self.extract_subtitles(video_id, subtitles)
if self._downloader.params.get('listsubtitles', False):
self._list_available_subtitles(video_id, subtitles)
return
subtitles[tt['lang']] = [{
'ext': 'vtt',
'url': 'http://vimeo.com' + tt['url'],
}]
return {
'id': video_id,
@ -393,7 +396,7 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor):
'view_count': view_count,
'like_count': like_count,
'comment_count': comment_count,
'subtitles': video_subtitles,
'subtitles': subtitles,
}

View File

@ -3,14 +3,14 @@ from __future__ import unicode_literals
import re
from .subtitles import SubtitlesInfoExtractor
from .common import InfoExtractor
from ..utils import (
xpath_text,
int_or_none,
)
class WallaIE(SubtitlesInfoExtractor):
class WallaIE(InfoExtractor):
_VALID_URL = r'http://vod\.walla\.co\.il/[^/]+/(?P<id>\d+)/(?P<display_id>.+)'
_TEST = {
'url': 'http://vod.walla.co.il/movie/2642630/one-direction-all-for-one',
@ -52,13 +52,10 @@ class WallaIE(SubtitlesInfoExtractor):
subtitles = {}
for subtitle in item.findall('./subtitles/subtitle'):
lang = xpath_text(subtitle, './title')
subtitles[self._SUBTITLE_LANGS.get(lang, lang)] = xpath_text(subtitle, './src')
if self._downloader.params.get('listsubtitles', False):
self._list_available_subtitles(video_id, subtitles)
return
subtitles = self.extract_subtitles(video_id, subtitles)
subtitles[self._SUBTITLE_LANGS.get(lang, lang)] = [{
'ext': 'srt',
'url': xpath_text(subtitle, './src'),
}]
formats = []
for quality in item.findall('./qualities/quality'):

View File

@ -28,6 +28,7 @@ class WDRIE(InfoExtractor):
'title': 'Servicezeit',
'description': 'md5:c8f43e5e815eeb54d0b96df2fba906cb',
'upload_date': '20140310',
'is_live': False
},
'params': {
'skip_download': True,
@ -41,6 +42,7 @@ class WDRIE(InfoExtractor):
'title': 'Marga Spiegel ist tot',
'description': 'md5:2309992a6716c347891c045be50992e4',
'upload_date': '20140311',
'is_live': False
},
'params': {
'skip_download': True,
@ -55,6 +57,7 @@ class WDRIE(InfoExtractor):
'title': 'Erlebte Geschichten: Marga Spiegel (29.11.2009)',
'description': 'md5:2309992a6716c347891c045be50992e4',
'upload_date': '20091129',
'is_live': False
},
},
{
@ -66,6 +69,7 @@ class WDRIE(InfoExtractor):
'title': 'Flavia Coelho: Amar é Amar',
'description': 'md5:7b29e97e10dfb6e265238b32fa35b23a',
'upload_date': '20140717',
'is_live': False
},
},
{
@ -74,6 +78,20 @@ class WDRIE(InfoExtractor):
'info_dict': {
'id': 'mediathek/video/sendungen/quarks_und_co/filterseite-quarks-und-co100',
}
},
{
'url': 'http://www1.wdr.de/mediathek/video/livestream/index.html',
'info_dict': {
'id': 'mdb-103364',
'title': 're:^WDR Fernsehen [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
'description': 'md5:ae2ff888510623bf8d4b115f95a9b7c9',
'ext': 'flv',
'upload_date': '20150212',
'is_live': True
},
'params': {
'skip_download': True,
},
}
]
@ -119,6 +137,10 @@ class WDRIE(InfoExtractor):
video_url = flashvars['dslSrc'][0]
title = flashvars['trackerClipTitle'][0]
thumbnail = flashvars['startPicture'][0] if 'startPicture' in flashvars else None
is_live = flashvars.get('isLive', ['0'])[0] == '1'
if is_live:
title = self._live_title(title)
if 'trackerClipAirTime' in flashvars:
upload_date = flashvars['trackerClipAirTime'][0]
@ -131,6 +153,13 @@ class WDRIE(InfoExtractor):
if video_url.endswith('.f4m'):
video_url += '?hdcore=3.2.0&plugin=aasp-3.2.0.77.18'
ext = 'flv'
elif video_url.endswith('.smil'):
fmt = self._extract_smil_formats(video_url, page_id)[0]
video_url = fmt['url']
sep = '&' if '?' in video_url else '?'
video_url += sep
video_url += 'hdcore=3.3.0&plugin=aasp-3.3.0.99.43'
ext = fmt['ext']
else:
ext = determine_ext(video_url)
@ -144,6 +173,7 @@ class WDRIE(InfoExtractor):
'description': description,
'thumbnail': thumbnail,
'upload_date': upload_date,
'is_live': is_live
}

View File

@ -11,7 +11,6 @@ import time
import traceback
from .common import InfoExtractor, SearchInfoExtractor
from .subtitles import SubtitlesInfoExtractor
from ..jsinterp import JSInterpreter
from ..swfinterp import SWFInterpreter
from ..compat import (
@ -185,7 +184,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
return
class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
class YoutubeIE(YoutubeBaseInfoExtractor):
IE_DESC = 'YouTube.com'
_VALID_URL = r"""(?x)^
(
@ -648,7 +647,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
raise ExtractorError(
'Signature extraction failed: ' + tb, cause=e)
def _get_available_subtitles(self, video_id, webpage):
def _get_subtitles(self, video_id, webpage):
try:
subs_doc = self._download_xml(
'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
@ -662,23 +661,27 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
lang = track.attrib['lang_code']
if lang in sub_lang_list:
continue
params = compat_urllib_parse.urlencode({
'lang': lang,
'v': video_id,
'fmt': self._downloader.params.get('subtitlesformat', 'srt'),
'name': track.attrib['name'].encode('utf-8'),
})
url = 'https://www.youtube.com/api/timedtext?' + params
sub_lang_list[lang] = url
sub_formats = []
for ext in ['sbv', 'vtt', 'srt']:
params = compat_urllib_parse.urlencode({
'lang': lang,
'v': video_id,
'fmt': ext,
'name': track.attrib['name'].encode('utf-8'),
})
sub_formats.append({
'url': 'https://www.youtube.com/api/timedtext?' + params,
'ext': ext,
})
sub_lang_list[lang] = sub_formats
if not sub_lang_list:
self._downloader.report_warning('video doesn\'t have subtitles')
return {}
return sub_lang_list
def _get_available_automatic_caption(self, video_id, webpage):
def _get_automatic_captions(self, video_id, webpage):
"""We need the webpage for getting the captions url, pass it as an
argument to speed up the process."""
sub_format = self._downloader.params.get('subtitlesformat', 'srt')
self.to_screen('%s: Looking for automatic captions' % video_id)
mobj = re.search(r';ytplayer.config = ({.*?});', webpage)
err_msg = 'Couldn\'t find automatic captions for %s' % video_id
@ -708,14 +711,20 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
sub_lang_list = {}
for lang_node in caption_list.findall('target'):
sub_lang = lang_node.attrib['lang_code']
params = compat_urllib_parse.urlencode({
'lang': original_lang,
'tlang': sub_lang,
'fmt': sub_format,
'ts': timestamp,
'kind': caption_kind,
})
sub_lang_list[sub_lang] = caption_url + '&' + params
sub_formats = []
for ext in ['sbv', 'vtt', 'srt']:
params = compat_urllib_parse.urlencode({
'lang': original_lang,
'tlang': sub_lang,
'fmt': ext,
'ts': timestamp,
'kind': caption_kind,
})
sub_formats.append({
'url': caption_url + '&' + params,
'ext': ext,
})
sub_lang_list[sub_lang] = sub_formats
return sub_lang_list
# An extractor error can be raise by the download process if there are
# no automatic captions but there are subtitles
@ -970,10 +979,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
# subtitles
video_subtitles = self.extract_subtitles(video_id, video_webpage)
if self._downloader.params.get('listsubtitles', False):
self._list_available_subtitles(video_id, video_webpage)
return
automatic_captions = self.extract_automatic_captions(video_id, video_webpage)
if 'length_seconds' not in video_info:
self._downloader.report_warning('unable to extract video duration')
@ -1122,6 +1128,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
'description': video_description,
'categories': video_categories,
'subtitles': video_subtitles,
'automatic_captions': automatic_captions,
'duration': video_duration,
'age_limit': 18 if age_gate else 0,
'annotations': video_annotations,
@ -1146,13 +1153,13 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
| p/
)
(
(?:PL|LL|EC|UU|FL|RD)?[0-9A-Za-z-_]{10,}
(?:PL|LL|EC|UU|FL|RD|UL)?[0-9A-Za-z-_]{10,}
# Top tracks, they can also include dots
|(?:MC)[\w\.]*
)
.*
|
((?:PL|LL|EC|UU|FL|RD)[0-9A-Za-z-_]{10,})
((?:PL|LL|EC|UU|FL|RD|UL)[0-9A-Za-z-_]{10,})
)"""
_TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
_VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&amp;[^"]*?index=(?P<index>\d+)'
@ -1237,7 +1244,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
for vid_id in ids]
def _extract_mix(self, playlist_id):
# The mixes are generated from a a single video
# The mixes are generated from a single video
# the id of the playlist is just 'RD' + video_id
url = 'https://youtube.com/watch?v=%s&list=%s' % (playlist_id[-11:], playlist_id)
webpage = self._download_webpage(
@ -1273,7 +1280,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
else:
self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
if playlist_id.startswith('RD'):
if playlist_id.startswith('RD') or playlist_id.startswith('UL'):
# Mixes require a custom extraction process
return self._extract_mix(playlist_id)

View File

@ -0,0 +1,110 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
parse_duration,
parse_iso8601,
xpath_with_ns,
xpath_text,
int_or_none,
)
class ZapiksIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?zapiks\.(?:fr|com)/(?:(?:[a-z]{2}/)?(?P<display_id>.+?)\.html|index\.php\?.*\bmedia_id=(?P<id>\d+))'
_TESTS = [
{
'url': 'http://www.zapiks.fr/ep2s3-bon-appetit-eh-be-viva.html',
'md5': 'aeb3c473b2d564b2d46d664d28d5f050',
'info_dict': {
'id': '80798',
'ext': 'mp4',
'title': 'EP2S3 - Bon Appétit - Eh bé viva les pyrénées con!',
'description': 'md5:7054d6f6f620c6519be1fe710d4da847',
'thumbnail': 're:^https?://.*\.jpg$',
'duration': 528,
'timestamp': 1359044972,
'upload_date': '20130124',
'view_count': int,
'comment_count': int,
},
},
{
'url': 'http://www.zapiks.com/ep3s5-bon-appetit-baqueira-m-1.html',
'only_matching': True,
},
{
'url': 'http://www.zapiks.com/nl/ep3s5-bon-appetit-baqueira-m-1.html',
'only_matching': True,
},
{
'url': 'http://www.zapiks.fr/index.php?action=playerIframe&amp;media_id=118046&amp;width=640&amp;height=360&amp;autoStart=false&amp;language=fr',
'only_matching': True,
},
]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
display_id = mobj.group('display_id') or video_id
webpage = self._download_webpage(url, display_id)
if not video_id:
video_id = self._search_regex(
r'data-media-id="(\d+)"', webpage, 'video id')
playlist = self._download_xml(
'http://www.zapiks.fr/view/index.php?action=playlist&media_id=%s&lang=en' % video_id,
display_id)
NS_MAP = {
'jwplayer': 'http://rss.jwpcdn.com/'
}
def ns(path):
return xpath_with_ns(path, NS_MAP)
item = playlist.find('./channel/item')
title = xpath_text(item, 'title', 'title') or self._og_search_title(webpage)
description = self._og_search_description(webpage, default=None)
thumbnail = xpath_text(
item, ns('./jwplayer:image'), 'thumbnail') or self._og_search_thumbnail(webpage, default=None)
duration = parse_duration(self._html_search_meta(
'duration', webpage, 'duration', default=None))
timestamp = parse_iso8601(self._html_search_meta(
'uploadDate', webpage, 'upload date', default=None), ' ')
view_count = int_or_none(self._search_regex(
r'UserPlays:(\d+)', webpage, 'view count', default=None))
comment_count = int_or_none(self._search_regex(
r'UserComments:(\d+)', webpage, 'comment count', default=None))
formats = []
for source in item.findall(ns('./jwplayer:source')):
format_id = source.attrib['label']
f = {
'url': source.attrib['file'],
'format_id': format_id,
}
m = re.search(r'^(?P<height>\d+)[pP]', format_id)
if m:
f['height'] = int(m.group('height'))
formats.append(f)
self._sort_formats(formats)
return {
'id': video_id,
'title': title,
'description': description,
'thumbnail': thumbnail,
'duration': duration,
'timestamp': timestamp,
'view_count': view_count,
'comment_count': comment_count,
'formats': formats,
}

View File

@ -8,11 +8,11 @@ import sys
from .downloader.external import list_external_downloaders
from .compat import (
compat_expanduser,
compat_get_terminal_size,
compat_getenv,
compat_kwargs,
)
from .utils import (
get_term_width,
write_string,
)
from .version import __version__
@ -100,7 +100,7 @@ def parseOpts(overrideArguments=None):
return opts
# No need to wrap help messages if we're on a wide console
columns = get_term_width()
columns = compat_get_terminal_size().columns
max_width = columns if columns else 80
max_help_position = 80
@ -272,6 +272,10 @@ def parseOpts(overrideArguments=None):
'--no-playlist',
action='store_true', dest='noplaylist', default=False,
help='If the URL refers to a video and a playlist, download only the video.')
selection.add_option(
'--yes-playlist',
action='store_false', dest='noplaylist', default=False,
help='If the URL refers to a video and a playlist, download the playlist.')
selection.add_option(
'--age-limit',
metavar='YEARS', dest='age_limit', default=None, type=int,
@ -387,8 +391,8 @@ def parseOpts(overrideArguments=None):
help='lists all available subtitles for the video')
subtitles.add_option(
'--sub-format',
action='store', dest='subtitlesformat', metavar='FORMAT', default='srt',
help='subtitle format (default=srt) ([sbv/vtt] youtube only)')
action='store', dest='subtitlesformat', metavar='FORMAT', default='best',
help='subtitle format, accepts formats preference, for example: "ass/srt/best"')
subtitles.add_option(
'--sub-lang', '--sub-langs', '--srt-lang',
action='callback', dest='subtitleslangs', metavar='LANGS', type='str',
@ -751,6 +755,10 @@ def parseOpts(overrideArguments=None):
'--exec',
metavar='CMD', dest='exec_cmd',
help='Execute a command on the file after downloading, similar to find\'s -exec syntax. Example: --exec \'adb push {} /sdcard/Music/ && rm {}\'')
postproc.add_option(
'--convert-subtitles', '--convert-subs',
metavar='FORMAT', dest='convertsubtitles', default=None,
help='Convert the subtitles to other format (currently supported: srt|ass|vtt)')
parser.add_option_group(general)
parser.add_option_group(network)

View File

@ -12,6 +12,7 @@ from .ffmpeg import (
FFmpegMergerPP,
FFmpegMetadataPP,
FFmpegVideoConvertorPP,
FFmpegSubtitlesConvertorPP,
)
from .xattrpp import XAttrMetadataPP
from .execafterdownload import ExecAfterDownloadPP
@ -33,6 +34,7 @@ __all__ = [
'FFmpegMergerPP',
'FFmpegMetadataPP',
'FFmpegPostProcessor',
'FFmpegSubtitlesConvertorPP',
'FFmpegVideoConvertorPP',
'XAttrMetadataPP',
]

View File

@ -1,5 +1,6 @@
from __future__ import unicode_literals
import io
import os
import subprocess
import sys
@ -500,10 +501,6 @@ class FFmpegEmbedSubtitlePP(FFmpegPostProcessor):
'zu': 'zul',
}
def __init__(self, downloader=None, subtitlesformat='srt'):
super(FFmpegEmbedSubtitlePP, self).__init__(downloader)
self._subformat = subtitlesformat
@classmethod
def _conver_lang_code(cls, code):
"""Convert language code from ISO 639-1 to ISO 639-2/T"""
@ -513,13 +510,14 @@ class FFmpegEmbedSubtitlePP(FFmpegPostProcessor):
if information['ext'] != 'mp4':
self._downloader.to_screen('[ffmpeg] Subtitles can only be embedded in mp4 files')
return True, information
if not information.get('subtitles'):
subtitles = information.get('requested_subtitles')
if not subtitles:
self._downloader.to_screen('[ffmpeg] There aren\'t any subtitles to embed')
return True, information
sub_langs = [key for key in information['subtitles']]
sub_langs = list(subtitles.keys())
filename = information['filepath']
input_files = [filename] + [subtitles_filename(filename, lang, self._subformat) for lang in sub_langs]
input_files = [filename] + [subtitles_filename(filename, lang, sub_info['ext']) for lang, sub_info in subtitles.items()]
opts = [
'-map', '0',
@ -666,3 +664,40 @@ class FFmpegFixupM4aPP(FFmpegPostProcessor):
os.rename(encodeFilename(temp_filename), encodeFilename(filename))
return True, info
class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor):
def __init__(self, downloader=None, format=None):
super(FFmpegSubtitlesConvertorPP, self).__init__(downloader)
self.format = format
def run(self, info):
subs = info.get('requested_subtitles')
filename = info['filepath']
new_ext = self.format
new_format = new_ext
if new_format == 'vtt':
new_format = 'webvtt'
if subs is None:
self._downloader.to_screen('[ffmpeg] There aren\'t any subtitles to convert')
return True, info
self._downloader.to_screen('[ffmpeg] Converting subtitles')
for lang, sub in subs.items():
ext = sub['ext']
if ext == new_ext:
self._downloader.to_screen(
'[ffmpeg] Subtitle file for %s is already in the requested'
'format' % new_ext)
continue
new_file = subtitles_filename(filename, lang, new_ext)
self.run_ffmpeg(
subtitles_filename(filename, lang, ext),
new_file, ['-f', new_format])
with io.open(new_file, 'rt', encoding='utf-8') as f:
subs[lang] = {
'ext': ext,
'data': f.read(),
}
return True, info

View File

@ -35,7 +35,6 @@ import zlib
from .compat import (
compat_basestring,
compat_chr,
compat_getenv,
compat_html_entities,
compat_http_client,
compat_parse_qs,
@ -54,7 +53,7 @@ from .compat import (
compiled_regex_type = type(re.compile(''))
std_headers = {
'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20100101 Firefox/10.0 (Chrome)',
'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20150101 Firefox/20.0 (Chrome)',
'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
'Accept-Encoding': 'gzip, deflate',
@ -304,6 +303,8 @@ def sanitize_filename(s, restricted=False, is_id=False):
# Common case of "Foreign band name - English song title"
if restricted and result.startswith('-_'):
result = result[2:]
if result.startswith('-'):
result = '_' + result[len('-'):]
if not result:
result = '_'
return result
@ -904,8 +905,8 @@ def _windows_write_string(s, out):
def not_a_console(handle):
if handle == INVALID_HANDLE_VALUE or handle is None:
return True
return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR
or GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)
return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR or
GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)
if not_a_console(h):
return False
@ -1175,22 +1176,6 @@ def parse_filesize(s):
return int(float(num_str) * mult)
def get_term_width():
columns = compat_getenv('COLUMNS', None)
if columns:
return int(columns)
try:
sp = subprocess.Popen(
['stty', 'size'],
stdout=subprocess.PIPE, stderr=subprocess.PIPE)
out, err = sp.communicate()
return int(out.split()[1])
except:
pass
return None
def month_by_name(name):
""" Return the number of a month by (locale-independently) English name """
@ -1292,6 +1277,7 @@ def parse_duration(s):
(?P<only_mins>[0-9.]+)\s*(?:mins?|minutes?)\s*|
(?P<only_hours>[0-9.]+)\s*(?:hours?)|
\s*(?P<hours_reversed>[0-9]+)\s*(?:[:h]|hours?)\s*(?P<mins_reversed>[0-9]+)\s*(?:[:m]|mins?|minutes?)\s*|
(?:
(?:
(?:(?P<days>[0-9]+)\s*(?:[:d]|days?)\s*)?
@ -1310,10 +1296,14 @@ def parse_duration(s):
return float_or_none(m.group('only_hours'), invscale=60 * 60)
if m.group('secs'):
res += int(m.group('secs'))
if m.group('mins_reversed'):
res += int(m.group('mins_reversed')) * 60
if m.group('mins'):
res += int(m.group('mins')) * 60
if m.group('hours'):
res += int(m.group('hours')) * 60 * 60
if m.group('hours_reversed'):
res += int(m.group('hours_reversed')) * 60 * 60
if m.group('days'):
res += int(m.group('days')) * 24 * 60 * 60
if m.group('ms'):

View File

@ -1,3 +1,3 @@
from __future__ import unicode_literals
__version__ = '2015.02.20'
__version__ = '2015.02.28'