diff --git a/README.md b/README.md index 2bea609bf..bc5e0f76d 100644 --- a/README.md +++ b/README.md @@ -70,8 +70,9 @@ which means you can modify it, redistribute it or use it however you like. --default-search PREFIX Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for youtube-dl "large - apple". By default (with value "auto") - youtube-dl guesses. + apple". Use the value "auto" to let + youtube-dl guess. The default value "error" + just throws an error. --ignore-config Do not read configuration files. When given in the global configuration file /etc /youtube-dl.conf: do not read the user @@ -254,7 +255,7 @@ which means you can modify it, redistribute it or use it however you like. 128K (default 5) --recode-video FORMAT Encode the video to another format if necessary (currently supported: - mp4|flv|ogg|webm) + mp4|flv|ogg|webm|mkv) -k, --keep-video keeps the video file on disk after the post-processing; the video is erased by default diff --git a/test/test_all_urls.py b/test/test_all_urls.py index 4b56137ce..2bc81f020 100644 --- a/test/test_all_urls.py +++ b/test/test_all_urls.py @@ -69,9 +69,6 @@ class TestAllURLsMatching(unittest.TestCase): def test_youtube_show_matching(self): self.assertMatch('http://www.youtube.com/show/airdisasters', ['youtube:show']) - def test_youtube_truncated(self): - self.assertMatch('http://www.youtube.com/watch?', ['youtube:truncated_url']) - def test_youtube_search_matching(self): self.assertMatch('http://www.youtube.com/results?search_query=making+mustard', ['youtube:search_url']) self.assertMatch('https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video', ['youtube:search_url']) diff --git a/test/test_playlists.py b/test/test_playlists.py index 71dac1b02..1a38a667b 100644 --- a/test/test_playlists.py +++ b/test/test_playlists.py @@ -28,7 +28,7 @@ from youtube_dl.extractor import ( SoundcloudSetIE, SoundcloudUserIE, SoundcloudPlaylistIE, - TeacherTubeClassroomIE, + TeacherTubeUserIE, LivestreamIE, LivestreamOriginalIE, NHLVideocenterIE, @@ -111,7 +111,7 @@ class TestPlaylists(unittest.TestCase): ie = VineUserIE(dl) result = ie.extract('https://vine.co/Visa') self.assertIsPlaylist(result) - self.assertTrue(len(result['entries']) >= 50) + self.assertTrue(len(result['entries']) >= 47) def test_ustream_channel(self): dl = FakeYDL() @@ -137,6 +137,14 @@ class TestPlaylists(unittest.TestCase): self.assertEqual(result['id'], '9615865') self.assertTrue(len(result['entries']) >= 12) + def test_soundcloud_likes(self): + dl = FakeYDL() + ie = SoundcloudUserIE(dl) + result = ie.extract('https://soundcloud.com/the-concept-band/likes') + self.assertIsPlaylist(result) + self.assertEqual(result['id'], '9615865') + self.assertTrue(len(result['entries']) >= 1) + def test_soundcloud_playlist(self): dl = FakeYDL() ie = SoundcloudPlaylistIE(dl) @@ -379,13 +387,13 @@ class TestPlaylists(unittest.TestCase): result['title'], 'Brace Yourself - Today\'s Weirdest News') self.assertTrue(len(result['entries']) >= 10) - def test_TeacherTubeClassroom(self): + def test_TeacherTubeUser(self): dl = FakeYDL() - ie = TeacherTubeClassroomIE(dl) - result = ie.extract('http://www.teachertube.com/view_classroom.php?user=rbhagwati2') + ie = TeacherTubeUserIE(dl) + result = ie.extract('http://www.teachertube.com/user/profile/rbhagwati2') self.assertIsPlaylist(result) self.assertEqual(result['id'], 'rbhagwati2') - self.assertTrue(len(result['entries']) >= 20) + self.assertTrue(len(result['entries']) >= 179) if __name__ == '__main__': unittest.main() diff --git a/test/test_subtitles.py b/test/test_subtitles.py index 5736fe581..48c302198 100644 --- a/test/test_subtitles.py +++ b/test/test_subtitles.py @@ -87,7 +87,7 @@ class TestYoutubeSubtitles(BaseTestSubtitles): def test_youtube_nosubtitles(self): self.DL.expect_warning(u'video doesn\'t have subtitles') - self.url = 'sAjKT8FhjI8' + self.url = 'n5BB19UTcdA' self.DL.params['writesubtitles'] = True self.DL.params['allsubtitles'] = True subtitles = self.getSubtitles() diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py index 8417c55a6..8d46fe108 100644 --- a/test/test_youtube_signature.py +++ b/test/test_youtube_signature.py @@ -33,6 +33,12 @@ _TESTS = [ 90, u']\\[@?>=<;:/.-,+*)(\'&%$#"hZYXWVUTSRQPONMLKJIHGFEDCBAzyxwvutsrqponmlkjiagfedcb39876', ), + ( + u'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflXGBaUN.js', + u'js', + u'2ACFC7A61CA478CD21425E5A57EBD73DDC78E22A.2094302436B2D377D14A3BBA23022D023B8BC25AA', + u'A52CB8B320D22032ABB3A41D773D2B6342034902.A22E87CDD37DBE75A5E52412DC874AC16A7CFCA2', + ), ] @@ -44,7 +50,7 @@ class TestSignature(unittest.TestCase): os.mkdir(self.TESTDATA_DIR) -def make_tfunc(url, stype, sig_length, expected_sig): +def make_tfunc(url, stype, sig_input, expected_sig): basename = url.rpartition('/')[2] m = re.match(r'.*-([a-zA-Z0-9_-]+)\.[a-z]+$', basename) assert m, '%r should follow URL format' % basename @@ -66,7 +72,9 @@ def make_tfunc(url, stype, sig_length, expected_sig): with open(fn, 'rb') as testf: swfcode = testf.read() func = ie._parse_sig_swf(swfcode) - src_sig = compat_str(string.printable[:sig_length]) + src_sig = ( + compat_str(string.printable[:sig_input]) + if isinstance(sig_input, int) else sig_input) got_sig = func(src_sig) self.assertEqual(got_sig, expected_sig) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index dc0ba986a..3dff723b8 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -993,6 +993,8 @@ class YoutubeDL(object): fd = get_suitable_downloader(info)(self, self.params) for ph in self._progress_hooks: fd.add_progress_hook(ph) + if self.params.get('verbose'): + self.to_stdout('[debug] Invoking downloader on %r' % info.get('url')) return fd.download(name, info) if info_dict.get('requested_formats') is not None: downloaded = [] diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 1e01432d2..5e16a5491 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -59,6 +59,11 @@ __authors__ = ( 'Adam Thalhammer', 'Georg Jähnig', 'Ralf Haring', + 'Koki Takahashi', + 'Ariset Llerena', + 'Adam Malcontenti-Wilson', + 'Tobias Bell', + 'Naglis Jonaitis', ) __license__ = 'Public Domain' @@ -269,7 +274,7 @@ def parseOpts(overrideArguments=None): general.add_option( '--default-search', dest='default_search', metavar='PREFIX', - help='Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for youtube-dl "large apple". By default (with value "auto") youtube-dl guesses.') + help='Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for youtube-dl "large apple". Use the value "auto" to let youtube-dl guess. The default value "error" just throws an error.') general.add_option( '--ignore-config', action='store_true', @@ -505,7 +510,7 @@ def parseOpts(overrideArguments=None): postproc.add_option('--audio-quality', metavar='QUALITY', dest='audioquality', default='5', help='ffmpeg/avconv audio quality specification, insert a value between 0 (better) and 9 (worse) for VBR or a specific bitrate like 128K (default 5)') postproc.add_option('--recode-video', metavar='FORMAT', dest='recodevideo', default=None, - help='Encode the video to another format if necessary (currently supported: mp4|flv|ogg|webm)') + help='Encode the video to another format if necessary (currently supported: mp4|flv|ogg|webm|mkv)') postproc.add_option('-k', '--keep-video', action='store_true', dest='keepvideo', default=False, help='keeps the video file on disk after the post-processing; the video is erased by default') postproc.add_option('--no-post-overwrites', action='store_true', dest='nopostoverwrites', default=False, diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index f910d1a26..e49ac3e52 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -3,6 +3,7 @@ from .addanime import AddAnimeIE from .aftonbladet import AftonbladetIE from .anitube import AnitubeIE from .aol import AolIE +from .allocine import AllocineIE from .aparat import AparatIE from .appletrailers import AppleTrailersIE from .archiveorg import ArchiveOrgIE @@ -82,6 +83,7 @@ from .extremetube import ExtremeTubeIE from .facebook import FacebookIE from .faz import FazIE from .fc2 import FC2IE +from .firedrive import FiredriveIE from .firstpost import FirstpostIE from .firsttv import FirstTVIE from .fivemin import FiveMinIE @@ -104,6 +106,7 @@ from .freesound import FreesoundIE from .freespeech import FreespeechIE from .funnyordie import FunnyOrDieIE from .gamekings import GamekingsIE +from .gameone import GameOneIE from .gamespot import GameSpotIE from .gametrailers import GametrailersIE from .gdcvault import GDCVaultIE @@ -111,6 +114,7 @@ from .generic import GenericIE from .googleplus import GooglePlusIE from .googlesearch import GoogleSearchIE from .gorillavid import GorillaVidIE +from .goshgay import GoshgayIE from .hark import HarkIE from .helsinki import HelsinkiIE from .hentaistigma import HentaiStigmaIE @@ -228,6 +232,7 @@ from .radiofrance import RadioFranceIE from .rai import RaiIE from .rbmaradio import RBMARadioIE from .redtube import RedTubeIE +from .reverbnation import ReverbNationIE from .ringtv import RingTVIE from .ro220 import Ro220IE from .rottentomatoes import RottenTomatoesIE @@ -236,6 +241,7 @@ from .rtbf import RTBFIE from .rtlnow import RTLnowIE from .rts import RTSIE from .rtve import RTVEALaCartaIE +from .ruhd import RUHDIE from .rutube import ( RutubeIE, RutubeChannelIE, @@ -245,6 +251,7 @@ from .rutube import ( from .rutv import RUTVIE from .savefrom import SaveFromIE from .scivee import SciVeeIE +from .screencast import ScreencastIE from .servingsys import ServingSysIE from .sina import SinaIE from .slideshare import SlideshareIE @@ -263,8 +270,8 @@ from .soundcloud import ( SoundcloudPlaylistIE ) from .soundgasm import SoundgasmIE -from .southparkstudios import ( - SouthParkStudiosIE, +from .southpark import ( + SouthParkIE, SouthparkDeIE, ) from .space import SpaceIE @@ -282,12 +289,13 @@ from .sztvhu import SztvHuIE from .tagesschau import TagesschauIE from .teachertube import ( TeacherTubeIE, - TeacherTubeClassroomIE, + TeacherTubeUserIE, ) from .teachingchannel import TeachingChannelIE from .teamcoco import TeamcocoIE from .techtalks import TechTalksIE from .ted import TEDIE +from .tenplay import TenPlayIE from .testurl import TestURLIE from .tf1 import TF1IE from .theplatform import ThePlatformIE @@ -335,12 +343,14 @@ from .vimeo import ( VimeoReviewIE, VimeoWatchLaterIE, ) +from .vimple import VimpleIE from .vine import ( VineIE, VineUserIE, ) from .viki import VikiIE from .vk import VKIE +from .vodlocker import VodlockerIE from .vube import VubeIE from .vuclip import VuClipIE from .vulture import VultureIE diff --git a/youtube_dl/extractor/allocine.py b/youtube_dl/extractor/allocine.py new file mode 100644 index 000000000..34f0cd49b --- /dev/null +++ b/youtube_dl/extractor/allocine.py @@ -0,0 +1,89 @@ +# -*- coding: utf-8 -*- +from __future__ import unicode_literals + +import re +import json + +from .common import InfoExtractor +from ..utils import ( + compat_str, + qualities, + determine_ext, +) + + +class AllocineIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?allocine\.fr/(?Particle|video|film)/(fichearticle_gen_carticle=|player_gen_cmedia=|fichefilm_gen_cfilm=)(?P[0-9]+)(?:\.html)?' + + _TESTS = [{ + 'url': 'http://www.allocine.fr/article/fichearticle_gen_carticle=18635087.html', + 'md5': '0c9fcf59a841f65635fa300ac43d8269', + 'info_dict': { + 'id': '19546517', + 'ext': 'mp4', + 'title': 'Astérix - Le Domaine des Dieux Teaser VF', + 'description': 'md5:4a754271d9c6f16c72629a8a993ee884', + 'thumbnail': 're:http://.*\.jpg', + }, + }, { + 'url': 'http://www.allocine.fr/video/player_gen_cmedia=19540403&cfilm=222257.html', + 'md5': 'd0cdce5d2b9522ce279fdfec07ff16e0', + 'info_dict': { + 'id': '19540403', + 'ext': 'mp4', + 'title': 'Planes 2 Bande-annonce VF', + 'description': 'md5:c4b1f7bd682a91de6491ada267ec0f4d', + 'thumbnail': 're:http://.*\.jpg', + }, + }, { + 'url': 'http://www.allocine.fr/film/fichefilm_gen_cfilm=181290.html', + 'md5': '101250fb127ef9ca3d73186ff22a47ce', + 'info_dict': { + 'id': '19544709', + 'ext': 'mp4', + 'title': 'Dragons 2 - Bande annonce finale VF', + 'description': 'md5:e74a4dc750894bac300ece46c7036490', + 'thumbnail': 're:http://.*\.jpg', + }, + }] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + typ = mobj.group('typ') + display_id = mobj.group('id') + + webpage = self._download_webpage(url, display_id) + + if typ == 'film': + video_id = self._search_regex(r'href="/video/player_gen_cmedia=([0-9]+).+"', webpage, 'video id') + else: + player = self._search_regex(r'data-player=\'([^\']+)\'>', webpage, 'data player') + + player_data = json.loads(player) + video_id = compat_str(player_data['refMedia']) + + xml = self._download_xml('http://www.allocine.fr/ws/AcVisiondataV4.ashx?media=%s' % video_id, display_id) + + video = xml.find('.//AcVisionVideo').attrib + quality = qualities(['ld', 'md', 'hd']) + + formats = [] + for k, v in video.items(): + if re.match(r'.+_path', k): + format_id = k.split('_')[0] + formats.append({ + 'format_id': format_id, + 'quality': quality(format_id), + 'url': v, + 'ext': determine_ext(v), + }) + + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': video['videoTitle'], + 'thumbnail': self._og_search_thumbnail(webpage), + 'formats': formats, + 'description': self._og_search_description(webpage), + } diff --git a/youtube_dl/extractor/anitube.py b/youtube_dl/extractor/anitube.py index 2b019daa9..31f0d417c 100644 --- a/youtube_dl/extractor/anitube.py +++ b/youtube_dl/extractor/anitube.py @@ -1,22 +1,24 @@ +from __future__ import unicode_literals + import re from .common import InfoExtractor class AnitubeIE(InfoExtractor): - IE_NAME = u'anitube.se' + IE_NAME = 'anitube.se' _VALID_URL = r'https?://(?:www\.)?anitube\.se/video/(?P\d+)' _TEST = { - u'url': u'http://www.anitube.se/video/36621', - u'md5': u'59d0eeae28ea0bc8c05e7af429998d43', - u'file': u'36621.mp4', - u'info_dict': { - u'id': u'36621', - u'ext': u'mp4', - u'title': u'Recorder to Randoseru 01', + 'url': 'http://www.anitube.se/video/36621', + 'md5': '59d0eeae28ea0bc8c05e7af429998d43', + 'info_dict': { + 'id': '36621', + 'ext': 'mp4', + 'title': 'Recorder to Randoseru 01', + 'duration': 180.19, }, - u'skip': u'Blocked in the US', + 'skip': 'Blocked in the US', } def _real_extract(self, url): @@ -24,13 +26,15 @@ class AnitubeIE(InfoExtractor): video_id = mobj.group('id') webpage = self._download_webpage(url, video_id) - key = self._html_search_regex(r'http://www\.anitube\.se/embed/([A-Za-z0-9_-]*)', - webpage, u'key') + key = self._html_search_regex( + r'http://www\.anitube\.se/embed/([A-Za-z0-9_-]*)', webpage, 'key') - config_xml = self._download_xml('http://www.anitube.se/nuevo/econfig.php?key=%s' % key, - key) + config_xml = self._download_xml( + 'http://www.anitube.se/nuevo/econfig.php?key=%s' % key, key) video_title = config_xml.find('title').text + thumbnail = config_xml.find('image').text + duration = float(config_xml.find('duration').text) formats = [] video_url = config_xml.find('file') @@ -49,5 +53,7 @@ class AnitubeIE(InfoExtractor): return { 'id': video_id, 'title': video_title, + 'thumbnail': thumbnail, + 'duration': duration, 'formats': formats } diff --git a/youtube_dl/extractor/arte.py b/youtube_dl/extractor/arte.py index b42102f3d..9591bad8a 100644 --- a/youtube_dl/extractor/arte.py +++ b/youtube_dl/extractor/arte.py @@ -39,7 +39,10 @@ class ArteTvIE(InfoExtractor): formats = [{ 'forma_id': q.attrib['quality'], - 'url': q.text, + # The playpath starts at 'mp4:', if we don't manually + # split the url, rtmpdump will incorrectly parse them + 'url': q.text.split('mp4:', 1)[0], + 'play_path': 'mp4:' + q.text.split('mp4:', 1)[1], 'ext': 'flv', 'quality': 2 if q.attrib['quality'] == 'hd' else 1, } for q in config.findall('./urls/url')] diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index e4e4feef9..e68657314 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1,11 +1,12 @@ import base64 import hashlib import json +import netrc import os import re import socket import sys -import netrc +import time import xml.etree.ElementTree from ..utils import ( @@ -462,14 +463,14 @@ class InfoExtractor(object): def _og_search_url(self, html, **kargs): return self._og_search_property('url', html, **kargs) - def _html_search_meta(self, name, html, display_name=None, fatal=False): + def _html_search_meta(self, name, html, display_name=None, fatal=False, **kwargs): if display_name is None: display_name = name return self._html_search_regex( r'''(?ix)]+(?:itemprop|name|property)=["\']%s["\']) [^>]+content=["\']([^"\']+)["\']''' % re.escape(name), - html, display_name, fatal=fatal) + html, display_name, fatal=fatal, **kwargs) def _dc_search_uploader(self, html): return self._html_search_meta('dc.creator', html, 'uploader') @@ -575,6 +576,13 @@ class InfoExtractor(object): else: return url + def _sleep(self, timeout, video_id, msg_template=None): + if msg_template is None: + msg_template = u'%(video_id)s: Waiting for %(timeout)s seconds' + msg = msg_template % {'video_id': video_id, 'timeout': timeout} + self.to_screen(msg) + time.sleep(timeout) + class SearchInfoExtractor(InfoExtractor): """ @@ -618,4 +626,3 @@ class SearchInfoExtractor(InfoExtractor): @property def SEARCH_KEY(self): return self._SEARCH_KEY - diff --git a/youtube_dl/extractor/criterion.py b/youtube_dl/extractor/criterion.py index 31fe3d57b..4fb178165 100644 --- a/youtube_dl/extractor/criterion.py +++ b/youtube_dl/extractor/criterion.py @@ -1,40 +1,43 @@ # -*- coding: utf-8 -*- +from __future__ import unicode_literals import re from .common import InfoExtractor -from ..utils import determine_ext + class CriterionIE(InfoExtractor): - _VALID_URL = r'https?://www\.criterion\.com/films/(\d*)-.+' + _VALID_URL = r'https?://www\.criterion\.com/films/(?P[0-9]+)-.+' _TEST = { - u'url': u'http://www.criterion.com/films/184-le-samourai', - u'file': u'184.mp4', - u'md5': u'bc51beba55685509883a9a7830919ec3', - u'info_dict': { - u"title": u"Le Samouraï", - u"description" : u'md5:a2b4b116326558149bef81f76dcbb93f', + 'url': 'http://www.criterion.com/films/184-le-samourai', + 'md5': 'bc51beba55685509883a9a7830919ec3', + 'info_dict': { + 'id': '184', + 'ext': 'mp4', + 'title': 'Le Samouraï', + 'description': 'md5:a2b4b116326558149bef81f76dcbb93f', } } def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) - video_id = mobj.group(1) + video_id = mobj.group('id') webpage = self._download_webpage(url, video_id) - final_url = self._search_regex(r'so.addVariable\("videoURL", "(.+?)"\)\;', - webpage, 'video url') - title = self._html_search_regex(r'', - webpage, 'video title') - description = self._html_search_regex(r'', - webpage, 'video description') - thumbnail = self._search_regex(r'so.addVariable\("thumbnailURL", "(.+?)"\)\;', - webpage, 'thumbnail url') + final_url = self._search_regex( + r'so.addVariable\("videoURL", "(.+?)"\)\;', webpage, 'video url') + title = self._og_search_title(webpage) + description = self._html_search_regex( + r'', + webpage, 'video description') + thumbnail = self._search_regex( + r'so.addVariable\("thumbnailURL", "(.+?)"\)\;', + webpage, 'thumbnail url') - return {'id': video_id, - 'url' : final_url, - 'title': title, - 'ext': determine_ext(final_url), - 'description': description, - 'thumbnail': thumbnail, - } + return { + 'id': video_id, + 'url': final_url, + 'title': title, + 'description': description, + 'thumbnail': thumbnail, + } diff --git a/youtube_dl/extractor/firedrive.py b/youtube_dl/extractor/firedrive.py new file mode 100644 index 000000000..d26145db1 --- /dev/null +++ b/youtube_dl/extractor/firedrive.py @@ -0,0 +1,83 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + compat_urllib_parse, + compat_urllib_request, + determine_ext, +) + + +class FiredriveIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?firedrive\.com/' + \ + '(?:file|embed)/(?P[0-9a-zA-Z]+)' + _FILE_DELETED_REGEX = r'
' + + _TESTS = [{ + 'url': 'https://www.firedrive.com/file/FEB892FA160EBD01', + 'md5': 'd5d4252f80ebeab4dc2d5ceaed1b7970', + 'info_dict': { + 'id': 'FEB892FA160EBD01', + 'ext': 'flv', + 'title': 'bbb_theora_486kbit.flv', + 'thumbnail': 're:^http://.*\.jpg$', + }, + }] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + + url = 'http://firedrive.com/file/%s' % video_id + + webpage = self._download_webpage(url, video_id) + + if re.search(self._FILE_DELETED_REGEX, webpage) is not None: + raise ExtractorError('Video %s does not exist' % video_id, + expected=True) + + fields = dict(re.findall(r'''(?x)(.+)
', + webpage, 'title') + thumbnail = self._search_regex(r'image:\s?"(//[^\"]+)', webpage, + 'thumbnail', fatal=False) + if thumbnail is not None: + thumbnail = 'http:' + thumbnail + + ext = self._search_regex(r'type:\s?\'([^\']+)\',', + webpage, 'extension', fatal=False) + video_url = self._search_regex( + r'file:\s?\'(http[^\']+)\',', webpage, 'file url') + + formats = [{ + 'format_id': 'sd', + 'url': video_url, + 'ext': ext, + }] + + return { + 'id': video_id, + 'title': title, + 'thumbnail': thumbnail, + 'formats': formats, + } diff --git a/youtube_dl/extractor/gameone.py b/youtube_dl/extractor/gameone.py new file mode 100644 index 000000000..b580f52fb --- /dev/null +++ b/youtube_dl/extractor/gameone.py @@ -0,0 +1,90 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + xpath_with_ns, + parse_iso8601 +) + +NAMESPACE_MAP = { + 'media': 'http://search.yahoo.com/mrss/', +} + +# URL prefix to download the mp4 files directly instead of streaming via rtmp +# Credits go to XBox-Maniac +# http://board.jdownloader.org/showpost.php?p=185835&postcount=31 +RAW_MP4_URL = 'http://cdn.riptide-mtvn.com/' + + +class GameOneIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?gameone\.de/tv/(?P\d+)' + _TEST = { + 'url': 'http://www.gameone.de/tv/288', + 'md5': '136656b7fb4c9cb4a8e2d500651c499b', + 'info_dict': { + 'id': '288', + 'ext': 'mp4', + 'title': 'Game One - Folge 288', + 'duration': 1238, + 'thumbnail': 'http://s3.gameone.de/gameone/assets/video_metas/teaser_images/000/643/636/big/640x360.jpg', + 'description': 'FIFA-Pressepokal 2014, Star Citizen, Kingdom Come: Deliverance, Project Cars, Schöner Trants Nerdquiz Folge 2 Runde 1', + 'age_limit': 16, + 'upload_date': '20140513', + 'timestamp': 1399980122, + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + + webpage = self._download_webpage(url, video_id) + og_video = self._og_search_video_url(webpage, secure=False) + description = self._html_search_meta('description', webpage) + age_limit = int( + self._search_regex( + r'age=(\d+)', + self._html_search_meta( + 'age-de-meta-label', + webpage), + 'age_limit', + '0')) + mrss_url = self._search_regex(r'mrss=([^&]+)', og_video, 'mrss') + + mrss = self._download_xml(mrss_url, video_id, 'Downloading mrss') + title = mrss.find('.//item/title').text + thumbnail = mrss.find('.//item/image').get('url') + timestamp = parse_iso8601(mrss.find('.//pubDate').text, delimiter=' ') + content = mrss.find(xpath_with_ns('.//media:content', NAMESPACE_MAP)) + content_url = content.get('url') + + content = self._download_xml( + content_url, + video_id, + 'Downloading media:content') + rendition_items = content.findall('.//rendition') + duration = int(rendition_items[0].get('duration')) + formats = [ + { + 'url': re.sub(r'.*/(r2)', RAW_MP4_URL + r'\1', r.find('./src').text), + 'width': int(r.get('width')), + 'height': int(r.get('height')), + 'tbr': int(r.get('bitrate')), + } + for r in rendition_items + ] + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': title, + 'thumbnail': thumbnail, + 'duration': duration, + 'formats': formats, + 'description': description, + 'age_limit': age_limit, + 'timestamp': timestamp, + } diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 9dd03aba4..f97b59845 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -383,7 +383,7 @@ class GenericIE(InfoExtractor): if not parsed_url.scheme: default_search = self._downloader.params.get('default_search') if default_search is None: - default_search = 'auto_warning' + default_search = 'error' if default_search in ('auto', 'auto_warning'): if '/' in url: @@ -397,8 +397,13 @@ class GenericIE(InfoExtractor): expected=True) else: self._downloader.report_warning( - 'Falling back to youtube search for %s . Set --default-search to "auto" to suppress this warning.' % url) + 'Falling back to youtube search for %s . Set --default-search "auto" to suppress this warning.' % url) return self.url_result('ytsearch:' + url) + elif default_search == 'error': + raise ExtractorError( + ('%r is not a valid URL. ' + 'Set --default-search "ytseach" (or run youtube-dl "ytsearch:%s" ) to search YouTube' + ) % (url, url), expected=True) else: assert ':' in default_search return self.url_result(default_search + url) @@ -620,6 +625,11 @@ class GenericIE(InfoExtractor): if mobj is not None: return self.url_result(mobj.group('url'), 'VK') + # Look for embedded ivi player + mobj = re.search(r']+?src=(["\'])(?Phttps?://(?:www\.)?ivi\.ru/video/player.+?)\1', webpage) + if mobj is not None: + return self.url_result(mobj.group('url'), 'Ivi') + # Look for embedded Huffington Post player mobj = re.search( r']+?src=(["\'])(?Phttps?://embed\.live\.huffingtonpost\.com/.+?)\1', webpage) diff --git a/youtube_dl/extractor/googleplus.py b/youtube_dl/extractor/googleplus.py index cc29a7e5d..07d994b44 100644 --- a/youtube_dl/extractor/googleplus.py +++ b/youtube_dl/extractor/googleplus.py @@ -52,8 +52,7 @@ class GooglePlusIE(InfoExtractor): # Extract title # Get the first line for title - video_title = self._html_search_regex(r'[0-9a-zA-Z]+)(?:-[0-9]+x[0-9]+\.html)?' + IE_DESC = 'GorillaVid.in and daclips.in' + _VALID_URL = r'''(?x) + https?://(?P(?:www\.)? + (?:daclips\.in|gorillavid\.in))/ + (?:embed-)?(?P[0-9a-zA-Z]+)(?:-[0-9]+x[0-9]+\.html)? + ''' _TESTS = [{ 'url': 'http://gorillavid.in/06y9juieqpmi', @@ -32,15 +37,22 @@ class GorillaVidIE(InfoExtractor): 'title': 'Say something nice', 'thumbnail': 're:http://.*\.jpg', }, + }, { + 'url': 'http://daclips.in/3rso4kdn6f9m', + 'md5': '1ad8fd39bb976eeb66004d3a4895f106', + 'info_dict': { + 'id': '3rso4kdn6f9m', + 'ext': 'mp4', + 'title': 'Micro Pig piglets ready on 16th July 2009', + 'thumbnail': 're:http://.*\.jpg', + }, }] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') - url = 'http://gorillavid.in/%s' % video_id - - webpage = self._download_webpage(url, video_id) + webpage = self._download_webpage('http://%s/%s' % (mobj.group('host'), video_id), video_id) fields = dict(re.findall(r'''(?x)\d+?)($|/)' + _TEST = { + 'url': 'http://www.goshgay.com/video4116282', + 'md5': '268b9f3c3229105c57859e166dd72b03', + 'info_dict': { + 'id': '4116282', + 'ext': 'flv', + 'title': 'md5:089833a4790b5e103285a07337f245bf', + 'thumbnail': 're:http://.*\.jpg', + 'age_limit': 18, + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + + webpage = self._download_webpage(url, video_id) + title = self._search_regex(r'class="video-title">

(.+?)<', webpage, 'title') + + player_config = self._search_regex( + r'(?s)jwplayer\("player"\)\.setup\(({.+?})\)', webpage, 'config settings') + player_vars = json.loads(player_config.replace("'", '"')) + width = str_to_int(player_vars.get('width')) + height = str_to_int(player_vars.get('height')) + config_uri = player_vars.get('config') + + if config_uri is None: + raise ExtractorError('Missing config URI') + node = self._download_xml(config_uri, video_id, 'Downloading player config XML', + errnote='Unable to download XML') + if node is None: + raise ExtractorError('Missing config XML') + if node.tag != 'config': + raise ExtractorError('Missing config attribute') + fns = node.findall('file') + imgs = node.findall('image') + if len(fns) != 1: + raise ExtractorError('Missing media URI') + video_url = fns[0].text + if len(imgs) < 1: + thumbnail = None + else: + thumbnail = imgs[0].text + + url_comp = compat_urlparse.urlparse(url) + ref = "%s://%s%s" % (url_comp[0], url_comp[1], url_comp[2]) + + return { + 'id': video_id, + 'url': video_url, + 'title': title, + 'width': width, + 'height': height, + 'thumbnail': thumbnail, + 'http_referer': ref, + 'age_limit': 18, + } diff --git a/youtube_dl/extractor/ivi.py b/youtube_dl/extractor/ivi.py index 528be1524..4027deb70 100644 --- a/youtube_dl/extractor/ivi.py +++ b/youtube_dl/extractor/ivi.py @@ -14,7 +14,7 @@ from ..utils import ( class IviIE(InfoExtractor): IE_DESC = 'ivi.ru' IE_NAME = 'ivi' - _VALID_URL = r'https?://(?:www\.)?ivi\.ru/watch(?:/(?P[^/]+))?/(?P\d+)' + _VALID_URL = r'https?://(?:www\.)?ivi\.ru/(?:watch/(?:[^/]+/)?|video/player\?.*?videoId=)(?P\d+)' _TESTS = [ # Single movie diff --git a/youtube_dl/extractor/mpora.py b/youtube_dl/extractor/mpora.py index 39d6feb98..387935d4d 100644 --- a/youtube_dl/extractor/mpora.py +++ b/youtube_dl/extractor/mpora.py @@ -28,7 +28,7 @@ class MporaIE(InfoExtractor): webpage = self._download_webpage(url, video_id) data_json = self._search_regex( - r"new FM\.Player\('[^']+',\s*(\{.*?)\);\n", webpage, 'json') + r"new FM\.Player\('[^']+',\s*(\{.*?)\).player;", webpage, 'json') data = json.loads(data_json) diff --git a/youtube_dl/extractor/mtv.py b/youtube_dl/extractor/mtv.py index af9490ccc..228b42d2b 100644 --- a/youtube_dl/extractor/mtv.py +++ b/youtube_dl/extractor/mtv.py @@ -158,6 +158,9 @@ class MTVServicesInfoExtractor(InfoExtractor): if mgid.endswith('.swf'): mgid = mgid[:-4] except RegexNotFoundError: + mgid = None + + if mgid is None or ':' not in mgid: mgid = self._search_regex( [r'data-mgid="(.*?)"', r'swfobject.embedSWF\(".*?(mgid:.*?)"'], webpage, u'mgid') diff --git a/youtube_dl/extractor/ndr.py b/youtube_dl/extractor/ndr.py index 3d6096e46..94d5ba982 100644 --- a/youtube_dl/extractor/ndr.py +++ b/youtube_dl/extractor/ndr.py @@ -18,15 +18,15 @@ class NDRIE(InfoExtractor): _TESTS = [ { - 'url': 'http://www.ndr.de/fernsehen/sendungen/markt/markt7959.html', - 'md5': 'e7a6079ca39d3568f4996cb858dd6708', + 'url': 'http://www.ndr.de/fernsehen/media/dienordreportage325.html', + 'md5': '4a4eeafd17c3058b65f0c8f091355855', 'note': 'Video file', 'info_dict': { - 'id': '7959', + 'id': '325', 'ext': 'mp4', - 'title': 'Markt - die ganze Sendung', - 'description': 'md5:af9179cf07f67c5c12dc6d9997e05725', - 'duration': 2655, + 'title': 'Blaue Bohnen aus Blocken', + 'description': 'md5:190d71ba2ccddc805ed01547718963bc', + 'duration': 1715, }, }, { diff --git a/youtube_dl/extractor/newstube.py b/youtube_dl/extractor/newstube.py index 2fd5b8f04..551bd4d7a 100644 --- a/youtube_dl/extractor/newstube.py +++ b/youtube_dl/extractor/newstube.py @@ -4,18 +4,19 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from ..utils import ExtractorError class NewstubeIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?newstube\.ru/media/(?P.+)' _TEST = { - 'url': 'http://newstube.ru/media/na-korable-progress-prodolzhaetsya-testirovanie-sistemy-kurs', + 'url': 'http://www.newstube.ru/media/telekanal-cnn-peremestil-gorod-slavyansk-v-krym', 'info_dict': { - 'id': 'd156a237-a6e9-4111-a682-039995f721f1', + 'id': '728e0ef2-e187-4012-bac0-5a081fdcb1f6', 'ext': 'flv', - 'title': 'На корабле «Прогресс» продолжается тестирование системы «Курс»', - 'description': 'md5:d0cbe7b4a6f600552617e48548d5dc77', - 'duration': 20.04, + 'title': 'Телеканал CNN переместил город Славянск в Крым', + 'description': 'md5:419a8c9f03442bc0b0a794d689360335', + 'duration': 31.05, }, 'params': { # rtmp download @@ -40,6 +41,10 @@ class NewstubeIE(InfoExtractor): def ns(s): return s.replace('/', '/%(ns)s') % {'ns': '{http://app1.newstube.ru/N2SiteWS/player.asmx}'} + error_message = player.find(ns('./ErrorMessage')) + if error_message is not None: + raise ExtractorError('%s returned error: %s' % (self.IE_NAME, error_message.text), expected=True) + session_id = player.find(ns('./SessionId')).text media_info = player.find(ns('./Medias/MediaInfo')) title = media_info.find(ns('./Name')).text diff --git a/youtube_dl/extractor/niconico.py b/youtube_dl/extractor/niconico.py index 517a72561..c0c139b5d 100644 --- a/youtube_dl/extractor/niconico.py +++ b/youtube_dl/extractor/niconico.py @@ -8,10 +8,9 @@ from ..utils import ( compat_urllib_parse, compat_urllib_request, compat_urlparse, - compat_str, - - ExtractorError, unified_strdate, + parse_duration, + int_or_none, ) @@ -30,6 +29,7 @@ class NiconicoIE(InfoExtractor): 'uploader_id': '2698420', 'upload_date': '20131123', 'description': '(c) copyright 2008, Blender Foundation / www.bigbuckbunny.org', + 'duration': 33, }, 'params': { 'username': 'ydl.niconico@gmail.com', @@ -37,17 +37,20 @@ class NiconicoIE(InfoExtractor): }, } - _VALID_URL = r'^https?://(?:www\.|secure\.)?nicovideo\.jp/watch/([a-z][a-z][0-9]+)(?:.*)$' + _VALID_URL = r'https?://(?:www\.|secure\.)?nicovideo\.jp/watch/((?:[a-z]{2})?[0-9]+)' _NETRC_MACHINE = 'niconico' + # Determine whether the downloader uses authentication to download video + _AUTHENTICATE = False def _real_initialize(self): - self._login() + if self._downloader.params.get('username', None) is not None: + self._AUTHENTICATE = True + + if self._AUTHENTICATE: + self._login() def _login(self): (username, password) = self._get_login_info() - if username is None: - # Login is required - raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True) # Log in login_form_strs = { @@ -79,44 +82,66 @@ class NiconicoIE(InfoExtractor): 'http://ext.nicovideo.jp/api/getthumbinfo/' + video_id, video_id, note='Downloading video info page') - # Get flv info - flv_info_webpage = self._download_webpage( - 'http://flapi.nicovideo.jp/api/getflv?v=' + video_id, - video_id, 'Downloading flv info') + if self._AUTHENTICATE: + # Get flv info + flv_info_webpage = self._download_webpage( + 'http://flapi.nicovideo.jp/api/getflv?v=' + video_id, + video_id, 'Downloading flv info') + else: + # Get external player info + ext_player_info = self._download_webpage( + 'http://ext.nicovideo.jp/thumb_watch/' + video_id, video_id) + thumb_play_key = self._search_regex( + r'\'thumbPlayKey\'\s*:\s*\'(.*?)\'', ext_player_info, 'thumbPlayKey') + + # Get flv info + flv_info_data = compat_urllib_parse.urlencode({ + 'k': thumb_play_key, + 'v': video_id + }) + flv_info_request = compat_urllib_request.Request( + 'http://ext.nicovideo.jp/thumb_watch', flv_info_data, + {'Content-Type': 'application/x-www-form-urlencoded'}) + flv_info_webpage = self._download_webpage( + flv_info_request, video_id, + note='Downloading flv info', errnote='Unable to download flv info') + video_real_url = compat_urlparse.parse_qs(flv_info_webpage)['url'][0] # Start extracting information - video_title = video_info.find('.//title').text - video_extension = video_info.find('.//movie_type').text - video_format = video_extension.upper() - video_thumbnail = video_info.find('.//thumbnail_url').text - video_description = video_info.find('.//description').text - video_uploader_id = video_info.find('.//user_id').text - video_upload_date = unified_strdate(video_info.find('.//first_retrieve').text.split('+')[0]) - video_view_count = video_info.find('.//view_counter').text - video_webpage_url = video_info.find('.//watch_url').text + title = video_info.find('.//title').text + extension = video_info.find('.//movie_type').text + video_format = extension.upper() + thumbnail = video_info.find('.//thumbnail_url').text + description = video_info.find('.//description').text + upload_date = unified_strdate(video_info.find('.//first_retrieve').text.split('+')[0]) + view_count = int_or_none(video_info.find('.//view_counter').text) + comment_count = int_or_none(video_info.find('.//comment_num').text) + duration = parse_duration(video_info.find('.//length').text) + webpage_url = video_info.find('.//watch_url').text - # uploader - video_uploader = video_uploader_id - url = 'http://seiga.nicovideo.jp/api/user/info?id=' + video_uploader_id - try: - user_info = self._download_xml( - url, video_id, note='Downloading user information') - video_uploader = user_info.find('.//nickname').text - except ExtractorError as err: - self._downloader.report_warning('Unable to download user info webpage: %s' % compat_str(err)) + if video_info.find('.//ch_id') is not None: + uploader_id = video_info.find('.//ch_id').text + uploader = video_info.find('.//ch_name').text + elif video_info.find('.//user_id') is not None: + uploader_id = video_info.find('.//user_id').text + uploader = video_info.find('.//user_nickname').text + else: + uploader_id = uploader = None return { 'id': video_id, 'url': video_real_url, - 'title': video_title, - 'ext': video_extension, + 'title': title, + 'ext': extension, 'format': video_format, - 'thumbnail': video_thumbnail, - 'description': video_description, - 'uploader': video_uploader, - 'upload_date': video_upload_date, - 'uploader_id': video_uploader_id, - 'view_count': video_view_count, - 'webpage_url': video_webpage_url, + 'thumbnail': thumbnail, + 'description': description, + 'uploader': uploader, + 'upload_date': upload_date, + 'uploader_id': uploader_id, + 'view_count': view_count, + 'comment_count': comment_count, + 'duration': duration, + 'webpage_url': webpage_url, } diff --git a/youtube_dl/extractor/ninegag.py b/youtube_dl/extractor/ninegag.py index c2e7b67c7..33daa0dec 100644 --- a/youtube_dl/extractor/ninegag.py +++ b/youtube_dl/extractor/ninegag.py @@ -47,7 +47,7 @@ class NineGagIE(InfoExtractor): webpage = self._download_webpage(url, display_id) post_view = json.loads(self._html_search_regex( - r'var postView = new app\.PostView\({\s*post:\s*({.+?}),', webpage, 'post view')) + r'var postView = new app\.PostView\({\s*post:\s*({.+?}),\s*posts:\s*prefetchedCurrentPost', webpage, 'post view')) youtube_id = post_view['videoExternalId'] title = post_view['title'] diff --git a/youtube_dl/extractor/pyvideo.py b/youtube_dl/extractor/pyvideo.py index 0bc0859b4..6d5732d45 100644 --- a/youtube_dl/extractor/pyvideo.py +++ b/youtube_dl/extractor/pyvideo.py @@ -46,7 +46,7 @@ class PyvideoIE(InfoExtractor): return self.url_result(m_youtube.group(1), 'Youtube') title = self._html_search_regex( - r'
.*?([^>]+?)

', + r'
\s*]*)?>([^>]+?)', webpage, 'title', flags=re.DOTALL) video_url = self._search_regex( [r'Download.*?\d+).*?$' + _TESTS = [{ + 'url': 'http://www.reverbnation.com/alkilados/song/16965047-mona-lisa', + 'file': '16965047.mp3', + 'md5': '3da12ebca28c67c111a7f8b262d3f7a7', + 'info_dict': { + "title": "MONA LISA", + "uploader": "ALKILADOS", + "uploader_id": 216429, + "thumbnail": "//gp1.wac.edgecastcdn.net/802892/production_public/Photo/13761700/image/1366002176_AVATAR_MONA_LISA.jpg" + }, + }] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + song_id = mobj.group('id') + + api_res = self._download_json( + 'https://api.reverbnation.com/song/%s?callback=api_response_5&_=%d' + % (song_id, int(time.time() * 1000)), + song_id, + transform_source=strip_jsonp, + note='Downloading information of song %s' % song_id + ) + + return { + 'id': song_id, + 'title': api_res.get('name'), + 'url': api_res.get('url'), + 'uploader': api_res.get('artist', {}).get('name'), + 'uploader_id': api_res.get('artist', {}).get('id'), + 'thumbnail': api_res.get('image', api_res.get('thumbnail')), + 'ext': 'mp3', + 'vcodec': 'none', + } diff --git a/youtube_dl/extractor/ruhd.py b/youtube_dl/extractor/ruhd.py new file mode 100644 index 000000000..55b58e5e6 --- /dev/null +++ b/youtube_dl/extractor/ruhd.py @@ -0,0 +1,46 @@ +# -*- coding: utf-8 -*- +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor + + +class RUHDIE(InfoExtractor): + _VALID_URL = r'http://(?:www\.)?ruhd\.ru/play\.php\?vid=(?P\d+)' + _TEST = { + 'url': 'http://www.ruhd.ru/play.php?vid=207', + 'md5': 'd1a9ec4edf8598e3fbd92bb16072ba83', + 'info_dict': { + 'id': '207', + 'ext': 'divx', + 'title': 'КОТ бааааам', + 'description': 'классный кот)', + 'thumbnail': 're:^http://.*\.jpg$', + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + + webpage = self._download_webpage(url, video_id) + + video_url = self._html_search_regex( + r'([^<]+)   RUHD.ru - Видео Высокого качества №1 в России!', webpage, 'title') + description = self._html_search_regex( + r'(?s)
(.+?)', webpage, 'description', fatal=False) + thumbnail = self._html_search_regex( + r'[a-zA-Z0-9]+)' + _TESTS = [{ + 'url': 'http://www.screencast.com/t/3ZEjQXlT', + 'md5': '917df1c13798a3e96211dd1561fded83', + 'info_dict': { + 'id': '3ZEjQXlT', + 'ext': 'm4v', + 'title': 'Color Measurement with Ocean Optics Spectrometers', + 'description': 'md5:240369cde69d8bed61349a199c5fb153', + 'thumbnail': 're:^https?://.*\.(?:gif|jpg)$', + } + }, { + 'url': 'http://www.screencast.com/t/V2uXehPJa1ZI', + 'md5': 'e8e4b375a7660a9e7e35c33973410d34', + 'info_dict': { + 'id': 'V2uXehPJa1ZI', + 'ext': 'mov', + 'title': 'The Amadeus Spectrometer', + 'description': 're:^In this video, our friends at.*To learn more about Amadeus, visit', + 'thumbnail': 're:^https?://.*\.(?:gif|jpg)$', + } + }, { + 'url': 'http://www.screencast.com/t/aAB3iowa', + 'md5': 'dedb2734ed00c9755761ccaee88527cd', + 'info_dict': { + 'id': 'aAB3iowa', + 'ext': 'mp4', + 'title': 'Google Earth Export', + 'description': 'Provides a demo of a CommunityViz export to Google Earth, one of the 3D viewing options.', + 'thumbnail': 're:^https?://.*\.(?:gif|jpg)$', + } + }, { + 'url': 'http://www.screencast.com/t/X3ddTrYh', + 'md5': '669ee55ff9c51988b4ebc0877cc8b159', + 'info_dict': { + 'id': 'X3ddTrYh', + 'ext': 'wmv', + 'title': 'Toolkit 6 User Group Webinar (2014-03-04) - Default Judgment and First Impression', + 'description': 'md5:7b9f393bc92af02326a5c5889639eab0', + 'thumbnail': 're:^https?://.*\.(?:gif|jpg)$', + } + }, + ] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + webpage = self._download_webpage(url, video_id) + + video_url = self._html_search_regex( + r'Title: ([^<]*)
', + r'class="tabSeperator">>(.*?)<'], + webpage, 'title') + thumbnail = self._og_search_thumbnail(webpage) + description = self._og_search_description(webpage, default=None) + if description is None: + description = self._html_search_meta('description', webpage) + + return { + 'id': video_id, + 'url': video_url, + 'title': title, + 'description': description, + 'thumbnail': thumbnail, + } diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index 7aa100fb2..14ec9452d 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -255,7 +255,7 @@ class SoundcloudSetIE(SoundcloudIE): class SoundcloudUserIE(SoundcloudIE): - _VALID_URL = r'https?://(www\.)?soundcloud\.com/(?P[^/]+)(/?(tracks/)?)?(\?.*)?$' + _VALID_URL = r'https?://(www\.)?soundcloud\.com/(?P[^/]+)/?((?Ptracks|likes)/?)?(\?.*)?$' IE_NAME = 'soundcloud:user' # it's in tests/test_playlists.py @@ -264,24 +264,31 @@ class SoundcloudUserIE(SoundcloudIE): def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) uploader = mobj.group('user') + resource = mobj.group('rsrc') + if resource is None: + resource = 'tracks' + elif resource == 'likes': + resource = 'favorites' url = 'http://soundcloud.com/%s/' % uploader resolv_url = self._resolv_url(url) user = self._download_json( resolv_url, uploader, 'Downloading user info') - base_url = 'http://api.soundcloud.com/users/%s/tracks.json?' % uploader + base_url = 'http://api.soundcloud.com/users/%s/%s.json?' % (uploader, resource) entries = [] for i in itertools.count(): data = compat_urllib_parse.urlencode({ 'offset': i * 50, + 'limit': 50, 'client_id': self._CLIENT_ID, }) new_entries = self._download_json( base_url + data, uploader, 'Downloading track page %s' % (i + 1)) - entries.extend(self._extract_info_dict(e, quiet=True) for e in new_entries) - if len(new_entries) < 50: + if len(new_entries) == 0: + self.to_screen('%s: End page received' % uploader) break + entries.extend(self._extract_info_dict(e, quiet=True) for e in new_entries) return { '_type': 'playlist', diff --git a/youtube_dl/extractor/southparkstudios.py b/youtube_dl/extractor/southpark.py similarity index 73% rename from youtube_dl/extractor/southparkstudios.py rename to youtube_dl/extractor/southpark.py index aea8e6439..c20397b3d 100644 --- a/youtube_dl/extractor/southparkstudios.py +++ b/youtube_dl/extractor/southpark.py @@ -3,24 +3,24 @@ from __future__ import unicode_literals from .mtv import MTVServicesInfoExtractor -class SouthParkStudiosIE(MTVServicesInfoExtractor): - IE_NAME = 'southparkstudios.com' - _VALID_URL = r'https?://(www\.)?(?Psouthparkstudios\.com/(clips|full-episodes)/(?P.+?)(\?|#|$))' +class SouthParkIE(MTVServicesInfoExtractor): + IE_NAME = 'southpark.cc.com' + _VALID_URL = r'https?://(www\.)?(?Psouthpark\.cc\.com/(clips|full-episodes)/(?P.+?)(\?|#|$))' _FEED_URL = 'http://www.southparkstudios.com/feeds/video-player/mrss' _TESTS = [{ - 'url': 'http://www.southparkstudios.com/clips/104437/bat-daded#tab=featured', + 'url': 'http://southpark.cc.com/clips/104437/bat-daded#tab=featured', 'info_dict': { 'id': 'a7bff6c2-ed00-11e0-aca6-0026b9414f30', 'ext': 'mp4', - 'title': 'Bat Daded', + 'title': 'South Park|Bat Daded', 'description': 'Randy disqualifies South Park by getting into a fight with Bat Dad.', }, }] -class SouthparkDeIE(SouthParkStudiosIE): +class SouthparkDeIE(SouthParkIE): IE_NAME = 'southpark.de' _VALID_URL = r'https?://(www\.)?(?Psouthpark\.de/(clips|alle-episoden)/(?P.+?)(\?|#|$))' _FEED_URL = 'http://www.southpark.de/feeds/video-player/mrss/' diff --git a/youtube_dl/extractor/tagesschau.py b/youtube_dl/extractor/tagesschau.py index 36331529e..25b9864ad 100644 --- a/youtube_dl/extractor/tagesschau.py +++ b/youtube_dl/extractor/tagesschau.py @@ -20,13 +20,13 @@ class TagesschauIE(InfoExtractor): 'thumbnail': 're:^http:.*\.jpg$', }, }, { - 'url': 'http://www.tagesschau.de/multimedia/video/video-196.html', - 'md5': '8aaa8bf3ae1ca2652309718c03019128', + 'url': 'http://www.tagesschau.de/multimedia/video/video-5964.html', + 'md5': '66652566900963a3f962333579eeffcf', 'info_dict': { - 'id': '196', + 'id': '5964', 'ext': 'mp4', - 'title': 'Ukraine-Konflikt: Klitschko in Kiew als Bürgermeister vereidigt', - 'description': 'md5:f22e4af75821d174fa6c977349682691', + 'title': 'Nahost-Konflikt: Israel bombadiert Ziele im Gazastreifen und Westjordanland', + 'description': 'md5:07bfc78c48eec3145ed4805299a1900a', 'thumbnail': 're:http://.*\.jpg', }, }] diff --git a/youtube_dl/extractor/teachertube.py b/youtube_dl/extractor/teachertube.py index d9868d569..2c2113b14 100644 --- a/youtube_dl/extractor/teachertube.py +++ b/youtube_dl/extractor/teachertube.py @@ -14,7 +14,7 @@ class TeacherTubeIE(InfoExtractor): IE_NAME = 'teachertube' IE_DESC = 'teachertube.com videos' - _VALID_URL = r'https?://(?:www\.)?teachertube\.com/(viewVideo\.php\?video_id=|music\.php\?music_id=|video/|audio/)(?P\d+)' + _VALID_URL = r'https?://(?:www\.)?teachertube\.com/(viewVideo\.php\?video_id=|music\.php\?music_id=|video/(?:[\da-z-]+-)?|audio/)(?P\d+)' _TESTS = [{ 'url': 'http://www.teachertube.com/viewVideo.php?video_id=339997', @@ -45,6 +45,15 @@ class TeacherTubeIE(InfoExtractor): 'title': 'PER ASPERA AD ASTRA', 'description': 'RADIJSKA EMISIJA ZRAKOPLOVNE TEHNI?KE ?KOLE P', }, + }, { + 'url': 'http://www.teachertube.com/video/intro-video-schleicher-297790', + 'md5': '9c79fbb2dd7154823996fc28d4a26998', + 'info_dict': { + 'id': '297790', + 'ext': 'mp4', + 'title': 'Intro Video - Schleicher', + 'description': 'Intro Video - Why to flip, how flipping will', + }, }] def _real_extract(self, url): @@ -86,22 +95,30 @@ class TeacherTubeIE(InfoExtractor): } -class TeacherTubeClassroomIE(InfoExtractor): - IE_NAME = 'teachertube:classroom' - IE_DESC = 'teachertube.com online classrooms' +class TeacherTubeUserIE(InfoExtractor): + IE_NAME = 'teachertube:user:collection' + IE_DESC = 'teachertube.com user and collection videos' - _VALID_URL = r'https?://(?:www\.)?teachertube\.com/view_classroom\.php\?user=(?P[0-9a-zA-Z]+)' + _VALID_URL = r'https?://(?:www\.)?teachertube\.com/(user/profile|collection)/(?P[0-9a-zA-Z]+)/?' + + _MEDIA_RE = r'(?s)"sidebar_thumb_time">[0-9:]+
.+?' def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) user_id = mobj.group('user') - rss = self._download_xml( - 'http://www.teachertube.com/rssclassroom.php?mode=user&username=%s' % user_id, - user_id, 'Downloading classroom RSS') + urls = [] + webpage = self._download_webpage(url, user_id) + urls.extend(re.findall(self._MEDIA_RE, webpage)) + + pages = re.findall(r'/ajax-user/user-videos/%s\?page=([0-9]+)' % user_id, webpage)[1:-1] + for p in pages: + more = 'http://www.teachertube.com/ajax-user/user-videos/%s?page=%s' % (user_id, p) + webpage = self._download_webpage(more, user_id, 'Downloading page %s/%s' % (p, len(pages) + 1)) + urls.extend(re.findall(self._MEDIA_RE, webpage)) entries = [] - for url in rss.findall('.//{http://search.yahoo.com/mrss/}player'): - entries.append(self.url_result(url.attrib['url'], 'TeacherTube')) + for url in urls: + entries.append(self.url_result(url, 'TeacherTube')) return self.playlist_result(entries, user_id) diff --git a/youtube_dl/extractor/tenplay.py b/youtube_dl/extractor/tenplay.py new file mode 100644 index 000000000..8477840fc --- /dev/null +++ b/youtube_dl/extractor/tenplay.py @@ -0,0 +1,84 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor + + +class TenPlayIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?ten(play)?\.com\.au/.+' + _TEST = { + 'url': 'http://tenplay.com.au/ten-insider/extra/season-2013/tenplay-tv-your-way', + #'md5': 'd68703d9f73dc8fccf3320ab34202590', + 'info_dict': { + 'id': '2695695426001', + 'ext': 'flv', + 'title': 'TENplay: TV your way', + 'description': 'Welcome to a new TV experience. Enjoy a taste of the TENplay benefits.', + 'timestamp': 1380150606.889, + 'upload_date': '20130925', + 'uploader': 'TENplay', + }, + 'params': { + 'skip_download': True, # Requires rtmpdump + } + } + + _video_fields = [ + "id", "name", "shortDescription", "longDescription", "creationDate", + "publishedDate", "lastModifiedDate", "customFields", "videoStillURL", + "thumbnailURL", "referenceId", "length", "playsTotal", + "playsTrailingWeek", "renditions", "captioning", "startDate", "endDate"] + + def _real_extract(self, url): + webpage = self._download_webpage(url, url) + video_id = self._html_search_regex( + r'videoID: "(\d+?)"', webpage, 'video_id') + api_token = self._html_search_regex( + r'apiToken: "([a-zA-Z0-9-_\.]+?)"', webpage, 'api_token') + title = self._html_search_regex( + r'', + webpage, 'title') + + json = self._download_json('https://api.brightcove.com/services/library?command=find_video_by_id&video_id=%s&token=%s&video_fields=%s' % (video_id, api_token, ','.join(self._video_fields)), title) + + formats = [] + for rendition in json['renditions']: + url = rendition['remoteUrl'] or rendition['url'] + protocol = 'rtmp' if url.startswith('rtmp') else 'http' + ext = 'flv' if protocol == 'rtmp' else rendition['videoContainer'].lower() + + if protocol == 'rtmp': + url = url.replace('&mp4:', '') + + formats.append({ + 'format_id': '_'.join(['rtmp', rendition['videoContainer'].lower(), rendition['videoCodec'].lower()]), + 'width': rendition['frameWidth'], + 'height': rendition['frameHeight'], + 'tbr': rendition['encodingRate'] / 1024, + 'filesize': rendition['size'], + 'protocol': protocol, + 'ext': ext, + 'vcodec': rendition['videoCodec'].lower(), + 'container': rendition['videoContainer'].lower(), + 'url': url, + }) + + return { + 'id': video_id, + 'display_id': json['referenceId'], + 'title': json['name'], + 'description': json['shortDescription'] or json['longDescription'], + 'formats': formats, + 'thumbnails': [{ + 'url': json['videoStillURL'] + }, { + 'url': json['thumbnailURL'] + }], + 'thumbnail': json['videoStillURL'], + 'duration': json['length'] / 1000, + 'timestamp': float(json['creationDate']) / 1000, + 'uploader': json['customFields']['production_company_distributor'] if 'production_company_distributor' in json['customFields'] else 'TENplay', + 'view_count': json['playsTotal'] + } diff --git a/youtube_dl/extractor/tlc.py b/youtube_dl/extractor/tlc.py index ad175b83e..d848ee186 100644 --- a/youtube_dl/extractor/tlc.py +++ b/youtube_dl/extractor/tlc.py @@ -5,6 +5,7 @@ import re from .common import InfoExtractor from .brightcove import BrightcoveIE from .discovery import DiscoveryIE +from ..utils import compat_urlparse class TlcIE(DiscoveryIE): @@ -51,6 +52,10 @@ class TlcDeIE(InfoExtractor): # Otherwise we don't get the correct 'BrightcoveExperience' element, # example: http://www.tlc.de/sendungen/cake-boss/videos/cake-boss-cannoli-drama/ iframe_url = iframe_url.replace('.htm?', '.php?') + url_fragment = compat_urlparse.urlparse(url).fragment + if url_fragment: + # Since the fragment is not send to the server, we always get the same iframe + iframe_url = re.sub(r'playlist=(\d+)', 'playlist=%s' % url_fragment, iframe_url) iframe = self._download_webpage(iframe_url, title) return { diff --git a/youtube_dl/extractor/veoh.py b/youtube_dl/extractor/veoh.py index fb132aef6..a7953a7e7 100644 --- a/youtube_dl/extractor/veoh.py +++ b/youtube_dl/extractor/veoh.py @@ -49,6 +49,7 @@ class VeohIE(InfoExtractor): 'description': 'md5:f5a11c51f8fb51d2315bca0937526891', 'uploader': 'newsy-videos', }, + 'skip': 'This video has been deleted.', }, ] diff --git a/youtube_dl/extractor/vimple.py b/youtube_dl/extractor/vimple.py new file mode 100644 index 000000000..33d370e1c --- /dev/null +++ b/youtube_dl/extractor/vimple.py @@ -0,0 +1,86 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import base64 +import re +import xml.etree.ElementTree +import zlib + +from .common import InfoExtractor +from ..utils import int_or_none + + +class VimpleIE(InfoExtractor): + IE_DESC = 'Vimple.ru' + _VALID_URL = r'https?://(player.vimple.ru/iframe|vimple.ru)/(?P[a-f0-9]{10,})' + _TESTS = [ + # Quality: Large, from iframe + { + 'url': 'http://player.vimple.ru/iframe/b132bdfd71b546d3972f9ab9a25f201c', + 'info_dict': { + 'id': 'b132bdfd71b546d3972f9ab9a25f201c', + 'title': 'great-escape-minecraft.flv', + 'ext': 'mp4', + 'duration': 352, + 'webpage_url': 'http://vimple.ru/b132bdfd71b546d3972f9ab9a25f201c', + }, + }, + # Quality: Medium, from mainpage + { + 'url': 'http://vimple.ru/a15950562888453b8e6f9572dc8600cd', + 'info_dict': { + 'id': 'a15950562888453b8e6f9572dc8600cd', + 'title': 'DB 01', + 'ext': 'flv', + 'duration': 1484, + 'webpage_url': 'http://vimple.ru/a15950562888453b8e6f9572dc8600cd', + } + }, + ] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + + iframe_url = 'http://player.vimple.ru/iframe/%s' % video_id + + iframe = self._download_webpage( + iframe_url, video_id, + note='Downloading iframe', errnote='unable to fetch iframe') + player_url = self._html_search_regex( + r'"(http://player.vimple.ru/flash/.+?)"', iframe, 'player url') + + player = self._request_webpage( + player_url, video_id, note='Downloading swf player').read() + + player = zlib.decompress(player[8:]) + + xml_pieces = re.findall(b'([a-zA-Z0-9 =+/]{500})', player) + xml_pieces = [piece[1:-1] for piece in xml_pieces] + + xml_data = b''.join(xml_pieces) + xml_data = base64.b64decode(xml_data) + + xml_data = xml.etree.ElementTree.fromstring(xml_data) + + video = xml_data.find('Video') + quality = video.get('quality') + q_tag = video.find(quality.capitalize()) + + formats = [ + { + 'url': q_tag.get('url'), + 'tbr': int(q_tag.get('bitrate')), + 'filesize': int(q_tag.get('filesize')), + 'format_id': quality, + }, + ] + + return { + 'id': video_id, + 'title': video.find('Title').text, + 'formats': formats, + 'thumbnail': video.find('Poster').get('url'), + 'duration': int_or_none(video.get('duration')), + 'webpage_url': video.find('Share').get('videoPageUrl'), + } diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py index 66fe1dd3e..918bd1098 100644 --- a/youtube_dl/extractor/vk.py +++ b/youtube_dl/extractor/vk.py @@ -16,7 +16,7 @@ from ..utils import ( class VKIE(InfoExtractor): IE_NAME = 'vk.com' - _VALID_URL = r'https?://vk\.com/(?:video_ext\.php\?.*?\boid=(?P-?\d+).*?\bid=(?P\d+)|(?:videos.*?\?.*?z=)?video(?P.*?)(?:\?|%2F|$))' + _VALID_URL = r'https?://(?:m\.)?vk\.com/(?:video_ext\.php\?.*?\boid=(?P-?\d+).*?\bid=(?P\d+)|(?:.+?\?.*?z=)?video(?P.*?)(?:\?|%2F|$))' _NETRC_MACHINE = 'vk' _TESTS = [ @@ -62,11 +62,47 @@ class VKIE(InfoExtractor): 'id': '164049491', 'ext': 'mp4', 'uploader': 'Триллеры', - 'title': '► Бойцовский клуб / Fight Club 1999 [HD 720]\u00a0', + 'title': '► Бойцовский клуб / Fight Club 1999 [HD 720]', 'duration': 8352, }, 'skip': 'Requires vk account credentials', }, + { + 'url': 'http://vk.com/feed?z=video-43215063_166094326%2Fbb50cacd3177146d7a', + 'md5': 'd82c22e449f036282d1d3f7f4d276869', + 'info_dict': { + 'id': '166094326', + 'ext': 'mp4', + 'uploader': 'Киномания - лучшее из мира кино', + 'title': 'Запах женщины (1992)', + 'duration': 9392, + }, + 'skip': 'Requires vk account credentials', + }, + { + 'url': 'http://vk.com/hd_kino_mania?z=video-43215063_168067957%2F15c66b9b533119788d', + 'md5': '4d7a5ef8cf114dfa09577e57b2993202', + 'info_dict': { + 'id': '168067957', + 'ext': 'mp4', + 'uploader': 'Киномания - лучшее из мира кино', + 'title': ' ', + 'duration': 7291, + }, + 'skip': 'Requires vk account credentials', + }, + { + 'url': 'http://m.vk.com/video-43215063_169084319?list=125c627d1aa1cebb83&from=wall-43215063_2566540', + 'md5': '0c45586baa71b7cb1d0784ee3f4e00a6', + 'note': 'ivi.ru embed', + 'info_dict': { + 'id': '60690', + 'ext': 'mp4', + 'title': 'Книга Илая', + 'duration': 6771, + }, + 'skip': 'Only works from Russia', + }, ] def _login(self): @@ -110,6 +146,16 @@ class VKIE(InfoExtractor): if m_yt is not None: self.to_screen('Youtube video detected') return self.url_result(m_yt.group(1), 'Youtube') + + m_opts = re.search(r'(?s)var\s+opts\s*=\s*({.*?});', info_page) + if m_opts: + m_opts_url = re.search(r"url\s*:\s*'([^']+)", m_opts.group(1)) + if m_opts_url: + opts_url = m_opts_url.group(1) + if opts_url.startswith('//'): + opts_url = 'http:' + opts_url + return self.url_result(opts_url) + data_json = self._search_regex(r'var vars = ({.*?});', info_page, 'vars') data = json.loads(data_json) diff --git a/youtube_dl/extractor/vodlocker.py b/youtube_dl/extractor/vodlocker.py new file mode 100644 index 000000000..68c59364b --- /dev/null +++ b/youtube_dl/extractor/vodlocker.py @@ -0,0 +1,63 @@ +# -*- coding: utf-8 -*- +from __future__ import unicode_literals + +import re +from .common import InfoExtractor +from ..utils import ( + compat_urllib_parse, + compat_urllib_request, +) + + +class VodlockerIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?vodlocker.com/(?P[0-9a-zA-Z]+)(?:\..*?)?' + + _TESTS = [{ + 'url': 'http://vodlocker.com/e8wvyzz4sl42', + 'md5': 'ce0c2d18fa0735f1bd91b69b0e54aacf', + 'info_dict': { + 'id': 'e8wvyzz4sl42', + 'ext': 'mp4', + 'title': 'Germany vs Brazil', + 'thumbnail': 're:http://.*\.jpg', + }, + }] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + webpage = self._download_webpage(url, video_id) + + fields = dict(re.findall(r'''(?x)\s*(.*?)\s*', webpage, u'result HTML') + r'(?s)
    ', webpage, u'result HTML') part_codes = re.findall( r'(?s)

    (.*?)

    ', result_code) entries = [] for part_code in part_codes: part_title = self._html_search_regex( - r'(?s)title="([^"]+)"', part_code, 'item title', fatal=False) + [r'(?s)title="([^"]+)"', r'>([^<]+)
    '], part_code, 'item title', fatal=False) part_url_snippet = self._html_search_regex( r'(?s)href="([^"]+)"', part_code, 'item URL') part_url = compat_urlparse.urljoin( @@ -1825,10 +1780,21 @@ class YoutubeTruncatedURLIE(InfoExtractor): IE_NAME = 'youtube:truncated_url' IE_DESC = False # Do not list _VALID_URL = r'''(?x) - (?:https?://)?[^/]+/watch\?(?:feature=[a-z_]+)?$| + (?:https?://)?[^/]+/watch\?(?: + feature=[a-z_]+| + annotation_id=annotation_[^&]+ + )?$| (?:https?://)?(?:www\.)?youtube\.com/attribution_link\?a=[^&]+$ ''' + _TESTS = [{ + 'url': 'http://www.youtube.com/watch?annotation_id=annotation_3951667041', + 'only_matching': True, + }, { + 'url': 'http://www.youtube.com/watch?', + 'only_matching': True, + }] + def _real_extract(self, url): raise ExtractorError( u'Did you forget to quote the URL? Remember that & is a meta ' diff --git a/youtube_dl/jsinterp.py b/youtube_dl/jsinterp.py index 449482d3c..3bbb07704 100644 --- a/youtube_dl/jsinterp.py +++ b/youtube_dl/jsinterp.py @@ -59,7 +59,7 @@ class JSInterpreter(object): if member == 'split("")': return list(val) if member == 'join("")': - return u''.join(val) + return ''.join(val) if member == 'length': return len(val) if member == 'reverse()': @@ -99,7 +99,7 @@ class JSInterpreter(object): def extract_function(self, funcname): func_m = re.search( - (r'(?:function %s|%s\s*=\s*function)' % ( + (r'(?:function %s|[{;]%s\s*=\s*function)' % ( re.escape(funcname), re.escape(funcname))) + r'\((?P[a-z,]+)\){(?P[^}]+)}', self.code) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 09312e81a..64a9618ca 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -775,7 +775,7 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler): https_response = http_response -def parse_iso8601(date_str): +def parse_iso8601(date_str, delimiter='T'): """ Return a UNIX timestamp from the given date """ if date_str is None: @@ -795,8 +795,8 @@ def parse_iso8601(date_str): timezone = datetime.timedelta( hours=sign * int(m.group('hours')), minutes=sign * int(m.group('minutes'))) - - dt = datetime.datetime.strptime(date_str, '%Y-%m-%dT%H:%M:%S') - timezone + date_format = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter) + dt = datetime.datetime.strptime(date_str, date_format) - timezone return calendar.timegm(dt.timetuple()) @@ -1428,7 +1428,7 @@ US_RATINGS = { def strip_jsonp(code): - return re.sub(r'(?s)^[a-zA-Z_]+\s*\(\s*(.*)\);\s*?\s*$', r'\1', code) + return re.sub(r'(?s)^[a-zA-Z0-9_]+\s*\(\s*(.*)\);?\s*?\s*$', r'\1', code) def qualities(quality_ids): diff --git a/youtube_dl/version.py b/youtube_dl/version.py index ab076489f..2c9591630 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2014.06.26' +__version__ = '2014.07.11.3'