Merge remote-tracking branch 'upstream/master'

This commit is contained in:
rupertbaxter2 2014-07-21 05:23:31 -07:00
commit c7a65d6c18
22 changed files with 247 additions and 81 deletions

View File

@ -148,3 +148,10 @@ def assertRegexpMatches(self, text, regexp, msg=None):
else: else:
msg = note + ', ' + msg msg = note + ', ' + msg
self.assertTrue(m, msg) self.assertTrue(m, msg)
def assertGreaterEqual(self, got, expected, msg=None):
    """Free-function backport of TestCase.assertGreaterEqual.

    Takes the test case as an explicit first argument so it can be used
    on Python versions whose unittest lacks assertGreaterEqual.
    Fails (via self.assertTrue) only when got < expected; otherwise it
    is a no-op.
    """
    if got >= expected:
        return
    if msg is None:
        msg = '%r not greater than or equal to %r' % (got, expected)
    self.assertTrue(got >= expected, msg)

View File

@ -11,6 +11,7 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from test.helper import ( from test.helper import (
assertRegexpMatches, assertRegexpMatches,
assertGreaterEqual,
expect_info_dict, expect_info_dict,
FakeYDL, FakeYDL,
) )
@ -71,8 +72,8 @@ class TestPlaylists(unittest.TestCase):
ie = DailymotionUserIE(dl) ie = DailymotionUserIE(dl)
result = ie.extract('https://www.dailymotion.com/user/nqtv') result = ie.extract('https://www.dailymotion.com/user/nqtv')
self.assertIsPlaylist(result) self.assertIsPlaylist(result)
assertGreaterEqual(self, len(result['entries']), 100)
self.assertEqual(result['title'], 'Rémi Gaillard') self.assertEqual(result['title'], 'Rémi Gaillard')
self.assertTrue(len(result['entries']) >= 100)
def test_vimeo_channel(self): def test_vimeo_channel(self):
dl = FakeYDL() dl = FakeYDL()
@ -111,7 +112,7 @@ class TestPlaylists(unittest.TestCase):
ie = VineUserIE(dl) ie = VineUserIE(dl)
result = ie.extract('https://vine.co/Visa') result = ie.extract('https://vine.co/Visa')
self.assertIsPlaylist(result) self.assertIsPlaylist(result)
self.assertTrue(len(result['entries']) >= 47) assertGreaterEqual(self, len(result['entries']), 47)
def test_ustream_channel(self): def test_ustream_channel(self):
dl = FakeYDL() dl = FakeYDL()
@ -119,7 +120,7 @@ class TestPlaylists(unittest.TestCase):
result = ie.extract('http://www.ustream.tv/channel/channeljapan') result = ie.extract('http://www.ustream.tv/channel/channeljapan')
self.assertIsPlaylist(result) self.assertIsPlaylist(result)
self.assertEqual(result['id'], '10874166') self.assertEqual(result['id'], '10874166')
self.assertTrue(len(result['entries']) >= 54) assertGreaterEqual(self, len(result['entries']), 54)
def test_soundcloud_set(self): def test_soundcloud_set(self):
dl = FakeYDL() dl = FakeYDL()
@ -127,7 +128,7 @@ class TestPlaylists(unittest.TestCase):
result = ie.extract('https://soundcloud.com/the-concept-band/sets/the-royal-concept-ep') result = ie.extract('https://soundcloud.com/the-concept-band/sets/the-royal-concept-ep')
self.assertIsPlaylist(result) self.assertIsPlaylist(result)
self.assertEqual(result['title'], 'The Royal Concept EP') self.assertEqual(result['title'], 'The Royal Concept EP')
self.assertTrue(len(result['entries']) >= 6) assertGreaterEqual(self, len(result['entries']), 6)
def test_soundcloud_user(self): def test_soundcloud_user(self):
dl = FakeYDL() dl = FakeYDL()
@ -135,7 +136,7 @@ class TestPlaylists(unittest.TestCase):
result = ie.extract('https://soundcloud.com/the-concept-band') result = ie.extract('https://soundcloud.com/the-concept-band')
self.assertIsPlaylist(result) self.assertIsPlaylist(result)
self.assertEqual(result['id'], '9615865') self.assertEqual(result['id'], '9615865')
self.assertTrue(len(result['entries']) >= 12) assertGreaterEqual(self, len(result['entries']), 12)
def test_soundcloud_likes(self): def test_soundcloud_likes(self):
dl = FakeYDL() dl = FakeYDL()
@ -143,7 +144,7 @@ class TestPlaylists(unittest.TestCase):
result = ie.extract('https://soundcloud.com/the-concept-band/likes') result = ie.extract('https://soundcloud.com/the-concept-band/likes')
self.assertIsPlaylist(result) self.assertIsPlaylist(result)
self.assertEqual(result['id'], '9615865') self.assertEqual(result['id'], '9615865')
self.assertTrue(len(result['entries']) >= 1) assertGreaterEqual(self, len(result['entries']), 1)
def test_soundcloud_playlist(self): def test_soundcloud_playlist(self):
dl = FakeYDL() dl = FakeYDL()
@ -162,7 +163,7 @@ class TestPlaylists(unittest.TestCase):
result = ie.extract('http://new.livestream.com/tedx/cityenglish') result = ie.extract('http://new.livestream.com/tedx/cityenglish')
self.assertIsPlaylist(result) self.assertIsPlaylist(result)
self.assertEqual(result['title'], 'TEDCity2.0 (English)') self.assertEqual(result['title'], 'TEDCity2.0 (English)')
self.assertTrue(len(result['entries']) >= 4) assertGreaterEqual(self, len(result['entries']), 4)
def test_livestreamoriginal_folder(self): def test_livestreamoriginal_folder(self):
dl = FakeYDL() dl = FakeYDL()
@ -170,7 +171,7 @@ class TestPlaylists(unittest.TestCase):
result = ie.extract('https://www.livestream.com/newplay/folder?dirId=a07bf706-d0e4-4e75-a747-b021d84f2fd3') result = ie.extract('https://www.livestream.com/newplay/folder?dirId=a07bf706-d0e4-4e75-a747-b021d84f2fd3')
self.assertIsPlaylist(result) self.assertIsPlaylist(result)
self.assertEqual(result['id'], 'a07bf706-d0e4-4e75-a747-b021d84f2fd3') self.assertEqual(result['id'], 'a07bf706-d0e4-4e75-a747-b021d84f2fd3')
self.assertTrue(len(result['entries']) >= 28) assertGreaterEqual(self, len(result['entries']), 28)
def test_nhl_videocenter(self): def test_nhl_videocenter(self):
dl = FakeYDL() dl = FakeYDL()
@ -187,7 +188,7 @@ class TestPlaylists(unittest.TestCase):
result = ie.extract('http://bambuser.com/channel/pixelversity') result = ie.extract('http://bambuser.com/channel/pixelversity')
self.assertIsPlaylist(result) self.assertIsPlaylist(result)
self.assertEqual(result['title'], 'pixelversity') self.assertEqual(result['title'], 'pixelversity')
self.assertTrue(len(result['entries']) >= 60) assertGreaterEqual(self, len(result['entries']), 60)
def test_bandcamp_album(self): def test_bandcamp_album(self):
dl = FakeYDL() dl = FakeYDL()
@ -195,7 +196,7 @@ class TestPlaylists(unittest.TestCase):
result = ie.extract('http://mpallante.bandcamp.com/album/nightmare-night-ep') result = ie.extract('http://mpallante.bandcamp.com/album/nightmare-night-ep')
self.assertIsPlaylist(result) self.assertIsPlaylist(result)
self.assertEqual(result['title'], 'Nightmare Night EP') self.assertEqual(result['title'], 'Nightmare Night EP')
self.assertTrue(len(result['entries']) >= 4) assertGreaterEqual(self, len(result['entries']), 4)
def test_smotri_community(self): def test_smotri_community(self):
dl = FakeYDL() dl = FakeYDL()
@ -204,7 +205,7 @@ class TestPlaylists(unittest.TestCase):
self.assertIsPlaylist(result) self.assertIsPlaylist(result)
self.assertEqual(result['id'], 'kommuna') self.assertEqual(result['id'], 'kommuna')
self.assertEqual(result['title'], 'КПРФ') self.assertEqual(result['title'], 'КПРФ')
self.assertTrue(len(result['entries']) >= 4) assertGreaterEqual(self, len(result['entries']), 4)
def test_smotri_user(self): def test_smotri_user(self):
dl = FakeYDL() dl = FakeYDL()
@ -213,7 +214,7 @@ class TestPlaylists(unittest.TestCase):
self.assertIsPlaylist(result) self.assertIsPlaylist(result)
self.assertEqual(result['id'], 'inspector') self.assertEqual(result['id'], 'inspector')
self.assertEqual(result['title'], 'Inspector') self.assertEqual(result['title'], 'Inspector')
self.assertTrue(len(result['entries']) >= 9) assertGreaterEqual(self, len(result['entries']), 9)
def test_AcademicEarthCourse(self): def test_AcademicEarthCourse(self):
dl = FakeYDL() dl = FakeYDL()
@ -232,7 +233,7 @@ class TestPlaylists(unittest.TestCase):
self.assertIsPlaylist(result) self.assertIsPlaylist(result)
self.assertEqual(result['id'], 'dvoe_iz_lartsa') self.assertEqual(result['id'], 'dvoe_iz_lartsa')
self.assertEqual(result['title'], 'Двое из ларца (2006 - 2008)') self.assertEqual(result['title'], 'Двое из ларца (2006 - 2008)')
self.assertTrue(len(result['entries']) >= 24) assertGreaterEqual(self, len(result['entries']), 24)
def test_ivi_compilation_season(self): def test_ivi_compilation_season(self):
dl = FakeYDL() dl = FakeYDL()
@ -241,7 +242,7 @@ class TestPlaylists(unittest.TestCase):
self.assertIsPlaylist(result) self.assertIsPlaylist(result)
self.assertEqual(result['id'], 'dvoe_iz_lartsa/season1') self.assertEqual(result['id'], 'dvoe_iz_lartsa/season1')
self.assertEqual(result['title'], 'Двое из ларца (2006 - 2008) 1 сезон') self.assertEqual(result['title'], 'Двое из ларца (2006 - 2008) 1 сезон')
self.assertTrue(len(result['entries']) >= 12) assertGreaterEqual(self, len(result['entries']), 12)
def test_imdb_list(self): def test_imdb_list(self):
dl = FakeYDL() dl = FakeYDL()
@ -260,7 +261,7 @@ class TestPlaylists(unittest.TestCase):
self.assertEqual(result['id'], 'cryptography') self.assertEqual(result['id'], 'cryptography')
self.assertEqual(result['title'], 'Journey into cryptography') self.assertEqual(result['title'], 'Journey into cryptography')
self.assertEqual(result['description'], 'How have humans protected their secret messages through history? What has changed today?') self.assertEqual(result['description'], 'How have humans protected their secret messages through history? What has changed today?')
self.assertTrue(len(result['entries']) >= 3) assertGreaterEqual(self, len(result['entries']), 3)
def test_EveryonesMixtape(self): def test_EveryonesMixtape(self):
dl = FakeYDL() dl = FakeYDL()
@ -277,7 +278,7 @@ class TestPlaylists(unittest.TestCase):
result = ie.extract('http://rutube.ru/tags/video/1800/') result = ie.extract('http://rutube.ru/tags/video/1800/')
self.assertIsPlaylist(result) self.assertIsPlaylist(result)
self.assertEqual(result['id'], '1800') self.assertEqual(result['id'], '1800')
self.assertTrue(len(result['entries']) >= 68) assertGreaterEqual(self, len(result['entries']), 68)
def test_rutube_person(self): def test_rutube_person(self):
dl = FakeYDL() dl = FakeYDL()
@ -285,7 +286,7 @@ class TestPlaylists(unittest.TestCase):
result = ie.extract('http://rutube.ru/video/person/313878/') result = ie.extract('http://rutube.ru/video/person/313878/')
self.assertIsPlaylist(result) self.assertIsPlaylist(result)
self.assertEqual(result['id'], '313878') self.assertEqual(result['id'], '313878')
self.assertTrue(len(result['entries']) >= 37) assertGreaterEqual(self, len(result['entries']), 37)
def test_multiple_brightcove_videos(self): def test_multiple_brightcove_videos(self):
# https://github.com/rg3/youtube-dl/issues/2283 # https://github.com/rg3/youtube-dl/issues/2283
@ -322,7 +323,7 @@ class TestPlaylists(unittest.TestCase):
self.assertIsPlaylist(result) self.assertIsPlaylist(result)
self.assertEqual(result['id'], '10') self.assertEqual(result['id'], '10')
self.assertEqual(result['title'], 'Who are the hackers?') self.assertEqual(result['title'], 'Who are the hackers?')
self.assertTrue(len(result['entries']) >= 6) assertGreaterEqual(self, len(result['entries']), 6)
def test_toypics_user(self): def test_toypics_user(self):
dl = FakeYDL() dl = FakeYDL()
@ -330,7 +331,7 @@ class TestPlaylists(unittest.TestCase):
result = ie.extract('http://videos.toypics.net/Mikey') result = ie.extract('http://videos.toypics.net/Mikey')
self.assertIsPlaylist(result) self.assertIsPlaylist(result)
self.assertEqual(result['id'], 'Mikey') self.assertEqual(result['id'], 'Mikey')
self.assertTrue(len(result['entries']) >= 17) assertGreaterEqual(self, len(result['entries']), 17)
def test_xtube_user(self): def test_xtube_user(self):
dl = FakeYDL() dl = FakeYDL()
@ -338,7 +339,7 @@ class TestPlaylists(unittest.TestCase):
result = ie.extract('http://www.xtube.com/community/profile.php?user=greenshowers') result = ie.extract('http://www.xtube.com/community/profile.php?user=greenshowers')
self.assertIsPlaylist(result) self.assertIsPlaylist(result)
self.assertEqual(result['id'], 'greenshowers') self.assertEqual(result['id'], 'greenshowers')
self.assertTrue(len(result['entries']) >= 155) assertGreaterEqual(self, len(result['entries']), 155)
def test_InstagramUser(self): def test_InstagramUser(self):
dl = FakeYDL() dl = FakeYDL()
@ -346,7 +347,7 @@ class TestPlaylists(unittest.TestCase):
result = ie.extract('http://instagram.com/porsche') result = ie.extract('http://instagram.com/porsche')
self.assertIsPlaylist(result) self.assertIsPlaylist(result)
self.assertEqual(result['id'], 'porsche') self.assertEqual(result['id'], 'porsche')
self.assertTrue(len(result['entries']) >= 2) assertGreaterEqual(self, len(result['entries']), 2)
test_video = next( test_video = next(
e for e in result['entries'] e for e in result['entries']
if e['id'] == '614605558512799803_462752227') if e['id'] == '614605558512799803_462752227')
@ -385,7 +386,7 @@ class TestPlaylists(unittest.TestCase):
self.assertEqual(result['id'], '152147') self.assertEqual(result['id'], '152147')
self.assertEqual( self.assertEqual(
result['title'], 'Brace Yourself - Today\'s Weirdest News') result['title'], 'Brace Yourself - Today\'s Weirdest News')
self.assertTrue(len(result['entries']) >= 10) assertGreaterEqual(self, len(result['entries']), 10)
def test_TeacherTubeUser(self): def test_TeacherTubeUser(self):
dl = FakeYDL() dl = FakeYDL()
@ -393,7 +394,7 @@ class TestPlaylists(unittest.TestCase):
result = ie.extract('http://www.teachertube.com/user/profile/rbhagwati2') result = ie.extract('http://www.teachertube.com/user/profile/rbhagwati2')
self.assertIsPlaylist(result) self.assertIsPlaylist(result)
self.assertEqual(result['id'], 'rbhagwati2') self.assertEqual(result['id'], 'rbhagwati2')
self.assertTrue(len(result['entries']) >= 179) assertGreaterEqual(self, len(result['entries']), 179)
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()

View File

@ -7,6 +7,7 @@ import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import errno
import io import io
import json import json
import re import re

View File

@ -1197,6 +1197,10 @@ class YoutubeDL(object):
if res: if res:
res += ', ' res += ', '
res += format_bytes(fdict['filesize']) res += format_bytes(fdict['filesize'])
elif fdict.get('filesize_approx') is not None:
if res:
res += ', '
res += '~' + format_bytes(fdict['filesize_approx'])
return res return res
def list_formats(self, info_dict): def list_formats(self, info_dict):

View File

@ -72,11 +72,9 @@ __license__ = 'Public Domain'
import codecs import codecs
import io import io
import locale
import optparse import optparse
import os import os
import random import random
import re
import shlex import shlex
import sys import sys

View File

@ -267,6 +267,8 @@ from .smotri import (
SmotriUserIE, SmotriUserIE,
SmotriBroadcastIE, SmotriBroadcastIE,
) )
from .snotr import SnotrIE
from .sockshare import SockshareIE
from .sohu import SohuIE from .sohu import SohuIE
from .soundcloud import ( from .soundcloud import (
SoundcloudIE, SoundcloudIE,

View File

@ -42,7 +42,7 @@ class ChilloutzoneIE(InfoExtractor):
'id': '85523671', 'id': '85523671',
'ext': 'mp4', 'ext': 'mp4',
'title': 'The Sunday Times - Icons', 'title': 'The Sunday Times - Icons',
'description': 'md5:3e1c0dc6047498d6728dcdaad0891762', 'description': 'md5:a5f7ff82e2f7a9ed77473fe666954e84',
'uploader': 'Us', 'uploader': 'Us',
'uploader_id': 'usfilms', 'uploader_id': 'usfilms',
'upload_date': '20140131' 'upload_date': '20140131'

View File

@ -43,7 +43,11 @@ class CNETIE(InfoExtractor):
raise ExtractorError('Cannot find video data') raise ExtractorError('Cannot find video data')
video_id = vdata['id'] video_id = vdata['id']
title = vdata['headline'] title = vdata.get('headline')
if title is None:
title = vdata.get('title')
if title is None:
raise ExtractorError('Cannot find title!')
description = vdata.get('dek') description = vdata.get('dek')
thumbnail = vdata.get('image', {}).get('path') thumbnail = vdata.get('image', {}).get('path')
author = vdata.get('author') author = vdata.get('author')

View File

@ -69,6 +69,7 @@ class InfoExtractor(object):
* vcodec Name of the video codec in use * vcodec Name of the video codec in use
* container Name of the container format * container Name of the container format
* filesize The number of bytes, if known in advance * filesize The number of bytes, if known in advance
* filesize_approx An estimate for the number of bytes
* player_url SWF Player URL (used for rtmpdump). * player_url SWF Player URL (used for rtmpdump).
* protocol The protocol that will be used for the actual * protocol The protocol that will be used for the actual
download, lower-case. download, lower-case.
@ -468,7 +469,7 @@ class InfoExtractor(object):
display_name = name display_name = name
return self._html_search_regex( return self._html_search_regex(
r'''(?ix)<meta r'''(?ix)<meta
(?=[^>]+(?:itemprop|name|property)=["\']%s["\']) (?=[^>]+(?:itemprop|name|property)=["\']?%s["\']?)
[^>]+content=["\']([^"\']+)["\']''' % re.escape(name), [^>]+content=["\']([^"\']+)["\']''' % re.escape(name),
html, display_name, fatal=fatal, **kwargs) html, display_name, fatal=fatal, **kwargs)
@ -555,6 +556,7 @@ class InfoExtractor(object):
f.get('abr') if f.get('abr') is not None else -1, f.get('abr') if f.get('abr') is not None else -1,
audio_ext_preference, audio_ext_preference,
f.get('filesize') if f.get('filesize') is not None else -1, f.get('filesize') if f.get('filesize') is not None else -1,
f.get('filesize_approx') if f.get('filesize_approx') is not None else -1,
f.get('format_id'), f.get('format_id'),
) )
formats.sort(key=_formats_key) formats.sort(key=_formats_key)

View File

@ -5,24 +5,26 @@ import os.path
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import compat_urllib_parse_unquote
class DropboxIE(InfoExtractor): class DropboxIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?dropbox[.]com/s/(?P<id>[a-zA-Z0-9]{15})/(?P<title>[^?#]*)' _VALID_URL = r'https?://(?:www\.)?dropbox[.]com/s/(?P<id>[a-zA-Z0-9]{15})/(?P<title>[^?#]*)'
_TEST = { _TEST = {
'url': 'https://www.dropbox.com/s/0qr9sai2veej4f8/THE_DOCTOR_GAMES.mp4', 'url': 'https://www.dropbox.com/s/nelirfsxnmcfbfh/youtube-dl%20test%20video%20%27%C3%A4%22BaW_jenozKc.mp4',
'md5': '8ae17c51172fb7f93bdd6a214cc8c896', 'md5': '8a3d905427a6951ccb9eb292f154530b',
'info_dict': { 'info_dict': {
'id': '0qr9sai2veej4f8', 'id': 'nelirfsxnmcfbfh',
'ext': 'mp4', 'ext': 'mp4',
'title': 'THE_DOCTOR_GAMES' 'title': 'youtube-dl test video \'ä"BaW_jenozKc'
} }
} }
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id') video_id = mobj.group('id')
title = os.path.splitext(mobj.group('title'))[0] fn = compat_urllib_parse_unquote(mobj.group('title'))
title = os.path.splitext(fn)[0]
video_url = url + '?dl=1' video_url = url + '?dl=1'
return { return {

View File

@ -26,7 +26,7 @@ class FunnyOrDieIE(InfoExtractor):
'id': 'e402820827', 'id': 'e402820827',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Please Use This Song (Jon Lajoie)', 'title': 'Please Use This Song (Jon Lajoie)',
'description': 'md5:2ed27d364f5a805a6dba199faaf6681d', 'description': 'Please use this to sell something. www.jonlajoie.com',
'thumbnail': 're:^http:.*\.jpg$', 'thumbnail': 're:^http:.*\.jpg$',
}, },
}] }]

View File

@ -402,7 +402,7 @@ class GenericIE(InfoExtractor):
elif default_search == 'error': elif default_search == 'error':
raise ExtractorError( raise ExtractorError(
('%r is not a valid URL. ' ('%r is not a valid URL. '
'Set --default-search "ytseach" (or run youtube-dl "ytsearch:%s" ) to search YouTube' 'Set --default-search "ytsearch" (or run youtube-dl "ytsearch:%s" ) to search YouTube'
) % (url, url), expected=True) ) % (url, url), expected=True)
else: else:
assert ':' in default_search assert ':' in default_search

View File

@ -17,7 +17,7 @@ class RTVEALaCartaIE(InfoExtractor):
_TEST = { _TEST = {
'url': 'http://www.rtve.es/alacarta/videos/balonmano/o-swiss-cup-masculina-final-espana-suecia/2491869/', 'url': 'http://www.rtve.es/alacarta/videos/balonmano/o-swiss-cup-masculina-final-espana-suecia/2491869/',
'md5': '18fcd45965bdd076efdb12cd7f6d7b9e', 'md5': '1d49b7e1ca7a7502c56a4bf1b60f1b43',
'info_dict': { 'info_dict': {
'id': '2491869', 'id': '2491869',
'ext': 'mp4', 'ext': 'mp4',

View File

@ -20,7 +20,7 @@ class SaveFromIE(InfoExtractor):
'upload_date': '20120816', 'upload_date': '20120816',
'uploader': 'Howcast', 'uploader': 'Howcast',
'uploader_id': 'Howcast', 'uploader_id': 'Howcast',
'description': 'md5:4f0aac94361a12e1ce57d74f85265175', 'description': 'md5:727900f130df3dc9a25e2721497c7910',
}, },
'params': { 'params': {
'skip_download': True 'skip_download': True

View File

@ -0,0 +1,68 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
float_or_none,
str_to_int,
parse_duration,
)
class SnotrIE(InfoExtractor):
    """Extractor for videos hosted on snotr.com.

    The actual media file lives on a fixed CDN URL pattern derived from
    the numeric video id; the page is only scraped for metadata
    (title, description, view count, duration, approximate file size).
    """
    _VALID_URL = r'http?://(?:www\.)?snotr\.com/video/(?P<id>\d+)/([\w]+)'

    _TESTS = [{
        'url': 'http://www.snotr.com/video/13708/Drone_flying_through_fireworks',
        'info_dict': {
            'id': '13708',
            'ext': 'flv',
            'title': 'Drone flying through fireworks!',
            'duration': 247,
            'filesize_approx': 98566144,
            'description': 'A drone flying through Fourth of July Fireworks',
        }
    }, {
        'url': 'http://www.snotr.com/video/530/David_Letteman_-_George_W_Bush_Top_10',
        'info_dict': {
            'id': '530',
            'ext': 'flv',
            'title': 'David Letteman - George W. Bush Top 10',
            'duration': 126,
            'filesize_approx': 8912896,
            'description': 'The top 10 George W. Bush moments, brought to you by David Letterman!',
        }
    }]

    def _real_extract(self, url):
        video_id = re.match(self._VALID_URL, url).group('id')
        page = self._download_webpage(url, video_id)

        # Metadata comes from the sidebar "<strong>Label:</strong>" rows;
        # all three lookups are best-effort (fatal=False → None on miss).
        views = str_to_int(self._html_search_regex(
            r'<p>\n<strong>Views:</strong>\n([\d,\.]+)</p>',
            page, 'view count', fatal=False))
        length = parse_duration(self._html_search_regex(
            r'<p>\n<strong>Length:</strong>\n\s*([0-9:]+).*?</p>',
            page, 'duration', fatal=False))
        # Page reports megabytes; invscale converts to bytes.
        approx_size = float_or_none(self._html_search_regex(
            r'<p>\n<strong>Filesize:</strong>\n\s*([0-9.]+)\s*megabyte</p>',
            page, 'filesize', fatal=False), invscale=1024 * 1024)

        return {
            'id': video_id,
            'url': "http://cdn.videos.snotr.com/%s.flv" % video_id,
            'title': self._og_search_title(page),
            'description': self._og_search_description(page),
            'view_count': views,
            'duration': length,
            'filesize_approx': approx_size,
        }

View File

@ -0,0 +1,78 @@
# coding: utf-8
from __future__ import unicode_literals
from ..utils import (
ExtractorError,
compat_urllib_parse,
compat_urllib_request,
)
import re
from .common import InfoExtractor
class SockshareIE(InfoExtractor):
    """Extractor for the sockshare.com file host.

    Requires a confirmation POST (hidden "hash" field + "Continue as
    Free User") before the page exposing the download link is served.
    """
    _VALID_URL = r'https?://(?:www\.)?sockshare\.com/file/(?P<id>[0-9A-Za-z]+)'
    _FILE_DELETED_REGEX = r'This file doesn\'t exist, or has been removed\.</div>'
    _TEST = {
        'url': 'http://www.sockshare.com/file/437BE28B89D799D7',
        'md5': '9d0bf1cfb6dbeaa8d562f6c97506c5bd',
        'info_dict': {
            'id': '437BE28B89D799D7',
            'title': 'big_buck_bunny_720p_surround.avi',
            'ext': 'avi',
            'thumbnail': 're:^http://.*\.jpg$',
        }
    }

    def _real_extract(self, url):
        video_id = re.match(self._VALID_URL, url).group('id')
        # Normalize to the canonical URL before fetching.
        url = 'http://sockshare.com/file/%s' % video_id
        page = self._download_webpage(url, video_id)

        if re.search(self._FILE_DELETED_REGEX, page) is not None:
            raise ExtractorError('Video %s does not exist' % video_id,
                                 expected=True)

        confirm_hash = self._html_search_regex(r'''(?x)<input\s+
            type="hidden"\s+
            value="([^"]*)"\s+
            name="hash"
            ''', page, 'hash')

        post_data = compat_urllib_parse.urlencode({
            "hash": confirm_hash,
            "confirm": "Continue as Free User"
        })
        request = compat_urllib_request.Request(url, post_data)
        # Apparently, this header is required for confirmation to work.
        request.add_header('Host', 'www.sockshare.com')
        request.add_header('Content-type', 'application/x-www-form-urlencoded')

        page = self._download_webpage(
            request, video_id, 'Downloading video page')

        video_url = "http://www.sockshare.com" + self._html_search_regex(
            r'<a href="([^"]*)".+class="download_file_link"',
            page, 'file url')
        title = self._html_search_regex(r'<h1>(.+)<strong>', page, 'title')
        thumbnail = self._html_search_regex(
            r'<img\s+src="([^"]*)".+?name="bg"',
            page, 'thumbnail')

        return {
            'id': video_id,
            'title': title,
            'thumbnail': thumbnail,
            'formats': [{
                'format_id': 'sd',
                'url': video_url,
            }],
        }

View File

@ -53,7 +53,7 @@ class SteamIE(InfoExtractor):
'ext': 'mp4', 'ext': 'mp4',
'upload_date': '20140329', 'upload_date': '20140329',
'title': 'FRONTIERS - Final Greenlight Trailer', 'title': 'FRONTIERS - Final Greenlight Trailer',
'description': 'md5:6df4fe8dd494ae811869672b0767e025', 'description': 'md5:dc96a773669d0ca1b36c13c1f30250d9',
'uploader': 'AAD Productions', 'uploader': 'AAD Productions',
'uploader_id': 'AtomicAgeDogGames', 'uploader_id': 'AtomicAgeDogGames',
} }

View File

@ -19,16 +19,6 @@ class TagesschauIE(InfoExtractor):
'description': 'md5:69da3c61275b426426d711bde96463ab', 'description': 'md5:69da3c61275b426426d711bde96463ab',
'thumbnail': 're:^http:.*\.jpg$', 'thumbnail': 're:^http:.*\.jpg$',
}, },
}, {
'url': 'http://www.tagesschau.de/multimedia/video/video-5964.html',
'md5': '66652566900963a3f962333579eeffcf',
'info_dict': {
'id': '5964',
'ext': 'mp4',
'title': 'Nahost-Konflikt: Israel bombadiert Ziele im Gazastreifen und Westjordanland',
'description': 'md5:07bfc78c48eec3145ed4805299a1900a',
'thumbnail': 're:http://.*\.jpg',
},
}] }]
_FORMATS = { _FORMATS = {

View File

@ -62,7 +62,7 @@ class TeacherTubeIE(InfoExtractor):
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
title = self._html_search_meta('title', webpage, 'title') title = self._html_search_meta('title', webpage, 'title', fatal=True)
TITLE_SUFFIX = ' - TeacherTube' TITLE_SUFFIX = ' - TeacherTube'
if title.endswith(TITLE_SUFFIX): if title.endswith(TITLE_SUFFIX):
title = title[:-len(TITLE_SUFFIX)].strip() title = title[:-len(TITLE_SUFFIX)].strip()
@ -101,7 +101,11 @@ class TeacherTubeUserIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?teachertube\.com/(user/profile|collection)/(?P<user>[0-9a-zA-Z]+)/?' _VALID_URL = r'https?://(?:www\.)?teachertube\.com/(user/profile|collection)/(?P<user>[0-9a-zA-Z]+)/?'
_MEDIA_RE = r'(?s)"sidebar_thumb_time">[0-9:]+</div>.+?<a href="(https?://(?:www\.)?teachertube\.com/(?:video|audio)/[^"]+)">' _MEDIA_RE = r'''(?sx)
class="?sidebar_thumb_time"?>[0-9:]+</div>
\s*
<a\s+href="(https?://(?:www\.)?teachertube\.com/(?:video|audio)/[^"]+)"
'''
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
@ -111,14 +115,12 @@ class TeacherTubeUserIE(InfoExtractor):
webpage = self._download_webpage(url, user_id) webpage = self._download_webpage(url, user_id)
urls.extend(re.findall(self._MEDIA_RE, webpage)) urls.extend(re.findall(self._MEDIA_RE, webpage))
pages = re.findall(r'/ajax-user/user-videos/%s\?page=([0-9]+)' % user_id, webpage)[1:-1] pages = re.findall(r'/ajax-user/user-videos/%s\?page=([0-9]+)' % user_id, webpage)[:-1]
for p in pages: for p in pages:
more = 'http://www.teachertube.com/ajax-user/user-videos/%s?page=%s' % (user_id, p) more = 'http://www.teachertube.com/ajax-user/user-videos/%s?page=%s' % (user_id, p)
webpage = self._download_webpage(more, user_id, 'Downloading page %s/%s' % (p, len(pages) + 1)) webpage = self._download_webpage(more, user_id, 'Downloading page %s/%s' % (p, len(pages)))
urls.extend(re.findall(self._MEDIA_RE, webpage)) video_urls = re.findall(self._MEDIA_RE, webpage)
urls.extend(video_urls)
entries = []
for url in urls:
entries.append(self.url_result(url, 'TeacherTube'))
entries = [self.url_result(vurl, 'TeacherTube') for vurl in urls]
return self.playlist_result(entries, user_id) return self.playlist_result(entries, user_id)

View File

@ -98,7 +98,7 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor):
'info_dict': { 'info_dict': {
'id': '54469442', 'id': '54469442',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Kathy Sierra: Building the minimum Badass User, Business of Software', 'title': 'Kathy Sierra: Building the minimum Badass User, Business of Software 2012',
'uploader': 'The BLN & Business of Software', 'uploader': 'The BLN & Business of Software',
'uploader_id': 'theblnbusinessofsoftware', 'uploader_id': 'theblnbusinessofsoftware',
'duration': 3610, 'duration': 3610,

View File

@ -55,14 +55,14 @@ class WDRIE(InfoExtractor):
}, },
}, },
{ {
'url': 'http://www.funkhauseuropa.de/av/audiosuepersongsoulbossanova100-audioplayer.html', 'url': 'http://www.funkhauseuropa.de/av/audioflaviacoelhoamaramar100-audioplayer.html',
'md5': '24e83813e832badb0a8d7d1ef9ef0691', 'md5': '99a1443ff29af19f6c52cf6f4dc1f4aa',
'info_dict': { 'info_dict': {
'id': 'mdb-463528', 'id': 'mdb-478135',
'ext': 'mp3', 'ext': 'mp3',
'title': 'Süpersong: Soul Bossa Nova', 'title': 'Flavia Coelho: Amar é Amar',
'description': 'md5:7b29e97e10dfb6e265238b32fa35b23a', 'description': 'md5:7b29e97e10dfb6e265238b32fa35b23a',
'upload_date': '20140630', 'upload_date': '20140717',
}, },
}, },
] ]

View File

@ -91,11 +91,9 @@ except ImportError:
compat_subprocess_get_DEVNULL = lambda: open(os.path.devnull, 'w') compat_subprocess_get_DEVNULL = lambda: open(os.path.devnull, 'w')
try: try:
from urllib.parse import parse_qs as compat_parse_qs from urllib.parse import unquote as compat_urllib_parse_unquote
except ImportError: # Python 2 except ImportError:
# HACK: The following is the correct parse_qs implementation from cpython 3's stdlib. def compat_urllib_parse_unquote(string, encoding='utf-8', errors='replace'):
# Python 2's version is apparently totally broken
def _unquote(string, encoding='utf-8', errors='replace'):
if string == '': if string == '':
return string return string
res = string.split('%') res = string.split('%')
@ -130,6 +128,13 @@ except ImportError: # Python 2
string += pct_sequence.decode(encoding, errors) string += pct_sequence.decode(encoding, errors)
return string return string
try:
from urllib.parse import parse_qs as compat_parse_qs
except ImportError: # Python 2
# HACK: The following is the correct parse_qs implementation from cpython 3's stdlib.
# Python 2's version is apparently totally broken
def _parse_qsl(qs, keep_blank_values=False, strict_parsing=False, def _parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
encoding='utf-8', errors='replace'): encoding='utf-8', errors='replace'):
qs, _coerce_result = qs, unicode qs, _coerce_result = qs, unicode
@ -149,10 +154,12 @@ except ImportError: # Python 2
continue continue
if len(nv[1]) or keep_blank_values: if len(nv[1]) or keep_blank_values:
name = nv[0].replace('+', ' ') name = nv[0].replace('+', ' ')
name = _unquote(name, encoding=encoding, errors=errors) name = compat_urllib_parse_unquote(
name, encoding=encoding, errors=errors)
name = _coerce_result(name) name = _coerce_result(name)
value = nv[1].replace('+', ' ') value = nv[1].replace('+', ' ')
value = _unquote(value, encoding=encoding, errors=errors) value = compat_urllib_parse_unquote(
value, encoding=encoding, errors=errors)
value = _coerce_result(value) value = _coerce_result(value)
r.append((name, value)) r.append((name, value))
return r return r
@ -1193,13 +1200,6 @@ def format_bytes(bytes):
return u'%.2f%s' % (converted, suffix) return u'%.2f%s' % (converted, suffix)
def str_to_int(int_str):
if int_str is None:
return None
int_str = re.sub(r'[,\.]', u'', int_str)
return int(int_str)
def get_term_width(): def get_term_width():
columns = os.environ.get('COLUMNS', None) columns = os.environ.get('COLUMNS', None)
if columns: if columns:
@ -1267,15 +1267,22 @@ class HEADRequest(compat_urllib_request.Request):
return "HEAD" return "HEAD"
def int_or_none(v, scale=1, default=None, get_attr=None): def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
if get_attr: if get_attr:
if v is not None: if v is not None:
v = getattr(v, get_attr, None) v = getattr(v, get_attr, None)
return default if v is None else (int(v) // scale) return default if v is None else (int(v) * invscale // scale)
def float_or_none(v, scale=1, default=None): def str_to_int(int_str):
return default if v is None else (float(v) / scale) if int_str is None:
return None
int_str = re.sub(r'[,\.]', u'', int_str)
return int(int_str)
def float_or_none(v, scale=1, invscale=1, default=None):
return default if v is None else (float(v) * invscale / scale)
def parse_duration(s): def parse_duration(s):