Merge branch 'master' of https://github.com/rg3/youtube-dl into multipart_videos
This commit is contained in:
commit
5d0495f7b4
@ -65,6 +65,7 @@ which means you can modify it, redistribute it or use it however you like.
|
|||||||
configuration in ~/.config/youtube-dl.conf
|
configuration in ~/.config/youtube-dl.conf
|
||||||
(%APPDATA%/youtube-dl/config.txt on
|
(%APPDATA%/youtube-dl/config.txt on
|
||||||
Windows)
|
Windows)
|
||||||
|
--encoding ENCODING Force the specified encoding (experimental)
|
||||||
|
|
||||||
## Video Selection:
|
## Video Selection:
|
||||||
--playlist-start NUMBER playlist video to start at (default is 1)
|
--playlist-start NUMBER playlist video to start at (default is 1)
|
||||||
|
@ -144,7 +144,15 @@ class TestAllURLsMatching(unittest.TestCase):
|
|||||||
self.assertMatch('http://video.pbs.org/widget/partnerplayer/980042464/', ['PBS'])
|
self.assertMatch('http://video.pbs.org/widget/partnerplayer/980042464/', ['PBS'])
|
||||||
|
|
||||||
def test_ComedyCentralShows(self):
|
def test_ComedyCentralShows(self):
|
||||||
self.assertMatch('http://thedailyshow.cc.com/extended-interviews/xm3fnq/andrew-napolitano-extended-interview', ['ComedyCentralShows'])
|
self.assertMatch(
|
||||||
|
'http://thedailyshow.cc.com/extended-interviews/xm3fnq/andrew-napolitano-extended-interview',
|
||||||
|
['ComedyCentralShows'])
|
||||||
|
self.assertMatch(
|
||||||
|
'http://thecolbertreport.cc.com/videos/29w6fx/-realhumanpraise-for-fox-news',
|
||||||
|
['ComedyCentralShows'])
|
||||||
|
self.assertMatch(
|
||||||
|
'http://thecolbertreport.cc.com/videos/gh6urb/neil-degrasse-tyson-pt--1?xrs=eml_col_031114',
|
||||||
|
['ComedyCentralShows'])
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
@ -8,6 +8,7 @@ import datetime
|
|||||||
import errno
|
import errno
|
||||||
import io
|
import io
|
||||||
import json
|
import json
|
||||||
|
import locale
|
||||||
import os
|
import os
|
||||||
import platform
|
import platform
|
||||||
import re
|
import re
|
||||||
@ -160,6 +161,7 @@ class YoutubeDL(object):
|
|||||||
include_ads: Download ads as well
|
include_ads: Download ads as well
|
||||||
default_search: Prepend this string if an input url is not valid.
|
default_search: Prepend this string if an input url is not valid.
|
||||||
'auto' for elaborate guessing
|
'auto' for elaborate guessing
|
||||||
|
encoding: Use this encoding instead of the system-specified.
|
||||||
|
|
||||||
The following parameters are not used by YoutubeDL itself, they are used by
|
The following parameters are not used by YoutubeDL itself, they are used by
|
||||||
the FileDownloader:
|
the FileDownloader:
|
||||||
@ -1219,6 +1221,9 @@ class YoutubeDL(object):
|
|||||||
def print_debug_header(self):
|
def print_debug_header(self):
|
||||||
if not self.params.get('verbose'):
|
if not self.params.get('verbose'):
|
||||||
return
|
return
|
||||||
|
|
||||||
|
write_string('[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' %
|
||||||
|
(locale.getpreferredencoding(), sys.getfilesystemencoding(), sys.stdout.encoding, self.get_encoding()))
|
||||||
write_string('[debug] youtube-dl version ' + __version__ + '\n')
|
write_string('[debug] youtube-dl version ' + __version__ + '\n')
|
||||||
try:
|
try:
|
||||||
sp = subprocess.Popen(
|
sp = subprocess.Popen(
|
||||||
@ -1283,3 +1288,19 @@ class YoutubeDL(object):
|
|||||||
# (See https://github.com/rg3/youtube-dl/issues/1309 for details)
|
# (See https://github.com/rg3/youtube-dl/issues/1309 for details)
|
||||||
opener.addheaders = []
|
opener.addheaders = []
|
||||||
self._opener = opener
|
self._opener = opener
|
||||||
|
|
||||||
|
def encode(self, s):
|
||||||
|
if isinstance(s, bytes):
|
||||||
|
return s # Already encoded
|
||||||
|
|
||||||
|
try:
|
||||||
|
return s.encode(self.get_encoding())
|
||||||
|
except UnicodeEncodeError as err:
|
||||||
|
err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
|
||||||
|
raise
|
||||||
|
|
||||||
|
def get_encoding(self):
|
||||||
|
encoding = self.params.get('encoding')
|
||||||
|
if encoding is None:
|
||||||
|
encoding = preferredencoding()
|
||||||
|
return encoding
|
||||||
|
@ -51,6 +51,7 @@ __authors__ = (
|
|||||||
'David Wagner',
|
'David Wagner',
|
||||||
'Juan C. Olivares',
|
'Juan C. Olivares',
|
||||||
'Mattias Harrysson',
|
'Mattias Harrysson',
|
||||||
|
'phaer',
|
||||||
)
|
)
|
||||||
|
|
||||||
__license__ = 'Public Domain'
|
__license__ = 'Public Domain'
|
||||||
@ -256,13 +257,17 @@ def parseOpts(overrideArguments=None):
|
|||||||
general.add_option(
|
general.add_option(
|
||||||
'--bidi-workaround', dest='bidi_workaround', action='store_true',
|
'--bidi-workaround', dest='bidi_workaround', action='store_true',
|
||||||
help=u'Work around terminals that lack bidirectional text support. Requires bidiv or fribidi executable in PATH')
|
help=u'Work around terminals that lack bidirectional text support. Requires bidiv or fribidi executable in PATH')
|
||||||
general.add_option('--default-search',
|
general.add_option(
|
||||||
dest='default_search', metavar='PREFIX',
|
'--default-search',
|
||||||
help='Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for youtube-dl "large apple". By default (with value "auto") youtube-dl guesses.')
|
dest='default_search', metavar='PREFIX',
|
||||||
|
help='Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for youtube-dl "large apple". By default (with value "auto") youtube-dl guesses.')
|
||||||
general.add_option(
|
general.add_option(
|
||||||
'--ignore-config',
|
'--ignore-config',
|
||||||
action='store_true',
|
action='store_true',
|
||||||
help='Do not read configuration files. When given in the global configuration file /etc/youtube-dl.conf: do not read the user configuration in ~/.config/youtube-dl.conf (%APPDATA%/youtube-dl/config.txt on Windows)')
|
help='Do not read configuration files. When given in the global configuration file /etc/youtube-dl.conf: do not read the user configuration in ~/.config/youtube-dl.conf (%APPDATA%/youtube-dl/config.txt on Windows)')
|
||||||
|
general.add_option(
|
||||||
|
'--encoding', dest='encoding', metavar='ENCODING',
|
||||||
|
help='Force the specified encoding (experimental)')
|
||||||
|
|
||||||
selection.add_option(
|
selection.add_option(
|
||||||
'--playlist-start',
|
'--playlist-start',
|
||||||
@ -542,8 +547,6 @@ def parseOpts(overrideArguments=None):
|
|||||||
write_string(u'[debug] System config: ' + repr(_hide_login_info(systemConf)) + '\n')
|
write_string(u'[debug] System config: ' + repr(_hide_login_info(systemConf)) + '\n')
|
||||||
write_string(u'[debug] User config: ' + repr(_hide_login_info(userConf)) + '\n')
|
write_string(u'[debug] User config: ' + repr(_hide_login_info(userConf)) + '\n')
|
||||||
write_string(u'[debug] Command-line args: ' + repr(_hide_login_info(commandLineConf)) + '\n')
|
write_string(u'[debug] Command-line args: ' + repr(_hide_login_info(commandLineConf)) + '\n')
|
||||||
write_string(u'[debug] Encodings: locale %r, fs %r, out %r, pref: %r\n' %
|
|
||||||
(locale.getpreferredencoding(), sys.getfilesystemencoding(), sys.stdout.encoding, preferredencoding()))
|
|
||||||
|
|
||||||
return parser, opts, args
|
return parser, opts, args
|
||||||
|
|
||||||
@ -677,7 +680,7 @@ def _real_main(argv=None):
|
|||||||
date = DateRange.day(opts.date)
|
date = DateRange.day(opts.date)
|
||||||
else:
|
else:
|
||||||
date = DateRange(opts.dateafter, opts.datebefore)
|
date = DateRange(opts.dateafter, opts.datebefore)
|
||||||
if opts.default_search not in ('auto', None) and ':' not in opts.default_search:
|
if opts.default_search not in ('auto', 'auto_warning', None) and ':' not in opts.default_search:
|
||||||
parser.error(u'--default-search invalid; did you forget a colon (:) at the end?')
|
parser.error(u'--default-search invalid; did you forget a colon (:) at the end?')
|
||||||
|
|
||||||
# Do not download videos when there are audio-only formats
|
# Do not download videos when there are audio-only formats
|
||||||
@ -789,6 +792,7 @@ def _real_main(argv=None):
|
|||||||
'include_ads': opts.include_ads,
|
'include_ads': opts.include_ads,
|
||||||
'default_search': opts.default_search,
|
'default_search': opts.default_search,
|
||||||
'youtube_include_dash_manifest': opts.youtube_include_dash_manifest,
|
'youtube_include_dash_manifest': opts.youtube_include_dash_manifest,
|
||||||
|
'encoding': opts.encoding,
|
||||||
}
|
}
|
||||||
|
|
||||||
with YoutubeDL(ydl_opts) as ydl:
|
with YoutubeDL(ydl_opts) as ydl:
|
||||||
|
@ -156,6 +156,7 @@ from .mtv import (
|
|||||||
MTVIE,
|
MTVIE,
|
||||||
MTVIggyIE,
|
MTVIggyIE,
|
||||||
)
|
)
|
||||||
|
from .musicplayon import MusicPlayOnIE
|
||||||
from .muzu import MuzuTVIE
|
from .muzu import MuzuTVIE
|
||||||
from .myspace import MySpaceIE
|
from .myspace import MySpaceIE
|
||||||
from .myspass import MySpassIE
|
from .myspass import MySpassIE
|
||||||
@ -285,7 +286,10 @@ from .vk import VKIE
|
|||||||
from .vube import VubeIE
|
from .vube import VubeIE
|
||||||
from .washingtonpost import WashingtonPostIE
|
from .washingtonpost import WashingtonPostIE
|
||||||
from .wat import WatIE
|
from .wat import WatIE
|
||||||
from .wdr import WDRIE
|
from .wdr import (
|
||||||
|
WDRIE,
|
||||||
|
WDRMausIE,
|
||||||
|
)
|
||||||
from .weibo import WeiboIE
|
from .weibo import WeiboIE
|
||||||
from .wimp import WimpIE
|
from .wimp import WimpIE
|
||||||
from .wistia import WistiaIE
|
from .wistia import WistiaIE
|
||||||
|
@ -6,7 +6,6 @@ import json
|
|||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
compat_urlparse,
|
compat_urlparse,
|
||||||
determine_ext,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@ -1,22 +1,21 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from .ooyala import OoyalaIE
|
|
||||||
|
|
||||||
|
|
||||||
class BloombergIE(InfoExtractor):
|
class BloombergIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://www\.bloomberg\.com/video/(?P<name>.+?)\.html'
|
_VALID_URL = r'https?://www\.bloomberg\.com/video/(?P<name>.+?)\.html'
|
||||||
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
u'url': u'http://www.bloomberg.com/video/shah-s-presentation-on-foreign-exchange-strategies-qurhIVlJSB6hzkVi229d8g.html',
|
'url': 'http://www.bloomberg.com/video/shah-s-presentation-on-foreign-exchange-strategies-qurhIVlJSB6hzkVi229d8g.html',
|
||||||
u'file': u'12bzhqZTqQHmmlA8I-i0NpzJgcG5NNYX.mp4',
|
'md5': '7bf08858ff7c203c870e8a6190e221e5',
|
||||||
u'info_dict': {
|
'info_dict': {
|
||||||
u'title': u'Shah\'s Presentation on Foreign-Exchange Strategies',
|
'id': 'qurhIVlJSB6hzkVi229d8g',
|
||||||
u'description': u'md5:abc86e5236f9f0e4866c59ad36736686',
|
'ext': 'flv',
|
||||||
},
|
'title': 'Shah\'s Presentation on Foreign-Exchange Strategies',
|
||||||
u'params': {
|
'description': 'md5:0681e0d30dcdfc6abf34594961d8ea88',
|
||||||
# Requires ffmpeg (m3u8 manifest)
|
|
||||||
u'skip_download': True,
|
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -24,7 +23,16 @@ class BloombergIE(InfoExtractor):
|
|||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
name = mobj.group('name')
|
name = mobj.group('name')
|
||||||
webpage = self._download_webpage(url, name)
|
webpage = self._download_webpage(url, name)
|
||||||
embed_code = self._search_regex(
|
f4m_url = self._search_regex(
|
||||||
r'<source src="https?://[^/]+/[^/]+/[^/]+/([^/]+)', webpage,
|
r'<source src="(https?://[^"]+\.f4m.*?)"', webpage,
|
||||||
'embed code')
|
'f4m url')
|
||||||
return OoyalaIE._build_url_result(embed_code)
|
title = re.sub(': Video$', '', self._og_search_title(webpage))
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': name.split('-')[-1],
|
||||||
|
'title': title,
|
||||||
|
'url': f4m_url,
|
||||||
|
'ext': 'flv',
|
||||||
|
'description': self._og_search_description(webpage),
|
||||||
|
'thumbnail': self._og_search_thumbnail(webpage),
|
||||||
|
}
|
||||||
|
@ -87,7 +87,7 @@ class BrightcoveIE(InfoExtractor):
|
|||||||
object_str = object_str.replace('<--', '<!--')
|
object_str = object_str.replace('<--', '<!--')
|
||||||
object_str = fix_xml_ampersands(object_str)
|
object_str = fix_xml_ampersands(object_str)
|
||||||
|
|
||||||
object_doc = xml.etree.ElementTree.fromstring(object_str)
|
object_doc = xml.etree.ElementTree.fromstring(object_str.encode('utf-8'))
|
||||||
|
|
||||||
fv_el = find_xpath_attr(object_doc, './param', 'name', 'flashVars')
|
fv_el = find_xpath_attr(object_doc, './param', 'name', 'flashVars')
|
||||||
if fv_el is not None:
|
if fv_el is not None:
|
||||||
|
@ -43,11 +43,13 @@ class ComedyCentralShowsIE(InfoExtractor):
|
|||||||
(?P<showname>thedailyshow|thecolbertreport)\.(?:cc\.)?com/
|
(?P<showname>thedailyshow|thecolbertreport)\.(?:cc\.)?com/
|
||||||
(full-episodes/(?P<episode>.*)|
|
(full-episodes/(?P<episode>.*)|
|
||||||
(?P<clip>
|
(?P<clip>
|
||||||
(the-colbert-report-(videos|collections)/(?P<clipID>[0-9]+)/[^/]*/(?P<cntitle>.*?))
|
(?:videos/[^/]+/(?P<videotitle>[^/?#]+))
|
||||||
|(watch/(?P<date>[^/]*)/(?P<tdstitle>.*)))|
|
|(the-colbert-report-(videos|collections)/(?P<clipID>[0-9]+)/[^/]*/(?P<cntitle>.*?))
|
||||||
|
|(watch/(?P<date>[^/]*)/(?P<tdstitle>.*))
|
||||||
|
)|
|
||||||
(?P<interview>
|
(?P<interview>
|
||||||
extended-interviews/(?P<interID>[0-9a-z]+)/(?:playlist_tds_extended_)?(?P<interview_title>.*?)(/.*?)?)))
|
extended-interviews/(?P<interID>[0-9a-z]+)/(?:playlist_tds_extended_)?(?P<interview_title>.*?)(/.*?)?)))
|
||||||
$'''
|
(?:[?#].*|$)'''
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://thedailyshow.cc.com/watch/thu-december-13-2012/kristen-stewart',
|
'url': 'http://thedailyshow.cc.com/watch/thu-december-13-2012/kristen-stewart',
|
||||||
'md5': '4e2f5cb088a83cd8cdb7756132f9739d',
|
'md5': '4e2f5cb088a83cd8cdb7756132f9739d',
|
||||||
@ -57,7 +59,7 @@ class ComedyCentralShowsIE(InfoExtractor):
|
|||||||
'upload_date': '20121213',
|
'upload_date': '20121213',
|
||||||
'description': 'Kristen Stewart learns to let loose in "On the Road."',
|
'description': 'Kristen Stewart learns to let loose in "On the Road."',
|
||||||
'uploader': 'thedailyshow',
|
'uploader': 'thedailyshow',
|
||||||
'title': 'thedailyshow-kristen-stewart part 1',
|
'title': 'thedailyshow kristen-stewart part 1',
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -102,7 +104,9 @@ class ComedyCentralShowsIE(InfoExtractor):
|
|||||||
assert mobj is not None
|
assert mobj is not None
|
||||||
|
|
||||||
if mobj.group('clip'):
|
if mobj.group('clip'):
|
||||||
if mobj.group('showname') == 'thedailyshow':
|
if mobj.group('videotitle'):
|
||||||
|
epTitle = mobj.group('videotitle')
|
||||||
|
elif mobj.group('showname') == 'thedailyshow':
|
||||||
epTitle = mobj.group('tdstitle')
|
epTitle = mobj.group('tdstitle')
|
||||||
else:
|
else:
|
||||||
epTitle = mobj.group('cntitle')
|
epTitle = mobj.group('cntitle')
|
||||||
@ -161,7 +165,7 @@ class ComedyCentralShowsIE(InfoExtractor):
|
|||||||
content = itemEl.find('.//{http://search.yahoo.com/mrss/}content')
|
content = itemEl.find('.//{http://search.yahoo.com/mrss/}content')
|
||||||
duration = float_or_none(content.attrib.get('duration'))
|
duration = float_or_none(content.attrib.get('duration'))
|
||||||
mediagen_url = content.attrib['url']
|
mediagen_url = content.attrib['url']
|
||||||
guid = itemEl.find('.//guid').text.rpartition(':')[-1]
|
guid = itemEl.find('./guid').text.rpartition(':')[-1]
|
||||||
|
|
||||||
cdoc = self._download_xml(
|
cdoc = self._download_xml(
|
||||||
mediagen_url, epTitle,
|
mediagen_url, epTitle,
|
||||||
|
@ -10,9 +10,10 @@ class DiscoveryIE(InfoExtractor):
|
|||||||
_VALID_URL = r'http://dsc\.discovery\.com\/[a-zA-Z0-9\-]*/[a-zA-Z0-9\-]*/videos/(?P<id>[a-zA-Z0-9\-]*)(.htm)?'
|
_VALID_URL = r'http://dsc\.discovery\.com\/[a-zA-Z0-9\-]*/[a-zA-Z0-9\-]*/videos/(?P<id>[a-zA-Z0-9\-]*)(.htm)?'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://dsc.discovery.com/tv-shows/mythbusters/videos/mission-impossible-outtakes.htm',
|
'url': 'http://dsc.discovery.com/tv-shows/mythbusters/videos/mission-impossible-outtakes.htm',
|
||||||
'file': '614784.mp4',
|
|
||||||
'md5': 'e12614f9ee303a6ccef415cb0793eba2',
|
'md5': 'e12614f9ee303a6ccef415cb0793eba2',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
'id': '614784',
|
||||||
|
'ext': 'mp4',
|
||||||
'title': 'MythBusters: Mission Impossible Outtakes',
|
'title': 'MythBusters: Mission Impossible Outtakes',
|
||||||
'description': ('Watch Jamie Hyneman and Adam Savage practice being'
|
'description': ('Watch Jamie Hyneman and Adam Savage practice being'
|
||||||
' each other -- to the point of confusing Jamie\'s dog -- and '
|
' each other -- to the point of confusing Jamie\'s dog -- and '
|
||||||
@ -34,7 +35,7 @@ class DiscoveryIE(InfoExtractor):
|
|||||||
formats = []
|
formats = []
|
||||||
for f in info['mp4']:
|
for f in info['mp4']:
|
||||||
formats.append(
|
formats.append(
|
||||||
{'url': f['src'], r'ext': r'mp4', 'tbr': int(f['bitrate'][:-1])})
|
{'url': f['src'], 'ext': 'mp4', 'tbr': int(f['bitrate'][:-1])})
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': info['contentId'],
|
'id': info['contentId'],
|
||||||
|
@ -82,6 +82,17 @@ class GenericIE(InfoExtractor):
|
|||||||
},
|
},
|
||||||
'add_ie': ['Brightcove'],
|
'add_ie': ['Brightcove'],
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
'url': 'http://www.championat.com/video/football/v/87/87499.html',
|
||||||
|
'md5': 'fb973ecf6e4a78a67453647444222983',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '3414141473001',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Видео. Удаление Дзагоева (ЦСКА)',
|
||||||
|
'description': 'Онлайн-трансляция матча ЦСКА - "Волга"',
|
||||||
|
'uploader': 'Championat',
|
||||||
|
},
|
||||||
|
},
|
||||||
# Direct link to a video
|
# Direct link to a video
|
||||||
{
|
{
|
||||||
'url': 'http://media.w3.org/2010/05/sintel/trailer.mp4',
|
'url': 'http://media.w3.org/2010/05/sintel/trailer.mp4',
|
||||||
@ -316,13 +327,16 @@ class GenericIE(InfoExtractor):
|
|||||||
if not parsed_url.scheme:
|
if not parsed_url.scheme:
|
||||||
default_search = self._downloader.params.get('default_search')
|
default_search = self._downloader.params.get('default_search')
|
||||||
if default_search is None:
|
if default_search is None:
|
||||||
default_search = 'auto'
|
default_search = 'auto_warning'
|
||||||
|
|
||||||
if default_search == 'auto':
|
if default_search in ('auto', 'auto_warning'):
|
||||||
if '/' in url:
|
if '/' in url:
|
||||||
self._downloader.report_warning('The url doesn\'t specify the protocol, trying with http')
|
self._downloader.report_warning('The url doesn\'t specify the protocol, trying with http')
|
||||||
return self.url_result('http://' + url)
|
return self.url_result('http://' + url)
|
||||||
else:
|
else:
|
||||||
|
if default_search == 'auto_warning':
|
||||||
|
self._downloader.report_warning(
|
||||||
|
'Falling back to youtube search for %s . Set --default-search to "auto" to suppress this warning.' % url)
|
||||||
return self.url_result('ytsearch:' + url)
|
return self.url_result('ytsearch:' + url)
|
||||||
else:
|
else:
|
||||||
assert ':' in default_search
|
assert ':' in default_search
|
||||||
|
@ -21,9 +21,10 @@ class HuffPostIE(InfoExtractor):
|
|||||||
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://live.huffingtonpost.com/r/segment/legalese-it/52dd3e4b02a7602131000677',
|
'url': 'http://live.huffingtonpost.com/r/segment/legalese-it/52dd3e4b02a7602131000677',
|
||||||
'file': '52dd3e4b02a7602131000677.mp4',
|
|
||||||
'md5': '55f5e8981c1c80a64706a44b74833de8',
|
'md5': '55f5e8981c1c80a64706a44b74833de8',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
'id': '52dd3e4b02a7602131000677',
|
||||||
|
'ext': 'mp4',
|
||||||
'title': 'Legalese It! with @MikeSacksHP',
|
'title': 'Legalese It! with @MikeSacksHP',
|
||||||
'description': 'This week on Legalese It, Mike talks to David Bosco about his new book on the ICC, "Rough Justice," he also discusses the Virginia AG\'s historic stance on gay marriage, the execution of Edgar Tamayo, the ICC\'s delay of Kenya\'s President and more. ',
|
'description': 'This week on Legalese It, Mike talks to David Bosco about his new book on the ICC, "Rough Justice," he also discusses the Virginia AG\'s historic stance on gay marriage, the execution of Edgar Tamayo, the ICC\'s delay of Kenya\'s President and more. ',
|
||||||
'duration': 1549,
|
'duration': 1549,
|
||||||
|
@ -1,10 +1,8 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
import re
|
||||||
import json
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
|
||||||
determine_ext,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class IGNIE(InfoExtractor):
|
class IGNIE(InfoExtractor):
|
||||||
@ -14,52 +12,57 @@ class IGNIE(InfoExtractor):
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
_VALID_URL = r'https?://.+?\.ign\.com/(?P<type>videos|show_videos|articles|(?:[^/]*/feature))(/.+)?/(?P<name_or_id>.+)'
|
_VALID_URL = r'https?://.+?\.ign\.com/(?P<type>videos|show_videos|articles|(?:[^/]*/feature))(/.+)?/(?P<name_or_id>.+)'
|
||||||
IE_NAME = u'ign.com'
|
IE_NAME = 'ign.com'
|
||||||
|
|
||||||
_CONFIG_URL_TEMPLATE = 'http://www.ign.com/videos/configs/id/%s.config'
|
_CONFIG_URL_TEMPLATE = 'http://www.ign.com/videos/configs/id/%s.config'
|
||||||
_DESCRIPTION_RE = [r'<span class="page-object-description">(.+?)</span>',
|
_DESCRIPTION_RE = [
|
||||||
r'id="my_show_video">.*?<p>(.*?)</p>',
|
r'<span class="page-object-description">(.+?)</span>',
|
||||||
]
|
r'id="my_show_video">.*?<p>(.*?)</p>',
|
||||||
|
]
|
||||||
|
|
||||||
_TESTS = [
|
_TESTS = [
|
||||||
{
|
{
|
||||||
u'url': u'http://www.ign.com/videos/2013/06/05/the-last-of-us-review',
|
'url': 'http://www.ign.com/videos/2013/06/05/the-last-of-us-review',
|
||||||
u'file': u'8f862beef863986b2785559b9e1aa599.mp4',
|
'md5': 'eac8bdc1890980122c3b66f14bdd02e9',
|
||||||
u'md5': u'eac8bdc1890980122c3b66f14bdd02e9',
|
'info_dict': {
|
||||||
u'info_dict': {
|
'id': '8f862beef863986b2785559b9e1aa599',
|
||||||
u'title': u'The Last of Us Review',
|
'ext': 'mp4',
|
||||||
u'description': u'md5:c8946d4260a4d43a00d5ae8ed998870c',
|
'title': 'The Last of Us Review',
|
||||||
|
'description': 'md5:c8946d4260a4d43a00d5ae8ed998870c',
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
u'url': u'http://me.ign.com/en/feature/15775/100-little-things-in-gta-5-that-will-blow-your-mind',
|
'url': 'http://me.ign.com/en/feature/15775/100-little-things-in-gta-5-that-will-blow-your-mind',
|
||||||
u'playlist': [
|
'playlist': [
|
||||||
{
|
{
|
||||||
u'file': u'5ebbd138523268b93c9141af17bec937.mp4',
|
'info_dict': {
|
||||||
u'info_dict': {
|
'id': '5ebbd138523268b93c9141af17bec937',
|
||||||
u'title': u'GTA 5 Video Review',
|
'ext': 'mp4',
|
||||||
u'description': u'Rockstar drops the mic on this generation of games. Watch our review of the masterly Grand Theft Auto V.',
|
'title': 'GTA 5 Video Review',
|
||||||
|
'description': 'Rockstar drops the mic on this generation of games. Watch our review of the masterly Grand Theft Auto V.',
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
u'file': u'638672ee848ae4ff108df2a296418ee2.mp4',
|
'info_dict': {
|
||||||
u'info_dict': {
|
'id': '638672ee848ae4ff108df2a296418ee2',
|
||||||
u'title': u'26 Twisted Moments from GTA 5 in Slow Motion',
|
'ext': 'mp4',
|
||||||
u'description': u'The twisted beauty of GTA 5 in stunning slow motion.',
|
'title': '26 Twisted Moments from GTA 5 in Slow Motion',
|
||||||
|
'description': 'The twisted beauty of GTA 5 in stunning slow motion.',
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
u'params': {
|
'params': {
|
||||||
u'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
def _find_video_id(self, webpage):
|
def _find_video_id(self, webpage):
|
||||||
res_id = [r'data-video-id="(.+?)"',
|
res_id = [
|
||||||
r'<object id="vid_(.+?)"',
|
r'data-video-id="(.+?)"',
|
||||||
r'<meta name="og:image" content=".*/(.+?)-(.+?)/.+.jpg"',
|
r'<object id="vid_(.+?)"',
|
||||||
]
|
r'<meta name="og:image" content=".*/(.+?)-(.+?)/.+.jpg"',
|
||||||
|
]
|
||||||
return self._search_regex(res_id, webpage, 'video id')
|
return self._search_regex(res_id, webpage, 'video id')
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
@ -68,7 +71,7 @@ class IGNIE(InfoExtractor):
|
|||||||
page_type = mobj.group('type')
|
page_type = mobj.group('type')
|
||||||
webpage = self._download_webpage(url, name_or_id)
|
webpage = self._download_webpage(url, name_or_id)
|
||||||
if page_type == 'articles':
|
if page_type == 'articles':
|
||||||
video_url = self._search_regex(r'var videoUrl = "(.+?)"', webpage, u'video url')
|
video_url = self._search_regex(r'var videoUrl = "(.+?)"', webpage, 'video url')
|
||||||
return self.url_result(video_url, ie='IGN')
|
return self.url_result(video_url, ie='IGN')
|
||||||
elif page_type != 'video':
|
elif page_type != 'video':
|
||||||
multiple_urls = re.findall(
|
multiple_urls = re.findall(
|
||||||
@ -80,41 +83,37 @@ class IGNIE(InfoExtractor):
|
|||||||
video_id = self._find_video_id(webpage)
|
video_id = self._find_video_id(webpage)
|
||||||
result = self._get_video_info(video_id)
|
result = self._get_video_info(video_id)
|
||||||
description = self._html_search_regex(self._DESCRIPTION_RE,
|
description = self._html_search_regex(self._DESCRIPTION_RE,
|
||||||
webpage, 'video description',
|
webpage, 'video description', flags=re.DOTALL)
|
||||||
flags=re.DOTALL)
|
|
||||||
result['description'] = description
|
result['description'] = description
|
||||||
return result
|
return result
|
||||||
|
|
||||||
def _get_video_info(self, video_id):
|
def _get_video_info(self, video_id):
|
||||||
config_url = self._CONFIG_URL_TEMPLATE % video_id
|
config_url = self._CONFIG_URL_TEMPLATE % video_id
|
||||||
config = json.loads(self._download_webpage(config_url, video_id,
|
config = self._download_json(config_url, video_id)
|
||||||
u'Downloading video info'))
|
|
||||||
media = config['playlist']['media']
|
media = config['playlist']['media']
|
||||||
video_url = media['url']
|
|
||||||
|
|
||||||
return {'id': media['metadata']['videoId'],
|
return {
|
||||||
'url': video_url,
|
'id': media['metadata']['videoId'],
|
||||||
'ext': determine_ext(video_url),
|
'url': media['url'],
|
||||||
'title': media['metadata']['title'],
|
'title': media['metadata']['title'],
|
||||||
'thumbnail': media['poster'][0]['url'].replace('{size}', 'grande'),
|
'thumbnail': media['poster'][0]['url'].replace('{size}', 'grande'),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
class OneUPIE(IGNIE):
|
class OneUPIE(IGNIE):
|
||||||
"""Extractor for 1up.com, it uses the ign videos system."""
|
|
||||||
|
|
||||||
_VALID_URL = r'https?://gamevideos\.1up\.com/(?P<type>video)/id/(?P<name_or_id>.+)'
|
_VALID_URL = r'https?://gamevideos\.1up\.com/(?P<type>video)/id/(?P<name_or_id>.+)'
|
||||||
IE_NAME = '1up.com'
|
IE_NAME = '1up.com'
|
||||||
|
|
||||||
_DESCRIPTION_RE = r'<div id="vid_summary">(.+?)</div>'
|
_DESCRIPTION_RE = r'<div id="vid_summary">(.+?)</div>'
|
||||||
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
u'url': u'http://gamevideos.1up.com/video/id/34976',
|
'url': 'http://gamevideos.1up.com/video/id/34976',
|
||||||
u'file': u'34976.mp4',
|
'md5': '68a54ce4ebc772e4b71e3123d413163d',
|
||||||
u'md5': u'68a54ce4ebc772e4b71e3123d413163d',
|
'info_dict': {
|
||||||
u'info_dict': {
|
'id': '34976',
|
||||||
u'title': u'Sniper Elite V2 - Trailer',
|
'ext': 'mp4',
|
||||||
u'description': u'md5:5d289b722f5a6d940ca3136e9dae89cf',
|
'title': 'Sniper Elite V2 - Trailer',
|
||||||
|
'description': 'md5:5d289b722f5a6d940ca3136e9dae89cf',
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -123,7 +122,6 @@ class OneUPIE(IGNIE):
|
|||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
id = mobj.group('name_or_id')
|
|
||||||
result = super(OneUPIE, self)._real_extract(url)
|
result = super(OneUPIE, self)._real_extract(url)
|
||||||
result['id'] = id
|
result['id'] = mobj.group('name_or_id')
|
||||||
return result
|
return result
|
||||||
|
@ -1,37 +1,39 @@
|
|||||||
|
# encoding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
|
||||||
class KickStarterIE(InfoExtractor):
|
class KickStarterIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://www\.kickstarter\.com/projects/(?P<id>\d*)/.*'
|
_VALID_URL = r'https?://www\.kickstarter\.com/projects/(?P<id>[^/]*)/.*'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
u"url": u"https://www.kickstarter.com/projects/1404461844/intersection-the-story-of-josh-grant?ref=home_location",
|
'url': 'https://www.kickstarter.com/projects/1404461844/intersection-the-story-of-josh-grant?ref=home_location',
|
||||||
u"file": u"1404461844.mp4",
|
'md5': 'c81addca81327ffa66c642b5d8b08cab',
|
||||||
u"md5": u"c81addca81327ffa66c642b5d8b08cab",
|
'info_dict': {
|
||||||
u"info_dict": {
|
'id': '1404461844',
|
||||||
u"title": u"Intersection: The Story of Josh Grant by Kyle Cowling",
|
'ext': 'mp4',
|
||||||
|
'title': 'Intersection: The Story of Josh Grant by Kyle Cowling',
|
||||||
|
'description': 'A unique motocross documentary that examines the '
|
||||||
|
'life and mind of one of sports most elite athletes: Josh Grant.',
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
m = re.match(self._VALID_URL, url)
|
m = re.match(self._VALID_URL, url)
|
||||||
video_id = m.group('id')
|
video_id = m.group('id')
|
||||||
webpage_src = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
video_url = self._search_regex(r'data-video="(.*?)">',
|
video_url = self._search_regex(r'data-video-url="(.*?)"',
|
||||||
webpage_src, u'video URL')
|
webpage, 'video URL')
|
||||||
if 'mp4' in video_url:
|
video_title = self._html_search_regex(r'<title>(.*?)</title>',
|
||||||
ext = 'mp4'
|
webpage, 'title').rpartition('— Kickstarter')[0].strip()
|
||||||
else:
|
|
||||||
ext = 'flv'
|
|
||||||
video_title = self._html_search_regex(r"<title>(.*?)</title>",
|
|
||||||
webpage_src, u'title').rpartition(u'\u2014 Kickstarter')[0].strip()
|
|
||||||
|
|
||||||
results = [{
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'url': video_url,
|
'url': video_url,
|
||||||
'title': video_title,
|
'title': video_title,
|
||||||
'ext': ext,
|
'description': self._og_search_description(webpage),
|
||||||
}]
|
'thumbnail': self._og_search_thumbnail(webpage),
|
||||||
return results
|
}
|
||||||
|
@ -13,8 +13,9 @@ class MetacriticIE(InfoExtractor):
|
|||||||
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://www.metacritic.com/game/playstation-4/infamous-second-son/trailers/3698222',
|
'url': 'http://www.metacritic.com/game/playstation-4/infamous-second-son/trailers/3698222',
|
||||||
'file': '3698222.mp4',
|
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
'id': '3698222',
|
||||||
|
'ext': 'mp4',
|
||||||
'title': 'inFamous: Second Son - inSide Sucker Punch: Smoke & Mirrors',
|
'title': 'inFamous: Second Son - inSide Sucker Punch: Smoke & Mirrors',
|
||||||
'description': 'Take a peak behind-the-scenes to see how Sucker Punch brings smoke into the universe of inFAMOUS Second Son on the PS4.',
|
'description': 'Take a peak behind-the-scenes to see how Sucker Punch brings smoke into the universe of inFAMOUS Second Son on the PS4.',
|
||||||
'duration': 221,
|
'duration': 221,
|
||||||
|
75
youtube_dl/extractor/musicplayon.py
Normal file
75
youtube_dl/extractor/musicplayon.py
Normal file
@ -0,0 +1,75 @@
|
|||||||
|
# encoding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import int_or_none
|
||||||
|
|
||||||
|
|
||||||
|
class MusicPlayOnIE(InfoExtractor):
    """Information extractor for musicplayon.com ``play`` / ``play-touch`` pages.

    Page metadata is read from the HTML (OpenGraph tags and a ``video:duration``
    meta tag); the format list is a fixed mobile MP4 URL plus every variant
    listed in the site's ``manifest.m3u8``.
    """

    _VALID_URL = r'https?://(?:.+?\.)?musicplayon\.com/play(?:-touch)?\?(?:v|pl=100&play)=(?P<id>\d+)'

    _TEST = {
        'url': 'http://en.musicplayon.com/play?v=433377',
        'info_dict': {
            'id': '433377',
            'ext': 'mp4',
            'title': 'Rick Ross - Interview On Chelsea Lately (2014)',
            'description': 'Rick Ross Interview On Chelsea Lately',
            'duration': 342,
            'uploader': 'ultrafish',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }

    def _real_extract(self, url):
        """Return the info dict for the video addressed by *url*.

        The numeric id comes from the ``id`` group of ``_VALID_URL``.
        Duration, view count and uploader are looked up non-fatally and may
        therefore be missing from the result.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        page = self._download_webpage(url, video_id)

        title = self._og_search_title(page)
        description = self._og_search_description(page)
        thumbnail = self._og_search_thumbnail(page)
        duration = self._html_search_meta('video:duration', page, 'duration', fatal=False)
        view_count = self._og_search_property('count', page, fatal=False)
        uploader = self._html_search_regex(
            r'<div>by <a href="[^"]+" class="purple">([^<]+)</a></div>', page, 'uploader', fatal=False)

        # The mobile MP4 URL is built from the id alone and is always listed
        # first; nothing here checks that it is actually reachable.
        formats = [
            {
                'url': 'http://media0-eu-nl.musicplayon.com/stream-mobile?id=%s&type=.mp4' % video_id,
                'ext': 'mp4',
            }
        ]

        manifest = self._download_webpage(
            'http://en.musicplayon.com/manifest.m3u8?v=%s' % video_id, video_id, 'Downloading manifest')

        # Every '#'-prefixed chunk of the manifest is one tag; only the
        # variant-stream tags describe a downloadable format.
        for entry in manifest.split('#')[1:]:
            if not entry.startswith('EXT-X-STREAM-INF:'):
                continue
            # NOTE(review): assumes each entry is exactly "attributes\nURL\n";
            # a manifest with a different line layout would raise ValueError.
            # The stream URL gets its own name so the `url` argument is not
            # shadowed.
            meta, stream_url, _ = entry.split('\n')
            # The first comma-separated field carries the tag name itself and
            # is skipped; the remaining ones are KEY=VALUE attributes.
            params = dict(param.split('=') for param in meta.split(',')[1:])
            # RESOLUTION is "<width>x<height>": width is the FIRST component.
            resolution = params['RESOLUTION'].split('x')
            formats.append({
                'url': stream_url,
                'ext': 'mp4',
                'tbr': int(params['BANDWIDTH']),
                'width': int(resolution[0]),
                'height': int(resolution[-1]),
                'format_note': params['NAME'].replace('"', '').strip(),
            })

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'uploader': uploader,
            'duration': int_or_none(duration),
            'view_count': int_or_none(view_count),
            'formats': formats,
        }
|
@ -6,12 +6,13 @@ from .common import InfoExtractor
|
|||||||
|
|
||||||
|
|
||||||
class NBAIE(InfoExtractor):
|
class NBAIE(InfoExtractor):
|
||||||
_VALID_URL = r'^(?:https?://)?(?:watch\.|www\.)?nba\.com/(?:nba/)?video(/[^?]*?)(?:/index\.html)?(?:\?.*)?$'
|
_VALID_URL = r'https?://(?:watch\.|www\.)?nba\.com/(?:nba/)?video(?P<id>/[^?]*?)(?:/index\.html)?(?:\?.*)?$'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://www.nba.com/video/games/nets/2012/12/04/0021200253-okc-bkn-recap.nba/index.html',
|
'url': 'http://www.nba.com/video/games/nets/2012/12/04/0021200253-okc-bkn-recap.nba/index.html',
|
||||||
'file': u'0021200253-okc-bkn-recap.nba.mp4',
|
|
||||||
'md5': u'c0edcfc37607344e2ff8f13c378c88a4',
|
'md5': u'c0edcfc37607344e2ff8f13c378c88a4',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
'id': '0021200253-okc-bkn-recap.nba',
|
||||||
|
'ext': 'mp4',
|
||||||
'description': 'Kevin Durant scores 32 points and dishes out six assists as the Thunder beat the Nets in Brooklyn.',
|
'description': 'Kevin Durant scores 32 points and dishes out six assists as the Thunder beat the Nets in Brooklyn.',
|
||||||
'title': 'Thunder vs. Nets',
|
'title': 'Thunder vs. Nets',
|
||||||
},
|
},
|
||||||
@ -19,7 +20,7 @@ class NBAIE(InfoExtractor):
|
|||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
video_id = mobj.group(1)
|
video_id = mobj.group('id')
|
||||||
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
@ -33,7 +34,6 @@ class NBAIE(InfoExtractor):
|
|||||||
return {
|
return {
|
||||||
'id': shortened_video_id,
|
'id': shortened_video_id,
|
||||||
'url': video_url,
|
'url': video_url,
|
||||||
'ext': 'mp4',
|
|
||||||
'title': title,
|
'title': title,
|
||||||
'description': description,
|
'description': description,
|
||||||
}
|
}
|
||||||
|
@ -5,7 +5,7 @@ import re
|
|||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
RegexNotFoundError,
|
ExtractorError,
|
||||||
unescapeHTML
|
unescapeHTML
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -98,16 +98,15 @@ class NTVIE(InfoExtractor):
|
|||||||
|
|
||||||
page = self._download_webpage(url, video_id, 'Downloading page')
|
page = self._download_webpage(url, video_id, 'Downloading page')
|
||||||
|
|
||||||
def extract(patterns, name, page, fatal=False):
|
for pattern in self._VIDEO_ID_REGEXES:
|
||||||
for pattern in patterns:
|
mobj = re.search(pattern, page)
|
||||||
mobj = re.search(pattern, page)
|
if mobj:
|
||||||
if mobj:
|
break
|
||||||
return mobj.group(1)
|
|
||||||
if fatal:
|
|
||||||
raise RegexNotFoundError(u'Unable to extract %s' % name)
|
|
||||||
return None
|
|
||||||
|
|
||||||
video_id = extract(self._VIDEO_ID_REGEXES, 'video id', page, fatal=True)
|
if not mobj:
|
||||||
|
raise ExtractorError('No media links available for %s' % video_id)
|
||||||
|
|
||||||
|
video_id = mobj.group(1)
|
||||||
|
|
||||||
player = self._download_xml('http://www.ntv.ru/vi%s/' % video_id, video_id, 'Downloading video XML')
|
player = self._download_xml('http://www.ntv.ru/vi%s/' % video_id, video_id, 'Downloading video XML')
|
||||||
title = unescapeHTML(player.find('./data/title').text)
|
title = unescapeHTML(player.find('./data/title').text)
|
||||||
|
@ -1,3 +1,5 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
import re
|
||||||
import os
|
import os
|
||||||
|
|
||||||
@ -5,45 +7,50 @@ from .common import InfoExtractor
|
|||||||
|
|
||||||
|
|
||||||
class PyvideoIE(InfoExtractor):
|
class PyvideoIE(InfoExtractor):
|
||||||
_VALID_URL = r'(?:http://)?(?:www\.)?pyvideo\.org/video/(?P<id>\d+)/(.*)'
|
_VALID_URL = r'http://(?:www\.)?pyvideo\.org/video/(?P<id>\d+)/(.*)'
|
||||||
_TESTS = [{
|
|
||||||
u'url': u'http://pyvideo.org/video/1737/become-a-logging-expert-in-30-minutes',
|
_TESTS = [
|
||||||
u'file': u'24_4WWkSmNo.mp4',
|
{
|
||||||
u'md5': u'de317418c8bc76b1fd8633e4f32acbc6',
|
'url': 'http://pyvideo.org/video/1737/become-a-logging-expert-in-30-minutes',
|
||||||
u'info_dict': {
|
'md5': 'de317418c8bc76b1fd8633e4f32acbc6',
|
||||||
u"title": u"Become a logging expert in 30 minutes",
|
'info_dict': {
|
||||||
u"description": u"md5:9665350d466c67fb5b1598de379021f7",
|
'id': '24_4WWkSmNo',
|
||||||
u"upload_date": u"20130320",
|
'ext': 'mp4',
|
||||||
u"uploader": u"NextDayVideo",
|
'title': 'Become a logging expert in 30 minutes',
|
||||||
u"uploader_id": u"NextDayVideo",
|
'description': 'md5:9665350d466c67fb5b1598de379021f7',
|
||||||
|
'upload_date': '20130320',
|
||||||
|
'uploader': 'NextDayVideo',
|
||||||
|
'uploader_id': 'NextDayVideo',
|
||||||
|
},
|
||||||
|
'add_ie': ['Youtube'],
|
||||||
},
|
},
|
||||||
u'add_ie': ['Youtube'],
|
{
|
||||||
},
|
'url': 'http://pyvideo.org/video/2542/gloriajw-spotifywitherikbernhardsson182m4v',
|
||||||
{
|
'md5': '5fe1c7e0a8aa5570330784c847ff6d12',
|
||||||
u'url': u'http://pyvideo.org/video/2542/gloriajw-spotifywitherikbernhardsson182m4v',
|
'info_dict': {
|
||||||
u'md5': u'5fe1c7e0a8aa5570330784c847ff6d12',
|
'id': '2542',
|
||||||
u'info_dict': {
|
'ext': 'm4v',
|
||||||
u'id': u'2542',
|
'title': 'Gloriajw-SpotifyWithErikBernhardsson182',
|
||||||
u'ext': u'm4v',
|
},
|
||||||
u'title': u'Gloriajw-SpotifyWithErikBernhardsson182',
|
|
||||||
},
|
},
|
||||||
},
|
|
||||||
]
|
]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
video_id = mobj.group('id')
|
video_id = mobj.group('id')
|
||||||
webpage = self._download_webpage(url, video_id)
|
|
||||||
m_youtube = re.search(r'(https?://www\.youtube\.com/watch\?v=.*)', webpage)
|
|
||||||
|
|
||||||
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
|
m_youtube = re.search(r'(https?://www\.youtube\.com/watch\?v=.*)', webpage)
|
||||||
if m_youtube is not None:
|
if m_youtube is not None:
|
||||||
return self.url_result(m_youtube.group(1), 'Youtube')
|
return self.url_result(m_youtube.group(1), 'Youtube')
|
||||||
|
|
||||||
title = self._html_search_regex(r'<div class="section">.*?<h3>([^>]+?)</h3>',
|
title = self._html_search_regex(
|
||||||
webpage, u'title', flags=re.DOTALL)
|
r'<div class="section">.*?<h3>([^>]+?)</h3>', webpage, 'title', flags=re.DOTALL)
|
||||||
video_url = self._search_regex([r'<source src="(.*?)"',
|
video_url = self._search_regex(
|
||||||
r'<dt>Download</dt>.*?<a href="(.+?)"'],
|
[r'<source src="(.*?)"', r'<dt>Download</dt>.*?<a href="(.+?)"'],
|
||||||
webpage, u'video url', flags=re.DOTALL)
|
webpage, 'video url', flags=re.DOTALL)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': os.path.splitext(title)[0],
|
'title': os.path.splitext(title)[0],
|
||||||
|
@ -1,5 +1,6 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
import re
|
||||||
import json
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import unified_strdate, determine_ext
|
from ..utils import unified_strdate, determine_ext
|
||||||
@ -9,41 +10,44 @@ class RoxwelIE(InfoExtractor):
|
|||||||
_VALID_URL = r'https?://www\.roxwel\.com/player/(?P<filename>.+?)(\.|\?|$)'
|
_VALID_URL = r'https?://www\.roxwel\.com/player/(?P<filename>.+?)(\.|\?|$)'
|
||||||
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
u'url': u'http://www.roxwel.com/player/passionpittakeawalklive.html',
|
'url': 'http://www.roxwel.com/player/passionpittakeawalklive.html',
|
||||||
u'file': u'passionpittakeawalklive.flv',
|
'info_dict': {
|
||||||
u'md5': u'd9dea8360a1e7d485d2206db7fe13035',
|
'id': 'passionpittakeawalklive',
|
||||||
u'info_dict': {
|
'ext': 'flv',
|
||||||
u'title': u'Take A Walk (live)',
|
'title': 'Take A Walk (live)',
|
||||||
u'uploader': u'Passion Pit',
|
'uploader': 'Passion Pit',
|
||||||
u'description': u'Passion Pit performs "Take A Walk\" live at The Backyard in Austin, Texas. ',
|
'uploader_id': 'passionpit',
|
||||||
|
'upload_date': '20120928',
|
||||||
|
'description': 'Passion Pit performs "Take A Walk\" live at The Backyard in Austin, Texas. ',
|
||||||
},
|
},
|
||||||
u'skip': u'Requires rtmpdump',
|
'params': {
|
||||||
|
# rtmp download
|
||||||
|
'skip_download': True,
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
filename = mobj.group('filename')
|
filename = mobj.group('filename')
|
||||||
info_url = 'http://www.roxwel.com/api/videos/%s' % filename
|
info_url = 'http://www.roxwel.com/api/videos/%s' % filename
|
||||||
info_page = self._download_webpage(info_url, filename,
|
info = self._download_json(info_url, filename)
|
||||||
u'Downloading video info')
|
|
||||||
|
|
||||||
self.report_extraction(filename)
|
|
||||||
info = json.loads(info_page)
|
|
||||||
rtmp_rates = sorted([int(r.replace('flv_', '')) for r in info['media_rates'] if r.startswith('flv_')])
|
rtmp_rates = sorted([int(r.replace('flv_', '')) for r in info['media_rates'] if r.startswith('flv_')])
|
||||||
best_rate = rtmp_rates[-1]
|
best_rate = rtmp_rates[-1]
|
||||||
url_page_url = 'http://roxwel.com/pl_one_time.php?filename=%s&quality=%s' % (filename, best_rate)
|
url_page_url = 'http://roxwel.com/pl_one_time.php?filename=%s&quality=%s' % (filename, best_rate)
|
||||||
rtmp_url = self._download_webpage(url_page_url, filename, u'Downloading video url')
|
rtmp_url = self._download_webpage(url_page_url, filename, 'Downloading video url')
|
||||||
ext = determine_ext(rtmp_url)
|
ext = determine_ext(rtmp_url)
|
||||||
if ext == 'f4v':
|
if ext == 'f4v':
|
||||||
rtmp_url = rtmp_url.replace(filename, 'mp4:%s' % filename)
|
rtmp_url = rtmp_url.replace(filename, 'mp4:%s' % filename)
|
||||||
|
|
||||||
return {'id': filename,
|
return {
|
||||||
'title': info['title'],
|
'id': filename,
|
||||||
'url': rtmp_url,
|
'title': info['title'],
|
||||||
'ext': 'flv',
|
'url': rtmp_url,
|
||||||
'description': info['description'],
|
'ext': 'flv',
|
||||||
'thumbnail': info.get('player_image_url') or info.get('image_url_large'),
|
'description': info['description'],
|
||||||
'uploader': info['artist'],
|
'thumbnail': info.get('player_image_url') or info.get('image_url_large'),
|
||||||
'uploader_id': info['artistname'],
|
'uploader': info['artist'],
|
||||||
'upload_date': unified_strdate(info['dbdate']),
|
'uploader_id': info['artistname'],
|
||||||
}
|
'upload_date': unified_strdate(info['dbdate']),
|
||||||
|
}
|
||||||
|
@ -2,7 +2,6 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
import re
|
||||||
import json
|
|
||||||
import itertools
|
import itertools
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
@ -20,8 +19,9 @@ class RutubeIE(InfoExtractor):
|
|||||||
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://rutube.ru/video/3eac3b4561676c17df9132a9a1e62e3e/',
|
'url': 'http://rutube.ru/video/3eac3b4561676c17df9132a9a1e62e3e/',
|
||||||
'file': '3eac3b4561676c17df9132a9a1e62e3e.mp4',
|
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
'id': '3eac3b4561676c17df9132a9a1e62e3e',
|
||||||
|
'ext': 'mp4',
|
||||||
'title': 'Раненный кенгуру забежал в аптеку',
|
'title': 'Раненный кенгуру забежал в аптеку',
|
||||||
'description': 'http://www.ntdtv.ru ',
|
'description': 'http://www.ntdtv.ru ',
|
||||||
'duration': 80,
|
'duration': 80,
|
||||||
@ -38,15 +38,15 @@ class RutubeIE(InfoExtractor):
|
|||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
video_id = mobj.group('id')
|
video_id = mobj.group('id')
|
||||||
|
|
||||||
api_response = self._download_webpage('http://rutube.ru/api/video/%s/?format=json' % video_id,
|
video = self._download_json(
|
||||||
video_id, 'Downloading video JSON')
|
'http://rutube.ru/api/video/%s/?format=json' % video_id,
|
||||||
video = json.loads(api_response)
|
video_id, 'Downloading video JSON')
|
||||||
|
|
||||||
api_response = self._download_webpage('http://rutube.ru/api/play/trackinfo/%s/?format=json' % video_id,
|
trackinfo = self._download_json(
|
||||||
video_id, 'Downloading trackinfo JSON')
|
'http://rutube.ru/api/play/trackinfo/%s/?format=json' % video_id,
|
||||||
trackinfo = json.loads(api_response)
|
video_id, 'Downloading trackinfo JSON')
|
||||||
|
|
||||||
# Some videos don't have the author field
|
# Some videos don't have the author field
|
||||||
author = trackinfo.get('author') or {}
|
author = trackinfo.get('author') or {}
|
||||||
m3u8_url = trackinfo['video_balancer'].get('m3u8')
|
m3u8_url = trackinfo['video_balancer'].get('m3u8')
|
||||||
@ -79,10 +79,9 @@ class RutubeChannelIE(InfoExtractor):
|
|||||||
def _extract_videos(self, channel_id, channel_title=None):
|
def _extract_videos(self, channel_id, channel_title=None):
|
||||||
entries = []
|
entries = []
|
||||||
for pagenum in itertools.count(1):
|
for pagenum in itertools.count(1):
|
||||||
api_response = self._download_webpage(
|
page = self._download_json(
|
||||||
self._PAGE_TEMPLATE % (channel_id, pagenum),
|
self._PAGE_TEMPLATE % (channel_id, pagenum),
|
||||||
channel_id, 'Downloading page %s' % pagenum)
|
channel_id, 'Downloading page %s' % pagenum)
|
||||||
page = json.loads(api_response)
|
|
||||||
results = page['results']
|
results = page['results']
|
||||||
if not results:
|
if not results:
|
||||||
break
|
break
|
||||||
@ -108,10 +107,9 @@ class RutubeMovieIE(RutubeChannelIE):
|
|||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
movie_id = mobj.group('id')
|
movie_id = mobj.group('id')
|
||||||
api_response = self._download_webpage(
|
movie = self._download_json(
|
||||||
self._MOVIE_TEMPLATE % movie_id, movie_id,
|
self._MOVIE_TEMPLATE % movie_id, movie_id,
|
||||||
'Downloading movie JSON')
|
'Downloading movie JSON')
|
||||||
movie = json.loads(api_response)
|
|
||||||
movie_name = movie['name']
|
movie_name = movie['name']
|
||||||
return self._extract_videos(movie_id, movie_name)
|
return self._extract_videos(movie_id, movie_name)
|
||||||
|
|
||||||
|
@ -1,33 +1,37 @@
|
|||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import json
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
|
||||||
class TF1IE(InfoExtractor):
|
class TF1IE(InfoExtractor):
|
||||||
"""TF1 uses the wat.tv player."""
|
"""TF1 uses the wat.tv player."""
|
||||||
_VALID_URL = r'http://videos\.tf1\.fr/.*-(.*?)\.html'
|
_VALID_URL = r'http://videos\.tf1\.fr/.*-(?P<id>.*?)\.html'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
u'url': u'http://videos.tf1.fr/auto-moto/citroen-grand-c4-picasso-2013-presentation-officielle-8062060.html',
|
'url': 'http://videos.tf1.fr/auto-moto/citroen-grand-c4-picasso-2013-presentation-officielle-8062060.html',
|
||||||
u'file': u'10635995.mp4',
|
'info_dict': {
|
||||||
u'md5': u'2e378cc28b9957607d5e88f274e637d8',
|
'id': '10635995',
|
||||||
u'info_dict': {
|
'ext': 'mp4',
|
||||||
u'title': u'Citroën Grand C4 Picasso 2013 : présentation officielle',
|
'title': 'Citroën Grand C4 Picasso 2013 : présentation officielle',
|
||||||
u'description': u'Vidéo officielle du nouveau Citroën Grand C4 Picasso, lancé à l\'automne 2013.',
|
'description': 'Vidéo officielle du nouveau Citroën Grand C4 Picasso, lancé à l\'automne 2013.',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
# Sometimes wat serves the whole file with the --test option
|
||||||
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
u'skip': u'Sometimes wat serves the whole file with the --test option',
|
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
id = mobj.group(1)
|
video_id = mobj.group('id')
|
||||||
webpage = self._download_webpage(url, id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
embed_url = self._html_search_regex(r'"(https://www.wat.tv/embedframe/.*?)"',
|
embed_url = self._html_search_regex(
|
||||||
webpage, 'embed url')
|
r'"(https://www.wat.tv/embedframe/.*?)"', webpage, 'embed url')
|
||||||
embed_page = self._download_webpage(embed_url, id, u'Downloading embed player page')
|
embed_page = self._download_webpage(embed_url, video_id,
|
||||||
|
'Downloading embed player page')
|
||||||
wat_id = self._search_regex(r'UVID=(.*?)&', embed_page, 'wat id')
|
wat_id = self._search_regex(r'UVID=(.*?)&', embed_page, 'wat id')
|
||||||
wat_info = self._download_webpage('http://www.wat.tv/interface/contentv3/%s' % wat_id, id, u'Downloading Wat info')
|
wat_info = self._download_json(
|
||||||
wat_info = json.loads(wat_info)['media']
|
'http://www.wat.tv/interface/contentv3/%s' % wat_id, video_id)
|
||||||
wat_url = wat_info['url']
|
return self.url_result(wat_info['media']['url'], 'Wat')
|
||||||
return self.url_result(wat_url, 'Wat')
|
|
||||||
|
@ -16,7 +16,7 @@ from ..utils import (
|
|||||||
|
|
||||||
class VKIE(InfoExtractor):
|
class VKIE(InfoExtractor):
|
||||||
IE_NAME = 'vk.com'
|
IE_NAME = 'vk.com'
|
||||||
_VALID_URL = r'https?://vk\.com/(?:video_ext\.php\?.*?\boid=(?P<oid>\d+).*?\bid=(?P<id>\d+)|(?:videos.*?\?.*?z=)?video(?P<videoid>.*?)(?:\?|%2F|$))'
|
_VALID_URL = r'https?://vk\.com/(?:video_ext\.php\?.*?\boid=(?P<oid>-?\d+).*?\bid=(?P<id>\d+)|(?:videos.*?\?.*?z=)?video(?P<videoid>.*?)(?:\?|%2F|$))'
|
||||||
_NETRC_MACHINE = 'vk'
|
_NETRC_MACHINE = 'vk'
|
||||||
|
|
||||||
_TESTS = [
|
_TESTS = [
|
||||||
|
@ -1,37 +1,37 @@
|
|||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import json
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
unified_strdate,
|
unified_strdate,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class WatIE(InfoExtractor):
|
class WatIE(InfoExtractor):
|
||||||
_VALID_URL=r'http://www\.wat\.tv/.*-(?P<shortID>.*?)_.*?\.html'
|
_VALID_URL = r'http://www\.wat\.tv/.*-(?P<shortID>.*?)_.*?\.html'
|
||||||
IE_NAME = 'wat.tv'
|
IE_NAME = 'wat.tv'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
u'url': u'http://www.wat.tv/video/world-war-philadelphia-vost-6bv55_2fjr7_.html',
|
'url': 'http://www.wat.tv/video/world-war-philadelphia-vost-6bv55_2fjr7_.html',
|
||||||
u'file': u'10631273.mp4',
|
'info_dict': {
|
||||||
u'md5': u'd8b2231e1e333acd12aad94b80937e19',
|
'id': '10631273',
|
||||||
u'info_dict': {
|
'ext': 'mp4',
|
||||||
u'title': u'World War Z - Philadelphia VOST',
|
'title': 'World War Z - Philadelphia VOST',
|
||||||
u'description': u'La menace est partout. Que se passe-t-il à Philadelphia ?\r\nWORLD WAR Z, avec Brad Pitt, au cinéma le 3 juillet.\r\nhttp://www.worldwarz.fr',
|
'description': 'La menace est partout. Que se passe-t-il à Philadelphia ?\r\nWORLD WAR Z, avec Brad Pitt, au cinéma le 3 juillet.\r\nhttp://www.worldwarz.fr',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
# Sometimes wat serves the whole file with the --test option
|
||||||
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
u'skip': u'Sometimes wat serves the whole file with the --test option',
|
|
||||||
}
|
}
|
||||||
|
|
||||||
def download_video_info(self, real_id):
|
def download_video_info(self, real_id):
|
||||||
# 'contentv4' is used in the website, but it also returns the related
|
# 'contentv4' is used in the website, but it also returns the related
|
||||||
# videos, we don't need them
|
# videos, we don't need them
|
||||||
info = self._download_webpage('http://www.wat.tv/interface/contentv3/' + real_id, real_id, 'Downloading video info')
|
info = self._download_json('http://www.wat.tv/interface/contentv3/' + real_id, real_id)
|
||||||
info = json.loads(info)
|
|
||||||
return info['media']
|
return info['media']
|
||||||
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
def real_id_for_chapter(chapter):
|
def real_id_for_chapter(chapter):
|
||||||
return chapter['tc_start'].split('-')[0]
|
return chapter['tc_start'].split('-')[0]
|
||||||
@ -56,17 +56,17 @@ class WatIE(InfoExtractor):
|
|||||||
entries = [self.url_result(chapter_url) for chapter_url in chapter_urls]
|
entries = [self.url_result(chapter_url) for chapter_url in chapter_urls]
|
||||||
return self.playlist_result(entries, real_id, video_info['title'])
|
return self.playlist_result(entries, real_id, video_info['title'])
|
||||||
|
|
||||||
|
upload_date = None
|
||||||
|
if 'date_diffusion' in first_chapter:
|
||||||
|
upload_date = unified_strdate(first_chapter['date_diffusion'])
|
||||||
# Otherwise we can continue and extract just one part, we have to use
|
# Otherwise we can continue and extract just one part, we have to use
|
||||||
# the short id for getting the video url
|
# the short id for getting the video url
|
||||||
info = {'id': real_id,
|
return {
|
||||||
'url': 'http://wat.tv/get/android5/%s.mp4' % real_id,
|
'id': real_id,
|
||||||
'ext': 'mp4',
|
'url': 'http://wat.tv/get/android5/%s.mp4' % real_id,
|
||||||
'title': first_chapter['title'],
|
'title': first_chapter['title'],
|
||||||
'thumbnail': first_chapter['preview'],
|
'thumbnail': first_chapter['preview'],
|
||||||
'description': first_chapter['description'],
|
'description': first_chapter['description'],
|
||||||
'view_count': video_info['views'],
|
'view_count': video_info['views'],
|
||||||
}
|
'upload_date': upload_date,
|
||||||
if 'date_diffusion' in first_chapter:
|
}
|
||||||
info['upload_date'] = unified_strdate(first_chapter['date_diffusion'])
|
|
||||||
|
|
||||||
return info
|
|
||||||
|
@ -4,9 +4,10 @@ import re
|
|||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
unified_strdate,
|
compat_parse_qs,
|
||||||
compat_urlparse,
|
compat_urlparse,
|
||||||
determine_ext,
|
determine_ext,
|
||||||
|
unified_strdate,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -111,4 +112,85 @@ class WDRIE(InfoExtractor):
|
|||||||
'description': description,
|
'description': description,
|
||||||
'thumbnail': thumbnail,
|
'thumbnail': thumbnail,
|
||||||
'upload_date': upload_date,
|
'upload_date': upload_date,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class WDRMausIE(InfoExtractor):
    """Information extractor for wdrmaus.de pages ("Sendung mit der Maus").

    The page embeds the player parameters in a ``?startVideo=1&...`` link;
    those yield the primary video URL and the thumbnail.  A JavaScript
    translation table is then consulted, on a best-effort basis, to derive an
    additional HTTP download URL.
    """

    # Raw strings keep the regex backslashes literal without relying on
    # Python's deprecated handling of unknown escape sequences such as '\.'.
    _VALID_URL = r'http://(?:www\.)?wdrmaus\.de/(?:[^/]+/){,2}(?P<id>[^/?#]+)(?:/index\.php5|(?<!index)\.php5|/(?:$|[?#]))'
    IE_DESC = 'Sendung mit der Maus'
    _TESTS = [{
        'url': 'http://www.wdrmaus.de/aktuelle-sendung/index.php5',
        'info_dict': {
            'id': 'aktuelle-sendung',
            'ext': 'mp4',
            'thumbnail': r're:^http://.+\.jpg',
            'upload_date': 're:^[0-9]{8}$',
            'title': 're:^[0-9.]{10} - Aktuelle Sendung$',
        }
    }, {
        'url': 'http://www.wdrmaus.de/sachgeschichten/sachgeschichten/40_jahre_maus.php5',
        'md5': '3b1227ca3ed28d73ec5737c65743b2a3',
        'info_dict': {
            'id': '40_jahre_maus',
            'ext': 'mp4',
            'thumbnail': r're:^http://.+\.jpg',
            'upload_date': '20131007',
            'title': '12.03.2011 - 40 Jahre Maus',
        }
    }]

    def _real_extract(self, url):
        """Return the info dict (id, title, formats, thumbnail, upload date)
        for the wdrmaus.de page at *url*."""
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        webpage = self._download_webpage(url, video_id)
        # Query-string style player parameters embedded in the start link.
        param_code = self._html_search_regex(
            r'<a href="\?startVideo=1&([^"]+)"', webpage, 'parameters')

        # The title is "<air date> - <headline>".
        title_date = self._search_regex(
            r'<div class="sendedatum"><p>Sendedatum:\s*([0-9\.]+)</p>',
            webpage, 'air date')
        title_str = self._html_search_regex(
            r'<h1>(.*?)</h1>', webpage, 'title')
        title = '%s - %s' % (title_date, title_str)
        upload_date = unified_strdate(
            self._html_search_meta('dc.date', webpage))

        fields = compat_parse_qs(param_code)
        video_url = fields['firstVideo'][0]
        # startPicture may be relative; resolve it against the page URL.
        thumbnail = compat_urlparse.urljoin(url, fields['startPicture'][0])

        formats = [{
            'format_id': 'rtmp',
            'url': video_url,
        }]

        # Best effort: without the translation table only the primary format
        # is offered (fatal=False, so a failed download is tolerated).
        jscode = self._download_webpage(
            'http://www.wdrmaus.de/codebase/js/extended-medien.min.js',
            video_id, fatal=False,
            note='Downloading URL translation table',
            errnote='Could not download URL translation table')
        if jscode:
            for m in re.finditer(
                    r"stream:\s*'dslSrc=(?P<stream>[^']+)',\s*download:\s*'(?P<dl>[^']+)'\s*\}",
                    jscode):
                stream_prefix = m.group('stream')
                if video_url.startswith(stream_prefix):
                    # Swap only the matched prefix; a later repetition of the
                    # same text inside the URL must stay untouched.
                    http_url = m.group('dl') + video_url[len(stream_prefix):]
                    formats.append({
                        'format_id': 'http',
                        'url': http_url,
                    })
                    break

        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'thumbnail': thumbnail,
            'upload_date': upload_date,
        }
|
||||||
|
|
||||||
|
# TODO test _1
|
@ -7,13 +7,13 @@ import itertools
|
|||||||
import json
|
import json
|
||||||
import os.path
|
import os.path
|
||||||
import re
|
import re
|
||||||
import string
|
|
||||||
import struct
|
import struct
|
||||||
import traceback
|
import traceback
|
||||||
import zlib
|
import zlib
|
||||||
|
|
||||||
from .common import InfoExtractor, SearchInfoExtractor
|
from .common import InfoExtractor, SearchInfoExtractor
|
||||||
from .subtitles import SubtitlesInfoExtractor
|
from .subtitles import SubtitlesInfoExtractor
|
||||||
|
from ..jsinterp import JSInterpreter
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
compat_chr,
|
compat_chr,
|
||||||
compat_parse_qs,
|
compat_parse_qs,
|
||||||
@ -438,113 +438,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
|||||||
def _parse_sig_js(self, jscode):
|
def _parse_sig_js(self, jscode):
|
||||||
funcname = self._search_regex(
|
funcname = self._search_regex(
|
||||||
r'signature=([a-zA-Z]+)', jscode,
|
r'signature=([a-zA-Z]+)', jscode,
|
||||||
u'Initial JS player signature function name')
|
u'Initial JS player signature function name')
|
||||||
|
|
||||||
functions = {}
|
jsi = JSInterpreter(jscode)
|
||||||
|
initial_function = jsi.extract_function(funcname)
|
||||||
def argidx(varname):
|
|
||||||
return string.lowercase.index(varname)
|
|
||||||
|
|
||||||
def interpret_statement(stmt, local_vars, allow_recursion=20):
|
|
||||||
if allow_recursion < 0:
|
|
||||||
raise ExtractorError(u'Recursion limit reached')
|
|
||||||
|
|
||||||
if stmt.startswith(u'var '):
|
|
||||||
stmt = stmt[len(u'var '):]
|
|
||||||
ass_m = re.match(r'^(?P<out>[a-z]+)(?:\[(?P<index>[^\]]+)\])?' +
|
|
||||||
r'=(?P<expr>.*)$', stmt)
|
|
||||||
if ass_m:
|
|
||||||
if ass_m.groupdict().get('index'):
|
|
||||||
def assign(val):
|
|
||||||
lvar = local_vars[ass_m.group('out')]
|
|
||||||
idx = interpret_expression(ass_m.group('index'),
|
|
||||||
local_vars, allow_recursion)
|
|
||||||
assert isinstance(idx, int)
|
|
||||||
lvar[idx] = val
|
|
||||||
return val
|
|
||||||
expr = ass_m.group('expr')
|
|
||||||
else:
|
|
||||||
def assign(val):
|
|
||||||
local_vars[ass_m.group('out')] = val
|
|
||||||
return val
|
|
||||||
expr = ass_m.group('expr')
|
|
||||||
elif stmt.startswith(u'return '):
|
|
||||||
assign = lambda v: v
|
|
||||||
expr = stmt[len(u'return '):]
|
|
||||||
else:
|
|
||||||
raise ExtractorError(
|
|
||||||
u'Cannot determine left side of statement in %r' % stmt)
|
|
||||||
|
|
||||||
v = interpret_expression(expr, local_vars, allow_recursion)
|
|
||||||
return assign(v)
|
|
||||||
|
|
||||||
def interpret_expression(expr, local_vars, allow_recursion):
|
|
||||||
if expr.isdigit():
|
|
||||||
return int(expr)
|
|
||||||
|
|
||||||
if expr.isalpha():
|
|
||||||
return local_vars[expr]
|
|
||||||
|
|
||||||
m = re.match(r'^(?P<in>[a-z]+)\.(?P<member>.*)$', expr)
|
|
||||||
if m:
|
|
||||||
member = m.group('member')
|
|
||||||
val = local_vars[m.group('in')]
|
|
||||||
if member == 'split("")':
|
|
||||||
return list(val)
|
|
||||||
if member == 'join("")':
|
|
||||||
return u''.join(val)
|
|
||||||
if member == 'length':
|
|
||||||
return len(val)
|
|
||||||
if member == 'reverse()':
|
|
||||||
return val[::-1]
|
|
||||||
slice_m = re.match(r'slice\((?P<idx>.*)\)', member)
|
|
||||||
if slice_m:
|
|
||||||
idx = interpret_expression(
|
|
||||||
slice_m.group('idx'), local_vars, allow_recursion-1)
|
|
||||||
return val[idx:]
|
|
||||||
|
|
||||||
m = re.match(
|
|
||||||
r'^(?P<in>[a-z]+)\[(?P<idx>.+)\]$', expr)
|
|
||||||
if m:
|
|
||||||
val = local_vars[m.group('in')]
|
|
||||||
idx = interpret_expression(m.group('idx'), local_vars,
|
|
||||||
allow_recursion-1)
|
|
||||||
return val[idx]
|
|
||||||
|
|
||||||
m = re.match(r'^(?P<a>.+?)(?P<op>[%])(?P<b>.+?)$', expr)
|
|
||||||
if m:
|
|
||||||
a = interpret_expression(m.group('a'),
|
|
||||||
local_vars, allow_recursion)
|
|
||||||
b = interpret_expression(m.group('b'),
|
|
||||||
local_vars, allow_recursion)
|
|
||||||
return a % b
|
|
||||||
|
|
||||||
m = re.match(
|
|
||||||
r'^(?P<func>[a-zA-Z$]+)\((?P<args>[a-z0-9,]+)\)$', expr)
|
|
||||||
if m:
|
|
||||||
fname = m.group('func')
|
|
||||||
if fname not in functions:
|
|
||||||
functions[fname] = extract_function(fname)
|
|
||||||
argvals = [int(v) if v.isdigit() else local_vars[v]
|
|
||||||
for v in m.group('args').split(',')]
|
|
||||||
return functions[fname](argvals)
|
|
||||||
raise ExtractorError(u'Unsupported JS expression %r' % expr)
|
|
||||||
|
|
||||||
def extract_function(funcname):
|
|
||||||
func_m = re.search(
|
|
||||||
r'function ' + re.escape(funcname) +
|
|
||||||
r'\((?P<args>[a-z,]+)\){(?P<code>[^}]+)}',
|
|
||||||
jscode)
|
|
||||||
argnames = func_m.group('args').split(',')
|
|
||||||
|
|
||||||
def resf(args):
|
|
||||||
local_vars = dict(zip(argnames, args))
|
|
||||||
for stmt in func_m.group('code').split(';'):
|
|
||||||
res = interpret_statement(stmt, local_vars)
|
|
||||||
return res
|
|
||||||
return resf
|
|
||||||
|
|
||||||
initial_function = extract_function(funcname)
|
|
||||||
return lambda s: initial_function([s])
|
return lambda s: initial_function([s])
|
||||||
|
|
||||||
def _parse_sig_swf(self, file_contents):
|
def _parse_sig_swf(self, file_contents):
|
||||||
@ -1549,7 +1446,9 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
|
|||||||
break
|
break
|
||||||
|
|
||||||
more = self._download_json(
|
more = self._download_json(
|
||||||
'https://youtube.com/%s' % mobj.group('more'), playlist_id, 'Downloading page #%s' % page_num)
|
'https://youtube.com/%s' % mobj.group('more'), playlist_id,
|
||||||
|
'Downloading page #%s' % page_num,
|
||||||
|
transform_source=uppercase_escape)
|
||||||
content_html = more['content_html']
|
content_html = more['content_html']
|
||||||
more_widget_html = more['load_more_widget_html']
|
more_widget_html = more['load_more_widget_html']
|
||||||
|
|
||||||
@ -1712,7 +1611,7 @@ class YoutubeUserIE(InfoExtractor):
|
|||||||
|
|
||||||
class YoutubeSearchIE(SearchInfoExtractor):
|
class YoutubeSearchIE(SearchInfoExtractor):
|
||||||
IE_DESC = u'YouTube.com searches'
|
IE_DESC = u'YouTube.com searches'
|
||||||
_API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc'
|
_API_URL = u'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc'
|
||||||
_MAX_RESULTS = 1000
|
_MAX_RESULTS = 1000
|
||||||
IE_NAME = u'youtube:search'
|
IE_NAME = u'youtube:search'
|
||||||
_SEARCH_KEY = 'ytsearch'
|
_SEARCH_KEY = 'ytsearch'
|
||||||
@ -1723,9 +1622,12 @@ class YoutubeSearchIE(SearchInfoExtractor):
|
|||||||
video_ids = []
|
video_ids = []
|
||||||
pagenum = 0
|
pagenum = 0
|
||||||
limit = n
|
limit = n
|
||||||
|
PAGE_SIZE = 50
|
||||||
|
|
||||||
while (50 * pagenum) < limit:
|
while (PAGE_SIZE * pagenum) < limit:
|
||||||
result_url = self._API_URL % (compat_urllib_parse.quote_plus(query), (50*pagenum)+1)
|
result_url = self._API_URL % (
|
||||||
|
compat_urllib_parse.quote_plus(query.encode('utf-8')),
|
||||||
|
(PAGE_SIZE * pagenum) + 1)
|
||||||
data_json = self._download_webpage(
|
data_json = self._download_webpage(
|
||||||
result_url, video_id=u'query "%s"' % query,
|
result_url, video_id=u'query "%s"' % query,
|
||||||
note=u'Downloading page %s' % (pagenum + 1),
|
note=u'Downloading page %s' % (pagenum + 1),
|
||||||
@ -1836,11 +1738,10 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
|
|||||||
feed_entries = []
|
feed_entries = []
|
||||||
paging = 0
|
paging = 0
|
||||||
for i in itertools.count(1):
|
for i in itertools.count(1):
|
||||||
info = self._download_webpage(self._FEED_TEMPLATE % paging,
|
info = self._download_json(self._FEED_TEMPLATE % paging,
|
||||||
u'%s feed' % self._FEED_NAME,
|
u'%s feed' % self._FEED_NAME,
|
||||||
u'Downloading page %s' % i)
|
u'Downloading page %s' % i)
|
||||||
info = json.loads(info)
|
feed_html = info.get('feed_html') or info.get('content_html')
|
||||||
feed_html = info['feed_html']
|
|
||||||
m_ids = re.finditer(r'"/watch\?v=(.*?)["&]', feed_html)
|
m_ids = re.finditer(r'"/watch\?v=(.*?)["&]', feed_html)
|
||||||
ids = orderedSet(m.group(1) for m in m_ids)
|
ids = orderedSet(m.group(1) for m in m_ids)
|
||||||
feed_entries.extend(
|
feed_entries.extend(
|
||||||
|
116
youtube_dl/jsinterp.py
Normal file
116
youtube_dl/jsinterp.py
Normal file
@ -0,0 +1,116 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .utils import (
|
||||||
|
ExtractorError,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class JSInterpreter(object):
    """Evaluate the small JavaScript subset recognised by the regexes below.

    Handled statements are ``var x=...``, ``x=...``, ``x[i]=...`` and
    ``return ...``.  Handled expressions are integer literals, variable
    names, ``x.split("")``, ``x.join("")``, ``x.length``, ``x.reverse()``,
    ``x.slice(n)``, ``x[i]``, ``a%b`` and calls ``f(a,b,...)`` to functions
    found in the same source text.  Anything else raises ExtractorError.
    """

    def __init__(self, code):
        """Keep *code* for function lookup and start with an empty cache."""
        self.code = code
        # Function name -> callable built by extract_function(); filled
        # lazily the first time a call expression needs the function.
        self._functions = {}

    def interpret_statement(self, stmt, local_vars, allow_recursion=20):
        """Execute one statement against *local_vars* and return its value.

        An optional leading ``var `` is dropped.  An assignment stores the
        value of the right-hand side in *local_vars* (or in an element of
        the list/sequence held there) and returns that value; a ``return``
        statement just returns the value of its expression.

        Raises ExtractorError if *allow_recursion* is negative or the
        statement is neither an assignment nor a ``return``.
        """
        if allow_recursion < 0:
            raise ExtractorError('Recursion limit reached')

        if stmt.startswith('var '):
            stmt = stmt[len('var '):]
        ass_m = re.match(r'^(?P<out>[a-z]+)(?:\[(?P<index>[^\]]+)\])?' +
                         r'=(?P<expr>.*)$', stmt)
        if ass_m:
            expr = ass_m.group('expr')
            if ass_m.groupdict().get('index'):
                def assign(val):
                    # Indexed assignment: the index expression is evaluated
                    # only after the right-hand side has been computed.
                    lvar = local_vars[ass_m.group('out')]
                    idx = self.interpret_expression(
                        ass_m.group('index'), local_vars, allow_recursion)
                    assert isinstance(idx, int)
                    lvar[idx] = val
                    return val
            else:
                def assign(val):
                    local_vars[ass_m.group('out')] = val
                    return val
        elif stmt.startswith('return '):
            assign = lambda v: v
            expr = stmt[len('return '):]
        else:
            raise ExtractorError(
                'Cannot determine left side of statement in %r' % stmt)

        v = self.interpret_expression(expr, local_vars, allow_recursion)
        return assign(v)

    def interpret_expression(self, expr, local_vars, allow_recursion):
        """Return the value of *expr* evaluated against *local_vars*.

        *allow_recursion* is handed down to nested evaluations (decremented
        for slice and index sub-expressions); it is only compared against
        zero in interpret_statement().

        Raises ExtractorError for an expression matching none of the
        supported forms, and KeyError for an unknown variable name.
        """
        if expr.isdigit():
            return int(expr)

        if expr.isalpha():
            return local_vars[expr]

        # Member access on a variable: x.<member>
        m = re.match(r'^(?P<in>[a-z]+)\.(?P<member>.*)$', expr)
        if m:
            member = m.group('member')
            val = local_vars[m.group('in')]
            if member == 'split("")':
                return list(val)
            if member == 'join("")':
                return ''.join(val)
            if member == 'length':
                return len(val)
            if member == 'reverse()':
                return val[::-1]
            slice_m = re.match(r'slice\((?P<idx>.*)\)', member)
            if slice_m:
                idx = self.interpret_expression(
                    slice_m.group('idx'), local_vars, allow_recursion - 1)
                return val[idx:]
            # An unrecognised member falls through to the remaining
            # patterns and ends in the "Unsupported" error below.

        # Element access: x[<expr>]
        m = re.match(
            r'^(?P<in>[a-z]+)\[(?P<idx>.+)\]$', expr)
        if m:
            val = local_vars[m.group('in')]
            idx = self.interpret_expression(
                m.group('idx'), local_vars, allow_recursion - 1)
            return val[idx]

        # Binary modulo: <a>%<b>
        m = re.match(r'^(?P<a>.+?)(?P<op>[%])(?P<b>.+?)$', expr)
        if m:
            a = self.interpret_expression(
                m.group('a'), local_vars, allow_recursion)
            b = self.interpret_expression(
                m.group('b'), local_vars, allow_recursion)
            return a % b

        # Function call whose arguments are integer literals or variables.
        m = re.match(
            r'^(?P<func>[a-zA-Z$]+)\((?P<args>[a-z0-9,]+)\)$', expr)
        if m:
            fname = m.group('func')
            if fname not in self._functions:
                self._functions[fname] = self.extract_function(fname)
            argvals = [int(v) if v.isdigit() else local_vars[v]
                       for v in m.group('args').split(',')]
            return self._functions[fname](argvals)
        raise ExtractorError('Unsupported JS expression %r' % expr)

    def extract_function(self, funcname):
        """Locate *funcname* in the stored source and return a Python callable.

        Both ``function name(args){...}`` and ``name = function(args){...}``
        are recognised; the body is everything up to the first ``}``.  The
        returned callable takes one list of argument values and returns the
        value of the executed ``return`` statement, the value of the last
        statement if there is no ``return``, or None if the body contains no
        statement at all.

        Raises ExtractorError if no definition of *funcname* is found.
        """
        func_m = re.search(
            (r'(?:function %s|%s\s*=\s*function)' % (
                re.escape(funcname), re.escape(funcname))) +
            r'\((?P<args>[a-z,]+)\)\{(?P<code>[^}]+)\}',
            self.code)
        if func_m is None:
            raise ExtractorError('Could not find JS function %r' % funcname)
        argnames = func_m.group('args').split(',')
        statements = func_m.group('code').split(';')

        def resf(args):
            local_vars = dict(zip(argnames, args))
            res = None
            for stmt in statements:
                stmt = stmt.strip()
                if not stmt:
                    # A trailing ';' (or ';;') leaves an empty statement,
                    # which is a no-op in JavaScript.
                    continue
                res = self.interpret_statement(stmt, local_vars)
                if stmt.startswith('return '):
                    # Nothing after a return statement is executed.
                    break
            return res
        return resf
|
||||||
|
|
@ -55,8 +55,9 @@ class FFmpegPostProcessor(PostProcessor):
|
|||||||
|
|
||||||
if self._downloader.params.get('verbose', False):
|
if self._downloader.params.get('verbose', False):
|
||||||
self._downloader.to_screen(u'[debug] ffmpeg command line: %s' % shell_quote(cmd))
|
self._downloader.to_screen(u'[debug] ffmpeg command line: %s' % shell_quote(cmd))
|
||||||
p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
bcmd = [self._downloader.encode(c) for c in cmd]
|
||||||
stdout,stderr = p.communicate()
|
p = subprocess.Popen(bcmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
||||||
|
stdout, stderr = p.communicate()
|
||||||
if p.returncode != 0:
|
if p.returncode != 0:
|
||||||
stderr = stderr.decode('utf-8', 'replace')
|
stderr = stderr.decode('utf-8', 'replace')
|
||||||
msg = stderr.strip().split('\n')[-1]
|
msg = stderr.strip().split('\n')[-1]
|
||||||
|
@ -539,7 +539,6 @@ def encodeFilename(s, for_subprocess=False):
|
|||||||
encoding = 'utf-8'
|
encoding = 'utf-8'
|
||||||
return s.encode(encoding, 'ignore')
|
return s.encode(encoding, 'ignore')
|
||||||
|
|
||||||
|
|
||||||
def decodeOption(optval):
|
def decodeOption(optval):
|
||||||
if optval is None:
|
if optval is None:
|
||||||
return optval
|
return optval
|
||||||
@ -1269,8 +1268,8 @@ class PagedList(object):
|
|||||||
|
|
||||||
def uppercase_escape(s):
    r"""Return *s* with every ``\UXXXXXXXX`` escape replaced by its character.

    Only the upper-case, eight-hex-digit form is touched; all other text,
    including lower-case ``\uXXXX`` escapes, is returned unchanged.
    """
    # Imported here so the function does not depend on a module-level import.
    import codecs

    # Calling .decode('unicode-escape') on the matched text fails on
    # Python 3, where str has no decode(); the codec function accepts the
    # text string directly.
    unicode_escape = codecs.getdecoder('unicode_escape')
    return re.sub(
        r'\\U[0-9a-fA-F]{8}',
        lambda m: unicode_escape(m.group(0))[0], s)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
struct.pack(u'!I', 0)
|
struct.pack(u'!I', 0)
|
||||||
|
@ -1,2 +1,2 @@
|
|||||||
|
|
||||||
__version__ = '2014.03.28'
|
__version__ = '2014.04.02'
|
||||||
|
Loading…
x
Reference in New Issue
Block a user