Merge branch 'master' of https://github.com/rg3/youtube-dl
This commit is contained in:
commit
a07fcf5328
6
.github/ISSUE_TEMPLATE.md
vendored
6
.github/ISSUE_TEMPLATE.md
vendored
@ -6,8 +6,8 @@
|
||||
|
||||
---
|
||||
|
||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.05.10*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
|
||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.05.10**
|
||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.05.16*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated versions will be rejected.
|
||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.05.16**
|
||||
|
||||
### Before submitting an *issue* make sure you have:
|
||||
- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
|
||||
@ -35,7 +35,7 @@ $ youtube-dl -v <your command line>
|
||||
[debug] User config: []
|
||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||
[debug] youtube-dl version 2016.05.10
|
||||
[debug] youtube-dl version 2016.05.16
|
||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
||||
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
||||
[debug] Proxy map: {}
|
||||
|
@ -418,7 +418,7 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
|
||||
# CONFIGURATION
|
||||
|
||||
You can configure youtube-dl by placing any supported command line option to a configuration file. On Linux, the system wide configuration file is located at `/etc/youtube-dl.conf` and the user wide configuration file at `~/.config/youtube-dl/config`. On Windows, the user wide configuration file locations are `%APPDATA%\youtube-dl\config.txt` or `C:\Users\<user name>\youtube-dl.conf`.
|
||||
You can configure youtube-dl by placing any supported command line option in a configuration file. On Linux and OS X, the system-wide configuration file is located at `/etc/youtube-dl.conf` and the user-wide configuration file at `~/.config/youtube-dl/config`. On Windows, the user-wide configuration file locations are `%APPDATA%\youtube-dl\config.txt` or `C:\Users\<user name>\youtube-dl.conf`.
|
||||
|
||||
For example, with the following configuration file youtube-dl will always extract the audio, not copy the mtime, use a proxy and save all videos under the `Movies` directory in your home directory:
|
||||
```
|
||||
|
@ -6,6 +6,7 @@
|
||||
- **22tracks:genre**
|
||||
- **22tracks:track**
|
||||
- **24video**
|
||||
- **3qsdn**: 3Q SDN
|
||||
- **3sat**
|
||||
- **4tube**
|
||||
- **56.com**
|
||||
@ -114,7 +115,6 @@
|
||||
- **chirbit**
|
||||
- **chirbit:profile**
|
||||
- **Cinchcast**
|
||||
- **Cinemassacre**
|
||||
- **Clipfish**
|
||||
- **cliphunter**
|
||||
- **ClipRs**
|
||||
@ -128,7 +128,6 @@
|
||||
- **CNN**
|
||||
- **CNNArticle**
|
||||
- **CNNBlogs**
|
||||
- **CollegeHumor**
|
||||
- **CollegeRama**
|
||||
- **ComCarCoff**
|
||||
- **ComedyCentral**
|
||||
@ -680,7 +679,6 @@
|
||||
- **tvp.pl:Series**
|
||||
- **TVPlay**: TV3Play and related services
|
||||
- **Tweakers**
|
||||
- **twitch:bookmarks**
|
||||
- **twitch:chapter**
|
||||
- **twitch:past_broadcasts**
|
||||
- **twitch:profile**
|
||||
@ -698,7 +696,8 @@
|
||||
- **USAToday**
|
||||
- **ustream**
|
||||
- **ustream:channel**
|
||||
- **Ustudio**
|
||||
- **ustudio**
|
||||
- **ustudio:embed**
|
||||
- **Varzesh3**
|
||||
- **Vbox7**
|
||||
- **VeeHD**
|
||||
|
@ -77,17 +77,28 @@ class TestMultipleSocks(unittest.TestCase):
|
||||
|
||||
|
||||
class TestSocks(unittest.TestCase):
|
||||
_SKIP_SOCKS_TEST = True
|
||||
|
||||
def setUp(self):
|
||||
if self._SKIP_SOCKS_TEST:
|
||||
return
|
||||
|
||||
self.port = random.randint(20000, 30000)
|
||||
self.server_process = subprocess.Popen([
|
||||
'srelay', '-f', '-i', '127.0.0.1:%d' % self.port],
|
||||
stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
||||
|
||||
def tearDown(self):
|
||||
if self._SKIP_SOCKS_TEST:
|
||||
return
|
||||
|
||||
self.server_process.terminate()
|
||||
self.server_process.communicate()
|
||||
|
||||
def _get_ip(self, protocol):
|
||||
if self._SKIP_SOCKS_TEST:
|
||||
return '127.0.0.1'
|
||||
|
||||
ydl = FakeYDL({
|
||||
'proxy': '%s://127.0.0.1:%d' % (protocol, self.port),
|
||||
})
|
||||
|
@ -155,8 +155,8 @@ class TestUtil(unittest.TestCase):
|
||||
self.assertTrue(sanitize_filename(':', restricted=True) != '')
|
||||
|
||||
self.assertEqual(sanitize_filename(
|
||||
'ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ', restricted=True),
|
||||
'AAAAAAAECEEEEIIIIDNOOOOOOUUUUYPssaaaaaaaeceeeeiiiionoooooouuuuypy')
|
||||
'ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØŒÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõöøœùúûüýþÿ', restricted=True),
|
||||
'AAAAAAAECEEEEIIIIDNOOOOOOOEUUUUYPssaaaaaaaeceeeeiiiionoooooooeuuuuypy')
|
||||
|
||||
def test_sanitize_ids(self):
|
||||
self.assertEqual(sanitize_filename('_n_cd26wFpw', is_id=True), '_n_cd26wFpw')
|
||||
@ -617,6 +617,15 @@ class TestUtil(unittest.TestCase):
|
||||
json_code = js_to_json(inp)
|
||||
self.assertEqual(json.loads(json_code), json.loads(inp))
|
||||
|
||||
inp = '''{
|
||||
0:{src:'skipped', type: 'application/dash+xml'},
|
||||
1:{src:'skipped', type: 'application/vnd.apple.mpegURL'},
|
||||
}'''
|
||||
self.assertEqual(js_to_json(inp), '''{
|
||||
"0":{"src":"skipped", "type": "application/dash+xml"},
|
||||
"1":{"src":"skipped", "type": "application/vnd.apple.mpegURL"}
|
||||
}''')
|
||||
|
||||
def test_js_to_json_edgecases(self):
|
||||
on = js_to_json("{abc_def:'1\\'\\\\2\\\\\\'3\"4'}")
|
||||
self.assertEqual(json.loads(on), {"abc_def": "1'\\2\\'3\"4"})
|
||||
@ -640,6 +649,27 @@ class TestUtil(unittest.TestCase):
|
||||
on = js_to_json('{"abc": "def",}')
|
||||
self.assertEqual(json.loads(on), {'abc': 'def'})
|
||||
|
||||
on = js_to_json('{ 0: /* " \n */ ",]" , }')
|
||||
self.assertEqual(json.loads(on), {'0': ',]'})
|
||||
|
||||
on = js_to_json(r'["<p>x<\/p>"]')
|
||||
self.assertEqual(json.loads(on), ['<p>x</p>'])
|
||||
|
||||
on = js_to_json(r'["\xaa"]')
|
||||
self.assertEqual(json.loads(on), ['\u00aa'])
|
||||
|
||||
on = js_to_json("['a\\\nb']")
|
||||
self.assertEqual(json.loads(on), ['ab'])
|
||||
|
||||
on = js_to_json('{0xff:0xff}')
|
||||
self.assertEqual(json.loads(on), {'255': 255})
|
||||
|
||||
on = js_to_json('{077:077}')
|
||||
self.assertEqual(json.loads(on), {'63': 63})
|
||||
|
||||
on = js_to_json('{42:42}')
|
||||
self.assertEqual(json.loads(on), {'42': 42})
|
||||
|
||||
def test_extract_attributes(self):
|
||||
self.assertEqual(extract_attributes('<e x="y">'), {'x': 'y'})
|
||||
self.assertEqual(extract_attributes("<e x='y'>"), {'x': 'y'})
|
||||
|
@ -326,7 +326,7 @@ class YoutubeDL(object):
|
||||
['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
|
||||
self._output_channel = os.fdopen(master, 'rb')
|
||||
except OSError as ose:
|
||||
if ose.errno == 2:
|
||||
if ose.errno == errno.ENOENT:
|
||||
self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
|
||||
else:
|
||||
raise
|
||||
@ -720,6 +720,7 @@ class YoutubeDL(object):
|
||||
result_type = ie_result.get('_type', 'video')
|
||||
|
||||
if result_type in ('url', 'url_transparent'):
|
||||
ie_result['url'] = sanitize_url(ie_result['url'])
|
||||
extract_flat = self.params.get('extract_flat', False)
|
||||
if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
|
||||
extract_flat is True):
|
||||
|
@ -67,9 +67,9 @@ def _real_main(argv=None):
|
||||
# Custom HTTP headers
|
||||
if opts.headers is not None:
|
||||
for h in opts.headers:
|
||||
if h.find(':', 1) < 0:
|
||||
if ':' not in h:
|
||||
parser.error('wrong header formatting, it should be key:value, not "%s"' % h)
|
||||
key, value = h.split(':', 2)
|
||||
key, value = h.split(':', 1)
|
||||
if opts.verbose:
|
||||
write_string('[debug] Adding header from command line option %s:%s\n' % (key, value))
|
||||
std_headers[key] = value
|
||||
|
@ -341,9 +341,9 @@ except ImportError: # Python 2
|
||||
return parsed_result
|
||||
|
||||
try:
|
||||
from shlex import quote as shlex_quote
|
||||
from shlex import quote as compat_shlex_quote
|
||||
except ImportError: # Python < 3.3
|
||||
def shlex_quote(s):
|
||||
def compat_shlex_quote(s):
|
||||
if re.match(r'^[-_\w./]+$', s):
|
||||
return s
|
||||
else:
|
||||
@ -466,18 +466,6 @@ else:
|
||||
print(s)
|
||||
|
||||
|
||||
try:
|
||||
subprocess_check_output = subprocess.check_output
|
||||
except AttributeError:
|
||||
def subprocess_check_output(*args, **kwargs):
|
||||
assert 'input' not in kwargs
|
||||
p = subprocess.Popen(*args, stdout=subprocess.PIPE, **kwargs)
|
||||
output, _ = p.communicate()
|
||||
ret = p.poll()
|
||||
if ret:
|
||||
raise subprocess.CalledProcessError(ret, p.args, output=output)
|
||||
return output
|
||||
|
||||
if sys.version_info < (3, 0) and sys.platform == 'win32':
|
||||
def compat_getpass(prompt, *args, **kwargs):
|
||||
if isinstance(prompt, compat_str):
|
||||
@ -635,6 +623,7 @@ __all__ = [
|
||||
'compat_parse_qs',
|
||||
'compat_print',
|
||||
'compat_setenv',
|
||||
'compat_shlex_quote',
|
||||
'compat_shlex_split',
|
||||
'compat_socket_create_connection',
|
||||
'compat_str',
|
||||
@ -656,7 +645,5 @@ __all__ = [
|
||||
'compat_urlretrieve',
|
||||
'compat_xml_parse_error',
|
||||
'compat_xpath',
|
||||
'shlex_quote',
|
||||
'subprocess_check_output',
|
||||
'workaround_optparse_bug9161',
|
||||
]
|
||||
|
@ -23,26 +23,38 @@ from ..utils import (
|
||||
)
|
||||
|
||||
|
||||
class DataTruncatedError(Exception):
|
||||
pass
|
||||
|
||||
|
||||
class FlvReader(io.BytesIO):
|
||||
"""
|
||||
Reader for Flv files
|
||||
The file format is documented in https://www.adobe.com/devnet/f4v.html
|
||||
"""
|
||||
|
||||
def read_bytes(self, n):
|
||||
data = self.read(n)
|
||||
if len(data) < n:
|
||||
raise DataTruncatedError(
|
||||
'FlvReader error: need %d bytes while only %d bytes got' % (
|
||||
n, len(data)))
|
||||
return data
|
||||
|
||||
# Utility functions for reading numbers and strings
|
||||
def read_unsigned_long_long(self):
|
||||
return compat_struct_unpack('!Q', self.read(8))[0]
|
||||
return compat_struct_unpack('!Q', self.read_bytes(8))[0]
|
||||
|
||||
def read_unsigned_int(self):
|
||||
return compat_struct_unpack('!I', self.read(4))[0]
|
||||
return compat_struct_unpack('!I', self.read_bytes(4))[0]
|
||||
|
||||
def read_unsigned_char(self):
|
||||
return compat_struct_unpack('!B', self.read(1))[0]
|
||||
return compat_struct_unpack('!B', self.read_bytes(1))[0]
|
||||
|
||||
def read_string(self):
|
||||
res = b''
|
||||
while True:
|
||||
char = self.read(1)
|
||||
char = self.read_bytes(1)
|
||||
if char == b'\x00':
|
||||
break
|
||||
res += char
|
||||
@ -53,18 +65,18 @@ class FlvReader(io.BytesIO):
|
||||
Read a box and return the info as a tuple: (box_size, box_type, box_data)
|
||||
"""
|
||||
real_size = size = self.read_unsigned_int()
|
||||
box_type = self.read(4)
|
||||
box_type = self.read_bytes(4)
|
||||
header_end = 8
|
||||
if size == 1:
|
||||
real_size = self.read_unsigned_long_long()
|
||||
header_end = 16
|
||||
return real_size, box_type, self.read(real_size - header_end)
|
||||
return real_size, box_type, self.read_bytes(real_size - header_end)
|
||||
|
||||
def read_asrt(self):
|
||||
# version
|
||||
self.read_unsigned_char()
|
||||
# flags
|
||||
self.read(3)
|
||||
self.read_bytes(3)
|
||||
quality_entry_count = self.read_unsigned_char()
|
||||
# QualityEntryCount
|
||||
for i in range(quality_entry_count):
|
||||
@ -85,7 +97,7 @@ class FlvReader(io.BytesIO):
|
||||
# version
|
||||
self.read_unsigned_char()
|
||||
# flags
|
||||
self.read(3)
|
||||
self.read_bytes(3)
|
||||
# time scale
|
||||
self.read_unsigned_int()
|
||||
|
||||
@ -119,7 +131,7 @@ class FlvReader(io.BytesIO):
|
||||
# version
|
||||
self.read_unsigned_char()
|
||||
# flags
|
||||
self.read(3)
|
||||
self.read_bytes(3)
|
||||
|
||||
self.read_unsigned_int() # BootstrapinfoVersion
|
||||
# Profile,Live,Update,Reserved
|
||||
@ -374,7 +386,17 @@ class F4mFD(FragmentFD):
|
||||
down.close()
|
||||
reader = FlvReader(down_data)
|
||||
while True:
|
||||
_, box_type, box_data = reader.read_box_info()
|
||||
try:
|
||||
_, box_type, box_data = reader.read_box_info()
|
||||
except DataTruncatedError:
|
||||
if test:
|
||||
# In tests, segments may be truncated, and thus
|
||||
# FlvReader may not be able to parse the whole
|
||||
# chunk. If so, write the segment as is
|
||||
# See https://github.com/rg3/youtube-dl/issues/9214
|
||||
dest_stream.write(down_data)
|
||||
break
|
||||
raise
|
||||
if box_type == b'mdat':
|
||||
dest_stream.write(box_data)
|
||||
break
|
||||
|
@ -17,6 +17,9 @@ class BloombergIE(InfoExtractor):
|
||||
'title': 'Shah\'s Presentation on Foreign-Exchange Strategies',
|
||||
'description': 'md5:a8ba0302912d03d246979735c17d2761',
|
||||
},
|
||||
'params': {
|
||||
'format': 'best[format_id^=hds]',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.bloomberg.com/news/articles/2015-11-12/five-strange-things-that-have-been-happening-in-financial-markets',
|
||||
'only_matching': True,
|
||||
|
@ -307,9 +307,10 @@ class BrightcoveLegacyIE(InfoExtractor):
|
||||
playlist_title=playlist_info['mediaCollectionDTO']['displayName'])
|
||||
|
||||
def _extract_video_info(self, video_info):
|
||||
video_id = compat_str(video_info['id'])
|
||||
publisher_id = video_info.get('publisherId')
|
||||
info = {
|
||||
'id': compat_str(video_info['id']),
|
||||
'id': video_id,
|
||||
'title': video_info['displayName'].strip(),
|
||||
'description': video_info.get('shortDescription'),
|
||||
'thumbnail': video_info.get('videoStillURL') or video_info.get('thumbnailURL'),
|
||||
@ -331,7 +332,8 @@ class BrightcoveLegacyIE(InfoExtractor):
|
||||
url_comp = compat_urllib_parse_urlparse(url)
|
||||
if url_comp.path.endswith('.m3u8'):
|
||||
formats.extend(
|
||||
self._extract_m3u8_formats(url, info['id'], 'mp4'))
|
||||
self._extract_m3u8_formats(
|
||||
url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
|
||||
continue
|
||||
elif 'akamaihd.net' in url_comp.netloc:
|
||||
# This type of renditions are served through
|
||||
@ -365,7 +367,7 @@ class BrightcoveLegacyIE(InfoExtractor):
|
||||
a_format.update({
|
||||
'format_id': 'hls%s' % ('-%s' % tbr if tbr else ''),
|
||||
'ext': 'mp4',
|
||||
'protocol': 'm3u8',
|
||||
'protocol': 'm3u8_native',
|
||||
})
|
||||
|
||||
formats.append(a_format)
|
||||
@ -395,7 +397,7 @@ class BrightcoveLegacyIE(InfoExtractor):
|
||||
return ad_info
|
||||
|
||||
if 'url' not in info and not info.get('formats'):
|
||||
raise ExtractorError('Unable to extract video url for %s' % info['id'])
|
||||
raise ExtractorError('Unable to extract video url for %s' % video_id)
|
||||
return info
|
||||
|
||||
|
||||
@ -527,7 +529,7 @@ class BrightcoveNewIE(InfoExtractor):
|
||||
if not src:
|
||||
continue
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
src, video_id, 'mp4', m3u8_id='hls', fatal=False))
|
||||
src, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
|
||||
elif source_type == 'application/dash+xml':
|
||||
if not src:
|
||||
continue
|
||||
|
@ -1,119 +0,0 @@
|
||||
# encoding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import ExtractorError
|
||||
from .screenwavemedia import ScreenwaveMediaIE
|
||||
|
||||
|
||||
class CinemassacreIE(InfoExtractor):
    """Extractor for cinemassacre.com post pages.

    The post URL carries the publication date (year/month/day) and a slug;
    the actual media is delegated to whichever embedded player is found on
    the page (a ScreenwaveMedia embed or a YouTube iframe).
    """

    # Raw string: the pattern contains regex escapes such as ``\.`` which are
    # not valid *string* escapes and must reach ``re`` untouched.
    _VALID_URL = r'https?://(?:www\.)?cinemassacre\.com/(?P<date_y>[0-9]{4})/(?P<date_m>[0-9]{2})/(?P<date_d>[0-9]{2})/(?P<display_id>[^?#/]+)'
    _TESTS = [
        {
            'url': 'http://cinemassacre.com/2012/11/10/avgn-the-movie-trailer/',
            'md5': 'fde81fbafaee331785f58cd6c0d46190',
            'info_dict': {
                'id': 'Cinemassacre-19911',
                'ext': 'mp4',
                'upload_date': '20121110',
                'title': '“Angry Video Game Nerd: The Movie” – Trailer',
                'description': 'md5:fb87405fcb42a331742a0dce2708560b',
            },
            'params': {
                # m3u8 download
                'skip_download': True,
            },
        },
        {
            'url': 'http://cinemassacre.com/2013/10/02/the-mummys-hand-1940',
            'md5': 'd72f10cd39eac4215048f62ab477a511',
            'info_dict': {
                'id': 'Cinemassacre-521be8ef82b16',
                'ext': 'mp4',
                'upload_date': '20131002',
                'title': 'The Mummy’s Hand (1940)',
            },
            'params': {
                # m3u8 download
                'skip_download': True,
            },
        },
        {
            # Youtube embedded video
            'url': 'http://cinemassacre.com/2006/12/07/chronologically-confused-about-bad-movie-and-video-game-sequel-titles/',
            'md5': 'ec9838a5520ef5409b3e4e42fcb0a3b9',
            'info_dict': {
                'id': 'OEVzPCY2T-g',
                'ext': 'webm',
                'title': 'AVGN: Chronologically Confused about Bad Movie and Video Game Sequel Titles',
                'upload_date': '20061207',
                'uploader': 'Cinemassacre',
                'uploader_id': 'JamesNintendoNerd',
                'description': 'md5:784734696c2b8b7f4b8625cc799e07f6',
            }
        },
        {
            # Youtube embedded video
            'url': 'http://cinemassacre.com/2006/09/01/mckids/',
            'md5': '7393c4e0f54602ad110c793eb7a6513a',
            'info_dict': {
                'id': 'FnxsNhuikpo',
                'ext': 'webm',
                'upload_date': '20060901',
                'uploader': 'Cinemassacre Extra',
                'description': 'md5:de9b751efa9e45fbaafd9c8a1123ed53',
                'uploader_id': 'Cinemassacre',
                'title': 'AVGN: McKids',
            }
        },
        {
            'url': 'http://cinemassacre.com/2015/05/25/mario-kart-64-nintendo-64-james-mike-mondays/',
            'md5': '1376908e49572389e7b06251a53cdd08',
            'info_dict': {
                'id': 'Cinemassacre-555779690c440',
                'ext': 'mp4',
                'description': 'Let’s Play Mario Kart 64 !! Mario Kart 64 is a classic go-kart racing game released for the Nintendo 64 (N64). Today James & Mike do 4 player Battle Mode with Kyle and Bootsy!',
                'title': 'Mario Kart 64 (Nintendo 64) James & Mike Mondays',
                'upload_date': '20150525',
            },
            'params': {
                # m3u8 download
                'skip_download': True,
            },
        }
    ]

    def _real_extract(self, url):
        """Return a ``url_transparent`` result pointing at the embedded player.

        The display id and upload date come from the URL itself; title,
        description and thumbnail are scraped from the post page.

        Raises:
            ExtractorError: if neither a ScreenwaveMedia embed nor a YouTube
                iframe is found in the page.
        """
        mobj = re.match(self._VALID_URL, url)
        display_id = mobj.group('display_id')
        # YYYYMMDD, assembled from the date components of the URL path.
        video_date = mobj.group('date_y') + mobj.group('date_m') + mobj.group('date_d')

        webpage = self._download_webpage(url, display_id)

        # Patterns are tried in order; default=None lets us raise our own,
        # more specific error below instead of the generic regex failure.
        playerdata_url = self._search_regex(
            [
                ScreenwaveMediaIE.EMBED_PATTERN,
                r'<iframe[^>]+src="(?P<url>(?:https?:)?//(?:[^.]+\.)?youtube\.com/.+?)"',
            ],
            webpage, 'player data URL', default=None, group='url')
        if not playerdata_url:
            raise ExtractorError('Unable to find player data')

        # Title is the part of <title> before the first '|'.
        video_title = self._html_search_regex(
            r'<title>(?P<title>.+?)\|', webpage, 'title')
        video_description = self._html_search_regex(
            r'<div class="entry-content">(?P<description>.+?)</div>',
            webpage, 'description', flags=re.DOTALL, fatal=False)
        video_thumbnail = self._og_search_thumbnail(webpage)

        return {
            '_type': 'url_transparent',
            'display_id': display_id,
            'title': video_title,
            'description': video_description,
            'upload_date': video_date,
            'thumbnail': video_thumbnail,
            'url': playerdata_url,
        }
|
@ -1,101 +0,0 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import int_or_none
|
||||
|
||||
|
||||
class CollegeHumorIE(InfoExtractor):
    """Extractor for collegehumor.com video, embed and short (``/e/``) URLs.

    Metadata is read from the site's ``moogaloop`` JSON endpoint; videos that
    are hosted on YouTube are handed over to the Youtube extractor.
    """

    _VALID_URL = r'^(?:https?://)?(?:www\.)?collegehumor\.com/(video|embed|e)/(?P<videoid>[0-9]+)/?(?P<shorttitle>.*)$'

    _TESTS = [
        {
            'url': 'http://www.collegehumor.com/video/6902724/comic-con-cosplay-catastrophe',
            'md5': 'dcc0f5c1c8be98dc33889a191f4c26bd',
            'info_dict': {
                'id': '6902724',
                'ext': 'mp4',
                'title': 'Comic-Con Cosplay Catastrophe',
                'description': "Fans get creative this year at San Diego. Too creative. And yes, that's really Joss Whedon.",
                'age_limit': 13,
                'duration': 187,
            },
        }, {
            'url': 'http://www.collegehumor.com/video/3505939/font-conference',
            'md5': '72fa701d8ef38664a4dbb9e2ab721816',
            'info_dict': {
                'id': '3505939',
                'ext': 'mp4',
                'title': 'Font Conference',
                'description': "This video wasn't long enough, so we made it double-spaced.",
                'age_limit': 10,
                'duration': 179,
            },
        }, {
            # embedded youtube video
            'url': 'http://www.collegehumor.com/embed/6950306',
            'info_dict': {
                'id': 'Z-bao9fg6Yc',
                'ext': 'mp4',
                'title': 'Young Americans Think President John F. Kennedy Died THIS MORNING IN A CAR ACCIDENT!!!',
                'uploader': 'Mark Dice',
                'uploader_id': 'MarkDice',
                'description': 'md5:62c3dab9351fac7bb44b53b69511d87f',
                'upload_date': '20140127',
            },
            'params': {
                'skip_download': True,
            },
            'add_ie': ['Youtube'],
        },
    ]

    def _real_extract(self, url):
        """Fetch the video's info JSON and build the result dictionary.

        Returns a ``url`` result delegating to the Youtube extractor when the
        JSON carries a ``youtubeId``; otherwise a full info dict with one
        format per (container, quality) pair listed under ``mp4``/``webm``.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('videoid')

        json_url = 'http://www.collegehumor.com/moogaloop/video/' + video_id + '.json'
        data = json.loads(self._download_webpage(
            json_url, video_id, 'Downloading info JSON'))
        vdata = data['video']
        if vdata.get('youtubeId') is not None:
            return {
                '_type': 'url',
                'url': vdata['youtubeId'],
                'ie_key': 'Youtube',
            }

        # Maps the site's (lower-cased) rating label to a minimum viewer age.
        AGE_LIMITS = {'nc17': 18, 'r': 18, 'pg13': 13, 'pg': 10, 'g': 0}
        rating = vdata.get('rating')
        if rating:
            age_limit = AGE_LIMITS.get(rating.lower())
        else:
            age_limit = None  # None = No idea

        # Relative ranking of the known quality names; unknown names get None.
        PREFS = {'high_quality': 2, 'low_quality': 0}
        formats = []
        for format_key in ('mp4', 'webm'):
            # ``or {}`` also covers the key being present with a null value,
            # which a plain ``.get(key, {})`` default would not.
            for qname, qurl in (vdata.get(format_key) or {}).items():
                if not qurl:
                    # A format entry without a URL cannot be downloaded.
                    continue
                formats.append({
                    'format_id': format_key + '_' + qname,
                    'url': qurl,
                    'format': format_key,
                    'preference': PREFS.get(qname),
                })
        self._sort_formats(formats)

        # NOTE(review): 1000 is int_or_none's second positional argument,
        # presumably a scale converting milliseconds to seconds - confirm
        # against utils.int_or_none.
        duration = int_or_none(vdata.get('duration'), 1000)
        like_count = int_or_none(vdata.get('likes'))

        return {
            'id': video_id,
            'title': vdata['title'],
            'description': vdata.get('description'),
            'thumbnail': vdata.get('thumbnail'),
            'formats': formats,
            'age_limit': age_limit,
            'duration': duration,
            'like_count': like_count,
        }
|
@ -1139,11 +1139,14 @@ class InfoExtractor(object):
|
||||
if m3u8_id:
|
||||
format_id.append(m3u8_id)
|
||||
last_media_name = last_media.get('NAME') if last_media and last_media.get('TYPE') != 'SUBTITLES' else None
|
||||
# Despite specification does not mention NAME attribute for
|
||||
# EXT-X-STREAM-INF it still sometimes may be present
|
||||
stream_name = last_info.get('NAME') or last_media_name
|
||||
# Bandwidth of live streams may differ over time thus making
|
||||
# format_id unpredictable. So it's better to keep provided
|
||||
# format_id intact.
|
||||
if not live:
|
||||
format_id.append(last_media_name if last_media_name else '%d' % (tbr if tbr else len(formats)))
|
||||
format_id.append(stream_name if stream_name else '%d' % (tbr if tbr else len(formats)))
|
||||
f = {
|
||||
'format_id': '-'.join(format_id),
|
||||
'url': format_url(line.strip()),
|
||||
|
@ -124,7 +124,6 @@ from .chirbit import (
|
||||
ChirbitProfileIE,
|
||||
)
|
||||
from .cinchcast import CinchcastIE
|
||||
from .cinemassacre import CinemassacreIE
|
||||
from .cliprs import ClipRsIE
|
||||
from .clipfish import ClipfishIE
|
||||
from .cliphunter import CliphunterIE
|
||||
@ -139,7 +138,6 @@ from .cnn import (
|
||||
CNNBlogsIE,
|
||||
CNNArticleIE,
|
||||
)
|
||||
from .collegehumor import CollegeHumorIE
|
||||
from .collegerama import CollegeRamaIE
|
||||
from .comedycentral import ComedyCentralIE, ComedyCentralShowsIE
|
||||
from .comcarcoff import ComCarCoffIE
|
||||
@ -767,6 +765,7 @@ from .thesixtyone import TheSixtyOneIE
|
||||
from .thestar import TheStarIE
|
||||
from .thisamericanlife import ThisAmericanLifeIE
|
||||
from .thisav import ThisAVIE
|
||||
from .threeqsdn import ThreeQSDNIE
|
||||
from .tinypic import TinyPicIE
|
||||
from .tlc import TlcDeIE
|
||||
from .tmz import (
|
||||
@ -834,7 +833,6 @@ from .twitch import (
|
||||
TwitchVodIE,
|
||||
TwitchProfileIE,
|
||||
TwitchPastBroadcastsIE,
|
||||
TwitchBookmarksIE,
|
||||
TwitchStreamIE,
|
||||
)
|
||||
from .twitter import (
|
||||
@ -852,7 +850,10 @@ from .unistra import UnistraIE
|
||||
from .urort import UrortIE
|
||||
from .usatoday import USATodayIE
|
||||
from .ustream import UstreamIE, UstreamChannelIE
|
||||
from .ustudio import UstudioIE
|
||||
from .ustudio import (
|
||||
UstudioIE,
|
||||
UstudioEmbedIE,
|
||||
)
|
||||
from .varzesh3 import Varzesh3IE
|
||||
from .vbox7 import Vbox7IE
|
||||
from .veehd import VeeHDIE
|
||||
|
@ -61,6 +61,7 @@ from .jwplatform import JWPlatformIE
|
||||
from .digiteka import DigitekaIE
|
||||
from .instagram import InstagramIE
|
||||
from .liveleak import LiveLeakIE
|
||||
from .threeqsdn import ThreeQSDNIE
|
||||
|
||||
|
||||
class GenericIE(InfoExtractor):
|
||||
@ -1427,7 +1428,8 @@ class GenericIE(InfoExtractor):
|
||||
# Site Name | Video Title
|
||||
# Video Title - Tagline | Site Name
|
||||
# and so on and so forth; it's just not practical
|
||||
video_title = self._html_search_regex(
|
||||
video_title = self._og_search_title(
|
||||
webpage, default=None) or self._html_search_regex(
|
||||
r'(?s)<title>(.*?)</title>', webpage, 'video title',
|
||||
default='video')
|
||||
|
||||
@ -1445,6 +1447,9 @@ class GenericIE(InfoExtractor):
|
||||
video_uploader = self._search_regex(
|
||||
r'^(?:https?://)?([^/]*)/.*', url, 'video uploader')
|
||||
|
||||
video_description = self._og_search_description(webpage, default=None)
|
||||
video_thumbnail = self._og_search_thumbnail(webpage, default=None)
|
||||
|
||||
# Helper method
|
||||
def _playlist_from_matches(matches, getter=None, ie=None):
|
||||
urlrs = orderedSet(
|
||||
@ -1983,6 +1988,19 @@ class GenericIE(InfoExtractor):
|
||||
if liveleak_url:
|
||||
return self.url_result(liveleak_url, 'LiveLeak')
|
||||
|
||||
# Look for 3Q SDN embeds
|
||||
threeqsdn_url = ThreeQSDNIE._extract_url(webpage)
|
||||
if threeqsdn_url:
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'ie_key': ThreeQSDNIE.ie_key(),
|
||||
'url': self._proto_relative_url(threeqsdn_url),
|
||||
'title': video_title,
|
||||
'description': video_description,
|
||||
'thumbnail': video_thumbnail,
|
||||
'uploader': video_uploader,
|
||||
}
|
||||
|
||||
def check_video(vurl):
|
||||
if YoutubeIE.suitable(vurl):
|
||||
return True
|
||||
|
@ -4,7 +4,7 @@ from .common import InfoExtractor
|
||||
|
||||
|
||||
class GrouponIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://www\.groupon\.com/deals/(?P<id>[^?#]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?groupon\.com/deals/(?P<id>[^/?#&]+)'
|
||||
|
||||
_TEST = {
|
||||
'url': 'https://www.groupon.com/deals/bikram-yoga-huntington-beach-2#ooid=tubGNycTo_9Uxg82uESj4i61EYX8nyuf',
|
||||
@ -15,18 +15,26 @@ class GrouponIE(InfoExtractor):
|
||||
},
|
||||
'playlist': [{
|
||||
'info_dict': {
|
||||
'id': 'tubGNycTo_9Uxg82uESj4i61EYX8nyuf',
|
||||
'ext': 'flv',
|
||||
'title': 'Bikram Yoga Huntington Beach | Orange County',
|
||||
'id': 'fk6OhWpXgIQ',
|
||||
'ext': 'mp4',
|
||||
'title': 'Bikram Yoga Huntington Beach | Orange County !tubGNycTo@9Uxg82uESj4i61EYX8nyuf',
|
||||
'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
|
||||
'duration': 44.961,
|
||||
'duration': 45,
|
||||
'upload_date': '20160405',
|
||||
'uploader_id': 'groupon',
|
||||
'uploader': 'Groupon',
|
||||
},
|
||||
}],
|
||||
'params': {
|
||||
'skip_download': 'HDS',
|
||||
'skip_download': True,
|
||||
}
|
||||
}
|
||||
|
||||
_PROVIDERS = {
|
||||
'ooyala': ('ooyala:%s', 'Ooyala'),
|
||||
'youtube': ('%s', 'Youtube'),
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
playlist_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, playlist_id)
|
||||
@ -36,12 +44,17 @@ class GrouponIE(InfoExtractor):
|
||||
videos = payload['carousel'].get('dealVideos', [])
|
||||
entries = []
|
||||
for v in videos:
|
||||
if v.get('provider') != 'OOYALA':
|
||||
provider = v.get('provider')
|
||||
video_id = v.get('media') or v.get('id') or v.get('baseURL')
|
||||
if not provider or not video_id:
|
||||
continue
|
||||
url_pattern, ie_key = self._PROVIDERS.get(provider.lower())
|
||||
if not url_pattern:
|
||||
self.report_warning(
|
||||
'%s: Unsupported video provider %s, skipping video' %
|
||||
(playlist_id, v.get('provider')))
|
||||
(playlist_id, provider))
|
||||
continue
|
||||
entries.append(self.url_result('ooyala:%s' % v['media']))
|
||||
entries.append(self.url_result(url_pattern % video_id, ie_key))
|
||||
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
|
@ -7,6 +7,7 @@ from .common import InfoExtractor
|
||||
from ..compat import compat_urlparse
|
||||
from ..utils import (
|
||||
HEADRequest,
|
||||
KNOWN_EXTENSIONS,
|
||||
sanitized_Request,
|
||||
str_to_int,
|
||||
urlencode_postdata,
|
||||
@ -17,7 +18,7 @@ from ..utils import (
|
||||
class HearThisAtIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?hearthis\.at/(?P<artist>[^/]+)/(?P<title>[A-Za-z0-9\-]+)/?$'
|
||||
_PLAYLIST_URL = 'https://hearthis.at/playlist.php'
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'https://hearthis.at/moofi/dr-kreep',
|
||||
'md5': 'ab6ec33c8fed6556029337c7885eb4e0',
|
||||
'info_dict': {
|
||||
@ -26,7 +27,7 @@ class HearThisAtIE(InfoExtractor):
|
||||
'title': 'Moofi - Dr. Kreep',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'timestamp': 1421564134,
|
||||
'description': 'Creepy Patch. Mutable Instruments Braids Vowel + Formant Mode.',
|
||||
'description': 'Listen to Dr. Kreep by Moofi on hearthis.at - Modular, Eurorack, Mutable Intruments Braids, Valhalla-DSP',
|
||||
'upload_date': '20150118',
|
||||
'comment_count': int,
|
||||
'view_count': int,
|
||||
@ -34,7 +35,25 @@ class HearThisAtIE(InfoExtractor):
|
||||
'duration': 71,
|
||||
'categories': ['Experimental'],
|
||||
}
|
||||
}
|
||||
}, {
|
||||
# 'download' link redirects to the original webpage
|
||||
'url': 'https://hearthis.at/twitchsf/dj-jim-hopkins-totally-bitchin-80s-dance-mix/',
|
||||
'md5': '5980ceb7c461605d30f1f039df160c6e',
|
||||
'info_dict': {
|
||||
'id': '811296',
|
||||
'ext': 'mp3',
|
||||
'title': 'TwitchSF - DJ Jim Hopkins - Totally Bitchin\' 80\'s Dance Mix!',
|
||||
'description': 'Listen to DJ Jim Hopkins - Totally Bitchin\' 80\'s Dance Mix! by TwitchSF on hearthis.at - Dance',
|
||||
'upload_date': '20160328',
|
||||
'timestamp': 1459186146,
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'comment_count': int,
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'duration': 4360,
|
||||
'categories': ['Dance'],
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
m = re.match(self._VALID_URL, url)
|
||||
@ -90,13 +109,14 @@ class HearThisAtIE(InfoExtractor):
|
||||
ext_handle = self._request_webpage(
|
||||
ext_req, display_id, note='Determining extension')
|
||||
ext = urlhandle_detect_ext(ext_handle)
|
||||
formats.append({
|
||||
'format_id': 'download',
|
||||
'vcodec': 'none',
|
||||
'ext': ext,
|
||||
'url': download_url,
|
||||
'preference': 2, # Usually better quality
|
||||
})
|
||||
if ext in KNOWN_EXTENSIONS:
|
||||
formats.append({
|
||||
'format_id': 'download',
|
||||
'vcodec': 'none',
|
||||
'ext': ext,
|
||||
'url': download_url,
|
||||
'preference': 2, # Usually better quality
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
|
@ -1,10 +1,10 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
mimetype2ext,
|
||||
qualities,
|
||||
)
|
||||
|
||||
@ -12,9 +12,9 @@ from ..utils import (
|
||||
class ImdbIE(InfoExtractor):
|
||||
IE_NAME = 'imdb'
|
||||
IE_DESC = 'Internet Movie Database trailers'
|
||||
_VALID_URL = r'https?://(?:www|m)\.imdb\.com/video/imdb/vi(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://(?:www|m)\.imdb\.com/video/[^/]+/vi(?P<id>\d+)'
|
||||
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://www.imdb.com/video/imdb/vi2524815897',
|
||||
'info_dict': {
|
||||
'id': '2524815897',
|
||||
@ -22,7 +22,10 @@ class ImdbIE(InfoExtractor):
|
||||
'title': 'Ice Age: Continental Drift Trailer (No. 2) - IMDb',
|
||||
'description': 'md5:9061c2219254e5d14e03c25c98e96a81',
|
||||
}
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.imdb.com/video/_/vi2524815897',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
@ -48,13 +51,27 @@ class ImdbIE(InfoExtractor):
|
||||
json_data = self._search_regex(
|
||||
r'<script[^>]+class="imdb-player-data"[^>]*?>(.*?)</script>',
|
||||
format_page, 'json data', flags=re.DOTALL)
|
||||
info = json.loads(json_data)
|
||||
format_info = info['videoPlayerObject']['video']
|
||||
f_id = format_info['ffname']
|
||||
info = self._parse_json(json_data, video_id, fatal=False)
|
||||
if not info:
|
||||
continue
|
||||
format_info = info.get('videoPlayerObject', {}).get('video', {})
|
||||
if not format_info:
|
||||
continue
|
||||
video_info_list = format_info.get('videoInfoList')
|
||||
if not video_info_list or not isinstance(video_info_list, list):
|
||||
continue
|
||||
video_info = video_info_list[0]
|
||||
if not video_info or not isinstance(video_info, dict):
|
||||
continue
|
||||
video_url = video_info.get('videoUrl')
|
||||
if not video_url:
|
||||
continue
|
||||
format_id = format_info.get('ffname')
|
||||
formats.append({
|
||||
'format_id': f_id,
|
||||
'url': format_info['videoInfoList'][0]['videoUrl'],
|
||||
'quality': quality(f_id),
|
||||
'format_id': format_id,
|
||||
'url': video_url,
|
||||
'ext': mimetype2ext(video_info.get('videoMimeType')),
|
||||
'quality': quality(format_id),
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
|
@ -505,7 +505,10 @@ class IqiyiIE(InfoExtractor):
|
||||
'enc': md5_text(enc_key + tail),
|
||||
'qyid': _uuid,
|
||||
'tn': random.random(),
|
||||
'um': 0,
|
||||
# In iQiyi's flash player, um is set to 1 if there's a logged-in user.
|
||||
# Some 1080P formats are only available to a logged-in user.
|
||||
# Here, force um=1 to trick the iQiyi server.
|
||||
'um': 1,
|
||||
'authkey': md5_text(md5_text('') + tail),
|
||||
'k_tag': 1,
|
||||
}
|
||||
|
@ -11,7 +11,7 @@ class MGTVIE(InfoExtractor):
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.mgtv.com/v/1/290525/f/3116640.html',
|
||||
'md5': '',
|
||||
'md5': '1bdadcf760a0b90946ca68ee9a2db41a',
|
||||
'info_dict': {
|
||||
'id': '3116640',
|
||||
'ext': 'mp4',
|
||||
@ -20,15 +20,6 @@ class MGTVIE(InfoExtractor):
|
||||
'duration': 7461,
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # m3u8 download
|
||||
},
|
||||
}
|
||||
|
||||
_FORMAT_MAP = {
|
||||
'标清': ('Standard', 0),
|
||||
'高清': ('High', 1),
|
||||
'超清': ('SuperHigh', 2),
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
@ -40,17 +31,27 @@ class MGTVIE(InfoExtractor):
|
||||
|
||||
formats = []
|
||||
for idx, stream in enumerate(api_data['stream']):
|
||||
format_name = stream.get('name')
|
||||
format_id, preference = self._FORMAT_MAP.get(format_name, (None, None))
|
||||
format_info = self._download_json(
|
||||
stream['url'], video_id,
|
||||
note='Download video info for format %s' % format_id or '#%d' % idx)
|
||||
formats.append({
|
||||
'format_id': format_id,
|
||||
'url': format_info['info'],
|
||||
'ext': 'mp4', # These are m3u8 playlists
|
||||
'preference': preference,
|
||||
})
|
||||
stream_url = stream.get('url')
|
||||
if not stream_url:
|
||||
continue
|
||||
tbr = int_or_none(self._search_regex(
|
||||
r'(\d+)\.mp4', stream_url, 'tbr', default=None))
|
||||
|
||||
def extract_format(stream_url, format_id, idx, query=None):
    """Download the info JSON for one stream variant and build its format dict.

    stream_url -- URL of the JSON document describing the stream
    format_id -- format identifier, or None when no bitrate was detected
    idx -- ordinal shown in the progress note when format_id is None
    query -- optional dict of extra query-string parameters for the request

    Uses ``self``, ``video_id`` and ``tbr`` from the enclosing scope.
    """
    # Parenthesise the fallback: '%' binds tighter than 'or', so without the
    # parentheses the '#<idx>' label was never used and the note read
    # "format None" whenever format_id was missing.
    note = 'Download video info for format %s' % (format_id or '#%d' % idx)
    format_info = self._download_json(
        stream_url, video_id, note=note,
        # Avoid a shared mutable default; an empty dict means "no extra query".
        query=query or {})
    return {
        'format_id': format_id,
        'url': format_info['info'],
        'ext': 'mp4',
        'tbr': tbr,
    }
|
||||
|
||||
formats.append(extract_format(
|
||||
stream_url, 'hls-%d' % tbr if tbr else None, idx * 2))
|
||||
formats.append(extract_format(stream_url.replace(
|
||||
'/playlist.m3u8', ''), 'http-%d' % tbr if tbr else None, idx * 2 + 1, {'pno': 1031}))
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
|
@ -4,91 +4,217 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urlparse,
|
||||
compat_urllib_parse_unquote,
|
||||
)
|
||||
from ..compat import compat_urllib_parse_unquote
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
parse_age_limit,
|
||||
parse_duration,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
|
||||
class NRKIE(InfoExtractor):
|
||||
_VALID_URL = r'(?:nrk:|https?://(?:www\.)?nrk\.no/video/PS\*)(?P<id>\d+)'
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://www.nrk.no/video/PS*150533',
|
||||
# MD5 is unstable
|
||||
'info_dict': {
|
||||
'id': '150533',
|
||||
'ext': 'flv',
|
||||
'title': 'Dompap og andre fugler i Piip-Show',
|
||||
'description': 'md5:d9261ba34c43b61c812cb6b0269a5c8f',
|
||||
'duration': 263,
|
||||
}
|
||||
},
|
||||
{
|
||||
'url': 'http://www.nrk.no/video/PS*154915',
|
||||
# MD5 is unstable
|
||||
'info_dict': {
|
||||
'id': '154915',
|
||||
'ext': 'flv',
|
||||
'title': 'Slik høres internett ut når du er blind',
|
||||
'description': 'md5:a621f5cc1bd75c8d5104cb048c6b8568',
|
||||
'duration': 20,
|
||||
}
|
||||
},
|
||||
]
|
||||
class NRKBaseIE(InfoExtractor):
|
||||
def _extract_formats(self, manifest_url, video_id, fatal=True):
|
||||
formats = []
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
manifest_url + '?hdcore=3.5.0&plugin=aasp-3.5.0.151.81',
|
||||
video_id, f4m_id='hds', fatal=fatal))
|
||||
formats.extend(self._extract_m3u8_formats(manifest_url.replace(
|
||||
'akamaihd.net/z/', 'akamaihd.net/i/').replace('/manifest.f4m', '/master.m3u8'),
|
||||
video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=fatal))
|
||||
return formats
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
data = self._download_json(
|
||||
'http://v8.psapi.nrk.no/mediaelement/%s' % video_id,
|
||||
video_id, 'Downloading media JSON')
|
||||
'http://%s/mediaelement/%s' % (self._API_HOST, video_id),
|
||||
video_id, 'Downloading mediaelement JSON')
|
||||
|
||||
media_url = data.get('mediaUrl')
|
||||
title = data.get('fullTitle') or data.get('mainTitle') or data['title']
|
||||
video_id = data.get('id') or video_id
|
||||
|
||||
if not media_url:
|
||||
if data['usageRights']['isGeoBlocked']:
|
||||
entries = []
|
||||
|
||||
media_assets = data.get('mediaAssets')
|
||||
if media_assets and isinstance(media_assets, list):
|
||||
def video_id_and_title(idx):
|
||||
return ((video_id, title) if len(media_assets) == 1
|
||||
else ('%s-%d' % (video_id, idx), '%s (Part %d)' % (title, idx)))
|
||||
for num, asset in enumerate(media_assets, 1):
|
||||
asset_url = asset.get('url')
|
||||
if not asset_url:
|
||||
continue
|
||||
formats = self._extract_formats(asset_url, video_id, fatal=False)
|
||||
if not formats:
|
||||
continue
|
||||
self._sort_formats(formats)
|
||||
entry_id, entry_title = video_id_and_title(num)
|
||||
duration = parse_duration(asset.get('duration'))
|
||||
subtitles = {}
|
||||
for subtitle in ('webVtt', 'timedText'):
|
||||
subtitle_url = asset.get('%sSubtitlesUrl' % subtitle)
|
||||
if subtitle_url:
|
||||
subtitles.setdefault('no', []).append({'url': subtitle_url})
|
||||
entries.append({
|
||||
'id': asset.get('carrierId') or entry_id,
|
||||
'title': entry_title,
|
||||
'duration': duration,
|
||||
'subtitles': subtitles,
|
||||
'formats': formats,
|
||||
})
|
||||
|
||||
if not entries:
|
||||
media_url = data.get('mediaUrl')
|
||||
if media_url:
|
||||
formats = self._extract_formats(media_url, video_id)
|
||||
self._sort_formats(formats)
|
||||
duration = parse_duration(data.get('duration'))
|
||||
entries = [{
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'duration': duration,
|
||||
'formats': formats,
|
||||
}]
|
||||
|
||||
if not entries:
|
||||
if data.get('usageRights', {}).get('isGeoBlocked'):
|
||||
raise ExtractorError(
|
||||
'NRK har ikke rettigheter til å vise dette programmet utenfor Norge',
|
||||
expected=True)
|
||||
|
||||
if determine_ext(media_url) == 'f4m':
|
||||
formats = self._extract_f4m_formats(
|
||||
media_url + '?hdcore=3.5.0&plugin=aasp-3.5.0.151.81', video_id, f4m_id='hds')
|
||||
self._sort_formats(formats)
|
||||
else:
|
||||
formats = [{
|
||||
'url': media_url,
|
||||
'ext': 'flv',
|
||||
}]
|
||||
|
||||
duration = parse_duration(data.get('duration'))
|
||||
conviva = data.get('convivaStatistics') or {}
|
||||
series = conviva.get('seriesName') or data.get('seriesTitle')
|
||||
episode = conviva.get('episodeName') or data.get('episodeNumberOrDate')
|
||||
|
||||
thumbnails = None
|
||||
images = data.get('images')
|
||||
if images:
|
||||
thumbnails = images['webImages']
|
||||
thumbnails.sort(key=lambda image: image['pixelWidth'])
|
||||
thumbnail = thumbnails[-1]['imageUrl']
|
||||
else:
|
||||
thumbnail = None
|
||||
if images and isinstance(images, dict):
|
||||
web_images = images.get('webImages')
|
||||
if isinstance(web_images, list):
|
||||
thumbnails = [{
|
||||
'url': image['imageUrl'],
|
||||
'width': int_or_none(image.get('width')),
|
||||
'height': int_or_none(image.get('height')),
|
||||
} for image in web_images if image.get('imageUrl')]
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': data['title'],
|
||||
'description': data['description'],
|
||||
'duration': duration,
|
||||
'thumbnail': thumbnail,
|
||||
'formats': formats,
|
||||
description = data.get('description')
|
||||
|
||||
common_info = {
|
||||
'description': description,
|
||||
'series': series,
|
||||
'episode': episode,
|
||||
'age_limit': parse_age_limit(data.get('legalAge')),
|
||||
'thumbnails': thumbnails,
|
||||
}
|
||||
|
||||
vcodec = 'none' if data.get('mediaType') == 'Audio' else None
|
||||
|
||||
# TODO: extract chapters when https://github.com/rg3/youtube-dl/pull/9409 is merged
|
||||
|
||||
for entry in entries:
|
||||
entry.update(common_info)
|
||||
for f in entry['formats']:
|
||||
f['vcodec'] = vcodec
|
||||
|
||||
return self.playlist_result(entries, video_id, title, description)
|
||||
|
||||
|
||||
class NRKIE(NRKBaseIE):
|
||||
_VALID_URL = r'(?:nrk:|https?://(?:www\.)?nrk\.no/video/PS\*)(?P<id>\d+)'
|
||||
_API_HOST = 'v8.psapi.nrk.no'
|
||||
_TESTS = [{
|
||||
# video
|
||||
'url': 'http://www.nrk.no/video/PS*150533',
|
||||
'md5': '2f7f6eeb2aacdd99885f355428715cfa',
|
||||
'info_dict': {
|
||||
'id': '150533',
|
||||
'ext': 'mp4',
|
||||
'title': 'Dompap og andre fugler i Piip-Show',
|
||||
'description': 'md5:d9261ba34c43b61c812cb6b0269a5c8f',
|
||||
'duration': 263,
|
||||
}
|
||||
}, {
|
||||
# audio
|
||||
'url': 'http://www.nrk.no/video/PS*154915',
|
||||
# MD5 is unstable
|
||||
'info_dict': {
|
||||
'id': '154915',
|
||||
'ext': 'flv',
|
||||
'title': 'Slik høres internett ut når du er blind',
|
||||
'description': 'md5:a621f5cc1bd75c8d5104cb048c6b8568',
|
||||
'duration': 20,
|
||||
}
|
||||
}]
|
||||
|
||||
|
||||
class NRKTVIE(NRKBaseIE):
|
||||
IE_DESC = 'NRK TV and NRK Radio'
|
||||
_VALID_URL = r'https?://(?:tv|radio)\.nrk(?:super)?\.no/(?:serie/[^/]+|program)/(?P<id>[a-zA-Z]{4}\d{8})(?:/\d{2}-\d{2}-\d{4})?(?:#del=(?P<part_id>\d+))?'
|
||||
_API_HOST = 'psapi-we.nrk.no'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://tv.nrk.no/serie/20-spoersmaal-tv/MUHH48000314/23-05-2014',
|
||||
'md5': '4e9ca6629f09e588ed240fb11619922a',
|
||||
'info_dict': {
|
||||
'id': 'MUHH48000314AA',
|
||||
'ext': 'mp4',
|
||||
'title': '20 spørsmål 23.05.2014',
|
||||
'description': 'md5:bdea103bc35494c143c6a9acdd84887a',
|
||||
'duration': 1741.52,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://tv.nrk.no/program/mdfp15000514',
|
||||
'md5': '43d0be26663d380603a9cf0c24366531',
|
||||
'info_dict': {
|
||||
'id': 'MDFP15000514CA',
|
||||
'ext': 'mp4',
|
||||
'title': 'Grunnlovsjubiléet - Stor ståhei for ingenting 24.05.2014',
|
||||
'description': 'md5:89290c5ccde1b3a24bb8050ab67fe1db',
|
||||
'duration': 4605.08,
|
||||
},
|
||||
}, {
|
||||
# single playlist video
|
||||
'url': 'https://tv.nrk.no/serie/tour-de-ski/MSPO40010515/06-01-2015#del=2',
|
||||
'md5': 'adbd1dbd813edaf532b0a253780719c2',
|
||||
'info_dict': {
|
||||
'id': 'MSPO40010515-part2',
|
||||
'ext': 'flv',
|
||||
'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn 06.01.2015 (del 2:2)',
|
||||
'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26',
|
||||
},
|
||||
'skip': 'Only works from Norway',
|
||||
}, {
|
||||
'url': 'https://tv.nrk.no/serie/tour-de-ski/MSPO40010515/06-01-2015',
|
||||
'playlist': [{
|
||||
'md5': '9480285eff92d64f06e02a5367970a7a',
|
||||
'info_dict': {
|
||||
'id': 'MSPO40010515-part1',
|
||||
'ext': 'flv',
|
||||
'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn 06.01.2015 (del 1:2)',
|
||||
'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26',
|
||||
},
|
||||
}, {
|
||||
'md5': 'adbd1dbd813edaf532b0a253780719c2',
|
||||
'info_dict': {
|
||||
'id': 'MSPO40010515-part2',
|
||||
'ext': 'flv',
|
||||
'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn 06.01.2015 (del 2:2)',
|
||||
'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26',
|
||||
},
|
||||
}],
|
||||
'info_dict': {
|
||||
'id': 'MSPO40010515',
|
||||
'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn',
|
||||
'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26',
|
||||
'duration': 6947.52,
|
||||
},
|
||||
'skip': 'Only works from Norway',
|
||||
}, {
|
||||
'url': 'https://radio.nrk.no/serie/dagsnytt/NPUB21019315/12-07-2015#',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
|
||||
class NRKPlaylistIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?nrk\.no/(?!video|skole)(?:[^/]+/)+(?P<id>[^/]+)'
|
||||
@ -159,179 +285,3 @@ class NRKSkoleIE(InfoExtractor):
|
||||
|
||||
nrk_id = self._search_regex(r'data-nrk-id=["\'](\d+)', webpage, 'nrk id')
|
||||
return self.url_result('nrk:%s' % nrk_id)
|
||||
|
||||
|
||||
class NRKTVIE(InfoExtractor):
|
||||
IE_DESC = 'NRK TV and NRK Radio'
|
||||
_VALID_URL = r'(?P<baseurl>https?://(?:tv|radio)\.nrk(?:super)?\.no/)(?:serie/[^/]+|program)/(?P<id>[a-zA-Z]{4}\d{8})(?:/\d{2}-\d{2}-\d{4})?(?:#del=(?P<part_id>\d+))?'
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'https://tv.nrk.no/serie/20-spoersmaal-tv/MUHH48000314/23-05-2014',
|
||||
'info_dict': {
|
||||
'id': 'MUHH48000314',
|
||||
'ext': 'mp4',
|
||||
'title': '20 spørsmål',
|
||||
'description': 'md5:bdea103bc35494c143c6a9acdd84887a',
|
||||
'upload_date': '20140523',
|
||||
'duration': 1741.52,
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'https://tv.nrk.no/program/mdfp15000514',
|
||||
'info_dict': {
|
||||
'id': 'mdfp15000514',
|
||||
'ext': 'mp4',
|
||||
'title': 'Grunnlovsjubiléet - Stor ståhei for ingenting',
|
||||
'description': 'md5:654c12511f035aed1e42bdf5db3b206a',
|
||||
'upload_date': '20140524',
|
||||
'duration': 4605.08,
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
{
|
||||
# single playlist video
|
||||
'url': 'https://tv.nrk.no/serie/tour-de-ski/MSPO40010515/06-01-2015#del=2',
|
||||
'md5': 'adbd1dbd813edaf532b0a253780719c2',
|
||||
'info_dict': {
|
||||
'id': 'MSPO40010515-part2',
|
||||
'ext': 'flv',
|
||||
'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn 06.01.2015 (del 2:2)',
|
||||
'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26',
|
||||
'upload_date': '20150106',
|
||||
},
|
||||
'skip': 'Only works from Norway',
|
||||
},
|
||||
{
|
||||
'url': 'https://tv.nrk.no/serie/tour-de-ski/MSPO40010515/06-01-2015',
|
||||
'playlist': [
|
||||
{
|
||||
'md5': '9480285eff92d64f06e02a5367970a7a',
|
||||
'info_dict': {
|
||||
'id': 'MSPO40010515-part1',
|
||||
'ext': 'flv',
|
||||
'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn 06.01.2015 (del 1:2)',
|
||||
'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26',
|
||||
'upload_date': '20150106',
|
||||
},
|
||||
},
|
||||
{
|
||||
'md5': 'adbd1dbd813edaf532b0a253780719c2',
|
||||
'info_dict': {
|
||||
'id': 'MSPO40010515-part2',
|
||||
'ext': 'flv',
|
||||
'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn 06.01.2015 (del 2:2)',
|
||||
'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26',
|
||||
'upload_date': '20150106',
|
||||
},
|
||||
},
|
||||
],
|
||||
'info_dict': {
|
||||
'id': 'MSPO40010515',
|
||||
'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn',
|
||||
'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26',
|
||||
'upload_date': '20150106',
|
||||
'duration': 6947.5199999999995,
|
||||
},
|
||||
'skip': 'Only works from Norway',
|
||||
},
|
||||
{
|
||||
'url': 'https://radio.nrk.no/serie/dagsnytt/NPUB21019315/12-07-2015#',
|
||||
'only_matching': True,
|
||||
}
|
||||
]
|
||||
|
||||
def _extract_f4m(self, manifest_url, video_id):
|
||||
return self._extract_f4m_formats(
|
||||
manifest_url + '?hdcore=3.1.1&plugin=aasp-3.1.1.69.124', video_id, f4m_id='hds')
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
part_id = mobj.group('part_id')
|
||||
base_url = mobj.group('baseurl')
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
title = self._html_search_meta(
|
||||
'title', webpage, 'title')
|
||||
description = self._html_search_meta(
|
||||
'description', webpage, 'description')
|
||||
|
||||
thumbnail = self._html_search_regex(
|
||||
r'data-posterimage="([^"]+)"',
|
||||
webpage, 'thumbnail', fatal=False)
|
||||
upload_date = unified_strdate(self._html_search_meta(
|
||||
'rightsfrom', webpage, 'upload date', fatal=False))
|
||||
duration = float_or_none(self._html_search_regex(
|
||||
r'data-duration="([^"]+)"',
|
||||
webpage, 'duration', fatal=False))
|
||||
|
||||
# playlist
|
||||
parts = re.findall(
|
||||
r'<a href="#del=(\d+)"[^>]+data-argument="([^"]+)">([^<]+)</a>', webpage)
|
||||
if parts:
|
||||
entries = []
|
||||
for current_part_id, stream_url, part_title in parts:
|
||||
if part_id and current_part_id != part_id:
|
||||
continue
|
||||
video_part_id = '%s-part%s' % (video_id, current_part_id)
|
||||
formats = self._extract_f4m(stream_url, video_part_id)
|
||||
entries.append({
|
||||
'id': video_part_id,
|
||||
'title': part_title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'upload_date': upload_date,
|
||||
'formats': formats,
|
||||
})
|
||||
if part_id:
|
||||
if entries:
|
||||
return entries[0]
|
||||
else:
|
||||
playlist = self.playlist_result(entries, video_id, title, description)
|
||||
playlist.update({
|
||||
'thumbnail': thumbnail,
|
||||
'upload_date': upload_date,
|
||||
'duration': duration,
|
||||
})
|
||||
return playlist
|
||||
|
||||
formats = []
|
||||
|
||||
f4m_url = re.search(r'data-media="([^"]+)"', webpage)
|
||||
if f4m_url:
|
||||
formats.extend(self._extract_f4m(f4m_url.group(1), video_id))
|
||||
|
||||
m3u8_url = re.search(r'data-hls-media="([^"]+)"', webpage)
|
||||
if m3u8_url:
|
||||
formats.extend(self._extract_m3u8_formats(m3u8_url.group(1), video_id, 'mp4', m3u8_id='hls'))
|
||||
self._sort_formats(formats)
|
||||
|
||||
subtitles_url = self._html_search_regex(
|
||||
r'data-subtitlesurl\s*=\s*(["\'])(?P<url>.+?)\1',
|
||||
webpage, 'subtitle URL', default=None, group='url')
|
||||
subtitles = {}
|
||||
if subtitles_url:
|
||||
subtitles['no'] = [{
|
||||
'ext': 'ttml',
|
||||
'url': compat_urlparse.urljoin(base_url, subtitles_url),
|
||||
}]
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'upload_date': upload_date,
|
||||
'duration': duration,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
@ -100,7 +100,7 @@ class OpenloadIE(InfoExtractor):
|
||||
raise ExtractorError('File not found', expected=True)
|
||||
|
||||
code = self._search_regex(
|
||||
r'<video[^>]+>\s*<script[^>]+>([^<]+)</script>',
|
||||
r'</video>\s*</div>\s*<script[^>]+>([^<]+)</script>',
|
||||
webpage, 'JS code')
|
||||
|
||||
decoded = self.openload_decode(code)
|
||||
|
@ -12,8 +12,8 @@ from ..utils import (
|
||||
|
||||
|
||||
class OraTVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?ora\.tv/([^/]+/)*(?P<id>[^/\?#]+)'
|
||||
_TEST = {
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:ora\.tv|unsafespeech\.com)/([^/]+/)*(?P<id>[^/\?#]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.ora.tv/larrykingnow/2015/12/16/vine-youtube-stars-zach-king-king-bach-on-their-viral-videos-0_36jupg6090pq',
|
||||
'md5': 'fa33717591c631ec93b04b0e330df786',
|
||||
'info_dict': {
|
||||
@ -22,7 +22,10 @@ class OraTVIE(InfoExtractor):
|
||||
'title': 'Vine & YouTube Stars Zach King & King Bach On Their Viral Videos!',
|
||||
'description': 'md5:ebbc5b1424dd5dba7be7538148287ac1',
|
||||
}
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.unsafespeech.com/video/2016/5/10/student-self-censorship-and-the-thought-police-on-university-campuses-0_6622bnkppw4d',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
|
@ -4,28 +4,35 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urllib_parse_urlencode
|
||||
from ..utils import sanitized_Request
|
||||
from ..utils import (
|
||||
HEADRequest,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
update_url_query,
|
||||
qualities,
|
||||
get_element_by_attribute,
|
||||
clean_html,
|
||||
)
|
||||
|
||||
|
||||
class SinaIE(InfoExtractor):
|
||||
_VALID_URL = r'''(?x)https?://(.*?\.)?video\.sina\.com\.cn/
|
||||
(
|
||||
(.+?/(((?P<pseudo_id>\d+).html)|(.*?(\#|(vid=)|b/)(?P<id>\d+?)($|&|\-))))
|
||||
|
|
||||
_VALID_URL = r'''(?x)https?://(?:.*?\.)?video\.sina\.com\.cn/
|
||||
(?:
|
||||
(?:view/|.*\#)(?P<video_id>\d+)|
|
||||
.+?/(?P<pseudo_id>[^/?#]+)(?:\.s?html)|
|
||||
# This is used by external sites like Weibo
|
||||
(api/sinawebApi/outplay.php/(?P<token>.+?)\.swf)
|
||||
api/sinawebApi/outplay.php/(?P<token>.+?)\.swf
|
||||
)
|
||||
'''
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://video.sina.com.cn/news/vlist/zt/chczlj2013/?opsubject_id=top12#110028898',
|
||||
'md5': 'd65dd22ddcf44e38ce2bf58a10c3e71f',
|
||||
'url': 'http://video.sina.com.cn/news/spj/topvideoes20160504/?opsubject_id=top1#250576622',
|
||||
'md5': 'd38433e2fc886007729735650ae4b3e9',
|
||||
'info_dict': {
|
||||
'id': '110028898',
|
||||
'ext': 'flv',
|
||||
'title': '《中国新闻》 朝鲜要求巴拿马立即释放被扣船员',
|
||||
'id': '250576622',
|
||||
'ext': 'mp4',
|
||||
'title': '现场:克鲁兹宣布退选 特朗普将稳获提名',
|
||||
}
|
||||
},
|
||||
{
|
||||
@ -35,37 +42,74 @@ class SinaIE(InfoExtractor):
|
||||
'ext': 'flv',
|
||||
'title': '军方提高对朝情报监视级别',
|
||||
},
|
||||
'skip': 'the page does not exist or has been deleted',
|
||||
},
|
||||
{
|
||||
'url': 'http://video.sina.com.cn/view/250587748.html',
|
||||
'md5': '3d1807a25c775092aab3bc157fff49b4',
|
||||
'info_dict': {
|
||||
'id': '250587748',
|
||||
'ext': 'mp4',
|
||||
'title': '瞬间泪目:8年前汶川地震珍贵视频首曝光',
|
||||
},
|
||||
},
|
||||
]
|
||||
|
||||
def _extract_video(self, video_id):
|
||||
data = compat_urllib_parse_urlencode({'vid': video_id})
|
||||
url_doc = self._download_xml('http://v.iask.com/v_play.php?%s' % data,
|
||||
video_id, 'Downloading video url')
|
||||
image_page = self._download_webpage(
|
||||
'http://interface.video.sina.com.cn/interface/common/getVideoImage.php?%s' % data,
|
||||
video_id, 'Downloading thumbnail info')
|
||||
|
||||
return {'id': video_id,
|
||||
'url': url_doc.find('./durl/url').text,
|
||||
'ext': 'flv',
|
||||
'title': url_doc.find('./vname').text,
|
||||
'thumbnail': image_page.split('=')[1],
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
if mobj.group('token') is not None:
|
||||
# The video id is in the redirected url
|
||||
self.to_screen('Getting video id')
|
||||
request = sanitized_Request(url)
|
||||
request.get_method = lambda: 'HEAD'
|
||||
(_, urlh) = self._download_webpage_handle(request, 'NA', False)
|
||||
return self._real_extract(urlh.geturl())
|
||||
elif video_id is None:
|
||||
pseudo_id = mobj.group('pseudo_id')
|
||||
webpage = self._download_webpage(url, pseudo_id)
|
||||
video_id = self._search_regex(r'vid:\'(\d+?)\'', webpage, 'video id')
|
||||
|
||||
return self._extract_video(video_id)
|
||||
video_id = mobj.group('video_id')
|
||||
if not video_id:
|
||||
if mobj.group('token') is not None:
|
||||
# The video id is in the redirected url
|
||||
self.to_screen('Getting video id')
|
||||
request = HEADRequest(url)
|
||||
(_, urlh) = self._download_webpage_handle(request, 'NA', False)
|
||||
return self._real_extract(urlh.geturl())
|
||||
else:
|
||||
pseudo_id = mobj.group('pseudo_id')
|
||||
webpage = self._download_webpage(url, pseudo_id)
|
||||
error = get_element_by_attribute('class', 'errtitle', webpage)
|
||||
if error:
|
||||
raise ExtractorError('%s said: %s' % (
|
||||
self.IE_NAME, clean_html(error)), expected=True)
|
||||
video_id = self._search_regex(
|
||||
r"video_id\s*:\s*'(\d+)'", webpage, 'video id')
|
||||
|
||||
video_data = self._download_json(
|
||||
'http://s.video.sina.com.cn/video/h5play',
|
||||
video_id, query={'video_id': video_id})
|
||||
if video_data['code'] != 1:
|
||||
raise ExtractorError('%s said: %s' % (
|
||||
self.IE_NAME, video_data['message']), expected=True)
|
||||
else:
|
||||
video_data = video_data['data']
|
||||
title = video_data['title']
|
||||
description = video_data.get('description')
|
||||
if description:
|
||||
description = description.strip()
|
||||
|
||||
preference = qualities(['cif', 'sd', 'hd', 'fhd', 'ffd'])
|
||||
formats = []
|
||||
for quality_id, quality in video_data.get('videos', {}).get('mp4', {}).items():
|
||||
file_api = quality.get('file_api')
|
||||
file_id = quality.get('file_id')
|
||||
if not file_api or not file_id:
|
||||
continue
|
||||
formats.append({
|
||||
'format_id': quality_id,
|
||||
'url': update_url_query(file_api, {'vid': file_id}),
|
||||
'preference': preference(quality_id),
|
||||
'ext': 'mp4',
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': video_data.get('image'),
|
||||
'duration': int_or_none(video_data.get('length')),
|
||||
'timestamp': int_or_none(video_data.get('create_time')),
|
||||
'formats': formats,
|
||||
}
|
||||
|
@ -88,7 +88,7 @@ class TeamcocoIE(InfoExtractor):
|
||||
preload_codes = self._html_search_regex(
|
||||
r'(function.+)setTimeout\(function\(\)\{playlist',
|
||||
webpage, 'preload codes')
|
||||
base64_fragments = re.findall(r'"([a-zA-z0-9+/=]+)"', preload_codes)
|
||||
base64_fragments = re.findall(r'"([a-zA-Z0-9+/=]+)"', preload_codes)
|
||||
base64_fragments.remove('init')
|
||||
|
||||
def _check_sequence(cur_fragments):
|
||||
|
139
youtube_dl/extractor/threeqsdn.py
Normal file
139
youtube_dl/extractor/threeqsdn.py
Normal file
@ -0,0 +1,139 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
js_to_json,
|
||||
mimetype2ext,
|
||||
)
|
||||
|
||||
|
||||
class ThreeQSDNIE(InfoExtractor):
    """Extractor for media served from the playout.3qsdn.com host."""

    IE_NAME = '3qsdn'
    IE_DESC = '3Q SDN'
    _VALID_URL = r'https?://playout\.3qsdn\.com/(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
    _TESTS = [{
        # ondemand from http://www.philharmonie.tv/veranstaltung/26/
        'url': 'http://playout.3qsdn.com/0280d6b9-1215-11e6-b427-0cc47a188158?protocol=http',
        'md5': 'ab040e37bcfa2e0c079f92cb1dd7f6cd',
        'info_dict': {
            'id': '0280d6b9-1215-11e6-b427-0cc47a188158',
            'ext': 'mp4',
            'title': '0280d6b9-1215-11e6-b427-0cc47a188158',
            'is_live': False,
        },
        'expected_warnings': ['Failed to download MPD manifest'],
    }, {
        # live video stream
        'url': 'https://playout.3qsdn.com/d755d94b-4ab9-11e3-9162-0025907ad44f?js=true',
        'info_dict': {
            'id': 'd755d94b-4ab9-11e3-9162-0025907ad44f',
            'ext': 'mp4',
            'title': 'd755d94b-4ab9-11e3-9162-0025907ad44f',
            'is_live': False,
        },
    }, {
        # live audio stream
        'url': 'http://playout.3qsdn.com/9edf36e0-6bf2-11e2-a16a-9acf09e2db48',
        'only_matching': True,
    }, {
        # live audio stream with some 404 URLs
        'url': 'http://playout.3qsdn.com/ac5c3186-777a-11e2-9c30-9acf09e2db48',
        'only_matching': True,
    }, {
        # geo restricted with 'This content is not available in your country'
        'url': 'http://playout.3qsdn.com/d63a3ffe-75e8-11e2-9c30-9acf09e2db48',
        'only_matching': True,
    }, {
        # geo restricted with 'playout.3qsdn.com/forbidden'
        'url': 'http://playout.3qsdn.com/8e330f26-6ae2-11e2-a16a-9acf09e2db48',
        'only_matching': True,
    }, {
        # live video with rtmp link
        'url': 'https://playout.3qsdn.com/6092bb9e-8f72-11e4-a173-002590c750be',
        'only_matching': True,
    }]

    @staticmethod
    def _extract_url(webpage):
        """Return the first playout iframe URL found in webpage, or None.

        Looks for an <iframe> whose ``src`` or ``data-src`` attribute starts
        with a URL matching _VALID_URL and returns the whole attribute value.
        """
        mobj = re.search(
            r'<iframe[^>]+\b(?:data-)?src=(["\'])(?P<url>%s.*?)\1' % ThreeQSDNIE._VALID_URL, webpage)
        if mobj:
            return mobj.group('url')

    def _real_extract(self, url):
        """Build the info dict (id, title, is_live, formats) for a playout URL.

        Downloads the player page with ``js=true``, reports geo restriction
        when one of the known markers is present, then collects formats from
        every ``src``/``source`` entry found in the returned JavaScript.
        """
        video_id = self._match_id(url)

        js = self._download_webpage(
            'http://playout.3qsdn.com/%s' % video_id, video_id,
            query={'js': 'true'})

        if any(p in js for p in (
                '>This content is not available in your country',
                'playout.3qsdn.com/forbidden')):
            self.raise_geo_restricted()

        stream_content = self._search_regex(
            r'streamContent\s*:\s*(["\'])(?P<content>.+?)\1', js,
            'stream content', default='demand', group='content')

        live = stream_content == 'live'

        stream_type = self._search_regex(
            r'streamType\s*:\s*(["\'])(?P<type>audio|video)\1', js,
            'stream type', default='video', group='type')

        formats = []
        # URLs already handled, so the two passes below never add duplicates.
        urls = set()

        def extract_formats(item_url, item=None):
            """Append the format(s) reachable at item_url to ``formats``.

            item is the optional parsed source entry; its 'type' and
            'quality' keys are consulted when present.
            """
            # None instead of a shared mutable default dict.
            item = item or {}
            if not item_url or item_url in urls:
                return
            urls.add(item_url)
            type_ = item.get('type')
            ext = determine_ext(item_url, default_ext=None)
            if type_ == 'application/dash+xml' or ext == 'mpd':
                formats.extend(self._extract_mpd_formats(
                    item_url, video_id, mpd_id='mpd', fatal=False))
            elif type_ in ('application/vnd.apple.mpegURL', 'application/x-mpegurl') or ext == 'm3u8':
                formats.extend(self._extract_m3u8_formats(
                    item_url, video_id, 'mp4',
                    entry_protocol='m3u8' if live else 'm3u8_native',
                    m3u8_id='hls', fatal=False))
            elif ext == 'f4m':
                formats.extend(self._extract_f4m_formats(
                    item_url, video_id, f4m_id='hds', fatal=False))
            else:
                # Direct link: skip it when the URL does not validate.
                if not self._is_valid_url(item_url, video_id):
                    return
                formats.append({
                    'url': item_url,
                    'format_id': item.get('quality'),
                    'ext': 'mp4' if item_url.startswith('rtsp') else mimetype2ext(type_) or ext,
                    'vcodec': 'none' if stream_type == 'audio' else None,
                })

        # Strict pass: parse each object literal that carries a src/source key
        # so the accompanying 'type' and 'quality' values are available.
        for item_js in re.findall(r'({.*?\b(?:src|source)\s*:\s*["\'].+?})', js):
            f = self._parse_json(
                item_js, video_id, transform_source=js_to_json, fatal=False)
            if not f:
                continue
            extract_formats(f.get('src'), f)

        # More relaxed pass that collects additional URLs and acts
        # as a future-proof fallback
        for _, src in re.findall(r'\b(?:src|source)\s*:\s*(["\'])((?:https?|rtsp)://.+?)\1', js):
            extract_formats(src)

        self._sort_formats(formats)

        title = self._live_title(video_id) if live else video_id

        return {
            'id': video_id,
            'title': title,
            'is_live': live,
            'formats': formats,
        }
|
@ -171,6 +171,7 @@ class TwitchVideoIE(TwitchItemBaseIE):
|
||||
'title': 'Worlds Semifinals - Star Horn Royal Club vs. OMG',
|
||||
},
|
||||
'playlist_mincount': 12,
|
||||
'skip': 'HTTP Error 404: Not Found',
|
||||
}
|
||||
|
||||
|
||||
@ -187,6 +188,7 @@ class TwitchChapterIE(TwitchItemBaseIE):
|
||||
'title': 'ACRL Off Season - Sports Cars @ Nordschleife',
|
||||
},
|
||||
'playlist_mincount': 3,
|
||||
'skip': 'HTTP Error 404: Not Found',
|
||||
}, {
|
||||
'url': 'http://www.twitch.tv/tsm_theoddone/c/2349361',
|
||||
'only_matching': True,
|
||||
@ -355,31 +357,6 @@ class TwitchPastBroadcastsIE(TwitchPlaylistBaseIE):
|
||||
}
|
||||
|
||||
|
||||
class TwitchBookmarksIE(TwitchPlaylistBaseIE):
    # Playlist extractor for URLs of the form <base>/<user>/profile/bookmarks.
    IE_NAME = 'twitch:bookmarks'
    _VALID_URL = r'%s/(?P<id>[^/]+)/profile/bookmarks/?(?:\#.*)?$' % TwitchBaseIE._VALID_URL_BASE
    _PLAYLIST_URL = '%s/api/bookmark/?user=%%s&offset=%%d&limit=%%d' % TwitchBaseIE._API_BASE
    _PLAYLIST_TYPE = 'bookmarks'

    _TEST = {
        'url': 'http://www.twitch.tv/ognos/profile/bookmarks',
        'info_dict': {
            'id': 'ognos',
            'title': 'Ognos',
        },
        'playlist_mincount': 3,
    }

    def _extract_playlist_page(self, response):
        # Return the `url` of every bookmark that carries a `video` entry;
        # bookmarks without one are skipped.
        videos = (bookmark.get('video') for bookmark in response.get('bookmarks', []))
        return [video['url'] for video in videos if video]
|
||||
|
||||
|
||||
class TwitchStreamIE(TwitchBaseIE):
|
||||
IE_NAME = 'twitch:stream'
|
||||
_VALID_URL = r'%s/(?P<id>[^/#?]+)/?(?:\#.*)?$' % TwitchBaseIE._VALID_URL_BASE
|
||||
|
@ -6,10 +6,12 @@ from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
unified_strdate,
|
||||
unescapeHTML,
|
||||
)
|
||||
|
||||
|
||||
class UstudioIE(InfoExtractor):
|
||||
IE_NAME = 'ustudio'
|
||||
_VALID_URL = r'https?://(?:(?:www|v1)\.)?ustudio\.com/video/(?P<id>[^/]+)/(?P<display_id>[^/?#&]+)'
|
||||
_TEST = {
|
||||
'url': 'http://ustudio.com/video/Uxu2my9bgSph/san_francisco_golden_gate_bridge',
|
||||
@ -27,9 +29,7 @@ class UstudioIE(InfoExtractor):
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
display_id = mobj.group('display_id')
|
||||
video_id, display_id = re.match(self._VALID_URL, url).groups()
|
||||
|
||||
config = self._download_xml(
|
||||
'http://v1.ustudio.com/embed/%s/ustudio/config.xml' % video_id,
|
||||
@ -37,7 +37,7 @@ class UstudioIE(InfoExtractor):
|
||||
|
||||
def extract(kind):
|
||||
return [{
|
||||
'url': item.attrib['url'],
|
||||
'url': unescapeHTML(item.attrib['url']),
|
||||
'width': int_or_none(item.get('width')),
|
||||
'height': int_or_none(item.get('height')),
|
||||
} for item in config.findall('./qualities/quality/%s' % kind) if item.get('url')]
|
||||
@ -65,3 +65,61 @@ class UstudioIE(InfoExtractor):
|
||||
'uploader': uploader,
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
|
||||
class UstudioEmbedIE(InfoExtractor):
    # Extractor for ustudio.com /embed/<uid>/<id> URLs (bare domain or the
    # app./embed. subdomains, per _VALID_URL).
    IE_NAME = 'ustudio:embed'
    _VALID_URL = r'https?://(?:(?:app|embed)\.)?ustudio\.com/embed/(?P<uid>[^/]+)/(?P<id>[^/]+)'
    _TEST = {
        'url': 'http://app.ustudio.com/embed/DeN7VdYRDKhP/Uw7G1kMCe65T',
        'md5': '47c0be52a09b23a7f40de9469cec58f4',
        'info_dict': {
            'id': 'Uw7G1kMCe65T',
            'ext': 'mp4',
            'title': '5 Things IT Should Know About Video',
            'description': 'md5:93d32650884b500115e158c5677d25ad',
            'uploader_id': 'DeN7VdYRDKhP',
        }
    }

    def _real_extract(self, url):
        # Both identifiers come straight from the URL.
        mobj = re.match(self._VALID_URL, url)
        uploader_id = mobj.group('uid')
        video_id = mobj.group('id')

        # Only the first entry of the config's `videos` list is used.
        config = self._download_json(
            'http://app.ustudio.com/embed/%s/%s/config.json' % (uploader_id, video_id),
            video_id)
        video_data = config['videos'][0]
        title = video_data['name']

        # `transcodes` maps an extension to a list of quality descriptions;
        # entries without a URL are ignored.
        formats = []
        transcodes = video_data.get('transcodes', {})
        for ext, qualities in transcodes.items():
            for quality in qualities:
                quality_url = quality.get('url')
                if quality_url:
                    height = int_or_none(quality.get('height'))
                    if height:
                        format_id = '%s-%dp' % (ext, height)
                    else:
                        format_id = ext
                    formats.append({
                        'format_id': format_id,
                        'url': quality_url,
                        'width': int_or_none(quality.get('width')),
                        'height': height,
                    })
        self._sort_formats(formats)

        # Keep only images that actually provide a URL.
        thumbnails = [
            {'url': image['url']}
            for image in video_data.get('images', [])
            if image.get('url')]

        return {
            'id': video_id,
            'title': title,
            'description': video_data.get('description'),
            'duration': int_or_none(video_data.get('duration')),
            'uploader_id': uploader_id,
            'tags': video_data.get('keywords'),
            'thumbnails': thumbnails,
            'formats': formats,
        }
|
||||
|
@ -213,19 +213,17 @@ class VevoIE(VevoBaseIE):
|
||||
formats = []
|
||||
|
||||
if not video_info:
|
||||
if response and response.get('statusCode') != 909:
|
||||
try:
|
||||
self._initialize_api(video_id)
|
||||
except ExtractorError:
|
||||
ytid = response.get('errorInfo', {}).get('ytid')
|
||||
if ytid:
|
||||
self.report_warning(
|
||||
'Video is geoblocked, trying with the YouTube video %s' % ytid)
|
||||
return self.url_result(ytid, 'Youtube', ytid)
|
||||
|
||||
if 'statusMessage' in response:
|
||||
raise ExtractorError('%s said: %s' % (
|
||||
self.IE_NAME, response['statusMessage']), expected=True)
|
||||
raise ExtractorError('Unable to extract videos')
|
||||
raise
|
||||
|
||||
self._initialize_api(video_id)
|
||||
video_info = self._call_api(
|
||||
'video/%s' % video_id, video_id, 'Downloading api video info',
|
||||
'Failed to download video info')
|
||||
|
@ -8,7 +8,6 @@ from ..utils import (
|
||||
clean_html,
|
||||
ExtractorError,
|
||||
determine_ext,
|
||||
sanitized_Request,
|
||||
)
|
||||
|
||||
|
||||
@ -25,8 +24,6 @@ class XVideosIE(InfoExtractor):
|
||||
}
|
||||
}
|
||||
|
||||
_ANDROID_USER_AGENT = 'Mozilla/5.0 (Linux; Android 4.0.4; Galaxy Nexus Build/IMM76B) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.133 Mobile Safari/535.19'
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
@ -35,31 +32,34 @@ class XVideosIE(InfoExtractor):
|
||||
if mobj:
|
||||
raise ExtractorError('%s said: %s' % (self.IE_NAME, clean_html(mobj.group(1))), expected=True)
|
||||
|
||||
video_url = compat_urllib_parse_unquote(
|
||||
self._search_regex(r'flv_url=(.+?)&', webpage, 'video URL'))
|
||||
video_title = self._html_search_regex(
|
||||
r'<title>(.*?)\s+-\s+XVID', webpage, 'title')
|
||||
video_thumbnail = self._search_regex(
|
||||
r'url_bigthumb=(.+?)&', webpage, 'thumbnail', fatal=False)
|
||||
|
||||
formats = [{
|
||||
'url': video_url,
|
||||
}]
|
||||
formats = []
|
||||
|
||||
android_req = sanitized_Request(url)
|
||||
android_req.add_header('User-Agent', self._ANDROID_USER_AGENT)
|
||||
android_webpage = self._download_webpage(android_req, video_id, fatal=False)
|
||||
video_url = compat_urllib_parse_unquote(self._search_regex(
|
||||
r'flv_url=(.+?)&', webpage, 'video URL', default=''))
|
||||
if video_url:
|
||||
formats.append({'url': video_url})
|
||||
|
||||
if android_webpage is not None:
|
||||
player_params_str = self._search_regex(
|
||||
'mobileReplacePlayerDivTwoQual\(([^)]+)\)',
|
||||
android_webpage, 'player parameters', default='')
|
||||
player_params = list(map(lambda s: s.strip(' \''), player_params_str.split(',')))
|
||||
if player_params:
|
||||
formats.extend([{
|
||||
'url': param,
|
||||
'preference': -10,
|
||||
} for param in player_params if determine_ext(param) == 'mp4'])
|
||||
player_args = self._search_regex(
|
||||
r'(?s)new\s+HTML5Player\((.+?)\)', webpage, ' html5 player', default=None)
|
||||
if player_args:
|
||||
for arg in player_args.split(','):
|
||||
format_url = self._search_regex(
|
||||
r'(["\'])(?P<url>https?://.+?)\1', arg, 'url',
|
||||
default=None, group='url')
|
||||
if not format_url:
|
||||
continue
|
||||
ext = determine_ext(format_url)
|
||||
if ext == 'mp4':
|
||||
formats.append({'url': format_url})
|
||||
elif ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
format_url, video_id, 'mp4',
|
||||
entry_protocol='m3u8_native', m3u8_id='hls', fatal=False))
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
@ -67,7 +67,6 @@ class XVideosIE(InfoExtractor):
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
'title': video_title,
|
||||
'ext': 'flv',
|
||||
'thumbnail': video_thumbnail,
|
||||
'age_limit': 18,
|
||||
}
|
||||
|
@ -3,7 +3,7 @@ from __future__ import unicode_literals
|
||||
import subprocess
|
||||
|
||||
from .common import PostProcessor
|
||||
from ..compat import shlex_quote
|
||||
from ..compat import compat_shlex_quote
|
||||
from ..utils import PostProcessingError
|
||||
|
||||
|
||||
@ -17,7 +17,7 @@ class ExecAfterDownloadPP(PostProcessor):
|
||||
if '{}' not in cmd:
|
||||
cmd += ' {}'
|
||||
|
||||
cmd = cmd.replace('{}', shlex_quote(information['filepath']))
|
||||
cmd = cmd.replace('{}', compat_shlex_quote(information['filepath']))
|
||||
|
||||
self._downloader.to_screen('[exec] Executing command: %s' % cmd)
|
||||
retCode = subprocess.call(cmd, shell=True)
|
||||
|
@ -42,6 +42,7 @@ from .compat import (
|
||||
compat_http_client,
|
||||
compat_kwargs,
|
||||
compat_parse_qs,
|
||||
compat_shlex_quote,
|
||||
compat_socket_create_connection,
|
||||
compat_str,
|
||||
compat_struct_pack,
|
||||
@ -49,10 +50,10 @@ from .compat import (
|
||||
compat_urllib_parse,
|
||||
compat_urllib_parse_urlencode,
|
||||
compat_urllib_parse_urlparse,
|
||||
compat_urllib_parse_unquote_plus,
|
||||
compat_urllib_request,
|
||||
compat_urlparse,
|
||||
compat_xpath,
|
||||
shlex_quote,
|
||||
)
|
||||
|
||||
from .socks import (
|
||||
@ -104,9 +105,9 @@ KNOWN_EXTENSIONS = (
|
||||
'f4f', 'f4m', 'm3u8', 'smil')
|
||||
|
||||
# needed for sanitizing filenames in restricted mode
|
||||
ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ',
|
||||
itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOUUUUYP', ['ss'],
|
||||
'aaaaaa', ['ae'], 'ceeeeiiiionoooooouuuuypy')))
|
||||
ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØŒÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõöøœùúûüýþÿ',
|
||||
itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOO', ['OE'], 'UUUUYP', ['ss'],
|
||||
'aaaaaa', ['ae'], 'ceeeeiiiionoooooo', ['oe'], 'uuuuypy')))
|
||||
|
||||
|
||||
def preferredencoding():
|
||||
@ -882,11 +883,17 @@ def make_socks_conn_class(base_class, socks_proxy):
|
||||
elif url_components.scheme.lower() == 'socks4a':
|
||||
socks_type = ProxyType.SOCKS4A
|
||||
|
||||
def unquote_if_non_empty(s):
    # Falsy values (None, empty string) are handed back untouched; anything
    # else is passed through compat_urllib_parse_unquote_plus.
    return compat_urllib_parse_unquote_plus(s) if s else s
|
||||
|
||||
proxy_args = (
|
||||
socks_type,
|
||||
url_components.hostname, url_components.port or 1080,
|
||||
True, # Remote DNS
|
||||
url_components.username, url_components.password
|
||||
unquote_if_non_empty(url_components.username),
|
||||
unquote_if_non_empty(url_components.password),
|
||||
)
|
||||
|
||||
class SocksConnection(base_class):
|
||||
@ -1912,24 +1919,38 @@ def js_to_json(code):
|
||||
v = m.group(0)
|
||||
if v in ('true', 'false', 'null'):
|
||||
return v
|
||||
if v.startswith('"'):
|
||||
v = re.sub(r"\\'", "'", v[1:-1])
|
||||
elif v.startswith("'"):
|
||||
v = v[1:-1]
|
||||
v = re.sub(r"\\\\|\\'|\"", lambda m: {
|
||||
'\\\\': '\\\\',
|
||||
"\\'": "'",
|
||||
elif v.startswith('/*') or v == ',':
|
||||
return ""
|
||||
|
||||
if v[0] in ("'", '"'):
|
||||
v = re.sub(r'(?s)\\.|"', lambda m: {
|
||||
'"': '\\"',
|
||||
}[m.group(0)], v)
|
||||
"\\'": "'",
|
||||
'\\\n': '',
|
||||
'\\x': '\\u00',
|
||||
}.get(m.group(0), m.group(0)), v[1:-1])
|
||||
|
||||
INTEGER_TABLE = (
|
||||
(r'^0[xX][0-9a-fA-F]+', 16),
|
||||
(r'^0+[0-7]+', 8),
|
||||
)
|
||||
|
||||
for regex, base in INTEGER_TABLE:
|
||||
im = re.match(regex, v)
|
||||
if im:
|
||||
i = int(im.group(0), base)
|
||||
return '"%d":' % i if v.endswith(':') else '%d' % i
|
||||
|
||||
return '"%s"' % v
|
||||
|
||||
res = re.sub(r'''(?x)
|
||||
"(?:[^"\\]*(?:\\\\|\\['"nu]))*[^"\\]*"|
|
||||
'(?:[^'\\]*(?:\\\\|\\['"nu]))*[^'\\]*'|
|
||||
[a-zA-Z_][.a-zA-Z_0-9]*
|
||||
return re.sub(r'''(?sx)
|
||||
"(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"|
|
||||
'(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'|
|
||||
/\*.*?\*/|,(?=\s*[\]}])|
|
||||
[a-zA-Z_][.a-zA-Z_0-9]*|
|
||||
(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:\s*:)?|
|
||||
[0-9]+(?=\s*:)
|
||||
''', fix_kv, code)
|
||||
res = re.sub(r',(\s*[\]}])', lambda m: m.group(1), res)
|
||||
return res
|
||||
|
||||
|
||||
def qualities(quality_ids):
|
||||
@ -1977,7 +1998,7 @@ def ytdl_is_updateable():
|
||||
|
||||
def args_to_str(args):
|
||||
# Get a short string representation for a subprocess command
|
||||
return ' '.join(shlex_quote(a) for a in args)
|
||||
return ' '.join(compat_shlex_quote(a) for a in args)
|
||||
|
||||
|
||||
def error_to_compat_str(err):
|
||||
@ -2015,11 +2036,7 @@ def mimetype2ext(mt):
|
||||
|
||||
|
||||
def urlhandle_detect_ext(url_handle):
|
||||
try:
|
||||
url_handle.headers
|
||||
getheader = lambda h: url_handle.headers[h]
|
||||
except AttributeError: # Python < 3
|
||||
getheader = url_handle.info().getheader
|
||||
getheader = url_handle.headers.get
|
||||
|
||||
cd = getheader('Content-Disposition')
|
||||
if cd:
|
||||
|
@ -1,3 +1,3 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
__version__ = '2016.05.10'
|
||||
__version__ = '2016.05.16'
|
||||
|
Loading…
x
Reference in New Issue
Block a user