This commit is contained in:
Gilles Habran 2016-05-17 09:08:34 +02:00
commit a07fcf5328
34 changed files with 786 additions and 690 deletions

View File

@ -6,8 +6,8 @@
---
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.05.10*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.05.10**
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.05.16*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.05.16**
### Before submitting an *issue* make sure you have:
- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
@ -35,7 +35,7 @@ $ youtube-dl -v <your command line>
[debug] User config: []
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
[debug] youtube-dl version 2016.05.10
[debug] youtube-dl version 2016.05.16
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
[debug] Proxy map: {}

View File

@ -418,7 +418,7 @@ which means you can modify it, redistribute it or use it however you like.
# CONFIGURATION
You can configure youtube-dl by placing any supported command line option to a configuration file. On Linux, the system wide configuration file is located at `/etc/youtube-dl.conf` and the user wide configuration file at `~/.config/youtube-dl/config`. On Windows, the user wide configuration file locations are `%APPDATA%\youtube-dl\config.txt` or `C:\Users\<user name>\youtube-dl.conf`.
You can configure youtube-dl by placing any supported command line option to a configuration file. On Linux and OS X, the system wide configuration file is located at `/etc/youtube-dl.conf` and the user wide configuration file at `~/.config/youtube-dl/config`. On Windows, the user wide configuration file locations are `%APPDATA%\youtube-dl\config.txt` or `C:\Users\<user name>\youtube-dl.conf`.
For example, with the following configuration file youtube-dl will always extract the audio, not copy the mtime, use a proxy and save all videos under `Movies` directory in your home directory:
```

View File

@ -6,6 +6,7 @@
- **22tracks:genre**
- **22tracks:track**
- **24video**
- **3qsdn**: 3Q SDN
- **3sat**
- **4tube**
- **56.com**
@ -114,7 +115,6 @@
- **chirbit**
- **chirbit:profile**
- **Cinchcast**
- **Cinemassacre**
- **Clipfish**
- **cliphunter**
- **ClipRs**
@ -128,7 +128,6 @@
- **CNN**
- **CNNArticle**
- **CNNBlogs**
- **CollegeHumor**
- **CollegeRama**
- **ComCarCoff**
- **ComedyCentral**
@ -680,7 +679,6 @@
- **tvp.pl:Series**
- **TVPlay**: TV3Play and related services
- **Tweakers**
- **twitch:bookmarks**
- **twitch:chapter**
- **twitch:past_broadcasts**
- **twitch:profile**
@ -698,7 +696,8 @@
- **USAToday**
- **ustream**
- **ustream:channel**
- **Ustudio**
- **ustudio**
- **ustudio:embed**
- **Varzesh3**
- **Vbox7**
- **VeeHD**

View File

@ -77,17 +77,28 @@ class TestMultipleSocks(unittest.TestCase):
class TestSocks(unittest.TestCase):
_SKIP_SOCKS_TEST = True
def setUp(self):
if self._SKIP_SOCKS_TEST:
return
self.port = random.randint(20000, 30000)
self.server_process = subprocess.Popen([
'srelay', '-f', '-i', '127.0.0.1:%d' % self.port],
stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
def tearDown(self):
if self._SKIP_SOCKS_TEST:
return
self.server_process.terminate()
self.server_process.communicate()
def _get_ip(self, protocol):
if self._SKIP_SOCKS_TEST:
return '127.0.0.1'
ydl = FakeYDL({
'proxy': '%s://127.0.0.1:%d' % (protocol, self.port),
})

View File

@ -155,8 +155,8 @@ class TestUtil(unittest.TestCase):
self.assertTrue(sanitize_filename(':', restricted=True) != '')
self.assertEqual(sanitize_filename(
'ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ', restricted=True),
'AAAAAAAECEEEEIIIIDNOOOOOOUUUUYPssaaaaaaaeceeeeiiiionoooooouuuuypy')
'ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØŒÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõöøœùúûüýþÿ', restricted=True),
'AAAAAAAECEEEEIIIIDNOOOOOOOEUUUUYPssaaaaaaaeceeeeiiiionoooooooeuuuuypy')
def test_sanitize_ids(self):
self.assertEqual(sanitize_filename('_n_cd26wFpw', is_id=True), '_n_cd26wFpw')
@ -617,6 +617,15 @@ class TestUtil(unittest.TestCase):
json_code = js_to_json(inp)
self.assertEqual(json.loads(json_code), json.loads(inp))
inp = '''{
0:{src:'skipped', type: 'application/dash+xml'},
1:{src:'skipped', type: 'application/vnd.apple.mpegURL'},
}'''
self.assertEqual(js_to_json(inp), '''{
"0":{"src":"skipped", "type": "application/dash+xml"},
"1":{"src":"skipped", "type": "application/vnd.apple.mpegURL"}
}''')
def test_js_to_json_edgecases(self):
on = js_to_json("{abc_def:'1\\'\\\\2\\\\\\'3\"4'}")
self.assertEqual(json.loads(on), {"abc_def": "1'\\2\\'3\"4"})
@ -640,6 +649,27 @@ class TestUtil(unittest.TestCase):
on = js_to_json('{"abc": "def",}')
self.assertEqual(json.loads(on), {'abc': 'def'})
on = js_to_json('{ 0: /* " \n */ ",]" , }')
self.assertEqual(json.loads(on), {'0': ',]'})
on = js_to_json(r'["<p>x<\/p>"]')
self.assertEqual(json.loads(on), ['<p>x</p>'])
on = js_to_json(r'["\xaa"]')
self.assertEqual(json.loads(on), ['\u00aa'])
on = js_to_json("['a\\\nb']")
self.assertEqual(json.loads(on), ['ab'])
on = js_to_json('{0xff:0xff}')
self.assertEqual(json.loads(on), {'255': 255})
on = js_to_json('{077:077}')
self.assertEqual(json.loads(on), {'63': 63})
on = js_to_json('{42:42}')
self.assertEqual(json.loads(on), {'42': 42})
def test_extract_attributes(self):
self.assertEqual(extract_attributes('<e x="y">'), {'x': 'y'})
self.assertEqual(extract_attributes("<e x='y'>"), {'x': 'y'})

View File

@ -326,7 +326,7 @@ class YoutubeDL(object):
['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
self._output_channel = os.fdopen(master, 'rb')
except OSError as ose:
if ose.errno == 2:
if ose.errno == errno.ENOENT:
self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
else:
raise
@ -720,6 +720,7 @@ class YoutubeDL(object):
result_type = ie_result.get('_type', 'video')
if result_type in ('url', 'url_transparent'):
ie_result['url'] = sanitize_url(ie_result['url'])
extract_flat = self.params.get('extract_flat', False)
if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
extract_flat is True):

View File

@ -67,9 +67,9 @@ def _real_main(argv=None):
# Custom HTTP headers
if opts.headers is not None:
for h in opts.headers:
if h.find(':', 1) < 0:
if ':' not in h:
parser.error('wrong header formatting, it should be key:value, not "%s"' % h)
key, value = h.split(':', 2)
key, value = h.split(':', 1)
if opts.verbose:
write_string('[debug] Adding header from command line option %s:%s\n' % (key, value))
std_headers[key] = value

View File

@ -341,9 +341,9 @@ except ImportError: # Python 2
return parsed_result
try:
from shlex import quote as shlex_quote
from shlex import quote as compat_shlex_quote
except ImportError: # Python < 3.3
def shlex_quote(s):
def compat_shlex_quote(s):
if re.match(r'^[-_\w./]+$', s):
return s
else:
@ -466,18 +466,6 @@ else:
print(s)
try:
subprocess_check_output = subprocess.check_output
except AttributeError:
def subprocess_check_output(*args, **kwargs):
assert 'input' not in kwargs
p = subprocess.Popen(*args, stdout=subprocess.PIPE, **kwargs)
output, _ = p.communicate()
ret = p.poll()
if ret:
raise subprocess.CalledProcessError(ret, p.args, output=output)
return output
if sys.version_info < (3, 0) and sys.platform == 'win32':
def compat_getpass(prompt, *args, **kwargs):
if isinstance(prompt, compat_str):
@ -635,6 +623,7 @@ __all__ = [
'compat_parse_qs',
'compat_print',
'compat_setenv',
'compat_shlex_quote',
'compat_shlex_split',
'compat_socket_create_connection',
'compat_str',
@ -656,7 +645,5 @@ __all__ = [
'compat_urlretrieve',
'compat_xml_parse_error',
'compat_xpath',
'shlex_quote',
'subprocess_check_output',
'workaround_optparse_bug9161',
]

View File

@ -23,26 +23,38 @@ from ..utils import (
)
class DataTruncatedError(Exception):
pass
class FlvReader(io.BytesIO):
"""
Reader for Flv files
The file format is documented in https://www.adobe.com/devnet/f4v.html
"""
def read_bytes(self, n):
data = self.read(n)
if len(data) < n:
raise DataTruncatedError(
'FlvReader error: need %d bytes while only %d bytes got' % (
n, len(data)))
return data
# Utility functions for reading numbers and strings
def read_unsigned_long_long(self):
return compat_struct_unpack('!Q', self.read(8))[0]
return compat_struct_unpack('!Q', self.read_bytes(8))[0]
def read_unsigned_int(self):
return compat_struct_unpack('!I', self.read(4))[0]
return compat_struct_unpack('!I', self.read_bytes(4))[0]
def read_unsigned_char(self):
return compat_struct_unpack('!B', self.read(1))[0]
return compat_struct_unpack('!B', self.read_bytes(1))[0]
def read_string(self):
res = b''
while True:
char = self.read(1)
char = self.read_bytes(1)
if char == b'\x00':
break
res += char
@ -53,18 +65,18 @@ class FlvReader(io.BytesIO):
Read a box and return the info as a tuple: (box_size, box_type, box_data)
"""
real_size = size = self.read_unsigned_int()
box_type = self.read(4)
box_type = self.read_bytes(4)
header_end = 8
if size == 1:
real_size = self.read_unsigned_long_long()
header_end = 16
return real_size, box_type, self.read(real_size - header_end)
return real_size, box_type, self.read_bytes(real_size - header_end)
def read_asrt(self):
# version
self.read_unsigned_char()
# flags
self.read(3)
self.read_bytes(3)
quality_entry_count = self.read_unsigned_char()
# QualityEntryCount
for i in range(quality_entry_count):
@ -85,7 +97,7 @@ class FlvReader(io.BytesIO):
# version
self.read_unsigned_char()
# flags
self.read(3)
self.read_bytes(3)
# time scale
self.read_unsigned_int()
@ -119,7 +131,7 @@ class FlvReader(io.BytesIO):
# version
self.read_unsigned_char()
# flags
self.read(3)
self.read_bytes(3)
self.read_unsigned_int() # BootstrapinfoVersion
# Profile,Live,Update,Reserved
@ -374,7 +386,17 @@ class F4mFD(FragmentFD):
down.close()
reader = FlvReader(down_data)
while True:
try:
_, box_type, box_data = reader.read_box_info()
except DataTruncatedError:
if test:
# In tests, segments may be truncated, and thus
# FlvReader may not be able to parse the whole
# chunk. If so, write the segment as is
# See https://github.com/rg3/youtube-dl/issues/9214
dest_stream.write(down_data)
break
raise
if box_type == b'mdat':
dest_stream.write(box_data)
break

View File

@ -17,6 +17,9 @@ class BloombergIE(InfoExtractor):
'title': 'Shah\'s Presentation on Foreign-Exchange Strategies',
'description': 'md5:a8ba0302912d03d246979735c17d2761',
},
'params': {
'format': 'best[format_id^=hds]',
},
}, {
'url': 'http://www.bloomberg.com/news/articles/2015-11-12/five-strange-things-that-have-been-happening-in-financial-markets',
'only_matching': True,

View File

@ -307,9 +307,10 @@ class BrightcoveLegacyIE(InfoExtractor):
playlist_title=playlist_info['mediaCollectionDTO']['displayName'])
def _extract_video_info(self, video_info):
video_id = compat_str(video_info['id'])
publisher_id = video_info.get('publisherId')
info = {
'id': compat_str(video_info['id']),
'id': video_id,
'title': video_info['displayName'].strip(),
'description': video_info.get('shortDescription'),
'thumbnail': video_info.get('videoStillURL') or video_info.get('thumbnailURL'),
@ -331,7 +332,8 @@ class BrightcoveLegacyIE(InfoExtractor):
url_comp = compat_urllib_parse_urlparse(url)
if url_comp.path.endswith('.m3u8'):
formats.extend(
self._extract_m3u8_formats(url, info['id'], 'mp4'))
self._extract_m3u8_formats(
url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
continue
elif 'akamaihd.net' in url_comp.netloc:
# This type of renditions are served through
@ -365,7 +367,7 @@ class BrightcoveLegacyIE(InfoExtractor):
a_format.update({
'format_id': 'hls%s' % ('-%s' % tbr if tbr else ''),
'ext': 'mp4',
'protocol': 'm3u8',
'protocol': 'm3u8_native',
})
formats.append(a_format)
@ -395,7 +397,7 @@ class BrightcoveLegacyIE(InfoExtractor):
return ad_info
if 'url' not in info and not info.get('formats'):
raise ExtractorError('Unable to extract video url for %s' % info['id'])
raise ExtractorError('Unable to extract video url for %s' % video_id)
return info
@ -527,7 +529,7 @@ class BrightcoveNewIE(InfoExtractor):
if not src:
continue
formats.extend(self._extract_m3u8_formats(
src, video_id, 'mp4', m3u8_id='hls', fatal=False))
src, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
elif source_type == 'application/dash+xml':
if not src:
continue

View File

@ -1,119 +0,0 @@
# encoding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import ExtractorError
from .screenwavemedia import ScreenwaveMediaIE
class CinemassacreIE(InfoExtractor):
    """Extractor for cinemassacre.com video pages.

    Pages embed their player either through ScreenwaveMedia or through a
    plain YouTube iframe.  Extraction is therefore delegated via a
    ``url_transparent`` result: the embedded extractor resolves the actual
    formats, while the metadata scraped here (title, description, upload
    date, thumbnail) is layered on top.

    Raises ExtractorError when no embedded player URL can be located.
    """

    # BUG FIX: this pattern was previously a plain (non-raw) string, so
    # sequences such as '\.' were invalid string escapes (a
    # DeprecationWarning on Python 3).  Regex patterns must be raw strings.
    _VALID_URL = r'https?://(?:www\.)?cinemassacre\.com/(?P<date_y>[0-9]{4})/(?P<date_m>[0-9]{2})/(?P<date_d>[0-9]{2})/(?P<display_id>[^?#/]+)'
    _TESTS = [
        {
            'url': 'http://cinemassacre.com/2012/11/10/avgn-the-movie-trailer/',
            'md5': 'fde81fbafaee331785f58cd6c0d46190',
            'info_dict': {
                'id': 'Cinemassacre-19911',
                'ext': 'mp4',
                'upload_date': '20121110',
                'title': '“Angry Video Game Nerd: The Movie” Trailer',
                'description': 'md5:fb87405fcb42a331742a0dce2708560b',
            },
            'params': {
                # m3u8 download
                'skip_download': True,
            },
        },
        {
            'url': 'http://cinemassacre.com/2013/10/02/the-mummys-hand-1940',
            'md5': 'd72f10cd39eac4215048f62ab477a511',
            'info_dict': {
                'id': 'Cinemassacre-521be8ef82b16',
                'ext': 'mp4',
                'upload_date': '20131002',
                'title': 'The Mummys Hand (1940)',
            },
            'params': {
                # m3u8 download
                'skip_download': True,
            },
        },
        {
            # Youtube embedded video
            'url': 'http://cinemassacre.com/2006/12/07/chronologically-confused-about-bad-movie-and-video-game-sequel-titles/',
            'md5': 'ec9838a5520ef5409b3e4e42fcb0a3b9',
            'info_dict': {
                'id': 'OEVzPCY2T-g',
                'ext': 'webm',
                'title': 'AVGN: Chronologically Confused about Bad Movie and Video Game Sequel Titles',
                'upload_date': '20061207',
                'uploader': 'Cinemassacre',
                'uploader_id': 'JamesNintendoNerd',
                'description': 'md5:784734696c2b8b7f4b8625cc799e07f6',
            }
        },
        {
            # Youtube embedded video
            'url': 'http://cinemassacre.com/2006/09/01/mckids/',
            'md5': '7393c4e0f54602ad110c793eb7a6513a',
            'info_dict': {
                'id': 'FnxsNhuikpo',
                'ext': 'webm',
                'upload_date': '20060901',
                'uploader': 'Cinemassacre Extra',
                'description': 'md5:de9b751efa9e45fbaafd9c8a1123ed53',
                'uploader_id': 'Cinemassacre',
                'title': 'AVGN: McKids',
            }
        },
        {
            'url': 'http://cinemassacre.com/2015/05/25/mario-kart-64-nintendo-64-james-mike-mondays/',
            'md5': '1376908e49572389e7b06251a53cdd08',
            'info_dict': {
                'id': 'Cinemassacre-555779690c440',
                'ext': 'mp4',
                'description': 'Lets Play Mario Kart 64 !! Mario Kart 64 is a classic go-kart racing game released for the Nintendo 64 (N64). Today James & Mike do 4 player Battle Mode with Kyle and Bootsy!',
                'title': 'Mario Kart 64 (Nintendo 64) James & Mike Mondays',
                'upload_date': '20150525',
            },
            'params': {
                # m3u8 download
                'skip_download': True,
            },
        }
    ]

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        display_id = mobj.group('display_id')
        # Upload date is encoded in the URL path as /YYYY/MM/DD/.
        video_date = mobj.group('date_y') + mobj.group('date_m') + mobj.group('date_d')

        webpage = self._download_webpage(url, display_id)

        # The page may embed either a ScreenwaveMedia player or a YouTube
        # iframe; try both patterns and delegate to whichever matched.
        playerdata_url = self._search_regex(
            [
                ScreenwaveMediaIE.EMBED_PATTERN,
                r'<iframe[^>]+src="(?P<url>(?:https?:)?//(?:[^.]+\.)?youtube\.com/.+?)"',
            ],
            webpage, 'player data URL', default=None, group='url')
        if not playerdata_url:
            raise ExtractorError('Unable to find player data')

        video_title = self._html_search_regex(
            r'<title>(?P<title>.+?)\|', webpage, 'title')
        video_description = self._html_search_regex(
            r'<div class="entry-content">(?P<description>.+?)</div>',
            webpage, 'description', flags=re.DOTALL, fatal=False)
        video_thumbnail = self._og_search_thumbnail(webpage)

        # url_transparent: the embedded extractor supplies formats, while
        # the metadata scraped above is merged into the final result.
        return {
            '_type': 'url_transparent',
            'display_id': display_id,
            'title': video_title,
            'description': video_description,
            'upload_date': video_date,
            'thumbnail': video_thumbnail,
            'url': playerdata_url,
        }

View File

@ -1,101 +0,0 @@
from __future__ import unicode_literals
import json
import re
from .common import InfoExtractor
from ..utils import int_or_none
class CollegeHumorIE(InfoExtractor):
    """Extractor for collegehumor.com video, embed and short-URL pages.

    Metadata comes from the site's "moogaloop" JSON endpoint.  Videos that
    are really YouTube uploads are handed off to the YouTube extractor;
    native videos expose per-quality MP4/WebM download URLs.
    """

    _VALID_URL = r'^(?:https?://)?(?:www\.)?collegehumor\.com/(video|embed|e)/(?P<videoid>[0-9]+)/?(?P<shorttitle>.*)$'

    _TESTS = [
        {
            'url': 'http://www.collegehumor.com/video/6902724/comic-con-cosplay-catastrophe',
            'md5': 'dcc0f5c1c8be98dc33889a191f4c26bd',
            'info_dict': {
                'id': '6902724',
                'ext': 'mp4',
                'title': 'Comic-Con Cosplay Catastrophe',
                'description': "Fans get creative this year at San Diego.  Too creative.  And yes, that's really Joss Whedon.",
                'age_limit': 13,
                'duration': 187,
            },
        }, {
            'url': 'http://www.collegehumor.com/video/3505939/font-conference',
            'md5': '72fa701d8ef38664a4dbb9e2ab721816',
            'info_dict': {
                'id': '3505939',
                'ext': 'mp4',
                'title': 'Font Conference',
                'description': "This video wasn't long enough, so we made it double-spaced.",
                'age_limit': 10,
                'duration': 179,
            },
        }, {
            # embedded youtube video
            'url': 'http://www.collegehumor.com/embed/6950306',
            'info_dict': {
                'id': 'Z-bao9fg6Yc',
                'ext': 'mp4',
                'title': 'Young Americans Think President John F. Kennedy Died THIS MORNING IN A CAR ACCIDENT!!!',
                'uploader': 'Mark Dice',
                'uploader_id': 'MarkDice',
                'description': 'md5:62c3dab9351fac7bb44b53b69511d87f',
                'upload_date': '20140127',
            },
            'params': {
                'skip_download': True,
            },
            'add_ie': ['Youtube'],
        },
    ]

    def _real_extract(self, url):
        video_id = re.match(self._VALID_URL, url).group('videoid')

        info_url = 'http://www.collegehumor.com/moogaloop/video/' + video_id + '.json'
        video_data = json.loads(self._download_webpage(
            info_url, video_id, 'Downloading info JSON'))['video']

        # A 'youtubeId' means the clip is hosted on YouTube; delegate.
        youtube_id = video_data.get('youtubeId')
        if youtube_id is not None:
            return {
                '_type': 'url',
                'url': youtube_id,
                'ie_key': 'Youtube',
            }

        # Map the site's MPAA-style rating onto a numeric age limit.
        AGE_LIMITS = {'nc17': 18, 'r': 18, 'pg13': 13, 'pg': 10, 'g': 0}
        rating = video_data.get('rating')
        age_limit = AGE_LIMITS.get(rating.lower()) if rating else None

        # Rank known quality names; unknown ones get no preference.
        PREFS = {'high_quality': 2, 'low_quality': 0}
        formats = [{
            'format_id': container + '_' + quality_name,
            'url': quality_url,
            'format': container,
            'preference': PREFS.get(quality_name),
        } for container in ('mp4', 'webm')
            for quality_name, quality_url in video_data.get(container, {}).items()]
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': video_data['title'],
            'description': video_data.get('description'),
            'thumbnail': video_data.get('thumbnail'),
            'formats': formats,
            'age_limit': age_limit,
            # duration is reported in milliseconds
            'duration': int_or_none(video_data.get('duration'), 1000),
            'like_count': int_or_none(video_data.get('likes')),
        }

View File

@ -1139,11 +1139,14 @@ class InfoExtractor(object):
if m3u8_id:
format_id.append(m3u8_id)
last_media_name = last_media.get('NAME') if last_media and last_media.get('TYPE') != 'SUBTITLES' else None
# Despite specification does not mention NAME attribute for
# EXT-X-STREAM-INF it still sometimes may be present
stream_name = last_info.get('NAME') or last_media_name
# Bandwidth of live streams may differ over time thus making
# format_id unpredictable. So it's better to keep provided
# format_id intact.
if not live:
format_id.append(last_media_name if last_media_name else '%d' % (tbr if tbr else len(formats)))
format_id.append(stream_name if stream_name else '%d' % (tbr if tbr else len(formats)))
f = {
'format_id': '-'.join(format_id),
'url': format_url(line.strip()),

View File

@ -124,7 +124,6 @@ from .chirbit import (
ChirbitProfileIE,
)
from .cinchcast import CinchcastIE
from .cinemassacre import CinemassacreIE
from .cliprs import ClipRsIE
from .clipfish import ClipfishIE
from .cliphunter import CliphunterIE
@ -139,7 +138,6 @@ from .cnn import (
CNNBlogsIE,
CNNArticleIE,
)
from .collegehumor import CollegeHumorIE
from .collegerama import CollegeRamaIE
from .comedycentral import ComedyCentralIE, ComedyCentralShowsIE
from .comcarcoff import ComCarCoffIE
@ -767,6 +765,7 @@ from .thesixtyone import TheSixtyOneIE
from .thestar import TheStarIE
from .thisamericanlife import ThisAmericanLifeIE
from .thisav import ThisAVIE
from .threeqsdn import ThreeQSDNIE
from .tinypic import TinyPicIE
from .tlc import TlcDeIE
from .tmz import (
@ -834,7 +833,6 @@ from .twitch import (
TwitchVodIE,
TwitchProfileIE,
TwitchPastBroadcastsIE,
TwitchBookmarksIE,
TwitchStreamIE,
)
from .twitter import (
@ -852,7 +850,10 @@ from .unistra import UnistraIE
from .urort import UrortIE
from .usatoday import USATodayIE
from .ustream import UstreamIE, UstreamChannelIE
from .ustudio import UstudioIE
from .ustudio import (
UstudioIE,
UstudioEmbedIE,
)
from .varzesh3 import Varzesh3IE
from .vbox7 import Vbox7IE
from .veehd import VeeHDIE

View File

@ -61,6 +61,7 @@ from .jwplatform import JWPlatformIE
from .digiteka import DigitekaIE
from .instagram import InstagramIE
from .liveleak import LiveLeakIE
from .threeqsdn import ThreeQSDNIE
class GenericIE(InfoExtractor):
@ -1427,7 +1428,8 @@ class GenericIE(InfoExtractor):
# Site Name | Video Title
# Video Title - Tagline | Site Name
# and so on and so forth; it's just not practical
video_title = self._html_search_regex(
video_title = self._og_search_title(
webpage, default=None) or self._html_search_regex(
r'(?s)<title>(.*?)</title>', webpage, 'video title',
default='video')
@ -1445,6 +1447,9 @@ class GenericIE(InfoExtractor):
video_uploader = self._search_regex(
r'^(?:https?://)?([^/]*)/.*', url, 'video uploader')
video_description = self._og_search_description(webpage, default=None)
video_thumbnail = self._og_search_thumbnail(webpage, default=None)
# Helper method
def _playlist_from_matches(matches, getter=None, ie=None):
urlrs = orderedSet(
@ -1983,6 +1988,19 @@ class GenericIE(InfoExtractor):
if liveleak_url:
return self.url_result(liveleak_url, 'LiveLeak')
# Look for 3Q SDN embeds
threeqsdn_url = ThreeQSDNIE._extract_url(webpage)
if threeqsdn_url:
return {
'_type': 'url_transparent',
'ie_key': ThreeQSDNIE.ie_key(),
'url': self._proto_relative_url(threeqsdn_url),
'title': video_title,
'description': video_description,
'thumbnail': video_thumbnail,
'uploader': video_uploader,
}
def check_video(vurl):
if YoutubeIE.suitable(vurl):
return True

View File

@ -4,7 +4,7 @@ from .common import InfoExtractor
class GrouponIE(InfoExtractor):
_VALID_URL = r'https?://www\.groupon\.com/deals/(?P<id>[^?#]+)'
_VALID_URL = r'https?://(?:www\.)?groupon\.com/deals/(?P<id>[^/?#&]+)'
_TEST = {
'url': 'https://www.groupon.com/deals/bikram-yoga-huntington-beach-2#ooid=tubGNycTo_9Uxg82uESj4i61EYX8nyuf',
@ -15,18 +15,26 @@ class GrouponIE(InfoExtractor):
},
'playlist': [{
'info_dict': {
'id': 'tubGNycTo_9Uxg82uESj4i61EYX8nyuf',
'ext': 'flv',
'title': 'Bikram Yoga Huntington Beach | Orange County',
'id': 'fk6OhWpXgIQ',
'ext': 'mp4',
'title': 'Bikram Yoga Huntington Beach | Orange County !tubGNycTo@9Uxg82uESj4i61EYX8nyuf',
'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
'duration': 44.961,
'duration': 45,
'upload_date': '20160405',
'uploader_id': 'groupon',
'uploader': 'Groupon',
},
}],
'params': {
'skip_download': 'HDS',
'skip_download': True,
}
}
_PROVIDERS = {
'ooyala': ('ooyala:%s', 'Ooyala'),
'youtube': ('%s', 'Youtube'),
}
def _real_extract(self, url):
playlist_id = self._match_id(url)
webpage = self._download_webpage(url, playlist_id)
@ -36,12 +44,17 @@ class GrouponIE(InfoExtractor):
videos = payload['carousel'].get('dealVideos', [])
entries = []
for v in videos:
if v.get('provider') != 'OOYALA':
provider = v.get('provider')
video_id = v.get('media') or v.get('id') or v.get('baseURL')
if not provider or not video_id:
continue
url_pattern, ie_key = self._PROVIDERS.get(provider.lower())
if not url_pattern:
self.report_warning(
'%s: Unsupported video provider %s, skipping video' %
(playlist_id, v.get('provider')))
(playlist_id, provider))
continue
entries.append(self.url_result('ooyala:%s' % v['media']))
entries.append(self.url_result(url_pattern % video_id, ie_key))
return {
'_type': 'playlist',

View File

@ -7,6 +7,7 @@ from .common import InfoExtractor
from ..compat import compat_urlparse
from ..utils import (
HEADRequest,
KNOWN_EXTENSIONS,
sanitized_Request,
str_to_int,
urlencode_postdata,
@ -17,7 +18,7 @@ from ..utils import (
class HearThisAtIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?hearthis\.at/(?P<artist>[^/]+)/(?P<title>[A-Za-z0-9\-]+)/?$'
_PLAYLIST_URL = 'https://hearthis.at/playlist.php'
_TEST = {
_TESTS = [{
'url': 'https://hearthis.at/moofi/dr-kreep',
'md5': 'ab6ec33c8fed6556029337c7885eb4e0',
'info_dict': {
@ -26,7 +27,7 @@ class HearThisAtIE(InfoExtractor):
'title': 'Moofi - Dr. Kreep',
'thumbnail': 're:^https?://.*\.jpg$',
'timestamp': 1421564134,
'description': 'Creepy Patch. Mutable Instruments Braids Vowel + Formant Mode.',
'description': 'Listen to Dr. Kreep by Moofi on hearthis.at - Modular, Eurorack, Mutable Intruments Braids, Valhalla-DSP',
'upload_date': '20150118',
'comment_count': int,
'view_count': int,
@ -34,7 +35,25 @@ class HearThisAtIE(InfoExtractor):
'duration': 71,
'categories': ['Experimental'],
}
}
}, {
# 'download' link redirects to the original webpage
'url': 'https://hearthis.at/twitchsf/dj-jim-hopkins-totally-bitchin-80s-dance-mix/',
'md5': '5980ceb7c461605d30f1f039df160c6e',
'info_dict': {
'id': '811296',
'ext': 'mp3',
'title': 'TwitchSF - DJ Jim Hopkins - Totally Bitchin\' 80\'s Dance Mix!',
'description': 'Listen to DJ Jim Hopkins - Totally Bitchin\' 80\'s Dance Mix! by TwitchSF on hearthis.at - Dance',
'upload_date': '20160328',
'timestamp': 1459186146,
'thumbnail': 're:^https?://.*\.jpg$',
'comment_count': int,
'view_count': int,
'like_count': int,
'duration': 4360,
'categories': ['Dance'],
},
}]
def _real_extract(self, url):
m = re.match(self._VALID_URL, url)
@ -90,6 +109,7 @@ class HearThisAtIE(InfoExtractor):
ext_handle = self._request_webpage(
ext_req, display_id, note='Determining extension')
ext = urlhandle_detect_ext(ext_handle)
if ext in KNOWN_EXTENSIONS:
formats.append({
'format_id': 'download',
'vcodec': 'none',

View File

@ -1,10 +1,10 @@
from __future__ import unicode_literals
import re
import json
from .common import InfoExtractor
from ..utils import (
mimetype2ext,
qualities,
)
@ -12,9 +12,9 @@ from ..utils import (
class ImdbIE(InfoExtractor):
IE_NAME = 'imdb'
IE_DESC = 'Internet Movie Database trailers'
_VALID_URL = r'https?://(?:www|m)\.imdb\.com/video/imdb/vi(?P<id>\d+)'
_VALID_URL = r'https?://(?:www|m)\.imdb\.com/video/[^/]+/vi(?P<id>\d+)'
_TEST = {
_TESTS = [{
'url': 'http://www.imdb.com/video/imdb/vi2524815897',
'info_dict': {
'id': '2524815897',
@ -22,7 +22,10 @@ class ImdbIE(InfoExtractor):
'title': 'Ice Age: Continental Drift Trailer (No. 2) - IMDb',
'description': 'md5:9061c2219254e5d14e03c25c98e96a81',
}
}
}, {
'url': 'http://www.imdb.com/video/_/vi2524815897',
'only_matching': True,
}]
def _real_extract(self, url):
video_id = self._match_id(url)
@ -48,13 +51,27 @@ class ImdbIE(InfoExtractor):
json_data = self._search_regex(
r'<script[^>]+class="imdb-player-data"[^>]*?>(.*?)</script>',
format_page, 'json data', flags=re.DOTALL)
info = json.loads(json_data)
format_info = info['videoPlayerObject']['video']
f_id = format_info['ffname']
info = self._parse_json(json_data, video_id, fatal=False)
if not info:
continue
format_info = info.get('videoPlayerObject', {}).get('video', {})
if not format_info:
continue
video_info_list = format_info.get('videoInfoList')
if not video_info_list or not isinstance(video_info_list, list):
continue
video_info = video_info_list[0]
if not video_info or not isinstance(video_info, dict):
continue
video_url = video_info.get('videoUrl')
if not video_url:
continue
format_id = format_info.get('ffname')
formats.append({
'format_id': f_id,
'url': format_info['videoInfoList'][0]['videoUrl'],
'quality': quality(f_id),
'format_id': format_id,
'url': video_url,
'ext': mimetype2ext(video_info.get('videoMimeType')),
'quality': quality(format_id),
})
self._sort_formats(formats)

View File

@ -505,7 +505,10 @@ class IqiyiIE(InfoExtractor):
'enc': md5_text(enc_key + tail),
'qyid': _uuid,
'tn': random.random(),
'um': 0,
# In iQiyi's flash player, um is set to 1 if there's a logged user
# Some 1080P formats are only available with a logged user.
# Here force um=1 to trick the iQiyi server
'um': 1,
'authkey': md5_text(md5_text('') + tail),
'k_tag': 1,
}

View File

@ -11,7 +11,7 @@ class MGTVIE(InfoExtractor):
_TEST = {
'url': 'http://www.mgtv.com/v/1/290525/f/3116640.html',
'md5': '',
'md5': '1bdadcf760a0b90946ca68ee9a2db41a',
'info_dict': {
'id': '3116640',
'ext': 'mp4',
@ -20,15 +20,6 @@ class MGTVIE(InfoExtractor):
'duration': 7461,
'thumbnail': 're:^https?://.*\.jpg$',
},
'params': {
'skip_download': True, # m3u8 download
},
}
_FORMAT_MAP = {
'标清': ('Standard', 0),
'高清': ('High', 1),
'超清': ('SuperHigh', 2),
}
def _real_extract(self, url):
@ -40,17 +31,27 @@ class MGTVIE(InfoExtractor):
formats = []
for idx, stream in enumerate(api_data['stream']):
format_name = stream.get('name')
format_id, preference = self._FORMAT_MAP.get(format_name, (None, None))
stream_url = stream.get('url')
if not stream_url:
continue
tbr = int_or_none(self._search_regex(
r'(\d+)\.mp4', stream_url, 'tbr', default=None))
def extract_format(stream_url, format_id, idx, query={}):
format_info = self._download_json(
stream['url'], video_id,
note='Download video info for format %s' % format_id or '#%d' % idx)
formats.append({
stream_url, video_id,
note='Download video info for format %s' % format_id or '#%d' % idx, query=query)
return {
'format_id': format_id,
'url': format_info['info'],
'ext': 'mp4', # These are m3u8 playlists
'preference': preference,
})
'ext': 'mp4',
'tbr': tbr,
}
formats.append(extract_format(
stream_url, 'hls-%d' % tbr if tbr else None, idx * 2))
formats.append(extract_format(stream_url.replace(
'/playlist.m3u8', ''), 'http-%d' % tbr if tbr else None, idx * 2 + 1, {'pno': 1031}))
self._sort_formats(formats)
return {

View File

@ -4,35 +4,138 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import (
compat_urlparse,
compat_urllib_parse_unquote,
)
from ..compat import compat_urllib_parse_unquote
from ..utils import (
determine_ext,
ExtractorError,
float_or_none,
int_or_none,
parse_age_limit,
parse_duration,
unified_strdate,
)
class NRKIE(InfoExtractor):
_VALID_URL = r'(?:nrk:|https?://(?:www\.)?nrk\.no/video/PS\*)(?P<id>\d+)'
class NRKBaseIE(InfoExtractor):
def _extract_formats(self, manifest_url, video_id, fatal=True):
formats = []
formats.extend(self._extract_f4m_formats(
manifest_url + '?hdcore=3.5.0&plugin=aasp-3.5.0.151.81',
video_id, f4m_id='hds', fatal=fatal))
formats.extend(self._extract_m3u8_formats(manifest_url.replace(
'akamaihd.net/z/', 'akamaihd.net/i/').replace('/manifest.f4m', '/master.m3u8'),
video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=fatal))
return formats
_TESTS = [
{
def _real_extract(self, url):
video_id = self._match_id(url)
data = self._download_json(
'http://%s/mediaelement/%s' % (self._API_HOST, video_id),
video_id, 'Downloading mediaelement JSON')
title = data.get('fullTitle') or data.get('mainTitle') or data['title']
video_id = data.get('id') or video_id
entries = []
media_assets = data.get('mediaAssets')
if media_assets and isinstance(media_assets, list):
def video_id_and_title(idx):
return ((video_id, title) if len(media_assets) == 1
else ('%s-%d' % (video_id, idx), '%s (Part %d)' % (title, idx)))
for num, asset in enumerate(media_assets, 1):
asset_url = asset.get('url')
if not asset_url:
continue
formats = self._extract_formats(asset_url, video_id, fatal=False)
if not formats:
continue
self._sort_formats(formats)
entry_id, entry_title = video_id_and_title(num)
duration = parse_duration(asset.get('duration'))
subtitles = {}
for subtitle in ('webVtt', 'timedText'):
subtitle_url = asset.get('%sSubtitlesUrl' % subtitle)
if subtitle_url:
subtitles.setdefault('no', []).append({'url': subtitle_url})
entries.append({
'id': asset.get('carrierId') or entry_id,
'title': entry_title,
'duration': duration,
'subtitles': subtitles,
'formats': formats,
})
if not entries:
media_url = data.get('mediaUrl')
if media_url:
formats = self._extract_formats(media_url, video_id)
self._sort_formats(formats)
duration = parse_duration(data.get('duration'))
entries = [{
'id': video_id,
'title': title,
'duration': duration,
'formats': formats,
}]
if not entries:
if data.get('usageRights', {}).get('isGeoBlocked'):
raise ExtractorError(
'NRK har ikke rettigheter til å vise dette programmet utenfor Norge',
expected=True)
conviva = data.get('convivaStatistics') or {}
series = conviva.get('seriesName') or data.get('seriesTitle')
episode = conviva.get('episodeName') or data.get('episodeNumberOrDate')
thumbnails = None
images = data.get('images')
if images and isinstance(images, dict):
web_images = images.get('webImages')
if isinstance(web_images, list):
thumbnails = [{
'url': image['imageUrl'],
'width': int_or_none(image.get('width')),
'height': int_or_none(image.get('height')),
} for image in web_images if image.get('imageUrl')]
description = data.get('description')
common_info = {
'description': description,
'series': series,
'episode': episode,
'age_limit': parse_age_limit(data.get('legalAge')),
'thumbnails': thumbnails,
}
vcodec = 'none' if data.get('mediaType') == 'Audio' else None
# TODO: extract chapters when https://github.com/rg3/youtube-dl/pull/9409 is merged
for entry in entries:
entry.update(common_info)
for f in entry['formats']:
f['vcodec'] = vcodec
return self.playlist_result(entries, video_id, title, description)
class NRKIE(NRKBaseIE):
_VALID_URL = r'(?:nrk:|https?://(?:www\.)?nrk\.no/video/PS\*)(?P<id>\d+)'
_API_HOST = 'v8.psapi.nrk.no'
_TESTS = [{
# video
'url': 'http://www.nrk.no/video/PS*150533',
# MD5 is unstable
'md5': '2f7f6eeb2aacdd99885f355428715cfa',
'info_dict': {
'id': '150533',
'ext': 'flv',
'ext': 'mp4',
'title': 'Dompap og andre fugler i Piip-Show',
'description': 'md5:d9261ba34c43b61c812cb6b0269a5c8f',
'duration': 263,
}
},
{
}, {
# audio
'url': 'http://www.nrk.no/video/PS*154915',
# MD5 is unstable
'info_dict': {
@ -42,52 +145,75 @@ class NRKIE(InfoExtractor):
'description': 'md5:a621f5cc1bd75c8d5104cb048c6b8568',
'duration': 20,
}
},
]
def _real_extract(self, url):
video_id = self._match_id(url)
data = self._download_json(
'http://v8.psapi.nrk.no/mediaelement/%s' % video_id,
video_id, 'Downloading media JSON')
media_url = data.get('mediaUrl')
if not media_url:
if data['usageRights']['isGeoBlocked']:
raise ExtractorError(
'NRK har ikke rettigheter til å vise dette programmet utenfor Norge',
expected=True)
if determine_ext(media_url) == 'f4m':
formats = self._extract_f4m_formats(
media_url + '?hdcore=3.5.0&plugin=aasp-3.5.0.151.81', video_id, f4m_id='hds')
self._sort_formats(formats)
else:
formats = [{
'url': media_url,
'ext': 'flv',
}]
duration = parse_duration(data.get('duration'))
images = data.get('images')
if images:
thumbnails = images['webImages']
thumbnails.sort(key=lambda image: image['pixelWidth'])
thumbnail = thumbnails[-1]['imageUrl']
else:
thumbnail = None
class NRKTVIE(NRKBaseIE):
IE_DESC = 'NRK TV and NRK Radio'
_VALID_URL = r'https?://(?:tv|radio)\.nrk(?:super)?\.no/(?:serie/[^/]+|program)/(?P<id>[a-zA-Z]{4}\d{8})(?:/\d{2}-\d{2}-\d{4})?(?:#del=(?P<part_id>\d+))?'
_API_HOST = 'psapi-we.nrk.no'
return {
'id': video_id,
'title': data['title'],
'description': data['description'],
'duration': duration,
'thumbnail': thumbnail,
'formats': formats,
}
_TESTS = [{
'url': 'https://tv.nrk.no/serie/20-spoersmaal-tv/MUHH48000314/23-05-2014',
'md5': '4e9ca6629f09e588ed240fb11619922a',
'info_dict': {
'id': 'MUHH48000314AA',
'ext': 'mp4',
'title': '20 spørsmål 23.05.2014',
'description': 'md5:bdea103bc35494c143c6a9acdd84887a',
'duration': 1741.52,
},
}, {
'url': 'https://tv.nrk.no/program/mdfp15000514',
'md5': '43d0be26663d380603a9cf0c24366531',
'info_dict': {
'id': 'MDFP15000514CA',
'ext': 'mp4',
'title': 'Grunnlovsjubiléet - Stor ståhei for ingenting 24.05.2014',
'description': 'md5:89290c5ccde1b3a24bb8050ab67fe1db',
'duration': 4605.08,
},
}, {
# single playlist video
'url': 'https://tv.nrk.no/serie/tour-de-ski/MSPO40010515/06-01-2015#del=2',
'md5': 'adbd1dbd813edaf532b0a253780719c2',
'info_dict': {
'id': 'MSPO40010515-part2',
'ext': 'flv',
'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn 06.01.2015 (del 2:2)',
'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26',
},
'skip': 'Only works from Norway',
}, {
'url': 'https://tv.nrk.no/serie/tour-de-ski/MSPO40010515/06-01-2015',
'playlist': [{
'md5': '9480285eff92d64f06e02a5367970a7a',
'info_dict': {
'id': 'MSPO40010515-part1',
'ext': 'flv',
'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn 06.01.2015 (del 1:2)',
'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26',
},
}, {
'md5': 'adbd1dbd813edaf532b0a253780719c2',
'info_dict': {
'id': 'MSPO40010515-part2',
'ext': 'flv',
'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn 06.01.2015 (del 2:2)',
'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26',
},
}],
'info_dict': {
'id': 'MSPO40010515',
'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn',
'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26',
'duration': 6947.52,
},
'skip': 'Only works from Norway',
}, {
'url': 'https://radio.nrk.no/serie/dagsnytt/NPUB21019315/12-07-2015#',
'only_matching': True,
}]
class NRKPlaylistIE(InfoExtractor):
@ -159,179 +285,3 @@ class NRKSkoleIE(InfoExtractor):
nrk_id = self._search_regex(r'data-nrk-id=["\'](\d+)', webpage, 'nrk id')
return self.url_result('nrk:%s' % nrk_id)
class NRKTVIE(InfoExtractor):
IE_DESC = 'NRK TV and NRK Radio'
_VALID_URL = r'(?P<baseurl>https?://(?:tv|radio)\.nrk(?:super)?\.no/)(?:serie/[^/]+|program)/(?P<id>[a-zA-Z]{4}\d{8})(?:/\d{2}-\d{2}-\d{4})?(?:#del=(?P<part_id>\d+))?'
_TESTS = [
{
'url': 'https://tv.nrk.no/serie/20-spoersmaal-tv/MUHH48000314/23-05-2014',
'info_dict': {
'id': 'MUHH48000314',
'ext': 'mp4',
'title': '20 spørsmål',
'description': 'md5:bdea103bc35494c143c6a9acdd84887a',
'upload_date': '20140523',
'duration': 1741.52,
},
'params': {
# m3u8 download
'skip_download': True,
},
},
{
'url': 'https://tv.nrk.no/program/mdfp15000514',
'info_dict': {
'id': 'mdfp15000514',
'ext': 'mp4',
'title': 'Grunnlovsjubiléet - Stor ståhei for ingenting',
'description': 'md5:654c12511f035aed1e42bdf5db3b206a',
'upload_date': '20140524',
'duration': 4605.08,
},
'params': {
# m3u8 download
'skip_download': True,
},
},
{
# single playlist video
'url': 'https://tv.nrk.no/serie/tour-de-ski/MSPO40010515/06-01-2015#del=2',
'md5': 'adbd1dbd813edaf532b0a253780719c2',
'info_dict': {
'id': 'MSPO40010515-part2',
'ext': 'flv',
'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn 06.01.2015 (del 2:2)',
'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26',
'upload_date': '20150106',
},
'skip': 'Only works from Norway',
},
{
'url': 'https://tv.nrk.no/serie/tour-de-ski/MSPO40010515/06-01-2015',
'playlist': [
{
'md5': '9480285eff92d64f06e02a5367970a7a',
'info_dict': {
'id': 'MSPO40010515-part1',
'ext': 'flv',
'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn 06.01.2015 (del 1:2)',
'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26',
'upload_date': '20150106',
},
},
{
'md5': 'adbd1dbd813edaf532b0a253780719c2',
'info_dict': {
'id': 'MSPO40010515-part2',
'ext': 'flv',
'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn 06.01.2015 (del 2:2)',
'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26',
'upload_date': '20150106',
},
},
],
'info_dict': {
'id': 'MSPO40010515',
'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn',
'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26',
'upload_date': '20150106',
'duration': 6947.5199999999995,
},
'skip': 'Only works from Norway',
},
{
'url': 'https://radio.nrk.no/serie/dagsnytt/NPUB21019315/12-07-2015#',
'only_matching': True,
}
]
def _extract_f4m(self, manifest_url, video_id):
return self._extract_f4m_formats(
manifest_url + '?hdcore=3.1.1&plugin=aasp-3.1.1.69.124', video_id, f4m_id='hds')
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
part_id = mobj.group('part_id')
base_url = mobj.group('baseurl')
webpage = self._download_webpage(url, video_id)
title = self._html_search_meta(
'title', webpage, 'title')
description = self._html_search_meta(
'description', webpage, 'description')
thumbnail = self._html_search_regex(
r'data-posterimage="([^"]+)"',
webpage, 'thumbnail', fatal=False)
upload_date = unified_strdate(self._html_search_meta(
'rightsfrom', webpage, 'upload date', fatal=False))
duration = float_or_none(self._html_search_regex(
r'data-duration="([^"]+)"',
webpage, 'duration', fatal=False))
# playlist
parts = re.findall(
r'<a href="#del=(\d+)"[^>]+data-argument="([^"]+)">([^<]+)</a>', webpage)
if parts:
entries = []
for current_part_id, stream_url, part_title in parts:
if part_id and current_part_id != part_id:
continue
video_part_id = '%s-part%s' % (video_id, current_part_id)
formats = self._extract_f4m(stream_url, video_part_id)
entries.append({
'id': video_part_id,
'title': part_title,
'description': description,
'thumbnail': thumbnail,
'upload_date': upload_date,
'formats': formats,
})
if part_id:
if entries:
return entries[0]
else:
playlist = self.playlist_result(entries, video_id, title, description)
playlist.update({
'thumbnail': thumbnail,
'upload_date': upload_date,
'duration': duration,
})
return playlist
formats = []
f4m_url = re.search(r'data-media="([^"]+)"', webpage)
if f4m_url:
formats.extend(self._extract_f4m(f4m_url.group(1), video_id))
m3u8_url = re.search(r'data-hls-media="([^"]+)"', webpage)
if m3u8_url:
formats.extend(self._extract_m3u8_formats(m3u8_url.group(1), video_id, 'mp4', m3u8_id='hls'))
self._sort_formats(formats)
subtitles_url = self._html_search_regex(
r'data-subtitlesurl\s*=\s*(["\'])(?P<url>.+?)\1',
webpage, 'subtitle URL', default=None, group='url')
subtitles = {}
if subtitles_url:
subtitles['no'] = [{
'ext': 'ttml',
'url': compat_urlparse.urljoin(base_url, subtitles_url),
}]
return {
'id': video_id,
'title': title,
'description': description,
'thumbnail': thumbnail,
'upload_date': upload_date,
'duration': duration,
'formats': formats,
'subtitles': subtitles,
}

View File

@ -100,7 +100,7 @@ class OpenloadIE(InfoExtractor):
raise ExtractorError('File not found', expected=True)
code = self._search_regex(
r'<video[^>]+>\s*<script[^>]+>([^<]+)</script>',
r'</video>\s*</div>\s*<script[^>]+>([^<]+)</script>',
webpage, 'JS code')
decoded = self.openload_decode(code)

View File

@ -12,8 +12,8 @@ from ..utils import (
class OraTVIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?ora\.tv/([^/]+/)*(?P<id>[^/\?#]+)'
_TEST = {
_VALID_URL = r'https?://(?:www\.)?(?:ora\.tv|unsafespeech\.com)/([^/]+/)*(?P<id>[^/\?#]+)'
_TESTS = [{
'url': 'https://www.ora.tv/larrykingnow/2015/12/16/vine-youtube-stars-zach-king-king-bach-on-their-viral-videos-0_36jupg6090pq',
'md5': 'fa33717591c631ec93b04b0e330df786',
'info_dict': {
@ -22,7 +22,10 @@ class OraTVIE(InfoExtractor):
'title': 'Vine & YouTube Stars Zach King & King Bach On Their Viral Videos!',
'description': 'md5:ebbc5b1424dd5dba7be7538148287ac1',
}
}
}, {
'url': 'http://www.unsafespeech.com/video/2016/5/10/student-self-censorship-and-the-thought-police-on-university-campuses-0_6622bnkppw4d',
'only_matching': True,
}]
def _real_extract(self, url):
display_id = self._match_id(url)

View File

@ -4,28 +4,35 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import compat_urllib_parse_urlencode
from ..utils import sanitized_Request
from ..utils import (
HEADRequest,
ExtractorError,
int_or_none,
update_url_query,
qualities,
get_element_by_attribute,
clean_html,
)
class SinaIE(InfoExtractor):
_VALID_URL = r'''(?x)https?://(.*?\.)?video\.sina\.com\.cn/
(
(.+?/(((?P<pseudo_id>\d+).html)|(.*?(\#|(vid=)|b/)(?P<id>\d+?)($|&|\-))))
|
_VALID_URL = r'''(?x)https?://(?:.*?\.)?video\.sina\.com\.cn/
(?:
(?:view/|.*\#)(?P<video_id>\d+)|
.+?/(?P<pseudo_id>[^/?#]+)(?:\.s?html)|
# This is used by external sites like Weibo
(api/sinawebApi/outplay.php/(?P<token>.+?)\.swf)
api/sinawebApi/outplay.php/(?P<token>.+?)\.swf
)
'''
_TESTS = [
{
'url': 'http://video.sina.com.cn/news/vlist/zt/chczlj2013/?opsubject_id=top12#110028898',
'md5': 'd65dd22ddcf44e38ce2bf58a10c3e71f',
'url': 'http://video.sina.com.cn/news/spj/topvideoes20160504/?opsubject_id=top1#250576622',
'md5': 'd38433e2fc886007729735650ae4b3e9',
'info_dict': {
'id': '110028898',
'ext': 'flv',
'title': '《中国新闻》 朝鲜要求巴拿马立即释放被扣船员',
'id': '250576622',
'ext': 'mp4',
'title': '现场:克鲁兹宣布退选 特朗普将稳获提名',
}
},
{
@ -35,37 +42,74 @@ class SinaIE(InfoExtractor):
'ext': 'flv',
'title': '军方提高对朝情报监视级别',
},
'skip': 'the page does not exist or has been deleted',
},
{
'url': 'http://video.sina.com.cn/view/250587748.html',
'md5': '3d1807a25c775092aab3bc157fff49b4',
'info_dict': {
'id': '250587748',
'ext': 'mp4',
'title': '瞬间泪目8年前汶川地震珍贵视频首曝光',
},
},
]
def _extract_video(self, video_id):
data = compat_urllib_parse_urlencode({'vid': video_id})
url_doc = self._download_xml('http://v.iask.com/v_play.php?%s' % data,
video_id, 'Downloading video url')
image_page = self._download_webpage(
'http://interface.video.sina.com.cn/interface/common/getVideoImage.php?%s' % data,
video_id, 'Downloading thumbnail info')
return {'id': video_id,
'url': url_doc.find('./durl/url').text,
'ext': 'flv',
'title': url_doc.find('./vname').text,
'thumbnail': image_page.split('=')[1],
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
video_id = mobj.group('video_id')
if not video_id:
if mobj.group('token') is not None:
# The video id is in the redirected url
self.to_screen('Getting video id')
request = sanitized_Request(url)
request.get_method = lambda: 'HEAD'
request = HEADRequest(url)
(_, urlh) = self._download_webpage_handle(request, 'NA', False)
return self._real_extract(urlh.geturl())
elif video_id is None:
else:
pseudo_id = mobj.group('pseudo_id')
webpage = self._download_webpage(url, pseudo_id)
video_id = self._search_regex(r'vid:\'(\d+?)\'', webpage, 'video id')
error = get_element_by_attribute('class', 'errtitle', webpage)
if error:
raise ExtractorError('%s said: %s' % (
self.IE_NAME, clean_html(error)), expected=True)
video_id = self._search_regex(
r"video_id\s*:\s*'(\d+)'", webpage, 'video id')
return self._extract_video(video_id)
video_data = self._download_json(
'http://s.video.sina.com.cn/video/h5play',
video_id, query={'video_id': video_id})
if video_data['code'] != 1:
raise ExtractorError('%s said: %s' % (
self.IE_NAME, video_data['message']), expected=True)
else:
video_data = video_data['data']
title = video_data['title']
description = video_data.get('description')
if description:
description = description.strip()
preference = qualities(['cif', 'sd', 'hd', 'fhd', 'ffd'])
formats = []
for quality_id, quality in video_data.get('videos', {}).get('mp4', {}).items():
file_api = quality.get('file_api')
file_id = quality.get('file_id')
if not file_api or not file_id:
continue
formats.append({
'format_id': quality_id,
'url': update_url_query(file_api, {'vid': file_id}),
'preference': preference(quality_id),
'ext': 'mp4',
})
self._sort_formats(formats)
return {
'id': video_id,
'title': title,
'description': description,
'thumbnail': video_data.get('image'),
'duration': int_or_none(video_data.get('length')),
'timestamp': int_or_none(video_data.get('create_time')),
'formats': formats,
}

View File

@ -88,7 +88,7 @@ class TeamcocoIE(InfoExtractor):
preload_codes = self._html_search_regex(
r'(function.+)setTimeout\(function\(\)\{playlist',
webpage, 'preload codes')
base64_fragments = re.findall(r'"([a-zA-z0-9+/=]+)"', preload_codes)
base64_fragments = re.findall(r'"([a-zA-Z0-9+/=]+)"', preload_codes)
base64_fragments.remove('init')
def _check_sequence(cur_fragments):

View File

@ -0,0 +1,139 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
determine_ext,
js_to_json,
mimetype2ext,
)
class ThreeQSDNIE(InfoExtractor):
IE_NAME = '3qsdn'
IE_DESC = '3Q SDN'
_VALID_URL = r'https?://playout\.3qsdn\.com/(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
_TESTS = [{
# ondemand from http://www.philharmonie.tv/veranstaltung/26/
'url': 'http://playout.3qsdn.com/0280d6b9-1215-11e6-b427-0cc47a188158?protocol=http',
'md5': 'ab040e37bcfa2e0c079f92cb1dd7f6cd',
'info_dict': {
'id': '0280d6b9-1215-11e6-b427-0cc47a188158',
'ext': 'mp4',
'title': '0280d6b9-1215-11e6-b427-0cc47a188158',
'is_live': False,
},
'expected_warnings': ['Failed to download MPD manifest'],
}, {
# live video stream
'url': 'https://playout.3qsdn.com/d755d94b-4ab9-11e3-9162-0025907ad44f?js=true',
'info_dict': {
'id': 'd755d94b-4ab9-11e3-9162-0025907ad44f',
'ext': 'mp4',
'title': 'd755d94b-4ab9-11e3-9162-0025907ad44f',
'is_live': False,
},
}, {
# live audio stream
'url': 'http://playout.3qsdn.com/9edf36e0-6bf2-11e2-a16a-9acf09e2db48',
'only_matching': True,
}, {
# live audio stream with some 404 URLs
'url': 'http://playout.3qsdn.com/ac5c3186-777a-11e2-9c30-9acf09e2db48',
'only_matching': True,
}, {
# geo restricted with 'This content is not available in your country'
'url': 'http://playout.3qsdn.com/d63a3ffe-75e8-11e2-9c30-9acf09e2db48',
'only_matching': True,
}, {
# geo restricted with 'playout.3qsdn.com/forbidden'
'url': 'http://playout.3qsdn.com/8e330f26-6ae2-11e2-a16a-9acf09e2db48',
'only_matching': True,
}, {
# live video with rtmp link
'url': 'https://playout.3qsdn.com/6092bb9e-8f72-11e4-a173-002590c750be',
'only_matching': True,
}]
@staticmethod
def _extract_url(webpage):
mobj = re.search(
r'<iframe[^>]+\b(?:data-)?src=(["\'])(?P<url>%s.*?)\1' % ThreeQSDNIE._VALID_URL, webpage)
if mobj:
return mobj.group('url')
def _real_extract(self, url):
video_id = self._match_id(url)
js = self._download_webpage(
'http://playout.3qsdn.com/%s' % video_id, video_id,
query={'js': 'true'})
if any(p in js for p in (
'>This content is not available in your country',
'playout.3qsdn.com/forbidden')):
self.raise_geo_restricted()
stream_content = self._search_regex(
r'streamContent\s*:\s*(["\'])(?P<content>.+?)\1', js,
'stream content', default='demand', group='content')
live = stream_content == 'live'
stream_type = self._search_regex(
r'streamType\s*:\s*(["\'])(?P<type>audio|video)\1', js,
'stream type', default='video', group='type')
formats = []
urls = set()
def extract_formats(item_url, item={}):
if not item_url or item_url in urls:
return
urls.add(item_url)
type_ = item.get('type')
ext = determine_ext(item_url, default_ext=None)
if type_ == 'application/dash+xml' or ext == 'mpd':
formats.extend(self._extract_mpd_formats(
item_url, video_id, mpd_id='mpd', fatal=False))
elif type_ in ('application/vnd.apple.mpegURL', 'application/x-mpegurl') or ext == 'm3u8':
formats.extend(self._extract_m3u8_formats(
item_url, video_id, 'mp4',
entry_protocol='m3u8' if live else 'm3u8_native',
m3u8_id='hls', fatal=False))
elif ext == 'f4m':
formats.extend(self._extract_f4m_formats(
item_url, video_id, f4m_id='hds', fatal=False))
else:
if not self._is_valid_url(item_url, video_id):
return
formats.append({
'url': item_url,
'format_id': item.get('quality'),
'ext': 'mp4' if item_url.startswith('rtsp') else mimetype2ext(type_) or ext,
'vcodec': 'none' if stream_type == 'audio' else None,
})
for item_js in re.findall(r'({.*?\b(?:src|source)\s*:\s*["\'].+?})', js):
f = self._parse_json(
item_js, video_id, transform_source=js_to_json, fatal=False)
if not f:
continue
extract_formats(f.get('src'), f)
# More relaxed version to collect additional URLs and acting
# as a future-proof fallback
for _, src in re.findall(r'\b(?:src|source)\s*:\s*(["\'])((?:https?|rtsp)://.+?)\1', js):
extract_formats(src)
self._sort_formats(formats)
title = self._live_title(video_id) if live else video_id
return {
'id': video_id,
'title': title,
'is_live': live,
'formats': formats,
}

View File

@ -171,6 +171,7 @@ class TwitchVideoIE(TwitchItemBaseIE):
'title': 'Worlds Semifinals - Star Horn Royal Club vs. OMG',
},
'playlist_mincount': 12,
'skip': 'HTTP Error 404: Not Found',
}
@ -187,6 +188,7 @@ class TwitchChapterIE(TwitchItemBaseIE):
'title': 'ACRL Off Season - Sports Cars @ Nordschleife',
},
'playlist_mincount': 3,
'skip': 'HTTP Error 404: Not Found',
}, {
'url': 'http://www.twitch.tv/tsm_theoddone/c/2349361',
'only_matching': True,
@ -355,31 +357,6 @@ class TwitchPastBroadcastsIE(TwitchPlaylistBaseIE):
}
class TwitchBookmarksIE(TwitchPlaylistBaseIE):
IE_NAME = 'twitch:bookmarks'
_VALID_URL = r'%s/(?P<id>[^/]+)/profile/bookmarks/?(?:\#.*)?$' % TwitchBaseIE._VALID_URL_BASE
_PLAYLIST_URL = '%s/api/bookmark/?user=%%s&offset=%%d&limit=%%d' % TwitchBaseIE._API_BASE
_PLAYLIST_TYPE = 'bookmarks'
_TEST = {
'url': 'http://www.twitch.tv/ognos/profile/bookmarks',
'info_dict': {
'id': 'ognos',
'title': 'Ognos',
},
'playlist_mincount': 3,
}
def _extract_playlist_page(self, response):
entries = []
for bookmark in response.get('bookmarks', []):
video = bookmark.get('video')
if not video:
continue
entries.append(video['url'])
return entries
class TwitchStreamIE(TwitchBaseIE):
IE_NAME = 'twitch:stream'
_VALID_URL = r'%s/(?P<id>[^/#?]+)/?(?:\#.*)?$' % TwitchBaseIE._VALID_URL_BASE

View File

@ -6,10 +6,12 @@ from .common import InfoExtractor
from ..utils import (
int_or_none,
unified_strdate,
unescapeHTML,
)
class UstudioIE(InfoExtractor):
IE_NAME = 'ustudio'
_VALID_URL = r'https?://(?:(?:www|v1)\.)?ustudio\.com/video/(?P<id>[^/]+)/(?P<display_id>[^/?#&]+)'
_TEST = {
'url': 'http://ustudio.com/video/Uxu2my9bgSph/san_francisco_golden_gate_bridge',
@ -27,9 +29,7 @@ class UstudioIE(InfoExtractor):
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
display_id = mobj.group('display_id')
video_id, display_id = re.match(self._VALID_URL, url).groups()
config = self._download_xml(
'http://v1.ustudio.com/embed/%s/ustudio/config.xml' % video_id,
@ -37,7 +37,7 @@ class UstudioIE(InfoExtractor):
def extract(kind):
return [{
'url': item.attrib['url'],
'url': unescapeHTML(item.attrib['url']),
'width': int_or_none(item.get('width')),
'height': int_or_none(item.get('height')),
} for item in config.findall('./qualities/quality/%s' % kind) if item.get('url')]
@ -65,3 +65,61 @@ class UstudioIE(InfoExtractor):
'uploader': uploader,
'formats': formats,
}
class UstudioEmbedIE(InfoExtractor):
IE_NAME = 'ustudio:embed'
_VALID_URL = r'https?://(?:(?:app|embed)\.)?ustudio\.com/embed/(?P<uid>[^/]+)/(?P<id>[^/]+)'
_TEST = {
'url': 'http://app.ustudio.com/embed/DeN7VdYRDKhP/Uw7G1kMCe65T',
'md5': '47c0be52a09b23a7f40de9469cec58f4',
'info_dict': {
'id': 'Uw7G1kMCe65T',
'ext': 'mp4',
'title': '5 Things IT Should Know About Video',
'description': 'md5:93d32650884b500115e158c5677d25ad',
'uploader_id': 'DeN7VdYRDKhP',
}
}
def _real_extract(self, url):
uploader_id, video_id = re.match(self._VALID_URL, url).groups()
video_data = self._download_json(
'http://app.ustudio.com/embed/%s/%s/config.json' % (uploader_id, video_id),
video_id)['videos'][0]
title = video_data['name']
formats = []
for ext, qualities in video_data.get('transcodes', {}).items():
for quality in qualities:
quality_url = quality.get('url')
if not quality_url:
continue
height = int_or_none(quality.get('height'))
formats.append({
'format_id': '%s-%dp' % (ext, height) if height else ext,
'url': quality_url,
'width': int_or_none(quality.get('width')),
'height': height,
})
self._sort_formats(formats)
thumbnails = []
for image in video_data.get('images', []):
image_url = image.get('url')
if not image_url:
continue
thumbnails.append({
'url': image_url,
})
return {
'id': video_id,
'title': title,
'description': video_data.get('description'),
'duration': int_or_none(video_data.get('duration')),
'uploader_id': uploader_id,
'tags': video_data.get('keywords'),
'thumbnails': thumbnails,
'formats': formats,
}

View File

@ -213,19 +213,17 @@ class VevoIE(VevoBaseIE):
formats = []
if not video_info:
if response and response.get('statusCode') != 909:
try:
self._initialize_api(video_id)
except ExtractorError:
ytid = response.get('errorInfo', {}).get('ytid')
if ytid:
self.report_warning(
'Video is geoblocked, trying with the YouTube video %s' % ytid)
return self.url_result(ytid, 'Youtube', ytid)
if 'statusMessage' in response:
raise ExtractorError('%s said: %s' % (
self.IE_NAME, response['statusMessage']), expected=True)
raise ExtractorError('Unable to extract videos')
raise
self._initialize_api(video_id)
video_info = self._call_api(
'video/%s' % video_id, video_id, 'Downloading api video info',
'Failed to download video info')

View File

@ -8,7 +8,6 @@ from ..utils import (
clean_html,
ExtractorError,
determine_ext,
sanitized_Request,
)
@ -25,8 +24,6 @@ class XVideosIE(InfoExtractor):
}
}
_ANDROID_USER_AGENT = 'Mozilla/5.0 (Linux; Android 4.0.4; Galaxy Nexus Build/IMM76B) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.133 Mobile Safari/535.19'
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
@ -35,31 +32,34 @@ class XVideosIE(InfoExtractor):
if mobj:
raise ExtractorError('%s said: %s' % (self.IE_NAME, clean_html(mobj.group(1))), expected=True)
video_url = compat_urllib_parse_unquote(
self._search_regex(r'flv_url=(.+?)&', webpage, 'video URL'))
video_title = self._html_search_regex(
r'<title>(.*?)\s+-\s+XVID', webpage, 'title')
video_thumbnail = self._search_regex(
r'url_bigthumb=(.+?)&amp', webpage, 'thumbnail', fatal=False)
formats = [{
'url': video_url,
}]
formats = []
android_req = sanitized_Request(url)
android_req.add_header('User-Agent', self._ANDROID_USER_AGENT)
android_webpage = self._download_webpage(android_req, video_id, fatal=False)
video_url = compat_urllib_parse_unquote(self._search_regex(
r'flv_url=(.+?)&', webpage, 'video URL', default=''))
if video_url:
formats.append({'url': video_url})
if android_webpage is not None:
player_params_str = self._search_regex(
'mobileReplacePlayerDivTwoQual\(([^)]+)\)',
android_webpage, 'player parameters', default='')
player_params = list(map(lambda s: s.strip(' \''), player_params_str.split(',')))
if player_params:
formats.extend([{
'url': param,
'preference': -10,
} for param in player_params if determine_ext(param) == 'mp4'])
player_args = self._search_regex(
r'(?s)new\s+HTML5Player\((.+?)\)', webpage, ' html5 player', default=None)
if player_args:
for arg in player_args.split(','):
format_url = self._search_regex(
r'(["\'])(?P<url>https?://.+?)\1', arg, 'url',
default=None, group='url')
if not format_url:
continue
ext = determine_ext(format_url)
if ext == 'mp4':
formats.append({'url': format_url})
elif ext == 'm3u8':
formats.extend(self._extract_m3u8_formats(
format_url, video_id, 'mp4',
entry_protocol='m3u8_native', m3u8_id='hls', fatal=False))
self._sort_formats(formats)
@ -67,7 +67,6 @@ class XVideosIE(InfoExtractor):
'id': video_id,
'formats': formats,
'title': video_title,
'ext': 'flv',
'thumbnail': video_thumbnail,
'age_limit': 18,
}

View File

@ -3,7 +3,7 @@ from __future__ import unicode_literals
import subprocess
from .common import PostProcessor
from ..compat import shlex_quote
from ..compat import compat_shlex_quote
from ..utils import PostProcessingError
@ -17,7 +17,7 @@ class ExecAfterDownloadPP(PostProcessor):
if '{}' not in cmd:
cmd += ' {}'
cmd = cmd.replace('{}', shlex_quote(information['filepath']))
cmd = cmd.replace('{}', compat_shlex_quote(information['filepath']))
self._downloader.to_screen('[exec] Executing command: %s' % cmd)
retCode = subprocess.call(cmd, shell=True)

View File

@ -42,6 +42,7 @@ from .compat import (
compat_http_client,
compat_kwargs,
compat_parse_qs,
compat_shlex_quote,
compat_socket_create_connection,
compat_str,
compat_struct_pack,
@ -49,10 +50,10 @@ from .compat import (
compat_urllib_parse,
compat_urllib_parse_urlencode,
compat_urllib_parse_urlparse,
compat_urllib_parse_unquote_plus,
compat_urllib_request,
compat_urlparse,
compat_xpath,
shlex_quote,
)
from .socks import (
@ -104,9 +105,9 @@ KNOWN_EXTENSIONS = (
'f4f', 'f4m', 'm3u8', 'smil')
# needed for sanitizing filenames in restricted mode
ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ',
itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOUUUUYP', ['ss'],
'aaaaaa', ['ae'], 'ceeeeiiiionoooooouuuuypy')))
ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØŒÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõöøœùúûüýþÿ',
itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOO', ['OE'], 'UUUUYP', ['ss'],
'aaaaaa', ['ae'], 'ceeeeiiiionoooooo', ['oe'], 'uuuuypy')))
def preferredencoding():
@ -882,11 +883,17 @@ def make_socks_conn_class(base_class, socks_proxy):
elif url_components.scheme.lower() == 'socks4a':
socks_type = ProxyType.SOCKS4A
def unquote_if_non_empty(s):
if not s:
return s
return compat_urllib_parse_unquote_plus(s)
proxy_args = (
socks_type,
url_components.hostname, url_components.port or 1080,
True, # Remote DNS
url_components.username, url_components.password
unquote_if_non_empty(url_components.username),
unquote_if_non_empty(url_components.password),
)
class SocksConnection(base_class):
@ -1912,24 +1919,38 @@ def js_to_json(code):
v = m.group(0)
if v in ('true', 'false', 'null'):
return v
if v.startswith('"'):
v = re.sub(r"\\'", "'", v[1:-1])
elif v.startswith("'"):
v = v[1:-1]
v = re.sub(r"\\\\|\\'|\"", lambda m: {
'\\\\': '\\\\',
"\\'": "'",
elif v.startswith('/*') or v == ',':
return ""
if v[0] in ("'", '"'):
v = re.sub(r'(?s)\\.|"', lambda m: {
'"': '\\"',
}[m.group(0)], v)
"\\'": "'",
'\\\n': '',
'\\x': '\\u00',
}.get(m.group(0), m.group(0)), v[1:-1])
INTEGER_TABLE = (
(r'^0[xX][0-9a-fA-F]+', 16),
(r'^0+[0-7]+', 8),
)
for regex, base in INTEGER_TABLE:
im = re.match(regex, v)
if im:
i = int(im.group(0), base)
return '"%d":' % i if v.endswith(':') else '%d' % i
return '"%s"' % v
res = re.sub(r'''(?x)
"(?:[^"\\]*(?:\\\\|\\['"nu]))*[^"\\]*"|
'(?:[^'\\]*(?:\\\\|\\['"nu]))*[^'\\]*'|
[a-zA-Z_][.a-zA-Z_0-9]*
return re.sub(r'''(?sx)
"(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"|
'(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'|
/\*.*?\*/|,(?=\s*[\]}])|
[a-zA-Z_][.a-zA-Z_0-9]*|
(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:\s*:)?|
[0-9]+(?=\s*:)
''', fix_kv, code)
res = re.sub(r',(\s*[\]}])', lambda m: m.group(1), res)
return res
def qualities(quality_ids):
@ -1977,7 +1998,7 @@ def ytdl_is_updateable():
def args_to_str(args):
# Get a short string representation for a subprocess command
return ' '.join(shlex_quote(a) for a in args)
return ' '.join(compat_shlex_quote(a) for a in args)
def error_to_compat_str(err):
@ -2015,11 +2036,7 @@ def mimetype2ext(mt):
def urlhandle_detect_ext(url_handle):
try:
url_handle.headers
getheader = lambda h: url_handle.headers[h]
except AttributeError: # Python < 3
getheader = url_handle.info().getheader
getheader = url_handle.headers.get
cd = getheader('Content-Disposition')
if cd:

View File

@ -1,3 +1,3 @@
from __future__ import unicode_literals
__version__ = '2016.05.10'
__version__ = '2016.05.16'