Merge branch 'master' into DrTuber-issue-12058

Parmjit Virk 2017-06-20 16:55:21 -05:00
commit 894861d1b1
21 changed files with 334 additions and 87 deletions

.github/ISSUE_TEMPLATE.md

@@ -6,8 +6,8 @@
---
-### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.06.12*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
-- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.06.12**
+### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.06.18*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.06.18**
### Before submitting an *issue* make sure you have:
- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
@@ -35,7 +35,7 @@ $ youtube-dl -v <your command line>
[debug] User config: []
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
-[debug] youtube-dl version 2017.06.12
+[debug] youtube-dl version 2017.06.18
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
[debug] Proxy map: {}

ChangeLog

@@ -1,10 +1,31 @@
version <unreleased>
Extractors
* [pandora.tv] Fix upload_date extraction (#12846)
version 2017.06.18
Core
* [downloader/common] Use utils.shell_quote for debug command line
* [utils] Use compat_shlex_quote in shell_quote
* [postprocessor/execafterdownload] Encode command line (#13407)
* [compat] Fix compat_shlex_quote on Windows (#5889, #10254)
* [postprocessor/metadatafromtitle] Fix missing optional meta fields processing
in --metadata-from-title (#13408)
* [extractor/common] Fix json dumping with --geo-bypass
+ [extractor/common] Improve jwplayer subtitles extraction
+ [extractor/common] Improve jwplayer formats extraction (#13379)
Extractors
* [polskieradio] Fix extraction (#13392)
+ [xfileshare] Add support for fastvideo.me (#13385)
* [bilibili] Fix extraction of videos with double quotes in titles (#13387)
* [4tube] Fix extraction (#13381, #13382)
+ [disney] Add support for disneychannel.de (#13383)
* [npo] Improve URL regular expression (#13376)
+ [corus] Add support for showcase.ca
+ [corus] Add support for history.ca (#13359)
version 2017.06.12

Makefile

@@ -101,7 +101,7 @@ youtube-dl.tar.gz: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash-
--exclude '*.pyc' \
--exclude '*.pyo' \
--exclude '*~' \
---exclude '__pycache' \
+--exclude '__pycache__' \
--exclude '.git' \
--exclude 'testdata' \
--exclude 'docs/_build' \

devscripts/prepare_manpage.py

@@ -8,7 +8,7 @@ import re
ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
README_FILE = os.path.join(ROOT_DIR, 'README.md')
-PREFIX = '''%YOUTUBE-DL(1)
+PREFIX = r'''%YOUTUBE-DL(1)
# NAME

docs/supportedsites.md

@@ -975,7 +975,7 @@
- **WSJArticle**
- **XBef**
- **XboxClips**
-- **XFileShare**: XFileShare based sites: DaClips, FileHoot, GorillaVid, MovPod, PowerWatch, Rapidvideo.ws, TheVideoBee, Vidto, Streamin.To, XVIDSTAGE, Vid ABC, VidBom, vidlo, RapidVideo.TV
+- **XFileShare**: XFileShare based sites: DaClips, FileHoot, GorillaVid, MovPod, PowerWatch, Rapidvideo.ws, TheVideoBee, Vidto, Streamin.To, XVIDSTAGE, Vid ABC, VidBom, vidlo, RapidVideo.TV, FastVideo.me
- **XHamster**
- **XHamsterEmbed**
- **xiami:album**: 虾米音乐 - 专辑

youtube_dl/compat.py

@@ -2617,14 +2617,22 @@ except ImportError: # Python 2
parsed_result[name] = [value]
return parsed_result
-try:
-from shlex import quote as compat_shlex_quote
-except ImportError: # Python < 3.3
-def compat_shlex_quote(s):
-if re.match(r'^[-_\w./]+$', s):
-return s
-else:
-return "'" + s.replace("'", "'\"'\"'") + "'"
+compat_os_name = os._name if os.name == 'java' else os.name
+if compat_os_name == 'nt':
+def compat_shlex_quote(s):
+return s if re.match(r'^[-_\w./]+$', s) else '"%s"' % s.replace('"', '\\"')
+else:
+try:
+from shlex import quote as compat_shlex_quote
+except ImportError: # Python < 3.3
+def compat_shlex_quote(s):
+if re.match(r'^[-_\w./]+$', s):
+return s
+else:
+return "'" + s.replace("'", "'\"'\"'") + "'"
try:
@@ -2649,9 +2657,6 @@ def compat_ord(c):
return ord(c)
-compat_os_name = os._name if os.name == 'java' else os.name
if sys.version_info >= (3, 0):
compat_getenv = os.getenv
compat_expanduser = os.path.expanduser
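
The gist of the change (#5889, #10254): POSIX single-quote escaping means nothing to cmd.exe, so Windows gets its own double-quote based quoting. A rough standalone sketch of the two behaviours, written as a simplified re-implementation rather than an import of the patched module:

import re


def _quote_posix(s):
    # old behaviour, still used on non-Windows platforms
    if re.match(r'^[-_\w./]+$', s):
        return s
    return "'" + s.replace("'", "'\"'\"'") + "'"


def _quote_nt(s):
    # new behaviour when compat_os_name == 'nt'
    return s if re.match(r'^[-_\w./]+$', s) else '"%s"' % s.replace('"', '\\"')


arg = 'my video "final".mp4'
print(_quote_posix(arg))  # 'my video "final".mp4'
print(_quote_nt(arg))     # "my video \"final\".mp4"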

youtube_dl/downloader/common.py

@@ -8,10 +8,11 @@ import random
from ..compat import compat_os_name
from ..utils import (
+decodeArgument,
encodeFilename,
error_to_compat_str,
-decodeArgument,
format_bytes,
+shell_quote,
timeconvert,
)
@@ -381,10 +382,5 @@ class FileDownloader(object):
if exe is None:
exe = os.path.basename(str_args[0])
-try:
-import pipes
-shell_quote = lambda args: ' '.join(map(pipes.quote, str_args))
-except ImportError:
-shell_quote = repr
self.to_screen('[debug] %s command line: %s' % (
exe, shell_quote(str_args)))
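
With the inline pipes-based fallback gone, the debug line relies on utils.shell_quote, which also copes with bytes filenames. A minimal usage sketch; the argument list is made up:

from youtube_dl.utils import shell_quote

str_args = ['ffmpeg', '-i', 'input file.mp4', '-c', 'copy', 'out.mkv']
print('[debug] %s command line: %s' % ('ffmpeg', shell_quote(str_args)))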

youtube_dl/extractor/abcotvs.py

@@ -22,7 +22,7 @@ class ABCOTVSIE(InfoExtractor):
'display_id': 'east-bay-museum-celebrates-vintage-synthesizers',
'ext': 'mp4',
'title': 'East Bay museum celebrates vintage synthesizers',
-'description': 'md5:a4f10fb2f2a02565c1749d4adbab4b10',
+'description': 'md5:24ed2bd527096ec2a5c67b9d5a9005f3',
'thumbnail': r're:^https?://.*\.jpg$',
'timestamp': 1421123075,
'upload_date': '20150113',

youtube_dl/extractor/adobepass.py

@@ -6,7 +6,10 @@ import time
import xml.etree.ElementTree as etree
from .common import InfoExtractor
-from ..compat import compat_urlparse
+from ..compat import (
+compat_kwargs,
+compat_urlparse,
+)
from ..utils import (
unescapeHTML,
urlencode_postdata,
@@ -1317,7 +1320,8 @@ class AdobePassIE(InfoExtractor):
headers = kwargs.get('headers', {})
headers.update(self.geo_verification_headers())
kwargs['headers'] = headers
-return super(AdobePassIE, self)._download_webpage_handle(*args, **kwargs)
+return super(AdobePassIE, self)._download_webpage_handle(
+*args, **compat_kwargs(kwargs))
@staticmethod
def _get_mvpd_resource(provider_id, title, guid, rating):
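
compat_kwargs is needed because, under the unicode_literals future import, keyword dictionaries end up with unicode keys, which **-expansion on Python 2 rejects; the helper converts them into something both Pythons accept. A small sketch of the pattern; the fetch function is a hypothetical stand-in for _download_webpage_handle:

from __future__ import unicode_literals

from youtube_dl.compat import compat_kwargs


def fetch(url, headers=None):
    # hypothetical stand-in for the real download helper
    return url, headers


kwargs = {'headers': {'Ap-Mso': 'Example'}}  # unicode keys on Python 2
print(fetch('https://example.com', **compat_kwargs(kwargs)))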

youtube_dl/extractor/asiancrush.py (new file)

@@ -0,0 +1,93 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from .kaltura import KalturaIE
from ..utils import (
extract_attributes,
remove_end,
urlencode_postdata,
)
class AsianCrushIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?asiancrush\.com/video/(?:[^/]+/)?0+(?P<id>\d+)v\b'
_TESTS = [{
'url': 'https://www.asiancrush.com/video/012869v/women-who-flirt/',
'md5': 'c3b740e48d0ba002a42c0b72857beae6',
'info_dict': {
'id': '1_y4tmjm5r',
'ext': 'mp4',
'title': 'Women Who Flirt',
'description': 'md5:3db14e9186197857e7063522cb89a805',
'timestamp': 1496936429,
'upload_date': '20170608',
'uploader_id': 'craig@crifkin.com',
},
}, {
'url': 'https://www.asiancrush.com/video/she-was-pretty/011886v-pretty-episode-3/',
'only_matching': True,
}]
def _real_extract(self, url):
video_id = self._match_id(url)
data = self._download_json(
'https://www.asiancrush.com/wp-admin/admin-ajax.php', video_id,
data=urlencode_postdata({
'postid': video_id,
'action': 'get_channel_kaltura_vars',
}))
entry_id = data['entry_id']
return self.url_result(
'kaltura:%s:%s' % (data['partner_id'], entry_id),
ie=KalturaIE.ie_key(), video_id=entry_id,
video_title=data.get('vid_label'))
class AsianCrushPlaylistIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?asiancrush\.com/series/0+(?P<id>\d+)s\b'
_TEST = {
'url': 'https://www.asiancrush.com/series/012481s/scholar-walks-night/',
'info_dict': {
'id': '12481',
'title': 'Scholar Who Walks the Night',
'description': 'md5:7addd7c5132a09fd4741152d96cce886',
},
'playlist_count': 20,
}
def _real_extract(self, url):
playlist_id = self._match_id(url)
webpage = self._download_webpage(url, playlist_id)
entries = []
for mobj in re.finditer(
r'<a[^>]+href=(["\'])(?P<url>%s.*?)\1[^>]*>' % AsianCrushIE._VALID_URL,
webpage):
attrs = extract_attributes(mobj.group(0))
if attrs.get('class') == 'clearfix':
entries.append(self.url_result(
mobj.group('url'), ie=AsianCrushIE.ie_key()))
title = remove_end(
self._html_search_regex(
r'(?s)<h1\b[^>]+\bid=["\']movieTitle[^>]+>(.+?)</h1>', webpage,
'title', default=None) or self._og_search_title(
webpage, default=None) or self._html_search_meta(
'twitter:title', webpage, 'title',
default=None) or self._search_regex(
r'<title>([^<]+)</title>', webpage, 'title', fatal=False),
' | AsianCrush')
description = self._og_search_description(
webpage, default=None) or self._html_search_meta(
'twitter:description', webpage, 'description', fatal=False)
return self.playlist_result(entries, playlist_id, title, description)
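
A quick way to exercise the new extractor through youtube-dl's Python API; the options are illustrative and the URL is the first test case above:

from youtube_dl import YoutubeDL

with YoutubeDL({'skip_download': True, 'quiet': True}) as ydl:
    info = ydl.extract_info(
        'https://www.asiancrush.com/video/012869v/women-who-flirt/')
    print(info.get('id'), info.get('title'))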

youtube_dl/extractor/extractors.py

@@ -71,6 +71,10 @@ from .arte import (
TheOperaPlatformIE,
ArteTVPlaylistIE,
)
from .asiancrush import (
AsianCrushIE,
AsianCrushPlaylistIE,
)
from .atresplayer import AtresPlayerIE
from .atttechchannel import ATTTechChannelIE
from .atvat import ATVAtIE
@@ -871,6 +875,7 @@ from .rutube import (
)
from .rutv import RUTVIE
from .ruutu import RuutuIE
from .ruv import RuvIE
from .sandia import SandiaIE
from .safari import (
SafariIE,

youtube_dl/extractor/googledrive.py

@@ -69,19 +69,32 @@ class GoogleDriveIE(InfoExtractor):
r'"fmt_stream_map"\s*,\s*"([^"]+)', webpage, 'fmt stream map').split(',')
fmt_list = self._search_regex(r'"fmt_list"\s*,\s*"([^"]+)', webpage, 'fmt_list').split(',')
+resolutions = {}
+for fmt in fmt_list:
+mobj = re.search(
+r'^(?P<format_id>\d+)/(?P<width>\d+)[xX](?P<height>\d+)', fmt)
+if mobj:
+resolutions[mobj.group('format_id')] = (
+int(mobj.group('width')), int(mobj.group('height')))
formats = []
-for fmt, fmt_stream in zip(fmt_list, fmt_stream_map):
-fmt_id, fmt_url = fmt_stream.split('|')
-resolution = fmt.split('/')[1]
-width, height = resolution.split('x')
-formats.append({
-'url': lowercase_escape(fmt_url),
-'format_id': fmt_id,
-'resolution': resolution,
-'width': int_or_none(width),
-'height': int_or_none(height),
-'ext': self._FORMATS_EXT[fmt_id],
-})
+for fmt_stream in fmt_stream_map:
+fmt_stream_split = fmt_stream.split('|')
+if len(fmt_stream_split) < 2:
+continue
+format_id, format_url = fmt_stream_split[:2]
+f = {
+'url': lowercase_escape(format_url),
+'format_id': format_id,
+'ext': self._FORMATS_EXT[format_id],
+}
+resolution = resolutions.get(format_id)
+if resolution:
+f.update({
+'width': resolution[0],
+'height': resolution[1],
+})
+formats.append(f)
self._sort_formats(formats)
return {
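
The reworked parsing no longer assumes fmt_list and fmt_stream_map line up pairwise: it builds a format_id to (width, height) map first and skips malformed stream entries. A standalone sketch with made-up sample data:

import re

# Illustrative samples; real pages embed these as "fmt_list" / "fmt_stream_map".
fmt_list = '18/640x360,22/1280x720'.split(',')
fmt_stream_map = ['22|https://example.com/hd', '18|https://example.com/sd', 'bogus']

resolutions = {}
for fmt in fmt_list:
    mobj = re.search(r'^(?P<format_id>\d+)/(?P<width>\d+)[xX](?P<height>\d+)', fmt)
    if mobj:
        resolutions[mobj.group('format_id')] = (
            int(mobj.group('width')), int(mobj.group('height')))

for fmt_stream in fmt_stream_map:
    fmt_stream_split = fmt_stream.split('|')
    if len(fmt_stream_split) < 2:
        continue  # malformed entries are skipped instead of crashing
    format_id, format_url = fmt_stream_split[:2]
    width, height = resolutions.get(format_id, (None, None))
    print(format_id, width, height, format_url)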

youtube_dl/extractor/pandoratv.py

@@ -19,7 +19,7 @@ class PandoraTVIE(InfoExtractor):
IE_NAME = 'pandora.tv'
IE_DESC = '판도라TV'
_VALID_URL = r'https?://(?:.+?\.)?channel\.pandora\.tv/channel/video\.ptv\?'
-_TEST = {
+_TESTS = [{
'url': 'http://jp.channel.pandora.tv/channel/video.ptv?c1=&prgid=53294230&ch_userid=mikakim&ref=main&lot=cate_01_2',
'info_dict': {
'id': '53294230',
@@ -34,7 +34,26 @@ class PandoraTVIE(InfoExtractor):
'view_count': int,
'like_count': int,
}
-}
+}, {
'url': 'http://channel.pandora.tv/channel/video.ptv?ch_userid=gogoucc&prgid=54721744',
'info_dict': {
'id': '54721744',
'ext': 'flv',
'title': '[HD] JAPAN COUNTDOWN 170423',
'description': '[HD] JAPAN COUNTDOWN 170423',
'thumbnail': r're:^https?://.*\.jpg$',
'duration': 1704.9,
'upload_date': '20170423',
'uploader': 'GOGO_UCC',
'uploader_id': 'gogoucc',
'view_count': int,
'like_count': int,
},
'params': {
# Test metadata only
'skip_download': True,
},
}]
def _real_extract(self, url):
qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
@@ -86,7 +105,7 @@ class PandoraTVIE(InfoExtractor):
'description': info.get('body'),
'thumbnail': info.get('thumbnail') or info.get('poster'),
'duration': float_or_none(info.get('runtime'), 1000) or parse_duration(info.get('time')),
-'upload_date': info['fid'][:8] if isinstance(info.get('fid'), compat_str) else None,
+'upload_date': info['fid'].split('/')[-1][:8] if isinstance(info.get('fid'), compat_str) else None,
'uploader': info.get('nickname'),
'uploader_id': info.get('upload_userid'),
'view_count': str_to_int(info.get('hit')),
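
The upload_date change only adds a basename step, presumably because some fid values now carry a path-like prefix in front of the date. A tiny sketch with made-up values:

for fid in ('20170423A1B2C3', 'videos/20170423A1B2C3'):
    upload_date = fid.split('/')[-1][:8]
    print(fid, '->', upload_date)  # both yield '20170423'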

youtube_dl/extractor/ruv.py (new file)

@@ -0,0 +1,101 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
determine_ext,
unified_timestamp,
)
class RuvIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?ruv\.is/(?:sarpurinn/[^/]+|node)/(?P<id>[^/]+(?:/\d+)?)'
_TESTS = [{
# m3u8
'url': 'http://ruv.is/sarpurinn/ruv-aukaras/fh-valur/20170516',
'md5': '66347652f4e13e71936817102acc1724',
'info_dict': {
'id': '1144499',
'display_id': 'fh-valur/20170516',
'ext': 'mp4',
'title': 'FH - Valur',
'description': 'Bein útsending frá 3. leik FH og Vals í úrslitum Olísdeildar karla í handbolta.',
'timestamp': 1494963600,
'upload_date': '20170516',
},
}, {
# mp3
'url': 'http://ruv.is/sarpurinn/ras-2/morgunutvarpid/20170619',
'md5': '395ea250c8a13e5fdb39d4670ef85378',
'info_dict': {
'id': '1153630',
'display_id': 'morgunutvarpid/20170619',
'ext': 'mp3',
'title': 'Morgunútvarpið',
'description': 'md5:a4cf1202c0a1645ca096b06525915418',
'timestamp': 1497855000,
'upload_date': '20170619',
},
}, {
'url': 'http://ruv.is/sarpurinn/ruv/frettir/20170614',
'only_matching': True,
}, {
'url': 'http://www.ruv.is/node/1151854',
'only_matching': True,
}, {
'url': 'http://ruv.is/sarpurinn/klippa/secret-soltice-hefst-a-morgun',
'only_matching': True,
}, {
'url': 'http://ruv.is/sarpurinn/ras-1/morgunvaktin/20170619',
'only_matching': True,
}]
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
title = self._og_search_title(webpage)
FIELD_RE = r'video\.%s\s*=\s*(["\'])(?P<url>(?:(?!\1).)+)\1'
media_url = self._html_search_regex(
FIELD_RE % 'src', webpage, 'video URL', group='url')
video_id = self._search_regex(
r'<link\b[^>]+\bhref=["\']https?://www\.ruv\.is/node/(\d+)',
webpage, 'video id', default=display_id)
ext = determine_ext(media_url)
if ext == 'm3u8':
formats = self._extract_m3u8_formats(
media_url, video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls')
elif ext == 'mp3':
formats = [{
'format_id': 'mp3',
'url': media_url,
'vcodec': 'none',
}]
else:
formats = [{
'url': media_url,
}]
description = self._og_search_description(webpage, default=None)
thumbnail = self._og_search_thumbnail(
webpage, default=None) or self._search_regex(
FIELD_RE % 'poster', webpage, 'thumbnail', fatal=False)
timestamp = unified_timestamp(self._html_search_meta(
'article:published_time', webpage, 'timestamp', fatal=False))
return {
'id': video_id,
'display_id': display_id,
'title': title,
'description': description,
'thumbnail': thumbnail,
'timestamp': timestamp,
'formats': formats,
}

youtube_dl/extractor/vimeo.py

@@ -615,7 +615,10 @@ class VimeoIE(VimeoBaseInfoExtractor):
if download_url and not source_file.get('is_cold') and not source_file.get('is_defrosting'):
source_name = source_file.get('public_name', 'Original')
if self._is_valid_url(download_url, video_id, '%s video' % source_name):
-ext = source_file.get('extension', determine_ext(download_url)).lower()
+ext = (try_get(
+source_file, lambda x: x['extension'],
+compat_str) or determine_ext(
+download_url, None) or 'mp4').lower()
formats.append({
'url': download_url,
'ext': ext,
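
try_get returns the value only when it has the expected type, so a missing or non-string extension now falls through to determine_ext and finally to 'mp4'. A small sketch with made-up source_file dicts:

from youtube_dl.compat import compat_str
from youtube_dl.utils import determine_ext, try_get

for source_file in ({'extension': 'MOV'}, {'extension': None}, {}):
    download_url = 'https://example.com/download/clip'
    ext = (try_get(source_file, lambda x: x['extension'], compat_str)
           or determine_ext(download_url, None) or 'mp4').lower()
    print(source_file, '->', ext)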

youtube_dl/extractor/viu.py

@@ -4,7 +4,10 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
-from ..compat import compat_str
+from ..compat import (
+compat_kwargs,
+compat_str,
+)
from ..utils import (
ExtractorError,
int_or_none,
@@ -36,7 +39,8 @@ class ViuBaseIE(InfoExtractor):
headers.update(kwargs.get('headers', {}))
kwargs['headers'] = headers
response = self._download_json(
-'https://www.viu.com/api/' + path, *args, **kwargs)['response']
+'https://www.viu.com/api/' + path, *args,
+**compat_kwargs(kwargs))['response']
if response.get('status') != 'success':
raise ExtractorError('%s said: %s' % (
self.IE_NAME, response['message']), expected=True)

youtube_dl/extractor/watchindianporn.py

@@ -4,11 +4,7 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
-from ..utils import (
-unified_strdate,
-parse_duration,
-int_or_none,
-)
+from ..utils import parse_duration
class WatchIndianPornIE(InfoExtractor):
@@ -23,11 +19,8 @@ class WatchIndianPornIE(InfoExtractor):
'ext': 'mp4',
'title': 'Hot milf from kerala shows off her gorgeous large breasts on camera',
'thumbnail': r're:^https?://.*\.jpg$',
-'uploader': 'LoveJay',
-'upload_date': '20160428',
'duration': 226,
'view_count': int,
-'comment_count': int,
'categories': list,
'age_limit': 18,
}
@@ -40,51 +33,36 @@ class WatchIndianPornIE(InfoExtractor):
webpage = self._download_webpage(url, display_id)
-video_url = self._html_search_regex(
-r"url: escape\('([^']+)'\)", webpage, 'url')
+info_dict = self._parse_html5_media_entries(url, webpage, video_id)[0]
-title = self._html_search_regex(
-r'<h2 class="he2"><span>(.*?)</span>',
-webpage, 'title')
-thumbnail = self._html_search_regex(
-r'<span id="container"><img\s+src="([^"]+)"',
-webpage, 'thumbnail', fatal=False)
-uploader = self._html_search_regex(
-r'class="aupa">\s*(.*?)</a>',
-webpage, 'uploader')
-upload_date = unified_strdate(self._html_search_regex(
-r'Added: <strong>(.+?)</strong>', webpage, 'upload date', fatal=False))
+title = self._html_search_regex((
+r'<title>(.+?)\s*-\s*Indian\s+Porn</title>',
+r'<h4>(.+?)</h4>'
+), webpage, 'title')
duration = parse_duration(self._search_regex(
-r'<td>Time:\s*</td>\s*<td align="right"><span>\s*(.+?)\s*</span>',
+r'Time:\s*<strong>\s*(.+?)\s*</strong>',
webpage, 'duration', fatal=False))
-view_count = int_or_none(self._search_regex(
-r'<td>Views:\s*</td>\s*<td align="right"><span>\s*(\d+)\s*</span>',
+view_count = int(self._search_regex(
+r'(?s)Time:\s*<strong>.*?</strong>.*?<strong>\s*(\d+)\s*</strong>',
webpage, 'view count', fatal=False))
-comment_count = int_or_none(self._search_regex(
-r'<td>Comments:\s*</td>\s*<td align="right"><span>\s*(\d+)\s*</span>',
-webpage, 'comment count', fatal=False))
categories = re.findall(
-r'<a href="[^"]+/search/video/desi"><span>([^<]+)</span></a>',
+r'<a[^>]+class=[\'"]categories[\'"][^>]*>\s*([^<]+)\s*</a>',
webpage)
-return {
+info_dict.update({
'id': video_id,
'display_id': display_id,
-'url': video_url,
'http_headers': {
'Referer': url,
},
'title': title,
-'thumbnail': thumbnail,
-'uploader': uploader,
-'upload_date': upload_date,
'duration': duration,
'view_count': view_count,
-'comment_count': comment_count,
'categories': categories,
'age_limit': 18,
-}
+})
+return info_dict

youtube_dl/postprocessor/execafterdownload.py

@@ -4,7 +4,10 @@ import subprocess
from .common import PostProcessor
from ..compat import compat_shlex_quote
-from ..utils import PostProcessingError
+from ..utils import (
+encodeArgument,
+PostProcessingError,
+)
class ExecAfterDownloadPP(PostProcessor):
@@ -20,7 +23,7 @@ class ExecAfterDownloadPP(PostProcessor):
cmd = cmd.replace('{}', compat_shlex_quote(information['filepath']))
self._downloader.to_screen('[exec] Executing command: %s' % cmd)
-retCode = subprocess.call(cmd, shell=True)
+retCode = subprocess.call(encodeArgument(cmd), shell=True)
if retCode != 0:
raise PostProcessingError(
'Command returned error code %d' % retCode)
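
encodeArgument ensures that, on Python 2, a unicode command line (for instance one where a non-ASCII filename was substituted for {}) is encoded before reaching the shell, which appears to be the failure mode behind #13407. A rough sketch of the call; the filename is made up:

import subprocess

from youtube_dl.compat import compat_shlex_quote
from youtube_dl.utils import encodeArgument

cmd = 'echo {}'.replace('{}', compat_shlex_quote('Motörhead.mp4'))
subprocess.call(encodeArgument(cmd), shell=True)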

youtube_dl/postprocessor/metadatafromtitle.py

@@ -35,11 +35,14 @@ class MetadataFromTitlePP(PostProcessor):
title = info['title']
match = re.match(self._titleregex, title)
if match is None:
-self._downloader.to_screen('[fromtitle] Could not interpret title of video as "%s"' % self._titleformat)
+self._downloader.to_screen(
+'[fromtitle] Could not interpret title of video as "%s"'
+% self._titleformat)
return [], info
for attribute, value in match.groupdict().items():
value = match.group(attribute)
info[attribute] = value
-self._downloader.to_screen('[fromtitle] parsed ' + attribute + ': ' + value)
+self._downloader.to_screen(
+'[fromtitle] parsed %s: %s'
+% (attribute, value if value is not None else 'NA'))
return [], info
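
The crash this fixes appears when the title regex contains an optional named group: groupdict() then yields None, which the old string concatenation could not handle. A minimal reproduction of the new logging path; regex and title are made up:

import re

titleregex = r'(?:(?P<artist>.+?) - )?(?P<series>.+?) - (?P<title>.+)'
match = re.match(titleregex, 'Some Show - Episode 1')  # no artist part
for attribute, value in match.groupdict().items():
    print('[fromtitle] parsed %s: %s'
          % (attribute, value if value is not None else 'NA'))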

youtube_dl/utils.py

@@ -22,7 +22,6 @@ import locale
import math
import operator
import os
-import pipes
import platform
import random
import re
@@ -1535,7 +1534,7 @@ def shell_quote(args):
if isinstance(a, bytes):
# We may get a filename encoded with 'encodeFilename'
a = a.decode(encoding)
-quoted_args.append(pipes.quote(a))
+quoted_args.append(compat_shlex_quote(a))
return ' '.join(quoted_args)

youtube_dl/version.py

@@ -1,3 +1,3 @@
from __future__ import unicode_literals
-__version__ = '2017.06.12'
+__version__ = '2017.06.18'