Merge branch 'master' into DrTuber-issue-12058

This commit is contained in:
Parmjit Virk 2017-06-20 16:55:21 -05:00
commit 894861d1b1
21 changed files with 334 additions and 87 deletions

View File

@ -6,8 +6,8 @@
--- ---
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.06.12*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. ### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.06.18*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.06.12** - [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.06.18**
### Before submitting an *issue* make sure you have: ### Before submitting an *issue* make sure you have:
- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
@ -35,7 +35,7 @@ $ youtube-dl -v <your command line>
[debug] User config: [] [debug] User config: []
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
[debug] youtube-dl version 2017.06.12 [debug] youtube-dl version 2017.06.18
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
[debug] Proxy map: {} [debug] Proxy map: {}

View File

@ -1,10 +1,31 @@
version <unreleased> version <unreleased>
Extractors
* [pandora.tv] Fix upload_date extraction (#12846)
version 2017.06.18
Core Core
* [downloader/common] Use utils.shell_quote for debug command line
* [utils] Use compat_shlex_quote in shell_quote
* [postprocessor/execafterdownload] Encode command line (#13407)
* [compat] Fix compat_shlex_quote on Windows (#5889, #10254)
* [postprocessor/metadatafromtitle] Fix missing optional meta fields processing
in --metadata-from-title (#13408)
* [extractor/common] Fix json dumping with --geo-bypass * [extractor/common] Fix json dumping with --geo-bypass
+ [extractor/common] Improve jwplayer subtitles extraction
+ [extractor/common] Improve jwplayer formats extraction (#13379)
Extractors Extractors
* [polskieradio] Fix extraction (#13392)
+ [xfileshare] Add support for fastvideo.me (#13385)
* [bilibili] Fix extraction of videos with double quotes in titles (#13387) * [bilibili] Fix extraction of videos with double quotes in titles (#13387)
* [4tube] Fix extraction (#13381, #13382)
+ [disney] Add support for disneychannel.de (#13383)
* [npo] Improve URL regular expression (#13376)
+ [corus] Add support for showcase.ca
+ [corus] Add support for history.ca (#13359)
version 2017.06.12 version 2017.06.12

View File

@ -101,7 +101,7 @@ youtube-dl.tar.gz: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash-
--exclude '*.pyc' \ --exclude '*.pyc' \
--exclude '*.pyo' \ --exclude '*.pyo' \
--exclude '*~' \ --exclude '*~' \
--exclude '__pycache' \ --exclude '__pycache__' \
--exclude '.git' \ --exclude '.git' \
--exclude 'testdata' \ --exclude 'testdata' \
--exclude 'docs/_build' \ --exclude 'docs/_build' \

View File

@ -8,7 +8,7 @@ import re
ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
README_FILE = os.path.join(ROOT_DIR, 'README.md') README_FILE = os.path.join(ROOT_DIR, 'README.md')
PREFIX = '''%YOUTUBE-DL(1) PREFIX = r'''%YOUTUBE-DL(1)
# NAME # NAME

View File

@ -975,7 +975,7 @@
- **WSJArticle** - **WSJArticle**
- **XBef** - **XBef**
- **XboxClips** - **XboxClips**
- **XFileShare**: XFileShare based sites: DaClips, FileHoot, GorillaVid, MovPod, PowerWatch, Rapidvideo.ws, TheVideoBee, Vidto, Streamin.To, XVIDSTAGE, Vid ABC, VidBom, vidlo, RapidVideo.TV - **XFileShare**: XFileShare based sites: DaClips, FileHoot, GorillaVid, MovPod, PowerWatch, Rapidvideo.ws, TheVideoBee, Vidto, Streamin.To, XVIDSTAGE, Vid ABC, VidBom, vidlo, RapidVideo.TV, FastVideo.me
- **XHamster** - **XHamster**
- **XHamsterEmbed** - **XHamsterEmbed**
- **xiami:album**: 虾米音乐 - 专辑 - **xiami:album**: 虾米音乐 - 专辑

View File

@ -2617,14 +2617,22 @@ except ImportError: # Python 2
parsed_result[name] = [value] parsed_result[name] = [value]
return parsed_result return parsed_result
try:
from shlex import quote as compat_shlex_quote compat_os_name = os._name if os.name == 'java' else os.name
except ImportError: # Python < 3.3
if compat_os_name == 'nt':
def compat_shlex_quote(s): def compat_shlex_quote(s):
if re.match(r'^[-_\w./]+$', s): return s if re.match(r'^[-_\w./]+$', s) else '"%s"' % s.replace('"', '\\"')
return s else:
else: try:
return "'" + s.replace("'", "'\"'\"'") + "'" from shlex import quote as compat_shlex_quote
except ImportError: # Python < 3.3
def compat_shlex_quote(s):
if re.match(r'^[-_\w./]+$', s):
return s
else:
return "'" + s.replace("'", "'\"'\"'") + "'"
try: try:
@ -2649,9 +2657,6 @@ def compat_ord(c):
return ord(c) return ord(c)
compat_os_name = os._name if os.name == 'java' else os.name
if sys.version_info >= (3, 0): if sys.version_info >= (3, 0):
compat_getenv = os.getenv compat_getenv = os.getenv
compat_expanduser = os.path.expanduser compat_expanduser = os.path.expanduser

View File

@ -8,10 +8,11 @@ import random
from ..compat import compat_os_name from ..compat import compat_os_name
from ..utils import ( from ..utils import (
decodeArgument,
encodeFilename, encodeFilename,
error_to_compat_str, error_to_compat_str,
decodeArgument,
format_bytes, format_bytes,
shell_quote,
timeconvert, timeconvert,
) )
@ -381,10 +382,5 @@ class FileDownloader(object):
if exe is None: if exe is None:
exe = os.path.basename(str_args[0]) exe = os.path.basename(str_args[0])
try:
import pipes
shell_quote = lambda args: ' '.join(map(pipes.quote, str_args))
except ImportError:
shell_quote = repr
self.to_screen('[debug] %s command line: %s' % ( self.to_screen('[debug] %s command line: %s' % (
exe, shell_quote(str_args))) exe, shell_quote(str_args)))

View File

@ -22,7 +22,7 @@ class ABCOTVSIE(InfoExtractor):
'display_id': 'east-bay-museum-celebrates-vintage-synthesizers', 'display_id': 'east-bay-museum-celebrates-vintage-synthesizers',
'ext': 'mp4', 'ext': 'mp4',
'title': 'East Bay museum celebrates vintage synthesizers', 'title': 'East Bay museum celebrates vintage synthesizers',
'description': 'md5:a4f10fb2f2a02565c1749d4adbab4b10', 'description': 'md5:24ed2bd527096ec2a5c67b9d5a9005f3',
'thumbnail': r're:^https?://.*\.jpg$', 'thumbnail': r're:^https?://.*\.jpg$',
'timestamp': 1421123075, 'timestamp': 1421123075,
'upload_date': '20150113', 'upload_date': '20150113',

View File

@ -6,7 +6,10 @@ import time
import xml.etree.ElementTree as etree import xml.etree.ElementTree as etree
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_urlparse from ..compat import (
compat_kwargs,
compat_urlparse,
)
from ..utils import ( from ..utils import (
unescapeHTML, unescapeHTML,
urlencode_postdata, urlencode_postdata,
@ -1317,7 +1320,8 @@ class AdobePassIE(InfoExtractor):
headers = kwargs.get('headers', {}) headers = kwargs.get('headers', {})
headers.update(self.geo_verification_headers()) headers.update(self.geo_verification_headers())
kwargs['headers'] = headers kwargs['headers'] = headers
return super(AdobePassIE, self)._download_webpage_handle(*args, **kwargs) return super(AdobePassIE, self)._download_webpage_handle(
*args, **compat_kwargs(kwargs))
@staticmethod @staticmethod
def _get_mvpd_resource(provider_id, title, guid, rating): def _get_mvpd_resource(provider_id, title, guid, rating):

View File

@ -0,0 +1,93 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from .kaltura import KalturaIE
from ..utils import (
extract_attributes,
remove_end,
urlencode_postdata,
)
# Extractor for single AsianCrush video pages. It does not extract media
# itself: it looks up the Kaltura identifiers for the page and delegates
# to the Kaltura extractor.
class AsianCrushIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?asiancrush\.com/video/(?:[^/]+/)?0+(?P<id>\d+)v\b'
    _TESTS = [{
        'url': 'https://www.asiancrush.com/video/012869v/women-who-flirt/',
        'md5': 'c3b740e48d0ba002a42c0b72857beae6',
        'info_dict': {
            'id': '1_y4tmjm5r',
            'ext': 'mp4',
            'title': 'Women Who Flirt',
            'description': 'md5:3db14e9186197857e7063522cb89a805',
            'timestamp': 1496936429,
            'upload_date': '20170608',
            'uploader_id': 'craig@crifkin.com',
        },
    }, {
        'url': 'https://www.asiancrush.com/video/she-was-pretty/011886v-pretty-episode-3/',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # The id captured by _VALID_URL is the numeric part with the leading
        # zeros and the trailing "v" stripped.
        video_id = self._match_id(url)

        # POST the numeric id to the site's admin-ajax endpoint to obtain
        # the Kaltura variables for this video.
        post_body = urlencode_postdata({
            'postid': video_id,
            'action': 'get_channel_kaltura_vars',
        })
        kaltura_vars = self._download_json(
            'https://www.asiancrush.com/wp-admin/admin-ajax.php', video_id,
            data=post_body)

        # Both keys are mandatory; a missing one raises KeyError.
        entry_id = kaltura_vars['entry_id']
        kaltura_url = 'kaltura:%s:%s' % (kaltura_vars['partner_id'], entry_id)

        return self.url_result(
            kaltura_url, ie=KalturaIE.ie_key(), video_id=entry_id,
            video_title=kaltura_vars.get('vid_label'))
# Extractor for AsianCrush series pages. Every anchor on the page that
# links to a video URL (as defined by AsianCrushIE._VALID_URL) and carries
# class="clearfix" becomes one playlist entry.
class AsianCrushPlaylistIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?asiancrush\.com/series/0+(?P<id>\d+)s\b'
    _TEST = {
        'url': 'https://www.asiancrush.com/series/012481s/scholar-walks-night/',
        'info_dict': {
            'id': '12481',
            'title': 'Scholar Who Walks the Night',
            'description': 'md5:7addd7c5132a09fd4741152d96cce886',
        },
        'playlist_count': 20,
    }

    def _real_extract(self, url):
        playlist_id = self._match_id(url)

        webpage = self._download_webpage(url, playlist_id)

        entries = []

        # Match whole <a ...> tags whose href is a video URL; group(0) is the
        # complete opening tag so its attributes can be inspected.
        for mobj in re.finditer(
                r'<a[^>]+href=(["\'])(?P<url>%s.*?)\1[^>]*>' % AsianCrushIE._VALID_URL,
                webpage):
            attrs = extract_attributes(mobj.group(0))
            # Only anchors with exactly class="clearfix" are taken as episodes.
            if attrs.get('class') == 'clearfix':
                entries.append(self.url_result(
                    mobj.group('url'), ie=AsianCrushIE.ie_key()))

        # Title sources are tried in order; the first one yielding a value
        # wins. The <h1> pattern allows any attributes between the tag name
        # and id= ("[^>]*"); the previous "[^>]" (no quantifier) only matched
        # when exactly one character separated them.
        title = remove_end(
            self._html_search_regex(
                r'(?s)<h1\b[^>]*\bid=["\']movieTitle[^>]+>(.+?)</h1>', webpage,
                'title', default=None) or self._og_search_title(
                webpage, default=None) or self._html_search_meta(
                'twitter:title', webpage, 'title',
                default=None) or self._search_regex(
                r'<title>([^<]+)</title>', webpage, 'title', fatal=False),
            ' | AsianCrush')

        # Description is optional: Open Graph first, then the Twitter card.
        description = self._og_search_description(
            webpage, default=None) or self._html_search_meta(
            'twitter:description', webpage, 'description', fatal=False)

        return self.playlist_result(entries, playlist_id, title, description)

View File

@ -71,6 +71,10 @@ from .arte import (
TheOperaPlatformIE, TheOperaPlatformIE,
ArteTVPlaylistIE, ArteTVPlaylistIE,
) )
from .asiancrush import (
AsianCrushIE,
AsianCrushPlaylistIE,
)
from .atresplayer import AtresPlayerIE from .atresplayer import AtresPlayerIE
from .atttechchannel import ATTTechChannelIE from .atttechchannel import ATTTechChannelIE
from .atvat import ATVAtIE from .atvat import ATVAtIE
@ -871,6 +875,7 @@ from .rutube import (
) )
from .rutv import RUTVIE from .rutv import RUTVIE
from .ruutu import RuutuIE from .ruutu import RuutuIE
from .ruv import RuvIE
from .sandia import SandiaIE from .sandia import SandiaIE
from .safari import ( from .safari import (
SafariIE, SafariIE,

View File

@ -69,19 +69,32 @@ class GoogleDriveIE(InfoExtractor):
r'"fmt_stream_map"\s*,\s*"([^"]+)', webpage, 'fmt stream map').split(',') r'"fmt_stream_map"\s*,\s*"([^"]+)', webpage, 'fmt stream map').split(',')
fmt_list = self._search_regex(r'"fmt_list"\s*,\s*"([^"]+)', webpage, 'fmt_list').split(',') fmt_list = self._search_regex(r'"fmt_list"\s*,\s*"([^"]+)', webpage, 'fmt_list').split(',')
resolutions = {}
for fmt in fmt_list:
mobj = re.search(
r'^(?P<format_id>\d+)/(?P<width>\d+)[xX](?P<height>\d+)', fmt)
if mobj:
resolutions[mobj.group('format_id')] = (
int(mobj.group('width')), int(mobj.group('height')))
formats = [] formats = []
for fmt, fmt_stream in zip(fmt_list, fmt_stream_map): for fmt_stream in fmt_stream_map:
fmt_id, fmt_url = fmt_stream.split('|') fmt_stream_split = fmt_stream.split('|')
resolution = fmt.split('/')[1] if len(fmt_stream_split) < 2:
width, height = resolution.split('x') continue
formats.append({ format_id, format_url = fmt_stream_split[:2]
'url': lowercase_escape(fmt_url), f = {
'format_id': fmt_id, 'url': lowercase_escape(format_url),
'resolution': resolution, 'format_id': format_id,
'width': int_or_none(width), 'ext': self._FORMATS_EXT[format_id],
'height': int_or_none(height), }
'ext': self._FORMATS_EXT[fmt_id], resolution = resolutions.get(format_id)
}) if resolution:
f.update({
'width': resolution[0],
'height': resolution[0],
})
formats.append(f)
self._sort_formats(formats) self._sort_formats(formats)
return { return {

View File

@ -19,7 +19,7 @@ class PandoraTVIE(InfoExtractor):
IE_NAME = 'pandora.tv' IE_NAME = 'pandora.tv'
IE_DESC = '판도라TV' IE_DESC = '판도라TV'
_VALID_URL = r'https?://(?:.+?\.)?channel\.pandora\.tv/channel/video\.ptv\?' _VALID_URL = r'https?://(?:.+?\.)?channel\.pandora\.tv/channel/video\.ptv\?'
_TEST = { _TESTS = [{
'url': 'http://jp.channel.pandora.tv/channel/video.ptv?c1=&prgid=53294230&ch_userid=mikakim&ref=main&lot=cate_01_2', 'url': 'http://jp.channel.pandora.tv/channel/video.ptv?c1=&prgid=53294230&ch_userid=mikakim&ref=main&lot=cate_01_2',
'info_dict': { 'info_dict': {
'id': '53294230', 'id': '53294230',
@ -34,7 +34,26 @@ class PandoraTVIE(InfoExtractor):
'view_count': int, 'view_count': int,
'like_count': int, 'like_count': int,
} }
} }, {
'url': 'http://channel.pandora.tv/channel/video.ptv?ch_userid=gogoucc&prgid=54721744',
'info_dict': {
'id': '54721744',
'ext': 'flv',
'title': '[HD] JAPAN COUNTDOWN 170423',
'description': '[HD] JAPAN COUNTDOWN 170423',
'thumbnail': r're:^https?://.*\.jpg$',
'duration': 1704.9,
'upload_date': '20170423',
'uploader': 'GOGO_UCC',
'uploader_id': 'gogoucc',
'view_count': int,
'like_count': int,
},
'params': {
# Test metadata only
'skip_download': True,
},
}]
def _real_extract(self, url): def _real_extract(self, url):
qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query) qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
@ -86,7 +105,7 @@ class PandoraTVIE(InfoExtractor):
'description': info.get('body'), 'description': info.get('body'),
'thumbnail': info.get('thumbnail') or info.get('poster'), 'thumbnail': info.get('thumbnail') or info.get('poster'),
'duration': float_or_none(info.get('runtime'), 1000) or parse_duration(info.get('time')), 'duration': float_or_none(info.get('runtime'), 1000) or parse_duration(info.get('time')),
'upload_date': info['fid'][:8] if isinstance(info.get('fid'), compat_str) else None, 'upload_date': info['fid'].split('/')[-1][:8] if isinstance(info.get('fid'), compat_str) else None,
'uploader': info.get('nickname'), 'uploader': info.get('nickname'),
'uploader_id': info.get('upload_userid'), 'uploader_id': info.get('upload_userid'),
'view_count': str_to_int(info.get('hit')), 'view_count': str_to_int(info.get('hit')),

101
youtube_dl/extractor/ruv.py Normal file
View File

@ -0,0 +1,101 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
determine_ext,
unified_timestamp,
)
# Extractor for ruv.is "sarpurinn" and node pages. The media URL and the
# fallback poster are read from `video.<field> = "<value>"` assignments in
# the page.
class RuvIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?ruv\.is/(?:sarpurinn/[^/]+|node)/(?P<id>[^/]+(?:/\d+)?)'
    _TESTS = [{
        # m3u8
        'url': 'http://ruv.is/sarpurinn/ruv-aukaras/fh-valur/20170516',
        'md5': '66347652f4e13e71936817102acc1724',
        'info_dict': {
            'id': '1144499',
            'display_id': 'fh-valur/20170516',
            'ext': 'mp4',
            'title': 'FH - Valur',
            'description': 'Bein útsending frá 3. leik FH og Vals í úrslitum Olísdeildar karla í handbolta.',
            'timestamp': 1494963600,
            'upload_date': '20170516',
        },
    }, {
        # mp3
        'url': 'http://ruv.is/sarpurinn/ras-2/morgunutvarpid/20170619',
        'md5': '395ea250c8a13e5fdb39d4670ef85378',
        'info_dict': {
            'id': '1153630',
            'display_id': 'morgunutvarpid/20170619',
            'ext': 'mp3',
            'title': 'Morgunútvarpið',
            'description': 'md5:a4cf1202c0a1645ca096b06525915418',
            'timestamp': 1497855000,
            'upload_date': '20170619',
        },
    }, {
        'url': 'http://ruv.is/sarpurinn/ruv/frettir/20170614',
        'only_matching': True,
    }, {
        'url': 'http://www.ruv.is/node/1151854',
        'only_matching': True,
    }, {
        'url': 'http://ruv.is/sarpurinn/klippa/secret-soltice-hefst-a-morgun',
        'only_matching': True,
    }, {
        'url': 'http://ruv.is/sarpurinn/ras-1/morgunvaktin/20170619',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        display_id = self._match_id(url)

        webpage = self._download_webpage(url, display_id)

        title = self._og_search_title(webpage)

        # Template for `video.<field> = "<value>"`. Capture group 1 is only
        # the quote delimiter; the value itself is the named group `url`,
        # so every lookup built from this template must select group='url'.
        FIELD_RE = r'video\.%s\s*=\s*(["\'])(?P<url>(?:(?!\1).)+)\1'

        media_url = self._html_search_regex(
            FIELD_RE % 'src', webpage, 'video URL', group='url')

        # Prefer the numeric node id from a <link> to the node page; fall
        # back to the display id when no such link is present.
        video_id = self._search_regex(
            r'<link\b[^>]+\bhref=["\']https?://www\.ruv\.is/node/(\d+)',
            webpage, 'video id', default=display_id)

        ext = determine_ext(media_url)

        if ext == 'm3u8':
            formats = self._extract_m3u8_formats(
                media_url, video_id, 'mp4', entry_protocol='m3u8_native',
                m3u8_id='hls')
        elif ext == 'mp3':
            # Audio-only stream: mark it as having no video codec.
            formats = [{
                'format_id': 'mp3',
                'url': media_url,
                'vcodec': 'none',
            }]
        else:
            formats = [{
                'url': media_url,
            }]

        description = self._og_search_description(webpage, default=None)
        # Fall back to the player's poster field when there is no og:image.
        # group='url' is required here for the same reason as for the media
        # URL above: without it the lookup is not pinned to the value group.
        thumbnail = self._og_search_thumbnail(
            webpage, default=None) or self._search_regex(
            FIELD_RE % 'poster', webpage, 'thumbnail', fatal=False,
            group='url')
        timestamp = unified_timestamp(self._html_search_meta(
            'article:published_time', webpage, 'timestamp', fatal=False))

        return {
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'timestamp': timestamp,
            'formats': formats,
        }

View File

@ -615,7 +615,10 @@ class VimeoIE(VimeoBaseInfoExtractor):
if download_url and not source_file.get('is_cold') and not source_file.get('is_defrosting'): if download_url and not source_file.get('is_cold') and not source_file.get('is_defrosting'):
source_name = source_file.get('public_name', 'Original') source_name = source_file.get('public_name', 'Original')
if self._is_valid_url(download_url, video_id, '%s video' % source_name): if self._is_valid_url(download_url, video_id, '%s video' % source_name):
ext = source_file.get('extension', determine_ext(download_url)).lower() ext = (try_get(
source_file, lambda x: x['extension'],
compat_str) or determine_ext(
download_url, None) or 'mp4').lower()
formats.append({ formats.append({
'url': download_url, 'url': download_url,
'ext': ext, 'ext': ext,

View File

@ -4,7 +4,10 @@ from __future__ import unicode_literals
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_str from ..compat import (
compat_kwargs,
compat_str,
)
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
int_or_none, int_or_none,
@ -36,7 +39,8 @@ class ViuBaseIE(InfoExtractor):
headers.update(kwargs.get('headers', {})) headers.update(kwargs.get('headers', {}))
kwargs['headers'] = headers kwargs['headers'] = headers
response = self._download_json( response = self._download_json(
'https://www.viu.com/api/' + path, *args, **kwargs)['response'] 'https://www.viu.com/api/' + path, *args,
**compat_kwargs(kwargs))['response']
if response.get('status') != 'success': if response.get('status') != 'success':
raise ExtractorError('%s said: %s' % ( raise ExtractorError('%s said: %s' % (
self.IE_NAME, response['message']), expected=True) self.IE_NAME, response['message']), expected=True)

View File

@ -4,11 +4,7 @@ from __future__ import unicode_literals
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import parse_duration
unified_strdate,
parse_duration,
int_or_none,
)
class WatchIndianPornIE(InfoExtractor): class WatchIndianPornIE(InfoExtractor):
@ -23,11 +19,8 @@ class WatchIndianPornIE(InfoExtractor):
'ext': 'mp4', 'ext': 'mp4',
'title': 'Hot milf from kerala shows off her gorgeous large breasts on camera', 'title': 'Hot milf from kerala shows off her gorgeous large breasts on camera',
'thumbnail': r're:^https?://.*\.jpg$', 'thumbnail': r're:^https?://.*\.jpg$',
'uploader': 'LoveJay',
'upload_date': '20160428',
'duration': 226, 'duration': 226,
'view_count': int, 'view_count': int,
'comment_count': int,
'categories': list, 'categories': list,
'age_limit': 18, 'age_limit': 18,
} }
@ -40,51 +33,36 @@ class WatchIndianPornIE(InfoExtractor):
webpage = self._download_webpage(url, display_id) webpage = self._download_webpage(url, display_id)
video_url = self._html_search_regex( info_dict = self._parse_html5_media_entries(url, webpage, video_id)[0]
r"url: escape\('([^']+)'\)", webpage, 'url')
title = self._html_search_regex( title = self._html_search_regex((
r'<h2 class="he2"><span>(.*?)</span>', r'<title>(.+?)\s*-\s*Indian\s+Porn</title>',
webpage, 'title') r'<h4>(.+?)</h4>'
thumbnail = self._html_search_regex( ), webpage, 'title')
r'<span id="container"><img\s+src="([^"]+)"',
webpage, 'thumbnail', fatal=False)
uploader = self._html_search_regex(
r'class="aupa">\s*(.*?)</a>',
webpage, 'uploader')
upload_date = unified_strdate(self._html_search_regex(
r'Added: <strong>(.+?)</strong>', webpage, 'upload date', fatal=False))
duration = parse_duration(self._search_regex( duration = parse_duration(self._search_regex(
r'<td>Time:\s*</td>\s*<td align="right"><span>\s*(.+?)\s*</span>', r'Time:\s*<strong>\s*(.+?)\s*</strong>',
webpage, 'duration', fatal=False)) webpage, 'duration', fatal=False))
view_count = int_or_none(self._search_regex( view_count = int(self._search_regex(
r'<td>Views:\s*</td>\s*<td align="right"><span>\s*(\d+)\s*</span>', r'(?s)Time:\s*<strong>.*?</strong>.*?<strong>\s*(\d+)\s*</strong>',
webpage, 'view count', fatal=False)) webpage, 'view count', fatal=False))
comment_count = int_or_none(self._search_regex(
r'<td>Comments:\s*</td>\s*<td align="right"><span>\s*(\d+)\s*</span>',
webpage, 'comment count', fatal=False))
categories = re.findall( categories = re.findall(
r'<a href="[^"]+/search/video/desi"><span>([^<]+)</span></a>', r'<a[^>]+class=[\'"]categories[\'"][^>]*>\s*([^<]+)\s*</a>',
webpage) webpage)
return { info_dict.update({
'id': video_id, 'id': video_id,
'display_id': display_id, 'display_id': display_id,
'url': video_url,
'http_headers': { 'http_headers': {
'Referer': url, 'Referer': url,
}, },
'title': title, 'title': title,
'thumbnail': thumbnail,
'uploader': uploader,
'upload_date': upload_date,
'duration': duration, 'duration': duration,
'view_count': view_count, 'view_count': view_count,
'comment_count': comment_count,
'categories': categories, 'categories': categories,
'age_limit': 18, 'age_limit': 18,
} })
return info_dict

View File

@ -4,7 +4,10 @@ import subprocess
from .common import PostProcessor from .common import PostProcessor
from ..compat import compat_shlex_quote from ..compat import compat_shlex_quote
from ..utils import PostProcessingError from ..utils import (
encodeArgument,
PostProcessingError,
)
class ExecAfterDownloadPP(PostProcessor): class ExecAfterDownloadPP(PostProcessor):
@ -20,7 +23,7 @@ class ExecAfterDownloadPP(PostProcessor):
cmd = cmd.replace('{}', compat_shlex_quote(information['filepath'])) cmd = cmd.replace('{}', compat_shlex_quote(information['filepath']))
self._downloader.to_screen('[exec] Executing command: %s' % cmd) self._downloader.to_screen('[exec] Executing command: %s' % cmd)
retCode = subprocess.call(cmd, shell=True) retCode = subprocess.call(encodeArgument(cmd), shell=True)
if retCode != 0: if retCode != 0:
raise PostProcessingError( raise PostProcessingError(
'Command returned error code %d' % retCode) 'Command returned error code %d' % retCode)

View File

@ -35,11 +35,14 @@ class MetadataFromTitlePP(PostProcessor):
title = info['title'] title = info['title']
match = re.match(self._titleregex, title) match = re.match(self._titleregex, title)
if match is None: if match is None:
self._downloader.to_screen('[fromtitle] Could not interpret title of video as "%s"' % self._titleformat) self._downloader.to_screen(
'[fromtitle] Could not interpret title of video as "%s"'
% self._titleformat)
return [], info return [], info
for attribute, value in match.groupdict().items(): for attribute, value in match.groupdict().items():
value = match.group(attribute)
info[attribute] = value info[attribute] = value
self._downloader.to_screen('[fromtitle] parsed ' + attribute + ': ' + value) self._downloader.to_screen(
'[fromtitle] parsed %s: %s'
% (attribute, value if value is not None else 'NA'))
return [], info return [], info

View File

@ -22,7 +22,6 @@ import locale
import math import math
import operator import operator
import os import os
import pipes
import platform import platform
import random import random
import re import re
@ -1535,7 +1534,7 @@ def shell_quote(args):
if isinstance(a, bytes): if isinstance(a, bytes):
# We may get a filename encoded with 'encodeFilename' # We may get a filename encoded with 'encodeFilename'
a = a.decode(encoding) a = a.decode(encoding)
quoted_args.append(pipes.quote(a)) quoted_args.append(compat_shlex_quote(a))
return ' '.join(quoted_args) return ' '.join(quoted_args)

View File

@ -1,3 +1,3 @@
from __future__ import unicode_literals from __future__ import unicode_literals
__version__ = '2017.06.12' __version__ = '2017.06.18'