diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index b8806e7b3..002c1274a 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.06.12*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.06.12** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.06.18*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.06.18** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2017.06.12 +[debug] youtube-dl version 2017.06.18 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 5cd0b3393..7f077fd24 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,10 +1,31 @@ version +Extractors +* [pandora.tv] Fix upload_date extraction (#12846) + + +version 2017.06.18 + Core +* [downloader/common] Use utils.shell_quote for debug command line +* [utils] Use compat_shlex_quote in shell_quote +* [postprocessor/execafterdownload] Encode command line (#13407) +* [compat] Fix compat_shlex_quote on Windows (#5889, #10254) +* [postprocessor/metadatafromtitle] Fix missing optional meta fields processing + in --metadata-from-title (#13408) * [extractor/common] Fix json dumping with --geo-bypass ++ [extractor/common] Improve jwplayer subtitles extraction ++ [extractor/common] Improve jwplayer formats extraction (#13379) Extractors +* [polskieradio] Fix extraction (#13392) ++ [xfileshare] Add support for fastvideo.me (#13385) * [bilibili] Fix extraction of videos with double quotes in titles (#13387) +* [4tube] Fix extraction (#13381, #13382) ++ [disney] Add support for disneychannel.de (#13383) +* [npo] Improve URL regular expression (#13376) ++ [corus] Add support for showcase.ca ++ [corus] Add support for history.ca (#13359) version 2017.06.12 diff --git a/Makefile b/Makefile index 023556391..84ccce2b3 100644 --- a/Makefile +++ b/Makefile @@ -101,7 +101,7 @@ youtube-dl.tar.gz: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash- --exclude '*.pyc' \ --exclude '*.pyo' \ --exclude '*~' \ - --exclude '__pycache' \ + --exclude '__pycache__' \ --exclude '.git' \ --exclude 'testdata' \ --exclude 'docs/_build' \ diff --git a/devscripts/prepare_manpage.py b/devscripts/prepare_manpage.py index f9fe63f1f..76bf873e1 100644 --- a/devscripts/prepare_manpage.py +++ b/devscripts/prepare_manpage.py @@ -8,7 +8,7 @@ import re ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) README_FILE = 
os.path.join(ROOT_DIR, 'README.md') -PREFIX = '''%YOUTUBE-DL(1) +PREFIX = r'''%YOUTUBE-DL(1) # NAME diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 7189f31d9..0f21be0a2 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -975,7 +975,7 @@ - **WSJArticle** - **XBef** - **XboxClips** - - **XFileShare**: XFileShare based sites: DaClips, FileHoot, GorillaVid, MovPod, PowerWatch, Rapidvideo.ws, TheVideoBee, Vidto, Streamin.To, XVIDSTAGE, Vid ABC, VidBom, vidlo, RapidVideo.TV + - **XFileShare**: XFileShare based sites: DaClips, FileHoot, GorillaVid, MovPod, PowerWatch, Rapidvideo.ws, TheVideoBee, Vidto, Streamin.To, XVIDSTAGE, Vid ABC, VidBom, vidlo, RapidVideo.TV, FastVideo.me - **XHamster** - **XHamsterEmbed** - **xiami:album**: 虾米音乐 - 专辑 diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py index 7ef327451..9e4e13bcf 100644 --- a/youtube_dl/compat.py +++ b/youtube_dl/compat.py @@ -2617,14 +2617,22 @@ except ImportError: # Python 2 parsed_result[name] = [value] return parsed_result -try: - from shlex import quote as compat_shlex_quote -except ImportError: # Python < 3.3 + +compat_os_name = os._name if os.name == 'java' else os.name + + +if compat_os_name == 'nt': def compat_shlex_quote(s): - if re.match(r'^[-_\w./]+$', s): - return s - else: - return "'" + s.replace("'", "'\"'\"'") + "'" + return s if re.match(r'^[-_\w./]+$', s) else '"%s"' % s.replace('"', '\\"') +else: + try: + from shlex import quote as compat_shlex_quote + except ImportError: # Python < 3.3 + def compat_shlex_quote(s): + if re.match(r'^[-_\w./]+$', s): + return s + else: + return "'" + s.replace("'", "'\"'\"'") + "'" try: @@ -2649,9 +2657,6 @@ def compat_ord(c): return ord(c) -compat_os_name = os._name if os.name == 'java' else os.name - - if sys.version_info >= (3, 0): compat_getenv = os.getenv compat_expanduser = os.path.expanduser diff --git a/youtube_dl/downloader/common.py b/youtube_dl/downloader/common.py index 5d6621147..77242dacc 100644 --- a/youtube_dl/downloader/common.py +++ b/youtube_dl/downloader/common.py @@ -8,10 +8,11 @@ import random from ..compat import compat_os_name from ..utils import ( + decodeArgument, encodeFilename, error_to_compat_str, - decodeArgument, format_bytes, + shell_quote, timeconvert, ) @@ -381,10 +382,5 @@ class FileDownloader(object): if exe is None: exe = os.path.basename(str_args[0]) - try: - import pipes - shell_quote = lambda args: ' '.join(map(pipes.quote, str_args)) - except ImportError: - shell_quote = repr self.to_screen('[debug] %s command line: %s' % ( exe, shell_quote(str_args))) diff --git a/youtube_dl/extractor/abcotvs.py b/youtube_dl/extractor/abcotvs.py index 76e98132b..03b92a39c 100644 --- a/youtube_dl/extractor/abcotvs.py +++ b/youtube_dl/extractor/abcotvs.py @@ -22,7 +22,7 @@ class ABCOTVSIE(InfoExtractor): 'display_id': 'east-bay-museum-celebrates-vintage-synthesizers', 'ext': 'mp4', 'title': 'East Bay museum celebrates vintage synthesizers', - 'description': 'md5:a4f10fb2f2a02565c1749d4adbab4b10', + 'description': 'md5:24ed2bd527096ec2a5c67b9d5a9005f3', 'thumbnail': r're:^https?://.*\.jpg$', 'timestamp': 1421123075, 'upload_date': '20150113', diff --git a/youtube_dl/extractor/adobepass.py b/youtube_dl/extractor/adobepass.py index d57ad85c2..3dfc632e7 100644 --- a/youtube_dl/extractor/adobepass.py +++ b/youtube_dl/extractor/adobepass.py @@ -6,7 +6,10 @@ import time import xml.etree.ElementTree as etree from .common import InfoExtractor -from ..compat import compat_urlparse +from ..compat import ( + compat_kwargs, + 
compat_urlparse, +) from ..utils import ( unescapeHTML, urlencode_postdata, @@ -1317,7 +1320,8 @@ class AdobePassIE(InfoExtractor): headers = kwargs.get('headers', {}) headers.update(self.geo_verification_headers()) kwargs['headers'] = headers - return super(AdobePassIE, self)._download_webpage_handle(*args, **kwargs) + return super(AdobePassIE, self)._download_webpage_handle( + *args, **compat_kwargs(kwargs)) @staticmethod def _get_mvpd_resource(provider_id, title, guid, rating): diff --git a/youtube_dl/extractor/asiancrush.py b/youtube_dl/extractor/asiancrush.py new file mode 100644 index 000000000..594c88c9c --- /dev/null +++ b/youtube_dl/extractor/asiancrush.py @@ -0,0 +1,93 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from .kaltura import KalturaIE +from ..utils import ( + extract_attributes, + remove_end, + urlencode_postdata, +) + + +class AsianCrushIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?asiancrush\.com/video/(?:[^/]+/)?0+(?P<id>\d+)v\b' + _TESTS = [{ + 'url': 'https://www.asiancrush.com/video/012869v/women-who-flirt/', + 'md5': 'c3b740e48d0ba002a42c0b72857beae6', + 'info_dict': { + 'id': '1_y4tmjm5r', + 'ext': 'mp4', + 'title': 'Women Who Flirt', + 'description': 'md5:3db14e9186197857e7063522cb89a805', + 'timestamp': 1496936429, + 'upload_date': '20170608', + 'uploader_id': 'craig@crifkin.com', + }, + }, { + 'url': 'https://www.asiancrush.com/video/she-was-pretty/011886v-pretty-episode-3/', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + data = self._download_json( + 'https://www.asiancrush.com/wp-admin/admin-ajax.php', video_id, + data=urlencode_postdata({ + 'postid': video_id, + 'action': 'get_channel_kaltura_vars', + })) + + entry_id = data['entry_id'] + + return self.url_result( + 'kaltura:%s:%s' % (data['partner_id'], entry_id), + ie=KalturaIE.ie_key(), video_id=entry_id, + video_title=data.get('vid_label')) + + +class AsianCrushPlaylistIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?asiancrush\.com/series/0+(?P<id>\d+)s\b' + _TEST = { + 'url': 'https://www.asiancrush.com/series/012481s/scholar-walks-night/', + 'info_dict': { + 'id': '12481', + 'title': 'Scholar Who Walks the Night', + 'description': 'md5:7addd7c5132a09fd4741152d96cce886', + }, + 'playlist_count': 20, + } + + def _real_extract(self, url): + playlist_id = self._match_id(url) + + webpage = self._download_webpage(url, playlist_id) + + entries = [] + + for mobj in re.finditer( + r'<a[^>]+href=(["\'])(?P<url>%s.*?)\1[^>]*>' % AsianCrushIE._VALID_URL, + webpage): + attrs = extract_attributes(mobj.group(0)) + if attrs.get('class') == 'clearfix': + entries.append(self.url_result( + mobj.group('url'), ie=AsianCrushIE.ie_key())) + + title = remove_end( + self._html_search_regex( + r'(?s)<h1\b[^>]\bid=["\']movieTitle[^>]+>(.+?)</h1>', webpage, + 'title', default=None) or self._og_search_title( + webpage, default=None) or self._html_search_meta( + 'twitter:title', webpage, 'title', + default=None) or self._search_regex( + r'<title>([^<]+)</title>', webpage, 'title', fatal=False), + ' | AsianCrush') + + description = self._og_search_description( + webpage, default=None) or self._html_search_meta( + 'twitter:description', webpage, 'description', fatal=False) + + return self.playlist_result(entries, playlist_id, title, description) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 7e45232dd..e97691daa 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py
@@ -71,6 +71,10 @@ from .arte import ( TheOperaPlatformIE, ArteTVPlaylistIE, ) +from .asiancrush import ( + AsianCrushIE, + AsianCrushPlaylistIE, +) from .atresplayer import AtresPlayerIE from .atttechchannel import ATTTechChannelIE from .atvat import ATVAtIE @@ -871,6 +875,7 @@ from .rutube import ( ) from .rutv import RUTVIE from .ruutu import RuutuIE +from .ruv import RuvIE from .sandia import SandiaIE from .safari import ( SafariIE, diff --git a/youtube_dl/extractor/googledrive.py b/youtube_dl/extractor/googledrive.py index fec36cbbb..9705cfadd 100644 --- a/youtube_dl/extractor/googledrive.py +++ b/youtube_dl/extractor/googledrive.py @@ -69,19 +69,32 @@ class GoogleDriveIE(InfoExtractor): r'"fmt_stream_map"\s*,\s*"([^"]+)', webpage, 'fmt stream map').split(',') fmt_list = self._search_regex(r'"fmt_list"\s*,\s*"([^"]+)', webpage, 'fmt_list').split(',') + resolutions = {} + for fmt in fmt_list: + mobj = re.search( + r'^(?P<format_id>\d+)/(?P<width>\d+)[xX](?P<height>\d+)', fmt) + if mobj: + resolutions[mobj.group('format_id')] = ( + int(mobj.group('width')), int(mobj.group('height'))) + formats = [] - for fmt, fmt_stream in zip(fmt_list, fmt_stream_map): - fmt_id, fmt_url = fmt_stream.split('|') - resolution = fmt.split('/')[1] - width, height = resolution.split('x') - formats.append({ - 'url': lowercase_escape(fmt_url), - 'format_id': fmt_id, - 'resolution': resolution, - 'width': int_or_none(width), - 'height': int_or_none(height), - 'ext': self._FORMATS_EXT[fmt_id], - }) + for fmt_stream in fmt_stream_map: + fmt_stream_split = fmt_stream.split('|') + if len(fmt_stream_split) < 2: + continue + format_id, format_url = fmt_stream_split[:2] + f = { + 'url': lowercase_escape(format_url), + 'format_id': format_id, + 'ext': self._FORMATS_EXT[format_id], + } + resolution = resolutions.get(format_id) + if resolution: + f.update({ + 'width': resolution[0], + 'height': resolution[1], + }) + formats.append(f) self._sort_formats(formats) return { diff --git a/youtube_dl/extractor/pandoratv.py b/youtube_dl/extractor/pandoratv.py index 89c95fffb..fc7bd3411 100644 --- a/youtube_dl/extractor/pandoratv.py +++ b/youtube_dl/extractor/pandoratv.py @@ -19,7 +19,7 @@ class PandoraTVIE(InfoExtractor): IE_NAME = 'pandora.tv' IE_DESC = '판도라TV' _VALID_URL = r'https?://(?:.+?\.)?channel\.pandora\.tv/channel/video\.ptv\?'
- _TEST = { + _TESTS = [{ 'url': 'http://jp.channel.pandora.tv/channel/video.ptv?c1=&prgid=53294230&ch_userid=mikakim&ref=main&lot=cate_01_2', 'info_dict': { 'id': '53294230', @@ -34,7 +34,26 @@ class PandoraTVIE(InfoExtractor): 'view_count': int, 'like_count': int, } - } + }, { + 'url': 'http://channel.pandora.tv/channel/video.ptv?ch_userid=gogoucc&prgid=54721744', + 'info_dict': { + 'id': '54721744', + 'ext': 'flv', + 'title': '[HD] JAPAN COUNTDOWN 170423', + 'description': '[HD] JAPAN COUNTDOWN 170423', + 'thumbnail': r're:^https?://.*\.jpg$', + 'duration': 1704.9, + 'upload_date': '20170423', + 'uploader': 'GOGO_UCC', + 'uploader_id': 'gogoucc', + 'view_count': int, + 'like_count': int, + }, + 'params': { + # Test metadata only + 'skip_download': True, + }, + }] def _real_extract(self, url): qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query) @@ -86,7 +105,7 @@ class PandoraTVIE(InfoExtractor): 'description': info.get('body'), 'thumbnail': info.get('thumbnail') or info.get('poster'), 'duration': float_or_none(info.get('runtime'), 1000) or parse_duration(info.get('time')), - 'upload_date': info['fid'][:8] if isinstance(info.get('fid'), compat_str) else None, + 'upload_date': info['fid'].split('/')[-1][:8] if isinstance(info.get('fid'), compat_str) else None, 'uploader': info.get('nickname'), 'uploader_id': info.get('upload_userid'), 'view_count': str_to_int(info.get('hit')), diff --git a/youtube_dl/extractor/ruv.py b/youtube_dl/extractor/ruv.py new file mode 100644 index 000000000..8f3cc4095 --- /dev/null +++ b/youtube_dl/extractor/ruv.py @@ -0,0 +1,101 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + determine_ext, + unified_timestamp, +) + + +class RuvIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?ruv\.is/(?:sarpurinn/[^/]+|node)/(?P<id>[^/]+(?:/\d+)?)' + _TESTS = [{ + # m3u8 + 'url': 'http://ruv.is/sarpurinn/ruv-aukaras/fh-valur/20170516', + 'md5': '66347652f4e13e71936817102acc1724', + 'info_dict': { + 'id': '1144499', + 'display_id': 'fh-valur/20170516', + 'ext': 'mp4', + 'title': 'FH - Valur', + 'description': 'Bein útsending frá 3.
leik FH og Vals í úrslitum Olísdeildar karla í handbolta.', + 'timestamp': 1494963600, + 'upload_date': '20170516', + }, + }, { + # mp3 + 'url': 'http://ruv.is/sarpurinn/ras-2/morgunutvarpid/20170619', + 'md5': '395ea250c8a13e5fdb39d4670ef85378', + 'info_dict': { + 'id': '1153630', + 'display_id': 'morgunutvarpid/20170619', + 'ext': 'mp3', + 'title': 'Morgunútvarpið', + 'description': 'md5:a4cf1202c0a1645ca096b06525915418', + 'timestamp': 1497855000, + 'upload_date': '20170619', + }, + }, { + 'url': 'http://ruv.is/sarpurinn/ruv/frettir/20170614', + 'only_matching': True, + }, { + 'url': 'http://www.ruv.is/node/1151854', + 'only_matching': True, + }, { + 'url': 'http://ruv.is/sarpurinn/klippa/secret-soltice-hefst-a-morgun', + 'only_matching': True, + }, { + 'url': 'http://ruv.is/sarpurinn/ras-1/morgunvaktin/20170619', + 'only_matching': True, + }] + + def _real_extract(self, url): + display_id = self._match_id(url) + + webpage = self._download_webpage(url, display_id) + + title = self._og_search_title(webpage) + + FIELD_RE = r'video\.%s\s*=\s*(["\'])(?P<url>(?:(?!\1).)+)\1' + + media_url = self._html_search_regex( + FIELD_RE % 'src', webpage, 'video URL', group='url') + + video_id = self._search_regex( + r'<link\b[^>]+\bhref=["\']https?://www\.ruv\.is/node/(\d+)', + webpage, 'video id', default=display_id) + + ext = determine_ext(media_url) + + if ext == 'm3u8': + formats = self._extract_m3u8_formats( + media_url, video_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id='hls') + elif ext == 'mp3': + formats = [{ + 'format_id': 'mp3', + 'url': media_url, + 'vcodec': 'none', + }] + else: + formats = [{ + 'url': media_url, + }] + + description = self._og_search_description(webpage, default=None) + thumbnail = self._og_search_thumbnail( + webpage, default=None) or self._search_regex( + FIELD_RE % 'poster', webpage, 'thumbnail', fatal=False) + timestamp = unified_timestamp(self._html_search_meta( + 'article:published_time', webpage, 'timestamp', fatal=False)) + + return { + 'id': video_id, + 'display_id': display_id, + 'title': title, + 'description': description, + 'thumbnail': thumbnail, + 'timestamp': timestamp, + 'formats': formats, + } diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index a483c8409..c3f71b45e 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -615,7 +615,10 @@ class VimeoIE(VimeoBaseInfoExtractor): if download_url and not source_file.get('is_cold') and not source_file.get('is_defrosting'): source_name = source_file.get('public_name', 'Original') if self._is_valid_url(download_url, video_id, '%s video' % source_name): - ext = source_file.get('extension', determine_ext(download_url)).lower() + ext = (try_get( + source_file, lambda x: x['extension'], + compat_str) or determine_ext( + download_url, None) or 'mp4').lower() formats.append({ 'url': download_url, 'ext': ext, diff --git a/youtube_dl/extractor/viu.py b/youtube_dl/extractor/viu.py index db6a65d2e..5cf93591c 100644 --- a/youtube_dl/extractor/viu.py +++ b/youtube_dl/extractor/viu.py @@ -4,7 +4,10 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..compat import compat_str +from ..compat import ( + compat_kwargs, + compat_str, +) from ..utils import ( ExtractorError, int_or_none, @@ -36,7 +39,8 @@ class ViuBaseIE(InfoExtractor): headers.update(kwargs.get('headers', {})) kwargs['headers'] = headers response = self._download_json( - 'https://www.viu.com/api/' + path, *args, **kwargs)['response'] + 'https://www.viu.com/api/' + path, *args,
+ **compat_kwargs(kwargs))['response'] if response.get('status') != 'success': raise ExtractorError('%s said: %s' % ( self.IE_NAME, response['message']), expected=True) diff --git a/youtube_dl/extractor/watchindianporn.py b/youtube_dl/extractor/watchindianporn.py index ed099beea..fadc539ee 100644 --- a/youtube_dl/extractor/watchindianporn.py +++ b/youtube_dl/extractor/watchindianporn.py @@ -4,11 +4,7 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..utils import ( - unified_strdate, - parse_duration, - int_or_none, -) +from ..utils import parse_duration class WatchIndianPornIE(InfoExtractor): @@ -23,11 +19,8 @@ class WatchIndianPornIE(InfoExtractor): 'ext': 'mp4', 'title': 'Hot milf from kerala shows off her gorgeous large breasts on camera', 'thumbnail': r're:^https?://.*\.jpg$', - 'uploader': 'LoveJay', - 'upload_date': '20160428', 'duration': 226, 'view_count': int, - 'comment_count': int, 'categories': list, 'age_limit': 18, } @@ -40,51 +33,36 @@ class WatchIndianPornIE(InfoExtractor): webpage = self._download_webpage(url, display_id) - video_url = self._html_search_regex( - r"url: escape\('([^']+)'\)", webpage, 'url') + info_dict = self._parse_html5_media_entries(url, webpage, video_id)[0] - title = self._html_search_regex( - r'

(.*?)', - webpage, 'title') - thumbnail = self._html_search_regex( - r'\s*(.*?)', - webpage, 'uploader') - upload_date = unified_strdate(self._html_search_regex( - r'Added: (.+?)', webpage, 'upload date', fatal=False)) + title = self._html_search_regex(( + r'<title>(.+?)\s*-\s*Indian\s+Porn</title>', + r'<h4>(.+?)</h4>' + ), webpage, 'title') duration = parse_duration(self._search_regex( - r'Time:\s*\s*\s*(.+?)\s*', + r'Time:\s*<strong>\s*(.+?)\s*</strong>', webpage, 'duration', fatal=False)) - view_count = int_or_none(self._search_regex( - r'Views:\s*\s*\s*(\d+)\s*', + view_count = int(self._search_regex( + r'(?s)Time:\s*<strong>.*?</strong>.*?<strong>\s*(\d+)\s*</strong>', webpage, 'view count', fatal=False)) - comment_count = int_or_none(self._search_regex( - r'Comments:\s*\s*\s*(\d+)\s*', - webpage, 'comment count', fatal=False)) categories = re.findall( - r'([^<]+)', + r'<a[^>]+class=[\'"]categories[\'"][^>]*>\s*([^<]+)\s*</a>', webpage) - return { + info_dict.update({ 'id': video_id, 'display_id': display_id, - 'url': video_url, 'http_headers': { 'Referer': url, }, 'title': title, - 'thumbnail': thumbnail, - 'uploader': uploader, - 'upload_date': upload_date, 'duration': duration, 'view_count': view_count, - 'comment_count': comment_count, 'categories': categories, 'age_limit': 18, - } + }) + + return info_dict diff --git a/youtube_dl/postprocessor/execafterdownload.py b/youtube_dl/postprocessor/execafterdownload.py index 90630c2d7..64dabe790 100644 --- a/youtube_dl/postprocessor/execafterdownload.py +++ b/youtube_dl/postprocessor/execafterdownload.py @@ -4,7 +4,10 @@ import subprocess from .common import PostProcessor from ..compat import compat_shlex_quote -from ..utils import PostProcessingError +from ..utils import ( + encodeArgument, + PostProcessingError, +) class ExecAfterDownloadPP(PostProcessor): @@ -20,7 +23,7 @@ class ExecAfterDownloadPP(PostProcessor): cmd = cmd.replace('{}', compat_shlex_quote(information['filepath'])) self._downloader.to_screen('[exec] Executing command: %s' % cmd) - retCode = subprocess.call(cmd, shell=True) + retCode = subprocess.call(encodeArgument(cmd), shell=True) if retCode != 0: raise PostProcessingError( 'Command returned error code %d' % retCode) diff --git a/youtube_dl/postprocessor/metadatafromtitle.py b/youtube_dl/postprocessor/metadatafromtitle.py index c73f02447..f5c14d974 100644 --- a/youtube_dl/postprocessor/metadatafromtitle.py +++ b/youtube_dl/postprocessor/metadatafromtitle.py @@ -35,11 +35,14 @@ class MetadataFromTitlePP(PostProcessor): title = info['title'] match = re.match(self._titleregex, title) if match is None: - self._downloader.to_screen('[fromtitle] Could not interpret title of video as "%s"' % self._titleformat) + self._downloader.to_screen( + '[fromtitle] Could not interpret title of video as "%s"' + % self._titleformat) return [], info for attribute, value in match.groupdict().items(): - value = match.group(attribute) info[attribute] = value - self._downloader.to_screen('[fromtitle] parsed ' + attribute + ': ' + value) + self._downloader.to_screen( + '[fromtitle] parsed %s: %s' + % (attribute, value if value is not None else 'NA')) return [], info diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 1973bd483..39860e9d1 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -22,7 +22,6 @@ import locale import math import operator import os -import pipes import platform import random import re @@ -1535,7 +1534,7 @@ def shell_quote(args): if isinstance(a, bytes): # We may get a filename encoded with 'encodeFilename' a = a.decode(encoding) - quoted_args.append(pipes.quote(a)) + quoted_args.append(compat_shlex_quote(a)) return ' '.join(quoted_args) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index a7386c3a8..8782a6a1e 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2017.06.12'
+__version__ = '2017.06.18'