From 4f90d2aeacccab6dc461e92236480701c4a0900b Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sat, 17 Jun 2017 17:09:24 +0800 Subject: [PATCH 01/19] [Makefile] Excluding __pycache__ correctly (#13400) --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 023556391..84ccce2b3 100644 --- a/Makefile +++ b/Makefile @@ -101,7 +101,7 @@ youtube-dl.tar.gz: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash- --exclude '*.pyc' \ --exclude '*.pyo' \ --exclude '*~' \ - --exclude '__pycache' \ + --exclude '__pycache__' \ --exclude '.git' \ --exclude 'testdata' \ --exclude 'docs/_build' \ From 473e87064b7b60ea68147401a2e6487b715f25c8 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sat, 17 Jun 2017 17:37:25 +0800 Subject: [PATCH 02/19] [devscripts/prepare_manpage] Fix deprecated escape sequence on py36 --- devscripts/prepare_manpage.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/devscripts/prepare_manpage.py b/devscripts/prepare_manpage.py index f9fe63f1f..76bf873e1 100644 --- a/devscripts/prepare_manpage.py +++ b/devscripts/prepare_manpage.py @@ -8,7 +8,7 @@ import re ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) README_FILE = os.path.join(ROOT_DIR, 'README.md') -PREFIX = '''%YOUTUBE-DL(1) +PREFIX = r'''%YOUTUBE-DL(1) # NAME From be80986ed9175af0a0fb216edfdfaeeb9769d1dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 17 Jun 2017 19:01:27 +0700 Subject: [PATCH 03/19] [postprocessor/metadatafromtitle] Fix missing optional meta fields (closes #13408) --- youtube_dl/postprocessor/metadatafromtitle.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/youtube_dl/postprocessor/metadatafromtitle.py b/youtube_dl/postprocessor/metadatafromtitle.py index c73f02447..f5c14d974 100644 --- a/youtube_dl/postprocessor/metadatafromtitle.py +++ b/youtube_dl/postprocessor/metadatafromtitle.py @@ -35,11 +35,14 @@ class 
MetadataFromTitlePP(PostProcessor): title = info['title'] match = re.match(self._titleregex, title) if match is None: - self._downloader.to_screen('[fromtitle] Could not interpret title of video as "%s"' % self._titleformat) + self._downloader.to_screen( + '[fromtitle] Could not interpret title of video as "%s"' + % self._titleformat) return [], info for attribute, value in match.groupdict().items(): - value = match.group(attribute) info[attribute] = value - self._downloader.to_screen('[fromtitle] parsed ' + attribute + ': ' + value) + self._downloader.to_screen( + '[fromtitle] parsed %s: %s' + % (attribute, value if value is not None else 'NA')) return [], info From b08e235f09b360262b97ef361a2321839a0534cc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 17 Jun 2017 23:14:24 +0700 Subject: [PATCH 04/19] [compat] Fix compat_shlex_quote on Windows (closes #5889, closes #10254) --- youtube_dl/compat.py | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py index 7ef327451..9e4e13bcf 100644 --- a/youtube_dl/compat.py +++ b/youtube_dl/compat.py @@ -2617,14 +2617,22 @@ except ImportError: # Python 2 parsed_result[name] = [value] return parsed_result -try: - from shlex import quote as compat_shlex_quote -except ImportError: # Python < 3.3 + +compat_os_name = os._name if os.name == 'java' else os.name + + +if compat_os_name == 'nt': def compat_shlex_quote(s): - if re.match(r'^[-_\w./]+$', s): - return s - else: - return "'" + s.replace("'", "'\"'\"'") + "'" + return s if re.match(r'^[-_\w./]+$', s) else '"%s"' % s.replace('"', '\\"') +else: + try: + from shlex import quote as compat_shlex_quote + except ImportError: # Python < 3.3 + def compat_shlex_quote(s): + if re.match(r'^[-_\w./]+$', s): + return s + else: + return "'" + s.replace("'", "'\"'\"'") + "'" try: @@ -2649,9 +2657,6 @@ def compat_ord(c): return ord(c) -compat_os_name = os._name if os.name == 'java' 
else os.name - - if sys.version_info >= (3, 0): compat_getenv = os.getenv compat_expanduser = os.path.expanduser From 8b6ac49ecc873f787f647f1e623af989f85de644 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 17 Jun 2017 23:15:57 +0700 Subject: [PATCH 05/19] [postprocessor/execafterdownload] Encode command line (closes #13407) --- youtube_dl/postprocessor/execafterdownload.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/youtube_dl/postprocessor/execafterdownload.py b/youtube_dl/postprocessor/execafterdownload.py index 90630c2d7..64dabe790 100644 --- a/youtube_dl/postprocessor/execafterdownload.py +++ b/youtube_dl/postprocessor/execafterdownload.py @@ -4,7 +4,10 @@ import subprocess from .common import PostProcessor from ..compat import compat_shlex_quote -from ..utils import PostProcessingError +from ..utils import ( + encodeArgument, + PostProcessingError, +) class ExecAfterDownloadPP(PostProcessor): @@ -20,7 +23,7 @@ class ExecAfterDownloadPP(PostProcessor): cmd = cmd.replace('{}', compat_shlex_quote(information['filepath'])) self._downloader.to_screen('[exec] Executing command: %s' % cmd) - retCode = subprocess.call(cmd, shell=True) + retCode = subprocess.call(encodeArgument(cmd), shell=True) if retCode != 0: raise PostProcessingError( 'Command returned error code %d' % retCode) From aefce8e6dca8a0ae784468a5e1a4ba01f1471380 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 17 Jun 2017 23:48:58 +0700 Subject: [PATCH 06/19] [utils] Use compat_shlex_quote in shell_quote --- youtube_dl/utils.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 1973bd483..39860e9d1 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -22,7 +22,6 @@ import locale import math import operator import os -import pipes import platform import random import re @@ -1535,7 +1534,7 @@ def shell_quote(args): if isinstance(a, bytes): # We may get a 
filename encoded with 'encodeFilename' a = a.decode(encoding) - quoted_args.append(pipes.quote(a)) + quoted_args.append(compat_shlex_quote(a)) return ' '.join(quoted_args) From 1433734c35dbfe16193ddcb6e59f55b58e76ba74 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 17 Jun 2017 23:50:21 +0700 Subject: [PATCH 07/19] [downloader/common] Use utils.shell_quote for debug command line --- youtube_dl/downloader/common.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/youtube_dl/downloader/common.py b/youtube_dl/downloader/common.py index 5d6621147..77242dacc 100644 --- a/youtube_dl/downloader/common.py +++ b/youtube_dl/downloader/common.py @@ -8,10 +8,11 @@ import random from ..compat import compat_os_name from ..utils import ( + decodeArgument, encodeFilename, error_to_compat_str, - decodeArgument, format_bytes, + shell_quote, timeconvert, ) @@ -381,10 +382,5 @@ class FileDownloader(object): if exe is None: exe = os.path.basename(str_args[0]) - try: - import pipes - shell_quote = lambda args: ' '.join(map(pipes.quote, str_args)) - except ImportError: - shell_quote = repr self.to_screen('[debug] %s command line: %s' % ( exe, shell_quote(str_args))) From a6f29820c6b4911a3c0a38ede6cff0bb3efdab9f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 18 Jun 2017 00:15:43 +0700 Subject: [PATCH 08/19] [ChangeLog] Actualize --- ChangeLog | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/ChangeLog b/ChangeLog index 5cd0b3393..b738d2063 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,10 +1,25 @@ version Core +* [downloader/common] Use utils.shell_quote for debug command line +* [utils] Use compat_shlex_quote in shell_quote +* [postprocessor/execafterdownload] Encode command line (#13407) +* [compat] Fix compat_shlex_quote on Windows (#5889, #10254) +* [postprocessor/metadatafromtitle] Fix missing optional meta fields processing + in --metadata-from-title (#13408) * [extractor/common] Fix json 
dumping with --geo-bypass ++ [extractor/common] Improve jwplayer subtitles extraction ++ [extractor/common] Improve jwplayer formats extraction (#13379) Extractors +* [polskieradio] Fix extraction (#13392) ++ [xfileshare] Add support for fastvideo.me (#13385) * [bilibili] Fix extraction of videos with double quotes in titles (#13387) +* [4tube] Fix extraction (#13381, #13382) ++ [disney] Add support for disneychannel.de (#13383) +* [npo] Improve URL regular expression (#13376) ++ [corus] Add support for showcase.ca ++ [corus] Add support for history.ca (#13359) version 2017.06.12 From eaa006d1bd6a5fd1d3c482a04963068061fd2682 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 18 Jun 2017 00:16:49 +0700 Subject: [PATCH 09/19] release 2017.06.18 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 2 +- youtube_dl/version.py | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index b8806e7b3..002c1274a 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.06.12*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.06.12** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.06.18*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. 
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.06.18** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2017.06.12 +[debug] youtube-dl version 2017.06.18 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index b738d2063..29d21e4e4 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2017.06.18 Core * [downloader/common] Use utils.shell_quote for debug command line diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 7189f31d9..0f21be0a2 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -975,7 +975,7 @@ - **WSJArticle** - **XBef** - **XboxClips** - - **XFileShare**: XFileShare based sites: DaClips, FileHoot, GorillaVid, MovPod, PowerWatch, Rapidvideo.ws, TheVideoBee, Vidto, Streamin.To, XVIDSTAGE, Vid ABC, VidBom, vidlo, RapidVideo.TV + - **XFileShare**: XFileShare based sites: DaClips, FileHoot, GorillaVid, MovPod, PowerWatch, Rapidvideo.ws, TheVideoBee, Vidto, Streamin.To, XVIDSTAGE, Vid ABC, VidBom, vidlo, RapidVideo.TV, FastVideo.me - **XHamster** - **XHamsterEmbed** - **xiami:album**: 虾米音乐 - 专辑 diff --git a/youtube_dl/version.py b/youtube_dl/version.py index a7386c3a8..8782a6a1e 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2017.06.12' +__version__ = '2017.06.18' From 
560050669b1e98bc76c797256c6e21719541b2ac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 18 Jun 2017 20:18:51 +0700 Subject: [PATCH 10/19] [asiancrush] Add extractor (closes #13420) --- youtube_dl/extractor/asiancrush.py | 93 ++++++++++++++++++++++++++++++ youtube_dl/extractor/extractors.py | 4 ++ 2 files changed, 97 insertions(+) create mode 100644 youtube_dl/extractor/asiancrush.py diff --git a/youtube_dl/extractor/asiancrush.py b/youtube_dl/extractor/asiancrush.py new file mode 100644 index 000000000..594c88c9c --- /dev/null +++ b/youtube_dl/extractor/asiancrush.py @@ -0,0 +1,93 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from .kaltura import KalturaIE +from ..utils import ( + extract_attributes, + remove_end, + urlencode_postdata, +) + + +class AsianCrushIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?asiancrush\.com/video/(?:[^/]+/)?0+(?P\d+)v\b' + _TESTS = [{ + 'url': 'https://www.asiancrush.com/video/012869v/women-who-flirt/', + 'md5': 'c3b740e48d0ba002a42c0b72857beae6', + 'info_dict': { + 'id': '1_y4tmjm5r', + 'ext': 'mp4', + 'title': 'Women Who Flirt', + 'description': 'md5:3db14e9186197857e7063522cb89a805', + 'timestamp': 1496936429, + 'upload_date': '20170608', + 'uploader_id': 'craig@crifkin.com', + }, + }, { + 'url': 'https://www.asiancrush.com/video/she-was-pretty/011886v-pretty-episode-3/', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + data = self._download_json( + 'https://www.asiancrush.com/wp-admin/admin-ajax.php', video_id, + data=urlencode_postdata({ + 'postid': video_id, + 'action': 'get_channel_kaltura_vars', + })) + + entry_id = data['entry_id'] + + return self.url_result( + 'kaltura:%s:%s' % (data['partner_id'], entry_id), + ie=KalturaIE.ie_key(), video_id=entry_id, + video_title=data.get('vid_label')) + + +class AsianCrushPlaylistIE(InfoExtractor): + _VALID_URL = 
r'https?://(?:www\.)?asiancrush\.com/series/0+(?P\d+)s\b' + _TEST = { + 'url': 'https://www.asiancrush.com/series/012481s/scholar-walks-night/', + 'info_dict': { + 'id': '12481', + 'title': 'Scholar Who Walks the Night', + 'description': 'md5:7addd7c5132a09fd4741152d96cce886', + }, + 'playlist_count': 20, + } + + def _real_extract(self, url): + playlist_id = self._match_id(url) + + webpage = self._download_webpage(url, playlist_id) + + entries = [] + + for mobj in re.finditer( + r']+href=(["\'])(?P%s.*?)\1[^>]*>' % AsianCrushIE._VALID_URL, + webpage): + attrs = extract_attributes(mobj.group(0)) + if attrs.get('class') == 'clearfix': + entries.append(self.url_result( + mobj.group('url'), ie=AsianCrushIE.ie_key())) + + title = remove_end( + self._html_search_regex( + r'(?s)]\bid=["\']movieTitle[^>]+>(.+?)', webpage, + 'title', default=None) or self._og_search_title( + webpage, default=None) or self._html_search_meta( + 'twitter:title', webpage, 'title', + default=None) or self._search_regex( + r'([^<]+)', webpage, 'title', fatal=False), + ' | AsianCrush') + + description = self._og_search_description( + webpage, default=None) or self._html_search_meta( + 'twitter:description', webpage, 'description', fatal=False) + + return self.playlist_result(entries, playlist_id, title, description) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 7e45232dd..edbb4bdde 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -71,6 +71,10 @@ from .arte import ( TheOperaPlatformIE, ArteTVPlaylistIE, ) +from .asiancrush import ( + AsianCrushIE, + AsianCrushPlaylistIE, +) from .atresplayer import AtresPlayerIE from .atttechchannel import ATTTechChannelIE from .atvat import ATVAtIE From cc2ffe5afe692bccc9214110bd253b87f4361856 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Mon, 19 Jun 2017 16:20:36 +0800 Subject: [PATCH 11/19] [pandora.tv] Fix upload_date extraction (closes #12846) --- ChangeLog | 6 ++++++ 
youtube_dl/extractor/pandoratv.py | 25 ++++++++++++++++++++++--- 2 files changed, 28 insertions(+), 3 deletions(-) diff --git a/ChangeLog b/ChangeLog index 29d21e4e4..7f077fd24 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +version + +Extractors +* [pandora.tv] Fix upload_date extraction (#12846) + + version 2017.06.18 Core diff --git a/youtube_dl/extractor/pandoratv.py b/youtube_dl/extractor/pandoratv.py index 89c95fffb..fc7bd3411 100644 --- a/youtube_dl/extractor/pandoratv.py +++ b/youtube_dl/extractor/pandoratv.py @@ -19,7 +19,7 @@ class PandoraTVIE(InfoExtractor): IE_NAME = 'pandora.tv' IE_DESC = '판도라TV' _VALID_URL = r'https?://(?:.+?\.)?channel\.pandora\.tv/channel/video\.ptv\?' - _TEST = { + _TESTS = [{ 'url': 'http://jp.channel.pandora.tv/channel/video.ptv?c1=&prgid=53294230&ch_userid=mikakim&ref=main&lot=cate_01_2', 'info_dict': { 'id': '53294230', @@ -34,7 +34,26 @@ class PandoraTVIE(InfoExtractor): 'view_count': int, 'like_count': int, } - } + }, { + 'url': 'http://channel.pandora.tv/channel/video.ptv?ch_userid=gogoucc&prgid=54721744', + 'info_dict': { + 'id': '54721744', + 'ext': 'flv', + 'title': '[HD] JAPAN COUNTDOWN 170423', + 'description': '[HD] JAPAN COUNTDOWN 170423', + 'thumbnail': r're:^https?://.*\.jpg$', + 'duration': 1704.9, + 'upload_date': '20170423', + 'uploader': 'GOGO_UCC', + 'uploader_id': 'gogoucc', + 'view_count': int, + 'like_count': int, + }, + 'params': { + # Test metadata only + 'skip_download': True, + }, + }] def _real_extract(self, url): qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query) @@ -86,7 +105,7 @@ class PandoraTVIE(InfoExtractor): 'description': info.get('body'), 'thumbnail': info.get('thumbnail') or info.get('poster'), 'duration': float_or_none(info.get('runtime'), 1000) or parse_duration(info.get('time')), - 'upload_date': info['fid'][:8] if isinstance(info.get('fid'), compat_str) else None, + 'upload_date': info['fid'].split('/')[-1][:8] if isinstance(info.get('fid'), compat_str) else None, 
'uploader': info.get('nickname'), 'uploader_id': info.get('upload_userid'), 'view_count': str_to_int(info.get('hit')), From 2d2132ac6e75932c9c7e6c143f20156482043729 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 19 Jun 2017 22:53:39 +0700 Subject: [PATCH 12/19] [adobepass] Fix extraction on older python 2.6 --- youtube_dl/extractor/adobepass.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/adobepass.py b/youtube_dl/extractor/adobepass.py index d57ad85c2..3dfc632e7 100644 --- a/youtube_dl/extractor/adobepass.py +++ b/youtube_dl/extractor/adobepass.py @@ -6,7 +6,10 @@ import time import xml.etree.ElementTree as etree from .common import InfoExtractor -from ..compat import compat_urlparse +from ..compat import ( + compat_kwargs, + compat_urlparse, +) from ..utils import ( unescapeHTML, urlencode_postdata, @@ -1317,7 +1320,8 @@ class AdobePassIE(InfoExtractor): headers = kwargs.get('headers', {}) headers.update(self.geo_verification_headers()) kwargs['headers'] = headers - return super(AdobePassIE, self)._download_webpage_handle(*args, **kwargs) + return super(AdobePassIE, self)._download_webpage_handle( + *args, **compat_kwargs(kwargs)) @staticmethod def _get_mvpd_resource(provider_id, title, guid, rating): From fee00b3884922f6cb44926349df788d0fc589811 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 19 Jun 2017 22:57:37 +0700 Subject: [PATCH 13/19] [viu] Fix extraction on older python 2.6 --- youtube_dl/extractor/viu.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/viu.py b/youtube_dl/extractor/viu.py index db6a65d2e..5cf93591c 100644 --- a/youtube_dl/extractor/viu.py +++ b/youtube_dl/extractor/viu.py @@ -4,7 +4,10 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..compat import compat_str +from ..compat import ( + compat_kwargs, + compat_str, +) from ..utils import ( 
ExtractorError, int_or_none, @@ -36,7 +39,8 @@ class ViuBaseIE(InfoExtractor): headers.update(kwargs.get('headers', {})) kwargs['headers'] = headers response = self._download_json( - 'https://www.viu.com/api/' + path, *args, **kwargs)['response'] + 'https://www.viu.com/api/' + path, *args, + **compat_kwargs(kwargs))['response'] if response.get('status') != 'success': raise ExtractorError('%s said: %s' % ( self.IE_NAME, response['message']), expected=True) From a1de83e5f01cc220fa45caee80b9159cc555609d Mon Sep 17 00:00:00 2001 From: Orn Date: Thu, 15 Jun 2017 22:29:27 +0000 Subject: [PATCH 14/19] [ruv] Add extractor --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/ruv.py | 31 ++++++++++++++++++++++++++++++ 2 files changed, 32 insertions(+) create mode 100644 youtube_dl/extractor/ruv.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index edbb4bdde..e97691daa 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -875,6 +875,7 @@ from .rutube import ( ) from .rutv import RUTVIE from .ruutu import RuutuIE +from .ruv import RuvIE from .sandia import SandiaIE from .safari import ( SafariIE, diff --git a/youtube_dl/extractor/ruv.py b/youtube_dl/extractor/ruv.py new file mode 100644 index 000000000..518fb5027 --- /dev/null +++ b/youtube_dl/extractor/ruv.py @@ -0,0 +1,31 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor + + +class RuvIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?ruv\.is/sarpurinn/ruv/\w+/(?P[0-9]+)' + _TEST = { + 'url': 'http://ruv.is/sarpurinn/ruv/frettir/20170614', + 'md5': 'a07ea1ebaba64082d90323b1c96f264b', + 'info_dict': { + 'id': '20170614', + 'ext': 'mp4', + 'title': 'Fréttir', + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + title = self._og_search_title(webpage) + video_url = 
self._html_search_regex(r'video\.src\s*=\s*["\'](.+?)["\']', webpage, 'video URL') + + return { + 'id': video_id, + 'title': title, + 'url': video_url, + 'ext': 'mp4' + } From 85cbcede5b9ad2b059e799bb9047264c2a38b7fe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 19 Jun 2017 23:45:09 +0700 Subject: [PATCH 15/19] [ruv] Improve, extract all formats and metadata (closes #13396) --- youtube_dl/extractor/ruv.py | 96 ++++++++++++++++++++++++++++++++----- 1 file changed, 83 insertions(+), 13 deletions(-) diff --git a/youtube_dl/extractor/ruv.py b/youtube_dl/extractor/ruv.py index 518fb5027..8f3cc4095 100644 --- a/youtube_dl/extractor/ruv.py +++ b/youtube_dl/extractor/ruv.py @@ -2,30 +2,100 @@ from __future__ import unicode_literals from .common import InfoExtractor +from ..utils import ( + determine_ext, + unified_timestamp, +) class RuvIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?ruv\.is/sarpurinn/ruv/\w+/(?P[0-9]+)' - _TEST = { - 'url': 'http://ruv.is/sarpurinn/ruv/frettir/20170614', - 'md5': 'a07ea1ebaba64082d90323b1c96f264b', + _VALID_URL = r'https?://(?:www\.)?ruv\.is/(?:sarpurinn/[^/]+|node)/(?P[^/]+(?:/\d+)?)' + _TESTS = [{ + # m3u8 + 'url': 'http://ruv.is/sarpurinn/ruv-aukaras/fh-valur/20170516', + 'md5': '66347652f4e13e71936817102acc1724', 'info_dict': { - 'id': '20170614', + 'id': '1144499', + 'display_id': 'fh-valur/20170516', 'ext': 'mp4', - 'title': 'Fréttir', - } - } + 'title': 'FH - Valur', + 'description': 'Bein útsending frá 3. 
leik FH og Vals í úrslitum Olísdeildar karla í handbolta.', + 'timestamp': 1494963600, + 'upload_date': '20170516', + }, + }, { + # mp3 + 'url': 'http://ruv.is/sarpurinn/ras-2/morgunutvarpid/20170619', + 'md5': '395ea250c8a13e5fdb39d4670ef85378', + 'info_dict': { + 'id': '1153630', + 'display_id': 'morgunutvarpid/20170619', + 'ext': 'mp3', + 'title': 'Morgunútvarpið', + 'description': 'md5:a4cf1202c0a1645ca096b06525915418', + 'timestamp': 1497855000, + 'upload_date': '20170619', + }, + }, { + 'url': 'http://ruv.is/sarpurinn/ruv/frettir/20170614', + 'only_matching': True, + }, { + 'url': 'http://www.ruv.is/node/1151854', + 'only_matching': True, + }, { + 'url': 'http://ruv.is/sarpurinn/klippa/secret-soltice-hefst-a-morgun', + 'only_matching': True, + }, { + 'url': 'http://ruv.is/sarpurinn/ras-1/morgunvaktin/20170619', + 'only_matching': True, + }] def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) + display_id = self._match_id(url) + + webpage = self._download_webpage(url, display_id) title = self._og_search_title(webpage) - video_url = self._html_search_regex(r'video\.src\s*=\s*["\'](.+?)["\']', webpage, 'video URL') + + FIELD_RE = r'video\.%s\s*=\s*(["\'])(?P(?:(?!\1).)+)\1' + + media_url = self._html_search_regex( + FIELD_RE % 'src', webpage, 'video URL', group='url') + + video_id = self._search_regex( + r']+\bhref=["\']https?://www\.ruv\.is/node/(\d+)', + webpage, 'video id', default=display_id) + + ext = determine_ext(media_url) + + if ext == 'm3u8': + formats = self._extract_m3u8_formats( + media_url, video_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id='hls') + elif ext == 'mp3': + formats = [{ + 'format_id': 'mp3', + 'url': media_url, + 'vcodec': 'none', + }] + else: + formats = [{ + 'url': media_url, + }] + + description = self._og_search_description(webpage, default=None) + thumbnail = self._og_search_thumbnail( + webpage, default=None) or self._search_regex( + FIELD_RE % 'poster', webpage, 
'thumbnail', fatal=False) + timestamp = unified_timestamp(self._html_search_meta( + 'article:published_time', webpage, 'timestamp', fatal=False)) return { 'id': video_id, + 'display_id': display_id, 'title': title, - 'url': video_url, - 'ext': 'mp4' + 'description': description, + 'thumbnail': thumbnail, + 'timestamp': timestamp, + 'formats': formats, } From 1641ca402d03b36d28bab94eb898997cadf69993 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 20 Jun 2017 01:27:59 +0700 Subject: [PATCH 16/19] [vimeo] Add fallback mp4 extension for original format --- youtube_dl/extractor/vimeo.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index a483c8409..c3f71b45e 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -615,7 +615,10 @@ class VimeoIE(VimeoBaseInfoExtractor): if download_url and not source_file.get('is_cold') and not source_file.get('is_defrosting'): source_name = source_file.get('public_name', 'Original') if self._is_valid_url(download_url, video_id, '%s video' % source_name): - ext = source_file.get('extension', determine_ext(download_url)).lower() + ext = (try_get( + source_file, lambda x: x['extension'], + compat_str) or determine_ext( + download_url, None) or 'mp4').lower() formats.append({ 'url': download_url, 'ext': ext, From 6ce79d7ac067ef5c286661923b8df6dc7dc7b0da Mon Sep 17 00:00:00 2001 From: Giuseppe Fabiano Date: Mon, 19 Jun 2017 23:07:00 +0200 Subject: [PATCH 17/19] [abcotvs] Fix test md5 --- youtube_dl/extractor/abcotvs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/abcotvs.py b/youtube_dl/extractor/abcotvs.py index 76e98132b..03b92a39c 100644 --- a/youtube_dl/extractor/abcotvs.py +++ b/youtube_dl/extractor/abcotvs.py @@ -22,7 +22,7 @@ class ABCOTVSIE(InfoExtractor): 'display_id': 'east-bay-museum-celebrates-vintage-synthesizers', 'ext': 'mp4', 'title': 'East Bay 
museum celebrates vintage synthesizers', - 'description': 'md5:a4f10fb2f2a02565c1749d4adbab4b10', + 'description': 'md5:24ed2bd527096ec2a5c67b9d5a9005f3', 'thumbnail': r're:^https?://.*\.jpg$', 'timestamp': 1421123075, 'upload_date': '20150113', From 048b55804da21c2b2978cd2f710b2c7b438e24ba Mon Sep 17 00:00:00 2001 From: Giuseppe Fabiano Date: Mon, 19 Jun 2017 23:30:45 +0200 Subject: [PATCH 18/19] [watchindianporn] Fix extraction (closes #13411) --- youtube_dl/extractor/watchindianporn.py | 50 +++++++------------------ 1 file changed, 14 insertions(+), 36 deletions(-) diff --git a/youtube_dl/extractor/watchindianporn.py b/youtube_dl/extractor/watchindianporn.py index ed099beea..fadc539ee 100644 --- a/youtube_dl/extractor/watchindianporn.py +++ b/youtube_dl/extractor/watchindianporn.py @@ -4,11 +4,7 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..utils import ( - unified_strdate, - parse_duration, - int_or_none, -) +from ..utils import parse_duration class WatchIndianPornIE(InfoExtractor): @@ -23,11 +19,8 @@ class WatchIndianPornIE(InfoExtractor): 'ext': 'mp4', 'title': 'Hot milf from kerala shows off her gorgeous large breasts on camera', 'thumbnail': r're:^https?://.*\.jpg$', - 'uploader': 'LoveJay', - 'upload_date': '20160428', 'duration': 226, 'view_count': int, - 'comment_count': int, 'categories': list, 'age_limit': 18, } @@ -40,51 +33,36 @@ class WatchIndianPornIE(InfoExtractor): webpage = self._download_webpage(url, display_id) - video_url = self._html_search_regex( - r"url: escape\('([^']+)'\)", webpage, 'url') + info_dict = self._parse_html5_media_entries(url, webpage, video_id)[0] - title = self._html_search_regex( - r'

(.*?)', - webpage, 'title') - thumbnail = self._html_search_regex( - r'\s*(.*?)', - webpage, 'uploader') - upload_date = unified_strdate(self._html_search_regex( - r'Added: (.+?)', webpage, 'upload date', fatal=False)) + title = self._html_search_regex(( + r'(.+?)\s*-\s*Indian\s+Porn', + r'

(.+?)

' + ), webpage, 'title') duration = parse_duration(self._search_regex( - r'Time:\s*\s*\s*(.+?)\s*', + r'Time:\s*\s*(.+?)\s*', webpage, 'duration', fatal=False)) - view_count = int_or_none(self._search_regex( - r'Views:\s*\s*\s*(\d+)\s*', + view_count = int(self._search_regex( + r'(?s)Time:\s*.*?.*?\s*(\d+)\s*', webpage, 'view count', fatal=False)) - comment_count = int_or_none(self._search_regex( - r'Comments:\s*\s*\s*(\d+)\s*', - webpage, 'comment count', fatal=False)) categories = re.findall( - r'([^<]+)', + r']+class=[\'"]categories[\'"][^>]*>\s*([^<]+)\s*', webpage) - return { + info_dict.update({ 'id': video_id, 'display_id': display_id, - 'url': video_url, 'http_headers': { 'Referer': url, }, 'title': title, - 'thumbnail': thumbnail, - 'uploader': uploader, - 'upload_date': upload_date, 'duration': duration, 'view_count': view_count, - 'comment_count': comment_count, 'categories': categories, 'age_limit': 18, - } + }) + + return info_dict From 9be9ec5980c1c53642eb112ccced3246ac8a391e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 20 Jun 2017 22:58:33 +0700 Subject: [PATCH 19/19] [googledrive] Fix formats' sorting (closes #13443) --- youtube_dl/extractor/googledrive.py | 37 +++++++++++++++++++---------- 1 file changed, 25 insertions(+), 12 deletions(-) diff --git a/youtube_dl/extractor/googledrive.py b/youtube_dl/extractor/googledrive.py index fec36cbbb..9705cfadd 100644 --- a/youtube_dl/extractor/googledrive.py +++ b/youtube_dl/extractor/googledrive.py @@ -69,19 +69,32 @@ class GoogleDriveIE(InfoExtractor): r'"fmt_stream_map"\s*,\s*"([^"]+)', webpage, 'fmt stream map').split(',') fmt_list = self._search_regex(r'"fmt_list"\s*,\s*"([^"]+)', webpage, 'fmt_list').split(',') + resolutions = {} + for fmt in fmt_list: + mobj = re.search( + r'^(?P<format_id>\d+)/(?P<width>\d+)[xX](?P<height>\d+)', fmt) + if mobj: + resolutions[mobj.group('format_id')] = ( + int(mobj.group('width')), int(mobj.group('height'))) + formats = [] - for fmt, fmt_stream in zip(fmt_list, 
fmt_stream_map): - fmt_id, fmt_url = fmt_stream.split('|') - resolution = fmt.split('/')[1] - width, height = resolution.split('x') - formats.append({ - 'url': lowercase_escape(fmt_url), - 'format_id': fmt_id, - 'resolution': resolution, - 'width': int_or_none(width), - 'height': int_or_none(height), - 'ext': self._FORMATS_EXT[fmt_id], - }) + for fmt_stream in fmt_stream_map: + fmt_stream_split = fmt_stream.split('|') + if len(fmt_stream_split) < 2: + continue + format_id, format_url = fmt_stream_split[:2] + f = { + 'url': lowercase_escape(format_url), + 'format_id': format_id, + 'ext': self._FORMATS_EXT[format_id], + } + resolution = resolutions.get(format_id) + if resolution: + f.update({ + 'width': resolution[0], + 'height': resolution[1], + }) + formats.append(f) self._sort_formats(formats) return {