From 5c9ced9504bd2ceb8e55a929124aad2091b23403 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 2 May 2016 18:19:00 +0600 Subject: [PATCH 01/13] [vevo] Improve genre extraction --- youtube_dl/extractor/vevo.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/vevo.py b/youtube_dl/extractor/vevo.py index 63eab4148..3cb0343e6 100644 --- a/youtube_dl/extractor/vevo.py +++ b/youtube_dl/extractor/vevo.py @@ -5,6 +5,7 @@ import re from .common import InfoExtractor from ..compat import ( compat_etree_fromstring, + compat_str, compat_urlparse, ) from ..utils import ( @@ -116,6 +117,10 @@ class VevoIE(VevoBaseIE): 'genre': 'Pop', }, 'expected_warnings': ['Failed to download video versions info'], + }, { + # no genres available + 'url': 'http://www.vevo.com/watch/INS171400764', + 'only_matching': True, }] _SMIL_BASE_URL = 'http://smil.lvl3.vevo.com' _SOURCE_TYPES = { @@ -339,7 +344,11 @@ class VevoIE(VevoBaseIE): if featured_artist: artist = '%s ft. 
%s' % (artist, featured_artist) title = '%s - %s' % (artist, track) if artist else track - genre = video_info.get('genres', [None])[0] + + genres = video_info.get('genres') + genre = ( + genres[0] if genres and isinstance(genres, list) and + isinstance(genres[0], compat_str) else None) is_explicit = video_info.get('isExplicit') if is_explicit is True: From 7960b0563b957d418ddd36555275d98ba4668c03 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 2 May 2016 18:35:50 +0600 Subject: [PATCH 02/13] [YoutubeDL] Properly process unable-to-download-error on python2 --- youtube_dl/YoutubeDL.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 055433362..4e57c9687 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1639,7 +1639,7 @@ class YoutubeDL(object): # Just a single file success = dl(filename, info_dict) except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: - self.report_error('unable to download video data: %s' % str(err)) + self.report_error('unable to download video data: %s' % error_to_compat_str(err)) return except (OSError, IOError) as err: raise UnavailableVideoError(err) From df5f4e8888bc02f6064b9b92fbf4cfc4eedd4c1f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 2 May 2016 18:47:35 +0600 Subject: [PATCH 03/13] [vevo] Remove superfluous code --- youtube_dl/extractor/vevo.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/youtube_dl/extractor/vevo.py b/youtube_dl/extractor/vevo.py index 3cb0343e6..a6177f2cb 100644 --- a/youtube_dl/extractor/vevo.py +++ b/youtube_dl/extractor/vevo.py @@ -205,12 +205,10 @@ class VevoIE(VevoBaseIE): response = self._download_json( json_url, video_id, 'Downloading video info', 'Unable to download info') video_info = response.get('video') or {} - video_versions = video_info.get('videoVersions') artist = None featured_artist = None uploader = None 
view_count = None - timestamp = None formats = [] if not video_info: From f0e14fdd43bf8e86c5385220430eef842a10ccab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 2 May 2016 20:05:06 +0600 Subject: [PATCH 04/13] [YoutubeDL] Skip non-relevant field types when building output template --- youtube_dl/YoutubeDL.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 4e57c9687..2187dcc8f 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -580,7 +580,7 @@ class YoutubeDL(object): is_id=(k == 'id')) template_dict = dict((k, sanitize(k, v)) for k, v in template_dict.items() - if v is not None) + if v is not None and not isinstance(v, (list, tuple, dict))) template_dict = collections.defaultdict(lambda: 'NA', template_dict) outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL) From 8a92e51c60fd122e676e4619e7e132b692292801 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 2 May 2016 21:31:35 +0600 Subject: [PATCH 05/13] [extractor/common] Relax wording for creator metafield --- youtube_dl/extractor/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 61a5d124c..0843d89af 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -163,7 +163,7 @@ class InfoExtractor(object): description: Full video description. uploader: Full name of the video uploader. license: License name the video is licensed under. - creator: The main artist who created the video. + creator: The creator of the video. release_date: The date (YYYYMMDD) when the video was released. timestamp: UNIX timestamp of the moment the video became available. upload_date: Video upload date (YYYYMMDD). 
From 6c52a86f54b230a3f08dd10a89f55b8af4d98ee3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 2 May 2016 21:32:57 +0600 Subject: [PATCH 06/13] [README.md] Update creator description --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index ecf737047..50acb26a0 100644 --- a/README.md +++ b/README.md @@ -465,7 +465,7 @@ The basic usage is not to set any template arguments when downloading a single f - `display_id`: An alternative identifier for the video - `uploader`: Full name of the video uploader - `license`: License name the video is licensed under - - `creator`: The main artist who created the video + - `creator`: The creator of the video - `release_date`: The date (YYYYMMDD) when the video was released - `timestamp`: UNIX timestamp of the moment the video became available - `upload_date`: Video upload date (YYYYMMDD) From a0a309b9734ea3d5f66d4a52e42f5cc24df7f808 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Tue, 3 May 2016 16:06:28 +0800 Subject: [PATCH 07/13] [kuwo:category] Fix description and update test --- youtube_dl/extractor/kuwo.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/kuwo.py b/youtube_dl/extractor/kuwo.py index 3740869c7..616ed19e1 100644 --- a/youtube_dl/extractor/kuwo.py +++ b/youtube_dl/extractor/kuwo.py @@ -266,7 +266,6 @@ class KuwoCategoryIE(InfoExtractor): 'info_dict': { 'id': '86375', 'title': '八十年代精选', - 'description': '这些都是属于八十年代的回忆!', }, 'playlist_mincount': 24, } @@ -283,6 +282,8 @@ class KuwoCategoryIE(InfoExtractor): category_desc = remove_start( get_element_by_id('intro', webpage).strip(), '%s简介:' % category_name) + if category_desc == '暂无': + category_desc = None jsonm = self._parse_json(self._html_search_regex( r'var\s+jsonm\s*=\s*([^;]+);', webpage, 'category songs'), category_id) From 7759be38da1b5ad99a8ef04d9a1df21b989e3b8c Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Tue, 3 May 2016 16:19:20 
+0800 Subject: [PATCH 08/13] [xiami] Detect georestriction and skip tests --- youtube_dl/extractor/xiami.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/xiami.py b/youtube_dl/extractor/xiami.py index e4ed306b4..a6dfc4af9 100644 --- a/youtube_dl/extractor/xiami.py +++ b/youtube_dl/extractor/xiami.py @@ -9,6 +9,11 @@ from ..utils import int_or_none class XiamiBaseIE(InfoExtractor): _API_BASE_URL = 'http://www.xiami.com/song/playlist/cat/json/id' + def _download_webpage(self, *args, **kwargs): + webpage = super(XiamiBaseIE, self)._download_webpage(*args, **kwargs) + if '>Xiami is currently not available in your country.<' in webpage: + self.raise_geo_restricted('Xiami is currently not available in your country') + def _extract_track(self, track, track_id=None): title = track['title'] track_url = self._decrypt(track['location']) @@ -81,7 +86,8 @@ class XiamiSongIE(XiamiBaseIE): 'ext': 'lrc', }], }, - } + }, + 'skip': 'Georestricted', }, { 'url': 'http://www.xiami.com/song/1775256504', 'md5': '932a3abd45c6aa2b1fdbe028fcb4c4fc', @@ -100,7 +106,8 @@ class XiamiSongIE(XiamiBaseIE): 'ext': 'lrc', }], }, - } + }, + 'skip': 'Georestricted', }] def _real_extract(self, url): @@ -124,6 +131,7 @@ class XiamiAlbumIE(XiamiPlaylistBaseIE): 'id': '2100300444', }, 'playlist_count': 10, + 'skip': 'Georestricted', }, { 'url': 'http://www.xiami.com/album/512288?spm=a1z1s.6843761.1110925389.6.hhE9p9', 'only_matching': True, @@ -141,6 +149,7 @@ class XiamiArtistIE(XiamiPlaylistBaseIE): 'id': '2132', }, 'playlist_count': 20, + 'skip': 'Georestricted', } @@ -155,4 +164,5 @@ class XiamiCollectionIE(XiamiPlaylistBaseIE): 'id': '156527391', }, 'playlist_mincount': 29, + 'skip': 'Georestricted', } From 80bc4106aff21c46cf65c7310f1ed988518b3df5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 3 May 2016 15:09:23 +0600 Subject: [PATCH 09/13] [xfileshare] Add support for thevideobee.to (Closes #9374) --- 
youtube_dl/extractor/xfileshare.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/xfileshare.py b/youtube_dl/extractor/xfileshare.py index 2d1504eaa..472ed1b3f 100644 --- a/youtube_dl/extractor/xfileshare.py +++ b/youtube_dl/extractor/xfileshare.py @@ -16,7 +16,7 @@ class XFileShareIE(InfoExtractor): IE_DESC = 'XFileShare based sites: GorillaVid.in, daclips.in, movpod.in, fastvideo.in, realvid.net, filehoot.com and vidto.me' _VALID_URL = r'''(?x) https?://(?P<host>(?:www\.)? - (?:daclips\.in|gorillavid\.in|movpod\.in|fastvideo\.in|realvid\.net|filehoot\.com|vidto\.me|powerwatch\.pw))/ + (?:daclips\.in|gorillavid\.in|movpod\.in|fastvideo\.in|realvid\.net|filehoot\.com|vidto\.me|powerwatch\.pw|thevideobee\.to))/ (?:embed-)?(?P<id>[0-9a-zA-Z]+)(?:-[0-9]+x[0-9]+\.html)? ''' From 41745523918dee174a3e642b629bcbc585931c4a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 3 May 2016 15:35:32 +0600 Subject: [PATCH 10/13] [xfileshare] Refactor _VALID_URL and remove ded sites --- youtube_dl/extractor/xfileshare.py | 39 +++++++++++------------------- 1 file changed, 14 insertions(+), 25 deletions(-) diff --git a/youtube_dl/extractor/xfileshare.py b/youtube_dl/extractor/xfileshare.py index 472ed1b3f..4ab42d24e 100644 --- a/youtube_dl/extractor/xfileshare.py +++ b/youtube_dl/extractor/xfileshare.py @@ -13,12 +13,20 @@ from ..utils import ( class XFileShareIE(InfoExtractor): - IE_DESC = 'XFileShare based sites: GorillaVid.in, daclips.in, movpod.in, fastvideo.in, realvid.net, filehoot.com and vidto.me' - _VALID_URL = r'''(?x) - https?://(?P<host>(?:www\.)? - (?:daclips\.in|gorillavid\.in|movpod\.in|fastvideo\.in|realvid\.net|filehoot\.com|vidto\.me|powerwatch\.pw|thevideobee\.to))/ - (?:embed-)?(?P<id>[0-9a-zA-Z]+)(?:-[0-9]+x[0-9]+\.html)?
- ''' + _SITES = ( + ('daclips.in', 'DaClips'), + ('filehoot.com', 'FileHoot'), + ('gorillavid.in', 'GorillaVid'), + ('movpod.in', 'MovPod'), + ('powerwatch.pw', 'PowerWatch'), + ('rapidvideo.ws', 'Rapidvideo.ws'), + ('thevideobee.to', 'TheVideoBee'), + ('vidto.me', 'Vidto'), + ) + + IE_DESC = 'XFileShare based sites: %s' % ', '.join(list(zip(*_SITES))[1]) + _VALID_URL = (r'https?://(?P<host>(?:www\.)?(?:%s))/(?:embed-)?(?P<id>[0-9a-zA-Z]+)' + % '|'.join(re.escape(site) for site in list(zip(*_SITES))[0])) _FILE_NOT_FOUND_REGEX = r'>(?:404 - )?File Not Found<' @@ -43,25 +51,6 @@ class XFileShareIE(InfoExtractor): 'title': 'Micro Pig piglets ready on 16th July 2009-bG0PdrCdxUc', 'thumbnail': 're:http://.*\.jpg', } - }, { - # video with countdown timeout - 'url': 'http://fastvideo.in/1qmdn1lmsmbw', - 'md5': '8b87ec3f6564a3108a0e8e66594842ba', - 'info_dict': { - 'id': '1qmdn1lmsmbw', - 'ext': 'mp4', - 'title': 'Man of Steel - Trailer', - 'thumbnail': 're:http://.*\.jpg', - }, - }, { - 'url': 'http://realvid.net/ctn2y6p2eviw', - 'md5': 'b2166d2cf192efd6b6d764c18fd3710e', - 'info_dict': { - 'id': 'ctn2y6p2eviw', - 'ext': 'flv', - 'title': 'rdx 1955', - 'thumbnail': 're:http://.*\.jpg', - }, }, { 'url': 'http://movpod.in/0wguyyxi1yca', 'only_matching': True, From 57d8e32a3ec7fe70522edad6fd0c2847b4e00944 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 3 May 2016 16:58:11 +0600 Subject: [PATCH 11/13] [xfileshare] Add support for streamin.to --- youtube_dl/extractor/xfileshare.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/xfileshare.py b/youtube_dl/extractor/xfileshare.py index 4ab42d24e..769003735 100644 --- a/youtube_dl/extractor/xfileshare.py +++ b/youtube_dl/extractor/xfileshare.py @@ -22,6 +22,7 @@ class XFileShareIE(InfoExtractor): ('rapidvideo.ws', 'Rapidvideo.ws'), ('thevideobee.to', 'TheVideoBee'), ('vidto.me', 'Vidto'), + ('streamin.to', 'Streamin.To'), ) IE_DESC = 'XFileShare based sites: %s' % ', 
'.join(list(zip(*_SITES))[1]) From 964f49336fcf94b3a5399a026db3914b27a2a445 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 3 May 2016 21:24:51 +0600 Subject: [PATCH 12/13] [aol] Improve _VALID_URL (Closes #9381) --- youtube_dl/extractor/aol.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/aol.py b/youtube_dl/extractor/aol.py index 24df8fe93..42c21bf41 100644 --- a/youtube_dl/extractor/aol.py +++ b/youtube_dl/extractor/aol.py @@ -12,7 +12,7 @@ from ..utils import ( class AolIE(InfoExtractor): IE_NAME = 'on.aol.com' - _VALID_URL = r'(?:aol-video:|https?://on\.aol\.com/.*-)(?P<id>[^/?-]+)' + _VALID_URL = r'(?:aol-video:|https?://on\.aol\.com/(?:[^/]+/)*(?:[^/?#&]+-)?)(?P<id>[^/?#&]+)' _TESTS = [{ # video with 5min ID @@ -53,6 +53,12 @@ class AolIE(InfoExtractor): }, { 'url': 'http://on.aol.com/shows/park-bench-shw518173474-559a1b9be4b0c3bfad3357a7?context=SH:SHW518173474:PL4327:1460619712763', 'only_matching': True, + }, { + 'url': 'http://on.aol.com/video/519442220', + 'only_matching': True, + }, { + 'url': 'aol-video:5707d6b8e4b090497b04f706', + 'only_matching': True, }] def _real_extract(self, url): From bc7e77a04be6094e64263f9c622cff3cd1fc13cb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 3 May 2016 23:18:36 +0600 Subject: [PATCH 13/13] [vevo] Use raise_geo_restricted --- youtube_dl/extractor/vevo.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/vevo.py b/youtube_dl/extractor/vevo.py index a6177f2cb..c0ef08c02 100644 --- a/youtube_dl/extractor/vevo.py +++ b/youtube_dl/extractor/vevo.py @@ -189,8 +189,8 @@ class VevoIE(VevoBaseIE): errnote='Unable to retrieve oauth token') if 'THIS PAGE IS CURRENTLY UNAVAILABLE IN YOUR REGION' in webpage: - raise ExtractorError( - '%s said: This page is currently unavailable in your region.' 
% self.IE_NAME, expected=True) + self.raise_geo_restricted( + '%s said: This page is currently unavailable in your region' % self.IE_NAME) auth_info = self._parse_json(webpage, video_id) self._api_url_template = self.http_scheme() + '//apiv2.vevo.com/%s?token=' + auth_info['access_token']