From 6d467183786e8b9bdb9d6afdcab96da74a74787e Mon Sep 17 00:00:00 2001 From: oleksis <“oleksis.fraga@gmail.com”> Date: Wed, 12 Aug 2020 19:52:13 -0400 Subject: [PATCH] Add PictaChannelPlaylistIE, PictaUserPlaylistIE --- youtube_dl/extractor/extractors.py | 2 +- youtube_dl/extractor/picta.py | 177 ++++++++++++++++++++++++----- 2 files changed, 149 insertions(+), 30 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 8b03aef4b..0948f1c21 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -840,7 +840,7 @@ from .picarto import ( PicartoIE, PicartoVodIE, ) -from .picta import PictaIE, PictaPlaylistIE +from .picta import PictaIE, PictaChannelPlaylistIE, PictaUserPlaylistIE from .piksel import PikselIE from .pinkbike import PinkbikeIE from .pladform import PladformIE diff --git a/youtube_dl/extractor/picta.py b/youtube_dl/extractor/picta.py index 7dc4657f5..009197d28 100644 --- a/youtube_dl/extractor/picta.py +++ b/youtube_dl/extractor/picta.py @@ -1,6 +1,10 @@ # coding: utf-8 from __future__ import unicode_literals -from ..compat import compat_str + +from base64 import b64encode +import re + +from ..compat import compat_str, compat_HTTPError from ..utils import int_or_none, unified_timestamp, try_get, ExtractorError from .common import InfoExtractor @@ -24,6 +28,9 @@ class PictaBaseIE(InfoExtractor): description = try_get( video, lambda x: x["results"][0]["descripcion"], compat_str ) + slug_url = try_get( + video, lambda x: x["results"][0]["slug_url"], compat_str + ) uploader = try_get( video, lambda x: x["results"][0]["usuario"]["username"], compat_str ) @@ -43,6 +50,7 @@ class PictaBaseIE(InfoExtractor): return { "id": try_get(video, lambda x: x["results"][0]["id"], compat_str) or video_id, "title": title, + "slug_url": slug_url, "description": description, "thumbnail": thumbnail, "uploader": uploader, @@ -57,9 +65,7 @@ class PictaBaseIE(InfoExtractor): class PictaIE(PictaBaseIE): IE_NAME = "picta" IE_DESC = "Picta videos" - _VALID_URL = ( - r"https?://(?:www\.)?picta\.cu/(?:medias|embed)/(?:\?v=)?(?P[\da-z-]+)" - ) + _VALID_URL = r"https?://(?:www\.)?picta\.cu/(?:medias|embed)/(?:\?v=)?(?P[\da-z-]+)(?:\?playlist=(?P[\da-z-]+))?" _TESTS = [ { @@ -67,7 +73,8 @@ class PictaIE(PictaBaseIE): "file": "Orishas - Everyday-orishas-everyday-2019-01-16-16-36-42-443003.webm", "md5": "7ffdeb0043500c4bb660c04e74e90f7a", "info_dict": { - "id": "orishas-everyday-2019-01-16-16-36-42-443003", + "id": "818", + "slug_url": "orishas-everyday-2019-01-16-16-36-42-443003", "ext": "webm", "title": "Orishas - Everyday", "thumbnail": r"re:^https?://.*imagen/img.*\.png$", @@ -85,7 +92,8 @@ class PictaIE(PictaBaseIE): "robótica-palmiche-galeno-tercer-lugar-torneo-virtual-robotica-2020-05-21-16-15-31-431895.mp4"), "md5": "6031b7a3add2eade9c5bef7ecf5d4b02", "info_dict": { - "id": "palmiche-galeno-tercer-lugar-torneo-virtual-robotica-2020-05-21-16-15-31-431895", + "id": "3500", + "slug_url": "palmiche-galeno-tercer-lugar-torneo-virtual-robotica-2020-05-21-16-15-31-431895", "ext": "mp4", "title": "Palmiche Galeno tercer lugar en torneo virtual de robótica", "thumbnail": r"re:^https?://.*imagen/img.*\.jpeg$", @@ -117,6 +125,14 @@ class PictaIE(PictaBaseIE): def _real_initialize(self): self.playlist_id = None + @classmethod + def _match_playlist_id(cls, url): + if '_VALID_URL_RE' not in cls.__dict__: + cls._VALID_URL_RE = re.compile(cls._VALID_URL) + m = cls._VALID_URL_RE.match(url) + assert m + return compat_str(m.group('playlist_id')) + def _real_extract(self, url): playlist_id = None video_id = self._match_id(url) @@ -130,8 +146,21 @@ class PictaIE(PictaBaseIE): if playlist_id and not self._downloader.params.get('noplaylist'): self.to_screen('Downloading playlist %s - add --no-playlist to just download video' % playlist_id) return self.url_result( - ROOT_BASE_URL + "playlist/" + str(playlist_id), - PictaPlaylistIE.ie_key(), + ROOT_BASE_URL + "medias/" + video_id + "?" + "playlist=" + compat_str(playlist_id), + PictaChannelPlaylistIE.ie_key(), + playlist_id + ) + elif ( + self.playlist_id is None + and self._match_playlist_id(url) + and not self._downloader.params.get('noplaylist') + ): + playlist_id = compat_str(self._match_playlist_id(url)) + self.playlist_id = playlist_id + self.to_screen('Downloading playlist %s - add --no-playlist to just download video' % playlist_id) + return self.url_result( + ROOT_BASE_URL + "medias/" + video_id + "?" + "playlist=" + playlist_id, + PictaUserPlaylistIE.ie_key(), playlist_id ) elif self._downloader.params.get('noplaylist'): @@ -157,7 +186,7 @@ class PictaIE(PictaBaseIE): class PictaEmbedIE(InfoExtractor): IE_NAME = "picta:embed" IE_DESC = "Picta embedded videos" - _VALID_URL = r"https?://www\.picta\.cu/embed/(?:\?v=)?(?P[0-9]+)" + _VALID_URL = r"https?://www\.picta\.cu/embed/(?:\?v=)?(?P[\d]+)" _TESTS = [ { @@ -165,7 +194,8 @@ class PictaEmbedIE(InfoExtractor): "file": "Orishas - Everyday-orishas-everyday-2019-01-16-16-36-42-443003.webm", "md5": "7ffdeb0043500c4bb660c04e74e90f7a", "info_dict": { - "id": "orishas-everyday-2019-01-16-16-36-42-443003", + "id": "818", + "slug_url": "orishas-everyday-2019-01-16-16-36-42-443003", "ext": "webm", "title": "Orishas - Everyday", "thumbnail": r"re:^https?://.*imagen/img.*\.png$", @@ -183,7 +213,8 @@ class PictaEmbedIE(InfoExtractor): "robótica-palmiche-galeno-tercer-lugar-torneo-virtual-robotica-2020-05-21-16-15-31-431895.mp4"), "md5": "6031b7a3add2eade9c5bef7ecf5d4b02", "info_dict": { - "id": "palmiche-galeno-tercer-lugar-torneo-virtual-robotica-2020-05-21-16-15-31-431895", + "id": "3500", + "slug_url": "palmiche-galeno-tercer-lugar-torneo-virtual-robotica-2020-05-21-16-15-31-431895", "ext": "mp4", "title": "Palmiche Galeno tercer lugar en torneo virtual de robótica", "thumbnail": r"re:^https?://.*imagen/img.*\.jpeg$", @@ -214,22 +245,37 @@ class PictaEmbedIE(InfoExtractor): class PictaPlaylistIE(InfoExtractor): API_PLAYLIST_ENDPOINT = API_BASE_URL + "lista_reproduccion_canal/" IE_NAME = "picta:playlist" - IE_DESC = "Picta playlist videos" - _VALID_URL = r"https?://www\.picta\.cu/playlist/(?P[0-9]+)" + IE_DESC = "Picta playlist" + _VALID_URL = r"https?://(?:www\.)?picta\.cu/(?:medias|embed)/(?:\?v=)?(?P[\da-z-]+)(?:\?playlist=(?P[\da-z-]+))?" - _TESTS = [ - { - "url": "https://www.picta.cu/playlist/4441", - "info_dict": { - "id": 4441, - "title": "D\u00eda 2: Telecomunicaciones, Redes y Ciberseguridad", - "thumbnail": r"re:^https?://.*imagen/img.*\.jpeg$", - }, - }, - ] + _NETRC_MACHINE = "picta" - @staticmethod - def _extract_playlist(playlist, playlist_id=None, require_title=True): + @classmethod + def _match_playlist_id(cls, url): + if '_VALID_URL_RE' not in cls.__dict__: + cls._VALID_URL_RE = re.compile(cls._VALID_URL) + m = cls._VALID_URL_RE.match(url) + assert m + return compat_str(m.group('playlist_id')) + + def _set_auth_basic(self): + header = {} + username, password = self._get_login_info() + if username is None: + return header + + if isinstance(username, str): + username = username.encode('latin1') + + if isinstance(password, str): + password = password.encode('latin1') + + authstr = "Basic " + compat_str(b64encode(b":".join((username, password))).decode("utf-8")) + + header["Authorization"] = authstr + return header + + def _extract_playlist(self, playlist, playlist_id=None, require_title=True): if len(playlist["results"]) == 0: raise ExtractorError("Cannot find playlist!") @@ -238,7 +284,7 @@ class PictaPlaylistIE(InfoExtractor): if require_title else playlist.get("results")[0].get("nombre") ) - thumbnail = try_get(playlist, lambda x: x["results"][0]["url_imagen"]) + thumbnail = try_get(playlist, lambda x: x["results"][0].get("url_imagen")) entries = try_get(playlist, lambda x: x["results"][0]["publicaciones"]) return { @@ -250,16 +296,89 @@ class PictaPlaylistIE(InfoExtractor): def _entries(self, playlist_id): json_url = self.API_PLAYLIST_ENDPOINT + "?format=json&id=%s" % playlist_id - playlist = self._download_json(json_url, playlist_id, "Downloading playlist JSON") + headers = self._set_auth_basic() + try: + playlist = self._download_json(json_url, playlist_id, "Downloading playlist JSON", headers=headers) + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError) and e.cause.code in (403,): + raise self.raise_login_required( + msg='This playlist is only available for registered users. Check your username and password' + ) + + raise ExtractorError("Unable to login...") + info_playlist = self._extract_playlist(playlist, playlist_id) playlist_entries = info_playlist.get("entries") for video in playlist_entries: video_id = video.get("id") - video_url = ROOT_BASE_URL + "medias/" + video.get("slug_url") + video_url = ROOT_BASE_URL + "medias/" + video.get("slug_url") + "?" + "playlist=" + compat_str(playlist_id) yield self.url_result(video_url, PictaIE.ie_key(), video_id) def _real_extract(self, url): - playlist_id = self._match_id(url) + playlist_id = self._match_playlist_id(url) entries = self._entries(playlist_id) return self.playlist_result(entries, playlist_id) + + +# noinspection PyAbstractClass +class PictaChannelPlaylistIE(PictaPlaylistIE): + IE_NAME = "picta:channel:playlist" + IE_DESC = "Picta channel playlist" + + _TESTS = [ + { + "url": ("https://www.picta.cu/medias/" + "201-paradigma-devops-implementacion-tecnomatica-2020-07-05-22-44-41-299736?playlist=4441"), + "info_dict": { + "id": 4441, + "title": "D\u00eda 2: Telecomunicaciones, Redes y Ciberseguridad", + "thumbnail": r"re:^https?://.*imagen/img.*\.jpeg$", + }, + }, + ] + + +# noinspection PyAbstractClass +class PictaUserPlaylistIE(PictaPlaylistIE, PictaBaseIE): + API_PLAYLIST_ENDPOINT = API_BASE_URL + "lista_reproduccion/" + IE_NAME = "picta:user:playlist" + IE_DESC = "Picta user playlist" + + _TESTS = [ + { + "url": "https://www.picta.cu/medias/die-hart-1x01-2020-08-06-18-12-50-310131?playlist=96", + "info_dict": { + "id": 96, + "title": "Die hart", + "thumbnail": None, + }, + }, + ] + + def _extract_playlist(self, playlist, playlist_id=None, require_title=True): + if len(playlist["results"]) == 0: + raise ExtractorError("Cannot find playlist!") + + title = ( + playlist["results"][0]["nombre"] + if require_title + else playlist.get("results")[0].get("nombre") + ) + thumbnail = None + entries = try_get(playlist, lambda x: x["results"][0]["publicacion"]) + + # Playlist User need update slug_url video + for entry in entries: + video_id = entry.get("id") + json_url = API_BASE_URL + "publicacion/?format=json&id=%s" % video_id + video = self._download_json(json_url, video_id, "Downloading video JSON") + info = self._extract_video(video, video_id) + entry["slug_url"] = info.get("slug_url") + + return { + "id": try_get(playlist, lambda x: x["results"][0]["id"], compat_str) or playlist_id, + "title": title, + "thumbnail": thumbnail, + "entries": entries, + }