Add PictaChannelPlaylistIE, PictaUserPlaylistIE

This commit is contained in:
oleksis 2020-08-12 19:52:13 -04:00
parent 92cee65f94
commit 6d46718378
2 changed files with 149 additions and 30 deletions

View File

@ -840,7 +840,7 @@ from .picarto import (
PicartoIE, PicartoIE,
PicartoVodIE, PicartoVodIE,
) )
from .picta import PictaIE, PictaPlaylistIE from .picta import PictaIE, PictaChannelPlaylistIE, PictaUserPlaylistIE
from .piksel import PikselIE from .piksel import PikselIE
from .pinkbike import PinkbikeIE from .pinkbike import PinkbikeIE
from .pladform import PladformIE from .pladform import PladformIE

View File

@ -1,6 +1,10 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
from ..compat import compat_str
from base64 import b64encode
import re
from ..compat import compat_str, compat_HTTPError
from ..utils import int_or_none, unified_timestamp, try_get, ExtractorError from ..utils import int_or_none, unified_timestamp, try_get, ExtractorError
from .common import InfoExtractor from .common import InfoExtractor
@ -24,6 +28,9 @@ class PictaBaseIE(InfoExtractor):
description = try_get( description = try_get(
video, lambda x: x["results"][0]["descripcion"], compat_str video, lambda x: x["results"][0]["descripcion"], compat_str
) )
slug_url = try_get(
video, lambda x: x["results"][0]["slug_url"], compat_str
)
uploader = try_get( uploader = try_get(
video, lambda x: x["results"][0]["usuario"]["username"], compat_str video, lambda x: x["results"][0]["usuario"]["username"], compat_str
) )
@ -43,6 +50,7 @@ class PictaBaseIE(InfoExtractor):
return { return {
"id": try_get(video, lambda x: x["results"][0]["id"], compat_str) or video_id, "id": try_get(video, lambda x: x["results"][0]["id"], compat_str) or video_id,
"title": title, "title": title,
"slug_url": slug_url,
"description": description, "description": description,
"thumbnail": thumbnail, "thumbnail": thumbnail,
"uploader": uploader, "uploader": uploader,
@ -57,9 +65,7 @@ class PictaBaseIE(InfoExtractor):
class PictaIE(PictaBaseIE): class PictaIE(PictaBaseIE):
IE_NAME = "picta" IE_NAME = "picta"
IE_DESC = "Picta videos" IE_DESC = "Picta videos"
_VALID_URL = ( _VALID_URL = r"https?://(?:www\.)?picta\.cu/(?:medias|embed)/(?:\?v=)?(?P<id>[\da-z-]+)(?:\?playlist=(?P<playlist_id>[\da-z-]+))?"
r"https?://(?:www\.)?picta\.cu/(?:medias|embed)/(?:\?v=)?(?P<id>[\da-z-]+)"
)
_TESTS = [ _TESTS = [
{ {
@ -67,7 +73,8 @@ class PictaIE(PictaBaseIE):
"file": "Orishas - Everyday-orishas-everyday-2019-01-16-16-36-42-443003.webm", "file": "Orishas - Everyday-orishas-everyday-2019-01-16-16-36-42-443003.webm",
"md5": "7ffdeb0043500c4bb660c04e74e90f7a", "md5": "7ffdeb0043500c4bb660c04e74e90f7a",
"info_dict": { "info_dict": {
"id": "orishas-everyday-2019-01-16-16-36-42-443003", "id": "818",
"slug_url": "orishas-everyday-2019-01-16-16-36-42-443003",
"ext": "webm", "ext": "webm",
"title": "Orishas - Everyday", "title": "Orishas - Everyday",
"thumbnail": r"re:^https?://.*imagen/img.*\.png$", "thumbnail": r"re:^https?://.*imagen/img.*\.png$",
@ -85,7 +92,8 @@ class PictaIE(PictaBaseIE):
"robótica-palmiche-galeno-tercer-lugar-torneo-virtual-robotica-2020-05-21-16-15-31-431895.mp4"), "robótica-palmiche-galeno-tercer-lugar-torneo-virtual-robotica-2020-05-21-16-15-31-431895.mp4"),
"md5": "6031b7a3add2eade9c5bef7ecf5d4b02", "md5": "6031b7a3add2eade9c5bef7ecf5d4b02",
"info_dict": { "info_dict": {
"id": "palmiche-galeno-tercer-lugar-torneo-virtual-robotica-2020-05-21-16-15-31-431895", "id": "3500",
"slug_url": "palmiche-galeno-tercer-lugar-torneo-virtual-robotica-2020-05-21-16-15-31-431895",
"ext": "mp4", "ext": "mp4",
"title": "Palmiche Galeno tercer lugar en torneo virtual de robótica", "title": "Palmiche Galeno tercer lugar en torneo virtual de robótica",
"thumbnail": r"re:^https?://.*imagen/img.*\.jpeg$", "thumbnail": r"re:^https?://.*imagen/img.*\.jpeg$",
@ -117,6 +125,14 @@ class PictaIE(PictaBaseIE):
def _real_initialize(self): def _real_initialize(self):
self.playlist_id = None self.playlist_id = None
@classmethod
def _match_playlist_id(cls, url):
if '_VALID_URL_RE' not in cls.__dict__:
cls._VALID_URL_RE = re.compile(cls._VALID_URL)
m = cls._VALID_URL_RE.match(url)
assert m
return compat_str(m.group('playlist_id'))
def _real_extract(self, url): def _real_extract(self, url):
playlist_id = None playlist_id = None
video_id = self._match_id(url) video_id = self._match_id(url)
@ -130,8 +146,21 @@ class PictaIE(PictaBaseIE):
if playlist_id and not self._downloader.params.get('noplaylist'): if playlist_id and not self._downloader.params.get('noplaylist'):
self.to_screen('Downloading playlist %s - add --no-playlist to just download video' % playlist_id) self.to_screen('Downloading playlist %s - add --no-playlist to just download video' % playlist_id)
return self.url_result( return self.url_result(
ROOT_BASE_URL + "playlist/" + str(playlist_id), ROOT_BASE_URL + "medias/" + video_id + "?" + "playlist=" + compat_str(playlist_id),
PictaPlaylistIE.ie_key(), PictaChannelPlaylistIE.ie_key(),
playlist_id
)
elif (
self.playlist_id is None
and self._match_playlist_id(url)
and not self._downloader.params.get('noplaylist')
):
playlist_id = compat_str(self._match_playlist_id(url))
self.playlist_id = playlist_id
self.to_screen('Downloading playlist %s - add --no-playlist to just download video' % playlist_id)
return self.url_result(
ROOT_BASE_URL + "medias/" + video_id + "?" + "playlist=" + playlist_id,
PictaUserPlaylistIE.ie_key(),
playlist_id playlist_id
) )
elif self._downloader.params.get('noplaylist'): elif self._downloader.params.get('noplaylist'):
@ -157,7 +186,7 @@ class PictaIE(PictaBaseIE):
class PictaEmbedIE(InfoExtractor): class PictaEmbedIE(InfoExtractor):
IE_NAME = "picta:embed" IE_NAME = "picta:embed"
IE_DESC = "Picta embedded videos" IE_DESC = "Picta embedded videos"
_VALID_URL = r"https?://www\.picta\.cu/embed/(?:\?v=)?(?P<id>[0-9]+)" _VALID_URL = r"https?://www\.picta\.cu/embed/(?:\?v=)?(?P<id>[\d]+)"
_TESTS = [ _TESTS = [
{ {
@ -165,7 +194,8 @@ class PictaEmbedIE(InfoExtractor):
"file": "Orishas - Everyday-orishas-everyday-2019-01-16-16-36-42-443003.webm", "file": "Orishas - Everyday-orishas-everyday-2019-01-16-16-36-42-443003.webm",
"md5": "7ffdeb0043500c4bb660c04e74e90f7a", "md5": "7ffdeb0043500c4bb660c04e74e90f7a",
"info_dict": { "info_dict": {
"id": "orishas-everyday-2019-01-16-16-36-42-443003", "id": "818",
"slug_url": "orishas-everyday-2019-01-16-16-36-42-443003",
"ext": "webm", "ext": "webm",
"title": "Orishas - Everyday", "title": "Orishas - Everyday",
"thumbnail": r"re:^https?://.*imagen/img.*\.png$", "thumbnail": r"re:^https?://.*imagen/img.*\.png$",
@ -183,7 +213,8 @@ class PictaEmbedIE(InfoExtractor):
"robótica-palmiche-galeno-tercer-lugar-torneo-virtual-robotica-2020-05-21-16-15-31-431895.mp4"), "robótica-palmiche-galeno-tercer-lugar-torneo-virtual-robotica-2020-05-21-16-15-31-431895.mp4"),
"md5": "6031b7a3add2eade9c5bef7ecf5d4b02", "md5": "6031b7a3add2eade9c5bef7ecf5d4b02",
"info_dict": { "info_dict": {
"id": "palmiche-galeno-tercer-lugar-torneo-virtual-robotica-2020-05-21-16-15-31-431895", "id": "3500",
"slug_url": "palmiche-galeno-tercer-lugar-torneo-virtual-robotica-2020-05-21-16-15-31-431895",
"ext": "mp4", "ext": "mp4",
"title": "Palmiche Galeno tercer lugar en torneo virtual de robótica", "title": "Palmiche Galeno tercer lugar en torneo virtual de robótica",
"thumbnail": r"re:^https?://.*imagen/img.*\.jpeg$", "thumbnail": r"re:^https?://.*imagen/img.*\.jpeg$",
@ -214,22 +245,37 @@ class PictaEmbedIE(InfoExtractor):
class PictaPlaylistIE(InfoExtractor): class PictaPlaylistIE(InfoExtractor):
API_PLAYLIST_ENDPOINT = API_BASE_URL + "lista_reproduccion_canal/" API_PLAYLIST_ENDPOINT = API_BASE_URL + "lista_reproduccion_canal/"
IE_NAME = "picta:playlist" IE_NAME = "picta:playlist"
IE_DESC = "Picta playlist videos" IE_DESC = "Picta playlist"
_VALID_URL = r"https?://www\.picta\.cu/playlist/(?P<id>[0-9]+)" _VALID_URL = r"https?://(?:www\.)?picta\.cu/(?:medias|embed)/(?:\?v=)?(?P<id>[\da-z-]+)(?:\?playlist=(?P<playlist_id>[\da-z-]+))?"
_TESTS = [ _NETRC_MACHINE = "picta"
{
"url": "https://www.picta.cu/playlist/4441",
"info_dict": {
"id": 4441,
"title": "D\u00eda 2: Telecomunicaciones, Redes y Ciberseguridad",
"thumbnail": r"re:^https?://.*imagen/img.*\.jpeg$",
},
},
]
@staticmethod @classmethod
def _extract_playlist(playlist, playlist_id=None, require_title=True): def _match_playlist_id(cls, url):
if '_VALID_URL_RE' not in cls.__dict__:
cls._VALID_URL_RE = re.compile(cls._VALID_URL)
m = cls._VALID_URL_RE.match(url)
assert m
return compat_str(m.group('playlist_id'))
def _set_auth_basic(self):
header = {}
username, password = self._get_login_info()
if username is None:
return header
if isinstance(username, str):
username = username.encode('latin1')
if isinstance(password, str):
password = password.encode('latin1')
authstr = "Basic " + compat_str(b64encode(b":".join((username, password))).decode("utf-8"))
header["Authorization"] = authstr
return header
def _extract_playlist(self, playlist, playlist_id=None, require_title=True):
if len(playlist["results"]) == 0: if len(playlist["results"]) == 0:
raise ExtractorError("Cannot find playlist!") raise ExtractorError("Cannot find playlist!")
@ -238,7 +284,7 @@ class PictaPlaylistIE(InfoExtractor):
if require_title if require_title
else playlist.get("results")[0].get("nombre") else playlist.get("results")[0].get("nombre")
) )
thumbnail = try_get(playlist, lambda x: x["results"][0]["url_imagen"]) thumbnail = try_get(playlist, lambda x: x["results"][0].get("url_imagen"))
entries = try_get(playlist, lambda x: x["results"][0]["publicaciones"]) entries = try_get(playlist, lambda x: x["results"][0]["publicaciones"])
return { return {
@ -250,16 +296,89 @@ class PictaPlaylistIE(InfoExtractor):
def _entries(self, playlist_id): def _entries(self, playlist_id):
json_url = self.API_PLAYLIST_ENDPOINT + "?format=json&id=%s" % playlist_id json_url = self.API_PLAYLIST_ENDPOINT + "?format=json&id=%s" % playlist_id
playlist = self._download_json(json_url, playlist_id, "Downloading playlist JSON") headers = self._set_auth_basic()
try:
playlist = self._download_json(json_url, playlist_id, "Downloading playlist JSON", headers=headers)
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code in (403,):
raise self.raise_login_required(
msg='This playlist is only available for registered users. Check your username and password'
)
raise ExtractorError("Unable to login...")
info_playlist = self._extract_playlist(playlist, playlist_id) info_playlist = self._extract_playlist(playlist, playlist_id)
playlist_entries = info_playlist.get("entries") playlist_entries = info_playlist.get("entries")
for video in playlist_entries: for video in playlist_entries:
video_id = video.get("id") video_id = video.get("id")
video_url = ROOT_BASE_URL + "medias/" + video.get("slug_url") video_url = ROOT_BASE_URL + "medias/" + video.get("slug_url") + "?" + "playlist=" + compat_str(playlist_id)
yield self.url_result(video_url, PictaIE.ie_key(), video_id) yield self.url_result(video_url, PictaIE.ie_key(), video_id)
def _real_extract(self, url): def _real_extract(self, url):
playlist_id = self._match_id(url) playlist_id = self._match_playlist_id(url)
entries = self._entries(playlist_id) entries = self._entries(playlist_id)
return self.playlist_result(entries, playlist_id) return self.playlist_result(entries, playlist_id)
# noinspection PyAbstractClass
class PictaChannelPlaylistIE(PictaPlaylistIE):
IE_NAME = "picta:channel:playlist"
IE_DESC = "Picta channel playlist"
_TESTS = [
{
"url": ("https://www.picta.cu/medias/"
"201-paradigma-devops-implementacion-tecnomatica-2020-07-05-22-44-41-299736?playlist=4441"),
"info_dict": {
"id": 4441,
"title": "D\u00eda 2: Telecomunicaciones, Redes y Ciberseguridad",
"thumbnail": r"re:^https?://.*imagen/img.*\.jpeg$",
},
},
]
# noinspection PyAbstractClass
class PictaUserPlaylistIE(PictaPlaylistIE, PictaBaseIE):
API_PLAYLIST_ENDPOINT = API_BASE_URL + "lista_reproduccion/"
IE_NAME = "picta:user:playlist"
IE_DESC = "Picta user playlist"
_TESTS = [
{
"url": "https://www.picta.cu/medias/die-hart-1x01-2020-08-06-18-12-50-310131?playlist=96",
"info_dict": {
"id": 96,
"title": "Die hart",
"thumbnail": None,
},
},
]
def _extract_playlist(self, playlist, playlist_id=None, require_title=True):
if len(playlist["results"]) == 0:
raise ExtractorError("Cannot find playlist!")
title = (
playlist["results"][0]["nombre"]
if require_title
else playlist.get("results")[0].get("nombre")
)
thumbnail = None
entries = try_get(playlist, lambda x: x["results"][0]["publicacion"])
# Playlist User need update slug_url video
for entry in entries:
video_id = entry.get("id")
json_url = API_BASE_URL + "publicacion/?format=json&id=%s" % video_id
video = self._download_json(json_url, video_id, "Downloading video JSON")
info = self._extract_video(video, video_id)
entry["slug_url"] = info.get("slug_url")
return {
"id": try_get(playlist, lambda x: x["results"][0]["id"], compat_str) or playlist_id,
"title": title,
"thumbnail": thumbnail,
"entries": entries,
}