wykop extractor, closes #25362

This commit is contained in:
selfisekai 2020-05-24 19:40:10 +02:00
parent 2791e80b60
commit 9104d5c362
2 changed files with 155 additions and 0 deletions

View File

@@ -1417,6 +1417,7 @@ from .wsj import (
    WSJArticleIE,
)
from .wwe import WWEIE
from .wykop import WykopIE
from .xbef import XBefIE
from .xboxclips import XboxClipsIE
from .xfileshare import XFileShareIE

View File

@@ -0,0 +1,154 @@
# coding=utf-8
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..utils import (
    clean_html,
    ExtractorError,
    int_or_none,
    url_or_none,
)
class WykopIE(InfoExtractor):
    """Extractor for media attached to wykop.pl links, entries and comments.

    The page URL is matched against ``_VALID_URL``, the corresponding object
    is fetched from the ``a2.wykop.pl`` JSON API, and the media URL found
    there is returned as a ``url_transparent`` result so that another
    extractor resolves the actual formats while the Wykop metadata
    (title, uploader, vote counts, ...) is layered on top.
    """

    _VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?wykop\.pl/(?P<type>link|wpis)/(?P<main_id>\d+)/(?:comment/\d+/|(?P<display_id>[^/#]+)/?){0,2}(?:#comment-(?P<comment_id>\d+))?$)'
    # Application key appended to every API request URL.
    _APP_KEY = 'aNd401dAPp'
    _TESTS = [{
        # youtube @ link
        'url': 'https://www.wykop.pl/link/5515619/prof-obirek-o-pedofilii-wsrod-duchownych-jan-pawel-2-wiedzial-i-nic-nie-zrobil/',
        'info_dict': {
            'id': 'CQoJ7TQjrI4',
            'ext': 'webm',
            'title': 'Prof. Obirek o pedofilii wśród duchownych: Jan Paweł 2 wiedział i nic nie zrobił',
            'thumbnail': r're:^https?://.*\.jpg$',
            'uploader': 'invisibleborder',
            'like_count': int,
            'dislike_count': int,
            'comment_count': int,
            'age_limit': 0,
            'upload_date': '20200519',
            'description': 'md5:3f6e7f7fd2cad0a312e030987517b7b3',
            'uploader_id': 'onetnews',
        },
        'params': {
            'format': 'bestvideo',
            'skip_download': True,
        },
    }, {
        'url': 'https://www.wykop.pl/link/5515619/',
        'only_matching': True,
    }, {
        # youtube @ link comment
        'url': 'https://www.wykop.pl/link/5517513/#comment-77502323',
        'info_dict': {
            'id': 'rIHIxNha_FE',
            'ext': 'mp4',
            'title': '@LrPrl: Już niedługo...',
            'thumbnail': r're:^https?://.*\.jpg$',
            'uploader': 'RzecznikNASA',
            'like_count': int,
            'dislike_count': int,
            'comment_count': None,
            'age_limit': 0,
            'upload_date': '20150811',
            'description': 'md5:a5eb775ff886debe6abb43e1a43a7fbc',
            'uploader_id': 'UCMT04abyhI8FVVu_uxy-rkA',
        },
        'params': {
            'format': 'bestvideo',
            'skip_download': True,
        },
    }, {
        'url': 'https://www.wykop.pl/link/5504073/comment/77196337/#comment-77196337',
        'only_matching': True,
    }, {
        # streamable @ entry
        'url': 'https://www.wykop.pl/wpis/49614397/',
        'only_matching': True,
    }, {
        # gfycat @ entry
        'url': 'https://www.wykop.pl/wpis/49579089/kiedy-skryptujesz-zaczynajac-tablice-od-1-programo/',
        'info_dict': {
            'id': 'polishedmarriedgoral',
            'ext': 'mp4',
            'title': 'Kiedy skryptujesz zaczynając tablicę od [1]\n#programowanie #heheszki #2jednostkowe0integracyjnych',
            'thumbnail': r're:^https?://.*\.jpg$',
            'uploader': 'Acri',
            'like_count': int,
            'dislike_count': int,
            'comment_count': int,
            'age_limit': 0,
            'upload_date': '20200521',
            'timestamp': 1590097062,
        },
        'params': {
            'format': 'best',
            'skip_download': True,
        },
    }, {
        # youtube @ entry comment
        'url': 'https://www.wykop.pl/wpis/49583499/#comment-175441409',
        'info_dict': {
            'id': 'qa3KvvJhmbk',
            'ext': 'mp4',
            'title': '@KsiundzRobak: nic nowego ¯\\_(ツ)_/¯\nOd dawna jest wiadomo, że taki jest poziom wyborców PiSu:',
            'thumbnail': r're:^https?://.*\.jpg$',
            'uploader': 'Alex_Krycek',
            'like_count': int,
            'dislike_count': int,
            'comment_count': None,
            'age_limit': 0,
            'upload_date': '20121114',
            'description': 'Wolność słowa wg PiS. Awantura w trakcie spotkania Antoniego Macierewicza w Czeladzi.\nWięcej na www.Czeladz24.com - zamieszczać na swoich stronach proszę tylko i wyłącznie ze źródłem, podając www.Czeladz24.com',
            'uploader_id': 'kondipr1988',
        },
        'params': {
            'format': 'bestvideo',
            'skip_download': True,
        },
    }, {
        # youtube @ entry comment
        'url': 'https://www.wykop.pl/wpis/49583499/dzien-dobry-serdecznie-chcialbym-dzis-zaprezentowa/#comment-175441409',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Resolve a wykop.pl URL to the media it embeds.

        Returns a ``url_transparent`` info dict whose ``url`` points at the
        embedded media (or, for a plain link, at the link's source URL).

        Raises ExtractorError when the API response carries no usable data
        or the requested object has no embedded media / source URL.
        """
        mobj = re.match(self._VALID_URL, url)
        entity_type, main_id, comment_id, display_id = mobj.group(
            'type', 'main_id', 'comment_id', 'display_id')
        is_comment = comment_id is not None

        # API path is /<section>/<method>/<object id>/appkey/<key>; a comment
        # is addressed by its own id within its parent's section.
        if entity_type == 'link':
            api_section, api_method = 'links', 'link'
        else:
            api_section, api_method = 'entries', 'entry'
        if is_comment:
            api_method = 'comment'
        api_url = 'https://a2.wykop.pl/%s/%s/%s/appkey/%s' % (
            api_section, api_method, comment_id or main_id, self._APP_KEY)

        # Identifier used for progress/log output only, e.g. "l5515619" for a
        # link or "wc175441409" for a comment under an entry.
        video_id = entity_type[0] + (('c' + comment_id) if is_comment else main_id)

        response = self._download_json(api_url, video_id)
        data = response.get('data') if isinstance(response, dict) else None
        if not isinstance(data, dict):
            raise ExtractorError(
                'Wykop API returned no data for %s' % video_id)

        embed = data.get('embed')
        if not isinstance(embed, dict):
            # Objects without attached media have no usable embed object.
            embed = None

        if is_comment or entity_type == 'wpis':
            if embed is None or not embed.get('url'):
                raise ExtractorError(
                    'No embedded media found in %s' % video_id, expected=True)
            video_url = embed['url']
            if embed.get('type') == 'animated':
                # For animated embeds the reported URL has a .jpg extension;
                # the animation is fetched from the same path with .gif.
                video_url = video_url.replace('.jpg', '.gif')
        else:
            video_url = data.get('source_url')
            if not video_url:
                raise ExtractorError(
                    'No source URL found in %s' % video_id, expected=True)

        # Preview image and adult flag are read from the embed object when
        # there is one, otherwise from the top-level object.
        embed_or_data = embed if embed is not None else data

        over_18 = embed_or_data.get('plus18')
        if over_18 is True:
            age_limit = 18
        elif over_18 is False:
            age_limit = 0
        else:
            age_limit = None

        if 'title' in data:
            title = data['title']
        elif 'body' in data:
            # Objects without a title use their HTML body, stripped of markup.
            title = clean_html(data['body'])
        else:
            title = ''

        vote_count = int_or_none(data.get('vote_count'))
        vote_count_plus = int_or_none(data.get('vote_count_plus'))
        like_count = vote_count_plus if 'vote_count_plus' in data else vote_count

        if 'bury_count' in data:
            dislike_count = int_or_none(data['bury_count'])
        elif vote_count is not None and vote_count_plus is not None:
            # NOTE(review): if vote_count is the net score rather than the
            # total number of votes, this difference is non-positive and the
            # operands should be swapped - confirm against the API.
            dislike_count = vote_count - vote_count_plus
        else:
            dislike_count = None

        return {
            '_type': 'url_transparent',
            'url': video_url,
            'title': title,
            'alt_title': data.get('description'),
            'display_id': display_id,
            'thumbnail': url_or_none(embed_or_data.get('preview')),
            'uploader': (data.get('author') or {}).get('login'),
            'like_count': like_count,
            'dislike_count': dislike_count,
            'comment_count': int_or_none(data.get('comments_count')),
            'age_limit': age_limit,
        }