[cda] Fix extractor (fixes #24458)
This commit is contained in:
parent
a4ed50bb84
commit
c88611a773
@ -5,10 +5,12 @@ import codecs
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from ..compat import compat_urllib_parse_unquote
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
float_or_none,
|
float_or_none,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
|
try_get,
|
||||||
multipart_encode,
|
multipart_encode,
|
||||||
parse_duration,
|
parse_duration,
|
||||||
random_birthday,
|
random_birthday,
|
||||||
@ -98,6 +100,14 @@ class CDAIE(InfoExtractor):
|
|||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
|
|
||||||
|
metadata_json = self._html_search_regex(r'''(?x)
|
||||||
|
<script[^>]+type=(["\'])application/ld\+json\1[^>]*>
|
||||||
|
(?P<metadata_json>(?:.|\n)+?)
|
||||||
|
</script>
|
||||||
|
''', webpage, 'metadata_json', fatal=False, group='metadata_json')
|
||||||
|
|
||||||
|
metadata = self._parse_json(metadata_json, 'metadata', fatal=False)
|
||||||
|
|
||||||
uploader = self._search_regex(r'''(?x)
|
uploader = self._search_regex(r'''(?x)
|
||||||
<(span|meta)[^>]+itemprop=(["\'])author\2[^>]*>
|
<(span|meta)[^>]+itemprop=(["\'])author\2[^>]*>
|
||||||
(?:<\1[^>]*>[^<]*</\1>|(?!</\1>)(?:.|\n))*?
|
(?:<\1[^>]*>[^<]*</\1>|(?!</\1>)(?:.|\n))*?
|
||||||
@ -106,9 +116,7 @@ class CDAIE(InfoExtractor):
|
|||||||
view_count = self._search_regex(
|
view_count = self._search_regex(
|
||||||
r'Odsłony:(?:\s| )*([0-9]+)', webpage,
|
r'Odsłony:(?:\s| )*([0-9]+)', webpage,
|
||||||
'view_count', default=None)
|
'view_count', default=None)
|
||||||
average_rating = self._search_regex(
|
average_rating = try_get(metadata, lambda x: x[0]['aggregateRating']['ratingValue'], str)
|
||||||
r'<(?:span|meta)[^>]+itemprop=(["\'])ratingValue\1[^>]*>(?P<rating_value>[0-9.]+)',
|
|
||||||
webpage, 'rating', fatal=False, group='rating_value')
|
|
||||||
|
|
||||||
info_dict = {
|
info_dict = {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
@ -123,6 +131,47 @@ class CDAIE(InfoExtractor):
|
|||||||
'age_limit': 18 if need_confirm_age else 0,
|
'age_limit': 18 if need_confirm_age else 0,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Function extracted from cda.pl player.js script
|
||||||
|
def deobfuscate_video_url(url):
    """Return the playable URL for an obfuscated ``file`` value.

    The steps mirror the decoding routine of the site's player script:

    1. If the value contains none of ``http``, ``.mp4`` or ``uggcf://``
       it is treated as fully obfuscated: junk tokens are stripped, the
       value is percent-decoded, every printable ASCII character is
       rotated with ROT47, host/suffix fix-ups are applied and the
       ``https://`` scheme plus ``.mp4`` suffix are added.
    2. If the result still lacks ``http`` it is ROT13-decoded
       (``uggcf://`` is ``https://`` in ROT13).
    3. A missing ``mp4`` marker is appended and the ``adc.mp4`` suffix
       is normalised to ``.mp4``.

    :param url: raw ``file`` string taken from the player data
    :return: decoded video URL as a string
    """
    if not any(marker in url for marker in ('http', '.mp4', 'uggcf://')):
        # Filler tokens inserted into the payload; drop them before decoding.
        for junk in ('_XDDD', '_CDA', '_ADC', '_CXD', '_QWE', '_Q5', '_IKSDE'):
            url = url.replace(junk, '')

        url = compat_urllib_parse_unquote(url)

        # ROT47: rotate the 94 printable ASCII characters (codes 33..126)
        # by 47 positions; anything outside that range is kept verbatim.
        url = ''.join(
            chr(33 + (ord(char) + 14) % 94) if 33 <= ord(char) <= 126 else char
            for char in url)

        url = url.replace('.cda.mp4', '')
        # Numbered mirror hosts are rewritten to the canonical host.
        for mirror in ('.2cda.pl', '.3cda.pl'):
            url = url.replace(mirror, '.cda.pl')

        # The '.mp4' suffix goes before '/upstream' when that path
        # component is present, otherwise at the very end.
        if '/upstream' in url:
            url = url.replace('/upstream', '.mp4/upstream')
        else:
            url += '.mp4'
        url = 'https://' + url

    if 'http' not in url:
        url = codecs.decode(url, 'rot_13')

    if 'mp4' not in url:
        url += '.mp4'

    return url.replace('adc.mp4', '.mp4')
|
||||||
|
|
||||||
def extract_format(page, version):
|
def extract_format(page, version):
|
||||||
json_str = self._html_search_regex(
|
json_str = self._html_search_regex(
|
||||||
r'player_data=(\\?["\'])(?P<player_data>.+?)\1', page,
|
r'player_data=(\\?["\'])(?P<player_data>.+?)\1', page,
|
||||||
@ -137,12 +186,10 @@ class CDAIE(InfoExtractor):
|
|||||||
if not video or 'file' not in video:
|
if not video or 'file' not in video:
|
||||||
self.report_warning('Unable to extract %s version information' % version)
|
self.report_warning('Unable to extract %s version information' % version)
|
||||||
return
|
return
|
||||||
if video['file'].startswith('uggc'):
|
|
||||||
video['file'] = codecs.decode(video['file'], 'rot_13')
|
url = deobfuscate_video_url(video['file'])
|
||||||
if video['file'].endswith('adc.mp4'):
|
|
||||||
video['file'] = video['file'].replace('adc.mp4', '.mp4')
|
|
||||||
f = {
|
f = {
|
||||||
'url': video['file'],
|
'url': url,
|
||||||
}
|
}
|
||||||
m = re.search(
|
m = re.search(
|
||||||
r'<a[^>]+data-quality="(?P<format_id>[^"]+)"[^>]+href="[^"]+"[^>]+class="[^"]*quality-btn-active[^"]*">(?P<height>[0-9]+)p',
|
r'<a[^>]+data-quality="(?P<format_id>[^"]+)"[^>]+href="[^"]+"[^>]+class="[^"]*quality-btn-active[^"]*">(?P<height>[0-9]+)p',
|
||||||
|
Loading…
x
Reference in New Issue
Block a user