From 42db58ec7367e7ee6555e5f14107712add61d013 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 15 Dec 2019 23:15:24 +0700 Subject: [PATCH 1/3] [utils] Improve str_to_int --- test/test_utils.py | 5 +++++ youtube_dl/utils.py | 8 +++++--- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/test/test_utils.py b/test/test_utils.py index fed94a906..0896f4150 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -500,6 +500,11 @@ class TestUtil(unittest.TestCase): self.assertEqual(str_to_int('123,456'), 123456) self.assertEqual(str_to_int('123.456'), 123456) self.assertEqual(str_to_int(523), 523) + # Python 3 has no long + if sys.version_info < (3, 0): + eval('self.assertEqual(str_to_int(123456L), 123456)') + self.assertEqual(str_to_int('noninteger'), None) + self.assertEqual(str_to_int([]), None) def test_url_basename(self): self.assertEqual(url_basename('http://foo.de/'), '') diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 328f037a8..f6204692a 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -46,6 +46,7 @@ from .compat import ( compat_html_entities, compat_html_entities_html5, compat_http_client, + compat_integer_types, compat_kwargs, compat_os_name, compat_parse_qs, @@ -3519,10 +3520,11 @@ def str_or_none(v, default=None): def str_to_int(int_str): """ A more relaxed version of int_or_none """ - if not isinstance(int_str, compat_str): + if isinstance(int_str, compat_integer_types): return int_str - int_str = re.sub(r'[,\.\+]', '', int_str) - return int(int_str) + elif isinstance(int_str, compat_str): + int_str = re.sub(r'[,\.\+]', '', int_str) + return int_or_none(int_str) def float_or_none(v, scale=1, invscale=1, default=None): From fab01080f402dbfad00122b73714d92b5d1deb24 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 16 Dec 2019 00:08:18 +0700 Subject: [PATCH 2/3] [tv2dk:bornholm:play] Add extractor (closes #23291) --- youtube_dl/extractor/extractors.py | 5 +- 
youtube_dl/extractor/tv2dk.py | 74 +++++++++++++++++++++++++++++- 2 files changed, 77 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index fd93730fa..376d07727 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1168,7 +1168,10 @@ from .tv2 import ( TV2ArticleIE, KatsomoIE, ) -from .tv2dk import TV2DKIE +from .tv2dk import ( + TV2DKIE, + TV2DKBornholmPlayIE, +) from .tv2hu import TV2HuIE from .tv4 import TV4IE from .tv5mondeplus import TV5MondePlusIE diff --git a/youtube_dl/extractor/tv2dk.py b/youtube_dl/extractor/tv2dk.py index eb39424df..611fdc0c6 100644 --- a/youtube_dl/extractor/tv2dk.py +++ b/youtube_dl/extractor/tv2dk.py @@ -1,10 +1,16 @@ # coding: utf-8 from __future__ import unicode_literals +import json import re from .common import InfoExtractor -from ..utils import extract_attributes +from ..utils import ( + determine_ext, + extract_attributes, + js_to_json, + url_or_none, +) class TV2DKIE(InfoExtractor): @@ -80,3 +86,69 @@ class TV2DKIE(InfoExtractor): 'kaltura:%s:%s' % (partner_id, kaltura_id), 'Kaltura', video_id=kaltura_id)) return self.playlist_result(entries) + + +class TV2DKBornholmPlayIE(InfoExtractor): + _VALID_URL = r'https?://play\.tv2bornholm\.dk/\?.*?\bid=(?P<id>\d+)' + _TEST = { + 'url': 'http://play.tv2bornholm.dk/?area=specifikTV&id=781021', + 'info_dict': { + 'id': '781021', + 'ext': 'mp4', + 'title': '12Nyheder-27.11.19', + }, + 'params': { + 'skip_download': True, + }, + } + + def _real_extract(self, url): + video_id = self._match_id(url) + + video = self._download_json( + 'http://play.tv2bornholm.dk/controls/AJAX.aspx/specifikVideo', video_id, + data=json.dumps({ + 'playlist_id': video_id, + 'serienavn': '', + }).encode(), headers={ + 'X-Requested-With': 'XMLHttpRequest', + 'Content-Type': 'application/json; charset=UTF-8', + })['d'] + + # TODO: generalize flowplayer + title = self._search_regex( 
r'title\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1', video, 'title', + group='value') + sources = self._parse_json(self._search_regex( + r'(?s)sources:\s*(\[.+?\]),', video, 'sources'), + video_id, js_to_json) + + formats = [] + srcs = set() + for source in sources: + src = url_or_none(source.get('src')) + if not src: + continue + if src in srcs: + continue + srcs.add(src) + ext = determine_ext(src) + src_type = source.get('type') + if src_type == 'application/x-mpegurl' or ext == 'm3u8': + formats.extend(self._extract_m3u8_formats( + src, video_id, ext='mp4', entry_protocol='m3u8_native', + m3u8_id='hls', fatal=False)) + elif src_type == 'application/dash+xml' or ext == 'mpd': + formats.extend(self._extract_mpd_formats( + src, video_id, mpd_id='dash', fatal=False)) + else: + formats.append({ + 'url': src, + }) + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': title, + 'formats': formats, + } From 2dbc0967f26425acc204395bc69c9446d9ebd682 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 16 Dec 2019 00:40:34 +0700 Subject: [PATCH 3/3] [ChangeLog] Actualize [ci skip] --- ChangeLog | 56 +++++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 54 insertions(+), 2 deletions(-) diff --git a/ChangeLog b/ChangeLog index d4f809fc6..d2f17ee06 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,55 @@ +version <unreleased> + +Core +* [utils] Improve str_to_int ++ [downloader/hls] Add ability to override AES decryption key URL (#17521) + +Extractors ++ [tv2dk:bornholm:play] Add support for play.tv2bornholm.dk (#23291) ++ [slideslive] Add support for url and vimeo service names (#23414) +* [slideslive] Fix extraction (#23413) +* [twitch:clips] Fix extraction (#23375) ++ [soundcloud] Add support for token protected embeds (#18954) +* [vk] Improve extraction + * Fix User Videos extraction (#23356) + * Extract all videos for lists with more than 1000 videos (#23356) + + Add support for video albums (#14327, #14492) +- [kontrtube] Remove extractor 
+- [videopremium] Remove extractor +- [musicplayon] Remove extractor (#9225) ++ [ufctv] Add support for ufcfightpass.imgdge.com and + ufcfightpass.imggaming.com (#23343) ++ [twitch] Extract m3u8 formats frame rate (#23333) ++ [imggaming] Add support for playlists and extract subtitles ++ [ufcarabia] Add support for UFC Arabia (#23312) +* [ufctv] Fix extraction +* [yahoo] Fix gyao brightcove player id (#23303) +* [vzaar] Override AES decryption key URL (#17521) ++ [vzaar] Add support for AES HLS manifests (#17521, #23299) +* [nrl] Fix extraction +* [teachingchannel] Fix extraction +* [nintendo] Fix extraction and partially add support for Nintendo Direct + videos (#4592) ++ [ooyala] Add better fallback values for domain and streams variables ++ [youtube] Add support for youtubekids.com (#23272) +* [tv2] Detect DRM protection ++ [tv2] Add support for katsomo.fi and mtv.fi (#10543) +* [tv2] Fix tv2.no article extraction +* [msn] Improve extraction + + Add support for YouTube and NBCSports embeds + + Add support for articles with multiple videos + * Improve AOL embed support + * Improve format extraction +* [abcotvs] Relax URL regular expression and improve metadata extraction + (#18014) +* [channel9] Reduce response size +* [adobetv] Improve extraction + * Use OnDemandPagedList for list extractors + * Reduce show extraction requests + * Extract original video format and subtitles + + Add support for adobe tv embeds + + version 2019.11.28 Core @@ -583,7 +635,7 @@ Extractors version 2019.04.17 Extractors -* [openload] Randomize User-Agent (closes #20688) +* [openload] Randomize User-Agent (#20688) + [openload] Add support for oladblock domains (#20471) * [adn] Fix subtitle extraction (#12724) + [aol] Add support for localized websites @@ -1148,7 +1200,7 @@ Extractors + [youtube] Extract channel meta fields (#9676, #12939) * [porntube] Fix extraction (#17541) * [asiancrush] Fix extraction (#15630) -+ [twitch:clips] Extend URL regular expression (closes #17559) ++ 
[twitch:clips] Extend URL regular expression (#17559) + [vzaar] Add support for HLS * [tube8] Fix metadata extraction (#17520) * [eporner] Extract JSON-LD (#17519)