From 44e9793a972a1999246d9f4256fb704ab81d6626 Mon Sep 17 00:00:00 2001 From: Diogo Lemos Date: Tue, 25 Feb 2020 20:39:54 +0000 Subject: [PATCH 1/7] airvuz: add preliminary logic --- docs/supportedsites.md | 1 + youtube_dl/extractor/airvuz.py | 29 +++++++++++++++++++++++++++++ youtube_dl/extractor/extractors.py | 1 + 3 files changed, 31 insertions(+) create mode 100644 youtube_dl/extractor/airvuz.py diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 02bc088ab..b2adbf2f9 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -37,6 +37,7 @@ - **aenetworks**: A+E Networks: A&E, Lifetime, History.com, FYI Network and History Vault - **afreecatv**: afreecatv.com - **AirMozilla** + - **AirVuz** - **AliExpressLive** - **AlJazeera** - **Allocine** diff --git a/youtube_dl/extractor/airvuz.py b/youtube_dl/extractor/airvuz.py new file mode 100644 index 000000000..a53dd0339 --- /dev/null +++ b/youtube_dl/extractor/airvuz.py @@ -0,0 +1,29 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor + + +class AirVuzIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?airvuz\.com/video/(?:.+)?id=(?P.+)' + _TEST = { + 'url': 'https://www.airvuz.com/video/An-Imaginary-World?id=599e85c49282a717c50f2f7a', + 'info_dict': { + 'id': '599e85c49282a717c50f2f7a', + 'ext': 'mp4', + 'title': 'md5:7fc56270e7a70fa81a5935b72eacbe29', + }, + } + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + title = self._html_search_meta('og:title', webpage) + video_url = self._html_search_regex(r']+?(?:name|property)=(?:\'og:video:url\'|"og:video:url"|og:video:url)[^>]+?content=(?:"([^"]+?)"|\'([^\']+?)\'|([^\s"\'=<>`]+))', webpage, 'video_url') + + return { + 'id': video_id, + 'title': title, + 'url': video_url, + } diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 64d1fa251..2393791c2 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -34,6 +34,7 @@ from .aenetworks import ( ) from .afreecatv import AfreecaTVIE from .airmozilla import AirMozillaIE +from .airvuz import AirVuzIE from .aljazeera import AlJazeeraIE from .alphaporno import AlphaPornoIE from .amcnetworks import AMCNetworksIE From 80a2c8399880449790432d3ead2e267dcbd1643e Mon Sep 17 00:00:00 2001 From: Diogo Lemos Date: Tue, 25 Feb 2020 21:15:11 +0000 Subject: [PATCH 2/7] airvuz: add remaining attributes --- youtube_dl/extractor/airvuz.py | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/airvuz.py b/youtube_dl/extractor/airvuz.py index a53dd0339..c677e1147 100644 --- a/youtube_dl/extractor/airvuz.py +++ b/youtube_dl/extractor/airvuz.py @@ -2,28 +2,46 @@ from __future__ import unicode_literals from .common import InfoExtractor +from ..utils import determine_ext + +import re class AirVuzIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?airvuz\.com/video/(?:.+)?id=(?P.+)' + _VALID_URL = r'https?://(?:www\.)?airvuz\.com/video/(?P.+)\?id=(?P.+)' _TEST = { 'url': 'https://www.airvuz.com/video/An-Imaginary-World?id=599e85c49282a717c50f2f7a', 'info_dict': { 'id': '599e85c49282a717c50f2f7a', - 'ext': 'mp4', + 'display_id': 'An-Imaginary-World', 'title': 'md5:7fc56270e7a70fa81a5935b72eacbe29', + 'ext': 'mp4', + 'thumbnail': r're:^https?://.*\.jpg$', }, } def _real_extract(self, url): - video_id = self._match_id(url) + groups = re.match(self._VALID_URL, url) + video_id = groups.group('id') + display_id = groups.group('display_id') + webpage = self._download_webpage(url, video_id) - title = self._html_search_meta('og:title', webpage) + title = self._og_search_title(webpage) + thumbnail = self._og_search_thumbnail(webpage) + description = self._og_search_description(webpage) + uploader = self._html_search_regex(r'class=(?:\'img-circle\'|"img-circle"|img-circle)[^>]+?alt=(?:"([^"]+?)"|\'([^\']+?)\'|([^\s"\'=<>`]+))', webpage, 'uploader', fatal=False) or self._html_search_regex(r'https?://(?:www\.)?airvuz\.com/user/([^>]*)', webpage, 'uploader', fatal=False) + video_url = self._html_search_regex(r']+?(?:name|property)=(?:\'og:video:url\'|"og:video:url"|og:video:url)[^>]+?content=(?:"([^"]+?)"|\'([^\']+?)\'|([^\s"\'=<>`]+))', webpage, 'video_url') + ext = determine_ext(video_url) return { 'id': video_id, + 'display_id': display_id, 'title': title, 'url': video_url, + 'ext': ext, + 'thumbnail': thumbnail, + 'description': description, + 'uploader': uploader, } From ac4c6c709063b85bc93248cecb92ddcf94fa7054 Mon Sep 17 00:00:00 2001 From: Diogo Lemos Date: Thu, 27 Feb 2020 19:02:24 +0000 Subject: [PATCH 3/7] airvuz: add formats --- youtube_dl/extractor/airvuz.py | 38 +++++++++++++++++++++++++++++----- 1 file changed, 33 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/airvuz.py b/youtube_dl/extractor/airvuz.py index c677e1147..325b2e2be 100644 --- a/youtube_dl/extractor/airvuz.py +++ b/youtube_dl/extractor/airvuz.py @@ -2,7 +2,6 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..utils import determine_ext import re @@ -32,15 +31,44 @@ class AirVuzIE(InfoExtractor): description = self._og_search_description(webpage) uploader = self._html_search_regex(r'class=(?:\'img-circle\'|"img-circle"|img-circle)[^>]+?alt=(?:"([^"]+?)"|\'([^\']+?)\'|([^\s"\'=<>`]+))', webpage, 'uploader', fatal=False) or self._html_search_regex(r'https?://(?:www\.)?airvuz\.com/user/([^>]*)', webpage, 'uploader', fatal=False) - video_url = self._html_search_regex(r']+?(?:name|property)=(?:\'og:video:url\'|"og:video:url"|og:video:url)[^>]+?content=(?:"([^"]+?)"|\'([^\']+?)\'|([^\s"\'=<>`]+))', webpage, 'video_url') - ext = determine_ext(video_url) + formats = [] + + meta = self._download_json('https://www.airvuz.com/api/videos/%s?type=dynamic' % video_id, video_id, fatal=False) + if meta: + info_res = meta.get('data') + + for res in reversed(info_res.get('resolutions')): + video_url = res.get('src') + if not video_url: + continue + # URL is a relative path + video_url = 'https://www.airvuz.com/%s' % video_url + + formats.append({ + 'url': video_url, + 'format_id': res.get('label'), + 'height': res.get('res') + }) + else: + self.report_extraction(video_id) + + video_url = self._html_search_regex(r']+?(?:name|property)=(?:\'og:video:url\'|"og:video:url"|og:video:url)[^>]+?content=(?:"([^"]+?)"|\'([^\']+?)\'|([^\s"\'=<>`]+))', webpage, 'video_url') + + if video_url: + format_id = video_url.split("-")[-1].split(".")[0] + if len(format_id) <= 2: + format_id = None + + formats.append({ + 'url': video_url, + 'format_id': format_id, + }) return { 'id': video_id, 'display_id': display_id, 'title': title, - 'url': video_url, - 'ext': ext, + 'formats': formats, 'thumbnail': thumbnail, 'description': description, 'uploader': uploader, From 24c4208c32b28bc0e850a129dd90aab0b183c241 Mon Sep 17 00:00:00 2001 From: Diogo Lemos Date: Thu, 27 Feb 2020 19:56:48 +0000 Subject: [PATCH 4/7] airvuz: fix and add more tests --- youtube_dl/extractor/airvuz.py | 40 ++++++++++++++++++++++++---------- 1 file changed, 29 insertions(+), 11 deletions(-) diff --git a/youtube_dl/extractor/airvuz.py b/youtube_dl/extractor/airvuz.py index 325b2e2be..0ae35759a 100644 --- a/youtube_dl/extractor/airvuz.py +++ b/youtube_dl/extractor/airvuz.py @@ -2,27 +2,45 @@ from __future__ import unicode_literals from .common import InfoExtractor +from ..compat import compat_urllib_parse_unquote import re class AirVuzIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?airvuz\.com/video/(?P.+)\?id=(?P.+)' - _TEST = { - 'url': 'https://www.airvuz.com/video/An-Imaginary-World?id=599e85c49282a717c50f2f7a', - 'info_dict': { - 'id': '599e85c49282a717c50f2f7a', - 'display_id': 'An-Imaginary-World', - 'title': 'md5:7fc56270e7a70fa81a5935b72eacbe29', - 'ext': 'mp4', - 'thumbnail': r're:^https?://.*\.jpg$', + _TESTS = [ + { + 'url': 'https://www.airvuz.com/video/An-Imaginary-World?id=599e85c49282a717c50f2f7a', + 'info_dict': { + 'id': '599e85c49282a717c50f2f7a', + 'display_id': 'An-Imaginary-World', + 'title': 'An Imaginary World', + 'ext': 'mp4', + 'thumbnail': r're:^https?://.*\.jpg', + 'uploader': 'Tobias Hägg', + 'description': 'md5:176b43a79a0a19d592c0261d9c0a48c7', + } }, - } + # Emojis in the URL, title and description + { + 'url': 'https://www.airvuz.com/video/Cinematic-FPV-Flying-at-a-Cove-%F0%9F%8C%8A%F0%9F%8C%8A%F0%9F%8C%8A-The-rocks-waves-and-seaweed%F0%9F%98%8D?id=5d3db133ec63bf7e65c2226e', + 'info_dict': { + 'id': '5d3db133ec63bf7e65c2226e', + 'display_id': 'Cinematic-FPV-Flying-at-a-Cove-🌊🌊🌊-The-rocks-waves-and-seaweed😍', + 'title': 'Cinematic FPV Flying at a Cove! 🌊🌊🌊 The rocks, waves, and seaweed😍!', + 'ext': 'mp4', + 'thumbnail': r're:^https?://.*\.jpg', + 'uploader': 'Mako Reactra', + 'description': 'md5:ac91310ff7c2de26a0f1e8e8caae2ee6' + }, + }, + ] def _real_extract(self, url): groups = re.match(self._VALID_URL, url) video_id = groups.group('id') - display_id = groups.group('display_id') + display_id = compat_urllib_parse_unquote(groups.group('display_id')) webpage = self._download_webpage(url, video_id) @@ -47,7 +65,6 @@ class AirVuzIE(InfoExtractor): formats.append({ 'url': video_url, 'format_id': res.get('label'), - 'height': res.get('res') }) else: self.report_extraction(video_id) @@ -57,6 +74,7 @@ class AirVuzIE(InfoExtractor): if video_url: format_id = video_url.split("-")[-1].split(".")[0] if len(format_id) <= 2: + # Format can't be induced from the filename format_id = None formats.append({ From d0188b52c582b49e4d98181236f2008d82f4771b Mon Sep 17 00:00:00 2001 From: Diogo Lemos Date: Sat, 29 Feb 2020 18:06:36 +0000 Subject: [PATCH 5/7] airvuz: handle dash --- youtube_dl/extractor/airvuz.py | 101 +++++++++++++++++++++++++-------- 1 file changed, 76 insertions(+), 25 deletions(-) diff --git a/youtube_dl/extractor/airvuz.py b/youtube_dl/extractor/airvuz.py index 0ae35759a..f00628da1 100644 --- a/youtube_dl/extractor/airvuz.py +++ b/youtube_dl/extractor/airvuz.py @@ -3,6 +3,7 @@ from __future__ import unicode_literals from .common import InfoExtractor from ..compat import compat_urllib_parse_unquote +from ..utils import ExtractorError import re @@ -10,6 +11,24 @@ import re class AirVuzIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?airvuz\.com/video/(?P.+)\?id=(?P.+)' _TESTS = [ + { + 'url': 'https://www.airvuz.com/video/1-pack-before-the-thunderstorm?id=5d3c10176c32ae7ddc7cab29', + 'info_dict': { + 'id': '5d3c10176c32ae7ddc7cab29', + 'display_id': '1-pack-before-the-thunderstorm', + 'title': '1 pack before the thunderstorm', + 'ext': 'mp4', + 'thumbnail': r're:^https?://cdn.airvuz.com/image/drone-video-thumbnail\?image=airvuz-drone-video/43a6dd35ec08457545655905d638ea58/4c71ed0d6e1d93a06a0f3a053097af85.45.*', + 'uploader': 'Menga FPV', + 'uploader_id': 'menga-fpv', + 'uploader_url': 'https://www.airvuz.com/user/menga-fpv', + 'description': 'md5:13e8079235de737142d475f0b4058869', + }, + 'params': { + 'format': 'video-1' + } + }, + # No MPD { 'url': 'https://www.airvuz.com/video/An-Imaginary-World?id=599e85c49282a717c50f2f7a', 'info_dict': { @@ -44,43 +63,75 @@ class AirVuzIE(InfoExtractor): webpage = self._download_webpage(url, video_id) + self.report_extraction(video_id) + title = self._og_search_title(webpage) thumbnail = self._og_search_thumbnail(webpage) description = self._og_search_description(webpage) uploader = self._html_search_regex(r'class=(?:\'img-circle\'|"img-circle"|img-circle)[^>]+?alt=(?:"([^"]+?)"|\'([^\']+?)\'|([^\s"\'=<>`]+))', webpage, 'uploader', fatal=False) or self._html_search_regex(r'https?://(?:www\.)?airvuz\.com/user/([^>]*)', webpage, 'uploader', fatal=False) + video_url = self._html_search_regex(r']+?(?:name|property)=(?:\'og:video:url\'|"og:video:url"|og:video:url)[^>]+?content=(?:"([^"]+?)"|\'([^\']+?)\'|([^\s"\'=<>`]+))', webpage, 'video_url', fatal=False) or None + formats = [] + mpd_info = False - meta = self._download_json('https://www.airvuz.com/api/videos/%s?type=dynamic' % video_id, video_id, fatal=False) - if meta: - info_res = meta.get('data') + result = re.match(r'https?://cdn\.airvuz\.com/drone-video/(?P.+)/', video_url) + if result: + mpd_id = result.group('id') + mpd_pattern = 'https://www.airvuz.com/drone-video/%s/dash/%s_dash.mpd' % (mpd_id, mpd_id) - for res in reversed(info_res.get('resolutions')): - video_url = res.get('src') - if not video_url: - continue - # URL is a relative path - video_url = 'https://www.airvuz.com/%s' % video_url + try: + # Try to get mpd file + mpd_formats = self._extract_mpd_formats(mpd_pattern, video_id, fatal=True) + if mpd_formats: + # VIDEO-1 has always the highest quality + for format in reversed(mpd_formats): + format["format_id"] = format["format_id"].lower() + formats.append(format) - formats.append({ - 'url': video_url, - 'format_id': res.get('label'), - }) - else: - self.report_extraction(video_id) + mpd_info = True - video_url = self._html_search_regex(r']+?(?:name|property)=(?:\'og:video:url\'|"og:video:url"|og:video:url)[^>]+?content=(?:"([^"]+?)"|\'([^\']+?)\'|([^\s"\'=<>`]+))', webpage, 'video_url') + except ExtractorError: + pass - if video_url: - format_id = video_url.split("-")[-1].split(".")[0] - if len(format_id) <= 2: - # Format can't be induced from the filename - format_id = None + if mpd_info is False: + try: + # Some videos don't have MPD information + # Use undocumented API to get the formats + meta = self._download_json('https://www.airvuz.com/api/videos/%s?type=dynamic' % video_id, video_id, fatal=True) + if meta: + info_res = meta.get('data') - formats.append({ - 'url': video_url, - 'format_id': format_id, - }) + for res in reversed(info_res.get('resolutions')): + video_url = res.get('src') + if not video_url: + continue + + # URL is a relative path + video_url = 'https://www.airvuz.com/%s' % video_url + + formats.append({ + 'url': video_url, + 'format_id': res.get('label'), + }) + + except ExtractorError: + # Fallback to original video + self.report_warning('Unable to extract formats') + self.to_screen('%s: Extracting original video' % video_id) + + if video_url: + format_id = video_url.split("-")[-1].split(".")[0] + if len(format_id) <= 2: + # Format can't be induced from the filename + format_id = None + + formats.append({ + 'url': video_url, + 'format_id': format_id, + }) + else: + raise ExtractorError('Unable to extract video data') return { 'id': video_id, From 9684b12c897d4647ab41644ba961eb5ba9850539 Mon Sep 17 00:00:00 2001 From: Diogo Lemos Date: Sun, 1 Mar 2020 02:16:44 +0000 Subject: [PATCH 6/7] airvuz: more optional attributes and alternatives --- youtube_dl/extractor/airvuz.py | 40 +++++++++++++++++++++++++++++----- 1 file changed, 35 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/airvuz.py b/youtube_dl/extractor/airvuz.py index f00628da1..eeb7cb946 100644 --- a/youtube_dl/extractor/airvuz.py +++ b/youtube_dl/extractor/airvuz.py @@ -3,7 +3,10 @@ from __future__ import unicode_literals from .common import InfoExtractor from ..compat import compat_urllib_parse_unquote -from ..utils import ExtractorError +from ..utils import ( + ExtractorError, + unified_timestamp +) import re @@ -19,6 +22,8 @@ class AirVuzIE(InfoExtractor): 'title': '1 pack before the thunderstorm', 'ext': 'mp4', 'thumbnail': r're:^https?://cdn.airvuz.com/image/drone-video-thumbnail\?image=airvuz-drone-video/43a6dd35ec08457545655905d638ea58/4c71ed0d6e1d93a06a0f3a053097af85.45.*', + 'timestamp': 1564217367, + 'upload_date': '20190727', 'uploader': 'Menga FPV', 'uploader_id': 'menga-fpv', 'uploader_url': 'https://www.airvuz.com/user/menga-fpv', @@ -37,7 +42,11 @@ class AirVuzIE(InfoExtractor): 'title': 'An Imaginary World', 'ext': 'mp4', 'thumbnail': r're:^https?://.*\.jpg', + 'timestamp': 1503561156, + 'upload_date': '20170824', 'uploader': 'Tobias Hägg', + 'uploader_id': 'tobias-hägg', + 'uploader_url': 'https://www.airvuz.com/user/tobias-hägg', 'description': 'md5:176b43a79a0a19d592c0261d9c0a48c7', } }, @@ -50,12 +59,22 @@ class AirVuzIE(InfoExtractor): 'title': 'Cinematic FPV Flying at a Cove! 🌊🌊🌊 The rocks, waves, and seaweed😍!', 'ext': 'mp4', 'thumbnail': r're:^https?://.*\.jpg', + 'timestamp': 1564324147, + 'upload_date': '20190728', 'uploader': 'Mako Reactra', + 'uploader_id': 'mako-reactra', + 'uploader_url': 'https://www.airvuz.com/user/mako-reactra', 'description': 'md5:ac91310ff7c2de26a0f1e8e8caae2ee6' }, + 'params': { + 'format': 'video-1' + } }, ] + def _extract_og_property(self, prop, html, fatal=False): + return self._html_search_regex(r']+?(?:name|property)=(?:\'og:%s\'|"og:%s"|og:%s)[^>]+?content=(?:"([^"]+?)"|\'([^\']+?)\'|([^\s"\'=<>`]+))' % (prop, prop, prop), html, prop, fatal=fatal, default=None) + def _real_extract(self, url): groups = re.match(self._VALID_URL, url) video_id = groups.group('id') @@ -65,12 +84,20 @@ class AirVuzIE(InfoExtractor): self.report_extraction(video_id) - title = self._og_search_title(webpage) - thumbnail = self._og_search_thumbnail(webpage) + title = self._og_search_title(webpage) or self._html_search_meta('twitter:title', webpage, fatal=True) + thumbnail = self._og_search_thumbnail(webpage) or self._html_search_meta('twitter:image', webpage, fatal=False) description = self._og_search_description(webpage) - uploader = self._html_search_regex(r'class=(?:\'img-circle\'|"img-circle"|img-circle)[^>]+?alt=(?:"([^"]+?)"|\'([^\']+?)\'|([^\s"\'=<>`]+))', webpage, 'uploader', fatal=False) or self._html_search_regex(r'https?://(?:www\.)?airvuz\.com/user/([^>]*)', webpage, 'uploader', fatal=False) + timestamp = unified_timestamp(self._extract_og_property('updated_time', webpage, fatal=False)) + uploader = self._html_search_regex(r'class=(?:\'img-circle\'|"img-circle"|img-circle)[^>]+?alt=(?:"([^"]+?)"|\'([^\']+?)\'|([^\s"\'=<>`]+))', webpage, 'uploader', fatal=False, default=None) - video_url = self._html_search_regex(r']+?(?:name|property)=(?:\'og:video:url\'|"og:video:url"|og:video:url)[^>]+?content=(?:"([^"]+?)"|\'([^\']+?)\'|([^\s"\'=<>`]+))', webpage, 'video_url', fatal=False) or None + uploader_id = None + uploader_url = None + uploader_info = re.search(r'(?Phttps?://(?:www\.)?airvuz\.com/user/(?P[^>]+))', webpage) + if uploader_info is not None: + uploader_id = uploader_info.group('id') + uploader_url = uploader_info.group('url') + + video_url = self._extract_og_property('video:url', webpage, fatal=True) formats = [] mpd_info = False @@ -140,5 +167,8 @@ class AirVuzIE(InfoExtractor): 'formats': formats, 'thumbnail': thumbnail, 'description': description, + 'timestamp': timestamp, 'uploader': uploader, + 'uploader_id': uploader_id, + 'uploader_url': uploader_url, } From 4ec487ffa0bed5ebcecc8e069a14de103f2a6871 Mon Sep 17 00:00:00 2001 From: Diogo Lemos Date: Tue, 3 Mar 2020 19:07:09 +0000 Subject: [PATCH 7/7] airvuz: fix dash --- youtube_dl/extractor/airvuz.py | 84 +++++++++++++++++++++++++--------- 1 file changed, 63 insertions(+), 21 deletions(-) diff --git a/youtube_dl/extractor/airvuz.py b/youtube_dl/extractor/airvuz.py index eeb7cb946..7c7b3eeb4 100644 --- a/youtube_dl/extractor/airvuz.py +++ b/youtube_dl/extractor/airvuz.py @@ -4,11 +4,17 @@ from __future__ import unicode_literals from .common import InfoExtractor from ..compat import compat_urllib_parse_unquote from ..utils import ( + determine_ext, ExtractorError, - unified_timestamp + replace_extension, + unified_timestamp, + url_basename, + urljoin, + int_or_none, ) import re +import copy class AirVuzIE(InfoExtractor): @@ -29,9 +35,6 @@ class AirVuzIE(InfoExtractor): 'uploader_url': 'https://www.airvuz.com/user/menga-fpv', 'description': 'md5:13e8079235de737142d475f0b4058869', }, - 'params': { - 'format': 'video-1' - } }, # No MPD { @@ -66,9 +69,6 @@ class AirVuzIE(InfoExtractor): 'uploader_url': 'https://www.airvuz.com/user/mako-reactra', 'description': 'md5:ac91310ff7c2de26a0f1e8e8caae2ee6' }, - 'params': { - 'format': 'video-1' - } }, ] @@ -76,9 +76,9 @@ class AirVuzIE(InfoExtractor): return self._html_search_regex(r']+?(?:name|property)=(?:\'og:%s\'|"og:%s"|og:%s)[^>]+?content=(?:"([^"]+?)"|\'([^\']+?)\'|([^\s"\'=<>`]+))' % (prop, prop, prop), html, prop, fatal=fatal, default=None) def _real_extract(self, url): - groups = re.match(self._VALID_URL, url) - video_id = groups.group('id') - display_id = compat_urllib_parse_unquote(groups.group('display_id')) + re_url = re.match(self._VALID_URL, url) + video_id = re_url.group('id') + display_id = compat_urllib_parse_unquote(re_url.group('display_id')) webpage = self._download_webpage(url, video_id) @@ -105,27 +105,70 @@ class AirVuzIE(InfoExtractor): result = re.match(r'https?://cdn\.airvuz\.com/drone-video/(?P.+)/', video_url) if result: mpd_id = result.group('id') - mpd_pattern = 'https://www.airvuz.com/drone-video/%s/dash/%s_dash.mpd' % (mpd_id, mpd_id) try: # Try to get mpd file - mpd_formats = self._extract_mpd_formats(mpd_pattern, video_id, fatal=True) + mpd_formats = self._extract_mpd_formats('https://www.airvuz.com/drone-video/%s/dash/%s_dash.mpd' % (mpd_id, mpd_id), video_id, fatal=True) + if mpd_formats: # VIDEO-1 has always the highest quality - for format in reversed(mpd_formats): - format["format_id"] = format["format_id"].lower() - formats.append(format) + # Sorts from worst to best + self._sort_formats(mpd_formats) + + # Adapt original audio and video only formats list + a_index = None + for i, format in enumerate(mpd_formats): + if 'AUDIO-1' in format.get('format_id').upper(): + a_index = i + format['acodec'] = '%s@%sk (%s Hz)' % (format.get('acodec'), int_or_none(format.get('tbr')), format.get('asr')) + format['format_note'] = 'tiny ' + format['asr'] = None + format['container'] = None + else: + format['format_note'] = '%sp ' % (format.get('height')) + # reject video only formats priority, otherwise it gets picked up when format is not specified + format['acodec'] = 'video only' + + # Confirm audio track was found + if a_index is None: + raise KeyError('Unable to extract audio data') + else: + a_format = mpd_formats[a_index] + del mpd_formats[a_index] + + formats.append(a_format) + formats.extend(mpd_formats) + + # Attach video+audio to the available formats + count = len(mpd_formats) + for avf in copy.deepcopy(mpd_formats): + # Replace URL to CDN containing whole media + file_baseurl = url_basename(avf.get('url')) + file_url = urljoin(result[0], file_baseurl) + if avf.get('ext'): + avf['url'] = replace_extension(file_url, avf.get('ext')) + else: + avf['url'] = replace_extension(file_url, determine_ext(avf.get('url'))) + + avf['format_id'] = 'av-%s' % count + avf['acodec'] = a_format.get('acodec') + avf['format_note'] = '%sp ' % (avf.get('height')) + avf['container'] = None + + formats.append(avf) + count -= 1 mpd_info = True - except ExtractorError: + except (KeyError, ExtractorError): + # ExtractorError can occur if dash file is not available, in that case we proceed to the other extraction methods pass if mpd_info is False: try: # Some videos don't have MPD information - # Use undocumented API to get the formats - meta = self._download_json('https://www.airvuz.com/api/videos/%s?type=dynamic' % video_id, video_id, fatal=True) + # Use API to get the formats + meta = self._download_json('https://www.airvuz.com/api/videos/%s' % video_id, video_id, fatal=True) if meta: info_res = meta.get('data') @@ -133,7 +176,6 @@ class AirVuzIE(InfoExtractor): video_url = res.get('src') if not video_url: continue - # URL is a relative path video_url = 'https://www.airvuz.com/%s' % video_url @@ -143,8 +185,8 @@ class AirVuzIE(InfoExtractor): }) except ExtractorError: - # Fallback to original video - self.report_warning('Unable to extract formats') + # Fallback to og original video + self.report_warning('Unable to extract formats from JSON') self.to_screen('%s: Extracting original video' % video_id) if video_url: