From ed10b1cfea328517d91cfeecbf641b8eefaac7f0 Mon Sep 17 00:00:00 2001 From: Christian Pointner Date: Sun, 17 Jul 2016 22:29:55 +0200 Subject: [PATCH 1/8] [extractor] added info extractor for CBA (https://cba.fro.at) --- youtube_dl/extractor/cba.py | 105 +++++++++++++++++++++++++++++ youtube_dl/extractor/extractors.py | 1 + youtube_dl/utils.py | 8 +++ 3 files changed, 114 insertions(+) create mode 100644 youtube_dl/extractor/cba.py diff --git a/youtube_dl/extractor/cba.py b/youtube_dl/extractor/cba.py new file mode 100644 index 000000000..dfd30767e --- /dev/null +++ b/youtube_dl/extractor/cba.py @@ -0,0 +1,105 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import datetime +import os + +from .common import InfoExtractor +from ..utils import ( + clean_html, + ExtractorError, + strip_bom_utf8, + RegexNotFoundError, + UnavailableVideoError, + update_url_query, +) + +class CBAIE(InfoExtractor): + IE_NAME = 'cba' + IE_DESC = 'cultural broadcasting archive' + _VALID_URL = r'https?://(?:www\.)?cba\.fro\.at/(?P[0-9]+)' + _TEST = { + 'url': 'https://cba.fro.at/320619', + 'md5': 'e40379688fcc5e95d6d8a482bb665b02', + 'info_dict': { + 'id': '320619', + 'ext': 'mp3', + 'title': 'Radio Netwatcher Classics vom 15.7.2016 – Peter Pilz, Sicherheitssprecher Grüne über die nationale Entwicklung zum Überwachungsstaat', + 'url': 'https://cba.fro.at/wp-content/uploads/radio_netwatcher/netwatcher-20160715.mp3', + } + } + _FORMATS = { + 'audio/ogg': {'id': '1', 'ext': 'ogg', 'preference': 100}, + 'audio/mpeg': {'id': '2', 'ext': 'mp3', 'preference': 50} + } + _API_KEY = None + + def __init__(self, *args, **kwargs): + try: + self._API_KEY = os.environ["CBA_API_KEY"] + except KeyError: + pass + + def _add_optional_parameter(self, formats, name, data, key, convert=None): + try: + param = data[key] + if convert: + param = convert(param) + formats[name] = param + except KeyError: + pass + + def _real_extract(self, url): + video_id = self._match_id(url) + api_posts_url = "https://cba.fro.at/wp-json/wp/v2/posts/%s" % video_id + api_media_url = "https://cba.fro.at/wp-json/wp/v2/media?media_type=audio&parent=%s" % video_id + + title = 'unknown' + description = '' + formats = [] + + posts_result = self._download_json(api_posts_url, video_id, 'query posts api-endpoint', + 'unable to query posts api-endpoint', transform_source=strip_bom_utf8) + try: + title = clean_html(posts_result['title']['rendered']) + description = clean_html(posts_result['content']['rendered']) + except KeyError: + pass + + api_key_str = " (without API_KEY)" + if self._API_KEY: + api_key_str = " (using API_KEY '%s')" % self._API_KEY + api_media_url = update_url_query(api_media_url, {'c': self._API_KEY}) + + media_result = self._download_json(api_media_url, video_id, 'query media api-endpoint%s' % api_key_str, + 'unable to qeury media api-endpoint%s' % api_key_str, transform_source=strip_bom_utf8) + for media in media_result: + try: + url = media['source_url'] + if url == "": + continue + + ft = media['mime_type'] + f = { 'url': url, 'format': ft, 'format_id': self._FORMATS[ft]['id'], 'preference': self._FORMATS[ft]['preference'] } + self._add_optional_parameter(f, 'filesize', media['media_details'], 'filesize') + self._add_optional_parameter(f, 'abr', media['media_details'], 'bitrate', lambda x: x/1000) + self._add_optional_parameter(f, 'asr', media['media_details'], 'sample_rate') + + formats.append(f) + except KeyError: + pass + + if len(formats) == 0: + if self._API_KEY: + raise ExtractorError('unable to fetch CBA entry') + else: + raise UnavailableVideoError('you may need an API key to download copyright protected files') + + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': title, + 'description': description, + 'formats': formats, + } diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 21efa96b2..e05147f9a 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -128,6 +128,7 @@ from .carambatv import ( CarambaTVPageIE, ) from .cartoonnetwork import CartoonNetworkIE +from .cba import CBAIE from .cbc import ( CBCIE, CBCPlayerIE, diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 1091f17f3..1d3d6600c 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -3121,3 +3121,11 @@ def decode_png(png_data): current_row.append(color) return width, height, pixels + + +def strip_bom_utf8(s): + BOM_UTF8 = u'\ufeff' + if s.startswith(BOM_UTF8): + return s[len(BOM_UTF8):] + + return s From 408716467827680eab0ae1d1ef9f027d3b59e9dc Mon Sep 17 00:00:00 2001 From: Christian Pointner Date: Thu, 1 Sep 2016 19:59:56 +0200 Subject: [PATCH 2/8] added missing description field in test vector --- youtube_dl/extractor/cba.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/cba.py b/youtube_dl/extractor/cba.py index dfd30767e..c87171e2b 100644 --- a/youtube_dl/extractor/cba.py +++ b/youtube_dl/extractor/cba.py @@ -26,6 +26,7 @@ class CBAIE(InfoExtractor): 'ext': 'mp3', 'title': 'Radio Netwatcher Classics vom 15.7.2016 – Peter Pilz, Sicherheitssprecher Grüne über die nationale Entwicklung zum Überwachungsstaat', 'url': 'https://cba.fro.at/wp-content/uploads/radio_netwatcher/netwatcher-20160715.mp3', + 'description': 'Peter Pilz von den Grünen zu Gast in Radio Netwatcher 2008\nRadio Netwatcher Classics 2016 – Das Sommerprogramm in deinem Freien Radio\nPeter Pilz über nationale Entwicklung zum Polizeistaat zu Gast in Radio Netwatcher 2008\nqtalk v. 29.1.2008 Thema:“Österreich auf dem Weg in den Polizeistaat?“\nDas neue SicherheitsPolizeiGesetz aus Sicht von NR Peter Pilz (Die Grünen)\nIm Biedermeier setzte Metternich durch die Karlsbader Beschlüsse von 1819 eine strenge Zensur und eine starke Einschränkung jeglicher politischer Betätigung durch. In einem mehr als fragwürdigen Eilverfahren wurden die Beschlüsse vom Bundestag in Frankfurt einstimmig bestätigt, obwohl sie tief in die Rechte der Einzelstaaten des Deutschen Bundes eingriffen. Erst mit der bürgerlichen Märzrevolution von 1848 gelang es, sich aus einem System von Verfolgung und Zensur durch die Polizei zu befreien.\nDie jüngsten Ereignisse in Österreich und der EU zeigen beängstigende Parallelen. In einer überfallartigen Übernacht-Aktion beschließt der Nationalrat gegen Mitternacht des 6. Dezember 2007 die Novelle zum Sicherheitspolizeigesetz – ohne die in Österreich üblichen Begutachtungen von neuen Gesetzen und ohne Vorlage beim Innenausschuss des Parlamentes.\nAusgehehelt wurde mit dieser Novelle auch das Prinzip der österreichischen Gewaltenteilung: Erstmalig ist es der Polizei ohne richterlichen Befehl gestattet den aktuellen Standort unserer Mobiltelefonen abzufragen, einen IMSI-Catcher einzusetzen (und damit unsere Handytelefonate mitzuhören) und von den Netzbetreibern Auskunft über dynamische IP-Adressen zu erzwingen (und damit unsere private Internetnutzung zu erfragen).\nDie massive und dabei noch stetige Ausweitung der Polizeibefugnisse durch das Sicherheitspolizeigesetz öffnet die Tür in den Überwachungsstaat. Immer umfassender wollen uns Polizei und Nachrichtendienste kontrollieren: durch Maßnahmen wie Lauschangriff, Rasterfahndung, Bundestrojaner, Bildungsevidenz, Videoüberwachung, Fingerabdrücke, Genmusterabdrücke, Vorratsdatenspeicherung und IMSI-Catcher.\nWir sind der Überzeugung, dass nicht alles zulässig sein darf, was technisch möglich ist. Verfassung, Justiz und Polizei haben eine gemeinsame Aufgabe: Uns und unsere Freiheit zu schützen. Immer öfter wächst aber aus vermeintlichem Schutz eine Bedrohung heran. Und immer öfter geht eine schrankenlose Überwachung auf Kosten unserer Freiheit und Demokratie. Denn Menschen, die sich überwacht fühlen, sind nicht mehr bereit eine eigene Meinung zu äußern.\nPeter Pilz von den Grünen hat eine Petition – Gegen die Ausweitung der polizeilichen Überwachung – initiert: „Wir erwarten vom österreichischen Nationalrat Sorgfalt und Verantwortungsbewusstsein im Umgang mit den Grundrechten der Menschen und der Verfassung der Republik.“\nPetition der Grünen: http://www.ueberwachungsstaat.at/\nWenn einseitige Sicherheitspolitik die Freiheit gefährdet, ist es Zeit die Freiheit vor der Sicherheitspolitik zu schützen.\nhttp://www.quintessenz.at/d/000100004175\nMit der Veranstaltungsreihe q/talk lädt die q/uintessenz zu Fachvorträgen über die Themen Bürgerrechte und neue Technologien monatlich ins MQ Wien. http://www.quintessenz.at/\nqTalk by q/uintessenz\nBeteiligte:\nRadio Netwatcher – Redaktionsteam (Verfasser/in oder Urheber/in)\nQuelle: https://cba.fro.at/72100\n \nPlaylist / Bonustrack: Rockit Gaming – POKEMON GO RAP SONG' } } _FORMATS = { From 7f4b4650b0d243c42436eadc57277a4a229e0929 Mon Sep 17 00:00:00 2001 From: Christian Pointner Date: Thu, 1 Sep 2016 20:24:37 +0200 Subject: [PATCH 3/8] try to get Encoding from Unicode BOM and strip it from downloaded webpage --- youtube_dl/extractor/cba.py | 6 ++---- youtube_dl/extractor/common.py | 5 +++++ youtube_dl/utils.py | 28 ++++++++++++++-------------- 3 files changed, 21 insertions(+), 18 deletions(-) diff --git a/youtube_dl/extractor/cba.py b/youtube_dl/extractor/cba.py index c87171e2b..f291b2da4 100644 --- a/youtube_dl/extractor/cba.py +++ b/youtube_dl/extractor/cba.py @@ -8,7 +8,6 @@ from .common import InfoExtractor from ..utils import ( clean_html, ExtractorError, - strip_bom_utf8, RegexNotFoundError, UnavailableVideoError, update_url_query, @@ -59,8 +58,7 @@ class CBAIE(InfoExtractor): description = '' formats = [] - posts_result = self._download_json(api_posts_url, video_id, 'query posts api-endpoint', - 'unable to query posts api-endpoint', transform_source=strip_bom_utf8) + posts_result = self._download_json(api_posts_url, video_id, 'query posts api-endpoint', 'unable to query posts api-endpoint') try: title = clean_html(posts_result['title']['rendered']) description = clean_html(posts_result['content']['rendered']) @@ -73,7 +71,7 @@ class CBAIE(InfoExtractor): api_media_url = update_url_query(api_media_url, {'c': self._API_KEY}) media_result = self._download_json(api_media_url, video_id, 'query media api-endpoint%s' % api_key_str, - 'unable to qeury media api-endpoint%s' % api_key_str, transform_source=strip_bom_utf8) + 'unable to qeury media api-endpoint%s' % api_key_str) for media in media_result: try: url = media['source_url'] diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index da0af29ec..f46857523 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -57,6 +57,7 @@ from ..utils import ( parse_m3u8_attributes, extract_attributes, parse_codecs, + parse_strip_bom, ) @@ -438,6 +439,10 @@ class InfoExtractor(object): def _webpage_read_content(self, urlh, url_or_request, video_id, note=None, errnote=None, fatal=True, prefix=None, encoding=None): content_type = urlh.headers.get('Content-Type', '') webpage_bytes = urlh.read() + webpage_bytes, bom_enc = parse_strip_bom(webpage_bytes) + if not encoding: + encoding = bom_enc + if prefix is not None: webpage_bytes = prefix + webpage_bytes if not encoding: diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 1d3d6600c..3dea36635 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -2235,8 +2235,8 @@ def age_restricted(content_limit, age_limit): return age_limit < content_limit -def is_html(first_bytes): - """ Detect whether a file contains HTML by examining its first bytes. """ +def parse_strip_bom(data): + """ try to find Unicode BOM and strip it. """ BOMS = [ (b'\xef\xbb\xbf', 'utf-8'), @@ -2246,12 +2246,20 @@ def is_html(first_bytes): (b'\xfe\xff', 'utf-16-be'), ] for bom, enc in BOMS: - if first_bytes.startswith(bom): - s = first_bytes[len(bom):].decode(enc, 'replace') - break + if data.startswith(bom): + return data[len(bom):], enc else: - s = first_bytes.decode('utf-8', 'replace') + return data, None + +def is_html(first_bytes): + """ Detect whether a file contains HTML by examining its first bytes. """ + + first_bytes, enc = parse_strip_bom(first_bytes) + if enc == None: + enc = 'utf-8' + + s = first_bytes.decode(enc, 'replace') return re.match(r'^\s*<', s) @@ -3121,11 +3129,3 @@ def decode_png(png_data): current_row.append(color) return width, height, pixels - - -def strip_bom_utf8(s): - BOM_UTF8 = u'\ufeff' - if s.startswith(BOM_UTF8): - return s[len(BOM_UTF8):] - - return s From 2ac405397e8265aede36aa3d5cdd54fde2b55449 Mon Sep 17 00:00:00 2001 From: Christian Pointner Date: Fri, 2 Sep 2016 00:03:04 +0200 Subject: [PATCH 4/8] fixed some violations of the coding conventions (more to come) --- youtube_dl/extractor/cba.py | 38 +++++++++++++++++++------------------ 1 file changed, 20 insertions(+), 18 deletions(-) diff --git a/youtube_dl/extractor/cba.py b/youtube_dl/extractor/cba.py index f291b2da4..99cc4b158 100644 --- a/youtube_dl/extractor/cba.py +++ b/youtube_dl/extractor/cba.py @@ -11,6 +11,7 @@ from ..utils import ( RegexNotFoundError, UnavailableVideoError, update_url_query, + int_or_none, ) class CBAIE(InfoExtractor): @@ -40,30 +41,29 @@ class CBAIE(InfoExtractor): except KeyError: pass - def _add_optional_parameter(self, formats, name, data, key, convert=None): - try: - param = data[key] - if convert: - param = convert(param) - formats[name] = param - except KeyError: - pass - def _real_extract(self, url): video_id = self._match_id(url) api_posts_url = "https://cba.fro.at/wp-json/wp/v2/posts/%s" % video_id api_media_url = "https://cba.fro.at/wp-json/wp/v2/media?media_type=audio&parent=%s" % video_id - title = 'unknown' + title = '' description = '' formats = [] posts_result = self._download_json(api_posts_url, video_id, 'query posts api-endpoint', 'unable to query posts api-endpoint') try: - title = clean_html(posts_result['title']['rendered']) - description = clean_html(posts_result['content']['rendered']) + title = posts_result['title']['raw'] except KeyError: - pass + title = clean_html(posts_result['title']['rendered']) + + try: + description = posts_result['content']['raw'] + except KeyError: + try: + description = clean_html(posts_result['content']['rendered']) + except KeyError: + pass + api_key_str = " (without API_KEY)" if self._API_KEY: @@ -75,20 +75,22 @@ class CBAIE(InfoExtractor): for media in media_result: try: url = media['source_url'] - if url == "": + if not url: continue ft = media['mime_type'] f = { 'url': url, 'format': ft, 'format_id': self._FORMATS[ft]['id'], 'preference': self._FORMATS[ft]['preference'] } - self._add_optional_parameter(f, 'filesize', media['media_details'], 'filesize') - self._add_optional_parameter(f, 'abr', media['media_details'], 'bitrate', lambda x: x/1000) - self._add_optional_parameter(f, 'asr', media['media_details'], 'sample_rate') + media_details = media.get('media_details') + if media_details: + f['filesize'] = int_or_none(media_details.get('filesize')) + f['abr'] = int_or_none(media_details.get('bitrate'), 1000) + f['asr'] = int_or_none(media_details.get('sample_rate')) formats.append(f) except KeyError: pass - if len(formats) == 0: + if not formats: if self._API_KEY: raise ExtractorError('unable to fetch CBA entry') else: From a72b74c6f9bc26fd3f9c18e6b5ff5cefebc3caf4 Mon Sep 17 00:00:00 2001 From: Christian Pointner Date: Fri, 2 Sep 2016 01:36:46 +0200 Subject: [PATCH 5/8] reverted commit 7f4b465 in favor of much easier fix: using utf-8-sig for encoding --- youtube_dl/extractor/cba.py | 5 +++-- youtube_dl/extractor/common.py | 5 ----- youtube_dl/utils.py | 20 ++++++-------------- 3 files changed, 9 insertions(+), 21 deletions(-) diff --git a/youtube_dl/extractor/cba.py b/youtube_dl/extractor/cba.py index 99cc4b158..def68f802 100644 --- a/youtube_dl/extractor/cba.py +++ b/youtube_dl/extractor/cba.py @@ -50,7 +50,8 @@ class CBAIE(InfoExtractor): description = '' formats = [] - posts_result = self._download_json(api_posts_url, video_id, 'query posts api-endpoint', 'unable to query posts api-endpoint') + posts_result = self._download_json(api_posts_url, video_id, 'query posts api-endpoint', + 'unable to query posts api-endpoint', encoding='utf-8-sig') try: title = posts_result['title']['raw'] except KeyError: @@ -71,7 +72,7 @@ class CBAIE(InfoExtractor): api_media_url = update_url_query(api_media_url, {'c': self._API_KEY}) media_result = self._download_json(api_media_url, video_id, 'query media api-endpoint%s' % api_key_str, - 'unable to qeury media api-endpoint%s' % api_key_str) + 'unable to qeury media api-endpoint%s' % api_key_str, encoding='utf-8-sig') for media in media_result: try: url = media['source_url'] diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index f46857523..da0af29ec 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -57,7 +57,6 @@ from ..utils import ( parse_m3u8_attributes, extract_attributes, parse_codecs, - parse_strip_bom, ) @@ -439,10 +438,6 @@ class InfoExtractor(object): def _webpage_read_content(self, urlh, url_or_request, video_id, note=None, errnote=None, fatal=True, prefix=None, encoding=None): content_type = urlh.headers.get('Content-Type', '') webpage_bytes = urlh.read() - webpage_bytes, bom_enc = parse_strip_bom(webpage_bytes) - if not encoding: - encoding = bom_enc - if prefix is not None: webpage_bytes = prefix + webpage_bytes if not encoding: diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 3dea36635..1091f17f3 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -2235,8 +2235,8 @@ def age_restricted(content_limit, age_limit): return age_limit < content_limit -def parse_strip_bom(data): - """ try to find Unicode BOM and strip it. """ +def is_html(first_bytes): + """ Detect whether a file contains HTML by examining its first bytes. """ BOMS = [ (b'\xef\xbb\xbf', 'utf-8'), @@ -2246,20 +2246,12 @@ def parse_strip_bom(data): (b'\xfe\xff', 'utf-16-be'), ] for bom, enc in BOMS: - if data.startswith(bom): - return data[len(bom):], enc + if first_bytes.startswith(bom): + s = first_bytes[len(bom):].decode(enc, 'replace') + break else: - return data, None + s = first_bytes.decode('utf-8', 'replace') - -def is_html(first_bytes): - """ Detect whether a file contains HTML by examining its first bytes. """ - - first_bytes, enc = parse_strip_bom(first_bytes) - if enc == None: - enc = 'utf-8' - - s = first_bytes.decode(enc, 'replace') return re.match(r'^\s*<', s) From fc75a2d468ed7eb49e5c38bcbb0f363ded2e9427 Mon Sep 17 00:00:00 2001 From: Christian Pointner Date: Fri, 2 Sep 2016 02:36:50 +0200 Subject: [PATCH 6/8] use --username/--password, --netrc for API Key --- youtube_dl/extractor/cba.py | 26 ++++++++++---------------- 1 file changed, 10 insertions(+), 16 deletions(-) diff --git a/youtube_dl/extractor/cba.py b/youtube_dl/extractor/cba.py index def68f802..86d4e2a05 100644 --- a/youtube_dl/extractor/cba.py +++ b/youtube_dl/extractor/cba.py @@ -33,13 +33,7 @@ class CBAIE(InfoExtractor): 'audio/ogg': {'id': '1', 'ext': 'ogg', 'preference': 100}, 'audio/mpeg': {'id': '2', 'ext': 'mp3', 'preference': 50} } - _API_KEY = None - - def __init__(self, *args, **kwargs): - try: - self._API_KEY = os.environ["CBA_API_KEY"] - except KeyError: - pass + _NETRC_MACHINE = 'cba' def _real_extract(self, url): video_id = self._match_id(url) @@ -65,14 +59,14 @@ class CBAIE(InfoExtractor): except KeyError: pass + (_, api_key) = self._get_login_info() + api_key_msg = " (without API_KEY)" + if api_key: + api_key_msg = " (using API_KEY '%s')" % api_key + api_media_url = update_url_query(api_media_url, {'c': api_key}) - api_key_str = " (without API_KEY)" - if self._API_KEY: - api_key_str = " (using API_KEY '%s')" % self._API_KEY - api_media_url = update_url_query(api_media_url, {'c': self._API_KEY}) - - media_result = self._download_json(api_media_url, video_id, 'query media api-endpoint%s' % api_key_str, - 'unable to qeury media api-endpoint%s' % api_key_str, encoding='utf-8-sig') + media_result = self._download_json(api_media_url, video_id, 'query media api-endpoint%s' % api_key_msg, + 'unable to qeury media api-endpoint%s' % api_key_msg, encoding='utf-8-sig') for media in media_result: try: url = media['source_url'] @@ -92,10 +86,10 @@ class CBAIE(InfoExtractor): pass if not formats: - if self._API_KEY: + if api_key: raise ExtractorError('unable to fetch CBA entry') else: - raise UnavailableVideoError('you may need an API key to download copyright protected files') + self.raise_login_required('you need an API key to download copyright protected files') self._sort_formats(formats) From c7eb72b3972b414ae89a10edce1a956d17f8ec7f Mon Sep 17 00:00:00 2001 From: Christian Pointner Date: Fri, 2 Sep 2016 02:51:02 +0200 Subject: [PATCH 7/8] replaced remaining data[key] with data.get(key) --- youtube_dl/extractor/cba.py | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/youtube_dl/extractor/cba.py b/youtube_dl/extractor/cba.py index 86d4e2a05..e0e4a8260 100644 --- a/youtube_dl/extractor/cba.py +++ b/youtube_dl/extractor/cba.py @@ -68,22 +68,22 @@ class CBAIE(InfoExtractor): media_result = self._download_json(api_media_url, video_id, 'query media api-endpoint%s' % api_key_msg, 'unable to qeury media api-endpoint%s' % api_key_msg, encoding='utf-8-sig') for media in media_result: - try: - url = media['source_url'] - if not url: - continue + url = media.get('source_url') + if not url: + continue - ft = media['mime_type'] - f = { 'url': url, 'format': ft, 'format_id': self._FORMATS[ft]['id'], 'preference': self._FORMATS[ft]['preference'] } - media_details = media.get('media_details') - if media_details: - f['filesize'] = int_or_none(media_details.get('filesize')) - f['abr'] = int_or_none(media_details.get('bitrate'), 1000) - f['asr'] = int_or_none(media_details.get('sample_rate')) + ft = media.get('mime_type') + if not ft or not self._FORMATS.get(ft): + continue - formats.append(f) - except KeyError: - pass + f = { 'url': url, 'format': ft, 'format_id': self._FORMATS[ft]['id'], 'preference': self._FORMATS[ft]['preference'] } + media_details = media.get('media_details') + if media_details: + f['filesize'] = int_or_none(media_details.get('filesize')) + f['abr'] = int_or_none(media_details.get('bitrate'), 1000) + f['asr'] = int_or_none(media_details.get('sample_rate')) + + formats.append(f) if not formats: if api_key: From fb579823d22bb0c5de024be0221c10654b7ffd38 Mon Sep 17 00:00:00 2001 From: Christian Pointner Date: Sat, 3 Sep 2016 01:43:21 +0200 Subject: [PATCH 8/8] added fallback to preview player in case of copyrighted material without API key This needs PR #10540 to work. --- youtube_dl/extractor/cba.py | 47 +++++++++++++++++++++++++++++-------- 1 file changed, 37 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/cba.py b/youtube_dl/extractor/cba.py index e0e4a8260..1c4fdc138 100644 --- a/youtube_dl/extractor/cba.py +++ b/youtube_dl/extractor/cba.py @@ -35,8 +35,8 @@ class CBAIE(InfoExtractor): } _NETRC_MACHINE = 'cba' - def _real_extract(self, url): - video_id = self._match_id(url) + + def _try_wp_api(self, video_id, api_key): api_posts_url = "https://cba.fro.at/wp-json/wp/v2/posts/%s" % video_id api_media_url = "https://cba.fro.at/wp-json/wp/v2/media?media_type=audio&parent=%s" % video_id @@ -59,7 +59,6 @@ class CBAIE(InfoExtractor): except KeyError: pass - (_, api_key) = self._get_login_info() api_key_msg = " (without API_KEY)" if api_key: api_key_msg = " (using API_KEY '%s')" % api_key @@ -85,13 +84,8 @@ class CBAIE(InfoExtractor): formats.append(f) - if not formats: - if api_key: - raise ExtractorError('unable to fetch CBA entry') - else: - self.raise_login_required('you need an API key to download copyright protected files') - - self._sort_formats(formats) + if formats: + self._sort_formats(formats) return { 'id': video_id, @@ -99,3 +93,36 @@ class CBAIE(InfoExtractor): 'description': description, 'formats': formats, } + + + def _try_preview_player(self, video_id): + url = "https://cba.fro.at/%s" % video_id + webpage = self._download_webpage(url, video_id, "fetch preview player", "unable to fetch preview player") + + info_dict = self._parse_html5_media_entries(url, webpage, video_id)[0] + + self._sort_formats(info_dict['formats']) + self._remove_duplicate_formats(info_dict['formats']) + + info_dict.update({ + 'id': video_id, + 'title': self._og_search_title(webpage), + 'description': self._og_search_description(webpage), + }) + + return info_dict + + + def _real_extract(self, url): + video_id = self._match_id(url) + + (_, api_key) = self._get_login_info() + info = self._try_wp_api(video_id, api_key) + + if not info.get('formats'): + if api_key: + raise ExtractorError('unable to fetch CBA entry from API') + else: + return self._try_preview_player(video_id) + + return info