From 29626f0b6a299a9a704dbd041ff230c02def6d2a Mon Sep 17 00:00:00 2001
From: jjatria
Date: Wed, 28 Oct 2015 18:31:52 +0000
Subject: [PATCH 1/2] [biobiotv] Add new extractor

---
 youtube_dl/extractor/__init__.py |  1 +
 youtube_dl/extractor/biobiotv.py | 75 ++++++++++++++++++++++++++++++++
 2 files changed, 76 insertions(+)
 create mode 100644 youtube_dl/extractor/biobiotv.py

diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index f98e6487e..955771224 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -54,6 +54,7 @@ from .beatportpro import BeatportProIE
 from .bet import BetIE
 from .bild import BildIE
 from .bilibili import BiliBiliIE
+from .biobiotv import BioBioTVIE
 from .blinkx import BlinkxIE
 from .bliptv import BlipTVIE, BlipTVUserIE
 from .bloomberg import BloombergIE
diff --git a/youtube_dl/extractor/biobiotv.py b/youtube_dl/extractor/biobiotv.py
new file mode 100644
index 000000000..aae0588ef
--- /dev/null
+++ b/youtube_dl/extractor/biobiotv.py
@@ -0,0 +1,75 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+
+
+class BioBioTVIE(InfoExtractor):
+    _VALID_URL = r'https?://tv\.biobiochile\.cl/notas/(?P<year>\d{4})/\d{2}/\d{2}/(?P<id>[\w-]+)(?:\.shtml)?'
+
+    _TESTS = [{
+        'url': 'http://tv.biobiochile.cl/notas/2015/10/21/sobre-camaras-y-camarillas-parlamentarias.shtml',
+        'md5': '26f51f03cf580265defefb4518faec09',
+        'info_dict': {
+            'id': 'col_c266',
+            'display_id': 'sobre-camaras-y-camarillas-parlamentarias',
+            'ext': 'mp4',
+            'title': 'Sobre Cámaras y camarillas parlamentarias - BioBioChile TV',
+            'thumbnail': 'http://media.biobiochile.cl/wp-content/uploads/2015/10/atria-2010-730x350.jpg',
+            'url': 'http://unlimited2-cl.digitalproserver.com/bbtv/2015/col_c266.mp4',
+            'uploader': 'Fernando Atria',
+        }
+    }, {
+        'url': 'http://tv.biobiochile.cl/notas/2015/10/22/ninos-transexuales-de-quien-es-la-decision.shtml',
+        'md5': 'a8c868e6b5f6c17d56873d5633204f84',
+        'info_dict': {
+            'id': 'col_c270',
+            'display_id': 'ninos-transexuales-de-quien-es-la-decision',
+            'ext': 'mp4',
+            'title': 'Niños transexuales: ¿De quién es la decisión? - BioBioChile TV',
+            'thumbnail': 'http://media.biobiochile.cl/wp-content/uploads/2015/10/samantha-2210-730x350.jpg',
+            'url': 'http://unlimited2-cl.digitalproserver.com/bbtv/2015/col_c270.mp4',
+            'uploader': 'Samantha Morán',
+        }
+    }, {
+        'url': 'http://tv.biobiochile.cl/notas/2015/10/21/exclusivo-hector-pinto-formador-de-chupete-revela-version-del-ex-delantero-albo.shtml',
+        'md5': 'c8369b50d42ff0a4f6b969fbd1a7c32d',
+        'info_dict': {
+            'id': 'Keno_Pinto',
+            'display_id': 'exclusivo-hector-pinto-formador-de-chupete-revela-version-del-ex-delantero-albo',
+            'ext': 'mp4',
+            'title': 'Exclusivo: Héctor Pinto, formador de “Chupete”, revela versión del ex delantero albo - BioBioChile TV',
+            'thumbnail': 'http://media.biobiochile.cl/wp-content/uploads/2015/10/pinto-730x350.jpg',
+            'url': 'http://unlimited2-cl.digitalproserver.com/bbtv/2015/Keno_Pinto.mp4',
+            'uploader': 'Juan Pablo Echenique',
+        }
+    }]
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        display_id = mobj.group('id')
+        year = mobj.group('year')
+
+        webpage = self._download_webpage(url, display_id)
+
+        title = self._html_search_meta(
+            'og:title', webpage, 'title', fatal=True)
+
+        thumbnail = self._html_search_meta(
+            'og:image', webpage, 'thumbnail', fatal=True)
+
+        video_id = self._html_search_regex(
+            r'loadFWPlayerVideo\(\"player_0\", \"\d{4}/(.+)\.mp4\"\)', webpage, 'title')
+
+        url = 'http://unlimited2-cl.digitalproserver.com/bbtv/' + year + '/' + video_id + '.mp4'
+
+        return {
+            'id': video_id,
+            'title': title,
+            'url': url,
+            'display_id': display_id,
+            'thumbnail': thumbnail,
+            'uploader': self._search_regex(r'biobiochile\.cl/author[^"]+"[^>]*>([^<]+)<', webpage, 'uploader', fatal=False),
+        }
From e9eb184f3a186f9189d47d84961b2fbca895b4b2 Mon Sep 17 00:00:00 2001
From: jjatria
Date: Thu, 21 Jan 2016 23:10:00 +0000
Subject: [PATCH 2/2] Use _og methods for title and thumbnail

---
 youtube_dl/extractor/biobiotv.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/youtube_dl/extractor/biobiotv.py b/youtube_dl/extractor/biobiotv.py
index aae0588ef..0260b81d9 100644
--- a/youtube_dl/extractor/biobiotv.py
+++ b/youtube_dl/extractor/biobiotv.py
@@ -54,11 +54,9 @@ class BioBioTVIE(InfoExtractor):
 
         webpage = self._download_webpage(url, display_id)
 
-        title = self._html_search_meta(
-            'og:title', webpage, 'title', fatal=True)
+        title = self._og_search_title(webpage)
 
-        thumbnail = self._html_search_meta(
-            'og:image', webpage, 'thumbnail', fatal=True)
+        thumbnail = self._og_search_thumbnail(webpage)
 
         video_id = self._html_search_regex(
             r'loadFWPlayerVideo\(\"player_0\", \"\d{4}/(.+)\.mp4\"\)', webpage, 'title')