Made corrections after review; merged into npo.py
This commit is contained in:
parent
503acf8c87
commit
f106284a5e
@ -683,10 +683,10 @@ from .npo import (
|
||||
NPORadioFragmentIE,
|
||||
SchoolTVIE,
|
||||
HetKlokhuisIE,
|
||||
NPORecentsIE,
|
||||
VPROIE,
|
||||
WNLIE,
|
||||
)
|
||||
from .nporecents import NPORecentsIE
|
||||
from .npr import NprIE
|
||||
from .nrk import (
|
||||
NRKIE,
|
||||
|
@ -1,6 +1,7 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import xml.etree.ElementTree as ET
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
@ -477,7 +478,89 @@ class HetKlokhuisIE(NPODataMidEmbedIE):
|
||||
}
|
||||
}
|
||||
|
||||
class NPORecentsIE(NPOIE):
    """Playlist extractor for the recent episodes listed on an npo.nl program page.

    The program page is downloaded for its metadata (title, description); a
    second "search" request on the same program URL returns the episode
    listing, from which one playlist entry is produced per available episode.
    """

    # youtube-dl reads the extractor name from ``IE_NAME``; the previous
    # spelling ``IE_Name`` was silently ignored.
    IE_NAME = 'npo:recents'
    _VALID_URL = r'(?:https?://)?(?:www\.)?npo\.nl/(?P<alt_id>[^/]+)/(?P<program_id>\w+_\d+)'
    _TESTS = [
        {
            # Example of an npo3 program
            'url': 'https://www.npo.nl/keuringsdienst-van-waarde/KN_1678993',
            'info_dict': {
                'title': 'Keuringsdienst van Waarde',
                'id': 'KN_1678993',
                'description': 'md5:5ffaf131f175d8a771e7a7884833dad2'
            },
            'playlist_mincount': 8
        },
        {
            # Example of an npo1/npo2 program
            'url': 'https://www.npo.nl/jinek/KN_1676589',
            'info_dict': {
                'title': 'Jinek',
                'id': 'KN_1676589',
                'description': 'md5:6998986899b4903395f0cdd0670cedaf'
            },
            'playlist_mincount': 8
        },
        {
            # Example of a program for which there will be only one available episode (if any)
            'url': 'https://www.npo.nl/midsomer-murders/POW_00828660',
            'info_dict': {
                'title': 'Midsomer murders',
                'id': 'POW_00828660',
                'description': 'md5:a8b6e9d3e3bd367be88766e3ce8e8362'
            },
            'playlist_maxcount': 1
        }
    ]

    def _extract_entries(self, webpage, program_id, program_url):
        """Yield a ``url_result`` for every available episode of a program.

        ``webpage`` is the already downloaded program page, ``program_id`` is
        used as the id for the follow-up download, and ``program_url`` is the
        URL the episode-search path is appended to.

        Listing elements without a class attribute, without a link, or without
        an ``href`` are skipped instead of raising.
        """
        # Pages that reference the NPO3 logo asset use a different search
        # endpoint than the other channels.
        is_npo3 = 'www-assets.npo.nl/uploads/tv_channel/265/logo/smaller_npo3-logo.png' in webpage

        if is_npo3:
            episodes_url = '%s//search?category=broadcasts&page=1' % program_url
        else:
            episodes_url = '%s/search?media_type=broadcast&start=0&rows=8' % program_url

        episodes = self._download_webpage(
            episodes_url, program_id, note='Retrieving episodes')
        tree = ET.fromstring(episodes.encode('utf-8'))

        # Loop-invariant, so looked up once.  Non-fatal to match
        # _real_extract, which also tolerates a page without an og:title.
        video_title = self._og_search_title(webpage, fatal=False)

        for element in tree.findall('.//div'):
            # Not every <div> carries a class attribute.
            if 'span4' not in element.get('class', ''):
                continue

            hyperlink = element.find('.//a')
            if hyperlink is None:
                continue

            href = hyperlink.get('href')
            if not href:
                continue

            # Scan the direct child <div>s by hand: the ElementTree shipped
            # with Python 2.6 does not support [@attr="value"] predicates.
            unavailable = any(
                div.get('class') == 'program-not-available'
                for div in hyperlink.findall('div'))
            if unavailable:
                continue

            yield self.url_result(
                url='http://npo.nl%s' % href,
                video_title=video_title)

    def _real_extract(self, url):
        """Return a playlist info dict for the program page at ``url``."""
        mobj = re.match(self._VALID_URL, url)
        alt_id = mobj.group('alt_id')
        program_id = mobj.group('program_id')

        webpage = self._download_webpage(url, program_id)
        # Fall back to the URL slug when the page has no og:title.
        title = self._og_search_title(webpage, fatal=False) or alt_id
        description = (
            self._og_search_description(webpage)
            or self._html_search_meta(
                'description', webpage, 'description', fatal=False))
        entries = self._extract_entries(webpage, program_id, url)

        return {
            '_type': 'playlist',
            'id': program_id,
            'display_id': alt_id,
            'title': title,
            'description': description,
            'entries': entries
        }
|
||||
|
||||
|
||||
class NPOPlaylistBaseIE(NPOIE):
|
||||
def _real_extract(self, url):
|
||||
playlist_id = self._match_id(url)
|
||||
|
@ -1,60 +0,0 @@
|
||||
#!/usr/bin/python
|
||||
from .common import InfoExtractor
|
||||
|
||||
import re
|
||||
import xml.etree.ElementTree as ET
|
||||
|
||||
|
||||
class NPORecentsIE(InfoExtractor):
    """Build a playlist of the available recent episodes of an npo.nl program.

    Metadata comes from the program page itself; the episode list comes from a
    follow-up "search" request made against the same program URL.
    """

    # ``IE_NAME`` is the attribute youtube-dl consults for the extractor
    # name; it was previously misspelled as ``IE_Name``.
    IE_NAME = 'npo:recents'
    _VALID_URL = r'(?:https?://)?(?:www\.)?npo\.nl/(?P<alt_id>[^/]+)/(?P<program_id>\w+_\d+)'
    _TEST = {
        'url': 'https://www.npo.nl/keuringsdienst-van-waarde/KN_1678993',
        'info_dict': {
            'title': 'Keuringsdienst van Waarde',
            'id': 'KN_1678993',
            'description': 'In dit programma staat centraal wat fabrikanten ons als consumenten vertellen. Klopt het wat ze claimen en wat ze ons in reclames verkopen? Verslaggevers Teun van de Keuken, Sofie van den Enk, Daan Nieber, Ersin Kiris, Marijn Frank en Maarten Remmers nemen de telefoon ter hand en bellen er actief op los. Ze stellen simpele vragen en krijgen de meest verbazingwekkende antwoorden op food, non-food en nieuwsgerelateerde kwesties. Prikkelend, onderzoekend en vasthoudend. Keuringsdienst van Waarde: simpele vragen,verbazingwekkende antwoorden.'
        },
        'playlist_mincount': 8
    }

    def _extract_entries(self, webpage, program_id, program_url):
        """Generate one ``url_result`` per episode that is not marked unavailable.

        ``webpage`` is the program page content, ``program_id`` identifies the
        follow-up download, and ``program_url`` is the base the search path is
        appended to.  Malformed listing items (no class attribute, no link, no
        ``href``) are ignored rather than raising.
        """
        # The NPO3 logo asset in the page selects the alternative search URL.
        is_npo3 = 'www-assets.npo.nl/uploads/tv_channel/265/logo/smaller_npo3-logo.png' in webpage

        # %-formatting instead of '{}'.format(): auto-numbered fields are not
        # available on Python versions before 2.7.
        if is_npo3:
            episodes_url = '%s//search?category=broadcasts&page=1' % program_url
        else:
            episodes_url = '%s/search?media_type=broadcast&start=0&rows=8' % program_url

        episodes = self._download_webpage(
            episodes_url, program_id, note='Retrieving episodes')
        tree = ET.fromstring(episodes.encode('utf-8'))

        # Same title for every entry, so resolve it once; non-fatal so that a
        # page without an og:title does not abort the episode listing.
        video_title = self._og_search_title(webpage, fatal=False)

        for element in tree.findall('.//div'):
            # <div>s without a class attribute cannot be episode cells.
            if 'span4' not in element.get('class', ''):
                continue

            hyperlink = element.find('.//a')
            if hyperlink is None:
                continue

            href = hyperlink.get('href')
            if not href:
                continue

            # Check the direct child <div>s manually; [@class="..."] XPath
            # predicates need ElementTree 1.3 (Python 2.7+).
            inactive = any(
                div.get('class') == 'program-not-available'
                for div in hyperlink.findall('div'))
            if inactive:
                continue

            yield self.url_result(
                url='http://npo.nl%s' % href,
                video_title=video_title)

    def _real_extract(self, url):
        """Download the program page at ``url`` and return its playlist dict."""
        mobj = re.match(self._VALID_URL, url)
        alt_id = mobj.group('alt_id')
        program_id = mobj.group('program_id')

        webpage = self._download_webpage(url, program_id)
        title = self._og_search_title(webpage)
        description = self._og_search_description(webpage)
        entries = self._extract_entries(webpage, program_id, url)

        return {
            '_type': 'playlist',
            'id': program_id,
            'display_id': alt_id,
            'title': title,
            'description': description,
            'entries': entries
        }
|
Loading…
x
Reference in New Issue
Block a user