Extractor for npo.nl programs (as opposed to episodes of programs).

Retrieves only the most recent episodes of the program in question (hence the name). Some programs have so many episodes available that it doesn't make practical sense to retrieve them all; see the discussion in issue #7947.
2017-04-03 17:49:50 +02:00 · 2017-04-03 17:49:50 +02:00 · d53104f923
commit d53104f923
parent b022f4f600
3 changed files with 118 additions and 0 deletions
--- a/testnporecent.ps1
+++ b/testnporecent.ps1
@ -0,0 +1,43 @@
 Describe 'Flake8' {
    # Locate the extractor relative to this script (which sits in the repository
    # root) so the check runs from any checkout instead of one fixed home directory.
    $extractorPath = Join-Path $PSScriptRoot 'youtube_dl/extractor/npo.py'
    It 'Does not return any errors' {
        # flake8 prints one line per violation, so no output means a clean file.
        & flake8 $extractorPath | Should BeNullOrEmpty
    }
 }
 Describe 'Tests' {
    # Resolve the download test runner relative to this script (which sits in the
    # repository root) rather than through a machine-specific absolute path.
    $testScript = Join-Path $PSScriptRoot 'test/test_download.py'
    # One entry per interpreter under test: the label used in the test name, the
    # command to invoke, and the exact version banner that command must report.
    $interpreters = @(
        @{ Label = '2.6'; Command = 'python2.6'; Version = 'Python 2.6.9' },
        @{ Label = '2.7'; Command = 'python'; Version = 'Python 2.7.13' },
        @{ Label = '3.5'; Command = 'python3'; Version = 'Python 3.5.2' },
        @{ Label = '3.6'; Command = 'python3.6'; Version = 'Python 3.6.1' }
    )
    foreach ($interpreter in $interpreters) {
        It "Should work in Python $($interpreter.Label)" {
            $python = $interpreter.Command
            # Python 2 writes its version banner to stderr, hence the 2>&1;
            # the redirection is harmless for interpreters that use stdout.
            & $python '--version' 2>&1 | Should Be $interpreter.Version
            # The three NPORecents test cases are numbered '', '_1' and '_2'.
            foreach ($suffix in '', '_1', '_2') {
                & $python $testScript "TestDownload.test_NPORecents$suffix" 2>&1
                $LASTEXITCODE | Should Be 0
            }
        }
    }
 }
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@ -686,6 +686,7 @@ from .npo import (
    NPORadioFragmentIE,
    SchoolTVIE,
    HetKlokhuisIE,
    NPORecentsIE,
    VPROIE,
    WNLIE,
 )
--- a/youtube_dl/extractor/npo.py
+++ b/youtube_dl/extractor/npo.py
@ -478,6 +478,80 @@ class HetKlokhuisIE(NPODataMidEmbedIE):
    }
 class NPORecentsIE(NPOIE):
    """Playlist extractor for npo.nl program pages (as opposed to single episodes).

    Only the episodes listed on the first page of the program's episode
    search are returned; entries carrying a "program-not-available" marker
    are skipped.
    """
    IE_NAME = 'npo:recents'
    # Both patterns capture the episode path in group 1; group 2 matches only
    # when the episode is flagged with the "program-not-available" marker.
    npo12_regex = r"""<div class='span4'>\s*<div class='image-container'>\s*<a href="(.*?)">\s*(<div class="program-not-available">)?"""
    npo3_regex = r"""<div class='span4 image'>\s*<a href="(.*?)">\s*<div class="meta-container">\s*<div class="meta first">\s*<div class="md-label"><span class="npo-glyph triangle-right"></span></div>\s*<div class="md-value">.*?</div>\s*</div>\s*</div>\s*(<div class="program-not-available">)?"""
    _VALID_URL = r'(?:https?://)?(?:www\.)?npo\.nl/(?P<alt_id>[^/]+)/(?P<program_id>\w+_\d+)'
    _TESTS = [{
        # Example of an npo3 program
        'url': 'https://www.npo.nl/keuringsdienst-van-waarde/KN_1678993',
        'info_dict': {
            'title': 'Keuringsdienst van Waarde',
            'id': 'KN_1678993',
            'description': 'md5:5ffaf131f175d8a771e7a7884833dad2'
        },
        'playlist_mincount': 8
    }, {
        # Example of an npo1/npo2 program
        'url': 'https://www.npo.nl/jinek/KN_1676589',
        'info_dict': {
            'title': 'Jinek',
            'id': 'KN_1676589',
            'description': 'md5:6998986899b4903395f0cdd0670cedaf'
        },
        'playlist_mincount': 8
    }, {
        # Example of a program for which there will be only one available episode (if any)
        'url': 'https://www.npo.nl/midsomer-murders/POW_00828660',
        'info_dict': {
            'title': 'Midsomer murders',
            'id': 'POW_00828660',
            'description': 'md5:a8b6e9d3e3bd367be88766e3ce8e8362'
        },
        'playlist_maxcount': 1
    }]
    def _extract_entries(self, webpage, program_id, program_url):
        """Yield a url_result for each available episode of the program.

        webpage is the already downloaded program page, program_id is used
        to label the extra download, and program_url is the page URL the
        episode search URL is derived from.
        """
        # The NPO 3 logo on the program page decides which listing layout
        # (and therefore which search URL and pattern) applies.
        is_npo3 = 'www-assets.npo.nl/uploads/tv_channel/265/logo/smaller_npo3-logo.png' in webpage
        if is_npo3:
            # NOTE(review): the doubled slash before "search" looks like a
            # typo; kept byte-for-byte until confirmed against the live site.
            episodes_url = '%s//search?category=broadcasts&page=1' % program_url
            regex = self.npo3_regex
        else:
            episodes_url = '%s/search?media_type=broadcast&start=0&rows=8' % program_url
            regex = self.npo12_regex
        episodes = self._download_webpage(episodes_url, program_id, note='Retrieving episodes')
        # Loop-invariant, so looked up once. Non-fatal to stay consistent with
        # _real_extract, which also tolerates a page without an og:title.
        program_title = self._og_search_title(webpage, fatal=False)
        for match in re.finditer(regex, episodes):
            # Group 2 is set only for episodes marked as not available.
            if match.group(2) is not None:
                continue
            yield self.url_result(
                url='http://npo.nl%s' % match.group(1),
                video_title=program_title)
    def _real_extract(self, url):
        """Return a playlist info dict for the program page at url."""
        mobj = re.match(self._VALID_URL, url)
        alt_id = mobj.group('alt_id')
        program_id = mobj.group('program_id')
        webpage = self._download_webpage(url, program_id)
        # Fall back to the URL slug when the page has no og:title.
        title = self._og_search_title(webpage, fatal=False) or alt_id
        description = self._og_search_description(webpage) or self._html_search_meta('description', webpage, 'description', fatal=False)
        # entries is a generator: the episode listing is only downloaded once
        # the playlist is iterated.
        entries = self._extract_entries(webpage, program_id, url)
        return {
            '_type': 'playlist',
            'id': program_id,
            'display_id': alt_id,
            'title': title,
            'description': description,
            'entries': entries
        }
 class NPOPlaylistBaseIE(NPOIE):
    def _real_extract(self, url):
        playlist_id = self._match_id(url)