diff --git a/testnporecent.ps1 b/testnporecent.ps1
new file mode 100644
index 000000000..f95778abd
--- /dev/null
+++ b/testnporecent.ps1
@@ -0,0 +1,43 @@
+Describe 'Flake8' {
+    It 'Does not return any errors' {
+        & flake8 /Users/jhoek/GitHub/youtube-dl/youtube_dl/extractor/npo.py | Should BeNullOrEmpty
+    }
+}
+
+Describe 'Tests' {
+    It 'Should work in Python 2.6' {
+        & 'python2.6' '--version' 2>&1 | Should Be 'Python 2.6.9'
+
+        '', '_1', '_2' | ForEach-Object {
+            & 'python2.6' /Users/jhoek/GitHub/youtube-dl/test/test_download.py "TestDownload.test_NPORecents$($_)" 2>&1
+            $LASTEXITCODE | Should Be 0
+        }
+    }
+
+    It 'Should work in Python 2.7' {
+        & python '--version' 2>&1 | Should Be 'Python 2.7.13'
+
+        '', '_1', '_2' | ForEach-Object {
+            & python /Users/jhoek/GitHub/youtube-dl/test/test_download.py "TestDownload.test_NPORecents$($_)" 2>&1
+            $LASTEXITCODE | Should Be 0
+        }
+    }
+
+    It 'Should work in Python 3.5' {
+        & python3 '--version' | Should Be 'Python 3.5.2'
+
+        '', '_1', '_2' | ForEach-Object {
+            & python3 /Users/jhoek/GitHub/youtube-dl/test/test_download.py "TestDownload.test_NPORecents$($_)" 2>&1
+            $LASTEXITCODE | Should Be 0
+        }
+    }
+
+    It 'Should work in Python 3.6' {
+        & python3.6 '--version' | Should Be 'Python 3.6.1'
+
+        '', '_1', '_2' | ForEach-Object {
+            & 'python3.6' /Users/jhoek/GitHub/youtube-dl/test/test_download.py "TestDownload.test_NPORecents$($_)" 2>&1
+            $LASTEXITCODE | Should Be 0
+        }
+    }
+}
diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index 13ca1d2cd..36ff44103 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -686,6 +686,7 @@ from .npo import (
     NPORadioFragmentIE,
     SchoolTVIE,
     HetKlokhuisIE,
+    NPORecentsIE,
     VPROIE,
     WNLIE,
 )
diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py
index 38fefe492..408bbc36d 100644
--- a/youtube_dl/extractor/npo.py
+++ b/youtube_dl/extractor/npo.py
@@ -478,6 +478,121 @@ class HetKlokhuisIE(NPODataMidEmbedIE):
     }
 
 
+class NPORecentsIE(NPOIE):
+    """Playlist extractor for an npo.nl program page.
+
+    Downloads the program page, fetches the program's episode listing and
+    yields a URL result for every listing match whose optional second
+    capture group is absent (the code treats those as available).
+    """
+
+    IE_NAME = 'npo:recents'
+    # NOTE(review): both listing patterns below appear to have lost their
+    # HTML markup in transit. _extract_entries reads group(1) as the episode
+    # path and group(2) as an optional marker, which the patterns as shown
+    # cannot provide; restore them from the original commit before applying
+    # this patch.
+    npo12_regex = r"""
+\s*
+\s*\s*(
+)?"""
+    npo3_regex = r"""
+\s*\s*
+\s*
+\s*
+\s*
+.*?
+\s*
+\s*
+\s*(
+)?"""
+    # _real_extract reads the named groups 'alt_id' (first path segment) and
+    # 'program_id' (second path segment) back from this pattern.
+    _VALID_URL = r'(?:https?://)?(?:www\.)?npo\.nl/(?P<alt_id>[^/]+)/(?P<program_id>\w+_\d+)'
+    _TESTS = [{
+        # Example of an npo3 program
+        'url': 'https://www.npo.nl/keuringsdienst-van-waarde/KN_1678993',
+        'info_dict': {
+            'title': 'Keuringsdienst van Waarde',
+            'id': 'KN_1678993',
+            'description': 'md5:5ffaf131f175d8a771e7a7884833dad2'
+        },
+        'playlist_mincount': 8
+    }, {
+        # Example of an npo1/npo2 program
+        'url': 'https://www.npo.nl/jinek/KN_1676589',
+        'info_dict': {
+            'title': 'Jinek',
+            'id': 'KN_1676589',
+            'description': 'md5:6998986899b4903395f0cdd0670cedaf'
+        },
+        'playlist_mincount': 8
+    }, {
+        # Example of a program for which there will be only one available episode (if any)
+        'url': 'https://www.npo.nl/midsomer-murders/POW_00828660',
+        'info_dict': {
+            'title': 'Midsomer murders',
+            'id': 'POW_00828660',
+            'description': 'md5:a8b6e9d3e3bd367be88766e3ce8e8362'
+        },
+        'playlist_maxcount': 1
+    }]
+
+    def _extract_entries(self, webpage, program_id, program_url):
+        """Yield a URL result for each available episode of the program.
+
+        webpage is the already downloaded program page, program_id is the
+        video id passed along when downloading the episode listing, and
+        program_url is the page URL the listing URL is built from.
+        """
+        # The NPO 3 logo asset in the page marks an npo3 program, which uses
+        # a different listing URL and pattern than the other channels.
+        is_npo3 = 'www-assets.npo.nl/uploads/tv_channel/265/logo/smaller_npo3-logo.png' in webpage
+
+        if is_npo3:
+            # NOTE(review): the double slash before "search" is kept exactly
+            # as submitted; confirm that it is intentional.
+            episodes_url = '%s//search?category=broadcasts&page=1' % program_url
+            regex = self.npo3_regex
+        else:
+            episodes_url = '%s/search?media_type=broadcast&start=0&rows=8' % program_url
+            regex = self.npo12_regex
+
+        episodes = self._download_webpage(episodes_url, program_id, note='Retrieving episodes')
+
+        for match in re.finditer(regex, episodes):
+            url = match.group(1)
+            # The second group is optional; an episode counts as available
+            # only when that group did not take part in the match.
+            available = match.group(2) is None
+
+            if available:
+                yield self.url_result(
+                    url='http://npo.nl%s' % url,
+                    video_title=self._og_search_title(webpage))
+
+    def _real_extract(self, url):
+        """Return a playlist info dict for the program page at url."""
+        mobj = re.match(self._VALID_URL, url)
+        alt_id = mobj.group('alt_id')
+        program_id = mobj.group('program_id')
+        webpage = self._download_webpage(url, program_id)
+        # Fall back to the URL slug when the page has no og:title.
+        title = self._og_search_title(webpage, fatal=False) or alt_id
+        description = self._og_search_description(webpage) or self._html_search_meta(
+            'description', webpage, 'description', fatal=False)
+        entries = self._extract_entries(webpage, program_id, url)
+
+        return {
+            '_type': 'playlist',
+            'id': program_id,
+            'display_id': alt_id,
+            'title': title,
+            'description': description,
+            'entries': entries
+        }
+
+
 class NPOPlaylistBaseIE(NPOIE):
     def _real_extract(self, url):
         playlist_id = self._match_id(url)