From 8bf47118ef25987ee563b11558bfa44aae9189d1 Mon Sep 17 00:00:00 2001
From: RPing
Date: Wed, 18 Nov 2015 17:12:18 +0800
Subject: [PATCH] enhance udn support

---
Reviewer notes (free-form area, not part of the commit message):

* youtube_dl/extractor/udn.py: adds UDNIE, a subclass of UDNEmbedIE that
  only overrides _VALID_URL so that https://video.udn.com/news/<digits>
  URLs are matched. When the running extractor is a UDNIE instance,
  UDNEmbedIE._real_extract inserts "embed/" right after the first "com/"
  in the URL before downloading the page.
* youtube_dl/extractor/__init__.py: imports UDNIE next to UDNEmbedIE.
* test/unittest_all_urls.py: new URL-matching test module; test_UDN
  expects /news/ URLs to match 'UDN' and /embed/news/ and /play/news/
  URLs to match 'UDNEmbed'.
* NOTE(review): the named group in UDNIE._VALID_URL is written as
  (?P<id>\d+); the name "id" is presumed from the _match_id(url) call in
  _real_extract -- confirm against InfoExtractor._match_id.
* NOTE(review): the From: header carries no e-mail address; restore it
  before feeding this mail to `git am`.

 test/unittest_all_urls.py        | 159 +++++++++++++++++++++++++++++++
 youtube_dl/extractor/__init__.py |   5 +-
 youtube_dl/extractor/udn.py      |   7 ++
 3 files changed, 170 insertions(+), 1 deletion(-)
 create mode 100644 test/unittest_all_urls.py

diff --git a/test/unittest_all_urls.py b/test/unittest_all_urls.py
new file mode 100644
index 000000000..2872c05e9
--- /dev/null
+++ b/test/unittest_all_urls.py
@@ -0,0 +1,159 @@
+#!/usr/bin/env python
+
+from __future__ import unicode_literals
+
+# Allow direct execution
+import os
+import sys
+import unittest
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+
+from test.helper import gettestcases
+
+from youtube_dl.extractor import (
+    FacebookIE,
+    gen_extractors,
+    YoutubeIE,
+)
+
+
+class TestAllURLsMatching(unittest.TestCase):
+    def setUp(self):
+        self.ies = gen_extractors()
+
+    def matching_ies(self, url):
+        return [ie.IE_NAME for ie in self.ies if ie.suitable(url) and ie.IE_NAME != 'generic']
+
+    def assertMatch(self, url, ie_list):
+        self.assertEqual(self.matching_ies(url), ie_list)
+
+    def test_youtube_playlist_matching(self):
+        assertPlaylist = lambda url: self.assertMatch(url, ['youtube:playlist'])
+        assertPlaylist('ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
+        assertPlaylist('UUBABnxM4Ar9ten8Mdjj1j0Q')  # 585
+        assertPlaylist('PL63F0C78739B09958')
+        assertPlaylist('https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q')
+        assertPlaylist('https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
+        assertPlaylist('https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC')
+        assertPlaylist('https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012')  # 668
+        self.assertFalse('youtube:playlist' in self.matching_ies('PLtS2H6bU1M'))
+        # Top tracks
+        assertPlaylist('https://www.youtube.com/playlist?list=MCUS.20142101')
+
+    def test_youtube_matching(self):
+        self.assertTrue(YoutubeIE.suitable('PLtS2H6bU1M'))
+        self.assertFalse(YoutubeIE.suitable('https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012'))  # 668
+        self.assertMatch('http://youtu.be/BaW_jenozKc', ['youtube'])
+        self.assertMatch('http://www.youtube.com/v/BaW_jenozKc', ['youtube'])
+        self.assertMatch('https://youtube.googleapis.com/v/BaW_jenozKc', ['youtube'])
+        self.assertMatch('http://www.cleanvideosearch.com/media/action/yt/watch?videoId=8v_4O44sfjM', ['youtube'])
+
+    def test_youtube_channel_matching(self):
+        assertChannel = lambda url: self.assertMatch(url, ['youtube:channel'])
+        assertChannel('https://www.youtube.com/channel/HCtnHdj3df7iM')
+        assertChannel('https://www.youtube.com/channel/HCtnHdj3df7iM?feature=gb_ch_rec')
+        assertChannel('https://www.youtube.com/channel/HCtnHdj3df7iM/videos')
+
+    def test_youtube_user_matching(self):
+        self.assertMatch('www.youtube.com/NASAgovVideo/videos', ['youtube:user'])
+
+    def test_youtube_feeds(self):
+        self.assertMatch('https://www.youtube.com/feed/watch_later', ['youtube:watchlater'])
+        self.assertMatch('https://www.youtube.com/feed/subscriptions', ['youtube:subscriptions'])
+        self.assertMatch('https://www.youtube.com/feed/recommended', ['youtube:recommended'])
+        self.assertMatch('https://www.youtube.com/my_favorites', ['youtube:favorites'])
+
+    def test_youtube_show_matching(self):
+        self.assertMatch('http://www.youtube.com/show/airdisasters', ['youtube:show'])
+
+    def test_youtube_search_matching(self):
+        self.assertMatch('http://www.youtube.com/results?search_query=making+mustard', ['youtube:search_url'])
+        self.assertMatch('https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video', ['youtube:search_url'])
+        self.assertMatch('https://www.youtube.com/results?lclk=week&search_query=making+mustard&filters=week', ['youtube:search:date'])
+
+    def test_youtube_extract(self):
+        assertExtractId = lambda url, id: self.assertEqual(YoutubeIE.extract_id(url), id)
+        assertExtractId('http://www.youtube.com/watch?&v=BaW_jenozKc', 'BaW_jenozKc')
+        assertExtractId('https://www.youtube.com/watch?&v=BaW_jenozKc', 'BaW_jenozKc')
+        assertExtractId('https://www.youtube.com/watch?feature=player_embedded&v=BaW_jenozKc', 'BaW_jenozKc')
+        assertExtractId('https://www.youtube.com/watch_popup?v=BaW_jenozKc', 'BaW_jenozKc')
+        assertExtractId('http://www.youtube.com/watch?v=BaW_jenozKcsharePLED17F32AD9753930', 'BaW_jenozKc')
+        assertExtractId('BaW_jenozKc', 'BaW_jenozKc')
+
+    def test_facebook_matching(self):
+        self.assertTrue(FacebookIE.suitable('https://www.facebook.com/Shiniknoh#!/photo.php?v=10153317450565268'))
+        self.assertTrue(FacebookIE.suitable('https://www.facebook.com/cindyweather?fref=ts#!/photo.php?v=10152183998945793'))
+
+    def test_no_duplicates(self):
+        ies = gen_extractors()
+        for tc in gettestcases(include_onlymatching=True):
+            url = tc['url']
+            for ie in ies:
+                if type(ie).__name__ in ('GenericIE', tc['name'] + 'IE'):
+                    self.assertTrue(ie.suitable(url), '%s should match URL %r' % (type(ie).__name__, url))
+                else:
+                    self.assertFalse(
+                        ie.suitable(url),
+                        '%s should not match URL %r . That URL belongs to %s.' % (type(ie).__name__, url, tc['name']))
+
+    def test_keywords(self):
+        self.assertMatch(':ytsubs', ['youtube:subscriptions'])
+        self.assertMatch(':ytsubscriptions', ['youtube:subscriptions'])
+        self.assertMatch(':ythistory', ['youtube:history'])
+        self.assertMatch(':thedailyshow', ['ComedyCentralShows'])
+        self.assertMatch(':tds', ['ComedyCentralShows'])
+
+    def test_vimeo_matching(self):
+        self.assertMatch('https://vimeo.com/channels/tributes', ['vimeo:channel'])
+        self.assertMatch('https://vimeo.com/channels/31259', ['vimeo:channel'])
+        self.assertMatch('https://vimeo.com/channels/31259/53576664', ['vimeo'])
+        self.assertMatch('https://vimeo.com/user7108434', ['vimeo:user'])
+        self.assertMatch('https://vimeo.com/user7108434/videos', ['vimeo:user'])
+        self.assertMatch('https://vimeo.com/user21297594/review/75524534/3c257a1b5d', ['vimeo:review'])
+
+    # https://github.com/rg3/youtube-dl/issues/1930
+    def test_soundcloud_not_matching_sets(self):
+        self.assertMatch('http://soundcloud.com/floex/sets/gone-ep', ['soundcloud:set'])
+
+    def test_tumblr(self):
+        self.assertMatch('http://tatianamaslanydaily.tumblr.com/post/54196191430/orphan-black-dvd-extra-behind-the-scenes', ['Tumblr'])
+        self.assertMatch('http://tatianamaslanydaily.tumblr.com/post/54196191430', ['Tumblr'])
+
+    def test_pbs(self):
+        # https://github.com/rg3/youtube-dl/issues/2350
+        self.assertMatch('http://video.pbs.org/viralplayer/2365173446/', ['PBS'])
+        self.assertMatch('http://video.pbs.org/widget/partnerplayer/980042464/', ['PBS'])
+
+    def test_yahoo_https(self):
+        # https://github.com/rg3/youtube-dl/issues/2701
+        self.assertMatch(
+            'https://screen.yahoo.com/smartwatches-latest-wearable-gadgets-163745379-cbs.html',
+            ['Yahoo'])
+
+    def test_appledaily(self):
+        self.assertMatch('http://www.appledaily.com.tw/animation/appledaily/new/20151117/36903515/', ['AppleDaily'])
+        self.assertMatch('http://www.appledaily.com.tw/realtimenews/article/sports/20151117/734539/', ['AppleDaily'])
+
+    def test_ctsnews(self):
+        self.assertMatch('http://news.cts.com.tw/cts/life/201511/201511151683198.html#.VkssxbNZOHs', ['CtsNews'])
+        self.assertMatch('http://news.cts.com.tw/cts/international/201511/201511171683689.html#.Vksv_bNZOHs', ['CtsNews'])
+
+    def test_UDN(self):
+        self.assertMatch('https://video.udn.com/news/398685', ['UDN'])
+        self.assertMatch('https://video.udn.com/embed/news/300040', ['UDNEmbed'])
+        self.assertMatch('https://video.udn.com/play/news/303776', ['UDNEmbed'])
+
+    def test_xuite(self):
+        self.assertMatch('http://vlog.xuite.net/play/T2lMdGpZLTk0NDA1MS5mbHY=', ['Xuite'])
+
+    def test_yam(self):
+        self.assertMatch('http://mymedia.yam.com/m/2283921', ['Yam'])
+        self.assertMatch('http://mymedia.yam.com/m/3599430', ['Yam'])
+
+    def test_mlb(self):
+        self.assertMatch('http://m.mlb.com/video/topic/9674738/v529001783/111015-mlbcom-fastcast-gold-gloves-announced', ['MLB'])
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index 26e5745d6..c0665c0e7 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -706,7 +706,10 @@ from .udemy import (
     UdemyIE,
     UdemyCourseIE
 )
-from .udn import UDNEmbedIE
+from .udn import (
+    UDNEmbedIE,
+    UDNIE
+)
 from .ultimedia import UltimediaIE
 from .unistra import UnistraIE
 from .urort import UrortIE
diff --git a/youtube_dl/extractor/udn.py b/youtube_dl/extractor/udn.py
index 2151f8338..852f9cad0 100644
--- a/youtube_dl/extractor/udn.py
+++ b/youtube_dl/extractor/udn.py
@@ -34,6 +34,10 @@ class UDNEmbedIE(InfoExtractor):
     def _real_extract(self, url):
         video_id = self._match_id(url)
 
+        if isinstance(self, UDNIE):
+            p = url.index("com/") + 4
+            url = url[:p] + "embed/" + url[p:]
+
         page = self._download_webpage(url, video_id)
 
         options = json.loads(js_to_json(self._html_search_regex(
@@ -73,3 +77,6 @@ class UDNEmbedIE(InfoExtractor):
             'title': options['title'],
             'thumbnail': thumbnail
         }
+
+class UDNIE(UDNEmbedIE):
+    _VALID_URL = r'https?://video\.udn\.com/news/(?P<id>\d+)'