From 8bf47118ef25987ee563b11558bfa44aae9189d1 Mon Sep 17 00:00:00 2001
From: RPing <g1222888@gmail.com>
Date: Wed, 18 Nov 2015 17:12:18 +0800
Subject: [PATCH] [udn] Add UDNIE for video.udn.com/news/ URLs and URL-matching tests

---
 test/unittest_all_urls.py        | 159 +++++++++++++++++++++++++++++++
 youtube_dl/extractor/__init__.py |   5 +-
 youtube_dl/extractor/udn.py      |   7 ++
 3 files changed, 170 insertions(+), 1 deletion(-)
 create mode 100644 test/unittest_all_urls.py

diff --git a/test/unittest_all_urls.py b/test/unittest_all_urls.py
new file mode 100644
index 000000000..2872c05e9
--- /dev/null
+++ b/test/unittest_all_urls.py
@@ -0,0 +1,175 @@
+#!/usr/bin/env python
+
+from __future__ import unicode_literals
+
+# Allow direct execution
+import os
+import sys
+import unittest
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+
+from test.helper import gettestcases
+
+from youtube_dl.extractor import (
+    FacebookIE,
+    gen_extractors,
+    YoutubeIE,
+)
+
+
+class TestAllURLsMatching(unittest.TestCase):
+    """Check which extractors report a URL as suitable()."""
+
+    def setUp(self):
+        # Rebuilt before every test method.
+        self.ies = gen_extractors()
+
+    def matching_ies(self, url):
+        """Return the IE_NAME of each non-generic extractor suitable for url."""
+        return [ie.IE_NAME for ie in self.ies if ie.suitable(url) and ie.IE_NAME != 'generic']
+
+    def assertMatch(self, url, ie_list):
+        """Assert that the extractors matching url are exactly ie_list, in order."""
+        self.assertEqual(self.matching_ies(url), ie_list)
+
+    def test_youtube_playlist_matching(self):
+        # Bare playlist IDs and URLs carrying list= must match youtube:playlist only.
+        assertPlaylist = lambda url: self.assertMatch(url, ['youtube:playlist'])
+        assertPlaylist('ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
+        assertPlaylist('UUBABnxM4Ar9ten8Mdjj1j0Q')  # 585
+        assertPlaylist('PL63F0C78739B09958')
+        assertPlaylist('https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q')
+        assertPlaylist('https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
+        assertPlaylist('https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC')
+        assertPlaylist('https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012')  # 668
+        # An 11-character ID is not a playlist, despite its PL prefix.
+        self.assertFalse('youtube:playlist' in self.matching_ies('PLtS2H6bU1M'))
+        # Top tracks
+        assertPlaylist('https://www.youtube.com/playlist?list=MCUS.20142101')
+
+    def test_youtube_matching(self):
+        # The same bare ID is a video; a watch URL carrying list= is not YoutubeIE's.
+        self.assertTrue(YoutubeIE.suitable('PLtS2H6bU1M'))
+        self.assertFalse(YoutubeIE.suitable('https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012'))  # 668
+        self.assertMatch('http://youtu.be/BaW_jenozKc', ['youtube'])
+        self.assertMatch('http://www.youtube.com/v/BaW_jenozKc', ['youtube'])
+        self.assertMatch('https://youtube.googleapis.com/v/BaW_jenozKc', ['youtube'])
+        self.assertMatch('http://www.cleanvideosearch.com/media/action/yt/watch?videoId=8v_4O44sfjM', ['youtube'])
+
+    def test_youtube_channel_matching(self):
+        assertChannel = lambda url: self.assertMatch(url, ['youtube:channel'])
+        assertChannel('https://www.youtube.com/channel/HCtnHdj3df7iM')
+        assertChannel('https://www.youtube.com/channel/HCtnHdj3df7iM?feature=gb_ch_rec')
+        assertChannel('https://www.youtube.com/channel/HCtnHdj3df7iM/videos')
+
+    def test_youtube_user_matching(self):
+        # Note the missing URL scheme.
+        self.assertMatch('www.youtube.com/NASAgovVideo/videos', ['youtube:user'])
+
+    def test_youtube_feeds(self):
+        self.assertMatch('https://www.youtube.com/feed/watch_later', ['youtube:watchlater'])
+        self.assertMatch('https://www.youtube.com/feed/subscriptions', ['youtube:subscriptions'])
+        self.assertMatch('https://www.youtube.com/feed/recommended', ['youtube:recommended'])
+        self.assertMatch('https://www.youtube.com/my_favorites', ['youtube:favorites'])
+
+    def test_youtube_show_matching(self):
+        self.assertMatch('http://www.youtube.com/show/airdisasters', ['youtube:show'])
+
+    def test_youtube_search_matching(self):
+        self.assertMatch('http://www.youtube.com/results?search_query=making+mustard', ['youtube:search_url'])
+        self.assertMatch('https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video', ['youtube:search_url'])
+        # NOTE(review): unlike the two /results URLs above, this one is expected
+        # to resolve to youtube:search:date -- confirm.
+        self.assertMatch('https://www.youtube.com/results?lclk=week&search_query=making+mustard&filters=week', ['youtube:search:date'])
+
+    def test_youtube_extract(self):
+        # extract_id() must return the bare video ID for every URL form and for the ID itself.
+        assertExtractId = lambda url, video_id: self.assertEqual(YoutubeIE.extract_id(url), video_id)
+        assertExtractId('http://www.youtube.com/watch?&v=BaW_jenozKc', 'BaW_jenozKc')
+        assertExtractId('https://www.youtube.com/watch?&v=BaW_jenozKc', 'BaW_jenozKc')
+        assertExtractId('https://www.youtube.com/watch?feature=player_embedded&v=BaW_jenozKc', 'BaW_jenozKc')
+        assertExtractId('https://www.youtube.com/watch_popup?v=BaW_jenozKc', 'BaW_jenozKc')
+        assertExtractId('http://www.youtube.com/watch?v=BaW_jenozKcsharePLED17F32AD9753930', 'BaW_jenozKc')
+        assertExtractId('BaW_jenozKc', 'BaW_jenozKc')
+
+    def test_facebook_matching(self):
+        # Video links carried inside the #! fragment.
+        self.assertTrue(FacebookIE.suitable('https://www.facebook.com/Shiniknoh#!/photo.php?v=10153317450565268'))
+        self.assertTrue(FacebookIE.suitable('https://www.facebook.com/cindyweather?fref=ts#!/photo.php?v=10152183998945793'))
+
+    def test_no_duplicates(self):
+        # Each test-case URL must be accepted by the extractor the test case is
+        # named after and by GenericIE, and rejected by every other extractor.
+        for tc in gettestcases(include_onlymatching=True):
+            url = tc['url']
+            for ie in self.ies:
+                if type(ie).__name__ in ('GenericIE', tc['name'] + 'IE'):
+                    self.assertTrue(ie.suitable(url), '%s should match URL %r' % (type(ie).__name__, url))
+                else:
+                    self.assertFalse(
+                        ie.suitable(url),
+                        '%s should not match URL %r . That URL belongs to %s.' % (type(ie).__name__, url, tc['name']))
+
+    def test_keywords(self):
+        # ':'-prefixed keyword shortcuts.
+        self.assertMatch(':ytsubs', ['youtube:subscriptions'])
+        self.assertMatch(':ytsubscriptions', ['youtube:subscriptions'])
+        self.assertMatch(':ythistory', ['youtube:history'])
+        self.assertMatch(':thedailyshow', ['ComedyCentralShows'])
+        self.assertMatch(':tds', ['ComedyCentralShows'])
+
+    def test_vimeo_matching(self):
+        self.assertMatch('https://vimeo.com/channels/tributes', ['vimeo:channel'])
+        self.assertMatch('https://vimeo.com/channels/31259', ['vimeo:channel'])
+        self.assertMatch('https://vimeo.com/channels/31259/53576664', ['vimeo'])
+        self.assertMatch('https://vimeo.com/user7108434', ['vimeo:user'])
+        self.assertMatch('https://vimeo.com/user7108434/videos', ['vimeo:user'])
+        self.assertMatch('https://vimeo.com/user21297594/review/75524534/3c257a1b5d', ['vimeo:review'])
+
+    # https://github.com/rg3/youtube-dl/issues/1930
+    def test_soundcloud_not_matching_sets(self):
+        self.assertMatch('http://soundcloud.com/floex/sets/gone-ep', ['soundcloud:set'])
+
+    def test_tumblr(self):
+        self.assertMatch('http://tatianamaslanydaily.tumblr.com/post/54196191430/orphan-black-dvd-extra-behind-the-scenes', ['Tumblr'])
+        self.assertMatch('http://tatianamaslanydaily.tumblr.com/post/54196191430', ['Tumblr'])
+
+    def test_pbs(self):
+        # https://github.com/rg3/youtube-dl/issues/2350
+        self.assertMatch('http://video.pbs.org/viralplayer/2365173446/', ['PBS'])
+        self.assertMatch('http://video.pbs.org/widget/partnerplayer/980042464/', ['PBS'])
+
+    def test_yahoo_https(self):
+        # https://github.com/rg3/youtube-dl/issues/2701
+        self.assertMatch(
+            'https://screen.yahoo.com/smartwatches-latest-wearable-gadgets-163745379-cbs.html',
+            ['Yahoo'])
+
+    def test_appledaily(self):
+        self.assertMatch('http://www.appledaily.com.tw/animation/appledaily/new/20151117/36903515/', ['AppleDaily'])
+        self.assertMatch('http://www.appledaily.com.tw/realtimenews/article/sports/20151117/734539/', ['AppleDaily'])
+
+    def test_ctsnews(self):
+        self.assertMatch('http://news.cts.com.tw/cts/life/201511/201511151683198.html#.VkssxbNZOHs', ['CtsNews'])
+        self.assertMatch('http://news.cts.com.tw/cts/international/201511/201511171683689.html#.Vksv_bNZOHs', ['CtsNews'])
+
+    def test_UDN(self):
+        # /news/<id> pages belong to UDN; embed/ and play/ URLs stay with UDNEmbed.
+        self.assertMatch('https://video.udn.com/news/398685', ['UDN'])
+        self.assertMatch('https://video.udn.com/embed/news/300040', ['UDNEmbed'])
+        self.assertMatch('https://video.udn.com/play/news/303776', ['UDNEmbed'])
+
+    def test_xuite(self):
+        self.assertMatch('http://vlog.xuite.net/play/T2lMdGpZLTk0NDA1MS5mbHY=', ['Xuite'])
+
+    def test_yam(self):
+        self.assertMatch('http://mymedia.yam.com/m/2283921', ['Yam'])
+        self.assertMatch('http://mymedia.yam.com/m/3599430', ['Yam'])
+
+    def test_mlb(self):
+        self.assertMatch('http://m.mlb.com/video/topic/9674738/v529001783/111015-mlbcom-fastcast-gold-gloves-announced', ['MLB'])
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index 26e5745d6..c0665c0e7 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -706,7 +706,10 @@ from .udemy import (
     UdemyIE,
     UdemyCourseIE
 )
-from .udn import UDNEmbedIE
+from .udn import (
+    UDNEmbedIE,
+    UDNIE
+)
 from .ultimedia import UltimediaIE
 from .unistra import UnistraIE
 from .urort import UrortIE
diff --git a/youtube_dl/extractor/udn.py b/youtube_dl/extractor/udn.py
index 2151f8338..852f9cad0 100644
--- a/youtube_dl/extractor/udn.py
+++ b/youtube_dl/extractor/udn.py
@@ -34,6 +34,10 @@ class UDNEmbedIE(InfoExtractor):
     def _real_extract(self, url):
         video_id = self._match_id(url)
 
+        # Subclasses matching other page types map their URL onto the embed
+        # page here; the ID above is still taken from the URL as matched.
+        url = self._embed_page_url(url)
+
         page = self._download_webpage(url, video_id)
 
         options = json.loads(js_to_json(self._html_search_regex(
@@ -73,3 +77,21 @@ class UDNEmbedIE(InfoExtractor):
             'title': options['title'],
             'thumbnail': thumbnail
         }
+
+    def _embed_page_url(self, url):
+        """Return the URL of the page _real_extract should download for url.
+
+        The base implementation returns url unchanged; subclasses override it.
+        """
+        return url
+
+
+class UDNIE(UDNEmbedIE):
+    # Plain news pages: same extraction as UDNEmbedIE, run on the embed page.
+    _VALID_URL = r'https?://video\.udn\.com/news/(?P<id>\d+)'
+
+    def _embed_page_url(self, url):
+        # https://video.udn.com/news/<id> -> https://video.udn.com/embed/news/<id>.
+        # For a URL matching _VALID_URL the first '/news/' directly follows the
+        # host, so replacing only that occurrence is enough.
+        return url.replace('/news/', '/embed/news/', 1)