From f7fb256946e92d05031db26d3fadfde1bd8c8c94 Mon Sep 17 00:00:00 2001
From: Alex Seiler <seileralex@gmail.com>
Date: Fri, 3 Mar 2017 05:46:33 +0100
Subject: [PATCH] [3plus] Handle real video extraction in 3qsdn information
 extractor

- Add support for 3qsdn playlists.
- Better title extraction for 3qsdn media.
- Add description extraction for 3qsdn media.
- Remove ThreePlusBaseIE, which is no longer needed because everything
can be done directly by the 3qsdn information extractor.
---
 youtube_dl/extractor/threeplus.py | 53 ++++++++-----------------------
 youtube_dl/extractor/threeqsdn.py | 32 ++++++++++++++++---
 2 files changed, 42 insertions(+), 43 deletions(-)
diff --git a/youtube_dl/extractor/threeplus.py b/youtube_dl/extractor/threeplus.py
index 48acdabcb..47f576200 100644
--- a/youtube_dl/extractor/threeplus.py
+++ b/youtube_dl/extractor/threeplus.py
@@ -5,55 +5,26 @@ import re
 
 from .common import InfoExtractor
 from .threeqsdn import ThreeQSDNIE
-from ..utils import get_element_by_class
+from ..utils import (
+    get_element_by_class,
+    smuggle_url,
+)
 
 
-class ThreePlusBaseIE(InfoExtractor):
-    _HOST_URL = 'http://playout.3qsdn.com/'
-
-    def _get_title_and_description(self, video_id):
-        webpage = self._download_webpage(
-            self._HOST_URL + video_id, video_id)
-        title = self._og_search_title(webpage)
-        description = self._og_search_description(webpage)
-        return title, description
-
-    def _get_real_video_id(self, video_id):
-        video_info = self._download_webpage(self._HOST_URL + video_id + '?js=true', video_id)
-        video_id = self._search_regex(
-            r'sdnPlayoutId\s*:\s*\'(.+?)\'', video_info, 'Real video id', default=video_id)
-        video_id = video_id.replace('\\x2D', '-')
-        return video_id
-
-    def _extract_from_id(self, video_id):
-        video_id = self._get_real_video_id(video_id)
-        title, description = self._get_title_and_description(video_id)
-        return {
-            '_type': 'url_transparent',
-            'ie_key': ThreeQSDNIE.ie_key(),
-            'url': self._HOST_URL + video_id,
-            'id': video_id,
-            'title': title,
-            'description': description,
-        }
-
-
-class ThreePlusIE(ThreePlusBaseIE):
+class ThreePlusIE(InfoExtractor):
     IE_NAME = '3 Plus'
     _VALID_URL = r'https?://(?:www\.)3plus\.(?:tv|ch)/(?!videos)(?P<id>.+)'
 
     _TESTS = [{
         # Real video ID known in advance
         'url': 'http://www.3plus.tv/episode/mama-ich-bin-schwanger/teenager-werden-muetter-folge-3',
+        'md5': '424d9bd2b10e7d4149299bef74e5ddd2',
         'info_dict': {
             'id': 'de1b7745-11d6-11e6-b427-0cc47a188158',
             'ext': 'mp4',
             'title': 'MAMA ICH BIN SCHWANGER ST01 - Episode 03',
             'description': 'md5:2b93142fd82f4b5460f97b13fee40eb8',
         },
-        'params': {
-            'skip_download': True,
-        },
         'expected_warnings': ['Unable to download f4m manifest', 'Failed to parse JSON'],
     }, {
         # Real video ID not known in advance
@@ -75,11 +46,13 @@ class ThreePlusIE(ThreePlusBaseIE):
         webpage = self._download_webpage(url, display_id)
         video_id = self._search_regex(
             r'var\s+sdnPlayoutId\s*=\s*"([0-9a-f\-]{36})"', webpage, 'video id')
-
-        return self._extract_from_id(video_id)
+            
+        return self.url_result(
+            smuggle_url('3qsdn:%s' % video_id, {'first_video_only': True}),
+            ThreeQSDNIE.ie_key())
 
 
-class ThreePlusPlaylistIE(ThreePlusBaseIE):
+class ThreePlusPlaylistIE(InfoExtractor):
     IE_NAME = '3 Plus Playlists'
     _VALID_URL = r'https?://(?:www\.)3plus\.(?:tv|ch)/videos/(?P<id>.+)'
 
@@ -97,6 +70,8 @@ class ThreePlusPlaylistIE(ThreePlusBaseIE):
         webpage = self._download_webpage(url, playlist_id)
 
         title = get_element_by_class('pane-title', webpage)
-        entries = [self._extract_from_id(m.group('id')) for m in re.finditer(
+        entries = [self.url_result(
+            smuggle_url('3qsdn:%s' % m.group('id'), {'first_video_only': True})) for m in re.finditer(
             r'<div[^>]+class\s*=\s*"field-content\s*"\s*>(?P<id>[0-9a-f\-]{36})</div>', webpage)]
+
         return self.playlist_result(entries, playlist_id, title)
diff --git a/youtube_dl/extractor/threeqsdn.py b/youtube_dl/extractor/threeqsdn.py
index f26937da1..a2d99997e 100644
--- a/youtube_dl/extractor/threeqsdn.py
+++ b/youtube_dl/extractor/threeqsdn.py
@@ -7,13 +7,15 @@ from ..utils import (
     determine_ext,
     js_to_json,
     mimetype2ext,
+    unsmuggle_url,
 )
 
 
 class ThreeQSDNIE(InfoExtractor):
     IE_NAME = '3qsdn'
     IE_DESC = '3Q SDN'
-    _VALID_URL = r'https?://playout\.3qsdn\.com/(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
+    _VALID_URL = r'(?:https?://playout\.3qsdn\.com/|3qsdn:)(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
+    _API_URL = 'http://playout.3qsdn.com/'
     _TESTS = [{
         # ondemand from http://www.philharmonie.tv/veranstaltung/26/
         'url': 'http://playout.3qsdn.com/0280d6b9-1215-11e6-b427-0cc47a188158?protocol=http',
@@ -21,7 +23,8 @@ class ThreeQSDNIE(InfoExtractor):
         'info_dict': {
             'id': '0280d6b9-1215-11e6-b427-0cc47a188158',
             'ext': 'mp4',
-            'title': '0280d6b9-1215-11e6-b427-0cc47a188158',
+            'title': '160504_sixpianos',
+            'description': '160504_sixpianos',
             'is_live': False,
         },
         'expected_warnings': ['Failed to download MPD manifest', 'Failed to parse JSON'],
@@ -38,6 +41,13 @@ class ThreeQSDNIE(InfoExtractor):
             'skip_download': True,  # m3u8 downloads
         },
         'expected_warnings': ['Failed to download MPD manifest'],
+    }, {
+        # playlist
+        'url': 'http://playout.3qsdn.com/2a70223f-b56f-11e6-a78b-0cc47a188158',
+        'info_dict': {
+            'id': '2a70223f-b56f-11e6-a78b-0cc47a188158',
+        },
+        'playlist_count': 11,
     }, {
         # live audio stream
         'url': 'http://playout.3qsdn.com/9edf36e0-6bf2-11e2-a16a-9acf09e2db48',
@@ -69,11 +79,20 @@ class ThreeQSDNIE(InfoExtractor):
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
+        url, smuggled_data = unsmuggle_url(url, {})
 
         js = self._download_webpage(
-            'http://playout.3qsdn.com/%s' % video_id, video_id,
+            self._API_URL + video_id, video_id,
             query={'js': 'true'})
 
+        playout_ids = [m.group('id').replace('\\x2D', '-') for m in re.finditer(
+            r'sdnPlayoutId\s*:\s*["\'](?P<id>.+?)["\']', js)]
+        if playout_ids:
+            if smuggled_data.get('first_video_only'):
+                return self.url_result(self._API_URL + playout_ids[0], self.ie_key())
+            return self.playlist_result(
+                [self.url_result(self._API_URL + vid, self.ie_key()) for vid in playout_ids], video_id)
+
         if any(p in js for p in (
                 '>This content is not available in your country',
                 'playout.3qsdn.com/forbidden')):
@@ -132,11 +151,16 @@ class ThreeQSDNIE(InfoExtractor):
 
         self._sort_formats(formats)
 
-        title = self._live_title(video_id) if live else video_id
+        webpage = self._download_webpage(self._API_URL + video_id, video_id)
+        title = self._live_title(video_id) if live else self._og_search_title(webpage, default=None)
+        if not title:
+            title = video_id
+        description = self._og_search_description(webpage, default=None)
 
         return {
             'id': video_id,
             'title': title,
+            'description': description,
             'is_live': live,
             'formats': formats,
         }