From d4f0908217d51124c1b9cfb3819348bf76d0f5f3 Mon Sep 17 00:00:00 2001
From: fnord <fnord@fnord.mobi>
Date: Thu, 23 Jul 2015 01:36:59 -0500
Subject: [PATCH 1/2] Add support for cbc.ca

---
 youtube_dl/extractor/__init__.py |  1 +
 youtube_dl/extractor/cbc.py      | 97 ++++++++++++++++++++++++++++++++
 2 files changed, 98 insertions(+)
 create mode 100644 youtube_dl/extractor/cbc.py
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index 3cfa804ec..e11ce60f0 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -60,6 +60,7 @@ from .brightcove import BrightcoveIE
 from .buzzfeed import BuzzFeedIE
 from .byutv import BYUtvIE
 from .c56 import C56IE
+from .cbc import CBCIE
 from .camdemy import (
     CamdemyIE,
     CamdemyFolderIE
diff --git a/youtube_dl/extractor/cbc.py b/youtube_dl/extractor/cbc.py
new file mode 100644
index 000000000..73c0bbc7c
--- /dev/null
+++ b/youtube_dl/extractor/cbc.py
@@ -0,0 +1,97 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    url_basename,
+    unescapeHTML,
+    js_to_json,
+    ExtractorError,
+)
+import re
+
+
+class CBCIE(InfoExtractor):
+    """Extractor for cbc.ca pages that embed clips via CBC.APP.Caffeine."""
+
+    _VALID_URL = r'https?://(?:www\.)?cbc\.ca/[^/]+/'
+
+    _TESTS = [{
+        'url': 'http://www.cbc.ca/news/thenational/the-real-cost-of-the-world-s-most-expensive-drug-1.3126338',
+        'info_dict': {
+            'id': 'if3k_n58u3hDrVX9dOXSTbtHBnSZGQpe',
+            'ext': 'flv',
+            'title': 'The real cost of the world\'s most expensive drug',
+            'description': 'md5:407fb27bb8b10c2e1447bbad0c27e551',
+        },
+        'add_ie': ['ThePlatform'],
+    }, {
+        'url': 'http://www.cbc.ca/player/News/ID/2672225049/',
+        'info_dict': {
+            'id': 'VfTVl5c2pr40a9jxAMWGIRZO8Mz4ubPZ',
+            'ext': 'flv',
+            'title': 'WATCH: New Earth from space image released by NASA',
+            'description': 'md5:3ddd36b5d1066a067a0b0c8891a72506',
+        },
+        'add_ie': ['ThePlatform'],
+    }, {
+        'url': 'http://www.cbc.ca/natureofthings/episodes/stonehenge-uncovered',
+        'info_dict': {
+            'id': 'QPnDq_piKkN5x0dH7SQF85cyJb_KOsG0',
+            'ext': 'flv',
+            'title': 'Stonehenge Uncovered',
+        },
+        'add_ie': ['ThePlatform'],
+        'skip': 'Canada only',
+    }]
+
+    def _real_extract(self, url):
+        """Return a url_result for the page's clip, or a playlist of them."""
+        # Feed endpoint and account id are hard-coded, copied
+        # from http://www.cbc.ca/i/caffeine/js/Caffeine.js
+        #   TP_FEED_DOMAIN:"http://tpfeed.cbc.ca/f/h9dtGB/5akSXx4Ng_Zn?"
+        #   MPX_ACCOUNT_PID:"h9dtGB"
+        tp_feed_domain = 'http://tpfeed.cbc.ca/f/h9dtGB/5akSXx4Ng_Zn?'
+        mpx_account_id = 'h9dtGB'
+
+        name = url_basename(url)
+
+        webpage = self._download_webpage(url, name)
+        title = unescapeHTML(
+            self._search_regex('<title>\s*(.+?)\s*</title>', webpage, 'title'))
+
+        cbcapp = re.findall(
+            r'CBC.APP.Caffeine.initInstance\((.+?)\);', webpage, re.DOTALL)
+
+        # Collect each distinct clipId once, keeping page order.
+        clipids = []
+        for jstr in cbcapp:
+            vdata = self._parse_json(
+                jstr, 'javascript chunk', transform_source=js_to_json)
+            if 'clipId' in vdata and vdata['clipId'] not in clipids:
+                clipids.append(vdata['clipId'])
+
+        vids = []
+        for cid in clipids:
+            # Format rather than concatenate: cid may not be a string.
+            feedurl = '{0}range=1-1&byContent=byReleases%3DbyId%253D{1}'.format(
+                tp_feed_domain, cid)
+            feedpage = self._download_webpage(feedurl, 'feed for clip %s' % cid)
+            cjson = self._parse_json(
+                feedpage, 'clip feed json', transform_source=js_to_json)
+            for ent in cjson.get('entries', []):
+                for content in ent.get('content', []):
+                    # assuming multi-content is playlist or multi-part video
+                    pids = [
+                        r['pid'] for r in content.get('releases', []) if r.get('pid')]
+                    if len(pids) > 1:
+                        self.report_warning(
+                            '%s: multi-release video? Skipping, if content is missing please file a bug report' % cid)
+                    if pids:
+                        vids.append(self.url_result(
+                            'http://link.theplatform.com/s/%s/%s' % (mpx_account_id, pids[0])))
+        if not vids:
+            raise ExtractorError('No video found', expected=True)
+        # Several clips become a playlist; a single clip is returned as-is.
+        if len(vids) > 1:
+            return self.playlist_result(vids, name, title)
+        return vids[0]

From ab84ca19f2e5180f467de71ea189eae5e0a9cc7b Mon Sep 17 00:00:00 2001
From: fnord <fnord@fnord.mobi>
Date: Sat, 25 Jul 2015 20:02:59 -0500
Subject: [PATCH 2/2] [cbc] Use _html_search_regex for title extraction

---
 youtube_dl/extractor/cbc.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/youtube_dl/extractor/cbc.py b/youtube_dl/extractor/cbc.py
index 73c0bbc7c..5420d445f 100644
--- a/youtube_dl/extractor/cbc.py
+++ b/youtube_dl/extractor/cbc.py
@@ -56,8 +56,7 @@ class CBCIE(InfoExtractor):
         name = url_basename(url)
 
         webpage = self._download_webpage(url, name)
-        title = unescapeHTML(
-            self._search_regex('<title>\s*(.+?)\s*</title>', webpage, 'title'))
+        title = self._html_search_regex(r'<title>\s*(.+?)\s*</title>', webpage, 'title')
 
         cbcapp = re.findall(
             r'CBC.APP.Caffeine.initInstance\((.+?)\);', webpage, re.DOTALL)