From fd2bd579117b7039a2b99080752b8b5952d09f93 Mon Sep 17 00:00:00 2001
From: Alex Seiler <seileralex@gmail.com>
Date: Tue, 24 May 2016 18:59:54 +0200
Subject: [PATCH 1/5] [blick] Add new extractor (blick.ch is a Swiss newspaper
 platform, which also provides videos)

---
 youtube_dl/extractor/blick.py      | 123 +++++++++++++++++++++++++++++
 youtube_dl/extractor/extractors.py |   1 +
 2 files changed, 124 insertions(+)
 create mode 100644 youtube_dl/extractor/blick.py
diff --git a/youtube_dl/extractor/blick.py b/youtube_dl/extractor/blick.py
new file mode 100644
index 000000000..3a0977205
--- /dev/null
+++ b/youtube_dl/extractor/blick.py
@@ -0,0 +1,123 @@
+# encoding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+import re
+
+
+class BlickIE(InfoExtractor):
+    _VALID_URL = r'(?:https?://)?(?:www\.)?blick\.ch/.*-id(?P<id>\d+).*\.html'
+
+    _TESTS = [{
+        'url': 'http://www.blick.ch/sport/uli-forte-vor-dem-abstiegs-showdown-ich-gehe-davon-aus-dass-der-fussball-gott-fcz-fan-ist-id5070813.html',
+        'info_dict': {
+            'id': '5070813',
+            'ext': 'mp4',
+            'title': 'uli-forte-vor-dem-abstiegs-showdown-ich-gehe-davon-aus-dass-der-fussball-gott-fcz-fan-ist',
+            'thumbnail': 'http://blick.simplex.tv/content/51/52/70062/simvid_1.jpg',
+            'description': 'Am Mittwochabend entscheidet sich, ob der FCZ oder der FC Lugano aus der Super League absteigt. Uli Forte schwört dabei auf den Fussball-Gott und zündet in der Kirche eine Kerze an.'
+        }
+    }, {
+        'url': 'http://www.blick.ch/sport/tennis/nominiert-fuer-musik-preis-in-schweden-so-toll-singt-guenthardts-tochter-alessandra-id5066863.html',
+        'info_dict': {
+            'id': '5066863',
+            'ext': 'mp4',
+            'title': 'nominiert-fuer-musik-preis-in-schweden-so-toll-singt-guenthardts-tochter-alessandra',
+            'thumbnail': 'http://f.blick.ch/img/incoming/crop5066860/5146024130-csquare-w300-h300/Bildschirmfoto-2016-05-23-um-14.jpg',
+            'description': 'Da ist Papa Heinz mächtig stolz. Seine Tochter Alessandra Günthardt ist für einen schwedischen Musik-Preis unter den drei Nominierten. Die Abstimmung läuft noch bis 7. Juni.'
+        }
+    }]
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        webpage = self._download_webpage(url, video_id)
+
+        found_videos_og = re.findall(r'<meta.*?property="og:video".*?content="(.*?)"', webpage)
+        found_videos_ogs = re.findall(r'<meta.*?property="og:video".*?content="(.*?)"', webpage)
+        found_videos_meta = re.findall(r'<meta.*?itemprop="contentURL".*?content="(.*?)"', webpage)
+        found_videos = found_videos_og + found_videos_ogs + found_videos_meta
+        video_url = ''
+        for video in found_videos:
+            if re.match(r'.*detect\.mp4', video):
+                ind = video.rfind('/')
+                video_url = video[:ind + 1]
+                video_url += 'index.m3u8'
+                break
+            elif re.match(r'.*\.m3u8', video):
+                video_url = video
+                break
+
+        if not video_url:
+            return []
+
+        video_title = str(url)
+        b_ind = video_title.rfind('/') + 1
+        e_ind = video_title.rfind('-id')
+        video_title = video_title[b_ind:e_ind]
+
+        video_description = self._og_search_description(webpage)
+        thumbnail = self._og_search_thumbnail(webpage)
+
+        entry_info_dict = {
+            'id': video_id,
+            'title': video_title,
+            'description': video_description,
+            'thumbnail': thumbnail,
+            'duration': None,
+        }
+        entry_info_dict['formats'] = self._extract_m3u8_formats(
+            video_url,
+            video_id,
+            ext='mp4',
+            entry_protocol='m3u8_native')
+
+        if entry_info_dict.get('formats'):
+            self._sort_formats(entry_info_dict['formats'])
+
+            # Remove entries containing a url to an index.m3u8 file
+            cleaned_formats = [x for x in entry_info_dict['formats'] if x.get('format_id') != 'meta']
+            entry_info_dict['formats'] = cleaned_formats
+
+            duration_found = False
+            duration = None
+            attr = ''
+            for elem in entry_info_dict.get('formats'):
+                if not duration_found:
+                    duration = self.calculateDuration(elem['url'], video_id)
+                    duration_found = True if duration else False
+                tbr = elem.get('tbr')
+                try:
+                    attr = ''
+                    if tbr < 1000:
+                        attr = 'lq'
+                    elif tbr >= 1000 and tbr < 2000:
+                        attr = 'sq'
+                    elif tbr >= 2000:
+                        attr = 'hq'
+                except TypeError:
+                    attr = 'un'
+                elem['format_id'] = attr + '-' + str(tbr)
+        entry_info_dict['duration'] = duration
+        return entry_info_dict
+
+    def calculateDuration(self, m3u8_url, video_id):
+        content = self._download_webpage_handle(
+            m3u8_url,
+            video_id,
+            note='Downloading m3u8 information',
+            errnote='Failed to download m3u8 information',
+            fatal=False
+        )
+        if content is False:
+            return None
+        m3u8_doc, rlh = content
+        duration = 0.0
+        try:
+            for line in m3u8_doc.splitlines():
+                if line.startswith('#EXTINF:'):
+                    dur = line[8:].strip()[:-1]
+                    duration += float(dur)
+        except ValueError:
+            return None
+        return duration
diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index f9fed18f6..e2a6cf315 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -84,6 +84,7 @@ from .bleacherreport import (
     BleacherReportIE,
     BleacherReportCMSIE,
 )
+from .blick import BlickIE
 from .blinkx import BlinkxIE
 from .bloomberg import BloombergIE
 from .bokecc import BokeCCIE

From ea6e87e1655f80468b7382e6de4dfa44ca750624 Mon Sep 17 00:00:00 2001
From: Alex Seiler <seileralex@gmail.com>
Date: Wed, 25 May 2016 02:27:19 +0200
Subject: [PATCH 2/5] [blick] Use InfoExtractor helper methods from common.py
 instead of re.findall

---
 youtube_dl/extractor/blick.py | 35 +++++++++++++++++++++++++++++++----
 1 file changed, 31 insertions(+), 4 deletions(-)

diff --git a/youtube_dl/extractor/blick.py b/youtube_dl/extractor/blick.py
index 3a0977205..c5a1a8442 100644
--- a/youtube_dl/extractor/blick.py
+++ b/youtube_dl/extractor/blick.py
@@ -2,6 +2,7 @@
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
+from ..utils import RegexNotFoundError
 import re
 
 
@@ -26,6 +27,15 @@ class BlickIE(InfoExtractor):
             'thumbnail': 'http://f.blick.ch/img/incoming/crop5066860/5146024130-csquare-w300-h300/Bildschirmfoto-2016-05-23-um-14.jpg',
             'description': 'Da ist Papa Heinz mächtig stolz. Seine Tochter Alessandra Günthardt ist für einen schwedischen Musik-Preis unter den drei Nominierten. Die Abstimmung läuft noch bis 7. Juni.'
         }
+    }, {
+        'url': 'http://www.blick.ch/sport/fussball/superleague/totomat-fehler-in-sion-fcz-buff-stinksauer-wegen-falschem-lugano-resultat-id5063421.html',
+        'info_dict': {
+            'id': '5063421',
+            'ext': 'mp4',
+            'title': 'totomat-fehler-in-sion-fcz-buff-stinksauer-wegen-falschem-lugano-resultat',
+            'thumbnail': 'http://f.blick.ch/img/incoming/crop5063475/820602933-csquare-w300-h300/Bildschirmfoto-2016-05-22-um-19.jpg',
+            'description': 'Der FC Zürich bleibt das Schlusslicht der Raiffeisen Super League. Einen dicken Hals bekommen Buff und Co. aber wegen einer falschen Resultatanzeige aus dem Ländle.',
+        }
     }]
 
     def _real_extract(self, url):
@@ -33,10 +43,27 @@ class BlickIE(InfoExtractor):
         video_id = mobj.group('id')
         webpage = self._download_webpage(url, video_id)
 
-        found_videos_og = re.findall(r'<meta.*?property="og:video".*?content="(.*?)"', webpage)
-        found_videos_ogs = re.findall(r'<meta.*?property="og:video".*?content="(.*?)"', webpage)
-        found_videos_meta = re.findall(r'<meta.*?itemprop="contentURL".*?content="(.*?)"', webpage)
-        found_videos = found_videos_og + found_videos_ogs + found_videos_meta
+        found_videos = []
+        regex_og = self._og_regexes('video')
+        regex_ogs = self._og_regexes('video:secure_url')
+        try:
+            video_og = self._html_search_regex(regex_og, webpage, name=None)
+            found_videos.append(video_og)
+        except RegexNotFoundError:
+            pass
+        try:
+            video_ogs = self._html_search_regex(regex_ogs, webpage, name=None)
+            if video_ogs not in found_videos:
+                found_videos.append(video_ogs)
+        except RegexNotFoundError:
+            pass
+        try:
+            video_meta = self._html_search_meta('contentURL', webpage)
+            if video_meta not in found_videos:
+                found_videos.append(video_meta)
+        except RegexNotFoundError:
+            pass
+
         video_url = ''
         for video in found_videos:
             if re.match(r'.*detect\.mp4', video):

From c2bec2b0d80e2a47f06417eff9f2a6dfcf8e13ea Mon Sep 17 00:00:00 2001
From: Alex Seiler <seileralex@gmail.com>
Date: Wed, 25 May 2016 15:45:35 +0200
Subject: [PATCH 3/5] [blick] Use default=None/fatal=False arguments instead of
 catching possible RegexNotFoundError.

---
 youtube_dl/extractor/blick.py | 24 ++++++------------------
 1 file changed, 6 insertions(+), 18 deletions(-)

diff --git a/youtube_dl/extractor/blick.py b/youtube_dl/extractor/blick.py
index c5a1a8442..5f9837cfe 100644
--- a/youtube_dl/extractor/blick.py
+++ b/youtube_dl/extractor/blick.py
@@ -2,7 +2,6 @@
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
-from ..utils import RegexNotFoundError
 import re
 
 
@@ -46,23 +45,12 @@ class BlickIE(InfoExtractor):
         found_videos = []
         regex_og = self._og_regexes('video')
         regex_ogs = self._og_regexes('video:secure_url')
-        try:
-            video_og = self._html_search_regex(regex_og, webpage, name=None)
-            found_videos.append(video_og)
-        except RegexNotFoundError:
-            pass
-        try:
-            video_ogs = self._html_search_regex(regex_ogs, webpage, name=None)
-            if video_ogs not in found_videos:
-                found_videos.append(video_ogs)
-        except RegexNotFoundError:
-            pass
-        try:
-            video_meta = self._html_search_meta('contentURL', webpage)
-            if video_meta not in found_videos:
-                found_videos.append(video_meta)
-        except RegexNotFoundError:
-            pass
+        video_og = self._html_search_regex(regex_og, webpage, name=None, default=None, fatal=False)
+        video_ogs = self._html_search_regex(regex_ogs, webpage, name=None, default=None, fatal=False)
+        video_meta = self._html_search_meta('contentURL', webpage, fatal=False, default=None)
+        for elem in [video_og, video_ogs, video_meta]:
+            if elem:
+                found_videos.append(elem)
 
         video_url = ''
         for video in found_videos:

From ab193cf18a6df418bed706c4b0a4920e011f691d Mon Sep 17 00:00:00 2001
From: Alex Seiler <seileralex@gmail.com>
Date: Fri, 27 May 2016 00:30:00 +0200
Subject: [PATCH 4/5] [blick] Apply suggested changes.

 - Use _match_id to get the video_id
 - Extract the video title from the webpage instead of the URL
 - Remove unnecessary "if entry_info_dict.get('formats'):" check
---
 youtube_dl/extractor/blick.py | 66 ++++++++++++++++-------------------
 1 file changed, 30 insertions(+), 36 deletions(-)

diff --git a/youtube_dl/extractor/blick.py b/youtube_dl/extractor/blick.py
index 5f9837cfe..d57ed4f75 100644
--- a/youtube_dl/extractor/blick.py
+++ b/youtube_dl/extractor/blick.py
@@ -13,7 +13,7 @@ class BlickIE(InfoExtractor):
         'info_dict': {
             'id': '5070813',
             'ext': 'mp4',
-            'title': 'uli-forte-vor-dem-abstiegs-showdown-ich-gehe-davon-aus-dass-der-fussball-gott-fcz-fan-ist',
+            'title': 'Uli Forte vor dem Abstiegs-Showdown: «Ich gehe davon aus, dass der Fussball-Gott FCZ-Fan ist»',
             'thumbnail': 'http://blick.simplex.tv/content/51/52/70062/simvid_1.jpg',
             'description': 'Am Mittwochabend entscheidet sich, ob der FCZ oder der FC Lugano aus der Super League absteigt. Uli Forte schwört dabei auf den Fussball-Gott und zündet in der Kirche eine Kerze an.'
         }
@@ -22,7 +22,7 @@ class BlickIE(InfoExtractor):
         'info_dict': {
             'id': '5066863',
             'ext': 'mp4',
-            'title': 'nominiert-fuer-musik-preis-in-schweden-so-toll-singt-guenthardts-tochter-alessandra',
+            'title': 'Nominiert für Musik-Preis in Schweden: So toll singt Günthardts Tochter Alessandra',
             'thumbnail': 'http://f.blick.ch/img/incoming/crop5066860/5146024130-csquare-w300-h300/Bildschirmfoto-2016-05-23-um-14.jpg',
             'description': 'Da ist Papa Heinz mächtig stolz. Seine Tochter Alessandra Günthardt ist für einen schwedischen Musik-Preis unter den drei Nominierten. Die Abstimmung läuft noch bis 7. Juni.'
         }
@@ -31,22 +31,21 @@ class BlickIE(InfoExtractor):
         'info_dict': {
             'id': '5063421',
             'ext': 'mp4',
-            'title': 'totomat-fehler-in-sion-fcz-buff-stinksauer-wegen-falschem-lugano-resultat',
+            'title': 'Totomat-Fehler in Sion! FCZ-Buff stinksauer wegen falschem Lugano-Resultat',
             'thumbnail': 'http://f.blick.ch/img/incoming/crop5063475/820602933-csquare-w300-h300/Bildschirmfoto-2016-05-22-um-19.jpg',
             'description': 'Der FC Zürich bleibt das Schlusslicht der Raiffeisen Super League. Einen dicken Hals bekommen Buff und Co. aber wegen einer falschen Resultatanzeige aus dem Ländle.',
         }
     }]
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
 
         found_videos = []
         regex_og = self._og_regexes('video')
         regex_ogs = self._og_regexes('video:secure_url')
-        video_og = self._html_search_regex(regex_og, webpage, name=None, default=None, fatal=False)
-        video_ogs = self._html_search_regex(regex_ogs, webpage, name=None, default=None, fatal=False)
+        video_og = self._html_search_regex(regex_og, webpage, name=None, default=None)
+        video_ogs = self._html_search_regex(regex_ogs, webpage, name=None, default=None)
         video_meta = self._html_search_meta('contentURL', webpage, fatal=False, default=None)
         for elem in [video_og, video_ogs, video_meta]:
             if elem:
@@ -66,11 +65,7 @@ class BlickIE(InfoExtractor):
         if not video_url:
             return []
 
-        video_title = str(url)
-        b_ind = video_title.rfind('/') + 1
-        e_ind = video_title.rfind('-id')
-        video_title = video_title[b_ind:e_ind]
-
+        video_title = self._og_search_title(webpage)
         video_description = self._og_search_description(webpage)
         thumbnail = self._og_search_thumbnail(webpage)
 
@@ -87,32 +82,31 @@ class BlickIE(InfoExtractor):
             ext='mp4',
             entry_protocol='m3u8_native')
 
-        if entry_info_dict.get('formats'):
-            self._sort_formats(entry_info_dict['formats'])
+        self._sort_formats(entry_info_dict['formats'])
 
-            # Remove entries containing a url to an index.m3u8 file
-            cleaned_formats = [x for x in entry_info_dict['formats'] if x.get('format_id') != 'meta']
-            entry_info_dict['formats'] = cleaned_formats
+        # Remove entries containing a url to an index.m3u8 file
+        cleaned_formats = [x for x in entry_info_dict['formats'] if x.get('format_id') != 'meta']
+        entry_info_dict['formats'] = cleaned_formats
 
-            duration_found = False
-            duration = None
-            attr = ''
-            for elem in entry_info_dict.get('formats'):
-                if not duration_found:
-                    duration = self.calculateDuration(elem['url'], video_id)
-                    duration_found = True if duration else False
-                tbr = elem.get('tbr')
-                try:
-                    attr = ''
-                    if tbr < 1000:
-                        attr = 'lq'
-                    elif tbr >= 1000 and tbr < 2000:
-                        attr = 'sq'
-                    elif tbr >= 2000:
-                        attr = 'hq'
-                except TypeError:
-                    attr = 'un'
-                elem['format_id'] = attr + '-' + str(tbr)
+        duration_found = False
+        duration = None
+        attr = ''
+        for elem in entry_info_dict.get('formats'):
+            if not duration_found:
+                duration = self.calculateDuration(elem['url'], video_id)
+                duration_found = True if duration else False
+            tbr = elem.get('tbr')
+            try:
+                attr = ''
+                if tbr < 1000:
+                    attr = 'lq'
+                elif tbr >= 1000 and tbr < 2000:
+                    attr = 'sq'
+                elif tbr >= 2000:
+                    attr = 'hq'
+            except TypeError:
+                attr = 'un'
+            elem['format_id'] = attr + '-' + str(tbr)
         entry_info_dict['duration'] = duration
         return entry_info_dict
 

From 57f9aafeefba3e58e2765f67de5a166d124adca4 Mon Sep 17 00:00:00 2001
From: Alex Seiler <seileralex@gmail.com>
Date: Fri, 27 May 2016 00:38:59 +0200
Subject: [PATCH 5/5] [blick] Do not remove the 'formats' entry, which contains
 a .m3u8 file for multiple versions of the video.

---
 youtube_dl/extractor/blick.py | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/youtube_dl/extractor/blick.py b/youtube_dl/extractor/blick.py
index d57ed4f75..566b3969c 100644
--- a/youtube_dl/extractor/blick.py
+++ b/youtube_dl/extractor/blick.py
@@ -84,10 +84,6 @@ class BlickIE(InfoExtractor):
 
         self._sort_formats(entry_info_dict['formats'])
 
-        # Remove entries containing a url to an index.m3u8 file
-        cleaned_formats = [x for x in entry_info_dict['formats'] if x.get('format_id') != 'meta']
-        entry_info_dict['formats'] = cleaned_formats
-
         duration_found = False
         duration = None
         attr = ''