From 7a5773090789bec38a3f58dfb09039155919a540 Mon Sep 17 00:00:00 2001
From: rrooij <rrooij@users.noreply.github.com>
Date: Sun, 9 Jul 2017 09:21:40 +0200
Subject: [PATCH 01/22] [npo:live] Fix live stream id extraction (closes
 #13568)

---
 youtube_dl/extractor/npo.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py
index 5f8b6def1..516b1e941 100644
--- a/youtube_dl/extractor/npo.py
+++ b/youtube_dl/extractor/npo.py
@@ -341,7 +341,7 @@ class NPOLiveIE(NPOBaseIE):
         webpage = self._download_webpage(url, display_id)
 
         live_id = self._search_regex(
-            r'data-prid="([^"]+)"', webpage, 'live id')
+            [r'media-id="([^"]+)"', r'data-prid="([^"]+)"'], webpage, 'live id')
 
         return {
             '_type': 'url_transparent',

From 15237fcd51dca192103f08a910660616e3b241b8 Mon Sep 17 00:00:00 2001
From: mlindner <mlindner@users.noreply.github.com>
Date: Sun, 9 Jul 2017 00:54:52 -0700
Subject: [PATCH 02/22] [veoh] Extend _VALID_URL

---
 youtube_dl/extractor/veoh.py | 73 ++++++++++++++++++------------------
 1 file changed, 36 insertions(+), 37 deletions(-)

diff --git a/youtube_dl/extractor/veoh.py b/youtube_dl/extractor/veoh.py
index 0f5d68738..b20dddc5c 100644
--- a/youtube_dl/extractor/veoh.py
+++ b/youtube_dl/extractor/veoh.py
@@ -12,47 +12,46 @@ from ..utils import (
 
 
 class VeohIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?veoh\.com/(?:watch|iphone/#_Watch)/(?P<id>(?:v|yapi-)[\da-zA-Z]+)'
+    _VALID_URL = r'https?://(?:www\.)?veoh\.com/(?:watch|iphone/#_Watch)/(?P<id>(?:v|e|yapi-)[\da-zA-Z]+)'
 
-    _TESTS = [
-        {
-            'url': 'http://www.veoh.com/watch/v56314296nk7Zdmz3',
-            'md5': '620e68e6a3cff80086df3348426c9ca3',
-            'info_dict': {
-                'id': '56314296',
-                'ext': 'mp4',
-                'title': 'Straight Backs Are Stronger',
-                'uploader': 'LUMOback',
-                'description': 'At LUMOback, we believe straight backs are stronger.  The LUMOback Posture & Movement Sensor:  It gently vibrates when you slouch, inspiring improved posture and mobility.  Use the app to track your data and improve your posture over time. ',
-            },
+    _TESTS = [{
+        'url': 'http://www.veoh.com/watch/v56314296nk7Zdmz3',
+        'md5': '620e68e6a3cff80086df3348426c9ca3',
+        'info_dict': {
+            'id': '56314296',
+            'ext': 'mp4',
+            'title': 'Straight Backs Are Stronger',
+            'uploader': 'LUMOback',
+            'description': 'At LUMOback, we believe straight backs are stronger.  The LUMOback Posture & Movement Sensor:  It gently vibrates when you slouch, inspiring improved posture and mobility.  Use the app to track your data and improve your posture over time. ',
         },
-        {
-            'url': 'http://www.veoh.com/watch/v27701988pbTc4wzN?h1=Chile+workers+cover+up+to+avoid+skin+damage',
-            'md5': '4a6ff84b87d536a6a71e6aa6c0ad07fa',
-            'info_dict': {
-                'id': '27701988',
-                'ext': 'mp4',
-                'title': 'Chile workers cover up to avoid skin damage',
-                'description': 'md5:2bd151625a60a32822873efc246ba20d',
-                'uploader': 'afp-news',
-                'duration': 123,
-            },
-            'skip': 'This video has been deleted.',
+    }, {
+        'url': 'http://www.veoh.com/watch/v27701988pbTc4wzN?h1=Chile+workers+cover+up+to+avoid+skin+damage',
+        'md5': '4a6ff84b87d536a6a71e6aa6c0ad07fa',
+        'info_dict': {
+            'id': '27701988',
+            'ext': 'mp4',
+            'title': 'Chile workers cover up to avoid skin damage',
+            'description': 'md5:2bd151625a60a32822873efc246ba20d',
+            'uploader': 'afp-news',
+            'duration': 123,
         },
-        {
-            'url': 'http://www.veoh.com/watch/v69525809F6Nc4frX',
-            'md5': '4fde7b9e33577bab2f2f8f260e30e979',
-            'note': 'Embedded ooyala video',
-            'info_dict': {
-                'id': '69525809',
-                'ext': 'mp4',
-                'title': 'Doctors Alter Plan For Preteen\'s Weight Loss Surgery',
-                'description': 'md5:f5a11c51f8fb51d2315bca0937526891',
-                'uploader': 'newsy-videos',
-            },
-            'skip': 'This video has been deleted.',
+        'skip': 'This video has been deleted.',
+    }, {
+        'url': 'http://www.veoh.com/watch/v69525809F6Nc4frX',
+        'md5': '4fde7b9e33577bab2f2f8f260e30e979',
+        'note': 'Embedded ooyala video',
+        'info_dict': {
+            'id': '69525809',
+            'ext': 'mp4',
+            'title': 'Doctors Alter Plan For Preteen\'s Weight Loss Surgery',
+            'description': 'md5:f5a11c51f8fb51d2315bca0937526891',
+            'uploader': 'newsy-videos',
         },
-    ]
+        'skip': 'This video has been deleted.',
+    }, {
+        'url': 'http://www.veoh.com/watch/e152215AJxZktGS',
+        'only_matching': True,
+    }]
 
     def _extract_formats(self, source):
         formats = []

From 5af2fd7fa02734c2a23f917fb60f1c14da149d3d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com>
Date: Sun, 9 Jul 2017 15:55:04 +0700
Subject: [PATCH 03/22] [eagleplatform] Add support for another embed pattern
 (#13557)

---
 youtube_dl/extractor/eagleplatform.py | 36 ++++++++++++++++++++++-----
 1 file changed, 30 insertions(+), 6 deletions(-)

diff --git a/youtube_dl/extractor/eagleplatform.py b/youtube_dl/extractor/eagleplatform.py
index 76d39adac..5e1de04a1 100644
--- a/youtube_dl/extractor/eagleplatform.py
+++ b/youtube_dl/extractor/eagleplatform.py
@@ -60,16 +60,40 @@ class EaglePlatformIE(InfoExtractor):
             webpage)
         if mobj is not None:
             return mobj.group('url')
-        # Basic usage embedding (see http://dultonmedia.github.io/eplayer/)
+        PLAYER_JS_RE = r'''
+                        <script[^>]+
+                            src=(?P<qjs>["\'])(?:https?:)?//(?P<host>(?:(?!(?P=qjs)).)+\.media\.eagleplatform\.com)/player/player\.js(?P=qjs)
+                        .+?
+                    '''
+        # "Basic usage" embedding (see http://dultonmedia.github.io/eplayer/)
         mobj = re.search(
             r'''(?xs)
-                    <script[^>]+
-                        src=(?P<q1>["\'])(?:https?:)?//(?P<host>.+?\.media\.eagleplatform\.com)/player/player\.js(?P=q1)
-                    .+?
+                    %s
                     <div[^>]+
-                        class=(?P<q2>["\'])eagleplayer(?P=q2)[^>]+
+                        class=(?P<qclass>["\'])eagleplayer(?P=qclass)[^>]+
                         data-id=["\'](?P<id>\d+)
-            ''', webpage)
+            ''' % PLAYER_JS_RE, webpage)
+        if mobj is not None:
+            return 'eagleplatform:%(host)s:%(id)s' % mobj.groupdict()
+        # Generalization of "Javascript code usage", "Combined usage" and
+        # "Usage without attaching to DOM" embeddings (see
+        # http://dultonmedia.github.io/eplayer/)
+        mobj = re.search(
+            r'''(?xs)
+                    %s
+                    <script>
+                    .+?
+                    new\s+EaglePlayer\(
+                        (?:[^,]+\s*,\s*)?
+                        {
+                            .+?
+                            \bid\s*:\s*["\']?(?P<id>\d+)
+                            .+?
+                        }
+                    \s*\)
+                    .+?
+                    </script>
+            ''' % PLAYER_JS_RE, webpage)
         if mobj is not None:
             return 'eagleplatform:%(host)s:%(id)s' % mobj.groupdict()
 

From 665e9452461abaff7127653265c78bd585acea6c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com>
Date: Sun, 9 Jul 2017 15:57:33 +0700
Subject: [PATCH 04/22] [eagleplatform] Add support for referrer protected
 videos (closes #13557)

---
 youtube_dl/extractor/eagleplatform.py | 25 ++++++++++++++++++++++---
 youtube_dl/extractor/generic.py       | 10 +++++-----
 2 files changed, 27 insertions(+), 8 deletions(-)

diff --git a/youtube_dl/extractor/eagleplatform.py b/youtube_dl/extractor/eagleplatform.py
index 5e1de04a1..34891a362 100644
--- a/youtube_dl/extractor/eagleplatform.py
+++ b/youtube_dl/extractor/eagleplatform.py
@@ -11,6 +11,7 @@ from ..compat import (
 from ..utils import (
     ExtractorError,
     int_or_none,
+    unsmuggle_url,
 )
 
 
@@ -50,6 +51,10 @@ class EaglePlatformIE(InfoExtractor):
             'view_count': int,
         },
         'skip': 'Georestricted',
+    }, {
+        # referrer protected video (https://tvrain.ru/lite/teleshow/kak_vse_nachinalos/namin-418921/)
+        'url': 'tvrainru.media.eagleplatform.com:582306',
+        'only_matching': True,
     }]
 
     @staticmethod
@@ -103,9 +108,10 @@ class EaglePlatformIE(InfoExtractor):
         if status != 200:
             raise ExtractorError(' '.join(response['errors']), expected=True)
 
-    def _download_json(self, url_or_request, video_id, note='Downloading JSON metadata', *args, **kwargs):
+    def _download_json(self, url_or_request, video_id, *args, **kwargs):
         try:
-            response = super(EaglePlatformIE, self)._download_json(url_or_request, video_id, note)
+            response = super(EaglePlatformIE, self)._download_json(
+                url_or_request, video_id, *args, **kwargs)
         except ExtractorError as ee:
             if isinstance(ee.cause, compat_HTTPError):
                 response = self._parse_json(ee.cause.read().decode('utf-8'), video_id)
@@ -117,11 +123,24 @@ class EaglePlatformIE(InfoExtractor):
         return self._download_json(url_or_request, video_id, note)['data'][0]
 
     def _real_extract(self, url):
+        url, smuggled_data = unsmuggle_url(url, {})
+
         mobj = re.match(self._VALID_URL, url)
         host, video_id = mobj.group('custom_host') or mobj.group('host'), mobj.group('id')
 
+        headers = {}
+        query = {
+            'id': video_id,
+        }
+
+        referrer = smuggled_data.get('referrer')
+        if referrer:
+            headers['Referer'] = referrer
+            query['referrer'] = referrer
+
         player_data = self._download_json(
-            'http://%s/api/player_data?id=%s' % (host, video_id), video_id)
+            'http://%s/api/player_data' % host, video_id,
+            headers=headers, query=query)
 
         media = player_data['data']['playlist']['viewports'][0]['medialist'][0]
 
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index f9bff433c..7232f39db 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -1185,7 +1185,7 @@ class GenericIE(InfoExtractor):
             },
             'add_ie': ['Kaltura'],
         },
-        # Eagle.Platform embed (generic URL)
+        # EaglePlatform embed (generic URL)
         {
             'url': 'http://lenta.ru/news/2015/03/06/navalny/',
             # Not checking MD5 as sometimes the direct HTTP link results in 404 and HLS is used
@@ -1200,7 +1200,7 @@ class GenericIE(InfoExtractor):
                 'age_limit': 0,
             },
         },
-        # ClipYou (Eagle.Platform) embed (custom URL)
+        # ClipYou (EaglePlatform) embed (custom URL)
         {
             'url': 'http://muz-tv.ru/play/7129/',
             # Not checking MD5 as sometimes the direct HTTP link results in 404 and HLS is used
@@ -2443,12 +2443,12 @@ class GenericIE(InfoExtractor):
         if kaltura_url:
             return self.url_result(smuggle_url(kaltura_url, {'source_url': url}), KalturaIE.ie_key())
 
-        # Look for Eagle.Platform embeds
+        # Look for EaglePlatform embeds
         eagleplatform_url = EaglePlatformIE._extract_url(webpage)
         if eagleplatform_url:
-            return self.url_result(eagleplatform_url, EaglePlatformIE.ie_key())
+            return self.url_result(smuggle_url(eagleplatform_url, {'referrer': url}), EaglePlatformIE.ie_key())
 
-        # Look for ClipYou (uses Eagle.Platform) embeds
+        # Look for ClipYou (uses EaglePlatform) embeds
         mobj = re.search(
             r'<iframe[^>]+src="https?://(?P<host>media\.clipyou\.ru)/index/player\?.*\brecord_id=(?P<id>\d+).*"', webpage)
         if mobj is not None:

From 250b042c7e71a6e8bbff534aa41c2b92dae1acf7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com>
Date: Sun, 9 Jul 2017 16:02:38 +0700
Subject: [PATCH 05/22] [generic] Add tests for #13557

---
 youtube_dl/extractor/generic.py | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index 7232f39db..95c38698d 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -1199,6 +1199,24 @@ class GenericIE(InfoExtractor):
                 'view_count': int,
                 'age_limit': 0,
             },
+            'params': {
+                'skip_download': True,
+            },
+        },
+        # referrer protected EaglePlatform embed
+        {
+            'url': 'https://tvrain.ru/lite/teleshow/kak_vse_nachinalos/namin-418921/',
+            'info_dict': {
+                'id': '582306',
+                'ext': 'mp4',
+                'title': 'Стас Намин: «Мы нарушили девственность Кремля»',
+                'thumbnail': r're:^https?://.*\.jpg$',
+                'duration': 3382,
+                'view_count': int,
+            },
+            'params': {
+                'skip_download': True,
+            },
         },
         # ClipYou (EaglePlatform) embed (custom URL)
         {
@@ -1212,6 +1230,9 @@ class GenericIE(InfoExtractor):
                 'duration': 216,
                 'view_count': int,
             },
+            'params': {
+                'skip_download': True,
+            },
         },
         # Pladform embed
         {

From 4328ddf82b812420ffc120b4150251f751bff08c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com>
Date: Sun, 9 Jul 2017 16:29:52 +0700
Subject: [PATCH 06/22] [extractor/common] Add support for AMP tags in
 _parse_html5_media_entries

---
 youtube_dl/extractor/common.py  |  7 +++++--
 youtube_dl/extractor/generic.py | 10 ++++++++++
 2 files changed, 15 insertions(+), 2 deletions(-)

diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index afeb4c5da..daa10885f 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -2132,15 +2132,18 @@ class InfoExtractor(object):
             return is_plain_url, formats
 
         entries = []
+        # amp-video and amp-audio are very similar to their HTML5 counterparts
+        # so we will include them right here (see
+        # https://www.ampproject.org/docs/reference/components/amp-video)
         media_tags = [(media_tag, media_type, '')
                       for media_tag, media_type
-                      in re.findall(r'(?s)(<(video|audio)[^>]*/>)', webpage)]
+                      in re.findall(r'(?s)(<(?:amp-)?(video|audio)[^>]*/>)', webpage)]
         media_tags.extend(re.findall(
             # We only allow video|audio followed by a whitespace or '>'.
             # Allowing more characters may end up in significant slow down (see
             # https://github.com/rg3/youtube-dl/issues/11979, example URL:
             # http://www.porntrex.com/maps/videositemap.xml).
-            r'(?s)(<(?P<tag>video|audio)(?:\s+[^>]*)?>)(.*?)</(?P=tag)>', webpage))
+            r'(?s)(<(?P<tag>(?:amp-)?(?:video|audio))(?:\s+[^>]*)?>)(.*?)</(?P=tag)>', webpage))
         for media_tag, media_type, media_content in media_tags:
             media_info = {
                 'formats': [],
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index 95c38698d..919f4f987 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -1770,6 +1770,16 @@ class GenericIE(InfoExtractor):
             },
             'add_ie': [MediasetIE.ie_key()],
         },
+        {
+            # AMP embed (see https://www.ampproject.org/docs/reference/components/amp-video)
+            'url': 'https://tvrain.ru/amp/418921/',
+            'md5': 'cc00413936695987e8de148b67d14f1d',
+            'info_dict': {
+                'id': '418921',
+                'ext': 'mp4',
+                'title': 'Стас Намин: «Мы нарушили девственность Кремля»',
+            },
+        },
         # {
         #     # TODO: find another test
         #     # http://schema.org/VideoObject

From d2b9f362fabad8f9490825456d8ed679d7159271 Mon Sep 17 00:00:00 2001
From: Christopher Smith <ctsmi@ucalgary.ca>
Date: Thu, 29 Jun 2017 13:10:45 -0600
Subject: [PATCH 07/22] [cjsw] Add extractor

---
 youtube_dl/extractor/cjsw.py       | 41 ++++++++++++++++++++++++++++++
 youtube_dl/extractor/extractors.py |  1 +
 2 files changed, 42 insertions(+)
 create mode 100644 youtube_dl/extractor/cjsw.py

diff --git a/youtube_dl/extractor/cjsw.py b/youtube_dl/extractor/cjsw.py
new file mode 100644
index 000000000..087cac9bc
--- /dev/null
+++ b/youtube_dl/extractor/cjsw.py
@@ -0,0 +1,41 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class CJSWIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?cjsw\.com/program/\S+/(?P<id>[0-9]+)'
+    IE_NAME = 'cjsw'
+    _TEST = {
+        'url': 'http://cjsw.com/program/freshly-squeezed/episode/20170620',
+        'md5': 'cee14d40f1e9433632c56e3d14977120',
+        'info_dict': {
+            'id': '20170620',
+            'ext': 'mp3',
+            'title': 'Freshly Squeezed',
+            'description': 'Sled Island artists featured // Live session with Phi Pho, followed by a live session with Sinzere & The Late Nights! // Stay Fresh Y\'all!!',
+        }
+    }
+
+    def _real_extract(self, url):
+        episode_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, episode_id)
+
+        title = self._search_regex(
+            r'<button[^>]+data-showname=(["\'])(?P<title>(?!\1).+?)\1[^>]*>', webpage, 'title', group='title')
+        description = self._html_search_regex(
+            r'<p>(?P<description>.+?)</p>', webpage, 'description', fatal=False)
+        formats = [{
+            'url': self._search_regex(
+                r'<button[^>]+data-audio-src=(["\'])(?P<audio_url>(?!\1).+?)\1[^>]*>', webpage, 'audio_url', group='audio_url'),
+            'ext': 'mp3',
+            'vcodec': 'none',
+        }]
+        return {
+            'id': episode_id,
+            'title': title,
+            'description': description,
+            'formats': formats,
+        }
diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index b83c3aba5..4524fa687 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -185,6 +185,7 @@ from .chirbit import (
     ChirbitProfileIE,
 )
 from .cinchcast import CinchcastIE
+from .cjsw import CJSWIE
 from .clipfish import ClipfishIE
 from .cliphunter import CliphunterIE
 from .cliprs import ClipRsIE

From c319d1c4833f89df818fe39f4c99cdc5c9a8bf01 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com>
Date: Sun, 9 Jul 2017 17:00:45 +0700
Subject: [PATCH 08/22] [cjsw] Fix issues and improve extraction (closes
 #13525)

---
 youtube_dl/extractor/cjsw.py | 57 ++++++++++++++++++++++++++----------
 1 file changed, 41 insertions(+), 16 deletions(-)

diff --git a/youtube_dl/extractor/cjsw.py b/youtube_dl/extractor/cjsw.py
index 087cac9bc..aab6ea535 100644
--- a/youtube_dl/extractor/cjsw.py
+++ b/youtube_dl/extractor/cjsw.py
@@ -1,41 +1,66 @@
-# coding: utf-8
 from __future__ import unicode_literals
 
+import re
+
 from .common import InfoExtractor
+from ..utils import (
+    determine_ext,
+    unescapeHTML,
+)
 
 
 class CJSWIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?cjsw\.com/program/\S+/(?P<id>[0-9]+)'
-    IE_NAME = 'cjsw'
+    _VALID_URL = r'https?://(?:www\.)?cjsw\.com/program/(?P<program>[^/]+)/episode/(?P<id>\d+)'
     _TEST = {
         'url': 'http://cjsw.com/program/freshly-squeezed/episode/20170620',
         'md5': 'cee14d40f1e9433632c56e3d14977120',
         'info_dict': {
-            'id': '20170620',
+            'id': '91d9f016-a2e7-46c5-8dcb-7cbcd7437c41',
             'ext': 'mp3',
-            'title': 'Freshly Squeezed',
-            'description': 'Sled Island artists featured // Live session with Phi Pho, followed by a live session with Sinzere & The Late Nights! // Stay Fresh Y\'all!!',
-        }
+            'title': 'Freshly Squeezed – Episode June 20, 2017',
+            'description': 'md5:c967d63366c3898a80d0c7b0ff337202',
+            'series': 'Freshly Squeezed',
+            'episode_id': '20170620',
+        },
     }
 
     def _real_extract(self, url):
-        episode_id = self._match_id(url)
+        mobj = re.match(self._VALID_URL, url)
+        program, episode_id = mobj.group('program', 'id')
+        audio_id = '%s/%s' % (program, episode_id)
 
         webpage = self._download_webpage(url, episode_id)
 
-        title = self._search_regex(
-            r'<button[^>]+data-showname=(["\'])(?P<title>(?!\1).+?)\1[^>]*>', webpage, 'title', group='title')
-        description = self._html_search_regex(
-            r'<p>(?P<description>.+?)</p>', webpage, 'description', fatal=False)
+        title = unescapeHTML(self._search_regex(
+            (r'<h1[^>]+class=["\']episode-header__title["\'][^>]*>(?P<title>[^<]+)',
+             r'data-audio-title=(["\'])(?P<title>(?:(?!\1).)+)\1'),
+            webpage, 'title', group='title'))
+
+        audio_url = self._search_regex(
+            r'<button[^>]+data-audio-src=(["\'])(?P<url>(?:(?!\1).)+)\1',
+            webpage, 'audio url', group='url')
+
+        audio_id = self._search_regex(
+            r'/([\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})\.mp3',
+            audio_url, 'audio id', default=audio_id)
+
         formats = [{
-            'url': self._search_regex(
-                r'<button[^>]+data-audio-src=(["\'])(?P<audio_url>(?!\1).+?)\1[^>]*>', webpage, 'audio_url', group='audio_url'),
-            'ext': 'mp3',
+            'url': audio_url,
+            'ext': determine_ext(audio_url, 'mp3'),
             'vcodec': 'none',
         }]
+
+        description = self._html_search_regex(
+            r'<p>(?P<description>.+?)</p>', webpage, 'description', fatal=False)
+        series = self._search_regex(
+            r'data-showname=(["\'])(?P<name>(?:(?!\1).)+)\1', webpage,
+            'series', default=program, group='name')
+
         return {
-            'id': episode_id,
+            'id': audio_id,
             'title': title,
             'description': description,
             'formats': formats,
+            'series': series,
+            'episode_id': episode_id,
         }

From 0d2f0b0357325823782884327a158aeccf4f9b49 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com>
Date: Sun, 9 Jul 2017 17:05:11 +0700
Subject: [PATCH 09/22] [cjsw] Make description optional

---
 youtube_dl/extractor/cjsw.py | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/youtube_dl/extractor/cjsw.py b/youtube_dl/extractor/cjsw.py
index aab6ea535..dd271586f 100644
--- a/youtube_dl/extractor/cjsw.py
+++ b/youtube_dl/extractor/cjsw.py
@@ -11,7 +11,7 @@ from ..utils import (
 
 class CJSWIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?cjsw\.com/program/(?P<program>[^/]+)/episode/(?P<id>\d+)'
-    _TEST = {
+    _TESTS = [{
         'url': 'http://cjsw.com/program/freshly-squeezed/episode/20170620',
         'md5': 'cee14d40f1e9433632c56e3d14977120',
         'info_dict': {
@@ -22,7 +22,11 @@ class CJSWIE(InfoExtractor):
             'series': 'Freshly Squeezed',
             'episode_id': '20170620',
         },
-    }
+    }, {
+        # no description
+        'url': 'http://cjsw.com/program/road-pops/episode/20170707/',
+        'only_matching': True,
+    }]
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
@@ -51,7 +55,8 @@ class CJSWIE(InfoExtractor):
         }]
 
         description = self._html_search_regex(
-            r'<p>(?P<description>.+?)</p>', webpage, 'description', fatal=False)
+            r'<p>(?P<description>.+?)</p>', webpage, 'description',
+            default=None)
         series = self._search_regex(
             r'data-showname=(["\'])(?P<name>(?:(?!\1).)+)\1', webpage,
             'series', default=program, group='name')

From a02682fd13ce5ba88d2508c90559eaa7f43b65d5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com>
Date: Sun, 9 Jul 2017 17:09:44 +0700
Subject: [PATCH 10/22] Keep in sync with ffmpeg's current malformed AAC
 bitstream wording (closes #13587)

---
 youtube_dl/YoutubeDL.py            | 4 ++--
 youtube_dl/postprocessor/ffmpeg.py | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index b3a6d4d3b..60ee4b7d8 100755
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -1890,7 +1890,7 @@ class YoutubeDL(object):
                         info_dict.get('protocol') == 'm3u8' and
                         self.params.get('hls_prefer_native')):
                     if fixup_policy == 'warn':
-                        self.report_warning('%s: malformated aac bitstream.' % (
+                        self.report_warning('%s: malformed AAC bitstream detected.' % (
                             info_dict['id']))
                     elif fixup_policy == 'detect_or_warn':
                         fixup_pp = FFmpegFixupM3u8PP(self)
@@ -1899,7 +1899,7 @@ class YoutubeDL(object):
                             info_dict['__postprocessors'].append(fixup_pp)
                         else:
                             self.report_warning(
-                                '%s: malformated aac bitstream. %s'
+                                '%s: malformed AAC bitstream detected. %s'
                                 % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
                     else:
                         assert fixup_policy in ('ignore', 'never')
diff --git a/youtube_dl/postprocessor/ffmpeg.py b/youtube_dl/postprocessor/ffmpeg.py
index f021ea8fd..51256a3fb 100644
--- a/youtube_dl/postprocessor/ffmpeg.py
+++ b/youtube_dl/postprocessor/ffmpeg.py
@@ -542,7 +542,7 @@ class FFmpegFixupM3u8PP(FFmpegPostProcessor):
             temp_filename = prepend_extension(filename, 'temp')
 
             options = ['-c', 'copy', '-f', 'mp4', '-bsf:a', 'aac_adtstoasc']
-            self._downloader.to_screen('[ffmpeg] Fixing malformated aac bitstream in "%s"' % filename)
+            self._downloader.to_screen('[ffmpeg] Fixing malformed AAC bitstream in "%s"' % filename)
             self.run_ffmpeg(filename, temp_filename, options)
 
             os.remove(encodeFilename(filename))

From ed84454d358f3cbfdc43dab31328b165f9c72c68 Mon Sep 17 00:00:00 2001
From: Santiago Calcagno <santicalcagno@gmail.com>
Date: Tue, 13 Jun 2017 12:32:04 -0300
Subject: [PATCH 11/22] [egghead:course] Fix extraction

---
 youtube_dl/extractor/egghead.py | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/youtube_dl/extractor/egghead.py b/youtube_dl/extractor/egghead.py
index db921465e..01fcdb6cf 100644
--- a/youtube_dl/extractor/egghead.py
+++ b/youtube_dl/extractor/egghead.py
@@ -1,8 +1,6 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
-import re
-
 from .common import InfoExtractor
 
 
@@ -22,18 +20,18 @@ class EggheadCourseIE(InfoExtractor):
 
     def _real_extract(self, url):
         playlist_id = self._match_id(url)
-        webpage = self._download_webpage(url, playlist_id)
+        api_url = 'https://egghead.io/api/v1/series/' + playlist_id
+        course = self._download_json(api_url, playlist_id)
+        title = course.get('title')
+        description = course.get('description')
 
-        title = self._html_search_regex(r'<h1 class="title">([^<]+)</h1>', webpage, 'title')
-        ul = self._search_regex(r'(?s)<ul class="series-lessons-list">(.*?)</ul>', webpage, 'session list')
-
-        found = re.findall(r'(?s)<a class="[^"]*"\s*href="([^"]+)">\s*<li class="item', ul)
-        entries = [self.url_result(m) for m in found]
+        lessons = course.get('lessons')
+        entries = [{'_type': 'url', 'ie_key': 'Wistia', 'url': 'wistia:' + l.get('wistia_id')} for l in lessons]
 
         return {
             '_type': 'playlist',
             'id': playlist_id,
             'title': title,
-            'description': self._og_search_description(webpage),
+            'description': description,
             'entries': entries,
         }

From 485cb375766df8f2ef79b7fe2915ead4ef61a01e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com>
Date: Sun, 9 Jul 2017 17:28:42 +0700
Subject: [PATCH 12/22] [egghead:course] Improve (closes #13370)

---
 youtube_dl/extractor/egghead.py | 26 ++++++++++++--------------
 1 file changed, 12 insertions(+), 14 deletions(-)

diff --git a/youtube_dl/extractor/egghead.py b/youtube_dl/extractor/egghead.py
index 01fcdb6cf..c86f52319 100644
--- a/youtube_dl/extractor/egghead.py
+++ b/youtube_dl/extractor/egghead.py
@@ -7,7 +7,7 @@ from .common import InfoExtractor
 class EggheadCourseIE(InfoExtractor):
     IE_DESC = 'egghead.io course'
     IE_NAME = 'egghead:course'
-    _VALID_URL = r'https://egghead\.io/courses/(?P<id>[a-zA-Z_0-9-]+)'
+    _VALID_URL = r'https://egghead\.io/courses/(?P<id>[^/?#&]+)'
     _TEST = {
         'url': 'https://egghead.io/courses/professor-frisby-introduces-composable-functional-javascript',
         'playlist_count': 29,
@@ -20,18 +20,16 @@ class EggheadCourseIE(InfoExtractor):
 
     def _real_extract(self, url):
         playlist_id = self._match_id(url)
-        api_url = 'https://egghead.io/api/v1/series/' + playlist_id
-        course = self._download_json(api_url, playlist_id)
-        title = course.get('title')
-        description = course.get('description')
 
-        lessons = course.get('lessons')
-        entries = [{'_type': 'url', 'ie_key': 'Wistia', 'url': 'wistia:' + l.get('wistia_id')} for l in lessons]
+        course = self._download_json(
+            'https://egghead.io/api/v1/series/%s' % playlist_id, playlist_id)
 
-        return {
-            '_type': 'playlist',
-            'id': playlist_id,
-            'title': title,
-            'description': description,
-            'entries': entries,
-        }
+        entries = [
+            self.url_result(
+                'wistia:%s' % lesson['wistia_id'], ie='Wistia',
+                video_id=lesson['wistia_id'], video_title=lesson.get('title'))
+            for lesson in course['lessons'] if lesson.get('wistia_id')]
+
+        return self.playlist_result(
+            entries, playlist_id, course.get('title'),
+            course.get('description'))

From 58179eb7d96ebef26a0083e80a2022fab4ca1558 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com>
Date: Sun, 9 Jul 2017 17:55:40 +0700
Subject: [PATCH 13/22] [abc.net.au:iview] Extract more formats (closes #13492,
 closes #13489)

---
 youtube_dl/extractor/abc.py | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/youtube_dl/extractor/abc.py b/youtube_dl/extractor/abc.py
index 0247cabf9..60f753b95 100644
--- a/youtube_dl/extractor/abc.py
+++ b/youtube_dl/extractor/abc.py
@@ -3,11 +3,13 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
+from ..compat import compat_str
 from ..utils import (
     ExtractorError,
     js_to_json,
     int_or_none,
     parse_iso8601,
+    try_get,
 )
 
 
@@ -124,7 +126,20 @@ class ABCIViewIE(InfoExtractor):
         title = video_params.get('title') or video_params['seriesTitle']
         stream = next(s for s in video_params['playlist'] if s.get('type') == 'program')
 
-        formats = self._extract_akamai_formats(stream['hds-unmetered'], video_id)
+        format_urls = [
+            try_get(stream, lambda x: x['hds-unmetered'], compat_str)]
+
+        # May have higher quality video
+        sd_url = try_get(
+            stream, lambda x: x['streams']['hds']['sd'], compat_str)
+        if sd_url:
+            format_urls.append(sd_url.replace('metered', 'um'))
+
+        formats = []
+        for format_url in format_urls:
+            if format_url:
+                formats.extend(
+                    self._extract_akamai_formats(format_url, video_id))
         self._sort_formats(formats)
 
         subtitles = {}

From 256a746d21634eccad07a1e6dcafedcdf8b6181b Mon Sep 17 00:00:00 2001
From: luboss <lubos.katrinec@gmail.com>
Date: Fri, 2 Jun 2017 22:44:39 +0200
Subject: [PATCH 14/22] [joj] Add extractor

---
 youtube_dl/extractor/extractors.py |  1 +
 youtube_dl/extractor/joj.py        | 56 ++++++++++++++++++++++++++++++
 2 files changed, 57 insertions(+)
 create mode 100755 youtube_dl/extractor/joj.py

diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index 4524fa687..9ee080895 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -470,6 +470,7 @@ from .jamendo import (
 )
 from .jeuxvideo import JeuxVideoIE
 from .jove import JoveIE
+from .joj import JojIE
 from .jwplatform import JWPlatformIE
 from .jpopsukitv import JpopsukiIE
 from .kaltura import KalturaIE
diff --git a/youtube_dl/extractor/joj.py b/youtube_dl/extractor/joj.py
new file mode 100755
index 000000000..2ebfec902
--- /dev/null
+++ b/youtube_dl/extractor/joj.py
@@ -0,0 +1,56 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+import re
+
+
+class JojIE(InfoExtractor):
+    _VALID_URL = r'https?://[a-z0-9]+\.joj\.sk/([^/]+/)*(?P<title_query>(?P<release_date>[0-9]{4}(-[0-9]{2}){2}).*)' # noqa
+    _TESTS = [{
+        'url': 'https://www.joj.sk/nove-byvanie/archiv/2017-05-28-nove-byvanie', # noqa
+        'info_dict': {
+            'id': 'a388ec4c-6019-4a4a-9312-b1bee194e932',
+            'ext': 'mp4',
+            'title': 'Nové Bývanie',
+            'release_date': '20170528'
+        }
+    }, {
+        'url': 'http://nasi.joj.sk/epizody/2016-09-06-stari-rodicia',
+        'info_dict': {
+            'id': 'f18b2c5f-9ea8-4941-a164-a814c53306ad',
+            'ext': 'mp4',
+            'title': 'Starí Rodičia',
+            'release_date': '20160906'
+        }
+    }]
+
+    media_src_url = 'http://n16.joj.sk/storage/'
+    xml_source_url = 'https://media.joj.sk/services/Video.php?clip='
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        release_date = mobj.group('release_date').replace('-', '')
+        webpage = self._download_webpage(url, 'id')
+        video_id = self._html_search_regex(
+            r'https?://([a-z0-9]+\.)joj\.sk/embed/(?P<video_id>[a-f0-9\-]+)',
+            webpage, 'id', group='video_id')
+        xml_playlist_url = self.xml_source_url + video_id
+        xml_playlist_et = self._download_xml(xml_playlist_url, 'XML playlist')
+        formats = []
+        for file_el in xml_playlist_et.findall('files/file'):
+            try:
+                height = int(file_el.attrib['id'].replace('p', ''))
+            except ValueError:
+                height = 0
+            formats.append({'height': height,
+                            'url': self.media_src_url + file_el.attrib['path'].replace(  # noqa
+                                'dat/', '', 1)})
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': self._og_search_title(webpage).title(),
+            'formats': formats,
+            'release_date': release_date
+        }

From 73cf76a93fe48240bf82b1685b1403f05b793ebf Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com>
Date: Sun, 9 Jul 2017 19:05:18 +0700
Subject: [PATCH 15/22] [joj] Rewrite and add support for generic embeds
 (closes #13268)

---
 youtube_dl/extractor/generic.py |  17 +++++
 youtube_dl/extractor/joj.py     | 108 ++++++++++++++++++++++----------
 2 files changed, 93 insertions(+), 32 deletions(-)

diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index 919f4f987..f2c577f98 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -91,6 +91,7 @@ from .anvato import AnvatoIE
 from .washingtonpost import WashingtonPostIE
 from .wistia import WistiaIE
 from .mediaset import MediasetIE
+from .joj import JojIE
 
 
 class GenericIE(InfoExtractor):
@@ -1770,6 +1771,16 @@ class GenericIE(InfoExtractor):
             },
             'add_ie': [MediasetIE.ie_key()],
         },
+        {
+            # JOJ.sk embeds
+            'url': 'https://www.noviny.sk/slovensko/238543-slovenskom-sa-prehnala-vlna-silnych-burok',
+            'info_dict': {
+                'id': '238543-slovenskom-sa-prehnala-vlna-silnych-burok',
+                'title': 'Slovenskom sa prehnala vlna silných búrok',
+            },
+            'playlist_mincount': 5,
+            'add_ie': [JojIE.ie_key()],
+        },
         {
             # AMP embed (see https://www.ampproject.org/docs/reference/components/amp-video)
             'url': 'https://tvrain.ru/amp/418921/',
@@ -2722,6 +2733,12 @@ class GenericIE(InfoExtractor):
             return self.playlist_from_matches(
                 mediaset_urls, video_id, video_title, ie=MediasetIE.ie_key())
 
+        # Look for JOJ.sk embeds
+        joj_urls = JojIE._extract_urls(webpage)
+        if joj_urls:
+            return self.playlist_from_matches(
+                joj_urls, video_id, video_title, ie=JojIE.ie_key())
+
         def merge_dicts(dict1, dict2):
             merged = {}
             for k, v in dict1.items():
diff --git a/youtube_dl/extractor/joj.py b/youtube_dl/extractor/joj.py
index 2ebfec902..a764023e9 100755
--- a/youtube_dl/extractor/joj.py
+++ b/youtube_dl/extractor/joj.py
@@ -1,56 +1,100 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
-from .common import InfoExtractor
 import re
 
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+    int_or_none,
+    js_to_json,
+    try_get,
+)
+
 
 class JojIE(InfoExtractor):
-    _VALID_URL = r'https?://[a-z0-9]+\.joj\.sk/([^/]+/)*(?P<title_query>(?P<release_date>[0-9]{4}(-[0-9]{2}){2}).*)' # noqa
+    _VALID_URL = r'''(?x)
+                    (?:
+                        joj:|
+                        https?://media\.joj\.sk/embed/
+                    )
+                    (?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})
+                '''
     _TESTS = [{
-        'url': 'https://www.joj.sk/nove-byvanie/archiv/2017-05-28-nove-byvanie', # noqa
+        'url': 'https://media.joj.sk/embed/a388ec4c-6019-4a4a-9312-b1bee194e932',
         'info_dict': {
             'id': 'a388ec4c-6019-4a4a-9312-b1bee194e932',
             'ext': 'mp4',
-            'title': 'Nové Bývanie',
-            'release_date': '20170528'
+            'title': 'NOVÉ BÝVANIE',
+            'thumbnail': r're:^https?://.*\.jpg$',
+            'duration': 3118,
         }
     }, {
-        'url': 'http://nasi.joj.sk/epizody/2016-09-06-stari-rodicia',
-        'info_dict': {
-            'id': 'f18b2c5f-9ea8-4941-a164-a814c53306ad',
-            'ext': 'mp4',
-            'title': 'Starí Rodičia',
-            'release_date': '20160906'
-        }
+        'url': 'joj:a388ec4c-6019-4a4a-9312-b1bee194e932',
+        'only_matching': True,
     }]
 
-    media_src_url = 'http://n16.joj.sk/storage/'
-    xml_source_url = 'https://media.joj.sk/services/Video.php?clip='
+    @staticmethod
+    def _extract_urls(webpage):
+        return re.findall(
+            r'<iframe\b[^>]+\bsrc=["\'](?P<url>(?:https?:)?//media\.joj\.sk/embed/[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})',
+            webpage)
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        release_date = mobj.group('release_date').replace('-', '')
-        webpage = self._download_webpage(url, 'id')
-        video_id = self._html_search_regex(
-            r'https?://([a-z0-9]+\.)joj\.sk/embed/(?P<video_id>[a-f0-9\-]+)',
-            webpage, 'id', group='video_id')
-        xml_playlist_url = self.xml_source_url + video_id
-        xml_playlist_et = self._download_xml(xml_playlist_url, 'XML playlist')
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(
+            'https://media.joj.sk/embed/%s' % video_id, video_id)
+
+        title = self._search_regex(
+            (r'videoTitle\s*:\s*(["\'])(?P<title>(?:(?!\1).)+)\1',
+             r'<title>(?P<title>[^<]+)'), webpage, 'title',
+            default=None, group='title') or self._og_search_title(webpage)
+
+        bitrates = self._parse_json(
+            self._search_regex(
+                r'(?s)bitrates\s*=\s*({.+?});', webpage, 'bitrates',
+                default='{}'),
+            video_id, transform_source=js_to_json, fatal=False)
+
         formats = []
-        for file_el in xml_playlist_et.findall('files/file'):
-            try:
-                height = int(file_el.attrib['id'].replace('p', ''))
-            except ValueError:
-                height = 0
-            formats.append({'height': height,
-                            'url': self.media_src_url + file_el.attrib['path'].replace(  # noqa
-                                'dat/', '', 1)})
+        for format_url in try_get(bitrates, lambda x: x['mp4'], list) or []:
+            if isinstance(format_url, compat_str):
+                height = self._search_regex(
+                    r'(\d+)[pP]\.', format_url, 'height', default=None)
+                formats.append({
+                    'url': format_url,
+                    'format_id': '%sp' % height if height else None,
+                    'height': int_or_none(height),  # None when URL has no "<N>p." part
+                })
+        if not formats:
+            playlist = self._download_xml(
+                'https://media.joj.sk/services/Video.php?clip=%s' % video_id,
+                video_id)
+            for file_el in playlist.findall('./files/file'):
+                path = file_el.get('path')
+                if not path:
+                    continue
+                format_id = file_el.get('id') or file_el.get('label')
+                formats.append({
+                    'url': 'http://n16.joj.sk/storage/%s' % path.replace(
+                        'dat/', '', 1),
+                    'format_id': format_id,
+                    'height': int_or_none(self._search_regex(
+                        r'(\d+)[pP]', format_id or path, 'height',
+                        default=None)),
+                })
         self._sort_formats(formats)
 
+        thumbnail = self._og_search_thumbnail(webpage)
+
+        duration = int_or_none(self._search_regex(
+            r'videoDuration\s*:\s*(\d+)', webpage, 'duration', fatal=False))
+
         return {
             'id': video_id,
-            'title': self._og_search_title(webpage).title(),
+            'title': title,
+            'thumbnail': thumbnail,
+            'duration': duration,
             'formats': formats,
-            'release_date': release_date
         }

From 6e925598d68f5d5216aa3e9abed5c7706a68c891 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com>
Date: Sun, 9 Jul 2017 19:15:48 +0700
Subject: [PATCH 16/22] [cjsw] Add coding cookie

---
 youtube_dl/extractor/cjsw.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/youtube_dl/extractor/cjsw.py b/youtube_dl/extractor/cjsw.py
index dd271586f..505bdbe16 100644
--- a/youtube_dl/extractor/cjsw.py
+++ b/youtube_dl/extractor/cjsw.py
@@ -1,3 +1,4 @@
+# coding: utf-8
 from __future__ import unicode_literals
 
 import re

From 71a1db89198100a0e9bc5099aeed622264690203 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com>
Date: Sun, 9 Jul 2017 20:06:24 +0700
Subject: [PATCH 17/22] [dailymail] Add support for embeds

---
 youtube_dl/extractor/dailymail.py | 17 ++++++++++++++---
 youtube_dl/extractor/generic.py   | 21 +++++++++++++++++++++
 2 files changed, 35 insertions(+), 3 deletions(-)

diff --git a/youtube_dl/extractor/dailymail.py b/youtube_dl/extractor/dailymail.py
index 538565c66..af3978035 100644
--- a/youtube_dl/extractor/dailymail.py
+++ b/youtube_dl/extractor/dailymail.py
@@ -1,6 +1,8 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
+import re
+
 from .common import InfoExtractor
 from ..compat import compat_str
 from ..utils import (
@@ -12,8 +14,8 @@ from ..utils import (
 
 
 class DailyMailIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?dailymail\.co\.uk/video/[^/]+/video-(?P<id>[0-9]+)'
-    _TEST = {
+    _VALID_URL = r'https?://(?:www\.)?dailymail\.co\.uk/(?:video/[^/]+/video-|embed/video/)(?P<id>[0-9]+)'
+    _TESTS = [{
         'url': 'http://www.dailymail.co.uk/video/tvshowbiz/video-1295863/The-Mountain-appears-sparkling-water-ad-Heavy-Bubbles.html',
         'md5': 'f6129624562251f628296c3a9ffde124',
         'info_dict': {
@@ -22,7 +24,16 @@ class DailyMailIE(InfoExtractor):
             'title': 'The Mountain appears in sparkling water ad for \'Heavy Bubbles\'',
             'description': 'md5:a93d74b6da172dd5dc4d973e0b766a84',
         }
-    }
+    }, {
+        'url': 'http://www.dailymail.co.uk/embed/video/1295863.html',
+        'only_matching': True,
+    }]
+
+    @staticmethod
+    def _extract_urls(webpage):
+        return re.findall(
+            r'<iframe\b[^>]+\bsrc=["\'](?P<url>(?:https?:)?//(?:www\.)?dailymail\.co\.uk/embed/video/\d+\.html)',
+            webpage)
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index f2c577f98..5e8890d41 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -57,6 +57,7 @@ from .dailymotion import (
     DailymotionIE,
     DailymotionCloudIE,
 )
+from .dailymail import DailyMailIE
 from .onionstudios import OnionStudiosIE
 from .viewlift import ViewLiftEmbedIE
 from .mtv import MTVServicesEmbeddedIE
@@ -760,6 +761,20 @@ class GenericIE(InfoExtractor):
             },
             'add_ie': ['Dailymotion'],
         },
+        # DailyMail embed
+        {
+            'url': 'http://www.bumm.sk/krimi/2017/07/05/biztonsagi-kamera-buktatta-le-az-agg-ferfit-utlegelo-apolot',
+            'info_dict': {
+                'id': '1495629',
+                'ext': 'mp4',
+                'title': 'Care worker punches elderly dementia patient in head 11 times',
+                'description': 'md5:3a743dee84e57e48ec68bf67113199a5',
+            },
+            'add_ie': ['DailyMail'],
+            'params': {
+                'skip_download': True,
+            },
+        },
         # YouTube embed
         {
             'url': 'http://www.badzine.de/ansicht/datum/2014/06/09/so-funktioniert-die-neue-englische-badminton-liga.html',
@@ -2190,6 +2205,12 @@ class GenericIE(InfoExtractor):
                 return self.playlist_from_matches(
                     playlists, video_id, video_title, lambda p: '//dailymotion.com/playlist/%s' % p)
 
+        # Look for DailyMail embeds
+        dailymail_urls = DailyMailIE._extract_urls(webpage)
+        if dailymail_urls:
+            return self.playlist_from_matches(
+                dailymail_urls, video_id, video_title, ie=DailyMailIE.ie_key())
+
         # Look for embedded Wistia player
         wistia_url = WistiaIE._extract_url(webpage)
         if wistia_url:

From 207acd8465b51d9d00d2bdda22f10858eb7f1bb5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com>
Date: Sun, 9 Jul 2017 20:15:15 +0700
Subject: [PATCH 18/22] [ChangeLog] Actualize

---
 ChangeLog | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/ChangeLog b/ChangeLog
index 5d07c12cb..edfde8b6f 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,7 +1,27 @@
 version <unreleased>
 
+Core
++ [extractor/common] Add support for AMP tags in _parse_html5_media_entries
++ [utils] Support attributes with no values in get_elements_by_attribute
+
 Extractors
++ [dailymail] Add support for embeds
++ [joj] Add support for joj.sk (#13268)
+* [abc.net.au:iview] Extract more formats (#13492, #13489)
+* [egghead:course] Fix extraction (#6635, #13370)
++ [cjsw] Add support for cjsw.com (#13525)
++ [eagleplatform] Add support for referrer protected videos (#13557)
++ [eagleplatform] Add support for another embed pattern (#13557)
+* [veoh] Extend URL regular expression (#13601)
+* [npo:live] Fix live stream id extraction (#13568, #13605)
+* [googledrive] Fix height extraction (#13603)
++ [dailymotion] Add support for new layout (#13580)
 - [yam] Remove extractor
+* [xhamster] Extract all formats and fix duration extraction (#13593)
++ [xhamster] Add support for new URL schema (#13593)
+* [espn] Extend URL regular expression (#13244, #13549)
+* [kaltura] Fix typo in subtitles extraction (#13569)
+* [vier] Adapt extraction to redesign (#13575)
 
 
 version 2017.07.02

From 65c416dda896f8a0023f01547e6b707dd57ed30a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com>
Date: Sun, 9 Jul 2017 20:16:38 +0700
Subject: [PATCH 19/22] release 2017.07.09

---
 .github/ISSUE_TEMPLATE.md | 6 +++---
 ChangeLog                 | 2 +-
 docs/supportedsites.md    | 3 ++-
 youtube_dl/version.py     | 2 +-
 4 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md
index 974603507..c4314855d 100644
--- a/.github/ISSUE_TEMPLATE.md
+++ b/.github/ISSUE_TEMPLATE.md
@@ -6,8 +6,8 @@
 
 ---
 
-### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.07.02*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
-- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.07.02**
+### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.07.09*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.07.09**
 
 ### Before submitting an *issue* make sure you have:
 - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
@@ -35,7 +35,7 @@ $ youtube-dl -v <your command line>
 [debug] User config: []
 [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
 [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
-[debug] youtube-dl version 2017.07.02
+[debug] youtube-dl version 2017.07.09
 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
 [debug] Proxy map: {}
diff --git a/ChangeLog b/ChangeLog
index edfde8b6f..c379cae71 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,4 +1,4 @@
-version <unreleased>
+version 2017.07.09
 
 Core
 + [extractor/common] Add support for AMP tags in _parse_html5_media_entries
diff --git a/docs/supportedsites.md b/docs/supportedsites.md
index db2e2bac9..b6a147faf 100644
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@@ -154,6 +154,7 @@
  - **chirbit**
  - **chirbit:profile**
  - **Cinchcast**
+ - **CJSW**
  - **Clipfish**
  - **cliphunter**
  - **ClipRs**
@@ -369,6 +370,7 @@
  - **Jamendo**
  - **JamendoAlbum**
  - **JeuxVideo**
+ - **Joj**
  - **Jove**
  - **jpopsuki.tv**
  - **JWPlatform**
@@ -996,7 +998,6 @@
  - **XVideos**
  - **XXXYMovies**
  - **Yahoo**: Yahoo screen and movies
- - **Yam**: 蕃薯藤yam天空部落
  - **yandexmusic:album**: Яндекс.Музыка - Альбом
  - **yandexmusic:playlist**: Яндекс.Музыка - Плейлист
  - **yandexmusic:track**: Яндекс.Музыка - Трек
diff --git a/youtube_dl/version.py b/youtube_dl/version.py
index 0db974f97..14358a74c 100644
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,3 +1,3 @@
 from __future__ import unicode_literals
 
-__version__ = '2017.07.02'
+__version__ = '2017.07.09'

From 7bf539edcc3dc44481d5196fd01637698653ffc7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com>
Date: Mon, 10 Jul 2017 00:14:41 +0700
Subject: [PATCH 20/22] [eagleplatform] Fix test

---
 youtube_dl/extractor/eagleplatform.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/youtube_dl/extractor/eagleplatform.py b/youtube_dl/extractor/eagleplatform.py
index 34891a362..42789278e 100644
--- a/youtube_dl/extractor/eagleplatform.py
+++ b/youtube_dl/extractor/eagleplatform.py
@@ -53,7 +53,7 @@ class EaglePlatformIE(InfoExtractor):
         'skip': 'Georestricted',
     }, {
         # referrer protected video (https://tvrain.ru/lite/teleshow/kak_vse_nachinalos/namin-418921/)
-        'url': 'tvrainru.media.eagleplatform.com:582306',
+        'url': 'eagleplatform:tvrainru.media.eagleplatform.com:582306',
         'only_matching': True,
     }]
 

From b71c18b4343d54ce8373e9a11df882aca1ae82a0 Mon Sep 17 00:00:00 2001
From: coreynicholson <coreynicholson@users.noreply.github.com>
Date: Sun, 9 Jul 2017 22:24:04 +0100
Subject: [PATCH 21/22] [vlive:playlist] Add extractor

---
 youtube_dl/extractor/extractors.py |  3 +-
 youtube_dl/extractor/vlive.py      | 56 ++++++++++++++++++++++++++++++
 2 files changed, 58 insertions(+), 1 deletion(-)

diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index 9ee080895..eb1541729 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -1206,7 +1206,8 @@ from .vk import (
 )
 from .vlive import (
     VLiveIE,
-    VLiveChannelIE
+    VLiveChannelIE,
+    VLivePlaylistIE
 )
 from .vodlocker import VodlockerIE
 from .vodpl import VODPlIE
diff --git a/youtube_dl/extractor/vlive.py b/youtube_dl/extractor/vlive.py
index e58940607..f3825db5c 100644
--- a/youtube_dl/extractor/vlive.py
+++ b/youtube_dl/extractor/vlive.py
@@ -49,6 +49,10 @@ class VLiveIE(InfoExtractor):
         },
     }]
 
+    @classmethod
+    def suitable(cls, url):
+        return False if VLivePlaylistIE.suitable(url) else super(VLiveIE, cls).suitable(url)
+
     def _real_extract(self, url):
         video_id = self._match_id(url)
 
@@ -261,3 +265,55 @@ class VLiveChannelIE(InfoExtractor):
 
         return self.playlist_result(
             entries, channel_code, channel_name)
+
+
+class VLivePlaylistIE(InfoExtractor):
+    IE_NAME = 'vlive:playlist'
+    _VALID_URL = r'https?://(?:(?:www|m)\.)?vlive\.tv/video/(?P<video_id>[0-9]+)/playlist/(?P<id>[0-9]+)'
+    _TEST = {
+        'url': 'http://www.vlive.tv/video/22867/playlist/22912',
+        'info_dict': {
+            'id': '22912',
+            'title': 'Valentine Day Message from TWICE'
+        },
+        'playlist_mincount': 9
+    }
+
+    def _real_extract(self, url):
+        playlist_id = self._match_id(url)
+        video_id_match = re.match(self._VALID_URL, url)
+        assert video_id_match
+        video_id = compat_str(video_id_match.group('video_id'))
+
+        VIDEO_URL_TEMPLATE = 'http://www.vlive.tv/video/%s'
+        if self._downloader.params.get('noplaylist'):
+            self.to_screen(
+                'Downloading just video %s because of --no-playlist' % video_id)
+            return self.url_result(
+                VIDEO_URL_TEMPLATE % video_id,
+                ie=VLiveIE.ie_key(), video_id=video_id)
+
+        self.to_screen(
+            'Downloading playlist %s - add --no-playlist to just download video' % playlist_id)
+
+        webpage = self._download_webpage(
+            'http://www.vlive.tv/video/%s/playlist/%s' % (video_id, playlist_id), video_id)
+
+        playlist_name = self._html_search_regex(
+            r'<div[^>]+class="[^"]*multicam_playlist[^>]*>\s*<h3[^>]+>([^<]+)',
+            webpage, 'playlist name', fatal=False)
+
+        item_ids = self._search_regex(
+            r'\bvar\s+playlistVideoSeqs\s*=\s*(\[[^]]+\])',
+            webpage, 'playlist item ids')
+
+        entries = []
+        for item_id in self._parse_json(item_ids, playlist_id):
+            item_id = compat_str(item_id)
+            entries.append(
+                self.url_result(
+                    VIDEO_URL_TEMPLATE % item_id,
+                    ie=VLiveIE.ie_key(), video_id=item_id))
+
+        return self.playlist_result(
+            entries, playlist_id, playlist_name)

From e3cd1fcdd177613acae4198cafbff51fbbb912c2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com>
Date: Mon, 10 Jul 2017 04:32:24 +0700
Subject: [PATCH 22/22] [vlive:playlist] Relax and simplify

---
 youtube_dl/extractor/vlive.py | 41 +++++++++++++++++------------------
 1 file changed, 20 insertions(+), 21 deletions(-)

diff --git a/youtube_dl/extractor/vlive.py b/youtube_dl/extractor/vlive.py
index f3825db5c..77c120a57 100644
--- a/youtube_dl/extractor/vlive.py
+++ b/youtube_dl/extractor/vlive.py
@@ -280,10 +280,8 @@ class VLivePlaylistIE(InfoExtractor):
     }
 
     def _real_extract(self, url):
-        playlist_id = self._match_id(url)
-        video_id_match = re.match(self._VALID_URL, url)
-        assert video_id_match
-        video_id = compat_str(video_id_match.group('video_id'))
+        mobj = re.match(self._VALID_URL, url)
+        video_id, playlist_id = mobj.group('video_id', 'id')
 
         VIDEO_URL_TEMPLATE = 'http://www.vlive.tv/video/%s'
         if self._downloader.params.get('noplaylist'):
@@ -294,26 +292,27 @@ class VLivePlaylistIE(InfoExtractor):
                 ie=VLiveIE.ie_key(), video_id=video_id)
 
         self.to_screen(
-            'Downloading playlist %s - add --no-playlist to just download video' % playlist_id)
+            'Downloading playlist %s - add --no-playlist to just download video'
+            % playlist_id)
 
         webpage = self._download_webpage(
-            'http://www.vlive.tv/video/%s/playlist/%s' % (video_id, playlist_id), video_id)
+            'http://www.vlive.tv/video/%s/playlist/%s'
+            % (video_id, playlist_id), playlist_id)
+
+        item_ids = self._parse_json(
+            self._search_regex(
+                r'playlistVideoSeqs\s*=\s*(\[[^]]+\])', webpage,
+                'playlist video seqs'),
+            playlist_id)
+
+        entries = [
+            self.url_result(
+                VIDEO_URL_TEMPLATE % item_id, ie=VLiveIE.ie_key(),
+                video_id=compat_str(item_id))
+            for item_id in item_ids]
 
         playlist_name = self._html_search_regex(
             r'<div[^>]+class="[^"]*multicam_playlist[^>]*>\s*<h3[^>]+>([^<]+)',
-            webpage, 'playlist name', fatal=False)
+            webpage, 'playlist title', fatal=False)
 
-        item_ids = self._search_regex(
-            r'\bvar\s+playlistVideoSeqs\s*=\s*(\[[^]]+\])',
-            webpage, 'playlist item ids')
-
-        entries = []
-        for item_id in self._parse_json(item_ids, playlist_id):
-            item_id = compat_str(item_id)
-            entries.append(
-                self.url_result(
-                    VIDEO_URL_TEMPLATE % item_id,
-                    ie=VLiveIE.ie_key(), video_id=item_id))
-
-        return self.playlist_result(
-            entries, playlist_id, playlist_name)
+        return self.playlist_result(entries, playlist_id, playlist_name)