From f57f84f606b246db4f102fc5bc55e64e4f7a3d60 Mon Sep 17 00:00:00 2001
From: fnord <fnord@fnord.mobi>
Date: Tue, 21 Jul 2015 16:38:40 -0500
Subject: [PATCH 01/83] Twitter: get and describe video from status urls

---
 youtube_dl/extractor/twitter.py | 44 +++++++++++++++++++++++++++++++++
 1 file changed, 44 insertions(+)
diff --git a/youtube_dl/extractor/twitter.py b/youtube_dl/extractor/twitter.py
index 1aaa06305..a65252cc6 100644
--- a/youtube_dl/extractor/twitter.py
+++ b/youtube_dl/extractor/twitter.py
@@ -70,3 +70,47 @@ class TwitterCardIE(InfoExtractor):
             'duration': duration,
             'formats': formats,
         }
+
+
+class TwitterIE(TwitterCardIE):
+    _VALID_URL = r'https?://(?:www|m|mobile)?\.?twitter\.com/(?P<id>[^/]+/status/\d+)'
+
+    _TESTS = [{
+        'url': 'https://m.twitter.com/thereaIbanksy/status/614301758345490432',
+        'md5': '8bbccb487bd7a31349b775915fcd412f',
+        'info_dict': {
+            'id': '614301758345490432',
+            'ext': 'mp4',
+            'title': 'thereaIbanksy - This time lapse is so pretty \U0001f60d\U0001f60d',
+            'thumbnail': 're:^https?://.*\.jpg',
+            'duration': 29.5,
+            'description': 'banksy on Twitter: "This time lapse is so pretty \U0001f60d\U0001f60d http://t.co/QB8DDbqiR1"',
+            'uploader': 'banksy',
+            'uploader_id': 'thereaIbanksy',
+        },
+    }]
+
+    def _real_extract(self, url):
+        id = self._match_id(url)
+        username, twid = re.match(r'([^/]+)/status/(\d+)', id).groups()
+        name = username
+        url = re.sub(r'https?://(m|mobile)\.', 'https://', url)
+        webpage = self._download_webpage(url, 'tweet: ' + url)
+        description = unescapeHTML(self._search_regex('<title>\s*(.+?)\s*</title>', webpage, 'title'))
+        title = description.replace('\n', ' ')
+        splitdesc = re.match(r'^(.+?)\s*on Twitter:\s* "(.+?)"$', title)
+        if splitdesc:
+            name, title = splitdesc.groups()
+        title = re.sub(r'\s*https?://[^ ]+', '', title)  # strip  'https -_t.co_BJYgOjSeGA' junk from filenames
+        card_id = self._search_regex(r'["\']/i/cards/tfw/v1/(\d+)', webpage, '/i/card/...')
+        card_url = 'https://twitter.com/i/cards/tfw/v1/' + card_id
+        return {
+            '_type': 'url_transparent',
+            'ie_key': 'TwitterCard',
+            'uploader_id': username,
+            'uploader': name,
+            'url': card_url,
+            'webpage_url': url,
+            'description': description,
+            'title': username + ' - ' + title,
+        }

From c3dea3f878133f3cbdad9e548609d3077572af66 Mon Sep 17 00:00:00 2001
From: fnord <fnord@fnord.mobi>
Date: Tue, 21 Jul 2015 16:45:36 -0500
Subject: [PATCH 02/83] Twittercard: support vmapurl method

---
 youtube_dl/extractor/twitter.py | 47 ++++++++++++++++++++++++++-------
 1 file changed, 37 insertions(+), 10 deletions(-)

diff --git a/youtube_dl/extractor/twitter.py b/youtube_dl/extractor/twitter.py
index a65252cc6..1dd43ff3c 100644
--- a/youtube_dl/extractor/twitter.py
+++ b/youtube_dl/extractor/twitter.py
@@ -12,17 +12,30 @@ from ..utils import (
 
 class TwitterCardIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?twitter\.com/i/cards/tfw/v1/(?P<id>\d+)'
-    _TEST = {
-        'url': 'https://twitter.com/i/cards/tfw/v1/560070183650213889',
-        'md5': 'a74f50b310c83170319ba16de6955192',
-        'info_dict': {
-            'id': '560070183650213889',
-            'ext': 'mp4',
-            'title': 'TwitterCard',
-            'thumbnail': 're:^https?://.*\.jpg$',
-            'duration': 30.033,
+    _TESTS = [
+        {
+            'url': 'https://twitter.com/i/cards/tfw/v1/560070183650213889',
+            'md5': 'a74f50b310c83170319ba16de6955192',
+            'info_dict': {
+                'id': '560070183650213889',
+                'ext': 'mp4',
+                'title': 'TwitterCard',
+                'thumbnail': 're:^https?://.*\.jpg$',
+                'duration': 30.033,
+            }
         },
-    }
+        {
+            'url': 'https://twitter.com/i/cards/tfw/v1/623160978427936768',
+            'md5': '7ee2a553b63d1bccba97fbed97d9e1c8',
+            'info_dict': {
+                'id': '623160978427936768',
+                'ext': 'mp4',
+                'title': 'TwitterCard',
+                'thumbnail': 're:^https?://.*\.jpg',
+                'duration': 80.155,
+            },
+        }
+    ]
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
@@ -44,6 +57,20 @@ class TwitterCardIE(InfoExtractor):
                 unescapeHTML(self._search_regex(
                     r'data-player-config="([^"]+)"', webpage, 'data player config')),
                 video_id)
+            if 'playlist' not in config:
+                if 'vmapUrl' in config:
+                    webpage = self._download_webpage(config['vmapUrl'], video_id + ' (xml)')
+                    video_url = self._search_regex(
+                        r'<MediaFile>\s*<!\[CDATA\[(https?://.+?)\]\]>', webpage, 'data player config (xml)')
+                    f = {
+                        'url': video_url,
+                    }
+                    ext = re.search(r'\.([a-z0-9]{2,4})(\?.+)?$', video_url)
+                    if ext:
+                        f['ext'] = ext.group(1)
+                    formats.append(f)
+                    break   # same video regardless of UA
+                continue
 
             video_url = config['playlist'][0]['source']
 

From 9e7e0dffd5e3e3c959e8d99a5e236b9099886fe9 Mon Sep 17 00:00:00 2001
From: fnord <fnord@fnord.mobi>
Date: Tue, 21 Jul 2015 16:56:35 -0500
Subject: [PATCH 03/83] Actually add the extractor

---
 youtube_dl/extractor/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index 50da08830..5c03bf8e8 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -651,7 +651,7 @@ from .twitch import (
     TwitchBookmarksIE,
     TwitchStreamIE,
 )
-from .twitter import TwitterCardIE
+from .twitter import TwitterCardIE, TwitterIE
 from .ubu import UbuIE
 from .udemy import (
     UdemyIE,

From ee2edd838a1e8770488e695c380943ded44d0983 Mon Sep 17 00:00:00 2001
From: Philipp Hagemeister <phihag@phihag.de>
Date: Tue, 13 Oct 2015 00:53:05 +0200
Subject: [PATCH 04/83] release 2015.10.13

---
 youtube_dl/version.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/youtube_dl/version.py b/youtube_dl/version.py
index 0908e963d..aaa43d315 100644
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,3 +1,3 @@
 from __future__ import unicode_literals
 
-__version__ = '2015.10.12'
+__version__ = '2015.10.13'

From 5946cda7c6f2e4a7eb90fff6f10c66af0ff2a0d7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com>
Date: Tue, 13 Oct 2015 21:04:39 +0600
Subject: [PATCH 05/83] [beeg] Fix extraction (Closes #7155)

---
 youtube_dl/extractor/beeg.py | 68 +++++++++++++++++++-----------------
 1 file changed, 35 insertions(+), 33 deletions(-)

diff --git a/youtube_dl/extractor/beeg.py b/youtube_dl/extractor/beeg.py
index b38057f2f..e6c928699 100644
--- a/youtube_dl/extractor/beeg.py
+++ b/youtube_dl/extractor/beeg.py
@@ -1,65 +1,67 @@
 from __future__ import unicode_literals
 
-import re
-
 from .common import InfoExtractor
+from ..utils import (
+    int_or_none,
+    parse_iso8601,
+)
 
 
 class BeegIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?beeg\.com/(?P<id>\d+)'
     _TEST = {
         'url': 'http://beeg.com/5416503',
-        'md5': '1bff67111adb785c51d1b42959ec10e5',
+        'md5': '46c384def73b33dbc581262e5ee67cef',
         'info_dict': {
             'id': '5416503',
             'ext': 'mp4',
             'title': 'Sultry Striptease',
-            'description': 'md5:6db3c6177972822aaba18652ff59c773',
-            'categories': list,  # NSFW
-            'thumbnail': 're:https?://.*\.jpg$',
+            'description': 'md5:d22219c09da287c14bed3d6c37ce4bc2',
+            'timestamp': 1391813355,
+            'upload_date': '20140207',
+            'duration': 383,
+            'tags': list,
             'age_limit': 18,
         }
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = self._match_id(url)
 
-        webpage = self._download_webpage(url, video_id)
-
-        quality_arr = self._search_regex(
-            r'(?s)var\s+qualityArr\s*=\s*{\s*(.+?)\s*}', webpage, 'quality formats')
-
-        formats = [{
-            'url': fmt[1],
-            'format_id': fmt[0],
-            'height': int(fmt[0][:-1]),
-        } for fmt in re.findall(r"'([^']+)'\s*:\s*'([^']+)'", quality_arr)]
+        video = self._download_json(
+            'http://beeg.com/api/v1/video/%s' % video_id, video_id)
 
+        formats = []
+        for format_id, video_url in video.items():
+            height = self._search_regex(
+                r'^(\d+)[pP]$', format_id, 'height', default=None)
+            if not height:
+                continue
+            formats.append({
+                'url': self._proto_relative_url(video_url.replace('{DATA_MARKERS}', ''), 'http:'),
+                'format_id': format_id,
+                'height': int(height),
+            })
         self._sort_formats(formats)
 
-        title = self._html_search_regex(
-            r'<title>([^<]+)\s*-\s*beeg\.?</title>', webpage, 'title')
+        title = video['title']
+        video_id = video.get('id') or video_id
+        display_id = video.get('code')
+        description = video.get('desc')
 
-        description = self._html_search_regex(
-            r'<meta name="description" content="([^"]*)"',
-            webpage, 'description', fatal=False)
-        thumbnail = self._html_search_regex(
-            r'\'previewer.url\'\s*:\s*"([^"]*)"',
-            webpage, 'thumbnail', fatal=False)
+        timestamp = parse_iso8601(video.get('date'), ' ')
+        duration = int_or_none(video.get('duration'))
 
-        categories_str = self._html_search_regex(
-            r'<meta name="keywords" content="([^"]+)"', webpage, 'categories', fatal=False)
-        categories = (
-            None if categories_str is None
-            else categories_str.split(','))
+        tags = [tag.strip() for tag in video['tags'].split(',')] if video.get('tags') else None
 
         return {
             'id': video_id,
+            'display_id': display_id,
             'title': title,
             'description': description,
-            'thumbnail': thumbnail,
-            'categories': categories,
+            'timestamp': timestamp,
+            'duration': duration,
+            'tags': tags,
             'formats': formats,
             'age_limit': 18,
         }

From 3eeff489e80838cd2d1b3f55fc96db747386fd50 Mon Sep 17 00:00:00 2001
From: DesweR <egomail@inbox.ru>
Date: Tue, 13 Oct 2015 16:29:16 +0700
Subject: [PATCH 06/83] Extract thumbnail url

---
 youtube_dl/extractor/yandexmusic.py | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/youtube_dl/extractor/yandexmusic.py b/youtube_dl/extractor/yandexmusic.py
index 4098e4629..6842f834f 100644
--- a/youtube_dl/extractor/yandexmusic.py
+++ b/youtube_dl/extractor/yandexmusic.py
@@ -46,6 +46,14 @@ class YandexMusicTrackIE(InfoExtractor):
                 % (data['host'], key, data['ts'] + data['path'], storage[1]))
 
     def _get_track_info(self, track):
+        album = track['albums'][0]
+        a_thumb = None
+
+        if 'coverUri' in album:
+            a_thumb = album['coverUri']
+            if a_thumb:
+                a_thumb = 'http://' + a_thumb.replace('%%', '1000x1000')
+
         return {
             'id': track['id'],
             'ext': 'mp3',
@@ -53,6 +61,7 @@ class YandexMusicTrackIE(InfoExtractor):
             'title': '%s - %s' % (track['artists'][0]['name'], track['title']),
             'filesize': int_or_none(track.get('fileSize')),
             'duration': float_or_none(track.get('durationMs'), 1000),
+            'thumbnail': a_thumb,
         }
 
     def _real_extract(self, url):

From b30c4992a93d411f4f89faf2af153fc580138a90 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?=
 <jaime.marquinez.ferrandiz@gmail.com>
Date: Tue, 13 Oct 2015 21:14:33 +0200
Subject: [PATCH 07/83] [channel9] Return a single dictionary for single videos
 (closes #7086)

Returning a list is deprecated.
---
 youtube_dl/extractor/channel9.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/youtube_dl/extractor/channel9.py b/youtube_dl/extractor/channel9.py
index 3dfc24f5b..79fd0a30e 100644
--- a/youtube_dl/extractor/channel9.py
+++ b/youtube_dl/extractor/channel9.py
@@ -224,12 +224,12 @@ class Channel9IE(InfoExtractor):
         if contents is None:
             return contents
 
-        authors = self._extract_authors(html)
+        if len(contents) > 1:
+            raise ExtractorError('Got more than one entry')
+        result = contents[0]
+        result['authors'] = self._extract_authors(html)
 
-        for content in contents:
-            content['authors'] = authors
-
-        return contents
+        return result
 
     def _extract_session(self, html, content_path):
         contents = self._extract_content(html, content_path)

From 506e261d2073d8c00d5b43d272e8173cb0d63728 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?=
 <jaime.marquinez.ferrandiz@gmail.com>
Date: Tue, 13 Oct 2015 21:18:30 +0200
Subject: [PATCH 08/83] [channel9] strip 'session_day'

---
 youtube_dl/extractor/channel9.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/youtube_dl/extractor/channel9.py b/youtube_dl/extractor/channel9.py
index 79fd0a30e..1ce004932 100644
--- a/youtube_dl/extractor/channel9.py
+++ b/youtube_dl/extractor/channel9.py
@@ -158,7 +158,7 @@ class Channel9IE(InfoExtractor):
 
     def _extract_session_day(self, html):
         m = re.search(r'<li class="day">\s*<a href="/Events/[^"]+">(?P<day>[^<]+)</a>\s*</li>', html)
-        return m.group('day') if m is not None else None
+        return m.group('day').strip() if m is not None else None
 
     def _extract_session_room(self, html):
         m = re.search(r'<li class="room">\s*(?P<room>.+?)\s*</li>', html)

From 3dc582e5ea69af4ad7f51d30c1d87cf93aa6b72b Mon Sep 17 00:00:00 2001
From: kaspi <je326@hotmail.com>
Date: Mon, 12 Oct 2015 01:25:57 -0400
Subject: [PATCH 09/83] [fczenit] Add extractor

Closes #7143.
---
 youtube_dl/extractor/__init__.py |  1 +
 youtube_dl/extractor/fczenit.py  | 41 ++++++++++++++++++++++++++++++++
 2 files changed, 42 insertions(+)
 create mode 100644 youtube_dl/extractor/fczenit.py

diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index 75720843c..f6d185818 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -167,6 +167,7 @@ from .extremetube import ExtremeTubeIE
 from .facebook import FacebookIE
 from .faz import FazIE
 from .fc2 import FC2IE
+from .fczenit import FczenitIE
 from .firstpost import FirstpostIE
 from .firsttv import FirstTVIE
 from .fivemin import FiveMinIE
diff --git a/youtube_dl/extractor/fczenit.py b/youtube_dl/extractor/fczenit.py
new file mode 100644
index 000000000..f1f150ef2
--- /dev/null
+++ b/youtube_dl/extractor/fczenit.py
@@ -0,0 +1,41 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+
+
+class FczenitIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?fc-zenit\.ru/video/gl(?P<id>[0-9]+)'
+    _TEST = {
+        'url': 'http://fc-zenit.ru/video/gl6785/',
+        'md5': '458bacc24549173fe5a5aa29174a5606',
+        'info_dict': {
+            'id': '6785',
+            'ext': 'mp4',
+            'title': '«Зенит-ТВ»: как Олег Шатов играл против «Урала»',
+        },
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+
+        video_title = self._html_search_regex(r'<div class=\"photoalbum__title\">([^<]+)', webpage, 'title')
+
+        bitrates_raw = self._html_search_regex(r'bitrates:.*\n(.*)\]', webpage, 'video URL')
+        bitrates = re.findall(r'url:.?\'(.+?)\'.*?bitrate:.?([0-9]{3}?)', bitrates_raw)
+
+        formats = [{
+            'url': furl,
+            'tbr': tbr,
+        } for furl, tbr in bitrates]
+
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': video_title,
+            'formats': formats,
+        }

From 26669ea3cf596f2ea4bce9e21ce73c1d8fc3ff72 Mon Sep 17 00:00:00 2001
From: remitamine <remitamine@gmail.com>
Date: Sat, 10 Oct 2015 16:51:37 +0100
Subject: [PATCH 10/83] [5min] extract more video info and formats

Closes #7124.
---
 youtube_dl/extractor/fivemin.py | 84 ++++++++++++++++++++++++++++-----
 1 file changed, 71 insertions(+), 13 deletions(-)

diff --git a/youtube_dl/extractor/fivemin.py b/youtube_dl/extractor/fivemin.py
index 157094e8c..2955965d9 100644
--- a/youtube_dl/extractor/fivemin.py
+++ b/youtube_dl/extractor/fivemin.py
@@ -2,11 +2,15 @@ from __future__ import unicode_literals
 
 from .common import InfoExtractor
 from ..compat import (
-    compat_str,
     compat_urllib_parse,
+    compat_parse_qs,
+    compat_urllib_parse_urlparse,
+    compat_urlparse,
 )
 from ..utils import (
     ExtractorError,
+    parse_duration,
+    replace_extension,
 )
 
 
@@ -28,6 +32,7 @@ class FiveMinIE(InfoExtractor):
                 'id': '518013791',
                 'ext': 'mp4',
                 'title': 'iPad Mini with Retina Display Review',
+                'duration': 177,
             },
         },
         {
@@ -38,9 +43,52 @@ class FiveMinIE(InfoExtractor):
                 'id': '518086247',
                 'ext': 'mp4',
                 'title': 'How to Make a Next-Level Fruit Salad',
+                'duration': 184,
             },
         },
     ]
+    _ERRORS = {
+        'ErrorVideoNotExist': 'We\'re sorry, but the video you are trying to watch does not exist.',
+        'ErrorVideoNoLongerAvailable': 'We\'re sorry, but the video you are trying to watch is no longer available.',
+        'ErrorVideoRejected': 'We\'re sorry, but the video you are trying to watch has been removed.',
+        'ErrorVideoUserNotGeo': 'We\'re sorry, but the video you are trying to watch cannot be viewed from your current location.',
+        'ErrorVideoLibraryRestriction': 'We\'re sorry, but the video you are trying to watch is currently unavailable for viewing at this domain.',
+        'ErrorExposurePermission': 'We\'re sorry, but the video you are trying to watch is currently unavailable for viewing at this domain.',
+    }
+    _QUALITIES = {
+        1: {
+            'width': 640,
+            'height': 360,
+        },
+        2: {
+            'width': 854,
+            'height': 480,
+        },
+        4: {
+            'width': 1280,
+            'height': 720,
+        },
+        8: {
+            'width': 1920,
+            'height': 1080,
+        },
+        16: {
+            'width': 640,
+            'height': 360,
+        },
+        32: {
+            'width': 854,
+            'height': 480,
+        },
+        64: {
+            'width': 1280,
+            'height': 720,
+        },
+        128: {
+            'width': 640,
+            'height': 360,
+        },
+    }
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
@@ -59,26 +107,36 @@ class FiveMinIE(InfoExtractor):
             'https://syn.5min.com/handlers/SenseHandler.ashx?' + query,
             video_id)
         if not response['success']:
-            err_msg = response['errorMessage']
-            if err_msg == 'ErrorVideoUserNotGeo':
-                msg = 'Video not available from your location'
-            else:
-                msg = 'Aol said: %s' % err_msg
-            raise ExtractorError(msg, expected=True, video_id=video_id)
+            raise ExtractorError(
+                '%s said: %s' % (
+                    self.IE_NAME,
+                    self._ERRORS.get(response['errorMessage'], response['errorMessage'])),
+                expected=True)
         info = response['binding'][0]
 
-        second_id = compat_str(int(video_id[:-2]) + 1)
         formats = []
-        for quality, height in [(1, 320), (2, 480), (4, 720), (8, 1080)]:
-            if any(r['ID'] == quality for r in info['Renditions']):
+        parsed_video_url = compat_urllib_parse_urlparse(compat_parse_qs(
+            compat_urllib_parse_urlparse(info['EmbededURL']).query)['videoUrl'][0])
+        for rendition in info['Renditions']:
+            if rendition['RenditionType'] == 'm3u8':
+                formats.extend(self._extract_m3u8_formats(rendition['Url'], video_id, m3u8_id='hls'))
+            elif rendition['RenditionType'] == 'aac':
+                continue
+            else:
+                rendition_url = compat_urlparse.urlunparse(parsed_video_url._replace(path=replace_extension(parsed_video_url.path.replace('//', '/%s/' % rendition['ID']), rendition['RenditionType'])))
+                quality = self._QUALITIES.get(rendition['ID'], {})
                 formats.append({
-                    'format_id': compat_str(quality),
-                    'url': 'http://avideos.5min.com/%s/%s/%s_%s.mp4' % (second_id[-3:], second_id, video_id, quality),
-                    'height': height,
+                    'format_id': '%s-%d' % (rendition['RenditionType'], rendition['ID']),
+                    'url': rendition_url,
+                    'width': quality.get('width'),
+                    'height': quality.get('height'),
                 })
+        self._sort_formats(formats)
 
         return {
             'id': video_id,
             'title': info['Title'],
+            'thumbnail': info.get('ThumbURL'),
+            'duration': parse_duration(info.get('Duration')),
             'formats': formats,
         }

From 1f36085df94c2addd1175e7e299f6235aca3ac68 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?=
 <jaime.marquinez.ferrandiz@gmail.com>
Date: Wed, 14 Oct 2015 13:41:39 +0200
Subject: [PATCH 11/83] [vimeo] Fix extraction of password protected videos
 (fixes #7169)

---
 youtube_dl/extractor/vimeo.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py
index 027f47ee3..fa1b22049 100644
--- a/youtube_dl/extractor/vimeo.py
+++ b/youtube_dl/extractor/vimeo.py
@@ -212,7 +212,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
             url = url.replace('http://', 'https://')
         password_request = compat_urllib_request.Request(url + '/password', data)
         password_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
-        password_request.add_header('Cookie', 'clip_v=1; vuid=%s' % vuid)
+        password_request.add_header('Cookie', 'clip_test2=1; vuid=%s' % vuid)
         password_request.add_header('Referer', url)
         return self._download_webpage(
             password_request, video_id,

From 36bb63fad19df5ee419979f875e2265936511644 Mon Sep 17 00:00:00 2001
From: remitamine <remitamine@gmail.com>
Date: Wed, 14 Oct 2015 14:13:53 +0100
Subject: [PATCH 12/83] [criterion] fix description extraction

---
 youtube_dl/extractor/criterion.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/youtube_dl/extractor/criterion.py b/youtube_dl/extractor/criterion.py
index 4fb178165..dedb810a0 100644
--- a/youtube_dl/extractor/criterion.py
+++ b/youtube_dl/extractor/criterion.py
@@ -27,9 +27,7 @@ class CriterionIE(InfoExtractor):
         final_url = self._search_regex(
             r'so.addVariable\("videoURL", "(.+?)"\)\;', webpage, 'video url')
         title = self._og_search_title(webpage)
-        description = self._html_search_regex(
-            r'<meta name="description" content="(.+?)" />',
-            webpage, 'video description')
+        description = self._html_search_meta('description', webpage)
         thumbnail = self._search_regex(
             r'so.addVariable\("thumbnailURL", "(.+?)"\)\;',
             webpage, 'thumbnail url')

From 7a6d76a64d8a89a08bb79791506fc18b993c4580 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com>
Date: Wed, 14 Oct 2015 20:49:39 +0600
Subject: [PATCH 13/83] [extractor/common] Require closing quote in _og_regexes
 (Closes #7174)

E.g. do not match `property='og:video:type'` when `og:video` is requested.
---
 youtube_dl/extractor/common.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 0082a4c84..a0c4af92f 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -646,7 +646,8 @@ class InfoExtractor(object):
     @staticmethod
     def _og_regexes(prop):
         content_re = r'content=(?:"([^>]+?)"|\'([^>]+?)\'|\s*([^\s"\'=<>`]+?))'
-        property_re = r'(?:name|property)=[\'"]?og:%s[\'"]?' % re.escape(prop)
+        property_re = (r'(?:name|property)=(?:\'og:%(prop)s\'|"og:%(prop)s"|\s*og:%(prop)s\b)'
+                       % {'prop': re.escape(prop)})
         template = r'<meta[^>]+?%s[^>]+?%s'
         return [
             template % (property_re, content_re),

From 1c29e81e620241b9013b23e7acd9d6ab06587fb1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com>
Date: Wed, 14 Oct 2015 20:58:52 +0600
Subject: [PATCH 14/83] [test_InfoExtractor] Add test for
 7a6d76a64d8a89a08bb79791506fc18b993c4580

---
 test/test_InfoExtractor.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py
index be8d12997..4ce5b5a35 100644
--- a/test/test_InfoExtractor.py
+++ b/test/test_InfoExtractor.py
@@ -35,10 +35,12 @@ class TestInfoExtractor(unittest.TestCase):
             <meta name="og:title" content='Foo'/>
             <meta content="Some video's description " name="og:description"/>
             <meta property='og:image' content='http://domain.com/pic.jpg?key1=val1&amp;key2=val2'/>
+            <meta content='application/x-shockwave-flash' property='og:video:type'>
             '''
         self.assertEqual(ie._og_search_title(html), 'Foo')
         self.assertEqual(ie._og_search_description(html), 'Some video\'s description ')
         self.assertEqual(ie._og_search_thumbnail(html), 'http://domain.com/pic.jpg?key1=val1&key2=val2')
+        self.assertEqual(ie._og_search_video_url(html, default=None), None)
 
     def test_html_search_meta(self):
         ie = self.ie

From db0a8ad97993cb3f0c398d3a5dc55389565e0ffd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com>
Date: Wed, 14 Oct 2015 21:11:06 +0600
Subject: [PATCH 15/83] [test_InfoExtractor] Add test for unquoted attribute

---
 test/test_InfoExtractor.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py
index 4ce5b5a35..2a00d09a5 100644
--- a/test/test_InfoExtractor.py
+++ b/test/test_InfoExtractor.py
@@ -36,11 +36,13 @@ class TestInfoExtractor(unittest.TestCase):
             <meta content="Some video's description " name="og:description"/>
             <meta property='og:image' content='http://domain.com/pic.jpg?key1=val1&amp;key2=val2'/>
             <meta content='application/x-shockwave-flash' property='og:video:type'>
+            <meta content='Foo' property=og:foobar>
             '''
         self.assertEqual(ie._og_search_title(html), 'Foo')
         self.assertEqual(ie._og_search_description(html), 'Some video\'s description ')
         self.assertEqual(ie._og_search_thumbnail(html), 'http://domain.com/pic.jpg?key1=val1&key2=val2')
         self.assertEqual(ie._og_search_video_url(html, default=None), None)
+        self.assertEqual(ie._og_search_property('foobar', html), 'Foo')
 
     def test_html_search_meta(self):
         ie = self.ie

From ab953c64a0e8b8558e95d0318110c0885a4eec3d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com>
Date: Wed, 14 Oct 2015 21:15:29 +0600
Subject: [PATCH 16/83] [yandexmusic:track] Extract original size thumbnail
 (Closes #7160)

---
 youtube_dl/extractor/yandexmusic.py | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/youtube_dl/extractor/yandexmusic.py b/youtube_dl/extractor/yandexmusic.py
index 6842f834f..08dc81f3a 100644
--- a/youtube_dl/extractor/yandexmusic.py
+++ b/youtube_dl/extractor/yandexmusic.py
@@ -46,14 +46,12 @@ class YandexMusicTrackIE(InfoExtractor):
                 % (data['host'], key, data['ts'] + data['path'], storage[1]))
 
     def _get_track_info(self, track):
-        album = track['albums'][0]
-        a_thumb = None
-
-        if 'coverUri' in album:
-            a_thumb = album['coverUri']
-            if a_thumb:
-                a_thumb = 'http://' + a_thumb.replace('%%', '1000x1000')
-
+        thumbnail = None
+        cover_uri = track.get('albums', [{}])[0].get('coverUri')
+        if cover_uri:
+            thumbnail = cover_uri.replace('%%', 'orig')
+            if not thumbnail.startswith('http'):
+                thumbnail = 'http://' + thumbnail
         return {
             'id': track['id'],
             'ext': 'mp3',
@@ -61,7 +59,7 @@ class YandexMusicTrackIE(InfoExtractor):
             'title': '%s - %s' % (track['artists'][0]['name'], track['title']),
             'filesize': int_or_none(track.get('fileSize')),
             'duration': float_or_none(track.get('durationMs'), 1000),
-            'thumbnail': a_thumb,
+            'thumbnail': thumbnail,
         }
 
     def _real_extract(self, url):

From 9fb66c780cee8668b1bb07f70e70ae1161e13320 Mon Sep 17 00:00:00 2001
From: remitamine <remitamine@gmail.com>
Date: Wed, 14 Oct 2015 17:25:07 +0100
Subject: [PATCH 17/83] [megavideoz] remove extractor

---
 youtube_dl/extractor/__init__.py   |  1 -
 youtube_dl/extractor/megavideoz.py | 56 ------------------------------
 2 files changed, 57 deletions(-)
 delete mode 100644 youtube_dl/extractor/megavideoz.py

diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index f6d185818..462717b1e 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -319,7 +319,6 @@ from .macgamestore import MacGameStoreIE
 from .mailru import MailRuIE
 from .malemotion import MalemotionIE
 from .mdr import MDRIE
-from .megavideoz import MegaVideozIE
 from .metacafe import MetacafeIE
 from .metacritic import MetacriticIE
 from .mgoon import MgoonIE
diff --git a/youtube_dl/extractor/megavideoz.py b/youtube_dl/extractor/megavideoz.py
deleted file mode 100644
index af7ff07ea..000000000
--- a/youtube_dl/extractor/megavideoz.py
+++ /dev/null
@@ -1,56 +0,0 @@
-# encoding: utf-8
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from ..utils import (
-    ExtractorError,
-    float_or_none,
-    xpath_text,
-)
-
-
-class MegaVideozIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?megavideoz\.eu/video/(?P<id>[^/]+)(?:/(?P<display_id>[^/]+))?'
-    _TEST = {
-        'url': 'http://megavideoz.eu/video/WM6UB919XMXH/SMPTE-Universal-Film-Leader',
-        'info_dict': {
-            'id': '48723',
-            'display_id': 'SMPTE-Universal-Film-Leader',
-            'ext': 'mp4',
-            'title': 'SMPTE Universal Film Leader',
-            'thumbnail': 're:https?://.*?\.jpg',
-            'duration': 10.93,
-        }
-    }
-
-    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-        display_id = mobj.group('display_id') or video_id
-
-        webpage = self._download_webpage(url, display_id)
-
-        if any(p in webpage for p in ('>Video Not Found<', '>404 Error<')):
-            raise ExtractorError('Video %s does not exist' % video_id, expected=True)
-
-        config = self._download_xml(
-            self._search_regex(
-                r"var\s+cnf\s*=\s*'([^']+)'", webpage, 'cnf url'),
-            display_id)
-
-        video_url = xpath_text(config, './file', 'video url', fatal=True)
-        title = xpath_text(config, './title', 'title', fatal=True)
-        thumbnail = xpath_text(config, './image', 'thumbnail')
-        duration = float_or_none(xpath_text(config, './duration', 'duration'))
-        video_id = xpath_text(config, './mediaid', 'video id') or video_id
-
-        return {
-            'id': video_id,
-            'display_id': display_id,
-            'url': video_url,
-            'title': title,
-            'thumbnail': thumbnail,
-            'duration': duration
-        }

From 1812afb7b396f4954d5d1ca1cec1c3f2d67550c6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com>
Date: Wed, 14 Oct 2015 22:35:01 +0600
Subject: [PATCH 18/83] [utils] Do not fail in int_or_none on non-numeric data
 (Closes #7175)

---
 youtube_dl/utils.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 1dc3153fd..86c693358 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -1371,7 +1371,12 @@ def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
             v = getattr(v, get_attr, None)
     if v == '':
         v = None
-    return default if v is None else (int(v) * invscale // scale)
+    if v is None:
+        return default
+    try:
+        return int(v) * invscale // scale
+    except ValueError:
+        pass
 
 
 def str_or_none(v, default=None):

From caf80631f0c57b29187e2aa909fa1a3a6325d6e6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com>
Date: Wed, 14 Oct 2015 22:36:37 +0600
Subject: [PATCH 19/83] [utils] Do not fail in float_or_none on non-numeric
 data

---
 youtube_dl/utils.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 86c693358..83b44caaa 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -1392,7 +1392,12 @@ def str_to_int(int_str):
 
 
 def float_or_none(v, scale=1, invscale=1, default=None):
-    return default if v is None else (float(v) * invscale / scale)
+    if v is None:
+        return default
+    try:
+        return float(v) * invscale / scale
+    except ValueError:
+        return default
 
 
 def parse_duration(s):

From af98f8ff37b3a0d9d1f743f4fc6c646333501eb6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com>
Date: Wed, 14 Oct 2015 22:37:03 +0600
Subject: [PATCH 20/83] [utils] Return default on fail in int_or_none

---
 youtube_dl/utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 83b44caaa..7dbe25661 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -1376,7 +1376,7 @@ def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
     try:
         return int(v) * invscale // scale
     except ValueError:
-        pass
+        return default
 
 
 def str_or_none(v, default=None):

From 1db82381e38181aafbd78c65c58f005ad84cc08a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com>
Date: Thu, 15 Oct 2015 01:52:25 +0600
Subject: [PATCH 21/83] [channel9] Add low quality formats and modernize

---
 youtube_dl/extractor/channel9.py | 35 ++++++++++++--------------------
 1 file changed, 13 insertions(+), 22 deletions(-)

diff --git a/youtube_dl/extractor/channel9.py b/youtube_dl/extractor/channel9.py
index 1ce004932..3a88181d8 100644
--- a/youtube_dl/extractor/channel9.py
+++ b/youtube_dl/extractor/channel9.py
@@ -3,7 +3,11 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..utils import ExtractorError
+from ..utils import (
+    ExtractorError,
+    parse_filesize,
+    qualities,
+)
 
 
 class Channel9IE(InfoExtractor):
@@ -52,23 +56,6 @@ class Channel9IE(InfoExtractor):
 
     _RSS_URL = 'http://channel9.msdn.com/%s/RSS'
 
-    # Sorted by quality
-    _known_formats = ['MP3', 'MP4', 'Mid Quality WMV', 'Mid Quality MP4', 'High Quality WMV', 'High Quality MP4']
-
-    def _restore_bytes(self, formatted_size):
-        if not formatted_size:
-            return 0
-        m = re.match(r'^(?P<size>\d+(?:\.\d+)?)\s+(?P<units>[a-zA-Z]+)', formatted_size)
-        if not m:
-            return 0
-        units = m.group('units')
-        try:
-            exponent = ['B', 'KB', 'MB', 'GB', 'TB', 'PB', 'EB', 'ZB', 'YB'].index(units.upper())
-        except ValueError:
-            return 0
-        size = float(m.group('size'))
-        return int(size * (1024 ** exponent))
-
     def _formats_from_html(self, html):
         FORMAT_REGEX = r'''
             (?x)
@@ -78,16 +65,20 @@ class Channel9IE(InfoExtractor):
             <h3>File\s+size</h3>\s*(?P<filesize>.*?)\s*
             </div>)?                                                # File size part may be missing
         '''
-        # Extract known formats
+        quality = qualities((
+            'MP3', 'MP4',
+            'Low Quality WMV', 'Low Quality MP4',
+            'Mid Quality WMV', 'Mid Quality MP4',
+            'High Quality WMV', 'High Quality MP4'))
         formats = [{
             'url': x.group('url'),
             'format_id': x.group('quality'),
             'format_note': x.group('note'),
             'format': '%s (%s)' % (x.group('quality'), x.group('note')),
-            'filesize': self._restore_bytes(x.group('filesize')),  # File size is approximate
-            'preference': self._known_formats.index(x.group('quality')),
+            'filesize_approx': parse_filesize(x.group('filesize')),
+            'quality': quality(x.group('quality')),
             'vcodec': 'none' if x.group('note') == 'Audio only' else None,
-        } for x in list(re.finditer(FORMAT_REGEX, html)) if x.group('quality') in self._known_formats]
+        } for x in list(re.finditer(FORMAT_REGEX, html))]
 
         self._sort_formats(formats)
 

From a13d06de420f6968425d48030c37e1150ff9ed6a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com>
Date: Thu, 15 Oct 2015 01:57:59 +0600
Subject: [PATCH 22/83] [channel9] Add test for low quality mp4

---
 youtube_dl/extractor/channel9.py | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/youtube_dl/extractor/channel9.py b/youtube_dl/extractor/channel9.py
index 3a88181d8..554399787 100644
--- a/youtube_dl/extractor/channel9.py
+++ b/youtube_dl/extractor/channel9.py
@@ -51,6 +51,21 @@ class Channel9IE(InfoExtractor):
                 'thumbnail': 'http://video.ch9.ms/ch9/87e1/0300391f-a455-4c72-bec3-4422f19287e1/selfservicenuk_512.jpg',
                 'authors': ['Mike Wilmot'],
             },
+        },
+        {
+            # low quality mp4 is best
+            'url': 'https://channel9.msdn.com/Events/CPP/CppCon-2015/Ranges-for-the-Standard-Library',
+            'info_dict': {
+                'id': 'Events/CPP/CppCon-2015/Ranges-for-the-Standard-Library',
+                'ext': 'mp4',
+                'title': 'Ranges for the Standard Library',
+                'description': 'md5:2e6b4917677af3728c5f6d63784c4c5d',
+                'duration': 5646,
+                'thumbnail': 're:http://.*\.jpg',
+            },
+            'params': {
+                'skip_download': True,
+            },
         }
     ]
 

From fafc7950e2230bf25ac7c7563f1704cf8f134f64 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com>
Date: Thu, 15 Oct 2015 01:59:11 +0600
Subject: [PATCH 23/83] [channel9] Update tests' thumbnails

---
 youtube_dl/extractor/channel9.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/youtube_dl/extractor/channel9.py b/youtube_dl/extractor/channel9.py
index 554399787..c74553dcf 100644
--- a/youtube_dl/extractor/channel9.py
+++ b/youtube_dl/extractor/channel9.py
@@ -32,7 +32,7 @@ class Channel9IE(InfoExtractor):
                 'title': 'Developer Kick-Off Session: Stuff We Love',
                 'description': 'md5:c08d72240b7c87fcecafe2692f80e35f',
                 'duration': 4576,
-                'thumbnail': 'http://video.ch9.ms/ch9/9d51/03902f2d-fc97-4d3c-b195-0bfe15a19d51/KOS002_220.jpg',
+                'thumbnail': 're:http://.*\.jpg',
                 'session_code': 'KOS002',
                 'session_day': 'Day 1',
                 'session_room': 'Arena 1A',
@@ -48,7 +48,7 @@ class Channel9IE(InfoExtractor):
                 'title': 'Self-service BI with Power BI - nuclear testing',
                 'description': 'md5:d1e6ecaafa7fb52a2cacdf9599829f5b',
                 'duration': 1540,
-                'thumbnail': 'http://video.ch9.ms/ch9/87e1/0300391f-a455-4c72-bec3-4422f19287e1/selfservicenuk_512.jpg',
+                'thumbnail': 're:http://.*\.jpg',
                 'authors': ['Mike Wilmot'],
             },
         },

From 6744f36db710eebe2ccc633e7f4f6132b968b0ec Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com>
Date: Fri, 16 Oct 2015 08:44:19 +0600
Subject: [PATCH 24/83] [jeuxvideo] Fallback on og:title (Closes #7186, closes
 #7190)

---
 youtube_dl/extractor/jeuxvideo.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/youtube_dl/extractor/jeuxvideo.py b/youtube_dl/extractor/jeuxvideo.py
index 1df084d87..eef7daa29 100644
--- a/youtube_dl/extractor/jeuxvideo.py
+++ b/youtube_dl/extractor/jeuxvideo.py
@@ -28,7 +28,7 @@ class JeuxVideoIE(InfoExtractor):
         mobj = re.match(self._VALID_URL, url)
         title = mobj.group(1)
         webpage = self._download_webpage(url, title)
-        title = self._html_search_meta('name', webpage)
+        title = self._html_search_meta('name', webpage) or self._og_search_title(webpage)
         config_url = self._html_search_regex(
             r'data-src="(/contenu/medias/video.php.*?)"',
             webpage, 'config URL')

From 8daeeedc06f420e2a87ba4755b56e721391cedba Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com>
Date: Sat, 17 Oct 2015 00:26:45 +0600
Subject: [PATCH 25/83] [bbc] Fix FutureWarning

---
 youtube_dl/extractor/bbc.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/youtube_dl/extractor/bbc.py b/youtube_dl/extractor/bbc.py
index 68995f81e..1b3a33e4e 100644
--- a/youtube_dl/extractor/bbc.py
+++ b/youtube_dl/extractor/bbc.py
@@ -421,7 +421,7 @@ class BBCCoUkIE(InfoExtractor):
                 continue
             title = playlist.find('./{%s}title' % self._EMP_PLAYLIST_NS).text
             description_el = playlist.find('./{%s}summary' % self._EMP_PLAYLIST_NS)
-            description = description_el.text if description_el else None
+            description = description_el.text if description_el is not None else None
 
             def get_programme_id(item):
                 def get_from_attributes(item):

From 1e52776ac3ebbafc2ec4697f3bc6ba05b7e5a9f8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com>
Date: Sat, 17 Oct 2015 00:46:38 +0600
Subject: [PATCH 26/83] [bandcamp] Prepend download URL with scheme when
 necessary (2) (#7077)

---
 youtube_dl/extractor/bandcamp.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/youtube_dl/extractor/bandcamp.py b/youtube_dl/extractor/bandcamp.py
index a27f3e748..f19e19001 100644
--- a/youtube_dl/extractor/bandcamp.py
+++ b/youtube_dl/extractor/bandcamp.py
@@ -52,7 +52,7 @@ class BandcampIE(InfoExtractor):
                     ext, abr_str = format_id.split('-', 1)
                     formats.append({
                         'format_id': format_id,
-                        'url': format_url,
+                        'url': self._proto_relative_url(format_url, 'http:'),
                         'ext': ext,
                         'vcodec': 'none',
                         'acodec': ext,

From ba717dca97925a21870fedcb46358d06cd5485ec Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com>
Date: Sat, 17 Oct 2015 00:51:35 +0600
Subject: [PATCH 27/83] [bandcamp] Modernize

---
 youtube_dl/extractor/bandcamp.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/youtube_dl/extractor/bandcamp.py b/youtube_dl/extractor/bandcamp.py
index f19e19001..c1ef8051d 100644
--- a/youtube_dl/extractor/bandcamp.py
+++ b/youtube_dl/extractor/bandcamp.py
@@ -10,6 +10,8 @@ from ..compat import (
 )
 from ..utils import (
     ExtractorError,
+    float_or_none,
+    int_or_none,
 )
 
 
@@ -56,7 +58,7 @@ class BandcampIE(InfoExtractor):
                         'ext': ext,
                         'vcodec': 'none',
                         'acodec': ext,
-                        'abr': int(abr_str),
+                        'abr': int_or_none(abr_str),
                     })
 
                 self._sort_formats(formats)
@@ -65,7 +67,7 @@ class BandcampIE(InfoExtractor):
                     'id': compat_str(data['id']),
                     'title': data['title'],
                     'formats': formats,
-                    'duration': float(data['duration']),
+                    'duration': float_or_none(data.get('duration')),
                 }
             else:
                 raise ExtractorError('No free songs found')

From 246ce1085804ead9126328cb1ec761d308f561c0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com>
Date: Sat, 17 Oct 2015 01:08:23 +0600
Subject: [PATCH 28/83] [rte] Update _VALID_URL (Closes #7198)

---
 youtube_dl/extractor/rte.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/youtube_dl/extractor/rte.py b/youtube_dl/extractor/rte.py
index 04158b993..427c70866 100644
--- a/youtube_dl/extractor/rte.py
+++ b/youtube_dl/extractor/rte.py
@@ -9,7 +9,7 @@ from ..utils import (
 
 
 class RteIE(InfoExtractor):
-    _VALID_URL = r'http?://(?:www\.)?rte\.ie/player/[^/]{2,3}/show/(?P<id>[0-9]+)/'
+    _VALID_URL = r'http?://(?:www\.)?rte\.ie/player/[^/]{2,3}/show/[^/]+/(?P<id>[0-9]+)'
     _TEST = {
         'url': 'http://www.rte.ie/player/de/show/10363114/',
         'info_dict': {

From 2ccb37beb9e35ebbf2cdf65a4c1641e5286de1e8 Mon Sep 17 00:00:00 2001
From: Philipp Hagemeister <phihag@phihag.de>
Date: Fri, 16 Oct 2015 21:40:38 +0200
Subject: [PATCH 29/83] release 2015.10.16

---
 docs/supportedsites.md | 2 +-
 youtube_dl/version.py  | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/supportedsites.md b/docs/supportedsites.md
index dc0354095..47f7da86d 100644
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@@ -159,6 +159,7 @@
  - **facebook**
  - **faz.net**
  - **fc2**
+ - **Fczenit**
  - **fernsehkritik.tv**
  - **Firstpost**
  - **FiveTV**
@@ -281,7 +282,6 @@
  - **Malemotion**
  - **MDR**
  - **media.ccc.de**
- - **MegaVideoz**
  - **metacafe**
  - **Metacritic**
  - **Mgoon**
diff --git a/youtube_dl/version.py b/youtube_dl/version.py
index aaa43d315..31d2a9dc0 100644
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,3 +1,3 @@
 from __future__ import unicode_literals
 
-__version__ = '2015.10.13'
+__version__ = '2015.10.16'

From 36eb802baffda9930e8c821e1adf94b0b53b5ac6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?=
 <jaime.marquinez.ferrandiz@gmail.com>
Date: Sat, 17 Oct 2015 11:49:51 +0200
Subject: [PATCH 30/83] [rte] Replace expired test

According to their webpage it should be available until October 2035.
---
 youtube_dl/extractor/rte.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/youtube_dl/extractor/rte.py b/youtube_dl/extractor/rte.py
index 427c70866..2811767b9 100644
--- a/youtube_dl/extractor/rte.py
+++ b/youtube_dl/extractor/rte.py
@@ -11,14 +11,14 @@ from ..utils import (
 class RteIE(InfoExtractor):
     _VALID_URL = r'http?://(?:www\.)?rte\.ie/player/[^/]{2,3}/show/[^/]+/(?P<id>[0-9]+)'
     _TEST = {
-        'url': 'http://www.rte.ie/player/de/show/10363114/',
+        'url': 'http://www.rte.ie/player/ie/show/iwitness-862/10478715/',
         'info_dict': {
-            'id': '10363114',
+            'id': '10478715',
             'ext': 'mp4',
-            'title': 'One News',
+            'title': 'Watch iWitness  online',
             'thumbnail': 're:^https?://.*\.jpg$',
-            'description': 'The One O\'Clock News followed by Weather.',
-            'duration': 436.844,
+            'description': 'iWitness : The spirit of Ireland, one voice and one minute at a time.',
+            'duration': 60.046,
         },
         'params': {
             'skip_download': 'f4m fails with --test atm'

From 6df7179e6c3c7df165b5788ecb5e712da122356f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?=
 <jaime.marquinez.ferrandiz@gmail.com>
Date: Sat, 17 Oct 2015 11:53:59 +0200
Subject: [PATCH 31/83] [rte] Actually recognize https urls

There was a missing 's' before the '?'.
---
 youtube_dl/extractor/rte.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/youtube_dl/extractor/rte.py b/youtube_dl/extractor/rte.py
index 2811767b9..d9cfbf180 100644
--- a/youtube_dl/extractor/rte.py
+++ b/youtube_dl/extractor/rte.py
@@ -9,7 +9,7 @@ from ..utils import (
 
 
 class RteIE(InfoExtractor):
-    _VALID_URL = r'http?://(?:www\.)?rte\.ie/player/[^/]{2,3}/show/[^/]+/(?P<id>[0-9]+)'
+    _VALID_URL = r'https?://(?:www\.)?rte\.ie/player/[^/]{2,3}/show/[^/]+/(?P<id>[0-9]+)'
     _TEST = {
         'url': 'http://www.rte.ie/player/ie/show/iwitness-862/10478715/',
         'info_dict': {

From fbd9f6ea804328d536aafd2b20a8afb72968e351 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com>
Date: Sat, 17 Oct 2015 18:28:21 +0600
Subject: [PATCH 32/83] [twitch] Improve authentication

---
 youtube_dl/extractor/twitch.py | 20 +++++++++++---------
 1 file changed, 11 insertions(+), 9 deletions(-)

diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py
index 023911c41..891499a1f 100644
--- a/youtube_dl/extractor/twitch.py
+++ b/youtube_dl/extractor/twitch.py
@@ -15,6 +15,7 @@ from ..compat import (
     compat_urlparse,
 )
 from ..utils import (
+    encode_dict,
     ExtractorError,
     int_or_none,
     parse_duration,
@@ -27,8 +28,7 @@ class TwitchBaseIE(InfoExtractor):
 
     _API_BASE = 'https://api.twitch.tv'
     _USHER_BASE = 'http://usher.twitch.tv'
-    _LOGIN_URL = 'https://secure.twitch.tv/login'
-    _LOGIN_POST_URL = 'https://passport.twitch.tv/authentications/new'
+    _LOGIN_URL = 'http://www.twitch.tv/login'
     _NETRC_MACHINE = 'twitch'
 
     def _handle_error(self, response):
@@ -61,26 +61,28 @@ class TwitchBaseIE(InfoExtractor):
         if username is None:
             return
 
-        login_page = self._download_webpage(
+        login_page, handle = self._download_webpage_handle(
             self._LOGIN_URL, None, 'Downloading login page')
 
         login_form = self._hidden_inputs(login_page)
 
         login_form.update({
-            'login': username.encode('utf-8'),
-            'password': password.encode('utf-8'),
+            'username': username,
+            'password': password,
         })
 
+        redirect_url = handle.geturl()
+
         post_url = self._search_regex(
             r'<form[^>]+action=(["\'])(?P<url>.+?)\1', login_page,
-            'post url', default=self._LOGIN_POST_URL, group='url')
+            'post url', default=redirect_url, group='url')
 
         if not post_url.startswith('http'):
-            post_url = compat_urlparse.urljoin(self._LOGIN_URL, post_url)
+            post_url = compat_urlparse.urljoin(redirect_url, post_url)
 
         request = compat_urllib_request.Request(
-            post_url, compat_urllib_parse.urlencode(login_form).encode('utf-8'))
-        request.add_header('Referer', self._LOGIN_URL)
+            post_url, compat_urllib_parse.urlencode(encode_dict(login_form)).encode('utf-8'))
+        request.add_header('Referer', redirect_url)
         response = self._download_webpage(
             request, None, 'Logging in as %s' % username)
 

From e5e9966199c00a6b89f1f25e1c7b85effb032537 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com>
Date: Sat, 17 Oct 2015 18:29:54 +0600
Subject: [PATCH 33/83] [twitch:vod] Improve extraction

---
 youtube_dl/extractor/twitch.py | 20 +++++++++++++++-----
 1 file changed, 15 insertions(+), 5 deletions(-)

diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py
index 891499a1f..21ea836ea 100644
--- a/youtube_dl/extractor/twitch.py
+++ b/youtube_dl/extractor/twitch.py
@@ -240,14 +240,24 @@ class TwitchVodIE(TwitchItemBaseIE):
 
     def _real_extract(self, url):
         item_id = self._match_id(url)
+
         info = self._download_info(self._ITEM_SHORTCUT, item_id)
         access_token = self._download_json(
-            '%s/api/vods/%s/access_token' % (self._API_BASE, item_id), item_id,
-            'Downloading %s access token' % self._ITEM_TYPE)
+           '%s/api/vods/%s/access_token' % (self._API_BASE, item_id), item_id,
+           'Downloading %s access token' % self._ITEM_TYPE)
+
         formats = self._extract_m3u8_formats(
-            '%s/vod/%s?nauth=%s&nauthsig=%s&allow_source=true'
-            % (self._USHER_BASE, item_id, access_token['token'], access_token['sig']),
-            item_id, 'mp4')
+           '%s/vod/%s?%s' % (
+               self._USHER_BASE, item_id,
+               compat_urllib_parse.urlencode({
+                    'allow_source': 'true',
+                    'allow_spectre': 'true',
+                    'player': 'twitchweb',
+                    'nauth': access_token['token'],
+                    'nauthsig': access_token['sig'],
+                })),
+           item_id, 'mp4')
+
         self._prefer_source(formats)
         info['formats'] = formats
 

From 350c9481336ac981eadc982b67ccdbc7e28ca0e3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com>
Date: Sat, 17 Oct 2015 18:43:12 +0600
Subject: [PATCH 34/83] [twitch:vod] Formatting

---
 youtube_dl/extractor/twitch.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py
index 21ea836ea..3ec08b674 100644
--- a/youtube_dl/extractor/twitch.py
+++ b/youtube_dl/extractor/twitch.py
@@ -243,20 +243,20 @@ class TwitchVodIE(TwitchItemBaseIE):
 
         info = self._download_info(self._ITEM_SHORTCUT, item_id)
         access_token = self._download_json(
-           '%s/api/vods/%s/access_token' % (self._API_BASE, item_id), item_id,
-           'Downloading %s access token' % self._ITEM_TYPE)
+            '%s/api/vods/%s/access_token' % (self._API_BASE, item_id), item_id,
+            'Downloading %s access token' % self._ITEM_TYPE)
 
         formats = self._extract_m3u8_formats(
-           '%s/vod/%s?%s' % (
-               self._USHER_BASE, item_id,
-               compat_urllib_parse.urlencode({
+            '%s/vod/%s?%s' % (
+                self._USHER_BASE, item_id,
+                compat_urllib_parse.urlencode({
                     'allow_source': 'true',
                     'allow_spectre': 'true',
                     'player': 'twitchweb',
                     'nauth': access_token['token'],
                     'nauthsig': access_token['sig'],
                 })),
-           item_id, 'mp4')
+            item_id, 'mp4')
 
         self._prefer_source(formats)
         info['formats'] = formats

From 41a7b00f183844e93ae2ba46fb4021f257f3ce79 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Luk=C3=A1=C5=A1=20Lalinsk=C3=BD?= <lukas@oxygene.sk>
Date: Sat, 17 Oct 2015 18:18:40 +0200
Subject: [PATCH 35/83] [vimeo] Extract config URL from Vimeo's (new?)
 React-based page

---
 youtube_dl/extractor/vimeo.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py
index fa1b22049..88e462a4d 100644
--- a/youtube_dl/extractor/vimeo.py
+++ b/youtube_dl/extractor/vimeo.py
@@ -286,7 +286,14 @@ class VimeoIE(VimeoBaseInfoExtractor):
         try:
             try:
                 config_url = self._html_search_regex(
-                    r' data-config-url="(.+?)"', webpage, 'config URL')
+                    r' data-config-url="(.+?)"', webpage,
+                    'config URL', default=None)
+                if not config_url:
+                    # New react-based page
+                    vimeo_clip_page_config = self._search_regex(
+                        r'vimeo\.clip_page_config\s*=\s*({.+?});', webpage,
+                        'vimeo clip page config')
+                    config_url = self._parse_json(vimeo_clip_page_config, video_id)['player']['config_url']
                 config_json = self._download_webpage(config_url, video_id)
                 config = json.loads(config_json)
             except RegexNotFoundError:

From dd8417526b13c541e6db8f4200e717b8922a1620 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com>
Date: Sat, 17 Oct 2015 22:48:14 +0600
Subject: [PATCH 36/83] [vimeo] Clarify new react+flux website fallback

---
 youtube_dl/extractor/vimeo.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py
index 88e462a4d..0f84656c0 100644
--- a/youtube_dl/extractor/vimeo.py
+++ b/youtube_dl/extractor/vimeo.py
@@ -289,11 +289,14 @@ class VimeoIE(VimeoBaseInfoExtractor):
                     r' data-config-url="(.+?)"', webpage,
                     'config URL', default=None)
                 if not config_url:
-                    # New react-based page
+                    # Sometimes new react-based page is served instead of old one that require
+                    # different config URL extraction approach (see
+                    # https://github.com/rg3/youtube-dl/pull/7209)
                     vimeo_clip_page_config = self._search_regex(
                         r'vimeo\.clip_page_config\s*=\s*({.+?});', webpage,
                         'vimeo clip page config')
-                    config_url = self._parse_json(vimeo_clip_page_config, video_id)['player']['config_url']
+                    config_url = self._parse_json(
+                        vimeo_clip_page_config, video_id)['player']['config_url']
                 config_json = self._download_webpage(config_url, video_id)
                 config = json.loads(config_json)
             except RegexNotFoundError:

From 59fe4824f80b7e266ea9918ae1b2e49a456b869f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Luk=C3=A1=C5=A1=20Lalinsk=C3=BD?= <lukas@oxygene.sk>
Date: Sat, 17 Oct 2015 18:52:25 +0200
Subject: [PATCH 37/83] [vidme] Better error message for suspended vidme videos

---
 youtube_dl/extractor/vidme.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/youtube_dl/extractor/vidme.py b/youtube_dl/extractor/vidme.py
index 078d283b2..81dcaa231 100644
--- a/youtube_dl/extractor/vidme.py
+++ b/youtube_dl/extractor/vidme.py
@@ -114,6 +114,12 @@ class VidmeIE(InfoExtractor):
 
         video = response['video']
 
+        if video.get('state') == 'user-disabled':
+            raise ExtractorError(
+                'Vidme said: This video has been suspended either due to a copyright claim, '
+                'or for violating the terms of use.',
+                expected=True)
+
         formats = [{
             'format_id': f.get('type'),
             'url': f['uri'],

From 9eb31b265f65ec6b04a508702af1a6feddafb8fe Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com>
Date: Sat, 17 Oct 2015 23:01:24 +0600
Subject: [PATCH 38/83] [vidme] Add user-disabled test

---
 youtube_dl/extractor/vidme.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/youtube_dl/extractor/vidme.py b/youtube_dl/extractor/vidme.py
index 81dcaa231..382517a4a 100644
--- a/youtube_dl/extractor/vidme.py
+++ b/youtube_dl/extractor/vidme.py
@@ -93,6 +93,10 @@ class VidmeIE(InfoExtractor):
         'params': {
             'skip_download': True,
         },
+    }, {
+        # nsfw, user-disabled
+        'url': 'https://vid.me/dzGJ',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):

From 583882fdce19f8c565402f42523b275f96c91575 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Luk=C3=A1=C5=A1=20Lalinsk=C3=BD?= <lukas@oxygene.sk>
Date: Sat, 17 Oct 2015 19:26:30 +0200
Subject: [PATCH 39/83] [dailymotion] Report errors from player v5

---
 youtube_dl/extractor/dailymotion.py | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py
index 80a05cfee..ea1edceb1 100644
--- a/youtube_dl/extractor/dailymotion.py
+++ b/youtube_dl/extractor/dailymotion.py
@@ -96,6 +96,11 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
                 'uploader': 'HotWaves1012',
                 'age_limit': 18,
             }
+        },
+        # geo-restricted, player v5
+        {
+            'url': 'http://www.dailymotion.com/video/xhza0o',
+            'only_matching': True,
         }
     ]
 
@@ -124,6 +129,9 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
         if player_v5:
             player = self._parse_json(player_v5, video_id)
             metadata = player['metadata']
+
+            self._check_error(metadata)
+
             formats = []
             for quality, media_list in metadata['qualities'].items():
                 for media in media_list:
@@ -201,9 +209,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
                 'video info', flags=re.MULTILINE),
             video_id)
 
-        if info.get('error') is not None:
-            msg = 'Couldn\'t get video, Dailymotion says: %s' % info['error']['title']
-            raise ExtractorError(msg, expected=True)
+        self._check_error(info)
 
         formats = []
         for (key, format_id) in self._FORMATS:
@@ -246,6 +252,11 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
             'duration': info['duration']
         }
 
+    def _check_error(self, info):
+        if info.get('error') is not None:
+            msg = 'Couldn\'t get video, Dailymotion says: %s' % info['error']['title']
+            raise ExtractorError(msg, expected=True)
+
     def _get_subtitles(self, video_id, webpage):
         try:
             sub_list = self._download_webpage(

From 648e6a1ffe45ceae2995c3f9ec6a9413aad55640 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com>
Date: Sun, 18 Oct 2015 00:11:34 +0600
Subject: [PATCH 40/83] [youtube] Generalize playlist entries extraction
 (Closes #6699, closes #6992)

---
 youtube_dl/extractor/youtube.py | 121 ++++++++++++++------------------
 1 file changed, 52 insertions(+), 69 deletions(-)

diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index b252e36e1..08e821362 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -178,6 +178,52 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
             return
 
 
+class YoutubePlaylistBaseInfoExtractor(InfoExtractor):
+    # Extract the video ids from the playlist pages
+    def _entries(self, page, playlist_id):
+        more_widget_html = content_html = page
+        for page_num in itertools.count(1):
+            for video_id, video_title in self.extract_videos_from_page(content_html):
+                yield self.url_result(
+                    video_id, 'Youtube', video_id=video_id,
+                    video_title=video_title)
+
+            mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
+            if not mobj:
+                break
+
+            more = self._download_json(
+                'https://youtube.com/%s' % mobj.group('more'), playlist_id,
+                'Downloading page #%s' % page_num,
+                transform_source=uppercase_escape)
+            content_html = more['content_html']
+            if not content_html.strip():
+                # Some webpages show a "Load more" button but they don't
+                # have more videos
+                break
+            more_widget_html = more['load_more_widget_html']
+
+    def extract_videos_from_page(self, page):
+        """Return (video_id, title) pairs matched by self._VIDEO_RE in page, one per id in first-seen order."""
+        ids_in_page, titles_in_page = [], []
+        for mobj in re.finditer(self._VIDEO_RE, page):
+            # The link with index 0 is not the first video of the playlist (may no longer apply)
+            if 'index' in mobj.groupdict() and mobj.group('index') == '0':
+                continue
+            video_id = mobj.group('id')
+            video_title = unescapeHTML(mobj.group('title'))
+            if video_title:
+                video_title = video_title.strip()
+            try:
+                idx = ids_in_page.index(video_id)
+                if video_title and not titles_in_page[idx]:
+                    titles_in_page[idx] = video_title
+            except ValueError:
+                ids_in_page.append(video_id)
+                titles_in_page.append(video_title)
+        return zip(ids_in_page, titles_in_page)
+
+
 class YoutubeIE(YoutubeBaseInfoExtractor):
     IE_DESC = 'YouTube.com'
     _VALID_URL = r"""(?x)^
@@ -1419,7 +1465,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
         }
 
 
-class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
+class YoutubePlaylistIE(YoutubeBaseInfoExtractor, YoutubePlaylistBaseInfoExtractor):
     IE_DESC = 'YouTube.com playlists'
     _VALID_URL = r"""(?x)(?:
                         (?:https?://)?
@@ -1440,7 +1486,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
                         ((?:PL|LL|EC|UU|FL|RD|UL)[0-9A-Za-z-_]{10,})
                      )"""
     _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
-    _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&amp;[^"]*?index=(?P<index>\d+)'
+    _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&amp;[^"]*?index=(?P<index>\d+)(?:[^>]+>(?P<title>[^<]+))?'
     IE_NAME = 'youtube:playlist'
     _TESTS = [{
         'url': 'https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
@@ -1557,37 +1603,11 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
             else:
                 self.report_warning('Youtube gives an alert message: ' + match)
 
-        # Extract the video ids from the playlist pages
-        def _entries():
-            more_widget_html = content_html = page
-            for page_num in itertools.count(1):
-                matches = re.finditer(self._VIDEO_RE, content_html)
-                # We remove the duplicates and the link with index 0
-                # (it's not the first video of the playlist)
-                new_ids = orderedSet(m.group('id') for m in matches if m.group('index') != '0')
-                for vid_id in new_ids:
-                    yield self.url_result(vid_id, 'Youtube', video_id=vid_id)
-
-                mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
-                if not mobj:
-                    break
-
-                more = self._download_json(
-                    'https://youtube.com/%s' % mobj.group('more'), playlist_id,
-                    'Downloading page #%s' % page_num,
-                    transform_source=uppercase_escape)
-                content_html = more['content_html']
-                if not content_html.strip():
-                    # Some webpages show a "Load more" button but they don't
-                    # have more videos
-                    break
-                more_widget_html = more['load_more_widget_html']
-
         playlist_title = self._html_search_regex(
             r'(?s)<h1 class="pl-header-title[^"]*">\s*(.*?)\s*</h1>',
             page, 'title')
 
-        return self.playlist_result(_entries(), playlist_id, playlist_title)
+        return self.playlist_result(self._entries(page, playlist_id), playlist_id, playlist_title)
 
     def _real_extract(self, url):
         # Extract playlist id
@@ -1613,10 +1633,11 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
         return self._extract_playlist(playlist_id)
 
 
-class YoutubeChannelIE(InfoExtractor):
+class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor):
     IE_DESC = 'YouTube.com channels'
     _VALID_URL = r'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/(?P<id>[0-9A-Za-z_-]+)'
     _TEMPLATE_URL = 'https://www.youtube.com/channel/%s/videos'
+    _VIDEO_RE = r'(?:title="(?P<title>[^"]+)"[^>]+)?href="/watch\?v=(?P<id>[0-9A-Za-z_-]+)&?'
     IE_NAME = 'youtube:channel'
     _TESTS = [{
         'note': 'paginated channel',
@@ -1627,22 +1648,6 @@ class YoutubeChannelIE(InfoExtractor):
         }
     }]
 
-    @staticmethod
-    def extract_videos_from_page(page):
-        ids_in_page = []
-        titles_in_page = []
-        for mobj in re.finditer(r'(?:title="(?P<title>[^"]+)"[^>]+)?href="/watch\?v=(?P<id>[0-9A-Za-z_-]+)&?', page):
-            video_id = mobj.group('id')
-            video_title = unescapeHTML(mobj.group('title'))
-            try:
-                idx = ids_in_page.index(video_id)
-                if video_title and not titles_in_page[idx]:
-                    titles_in_page[idx] = video_title
-            except ValueError:
-                ids_in_page.append(video_id)
-                titles_in_page.append(video_title)
-        return zip(ids_in_page, titles_in_page)
-
     def _real_extract(self, url):
         channel_id = self._match_id(url)
 
@@ -1685,29 +1690,7 @@ class YoutubeChannelIE(InfoExtractor):
                 for video_id, video_title in self.extract_videos_from_page(channel_page)]
             return self.playlist_result(entries, channel_id)
 
-        def _entries():
-            more_widget_html = content_html = channel_page
-            for pagenum in itertools.count(1):
-
-                for video_id, video_title in self.extract_videos_from_page(content_html):
-                    yield self.url_result(
-                        video_id, 'Youtube', video_id=video_id,
-                        video_title=video_title)
-
-                mobj = re.search(
-                    r'data-uix-load-more-href="/?(?P<more>[^"]+)"',
-                    more_widget_html)
-                if not mobj:
-                    break
-
-                more = self._download_json(
-                    'https://youtube.com/%s' % mobj.group('more'), channel_id,
-                    'Downloading page #%s' % (pagenum + 1),
-                    transform_source=uppercase_escape)
-                content_html = more['content_html']
-                more_widget_html = more['load_more_widget_html']
-
-        return self.playlist_result(_entries(), channel_id)
+        return self.playlist_result(self._entries(channel_page, channel_id), channel_id)
 
 
 class YoutubeUserIE(YoutubeChannelIE):

From 8e5b1219489be399de55566090e145c89007fa48 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com>
Date: Sun, 18 Oct 2015 00:27:06 +0600
Subject: [PATCH 41/83] [test_youtube_lists] Add test for flat playlist
 entries' titles

---
 test/test_youtube_lists.py | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/test/test_youtube_lists.py b/test/test_youtube_lists.py
index c889b6f15..26aadb34f 100644
--- a/test/test_youtube_lists.py
+++ b/test/test_youtube_lists.py
@@ -57,5 +57,14 @@ class TestYoutubeLists(unittest.TestCase):
         entries = result['entries']
         self.assertEqual(len(entries), 100)
 
+    def test_youtube_flat_playlist_titles(self):
+        dl = FakeYDL()
+        dl.params['extract_flat'] = True
+        ie = YoutubePlaylistIE(dl)
+        result = ie.extract('https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re')
+        self.assertIsPlaylist(result)
+        for entry in result['entries']:
+            self.assertTrue(entry.get('title'))
+
 if __name__ == '__main__':
     unittest.main()

From 7593fbaa126f8bf14eecff7f103cb497e3d31de5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com>
Date: Sun, 18 Oct 2015 01:00:37 +0600
Subject: [PATCH 42/83] [dailymotion] Error spelling

---
 youtube_dl/extractor/dailymotion.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py
index ea1edceb1..9cd9ff17d 100644
--- a/youtube_dl/extractor/dailymotion.py
+++ b/youtube_dl/extractor/dailymotion.py
@@ -254,8 +254,8 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
 
     def _check_error(self, info):
         if info.get('error') is not None:
-            msg = 'Couldn\'t get video, Dailymotion says: %s' % info['error']['title']
-            raise ExtractorError(msg, expected=True)
+            raise ExtractorError(
+                '%s said: %s' % (self.IE_NAME, info['error']['title']), expected=True)
 
     def _get_subtitles(self, video_id, webpage):
         try:

From 5a11b793fe70beb6b0c7a74a489db9e52c4a742b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com>
Date: Sun, 18 Oct 2015 01:36:03 +0600
Subject: [PATCH 43/83] [lynda] Extract all prioritized streams

---
 youtube_dl/extractor/lynda.py | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/youtube_dl/extractor/lynda.py b/youtube_dl/extractor/lynda.py
index 378117270..5c973e75c 100644
--- a/youtube_dl/extractor/lynda.py
+++ b/youtube_dl/extractor/lynda.py
@@ -140,13 +140,14 @@ class LyndaIE(LyndaBaseIE):
 
         prioritized_streams = video_json.get('PrioritizedStreams')
         if prioritized_streams:
-            formats.extend([
-                {
-                    'url': video_url,
-                    'width': int_or_none(format_id),
-                    'format_id': format_id,
-                } for format_id, video_url in prioritized_streams['0'].items()
-            ])
+            for prioritized_stream_id, prioritized_stream in prioritized_streams.items():
+                formats.extend([
+                    {
+                        'url': video_url,
+                        'width': int_or_none(format_id),
+                        'format_id': '%s-%s' % (prioritized_stream_id, format_id),
+                    } for format_id, video_url in prioritized_stream.items()
+                ])
 
         self._check_formats(formats, video_id)
         self._sort_formats(formats)

From 80f48920c8a909ba55d13932524e55ed970f1c6a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com>
Date: Sun, 18 Oct 2015 06:57:57 +0600
Subject: [PATCH 44/83] [crunchyroll] Bypass maturity wall (Closes #7202)

---
 youtube_dl/extractor/crunchyroll.py | 59 ++++++++++++++++++-----------
 1 file changed, 36 insertions(+), 23 deletions(-)

diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py
index 95952bc29..aa258bbc2 100644
--- a/youtube_dl/extractor/crunchyroll.py
+++ b/youtube_dl/extractor/crunchyroll.py
@@ -32,6 +32,26 @@ from ..aes import (
 
 
 class CrunchyrollBaseIE(InfoExtractor):
+    _NETRC_MACHINE = 'crunchyroll'
+
+    def _login(self):
+        (username, password) = self._get_login_info()
+        if username is None:
+            return
+        self.report_login()
+        login_url = 'https://www.crunchyroll.com/?a=formhandler'
+        data = urlencode_postdata({
+            'formname': 'RpcApiUser_Login',
+            'name': username,
+            'password': password,
+        })
+        login_request = compat_urllib_request.Request(login_url, data)
+        login_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
+        self._download_webpage(login_request, None, False, 'Wrong login info')
+
+    def _real_initialize(self):
+        self._login()
+
     def _download_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, tries=1, timeout=5, encoding=None):
         request = (url_or_request if isinstance(url_or_request, compat_urllib_request.Request)
                    else compat_urllib_request.Request(url_or_request))
@@ -46,10 +66,22 @@ class CrunchyrollBaseIE(InfoExtractor):
         return super(CrunchyrollBaseIE, self)._download_webpage(
             request, video_id, note, errnote, fatal, tries, timeout, encoding)
 
+    @staticmethod
+    def _add_skip_wall(url):
+        parsed_url = compat_urlparse.urlparse(url)
+        qs = compat_urlparse.parse_qs(parsed_url.query)
+        # Always force skip_wall to bypass maturity wall, namely 18+ confirmation message:
+        # > This content may be inappropriate for some people.
+        # > Are you sure you want to continue?
+        # since it's not disabled by default in crunchyroll account's settings.
+        # See https://github.com/rg3/youtube-dl/issues/7202.
+        qs['skip_wall'] = ['1']
+        return compat_urlparse.urlunparse(
+            parsed_url._replace(query=compat_urllib_parse.urlencode(qs, True)))
+
 
 class CrunchyrollIE(CrunchyrollBaseIE):
     _VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.(?:com|fr)/(?:media(?:-|/\?id=)|[^/]*/[^/?&]*?)(?P<video_id>[0-9]+))(?:[/?&]|$)'
-    _NETRC_MACHINE = 'crunchyroll'
     _TESTS = [{
         'url': 'http://www.crunchyroll.com/wanna-be-the-strongest-in-the-world/episode-1-an-idol-wrestler-is-born-645513',
         'info_dict': {
@@ -81,7 +113,6 @@ class CrunchyrollIE(CrunchyrollBaseIE):
             # rtmp
             'skip_download': True,
         },
-
     }, {
         'url': 'http://www.crunchyroll.fr/girl-friend-beta/episode-11-goodbye-la-mode-661697',
         'only_matching': True,
@@ -94,24 +125,6 @@ class CrunchyrollIE(CrunchyrollBaseIE):
         '1080': ('80', '108'),
     }
 
-    def _login(self):
-        (username, password) = self._get_login_info()
-        if username is None:
-            return
-        self.report_login()
-        login_url = 'https://www.crunchyroll.com/?a=formhandler'
-        data = urlencode_postdata({
-            'formname': 'RpcApiUser_Login',
-            'name': username,
-            'password': password,
-        })
-        login_request = compat_urllib_request.Request(login_url, data)
-        login_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
-        self._download_webpage(login_request, None, False, 'Wrong login info')
-
-    def _real_initialize(self):
-        self._login()
-
     def _decrypt_subtitles(self, data, iv, id):
         data = bytes_to_intlist(base64.b64decode(data.encode('utf-8')))
         iv = bytes_to_intlist(base64.b64decode(iv.encode('utf-8')))
@@ -254,7 +267,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
         else:
             webpage_url = 'http://www.' + mobj.group('url')
 
-        webpage = self._download_webpage(webpage_url, video_id, 'Downloading webpage')
+        webpage = self._download_webpage(self._add_skip_wall(webpage_url), video_id, 'Downloading webpage')
         note_m = self._html_search_regex(
             r'<div class="showmedia-trailer-notice">(.+?)</div>',
             webpage, 'trailer-notice', default='')
@@ -352,7 +365,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
 
 class CrunchyrollShowPlaylistIE(CrunchyrollBaseIE):
     IE_NAME = "crunchyroll:playlist"
-    _VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.com/(?!(?:news|anime-news|library|forum|launchcalendar|lineup|store|comics|freetrial|login))(?P<id>[\w\-]+))/?$'
+    _VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.com/(?!(?:news|anime-news|library|forum|launchcalendar|lineup|store|comics|freetrial|login))(?P<id>[\w\-]+))/?(?:\?|$)'
 
     _TESTS = [{
         'url': 'http://www.crunchyroll.com/a-bridge-to-the-starry-skies-hoshizora-e-kakaru-hashi',
@@ -366,7 +379,7 @@ class CrunchyrollShowPlaylistIE(CrunchyrollBaseIE):
     def _real_extract(self, url):
         show_id = self._match_id(url)
 
-        webpage = self._download_webpage(url, show_id)
+        webpage = self._download_webpage(self._add_skip_wall(url), show_id)
         title = self._html_search_regex(
             r'(?s)<h1[^>]*>\s*<span itemprop="name">(.*?)</span>',
             webpage, 'title')

From 49941c4e4f6e33785a3be1e0d103bd81657d8a0d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com>
Date: Sun, 18 Oct 2015 07:06:47 +0600
Subject: [PATCH 45/83] [crunchyroll] Add maturity wall reference tests (#7202)

---
 youtube_dl/extractor/crunchyroll.py | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py
index aa258bbc2..cecd0c784 100644
--- a/youtube_dl/extractor/crunchyroll.py
+++ b/youtube_dl/extractor/crunchyroll.py
@@ -116,6 +116,10 @@ class CrunchyrollIE(CrunchyrollBaseIE):
     }, {
         'url': 'http://www.crunchyroll.fr/girl-friend-beta/episode-11-goodbye-la-mode-661697',
         'only_matching': True,
+    }, {
+        # geo-restricted (US), 18+ maturity wall, non-premium available
+        'url': 'http://www.crunchyroll.com/cosplay-complex-ova/episode-1-the-birth-of-the-cosplay-club-565617',
+        'only_matching': True,
     }]
 
     _FORMAT_IDS = {
@@ -374,6 +378,19 @@ class CrunchyrollShowPlaylistIE(CrunchyrollBaseIE):
             'title': 'A Bridge to the Starry Skies - Hoshizora e Kakaru Hashi'
         },
         'playlist_count': 13,
+    }, {
+        # geo-restricted (US), 18+ maturity wall, non-premium available
+        'url': 'http://www.crunchyroll.com/cosplay-complex-ova',
+        'info_dict': {
+            'id': 'cosplay-complex-ova',
+            'title': 'Cosplay Complex OVA'
+        },
+        'playlist_count': 3,
+        'skip': 'Georestricted',
+    }, {
+        # geo-restricted (US), 18+ maturity wall, non-premium will be available since 2015.11.14
+        'url': 'http://www.crunchyroll.com/ladies-versus-butlers?skip_wall=1',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):

From 448ef1f31c8bcc1550cf907fd46e31026ec981b2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com>
Date: Sun, 18 Oct 2015 09:11:02 +0600
Subject: [PATCH 46/83] [extractor/common] Allow angle brackets in attributes
 in _og_regexes (#7215)

---
 test/test_InfoExtractor.py     | 4 ++++
 youtube_dl/extractor/common.py | 2 +-
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py
index 2a00d09a5..938466a80 100644
--- a/test/test_InfoExtractor.py
+++ b/test/test_InfoExtractor.py
@@ -37,12 +37,16 @@ class TestInfoExtractor(unittest.TestCase):
             <meta property='og:image' content='http://domain.com/pic.jpg?key1=val1&amp;key2=val2'/>
             <meta content='application/x-shockwave-flash' property='og:video:type'>
             <meta content='Foo' property=og:foobar>
+            <meta name="og:test1" content='foo > < bar'/>
+            <meta name="og:test2" content="foo >//< bar"/>
             '''
         self.assertEqual(ie._og_search_title(html), 'Foo')
         self.assertEqual(ie._og_search_description(html), 'Some video\'s description ')
         self.assertEqual(ie._og_search_thumbnail(html), 'http://domain.com/pic.jpg?key1=val1&key2=val2')
         self.assertEqual(ie._og_search_video_url(html, default=None), None)
         self.assertEqual(ie._og_search_property('foobar', html), 'Foo')
+        self.assertEqual(ie._og_search_property('test1', html), 'foo > < bar')
+        self.assertEqual(ie._og_search_property('test2', html), 'foo >//< bar')
 
     def test_html_search_meta(self):
         ie = self.ie
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index a0c4af92f..4365077f1 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -645,7 +645,7 @@ class InfoExtractor(object):
     # Helper functions for extracting OpenGraph info
     @staticmethod
     def _og_regexes(prop):
-        content_re = r'content=(?:"([^>]+?)"|\'([^>]+?)\'|\s*([^\s"\'=<>`]+?))'
+        content_re = r'content=(?:"([^"]+?)"|\'([^\']+?)\'|\s*([^\s"\'=<>`]+?))'
         property_re = (r'(?:name|property)=(?:\'og:%(prop)s\'|"og:%(prop)s"|\s*og:%(prop)s\b)'
                        % {'prop': re.escape(prop)})
         template = r'<meta[^>]+?%s[^>]+?%s'

From 94a773feb94a20be66526348a57ebe20495eba3f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Luk=C3=A1=C5=A1=20Lalinsk=C3=BD?= <lukas@oxygene.sk>
Date: Sat, 17 Oct 2015 22:25:08 +0200
Subject: [PATCH 47/83] [vine] Use JS data to get title/alt_title

---
 youtube_dl/extractor/vine.py | 19 +++++++++++++++++--
 1 file changed, 17 insertions(+), 2 deletions(-)

diff --git a/youtube_dl/extractor/vine.py b/youtube_dl/extractor/vine.py
index c733a48fa..d80b580a0 100644
--- a/youtube_dl/extractor/vine.py
+++ b/youtube_dl/extractor/vine.py
@@ -51,6 +51,21 @@ class VineIE(InfoExtractor):
     }, {
         'url': 'https://vine.co/oembed/MYxVapFvz2z.json',
         'only_matching': True,
+    }, {
+        'url': 'https://vine.co/v/e192BnZnZ9V',
+        'info_dict': {
+            'id': 'e192BnZnZ9V',
+            'ext': 'mp4',
+            'title': u'\u0e22\u0e34\u0e49\u0e21~ \u0e40\u0e02\u0e34\u0e19~ \u0e2d\u0e32\u0e22~ \u0e19\u0e48\u0e32\u0e23\u0e49\u0e32\u0e01\u0e2d\u0e49\u0e30 >//< @n_whitewo @orlameena #lovesicktheseries  #lovesickseason2',
+            'alt_title': 'Vine by Pimry_zaa',
+            'description': u'\u0e22\u0e34\u0e49\u0e21~ \u0e40\u0e02\u0e34\u0e19~ \u0e2d\u0e32\u0e22~ \u0e19\u0e48\u0e32\u0e23\u0e49\u0e32\u0e01\u0e2d\u0e49\u0e30 >//< @n_whitewo @orlameena #lovesicktheseries  #lovesickseason2',
+            'upload_date': '20150705',
+            'uploader': 'Pimry_zaa',
+            'uploader_id': '1135760698325307392',
+        },
+        'params': {
+            'skip_download': True,
+        },
     }]
 
     def _real_extract(self, url):
@@ -74,8 +89,8 @@ class VineIE(InfoExtractor):
 
         return {
             'id': video_id,
-            'title': self._og_search_title(webpage),
-            'alt_title': self._og_search_description(webpage, default=None),
+            'title': data['description'],
+            'alt_title': 'Vine by %s' % data['username'],
             'description': data['description'],
             'thumbnail': data['thumbnailUrl'],
             'upload_date': unified_strdate(data['created']),

From 10c38c7ca248d06c2c0f069c5a810e27e207c61e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Luk=C3=A1=C5=A1=20Lalinsk=C3=BD?= <lukas@oxygene.sk>
Date: Sat, 17 Oct 2015 22:29:49 +0200
Subject: [PATCH 48/83] [vine] Fix download tests

---
 youtube_dl/extractor/vine.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/youtube_dl/extractor/vine.py b/youtube_dl/extractor/vine.py
index d80b580a0..d1dbec893 100644
--- a/youtube_dl/extractor/vine.py
+++ b/youtube_dl/extractor/vine.py
@@ -29,10 +29,10 @@ class VineIE(InfoExtractor):
             'id': 'MYxVapFvz2z',
             'ext': 'mp4',
             'title': 'Fuck Da Police #Mikebrown #justice #ferguson #prayforferguson #protesting #NMOS14',
-            'alt_title': 'Vine by Luna',
+            'alt_title': 'Vine by Mars Ruiz',
             'description': 'Fuck Da Police #Mikebrown #justice #ferguson #prayforferguson #protesting #NMOS14',
             'upload_date': '20140815',
-            'uploader': 'Luna',
+            'uploader': 'Mars Ruiz',
             'uploader_id': '1102363502380728320',
         },
     }, {

From 91816e8f16408a3a2753fb254a9e963ad9429ced Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com>
Date: Sun, 18 Oct 2015 09:32:08 +0600
Subject: [PATCH 49/83] [vine] Remove duplicate metadata, make more robust and
 modernize (Closes #7215)

---
 youtube_dl/extractor/vine.py | 39 ++++++++++++++++++------------------
 1 file changed, 20 insertions(+), 19 deletions(-)

diff --git a/youtube_dl/extractor/vine.py b/youtube_dl/extractor/vine.py
index d1dbec893..6e72cc253 100644
--- a/youtube_dl/extractor/vine.py
+++ b/youtube_dl/extractor/vine.py
@@ -1,10 +1,14 @@
+# coding: utf-8
 from __future__ import unicode_literals
 
 import re
 import itertools
 
 from .common import InfoExtractor
-from ..utils import unified_strdate
+from ..utils import (
+    int_or_none,
+    unified_strdate,
+)
 
 
 class VineIE(InfoExtractor):
@@ -17,7 +21,6 @@ class VineIE(InfoExtractor):
             'ext': 'mp4',
             'title': 'Chicken.',
             'alt_title': 'Vine by Jack Dorsey',
-            'description': 'Chicken.',
             'upload_date': '20130519',
             'uploader': 'Jack Dorsey',
             'uploader_id': '76',
@@ -30,7 +33,6 @@ class VineIE(InfoExtractor):
             'ext': 'mp4',
             'title': 'Fuck Da Police #Mikebrown #justice #ferguson #prayforferguson #protesting #NMOS14',
             'alt_title': 'Vine by Mars Ruiz',
-            'description': 'Fuck Da Police #Mikebrown #justice #ferguson #prayforferguson #protesting #NMOS14',
             'upload_date': '20140815',
             'uploader': 'Mars Ruiz',
             'uploader_id': '1102363502380728320',
@@ -43,7 +45,6 @@ class VineIE(InfoExtractor):
             'ext': 'mp4',
             'title': '#mw3 #ac130 #killcam #angelofdeath',
             'alt_title': 'Vine by Z3k3',
-            'description': '#mw3 #ac130 #killcam #angelofdeath',
             'upload_date': '20130430',
             'uploader': 'Z3k3',
             'uploader_id': '936470460173008896',
@@ -56,9 +57,8 @@ class VineIE(InfoExtractor):
         'info_dict': {
             'id': 'e192BnZnZ9V',
             'ext': 'mp4',
-            'title': u'\u0e22\u0e34\u0e49\u0e21~ \u0e40\u0e02\u0e34\u0e19~ \u0e2d\u0e32\u0e22~ \u0e19\u0e48\u0e32\u0e23\u0e49\u0e32\u0e01\u0e2d\u0e49\u0e30 >//< @n_whitewo @orlameena #lovesicktheseries  #lovesickseason2',
+            'title': 'ยิ้ม~ เขิน~ อาย~ น่าร้ากอ้ะ >//< @n_whitewo @orlameena #lovesicktheseries  #lovesickseason2',
             'alt_title': 'Vine by Pimry_zaa',
-            'description': u'\u0e22\u0e34\u0e49\u0e21~ \u0e40\u0e02\u0e34\u0e19~ \u0e2d\u0e32\u0e22~ \u0e19\u0e48\u0e32\u0e23\u0e49\u0e32\u0e01\u0e2d\u0e49\u0e30 >//< @n_whitewo @orlameena #lovesicktheseries  #lovesickseason2',
             'upload_date': '20150705',
             'uploader': 'Pimry_zaa',
             'uploader_id': '1135760698325307392',
@@ -80,25 +80,26 @@ class VineIE(InfoExtractor):
 
         formats = [{
             'format_id': '%(format)s-%(rate)s' % f,
-            'vcodec': f['format'],
-            'quality': f['rate'],
+            'vcodec': f.get('format'),
+            'quality': f.get('rate'),
             'url': f['videoUrl'],
-        } for f in data['videoUrls']]
+        } for f in data['videoUrls'] if f.get('videoUrl')]
 
         self._sort_formats(formats)
 
+        username = data.get('username')
+
         return {
             'id': video_id,
-            'title': data['description'],
-            'alt_title': 'Vine by %s' % data['username'],
-            'description': data['description'],
-            'thumbnail': data['thumbnailUrl'],
-            'upload_date': unified_strdate(data['created']),
-            'uploader': data['username'],
-            'uploader_id': data['userIdStr'],
-            'like_count': data['likes']['count'],
-            'comment_count': data['comments']['count'],
-            'repost_count': data['reposts']['count'],
+            'title': data.get('description') or self._og_search_title(webpage),
+            'alt_title': 'Vine by %s' % username if username else self._og_search_description(webpage, default=None),
+            'thumbnail': data.get('thumbnailUrl'),
+            'upload_date': unified_strdate(data.get('created')),
+            'uploader': username,
+            'uploader_id': data.get('userIdStr'),
+            'like_count': int_or_none(data.get('likes', {}).get('count')),
+            'comment_count': int_or_none(data.get('comments', {}).get('count')),
+            'repost_count': int_or_none(data.get('reposts', {}).get('count')),
             'formats': formats,
         }
 

From 02835c6bf4403a907c058d43220a83b3b427e181 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com>
Date: Sun, 18 Oct 2015 09:34:54 +0600
Subject: [PATCH 50/83] [extractor/common] Document repost_count

---
 youtube_dl/extractor/common.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 4365077f1..6169fbbeb 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -172,6 +172,7 @@ class InfoExtractor(object):
     view_count:     How many users have watched the video on the platform.
     like_count:     Number of positive ratings of the video
     dislike_count:  Number of negative ratings of the video
+    repost_count:   Number of reposts of the video
     average_rating: Average rating give by users, the scale used depends on the webpage
     comment_count:  Number of comments on the video
     comments:       A list of comments, each with one or more of the following

From 2e022397c45fbcfd2ef6da43d14b0770221aabd5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com>
Date: Sun, 18 Oct 2015 09:36:19 +0600
Subject: [PATCH 51/83] [vine] Add counters to tests

---
 youtube_dl/extractor/vine.py | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/youtube_dl/extractor/vine.py b/youtube_dl/extractor/vine.py
index 6e72cc253..be72f3147 100644
--- a/youtube_dl/extractor/vine.py
+++ b/youtube_dl/extractor/vine.py
@@ -24,6 +24,9 @@ class VineIE(InfoExtractor):
             'upload_date': '20130519',
             'uploader': 'Jack Dorsey',
             'uploader_id': '76',
+            'like_count': int,
+            'comment_count': int,
+            'repost_count': int,
         },
     }, {
         'url': 'https://vine.co/v/MYxVapFvz2z',
@@ -36,6 +39,9 @@ class VineIE(InfoExtractor):
             'upload_date': '20140815',
             'uploader': 'Mars Ruiz',
             'uploader_id': '1102363502380728320',
+            'like_count': int,
+            'comment_count': int,
+            'repost_count': int,
         },
     }, {
         'url': 'https://vine.co/v/bxVjBbZlPUH',
@@ -48,6 +54,9 @@ class VineIE(InfoExtractor):
             'upload_date': '20130430',
             'uploader': 'Z3k3',
             'uploader_id': '936470460173008896',
+            'like_count': int,
+            'comment_count': int,
+            'repost_count': int,
         },
     }, {
         'url': 'https://vine.co/oembed/MYxVapFvz2z.json',
@@ -62,6 +71,9 @@ class VineIE(InfoExtractor):
             'upload_date': '20150705',
             'uploader': 'Pimry_zaa',
             'uploader_id': '1135760698325307392',
+            'like_count': int,
+            'comment_count': int,
+            'repost_count': int,
         },
         'params': {
             'skip_download': True,

From 1e399778ee870ee583135e65458268cd7c0fb923 Mon Sep 17 00:00:00 2001
From: Yen Chi Hsuan <yan12125@gmail.com>
Date: Wed, 22 Jul 2015 20:03:05 +0800
Subject: [PATCH 52/83] [letv] Fix extraction

Pass the decrypted M3U8 manifest as a data URI. Note that data URI
input is supported by ffmpeg only.
---
 youtube_dl/extractor/letv.py | 70 ++++++++++++++++++++++++++----------
 youtube_dl/utils.py          |  5 +++
 2 files changed, 57 insertions(+), 18 deletions(-)

diff --git a/youtube_dl/extractor/letv.py b/youtube_dl/extractor/letv.py
index a28abb0f0..9ebbc8089 100644
--- a/youtube_dl/extractor/letv.py
+++ b/youtube_dl/extractor/letv.py
@@ -9,13 +9,14 @@ from .common import InfoExtractor
 from ..compat import (
     compat_urllib_parse,
     compat_urllib_request,
-    compat_urlparse,
+    compat_ord,
 )
 from ..utils import (
     determine_ext,
     ExtractorError,
     parse_iso8601,
     int_or_none,
+    encode_data_uri,
 )
 
 
@@ -25,15 +26,16 @@ class LetvIE(InfoExtractor):
 
     _TESTS = [{
         'url': 'http://www.letv.com/ptv/vplay/22005890.html',
-        'md5': 'cab23bd68d5a8db9be31c9a222c1e8df',
+        'md5': 'edadcfe5406976f42f9f266057ee5e40',
         'info_dict': {
             'id': '22005890',
             'ext': 'mp4',
             'title': '第87届奥斯卡颁奖礼完美落幕 《鸟人》成最大赢家',
-            'timestamp': 1424747397,
-            'upload_date': '20150224',
             'description': 'md5:a9cb175fd753e2962176b7beca21a47c',
-        }
+        },
+        'params': {
+            'hls_prefer_native': True,
+        },
     }, {
         'url': 'http://www.letv.com/ptv/vplay/1415246.html',
         'info_dict': {
@@ -42,16 +44,22 @@ class LetvIE(InfoExtractor):
             'title': '美人天下01',
             'description': 'md5:f88573d9d7225ada1359eaf0dbf8bcda',
         },
+        'params': {
+            'hls_prefer_native': True,
+        },
     }, {
         'note': 'This video is available only in Mainland China, thus a proxy is needed',
         'url': 'http://www.letv.com/ptv/vplay/1118082.html',
-        'md5': 'f80936fbe20fb2f58648e81386ff7927',
+        'md5': '2424c74948a62e5f31988438979c5ad1',
         'info_dict': {
             'id': '1118082',
             'ext': 'mp4',
             'title': '与龙共舞 完整版',
             'description': 'md5:7506a5eeb1722bb9d4068f85024e3986',
         },
+        'params': {
+            'hls_prefer_native': True,
+        },
         'skip': 'Only available in China',
     }]
 
@@ -74,6 +82,27 @@ class LetvIE(InfoExtractor):
         _loc3_ = self.ror(_loc3_, _loc2_ % 17)
         return _loc3_
 
+    # see M3U8Encryption class in KLetvPlayer.swf
+    @staticmethod
+    def decrypt_m3u8(encrypted_data):
+        if encrypted_data[:5].decode('utf-8').lower() != 'vc_01':
+            return encrypted_data
+        encrypted_data = encrypted_data[5:]
+
+        _loc4_ = bytearray()
+        while encrypted_data:
+            b = compat_ord(encrypted_data[0])
+            _loc4_.extend([b // 16, b & 0x0f])
+            encrypted_data = encrypted_data[1:]
+        idx = len(_loc4_) - 11
+        _loc4_ = _loc4_[idx:] + _loc4_[:idx]
+        _loc7_ = bytearray()
+        while _loc4_:
+            _loc7_.append(_loc4_[0] * 16 + _loc4_[1])
+            _loc4_ = _loc4_[2:]
+
+        return bytes(_loc7_)
+
     def _real_extract(self, url):
         media_id = self._match_id(url)
         page = self._download_webpage(url, media_id)
@@ -115,23 +144,28 @@ class LetvIE(InfoExtractor):
         for format_id in formats:
             if format_id in dispatch:
                 media_url = playurl['domain'][0] + dispatch[format_id][0]
-
-                # Mimic what flvxz.com do
-                url_parts = list(compat_urlparse.urlparse(media_url))
-                qs = dict(compat_urlparse.parse_qs(url_parts[4]))
-                qs.update({
-                    'platid': '14',
-                    'splatid': '1401',
-                    'tss': 'no',
-                    'retry': 1
+                media_url += '&' + compat_urllib_parse.urlencode({
+                    'm3v': 1,
+                    'format': 1,
+                    'expect': 3,
+                    'rateid': format_id,
                 })
-                url_parts[4] = compat_urllib_parse.urlencode(qs)
-                media_url = compat_urlparse.urlunparse(url_parts)
+
+                nodes_data = self._download_json(
+                    media_url, media_id,
+                    'Download JSON metadata for format %s' % format_id)
+
+                req = self._request_webpage(
+                    nodes_data['nodelist'][0]['location'], media_id,
+                    note='Downloading m3u8 information for format %s' % format_id)
+
+                m3u8_data = self.decrypt_m3u8(req.read())
 
                 url_info_dict = {
-                    'url': media_url,
+                    'url': encode_data_uri(m3u8_data, 'application/x-mpegURL'),
                     'ext': determine_ext(dispatch[format_id][1]),
                     'format_id': format_id,
+                    'protocol': 'm3u8',
                 }
 
                 if format_id[-1:] == 'p':
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 7dbe25661..db5b3698e 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -3,6 +3,7 @@
 
 from __future__ import unicode_literals
 
+import base64
 import calendar
 import codecs
 import contextlib
@@ -1795,6 +1796,10 @@ def urlhandle_detect_ext(url_handle):
     return mimetype2ext(getheader('Content-Type'))
 
 
+def encode_data_uri(data, mime_type):
+    return 'data:%s;base64,%s' % (mime_type, base64.b64encode(data).decode('ascii'))
+
+
 def age_restricted(content_limit, age_limit):
     """ Returns True iff the content should be blocked """
 

From 985e4fdc07f00a3fdc8e7b7b4119471ee97f3890 Mon Sep 17 00:00:00 2001
From: Yen Chi Hsuan <yan12125@gmail.com>
Date: Sat, 17 Oct 2015 22:49:05 +0800
Subject: [PATCH 53/83] [downloader/hls] Add headers only for http(s) URLs

ffmpeg 2.8.1 raises an error when -headers is passed together with a non-HTTP input.
---
 youtube_dl/downloader/hls.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/youtube_dl/downloader/hls.py b/youtube_dl/downloader/hls.py
index a62d2047b..9a83a73dd 100644
--- a/youtube_dl/downloader/hls.py
+++ b/youtube_dl/downloader/hls.py
@@ -30,7 +30,7 @@ class HlsFD(FileDownloader):
 
         args = [ffpp.executable, '-y']
 
-        if info_dict['http_headers']:
+        if info_dict['http_headers'] and re.match(r'^https?://', url):
             # Trailing \r\n after each HTTP header is important to prevent warning from ffmpeg/avconv:
             # [http @ 00000000003d2fa0] No trailing CRLF found in HTTP header.
             args += [

From 0a67a3632bb9cf76f64658986defc1947090ef50 Mon Sep 17 00:00:00 2001
From: Yen Chi Hsuan <yan12125@gmail.com>
Date: Sat, 17 Oct 2015 23:15:01 +0800
Subject: [PATCH 54/83] [compat] Add compat_urllib_request_DataHandler

---
 youtube_dl/compat.py | 44 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 44 insertions(+)

diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py
index 192e1c515..d103ab9ad 100644
--- a/youtube_dl/compat.py
+++ b/youtube_dl/compat.py
@@ -1,7 +1,10 @@
 from __future__ import unicode_literals
 
+import binascii
 import collections
+import email
 import getpass
+import io
 import optparse
 import os
 import re
@@ -38,6 +41,11 @@ try:
 except ImportError:  # Python 2
     import urlparse as compat_urlparse
 
+try:
+    import urllib.response as compat_urllib_response
+except ImportError:  # Python 2
+    import urllib as compat_urllib_response
+
 try:
     import http.cookiejar as compat_cookiejar
 except ImportError:  # Python 2
@@ -155,6 +163,40 @@ except ImportError:  # Python 2
         string = string.replace('+', ' ')
         return compat_urllib_parse_unquote(string, encoding, errors)
 
+try:
+    from urllib.request import DataHandler as compat_urllib_request_DataHandler
+except ImportError:  # Python < 3.4
+    # Ported from CPython 98774:1733b3bd46db, Lib/urllib/request.py
+    class compat_urllib_request_DataHandler(compat_urllib_request.BaseHandler):
+        def data_open(self, req):
+            # data URLs as specified in RFC 2397.
+            #
+            # ignores POSTed data
+            #
+            # syntax:
+            # dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
+            # mediatype := [ type "/" subtype ] *( ";" parameter )
+            # data      := *urlchar
+            # parameter := attribute "=" value
+            url = req.get_full_url()
+
+            scheme, data = url.split(":", 1)
+            mediatype, data = data.split(",", 1)
+
+            # even base64 encoded data URLs might be quoted so unquote in any case:
+            data = compat_urllib_parse_unquote_to_bytes(data)
+            if mediatype.endswith(";base64"):
+                data = binascii.a2b_base64(data)
+                mediatype = mediatype[:-7]
+
+            if not mediatype:
+                mediatype = "text/plain;charset=US-ASCII"
+
+            headers = email.message_from_string(
+                "Content-type: %s\nContent-length: %d\n" % (mediatype, len(data)))
+
+            return compat_urllib_response.addinfourl(io.BytesIO(data), headers, url)
+
 try:
     compat_basestring = basestring  # Python 2
 except NameError:
@@ -489,6 +531,8 @@ __all__ = [
     'compat_urllib_parse_unquote_to_bytes',
     'compat_urllib_parse_urlparse',
     'compat_urllib_request',
+    'compat_urllib_request_DataHandler',
+    'compat_urllib_response',
     'compat_urlparse',
     'compat_urlretrieve',
     'compat_xml_parse_error',

From 8b172c2e10fb38c62c213673304c7e8dcd17b768 Mon Sep 17 00:00:00 2001
From: Yen Chi Hsuan <yan12125@gmail.com>
Date: Sat, 17 Oct 2015 23:16:40 +0800
Subject: [PATCH 55/83] [YoutubeDL] Use DataHandler

---
 youtube_dl/YoutubeDL.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index adf70d658..12977bf80 100755
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -37,6 +37,7 @@ from .compat import (
     compat_tokenize_tokenize,
     compat_urllib_error,
     compat_urllib_request,
+    compat_urllib_request_DataHandler,
 )
 from .utils import (
     ContentTooShortError,
@@ -1967,8 +1968,9 @@ class YoutubeDL(object):
         debuglevel = 1 if self.params.get('debug_printtraffic') else 0
         https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
         ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
+        data_handler = compat_urllib_request_DataHandler()
         opener = compat_urllib_request.build_opener(
-            proxy_handler, https_handler, cookie_processor, ydlh)
+            proxy_handler, https_handler, cookie_processor, ydlh, data_handler)
 
         # Delete the default user-agent header, which would otherwise apply in
         # cases where our custom HTTP handler doesn't come into play

From 48aae2d2cf49843d0efa227fa393a0c783fc3c1e Mon Sep 17 00:00:00 2001
From: Yen Chi Hsuan <yan12125@gmail.com>
Date: Sun, 18 Oct 2015 17:07:48 +0800
Subject: [PATCH 56/83] [twitter] Update tests

---
 youtube_dl/extractor/twitter.py | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/youtube_dl/extractor/twitter.py b/youtube_dl/extractor/twitter.py
index 1dd43ff3c..b2fff73b9 100644
--- a/youtube_dl/extractor/twitter.py
+++ b/youtube_dl/extractor/twitter.py
@@ -1,3 +1,4 @@
+# coding: utf-8
 from __future__ import unicode_literals
 
 import re
@@ -15,7 +16,7 @@ class TwitterCardIE(InfoExtractor):
     _TESTS = [
         {
             'url': 'https://twitter.com/i/cards/tfw/v1/560070183650213889',
-            'md5': 'a74f50b310c83170319ba16de6955192',
+            'md5': '7d2f6b4d2eb841a7ccc893d479bfceb4',
             'info_dict': {
                 'id': '560070183650213889',
                 'ext': 'mp4',
@@ -103,17 +104,17 @@ class TwitterIE(TwitterCardIE):
     _VALID_URL = r'https?://(?:www|m|mobile)?\.?twitter\.com/(?P<id>[^/]+/status/\d+)'
 
     _TESTS = [{
-        'url': 'https://m.twitter.com/thereaIbanksy/status/614301758345490432',
-        'md5': '8bbccb487bd7a31349b775915fcd412f',
+        'url': 'https://twitter.com/freethenipple/status/643211948184596480',
+        'md5': '31cd83a116fc41f99ae3d909d4caf6a0',
         'info_dict': {
-            'id': '614301758345490432',
+            'id': '643211948184596480',
             'ext': 'mp4',
-            'title': 'thereaIbanksy - This time lapse is so pretty \U0001f60d\U0001f60d',
+            'title': 'freethenipple - FTN supporters on Hollywood Blvd today!',
             'thumbnail': 're:^https?://.*\.jpg',
-            'duration': 29.5,
-            'description': 'banksy on Twitter: "This time lapse is so pretty \U0001f60d\U0001f60d http://t.co/QB8DDbqiR1"',
-            'uploader': 'banksy',
-            'uploader_id': 'thereaIbanksy',
+            'duration': 12.922,
+            'description': 'FREE THE NIPPLE on Twitter: "FTN supporters on Hollywood Blvd today! http://t.co/c7jHH749xJ"',
+            'uploader': 'FREE THE NIPPLE',
+            'uploader_id': 'freethenipple',
         },
     }]
 

From 01d22d47039dedace1c5414c83e9fecfca41b5a5 Mon Sep 17 00:00:00 2001
From: Yen Chi Hsuan <yan12125@gmail.com>
Date: Sun, 18 Oct 2015 17:11:55 +0800
Subject: [PATCH 57/83] [twitter] Use _download_xml

---
 youtube_dl/extractor/twitter.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/youtube_dl/extractor/twitter.py b/youtube_dl/extractor/twitter.py
index b2fff73b9..37a9fd5fd 100644
--- a/youtube_dl/extractor/twitter.py
+++ b/youtube_dl/extractor/twitter.py
@@ -8,6 +8,7 @@ from ..compat import compat_urllib_request
 from ..utils import (
     float_or_none,
     unescapeHTML,
+    xpath_text,
 )
 
 
@@ -60,9 +61,8 @@ class TwitterCardIE(InfoExtractor):
                 video_id)
             if 'playlist' not in config:
                 if 'vmapUrl' in config:
-                    webpage = self._download_webpage(config['vmapUrl'], video_id + ' (xml)')
-                    video_url = self._search_regex(
-                        r'<MediaFile>\s*<!\[CDATA\[(https?://.+?)\]\]>', webpage, 'data player config (xml)')
+                    vmap_data = self._download_xml(config['vmapUrl'], video_id)
+                    video_url = xpath_text(vmap_data, './/MediaFile').strip()
                     f = {
                         'url': video_url,
                     }

From 014e880372e896cdd63f9075864d2a3bba60e706 Mon Sep 17 00:00:00 2001
From: Yen Chi Hsuan <yan12125@gmail.com>
Date: Sun, 18 Oct 2015 17:13:58 +0800
Subject: [PATCH 58/83] [twitter] Add IE_NAMEs

---
 youtube_dl/extractor/twitter.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/youtube_dl/extractor/twitter.py b/youtube_dl/extractor/twitter.py
index 37a9fd5fd..5f697782e 100644
--- a/youtube_dl/extractor/twitter.py
+++ b/youtube_dl/extractor/twitter.py
@@ -13,6 +13,7 @@ from ..utils import (
 
 
 class TwitterCardIE(InfoExtractor):
+    IE_NAME = 'twitter:card'
     _VALID_URL = r'https?://(?:www\.)?twitter\.com/i/cards/tfw/v1/(?P<id>\d+)'
     _TESTS = [
         {
@@ -101,6 +102,7 @@ class TwitterCardIE(InfoExtractor):
 
 
 class TwitterIE(TwitterCardIE):
+    IE_NAME = 'twitter'
     _VALID_URL = r'https?://(?:www|m|mobile)?\.?twitter\.com/(?P<id>[^/]+/status/\d+)'
 
     _TESTS = [{

From f322bfb0638aeeb527459ebcf00f8a3dde26280c Mon Sep 17 00:00:00 2001
From: Yen Chi Hsuan <yan12125@gmail.com>
Date: Sun, 18 Oct 2015 17:15:47 +0800
Subject: [PATCH 59/83] [twitter:card] Remove unneeded 'ext'

---
 youtube_dl/extractor/twitter.py | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/youtube_dl/extractor/twitter.py b/youtube_dl/extractor/twitter.py
index 5f697782e..48bef5d80 100644
--- a/youtube_dl/extractor/twitter.py
+++ b/youtube_dl/extractor/twitter.py
@@ -64,13 +64,9 @@ class TwitterCardIE(InfoExtractor):
                 if 'vmapUrl' in config:
                     vmap_data = self._download_xml(config['vmapUrl'], video_id)
                     video_url = xpath_text(vmap_data, './/MediaFile').strip()
-                    f = {
+                    formats.append({
                         'url': video_url,
-                    }
-                    ext = re.search(r'\.([a-z0-9]{2,4})(\?.+)?$', video_url)
-                    if ext:
-                        f['ext'] = ext.group(1)
-                    formats.append(f)
+                    })
                     break   # same video regardless of UA
                 continue
 

From e04edad621efe56347e155b6dc59a0c3d589b3bd Mon Sep 17 00:00:00 2001
From: Yen Chi Hsuan <yan12125@gmail.com>
Date: Sun, 18 Oct 2015 17:16:57 +0800
Subject: [PATCH 60/83] [twitter] Inherit from InfoExtractor directly

---
 youtube_dl/extractor/twitter.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/youtube_dl/extractor/twitter.py b/youtube_dl/extractor/twitter.py
index 48bef5d80..c9b783745 100644
--- a/youtube_dl/extractor/twitter.py
+++ b/youtube_dl/extractor/twitter.py
@@ -97,11 +97,11 @@ class TwitterCardIE(InfoExtractor):
         }
 
 
-class TwitterIE(TwitterCardIE):
+class TwitterIE(InfoExtractor):
     IE_NAME = 'twitter'
     _VALID_URL = r'https?://(?:www|m|mobile)?\.?twitter\.com/(?P<id>[^/]+/status/\d+)'
 
-    _TESTS = [{
+    _TEST = {
         'url': 'https://twitter.com/freethenipple/status/643211948184596480',
         'md5': '31cd83a116fc41f99ae3d909d4caf6a0',
         'info_dict': {
@@ -114,7 +114,7 @@ class TwitterIE(TwitterCardIE):
             'uploader': 'FREE THE NIPPLE',
             'uploader_id': 'freethenipple',
         },
-    }]
+    }
 
     def _real_extract(self, url):
         id = self._match_id(url)

From f6dfd6603a9e9bb88ebcdcd52490974a34d1bd11 Mon Sep 17 00:00:00 2001
From: Yen Chi Hsuan <yan12125@gmail.com>
Date: Sun, 18 Oct 2015 17:18:01 +0800
Subject: [PATCH 61/83] [twitter] Use _html_search_regex

---
 youtube_dl/extractor/twitter.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/youtube_dl/extractor/twitter.py b/youtube_dl/extractor/twitter.py
index c9b783745..6ff15369c 100644
--- a/youtube_dl/extractor/twitter.py
+++ b/youtube_dl/extractor/twitter.py
@@ -122,7 +122,7 @@ class TwitterIE(InfoExtractor):
         name = username
         url = re.sub(r'https?://(m|mobile)\.', 'https://', url)
         webpage = self._download_webpage(url, 'tweet: ' + url)
-        description = unescapeHTML(self._search_regex('<title>\s*(.+?)\s*</title>', webpage, 'title'))
+        description = self._html_search_regex('<title>\s*(.+?)\s*</title>', webpage, 'title')
         title = description.replace('\n', ' ')
         splitdesc = re.match(r'^(.+?)\s*on Twitter:\s* "(.+?)"$', title)
         if splitdesc:

From 575036b40504bc921b18f05bde64e0e7dceacec6 Mon Sep 17 00:00:00 2001
From: Yen Chi Hsuan <yan12125@gmail.com>
Date: Sun, 18 Oct 2015 18:04:13 +0800
Subject: [PATCH 62/83] [twitter] Simplify and improve

---
 youtube_dl/extractor/twitter.py | 41 +++++++++++++++++++--------------
 1 file changed, 24 insertions(+), 17 deletions(-)

diff --git a/youtube_dl/extractor/twitter.py b/youtube_dl/extractor/twitter.py
index 6ff15369c..6b3b39aee 100644
--- a/youtube_dl/extractor/twitter.py
+++ b/youtube_dl/extractor/twitter.py
@@ -9,6 +9,7 @@ from ..utils import (
     float_or_none,
     unescapeHTML,
     xpath_text,
+    remove_end,
 )
 
 
@@ -99,7 +100,8 @@ class TwitterCardIE(InfoExtractor):
 
 class TwitterIE(InfoExtractor):
     IE_NAME = 'twitter'
-    _VALID_URL = r'https?://(?:www|m|mobile)?\.?twitter\.com/(?P<id>[^/]+/status/\d+)'
+    _VALID_URL = r'https?://(?:www\.|m\.|mobile\.)?twitter\.com/(?P<user_id>[^/]+)/status/(?P<id>\d+)'
+    _TEMPLATE_URL = 'https://twitter.com/%s/status/%s'
 
     _TEST = {
         'url': 'https://twitter.com/freethenipple/status/643211948184596480',
@@ -107,7 +109,7 @@ class TwitterIE(InfoExtractor):
         'info_dict': {
             'id': '643211948184596480',
             'ext': 'mp4',
-            'title': 'freethenipple - FTN supporters on Hollywood Blvd today!',
+            'title': 'FREE THE NIPPLE - FTN supporters on Hollywood Blvd today!',
             'thumbnail': 're:^https?://.*\.jpg',
             'duration': 12.922,
             'description': 'FREE THE NIPPLE on Twitter: "FTN supporters on Hollywood Blvd today! http://t.co/c7jHH749xJ"',
@@ -117,26 +119,31 @@ class TwitterIE(InfoExtractor):
     }
 
     def _real_extract(self, url):
-        id = self._match_id(url)
-        username, twid = re.match(r'([^/]+)/status/(\d+)', id).groups()
-        name = username
-        url = re.sub(r'https?://(m|mobile)\.', 'https://', url)
-        webpage = self._download_webpage(url, 'tweet: ' + url)
-        description = self._html_search_regex('<title>\s*(.+?)\s*</title>', webpage, 'title')
-        title = description.replace('\n', ' ')
-        splitdesc = re.match(r'^(.+?)\s*on Twitter:\s* "(.+?)"$', title)
-        if splitdesc:
-            name, title = splitdesc.groups()
-        title = re.sub(r'\s*https?://[^ ]+', '', title)  # strip  'https -_t.co_BJYgOjSeGA' junk from filenames
-        card_id = self._search_regex(r'["\']/i/cards/tfw/v1/(\d+)', webpage, '/i/card/...')
+        mobj = re.match(self._VALID_URL, url)
+        user_id = mobj.group('user_id')
+        twid = mobj.group('id')
+
+        webpage = self._download_webpage(self._TEMPLATE_URL % (user_id, twid), twid)
+
+        username = remove_end(self._og_search_title(webpage), ' on Twitter')
+
+        title = self._og_search_description(webpage).strip('').replace('\n', ' ')
+
+        # strip  'https -_t.co_BJYgOjSeGA' junk from filenames
+        mobj = re.match(r'“(.*)\s+(http://[^ ]+)”', title)
+        title, short_url = mobj.groups()
+
+        card_id = self._search_regex(
+            r'["\']/i/cards/tfw/v1/(\d+)', webpage, 'twitter card url')
         card_url = 'https://twitter.com/i/cards/tfw/v1/' + card_id
+
         return {
             '_type': 'url_transparent',
             'ie_key': 'TwitterCard',
-            'uploader_id': username,
-            'uploader': name,
+            'uploader_id': user_id,
+            'uploader': username,
             'url': card_url,
             'webpage_url': url,
-            'description': description,
+            'description': '%s on Twitter: "%s %s"' % (username, title, short_url),
             'title': username + ' - ' + title,
         }

From 77a54b6a658059a11de415d793588fdbfec14194 Mon Sep 17 00:00:00 2001
From: Yen Chi Hsuan <yan12125@gmail.com>
Date: Sun, 18 Oct 2015 18:08:24 +0800
Subject: [PATCH 63/83] [twitter:card] Use _html_search_regex

---
 youtube_dl/extractor/twitter.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/youtube_dl/extractor/twitter.py b/youtube_dl/extractor/twitter.py
index 6b3b39aee..1cdca544c 100644
--- a/youtube_dl/extractor/twitter.py
+++ b/youtube_dl/extractor/twitter.py
@@ -7,7 +7,6 @@ from .common import InfoExtractor
 from ..compat import compat_urllib_request
 from ..utils import (
     float_or_none,
-    unescapeHTML,
     xpath_text,
     remove_end,
 )
@@ -57,9 +56,8 @@ class TwitterCardIE(InfoExtractor):
             request.add_header('User-Agent', user_agent)
             webpage = self._download_webpage(request, video_id)
 
-            config = self._parse_json(
-                unescapeHTML(self._search_regex(
-                    r'data-player-config="([^"]+)"', webpage, 'data player config')),
+            config = self._parse_json(self._html_search_regex(
+                r'data-player-config="([^"]+)"', webpage, 'data player config'),
                 video_id)
             if 'playlist' not in config:
                 if 'vmapUrl' in config:

From c88aec845a680ef9404b637b3dbcf706dcf00b68 Mon Sep 17 00:00:00 2001
From: Yen Chi Hsuan <yan12125@gmail.com>
Date: Sun, 18 Oct 2015 18:23:56 +0800
Subject: [PATCH 64/83] [twitter] Fix short URL extraction

---
 youtube_dl/extractor/twitter.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/youtube_dl/extractor/twitter.py b/youtube_dl/extractor/twitter.py
index 1cdca544c..1472f22a7 100644
--- a/youtube_dl/extractor/twitter.py
+++ b/youtube_dl/extractor/twitter.py
@@ -128,7 +128,7 @@ class TwitterIE(InfoExtractor):
         title = self._og_search_description(webpage).strip('').replace('\n', ' ')
 
         # strip  'https -_t.co_BJYgOjSeGA' junk from filenames
-        mobj = re.match(r'“(.*)\s+(http://[^ ]+)”', title)
+        mobj = re.match(r'“(.*)\s+(https?://[^ ]+)”', title)
         title, short_url = mobj.groups()
 
         card_id = self._search_regex(

From 4a7b79038425f614af49116edab7897f0db13e5a Mon Sep 17 00:00:00 2001
From: Yen Chi Hsuan <yan12125@gmail.com>
Date: Sun, 18 Oct 2015 19:07:37 +0800
Subject: [PATCH 65/83] [twitter:card] Support YouTube embeds

---
 youtube_dl/extractor/twitter.py | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/youtube_dl/extractor/twitter.py b/youtube_dl/extractor/twitter.py
index 1472f22a7..9d3e46b94 100644
--- a/youtube_dl/extractor/twitter.py
+++ b/youtube_dl/extractor/twitter.py
@@ -37,6 +37,19 @@ class TwitterCardIE(InfoExtractor):
                 'thumbnail': 're:^https?://.*\.jpg',
                 'duration': 80.155,
             },
+        },
+        {
+            'url': 'https://twitter.com/i/cards/tfw/v1/654001591733886977',
+            'md5': 'b6f35e8b08a0bec6c8af77a2f4b3a814',
+            'info_dict': {
+                'id': 'dq4Oj5quskI',
+                'ext': 'mp4',
+                'title': 'Ubuntu 11.10 Overview',
+                'description': 'Take a quick peek at what\'s new and improved in Ubuntu 11.10.\n\nOnce installed take a look at 10 Things to Do After Installing: http://www.omgubuntu.co.uk/2011/10/10-things-to-do-after-installing-ubuntu-11-10/',
+                'upload_date': '20111013',
+                'uploader': 'OMG! Ubuntu!',
+                'uploader_id': 'omgubuntu',
+            },
         }
     ]
 
@@ -56,6 +69,12 @@ class TwitterCardIE(InfoExtractor):
             request.add_header('User-Agent', user_agent)
             webpage = self._download_webpage(request, video_id)
 
+            youtube_url = self._html_search_regex(
+                r'<iframe[^>]+src="((?:https?:)?//www.youtube.com/embed/[^"]+)"',
+                webpage, 'youtube iframe', default=None)
+            if youtube_url:
+                return self.url_result(youtube_url, 'Youtube')
+
             config = self._parse_json(self._html_search_regex(
                 r'data-player-config="([^"]+)"', webpage, 'data player config'),
                 video_id)

From 05a3879f1c142cc2bf0287cde4690d8ccadcdc8f Mon Sep 17 00:00:00 2001
From: Yen Chi Hsuan <yan12125@gmail.com>
Date: Sun, 18 Oct 2015 19:19:46 +0800
Subject: [PATCH 66/83] [letv] Update M3U8's MIME type

The new MIME type appears in the following places:
https://www.iana.org/assignments/media-types/media-types.xhtml#application
https://hg.python.org/cpython/file/tip/Lib/mimetypes.py
---
 youtube_dl/extractor/letv.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/youtube_dl/extractor/letv.py b/youtube_dl/extractor/letv.py
index 9ebbc8089..effd9eb92 100644
--- a/youtube_dl/extractor/letv.py
+++ b/youtube_dl/extractor/letv.py
@@ -162,7 +162,7 @@ class LetvIE(InfoExtractor):
                 m3u8_data = self.decrypt_m3u8(req.read())
 
                 url_info_dict = {
-                    'url': encode_data_uri(m3u8_data, 'application/x-mpegURL'),
+                    'url': encode_data_uri(m3u8_data, 'application/vnd.apple.mpegurl'),
                     'ext': determine_ext(dispatch[format_id][1]),
                     'format_id': format_id,
                     'protocol': 'm3u8',

From dd67702a3ea007369109ee8e4b67043064e1f759 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?=
 <jaime.marquinez.ferrandiz@gmail.com>
Date: Sun, 18 Oct 2015 14:13:06 +0200
Subject: [PATCH 67/83] [imdb] Fix extraction (fixes #7220)

---
 youtube_dl/extractor/imdb.py | 29 +++++++++++++++++++----------
 1 file changed, 19 insertions(+), 10 deletions(-)

diff --git a/youtube_dl/extractor/imdb.py b/youtube_dl/extractor/imdb.py
index 4bb574cf3..02e1e428e 100644
--- a/youtube_dl/extractor/imdb.py
+++ b/youtube_dl/extractor/imdb.py
@@ -4,8 +4,8 @@ import re
 import json
 
 from .common import InfoExtractor
-from ..compat import (
-    compat_urlparse,
+from ..utils import (
+    qualities,
 )
 
 
@@ -30,24 +30,33 @@ class ImdbIE(InfoExtractor):
         descr = self._html_search_regex(
             r'(?s)<span itemprop="description">(.*?)</span>',
             webpage, 'description', fatal=False)
-        available_formats = re.findall(
-            r'case \'(?P<f_id>.*?)\' :$\s+url = \'(?P<path>.*?)\'', webpage,
-            flags=re.MULTILINE)
+        player_url = 'http://www.imdb.com/video/imdb/vi%s/imdb/single' % video_id
+        player_page = self._download_webpage(
+            player_url, video_id, 'Downloading player page')
+        # the player page contains the info for the default format, we have to
+        # fetch other pages for the rest of the formats
+        extra_formats = re.findall(r'href="(?P<url>%s.*?)".*?>(?P<name>.*?)<' % re.escape(player_url), player_page)
+        format_pages = [
+            self._download_webpage(
+                f_url, video_id, 'Downloading info for %s format' % f_name)
+            for f_url, f_name in extra_formats]
+        format_pages.append(player_page)
+
+        quality = qualities(['SD', '480p', '720p'])
         formats = []
-        for f_id, f_path in available_formats:
-            f_path = f_path.strip()
-            format_page = self._download_webpage(
-                compat_urlparse.urljoin(url, f_path),
-                'Downloading info for %s format' % f_id)
+        for format_page in format_pages:
             json_data = self._search_regex(
                 r'<script[^>]+class="imdb-player-data"[^>]*?>(.*?)</script>',
                 format_page, 'json data', flags=re.DOTALL)
             info = json.loads(json_data)
             format_info = info['videoPlayerObject']['video']
+            f_id = format_info['ffname']
             formats.append({
                 'format_id': f_id,
                 'url': format_info['videoInfoList'][0]['videoUrl'],
+                'quality': quality(f_id),
             })
+        self._sort_formats(formats)
 
         return {
             'id': video_id,

From b0f001a6cbd220c8b10c0ce359f17072d6347a8f Mon Sep 17 00:00:00 2001
From: remitamine <remitamine@gmail.com>
Date: Mon, 21 Sep 2015 15:52:36 +0100
Subject: [PATCH 68/83] [canalc2] fix info extraction

---
 youtube_dl/extractor/canalc2.py | 30 ++++++++++++++++++------------
 1 file changed, 18 insertions(+), 12 deletions(-)

diff --git a/youtube_dl/extractor/canalc2.py b/youtube_dl/extractor/canalc2.py
index c4fefefe4..66a9ff093 100644
--- a/youtube_dl/extractor/canalc2.py
+++ b/youtube_dl/extractor/canalc2.py
@@ -8,34 +8,40 @@ from .common import InfoExtractor
 
 class Canalc2IE(InfoExtractor):
     IE_NAME = 'canalc2.tv'
-    _VALID_URL = r'http://.*?\.canalc2\.tv/video\.asp\?.*?idVideo=(?P<id>\d+)'
+    _VALID_URL = r'https?://(www\.)?canalc2\.tv/video/(?P<id>\d+)'
 
     _TEST = {
-        'url': 'http://www.canalc2.tv/video.asp?idVideo=12163&voir=oui',
+        'url': 'http://www.canalc2.tv/video/12163',
         'md5': '060158428b650f896c542dfbb3d6487f',
         'info_dict': {
             'id': '12163',
             'ext': 'mp4',
             'title': 'Terrasses du Numérique'
+        },
+        'params': {
+            'skip_download': True,  # Requires rtmpdump
         }
     }
 
     def _real_extract(self, url):
-        video_id = re.match(self._VALID_URL, url).group('id')
-        # We need to set the voir field for getting the file name
-        url = 'http://www.canalc2.tv/video.asp?idVideo=%s&voir=oui' % video_id
+        video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
-        file_name = self._search_regex(
-            r"so\.addVariable\('file','(.*?)'\);",
-            webpage, 'file name')
-        video_url = 'http://vod-flash.u-strasbg.fr:8080/' + file_name
+        video_url = self._search_regex(
+            r'jwplayer\("Player"\).setup\({[^}]*file: "([^"]+)"',
+            webpage, 'video_url')
+        formats = [{'url': video_url}]
+        if video_url.startswith('rtmp://'):
+            rtmp = re.search(r'^(?P<url>rtmp://[^/]+/(?P<app>.+))/(?P<play_path>mp4:.+)$', video_url)
+            formats[0].update({
+                'app': rtmp.group('app'),
+                'play_path': rtmp.group('play_path'),
+            })
 
         title = self._html_search_regex(
-            r'class="evenement8">(.*?)</a>', webpage, 'title')
+            r'(?s)class="[^"]*col_description[^"]*">.*?<h3>(.*?)</h3>', webpage, 'title')
 
         return {
             'id': video_id,
-            'ext': 'mp4',
-            'url': video_url,
+            'formats': formats,
             'title': title,
         }

From 6682049dee5e73b98e99e1359b959240d0920d6b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com>
Date: Sun, 18 Oct 2015 19:19:43 +0600
Subject: [PATCH 69/83] [canalc2] Improve rtmp extraction

---
 youtube_dl/extractor/canalc2.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/youtube_dl/extractor/canalc2.py b/youtube_dl/extractor/canalc2.py
index 66a9ff093..648af2e18 100644
--- a/youtube_dl/extractor/canalc2.py
+++ b/youtube_dl/extractor/canalc2.py
@@ -31,10 +31,12 @@ class Canalc2IE(InfoExtractor):
             webpage, 'video_url')
         formats = [{'url': video_url}]
         if video_url.startswith('rtmp://'):
-            rtmp = re.search(r'^(?P<url>rtmp://[^/]+/(?P<app>.+))/(?P<play_path>mp4:.+)$', video_url)
+            rtmp = re.search(r'^(?P<url>rtmp://[^/]+/(?P<app>.+/))(?P<play_path>mp4:.+)$', video_url)
             formats[0].update({
+                'url': rtmp.group('url'),
                 'app': rtmp.group('app'),
                 'play_path': rtmp.group('play_path'),
+                'page_url': url,
             })
 
         title = self._html_search_regex(

From ef6c868f23f2fe0d493831e0d4cba71c735bd160 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com>
Date: Sun, 18 Oct 2015 19:23:31 +0600
Subject: [PATCH 70/83] [canalc2] Improve some regexes

---
 youtube_dl/extractor/canalc2.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/youtube_dl/extractor/canalc2.py b/youtube_dl/extractor/canalc2.py
index 648af2e18..d9137e2ef 100644
--- a/youtube_dl/extractor/canalc2.py
+++ b/youtube_dl/extractor/canalc2.py
@@ -8,7 +8,7 @@ from .common import InfoExtractor
 
 class Canalc2IE(InfoExtractor):
     IE_NAME = 'canalc2.tv'
-    _VALID_URL = r'https?://(www\.)?canalc2\.tv/video/(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:www\.)?canalc2\.tv/video/(?P<id>\d+)'
 
     _TEST = {
         'url': 'http://www.canalc2.tv/video/12163',
@@ -27,8 +27,8 @@ class Canalc2IE(InfoExtractor):
         video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
         video_url = self._search_regex(
-            r'jwplayer\("Player"\).setup\({[^}]*file: "([^"]+)"',
-            webpage, 'video_url')
+            r'jwplayer\((["\'])Player\1\)\.setup\({[^}]*file\s*:\s*(["\'])(?P<file>.+?)\2',
+            webpage, 'video_url', group='file')
         formats = [{'url': video_url}]
         if video_url.startswith('rtmp://'):
             rtmp = re.search(r'^(?P<url>rtmp://[^/]+/(?P<app>.+/))(?P<play_path>mp4:.+)$', video_url)

From 14bddf35fbe8253e283042630e24b134996b2575 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com>
Date: Sun, 18 Oct 2015 19:23:52 +0600
Subject: [PATCH 71/83] [canalc2] Add ext

---
 youtube_dl/extractor/canalc2.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/youtube_dl/extractor/canalc2.py b/youtube_dl/extractor/canalc2.py
index d9137e2ef..ba82bb2b7 100644
--- a/youtube_dl/extractor/canalc2.py
+++ b/youtube_dl/extractor/canalc2.py
@@ -34,6 +34,7 @@ class Canalc2IE(InfoExtractor):
             rtmp = re.search(r'^(?P<url>rtmp://[^/]+/(?P<app>.+/))(?P<play_path>mp4:.+)$', video_url)
             formats[0].update({
                 'url': rtmp.group('url'),
+                'ext': 'flv',
                 'app': rtmp.group('app'),
                 'play_path': rtmp.group('play_path'),
                 'page_url': url,

From b1bf063503893192637f95e929d1a9147de59a7e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com>
Date: Sun, 18 Oct 2015 19:27:05 +0600
Subject: [PATCH 72/83] [canalc2] Extract duration

---
 youtube_dl/extractor/canalc2.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/youtube_dl/extractor/canalc2.py b/youtube_dl/extractor/canalc2.py
index ba82bb2b7..e326b8fbd 100644
--- a/youtube_dl/extractor/canalc2.py
+++ b/youtube_dl/extractor/canalc2.py
@@ -4,6 +4,7 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
+from ..utils import parse_duration
 
 
 class Canalc2IE(InfoExtractor):
@@ -42,9 +43,13 @@ class Canalc2IE(InfoExtractor):
 
         title = self._html_search_regex(
             r'(?s)class="[^"]*col_description[^"]*">.*?<h3>(.*?)</h3>', webpage, 'title')
+        duration = parse_duration(self._search_regex(
+            r'id=["\']video_duree["\'][^>]*>([^<]+)',
+            webpage, 'duration', fatal=False))
 
         return {
             'id': video_id,
-            'formats': formats,
             'title': title,
+            'duration': duration,
+            'formats': formats,
         }

From 608945d44a7e47fa5115295839c993af545936eb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com>
Date: Sun, 18 Oct 2015 19:27:22 +0600
Subject: [PATCH 73/83] [canalc2] Fix test

---
 youtube_dl/extractor/canalc2.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/youtube_dl/extractor/canalc2.py b/youtube_dl/extractor/canalc2.py
index e326b8fbd..f6a1ff381 100644
--- a/youtube_dl/extractor/canalc2.py
+++ b/youtube_dl/extractor/canalc2.py
@@ -16,8 +16,9 @@ class Canalc2IE(InfoExtractor):
         'md5': '060158428b650f896c542dfbb3d6487f',
         'info_dict': {
             'id': '12163',
-            'ext': 'mp4',
-            'title': 'Terrasses du Numérique'
+            'ext': 'flv',
+            'title': 'Terrasses du Numérique',
+            'duration': 122,
         },
         'params': {
             'skip_download': True,  # Requires rtmpdump

From dedd35c6bc33eb88f19b16eeb37498cee076c47a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com>
Date: Sun, 18 Oct 2015 19:59:18 +0600
Subject: [PATCH 74/83] [viewster] Fix failing m3u8

---
 youtube_dl/extractor/viewster.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/youtube_dl/extractor/viewster.py b/youtube_dl/extractor/viewster.py
index 632e57fb4..7cf930d69 100644
--- a/youtube_dl/extractor/viewster.py
+++ b/youtube_dl/extractor/viewster.py
@@ -131,10 +131,11 @@ class ViewsterIE(InfoExtractor):
                 formats.extend(self._extract_f4m_formats(
                     video_url, video_id, f4m_id='hds'))
             elif ext == 'm3u8':
-                formats.extend(self._extract_m3u8_formats(
+                m3u8_formats = self._extract_m3u8_formats(
                     video_url, video_id, 'mp4', m3u8_id='hls',
-                    fatal=False  # m3u8 sometimes fail
-                ))
+                    fatal=False)  # m3u8 sometimes fail
+                if m3u8_formats:
+                    formats.extend(m3u8_formats)
             else:
                 format_id = media.get('Bitrate')
                 f = {

From e36963e0eb57294f156a98c38df891dec41ebaa4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com>
Date: Sun, 18 Oct 2015 20:24:33 +0600
Subject: [PATCH 75/83] [eagleplatform] Identify hls formats

---
 youtube_dl/extractor/eagleplatform.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/youtube_dl/extractor/eagleplatform.py b/youtube_dl/extractor/eagleplatform.py
index e529b9b96..7bbf617d4 100644
--- a/youtube_dl/extractor/eagleplatform.py
+++ b/youtube_dl/extractor/eagleplatform.py
@@ -87,7 +87,7 @@ class EaglePlatformIE(InfoExtractor):
         m3u8_url = self._get_video_url(secure_m3u8, video_id, 'Downloading m3u8 JSON')
         formats = self._extract_m3u8_formats(
             m3u8_url, video_id,
-            'mp4', entry_protocol='m3u8_native')
+            'mp4', entry_protocol='m3u8_native', m3u8_id='hls')
 
         mp4_url = self._get_video_url(
             # Secure mp4 URL is constructed according to Player.prototype.mp4 from

From a6e0afa2bbc93d145b31911b8ce40c502994e2a1 Mon Sep 17 00:00:00 2001
From: Philipp Hagemeister <phihag@phihag.de>
Date: Sun, 18 Oct 2015 19:23:40 +0200
Subject: [PATCH 76/83] release 2015.10.18

---
 docs/supportedsites.md | 3 ++-
 youtube_dl/version.py  | 2 +-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/docs/supportedsites.md b/docs/supportedsites.md
index 47f7da86d..cfa665d88 100644
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@@ -588,7 +588,8 @@
  - **twitch:stream**
  - **twitch:video**
  - **twitch:vod**
- - **TwitterCard**
+ - **twitter**
+ - **twitter:card**
  - **Ubu**
  - **udemy**
  - **udemy:course**
diff --git a/youtube_dl/version.py b/youtube_dl/version.py
index 31d2a9dc0..660b0050b 100644
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,3 +1,3 @@
 from __future__ import unicode_literals
 
-__version__ = '2015.10.16'
+__version__ = '2015.10.18'

From 264b23e1a42378d52f8774a07c1d906cd1cff96c Mon Sep 17 00:00:00 2001
From: kennell <kevin@fileperms.org>
Date: Sun, 18 Oct 2015 19:56:22 +0200
Subject: [PATCH 77/83] adds thumbnail support for ZDF Mediathek extractor

---
 youtube_dl/extractor/zdf.py | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/youtube_dl/extractor/zdf.py b/youtube_dl/extractor/zdf.py
index 98f15177b..f376025e1 100644
--- a/youtube_dl/extractor/zdf.py
+++ b/youtube_dl/extractor/zdf.py
@@ -70,6 +70,23 @@ def extract_from_xml_url(ie, video_id, xml_url):
             '_available': is_available,
         }
 
+    def xml_to_thumbnails(fnode):
+        thumbnails = list()
+        for node in fnode:
+            width_x_height = node.attrib['key']
+            thumbnail = {
+                'url': node.text,
+                'width': int(width_x_height.split('x')[0]),
+                'height': int(width_x_height.split('x')[1])
+            }
+            thumbnails.append(thumbnail)
+        return thumbnails
+
+
+    thumbnail_nodes = doc.findall('.//teaserimages/teaserimage')
+    thumbnails = xml_to_thumbnails(thumbnail_nodes)
+    thumbnail = thumbnails[-1]['url']
+
     format_nodes = doc.findall('.//formitaeten/formitaet')
     formats = list(filter(
         lambda f: f['_available'],
@@ -81,6 +98,8 @@ def extract_from_xml_url(ie, video_id, xml_url):
         'title': title,
         'description': description,
         'duration': duration,
+        'thumbnail': thumbnail,
+        'thumbnails': thumbnails,
         'uploader': uploader,
         'uploader_id': uploader_id,
         'upload_date': upload_date,

From d762f86e940ad656e8f7e7b93636292e4cf36de5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com>
Date: Mon, 19 Oct 2015 00:11:16 +0600
Subject: [PATCH 78/83] [ok] Extend _VALID_URL

---
 youtube_dl/extractor/odnoklassniki.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/youtube_dl/extractor/odnoklassniki.py b/youtube_dl/extractor/odnoklassniki.py
index ccc88cfb1..184c7a323 100644
--- a/youtube_dl/extractor/odnoklassniki.py
+++ b/youtube_dl/extractor/odnoklassniki.py
@@ -13,7 +13,7 @@ from ..utils import (
 
 
 class OdnoklassnikiIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?(?:odnoklassniki|ok)\.ru/(?:video|web-api/video/moviePlayer)/(?P<id>[\d-]+)'
+    _VALID_URL = r'https?://(?:www\.)?(?:odnoklassniki|ok)\.ru/(?:video(?:embed)?|web-api/video/moviePlayer)/(?P<id>[\d-]+)'
     _TESTS = [{
         # metadata in JSON
         'url': 'http://ok.ru/video/20079905452',
@@ -66,6 +66,9 @@ class OdnoklassnikiIE(InfoExtractor):
     }, {
         'url': 'http://www.ok.ru/video/20648036891',
         'only_matching': True,
+    }, {
+        'url': 'http://www.ok.ru/videoembed/20648036891',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):

From 8cc83d301dd0e8029aff804e362860d36e3d7e7a Mon Sep 17 00:00:00 2001
From: kennell <kevin@fileperms.org>
Date: Sun, 18 Oct 2015 20:47:42 +0200
Subject: [PATCH 79/83] use int_or_none, check if attrib exists, remove
 thumbnail

---
 youtube_dl/extractor/zdf.py | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/youtube_dl/extractor/zdf.py b/youtube_dl/extractor/zdf.py
index f376025e1..d41c4e712 100644
--- a/youtube_dl/extractor/zdf.py
+++ b/youtube_dl/extractor/zdf.py
@@ -73,19 +73,17 @@ def extract_from_xml_url(ie, video_id, xml_url):
     def xml_to_thumbnails(fnode):
         thumbnails = list()
         for node in fnode:
-            width_x_height = node.attrib['key']
-            thumbnail = {
-                'url': node.text,
-                'width': int(width_x_height.split('x')[0]),
-                'height': int(width_x_height.split('x')[1])
-            }
+            thumbnail = {'url': node.text}
+            if 'key' in node.attrib:
+                width_x_height = node.attrib['key']
+                thumbnail['width'] = int_or_none(width_x_height.split('x')[0])
+                thumbnail['height'] = int_or_none(width_x_height.split('x')[1])
             thumbnails.append(thumbnail)
         return thumbnails
 
 
     thumbnail_nodes = doc.findall('.//teaserimages/teaserimage')
     thumbnails = xml_to_thumbnails(thumbnail_nodes)
-    thumbnail = thumbnails[-1]['url']
 
     format_nodes = doc.findall('.//formitaeten/formitaet')
     formats = list(filter(
@@ -98,7 +96,6 @@ def extract_from_xml_url(ie, video_id, xml_url):
         'title': title,
         'description': description,
         'duration': duration,
-        'thumbnail': thumbnail,
         'thumbnails': thumbnails,
         'uploader': uploader,
         'uploader_id': uploader_id,

From b243340f0ce311443a15a2dfd4356a9504e18c04 Mon Sep 17 00:00:00 2001
From: kennell <kevin@fileperms.org>
Date: Sun, 18 Oct 2015 21:07:52 +0200
Subject: [PATCH 80/83] check if key attrib matches resolution pattern

---
 youtube_dl/extractor/zdf.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/youtube_dl/extractor/zdf.py b/youtube_dl/extractor/zdf.py
index d41c4e712..ed385450c 100644
--- a/youtube_dl/extractor/zdf.py
+++ b/youtube_dl/extractor/zdf.py
@@ -75,9 +75,9 @@ def extract_from_xml_url(ie, video_id, xml_url):
         for node in fnode:
             thumbnail = {'url': node.text}
             if 'key' in node.attrib:
-                width_x_height = node.attrib['key']
-                thumbnail['width'] = int_or_none(width_x_height.split('x')[0])
-                thumbnail['height'] = int_or_none(width_x_height.split('x')[1])
+                if re.match("^[0-9]+x[0-9]+$", node.attrib['key']):
+                    thumbnail['width'] = int_or_none(node.attrib['key'].split('x')[0])
+                    thumbnail['height'] = int_or_none(node.attrib['key'].split('x')[1])
             thumbnails.append(thumbnail)
         return thumbnails
 

From 2038ad6ee71c842420b83cb6c5ce3c6898e8e380 Mon Sep 17 00:00:00 2001
From: "Sergey M." <dstftw@gmail.com>
Date: Mon, 19 Oct 2015 01:12:41 +0600
Subject: [PATCH 81/83] [README.md] Add uploader extraction sample in example
 extractor

---
 README.md | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index cf4aebf3d..a6ec9619c 100644
--- a/README.md
+++ b/README.md
@@ -710,12 +710,13 @@ If you want to add support for a new site, you can follow this quick list (assum
             webpage = self._download_webpage(url, video_id)
 
             # TODO more code goes here, for example ...
-            title = self._html_search_regex(r'<h1>(.*?)</h1>', webpage, 'title')
+            title = self._html_search_regex(r'<h1>(.+?)</h1>', webpage, 'title')
 
             return {
                 'id': video_id,
                 'title': title,
                 'description': self._og_search_description(webpage),
+                'uploader': self._search_regex(r'<div[^>]+id="uploader"[^>]*>([^<]+)<', webpage, 'uploader', fatal=False),
                 # TODO more properties (see youtube_dl/extractor/common.py)
             }
     ```

From b7cedb16043c60d4032b206a83539acbd39f994f Mon Sep 17 00:00:00 2001
From: kennell <kevin@fileperms.org>
Date: Sun, 18 Oct 2015 21:25:26 +0200
Subject: [PATCH 82/83] simplify thumbnail dict building

---
 youtube_dl/extractor/zdf.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/youtube_dl/extractor/zdf.py b/youtube_dl/extractor/zdf.py
index ed385450c..c2b196504 100644
--- a/youtube_dl/extractor/zdf.py
+++ b/youtube_dl/extractor/zdf.py
@@ -75,9 +75,10 @@ def extract_from_xml_url(ie, video_id, xml_url):
         for node in fnode:
             thumbnail = {'url': node.text}
             if 'key' in node.attrib:
-                if re.match("^[0-9]+x[0-9]+$", node.attrib['key']):
-                    thumbnail['width'] = int_or_none(node.attrib['key'].split('x')[0])
-                    thumbnail['height'] = int_or_none(node.attrib['key'].split('x')[1])
+                m = re.match('^([0-9]+)x([0-9]+)$', node.attrib['key'])
+                if m:
+                    thumbnail['width'] = int(m.group(1))
+                    thumbnail['height'] = int(m.group(2))
             thumbnails.append(thumbnail)
         return thumbnails
 

From 7b091c370c0f187545df8b1b1cc990fcf95df108 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com>
Date: Mon, 19 Oct 2015 01:48:05 +0600
Subject: [PATCH 83/83] [zdf] Modernize and PEP 8

---
 youtube_dl/extractor/zdf.py | 43 +++++++++++++++++++------------------
 1 file changed, 22 insertions(+), 21 deletions(-)

diff --git a/youtube_dl/extractor/zdf.py b/youtube_dl/extractor/zdf.py
index c2b196504..a795f56b3 100644
--- a/youtube_dl/extractor/zdf.py
+++ b/youtube_dl/extractor/zdf.py
@@ -9,6 +9,7 @@ from ..utils import (
     int_or_none,
     unified_strdate,
     OnDemandPagedList,
+    xpath_text,
 )
 
 
@@ -19,13 +20,11 @@ def extract_from_xml_url(ie, video_id, xml_url):
         errnote='Failed to download video info')
 
     title = doc.find('.//information/title').text
-    description = doc.find('.//information/detail').text
-    duration = int(doc.find('.//details/lengthSec').text)
-    uploader_node = doc.find('.//details/originChannelTitle')
-    uploader = None if uploader_node is None else uploader_node.text
-    uploader_id_node = doc.find('.//details/originChannelId')
-    uploader_id = None if uploader_id_node is None else uploader_id_node.text
-    upload_date = unified_strdate(doc.find('.//details/airtime').text)
+    description = xpath_text(doc, './/information/detail', 'description')
+    duration = int_or_none(xpath_text(doc, './/details/lengthSec', 'duration'))
+    uploader = xpath_text(doc, './/details/originChannelTitle', 'uploader')
+    uploader_id = xpath_text(doc, './/details/originChannelId', 'uploader id')
+    upload_date = unified_strdate(xpath_text(doc, './/details/airtime', 'upload date'))
 
     def xml_to_format(fnode):
         video_url = fnode.find('url').text
@@ -40,15 +39,14 @@ def extract_from_xml_url(ie, video_id, xml_url):
         ext = format_m.group('container')
         proto = format_m.group('proto').lower()
 
-        quality = fnode.find('./quality').text
-        abr = int(fnode.find('./audioBitrate').text) // 1000
-        vbr_node = fnode.find('./videoBitrate')
-        vbr = None if vbr_node is None else int(vbr_node.text) // 1000
+        quality = xpath_text(fnode, './quality', 'quality')
+        abr = int_or_none(xpath_text(fnode, './audioBitrate', 'abr'), 1000)
+        vbr = int_or_none(xpath_text(fnode, './videoBitrate', 'vbr'), 1000)
 
-        width_node = fnode.find('./width')
-        width = None if width_node is None else int_or_none(width_node.text)
-        height_node = fnode.find('./height')
-        height = None if height_node is None else int_or_none(height_node.text)
+        width = int_or_none(xpath_text(fnode, './width', 'width'))
+        height = int_or_none(xpath_text(fnode, './height', 'height'))
+
+        filesize = int_or_none(xpath_text(fnode, './filesize', 'filesize'))
 
         format_note = ''
         if not format_note:
@@ -64,16 +62,21 @@ def extract_from_xml_url(ie, video_id, xml_url):
             'vbr': vbr,
             'width': width,
             'height': height,
-            'filesize': int_or_none(fnode.find('./filesize').text),
+            'filesize': filesize,
             'format_note': format_note,
             'protocol': proto,
             '_available': is_available,
         }
 
     def xml_to_thumbnails(fnode):
-        thumbnails = list()
+        thumbnails = []
         for node in fnode:
-            thumbnail = {'url': node.text}
+            thumbnail_url = node.text
+            if not thumbnail_url:
+                continue
+            thumbnail = {
+                'url': thumbnail_url,
+            }
             if 'key' in node.attrib:
                 m = re.match('^([0-9]+)x([0-9]+)$', node.attrib['key'])
                 if m:
@@ -82,9 +85,7 @@ def extract_from_xml_url(ie, video_id, xml_url):
             thumbnails.append(thumbnail)
         return thumbnails
 
-
-    thumbnail_nodes = doc.findall('.//teaserimages/teaserimage')
-    thumbnails = xml_to_thumbnails(thumbnail_nodes)
+    thumbnails = xml_to_thumbnails(doc.findall('.//teaserimages/teaserimage'))
 
     format_nodes = doc.findall('.//formitaeten/formitaet')
     formats = list(filter(