Merge branch 'master' of https://github.com/rg3/youtube-dl
This commit is contained in:
commit
e55d4db42b
6
.github/ISSUE_TEMPLATE.md
vendored
6
.github/ISSUE_TEMPLATE.md
vendored
@ -6,8 +6,8 @@
|
|||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.07.07*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
|
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.07.11*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
|
||||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.07.07**
|
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.07.11**
|
||||||
|
|
||||||
### Before submitting an *issue* make sure you have:
|
### Before submitting an *issue* make sure you have:
|
||||||
- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
|
- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
|
||||||
@ -35,7 +35,7 @@ $ youtube-dl -v <your command line>
|
|||||||
[debug] User config: []
|
[debug] User config: []
|
||||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||||
[debug] youtube-dl version 2016.07.07
|
[debug] youtube-dl version 2016.07.11
|
||||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
||||||
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
||||||
[debug] Proxy map: {}
|
[debug] Proxy map: {}
|
||||||
|
1
AUTHORS
1
AUTHORS
@ -177,3 +177,4 @@ Roman Tsiupa
|
|||||||
Artur Krysiak
|
Artur Krysiak
|
||||||
Jakub Adam Wieczorek
|
Jakub Adam Wieczorek
|
||||||
Aleksandar Topuzović
|
Aleksandar Topuzović
|
||||||
|
Nehal Patel
|
||||||
|
@ -224,6 +224,7 @@
|
|||||||
- **Firstpost**
|
- **Firstpost**
|
||||||
- **FiveTV**
|
- **FiveTV**
|
||||||
- **Flickr**
|
- **Flickr**
|
||||||
|
- **Flipagram**
|
||||||
- **Folketinget**: Folketinget (ft.dk; Danish parliament)
|
- **Folketinget**: Folketinget (ft.dk; Danish parliament)
|
||||||
- **FootyRoom**
|
- **FootyRoom**
|
||||||
- **Formula1**
|
- **Formula1**
|
||||||
@ -553,6 +554,7 @@
|
|||||||
- **RICE**
|
- **RICE**
|
||||||
- **RingTV**
|
- **RingTV**
|
||||||
- **RockstarGames**
|
- **RockstarGames**
|
||||||
|
- **RoosterTeeth**
|
||||||
- **RottenTomatoes**
|
- **RottenTomatoes**
|
||||||
- **Roxwel**
|
- **Roxwel**
|
||||||
- **RTBF**
|
- **RTBF**
|
||||||
|
@ -81,6 +81,7 @@ from youtube_dl.utils import (
|
|||||||
cli_option,
|
cli_option,
|
||||||
cli_valueless_option,
|
cli_valueless_option,
|
||||||
cli_bool_option,
|
cli_bool_option,
|
||||||
|
parse_codecs,
|
||||||
)
|
)
|
||||||
from youtube_dl.compat import (
|
from youtube_dl.compat import (
|
||||||
compat_chr,
|
compat_chr,
|
||||||
@ -608,6 +609,29 @@ class TestUtil(unittest.TestCase):
|
|||||||
limit_length('foo bar baz asd', 12).startswith('foo bar'))
|
limit_length('foo bar baz asd', 12).startswith('foo bar'))
|
||||||
self.assertTrue('...' in limit_length('foo bar baz asd', 12))
|
self.assertTrue('...' in limit_length('foo bar baz asd', 12))
|
||||||
|
|
||||||
|
def test_parse_codecs(self):
|
||||||
|
self.assertEqual(parse_codecs(''), {})
|
||||||
|
self.assertEqual(parse_codecs('avc1.77.30, mp4a.40.2'), {
|
||||||
|
'vcodec': 'avc1.77.30',
|
||||||
|
'acodec': 'mp4a.40.2',
|
||||||
|
})
|
||||||
|
self.assertEqual(parse_codecs('mp4a.40.2'), {
|
||||||
|
'vcodec': 'none',
|
||||||
|
'acodec': 'mp4a.40.2',
|
||||||
|
})
|
||||||
|
self.assertEqual(parse_codecs('mp4a.40.5,avc1.42001e'), {
|
||||||
|
'vcodec': 'avc1.42001e',
|
||||||
|
'acodec': 'mp4a.40.5',
|
||||||
|
})
|
||||||
|
self.assertEqual(parse_codecs('avc3.640028'), {
|
||||||
|
'vcodec': 'avc3.640028',
|
||||||
|
'acodec': 'none',
|
||||||
|
})
|
||||||
|
self.assertEqual(parse_codecs(', h264,,newcodec,aac'), {
|
||||||
|
'vcodec': 'h264',
|
||||||
|
'acodec': 'aac',
|
||||||
|
})
|
||||||
|
|
||||||
def test_escape_rfc3986(self):
|
def test_escape_rfc3986(self):
|
||||||
reserved = "!*'();:@&=+$,/?#[]"
|
reserved = "!*'();:@&=+$,/?#[]"
|
||||||
unreserved = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_.~'
|
unreserved = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_.~'
|
||||||
|
@ -22,6 +22,7 @@ class AnimeOnDemandIE(InfoExtractor):
|
|||||||
_APPLY_HTML5_URL = 'https://www.anime-on-demand.de/html5apply'
|
_APPLY_HTML5_URL = 'https://www.anime-on-demand.de/html5apply'
|
||||||
_NETRC_MACHINE = 'animeondemand'
|
_NETRC_MACHINE = 'animeondemand'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
|
# jap, OmU
|
||||||
'url': 'https://www.anime-on-demand.de/anime/161',
|
'url': 'https://www.anime-on-demand.de/anime/161',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '161',
|
'id': '161',
|
||||||
@ -30,17 +31,21 @@ class AnimeOnDemandIE(InfoExtractor):
|
|||||||
},
|
},
|
||||||
'playlist_mincount': 4,
|
'playlist_mincount': 4,
|
||||||
}, {
|
}, {
|
||||||
# Film wording is used instead of Episode
|
# Film wording is used instead of Episode, ger/jap, Dub/OmU
|
||||||
'url': 'https://www.anime-on-demand.de/anime/39',
|
'url': 'https://www.anime-on-demand.de/anime/39',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}, {
|
}, {
|
||||||
# Episodes without titles
|
# Episodes without titles, jap, OmU
|
||||||
'url': 'https://www.anime-on-demand.de/anime/162',
|
'url': 'https://www.anime-on-demand.de/anime/162',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}, {
|
}, {
|
||||||
# ger/jap, Dub/OmU, account required
|
# ger/jap, Dub/OmU, account required
|
||||||
'url': 'https://www.anime-on-demand.de/anime/169',
|
'url': 'https://www.anime-on-demand.de/anime/169',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
# Full length film, non-series, ger/jap, Dub/OmU, account required
|
||||||
|
'url': 'https://www.anime-on-demand.de/anime/185',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _login(self):
|
def _login(self):
|
||||||
@ -110,35 +115,12 @@ class AnimeOnDemandIE(InfoExtractor):
|
|||||||
|
|
||||||
entries = []
|
entries = []
|
||||||
|
|
||||||
for num, episode_html in enumerate(re.findall(
|
def extract_info(html, video_id, num=None):
|
||||||
r'(?s)<h3[^>]+class="episodebox-title".+?>Episodeninhalt<', webpage), 1):
|
title, description = [None] * 2
|
||||||
episodebox_title = self._search_regex(
|
|
||||||
(r'class="episodebox-title"[^>]+title=(["\'])(?P<title>.+?)\1',
|
|
||||||
r'class="episodebox-title"[^>]+>(?P<title>.+?)<'),
|
|
||||||
episode_html, 'episodebox title', default=None, group='title')
|
|
||||||
if not episodebox_title:
|
|
||||||
continue
|
|
||||||
|
|
||||||
episode_number = int(self._search_regex(
|
|
||||||
r'(?:Episode|Film)\s*(\d+)',
|
|
||||||
episodebox_title, 'episode number', default=num))
|
|
||||||
episode_title = self._search_regex(
|
|
||||||
r'(?:Episode|Film)\s*\d+\s*-\s*(.+)',
|
|
||||||
episodebox_title, 'episode title', default=None)
|
|
||||||
|
|
||||||
video_id = 'episode-%d' % episode_number
|
|
||||||
|
|
||||||
common_info = {
|
|
||||||
'id': video_id,
|
|
||||||
'series': anime_title,
|
|
||||||
'episode': episode_title,
|
|
||||||
'episode_number': episode_number,
|
|
||||||
}
|
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
|
|
||||||
for input_ in re.findall(
|
for input_ in re.findall(
|
||||||
r'<input[^>]+class=["\'].*?streamstarter_html5[^>]+>', episode_html):
|
r'<input[^>]+class=["\'].*?streamstarter_html5[^>]+>', html):
|
||||||
attributes = extract_attributes(input_)
|
attributes = extract_attributes(input_)
|
||||||
playlist_urls = []
|
playlist_urls = []
|
||||||
for playlist_key in ('data-playlist', 'data-otherplaylist'):
|
for playlist_key in ('data-playlist', 'data-otherplaylist'):
|
||||||
@ -161,7 +143,7 @@ class AnimeOnDemandIE(InfoExtractor):
|
|||||||
format_id_list.append(lang)
|
format_id_list.append(lang)
|
||||||
if kind:
|
if kind:
|
||||||
format_id_list.append(kind)
|
format_id_list.append(kind)
|
||||||
if not format_id_list:
|
if not format_id_list and num is not None:
|
||||||
format_id_list.append(compat_str(num))
|
format_id_list.append(compat_str(num))
|
||||||
format_id = '-'.join(format_id_list)
|
format_id = '-'.join(format_id_list)
|
||||||
format_note = ', '.join(filter(None, (kind, lang_note)))
|
format_note = ', '.join(filter(None, (kind, lang_note)))
|
||||||
@ -215,28 +197,74 @@ class AnimeOnDemandIE(InfoExtractor):
|
|||||||
})
|
})
|
||||||
formats.extend(file_formats)
|
formats.extend(file_formats)
|
||||||
|
|
||||||
if formats:
|
return {
|
||||||
self._sort_formats(formats)
|
'title': title,
|
||||||
|
'description': description,
|
||||||
|
'formats': formats,
|
||||||
|
}
|
||||||
|
|
||||||
|
def extract_entries(html, video_id, common_info, num=None):
|
||||||
|
info = extract_info(html, video_id, num)
|
||||||
|
|
||||||
|
if info['formats']:
|
||||||
|
self._sort_formats(info['formats'])
|
||||||
f = common_info.copy()
|
f = common_info.copy()
|
||||||
f.update({
|
f.update(info)
|
||||||
'title': title,
|
|
||||||
'description': description,
|
|
||||||
'formats': formats,
|
|
||||||
})
|
|
||||||
entries.append(f)
|
entries.append(f)
|
||||||
|
|
||||||
# Extract teaser only when full episode is not available
|
# Extract teaser/trailer only when full episode is not available
|
||||||
if not formats:
|
if not info['formats']:
|
||||||
m = re.search(
|
m = re.search(
|
||||||
r'data-dialog-header=(["\'])(?P<title>.+?)\1[^>]+href=(["\'])(?P<href>.+?)\3[^>]*>Teaser<',
|
r'data-dialog-header=(["\'])(?P<title>.+?)\1[^>]+href=(["\'])(?P<href>.+?)\3[^>]*>(?P<kind>Teaser|Trailer)<',
|
||||||
episode_html)
|
html)
|
||||||
if m:
|
if m:
|
||||||
f = common_info.copy()
|
f = common_info.copy()
|
||||||
f.update({
|
f.update({
|
||||||
'id': '%s-teaser' % f['id'],
|
'id': '%s-%s' % (f['id'], m.group('kind').lower()),
|
||||||
'title': m.group('title'),
|
'title': m.group('title'),
|
||||||
'url': compat_urlparse.urljoin(url, m.group('href')),
|
'url': compat_urlparse.urljoin(url, m.group('href')),
|
||||||
})
|
})
|
||||||
entries.append(f)
|
entries.append(f)
|
||||||
|
|
||||||
|
def extract_episodes(html):
|
||||||
|
for num, episode_html in enumerate(re.findall(
|
||||||
|
r'(?s)<h3[^>]+class="episodebox-title".+?>Episodeninhalt<', html), 1):
|
||||||
|
episodebox_title = self._search_regex(
|
||||||
|
(r'class="episodebox-title"[^>]+title=(["\'])(?P<title>.+?)\1',
|
||||||
|
r'class="episodebox-title"[^>]+>(?P<title>.+?)<'),
|
||||||
|
episode_html, 'episodebox title', default=None, group='title')
|
||||||
|
if not episodebox_title:
|
||||||
|
continue
|
||||||
|
|
||||||
|
episode_number = int(self._search_regex(
|
||||||
|
r'(?:Episode|Film)\s*(\d+)',
|
||||||
|
episodebox_title, 'episode number', default=num))
|
||||||
|
episode_title = self._search_regex(
|
||||||
|
r'(?:Episode|Film)\s*\d+\s*-\s*(.+)',
|
||||||
|
episodebox_title, 'episode title', default=None)
|
||||||
|
|
||||||
|
video_id = 'episode-%d' % episode_number
|
||||||
|
|
||||||
|
common_info = {
|
||||||
|
'id': video_id,
|
||||||
|
'series': anime_title,
|
||||||
|
'episode': episode_title,
|
||||||
|
'episode_number': episode_number,
|
||||||
|
}
|
||||||
|
|
||||||
|
extract_entries(episode_html, video_id, common_info)
|
||||||
|
|
||||||
|
def extract_film(html, video_id):
|
||||||
|
common_info = {
|
||||||
|
'id': anime_id,
|
||||||
|
'title': anime_title,
|
||||||
|
'description': anime_description,
|
||||||
|
}
|
||||||
|
extract_entries(html, video_id, common_info)
|
||||||
|
|
||||||
|
extract_episodes(webpage)
|
||||||
|
|
||||||
|
if not entries:
|
||||||
|
extract_film(webpage, anime_id)
|
||||||
|
|
||||||
return self.playlist_result(entries, anime_id, anime_title, anime_description)
|
return self.playlist_result(entries, anime_id, anime_title, anime_description)
|
||||||
|
@ -13,6 +13,7 @@ from ..utils import (
|
|||||||
parse_duration,
|
parse_duration,
|
||||||
unified_strdate,
|
unified_strdate,
|
||||||
xpath_text,
|
xpath_text,
|
||||||
|
update_url_query,
|
||||||
)
|
)
|
||||||
from ..compat import compat_etree_fromstring
|
from ..compat import compat_etree_fromstring
|
||||||
|
|
||||||
@ -34,6 +35,7 @@ class ARDMediathekIE(InfoExtractor):
|
|||||||
# m3u8 download
|
# m3u8 download
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
|
'skip': 'HTTP Error 404: Not Found',
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.ardmediathek.de/tv/Tatort/Tatort-Scheinwelten-H%C3%B6rfassung-Video/Das-Erste/Video?documentId=29522730&bcastId=602916',
|
'url': 'http://www.ardmediathek.de/tv/Tatort/Tatort-Scheinwelten-H%C3%B6rfassung-Video/Das-Erste/Video?documentId=29522730&bcastId=602916',
|
||||||
'md5': 'f4d98b10759ac06c0072bbcd1f0b9e3e',
|
'md5': 'f4d98b10759ac06c0072bbcd1f0b9e3e',
|
||||||
@ -44,6 +46,7 @@ class ARDMediathekIE(InfoExtractor):
|
|||||||
'description': 'md5:196392e79876d0ac94c94e8cdb2875f1',
|
'description': 'md5:196392e79876d0ac94c94e8cdb2875f1',
|
||||||
'duration': 5252,
|
'duration': 5252,
|
||||||
},
|
},
|
||||||
|
'skip': 'HTTP Error 404: Not Found',
|
||||||
}, {
|
}, {
|
||||||
# audio
|
# audio
|
||||||
'url': 'http://www.ardmediathek.de/tv/WDR-H%C3%B6rspiel-Speicher/Tod-eines-Fu%C3%9Fballers/WDR-3/Audio-Podcast?documentId=28488308&bcastId=23074086',
|
'url': 'http://www.ardmediathek.de/tv/WDR-H%C3%B6rspiel-Speicher/Tod-eines-Fu%C3%9Fballers/WDR-3/Audio-Podcast?documentId=28488308&bcastId=23074086',
|
||||||
@ -55,6 +58,7 @@ class ARDMediathekIE(InfoExtractor):
|
|||||||
'description': 'md5:f6e39f3461f0e1f54bfa48c8875c86ef',
|
'description': 'md5:f6e39f3461f0e1f54bfa48c8875c86ef',
|
||||||
'duration': 3240,
|
'duration': 3240,
|
||||||
},
|
},
|
||||||
|
'skip': 'HTTP Error 404: Not Found',
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://mediathek.daserste.de/sendungen_a-z/328454_anne-will/22429276_vertrauen-ist-gut-spionieren-ist-besser-geht',
|
'url': 'http://mediathek.daserste.de/sendungen_a-z/328454_anne-will/22429276_vertrauen-ist-gut-spionieren-ist-besser-geht',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
@ -113,11 +117,14 @@ class ARDMediathekIE(InfoExtractor):
|
|||||||
continue
|
continue
|
||||||
if ext == 'f4m':
|
if ext == 'f4m':
|
||||||
formats.extend(self._extract_f4m_formats(
|
formats.extend(self._extract_f4m_formats(
|
||||||
stream_url + '?hdcore=3.1.1&plugin=aasp-3.1.1.69.124',
|
update_url_query(stream_url, {
|
||||||
video_id, preference=-1, f4m_id='hds', fatal=False))
|
'hdcore': '3.1.1',
|
||||||
|
'plugin': 'aasp-3.1.1.69.124'
|
||||||
|
}),
|
||||||
|
video_id, f4m_id='hds', fatal=False))
|
||||||
elif ext == 'm3u8':
|
elif ext == 'm3u8':
|
||||||
formats.extend(self._extract_m3u8_formats(
|
formats.extend(self._extract_m3u8_formats(
|
||||||
stream_url, video_id, 'mp4', preference=1, m3u8_id='hls', fatal=False))
|
stream_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
|
||||||
else:
|
else:
|
||||||
if server and server.startswith('rtmp'):
|
if server and server.startswith('rtmp'):
|
||||||
f = {
|
f = {
|
||||||
@ -231,7 +238,8 @@ class ARDIE(InfoExtractor):
|
|||||||
'title': 'Die Story im Ersten: Mission unter falscher Flagge',
|
'title': 'Die Story im Ersten: Mission unter falscher Flagge',
|
||||||
'upload_date': '20140804',
|
'upload_date': '20140804',
|
||||||
'thumbnail': 're:^https?://.*\.jpg$',
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
}
|
},
|
||||||
|
'skip': 'HTTP Error 404: Not Found',
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
@ -2,11 +2,15 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import remove_end
|
from ..utils import (
|
||||||
|
ExtractorError,
|
||||||
|
remove_end,
|
||||||
|
)
|
||||||
|
from .rudo import RudoIE
|
||||||
|
|
||||||
|
|
||||||
class BioBioChileTVIE(InfoExtractor):
|
class BioBioChileTVIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://tv\.biobiochile\.cl/notas/(?:[^/]+/)+(?P<id>[^/]+)\.shtml'
|
_VALID_URL = r'https?://(?:tv|www)\.biobiochile\.cl/(?:notas|noticias)/(?:[^/]+/)+(?P<id>[^/]+)\.shtml'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://tv.biobiochile.cl/notas/2015/10/21/sobre-camaras-y-camarillas-parlamentarias.shtml',
|
'url': 'http://tv.biobiochile.cl/notas/2015/10/21/sobre-camaras-y-camarillas-parlamentarias.shtml',
|
||||||
@ -18,6 +22,7 @@ class BioBioChileTVIE(InfoExtractor):
|
|||||||
'thumbnail': 're:^https?://.*\.jpg$',
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
'uploader': 'Fernando Atria',
|
'uploader': 'Fernando Atria',
|
||||||
},
|
},
|
||||||
|
'skip': 'URL expired and redirected to http://www.biobiochile.cl/portada/bbtv/index.html',
|
||||||
}, {
|
}, {
|
||||||
# different uploader layout
|
# different uploader layout
|
||||||
'url': 'http://tv.biobiochile.cl/notas/2016/03/18/natalia-valdebenito-repasa-a-diputado-hasbun-paso-a-la-categoria-de-hablar-brutalidades.shtml',
|
'url': 'http://tv.biobiochile.cl/notas/2016/03/18/natalia-valdebenito-repasa-a-diputado-hasbun-paso-a-la-categoria-de-hablar-brutalidades.shtml',
|
||||||
@ -32,6 +37,16 @@ class BioBioChileTVIE(InfoExtractor):
|
|||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
|
'skip': 'URL expired and redirected to http://www.biobiochile.cl/portada/bbtv/index.html',
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.biobiochile.cl/noticias/bbtv/comentarios-bio-bio/2016/07/08/edecanes-del-congreso-figuras-decorativas-que-le-cuestan-muy-caro-a-los-chilenos.shtml',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'edecanes-del-congreso-figuras-decorativas-que-le-cuestan-muy-caro-a-los-chilenos',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'uploader': '(none)',
|
||||||
|
'upload_date': '20160708',
|
||||||
|
'title': 'Edecanes del Congreso: Figuras decorativas que le cuestan muy caro a los chilenos',
|
||||||
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://tv.biobiochile.cl/notas/2015/10/22/ninos-transexuales-de-quien-es-la-decision.shtml',
|
'url': 'http://tv.biobiochile.cl/notas/2015/10/22/ninos-transexuales-de-quien-es-la-decision.shtml',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
@ -45,42 +60,22 @@ class BioBioChileTVIE(InfoExtractor):
|
|||||||
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
|
rudo_url = RudoIE._extract_url(webpage)
|
||||||
|
if not rudo_url:
|
||||||
|
raise ExtractorError('No videos found')
|
||||||
|
|
||||||
title = remove_end(self._og_search_title(webpage), ' - BioBioChile TV')
|
title = remove_end(self._og_search_title(webpage), ' - BioBioChile TV')
|
||||||
|
|
||||||
file_url = self._search_regex(
|
|
||||||
r'loadFWPlayerVideo\([^,]+,\s*(["\'])(?P<url>.+?)\1',
|
|
||||||
webpage, 'file url', group='url')
|
|
||||||
|
|
||||||
base_url = self._search_regex(
|
|
||||||
r'file\s*:\s*(["\'])(?P<url>.+?)\1\s*\+\s*fileURL', webpage,
|
|
||||||
'base url', default='http://unlimited2-cl.digitalproserver.com/bbtv/',
|
|
||||||
group='url')
|
|
||||||
|
|
||||||
formats = self._extract_m3u8_formats(
|
|
||||||
'%s%s/playlist.m3u8' % (base_url, file_url), video_id, 'mp4',
|
|
||||||
entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)
|
|
||||||
f = {
|
|
||||||
'url': '%s%s' % (base_url, file_url),
|
|
||||||
'format_id': 'http',
|
|
||||||
'protocol': 'http',
|
|
||||||
'preference': 1,
|
|
||||||
}
|
|
||||||
if formats:
|
|
||||||
f_copy = formats[-1].copy()
|
|
||||||
f_copy.update(f)
|
|
||||||
f = f_copy
|
|
||||||
formats.append(f)
|
|
||||||
self._sort_formats(formats)
|
|
||||||
|
|
||||||
thumbnail = self._og_search_thumbnail(webpage)
|
thumbnail = self._og_search_thumbnail(webpage)
|
||||||
uploader = self._html_search_regex(
|
uploader = self._html_search_regex(
|
||||||
r'<a[^>]+href=["\']https?://busca\.biobiochile\.cl/author[^>]+>(.+?)</a>',
|
r'<a[^>]+href=["\']https?://(?:busca|www)\.biobiochile\.cl/(?:lista/)?(?:author|autor)[^>]+>(.+?)</a>',
|
||||||
webpage, 'uploader', fatal=False)
|
webpage, 'uploader', fatal=False)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
|
'_type': 'url_transparent',
|
||||||
|
'url': rudo_url,
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
'thumbnail': thumbnail,
|
'thumbnail': thumbnail,
|
||||||
'uploader': uploader,
|
'uploader': uploader,
|
||||||
'formats': formats,
|
|
||||||
}
|
}
|
||||||
|
@ -44,6 +44,7 @@ from ..utils import (
|
|||||||
sanitized_Request,
|
sanitized_Request,
|
||||||
unescapeHTML,
|
unescapeHTML,
|
||||||
unified_strdate,
|
unified_strdate,
|
||||||
|
unified_timestamp,
|
||||||
url_basename,
|
url_basename,
|
||||||
xpath_element,
|
xpath_element,
|
||||||
xpath_text,
|
xpath_text,
|
||||||
@ -54,6 +55,8 @@ from ..utils import (
|
|||||||
update_Request,
|
update_Request,
|
||||||
update_url_query,
|
update_url_query,
|
||||||
parse_m3u8_attributes,
|
parse_m3u8_attributes,
|
||||||
|
extract_attributes,
|
||||||
|
parse_codecs,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -161,6 +164,7 @@ class InfoExtractor(object):
|
|||||||
* "height" (optional, int)
|
* "height" (optional, int)
|
||||||
* "resolution" (optional, string "{width}x{height"},
|
* "resolution" (optional, string "{width}x{height"},
|
||||||
deprecated)
|
deprecated)
|
||||||
|
* "filesize" (optional, int)
|
||||||
thumbnail: Full URL to a video thumbnail image.
|
thumbnail: Full URL to a video thumbnail image.
|
||||||
description: Full video description.
|
description: Full video description.
|
||||||
uploader: Full name of the video uploader.
|
uploader: Full name of the video uploader.
|
||||||
@ -803,15 +807,17 @@ class InfoExtractor(object):
|
|||||||
return self._html_search_meta('twitter:player', html,
|
return self._html_search_meta('twitter:player', html,
|
||||||
'twitter card player')
|
'twitter card player')
|
||||||
|
|
||||||
def _search_json_ld(self, html, video_id, **kwargs):
|
def _search_json_ld(self, html, video_id, expected_type=None, **kwargs):
|
||||||
json_ld = self._search_regex(
|
json_ld = self._search_regex(
|
||||||
r'(?s)<script[^>]+type=(["\'])application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>',
|
r'(?s)<script[^>]+type=(["\'])application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>',
|
||||||
html, 'JSON-LD', group='json_ld', **kwargs)
|
html, 'JSON-LD', group='json_ld', **kwargs)
|
||||||
if not json_ld:
|
if not json_ld:
|
||||||
return {}
|
return {}
|
||||||
return self._json_ld(json_ld, video_id, fatal=kwargs.get('fatal', True))
|
return self._json_ld(
|
||||||
|
json_ld, video_id, fatal=kwargs.get('fatal', True),
|
||||||
|
expected_type=expected_type)
|
||||||
|
|
||||||
def _json_ld(self, json_ld, video_id, fatal=True):
|
def _json_ld(self, json_ld, video_id, fatal=True, expected_type=None):
|
||||||
if isinstance(json_ld, compat_str):
|
if isinstance(json_ld, compat_str):
|
||||||
json_ld = self._parse_json(json_ld, video_id, fatal=fatal)
|
json_ld = self._parse_json(json_ld, video_id, fatal=fatal)
|
||||||
if not json_ld:
|
if not json_ld:
|
||||||
@ -819,6 +825,8 @@ class InfoExtractor(object):
|
|||||||
info = {}
|
info = {}
|
||||||
if json_ld.get('@context') == 'http://schema.org':
|
if json_ld.get('@context') == 'http://schema.org':
|
||||||
item_type = json_ld.get('@type')
|
item_type = json_ld.get('@type')
|
||||||
|
if expected_type is not None and expected_type != item_type:
|
||||||
|
return info
|
||||||
if item_type == 'TVEpisode':
|
if item_type == 'TVEpisode':
|
||||||
info.update({
|
info.update({
|
||||||
'episode': unescapeHTML(json_ld.get('name')),
|
'episode': unescapeHTML(json_ld.get('name')),
|
||||||
@ -837,6 +845,19 @@ class InfoExtractor(object):
|
|||||||
'title': unescapeHTML(json_ld.get('headline')),
|
'title': unescapeHTML(json_ld.get('headline')),
|
||||||
'description': unescapeHTML(json_ld.get('articleBody')),
|
'description': unescapeHTML(json_ld.get('articleBody')),
|
||||||
})
|
})
|
||||||
|
elif item_type == 'VideoObject':
|
||||||
|
info.update({
|
||||||
|
'url': json_ld.get('contentUrl'),
|
||||||
|
'title': unescapeHTML(json_ld.get('name')),
|
||||||
|
'description': unescapeHTML(json_ld.get('description')),
|
||||||
|
'thumbnail': json_ld.get('thumbnailUrl'),
|
||||||
|
'duration': parse_duration(json_ld.get('duration')),
|
||||||
|
'timestamp': unified_timestamp(json_ld.get('uploadDate')),
|
||||||
|
'filesize': float_or_none(json_ld.get('contentSize')),
|
||||||
|
'tbr': int_or_none(json_ld.get('bitrate')),
|
||||||
|
'width': int_or_none(json_ld.get('width')),
|
||||||
|
'height': int_or_none(json_ld.get('height')),
|
||||||
|
})
|
||||||
return dict((k, v) for k, v in info.items() if v is not None)
|
return dict((k, v) for k, v in info.items() if v is not None)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
@ -1616,6 +1637,62 @@ class InfoExtractor(object):
|
|||||||
self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type)
|
self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type)
|
||||||
return formats
|
return formats
|
||||||
|
|
||||||
|
def _parse_html5_media_entries(self, base_url, webpage):
|
||||||
|
def absolute_url(video_url):
|
||||||
|
return compat_urlparse.urljoin(base_url, video_url)
|
||||||
|
|
||||||
|
def parse_content_type(content_type):
|
||||||
|
if not content_type:
|
||||||
|
return {}
|
||||||
|
ctr = re.search(r'(?P<mimetype>[^/]+/[^;]+)(?:;\s*codecs="?(?P<codecs>[^"]+))?', content_type)
|
||||||
|
if ctr:
|
||||||
|
mimetype, codecs = ctr.groups()
|
||||||
|
f = parse_codecs(codecs)
|
||||||
|
f['ext'] = mimetype2ext(mimetype)
|
||||||
|
return f
|
||||||
|
return {}
|
||||||
|
|
||||||
|
entries = []
|
||||||
|
for media_tag, media_type, media_content in re.findall(r'(?s)(<(?P<tag>video|audio)[^>]*>)(.*?)</(?P=tag)>', webpage):
|
||||||
|
media_info = {
|
||||||
|
'formats': [],
|
||||||
|
'subtitles': {},
|
||||||
|
}
|
||||||
|
media_attributes = extract_attributes(media_tag)
|
||||||
|
src = media_attributes.get('src')
|
||||||
|
if src:
|
||||||
|
media_info['formats'].append({
|
||||||
|
'url': absolute_url(src),
|
||||||
|
'vcodec': 'none' if media_type == 'audio' else None,
|
||||||
|
})
|
||||||
|
media_info['thumbnail'] = media_attributes.get('poster')
|
||||||
|
if media_content:
|
||||||
|
for source_tag in re.findall(r'<source[^>]+>', media_content):
|
||||||
|
source_attributes = extract_attributes(source_tag)
|
||||||
|
src = source_attributes.get('src')
|
||||||
|
if not src:
|
||||||
|
continue
|
||||||
|
f = parse_content_type(source_attributes.get('type'))
|
||||||
|
f.update({
|
||||||
|
'url': absolute_url(src),
|
||||||
|
'vcodec': 'none' if media_type == 'audio' else None,
|
||||||
|
})
|
||||||
|
media_info['formats'].append(f)
|
||||||
|
for track_tag in re.findall(r'<track[^>]+>', media_content):
|
||||||
|
track_attributes = extract_attributes(track_tag)
|
||||||
|
kind = track_attributes.get('kind')
|
||||||
|
if not kind or kind == 'subtitles':
|
||||||
|
src = track_attributes.get('src')
|
||||||
|
if not src:
|
||||||
|
continue
|
||||||
|
lang = track_attributes.get('srclang') or track_attributes.get('lang') or track_attributes.get('label')
|
||||||
|
media_info['subtitles'].setdefault(lang, []).append({
|
||||||
|
'url': absolute_url(src),
|
||||||
|
})
|
||||||
|
if media_info['formats']:
|
||||||
|
entries.append(media_info)
|
||||||
|
return entries
|
||||||
|
|
||||||
def _live_title(self, name):
|
def _live_title(self, name):
|
||||||
""" Generate the title for a live video """
|
""" Generate the title for a live video """
|
||||||
now = datetime.datetime.now()
|
now = datetime.datetime.now()
|
||||||
|
@ -256,6 +256,7 @@ from .fivemin import FiveMinIE
|
|||||||
from .fivetv import FiveTVIE
|
from .fivetv import FiveTVIE
|
||||||
from .fktv import FKTVIE
|
from .fktv import FKTVIE
|
||||||
from .flickr import FlickrIE
|
from .flickr import FlickrIE
|
||||||
|
from .flipagram import FlipagramIE
|
||||||
from .folketinget import FolketingetIE
|
from .folketinget import FolketingetIE
|
||||||
from .footyroom import FootyRoomIE
|
from .footyroom import FootyRoomIE
|
||||||
from .formula1 import Formula1IE
|
from .formula1 import Formula1IE
|
||||||
@ -679,6 +680,7 @@ from .rice import RICEIE
|
|||||||
from .ringtv import RingTVIE
|
from .ringtv import RingTVIE
|
||||||
from .ro220 import Ro220IE
|
from .ro220 import Ro220IE
|
||||||
from .rockstargames import RockstarGamesIE
|
from .rockstargames import RockstarGamesIE
|
||||||
|
from .roosterteeth import RoosterTeethIE
|
||||||
from .rottentomatoes import RottenTomatoesIE
|
from .rottentomatoes import RottenTomatoesIE
|
||||||
from .roxwel import RoxwelIE
|
from .roxwel import RoxwelIE
|
||||||
from .rtbf import RTBFIE
|
from .rtbf import RTBFIE
|
||||||
@ -689,6 +691,7 @@ from .rtp import RTPIE
|
|||||||
from .rts import RTSIE
|
from .rts import RTSIE
|
||||||
from .rtve import RTVEALaCartaIE, RTVELiveIE, RTVEInfantilIE
|
from .rtve import RTVEALaCartaIE, RTVELiveIE, RTVEInfantilIE
|
||||||
from .rtvnh import RTVNHIE
|
from .rtvnh import RTVNHIE
|
||||||
|
from .rudo import RudoIE
|
||||||
from .ruhd import RUHDIE
|
from .ruhd import RUHDIE
|
||||||
from .ruleporn import RulePornIE
|
from .ruleporn import RulePornIE
|
||||||
from .rutube import (
|
from .rutube import (
|
||||||
|
@ -219,12 +219,25 @@ class FacebookIE(InfoExtractor):
|
|||||||
|
|
||||||
BEFORE = '{swf.addParam(param[0], param[1]);});'
|
BEFORE = '{swf.addParam(param[0], param[1]);});'
|
||||||
AFTER = '.forEach(function(variable) {swf.addVariable(variable[0], variable[1]);});'
|
AFTER = '.forEach(function(variable) {swf.addVariable(variable[0], variable[1]);});'
|
||||||
m = re.search(re.escape(BEFORE) + '(?:\n|\\\\n)(.*?)' + re.escape(AFTER), webpage)
|
PATTERN = re.escape(BEFORE) + '(?:\n|\\\\n)(.*?)' + re.escape(AFTER)
|
||||||
if m:
|
|
||||||
swf_params = m.group(1).replace('\\\\', '\\').replace('\\"', '"')
|
for m in re.findall(PATTERN, webpage):
|
||||||
|
swf_params = m.replace('\\\\', '\\').replace('\\"', '"')
|
||||||
data = dict(json.loads(swf_params))
|
data = dict(json.loads(swf_params))
|
||||||
params_raw = compat_urllib_parse_unquote(data['params'])
|
params_raw = compat_urllib_parse_unquote(data['params'])
|
||||||
video_data = json.loads(params_raw)['video_data']
|
video_data_candidate = json.loads(params_raw)['video_data']
|
||||||
|
for _, f in video_data_candidate.items():
|
||||||
|
if not f:
|
||||||
|
continue
|
||||||
|
if isinstance(f, dict):
|
||||||
|
f = [f]
|
||||||
|
if not isinstance(f, list):
|
||||||
|
continue
|
||||||
|
if f[0].get('video_id') == video_id:
|
||||||
|
video_data = video_data_candidate
|
||||||
|
break
|
||||||
|
if video_data:
|
||||||
|
break
|
||||||
|
|
||||||
def video_data_list2dict(video_data):
|
def video_data_list2dict(video_data):
|
||||||
ret = {}
|
ret = {}
|
||||||
|
115
youtube_dl/extractor/flipagram.py
Normal file
115
youtube_dl/extractor/flipagram.py
Normal file
@ -0,0 +1,115 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..compat import compat_str
|
||||||
|
from ..utils import (
|
||||||
|
int_or_none,
|
||||||
|
float_or_none,
|
||||||
|
try_get,
|
||||||
|
unified_timestamp,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class FlipagramIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?flipagram\.com/f/(?P<id>[^/?#&]+)'
|
||||||
|
_TEST = {
|
||||||
|
'url': 'https://flipagram.com/f/nyvTSJMKId',
|
||||||
|
'md5': '888dcf08b7ea671381f00fab74692755',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'nyvTSJMKId',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Flipagram by sjuria101 featuring Midnight Memories by One Direction',
|
||||||
|
'description': 'md5:d55e32edc55261cae96a41fa85ff630e',
|
||||||
|
'duration': 35.571,
|
||||||
|
'timestamp': 1461244995,
|
||||||
|
'upload_date': '20160421',
|
||||||
|
'uploader': 'kitty juria',
|
||||||
|
'uploader_id': 'sjuria101',
|
||||||
|
'creator': 'kitty juria',
|
||||||
|
'view_count': int,
|
||||||
|
'like_count': int,
|
||||||
|
'repost_count': int,
|
||||||
|
'comment_count': int,
|
||||||
|
'comments': list,
|
||||||
|
'formats': 'mincount:2',
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
|
video_data = self._parse_json(
|
||||||
|
self._search_regex(
|
||||||
|
r'window\.reactH2O\s*=\s*({.+});', webpage, 'video data'),
|
||||||
|
video_id)
|
||||||
|
|
||||||
|
flipagram = video_data['flipagram']
|
||||||
|
video = flipagram['video']
|
||||||
|
|
||||||
|
json_ld = self._search_json_ld(webpage, video_id, default=False)
|
||||||
|
title = json_ld.get('title') or flipagram['captionText']
|
||||||
|
description = json_ld.get('description') or flipagram.get('captionText')
|
||||||
|
|
||||||
|
formats = [{
|
||||||
|
'url': video['url'],
|
||||||
|
'width': int_or_none(video.get('width')),
|
||||||
|
'height': int_or_none(video.get('height')),
|
||||||
|
'filesize': int_or_none(video_data.get('size')),
|
||||||
|
}]
|
||||||
|
|
||||||
|
preview_url = try_get(
|
||||||
|
flipagram, lambda x: x['music']['track']['previewUrl'], compat_str)
|
||||||
|
if preview_url:
|
||||||
|
formats.append({
|
||||||
|
'url': preview_url,
|
||||||
|
'ext': 'm4a',
|
||||||
|
'vcodec': 'none',
|
||||||
|
})
|
||||||
|
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
counts = flipagram.get('counts', {})
|
||||||
|
user = flipagram.get('user', {})
|
||||||
|
video_data = flipagram.get('video', {})
|
||||||
|
|
||||||
|
thumbnails = [{
|
||||||
|
'url': self._proto_relative_url(cover['url']),
|
||||||
|
'width': int_or_none(cover.get('width')),
|
||||||
|
'height': int_or_none(cover.get('height')),
|
||||||
|
'filesize': int_or_none(cover.get('size')),
|
||||||
|
} for cover in flipagram.get('covers', []) if cover.get('url')]
|
||||||
|
|
||||||
|
# Note that this only retrieves comments that are initally loaded.
|
||||||
|
# For videos with large amounts of comments, most won't be retrieved.
|
||||||
|
comments = []
|
||||||
|
for comment in video_data.get('comments', {}).get(video_id, {}).get('items', []):
|
||||||
|
text = comment.get('comment')
|
||||||
|
if not text or not isinstance(text, list):
|
||||||
|
continue
|
||||||
|
comments.append({
|
||||||
|
'author': comment.get('user', {}).get('name'),
|
||||||
|
'author_id': comment.get('user', {}).get('username'),
|
||||||
|
'id': comment.get('id'),
|
||||||
|
'text': text[0],
|
||||||
|
'timestamp': unified_timestamp(comment.get('created')),
|
||||||
|
})
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'title': title,
|
||||||
|
'description': description,
|
||||||
|
'duration': float_or_none(flipagram.get('duration'), 1000),
|
||||||
|
'thumbnails': thumbnails,
|
||||||
|
'timestamp': unified_timestamp(flipagram.get('iso8601Created')),
|
||||||
|
'uploader': user.get('name'),
|
||||||
|
'uploader_id': user.get('username'),
|
||||||
|
'creator': user.get('name'),
|
||||||
|
'view_count': int_or_none(counts.get('plays')),
|
||||||
|
'like_count': int_or_none(counts.get('likes')),
|
||||||
|
'repost_count': int_or_none(counts.get('reflips')),
|
||||||
|
'comment_count': int_or_none(counts.get('comments')),
|
||||||
|
'comments': comments,
|
||||||
|
'formats': formats,
|
||||||
|
}
|
@ -1313,6 +1313,38 @@ class GenericIE(InfoExtractor):
|
|||||||
},
|
},
|
||||||
'add_ie': ['Kaltura'],
|
'add_ie': ['Kaltura'],
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
# Non-standard Vimeo embed
|
||||||
|
'url': 'https://openclassrooms.com/courses/understanding-the-web',
|
||||||
|
'md5': '64d86f1c7d369afd9a78b38cbb88d80a',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '148867247',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Understanding the web - Teaser',
|
||||||
|
'description': 'This is "Understanding the web - Teaser" by openclassrooms on Vimeo, the home for high quality videos and the people who love them.',
|
||||||
|
'upload_date': '20151214',
|
||||||
|
'uploader': 'OpenClassrooms',
|
||||||
|
'uploader_id': 'openclassrooms',
|
||||||
|
},
|
||||||
|
'add_ie': ['Vimeo'],
|
||||||
|
},
|
||||||
|
# {
|
||||||
|
# # TODO: find another test
|
||||||
|
# # http://schema.org/VideoObject
|
||||||
|
# 'url': 'https://flipagram.com/f/nyvTSJMKId',
|
||||||
|
# 'md5': '888dcf08b7ea671381f00fab74692755',
|
||||||
|
# 'info_dict': {
|
||||||
|
# 'id': 'nyvTSJMKId',
|
||||||
|
# 'ext': 'mp4',
|
||||||
|
# 'title': 'Flipagram by sjuria101 featuring Midnight Memories by One Direction',
|
||||||
|
# 'description': '#love for cats.',
|
||||||
|
# 'timestamp': 1461244995,
|
||||||
|
# 'upload_date': '20160421',
|
||||||
|
# },
|
||||||
|
# 'params': {
|
||||||
|
# 'force_generic_extractor': True,
|
||||||
|
# },
|
||||||
|
# }
|
||||||
]
|
]
|
||||||
|
|
||||||
def report_following_redirect(self, new_url):
|
def report_following_redirect(self, new_url):
|
||||||
@ -2157,6 +2189,19 @@ class GenericIE(InfoExtractor):
|
|||||||
if embed_url:
|
if embed_url:
|
||||||
return self.url_result(embed_url)
|
return self.url_result(embed_url)
|
||||||
|
|
||||||
|
# Looking for http://schema.org/VideoObject
|
||||||
|
json_ld = self._search_json_ld(
|
||||||
|
webpage, video_id, default=None, expected_type='VideoObject')
|
||||||
|
if json_ld and json_ld.get('url'):
|
||||||
|
info_dict.update({
|
||||||
|
'title': video_title or info_dict['title'],
|
||||||
|
'description': video_description,
|
||||||
|
'thumbnail': video_thumbnail,
|
||||||
|
'age_limit': age_limit
|
||||||
|
})
|
||||||
|
info_dict.update(json_ld)
|
||||||
|
return info_dict
|
||||||
|
|
||||||
def check_video(vurl):
|
def check_video(vurl):
|
||||||
if YoutubeIE.suitable(vurl):
|
if YoutubeIE.suitable(vurl):
|
||||||
return True
|
return True
|
||||||
|
@ -23,6 +23,7 @@ from ..utils import (
|
|||||||
str_or_none,
|
str_or_none,
|
||||||
url_basename,
|
url_basename,
|
||||||
urshift,
|
urshift,
|
||||||
|
update_url_query,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -89,6 +90,10 @@ class LeIE(InfoExtractor):
|
|||||||
_loc3_ = self.ror(_loc3_, _loc2_ % 17)
|
_loc3_ = self.ror(_loc3_, _loc2_ % 17)
|
||||||
return _loc3_
|
return _loc3_
|
||||||
|
|
||||||
|
# reversed from http://jstatic.letvcdn.com/sdk/player.js
|
||||||
|
def get_mms_key(self, time):
|
||||||
|
return self.ror(time, 8) ^ 185025305
|
||||||
|
|
||||||
# see M3U8Encryption class in KLetvPlayer.swf
|
# see M3U8Encryption class in KLetvPlayer.swf
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def decrypt_m3u8(encrypted_data):
|
def decrypt_m3u8(encrypted_data):
|
||||||
@ -109,23 +114,7 @@ class LeIE(InfoExtractor):
|
|||||||
|
|
||||||
return bytes(_loc7_)
|
return bytes(_loc7_)
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _check_errors(self, play_json):
|
||||||
media_id = self._match_id(url)
|
|
||||||
page = self._download_webpage(url, media_id)
|
|
||||||
params = {
|
|
||||||
'id': media_id,
|
|
||||||
'platid': 1,
|
|
||||||
'splatid': 101,
|
|
||||||
'format': 1,
|
|
||||||
'tkey': self.calc_time_key(int(time.time())),
|
|
||||||
'domain': 'www.le.com'
|
|
||||||
}
|
|
||||||
|
|
||||||
play_json = self._download_json(
|
|
||||||
'http://api.le.com/mms/out/video/playJson',
|
|
||||||
media_id, 'Downloading playJson data', query=params,
|
|
||||||
headers=self.geo_verification_headers())
|
|
||||||
|
|
||||||
# Check for errors
|
# Check for errors
|
||||||
playstatus = play_json['playstatus']
|
playstatus = play_json['playstatus']
|
||||||
if playstatus['status'] == 0:
|
if playstatus['status'] == 0:
|
||||||
@ -136,43 +125,99 @@ class LeIE(InfoExtractor):
|
|||||||
msg = 'Generic error. flag = %d' % flag
|
msg = 'Generic error. flag = %d' % flag
|
||||||
raise ExtractorError(msg, expected=True)
|
raise ExtractorError(msg, expected=True)
|
||||||
|
|
||||||
playurl = play_json['playurl']
|
def _real_extract(self, url):
|
||||||
|
media_id = self._match_id(url)
|
||||||
|
page = self._download_webpage(url, media_id)
|
||||||
|
|
||||||
formats = ['350', '1000', '1300', '720p', '1080p']
|
play_json_h5 = self._download_json(
|
||||||
dispatch = playurl['dispatch']
|
'http://api.le.com/mms/out/video/playJsonH5',
|
||||||
|
media_id, 'Downloading html5 playJson data', query={
|
||||||
|
'id': media_id,
|
||||||
|
'platid': 3,
|
||||||
|
'splatid': 304,
|
||||||
|
'format': 1,
|
||||||
|
'tkey': self.get_mms_key(int(time.time())),
|
||||||
|
'domain': 'www.le.com',
|
||||||
|
'tss': 'no',
|
||||||
|
},
|
||||||
|
headers=self.geo_verification_headers())
|
||||||
|
self._check_errors(play_json_h5)
|
||||||
|
|
||||||
urls = []
|
play_json_flash = self._download_json(
|
||||||
for format_id in formats:
|
'http://api.le.com/mms/out/video/playJson',
|
||||||
if format_id in dispatch:
|
media_id, 'Downloading flash playJson data', query={
|
||||||
media_url = playurl['domain'][0] + dispatch[format_id][0]
|
'id': media_id,
|
||||||
media_url += '&' + compat_urllib_parse_urlencode({
|
'platid': 1,
|
||||||
'm3v': 1,
|
'splatid': 101,
|
||||||
|
'format': 1,
|
||||||
|
'tkey': self.calc_time_key(int(time.time())),
|
||||||
|
'domain': 'www.le.com',
|
||||||
|
},
|
||||||
|
headers=self.geo_verification_headers())
|
||||||
|
self._check_errors(play_json_flash)
|
||||||
|
|
||||||
|
def get_h5_urls(media_url, format_id):
|
||||||
|
location = self._download_json(
|
||||||
|
media_url, media_id,
|
||||||
|
'Download JSON metadata for format %s' % format_id, query={
|
||||||
'format': 1,
|
'format': 1,
|
||||||
'expect': 3,
|
'expect': 3,
|
||||||
'rateid': format_id,
|
'tss': 'no',
|
||||||
})
|
})['location']
|
||||||
|
|
||||||
nodes_data = self._download_json(
|
return {
|
||||||
media_url, media_id,
|
'http': update_url_query(location, {'tss': 'no'}),
|
||||||
'Download JSON metadata for format %s' % format_id)
|
'hls': update_url_query(location, {'tss': 'ios'}),
|
||||||
|
}
|
||||||
|
|
||||||
req = self._request_webpage(
|
def get_flash_urls(media_url, format_id):
|
||||||
nodes_data['nodelist'][0]['location'], media_id,
|
media_url += '&' + compat_urllib_parse_urlencode({
|
||||||
note='Downloading m3u8 information for format %s' % format_id)
|
'm3v': 1,
|
||||||
|
'format': 1,
|
||||||
|
'expect': 3,
|
||||||
|
'rateid': format_id,
|
||||||
|
})
|
||||||
|
|
||||||
m3u8_data = self.decrypt_m3u8(req.read())
|
nodes_data = self._download_json(
|
||||||
|
media_url, media_id,
|
||||||
|
'Download JSON metadata for format %s' % format_id)
|
||||||
|
|
||||||
url_info_dict = {
|
req = self._request_webpage(
|
||||||
'url': encode_data_uri(m3u8_data, 'application/vnd.apple.mpegurl'),
|
nodes_data['nodelist'][0]['location'], media_id,
|
||||||
'ext': determine_ext(dispatch[format_id][1]),
|
note='Downloading m3u8 information for format %s' % format_id)
|
||||||
'format_id': format_id,
|
|
||||||
'protocol': 'm3u8',
|
|
||||||
}
|
|
||||||
|
|
||||||
if format_id[-1:] == 'p':
|
m3u8_data = self.decrypt_m3u8(req.read())
|
||||||
url_info_dict['height'] = int_or_none(format_id[:-1])
|
|
||||||
|
|
||||||
urls.append(url_info_dict)
|
return {
|
||||||
|
'hls': encode_data_uri(m3u8_data, 'application/vnd.apple.mpegurl'),
|
||||||
|
}
|
||||||
|
|
||||||
|
extracted_formats = []
|
||||||
|
formats = []
|
||||||
|
for play_json, get_urls in ((play_json_h5, get_h5_urls), (play_json_flash, get_flash_urls)):
|
||||||
|
playurl = play_json['playurl']
|
||||||
|
play_domain = playurl['domain'][0]
|
||||||
|
|
||||||
|
for format_id, format_data in playurl.get('dispatch', []).items():
|
||||||
|
if format_id in extracted_formats:
|
||||||
|
continue
|
||||||
|
extracted_formats.append(format_id)
|
||||||
|
|
||||||
|
media_url = play_domain + format_data[0]
|
||||||
|
for protocol, format_url in get_urls(media_url, format_id).items():
|
||||||
|
f = {
|
||||||
|
'url': format_url,
|
||||||
|
'ext': determine_ext(format_data[1]),
|
||||||
|
'format_id': '%s-%s' % (protocol, format_id),
|
||||||
|
'protocol': 'm3u8_native' if protocol == 'hls' else 'http',
|
||||||
|
'quality': int_or_none(format_id),
|
||||||
|
}
|
||||||
|
|
||||||
|
if format_id[-1:] == 'p':
|
||||||
|
f['height'] = int_or_none(format_id[:-1])
|
||||||
|
|
||||||
|
formats.append(f)
|
||||||
|
self._sort_formats(formats, ('height', 'quality', 'format_id'))
|
||||||
|
|
||||||
publish_time = parse_iso8601(self._html_search_regex(
|
publish_time = parse_iso8601(self._html_search_regex(
|
||||||
r'发布时间 ([^<>]+) ', page, 'publish time', default=None),
|
r'发布时间 ([^<>]+) ', page, 'publish time', default=None),
|
||||||
@ -181,7 +226,7 @@ class LeIE(InfoExtractor):
|
|||||||
|
|
||||||
return {
|
return {
|
||||||
'id': media_id,
|
'id': media_id,
|
||||||
'formats': urls,
|
'formats': formats,
|
||||||
'title': playurl['title'],
|
'title': playurl['title'],
|
||||||
'thumbnail': playurl['pic'],
|
'thumbnail': playurl['pic'],
|
||||||
'description': description,
|
'description': description,
|
||||||
|
@ -100,7 +100,7 @@ class LyndaIE(LyndaBaseIE):
|
|||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.lynda.com/Bootstrap-tutorials/Using-exercise-files/110885/114408-4.html',
|
'url': 'http://www.lynda.com/Bootstrap-tutorials/Using-exercise-files/110885/114408-4.html',
|
||||||
'md5': '679734f6786145da3546585de9a356be',
|
# md5 is unstable
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '114408',
|
'id': '114408',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
|
@ -26,7 +26,8 @@ class MGTVIE(InfoExtractor):
|
|||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
api_data = self._download_json(
|
api_data = self._download_json(
|
||||||
'http://v.api.mgtv.com/player/video', video_id,
|
'http://v.api.mgtv.com/player/video', video_id,
|
||||||
query={'video_id': video_id})['data']
|
query={'video_id': video_id},
|
||||||
|
headers=self.geo_verification_headers())['data']
|
||||||
info = api_data['info']
|
info = api_data['info']
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
|
@ -4,6 +4,7 @@ from __future__ import unicode_literals
|
|||||||
import random
|
import random
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from ..compat import compat_urlparse
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
xpath_text,
|
xpath_text,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
@ -18,13 +19,16 @@ class MioMioIE(InfoExtractor):
|
|||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
# "type=video" in flashvars
|
# "type=video" in flashvars
|
||||||
'url': 'http://www.miomio.tv/watch/cc88912/',
|
'url': 'http://www.miomio.tv/watch/cc88912/',
|
||||||
'md5': '317a5f7f6b544ce8419b784ca8edae65',
|
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '88912',
|
'id': '88912',
|
||||||
'ext': 'flv',
|
'ext': 'flv',
|
||||||
'title': '【SKY】字幕 铠武昭和VS平成 假面骑士大战FEAT战队 魔星字幕组 字幕',
|
'title': '【SKY】字幕 铠武昭和VS平成 假面骑士大战FEAT战队 魔星字幕组 字幕',
|
||||||
'duration': 5923,
|
'duration': 5923,
|
||||||
},
|
},
|
||||||
|
'params': {
|
||||||
|
# The server provides broken file
|
||||||
|
'skip_download': True,
|
||||||
|
}
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.miomio.tv/watch/cc184024/',
|
'url': 'http://www.miomio.tv/watch/cc184024/',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@ -32,7 +36,7 @@ class MioMioIE(InfoExtractor):
|
|||||||
'title': '《动漫同人插画绘制》',
|
'title': '《动漫同人插画绘制》',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 86,
|
'playlist_mincount': 86,
|
||||||
'skip': 'This video takes time too long for retrieving the URL',
|
'skip': 'Unable to load videos',
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.miomio.tv/watch/cc173113/',
|
'url': 'http://www.miomio.tv/watch/cc173113/',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@ -40,20 +44,23 @@ class MioMioIE(InfoExtractor):
|
|||||||
'title': 'The New Macbook 2015 上手试玩与简评'
|
'title': 'The New Macbook 2015 上手试玩与简评'
|
||||||
},
|
},
|
||||||
'playlist_mincount': 2,
|
'playlist_mincount': 2,
|
||||||
|
'skip': 'Unable to load videos',
|
||||||
|
}, {
|
||||||
|
# new 'h5' player
|
||||||
|
'url': 'http://www.miomio.tv/watch/cc273295/',
|
||||||
|
'md5': '',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '273295',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'アウト×デラックス 20160526',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
# intermittent HTTP 500
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _extract_mioplayer(self, webpage, video_id, title, http_headers):
|
||||||
video_id = self._match_id(url)
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
|
||||||
|
|
||||||
title = self._html_search_meta(
|
|
||||||
'description', webpage, 'title', fatal=True)
|
|
||||||
|
|
||||||
mioplayer_path = self._search_regex(
|
|
||||||
r'src="(/mioplayer/[^"]+)"', webpage, 'ref_path')
|
|
||||||
|
|
||||||
http_headers = {'Referer': 'http://www.miomio.tv%s' % mioplayer_path}
|
|
||||||
|
|
||||||
xml_config = self._search_regex(
|
xml_config = self._search_regex(
|
||||||
r'flashvars="type=(?:sina|video)&(.+?)&',
|
r'flashvars="type=(?:sina|video)&(.+?)&',
|
||||||
webpage, 'xml config')
|
webpage, 'xml config')
|
||||||
@ -92,10 +99,34 @@ class MioMioIE(InfoExtractor):
|
|||||||
'http_headers': http_headers,
|
'http_headers': http_headers,
|
||||||
})
|
})
|
||||||
|
|
||||||
|
return entries
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
|
title = self._html_search_meta(
|
||||||
|
'description', webpage, 'title', fatal=True)
|
||||||
|
|
||||||
|
mioplayer_path = self._search_regex(
|
||||||
|
r'src="(/mioplayer(?:_h5)?/[^"]+)"', webpage, 'ref_path')
|
||||||
|
|
||||||
|
if '_h5' in mioplayer_path:
|
||||||
|
player_url = compat_urlparse.urljoin(url, mioplayer_path)
|
||||||
|
player_webpage = self._download_webpage(
|
||||||
|
player_url, video_id,
|
||||||
|
note='Downloading player webpage', headers={'Referer': url})
|
||||||
|
entries = self._parse_html5_media_entries(player_url, player_webpage)
|
||||||
|
http_headers = {'Referer': player_url}
|
||||||
|
else:
|
||||||
|
http_headers = {'Referer': 'http://www.miomio.tv%s' % mioplayer_path}
|
||||||
|
entries = self._extract_mioplayer(webpage, video_id, title, http_headers)
|
||||||
|
|
||||||
if len(entries) == 1:
|
if len(entries) == 1:
|
||||||
segment = entries[0]
|
segment = entries[0]
|
||||||
segment['id'] = video_id
|
segment['id'] = video_id
|
||||||
segment['title'] = title
|
segment['title'] = title
|
||||||
|
segment['http_headers'] = http_headers
|
||||||
return segment
|
return segment
|
||||||
|
|
||||||
return {
|
return {
|
||||||
|
@ -8,7 +8,7 @@ from ..utils import update_url_query
|
|||||||
|
|
||||||
class NickIE(MTVServicesInfoExtractor):
|
class NickIE(MTVServicesInfoExtractor):
|
||||||
IE_NAME = 'nick.com'
|
IE_NAME = 'nick.com'
|
||||||
_VALID_URL = r'https?://(?:www\.)?nick\.com/videos/clip/(?P<id>[^/?#.]+)'
|
_VALID_URL = r'https?://(?:www\.)?nick(?:jr)?\.com/(?:videos/clip|[^/]+/videos)/(?P<id>[^/?#.]+)'
|
||||||
_FEED_URL = 'http://udat.mtvnservices.com/service1/dispatch.htm'
|
_FEED_URL = 'http://udat.mtvnservices.com/service1/dispatch.htm'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.nick.com/videos/clip/alvinnn-and-the-chipmunks-112-full-episode.html',
|
'url': 'http://www.nick.com/videos/clip/alvinnn-and-the-chipmunks-112-full-episode.html',
|
||||||
@ -52,6 +52,9 @@ class NickIE(MTVServicesInfoExtractor):
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.nickjr.com/paw-patrol/videos/pups-save-a-goldrush-s3-ep302-full-episode/',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _get_feed_query(self, uri):
|
def _get_feed_query(self, uri):
|
||||||
|
@ -15,7 +15,7 @@ from ..utils import (
|
|||||||
|
|
||||||
class PlayvidIE(InfoExtractor):
|
class PlayvidIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://www\.playvid\.com/watch(\?v=|/)(?P<id>.+?)(?:#|$)'
|
_VALID_URL = r'https?://www\.playvid\.com/watch(\?v=|/)(?P<id>.+?)(?:#|$)'
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
'url': 'http://www.playvid.com/watch/RnmBNgtrrJu',
|
'url': 'http://www.playvid.com/watch/RnmBNgtrrJu',
|
||||||
'md5': 'ffa2f6b2119af359f544388d8c01eb6c',
|
'md5': 'ffa2f6b2119af359f544388d8c01eb6c',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@ -24,8 +24,19 @@ class PlayvidIE(InfoExtractor):
|
|||||||
'title': 'md5:9256d01c6317e3f703848b5906880dc8',
|
'title': 'md5:9256d01c6317e3f703848b5906880dc8',
|
||||||
'duration': 82,
|
'duration': 82,
|
||||||
'age_limit': 18,
|
'age_limit': 18,
|
||||||
}
|
},
|
||||||
}
|
'skip': 'Video removed due to ToS',
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.playvid.com/watch/hwb0GpNkzgH',
|
||||||
|
'md5': '39d49df503ad7b8f23a4432cbf046477',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'hwb0GpNkzgH',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Ellen Euro Cutie Blond Takes a Sexy Survey Get Facial in The Park',
|
||||||
|
'age_limit': 18,
|
||||||
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
|
},
|
||||||
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
@ -33,6 +33,7 @@ class PolskieRadioIE(InfoExtractor):
|
|||||||
'timestamp': 1456594200,
|
'timestamp': 1456594200,
|
||||||
'upload_date': '20160227',
|
'upload_date': '20160227',
|
||||||
'duration': 2364,
|
'duration': 2364,
|
||||||
|
'thumbnail': 're:^https?://static\.prsa\.pl/images/.*\.jpg$'
|
||||||
},
|
},
|
||||||
}],
|
}],
|
||||||
}, {
|
}, {
|
||||||
@ -68,6 +69,8 @@ class PolskieRadioIE(InfoExtractor):
|
|||||||
r'(?s)<span[^>]+id="datetime2"[^>]*>(.+?)</span>',
|
r'(?s)<span[^>]+id="datetime2"[^>]*>(.+?)</span>',
|
||||||
webpage, 'timestamp', fatal=False))
|
webpage, 'timestamp', fatal=False))
|
||||||
|
|
||||||
|
thumbnail_url = self._og_search_thumbnail(webpage)
|
||||||
|
|
||||||
entries = []
|
entries = []
|
||||||
|
|
||||||
media_urls = set()
|
media_urls = set()
|
||||||
@ -87,6 +90,7 @@ class PolskieRadioIE(InfoExtractor):
|
|||||||
'duration': int_or_none(media.get('length')),
|
'duration': int_or_none(media.get('length')),
|
||||||
'vcodec': 'none' if media.get('provider') == 'audio' else None,
|
'vcodec': 'none' if media.get('provider') == 'audio' else None,
|
||||||
'timestamp': timestamp,
|
'timestamp': timestamp,
|
||||||
|
'thumbnail': thumbnail_url
|
||||||
})
|
})
|
||||||
|
|
||||||
title = self._og_search_title(webpage).strip()
|
title = self._og_search_title(webpage).strip()
|
||||||
|
148
youtube_dl/extractor/roosterteeth.py
Normal file
148
youtube_dl/extractor/roosterteeth.py
Normal file
@ -0,0 +1,148 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
ExtractorError,
|
||||||
|
int_or_none,
|
||||||
|
strip_or_none,
|
||||||
|
unescapeHTML,
|
||||||
|
urlencode_postdata,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class RoosterTeethIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://(?:.+?\.)?roosterteeth\.com/episode/(?P<id>[^/?#&]+)'
|
||||||
|
_LOGIN_URL = 'https://roosterteeth.com/login'
|
||||||
|
_NETRC_MACHINE = 'roosterteeth'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'http://roosterteeth.com/episode/million-dollars-but-season-2-million-dollars-but-the-game-announcement',
|
||||||
|
'md5': 'e2bd7764732d785ef797700a2489f212',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '26576',
|
||||||
|
'display_id': 'million-dollars-but-season-2-million-dollars-but-the-game-announcement',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Million Dollars, But...: Million Dollars, But... The Game Announcement',
|
||||||
|
'description': 'md5:0cc3b21986d54ed815f5faeccd9a9ca5',
|
||||||
|
'thumbnail': 're:^https?://.*\.png$',
|
||||||
|
'series': 'Million Dollars, But...',
|
||||||
|
'episode': 'Million Dollars, But... The Game Announcement',
|
||||||
|
'comment_count': int,
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'http://achievementhunter.roosterteeth.com/episode/off-topic-the-achievement-hunter-podcast-2016-i-didn-t-think-it-would-pass-31',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://funhaus.roosterteeth.com/episode/funhaus-shorts-2016-austin-sucks-funhaus-shorts',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://screwattack.roosterteeth.com/episode/death-battle-season-3-mewtwo-vs-shadow',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://theknow.roosterteeth.com/episode/the-know-game-news-season-1-boring-steam-sales-are-better',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
# only available for FIRST members
|
||||||
|
'url': 'http://roosterteeth.com/episode/rt-docs-the-world-s-greatest-head-massage-the-world-s-greatest-head-massage-an-asmr-journey-part-one',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _login(self):
|
||||||
|
(username, password) = self._get_login_info()
|
||||||
|
if username is None:
|
||||||
|
return
|
||||||
|
|
||||||
|
login_page = self._download_webpage(
|
||||||
|
self._LOGIN_URL, None,
|
||||||
|
note='Downloading login page',
|
||||||
|
errnote='Unable to download login page')
|
||||||
|
|
||||||
|
login_form = self._hidden_inputs(login_page)
|
||||||
|
|
||||||
|
login_form.update({
|
||||||
|
'username': username,
|
||||||
|
'password': password,
|
||||||
|
})
|
||||||
|
|
||||||
|
login_request = self._download_webpage(
|
||||||
|
self._LOGIN_URL, None,
|
||||||
|
note='Logging in as %s' % username,
|
||||||
|
data=urlencode_postdata(login_form),
|
||||||
|
headers={
|
||||||
|
'Referer': self._LOGIN_URL,
|
||||||
|
})
|
||||||
|
|
||||||
|
if not any(re.search(p, login_request) for p in (
|
||||||
|
r'href=["\']https?://(?:www\.)?roosterteeth\.com/logout"',
|
||||||
|
r'>Sign Out<')):
|
||||||
|
error = self._html_search_regex(
|
||||||
|
r'(?s)<div[^>]+class=(["\']).*?\balert-danger\b.*?\1[^>]*>(?:\s*<button[^>]*>.*?</button>)?(?P<error>.+?)</div>',
|
||||||
|
login_request, 'alert', default=None, group='error')
|
||||||
|
if error:
|
||||||
|
raise ExtractorError('Unable to login: %s' % error, expected=True)
|
||||||
|
raise ExtractorError('Unable to log in')
|
||||||
|
|
||||||
|
def _real_initialize(self):
|
||||||
|
self._login()
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
display_id = self._match_id(url)
|
||||||
|
|
||||||
|
webpage = self._download_webpage(url, display_id)
|
||||||
|
|
||||||
|
episode = strip_or_none(unescapeHTML(self._search_regex(
|
||||||
|
(r'videoTitle\s*=\s*(["\'])(?P<title>(?:(?!\1).)+)\1',
|
||||||
|
r'<title>(?P<title>[^<]+)</title>'), webpage, 'title',
|
||||||
|
default=None, group='title')))
|
||||||
|
|
||||||
|
title = strip_or_none(self._og_search_title(
|
||||||
|
webpage, default=None)) or episode
|
||||||
|
|
||||||
|
m3u8_url = self._search_regex(
|
||||||
|
r'file\s*:\s*(["\'])(?P<url>http.+?\.m3u8.*?)\1',
|
||||||
|
webpage, 'm3u8 url', default=None, group='url')
|
||||||
|
|
||||||
|
if not m3u8_url:
|
||||||
|
if re.search(r'<div[^>]+class=["\']non-sponsor', webpage):
|
||||||
|
self.raise_login_required(
|
||||||
|
'%s is only available for FIRST members' % display_id)
|
||||||
|
|
||||||
|
if re.search(r'<div[^>]+class=["\']golive-gate', webpage):
|
||||||
|
self.raise_login_required('%s is not available yet' % display_id)
|
||||||
|
|
||||||
|
raise ExtractorError('Unable to extract m3u8 URL')
|
||||||
|
|
||||||
|
formats = self._extract_m3u8_formats(
|
||||||
|
m3u8_url, display_id, ext='mp4',
|
||||||
|
entry_protocol='m3u8_native', m3u8_id='hls')
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
description = strip_or_none(self._og_search_description(webpage))
|
||||||
|
thumbnail = self._proto_relative_url(self._og_search_thumbnail(webpage))
|
||||||
|
|
||||||
|
series = self._search_regex(
|
||||||
|
(r'<h2>More ([^<]+)</h2>', r'<a[^>]+>See All ([^<]+) Videos<'),
|
||||||
|
webpage, 'series', fatal=False)
|
||||||
|
|
||||||
|
comment_count = int_or_none(self._search_regex(
|
||||||
|
r'>Comments \((\d+)\)<', webpage,
|
||||||
|
'comment count', fatal=False))
|
||||||
|
|
||||||
|
video_id = self._search_regex(
|
||||||
|
(r'containerId\s*=\s*["\']episode-(\d+)\1',
|
||||||
|
r'<div[^<]+id=["\']episode-(\d+)'), webpage,
|
||||||
|
'video id', default=display_id)
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'display_id': display_id,
|
||||||
|
'title': title,
|
||||||
|
'description': description,
|
||||||
|
'thumbnail': thumbnail,
|
||||||
|
'series': series,
|
||||||
|
'episode': episode,
|
||||||
|
'comment_count': comment_count,
|
||||||
|
'formats': formats,
|
||||||
|
}
|
53
youtube_dl/extractor/rudo.py
Normal file
53
youtube_dl/extractor/rudo.py
Normal file
@ -0,0 +1,53 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .jwplatform import JWPlatformBaseIE
|
||||||
|
from ..utils import (
|
||||||
|
js_to_json,
|
||||||
|
get_element_by_class,
|
||||||
|
unified_strdate,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class RudoIE(JWPlatformBaseIE):
|
||||||
|
_VALID_URL = r'https?://rudo\.video/vod/(?P<id>[0-9a-zA-Z]+)'
|
||||||
|
|
||||||
|
_TEST = {
|
||||||
|
'url': 'http://rudo.video/vod/oTzw0MGnyG',
|
||||||
|
'md5': '2a03a5b32dd90a04c83b6d391cf7b415',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'oTzw0MGnyG',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Comentario Tomás Mosciatti',
|
||||||
|
'upload_date': '20160617',
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def _extract_url(self, webpage):
|
||||||
|
mobj = re.search(
|
||||||
|
'<iframe[^>]+src=(?P<q1>[\'"])(?P<url>(?:https?:)?//rudo\.video/vod/[0-9a-zA-Z]+)(?P=q1)',
|
||||||
|
webpage)
|
||||||
|
if mobj:
|
||||||
|
return mobj.group('url')
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
|
webpage = self._download_webpage(url, video_id, encoding='iso-8859-1')
|
||||||
|
|
||||||
|
jwplayer_data = self._parse_json(self._search_regex(
|
||||||
|
r'(?s)playerInstance\.setup\(({.+?})\)', webpage, 'jwplayer data'), video_id,
|
||||||
|
transform_source=lambda s: js_to_json(re.sub(r'encodeURI\([^)]+\)', '""', s)))
|
||||||
|
|
||||||
|
info_dict = self._parse_jwplayer_data(
|
||||||
|
jwplayer_data, video_id, require_title=False, m3u8_id='hls')
|
||||||
|
|
||||||
|
info_dict.update({
|
||||||
|
'title': self._og_search_title(webpage),
|
||||||
|
'upload_date': unified_strdate(get_element_by_class('date', webpage)),
|
||||||
|
})
|
||||||
|
|
||||||
|
return info_dict
|
@ -11,7 +11,7 @@ from ..utils import (
|
|||||||
class SRMediathekIE(ARDMediathekIE):
|
class SRMediathekIE(ARDMediathekIE):
|
||||||
IE_NAME = 'sr:mediathek'
|
IE_NAME = 'sr:mediathek'
|
||||||
IE_DESC = 'Saarländischer Rundfunk'
|
IE_DESC = 'Saarländischer Rundfunk'
|
||||||
_VALID_URL = r'https?://sr-mediathek\.sr-online\.de/index\.php\?.*?&id=(?P<id>[0-9]+)'
|
_VALID_URL = r'https?://sr-mediathek(?:\.sr-online)?\.de/index\.php\?.*?&id=(?P<id>[0-9]+)'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://sr-mediathek.sr-online.de/index.php?seite=7&id=28455',
|
'url': 'http://sr-mediathek.sr-online.de/index.php?seite=7&id=28455',
|
||||||
@ -35,7 +35,9 @@ class SRMediathekIE(ARDMediathekIE):
|
|||||||
# m3u8 download
|
# m3u8 download
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
'expected_warnings': ['Unable to download f4m manifest']
|
}, {
|
||||||
|
'url': 'http://sr-mediathek.de/index.php?seite=7&id=7480',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
@ -9,8 +9,8 @@ from ..utils import (
|
|||||||
|
|
||||||
|
|
||||||
class VidziIE(JWPlatformBaseIE):
|
class VidziIE(JWPlatformBaseIE):
|
||||||
_VALID_URL = r'https?://(?:www\.)?vidzi\.tv/(?P<id>\w+)'
|
_VALID_URL = r'https?://(?:www\.)?vidzi\.tv/(?:embed-)?(?P<id>[0-9a-zA-Z]+)'
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
'url': 'http://vidzi.tv/cghql9yq6emu.html',
|
'url': 'http://vidzi.tv/cghql9yq6emu.html',
|
||||||
'md5': '4f16c71ca0c8c8635ab6932b5f3f1660',
|
'md5': '4f16c71ca0c8c8635ab6932b5f3f1660',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@ -22,12 +22,16 @@ class VidziIE(JWPlatformBaseIE):
|
|||||||
# m3u8 download
|
# m3u8 download
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
}
|
}, {
|
||||||
|
'url': 'http://vidzi.tv/embed-4z2yb0rzphe9-600x338.html',
|
||||||
|
'skip_download': True,
|
||||||
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(
|
||||||
|
'http://vidzi.tv/%s' % video_id, video_id)
|
||||||
title = self._html_search_regex(
|
title = self._html_search_regex(
|
||||||
r'(?s)<h2 class="video-title">(.*?)</h2>', webpage, 'title')
|
r'(?s)<h2 class="video-title">(.*?)</h2>', webpage, 'title')
|
||||||
|
|
||||||
|
@ -364,6 +364,11 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
|||||||
r'<embed[^>]+?src="((?:https?:)?//(?:www\.)?vimeo\.com/moogaloop\.swf.+?)"', webpage)
|
r'<embed[^>]+?src="((?:https?:)?//(?:www\.)?vimeo\.com/moogaloop\.swf.+?)"', webpage)
|
||||||
if mobj:
|
if mobj:
|
||||||
return mobj.group(1)
|
return mobj.group(1)
|
||||||
|
# Look more for non-standard embedded Vimeo player
|
||||||
|
mobj = re.search(
|
||||||
|
r'<video[^>]+src=(?P<q1>[\'"])(?P<url>(?:https?:)?//(?:www\.)?vimeo\.com/[0-9]+)(?P=q1)', webpage)
|
||||||
|
if mobj:
|
||||||
|
return mobj.group('url')
|
||||||
|
|
||||||
def _verify_player_video_password(self, url, video_id):
|
def _verify_player_video_password(self, url, video_id):
|
||||||
password = self._downloader.params.get('videopassword')
|
password = self._downloader.params.get('videopassword')
|
||||||
|
@ -9,7 +9,7 @@ from ..compat import (
|
|||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
parse_duration,
|
parse_duration,
|
||||||
qualities,
|
remove_end,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -22,7 +22,7 @@ class VuClipIE(InfoExtractor):
|
|||||||
'id': '922692425',
|
'id': '922692425',
|
||||||
'ext': '3gp',
|
'ext': '3gp',
|
||||||
'title': 'The Toy Soldiers - Hollywood Movie Trailer',
|
'title': 'The Toy Soldiers - Hollywood Movie Trailer',
|
||||||
'duration': 180,
|
'duration': 177,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -46,34 +46,21 @@ class VuClipIE(InfoExtractor):
|
|||||||
'%s said: %s' % (self.IE_NAME, error_msg), expected=True)
|
'%s said: %s' % (self.IE_NAME, error_msg), expected=True)
|
||||||
|
|
||||||
# These clowns alternate between two page types
|
# These clowns alternate between two page types
|
||||||
links_code = self._search_regex(
|
video_url = self._search_regex(
|
||||||
r'''(?xs)
|
r'<a[^>]+href="([^"]+)"[^>]*><img[^>]+src="[^"]*/play\.gif',
|
||||||
(?:
|
webpage, 'video URL', default=None)
|
||||||
<img\s+src="[^"]*/play.gif".*?>|
|
if video_url:
|
||||||
<!--\ player\ end\ -->\s*</div><!--\ thumb\ end-->
|
formats = [{
|
||||||
)
|
'url': video_url,
|
||||||
(.*?)
|
}]
|
||||||
(?:
|
else:
|
||||||
<a\s+href="fblike|<div\s+class="social">
|
formats = self._parse_html5_media_entries(url, webpage)[0]['formats']
|
||||||
)
|
|
||||||
''', webpage, 'links')
|
|
||||||
title = self._html_search_regex(
|
|
||||||
r'<title>(.*?)-\s*Vuclip</title>', webpage, 'title').strip()
|
|
||||||
|
|
||||||
quality_order = qualities(['Reg', 'Hi'])
|
title = remove_end(self._html_search_regex(
|
||||||
formats = []
|
r'<title>(.*?)-\s*Vuclip</title>', webpage, 'title').strip(), ' - Video')
|
||||||
for url, q in re.findall(
|
|
||||||
r'<a\s+href="(?P<url>[^"]+)".*?>(?:<button[^>]*>)?(?P<q>[^<]+)(?:</button>)?</a>', links_code):
|
|
||||||
format_id = compat_urllib_parse_urlparse(url).scheme + '-' + q
|
|
||||||
formats.append({
|
|
||||||
'format_id': format_id,
|
|
||||||
'url': url,
|
|
||||||
'quality': quality_order(q),
|
|
||||||
})
|
|
||||||
self._sort_formats(formats)
|
|
||||||
|
|
||||||
duration = parse_duration(self._search_regex(
|
duration = parse_duration(self._html_search_regex(
|
||||||
r'\(([0-9:]+)\)</span>', webpage, 'duration', fatal=False))
|
r'[(>]([0-9]+:[0-9]+)(?:<span|\))', webpage, 'duration', fatal=False))
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
|
@ -137,7 +137,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
|||||||
# Two-Factor
|
# Two-Factor
|
||||||
# TODO add SMS and phone call support - these require making a request and then prompting the user
|
# TODO add SMS and phone call support - these require making a request and then prompting the user
|
||||||
|
|
||||||
if re.search(r'(?i)<form[^>]* id="challenge"', login_results) is not None:
|
if re.search(r'(?i)<form[^>]+id="challenge"', login_results) is not None:
|
||||||
tfa_code = self._get_tfa_info('2-step verification code')
|
tfa_code = self._get_tfa_info('2-step verification code')
|
||||||
|
|
||||||
if not tfa_code:
|
if not tfa_code:
|
||||||
@ -165,17 +165,17 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
|||||||
if tfa_results is False:
|
if tfa_results is False:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
if re.search(r'(?i)<form[^>]* id="challenge"', tfa_results) is not None:
|
if re.search(r'(?i)<form[^>]+id="challenge"', tfa_results) is not None:
|
||||||
self._downloader.report_warning('Two-factor code expired or invalid. Please try again, or use a one-use backup code instead.')
|
self._downloader.report_warning('Two-factor code expired or invalid. Please try again, or use a one-use backup code instead.')
|
||||||
return False
|
return False
|
||||||
if re.search(r'(?i)<form[^>]* id="gaia_loginform"', tfa_results) is not None:
|
if re.search(r'(?i)<form[^>]+id="gaia_loginform"', tfa_results) is not None:
|
||||||
self._downloader.report_warning('unable to log in - did the page structure change?')
|
self._downloader.report_warning('unable to log in - did the page structure change?')
|
||||||
return False
|
return False
|
||||||
if re.search(r'smsauth-interstitial-reviewsettings', tfa_results) is not None:
|
if re.search(r'smsauth-interstitial-reviewsettings', tfa_results) is not None:
|
||||||
self._downloader.report_warning('Your Google account has a security notice. Please log in on your web browser, resolve the notice, and try again.')
|
self._downloader.report_warning('Your Google account has a security notice. Please log in on your web browser, resolve the notice, and try again.')
|
||||||
return False
|
return False
|
||||||
|
|
||||||
if re.search(r'(?i)<form[^>]* id="gaia_loginform"', login_results) is not None:
|
if re.search(r'(?i)<form[^>]+id="gaia_loginform"', login_results) is not None:
|
||||||
self._downloader.report_warning('unable to log in: bad username or password')
|
self._downloader.report_warning('unable to log in: bad username or password')
|
||||||
return False
|
return False
|
||||||
return True
|
return True
|
||||||
@ -1978,10 +1978,13 @@ class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor):
|
|||||||
return (False if YoutubePlaylistsIE.suitable(url) or YoutubeLiveIE.suitable(url)
|
return (False if YoutubePlaylistsIE.suitable(url) or YoutubeLiveIE.suitable(url)
|
||||||
else super(YoutubeChannelIE, cls).suitable(url))
|
else super(YoutubeChannelIE, cls).suitable(url))
|
||||||
|
|
||||||
|
def _build_template_url(self, url, channel_id):
    # Base implementation: the channel listing URL depends only on the
    # channel id; the original URL is ignored here (subclasses may use it).
    return self._TEMPLATE_URL % channel_id
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
channel_id = self._match_id(url)
|
channel_id = self._match_id(url)
|
||||||
|
|
||||||
url = self._TEMPLATE_URL % channel_id
|
url = self._build_template_url(url, channel_id)
|
||||||
|
|
||||||
# Channel by page listing is restricted to 35 pages of 30 items, i.e. 1050 videos total (see #5778)
|
# Channel by page listing is restricted to 35 pages of 30 items, i.e. 1050 videos total (see #5778)
|
||||||
# Workaround by extracting as a playlist if managed to obtain channel playlist URL
|
# Workaround by extracting as a playlist if managed to obtain channel playlist URL
|
||||||
@ -2038,8 +2041,8 @@ class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor):
|
|||||||
|
|
||||||
class YoutubeUserIE(YoutubeChannelIE):
|
class YoutubeUserIE(YoutubeChannelIE):
|
||||||
IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)'
|
IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)'
|
||||||
_VALID_URL = r'(?:(?:https?://(?:\w+\.)?youtube\.com/(?:user/|c/)?(?!(?:attribution_link|watch|results)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_-]+)'
|
_VALID_URL = r'(?:(?:https?://(?:\w+\.)?youtube\.com/(?:(?P<user>user|c)/)?(?!(?:attribution_link|watch|results)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_-]+)'
|
||||||
_TEMPLATE_URL = 'https://www.youtube.com/user/%s/videos'
|
_TEMPLATE_URL = 'https://www.youtube.com/%s/%s/videos'
|
||||||
IE_NAME = 'youtube:user'
|
IE_NAME = 'youtube:user'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
@ -2049,12 +2052,24 @@ class YoutubeUserIE(YoutubeChannelIE):
|
|||||||
'id': 'UUfX55Sx5hEFjoC3cNs6mCUQ',
|
'id': 'UUfX55Sx5hEFjoC3cNs6mCUQ',
|
||||||
'title': 'Uploads from The Linux Foundation',
|
'title': 'Uploads from The Linux Foundation',
|
||||||
}
|
}
|
||||||
|
}, {
|
||||||
|
# Only available via https://www.youtube.com/c/12minuteathlete/videos
|
||||||
|
# but not https://www.youtube.com/user/12minuteathlete/videos
|
||||||
|
'url': 'https://www.youtube.com/c/12minuteathlete/videos',
|
||||||
|
'playlist_mincount': 249,
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'UUVjM-zV6_opMDx7WYxnjZiQ',
|
||||||
|
'title': 'Uploads from 12 Minute Athlete',
|
||||||
|
}
|
||||||
}, {
|
}, {
|
||||||
'url': 'ytuser:phihag',
|
'url': 'ytuser:phihag',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.youtube.com/c/gametrailers',
|
'url': 'https://www.youtube.com/c/gametrailers',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.youtube.com/gametrailers',
|
||||||
|
'only_matching': True,
|
||||||
}, {
|
}, {
|
||||||
# This channel is not available.
|
# This channel is not available.
|
||||||
'url': 'https://www.youtube.com/user/kananishinoSMEJ/videos',
|
'url': 'https://www.youtube.com/user/kananishinoSMEJ/videos',
|
||||||
@ -2071,6 +2086,10 @@ class YoutubeUserIE(YoutubeChannelIE):
|
|||||||
else:
|
else:
|
||||||
return super(YoutubeUserIE, cls).suitable(url)
|
return super(YoutubeUserIE, cls).suitable(url)
|
||||||
|
|
||||||
|
def _build_template_url(self, url, channel_id):
    # Re-match the URL to learn which prefix ('user' or 'c') the caller
    # actually used, falling back to 'user' for ytuser:/bare forms, and
    # fill both slots of the two-part template.
    m = re.match(self._VALID_URL, url)
    return self._TEMPLATE_URL % (m.group('user') or 'user', m.group('id'))
|
||||||
|
|
||||||
|
|
||||||
class YoutubeLiveIE(YoutubeBaseInfoExtractor):
|
class YoutubeLiveIE(YoutubeBaseInfoExtractor):
|
||||||
IE_DESC = 'YouTube.com live streams'
|
IE_DESC = 'YouTube.com live streams'
|
||||||
|
@ -26,7 +26,11 @@ def parseOpts(overrideArguments=None):
|
|||||||
except IOError:
|
except IOError:
|
||||||
return default # silently skip if file is not present
|
return default # silently skip if file is not present
|
||||||
try:
|
try:
|
||||||
res = compat_shlex_split(optionf.read(), comments=True)
|
# FIXME: https://github.com/rg3/youtube-dl/commit/dfe5fa49aed02cf36ba9f743b11b0903554b5e56
|
||||||
|
contents = optionf.read()
|
||||||
|
if sys.version_info < (3,):
|
||||||
|
contents = contents.decode(preferredencoding())
|
||||||
|
res = compat_shlex_split(contents, comments=True)
|
||||||
finally:
|
finally:
|
||||||
optionf.close()
|
optionf.close()
|
||||||
return res
|
return res
|
||||||
|
@ -363,8 +363,10 @@ class FFmpegEmbedSubtitlePP(FFmpegPostProcessor):
|
|||||||
input_files = [filename] + sub_filenames
|
input_files = [filename] + sub_filenames
|
||||||
|
|
||||||
opts = [
|
opts = [
|
||||||
'-map', '0',
|
'-map', '0:v',
|
||||||
'-c', 'copy',
|
'-c:v', 'copy',
|
||||||
|
'-map', '0:a',
|
||||||
|
'-c:a', 'copy',
|
||||||
# Don't copy the existing subtitles, we may be running the
|
# Don't copy the existing subtitles, we may be running the
|
||||||
# postprocessor a second time
|
# postprocessor a second time
|
||||||
'-map', '-0:s',
|
'-map', '-0:s',
|
||||||
|
@ -2126,6 +2126,42 @@ def mimetype2ext(mt):
|
|||||||
}.get(res, res)
|
}.get(res, res)
|
||||||
|
|
||||||
|
|
||||||
|
def parse_codecs(codecs_str):
    """Parse an RFC 6381 ``codecs`` parameter string.

    Returns a dict with ``vcodec`` and ``acodec`` entries (the full codec
    string, or ``'none'`` when that track is known to be absent), or an
    empty dict when nothing can be determined.
    """
    # http://tools.ietf.org/html/rfc6381
    if not codecs_str:
        return {}
    # Comprehension replaces filter(None, map(lambda str: ...)) which
    # shadowed the builtin `str`; also fixes the 'splited' typo.
    split_codecs = [
        c.strip() for c in codecs_str.strip().strip(',').split(',') if c.strip()]
    vcodec, acodec = None, None
    for full_codec in split_codecs:
        # RFC 6381 codec IDs are dotted (e.g. avc1.42E01E); the first
        # component identifies the codec family.
        codec = full_codec.split('.')[0]
        if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v'):
            if not vcodec:
                vcodec = full_codec
        elif codec in ('mp4a', 'opus', 'vorbis', 'mp3', 'aac'):
            if not acodec:
                acodec = full_codec
        else:
            write_string('WARNING: Unknown codec %s' % full_codec, sys.stderr)
    if not vcodec and not acodec:
        # Nothing recognized at all.
        # NOTE(review): both fallback branches below return None for the
        # unrecognized codec string(s) (vcodec/acodec are still None here);
        # kept as-is to preserve the original behavior.
        if len(split_codecs) == 2:
            return {
                'vcodec': vcodec,
                'acodec': acodec,
            }
        elif len(split_codecs) == 1:
            return {
                'vcodec': 'none',
                'acodec': vcodec,
            }
    else:
        return {
            'vcodec': vcodec or 'none',
            'acodec': acodec or 'none',
        }
    return {}
|
||||||
|
|
||||||
|
|
||||||
def urlhandle_detect_ext(url_handle):
|
def urlhandle_detect_ext(url_handle):
|
||||||
getheader = url_handle.headers.get
|
getheader = url_handle.headers.get
|
||||||
|
|
||||||
|
@ -1,3 +1,3 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
__version__ = '2016.07.07'
|
__version__ = '2016.07.11'
|
||||||
|
Loading…
x
Reference in New Issue
Block a user