Merge remote-tracking branch 'upstream/master'

commit bc6ea7bdf0

AUTHORS (1 line changed)
@@ -103,3 +103,4 @@ Christopher Krooss
 Ondřej Caletka
 Dinesh S
 Johan K. Jensen
+Yen Chi Hsuan
README.md (27 lines changed)
@@ -267,10 +267,22 @@ which means you can modify it, redistribute it or use it however you like.
                                      by extension for the extensions aac, m4a,
                                      mp3, mp4, ogg, wav, webm. You can also use
                                      the special names "best", "bestvideo",
-                                     "bestaudio", "worst". By default, youtube-
-                                     dl will pick the best quality. Use commas
-                                     to download multiple audio formats, such as
-                                     -f
+                                     "bestaudio", "worst". You can filter the
+                                     video results by putting a condition in
+                                     brackets, as in -f "best[height=720]" (or
+                                     -f "[filesize>10M]"). This works for
+                                     filesize, height, width, tbr, abr, and vbr
+                                     and the comparisons <, <=, >, >=, =, != .
+                                     Formats for which the value is not known
+                                     are excluded unless you put a question mark
+                                     (?) after the operator. You can combine
+                                     format filters, so -f "[height <=?
+                                     720][tbr>500]" selects up to 720p videos
+                                     (or videos where the height is not known)
+                                     with a bitrate of at least 500 KBit/s. By
+                                     default, youtube-dl will pick the best
+                                     quality. Use commas to download multiple
+                                     audio formats, such as -f
                                      136/137/mp4/bestvideo,140/m4a/bestaudio.
                                      You can merge the video and audio of two
                                      formats into a single file using -f <video-
@@ -304,7 +316,8 @@ which means you can modify it, redistribute it or use it however you like.
 
 ## Authentication Options:
     -u, --username USERNAME          login with this account ID
-    -p, --password PASSWORD          account password
+    -p, --password PASSWORD          account password. If this option is left
+                                     out, youtube-dl will ask interactively.
    -2, --twofactor TWOFACTOR        two-factor auth code
     -n, --netrc                      use .netrc authentication data
     --video-password PASSWORD        video password (vimeo, smotri)
@@ -487,6 +500,10 @@ To make a different directory work - either for ffmpeg, or for youtube-dl, or fo
 From then on, after restarting your shell, you will be able to access both youtube-dl and ffmpeg (and youtube-dl will be able to find ffmpeg) by simply typing `youtube-dl` or `ffmpeg`, no matter what directory you're in.
 
+### How do I put downloads into a specific folder?
+
+Use the `-o` to specify an [output template](#output-template), for example `-o "/home/user/videos/%(title)s-%(id)s.%(ext)s"`. If you want this for all of your downloads, put the option into your [configuration file](#configuration).
+
 ### How can I detect whether a given URL is supported by youtube-dl?
 
 For one, have a look at the [list of supported sites](docs/supportedsites). Note that it can sometimes happen that the site changes its URL scheme (say, from http://example.com/v/1234567 to http://example.com/v/1234567 ) and youtube-dl reports an URL of a service in that list as unsupported. In that case, simply report a bug.
 
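The same format string works through the embedding API, so the new filter syntax can also be exercised from Python. A minimal sketch, assuming a placeholder URL (reusing the FAQ's example host):

    import youtube_dl

    # Same filter the README documents for -f: up to 720p (or unknown height)
    # with a bitrate of at least 500 KBit/s.
    ydl_opts = {'format': 'best[height<=?720][tbr>500]'}
    with youtube_dl.YoutubeDL(ydl_opts) as ydl:
        ydl.download(['http://example.com/v/1234567'])  # placeholder URL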
test/test_YoutubeDL.py
@@ -281,6 +281,61 @@ class TestFormatSelection(unittest.TestCase):
         downloaded = ydl.downloaded_info_dicts[0]
         self.assertEqual(downloaded['format_id'], f1id)
 
+    def test_format_filtering(self):
+        formats = [
+            {'format_id': 'A', 'filesize': 500, 'width': 1000},
+            {'format_id': 'B', 'filesize': 1000, 'width': 500},
+            {'format_id': 'C', 'filesize': 1000, 'width': 400},
+            {'format_id': 'D', 'filesize': 2000, 'width': 600},
+            {'format_id': 'E', 'filesize': 3000},
+            {'format_id': 'F'},
+            {'format_id': 'G', 'filesize': 1000000},
+        ]
+        for f in formats:
+            f['url'] = 'http://_/'
+            f['ext'] = 'unknown'
+        info_dict = _make_result(formats)
+
+        ydl = YDL({'format': 'best[filesize<3000]'})
+        ydl.process_ie_result(info_dict)
+        downloaded = ydl.downloaded_info_dicts[0]
+        self.assertEqual(downloaded['format_id'], 'D')
+
+        ydl = YDL({'format': 'best[filesize<=3000]'})
+        ydl.process_ie_result(info_dict)
+        downloaded = ydl.downloaded_info_dicts[0]
+        self.assertEqual(downloaded['format_id'], 'E')
+
+        ydl = YDL({'format': 'best[filesize <= ? 3000]'})
+        ydl.process_ie_result(info_dict)
+        downloaded = ydl.downloaded_info_dicts[0]
+        self.assertEqual(downloaded['format_id'], 'F')
+
+        ydl = YDL({'format': 'best [filesize = 1000] [width>450]'})
+        ydl.process_ie_result(info_dict)
+        downloaded = ydl.downloaded_info_dicts[0]
+        self.assertEqual(downloaded['format_id'], 'B')
+
+        ydl = YDL({'format': 'best [filesize = 1000] [width!=450]'})
+        ydl.process_ie_result(info_dict)
+        downloaded = ydl.downloaded_info_dicts[0]
+        self.assertEqual(downloaded['format_id'], 'C')
+
+        ydl = YDL({'format': '[filesize>?1]'})
+        ydl.process_ie_result(info_dict)
+        downloaded = ydl.downloaded_info_dicts[0]
+        self.assertEqual(downloaded['format_id'], 'G')
+
+        ydl = YDL({'format': '[filesize<1M]'})
+        ydl.process_ie_result(info_dict)
+        downloaded = ydl.downloaded_info_dicts[0]
+        self.assertEqual(downloaded['format_id'], 'E')
+
+        ydl = YDL({'format': '[filesize<1MiB]'})
+        ydl.process_ie_result(info_dict)
+        downloaded = ydl.downloaded_info_dicts[0]
+        self.assertEqual(downloaded['format_id'], 'G')
+
     def test_add_extra_info(self):
         test_dict = {
             'extractor': 'Foo',
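The last two assertions hinge on size-suffix parsing: the filter first tries the value as a plain integer, then as a file size, retrying with a trailing 'B' when the bare suffix is unknown. 'M' is decimal while 'MiB' is binary, so format G at exactly 1,000,000 bytes is rejected by <1M but accepted by <1MiB. A quick check, assuming youtube_dl is importable:

    from youtube_dl.utils import parse_filesize

    print(parse_filesize('1MB'))   # 1000000 (decimal megabyte)
    print(parse_filesize('1MiB'))  # 1048576 (binary mebibyte)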
test/test_all_urls.py
@@ -14,7 +14,6 @@ from test.helper import gettestcases
 from youtube_dl.extractor import (
     FacebookIE,
     gen_extractors,
-    TwitchIE,
     YoutubeIE,
 )
 
@@ -72,18 +71,6 @@ class TestAllURLsMatching(unittest.TestCase):
         self.assertMatch('http://www.youtube.com/results?search_query=making+mustard', ['youtube:search_url'])
         self.assertMatch('https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video', ['youtube:search_url'])
 
-    def test_twitch_channelid_matching(self):
-        self.assertTrue(TwitchIE.suitable('twitch.tv/vanillatv'))
-        self.assertTrue(TwitchIE.suitable('www.twitch.tv/vanillatv'))
-        self.assertTrue(TwitchIE.suitable('http://www.twitch.tv/vanillatv'))
-        self.assertTrue(TwitchIE.suitable('http://www.twitch.tv/vanillatv/'))
-
-    def test_twitch_videoid_matching(self):
-        self.assertTrue(TwitchIE.suitable('http://www.twitch.tv/vanillatv/b/328087483'))
-
-    def test_twitch_chapterid_matching(self):
-        self.assertTrue(TwitchIE.suitable('http://www.twitch.tv/tsm_theoddone/c/2349361'))
-
     def test_youtube_extract(self):
         assertExtractId = lambda url, id: self.assertEqual(YoutubeIE.extract_id(url), id)
         assertExtractId('http://www.youtube.com/watch?&v=BaW_jenozKc', 'BaW_jenozKc')
@@ -115,8 +102,6 @@ class TestAllURLsMatching(unittest.TestCase):
         self.assertMatch(':ythistory', ['youtube:history'])
         self.assertMatch(':thedailyshow', ['ComedyCentralShows'])
         self.assertMatch(':tds', ['ComedyCentralShows'])
-        self.assertMatch(':colbertreport', ['ComedyCentralShows'])
-        self.assertMatch(':cr', ['ComedyCentralShows'])
 
     def test_vimeo_matching(self):
         self.assertMatch('http://vimeo.com/channels/tributes', ['vimeo:channel'])
test/test_utils.py
@@ -28,6 +28,7 @@ from youtube_dl.utils import (
     fix_xml_ampersands,
     InAdvancePagedList,
     intlist_to_bytes,
+    is_html,
     js_to_json,
     limit_length,
     OnDemandPagedList,
@@ -417,5 +418,21 @@ ffmpeg version 2.4.4 Copyright (c) 2000-2014 the FFmpeg ...'''), '2.4.4')
         self.assertTrue(age_restricted(18, 14))
         self.assertFalse(age_restricted(18, 18))
 
+    def test_is_html(self):
+        self.assertFalse(is_html(b'\x49\x44\x43<html'))
+        self.assertTrue(is_html(b'<!DOCTYPE foo>\xaaa'))
+        self.assertTrue(is_html(  # UTF-8 with BOM
+            b'\xef\xbb\xbf<!DOCTYPE foo>\xaaa'))
+        self.assertTrue(is_html(  # UTF-16-LE
+            b'\xff\xfe<\x00h\x00t\x00m\x00l\x00>\x00\xe4\x00'
+        ))
+        self.assertTrue(is_html(  # UTF-16-BE
+            b'\xfe\xff\x00<\x00h\x00t\x00m\x00l\x00>\x00\xe4'
+        ))
+        self.assertTrue(is_html(  # UTF-32-BE
+            b'\x00\x00\xFE\xFF\x00\x00\x00<\x00\x00\x00h\x00\x00\x00t\x00\x00\x00m\x00\x00\x00l\x00\x00\x00>\x00\x00\x00\xe4'))
+        self.assertTrue(is_html(  # UTF-32-LE
+            b'\xFF\xFE\x00\x00<\x00\x00\x00h\x00\x00\x00t\x00\x00\x00m\x00\x00\x00l\x00\x00\x00>\x00\x00\x00\xe4\x00\x00\x00'))
+
 if __name__ == '__main__':
     unittest.main()
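The assertions imply the shape of the new helper: strip a known byte-order mark, decode with the matching encoding, and test whether the first non-whitespace character is '<'. A sketch of that logic under those assumptions (a hypothetical re-implementation, not necessarily the exact youtube_dl.utils.is_html body):

    import re

    def sniff_html(first_bytes):
        # BOMs ordered so the UTF-32-LE mark is tried before its UTF-16-LE prefix.
        BOMS = [
            (b'\x00\x00\xfe\xff', 'utf-32-be'),
            (b'\xff\xfe\x00\x00', 'utf-32-le'),
            (b'\xff\xfe', 'utf-16-le'),
            (b'\xfe\xff', 'utf-16-be'),
            (b'\xef\xbb\xbf', 'utf-8'),
        ]
        encoding = 'utf-8'
        for bom, enc in BOMS:
            if first_bytes.startswith(bom):
                first_bytes = first_bytes[len(bom):]
                encoding = enc
                break
        return re.match(r'^\s*<', first_bytes.decode(encoding, 'replace')) is not None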
youtube_dl/YoutubeDL.py
@@ -10,6 +10,7 @@ import io
 import itertools
 import json
 import locale
+import operator
 import os
 import platform
 import re
@@ -49,6 +50,7 @@ from .utils import (
     make_HTTPS_handler,
     MaxDownloadsReached,
     PagedList,
+    parse_filesize,
     PostProcessingError,
     platform_name,
     preferredencoding,
@@ -768,7 +770,59 @@ class YoutubeDL(object):
         else:
             raise Exception('Invalid result type: %s' % result_type)
 
+    def _apply_format_filter(self, format_spec, available_formats):
+        " Returns a tuple of the remaining format_spec and filtered formats "
+
+        OPERATORS = {
+            '<': operator.lt,
+            '<=': operator.le,
+            '>': operator.gt,
+            '>=': operator.ge,
+            '=': operator.eq,
+            '!=': operator.ne,
+        }
+        operator_rex = re.compile(r'''(?x)\s*\[
+            (?P<key>width|height|tbr|abr|vbr|filesize)
+            \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
+            (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
+            \]$
+            ''' % '|'.join(map(re.escape, OPERATORS.keys())))
+        m = operator_rex.search(format_spec)
+        if not m:
+            raise ValueError('Invalid format specification %r' % format_spec)
+
+        try:
+            comparison_value = int(m.group('value'))
+        except ValueError:
+            comparison_value = parse_filesize(m.group('value'))
+            if comparison_value is None:
+                comparison_value = parse_filesize(m.group('value') + 'B')
+            if comparison_value is None:
+                raise ValueError(
+                    'Invalid value %r in format specification %r' % (
+                        m.group('value'), format_spec))
+        op = OPERATORS[m.group('op')]
+
+        def _filter(f):
+            actual_value = f.get(m.group('key'))
+            if actual_value is None:
+                return m.group('none_inclusive')
+            return op(actual_value, comparison_value)
+        new_formats = [f for f in available_formats if _filter(f)]
+
+        new_format_spec = format_spec[:-len(m.group(0))]
+        if not new_format_spec:
+            new_format_spec = 'best'
+
+        return (new_format_spec, new_formats)
+
     def select_format(self, format_spec, available_formats):
+        while format_spec.endswith(']'):
+            format_spec, available_formats = self._apply_format_filter(
+                format_spec, available_formats)
+            if not available_formats:
+                return None
+
         if format_spec == 'best' or format_spec is None:
             return available_formats[-1]
         elif format_spec == 'worst':
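select_format now peels one trailing [...] clause per loop iteration and narrows the candidate list, so chained filters compose. The grammar can be poked at in isolation; a small sketch reusing the same regex (illustration only):

    import operator
    import re

    OPERATORS = {'<': operator.lt, '<=': operator.le, '>': operator.gt,
                 '>=': operator.ge, '=': operator.eq, '!=': operator.ne}
    operator_rex = re.compile(r'''(?x)\s*\[
        (?P<key>width|height|tbr|abr|vbr|filesize)
        \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
        (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
        \]$''' % '|'.join(map(re.escape, OPERATORS.keys())))

    m = operator_rex.search('best[height<=?720]')
    print(m.group('key'), m.group('op'), m.group('value'))  # height <= 720
    print(bool(m.group('none_inclusive')))                  # True: unknown heights pass
    print('best[height<=?720]'[:-len(m.group(0))])          # 'best' is what remains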
youtube_dl/extractor/__init__.py
@@ -1,6 +1,7 @@
 from __future__ import unicode_literals
 
 from .abc import ABCIE
+from .abc7news import Abc7NewsIE
 from .academicearth import AcademicEarthCourseIE
 from .addanime import AddAnimeIE
 from .adobetv import AdobeTVIE
@@ -175,6 +176,7 @@ from .goshgay import GoshgayIE
 from .grooveshark import GroovesharkIE
 from .groupon import GrouponIE
 from .hark import HarkIE
+from .hearthisat import HearThisAtIE
 from .heise import HeiseIE
 from .hellporno import HellPornoIE
 from .helsinki import HelsinkiIE
@@ -409,6 +411,7 @@ from .stanfordoc import StanfordOpenClassroomIE
 from .steam import SteamIE
 from .streamcloud import StreamcloudIE
 from .streamcz import StreamCZIE
+from .streetvoice import StreetVoiceIE
 from .sunporno import SunPornoIE
 from .swrmediathek import SWRMediathekIE
 from .syfy import SyfyIE
@@ -430,6 +433,7 @@ from .telemb import TeleMBIE
 from .teletask import TeleTaskIE
 from .tenplay import TenPlayIE
 from .testurl import TestURLIE
+from .testtube import TestTubeIE
 from .tf1 import TF1IE
 from .theonion import TheOnionIE
 from .theplatform import ThePlatformIE
@@ -458,7 +462,14 @@ from .tvigle import TvigleIE
 from .tvp import TvpIE, TvpSeriesIE
 from .tvplay import TVPlayIE
 from .twentyfourvideo import TwentyFourVideoIE
-from .twitch import TwitchIE
+from .twitch import (
+    TwitchVideoIE,
+    TwitchChapterIE,
+    TwitchVodIE,
+    TwitchProfileIE,
+    TwitchPastBroadcastsIE,
+    TwitchStreamIE,
+)
 from .ubu import UbuIE
 from .udemy import (
     UdemyIE,
youtube_dl/extractor/abc7news.py (new file, 68 lines)
@@ -0,0 +1,68 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import parse_iso8601
+
+
+class Abc7NewsIE(InfoExtractor):
+    _VALID_URL = r'https?://abc7news\.com(?:/[^/]+/(?P<display_id>[^/]+))?/(?P<id>\d+)'
+    _TESTS = [
+        {
+            'url': 'http://abc7news.com/entertainment/east-bay-museum-celebrates-vintage-synthesizers/472581/',
+            'info_dict': {
+                'id': '472581',
+                'display_id': 'east-bay-museum-celebrates-vintage-synthesizers',
+                'ext': 'mp4',
+                'title': 'East Bay museum celebrates history of synthesized music',
+                'description': 'md5:a4f10fb2f2a02565c1749d4adbab4b10',
+                'thumbnail': 're:^https?://.*\.jpg$',
+                'timestamp': 1421123075,
+                'upload_date': '20150113',
+                'uploader': 'Jonathan Bloom',
+            },
+            'params': {
+                # m3u8 download
+                'skip_download': True,
+            },
+        },
+        {
+            'url': 'http://abc7news.com/472581',
+            'only_matching': True,
+        },
+    ]
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        display_id = mobj.group('display_id') or video_id
+
+        webpage = self._download_webpage(url, display_id)
+
+        m3u8 = self._html_search_meta(
+            'contentURL', webpage, 'm3u8 url', fatal=True)
+
+        formats = self._extract_m3u8_formats(m3u8, display_id, 'mp4')
+        self._sort_formats(formats)
+
+        title = self._og_search_title(webpage).strip()
+        description = self._og_search_description(webpage).strip()
+        thumbnail = self._og_search_thumbnail(webpage)
+        timestamp = parse_iso8601(self._search_regex(
+            r'<div class="meta">\s*<time class="timeago" datetime="([^"]+)">',
+            webpage, 'upload date', fatal=False))
+        uploader = self._search_regex(
+            r'rel="author">([^<]+)</a>',
+            webpage, 'uploader', default=None)
+
+        return {
+            'id': video_id,
+            'display_id': display_id,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'timestamp': timestamp,
+            'uploader': uploader,
+            'formats': formats,
+        }
youtube_dl/extractor/bandcamp.py
@@ -161,7 +161,8 @@ class BandcampAlbumIE(InfoExtractor):
         entries = [
             self.url_result(compat_urlparse.urljoin(url, t_path), ie=BandcampIE.ie_key())
             for t_path in tracks_paths]
-        title = self._search_regex(r'album_title : "(.*?)"', webpage, 'title')
+        title = self._search_regex(
+            r'album_title\s*:\s*"(.*?)"', webpage, 'title', fatal=False)
         return {
             '_type': 'playlist',
             'id': playlist_id,
youtube_dl/extractor/cnn.py
@@ -51,7 +51,7 @@ class CNNIE(InfoExtractor):
         mobj = re.match(self._VALID_URL, url)
         path = mobj.group('path')
         page_title = mobj.group('title')
-        info_url = 'http://cnn.com/video/data/3.0/%s/index.xml' % path
+        info_url = 'http://edition.cnn.com/video/data/3.0/%s/index.xml' % path
         info = self._download_xml(info_url, page_title)
 
         formats = []
@@ -143,13 +143,13 @@ class CNNArticleIE(InfoExtractor):
     _VALID_URL = r'https?://(?:(?:edition|www)\.)?cnn\.com/(?!video/)'
     _TEST = {
         'url': 'http://www.cnn.com/2014/12/21/politics/obama-north-koreas-hack-not-war-but-cyber-vandalism/',
-        'md5': '275b326f85d80dff7592a9820f5dc887',
+        'md5': '689034c2a3d9c6dc4aa72d65a81efd01',
         'info_dict': {
-            'id': 'bestoftv/2014/12/21/sotu-crowley-president-obama-north-korea-not-going-to-be-intimidated.cnn',
+            'id': 'bestoftv/2014/12/21/ip-north-korea-obama.cnn',
             'ext': 'mp4',
-            'title': 'Obama: We\'re not going to be intimidated',
-            'description': 'md5:e735586f3dc936075fa654a4d91b21f9',
-            'upload_date': '20141220',
+            'title': 'Obama: Cyberattack not an act of war',
+            'description': 'md5:51ce6750450603795cad0cdfbd7d05c5',
+            'upload_date': '20141221',
         },
         'add_ie': ['CNN'],
     }
youtube_dl/extractor/comedycentral.py
@@ -34,12 +34,12 @@ class ComedyCentralIE(MTVServicesInfoExtractor):
 
 class ComedyCentralShowsIE(MTVServicesInfoExtractor):
     IE_DESC = 'The Daily Show / The Colbert Report'
-    # urls can be abbreviations like :thedailyshow or :colbert
+    # urls can be abbreviations like :thedailyshow
     # urls for episodes like:
     # or urls for clips like: http://www.thedailyshow.com/watch/mon-december-10-2012/any-given-gun-day
     # or: http://www.colbertnation.com/the-colbert-report-videos/421667/november-29-2012/moon-shattering-news
     # or: http://www.colbertnation.com/the-colbert-report-collections/422008/festival-of-lights/79524
-    _VALID_URL = r'''(?x)^(:(?P<shortname>tds|thedailyshow|cr|colbert|colbertnation|colbertreport)
+    _VALID_URL = r'''(?x)^(:(?P<shortname>tds|thedailyshow)
         |https?://(:www\.)?
             (?P<showname>thedailyshow|thecolbertreport)\.(?:cc\.)?com/
             ((?:full-)?episodes/(?:[0-9a-z]{6}/)?(?P<episode>.*)|
youtube_dl/extractor/fourtube.py
@@ -7,10 +7,9 @@ from ..compat import (
     compat_urllib_request,
 )
 from ..utils import (
-    clean_html,
     parse_duration,
+    parse_iso8601,
     str_to_int,
-    unified_strdate,
 )
 
 
@@ -28,68 +27,81 @@ class FourTubeIE(InfoExtractor):
             'uploader': 'WCP Club',
             'uploader_id': 'wcp-club',
             'upload_date': '20131031',
+            'timestamp': 1383263892,
             'duration': 583,
+            'view_count': int,
+            'like_count': int,
+            'categories': list,
         }
     }
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
-        webpage_url = 'http://www.4tube.com/videos/' + video_id
-        webpage = self._download_webpage(webpage_url, video_id)
+        webpage = self._download_webpage(url, video_id)
 
-        self.report_extraction(video_id)
+        title = self._html_search_meta('name', webpage)
+        timestamp = parse_iso8601(self._html_search_meta(
+            'uploadDate', webpage))
+        thumbnail = self._html_search_meta('thumbnailUrl', webpage)
+        uploader_id = self._html_search_regex(
+            r'<a class="img-avatar" href="[^"]+/channels/([^/"]+)" title="Go to [^"]+ page">',
+            webpage, 'uploader id')
+        uploader = self._html_search_regex(
+            r'<a class="img-avatar" href="[^"]+/channels/[^/"]+" title="Go to ([^"]+) page">',
+            webpage, 'uploader')
 
-        playlist_json = self._html_search_regex(r'var playerConfigPlaylist\s+=\s+([^;]+)', webpage, 'Playlist')
-        media_id = self._search_regex(r'idMedia:\s*(\d+)', playlist_json, 'Media Id')
-        sources = self._search_regex(r'sources:\s*\[([^\]]*)\]', playlist_json, 'Sources').split(',')
-        title = self._search_regex(r'title:\s*"([^"]*)', playlist_json, 'Title')
-        thumbnail_url = self._search_regex(r'image:\s*"([^"]*)', playlist_json, 'Thumbnail', fatal=False)
+        categories_html = self._search_regex(
+            r'(?s)><i class="icon icon-tag"></i>\s*Categories / Tags\s*.*?<ul class="list">(.*?)</ul>',
+            webpage, 'categories', fatal=False)
+        categories = None
+        if categories_html:
+            categories = [
+                c.strip() for c in re.findall(
+                    r'(?s)<li><a.*?>(.*?)</a>', categories_html)]
 
-        uploader_str = self._search_regex(r'<span>Uploaded by</span>(.*?)<span>', webpage, 'uploader', fatal=False)
-        mobj = re.search(r'<a href="/sites/(?P<id>[^"]+)"><strong>(?P<name>[^<]+)</strong></a>', uploader_str)
-        (uploader, uploader_id) = (mobj.group('name'), mobj.group('id')) if mobj else (clean_html(uploader_str), None)
+        view_count = str_to_int(self._search_regex(
+            r'<meta itemprop="interactionCount" content="UserPlays:([0-9,]+)">',
+            webpage, 'view count', fatal=False))
+        like_count = str_to_int(self._search_regex(
+            r'<meta itemprop="interactionCount" content="UserLikes:([0-9,]+)">',
+            webpage, 'like count', fatal=False))
+        duration = parse_duration(self._html_search_meta('duration', webpage))
 
-        upload_date = None
-        view_count = None
-        duration = None
-        description = self._html_search_meta('description', webpage, 'description')
-        if description:
-            upload_date = self._search_regex(r'Published Date: (\d{2} [a-zA-Z]{3} \d{4})', description, 'upload date',
-                                             fatal=False)
-            if upload_date:
-                upload_date = unified_strdate(upload_date)
-            view_count = self._search_regex(r'Views: ([\d,\.]+)', description, 'view count', fatal=False)
-            if view_count:
-                view_count = str_to_int(view_count)
-            duration = parse_duration(self._search_regex(r'Length: (\d+m\d+s)', description, 'duration', fatal=False))
+        params_js = self._search_regex(
+            r'\$\.ajax\(url,\ opts\);\s*\}\s*\}\)\(([0-9,\[\] ]+)\)',
+            webpage, 'initialization parameters'
+        )
+        params = self._parse_json('[%s]' % params_js, video_id)
+        media_id = params[0]
+        sources = ['%s' % p for p in params[2]]
 
-        token_url = "http://tkn.4tube.com/{0}/desktop/{1}".format(media_id, "+".join(sources))
+        token_url = 'http://tkn.4tube.com/{0}/desktop/{1}'.format(
+            media_id, '+'.join(sources))
         headers = {
             b'Content-Type': b'application/x-www-form-urlencoded',
             b'Origin': b'http://www.4tube.com',
         }
         token_req = compat_urllib_request.Request(token_url, b'{}', headers)
         tokens = self._download_json(token_req, video_id)
 
         formats = [{
             'url': tokens[format]['token'],
             'format_id': format + 'p',
             'resolution': format + 'p',
             'quality': int(format),
         } for format in sources]
 
         self._sort_formats(formats)
 
         return {
             'id': video_id,
             'title': title,
             'formats': formats,
-            'thumbnail': thumbnail_url,
+            'categories': categories,
+            'thumbnail': thumbnail,
             'uploader': uploader,
             'uploader_id': uploader_id,
-            'upload_date': upload_date,
+            'timestamp': timestamp,
+            'like_count': like_count,
             'view_count': view_count,
             'duration': duration,
             'age_limit': 18,
-            'webpage_url': webpage_url,
         }
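The rewritten extractor reads the player's initialization arguments, which arrive as a bare JavaScript argument list, and parses them by wrapping the list in brackets so it becomes a JSON array (_parse_json is essentially json.loads plus error reporting). A sketch with made-up values:

    import json

    params_js = '12345, 0, [1080, 720, 480]'  # hypothetical captured argument list
    params = json.loads('[%s]' % params_js)   # -> [12345, 0, [1080, 720, 480]]
    media_id = params[0]
    sources = ['%s' % p for p in params[2]]
    print(media_id, sources)                  # 12345 ['1080', '720', '480']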
youtube_dl/extractor/gamestar.py
@@ -1,8 +1,6 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
-import re
-
 from .common import InfoExtractor
 from ..utils import (
     int_or_none,
@@ -29,9 +27,7 @@ class GameStarIE(InfoExtractor):
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = self._match_id(url)
 
         webpage = self._download_webpage(url, video_id)
 
         og_title = self._og_search_title(webpage)
youtube_dl/extractor/generic.py
@@ -17,6 +17,7 @@ from ..utils import (
     ExtractorError,
     float_or_none,
     HEADRequest,
+    is_html,
     orderedSet,
     parse_xml,
     smuggle_url,
@@ -647,7 +648,7 @@ class GenericIE(InfoExtractor):
         # Maybe it's a direct link to a video?
         # Be careful not to download the whole thing!
         first_bytes = full_response.read(512)
-        if not re.match(r'^\s*<', first_bytes.decode('utf-8', 'replace')):
+        if not is_html(first_bytes):
             self._downloader.report_warning(
                 'URL could be a direct video link, returning it as such.')
             upload_date = unified_strdate(
youtube_dl/extractor/hearthisat.py (new file, 117 lines)
@@ -0,0 +1,117 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import (
+    compat_urllib_request,
+    compat_urlparse,
+)
+from ..utils import (
+    HEADRequest,
+    str_to_int,
+    urlencode_postdata,
+    urlhandle_detect_ext,
+)
+
+
+class HearThisAtIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?hearthis\.at/(?P<artist>[^/]+)/(?P<title>[A-Za-z0-9\-]+)/?$'
+    _PLAYLIST_URL = 'https://hearthis.at/playlist.php'
+    _TEST = {
+        'url': 'https://hearthis.at/moofi/dr-kreep',
+        'md5': 'ab6ec33c8fed6556029337c7885eb4e0',
+        'info_dict': {
+            'id': '150939',
+            'ext': 'wav',
+            'title': 'Moofi - Dr. Kreep',
+            'thumbnail': 're:^https?://.*\.jpg$',
+            'timestamp': 1421564134,
+            'description': 'Creepy Patch. Mutable Instruments Braids Vowel + Formant Mode.',
+            'upload_date': '20150118',
+            'comment_count': int,
+            'view_count': int,
+            'like_count': int,
+            'duration': 71,
+            'categories': ['Experimental'],
+        }
+    }
+
+    def _real_extract(self, url):
+        m = re.match(self._VALID_URL, url)
+        display_id = '{artist:s} - {title:s}'.format(**m.groupdict())
+
+        webpage = self._download_webpage(url, display_id)
+        track_id = self._search_regex(
+            r'intTrackId\s*=\s*(\d+)', webpage, 'track ID')
+
+        payload = urlencode_postdata({'tracks[]': track_id})
+        req = compat_urllib_request.Request(self._PLAYLIST_URL, payload)
+        req.add_header('Content-type', 'application/x-www-form-urlencoded')
+
+        track = self._download_json(req, track_id, 'Downloading playlist')[0]
+        title = '{artist:s} - {title:s}'.format(**track)
+
+        categories = None
+        if track.get('category'):
+            categories = [track['category']]
+
+        description = self._og_search_description(webpage)
+        thumbnail = self._og_search_thumbnail(webpage)
+
+        meta_span = r'<span[^>]+class="%s".*?</i>([^<]+)</span>'
+        view_count = str_to_int(self._search_regex(
+            meta_span % 'plays_count', webpage, 'view count', fatal=False))
+        like_count = str_to_int(self._search_regex(
+            meta_span % 'likes_count', webpage, 'like count', fatal=False))
+        comment_count = str_to_int(self._search_regex(
+            meta_span % 'comment_count', webpage, 'comment count', fatal=False))
+        duration = str_to_int(self._search_regex(
+            r'data-length="(\d+)', webpage, 'duration', fatal=False))
+        timestamp = str_to_int(self._search_regex(
+            r'<span[^>]+class="calctime"[^>]+data-time="(\d+)', webpage, 'timestamp', fatal=False))
+
+        formats = []
+        mp3_url = self._search_regex(
+            r'(?s)<a class="player-link"\s+(?:[a-zA-Z0-9_:-]+="[^"]+"\s+)*?data-mp3="([^"]+)"',
+            webpage, 'mp3 URL', fatal=False)
+        if mp3_url:
+            formats.append({
+                'format_id': 'mp3',
+                'vcodec': 'none',
+                'acodec': 'mp3',
+                'url': mp3_url,
+            })
+        download_path = self._search_regex(
+            r'<a class="[^"]*download_fct[^"]*"\s+href="([^"]+)"',
+            webpage, 'download URL', default=None)
+        if download_path:
+            download_url = compat_urlparse.urljoin(url, download_path)
+            ext_req = HEADRequest(download_url)
+            ext_handle = self._request_webpage(
+                ext_req, display_id, note='Determining extension')
+            ext = urlhandle_detect_ext(ext_handle)
+            formats.append({
+                'format_id': 'download',
+                'vcodec': 'none',
+                'ext': ext,
+                'url': download_url,
+                'preference': 2,  # Usually better quality
+            })
+        self._sort_formats(formats)
+
+        return {
+            'id': track_id,
+            'display_id': display_id,
+            'title': title,
+            'formats': formats,
+            'thumbnail': thumbnail,
+            'description': description,
+            'duration': duration,
+            'timestamp': timestamp,
+            'view_count': view_count,
+            'comment_count': comment_count,
+            'like_count': like_count,
+            'categories': categories,
+        }
youtube_dl/extractor/nbc.py
@@ -6,6 +6,7 @@ import json
 from .common import InfoExtractor
 from ..compat import (
     compat_str,
+    compat_HTTPError,
 )
 from ..utils import (
     ExtractorError,
@@ -78,6 +79,16 @@ class NBCNewsIE(InfoExtractor):
             },
             'add_ie': ['ThePlatform'],
         },
+        {
+            'url': 'http://www.nbcnews.com/feature/dateline-full-episodes/full-episode-family-business-n285156',
+            'md5': 'fdbf39ab73a72df5896b6234ff98518a',
+            'info_dict': {
+                'id': 'Wjf9EDR3A_60',
+                'ext': 'mp4',
+                'title': 'FULL EPISODE: Family Business',
+                'description': 'md5:757988edbaae9d7be1d585eb5d55cc04',
+            },
+        },
     ]
 
     def _real_extract(self, url):
@@ -115,10 +126,19 @@ class NBCNewsIE(InfoExtractor):
             if not base_url:
                 continue
             playlist_url = base_url + '?form=MPXNBCNewsAPI'
-            all_videos = self._download_json(playlist_url, title)['videos']
 
             try:
-                info = next(v for v in all_videos if v['mpxId'] == mpxid)
+                all_videos = self._download_json(playlist_url, title)
+            except ExtractorError as ee:
+                if isinstance(ee.cause, compat_HTTPError):
+                    continue
+                raise
+
+            if not all_videos or 'videos' not in all_videos:
+                continue
+
+            try:
+                info = next(v for v in all_videos['videos'] if v['mpxId'] == mpxid)
                 break
             except StopIteration:
                 continue
youtube_dl/extractor/ndtv.py
@@ -27,9 +27,7 @@ class NDTVIE(InfoExtractor):
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = self._match_id(url)
 
         webpage = self._download_webpage(url, video_id)
 
         filename = self._search_regex(
(unidentified extractor module)
@@ -1,7 +1,5 @@
 from __future__ import unicode_literals
 
-import re
-
 from .common import InfoExtractor
 from ..utils import (
     fix_xml_ampersands,
youtube_dl/extractor/pornhub.py
@@ -10,6 +10,7 @@ from ..compat import (
     compat_urllib_request,
 )
 from ..utils import (
+    ExtractorError,
     str_to_int,
 )
 from ..aes import (
@@ -44,6 +45,15 @@ class PornHubIE(InfoExtractor):
         req.add_header('Cookie', 'age_verified=1')
         webpage = self._download_webpage(req, video_id)
 
+        error_msg = self._html_search_regex(
+            r'(?s)<div class="userMessageSection[^"]*".*?>(.*?)</div>',
+            webpage, 'error message', default=None)
+        if error_msg:
+            error_msg = re.sub(r'\s+', ' ', error_msg)
+            raise ExtractorError(
+                'PornHub said: %s' % error_msg,
+                expected=True, video_id=video_id)
+
         video_title = self._html_search_regex(r'<h1 [^>]+>([^<]+)', webpage, 'title')
         video_uploader = self._html_search_regex(
             r'(?s)From: .+?<(?:a href="/users/|a href="/channels/|<span class="username)[^>]+>(.+?)<',
youtube_dl/extractor/streetvoice.py (new file, 51 lines)
@@ -0,0 +1,51 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import unified_strdate
+
+
+class StreetVoiceIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:.+?\.)?streetvoice\.com/[^/]+/songs/(?P<id>[0-9]+)'
+    _TESTS = [{
+        'url': 'http://streetvoice.com/skippylu/songs/94440/',
+        'md5': '15974627fc01a29e492c98593c2fd472',
+        'info_dict': {
+            'id': '94440',
+            'ext': 'mp3',
+            'filesize': 4167053,
+            'title': '輸',
+            'description': 'Crispy脆樂團 - 輸',
+            'thumbnail': 're:^https?://.*\.jpg$',
+            'duration': 260,
+            'upload_date': '20091018',
+            'uploader': 'Crispy脆樂團',
+            'uploader_id': '627810',
+        }
+    }, {
+        'url': 'http://tw.streetvoice.com/skippylu/songs/94440/',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        song_id = self._match_id(url)
+
+        song = self._download_json(
+            'http://streetvoice.com/music/api/song/%s' % song_id, song_id)
+
+        title = song['name']
+        author = song['musician']['name']
+
+        return {
+            'id': song_id,
+            'url': song['file'],
+            'filesize': song.get('size'),
+            'title': title,
+            'description': '%s - %s' % (author, title),
+            'thumbnail': self._proto_relative_url(song.get('image'), 'http:'),
+            'duration': song.get('length'),
+            'upload_date': unified_strdate(song.get('created_at')),
+            'uploader': author,
+            'uploader_id': compat_str(song['musician']['id']),
+        }
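This extractor is JSON-only: everything comes from the song API, and the API's created_at string is normalized to the YYYYMMDD form youtube-dl uses for upload_date. A quick illustration (the input layout is an assumption; unified_strdate tries a list of known datetime formats):

    from youtube_dl.utils import unified_strdate

    print(unified_strdate('2009-10-18 10:46:00'))  # '20091018'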
youtube_dl/extractor/testtube.py (new file, 60 lines)
@@ -0,0 +1,60 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import int_or_none
+
+
+class TestTubeIE(InfoExtractor):
+    _VALID_URL = r'https?://testtube\.com/[^/?#]+/(?P<id>[^/?#]+)'
+    _TESTS = [{
+        'url': 'https://testtube.com/dnews/5-weird-ways-plants-can-eat-animals?utm_source=FB&utm_medium=DNews&utm_campaign=DNewsSocial',
+        'info_dict': {
+            'id': '60163',
+            'display_id': '5-weird-ways-plants-can-eat-animals',
+            'duration': 275,
+            'ext': 'mp4',
+            'title': '5 Weird Ways Plants Can Eat Animals',
+            'description': 'Why have some plants evolved to eat meat?',
+            'thumbnail': 're:^https?://.*\.jpg$',
+            'uploader': 'DNews',
+            'uploader_id': 'dnews',
+        },
+    }]
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, display_id)
+        video_id = self._search_regex(
+            r"player\.loadRevision3Item\('video_id',\s*([0-9]+)\);",
+            webpage, 'video ID')
+
+        all_info = self._download_json(
+            'https://testtube.com/api/getPlaylist.json?api_key=ba9c741bce1b9d8e3defcc22193f3651b8867e62&codecs=h264,vp8,theora&video_id=%s' % video_id,
+            video_id)
+        info = all_info['items'][0]
+
+        formats = []
+        for vcodec, fdatas in info['media'].items():
+            for name, fdata in fdatas.items():
+                formats.append({
+                    'format_id': '%s-%s' % (vcodec, name),
+                    'url': fdata['url'],
+                    'vcodec': vcodec,
+                    'tbr': fdata.get('bitrate'),
+                })
+        self._sort_formats(formats)
+
+        duration = int_or_none(info.get('duration'))
+
+        return {
+            'id': video_id,
+            'display_id': display_id,
+            'title': info['title'],
+            'description': info.get('summary'),
+            'thumbnail': info.get('images', {}).get('large'),
+            'uploader': info.get('show', {}).get('name'),
+            'uploader_id': info.get('show', {}).get('slug'),
+            'duration': duration,
+            'formats': formats,
+        }
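The playlist API nests format data two levels deep, codec first and variant name second, which is why the extractor walks a double loop. A sketch of the expected shape (field values invented for illustration):

    media = {  # hypothetical payload mirroring info['media']
        'h264': {'medium': {'url': 'http://example.com/v-med.mp4', 'bitrate': 800}},
        'vp8': {'high': {'url': 'http://example.com/v-high.webm', 'bitrate': 1500}},
    }
    formats = [{
        'format_id': '%s-%s' % (vcodec, name),
        'url': fdata['url'],
        'vcodec': vcodec,
        'tbr': fdata.get('bitrate'),
    } for vcodec, fdatas in media.items() for name, fdata in fdatas.items()]
    print(sorted(f['format_id'] for f in formats))  # ['h264-medium', 'vp8-high']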
|
@ -9,17 +9,23 @@ from ..utils import ExtractorError
|
|||||||
class TinyPicIE(InfoExtractor):
|
class TinyPicIE(InfoExtractor):
|
||||||
IE_NAME = 'tinypic'
|
IE_NAME = 'tinypic'
|
||||||
IE_DESC = 'tinypic.com videos'
|
IE_DESC = 'tinypic.com videos'
|
||||||
_VALID_URL = r'http://tinypic\.com/player\.php\?v=(?P<id>[^&]+)&s=\d+'
|
_VALID_URL = r'http://(?:.+?\.)?tinypic\.com/player\.php\?v=(?P<id>[^&]+)&s=\d+'
|
||||||
|
|
||||||
_TEST = {
|
_TESTS = [
|
||||||
|
{
|
||||||
'url': 'http://tinypic.com/player.php?v=6xw7tc%3E&s=5#.UtqZmbRFCM8',
|
'url': 'http://tinypic.com/player.php?v=6xw7tc%3E&s=5#.UtqZmbRFCM8',
|
||||||
'md5': '609b74432465364e72727ebc6203f044',
|
'md5': '609b74432465364e72727ebc6203f044',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '6xw7tc',
|
'id': '6xw7tc',
|
||||||
'ext': 'flv',
|
'ext': 'flv',
|
||||||
'title': 'shadow phenomenon weird',
|
'title': 'shadow phenomenon weird',
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
'url': 'http://de.tinypic.com/player.php?v=dy90yh&s=8',
|
||||||
|
'only_matching': True,
|
||||||
}
|
}
|
||||||
}
|
]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
youtube_dl/extractor/tvp.py
@@ -12,61 +12,59 @@ class TvpIE(InfoExtractor):
 
     _TESTS = [{
         'url': 'http://vod.tvp.pl/filmy-fabularne/filmy-za-darmo/ogniem-i-mieczem/wideo/odc-2/4278035',
+        'md5': 'cdd98303338b8a7f7abab5cd14092bf2',
         'info_dict': {
             'id': '4278035',
             'ext': 'wmv',
             'title': 'Ogniem i mieczem, odc. 2',
-            'description': 'Bohun dowiaduje się o złamaniu przez kniahinię danego mu słowa i wyrusza do Rozłogów. Helenie w ostatniej chwili udaje się uciec dzięki pomocy Zagłoby.',
         },
     }, {
         'url': 'http://vod.tvp.pl/seriale/obyczajowe/czas-honoru/sezon-1-1-13/i-seria-odc-13/194536',
+        'md5': '8aa518c15e5cc32dfe8db400dc921fbb',
         'info_dict': {
             'id': '194536',
             'ext': 'mp4',
             'title': 'Czas honoru, I seria – odc. 13',
-            # 'description': 'WŁADEK\nCzesław prosi Marię o dostarczenie Władkowi zarazki tyfusu. Jeśli zachoruje zostanie przewieziony do szpitala skąd łatwiej będzie go odbić. Czy matka zdecyduje się zarazić syna? Karol odwiedza Wandę przyznaje się, że ją oszukiwał, ale ostrzega też, że grozi jej aresztowanie i nalega, żeby wyjechała z Warszawy. Czy dziewczyna zdecyduje się znów oddalić od ukochanego? Rozpoczyna się akcja odbicia Władka.',
         },
     }, {
         'url': 'http://www.tvp.pl/there-can-be-anything-so-i-shortened-it/17916176',
+        'md5': 'c3b15ed1af288131115ff17a17c19dda',
         'info_dict': {
             'id': '17916176',
             'ext': 'mp4',
             'title': 'TVP Gorzów pokaże filmy studentów z podroży dookoła świata',
         },
-        'params': {
-            # m3u8 download
-            'skip_download': 'true',
-        },
     }, {
         'url': 'http://vod.tvp.pl/seriale/obyczajowe/na-sygnale/sezon-2-27-/odc-39/17834272',
+        'md5': 'c3b15ed1af288131115ff17a17c19dda',
         'info_dict': {
             'id': '17834272',
             'ext': 'mp4',
             'title': 'Na sygnale, odc. 39',
-            'description': 'Ekipa Wiktora ratuje młodą matkę, która spadła ze schodów trzymając na rękach noworodka. Okazuje się, że dziewczyna jest surogatką, a biologiczni rodzice dziecka próbują zmusić ją do oddania synka…',
         },
-        'params': {
-            # m3u8 download
-            'skip_download': 'true',
-        },
     }]
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
 
         webpage = self._download_webpage(
             'http://www.tvp.pl/sess/tvplayer.php?object_id=%s' % video_id, video_id)
 
-        title = self._og_search_title(webpage)
-        series = self._search_regex(
-            r'{name:\s*([\'"])SeriesTitle\1,\s*value:\s*\1(?P<series>.*?)\1},',
+        title = self._search_regex(
+            r'name\s*:\s*([\'"])Title\1\s*,\s*value\s*:\s*\1(?P<title>.+?)\1',
+            webpage, 'title', group='title')
+        series_title = self._search_regex(
+            r'name\s*:\s*([\'"])SeriesTitle\1\s*,\s*value\s*:\s*\1(?P<series>.+?)\1',
             webpage, 'series', group='series', default=None)
-        if series is not None and series not in title:
-            title = '%s, %s' % (series, title)
-        description = self._og_search_description(webpage, default=None)
+        if series_title:
+            title = '%s, %s' % (series_title, title)
+
+        thumbnail = self._search_regex(
+            r"poster\s*:\s*'([^']+)'", webpage, 'thumbnail', default=None)
 
         video_url = self._search_regex(
             r'0:{src:([\'"])(?P<url>.*?)\1', webpage, 'formats', group='url', default=None)
-        if video_url is None:
+        if not video_url:
             video_url = self._download_json(
                 'http://www.tvp.pl/pub/stat/videofileinfo?video_id=%s' % video_id,
                 video_id)['video_url']
@@ -89,8 +87,7 @@ class TvpIE(InfoExtractor):
         return {
             'id': video_id,
             'title': title,
-            'thumbnail': self._og_search_thumbnail(webpage),
-            'description': description,
+            'thumbnail': thumbnail,
             'formats': formats,
         }
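The old code trusted og:title; the new regexes read the player configuration directly. The backreference \1 lets either quote character delimit the value, and group='title' selects the named group. A standalone check against a fabricated config snippet:

    import re

    webpage = "name : 'Title', value : 'Czas honoru, I seria – odc. 13'"  # fabricated
    m = re.search(
        r'name\s*:\s*([\'"])Title\1\s*,\s*value\s*:\s*\1(?P<title>.+?)\1', webpage)
    print(m.group('title'))  # Czas honoru, I seria – odc. 13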
@ -3,9 +3,11 @@ from __future__ import unicode_literals
|
|||||||
|
|
||||||
import itertools
|
import itertools
|
||||||
import re
|
import re
|
||||||
|
import random
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
|
compat_str,
|
||||||
compat_urllib_parse,
|
compat_urllib_parse,
|
||||||
compat_urllib_request,
|
compat_urllib_request,
|
||||||
)
|
)
|
||||||
@ -15,44 +17,12 @@ from ..utils import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class TwitchIE(InfoExtractor):
|
class TwitchBaseIE(InfoExtractor):
|
||||||
# TODO: One broadcast may be split into multiple videos. The key
|
_VALID_URL_BASE = r'https?://(?:www\.)?twitch\.tv'
|
||||||
# 'broadcast_id' is the same for all parts, and 'broadcast_part'
|
|
||||||
# starts at 1 and increases. Can we treat all parts as one video?
|
|
||||||
_VALID_URL = r"""(?x)^(?:http://)?(?:www\.)?twitch\.tv/
|
|
||||||
(?:
|
|
||||||
(?P<channelid>[^/]+)|
|
|
||||||
(?:(?:[^/]+)/v/(?P<vodid>[^/]+))|
|
|
||||||
(?:(?:[^/]+)/b/(?P<videoid>[^/]+))|
|
|
||||||
(?:(?:[^/]+)/c/(?P<chapterid>[^/]+))
|
|
||||||
)
|
|
||||||
/?(?:\#.*)?$
|
|
||||||
"""
|
|
||||||
_PAGE_LIMIT = 100
|
|
||||||
_API_BASE = 'https://api.twitch.tv'
|
_API_BASE = 'https://api.twitch.tv'
|
||||||
|
_USHER_BASE = 'http://usher.twitch.tv'
|
||||||
_LOGIN_URL = 'https://secure.twitch.tv/user/login'
|
_LOGIN_URL = 'https://secure.twitch.tv/user/login'
|
||||||
_TESTS = [{
|
|
||||||
'url': 'http://www.twitch.tv/riotgames/b/577357806',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'a577357806',
|
|
||||||
'title': 'Worlds Semifinals - Star Horn Royal Club vs. OMG',
|
|
||||||
},
|
|
||||||
'playlist_mincount': 12,
|
|
||||||
}, {
|
|
||||||
'url': 'http://www.twitch.tv/acracingleague/c/5285812',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'c5285812',
|
|
||||||
'title': 'ACRL Off Season - Sports Cars @ Nordschleife',
|
|
||||||
},
|
|
||||||
'playlist_mincount': 3,
|
|
||||||
}, {
|
|
||||||
'url': 'http://www.twitch.tv/vanillatv',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'vanillatv',
|
|
||||||
'title': 'VanillaTV',
|
|
||||||
},
|
|
||||||
'playlist_mincount': 412,
|
|
||||||
}]
|
|
||||||
|
|
||||||
def _handle_error(self, response):
|
def _handle_error(self, response):
|
||||||
if not isinstance(response, dict):
|
if not isinstance(response, dict):
|
||||||
@@ -64,71 +34,10 @@ class TwitchIE(InfoExtractor):
                 expected=True)

     def _download_json(self, url, video_id, note='Downloading JSON metadata'):
-        response = super(TwitchIE, self)._download_json(url, video_id, note)
+        response = super(TwitchBaseIE, self)._download_json(url, video_id, note)
         self._handle_error(response)
         return response

-    def _extract_media(self, item, item_id):
-        ITEMS = {
-            'a': 'video',
-            'v': 'vod',
-            'c': 'chapter',
-        }
-        info = self._extract_info(self._download_json(
-            '%s/kraken/videos/%s%s' % (self._API_BASE, item, item_id), item_id,
-            'Downloading %s info JSON' % ITEMS[item]))
-
-        if item == 'v':
-            access_token = self._download_json(
-                '%s/api/vods/%s/access_token' % (self._API_BASE, item_id), item_id,
-                'Downloading %s access token' % ITEMS[item])
-            formats = self._extract_m3u8_formats(
-                'http://usher.twitch.tv/vod/%s?nauth=%s&nauthsig=%s'
-                % (item_id, access_token['token'], access_token['sig']),
-                item_id, 'mp4')
-            info['formats'] = formats
-            return info
-
-        response = self._download_json(
-            '%s/api/videos/%s%s' % (self._API_BASE, item, item_id), item_id,
-            'Downloading %s playlist JSON' % ITEMS[item])
-        entries = []
-        chunks = response['chunks']
-        qualities = list(chunks.keys())
-        for num, fragment in enumerate(zip(*chunks.values()), start=1):
-            formats = []
-            for fmt_num, fragment_fmt in enumerate(fragment):
-                format_id = qualities[fmt_num]
-                fmt = {
-                    'url': fragment_fmt['url'],
-                    'format_id': format_id,
-                    'quality': 1 if format_id == 'live' else 0,
-                }
-                m = re.search(r'^(?P<height>\d+)[Pp]', format_id)
-                if m:
-                    fmt['height'] = int(m.group('height'))
-                formats.append(fmt)
-            self._sort_formats(formats)
-            entry = dict(info)
-            entry['id'] = '%s_%d' % (entry['id'], num)
-            entry['title'] = '%s part %d' % (entry['title'], num)
-            entry['formats'] = formats
-            entries.append(entry)
-        return self.playlist_result(entries, info['id'], info['title'])
-
-    def _extract_info(self, info):
-        return {
-            'id': info['_id'],
-            'title': info['title'],
-            'description': info['description'],
-            'duration': info['length'],
-            'thumbnail': info['preview'],
-            'uploader': info['channel']['display_name'],
-            'uploader_id': info['channel']['name'],
-            'timestamp': parse_iso8601(info['recorded_at']),
-            'view_count': info['views'],
-        }
-
     def _real_initialize(self):
         self._login()
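The `_download_json` override is the error funnel for the whole module: every Kraken API response passes through `_handle_error` before any caller sees it, so the subclasses never check for API errors themselves. The shape of that wrapper pattern, as a standalone sketch:

    class Base(object):
        def _download_json(self, url):
            return {'error': 'Not Found'}  # stand-in for a real HTTP fetch

    class TwitchLike(Base):
        def _handle_error(self, response):
            if isinstance(response, dict) and response.get('error'):
                raise ValueError('API error: %s' % response['error'])

        def _download_json(self, url):
            response = super(TwitchLike, self)._download_json(url)
            self._handle_error(response)
            return response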
@@ -167,66 +76,139 @@ class TwitchIE(InfoExtractor):
             raise ExtractorError(
                 'Unable to login: %s' % m.group('msg').strip(), expected=True)

-    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        if mobj.group('chapterid'):
-            return self._extract_media('c', mobj.group('chapterid'))
-
-            """
-            webpage = self._download_webpage(url, chapter_id)
-            m = re.search(r'PP\.archive_id = "([0-9]+)";', webpage)
-            if not m:
-                raise ExtractorError('Cannot find archive of a chapter')
-            archive_id = m.group(1)
-
-            api = api_base + '/broadcast/by_chapter/%s.xml' % chapter_id
-            doc = self._download_xml(
-                api, chapter_id,
-                note='Downloading chapter information',
-                errnote='Chapter information download failed')
-            for a in doc.findall('.//archive'):
-                if archive_id == a.find('./id').text:
-                    break
-            else:
-                raise ExtractorError('Could not find chapter in chapter information')
-
-            video_url = a.find('./video_file_url').text
-            video_ext = video_url.rpartition('.')[2] or 'flv'
-
-            chapter_api_url = 'https://api.twitch.tv/kraken/videos/c' + chapter_id
-            chapter_info = self._download_json(
-                chapter_api_url, 'c' + chapter_id,
-                note='Downloading chapter metadata',
-                errnote='Download of chapter metadata failed')
-
-            bracket_start = int(doc.find('.//bracket_start').text)
-            bracket_end = int(doc.find('.//bracket_end').text)
-
-            # TODO determine start (and probably fix up file)
-            # youtube-dl -v http://www.twitch.tv/firmbelief/c/1757457
-            #video_url += '?start=' + TODO:start_timestamp
-            # bracket_start is 13290, but we want 51670615
-            self._downloader.report_warning('Chapter detected, but we can just download the whole file. '
-                                            'Chapter starts at %s and ends at %s' % (formatSeconds(bracket_start), formatSeconds(bracket_end)))
-
-            info = {
-                'id': 'c' + chapter_id,
-                'url': video_url,
-                'ext': video_ext,
-                'title': chapter_info['title'],
-                'thumbnail': chapter_info['preview'],
-                'description': chapter_info['description'],
-                'uploader': chapter_info['channel']['display_name'],
-                'uploader_id': chapter_info['channel']['name'],
-            }
-            return info
-            """
-        elif mobj.group('videoid'):
-            return self._extract_media('a', mobj.group('videoid'))
-        elif mobj.group('vodid'):
-            return self._extract_media('v', mobj.group('vodid'))
-        elif mobj.group('channelid'):
-            channel_id = mobj.group('channelid')
+
+class TwitchItemBaseIE(TwitchBaseIE):
+    def _download_info(self, item, item_id):
+        return self._extract_info(self._download_json(
+            '%s/kraken/videos/%s%s' % (self._API_BASE, item, item_id), item_id,
+            'Downloading %s info JSON' % self._ITEM_TYPE))
+
+    def _extract_media(self, item_id):
+        info = self._download_info(self._ITEM_SHORTCUT, item_id)
+        response = self._download_json(
+            '%s/api/videos/%s%s' % (self._API_BASE, self._ITEM_SHORTCUT, item_id), item_id,
+            'Downloading %s playlist JSON' % self._ITEM_TYPE)
+        entries = []
+        chunks = response['chunks']
+        qualities = list(chunks.keys())
+        for num, fragment in enumerate(zip(*chunks.values()), start=1):
+            formats = []
+            for fmt_num, fragment_fmt in enumerate(fragment):
+                format_id = qualities[fmt_num]
+                fmt = {
+                    'url': fragment_fmt['url'],
+                    'format_id': format_id,
+                    'quality': 1 if format_id == 'live' else 0,
+                }
+                m = re.search(r'^(?P<height>\d+)[Pp]', format_id)
+                if m:
+                    fmt['height'] = int(m.group('height'))
+                formats.append(fmt)
+            self._sort_formats(formats)
+            entry = dict(info)
+            entry['id'] = '%s_%d' % (entry['id'], num)
+            entry['title'] = '%s part %d' % (entry['title'], num)
+            entry['formats'] = formats
+            entries.append(entry)
+        return self.playlist_result(entries, info['id'], info['title'])
+
+    def _extract_info(self, info):
+        return {
+            'id': info['_id'],
+            'title': info['title'],
+            'description': info['description'],
+            'duration': info['length'],
+            'thumbnail': info['preview'],
+            'uploader': info['channel']['display_name'],
+            'uploader_id': info['channel']['name'],
+            'timestamp': parse_iso8601(info['recorded_at']),
+            'view_count': info['views'],
+        }
+
+    def _real_extract(self, url):
+        return self._extract_media(self._match_id(url))
+
+
+class TwitchVideoIE(TwitchItemBaseIE):
+    IE_NAME = 'twitch:video'
+    _VALID_URL = r'%s/[^/]+/b/(?P<id>[^/]+)' % TwitchBaseIE._VALID_URL_BASE
+    _ITEM_TYPE = 'video'
+    _ITEM_SHORTCUT = 'a'
+
+    _TEST = {
+        'url': 'http://www.twitch.tv/riotgames/b/577357806',
+        'info_dict': {
+            'id': 'a577357806',
+            'title': 'Worlds Semifinals - Star Horn Royal Club vs. OMG',
+        },
+        'playlist_mincount': 12,
+    }
+
+
+class TwitchChapterIE(TwitchItemBaseIE):
+    IE_NAME = 'twitch:chapter'
+    _VALID_URL = r'%s/[^/]+/c/(?P<id>[^/]+)' % TwitchBaseIE._VALID_URL_BASE
+    _ITEM_TYPE = 'chapter'
+    _ITEM_SHORTCUT = 'c'
+
+    _TESTS = [{
+        'url': 'http://www.twitch.tv/acracingleague/c/5285812',
+        'info_dict': {
+            'id': 'c5285812',
+            'title': 'ACRL Off Season - Sports Cars @ Nordschleife',
+        },
+        'playlist_mincount': 3,
+    }, {
+        'url': 'http://www.twitch.tv/tsm_theoddone/c/2349361',
+        'only_matching': True,
+    }]
+
+
+class TwitchVodIE(TwitchItemBaseIE):
+    IE_NAME = 'twitch:vod'
+    _VALID_URL = r'%s/[^/]+/v/(?P<id>[^/]+)' % TwitchBaseIE._VALID_URL_BASE
+    _ITEM_TYPE = 'vod'
+    _ITEM_SHORTCUT = 'v'
+
+    _TEST = {
+        'url': 'http://www.twitch.tv/ksptv/v/3622000',
+        'info_dict': {
+            'id': 'v3622000',
+            'ext': 'mp4',
+            'title': '''KSPTV: Squadcast: "Everyone's on vacation so here's Dahud" Edition!''',
+            'thumbnail': 're:^https?://.*\.jpg$',
+            'duration': 6951,
+            'timestamp': 1419028564,
+            'upload_date': '20141219',
+            'uploader': 'KSPTV',
+            'uploader_id': 'ksptv',
+            'view_count': int,
+        },
+        'params': {
+            # m3u8 download
+            'skip_download': True,
+        },
+    }
+
+    def _real_extract(self, url):
+        item_id = self._match_id(url)
+        info = self._download_info(self._ITEM_SHORTCUT, item_id)
+        access_token = self._download_json(
+            '%s/api/vods/%s/access_token' % (self._API_BASE, item_id), item_id,
+            'Downloading %s access token' % self._ITEM_TYPE)
+        formats = self._extract_m3u8_formats(
+            '%s/vod/%s?nauth=%s&nauthsig=%s'
+            % (self._USHER_BASE, item_id, access_token['token'], access_token['sig']),
+            item_id, 'mp4')
+        info['formats'] = formats
+        return info
+
+
+class TwitchPlaylistBaseIE(TwitchBaseIE):
+    _PLAYLIST_URL = '%s/kraken/channels/%%s/videos/?offset=%%d&limit=%%d' % TwitchBaseIE._API_BASE
+    _PAGE_LIMIT = 100
+
+    def _extract_playlist(self, channel_id):
         info = self._download_json(
             '%s/kraken/channels/%s' % (self._API_BASE, channel_id),
             channel_id, 'Downloading channel info JSON')
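A note on the fragment regrouping in `_extract_media`: the API returns `chunks` keyed by quality, each a parallel list of video parts, and `zip(*chunks.values())` transposes that into one tuple of per-quality variants for each part. Toy illustration (real entries are dicts with a `url` key; exact dict ordering varies by Python version):

    chunks = {
        'live': ['part1-live', 'part2-live'],
        '240p': ['part1-240p', 'part2-240p'],
    }
    qualities = list(chunks.keys())
    for num, fragment in enumerate(zip(*chunks.values()), start=1):
        print(num, dict(zip(qualities, fragment)))
    # 1 {'live': 'part1-live', '240p': 'part1-240p'}
    # 2 {'live': 'part2-live', '240p': 'part2-240p'}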
@@ -236,12 +218,134 @@ class TwitchIE(InfoExtractor):
         limit = self._PAGE_LIMIT
         for counter in itertools.count(1):
             response = self._download_json(
-                '%s/kraken/channels/%s/videos/?offset=%d&limit=%d'
-                % (self._API_BASE, channel_id, offset, limit),
-                channel_id, 'Downloading channel videos JSON page %d' % counter)
+                self._PLAYLIST_URL % (channel_id, offset, limit),
+                channel_id, 'Downloading %s videos JSON page %d' % (self._PLAYLIST_TYPE, counter))
             videos = response['videos']
             if not videos:
                 break
-            entries.extend([self.url_result(video['url'], 'Twitch') for video in videos])
+            entries.extend([self.url_result(video['url']) for video in videos])
             offset += limit
         return self.playlist_result(entries, channel_id, channel_name)

+    def _real_extract(self, url):
+        return self._extract_playlist(self._match_id(url))
+
+
+class TwitchProfileIE(TwitchPlaylistBaseIE):
+    IE_NAME = 'twitch:profile'
+    _VALID_URL = r'%s/(?P<id>[^/]+)/profile/?(?:\#.*)?$' % TwitchBaseIE._VALID_URL_BASE
+    _PLAYLIST_TYPE = 'profile'
+
+    _TEST = {
+        'url': 'http://www.twitch.tv/vanillatv/profile',
+        'info_dict': {
+            'id': 'vanillatv',
+            'title': 'VanillaTV',
+        },
+        'playlist_mincount': 412,
+    }
+
+
+class TwitchPastBroadcastsIE(TwitchPlaylistBaseIE):
+    IE_NAME = 'twitch:past_broadcasts'
+    _VALID_URL = r'%s/(?P<id>[^/]+)/profile/past_broadcasts/?(?:\#.*)?$' % TwitchBaseIE._VALID_URL_BASE
+    _PLAYLIST_URL = TwitchPlaylistBaseIE._PLAYLIST_URL + '&broadcasts=true'
+    _PLAYLIST_TYPE = 'past broadcasts'
+
+    _TEST = {
+        'url': 'http://www.twitch.tv/spamfish/profile/past_broadcasts',
+        'info_dict': {
+            'id': 'spamfish',
+            'title': 'Spamfish',
+        },
+        'playlist_mincount': 54,
+    }
+
+
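The paging contract here: keep requesting `offset`/`limit` windows until the API returns an empty `videos` list. A standalone sketch of the same loop against a fake API:

    import itertools

    def fetch_page(offset, limit):
        all_videos = ['video%d' % i for i in range(250)]  # stand-in for the API
        return all_videos[offset:offset + limit]

    entries, offset, limit = [], 0, 100
    for counter in itertools.count(1):
        videos = fetch_page(offset, limit)
        if not videos:
            break
        entries.extend(videos)
        offset += limit
    print(len(entries))  # -> 250, fetched in three full pages plus one empty probe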
|
class TwitchStreamIE(TwitchBaseIE):
|
||||||
|
IE_NAME = 'twitch:stream'
|
||||||
|
_VALID_URL = r'%s/(?P<id>[^/]+)/?(?:\#.*)?$' % TwitchBaseIE._VALID_URL_BASE
|
||||||
|
|
||||||
|
_TEST = {
|
||||||
|
'url': 'http://www.twitch.tv/shroomztv',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '12772022048',
|
||||||
|
'display_id': 'shroomztv',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 're:^ShroomzTV [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
|
||||||
|
'description': 'H1Z1 - lonewolfing with ShroomzTV | A3 Battle Royale later - @ShroomzTV',
|
||||||
|
'is_live': True,
|
||||||
|
'timestamp': 1421928037,
|
||||||
|
'upload_date': '20150122',
|
||||||
|
'uploader': 'ShroomzTV',
|
||||||
|
'uploader_id': 'shroomztv',
|
||||||
|
'view_count': int,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
# m3u8 download
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
channel_id = self._match_id(url)
|
||||||
|
|
||||||
|
stream = self._download_json(
|
||||||
|
'%s/kraken/streams/%s' % (self._API_BASE, channel_id), channel_id,
|
||||||
|
'Downloading stream JSON').get('stream')
|
||||||
|
|
||||||
|
# Fallback on profile extraction if stream is offline
|
||||||
|
if not stream:
|
||||||
|
return self.url_result(
|
||||||
|
'http://www.twitch.tv/%s/profile' % channel_id,
|
||||||
|
'TwitchProfile', channel_id)
|
||||||
|
|
||||||
|
access_token = self._download_json(
|
||||||
|
'%s/api/channels/%s/access_token' % (self._API_BASE, channel_id), channel_id,
|
||||||
|
'Downloading channel access token')
|
||||||
|
|
||||||
|
query = {
|
||||||
|
'allow_source': 'true',
|
||||||
|
'p': random.randint(1000000, 10000000),
|
||||||
|
'player': 'twitchweb',
|
||||||
|
'segment_preference': '4',
|
||||||
|
'sig': access_token['sig'],
|
||||||
|
'token': access_token['token'],
|
||||||
|
}
|
||||||
|
|
||||||
|
formats = self._extract_m3u8_formats(
|
||||||
|
'%s/api/channel/hls/%s.m3u8?%s'
|
||||||
|
% (self._USHER_BASE, channel_id, compat_urllib_parse.urlencode(query).encode('utf-8')),
|
||||||
|
channel_id, 'mp4')
|
||||||
|
|
||||||
|
view_count = stream.get('viewers')
|
||||||
|
timestamp = parse_iso8601(stream.get('created_at'))
|
||||||
|
|
||||||
|
channel = stream['channel']
|
||||||
|
title = self._live_title(channel.get('display_name') or channel.get('name'))
|
||||||
|
description = channel.get('status')
|
||||||
|
|
||||||
|
thumbnails = []
|
||||||
|
for thumbnail_key, thumbnail_url in stream['preview'].items():
|
||||||
|
m = re.search(r'(?P<width>\d+)x(?P<height>\d+)\.jpg$', thumbnail_key)
|
||||||
|
if not m:
|
||||||
|
continue
|
||||||
|
thumbnails.append({
|
||||||
|
'url': thumbnail_url,
|
||||||
|
'width': int(m.group('width')),
|
||||||
|
'height': int(m.group('height')),
|
||||||
|
})
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': compat_str(stream['_id']),
|
||||||
|
'display_id': channel_id,
|
||||||
|
'title': title,
|
||||||
|
'description': description,
|
||||||
|
'thumbnails': thumbnails,
|
||||||
|
'uploader': channel.get('display_name'),
|
||||||
|
'uploader_id': channel.get('name'),
|
||||||
|
'timestamp': timestamp,
|
||||||
|
'view_count': view_count,
|
||||||
|
'formats': formats,
|
||||||
|
'is_live': True,
|
||||||
|
}
|
||||||
|
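With the stream/VOD/profile split in place, any of the new extractors can be driven through youtube-dl's public Python API; the URL below is one of the test URLs from the diff, and what it returns naturally depends on what Twitch serves at the time:

    from youtube_dl import YoutubeDL

    ydl = YoutubeDL({'skip_download': True})
    info = ydl.extract_info('http://www.twitch.tv/ksptv/v/3622000', download=False)
    print(info.get('title'))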
youtube_dl/extractor/videomega.py
@@ -1,12 +1,15 @@
 # coding: utf-8
 from __future__ import unicode_literals

+import re
+
 from .common import InfoExtractor
 from ..compat import (
     compat_urllib_parse,
     compat_urllib_request,
 )
 from ..utils import (
+    ExtractorError,
     remove_start,
 )

@@ -35,8 +38,11 @@ class VideoMegaIE(InfoExtractor):
         req.add_header('Referer', url)
         webpage = self._download_webpage(req, video_id)

-        escaped_data = self._search_regex(
-            r'unescape\("([^"]+)"\)', webpage, 'escaped data')
+        try:
+            escaped_data = re.findall(r'unescape\("([^"]+)"\)', webpage)[-1]
+        except IndexError:
+            raise ExtractorError('Unable to extract escaped data')

         playlist = compat_urllib_parse.unquote(escaped_data)

         thumbnail = self._search_regex(
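The behavioural change worth noting: `_search_regex` returns the first match, while `re.findall(...)[-1]` takes the last `unescape("...")` blob on the page, which is presumably where videomega now keeps the actual playlist. Quick illustration:

    import re

    webpage = 'unescape("decoy%20blob") ... unescape("real%20playlist")'
    print(re.findall(r'unescape\("([^"]+)"\)', webpage)[-1])  # -> real%20playlist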
youtube_dl/options.py
@@ -264,7 +264,7 @@ def parseOpts(overrideArguments=None):
     authentication.add_option(
         '-p', '--password',
         dest='password', metavar='PASSWORD',
-        help='account password')
+        help='account password. If this option is left out, youtube-dl will ask interactively.')
     authentication.add_option(
         '-2', '--twofactor',
         dest='twofactor', metavar='TWOFACTOR',
@@ -289,6 +289,17 @@ def parseOpts(overrideArguments=None):
         'extensions aac, m4a, mp3, mp4, ogg, wav, webm. '
         'You can also use the special names "best",'
         ' "bestvideo", "bestaudio", "worst". '
+        ' You can filter the video results by putting a condition in'
+        ' brackets, as in -f "best[height=720]"'
+        ' (or -f "[filesize>10M]"). '
+        ' This works for filesize, height, width, tbr, abr, and vbr'
+        ' and the comparisons <, <=, >, >=, =, != .'
+        ' Formats for which the value is not known are excluded unless you'
+        ' put a question mark (?) after the operator.'
+        ' You can combine format filters, so '
+        '-f "[height <=? 720][tbr>500]" '
+        'selects up to 720p videos (or videos where the height is not '
+        'known) with a bitrate of at least 500 KBit/s.'
         ' By default, youtube-dl will pick the best quality.'
         ' Use commas to download multiple audio formats, such as'
         ' -f 136/137/mp4/bestvideo,140/m4a/bestaudio.'
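The same filter strings work anywhere a format selector is accepted, including the `format` key of the Python API's options dict. A sketch (the URL is just an example):

    from youtube_dl import YoutubeDL

    # Up to 720p (or unknown height) with a total bitrate above 500 KBit/s
    ydl = YoutubeDL({'format': 'best[height<=?720][tbr>500]'})
    ydl.download(['http://www.twitch.tv/ksptv/v/3622000'])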
youtube_dl/postprocessor/ffmpeg.py
@@ -475,15 +475,21 @@ class FFmpegEmbedSubtitlePP(FFmpegPostProcessor):
         filename = information['filepath']
         input_files = [filename] + [subtitles_filename(filename, lang, self._subformat) for lang in sub_langs]

-        opts = ['-map', '0:0', '-map', '0:1', '-c:v', 'copy', '-c:a', 'copy']
+        opts = [
+            '-map', '0',
+            '-c', 'copy',
+            # Don't copy the existing subtitles, we may be running the
+            # postprocessor a second time
+            '-map', '-0:s',
+            '-c:s', 'mov_text',
+        ]
         for (i, lang) in enumerate(sub_langs):
-            opts.extend(['-map', '%d:0' % (i + 1), '-c:s:%d' % i, 'mov_text'])
+            opts.extend(['-map', '%d:0' % (i + 1)])
             lang_code = self._conver_lang_code(lang)
             if lang_code is not None:
                 opts.extend(['-metadata:s:s:%d' % i, 'language=%s' % lang_code])
-        opts.extend(['-f', 'mp4'])

-        temp_filename = filename + '.temp'
+        temp_filename = prepend_extension(filename, 'temp')
         self._downloader.to_screen('[ffmpeg] Embedding subtitles in \'%s\'' % filename)
         self.run_ffmpeg_multiple_files(input_files, temp_filename, opts)
         os.remove(encodeFilename(filename))
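For a single English subtitle file, the new option list boils down to an ffmpeg argument sequence along these lines (illustrative; the real call goes through run_ffmpeg_multiple_files, which also supplies the input and output paths):

    opts = [
        '-map', '0', '-c', 'copy',   # keep every source stream, no re-encode
        '-map', '-0:s',              # but drop subtitles already in the source
        '-c:s', 'mov_text',          # encode embedded subs as mov_text
        '-map', '1:0',               # the first external subtitle input
        '-metadata:s:s:0', 'language=eng',
    ]
    print(' '.join(opts))

The old `-map 0:0 -map 0:1` hard-coded exactly one video and one audio stream; `-map 0` plus the negative `-map -0:s` keeps whatever streams the source has and stays idempotent when the postprocessor runs a second time.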
@@ -503,6 +509,10 @@ class FFmpegMetadataPP(FFmpegPostProcessor):
             metadata['artist'] = info['uploader']
         elif info.get('uploader_id') is not None:
             metadata['artist'] = info['uploader_id']
+        if info.get('description') is not None:
+            metadata['description'] = info['description']
+        if info.get('webpage_url') is not None:
+            metadata['comment'] = info['webpage_url']

         if not metadata:
             self._downloader.to_screen('[ffmpeg] There isn\'t any metadata to add')
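Each collected key ends up as a `-metadata name=value` pair on the ffmpeg command line, so with the two new fields a download's description and source URL travel with the file. Roughly, assuming the usual key=value expansion (values here are illustrative):

    metadata = {
        'artist': 'KSPTV',
        'description': 'Squadcast, vacation edition',
        'comment': 'http://www.twitch.tv/ksptv/v/3622000',
    }
    options = []
    for name, value in sorted(metadata.items()):
        options.extend(['-metadata', '%s=%s' % (name, value)])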
youtube_dl/utils.py
@@ -612,7 +612,9 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):

     def http_request(self, req):
         for h, v in std_headers.items():
-            if h not in req.headers:
+            # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
+            # The dict keys are capitalized because of this bug by urllib
+            if h.capitalize() not in req.headers:
                 req.add_header(h, v)
         if 'Youtubedl-no-compression' in req.headers:
             if 'Accept-encoding' in req.headers:
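The bug being worked around, in isolation: urllib stores header names via str.capitalize(), so a membership test against the original mixed-case spelling silently fails and the header gets added twice.

    try:
        from urllib.request import Request  # Python 3
    except ImportError:
        from urllib2 import Request  # Python 2

    req = Request('http://example.com/')
    req.add_header('User-Agent', 'youtube-dl')
    print('User-Agent' in req.headers)               # False
    print('User-Agent'.capitalize() in req.headers)  # True ('User-agent')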
@@ -1277,7 +1279,7 @@ def parse_duration(s):
     s = s.strip()

     m = re.match(
-        r'''(?ix)T?
+        r'''(?ix)(?:P?T)?
         (?:
             (?P<only_mins>[0-9.]+)\s*(?:mins?|minutes?)\s*|
             (?P<only_hours>[0-9.]+)\s*(?:hours?)|
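Making the whole `PT` prefix optional (rather than just `T`) lets ISO 8601 durations fall through to the existing hour/minute/second branches. Assuming the h/m/s alternatives in the untouched remainder of the regex:

    from youtube_dl.utils import parse_duration

    print(parse_duration('3 minutes'))  # -> 180, via the only_mins branch above
    print(parse_duration('PT1H2M3S'))   # -> presumably 3723, now that PT is accepted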
@@ -1612,6 +1614,14 @@ def urlhandle_detect_ext(url_handle):
     except AttributeError:  # Python < 3
         getheader = url_handle.info().getheader

+    cd = getheader('Content-Disposition')
+    if cd:
+        m = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', cd)
+        if m:
+            e = determine_ext(m.group('filename'), default_ext=None)
+            if e:
+                return e
+
     return getheader('Content-Type').split("/")[1]

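So a server that labels a download generically (say `Content-Type: application/octet-stream`) but names the attachment can still yield a sensible extension. The regex in isolation:

    import re

    cd = 'attachment; filename="clip.mp4"'
    m = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', cd)
    print(m.group('filename').rpartition('.')[2])  # -> mp4, roughly what determine_ext returns here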
@@ -1623,3 +1633,23 @@ def age_restricted(content_limit, age_limit):
     if content_limit is None:
         return False  # Content available for everyone
     return age_limit < content_limit
+
+
+def is_html(first_bytes):
+    """ Detect whether a file contains HTML by examining its first bytes. """
+
+    BOMS = [
+        (b'\xef\xbb\xbf', 'utf-8'),
+        (b'\x00\x00\xfe\xff', 'utf-32-be'),
+        (b'\xff\xfe\x00\x00', 'utf-32-le'),
+        (b'\xff\xfe', 'utf-16-le'),
+        (b'\xfe\xff', 'utf-16-be'),
+    ]
+    for bom, enc in BOMS:
+        if first_bytes.startswith(bom):
+            s = first_bytes[len(bom):].decode(enc, 'replace')
+            break
+    else:
+        s = first_bytes.decode('utf-8', 'replace')
+
+    return re.match(r'^\s*<', s)
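The for/else here is the idiomatic "no BOM matched" fallback: decode as UTF-8 only when no byte-order mark claimed the prefix. After this commit the helper can be used directly:

    from youtube_dl.utils import is_html

    print(bool(is_html(b'\xef\xbb\xbf<!DOCTYPE html>')))  # True, UTF-8 BOM stripped first
    print(bool(is_html(b'%PDF-1.4')))                     # False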
youtube_dl/version.py
@@ -1,3 +1,3 @@
 from __future__ import unicode_literals

-__version__ = '2015.01.15.1'
+__version__ = '2015.01.23.1'