Merge branch 'master' of https://github.com/rg3/youtube-dl into multipart_videos

2014-04-15 12:41:03 -07:00 · 2014-04-15 12:41:03 -07:00 · 833600e3de
commit 833600e3de
parent d7b31b5b35 1db2666916
50 changed files with 1011 additions and 346 deletions
--- a/README.md
+++ b/README.md
@ -250,6 +250,7 @@ which means you can modify it, redistribute it or use it however you like.
                                     default
    --embed-subs                     embed subtitles in the video (only for mp4
                                     videos)
+    --embed-thumbnail                embed thumbnail in the audio as cover art
    --add-metadata                   write metadata to the video file
    --xattrs                         write metadata to the video file's xattrs
                                     (using dublin core and xdg standards)
--- a/test/test_all_urls.py
+++ b/test/test_all_urls.py
@ -49,6 +49,7 @@ class TestAllURLsMatching(unittest.TestCase):
        self.assertMatch('http://youtu.be/BaW_jenozKc', ['youtube'])
        self.assertMatch('http://www.youtube.com/v/BaW_jenozKc', ['youtube'])
        self.assertMatch('https://youtube.googleapis.com/v/BaW_jenozKc', ['youtube'])
+        self.assertMatch('http://www.cleanvideosearch.com/media/action/yt/watch?videoId=8v_4O44sfjM', ['youtube'])

    def test_youtube_channel_matching(self):
        assertChannel = lambda url: self.assertMatch(url, ['youtube:channel'])
@ -156,6 +157,24 @@ class TestAllURLsMatching(unittest.TestCase):
        self.assertMatch(
            'http://thedailyshow.cc.com/guests/michael-lewis/3efna8/exclusive---michael-lewis-extended-interview-pt--3',
            ['ComedyCentralShows'])
+        self.assertMatch(
+            'http://thedailyshow.cc.com/episodes/sy7yv0/april-8--2014---denis-leary',
+            ['ComedyCentralShows'])
+        self.assertMatch(
+            'http://thecolbertreport.cc.com/episodes/8ase07/april-8--2014---jane-goodall',
+            ['ComedyCentralShows'])
+        self.assertMatch(
+            'http://thedailyshow.cc.com/video-playlists/npde3s/the-daily-show-19088-highlights',
+            ['ComedyCentralShows'])
+        self.assertMatch(
+            'http://thedailyshow.cc.com/special-editions/2l8fdb/special-edition---a-look-back-at-food',
+            ['ComedyCentralShows'])
+
+    def test_yahoo_https(self):
+        # An https Yahoo Screen URL has to match the 'Yahoo' extractor, see
+        # https://github.com/rg3/youtube-dl/issues/2701
+        https_url = 'https://screen.yahoo.com/smartwatches-latest-wearable-gadgets-163745379-cbs.html'
+        self.assertMatch(https_url, ['Yahoo'])

 if __name__ == '__main__':
    unittest.main()
--- a/test/test_playlists.py
+++ b/test/test_playlists.py
@ -43,6 +43,7 @@ from youtube_dl.extractor import (
    XTubeUserIE,
    InstagramUserIE,
    CSpanIE,
+    AolIE,
 )


@ -324,10 +325,19 @@ class TestPlaylists(unittest.TestCase):
        self.assertEqual(result['id'], '342759')
        self.assertEqual(
            result['title'], 'General Motors Ignition Switch Recall')
-        self.assertEqual(len(result['entries']), 9)
        whole_duration = sum(e['duration'] for e in result['entries'])
        self.assertEqual(whole_duration, 14855)

+    def test_aol_playlist(self):
+        # Extract an on.aol.com playlist URL and check the playlist metadata.
+        playlist_url = 'http://on.aol.com/playlist/brace-yourself---todays-weirdest-news-152147?icid=OnHomepageC4_Omg_Img#_videoid=518184316'
+        ie = AolIE(FakeYDL())
+        result = ie.extract(playlist_url)
+        self.assertIsPlaylist(result)
+        self.assertEqual(result['id'], '152147')
+        self.assertEqual(
+            result['title'], 'Brace Yourself - Today\'s Weirdest News')
+        # Only a lower bound on the number of entries is asserted.
+        entry_count = len(result['entries'])
+        self.assertTrue(entry_count >= 10)

 if __name__ == '__main__':
    unittest.main()
--- a/test/test_utils.py
+++ b/test/test_utils.py
@ -38,6 +38,7 @@ from youtube_dl.utils import (
    xpath_with_ns,
    parse_iso8601,
    strip_jsonp,
+    uppercase_escape,
 )

 if sys.version_info < (3, 0):
@ -279,6 +280,9 @@ class TestUtil(unittest.TestCase):
        d = json.loads(stripped)
        self.assertEqual(d, [{"id": "532cb", "x": 3}])

+    def test_uppercase_escape(self):
+        # Text without escape sequences must come back unchanged, while a
+        # literal backslash-U sequence is expected to be replaced by the
+        # character it names.
+        self.assertEqual(uppercase_escape(u'aä'), u'aä')
+        self.assertEqual(uppercase_escape(u'\\U0001d550'), u'𝕐')

 if __name__ == '__main__':
    unittest.main()
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@ -287,6 +287,9 @@ class YoutubeDL(object):
        """Print message to stdout if not in quiet mode."""
        return self.to_stdout(message, skip_eol, check_quiet=True)

+    def _write_string(self, s, out=None):
+        """Pass s to write_string() together with the 'encoding' param."""
+        configured_encoding = self.params.get('encoding')
+        write_string(s, out=out, encoding=configured_encoding)
+
    def to_stdout(self, message, skip_eol=False, check_quiet=False):
        """Print message to stdout if not in quiet mode."""
        if self.params.get('logger'):
@ -296,7 +299,7 @@ class YoutubeDL(object):
            terminator = ['\n', ''][skip_eol]
            output = message + terminator

-            write_string(output, self._screen_file)
+            self._write_string(output, self._screen_file)

    def to_stderr(self, message):
        """Print message to stderr."""
@ -306,7 +309,7 @@ class YoutubeDL(object):
        else:
            message = self._bidi_workaround(message)
            output = message + '\n'
-            write_string(output, self._err_file)
+            self._write_string(output, self._err_file)

    def to_console_title(self, message):
        if not self.params.get('consoletitle', False):
@ -316,21 +319,21 @@ class YoutubeDL(object):
            # already of type unicode()
            ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
        elif 'TERM' in os.environ:
-            write_string('\033]0;%s\007' % message, self._screen_file)
+            self._write_string('\033]0;%s\007' % message, self._screen_file)

    def save_console_title(self):
+        """Save the terminal title on the title stack.
+
+        Does nothing unless the 'consoletitle' param is set and the TERM
+        environment variable is present.
+        """
        if not self.params.get('consoletitle', False):
            return
        if 'TERM' in os.environ:
            # Save the title on stack
-            write_string('\033[22;0t', self._screen_file)
+            # Goes through _write_string so the 'encoding' param is applied.
+            self._write_string('\033[22;0t', self._screen_file)

    def restore_console_title(self):
+        """Restore the terminal title from the title stack.
+
+        Does nothing unless the 'consoletitle' param is set and the TERM
+        environment variable is present.
+        """
        if not self.params.get('consoletitle', False):
            return
        if 'TERM' in os.environ:
            # Restore the title from stack
-            write_string('\033[23;0t', self._screen_file)
+            # Goes through _write_string so the 'encoding' param is applied.
+            self._write_string('\033[23;0t', self._screen_file)

    def __enter__(self):
        self.save_console_title()
@ -939,7 +942,7 @@ class YoutubeDL(object):
                        with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
                                subfile.write(sub)
                except (OSError, IOError):
-                    self.report_error('Cannot write subtitles file ' + descfn)
+                    self.report_error('Cannot write subtitles file ' + sub_filename)
                    return

        if self.params.get('writeinfojson', False):
@ -1235,9 +1238,16 @@ class YoutubeDL(object):
        if not self.params.get('verbose'):
            return

-        write_string('[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' %
-                 (locale.getpreferredencoding(), sys.getfilesystemencoding(), sys.stdout.encoding, self.get_encoding()))
-        write_string('[debug] youtube-dl version ' + __version__ + '\n')
+        write_string(
+            '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
+                locale.getpreferredencoding(),
+                sys.getfilesystemencoding(),
+                sys.stdout.encoding,
+                self.get_encoding()),
+            encoding=None
+        )
+
+        self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
        try:
            sp = subprocess.Popen(
                ['git', 'rev-parse', '--short', 'HEAD'],
@ -1246,20 +1256,20 @@ class YoutubeDL(object):
            out, err = sp.communicate()
            out = out.decode().strip()
            if re.match('[0-9a-f]+', out):
-                write_string('[debug] Git HEAD: ' + out + '\n')
+                self._write_string('[debug] Git HEAD: ' + out + '\n')
        except:
            try:
                sys.exc_clear()
            except:
                pass
-        write_string('[debug] Python version %s - %s' %
+        self._write_string('[debug] Python version %s - %s' %
                     (platform.python_version(), platform_name()) + '\n')

        proxy_map = {}
        for handler in self._opener.handlers:
            if hasattr(handler, 'proxies'):
                proxy_map.update(handler.proxies)
-        write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
+        self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')

    def _setup_opener(self):
        timeout_val = self.params.get('socket_timeout')
--- a/youtube_dl/__init__.py
+++ b/youtube_dl/__init__.py
@ -52,6 +52,7 @@ __authors__  = (
    'Juan C. Olivares',
    'Mattias Harrysson',
    'phaer',
+    'Sainyam Kapoor',
 )

 __license__ = 'Public Domain'
@ -91,6 +92,8 @@ from .extractor import gen_extractors
 from .version import __version__
 from .YoutubeDL import YoutubeDL
 from .postprocessor import (
+    AtomicParsleyPP,
+    FFmpegAudioFixPP,
    FFmpegMetadataPP,
    FFmpegVideoConvertor,
    FFmpegExtractAudioPP,
@ -243,7 +246,7 @@ def parseOpts(overrideArguments=None):
        help='Use the specified HTTP/HTTPS proxy. Pass in an empty string (--proxy "") for direct connection')
    general.add_option('--no-check-certificate', action='store_true', dest='no_check_certificate', default=False, help='Suppress HTTPS certificate validation.')
    general.add_option(
-        '--prefer-insecure', action='store_true', dest='prefer_insecure',
+        '--prefer-insecure', '--prefer-unsecure', action='store_true', dest='prefer_insecure',
        help='Use an unencrypted connection to retrieve information about the video. (Currently supported only for YouTube)')
    general.add_option(
        '--cache-dir', dest='cachedir', default=get_cachedir(), metavar='DIR',
@ -505,6 +508,8 @@ def parseOpts(overrideArguments=None):
            help='do not overwrite post-processed files; the post-processed files are overwritten by default')
    postproc.add_option('--embed-subs', action='store_true', dest='embedsubtitles', default=False,
            help='embed subtitles in the video (only for mp4 videos)')
+    postproc.add_option('--embed-thumbnail', action='store_true', dest='embedthumbnail', default=False,
+            help='embed thumbnail in the audio as cover art')
    postproc.add_option('--add-metadata', action='store_true', dest='addmetadata', default=False,
            help='write metadata to the video file')
    postproc.add_option('--xattrs', action='store_true', dest='xattrs', default=False,
@ -813,6 +818,10 @@ def _real_main(argv=None):
            ydl.add_post_processor(FFmpegEmbedSubtitlePP(subtitlesformat=opts.subtitlesformat))
        if opts.xattrs:
            ydl.add_post_processor(XAttrMetadataPP())
+        if opts.embedthumbnail:
+            if not opts.addmetadata:
+                ydl.add_post_processor(FFmpegAudioFixPP())
+            ydl.add_post_processor(AtomicParsleyPP())

        # Update version
        if opts.update_self:
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@ -32,6 +32,7 @@ from .canal13cl import Canal13clIE
 from .canalplus import CanalplusIE
 from .canalc2 import Canalc2IE
 from .cbs import CBSIE
+from .cbsnews import CBSNewsIE
 from .ceskatelevize import CeskaTelevizeIE
 from .channel9 import Channel9IE
 from .chilloutzone import ChilloutzoneIE
@ -62,6 +63,7 @@ from .dotsub import DotsubIE
 from .dreisat import DreiSatIE
 from .defense import DefenseGouvFrIE
 from .discovery import DiscoveryIE
+from .divxstage import DivxStageIE
 from .dropbox import DropboxIE
 from .ebaumsworld import EbaumsWorldIE
 from .ehow import EHowIE
@ -156,6 +158,7 @@ from .mofosex import MofosexIE
 from .mooshare import MooshareIE
 from .morningstar import MorningstarIE
 from .motorsport import MotorsportIE
+from .movshare import MovShareIE
 from .mtv import (
    MTVIE,
    MTVIggyIE,
@ -205,6 +208,7 @@ from .rottentomatoes import RottenTomatoesIE
 from .roxwel import RoxwelIE
 from .rtlnow import RTLnowIE
 from .rts import RTSIE
+from .rtve import RTVEALaCartaIE
 from .rutube import (
    RutubeIE,
    RutubeChannelIE,
@ -276,6 +280,7 @@ from .videodetective import VideoDetectiveIE
 from .videolecturesnet import VideoLecturesNetIE
 from .videofyme import VideofyMeIE
 from .videopremium import VideoPremiumIE
+from .videoweed import VideoWeedIE
 from .vimeo import (
    VimeoIE,
    VimeoChannelIE,
--- a/youtube_dl/extractor/aol.py
+++ b/youtube_dl/extractor/aol.py
@ -8,7 +8,18 @@ from .fivemin import FiveMinIE

 class AolIE(InfoExtractor):
    IE_NAME = 'on.aol.com'
-    _VALID_URL = r'http://on\.aol\.com/video/.*-(?P<id>\d+)($|\?)'
+    _VALID_URL = r'''(?x)
+        (?:
+            aol-video:|
+            http://on\.aol\.com/
+            (?:
+                video/.*-|
+                playlist/(?P<playlist_display_id>[^/?#]+?)-(?P<playlist_id>[0-9]+)[?#].*_videoid=
+            )
+        )
+        (?P<id>[0-9]+)
+        (?:$|\?)
+    '''

    _TEST = {
        'url': 'http://on.aol.com/video/u-s--official-warns-of-largest-ever-irs-phone-scam-518167793?icid=OnHomepageC2Wide_MustSee_Img',
@ -24,5 +35,31 @@ class AolIE(InfoExtractor):
    def _real_extract(self, url):
+        """Extract a single video, or a playlist for playlist URLs.
+
+        A playlist result is returned when the URL matched a playlist id and
+        the 'noplaylist' param is not set.  In every other case the video id
+        is handed to FiveMinIE._build_result().
+        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
-        self.to_screen('Downloading 5min.com video %s' % video_id)
+
+        # playlist_id is None when the URL did not match the playlist branch.
+        playlist_id = mobj.group('playlist_id')
+        if playlist_id and not self._downloader.params.get('noplaylist'):
+            self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
+
+            webpage = self._download_webpage(url, playlist_id)
+            title = self._html_search_regex(
+                r'<h1 class="video-title[^"]*">(.+?)</h1>', webpage, 'title')
+            # Only the related-videos list is scanned for entries.
+            playlist_html = self._search_regex(
+                r"(?s)<ul\s+class='video-related[^']*'>(.*?)</ul>", webpage,
+                'playlist HTML')
+            # Every thumbnail link becomes an 'aol-video:<id>' URL entry that
+            # is routed back to this extractor through ie_key 'Aol'.
+            entries = [{
+                '_type': 'url',
+                'url': 'aol-video:%s' % m.group('id'),
+                'ie_key': 'Aol',
+            } for m in re.finditer(
+                r"<a\s+href='.*videoid=(?P<id>[0-9]+)'\s+class='video-thumb'>",
+                playlist_html)]
+
+            return {
+                '_type': 'playlist',
+                'id': playlist_id,
+                'display_id': mobj.group('playlist_display_id'),
+                'title': title,
+                'entries': entries,
+            }
+
        return FiveMinIE._build_result(video_id)
--- a/youtube_dl/extractor/br.py
+++ b/youtube_dl/extractor/br.py
@ -4,39 +4,72 @@ from __future__ import unicode_literals
 import re

 from .common import InfoExtractor
-from ..utils import ExtractorError
+from ..utils import (
+    ExtractorError,
+    int_or_none,
+)


 class BRIE(InfoExtractor):
-    IE_DESC = "Bayerischer Rundfunk Mediathek"
-    _VALID_URL = r"^https?://(?:www\.)?br\.de/mediathek/video/(?:sendungen/)?(?:[a-z0-9\-/]+/)?(?P<id>[a-z0-9\-]+)\.html$"
-    _BASE_URL = "http://www.br.de"
+    IE_DESC = 'Bayerischer Rundfunk Mediathek'
+    _VALID_URL = r'https?://(?:www\.)?br\.de/(?:[a-z0-9\-]+/)+(?P<id>[a-z0-9\-]+)\.html'
+    _BASE_URL = 'http://www.br.de'

    _TESTS = [
        {
-            "url": "http://www.br.de/mediathek/video/anselm-gruen-114.html",
-            "md5": "c4f83cf0f023ba5875aba0bf46860df2",
-            "info_dict": {
-                "id": "2c8d81c5-6fb7-4a74-88d4-e768e5856532",
-                "ext": "mp4",
-                "title": "Feiern und Verzichten",
-                "description": "Anselm Grün: Feiern und Verzichten",
-                "uploader": "BR/Birgit Baier",
-                "upload_date": "20140301"
+            'url': 'http://www.br.de/mediathek/video/anselm-gruen-114.html',
+            'md5': 'c4f83cf0f023ba5875aba0bf46860df2',
+            'info_dict': {
+                'id': '2c8d81c5-6fb7-4a74-88d4-e768e5856532',
+                'ext': 'mp4',
+                'title': 'Feiern und Verzichten',
+                'description': 'Anselm Grün: Feiern und Verzichten',
+                'uploader': 'BR/Birgit Baier',
+                'upload_date': '20140301',
            }
        },
        {
-            "url": "http://www.br.de/mediathek/video/sendungen/unter-unserem-himmel/unter-unserem-himmel-alpen-ueber-den-pass-100.html",
-            "md5": "ab451b09d861dbed7d7cc9ab0be19ebe",
-            "info_dict": {
-                "id": "2c060e69-3a27-4e13-b0f0-668fac17d812",
-                "ext": "mp4",
-                "title": "Über den Pass",
-                "description": "Die Eroberung der Alpen: Über den Pass",
-                "uploader": None,
-                "upload_date": None
+            'url': 'http://www.br.de/mediathek/video/sendungen/unter-unserem-himmel/unter-unserem-himmel-alpen-ueber-den-pass-100.html',
+            'md5': 'ab451b09d861dbed7d7cc9ab0be19ebe',
+            'info_dict': {
+                'id': '2c060e69-3a27-4e13-b0f0-668fac17d812',
+                'ext': 'mp4',
+                'title': 'Über den Pass',
+                'description': 'Die Eroberung der Alpen: Über den Pass',
            }
-        }
+        },
+        {
+            'url': 'http://www.br.de/nachrichten/schaeuble-haushaltsentwurf-bundestag-100.html',
+            'md5': '3db0df1a9a9cd9fa0c70e6ea8aa8e820',
+            'info_dict': {
+                'id': 'c6aae3de-2cf9-43f2-957f-f17fef9afaab',
+                'ext': 'aac',
+                'title': '"Keine neuen Schulden im nächsten Jahr"',
+                'description': 'Haushaltsentwurf: "Keine neuen Schulden im nächsten Jahr"',
+            }
+        },
+        {
+            'url': 'http://www.br.de/radio/bayern1/service/team/videos/team-video-erdelt100.html',
+            'md5': 'dbab0aef2e047060ea7a21fc1ce1078a',
+            'info_dict': {
+                'id': '6ba73750-d405-45d3-861d-1ce8c524e059',
+                'ext': 'mp4',
+                'title': 'Umweltbewusster Häuslebauer',
+                'description': 'Uwe Erdelt: Umweltbewusster Häuslebauer',
+            }
+        },
+        {
+            'url': 'http://www.br.de/fernsehen/br-alpha/sendungen/kant-fuer-anfaenger/kritik-der-reinen-vernunft/kant-kritik-01-metaphysik100.html',
+            'md5': '23bca295f1650d698f94fc570977dae3',
+            'info_dict': {
+                'id': 'd982c9ce-8648-4753-b358-98abb8aec43d',
+                'ext': 'mp4',
+                'title': 'Folge 1 - Metaphysik',
+                'description': 'Kant für Anfänger: Folge 1 - Metaphysik',
+                'uploader': 'Eva Maria Steimle',
+                'upload_date': '20140117',
+            }
+        },
    ]

    def _real_extract(self, url):
@ -44,56 +77,63 @@ class BRIE(InfoExtractor):
        display_id = mobj.group('id')
        page = self._download_webpage(url, display_id)
        xml_url = self._search_regex(
-            r"return BRavFramework\.register\(BRavFramework\('avPlayer_(?:[a-f0-9-]{36})'\)\.setup\({dataURL:'(/mediathek/video/[a-z0-9/~_.-]+)'}\)\);", page, "XMLURL")
+            r"return BRavFramework\.register\(BRavFramework\('avPlayer_(?:[a-f0-9-]{36})'\)\.setup\({dataURL:'(/(?:[a-z0-9\-]+/)+[a-z0-9/~_.-]+)'}\)\);", page, 'XMLURL')
        xml = self._download_xml(self._BASE_URL + xml_url, None)

-        videos = []
-        for xml_video in xml.findall("video"):
-            video = {
-                "id": xml_video.get("externalId"),
-                "title": xml_video.find("title").text,
-                "formats": self._extract_formats(xml_video.find("assets")),
-                "thumbnails": self._extract_thumbnails(xml_video.find("teaserImage/variants")),
-                "description": " ".join(xml_video.find("shareTitle").text.splitlines()),
-                "webpage_url": xml_video.find("permalink").text
-            }
-            if xml_video.find("author").text:
-                video["uploader"] = xml_video.find("author").text
-            if xml_video.find("broadcastDate").text:
-                video["upload_date"] =  "".join(reversed(xml_video.find("broadcastDate").text.split(".")))
-            videos.append(video)
+        medias = []

-        if len(videos) > 1:
+        for xml_media in xml.findall('video') + xml.findall('audio'):
+            media = {
+                'id': xml_media.get('externalId'),
+                'title': xml_media.find('title').text,
+                'formats': self._extract_formats(xml_media.find('assets')),
+                'thumbnails': self._extract_thumbnails(xml_media.find('teaserImage/variants')),
+                'description': ' '.join(xml_media.find('shareTitle').text.splitlines()),
+                'webpage_url': xml_media.find('permalink').text
+            }
+            if xml_media.find('author').text:
+                media['uploader'] = xml_media.find('author').text
+            if xml_media.find('broadcastDate').text:
+                media['upload_date'] = ''.join(reversed(xml_media.find('broadcastDate').text.split('.')))
+            medias.append(media)
+
+        if len(medias) > 1:
            self._downloader.report_warning(
-                'found multiple videos; please '
+                'found multiple medias; please '
                'report this with the video URL to http://yt-dl.org/bug')
-        if not videos:
-            raise ExtractorError('No video entries found')
-        return videos[0]
+        if not medias:
+            raise ExtractorError('No media entries found')
+        return medias[0]

    def _extract_formats(self, assets):
+        """Return the sorted format dicts for the <asset> children of assets.
+
+        Assets without a downloadUrl child are skipped.  Any other missing
+        child element yields None for the corresponding field.
+        """
+

+        def text_or_none(asset, tag):
+            # Text of the child element `tag`, or None when there is no such
+            # child.
+            elem = asset.find(tag)
+            return None if elem is None else elem.text
+
        formats = [{
-            "url": asset.find("downloadUrl").text,
-            "ext": asset.find("mediaType").text,
-            "format_id": asset.get("type"),
-            "width": int(asset.find("frameWidth").text),
-            "height": int(asset.find("frameHeight").text),
-            "tbr": int(asset.find("bitrateVideo").text),
-            "abr": int(asset.find("bitrateAudio").text),
-            "vcodec": asset.find("codecVideo").text,
-            "container": asset.find("mediaType").text,
-            "filesize": int(asset.find("size").text),
-        } for asset in assets.findall("asset")
-            if asset.find("downloadUrl") is not None]
+            'url': text_or_none(asset, 'downloadUrl'),
+            'ext': text_or_none(asset, 'mediaType'),
+            'format_id': asset.get('type'),
+            'width': int_or_none(text_or_none(asset, 'frameWidth')),
+            'height': int_or_none(text_or_none(asset, 'frameHeight')),
+            'tbr': int_or_none(text_or_none(asset, 'bitrateVideo')),
+            'abr': int_or_none(text_or_none(asset, 'bitrateAudio')),
+            'vcodec': text_or_none(asset, 'codecVideo'),
+            'acodec': text_or_none(asset, 'codecAudio'),
+            'container': text_or_none(asset, 'mediaType'),
+            'filesize': int_or_none(text_or_none(asset, 'size')),
+        } for asset in assets.findall('asset')
+            if asset.find('downloadUrl') is not None]

        self._sort_formats(formats)
        return formats

    def _extract_thumbnails(self, variants):
+        """Return one thumbnail dict per <variant>, largest area first.
+
+        Each dict carries the absolute 'url' plus 'width' and 'height' as
+        parsed by int_or_none().
+        """
        thumbnails = [{
-            "url": self._BASE_URL + variant.find("url").text,
-            "width": int(variant.find("width").text),
-            "height": int(variant.find("height").text),
-        } for variant in variants.findall("variant")]
-        thumbnails.sort(key=lambda x: x["width"] * x["height"], reverse=True)
+            'url': self._BASE_URL + variant.find('url').text,
+            'width': int_or_none(variant.find('width').text),
+            'height': int_or_none(variant.find('height').text),
+        } for variant in variants.findall('variant')]
+        # int_or_none() presumably yields None for an empty <width> or
+        # <height> element (TODO confirm against utils); count such a
+        # dimension as 0 so the sort key never multiplies None.
+        thumbnails.sort(
+            key=lambda x: (x['width'] or 0) * (x['height'] or 0),
+            reverse=True)
        return thumbnails
--- a/youtube_dl/extractor/breakcom.py
+++ b/youtube_dl/extractor/breakcom.py
@ -27,9 +27,10 @@ class BreakIE(InfoExtractor):
            webpage, 'info json', flags=re.DOTALL)
        info = json.loads(info_json)
        video_url = info['videoUri']
-        m_youtube = re.search(r'(https?://www\.youtube\.com/watch\?v=.*)', video_url)
-        if m_youtube is not None:
-            return self.url_result(m_youtube.group(1), 'Youtube')
+        youtube_id = info.get('youtubeId')
+        if youtube_id:
+            return self.url_result(youtube_id, 'Youtube')
+
        final_url = video_url + '?' + info['AuthToken']
        return {
            'id': video_id,
--- a/youtube_dl/extractor/brightcove.py
+++ b/youtube_dl/extractor/brightcove.py
@ -140,7 +140,11 @@ class BrightcoveIE(InfoExtractor):

        url_m = re.search(r'<meta\s+property="og:video"\s+content="(http://c.brightcove.com/[^"]+)"', webpage)
        if url_m:
-            return [unescapeHTML(url_m.group(1))]
+            url = unescapeHTML(url_m.group(1))
+            # Some sites don't add it, we can't download with this url, for example:
+            # http://www.ktvu.com/videos/news/raw-video-caltrain-releases-video-of-man-almost/vCTZdY/
+            if 'playerKey' in url:
+                return [url]

        matches = re.findall(
            r'''(?sx)<object
--- a/youtube_dl/extractor/byutv.py
+++ b/youtube_dl/extractor/byutv.py
@ -4,9 +4,7 @@ import json
 import re

 from .common import InfoExtractor
-from ..utils import (
-    ExtractorError,
-)
+from ..utils import ExtractorError


 class BYUtvIE(InfoExtractor):
@ -16,7 +14,7 @@ class BYUtvIE(InfoExtractor):
        'info_dict': {
            'id': 'granite-flats-talking',
            'ext': 'mp4',
-            'description': 'md5:1a7ae3e153359b7cc355ef3963441e5f',
+            'description': 'md5:4e9a7ce60f209a33eca0ac65b4918e1c',
            'title': 'Talking',
            'thumbnail': 're:^https?://.*promo.*'
        },
--- a/youtube_dl/extractor/cbsnews.py
+++ b/youtube_dl/extractor/cbsnews.py
@ -0,0 +1,87 @@
+# encoding: utf-8
+from __future__ import unicode_literals
+
+import re
+import json
+
+from .common import InfoExtractor
+
+
+class CBSNewsIE(InfoExtractor):
+    # Extractor for cbsnews.com pages that embed their video metadata as a
+    # JSON attribute in the page HTML.
+    IE_DESC = 'CBS News'
+    _VALID_URL = r'http://(?:www\.)?cbsnews\.com/(?:[^/]+/)+(?P<id>[\da-z_-]+)'
+
+    _TESTS = [
+        {
+            'url': 'http://www.cbsnews.com/news/tesla-and-spacex-elon-musks-industrial-empire/',
+            'info_dict': {
+                'id': 'tesla-and-spacex-elon-musks-industrial-empire',
+                'ext': 'flv',
+                'title': 'Tesla and SpaceX: Elon Musk\'s industrial empire',
+                'thumbnail': 'http://beta.img.cbsnews.com/i/2014/03/30/60147937-2f53-4565-ad64-1bdd6eb64679/60-0330-pelley-640x360.jpg',
+                'duration': 791,
+            },
+            'params': {
+                # rtmp download
+                'skip_download': True,
+            },
+        },
+        {
+            'url': 'http://www.cbsnews.com/videos/fort-hood-shooting-army-downplays-mental-illness-as-cause-of-attack/',
+            'info_dict': {
+                'id': 'fort-hood-shooting-army-downplays-mental-illness-as-cause-of-attack',
+                'ext': 'flv',
+                'title': 'Fort Hood shooting: Army downplays mental illness as cause of attack',
+                'thumbnail': 'http://cbsnews2.cbsistatic.com/hub/i/r/2014/04/04/0c9fbc66-576b-41ca-8069-02d122060dd2/thumbnail/140x90/6dad7a502f88875ceac38202984b6d58/en-0404-werner-replace-640x360.jpg',
+                'duration': 205,
+            },
+            'params': {
+                # rtmp download
+                'skip_download': True,
+            },
+        },
+    ]
+
+    def _real_extract(self, url):
+        # Download the page, decode the embedded JSON and turn every media
+        # URI it lists into a format entry.
+        video_id = re.match(self._VALID_URL, url).group('id')
+        webpage = self._download_webpage(url, video_id)
+
+        raw_info = self._html_search_regex(
+            r'(?:<ul class="media-list items" id="media-related-items"><li data-video-info|<div id="cbsNewsVideoPlayer" data-video-player-options)=\'({.+?})\'',
+            webpage, 'video JSON info')
+        video_info = json.loads(raw_info)
+
+        # The metadata is either nested under 'item' or is the top-level
+        # object itself.
+        item = video_info.get('item', video_info)
+
+        formats = []
+        for format_id in ['RtmpMobileLow', 'RtmpMobileHigh', 'Hls', 'RtmpDesktop']:
+            media_uri = item.get('media' + format_id + 'URI')
+            if not media_uri:
+                continue
+
+            if media_uri.startswith('rtmp'):
+                # The play path is whatever follows the last '<break>'
+                # marker in the URI.
+                extra = {
+                    'app': 'ondemand?auth=cbs',
+                    'play_path': 'mp4:' + media_uri.split('<break>')[-1],
+                    'player_url': 'http://www.cbsnews.com/[[IMPORT]]/vidtech.cbsinteractive.com/player/3_3_0/CBSI_PLAYER_HD.swf',
+                    'page_url': 'http://www.cbsnews.com',
+                    'ext': 'flv',
+                }
+            elif media_uri.endswith('.m3u8'):
+                extra = {'ext': 'mp4'}
+            else:
+                extra = {}
+
+            fmt = {
+                'url': media_uri,
+                'format_id': format_id,
+            }
+            fmt.update(extra)
+            formats.append(fmt)
+
+        return {
+            'id': video_id,
+            'title': item.get('articleTitle') or item.get('hed'),
+            'thumbnail': item.get('mediaImage') or item.get('thumbnail'),
+            'duration': item.get('duration'),
+            'formats': formats,
+        }
--- a/youtube_dl/extractor/comedycentral.py
+++ b/youtube_dl/extractor/comedycentral.py
@ -21,7 +21,7 @@ class ComedyCentralIE(MTVServicesInfoExtractor):

    _TEST = {
        'url': 'http://www.comedycentral.com/video-clips/kllhuv/stand-up-greg-fitzsimmons--uncensored---too-good-of-a-mother',
-        'md5': '4167875aae411f903b751a21f357f1ee',
+        'md5': 'c4f48e9eda1b16dd10add0744344b6d8',
        'info_dict': {
            'id': 'cef0cbb3-e776-4bc9-b62e-8016deccb354',
            'ext': 'mp4',
@ -41,9 +41,9 @@ class ComedyCentralShowsIE(InfoExtractor):
    _VALID_URL = r'''(?x)^(:(?P<shortname>tds|thedailyshow|cr|colbert|colbertnation|colbertreport)
                      |https?://(:www\.)?
                          (?P<showname>thedailyshow|thecolbertreport)\.(?:cc\.)?com/
-                         (full-episodes/(?:[0-9a-z]{6}/)?(?P<episode>.*)|
+                         ((?:full-)?episodes/(?:[0-9a-z]{6}/)?(?P<episode>.*)|
                          (?P<clip>
-                              (?:(?:guests/[^/]+|videos)/[^/]+/(?P<videotitle>[^/?#]+))
+                              (?:(?:guests/[^/]+|videos|video-playlists|special-editions)/[^/]+/(?P<videotitle>[^/?#]+))
                              |(the-colbert-report-(videos|collections)/(?P<clipID>[0-9]+)/[^/]*/(?P<cntitle>.*?))
                              |(watch/(?P<date>[^/]*)/(?P<tdstitle>.*))
                          )|
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@ -255,7 +255,10 @@ class InfoExtractor(object):
            with open(filename, 'wb') as outf:
                outf.write(webpage_bytes)

-        content = webpage_bytes.decode(encoding, 'replace')
+        try:
+            content = webpage_bytes.decode(encoding, 'replace')
+        except LookupError:
+            content = webpage_bytes.decode('utf-8', 'replace')

        if (u'<title>Access to this site is blocked</title>' in content and
                u'Websense' in content[:512]):
--- a/youtube_dl/extractor/dailymotion.py
+++ b/youtube_dl/extractor/dailymotion.py
@ -8,7 +8,6 @@ from .subtitles import SubtitlesInfoExtractor
 from ..utils import (
    compat_urllib_request,
    compat_str,
-    get_element_by_attribute,
    get_element_by_id,
    orderedSet,
    str_to_int,
@ -202,11 +201,12 @@ class DailymotionPlaylistIE(DailymotionBaseInfoExtractor):
        playlist_id = mobj.group('id')
        webpage = self._download_webpage(url, playlist_id)

-        return {'_type': 'playlist',
-                'id': playlist_id,
-                'title': get_element_by_id(u'playlist_name', webpage),
-                'entries': self._extract_entries(playlist_id),
-                }
+        return {
+            '_type': 'playlist',
+            'id': playlist_id,
+            'title': self._og_search_title(webpage),
+            'entries': self._extract_entries(playlist_id),
+        }


 class DailymotionUserIE(DailymotionPlaylistIE):
--- a/youtube_dl/extractor/divxstage.py
+++ b/youtube_dl/extractor/divxstage.py
@ -0,0 +1,27 @@
+from __future__ import unicode_literals
+
+from .novamov import NovaMovIE
+
+
+class DivxStageIE(NovaMovIE):
+    IE_NAME = 'divxstage'
+    IE_DESC = 'DivxStage'
+
+    _VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': 'divxstage\.(?:eu|net|ch|co|at|ag)'}
+
+    _HOST = 'www.divxstage.eu'
+
+    _FILE_DELETED_REGEX = r'>This file no longer exists on our servers.<'
+    _TITLE_REGEX = r'<div class="video_det">\s*<strong>([^<]+)</strong>'
+    _DESCRIPTION_REGEX = r'<div class="video_det">\s*<strong>[^<]+</strong>\s*<p>([^<]+)</p>'
+
+    _TEST = {
+        'url': 'http://www.divxstage.eu/video/57f238e2e5e01',
+        'md5': '63969f6eb26533a1968c4d325be63e72',
+        'info_dict': {
+            'id': '57f238e2e5e01',
+            'ext': 'flv',
+            'title': 'youtubedl test video',
+            'description': 'This is a test video for youtubedl.',
+        }
+    }
--- a/youtube_dl/extractor/firstpost.py
+++ b/youtube_dl/extractor/firstpost.py
@ -6,7 +6,6 @@ from .common import InfoExtractor


 class FirstpostIE(InfoExtractor):
-    IE_NAME = 'Firstpost.com'
    _VALID_URL = r'http://(?:www\.)?firstpost\.com/[^/]+/.*-(?P<id>[0-9]+)\.html'

    _TEST = {
@ -16,7 +15,6 @@ class FirstpostIE(InfoExtractor):
            'id': '1025403',
            'ext': 'mp4',
            'title': 'India to launch indigenous aircraft carrier INS Vikrant today',
-            'description': 'Its flight deck is over twice the size of a football field, its power unit can light up the entire Kochi city and the cabling is enough to cover the distance between here to Delhi.',
        }
    }

@ -24,15 +22,26 @@ class FirstpostIE(InfoExtractor):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

-        webpage = self._download_webpage(url, video_id)
-        video_url = self._html_search_regex(
-            r'<div.*?name="div_video".*?flashvars="([^"]+)">',
-            webpage, 'video URL')
+        data = self._download_xml(
+            'http://www.firstpost.com/getvideoxml-%s.xml' % video_id, video_id,
+            'Downloading video XML')
+
+        item = data.find('./playlist/item')
+        thumbnail = item.find('./image').text
+        title = item.find('./title').text
+
+        formats = [
+            {
+                'url': details.find('./file').text,
+                'format_id': details.find('./label').text.strip(),
+                'width': int(details.find('./width').text.strip()),
+                'height': int(details.find('./height').text.strip()),
+            } for details in item.findall('./source/file_details') if details.find('./file').text
+        ]

        return {
            'id': video_id,
-            'url': video_url,
-            'title': self._og_search_title(webpage),
-            'description': self._og_search_description(webpage),
-            'thumbnail': self._og_search_thumbnail(webpage),
+            'title': title,
+            'thumbnail': thumbnail,
+            'formats': formats,
        }
--- a/youtube_dl/extractor/fivemin.py
+++ b/youtube_dl/extractor/fivemin.py
@ -5,6 +5,7 @@ import re
 from .common import InfoExtractor
 from ..utils import (
    compat_str,
+    compat_urllib_parse,
 )


@ -16,16 +17,28 @@ class FiveMinIE(InfoExtractor):
        (?P<id>\d+)
        '''

-    _TEST = {
-        # From http://www.engadget.com/2013/11/15/ipad-mini-retina-display-review/
-        'url': 'http://pshared.5min.com/Scripts/PlayerSeed.js?sid=281&width=560&height=345&playList=518013791',
-        'md5': '4f7b0b79bf1a470e5004f7112385941d',
-        'info_dict': {
-            'id': '518013791',
-            'ext': 'mp4',
-            'title': 'iPad Mini with Retina Display Review',
+    _TESTS = [
+        {
+            # From http://www.engadget.com/2013/11/15/ipad-mini-retina-display-review/
+            'url': 'http://pshared.5min.com/Scripts/PlayerSeed.js?sid=281&width=560&height=345&playList=518013791',
+            'md5': '4f7b0b79bf1a470e5004f7112385941d',
+            'info_dict': {
+                'id': '518013791',
+                'ext': 'mp4',
+                'title': 'iPad Mini with Retina Display Review',
+            },
        },
-    }
+        {
+            # From http://on.aol.com/video/how-to-make-a-next-level-fruit-salad-518086247
+            'url': '5min:518086247',
+            'md5': 'e539a9dd682c288ef5a498898009f69e',
+            'info_dict': {
+                'id': '518086247',
+                'ext': 'mp4',
+                'title': 'How to Make a Next-Level Fruit Salad',
+            },
+        },
+    ]

    @classmethod
    def _build_result(cls, video_id):
@ -34,9 +47,19 @@ class FiveMinIE(InfoExtractor):
    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
+        embed_url = 'https://embed.5min.com/playerseed/?playList=%s' % video_id
+        embed_page = self._download_webpage(embed_url, video_id,
+            'Downloading embed page')
+        sid = self._search_regex(r'sid=(\d+)', embed_page, 'sid')
+        query = compat_urllib_parse.urlencode({
+            'func': 'GetResults',
+            'playlist': video_id,
+            'sid': sid,
+            'isPlayerSeed': 'true',
+            'url': embed_url,
+        })
        info = self._download_json(
-            'https://syn.5min.com/handlers/SenseHandler.ashx?func=GetResults&'
-            'playlist=%s&url=https' % video_id,
+            'https://syn.5min.com/handlers/SenseHandler.ashx?' + query,
            video_id)['binding'][0]

        second_id = compat_str(int(video_id[:-2]) + 1)
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@ -35,9 +35,10 @@ class GenericIE(InfoExtractor):
    _TESTS = [
        {
            'url': 'http://www.hodiho.fr/2013/02/regis-plante-sa-jeep.html',
-            'file': '13601338388002.mp4',
-            'md5': '6e15c93721d7ec9e9ca3fdbf07982cfd',
+            'md5': '85b90ccc9d73b4acd9138d3af4c27f89',
            'info_dict': {
+                'id': '13601338388002',
+                'ext': 'mp4',
                'uploader': 'www.hodiho.fr',
                'title': 'R\u00e9gis plante sa Jeep',
            }
@ -46,8 +47,9 @@ class GenericIE(InfoExtractor):
        {
            'add_ie': ['Bandcamp'],
            'url': 'http://bronyrock.com/track/the-pony-mash',
-            'file': '3235767654.mp3',
            'info_dict': {
+                'id': '3235767654',
+                'ext': 'mp3',
                'title': 'The Pony Mash',
                'uploader': 'M_Pallante',
            },
@ -73,9 +75,10 @@ class GenericIE(InfoExtractor):
        {
            # https://github.com/rg3/youtube-dl/issues/2253
            'url': 'http://bcove.me/i6nfkrc3',
-            'file': '3101154703001.mp4',
            'md5': '0ba9446db037002366bab3b3eb30c88c',
            'info_dict': {
+                'id': '3101154703001',
+                'ext': 'mp4',
                'title': 'Still no power',
                'uploader': 'thestar.com',
                'description': 'Mississauga resident David Farmer is still out of power as a result of the ice storm a month ago. To keep the house warm, Farmer cuts wood from his property for a wood burning stove downstairs.',
@ -184,6 +187,17 @@ class GenericIE(InfoExtractor):
                'description': 'md5:ddb2a40ecd6b6a147e400e535874947b',
            }
        },
+        # Embedded Ustream video
+        {
+            'url': 'http://www.american.edu/spa/pti/nsa-privacy-janus-2014.cfm',
+            'md5': '27b99cdb639c9b12a79bca876a073417',
+            'info_dict': {
+                'id': '45734260',
+                'ext': 'flv',
+                'uploader': 'AU SPA:  The NSA and Privacy',
+                'title': 'NSA and Privacy Forum Debate featuring General Hayden and Barton Gellman'
+            }
+        },
        # nowvideo embed hidden behind percent encoding
        {
            'url': 'http://www.waoanime.tv/the-super-dimension-fortress-macross-episode-1/',
@ -500,17 +514,18 @@ class GenericIE(InfoExtractor):
        if mobj is not None:
            return self.url_result(mobj.group(1), 'Mpora')

-        # Look for embedded NovaMov player
+        # Look for embedded NovaMov-based player
        mobj = re.search(
-            r'<iframe[^>]+?src=(["\'])(?P<url>http://(?:(?:embed|www)\.)?novamov\.com/embed\.php.+?)\1', webpage)
+            r'''(?x)<iframe[^>]+?src=(["\'])
+                    (?P<url>http://(?:(?:embed|www)\.)?
+                        (?:novamov\.com|
+                           nowvideo\.(?:ch|sx|eu|at|ag|co)|
+                           videoweed\.(?:es|com)|
+                           movshare\.(?:net|sx|ag)|
+                           divxstage\.(?:eu|net|ch|co|at|ag))
+                        /embed\.php.+?)\1''', webpage)
        if mobj is not None:
-            return self.url_result(mobj.group('url'), 'NovaMov')
-
-        # Look for embedded NowVideo player
-        mobj = re.search(
-            r'<iframe[^>]+?src=(["\'])(?P<url>http://(?:(?:embed|www)\.)?nowvideo\.(?:ch|sx|eu)/embed\.php.+?)\1', webpage)
-        if mobj is not None:
-            return self.url_result(mobj.group('url'), 'NowVideo')
+            return self.url_result(mobj.group('url'))

        # Look for embedded Facebook player
        mobj = re.search(
@ -556,6 +571,12 @@ class GenericIE(InfoExtractor):
        if mobj is not None:
            return self.url_result(mobj.group('url'), 'TED')

+        # Look for embedded Ustream videos
+        mobj = re.search(
+            r'<iframe[^>]+?src=(["\'])(?P<url>http://www\.ustream\.tv/embed/.+?)\1', webpage)
+        if mobj is not None:
+            return self.url_result(mobj.group('url'), 'Ustream')
+
        # Look for embedded arte.tv player
        mobj = re.search(
            r'<script [^>]*?src="(?P<url>http://www\.arte\.tv/playerv2/embed[^"]+)"',
--- a/youtube_dl/extractor/justintv.py
+++ b/youtube_dl/extractor/justintv.py
@ -1,9 +1,12 @@
+from __future__ import unicode_literals
+
 import json
 import os
 import re

 from .common import InfoExtractor
 from ..utils import (
+    compat_str,
    ExtractorError,
    formatSeconds,
 )
@ -24,34 +27,31 @@ class JustinTVIE(InfoExtractor):
        /?(?:\#.*)?$
        """
    _JUSTIN_PAGE_LIMIT = 100
-    IE_NAME = u'justin.tv'
+    IE_NAME = 'justin.tv'
+    IE_DESC = 'justin.tv and twitch.tv'
    _TEST = {
-        u'url': u'http://www.twitch.tv/thegamedevhub/b/296128360',
-        u'file': u'296128360.flv',
-        u'md5': u'ecaa8a790c22a40770901460af191c9a',
-        u'info_dict': {
-            u"upload_date": u"20110927", 
-            u"uploader_id": 25114803, 
-            u"uploader": u"thegamedevhub", 
-            u"title": u"Beginner Series - Scripting With Python Pt.1"
+        'url': 'http://www.twitch.tv/thegamedevhub/b/296128360',
+        'md5': 'ecaa8a790c22a40770901460af191c9a',
+        'info_dict': {
+            'id': '296128360',
+            'ext': 'flv',
+            'upload_date': '20110927',
+            'uploader_id': 25114803,
+            'uploader': 'thegamedevhub',
+            'title': 'Beginner Series - Scripting With Python Pt.1'
        }
    }

-    def report_download_page(self, channel, offset):
-        """Report attempt to download a single page of videos."""
-        self.to_screen(u'%s: Downloading video information from %d to %d' %
-                (channel, offset, offset + self._JUSTIN_PAGE_LIMIT))
-
    # Return count of items, list of *valid* items
    def _parse_page(self, url, video_id):
        info_json = self._download_webpage(url, video_id,
-                                           u'Downloading video info JSON',
-                                           u'unable to download video info JSON')
+                                           'Downloading video info JSON',
+                                           'unable to download video info JSON')

        response = json.loads(info_json)
        if type(response) != list:
            error_text = response.get('error', 'unknown error')
-            raise ExtractorError(u'Justin.tv API: %s' % error_text)
+            raise ExtractorError('Justin.tv API: %s' % error_text)
        info = []
        for clip in response:
            video_url = clip['video_file_url']
@ -62,7 +62,7 @@ class JustinTVIE(InfoExtractor):
                video_id = clip['id']
                video_title = clip.get('title', video_id)
                info.append({
-                    'id': video_id,
+                    'id': compat_str(video_id),
                    'url': video_url,
                    'title': video_title,
                    'uploader': clip.get('channel_name', video_uploader_id),
@ -74,8 +74,6 @@ class JustinTVIE(InfoExtractor):

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
-        if mobj is None:
-            raise ExtractorError(u'invalid URL: %s' % url)

        api_base = 'http://api.justin.tv'
        paged = False
@ -89,40 +87,41 @@ class JustinTVIE(InfoExtractor):
            webpage = self._download_webpage(url, chapter_id)
            m = re.search(r'PP\.archive_id = "([0-9]+)";', webpage)
            if not m:
-                raise ExtractorError(u'Cannot find archive of a chapter')
+                raise ExtractorError('Cannot find archive of a chapter')
            archive_id = m.group(1)

            api = api_base + '/broadcast/by_chapter/%s.xml' % chapter_id
-            doc = self._download_xml(api, chapter_id,
-                                             note=u'Downloading chapter information',
-                                             errnote=u'Chapter information download failed')
+            doc = self._download_xml(
+                api, chapter_id,
+                note='Downloading chapter information',
+                errnote='Chapter information download failed')
            for a in doc.findall('.//archive'):
                if archive_id == a.find('./id').text:
                    break
            else:
-                raise ExtractorError(u'Could not find chapter in chapter information')
+                raise ExtractorError('Could not find chapter in chapter information')

            video_url = a.find('./video_file_url').text
-            video_ext = video_url.rpartition('.')[2] or u'flv'
+            video_ext = video_url.rpartition('.')[2] or 'flv'

-            chapter_api_url = u'https://api.twitch.tv/kraken/videos/c' + chapter_id
-            chapter_info_json = self._download_webpage(chapter_api_url, u'c' + chapter_id,
-                                   note='Downloading chapter metadata',
-                                   errnote='Download of chapter metadata failed')
-            chapter_info = json.loads(chapter_info_json)
+            chapter_api_url = 'https://api.twitch.tv/kraken/videos/c' + chapter_id
+            chapter_info = self._download_json(
+                chapter_api_url, 'c' + chapter_id,
+                note='Downloading chapter metadata',
+                errnote='Download of chapter metadata failed')

            bracket_start = int(doc.find('.//bracket_start').text)
            bracket_end = int(doc.find('.//bracket_end').text)

            # TODO determine start (and probably fix up file)
            #  youtube-dl -v http://www.twitch.tv/firmbelief/c/1757457
-            #video_url += u'?start=' + TODO:start_timestamp
+            #video_url += '?start=' + TODO:start_timestamp
            # bracket_start is 13290, but we want 51670615
-            self._downloader.report_warning(u'Chapter detected, but we can just download the whole file. '
-                                            u'Chapter starts at %s and ends at %s' % (formatSeconds(bracket_start), formatSeconds(bracket_end)))
+            self._downloader.report_warning('Chapter detected, but we can just download the whole file. '
+                                            'Chapter starts at %s and ends at %s' % (formatSeconds(bracket_start), formatSeconds(bracket_end)))

            info = {
-                'id': u'c' + chapter_id,
+                'id': 'c' + chapter_id,
                'url': video_url,
                'ext': video_ext,
                'title': chapter_info['title'],
@ -131,14 +130,12 @@ class JustinTVIE(InfoExtractor):
                'uploader': chapter_info['channel']['display_name'],
                'uploader_id': chapter_info['channel']['name'],
            }
-            return [info]
+            return info
        else:
            video_id = mobj.group('videoid')
            api = api_base + '/broadcast/by_archive/%s.json' % video_id

-        self.report_extraction(video_id)
-
-        info = []
+        entries = []
        offset = 0
        limit = self._JUSTIN_PAGE_LIMIT
        while True:
@ -146,8 +143,12 @@ class JustinTVIE(InfoExtractor):
                self.report_download_page(video_id, offset)
            page_url = api + ('?offset=%d&limit=%d' % (offset, limit))
            page_count, page_info = self._parse_page(page_url, video_id)
-            info.extend(page_info)
+            entries.extend(page_info)
            if not paged or page_count != limit:
                break
            offset += limit
-        return info
+        return {
+            '_type': 'playlist',
+            'id': video_id,
+            'entries': entries,
+        }
--- a/youtube_dl/extractor/keezmovies.py
+++ b/youtube_dl/extractor/keezmovies.py
@ -1,3 +1,5 @@
+from __future__ import unicode_literals
+
 import os
 import re

@ -11,22 +13,22 @@ from ..aes import (
    aes_decrypt_text
 )

+
 class KeezMoviesIE(InfoExtractor):
-    _VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>keezmovies\.com/video/.+?(?P<videoid>[0-9]+))(?:[/?&]|$)'
+    _VALID_URL = r'^https?://(?:www\.)?keezmovies\.com/video/.+?(?P<videoid>[0-9]+)(?:[/?&]|$)'
    _TEST = {
-        u'url': u'http://www.keezmovies.com/video/petite-asian-lady-mai-playing-in-bathtub-1214711',
-        u'file': u'1214711.mp4',
-        u'md5': u'6e297b7e789329923fcf83abb67c9289',
-        u'info_dict': {
-            u"title": u"Petite Asian Lady Mai Playing In Bathtub",
-            u"age_limit": 18,
+        'url': 'http://www.keezmovies.com/video/petite-asian-lady-mai-playing-in-bathtub-1214711',
+        'file': '1214711.mp4',
+        'md5': '6e297b7e789329923fcf83abb67c9289',
+        'info_dict': {
+            'title': 'Petite Asian Lady Mai Playing In Bathtub',
+            'age_limit': 18,
        }
    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('videoid')
-        url = 'http://www.' + mobj.group('url')

        req = compat_urllib_request.Request(url)
        req.add_header('Cookie', 'age_verified=1')
@ -38,10 +40,10 @@ class KeezMoviesIE(InfoExtractor):
            embedded_url = mobj.group(1)
            return self.url_result(embedded_url)

-        video_title = self._html_search_regex(r'<h1 [^>]*>([^<]+)', webpage, u'title')
-        video_url = compat_urllib_parse.unquote(self._html_search_regex(r'video_url=(.+?)&amp;', webpage, u'video_url'))
-        if webpage.find('encrypted=true')!=-1:
-            password = self._html_search_regex(r'video_title=(.+?)&amp;', webpage, u'password')
+        video_title = self._html_search_regex(r'<h1 [^>]*>([^<]+)', webpage, 'title')
+        video_url = compat_urllib_parse.unquote(self._html_search_regex(r'video_url=(.+?)&amp;', webpage, 'video_url'))
+        if 'encrypted=true' in webpage:
+            password = self._html_search_regex(r'video_title=(.+?)&amp;', webpage, 'password')
            video_url = aes_decrypt_text(video_url, password, 32).decode('utf-8')
        path = compat_urllib_parse_urlparse(video_url).path
        extension = os.path.splitext(path)[1][1:]
--- a/youtube_dl/extractor/morningstar.py
+++ b/youtube_dl/extractor/morningstar.py
@ -1,22 +1,14 @@
 # coding: utf-8
 from __future__ import unicode_literals

-import hashlib
-import json
 import re
-import time

 from .common import InfoExtractor
-from ..utils import (
-    compat_parse_qs,
-    compat_str,
-    int_or_none,
-)


 class MorningstarIE(InfoExtractor):
    IE_DESC = 'morningstar.com'
-    _VALID_URL = r'https?://(?:www\.)?morningstar\.com/cover/videocenter\.aspx\?id=(?P<id>[0-9]+)'
+    _VALID_URL = r'https?://(?:www\.)?morningstar\.com/[cC]over/video[cC]enter\.aspx\?id=(?P<id>[0-9]+)'
    _TEST = {
        'url': 'http://www.morningstar.com/cover/videocenter.aspx?id=615869',
        'md5': '6c0acface7a787aadc8391e4bbf7b0f5',
--- a/youtube_dl/extractor/motorsport.py
+++ b/youtube_dl/extractor/motorsport.py
@ -44,7 +44,7 @@ class MotorsportIE(InfoExtractor):
        e = compat_str(int(time.time()) + 24 * 60 * 60)
        base_video_url = params['location'] + '?e=' + e
        s = 'h3hg713fh32'
-        h = hashlib.md5(s + base_video_url).hexdigest()
+        h = hashlib.md5((s + base_video_url).encode('utf-8')).hexdigest()
        video_url = base_video_url + '&h=' + h

        uploader = self._html_search_regex(
--- a/youtube_dl/extractor/movshare.py
+++ b/youtube_dl/extractor/movshare.py
@ -0,0 +1,27 @@
+from __future__ import unicode_literals
+
+from .novamov import NovaMovIE
+
+
+class MovShareIE(NovaMovIE):
+    IE_NAME = 'movshare'
+    IE_DESC = 'MovShare'
+
+    _VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': 'movshare\.(?:net|sx|ag)'}
+
+    _HOST = 'www.movshare.net'
+
+    _FILE_DELETED_REGEX = r'>This file no longer exists on our servers.<'
+    _TITLE_REGEX = r'<strong>Title:</strong> ([^<]+)</p>'
+    _DESCRIPTION_REGEX = r'<strong>Description:</strong> ([^<]+)</p>'
+
+    _TEST = {
+        'url': 'http://www.movshare.net/video/559e28be54d96',
+        'md5': 'abd31a2132947262c50429e1d16c1bfd',
+        'info_dict': {
+            'id': '559e28be54d96',
+            'ext': 'flv',
+            'title': 'dissapeared image',
+            'description': 'optical illusion  dissapeared image  magic illusion',
+        }
+    }
--- a/youtube_dl/extractor/mpora.py
+++ b/youtube_dl/extractor/mpora.py
@ -4,9 +4,7 @@ import json
 import re

 from .common import InfoExtractor
-from ..utils import (
-    int_or_none,
-)
+from ..utils import int_or_none


 class MporaIE(InfoExtractor):
@ -20,7 +18,7 @@ class MporaIE(InfoExtractor):
        'info_dict': {
            'title': 'Katy Curd -  Winter in the Forest',
            'duration': 416,
-            'uploader': 'petenewman',
+            'uploader': 'Peter Newman Media',
        },
    }

--- a/youtube_dl/extractor/ninegag.py
+++ b/youtube_dl/extractor/ninegag.py
@ -1,15 +1,22 @@
 from __future__ import unicode_literals

 import re
+import json

 from .common import InfoExtractor
+from ..utils import str_to_int


 class NineGagIE(InfoExtractor):
    IE_NAME = '9gag'
-    _VALID_URL = r'^https?://(?:www\.)?9gag\.tv/v/(?P<id>[0-9]+)'
+    _VALID_URL = r'''(?x)^https?://(?:www\.)?9gag\.tv/
+        (?:
+            v/(?P<numid>[0-9]+)|
+            p/(?P<id>[a-zA-Z0-9]+)/(?P<display_id>[^?#/]+)
+        )
+    '''

-    _TEST = {
+    _TESTS = [{
        "url": "http://9gag.tv/v/1912",
        "info_dict": {
            "id": "1912",
@ -20,34 +27,42 @@ class NineGagIE(InfoExtractor):
            "thumbnail": "re:^https?://",
        },
        'add_ie': ['Youtube']
-    }
+    },
+    {
+        'url': 'http://9gag.tv/p/KklwM/alternate-banned-opening-scene-of-gravity?ref=fsidebar',
+        'info_dict': {
+            'id': 'KklwM',
+            'ext': 'mp4',
+            'display_id': 'alternate-banned-opening-scene-of-gravity',
+            "description": "While Gravity was a pretty awesome movie already, YouTuber Krishna Shenoi came up with a way to improve upon it, introducing a much better solution to Sandra Bullock's seemingly endless tumble in space. The ending is priceless.",
+            'title': "Banned Opening Scene Of \"Gravity\" That Changes The Whole Movie",
+        },
+    }]

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = mobj.group('numid') or mobj.group('id')
+        display_id = mobj.group('display_id') or video_id

-        webpage = self._download_webpage(url, video_id)
+        webpage = self._download_webpage(url, display_id)

-        youtube_id = self._html_search_regex(
-            r'(?s)id="jsid-video-post-container".*?data-external-id="([^"]+)"',
-            webpage, 'video ID')
-        description = self._html_search_regex(
-            r'(?s)<div class="video-caption">.*?<p>(.*?)</p>', webpage,
-            'description', fatal=False)
-        view_count_str = self._html_search_regex(
-            r'<p><b>([0-9][0-9,]*)</b> views</p>', webpage, 'view count',
-            fatal=False)
-        view_count = (
-            None if view_count_str is None
-            else int(view_count_str.replace(',', '')))
+        post_view = json.loads(self._html_search_regex(
+            r'var postView = new app\.PostView\({ post: ({.+?}),', webpage, 'post view'))
+
+        youtube_id = post_view['videoExternalId']
+        title = post_view['title']
+        description = post_view['description']
+        view_count = str_to_int(post_view['externalView'])
+        thumbnail = post_view.get('thumbnail_700w') or post_view.get('ogImageUrl') or post_view.get('thumbnail_300w')

        return {
            '_type': 'url_transparent',
            'url': youtube_id,
            'ie_key': 'Youtube',
            'id': video_id,
-            'title': self._og_search_title(webpage),
+            'display_id': display_id,
+            'title': title,
            'description': description,
            'view_count': view_count,
-            'thumbnail': self._og_search_thumbnail(webpage),
+            'thumbnail': thumbnail,
        }
--- a/youtube_dl/extractor/novamov.py
+++ b/youtube_dl/extractor/novamov.py
@ -13,7 +13,8 @@ class NovaMovIE(InfoExtractor):
    IE_NAME = 'novamov'
    IE_DESC = 'NovaMov'

-    _VALID_URL = r'http://(?:(?:www\.)?%(host)s/video/|(?:(?:embed|www)\.)%(host)s/embed\.php\?(?:.*?&)?v=)(?P<videoid>[a-z\d]{13})' % {'host': 'novamov\.com'}
+    _VALID_URL_TEMPLATE = r'http://(?:(?:www\.)?%(host)s/(?:file|video)/|(?:(?:embed|www)\.)%(host)s/embed\.php\?(?:.*?&)?v=)(?P<id>[a-z\d]{13})'
+    _VALID_URL = _VALID_URL_TEMPLATE % {'host': 'novamov\.com'}

    _HOST = 'www.novamov.com'

@ -36,18 +37,17 @@ class NovaMovIE(InfoExtractor):

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('videoid')
+        video_id = mobj.group('id')

        page = self._download_webpage(
            'http://%s/video/%s' % (self._HOST, video_id), video_id, 'Downloading video page')

        if re.search(self._FILE_DELETED_REGEX, page) is not None:
-            raise ExtractorError(u'Video %s does not exist' % video_id, expected=True)
+            raise ExtractorError('Video %s does not exist' % video_id, expected=True)

        filekey = self._search_regex(self._FILEKEY_REGEX, page, 'filekey')

        title = self._html_search_regex(self._TITLE_REGEX, page, 'title', fatal=False)
-
        description = self._html_search_regex(self._DESCRIPTION_REGEX, page, 'description', default='', fatal=False)

        api_response = self._download_webpage(
--- a/youtube_dl/extractor/nowvideo.py
+++ b/youtube_dl/extractor/nowvideo.py
@ -7,7 +7,7 @@ class NowVideoIE(NovaMovIE):
    IE_NAME = 'nowvideo'
    IE_DESC = 'NowVideo'

-    _VALID_URL = r'http://(?:(?:www\.)?%(host)s/video/|(?:(?:embed|www)\.)%(host)s/embed\.php\?(?:.*?&)?v=)(?P<videoid>[a-z\d]{13})' % {'host': 'nowvideo\.(?:ch|sx|eu)'}
+    _VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': 'nowvideo\.(?:ch|sx|eu|at|ag|co)'}

    _HOST = 'www.nowvideo.ch'

--- a/youtube_dl/extractor/ntv.py
+++ b/youtube_dl/extractor/ntv.py
@ -59,11 +59,11 @@ class NTVIE(InfoExtractor):
        {
            'url': 'http://www.ntv.ru/kino/Koma_film',
            'info_dict': {
-                'id': '750783',
+                'id': '758100',
                'ext': 'flv',
-                'title': 'Остросюжетный фильм «Кома»  4 апреля вечером на НТВ',
-                'description': 'Остросюжетный фильм «Кома»  4 апреля вечером на НТВ',
-                'duration': 28,
+                'title': 'Остросюжетный фильм «Кома»',
+                'description': 'Остросюжетный фильм «Кома»',
+                'duration': 5592,
            },
            'params': {
                    # rtmp download
--- a/youtube_dl/extractor/pornhd.py
+++ b/youtube_dl/extractor/pornhd.py
@ -39,11 +39,11 @@ class PornHdIE(InfoExtractor):

        formats = [
            {
-                'url': url,
+                'url': format_url,
                'ext': format.lower(),
                'format_id': '%s-%s' % (format.lower(), quality.lower()),
                'quality': 1 if quality.lower() == 'high' else 0,
-            } for format, quality, url in re.findall(
+            } for format, quality, format_url in re.findall(
                r'var __video([\da-zA-Z]+?)(Low|High)StreamUrl = \'(http://.+?)\?noProxy=1\'', webpage)
        ]

--- a/youtube_dl/extractor/prosiebensat1.py
+++ b/youtube_dl/extractor/prosiebensat1.py
@ -160,6 +160,7 @@ class ProSiebenSat1IE(InfoExtractor):
    _CLIPID_REGEXES = [
        r'"clip_id"\s*:\s+"(\d+)"',
        r'clipid: "(\d+)"',
+        r'clipId=(\d+)',
    ]
    _TITLE_REGEXES = [
        r'<h2 class="subtitle" itemprop="name">\s*(.+?)</h2>',
--- a/youtube_dl/extractor/pyvideo.py
+++ b/youtube_dl/extractor/pyvideo.py
@ -46,7 +46,8 @@ class PyvideoIE(InfoExtractor):
            return self.url_result(m_youtube.group(1), 'Youtube')

        title = self._html_search_regex(
-            r'<div class="section">.*?<h3>([^>]+?)</h3>', webpage, 'title', flags=re.DOTALL)
+            r'<div class="section">.*?<h3(?:\s+class="[^"]*")?>([^>]+?)</h3>',
+            webpage, 'title', flags=re.DOTALL)
        video_url = self._search_regex(
            [r'<source src="(.*?)"', r'<dt>Download</dt>.*?<a href="(.+?)"'],
            webpage, 'video url', flags=re.DOTALL)
--- a/youtube_dl/extractor/ro220.py
+++ b/youtube_dl/extractor/ro220.py
@ -18,7 +18,7 @@ class Ro220IE(InfoExtractor):
        'md5': '03af18b73a07b4088753930db7a34add',
        'info_dict': {
            "title": "Luati-le Banii sez 4 ep 1",
-            "description": "Iata-ne reveniti dupa o binemeritata vacanta. Va astept si pe Facebook cu pareri si comentarii.",
+            "description": "re:^Iata-ne reveniti dupa o binemeritata vacanta\. +Va astept si pe Facebook cu pareri si comentarii.$",
        }
    }

--- a/youtube_dl/extractor/rts.py
+++ b/youtube_dl/extractor/rts.py
@ -35,13 +35,13 @@ class RTSIE(InfoExtractor):
        },
        {
            'url': 'http://www.rts.ch/emissions/passe-moi-les-jumelles/5624067-entre-ciel-et-mer.html',
-            'md5': 'c197f0b2421995c63a64cc73d800f42e',
+            'md5': 'c148457a27bdc9e5b1ffe081a7a8337b',
            'info_dict': {
-                'id': '5738317',
+                'id': '5624067',
                'ext': 'mp4',
-                'duration': 55,
-                'title': 'Bande de lancement de Passe-moi les jumelles',
-                'description': '',
+                'duration': 3720,
+                'title': 'Les yeux dans les cieux - Mon homard au Canada',
+                'description': 'md5:d22ee46f5cc5bac0912e5a0c6d44a9f7',
                'uploader': 'Passe-moi les jumelles',
                'upload_date': '20140404',
                'timestamp': 1396635300,
@ -98,17 +98,20 @@ class RTSIE(InfoExtractor):
        m = re.match(self._VALID_URL, url)
        video_id = m.group('id')

-        def download_json(video_id):
+        def download_json(internal_id):
            return self._download_json(
-                'http://www.rts.ch/a/%s.html?f=json/article' % video_id, video_id)
+                'http://www.rts.ch/a/%s.html?f=json/article' % internal_id,
+                video_id)

        all_info = download_json(video_id)

        # video_id extracted out of URL is not always a real id
        if 'video' not in all_info and 'audio' not in all_info:
            page = self._download_webpage(url, video_id)
-            video_id = self._html_search_regex(r'<(?:video|audio) data-id="(\d+)"', page, 'video id')
-            all_info = download_json(video_id)
+            internal_id = self._html_search_regex(
+                r'<(?:video|audio) data-id="([0-9]+)"', page,
+                'internal video id')
+            all_info = download_json(internal_id)

        info = all_info['video']['JSONinfo'] if 'video' in all_info else all_info['audio']

--- a/youtube_dl/extractor/rtve.py
+++ b/youtube_dl/extractor/rtve.py
@ -0,0 +1,84 @@
+# encoding: utf-8
+from __future__ import unicode_literals
+
+import re
+import base64
+
+from .common import InfoExtractor
+from ..utils import (
+    struct_unpack,
+)
+
+
+class RTVEALaCartaIE(InfoExtractor):  # Extractor for www.rtve.es "a la carta" video pages
+    IE_NAME = 'rtve.es:alacarta'
+    IE_DESC = 'RTVE a la carta'
+    _VALID_URL = r'http://www\.rtve\.es/alacarta/videos/[^/]+/[^/]+/(?P<id>\d+)'  # 'id' group captures the numeric video id
+
+    _TEST = {
+        'url': 'http://www.rtve.es/alacarta/videos/balonmano/o-swiss-cup-masculina-final-espana-suecia/2491869/',
+        'md5': '18fcd45965bdd076efdb12cd7f6d7b9e',
+        'info_dict': {
+            'id': '2491869',
+            'ext': 'mp4',
+            'title': 'Balonmano - Swiss Cup masculina. Final: España-Suecia',
+        },
+    }
+
+    def _decrypt_url(self, png):  # Decode the obfuscated video URL from base64 text that contains a 'tEXt' marker; returns the URL string
+        encrypted_data = base64.b64decode(png)
+        text_index = encrypted_data.find(b'tEXt')  # NOTE(review): find() returns -1 when the marker is absent; that case is not handled here
+        text_chunk = encrypted_data[text_index-4:]  # start 4 bytes before the marker, where the length field is read from
+        length = struct_unpack('!I', text_chunk[:4])[0]  # big-endian unsigned 32-bit payload length
+        # Use bytearray to get integers when iterating in both python 2.x and 3.x
+        data = bytearray(text_chunk[8:8+length])  # payload follows the 4-byte length and the 4-byte 'tEXt' tag
+        data = [chr(b) for b in data if b != 0]  # drop NUL bytes, keep everything else as characters
+        hash_index = data.index('#')  # the first '#' separates the alphabet part from the encoded URL part
+        alphabet_data = data[:hash_index]
+        url_data = data[hash_index+1:]
+
+        alphabet = []
+        e = 0  # skip count that was applied after the previously kept character
+        d = 0  # characters still to skip before the next one is kept
+        for l in alphabet_data:
+            if d == 0:
+                alphabet.append(l)
+                d = e = (e + 1) % 4  # skip count cycles 1, 2, 3, 0, ...
+            else:
+                d -= 1
+        url = ''
+        f = 0  # 0: next character is the tens digit; 1: waiting for the units digit
+        e = 3  # filler characters to skip before the units digit is read
+        b = 1  # 1-based counter of decoded characters, drives the next skip count
+        for letter in url_data:
+            if f == 0:
+                l = int(letter)*10  # tens digit of the index into alphabet
+                f = 1
+            else:
+                if e == 0:
+                    l += int(letter)  # units digit completes the two-digit index
+                    url += alphabet[l]
+                    e = (b + 3) % 4  # skip count for the following pair: 0, 1, 2, 3, 0, ...
+                    f = 0
+                    b += 1
+                else:
+                    e -= 1
+
+        return url
+
+    def _real_extract(self, url):  # Build the result dict from the JSON config plus the decoded media URL
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        info = self._download_json(
+            'http://www.rtve.es/api/videos/%s/config/alacarta_videos.json' % video_id,
+            video_id)['page']['items'][0]  # only the first item of the response is used
+        png_url = 'http://www.rtve.es/ztnr/movil/thumbnail/default/videos/%s.png' % video_id
+        png = self._download_webpage(png_url, video_id, 'Downloading url information')  # response body is handed to b64decode in _decrypt_url
+        video_url = self._decrypt_url(png)
+
+        return {
+            'id': video_id,
+            'title': info['title'],
+            'url': video_url,
+            'thumbnail': info['image'],
+        }
--- a/youtube_dl/extractor/slideshare.py
+++ b/youtube_dl/extractor/slideshare.py
@ -39,7 +39,8 @@ class SlideshareIE(InfoExtractor):
        ext = info['jsplayer']['video_extension']
        video_url = compat_urlparse.urljoin(bucket, doc + '-SD.' + ext)
        description = self._html_search_regex(
-            r'<p class="description.*?"[^>]*>(.*?)</p>', webpage, 'description')
+            r'<p\s+(?:style="[^"]*"\s+)?class="description.*?"[^>]*>(.*?)</p>', webpage,
+            'description', fatal=False)

        return {
            '_type': 'video',
--- a/youtube_dl/extractor/teamcoco.py
+++ b/youtube_dl/extractor/teamcoco.py
@ -9,8 +9,18 @@ from ..utils import (


 class TeamcocoIE(InfoExtractor):
-    _VALID_URL = r'http://teamcoco\.com/video/(?P<url_title>.*)'
-    _TEST = {
+    _VALID_URL = r'http://teamcoco\.com/video/(?P<video_id>[0-9]+)?/?(?P<display_id>.*)'
+    _TESTS = [
+    {
+        'url': 'http://teamcoco.com/video/80187/conan-becomes-a-mary-kay-beauty-consultant',
+        'file': '80187.mp4',
+        'md5': '3f7746aa0dc86de18df7539903d399ea',
+        'info_dict': {
+            'title': 'Conan Becomes A Mary Kay Beauty Consultant',
+            'description': 'Mary Kay is perhaps the most trusted name in female beauty, so of course Conan is a natural choice to sell their products.'
+        }
+    },
+    {
        'url': 'http://teamcoco.com/video/louis-ck-interview-george-w-bush',
        'file': '19705.mp4',
        'md5': 'cde9ba0fa3506f5f017ce11ead928f9a',
@ -19,22 +29,23 @@ class TeamcocoIE(InfoExtractor):
            "title": "Louis C.K. Interview Pt. 1 11/3/11"
        }
    }
+    ]

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
-        if mobj is None:
-            raise ExtractorError('Invalid URL: %s' % url)
-        url_title = mobj.group('url_title')
-        webpage = self._download_webpage(url, url_title)

-        video_id = self._html_search_regex(
-            r'<article class="video" data-id="(\d+?)"',
-            webpage, 'video id')
-
-        self.report_extraction(video_id)
+        display_id = mobj.group('display_id')
+        webpage = self._download_webpage(url, display_id)
+        
+        video_id = mobj.group("video_id")
+        if not video_id:
+            video_id = self._html_search_regex(
+                r'<article class="video" data-id="(\d+?)"',
+                webpage, 'video id')

        data_url = 'http://teamcoco.com/cvp/2.0/%s.xml' % video_id
-        data = self._download_xml(data_url, video_id, 'Downloading data webpage')
+        data = self._download_xml(
+            data_url, display_id, 'Downloading data webpage')

        qualities = ['500k', '480p', '1000k', '720p', '1080p']
        formats = []
@ -69,6 +80,7 @@ class TeamcocoIE(InfoExtractor):

        return {
            'id': video_id,
+            'display_id': display_id,
            'formats': formats,
            'title': self._og_search_title(webpage),
            'thumbnail': self._og_search_thumbnail(webpage),
--- a/youtube_dl/extractor/ted.py
+++ b/youtube_dl/extractor/ted.py
@ -37,6 +37,7 @@ class TEDIE(SubtitlesInfoExtractor):
                'consciousness, but that half the time our brains are '
                'actively fooling us.'),
            'uploader': 'Dan Dennett',
+            'width': 854,
        }
    }, {
        'url': 'http://www.ted.com/watch/ted-institute/ted-bcg/vishal-sikka-the-beauty-and-power-of-algorithms',
@ -48,12 +49,25 @@ class TEDIE(SubtitlesInfoExtractor):
            'thumbnail': 're:^https?://.+\.jpg',
            'description': 'Adaptive, intelligent, and consistent, algorithms are emerging as the ultimate app for everything from matching consumers to products to assessing medical diagnoses. Vishal Sikka shares his appreciation for the algorithm, charting both its inherent beauty and its growing power.',
        }
+    }, {
+        'url': 'http://www.ted.com/talks/gabby_giffords_and_mark_kelly_be_passionate_be_courageous_be_your_best',
+        'info_dict': {
+            'id': '1972',
+            'ext': 'flv',
+            'title': 'Be passionate. Be courageous. Be your best.',
+            'uploader': 'Gabby Giffords and Mark Kelly',
+            'description': 'md5:d89e1d8ebafdac8e55df4c219ecdbfe9',
+        },
+        'params': {
+            # rtmp download
+            'skip_download': True,
+        },
    }]

-    _FORMATS_PREFERENCE = {
-        'low': 1,
-        'medium': 2,
-        'high': 3,
+    _NATIVE_FORMATS = {
+        'low': {'preference': 1, 'width': 320, 'height': 180},
+        'medium': {'preference': 2, 'width': 512, 'height': 288},
+        'high': {'preference': 3, 'width': 854, 'height': 480},
    }

    def _extract_info(self, webpage):
@ -98,12 +112,26 @@ class TEDIE(SubtitlesInfoExtractor):
        talk_info = self._extract_info(webpage)['talks'][0]

        formats = [{
-            'ext': 'mp4',
            'url': format_url,
            'format_id': format_id,
            'format': format_id,
-            'preference': self._FORMATS_PREFERENCE.get(format_id, -1),
-        } for (format_id, format_url) in talk_info['nativeDownloads'].items()]
+        } for (format_id, format_url) in talk_info['nativeDownloads'].items() if format_url is not None]
+        if formats:
+            for f in formats:
+                finfo = self._NATIVE_FORMATS.get(f['format_id'])
+                if finfo:
+                    f.update(finfo)
+        else:
+            # Use rtmp downloads
+            formats = [{
+                'format_id': f['name'],
+                'url': talk_info['streamer'],
+                'play_path': f['file'],
+                'ext': 'flv',
+                'width': f['width'],
+                'height': f['height'],
+                'tbr': f['bitrate'],
+            } for f in talk_info['resources']['rtmp']]
        self._sort_formats(formats)

        video_id = compat_str(talk_info['id'])
--- a/youtube_dl/extractor/tube8.py
+++ b/youtube_dl/extractor/tube8.py
@ -1,63 +1,83 @@
-import os
+from __future__ import unicode_literals
+
+import json
 import re

 from .common import InfoExtractor
 from ..utils import (
    compat_urllib_parse_urlparse,
    compat_urllib_request,
+    int_or_none,
+    str_to_int,
 )
-from ..aes import (
-    aes_decrypt_text
-)
+from ..aes import aes_decrypt_text
+

 class Tube8IE(InfoExtractor):  # Extractor for tube8.com video pages
-    _VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>tube8\.com/.+?/(?P<videoid>\d+)/?)$'
+    _VALID_URL = r'https?://(?:www\.)?tube8\.com/(?:[^/]+/){2}(?P<id>\d+)'  # two path segments, then the numeric id
    _TEST = {
-        u'url': u'http://www.tube8.com/teen/kasia-music-video/229795/',
-        u'file': u'229795.mp4',
-        u'md5': u'e9e0b0c86734e5e3766e653509475db0',
-        u'info_dict': {
-            u"description": u"hot teen Kasia grinding", 
-            u"uploader": u"unknown", 
-            u"title": u"Kasia music video",
-            u"age_limit": 18,
+        'url': 'http://www.tube8.com/teen/kasia-music-video/229795/',
+        'file': '229795.mp4',
+        'md5': 'e9e0b0c86734e5e3766e653509475db0',
+        'info_dict': {
+            'description': 'hot teen Kasia grinding',
+            'uploader': 'unknown',
+            'title': 'Kasia music video',
+            'age_limit': 18,
        }
    }

    def _real_extract(self, url):  # Download the page, read the flashvars JSON and scrape the remaining metadata
        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('videoid')
-        url = 'http://www.' + mobj.group('url')
+        video_id = mobj.group('id')

        req = compat_urllib_request.Request(url)
        req.add_header('Cookie', 'age_verified=1')  # the page request carries the age_verified cookie
        webpage = self._download_webpage(req, video_id)

-        video_title = self._html_search_regex(r'videotitle	="([^"]+)', webpage, u'title')
-        video_description = self._html_search_regex(r'>Description:</strong>(.+?)<', webpage, u'description', fatal=False)
-        video_uploader = self._html_search_regex(r'>Submitted by:</strong>(?:\s|<[^>]*>)*(.+?)<', webpage, u'uploader', fatal=False)
-        thumbnail = self._html_search_regex(r'"image_url":"([^"]+)', webpage, u'thumbnail', fatal=False)
-        if thumbnail:
-            thumbnail = thumbnail.replace('\\/', '/')
+        flashvars = json.loads(self._html_search_regex(
+            r'var flashvars\s*=\s*({.+?})', webpage, 'flashvars'))  # NOTE(review): non-greedy match ends at the first '}' - assumes no nested braces in flashvars

-        video_url = self._html_search_regex(r'"video_url":"([^"]+)', webpage, u'video_url')
-        if webpage.find('"encrypted":true')!=-1:
-            password = self._html_search_regex(r'"video_title":"([^"]+)', webpage, u'password')
-            video_url = aes_decrypt_text(video_url, password, 32).decode('utf-8')
+        video_url = flashvars['video_url']
+        if flashvars.get('encrypted') is True:
+            video_url = aes_decrypt_text(video_url, flashvars['video_title'], 32).decode('utf-8')  # the video title is passed as the decryption password
        path = compat_urllib_parse_urlparse(video_url).path
-        extension = os.path.splitext(path)[1][1:]
-        format = path.split('/')[4].split('_')[:2]
-        format = "-".join(format)
+        format_id = '-'.join(path.split('/')[4].split('_')[:2])  # element 4 of the '/'-split path, first two '_'-separated parts joined by '-'
+
+        thumbnail = flashvars.get('image_url')
+
+        title = self._html_search_regex(
+            r'videotitle\s*=\s*"([^"]+)', webpage, 'title')
+        description = self._html_search_regex(
+            r'>Description:</strong>(.+?)<', webpage, 'description', fatal=False)
+        uploader = self._html_search_regex(
+            r'<strong class="video-username">(?:<a href="[^"]+">)?([^<]+)(?:</a>)?</strong>',
+            webpage, 'uploader', fatal=False)  # the uploader name may or may not be wrapped in a link
+
+        like_count = int_or_none(self._html_search_regex(
+            r"rupVar\s*=\s*'(\d+)'", webpage, 'like count', fatal=False))
+        dislike_count = int_or_none(self._html_search_regex(
+            r"rdownVar\s*=\s*'(\d+)'", webpage, 'dislike count', fatal=False))
+        view_count = self._html_search_regex(
+            r'<strong>Views: </strong>([\d,\.]+)</li>', webpage, 'view count', fatal=False)  # captured text may contain ',' or '.' separators
+        if view_count:
+            view_count = str_to_int(view_count)  # converted only when the regex found something
+        comment_count = self._html_search_regex(
+            r'<span id="allCommentsCount">(\d+)</span>', webpage, 'comment count', fatal=False)
+        if comment_count:
+            comment_count = str_to_int(comment_count)

        return {
            'id': video_id,
-            'uploader': video_uploader,
-            'title': video_title,
-            'thumbnail': thumbnail,
-            'description': video_description,
            'url': video_url,
-            'ext': extension,
-            'format': format,
-            'format_id': format,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'uploader': uploader,
+            'format_id': format_id,
+            'view_count': view_count,
+            'like_count': like_count,
+            'dislike_count': dislike_count,
+            'comment_count': comment_count,
            'age_limit': 18,  # always reported as 18
        }
--- a/youtube_dl/extractor/ustream.py
+++ b/youtube_dl/extractor/ustream.py
@ -11,7 +11,7 @@ from ..utils import (


 class UstreamIE(InfoExtractor):
-    _VALID_URL = r'https?://www\.ustream\.tv/recorded/(?P<videoID>\d+)'
+    _VALID_URL = r'https?://www\.ustream\.tv/(?P<type>recorded|embed)/(?P<videoID>\d+)'
    IE_NAME = 'ustream'
    _TEST = {
        'url': 'http://www.ustream.tv/recorded/20274954',
@ -25,6 +25,13 @@ class UstreamIE(InfoExtractor):

    def _real_extract(self, url):
        m = re.match(self._VALID_URL, url)
+        if m.group('type') == 'embed':
+            video_id = m.group('videoID')
+            webpage = self._download_webpage(url, video_id)
+            desktop_video_id = self._html_search_regex(r'ContentVideoIds=\["([^"]*?)"\]', webpage, 'desktop_video_id')
+            desktop_url = 'http://www.ustream.tv/recorded/' + desktop_video_id
+            return self.url_result(desktop_url, 'Ustream')
+
        video_id = m.group('videoID')

        video_url = 'http://tcdn.ustream.tv/video/%s' % video_id
--- a/youtube_dl/extractor/videoweed.py
+++ b/youtube_dl/extractor/videoweed.py
@ -0,0 +1,26 @@
+from __future__ import unicode_literals
+
+from .novamov import NovaMovIE
+
+
+class VideoWeedIE(NovaMovIE):  # Defines only class attributes; extraction logic is presumably inherited from NovaMovIE
+    IE_NAME = 'videoweed'
+    IE_DESC = 'VideoWeed'
+
+    _VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': 'videoweed\.(?:es|com)'}  # fills the parent's URL template; host matches videoweed.es and videoweed.com
+
+    _HOST = 'www.videoweed.es'
+
+    _FILE_DELETED_REGEX = r'>This file no longer exists on our servers.<'
+    _TITLE_REGEX = r'<h1 class="text_shadow">([^<]+)</h1>'  # group 1 is the heading text
+
+    _TEST = {
+        'url': 'http://www.videoweed.es/file/b42178afbea14',
+        'md5': 'abd31a2132947262c50429e1d16c1bfd',
+        'info_dict': {
+            'id': 'b42178afbea14',
+            'ext': 'flv',
+            'title': 'optical illusion  dissapeared image magic illusion',
+            'description': ''
+        },
+    }
--- a/youtube_dl/extractor/weibo.py
+++ b/youtube_dl/extractor/weibo.py
@ -1,10 +1,11 @@
 # coding: utf-8
+from __future__ import unicode_literals

 import re
-import json

 from .common import InfoExtractor

+
 class WeiboIE(InfoExtractor):
    """
    The videos in Weibo come from different sites, this IE just finds the link
@ -13,16 +14,16 @@ class WeiboIE(InfoExtractor):
    _VALID_URL = r'https?://video\.weibo\.com/v/weishipin/t_(?P<id>.+?)\.htm'

    _TEST = {
-        u'add_ie': ['Sina'],
-        u'url': u'http://video.weibo.com/v/weishipin/t_zjUw2kZ.htm',
-        u'file': u'98322879.flv',
-        u'info_dict': {
-            u'title': u'魔声耳机最新广告“All Eyes On Us”',
+        'url': 'http://video.weibo.com/v/weishipin/t_zjUw2kZ.htm',
+        'info_dict': {
+            'id': '98322879',
+            'ext': 'flv',
+            'title': '魔声耳机最新广告“All Eyes On Us”',
        },
-        u'note': u'Sina video',
-        u'params': {
-            u'skip_download': True,
+        'params': {
+            'skip_download': True,
        },
+        'add_ie': ['Sina'],
    }

    # Additional example videos from different sites
@ -33,17 +34,16 @@ class WeiboIE(InfoExtractor):
        mobj = re.match(self._VALID_URL, url, flags=re.VERBOSE)
        video_id = mobj.group('id')
        info_url = 'http://video.weibo.com/?s=v&a=play_list&format=json&mix_video_id=t_%s' % video_id
-        info_page = self._download_webpage(info_url, video_id)
-        info = json.loads(info_page)
+        info = self._download_json(info_url, video_id)

        videos_urls = map(lambda v: v['play_page_url'], info['result']['data'])
-        #Prefer sina video since they have thumbnails
-        videos_urls = sorted(videos_urls, key=lambda u: u'video.sina.com' in u)
+        # Prefer sina video since they have thumbnails
+        videos_urls = sorted(videos_urls, key=lambda u: 'video.sina.com' in u)
        player_url = videos_urls[-1]
-        m_sina = re.match(r'https?://video.sina.com.cn/v/b/(\d+)-\d+.html', player_url)
+        m_sina = re.match(r'https?://video\.sina\.com\.cn/v/b/(\d+)-\d+\.html',
+            player_url)
        if m_sina is not None:
            self.to_screen('Sina video detected')
            sina_id = m_sina.group(1)
            player_url = 'http://you.video.sina.com.cn/swf/quotePlayer.swf?vid=%s' % sina_id
        return self.url_result(player_url)
-
--- a/youtube_dl/extractor/yahoo.py
+++ b/youtube_dl/extractor/yahoo.py
@ -15,22 +15,24 @@ from ..utils import (

 class YahooIE(InfoExtractor):
    IE_DESC = 'Yahoo screen'
-    _VALID_URL = r'http://screen\.yahoo\.com/.*?-(?P<id>\d*?)\.html'
+    _VALID_URL = r'https?://screen\.yahoo\.com/.*?-(?P<id>[0-9]+)(?:-[a-z]+)?\.html'
    _TESTS = [
        {
            'url': 'http://screen.yahoo.com/julian-smith-travis-legg-watch-214727115.html',
-            'file': '214727115.mp4',
            'md5': '4962b075c08be8690a922ee026d05e69',
            'info_dict': {
+                'id': '214727115',
+                'ext': 'mp4',
                'title': 'Julian Smith & Travis Legg Watch Julian Smith',
                'description': 'Julian and Travis watch Julian Smith',
            },
        },
        {
            'url': 'http://screen.yahoo.com/wired/codefellas-s1-ep12-cougar-lies-103000935.html',
-            'file': '103000935.mp4',
            'md5': 'd6e6fc6e1313c608f316ddad7b82b306',
            'info_dict': {
+                'id': '103000935',
+                'ext': 'mp4',
                'title': 'Codefellas - The Cougar Lies with Spanish Moss',
                'description': 'Agent Topple\'s mustache does its dirty work, and Nicole brokers a deal for peace. But why is the NSA collecting millions of Instagram brunch photos? And if your waffles have nothing to hide, what are they so worried about?',
            },
@ -60,10 +62,9 @@ class YahooIE(InfoExtractor):
            'env': 'prod',
            'format': 'json',
        })
-        query_result_json = self._download_webpage(
+        query_result = self._download_json(
            'http://video.query.yahoo.com/v1/public/yql?' + data,
            video_id, 'Downloading video info')
-        query_result = json.loads(query_result_json)
        info = query_result['query']['results']['mediaObj'][0]
        meta = info['meta']

@ -86,7 +87,6 @@ class YahooIE(InfoExtractor):
            else:
                format_url = compat_urlparse.urljoin(host, path)
                format_info['url'] = format_url
-                
            formats.append(format_info)

        self._sort_formats(formats)
@ -134,27 +134,25 @@ class YahooSearchIE(SearchInfoExtractor):

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
-
-        res = {
-            '_type': 'playlist',
-            'id': query,
-            'entries': []
-        }
-        for pagenum in itertools.count(0): 
+        entries = []
+        for pagenum in itertools.count(0):
            result_url = 'http://video.search.yahoo.com/search/?p=%s&fr=screen&o=js&gs=0&b=%d' % (compat_urllib_parse.quote_plus(query), pagenum * 30)
-            webpage = self._download_webpage(result_url, query,
-                                             note='Downloading results page '+str(pagenum+1))
-            info = json.loads(webpage)
+            info = self._download_json(result_url, query,
+                note='Downloading results page '+str(pagenum+1))
            m = info['m']
            results = info['results']

            for (i, r) in enumerate(results):
-                if (pagenum * 30) +i >= n:
+                if (pagenum * 30) + i >= n:
                    break
                mobj = re.search(r'(?P<url>screen\.yahoo\.com/.*?-\d*?\.html)"', r)
                e = self.url_result('http://' + mobj.group('url'), 'Yahoo')
-                res['entries'].append(e)
-            if (pagenum * 30 +i >= n) or (m['last'] >= (m['total'] -1)):
+                entries.append(e)
+            if (pagenum * 30 + i >= n) or (m['last'] >= (m['total'] - 1)):
                break

-        return res
+        return {
+            '_type': 'playlist',
+            'id': query,
+            'entries': entries,
+        }
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@ -151,6 +151,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
                             )
                         ))
                         |youtu\.be/                                          # just youtu.be/xxxx
+                         |https?://(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
                         )
                     )?                                                       # all until now is optional -> you can pass the naked ID
                     ([0-9A-Za-z_-]{11})                                      # here is it! the YouTube video ID
@ -1418,7 +1419,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
                self.to_screen(u'Downloading just video %s because of --no-playlist' % video_id)
                return self.url_result(video_id, 'Youtube', video_id=video_id)
            else:
-                self.to_screen(u'Downloading playlist PL%s - add --no-playlist to just download video %s' % (playlist_id, video_id))
+                self.to_screen(u'Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))

        if playlist_id.startswith('RD'):
            # Mixes require a custom extraction process
--- a/youtube_dl/postprocessor/init.py
+++ b/youtube_dl/postprocessor/init.py
@ -1,5 +1,7 @@

+from .atomicparsley import AtomicParsleyPP
 from .ffmpeg import (
+    FFmpegAudioFixPP,
    FFmpegMergerPP,
    FFmpegMetadataPP,
    FFmpegVideoConvertor,
@ -10,6 +12,8 @@ from .ffmpeg import (
 from .xattrpp import XAttrMetadataPP

 __all__ = [
+    'AtomicParsleyPP',
+    'FFmpegAudioFixPP',
    'FFmpegMergerPP',
    'FFmpegMetadataPP',
    'FFmpegVideoConvertor',
--- a/youtube_dl/postprocessor/atomicparsley.py
+++ b/youtube_dl/postprocessor/atomicparsley.py
@ -0,0 +1,56 @@
+# -*- coding: utf-8 -*-
+from __future__ import unicode_literals
+
+
+import os
+import subprocess
+
+from .common import PostProcessor
+
+from ..utils import (
+    check_executable,
+    compat_urlretrieve,
+    encodeFilename,
+    PostProcessingError,
+    prepend_extension,
+    shell_quote
+)
+
+
+class AtomicParsleyPPError(PostProcessingError):  # Raised by AtomicParsleyPP.run: tool missing, thumbnail missing, or non-zero exit
+    pass
+
+
+class AtomicParsleyPP(PostProcessor):  # Post-processor that adds info['thumbnail'] as artwork by running the AtomicParsley tool
+    def run(self, info):  # Returns (True, info); raises AtomicParsleyPPError if the tool or thumbnail is missing or the command fails
+        if not check_executable('AtomicParsley', ['-v']):
+            raise AtomicParsleyPPError('AtomicParsley was not found. Please install.')
+
+        filename = info['filepath']
+        temp_filename = prepend_extension(filename, 'temp')  # output path handed to AtomicParsley via -o
+        temp_thumbnail = prepend_extension(filename, 'thumb')  # local path the thumbnail is saved to
+
+        if not info.get('thumbnail'):
+            raise AtomicParsleyPPError('Thumbnail was not found. Nothing to do.')
+
+        compat_urlretrieve(info['thumbnail'], temp_thumbnail)  # presumably urlretrieve: fetches the thumbnail URL into temp_thumbnail
+
+        cmd = ['AtomicParsley', filename, '--artwork', temp_thumbnail, '-o', temp_filename]
+
+        self._downloader.to_screen('[atomicparsley] Adding thumbnail to "%s"' % filename)
+
+        if self._downloader.params.get('verbose', False):
+            self._downloader.to_screen('[debug] AtomicParsley command line: %s' % shell_quote(cmd))
+
+        p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+        stdout, stderr = p.communicate()
+
+        if p.returncode != 0:
+            msg = stderr.decode('utf-8', 'replace').strip()
+            raise AtomicParsleyPPError(msg)  # NOTE(review): temp_thumbnail is not removed on this failure path
+
+        os.remove(encodeFilename(filename))  # the original is deleted, then the tool's output takes its name
+        os.remove(encodeFilename(temp_thumbnail))
+        os.rename(encodeFilename(temp_filename), encodeFilename(filename))
+
+        return True, info
--- a/youtube_dl/postprocessor/ffmpeg.py
+++ b/youtube_dl/postprocessor/ffmpeg.py
@ -55,8 +55,7 @@ class FFmpegPostProcessor(PostProcessor):

        if self._downloader.params.get('verbose', False):
            self._downloader.to_screen(u'[debug] ffmpeg command line: %s' % shell_quote(cmd))
-        bcmd = [self._downloader.encode(c) for c in cmd]
-        p = subprocess.Popen(bcmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+        p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        stdout, stderr = p.communicate()
        if p.returncode != 0:
            stderr = stderr.decode('utf-8', 'replace')
@ -467,7 +466,11 @@ class FFmpegMetadataPP(FFmpegPostProcessor):
        filename = info['filepath']
        temp_filename = prepend_extension(filename, 'temp')

-        options = ['-c', 'copy']
+        if info['ext'] == u'm4a':
+            options = ['-vn', '-acodec', 'copy']
+        else:
+            options = ['-c', 'copy']
+
        for (name, value) in metadata.items():
            options.extend(['-metadata', '%s=%s' % (name, value)])

@ -487,6 +490,21 @@ class FFmpegMergerPP(FFmpegPostProcessor):
        return True, info


+class FFmpegAudioFixPP(FFmpegPostProcessor):  # Passes the file through ffmpeg with the options below and replaces it in place
+    def run(self, info):  # Returns (True, info)
+        filename = info['filepath']
+        temp_filename = prepend_extension(filename, 'temp')
+
+        options = ['-vn', '-acodec', 'copy']  # ffmpeg: no video output, copy the audio stream without re-encoding
+        self._downloader.to_screen(u'[ffmpeg] Fixing audio file "%s"' % filename)
+        self.run_ffmpeg(filename, temp_filename, options)
+
+        os.remove(encodeFilename(filename))  # swap the ffmpeg output in for the original file
+        os.rename(encodeFilename(temp_filename), encodeFilename(filename))
+
+        return True, info
+
+
 class FFmpegJoinVideosPP(FFmpegPostProcessor):
    def run(self, information):
        filename = information['filepath']
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@ -2,6 +2,7 @@
 # -*- coding: utf-8 -*-

 import calendar
+import codecs
 import contextlib
 import ctypes
 import datetime
@ -913,25 +914,84 @@ def platform_name():
    return res


-def write_string(s, out=None):
+def _windows_write_string(s, out):
+    """ Returns True if the string was written using special methods,
+    False if it has yet to be written out."""
+    # Adapted from http://stackoverflow.com/a/3259271/35070
+
+    import ctypes
+    import ctypes.wintypes
+
+    WIN_OUTPUT_IDS = {  # file descriptor -> argument for GetStdHandle
+        1: -11,  # Win32 STD_OUTPUT_HANDLE
+        2: -12,  # Win32 STD_ERROR_HANDLE
+    }
+
+    def ucs2_len(s):  # length in 16-bit units: characters above U+FFFF count as two
+        return sum((2 if ord(c) > 0xffff else 1) for c in s)
+
+    fileno = out.fileno()
+    if fileno not in WIN_OUTPUT_IDS:  # only descriptors 1 and 2 are handled here
+        return False
+
+    GetStdHandle = ctypes.WINFUNCTYPE(
+        ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)(
+        ("GetStdHandle", ctypes.windll.kernel32))
+    h = GetStdHandle(WIN_OUTPUT_IDS[fileno])
+
+    WriteConsoleW = ctypes.WINFUNCTYPE(
+        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR,
+        ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD),
+        ctypes.wintypes.LPVOID)(("WriteConsoleW", ctypes.windll.kernel32))
+    written = ctypes.wintypes.DWORD(0)  # out-parameter: WriteConsoleW stores the number of units written here
+
+    GetFileType = ctypes.WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)(("GetFileType", ctypes.windll.kernel32))
+    FILE_TYPE_CHAR = 0x0002
+    FILE_TYPE_REMOTE = 0x8000
+    GetConsoleMode = ctypes.WINFUNCTYPE(
+        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE,
+        ctypes.POINTER(ctypes.wintypes.DWORD))(
+        ("GetConsoleMode", ctypes.windll.kernel32))
+    INVALID_HANDLE_VALUE = ctypes.wintypes.DWORD(-1).value
+
+    def not_a_console(handle):  # True for invalid handles, non-character files, or when GetConsoleMode fails
+        if handle == INVALID_HANDLE_VALUE or handle is None:
+            return True
+        return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR
+                or GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)
+
+    if not_a_console(h):  # e.g. redirected output: let the caller write it the normal way
+        return False
+
+    remaining = ucs2_len(s)
+    while remaining > 0:  # NOTE(review): s is never advanced in this loop, so every pass hands WriteConsoleW the same start of s - confirm for strings longer than 1024 units
+        ret = WriteConsoleW(
+            h, s, min(remaining, 1024), ctypes.byref(written), None)  # at most 1024 units per call
+        if ret == 0:
+            raise OSError('Failed to write string')
+        remaining -= written.value  # NOTE(review): if a successful call reports 0 units written, this loop does not terminate
+    return True
+
+
+def write_string(s, out=None, encoding=None):  # Write text s to out (default sys.stderr), encoding to bytes where the stream needs it
    if out is None:
        out = sys.stderr
    assert type(s) == compat_str

+    if sys.platform == 'win32' and encoding is None and hasattr(out, 'fileno'):  # console path is tried only when no explicit encoding was requested
+        if _windows_write_string(s, out):
+            return  # already written to the console; note this returns before out.flush()
+
    if ('b' in getattr(out, 'mode', '') or
            sys.version_info[0] < 3):  # Python 2 lies about mode of sys.stderr
-        s = s.encode(preferredencoding(), 'ignore')
-    try:
+        byt = s.encode(encoding or preferredencoding(), 'ignore')  # characters the codec cannot represent are dropped
+        out.write(byt)
+    elif hasattr(out, 'buffer'):  # stream exposes a .buffer: encode here and write bytes to it
+        enc = encoding or getattr(out, 'encoding', None) or preferredencoding()
+        byt = s.encode(enc, 'ignore')
+        out.buffer.write(byt)
+    else:
        out.write(s)
-    except UnicodeEncodeError:
-        # In Windows shells, this can fail even when the codec is just charmap!?
-        # See https://wiki.python.org/moin/PrintFails#Issue
-        if sys.platform == 'win32' and hasattr(out, 'encoding'):
-            s = s.encode(out.encoding, 'ignore').decode(out.encoding)
-            out.write(s)
-        else:
-            raise
-
    out.flush()


@ -1267,9 +1327,11 @@ class PagedList(object):


 def uppercase_escape(s):  # Replace every \UXXXXXXXX escape (8 hex digits) in s with its decoded form
+    unicode_escape = codecs.getdecoder('unicode_escape')  # codec-level decoder, called directly on the matched text
    return re.sub(
        r'\\U[0-9a-fA-F]{8}',
-        lambda m: m.group(0).decode('unicode-escape'), s)
+        lambda m: unicode_escape(m.group(0))[0],  # decoder returns a tuple; element 0 is the decoded text
+        s)

 try:
    struct.pack(u'!I', 0)
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@ -1,2 +1,2 @@

-__version__ = '2014.04.04.2'
+__version__ = '2014.04.13'