Merge branch 'master' of https://github.com/rg3/youtube-dl into multipart_videos

2014-04-02 08:40:37 -07:00 · 2014-04-02 08:40:37 -07:00 · 5d0495f7b4
commit 5d0495f7b4
parent 840e7cf4f3 91a76c40c0
30 changed files with 610 additions and 358 deletions
--- a/README.md
+++ b/README.md
@ -65,6 +65,7 @@ which means you can modify it, redistribute it or use it however you like.
                                     configuration in ~/.config/youtube-dl.conf
                                     (%APPDATA%/youtube-dl/config.txt on
                                     Windows)
+    --encoding ENCODING              Force the specified encoding (experimental)

 ## Video Selection:
    --playlist-start NUMBER          playlist video to start at (default is 1)
--- a/test/test_all_urls.py
+++ b/test/test_all_urls.py
@ -144,7 +144,15 @@ class TestAllURLsMatching(unittest.TestCase):
        self.assertMatch('http://video.pbs.org/widget/partnerplayer/980042464/', ['PBS'])

    def test_ComedyCentralShows(self):
-        self.assertMatch('http://thedailyshow.cc.com/extended-interviews/xm3fnq/andrew-napolitano-extended-interview', ['ComedyCentralShows'])
+        self.assertMatch(
+            'http://thedailyshow.cc.com/extended-interviews/xm3fnq/andrew-napolitano-extended-interview',
+            ['ComedyCentralShows'])
+        self.assertMatch(
+            'http://thecolbertreport.cc.com/videos/29w6fx/-realhumanpraise-for-fox-news',
+            ['ComedyCentralShows'])
+        self.assertMatch(
+            'http://thecolbertreport.cc.com/videos/gh6urb/neil-degrasse-tyson-pt--1?xrs=eml_col_031114',
+            ['ComedyCentralShows'])

 if __name__ == '__main__':
    unittest.main()
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@ -8,6 +8,7 @@ import datetime
 import errno
 import io
 import json
+import locale
 import os
 import platform
 import re
@ -160,6 +161,7 @@ class YoutubeDL(object):
    include_ads:       Download ads as well
    default_search:    Prepend this string if an input url is not valid.
                       'auto' for elaborate guessing
+    encoding:          Use this encoding instead of the system-specified.

    The following parameters are not used by YoutubeDL itself, they are used by
    the FileDownloader:
@ -1219,6 +1221,9 @@ class YoutubeDL(object):
    def print_debug_header(self):
        if not self.params.get('verbose'):
            return
+
+        write_string('[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' %
+                 (locale.getpreferredencoding(), sys.getfilesystemencoding(), sys.stdout.encoding, self.get_encoding()))
        write_string('[debug] youtube-dl version ' + __version__ + '\n')
        try:
            sp = subprocess.Popen(
@ -1283,3 +1288,19 @@ class YoutubeDL(object):
        # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
        opener.addheaders = []
        self._opener = opener
+
+    def encode(self, s):
+        if isinstance(s, bytes):
+            return s  # Already encoded
+
+        try:
+            return s.encode(self.get_encoding())
+        except UnicodeEncodeError as err:
+            err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
+            raise
+
+    def get_encoding(self):
+        encoding = self.params.get('encoding')
+        if encoding is None:
+            encoding = preferredencoding()
+        return encoding
--- a/youtube_dl/init.py
+++ b/youtube_dl/init.py
@ -51,6 +51,7 @@ __authors__  = (
    'David Wagner',
    'Juan C. Olivares',
    'Mattias Harrysson',
+    'phaer',
 )

 __license__ = 'Public Domain'
@ -256,13 +257,17 @@ def parseOpts(overrideArguments=None):
    general.add_option(
        '--bidi-workaround', dest='bidi_workaround', action='store_true',
        help=u'Work around terminals that lack bidirectional text support. Requires bidiv or fribidi executable in PATH')
-    general.add_option('--default-search',
-            dest='default_search', metavar='PREFIX',
-            help='Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for  youtube-dl "large apple". By default (with value "auto") youtube-dl guesses.')
+    general.add_option(
+        '--default-search',
+        dest='default_search', metavar='PREFIX',
+        help='Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for  youtube-dl "large apple". By default (with value "auto") youtube-dl guesses.')
    general.add_option(
        '--ignore-config',
        action='store_true',
        help='Do not read configuration files. When given in the global configuration file /etc/youtube-dl.conf: do not read the user configuration in ~/.config/youtube-dl.conf (%APPDATA%/youtube-dl/config.txt on Windows)')
+    general.add_option(
+        '--encoding', dest='encoding', metavar='ENCODING',
+        help='Force the specified encoding (experimental)')

    selection.add_option(
        '--playlist-start',
@ -542,8 +547,6 @@ def parseOpts(overrideArguments=None):
            write_string(u'[debug] System config: ' + repr(_hide_login_info(systemConf)) + '\n')
            write_string(u'[debug] User config: ' + repr(_hide_login_info(userConf)) + '\n')
            write_string(u'[debug] Command-line args: ' + repr(_hide_login_info(commandLineConf)) + '\n')
-            write_string(u'[debug] Encodings: locale %r, fs %r, out %r, pref: %r\n' %
-                         (locale.getpreferredencoding(), sys.getfilesystemencoding(), sys.stdout.encoding, preferredencoding()))

    return parser, opts, args

@ -677,7 +680,7 @@ def _real_main(argv=None):
        date = DateRange.day(opts.date)
    else:
        date = DateRange(opts.dateafter, opts.datebefore)
-    if opts.default_search not in ('auto', None) and ':' not in opts.default_search:
+    if opts.default_search not in ('auto', 'auto_warning', None) and ':' not in opts.default_search:
        parser.error(u'--default-search invalid; did you forget a colon (:) at the end?')

    # Do not download videos when there are audio-only formats
@ -789,6 +792,7 @@ def _real_main(argv=None):
        'include_ads': opts.include_ads,
        'default_search': opts.default_search,
        'youtube_include_dash_manifest': opts.youtube_include_dash_manifest,
+        'encoding': opts.encoding,
    }

    with YoutubeDL(ydl_opts) as ydl:
--- a/youtube_dl/extractor/init.py
+++ b/youtube_dl/extractor/init.py
@ -156,6 +156,7 @@ from .mtv import (
    MTVIE,
    MTVIggyIE,
 )
+from .musicplayon import MusicPlayOnIE
 from .muzu import MuzuTVIE
 from .myspace import MySpaceIE
 from .myspass import MySpassIE
@ -285,7 +286,10 @@ from .vk import VKIE
 from .vube import VubeIE
 from .washingtonpost import WashingtonPostIE
 from .wat import WatIE
-from .wdr import WDRIE
+from .wdr import (
+    WDRIE,
+    WDRMausIE,
+)
 from .weibo import WeiboIE
 from .wimp import WimpIE
 from .wistia import WistiaIE
--- a/youtube_dl/extractor/appletrailers.py
+++ b/youtube_dl/extractor/appletrailers.py
@ -6,7 +6,6 @@ import json
 from .common import InfoExtractor
 from ..utils import (
    compat_urlparse,
-    determine_ext,
 )


--- a/youtube_dl/extractor/bloomberg.py
+++ b/youtube_dl/extractor/bloomberg.py
@ -1,22 +1,21 @@
+from __future__ import unicode_literals
+
 import re

 from .common import InfoExtractor
-from .ooyala import OoyalaIE


 class BloombergIE(InfoExtractor):
    _VALID_URL = r'https?://www\.bloomberg\.com/video/(?P<name>.+?)\.html'

    _TEST = {
-        u'url': u'http://www.bloomberg.com/video/shah-s-presentation-on-foreign-exchange-strategies-qurhIVlJSB6hzkVi229d8g.html',
-        u'file': u'12bzhqZTqQHmmlA8I-i0NpzJgcG5NNYX.mp4',
-        u'info_dict': {
-            u'title': u'Shah\'s Presentation on Foreign-Exchange Strategies',
-            u'description': u'md5:abc86e5236f9f0e4866c59ad36736686',
-        },
-        u'params': {
-            # Requires ffmpeg (m3u8 manifest)
-            u'skip_download': True,
+        'url': 'http://www.bloomberg.com/video/shah-s-presentation-on-foreign-exchange-strategies-qurhIVlJSB6hzkVi229d8g.html',
+        'md5': '7bf08858ff7c203c870e8a6190e221e5',
+        'info_dict': {
+            'id': 'qurhIVlJSB6hzkVi229d8g',
+            'ext': 'flv',
+            'title': 'Shah\'s Presentation on Foreign-Exchange Strategies',
+            'description': 'md5:0681e0d30dcdfc6abf34594961d8ea88',
        },
    }

@ -24,7 +23,16 @@ class BloombergIE(InfoExtractor):
        mobj = re.match(self._VALID_URL, url)
        name = mobj.group('name')
        webpage = self._download_webpage(url, name)
-        embed_code = self._search_regex(
-            r'<source src="https?://[^/]+/[^/]+/[^/]+/([^/]+)', webpage,
-            'embed code')
-        return OoyalaIE._build_url_result(embed_code)
+        f4m_url = self._search_regex(
+            r'<source src="(https?://[^"]+\.f4m.*?)"', webpage,
+            'f4m url')
+        title = re.sub(': Video$', '', self._og_search_title(webpage))
+
+        return {
+            'id': name.split('-')[-1],
+            'title': title,
+            'url': f4m_url,
+            'ext': 'flv',
+            'description': self._og_search_description(webpage),
+            'thumbnail': self._og_search_thumbnail(webpage),
+        }
--- a/youtube_dl/extractor/brightcove.py
+++ b/youtube_dl/extractor/brightcove.py
@ -87,7 +87,7 @@ class BrightcoveIE(InfoExtractor):
        object_str = object_str.replace('<--', '<!--')
        object_str = fix_xml_ampersands(object_str)

-        object_doc = xml.etree.ElementTree.fromstring(object_str)
+        object_doc = xml.etree.ElementTree.fromstring(object_str.encode('utf-8'))

        fv_el = find_xpath_attr(object_doc, './param', 'name', 'flashVars')
        if fv_el is not None:
--- a/youtube_dl/extractor/comedycentral.py
+++ b/youtube_dl/extractor/comedycentral.py
@ -43,11 +43,13 @@ class ComedyCentralShowsIE(InfoExtractor):
                          (?P<showname>thedailyshow|thecolbertreport)\.(?:cc\.)?com/
                         (full-episodes/(?P<episode>.*)|
                          (?P<clip>
-                              (the-colbert-report-(videos|collections)/(?P<clipID>[0-9]+)/[^/]*/(?P<cntitle>.*?))
-                              |(watch/(?P<date>[^/]*)/(?P<tdstitle>.*)))|
+                              (?:videos/[^/]+/(?P<videotitle>[^/?#]+))
+                              |(the-colbert-report-(videos|collections)/(?P<clipID>[0-9]+)/[^/]*/(?P<cntitle>.*?))
+                              |(watch/(?P<date>[^/]*)/(?P<tdstitle>.*))
+                          )|
                          (?P<interview>
                              extended-interviews/(?P<interID>[0-9a-z]+)/(?:playlist_tds_extended_)?(?P<interview_title>.*?)(/.*?)?)))
-                     $'''
+                     (?:[?#].*|$)'''
    _TEST = {
        'url': 'http://thedailyshow.cc.com/watch/thu-december-13-2012/kristen-stewart',
        'md5': '4e2f5cb088a83cd8cdb7756132f9739d',
@ -57,7 +59,7 @@ class ComedyCentralShowsIE(InfoExtractor):
            'upload_date': '20121213',
            'description': 'Kristen Stewart learns to let loose in "On the Road."',
            'uploader': 'thedailyshow',
-            'title': 'thedailyshow-kristen-stewart part 1',
+            'title': 'thedailyshow kristen-stewart part 1',
        }
    }

@ -102,7 +104,9 @@ class ComedyCentralShowsIE(InfoExtractor):
            assert mobj is not None

        if mobj.group('clip'):
-            if mobj.group('showname') == 'thedailyshow':
+            if mobj.group('videotitle'):
+                epTitle = mobj.group('videotitle')
+            elif mobj.group('showname') == 'thedailyshow':
                epTitle = mobj.group('tdstitle')
            else:
                epTitle = mobj.group('cntitle')
@ -161,7 +165,7 @@ class ComedyCentralShowsIE(InfoExtractor):
            content = itemEl.find('.//{http://search.yahoo.com/mrss/}content')
            duration = float_or_none(content.attrib.get('duration'))
            mediagen_url = content.attrib['url']
-            guid = itemEl.find('.//guid').text.rpartition(':')[-1]
+            guid = itemEl.find('./guid').text.rpartition(':')[-1]

            cdoc = self._download_xml(
                mediagen_url, epTitle,
--- a/youtube_dl/extractor/discovery.py
+++ b/youtube_dl/extractor/discovery.py
@ -10,9 +10,10 @@ class DiscoveryIE(InfoExtractor):
    _VALID_URL = r'http://dsc\.discovery\.com\/[a-zA-Z0-9\-]*/[a-zA-Z0-9\-]*/videos/(?P<id>[a-zA-Z0-9\-]*)(.htm)?'
    _TEST = {
        'url': 'http://dsc.discovery.com/tv-shows/mythbusters/videos/mission-impossible-outtakes.htm',
-        'file': '614784.mp4',
        'md5': 'e12614f9ee303a6ccef415cb0793eba2',
        'info_dict': {
+            'id': '614784',
+            'ext': 'mp4',
            'title': 'MythBusters: Mission Impossible Outtakes',
            'description': ('Watch Jamie Hyneman and Adam Savage practice being'
                ' each other -- to the point of confusing Jamie\'s dog -- and '
@ -34,7 +35,7 @@ class DiscoveryIE(InfoExtractor):
        formats = []
        for f in info['mp4']:
            formats.append(
-                {'url': f['src'], r'ext': r'mp4', 'tbr': int(f['bitrate'][:-1])})
+                {'url': f['src'], 'ext': 'mp4', 'tbr': int(f['bitrate'][:-1])})

        return {
            'id': info['contentId'],
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@ -82,6 +82,17 @@ class GenericIE(InfoExtractor):
            },
            'add_ie': ['Brightcove'],
        },
+        {
+            'url': 'http://www.championat.com/video/football/v/87/87499.html',
+            'md5': 'fb973ecf6e4a78a67453647444222983',
+            'info_dict': {
+                'id': '3414141473001',
+                'ext': 'mp4',
+                'title': 'Видео. Удаление Дзагоева (ЦСКА)',
+                'description': 'Онлайн-трансляция матча ЦСКА - "Волга"',
+                'uploader': 'Championat',
+            },
+        },
        # Direct link to a video
        {
            'url': 'http://media.w3.org/2010/05/sintel/trailer.mp4',
@ -316,13 +327,16 @@ class GenericIE(InfoExtractor):
        if not parsed_url.scheme:
            default_search = self._downloader.params.get('default_search')
            if default_search is None:
-                default_search = 'auto'
+                default_search = 'auto_warning'

-            if default_search == 'auto':
+            if default_search in ('auto', 'auto_warning'):
                if '/' in url:
                    self._downloader.report_warning('The url doesn\'t specify the protocol, trying with http')
                    return self.url_result('http://' + url)
                else:
+                    if default_search == 'auto_warning':
+                        self._downloader.report_warning(
+                            'Falling back to youtube search for  %s . Set --default-search to "auto" to suppress this warning.' % url)
                    return self.url_result('ytsearch:' + url)
            else:
                assert ':' in default_search
--- a/youtube_dl/extractor/huffpost.py
+++ b/youtube_dl/extractor/huffpost.py
@ -21,9 +21,10 @@ class HuffPostIE(InfoExtractor):

    _TEST = {
        'url': 'http://live.huffingtonpost.com/r/segment/legalese-it/52dd3e4b02a7602131000677',
-        'file': '52dd3e4b02a7602131000677.mp4',
        'md5': '55f5e8981c1c80a64706a44b74833de8',
        'info_dict': {
+            'id': '52dd3e4b02a7602131000677',
+            'ext': 'mp4',
            'title': 'Legalese It! with @MikeSacksHP',
            'description': 'This week on Legalese It, Mike talks to David Bosco about his new book on the ICC, "Rough Justice," he also discusses the Virginia AG\'s historic stance on gay marriage, the execution of Edgar Tamayo, the ICC\'s delay of Kenya\'s President and more.  ',
            'duration': 1549,
--- a/youtube_dl/extractor/ign.py
+++ b/youtube_dl/extractor/ign.py
@ -1,10 +1,8 @@
+from __future__ import unicode_literals
+
 import re
-import json

 from .common import InfoExtractor
-from ..utils import (
-    determine_ext,
-)


 class IGNIE(InfoExtractor):
@ -14,52 +12,57 @@ class IGNIE(InfoExtractor):
    """

    _VALID_URL = r'https?://.+?\.ign\.com/(?P<type>videos|show_videos|articles|(?:[^/]*/feature))(/.+)?/(?P<name_or_id>.+)'
-    IE_NAME = u'ign.com'
+    IE_NAME = 'ign.com'

    _CONFIG_URL_TEMPLATE = 'http://www.ign.com/videos/configs/id/%s.config'
-    _DESCRIPTION_RE = [r'<span class="page-object-description">(.+?)</span>',
-                       r'id="my_show_video">.*?<p>(.*?)</p>',
-                       ]
+    _DESCRIPTION_RE = [
+        r'<span class="page-object-description">(.+?)</span>',
+        r'id="my_show_video">.*?<p>(.*?)</p>',
+    ]

    _TESTS = [
        {
-            u'url': u'http://www.ign.com/videos/2013/06/05/the-last-of-us-review',
-            u'file': u'8f862beef863986b2785559b9e1aa599.mp4',
-            u'md5': u'eac8bdc1890980122c3b66f14bdd02e9',
-            u'info_dict': {
-                u'title': u'The Last of Us Review',
-                u'description': u'md5:c8946d4260a4d43a00d5ae8ed998870c',
+            'url': 'http://www.ign.com/videos/2013/06/05/the-last-of-us-review',
+            'md5': 'eac8bdc1890980122c3b66f14bdd02e9',
+            'info_dict': {
+                'id': '8f862beef863986b2785559b9e1aa599',
+                'ext': 'mp4',
+                'title': 'The Last of Us Review',
+                'description': 'md5:c8946d4260a4d43a00d5ae8ed998870c',
            }
        },
        {
-            u'url': u'http://me.ign.com/en/feature/15775/100-little-things-in-gta-5-that-will-blow-your-mind',
-            u'playlist': [
+            'url': 'http://me.ign.com/en/feature/15775/100-little-things-in-gta-5-that-will-blow-your-mind',
+            'playlist': [
                {
-                    u'file': u'5ebbd138523268b93c9141af17bec937.mp4',
-                    u'info_dict': {
-                        u'title': u'GTA 5 Video Review',
-                        u'description': u'Rockstar drops the mic on this generation of games. Watch our review of the masterly Grand Theft Auto V.',
+                    'info_dict': {
+                        'id': '5ebbd138523268b93c9141af17bec937',
+                        'ext': 'mp4',
+                        'title': 'GTA 5 Video Review',
+                        'description': 'Rockstar drops the mic on this generation of games. Watch our review of the masterly Grand Theft Auto V.',
                    },
                },
                {
-                    u'file': u'638672ee848ae4ff108df2a296418ee2.mp4',
-                    u'info_dict': {
-                        u'title': u'26 Twisted Moments from GTA 5 in Slow Motion',
-                        u'description': u'The twisted beauty of GTA 5 in stunning slow motion.',
+                    'info_dict': {
+                        'id': '638672ee848ae4ff108df2a296418ee2',
+                        'ext': 'mp4',
+                        'title': '26 Twisted Moments from GTA 5 in Slow Motion',
+                        'description': 'The twisted beauty of GTA 5 in stunning slow motion.',
                    },
                },
            ],
-            u'params': {
-                u'skip_download': True,
+            'params': {
+                'skip_download': True,
            },
        },
    ]

    def _find_video_id(self, webpage):
-        res_id = [r'data-video-id="(.+?)"',
-                  r'<object id="vid_(.+?)"',
-                  r'<meta name="og:image" content=".*/(.+?)-(.+?)/.+.jpg"',
-                  ]
+        res_id = [
+            r'data-video-id="(.+?)"',
+            r'<object id="vid_(.+?)"',
+            r'<meta name="og:image" content=".*/(.+?)-(.+?)/.+.jpg"',
+        ]
        return self._search_regex(res_id, webpage, 'video id')

    def _real_extract(self, url):
@ -68,7 +71,7 @@ class IGNIE(InfoExtractor):
        page_type = mobj.group('type')
        webpage = self._download_webpage(url, name_or_id)
        if page_type == 'articles':
-            video_url = self._search_regex(r'var videoUrl = "(.+?)"', webpage, u'video url')
+            video_url = self._search_regex(r'var videoUrl = "(.+?)"', webpage, 'video url')
            return self.url_result(video_url, ie='IGN')
        elif page_type != 'video':
            multiple_urls = re.findall(
@ -80,41 +83,37 @@ class IGNIE(InfoExtractor):
        video_id = self._find_video_id(webpage)
        result = self._get_video_info(video_id)
        description = self._html_search_regex(self._DESCRIPTION_RE,
-                                              webpage, 'video description',
-                                              flags=re.DOTALL)
+            webpage, 'video description', flags=re.DOTALL)
        result['description'] = description
        return result

    def _get_video_info(self, video_id):
        config_url = self._CONFIG_URL_TEMPLATE % video_id
-        config = json.loads(self._download_webpage(config_url, video_id,
-                            u'Downloading video info'))
+        config = self._download_json(config_url, video_id)
        media = config['playlist']['media']
-        video_url = media['url']

-        return {'id': media['metadata']['videoId'],
-                'url': video_url,
-                'ext': determine_ext(video_url),
-                'title': media['metadata']['title'],
-                'thumbnail': media['poster'][0]['url'].replace('{size}', 'grande'),
-                }
+        return {
+            'id': media['metadata']['videoId'],
+            'url': media['url'],
+            'title': media['metadata']['title'],
+            'thumbnail': media['poster'][0]['url'].replace('{size}', 'grande'),
+        }


 class OneUPIE(IGNIE):
-    """Extractor for 1up.com, it uses the ign videos system."""
-
    _VALID_URL = r'https?://gamevideos\.1up\.com/(?P<type>video)/id/(?P<name_or_id>.+)'
    IE_NAME = '1up.com'

    _DESCRIPTION_RE = r'<div id="vid_summary">(.+?)</div>'

    _TEST = {
-        u'url': u'http://gamevideos.1up.com/video/id/34976',
-        u'file': u'34976.mp4',
-        u'md5': u'68a54ce4ebc772e4b71e3123d413163d',
-        u'info_dict': {
-            u'title': u'Sniper Elite V2 - Trailer',
-            u'description': u'md5:5d289b722f5a6d940ca3136e9dae89cf',
+        'url': 'http://gamevideos.1up.com/video/id/34976',
+        'md5': '68a54ce4ebc772e4b71e3123d413163d',
+        'info_dict': {
+            'id': '34976',
+            'ext': 'mp4',
+            'title': 'Sniper Elite V2 - Trailer',
+            'description': 'md5:5d289b722f5a6d940ca3136e9dae89cf',
        }
    }

@ -123,7 +122,6 @@ class OneUPIE(IGNIE):

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
-        id = mobj.group('name_or_id')
        result = super(OneUPIE, self)._real_extract(url)
-        result['id'] = id
+        result['id'] = mobj.group('name_or_id')
        return result
--- a/youtube_dl/extractor/kickstarter.py
+++ b/youtube_dl/extractor/kickstarter.py
@ -1,37 +1,39 @@
+# encoding: utf-8
+from __future__ import unicode_literals
+
 import re

 from .common import InfoExtractor


 class KickStarterIE(InfoExtractor):
-    _VALID_URL = r'https?://www\.kickstarter\.com/projects/(?P<id>\d*)/.*'
+    _VALID_URL = r'https?://www\.kickstarter\.com/projects/(?P<id>[^/]*)/.*'
    _TEST = {
-        u"url": u"https://www.kickstarter.com/projects/1404461844/intersection-the-story-of-josh-grant?ref=home_location",
-        u"file": u"1404461844.mp4",
-        u"md5": u"c81addca81327ffa66c642b5d8b08cab",
-        u"info_dict": {
-            u"title": u"Intersection: The Story of Josh Grant by Kyle Cowling",
+        'url': 'https://www.kickstarter.com/projects/1404461844/intersection-the-story-of-josh-grant?ref=home_location',
+        'md5': 'c81addca81327ffa66c642b5d8b08cab',
+        'info_dict': {
+            'id': '1404461844',
+            'ext': 'mp4',
+            'title': 'Intersection: The Story of Josh Grant by Kyle Cowling',
+            'description': 'A unique motocross documentary that examines the '
+                'life and mind of one of sports most elite athletes: Josh Grant.',
        },
    }

    def _real_extract(self, url):
        m = re.match(self._VALID_URL, url)
        video_id = m.group('id')
-        webpage_src = self._download_webpage(url, video_id)
+        webpage = self._download_webpage(url, video_id)

-        video_url = self._search_regex(r'data-video="(.*?)">',
-            webpage_src, u'video URL')
-        if 'mp4' in video_url:
-            ext = 'mp4'
-        else:
-            ext = 'flv'
-        video_title = self._html_search_regex(r"<title>(.*?)</title>",
-            webpage_src, u'title').rpartition(u'\u2014 Kickstarter')[0].strip()
+        video_url = self._search_regex(r'data-video-url="(.*?)"',
+            webpage, 'video URL')
+        video_title = self._html_search_regex(r'<title>(.*?)</title>',
+            webpage, 'title').rpartition('— Kickstarter')[0].strip()

-        results = [{
-                    'id': video_id,
-                    'url': video_url,
-                    'title': video_title,
-                    'ext': ext,
-                    }]
-        return results
+        return {
+            'id': video_id,
+            'url': video_url,
+            'title': video_title,
+            'description': self._og_search_description(webpage),
+            'thumbnail': self._og_search_thumbnail(webpage),
+        }
--- a/youtube_dl/extractor/metacritic.py
+++ b/youtube_dl/extractor/metacritic.py
@ -13,8 +13,9 @@ class MetacriticIE(InfoExtractor):

    _TEST = {
        'url': 'http://www.metacritic.com/game/playstation-4/infamous-second-son/trailers/3698222',
-        'file': '3698222.mp4',
        'info_dict': {
+            'id': '3698222',
+            'ext': 'mp4',
            'title': 'inFamous: Second Son - inSide Sucker Punch: Smoke & Mirrors',
            'description': 'Take a peak behind-the-scenes to see how Sucker Punch brings smoke into the universe of inFAMOUS Second Son on the PS4.',
            'duration': 221,
--- a/youtube_dl/extractor/musicplayon.py
+++ b/youtube_dl/extractor/musicplayon.py
@ -0,0 +1,75 @@
+# encoding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import int_or_none
+
+
+class MusicPlayOnIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:.+?\.)?musicplayon\.com/play(?:-touch)?\?(?:v|pl=100&play)=(?P<id>\d+)'
+
+    _TEST = {
+        'url': 'http://en.musicplayon.com/play?v=433377',
+        'info_dict': {
+            'id': '433377',
+            'ext': 'mp4',
+            'title': 'Rick Ross - Interview On Chelsea Lately (2014)',
+            'description': 'Rick Ross Interview On Chelsea Lately',
+            'duration': 342,
+            'uploader': 'ultrafish',
+        },
+        'params': {
+            # m3u8 download
+            'skip_download': True,
+        },
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        page = self._download_webpage(url, video_id)
+
+        title = self._og_search_title(page)
+        description = self._og_search_description(page)
+        thumbnail = self._og_search_thumbnail(page)
+        duration = self._html_search_meta('video:duration', page, 'duration', fatal=False)
+        view_count = self._og_search_property('count', page, fatal=False)
+        uploader = self._html_search_regex(
+            r'<div>by&nbsp;<a href="[^"]+" class="purple">([^<]+)</a></div>', page, 'uploader', fatal=False)
+
+        formats = [
+            {
+                'url': 'http://media0-eu-nl.musicplayon.com/stream-mobile?id=%s&type=.mp4' % video_id,
+                'ext': 'mp4',
+            }
+        ]
+
+        manifest = self._download_webpage(
+            'http://en.musicplayon.com/manifest.m3u8?v=%s' % video_id, video_id, 'Downloading manifest')
+
+        for entry in manifest.split('#')[1:]:
+            if entry.startswith('EXT-X-STREAM-INF:'):
+                meta, url, _ = entry.split('\n')
+                params = dict(param.split('=') for param in meta.split(',')[1:])
+                formats.append({
+                    'url': url,
+                    'ext': 'mp4',
+                    'tbr': int(params['BANDWIDTH']),
+                    'width': int(params['RESOLUTION'].split('x')[1]),
+                    'height': int(params['RESOLUTION'].split('x')[-1]),
+                    'format_note': params['NAME'].replace('"', '').strip(),
+                })
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'uploader': uploader,
+            'duration': int_or_none(duration),
+            'view_count': int_or_none(view_count),
+            'formats': formats,
+        }
--- a/youtube_dl/extractor/nba.py
+++ b/youtube_dl/extractor/nba.py
@ -6,12 +6,13 @@ from .common import InfoExtractor


 class NBAIE(InfoExtractor):
-    _VALID_URL = r'^(?:https?://)?(?:watch\.|www\.)?nba\.com/(?:nba/)?video(/[^?]*?)(?:/index\.html)?(?:\?.*)?$'
+    _VALID_URL = r'https?://(?:watch\.|www\.)?nba\.com/(?:nba/)?video(?P<id>/[^?]*?)(?:/index\.html)?(?:\?.*)?$'
    _TEST = {
        'url': 'http://www.nba.com/video/games/nets/2012/12/04/0021200253-okc-bkn-recap.nba/index.html',
-        'file': u'0021200253-okc-bkn-recap.nba.mp4',
        'md5': u'c0edcfc37607344e2ff8f13c378c88a4',
        'info_dict': {
+            'id': '0021200253-okc-bkn-recap.nba',
+            'ext': 'mp4',
            'description': 'Kevin Durant scores 32 points and dishes out six assists as the Thunder beat the Nets in Brooklyn.',
            'title': 'Thunder vs. Nets',
        },
@ -19,7 +20,7 @@ class NBAIE(InfoExtractor):

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group(1)
+        video_id = mobj.group('id')

        webpage = self._download_webpage(url, video_id)

@ -33,7 +34,6 @@ class NBAIE(InfoExtractor):
        return {
            'id': shortened_video_id,
            'url': video_url,
-            'ext': 'mp4',
            'title': title,
            'description': description,
        }
--- a/youtube_dl/extractor/ntv.py
+++ b/youtube_dl/extractor/ntv.py
@ -5,7 +5,7 @@ import re

 from .common import InfoExtractor
 from ..utils import (
-    RegexNotFoundError,
+    ExtractorError,
    unescapeHTML
 )

@ -98,16 +98,15 @@ class NTVIE(InfoExtractor):

        page = self._download_webpage(url, video_id, 'Downloading page')

-        def extract(patterns, name, page, fatal=False):
-            for pattern in patterns:
-                mobj = re.search(pattern, page)
-                if mobj:
-                    return mobj.group(1)
-            if fatal:
-                raise RegexNotFoundError(u'Unable to extract %s' % name)
-            return None
+        for pattern in self._VIDEO_ID_REGEXES:
+            mobj = re.search(pattern, page)
+            if mobj:
+                break

-        video_id = extract(self._VIDEO_ID_REGEXES, 'video id', page, fatal=True)
+        if not mobj:
+            raise ExtractorError('No media links available for %s' % video_id)
+
+        video_id = mobj.group(1)

        player = self._download_xml('http://www.ntv.ru/vi%s/' % video_id, video_id, 'Downloading video XML')
        title = unescapeHTML(player.find('./data/title').text)
--- a/youtube_dl/extractor/pyvideo.py
+++ b/youtube_dl/extractor/pyvideo.py
@ -1,3 +1,5 @@
+from __future__ import unicode_literals
+
 import re
 import os

@ -5,45 +7,50 @@ from .common import InfoExtractor


 class PyvideoIE(InfoExtractor):
-    _VALID_URL = r'(?:http://)?(?:www\.)?pyvideo\.org/video/(?P<id>\d+)/(.*)'
-    _TESTS = [{
-        u'url': u'http://pyvideo.org/video/1737/become-a-logging-expert-in-30-minutes',
-        u'file': u'24_4WWkSmNo.mp4',
-        u'md5': u'de317418c8bc76b1fd8633e4f32acbc6',
-        u'info_dict': {
-            u"title": u"Become a logging expert in 30 minutes",
-            u"description": u"md5:9665350d466c67fb5b1598de379021f7",
-            u"upload_date": u"20130320",
-            u"uploader": u"NextDayVideo",
-            u"uploader_id": u"NextDayVideo",
+    _VALID_URL = r'http://(?:www\.)?pyvideo\.org/video/(?P<id>\d+)/(.*)'
+
+    _TESTS = [
+        {
+            'url': 'http://pyvideo.org/video/1737/become-a-logging-expert-in-30-minutes',
+            'md5': 'de317418c8bc76b1fd8633e4f32acbc6',
+            'info_dict': {
+                'id': '24_4WWkSmNo',
+                'ext': 'mp4',
+                'title': 'Become a logging expert in 30 minutes',
+                'description': 'md5:9665350d466c67fb5b1598de379021f7',
+                'upload_date': '20130320',
+                'uploader': 'NextDayVideo',
+                'uploader_id': 'NextDayVideo',
+            },
+            'add_ie': ['Youtube'],
        },
-        u'add_ie': ['Youtube'],
-    },
-    {
-        u'url': u'http://pyvideo.org/video/2542/gloriajw-spotifywitherikbernhardsson182m4v',
-        u'md5': u'5fe1c7e0a8aa5570330784c847ff6d12',
-        u'info_dict': {
-            u'id': u'2542',
-            u'ext': u'm4v',
-            u'title': u'Gloriajw-SpotifyWithErikBernhardsson182',
+        {
+            'url': 'http://pyvideo.org/video/2542/gloriajw-spotifywitherikbernhardsson182m4v',
+            'md5': '5fe1c7e0a8aa5570330784c847ff6d12',
+            'info_dict': {
+                'id': '2542',
+                'ext': 'm4v',
+                'title': 'Gloriajw-SpotifyWithErikBernhardsson182',
+            },
        },
-    },
    ]

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
-        webpage = self._download_webpage(url, video_id)
-        m_youtube = re.search(r'(https?://www\.youtube\.com/watch\?v=.*)', webpage)

+        webpage = self._download_webpage(url, video_id)
+
+        m_youtube = re.search(r'(https?://www\.youtube\.com/watch\?v=.*)', webpage)
        if m_youtube is not None:
            return self.url_result(m_youtube.group(1), 'Youtube')

-        title = self._html_search_regex(r'<div class="section">.*?<h3>([^>]+?)</h3>',
-            webpage, u'title', flags=re.DOTALL)
-        video_url = self._search_regex([r'<source src="(.*?)"',
-            r'<dt>Download</dt>.*?<a href="(.+?)"'],
-            webpage, u'video url', flags=re.DOTALL)
+        title = self._html_search_regex(
+            r'<div class="section">.*?<h3>([^>]+?)</h3>', webpage, 'title', flags=re.DOTALL)
+        video_url = self._search_regex(
+            [r'<source src="(.*?)"', r'<dt>Download</dt>.*?<a href="(.+?)"'],
+            webpage, 'video url', flags=re.DOTALL)
+
        return {
            'id': video_id,
            'title': os.path.splitext(title)[0],
--- a/youtube_dl/extractor/roxwel.py
+++ b/youtube_dl/extractor/roxwel.py
@ -1,5 +1,6 @@
+from __future__ import unicode_literals
+
 import re
-import json

 from .common import InfoExtractor
 from ..utils import unified_strdate, determine_ext
@ -9,41 +10,44 @@ class RoxwelIE(InfoExtractor):
    _VALID_URL = r'https?://www\.roxwel\.com/player/(?P<filename>.+?)(\.|\?|$)'

    _TEST = {
-        u'url': u'http://www.roxwel.com/player/passionpittakeawalklive.html',
-        u'file': u'passionpittakeawalklive.flv',
-        u'md5': u'd9dea8360a1e7d485d2206db7fe13035',
-        u'info_dict': {
-            u'title': u'Take A Walk (live)',
-            u'uploader': u'Passion Pit',
-            u'description': u'Passion Pit performs "Take A Walk\" live at The Backyard in Austin, Texas. ',
+        'url': 'http://www.roxwel.com/player/passionpittakeawalklive.html',
+        'info_dict': {
+            'id': 'passionpittakeawalklive',
+            'ext': 'flv',
+            'title': 'Take A Walk (live)',
+            'uploader': 'Passion Pit',
+            'uploader_id': 'passionpit',
+            'upload_date': '20120928',
+            'description': 'Passion Pit performs "Take A Walk\" live at The Backyard in Austin, Texas. ',
        },
-        u'skip': u'Requires rtmpdump',
+        'params': {
+            # rtmp download
+            'skip_download': True,
+        }
    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        filename = mobj.group('filename')
        info_url = 'http://www.roxwel.com/api/videos/%s' % filename
-        info_page = self._download_webpage(info_url, filename,
-                                           u'Downloading video info')
+        info = self._download_json(info_url, filename)

-        self.report_extraction(filename)
-        info = json.loads(info_page)
        rtmp_rates = sorted([int(r.replace('flv_', '')) for r in info['media_rates'] if r.startswith('flv_')])
        best_rate = rtmp_rates[-1]
        url_page_url = 'http://roxwel.com/pl_one_time.php?filename=%s&quality=%s' % (filename, best_rate)
-        rtmp_url = self._download_webpage(url_page_url, filename, u'Downloading video url')
+        rtmp_url = self._download_webpage(url_page_url, filename, 'Downloading video url')
        ext = determine_ext(rtmp_url)
        if ext == 'f4v':
            rtmp_url = rtmp_url.replace(filename, 'mp4:%s' % filename)

-        return {'id': filename,
-                'title': info['title'],
-                'url': rtmp_url,
-                'ext': 'flv',
-                'description': info['description'],
-                'thumbnail': info.get('player_image_url') or info.get('image_url_large'),
-                'uploader': info['artist'],
-                'uploader_id': info['artistname'],
-                'upload_date': unified_strdate(info['dbdate']),
-                }
+        return {
+            'id': filename,
+            'title': info['title'],
+            'url': rtmp_url,
+            'ext': 'flv',
+            'description': info['description'],
+            'thumbnail': info.get('player_image_url') or info.get('image_url_large'),
+            'uploader': info['artist'],
+            'uploader_id': info['artistname'],
+            'upload_date': unified_strdate(info['dbdate']),
+        }
--- a/youtube_dl/extractor/rutube.py
+++ b/youtube_dl/extractor/rutube.py
@ -2,7 +2,6 @@
 from __future__ import unicode_literals

 import re
-import json
 import itertools

 from .common import InfoExtractor
@ -20,8 +19,9 @@ class RutubeIE(InfoExtractor):

    _TEST = {
        'url': 'http://rutube.ru/video/3eac3b4561676c17df9132a9a1e62e3e/',
-        'file': '3eac3b4561676c17df9132a9a1e62e3e.mp4',
        'info_dict': {
+            'id': '3eac3b4561676c17df9132a9a1e62e3e',
+            'ext': 'mp4',
            'title': 'Раненный кенгуру забежал в аптеку',
            'description': 'http://www.ntdtv.ru ',
            'duration': 80,
@ -39,13 +39,13 @@ class RutubeIE(InfoExtractor):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

-        api_response = self._download_webpage('http://rutube.ru/api/video/%s/?format=json' % video_id,
-                                              video_id, 'Downloading video JSON')
-        video = json.loads(api_response)
+        video = self._download_json(
+            'http://rutube.ru/api/video/%s/?format=json' % video_id,
+            video_id, 'Downloading video JSON')

-        api_response = self._download_webpage('http://rutube.ru/api/play/trackinfo/%s/?format=json' % video_id,
-                                              video_id, 'Downloading trackinfo JSON')
-        trackinfo = json.loads(api_response)
+        trackinfo = self._download_json(
+            'http://rutube.ru/api/play/trackinfo/%s/?format=json' % video_id,
+            video_id, 'Downloading trackinfo JSON')

        # Some videos don't have the author field
        author = trackinfo.get('author') or {}
@ -79,10 +79,9 @@ class RutubeChannelIE(InfoExtractor):
    def _extract_videos(self, channel_id, channel_title=None):
        entries = []
        for pagenum in itertools.count(1):
-            api_response = self._download_webpage(
+            page = self._download_json(
                self._PAGE_TEMPLATE % (channel_id, pagenum),
                channel_id, 'Downloading page %s' % pagenum)
-            page = json.loads(api_response)
            results = page['results']
            if not results:
                break
@ -108,10 +107,9 @@ class RutubeMovieIE(RutubeChannelIE):
    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        movie_id = mobj.group('id')
-        api_response = self._download_webpage(
+        movie = self._download_json(
            self._MOVIE_TEMPLATE % movie_id, movie_id,
            'Downloading movie JSON')
-        movie = json.loads(api_response)
        movie_name = movie['name']
        return self._extract_videos(movie_id, movie_name)

--- a/youtube_dl/extractor/tf1.py
+++ b/youtube_dl/extractor/tf1.py
@ -1,33 +1,37 @@
 # coding: utf-8
+from __future__ import unicode_literals

-import json
 import re

 from .common import InfoExtractor

+
 class TF1IE(InfoExtractor):
    """TF1 uses the wat.tv player."""
-    _VALID_URL = r'http://videos\.tf1\.fr/.*-(.*?)\.html'
+    _VALID_URL = r'http://videos\.tf1\.fr/.*-(?P<id>.*?)\.html'
    _TEST = {
-        u'url': u'http://videos.tf1.fr/auto-moto/citroen-grand-c4-picasso-2013-presentation-officielle-8062060.html',
-        u'file': u'10635995.mp4',
-        u'md5': u'2e378cc28b9957607d5e88f274e637d8',
-        u'info_dict': {
-            u'title': u'Citroën Grand C4 Picasso 2013 : présentation officielle',
-            u'description': u'Vidéo officielle du nouveau Citroën Grand C4 Picasso, lancé à l\'automne 2013.',
+        'url': 'http://videos.tf1.fr/auto-moto/citroen-grand-c4-picasso-2013-presentation-officielle-8062060.html',
+        'info_dict': {
+            'id': '10635995',
+            'ext': 'mp4',
+            'title': 'Citroën Grand C4 Picasso 2013 : présentation officielle',
+            'description': 'Vidéo officielle du nouveau Citroën Grand C4 Picasso, lancé à l\'automne 2013.',
+        },
+        'params': {
+            # Sometimes wat serves the whole file with the --test option
+            'skip_download': True,
        },
-        u'skip': u'Sometimes wat serves the whole file with the --test option',
    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
-        id = mobj.group(1)
-        webpage = self._download_webpage(url, id)
-        embed_url = self._html_search_regex(r'"(https://www.wat.tv/embedframe/.*?)"',
-                                webpage, 'embed url')
-        embed_page = self._download_webpage(embed_url, id, u'Downloading embed player page')
+        video_id = mobj.group('id')
+        webpage = self._download_webpage(url, video_id)
+        embed_url = self._html_search_regex(
+            r'"(https://www.wat.tv/embedframe/.*?)"', webpage, 'embed url')
+        embed_page = self._download_webpage(embed_url, video_id,
+            'Downloading embed player page')
        wat_id = self._search_regex(r'UVID=(.*?)&', embed_page, 'wat id')
-        wat_info = self._download_webpage('http://www.wat.tv/interface/contentv3/%s' % wat_id, id, u'Downloading Wat info')
-        wat_info = json.loads(wat_info)['media']
-        wat_url = wat_info['url']
-        return self.url_result(wat_url, 'Wat')
+        wat_info = self._download_json(
+            'http://www.wat.tv/interface/contentv3/%s' % wat_id, video_id)
+        return self.url_result(wat_info['media']['url'], 'Wat')
--- a/youtube_dl/extractor/vk.py
+++ b/youtube_dl/extractor/vk.py
@ -16,7 +16,7 @@ from ..utils import (

 class VKIE(InfoExtractor):
    IE_NAME = 'vk.com'
-    _VALID_URL = r'https?://vk\.com/(?:video_ext\.php\?.*?\boid=(?P<oid>\d+).*?\bid=(?P<id>\d+)|(?:videos.*?\?.*?z=)?video(?P<videoid>.*?)(?:\?|%2F|$))'
+    _VALID_URL = r'https?://vk\.com/(?:video_ext\.php\?.*?\boid=(?P<oid>-?\d+).*?\bid=(?P<id>\d+)|(?:videos.*?\?.*?z=)?video(?P<videoid>.*?)(?:\?|%2F|$))'
    _NETRC_MACHINE = 'vk'

    _TESTS = [
--- a/youtube_dl/extractor/wat.py
+++ b/youtube_dl/extractor/wat.py
@ -1,37 +1,37 @@
 # coding: utf-8
+from __future__ import unicode_literals

-import json
 import re

 from .common import InfoExtractor
-
 from ..utils import (
    unified_strdate,
 )


 class WatIE(InfoExtractor):
-    _VALID_URL=r'http://www\.wat\.tv/.*-(?P<shortID>.*?)_.*?\.html'
+    _VALID_URL = r'http://www\.wat\.tv/.*-(?P<shortID>.*?)_.*?\.html'
    IE_NAME = 'wat.tv'
    _TEST = {
-        u'url': u'http://www.wat.tv/video/world-war-philadelphia-vost-6bv55_2fjr7_.html',
-        u'file': u'10631273.mp4',
-        u'md5': u'd8b2231e1e333acd12aad94b80937e19',
-        u'info_dict': {
-            u'title': u'World War Z - Philadelphia VOST',
-            u'description': u'La menace est partout. Que se passe-t-il à Philadelphia ?\r\nWORLD WAR Z, avec Brad Pitt, au cinéma le 3 juillet.\r\nhttp://www.worldwarz.fr',
+        'url': 'http://www.wat.tv/video/world-war-philadelphia-vost-6bv55_2fjr7_.html',
+        'info_dict': {
+            'id': '10631273',
+            'ext': 'mp4',
+            'title': 'World War Z - Philadelphia VOST',
+            'description': 'La menace est partout. Que se passe-t-il à Philadelphia ?\r\nWORLD WAR Z, avec Brad Pitt, au cinéma le 3 juillet.\r\nhttp://www.worldwarz.fr',
+        },
+        'params': {
+            # Sometimes wat serves the whole file with the --test option
+            'skip_download': True,
        },
-        u'skip': u'Sometimes wat serves the whole file with the --test option',
    }

    def download_video_info(self, real_id):
        # 'contentv4' is used in the website, but it also returns the related
        # videos, we don't need them
-        info = self._download_webpage('http://www.wat.tv/interface/contentv3/' + real_id, real_id, 'Downloading video info')
-        info = json.loads(info)
+        info = self._download_json('http://www.wat.tv/interface/contentv3/' + real_id, real_id)
        return info['media']

-
    def _real_extract(self, url):
        def real_id_for_chapter(chapter):
            return chapter['tc_start'].split('-')[0]
@ -56,17 +56,17 @@ class WatIE(InfoExtractor):
            entries = [self.url_result(chapter_url) for chapter_url in chapter_urls]
            return self.playlist_result(entries, real_id, video_info['title'])

+        upload_date = None
+        if 'date_diffusion' in first_chapter:
+            upload_date = unified_strdate(first_chapter['date_diffusion'])
        # Otherwise we can continue and extract just one part, we have to use
        # the short id for getting the video url
-        info = {'id': real_id,
-                'url': 'http://wat.tv/get/android5/%s.mp4' % real_id,
-                'ext': 'mp4',
-                'title': first_chapter['title'],
-                'thumbnail': first_chapter['preview'],
-                'description': first_chapter['description'],
-                'view_count': video_info['views'],
-                }
-        if 'date_diffusion' in first_chapter:
-            info['upload_date'] = unified_strdate(first_chapter['date_diffusion'])
-
-        return info
+        return {
+            'id': real_id,
+            'url': 'http://wat.tv/get/android5/%s.mp4' % real_id,
+            'title': first_chapter['title'],
+            'thumbnail': first_chapter['preview'],
+            'description': first_chapter['description'],
+            'view_count': video_info['views'],
+            'upload_date': upload_date,
+        }
--- a/youtube_dl/extractor/wdr.py
+++ b/youtube_dl/extractor/wdr.py
@ -4,9 +4,10 @@ import re

 from .common import InfoExtractor
 from ..utils import (
-    unified_strdate,
+    compat_parse_qs,
    compat_urlparse,
    determine_ext,
+    unified_strdate,
 )


@ -112,3 +113,84 @@ class WDRIE(InfoExtractor):
            'thumbnail': thumbnail,
            'upload_date': upload_date,
        }
+
+
+class WDRMausIE(InfoExtractor):
+    _VALID_URL = 'http://(?:www\.)?wdrmaus\.de/(?:[^/]+/){,2}(?P<id>[^/?#]+)(?:/index\.php5|(?<!index)\.php5|/(?:$|[?#]))'
+    IE_DESC = 'Sendung mit der Maus'
+    _TESTS = [{
+        'url': 'http://www.wdrmaus.de/aktuelle-sendung/index.php5',
+        'info_dict': {
+            'id': 'aktuelle-sendung',
+            'ext': 'mp4',
+            'thumbnail': 're:^http://.+\.jpg',
+            'upload_date': 're:^[0-9]{8}$',
+            'title': 're:^[0-9.]{10} - Aktuelle Sendung$',
+        }
+    }, {
+        'url': 'http://www.wdrmaus.de/sachgeschichten/sachgeschichten/40_jahre_maus.php5',
+        'md5': '3b1227ca3ed28d73ec5737c65743b2a3',
+        'info_dict': {
+            'id': '40_jahre_maus',
+            'ext': 'mp4',
+            'thumbnail': 're:^http://.+\.jpg',
+            'upload_date': '20131007',
+            'title': '12.03.2011 - 40 Jahre Maus',
+        }
+    }]
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        webpage = self._download_webpage(url, video_id)
+        param_code = self._html_search_regex(
+            r'<a href="\?startVideo=1&amp;([^"]+)"', webpage, 'parameters')
+
+        title_date = self._search_regex(
+            r'<div class="sendedatum"><p>Sendedatum:\s*([0-9\.]+)</p>',
+            webpage, 'air date')
+        title_str = self._html_search_regex(
+            r'<h1>(.*?)</h1>', webpage, 'title')
+        title = '%s - %s' % (title_date, title_str)
+        upload_date = unified_strdate(
+            self._html_search_meta('dc.date', webpage))
+
+        fields = compat_parse_qs(param_code)
+        video_url = fields['firstVideo'][0]
+        thumbnail = compat_urlparse.urljoin(url, fields['startPicture'][0])
+
+        formats = [{
+            'format_id': 'rtmp',
+            'url': video_url,
+        }]
+
+        jscode = self._download_webpage(
+            'http://www.wdrmaus.de/codebase/js/extended-medien.min.js',
+            video_id, fatal=False,
+            note='Downloading URL translation table',
+            errnote='Could not download URL translation table')
+        if jscode:
+            for m in re.finditer(
+                    r"stream:\s*'dslSrc=(?P<stream>[^']+)',\s*download:\s*'(?P<dl>[^']+)'\s*\}",
+                    jscode):
+                if video_url.startswith(m.group('stream')):
+                    http_url = video_url.replace(
+                        m.group('stream'), m.group('dl'))
+                    formats.append({
+                        'format_id': 'http',
+                        'url': http_url,
+                    })
+                    break
+
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'formats': formats,
+            'thumbnail': thumbnail,
+            'upload_date': upload_date,
+        }
+
+# TODO test _1
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@ -7,13 +7,13 @@ import itertools
 import json
 import os.path
 import re
-import string
 import struct
 import traceback
 import zlib

 from .common import InfoExtractor, SearchInfoExtractor
 from .subtitles import SubtitlesInfoExtractor
+from ..jsinterp import JSInterpreter
 from ..utils import (
    compat_chr,
    compat_parse_qs,
@ -438,113 +438,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
    def _parse_sig_js(self, jscode):
        funcname = self._search_regex(
            r'signature=([a-zA-Z]+)', jscode,
-            u'Initial JS player signature function name')
+             u'Initial JS player signature function name')

-        functions = {}
-
-        def argidx(varname):
-            return string.lowercase.index(varname)
-
-        def interpret_statement(stmt, local_vars, allow_recursion=20):
-            if allow_recursion < 0:
-                raise ExtractorError(u'Recursion limit reached')
-
-            if stmt.startswith(u'var '):
-                stmt = stmt[len(u'var '):]
-            ass_m = re.match(r'^(?P<out>[a-z]+)(?:\[(?P<index>[^\]]+)\])?' +
-                             r'=(?P<expr>.*)$', stmt)
-            if ass_m:
-                if ass_m.groupdict().get('index'):
-                    def assign(val):
-                        lvar = local_vars[ass_m.group('out')]
-                        idx = interpret_expression(ass_m.group('index'),
-                                                   local_vars, allow_recursion)
-                        assert isinstance(idx, int)
-                        lvar[idx] = val
-                        return val
-                    expr = ass_m.group('expr')
-                else:
-                    def assign(val):
-                        local_vars[ass_m.group('out')] = val
-                        return val
-                    expr = ass_m.group('expr')
-            elif stmt.startswith(u'return '):
-                assign = lambda v: v
-                expr = stmt[len(u'return '):]
-            else:
-                raise ExtractorError(
-                    u'Cannot determine left side of statement in %r' % stmt)
-
-            v = interpret_expression(expr, local_vars, allow_recursion)
-            return assign(v)
-
-        def interpret_expression(expr, local_vars, allow_recursion):
-            if expr.isdigit():
-                return int(expr)
-
-            if expr.isalpha():
-                return local_vars[expr]
-
-            m = re.match(r'^(?P<in>[a-z]+)\.(?P<member>.*)$', expr)
-            if m:
-                member = m.group('member')
-                val = local_vars[m.group('in')]
-                if member == 'split("")':
-                    return list(val)
-                if member == 'join("")':
-                    return u''.join(val)
-                if member == 'length':
-                    return len(val)
-                if member == 'reverse()':
-                    return val[::-1]
-                slice_m = re.match(r'slice\((?P<idx>.*)\)', member)
-                if slice_m:
-                    idx = interpret_expression(
-                        slice_m.group('idx'), local_vars, allow_recursion-1)
-                    return val[idx:]
-
-            m = re.match(
-                r'^(?P<in>[a-z]+)\[(?P<idx>.+)\]$', expr)
-            if m:
-                val = local_vars[m.group('in')]
-                idx = interpret_expression(m.group('idx'), local_vars,
-                                           allow_recursion-1)
-                return val[idx]
-
-            m = re.match(r'^(?P<a>.+?)(?P<op>[%])(?P<b>.+?)$', expr)
-            if m:
-                a = interpret_expression(m.group('a'),
-                                         local_vars, allow_recursion)
-                b = interpret_expression(m.group('b'),
-                                         local_vars, allow_recursion)
-                return a % b
-
-            m = re.match(
-                r'^(?P<func>[a-zA-Z$]+)\((?P<args>[a-z0-9,]+)\)$', expr)
-            if m:
-                fname = m.group('func')
-                if fname not in functions:
-                    functions[fname] = extract_function(fname)
-                argvals = [int(v) if v.isdigit() else local_vars[v]
-                           for v in m.group('args').split(',')]
-                return functions[fname](argvals)
-            raise ExtractorError(u'Unsupported JS expression %r' % expr)
-
-        def extract_function(funcname):
-            func_m = re.search(
-                r'function ' + re.escape(funcname) +
-                r'\((?P<args>[a-z,]+)\){(?P<code>[^}]+)}',
-                jscode)
-            argnames = func_m.group('args').split(',')
-
-            def resf(args):
-                local_vars = dict(zip(argnames, args))
-                for stmt in func_m.group('code').split(';'):
-                    res = interpret_statement(stmt, local_vars)
-                return res
-            return resf
-
-        initial_function = extract_function(funcname)
+        jsi = JSInterpreter(jscode)
+        initial_function = jsi.extract_function(funcname)
        return lambda s: initial_function([s])

    def _parse_sig_swf(self, file_contents):
@ -1549,7 +1446,9 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
                break

            more = self._download_json(
-                'https://youtube.com/%s' % mobj.group('more'), playlist_id, 'Downloading page #%s' % page_num)
+                'https://youtube.com/%s' % mobj.group('more'), playlist_id,
+                'Downloading page #%s' % page_num,
+                transform_source=uppercase_escape)
            content_html = more['content_html']
            more_widget_html = more['load_more_widget_html']

@ -1712,7 +1611,7 @@ class YoutubeUserIE(InfoExtractor):

 class YoutubeSearchIE(SearchInfoExtractor):
    IE_DESC = u'YouTube.com searches'
-    _API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc'
+    _API_URL = u'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc'
    _MAX_RESULTS = 1000
    IE_NAME = u'youtube:search'
    _SEARCH_KEY = 'ytsearch'
@ -1723,9 +1622,12 @@ class YoutubeSearchIE(SearchInfoExtractor):
        video_ids = []
        pagenum = 0
        limit = n
+        PAGE_SIZE = 50

-        while (50 * pagenum) < limit:
-            result_url = self._API_URL % (compat_urllib_parse.quote_plus(query), (50*pagenum)+1)
+        while (PAGE_SIZE * pagenum) < limit:
+            result_url = self._API_URL % (
+                compat_urllib_parse.quote_plus(query.encode('utf-8')),
+                (PAGE_SIZE * pagenum) + 1)
            data_json = self._download_webpage(
                result_url, video_id=u'query "%s"' % query,
                note=u'Downloading page %s' % (pagenum + 1),
@ -1836,11 +1738,10 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
        feed_entries = []
        paging = 0
        for i in itertools.count(1):
-            info = self._download_webpage(self._FEED_TEMPLATE % paging,
+            info = self._download_json(self._FEED_TEMPLATE % paging,
                                          u'%s feed' % self._FEED_NAME,
                                          u'Downloading page %s' % i)
-            info = json.loads(info)
-            feed_html = info['feed_html']
+            feed_html = info.get('feed_html') or info.get('content_html')
            m_ids = re.finditer(r'"/watch\?v=(.*?)["&]', feed_html)
            ids = orderedSet(m.group(1) for m in m_ids)
            feed_entries.extend(
--- a/youtube_dl/jsinterp.py
+++ b/youtube_dl/jsinterp.py
@ -0,0 +1,116 @@
+from __future__ import unicode_literals
+
+import re
+
+from .utils import (
+    ExtractorError,
+)
+
+
+class JSInterpreter(object):
+    def __init__(self, code):
+        self.code = code
+        self._functions = {}
+
+    def interpret_statement(self, stmt, local_vars, allow_recursion=20):
+        if allow_recursion < 0:
+            raise ExtractorError('Recursion limit reached')
+
+        if stmt.startswith('var '):
+            stmt = stmt[len('var '):]
+        ass_m = re.match(r'^(?P<out>[a-z]+)(?:\[(?P<index>[^\]]+)\])?' +
+                         r'=(?P<expr>.*)$', stmt)
+        if ass_m:
+            if ass_m.groupdict().get('index'):
+                def assign(val):
+                    lvar = local_vars[ass_m.group('out')]
+                    idx = self.interpret_expression(
+                        ass_m.group('index'), local_vars, allow_recursion)
+                    assert isinstance(idx, int)
+                    lvar[idx] = val
+                    return val
+                expr = ass_m.group('expr')
+            else:
+                def assign(val):
+                    local_vars[ass_m.group('out')] = val
+                    return val
+                expr = ass_m.group('expr')
+        elif stmt.startswith('return '):
+            assign = lambda v: v
+            expr = stmt[len('return '):]
+        else:
+            raise ExtractorError(
+                'Cannot determine left side of statement in %r' % stmt)
+
+        v = self.interpret_expression(expr, local_vars, allow_recursion)
+        return assign(v)
+
+    def interpret_expression(self, expr, local_vars, allow_recursion):
+        if expr.isdigit():
+            return int(expr)
+
+        if expr.isalpha():
+            return local_vars[expr]
+
+        m = re.match(r'^(?P<in>[a-z]+)\.(?P<member>.*)$', expr)
+        if m:
+            member = m.group('member')
+            val = local_vars[m.group('in')]
+            if member == 'split("")':
+                return list(val)
+            if member == 'join("")':
+                return u''.join(val)
+            if member == 'length':
+                return len(val)
+            if member == 'reverse()':
+                return val[::-1]
+            slice_m = re.match(r'slice\((?P<idx>.*)\)', member)
+            if slice_m:
+                idx = self.interpret_expression(
+                    slice_m.group('idx'), local_vars, allow_recursion - 1)
+                return val[idx:]
+
+        m = re.match(
+            r'^(?P<in>[a-z]+)\[(?P<idx>.+)\]$', expr)
+        if m:
+            val = local_vars[m.group('in')]
+            idx = self.interpret_expression(
+                m.group('idx'), local_vars, allow_recursion - 1)
+            return val[idx]
+
+        m = re.match(r'^(?P<a>.+?)(?P<op>[%])(?P<b>.+?)$', expr)
+        if m:
+            a = self.interpret_expression(
+                m.group('a'), local_vars, allow_recursion)
+            b = self.interpret_expression(
+                m.group('b'), local_vars, allow_recursion)
+            return a % b
+
+        m = re.match(
+            r'^(?P<func>[a-zA-Z$]+)\((?P<args>[a-z0-9,]+)\)$', expr)
+        if m:
+            fname = m.group('func')
+            if fname not in self._functions:
+                self._functions[fname] = self.extract_function(fname)
+            argvals = [int(v) if v.isdigit() else local_vars[v]
+                       for v in m.group('args').split(',')]
+            return self._functions[fname](argvals)
+        raise ExtractorError('Unsupported JS expression %r' % expr)
+
+    def extract_function(self, funcname):
+        func_m = re.search(
+            (r'(?:function %s|%s\s*=\s*function)' % (
+                re.escape(funcname), re.escape(funcname))) +
+            r'\((?P<args>[a-z,]+)\){(?P<code>[^}]+)}',
+            self.code)
+        if func_m is None:
+            raise ExtractorError('Could not find JS function %r' % funcname)
+        argnames = func_m.group('args').split(',')
+
+        def resf(args):
+            local_vars = dict(zip(argnames, args))
+            for stmt in func_m.group('code').split(';'):
+                res = self.interpret_statement(stmt, local_vars)
+            return res
+        return resf
+
--- a/youtube_dl/postprocessor/ffmpeg.py
+++ b/youtube_dl/postprocessor/ffmpeg.py
@ -55,8 +55,9 @@ class FFmpegPostProcessor(PostProcessor):

        if self._downloader.params.get('verbose', False):
            self._downloader.to_screen(u'[debug] ffmpeg command line: %s' % shell_quote(cmd))
-        p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
-        stdout,stderr = p.communicate()
+        bcmd = [self._downloader.encode(c) for c in cmd]
+        p = subprocess.Popen(bcmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+        stdout, stderr = p.communicate()
        if p.returncode != 0:
            stderr = stderr.decode('utf-8', 'replace')
            msg = stderr.strip().split('\n')[-1]
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@ -539,7 +539,6 @@ def encodeFilename(s, for_subprocess=False):
        encoding = 'utf-8'
    return s.encode(encoding, 'ignore')

-
 def decodeOption(optval):
    if optval is None:
        return optval
@ -1269,8 +1268,8 @@ class PagedList(object):

 def uppercase_escape(s):
    return re.sub(
-        r'\\U([0-9a-fA-F]{8})',
-        lambda m: compat_chr(int(m.group(1), base=16)), s)
+        r'\\U[0-9a-fA-F]{8}',
+        lambda m: m.group(0).decode('unicode-escape'), s)

 try:
    struct.pack(u'!I', 0)
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@ -1,2 +1,2 @@

-__version__ = '2014.03.28'
+__version__ = '2014.04.02'