Merge branch 'master' of https://github.com/rg3/youtube-dl into multipart_videos

2014-04-02 08:40:37 -07:00 · 2014-04-02 08:40:37 -07:00 · 5d0495f7b4
commit 5d0495f7b4
parent 840e7cf4f3 91a76c40c0
30 changed files with 610 additions and 358 deletions
--- a/README.md
+++ b/README.md
@ -65,6 +65,7 @@ which means you can modify it, redistribute it or use it however you like.
                                     configuration in ~/.config/youtube-dl.conf
                                     (%APPDATA%/youtube-dl/config.txt on
                                     Windows)
    --encoding ENCODING              Force the specified encoding (experimental)
 ## Video Selection:
    --playlist-start NUMBER          playlist video to start at (default is 1)
--- a/test/test_all_urls.py
+++ b/test/test_all_urls.py
@ -144,7 +144,15 @@ class TestAllURLsMatching(unittest.TestCase):
        self.assertMatch('http://video.pbs.org/widget/partnerplayer/980042464/', ['PBS'])
    def test_ComedyCentralShows(self):
-        self.assertMatch('http://thedailyshow.cc.com/extended-interviews/xm3fnq/andrew-napolitano-extended-interview', ['ComedyCentralShows'])
+        self.assertMatch(
            'http://thedailyshow.cc.com/extended-interviews/xm3fnq/andrew-napolitano-extended-interview',
            ['ComedyCentralShows'])
        self.assertMatch(
            'http://thecolbertreport.cc.com/videos/29w6fx/-realhumanpraise-for-fox-news',
            ['ComedyCentralShows'])
        self.assertMatch(
            'http://thecolbertreport.cc.com/videos/gh6urb/neil-degrasse-tyson-pt--1?xrs=eml_col_031114',
            ['ComedyCentralShows'])
 if __name__ == '__main__':
    unittest.main()
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@ -8,6 +8,7 @@ import datetime
 import errno
 import io
 import json
 import locale
 import os
 import platform
 import re
@ -160,6 +161,7 @@ class YoutubeDL(object):
    include_ads:       Download ads as well
    default_search:    Prepend this string if an input url is not valid.
                       'auto' for elaborate guessing
    encoding:          Use this encoding instead of the system-specified.
    The following parameters are not used by YoutubeDL itself, they are used by
    the FileDownloader:
@ -1219,6 +1221,9 @@ class YoutubeDL(object):
    def print_debug_header(self):
        if not self.params.get('verbose'):
            return
        write_string('[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' %
                 (locale.getpreferredencoding(), sys.getfilesystemencoding(), sys.stdout.encoding, self.get_encoding()))
        write_string('[debug] youtube-dl version ' + __version__ + '\n')
        try:
            sp = subprocess.Popen(
@ -1283,3 +1288,19 @@ class YoutubeDL(object):
        # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
        opener.addheaders = []
        self._opener = opener
    def encode(self, s):
        """Return ``s`` as a byte string.

        Byte strings are passed through untouched. Anything else is encoded
        with the encoding reported by ``self.get_encoding()``. When encoding
        fails, the ``UnicodeEncodeError`` is re-raised with a hint about the
        ``--encoding`` option appended to its ``reason``.
        """
        if not isinstance(s, bytes):
            try:
                s = s.encode(self.get_encoding())
            except UnicodeEncodeError as err:
                # Extend the message in place so the original traceback and
                # exception type are preserved for callers.
                err.reason += '. Check your system encoding configuration or use the --encoding option.'
                raise
        return s
    def get_encoding(self):
        """Return the encoding to use for text output.

        The ``'encoding'`` entry of ``self.params`` wins when it is set to
        anything other than ``None``; otherwise the result of
        ``preferredencoding()`` is returned.
        """
        forced = self.params.get('encoding')
        return preferredencoding() if forced is None else forced
--- a/youtube_dl/init.py
+++ b/youtube_dl/init.py
@ -51,6 +51,7 @@ __authors__  = (
    'David Wagner',
    'Juan C. Olivares',
    'Mattias Harrysson',
    'phaer',
 )
 __license__ = 'Public Domain'
@ -256,13 +257,17 @@ def parseOpts(overrideArguments=None):
    general.add_option(
        '--bidi-workaround', dest='bidi_workaround', action='store_true',
        help=u'Work around terminals that lack bidirectional text support. Requires bidiv or fribidi executable in PATH')
-    general.add_option('--default-search',
+    general.add_option(
-            dest='default_search', metavar='PREFIX',
+        '--default-search',
-            help='Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for  youtube-dl "large apple". By default (with value "auto") youtube-dl guesses.')
+        dest='default_search', metavar='PREFIX',
        help='Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for  youtube-dl "large apple". By default (with value "auto") youtube-dl guesses.')
    general.add_option(
        '--ignore-config',
        action='store_true',
        help='Do not read configuration files. When given in the global configuration file /etc/youtube-dl.conf: do not read the user configuration in ~/.config/youtube-dl.conf (%APPDATA%/youtube-dl/config.txt on Windows)')
    general.add_option(
        '--encoding', dest='encoding', metavar='ENCODING',
        help='Force the specified encoding (experimental)')
    selection.add_option(
        '--playlist-start',
@ -542,8 +547,6 @@ def parseOpts(overrideArguments=None):
            write_string(u'[debug] System config: ' + repr(_hide_login_info(systemConf)) + '\n')
            write_string(u'[debug] User config: ' + repr(_hide_login_info(userConf)) + '\n')
            write_string(u'[debug] Command-line args: ' + repr(_hide_login_info(commandLineConf)) + '\n')
            write_string(u'[debug] Encodings: locale %r, fs %r, out %r, pref: %r\n' %
                         (locale.getpreferredencoding(), sys.getfilesystemencoding(), sys.stdout.encoding, preferredencoding()))
    return parser, opts, args
@ -677,7 +680,7 @@ def _real_main(argv=None):
        date = DateRange.day(opts.date)
    else:
        date = DateRange(opts.dateafter, opts.datebefore)
-    if opts.default_search not in ('auto', None) and ':' not in opts.default_search:
+    if opts.default_search not in ('auto', 'auto_warning', None) and ':' not in opts.default_search:
        parser.error(u'--default-search invalid; did you forget a colon (:) at the end?')
    # Do not download videos when there are audio-only formats
@ -789,6 +792,7 @@ def _real_main(argv=None):
        'include_ads': opts.include_ads,
        'default_search': opts.default_search,
        'youtube_include_dash_manifest': opts.youtube_include_dash_manifest,
        'encoding': opts.encoding,
    }
    with YoutubeDL(ydl_opts) as ydl:
--- a/youtube_dl/extractor/init.py
+++ b/youtube_dl/extractor/init.py
@ -156,6 +156,7 @@ from .mtv import (
    MTVIE,
    MTVIggyIE,
 )
 from .musicplayon import MusicPlayOnIE
 from .muzu import MuzuTVIE
 from .myspace import MySpaceIE
 from .myspass import MySpassIE
@ -285,7 +286,10 @@ from .vk import VKIE
 from .vube import VubeIE
 from .washingtonpost import WashingtonPostIE
 from .wat import WatIE
-from .wdr import WDRIE
+from .wdr import (
    WDRIE,
    WDRMausIE,
 )
 from .weibo import WeiboIE
 from .wimp import WimpIE
 from .wistia import WistiaIE
--- a/youtube_dl/extractor/appletrailers.py
+++ b/youtube_dl/extractor/appletrailers.py
@ -6,7 +6,6 @@ import json
 from .common import InfoExtractor
 from ..utils import (
    compat_urlparse,
    determine_ext,
 )
--- a/youtube_dl/extractor/bloomberg.py
+++ b/youtube_dl/extractor/bloomberg.py
@ -1,22 +1,21 @@
 from __future__ import unicode_literals
 import re
 from .common import InfoExtractor
 from .ooyala import OoyalaIE
 class BloombergIE(InfoExtractor):
    _VALID_URL = r'https?://www\.bloomberg\.com/video/(?P<name>.+?)\.html'
    _TEST = {
-        u'url': u'http://www.bloomberg.com/video/shah-s-presentation-on-foreign-exchange-strategies-qurhIVlJSB6hzkVi229d8g.html',
+        'url': 'http://www.bloomberg.com/video/shah-s-presentation-on-foreign-exchange-strategies-qurhIVlJSB6hzkVi229d8g.html',
-        u'file': u'12bzhqZTqQHmmlA8I-i0NpzJgcG5NNYX.mp4',
+        'md5': '7bf08858ff7c203c870e8a6190e221e5',
-        u'info_dict': {
+        'info_dict': {
-            u'title': u'Shah\'s Presentation on Foreign-Exchange Strategies',
+            'id': 'qurhIVlJSB6hzkVi229d8g',
-            u'description': u'md5:abc86e5236f9f0e4866c59ad36736686',
+            'ext': 'flv',
-        },
+            'title': 'Shah\'s Presentation on Foreign-Exchange Strategies',
-        u'params': {
+            'description': 'md5:0681e0d30dcdfc6abf34594961d8ea88',
            # Requires ffmpeg (m3u8 manifest)
            u'skip_download': True,
        },
    }
@ -24,7 +23,16 @@ class BloombergIE(InfoExtractor):
        mobj = re.match(self._VALID_URL, url)
        name = mobj.group('name')
        webpage = self._download_webpage(url, name)
-        embed_code = self._search_regex(
+        f4m_url = self._search_regex(
-            r'<source src="https?://[^/]+/[^/]+/[^/]+/([^/]+)', webpage,
+            r'<source src="(https?://[^"]+\.f4m.*?)"', webpage,
-            'embed code')
+            'f4m url')
-        return OoyalaIE._build_url_result(embed_code)
+        title = re.sub(': Video$', '', self._og_search_title(webpage))
        return {
            'id': name.split('-')[-1],
            'title': title,
            'url': f4m_url,
            'ext': 'flv',
            'description': self._og_search_description(webpage),
            'thumbnail': self._og_search_thumbnail(webpage),
        }
--- a/youtube_dl/extractor/brightcove.py
+++ b/youtube_dl/extractor/brightcove.py
@ -87,7 +87,7 @@ class BrightcoveIE(InfoExtractor):
        object_str = object_str.replace('<--', '<!--')
        object_str = fix_xml_ampersands(object_str)
-        object_doc = xml.etree.ElementTree.fromstring(object_str)
+        object_doc = xml.etree.ElementTree.fromstring(object_str.encode('utf-8'))
        fv_el = find_xpath_attr(object_doc, './param', 'name', 'flashVars')
        if fv_el is not None:
--- a/youtube_dl/extractor/comedycentral.py
+++ b/youtube_dl/extractor/comedycentral.py
@ -43,11 +43,13 @@ class ComedyCentralShowsIE(InfoExtractor):
                          (?P<showname>thedailyshow|thecolbertreport)\.(?:cc\.)?com/
                         (full-episodes/(?P<episode>.*)|
                          (?P<clip>
-                              (the-colbert-report-(videos|collections)/(?P<clipID>[0-9]+)/[^/]*/(?P<cntitle>.*?))
+                              (?:videos/[^/]+/(?P<videotitle>[^/?#]+))
-                              |(watch/(?P<date>[^/]*)/(?P<tdstitle>.*)))|
+                              |(the-colbert-report-(videos|collections)/(?P<clipID>[0-9]+)/[^/]*/(?P<cntitle>.*?))
                              |(watch/(?P<date>[^/]*)/(?P<tdstitle>.*))
                          )|
                          (?P<interview>
                              extended-interviews/(?P<interID>[0-9a-z]+)/(?:playlist_tds_extended_)?(?P<interview_title>.*?)(/.*?)?)))
-                     $'''
+                     (?:[?#].*|$)'''
    _TEST = {
        'url': 'http://thedailyshow.cc.com/watch/thu-december-13-2012/kristen-stewart',
        'md5': '4e2f5cb088a83cd8cdb7756132f9739d',
@ -57,7 +59,7 @@ class ComedyCentralShowsIE(InfoExtractor):
            'upload_date': '20121213',
            'description': 'Kristen Stewart learns to let loose in "On the Road."',
            'uploader': 'thedailyshow',
-            'title': 'thedailyshow-kristen-stewart part 1',
+            'title': 'thedailyshow kristen-stewart part 1',
        }
    }
@ -102,7 +104,9 @@ class ComedyCentralShowsIE(InfoExtractor):
            assert mobj is not None
        if mobj.group('clip'):
-            if mobj.group('showname') == 'thedailyshow':
+            if mobj.group('videotitle'):
                epTitle = mobj.group('videotitle')
            elif mobj.group('showname') == 'thedailyshow':
                epTitle = mobj.group('tdstitle')
            else:
                epTitle = mobj.group('cntitle')
@ -161,7 +165,7 @@ class ComedyCentralShowsIE(InfoExtractor):
            content = itemEl.find('.//{http://search.yahoo.com/mrss/}content')
            duration = float_or_none(content.attrib.get('duration'))
            mediagen_url = content.attrib['url']
-            guid = itemEl.find('.//guid').text.rpartition(':')[-1]
+            guid = itemEl.find('./guid').text.rpartition(':')[-1]
            cdoc = self._download_xml(
                mediagen_url, epTitle,
--- a/youtube_dl/extractor/discovery.py
+++ b/youtube_dl/extractor/discovery.py
@ -10,9 +10,10 @@ class DiscoveryIE(InfoExtractor):
    _VALID_URL = r'http://dsc\.discovery\.com\/[a-zA-Z0-9\-]*/[a-zA-Z0-9\-]*/videos/(?P<id>[a-zA-Z0-9\-]*)(.htm)?'
    _TEST = {
        'url': 'http://dsc.discovery.com/tv-shows/mythbusters/videos/mission-impossible-outtakes.htm',
        'file': '614784.mp4',
        'md5': 'e12614f9ee303a6ccef415cb0793eba2',
        'info_dict': {
            'id': '614784',
            'ext': 'mp4',
            'title': 'MythBusters: Mission Impossible Outtakes',
            'description': ('Watch Jamie Hyneman and Adam Savage practice being'
                ' each other -- to the point of confusing Jamie\'s dog -- and '
@ -34,7 +35,7 @@ class DiscoveryIE(InfoExtractor):
        formats = []
        for f in info['mp4']:
            formats.append(
-                {'url': f['src'], r'ext': r'mp4', 'tbr': int(f['bitrate'][:-1])})
+                {'url': f['src'], 'ext': 'mp4', 'tbr': int(f['bitrate'][:-1])})
        return {
            'id': info['contentId'],
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@ -82,6 +82,17 @@ class GenericIE(InfoExtractor):
            },
            'add_ie': ['Brightcove'],
        },
        {
            'url': 'http://www.championat.com/video/football/v/87/87499.html',
            'md5': 'fb973ecf6e4a78a67453647444222983',
            'info_dict': {
                'id': '3414141473001',
                'ext': 'mp4',
                'title': 'Видео. Удаление Дзагоева (ЦСКА)',
                'description': 'Онлайн-трансляция матча ЦСКА - "Волга"',
                'uploader': 'Championat',
            },
        },
        # Direct link to a video
        {
            'url': 'http://media.w3.org/2010/05/sintel/trailer.mp4',
@ -316,13 +327,16 @@ class GenericIE(InfoExtractor):
        if not parsed_url.scheme:
            default_search = self._downloader.params.get('default_search')
            if default_search is None:
-                default_search = 'auto'
+                default_search = 'auto_warning'
-            if default_search == 'auto':
+            if default_search in ('auto', 'auto_warning'):
                if '/' in url:
                    self._downloader.report_warning('The url doesn\'t specify the protocol, trying with http')
                    return self.url_result('http://' + url)
                else:
                    if default_search == 'auto_warning':
                        self._downloader.report_warning(
                            'Falling back to youtube search for  %s . Set --default-search to "auto" to suppress this warning.' % url)
                    return self.url_result('ytsearch:' + url)
            else:
                assert ':' in default_search
--- a/youtube_dl/extractor/huffpost.py
+++ b/youtube_dl/extractor/huffpost.py
@ -21,9 +21,10 @@ class HuffPostIE(InfoExtractor):
    _TEST = {
        'url': 'http://live.huffingtonpost.com/r/segment/legalese-it/52dd3e4b02a7602131000677',
        'file': '52dd3e4b02a7602131000677.mp4',
        'md5': '55f5e8981c1c80a64706a44b74833de8',
        'info_dict': {
            'id': '52dd3e4b02a7602131000677',
            'ext': 'mp4',
            'title': 'Legalese It! with @MikeSacksHP',
            'description': 'This week on Legalese It, Mike talks to David Bosco about his new book on the ICC, "Rough Justice," he also discusses the Virginia AG\'s historic stance on gay marriage, the execution of Edgar Tamayo, the ICC\'s delay of Kenya\'s President and more.  ',
            'duration': 1549,
--- a/youtube_dl/extractor/ign.py
+++ b/youtube_dl/extractor/ign.py
@ -1,10 +1,8 @@
 from __future__ import unicode_literals
 import re
 import json
 from .common import InfoExtractor
 from ..utils import (
    determine_ext,
 )
 class IGNIE(InfoExtractor):
@ -14,52 +12,57 @@ class IGNIE(InfoExtractor):
    """
    _VALID_URL = r'https?://.+?\.ign\.com/(?P<type>videos|show_videos|articles|(?:[^/]*/feature))(/.+)?/(?P<name_or_id>.+)'
-    IE_NAME = u'ign.com'
+    IE_NAME = 'ign.com'
    _CONFIG_URL_TEMPLATE = 'http://www.ign.com/videos/configs/id/%s.config'
-    _DESCRIPTION_RE = [r'<span class="page-object-description">(.+?)</span>',
+    _DESCRIPTION_RE = [
-                       r'id="my_show_video">.*?<p>(.*?)</p>',
+        r'<span class="page-object-description">(.+?)</span>',
-                       ]
+        r'id="my_show_video">.*?<p>(.*?)</p>',
    ]
    _TESTS = [
        {
-            u'url': u'http://www.ign.com/videos/2013/06/05/the-last-of-us-review',
+            'url': 'http://www.ign.com/videos/2013/06/05/the-last-of-us-review',
-            u'file': u'8f862beef863986b2785559b9e1aa599.mp4',
+            'md5': 'eac8bdc1890980122c3b66f14bdd02e9',
-            u'md5': u'eac8bdc1890980122c3b66f14bdd02e9',
+            'info_dict': {
-            u'info_dict': {
+                'id': '8f862beef863986b2785559b9e1aa599',
-                u'title': u'The Last of Us Review',
+                'ext': 'mp4',
-                u'description': u'md5:c8946d4260a4d43a00d5ae8ed998870c',
+                'title': 'The Last of Us Review',
                'description': 'md5:c8946d4260a4d43a00d5ae8ed998870c',
            }
        },
        {
-            u'url': u'http://me.ign.com/en/feature/15775/100-little-things-in-gta-5-that-will-blow-your-mind',
+            'url': 'http://me.ign.com/en/feature/15775/100-little-things-in-gta-5-that-will-blow-your-mind',
-            u'playlist': [
+            'playlist': [
                {
-                    u'file': u'5ebbd138523268b93c9141af17bec937.mp4',
+                    'info_dict': {
-                    u'info_dict': {
+                        'id': '5ebbd138523268b93c9141af17bec937',
-                        u'title': u'GTA 5 Video Review',
+                        'ext': 'mp4',
-                        u'description': u'Rockstar drops the mic on this generation of games. Watch our review of the masterly Grand Theft Auto V.',
+                        'title': 'GTA 5 Video Review',
                        'description': 'Rockstar drops the mic on this generation of games. Watch our review of the masterly Grand Theft Auto V.',
                    },
                },
                {
-                    u'file': u'638672ee848ae4ff108df2a296418ee2.mp4',
+                    'info_dict': {
-                    u'info_dict': {
+                        'id': '638672ee848ae4ff108df2a296418ee2',
-                        u'title': u'26 Twisted Moments from GTA 5 in Slow Motion',
+                        'ext': 'mp4',
-                        u'description': u'The twisted beauty of GTA 5 in stunning slow motion.',
+                        'title': '26 Twisted Moments from GTA 5 in Slow Motion',
                        'description': 'The twisted beauty of GTA 5 in stunning slow motion.',
                    },
                },
            ],
-            u'params': {
+            'params': {
-                u'skip_download': True,
+                'skip_download': True,
            },
        },
    ]
    def _find_video_id(self, webpage):
-        res_id = [r'data-video-id="(.+?)"',
+        res_id = [
-                  r'<object id="vid_(.+?)"',
+            r'data-video-id="(.+?)"',
-                  r'<meta name="og:image" content=".*/(.+?)-(.+?)/.+.jpg"',
+            r'<object id="vid_(.+?)"',
-                  ]
+            r'<meta name="og:image" content=".*/(.+?)-(.+?)/.+.jpg"',
        ]
        return self._search_regex(res_id, webpage, 'video id')
    def _real_extract(self, url):
@ -68,7 +71,7 @@ class IGNIE(InfoExtractor):
        page_type = mobj.group('type')
        webpage = self._download_webpage(url, name_or_id)
        if page_type == 'articles':
-            video_url = self._search_regex(r'var videoUrl = "(.+?)"', webpage, u'video url')
+            video_url = self._search_regex(r'var videoUrl = "(.+?)"', webpage, 'video url')
            return self.url_result(video_url, ie='IGN')
        elif page_type != 'video':
            multiple_urls = re.findall(
@ -80,41 +83,37 @@ class IGNIE(InfoExtractor):
        video_id = self._find_video_id(webpage)
        result = self._get_video_info(video_id)
        description = self._html_search_regex(self._DESCRIPTION_RE,
-                                              webpage, 'video description',
+            webpage, 'video description', flags=re.DOTALL)
                                              flags=re.DOTALL)
        result['description'] = description
        return result
    def _get_video_info(self, video_id):
        config_url = self._CONFIG_URL_TEMPLATE % video_id
-        config = json.loads(self._download_webpage(config_url, video_id,
+        config = self._download_json(config_url, video_id)
                            u'Downloading video info'))
        media = config['playlist']['media']
        video_url = media['url']
-        return {'id': media['metadata']['videoId'],
+        return {
-                'url': video_url,
+            'id': media['metadata']['videoId'],
-                'ext': determine_ext(video_url),
+            'url': media['url'],
-                'title': media['metadata']['title'],
+            'title': media['metadata']['title'],
-                'thumbnail': media['poster'][0]['url'].replace('{size}', 'grande'),
+            'thumbnail': media['poster'][0]['url'].replace('{size}', 'grande'),
-                }
+        }
 class OneUPIE(IGNIE):
    """Extractor for 1up.com, it uses the ign videos system."""
    _VALID_URL = r'https?://gamevideos\.1up\.com/(?P<type>video)/id/(?P<name_or_id>.+)'
    IE_NAME = '1up.com'
    _DESCRIPTION_RE = r'<div id="vid_summary">(.+?)</div>'
    _TEST = {
-        u'url': u'http://gamevideos.1up.com/video/id/34976',
+        'url': 'http://gamevideos.1up.com/video/id/34976',
-        u'file': u'34976.mp4',
+        'md5': '68a54ce4ebc772e4b71e3123d413163d',
-        u'md5': u'68a54ce4ebc772e4b71e3123d413163d',
+        'info_dict': {
-        u'info_dict': {
+            'id': '34976',
-            u'title': u'Sniper Elite V2 - Trailer',
+            'ext': 'mp4',
-            u'description': u'md5:5d289b722f5a6d940ca3136e9dae89cf',
+            'title': 'Sniper Elite V2 - Trailer',
            'description': 'md5:5d289b722f5a6d940ca3136e9dae89cf',
        }
    }
@ -123,7 +122,6 @@ class OneUPIE(IGNIE):
    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        id = mobj.group('name_or_id')
        result = super(OneUPIE, self)._real_extract(url)
-        result['id'] = id
+        result['id'] = mobj.group('name_or_id')
        return result
--- a/youtube_dl/extractor/kickstarter.py
+++ b/youtube_dl/extractor/kickstarter.py
@ -1,37 +1,39 @@
 # encoding: utf-8
 from __future__ import unicode_literals
 import re
 from .common import InfoExtractor
 class KickStarterIE(InfoExtractor):
-    _VALID_URL = r'https?://www\.kickstarter\.com/projects/(?P<id>\d*)/.*'
+    _VALID_URL = r'https?://www\.kickstarter\.com/projects/(?P<id>[^/]*)/.*'
    _TEST = {
-        u"url": u"https://www.kickstarter.com/projects/1404461844/intersection-the-story-of-josh-grant?ref=home_location",
+        'url': 'https://www.kickstarter.com/projects/1404461844/intersection-the-story-of-josh-grant?ref=home_location',
-        u"file": u"1404461844.mp4",
+        'md5': 'c81addca81327ffa66c642b5d8b08cab',
-        u"md5": u"c81addca81327ffa66c642b5d8b08cab",
+        'info_dict': {
-        u"info_dict": {
+            'id': '1404461844',
-            u"title": u"Intersection: The Story of Josh Grant by Kyle Cowling",
+            'ext': 'mp4',
            'title': 'Intersection: The Story of Josh Grant by Kyle Cowling',
            'description': 'A unique motocross documentary that examines the '
                'life and mind of one of sports most elite athletes: Josh Grant.',
        },
    }
    def _real_extract(self, url):
        m = re.match(self._VALID_URL, url)
        video_id = m.group('id')
-        webpage_src = self._download_webpage(url, video_id)
+        webpage = self._download_webpage(url, video_id)
-        video_url = self._search_regex(r'data-video="(.*?)">',
+        video_url = self._search_regex(r'data-video-url="(.*?)"',
-            webpage_src, u'video URL')
+            webpage, 'video URL')
-        if 'mp4' in video_url:
+        video_title = self._html_search_regex(r'<title>(.*?)</title>',
-            ext = 'mp4'
+            webpage, 'title').rpartition('— Kickstarter')[0].strip()
        else:
            ext = 'flv'
        video_title = self._html_search_regex(r"<title>(.*?)</title>",
            webpage_src, u'title').rpartition(u'\u2014 Kickstarter')[0].strip()
-        results = [{
+        return {
-                    'id': video_id,
+            'id': video_id,
-                    'url': video_url,
+            'url': video_url,
-                    'title': video_title,
+            'title': video_title,
-                    'ext': ext,
+            'description': self._og_search_description(webpage),
-                    }]
+            'thumbnail': self._og_search_thumbnail(webpage),
-        return results
+        }
--- a/youtube_dl/extractor/metacritic.py
+++ b/youtube_dl/extractor/metacritic.py
@ -13,8 +13,9 @@ class MetacriticIE(InfoExtractor):
    _TEST = {
        'url': 'http://www.metacritic.com/game/playstation-4/infamous-second-son/trailers/3698222',
        'file': '3698222.mp4',
        'info_dict': {
            'id': '3698222',
            'ext': 'mp4',
            'title': 'inFamous: Second Son - inSide Sucker Punch: Smoke & Mirrors',
            'description': 'Take a peak behind-the-scenes to see how Sucker Punch brings smoke into the universe of inFAMOUS Second Son on the PS4.',
            'duration': 221,
--- a/youtube_dl/extractor/musicplayon.py
+++ b/youtube_dl/extractor/musicplayon.py
@ -0,0 +1,75 @@
 # encoding: utf-8
 from __future__ import unicode_literals
 import re
 from .common import InfoExtractor
 from ..utils import int_or_none
 class MusicPlayOnIE(InfoExtractor):
    """Information extractor for musicplayon.com video pages.

    The numeric video id is taken from the page URL. Metadata is scraped from
    the page itself, and the format list is one fixed MP4 stream URL plus one
    entry per ``EXT-X-STREAM-INF`` record in the site's m3u8 manifest.
    """
    _VALID_URL = r'https?://(?:.+?\.)?musicplayon\.com/play(?:-touch)?\?(?:v|pl=100&play)=(?P<id>\d+)'
    _TEST = {
        'url': 'http://en.musicplayon.com/play?v=433377',
        'info_dict': {
            'id': '433377',
            'ext': 'mp4',
            'title': 'Rick Ross - Interview On Chelsea Lately (2014)',
            'description': 'Rick Ross Interview On Chelsea Lately',
            'duration': 342,
            'uploader': 'ultrafish',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }

    def _real_extract(self, url):
        """Build the info dict for the video page at ``url``.

        Returns a dict with ``id``, ``title``, ``description``, ``thumbnail``,
        ``uploader``, ``duration``, ``view_count`` and ``formats``.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        page = self._download_webpage(url, video_id)

        title = self._og_search_title(page)
        description = self._og_search_description(page)
        thumbnail = self._og_search_thumbnail(page)
        duration = self._html_search_meta('video:duration', page, 'duration', fatal=False)
        view_count = self._og_search_property('count', page, fatal=False)
        uploader = self._html_search_regex(
            r'<div>by&nbsp;<a href="[^"]+" class="purple">([^<]+)</a></div>', page, 'uploader', fatal=False)

        formats = [
            {
                'url': 'http://media0-eu-nl.musicplayon.com/stream-mobile?id=%s&type=.mp4' % video_id,
                'ext': 'mp4',
            }
        ]

        manifest = self._download_webpage(
            'http://en.musicplayon.com/manifest.m3u8?v=%s' % video_id, video_id, 'Downloading manifest')

        # Every manifest tag starts with '#'; the text before the first one
        # is dropped by the [1:] slice.
        for entry in manifest.split('#')[1:]:
            if not entry.startswith('EXT-X-STREAM-INF:'):
                continue
            # Each record is "<attributes>\n<stream url>\n"; use a dedicated
            # name so the ``url`` argument is not overwritten.
            meta, stream_url, _ = entry.split('\n')
            # Split on the first '=' only so values containing '=' survive.
            params = dict(param.split('=', 1) for param in meta.split(',')[1:])
            # RESOLUTION is "<width>x<height>": width comes first.
            resolution = params['RESOLUTION'].split('x')
            formats.append({
                'url': stream_url,
                'ext': 'mp4',
                'tbr': int(params['BANDWIDTH']),
                'width': int(resolution[0]),
                'height': int(resolution[-1]),
                'format_note': params['NAME'].replace('"', '').strip(),
            })

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'uploader': uploader,
            'duration': int_or_none(duration),
            'view_count': int_or_none(view_count),
            'formats': formats,
        }
--- a/youtube_dl/extractor/nba.py
+++ b/youtube_dl/extractor/nba.py
@ -6,12 +6,13 @@ from .common import InfoExtractor
 class NBAIE(InfoExtractor):
-    _VALID_URL = r'^(?:https?://)?(?:watch\.|www\.)?nba\.com/(?:nba/)?video(/[^?]*?)(?:/index\.html)?(?:\?.*)?$'
+    _VALID_URL = r'https?://(?:watch\.|www\.)?nba\.com/(?:nba/)?video(?P<id>/[^?]*?)(?:/index\.html)?(?:\?.*)?$'
    _TEST = {
        'url': 'http://www.nba.com/video/games/nets/2012/12/04/0021200253-okc-bkn-recap.nba/index.html',
        'file': u'0021200253-okc-bkn-recap.nba.mp4',
        'md5': u'c0edcfc37607344e2ff8f13c378c88a4',
        'info_dict': {
            'id': '0021200253-okc-bkn-recap.nba',
            'ext': 'mp4',
            'description': 'Kevin Durant scores 32 points and dishes out six assists as the Thunder beat the Nets in Brooklyn.',
            'title': 'Thunder vs. Nets',
        },
@ -19,7 +20,7 @@ class NBAIE(InfoExtractor):
    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group(1)
+        video_id = mobj.group('id')
        webpage = self._download_webpage(url, video_id)
@ -33,7 +34,6 @@ class NBAIE(InfoExtractor):
        return {
            'id': shortened_video_id,
            'url': video_url,
            'ext': 'mp4',
            'title': title,
            'description': description,
        }
--- a/youtube_dl/extractor/ntv.py
+++ b/youtube_dl/extractor/ntv.py
@ -5,7 +5,7 @@ import re
 from .common import InfoExtractor
 from ..utils import (
-    RegexNotFoundError,
+    ExtractorError,
    unescapeHTML
 )
@ -98,16 +98,15 @@ class NTVIE(InfoExtractor):
        page = self._download_webpage(url, video_id, 'Downloading page')
-        def extract(patterns, name, page, fatal=False):
+        for pattern in self._VIDEO_ID_REGEXES:
-            for pattern in patterns:
+            mobj = re.search(pattern, page)
-                mobj = re.search(pattern, page)
+            if mobj:
-                if mobj:
+                break
                    return mobj.group(1)
            if fatal:
                raise RegexNotFoundError(u'Unable to extract %s' % name)
            return None
-        video_id = extract(self._VIDEO_ID_REGEXES, 'video id', page, fatal=True)
+        if not mobj:
            raise ExtractorError('No media links available for %s' % video_id)
        video_id = mobj.group(1)
        player = self._download_xml('http://www.ntv.ru/vi%s/' % video_id, video_id, 'Downloading video XML')
        title = unescapeHTML(player.find('./data/title').text)
--- a/youtube_dl/extractor/pyvideo.py
+++ b/youtube_dl/extractor/pyvideo.py
@ -1,3 +1,5 @@
 from __future__ import unicode_literals
 import re
 import os
@ -5,45 +7,50 @@ from .common import InfoExtractor
 class PyvideoIE(InfoExtractor):
-    _VALID_URL = r'(?:http://)?(?:www\.)?pyvideo\.org/video/(?P<id>\d+)/(.*)'
+    _VALID_URL = r'http://(?:www\.)?pyvideo\.org/video/(?P<id>\d+)/(.*)'
-    _TESTS = [{
+
-        u'url': u'http://pyvideo.org/video/1737/become-a-logging-expert-in-30-minutes',
+    _TESTS = [
-        u'file': u'24_4WWkSmNo.mp4',
+        {
-        u'md5': u'de317418c8bc76b1fd8633e4f32acbc6',
+            'url': 'http://pyvideo.org/video/1737/become-a-logging-expert-in-30-minutes',
-        u'info_dict': {
+            'md5': 'de317418c8bc76b1fd8633e4f32acbc6',
-            u"title": u"Become a logging expert in 30 minutes",
+            'info_dict': {
-            u"description": u"md5:9665350d466c67fb5b1598de379021f7",
+                'id': '24_4WWkSmNo',
-            u"upload_date": u"20130320",
+                'ext': 'mp4',
-            u"uploader": u"NextDayVideo",
+                'title': 'Become a logging expert in 30 minutes',
-            u"uploader_id": u"NextDayVideo",
+                'description': 'md5:9665350d466c67fb5b1598de379021f7',
                'upload_date': '20130320',
                'uploader': 'NextDayVideo',
                'uploader_id': 'NextDayVideo',
            },
            'add_ie': ['Youtube'],
        },
-        u'add_ie': ['Youtube'],
+        {
-    },
+            'url': 'http://pyvideo.org/video/2542/gloriajw-spotifywitherikbernhardsson182m4v',
-    {
+            'md5': '5fe1c7e0a8aa5570330784c847ff6d12',
-        u'url': u'http://pyvideo.org/video/2542/gloriajw-spotifywitherikbernhardsson182m4v',
+            'info_dict': {
-        u'md5': u'5fe1c7e0a8aa5570330784c847ff6d12',
+                'id': '2542',
-        u'info_dict': {
+                'ext': 'm4v',
-            u'id': u'2542',
+                'title': 'Gloriajw-SpotifyWithErikBernhardsson182',
-            u'ext': u'm4v',
+            },
            u'title': u'Gloriajw-SpotifyWithErikBernhardsson182',
        },
    },
    ]
    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        webpage = self._download_webpage(url, video_id)
        m_youtube = re.search(r'(https?://www\.youtube\.com/watch\?v=.*)', webpage)
        webpage = self._download_webpage(url, video_id)
        m_youtube = re.search(r'(https?://www\.youtube\.com/watch\?v=.*)', webpage)
        if m_youtube is not None:
            return self.url_result(m_youtube.group(1), 'Youtube')
-        title = self._html_search_regex(r'<div class="section">.*?<h3>([^>]+?)</h3>',
+        title = self._html_search_regex(
-            webpage, u'title', flags=re.DOTALL)
+            r'<div class="section">.*?<h3>([^>]+?)</h3>', webpage, 'title', flags=re.DOTALL)
-        video_url = self._search_regex([r'<source src="(.*?)"',
+        video_url = self._search_regex(
-            r'<dt>Download</dt>.*?<a href="(.+?)"'],
+            [r'<source src="(.*?)"', r'<dt>Download</dt>.*?<a href="(.+?)"'],
-            webpage, u'video url', flags=re.DOTALL)
+            webpage, 'video url', flags=re.DOTALL)
        return {
            'id': video_id,
            'title': os.path.splitext(title)[0],
--- a/youtube_dl/extractor/roxwel.py
+++ b/youtube_dl/extractor/roxwel.py
@ -1,5 +1,6 @@
 from __future__ import unicode_literals
 import re
 import json
 from .common import InfoExtractor
 from ..utils import unified_strdate, determine_ext
@ -9,41 +10,44 @@ class RoxwelIE(InfoExtractor):
    _VALID_URL = r'https?://www\.roxwel\.com/player/(?P<filename>.+?)(\.|\?|$)'
    _TEST = {
-        u'url': u'http://www.roxwel.com/player/passionpittakeawalklive.html',
+        'url': 'http://www.roxwel.com/player/passionpittakeawalklive.html',
-        u'file': u'passionpittakeawalklive.flv',
+        'info_dict': {
-        u'md5': u'd9dea8360a1e7d485d2206db7fe13035',
+            'id': 'passionpittakeawalklive',
-        u'info_dict': {
+            'ext': 'flv',
-            u'title': u'Take A Walk (live)',
+            'title': 'Take A Walk (live)',
-            u'uploader': u'Passion Pit',
+            'uploader': 'Passion Pit',
-            u'description': u'Passion Pit performs "Take A Walk\" live at The Backyard in Austin, Texas. ',
+            'uploader_id': 'passionpit',
            'upload_date': '20120928',
            'description': 'Passion Pit performs "Take A Walk\" live at The Backyard in Austin, Texas. ',
        },
-        u'skip': u'Requires rtmpdump',
+        'params': {
            # rtmp download
            'skip_download': True,
        }
    }
    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        filename = mobj.group('filename')
        info_url = 'http://www.roxwel.com/api/videos/%s' % filename
-        info_page = self._download_webpage(info_url, filename,
+        info = self._download_json(info_url, filename)
                                           u'Downloading video info')
        self.report_extraction(filename)
        info = json.loads(info_page)
        rtmp_rates = sorted([int(r.replace('flv_', '')) for r in info['media_rates'] if r.startswith('flv_')])
        best_rate = rtmp_rates[-1]
        url_page_url = 'http://roxwel.com/pl_one_time.php?filename=%s&quality=%s' % (filename, best_rate)
-        rtmp_url = self._download_webpage(url_page_url, filename, u'Downloading video url')
+        rtmp_url = self._download_webpage(url_page_url, filename, 'Downloading video url')
        ext = determine_ext(rtmp_url)
        if ext == 'f4v':
            rtmp_url = rtmp_url.replace(filename, 'mp4:%s' % filename)
-        return {'id': filename,
+        return {
-                'title': info['title'],
+            'id': filename,
-                'url': rtmp_url,
+            'title': info['title'],
-                'ext': 'flv',
+            'url': rtmp_url,
-                'description': info['description'],
+            'ext': 'flv',
-                'thumbnail': info.get('player_image_url') or info.get('image_url_large'),
+            'description': info['description'],
-                'uploader': info['artist'],
+            'thumbnail': info.get('player_image_url') or info.get('image_url_large'),
-                'uploader_id': info['artistname'],
+            'uploader': info['artist'],
-                'upload_date': unified_strdate(info['dbdate']),
+            'uploader_id': info['artistname'],
-                }
+            'upload_date': unified_strdate(info['dbdate']),
        }
--- a/youtube_dl/extractor/rutube.py
+++ b/youtube_dl/extractor/rutube.py
@ -2,7 +2,6 @@
 from __future__ import unicode_literals
 import re
 import json
 import itertools
 from .common import InfoExtractor
@ -20,8 +19,9 @@ class RutubeIE(InfoExtractor):
    _TEST = {
        'url': 'http://rutube.ru/video/3eac3b4561676c17df9132a9a1e62e3e/',
        'file': '3eac3b4561676c17df9132a9a1e62e3e.mp4',
        'info_dict': {
            'id': '3eac3b4561676c17df9132a9a1e62e3e',
            'ext': 'mp4',
            'title': 'Раненный кенгуру забежал в аптеку',
            'description': 'http://www.ntdtv.ru ',
            'duration': 80,
@ -38,15 +38,15 @@ class RutubeIE(InfoExtractor):
    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
-        
+
-        api_response = self._download_webpage('http://rutube.ru/api/video/%s/?format=json' % video_id,
+        video = self._download_json(
-                                              video_id, 'Downloading video JSON')
+            'http://rutube.ru/api/video/%s/?format=json' % video_id,
-        video = json.loads(api_response)
+            video_id, 'Downloading video JSON')
-        
+
-        api_response = self._download_webpage('http://rutube.ru/api/play/trackinfo/%s/?format=json' % video_id,
+        trackinfo = self._download_json(
-                                              video_id, 'Downloading trackinfo JSON')
+            'http://rutube.ru/api/play/trackinfo/%s/?format=json' % video_id,
-        trackinfo = json.loads(api_response)
+            video_id, 'Downloading trackinfo JSON')
-        
+
        # Some videos don't have the author field
        author = trackinfo.get('author') or {}
        m3u8_url = trackinfo['video_balancer'].get('m3u8')
@ -79,10 +79,9 @@ class RutubeChannelIE(InfoExtractor):
    def _extract_videos(self, channel_id, channel_title=None):
        entries = []
        for pagenum in itertools.count(1):
-            api_response = self._download_webpage(
+            page = self._download_json(
                self._PAGE_TEMPLATE % (channel_id, pagenum),
                channel_id, 'Downloading page %s' % pagenum)
            page = json.loads(api_response)
            results = page['results']
            if not results:
                break
@ -108,10 +107,9 @@ class RutubeMovieIE(RutubeChannelIE):
    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        movie_id = mobj.group('id')
-        api_response = self._download_webpage(
+        movie = self._download_json(
            self._MOVIE_TEMPLATE % movie_id, movie_id,
            'Downloading movie JSON')
        movie = json.loads(api_response)
        movie_name = movie['name']
        return self._extract_videos(movie_id, movie_name)
--- a/youtube_dl/extractor/tf1.py
+++ b/youtube_dl/extractor/tf1.py
@ -1,33 +1,37 @@
 # coding: utf-8
 from __future__ import unicode_literals
 import json
 import re
 from .common import InfoExtractor
 class TF1IE(InfoExtractor):
    """TF1 uses the wat.tv player."""
-    _VALID_URL = r'http://videos\.tf1\.fr/.*-(.*?)\.html'
+    _VALID_URL = r'http://videos\.tf1\.fr/.*-(?P<id>.*?)\.html'
    _TEST = {
-        u'url': u'http://videos.tf1.fr/auto-moto/citroen-grand-c4-picasso-2013-presentation-officielle-8062060.html',
+        'url': 'http://videos.tf1.fr/auto-moto/citroen-grand-c4-picasso-2013-presentation-officielle-8062060.html',
-        u'file': u'10635995.mp4',
+        'info_dict': {
-        u'md5': u'2e378cc28b9957607d5e88f274e637d8',
+            'id': '10635995',
-        u'info_dict': {
+            'ext': 'mp4',
-            u'title': u'Citroën Grand C4 Picasso 2013 : présentation officielle',
+            'title': 'Citroën Grand C4 Picasso 2013 : présentation officielle',
-            u'description': u'Vidéo officielle du nouveau Citroën Grand C4 Picasso, lancé à l\'automne 2013.',
+            'description': 'Vidéo officielle du nouveau Citroën Grand C4 Picasso, lancé à l\'automne 2013.',
        },
        'params': {
            # Sometimes wat serves the whole file with the --test option
            'skip_download': True,
        },
        u'skip': u'Sometimes wat serves the whole file with the --test option',
    }
    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
-        id = mobj.group(1)
+        video_id = mobj.group('id')
-        webpage = self._download_webpage(url, id)
+        webpage = self._download_webpage(url, video_id)
-        embed_url = self._html_search_regex(r'"(https://www.wat.tv/embedframe/.*?)"',
+        embed_url = self._html_search_regex(
-                                webpage, 'embed url')
+            r'"(https://www.wat.tv/embedframe/.*?)"', webpage, 'embed url')
-        embed_page = self._download_webpage(embed_url, id, u'Downloading embed player page')
+        embed_page = self._download_webpage(embed_url, video_id,
            'Downloading embed player page')
        wat_id = self._search_regex(r'UVID=(.*?)&', embed_page, 'wat id')
-        wat_info = self._download_webpage('http://www.wat.tv/interface/contentv3/%s' % wat_id, id, u'Downloading Wat info')
+        wat_info = self._download_json(
-        wat_info = json.loads(wat_info)['media']
+            'http://www.wat.tv/interface/contentv3/%s' % wat_id, video_id)
-        wat_url = wat_info['url']
+        return self.url_result(wat_info['media']['url'], 'Wat')
        return self.url_result(wat_url, 'Wat')
--- a/youtube_dl/extractor/vk.py
+++ b/youtube_dl/extractor/vk.py
@ -16,7 +16,7 @@ from ..utils import (
 class VKIE(InfoExtractor):
    IE_NAME = 'vk.com'
-    _VALID_URL = r'https?://vk\.com/(?:video_ext\.php\?.*?\boid=(?P<oid>\d+).*?\bid=(?P<id>\d+)|(?:videos.*?\?.*?z=)?video(?P<videoid>.*?)(?:\?|%2F|$))'
+    _VALID_URL = r'https?://vk\.com/(?:video_ext\.php\?.*?\boid=(?P<oid>-?\d+).*?\bid=(?P<id>\d+)|(?:videos.*?\?.*?z=)?video(?P<videoid>.*?)(?:\?|%2F|$))'
    _NETRC_MACHINE = 'vk'
    _TESTS = [
--- a/youtube_dl/extractor/wat.py
+++ b/youtube_dl/extractor/wat.py
@ -1,37 +1,37 @@
 # coding: utf-8
 from __future__ import unicode_literals
 import json
 import re
 from .common import InfoExtractor
 from ..utils import (
    unified_strdate,
 )
 class WatIE(InfoExtractor):
-    _VALID_URL=r'http://www\.wat\.tv/.*-(?P<shortID>.*?)_.*?\.html'
+    _VALID_URL = r'http://www\.wat\.tv/.*-(?P<shortID>.*?)_.*?\.html'
    IE_NAME = 'wat.tv'
    _TEST = {
-        u'url': u'http://www.wat.tv/video/world-war-philadelphia-vost-6bv55_2fjr7_.html',
+        'url': 'http://www.wat.tv/video/world-war-philadelphia-vost-6bv55_2fjr7_.html',
-        u'file': u'10631273.mp4',
+        'info_dict': {
-        u'md5': u'd8b2231e1e333acd12aad94b80937e19',
+            'id': '10631273',
-        u'info_dict': {
+            'ext': 'mp4',
-            u'title': u'World War Z - Philadelphia VOST',
+            'title': 'World War Z - Philadelphia VOST',
-            u'description': u'La menace est partout. Que se passe-t-il à Philadelphia ?\r\nWORLD WAR Z, avec Brad Pitt, au cinéma le 3 juillet.\r\nhttp://www.worldwarz.fr',
+            'description': 'La menace est partout. Que se passe-t-il à Philadelphia ?\r\nWORLD WAR Z, avec Brad Pitt, au cinéma le 3 juillet.\r\nhttp://www.worldwarz.fr',
        },
        'params': {
            # Sometimes wat serves the whole file with the --test option
            'skip_download': True,
        },
        u'skip': u'Sometimes wat serves the whole file with the --test option',
    }
-    
+
    def download_video_info(self, real_id):
        # 'contentv4' is used in the website, but it also returns the related
        # videos, we don't need them
-        info = self._download_webpage('http://www.wat.tv/interface/contentv3/' + real_id, real_id, 'Downloading video info')
+        info = self._download_json('http://www.wat.tv/interface/contentv3/' + real_id, real_id)
        info = json.loads(info)
        return info['media']
    def _real_extract(self, url):
        def real_id_for_chapter(chapter):
            return chapter['tc_start'].split('-')[0]
@ -56,17 +56,17 @@ class WatIE(InfoExtractor):
            entries = [self.url_result(chapter_url) for chapter_url in chapter_urls]
            return self.playlist_result(entries, real_id, video_info['title'])
        upload_date = None
        if 'date_diffusion' in first_chapter:
            upload_date = unified_strdate(first_chapter['date_diffusion'])
        # Otherwise we can continue and extract just one part, we have to use
        # the short id for getting the video url
-        info = {'id': real_id,
+        return {
-                'url': 'http://wat.tv/get/android5/%s.mp4' % real_id,
+            'id': real_id,
-                'ext': 'mp4',
+            'url': 'http://wat.tv/get/android5/%s.mp4' % real_id,
-                'title': first_chapter['title'],
+            'title': first_chapter['title'],
-                'thumbnail': first_chapter['preview'],
+            'thumbnail': first_chapter['preview'],
-                'description': first_chapter['description'],
+            'description': first_chapter['description'],
-                'view_count': video_info['views'],
+            'view_count': video_info['views'],
-                }
+            'upload_date': upload_date,
-        if 'date_diffusion' in first_chapter:
+        }
            info['upload_date'] = unified_strdate(first_chapter['date_diffusion'])
        return info
--- a/youtube_dl/extractor/wdr.py
+++ b/youtube_dl/extractor/wdr.py
@ -4,9 +4,10 @@ import re
 from .common import InfoExtractor
 from ..utils import (
-    unified_strdate,
+    compat_parse_qs,
    compat_urlparse,
    determine_ext,
    unified_strdate,
 )
@ -111,4 +112,85 @@ class WDRIE(InfoExtractor):
            'description': description,
            'thumbnail': thumbnail,
            'upload_date': upload_date,
-        }
+        }
 class WDRMausIE(InfoExtractor):
    """Extractor for video pages on wdrmaus.de ("Sendung mit der Maus")."""

    # Raw string: the pattern contains regex escapes (``\.``) that must reach
    # the ``re`` module unchanged and are invalid escapes in a normal literal.
    _VALID_URL = r'http://(?:www\.)?wdrmaus\.de/(?:[^/]+/){,2}(?P<id>[^/?#]+)(?:/index\.php5|(?<!index)\.php5|/(?:$|[?#]))'
    IE_DESC = 'Sendung mit der Maus'
    _TESTS = [{
        'url': 'http://www.wdrmaus.de/aktuelle-sendung/index.php5',
        'info_dict': {
            'id': 'aktuelle-sendung',
            'ext': 'mp4',
            'thumbnail': r're:^http://.+\.jpg',
            'upload_date': 're:^[0-9]{8}$',
            'title': 're:^[0-9.]{10} - Aktuelle Sendung$',
        }
    }, {
        'url': 'http://www.wdrmaus.de/sachgeschichten/sachgeschichten/40_jahre_maus.php5',
        'md5': '3b1227ca3ed28d73ec5737c65743b2a3',
        'info_dict': {
            'id': '40_jahre_maus',
            'ext': 'mp4',
            'thumbnail': r're:^http://.+\.jpg',
            'upload_date': '20131007',
            'title': '12.03.2011 - 40 Jahre Maus',
        }
    }]
    def _real_extract(self, url):
        """Build the info dict for the video page at *url*.

        The page is downloaded and scanned for the ``startVideo`` link (whose
        query string carries the stream URL and the thumbnail path), the air
        date and the headline.  Returns a dict with the keys ``id``,
        ``title``, ``formats``, ``thumbnail`` and ``upload_date``.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        webpage = self._download_webpage(url, video_id)
        # Query string of the "start video" link; parsed into fields below.
        param_code = self._html_search_regex(
            r'<a href="\?startVideo=1&amp;([^"]+)"', webpage, 'parameters')
        title_date = self._search_regex(
            r'<div class="sendedatum"><p>Sendedatum:\s*([0-9\.]+)</p>',
            webpage, 'air date')
        title_str = self._html_search_regex(
            r'<h1>(.*?)</h1>', webpage, 'title')
        # The title is the air date followed by the page headline.
        title = '%s - %s' % (title_date, title_str)
        upload_date = unified_strdate(
            self._html_search_meta('dc.date', webpage))
        fields = compat_parse_qs(param_code)
        video_url = fields['firstVideo'][0]
        # startPicture may be relative, so resolve it against the page URL.
        thumbnail = compat_urlparse.urljoin(url, fields['startPicture'][0])
        formats = [{
            'format_id': 'rtmp',
            'url': video_url,
        }]
        # Best effort (fatal=False): without the table only the format above
        # is offered.  Presumably the helper returns a falsy value on failure.
        jscode = self._download_webpage(
            'http://www.wdrmaus.de/codebase/js/extended-medien.min.js',
            video_id, fatal=False,
            note='Downloading URL translation table',
            errnote='Could not download URL translation table')
        if jscode:
            # Each table entry pairs a stream prefix with a download prefix;
            # the first entry whose stream prefix matches yields an HTTP URL.
            for m in re.finditer(
                    r"stream:\s*'dslSrc=(?P<stream>[^']+)',\s*download:\s*'(?P<dl>[^']+)'\s*\}",
                    jscode):
                if video_url.startswith(m.group('stream')):
                    http_url = video_url.replace(
                        m.group('stream'), m.group('dl'))
                    formats.append({
                        'format_id': 'http',
                        'url': http_url,
                    })
                    break
        self._sort_formats(formats)
        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'thumbnail': thumbnail,
            'upload_date': upload_date,
        }
 # TODO test _1
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@ -7,13 +7,13 @@ import itertools
 import json
 import os.path
 import re
 import string
 import struct
 import traceback
 import zlib
 from .common import InfoExtractor, SearchInfoExtractor
 from .subtitles import SubtitlesInfoExtractor
 from ..jsinterp import JSInterpreter
 from ..utils import (
    compat_chr,
    compat_parse_qs,
@ -438,113 +438,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
    def _parse_sig_js(self, jscode):
        funcname = self._search_regex(
            r'signature=([a-zA-Z]+)', jscode,
-            u'Initial JS player signature function name')
+             u'Initial JS player signature function name')
-        functions = {}
+        jsi = JSInterpreter(jscode)
-
+        initial_function = jsi.extract_function(funcname)
        def argidx(varname):
            return string.lowercase.index(varname)
        def interpret_statement(stmt, local_vars, allow_recursion=20):
            if allow_recursion < 0:
                raise ExtractorError(u'Recursion limit reached')
            if stmt.startswith(u'var '):
                stmt = stmt[len(u'var '):]
            ass_m = re.match(r'^(?P<out>[a-z]+)(?:\[(?P<index>[^\]]+)\])?' +
                             r'=(?P<expr>.*)$', stmt)
            if ass_m:
                if ass_m.groupdict().get('index'):
                    def assign(val):
                        lvar = local_vars[ass_m.group('out')]
                        idx = interpret_expression(ass_m.group('index'),
                                                   local_vars, allow_recursion)
                        assert isinstance(idx, int)
                        lvar[idx] = val
                        return val
                    expr = ass_m.group('expr')
                else:
                    def assign(val):
                        local_vars[ass_m.group('out')] = val
                        return val
                    expr = ass_m.group('expr')
            elif stmt.startswith(u'return '):
                assign = lambda v: v
                expr = stmt[len(u'return '):]
            else:
                raise ExtractorError(
                    u'Cannot determine left side of statement in %r' % stmt)
            v = interpret_expression(expr, local_vars, allow_recursion)
            return assign(v)
        def interpret_expression(expr, local_vars, allow_recursion):
            if expr.isdigit():
                return int(expr)
            if expr.isalpha():
                return local_vars[expr]
            m = re.match(r'^(?P<in>[a-z]+)\.(?P<member>.*)$', expr)
            if m:
                member = m.group('member')
                val = local_vars[m.group('in')]
                if member == 'split("")':
                    return list(val)
                if member == 'join("")':
                    return u''.join(val)
                if member == 'length':
                    return len(val)
                if member == 'reverse()':
                    return val[::-1]
                slice_m = re.match(r'slice\((?P<idx>.*)\)', member)
                if slice_m:
                    idx = interpret_expression(
                        slice_m.group('idx'), local_vars, allow_recursion-1)
                    return val[idx:]
            m = re.match(
                r'^(?P<in>[a-z]+)\[(?P<idx>.+)\]$', expr)
            if m:
                val = local_vars[m.group('in')]
                idx = interpret_expression(m.group('idx'), local_vars,
                                           allow_recursion-1)
                return val[idx]
            m = re.match(r'^(?P<a>.+?)(?P<op>[%])(?P<b>.+?)$', expr)
            if m:
                a = interpret_expression(m.group('a'),
                                         local_vars, allow_recursion)
                b = interpret_expression(m.group('b'),
                                         local_vars, allow_recursion)
                return a % b
            m = re.match(
                r'^(?P<func>[a-zA-Z$]+)\((?P<args>[a-z0-9,]+)\)$', expr)
            if m:
                fname = m.group('func')
                if fname not in functions:
                    functions[fname] = extract_function(fname)
                argvals = [int(v) if v.isdigit() else local_vars[v]
                           for v in m.group('args').split(',')]
                return functions[fname](argvals)
            raise ExtractorError(u'Unsupported JS expression %r' % expr)
        def extract_function(funcname):
            func_m = re.search(
                r'function ' + re.escape(funcname) +
                r'\((?P<args>[a-z,]+)\){(?P<code>[^}]+)}',
                jscode)
            argnames = func_m.group('args').split(',')
            def resf(args):
                local_vars = dict(zip(argnames, args))
                for stmt in func_m.group('code').split(';'):
                    res = interpret_statement(stmt, local_vars)
                return res
            return resf
        initial_function = extract_function(funcname)
        return lambda s: initial_function([s])
    def _parse_sig_swf(self, file_contents):
@ -1549,7 +1446,9 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
                break
            more = self._download_json(
-                'https://youtube.com/%s' % mobj.group('more'), playlist_id, 'Downloading page #%s' % page_num)
+                'https://youtube.com/%s' % mobj.group('more'), playlist_id,
                'Downloading page #%s' % page_num,
                transform_source=uppercase_escape)
            content_html = more['content_html']
            more_widget_html = more['load_more_widget_html']
@ -1712,7 +1611,7 @@ class YoutubeUserIE(InfoExtractor):
 class YoutubeSearchIE(SearchInfoExtractor):
    IE_DESC = u'YouTube.com searches'
-    _API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc'
+    _API_URL = u'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc'
    _MAX_RESULTS = 1000
    IE_NAME = u'youtube:search'
    _SEARCH_KEY = 'ytsearch'
@ -1723,9 +1622,12 @@ class YoutubeSearchIE(SearchInfoExtractor):
        video_ids = []
        pagenum = 0
        limit = n
        PAGE_SIZE = 50
-        while (50 * pagenum) < limit:
+        while (PAGE_SIZE * pagenum) < limit:
-            result_url = self._API_URL % (compat_urllib_parse.quote_plus(query), (50*pagenum)+1)
+            result_url = self._API_URL % (
                compat_urllib_parse.quote_plus(query.encode('utf-8')),
                (PAGE_SIZE * pagenum) + 1)
            data_json = self._download_webpage(
                result_url, video_id=u'query "%s"' % query,
                note=u'Downloading page %s' % (pagenum + 1),
@ -1836,11 +1738,10 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
        feed_entries = []
        paging = 0
        for i in itertools.count(1):
-            info = self._download_webpage(self._FEED_TEMPLATE % paging,
+            info = self._download_json(self._FEED_TEMPLATE % paging,
                                          u'%s feed' % self._FEED_NAME,
                                          u'Downloading page %s' % i)
-            info = json.loads(info)
+            feed_html = info.get('feed_html') or info.get('content_html')
            feed_html = info['feed_html']
            m_ids = re.finditer(r'"/watch\?v=(.*?)["&]', feed_html)
            ids = orderedSet(m.group(1) for m in m_ids)
            feed_entries.extend(
--- a/youtube_dl/jsinterp.py
+++ b/youtube_dl/jsinterp.py
@ -0,0 +1,116 @@
 from __future__ import unicode_literals
 import re
 from .utils import (
    ExtractorError,
 )
 class JSInterpreter(object):
    """Interpreter for a very small subset of JavaScript.

    Only what the methods below recognise is supported: assignments
    (optionally to an indexed element), ``return``, integer literals,
    variable names, the members ``split("")``, ``join("")``, ``length``,
    ``reverse()`` and ``slice(n)``, indexing, the ``%`` operator and calls
    to other functions defined in the same source text.
    """

    def __init__(self, code):
        """Store the JavaScript source *code* to look functions up in."""
        self.code = code
        # Cache of already extracted functions, keyed by function name.
        self._functions = {}
    def interpret_statement(self, stmt, local_vars, allow_recursion=20):
        """Execute one statement and return the value it produced.

        *stmt* is either an assignment (an optional leading ``var `` is
        ignored) or a ``return``.  *local_vars* maps variable names to values
        and is updated in place by assignments.  Raises ExtractorError when
        *allow_recursion* is negative or the statement is of another kind.
        """
        if allow_recursion < 0:
            raise ExtractorError('Recursion limit reached')
        if stmt.startswith('var '):
            stmt = stmt[len('var '):]
        ass_m = re.match(r'^(?P<out>[a-z]+)(?:\[(?P<index>[^\]]+)\])?' +
                         r'=(?P<expr>.*)$', stmt)
        if ass_m:
            expr = ass_m.group('expr')
            if ass_m.groupdict().get('index'):
                # Element assignment: name[index] = value
                def assign(val):
                    lvar = local_vars[ass_m.group('out')]
                    idx = self.interpret_expression(
                        ass_m.group('index'), local_vars, allow_recursion)
                    assert isinstance(idx, int)
                    lvar[idx] = val
                    return val
            else:
                # Plain assignment: name = value
                def assign(val):
                    local_vars[ass_m.group('out')] = val
                    return val
        elif stmt.startswith('return '):
            assign = lambda v: v
            expr = stmt[len('return '):]
        else:
            raise ExtractorError(
                'Cannot determine left side of statement in %r' % stmt)
        v = self.interpret_expression(expr, local_vars, allow_recursion)
        return assign(v)
    def interpret_expression(self, expr, local_vars, allow_recursion):
        """Evaluate the expression *expr* against *local_vars*.

        The forms are tried in a fixed order (literal, variable, member
        access, indexing, modulo, function call); the first one that applies
        decides the result.  Raises ExtractorError when none applies.
        """
        if expr.isdigit():
            return int(expr)
        if expr.isalpha():
            return local_vars[expr]
        m = re.match(r'^(?P<in>[a-z]+)\.(?P<member>.*)$', expr)
        if m:
            member = m.group('member')
            val = local_vars[m.group('in')]
            if member == 'split("")':
                return list(val)
            if member == 'join("")':
                return ''.join(val)
            if member == 'length':
                return len(val)
            if member == 'reverse()':
                return val[::-1]
            slice_m = re.match(r'slice\((?P<idx>.*)\)', member)
            if slice_m:
                idx = self.interpret_expression(
                    slice_m.group('idx'), local_vars, allow_recursion - 1)
                return val[idx:]
            # Unknown member: fall through to the remaining forms, which end
            # in the "unsupported expression" error below.
        m = re.match(
            r'^(?P<in>[a-z]+)\[(?P<idx>.+)\]$', expr)
        if m:
            val = local_vars[m.group('in')]
            idx = self.interpret_expression(
                m.group('idx'), local_vars, allow_recursion - 1)
            return val[idx]
        m = re.match(r'^(?P<a>.+?)(?P<op>[%])(?P<b>.+?)$', expr)
        if m:
            a = self.interpret_expression(
                m.group('a'), local_vars, allow_recursion)
            b = self.interpret_expression(
                m.group('b'), local_vars, allow_recursion)
            return a % b
        m = re.match(
            r'^(?P<func>[a-zA-Z$]+)\((?P<args>[a-z0-9,]+)\)$', expr)
        if m:
            fname = m.group('func')
            if fname not in self._functions:
                self._functions[fname] = self.extract_function(fname)
            argvals = [int(v) if v.isdigit() else local_vars[v]
                       for v in m.group('args').split(',')]
            return self._functions[fname](argvals)
        raise ExtractorError('Unsupported JS expression %r' % expr)
    def extract_function(self, funcname):
        """Return a Python callable implementing the JS function *funcname*.

        Both ``function name(args){...}`` and ``name = function(args){...}``
        definitions are recognised.  The callable takes one list holding the
        argument values and returns the value of the last executed statement
        (``None`` if the body has no statements).  Raises ExtractorError if
        the function cannot be found in the source.
        """
        func_m = re.search(
            (r'(?:function %s|%s\s*=\s*function)' % (
                re.escape(funcname), re.escape(funcname))) +
            r'\((?P<args>[a-z,]+)\){(?P<code>[^}]+)}',
            self.code)
        if func_m is None:
            raise ExtractorError('Could not find JS function %r' % funcname)
        argnames = func_m.group('args').split(',')
        # Split once here instead of on every call of the returned function.
        statements = func_m.group('code').split(';')
        def resf(args):
            local_vars = dict(zip(argnames, args))
            res = None
            for stmt in statements:
                # A trailing ';' (or ';;') yields empty statements, which are
                # no-ops in JavaScript and must not be interpreted.
                if not stmt.strip():
                    continue
                res = self.interpret_statement(stmt, local_vars)
            return res
        return resf
--- a/youtube_dl/postprocessor/ffmpeg.py
+++ b/youtube_dl/postprocessor/ffmpeg.py
@ -55,8 +55,9 @@ class FFmpegPostProcessor(PostProcessor):
        if self._downloader.params.get('verbose', False):
            self._downloader.to_screen(u'[debug] ffmpeg command line: %s' % shell_quote(cmd))
-        p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+        bcmd = [self._downloader.encode(c) for c in cmd]
-        stdout,stderr = p.communicate()
+        p = subprocess.Popen(bcmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        stdout, stderr = p.communicate()
        if p.returncode != 0:
            stderr = stderr.decode('utf-8', 'replace')
            msg = stderr.strip().split('\n')[-1]
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@ -539,7 +539,6 @@ def encodeFilename(s, for_subprocess=False):
        encoding = 'utf-8'
    return s.encode(encoding, 'ignore')
 def decodeOption(optval):
    if optval is None:
        return optval
@ -1269,8 +1268,8 @@ class PagedList(object):
 def uppercase_escape(s):
    """Replace every ``\\UXXXXXXXX`` escape (8 hex digits) in *s* by its character.

    All other text, including other backslash sequences, is left untouched.
    """
    # Local import: the module's import block is outside this hunk.
    import codecs
    # str has no .decode() on Python 3; the codec function accepts text on
    # both Python 2 and 3 and returns a (decoded, consumed_length) pair.
    unicode_escape = codecs.getdecoder('unicode_escape')
    return re.sub(
        r'\\U[0-9a-fA-F]{8}',
        lambda m: unicode_escape(m.group(0))[0],
        s)
 try:
    struct.pack(u'!I', 0)
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@ -1,2 +1,2 @@
-__version__ = '2014.03.28'
+__version__ = '2014.04.02'
`@ -1,2 +1,2 @@`

	`__version__ = '2014.03.28'`	`__version__ = '2014.04.02'`