Merge branch 'master' into use-other-downloaders

commit ab073c9568
Rogério Brito · 2014-02-24 23:11:24 -03:00
34 changed files with 814 additions and 404 deletions

View File

@@ -20,7 +20,7 @@ which means you can modify it, redistribute it or use it however you like.
                                      sure that you have sufficient permissions
                                      (run with sudo if needed)
     -i, --ignore-errors              continue on download errors, for example to
-                                     to skip unavailable videos in a playlist
+                                     skip unavailable videos in a playlist
     --abort-on-error                 Abort downloading of further videos (in the
                                      playlist or the command line) if an error
                                      occurs
@@ -246,7 +246,7 @@ which means you can modify it, redistribute it or use it however you like.
 # CONFIGURATION

-You can configure youtube-dl by placing default arguments (such as `--extract-audio --no-mtime` to always extract the audio and not copy the mtime) into `/etc/youtube-dl.conf` and/or `~/.config/youtube-dl.conf`. On Windows, the configuration file locations are `%APPDATA%\youtube-dl\config.txt` and `C:\Users\<Yourname>\youtube-dl.conf`.
+You can configure youtube-dl by placing default arguments (such as `--extract-audio --no-mtime` to always extract the audio and not copy the mtime) into `/etc/youtube-dl.conf` and/or `~/.config/youtube-dl/config`. On Windows, the configuration file locations are `%APPDATA%\youtube-dl\config.txt` and `C:\Users\<Yourname>\youtube-dl.conf`.

 # OUTPUT TEMPLATE
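For illustration (contents hypothetical, not part of this diff): a file at the new `~/.config/youtube-dl/config` location simply holds default arguments, such as the ones the paragraph above names:

```
--extract-audio
--no-mtime
```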
@@ -357,7 +357,7 @@ If you want to create a build of youtube-dl yourself, you'll need
 ### Adding support for a new site

-If you want to add support for a new site, copy *any* [recently modified](https://github.com/rg3/youtube-dl/commits/master/youtube_dl/extractor) file in `youtube_dl/extractor`, add an import in [`youtube_dl/extractor/__init__.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/__init__.py). Have a look at [`youtube_dl/common/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L38). Don't forget to run the tests with `python test/test_download.py Test_Download.test_YourExtractor`! For a detailed tutorial, refer to [this blog post](http://filippo.io/add-support-for-a-new-video-site-to-youtube-dl/).
+If you want to add support for a new site, copy *any* [recently modified](https://github.com/rg3/youtube-dl/commits/master/youtube_dl/extractor) file in `youtube_dl/extractor`, add an import in [`youtube_dl/extractor/__init__.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/__init__.py). Have a look at [`youtube_dl/common/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L38). Don't forget to run the tests with `python test/test_download.py TestDownload.test_YourExtractor`! For a detailed tutorial, refer to [this blog post](http://filippo.io/add-support-for-a-new-video-site-to-youtube-dl/).

 # BUGS
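A rough sketch of the minimal extractor that paragraph describes (site and class names hypothetical; the helper methods shown are the same ones used by the extractors in this commit):

```python
# youtube_dl/extractor/yourextractor.py -- hypothetical new extractor
import re

from .common import InfoExtractor


class YourExtractorIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?yourextractor\.com/watch/(?P<id>[0-9]+)'

    def _real_extract(self, url):
        video_id = re.match(self._VALID_URL, url).group('id')
        webpage = self._download_webpage(url, video_id)
        return {
            'id': video_id,
            'title': self._og_search_title(webpage),
            # Site-specific: pull the media URL out of the page
            'url': self._html_search_regex(
                r"file: '([^']+\.mp4)'", webpage, 'video URL'),
            'ext': 'mp4',
        }
```

It would then be imported from `youtube_dl/extractor/__init__.py`, as the new extractors in this commit are.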

View File

@@ -14,9 +14,9 @@
 set -e

-skip_tests=false
-if [ "$1" = '--skip-test' ]; then
-    skip_tests=true
+skip_tests=true
+if [ "$1" = '--run-tests' ]; then
+    skip_tests=false
     shift
 fi

View File

@@ -18,6 +18,7 @@ from test.helper import (
 import hashlib
 import io
 import json
+import re
 import socket

 import youtube_dl.YoutubeDL
@@ -137,12 +138,21 @@ def generator(test_case):
             with io.open(info_json_fn, encoding='utf-8') as infof:
                 info_dict = json.load(infof)
             for (info_field, expected) in tc.get('info_dict', {}).items():
-                if isinstance(expected, compat_str) and expected.startswith('md5:'):
-                    got = 'md5:' + md5(info_dict.get(info_field))
-                else:
-                    got = info_dict.get(info_field)
-                self.assertEqual(expected, got,
-                    u'invalid value for field %s, expected %r, got %r' % (info_field, expected, got))
+                if isinstance(expected, compat_str) and expected.startswith('re:'):
+                    got = info_dict.get(info_field)
+                    match_str = expected[len('re:'):]
+                    match_rex = re.compile(match_str)
+
+                    self.assertTrue(
+                        isinstance(got, compat_str) and match_rex.match(got),
+                        u'field %s (value: %r) should match %r' % (info_field, got, match_str))
+                else:
+                    if isinstance(expected, compat_str) and expected.startswith('md5:'):
+                        got = 'md5:' + md5(info_dict.get(info_field))
+                    else:
+                        got = info_dict.get(info_field)
+                    self.assertEqual(expected, got,
+                        u'invalid value for field %s, expected %r, got %r' % (info_field, expected, got))

             # If checkable fields are missing from the test case, print the info_dict
             test_info_dict = dict((key, value if not isinstance(value, compat_str) or len(value) < 250 else 'md5:' + md5(value))
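With this change, a test's `info_dict` value may start with `re:` to assert a regular-expression match instead of exact equality (the `md5:` convention is unchanged). A tiny self-contained illustration of the new branch, using the `re:` value the TruTube and iPrima tests later in this commit rely on (the `got` value is hypothetical):

```python
import re

expected = 're:^http:.*\.jpg$'
got = 'http://trutube.tv/thumbs/14880.jpg'  # hypothetical extracted field value

# Mirrors the branch added above: strip the 're:' prefix, compile, match
match_rex = re.compile(expected[len('re:'):])
assert match_rex.match(got) is not None
```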

View File

@@ -170,12 +170,12 @@ class TestPlaylists(unittest.TestCase):
     def test_AcademicEarthCourse(self):
         dl = FakeYDL()
         ie = AcademicEarthCourseIE(dl)
-        result = ie.extract('http://academicearth.org/courses/building-dynamic-websites/')
+        result = ie.extract('http://academicearth.org/playlists/laws-of-nature/')
         self.assertIsPlaylist(result)
-        self.assertEqual(result['id'], 'building-dynamic-websites')
-        self.assertEqual(result['title'], 'Building Dynamic Websites')
-        self.assertEqual(result['description'], u"Today's websites are increasingly dynamic. Pages are no longer static HTML files but instead generated by scripts and database calls. User interfaces are more seamless, with technologies like Ajax replacing traditional page reloads. This course teaches students how to build dynamic websites with Ajax and with Linux, Apache, MySQL, and PHP (LAMP), one of today's most popular frameworks. Students learn how to set up domain names with DNS, how to structure pages with XHTML and CSS, how to program in JavaScript and PHP, how to configure Apache and MySQL, how to design and query databases with SQL, how to use Ajax with both XML and JSON, and how to build mashups. The course explores issues of security, scalability, and cross-browser support and also discusses enterprise-level deployments of websites, including third-party hosting, virtualization, colocation in data centers, firewalling, and load-balancing.")
-        self.assertEqual(len(result['entries']), 10)
+        self.assertEqual(result['id'], 'laws-of-nature')
+        self.assertEqual(result['title'], 'Laws of Nature')
+        self.assertEqual(result['description'],u'Introduce yourself to the laws of nature with these free online college lectures from Yale, Harvard, and MIT.')# u"Today's websites are increasingly dynamic. Pages are no longer static HTML files but instead generated by scripts and database calls. User interfaces are more seamless, with technologies like Ajax replacing traditional page reloads. This course teaches students how to build dynamic websites with Ajax and with Linux, Apache, MySQL, and PHP (LAMP), one of today's most popular frameworks. Students learn how to set up domain names with DNS, how to structure pages with XHTML and CSS, how to program in JavaScript and PHP, how to configure Apache and MySQL, how to design and query databases with SQL, how to use Ajax with both XML and JSON, and how to build mashups. The course explores issues of security, scalability, and cross-browser support and also discusses enterprise-level deployments of websites, including third-party hosting, virtualization, colocation in data centers, firewalling, and load-balancing.")
+        self.assertEqual(len(result['entries']), 4)
     def test_ivi_compilation(self):
         dl = FakeYDL()
@@ -250,5 +250,14 @@ class TestPlaylists(unittest.TestCase):
         self.assertEqual(result['title'], 'python language')
         self.assertTrue(len(result['entries']) == 15)

+    def test_generic_rss_feed(self):
+        dl = FakeYDL()
+        ie = GenericIE(dl)
+        result = ie.extract('http://www.escapistmagazine.com/rss/videos/list/1.xml')
+        self.assertIsPlaylist(result)
+        self.assertEqual(result['id'], 'http://www.escapistmagazine.com/rss/videos/list/1.xml')
+        self.assertEqual(result['title'], 'Zero Punctuation')
+        self.assertTrue(len(result['entries']) > 10)
+
 if __name__ == '__main__':
     unittest.main()

View File

@@ -9,6 +9,7 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 # Various small unit tests
+import io
 import xml.etree.ElementTree

 #from youtube_dl.utils import htmlentity_transform
@@ -21,6 +22,7 @@ from youtube_dl.utils import (
     orderedSet,
     PagedList,
     parse_duration,
+    read_batch_urls,
     sanitize_filename,
     shell_quote,
     smuggle_url,
@@ -250,5 +252,14 @@ class TestUtil(unittest.TestCase):
     def test_struct_unpack(self):
         self.assertEqual(struct_unpack(u'!B', b'\x00'), (0,))

+    def test_read_batch_urls(self):
+        f = io.StringIO(u'''\xef\xbb\xbf foo
+bar\r
+baz
+# More after this line\r
+; or after this
+bam''')
+        self.assertEqual(read_batch_urls(f), [u'foo', u'bar', u'baz', u'bam'])
+
 if __name__ == '__main__':
     unittest.main()
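The test above pins down the intended behavior of `read_batch_urls`: a UTF-8 BOM is stripped, `\r` line endings are tolerated, and lines starting with `#` or `;` are skipped. A minimal sketch consistent with this test (not necessarily the actual `youtube_dl.utils` implementation):

```python
def read_batch_urls(batch_fd):
    # Sketch only; the real youtube_dl.utils.read_batch_urls may differ.
    def fixup(url):
        if url.startswith(u'\xef\xbb\xbf'):  # mis-decoded UTF-8 BOM
            url = url[3:]
        url = url.strip()
        if not url or url[0] in (u'#', u';'):
            return None  # blank line or comment
        return url

    return [url for url in map(fixup, batch_fd) if url is not None]
```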

View File

@@ -46,6 +46,7 @@ __authors__ = (
     'Andreas Schmitz',
     'Michael Kaiser',
     'Niklas Laxström',
+    'David Triendl',
 )

 __license__ = 'Public Domain'
@@ -70,6 +71,7 @@ from .utils import (
     get_cachedir,
     MaxDownloadsReached,
     preferredencoding,
+    read_batch_urls,
     SameFileError,
     setproctitle,
     std_headers,
@@ -208,7 +210,7 @@ def parseOpts(overrideArguments=None):
     general.add_option('-U', '--update',
         action='store_true', dest='update_self', help='update this program to latest version. Make sure that you have sufficient permissions (run with sudo if needed)')
     general.add_option('-i', '--ignore-errors',
-        action='store_true', dest='ignoreerrors', help='continue on download errors, for example to to skip unavailable videos in a playlist', default=False)
+        action='store_true', dest='ignoreerrors', help='continue on download errors, for example to skip unavailable videos in a playlist', default=False)
     general.add_option('--abort-on-error',
         action='store_false', dest='ignoreerrors',
         help='Abort downloading of further videos (in the playlist or the command line) if an error occurs')
@@ -551,21 +553,19 @@ def _real_main(argv=None):
         sys.exit(0)

     # Batch file verification
-    batchurls = []
+    batch_urls = []
     if opts.batchfile is not None:
         try:
             if opts.batchfile == '-':
                 batchfd = sys.stdin
             else:
-                batchfd = open(opts.batchfile, 'r')
-            batchurls = batchfd.readlines()
-            batchurls = [x.strip() for x in batchurls]
-            batchurls = [x for x in batchurls if len(x) > 0 and not re.search(r'^[#/;]', x)]
+                batchfd = io.open(opts.batchfile, 'r', encoding='utf-8', errors='ignore')
+            batch_urls = read_batch_urls(batchfd)
             if opts.verbose:
-                write_string(u'[debug] Batch file urls: ' + repr(batchurls) + u'\n')
+                write_string(u'[debug] Batch file urls: ' + repr(batch_urls) + u'\n')
         except IOError:
             sys.exit(u'ERROR: batch file could not be read')
-    all_urls = batchurls + args
+    all_urls = batch_urls + args
     all_urls = [url.strip() for url in all_urls]

     _enc = preferredencoding()
     all_urls = [url.decode(_enc, 'ignore') if isinstance(url, bytes) else url for url in all_urls]
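Illustration (not part of the diff): with `read_batch_urls`, a file passed via `-a`/`--batch-file` may now be UTF-8 with a BOM and may carry comment lines, e.g. a batch file like

```
# one URL per line; lines starting with '#' or ';' are skipped
http://www.youtube.com/watch?v=BaW_jenozKc
; temporarily disabled entry
```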

View File

@@ -12,7 +12,6 @@ from .http import HttpFD
 from ..utils import (
     struct_pack,
     struct_unpack,
-    compat_urllib_request,
     compat_urlparse,
     format_bytes,
     encodeFilename,
@@ -117,8 +116,8 @@ class FlvReader(io.BytesIO):
         self.read_unsigned_char()
         # flags
         self.read(3)
-        # BootstrapinfoVersion
-        bootstrap_info_version = self.read_unsigned_int()
+        self.read_unsigned_int()  # BootstrapinfoVersion
         # Profile,Live,Update,Reserved
         self.read(1)
         # time scale
@@ -127,15 +126,15 @@ class FlvReader(io.BytesIO):
         self.read_unsigned_long_long()
         # SmpteTimeCodeOffset
         self.read_unsigned_long_long()
-        # MovieIdentifier
-        movie_identifier = self.read_string()
+        self.read_string()  # MovieIdentifier
         server_count = self.read_unsigned_char()
         # ServerEntryTable
         for i in range(server_count):
             self.read_string()
         quality_count = self.read_unsigned_char()
         # QualityEntryTable
-        for i in range(server_count):
+        for i in range(quality_count):
             self.read_string()
         # DrmData
         self.read_string()

View File

@@ -19,6 +19,7 @@ from .bbccouk import BBCCoUkIE
 from .blinkx import BlinkxIE
 from .bliptv import BlipTVIE, BlipTVUserIE
 from .bloomberg import BloombergIE
+from .br import BRIE
 from .breakcom import BreakIE
 from .brightcove import BrightcoveIE
 from .c56 import C56IE
@@ -160,7 +161,7 @@ from .nhl import NHLIE, NHLVideocenterIE
 from .niconico import NiconicoIE
 from .ninegag import NineGagIE
 from .normalboots import NormalbootsIE
-from .novamov import NovamovIE
+from .novamov import NovaMovIE
 from .nowness import NownessIE
 from .nowvideo import NowVideoIE
 from .ooyala import OoyalaIE
@@ -186,6 +187,7 @@ from .rutube import (
     RutubeMovieIE,
     RutubePersonIE,
 )
+from .savefrom import SaveFromIE
 from .servingsys import ServingSysIE
 from .sina import SinaIE
 from .slashdot import SlashdotIE
@@ -224,6 +226,7 @@ from .tinypic import TinyPicIE
 from .toutv import TouTvIE
 from .traileraddict import TrailerAddictIE
 from .trilulilu import TriluliluIE
+from .trutube import TruTubeIE
 from .tube8 import Tube8IE
 from .tudou import TudouIE
 from .tumblr import TumblrIE
@@ -238,6 +241,7 @@ from .vesti import VestiIE
 from .vevo import VevoIE
 from .vice import ViceIE
 from .viddler import ViddlerIE
+from .videobam import VideoBamIE
 from .videodetective import VideoDetectiveIE
 from .videofyme import VideofyMeIE
 from .videopremium import VideoPremiumIE

View File

@@ -5,7 +5,7 @@ from .common import InfoExtractor

 class AcademicEarthCourseIE(InfoExtractor):
-    _VALID_URL = r'^https?://(?:www\.)?academicearth\.org/(?:courses|playlists)/(?P<id>[^?#/]+)'
+    _VALID_URL = r'^https?://(?:www\.)?academicearth\.org/playlists/(?P<id>[^?#/]+)'
     IE_NAME = 'AcademicEarth:Course'

     def _real_extract(self, url):
@@ -14,12 +14,12 @@ class AcademicEarthCourseIE(InfoExtractor):
         webpage = self._download_webpage(url, playlist_id)
         title = self._html_search_regex(
-            r'<h1 class="playlist-name">(.*?)</h1>', webpage, u'title')
+            r'<h1 class="playlist-name"[^>]*?>(.*?)</h1>', webpage, u'title')
         description = self._html_search_regex(
-            r'<p class="excerpt">(.*?)</p>',
+            r'<p class="excerpt"[^>]*?>(.*?)</p>',
             webpage, u'description', fatal=False)
         urls = re.findall(
-            r'<h3 class="lecture-title"><a target="_blank" href="([^"]+)">',
+            r'<li class="lecture-preview">\s*?<a target="_blank" href="([^"]+)">',
             webpage)
         entries = [self.url_result(u) for u in urls]

View File

@@ -162,6 +162,11 @@ class BBCCoUkIE(SubtitlesInfoExtractor):
         mobj = re.match(self._VALID_URL, url)
         group_id = mobj.group('id')

+        webpage = self._download_webpage(url, group_id, 'Downloading video page')
+
+        if re.search(r'id="emp-error" class="notinuk">', webpage):
+            raise ExtractorError('Currently BBC iPlayer TV programmes are available to play in the UK only',
+                expected=True)
+
         playlist = self._download_xml('http://www.bbc.co.uk/iplayer/playlist/%s' % group_id, group_id,
             'Downloading playlist XML')

View File

@@ -0,0 +1,80 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import ExtractorError
+
+
+class BRIE(InfoExtractor):
+    IE_DESC = "Bayerischer Rundfunk Mediathek"
+    _VALID_URL = r"^https?://(?:www\.)?br\.de/mediathek/video/(?:sendungen/)?(?P<id>[a-z0-9\-]+)\.html$"
+    _BASE_URL = "http://www.br.de"
+
+    _TEST = {
+        "url": "http://www.br.de/mediathek/video/anselm-gruen-114.html",
+        "md5": "c4f83cf0f023ba5875aba0bf46860df2",
+        "info_dict": {
+            "id": "2c8d81c5-6fb7-4a74-88d4-e768e5856532",
+            "ext": "mp4",
+            "title": "Feiern und Verzichten",
+            "description": "Anselm Grün: Feiern und Verzichten",
+            "uploader": "BR/Birgit Baier",
+            "upload_date": "20140301"
+        }
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        display_id = mobj.group('id')
+        page = self._download_webpage(url, display_id)
+        xml_url = self._search_regex(
+            r"return BRavFramework\.register\(BRavFramework\('avPlayer_(?:[a-f0-9-]{36})'\)\.setup\({dataURL:'(/mediathek/video/[a-z0-9/~_.-]+)'}\)\);", page, "XMLURL")
+        xml = self._download_xml(self._BASE_URL + xml_url, None)
+
+        videos = [{
+            "id": xml_video.get("externalId"),
+            "title": xml_video.find("title").text,
+            "formats": self._extract_formats(xml_video.find("assets")),
+            "thumbnails": self._extract_thumbnails(xml_video.find("teaserImage/variants")),
+            "description": " ".join(xml_video.find("shareTitle").text.splitlines()),
+            "uploader": xml_video.find("author").text,
+            "upload_date": "".join(reversed(xml_video.find("broadcastDate").text.split("."))),
+            "webpage_url": xml_video.find("permalink").text,
+        } for xml_video in xml.findall("video")]
+
+        if len(videos) > 1:
+            self._downloader.report_warning(
+                'found multiple videos; please '
+                'report this with the video URL to http://yt-dl.org/bug')
+        if not videos:
+            raise ExtractorError('No video entries found')
+        return videos[0]
+
+    def _extract_formats(self, assets):
+        formats = [{
+            "url": asset.find("downloadUrl").text,
+            "ext": asset.find("mediaType").text,
+            "format_id": asset.get("type"),
+            "width": int(asset.find("frameWidth").text),
+            "height": int(asset.find("frameHeight").text),
+            "tbr": int(asset.find("bitrateVideo").text),
+            "abr": int(asset.find("bitrateAudio").text),
+            "vcodec": asset.find("codecVideo").text,
+            "container": asset.find("mediaType").text,
+            "filesize": int(asset.find("size").text),
+        } for asset in assets.findall("asset")
+            if asset.find("downloadUrl") is not None]
+        self._sort_formats(formats)
+        return formats
+
+    def _extract_thumbnails(self, variants):
+        thumbnails = [{
+            "url": self._BASE_URL + variant.find("url").text,
+            "width": int(variant.find("width").text),
+            "height": int(variant.find("height").text),
+        } for variant in variants.findall("variant")]
+        thumbnails.sort(key=lambda x: x["width"] * x["height"], reverse=True)
+        return thumbnails

View File

@@ -23,8 +23,8 @@ class BreakIE(InfoExtractor):
         video_id = mobj.group(1).split("-")[-1]
         embed_url = 'http://www.break.com/embed/%s' % video_id
         webpage = self._download_webpage(embed_url, video_id)
-        info_json = self._search_regex(r'var embedVars = ({.*?});', webpage,
-            'info json', flags=re.DOTALL)
+        info_json = self._search_regex(r'var embedVars = ({.*})\s*?</script>',
+            webpage, 'info json', flags=re.DOTALL)
         info = json.loads(info_json)
         video_url = info['videoUri']
         m_youtube = re.search(r'(https?://www\.youtube\.com/watch\?v=.*)', video_url)

View File

@@ -1,4 +1,6 @@
 # coding: utf-8
+from __future__ import unicode_literals
+
 import re

 from .common import InfoExtractor
@@ -9,11 +11,12 @@ class Canalc2IE(InfoExtractor):
     _VALID_URL = r'http://.*?\.canalc2\.tv/video\.asp\?.*?idVideo=(?P<id>\d+)'
     _TEST = {
-        u'url': u'http://www.canalc2.tv/video.asp?idVideo=12163&voir=oui',
-        u'file': u'12163.mp4',
-        u'md5': u'060158428b650f896c542dfbb3d6487f',
-        u'info_dict': {
-            u'title': u'Terrasses du Numérique'
+        'url': 'http://www.canalc2.tv/video.asp?idVideo=12163&voir=oui',
+        'md5': '060158428b650f896c542dfbb3d6487f',
+        'info_dict': {
+            'id': '12163',
+            'ext': 'mp4',
+            'title': 'Terrasses du Numérique'
         }
     }
@@ -28,10 +31,11 @@ class Canalc2IE(InfoExtractor):
         video_url = 'http://vod-flash.u-strasbg.fr:8080/' + file_name

         title = self._html_search_regex(
-            r'class="evenement8">(.*?)</a>', webpage, u'title')
+            r'class="evenement8">(.*?)</a>', webpage, 'title')

-        return {'id': video_id,
-                'ext': 'mp4',
-                'url': video_url,
-                'title': title,
-                }
+        return {
+            'id': video_id,
+            'ext': 'mp4',
+            'url': video_url,
+            'title': title,
+        }

View File

@@ -1,4 +1,5 @@
 # encoding: utf-8
+from __future__ import unicode_literals
 import re

 from .common import InfoExtractor
@@ -8,73 +9,63 @@ from ..utils import (

 class CinemassacreIE(InfoExtractor):
-    _VALID_URL = r'(?:http://)?(?:www\.)?(?P<url>cinemassacre\.com/(?P<date_Y>[0-9]{4})/(?P<date_m>[0-9]{2})/(?P<date_d>[0-9]{2})/.+?)(?:[/?].*)?'
-    _TESTS = [{
-        u'url': u'http://cinemassacre.com/2012/11/10/avgn-the-movie-trailer/',
-        u'file': u'19911.flv',
-        u'info_dict': {
-            u'upload_date': u'20121110',
-            u'title': u'“Angry Video Game Nerd: The Movie” Trailer',
-            u'description': u'md5:fb87405fcb42a331742a0dce2708560b',
-        },
-        u'params': {
-            # rtmp download
-            u'skip_download': True,
-        },
-    },
-    {
-        u'url': u'http://cinemassacre.com/2013/10/02/the-mummys-hand-1940',
-        u'file': u'521be8ef82b16.flv',
-        u'info_dict': {
-            u'upload_date': u'20131002',
-            u'title': u'The Mummys Hand (1940)',
-        },
-        u'params': {
-            # rtmp download
-            u'skip_download': True,
-        },
-    }]
+    _VALID_URL = r'http://(?:www\.)?cinemassacre\.com/(?P<date_Y>[0-9]{4})/(?P<date_m>[0-9]{2})/(?P<date_d>[0-9]{2})/.+?'
+    _TESTS = [
+        {
+            'url': 'http://cinemassacre.com/2012/11/10/avgn-the-movie-trailer/',
+            'file': '19911.mp4',
+            'md5': 'fde81fbafaee331785f58cd6c0d46190',
+            'info_dict': {
+                'upload_date': '20121110',
+                'title': '“Angry Video Game Nerd: The Movie” Trailer',
+                'description': 'md5:fb87405fcb42a331742a0dce2708560b',
+            },
+        },
+        {
+            'url': 'http://cinemassacre.com/2013/10/02/the-mummys-hand-1940',
+            'file': '521be8ef82b16.mp4',
+            'md5': 'd72f10cd39eac4215048f62ab477a511',
+            'info_dict': {
+                'upload_date': '20131002',
+                'title': 'The Mummys Hand (1940)',
+            },
+        }
+    ]
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)

-        webpage_url = u'http://' + mobj.group('url')
-        webpage = self._download_webpage(webpage_url, None) # Don't know video id yet
+        webpage = self._download_webpage(url, None) # Don't know video id yet
         video_date = mobj.group('date_Y') + mobj.group('date_m') + mobj.group('date_d')
         mobj = re.search(r'src="(?P<embed_url>http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?id=(?:Cinemassacre-)?(?P<video_id>.+?))"', webpage)
         if not mobj:
-            raise ExtractorError(u'Can\'t extract embed url and video id')
-        playerdata_url = mobj.group(u'embed_url')
-        video_id = mobj.group(u'video_id')
+            raise ExtractorError('Can\'t extract embed url and video id')
+        playerdata_url = mobj.group('embed_url')
+        video_id = mobj.group('video_id')

         video_title = self._html_search_regex(r'<title>(?P<title>.+?)\|',
-            webpage, u'title')
+            webpage, 'title')
         video_description = self._html_search_regex(r'<div class="entry-content">(?P<description>.+?)</div>',
-            webpage, u'description', flags=re.DOTALL, fatal=False)
+            webpage, 'description', flags=re.DOTALL, fatal=False)
         if len(video_description) == 0:
             video_description = None

         playerdata = self._download_webpage(playerdata_url, video_id)
-        url = self._html_search_regex(r'\'streamer\': \'(?P<url>[^\']+)\'', playerdata, u'url')
-        sd_file = self._html_search_regex(r'\'file\': \'(?P<sd_file>[^\']+)\'', playerdata, u'sd_file')
-        hd_file = self._html_search_regex(r'\'?file\'?: "(?P<hd_file>[^"]+)"', playerdata, u'hd_file')
-        video_thumbnail = self._html_search_regex(r'\'image\': \'(?P<thumbnail>[^\']+)\'', playerdata, u'thumbnail', fatal=False)
+
+        sd_url = self._html_search_regex(r'file: \'(?P<sd_file>[^\']+)\', label: \'SD\'', playerdata, 'sd_file')
+        hd_url = self._html_search_regex(r'file: \'(?P<hd_file>[^\']+)\', label: \'HD\'', playerdata, 'hd_file')
+        video_thumbnail = self._html_search_regex(r'image: \'(?P<thumbnail>[^\']+)\'', playerdata, 'thumbnail', fatal=False)

         formats = [
             {
-                'url': url,
-                'play_path': 'mp4:' + sd_file,
-                'rtmp_live': True, # workaround
-                'ext': 'flv',
+                'url': sd_url,
+                'ext': 'mp4',
                 'format': 'sd',
                 'format_id': 'sd',
             },
             {
-                'url': url,
-                'play_path': 'mp4:' + hd_file,
-                'rtmp_live': True, # workaround
-                'ext': 'flv',
+                'url': hd_url,
+                'ext': 'mp4',
                 'format': 'hd',
                 'format_id': 'hd',
             },

View File

@@ -4,6 +4,7 @@ from __future__ import unicode_literals

 import os
 import re
+import xml.etree.ElementTree

 from .common import InfoExtractor
 from .youtube import YoutubeIE
@@ -12,6 +13,7 @@ from ..utils import (
     compat_urllib_parse,
     compat_urllib_request,
     compat_urlparse,
+    compat_xml_parse_error,
     ExtractorError,
     HEADRequest,
@@ -159,6 +161,25 @@ class GenericIE(InfoExtractor):
             raise ExtractorError('Invalid URL protocol')
         return response

+    def _extract_rss(self, url, video_id, doc):
+        playlist_title = doc.find('./channel/title').text
+        playlist_desc_el = doc.find('./channel/description')
+        playlist_desc = None if playlist_desc_el is None else playlist_desc_el.text
+
+        entries = [{
+            '_type': 'url',
+            'url': e.find('link').text,
+            'title': e.find('title').text,
+        } for e in doc.findall('./channel/item')]
+
+        return {
+            '_type': 'playlist',
+            'id': url,
+            'title': playlist_title,
+            'description': playlist_desc,
+            'entries': entries,
+        }
+
     def _real_extract(self, url):
         parsed_url = compat_urlparse.urlparse(url)
         if not parsed_url.scheme:
@@ -219,6 +240,14 @@
         self.report_extraction(video_id)

+        # Is it an RSS feed?
+        try:
+            doc = xml.etree.ElementTree.fromstring(webpage.encode('utf-8'))
+            if doc.tag == 'rss':
+                return self._extract_rss(url, video_id, doc)
+        except compat_xml_parse_error:
+            pass
+
         # it's tempting to parse this further, but you would
         # have to take into account all the variations like
         #   Video Title - Site Name
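A toy illustration of the detection path added above (feed contents hypothetical; the real test uses the Escapist Magazine feed):

```python
import xml.etree.ElementTree

feed = (
    '<rss><channel><title>Zero Punctuation</title>'
    '<item><title>Ep 1</title><link>http://example.com/ep1</link></item>'
    '</channel></rss>'
)
doc = xml.etree.ElementTree.fromstring(feed)
assert doc.tag == 'rss'
# _extract_rss() then maps each <item> to a url result:
entries = [{'_type': 'url',
            'url': e.find('link').text,
            'title': e.find('title').text}
           for e in doc.findall('./channel/item')]
assert entries[0]['url'] == 'http://example.com/ep1'
```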
@@ -334,11 +363,17 @@
         if mobj is not None:
             return self.url_result(mobj.group(1), 'Mpora')

-        # Look for embedded Novamov player
+        # Look for embedded NovaMov player
         mobj = re.search(
             r'<iframe[^>]+?src=(["\'])(?P<url>http://(?:(?:embed|www)\.)?novamov\.com/embed\.php.+?)\1', webpage)
         if mobj is not None:
-            return self.url_result(mobj.group('url'), 'Novamov')
+            return self.url_result(mobj.group('url'), 'NovaMov')
+
+        # Look for embedded NowVideo player
+        mobj = re.search(
+            r'<iframe[^>]+?src=(["\'])(?P<url>http://(?:(?:embed|www)\.)?nowvideo\.(?:ch|sx|eu)/embed\.php.+?)\1', webpage)
+        if mobj is not None:
+            return self.url_result(mobj.group('url'), 'NowVideo')

         # Look for embedded Facebook player
         mobj = re.search(

View File

@@ -10,7 +10,7 @@ from ..utils import compat_urllib_request

 class IPrimaIE(InfoExtractor):
-    _VALID_URL = r'https?://play\.iprima\.cz/(?P<videogroup>.+)/(?P<videoid>.+)'
+    _VALID_URL = r'https?://play\.iprima\.cz/[^?#]+/(?P<id>[^?#]+)'

     _TESTS = [{
         'url': 'http://play.iprima.cz/particka/particka-92',
@@ -22,20 +22,32 @@ class IPrimaIE(InfoExtractor):
             'thumbnail': 'http://play.iprima.cz/sites/default/files/image_crops/image_620x349/3/491483_particka-92_image_620x349.jpg',
         },
         'params': {
-            'skip_download': True,
+            'skip_download': True,  # requires rtmpdump
         },
-    },
-    ]
+    }, {
+        'url': 'http://play.iprima.cz/particka/tchibo-particka-jarni-moda',
+        'info_dict': {
+            'id': '9718337',
+            'ext': 'flv',
+            'title': 'Tchibo Partička - Jarní móda',
+            'description': 'md5:589f8f59f414220621ff8882eb3ce7be',
+            'thumbnail': 're:^http:.*\.jpg$',
+        },
+        'params': {
+            'skip_download': True,  # requires rtmpdump
+        },
+    }]
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('videoid')
+        video_id = mobj.group('id')

         webpage = self._download_webpage(url, video_id)

-        player_url = 'http://embed.livebox.cz/iprimaplay/player-embed-v2.js?__tok%s__=%s' % (
-            floor(random()*1073741824),
-            floor(random()*1073741824))
+        player_url = (
+            'http://embed.livebox.cz/iprimaplay/player-embed-v2.js?__tok%s__=%s' %
+            (floor(random()*1073741824), floor(random()*1073741824))
+        )

         req = compat_urllib_request.Request(player_url)
         req.add_header('Referer', url)
@@ -44,18 +56,20 @@ class IPrimaIE(InfoExtractor):
         base_url = ''.join(re.findall(r"embed\['stream'\] = '(.+?)'.+'(\?auth=)'.+'(.+?)';", playerpage)[1])

         zoneGEO = self._html_search_regex(r'"zoneGEO":(.+?),', webpage, 'zoneGEO')
         if zoneGEO != '0':
-            base_url = base_url.replace('token', 'token_'+zoneGEO)
+            base_url = base_url.replace('token', 'token_' + zoneGEO)

         formats = []
         for format_id in ['lq', 'hq', 'hd']:
-            filename = self._html_search_regex(r'"%s_id":(.+?),' % format_id, webpage, 'filename')
+            filename = self._html_search_regex(
+                r'"%s_id":(.+?),' % format_id, webpage, 'filename')

             if filename == 'null':
                 continue

-            real_id = self._search_regex(r'Prima-[0-9]{10}-([0-9]+)_', filename, 'real video id')
+            real_id = self._search_regex(
+                r'Prima-(?:[0-9]{10}|WEB)-([0-9]+)[-_]',
+                filename, 'real video id')

             if format_id == 'lq':
                 quality = 0
@@ -63,13 +77,13 @@ class IPrimaIE(InfoExtractor):
                 quality = 1
             elif format_id == 'hd':
                 quality = 2
-                filename = 'hq/'+filename
+                filename = 'hq/' + filename

             formats.append({
                 'format_id': format_id,
                 'url': base_url,
                 'quality': quality,
-                'play_path': 'mp4:'+filename.replace('"', '')[:-4],
+                'play_path': 'mp4:' + filename.replace('"', '')[:-4],
                 'rtmp_live': True,
                 'ext': 'flv',
             })

View File

@@ -1,3 +1,5 @@
+from __future__ import unicode_literals
+
 import re

 from .common import InfoExtractor
@@ -8,12 +10,13 @@ class NBCNewsIE(InfoExtractor):
     _VALID_URL = r'https?://www\.nbcnews\.com/video/.+?/(?P<id>\d+)'
     _TEST = {
-        u'url': u'http://www.nbcnews.com/video/nbc-news/52753292',
-        u'file': u'52753292.flv',
-        u'md5': u'47abaac93c6eaf9ad37ee6c4463a5179',
-        u'info_dict': {
-            u'title': u'Crew emerges after four-month Mars food study',
-            u'description': u'md5:24e632ffac72b35f8b67a12d1b6ddfc1',
+        'url': 'http://www.nbcnews.com/video/nbc-news/52753292',
+        'md5': '47abaac93c6eaf9ad37ee6c4463a5179',
+        'info_dict': {
+            'id': '52753292',
+            'ext': 'flv',
+            'title': 'Crew emerges after four-month Mars food study',
+            'description': 'md5:24e632ffac72b35f8b67a12d1b6ddfc1',
         },
     }
@@ -23,10 +26,11 @@ class NBCNewsIE(InfoExtractor):
         all_info = self._download_xml('http://www.nbcnews.com/id/%s/displaymode/1219' % video_id, video_id)
         info = all_info.find('video')

-        return {'id': video_id,
-                'title': info.find('headline').text,
-                'ext': 'flv',
-                'url': find_xpath_attr(info, 'media', 'type', 'flashVideo').text,
-                'description': compat_str(info.find('caption').text),
-                'thumbnail': find_xpath_attr(info, 'media', 'type', 'thumbnail').text,
-                }
+        return {
+            'id': video_id,
+            'title': info.find('headline').text,
+            'ext': 'flv',
+            'url': find_xpath_attr(info, 'media', 'type', 'flashVideo').text,
+            'description': compat_str(info.find('caption').text),
+            'thumbnail': find_xpath_attr(info, 'media', 'type', 'thumbnail').text,
+        }

View File

@@ -1,61 +1,51 @@
-# encoding: utf-8
+from __future__ import unicode_literals
+
 import re

 from .common import InfoExtractor
 from ..utils import (
-    ExtractorError,
     unified_strdate,
 )


 class NormalbootsIE(InfoExtractor):
-    _VALID_URL = r'(?:http://)?(?:www\.)?normalboots\.com/video/(?P<videoid>[0-9a-z-]*)/?$'
+    _VALID_URL = r'http://(?:www\.)?normalboots\.com/video/(?P<videoid>[0-9a-z-]*)/?$'
     _TEST = {
-        u'url': u'http://normalboots.com/video/home-alone-games-jontron/',
-        u'file': u'home-alone-games-jontron.mp4',
-        u'md5': u'8bf6de238915dd501105b44ef5f1e0f6',
-        u'info_dict': {
-            u'title': u'Home Alone Games - JonTron - NormalBoots',
-            u'description': u'Jon is late for Christmas. Typical. Thanks to: Paul Ritchey for Co-Writing/Filming: http://www.youtube.com/user/ContinueShow Michael Azzi for Christmas Intro Animation: http://michafrar.tumblr.com/ Jerrod Waters for Christmas Intro Music: http://www.youtube.com/user/xXJerryTerryXx Casey Ormond for \u2018Tense Battle Theme\u2019:\xa0http://www.youtube.com/Kiamet/',
-            u'uploader': u'JonTron',
-            u'upload_date': u'20140125',
+        'url': 'http://normalboots.com/video/home-alone-games-jontron/',
+        'md5': '8bf6de238915dd501105b44ef5f1e0f6',
+        'info_dict': {
+            'id': 'home-alone-games-jontron',
+            'ext': 'mp4',
+            'title': 'Home Alone Games - JonTron - NormalBoots',
+            'description': 'Jon is late for Christmas. Typical. Thanks to: Paul Ritchey for Co-Writing/Filming: http://www.youtube.com/user/ContinueShow Michael Azzi for Christmas Intro Animation: http://michafrar.tumblr.com/ Jerrod Waters for Christmas Intro Music: http://www.youtube.com/user/xXJerryTerryXx Casey Ormond for Tense Battle Theme:\xa0http://www.youtube.com/Kiamet/',
+            'uploader': 'JonTron',
+            'upload_date': '20140125',
         }
     }
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
-        if mobj is None:
-            raise ExtractorError(u'Invalid URL: %s' % url)
         video_id = mobj.group('videoid')

-        info = {
-            'id': video_id,
-            'uploader': None,
-            'upload_date': None,
-        }
-
-        if url[:4] != 'http':
-            url = 'http://' + url
-
         webpage = self._download_webpage(url, video_id)
-        video_title = self._og_search_title(webpage)
-        video_description = self._og_search_description(webpage)
-        video_thumbnail = self._og_search_thumbnail(webpage)
         video_uploader = self._html_search_regex(r'Posted\sby\s<a\shref="[A-Za-z0-9/]*">(?P<uploader>[A-Za-z]*)\s</a>',
             webpage, 'uploader')
         raw_upload_date = self._html_search_regex('<span style="text-transform:uppercase; font-size:inherit;">[A-Za-z]+, (?P<date>.*)</span>',
             webpage, 'date')
         video_upload_date = unified_strdate(raw_upload_date)
-        video_upload_date = unified_strdate(raw_upload_date)

         player_url = self._html_search_regex(r'<iframe\swidth="[0-9]+"\sheight="[0-9]+"\ssrc="(?P<url>[\S]+)"', webpage, 'url')
         player_page = self._download_webpage(player_url, video_id)
-        video_url = u'http://player.screenwavemedia.com/' + self._html_search_regex(r"'file':\s'(?P<file>[0-9A-Za-z-_\.]+)'", player_page, 'file')
+        video_url = self._html_search_regex(r"file:\s'(?P<file>[^']+\.mp4)'", player_page, 'file')

-        info['url'] = video_url
-        info['title'] = video_title
-        info['description'] = video_description
-        info['thumbnail'] = video_thumbnail
-        info['uploader'] = video_uploader
-        info['upload_date'] = video_upload_date
-
-        return info
+        return {
+            'id': video_id,
+            'url': video_url,
+            'title': self._og_search_title(webpage),
+            'description': self._og_search_description(webpage),
+            'thumbnail': self._og_search_thumbnail(webpage),
+            'uploader': video_uploader,
+            'upload_date': video_upload_date,
+        }

View File

@@ -9,14 +9,25 @@ from ..utils import (
 )

-class NovamovIE(InfoExtractor):
-    _VALID_URL = r'http://(?:(?:www\.)?novamov\.com/video/|(?:(?:embed|www)\.)novamov\.com/embed\.php\?v=)(?P<videoid>[a-z\d]{13})'
+class NovaMovIE(InfoExtractor):
+    IE_NAME = 'novamov'
+    IE_DESC = 'NovaMov'
+
+    _VALID_URL = r'http://(?:(?:www\.)?%(host)s/video/|(?:(?:embed|www)\.)%(host)s/embed\.php\?(?:.*?&)?v=)(?P<videoid>[a-z\d]{13})' % {'host': 'novamov\.com'}
+
+    _HOST = 'www.novamov.com'
+
+    _FILE_DELETED_REGEX = r'This file no longer exists on our servers!</h2>'
+    _FILEKEY_REGEX = r'flashvars\.filekey="(?P<filekey>[^"]+)";'
+    _TITLE_REGEX = r'(?s)<div class="v_tab blockborder rounded5" id="v_tab1">\s*<h3>([^<]+)</h3>'
+    _DESCRIPTION_REGEX = r'(?s)<div class="v_tab blockborder rounded5" id="v_tab1">\s*<h3>[^<]+</h3><p>([^<]+)</p>'

     _TEST = {
         'url': 'http://www.novamov.com/video/4rurhn9x446jj',
-        'file': '4rurhn9x446jj.flv',
         'md5': '7205f346a52bbeba427603ba10d4b935',
         'info_dict': {
+            'id': '4rurhn9x446jj',
+            'ext': 'flv',
             'title': 'search engine optimization',
             'description': 'search engine optimization is used to rank the web page in the google search engine'
         },
@@ -27,31 +38,26 @@ class NovamovIE(InfoExtractor):
         mobj = re.match(self._VALID_URL, url)
         video_id = mobj.group('videoid')

-        page = self._download_webpage('http://www.novamov.com/video/%s' % video_id,
-                                      video_id, 'Downloading video page')
+        page = self._download_webpage(
+            'http://%s/video/%s' % (self._HOST, video_id), video_id, 'Downloading video page')

-        if re.search(r'This file no longer exists on our servers!</h2>', page) is not None:
+        if re.search(self._FILE_DELETED_REGEX, page) is not None:
             raise ExtractorError(u'Video %s does not exist' % video_id, expected=True)

-        filekey = self._search_regex(
-            r'flashvars\.filekey="(?P<filekey>[^"]+)";', page, 'filekey')
+        filekey = self._search_regex(self._FILEKEY_REGEX, page, 'filekey')

-        title = self._html_search_regex(
-            r'(?s)<div class="v_tab blockborder rounded5" id="v_tab1">\s*<h3>([^<]+)</h3>',
-            page, 'title', fatal=False)
+        title = self._html_search_regex(self._TITLE_REGEX, page, 'title', fatal=False)

-        description = self._html_search_regex(
-            r'(?s)<div class="v_tab blockborder rounded5" id="v_tab1">\s*<h3>[^<]+</h3><p>([^<]+)</p>',
-            page, 'description', fatal=False)
+        description = self._html_search_regex(self._DESCRIPTION_REGEX, page, 'description', default='', fatal=False)

         api_response = self._download_webpage(
-            'http://www.novamov.com/api/player.api.php?key=%s&file=%s' % (filekey, video_id),
-            video_id, 'Downloading video api response')
+            'http://%s/api/player.api.php?key=%s&file=%s' % (self._HOST, filekey, video_id), video_id,
+            'Downloading video api response')

         response = compat_urlparse.parse_qs(api_response)

         if 'error_msg' in response:
-            raise ExtractorError('novamov returned error: %s' % response['error_msg'][0], expected=True)
+            raise ExtractorError('%s returned error: %s' % (self.IE_NAME, response['error_msg'][0]), expected=True)

         video_url = response['url'][0]
@@ -60,4 +66,4 @@ class NovamovIE(InfoExtractor):
             'url': video_url,
             'title': title,
             'description': description
         }

View File

@@ -1,46 +1,28 @@
-import re
+from __future__ import unicode_literals

-from .common import InfoExtractor
-from ..utils import compat_urlparse
+from .novamov import NovaMovIE


-class NowVideoIE(InfoExtractor):
-    _VALID_URL = r'(?:https?://)?(?:www\.)?nowvideo\.(?:ch|sx)/video/(?P<id>\w+)'
+class NowVideoIE(NovaMovIE):
+    IE_NAME = 'nowvideo'
+    IE_DESC = 'NowVideo'
+
+    _VALID_URL = r'http://(?:(?:www\.)?%(host)s/video/|(?:(?:embed|www)\.)%(host)s/embed\.php\?(?:.*?&)?v=)(?P<videoid>[a-z\d]{13})' % {'host': 'nowvideo\.(?:ch|sx|eu)'}
+
+    _HOST = 'www.nowvideo.ch'
+
+    _FILE_DELETED_REGEX = r'>This file no longer exists on our servers.<'
+    _FILEKEY_REGEX = r'var fkzd="([^"]+)";'
+    _TITLE_REGEX = r'<h4>([^<]+)</h4>'
+    _DESCRIPTION_REGEX = r'</h4>\s*<p>([^<]+)</p>'

     _TEST = {
-        u'url': u'http://www.nowvideo.ch/video/0mw0yow7b6dxa',
-        u'file': u'0mw0yow7b6dxa.flv',
-        u'md5': u'f8fbbc8add72bd95b7850c6a02fc8817',
-        u'info_dict': {
-            u"title": u"youtubedl test video _BaW_jenozKc.mp4"
+        'url': 'http://www.nowvideo.ch/video/0mw0yow7b6dxa',
+        'md5': 'f8fbbc8add72bd95b7850c6a02fc8817',
+        'info_dict': {
+            'id': '0mw0yow7b6dxa',
+            'ext': 'flv',
+            'title': 'youtubedl test video _BaW_jenozKc.mp4',
+            'description': 'Description',
         }
     }
-
-    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-
-        webpage_url = 'http://www.nowvideo.ch/video/' + video_id
-        embed_url = 'http://embed.nowvideo.ch/embed.php?v=' + video_id
-        webpage = self._download_webpage(webpage_url, video_id)
-        embed_page = self._download_webpage(embed_url, video_id,
-            u'Downloading embed page')
-
-        self.report_extraction(video_id)
-
-        video_title = self._html_search_regex(r'<h4>(.*)</h4>',
-            webpage, u'video title')
-
-        video_key = self._search_regex(r'var fkzd="(.*)";',
-            embed_page, u'video key')
-
-        api_call = "http://www.nowvideo.ch/api/player.api.php?file={0}&numOfErrors=0&cid=1&key={1}".format(video_id, video_key)
-        api_response = self._download_webpage(api_call, video_id,
-            u'Downloading API page')
-
-        video_url = compat_urlparse.parse_qs(api_response)[u'url'][0]
-
-        return [{
-            'id': video_id,
-            'url': video_url,
-            'ext': 'flv',
-            'title': video_title,
-        }]
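The rewrite above turns NovaMovIE into a small template: everything host-specific lives in class attributes, so NowVideoIE shrinks to a handful of overrides and inherits `_real_extract` unchanged. A hypothetical further host could be added the same way (names and regexes illustrative only, copied from the pattern above):

```python
from .novamov import NovaMovIE


class ExampleMovIE(NovaMovIE):  # hypothetical host, for illustration only
    IE_NAME = 'examplemov'
    IE_DESC = 'ExampleMov'

    _VALID_URL = r'http://(?:(?:www\.)?%(host)s/video/|(?:(?:embed|www)\.)%(host)s/embed\.php\?(?:.*?&)?v=)(?P<videoid>[a-z\d]{13})' % {'host': 'examplemov\.com'}

    _HOST = 'www.examplemov.com'

    _FILE_DELETED_REGEX = r'>This file no longer exists on our servers.<'
    _FILEKEY_REGEX = r'flashvars\.filekey="(?P<filekey>[^"]+)";'
    _TITLE_REGEX = r'<h4>([^<]+)</h4>'
    _DESCRIPTION_REGEX = r'</h4>\s*<p>([^<]+)</p>'
```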

View File

@@ -1,7 +1,10 @@
+from __future__ import unicode_literals
+
 import json
 import re

 from .common import InfoExtractor
+from ..utils import int_or_none

 class PodomaticIE(InfoExtractor):
@@ -9,14 +12,14 @@ class PodomaticIE(InfoExtractor):
     _VALID_URL = r'^(?P<proto>https?)://(?P<channel>[^.]+)\.podomatic\.com/entry/(?P<id>[^?]+)'

     _TEST = {
-        u"url": u"http://scienceteachingtips.podomatic.com/entry/2009-01-02T16_03_35-08_00",
-        u"file": u"2009-01-02T16_03_35-08_00.mp3",
-        u"md5": u"84bb855fcf3429e6bf72460e1eed782d",
-        u"info_dict": {
-            u"uploader": u"Science Teaching Tips",
-            u"uploader_id": u"scienceteachingtips",
-            u"title": u"64. When the Moon Hits Your Eye",
-            u"duration": 446,
+        "url": "http://scienceteachingtips.podomatic.com/entry/2009-01-02T16_03_35-08_00",
+        "file": "2009-01-02T16_03_35-08_00.mp3",
+        "md5": "84bb855fcf3429e6bf72460e1eed782d",
+        "info_dict": {
+            "uploader": "Science Teaching Tips",
+            "uploader_id": "scienceteachingtips",
+            "title": "64. When the Moon Hits Your Eye",
+            "duration": 446,
         }
     }
@@ -36,7 +39,7 @@ class PodomaticIE(InfoExtractor):
         uploader = data['podcast']
         title = data['title']
         thumbnail = data['imageLocation']
-        duration = int(data['length'] / 1000.0)
+        duration = int_or_none(data.get('length'), 1000)

         return {
             'id': video_id,

View File

@@ -0,0 +1,37 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import os.path
+import re
+
+from .common import InfoExtractor
+
+
+class SaveFromIE(InfoExtractor):
+    IE_NAME = 'savefrom.net'
+    _VALID_URL = r'https?://[^.]+\.savefrom\.net/\#url=(?P<url>.*)$'
+
+    _TEST = {
+        'url': 'http://en.savefrom.net/#url=http://youtube.com/watch?v=UlVRAPW2WJY&utm_source=youtube.com&utm_medium=short_domains&utm_campaign=ssyoutube.com',
+        'info_dict': {
+            'id': 'UlVRAPW2WJY',
+            'ext': 'mp4',
+            'title': 'About Team Radical MMA | MMA Fighting',
+            'upload_date': '20120816',
+            'uploader': 'Howcast',
+            'uploader_id': 'Howcast',
+            'description': 'md5:4f0aac94361a12e1ce57d74f85265175',
+        },
+        'params': {
+            'skip_download': True
+        }
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = os.path.splitext(url.split('/')[-1])[0]
+
+        return {
+            '_type': 'url',
+            'id': video_id,
+            'url': mobj.group('url'),
+        }

View File

@@ -1,6 +1,5 @@
 from __future__ import unicode_literals

-import os
 import re

 from .common import InfoExtractor
@@ -8,23 +7,27 @@ from ..utils import (
     compat_urllib_parse_urlparse,
     compat_urllib_request,
     compat_urllib_parse,
+    unified_strdate,
+    str_to_int,
+    int_or_none,
 )
-from ..aes import (
-    aes_decrypt_text
-)
+from ..aes import aes_decrypt_text
 class SpankwireIE(InfoExtractor):
-    _VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>spankwire\.com/[^/]*/video(?P<videoid>[0-9]+)/?)'
+    _VALID_URL = r'https?://(?:www\.)?(?P<url>spankwire\.com/[^/]*/video(?P<videoid>[0-9]+)/?)'
     _TEST = {
         'url': 'http://www.spankwire.com/Buckcherry-s-X-Rated-Music-Video-Crazy-Bitch/video103545/',
-        'file': '103545.mp4',
-        'md5': '1b3f55e345500552dbc252a3e9c1af43',
+        'md5': '8bbfde12b101204b39e4b9fe7eb67095',
         'info_dict': {
-            "uploader": "oreusz",
-            "title": "Buckcherry`s X Rated Music Video Crazy Bitch",
-            "description": "Crazy Bitch X rated music video.",
-            "age_limit": 18,
+            'id': '103545',
+            'ext': 'mp4',
+            'title': 'Buckcherry`s X Rated Music Video Crazy Bitch',
+            'description': 'Crazy Bitch X rated music video.',
+            'uploader': 'oreusz',
+            'uploader_id': '124697',
+            'upload_date': '20070508',
+            'age_limit': 18,
         }
     }
@@ -37,13 +40,26 @@ class SpankwireIE(InfoExtractor):
         req.add_header('Cookie', 'age_verified=1')
         webpage = self._download_webpage(req, video_id)

-        video_title = self._html_search_regex(r'<h1>([^<]+)', webpage, 'title')
-        video_uploader = self._html_search_regex(
-            r'by:\s*<a [^>]*>(.+?)</a>', webpage, 'uploader', fatal=False)
-        thumbnail = self._html_search_regex(
-            r'flashvars\.image_url = "([^"]+)', webpage, 'thumbnail', fatal=False)
+        title = self._html_search_regex(r'<h1>([^<]+)', webpage, 'title')
         description = self._html_search_regex(
             r'<div\s+id="descriptionContent">([^<]+)<', webpage, 'description', fatal=False)
+        thumbnail = self._html_search_regex(
+            r'flashvars\.image_url = "([^"]+)', webpage, 'thumbnail', fatal=False)
+
+        uploader = self._html_search_regex(
+            r'by:\s*<a [^>]*>(.+?)</a>', webpage, 'uploader', fatal=False)
+        uploader_id = self._html_search_regex(
+            r'by:\s*<a href="/Profile\.aspx\?.*?UserId=(\d+).*?"', webpage, 'uploader id', fatal=False)
+        upload_date = self._html_search_regex(r'</a> on (.+?) at \d+:\d+', webpage, 'upload date', fatal=False)
+        if upload_date:
+            upload_date = unified_strdate(upload_date)
+
+        view_count = self._html_search_regex(
+            r'<div id="viewsCounter"><span>([^<]+)</span> views</div>', webpage, 'view count', fatal=False)
+        if view_count:
+            view_count = str_to_int(view_count)
+        comment_count = int_or_none(self._html_search_regex(
+            r'<span id="spCommentCount">\s*(\d+)</span> Comments</div>', webpage, 'comment count', fatal=False))

         video_urls = list(map(compat_urllib_parse.unquote , re.findall(r'flashvars\.quality_[0-9]{3}p = "([^"]+)', webpage)))
         if webpage.find('flashvars\.encrypted = "true"') != -1:
@@ -53,16 +69,13 @@ class SpankwireIE(InfoExtractor):
         formats = []
         for video_url in video_urls:
             path = compat_urllib_parse_urlparse(video_url).path
-            extension = os.path.splitext(path)[1][1:]
             format = path.split('/')[4].split('_')[:2]
             resolution, bitrate_str = format
             format = "-".join(format)
-            height = int(resolution.rstrip('P'))
-            tbr = int(bitrate_str.rstrip('K'))
+            height = int(resolution.rstrip('Pp'))
+            tbr = int(bitrate_str.rstrip('Kk'))
             formats.append({
                 'url': video_url,
-                'ext': extension,
                 'resolution': resolution,
                 'format': format,
                 'tbr': tbr,
@@ -75,10 +88,14 @@ class SpankwireIE(InfoExtractor):

         return {
             'id': video_id,
-            'uploader': video_uploader,
-            'title': video_title,
-            'thumbnail': thumbnail,
+            'title': title,
             'description': description,
+            'thumbnail': thumbnail,
+            'uploader': uploader,
+            'uploader_id': uploader_id,
+            'upload_date': upload_date,
+            'view_count': view_count,
+            'comment_count': comment_count,
             'formats': formats,
             'age_limit': age_limit,
         }

View File

@@ -0,0 +1,44 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+
+
+class TruTubeIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?trutube\.tv/video/(?P<id>[0-9]+)/.*'
+    _TEST = {
+        'url': 'http://trutube.tv/video/14880/Ramses-II-Proven-To-Be-A-Red-Headed-Caucasoid-',
+        'md5': 'c5b6e301b0a2040b074746cbeaa26ca1',
+        'info_dict': {
+            'id': '14880',
+            'ext': 'flv',
+            'title': 'Ramses II - Proven To Be A Red Headed Caucasoid',
+            'thumbnail': 're:^http:.*\.jpg$',
+        }
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        webpage = self._download_webpage(url, video_id)
+
+        video_title = self._og_search_title(webpage).strip()
+        thumbnail = self._search_regex(
+            r"var splash_img = '([^']+)';", webpage, 'thumbnail', fatal=False)
+
+        all_formats = re.finditer(
+            r"var (?P<key>[a-z]+)_video_file\s*=\s*'(?P<url>[^']+)';", webpage)
+        formats = [{
+            'format_id': m.group('key'),
+            'quality': -i,
+            'url': m.group('url'),
+        } for i, m in enumerate(all_formats)]
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': video_title,
+            'formats': formats,
+            'thumbnail': thumbnail,
+        }

View File

@@ -4,6 +4,7 @@ import re
 import json

 from .common import InfoExtractor
+from ..utils import compat_urllib_request

 class VeohIE(InfoExtractor):
@@ -24,6 +25,13 @@ class VeohIE(InfoExtractor):
         mobj = re.match(self._VALID_URL, url)
         video_id = mobj.group('id')
         webpage = self._download_webpage(url, video_id)
+        age_limit = 0
+        if 'class="adultwarning-container"' in webpage:
+            self.report_age_confirmation()
+            age_limit = 18
+            request = compat_urllib_request.Request(url)
+            request.add_header('Cookie', 'confirmedAdult=true')
+            webpage = self._download_webpage(request, video_id)

         m_youtube = re.search(r'http://www\.youtube\.com/v/(.*?)(\&|")', webpage)
         if m_youtube is not None:
@@ -44,4 +52,5 @@ class VeohIE(InfoExtractor):
             'thumbnail': info.get('highResImage') or info.get('medResImage'),
             'description': info['description'],
             'view_count': info['views'],
+            'age_limit': age_limit,
         }

View File

@@ -0,0 +1,80 @@
+from __future__ import unicode_literals
+
+import re
+import json
+
+from .common import InfoExtractor
+from ..utils import int_or_none
+
+
+class VideoBamIE(InfoExtractor):
+    _VALID_URL = r'http://(?:www\.)?videobam\.com/(?:videos/download/)?(?P<id>[a-zA-Z]+)'
+
+    _TESTS = [
+        {
+            'url': 'http://videobam.com/OiJQM',
+            'md5': 'db471f27763a531f10416a0c58b5a1e0',
+            'info_dict': {
+                'id': 'OiJQM',
+                'ext': 'mp4',
+                'title': 'Is Alcohol Worse Than Ecstasy?',
+                'description': 'md5:d25b96151515c91debc42bfbb3eb2683',
+                'uploader': 'frihetsvinge',
+            },
+        },
+        {
+            'url': 'http://videobam.com/pqLvq',
+            'md5': 'd9a565b5379a99126ef94e1d7f9a383e',
+            'note': 'HD video',
+            'info_dict': {
+                'id': 'pqLvq',
+                'ext': 'mp4',
+            }
+        },
+    ]
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        page = self._download_webpage('http://videobam.com/%s' % video_id, video_id, 'Downloading page')
+
+        formats = []
+
+        for preference, format_id in enumerate(['low', 'high']):
+            mobj = re.search(r"%s: '(?P<url>[^']+)'" % format_id, page)
+            if not mobj:
+                continue
+            formats.append({
+                'url': mobj.group('url'),
+                'ext': 'mp4',
+                'format_id': format_id,
+                'preference': preference,
+            })
+
+        if not formats:
+            player_config = json.loads(self._html_search_regex(r'var player_config = ({.+?});', page, 'player config'))
+            formats = [{
+                'url': item['url'],
+                'ext': 'mp4',
+            } for item in player_config['playlist'] if 'autoPlay' in item]
+
+        self._sort_formats(formats)
+
+        title = self._og_search_title(page, default='VideoBam', fatal=False)
+        description = self._og_search_description(page, default=None)
+        thumbnail = self._og_search_thumbnail(page)
+        uploader = self._html_search_regex(r'Upload by ([^<]+)</a>', page, 'uploader', fatal=False, default=None)
+        view_count = int_or_none(
+            self._html_search_regex(r'<strong>Views:</strong> (\d+) ', page, 'view count', fatal=False))
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'uploader': uploader,
+            'view_count': view_count,
+            'formats': formats,
+            'age_limit': 18,
+        }
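VideoBam leans on `int_or_none` to tolerate a missing or garbled view counter. Conceptually it is just a None-tolerant `int()`; a simplified sketch (the real `youtube_dl.utils.int_or_none` takes additional parameters):

    def int_or_none(v, default=None):
        # Scraped pages often yield None or junk where a number is expected.
        try:
            return int(v)
        except (TypeError, ValueError):
            return default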

View File

@@ -1,8 +1,10 @@
 from __future__ import unicode_literals
 
 import re
+import json
 
 from .common import InfoExtractor
+from ..utils import unified_strdate
 
 
 class VineIE(InfoExtractor):
@@ -13,31 +15,46 @@ class VineIE(InfoExtractor):
         'info_dict': {
             'id': 'b9KOOWX7HUx',
             'ext': 'mp4',
-            'uploader': 'Jack Dorsey',
             'title': 'Chicken.',
+            'description': 'Chicken.',
+            'upload_date': '20130519',
+            'uploader': 'Jack Dorsey',
+            'uploader_id': '76',
         },
     }
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         video_id = mobj.group('id')
-        webpage_url = 'https://vine.co/v/' + video_id
-        webpage = self._download_webpage(webpage_url, video_id)
 
-        self.report_extraction(video_id)
+        webpage = self._download_webpage('https://vine.co/v/' + video_id, video_id)
 
-        video_url = self._html_search_meta('twitter:player:stream', webpage,
-            'video URL')
+        data = json.loads(self._html_search_regex(
+            r'window\.POST_DATA = { %s: ({.+?}) }' % video_id, webpage, 'vine data'))
 
-        uploader = self._html_search_regex(r'<p class="username">(.*?)</p>',
-            webpage, 'uploader', fatal=False, flags=re.DOTALL)
+        formats = [
+            {
+                'url': data['videoLowURL'],
+                'ext': 'mp4',
+                'format_id': 'low',
+            },
+            {
+                'url': data['videoUrl'],
+                'ext': 'mp4',
+                'format_id': 'standard',
+            }
+        ]
 
         return {
             'id': video_id,
-            'url': video_url,
-            'ext': 'mp4',
             'title': self._og_search_title(webpage),
-            'thumbnail': self._og_search_thumbnail(webpage),
-            'uploader': uploader,
-        }
+            'description': data['description'],
+            'thumbnail': data['thumbnailUrl'],
+            'upload_date': unified_strdate(data['created']),
+            'uploader': data['username'],
+            'uploader_id': data['userIdStr'],
+            'like_count': data['likes']['count'],
+            'comment_count': data['comments']['count'],
+            'repost_count': data['reposts']['count'],
+            'formats': formats,
+        }
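The Vine rewrite swaps several per-field HTML regexes for a single regex that captures the embedded `window.POST_DATA` blob and parses it as JSON, which is generally more robust since the page ships everything the client needs in one object. The pattern in isolation (the page snippet below is fabricated for illustration):

    import json
    import re

    html = '<script>window.POST_DATA = { abc123: {"description": "Chicken.", "username": "Jack Dorsey"} }</script>'
    m = re.search(r'window\.POST_DATA = { %s: ({.+?}) }' % 'abc123', html)
    data = json.loads(m.group(1))
    print(data['username'])  # Jack Dorsey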

View File

@@ -6,14 +6,15 @@ from .common import InfoExtractor
 
 
 class WimpIE(InfoExtractor):
-    _VALID_URL = r'(?:http://)?(?:www\.)?wimp\.com/([^/]+)/'
+    _VALID_URL = r'http://(?:www\.)?wimp\.com/([^/]+)/'
     _TEST = {
-        'url': 'http://www.wimp.com/deerfence/',
-        'file': 'deerfence.flv',
-        'md5': '8b215e2e0168c6081a1cf84b2846a2b5',
+        'url': 'http://www.wimp.com/maruexhausted/',
+        'md5': 'f1acced123ecb28d9bb79f2479f2b6a1',
         'info_dict': {
-            "title": "Watch Till End: Herd of deer jump over a fence.",
-            "description": "These deer look as fluid as running water when they jump over this fence as a herd. This video is one that needs to be watched until the very end for the true majesty to be witnessed, but once it comes, it's sure to take your breath away.",
+            'id': 'maruexhausted',
+            'ext': 'flv',
+            'title': 'Maru is exhausted.',
+            'description': 'md5:57e099e857c0a4ea312542b684a869b8',
         }
     }
 
@@ -30,4 +31,4 @@ class WimpIE(InfoExtractor):
             'title': self._og_search_title(webpage),
             'thumbnail': self._og_search_thumbnail(webpage),
             'description': self._og_search_description(webpage),
         }

View File

@@ -22,8 +22,8 @@ class WorldStarHipHopIE(InfoExtractor):
         webpage_src = self._download_webpage(url, video_id)
 
         m_vevo_id = re.search(r'videoId=(.*?)&amp?',
                               webpage_src)
 
         if m_vevo_id is not None:
             self.to_screen(u'Vevo video detected:')
             return self.url_result('vevo:%s' % m_vevo_id.group(1), ie='Vevo')

View File

@@ -4,51 +4,51 @@ import re
 
 from .common import InfoExtractor
 from ..utils import (
-    compat_urllib_parse,
     ExtractorError,
+    unified_strdate,
+    str_to_int,
+    int_or_none,
+    parse_duration,
 )
 
 
 class XHamsterIE(InfoExtractor):
     """Information Extractor for xHamster"""
-    _VALID_URL = r'(?:http://)?(?:www\.)?xhamster\.com/movies/(?P<id>[0-9]+)/(?P<seo>.+?)\.html(?:\?.*)?'
-    _TESTS = [{
-        'url': 'http://xhamster.com/movies/1509445/femaleagent_shy_beauty_takes_the_bait.html',
-        'file': '1509445.mp4',
-        'md5': '8281348b8d3c53d39fffb377d24eac4e',
-        'info_dict': {
-            "upload_date": "20121014",
-            "uploader_id": "Ruseful2011",
-            "title": "FemaleAgent Shy beauty takes the bait",
-            "age_limit": 18,
-        }
-    },
-    {
-        'url': 'http://xhamster.com/movies/2221348/britney_spears_sexy_booty.html?hd',
-        'file': '2221348.flv',
-        'md5': 'e767b9475de189320f691f49c679c4c7',
-        'info_dict': {
-            "upload_date": "20130914",
-            "uploader_id": "jojo747400",
-            "title": "Britney Spears Sexy Booty",
-            "age_limit": 18,
-        }
-    }]
+    _VALID_URL = r'http://(?:www\.)?xhamster\.com/movies/(?P<id>[0-9]+)/(?P<seo>.+?)\.html(?:\?.*)?'
+    _TESTS = [
+        {
+            'url': 'http://xhamster.com/movies/1509445/femaleagent_shy_beauty_takes_the_bait.html',
+            'md5': '8281348b8d3c53d39fffb377d24eac4e',
+            'info_dict': {
+                'id': '1509445',
+                'ext': 'mp4',
+                'title': 'FemaleAgent Shy beauty takes the bait',
+                'upload_date': '20121014',
+                'uploader_id': 'Ruseful2011',
+                'duration': 893,
+                'age_limit': 18,
+            }
+        },
+        {
+            'url': 'http://xhamster.com/movies/2221348/britney_spears_sexy_booty.html?hd',
+            'md5': '4cbd8d56708ecb4fb4124c23e4acb81a',
+            'info_dict': {
+                'id': '2221348',
+                'ext': 'mp4',
+                'title': 'Britney Spears Sexy Booty',
+                'upload_date': '20130914',
+                'uploader_id': 'jojo747400',
+                'duration': 200,
+                'age_limit': 18,
+            }
+        }
+    ]
 
     def _real_extract(self,url):
         def extract_video_url(webpage):
-            mobj = re.search(r'\'srv\': \'(?P<server>[^\']*)\',\s*\'file\': \'(?P<file>[^\']+)\',', webpage)
-            if mobj is None:
-                raise ExtractorError('Unable to extract media URL')
-            if len(mobj.group('server')) == 0:
-                return compat_urllib_parse.unquote(mobj.group('file'))
-            else:
-                return mobj.group('server')+'/key='+mobj.group('file')
-
-        def extract_mp4_video_url(webpage):
-            mp4 = re.search(r'<a href=\"(.+?)\" class=\"mp4Play\"',webpage)
+            mp4 = re.search(r'<video\s+.*?file="([^"]+)".*?>', webpage)
             if mp4 is None:
-                return None
+                raise ExtractorError('Unable to extract media URL')
             else:
                 return mp4.group(1)
@@ -62,50 +62,48 @@ class XHamsterIE(InfoExtractor):
         mrss_url = 'http://xhamster.com/movies/%s/%s.html' % (video_id, seo)
         webpage = self._download_webpage(mrss_url, video_id)
 
-        video_title = self._html_search_regex(
-            r'<title>(?P<title>.+?) - xHamster\.com</title>', webpage, 'title')
+        title = self._html_search_regex(r'<title>(?P<title>.+?) - xHamster\.com</title>', webpage, 'title')
 
         # Only a few videos have an description
         mobj = re.search(r'<span>Description: </span>([^<]+)', webpage)
-        video_description = mobj.group(1) if mobj else None
+        description = mobj.group(1) if mobj else None
 
-        mobj = re.search(r'hint=\'(?P<upload_date_Y>[0-9]{4})-(?P<upload_date_m>[0-9]{2})-(?P<upload_date_d>[0-9]{2}) [0-9]{2}:[0-9]{2}:[0-9]{2} [A-Z]{3,4}\'', webpage)
-        if mobj:
-            video_upload_date = mobj.group('upload_date_Y')+mobj.group('upload_date_m')+mobj.group('upload_date_d')
-        else:
-            video_upload_date = None
-            self._downloader.report_warning('Unable to extract upload date')
+        upload_date = self._html_search_regex(r'hint=\'(\d{4}-\d{2}-\d{2}) \d{2}:\d{2}:\d{2} [A-Z]{3,4}\'',
+            webpage, 'upload date', fatal=False)
+        if upload_date:
+            upload_date = unified_strdate(upload_date)
 
-        video_uploader_id = self._html_search_regex(
-            r'<a href=\'/user/[^>]+>(?P<uploader_id>[^<]+)',
+        uploader_id = self._html_search_regex(r'<a href=\'/user/[^>]+>(?P<uploader_id>[^<]+)',
             webpage, 'uploader id', default='anonymous')
 
-        video_thumbnail = self._search_regex(
-            r'\'image\':\'(?P<thumbnail>[^\']+)\'',
-            webpage, 'thumbnail', fatal=False)
+        thumbnail = self._html_search_regex(r'<video\s+.*?poster="([^"]+)".*?>', webpage, 'thumbnail', fatal=False)
+
+        duration = parse_duration(self._html_search_regex(r'<span>Runtime:</span> (\d+:\d+)</div>',
+            webpage, 'duration', fatal=False))
+
+        view_count = self._html_search_regex(r'<span>Views:</span> ([^<]+)</div>', webpage, 'view count', fatal=False)
+        if view_count:
+            view_count = str_to_int(view_count)
+
+        mobj = re.search(r"hint='(?P<likecount>\d+) Likes / (?P<dislikecount>\d+) Dislikes'", webpage)
+        (like_count, dislike_count) = (mobj.group('likecount'), mobj.group('dislikecount')) if mobj else (None, None)
+
+        mobj = re.search(r'</label>Comments \((?P<commentcount>\d+)\)</div>', webpage)
+        comment_count = mobj.group('commentcount') if mobj else 0
 
         age_limit = self._rta_search(webpage)
 
         hd = is_hd(webpage)
         video_url = extract_video_url(webpage)
         formats = [{
             'url': video_url,
             'format_id': 'hd' if hd else 'sd',
-            'preference': 0,
+            'preference': 1,
         }]
 
-        video_mp4_url = extract_mp4_video_url(webpage)
-        if video_mp4_url is not None:
-            formats.append({
-                'url': video_mp4_url,
-                'ext': 'mp4',
-                'format_id': 'mp4-hd' if hd else 'mp4-sd',
-                'preference': 1,
-            })
-
         if not hd:
-            webpage = self._download_webpage(
-                mrss_url + '?hd', video_id, note='Downloading HD webpage')
+            webpage = self._download_webpage(mrss_url + '?hd', video_id, note='Downloading HD webpage')
             if is_hd(webpage):
                 video_url = extract_video_url(webpage)
                 formats.append({
@@ -118,11 +116,16 @@ class XHamsterIE(InfoExtractor):
         return {
             'id': video_id,
-            'title': video_title,
-            'formats': formats,
-            'description': video_description,
-            'upload_date': video_upload_date,
-            'uploader_id': video_uploader_id,
-            'thumbnail': video_thumbnail,
+            'title': title,
+            'description': description,
+            'upload_date': upload_date,
+            'uploader_id': uploader_id,
+            'thumbnail': thumbnail,
+            'duration': duration,
+            'view_count': view_count,
+            'like_count': int_or_none(like_count),
+            'dislike_count': int_or_none(dislike_count),
+            'comment_count': int_or_none(comment_count),
             'age_limit': age_limit,
+            'formats': formats,
         }
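`parse_duration` turns the scraped `Runtime: MM:SS` string into an integer number of seconds. For the `(\d+:\d+)` strings matched above, a minimal equivalent might look like this (the real `youtube_dl.utils.parse_duration` handles more input shapes):

    def parse_duration_simple(s):
        # '3:20' -> 200, '1:02:03' -> 3723; None for unparsable input.
        if not s:
            return None
        total = 0
        for part in s.strip().split(':'):
            if not part.isdigit():
                return None
            total = total * 60 + int(part)
        return total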

View File

@@ -29,7 +29,6 @@ from ..utils import (
     ExtractorError,
     int_or_none,
     PagedList,
-    RegexNotFoundError,
     unescapeHTML,
     unified_strdate,
     orderedSet,
@@ -297,6 +296,23 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
                 u"format": "141",
             },
         },
+        # DASH manifest with encrypted signature
+        {
+            u'url': u'https://www.youtube.com/watch?v=IB3lcPjvWLA',
+            u'info_dict': {
+                u'id': u'IB3lcPjvWLA',
+                u'ext': u'm4a',
+                u'title': u'Afrojack - The Spark ft. Spree Wilson',
+                u'description': u'md5:3199ed45ee8836572865580804d7ac0f',
+                u'uploader': u'AfrojackVEVO',
+                u'uploader_id': u'AfrojackVEVO',
+                u'upload_date': u'20131011',
+            },
+            u"params": {
+                u'youtube_include_dash_manifest': True,
+                u'format': '141',
+            },
+        },
     ]
@@ -1272,8 +1288,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
             mobj = re.search(r';ytplayer.config = ({.*?});', video_webpage)
             if not mobj:
                 raise ValueError('Could not find vevo ID')
-            info = json.loads(mobj.group(1))
-            args = info['args']
+            ytplayer_config = json.loads(mobj.group(1))
+            args = ytplayer_config['args']
             # Easy way to know if the 's' value is in url_encoded_fmt_stream_map
             # this signatures are encrypted
             if 'url_encoded_fmt_stream_map' not in args:
@@ -1366,12 +1382,24 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
             raise ExtractorError(u'no conn, hlsvp or url_encoded_fmt_stream_map information found in video info')
 
         # Look for the DASH manifest
-        dash_manifest_url_lst = video_info.get('dashmpd')
-        if (dash_manifest_url_lst and dash_manifest_url_lst[0] and
-                self._downloader.params.get('youtube_include_dash_manifest', False)):
+        if (self._downloader.params.get('youtube_include_dash_manifest', False)):
             try:
+                # The DASH manifest used needs to be the one from the original video_webpage.
+                # The one found in get_video_info seems to be using different signatures.
+                # However, in the case of an age restriction there won't be any embedded dashmpd in the video_webpage.
+                # Luckily, it seems, this case uses some kind of default signature (len == 86), so the
+                # combination of get_video_info and the _static_decrypt_signature() decryption fallback will work here.
+                if age_gate:
+                    dash_manifest_url = video_info.get('dashmpd')[0]
+                else:
+                    dash_manifest_url = ytplayer_config['args']['dashmpd']
+                def decrypt_sig(mobj):
+                    s = mobj.group(1)
+                    dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
+                    return '/signature/%s' % dec_s
+                dash_manifest_url = re.sub(r'/s/([\w\.]+)', decrypt_sig, dash_manifest_url)
                 dash_doc = self._download_xml(
-                    dash_manifest_url_lst[0], video_id,
+                    dash_manifest_url, video_id,
                     note=u'Downloading DASH manifest',
                     errnote=u'Could not download DASH manifest')
                 for r in dash_doc.findall(u'.//{urn:mpeg:DASH:schema:MPD:2011}Representation'):
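`decrypt_sig` above is handed to `re.sub` as a callable, so every `/s/<sig>` segment of the manifest URL is rewritten to its decrypted `/signature/<sig>` form in one pass. The mechanism in isolation, with a dummy decryptor standing in for the real signature decryption:

    import re

    def _decrypt(s):
        # Stand-in for the real signature decryption.
        return s[::-1]

    url = 'http://example.com/api/manifest/dash/s/ABC.DEF/id/xyz'
    fixed = re.sub(r'/s/([\w\.]+)', lambda m: '/signature/%s' % _decrypt(m.group(1)), url)
    print(fixed)  # http://example.com/api/manifest/dash/signature/FED.CBA/id/xyz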
@@ -1443,9 +1471,9 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
                      |
                      ((?:PL|EC|UU|FL|RD)[0-9A-Za-z-_]{10,})
                      )"""
-    _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s&page=%s'
+    _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
     _MORE_PAGES_INDICATOR = r'data-link-type="next"'
-    _VIDEO_RE = r'href="/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&amp;[^"]*?index=(?P<index>\d+)'
+    _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&amp;[^"]*?index=(?P<index>\d+)'
     IE_NAME = u'youtube:playlist'
 
     def _real_initialize(self):
@@ -1460,11 +1488,15 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
         # the id of the playlist is just 'RD' + video_id
         url = 'https://youtube.com/watch?v=%s&list=%s' % (playlist_id[-11:], playlist_id)
         webpage = self._download_webpage(url, playlist_id, u'Downloading Youtube mix')
-        title_span = (get_element_by_attribute('class', 'title long-title', webpage) or
-            get_element_by_attribute('class', 'title ', webpage))
+        search_title = lambda class_name: get_element_by_attribute('class', class_name, webpage)
+        title_span = (search_title('playlist-title') or
+            search_title('title long-title') or search_title('title'))
         title = clean_html(title_span)
-        video_re = r'data-index="\d+".*?href="/watch\?v=([0-9A-Za-z_-]{11})&amp;[^"]*?list=%s' % re.escape(playlist_id)
-        ids = orderedSet(re.findall(video_re, webpage))
+        video_re = r'''(?x)data-video-username="(.*?)".*?
+                       href="/watch\?v=([0-9A-Za-z_-]{11})&amp;[^"]*?list=%s''' % re.escape(playlist_id)
+        matches = orderedSet(re.findall(video_re, webpage, flags=re.DOTALL))
+        # Some of the videos may have been deleted, their username field is empty
+        ids = [video_id for (username, video_id) in matches if username]
         url_results = self._ids_to_results(ids)
 
         return self.playlist_result(url_results, playlist_id, title)
@@ -1493,29 +1525,31 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
             raise ExtractorError(u'For downloading YouTube.com top lists, use '
                 u'the "yttoplist" keyword, for example "youtube-dl \'yttoplist:music:Top Tracks\'"', expected=True)
 
+        url = self._TEMPLATE_URL % playlist_id
+        page = self._download_webpage(url, playlist_id)
+        more_widget_html = content_html = page
+
         # Extract the video ids from the playlist pages
         ids = []
 
         for page_num in itertools.count(1):
-            url = self._TEMPLATE_URL % (playlist_id, page_num)
-            page = self._download_webpage(url, playlist_id, u'Downloading page #%s' % page_num)
-            matches = re.finditer(self._VIDEO_RE, page)
+            matches = re.finditer(self._VIDEO_RE, content_html)
             # We remove the duplicates and the link with index 0
             # (it's not the first video of the playlist)
             new_ids = orderedSet(m.group('id') for m in matches if m.group('index') != '0')
             ids.extend(new_ids)
 
-            if re.search(self._MORE_PAGES_INDICATOR, page) is None:
+            mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
+            if not mobj:
                 break
 
-        try:
-            playlist_title = self._og_search_title(page)
-        except RegexNotFoundError:
-            self.report_warning(
-                u'Playlist page is missing OpenGraph title, falling back ...',
-                playlist_id)
-            playlist_title = self._html_search_regex(
-                r'<h1 class="pl-header-title">(.*?)</h1>', page, u'title')
+            more = self._download_json(
+                'https://youtube.com/%s' % mobj.group('more'), playlist_id, 'Downloading page #%s' % page_num)
+            content_html = more['content_html']
+            more_widget_html = more['load_more_widget_html']
+
+        playlist_title = self._html_search_regex(
+            r'<h1 class="pl-header-title">\s*(.*?)\s*</h1>', page, u'title')
 
         url_results = self._ids_to_results(ids)
         return self.playlist_result(url_results, playlist_id, playlist_title)
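The playlist rewrite stops fetching `?page=N` URLs and instead follows the `data-uix-load-more-href` endpoint that the site's own "Load more" button calls, which returns JSON containing `content_html` and `load_more_widget_html`. The loop shape, abstracted into a sketch (`fetch_json`, `extract_ids` and `find_more_href` are placeholders for the pieces shown in the diff):

    def collect_items(first_page_html, fetch_json, extract_ids, find_more_href):
        # Walk the AJAX continuation chain the page's own button uses.
        content_html = more_widget_html = first_page_html
        ids = []
        while True:
            ids.extend(extract_ids(content_html))
            href = find_more_href(more_widget_html)
            if not href:
                break
            more = fetch_json(href)  # the JSON continuation payload
            content_html = more['content_html']
            more_widget_html = more['load_more_widget_html']
        return ids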

View File

@@ -1,4 +1,5 @@
 # coding: utf-8
+from __future__ import unicode_literals
 
 import re
@@ -13,52 +14,42 @@ class ZDFIE(InfoExtractor):
     _VALID_URL = r'^https?://www\.zdf\.de/ZDFmediathek(?P<hash>#)?/(.*beitrag/(?:video/)?)(?P<video_id>[0-9]+)(?:/[^/?]+)?(?:\?.*)?'
 
     _TEST = {
-        u"url": u"http://www.zdf.de/ZDFmediathek/beitrag/video/2037704/ZDFspezial---Ende-des-Machtpokers--?bc=sts;stt",
-        u"file": u"2037704.webm",
-        u"info_dict": {
-            u"upload_date": u"20131127",
-            u"description": u"Union und SPD haben sich auf einen Koalitionsvertrag geeinigt. Aber was bedeutet das für die Bürger? Sehen Sie hierzu das ZDFspezial \"Ende des Machtpokers - Große Koalition für Deutschland\".",
-            u"uploader": u"spezial",
-            u"title": u"ZDFspezial - Ende des Machtpokers"
+        'url': 'http://www.zdf.de/ZDFmediathek/beitrag/video/2037704/ZDFspezial---Ende-des-Machtpokers--?bc=sts;stt',
+        'info_dict': {
+            'id': '2037704',
+            'ext': 'webm',
+            'title': 'ZDFspezial - Ende des Machtpokers',
+            'description': 'Union und SPD haben sich auf einen Koalitionsvertrag geeinigt. Aber was bedeutet das für die Bürger? Sehen Sie hierzu das ZDFspezial "Ende des Machtpokers - Große Koalition für Deutschland".',
+            'duration': 1022,
+            'uploader': 'spezial',
+            'uploader_id': '225948',
+            'upload_date': '20131127',
         },
-        u"skip": u"Videos on ZDF.de are depublicised in short order",
+        'skip': 'Videos on ZDF.de are depublicised in short order',
     }
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         video_id = mobj.group('video_id')
 
-        xml_url = u'http://www.zdf.de/ZDFmediathek/xmlservice/web/beitragsDetails?ak=web&id=%s' % video_id
+        xml_url = 'http://www.zdf.de/ZDFmediathek/xmlservice/web/beitragsDetails?ak=web&id=%s' % video_id
         doc = self._download_xml(
             xml_url, video_id,
-            note=u'Downloading video info',
-            errnote=u'Failed to download video info')
+            note='Downloading video info',
+            errnote='Failed to download video info')
 
         title = doc.find('.//information/title').text
         description = doc.find('.//information/detail').text
+        duration = int(doc.find('.//details/lengthSec').text)
         uploader_node = doc.find('.//details/originChannelTitle')
         uploader = None if uploader_node is None else uploader_node.text
-        duration_str = doc.find('.//details/length').text
-        duration_m = re.match(r'''(?x)^
-            (?P<hours>[0-9]{2})
-            :(?P<minutes>[0-9]{2})
-            :(?P<seconds>[0-9]{2})
-            (?:\.(?P<ms>[0-9]+)?)
-            ''', duration_str)
-        duration = (
-            (
-                (int(duration_m.group('hours')) * 60 * 60) +
-                (int(duration_m.group('minutes')) * 60) +
-                int(duration_m.group('seconds'))
-            )
-            if duration_m
-            else None
-        )
+        uploader_id_node = doc.find('.//details/originChannelId')
+        uploader_id = None if uploader_id_node is None else uploader_id_node.text
         upload_date = unified_strdate(doc.find('.//details/airtime').text)
 
         def xml_to_format(fnode):
             video_url = fnode.find('url').text
-            is_available = u'http://www.metafilegenerator' not in video_url
+            is_available = 'http://www.metafilegenerator' not in video_url
 
             format_id = fnode.attrib['basetype']
             format_m = re.match(r'''(?x)
@@ -71,22 +62,28 @@ class ZDFIE(InfoExtractor):
             quality = fnode.find('./quality').text
             abr = int(fnode.find('./audioBitrate').text) // 1000
-            vbr = int(fnode.find('./videoBitrate').text) // 1000
+            vbr_node = fnode.find('./videoBitrate')
+            vbr = None if vbr_node is None else int(vbr_node.text) // 1000
 
-            format_note = u''
+            width_node = fnode.find('./width')
+            width = None if width_node is None else int_or_none(width_node.text)
+            height_node = fnode.find('./height')
+            height = None if height_node is None else int_or_none(height_node.text)
+
+            format_note = ''
             if not format_note:
                 format_note = None
 
             return {
-                'format_id': format_id + u'-' + quality,
+                'format_id': format_id + '-' + quality,
                 'url': video_url,
                 'ext': ext,
                 'acodec': format_m.group('acodec'),
                 'vcodec': format_m.group('vcodec'),
                 'abr': abr,
                 'vbr': vbr,
-                'width': int_or_none(fnode.find('./width').text),
-                'height': int_or_none(fnode.find('./height').text),
+                'width': width,
+                'height': height,
                 'filesize': int_or_none(fnode.find('./filesize').text),
                 'format_note': format_note,
                 'protocol': proto,
@@ -103,9 +100,10 @@ class ZDFIE(InfoExtractor):
         return {
             'id': video_id,
             'title': title,
-            'formats': formats,
             'description': description,
-            'uploader': uploader,
             'duration': duration,
+            'uploader': uploader,
+            'uploader_id': uploader_id,
             'upload_date': upload_date,
+            'formats': formats,
         }
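The repeated `None if node is None else node.text` guards in the ZDF hunks are the standard way to read optional XML elements. A tiny helper captures the idiom (hypothetical name; not part of youtube_dl at this point):

    def xpath_text(doc, path):
        # Return the element's text, or None when the element is missing.
        node = doc.find(path)
        return None if node is None else node.text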

View File

@@ -1,6 +1,7 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 
+import contextlib
 import ctypes
 import datetime
 import email.utils
@@ -174,6 +175,11 @@ try:
 except NameError:
     compat_chr = chr
 
+try:
+    from xml.etree.ElementTree import ParseError as compat_xml_parse_error
+except ImportError:  # Python 2.6
+    from xml.parsers.expat import ExpatError as compat_xml_parse_error
+
 def compat_ord(c):
     if type(c) is int: return c
     else: return ord(c)
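`compat_xml_parse_error` papers over a Python 2.6 difference: ElementTree there raises `xml.parsers.expat.ExpatError` instead of `xml.etree.ElementTree.ParseError`, so callers can catch a single name everywhere. Illustrative usage (the bad input is made up):

    import xml.etree.ElementTree as etree

    try:
        from xml.etree.ElementTree import ParseError as compat_xml_parse_error
    except ImportError:  # Python 2.6
        from xml.parsers.expat import ExpatError as compat_xml_parse_error

    try:
        doc = etree.fromstring('<not-xml')
    except compat_xml_parse_error:
        doc = None  # tolerate malformed server responses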
@@ -774,6 +780,7 @@ def unified_strdate(date_str):
         '%Y-%m-%dT%H:%M:%S.%fZ',
         '%Y-%m-%dT%H:%M:%S.%f0Z',
         '%Y-%m-%dT%H:%M:%S',
+        '%Y-%m-%dT%H:%M:%S.%f',
         '%Y-%m-%dT%H:%M',
     ]
     for expression in format_expressions:
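The new `'%Y-%m-%dT%H:%M:%S.%f'` entry lets `unified_strdate` digest ISO-style timestamps that carry fractional seconds but no timezone suffix, e.g.:

    import datetime

    dt = datetime.datetime.strptime('2014-02-25T13:37:42.123456', '%Y-%m-%dT%H:%M:%S.%f')
    print(dt.strftime('%Y%m%d'))  # 20140225, the compact form youtube-dl stores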
@@ -1239,3 +1246,19 @@ except TypeError:
 else:
     struct_pack = struct.pack
     struct_unpack = struct.unpack
+
+
+def read_batch_urls(batch_fd):
+    def fixup(url):
+        if not isinstance(url, compat_str):
+            url = url.decode('utf-8', 'replace')
+        BOM_UTF8 = u'\xef\xbb\xbf'
+        if url.startswith(BOM_UTF8):
+            url = url[len(BOM_UTF8):]
+        url = url.strip()
+        if url.startswith(('#', ';', ']')):
+            return False
+        return url
+
+    with contextlib.closing(batch_fd) as fd:
+        return [url for url in map(fixup, fd) if url]
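`read_batch_urls` backs batch-file input: it strips the u'\xef\xbb\xbf' prefix a UTF-8 BOM leaves behind when the file was decoded with a single-byte codec, trims whitespace, and drops blank lines and lines starting with '#', ';' or ']'. A quick check of the behaviour (assuming the module is importable as youtube_dl.utils):

    import io
    from youtube_dl.utils import read_batch_urls

    batch = io.StringIO(u'http://example.com/a\n# a comment\n\nhttp://example.com/b\n')
    print(read_batch_urls(batch))
    # ['http://example.com/a', 'http://example.com/b']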

View File

@@ -1,2 +1,2 @@
-__version__ = '2014.02.19.1'
+__version__ = '2014.02.25'