Merge branch 'master' of https://github.com/rg3/youtube-dl

2016-05-20 08:08:25 +02:00 · 2016-05-20 08:08:25 +02:00 · d12939e16b
commit d12939e16b
parent a07fcf5328 52f7c75cff
12 changed files with 326 additions and 116 deletions
--- a/2
+++ b/2
@ -1,7 +1,7 @@
 all: youtube-dl README.md CONTRIBUTING.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish supportedsites
 clean:
-	rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish youtube_dl/extractor/lazy_extractors.py *.dump *.part *.info.json *.mp4 *.flv *.mp3 *.avi *.mkv *.webm CONTRIBUTING.md.tmp ISSUE_TEMPLATE.md.tmp youtube-dl youtube-dl.exe
+	rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish youtube_dl/extractor/lazy_extractors.py *.dump *.part *.info.json *.mp4 *.flv *.mp3 *.avi *.mkv *.webm *.jpg *.png CONTRIBUTING.md.tmp ISSUE_TEMPLATE.md.tmp youtube-dl youtube-dl.exe
 	find . -name "*.pyc" -delete
 	find . -name "*.class" -delete
--- a/test/test_utils.py
+++ b/test/test_utils.py
@ -50,6 +50,8 @@ from youtube_dl.utils import (
    sanitize_path,
    prepend_extension,
    replace_extension,
    remove_start,
    remove_end,
    remove_quotes,
    shell_quote,
    smuggle_url,
@ -215,6 +217,16 @@ class TestUtil(unittest.TestCase):
        self.assertEqual(replace_extension('.abc', 'temp'), '.abc.temp')
        self.assertEqual(replace_extension('.abc.ext', 'temp'), '.abc.temp')
    def test_remove_start(self):
        # Each pair is (input, expected result) for the prefix 'A - ':
        # None passes through, a matching prefix is stripped, and a string
        # that merely contains the prefix elsewhere is left unchanged.
        prefix = 'A - '
        cases = (
            (None, None),
            ('A - B', 'B'),
            ('B - A', 'B - A'),
        )
        for value, expected in cases:
            self.assertEqual(remove_start(value, prefix), expected)
    def test_remove_end(self):
        # Each pair is (input, expected result) for the suffix ' - B':
        # None passes through, a matching suffix is stripped, and a string
        # that does not end with the suffix is left unchanged.
        suffix = ' - B'
        cases = (
            (None, None),
            ('A - B', 'A'),
            ('B - A', 'B - A'),
        )
        for value, expected in cases:
            self.assertEqual(remove_end(value, suffix), expected)
    def test_remove_quotes(self):
        self.assertEqual(remove_quotes(None), None)
        self.assertEqual(remove_quotes('"'), '"')
--- a/youtube_dl/extractor/abcnews.py
+++ b/youtube_dl/extractor/abcnews.py
@ -0,0 +1,135 @@
 # coding: utf-8
 from __future__ import unicode_literals
 import calendar
 import re
 import time
 from .amp import AMPIE
 from .common import InfoExtractor
 from ..compat import compat_urlparse
 class AbcNewsVideoIE(AMPIE):
    """Extractor for stand-alone ABC News video pages.

    The page URL carries a human-readable slug (``display_id``) followed by
    the numeric video id.  The id is used to build an ``itemfeed`` URL that
    is passed to ``self._extract_feed_info`` (not defined in this class;
    presumably provided by ``AMPIE``), and the resulting info dict is then
    completed with ``id`` and ``display_id``.
    """

    IE_NAME = 'abcnews:video'
    # Raw string so the regex escapes reach the ``re`` module untouched.
    # Both schemes are accepted: AbcNewsIE matches https pages too and
    # forwards URLs it built with urljoin() from its own page URL, so an
    # http-only pattern would make re.match() return None in _real_extract.
    _VALID_URL = r'https?://abcnews\.go\.com/[^/]+/video/(?P<display_id>[0-9a-z-]+)-(?P<id>\d+)'
    _TESTS = [{
        'url': 'http://abcnews.go.com/ThisWeek/video/week-exclusive-irans-foreign-minister-zarif-20411932',
        'info_dict': {
            'id': '20411932',
            'ext': 'mp4',
            'display_id': 'week-exclusive-irans-foreign-minister-zarif',
            'title': '\'This Week\' Exclusive: Iran\'s Foreign Minister Zarif',
            'description': 'George Stephanopoulos goes one-on-one with Iranian Foreign Minister Dr. Javad Zarif.',
            'duration': 180,
            'thumbnail': r're:^https?://.*\.jpg$',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }, {
        'url': 'http://abcnews.go.com/2020/video/2020-husband-stands-teacher-jail-student-affairs-26119478',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the info dict for the video page at ``url``."""
        mobj = re.match(self._VALID_URL, url)
        display_id = mobj.group('display_id')
        video_id = mobj.group('id')
        info_dict = self._extract_feed_info(
            'http://abcnews.go.com/video/itemfeed?id=%s' % video_id)
        # The ids taken from the URL override whatever the feed reported.
        info_dict.update({
            'id': video_id,
            'display_id': display_id,
        })
        return info_dict
 class AbcNewsIE(InfoExtractor):
    """Extractor for ABC News story pages that embed a video.

    The story page is downloaded and searched for the player URL assigned to
    ``window.abcnvideo.url``; that URL is delegated to ``AbcNewsVideoIE`` as
    a ``url_transparent`` result.  When the page also contains a YouTube
    embed iframe, a two-entry playlist (ABC video first, YouTube second) is
    returned instead of the single entry.
    """

    IE_NAME = 'abcnews'
    # Raw string so the regex escapes reach the ``re`` module untouched.
    _VALID_URL = r'https?://abcnews\.go\.com/(?:[^/]+/)+(?P<display_id>[0-9a-z-]+)/story\?id=(?P<id>\d+)'
    _TESTS = [{
        'url': 'http://abcnews.go.com/Blotter/News/dramatic-video-rare-death-job-america/story?id=10498713#.UIhwosWHLjY',
        'info_dict': {
            'id': '10498713',
            'ext': 'flv',
            'display_id': 'dramatic-video-rare-death-job-america',
            'title': 'Occupational Hazards',
            'description': 'Nightline investigates the dangers that lurk at various jobs.',
            'thumbnail': r're:^https?://.*\.jpg$',
            'upload_date': '20100428',
            'timestamp': 1272412800,
        },
        'add_ie': ['AbcNewsVideo'],
    }, {
        'url': 'http://abcnews.go.com/Entertainment/justin-timberlake-performs-stop-feeling-eurovision-2016/story?id=39125818',
        'info_dict': {
            'id': '39125818',
            'ext': 'mp4',
            'display_id': 'justin-timberlake-performs-stop-feeling-eurovision-2016',
            'title': 'Justin Timberlake Drops Hints For Secret Single',
            'description': 'Lara Spencer reports the buzziest stories of the day in "GMA" Pop News.',
            'upload_date': '20160515',
            'timestamp': 1463329500,
        },
        'params': {
            # m3u8 download
            'skip_download': True,
            # The embedded YouTube video is blocked due to copyright issues
            'playlist_items': '1',
        },
        'add_ie': ['AbcNewsVideo'],
    }, {
        'url': 'http://abcnews.go.com/Technology/exclusive-apple-ceo-tim-cook-iphone-cracking-software/story?id=37173343',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a url_transparent entry, or a playlist if YouTube is embedded."""
        mobj = re.match(self._VALID_URL, url)
        display_id = mobj.group('display_id')
        video_id = mobj.group('id')

        webpage = self._download_webpage(url, video_id)
        video_url = self._search_regex(
            r'window\.abcnvideo\.url\s*=\s*"([^"]+)"', webpage, 'video URL')
        # The player URL may be relative; resolve it against the page URL.
        full_video_url = compat_urlparse.urljoin(url, video_url)

        youtube_url = self._html_search_regex(
            r'<iframe[^>]+src="(https://www\.youtube\.com/embed/[^"]+)"',
            webpage, 'YouTube URL', default=None)

        timestamp = None
        date_str = self._html_search_regex(
            r'<span[^>]+class="timestamp">([^<]+)</span>',
            webpage, 'timestamp', fatal=False)
        if date_str:
            tz_offset = 0
            if date_str.endswith(' ET'):  # Eastern Time
                # NOTE(review): a fixed UTC-5 offset ignores daylight saving
                # time (EDT is UTC-4); kept as is to preserve the produced
                # timestamps - confirm whether this is intended.
                tz_offset = -5
                date_str = date_str[:-3]
            date_str = date_str.strip()
            for date_format in ('%b. %d, %Y', '%b %d, %Y, %I:%M %p'):
                try:
                    timestamp = calendar.timegm(
                        time.strptime(date_str, date_format))
                except ValueError:
                    continue
                # Stop at the first format that parses instead of trying
                # (and failing on) the remaining ones.
                break
            if timestamp is not None:
                # Shift the naively-parsed local time to UTC.
                timestamp -= tz_offset * 3600

        entry = {
            '_type': 'url_transparent',
            'ie_key': AbcNewsVideoIE.ie_key(),
            'url': full_video_url,
            'id': video_id,
            'display_id': display_id,
            'timestamp': timestamp,
        }

        if youtube_url:
            entries = [entry, self.url_result(youtube_url, 'Youtube')]
            return self.playlist_result(entries)

        return entry
--- a/youtube_dl/extractor/amp.py
+++ b/youtube_dl/extractor/amp.py
@ -52,7 +52,7 @@ class AMPIE(InfoExtractor):
        for media_data in media_content:
            media = media_data['@attributes']
            media_type = media['type']
-            if media_type == 'video/f4m':
+            if media_type in ('video/f4m', 'application/f4m+xml'):
                formats.extend(self._extract_f4m_formats(
                    media['url'] + '?hdcore=3.4.0&plugin=aasp-3.4.0.132.124',
                    video_id, f4m_id='hds', fatal=False))
@ -61,7 +61,7 @@ class AMPIE(InfoExtractor):
                    media['url'], video_id, 'mp4', m3u8_id='hls', fatal=False))
            else:
                formats.append({
-                    'format_id': media_data['media-category']['@attributes']['label'],
+                    'format_id': media_data.get('media-category', {}).get('@attributes', {}).get('label'),
                    'url': media['url'],
                    'tbr': int_or_none(media.get('bitrate')),
                    'filesize': int_or_none(media.get('fileSize')),
--- a/youtube_dl/extractor/cbc.py
+++ b/youtube_dl/extractor/cbc.py
@ -4,7 +4,10 @@ from __future__ import unicode_literals
 import re
 from .common import InfoExtractor
-from ..utils import js_to_json
+from ..utils import (
    js_to_json,
    smuggle_url,
 )
 class CBCIE(InfoExtractor):
@ -12,57 +15,54 @@ class CBCIE(InfoExtractor):
    _TESTS = [{
        # with mediaId
        'url': 'http://www.cbc.ca/22minutes/videos/clips-season-23/don-cherry-play-offs',
        'md5': '97e24d09672fc4cf56256d6faa6c25bc',
        'info_dict': {
            'id': '2682904050',
-            'ext': 'flv',
+            'ext': 'mp4',
            'title': 'Don Cherry – All-Stars',
            'description': 'Don Cherry has a bee in his bonnet about AHL player John Scott because that guy’s got heart.',
-            'timestamp': 1454475540,
+            'timestamp': 1454463000,
            'upload_date': '20160203',
-        },
+            'uploader': 'CBCC-NEW',
        'params': {
            # rtmp download
            'skip_download': True,
        },
    }, {
        # with clipId
        'url': 'http://www.cbc.ca/archives/entry/1978-robin-williams-freestyles-on-90-minutes-live',
        'info_dict': {
            'id': '2487345465',
-            'ext': 'flv',
+            'ext': 'mp4',
            'title': 'Robin Williams freestyles on 90 Minutes Live',
            'description': 'Wacky American comedian Robin Williams shows off his infamous "freestyle" comedic talents while being interviewed on CBC\'s 90 Minutes Live.',
-            'upload_date': '19700101',
+            'upload_date': '19780210',
            'uploader': 'CBCC-NEW',
-        },
+            'timestamp': 255977160,
        'params': {
            # rtmp download
            'skip_download': True,
        },
    }, {
        # multiple iframes
        'url': 'http://www.cbc.ca/natureofthings/blog/birds-eye-view-from-vancouvers-burrard-street-bridge-how-we-got-the-shot',
        'playlist': [{
            'md5': '377572d0b49c4ce0c9ad77470e0b96b4',
            'info_dict': {
                'id': '2680832926',
-                'ext': 'flv',
+                'ext': 'mp4',
                'title': 'An Eagle\'s-Eye View Off Burrard Bridge',
                'description': 'Hercules the eagle flies from Vancouver\'s Burrard Bridge down to a nearby park with a mini-camera strapped to his back.',
-                'upload_date': '19700101',
+                'upload_date': '20160201',
                'timestamp': 1454342820,
                'uploader': 'CBCC-NEW',
            },
        }, {
            'md5': '415a0e3f586113894174dfb31aa5bb1a',
            'info_dict': {
                'id': '2658915080',
-                'ext': 'flv',
+                'ext': 'mp4',
                'title': 'Fly like an eagle!',
                'description': 'Eagle equipped with a mini camera flies from the world\'s tallest tower',
-                'upload_date': '19700101',
+                'upload_date': '20150315',
                'timestamp': 1426443984,
                'uploader': 'CBCC-NEW',
            },
        }],
        'params': {
            # rtmp download
            'skip_download': True,
        },
    }]
    @classmethod
@ -95,20 +95,23 @@ class CBCPlayerIE(InfoExtractor):
        'url': 'http://www.cbc.ca/player/play/2683190193',
        'info_dict': {
            'id': '2683190193',
-            'ext': 'flv',
+            'ext': 'mp4',
            'title': 'Gerry Runs a Sweat Shop',
            'description': 'md5:b457e1c01e8ff408d9d801c1c2cd29b0',
-            'timestamp': 1455067800,
+            'timestamp': 1455071400,
            'upload_date': '20160210',
-        },
+            'uploader': 'CBCC-NEW',
        'params': {
            # rtmp download
            'skip_download': True,
        },
    }
    def _real_extract(self, url):
        video_id = self._match_id(url)
-        return self.url_result(
+        return {
-            'http://feed.theplatform.com/f/ExhSPC/vms_5akSXx4Ng_Zn?byGuid=%s' % video_id,
+            '_type': 'url_transparent',
-            'ThePlatformFeed', video_id)
+            'ie_key': 'ThePlatform',
            'url': smuggle_url(
                'http://link.theplatform.com/s/ExhSPC/media/guid/2655402169/%s?mbr=true' % video_id, {
                    'force_smil_url': True
                }),
            'id': video_id,
        }
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@ -3,6 +3,10 @@ from __future__ import unicode_literals
 from .abc import ABCIE
 from .abc7news import Abc7NewsIE
 from .abcnews import (
    AbcNewsIE,
    AbcNewsVideoIE,
 )
 from .academicearth import AcademicEarthCourseIE
 from .acast import (
    ACastIE,
@ -238,6 +242,7 @@ from .fktv import FKTVIE
 from .flickr import FlickrIE
 from .folketinget import FolketingetIE
 from .footyroom import FootyRoomIE
 from .formula1 import Formula1IE
 from .fourtube import FourTubeIE
 from .fox import FOXIE
 from .foxgay import FoxgayIE
@ -365,6 +370,7 @@ from .kuwo import (
 )
 from .la7 import LA7IE
 from .laola1tv import Laola1TvIE
 from .learnr import LearnrIE
 from .lecture2go import Lecture2GoIE
 from .lemonde import LemondeIE
 from .leeco import (
--- a/youtube_dl/extractor/formula1.py
+++ b/youtube_dl/extractor/formula1.py
@ -0,0 +1,25 @@
 # coding: utf-8
 from __future__ import unicode_literals
 from .common import InfoExtractor
 class Formula1IE(InfoExtractor):
    # Video pages on formula1.com: the page is fetched, the value of its
    # data-videoid attribute is read, and extraction is handed over to the
    # Ooyala extractor through an ``ooyala:<code>`` URL.
    _VALID_URL = r'https?://(?:www\.)?formula1\.com/content/fom-website/en/video/\d{4}/\d{1,2}/(?P<id>.+?)\.html'
    _TEST = {
        'url': 'http://www.formula1.com/content/fom-website/en/video/2016/5/Race_highlights_-_Spain_2016.html',
        'md5': '8c79e54be72078b26b89e0e111c0502b',
        'info_dict': {
            'id': 'JvYXJpMzE6pArfHWm5ARp5AiUmD-gibV',
            'ext': 'flv',
            'title': 'Race highlights - Spain 2016',
        }
    }

    def _real_extract(self, url):
        # The id matched from the URL is only used while downloading the
        # page; the resulting entry is identified by the embed code.
        page_id = self._match_id(url)
        embed_code = self._search_regex(
            r'data-videoid="([^"]+)"',
            self._download_webpage(url, page_id),
            'ooyala embed code')
        ooyala_url = 'ooyala:%s' % embed_code
        return self.url_result(ooyala_url, 'Ooyala', embed_code)
--- a/youtube_dl/extractor/learnr.py
+++ b/youtube_dl/extractor/learnr.py
@ -0,0 +1,33 @@
 # coding: utf-8
 from __future__ import unicode_literals
 from .common import InfoExtractor
 class LearnrIE(InfoExtractor):
    # learnr.pro video pages: the numeric id comes from the URL, and the
    # ``videoId: '...'`` value found in the page becomes the target of a
    # url_transparent result (the test below expects the Youtube extractor
    # to take over from there).
    _VALID_URL = r'https?://(?:www\.)?learnr\.pro/view/video/(?P<id>[0-9]+)'
    _TEST = {
        'url': 'http://www.learnr.pro/view/video/51624-web-development-tutorial-for-beginners-1-how-to-build-webpages-with-html-css-javascript',
        'md5': '3719fdf0a68397f49899e82c308a89de',
        'info_dict': {
            'id': '51624',
            'ext': 'mp4',
            'title': 'Web Development Tutorial for Beginners (#1) - How to build webpages with HTML, CSS, Javascript',
            'description': 'md5:b36dbfa92350176cdf12b4d388485503',
            'uploader': 'LearnCode.academy',
            'uploader_id': 'learncodeacademy',
            'upload_date': '20131021',
        },
        'add_ie': ['Youtube'],
    }

    def _real_extract(self, url):
        video_id = self._match_id(url)
        page = self._download_webpage(url, video_id)
        # Value of the player's videoId setting, passed on as the URL.
        youtube_id = self._search_regex(
            r"videoId\s*:\s*'([^']+)'", page, 'youtube id')
        return {
            '_type': 'url_transparent',
            'url': youtube_id,
            'id': video_id,
        }
--- a/youtube_dl/extractor/ndtv.py
+++ b/youtube_dl/extractor/ndtv.py
@ -1,19 +1,18 @@
 from __future__ import unicode_literals
 import re
 from .common import InfoExtractor
 from ..utils import (
    month_by_name,
    int_or_none,
    remove_end,
    unified_strdate,
 )
 class NDTVIE(InfoExtractor):
-    _VALID_URL = r'^https?://(?:www\.)?ndtv\.com/video/player/[^/]*/[^/]*/(?P<id>[a-z0-9]+)'
+    _VALID_URL = r'https?://(?:www\.)?ndtv\.com/video/(?:[^/]+/)+[^/?^&]+-(?P<id>\d+)'
    _TEST = {
-        'url': 'http://www.ndtv.com/video/player/news/ndtv-exclusive-don-t-need-character-certificate-from-rahul-gandhi-says-arvind-kejriwal/300710',
+        'url': 'http://www.ndtv.com/video/news/news/ndtv-exclusive-don-t-need-character-certificate-from-rahul-gandhi-says-arvind-kejriwal-300710',
        'md5': '39f992dbe5fb531c395d8bbedb1e5e88',
        'info_dict': {
            'id': '300710',
@ -22,7 +21,7 @@ class NDTVIE(InfoExtractor):
            'description': 'md5:ab2d4b4a6056c5cb4caa6d729deabf02',
            'upload_date': '20131208',
            'duration': 1327,
-            'thumbnail': 'http://i.ndtvimg.com/video/images/vod/medium/2013-12/big_300710_1386518307.jpg',
+            'thumbnail': 're:https?://.*\.jpg',
        },
    }
@ -30,36 +29,19 @@ class NDTVIE(InfoExtractor):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        title = remove_end(self._og_search_title(webpage), ' - NDTV')
        filename = self._search_regex(
            r"__filename='([^']+)'", webpage, 'video filename')
-        video_url = ('http://bitcast-b.bitgravity.com/ndtvod/23372/ndtv/%s' %
+        video_url = 'http://bitcast-b.bitgravity.com/ndtvod/23372/ndtv/%s' % filename
                     filename)
        duration = int_or_none(self._search_regex(
            r"__duration='([^']+)'", webpage, 'duration', fatal=False))
-        date_m = re.search(r'''(?x)
+        upload_date = unified_strdate(self._html_search_meta(
-            <p\s+class="vod_dateline">\s*
+            'publish-date', webpage, 'upload date', fatal=False))
                Published\s+On:\s*
                (?P<monthname>[A-Za-z]+)\s+(?P<day>[0-9]+),\s*(?P<year>[0-9]+)
            ''', webpage)
        upload_date = None
-        if date_m is not None:
+        description = remove_end(self._og_search_description(webpage), ' (Read more)')
            month = month_by_name(date_m.group('monthname'))
            if month is not None:
                upload_date = '%s%02d%02d' % (
                    date_m.group('year'), month, int(date_m.group('day')))
        description = self._og_search_description(webpage)
        READ_MORE = ' (Read more)'
        if description.endswith(READ_MORE):
            description = description[:-len(READ_MORE)]
        title = self._og_search_title(webpage)
        TITLE_SUFFIX = ' - NDTV'
        if title.endswith(TITLE_SUFFIX):
            title = title[:-len(TITLE_SUFFIX)]
        return {
            'id': video_id,
--- a/youtube_dl/extractor/nfb.py
+++ b/youtube_dl/extractor/nfb.py
@ -2,8 +2,12 @@ from __future__ import unicode_literals
 from .common import InfoExtractor
 from ..utils import (
-    sanitized_Request,
+    clean_html,
    determine_ext,
    int_or_none,
    qualities,
    urlencode_postdata,
    xpath_text,
 )
@ -16,12 +20,12 @@ class NFBIE(InfoExtractor):
        'url': 'https://www.nfb.ca/film/qallunaat_why_white_people_are_funny',
        'info_dict': {
            'id': 'qallunaat_why_white_people_are_funny',
-            'ext': 'mp4',
+            'ext': 'flv',
            'title': 'Qallunaat! Why White People Are Funny ',
-            'description': 'md5:836d8aff55e087d04d9f6df554d4e038',
+            'description': 'md5:6b8e32dde3abf91e58857b174916620c',
            'duration': 3128,
            'creator': 'Mark Sandiford',
            'uploader': 'Mark Sandiford',
            'uploader_id': 'mark-sandiford',
        },
        'params': {
            # rtmp download
@ -31,65 +35,78 @@ class NFBIE(InfoExtractor):
    def _real_extract(self, url):
        video_id = self._match_id(url)
        page = self._download_webpage(
            'https://www.nfb.ca/film/%s' % video_id, video_id,
            'Downloading film page')
-        uploader_id = self._html_search_regex(r'<a class="director-link" href="/explore-all-directors/([^/]+)/"',
+        config = self._download_xml(
                                              page, 'director id', fatal=False)
        uploader = self._html_search_regex(r'<em class="director-name" itemprop="name">([^<]+)</em>',
                                           page, 'director name', fatal=False)
        request = sanitized_Request(
            'https://www.nfb.ca/film/%s/player_config' % video_id,
-            urlencode_postdata({'getConfig': 'true'}))
+            video_id, 'Downloading player config XML',
-        request.add_header('Content-Type', 'application/x-www-form-urlencoded')
+            data=urlencode_postdata({'getConfig': 'true'}),
-        request.add_header('X-NFB-Referer', 'http://www.nfb.ca/medias/flash/NFBVideoPlayer.swf')
+            headers={
                'Content-Type': 'application/x-www-form-urlencoded',
                'X-NFB-Referer': 'http://www.nfb.ca/medias/flash/NFBVideoPlayer.swf'
            })
-        config = self._download_xml(request, video_id, 'Downloading player config XML')
+        title, description, thumbnail, duration, uploader, author = [None] * 6
-
+        thumbnails, formats = [[]] * 2
-        title = None
+        subtitles = {}
        description = None
        thumbnail = None
        duration = None
        formats = []
        def extract_thumbnail(media):
            thumbnails = {}
            for asset in media.findall('assets/asset'):
                thumbnails[asset.get('quality')] = asset.find('default/url').text
            if not thumbnails:
                return None
            if 'high' in thumbnails:
                return thumbnails['high']
            return list(thumbnails.values())[0]
        for media in config.findall('./player/stream/media'):
            if media.get('type') == 'posterImage':
-                thumbnail = extract_thumbnail(media)
+                quality_key = qualities(('low', 'high'))
-            elif media.get('type') == 'video':
+                thumbnails = []
                duration = int(media.get('duration'))
                title = media.find('title').text
                description = media.find('description').text
                # It seems assets always go from lower to better quality, so no need to sort
                for asset in media.findall('assets/asset'):
-                    for x in asset:
+                    asset_url = xpath_text(asset, 'default/url', default=None)
                    if not asset_url:
                        continue
                    quality = asset.get('quality')
                    thumbnails.append({
                        'url': asset_url,
                        'id': quality,
                        'preference': quality_key(quality),
                    })
            elif media.get('type') == 'video':
                title = xpath_text(media, 'title', fatal=True)
                for asset in media.findall('assets/asset'):
                    quality = asset.get('quality')
                    height = int_or_none(self._search_regex(
                        r'^(\d+)[pP]$', quality or '', 'height', default=None))
                    for node in asset:
                        streamer = xpath_text(node, 'streamerURI', default=None)
                        if not streamer:
                            continue
                        play_path = xpath_text(node, 'url', default=None)
                        if not play_path:
                            continue
                        formats.append({
-                            'url': x.find('streamerURI').text,
+                            'url': streamer,
-                            'app': x.find('streamerURI').text.split('/', 3)[3],
+                            'app': streamer.split('/', 3)[3],
-                            'play_path': x.find('url').text,
+                            'play_path': play_path,
                            'rtmp_live': False,
-                            'ext': 'mp4',
+                            'ext': 'flv',
-                            'format_id': '%s-%s' % (x.tag, asset.get('quality')),
+                            'format_id': '%s-%s' % (node.tag, quality) if quality else node.tag,
                            'height': height,
                        })
                self._sort_formats(formats)
                description = clean_html(xpath_text(media, 'description'))
                uploader = xpath_text(media, 'author')
                duration = int_or_none(media.get('duration'))
                for subtitle in media.findall('./subtitles/subtitle'):
                    subtitle_url = xpath_text(subtitle, 'url', default=None)
                    if not subtitle_url:
                        continue
                    lang = xpath_text(subtitle, 'lang', default='en')
                    subtitles.setdefault(lang, []).append({
                        'url': subtitle_url,
                        'ext': (subtitle.get('format') or determine_ext(subtitle_url)).lower(),
                    })
        return {
            'id': video_id,
            'title': title,
            'description': description,
-            'thumbnail': thumbnail,
+            'thumbnails': thumbnails,
            'duration': duration,
            'creator': uploader,
            'uploader': uploader,
            'uploader_id': uploader_id,
            'formats': formats,
            'subtitles': subtitles,
        }
--- a/youtube_dl/extractor/twentyfourvideo.py
+++ b/youtube_dl/extractor/twentyfourvideo.py
@ -47,7 +47,8 @@ class TwentyFourVideoIE(InfoExtractor):
        title = self._og_search_title(webpage)
        description = self._html_search_regex(
-            r'<span itemprop="description">([^<]+)</span>', webpage, 'description', fatal=False)
+            r'<(p|span)[^>]+itemprop="description"[^>]*>(?P<description>[^<]+)</\1>',
            webpage, 'description', fatal=False, group='description')
        thumbnail = self._og_search_thumbnail(webpage)
        duration = int_or_none(self._og_search_property(
            'duration', webpage, 'duration', fatal=False))
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@ -1549,15 +1549,11 @@ def setproctitle(title):
 def remove_start(s, start):
-    if s.startswith(start):
+    return s[len(start):] if s is not None and s.startswith(start) else s
        return s[len(start):]
    return s
 def remove_end(s, end):
-    if s.endswith(end):
+    return s[:-len(end)] if s is not None and s.endswith(end) else s
        return s[:-len(end)]
    return s
 def remove_quotes(s):