Merge branch 'master' of https://github.com/rg3/youtube-dl into tumblr

Oli Allen 2015-10-04 18:13:59 +01:00
commit 92e0aee3e4
14 changed files with 413 additions and 19 deletions

View File

@@ -8,6 +8,35 @@ import re
ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
README_FILE = os.path.join(ROOT_DIR, 'README.md')
def filter_options(readme):
ret = ''
in_options = False
for line in readme.split('\n'):
if line.startswith('# '):
if line[2:].startswith('OPTIONS'):
in_options = True
else:
in_options = False
if in_options:
if line.lstrip().startswith('-'):
option, description = re.split(r'\s{2,}', line.lstrip())
split_option = option.split(' ')
if not split_option[-1].startswith('-'): # metavar
option = ' '.join(split_option[:-1] + ['*%s*' % split_option[-1]])
# Pandoc's definition_lists. See http://pandoc.org/README.html
# for more information.
ret += '\n%s\n: %s\n' % (option, description)
else:
ret += line.lstrip() + '\n'
else:
ret += line + '\n'
return ret
with io.open(README_FILE, encoding='utf-8') as f:
readme = f.read()
@@ -26,6 +55,8 @@ readme = re.sub(r'(?s)^.*?(?=# DESCRIPTION)', '', readme)
readme = re.sub(r'\s+youtube-dl \[OPTIONS\] URL \[URL\.\.\.\]', '', readme)
readme = PREFIX + readme
readme = filter_options(readme)
if sys.version_info < (3, 0):
print(readme.encode('utf-8'))
else:
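For illustration, a minimal sketch of what the new filter_options pass produces, assuming the function above and `import re` are in scope (the README option line below is made up):

sample = '# OPTIONS\n    -h, --help                       Print this help text and exit\n'
print(filter_options(sample))
# Output (a Pandoc definition list):
# # OPTIONS
#
# -h, --help
# : Print this help text and exit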

View File

@@ -1232,13 +1232,20 @@ class YoutubeDL(object):
except (ValueError, OverflowError, OSError):
pass
subtitles = info_dict.get('subtitles')
if subtitles:
for _, subtitle in subtitles.items():
for subtitle_format in subtitle:
if 'ext' not in subtitle_format:
subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
if self.params.get('listsubtitles', False):
if 'automatic_captions' in info_dict:
self.list_subtitles(info_dict['id'], info_dict.get('automatic_captions'), 'automatic captions')
-self.list_subtitles(info_dict['id'], info_dict.get('subtitles'), 'subtitles')
+self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
return
info_dict['requested_subtitles'] = self.process_subtitles(
-info_dict['id'], info_dict.get('subtitles'),
+info_dict['id'], subtitles,
info_dict.get('automatic_captions'))
# We now pick which formats have to be downloaded

View File

@@ -416,7 +416,7 @@ if hasattr(shutil, 'get_terminal_size'): # Python >= 3.3
else:
_terminal_size = collections.namedtuple('terminal_size', ['columns', 'lines'])
-def compat_get_terminal_size():
+def compat_get_terminal_size(fallback=(80, 24)):
columns = compat_getenv('COLUMNS', None)
if columns:
columns = int(columns)
@@ -428,14 +428,20 @@ else:
else:
lines = None
if columns <= 0 or lines <= 0:
try:
sp = subprocess.Popen(
['stty', 'size'],
stdout=subprocess.PIPE, stderr=subprocess.PIPE)
out, err = sp.communicate()
-lines, columns = map(int, out.split())
+_columns, _lines = map(int, out.split())
except Exception:
-pass
+_columns, _lines = _terminal_size(*fallback)
if columns <= 0:
columns = _columns
if lines <= 0:
lines = _lines
return _terminal_size(columns, lines)
try:
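A rough usage sketch of the new fallback behaviour, assuming compat_get_terminal_size is imported from youtube_dl.compat: when COLUMNS/LINES give no usable values and `stty size` fails, the result comes from the fallback tuple:

from youtube_dl.compat import compat_get_terminal_size

size = compat_get_terminal_size(fallback=(80, 24))
print('%dx%d' % (size.columns, size.lines))  # e.g. 80x24 when no terminal information is available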

View File

@@ -158,6 +158,7 @@ from .eroprofile import EroProfileIE
from .escapist import EscapistIE
from .espn import ESPNIE
from .esri import EsriVideoIE
from .europa import EuropaIE
from .everyonesmixtape import EveryonesMixtapeIE
from .exfm import ExfmIE
from .expotv import ExpoTVIE
@@ -294,6 +295,11 @@ from .lifenews import (
LifeNewsIE,
LifeEmbedIE,
)
from .limelight import (
LimelightMediaIE,
LimelightChannelIE,
LimelightChannelListIE,
)
from .liveleak import LiveLeakIE
from .livestream import (
LivestreamIE,

View File

@@ -165,6 +165,7 @@ class InfoExtractor(object):
with the "ext" entry and one of:
* "data": The subtitles file contents
* "url": A URL pointing to the subtitles file
"ext" will be calculated from URL if missing
automatic_captions: Like 'subtitles', used by the YoutubeIE for
automatically generated captions
duration: Length of the video in seconds, as an integer.
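A minimal sketch of the documented structure (URL and language code made up); for the second entry the missing "ext" would be derived from the URL, here 'srt':

subtitles = {
    'en': [
        {'ext': 'vtt', 'data': 'WEBVTT\n\n...'},
        {'url': 'http://example.com/subs/en.srt'},  # 'ext' filled in from the URL
    ],
}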

View File

@@ -10,7 +10,7 @@ from ..utils import (
class EngadgetIE(InfoExtractor):
_VALID_URL = r'''(?x)https?://www.engadget.com/
-(?:video/5min/(?P<id>\d+)|
+(?:video(?:/5min)?/(?P<id>\d+)|
[\d/]+/.*?)
'''

View File

@@ -0,0 +1,93 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..compat import compat_urlparse
from ..utils import (
int_or_none,
orderedSet,
parse_duration,
qualities,
unified_strdate,
xpath_text
)
class EuropaIE(InfoExtractor):
_VALID_URL = r'https?://ec\.europa\.eu/avservices/(?:video/player|audio/audioDetails)\.cfm\?.*?\bref=(?P<id>[A-Za-z0-9-]+)'
_TESTS = [{
'url': 'http://ec.europa.eu/avservices/video/player.cfm?ref=I107758',
'md5': '574f080699ddd1e19a675b0ddf010371',
'info_dict': {
'id': 'I107758',
'ext': 'mp4',
'title': 'TRADE - Wikileaks on TTIP',
'description': 'NEW LIVE EC Midday press briefing of 11/08/2015',
'thumbnail': 're:^https?://.*\.jpg$',
'upload_date': '20150811',
'duration': 34,
'view_count': int,
'formats': 'mincount:3',
}
}, {
'url': 'http://ec.europa.eu/avservices/video/player.cfm?sitelang=en&ref=I107786',
'only_matching': True,
}, {
'url': 'http://ec.europa.eu/avservices/audio/audioDetails.cfm?ref=I-109295&sitelang=en',
'only_matching': True,
}]
def _real_extract(self, url):
video_id = self._match_id(url)
playlist = self._download_xml(
'http://ec.europa.eu/avservices/video/player/playlist.cfm?ID=%s' % video_id, video_id)
def get_item(type_, preference):
items = {}
for item in playlist.findall('./info/%s/item' % type_):
lang, label = xpath_text(item, 'lg', default=None), xpath_text(item, 'label', default=None)
if lang and label:
items[lang] = label.strip()
for p in preference:
if items.get(p):
return items[p]
query = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
preferred_lang = query.get('sitelang', ('en', ))[0]
preferred_langs = orderedSet((preferred_lang, 'en', 'int'))
title = get_item('title', preferred_langs) or video_id
description = get_item('description', preferred_langs)
thumbnmail = xpath_text(playlist, './info/thumburl', 'thumbnail')
upload_date = unified_strdate(xpath_text(playlist, './info/date', 'upload date'))
duration = parse_duration(xpath_text(playlist, './info/duration', 'duration'))
view_count = int_or_none(xpath_text(playlist, './info/views', 'views'))
language_preference = qualities(preferred_langs[::-1])
formats = []
for file_ in playlist.findall('./files/file'):
video_url = xpath_text(file_, './url')
if not video_url:
continue
lang = xpath_text(file_, './lg')
formats.append({
'url': video_url,
'format_id': lang,
'format_note': xpath_text(file_, './lglabel'),
'language_preference': language_preference(lang)
})
self._sort_formats(formats)
return {
'id': video_id,
'title': title,
'description': description,
'thumbnail': thumbnmail,
'upload_date': upload_date,
'duration': duration,
'view_count': view_count,
'formats': formats
}
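On the ordering above: language_preference is built with qualities() over the reversed preference list, so earlier preferred languages score higher. A small standalone sketch (language codes chosen for illustration):

from youtube_dl.utils import qualities

preferred_langs = ['de', 'en', 'int']  # e.g. for sitelang=de
language_preference = qualities(preferred_langs[::-1])
print(language_preference('de'))   # 2 (most preferred)
print(language_preference('en'))   # 1
print(language_preference('fr'))   # -1 (not in the list)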

View File

@@ -0,0 +1,229 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
determine_ext,
float_or_none,
int_or_none,
)
class LimelightBaseIE(InfoExtractor):
_PLAYLIST_SERVICE_URL = 'http://production-ps.lvp.llnw.net/r/PlaylistService/%s/%s/%s'
_API_URL = 'http://api.video.limelight.com/rest/organizations/%s/%s/%s/%s.json'
def _call_playlist_service(self, item_id, method, fatal=True):
return self._download_json(
self._PLAYLIST_SERVICE_URL % (self._PLAYLIST_SERVICE_PATH, item_id, method),
item_id, 'Downloading PlaylistService %s JSON' % method, fatal=fatal)
def _call_api(self, organization_id, item_id, method):
return self._download_json(
self._API_URL % (organization_id, self._API_PATH, item_id, method),
item_id, 'Downloading API %s JSON' % method)
def _extract(self, item_id, pc_method, mobile_method, meta_method):
pc = self._call_playlist_service(item_id, pc_method)
metadata = self._call_api(pc['orgId'], item_id, meta_method)
mobile = self._call_playlist_service(item_id, mobile_method, fatal=False)
return pc, mobile, metadata
def _extract_info(self, streams, mobile_urls, properties):
video_id = properties['media_id']
formats = []
for stream in streams:
stream_url = stream.get('url')
if not stream_url:
continue
if '.f4m' in stream_url:
formats.extend(self._extract_f4m_formats(stream_url, video_id))
else:
fmt = {
'url': stream_url,
'abr': float_or_none(stream.get('audioBitRate')),
'vbr': float_or_none(stream.get('videoBitRate')),
'fps': float_or_none(stream.get('videoFrameRate')),
'width': int_or_none(stream.get('videoWidthInPixels')),
'height': int_or_none(stream.get('videoHeightInPixels')),
'ext': determine_ext(stream_url)
}
rtmp = re.search(r'^(?P<url>rtmpe?://[^/]+/(?P<app>.+))/(?P<playpath>mp4:.+)$', stream_url)
if rtmp:
format_id = 'rtmp'
if stream.get('videoBitRate'):
format_id += '-%d' % int_or_none(stream['videoBitRate'])
fmt.update({
'url': rtmp.group('url'),
'play_path': rtmp.group('playpath'),
'app': rtmp.group('app'),
'ext': 'flv',
'format_id': format_id,
})
formats.append(fmt)
for mobile_url in mobile_urls:
media_url = mobile_url.get('mobileUrl')
if not media_url:
continue
format_id = mobile_url.get('targetMediaPlatform')
if determine_ext(media_url) == 'm3u8':
formats.extend(self._extract_m3u8_formats(
media_url, video_id, 'mp4', entry_protocol='m3u8_native',
preference=-1, m3u8_id=format_id))
else:
formats.append({
'url': media_url,
'format_id': format_id,
'preference': -1,
})
self._sort_formats(formats)
title = properties['title']
description = properties.get('description')
timestamp = int_or_none(properties.get('publish_date') or properties.get('create_date'))
duration = float_or_none(properties.get('duration_in_milliseconds'), 1000)
filesize = int_or_none(properties.get('total_storage_in_bytes'))
categories = [properties.get('category')]
tags = properties.get('tags', [])
thumbnails = [{
'url': thumbnail['url'],
'width': int_or_none(thumbnail.get('width')),
'height': int_or_none(thumbnail.get('height')),
} for thumbnail in properties.get('thumbnails', []) if thumbnail.get('url')]
subtitles = {}
for caption in properties.get('captions', {}):
lang = caption.get('language_code')
subtitles_url = caption.get('url')
if lang and subtitles_url:
subtitles[lang] = [{
'url': subtitles_url,
}]
return {
'id': video_id,
'title': title,
'description': description,
'formats': formats,
'timestamp': timestamp,
'duration': duration,
'filesize': filesize,
'categories': categories,
'tags': tags,
'thumbnails': thumbnails,
'subtitles': subtitles,
}
class LimelightMediaIE(LimelightBaseIE):
IE_NAME = 'limelight'
_VALID_URL = r'(?:limelight:media:|http://link\.videoplatform\.limelight\.com/media/\??\bmediaId=)(?P<id>[a-z0-9]{32})'
_TESTS = [{
'url': 'http://link.videoplatform.limelight.com/media/?mediaId=3ffd040b522b4485b6d84effc750cd86',
'info_dict': {
'id': '3ffd040b522b4485b6d84effc750cd86',
'ext': 'flv',
'title': 'HaP and the HB Prince Trailer',
'description': 'md5:8005b944181778e313d95c1237ddb640',
'thumbnail': 're:^https?://.*\.jpeg$',
'duration': 144.23,
'timestamp': 1244136834,
'upload_date': '20090604',
},
'params': {
# rtmp download
'skip_download': True,
},
}, {
# video with subtitles
'url': 'limelight:media:a3e00274d4564ec4a9b29b9466432335',
'info_dict': {
'id': 'a3e00274d4564ec4a9b29b9466432335',
'ext': 'flv',
'title': '3Play Media Overview Video',
'description': '',
'thumbnail': 're:^https?://.*\.jpeg$',
'duration': 78.101,
'timestamp': 1338929955,
'upload_date': '20120605',
'subtitles': 'mincount:9',
},
'params': {
# rtmp download
'skip_download': True,
},
}]
_PLAYLIST_SERVICE_PATH = 'media'
_API_PATH = 'media'
def _real_extract(self, url):
video_id = self._match_id(url)
pc, mobile, metadata = self._extract(
video_id, 'getPlaylistByMediaId', 'getMobilePlaylistByMediaId', 'properties')
return self._extract_info(
pc['playlistItems'][0].get('streams', []),
mobile['mediaList'][0].get('mobileUrls', []) if mobile else [],
metadata)
class LimelightChannelIE(LimelightBaseIE):
IE_NAME = 'limelight:channel'
_VALID_URL = r'(?:limelight:channel:|http://link\.videoplatform\.limelight\.com/media/\??\bchannelId=)(?P<id>[a-z0-9]{32})'
_TEST = {
'url': 'http://link.videoplatform.limelight.com/media/?channelId=ab6a524c379342f9b23642917020c082',
'info_dict': {
'id': 'ab6a524c379342f9b23642917020c082',
'title': 'Javascript Sample Code',
},
'playlist_mincount': 3,
}
_PLAYLIST_SERVICE_PATH = 'channel'
_API_PATH = 'channels'
def _real_extract(self, url):
channel_id = self._match_id(url)
pc, mobile, medias = self._extract(
channel_id, 'getPlaylistByChannelId',
'getMobilePlaylistWithNItemsByChannelId?begin=0&count=-1', 'media')
entries = [
self._extract_info(
pc['playlistItems'][i].get('streams', []),
mobile['mediaList'][i].get('mobileUrls', []) if mobile else [],
medias['media_list'][i])
for i in range(len(medias['media_list']))]
return self.playlist_result(entries, channel_id, pc['title'])
class LimelightChannelListIE(LimelightBaseIE):
IE_NAME = 'limelight:channel_list'
_VALID_URL = r'(?:limelight:channel_list:|http://link\.videoplatform\.limelight\.com/media/\?.*?\bchannelListId=)(?P<id>[a-z0-9]{32})'
_TEST = {
'url': 'http://link.videoplatform.limelight.com/media/?channelListId=301b117890c4465c8179ede21fd92e2b',
'info_dict': {
'id': '301b117890c4465c8179ede21fd92e2b',
'title': 'Website - Hero Player',
},
'playlist_mincount': 2,
}
_PLAYLIST_SERVICE_PATH = 'channel_list'
def _real_extract(self, url):
channel_list_id = self._match_id(url)
channel_list = self._call_playlist_service(channel_list_id, 'getMobileChannelListById')
entries = [
self.url_result('limelight:channel:%s' % channel['id'], 'LimelightChannel')
for channel in channel_list['channelList']]
return self.playlist_result(entries, channel_list_id, channel_list['title'])
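For the RTMP branch in _extract_info above, a quick sketch of how the regex splits a stream URL into connection URL, app and play path — the URL itself is invented for illustration:

import re

rtmp = re.search(
    r'^(?P<url>rtmpe?://[^/]+/(?P<app>.+))/(?P<playpath>mp4:.+)$',
    'rtmp://example.llnwd.net/s/folder/mp4:videos/clip.mp4')
print(rtmp.group('url'))       # rtmp://example.llnwd.net/s/folder
print(rtmp.group('app'))       # s/folder
print(rtmp.group('playpath'))  # mp4:videos/clip.mp4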

View File

@@ -10,7 +10,6 @@ from ..compat import (
)
from ..utils import (
ExtractorError,
-clean_html,
)

View File

@@ -134,6 +134,24 @@ class PBSIE(InfoExtractor):
'params': {
'skip_download': True, # requires ffmpeg
},
},
{
# Video embedded in iframe containing angle brackets as attribute's value (e.g.
# "<iframe style='position: absolute;<br />\ntop: 0; left: 0;' ...", see
# https://github.com/rg3/youtube-dl/issues/7059)
'url': 'http://www.pbs.org/food/features/a-chefs-life-season-3-episode-5-prickly-business/',
'info_dict': {
'id': '2365546844',
'display_id': 'a-chefs-life-season-3-episode-5-prickly-business',
'ext': 'mp4',
'title': "A Chef's Life - Season 3, Ep. 5: Prickly Business",
'description': 'md5:61db2ddf27c9912f09c241014b118ed1',
'duration': 1480,
'thumbnail': 're:^https?://.*\.jpg$',
},
'params': {
'skip_download': True, # requires ffmpeg
},
}
]
@@ -167,7 +185,7 @@ class PBSIE(InfoExtractor):
return media_id, presumptive_id, upload_date
url = self._search_regex(
-r'<iframe\s+[^>]*\s+src=["\']([^\'"]+partnerplayer[^\'"]+)["\']',
+r'(?s)<iframe[^>]+?(?:[a-z-]+?=["\'].*?["\'][^>]+?)*?\bsrc=["\']([^\'"]+partnerplayer[^\'"]+)["\']',
webpage, 'player URL')
mobj = re.match(self._VALID_URL, url)
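The loosened regex above is meant to survive markup like the issue-7059 case, where an attribute value itself contains angle brackets and a line break; a minimal check against fabricated markup of that shape:

import re

webpage = ("<iframe style='position: absolute;<br />\ntop: 0; left: 0;' "
           "src='http://player.pbs.org/partnerplayer/2365546844/' allowfullscreen></iframe>")
m = re.search(
    r'(?s)<iframe[^>]+?(?:[a-z-]+?=["\'].*?["\'][^>]+?)*?\bsrc=["\']([^\'"]+partnerplayer[^\'"]+)["\']',
    webpage)
print(m.group(1))  # http://player.pbs.org/partnerplayer/2365546844/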

View File

@@ -74,7 +74,7 @@ class RuutuIE(InfoExtractor):
preference = -1 if proto == 'rtmp' else 1
label = child.get('label')
tbr = int_or_none(child.get('bitrate'))
-width, height = [int_or_none(x) for x in child.get('resolution', '').split('x')]
+width, height = [int_or_none(x) for x in child.get('resolution', 'x').split('x')[:2]]
formats.append({
'format_id': '%s-%s' % (proto, label if label else tbr),
'url': video_url,
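Defaulting the attribute to 'x' and slicing to two fields means a missing or oddly formed resolution degrades to None values instead of raising; a small sketch with invented attribute values (parse_resolution is a hypothetical wrapper around the patched expression):

from youtube_dl.utils import int_or_none

def parse_resolution(value):
    # Same expression as the patched line, with child.get('resolution', 'x') inlined as `value`
    return [int_or_none(x) for x in (value if value is not None else 'x').split('x')[:2]]

print(parse_resolution('1280x720'))      # [1280, 720]
print(parse_resolution(None))            # [None, None] -- attribute missing
print(parse_resolution('1920x1080x25'))  # [1920, 1080] -- extra field ignored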

View File

@@ -16,7 +16,7 @@ from ..utils import (
class TapelyIE(InfoExtractor):
-_VALID_URL = r'https?://(?:www\.)?tape\.ly/(?P<id>[A-Za-z0-9\-_]+)(?:/(?P<songnr>\d+))?'
+_VALID_URL = r'https?://(?:www\.)?(?:tape\.ly|tapely\.com)/(?P<id>[A-Za-z0-9\-_]+)(?:/(?P<songnr>\d+))?'
_API_URL = 'http://tape.ly/showtape?id={0:}'
_S3_SONG_URL = 'http://mytape.s3.amazonaws.com/{0:}'
_SOUNDCLOUD_SONG_URL = 'http://api.soundcloud.com{0:}'
@@ -42,6 +42,10 @@ class TapelyIE(InfoExtractor):
'ext': 'm4a',
},
},
{
'url': 'https://tapely.com/my-grief-as-told-by-water',
'only_matching': True,
},
]
def _real_extract(self, url):