diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 7a79389ce..c65462ba4 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.07.06*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.07.06** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.07.07*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.07.07** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.07.06 +[debug] youtube-dl version 2016.07.07 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/README.md b/README.md index bc214562d..44332ea9a 100644 --- a/README.md +++ b/README.md @@ -432,6 +432,7 @@ For example, with the following configuration file youtube-dl will always extrac --no-mtime --proxy 127.0.0.1:3128 -o ~/Movies/%(title)s.%(ext)s +# Lines starting with # are comments ``` Note that options in configuration file are just the same options aka switches used in regular command line calls thus there **must be no whitespace** after `-` or `--`, e.g. `-o` or `--proxy` but not `- o` or `-- proxy`. diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 3388fe221..9174c6f89 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -312,6 +312,7 @@ - **jpopsuki.tv** - **JWPlatform** - **Kaltura** + - **Kamcord** - **KanalPlay**: Kanal 5/9/11 Play - **Kankan** - **Karaoketv** @@ -476,6 +477,8 @@ - **Odnoklassniki** - **OktoberfestTV** - **on.aol.com** + - **onet.tv** + - **onet.tv:channel** - **OnionStudios** - **Ooyala** - **OoyalaExternal** diff --git a/test/test_compat.py b/test/test_compat.py index 1d7ac9f16..b57424948 100644 --- a/test/test_compat.py +++ b/test/test_compat.py @@ -88,6 +88,7 @@ class TestCompat(unittest.TestCase): def test_compat_shlex_split(self): self.assertEqual(compat_shlex_split('-option "one two"'), ['-option', 'one two']) self.assertEqual(compat_shlex_split('-option "one\ntwo" \n -flag'), ['-option', 'one\ntwo', '-flag']) + self.assertEqual(compat_shlex_split('-val 中文'), ['-val', '中文']) def test_compat_etree_fromstring(self): xml = ''' diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py index 67db1c7c6..b8aaf5a46 100644 --- a/youtube_dl/compat.py +++ b/youtube_dl/compat.py @@ -1,3 +1,4 @@ +# coding: utf-8 from __future__ import unicode_literals import binascii @@ -2594,15 +2595,19 @@ except ImportError: # Python < 3.3 return "'" + s.replace("'", "'\"'\"'") + "'" -if sys.version_info >= (2, 7, 3): +try: + args = shlex.split('中文') + assert (isinstance(args, list) and + isinstance(args[0], compat_str) and + args[0] == '中文') compat_shlex_split = shlex.split -else: +except (AssertionError, UnicodeEncodeError): # Working around shlex issue with unicode strings on some python 2 # versions (see http://bugs.python.org/issue1548891) def compat_shlex_split(s, comments=False, posix=True): if isinstance(s, compat_str): s = s.encode('utf-8') - return shlex.split(s, comments, posix) + return list(map(lambda s: s.decode('utf-8'), shlex.split(s, comments, posix))) def compat_ord(c): diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py index b49b1977d..57ce0c174 100644 --- a/youtube_dl/extractor/brightcove.py +++ b/youtube_dl/extractor/brightcove.py @@ -90,6 +90,7 @@ class BrightcoveLegacyIE(InfoExtractor): 'description': 'md5:363109c02998fee92ec02211bd8000df', 'uploader': 'National Ballet of Canada', }, + 'skip': 'Video gone', }, { # test flv videos served by akamaihd.net @@ -108,7 +109,7 @@ class BrightcoveLegacyIE(InfoExtractor): }, }, { - # playlist test + # playlist with 'videoList' # from http://support.brightcove.com/en/video-cloud/docs/playlist-support-single-video-players 'url': 'http://c.brightcove.com/services/viewer/htmlFederated?playerID=3550052898001&playerKey=AQ%7E%7E%2CAAABmA9XpXk%7E%2C-Kp7jNgisre1fG5OdqpAFUTcs0lP_ZoL', 'info_dict': { @@ -117,6 +118,15 @@ class BrightcoveLegacyIE(InfoExtractor): }, 'playlist_mincount': 7, }, + { + # playlist with 'playlistTab' (https://github.com/rg3/youtube-dl/issues/9965) + 'url': 'http://c.brightcove.com/services/json/experience/runtime/?command=get_programming_for_experience&playerKey=AQ%7E%7E,AAABXlLMdok%7E,NJ4EoMlZ4rZdx9eU1rkMVd8EaYPBBUlg', + 'info_dict': { + 'id': '1522758701001', + 'title': 'Lesson 08', + }, + 'playlist_mincount': 10, + }, ] FLV_VCODECS = { 1: 'SORENSON', @@ -298,13 +308,19 @@ class BrightcoveLegacyIE(InfoExtractor): info_url, player_key, 'Downloading playlist information') json_data = json.loads(playlist_info) - if 'videoList' not in json_data: + if 'videoList' in json_data: + playlist_info = json_data['videoList'] + playlist_dto = playlist_info['mediaCollectionDTO'] + elif 'playlistTabs' in json_data: + playlist_info = json_data['playlistTabs'] + playlist_dto = playlist_info['lineupListDTO']['playlistDTOs'][0] + else: raise ExtractorError('Empty playlist') - playlist_info = json_data['videoList'] - videos = [self._extract_video_info(video_info) for video_info in playlist_info['mediaCollectionDTO']['videoDTOs']] + + videos = [self._extract_video_info(video_info) for video_info in playlist_dto['videoDTOs']] return self.playlist_result(videos, playlist_id='%s' % playlist_info['id'], - playlist_title=playlist_info['mediaCollectionDTO']['displayName']) + playlist_title=playlist_dto['displayName']) def _extract_video_info(self, video_info): video_id = compat_str(video_info['id']) diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py index 15bfc59b2..1f92823b7 100644 --- a/youtube_dl/extractor/dailymotion.py +++ b/youtube_dl/extractor/dailymotion.py @@ -112,6 +112,13 @@ class DailymotionIE(DailymotionBaseInfoExtractor): } ] + @staticmethod + def _extract_urls(webpage): + # Look for embedded Dailymotion player + matches = re.findall( + r'<(?:(?:embed|iframe)[^>]+?src=|input[^>]+id=[\'"]dmcloudUrlEmissionSelect[\'"][^>]+value=)(["\'])(?P(?:https?:)?//(?:www\.)?dailymotion\.com/(?:embed|swf)/video/.+?)\1', webpage) + return list(map(lambda m: unescapeHTML(m[1]), matches)) + def _real_extract(self, url): video_id = self._match_id(url) diff --git a/youtube_dl/extractor/daum.py b/youtube_dl/extractor/daum.py index 86024a745..b5c310ccb 100644 --- a/youtube_dl/extractor/daum.py +++ b/youtube_dl/extractor/daum.py @@ -66,22 +66,32 @@ class DaumIE(InfoExtractor): 'view_count': int, 'comment_count': int, }, + }, { + # Requires dte_type=WEB (#9972) + 'url': 'http://tvpot.daum.net/v/s3794Uf1NZeZ1qMpGpeqeRU', + 'md5': 'a8917742069a4dd442516b86e7d66529', + 'info_dict': { + 'id': 's3794Uf1NZeZ1qMpGpeqeRU', + 'ext': 'mp4', + 'title': '러블리즈 - Destiny (나의 지구) (Lovelyz - Destiny) [쇼! 음악중심] 508회 20160611', + 'description': '러블리즈 - Destiny (나의 지구) (Lovelyz - Destiny)\n\n[쇼! 음악중심] 20160611, 507회', + 'upload_date': '20160611', + }, }] def _real_extract(self, url): video_id = compat_urllib_parse_unquote(self._match_id(url)) - query = compat_urllib_parse_urlencode({'vid': video_id}) movie_data = self._download_json( - 'http://videofarm.daum.net/controller/api/closed/v1_2/IntegratedMovieData.json?' + query, - video_id, 'Downloading video formats info') + 'http://videofarm.daum.net/controller/api/closed/v1_2/IntegratedMovieData.json', + video_id, 'Downloading video formats info', query={'vid': video_id, 'dte_type': 'WEB'}) # For urls like http://m.tvpot.daum.net/v/65139429, where the video_id is really a clipid if not movie_data.get('output_list', {}).get('output_list') and re.match(r'^\d+$', video_id): return self.url_result('http://tvpot.daum.net/clip/ClipView.do?clipid=%s' % video_id) info = self._download_xml( - 'http://tvpot.daum.net/clip/ClipInfoXml.do?' + query, video_id, - 'Downloading video info') + 'http://tvpot.daum.net/clip/ClipInfoXml.do', video_id, + 'Downloading video info', query={'vid': video_id}) formats = [] for format_el in movie_data['output_list']['output_list']: diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index e52faa078..12cc1b5f7 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -368,6 +368,7 @@ from .jove import JoveIE from .jwplatform import JWPlatformIE from .jpopsukitv import JpopsukiIE from .kaltura import KalturaIE +from .kamcord import KamcordIE from .kanalplay import KanalPlayIE from .kankan import KankanIE from .karaoketv import KaraoketvIE diff --git a/youtube_dl/extractor/francetv.py b/youtube_dl/extractor/francetv.py index ad94e31f3..7653975e3 100644 --- a/youtube_dl/extractor/francetv.py +++ b/youtube_dl/extractor/francetv.py @@ -14,7 +14,10 @@ from ..utils import ( parse_duration, determine_ext, ) -from .dailymotion import DailymotionCloudIE +from .dailymotion import ( + DailymotionIE, + DailymotionCloudIE, +) class FranceTVBaseInfoExtractor(InfoExtractor): @@ -188,6 +191,21 @@ class FranceTvInfoIE(FranceTVBaseInfoExtractor): 'params': { 'skip_download': True, }, + }, { + # Dailymotion embed + 'url': 'http://www.francetvinfo.fr/politique/notre-dame-des-landes/video-sur-france-inter-cecile-duflot-denonce-le-regard-meprisant-de-patrick-cohen_1520091.html', + 'md5': 'ee7f1828f25a648addc90cb2687b1f12', + 'info_dict': { + 'id': 'x4iiko0', + 'ext': 'mp4', + 'title': 'NDDL, référendum, Brexit : Cécile Duflot répond à Patrick Cohen', + 'description': 'Au lendemain de la victoire du "oui" au référendum sur l\'aéroport de Notre-Dame-des-Landes, l\'ancienne ministre écologiste est l\'invitée de Patrick Cohen. Plus d\'info : https://www.franceinter.fr/emissions/le-7-9/le-7-9-27-juin-2016', + 'timestamp': 1467011958, + 'upload_date': '20160627', + 'uploader': 'France Inter', + 'uploader_id': 'x2q2ez', + }, + 'add_ie': ['Dailymotion'], }] def _real_extract(self, url): @@ -197,7 +215,13 @@ class FranceTvInfoIE(FranceTVBaseInfoExtractor): dmcloud_url = DailymotionCloudIE._extract_dmcloud_url(webpage) if dmcloud_url: - return self.url_result(dmcloud_url, 'DailymotionCloud') + return self.url_result(dmcloud_url, DailymotionCloudIE.ie_key()) + + dailymotion_urls = DailymotionIE._extract_urls(webpage) + if dailymotion_urls: + return self.playlist_result([ + self.url_result(dailymotion_url, DailymotionIE.ie_key()) + for dailymotion_url in dailymotion_urls]) video_id, catalogue = self._search_regex( (r'id-video=([^@]+@[^"]+)', diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 764697bd2..31527d1c6 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -49,7 +49,10 @@ from .pornhub import PornHubIE from .xhamster import XHamsterEmbedIE from .tnaflix import TNAFlixNetworkEmbedIE from .vimeo import VimeoIE -from .dailymotion import DailymotionCloudIE +from .dailymotion import ( + DailymotionIE, + DailymotionCloudIE, +) from .onionstudios import OnionStudiosIE from .viewlift import ViewLiftEmbedIE from .screenwavemedia import ScreenwaveMediaIE @@ -1673,12 +1676,9 @@ class GenericIE(InfoExtractor): if matches: return _playlist_from_matches(matches, lambda m: m[-1]) - # Look for embedded Dailymotion player - matches = re.findall( - r'<(?:(?:embed|iframe)[^>]+?src=|input[^>]+id=[\'"]dmcloudUrlEmissionSelect[\'"][^>]+value=)(["\'])(?P(?:https?:)?//(?:www\.)?dailymotion\.com/(?:embed|swf)/video/.+?)\1', webpage) + matches = DailymotionIE._extract_urls(webpage) if matches: - return _playlist_from_matches( - matches, lambda m: unescapeHTML(m[1])) + return _playlist_from_matches(matches) # Look for embedded Dailymotion playlist player (#3822) m = re.search( diff --git a/youtube_dl/extractor/kamcord.py b/youtube_dl/extractor/kamcord.py new file mode 100644 index 000000000..b50120d98 --- /dev/null +++ b/youtube_dl/extractor/kamcord.py @@ -0,0 +1,71 @@ +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..compat import compat_str +from ..utils import ( + int_or_none, + qualities, +) + + +class KamcordIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?kamcord\.com/v/(?P[^/?#&]+)' + _TEST = { + 'url': 'https://www.kamcord.com/v/hNYRduDgWb4', + 'md5': 'c3180e8a9cfac2e86e1b88cb8751b54c', + 'info_dict': { + 'id': 'hNYRduDgWb4', + 'ext': 'mp4', + 'title': 'Drinking Madness', + 'uploader': 'jacksfilms', + 'uploader_id': '3044562', + 'view_count': int, + 'like_count': int, + 'comment_count': int, + }, + } + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + + video = self._parse_json( + self._search_regex( + r'window\.__props\s*=\s*({.+?});?(?:\n|\s*)', + webpage, 'ms video player')) + video_id = player_data['data-media-id'] + config_url = compat_urlparse.urljoin(url, player_data['data-config']) + config = self._download_json( + config_url, video_id, 'Downloading config JSON') + mmc_url = config['services']['mmc'] + + duration = None + formats = [] + for m_url in (mmc_url, mmc_url.replace('/flash.json', '/html5.json')): + mmc = self._download_json( + m_url, video_id, 'Downloading mmc JSON') + if not duration: + duration = int_or_none(mmc.get('duration')) + for location in mmc['locations']: + gat = self._proto_relative_url(location.get('gat'), 'http:') + bas = location.get('bas') + loc = location.get('loc') + ogn = location.get('ogn') + if None in (gat, bas, loc, ogn): + continue + token_data = { + 'bas': bas, + 'icd': loc, + 'ogn': ogn, + 'sta': '0', + } + media = self._download_json( + '%s/?%s' % (gat, compat_urllib_parse_urlencode(token_data)), + video_id, 'Downloading %s JSON' % location['loc']) + file_ = media.get('file') + if not file_: + continue + ext = determine_ext(file_) + if ext == 'f4m': + formats.extend(self._extract_f4m_formats( + file_ + '&hdcore=3.2.0&plugin=aasp-3.2.0.77.18', + video_id, f4m_id='hds', fatal=False)) + elif ext == 'm3u8': + formats.extend(self._extract_m3u8_formats( + file_, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)) + self._sort_formats(formats) + + return { + 'id': video_id, + 'formats': formats, + 'thumbnail': player_data.get('data-poster') or config.get('poster', {}).get('imageUrl'), + 'duration': duration, + } + + +class MiTeleIE(MiTeleBaseIE): IE_DESC = 'mitele.es' - _VALID_URL = r'https?://www\.mitele\.es/[^/]+/[^/]+/[^/]+/(?P[^/]+)/' + _VALID_URL = r'https?://www\.mitele\.es/(?:[^/]+/){3}(?P[^/]+)/' _TESTS = [{ 'url': 'http://www.mitele.es/programas-tv/diario-de/la-redaccion/programa-144/', @@ -25,7 +82,7 @@ class MiTeleIE(InfoExtractor): 'info_dict': { 'id': '0NF1jJnxS1Wu3pHrmvFyw2', 'display_id': 'programa-144', - 'ext': 'flv', + 'ext': 'mp4', 'title': 'Tor, la web invisible', 'description': 'md5:3b6fce7eaa41b2d97358726378d9369f', 'series': 'Diario de', @@ -40,7 +97,7 @@ class MiTeleIE(InfoExtractor): 'info_dict': { 'id': 'eLZSwoEd1S3pVyUm8lc6F', 'display_id': 'programa-226', - 'ext': 'flv', + 'ext': 'mp4', 'title': 'Cuarto Milenio - Temporada 6 - Programa 226', 'description': 'md5:50daf9fadefa4e62d9fc866d0c015701', 'series': 'Cuarto Milenio', @@ -59,40 +116,7 @@ class MiTeleIE(InfoExtractor): webpage = self._download_webpage(url, display_id) - config_url = self._search_regex( - r'data-config\s*=\s*"([^"]+)"', webpage, 'data config url') - config_url = compat_urlparse.urljoin(url, config_url) - - config = self._download_json( - config_url, display_id, 'Downloading config JSON') - - mmc = self._download_json( - config['services']['mmc'], display_id, 'Downloading mmc JSON') - - formats = [] - for location in mmc['locations']: - gat = self._proto_relative_url(location.get('gat'), 'http:') - bas = location.get('bas') - loc = location.get('loc') - ogn = location.get('ogn') - if None in (gat, bas, loc, ogn): - continue - token_data = { - 'bas': bas, - 'icd': loc, - 'ogn': ogn, - 'sta': '0', - } - media = self._download_json( - '%s/?%s' % (gat, compat_urllib_parse_urlencode(token_data)), - display_id, 'Downloading %s JSON' % location['loc']) - file_ = media.get('file') - if not file_: - continue - formats.extend(self._extract_f4m_formats( - file_ + '&hdcore=3.2.0&plugin=aasp-3.2.0.77.18', - display_id, f4m_id=loc)) - self._sort_formats(formats) + info = self._get_player_info(url, webpage) title = self._search_regex( r'class="Destacado-text"[^>]*>\s*([^<]+)', @@ -112,21 +136,12 @@ class MiTeleIE(InfoExtractor): title = remove_start(self._search_regex( r'([^<]+)', webpage, 'title'), 'Ver online ') - video_id = self._search_regex( - r'data-media-id\s*=\s*"([^"]+)"', webpage, - 'data media id', default=None) or display_id - thumbnail = config.get('poster', {}).get('imageUrl') - duration = int_or_none(mmc.get('duration')) - - return { - 'id': video_id, + info.update({ 'display_id': display_id, 'title': title, 'description': get_element_by_attribute('class', 'text', webpage), 'series': series, 'season': season, 'episode': episode, - 'thumbnail': thumbnail, - 'duration': duration, - 'formats': formats, - } + }) + return info diff --git a/youtube_dl/extractor/radiocanada.py b/youtube_dl/extractor/radiocanada.py index 4f05bbddc..8ec402646 100644 --- a/youtube_dl/extractor/radiocanada.py +++ b/youtube_dl/extractor/radiocanada.py @@ -12,6 +12,7 @@ from ..utils import ( unified_strdate, xpath_element, ExtractorError, + determine_protocol, ) @@ -22,13 +23,13 @@ class RadioCanadaIE(InfoExtractor): 'url': 'http://ici.radio-canada.ca/widgets/mediaconsole/medianet/7184272', 'info_dict': { 'id': '7184272', - 'ext': 'flv', + 'ext': 'mp4', 'title': 'Le parcours du tireur capté sur vidéo', 'description': 'Images des caméras de surveillance fournies par la GRC montrant le parcours du tireur d\'Ottawa', 'upload_date': '20141023', }, 'params': { - # rtmp download + # m3u8 download 'skip_download': True, }, } @@ -36,11 +37,14 @@ class RadioCanadaIE(InfoExtractor): def _real_extract(self, url): app_code, video_id = re.match(self._VALID_URL, url).groups() + device_types = ['ipad', 'android'] + if app_code != 'toutv': + device_types.append('flash') + formats = [] - # TODO: extract m3u8 and f4m formats - # m3u8 formats can be extracted using ipad device_type return 403 error code when ffmpeg try to download segements + # TODO: extract f4m formats # f4m formats can be extracted using flashhd device_type but they produce unplayable file - for device_type in ('flash',): + for device_type in device_types: v_data = self._download_xml( 'http://api.radio-canada.ca/validationMedia/v1/Validation.ashx', video_id, note='Downloading %s XML' % device_type, query={ @@ -52,7 +56,7 @@ class RadioCanadaIE(InfoExtractor): # paysJ391wsHjbOJwvCs26toz and bypasslock are used to bypass geo-restriction 'paysJ391wsHjbOJwvCs26toz': 'CA', 'bypasslock': 'NZt5K62gRqfc', - }) + }, fatal=False) v_url = xpath_text(v_data, 'url') if not v_url: continue @@ -64,7 +68,8 @@ class RadioCanadaIE(InfoExtractor): formats.extend(self._extract_m3u8_formats( v_url, video_id, 'mp4', m3u8_id='hls', fatal=False)) elif ext == 'f4m': - formats.extend(self._extract_f4m_formats(v_url, video_id, f4m_id='hds', fatal=False)) + formats.extend(self._extract_f4m_formats( + v_url, video_id, f4m_id='hds', fatal=False)) else: ext = determine_ext(v_url) bitrates = xpath_element(v_data, 'bitrates') @@ -72,15 +77,28 @@ class RadioCanadaIE(InfoExtractor): tbr = int_or_none(url_e.get('bitrate')) if not tbr: continue + f_url = re.sub(r'\d+\.%s' % ext, '%d.%s' % (tbr, ext), v_url) + protocol = determine_protocol({'url': f_url}) formats.append({ - 'format_id': 'rtmp-%d' % tbr, - 'url': re.sub(r'\d+\.%s' % ext, '%d.%s' % (tbr, ext), v_url), - 'ext': 'flv', - 'protocol': 'rtmp', + 'format_id': '%s-%d' % (protocol, tbr), + 'url': f_url, + 'ext': 'flv' if protocol == 'rtmp' else ext, + 'protocol': protocol, 'width': int_or_none(url_e.get('width')), 'height': int_or_none(url_e.get('height')), 'tbr': tbr, }) + if protocol == 'rtsp': + base_url = self._search_regex( + r'rtsp://([^?]+)', f_url, 'base url', default=None) + if base_url: + base_url = 'http://' + base_url + formats.extend(self._extract_m3u8_formats( + base_url + '/playlist.m3u8', video_id, 'mp4', + 'm3u8_native', m3u8_id='hls', fatal=False)) + formats.extend(self._extract_f4m_formats( + base_url + '/manifest.f4m', video_id, + f4m_id='hds', fatal=False)) self._sort_formats(formats) metadata = self._download_xml( @@ -115,13 +133,13 @@ class RadioCanadaAudioVideoIE(InfoExtractor): 'url': 'http://ici.radio-canada.ca/audio-video/media-7527184/barack-obama-au-vietnam', 'info_dict': { 'id': '7527184', - 'ext': 'flv', + 'ext': 'mp4', 'title': 'Barack Obama au Vietnam', 'description': 'Les États-Unis lèvent l\'embargo sur la vente d\'armes qui datait de la guerre du Vietnam', 'upload_date': '20160523', }, 'params': { - # rtmp download + # m3u8 download 'skip_download': True, }, } diff --git a/youtube_dl/extractor/telecinco.py b/youtube_dl/extractor/telecinco.py index 4b4b740b4..2ecfd0405 100644 --- a/youtube_dl/extractor/telecinco.py +++ b/youtube_dl/extractor/telecinco.py @@ -1,50 +1,41 @@ # coding: utf-8 from __future__ import unicode_literals -import json - -from .common import InfoExtractor -from ..compat import ( - compat_urllib_parse_unquote, - compat_urllib_parse_urlencode, - compat_urlparse, -) -from ..utils import ( - get_element_by_attribute, - parse_duration, - strip_jsonp, -) +from .mitele import MiTeleBaseIE -class TelecincoIE(InfoExtractor): +class TelecincoIE(MiTeleBaseIE): IE_DESC = 'telecinco.es, cuatro.com and mediaset.es' _VALID_URL = r'https?://www\.(?:telecinco\.es|cuatro\.com|mediaset\.es)/(?:[^/]+/)+(?P.+?)\.html' _TESTS = [{ 'url': 'http://www.telecinco.es/robinfood/temporada-01/t01xp14/Bacalao-cocochas-pil-pil_0_1876350223.html', - 'md5': '5cbef3ad5ef17bf0d21570332d140729', + 'md5': '8d7b2d5f699ee2709d992a63d5cd1712', 'info_dict': { - 'id': 'MDSVID20141015_0058', + 'id': 'JEA5ijCnF6p5W08A1rNKn7', 'ext': 'mp4', - 'title': 'Con Martín Berasategui, hacer un bacalao al ...', + 'title': 'Bacalao con kokotxas al pil-pil', + 'description': 'md5:1382dacd32dd4592d478cbdca458e5bb', 'duration': 662, }, }, { 'url': 'http://www.cuatro.com/deportes/futbol/barcelona/Leo_Messi-Champions-Roma_2_2052780128.html', - 'md5': '0a5b9f3cc8b074f50a0578f823a12694', + 'md5': '284393e5387b3b947b77c613ef04749a', 'info_dict': { - 'id': 'MDSVID20150916_0128', + 'id': 'jn24Od1zGLG4XUZcnUnZB6', 'ext': 'mp4', - 'title': '¿Quién es este ex futbolista con el que hablan ...', + 'title': '¿Quién es este ex futbolista con el que hablan Leo Messi y Luis Suárez?', + 'description': 'md5:a62ecb5f1934fc787107d7b9a2262805', 'duration': 79, }, }, { 'url': 'http://www.mediaset.es/12meses/campanas/doylacara/conlatratanohaytrato/Ayudame-dar-cara-trata-trato_2_1986630220.html', - 'md5': 'ad1bfaaba922dd4a295724b05b68f86a', + 'md5': '749afab6ea5a136a8806855166ae46a2', 'info_dict': { - 'id': 'MDSVID20150513_0220', + 'id': 'aywerkD2Sv1vGNqq9b85Q2', 'ext': 'mp4', 'title': '#DOYLACARA. Con la trata no hay trato', + 'description': 'md5:2771356ff7bfad9179c5f5cd954f1477', 'duration': 50, }, }, { @@ -56,40 +47,16 @@ class TelecincoIE(InfoExtractor): }] def _real_extract(self, url): - episode = self._match_id(url) - webpage = self._download_webpage(url, episode) - embed_data_json = self._search_regex( - r'(?s)MSV\.embedData\[.*?\]\s*=\s*({.*?});', webpage, 'embed data', - ).replace('\'', '"') - embed_data = json.loads(embed_data_json) - - domain = embed_data['mediaUrl'] - if not domain.startswith('http'): - # only happens in telecinco.es videos - domain = 'http://' + domain - info_url = compat_urlparse.urljoin( - domain, - compat_urllib_parse_unquote(embed_data['flashvars']['host']) - ) - info_el = self._download_xml(info_url, episode).find('./video/info') - - video_link = info_el.find('videoUrl/link').text - token_query = compat_urllib_parse_urlencode({'id': video_link}) - token_info = self._download_json( - embed_data['flashvars']['ov_tk'] + '?' + token_query, - episode, - transform_source=strip_jsonp - ) - formats = self._extract_m3u8_formats( - token_info['tokenizedUrl'], episode, ext='mp4', entry_protocol='m3u8_native') - self._sort_formats(formats) - - return { - 'id': embed_data['videoId'], - 'display_id': episode, - 'title': info_el.find('title').text, - 'formats': formats, - 'description': get_element_by_attribute('class', 'text', webpage), - 'thumbnail': info_el.find('thumb').text, - 'duration': parse_duration(info_el.find('duration').text), - } + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + title = self._html_search_meta( + ['og:title', 'twitter:title'], webpage, 'title') + info = self._get_player_info(url, webpage) + info.update({ + 'display_id': display_id, + 'title': title, + 'description': self._html_search_meta( + ['og:description', 'twitter:description'], + webpage, 'title', fatal=False), + }) + return info diff --git a/youtube_dl/extractor/toutv.py b/youtube_dl/extractor/toutv.py index 4797d1310..54c2d0aa6 100644 --- a/youtube_dl/extractor/toutv.py +++ b/youtube_dl/extractor/toutv.py @@ -1,74 +1,41 @@ # coding: utf-8 from __future__ import unicode_literals -import re - from .common import InfoExtractor -from ..utils import ( - ExtractorError, - unified_strdate, -) +from ..utils import int_or_none class TouTvIE(InfoExtractor): IE_NAME = 'tou.tv' - _VALID_URL = r'https?://www\.tou\.tv/(?P[a-zA-Z0-9_-]+(?:/(?PS[0-9]+E[0-9]+)))' + _VALID_URL = r'https?://ici\.tou\.tv/(?P[a-zA-Z0-9_-]+/S[0-9]+E[0-9]+)' _TEST = { - 'url': 'http://www.tou.tv/30-vies/S04E41', + 'url': 'http://ici.tou.tv/garfield-tout-court/S2015E17', 'info_dict': { - 'id': '30-vies_S04E41', + 'id': '122017', 'ext': 'mp4', - 'title': '30 vies Saison 4 / Épisode 41', - 'description': 'md5:da363002db82ccbe4dafeb9cab039b09', - 'age_limit': 8, - 'uploader': 'Groupe des Nouveaux Médias', - 'duration': 1296, - 'upload_date': '20131118', - 'thumbnail': 'http://static.tou.tv/medias/images/2013-11-18_19_00_00_30VIES_0341_01_L.jpeg', + 'title': 'Saison 2015 Épisode 17', + 'description': 'La photo de famille 2', + 'upload_date': '20100717', }, 'params': { - 'skip_download': True, # Requires rtmpdump + # m3u8 download + 'skip_download': True, }, - 'skip': 'Only available in Canada' } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - webpage = self._download_webpage(url, video_id) - - mediaId = self._search_regex( - r'"idMedia":\s*"([^"]+)"', webpage, 'media ID') - - streams_url = 'http://release.theplatform.com/content.select?pid=' + mediaId - streams_doc = self._download_xml( - streams_url, video_id, note='Downloading stream list') - - video_url = next(n.text - for n in streams_doc.findall('.//choice/url') - if '//ad.doubleclick' not in n.text) - if video_url.endswith('/Unavailable.flv'): - raise ExtractorError( - 'Access to this video is blocked from outside of Canada', - expected=True) - - duration_str = self._html_search_meta( - 'video:duration', webpage, 'duration') - duration = int(duration_str) if duration_str else None - upload_date_str = self._html_search_meta( - 'video:release_date', webpage, 'upload date') - upload_date = unified_strdate(upload_date_str) if upload_date_str else None + path = self._match_id(url) + metadata = self._download_json('http://ici.tou.tv/presentation/%s' % path, path) + video_id = metadata['IdMedia'] + details = metadata['Details'] + title = details['OriginalTitle'] return { + '_type': 'url_transparent', + 'url': 'radiocanada:%s:%s' % (metadata.get('AppCode', 'toutv'), video_id), 'id': video_id, - 'title': self._og_search_title(webpage), - 'url': video_url, - 'description': self._og_search_description(webpage), - 'uploader': self._dc_search_uploader(webpage), - 'thumbnail': self._og_search_thumbnail(webpage), - 'age_limit': self._media_rating_search(webpage), - 'duration': duration, - 'upload_date': upload_date, - 'ext': 'mp4', + 'title': title, + 'thumbnail': details.get('ImageUrl'), + 'duration': int_or_none(details.get('LengthInSeconds')), } diff --git a/youtube_dl/extractor/tweakers.py b/youtube_dl/extractor/tweakers.py index f3198fb85..7a9386cde 100644 --- a/youtube_dl/extractor/tweakers.py +++ b/youtube_dl/extractor/tweakers.py @@ -1,25 +1,62 @@ from __future__ import unicode_literals from .common import InfoExtractor +from ..utils import ( + int_or_none, + determine_ext, + mimetype2ext, +) class TweakersIE(InfoExtractor): _VALID_URL = r'https?://tweakers\.net/video/(?P\d+)' _TEST = { 'url': 'https://tweakers.net/video/9926/new-nintendo-3ds-xl-op-alle-fronten-beter.html', - 'md5': '3147e4ddad366f97476a93863e4557c8', + 'md5': 'fe73e417c093a788e0160c4025f88b15', 'info_dict': { 'id': '9926', 'ext': 'mp4', 'title': 'New Nintendo 3DS XL - Op alle fronten beter', - 'description': 'md5:f97324cc71e86e11c853f0763820e3ba', + 'description': 'md5:3789b21fed9c0219e9bcaacd43fab280', 'thumbnail': 're:^https?://.*\.jpe?g$', 'duration': 386, + 'uploader_id': 's7JeEm', } } def _real_extract(self, url): - playlist_id = self._match_id(url) - entries = self._extract_xspf_playlist( - 'https://tweakers.net/video/s1playlist/%s/playlist.xspf' % playlist_id, playlist_id) - return self.playlist_result(entries, playlist_id) + video_id = self._match_id(url) + video_data = self._download_json( + 'https://tweakers.net/video/s1playlist/%s/1920/1080/playlist.json' % video_id, + video_id)['items'][0] + + title = video_data['title'] + + formats = [] + for location in video_data.get('locations', {}).get('progressive', []): + format_id = location.get('label') + width = int_or_none(location.get('width')) + height = int_or_none(location.get('height')) + for source in location.get('sources', []): + source_url = source.get('src') + if not source_url: + continue + ext = mimetype2ext(source.get('type')) or determine_ext(source_url) + formats.append({ + 'format_id': format_id, + 'url': source_url, + 'width': width, + 'height': height, + 'ext': ext, + }) + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': title, + 'description': video_data.get('description'), + 'thumbnail': video_data.get('poster'), + 'duration': int_or_none(video_data.get('duration')), + 'uploader_id': video_data.get('account'), + 'formats': formats, + } diff --git a/youtube_dl/options.py b/youtube_dl/options.py index f2e3ad7a1..5302b67cc 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -812,11 +812,11 @@ def parseOpts(overrideArguments=None): system_conf = [] user_conf = [] else: - system_conf = compat_conf(_readOptions('/etc/youtube-dl.conf')) + system_conf = _readOptions('/etc/youtube-dl.conf') if '--ignore-config' in system_conf: user_conf = [] else: - user_conf = compat_conf(_readUserConf()) + user_conf = _readUserConf() argv = system_conf + user_conf + command_line_conf opts, args = parser.parse_args(argv) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index d1974d089..6396ad4c9 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.07.06' +__version__ = '2016.07.07'