From c0837a12c8a64c682a01e4bfdee6f22615568d69 Mon Sep 17 00:00:00 2001 From: Boris Wachtmeister Date: Sat, 12 Mar 2016 18:00:26 +0100 Subject: [PATCH 01/43] [WDR] complete overhaul after relaunch of the site The WDR relaunched their site on 2016-02-23 which not only changed the URL-schema completely but also the layout of their pages. Apparently the whole "mediathek" now runs on the wdr-domain, so no separate URL for funkhauseuropa anymore. There seems to be no explicit handling of video-sizes on the page or in the URLs anymore. There seems to be only one size for HTML5, but still several sizes for flash. The extractor adds all to the list of formats. There is no metadata for the HTML5-stream, so that the best flash-stream will always be considered as the "best" format. At least in my tests this seemed to be true anyway. --- youtube_dl/extractor/wdr.py | 251 +++++++++++++++--------------------- 1 file changed, 101 insertions(+), 150 deletions(-) diff --git a/youtube_dl/extractor/wdr.py b/youtube_dl/extractor/wdr.py index 31c904303..f881b7300 100644 --- a/youtube_dl/extractor/wdr.py +++ b/youtube_dl/extractor/wdr.py @@ -1,7 +1,6 @@ # -*- coding: utf-8 -*- from __future__ import unicode_literals -import itertools import re from .common import InfoExtractor @@ -11,204 +10,156 @@ from ..compat import ( ) from ..utils import ( unified_strdate, - qualities, + ExtractorError, ) class WDRIE(InfoExtractor): - _PLAYER_REGEX = '-(?:video|audio)player(?:_size-[LMS])?' - _VALID_URL = r'(?Phttps?://www\d?\.(?:wdr\d?|funkhauseuropa)\.de/)(?P.+?)(?P%s)?\.html' % _PLAYER_REGEX + _PAGE_REGEX = r'/mediathek/(?P[^/]+)/(?P[^/]+)/(?P.+)\.html' + _VALID_URL = r'(?Phttps?://(?:www\d\.)?wdr\d?\.de)' + _PAGE_REGEX + + _JS_URL_REGEX = r'(https?://deviceids-medp.wdr.de/ondemand/\d+/\d+\.js)' _TESTS = [ { - 'url': 'http://www1.wdr.de/mediathek/video/sendungen/servicezeit/videoservicezeit560-videoplayer_size-L.html', + 'url': 'http://www1.wdr.de/mediathek/video/sendungen/doku-am-freitag/video-geheimnis-aachener-dom-100.html', + 'md5': 'e58c39c3e30077141d258bf588700a7b', 'info_dict': { - 'id': 'mdb-362427', + 'id': 'mdb-1058683', 'ext': 'flv', - 'title': 'Servicezeit', - 'description': 'md5:c8f43e5e815eeb54d0b96df2fba906cb', - 'upload_date': '20140310', - 'is_live': False - }, - 'params': { - 'skip_download': True, + 'display_id': 'doku-am-freitag/video-geheimnis-aachener-dom-100', + 'title': 'Geheimnis Aachener Dom', + 'alt_title': 'Doku am Freitag', + 'upload_date': '20160304', + 'description': 'md5:87be8ff14d8dfd7a7ee46f0299b52318', + 'is_live': False, + 'subtitles': {'de': [{ + 'url': 'http://ondemand-ww.wdr.de/medp/fsk0/105/1058683/1058683_12220974.xml' + }]}, }, 'skip': 'Page Not Found', }, { - 'url': 'http://www1.wdr.de/themen/av/videomargaspiegelisttot101-videoplayer.html', + 'url': 'http://www1.wdr.de/mediathek/audio/wdr3/wdr3-gespraech-am-samstag/audio-schriftstellerin-juli-zeh-100.html', + 'md5': 'f4c1f96d01cf285240f53ea4309663d8', 'info_dict': { - 'id': 'mdb-363194', + 'id': 'mdb-1072000', + 'ext': 'mp3', + 'display_id': 'wdr3-gespraech-am-samstag/audio-schriftstellerin-juli-zeh-100', + 'title': 'Schriftstellerin Juli Zeh', + 'alt_title': 'WDR 3 Gespräch am Samstag', + 'upload_date': '20160312', + 'description': 'md5:e127d320bc2b1f149be697ce044a3dd7', + 'is_live': False, + 'subtitles': {} + }, + 'skip': 'Page Not Found', + }, + { + 'url': 'http://www1.wdr.de/mediathek/video/live/index.html', + 'info_dict': { + 'id': 'mdb-103364', 'ext': 'flv', - 'title': 'Marga Spiegel ist tot', - 'description': 
'md5:2309992a6716c347891c045be50992e4', - 'upload_date': '20140311', - 'is_live': False - }, - 'params': { - 'skip_download': True, - }, - 'skip': 'Page Not Found', - }, - { - 'url': 'http://www1.wdr.de/themen/kultur/audioerlebtegeschichtenmargaspiegel100-audioplayer.html', - 'md5': '83e9e8fefad36f357278759870805898', - 'info_dict': { - 'id': 'mdb-194332', - 'ext': 'mp3', - 'title': 'Erlebte Geschichten: Marga Spiegel (29.11.2009)', - 'description': 'md5:2309992a6716c347891c045be50992e4', - 'upload_date': '20091129', - 'is_live': False - }, - }, - { - 'url': 'http://www.funkhauseuropa.de/av/audioflaviacoelhoamaramar100-audioplayer.html', - 'md5': '99a1443ff29af19f6c52cf6f4dc1f4aa', - 'info_dict': { - 'id': 'mdb-478135', - 'ext': 'mp3', - 'title': 'Flavia Coelho: Amar é Amar', - 'description': 'md5:7b29e97e10dfb6e265238b32fa35b23a', - 'upload_date': '20140717', - 'is_live': False - }, - 'skip': 'Page Not Found', - }, - { - 'url': 'http://www1.wdr.de/mediathek/video/sendungen/quarks_und_co/filterseite-quarks-und-co100.html', - 'playlist_mincount': 146, - 'info_dict': { - 'id': 'mediathek/video/sendungen/quarks_und_co/filterseite-quarks-und-co100', + 'display_id': 'index', + 'title': r're:^WDR Fernsehen im Livestream [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', + 'alt_title': 'WDR Fernsehen Live', + 'upload_date': None, + 'description': 'md5:ae2ff888510623bf8d4b115f95a9b7c9', + 'is_live': True, + 'subtitles': {} } }, { - 'url': 'http://www1.wdr.de/mediathek/video/livestream/index.html', + 'url': 'http://www1.wdr.de/mediathek/video/sendungen/aktuelle-stunde/aktuelle-stunde-120.html', + 'playlist_mincount': 10, 'info_dict': { - 'id': 'mdb-103364', - 'title': 're:^WDR Fernsehen Live [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', - 'description': 'md5:ae2ff888510623bf8d4b115f95a9b7c9', - 'ext': 'flv', - 'upload_date': '20150101', - 'is_live': True - }, - 'params': { - 'skip_download': True, + 'id': 'aktuelle-stunde/aktuelle-stunde-120', }, } ] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) - page_url = mobj.group('url') - page_id = mobj.group('id') + url_type = mobj.group('type') + page_url = mobj.group('page_url') + display_id = mobj.group('display_id') + webpage = self._download_webpage(url, display_id) - webpage = self._download_webpage(url, page_id) + js_url = self._search_regex(self._JS_URL_REGEX, webpage, 'js_url', default=None) - if mobj.group('player') is None: + if not js_url: entries = [ - self.url_result(page_url + href, 'WDR') + self.url_result(page_url + href[0], 'WDR') for href in re.findall( - r'\s*]*>\s*\s*]+href="([^"]+)"', - webpage, 'm3u8 url', default=None) - if m3u8_url: - formats.extend(self._extract_m3u8_formats( - m3u8_url, page_id, 'mp4', 'm3u8_native', - m3u8_id='hls', fatal=False)) - - direct_urls = re.findall( - r'rel="web(S|M|L|XL)"[^>]+href="([^"]+)"', webpage) - if direct_urls: - for quality, video_url in direct_urls: - formats.append({ - 'url': video_url, - 'preference': preference(quality), - 'http_headers': { - 'User-Agent': 'mobile', - }, - }) - self._sort_formats(formats) - description = self._html_search_meta('Description', webpage, 'description') - return { - 'id': page_id, - 'formats': formats, + 'id': metadata_tracker_data.get("trackerClipId", display_id), + 'display_id': display_id, 'title': title, - 'description': description, - 'thumbnail': thumbnail, + 'alt_title': metadata_tracker_data.get("trackerClipSubcategory"), + 'formats': formats, 'upload_date': upload_date, - 'is_live': is_live + 'description': 
self._html_search_meta("Description", webpage), + 'is_live': is_live, + 'subtitles': subtitles, } From 14f7a2b8af17d1f490c46a0a9028ba9d97cf7df2 Mon Sep 17 00:00:00 2001 From: Boris Wachtmeister Date: Sat, 12 Mar 2016 20:14:46 +0100 Subject: [PATCH 02/43] [WDRMaus] switch current show to new WDR extractor (fixes #8562) It seems that the "current show" already uses the new WDR video-player, while all the others videos still use the old player. I just added the current show URL to the normal WDR-extractor, which works fine. This commit needs my changes from PR #8842 that fix the support for WDR. --- youtube_dl/extractor/wdr.py | 37 ++++++++++++++++++++----------------- 1 file changed, 20 insertions(+), 17 deletions(-) diff --git a/youtube_dl/extractor/wdr.py b/youtube_dl/extractor/wdr.py index f881b7300..ec81f1a28 100644 --- a/youtube_dl/extractor/wdr.py +++ b/youtube_dl/extractor/wdr.py @@ -15,8 +15,9 @@ from ..utils import ( class WDRIE(InfoExtractor): + _CURRENT_MAUS_URL = r'https?://www.wdrmaus.de/aktuelle-sendung/(wdr|index).php5' _PAGE_REGEX = r'/mediathek/(?P[^/]+)/(?P[^/]+)/(?P.+)\.html' - _VALID_URL = r'(?Phttps?://(?:www\d\.)?wdr\d?\.de)' + _PAGE_REGEX + _VALID_URL = r'(?Phttps?://(?:www\d\.)?wdr\d?\.de)' + _PAGE_REGEX + "|" + _CURRENT_MAUS_URL _JS_URL_REGEX = r'(https?://deviceids-medp.wdr.de/ondemand/\d+/\d+\.js)' @@ -75,7 +76,18 @@ class WDRIE(InfoExtractor): 'info_dict': { 'id': 'aktuelle-stunde/aktuelle-stunde-120', }, - } + }, + { + 'url': 'http://www.wdrmaus.de/aktuelle-sendung/index.php5', + 'info_dict': { + 'id': 'mdb-1096487', + 'ext': 'flv', + 'upload_date': 're:^[0-9]{8}$', + 'title': 're:^Die Sendung mit der Maus vom [0-9.]{10}$', + 'description': '- Die Sendung mit der Maus -', + }, + 'skip': 'The id changes from week to week because of the new episode' + }, ] def _real_extract(self, url): @@ -195,26 +207,17 @@ class WDRMobileIE(InfoExtractor): class WDRMausIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?wdrmaus\.de/(?:[^/]+/){,2}(?P[^/?#]+)(?:/index\.php5|(?[^/?#]+)((? 
Date: Thu, 26 May 2016 16:45:14 +0200 Subject: [PATCH 03/43] [WDR] use single quotes for strings --- youtube_dl/extractor/wdr.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/youtube_dl/extractor/wdr.py b/youtube_dl/extractor/wdr.py index ec81f1a28..05bfe7deb 100644 --- a/youtube_dl/extractor/wdr.py +++ b/youtube_dl/extractor/wdr.py @@ -17,7 +17,7 @@ from ..utils import ( class WDRIE(InfoExtractor): _CURRENT_MAUS_URL = r'https?://www.wdrmaus.de/aktuelle-sendung/(wdr|index).php5' _PAGE_REGEX = r'/mediathek/(?P[^/]+)/(?P[^/]+)/(?P.+)\.html' - _VALID_URL = r'(?Phttps?://(?:www\d\.)?wdr\d?\.de)' + _PAGE_REGEX + "|" + _CURRENT_MAUS_URL + _VALID_URL = r'(?Phttps?://(?:www\d\.)?wdr\d?\.de)' + _PAGE_REGEX + '|' + _CURRENT_MAUS_URL _JS_URL_REGEX = r'(https?://deviceids-medp.wdr.de/ondemand/\d+/\d+\.js)' @@ -116,23 +116,23 @@ class WDRIE(InfoExtractor): json_data = self._search_regex(r'\(({.*})\)', js_data, 'json') metadata = self._parse_json(json_data, display_id) - metadata_tracker_data = metadata["trackerData"] - metadata_media_resource = metadata["mediaResource"] + metadata_tracker_data = metadata['trackerData'] + metadata_media_resource = metadata['mediaResource'] formats = [] # check if the metadata contains a direct URL to a file - metadata_media_alt = metadata_media_resource.get("alt") + metadata_media_alt = metadata_media_resource.get('alt') if metadata_media_alt: - for tag_name in ["videoURL", 'audioURL']: + for tag_name in ['videoURL', 'audioURL']: if tag_name in metadata_media_alt: formats.append({ 'url': metadata_media_alt[tag_name] }) # check if there are flash-streams for this video - if "dflt" in metadata_media_resource and "videoURL" in metadata_media_resource["dflt"]: - video_url = metadata_media_resource["dflt"]["videoURL"] + if 'dflt' in metadata_media_resource and 'videoURL' in metadata_media_resource['dflt']: + video_url = metadata_media_resource['dflt']['videoURL'] if video_url.endswith('.f4m'): full_video_url = video_url + '?hdcore=3.2.0&plugin=aasp-3.2.0.77.18' formats.extend(self._extract_f4m_formats(full_video_url, display_id, f4m_id='hds', fatal=False)) @@ -140,13 +140,13 @@ class WDRIE(InfoExtractor): formats.extend(self._extract_smil_formats(video_url, 'stream', fatal=False)) subtitles = {} - caption_url = metadata_media_resource.get("captionURL") + caption_url = metadata_media_resource.get('captionURL') if caption_url: subtitles['de'] = [{ 'url': caption_url }] - title = metadata_tracker_data.get("trackerClipTitle") + title = metadata_tracker_data.get('trackerClipTitle') is_live = url_type == 'live' if is_live: @@ -163,13 +163,13 @@ class WDRIE(InfoExtractor): self._sort_formats(formats) return { - 'id': metadata_tracker_data.get("trackerClipId", display_id), + 'id': metadata_tracker_data.get('trackerClipId', display_id), 'display_id': display_id, 'title': title, - 'alt_title': metadata_tracker_data.get("trackerClipSubcategory"), + 'alt_title': metadata_tracker_data.get('trackerClipSubcategory'), 'formats': formats, 'upload_date': upload_date, - 'description': self._html_search_meta("Description", webpage), + 'description': self._html_search_meta('Description', webpage), 'is_live': is_live, 'subtitles': subtitles, } From 37f972954da0d0f1f0c5e97da8357c4baf687ee6 Mon Sep 17 00:00:00 2001 From: Boris Wachtmeister Date: Thu, 26 May 2016 16:59:45 +0200 Subject: [PATCH 04/43] [WDR] use _download_json with a strip_jsonp --- youtube_dl/extractor/wdr.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git 
a/youtube_dl/extractor/wdr.py b/youtube_dl/extractor/wdr.py index 05bfe7deb..73a343c69 100644 --- a/youtube_dl/extractor/wdr.py +++ b/youtube_dl/extractor/wdr.py @@ -9,6 +9,7 @@ from ..compat import ( compat_urlparse, ) from ..utils import ( + strip_jsonp, unified_strdate, ExtractorError, ) @@ -112,9 +113,8 @@ class WDRIE(InfoExtractor): raise ExtractorError('No downloadable streams found', expected=True) - js_data = self._download_webpage(js_url, 'metadata') - json_data = self._search_regex(r'\(({.*})\)', js_data, 'json') - metadata = self._parse_json(json_data, display_id) + metadata = self._download_json( + js_url, 'metadata', transform_source=strip_jsonp) metadata_tracker_data = metadata['trackerData'] metadata_media_resource = metadata['mediaResource'] From bec2c14f2cf4f06f1b99e04d59779d8d103d726a Mon Sep 17 00:00:00 2001 From: Boris Wachtmeister Date: Thu, 26 May 2016 17:30:38 +0200 Subject: [PATCH 05/43] [WDR] add special handling if alt-url is a m3u8 --- youtube_dl/extractor/wdr.py | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/wdr.py b/youtube_dl/extractor/wdr.py index 73a343c69..fddcbf190 100644 --- a/youtube_dl/extractor/wdr.py +++ b/youtube_dl/extractor/wdr.py @@ -9,6 +9,7 @@ from ..compat import ( compat_urlparse, ) from ..utils import ( + determine_ext, strip_jsonp, unified_strdate, ExtractorError, @@ -61,7 +62,7 @@ class WDRIE(InfoExtractor): 'url': 'http://www1.wdr.de/mediathek/video/live/index.html', 'info_dict': { 'id': 'mdb-103364', - 'ext': 'flv', + 'ext': 'mp4', 'display_id': 'index', 'title': r're:^WDR Fernsehen im Livestream [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', 'alt_title': 'WDR Fernsehen Live', @@ -69,7 +70,10 @@ class WDRIE(InfoExtractor): 'description': 'md5:ae2ff888510623bf8d4b115f95a9b7c9', 'is_live': True, 'subtitles': {} - } + }, + 'params': { + 'skip_download': True, # m3u8 download + }, }, { 'url': 'http://www1.wdr.de/mediathek/video/sendungen/aktuelle-stunde/aktuelle-stunde-120.html', @@ -126,9 +130,16 @@ class WDRIE(InfoExtractor): if metadata_media_alt: for tag_name in ['videoURL', 'audioURL']: if tag_name in metadata_media_alt: - formats.append({ - 'url': metadata_media_alt[tag_name] - }) + alt_url = metadata_media_alt[tag_name] + if determine_ext(alt_url) == 'm3u8': + m3u_fmt = self._extract_m3u8_formats( + alt_url, display_id, 'mp4', 'm3u8_native', + m3u8_id='hls') + formats.extend(m3u_fmt) + else: + formats.append({ + 'url': alt_url + }) # check if there are flash-streams for this video if 'dflt' in metadata_media_resource and 'videoURL' in metadata_media_resource['dflt']: From 33a1ff7113d9dd656b3c56cb404de85646caa559 Mon Sep 17 00:00:00 2001 From: Boris Wachtmeister Date: Thu, 26 May 2016 19:08:12 +0200 Subject: [PATCH 06/43] [WDR] extract jsonp-url by parsing data-extension of mediaLink --- youtube_dl/extractor/wdr.py | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/wdr.py b/youtube_dl/extractor/wdr.py index fddcbf190..dd107ef8a 100644 --- a/youtube_dl/extractor/wdr.py +++ b/youtube_dl/extractor/wdr.py @@ -10,6 +10,7 @@ from ..compat import ( ) from ..utils import ( determine_ext, + js_to_json, strip_jsonp, unified_strdate, ExtractorError, @@ -21,8 +22,6 @@ class WDRIE(InfoExtractor): _PAGE_REGEX = r'/mediathek/(?P[^/]+)/(?P[^/]+)/(?P.+)\.html' _VALID_URL = r'(?Phttps?://(?:www\d\.)?wdr\d?\.de)' + _PAGE_REGEX + '|' + _CURRENT_MAUS_URL - _JS_URL_REGEX = r'(https?://deviceids-medp.wdr.de/ondemand/\d+/\d+\.js)' - _TESTS = [ 
{ 'url': 'http://www1.wdr.de/mediathek/video/sendungen/doku-am-freitag/video-geheimnis-aachener-dom-100.html', @@ -102,9 +101,13 @@ class WDRIE(InfoExtractor): display_id = mobj.group('display_id') webpage = self._download_webpage(url, display_id) - js_url = self._search_regex(self._JS_URL_REGEX, webpage, 'js_url', default=None) + # for wdr.de the data-extension is in a tag with the class "mediaLink" + # for wdrmaus its in a link to the page in a multiline "videoLink"-tag + json_metadata = self._html_search_regex( + r'class=(?:"mediaLink\b[^"]*"[^>]+|"videoLink\b[^"]*"[\s]*>\n[^\n]*)data-extension="([^"]+)"', + webpage, 'media link', default=None, flags=re.MULTILINE) - if not js_url: + if not json_metadata: entries = [ self.url_result(page_url + href[0], 'WDR') for href in re.findall( @@ -117,8 +120,12 @@ class WDRIE(InfoExtractor): raise ExtractorError('No downloadable streams found', expected=True) + media_link_obj = self._parse_json(json_metadata, display_id, + transform_source=js_to_json) + jsonp_url = media_link_obj['mediaObj']['url'] + metadata = self._download_json( - js_url, 'metadata', transform_source=strip_jsonp) + jsonp_url, 'metadata', transform_source=strip_jsonp) metadata_tracker_data = metadata['trackerData'] metadata_media_resource = metadata['mediaResource'] From 949fc42e009aed5414caad280d0dc551ffcd9c14 Mon Sep 17 00:00:00 2001 From: Boris Wachtmeister Date: Thu, 26 May 2016 19:58:55 +0200 Subject: [PATCH 07/43] [WDR] the other wdrmaus.de pages also changed to the new player --- youtube_dl/extractor/extractors.py | 1 - youtube_dl/extractor/wdr.py | 89 +++++------------------------- 2 files changed, 15 insertions(+), 75 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 6de3438fc..023598130 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -923,7 +923,6 @@ from .wat import WatIE from .wdr import ( WDRIE, WDRMobileIE, - WDRMausIE, ) from .webofstories import ( WebOfStoriesIE, diff --git a/youtube_dl/extractor/wdr.py b/youtube_dl/extractor/wdr.py index dd107ef8a..1af1e996d 100644 --- a/youtube_dl/extractor/wdr.py +++ b/youtube_dl/extractor/wdr.py @@ -4,10 +4,6 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..compat import ( - compat_parse_qs, - compat_urlparse, -) from ..utils import ( determine_ext, js_to_json, @@ -18,7 +14,7 @@ from ..utils import ( class WDRIE(InfoExtractor): - _CURRENT_MAUS_URL = r'https?://www.wdrmaus.de/aktuelle-sendung/(wdr|index).php5' + _CURRENT_MAUS_URL = r'https?://(?:www\.)wdrmaus.de/(?:[^/]+/){1,2}[^/?#]+\.php5' _PAGE_REGEX = r'/mediathek/(?P[^/]+)/(?P[^/]+)/(?P.+)\.html' _VALID_URL = r'(?Phttps?://(?:www\d\.)?wdr\d?\.de)' + _PAGE_REGEX + '|' + _CURRENT_MAUS_URL @@ -92,6 +88,20 @@ class WDRIE(InfoExtractor): }, 'skip': 'The id changes from week to week because of the new episode' }, + { + 'url': 'http://www.wdrmaus.de/sachgeschichten/sachgeschichten/achterbahn.php5', + 'md5': 'ca365705551e4bd5217490f3b0591290', + 'info_dict': { + 'id': 'mdb-186083', + 'ext': 'flv', + 'upload_date': '20130919', + 'title': 'Sachgeschichte - Achterbahn ', + 'description': '- Die Sendung mit der Maus -', + }, + 'params': { + 'skip_download': True, # the file has different versions :( + }, + }, ] def _real_extract(self, url): @@ -222,72 +232,3 @@ class WDRMobileIE(InfoExtractor): 'User-Agent': 'mobile', }, } - - -class WDRMausIE(InfoExtractor): - _VALID_URL = 'https?://(?:www\.)?wdrmaus\.de/(?:[^/]+/){,2}(?P[^/?#]+)((?

Sendedatum:\s*([0-9\.]+)</p>
', - webpage, 'air date') - title_str = self._html_search_regex( - r'

<h1>(.*?)</h1>
', webpage, 'title') - title = '%s - %s' % (title_date, title_str) - upload_date = unified_strdate( - self._html_search_meta('dc.date', webpage)) - - fields = compat_parse_qs(param_code) - video_url = fields['firstVideo'][0] - thumbnail = compat_urlparse.urljoin(url, fields['startPicture'][0]) - - formats = [{ - 'format_id': 'rtmp', - 'url': video_url, - }] - - jscode = self._download_webpage( - 'http://www.wdrmaus.de/codebase/js/extended-medien.min.js', - video_id, fatal=False, - note='Downloading URL translation table', - errnote='Could not download URL translation table') - if jscode: - for m in re.finditer( - r"stream:\s*'dslSrc=(?P[^']+)',\s*download:\s*'(?P
[^']+)'\s*\}", - jscode): - if video_url.startswith(m.group('stream')): - http_url = video_url.replace( - m.group('stream'), m.group('dl')) - formats.append({ - 'format_id': 'http', - 'url': http_url, - }) - break - - self._sort_formats(formats) - - return { - 'id': video_id, - 'title': title, - 'formats': formats, - 'thumbnail': thumbnail, - 'upload_date': upload_date, - } From 3a686853e1739dfc26548cdc09fe89e693e76a9f Mon Sep 17 00:00:00 2001 From: Boris Wachtmeister Date: Thu, 26 May 2016 20:16:33 +0200 Subject: [PATCH 08/43] [WDR] fixed parsing of playlists --- youtube_dl/extractor/wdr.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/wdr.py b/youtube_dl/extractor/wdr.py index 1af1e996d..1e729cb7c 100644 --- a/youtube_dl/extractor/wdr.py +++ b/youtube_dl/extractor/wdr.py @@ -72,7 +72,7 @@ class WDRIE(InfoExtractor): }, { 'url': 'http://www1.wdr.de/mediathek/video/sendungen/aktuelle-stunde/aktuelle-stunde-120.html', - 'playlist_mincount': 10, + 'playlist_mincount': 8, 'info_dict': { 'id': 'aktuelle-stunde/aktuelle-stunde-120', }, @@ -121,7 +121,7 @@ class WDRIE(InfoExtractor): entries = [ self.url_result(page_url + href[0], 'WDR') for href in re.findall( - r']+data-extension=' % self._PAGE_REGEX, webpage) ] From 9c3c447eb389726d98189d972a2d772ef729132d Mon Sep 17 00:00:00 2001 From: TRox1972 Date: Tue, 17 May 2016 16:21:52 +0200 Subject: [PATCH 09/43] [loc] Add extractor (Closes #3188) Added extractor of loc.gov, which closes #3188. I am not an experienced programmer, so I am sure I did a bunch of mistakes, but the extractor works (for me at least). [LibraryOfCongress] don't use video_id for _search_regex() [LibraryOfCongress] Improvements --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/libraryofcongress.py | 65 +++++++++++++++++++++++ 2 files changed, 66 insertions(+) create mode 100644 youtube_dl/extractor/libraryofcongress.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 9dd55bd70..3b5143ace 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -382,6 +382,7 @@ from .leeco import ( LePlaylistIE, LetvCloudIE, ) +from .libraryofcongress import LibraryOfCongressIE from .libsyn import LibsynIE from .lifenews import ( LifeNewsIE, diff --git a/youtube_dl/extractor/libraryofcongress.py b/youtube_dl/extractor/libraryofcongress.py new file mode 100644 index 000000000..0c34dbce3 --- /dev/null +++ b/youtube_dl/extractor/libraryofcongress.py @@ -0,0 +1,65 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor + +from ..utils import determine_ext + + +class LibraryOfCongressIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?loc\.gov/item/(?P[0-9]+)' + _TESTS = [{ + 'url': 'http://loc.gov/item/90716351/', + 'info_dict': { + 'id': '90716351', + 'ext': 'mp4', + 'title': 'Pa\'s trip to Mars /' + }, + 'params': { + # m3u8 download + 'skip_download': True, + } + }, { + 'url': 'https://www.loc.gov/item/97516576/', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + self.report_extraction(video_id) + json_id = self._search_regex('media-player-([0-9A-Z]{32})', webpage, 'json id') + + data = self._parse_json(self._download_webpage( + 'https://media.loc.gov/services/v1/media?id=%s' % json_id, + video_id), video_id) + data = data['mediaObject'] + + media_url = data['derivatives'][0]['derivativeUrl'] + media_url = 
media_url.replace('rtmp', 'https') + + is_video = data['mediaType'].lower() == 'v' + if not determine_ext(media_url) in ('mp4', 'mp3'): + media_url += '.mp4' if is_video else '.mp3' + + if media_url.index('vod/mp4:') > -1: + media_url = media_url.replace('vod/mp4:', 'hls-vod/media/') + '.m3u8' + elif url.index('vod/mp3:') > -1: + media_url = media_url.replace('vod/mp3:', '') + + formats = [] + if determine_ext(media_url) == 'm3u8': + formats = self._extract_m3u8_formats(media_url, video_id, ext='mp4') + elif determine_ext(media_url) is 'mp3': + formats.append({ + 'url': media_url, + 'ext': 'mp3', + }) + + return { + 'id': video_id, + 'thumbnail': self._og_search_thumbnail(webpage), + 'title': self._og_search_title(webpage), + 'formats': formats, + } From 7f3c3dfa52769d1f44c1f1031449118c564a92bf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 3 Jun 2016 23:19:11 +0700 Subject: [PATCH 10/43] [loc] Improve (Closes #9521) --- youtube_dl/extractor/libraryofcongress.py | 87 ++++++++++++++--------- 1 file changed, 53 insertions(+), 34 deletions(-) diff --git a/youtube_dl/extractor/libraryofcongress.py b/youtube_dl/extractor/libraryofcongress.py index 0c34dbce3..d311f9946 100644 --- a/youtube_dl/extractor/libraryofcongress.py +++ b/youtube_dl/extractor/libraryofcongress.py @@ -3,63 +3,82 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..utils import determine_ext +from ..utils import ( + determine_ext, + float_or_none, + int_or_none, +) class LibraryOfCongressIE(InfoExtractor): + IE_NAME = 'loc' + IE_DESC = 'Library of Congress' _VALID_URL = r'https?://(?:www\.)?loc\.gov/item/(?P[0-9]+)' - _TESTS = [{ - 'url': 'http://loc.gov/item/90716351/', + _TEST = { + 'url': 'http://loc.gov/item/90716351/', + 'md5': '353917ff7f0255aa6d4b80a034833de8', 'info_dict': { 'id': '90716351', 'ext': 'mp4', - 'title': 'Pa\'s trip to Mars /' + 'title': "Pa's trip to Mars", + 'thumbnail': 're:^https?://.*\.jpg$', + 'duration': 0, + 'view_count': int, }, - 'params': { - # m3u8 download - 'skip_download': True, - } - }, { - 'url': 'https://www.loc.gov/item/97516576/', - 'only_matching': True, - }] + } def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - self.report_extraction(video_id) - json_id = self._search_regex('media-player-([0-9A-Z]{32})', webpage, 'json id') + media_id = self._search_regex( + (r'id=(["\'])media-player-(?P.+?)\1', + r']+id=(["\'])uuid-(?P.+?)\1', + r']+data-uuid=(["\'])(?P.+?)\1'), + webpage, 'media id', group='id') - data = self._parse_json(self._download_webpage( - 'https://media.loc.gov/services/v1/media?id=%s' % json_id, - video_id), video_id) - data = data['mediaObject'] + data = self._parse_json( + self._download_webpage( + 'https://media.loc.gov/services/v1/media?id=%s&context=json' % media_id, + video_id), + video_id)['mediaObject'] - media_url = data['derivatives'][0]['derivativeUrl'] + derivative = data['derivatives'][0] + media_url = derivative['derivativeUrl'] + + # Following algorithm was extracted from setAVSource js function + # found in webpage media_url = media_url.replace('rtmp', 'https') - is_video = data['mediaType'].lower() == 'v' - if not determine_ext(media_url) in ('mp4', 'mp3'): + is_video = data.get('mediaType', 'v').lower() == 'v' + ext = determine_ext(media_url) + if ext not in ('mp4', 'mp3'): media_url += '.mp4' if is_video else '.mp3' - if media_url.index('vod/mp4:') > -1: - media_url = media_url.replace('vod/mp4:', 'hls-vod/media/') + '.m3u8' - elif 
url.index('vod/mp3:') > -1: - media_url = media_url.replace('vod/mp3:', '') + if 'vod/mp4:' in media_url: + formats = [{ + 'url': media_url.replace('vod/mp4:', 'hls-vod/media/') + '.m3u8', + 'format_id': 'hls', + 'ext': 'mp4', + 'protocol': 'm3u8_native', + }] + elif 'vod/mp3:' in media_url: + formats = [{ + 'url': media_url.replace('vod/mp3:', ''), + 'vcodec': 'none', + }] - formats = [] - if determine_ext(media_url) == 'm3u8': - formats = self._extract_m3u8_formats(media_url, video_id, ext='mp4') - elif determine_ext(media_url) is 'mp3': - formats.append({ - 'url': media_url, - 'ext': 'mp3', - }) + self._sort_formats(formats) + + title = derivative.get('shortName') or data.get('shortName') or self._og_search_title(webpage) + duration = float_or_none(data.get('duration')) + view_count = int_or_none(data.get('viewCount')) return { 'id': video_id, + 'title': title, 'thumbnail': self._og_search_thumbnail(webpage), - 'title': self._og_search_title(webpage), + 'duration': duration, + 'view_count': view_count, 'formats': formats, } From bf4c6a38e1a98606b269d70ccc65c7ec5d47ec07 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 3 Jun 2016 23:25:24 +0700 Subject: [PATCH 11/43] release 2016.06.03 --- .github/ISSUE_TEMPLATE.md | 6 +++--- docs/supportedsites.md | 7 +++++-- youtube_dl/version.py | 2 +- 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index ae98e0626..e593ee78a 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.06.02*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.06.02** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.06.03*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. 
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.06.03** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.06.02 +[debug] youtube-dl version 2016.06.03 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/docs/supportedsites.md b/docs/supportedsites.md index dcbc632a1..619bd0825 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -43,8 +43,8 @@ - **appletrailers:section** - **archive.org**: archive.org videos - **ARD** - - **ARD:mediathek** - **ARD:mediathek**: Saarländischer Rundfunk + - **ARD:mediathek** - **arte.tv** - **arte.tv:+7** - **arte.tv:cinema** @@ -339,6 +339,7 @@ - **livestream** - **livestream:original** - **LnkGo** + - **loc**: Library of Congress - **LocalNews8** - **LoveHomePorn** - **lrt.lt** @@ -528,7 +529,8 @@ - **Restudy** - **Reuters** - **ReverbNation** - - **Revision3** + - **revision** + - **revision3:embed** - **RICE** - **RingTV** - **RottenTomatoes** @@ -567,6 +569,7 @@ - **ScreencastOMatic** - **ScreenJunkies** - **ScreenwaveMedia** + - **Seeker** - **SenateISVP** - **SendtoNews** - **ServingSys** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index fba427dde..d24d06f4a 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.06.02' +__version__ = '2016.06.03' From 76e9cd7f24f6b175e4cce85082647403266ed233 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 3 Jun 2016 23:43:34 +0700 Subject: [PATCH 12/43] [loc] Add support for another URL schema and simplify --- youtube_dl/extractor/libraryofcongress.py | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/libraryofcongress.py b/youtube_dl/extractor/libraryofcongress.py index d311f9946..a5f22b204 100644 --- a/youtube_dl/extractor/libraryofcongress.py +++ b/youtube_dl/extractor/libraryofcongress.py @@ -13,8 +13,8 @@ from ..utils import ( class LibraryOfCongressIE(InfoExtractor): IE_NAME = 'loc' IE_DESC = 'Library of Congress' - _VALID_URL = r'https?://(?:www\.)?loc\.gov/item/(?P[0-9]+)' - _TEST = { + _VALID_URL = r'https?://(?:www\.)?loc\.gov/(?:item/|today/cyberlc/feature_wdesc\.php\?.*\brec=)(?P[0-9]+)' + _TESTS = [{ 'url': 'http://loc.gov/item/90716351/', 'md5': '353917ff7f0255aa6d4b80a034833de8', 'info_dict': { @@ -25,7 +25,10 @@ class LibraryOfCongressIE(InfoExtractor): 'duration': 0, 'view_count': int, }, - } + }, { + 'url': 'https://www.loc.gov/today/cyberlc/feature_wdesc.php?rec=5578', + 'only_matching': True, + }] def _real_extract(self, url): video_id = self._match_id(url) @@ -34,13 +37,12 @@ class LibraryOfCongressIE(InfoExtractor): media_id = self._search_regex( (r'id=(["\'])media-player-(?P.+?)\1', r']+id=(["\'])uuid-(?P.+?)\1', - r']+data-uuid=(["\'])(?P.+?)\1'), + r']+data-uuid=(["\'])(?P.+?)\1', + r'mediaObjectId\s*:\s*(["\'])(?P.+?)\1'), webpage, 'media id', group='id') - data = self._parse_json( 
- self._download_webpage( - 'https://media.loc.gov/services/v1/media?id=%s&context=json' % media_id, - video_id), + data = self._download_json( + 'https://media.loc.gov/services/v1/media?id=%s&context=json' % media_id, video_id)['mediaObject'] derivative = data['derivatives'][0] @@ -77,7 +79,7 @@ class LibraryOfCongressIE(InfoExtractor): return { 'id': video_id, 'title': title, - 'thumbnail': self._og_search_thumbnail(webpage), + 'thumbnail': self._og_search_thumbnail(webpage, default=None), 'duration': duration, 'view_count': view_count, 'formats': formats, From c917106be4d6d98ce7504d71a32b58ddca2bc03d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 3 Jun 2016 23:55:22 +0700 Subject: [PATCH 13/43] [loc] Extract subtites --- youtube_dl/extractor/libraryofcongress.py | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/libraryofcongress.py b/youtube_dl/extractor/libraryofcongress.py index a5f22b204..49351759e 100644 --- a/youtube_dl/extractor/libraryofcongress.py +++ b/youtube_dl/extractor/libraryofcongress.py @@ -15,6 +15,7 @@ class LibraryOfCongressIE(InfoExtractor): IE_DESC = 'Library of Congress' _VALID_URL = r'https?://(?:www\.)?loc\.gov/(?:item/|today/cyberlc/feature_wdesc\.php\?.*\brec=)(?P[0-9]+)' _TESTS = [{ + # embedded via
Date: Sat, 4 Jun 2016 00:26:03 +0700 Subject: [PATCH 14/43] [loc] Extract direct download links --- youtube_dl/extractor/libraryofcongress.py | 38 ++++++++++++++++++++++- 1 file changed, 37 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/libraryofcongress.py b/youtube_dl/extractor/libraryofcongress.py index 49351759e..0a94366fd 100644 --- a/youtube_dl/extractor/libraryofcongress.py +++ b/youtube_dl/extractor/libraryofcongress.py @@ -1,12 +1,15 @@ # coding: utf-8 from __future__ import unicode_literals +import re + from .common import InfoExtractor from ..utils import ( determine_ext, float_or_none, int_or_none, + parse_filesize, ) @@ -40,6 +43,20 @@ class LibraryOfCongressIE(InfoExtractor): 'params': { 'skip_download': True, }, + }, { + # with direct download links + 'url': 'https://www.loc.gov/item/78710669/', + 'info_dict': { + 'id': '78710669', + 'ext': 'mp4', + 'title': 'La vie et la passion de Jesus-Christ', + 'duration': 0, + 'view_count': int, + 'formats': 'mincount:4', + }, + 'params': { + 'skip_download': True, + }, }] def _real_extract(self, url): @@ -60,6 +77,9 @@ class LibraryOfCongressIE(InfoExtractor): derivative = data['derivatives'][0] media_url = derivative['derivativeUrl'] + title = derivative.get('shortName') or data.get('shortName') or self._og_search_title( + webpage) + # Following algorithm was extracted from setAVSource js function # found in webpage media_url = media_url.replace('rtmp', 'https') @@ -75,6 +95,7 @@ class LibraryOfCongressIE(InfoExtractor): 'format_id': 'hls', 'ext': 'mp4', 'protocol': 'm3u8_native', + 'quality': 1, }] elif 'vod/mp3:' in media_url: formats = [{ @@ -82,9 +103,24 @@ class LibraryOfCongressIE(InfoExtractor): 'vcodec': 'none', }] + download_urls = set() + for m in re.finditer( + r']+value=(["\'])(?P.+?)\1[^>]+data-file-download=[^>]+>\s*(?P.+?)(?:(?: |\s+)\((?P.+?)\))?\s*<', webpage): + format_id = m.group('id').lower() + if format_id == 'gif': + continue + download_url = m.group('url') + if download_url in download_urls: + continue + download_urls.add(download_url) + formats.append({ + 'url': download_url, + 'format_id': format_id, + 'filesize_approx': parse_filesize(m.group('size')), + }) + self._sort_formats(formats) - title = derivative.get('shortName') or data.get('shortName') or self._og_search_title(webpage) duration = float_or_none(data.get('duration')) view_count = int_or_none(data.get('viewCount')) From 762d44c9567af424b2731cb643429ddd8e76d704 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 4 Jun 2016 04:57:16 +0700 Subject: [PATCH 15/43] [channel9] Add support for rss links (Closes #9673) --- youtube_dl/extractor/channel9.py | 123 ++++++++++++++++++------------- 1 file changed, 70 insertions(+), 53 deletions(-) diff --git a/youtube_dl/extractor/channel9.py b/youtube_dl/extractor/channel9.py index c74553dcf..34d4e6156 100644 --- a/youtube_dl/extractor/channel9.py +++ b/youtube_dl/extractor/channel9.py @@ -20,54 +20,64 @@ class Channel9IE(InfoExtractor): ''' IE_DESC = 'Channel 9' IE_NAME = 'channel9' - _VALID_URL = r'https?://(?:www\.)?channel9\.msdn\.com/(?P.+)/?' 
+ _VALID_URL = r'https?://(?:www\.)?channel9\.msdn\.com/(?P.+?)(?P/RSS)?/?(?:[?#&]|$)' - _TESTS = [ - { - 'url': 'http://channel9.msdn.com/Events/TechEd/Australia/2013/KOS002', - 'md5': 'bbd75296ba47916b754e73c3a4bbdf10', - 'info_dict': { - 'id': 'Events/TechEd/Australia/2013/KOS002', - 'ext': 'mp4', - 'title': 'Developer Kick-Off Session: Stuff We Love', - 'description': 'md5:c08d72240b7c87fcecafe2692f80e35f', - 'duration': 4576, - 'thumbnail': 're:http://.*\.jpg', - 'session_code': 'KOS002', - 'session_day': 'Day 1', - 'session_room': 'Arena 1A', - 'session_speakers': ['Ed Blankenship', 'Andrew Coates', 'Brady Gaster', 'Patrick Klug', 'Mads Kristensen'], - }, + _TESTS = [{ + 'url': 'http://channel9.msdn.com/Events/TechEd/Australia/2013/KOS002', + 'md5': 'bbd75296ba47916b754e73c3a4bbdf10', + 'info_dict': { + 'id': 'Events/TechEd/Australia/2013/KOS002', + 'ext': 'mp4', + 'title': 'Developer Kick-Off Session: Stuff We Love', + 'description': 'md5:c08d72240b7c87fcecafe2692f80e35f', + 'duration': 4576, + 'thumbnail': 're:http://.*\.jpg', + 'session_code': 'KOS002', + 'session_day': 'Day 1', + 'session_room': 'Arena 1A', + 'session_speakers': ['Ed Blankenship', 'Andrew Coates', 'Brady Gaster', 'Patrick Klug', + 'Mads Kristensen'], }, - { - 'url': 'http://channel9.msdn.com/posts/Self-service-BI-with-Power-BI-nuclear-testing', - 'md5': 'b43ee4529d111bc37ba7ee4f34813e68', - 'info_dict': { - 'id': 'posts/Self-service-BI-with-Power-BI-nuclear-testing', - 'ext': 'mp4', - 'title': 'Self-service BI with Power BI - nuclear testing', - 'description': 'md5:d1e6ecaafa7fb52a2cacdf9599829f5b', - 'duration': 1540, - 'thumbnail': 're:http://.*\.jpg', - 'authors': ['Mike Wilmot'], - }, + }, { + 'url': 'http://channel9.msdn.com/posts/Self-service-BI-with-Power-BI-nuclear-testing', + 'md5': 'b43ee4529d111bc37ba7ee4f34813e68', + 'info_dict': { + 'id': 'posts/Self-service-BI-with-Power-BI-nuclear-testing', + 'ext': 'mp4', + 'title': 'Self-service BI with Power BI - nuclear testing', + 'description': 'md5:d1e6ecaafa7fb52a2cacdf9599829f5b', + 'duration': 1540, + 'thumbnail': 're:http://.*\.jpg', + 'authors': ['Mike Wilmot'], }, - { - # low quality mp4 is best - 'url': 'https://channel9.msdn.com/Events/CPP/CppCon-2015/Ranges-for-the-Standard-Library', - 'info_dict': { - 'id': 'Events/CPP/CppCon-2015/Ranges-for-the-Standard-Library', - 'ext': 'mp4', - 'title': 'Ranges for the Standard Library', - 'description': 'md5:2e6b4917677af3728c5f6d63784c4c5d', - 'duration': 5646, - 'thumbnail': 're:http://.*\.jpg', - }, - 'params': { - 'skip_download': True, - }, - } - ] + }, { + # low quality mp4 is best + 'url': 'https://channel9.msdn.com/Events/CPP/CppCon-2015/Ranges-for-the-Standard-Library', + 'info_dict': { + 'id': 'Events/CPP/CppCon-2015/Ranges-for-the-Standard-Library', + 'ext': 'mp4', + 'title': 'Ranges for the Standard Library', + 'description': 'md5:2e6b4917677af3728c5f6d63784c4c5d', + 'duration': 5646, + 'thumbnail': 're:http://.*\.jpg', + }, + 'params': { + 'skip_download': True, + }, + }, { + 'url': 'https://channel9.msdn.com/Niners/Splendid22/Queue/76acff796e8f411184b008028e0d492b/RSS', + 'info_dict': { + 'id': 'Niners/Splendid22/Queue/76acff796e8f411184b008028e0d492b', + 'title': 'Channel 9', + }, + 'playlist_count': 2, + }, { + 'url': 'https://channel9.msdn.com/Events/DEVintersection/DEVintersection-2016/RSS', + 'only_matching': True, + }, { + 'url': 'https://channel9.msdn.com/Events/Speakers/scott-hanselman/RSS?UrlSafeName=scott-hanselman', + 'only_matching': True, + }] _RSS_URL = 
'http://channel9.msdn.com/%s/RSS' @@ -254,22 +264,30 @@ class Channel9IE(InfoExtractor): return self.playlist_result(contents) - def _extract_list(self, content_path): - rss = self._download_xml(self._RSS_URL % content_path, content_path, 'Downloading RSS') + def _extract_list(self, video_id, rss_url=None): + if not rss_url: + rss_url = self._RSS_URL % video_id + rss = self._download_xml(rss_url, video_id, 'Downloading RSS') entries = [self.url_result(session_url.text, 'Channel9') for session_url in rss.findall('./channel/item/link')] title_text = rss.find('./channel/title').text - return self.playlist_result(entries, content_path, title_text) + return self.playlist_result(entries, video_id, title_text) def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) content_path = mobj.group('contentpath') + rss = mobj.group('rss') - webpage = self._download_webpage(url, content_path, 'Downloading web page') + if rss: + return self._extract_list(content_path, url) - page_type_m = re.search(r'', webpage) - if page_type_m is not None: - page_type = page_type_m.group('pagetype') + webpage = self._download_webpage( + url, content_path, 'Downloading web page') + + page_type = self._search_regex( + r']+name=(["\'])WT\.entryid\1[^>]+content=(["\'])(?P[^:]+).+?\2', + webpage, 'page type', default=None, group='pagetype') + if page_type: if page_type == 'Entry': # Any 'item'-like page, may contain downloadable content return self._extract_entry_item(webpage, content_path) elif page_type == 'Session': # Event session page, may contain downloadable content @@ -278,6 +296,5 @@ class Channel9IE(InfoExtractor): return self._extract_list(content_path) else: raise ExtractorError('Unexpected WT.entryid %s' % page_type, expected=True) - else: # Assuming list return self._extract_list(content_path) From cad88f96dc8eaa845a458f0b80e92c1ba36c5491 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sat, 4 Jun 2016 11:42:52 +0200 Subject: [PATCH 16/43] disable uploading to yt-dl.org for now --- devscripts/release.sh | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/devscripts/release.sh b/devscripts/release.sh index cde4d0a39..1a7b1e054 100755 --- a/devscripts/release.sh +++ b/devscripts/release.sh @@ -97,8 +97,10 @@ RELEASE_FILES="youtube-dl youtube-dl.exe youtube-dl-$version.tar.gz" /bin/echo -e "\n### Signing and uploading the new binaries to yt-dl.org ..." for f in $RELEASE_FILES; do gpg --passphrase-repeat 5 --detach-sig "build/$version/$f"; done -scp -r "build/$version" ytdl@yt-dl.org:html/tmp/ -ssh ytdl@yt-dl.org "mv html/tmp/$version html/downloads/" + +echo 'TODO: upload on GitHub' +exit 1 + ssh ytdl@yt-dl.org "sh html/update_latest.sh $version" /bin/echo -e "\n### Now switching to gh-pages..." 
From 7def35712a7047578643f18eaf6dda79fd8c9291 Mon Sep 17 00:00:00 2001 From: TRox1972 Date: Sat, 21 May 2016 17:48:17 +0200 Subject: [PATCH 17/43] [vidio] Add extractor (Closes #7195) [Vidio] fix fallback value and wrap duration in int_or_none [Vidio] don't use video_id for _html_search_regex() --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/vidio.py | 48 ++++++++++++++++++++++++++++++ 2 files changed, 49 insertions(+) create mode 100644 youtube_dl/extractor/vidio.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 3b5143ace..ed4e39574 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -910,6 +910,7 @@ from .videomore import ( ) from .videopremium import VideoPremiumIE from .videott import VideoTtIE +from .vidio import VidioIE from .vidme import ( VidmeIE, VidmeUserIE, diff --git a/youtube_dl/extractor/vidio.py b/youtube_dl/extractor/vidio.py new file mode 100644 index 000000000..d17c663fd --- /dev/null +++ b/youtube_dl/extractor/vidio.py @@ -0,0 +1,48 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re +from .common import InfoExtractor + +from ..utils import int_or_none + + +class VidioIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?vidio\.com/watch/(?P\d{6})-(?P[^/?]+)' + _TEST = { + 'url': 'http://www.vidio.com/watch/165683-dj_ambred-booyah-live-2015', + 'info_dict': { + 'id': '165683', + 'title': 'DJ_AMBRED - Booyah (Live 2015)', + 'ext': 'mp4', + 'thumbnail': 'https://cdn0-a.production.vidio.static6.com/uploads/video/image/165683/dj_ambred-booyah-live-2015-bfb2ba.jpg', + 'description': 'md5:27dc15f819b6a78a626490881adbadf8', + 'duration': 149, + }, + 'params': { + # m3u8 download + 'skip_download': True + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id, display_id = mobj.group('id', 'display_id') + + webpage = self._download_webpage(url, display_id) + + video_data = self._parse_json(self._html_search_regex( + r'data-json-clips\s*=\s*"\[(.+)\]"', webpage, 'video data'), display_id) + + formats = self._extract_m3u8_formats( + video_data['sources'][0]['file'], + display_id, ext='mp4') + + return { + 'id': video_id, + 'title': self._og_search_title(webpage), + 'formats': formats, + 'thumbnail': video_data.get('image'), + 'description': self._og_search_description(webpage), + 'duration': int_or_none(video_data.get('clip_duration')), + } From 0fc832e1b2c8f48298e135d42818a16bfba4d3ed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 4 Jun 2016 16:47:43 +0700 Subject: [PATCH 18/43] [vidio] Improve (Closes #9562) --- youtube_dl/extractor/vidio.py | 65 ++++++++++++++++++++++++----------- 1 file changed, 45 insertions(+), 20 deletions(-) diff --git a/youtube_dl/extractor/vidio.py b/youtube_dl/extractor/vidio.py index d17c663fd..6898042de 100644 --- a/youtube_dl/extractor/vidio.py +++ b/youtube_dl/extractor/vidio.py @@ -2,28 +2,30 @@ from __future__ import unicode_literals import re -from .common import InfoExtractor +from .common import InfoExtractor from ..utils import int_or_none class VidioIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?vidio\.com/watch/(?P\d{6})-(?P[^/?]+)' - _TEST = { + _VALID_URL = r'https?://(?:www\.)?vidio\.com/watch/(?P\d+)-(?P[^/?#&]+)' + _TESTS = [{ 'url': 'http://www.vidio.com/watch/165683-dj_ambred-booyah-live-2015', + 'md5': 'cd2801394afc164e9775db6a140b91fe', 'info_dict': { 'id': '165683', - 'title': 'DJ_AMBRED - Booyah (Live 2015)', + 'display_id': 
'dj_ambred-booyah-live-2015', 'ext': 'mp4', - 'thumbnail': 'https://cdn0-a.production.vidio.static6.com/uploads/video/image/165683/dj_ambred-booyah-live-2015-bfb2ba.jpg', + 'title': 'DJ_AMBRED - Booyah (Live 2015)', 'description': 'md5:27dc15f819b6a78a626490881adbadf8', - 'duration': 149, + 'thumbnail': 're:^https?://.*\.jpg$', + 'duration': 149, + 'like_count': int, }, - 'params': { - # m3u8 download - 'skip_download': True - } - } + }, { + 'url': 'https://www.vidio.com/watch/77949-south-korea-test-fires-missile-that-can-strike-all-of-the-north', + 'only_matching': True, + }] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) @@ -31,18 +33,41 @@ class VidioIE(InfoExtractor): webpage = self._download_webpage(url, display_id) - video_data = self._parse_json(self._html_search_regex( - r'data-json-clips\s*=\s*"\[(.+)\]"', webpage, 'video data'), display_id) + title = self._og_search_title(webpage) - formats = self._extract_m3u8_formats( - video_data['sources'][0]['file'], - display_id, ext='mp4') + m3u8_url, duration, thumbnail = [None] * 3 + + clips = self._parse_json( + self._html_search_regex( + r'data-json-clips\s*=\s*(["\'])(?P\[.+?\])\1', + webpage, 'video data', default='[]', group='data'), + display_id, fatal=False) + if clips: + clip = clips[0] + m3u8_url = clip.get('sources', [{}])[0].get('file') + duration = clip.get('clip_duration') + thumbnail = clip.get('image') + + m3u8_url = m3u8_url or self._search_regex( + r'data(?:-vjs)?-clip-hls-url=(["\'])(?P.+?)\1', webpage, 'hls url') + formats = self._extract_m3u8_formats(m3u8_url, display_id, 'mp4', entry_protocol='m3u8_native') + + duration = int_or_none(duration or self._search_regex( + r'data-video-duration=(["\'])(?P\d+)\1', webpage, 'duration')) + thumbnail = thumbnail or self._og_search_thumbnail(webpage) + + like_count = int_or_none(self._search_regex( + (r']+data-comment-vote-count=["\'](\d+)', + r']+class=["\'].*?\blike(?:__|-)count\b.*?["\'][^>]*>\s*(\d+)'), + webpage, 'like count', fatal=False)) return { 'id': video_id, - 'title': self._og_search_title(webpage), - 'formats': formats, - 'thumbnail': video_data.get('image'), + 'display_id': display_id, + 'title': title, 'description': self._og_search_description(webpage), - 'duration': int_or_none(video_data.get('clip_duration')), + 'thumbnail': thumbnail, + 'duration': duration, + 'like_count': like_count, + 'formats': formats, } From 1ae6c83bceb6dbc7093fe35ddafcde08dd0151a6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 5 Jun 2016 00:43:55 +0700 Subject: [PATCH 19/43] [compat] Add compat_input --- youtube_dl/compat.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py index 06e5f3ff6..fabac9fd2 100644 --- a/youtube_dl/compat.py +++ b/youtube_dl/compat.py @@ -482,6 +482,11 @@ if sys.version_info < (3, 0) and sys.platform == 'win32': else: compat_getpass = getpass.getpass +try: + compat_input = raw_input +except NameError: # Python 3 + compat_input = input + # Python < 2.6.5 require kwargs to be bytes try: def _testfunc(x): From e92b552a102f509066a605b26d6df38eb73764b1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 5 Jun 2016 00:44:51 +0700 Subject: [PATCH 20/43] [devscripts/buildserver] Use compat_input from compat --- devscripts/buildserver.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/devscripts/buildserver.py b/devscripts/buildserver.py index f7979c43e..fc99c3213 100644 --- a/devscripts/buildserver.py +++ 
b/devscripts/buildserver.py @@ -13,6 +13,7 @@ import os.path sys.path.insert(0, os.path.dirname(os.path.dirname((os.path.abspath(__file__))))) from youtube_dl.compat import ( + compat_input, compat_http_server, compat_str, compat_urlparse, @@ -30,11 +31,6 @@ try: except ImportError: # Python 2 import SocketServer as compat_socketserver -try: - compat_input = raw_input -except NameError: # Python 3 - compat_input = input - class BuildHTTPServer(compat_socketserver.ThreadingMixIn, compat_http_server.HTTPServer): allow_reuse_address = True From db56f281d9c5d57cb2c44a2ea356a9a0a12b3b4d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 5 Jun 2016 00:47:26 +0700 Subject: [PATCH 21/43] [devscripts/create-github-release] Add script for releasing on GitHub Yet only Basic authentication is supported either via .netrc or by manual input --- devscripts/create-github-release.py | 112 ++++++++++++++++++++++++++++ 1 file changed, 112 insertions(+) create mode 100644 devscripts/create-github-release.py diff --git a/devscripts/create-github-release.py b/devscripts/create-github-release.py new file mode 100644 index 000000000..f74d39490 --- /dev/null +++ b/devscripts/create-github-release.py @@ -0,0 +1,112 @@ +#!/usr/bin/env python +from __future__ import unicode_literals + +import base64 +import json +import mimetypes +import netrc +import optparse +import os +import sys + +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from youtube_dl.compat import ( + compat_basestring, + compat_input, + compat_getpass, + compat_print, + compat_urllib_request, +) +from youtube_dl.utils import ( + make_HTTPS_handler, + sanitized_Request, +) + + +class GitHubReleaser(object): + _API_URL = 'https://api.github.com/repos/rg3/youtube-dl/releases' + _UPLOADS_URL = 'https://uploads.github.com/repos/rg3/youtube-dl/releases/%s/assets?name=%s' + _NETRC_MACHINE = 'github.com' + + def __init__(self, debuglevel=0): + self._init_github_account() + https_handler = make_HTTPS_handler({}, debuglevel=debuglevel) + self._opener = compat_urllib_request.build_opener(https_handler) + + def _init_github_account(self): + try: + info = netrc.netrc().authenticators(self._NETRC_MACHINE) + if info is not None: + self._username = info[0] + self._password = info[2] + compat_print('Using GitHub credentials found in .netrc...') + return + else: + compat_print('No GitHub credentials found in .netrc') + except (IOError, netrc.NetrcParseError): + compat_print('Unable to parse .netrc') + self._username = compat_input( + 'Type your GitHub username or email address and press [Return]: ') + self._password = compat_getpass( + 'Type your GitHub password and press [Return]: ') + + def _call(self, req): + if isinstance(req, compat_basestring): + req = sanitized_Request(req) + # Authorizing manually since GitHub does not response with 401 with + # WWW-Authenticate header set (see + # https://developer.github.com/v3/#basic-authentication) + b64 = base64.b64encode( + ('%s:%s' % (self._username, self._password)).encode('utf-8')).decode('ascii') + req.add_header('Authorization', 'Basic %s' % b64) + response = self._opener.open(req).read().decode('utf-8') + return json.loads(response) + + def list_releases(self): + return self._call(self._API_URL) + + def create_release(self, tag_name, name=None, body='', draft=False, prerelease=False): + data = { + 'tag_name': tag_name, + 'target_commitish': 'master', + 'name': name, + 'body': body, + 'draft': draft, + 'prerelease': prerelease, + } + req = 
sanitized_Request(self._API_URL, json.dumps(data).encode('utf-8')) + return self._call(req) + + def create_asset(self, release_id, asset): + asset_name = os.path.basename(asset) + url = self._UPLOADS_URL % (release_id, asset_name) + # Our files are small enough to be loaded directly into memory. + data = open(asset, 'rb').read() + req = sanitized_Request(url, data) + mime_type, _ = mimetypes.guess_type(asset_name) + req.add_header('Content-Type', mime_type or 'application/octet-stream') + return self._call(req) + + +def main(): + parser = optparse.OptionParser(usage='%prog VERSION BUILDPATH') + options, args = parser.parse_args() + if len(args) != 2: + parser.error('Expected a version and a build directory') + + version, build_path = args + + releaser = GitHubReleaser(debuglevel=0) + + new_release = releaser.create_release( + version, name='youtube-dl %s' % version, draft=True, prerelease=True) + release_id = new_release['id'] + + for asset in os.listdir(build_path): + compat_print('Uploading %s...' % asset) + releaser.create_asset(release_id, os.path.join(build_path, asset)) + + +if __name__ == '__main__': + main() From 39b32571df802ef869db1067454aa654f3f66235 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 5 Jun 2016 00:48:33 +0700 Subject: [PATCH 22/43] [devscripts/release.sh] Release to GitHub --- devscripts/release.sh | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/devscripts/release.sh b/devscripts/release.sh index 1a7b1e054..87e8eda50 100755 --- a/devscripts/release.sh +++ b/devscripts/release.sh @@ -95,17 +95,16 @@ RELEASE_FILES="youtube-dl youtube-dl.exe youtube-dl-$version.tar.gz" (cd build/$version/ && sha256sum $RELEASE_FILES > SHA2-256SUMS) (cd build/$version/ && sha512sum $RELEASE_FILES > SHA2-512SUMS) -/bin/echo -e "\n### Signing and uploading the new binaries to yt-dl.org ..." +/bin/echo -e "\n### Signing and uploading the new binaries to GitHub..." for f in $RELEASE_FILES; do gpg --passphrase-repeat 5 --detach-sig "build/$version/$f"; done -echo 'TODO: upload on GitHub' -exit 1 +ROOT=$(pwd) +python devscripts/create-github-release.py $version "$ROOT/build/$version" ssh ytdl@yt-dl.org "sh html/update_latest.sh $version" /bin/echo -e "\n### Now switching to gh-pages..." git clone --branch gh-pages --single-branch . 
build/gh-pages -ROOT=$(pwd) ( set -e ORIGIN_URL=$(git config --get remote.origin.url) From 2c347352677f023678ffd488a51b19f54b97fa36 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 5 Jun 2016 01:44:13 +0700 Subject: [PATCH 23/43] [youtube] Add itags 256 and 258 --- youtube_dl/extractor/youtube.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index f3f102c30..6c9f77d95 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -344,6 +344,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'preference': -50, 'container': 'm4a_dash'}, '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'preference': -50, 'container': 'm4a_dash'}, '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'preference': -50, 'container': 'm4a_dash'}, + '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'preference': -50, 'container': 'm4a_dash'}, + '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'preference': -50, 'container': 'm4a_dash'}, # Dash webm '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40}, From 1e236d7e2350e055bbe230b12490e4369aaa0956 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 5 Jun 2016 03:16:05 +0700 Subject: [PATCH 24/43] [downloader/hls] Do not rely on EXT-X-PLAYLIST-TYPE:EVENT --- youtube_dl/downloader/hls.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/youtube_dl/downloader/hls.py b/youtube_dl/downloader/hls.py index 62136ee54..049fb78ce 100644 --- a/youtube_dl/downloader/hls.py +++ b/youtube_dl/downloader/hls.py @@ -23,11 +23,17 @@ class HlsFD(FragmentFD): UNSUPPORTED_FEATURES = ( r'#EXT-X-KEY:METHOD=(?!NONE)', # encrypted streams [1] r'#EXT-X-BYTERANGE', # playlists composed of byte ranges of media files [2] + # Live streams heuristic does not always work (e.g. geo restricted to Germany # http://hls-geo.daserste.de/i/videoportal/Film/c_620000/622873/format,716451,716457,716450,716458,716459,.mp4.csmil/index_4_av.m3u8?null=0) # r'#EXT-X-MEDIA-SEQUENCE:(?!0$)', # live streams [3] - r'#EXT-X-PLAYLIST-TYPE:EVENT', # media segments may be appended to the end of - # event media playlists [4] + + # This heuristic also is not correct since segments may not be appended as well. + # Twitch vods have EXT-X-PLAYLIST-TYPE:EVENT despite no segments will definitely + # be appended to the end of the playlist. + # r'#EXT-X-PLAYLIST-TYPE:EVENT', # media segments may be appended to the end of + # event media playlists [4] + # 1. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.4 # 2. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.2 # 3. 
https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.2 From 631d4c87ee84183917fcdf5db59e1cd1bb48d9a9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 5 Jun 2016 03:19:44 +0700 Subject: [PATCH 25/43] [twitch:vod] Use native hls --- youtube_dl/extractor/twitch.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py index f7b98e190..d898f14c3 100644 --- a/youtube_dl/extractor/twitch.py +++ b/youtube_dl/extractor/twitch.py @@ -260,7 +260,7 @@ class TwitchVodIE(TwitchItemBaseIE): 'nauth': access_token['token'], 'nauthsig': access_token['sig'], })), - item_id, 'mp4') + item_id, 'mp4', entry_protocol='m3u8_native') self._prefer_source(formats) info['formats'] = formats From 51c4d85ce788497584bd056d571ed9b7b24c9651 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 5 Jun 2016 03:21:43 +0700 Subject: [PATCH 26/43] [downloader/hls] PEP 8 --- youtube_dl/downloader/hls.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/downloader/hls.py b/youtube_dl/downloader/hls.py index 049fb78ce..8e4a7189a 100644 --- a/youtube_dl/downloader/hls.py +++ b/youtube_dl/downloader/hls.py @@ -32,7 +32,7 @@ class HlsFD(FragmentFD): # Twitch vods have EXT-X-PLAYLIST-TYPE:EVENT despite no segments will definitely # be appended to the end of the playlist. # r'#EXT-X-PLAYLIST-TYPE:EVENT', # media segments may be appended to the end of - # event media playlists [4] + # # event media playlists [4] # 1. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.4 # 2. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.2 From 633b444fd29aa9d8b3ba722285ae2475ae66595f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 5 Jun 2016 03:31:10 +0700 Subject: [PATCH 27/43] [downloader/hls] Correct comment on twitch vods --- youtube_dl/downloader/hls.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/downloader/hls.py b/youtube_dl/downloader/hls.py index 8e4a7189a..54f2108e9 100644 --- a/youtube_dl/downloader/hls.py +++ b/youtube_dl/downloader/hls.py @@ -29,8 +29,8 @@ class HlsFD(FragmentFD): # r'#EXT-X-MEDIA-SEQUENCE:(?!0$)', # live streams [3] # This heuristic also is not correct since segments may not be appended as well. - # Twitch vods have EXT-X-PLAYLIST-TYPE:EVENT despite no segments will definitely - # be appended to the end of the playlist. + # Twitch vods of finished streams have EXT-X-PLAYLIST-TYPE:EVENT despite + # no segments will definitely be appended to the end of the playlist. # r'#EXT-X-PLAYLIST-TYPE:EVENT', # media segments may be appended to the end of # # event media playlists [4] From 71b9cb3107e156c7f17ec4cdf1d09421cb4dd4b1 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sat, 4 Jun 2016 22:55:15 +0200 Subject: [PATCH 28/43] extend FAQ (#9696) --- README.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/README.md b/README.md index 253d51bc8..91078eee8 100644 --- a/README.md +++ b/README.md @@ -842,6 +842,12 @@ It is *not* possible to detect whether a URL is supported or not. That's because If you want to find out whether a given URL is supported, simply call youtube-dl with it. If you get no videos back, chances are the URL is either not referring to a video or unsupported. 
You can find out which by examining the output (if you run youtube-dl on the console) or catching an `UnsupportedError` exception if you run it from a Python program. +# Why do I need to go through that much red tape when filing bugs? + +Before we had the issue template, despite our extensive [bug reporting instructions](#bugs), about 80% of the issue reports we got were useless, for instance because people used ancient versions hundreds of releases old, because of simple syntactic errors (not in youtube-dl but in general shell usage), because the problem was already reported multiple times before, because people did not actually read an error message, even if it said "please install ffmpeg", because people did not mention the URL they were trying to download, and many more simple, easy-to-avoid problems, many of which were totally unrelated to youtube-dl. + +youtube-dl is an open-source project manned by too few volunteers, so we'd rather spend time fixing bugs where we are certain none of those simple problems apply, and where we can be reasonably confident that we can reproduce the issue without asking the reporter repeatedly. As such, the output of `youtube-dl -v YOUR_URL_HERE` is really all that's required to file an issue. The issue template also guides you through some basic steps you can do, such as checking that your version of youtube-dl is current. + # DEVELOPER INSTRUCTIONS Most users do not need to build youtube-dl and can [download the builds](http://rg3.github.io/youtube-dl/download.html) or get them from their distribution. From bc270284b56b8ce7623b22b5c5cbf8d0d390c09e Mon Sep 17 00:00:00 2001 From: Ryan Schmidt Date: Sat, 4 Jun 2016 21:30:22 -0500 Subject: [PATCH 29/43] Update README.md to mention MacPorts --- README.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 91078eee8..f60e7ce33 100644 --- a/README.md +++ b/README.md @@ -27,10 +27,14 @@ If you do not have curl, you can alternatively use a recent wget: Windows users can [download an .exe file](https://yt-dl.org/latest/youtube-dl.exe) and place it in any location on their [PATH](http://en.wikipedia.org/wiki/PATH_%28variable%29) except for `%SYSTEMROOT%\System32` (e.g. **do not** put in `C:\Windows\System32`). -OS X users can install **youtube-dl** with [Homebrew](http://brew.sh/). +OS X users can install **youtube-dl** with [Homebrew](http://brew.sh/): brew install youtube-dl +Or with [MacPorts](https://www.macports.org/): + + sudo port install youtube-dl + You can also use pip: sudo pip install youtube-dl From 8f1aaa97a1e3eb60749f8046f2f0b1a0749d007c Mon Sep 17 00:00:00 2001 From: Sergey M Date: Sun, 5 Jun 2016 11:19:44 +0700 Subject: [PATCH 30/43] [README.md] Update pypi instructions --- README.md | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index f60e7ce33..e7240f41a 100644 --- a/README.md +++ b/README.md @@ -27,7 +27,13 @@ If you do not have curl, you can alternatively use a recent wget: Windows users can [download an .exe file](https://yt-dl.org/latest/youtube-dl.exe) and place it in any location on their [PATH](http://en.wikipedia.org/wiki/PATH_%28variable%29) except for `%SYSTEMROOT%\System32` (e.g. **do not** put in `C:\Windows\System32`). -OS X users can install **youtube-dl** with [Homebrew](http://brew.sh/): +You can also use pip: + + sudo pip install --upgrade youtube-dl + +This command will update youtube-dl if you have already installed it.
See the [pypi page](https://pypi.python.org/pypi/youtube_dl) for more information. + +OS X users can install youtube-dl with [Homebrew](http://brew.sh/): brew install youtube-dl @@ -35,10 +41,6 @@ Or with [MacPorts](https://www.macports.org/): sudo port install youtube-dl -You can also use pip: - - sudo pip install youtube-dl - Alternatively, refer to the [developer instructions](#developer-instructions) for how to check out and work with the git repository. For further options, including PGP signatures, see the [youtube-dl Download Page](https://rg3.github.io/youtube-dl/download.html). # DESCRIPTION From 47f12ad3e39ebe714abec7e7588e8e411e2841b5 Mon Sep 17 00:00:00 2001 From: Tobias Salzmann Date: Sun, 5 Jun 2016 11:04:55 +0200 Subject: [PATCH 31/43] curl: follow redirect --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index e7240f41a..205c485d0 100644 --- a/README.md +++ b/README.md @@ -17,7 +17,7 @@ youtube-dl - download videos from youtube.com or other video platforms To install it right away for all UNIX users (Linux, OS X, etc.), type: - sudo curl https://yt-dl.org/latest/youtube-dl -o /usr/local/bin/youtube-dl + sudo curl -L https://yt-dl.org/latest/youtube-dl -o /usr/local/bin/youtube-dl sudo chmod a+rx /usr/local/bin/youtube-dl If you do not have curl, you can alternatively use a recent wget: From 7b0d1c28597bd38567e5b4e853f669a5a601c6e8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 5 Jun 2016 21:01:20 +0700 Subject: [PATCH 32/43] [__init__] Use write_string instead of compat_print (Closes #9689) --- youtube_dl/__init__.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 5df965191..4905674ad 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -18,7 +18,6 @@ from .options import ( from .compat import ( compat_expanduser, compat_getpass, - compat_print, compat_shlex_split, workaround_optparse_bug9161, ) @@ -76,7 +75,7 @@ def _real_main(argv=None): # Dump user agent if opts.dump_user_agent: - compat_print(std_headers['User-Agent']) + write_string(std_headers['User-Agent'] + '\n', out=sys.stdout) sys.exit(0) # Batch file verification @@ -101,10 +100,10 @@ def _real_main(argv=None): if opts.list_extractors: for ie in list_extractors(opts.age_limit): - compat_print(ie.IE_NAME + (' (CURRENTLY BROKEN)' if not ie._WORKING else '')) + write_string(ie.IE_NAME + (' (CURRENTLY BROKEN)' if not ie._WORKING else '') + '\n', out=sys.stdout) matchedUrls = [url for url in all_urls if ie.suitable(url)] for mu in matchedUrls: - compat_print(' ' + mu) + write_string(' ' + mu + '\n', out=sys.stdout) sys.exit(0) if opts.list_extractor_descriptions: for ie in list_extractors(opts.age_limit): @@ -117,7 +116,7 @@ def _real_main(argv=None): _SEARCHES = ('cute kittens', 'slithering pythons', 'falling cat', 'angry poodle', 'purple fish', 'running tortoise', 'sleeping bunny', 'burping cow') _COUNTS = ('', '5', '10', 'all') desc += ' (Example: "%s%s:%s" )' % (ie.SEARCH_KEY, random.choice(_COUNTS), random.choice(_SEARCHES)) - compat_print(desc) + write_string(desc + '\n', out=sys.stdout) sys.exit(0) # Conflicting, missing and erroneous options From 244fe977fec880f1bce55683437a711e12075b72 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 6 Jun 2016 02:52:58 +0700 Subject: [PATCH 33/43] [options] Add --load-info-json alias for symmetry with --write-info-json --- youtube_dl/options.py | 2 +- 1 file changed, 1
insertion(+), 1 deletion(-) diff --git a/youtube_dl/options.py b/youtube_dl/options.py index 14051b714..99ce4131f 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -668,7 +668,7 @@ def parseOpts(overrideArguments=None): action='store_true', dest='writeannotations', default=False, help='Write video annotations to a .annotations.xml file') filesystem.add_option( - '--load-info', + '--load-info-json', '--load-info', dest='load_info_filename', metavar='FILE', help='JSON file containing the video information (created with the "--write-info-json" option)') filesystem.add_option( From db59b37d0bb2bbb4894f28b6b65d1d7f5496444d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 6 Jun 2016 03:02:11 +0700 Subject: [PATCH 34/43] [devscripts/create-github-release] Make full published releases by default --- devscripts/create-github-release.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/devscripts/create-github-release.py b/devscripts/create-github-release.py index f74d39490..3b8021e74 100644 --- a/devscripts/create-github-release.py +++ b/devscripts/create-github-release.py @@ -97,10 +97,9 @@ def main(): version, build_path = args - releaser = GitHubReleaser(debuglevel=0) + releaser = GitHubReleaser() - new_release = releaser.create_release( - version, name='youtube-dl %s' % version, draft=True, prerelease=True) + new_release = releaser.create_release(version, name='youtube-dl %s' % version) release_id = new_release['id'] for asset in os.listdir(build_path): From e67f6880257068c395d38e24a5e13f69902e1e4c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Sun, 5 Jun 2016 23:16:08 +0200 Subject: [PATCH 35/43] [compat] Add 'compat_input' to __all__ --- youtube_dl/compat.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py index fabac9fd2..e3cab4dd0 100644 --- a/youtube_dl/compat.py +++ b/youtube_dl/compat.py @@ -628,6 +628,7 @@ __all__ = [ 'compat_html_entities', 'compat_http_client', 'compat_http_server', + 'compat_input', 'compat_itertools_count', 'compat_kwargs', 'compat_ord', From 345dec937fcc2b9ae106e91f4c01568c8c7e41f8 Mon Sep 17 00:00:00 2001 From: Kagami Hiiragi Date: Tue, 7 Jun 2016 14:39:21 +0300 Subject: [PATCH 36/43] [vlive] Acknowledge vlive+ streams statuses Same as common statuses just with "PRODUCT_" prefix: PRODUCE_LIVE_END, PRODUCT_COMING_SOON, etc. 
--- youtube_dl/extractor/vlive.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/youtube_dl/extractor/vlive.py b/youtube_dl/extractor/vlive.py index 147f52d45..8d671cca7 100644 --- a/youtube_dl/extractor/vlive.py +++ b/youtube_dl/extractor/vlive.py @@ -9,6 +9,7 @@ from ..utils import ( ExtractorError, float_or_none, int_or_none, + remove_start, ) from ..compat import compat_urllib_parse_urlencode @@ -39,6 +40,7 @@ class VLiveIE(InfoExtractor): webpage, 'video params') status, _, _, live_params, long_video_id, key = re.split( r'"\s*,\s*"', video_params)[2:8] + status = remove_start(status, 'PRODUCT_') if status == 'LIVE_ON_AIR' or status == 'BIG_EVENT_ON_AIR': live_params = self._parse_json('"%s"' % live_params, video_id) From 74193838f71addcb08a9f56a7fad8c2e7df298ec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 7 Jun 2016 22:12:20 +0700 Subject: [PATCH 37/43] [canal+] Improve extraction (Closes #9718) --- youtube_dl/extractor/canalplus.py | 33 ++++++++++++++++++++++--------- 1 file changed, 24 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/canalplus.py b/youtube_dl/extractor/canalplus.py index 25b2d4efe..8d0f91158 100644 --- a/youtube_dl/extractor/canalplus.py +++ b/youtube_dl/extractor/canalplus.py @@ -4,11 +4,11 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from ..compat import compat_urllib_parse_urlparse from ..utils import ( ExtractorError, HEADRequest, unified_strdate, - url_basename, qualities, int_or_none, ) @@ -16,13 +16,25 @@ from ..utils import ( class CanalplusIE(InfoExtractor): IE_DESC = 'canalplus.fr, piwiplus.fr and d8.tv' - _VALID_URL = r'https?://(?:www\.(?Pcanalplus\.fr|piwiplus\.fr|d8\.tv|itele\.fr)/.*?/(?P.*)|player\.canalplus\.fr/#/(?P[0-9]+))' + _VALID_URL = r'''(?x) + https?:// + (?: + (?: + (?:(?:www|m)\.)?canalplus\.fr| + (?:www\.)?piwiplus\.fr| + (?:www\.)?d8\.tv| + (?:www\.)?itele\.fr + )/(?:(?:[^/]+/)*(?P[^/?#&]+))?(?:\?.*\bvid=(?P\d+))?| + player\.canalplus\.fr/#/(?P\d+) + ) + + ''' _VIDEO_INFO_TEMPLATE = 'http://service.canal-plus.com/video/rest/getVideosLiees/%s/%s?format=json' _SITE_ID_MAP = { - 'canalplus.fr': 'cplus', - 'piwiplus.fr': 'teletoon', - 'd8.tv': 'd8', - 'itele.fr': 'itele', + 'canalplus': 'cplus', + 'piwiplus': 'teletoon', + 'd8': 'd8', + 'itele': 'itele', } _TESTS = [{ @@ -65,16 +77,19 @@ class CanalplusIE(InfoExtractor): 'description': 'md5:8216206ec53426ea6321321f3b3c16db', 'upload_date': '20150211', }, + }, { + 'url': 'http://m.canalplus.fr/?vid=1398231', + 'only_matching': True, }] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) - video_id = mobj.groupdict().get('id') + video_id = mobj.groupdict().get('id') or mobj.groupdict().get('vid') - site_id = self._SITE_ID_MAP[mobj.group('site') or 'canal'] + site_id = self._SITE_ID_MAP[compat_urllib_parse_urlparse(url).netloc.rsplit('.', 2)[-2]] # Beware, some subclasses do not define an id group - display_id = url_basename(mobj.group('path')) + display_id = mobj.group('display_id') or video_id if video_id is None: webpage = self._download_webpage(url, display_id) From 3d9b3605a35eb48bd20e569ed9ce9d706e457ec6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 7 Jun 2016 22:26:18 +0700 Subject: [PATCH 38/43] [canal+] Update tests --- youtube_dl/extractor/canalplus.py | 48 +++++++++++++++++-------------- 1 file changed, 26 insertions(+), 22 deletions(-) diff --git a/youtube_dl/extractor/canalplus.py b/youtube_dl/extractor/canalplus.py index 8d0f91158..605c5e957 100644 
--- a/youtube_dl/extractor/canalplus.py +++ b/youtube_dl/extractor/canalplus.py @@ -38,14 +38,14 @@ class CanalplusIE(InfoExtractor): } _TESTS = [{ - 'url': 'http://www.canalplus.fr/c-emissions/pid1830-c-zapping.html?vid=1263092', - 'md5': '12164a6f14ff6df8bd628e8ba9b10b78', + 'url': 'http://www.canalplus.fr/c-emissions/pid1830-c-zapping.html?vid=1192814', + 'md5': '41f438a4904f7664b91b4ed0dec969dc', 'info_dict': { - 'id': '1263092', + 'id': '1192814', 'ext': 'mp4', - 'title': 'Le Zapping - 13/05/15', - 'description': 'md5:09738c0d06be4b5d06a0940edb0da73f', - 'upload_date': '20150513', + 'title': "L'Année du Zapping 2014 - L'Année du Zapping 2014", + 'description': "Toute l'année 2014 dans un Zapping exceptionnel !", + 'upload_date': '20150105', }, }, { 'url': 'http://www.piwiplus.fr/videos-piwi/pid1405-le-labyrinthe-boing-super-ranger.html?vid=1108190', @@ -58,24 +58,28 @@ class CanalplusIE(InfoExtractor): }, 'skip': 'Only works from France', }, { - 'url': 'http://www.d8.tv/d8-docs-mags/pid6589-d8-campagne-intime.html', + 'url': 'http://www.d8.tv/d8-docs-mags/pid5198-d8-en-quete-d-actualite.html?vid=1390231', 'info_dict': { - 'id': '966289', - 'ext': 'flv', - 'title': 'Campagne intime - Documentaire exceptionnel', - 'description': 'md5:d2643b799fb190846ae09c61e59a859f', - 'upload_date': '20131108', - }, - 'skip': 'videos get deleted after a while', - }, { - 'url': 'http://www.itele.fr/france/video/aubervilliers-un-lycee-en-colere-111559', - 'md5': '38b8f7934def74f0d6f3ba6c036a5f82', - 'info_dict': { - 'id': '1213714', + 'id': '1390231', 'ext': 'mp4', - 'title': 'Aubervilliers : un lycée en colère - Le 11/02/2015 à 06h45', - 'description': 'md5:8216206ec53426ea6321321f3b3c16db', - 'upload_date': '20150211', + 'title': "Vacances pas chères : prix discount ou grosses dépenses ? 
- En quête d'actualité", + 'description': 'md5:edb6cf1cb4a1e807b5dd089e1ac8bfc6', + 'upload_date': '20160512', + }, + 'params': { + 'skip_download': True, + }, + }, { + 'url': 'http://www.itele.fr/chroniques/invite-bruce-toussaint/thierry-solere-nicolas-sarkozy-officialisera-sa-candidature-a-la-primaire-quand-il-le-voudra-167224', + 'info_dict': { + 'id': '1398334', + 'ext': 'mp4', + 'title': "L'invité de Bruce Toussaint du 07/06/2016 - ", + 'description': 'md5:40ac7c9ad0feaeb6f605bad986f61324', + 'upload_date': '20160607', + }, + 'params': { + 'skip_download': True, }, }, { 'url': 'http://m.canalplus.fr/?vid=1398231', From 57b6e9652e27aa46395dab6238e54d63746f9a0e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 7 Jun 2016 22:32:08 +0700 Subject: [PATCH 39/43] [canal+] Add support for d17.tv --- youtube_dl/extractor/canalplus.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/youtube_dl/extractor/canalplus.py b/youtube_dl/extractor/canalplus.py index 605c5e957..61463f249 100644 --- a/youtube_dl/extractor/canalplus.py +++ b/youtube_dl/extractor/canalplus.py @@ -23,6 +23,7 @@ class CanalplusIE(InfoExtractor): (?:(?:www|m)\.)?canalplus\.fr| (?:www\.)?piwiplus\.fr| (?:www\.)?d8\.tv| + (?:www\.)?d17\.tv| (?:www\.)?itele\.fr )/(?:(?:[^/]+/)*(?P[^/?#&]+))?(?:\?.*\bvid=(?P\d+))?| player\.canalplus\.fr/#/(?P\d+) @@ -34,6 +35,7 @@ class CanalplusIE(InfoExtractor): 'canalplus': 'cplus', 'piwiplus': 'teletoon', 'd8': 'd8', + 'd17': 'd17', 'itele': 'itele', } @@ -84,6 +86,9 @@ class CanalplusIE(InfoExtractor): }, { 'url': 'http://m.canalplus.fr/?vid=1398231', 'only_matching': True, + }, { + 'url': 'http://www.d17.tv/emissions/pid8303-lolywood.html?vid=1397061', + 'only_matching': True, }] def _real_extract(self, url): From a6571f1073eab6c9a4cc9800a0bff31cf12fe09f Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Wed, 8 Jun 2016 00:19:33 +0800 Subject: [PATCH 40/43] [common] Fix detection in F4M manifests Regression since 0a5685b26fae0940f14cb063a6e4fc6986f9c124 --- youtube_dl/extractor/common.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 57793537b..bfd432160 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -45,6 +45,7 @@ from ..utils import ( unescapeHTML, unified_strdate, url_basename, + xpath_element, xpath_text, xpath_with_ns, determine_protocol, @@ -1030,7 +1031,7 @@ class InfoExtractor(object): if base_url: base_url = base_url.strip() - bootstrap_info = xpath_text( + bootstrap_info = xpath_element( manifest, ['{http://ns.adobe.com/f4m/1.0}bootstrapInfo', '{http://ns.adobe.com/f4m/2.0}bootstrapInfo'], 'bootstrap info', default=None) @@ -1085,7 +1086,7 @@ class InfoExtractor(object): formats.append({ 'format_id': format_id, 'url': manifest_url, - 'ext': 'flv' if bootstrap_info else None, + 'ext': 'flv' if bootstrap_info is not None else None, 'tbr': tbr, 'width': width, 'height': height, From a4a8201c02d06bff384ecb66a257dbec0652ff52 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Wed, 8 Jun 2016 00:25:51 +0800 Subject: [PATCH 41/43] [wdr] Update _TESTS --- youtube_dl/extractor/wdr.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/wdr.py b/youtube_dl/extractor/wdr.py index 1e729cb7c..6174eb19f 100644 --- a/youtube_dl/extractor/wdr.py +++ b/youtube_dl/extractor/wdr.py @@ -21,7 +21,7 @@ class WDRIE(InfoExtractor): _TESTS = [ { 'url': 
'http://www1.wdr.de/mediathek/video/sendungen/doku-am-freitag/video-geheimnis-aachener-dom-100.html', - 'md5': 'e58c39c3e30077141d258bf588700a7b', + # HDS download, MD5 is unstable 'info_dict': { 'id': 'mdb-1058683', 'ext': 'flv', @@ -35,7 +35,6 @@ class WDRIE(InfoExtractor): 'url': 'http://ondemand-ww.wdr.de/medp/fsk0/105/1058683/1058683_12220974.xml' }]}, }, - 'skip': 'Page Not Found', }, { 'url': 'http://www1.wdr.de/mediathek/audio/wdr3/wdr3-gespraech-am-samstag/audio-schriftstellerin-juli-zeh-100.html', @@ -51,7 +50,6 @@ class WDRIE(InfoExtractor): 'is_live': False, 'subtitles': {} }, - 'skip': 'Page Not Found', }, { 'url': 'http://www1.wdr.de/mediathek/video/live/index.html', @@ -90,7 +88,7 @@ class WDRIE(InfoExtractor): }, { 'url': 'http://www.wdrmaus.de/sachgeschichten/sachgeschichten/achterbahn.php5', - 'md5': 'ca365705551e4bd5217490f3b0591290', + # HDS download, MD5 is unstable 'info_dict': { 'id': 'mdb-186083', 'ext': 'flv', @@ -98,9 +96,6 @@ class WDRIE(InfoExtractor): 'title': 'Sachgeschichte - Achterbahn ', 'description': '- Die Sendung mit der Maus -', }, - 'params': { - 'skip_download': True, # the file has different versions :( - }, }, ] From a26a9d62396641364690974de9c859cf26f9acf6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 7 Jun 2016 23:53:08 +0700 Subject: [PATCH 42/43] [livestream:event] Ensure video id is string (Closes #9721) --- youtube_dl/extractor/livestream.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/livestream.py b/youtube_dl/extractor/livestream.py index 0edc06c43..bc7894bf1 100644 --- a/youtube_dl/extractor/livestream.py +++ b/youtube_dl/extractor/livestream.py @@ -203,9 +203,10 @@ class LivestreamIE(InfoExtractor): if not videos_info: break for v in videos_info: + v_id = compat_str(v['id']) entries.append(self.url_result( - 'http://livestream.com/accounts/%s/events/%s/videos/%s' % (account_id, event_id, v['id']), - 'Livestream', v['id'], v['caption'])) + 'http://livestream.com/accounts/%s/events/%s/videos/%s' % (account_id, event_id, v_id), + 'Livestream', v_id, v.get('caption'))) last_video = videos_info[-1]['id'] return self.playlist_result(entries, event_id, event_data['full_name']) From 33d9f3707ccccfe8d73c1b398f198792e80a259f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 8 Jun 2016 02:22:04 +0700 Subject: [PATCH 43/43] [thesixtyone] Relax _VALID_URL (Closes #9714) --- youtube_dl/extractor/thesixtyone.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/thesixtyone.py b/youtube_dl/extractor/thesixtyone.py index d8b1fd281..d63aef5de 100644 --- a/youtube_dl/extractor/thesixtyone.py +++ b/youtube_dl/extractor/thesixtyone.py @@ -12,7 +12,7 @@ class TheSixtyOneIE(InfoExtractor): s| song/comments/list| song - )/(?P[A-Za-z0-9]+)/?$''' + )/(?:[^/]+/)?(?P[A-Za-z0-9]+)/?$''' _SONG_URL_TEMPLATE = 'http://thesixtyone.com/s/{0:}' _SONG_FILE_URL_TEMPLATE = 'http://{audio_server:}/thesixtyone_production/audio/{0:}_stream' _THUMBNAIL_URL_TEMPLATE = '{photo_base_url:}_desktop' @@ -45,6 +45,10 @@ class TheSixtyOneIE(InfoExtractor): 'url': 'http://www.thesixtyone.com/song/SrE3zD7s1jt/', 'only_matching': True, }, + { + 'url': 'http://www.thesixtyone.com/maryatmidnight/song/StrawberriesandCream/yvWtLp0c4GQ/', + 'only_matching': True, + }, ] _DECODE_MAP = {