From 178ee88319a384b66d9b2da27a819f32ba870425 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 17 Mar 2018 23:57:07 +0700 Subject: [PATCH 01/13] [generic] Add support for xfileshare embeds (closes #15879) --- youtube_dl/extractor/generic.py | 6 ++++++ youtube_dl/extractor/xfileshare.py | 9 +++++++++ 2 files changed, 15 insertions(+) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index a98f3636a..dbd565066 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -104,6 +104,7 @@ from .mediasite import MediasiteIE from .springboardplatform import SpringboardPlatformIE from .yapfiles import YapFilesIE from .vice import ViceIE +from .xfileshare import XFileShareIE class GenericIE(InfoExtractor): @@ -2971,6 +2972,11 @@ class GenericIE(InfoExtractor): return self.playlist_from_matches( vice_urls, video_id, video_title, ie=ViceIE.ie_key()) + xfileshare_urls = XFileShareIE._extract_urls(webpage) + if xfileshare_urls: + return self.playlist_from_matches( + xfileshare_urls, video_id, video_title, ie=XFileShareIE.ie_key()) + def merge_dicts(dict1, dict2): merged = {} for k, v in dict1.items(): diff --git a/youtube_dl/extractor/xfileshare.py b/youtube_dl/extractor/xfileshare.py index ad747978d..bc3239f68 100644 --- a/youtube_dl/extractor/xfileshare.py +++ b/youtube_dl/extractor/xfileshare.py @@ -118,6 +118,15 @@ class XFileShareIE(InfoExtractor): 'only_matching': True }] + @staticmethod + def _extract_urls(webpage): + return [ + mobj.group('url') + for mobj in re.finditer( + r']+\bsrc=(["\'])(?P(?:https?:)?//(?:%s)/embed-[0-9a-zA-Z]+.*?)\1' + % '|'.join(site for site in list(zip(*XFileShareIE._SITES))[0]), + webpage)] + def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') From 96b8b9abaecb7518d901dc9d6a617f19c3161236 Mon Sep 17 00:00:00 2001 From: Ricardo Constantino Date: Wed, 7 Mar 2018 21:31:53 +0000 Subject: [PATCH 02/13] [extractor/generic] Support relative URIs in _parse_xspf can have relative URIs, not just absolute. --- test/test_InfoExtractor.py | 42 ++++++++++++++++++++++++++++++++ test/testdata/xspf/foo_xspf.xspf | 34 ++++++++++++++++++++++++++ youtube_dl/extractor/common.py | 6 ++--- youtube_dl/extractor/generic.py | 4 ++- 4 files changed, 82 insertions(+), 4 deletions(-) create mode 100644 test/testdata/xspf/foo_xspf.xspf diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py index 7b31d5198..a695ce64b 100644 --- a/test/test_InfoExtractor.py +++ b/test/test_InfoExtractor.py @@ -694,6 +694,48 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/ self.ie._sort_formats(formats) expect_value(self, formats, expected_formats, None) + def test_parse_xspf(self): + _TEST_CASES = [ + ( + 'foo_xspf', + 'https://example.org/src/', + [{ + 'description': 'Visit http://bigbrother404.bandcamp.com', + 'duration': 202.416, + 'formats': [{'url': 'https://example.org/src/cd1/track%201.mp3'}], + 'id': 'foo_xspf', + 'title': 'Pandemonium' + }, + { + 'description': 'Visit http://bigbrother404.bandcamp.com', + 'duration': 255.857, + 'formats': [{'url': 'https://example.org/%E3%83%88%E3%83%A9%E3%83%83%E3%82%AF%E3%80%80%EF%BC%92.mp3'}], + 'id': 'foo_xspf', + 'title': 'Final Cartridge (Nichico Twelve Remix)' + }, + { + 'description': 'Visit http://bigbrother404.bandcamp.com', + 'duration': 287.915, + 'formats': [ + {'url': 'https://example.org/src/track3.mp3'}, + {'url': 'https://example.com/track3.mp3'} + ], + 'id': 'foo_xspf', + 'title': 'Rebuilding Nightingale' + }] + ), + ] + + for xspf_file, xspf_base_url, expected_entries in _TEST_CASES: + with io.open('./test/testdata/xspf/%s.xspf' % xspf_file, + mode='r', encoding='utf-8') as f: + entries = self.ie._parse_xspf( + compat_etree_fromstring(f.read().encode('utf-8')), + xspf_file, xspf_base_url) + expect_value(self, entries, expected_entries, None) + for i in range(len(entries)): + expect_dict(self, entries[i], expected_entries[i]) + if __name__ == '__main__': unittest.main() diff --git a/test/testdata/xspf/foo_xspf.xspf b/test/testdata/xspf/foo_xspf.xspf new file mode 100644 index 000000000..b7f0086b3 --- /dev/null +++ b/test/testdata/xspf/foo_xspf.xspf @@ -0,0 +1,34 @@ + + + 2018-03-09T18:01:43Z + + + cd1/track%201.mp3 + Pandemonium + Foilverb + Visit http://bigbrother404.bandcamp.com + Pandemonium EP + 1 + 202416 + + + ../%E3%83%88%E3%83%A9%E3%83%83%E3%82%AF%E3%80%80%EF%BC%92.mp3 + Final Cartridge (Nichico Twelve Remix) + Visit http://bigbrother404.bandcamp.com + Foilverb + Pandemonium EP + 2 + 255857 + + + track3.mp3 + https://example.com/track3.mp3 + Rebuilding Nightingale + Visit http://bigbrother404.bandcamp.com + Foilverb + Pandemonium EP + 3 + 287915 + + + diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index fcdd0fd14..c1e1012e7 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1700,9 +1700,9 @@ class InfoExtractor(object): 'Unable to download xspf manifest', fatal=fatal) if xspf is False: return [] - return self._parse_xspf(xspf, playlist_id) + return self._parse_xspf(xspf, playlist_id, base_url(playlist_url)) - def _parse_xspf(self, playlist, playlist_id): + def _parse_xspf(self, playlist, playlist_id, playlist_base_url=''): NS_MAP = { 'xspf': 'http://xspf.org/ns/0/', 's1': 'http://static.streamone.nl/player/ns/0', @@ -1720,7 +1720,7 @@ class InfoExtractor(object): xpath_text(track, xpath_with_ns('./xspf:duration', NS_MAP), 'duration'), 1000) formats = [{ - 'url': location.text, + 'url': urljoin(playlist_base_url, location.text), 'format_id': location.get(xpath_with_ns('s1:label', NS_MAP)), 'width': int_or_none(location.get(xpath_with_ns('s1:width', NS_MAP))), 'height': int_or_none(location.get(xpath_with_ns('s1:height', NS_MAP))), diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index dbd565066..023ccbc9b 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -2232,7 +2232,9 @@ class GenericIE(InfoExtractor): self._sort_formats(smil['formats']) return smil elif doc.tag == '{http://xspf.org/ns/0/}playlist': - return self.playlist_result(self._parse_xspf(doc, video_id), video_id) + return self.playlist_result( + self._parse_xspf(doc, video_id, compat_str(full_response.geturl())), + video_id) elif re.match(r'(?i)^(?:{[^}]+})?MPD$', doc.tag): info_dict['formats'] = self._parse_mpd_formats( doc, From e0d198c18d4a5f191adbfb43259c104d16e30596 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 18 Mar 2018 02:17:34 +0700 Subject: [PATCH 03/13] [extractor/common] Add _download_xml_handle --- youtube_dl/extractor/common.py | 28 ++++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index c1e1012e7..a50778509 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -644,19 +644,31 @@ class InfoExtractor(object): content, _ = res return content + def _download_xml_handle( + self, url_or_request, video_id, note='Downloading XML', + errnote='Unable to download XML', transform_source=None, + fatal=True, encoding=None, data=None, headers={}, query={}): + """Return a tuple (xml as an xml.etree.ElementTree.Element, URL handle)""" + res = self._download_webpage_handle( + url_or_request, video_id, note, errnote, fatal=fatal, + encoding=encoding, data=data, headers=headers, query=query) + if res is False: + return res + xml_string, urlh = res + return self._parse_xml( + xml_string, video_id, transform_source=transform_source, + fatal=fatal), urlh + def _download_xml(self, url_or_request, video_id, note='Downloading XML', errnote='Unable to download XML', transform_source=None, fatal=True, encoding=None, data=None, headers={}, query={}): """Return the xml as an xml.etree.ElementTree.Element""" - xml_string = self._download_webpage( - url_or_request, video_id, note, errnote, fatal=fatal, - encoding=encoding, data=data, headers=headers, query=query) - if xml_string is False: - return xml_string - return self._parse_xml( - xml_string, video_id, transform_source=transform_source, - fatal=fatal) + res = self._download_xml_handle( + url_or_request, video_id, note=note, errnote=errnote, + transform_source=transform_source, fatal=fatal, encoding=encoding, + data=data, headers=headers, query=query) + return res if res is False else res[0] def _parse_xml(self, xml_string, video_id, transform_source=None, fatal=True): if transform_source: From 47a5cb77344536ca79d81a04904ac9ef9b02050f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 18 Mar 2018 02:46:50 +0700 Subject: [PATCH 04/13] Generalize XML manifest processing code and improve XSPF parsing (closes #15794) --- test/test_InfoExtractor.py | 41 ++++++++++++++++++------------- youtube_dl/extractor/common.py | 43 +++++++++++++++++++-------------- youtube_dl/extractor/generic.py | 4 ++- 3 files changed, 52 insertions(+), 36 deletions(-) diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py index a695ce64b..4833396a5 100644 --- a/test/test_InfoExtractor.py +++ b/test/test_InfoExtractor.py @@ -698,40 +698,47 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/ _TEST_CASES = [ ( 'foo_xspf', - 'https://example.org/src/', + 'https://example.org/src/foo_xspf.xspf', [{ + 'id': 'foo_xspf', + 'title': 'Pandemonium', 'description': 'Visit http://bigbrother404.bandcamp.com', 'duration': 202.416, - 'formats': [{'url': 'https://example.org/src/cd1/track%201.mp3'}], + 'formats': [{ + 'manifest_url': 'https://example.org/src/foo_xspf.xspf', + 'url': 'https://example.org/src/cd1/track%201.mp3', + }], + }, { 'id': 'foo_xspf', - 'title': 'Pandemonium' - }, - { + 'title': 'Final Cartridge (Nichico Twelve Remix)', 'description': 'Visit http://bigbrother404.bandcamp.com', 'duration': 255.857, - 'formats': [{'url': 'https://example.org/%E3%83%88%E3%83%A9%E3%83%83%E3%82%AF%E3%80%80%EF%BC%92.mp3'}], + 'formats': [{ + 'manifest_url': 'https://example.org/src/foo_xspf.xspf', + 'url': 'https://example.org/%E3%83%88%E3%83%A9%E3%83%83%E3%82%AF%E3%80%80%EF%BC%92.mp3', + }], + }, { 'id': 'foo_xspf', - 'title': 'Final Cartridge (Nichico Twelve Remix)' - }, - { + 'title': 'Rebuilding Nightingale', 'description': 'Visit http://bigbrother404.bandcamp.com', 'duration': 287.915, - 'formats': [ - {'url': 'https://example.org/src/track3.mp3'}, - {'url': 'https://example.com/track3.mp3'} - ], - 'id': 'foo_xspf', - 'title': 'Rebuilding Nightingale' + 'formats': [{ + 'manifest_url': 'https://example.org/src/foo_xspf.xspf', + 'url': 'https://example.org/src/track3.mp3', + }, { + 'manifest_url': 'https://example.org/src/foo_xspf.xspf', + 'url': 'https://example.com/track3.mp3', + }] }] ), ] - for xspf_file, xspf_base_url, expected_entries in _TEST_CASES: + for xspf_file, xspf_url, expected_entries in _TEST_CASES: with io.open('./test/testdata/xspf/%s.xspf' % xspf_file, mode='r', encoding='utf-8') as f: entries = self.ie._parse_xspf( compat_etree_fromstring(f.read().encode('utf-8')), - xspf_file, xspf_base_url) + xspf_file, xspf_url=xspf_url, xspf_base_url=xspf_url) expect_value(self, entries, expected_entries, None) for i in range(len(entries)): expect_dict(self, entries[i], expected_entries[i]) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index a50778509..2e2a02948 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1706,22 +1706,24 @@ class InfoExtractor(object): }) return subtitles - def _extract_xspf_playlist(self, playlist_url, playlist_id, fatal=True): + def _extract_xspf_playlist(self, xspf_url, playlist_id, fatal=True): xspf = self._download_xml( - playlist_url, playlist_id, 'Downloading xpsf playlist', + xspf_url, playlist_id, 'Downloading xpsf playlist', 'Unable to download xspf manifest', fatal=fatal) if xspf is False: return [] - return self._parse_xspf(xspf, playlist_id, base_url(playlist_url)) + return self._parse_xspf( + xspf, playlist_id, xspf_url=xspf_url, + xspf_base_url=base_url(xspf_url)) - def _parse_xspf(self, playlist, playlist_id, playlist_base_url=''): + def _parse_xspf(self, xspf_doc, playlist_id, xspf_url=None, xspf_base_url=None): NS_MAP = { 'xspf': 'http://xspf.org/ns/0/', 's1': 'http://static.streamone.nl/player/ns/0', } entries = [] - for track in playlist.findall(xpath_with_ns('./xspf:trackList/xspf:track', NS_MAP)): + for track in xspf_doc.findall(xpath_with_ns('./xspf:trackList/xspf:track', NS_MAP)): title = xpath_text( track, xpath_with_ns('./xspf:title', NS_MAP), 'title', default=playlist_id) description = xpath_text( @@ -1731,12 +1733,18 @@ class InfoExtractor(object): duration = float_or_none( xpath_text(track, xpath_with_ns('./xspf:duration', NS_MAP), 'duration'), 1000) - formats = [{ - 'url': urljoin(playlist_base_url, location.text), - 'format_id': location.get(xpath_with_ns('s1:label', NS_MAP)), - 'width': int_or_none(location.get(xpath_with_ns('s1:width', NS_MAP))), - 'height': int_or_none(location.get(xpath_with_ns('s1:height', NS_MAP))), - } for location in track.findall(xpath_with_ns('./xspf:location', NS_MAP))] + formats = [] + for location in track.findall(xpath_with_ns('./xspf:location', NS_MAP)): + format_url = urljoin(xspf_base_url, location.text) + if not format_url: + continue + formats.append({ + 'url': format_url, + 'manifest_url': xspf_url, + 'format_id': location.get(xpath_with_ns('s1:label', NS_MAP)), + 'width': int_or_none(location.get(xpath_with_ns('s1:width', NS_MAP))), + 'height': int_or_none(location.get(xpath_with_ns('s1:height', NS_MAP))), + }) self._sort_formats(formats) entries.append({ @@ -1750,18 +1758,18 @@ class InfoExtractor(object): return entries def _extract_mpd_formats(self, mpd_url, video_id, mpd_id=None, note=None, errnote=None, fatal=True, formats_dict={}): - res = self._download_webpage_handle( + res = self._download_xml_handle( mpd_url, video_id, note=note or 'Downloading MPD manifest', errnote=errnote or 'Failed to download MPD manifest', fatal=fatal) if res is False: return [] - mpd, urlh = res + mpd_doc, urlh = res mpd_base_url = base_url(urlh.geturl()) return self._parse_mpd_formats( - compat_etree_fromstring(mpd.encode('utf-8')), mpd_id, mpd_base_url, + mpd_doc, mpd_id=mpd_id, mpd_base_url=mpd_base_url, formats_dict=formats_dict, mpd_url=mpd_url) def _parse_mpd_formats(self, mpd_doc, mpd_id=None, mpd_base_url='', formats_dict={}, mpd_url=None): @@ -2035,17 +2043,16 @@ class InfoExtractor(object): return formats def _extract_ism_formats(self, ism_url, video_id, ism_id=None, note=None, errnote=None, fatal=True): - res = self._download_webpage_handle( + res = self._download_xml_handle( ism_url, video_id, note=note or 'Downloading ISM manifest', errnote=errnote or 'Failed to download ISM manifest', fatal=fatal) if res is False: return [] - ism, urlh = res + ism_doc, urlh = res - return self._parse_ism_formats( - compat_etree_fromstring(ism.encode('utf-8')), urlh.geturl(), ism_id) + return self._parse_ism_formats(ism_doc, urlh.geturl(), ism_id) def _parse_ism_formats(self, ism_doc, ism_url, ism_id=None): """ diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 023ccbc9b..1cc491b19 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -2233,7 +2233,9 @@ class GenericIE(InfoExtractor): return smil elif doc.tag == '{http://xspf.org/ns/0/}playlist': return self.playlist_result( - self._parse_xspf(doc, video_id, compat_str(full_response.geturl())), + self._parse_xspf( + doc, video_id, xspf_url=url, + xspf_base_url=compat_str(full_response.geturl())), video_id) elif re.match(r'(?i)^(?:{[^}]+})?MPD$', doc.tag): info_dict['formats'] = self._parse_mpd_formats( From 6e3f23d912ae2b7018a13f87ff89572dfac10d02 Mon Sep 17 00:00:00 2001 From: kayb94 <30302445+kayb94@users.noreply.github.com> Date: Sun, 18 Mar 2018 21:14:33 +0000 Subject: [PATCH 05/13] [prosiebensat1] Add support for galileo.tv (closes #15894) --- youtube_dl/extractor/prosiebensat1.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/prosiebensat1.py b/youtube_dl/extractor/prosiebensat1.py index 7efff4566..d0955d079 100644 --- a/youtube_dl/extractor/prosiebensat1.py +++ b/youtube_dl/extractor/prosiebensat1.py @@ -133,7 +133,7 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE): (?: prosieben(?:maxx)?|sixx|sat1(?:gold)?|kabeleins(?:doku)?|the-voice-of-germany|7tv|advopedia )\.(?:de|at|ch)| - ran\.de|fem\.com|advopedia\.de + ran\.de|fem\.com|advopedia\.de|galileo\.tv/video ) /(?P.+) ''' @@ -326,6 +326,11 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE): 'url': 'http://www.sat1gold.de/tv/edel-starck/video/11-staffel-1-episode-1-partner-wider-willen-ganze-folge', 'only_matching': True, }, + { + # geo restricted to Germany + 'url': 'https://www.galileo.tv/video/diese-emojis-werden-oft-missverstanden', + 'only_matching': True, + }, { 'url': 'http://www.sat1gold.de/tv/edel-starck/playlist/die-gesamte-1-staffel', 'only_matching': True, @@ -343,7 +348,7 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE): r'"clip_id"\s*:\s+"(\d+)"', r'clipid: "(\d+)"', r'clip[iI]d=(\d+)', - r'clip[iI]d\s*=\s*["\'](\d+)', + r'clip[iI][dD]\s*=\s*["\'](\d+)', r"'itemImageUrl'\s*:\s*'/dynamic/thumbnails/full/\d+/(\d+)", r'proMamsId"\s*:\s*"(\d+)', r'proMamsId"\s*:\s*"(\d+)', From 9a054fcbbadf06101b081f8be0594b38b654364f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 19 Mar 2018 23:28:37 +0700 Subject: [PATCH 06/13] [ceskatelevize] Add support for iframe embeds (closes #15918) --- youtube_dl/extractor/ceskatelevize.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/ceskatelevize.py b/youtube_dl/extractor/ceskatelevize.py index e250de18c..6bad90859 100644 --- a/youtube_dl/extractor/ceskatelevize.py +++ b/youtube_dl/extractor/ceskatelevize.py @@ -13,6 +13,7 @@ from ..utils import ( float_or_none, sanitized_Request, unescapeHTML, + update_url_query, urlencode_postdata, USER_AGENTS, ) @@ -265,6 +266,10 @@ class CeskaTelevizePoradyIE(InfoExtractor): # m3u8 download 'skip_download': True, }, + }, { + # iframe embed + 'url': 'http://www.ceskatelevize.cz/porady/10614999031-neviditelni/21251212048/', + 'only_matching': True, }] def _real_extract(self, url): @@ -272,8 +277,11 @@ class CeskaTelevizePoradyIE(InfoExtractor): webpage = self._download_webpage(url, video_id) - data_url = unescapeHTML(self._search_regex( - r']*\bdata-url=(["\'])(?P(?:(?!\1).)+)\1', - webpage, 'iframe player url', group='url')) + data_url = update_url_query(unescapeHTML(self._search_regex( + (r']*\bdata-url=(["\'])(?P(?:(?!\1).)+)\1', + r']+\bsrc=(["\'])(?P(?:https?:)?//(?:www\.)?ceskatelevize\.cz/ivysilani/embed/iFramePlayer\.php.*?)\1'), + webpage, 'iframe player url', group='url')), query={ + 'autoStart': 'true', + }) return self.url_result(data_url, ie=CeskaTelevizeIE.ie_key()) From 38f59e2793dfcb5f493977857304ab50b784e6ee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 19 Mar 2018 23:40:19 +0700 Subject: [PATCH 07/13] [canalc2] Add support for HTML5 videos (closes #15916, closes #15919) --- youtube_dl/extractor/canalc2.py | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/canalc2.py b/youtube_dl/extractor/canalc2.py index acd87e371..407cc8084 100644 --- a/youtube_dl/extractor/canalc2.py +++ b/youtube_dl/extractor/canalc2.py @@ -31,6 +31,10 @@ class Canalc2IE(InfoExtractor): webpage = self._download_webpage( 'http://www.canalc2.tv/video/%s' % video_id, video_id) + title = self._html_search_regex( + r'(?s)class="[^"]*col_description[^"]*">.*?

(.+?)

', + webpage, 'title') + formats = [] for _, video_url in re.findall(r'file\s*=\s*(["\'])(.+?)\1', webpage): if video_url.startswith('rtmp://'): @@ -49,17 +53,21 @@ class Canalc2IE(InfoExtractor): 'url': video_url, 'format_id': 'http', }) - self._sort_formats(formats) - title = self._html_search_regex( - r'(?s)class="[^"]*col_description[^"]*">.*?

(.*?)

', webpage, 'title') - duration = parse_duration(self._search_regex( - r'id=["\']video_duree["\'][^>]*>([^<]+)', - webpage, 'duration', fatal=False)) + if formats: + info = { + 'formats': formats, + } + else: + info = self._parse_html5_media_entries(url, webpage, url)[0] - return { + self._sort_formats(info['formats']) + + info.update({ 'id': video_id, 'title': title, - 'duration': duration, - 'formats': formats, - } + 'duration': parse_duration(self._search_regex( + r'id=["\']video_duree["\'][^>]*>([^<]+)', + webpage, 'duration', fatal=False)), + }) + return info From 6780154e6bcdabdb35a24d2b1c5049c94fbe27a1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 19 Mar 2018 23:43:53 +0700 Subject: [PATCH 08/13] [extractor/common] Improve thumbnail extraction for HTML5 entries --- youtube_dl/extractor/common.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 2e2a02948..890232586 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -2150,8 +2150,8 @@ class InfoExtractor(object): return formats def _parse_html5_media_entries(self, base_url, webpage, video_id, m3u8_id=None, m3u8_entry_protocol='m3u8', mpd_id=None, preference=None): - def absolute_url(video_url): - return compat_urlparse.urljoin(base_url, video_url) + def absolute_url(item_url): + return urljoin(base_url, item_url) def parse_content_type(content_type): if not content_type: @@ -2208,7 +2208,7 @@ class InfoExtractor(object): if src: _, formats = _media_formats(src, media_type) media_info['formats'].extend(formats) - media_info['thumbnail'] = media_attributes.get('poster') + media_info['thumbnail'] = absolute_url(media_attributes.get('poster')) if media_content: for source_tag in re.findall(r']+>', media_content): source_attributes = extract_attributes(source_tag) From 21dedcb5804b070bea143e4670df3b6f2951a078 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 20 Mar 2018 00:27:39 +0700 Subject: [PATCH 09/13] [cbs] Skip unavailable assets (closes #13490, closes #13506, closes #15776) --- youtube_dl/extractor/cbs.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/cbs.py b/youtube_dl/extractor/cbs.py index 1268e38ef..f425562ab 100644 --- a/youtube_dl/extractor/cbs.py +++ b/youtube_dl/extractor/cbs.py @@ -2,6 +2,7 @@ from __future__ import unicode_literals from .theplatform import ThePlatformFeedIE from ..utils import ( + ExtractorError, int_or_none, find_xpath_attr, xpath_element, @@ -61,6 +62,7 @@ class CBSIE(CBSBaseIE): asset_types = [] subtitles = {} formats = [] + last_e = None for item in items_data.findall('.//item'): asset_type = xpath_text(item, 'assetType') if not asset_type or asset_type in asset_types: @@ -74,11 +76,17 @@ class CBSIE(CBSBaseIE): query['formats'] = 'MPEG4,M3U' elif asset_type in ('RTMP', 'WIFI', '3G'): query['formats'] = 'MPEG4,FLV' - tp_formats, tp_subtitles = self._extract_theplatform_smil( - update_url_query(tp_release_url, query), content_id, - 'Downloading %s SMIL data' % asset_type) + try: + tp_formats, tp_subtitles = self._extract_theplatform_smil( + update_url_query(tp_release_url, query), content_id, + 'Downloading %s SMIL data' % asset_type) + except ExtractorError as e: + last_e = e + continue formats.extend(tp_formats) subtitles = self._merge_subtitles(subtitles, tp_subtitles) + if last_e and not formats: + raise last_e self._sort_formats(formats) info = self._extract_theplatform_metadata(tp_path, content_id) From 832f9d5258ac53e916515ad0b6b1490c872d6174 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 20 Mar 2018 01:06:58 +0700 Subject: [PATCH 10/13] [9now] Bypass geo restriction (closes #15920) --- youtube_dl/extractor/ninenow.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/ninenow.py b/youtube_dl/extractor/ninenow.py index 351bea7ba..f32f530f7 100644 --- a/youtube_dl/extractor/ninenow.py +++ b/youtube_dl/extractor/ninenow.py @@ -4,15 +4,17 @@ from __future__ import unicode_literals from .common import InfoExtractor from ..compat import compat_str from ..utils import ( + ExtractorError, int_or_none, float_or_none, - ExtractorError, + smuggle_url, ) class NineNowIE(InfoExtractor): IE_NAME = '9now.com.au' _VALID_URL = r'https?://(?:www\.)?9now\.com\.au/(?:[^/]+/){2}(?P[^/?#]+)' + _GEO_COUNTRIES = ['AU'] _TESTS = [{ # clip 'url': 'https://www.9now.com.au/afl-footy-show/2016/clip-ciql02091000g0hp5oktrnytc', @@ -75,7 +77,9 @@ class NineNowIE(InfoExtractor): return { '_type': 'url_transparent', - 'url': self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, + 'url': smuggle_url( + self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, + {'geo_countries': self._GEO_COUNTRIES}), 'id': video_id, 'title': title, 'description': common_data.get('description'), From d9e2240f7c5d6b1a8ecd133625827f2c806dc9c1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 20 Mar 2018 01:40:53 +0700 Subject: [PATCH 11/13] [7plus] Extract series metadata (closes #15862, closes #15906) --- youtube_dl/extractor/sevenplus.py | 31 ++++++++++++++++++++++++------- 1 file changed, 24 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/sevenplus.py b/youtube_dl/extractor/sevenplus.py index 9792f820a..84568ac69 100644 --- a/youtube_dl/extractor/sevenplus.py +++ b/youtube_dl/extractor/sevenplus.py @@ -4,22 +4,30 @@ from __future__ import unicode_literals import re from .brightcove import BrightcoveNewIE -from ..utils import update_url_query +from ..compat import compat_str +from ..utils import ( + try_get, + update_url_query, +) class SevenPlusIE(BrightcoveNewIE): IE_NAME = '7plus' _VALID_URL = r'https?://(?:www\.)?7plus\.com\.au/(?P[^?]+\?.*?\bepisode-id=(?P[^&#]+))' _TESTS = [{ - 'url': 'https://7plus.com.au/BEAT?episode-id=BEAT-001', + 'url': 'https://7plus.com.au/MTYS?episode-id=MTYS7-003', 'info_dict': { - 'id': 'BEAT-001', + 'id': 'MTYS7-003', 'ext': 'mp4', - 'title': 'S1 E1 - Help / Lucy In The Sky With Diamonds', - 'description': 'md5:37718bea20a8eedaca7f7361af566131', + 'title': 'S7 E3 - Wind Surf', + 'description': 'md5:29c6a69f21accda7601278f81b46483d', 'uploader_id': '5303576322001', - 'upload_date': '20171031', - 'timestamp': 1509440068, + 'upload_date': '20171201', + 'timestamp': 1512106377, + 'series': 'Mighty Ships', + 'season_number': 7, + 'episode_number': 3, + 'episode': 'Wind Surf', }, 'params': { 'format': 'bestvideo', @@ -63,5 +71,14 @@ class SevenPlusIE(BrightcoveNewIE): value = item.get(src_key) if value: info[dst_key] = value + info['series'] = try_get( + item, lambda x: x['seriesLogo']['name'], compat_str) + mobj = re.search(r'^S(\d+)\s+E(\d+)\s+-\s+(.+)$', info['title']) + if mobj: + info.update({ + 'season_number': int(mobj.group(1)), + 'episode_number': int(mobj.group(2)), + 'episode': mobj.group(3), + }) return info From c651de39d51cddf5ddefb446a89a62a6a424c39c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 20 Mar 2018 01:49:22 +0700 Subject: [PATCH 12/13] [ChangeLog] Actualize [ci skip] --- ChangeLog | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/ChangeLog b/ChangeLog index 47736e076..f3a1ca60d 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,25 @@ +version + +Core +* [extractor/common] Improve thumbnail extraction for HTML5 entries +* Generalize XML manifest processing code and improve XSPF parsing ++ [extractor/common] Add _download_xml_handle ++ [extractor/common] Add support for relative URIs in _parse_xspf (#15794) + +Extractors ++ [7plus] Extract series metadata (#15862, #15906) +* [9now] Bypass geo restriction (#15920) +* [cbs] Skip unavailable assets (#13490, #13506, #15776) ++ [canalc2] Add support for HTML5 videos (#15916, #15919) ++ [ceskatelevize] Add support for iframe embeds (#15918) ++ [prosiebensat1] Add support for galileo.tv (#15894) ++ [generic] Add support for xfileshare embeds (#15879) +* [bilibili] Switch to v2 playurl API +* [bilibili] Fix and improve extraction (#15048, #15430, #15622, #15863) +* [heise] Improve extraction (#15496, #15784, #15026) +* [instagram] Fix user videos extraction (#15858) + + version 2018.03.14 Extractors From a66d1d079a3c2f2791b0a67c97cc9cec8c2faffd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 20 Mar 2018 01:55:48 +0700 Subject: [PATCH 13/13] release 2018.03.20 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- youtube_dl/version.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 481e2ed74..75c5b2226 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.03.14*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.03.14** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.03.20*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.03.20** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -36,7 +36,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2018.03.14 +[debug] youtube-dl version 2018.03.20 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index f3a1ca60d..0d748316e 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2018.03.20 Core * [extractor/common] Improve thumbnail extraction for HTML5 entries diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 6ce11c39b..c686714f0 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2018.03.14' +__version__ = '2018.03.20'