diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 481e2ed74..75c5b2226 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.03.14*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.03.14** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.03.20*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.03.20** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -36,7 +36,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2018.03.14 +[debug] youtube-dl version 2018.03.20 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 47736e076..0d748316e 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,25 @@ +version 2018.03.20 + +Core +* [extractor/common] Improve thumbnail extraction for HTML5 entries +* Generalize XML manifest processing code and improve XSPF parsing ++ [extractor/common] Add _download_xml_handle ++ [extractor/common] Add support for relative URIs in _parse_xspf (#15794) + +Extractors ++ [7plus] Extract series metadata (#15862, #15906) +* [9now] Bypass geo restriction (#15920) +* [cbs] Skip unavailable assets (#13490, #13506, #15776) ++ [canalc2] Add support for HTML5 videos (#15916, #15919) ++ [ceskatelevize] Add support for iframe embeds (#15918) ++ [prosiebensat1] Add support for galileo.tv (#15894) ++ [generic] Add support for xfileshare embeds (#15879) +* [bilibili] Switch to v2 playurl API +* [bilibili] Fix and improve extraction (#15048, #15430, #15622, #15863) +* [heise] Improve extraction (#15496, #15784, #15026) +* [instagram] Fix user videos extraction (#15858) + + version 2018.03.14 Extractors diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py index 7b31d5198..4833396a5 100644 --- a/test/test_InfoExtractor.py +++ b/test/test_InfoExtractor.py @@ -694,6 +694,55 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/ self.ie._sort_formats(formats) expect_value(self, formats, expected_formats, None) + def test_parse_xspf(self): + _TEST_CASES = [ + ( + 'foo_xspf', + 'https://example.org/src/foo_xspf.xspf', + [{ + 'id': 'foo_xspf', + 'title': 'Pandemonium', + 'description': 'Visit http://bigbrother404.bandcamp.com', + 'duration': 202.416, + 'formats': [{ + 'manifest_url': 'https://example.org/src/foo_xspf.xspf', + 'url': 'https://example.org/src/cd1/track%201.mp3', + }], + }, { + 'id': 'foo_xspf', + 'title': 'Final Cartridge (Nichico Twelve Remix)', + 'description': 'Visit http://bigbrother404.bandcamp.com', + 'duration': 255.857, + 'formats': [{ + 'manifest_url': 'https://example.org/src/foo_xspf.xspf', + 'url': 'https://example.org/%E3%83%88%E3%83%A9%E3%83%83%E3%82%AF%E3%80%80%EF%BC%92.mp3', + }], + }, { + 'id': 'foo_xspf', + 'title': 'Rebuilding Nightingale', + 'description': 'Visit http://bigbrother404.bandcamp.com', + 'duration': 287.915, + 'formats': [{ + 'manifest_url': 'https://example.org/src/foo_xspf.xspf', + 'url': 'https://example.org/src/track3.mp3', + }, { + 'manifest_url': 'https://example.org/src/foo_xspf.xspf', + 'url': 'https://example.com/track3.mp3', + }] + }] + ), + ] + + for xspf_file, xspf_url, expected_entries in _TEST_CASES: + with io.open('./test/testdata/xspf/%s.xspf' % xspf_file, + mode='r', encoding='utf-8') as f: + entries = self.ie._parse_xspf( + compat_etree_fromstring(f.read().encode('utf-8')), + xspf_file, xspf_url=xspf_url, xspf_base_url=xspf_url) + expect_value(self, entries, expected_entries, None) + for i in range(len(entries)): + expect_dict(self, entries[i], expected_entries[i]) + if __name__ == '__main__': unittest.main() diff --git a/test/testdata/xspf/foo_xspf.xspf b/test/testdata/xspf/foo_xspf.xspf new file mode 100644 index 000000000..b7f0086b3 --- /dev/null +++ b/test/testdata/xspf/foo_xspf.xspf @@ -0,0 +1,34 @@ + + + 2018-03-09T18:01:43Z + + + cd1/track%201.mp3 + Pandemonium + Foilverb + Visit http://bigbrother404.bandcamp.com + Pandemonium EP + 1 + 202416 + + + ../%E3%83%88%E3%83%A9%E3%83%83%E3%82%AF%E3%80%80%EF%BC%92.mp3 + Final Cartridge (Nichico Twelve Remix) + Visit http://bigbrother404.bandcamp.com + Foilverb + Pandemonium EP + 2 + 255857 + + + track3.mp3 + https://example.com/track3.mp3 + Rebuilding Nightingale + Visit http://bigbrother404.bandcamp.com + Foilverb + Pandemonium EP + 3 + 287915 + + + diff --git a/youtube_dl/extractor/canalc2.py b/youtube_dl/extractor/canalc2.py index acd87e371..407cc8084 100644 --- a/youtube_dl/extractor/canalc2.py +++ b/youtube_dl/extractor/canalc2.py @@ -31,6 +31,10 @@ class Canalc2IE(InfoExtractor): webpage = self._download_webpage( 'http://www.canalc2.tv/video/%s' % video_id, video_id) + title = self._html_search_regex( + r'(?s)class="[^"]*col_description[^"]*">.*?

(.+?)

', + webpage, 'title') + formats = [] for _, video_url in re.findall(r'file\s*=\s*(["\'])(.+?)\1', webpage): if video_url.startswith('rtmp://'): @@ -49,17 +53,21 @@ class Canalc2IE(InfoExtractor): 'url': video_url, 'format_id': 'http', }) - self._sort_formats(formats) - title = self._html_search_regex( - r'(?s)class="[^"]*col_description[^"]*">.*?

(.*?)

', webpage, 'title') - duration = parse_duration(self._search_regex( - r'id=["\']video_duree["\'][^>]*>([^<]+)', - webpage, 'duration', fatal=False)) + if formats: + info = { + 'formats': formats, + } + else: + info = self._parse_html5_media_entries(url, webpage, url)[0] - return { + self._sort_formats(info['formats']) + + info.update({ 'id': video_id, 'title': title, - 'duration': duration, - 'formats': formats, - } + 'duration': parse_duration(self._search_regex( + r'id=["\']video_duree["\'][^>]*>([^<]+)', + webpage, 'duration', fatal=False)), + }) + return info diff --git a/youtube_dl/extractor/cbs.py b/youtube_dl/extractor/cbs.py index 1268e38ef..f425562ab 100644 --- a/youtube_dl/extractor/cbs.py +++ b/youtube_dl/extractor/cbs.py @@ -2,6 +2,7 @@ from __future__ import unicode_literals from .theplatform import ThePlatformFeedIE from ..utils import ( + ExtractorError, int_or_none, find_xpath_attr, xpath_element, @@ -61,6 +62,7 @@ class CBSIE(CBSBaseIE): asset_types = [] subtitles = {} formats = [] + last_e = None for item in items_data.findall('.//item'): asset_type = xpath_text(item, 'assetType') if not asset_type or asset_type in asset_types: @@ -74,11 +76,17 @@ class CBSIE(CBSBaseIE): query['formats'] = 'MPEG4,M3U' elif asset_type in ('RTMP', 'WIFI', '3G'): query['formats'] = 'MPEG4,FLV' - tp_formats, tp_subtitles = self._extract_theplatform_smil( - update_url_query(tp_release_url, query), content_id, - 'Downloading %s SMIL data' % asset_type) + try: + tp_formats, tp_subtitles = self._extract_theplatform_smil( + update_url_query(tp_release_url, query), content_id, + 'Downloading %s SMIL data' % asset_type) + except ExtractorError as e: + last_e = e + continue formats.extend(tp_formats) subtitles = self._merge_subtitles(subtitles, tp_subtitles) + if last_e and not formats: + raise last_e self._sort_formats(formats) info = self._extract_theplatform_metadata(tp_path, content_id) diff --git a/youtube_dl/extractor/ceskatelevize.py b/youtube_dl/extractor/ceskatelevize.py index e250de18c..6bad90859 100644 --- a/youtube_dl/extractor/ceskatelevize.py +++ b/youtube_dl/extractor/ceskatelevize.py @@ -13,6 +13,7 @@ from ..utils import ( float_or_none, sanitized_Request, unescapeHTML, + update_url_query, urlencode_postdata, USER_AGENTS, ) @@ -265,6 +266,10 @@ class CeskaTelevizePoradyIE(InfoExtractor): # m3u8 download 'skip_download': True, }, + }, { + # iframe embed + 'url': 'http://www.ceskatelevize.cz/porady/10614999031-neviditelni/21251212048/', + 'only_matching': True, }] def _real_extract(self, url): @@ -272,8 +277,11 @@ class CeskaTelevizePoradyIE(InfoExtractor): webpage = self._download_webpage(url, video_id) - data_url = unescapeHTML(self._search_regex( - r']*\bdata-url=(["\'])(?P(?:(?!\1).)+)\1', - webpage, 'iframe player url', group='url')) + data_url = update_url_query(unescapeHTML(self._search_regex( + (r']*\bdata-url=(["\'])(?P(?:(?!\1).)+)\1', + r']+\bsrc=(["\'])(?P(?:https?:)?//(?:www\.)?ceskatelevize\.cz/ivysilani/embed/iFramePlayer\.php.*?)\1'), + webpage, 'iframe player url', group='url')), query={ + 'autoStart': 'true', + }) return self.url_result(data_url, ie=CeskaTelevizeIE.ie_key()) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index fcdd0fd14..890232586 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -644,19 +644,31 @@ class InfoExtractor(object): content, _ = res return content + def _download_xml_handle( + self, url_or_request, video_id, note='Downloading XML', + errnote='Unable to download XML', transform_source=None, + fatal=True, encoding=None, data=None, headers={}, query={}): + """Return a tuple (xml as an xml.etree.ElementTree.Element, URL handle)""" + res = self._download_webpage_handle( + url_or_request, video_id, note, errnote, fatal=fatal, + encoding=encoding, data=data, headers=headers, query=query) + if res is False: + return res + xml_string, urlh = res + return self._parse_xml( + xml_string, video_id, transform_source=transform_source, + fatal=fatal), urlh + def _download_xml(self, url_or_request, video_id, note='Downloading XML', errnote='Unable to download XML', transform_source=None, fatal=True, encoding=None, data=None, headers={}, query={}): """Return the xml as an xml.etree.ElementTree.Element""" - xml_string = self._download_webpage( - url_or_request, video_id, note, errnote, fatal=fatal, - encoding=encoding, data=data, headers=headers, query=query) - if xml_string is False: - return xml_string - return self._parse_xml( - xml_string, video_id, transform_source=transform_source, - fatal=fatal) + res = self._download_xml_handle( + url_or_request, video_id, note=note, errnote=errnote, + transform_source=transform_source, fatal=fatal, encoding=encoding, + data=data, headers=headers, query=query) + return res if res is False else res[0] def _parse_xml(self, xml_string, video_id, transform_source=None, fatal=True): if transform_source: @@ -1694,22 +1706,24 @@ class InfoExtractor(object): }) return subtitles - def _extract_xspf_playlist(self, playlist_url, playlist_id, fatal=True): + def _extract_xspf_playlist(self, xspf_url, playlist_id, fatal=True): xspf = self._download_xml( - playlist_url, playlist_id, 'Downloading xpsf playlist', + xspf_url, playlist_id, 'Downloading xpsf playlist', 'Unable to download xspf manifest', fatal=fatal) if xspf is False: return [] - return self._parse_xspf(xspf, playlist_id) + return self._parse_xspf( + xspf, playlist_id, xspf_url=xspf_url, + xspf_base_url=base_url(xspf_url)) - def _parse_xspf(self, playlist, playlist_id): + def _parse_xspf(self, xspf_doc, playlist_id, xspf_url=None, xspf_base_url=None): NS_MAP = { 'xspf': 'http://xspf.org/ns/0/', 's1': 'http://static.streamone.nl/player/ns/0', } entries = [] - for track in playlist.findall(xpath_with_ns('./xspf:trackList/xspf:track', NS_MAP)): + for track in xspf_doc.findall(xpath_with_ns('./xspf:trackList/xspf:track', NS_MAP)): title = xpath_text( track, xpath_with_ns('./xspf:title', NS_MAP), 'title', default=playlist_id) description = xpath_text( @@ -1719,12 +1733,18 @@ class InfoExtractor(object): duration = float_or_none( xpath_text(track, xpath_with_ns('./xspf:duration', NS_MAP), 'duration'), 1000) - formats = [{ - 'url': location.text, - 'format_id': location.get(xpath_with_ns('s1:label', NS_MAP)), - 'width': int_or_none(location.get(xpath_with_ns('s1:width', NS_MAP))), - 'height': int_or_none(location.get(xpath_with_ns('s1:height', NS_MAP))), - } for location in track.findall(xpath_with_ns('./xspf:location', NS_MAP))] + formats = [] + for location in track.findall(xpath_with_ns('./xspf:location', NS_MAP)): + format_url = urljoin(xspf_base_url, location.text) + if not format_url: + continue + formats.append({ + 'url': format_url, + 'manifest_url': xspf_url, + 'format_id': location.get(xpath_with_ns('s1:label', NS_MAP)), + 'width': int_or_none(location.get(xpath_with_ns('s1:width', NS_MAP))), + 'height': int_or_none(location.get(xpath_with_ns('s1:height', NS_MAP))), + }) self._sort_formats(formats) entries.append({ @@ -1738,18 +1758,18 @@ class InfoExtractor(object): return entries def _extract_mpd_formats(self, mpd_url, video_id, mpd_id=None, note=None, errnote=None, fatal=True, formats_dict={}): - res = self._download_webpage_handle( + res = self._download_xml_handle( mpd_url, video_id, note=note or 'Downloading MPD manifest', errnote=errnote or 'Failed to download MPD manifest', fatal=fatal) if res is False: return [] - mpd, urlh = res + mpd_doc, urlh = res mpd_base_url = base_url(urlh.geturl()) return self._parse_mpd_formats( - compat_etree_fromstring(mpd.encode('utf-8')), mpd_id, mpd_base_url, + mpd_doc, mpd_id=mpd_id, mpd_base_url=mpd_base_url, formats_dict=formats_dict, mpd_url=mpd_url) def _parse_mpd_formats(self, mpd_doc, mpd_id=None, mpd_base_url='', formats_dict={}, mpd_url=None): @@ -2023,17 +2043,16 @@ class InfoExtractor(object): return formats def _extract_ism_formats(self, ism_url, video_id, ism_id=None, note=None, errnote=None, fatal=True): - res = self._download_webpage_handle( + res = self._download_xml_handle( ism_url, video_id, note=note or 'Downloading ISM manifest', errnote=errnote or 'Failed to download ISM manifest', fatal=fatal) if res is False: return [] - ism, urlh = res + ism_doc, urlh = res - return self._parse_ism_formats( - compat_etree_fromstring(ism.encode('utf-8')), urlh.geturl(), ism_id) + return self._parse_ism_formats(ism_doc, urlh.geturl(), ism_id) def _parse_ism_formats(self, ism_doc, ism_url, ism_id=None): """ @@ -2131,8 +2150,8 @@ class InfoExtractor(object): return formats def _parse_html5_media_entries(self, base_url, webpage, video_id, m3u8_id=None, m3u8_entry_protocol='m3u8', mpd_id=None, preference=None): - def absolute_url(video_url): - return compat_urlparse.urljoin(base_url, video_url) + def absolute_url(item_url): + return urljoin(base_url, item_url) def parse_content_type(content_type): if not content_type: @@ -2189,7 +2208,7 @@ class InfoExtractor(object): if src: _, formats = _media_formats(src, media_type) media_info['formats'].extend(formats) - media_info['thumbnail'] = media_attributes.get('poster') + media_info['thumbnail'] = absolute_url(media_attributes.get('poster')) if media_content: for source_tag in re.findall(r']+>', media_content): source_attributes = extract_attributes(source_tag) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index a98f3636a..1cc491b19 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -104,6 +104,7 @@ from .mediasite import MediasiteIE from .springboardplatform import SpringboardPlatformIE from .yapfiles import YapFilesIE from .vice import ViceIE +from .xfileshare import XFileShareIE class GenericIE(InfoExtractor): @@ -2231,7 +2232,11 @@ class GenericIE(InfoExtractor): self._sort_formats(smil['formats']) return smil elif doc.tag == '{http://xspf.org/ns/0/}playlist': - return self.playlist_result(self._parse_xspf(doc, video_id), video_id) + return self.playlist_result( + self._parse_xspf( + doc, video_id, xspf_url=url, + xspf_base_url=compat_str(full_response.geturl())), + video_id) elif re.match(r'(?i)^(?:{[^}]+})?MPD$', doc.tag): info_dict['formats'] = self._parse_mpd_formats( doc, @@ -2971,6 +2976,11 @@ class GenericIE(InfoExtractor): return self.playlist_from_matches( vice_urls, video_id, video_title, ie=ViceIE.ie_key()) + xfileshare_urls = XFileShareIE._extract_urls(webpage) + if xfileshare_urls: + return self.playlist_from_matches( + xfileshare_urls, video_id, video_title, ie=XFileShareIE.ie_key()) + def merge_dicts(dict1, dict2): merged = {} for k, v in dict1.items(): diff --git a/youtube_dl/extractor/ninenow.py b/youtube_dl/extractor/ninenow.py index 351bea7ba..f32f530f7 100644 --- a/youtube_dl/extractor/ninenow.py +++ b/youtube_dl/extractor/ninenow.py @@ -4,15 +4,17 @@ from __future__ import unicode_literals from .common import InfoExtractor from ..compat import compat_str from ..utils import ( + ExtractorError, int_or_none, float_or_none, - ExtractorError, + smuggle_url, ) class NineNowIE(InfoExtractor): IE_NAME = '9now.com.au' _VALID_URL = r'https?://(?:www\.)?9now\.com\.au/(?:[^/]+/){2}(?P[^/?#]+)' + _GEO_COUNTRIES = ['AU'] _TESTS = [{ # clip 'url': 'https://www.9now.com.au/afl-footy-show/2016/clip-ciql02091000g0hp5oktrnytc', @@ -75,7 +77,9 @@ class NineNowIE(InfoExtractor): return { '_type': 'url_transparent', - 'url': self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, + 'url': smuggle_url( + self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, + {'geo_countries': self._GEO_COUNTRIES}), 'id': video_id, 'title': title, 'description': common_data.get('description'), diff --git a/youtube_dl/extractor/prosiebensat1.py b/youtube_dl/extractor/prosiebensat1.py index 7efff4566..d0955d079 100644 --- a/youtube_dl/extractor/prosiebensat1.py +++ b/youtube_dl/extractor/prosiebensat1.py @@ -133,7 +133,7 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE): (?: prosieben(?:maxx)?|sixx|sat1(?:gold)?|kabeleins(?:doku)?|the-voice-of-germany|7tv|advopedia )\.(?:de|at|ch)| - ran\.de|fem\.com|advopedia\.de + ran\.de|fem\.com|advopedia\.de|galileo\.tv/video ) /(?P.+) ''' @@ -326,6 +326,11 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE): 'url': 'http://www.sat1gold.de/tv/edel-starck/video/11-staffel-1-episode-1-partner-wider-willen-ganze-folge', 'only_matching': True, }, + { + # geo restricted to Germany + 'url': 'https://www.galileo.tv/video/diese-emojis-werden-oft-missverstanden', + 'only_matching': True, + }, { 'url': 'http://www.sat1gold.de/tv/edel-starck/playlist/die-gesamte-1-staffel', 'only_matching': True, @@ -343,7 +348,7 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE): r'"clip_id"\s*:\s+"(\d+)"', r'clipid: "(\d+)"', r'clip[iI]d=(\d+)', - r'clip[iI]d\s*=\s*["\'](\d+)', + r'clip[iI][dD]\s*=\s*["\'](\d+)', r"'itemImageUrl'\s*:\s*'/dynamic/thumbnails/full/\d+/(\d+)", r'proMamsId"\s*:\s*"(\d+)', r'proMamsId"\s*:\s*"(\d+)', diff --git a/youtube_dl/extractor/sevenplus.py b/youtube_dl/extractor/sevenplus.py index 9792f820a..84568ac69 100644 --- a/youtube_dl/extractor/sevenplus.py +++ b/youtube_dl/extractor/sevenplus.py @@ -4,22 +4,30 @@ from __future__ import unicode_literals import re from .brightcove import BrightcoveNewIE -from ..utils import update_url_query +from ..compat import compat_str +from ..utils import ( + try_get, + update_url_query, +) class SevenPlusIE(BrightcoveNewIE): IE_NAME = '7plus' _VALID_URL = r'https?://(?:www\.)?7plus\.com\.au/(?P[^?]+\?.*?\bepisode-id=(?P[^&#]+))' _TESTS = [{ - 'url': 'https://7plus.com.au/BEAT?episode-id=BEAT-001', + 'url': 'https://7plus.com.au/MTYS?episode-id=MTYS7-003', 'info_dict': { - 'id': 'BEAT-001', + 'id': 'MTYS7-003', 'ext': 'mp4', - 'title': 'S1 E1 - Help / Lucy In The Sky With Diamonds', - 'description': 'md5:37718bea20a8eedaca7f7361af566131', + 'title': 'S7 E3 - Wind Surf', + 'description': 'md5:29c6a69f21accda7601278f81b46483d', 'uploader_id': '5303576322001', - 'upload_date': '20171031', - 'timestamp': 1509440068, + 'upload_date': '20171201', + 'timestamp': 1512106377, + 'series': 'Mighty Ships', + 'season_number': 7, + 'episode_number': 3, + 'episode': 'Wind Surf', }, 'params': { 'format': 'bestvideo', @@ -63,5 +71,14 @@ class SevenPlusIE(BrightcoveNewIE): value = item.get(src_key) if value: info[dst_key] = value + info['series'] = try_get( + item, lambda x: x['seriesLogo']['name'], compat_str) + mobj = re.search(r'^S(\d+)\s+E(\d+)\s+-\s+(.+)$', info['title']) + if mobj: + info.update({ + 'season_number': int(mobj.group(1)), + 'episode_number': int(mobj.group(2)), + 'episode': mobj.group(3), + }) return info diff --git a/youtube_dl/extractor/xfileshare.py b/youtube_dl/extractor/xfileshare.py index ad747978d..bc3239f68 100644 --- a/youtube_dl/extractor/xfileshare.py +++ b/youtube_dl/extractor/xfileshare.py @@ -118,6 +118,15 @@ class XFileShareIE(InfoExtractor): 'only_matching': True }] + @staticmethod + def _extract_urls(webpage): + return [ + mobj.group('url') + for mobj in re.finditer( + r']+\bsrc=(["\'])(?P(?:https?:)?//(?:%s)/embed-[0-9a-zA-Z]+.*?)\1' + % '|'.join(site for site in list(zip(*XFileShareIE._SITES))[0]), + webpage)] + def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 6ce11c39b..c686714f0 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2018.03.14' +__version__ = '2018.03.20'