diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 128ba2fc0..bf9494646 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.04.01*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.04.01** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.04.06*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.04.06** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.04.01 +[debug] youtube-dl version 2016.04.06 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/Makefile b/Makefile index 3a6c37944..ba7f7ed36 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -all: youtube-dl README.md CONTRIBUTING.md ISSUE_TEMPLATE.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish supportedsites +all: youtube-dl README.md CONTRIBUTING.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish supportedsites clean: rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish *.dump *.part *.info.json *.mp4 *.flv *.mp3 *.avi CONTRIBUTING.md.tmp ISSUE_TEMPLATE.md.tmp youtube-dl youtube-dl.exe @@ -59,7 +59,7 @@ README.md: youtube_dl/*.py youtube_dl/*/*.py CONTRIBUTING.md: README.md $(PYTHON) devscripts/make_contributing.py README.md CONTRIBUTING.md -ISSUE_TEMPLATE.md: +.github/ISSUE_TEMPLATE.md: devscripts/make_issue_template.py .github/ISSUE_TEMPLATE_tmpl.md youtube_dl/version.py $(PYTHON) devscripts/make_issue_template.py .github/ISSUE_TEMPLATE_tmpl.md .github/ISSUE_TEMPLATE.md supportedsites: diff --git a/devscripts/release.sh b/devscripts/release.sh index 6718ce39b..8dea55dbb 100755 --- a/devscripts/release.sh +++ b/devscripts/release.sh @@ -46,7 +46,7 @@ fi sed -i "s/__version__ = '.*'/__version__ = '$version'/" youtube_dl/version.py /bin/echo -e "\n### Committing documentation, templates and youtube_dl/version.py..." -make README.md CONTRIBUTING.md ISSUE_TEMPLATE.md supportedsites +make README.md CONTRIBUTING.md .github/ISSUE_TEMPLATE.md supportedsites git add README.md CONTRIBUTING.md .github/ISSUE_TEMPLATE.md docs/supportedsites.md youtube_dl/version.py git commit -m "release $version" diff --git a/docs/supportedsites.md b/docs/supportedsites.md index b9bcf450c..d6ee8476b 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -57,6 +57,7 @@ - **AudioBoom** - **audiomack** - **audiomack:album** + - **auroravid**: AuroraVid - **Azubu** - **AzubuLive** - **BaiduVideo**: 百度视频 @@ -92,12 +93,14 @@ - **BYUtv** - **Camdemy** - **CamdemyFolder** + - **CamWithHer** - **canalc2.tv** - **Canalplus**: canalplus.fr, piwiplus.fr and d8.tv - **Canvas** - **CBC** - **CBCPlayer** - **CBS** + - **CBSInteractive** - **CBSNews**: CBS News - **CBSNewsLiveVideo**: CBS News Live Videos - **CBSSports** @@ -119,7 +122,6 @@ - **Clyp** - **cmt.com** - **CNBC** - - **CNET** - **CNN** - **CNNArticle** - **CNNBlogs** @@ -419,7 +421,6 @@ - **Normalboots** - **NosVideo** - **Nova**: TN.cz, Prásk.tv, Nova.cz, Novaplus.cz, FANDA.tv, Krásná.cz and Doma.cz - - **novamov**: NovaMov - **nowness** - **nowness:playlist** - **nowness:series** diff --git a/test/test_compat.py b/test/test_compat.py index cc105807a..618668210 100644 --- a/test/test_compat.py +++ b/test/test_compat.py @@ -76,6 +76,10 @@ class TestCompat(unittest.TestCase): self.assertEqual(compat_urllib_parse_urlencode({'abc': b'def'}), 'abc=def') self.assertEqual(compat_urllib_parse_urlencode({b'abc': 'def'}), 'abc=def') self.assertEqual(compat_urllib_parse_urlencode({b'abc': b'def'}), 'abc=def') + self.assertEqual(compat_urllib_parse_urlencode([('abc', 'def')]), 'abc=def') + self.assertEqual(compat_urllib_parse_urlencode([('abc', b'def')]), 'abc=def') + self.assertEqual(compat_urllib_parse_urlencode([(b'abc', 'def')]), 'abc=def') + self.assertEqual(compat_urllib_parse_urlencode([(b'abc', b'def')]), 'abc=def') def test_compat_shlex_split(self): self.assertEqual(compat_shlex_split('-option "one two"'), ['-option', 'one two']) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index d7aa951ff..cd0805303 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1240,7 +1240,10 @@ class YoutubeDL(object): self.list_thumbnails(info_dict) return - if thumbnails and 'thumbnail' not in info_dict: + thumbnail = info_dict.get('thumbnail') + if thumbnail: + info_dict['thumbnail'] = sanitize_url(thumbnail) + elif thumbnails: info_dict['thumbnail'] = thumbnails[-1]['url'] if 'display_id' not in info_dict and 'id' in info_dict: diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py index 76b6b0e38..0b6c5ca7a 100644 --- a/youtube_dl/compat.py +++ b/youtube_dl/compat.py @@ -181,7 +181,8 @@ except ImportError: # Python 2 if isinstance(e, dict): e = encode_dict(e) elif isinstance(e, (list, tuple,)): - e = encode_list(e) + list_e = encode_list(e) + e = tuple(list_e) if isinstance(e, tuple) else list_e elif isinstance(e, compat_str): e = e.encode(encoding) return e diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index ebf5ca3e9..c3121d83c 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -496,11 +496,11 @@ from .normalboots import NormalbootsIE from .nosvideo import NosVideoIE from .nova import NovaIE from .novamov import ( - NovaMovIE, - WholeCloudIE, + AuroraVidIE, + CloudTimeIE, NowVideoIE, VideoWeedIE, - CloudTimeIE, + WholeCloudIE, ) from .nowness import ( NownessIE, diff --git a/youtube_dl/extractor/acast.py b/youtube_dl/extractor/acast.py index 92eee8119..94ce88c83 100644 --- a/youtube_dl/extractor/acast.py +++ b/youtube_dl/extractor/acast.py @@ -2,10 +2,14 @@ from __future__ import unicode_literals import re +import functools from .common import InfoExtractor from ..compat import compat_str -from ..utils import int_or_none +from ..utils import ( + int_or_none, + OnDemandPagedList, +) class ACastIE(InfoExtractor): @@ -26,13 +30,8 @@ class ACastIE(InfoExtractor): def _real_extract(self, url): channel, display_id = re.match(self._VALID_URL, url).groups() - - embed_page = self._download_webpage( - re.sub('(?:www\.)?acast\.com', 'embedcdn.acast.com', url), display_id) - cast_data = self._parse_json(self._search_regex( - r'window\[\'acast/queries\'\]\s*=\s*([^;]+);', embed_page, 'acast data'), - display_id)['GetAcast/%s/%s' % (channel, display_id)] - + cast_data = self._download_json( + 'https://embed.acast.com/api/acasts/%s/%s' % (channel, display_id), display_id) return { 'id': compat_str(cast_data['id']), 'display_id': display_id, @@ -58,15 +57,26 @@ class ACastChannelIE(InfoExtractor): 'playlist_mincount': 20, } _API_BASE_URL = 'https://www.acast.com/api/' + _PAGE_SIZE = 10 @classmethod def suitable(cls, url): return False if ACastIE.suitable(url) else super(ACastChannelIE, cls).suitable(url) - def _real_extract(self, url): - display_id = self._match_id(url) - channel_data = self._download_json(self._API_BASE_URL + 'channels/%s' % display_id, display_id) - casts = self._download_json(self._API_BASE_URL + 'channels/%s/acasts' % display_id, display_id) - entries = [self.url_result('https://www.acast.com/%s/%s' % (display_id, cast['url']), 'ACast') for cast in casts] + def _fetch_page(self, channel_slug, page): + casts = self._download_json( + self._API_BASE_URL + 'channels/%s/acasts?page=%s' % (channel_slug, page), + channel_slug, note='Download page %d of channel data' % page) + for cast in casts: + yield self.url_result( + 'https://www.acast.com/%s/%s' % (channel_slug, cast['url']), + 'ACast', cast['id']) - return self.playlist_result(entries, compat_str(channel_data['id']), channel_data['name'], channel_data.get('description')) + def _real_extract(self, url): + channel_slug = self._match_id(url) + channel_data = self._download_json( + self._API_BASE_URL + 'channels/%s' % channel_slug, channel_slug) + entries = OnDemandPagedList(functools.partial( + self._fetch_page, channel_slug), self._PAGE_SIZE) + return self.playlist_result(entries, compat_str( + channel_data['id']), channel_data['name'], channel_data.get('description')) diff --git a/youtube_dl/extractor/beeg.py b/youtube_dl/extractor/beeg.py index 9072949dd..956c7680e 100644 --- a/youtube_dl/extractor/beeg.py +++ b/youtube_dl/extractor/beeg.py @@ -33,8 +33,33 @@ class BeegIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + cpl_url = self._search_regex( + r']+src=(["\'])(?P(?:https?:)?//static\.beeg\.com/cpl/\d+\.js.*?)\1', + webpage, 'cpl', default=None, group='url') + + beeg_version, beeg_salt = [None] * 2 + + if cpl_url: + cpl = self._download_webpage( + self._proto_relative_url(cpl_url), video_id, + 'Downloading cpl JS', fatal=False) + if cpl: + beeg_version = self._search_regex( + r'beeg_version\s*=\s*(\d+)', cpl, + 'beeg version', default=None) or self._search_regex( + r'/(\d+)\.js', cpl_url, 'beeg version', default=None) + beeg_salt = self._search_regex( + r'beeg_salt\s*=\s*(["\'])(?P.+?)\1', cpl, 'beeg beeg_salt', + default=None, group='beeg_salt') + + beeg_version = beeg_version or '1750' + beeg_salt = beeg_salt or 'MIDtGaw96f0N1kMMAM1DE46EC9pmFr' + video = self._download_json( - 'https://api.beeg.com/api/v6/1738/video/%s' % video_id, video_id) + 'http://api.beeg.com/api/v6/%s/video/%s' % (beeg_version, video_id), + video_id) def split(o, e): def cut(s, x): @@ -51,7 +76,7 @@ class BeegIE(InfoExtractor): def decrypt_key(key): # Reverse engineered from http://static.beeg.com/cpl/1738.js - a = 'GUuyodcfS8FW8gQp4OKLMsZBcX0T7B' + a = beeg_salt e = compat_urllib_parse_unquote(key) o = ''.join([ compat_chr(compat_ord(e[n]) - compat_ord(a[n % len(a)]) % 21) @@ -101,5 +126,5 @@ class BeegIE(InfoExtractor): 'duration': duration, 'tags': tags, 'formats': formats, - 'age_limit': 18, + 'age_limit': self._rta_search(webpage), } diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index ec6625eea..2b40f3b7c 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -232,6 +232,24 @@ class InfoExtractor(object): episode_number: Number of the video episode within a season, as an integer. episode_id: Id of the video episode, as a unicode string. + The following fields should only be used when the media is a track or a part of + a music album: + + track: Title of the track. + track_number: Number of the track within an album or a disc, as an integer. + track_id: Id of the track (useful in case of custom indexing, e.g. 6.iii), + as a unicode string. + artist: Artist(s) of the track. + genre: Genre(s) of the track. + album: Title of the album the track belongs to. + album_type: Type of the album (e.g. "Demo", "Full-length", "Split", "Compilation", etc). + album_artist: List of all artists appeared on the album (e.g. + "Ash Borer / Fell Voices" or "Various Artists", useful for splits + and compilations). + disc_number: Number of the disc or other physical medium the track belongs to, + as an integer. + release_year: Year (YYYY) when the album was released. + Unless mentioned otherwise, the fields should be Unicode strings. Unless mentioned otherwise, None is equivalent to absence of information. diff --git a/youtube_dl/extractor/deezer.py b/youtube_dl/extractor/deezer.py index c3205ff5f..7a07f3267 100644 --- a/youtube_dl/extractor/deezer.py +++ b/youtube_dl/extractor/deezer.py @@ -41,7 +41,9 @@ class DeezerPlaylistIE(InfoExtractor): 'Deezer said: %s' % geoblocking_msg, expected=True) data_json = self._search_regex( - r'naboo\.display\(\'[^\']+\',\s*(.*?)\);\n', webpage, 'data JSON') + (r'__DZR_APP_STATE__\s*=\s*({.+?})\s*', + r'naboo\.display\(\'[^\']+\',\s*(.*?)\);\n'), + webpage, 'data JSON') data = json.loads(data_json) playlist_title = data.get('DATA', {}).get('TITLE') diff --git a/youtube_dl/extractor/democracynow.py b/youtube_dl/extractor/democracynow.py index 6cd395e11..65a98d789 100644 --- a/youtube_dl/extractor/democracynow.py +++ b/youtube_dl/extractor/democracynow.py @@ -17,37 +17,53 @@ class DemocracynowIE(InfoExtractor): IE_NAME = 'democracynow' _TESTS = [{ 'url': 'http://www.democracynow.org/shows/2015/7/3', - 'md5': 'fbb8fe3d7a56a5e12431ce2f9b2fab0d', + 'md5': '3757c182d3d84da68f5c8f506c18c196', 'info_dict': { 'id': '2015-0703-001', 'ext': 'mp4', - 'title': 'July 03, 2015 - Democracy Now!', - 'description': 'A daily independent global news hour with Amy Goodman & Juan González "What to the Slave is 4th of July?": James Earl Jones Reads Frederick Douglass\u2019 Historic Speech : "This Flag Comes Down Today": Bree Newsome Scales SC Capitol Flagpole, Takes Down Confederate Flag : "We Shall Overcome": Remembering Folk Icon, Activist Pete Seeger in His Own Words & Songs', + 'title': 'Daily Show', }, }, { 'url': 'http://www.democracynow.org/2015/7/3/this_flag_comes_down_today_bree', - 'md5': 'fbb8fe3d7a56a5e12431ce2f9b2fab0d', 'info_dict': { 'id': '2015-0703-001', 'ext': 'mp4', 'title': '"This Flag Comes Down Today": Bree Newsome Scales SC Capitol Flagpole, Takes Down Confederate Flag', 'description': 'md5:4d2bc4f0d29f5553c2210a4bc7761a21', }, + 'params': { + 'skip_download': True, + }, }] def _real_extract(self, url): display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) - description = self._og_search_description(webpage) json_data = self._parse_json(self._search_regex( r']+type="text/json"[^>]*>\s*({[^>]+})', webpage, 'json'), display_id) - video_id = None + + title = json_data['title'] formats = [] - default_lang = 'en' + video_id = None + for key in ('file', 'audio', 'video', 'high_res_video'): + media_url = json_data.get(key, '') + if not media_url: + continue + media_url = re.sub(r'\?.*', '', compat_urlparse.urljoin(url, media_url)) + video_id = video_id or remove_start(os.path.splitext(url_basename(media_url))[0], 'dn') + formats.append({ + 'url': media_url, + 'vcodec': 'none' if key == 'audio' else None, + }) + + self._sort_formats(formats) + + default_lang = 'en' subtitles = {} def add_subtitle_item(lang, info_dict): @@ -67,22 +83,13 @@ class DemocracynowIE(InfoExtractor): 'url': compat_urlparse.urljoin(url, subtitle_item['url']), }) - for key in ('file', 'audio', 'video'): - media_url = json_data.get(key, '') - if not media_url: - continue - media_url = re.sub(r'\?.*', '', compat_urlparse.urljoin(url, media_url)) - video_id = video_id or remove_start(os.path.splitext(url_basename(media_url))[0], 'dn') - formats.append({ - 'url': media_url, - }) - - self._sort_formats(formats) + description = self._og_search_description(webpage, default=None) return { 'id': video_id or display_id, - 'title': json_data['title'], + 'title': title, 'description': description, + 'thumbnail': json_data.get('image'), 'subtitles': subtitles, 'formats': formats, } diff --git a/youtube_dl/extractor/iqiyi.py b/youtube_dl/extractor/iqiyi.py index 9e8c9432a..88570f261 100644 --- a/youtube_dl/extractor/iqiyi.py +++ b/youtube_dl/extractor/iqiyi.py @@ -368,7 +368,10 @@ class IqiyiIE(InfoExtractor): auth_req, video_id, note='Downloading video authentication JSON', errnote='Unable to download video authentication JSON') - if auth_result['code'] == 'Q00506': # requires a VIP account + + if auth_result['code'] == 'Q00505': # No preview available (不允许试看鉴权失败) + raise ExtractorError('This video requires a VIP account', expected=True) + if auth_result['code'] == 'Q00506': # End of preview time (试看结束鉴权失败) if do_report_warning: self.report_warning('Needs a VIP account for full video') return False diff --git a/youtube_dl/extractor/novamov.py b/youtube_dl/extractor/novamov.py index a131f7dbd..3bbd47355 100644 --- a/youtube_dl/extractor/novamov.py +++ b/youtube_dl/extractor/novamov.py @@ -16,7 +16,14 @@ class NovaMovIE(InfoExtractor): IE_NAME = 'novamov' IE_DESC = 'NovaMov' - _VALID_URL_TEMPLATE = r'http://(?:(?:www\.)?%(host)s/(?:file|video|mobile/#/videos)/|(?:(?:embed|www)\.)%(host)s/embed\.php\?(?:.*?&)?v=)(?P[a-z\d]{13})' + _VALID_URL_TEMPLATE = r'''(?x) + http:// + (?: + (?:www\.)?%(host)s/(?:file|video|mobile/\#/videos)/| + (?:(?:embed|www)\.)%(host)s/embed(?:\.php|/)?\?(?:.*?&)?\bv= + ) + (?P[a-z\d]{13}) + ''' _VALID_URL = _VALID_URL_TEMPLATE % {'host': 'novamov\.com'} _HOST = 'www.novamov.com' @@ -27,17 +34,7 @@ class NovaMovIE(InfoExtractor): _DESCRIPTION_REGEX = r'(?s)
\s*

[^<]+

([^<]+)

' _URL_TEMPLATE = 'http://%s/video/%s' - _TEST = { - 'url': 'http://www.novamov.com/video/4rurhn9x446jj', - 'md5': '7205f346a52bbeba427603ba10d4b935', - 'info_dict': { - 'id': '4rurhn9x446jj', - 'ext': 'flv', - 'title': 'search engine optimization', - 'description': 'search engine optimization is used to rank the web page in the google search engine' - }, - 'skip': '"Invalid token" errors abound (in web interface as well as youtube-dl, there is nothing we can do about it.)' - } + _TEST = None def _check_existence(self, webpage, video_id): if re.search(self._FILE_DELETED_REGEX, webpage) is not None: @@ -81,7 +78,7 @@ class NovaMovIE(InfoExtractor): filekey = extract_filekey() - title = self._html_search_regex(self._TITLE_REGEX, webpage, 'title', fatal=False) + title = self._html_search_regex(self._TITLE_REGEX, webpage, 'title') description = self._html_search_regex(self._DESCRIPTION_REGEX, webpage, 'description', default='', fatal=False) api_response = self._download_webpage( @@ -187,3 +184,29 @@ class CloudTimeIE(NovaMovIE): _TITLE_REGEX = r']+class=["\']video_det["\'][^>]*>\s*([^<]+)' _TEST = None + + +class AuroraVidIE(NovaMovIE): + IE_NAME = 'auroravid' + IE_DESC = 'AuroraVid' + + _VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': 'auroravid\.to'} + + _HOST = 'www.auroravid.to' + + _FILE_DELETED_REGEX = r'This file no longer exists on our servers!<' + + _TESTS = [{ + 'url': 'http://www.auroravid.to/video/4rurhn9x446jj', + 'md5': '7205f346a52bbeba427603ba10d4b935', + 'info_dict': { + 'id': '4rurhn9x446jj', + 'ext': 'flv', + 'title': 'search engine optimization', + 'description': 'search engine optimization is used to rank the web page in the google search engine' + }, + 'skip': '"Invalid token" errors abound (in web interface as well as youtube-dl, there is nothing we can do about it.)' + }, { + 'url': 'http://www.auroravid.to/embed/?v=4rurhn9x446jj', + 'only_matching': True, + }] diff --git a/youtube_dl/extractor/rte.py b/youtube_dl/extractor/rte.py index 9c89974e7..ebe563ebb 100644 --- a/youtube_dl/extractor/rte.py +++ b/youtube_dl/extractor/rte.py @@ -39,9 +39,14 @@ class RteIE(InfoExtractor): duration = float_or_none(self._html_search_meta( 'duration', webpage, 'duration', fatal=False), 1000) - thumbnail_id = self._search_regex( - r'', webpage, 'thumbnail') - thumbnail = 'http://img.rasset.ie/' + thumbnail_id + '.jpg' + thumbnail = None + thumbnail_meta = self._html_search_meta('thumbnail', webpage) + if thumbnail_meta: + thumbnail_id = self._search_regex( + r'uri:irus:(.+)', thumbnail_meta, + 'thumbnail id', fatal=False) + if thumbnail_id: + thumbnail = 'http://img.rasset.ie/%s.jpg' % thumbnail_id feeds_url = self._html_search_meta('feeds-prefix', webpage, 'feeds url') + video_id json_string = self._download_json(feeds_url, video_id) diff --git a/youtube_dl/extractor/vrt.py b/youtube_dl/extractor/vrt.py index 2b6bae89b..8e35f24e8 100644 --- a/youtube_dl/extractor/vrt.py +++ b/youtube_dl/extractor/vrt.py @@ -4,7 +4,10 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..utils import float_or_none +from ..utils import ( + determine_ext, + float_or_none, +) class VRTIE(InfoExtractor): @@ -52,6 +55,11 @@ class VRTIE(InfoExtractor): 'duration': 661, } }, + { + # YouTube video + 'url': 'http://deredactie.be/cm/vrtnieuws/videozone/nieuws/cultuurenmedia/1.2622957', + 'only_matching': True, + }, { 'url': 'http://cobra.canvas.be/cm/cobra/videozone/rubriek/film-videozone/1.2377055', 'only_matching': True, @@ -66,7 +74,17 @@ class VRTIE(InfoExtractor): video_id = self._search_regex( r'data-video-id="([^"]+)_[^"]+"', webpage, 'video id', fatal=False) + src = self._search_regex( + r'data-video-src="([^"]+)"', webpage, 'video src', default=None) + + video_type = self._search_regex( + r'data-video-type="([^"]+)"', webpage, 'video type', default=None) + + if video_type == 'YouTubeVideo': + return self.url_result(src, 'Youtube') + formats = [] + mobj = re.search( r'data-video-iphone-server="(?P[^"]+)"\s+data-video-iphone-path="(?P[^"]+)"', webpage) @@ -74,11 +92,15 @@ class VRTIE(InfoExtractor): formats.extend(self._extract_m3u8_formats( '%s/%s' % (mobj.group('server'), mobj.group('path')), video_id, 'mp4', m3u8_id='hls', fatal=False)) - mobj = re.search(r'data-video-src="(?P[^"]+)"', webpage) - if mobj: - formats.extend(self._extract_f4m_formats( - '%s/manifest.f4m' % mobj.group('src'), - video_id, f4m_id='hds', fatal=False)) + + if src: + if determine_ext(src) == 'm3u8': + formats.extend(self._extract_m3u8_formats( + src, video_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id='hls', fatal=False)) + else: + formats.extend(self._extract_f4m_formats( + '%s/manifest.f4m' % src, video_id, f4m_id='hds', fatal=False)) if not formats and 'data-video-geoblocking="true"' in webpage: self.raise_geo_restricted('This video is only available in Belgium') diff --git a/youtube_dl/extractor/yandexmusic.py b/youtube_dl/extractor/yandexmusic.py index 025716958..7a90cc60c 100644 --- a/youtube_dl/extractor/yandexmusic.py +++ b/youtube_dl/extractor/yandexmusic.py @@ -39,9 +39,14 @@ class YandexMusicTrackIE(YandexMusicBaseIE): 'info_dict': { 'id': '4878838', 'ext': 'mp3', - 'title': 'Carlo Ambrosio - Gypsy Eyes 1', + 'title': 'Carlo Ambrosio & Fabio Di Bari, Carlo Ambrosio - Gypsy Eyes 1', 'filesize': 4628061, 'duration': 193.04, + 'track': 'Gypsy Eyes 1', + 'album': 'Gypsy Soul', + 'album_artist': 'Carlo Ambrosio', + 'artist': 'Carlo Ambrosio & Fabio Di Bari, Carlo Ambrosio', + 'release_year': '2009', } } @@ -64,16 +69,45 @@ class YandexMusicTrackIE(YandexMusicBaseIE): thumbnail = cover_uri.replace('%%', 'orig') if not thumbnail.startswith('http'): thumbnail = 'http://' + thumbnail - return { + + track_title = track['title'] + track_info = { 'id': track['id'], 'ext': 'mp3', 'url': self._get_track_url(track['storageDir'], track['id']), - 'title': '%s - %s' % (track['artists'][0]['name'], track['title']), 'filesize': int_or_none(track.get('fileSize')), 'duration': float_or_none(track.get('durationMs'), 1000), 'thumbnail': thumbnail, + 'track': track_title, } + def extract_artist(artist_list): + if artist_list and isinstance(artist_list, list): + artists_names = [a['name'] for a in artist_list if a.get('name')] + if artists_names: + return ', '.join(artists_names) + + albums = track.get('albums') + if albums and isinstance(albums, list): + album = albums[0] + if isinstance(album, dict): + year = album.get('year') + track_info.update({ + 'album': album.get('title'), + 'album_artist': extract_artist(album.get('artists')), + 'release_year': compat_str(year) if year else None, + }) + + track_artist = extract_artist(track.get('artists')) + if track_artist: + track_info.update({ + 'artist': track_artist, + 'title': '%s - %s' % (track_artist, track_title), + }) + else: + track_info['title'] = track_title + return track_info + def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) album_id, track_id = mobj.group('album_id'), mobj.group('id') diff --git a/youtube_dl/version.py b/youtube_dl/version.py index d9e1cb2a8..167b16e24 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.04.01' +__version__ = '2016.04.06'