From 9435401f6e77dbb0ce2138aee784518994f789d3 Mon Sep 17 00:00:00 2001 From: Jared Schwartz Date: Thu, 22 Sep 2016 16:49:13 -0700 Subject: [PATCH 1/4] Add music, movie, other naver sites --- youtube_dl/extractor/naver.py | 236 ++++++++++++++++++++++------------ 1 file changed, 151 insertions(+), 85 deletions(-) diff --git a/youtube_dl/extractor/naver.py b/youtube_dl/extractor/naver.py index 0891d2772..25d3ed06d 100644 --- a/youtube_dl/extractor/naver.py +++ b/youtube_dl/extractor/naver.py @@ -4,24 +4,32 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from ..compat import ( + compat_urllib_parse, + compat_urlparse, + compat_urllib_request, +) from ..utils import ( ExtractorError, int_or_none, - update_url_query, + float_or_none, + determine_ext, + unescapeHTML, ) class NaverIE(InfoExtractor): - _VALID_URL = r'https?://(?:m\.)?tvcast\.naver\.com/v/(?P\d+)' + _VALID_URL = r'https?://(?:m\.)?(?:tvcast\.naver\.com/v/|news\.naver\.com/main/read\.nhn?.*aid=|sports\.news\.naver.com/(?:videoCenter|sports)/(?:index|video)\.nhn?.*id=|movie\.naver.com/movie/bi/mi/mediaView\.nhn?.*mid=|music\.naver\.com/artist/videoPlayer\.nhn?.*videoId=)(?P\d+)' _TESTS = [{ 'url': 'http://tvcast.naver.com/v/81652', + 'md5': '0fe25e226a0ec388cd75679981bd2a1a', 'info_dict': { - 'id': '81652', + 'id': '4B40C2B7F4BC7C7BBA5237C5E3CED1ADEAF5', 'ext': 'mp4', 'title': '[9월 모의고사 해설강의][수학_김상희] 수학 A형 16~20번', - 'description': '합격불변의 법칙 메가스터디 | 메가스터디 수학 김상희 선생님이 9월 모의고사 수학A형 16번에서 20번까지 해설강의를 공개합니다.', - 'upload_date': '20130903', + 'uploader_id': 'megastudy', + 'uploader': '합격불변의 법칙 메가스터디', }, }, { 'url': 'http://tvcast.naver.com/v/395837', @@ -34,89 +42,147 @@ class NaverIE(InfoExtractor): 'upload_date': '20150519', }, 'skip': 'Georestricted', + }, { + 'url': 'http://news.naver.com/main/read.nhn?mode=LSD&mid=tvh&oid=056&aid=0010235712&sid1=293', + 'md5': 'ea02b7943173553618f49de08d6bd36e', + 'info_dict': { + 'id': '3C026B64F022C136ED9057AA820458275CD7', + 'ext': 'mp4', + 'title': '한미 정상 “북핵 문제 최우선”…공동성명 첫 채택', + 'uploader_id': 'muploader_o', + 'uploader': '', + }, + }, { + 'url': 'http://sports.news.naver.com/videoCenter/index.nhn?uCategory=esports&category=lol&id=158508', + 'md5': '436254dbbb7dde42053c731039f6f14d', + 'info_dict': { + 'id': '3CD20A3B7B15044056189B09557FB349DE0B', + 'ext': 'mp4', + 'title': '\'전승 가도\' 절대강자 Faker의 인터뷰', + 'uploader_id': 'muploader_n', + 'uploader': '', + }, + }, { + 'url': 'http://movie.naver.com/movie/bi/mi/mediaView.nhn?code=115622&mid=27362', + 'md5': 'ad9623ff5a23d6e9c0c7f451b063912f', + 'info_dict': { + 'id': '1132280440B9037B8E48DB0F569208440008', + 'ext': 'mp4', + 'title': '<인사이드 아웃> 메인 예고편', + 'uploader_id': 'navermovie', + 'uploader': '네이버 영화', + }, + }, { + 'url': 'http://music.naver.com/artist/videoPlayer.nhn?videoId=99476', + 'md5': '4378409358f457bdce12e90f40ba33e2', + 'info_dict': { + 'id': 'E2651FBE1723D209C17AB611C296C57EA0A1', + 'ext': 'mp4', + 'title': '디아크 인사말', + 'uploader_id': 'muploader_c', + 'uploader': '', + }, }] + def _extract_video_formats(self, formats_list, vid): + formats = [] + for format_el in formats_list: + url = format_el.get('source') + if url: + if format_el.get('type') == 'HLS': + key = format_el.get('key') + if key: + url += '?%s=%s' % (key['name'], key['value']) + formats.extend(self._extract_m3u8_formats(url, vid, 'mp4', m3u8_id='hls')) + else: + encoding_option = format_el.get('encodingOption') + bitrate = format_el.get('bitrate') + formats.append({ + 'format_id': encoding_option.get('id') or encoding_option.get('name'), + 'url': format_el['source'], + 'width': int_or_none(encoding_option.get('width')), + 'height': int_or_none(encoding_option.get('height')), + 'vbr': float_or_none(bitrate.get('video')), + 'abr': float_or_none(bitrate.get('audio')), + 'filesize': int_or_none(format_el.get('size')), + 'vcodec': format_el.get('type'), + 'ext': determine_ext(url, 'mp4'), + }) + if formats: + self._sort_formats(formats) + return formats + + def _parse_video_info(self, play_data, vid): + meta = play_data.get('meta') + user = meta.get('user', {}) + + thumbnails = [] + for thumbnail in play_data.get('thumbnails', {}).get('list', []): + thumbnails.append({'url': thumbnail['source']}) + + subtitles = {} + for caption in play_data.get('captions', {}).get('list', []): + subtitles[caption['language']] = [ + {'ext': determine_ext(caption['source'], default_ext='vtt'), + 'url': caption['source']}] + + formats = self._extract_video_formats(play_data['videos']['list'] + play_data.get('streams', []), vid) + + return { + 'id': vid, + 'title': meta.get('subject'), + 'formats': formats, + 'thumbnail': meta.get('cover', {}).get('source'), + 'thumbnails': thumbnails, + 'view_count': int_or_none(meta.get('count')), + 'uploader_id': user.get('id'), + 'uploader': user.get('name'), + } + + def _extract_video_info(self, vid, key): + play_data = self._download_json( + 'http://global.apis.naver.com/rmcnmv/rmcnmv/vod_play_videoInfo.json?' + compat_urllib_parse.urlencode({'videoId': vid, 'key': key}), + vid, 'Downloading video info') + info = self._parse_video_info(play_data, vid) + if not info['formats']: + play_data = self._download_json( + 'http://serviceapi.rmcnmv.naver.com/mobile/getVideoInfo.nhn?' + compat_urllib_parse.urlencode({'videoId': vid, 'inKey': key, 'protocol': 'http'}), + vid, 'Downloading video info') + info['formats'] = self._extract_video_formats(play_data['videos']['list'] + play_data.get('streams', []), vid) + return info + + def _extract_id_and_key(self, webpage): + m_id = re.search(r'(?s)new\s+nhn.rmcnmv.RMCVideoPlayer\(\s*["\']([^"\']+)["\']\s*,\s*(?:{[^}]*?value[^:]*?:\s*?)?["\']([^"\']+)["\']', webpage) + if not m_id: + m_id = re.search(r'(?s)_sVid\s*=\s*["\']([^"\']+)["\'];\s*var\s+_sInkey\s*=\s*["\']([^"\']+)["\'];', webpage) + return m_id + def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - m_id = re.search(r'var rmcPlayer = new nhn.rmcnmv.RMCVideoPlayer\("(.+?)", "(.+?)"', - webpage) - if m_id is None: - error = self._html_search_regex( - r'(?s)
\s*(?:)?\s*

(?P.+?)

\s*
', - webpage, 'error', default=None) - if error: - raise ExtractorError(error, expected=True) - raise ExtractorError('couldn\'t extract vid and key') - video_data = self._download_json( - 'http://play.rmcnmv.naver.com/vod/play/v2.0/' + m_id.group(1), - video_id, query={ - 'key': m_id.group(2), - }) - meta = video_data['meta'] - title = meta['subject'] - formats = [] - - def extract_formats(streams, stream_type, query={}): - for stream in streams: - stream_url = stream.get('source') - if not stream_url: - continue - stream_url = update_url_query(stream_url, query) - encoding_option = stream.get('encodingOption', {}) - bitrate = stream.get('bitrate', {}) - formats.append({ - 'format_id': '%s_%s' % (stream.get('type') or stream_type, encoding_option.get('id') or encoding_option.get('name')), - 'url': stream_url, - 'width': int_or_none(encoding_option.get('width')), - 'height': int_or_none(encoding_option.get('height')), - 'vbr': int_or_none(bitrate.get('video')), - 'abr': int_or_none(bitrate.get('audio')), - 'filesize': int_or_none(stream.get('size')), - 'protocol': 'm3u8_native' if stream_type == 'HLS' else None, - }) - - extract_formats(video_data.get('videos', {}).get('list', []), 'H264') - for stream_set in video_data.get('streams', []): - query = {} - for param in stream_set.get('keys', []): - query[param['name']] = param['value'] - stream_type = stream_set.get('type') - videos = stream_set.get('videos') - if videos: - extract_formats(videos, stream_type, query) - elif stream_type == 'HLS': - stream_url = stream_set.get('source') - if not stream_url: - continue - formats.extend(self._extract_m3u8_formats( - update_url_query(stream_url, query), video_id, - 'mp4', 'm3u8_native', m3u8_id=stream_type, fatal=False)) - self._sort_formats(formats) - - subtitles = {} - for caption in video_data.get('captions', {}).get('list', []): - caption_url = caption.get('source') - if not caption_url: - continue - subtitles.setdefault(caption.get('language') or caption.get('locale'), []).append({ - 'url': caption_url, - }) - - upload_date = self._search_regex( - r']+class="date".*?(\d{4}\.\d{2}\.\d{2})', - webpage, 'upload date', fatal=False) - if upload_date: - upload_date = upload_date.replace('.', '') - - return { - 'id': video_id, - 'title': title, - 'formats': formats, - 'subtitles': subtitles, - 'description': self._og_search_description(webpage), - 'thumbnail': meta.get('cover', {}).get('source') or self._og_search_thumbnail(webpage), - 'view_count': int_or_none(meta.get('count')), - 'upload_date': upload_date, - } + m_id = self._extract_id_and_key(webpage) + if not m_id: + iframe_urls = re.findall(r'<(?:iframe|IFRAME)[^>]+src="((?:/main/readVod|/movie/bi/mi/videoPlayer|http://serviceapi\.rmcnmv\.naver\.com/flash/outKeyPlayer)\.nhn[^"]+)"', webpage) + if iframe_urls: + entries = [] + for iframe_url in iframe_urls: + iframe_url = unescapeHTML(iframe_url) + if iframe_url.startswith('/'): + iframe_url = compat_urlparse.urljoin(url, iframe_url) + request = compat_urllib_request.Request(iframe_url, headers={'Referer': url}) + iframe_webpage = self._download_webpage(request, video_id, 'Downloading iframe webpage') + m_id = self._extract_id_and_key(iframe_webpage) + if m_id: + vid, key = m_id.groups() + entries.append(self._extract_video_info(vid, key)) + return entries[0] if len(entries) == 1 else self.playlist_result(entries) + else: + error = self._html_search_regex( + r'(?s)
\s*(?:)?\s*

(?P.+?)

\s*
', + webpage, 'error', default=None) + if error: + raise ExtractorError(error, expected=True) + raise ExtractorError('couldn\'t extract vid and key') + vid, key = m_id.groups() + return self._extract_video_info(vid, key) From cc4b0c2dd08d1ec5967287cb585d570bc2f31d17 Mon Sep 17 00:00:00 2001 From: Jared Schwartz Date: Thu, 22 Sep 2016 17:09:00 -0700 Subject: [PATCH 2/4] Update .travis.yml --- .travis.yml | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/.travis.yml b/.travis.yml index c74c9cc12..9b66c015c 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,18 +1,8 @@ language: python python: - - "2.6" - "2.7" - - "3.2" - - "3.3" - "3.4" - "3.5" sudo: false script: nosetests test --verbose -notifications: - email: - - filippo.valsorda@gmail.com - - yasoob.khld@gmail.com -# irc: -# channels: -# - "irc.freenode.org#youtube-dl" -# skip_join: true +os: linux From 1d5820056be221db63badea55e621b1d92d027d1 Mon Sep 17 00:00:00 2001 From: Jared Schwartz Date: Fri, 23 Sep 2016 12:05:31 -0700 Subject: [PATCH 3/4] Support for Naver music and sports --- youtube_dl/extractor/naver.py | 22 +--------------------- 1 file changed, 1 insertion(+), 21 deletions(-) diff --git a/youtube_dl/extractor/naver.py b/youtube_dl/extractor/naver.py index 25d3ed06d..cd74d88fd 100644 --- a/youtube_dl/extractor/naver.py +++ b/youtube_dl/extractor/naver.py @@ -19,7 +19,7 @@ from ..utils import ( class NaverIE(InfoExtractor): - _VALID_URL = r'https?://(?:m\.)?(?:tvcast\.naver\.com/v/|news\.naver\.com/main/read\.nhn?.*aid=|sports\.news\.naver.com/(?:videoCenter|sports)/(?:index|video)\.nhn?.*id=|movie\.naver.com/movie/bi/mi/mediaView\.nhn?.*mid=|music\.naver\.com/artist/videoPlayer\.nhn?.*videoId=)(?P\d+)' + _VALID_URL = r'https?://(?:m\.)?(?:tvcast\.naver\.com/v/|sports\.news\.naver.com/(?:videoCenter|sports)/(?:index|video)\.nhn?.*id=|music\.naver\.com/artist/videoPlayer\.nhn?.*videoId=)(?P\d+)' _TESTS = [{ 'url': 'http://tvcast.naver.com/v/81652', @@ -42,16 +42,6 @@ class NaverIE(InfoExtractor): 'upload_date': '20150519', }, 'skip': 'Georestricted', - }, { - 'url': 'http://news.naver.com/main/read.nhn?mode=LSD&mid=tvh&oid=056&aid=0010235712&sid1=293', - 'md5': 'ea02b7943173553618f49de08d6bd36e', - 'info_dict': { - 'id': '3C026B64F022C136ED9057AA820458275CD7', - 'ext': 'mp4', - 'title': '한미 정상 “북핵 문제 최우선”…공동성명 첫 채택', - 'uploader_id': 'muploader_o', - 'uploader': '', - }, }, { 'url': 'http://sports.news.naver.com/videoCenter/index.nhn?uCategory=esports&category=lol&id=158508', 'md5': '436254dbbb7dde42053c731039f6f14d', @@ -62,16 +52,6 @@ class NaverIE(InfoExtractor): 'uploader_id': 'muploader_n', 'uploader': '', }, - }, { - 'url': 'http://movie.naver.com/movie/bi/mi/mediaView.nhn?code=115622&mid=27362', - 'md5': 'ad9623ff5a23d6e9c0c7f451b063912f', - 'info_dict': { - 'id': '1132280440B9037B8E48DB0F569208440008', - 'ext': 'mp4', - 'title': '<인사이드 아웃> 메인 예고편', - 'uploader_id': 'navermovie', - 'uploader': '네이버 영화', - }, }, { 'url': 'http://music.naver.com/artist/videoPlayer.nhn?videoId=99476', 'md5': '4378409358f457bdce12e90f40ba33e2', From 94f42e56f3b7a663f9c87cf77d8943de250abd0c Mon Sep 17 00:00:00 2001 From: Jared Schwartz Date: Fri, 23 Sep 2016 12:13:24 -0700 Subject: [PATCH 4/4] Update .travis.yml --- .travis.yml | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 9b66c015c..c74c9cc12 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,8 +1,18 @@ language: python python: + - "2.6" - "2.7" + - "3.2" + - "3.3" - "3.4" - "3.5" sudo: false script: nosetests test --verbose -os: linux +notifications: + email: + - filippo.valsorda@gmail.com + - yasoob.khld@gmail.com +# irc: +# channels: +# - "irc.freenode.org#youtube-dl" +# skip_join: true