From ccff2c404d7ea9f5b21ede8ae57bb79feec7eb94 Mon Sep 17 00:00:00 2001 From: Wang Jun Tham Date: Sun, 24 Apr 2016 00:08:02 +0800 Subject: [PATCH 01/42] [ffmpeg] Fix embedding subtitles (#9063) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Changed command line parameters for ffmpeg when embedding subtitles. Changed to ‘-map 0:v -c:v copy -map 0:a -c:a copy’ --- youtube_dl/postprocessor/ffmpeg.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/youtube_dl/postprocessor/ffmpeg.py b/youtube_dl/postprocessor/ffmpeg.py index 1793a878c..ca2d401f8 100644 --- a/youtube_dl/postprocessor/ffmpeg.py +++ b/youtube_dl/postprocessor/ffmpeg.py @@ -363,8 +363,10 @@ class FFmpegEmbedSubtitlePP(FFmpegPostProcessor): input_files = [filename] + sub_filenames opts = [ - '-map', '0', - '-c', 'copy', + '-map', '0:v', + '-c:v', 'copy', + '-map', '0:a', + '-c:a', 'copy', # Don't copy the existing subtitles, we may be running the # postprocessor a second time '-map', '-0:s', From 4f3c5e062715bb8c2084bda139ddcd9a2036f267 Mon Sep 17 00:00:00 2001 From: remitamine Date: Wed, 16 Mar 2016 18:48:06 +0100 Subject: [PATCH 02/42] [utils] add helper function for parsing codecs --- test/test_utils.py | 24 ++++++++++++++++++++++++ youtube_dl/utils.py | 36 ++++++++++++++++++++++++++++++++++++ 2 files changed, 60 insertions(+) diff --git a/test/test_utils.py b/test/test_utils.py index b7ef51f8d..d84eb438f 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -78,6 +78,7 @@ from youtube_dl.utils import ( cli_option, cli_valueless_option, cli_bool_option, + parse_codecs, ) from youtube_dl.compat import ( compat_chr, @@ -579,6 +580,29 @@ class TestUtil(unittest.TestCase): limit_length('foo bar baz asd', 12).startswith('foo bar')) self.assertTrue('...' 
in limit_length('foo bar baz asd', 12)) + def test_parse_codecs(self): + self.assertEqual(parse_codecs(''), {}) + self.assertEqual(parse_codecs('avc1.77.30, mp4a.40.2'), { + 'vcodec': 'avc1.77.30', + 'acodec': 'mp4a.40.2', + }) + self.assertEqual(parse_codecs('mp4a.40.2'), { + 'vcodec': 'none', + 'acodec': 'mp4a.40.2', + }) + self.assertEqual(parse_codecs('mp4a.40.5,avc1.42001e'), { + 'vcodec': 'avc1.42001e', + 'acodec': 'mp4a.40.5', + }) + self.assertEqual(parse_codecs('avc3.640028'), { + 'vcodec': 'avc3.640028', + 'acodec': 'none', + }) + self.assertEqual(parse_codecs(', h264,,newcodec,aac'), { + 'vcodec': 'h264', + 'acodec': 'aac', + }) + def test_escape_rfc3986(self): reserved = "!*'();:@&=+$,/?#[]" unreserved = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_.~' diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 562031fe1..fe175e82c 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -2060,6 +2060,42 @@ def mimetype2ext(mt): }.get(res, res) +def parse_codecs(codecs_str): + # http://tools.ietf.org/html/rfc6381 + if not codecs_str: + return {} + splited_codecs = list(filter(None, map( + lambda str: str.strip(), codecs_str.strip().strip(',').split(',')))) + vcodec, acodec = None, None + for full_codec in splited_codecs: + codec = full_codec.split('.')[0] + if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v'): + if not vcodec: + vcodec = full_codec + elif codec in ('mp4a', 'opus', 'vorbis', 'mp3', 'aac'): + if not acodec: + acodec = full_codec + else: + write_string('WARNING: Unknown codec %s' % full_codec, sys.stderr) + if not vcodec and not acodec: + if len(splited_codecs) == 2: + return { + 'vcodec': vcodec, + 'acodec': acodec, + } + elif len(splited_codecs) == 1: + return { + 'vcodec': 'none', + 'acodec': vcodec, + } + else: + return { + 'vcodec': vcodec or 'none', + 'acodec': acodec or 'none', + } + return {} + + def urlhandle_detect_ext(url_handle): getheader = 
url_handle.headers.get From 59bbe4911acd4493bf407925bfdeb1ad03db6ef3 Mon Sep 17 00:00:00 2001 From: remitamine Date: Wed, 16 Mar 2016 18:50:45 +0100 Subject: [PATCH 03/42] [extractor/common] add helper method to extract html5 media entries --- youtube_dl/extractor/common.py | 58 ++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 5a2603b50..661889593 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -54,6 +54,8 @@ from ..utils import ( update_Request, update_url_query, parse_m3u8_attributes, + extract_attributes, + parse_codecs, ) @@ -1610,6 +1612,62 @@ class InfoExtractor(object): self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type) return formats + def _parse_html5_media_entries(self, base_url, webpage): + def absolute_url(video_url): + return compat_urlparse.urljoin(base_url, video_url) + + def parse_content_type(content_type): + if not content_type: + return {} + ctr = re.search(r'(?P<mimetype>[^/]+/[^;]+)(?:;\s*codecs="?(?P<codecs>[^"]+))?', content_type) + if ctr: + mimetype, codecs = ctr.groups() + f = parse_codecs(codecs) + f['ext'] = mimetype2ext(mimetype) + return f + return {} + + entries = [] + for media_tag, media_type, media_content in re.findall(r'(?s)(<(?P<tag>video|audio)[^>]*>)(.*?)</(?P=tag)>', webpage): + media_info = { + 'formats': [], + 'subtitles': {}, + } + media_attributes = extract_attributes(media_tag) + src = media_attributes.get('src') + if src: + media_info['formats'].append({ + 'url': absolute_url(src), + 'vcodec': 'none' if media_type == 'audio' else None, + }) + media_info['thumbnail'] = media_attributes.get('poster') + if media_content: + for source_tag in re.findall(r'<source[^>]+>', media_content): + source_attributes = extract_attributes(source_tag) + src = source_attributes.get('src') + if not src: + continue + f = parse_content_type(source_attributes.get('type')) + f.update({ + 'url': absolute_url(src), + 'vcodec': 
'none' if media_type == 'audio' else None, + }) + media_info['formats'].append(f) + for track_tag in re.findall(r'<track[^>]+>', media_content): + track_attributes = extract_attributes(track_tag) + kind = track_attributes.get('kind') + if not kind or kind == 'subtitles': + src = track_attributes.get('src') + if not src: + continue + lang = track_attributes.get('srclang') or track_attributes.get('lang') or track_attributes.get('label') + media_info['subtitles'].setdefault(lang, []).append({ + 'url': absolute_url(src), + }) + if media_info['formats']: + entries.append(media_info) + return entries + def _live_title(self, name): """ Generate the title for a live video """ now = datetime.datetime.now() From 0ab7f4fe2b0006b6eaf81fcbf31c0cde7a1a14d5 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Fri, 8 Jul 2016 15:11:28 +0800 Subject: [PATCH 04/42] [nick] support nickjr.com (closes #7542) --- youtube_dl/extractor/nick.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/nick.py b/youtube_dl/extractor/nick.py index e96013791..4935002d0 100644 --- a/youtube_dl/extractor/nick.py +++ b/youtube_dl/extractor/nick.py @@ -8,7 +8,7 @@ from ..utils import update_url_query class NickIE(MTVServicesInfoExtractor): IE_NAME = 'nick.com' - _VALID_URL = r'https?://(?:www\.)?nick\.com/videos/clip/(?P<id>[^/?#.]+)' + _VALID_URL = r'https?://(?:www\.)?nick(?:jr)?\.com/(?:videos/clip|[^/]+/videos)/(?P<id>[^/?#.]+)' _FEED_URL = 'http://udat.mtvnservices.com/service1/dispatch.htm' _TESTS = [{ 'url': 'http://www.nick.com/videos/clip/alvinnn-and-the-chipmunks-112-full-episode.html', @@ -52,6 +52,9 @@ class NickIE(MTVServicesInfoExtractor): } }, ], + }, { + 'url': 'http://www.nickjr.com/paw-patrol/videos/pups-save-a-goldrush-s3-ep302-full-episode/', + 'only_matching': True, }] def _get_feed_query(self, uri): From e2d616dd30b8f4b4b159bc7ee80180861f3bb908 Mon Sep 17 00:00:00 2001 From: Jakub Adam Wieczorek Date: Thu, 7 Jul 2016 11:27:31 +0200 Subject: [PATCH 05/42] 
[polskieradio] Add thumbnails. --- youtube_dl/extractor/polskieradio.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/youtube_dl/extractor/polskieradio.py b/youtube_dl/extractor/polskieradio.py index d3bebaea3..9e7eab12e 100644 --- a/youtube_dl/extractor/polskieradio.py +++ b/youtube_dl/extractor/polskieradio.py @@ -33,6 +33,7 @@ class PolskieRadioIE(InfoExtractor): 'timestamp': 1456594200, 'upload_date': '20160227', 'duration': 2364, + 'thumbnail': 're:^https?://static.prsa.pl/images/.*\.jpg$' }, }], }, { @@ -68,6 +69,8 @@ class PolskieRadioIE(InfoExtractor): r'(?s)]+id="datetime2"[^>]*>(.+?)', webpage, 'timestamp', fatal=False)) + thumbnail_url = self._og_search_thumbnail(webpage) + entries = [] media_urls = set() @@ -87,6 +90,7 @@ class PolskieRadioIE(InfoExtractor): 'duration': int_or_none(media.get('length')), 'vcodec': 'none' if media.get('provider') == 'audio' else None, 'timestamp': timestamp, + 'thumbnail': thumbnail_url }) title = self._og_search_title(webpage).strip() From 2e32ac0b9a64f500fded1bd96b4d16965d4c9092 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Fri, 8 Jul 2016 19:34:53 +0800 Subject: [PATCH 06/42] [polskieradio] Fix regex in _TESTS --- youtube_dl/extractor/polskieradio.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/polskieradio.py b/youtube_dl/extractor/polskieradio.py index 9e7eab12e..f559b899f 100644 --- a/youtube_dl/extractor/polskieradio.py +++ b/youtube_dl/extractor/polskieradio.py @@ -33,7 +33,7 @@ class PolskieRadioIE(InfoExtractor): 'timestamp': 1456594200, 'upload_date': '20160227', 'duration': 2364, - 'thumbnail': 're:^https?://static.prsa.pl/images/.*\.jpg$' + 'thumbnail': 're:^https?://static\.prsa\.pl/images/.*\.jpg$' }, }], }, { From 07d7689f2eb4698cf98ee837b56489b7ff1924df Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Fri, 8 Jul 2016 15:35:20 +0100 Subject: [PATCH 07/42] [le] extract http formats --- youtube_dl/extractor/leeco.py | 135 
++++++++++++++++++++++------------ 1 file changed, 90 insertions(+), 45 deletions(-) diff --git a/youtube_dl/extractor/leeco.py b/youtube_dl/extractor/leeco.py index 171b705c7..e9cc9aa59 100644 --- a/youtube_dl/extractor/leeco.py +++ b/youtube_dl/extractor/leeco.py @@ -23,6 +23,7 @@ from ..utils import ( str_or_none, url_basename, urshift, + update_url_query, ) @@ -89,6 +90,10 @@ class LeIE(InfoExtractor): _loc3_ = self.ror(_loc3_, _loc2_ % 17) return _loc3_ + # reversed from http://jstatic.letvcdn.com/sdk/player.js + def get_mms_key(self, time): + return self.ror(time, 8) ^ 185025305 + # see M3U8Encryption class in KLetvPlayer.swf @staticmethod def decrypt_m3u8(encrypted_data): @@ -109,23 +114,7 @@ class LeIE(InfoExtractor): return bytes(_loc7_) - def _real_extract(self, url): - media_id = self._match_id(url) - page = self._download_webpage(url, media_id) - params = { - 'id': media_id, - 'platid': 1, - 'splatid': 101, - 'format': 1, - 'tkey': self.calc_time_key(int(time.time())), - 'domain': 'www.le.com' - } - - play_json = self._download_json( - 'http://api.le.com/mms/out/video/playJson', - media_id, 'Downloading playJson data', query=params, - headers=self.geo_verification_headers()) - + def _check_errors(self, play_json): # Check for errors playstatus = play_json['playstatus'] if playstatus['status'] == 0: @@ -136,43 +125,99 @@ class LeIE(InfoExtractor): msg = 'Generic error. 
flag = %d' % flag raise ExtractorError(msg, expected=True) - playurl = play_json['playurl'] + def _real_extract(self, url): + media_id = self._match_id(url) + page = self._download_webpage(url, media_id) - formats = ['350', '1000', '1300', '720p', '1080p'] - dispatch = playurl['dispatch'] + play_json_h5 = self._download_json( + 'http://api.le.com/mms/out/video/playJsonH5', + media_id, 'Downloading html5 playJson data', query={ + 'id': media_id, + 'platid': 3, + 'splatid': 304, + 'format': 1, + 'tkey': self.get_mms_key(int(time.time())), + 'domain': 'www.le.com', + 'tss': 'no', + }, + headers=self.geo_verification_headers()) + self._check_errors(play_json_h5) - urls = [] - for format_id in formats: - if format_id in dispatch: - media_url = playurl['domain'][0] + dispatch[format_id][0] - media_url += '&' + compat_urllib_parse_urlencode({ - 'm3v': 1, + play_json_flash = self._download_json( + 'http://api.le.com/mms/out/video/playJson', + media_id, 'Downloading flash playJson data', query={ + 'id': media_id, + 'platid': 1, + 'splatid': 101, + 'format': 1, + 'tkey': self.calc_time_key(int(time.time())), + 'domain': 'www.le.com', + }, + headers=self.geo_verification_headers()) + self._check_errors(play_json_flash) + + def get_h5_urls(media_url, format_id): + location = self._download_json( + media_url, media_id, + 'Download JSON metadata for format %s' % format_id, query={ 'format': 1, 'expect': 3, - 'rateid': format_id, - }) + 'tss': 'no', + })['location'] - nodes_data = self._download_json( - media_url, media_id, - 'Download JSON metadata for format %s' % format_id) + return { + 'http': update_url_query(location, {'tss': 'no'}), + 'hls': update_url_query(location, {'tss': 'ios'}), + } - req = self._request_webpage( - nodes_data['nodelist'][0]['location'], media_id, - note='Downloading m3u8 information for format %s' % format_id) + def get_flash_urls(media_url, format_id): + media_url += '&' + compat_urllib_parse_urlencode({ + 'm3v': 1, + 'format': 1, + 'expect': 3, + 
'rateid': format_id, + }) - m3u8_data = self.decrypt_m3u8(req.read()) + nodes_data = self._download_json( + media_url, media_id, + 'Download JSON metadata for format %s' % format_id) - url_info_dict = { - 'url': encode_data_uri(m3u8_data, 'application/vnd.apple.mpegurl'), - 'ext': determine_ext(dispatch[format_id][1]), - 'format_id': format_id, - 'protocol': 'm3u8', - } + req = self._request_webpage( + nodes_data['nodelist'][0]['location'], media_id, + note='Downloading m3u8 information for format %s' % format_id) - if format_id[-1:] == 'p': - url_info_dict['height'] = int_or_none(format_id[:-1]) + m3u8_data = self.decrypt_m3u8(req.read()) - urls.append(url_info_dict) + return { + 'hls': encode_data_uri(m3u8_data, 'application/vnd.apple.mpegurl'), + } + + extracted_formats = [] + formats = [] + for play_json, get_urls in ((play_json_h5, get_h5_urls), (play_json_flash, get_flash_urls)): + playurl = play_json['playurl'] + play_domain = playurl['domain'][0] + + for format_id, format_data in playurl.get('dispatch', []).items(): + if format_id in extracted_formats: + continue + extracted_formats.append(format_id) + + media_url = play_domain + format_data[0] + for protocol, format_url in get_urls(media_url, format_id).items(): + f = { + 'url': format_url, + 'ext': determine_ext(format_data[1]), + 'format_id': '%s-%s' % (protocol, format_id), + 'protocol': 'm3u8_native' if protocol == 'hls' else 'http', + 'quality': int_or_none(format_id), + } + + if format_id[-1:] == 'p': + f['height'] = int_or_none(format_id[:-1]) + + formats.append(f) + self._sort_formats(formats, ('height', 'quality', 'format_id')) publish_time = parse_iso8601(self._html_search_regex( r'发布时间 ([^<>]+) ', page, 'publish time', default=None), @@ -181,7 +226,7 @@ class LeIE(InfoExtractor): return { 'id': media_id, - 'formats': urls, + 'formats': formats, 'title': playurl['title'], 'thumbnail': playurl['pic'], 'description': description, From cedc70b29277c7ceebee4843bc2f13ebe88109b0 Mon Sep 17 00:00:00 
2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 9 Jul 2016 00:28:07 +0700 Subject: [PATCH 08/42] [facebook] Fix invalid video being extracted (Closes #9851) --- youtube_dl/extractor/facebook.py | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py index f5d4f966a..0d43acc4a 100644 --- a/youtube_dl/extractor/facebook.py +++ b/youtube_dl/extractor/facebook.py @@ -219,12 +219,23 @@ class FacebookIE(InfoExtractor): BEFORE = '{swf.addParam(param[0], param[1]);});' AFTER = '.forEach(function(variable) {swf.addVariable(variable[0], variable[1]);});' - m = re.search(re.escape(BEFORE) + '(?:\n|\\\\n)(.*?)' + re.escape(AFTER), webpage) - if m: - swf_params = m.group(1).replace('\\\\', '\\').replace('\\"', '"') + PATTERN = re.escape(BEFORE) + '(?:\n|\\\\n)(.*?)' + re.escape(AFTER) + + for m in re.findall(PATTERN, webpage): + swf_params = m.replace('\\\\', '\\').replace('\\"', '"') data = dict(json.loads(swf_params)) params_raw = compat_urllib_parse_unquote(data['params']) - video_data = json.loads(params_raw)['video_data'] + video_data_candidate = json.loads(params_raw)['video_data'] + for _, f in video_data_candidate.items(): + if not f: + continue + if isinstance(f, dict): + f = [f] + if not isinstance(f, list): + continue + if f[0].get('video_id') == video_id: + video_data = video_data_candidate + break def video_data_list2dict(video_data): ret = {} From 89e2fff2b7e4aeda92cfc811d395b68d18f2d85d Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Fri, 8 Jul 2016 20:15:21 +0100 Subject: [PATCH 09/42] [mgtv] pass geo verification headers for api request --- youtube_dl/extractor/mgtv.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/mgtv.py b/youtube_dl/extractor/mgtv.py index 9fbc74f5d..d970e94ec 100644 --- a/youtube_dl/extractor/mgtv.py +++ b/youtube_dl/extractor/mgtv.py @@ -26,7 +26,8 @@ class MGTVIE(InfoExtractor): video_id = 
self._match_id(url) api_data = self._download_json( 'http://v.api.mgtv.com/player/video', video_id, - query={'video_id': video_id})['data'] + query={'video_id': video_id}, + headers=self.geo_verification_headers())['data'] info = api_data['info'] formats = [] From 3fee7f636cb63e6cb785b07e4ffea3cc91981525 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?D=C3=A9stin=20Reed?= Date: Sat, 25 Jun 2016 19:01:47 +0200 Subject: [PATCH 10/42] [flipagram] Add extractor --- youtube_dl/extractor/common.py | 7 ++ youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/flipagram.py | 103 +++++++++++++++++++++++++++++ 3 files changed, 111 insertions(+) create mode 100644 youtube_dl/extractor/flipagram.py diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index be2b6ff66..b5fce5de2 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -837,6 +837,13 @@ class InfoExtractor(object): 'title': unescapeHTML(json_ld.get('headline')), 'description': unescapeHTML(json_ld.get('articleBody')), }) + elif item_type == 'VideoObject': + info.update({ + 'title': unescapeHTML(json_ld.get('name')), + 'description': unescapeHTML(json_ld.get('description')), + 'upload_date': unified_strdate(json_ld.get('upload_date')), + 'url': unescapeHTML(json_ld.get('contentUrl')), + }) return dict((k, v) for k, v in info.items() if v is not None) @staticmethod diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 12cc1b5f7..9f70ce752 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -256,6 +256,7 @@ from .fivemin import FiveMinIE from .fivetv import FiveTVIE from .fktv import FKTVIE from .flickr import FlickrIE +from .flipagram import FlipagramIE from .folketinget import FolketingetIE from .footyroom import FootyRoomIE from .formula1 import Formula1IE diff --git a/youtube_dl/extractor/flipagram.py b/youtube_dl/extractor/flipagram.py new file mode 100644 index 000000000..7143126b5 
--- /dev/null +++ b/youtube_dl/extractor/flipagram.py @@ -0,0 +1,103 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor + +from ..utils import ( + int_or_none, + parse_iso8601, + unified_strdate, + unified_timestamp, +) + + +class FlipagramIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?flipagram\.com/f/(?P[^/?_]+)' + _TESTS = [{ + 'url': 'https://flipagram.com/f/myrWjW9RJw', + 'md5': '541988fb6c4c7c375215ea22a4a21841', + 'info_dict': { + 'id': 'myrWjW9RJw', + 'title': 'Flipagram by crystaldolce featuring King and Lionheart by Of Monsters and Men', + 'description': 'Herbie\'s first bannana🍌🐢🍌. #animals #pets #reptile #tortoise #sulcata #tort #justatreat #snacktime #bannanas #rescuepets #ofmonstersandmen @animals', + 'ext': 'mp4', + 'uploader': 'Crystal Dolce', + 'creator': 'Crystal Dolce', + 'uploader_id': 'crystaldolce', + } + }, { + 'url': 'https://flipagram.com/f/nyvTSJMKId', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + self.report_extraction(video_id) + user_data = self._parse_json(self._search_regex(r'window.reactH2O\s*=\s*({.+});', webpage, 'user data'), video_id) + content_data = self._search_json_ld(webpage, video_id) + + flipagram = user_data.get('flipagram', {}) + counts = flipagram.get('counts', {}) + user = flipagram.get('user', {}) + video = flipagram.get('video', {}) + + thumbnails = [] + for cover in flipagram.get('covers', []): + if not cover.get('url'): + continue + thumbnails.append({ + 'url': self._proto_relative_url(cover.get('url')), + 'width': int_or_none(cover.get('width')), + 'height': int_or_none(cover.get('height')), + }) + + # Note that this only retrieves comments that are initally loaded. + # For videos with large amounts of comments, most won't be retrieved. 
+ comments = [] + for comment in user_data.get('comments', {}).get(video_id, {}).get('items', []): + text = comment.get('comment', []) + comments.append({ + 'author': comment.get('user', {}).get('name'), + 'author_id': comment.get('user', {}).get('username'), + 'id': comment.get('id'), + 'text': text[0] if text else '', + 'timestamp': unified_timestamp(comment.get('created', '')), + }) + + tags = [tag for item in flipagram['story'][1:] for tag in item] + + formats = [] + if flipagram.get('music', {}).get('track', {}).get('previewUrl', {}): + formats.append({ + 'url': flipagram.get('music').get('track').get('previewUrl'), + 'ext': 'm4a', + 'vcodec': 'none', + }) + + formats.append({ + 'url': video.get('url'), + 'ext': 'mp4', + 'width': int_or_none(video.get('width')), + 'height': int_or_none(video.get('height')), + 'filesize': int_or_none(video.get('size')), + }) + + return { + 'id': video_id, + 'title': content_data['title'], + 'formats': formats, + 'thumbnails': thumbnails, + 'description': content_data.get('description'), + 'uploader': user.get('name'), + 'creator': user.get('name'), + 'timestamp': parse_iso8601(flipagram.get('iso801Created')), + 'upload_date': unified_strdate(flipagram.get('created')), + 'uploader_id': user.get('username'), + 'view_count': int_or_none(counts.get('plays')), + 'repost_count': int_or_none(counts.get('reflips')), + 'comment_count': int_or_none(counts.get('comments')), + 'comments': comments, + 'tags': tags, + } From 2de624fdd5b2d94bcf548633d6fe1897ccb7cf46 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 9 Jul 2016 03:24:36 +0700 Subject: [PATCH 11/42] [extractor/common] Introduce filesize metafield for thumbnails --- youtube_dl/extractor/common.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index b5fce5de2..0cb5e5bb0 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -161,6 +161,7 @@ class 
InfoExtractor(object): * "height" (optional, int) * "resolution" (optional, string "{width}x{height"}, deprecated) + * "filesize" (optional, int) thumbnail: Full URL to a video thumbnail image. description: Full video description. uploader: Full name of the video uploader. From 6b3a3098b53519ba8052ad0572e1d559947cfdd8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 9 Jul 2016 03:27:11 +0700 Subject: [PATCH 12/42] [extractor/common] Extract more metadata for VideoObject in _json_ld --- youtube_dl/extractor/common.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 0cb5e5bb0..9ffe64d05 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -44,6 +44,7 @@ from ..utils import ( sanitized_Request, unescapeHTML, unified_strdate, + unified_timestamp, url_basename, xpath_element, xpath_text, @@ -840,10 +841,16 @@ class InfoExtractor(object): }) elif item_type == 'VideoObject': info.update({ + 'url': json_ld.get('contentUrl'), 'title': unescapeHTML(json_ld.get('name')), 'description': unescapeHTML(json_ld.get('description')), - 'upload_date': unified_strdate(json_ld.get('upload_date')), - 'url': unescapeHTML(json_ld.get('contentUrl')), + 'thumbnail': json_ld.get('thumbnailUrl'), + 'duration': parse_duration(json_ld.get('duration')), + 'timestamp': unified_timestamp(json_ld.get('uploadDate')), + 'filesize': float_or_none(json_ld.get('contentSize')), + 'tbr': int_or_none(json_ld.get('bitrate')), + 'width': int_or_none(json_ld.get('width')), + 'height': int_or_none(json_ld.get('height')), }) return dict((k, v) for k, v in info.items() if v is not None) From 95b31e266b930dc753b8bf5a1673ced9b50fd519 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 9 Jul 2016 03:28:04 +0700 Subject: [PATCH 13/42] [extractor/common] Add expected_type in json ld routines --- youtube_dl/extractor/common.py | 10 +++++++--- 1 
file changed, 7 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 9ffe64d05..816baa424 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -805,15 +805,17 @@ class InfoExtractor(object): return self._html_search_meta('twitter:player', html, 'twitter card player') - def _search_json_ld(self, html, video_id, **kwargs): + def _search_json_ld(self, html, video_id, expected_type=None, **kwargs): json_ld = self._search_regex( r'(?s)<script[^>]+type=(["\'])application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>', html, 'JSON-LD', group='json_ld', **kwargs) if not json_ld: return {} - return self._json_ld(json_ld, video_id, fatal=kwargs.get('fatal', True)) + return self._json_ld( + json_ld, video_id, fatal=kwargs.get('fatal', True), + expected_type=expected_type) - def _json_ld(self, json_ld, video_id, fatal=True): + def _json_ld(self, json_ld, video_id, fatal=True, expected_type=None): if isinstance(json_ld, compat_str): json_ld = self._parse_json(json_ld, video_id, fatal=fatal) if not json_ld: @@ -821,6 +823,8 @@ class InfoExtractor(object): info = {} if json_ld.get('@context') == 'http://schema.org': item_type = json_ld.get('@type') + if expected_type is not None and expected_type != item_type: + return info if item_type == 'TVEpisode': info.update({ 'episode': unescapeHTML(json_ld.get('name')), From 0de168f7ed2da440f6a1bcb614abd26ff73bb840 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 9 Jul 2016 03:29:07 +0700 Subject: [PATCH 14/42] [extractor/generic] Detect schema.org/VideoObject embeds --- youtube_dl/extractor/generic.py | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 31527d1c6..62da9bbc0 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -1313,6 +1313,23 @@ class GenericIE(InfoExtractor): }, 'add_ie': ['Kaltura'], }, + { + # TODO: find 
another test + # http://schema.org/VideoObject + # 'url': 'https://flipagram.com/f/nyvTSJMKId', + # 'md5': '888dcf08b7ea671381f00fab74692755', + # 'info_dict': { + # 'id': 'nyvTSJMKId', + # 'ext': 'mp4', + # 'title': 'Flipagram by sjuria101 featuring Midnight Memories by One Direction', + # 'description': '#love for cats.', + # 'timestamp': 1461244995, + # 'upload_date': '20160421', + # }, + # 'params': { + # 'force_generic_extractor': True, + # }, + } ] def report_following_redirect(self, new_url): @@ -2157,6 +2174,19 @@ class GenericIE(InfoExtractor): if embed_url: return self.url_result(embed_url) + # Looking for http://schema.org/VideoObject + json_ld = self._search_json_ld( + webpage, video_id, default=None, expected_type='VideoObject') + if json_ld and json_ld.get('url'): + info_dict.update({ + 'title': video_title or info_dict['title'], + 'description': video_description, + 'thumbnail': video_thumbnail, + 'age_limit': age_limit + }) + info_dict.update(json_ld) + return info_dict + def check_video(vurl): if YoutubeIE.suitable(vurl): return True From 0af985069b64ca7cdf0a55b721417e807760ab1e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 9 Jul 2016 03:31:17 +0700 Subject: [PATCH 15/42] [flipagram] Improve extraction (Closes #9898) --- youtube_dl/extractor/flipagram.py | 144 ++++++++++++++++-------------- 1 file changed, 78 insertions(+), 66 deletions(-) diff --git a/youtube_dl/extractor/flipagram.py b/youtube_dl/extractor/flipagram.py index 7143126b5..acb6133ff 100644 --- a/youtube_dl/extractor/flipagram.py +++ b/youtube_dl/extractor/flipagram.py @@ -2,102 +2,114 @@ from __future__ import unicode_literals from .common import InfoExtractor - +from ..compat import compat_str from ..utils import ( int_or_none, - parse_iso8601, - unified_strdate, + float_or_none, + try_get, unified_timestamp, ) class FlipagramIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?flipagram\.com/f/(?P[^/?_]+)' - _TESTS = [{ - 'url': 
'https://flipagram.com/f/myrWjW9RJw', - 'md5': '541988fb6c4c7c375215ea22a4a21841', - 'info_dict': { - 'id': 'myrWjW9RJw', - 'title': 'Flipagram by crystaldolce featuring King and Lionheart by Of Monsters and Men', - 'description': 'Herbie\'s first bannana🍌🐢🍌. #animals #pets #reptile #tortoise #sulcata #tort #justatreat #snacktime #bannanas #rescuepets #ofmonstersandmen @animals', - 'ext': 'mp4', - 'uploader': 'Crystal Dolce', - 'creator': 'Crystal Dolce', - 'uploader_id': 'crystaldolce', - } - }, { + _VALID_URL = r'https?://(?:www\.)?flipagram\.com/f/(?P[^/?#&]+)' + _TEST = { 'url': 'https://flipagram.com/f/nyvTSJMKId', - 'only_matching': True, - }] + 'md5': '888dcf08b7ea671381f00fab74692755', + 'info_dict': { + 'id': 'nyvTSJMKId', + 'ext': 'mp4', + 'title': 'Flipagram by sjuria101 featuring Midnight Memories by One Direction', + 'description': 'md5:d55e32edc55261cae96a41fa85ff630e', + 'duration': 35.571, + 'timestamp': 1461244995, + 'upload_date': '20160421', + 'uploader': 'kitty juria', + 'uploader_id': 'sjuria101', + 'creator': 'kitty juria', + 'view_count': int, + 'like_count': int, + 'repost_count': int, + 'comment_count': int, + 'comments': list, + 'formats': 'mincount:2', + }, + } def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - self.report_extraction(video_id) - user_data = self._parse_json(self._search_regex(r'window.reactH2O\s*=\s*({.+});', webpage, 'user data'), video_id) - content_data = self._search_json_ld(webpage, video_id) + video_data = self._parse_json( + self._search_regex( + r'window\.reactH2O\s*=\s*({.+});', webpage, 'video data'), + video_id) - flipagram = user_data.get('flipagram', {}) - counts = flipagram.get('counts', {}) - user = flipagram.get('user', {}) - video = flipagram.get('video', {}) + flipagram = video_data['flipagram'] + video = flipagram['video'] - thumbnails = [] - for cover in flipagram.get('covers', []): - if not cover.get('url'): - continue - thumbnails.append({ - 
'url': self._proto_relative_url(cover.get('url')), - 'width': int_or_none(cover.get('width')), - 'height': int_or_none(cover.get('height')), - }) + json_ld = self._search_json_ld(webpage, video_id, default=False) + title = json_ld.get('title') or flipagram['captionText'] + description = json_ld.get('description') or flipagram.get('captionText') - # Note that this only retrieves comments that are initally loaded. - # For videos with large amounts of comments, most won't be retrieved. - comments = [] - for comment in user_data.get('comments', {}).get(video_id, {}).get('items', []): - text = comment.get('comment', []) - comments.append({ - 'author': comment.get('user', {}).get('name'), - 'author_id': comment.get('user', {}).get('username'), - 'id': comment.get('id'), - 'text': text[0] if text else '', - 'timestamp': unified_timestamp(comment.get('created', '')), - }) + formats = [{ + 'url': video['url'], + 'width': int_or_none(video.get('width')), + 'height': int_or_none(video.get('height')), + 'filesize': int_or_none(video_data.get('size')), + }] - tags = [tag for item in flipagram['story'][1:] for tag in item] - - formats = [] - if flipagram.get('music', {}).get('track', {}).get('previewUrl', {}): + preview_url = try_get( + flipagram, lambda x: x['music']['track']['previewUrl'], compat_str) + if preview_url: formats.append({ - 'url': flipagram.get('music').get('track').get('previewUrl'), + 'url': preview_url, 'ext': 'm4a', 'vcodec': 'none', }) - formats.append({ - 'url': video.get('url'), - 'ext': 'mp4', - 'width': int_or_none(video.get('width')), - 'height': int_or_none(video.get('height')), - 'filesize': int_or_none(video.get('size')), - }) + self._sort_formats(formats) + + counts = flipagram.get('counts', {}) + user = flipagram.get('user', {}) + video_data = flipagram.get('video', {}) + + thumbnails = [{ + 'url': self._proto_relative_url(cover['url']), + 'width': int_or_none(cover.get('width')), + 'height': int_or_none(cover.get('height')), + 'filesize': 
int_or_none(cover.get('size')), + } for cover in flipagram.get('covers', []) if cover.get('url')] + + # Note that this only retrieves comments that are initally loaded. + # For videos with large amounts of comments, most won't be retrieved. + comments = [] + for comment in video_data.get('comments', {}).get(video_id, {}).get('items', []): + text = comment.get('comment') + if not text or not isinstance(text, list): + continue + comments.append({ + 'author': comment.get('user', {}).get('name'), + 'author_id': comment.get('user', {}).get('username'), + 'id': comment.get('id'), + 'text': text[0], + 'timestamp': unified_timestamp(comment.get('created')), + }) return { 'id': video_id, - 'title': content_data['title'], - 'formats': formats, + 'title': title, + 'description': description, + 'duration': float_or_none(flipagram.get('duration'), 1000), 'thumbnails': thumbnails, - 'description': content_data.get('description'), + 'timestamp': unified_timestamp(flipagram.get('iso8601Created')), 'uploader': user.get('name'), - 'creator': user.get('name'), - 'timestamp': parse_iso8601(flipagram.get('iso801Created')), - 'upload_date': unified_strdate(flipagram.get('created')), 'uploader_id': user.get('username'), + 'creator': user.get('name'), 'view_count': int_or_none(counts.get('plays')), + 'like_count': int_or_none(counts.get('likes')), 'repost_count': int_or_none(counts.get('reflips')), 'comment_count': int_or_none(counts.get('comments')), 'comments': comments, - 'tags': tags, + 'formats': formats, } From 1f7258a367bfcc0d60441e38d25373a6ada6b45e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 9 Jul 2016 06:57:04 +0700 Subject: [PATCH 16/42] [animeondemand] Add support for full length films (Closes #10031) --- youtube_dl/extractor/animeondemand.py | 108 ++++++++++++++++---------- 1 file changed, 68 insertions(+), 40 deletions(-) diff --git a/youtube_dl/extractor/animeondemand.py b/youtube_dl/extractor/animeondemand.py index 9b01e38f5..1f044e20b 100644 
--- a/youtube_dl/extractor/animeondemand.py +++ b/youtube_dl/extractor/animeondemand.py @@ -22,6 +22,7 @@ class AnimeOnDemandIE(InfoExtractor): _APPLY_HTML5_URL = 'https://www.anime-on-demand.de/html5apply' _NETRC_MACHINE = 'animeondemand' _TESTS = [{ + # jap, OmU 'url': 'https://www.anime-on-demand.de/anime/161', 'info_dict': { 'id': '161', @@ -30,17 +31,21 @@ class AnimeOnDemandIE(InfoExtractor): }, 'playlist_mincount': 4, }, { - # Film wording is used instead of Episode + # Film wording is used instead of Episode, ger/jap, Dub/OmU 'url': 'https://www.anime-on-demand.de/anime/39', 'only_matching': True, }, { - # Episodes without titles + # Episodes without titles, jap, OmU 'url': 'https://www.anime-on-demand.de/anime/162', 'only_matching': True, }, { # ger/jap, Dub/OmU, account required 'url': 'https://www.anime-on-demand.de/anime/169', 'only_matching': True, + }, { + # Full length film, non-series, ger/jap, Dub/OmU, account required + 'url': 'https://www.anime-on-demand.de/anime/185', + 'only_matching': True, }] def _login(self): @@ -110,35 +115,12 @@ class AnimeOnDemandIE(InfoExtractor): entries = [] - for num, episode_html in enumerate(re.findall( - r'(?s)]+class="episodebox-title".+?>Episodeninhalt<', webpage), 1): - episodebox_title = self._search_regex( - (r'class="episodebox-title"[^>]+title=(["\'])(?P.+?)\1', - r'class="episodebox-title"[^>]+>(?P<title>.+?)<'), - episode_html, 'episodebox title', default=None, group='title') - if not episodebox_title: - continue - - episode_number = int(self._search_regex( - r'(?:Episode|Film)\s*(\d+)', - episodebox_title, 'episode number', default=num)) - episode_title = self._search_regex( - r'(?:Episode|Film)\s*\d+\s*-\s*(.+)', - episodebox_title, 'episode title', default=None) - - video_id = 'episode-%d' % episode_number - - common_info = { - 'id': video_id, - 'series': anime_title, - 'episode': episode_title, - 'episode_number': episode_number, - } - + def extract_info(html, video_id): + title, description = [None] * 
2 formats = [] for input_ in re.findall( - r'<input[^>]+class=["\'].*?streamstarter_html5[^>]+>', episode_html): + r'<input[^>]+class=["\'].*?streamstarter_html5[^>]+>', html): attributes = extract_attributes(input_) playlist_urls = [] for playlist_key in ('data-playlist', 'data-otherplaylist'): @@ -215,28 +197,74 @@ class AnimeOnDemandIE(InfoExtractor): }) formats.extend(file_formats) - if formats: - self._sort_formats(formats) + return { + 'title': title, + 'description': description, + 'formats': formats, + } + + def extract_entries(html, video_id, common_info): + info = extract_info(html, video_id) + + if info['formats']: + self._sort_formats(info['formats']) f = common_info.copy() - f.update({ - 'title': title, - 'description': description, - 'formats': formats, - }) + f.update(info) entries.append(f) - # Extract teaser only when full episode is not available - if not formats: + # Extract teaser/trailer only when full episode is not available + if not info['formats']: m = re.search( - r'data-dialog-header=(["\'])(?P<title>.+?)\1[^>]+href=(["\'])(?P<href>.+?)\3[^>]*>Teaser<', - episode_html) + r'data-dialog-header=(["\'])(?P<title>.+?)\1[^>]+href=(["\'])(?P<href>.+?)\3[^>]*>(?P<kind>Teaser|Trailer)<', + html) if m: f = common_info.copy() f.update({ - 'id': '%s-teaser' % f['id'], + 'id': '%s-%s' % (f['id'], m.group('kind').lower()), 'title': m.group('title'), 'url': compat_urlparse.urljoin(url, m.group('href')), }) entries.append(f) + def extract_episodes(html): + for num, episode_html in enumerate(re.findall( + r'(?s)<h3[^>]+class="episodebox-title".+?>Episodeninhalt<', html), 1): + episodebox_title = self._search_regex( + (r'class="episodebox-title"[^>]+title=(["\'])(?P<title>.+?)\1', + r'class="episodebox-title"[^>]+>(?P<title>.+?)<'), + episode_html, 'episodebox title', default=None, group='title') + if not episodebox_title: + continue + + episode_number = int(self._search_regex( + r'(?:Episode|Film)\s*(\d+)', + episodebox_title, 'episode number', 
default=num)) + episode_title = self._search_regex( + r'(?:Episode|Film)\s*\d+\s*-\s*(.+)', + episodebox_title, 'episode title', default=None) + + video_id = 'episode-%d' % episode_number + + common_info = { + 'id': video_id, + 'series': anime_title, + 'episode': episode_title, + 'episode_number': episode_number, + } + + extract_entries(episode_html, video_id, common_info) + + def extract_film(html, video_id): + common_info = { + 'id': anime_id, + 'title': anime_title, + 'description': anime_description, + } + extract_entries(html, video_id, common_info) + + extract_episodes(webpage) + + if not entries: + extract_film(webpage, anime_id) + return self.playlist_result(entries, anime_id, anime_title, anime_description) From 1251565ee0efd71f98d77f1eeefe3f3e38ec6f43 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 9 Jul 2016 07:12:52 +0700 Subject: [PATCH 17/42] [options] Rollback old behavior for configuratio files' encoding Until agreed with some solution --- youtube_dl/options.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/youtube_dl/options.py b/youtube_dl/options.py index 5302b67cc..c4a85b2c0 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -26,7 +26,11 @@ def parseOpts(overrideArguments=None): except IOError: return default # silently skip if file is not present try: - res = compat_shlex_split(optionf.read(), comments=True) + # FIXME: https://github.com/rg3/youtube-dl/commit/dfe5fa49aed02cf36ba9f743b11b0903554b5e56 + contents = optionf.read() + if sys.version_info < (3,): + contents = contents.decode(preferredencoding()) + res = compat_shlex_split(contents, comments=True) finally: optionf.close() return res From 9e4f5dc1e920b181d7e4a4ae824f86db8bc3a8e3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 9 Jul 2016 07:13:32 +0700 Subject: [PATCH 18/42] [animeondemand] Pass num for episode based videos --- 
youtube_dl/extractor/animeondemand.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/animeondemand.py b/youtube_dl/extractor/animeondemand.py index 1f044e20b..159c6ef5a 100644 --- a/youtube_dl/extractor/animeondemand.py +++ b/youtube_dl/extractor/animeondemand.py @@ -115,7 +115,7 @@ class AnimeOnDemandIE(InfoExtractor): entries = [] - def extract_info(html, video_id): + def extract_info(html, video_id, num=None): title, description = [None] * 2 formats = [] @@ -143,7 +143,7 @@ class AnimeOnDemandIE(InfoExtractor): format_id_list.append(lang) if kind: format_id_list.append(kind) - if not format_id_list: + if not format_id_list and num is not None: format_id_list.append(compat_str(num)) format_id = '-'.join(format_id_list) format_note = ', '.join(filter(None, (kind, lang_note))) @@ -203,8 +203,8 @@ class AnimeOnDemandIE(InfoExtractor): 'formats': formats, } - def extract_entries(html, video_id, common_info): - info = extract_info(html, video_id) + def extract_entries(html, video_id, common_info, num): + info = extract_info(html, video_id, num) if info['formats']: self._sort_formats(info['formats']) From d417fd88d09846edf0c2bed65d7d074c62c0c51b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 9 Jul 2016 07:16:47 +0700 Subject: [PATCH 19/42] release 2016.07.09 --- .github/ISSUE_TEMPLATE.md | 6 +++--- docs/supportedsites.md | 1 + youtube_dl/version.py | 2 +- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index c65462ba4..c2f352f41 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.07.07*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. 
-- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.07.07** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.07.09*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.07.09** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v <your command line> [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.07.07 +[debug] youtube-dl version 2016.07.09 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 9174c6f89..d2a2ef0c4 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -224,6 +224,7 @@ - **Firstpost** - **FiveTV** - **Flickr** + - **Flipagram** - **Folketinget**: Folketinget (ft.dk; Danish parliament) - **FootyRoom** - **Formula1** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 6396ad4c9..1dd2c3300 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.07.07' +__version__ = '2016.07.09' From 6e6b70d65f0681317c425bfe1e157f3474afbbe8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 9 Jul 2016 08:32:55 +0700 Subject: [PATCH 20/42] 
[extractor/generic] Properly comment out a test --- youtube_dl/extractor/generic.py | 34 ++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 62da9bbc0..4efdf146e 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -1313,23 +1313,23 @@ class GenericIE(InfoExtractor): }, 'add_ie': ['Kaltura'], }, - { - # TODO: find another test - # http://schema.org/VideoObject - # 'url': 'https://flipagram.com/f/nyvTSJMKId', - # 'md5': '888dcf08b7ea671381f00fab74692755', - # 'info_dict': { - # 'id': 'nyvTSJMKId', - # 'ext': 'mp4', - # 'title': 'Flipagram by sjuria101 featuring Midnight Memories by One Direction', - # 'description': '#love for cats.', - # 'timestamp': 1461244995, - # 'upload_date': '20160421', - # }, - # 'params': { - # 'force_generic_extractor': True, - # }, - } + # { + # # TODO: find another test + # # http://schema.org/VideoObject + # 'url': 'https://flipagram.com/f/nyvTSJMKId', + # 'md5': '888dcf08b7ea671381f00fab74692755', + # 'info_dict': { + # 'id': 'nyvTSJMKId', + # 'ext': 'mp4', + # 'title': 'Flipagram by sjuria101 featuring Midnight Memories by One Direction', + # 'description': '#love for cats.', + # 'timestamp': 1461244995, + # 'upload_date': '20160421', + # }, + # 'params': { + # 'force_generic_extractor': True, + # }, + # } ] def report_following_redirect(self, new_url): From 9558dcec9c7806c811f4fe8e7758977eaa01a702 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 9 Jul 2016 08:37:02 +0700 Subject: [PATCH 21/42] [youtube:user] Preserve user/c path segment --- youtube_dl/extractor/youtube.py | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 69603c1f8..8aa7dfc41 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ 
-1978,10 +1978,13 @@ class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor): return (False if YoutubePlaylistsIE.suitable(url) or YoutubeLiveIE.suitable(url) else super(YoutubeChannelIE, cls).suitable(url)) + def _build_template_url(self, url, channel_id): + return self._TEMPLATE_URL % channel_id + def _real_extract(self, url): channel_id = self._match_id(url) - url = self._TEMPLATE_URL % channel_id + url = self._build_template_url(url, channel_id) # Channel by page listing is restricted to 35 pages of 30 items, i.e. 1050 videos total (see #5778) # Workaround by extracting as a playlist if managed to obtain channel playlist URL @@ -2038,8 +2041,8 @@ class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor): class YoutubeUserIE(YoutubeChannelIE): IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)' - _VALID_URL = r'(?:(?:https?://(?:\w+\.)?youtube\.com/(?:user/|c/)?(?!(?:attribution_link|watch|results)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_-]+)' - _TEMPLATE_URL = 'https://www.youtube.com/user/%s/videos' + _VALID_URL = r'(?:(?:https?://(?:\w+\.)?youtube\.com/(?:(?P<user>user|c)/)?(?!(?:attribution_link|watch|results)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_-]+)' + _TEMPLATE_URL = 'https://www.youtube.com/%s/%s/videos' IE_NAME = 'youtube:user' _TESTS = [{ @@ -2049,12 +2052,24 @@ class YoutubeUserIE(YoutubeChannelIE): 'id': 'UUfX55Sx5hEFjoC3cNs6mCUQ', 'title': 'Uploads from The Linux Foundation', } + }, { + # Only available via https://www.youtube.com/c/12minuteathlete/videos + # but not https://www.youtube.com/user/12minuteathlete/videos + 'url': 'https://www.youtube.com/c/12minuteathlete/videos', + 'playlist_mincount': 249, + 'info_dict': { + 'id': 'UUVjM-zV6_opMDx7WYxnjZiQ', + 'title': 'Uploads from 12 Minute Athlete', + } }, { 'url': 'ytuser:phihag', 'only_matching': True, }, { 'url': 'https://www.youtube.com/c/gametrailers', 'only_matching': True, + }, { + 'url': 'https://www.youtube.com/gametrailers', + 
'only_matching': True, }, { # This channel is not available. 'url': 'https://www.youtube.com/user/kananishinoSMEJ/videos', @@ -2071,6 +2086,10 @@ class YoutubeUserIE(YoutubeChannelIE): else: return super(YoutubeUserIE, cls).suitable(url) + def _build_template_url(self, url, channel_id): + mobj = re.match(self._VALID_URL, url) + return self._TEMPLATE_URL % (mobj.group('user') or 'user', mobj.group('id')) + class YoutubeLiveIE(YoutubeBaseInfoExtractor): IE_DESC = 'YouTube.com live streams' From 31eeab9f412d17fc7c455340e46cfcc9c91c3c99 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Sat, 9 Jul 2016 03:18:45 +0100 Subject: [PATCH 22/42] [ard] fix f4m extraction and skip tests with 404 errors --- youtube_dl/extractor/ard.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/ard.py b/youtube_dl/extractor/ard.py index fd45b3e42..13a06396d 100644 --- a/youtube_dl/extractor/ard.py +++ b/youtube_dl/extractor/ard.py @@ -13,6 +13,7 @@ from ..utils import ( parse_duration, unified_strdate, xpath_text, + update_url_query, ) from ..compat import compat_etree_fromstring @@ -34,6 +35,7 @@ class ARDMediathekIE(InfoExtractor): # m3u8 download 'skip_download': True, }, + 'skip': 'HTTP Error 404: Not Found', }, { 'url': 'http://www.ardmediathek.de/tv/Tatort/Tatort-Scheinwelten-H%C3%B6rfassung-Video/Das-Erste/Video?documentId=29522730&bcastId=602916', 'md5': 'f4d98b10759ac06c0072bbcd1f0b9e3e', @@ -44,6 +46,7 @@ class ARDMediathekIE(InfoExtractor): 'description': 'md5:196392e79876d0ac94c94e8cdb2875f1', 'duration': 5252, }, + 'skip': 'HTTP Error 404: Not Found', }, { # audio 'url': 'http://www.ardmediathek.de/tv/WDR-H%C3%B6rspiel-Speicher/Tod-eines-Fu%C3%9Fballers/WDR-3/Audio-Podcast?documentId=28488308&bcastId=23074086', @@ -55,6 +58,7 @@ class ARDMediathekIE(InfoExtractor): 'description': 'md5:f6e39f3461f0e1f54bfa48c8875c86ef', 'duration': 3240, }, + 'skip': 'HTTP Error 404: Not Found', }, { 'url': 
'http://mediathek.daserste.de/sendungen_a-z/328454_anne-will/22429276_vertrauen-ist-gut-spionieren-ist-besser-geht', 'only_matching': True, @@ -113,11 +117,14 @@ class ARDMediathekIE(InfoExtractor): continue if ext == 'f4m': formats.extend(self._extract_f4m_formats( - stream_url + '?hdcore=3.1.1&plugin=aasp-3.1.1.69.124', - video_id, preference=-1, f4m_id='hds', fatal=False)) + update_url_query(stream_url, { + 'hdcore': '3.1.1', + 'plugin': 'aasp-3.1.1.69.124' + }), + video_id, f4m_id='hds', fatal=False)) elif ext == 'm3u8': formats.extend(self._extract_m3u8_formats( - stream_url, video_id, 'mp4', preference=1, m3u8_id='hls', fatal=False)) + stream_url, video_id, 'mp4', m3u8_id='hls', fatal=False)) else: if server and server.startswith('rtmp'): f = { @@ -231,7 +238,8 @@ class ARDIE(InfoExtractor): 'title': 'Die Story im Ersten: Mission unter falscher Flagge', 'upload_date': '20140804', 'thumbnail': 're:^https?://.*\.jpg$', - } + }, + 'skip': 'HTTP Error 404: Not Found', } def _real_extract(self, url): From 8cc9b4016d6ae33d8f6b42b7096e8042e7915003 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Sat, 9 Jul 2016 03:21:50 +0100 Subject: [PATCH 23/42] [srmediathek] extend _VALID_URL(closes #9373) --- youtube_dl/extractor/srmediathek.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/srmediathek.py b/youtube_dl/extractor/srmediathek.py index a2569dfba..409d50304 100644 --- a/youtube_dl/extractor/srmediathek.py +++ b/youtube_dl/extractor/srmediathek.py @@ -11,7 +11,7 @@ from ..utils import ( class SRMediathekIE(ARDMediathekIE): IE_NAME = 'sr:mediathek' IE_DESC = 'Saarländischer Rundfunk' - _VALID_URL = r'https?://sr-mediathek\.sr-online\.de/index\.php\?.*?&id=(?P<id>[0-9]+)' + _VALID_URL = r'https?://sr-mediathek(?:\.sr-online)?\.de/index\.php\?.*?&id=(?P<id>[0-9]+)' _TESTS = [{ 'url': 'http://sr-mediathek.sr-online.de/index.php?seite=7&id=28455', @@ -35,7 +35,9 @@ class SRMediathekIE(ARDMediathekIE): # 
m3u8 download 'skip_download': True, }, - 'expected_warnings': ['Unable to download f4m manifest'] + }, { + 'url': 'http://sr-mediathek.de/index.php?seite=7&id=7480', + 'only_matching': True, }] def _real_extract(self, url): From ce43100a0143837432c52fcd156068c3464c4303 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 9 Jul 2016 10:06:40 +0700 Subject: [PATCH 24/42] release 2016.07.09.1 --- .github/ISSUE_TEMPLATE.md | 6 +++--- youtube_dl/version.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index c2f352f41..a1b5b0baa 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.07.09*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.07.09** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.07.09.1*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. 
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.07.09.1** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v <your command line> [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.07.09 +[debug] youtube-dl version 2016.07.09.1 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 1dd2c3300..3e45977c9 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.07.09' +__version__ = '2016.07.09.1' From 0ece114b7b3c9f7277adbd77de17534e39137675 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sat, 9 Jul 2016 14:38:27 +0800 Subject: [PATCH 25/42] [vimeo] Recognize non-standard embeds (#1638) --- youtube_dl/extractor/vimeo.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index d9c9852d4..7e854f326 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -364,6 +364,11 @@ class VimeoIE(VimeoBaseInfoExtractor): r'<embed[^>]+?src="((?:https?:)?//(?:www\.)?vimeo\.com/moogaloop\.swf.+?)"', webpage) if mobj: return mobj.group(1) + # Look more for non-standard embedded Vimeo player + mobj = re.search( + r'<video[^>]+src=(?P<q1>[\'"])(?P<url>(?:https?:)?//(?:www\.)?vimeo\.com/[0-9]+)(?P=q1)', webpage) + if mobj: + return mobj.group('url') def 
_verify_player_video_password(self, url, video_id): password = self._downloader.params.get('videopassword') From c03adf90bd92ceba55f99cf8f3b61e6199166486 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sat, 9 Jul 2016 14:39:01 +0800 Subject: [PATCH 26/42] [generic] Add the test. Closes #1638 --- youtube_dl/extractor/generic.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 4efdf146e..cddd1a817 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -1313,6 +1313,21 @@ class GenericIE(InfoExtractor): }, 'add_ie': ['Kaltura'], }, + { + # Non-standard Vimeo embed + 'url': 'https://openclassrooms.com/courses/understanding-the-web', + 'md5': '64d86f1c7d369afd9a78b38cbb88d80a', + 'info_dict': { + 'id': '148867247', + 'ext': 'mp4', + 'title': 'Understanding the web - Teaser', + 'description': 'This is "Understanding the web - Teaser" by openclassrooms on Vimeo, the home for high quality videos and the people who love them.', + 'upload_date': '20151214', + 'uploader': 'OpenClassrooms', + 'uploader_id': 'openclassrooms', + }, + 'add_ie': ['Vimeo'], + }, # { # # TODO: find another test # # http://schema.org/VideoObject From 6daf34a0457cdb3b657cc01997ce5197dab91047 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 9 Jul 2016 21:25:07 +0700 Subject: [PATCH 27/42] [facebook] Fix typo and break when found video_data (Closes #10048) --- youtube_dl/extractor/facebook.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py index 0d43acc4a..cdb093262 100644 --- a/youtube_dl/extractor/facebook.py +++ b/youtube_dl/extractor/facebook.py @@ -231,11 +231,13 @@ class FacebookIE(InfoExtractor): continue if isinstance(f, dict): f = [f] - if isinstance(f, list): + if not isinstance(f, list): continue if f[0].get('video_id') 
== video_id: video_data = video_data_candidate break + if video_data: + break def video_data_list2dict(video_data): ret = {} From 61a98b862320801ad2ae9d6cb922d9575ead5276 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 9 Jul 2016 21:29:11 +0700 Subject: [PATCH 28/42] [lynda] Remove md5 from test (Closes #10047) --- youtube_dl/extractor/lynda.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/lynda.py b/youtube_dl/extractor/lynda.py index 1237e1573..a98c4c530 100644 --- a/youtube_dl/extractor/lynda.py +++ b/youtube_dl/extractor/lynda.py @@ -100,7 +100,7 @@ class LyndaIE(LyndaBaseIE): _TESTS = [{ 'url': 'http://www.lynda.com/Bootstrap-tutorials/Using-exercise-files/110885/114408-4.html', - 'md5': '679734f6786145da3546585de9a356be', + # md5 is unstable 'info_dict': { 'id': '114408', 'ext': 'mp4', From ab52bb5137faf1cb5595b6777adf721eec6af78a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 9 Jul 2016 22:20:34 +0700 Subject: [PATCH 29/42] [animeondemand] Fix typo --- youtube_dl/extractor/animeondemand.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/animeondemand.py b/youtube_dl/extractor/animeondemand.py index 159c6ef5a..9e28f2579 100644 --- a/youtube_dl/extractor/animeondemand.py +++ b/youtube_dl/extractor/animeondemand.py @@ -203,7 +203,7 @@ class AnimeOnDemandIE(InfoExtractor): 'formats': formats, } - def extract_entries(html, video_id, common_info, num): + def extract_entries(html, video_id, common_info, num=None): info = extract_info(html, video_id, num) if info['formats']: From 0286b85c79c278375cc5e71d3199bdc8284a5a11 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 9 Jul 2016 22:22:24 +0700 Subject: [PATCH 30/42] release 2016.07.09.2 --- .github/ISSUE_TEMPLATE.md | 6 +++--- youtube_dl/version.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) 
diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index a1b5b0baa..1a3a50eb2 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.07.09.1*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.07.09.1** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.07.09.2*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.07.09.2** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v <your command line> [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.07.09.1 +[debug] youtube-dl version 2016.07.09.2 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 3e45977c9..728ad2d50 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.07.09.1' +__version__ = '2016.07.09.2' From 
3121b25639a06cc9cad48fad0ce222482a58b216 Mon Sep 17 00:00:00 2001 From: Nehal Patel <nehalvpatels+github@gmail.com> Date: Wed, 22 Jun 2016 02:58:42 -0500 Subject: [PATCH 31/42] [roosterteeth] Add extractor --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/roosterteeth.py | 112 +++++++++++++++++++++++++++ 2 files changed, 113 insertions(+) create mode 100644 youtube_dl/extractor/roosterteeth.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 9f70ce752..864c9af68 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -680,6 +680,7 @@ from .rice import RICEIE from .ringtv import RingTVIE from .ro220 import Ro220IE from .rockstargames import RockstarGamesIE +from .roosterteeth import RoosterTeethIE from .rottentomatoes import RottenTomatoesIE from .roxwel import RoxwelIE from .rtbf import RTBFIE diff --git a/youtube_dl/extractor/roosterteeth.py b/youtube_dl/extractor/roosterteeth.py new file mode 100644 index 000000000..4053747d3 --- /dev/null +++ b/youtube_dl/extractor/roosterteeth.py @@ -0,0 +1,112 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + urlencode_postdata, +) + + +class RoosterTeethIE(InfoExtractor): + _VALID_URL = r'https?://(?:.+?\.)?roosterteeth\.com/episode/(?P<id>[^/?#&]+)' + _LOGIN_URL = 'https://roosterteeth.com/login' + _NETRC_MACHINE = 'roosterteeth' + _TESTS = [{ + 'url': 'http://roosterteeth.com/episode/million-dollars-but-season-2-million-dollars-but-the-game-announcement', + 'info_dict': { + 'id': '26576', + 'ext': 'mp4', + 'title': 'Million Dollars, But... The Game Announcement', + 'thumbnail': 're:^https?://.*\.png$', + 'description': 'Introducing Million Dollars, But... The Game! Available for pre-order now at www.MDBGame.com ', + 'creator': 'Rooster Teeth', + 'series': 'Million Dollars, But...', + 'episode': 'Million Dollars, But... 
The Game Announcement', + 'episode_id': '26576', + }, + 'params': { + 'skip_download': True, # m3u8 downloads + }, + }, { + 'url': 'http://achievementhunter.roosterteeth.com/episode/off-topic-the-achievement-hunter-podcast-2016-i-didn-t-think-it-would-pass-31', + 'only_matching': True, + }, { + 'url': 'http://funhaus.roosterteeth.com/episode/funhaus-shorts-2016-austin-sucks-funhaus-shorts', + 'only_matching': True, + }, { + 'url': 'http://screwattack.roosterteeth.com/episode/death-battle-season-3-mewtwo-vs-shadow', + 'only_matching': True, + }, { + 'url': 'http://theknow.roosterteeth.com/episode/the-know-game-news-season-1-boring-steam-sales-are-better', + 'only_matching': True, + }] + + def _login(self): + (username, password) = self._get_login_info() + if username is None or password is None: + return False + + # token is required to authenticate request + login_page = self._download_webpage(self._LOGIN_URL, None, 'Getting login token', 'Unable to get login token') + + login_form = self._hidden_inputs(login_page) + login_form.update({ + 'username': username, + 'password': password, + }) + login_payload = urlencode_postdata(login_form) + + # required for proper responses + login_headers = { + 'Referer': self._LOGIN_URL, + } + + login_request = self._download_webpage( + self._LOGIN_URL, None, + note='Logging in as %s' % username, + data=login_payload, + headers=login_headers) + + if 'Authentication failed' in login_request: + raise ExtractorError( + 'Login failed (invalid username/password)', expected=True) + + def _real_initialize(self): + self._login() + + def _real_extract(self, url): + match_id = self._match_id(url) + webpage = self._download_webpage(url, match_id) + + episode_id = self._html_search_regex(r"commentControls\('#comment-([0-9]+)'\)", webpage, 'episode id', match_id, False) + + self.report_extraction(episode_id) + + title = self._html_search_regex(r'<title>([^<]+)', webpage, 'episode title', self._og_search_title(webpage), False) + thumbnail = 
self._og_search_thumbnail(webpage) + description = self._og_search_description(webpage) + creator = self._html_search_regex(r'

Latest (.+) Gear

', webpage, 'site', 'Rooster Teeth', False) + series = self._html_search_regex(r'

More ([^<]+)

', webpage, 'series', fatal=False) + episode = self._html_search_regex(r'([^<]+)', webpage, 'episode title', fatal=False) + + if '
' in webpage: + self.raise_login_required('%s is only available for FIRST members' % title) + + if '
' in webpage: + self.raise_login_required('%s is not available yet' % title) + + formats = self._extract_m3u8_formats(self._html_search_regex(r"file: '(.+?)m3u8'", webpage, 'm3u8 url') + 'm3u8', episode_id, ext='mp4') + self._sort_formats(formats) + + return { + 'id': episode_id, + 'title': title, + 'formats': formats, + 'thumbnail': thumbnail, + 'description': description, + 'creator': creator, + 'series': series, + 'episode': episode, + 'episode_id': episode_id, + } From 865b0872249e0d402244b4c72e9f79dc6415c926 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 10 Jul 2016 01:28:28 +0700 Subject: [PATCH 32/42] [roosterteeth] Improve (Closes #9864) --- youtube_dl/extractor/roosterteeth.py | 120 +++++++++++++++++---------- 1 file changed, 78 insertions(+), 42 deletions(-) diff --git a/youtube_dl/extractor/roosterteeth.py b/youtube_dl/extractor/roosterteeth.py index 4053747d3..f5b2f560c 100644 --- a/youtube_dl/extractor/roosterteeth.py +++ b/youtube_dl/extractor/roosterteeth.py @@ -1,9 +1,14 @@ # coding: utf-8 from __future__ import unicode_literals +import re + from .common import InfoExtractor from ..utils import ( ExtractorError, + int_or_none, + strip_or_none, + unescapeHTML, urlencode_postdata, ) @@ -14,19 +19,17 @@ class RoosterTeethIE(InfoExtractor): _NETRC_MACHINE = 'roosterteeth' _TESTS = [{ 'url': 'http://roosterteeth.com/episode/million-dollars-but-season-2-million-dollars-but-the-game-announcement', + 'md5': 'e2bd7764732d785ef797700a2489f212', 'info_dict': { 'id': '26576', + 'display_id': 'million-dollars-but-season-2-million-dollars-but-the-game-announcement', 'ext': 'mp4', - 'title': 'Million Dollars, But... The Game Announcement', + 'title': 'Million Dollars, But...: Million Dollars, But... The Game Announcement', + 'description': 'md5:0cc3b21986d54ed815f5faeccd9a9ca5', 'thumbnail': 're:^https?://.*\.png$', - 'description': 'Introducing Million Dollars, But... The Game! 
Available for pre-order now at www.MDBGame.com ', - 'creator': 'Rooster Teeth', 'series': 'Million Dollars, But...', 'episode': 'Million Dollars, But... The Game Announcement', - 'episode_id': '26576', - }, - 'params': { - 'skip_download': True, # m3u8 downloads + 'comment_count': int, }, }, { 'url': 'http://achievementhunter.roosterteeth.com/episode/off-topic-the-achievement-hunter-podcast-2016-i-didn-t-think-it-would-pass-31', @@ -40,73 +43,106 @@ class RoosterTeethIE(InfoExtractor): }, { 'url': 'http://theknow.roosterteeth.com/episode/the-know-game-news-season-1-boring-steam-sales-are-better', 'only_matching': True, + }, { + # only available for FIRST members + 'url': 'http://roosterteeth.com/episode/rt-docs-the-world-s-greatest-head-massage-the-world-s-greatest-head-massage-an-asmr-journey-part-one', + 'only_matching': True, }] def _login(self): (username, password) = self._get_login_info() - if username is None or password is None: - return False + if username is None: + return - # token is required to authenticate request - login_page = self._download_webpage(self._LOGIN_URL, None, 'Getting login token', 'Unable to get login token') + login_page = self._download_webpage( + self._LOGIN_URL, None, + note='Downloading login page', + errnote='Unable to download login page') login_form = self._hidden_inputs(login_page) + login_form.update({ 'username': username, 'password': password, }) - login_payload = urlencode_postdata(login_form) - - # required for proper responses - login_headers = { - 'Referer': self._LOGIN_URL, - } login_request = self._download_webpage( self._LOGIN_URL, None, note='Logging in as %s' % username, - data=login_payload, - headers=login_headers) + data=urlencode_postdata(login_form), + headers={ + 'Referer': self._LOGIN_URL, + }) - if 'Authentication failed' in login_request: - raise ExtractorError( - 'Login failed (invalid username/password)', expected=True) + if not any(re.search(p, login_request) for p in ( + 
r'href=["\']https?://(?:www\.)?roosterteeth\.com/logout"', + r'>Sign Out<')): + error = self._html_search_regex( + r'(?s)]+class=(["\']).*?\balert-danger\b.*?\1[^>]*>(?:\s*]*>.*?)?(?P.+?)
', + login_request, 'alert', default=None, group='error') + if error: + raise ExtractorError('Unable to login: %s' % error, expected=True) + raise ExtractorError('Unable to log in') def _real_initialize(self): self._login() def _real_extract(self, url): - match_id = self._match_id(url) - webpage = self._download_webpage(url, match_id) + display_id = self._match_id(url) - episode_id = self._html_search_regex(r"commentControls\('#comment-([0-9]+)'\)", webpage, 'episode id', match_id, False) + webpage = self._download_webpage(url, display_id) - self.report_extraction(episode_id) + episode = strip_or_none(unescapeHTML(self._search_regex( + (r'videoTitle\s*=\s*(["\'])(?P(?:(?!\1).)+)\1', + r'<title>(?P<title>[^<]+)'), webpage, 'title', + default=None, group='title'))) - title = self._html_search_regex(r'([^<]+)', webpage, 'episode title', self._og_search_title(webpage), False) - thumbnail = self._og_search_thumbnail(webpage) - description = self._og_search_description(webpage) - creator = self._html_search_regex(r'

Latest (.+) Gear

', webpage, 'site', 'Rooster Teeth', False) - series = self._html_search_regex(r'

More ([^<]+)

', webpage, 'series', fatal=False) - episode = self._html_search_regex(r'([^<]+)', webpage, 'episode title', fatal=False) + title = strip_or_none(self._og_search_title( + webpage, default=None)) or episode - if '
' in webpage: - self.raise_login_required('%s is only available for FIRST members' % title) + m3u8_url = self._search_regex( + r'file\s*:\s*(["\'])(?Phttp.+?\.m3u8.*?)\1', + webpage, 'm3u8 url', default=None, group='url') - if '
' in webpage: - self.raise_login_required('%s is not available yet' % title) + if not m3u8_url: + if re.search(r']+class=["\']non-sponsor', webpage): + self.raise_login_required( + '%s is only available for FIRST members' % display_id) - formats = self._extract_m3u8_formats(self._html_search_regex(r"file: '(.+?)m3u8'", webpage, 'm3u8 url') + 'm3u8', episode_id, ext='mp4') + if re.search(r']+class=["\']golive-gate', webpage): + self.raise_login_required('%s is not available yet' % display_id) + + raise ExtractorError('Unable to extract m3u8 URL') + + formats = self._extract_m3u8_formats( + m3u8_url, display_id, ext='mp4', + entry_protocol='m3u8_native', m3u8_id='hls') self._sort_formats(formats) + description = strip_or_none(self._og_search_description(webpage)) + thumbnail = self._proto_relative_url(self._og_search_thumbnail(webpage)) + + series = self._search_regex( + (r'

More ([^<]+)

', r']+>See All ([^<]+) Videos<'), + webpage, 'series', fatal=False) + + comment_count = int_or_none(self._search_regex( + r'>Comments \((\d+)\)<', webpage, + 'comment count', fatal=False)) + + video_id = self._search_regex( + (r'containerId\s*=\s*["\']episode-(\d+)\1', + r' Date: Sun, 10 Jul 2016 01:29:30 +0700 Subject: [PATCH 33/42] Credit @nehalvpatel for roosterteeth (#9864) --- AUTHORS | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS b/AUTHORS index f74b30d07..f762e8a16 100644 --- a/AUTHORS +++ b/AUTHORS @@ -177,3 +177,4 @@ Roman Tsiupa Artur Krysiak Jakub Adam Wieczorek Aleksandar Topuzović +Nehal Patel From c3baaedfc8aac6ee6f809ef97b07ebebb07a1097 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sun, 29 May 2016 03:33:27 +0800 Subject: [PATCH 34/42] [miomio] Support new 'h5' player (closes #9605) Depends on #8876 --- youtube_dl/extractor/miomio.py | 51 ++++++++++++++++++++++++++-------- 1 file changed, 39 insertions(+), 12 deletions(-) diff --git a/youtube_dl/extractor/miomio.py b/youtube_dl/extractor/miomio.py index 170ebd9eb..890aba19d 100644 --- a/youtube_dl/extractor/miomio.py +++ b/youtube_dl/extractor/miomio.py @@ -4,6 +4,7 @@ from __future__ import unicode_literals import random from .common import InfoExtractor +from ..compat import compat_urlparse from ..utils import ( xpath_text, int_or_none, @@ -40,20 +41,22 @@ class MioMioIE(InfoExtractor): 'title': 'The New Macbook 2015 上手试玩与简评' }, 'playlist_mincount': 2, + }, { + # new 'h5' player + 'url': 'http://www.miomio.tv/watch/cc273295/', + 'md5': '', + 'info_dict': { + 'id': '273295', + 'ext': 'mp4', + 'title': 'アウト×デラックス 20160526', + }, + 'params': { + # intermittent HTTP 500 + 'skip_download': True, + }, }] - def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - - title = self._html_search_meta( - 'description', webpage, 'title', fatal=True) - - mioplayer_path = self._search_regex( - r'src="(/mioplayer/[^"]+)"', webpage, 
'ref_path') - - http_headers = {'Referer': 'http://www.miomio.tv%s' % mioplayer_path} - + def _extract_mioplayer(self, webpage, video_id, title, http_headers): xml_config = self._search_regex( r'flashvars="type=(?:sina|video)&(.+?)&', webpage, 'xml config') @@ -92,10 +95,34 @@ class MioMioIE(InfoExtractor): 'http_headers': http_headers, }) + return entries + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + title = self._html_search_meta( + 'description', webpage, 'title', fatal=True) + + mioplayer_path = self._search_regex( + r'src="(/mioplayer(?:_h5)?/[^"]+)"', webpage, 'ref_path') + + if '_h5' in mioplayer_path: + player_url = compat_urlparse.urljoin(url, mioplayer_path) + player_webpage = self._download_webpage( + player_url, video_id, + note='Downloading player webpage', headers={'Referer': url}) + entries = self._parse_html5_media_entries(player_url, player_webpage) + http_headers = {'Referer': player_url} + else: + http_headers = {'Referer': 'http://www.miomio.tv%s' % mioplayer_path} + entries = self._extract_mioplayer(webpage, video_id, title, http_headers) + if len(entries) == 1: segment = entries[0] segment['id'] = video_id segment['title'] = title + segment['http_headers'] = http_headers return segment return { From ae0185016521e6fd284c87e2b138c0a8aca8a849 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Mon, 11 Jul 2016 00:03:24 +0800 Subject: [PATCH 35/42] [miomio] Fix _TESTS --- youtube_dl/extractor/miomio.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/miomio.py b/youtube_dl/extractor/miomio.py index 890aba19d..937ba0f28 100644 --- a/youtube_dl/extractor/miomio.py +++ b/youtube_dl/extractor/miomio.py @@ -19,13 +19,16 @@ class MioMioIE(InfoExtractor): _TESTS = [{ # "type=video" in flashvars 'url': 'http://www.miomio.tv/watch/cc88912/', - 'md5': '317a5f7f6b544ce8419b784ca8edae65', 'info_dict': { 'id': '88912', 'ext': 'flv', 'title': 
'【SKY】字幕 铠武昭和VS平成 假面骑士大战FEAT战队 魔星字幕组 字幕', 'duration': 5923, }, + 'params': { + # The server provides broken file + 'skip_download': True, + } }, { 'url': 'http://www.miomio.tv/watch/cc184024/', 'info_dict': { @@ -33,7 +36,7 @@ class MioMioIE(InfoExtractor): 'title': '《动漫同人插画绘制》', }, 'playlist_mincount': 86, - 'skip': 'This video takes time too long for retrieving the URL', + 'skip': 'Unable to load videos', }, { 'url': 'http://www.miomio.tv/watch/cc173113/', 'info_dict': { @@ -41,6 +44,7 @@ class MioMioIE(InfoExtractor): 'title': 'The New Macbook 2015 上手试玩与简评' }, 'playlist_mincount': 2, + 'skip': 'Unable to load videos', }, { # new 'h5' player 'url': 'http://www.miomio.tv/watch/cc273295/', From e2dbcaa1bf65aa502718005d5fbd00189618469f Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Mon, 11 Jul 2016 00:52:25 +0800 Subject: [PATCH 36/42] [vuclip] Fix extraction --- youtube_dl/extractor/vuclip.py | 43 ++++++++++++---------------------- 1 file changed, 15 insertions(+), 28 deletions(-) diff --git a/youtube_dl/extractor/vuclip.py b/youtube_dl/extractor/vuclip.py index eaa888f00..b73da5cd0 100644 --- a/youtube_dl/extractor/vuclip.py +++ b/youtube_dl/extractor/vuclip.py @@ -9,7 +9,7 @@ from ..compat import ( from ..utils import ( ExtractorError, parse_duration, - qualities, + remove_end, ) @@ -22,7 +22,7 @@ class VuClipIE(InfoExtractor): 'id': '922692425', 'ext': '3gp', 'title': 'The Toy Soldiers - Hollywood Movie Trailer', - 'duration': 180, + 'duration': 177, } } @@ -46,34 +46,21 @@ class VuClipIE(InfoExtractor): '%s said: %s' % (self.IE_NAME, error_msg), expected=True) # These clowns alternate between two page types - links_code = self._search_regex( - r'''(?xs) - (?: - | - \s*
- ) - (.*?) - (?: - - ) - ''', webpage, 'links') - title = self._html_search_regex( - r'(.*?)-\s*Vuclip', webpage, 'title').strip() + video_url = self._search_regex( + r']+href="([^"]+)"[^>]*>]+src="[^"]*/play\.gif', + webpage, 'video URL', default=None) + if video_url: + formats = [{ + 'url': video_url, + }] + else: + formats = self._parse_html5_media_entries(url, webpage)[0]['formats'] - quality_order = qualities(['Reg', 'Hi']) - formats = [] - for url, q in re.findall( - r'[^"]+)".*?>(?:]*>)?(?P[^<]+)(?:)?', links_code): - format_id = compat_urllib_parse_urlparse(url).scheme + '-' + q - formats.append({ - 'format_id': format_id, - 'url': url, - 'quality': quality_order(q), - }) - self._sort_formats(formats) + title = remove_end(self._html_search_regex( + r'(.*?)-\s*Vuclip', webpage, 'title').strip(), ' - Video') - duration = parse_duration(self._search_regex( - r'\(([0-9:]+)\)', webpage, 'duration', fatal=False)) + duration = parse_duration(self._html_search_regex( + r'[(>]([0-9]+:[0-9]+)(?: Date: Mon, 11 Jul 2016 03:08:38 +0700 Subject: [PATCH 37/42] [youtube] Relax TFA regexes --- youtube_dl/extractor/youtube.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 8aa7dfc41..1687d5ef9 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -137,7 +137,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor): # Two-Factor # TODO add SMS and phone call support - these require making a request and then prompting the user - if re.search(r'(?i)]* id="challenge"', login_results) is not None: + if re.search(r'(?i)]+id="challenge"', login_results) is not None: tfa_code = self._get_tfa_info('2-step verification code') if not tfa_code: @@ -165,17 +165,17 @@ class YoutubeBaseInfoExtractor(InfoExtractor): if tfa_results is False: return False - if re.search(r'(?i)]* id="challenge"', tfa_results) is not None: + if re.search(r'(?i)]+id="challenge"', 
tfa_results) is not None: self._downloader.report_warning('Two-factor code expired or invalid. Please try again, or use a one-use backup code instead.') return False - if re.search(r'(?i)]* id="gaia_loginform"', tfa_results) is not None: + if re.search(r'(?i)]+id="gaia_loginform"', tfa_results) is not None: self._downloader.report_warning('unable to log in - did the page structure change?') return False if re.search(r'smsauth-interstitial-reviewsettings', tfa_results) is not None: self._downloader.report_warning('Your Google account has a security notice. Please log in on your web browser, resolve the notice, and try again.') return False - if re.search(r'(?i)]* id="gaia_loginform"', login_results) is not None: + if re.search(r'(?i)]+id="gaia_loginform"', login_results) is not None: self._downloader.report_warning('unable to log in: bad username or password') return False return True From 5c4dcf817268105aa0bcb6dc4864bc9b98826abb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 11 Jul 2016 03:14:39 +0700 Subject: [PATCH 38/42] [vidzi] Add support for embed URLs (Closes #10058) --- youtube_dl/extractor/vidzi.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/vidzi.py b/youtube_dl/extractor/vidzi.py index 3c78fb3d5..d49cc6cbc 100644 --- a/youtube_dl/extractor/vidzi.py +++ b/youtube_dl/extractor/vidzi.py @@ -9,8 +9,8 @@ from ..utils import ( class VidziIE(JWPlatformBaseIE): - _VALID_URL = r'https?://(?:www\.)?vidzi\.tv/(?P\w+)' - _TEST = { + _VALID_URL = r'https?://(?:www\.)?vidzi\.tv/(?:embed-)?(?P[0-9a-zA-Z]+)' + _TESTS = [{ 'url': 'http://vidzi.tv/cghql9yq6emu.html', 'md5': '4f16c71ca0c8c8635ab6932b5f3f1660', 'info_dict': { @@ -22,12 +22,16 @@ class VidziIE(JWPlatformBaseIE): # m3u8 download 'skip_download': True, }, - } + }, { + 'url': 'http://vidzi.tv/embed-4z2yb0rzphe9-600x338.html', + 'skip_download': True, + }] def _real_extract(self, url): video_id = self._match_id(url) - webpage = 
self._download_webpage(url, video_id) + webpage = self._download_webpage( + 'http://vidzi.tv/%s' % video_id, video_id) title = self._html_search_regex( r'(?s)

(.*?)

', webpage, 'title') From a26bcc61c177470606a1b5e8fd74469e894745b2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 11 Jul 2016 03:17:12 +0700 Subject: [PATCH 39/42] release 2016.07.11 --- .github/ISSUE_TEMPLATE.md | 6 +++--- docs/supportedsites.md | 1 + youtube_dl/version.py | 2 +- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 1a3a50eb2..704a8b911 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.07.09.2*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.07.09.2** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.07.11*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. 
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.07.11** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.07.09.2 +[debug] youtube-dl version 2016.07.11 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/docs/supportedsites.md b/docs/supportedsites.md index d2a2ef0c4..5bcd6de1c 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -554,6 +554,7 @@ - **RICE** - **RingTV** - **RockstarGames** + - **RoosterTeeth** - **RottenTomatoes** - **Roxwel** - **RTBF** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 728ad2d50..d60480223 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.07.09.2' +__version__ = '2016.07.11' From 8e7020daef5477a05e6f02c3b59b0bd04b315eb6 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Mon, 11 Jul 2016 13:19:25 +0800 Subject: [PATCH 40/42] [rudo] Add new extractor Used in biobiochile.tv --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/rudo.py | 53 ++++++++++++++++++++++++++++++ 2 files changed, 54 insertions(+) create mode 100644 youtube_dl/extractor/rudo.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 864c9af68..b08df41b4 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -691,6 +691,7 @@ from .rtp import 
RTPIE from .rts import RTSIE from .rtve import RTVEALaCartaIE, RTVELiveIE, RTVEInfantilIE from .rtvnh import RTVNHIE +from .rudo import RudoIE from .ruhd import RUHDIE from .ruleporn import RulePornIE from .rutube import ( diff --git a/youtube_dl/extractor/rudo.py b/youtube_dl/extractor/rudo.py new file mode 100644 index 000000000..38366b784 --- /dev/null +++ b/youtube_dl/extractor/rudo.py @@ -0,0 +1,53 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .jwplatform import JWPlatformBaseIE +from ..utils import ( + js_to_json, + get_element_by_class, + unified_strdate, +) + + +class RudoIE(JWPlatformBaseIE): + _VALID_URL = r'https?://rudo\.video/vod/(?P[0-9a-zA-Z]+)' + + _TEST = { + 'url': 'http://rudo.video/vod/oTzw0MGnyG', + 'md5': '2a03a5b32dd90a04c83b6d391cf7b415', + 'info_dict': { + 'id': 'oTzw0MGnyG', + 'ext': 'mp4', + 'title': 'Comentario Tomás Mosciatti', + 'upload_date': '20160617', + }, + } + + @classmethod + def _extract_url(self, webpage): + mobj = re.search( + ']+src=(?P[\'"])(?P(?:https?:)?//rudo\.video/vod/[0-9a-zA-Z]+)(?P=q1)', + webpage) + if mobj: + return mobj.group('url') + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id, encoding='iso-8859-1') + + jwplayer_data = self._parse_json(self._search_regex( + r'(?s)playerInstance\.setup\(({.+?})\)', webpage, 'jwplayer data'), video_id, + transform_source=lambda s: js_to_json(re.sub(r'encodeURI\([^)]+\)', '""', s))) + + info_dict = self._parse_jwplayer_data( + jwplayer_data, video_id, require_title=False, m3u8_id='hls') + + info_dict.update({ + 'title': self._og_search_title(webpage), + 'upload_date': unified_strdate(get_element_by_class('date', webpage)), + }) + + return info_dict From b99af8a51cf359d7448740de7159383ff63cfe6c Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Mon, 11 Jul 2016 13:23:57 +0800 Subject: [PATCH 41/42] [biobiochiletv] Fix extraction and update _TESTS --- 
youtube_dl/extractor/biobiochiletv.py | 53 ++++++++++++--------------- 1 file changed, 24 insertions(+), 29 deletions(-) diff --git a/youtube_dl/extractor/biobiochiletv.py b/youtube_dl/extractor/biobiochiletv.py index 133228133..7608c0a08 100644 --- a/youtube_dl/extractor/biobiochiletv.py +++ b/youtube_dl/extractor/biobiochiletv.py @@ -2,11 +2,15 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..utils import remove_end +from ..utils import ( + ExtractorError, + remove_end, +) +from .rudo import RudoIE class BioBioChileTVIE(InfoExtractor): - _VALID_URL = r'https?://tv\.biobiochile\.cl/notas/(?:[^/]+/)+(?P[^/]+)\.shtml' + _VALID_URL = r'https?://(?:tv|www)\.biobiochile\.cl/(?:notas|noticias)/(?:[^/]+/)+(?P[^/]+)\.shtml' _TESTS = [{ 'url': 'http://tv.biobiochile.cl/notas/2015/10/21/sobre-camaras-y-camarillas-parlamentarias.shtml', @@ -18,6 +22,7 @@ class BioBioChileTVIE(InfoExtractor): 'thumbnail': 're:^https?://.*\.jpg$', 'uploader': 'Fernando Atria', }, + 'skip': 'URL expired and redirected to http://www.biobiochile.cl/portada/bbtv/index.html', }, { # different uploader layout 'url': 'http://tv.biobiochile.cl/notas/2016/03/18/natalia-valdebenito-repasa-a-diputado-hasbun-paso-a-la-categoria-de-hablar-brutalidades.shtml', @@ -32,6 +37,16 @@ class BioBioChileTVIE(InfoExtractor): 'params': { 'skip_download': True, }, + 'skip': 'URL expired and redirected to http://www.biobiochile.cl/portada/bbtv/index.html', + }, { + 'url': 'http://www.biobiochile.cl/noticias/bbtv/comentarios-bio-bio/2016/07/08/edecanes-del-congreso-figuras-decorativas-que-le-cuestan-muy-caro-a-los-chilenos.shtml', + 'info_dict': { + 'id': 'edecanes-del-congreso-figuras-decorativas-que-le-cuestan-muy-caro-a-los-chilenos', + 'ext': 'mp4', + 'uploader': '(none)', + 'upload_date': '20160708', + 'title': 'Edecanes del Congreso: Figuras decorativas que le cuestan muy caro a los chilenos', + }, }, { 'url': 
'http://tv.biobiochile.cl/notas/2015/10/22/ninos-transexuales-de-quien-es-la-decision.shtml', 'only_matching': True, @@ -45,42 +60,22 @@ class BioBioChileTVIE(InfoExtractor): webpage = self._download_webpage(url, video_id) + rudo_url = RudoIE._extract_url(webpage) + if not rudo_url: + raise ExtractorError('No videos found') + title = remove_end(self._og_search_title(webpage), ' - BioBioChile TV') - file_url = self._search_regex( - r'loadFWPlayerVideo\([^,]+,\s*(["\'])(?P.+?)\1', - webpage, 'file url', group='url') - - base_url = self._search_regex( - r'file\s*:\s*(["\'])(?P.+?)\1\s*\+\s*fileURL', webpage, - 'base url', default='http://unlimited2-cl.digitalproserver.com/bbtv/', - group='url') - - formats = self._extract_m3u8_formats( - '%s%s/playlist.m3u8' % (base_url, file_url), video_id, 'mp4', - entry_protocol='m3u8_native', m3u8_id='hls', fatal=False) - f = { - 'url': '%s%s' % (base_url, file_url), - 'format_id': 'http', - 'protocol': 'http', - 'preference': 1, - } - if formats: - f_copy = formats[-1].copy() - f_copy.update(f) - f = f_copy - formats.append(f) - self._sort_formats(formats) - thumbnail = self._og_search_thumbnail(webpage) uploader = self._html_search_regex( - r']+href=["\']https?://busca\.biobiochile\.cl/author[^>]+>(.+?)', + r']+href=["\']https?://(?:busca|www)\.biobiochile\.cl/(?:lista/)?(?:author|autor)[^>]+>(.+?)', webpage, 'uploader', fatal=False) return { + '_type': 'url_transparent', + 'url': rudo_url, 'id': video_id, 'title': title, 'thumbnail': thumbnail, 'uploader': uploader, - 'formats': formats, } From 2a49d016002bbfb7f9d310aab45a122847bacee9 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Mon, 11 Jul 2016 15:15:28 +0800 Subject: [PATCH 42/42] [playvid] Update _TESTS Blocks https://travis-ci.org/rg3/youtube-dl/jobs/143809100 --- youtube_dl/extractor/playvid.py | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/playvid.py b/youtube_dl/extractor/playvid.py index 
2eb4fd96d..78d219299 100644 --- a/youtube_dl/extractor/playvid.py +++ b/youtube_dl/extractor/playvid.py @@ -15,7 +15,7 @@ from ..utils import ( class PlayvidIE(InfoExtractor): _VALID_URL = r'https?://www\.playvid\.com/watch(\?v=|/)(?P.+?)(?:#|$)' - _TEST = { + _TESTS = [{ 'url': 'http://www.playvid.com/watch/RnmBNgtrrJu', 'md5': 'ffa2f6b2119af359f544388d8c01eb6c', 'info_dict': { @@ -24,8 +24,19 @@ class PlayvidIE(InfoExtractor): 'title': 'md5:9256d01c6317e3f703848b5906880dc8', 'duration': 82, 'age_limit': 18, - } - } + }, + 'skip': 'Video removed due to ToS', + }, { + 'url': 'http://www.playvid.com/watch/hwb0GpNkzgH', + 'md5': '39d49df503ad7b8f23a4432cbf046477', + 'info_dict': { + 'id': 'hwb0GpNkzgH', + 'ext': 'mp4', + 'title': 'Ellen Euro Cutie Blond Takes a Sexy Survey Get Facial in The Park', + 'age_limit': 18, + 'thumbnail': 're:^https?://.*\.jpg$', + }, + }] def _real_extract(self, url): video_id = self._match_id(url)