diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 2d67247e6..f41266f32 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.09.08*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.09.08** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.09.10*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.09.10** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -36,7 +36,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2018.09.08 +[debug] youtube-dl version 2018.09.10 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index ac95c9b71..d184f69ee 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,14 @@ +version 2018.09.10 + +Core ++ [utils] Properly recognize AV1 codec (#17506) + +Extractors ++ [iprima] Add support for prima.iprima.cz (#17514) ++ [tele5] Add support for tele5.de (#7805, #7922, #17331, #17414) +* [nbc] Fix extraction of percent encoded URLs (#17374) + + version 2018.09.08 Extractors diff --git a/docs/supportedsites.md b/docs/supportedsites.md index f0d6be901..9b8601751 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -847,6 +847,7 @@ - **techtv.mit.edu** - **ted** - **Tele13** + - **Tele5** - **TeleBruxelles** - **Telecinco**: telecinco.es, cuatro.com and mediaset.es - **Telegraaf** diff --git a/test/test_utils.py b/test/test_utils.py index 8da5ccc56..9e28e008f 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -785,6 +785,10 @@ class TestUtil(unittest.TestCase): 'vcodec': 'h264', 'acodec': 'aac', }) + self.assertEqual(parse_codecs('av01.0.05M.08'), { + 'vcodec': 'av01.0.05M.08', + 'acodec': 'none', + }) def test_escape_rfc3986(self): reserved = "!*'();:@&=+$,/?#[]" diff --git a/youtube_dl/extractor/asiancrush.py b/youtube_dl/extractor/asiancrush.py index 594c88c9c..6d71c5ad5 100644 --- a/youtube_dl/extractor/asiancrush.py +++ b/youtube_dl/extractor/asiancrush.py @@ -8,7 +8,6 @@ from .kaltura import KalturaIE from ..utils import ( extract_attributes, remove_end, - urlencode_postdata, ) @@ -34,19 +33,40 @@ class AsianCrushIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) - data = self._download_json( - 'https://www.asiancrush.com/wp-admin/admin-ajax.php', video_id, - data=urlencode_postdata({ - 'postid': video_id, - 'action': 'get_channel_kaltura_vars', - })) + webpage = self._download_webpage(url, video_id) - entry_id = data['entry_id'] + entry_id, partner_id, title = [None] * 3 + + vars = self._parse_json( + self._search_regex( + r'iEmbedVars\s*=\s*({.+?})', webpage, 'embed vars', + default='{}'), video_id, fatal=False) + if vars: + entry_id = vars.get('entry_id') + partner_id = vars.get('partner_id') + title = vars.get('vid_label') + + if not entry_id: + entry_id = self._search_regex( + r'\bentry_id["\']\s*:\s*["\'](\d+)', webpage, 'entry id') + + player = self._download_webpage( + 'https://api.asiancrush.com/embeddedVideoPlayer', video_id, + query={'id': entry_id}) + + kaltura_id = self._search_regex( + r'entry_id["\']\s*:\s*(["\'])(?P(?:(?!\1).)+)\1', player, + 'kaltura id', group='id') + + if not partner_id: + partner_id = self._search_regex( + r'/p(?:artner_id)?/(\d+)', player, 'partner id', + default='513551') return self.url_result( - 'kaltura:%s:%s' % (data['partner_id'], entry_id), - ie=KalturaIE.ie_key(), video_id=entry_id, - video_title=data.get('vid_label')) + 'kaltura:%s:%s' % (partner_id, kaltura_id), + ie=KalturaIE.ie_key(), video_id=kaltura_id, + video_title=title) class AsianCrushPlaylistIE(InfoExtractor): diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index b8bbaf81a..2dbf81e6e 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -211,6 +211,11 @@ class InfoExtractor(object): If not explicitly set, calculated from timestamp. uploader_id: Nickname or id of the video uploader. uploader_url: Full URL to a personal webpage of the video uploader. + channel: Full name of the channel the video is uploaded on. + Note that channel fields may or may not repeat uploader + fields. This depends on a particular extractor. + channel_id: Id of the channel. + channel_url: Full URL to a channel webpage. location: Physical location where the video was filmed. subtitles: The available subtitles as a dictionary in the format {tag: subformats}. "tag" is usually a language code, and @@ -1701,9 +1706,9 @@ class InfoExtractor(object): # However, this is not always respected, for example, [2] # contains EXT-X-STREAM-INF tag which references AUDIO # rendition group but does not have CODECS and despite - # referencing audio group an audio group, it represents - # a complete (with audio and video) format. So, for such cases - # we will ignore references to rendition groups and treat them + # referencing an audio group it represents a complete + # (with audio and video) format. So, for such cases we will + # ignore references to rendition groups and treat them # as complete formats. if audio_group_id and codecs and f.get('vcodec') != 'none': audio_group = groups.get(audio_group_id) diff --git a/youtube_dl/extractor/dtube.py b/youtube_dl/extractor/dtube.py index 4ca97f860..5887887e1 100644 --- a/youtube_dl/extractor/dtube.py +++ b/youtube_dl/extractor/dtube.py @@ -59,7 +59,7 @@ class DTubeIE(InfoExtractor): try: self.to_screen('%s: Checking %s video format URL' % (video_id, format_id)) self._downloader._opener.open(video_url, timeout=5).close() - except timeout as e: + except timeout: self.to_screen( '%s: %s URL is invalid, skipping' % (video_id, format_id)) continue diff --git a/youtube_dl/extractor/eporner.py b/youtube_dl/extractor/eporner.py index 6d03d7095..c050bf9df 100644 --- a/youtube_dl/extractor/eporner.py +++ b/youtube_dl/extractor/eporner.py @@ -9,6 +9,7 @@ from ..utils import ( encode_base_n, ExtractorError, int_or_none, + merge_dicts, parse_duration, str_to_int, url_or_none, @@ -25,10 +26,16 @@ class EpornerIE(InfoExtractor): 'display_id': 'Infamous-Tiffany-Teen-Strip-Tease-Video', 'ext': 'mp4', 'title': 'Infamous Tiffany Teen Strip Tease Video', + 'description': 'md5:764f39abf932daafa37485eb46efa152', + 'timestamp': 1232520922, + 'upload_date': '20090121', 'duration': 1838, 'view_count': int, 'age_limit': 18, }, + 'params': { + 'proxy': '127.0.0.1:8118' + } }, { # New (May 2016) URL layout 'url': 'http://www.eporner.com/hd-porn/3YRUtzMcWn0/Star-Wars-XXX-Parody/', @@ -104,12 +111,15 @@ class EpornerIE(InfoExtractor): }) self._sort_formats(formats) - duration = parse_duration(self._html_search_meta('duration', webpage)) + json_ld = self._search_json_ld(webpage, display_id, default={}) + + duration = parse_duration(self._html_search_meta( + 'duration', webpage, default=None)) view_count = str_to_int(self._search_regex( r'id="cinemaviews">\s*([0-9,]+)\s*views', webpage, 'view count', fatal=False)) - return { + return merge_dicts(json_ld, { 'id': video_id, 'display_id': display_id, 'title': title, @@ -117,4 +127,4 @@ class EpornerIE(InfoExtractor): 'view_count': view_count, 'formats': formats, 'age_limit': 18, - } + }) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 995af9988..7dc569724 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1086,6 +1086,7 @@ from .teachingchannel import TeachingChannelIE from .teamcoco import TeamcocoIE from .techtalks import TechTalksIE from .ted import TEDIE +from .tele5 import Tele5IE from .tele13 import Tele13IE from .telebruxelles import TeleBruxellesIE from .telecinco import TelecincoIE diff --git a/youtube_dl/extractor/fourtube.py b/youtube_dl/extractor/fourtube.py index ad273a0e7..a9a1f911e 100644 --- a/youtube_dl/extractor/fourtube.py +++ b/youtube_dl/extractor/fourtube.py @@ -3,15 +3,45 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..compat import compat_urlparse +from ..compat import ( + compat_b64decode, + compat_str, + compat_urllib_parse_unquote, + compat_urlparse, +) from ..utils import ( + int_or_none, parse_duration, parse_iso8601, + str_or_none, str_to_int, + try_get, + unified_timestamp, + url_or_none, ) class FourTubeBaseIE(InfoExtractor): + _TKN_HOST = 'tkn.kodicdn.com' + + def _extract_formats(self, url, video_id, media_id, sources): + token_url = 'https://%s/%s/desktop/%s' % ( + self._TKN_HOST, media_id, '+'.join(sources)) + + parsed_url = compat_urlparse.urlparse(url) + tokens = self._download_json(token_url, video_id, data=b'', headers={ + 'Origin': '%s://%s' % (parsed_url.scheme, parsed_url.hostname), + 'Referer': url, + }) + formats = [{ + 'url': tokens[format]['token'], + 'format_id': format + 'p', + 'resolution': format + 'p', + 'quality': int(format), + } for format in sources] + self._sort_formats(formats) + return formats + def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) kind, video_id, display_id = mobj.group('kind', 'id', 'display_id') @@ -68,21 +98,7 @@ class FourTubeBaseIE(InfoExtractor): media_id = params[0] sources = ['%s' % p for p in params[2]] - token_url = 'https://tkn.kodicdn.com/{0}/desktop/{1}'.format( - media_id, '+'.join(sources)) - - parsed_url = compat_urlparse.urlparse(url) - tokens = self._download_json(token_url, video_id, data=b'', headers={ - 'Origin': '%s://%s' % (parsed_url.scheme, parsed_url.hostname), - 'Referer': url, - }) - formats = [{ - 'url': tokens[format]['token'], - 'format_id': format + 'p', - 'resolution': format + 'p', - 'quality': int(format), - } for format in sources] - self._sort_formats(formats) + formats = self._extract_formats(url, video_id, media_id, sources) return { 'id': video_id, @@ -164,6 +180,7 @@ class FuxIE(FourTubeBaseIE): class PornTubeIE(FourTubeBaseIE): _VALID_URL = r'https?://(?:(?Pwww|m)\.)?porntube\.com/(?:videos/(?P[^/]+)_|embed/)(?P\d+)' _URL_TEMPLATE = 'https://www.porntube.com/videos/video_%s' + _TKN_HOST = 'tkn.porntube.com' _TESTS = [{ 'url': 'https://www.porntube.com/videos/teen-couple-doing-anal_7089759', 'info_dict': { @@ -171,13 +188,32 @@ class PornTubeIE(FourTubeBaseIE): 'ext': 'mp4', 'title': 'Teen couple doing anal', 'uploader': 'Alexy', - 'uploader_id': 'Alexy', + 'uploader_id': '91488', 'upload_date': '20150606', 'timestamp': 1433595647, 'duration': 5052, 'view_count': int, 'like_count': int, - 'categories': list, + 'age_limit': 18, + }, + 'params': { + 'skip_download': True, + }, + }, { + 'url': 'https://www.porntube.com/videos/squirting-teen-ballerina-ecg_1331406', + 'info_dict': { + 'id': '1331406', + 'ext': 'mp4', + 'title': 'Squirting Teen Ballerina on ECG', + 'uploader': 'Exploited College Girls', + 'uploader_id': '665', + 'channel': 'Exploited College Girls', + 'channel_id': '665', + 'upload_date': '20130920', + 'timestamp': 1379685485, + 'duration': 851, + 'view_count': int, + 'like_count': int, 'age_limit': 18, }, 'params': { @@ -191,6 +227,55 @@ class PornTubeIE(FourTubeBaseIE): 'only_matching': True, }] + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id, display_id = mobj.group('id', 'display_id') + + webpage = self._download_webpage(url, display_id) + + video = self._parse_json( + self._search_regex( + r'INITIALSTATE\s*=\s*(["\'])(?P(?:(?!\1).)+)\1', + webpage, 'data', group='value'), video_id, + transform_source=lambda x: compat_urllib_parse_unquote( + compat_b64decode(x).decode('utf-8')))['page']['video'] + + title = video['title'] + media_id = video['mediaId'] + sources = [compat_str(e['height']) + for e in video['encodings'] if e.get('height')] + formats = self._extract_formats(url, video_id, media_id, sources) + + thumbnail = url_or_none(video.get('masterThumb')) + uploader = try_get(video, lambda x: x['user']['username'], compat_str) + uploader_id = str_or_none(try_get( + video, lambda x: x['user']['id'], int)) + channel = try_get(video, lambda x: x['channel']['name'], compat_str) + channel_id = str_or_none(try_get( + video, lambda x: x['channel']['id'], int)) + like_count = int_or_none(video.get('likes')) + dislike_count = int_or_none(video.get('dislikes')) + view_count = int_or_none(video.get('playsQty')) + duration = int_or_none(video.get('durationInSeconds')) + timestamp = unified_timestamp(video.get('publishedAt')) + + return { + 'id': video_id, + 'title': title, + 'formats': formats, + 'thumbnail': thumbnail, + 'uploader': uploader or channel, + 'uploader_id': uploader_id or channel_id, + 'channel': channel, + 'channel_id': channel_id, + 'timestamp': timestamp, + 'like_count': like_count, + 'dislike_count': dislike_count, + 'view_count': view_count, + 'duration': duration, + 'age_limit': 18, + } + class PornerBrosIE(FourTubeBaseIE): _VALID_URL = r'https?://(?:(?Pwww|m)\.)?pornerbros\.com/(?:videos/(?P[^/]+)_|embed/)(?P\d+)' diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 1db154c4f..76ef01332 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -3112,7 +3112,7 @@ class GenericIE(InfoExtractor): return self.playlist_from_matches( foxnews_urls, video_id, video_title, ie=FoxNewsIE.ie_key()) - sharevideos_urls = [mobj.group('url') for mobj in re.finditer( + sharevideos_urls = [sharevideos_mobj.group('url') for sharevideos_mobj in re.finditer( r']+?\bsrc\s*=\s*(["\'])(?P(?:https?:)?//embed\.share-videos\.se/auto/embed/\d+\?.*?\buid=\d+.*?)\1', webpage)] if sharevideos_urls: diff --git a/youtube_dl/extractor/iprima.py b/youtube_dl/extractor/iprima.py index 3c4b7e48b..1d58d6e85 100644 --- a/youtube_dl/extractor/iprima.py +++ b/youtube_dl/extractor/iprima.py @@ -12,7 +12,7 @@ from ..utils import ( class IPrimaIE(InfoExtractor): - _VALID_URL = r'https?://play\.iprima\.cz/(?:.+/)?(?P[^?#]+)' + _VALID_URL = r'https?://(?:play|prima)\.iprima\.cz/(?:.+/)?(?P[^?#]+)' _GEO_BYPASS = False _TESTS = [{ @@ -33,6 +33,14 @@ class IPrimaIE(InfoExtractor): # geo restricted 'url': 'http://play.iprima.cz/closer-nove-pripady/closer-nove-pripady-iv-1', 'only_matching': True, + }, { + # iframe api.play-backend.iprima.cz + 'url': 'https://prima.iprima.cz/my-little-pony/mapa-znameni-2-2', + 'only_matching': True, + }, { + # iframe prima.iprima.cz + 'url': 'https://prima.iprima.cz/porady/jak-se-stavi-sen/rodina-rathousova-praha', + 'only_matching': True, }] def _real_extract(self, url): @@ -42,7 +50,10 @@ class IPrimaIE(InfoExtractor): webpage = self._download_webpage(url, video_id) - video_id = self._search_regex(r'data-product="([^"]+)">', webpage, 'real id') + video_id = self._search_regex( + (r']+\bsrc=["\'](?:https?:)?//(?:api\.play-backend\.iprima\.cz/prehravac/embedded|prima\.iprima\.cz/[^/]+/[^/]+)\?.*?\bid=(p\d+)', + r'data-product="([^"]+)">'), + webpage, 'real id') playerpage = self._download_webpage( 'http://play.iprima.cz/prehravac/init', diff --git a/youtube_dl/extractor/motherless.py b/youtube_dl/extractor/motherless.py index bed5645f2..d4bd273b6 100644 --- a/youtube_dl/extractor/motherless.py +++ b/youtube_dl/extractor/motherless.py @@ -167,9 +167,9 @@ class MotherlessGroupIE(InfoExtractor): if not entries: entries = [ self.url_result( - compat_urlparse.urljoin(base, '/' + video_id), - ie=MotherlessIE.ie_key(), video_id=video_id) - for video_id in orderedSet(re.findall( + compat_urlparse.urljoin(base, '/' + entry_id), + ie=MotherlessIE.ie_key(), video_id=entry_id) + for entry_id in orderedSet(re.findall( r'data-codename=["\']([A-Z0-9]+)', webpage))] return entries diff --git a/youtube_dl/extractor/nbc.py b/youtube_dl/extractor/nbc.py index c843f8649..765c46fd2 100644 --- a/youtube_dl/extractor/nbc.py +++ b/youtube_dl/extractor/nbc.py @@ -7,6 +7,7 @@ import re from .common import InfoExtractor from .theplatform import ThePlatformIE from .adobepass import AdobePassIE +from ..compat import compat_urllib_parse_unquote from ..utils import ( find_xpath_attr, smuggle_url, @@ -75,11 +76,16 @@ class NBCIE(AdobePassIE): 'url': 'https://www.nbc.com/classic-tv/charles-in-charge/video/charles-in-charge-pilot/n3310', 'only_matching': True, }, + { + # Percent escaped url + 'url': 'https://www.nbc.com/up-all-night/video/day-after-valentine%27s-day/n2189', + 'only_matching': True, + } ] def _real_extract(self, url): permalink, video_id = re.match(self._VALID_URL, url).groups() - permalink = 'http' + permalink + permalink = 'http' + compat_urllib_parse_unquote(permalink) response = self._download_json( 'https://api.nbc.com/v3/videos', video_id, query={ 'filter[permalink]': permalink, diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py index 6782848d9..19eaf389f 100644 --- a/youtube_dl/extractor/pornhub.py +++ b/youtube_dl/extractor/pornhub.py @@ -40,6 +40,7 @@ class PornHubIE(InfoExtractor): 'ext': 'mp4', 'title': 'Seductive Indian beauty strips down and fingers her pink pussy', 'uploader': 'Babes', + 'upload_date': '20130628', 'duration': 361, 'view_count': int, 'like_count': int, @@ -57,6 +58,7 @@ class PornHubIE(InfoExtractor): 'ext': 'mp4', 'title': '重庆婷婷女王足交', 'uploader': 'Unknown', + 'upload_date': '20150213', 'duration': 1753, 'view_count': int, 'like_count': int, @@ -237,8 +239,14 @@ class PornHubIE(InfoExtractor): video_urls.append((video_url, None)) video_urls_set.add(video_url) + upload_date = None formats = [] for video_url, height in video_urls: + if not upload_date: + upload_date = self._search_regex( + r'/(\d{6}/\d{2})/', video_url, 'upload data', default=None) + if upload_date: + upload_date = upload_date.replace('/', '') tbr = None mobj = re.search(r'(?P\d+)[pP]?_(?P\d+)[kK]', video_url) if mobj: @@ -278,6 +286,7 @@ class PornHubIE(InfoExtractor): return { 'id': video_id, 'uploader': video_uploader, + 'upload_date': upload_date, 'title': title, 'thumbnail': thumbnail, 'duration': duration, diff --git a/youtube_dl/extractor/seznamzpravy.py b/youtube_dl/extractor/seznamzpravy.py index 6d4e3b76d..7a1c7e38b 100644 --- a/youtube_dl/extractor/seznamzpravy.py +++ b/youtube_dl/extractor/seznamzpravy.py @@ -164,6 +164,6 @@ class SeznamZpravyArticleIE(InfoExtractor): description = info.get('description') or self._og_search_description(webpage) return self.playlist_result([ - self.url_result(url, ie=SeznamZpravyIE.ie_key()) - for url in SeznamZpravyIE._extract_urls(webpage)], + self.url_result(entry_url, ie=SeznamZpravyIE.ie_key()) + for entry_url in SeznamZpravyIE._extract_urls(webpage)], article_id, title, description) diff --git a/youtube_dl/extractor/tele5.py b/youtube_dl/extractor/tele5.py new file mode 100644 index 000000000..25573e49f --- /dev/null +++ b/youtube_dl/extractor/tele5.py @@ -0,0 +1,44 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from .nexx import NexxIE +from ..compat import compat_urlparse + + +class Tele5IE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?tele5\.de/(?:mediathek|tv)/(?P[^?#&]+)' + _TESTS = [{ + 'url': 'https://www.tele5.de/mediathek/filme-online/videos?vid=1549416', + 'info_dict': { + 'id': '1549416', + 'ext': 'mp4', + 'upload_date': '20180814', + 'timestamp': 1534290623, + 'title': 'Pandorum', + }, + 'params': { + 'skip_download': True, + }, + }, { + 'url': 'https://www.tele5.de/tv/kalkofes-mattscheibe/video-clips/politik-und-gesellschaft?ve_id=1551191', + 'only_matching': True, + }, { + 'url': 'https://www.tele5.de/tv/dark-matter/videos', + 'only_matching': True, + }] + + def _real_extract(self, url): + qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query) + video_id = (qs.get('vid') or qs.get('ve_id') or [None])[0] + + if not video_id: + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + video_id = self._html_search_regex( + r'id\s*=\s*["\']video-player["\'][^>]+data-id\s*=\s*["\'](\d+)', + webpage, 'video id') + + return self.url_result( + 'https://api.nexx.cloud/v3/759/videos/byid/%s' % video_id, + ie=NexxIE.ie_key(), video_id=video_id) diff --git a/youtube_dl/extractor/tube8.py b/youtube_dl/extractor/tube8.py index 368c45729..db93b0182 100644 --- a/youtube_dl/extractor/tube8.py +++ b/youtube_dl/extractor/tube8.py @@ -45,7 +45,7 @@ class Tube8IE(KeezMoviesIE): r'videoTitle\s*=\s*"([^"]+)', webpage, 'title') description = self._html_search_regex( - r'>Description:\s*(.+?)\s*<', webpage, 'description', fatal=False) + r'(?s)Description:\s*
(.+?)
', webpage, 'description', fatal=False) uploader = self._html_search_regex( r'\s*(.+?)\s*<', webpage, 'uploader', fatal=False) @@ -55,19 +55,19 @@ class Tube8IE(KeezMoviesIE): dislike_count = int_or_none(self._search_regex( r'rdownVar\s*=\s*"(\d+)"', webpage, 'dislike count', fatal=False)) view_count = str_to_int(self._search_regex( - r'Views: ([\d,\.]+)\s*', + r'Views:\s*\s*
([\d,\.]+)', webpage, 'view count', fatal=False)) comment_count = str_to_int(self._search_regex( r'(\d+)', webpage, 'comment count', fatal=False)) category = self._search_regex( - r'Category:\s*\s*]+href=[^>]+>([^<]+)', + r'Category:\s*\s*
\s*]+href=[^>]+>([^<]+)', webpage, 'category', fatal=False) categories = [category] if category else None tags_str = self._search_regex( - r'(?s)Tags:\s*(.+?)\s*
(.+?)]+href=[^>]+>([^<]+)', tags_str)] if tags_str else None diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py index b39972b1e..26661fdf6 100644 --- a/youtube_dl/extractor/twitch.py +++ b/youtube_dl/extractor/twitch.py @@ -559,7 +559,8 @@ class TwitchStreamIE(TwitchBaseIE): TwitchAllVideosIE, TwitchUploadsIE, TwitchPastBroadcastsIE, - TwitchHighlightsIE)) + TwitchHighlightsIE, + TwitchClipsIE)) else super(TwitchStreamIE, cls).suitable(url)) def _real_extract(self, url): @@ -633,7 +634,7 @@ class TwitchStreamIE(TwitchBaseIE): class TwitchClipsIE(TwitchBaseIE): IE_NAME = 'twitch:clips' - _VALID_URL = r'https?://clips\.twitch\.tv/(?:[^/]+/)*(?P[^/?#&]+)' + _VALID_URL = r'https?://(?:clips\.twitch\.tv/(?:[^/]+/)*|(?:www\.)?twitch\.tv/[^/]+/clip/)(?P[^/?#&]+)' _TESTS = [{ 'url': 'https://clips.twitch.tv/FaintLightGullWholeWheat', @@ -653,6 +654,9 @@ class TwitchClipsIE(TwitchBaseIE): # multiple formats 'url': 'https://clips.twitch.tv/rflegendary/UninterestedBeeDAESuppy', 'only_matching': True, + }, { + 'url': 'https://www.twitch.tv/sergeynixon/clip/StormyThankfulSproutFutureMan', + 'only_matching': True, }] def _real_extract(self, url): diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index e49b233f2..0a9239b62 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -299,10 +299,13 @@ class VimeoIE(VimeoBaseInfoExtractor): 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/atencio', 'uploader_id': 'atencio', 'uploader': 'Peter Atencio', + 'channel_id': 'keypeele', + 'channel_url': r're:https?://(?:www\.)?vimeo\.com/channels/keypeele', 'timestamp': 1380339469, 'upload_date': '20130928', 'duration': 187, }, + 'expected_warnings': ['Unable to download JSON metadata'], }, { 'url': 'http://vimeo.com/76979871', @@ -355,11 +358,13 @@ class VimeoIE(VimeoBaseInfoExtractor): 'url': 'https://vimeo.com/channels/tributes/6213729', 'info_dict': { 'id': '6213729', - 'ext': 'mov', + 'ext': 'mp4', 'title': 'Vimeo Tribute: The Shining', 'uploader': 'Casey Donahue', 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/caseydonahue', 'uploader_id': 'caseydonahue', + 'channel_url': r're:https?://(?:www\.)?vimeo\.com/channels/tributes', + 'channel_id': 'tributes', 'timestamp': 1250886430, 'upload_date': '20090821', 'description': 'md5:bdbf314014e58713e6e5b66eb252f4a6', @@ -465,6 +470,9 @@ class VimeoIE(VimeoBaseInfoExtractor): if 'Referer' not in headers: headers['Referer'] = url + channel_id = self._search_regex( + r'vimeo\.com/channels/([^/]+)', url, 'channel id', default=None) + # Extract ID from URL mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') @@ -563,19 +571,23 @@ class VimeoIE(VimeoBaseInfoExtractor): if config.get('view') == 4: config = self._verify_player_video_password(redirect_url, video_id) + vod = config.get('video', {}).get('vod', {}) + def is_rented(): if '>You rented this title.<' in webpage: return True if config.get('user', {}).get('purchased'): return True - label = try_get( - config, lambda x: x['video']['vod']['purchase_options'][0]['label_string'], compat_str) - if label and label.startswith('You rented this'): - return True + for purchase_option in vod.get('purchase_options', []): + if purchase_option.get('purchased'): + return True + label = purchase_option.get('label_string') + if label and (label.startswith('You rented this') or label.endswith(' remaining')): + return True return False - if is_rented(): - feature_id = config.get('video', {}).get('vod', {}).get('feature_id') + if is_rented() and vod.get('is_trailer'): + feature_id = vod.get('feature_id') if feature_id and not data.get('force_feature_id', False): return self.url_result(smuggle_url( 'https://player.vimeo.com/player/%s' % feature_id, @@ -652,6 +664,8 @@ class VimeoIE(VimeoBaseInfoExtractor): r']+rel=["\']license["\'][^>]+href=(["\'])(?P(?:(?!\1).)+)\1', webpage, 'license', default=None, group='license') + channel_url = 'https://vimeo.com/channels/%s' % channel_id if channel_id else None + info_dict = { 'id': video_id, 'formats': formats, @@ -662,6 +676,8 @@ class VimeoIE(VimeoBaseInfoExtractor): 'like_count': like_count, 'comment_count': comment_count, 'license': cc_license, + 'channel_id': channel_id, + 'channel_url': channel_url, } info_dict = merge_dicts(info_dict, info_dict_config, json_ld) diff --git a/youtube_dl/extractor/vzaar.py b/youtube_dl/extractor/vzaar.py index 02fcd52c7..6000671c3 100644 --- a/youtube_dl/extractor/vzaar.py +++ b/youtube_dl/extractor/vzaar.py @@ -4,15 +4,19 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from ..compat import compat_str from ..utils import ( int_or_none, float_or_none, + unified_timestamp, + url_or_none, ) class VzaarIE(InfoExtractor): _VALID_URL = r'https?://(?:(?:www|view)\.)?vzaar\.com/(?:videos/)?(?P\d+)' _TESTS = [{ + # HTTP and HLS 'url': 'https://vzaar.com/videos/1152805', 'md5': 'bde5ddfeb104a6c56a93a06b04901dbf', 'info_dict': { @@ -40,24 +44,48 @@ class VzaarIE(InfoExtractor): video_id = self._match_id(url) video_data = self._download_json( 'http://view.vzaar.com/v2/%s/video' % video_id, video_id) - source_url = video_data['sourceUrl'] - info = { + title = video_data['videoTitle'] + + formats = [] + + source_url = url_or_none(video_data.get('sourceUrl')) + if source_url: + f = { + 'url': source_url, + 'format_id': 'http', + } + if 'audio' in source_url: + f.update({ + 'vcodec': 'none', + 'ext': 'mp3', + }) + else: + f.update({ + 'width': int_or_none(video_data.get('width')), + 'height': int_or_none(video_data.get('height')), + 'ext': 'mp4', + 'fps': float_or_none(video_data.get('fps')), + }) + formats.append(f) + + video_guid = video_data.get('guid') + usp = video_data.get('usp') + if isinstance(video_guid, compat_str) and isinstance(usp, dict): + m3u8_url = ('http://fable.vzaar.com/v4/usp/%s/%s.ism/.m3u8?' + % (video_guid, video_id)) + '&'.join( + '%s=%s' % (k, v) for k, v in usp.items()) + formats.extend(self._extract_m3u8_formats( + m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id='hls', fatal=False)) + + self._sort_formats(formats) + + return { 'id': video_id, - 'title': video_data['videoTitle'], - 'url': source_url, + 'title': title, 'thumbnail': self._proto_relative_url(video_data.get('poster')), 'duration': float_or_none(video_data.get('videoDuration')), + 'timestamp': unified_timestamp(video_data.get('ts')), + 'formats': formats, } - if 'audio' in source_url: - info.update({ - 'vcodec': 'none', - 'ext': 'mp3', - }) - else: - info.update({ - 'width': int_or_none(video_data.get('width')), - 'height': int_or_none(video_data.get('height')), - 'ext': 'mp4', - }) - return info diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 27047425d..2fe074cb4 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -490,6 +490,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'uploader': 'Philipp Hagemeister', 'uploader_id': 'phihag', 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag', + 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q', + 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q', 'upload_date': '20121002', 'license': 'Standard YouTube License', 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .', @@ -1907,6 +1909,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor): else: self._downloader.report_warning('unable to extract uploader nickname') + channel_id = self._html_search_meta( + 'channelId', video_webpage, 'channel id') + channel_url = 'http://www.youtube.com/channel/%s' % channel_id if channel_id else None + # thumbnail image # We try first to get a high quality image: m_thumb = re.search(r'', @@ -2078,6 +2084,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'uploader': video_uploader, 'uploader_id': video_uploader_id, 'uploader_url': video_uploader_url, + 'channel_id': channel_id, + 'channel_url': channel_url, 'upload_date': upload_date, 'license': video_license, 'creator': video_creator or artist, diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index bcfb72d43..e84d35d4d 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -2477,7 +2477,7 @@ def parse_codecs(codecs_str): vcodec, acodec = None, None for full_codec in splited_codecs: codec = full_codec.split('.')[0] - if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v', 'hvc1'): + if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v', 'hvc1', 'av01'): if not vcodec: vcodec = full_codec elif codec in ('mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl'): diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 716d9ffe0..b078c4993 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2018.09.08' +__version__ = '2018.09.10'