From 553c68bbd98621580339f0d4b4b7129ce8455553 Mon Sep 17 00:00:00 2001 From: J Date: Fri, 6 Jan 2017 21:56:59 +0100 Subject: [PATCH 01/86] [hitrecord] Add extractor --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/hitrecord.py | 48 ++++++++++++++++++++++++++++++ 2 files changed, 49 insertions(+) create mode 100644 youtube_dl/extractor/hitrecord.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index ed9a133ea..f7f6c025f 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -366,6 +366,7 @@ from .hgtv import ( ) from .historicfilms import HistoricFilmsIE from .hitbox import HitboxIE, HitboxLiveIE +from .hitrecord import HitRecordIE from .hornbunny import HornBunnyIE from .hotnewhiphop import HotNewHipHopIE from .hotstar import HotStarIE diff --git a/youtube_dl/extractor/hitrecord.py b/youtube_dl/extractor/hitrecord.py new file mode 100644 index 000000000..35bbb3eb8 --- /dev/null +++ b/youtube_dl/extractor/hitrecord.py @@ -0,0 +1,48 @@ +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + clean_html, + int_or_none, + unified_strdate, +) +from ..compat import compat_str + + +class HitRecordIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?hitrecord\.org/records/(?P\d+)' + + _TEST = { + 'url': 'https://hitrecord.org/records/2954362', + 'md5': 'fe1cdc2023bce0bbb95c39c57426aa71', + 'info_dict': { + 'id': '2954362', + 'ext': 'mp4', + 'title': 'A Very Different World (HITRECORD x ACLU)', + 'description': 'md5:e62defaffab5075a5277736bead95a3d', + 'release_date': '20160818', + 'timestamp': 1471557582, + 'uploader': 'Zuzi.C12', + 'uploader_id': '362811', + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + video_info = self._download_json('https://hitrecord.org/api/web/records/' + video_id, video_id) + user_info = video_info.get('user', {}) + + return { + 'id': video_id, + 'title': video_info['title'], + 'url': video_info['source_url']['mp4_url'], + 'description': clean_html(video_info.get('body')), + 'uploader': user_info.get('username'), + 'uploader_id': compat_str(user_info.get('id')), + 'release_date': unified_strdate(video_info.get('created_at')), + 'timestamp': video_info.get('created_at_i'), + 'view_count': int_or_none(video_info.get('total_views_count')), + 'like_count': int_or_none(video_info.get('hearts_count')), + 'comment_count': int_or_none(video_info.get('comments_count')), + 'tags': [tag.get('text') for tag in video_info.get('tags', [])], + } From 364131584be61801080785e8bced4dc11cb7c9b9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 8 Jan 2017 20:15:39 +0700 Subject: [PATCH 02/86] [hitrecord] Improve (closes #11626) --- youtube_dl/extractor/hitrecord.py | 54 +++++++++++++++++++++---------- 1 file changed, 37 insertions(+), 17 deletions(-) diff --git a/youtube_dl/extractor/hitrecord.py b/youtube_dl/extractor/hitrecord.py index 35bbb3eb8..01a6946d0 100644 --- a/youtube_dl/extractor/hitrecord.py +++ b/youtube_dl/extractor/hitrecord.py @@ -1,17 +1,17 @@ from __future__ import unicode_literals from .common import InfoExtractor +from ..compat import compat_str from ..utils import ( clean_html, + float_or_none, int_or_none, - unified_strdate, + try_get, ) -from ..compat import compat_str class HitRecordIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?hitrecord\.org/records/(?P\d+)' - _TEST = { 'url': 'https://hitrecord.org/records/2954362', 'md5': 'fe1cdc2023bce0bbb95c39c57426aa71', @@ -20,29 +20,49 @@ class HitRecordIE(InfoExtractor): 'ext': 'mp4', 'title': 'A Very Different World (HITRECORD x ACLU)', 'description': 'md5:e62defaffab5075a5277736bead95a3d', - 'release_date': '20160818', + 'duration': 139.327, 'timestamp': 1471557582, + 'upload_date': '20160818', 'uploader': 'Zuzi.C12', 'uploader_id': '362811', + 'view_count': int, + 'like_count': int, + 'comment_count': int, + 'tags': list, } } def _real_extract(self, url): video_id = self._match_id(url) - video_info = self._download_json('https://hitrecord.org/api/web/records/' + video_id, video_id) - user_info = video_info.get('user', {}) + + video = self._download_json( + 'https://hitrecord.org/api/web/records/%s' % video_id, video_id) + + title = video['title'] + video_url = video['source_url']['mp4_url'] + + tags = None + tags_list = try_get(video, lambda x: x['tags'], list) + if tags_list: + tags = [ + t['text'] + for t in tags_list + if isinstance(t, dict) and t.get('text') and + isinstance(t['text'], compat_str)] return { 'id': video_id, - 'title': video_info['title'], - 'url': video_info['source_url']['mp4_url'], - 'description': clean_html(video_info.get('body')), - 'uploader': user_info.get('username'), - 'uploader_id': compat_str(user_info.get('id')), - 'release_date': unified_strdate(video_info.get('created_at')), - 'timestamp': video_info.get('created_at_i'), - 'view_count': int_or_none(video_info.get('total_views_count')), - 'like_count': int_or_none(video_info.get('hearts_count')), - 'comment_count': int_or_none(video_info.get('comments_count')), - 'tags': [tag.get('text') for tag in video_info.get('tags', [])], + 'url': video_url, + 'title': title, + 'description': clean_html(video.get('body')), + 'duration': float_or_none(video.get('duration'), 1000), + 'timestamp': int_or_none(video.get('created_at_i')), + 'uploader': try_get( + video, lambda x: x['user']['username'], compat_str), + 'uploader_id': try_get( + video, lambda x: compat_str(x['user']['id'])), + 'view_count': int_or_none(video.get('total_views_count')), + 'like_count': int_or_none(video.get('hearts_count')), + 'comment_count': int_or_none(video.get('comments_count')), + 'tags': tags, } From e60166020b58c74f538dbbb1b9d27aaf28abc41e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 8 Jan 2017 20:56:38 +0700 Subject: [PATCH 03/86] [ChangeLog] Actualize --- ChangeLog | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/ChangeLog b/ChangeLog index 2d2e22af9..45ba5fe91 100644 --- a/ChangeLog +++ b/ChangeLog @@ -4,6 +4,14 @@ Core * Fix "invalid escape sequence" errors under Python 3.6 (#11581) Extractors ++ [hitrecord] Add support for hitrecord.org (#10867, #11626) +- [videott] Remove extractor +* [swrmediathek] Improve extraction +- [sharesix] Remove extractor +- [aol:features] Remove extractor +* [sendtonews] Improve info extraction +* [3sat,phoenix] Fix extraction (#11619) +* [comedycentral/mtv] Add support for HLS videos (#11600) * [discoverygo] Fix JSON data parsing (#11219, #11522) From e7ea724cb9cca344b4f486231f12a76918df80ee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 8 Jan 2017 20:58:43 +0700 Subject: [PATCH 04/86] release 2017.01.08 --- .github/ISSUE_TEMPLATE.md | 6 +++--- CONTRIBUTING.md | 2 +- ChangeLog | 2 +- docs/supportedsites.md | 5 +---- youtube_dl/version.py | 2 +- 5 files changed, 7 insertions(+), 10 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index a78413518..f70a58693 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.01.05*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.01.05** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.01.08*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.01.08** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2017.01.05 +[debug] youtube-dl version 2017.01.08 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index f50f52841..d606eab0e 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -124,7 +124,7 @@ After you have ensured this site is distributing its content legally, you can fo 'id': '42', 'ext': 'mp4', 'title': 'Video title goes here', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', # TODO more properties, either as: # * A value # * MD5 checksum; start the string with md5: diff --git a/ChangeLog b/ChangeLog index 45ba5fe91..da9cf24af 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2017.01.08 Core * Fix "invalid escape sequence" errors under Python 3.6 (#11581) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 0e301e8f3..9ac0ffefe 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -240,7 +240,6 @@ - **fc2** - **fc2:embed** - **Fczenit** - - **features.aol.com** - **fernsehkritik.tv** - **Firstpost** - **FiveTV** @@ -304,6 +303,7 @@ - **history:topic**: History.com Topic - **hitbox** - **hitbox:live** + - **HitRecord** - **HornBunny** - **HotNewHipHop** - **HotStar** @@ -650,7 +650,6 @@ - **screen.yahoo:search**: Yahoo screen search - **Screencast** - **ScreencastOMatic** - - **ScreenJunkies** - **Seeker** - **SenateISVP** - **SendtoNews** @@ -658,7 +657,6 @@ - **Sexu** - **Shahid** - **Shared**: shared.sx - - **ShareSix** - **ShowRoomLive** - **Sina** - **SixPlay** @@ -845,7 +843,6 @@ - **videomore:season** - **videomore:video** - **VideoPremium** - - **VideoTt**: video.tt - Your True Tube (Currently broken) - **videoweed**: VideoWeed - **Vidio** - **vidme** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 2c8e5bcf6..b7306d393 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2017.01.05' +__version__ = '2017.01.08' From 8084951b7f3886cbd57faab0c15f4f2ce3580779 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Mon, 9 Jan 2017 11:24:40 +0100 Subject: [PATCH 05/86] [egghead:course] Add support for egghead.io course playlists Individual egghead videos are already handled by the generic/Wistia extractors. --- ChangeLog | 5 +++- youtube_dl/extractor/egghead.py | 39 ++++++++++++++++++++++++++++++ youtube_dl/extractor/extractors.py | 1 + 3 files changed, 44 insertions(+), 1 deletion(-) create mode 100644 youtube_dl/extractor/egghead.py diff --git a/ChangeLog b/ChangeLog index da9cf24af..c7cee5412 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,7 @@ +version + ++ [egghead:course] Add support for egghead.io courses + version 2017.01.08 Core @@ -14,7 +18,6 @@ Extractors * [comedycentral/mtv] Add support for HLS videos (#11600) * [discoverygo] Fix JSON data parsing (#11219, #11522) - version 2017.01.05 Extractors diff --git a/youtube_dl/extractor/egghead.py b/youtube_dl/extractor/egghead.py new file mode 100644 index 000000000..db921465e --- /dev/null +++ b/youtube_dl/extractor/egghead.py @@ -0,0 +1,39 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor + + +class EggheadCourseIE(InfoExtractor): + IE_DESC = 'egghead.io course' + IE_NAME = 'egghead:course' + _VALID_URL = r'https://egghead\.io/courses/(?P[a-zA-Z_0-9-]+)' + _TEST = { + 'url': 'https://egghead.io/courses/professor-frisby-introduces-composable-functional-javascript', + 'playlist_count': 29, + 'info_dict': { + 'id': 'professor-frisby-introduces-composable-functional-javascript', + 'title': 'Professor Frisby Introduces Composable Functional JavaScript', + 'description': 're:(?s)^This course teaches the ubiquitous.*You\'ll start composing functionality before you know it.$', + }, + } + + def _real_extract(self, url): + playlist_id = self._match_id(url) + webpage = self._download_webpage(url, playlist_id) + + title = self._html_search_regex(r'

([^<]+)

', webpage, 'title') + ul = self._search_regex(r'(?s)
    (.*?)
', webpage, 'session list') + + found = re.findall(r'(?s)\s*
  • Date: Mon, 9 Jan 2017 23:08:59 +0700 Subject: [PATCH 10/86] [inc] Improve (closes #11647) --- youtube_dl/extractor/inc.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/inc.py b/youtube_dl/extractor/inc.py index 279e53c15..241ec83c4 100644 --- a/youtube_dl/extractor/inc.py +++ b/youtube_dl/extractor/inc.py @@ -1,10 +1,11 @@ from __future__ import unicode_literals from .common import InfoExtractor +from .kaltura import KalturaIE class IncIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?inc\.com(?:/[\w-]+)+/(?P[\w-]+)(?:\.html)?' + _VALID_URL = r'https?://(?:www\.)?inc\.com/(?:[^/]+/)+(?P[^.]+).html' _TESTS = [{ 'url': 'http://www.inc.com/tip-sheet/bill-gates-says-these-5-books-will-make-you-smarter.html', 'md5': '7416739c9c16438c09fa35619d6ba5cb', @@ -17,6 +18,9 @@ class IncIE(InfoExtractor): 'upload_date': '20160920', 'uploader_id': 'video@inc.com', }, + 'params': { + 'skip_download': True, + }, }, { 'url': 'http://www.inc.com/video/david-whitford/founders-forum-tripadvisor-steve-kaufer-most-enjoyable-moment-for-entrepreneur.html', 'only_matching': True, @@ -27,14 +31,11 @@ class IncIE(InfoExtractor): webpage = self._download_webpage(url, display_id) partner_id = self._search_regex( - r'var\s+_bizo_data_partner_id\s*=\s*"(\d+)";', - webpage, - 'partner id') + r'var\s+_?bizo_data_partner_id\s*=\s*["\'](\d+)', webpage, 'partner id') kaltura_id = self._parse_json(self._search_regex( - r'pageInfo\.videos\s*=\s*\[(.+)\];', - webpage, - 'kaltura id'), + r'pageInfo\.videos\s*=\s*\[(.+)\];', webpage, 'kaltura id'), display_id)['vid_kaltura_id'] - return self.url_result('kaltura:%s:%s' % (partner_id, kaltura_id), 'Kaltura') + return self.url_result( + 'kaltura:%s:%s' % (partner_id, kaltura_id), KalturaIE.ie_key()) From d1aeacd9bfe12bdf064d8888f77ccf8bd30f1723 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 10 Jan 2017 21:25:29 +0700 Subject: [PATCH 11/86] [youtube] Fix extraction (closes #11663, #11664) --- youtube_dl/jsinterp.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/jsinterp.py b/youtube_dl/jsinterp.py index a8df4aef0..24cdec28c 100644 --- a/youtube_dl/jsinterp.py +++ b/youtube_dl/jsinterp.py @@ -213,7 +213,7 @@ class JSInterpreter(object): def extract_object(self, objname): obj = {} obj_m = re.search( - (r'(?:var\s+)?%s\s*=\s*\{' % re.escape(objname)) + + (r'(?([a-zA-Z$0-9]+\s*:\s*function\(.*?\)\s*\{.*?\}(?:,\s*)?)*)' + r'\}\s*;', self.code) From 2184d44361723bab7822d2e919437f213eaed93a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 10 Jan 2017 21:27:17 +0700 Subject: [PATCH 12/86] [ChangeLog] Actualize --- ChangeLog | 3 +++ 1 file changed, 3 insertions(+) diff --git a/ChangeLog b/ChangeLog index 5b6466be6..135b337d9 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,9 @@ version Extractors +* [youtube] Fix extraction (#11663, #11664) ++ [inc] Add support for inc.com (#11277, #11647) ++ [youtube] Add itag 212 (#11575) + [egghead:course] Add support for egghead.io courses From 31ea2ad89da64247d894e739e2c7c0f7e44411bd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 10 Jan 2017 21:29:20 +0700 Subject: [PATCH 13/86] release 2017.01.10 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 2 ++ youtube_dl/version.py | 2 +- 4 files changed, 7 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index f70a58693..6a4c25680 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.01.08*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.01.08** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.01.10*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.01.10** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2017.01.08 +[debug] youtube-dl version 2017.01.10 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 135b337d9..f1e234507 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2017.01.10 Extractors * [youtube] Fix extraction (#11663, #11664) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 9ac0ffefe..0f6c4ec0c 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -214,6 +214,7 @@ - **EaglePlatform** - **EbaumsWorld** - **EchoMsk** + - **egghead:course**: egghead.io course - **eHow** - **Einthusan** - **eitb.tv** @@ -321,6 +322,7 @@ - **Imgur** - **ImgurAlbum** - **Ina** + - **Inc** - **Indavideo** - **IndavideoEmbed** - **InfoQ** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index b7306d393..214124722 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2017.01.08' +__version__ = '2017.01.10' From 2032d935d1d99c83e28585ca4c20415dcab56701 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 10 Jan 2017 22:25:33 +0700 Subject: [PATCH 14/86] [mtv] Add default value for use_hls These methods are used across codebase with old number of arguments --- youtube_dl/extractor/mtv.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/mtv.py b/youtube_dl/extractor/mtv.py index e1f1f8fa4..49c192e1a 100644 --- a/youtube_dl/extractor/mtv.py +++ b/youtube_dl/extractor/mtv.py @@ -123,7 +123,7 @@ class MTVServicesInfoExtractor(InfoExtractor): } for typographic in transcript.findall('./typographic')] return subtitles - def _get_video_info(self, itemdoc, use_hls): + def _get_video_info(self, itemdoc, use_hls=False): uri = itemdoc.find('guid').text video_id = self._id_from_uri(uri) self.report_extraction(video_id) @@ -199,7 +199,7 @@ class MTVServicesInfoExtractor(InfoExtractor): info_url = update_url_query(feed_url, self._get_feed_query(uri)) return self._get_videos_info_from_url(info_url, video_id, use_hls) - def _get_videos_info_from_url(self, url, video_id, use_hls): + def _get_videos_info_from_url(self, url, video_id, use_hls=False): idoc = self._download_xml( url, video_id, 'Downloading info', transform_source=fix_xml_ampersands) From 20faad74b6416b137d4400853d7a871a256d731e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 10 Jan 2017 22:27:23 +0700 Subject: [PATCH 15/86] [mtv] Fix non-hls extraction method attribute may not be present --- youtube_dl/extractor/mtv.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/mtv.py b/youtube_dl/extractor/mtv.py index 49c192e1a..7c45c7738 100644 --- a/youtube_dl/extractor/mtv.py +++ b/youtube_dl/extractor/mtv.py @@ -88,7 +88,7 @@ class MTVServicesInfoExtractor(InfoExtractor): formats = [] for rendition in mdoc.findall('.//rendition'): - if rendition.attrib['method'] == 'hls': + if rendition.get('method') == 'hls': hls_url = rendition.find('./src').text formats.extend(self._extract_m3u8_formats(hls_url, video_id, ext='mp4')) else: From 67fc365b86dccac4b52c5eb7a3f1e659ef945dc9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 10 Jan 2017 22:30:47 +0700 Subject: [PATCH 16/86] [mtv,cc] Use hls by default (closes #11641) --- youtube_dl/extractor/comedycentral.py | 2 +- youtube_dl/extractor/mtv.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/comedycentral.py b/youtube_dl/extractor/comedycentral.py index 8bd589774..816e0bfb6 100644 --- a/youtube_dl/extractor/comedycentral.py +++ b/youtube_dl/extractor/comedycentral.py @@ -57,7 +57,7 @@ class ComedyCentralFullEpisodesIE(MTVServicesInfoExtractor): feed = self._download_json(video_zone['feed'], playlist_id) mgid = feed['result']['data']['id'] - videos_info = self._get_videos_info(mgid, use_hls=True) + videos_info = self._get_videos_info(mgid) return videos_info diff --git a/youtube_dl/extractor/mtv.py b/youtube_dl/extractor/mtv.py index 7c45c7738..d27c6686b 100644 --- a/youtube_dl/extractor/mtv.py +++ b/youtube_dl/extractor/mtv.py @@ -123,7 +123,7 @@ class MTVServicesInfoExtractor(InfoExtractor): } for typographic in transcript.findall('./typographic')] return subtitles - def _get_video_info(self, itemdoc, use_hls=False): + def _get_video_info(self, itemdoc, use_hls=True): uri = itemdoc.find('guid').text video_id = self._id_from_uri(uri) self.report_extraction(video_id) @@ -193,13 +193,13 @@ class MTVServicesInfoExtractor(InfoExtractor): data['lang'] = self._LANG return data - def _get_videos_info(self, uri, use_hls=False): + def _get_videos_info(self, uri, use_hls=True): video_id = self._id_from_uri(uri) feed_url = self._get_feed_url(uri) info_url = update_url_query(feed_url, self._get_feed_query(uri)) return self._get_videos_info_from_url(info_url, video_id, use_hls) - def _get_videos_info_from_url(self, url, video_id, use_hls=False): + def _get_videos_info_from_url(self, url, video_id, use_hls=True): idoc = self._download_xml( url, video_id, 'Downloading info', transform_source=fix_xml_ampersands) From cdd11c054013997b6f0053a3958b6e5b0aa698b6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 10 Jan 2017 22:31:20 +0700 Subject: [PATCH 17/86] [mtv] Use native hls by default --- youtube_dl/extractor/mtv.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/mtv.py b/youtube_dl/extractor/mtv.py index d27c6686b..5250db212 100644 --- a/youtube_dl/extractor/mtv.py +++ b/youtube_dl/extractor/mtv.py @@ -90,7 +90,8 @@ class MTVServicesInfoExtractor(InfoExtractor): for rendition in mdoc.findall('.//rendition'): if rendition.get('method') == 'hls': hls_url = rendition.find('./src').text - formats.extend(self._extract_m3u8_formats(hls_url, video_id, ext='mp4')) + formats.extend(self._extract_m3u8_formats( + hls_url, video_id, ext='mp4', entry_protocol='m3u8_native')) else: # fms try: From 10cd2003b4e939a13c020f94c0518252411f9ddf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 10 Jan 2017 22:32:34 +0700 Subject: [PATCH 18/86] [nick] Add support for beta.nick.com (closes #11655) --- youtube_dl/extractor/nick.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/nick.py b/youtube_dl/extractor/nick.py index 7672845bf..08a75929e 100644 --- a/youtube_dl/extractor/nick.py +++ b/youtube_dl/extractor/nick.py @@ -10,7 +10,7 @@ from ..utils import update_url_query class NickIE(MTVServicesInfoExtractor): # None of videos on the website are still alive? IE_NAME = 'nick.com' - _VALID_URL = r'https?://(?:www\.)?nick(?:jr)?\.com/(?:videos/clip|[^/]+/videos)/(?P[^/?#.]+)' + _VALID_URL = r'https?://(?:(?:www|beta)\.)?nick(?:jr)?\.com/(?:[^/]+/)?(?:videos/clip|[^/]+/videos)/(?P[^/?#.]+)' _FEED_URL = 'http://udat.mtvnservices.com/service1/dispatch.htm' _TESTS = [{ 'url': 'http://www.nick.com/videos/clip/alvinnn-and-the-chipmunks-112-full-episode.html', @@ -57,6 +57,9 @@ class NickIE(MTVServicesInfoExtractor): }, { 'url': 'http://www.nickjr.com/paw-patrol/videos/pups-save-a-goldrush-s3-ep302-full-episode/', 'only_matching': True, + }, { + 'url': 'http://beta.nick.com/nicky-ricky-dicky-and-dawn/videos/nicky-ricky-dicky-dawn-301-full-episode/', + 'only_matching': True, }] def _get_feed_query(self, uri): From 1fd0fc42bd0b67dba1635ade98d92473b41eff84 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 11 Jan 2017 22:51:03 +0700 Subject: [PATCH 19/86] [vimeo:ondemand] Fix test (closes #11651) --- youtube_dl/extractor/vimeo.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index 37e1da70d..19dc73966 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -629,6 +629,9 @@ class VimeoOndemandIE(VimeoBaseInfoExtractor): 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/gumfilms', 'uploader_id': 'gumfilms', }, + 'params': { + 'format': 'best[protocol=https]', + }, }, { # requires Referer to be passed along with og:video:url 'url': 'https://vimeo.com/ondemand/36938/126682985', From 365d136b7c924dc0378a1b78db61ede7b97033be Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 11 Jan 2017 22:57:08 +0700 Subject: [PATCH 20/86] [vimeo] Fix tests --- youtube_dl/extractor/vimeo.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index 19dc73966..2e98b0e6f 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -254,7 +254,7 @@ class VimeoIE(VimeoBaseInfoExtractor): 'uploader_id': 'user18948128', 'uploader': 'Jaime Marquínez Ferrándiz', 'duration': 10, - 'description': 'This is "youtube-dl password protected test video" by on Vimeo, the home for high quality videos and the people who love them.', + 'description': 'md5:dca3ea23adb29ee387127bc4ddfce63f', }, 'params': { 'videopassword': 'youtube-dl', @@ -306,7 +306,7 @@ class VimeoIE(VimeoBaseInfoExtractor): { # contains original format 'url': 'https://vimeo.com/33951933', - 'md5': '2d9f5475e0537f013d0073e812ab89e6', + 'md5': '53c688fa95a55bf4b7293d37a89c5c53', 'info_dict': { 'id': '33951933', 'ext': 'mp4', @@ -324,7 +324,7 @@ class VimeoIE(VimeoBaseInfoExtractor): 'url': 'https://vimeo.com/channels/tributes/6213729', 'info_dict': { 'id': '6213729', - 'ext': 'mp4', + 'ext': 'mov', 'title': 'Vimeo Tribute: The Shining', 'uploader': 'Casey Donahue', 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/caseydonahue', From ed06da4e7b274fd444a6ada23ba9bb4c559761d3 Mon Sep 17 00:00:00 2001 From: sh!zeeg Date: Thu, 5 Jan 2017 04:52:42 +0300 Subject: [PATCH 21/86] [freesound] Fix extraction and extended (closes #11602) --- youtube_dl/extractor/freesound.py | 55 ++++++++++++++++++++++++++++--- 1 file changed, 50 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/freesound.py b/youtube_dl/extractor/freesound.py index 5ff62af2a..f0b2400cf 100644 --- a/youtube_dl/extractor/freesound.py +++ b/youtube_dl/extractor/freesound.py @@ -3,6 +3,15 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from ..utils import ( + determine_ext, + float_or_none, + get_element_by_class, + get_element_by_id, + int_or_none, + parse_filesize, + unified_strdate, +) class FreesoundIE(InfoExtractor): @@ -23,17 +32,53 @@ class FreesoundIE(InfoExtractor): mobj = re.match(self._VALID_URL, url) music_id = mobj.group('id') webpage = self._download_webpage(url, music_id) - title = self._html_search_regex( - r'
    .*?(.+?)', - webpage, 'music title', flags=re.DOTALL) + + audio_url = self._og_search_property('audio', webpage, 'song url') + title = self._og_search_property('audio:title', webpage, 'song title') + duration = float_or_none(get_element_by_class('duration', webpage), scale=1000) + tags = get_element_by_class('tags', webpage) + sound_info = get_element_by_id('sound_information_box', webpage) + release_date = get_element_by_id('sound_date', webpage) + description = self._html_search_regex( r'
    (.*?)
    ', webpage, 'description', fatal=False, flags=re.DOTALL) + download_count = int_or_none(self._html_search_regex( + r'Downloaded.*>(\d+)<', webpage, 'downloaded', fatal=False)) + + filesize = float_or_none(parse_filesize(self._search_regex( + r'Filesize
    (.*)
    ', sound_info, 'file size (approx)', fatal=False))) + + if release_date: + release_date = unified_strdate(release_date.replace('th', '')) + + bitdepth = self._html_search_regex( + r'Bitdepth
    (.*)
    ', sound_info, 'Bitdepth', fatal=False) + + channels = self._html_search_regex( + r'Channels
    (.*)
    ', sound_info, 'Channels info', fatal=False) + + formats = [{ + 'url': audio_url, + 'id': music_id, + 'format_id': self._og_search_property('audio:type', webpage, 'audio format', fatal=False), + 'format_note': '{0} {1} {2}'.format(determine_ext(audio_url), bitdepth, channels), + 'filesize_approx': filesize, + 'asr': int_or_none(self._html_search_regex( + r'Samplerate
    (\d+).*
    ', + sound_info, 'samplerate', fatal=False)), + }] + return { 'id': music_id, 'title': title, - 'url': self._og_search_property('audio', webpage, 'music url'), - 'uploader': self._og_search_property('audio:artist', webpage, 'music uploader'), + 'uploader': self._og_search_property('audio:artist', webpage, 'music uploader', fatal=False), 'description': description, + 'duration': duration, + 'tags': [self._html_search_regex(r'>(.*)', t, 'tag', fatal=False) + for t in tags.split('\n') if t.strip()], + 'formats': formats, + 'release_date': release_date, + 'likes_count': download_count, } From cb655f34fbbd741f18e22cb8ec0cae1c4c3bfebe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 12 Jan 2017 22:39:45 +0700 Subject: [PATCH 22/86] [utils] Add more date formats --- test/test_utils.py | 3 +++ youtube_dl/utils.py | 6 ++++++ 2 files changed, 9 insertions(+) diff --git a/test/test_utils.py b/test/test_utils.py index 3092db5c1..e99bf794e 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -295,6 +295,9 @@ class TestUtil(unittest.TestCase): self.assertEqual(unified_strdate('27.02.2016 17:30'), '20160227') self.assertEqual(unified_strdate('UNKNOWN DATE FORMAT'), None) self.assertEqual(unified_strdate('Feb 7, 2016 at 6:35 pm'), '20160207') + self.assertEqual(unified_strdate('July 15th, 2013'), '20130715') + self.assertEqual(unified_strdate('September 1st, 2013'), '20130901') + self.assertEqual(unified_strdate('Sep 2nd, 2013'), '20130902') def test_unified_timestamps(self): self.assertEqual(unified_timestamp('December 21, 2010'), 1292889600) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 39dd6c49f..12863e74a 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -128,7 +128,13 @@ DATE_FORMATS = ( '%d %B %Y', '%d %b %Y', '%B %d %Y', + '%B %dst %Y', + '%B %dnd %Y', + '%B %dth %Y', '%b %d %Y', + '%b %dst %Y', + '%b %dnd %Y', + '%b %dth %Y', '%b %dst %Y %I:%M', '%b %dnd %Y %I:%M', '%b %dth %Y %I:%M', From 3a407e707ac96bc082fd82325e916802a3b55d36 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 12 Jan 2017 23:03:53 +0700 Subject: [PATCH 23/86] [freesound] Improve and remove unrelated metadata (closes #11608) --- youtube_dl/extractor/freesound.py | 75 +++++++++++++++---------------- 1 file changed, 35 insertions(+), 40 deletions(-) diff --git a/youtube_dl/extractor/freesound.py b/youtube_dl/extractor/freesound.py index f0b2400cf..138b6bc58 100644 --- a/youtube_dl/extractor/freesound.py +++ b/youtube_dl/extractor/freesound.py @@ -4,18 +4,15 @@ import re from .common import InfoExtractor from ..utils import ( - determine_ext, float_or_none, get_element_by_class, get_element_by_id, - int_or_none, - parse_filesize, unified_strdate, ) class FreesoundIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?freesound\.org/people/([^/]+)/sounds/(?P[^/]+)' + _VALID_URL = r'https?://(?:www\.)?freesound\.org/people/[^/]+/sounds/(?P[^/]+)' _TEST = { 'url': 'http://www.freesound.org/people/miklovan/sounds/194503/', 'md5': '12280ceb42c81f19a515c745eae07650', @@ -23,62 +20,60 @@ class FreesoundIE(InfoExtractor): 'id': '194503', 'ext': 'mp3', 'title': 'gulls in the city.wav', - 'uploader': 'miklovan', 'description': 'the sounds of seagulls in the city', + 'duration': 130.233, + 'uploader': 'miklovan', + 'upload_date': '20130715', + 'tags': list, } } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - music_id = mobj.group('id') - webpage = self._download_webpage(url, music_id) + audio_id = self._match_id(url) + + webpage = self._download_webpage(url, audio_id) audio_url = self._og_search_property('audio', webpage, 'song url') title = self._og_search_property('audio:title', webpage, 'song title') - duration = float_or_none(get_element_by_class('duration', webpage), scale=1000) - tags = get_element_by_class('tags', webpage) - sound_info = get_element_by_id('sound_information_box', webpage) - release_date = get_element_by_id('sound_date', webpage) description = self._html_search_regex( - r'
    (.*?)
    ', webpage, 'description', - fatal=False, flags=re.DOTALL) + r'(?s)id=["\']sound_description["\'][^>]*>(.+?)
    ', + webpage, 'description', fatal=False) - download_count = int_or_none(self._html_search_regex( - r'Downloaded.*>(\d+)<', webpage, 'downloaded', fatal=False)) + duration = float_or_none( + get_element_by_class('duration', webpage), scale=1000) - filesize = float_or_none(parse_filesize(self._search_regex( - r'Filesize
    (.*)
    ', sound_info, 'file size (approx)', fatal=False))) - - if release_date: - release_date = unified_strdate(release_date.replace('th', '')) - - bitdepth = self._html_search_regex( - r'Bitdepth
    (.*)
    ', sound_info, 'Bitdepth', fatal=False) + upload_date = unified_strdate(get_element_by_id('sound_date', webpage)) + uploader = self._og_search_property( + 'audio:artist', webpage, 'uploader', fatal=False) channels = self._html_search_regex( - r'Channels
    (.*)
    ', sound_info, 'Channels info', fatal=False) + r'Channels
    (.+?)
    ', webpage, + 'channels info', fatal=False) + + tags_str = get_element_by_class('tags', webpage) + tags = re.findall(r']+>([^<]+)', tags_str) if tags_str else None + + audio_urls = [audio_url] + + LQ_FORMAT = '-lq.mp3' + if LQ_FORMAT in audio_url: + audio_urls.append(audio_url.replace(LQ_FORMAT, '-hq.mp3')) formats = [{ - 'url': audio_url, - 'id': music_id, - 'format_id': self._og_search_property('audio:type', webpage, 'audio format', fatal=False), - 'format_note': '{0} {1} {2}'.format(determine_ext(audio_url), bitdepth, channels), - 'filesize_approx': filesize, - 'asr': int_or_none(self._html_search_regex( - r'Samplerate
    (\d+).*
    ', - sound_info, 'samplerate', fatal=False)), - }] + 'url': format_url, + 'format_note': channels, + 'quality': quality, + } for quality, format_url in enumerate(audio_urls)] + self._sort_formats(formats) return { - 'id': music_id, + 'id': audio_id, 'title': title, - 'uploader': self._og_search_property('audio:artist', webpage, 'music uploader', fatal=False), 'description': description, 'duration': duration, - 'tags': [self._html_search_regex(r'>(.*)', t, 'tag', fatal=False) - for t in tags.split('\n') if t.strip()], + 'uploader': uploader, + 'upload_date': upload_date, + 'tags': tags, 'formats': formats, - 'release_date': release_date, - 'likes_count': download_count, } From c4251b9aaa9a69e7f7b55197b3907e52b17150d4 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Fri, 13 Jan 2017 10:08:51 +0100 Subject: [PATCH 24/86] [common] add possibility to customize akamai manifest host --- youtube_dl/extractor/common.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 6fa7c334e..dce8c7d0d 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1967,10 +1967,13 @@ class InfoExtractor(object): entries.append(media_info) return entries - def _extract_akamai_formats(self, manifest_url, video_id): + def _extract_akamai_formats(self, manifest_url, video_id, hosts={}): formats = [] hdcore_sign = 'hdcore=3.7.0' - f4m_url = re.sub(r'(https?://.+?)/i/', r'\1/z/', manifest_url).replace('/master.m3u8', '/manifest.f4m') + f4m_url = re.sub(r'(https?://[^/+])/i/', r'\1/z/', manifest_url).replace('/master.m3u8', '/manifest.f4m') + hds_host = hosts.get('hds') + if hds_host: + f4m_url = re.sub(r'(https?://)[^/]+', r'\1' + hds_host, f4m_url) if 'hdcore=' not in f4m_url: f4m_url += ('&' if '?' in f4m_url else '?') + hdcore_sign f4m_formats = self._extract_f4m_formats( @@ -1978,7 +1981,10 @@ class InfoExtractor(object): for entry in f4m_formats: entry.update({'extra_param_to_segment_url': hdcore_sign}) formats.extend(f4m_formats) - m3u8_url = re.sub(r'(https?://.+?)/z/', r'\1/i/', manifest_url).replace('/manifest.f4m', '/master.m3u8') + m3u8_url = re.sub(r'(https?://[^/]+)/z/', r'\1/i/', manifest_url).replace('/manifest.f4m', '/master.m3u8') + hls_host = hosts.get('hls') + if hls_host: + m3u8_url = re.sub(r'(https?://)[^/]+', r'\1' + hls_host, m3u8_url) formats.extend(self._extract_m3u8_formats( m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)) From 1f393a324191591d895bafc1e4c756951f368b3c Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Fri, 13 Jan 2017 10:19:53 +0100 Subject: [PATCH 25/86] [tv4] improve extraction(closes #11698) - remove check for requires_subscription - extract more formats - extract subtitles --- youtube_dl/extractor/tv4.py | 49 +++++++++++++++++++++++-------------- 1 file changed, 31 insertions(+), 18 deletions(-) diff --git a/youtube_dl/extractor/tv4.py b/youtube_dl/extractor/tv4.py index 29f62b970..ad79db92b 100644 --- a/youtube_dl/extractor/tv4.py +++ b/youtube_dl/extractor/tv4.py @@ -4,11 +4,10 @@ from __future__ import unicode_literals from .common import InfoExtractor from ..compat import compat_str from ..utils import ( - ExtractorError, int_or_none, parse_iso8601, try_get, - update_url_query, + determine_ext, ) @@ -28,7 +27,7 @@ class TV4IE(InfoExtractor): _TESTS = [ { 'url': 'http://www.tv4.se/kalla-fakta/klipp/kalla-fakta-5-english-subtitles-2491650', - 'md5': '909d6454b87b10a25aa04c4bdd416a9b', + 'md5': 'cb837212f342d77cec06e6dad190e96d', 'info_dict': { 'id': '2491650', 'ext': 'mp4', @@ -40,7 +39,7 @@ class TV4IE(InfoExtractor): }, { 'url': 'http://www.tv4play.se/iframe/video/3054113', - 'md5': '77f851c55139ffe0ebd41b6a5552489b', + 'md5': 'cb837212f342d77cec06e6dad190e96d', 'info_dict': { 'id': '3054113', 'ext': 'mp4', @@ -75,11 +74,10 @@ class TV4IE(InfoExtractor): # If is_geo_restricted is true, it doesn't necessarily mean we can't download it if info.get('is_geo_restricted'): self.report_warning('This content might not be available in your country due to licensing restrictions.') - if info.get('requires_subscription'): - raise ExtractorError('This content requires subscription.', expected=True) title = info['title'] + subtitles = {} formats = [] # http formats are linked with unresolvable host for kind in ('hls', ''): @@ -87,26 +85,41 @@ class TV4IE(InfoExtractor): 'https://prima.tv4play.se/api/web/asset/%s/play.json' % video_id, video_id, 'Downloading sources JSON', query={ 'protocol': kind, - 'videoFormat': 'MP4+WEBVTTS+WEBVTT', + 'videoFormat': 'MP4+WEBVTT', }) - item = try_get(data, lambda x: x['playback']['items']['item'], dict) - manifest_url = item.get('url') - if not isinstance(manifest_url, compat_str): + items = try_get(data, lambda x: x['playback']['items']['item']) + if not items: continue - if kind == 'hls': - formats.extend(self._extract_m3u8_formats( - manifest_url, video_id, 'mp4', entry_protocol='m3u8_native', - m3u8_id=kind, fatal=False)) - else: - formats.extend(self._extract_f4m_formats( - update_url_query(manifest_url, {'hdcore': '3.8.0'}), - video_id, f4m_id='hds', fatal=False)) + if isinstance(items, dict): + items = [items] + for item in items: + manifest_url = item.get('url') + if not isinstance(manifest_url, compat_str): + continue + ext = determine_ext(manifest_url) + if ext == 'm3u8': + formats.extend(self._extract_m3u8_formats( + manifest_url, video_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id=kind, fatal=False)) + elif ext == 'f4m': + formats.extend(self._extract_akamai_formats( + manifest_url, video_id, { + 'hls': 'tv4play-i.akamaihd.net', + })) + elif ext == 'webvtt': + subtitles = self._merge_subtitles( + subtitles, { + 'sv': [{ + 'url': manifest_url, + 'ext': 'vtt', + }]}) self._sort_formats(formats) return { 'id': video_id, 'title': title, 'formats': formats, + 'subtitles': subtitles, 'description': info.get('description'), 'timestamp': parse_iso8601(info.get('broadcast_date_time')), 'duration': int_or_none(info.get('duration')), From 06e9363b7a21acf6a592780a706b0fdd6b5a2d4e Mon Sep 17 00:00:00 2001 From: Vijay Singh Date: Sun, 8 Jan 2017 22:27:28 +0530 Subject: [PATCH 26/86] [openload] Fix extraction (closes #10408) Just a minor fix for openload --- youtube_dl/extractor/openload.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index 2ce9f3826..3d4ad7dca 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -64,16 +64,17 @@ class OpenloadIE(InfoExtractor): raise ExtractorError('File not found', expected=True) ol_id = self._search_regex( - ']+id="[a-zA-Z0-9]+x"[^>]*>([0-9]+)', + ']+id="[^"]+"[^>]*>([0-9]+)', webpage, 'openload ID') - first_two_chars = int(float(ol_id[0:][:2])) + first_three_chars = int(float(ol_id[0:][:3])) + fifth_char = int(float(ol_id[3:5])) urlcode = '' - num = 2 + num = 5 while num < len(ol_id): - urlcode += compat_chr(int(float(ol_id[num:][:3])) - - first_two_chars * int(float(ol_id[num + 3:][:2]))) + urlcode += compat_chr(int(float(ol_id[num:][:3])) + + first_three_chars - fifth_char * int(float(ol_id[num + 3:][:2]))) num += 5 video_url = 'https://openload.co/stream/' + urlcode From fb6a59205e3dc5bb1d37d50ac1161314c0d66cf1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 13 Jan 2017 23:55:55 +0700 Subject: [PATCH 27/86] [mixcloud] Fix extraction (closes #11674) --- youtube_dl/extractor/mixcloud.py | 22 +++++----------------- 1 file changed, 5 insertions(+), 17 deletions(-) diff --git a/youtube_dl/extractor/mixcloud.py b/youtube_dl/extractor/mixcloud.py index 4ba2310fd..a24b3165a 100644 --- a/youtube_dl/extractor/mixcloud.py +++ b/youtube_dl/extractor/mixcloud.py @@ -16,7 +16,6 @@ from ..utils import ( clean_html, ExtractorError, OnDemandPagedList, - parse_count, str_to_int, ) @@ -36,7 +35,6 @@ class MixcloudIE(InfoExtractor): 'uploader_id': 'dholbach', 'thumbnail': r're:https?://.*\.jpg', 'view_count': int, - 'like_count': int, }, }, { 'url': 'http://www.mixcloud.com/gillespeterson/caribou-7-inch-vinyl-mix-chat/', @@ -49,7 +47,6 @@ class MixcloudIE(InfoExtractor): 'uploader_id': 'gillespeterson', 'thumbnail': 're:https?://.*', 'view_count': int, - 'like_count': int, }, }, { 'url': 'https://beta.mixcloud.com/RedLightRadio/nosedrip-15-red-light-radio-01-18-2016/', @@ -89,26 +86,18 @@ class MixcloudIE(InfoExtractor): song_url = play_info['stream_url'] - PREFIX = ( - r'm-play-on-spacebar[^>]+' - r'(?:\s+[a-zA-Z0-9-]+(?:="[^"]+")?)*?\s+') - title = self._html_search_regex( - PREFIX + r'm-title="([^"]+)"', webpage, 'title') + title = self._html_search_regex(r'm-title="([^"]+)"', webpage, 'title') thumbnail = self._proto_relative_url(self._html_search_regex( - PREFIX + r'm-thumbnail-url="([^"]+)"', webpage, 'thumbnail', - fatal=False)) + r'm-thumbnail-url="([^"]+)"', webpage, 'thumbnail', fatal=False)) uploader = self._html_search_regex( - PREFIX + r'm-owner-name="([^"]+)"', - webpage, 'uploader', fatal=False) + r'm-owner-name="([^"]+)"', webpage, 'uploader', fatal=False) uploader_id = self._search_regex( r'\s+"profile": "([^"]+)",', webpage, 'uploader id', fatal=False) description = self._og_search_description(webpage) - like_count = parse_count(self._search_regex( - r'\bbutton-favorite[^>]+>.*?]+class=["\']toggle-number[^>]+>\s*([^<]+)', - webpage, 'like count', default=None)) view_count = str_to_int(self._search_regex( [r'([0-9,.]+)'], + r'/listeners/?">([0-9,.]+)', + r'm-tooltip=["\']([\d,.]+) plays'], webpage, 'play count', default=None)) return { @@ -120,7 +109,6 @@ class MixcloudIE(InfoExtractor): 'uploader': uploader, 'uploader_id': uploader_id, 'view_count': view_count, - 'like_count': like_count, } From 9837cb7507e0635755082a7fd2e748c4106fefc4 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Fri, 13 Jan 2017 23:02:50 +0100 Subject: [PATCH 28/86] [ooyala] add support for videos with embedToken(#11684) --- youtube_dl/extractor/generic.py | 9 ++++++++- youtube_dl/extractor/ooyala.py | 14 +++++++++++--- 2 files changed, 19 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 86dc79307..ac29ec600 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -1939,7 +1939,14 @@ class GenericIE(InfoExtractor): re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P.{32})[\'"]\)', webpage) or re.search(r'data-ooyala-video-id\s*=\s*[\'"](?P.{32})[\'"]', webpage)) if mobj is not None: - return OoyalaIE._build_url_result(smuggle_url(mobj.group('ec'), {'domain': url})) + embed_token = self._search_regex( + r'embedToken[\'"]?\s*:\s*[\'"]([^\'"]+)', + webpage, 'ooyala embed token', default=None) + return OoyalaIE._build_url_result(smuggle_url( + mobj.group('ec'), { + 'domain': url, + 'embed_token': embed_token, + })) # Look for multiple Ooyala embeds on SBN network websites mobj = re.search(r'SBN\.VideoLinkset\.entryGroup\((\[.*?\])', webpage) diff --git a/youtube_dl/extractor/ooyala.py b/youtube_dl/extractor/ooyala.py index c2807d0f6..f00cf745b 100644 --- a/youtube_dl/extractor/ooyala.py +++ b/youtube_dl/extractor/ooyala.py @@ -18,7 +18,7 @@ class OoyalaBaseIE(InfoExtractor): _CONTENT_TREE_BASE = _PLAYER_BASE + 'player_api/v1/content_tree/' _AUTHORIZATION_URL_TEMPLATE = _PLAYER_BASE + 'sas/player_api/v2/authorization/embed_code/%s/%s?' - def _extract(self, content_tree_url, video_id, domain='example.org', supportedformats=None): + def _extract(self, content_tree_url, video_id, domain='example.org', supportedformats=None, embed_token=None): content_tree = self._download_json(content_tree_url, video_id)['content_tree'] metadata = content_tree[list(content_tree)[0]] embed_code = metadata['embed_code'] @@ -29,7 +29,8 @@ class OoyalaBaseIE(InfoExtractor): self._AUTHORIZATION_URL_TEMPLATE % (pcode, embed_code) + compat_urllib_parse_urlencode({ 'domain': domain, - 'supportedFormats': supportedformats or 'mp4,rtmp,m3u8,hds', + 'supportedFormats': supportedformats or 'mp4,rtmp,m3u8,hds,dash,smooth', + 'embedToken': embed_token, }), video_id) cur_auth_data = auth_data['authorization_data'][embed_code] @@ -52,6 +53,12 @@ class OoyalaBaseIE(InfoExtractor): elif delivery_type == 'hds' or ext == 'f4m': formats.extend(self._extract_f4m_formats( s_url + '?hdcore=3.7.0', embed_code, f4m_id='hds', fatal=False)) + elif delivery_type == 'hds' or ext == 'mpd': + formats.extend(self._extract_mpd_formats( + s_url, embed_code, mpd_id='dash', fatal=False)) + elif delivery_type == 'smooth': + self._extract_ism_formats( + s_url, embed_code, ism_id='mss', fatal=False) elif ext == 'smil': formats.extend(self._extract_smil_formats( s_url, embed_code, fatal=False)) @@ -146,8 +153,9 @@ class OoyalaIE(OoyalaBaseIE): embed_code = self._match_id(url) domain = smuggled_data.get('domain') supportedformats = smuggled_data.get('supportedformats') + embed_token = smuggled_data.get('embed_token') content_tree_url = self._CONTENT_TREE_BASE + 'embed_code/%s/%s' % (embed_code, embed_code) - return self._extract(content_tree_url, embed_code, domain, supportedformats) + return self._extract(content_tree_url, embed_code, domain, supportedformats, embed_token) class OoyalaExternalIE(OoyalaBaseIE): From 5e8eebb6009ac3e9f7dfc803d8561174d207c1a2 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Fri, 13 Jan 2017 23:06:07 +0100 Subject: [PATCH 29/86] [mitele] extract dash formats --- youtube_dl/extractor/mitele.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/mitele.py b/youtube_dl/extractor/mitele.py index 8984d3b8d..79e0b8ada 100644 --- a/youtube_dl/extractor/mitele.py +++ b/youtube_dl/extractor/mitele.py @@ -190,7 +190,7 @@ class MiTeleIE(InfoExtractor): return { '_type': 'url_transparent', # for some reason only HLS is supported - 'url': smuggle_url('ooyala:' + embedCode, {'supportedformats': 'm3u8'}), + 'url': smuggle_url('ooyala:' + embedCode, {'supportedformats': 'm3u8,dash'}), 'id': video_id, 'title': title, 'description': description, From adf063dad1792f0c9c680d13ccd984b4ad60ac29 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 14 Jan 2017 06:17:03 +0700 Subject: [PATCH 30/86] [mtv,cc,cmt,spike] Improve and refactor - Eliminate _transform_rtmp_url * Generalize triforce mgid extraction + [cmt] Add support for full-episodes (closes #11623) --- youtube_dl/extractor/cmt.py | 25 ++++++------ youtube_dl/extractor/comedycentral.py | 17 +------- youtube_dl/extractor/mtv.py | 58 ++++++++++++++++++--------- youtube_dl/extractor/spike.py | 2 +- 4 files changed, 54 insertions(+), 48 deletions(-) diff --git a/youtube_dl/extractor/cmt.py b/youtube_dl/extractor/cmt.py index 7d3e9b0c9..6302b8d9c 100644 --- a/youtube_dl/extractor/cmt.py +++ b/youtube_dl/extractor/cmt.py @@ -1,13 +1,11 @@ from __future__ import unicode_literals from .mtv import MTVIE -from ..utils import ExtractorError class CMTIE(MTVIE): IE_NAME = 'cmt.com' - _VALID_URL = r'https?://(?:www\.)?cmt\.com/(?:videos|shows)/(?:[^/]+/)*(?P\d+)' - _FEED_URL = 'http://www.cmt.com/sitewide/apps/player/embed/rss/' + _VALID_URL = r'https?://(?:www\.)?cmt\.com/(?:videos|shows|full-episodes)/(?P[^/]+)' _TESTS = [{ 'url': 'http://www.cmt.com/videos/garth-brooks/989124/the-call-featuring-trisha-yearwood.jhtml#artist=30061', @@ -35,15 +33,16 @@ class CMTIE(MTVIE): 'only_matching': True, }] - @classmethod - def _transform_rtmp_url(cls, rtmp_video_url): - if 'error_not_available.swf' in rtmp_video_url: - raise ExtractorError( - '%s said: video is not available' % cls.IE_NAME, expected=True) - - return super(CMTIE, cls)._transform_rtmp_url(rtmp_video_url) - def _extract_mgid(self, webpage): - return self._search_regex( + mgid = self._search_regex( r'MTVN\.VIDEO\.contentUri\s*=\s*([\'"])(?P.+?)\1', - webpage, 'mgid', group='mgid') + webpage, 'mgid', group='mgid', default=None) + if not mgid: + mgid = self._extract_triforce_mgid(webpage) + return mgid + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + mgid = self._extract_mgid(webpage) + return self.url_result('http://media.mtvnservices.com/embed/%s' % mgid) diff --git a/youtube_dl/extractor/comedycentral.py b/youtube_dl/extractor/comedycentral.py index 816e0bfb6..4cac29415 100644 --- a/youtube_dl/extractor/comedycentral.py +++ b/youtube_dl/extractor/comedycentral.py @@ -48,17 +48,8 @@ class ComedyCentralFullEpisodesIE(MTVServicesInfoExtractor): def _real_extract(self, url): playlist_id = self._match_id(url) webpage = self._download_webpage(url, playlist_id) - - feed_json = self._search_regex(r'var triforceManifestFeed\s*=\s*(\{.+?\});\n', webpage, 'triforce feeed') - feed = self._parse_json(feed_json, playlist_id) - zones = feed['manifest']['zones'] - - video_zone = zones['t2_lc_promo1'] - feed = self._download_json(video_zone['feed'], playlist_id) - mgid = feed['result']['data']['id'] - + mgid = self._extract_triforce_mgid(webpage, data_zone='t2_lc_promo1') videos_info = self._get_videos_info(mgid) - return videos_info @@ -94,12 +85,6 @@ class ToshIE(MTVServicesInfoExtractor): 'only_matching': True, }] - @classmethod - def _transform_rtmp_url(cls, rtmp_video_url): - new_urls = super(ToshIE, cls)._transform_rtmp_url(rtmp_video_url) - new_urls['rtmp'] = rtmp_video_url.replace('viacomccstrm', 'viacommtvstrm') - return new_urls - class ComedyCentralTVIE(MTVServicesInfoExtractor): _VALID_URL = r'https?://(?:www\.)?comedycentral\.tv/(?:staffeln|shows)/(?P[^/?#&]+)' diff --git a/youtube_dl/extractor/mtv.py b/youtube_dl/extractor/mtv.py index 5250db212..00a980c7d 100644 --- a/youtube_dl/extractor/mtv.py +++ b/youtube_dl/extractor/mtv.py @@ -13,11 +13,11 @@ from ..utils import ( fix_xml_ampersands, float_or_none, HEADRequest, - NO_DEFAULT, RegexNotFoundError, sanitized_Request, strip_or_none, timeconvert, + try_get, unescapeHTML, update_url_query, url_basename, @@ -42,15 +42,6 @@ class MTVServicesInfoExtractor(InfoExtractor): # Remove the templates, like &device={device} return re.sub(r'&[^=]*?={.*?}(?=(&|$))', '', url) - # This was originally implemented for ComedyCentral, but it also works here - @classmethod - def _transform_rtmp_url(cls, rtmp_video_url): - m = re.match(r'^rtmpe?://.*?/(?Pgsp\..+?/.*)$', rtmp_video_url) - if not m: - return {'rtmp': rtmp_video_url} - base = 'http://viacommtvstrmfs.fplive.net/' - return {'http': base + m.group('finalid')} - def _get_feed_url(self, uri): return self._FEED_URL @@ -91,22 +82,28 @@ class MTVServicesInfoExtractor(InfoExtractor): if rendition.get('method') == 'hls': hls_url = rendition.find('./src').text formats.extend(self._extract_m3u8_formats( - hls_url, video_id, ext='mp4', entry_protocol='m3u8_native')) + hls_url, video_id, ext='mp4', entry_protocol='m3u8_native', + m3u8_id='hls')) else: # fms try: _, _, ext = rendition.attrib['type'].partition('/') rtmp_video_url = rendition.find('./src').text + if 'error_not_available.swf' in rtmp_video_url: + raise ExtractorError( + '%s said: video is not available' % self.IE_NAME, + expected=True) if rtmp_video_url.endswith('siteunavail.png'): continue - new_urls = self._transform_rtmp_url(rtmp_video_url) formats.extend([{ - 'ext': 'flv' if new_url.startswith('rtmp') else ext, - 'url': new_url, - 'format_id': '-'.join(filter(None, [kind, rendition.get('bitrate')])), + 'ext': 'flv' if rtmp_video_url.startswith('rtmp') else ext, + 'url': rtmp_video_url, + 'format_id': '-'.join(filter(None, [ + 'rtmp' if rtmp_video_url.startswith('rtmp') else None, + rendition.get('bitrate')])), 'width': int(rendition.get('width')), 'height': int(rendition.get('height')), - } for kind, new_url in new_urls.items()]) + }]) except (KeyError, TypeError): raise ExtractorError('Invalid rendition field.') self._sort_formats(formats) @@ -212,7 +209,28 @@ class MTVServicesInfoExtractor(InfoExtractor): [self._get_video_info(item, use_hls) for item in idoc.findall('.//item')], playlist_title=title, playlist_description=description) - def _extract_mgid(self, webpage, default=NO_DEFAULT): + def _extract_triforce_mgid(self, webpage, data_zone=None, video_id=None): + triforce_feed = self._parse_json(self._search_regex( + r'triforceManifestFeed\s*=\s*(\{.+?\});\n', webpage, + 'triforce feed', default='{}'), video_id, fatal=False) + + data_zone = self._search_regex( + r'data-zone=(["\'])(?P.+?_lc_promo.*?)\1', webpage, + 'data zone', default=data_zone, group='zone') + + feed_url = try_get( + triforce_feed, lambda x: x['manifest']['zones'][data_zone]['feed'], + compat_str) + if not feed_url: + return + + feed = self._download_json(feed_url, video_id, fatal=False) + if not feed: + return + + return try_get(feed, lambda x: x['result']['data']['id'], compat_str) + + def _extract_mgid(self, webpage): try: # the url can be http://media.mtvnservices.com/fb/{mgid}.swf # or http://media.mtvnservices.com/{mgid} @@ -232,7 +250,11 @@ class MTVServicesInfoExtractor(InfoExtractor): sm4_embed = self._html_search_meta( 'sm4:video:embed', webpage, 'sm4 embed', default='') mgid = self._search_regex( - r'embed/(mgid:.+?)["\'&?/]', sm4_embed, 'mgid', default=default) + r'embed/(mgid:.+?)["\'&?/]', sm4_embed, 'mgid', default=None) + + if not mgid: + mgid = self._extract_triforce_mgid(webpage) + return mgid def _real_extract(self, url): diff --git a/youtube_dl/extractor/spike.py b/youtube_dl/extractor/spike.py index abfee3ece..c59896a17 100644 --- a/youtube_dl/extractor/spike.py +++ b/youtube_dl/extractor/spike.py @@ -46,7 +46,7 @@ class SpikeIE(MTVServicesInfoExtractor): _CUSTOM_URL_REGEX = re.compile(r'spikenetworkapp://([^/]+/[-a-fA-F0-9]+)') def _extract_mgid(self, webpage): - mgid = super(SpikeIE, self)._extract_mgid(webpage, default=None) + mgid = super(SpikeIE, self)._extract_mgid(webpage) if mgid is None: url_parts = self._search_regex(self._CUSTOM_URL_REGEX, webpage, 'episode_id') video_type, episode_id = url_parts.split('/', 1) From e54fc0524ebf7e3ec02fbd22f00fce466c952791 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 14 Jan 2017 06:23:24 +0700 Subject: [PATCH 31/86] [cmt] Add support for video-clips --- youtube_dl/extractor/cmt.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/cmt.py b/youtube_dl/extractor/cmt.py index 6302b8d9c..f6b794fb3 100644 --- a/youtube_dl/extractor/cmt.py +++ b/youtube_dl/extractor/cmt.py @@ -5,7 +5,7 @@ from .mtv import MTVIE class CMTIE(MTVIE): IE_NAME = 'cmt.com' - _VALID_URL = r'https?://(?:www\.)?cmt\.com/(?:videos|shows|full-episodes)/(?P[^/]+)' + _VALID_URL = r'https?://(?:www\.)?cmt\.com/(?:videos|shows|full-episodes|video-clips)/(?P[^/]+)' _TESTS = [{ 'url': 'http://www.cmt.com/videos/garth-brooks/989124/the-call-featuring-trisha-yearwood.jhtml#artist=30061', @@ -31,6 +31,12 @@ class CMTIE(MTVIE): }, { 'url': 'http://www.cmt.com/shows/party-down-south/party-down-south-ep-407-gone-girl/1738172/playlist/#id=1738172', 'only_matching': True, + }, { + 'url': 'http://www.cmt.com/full-episodes/537qb3/nashville-the-wayfaring-stranger-season-5-ep-501', + 'only_matching': True, + }, { + 'url': 'http://www.cmt.com/video-clips/t9e4ci/nashville-juliette-in-2-minutes', + 'only_matching': True, }] def _extract_mgid(self, webpage): From 4f66c16f337f3b2250d369b56bc31cfd7de06f89 Mon Sep 17 00:00:00 2001 From: Jakub Wilk Date: Sat, 14 Jan 2017 00:26:11 +0100 Subject: [PATCH 32/86] [brightcove:legacy] Fix misplaced backslash in a regexp --- youtube_dl/extractor/brightcove.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py index aa2923ccf..2e56d1df9 100644 --- a/youtube_dl/extractor/brightcove.py +++ b/youtube_dl/extractor/brightcove.py @@ -204,7 +204,7 @@ class BrightcoveLegacyIE(InfoExtractor): # // build Brightcove XML # } m = re.search( - r'''(?x)customBC.\createVideo\( + r'''(?x)customBC\.createVideo\( .*? # skipping width and height ["\'](?P\d+)["\']\s*,\s* # playerID ["\'](?PAQ[^"\']{48})[^"\']*["\']\s*,\s* # playerKey begins with AQ and is 50 characters From 0b94510cd00d50ddda74ba0079f856650f24680e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 14 Jan 2017 07:27:20 +0700 Subject: [PATCH 33/86] [ChangeLog] Actualize --- ChangeLog | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/ChangeLog b/ChangeLog index f1e234507..0106a7ae8 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,24 @@ +version + +Core ++ [common] Add ability to customize akamai manifest host ++ [utils] Add more date formats + +Extractors +- [mtv] Eliminate _transform_rtmp_url +* [mtv] Generalize triforce mgid extraction ++ [cmt] Add support for full episodes and video clips (#11623) ++ [mitele] Extract DASH formats ++ [ooyala] Add support for videos with embedToken (#11684) +* [mixcloud] Fix extraction (#11674) +* [openload] Fix extraction (#10408) +* [tv4] Improve extraction (#11698) +* [freesound] Fix and improve extraction (#11602) ++ [nick] Add support for beta.nick.com (#11655) +* [mtv,cc] Use HLS by default with native HLS downloader (#11641) +* [mtv] Fix non-HLS extraction + + version 2017.01.10 Extractors From 5d4c7daa49b8ff83aa6fb13b183f47d4427c6513 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 14 Jan 2017 07:31:07 +0700 Subject: [PATCH 34/86] release 2017.01.14 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- youtube_dl/version.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 6a4c25680..a7bf2b90c 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.01.10*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.01.10** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.01.14*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.01.14** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2017.01.10 +[debug] youtube-dl version 2017.01.14 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 0106a7ae8..dba18d39b 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2017.01.14 Core + [common] Add ability to customize akamai manifest host diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 214124722..17c6f9eb2 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2017.01.10' +__version__ = '2017.01.14' From abe8cb763fd43ee2db09c73965f38db7db02559e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 14 Jan 2017 08:30:00 +0700 Subject: [PATCH 35/86] [cbc] Improve playlist support (closes #11704) --- youtube_dl/extractor/cbc.py | 55 +++++++++++++++++++++++-------------- 1 file changed, 34 insertions(+), 21 deletions(-) diff --git a/youtube_dl/extractor/cbc.py b/youtube_dl/extractor/cbc.py index 7c76ceac8..a291685bf 100644 --- a/youtube_dl/extractor/cbc.py +++ b/youtube_dl/extractor/cbc.py @@ -90,36 +90,49 @@ class CBCIE(InfoExtractor): }, }], 'skip': 'Geo-restricted to Canada', + }, { + # multiple CBC.APP.Caffeine.initInstance(...) + 'url': 'http://www.cbc.ca/news/canada/calgary/dog-indoor-exercise-winter-1.3928238', + 'info_dict': { + 'title': 'Keep Rover active during the deep freeze with doggie pushups and other fun indoor tasks', + 'id': 'dog-indoor-exercise-winter-1.3928238', + }, + 'playlist_mincount': 6, }] @classmethod def suitable(cls, url): return False if CBCPlayerIE.suitable(url) else super(CBCIE, cls).suitable(url) + def _extract_player_init(self, player_init, display_id): + player_info = self._parse_json(player_init, display_id, js_to_json) + media_id = player_info.get('mediaId') + if not media_id: + clip_id = player_info['clipId'] + feed = self._download_json( + 'http://tpfeed.cbc.ca/f/ExhSPC/vms_5akSXx4Ng_Zn?byCustomValue={:mpsReleases}{%s}' % clip_id, + clip_id, fatal=False) + if feed: + media_id = try_get(feed, lambda x: x['entries'][0]['guid'], compat_str) + if not media_id: + media_id = self._download_json( + 'http://feed.theplatform.com/f/h9dtGB/punlNGjMlc1F?fields=id&byContent=byReleases%3DbyId%253D' + clip_id, + clip_id)['entries'][0]['id'].split('/')[-1] + return self.url_result('cbcplayer:%s' % media_id, 'CBCPlayer', media_id) + def _real_extract(self, url): display_id = self._match_id(url) webpage = self._download_webpage(url, display_id) - player_init = self._search_regex( - r'CBC\.APP\.Caffeine\.initInstance\(({.+?})\);', webpage, 'player init', - default=None) - if player_init: - player_info = self._parse_json(player_init, display_id, js_to_json) - media_id = player_info.get('mediaId') - if not media_id: - clip_id = player_info['clipId'] - feed = self._download_json( - 'http://tpfeed.cbc.ca/f/ExhSPC/vms_5akSXx4Ng_Zn?byCustomValue={:mpsReleases}{%s}' % clip_id, - clip_id, fatal=False) - if feed: - media_id = try_get(feed, lambda x: x['entries'][0]['guid'], compat_str) - if not media_id: - media_id = self._download_json( - 'http://feed.theplatform.com/f/h9dtGB/punlNGjMlc1F?fields=id&byContent=byReleases%3DbyId%253D' + clip_id, - clip_id)['entries'][0]['id'].split('/')[-1] - return self.url_result('cbcplayer:%s' % media_id, 'CBCPlayer', media_id) - else: - entries = [self.url_result('cbcplayer:%s' % media_id, 'CBCPlayer', media_id) for media_id in re.findall(r']+src="[^"]+?mediaId=(\d+)"', webpage)] - return self.playlist_result(entries) + entries = [ + self._extract_player_init(player_init, display_id) + for player_init in re.findall(r'CBC\.APP\.Caffeine\.initInstance\(({.+?})\);', webpage)] + entries.extend([ + self.url_result('cbcplayer:%s' % media_id, 'CBCPlayer', media_id) + for media_id in re.findall(r']+src="[^"]+?mediaId=(\d+)"', webpage)]) + return self.playlist_result( + entries, display_id, + self._og_search_title(webpage, fatal=False), + self._og_search_description(webpage)) class CBCPlayerIE(InfoExtractor): From 8854f3fe782e48f4b145eacf58cca533a9f9b199 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 14 Jan 2017 08:30:00 +0700 Subject: [PATCH 36/86] [README.md] Clarify newline format in cookies section (closes #11709) --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 905c1b73f..a606346b2 100644 --- a/README.md +++ b/README.md @@ -841,7 +841,7 @@ Use the `--cookies` option, for example `--cookies /path/to/cookies/file.txt`. In order to extract cookies from browser use any conforming browser extension for exporting cookies. For example, [cookies.txt](https://chrome.google.com/webstore/detail/cookiestxt/njabckikapfpffapmjgojcnbfjonfjfg) (for Chrome) or [Export Cookies](https://addons.mozilla.org/en-US/firefox/addon/export-cookies/) (for Firefox). -Note that the cookies file must be in Mozilla/Netscape format and the first line of the cookies file must be either `# HTTP Cookie File` or `# Netscape HTTP Cookie File`. Make sure you have correct [newline format](https://en.wikipedia.org/wiki/Newline) in the cookies file and convert newlines if necessary to correspond with your OS, namely `CRLF` (`\r\n`) for Windows, `LF` (`\n`) for Linux and `CR` (`\r`) for Mac OS. `HTTP Error 400: Bad Request` when using `--cookies` is a good sign of invalid newline format. +Note that the cookies file must be in Mozilla/Netscape format and the first line of the cookies file must be either `# HTTP Cookie File` or `# Netscape HTTP Cookie File`. Make sure you have correct [newline format](https://en.wikipedia.org/wiki/Newline) in the cookies file and convert newlines if necessary to correspond with your OS, namely `CRLF` (`\r\n`) for Windows and `LF` (`\n`) for Unix and Unix-like systems (Linux, Mac OS, etc.). `HTTP Error 400: Bad Request` when using `--cookies` is a good sign of invalid newline format. Passing cookies to youtube-dl is a good way to workaround login when a particular extractor does not implement it explicitly. Another use case is working around [CAPTCHA](https://en.wikipedia.org/wiki/CAPTCHA) some websites require you to solve in particular cases in order to get access (e.g. YouTube, CloudFlare). From 99d537a5e08499e20c3507c3f84048feacf77522 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sat, 14 Jan 2017 07:12:31 +0100 Subject: [PATCH 37/86] [ooyala] fix typo --- youtube_dl/extractor/ooyala.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/ooyala.py b/youtube_dl/extractor/ooyala.py index f00cf745b..84be2b1e3 100644 --- a/youtube_dl/extractor/ooyala.py +++ b/youtube_dl/extractor/ooyala.py @@ -53,7 +53,7 @@ class OoyalaBaseIE(InfoExtractor): elif delivery_type == 'hds' or ext == 'f4m': formats.extend(self._extract_f4m_formats( s_url + '?hdcore=3.7.0', embed_code, f4m_id='hds', fatal=False)) - elif delivery_type == 'hds' or ext == 'mpd': + elif delivery_type == 'dash' or ext == 'mpd': formats.extend(self._extract_mpd_formats( s_url, embed_code, mpd_id='dash', fatal=False)) elif delivery_type == 'smooth': From b80e2ebc8daa1ec30396cfa69836f1d96d23028f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 14 Jan 2017 18:27:22 +0700 Subject: [PATCH 38/86] [dramafever] Add support for URLs with language code (#11714) --- youtube_dl/extractor/dramafever.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/dramafever.py b/youtube_dl/extractor/dramafever.py index 1edd8e7bd..bcd9fe2a0 100644 --- a/youtube_dl/extractor/dramafever.py +++ b/youtube_dl/extractor/dramafever.py @@ -66,7 +66,7 @@ class DramaFeverBaseIE(AMPIE): class DramaFeverIE(DramaFeverBaseIE): IE_NAME = 'dramafever' - _VALID_URL = r'https?://(?:www\.)?dramafever\.com/drama/(?P[0-9]+/[0-9]+)(?:/|$)' + _VALID_URL = r'https?://(?:www\.)?dramafever\.com/(?:[^/]+/)?drama/(?P[0-9]+/[0-9]+)(?:/|$)' _TESTS = [{ 'url': 'http://www.dramafever.com/drama/4512/1/Cooking_with_Shin/', 'info_dict': { @@ -103,6 +103,9 @@ class DramaFeverIE(DramaFeverBaseIE): # m3u8 download 'skip_download': True, }, + }, { + 'url': 'https://www.dramafever.com/zh-cn/drama/4972/15/Doctor_Romantic/', + 'only_matching': True, }] def _real_extract(self, url): @@ -148,7 +151,7 @@ class DramaFeverIE(DramaFeverBaseIE): class DramaFeverSeriesIE(DramaFeverBaseIE): IE_NAME = 'dramafever:series' - _VALID_URL = r'https?://(?:www\.)?dramafever\.com/drama/(?P[0-9]+)(?:/(?:(?!\d+(?:/|$)).+)?)?$' + _VALID_URL = r'https?://(?:www\.)?dramafever\.com/(?:[^/]+/)?drama/(?P[0-9]+)(?:/(?:(?!\d+(?:/|$)).+)?)?$' _TESTS = [{ 'url': 'http://www.dramafever.com/drama/4512/Cooking_with_Shin/', 'info_dict': { From 621a2800ca259399c0c010a1cbc2c56aee90228c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 15 Jan 2017 04:42:05 +0700 Subject: [PATCH 39/86] [vevo] Improve geo restriction detection --- youtube_dl/extractor/vevo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/vevo.py b/youtube_dl/extractor/vevo.py index d82261e5e..f0a8075fb 100644 --- a/youtube_dl/extractor/vevo.py +++ b/youtube_dl/extractor/vevo.py @@ -206,7 +206,7 @@ class VevoIE(VevoBaseIE): note='Retrieving oauth token', errnote='Unable to retrieve oauth token') - if 'THIS PAGE IS CURRENTLY UNAVAILABLE IN YOUR REGION' in webpage: + if re.search(r'(?i)THIS PAGE IS CURRENTLY UNAVAILABLE IN YOUR REGION', webpage): self.raise_geo_restricted( '%s said: This page is currently unavailable in your region' % self.IE_NAME) From cd55c6ccd7b9cd0c48d475330c40f382eb0bc625 Mon Sep 17 00:00:00 2001 From: sh!zeeg Date: Wed, 4 Jan 2017 01:51:08 +0300 Subject: [PATCH 40/86] [beam:live] Add extractor --- youtube_dl/extractor/beampro.py | 82 ++++++++++++++++++++++++++++++ youtube_dl/extractor/extractors.py | 1 + 2 files changed, 83 insertions(+) create mode 100644 youtube_dl/extractor/beampro.py diff --git a/youtube_dl/extractor/beampro.py b/youtube_dl/extractor/beampro.py new file mode 100644 index 000000000..dc0a2b4af --- /dev/null +++ b/youtube_dl/extractor/beampro.py @@ -0,0 +1,82 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + clean_html, + compat_str, + int_or_none, + parse_iso8601, + try_get, +) + + +class BeamProLiveIE(InfoExtractor): + IE_NAME = 'Beam:live' + _VALID_URL = r'https?://(?:\w+.)?beam.pro/(?P[^?]+)$' + _API_CHANNEL = 'https://beam.pro/api/v1/channels/{0}' + _API_MANIFEST = 'https://beam.pro/api/v1/channels/{0}/manifest.m3u8' + _RATINGS = {'family': 0, 'teen': 13, '18+': 18} + + _TEST = { + 'url': 'http://www.beam.pro/niterhayven', + 'info_dict': { + 'id': '261562', + 'ext': 'mp4', + 'uploader': 'niterhayven', + 'timestamp': 1483477281, + 'age_limit': 18, + 'title': 'Introducing The Witcher 3 // The Grind Starts Now!', + 'thumbnail': r're:https://.*\.jpg$', + 'upload_date': '20170103', + 'uploader_id': 373396, + 'description': 'md5:0b161ac080f15fe05d18a07adb44a74d', + 'is_live': True, + }, + 'skip': 'niterhayven is offline', + 'params': { + 'skip_download': True, + }, + } + + def _real_extract(self, url): + channel_id = self._match_id(url) + chan_data = self._download_json(self._API_CHANNEL.format(channel_id), channel_id) + + if not chan_data.get('online'): + raise ExtractorError('{0} is offline'.format(channel_id), expected=True) + + formats = self._extract_m3u8_formats( + self._API_MANIFEST.format(chan_data.get('id')), channel_id, ext='mp4') + + self._sort_formats(formats) + info = {} + info['formats'] = formats + if chan_data: + info.update(self._extract_info(chan_data)) + if not info.get('title'): + info['title'] = self._live_title(channel_id) + if not info.get('id'): # barely possible but just in case + info['id'] = compat_str(abs(hash(channel_id)) % (10 ** 8)) + + return info + + def _extract_info(self, info): + thumbnail = try_get(info, lambda x: x['thumbnail']['url'], compat_str) + username = try_get(info, lambda x: x['user']['url'], compat_str) + video_id = compat_str(info['id']) if info.get('id') else None + rating = info.get('audience') + + return { + 'id': video_id, + 'title': info.get('name'), + 'description': clean_html(info.get('description')), + 'age_limit': self._RATINGS[rating] if rating in self._RATINGS else None, + 'is_live': True if info.get('online') else False, + 'timestamp': parse_iso8601(info.get('updatedAt')), + 'uploader': info.get('token') or username, + 'uploader_id': int_or_none(info.get('userId')), + 'view_count': int_or_none(info.get('viewersTotal')), + 'thumbnail': thumbnail, + } diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 5ba8efb0e..9d0610d21 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -88,6 +88,7 @@ from .bbc import ( BBCCoUkPlaylistIE, BBCIE, ) +from .beampro import BeamProLiveIE from .beeg import BeegIE from .behindkink import BehindKinkIE from .bellmedia import BellMediaIE From af62de104f33ebf8b473b3f7935451077fa56ee9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 15 Jan 2017 06:07:35 +0700 Subject: [PATCH 41/86] [beam:live] Improve and simplify (#10702, closes #11596) --- youtube_dl/extractor/beampro.py | 73 +++++++++++++++------------------ 1 file changed, 32 insertions(+), 41 deletions(-) diff --git a/youtube_dl/extractor/beampro.py b/youtube_dl/extractor/beampro.py index dc0a2b4af..f3a9e3278 100644 --- a/youtube_dl/extractor/beampro.py +++ b/youtube_dl/extractor/beampro.py @@ -14,25 +14,23 @@ from ..utils import ( class BeamProLiveIE(InfoExtractor): IE_NAME = 'Beam:live' - _VALID_URL = r'https?://(?:\w+.)?beam.pro/(?P[^?]+)$' - _API_CHANNEL = 'https://beam.pro/api/v1/channels/{0}' - _API_MANIFEST = 'https://beam.pro/api/v1/channels/{0}/manifest.m3u8' + _VALID_URL = r'https?://(?:\w+\.)?beam\.pro/(?P[^/?#&]+)' _RATINGS = {'family': 0, 'teen': 13, '18+': 18} - _TEST = { 'url': 'http://www.beam.pro/niterhayven', 'info_dict': { 'id': '261562', 'ext': 'mp4', - 'uploader': 'niterhayven', - 'timestamp': 1483477281, - 'age_limit': 18, 'title': 'Introducing The Witcher 3 // The Grind Starts Now!', - 'thumbnail': r're:https://.*\.jpg$', - 'upload_date': '20170103', - 'uploader_id': 373396, 'description': 'md5:0b161ac080f15fe05d18a07adb44a74d', + 'thumbnail': r're:https://.*\.jpg$', + 'timestamp': 1483477281, + 'upload_date': '20170103', + 'uploader': 'niterhayven', + 'uploader_id': '373396', + 'age_limit': 18, 'is_live': True, + 'view_count': int, }, 'skip': 'niterhayven is offline', 'params': { @@ -41,42 +39,35 @@ class BeamProLiveIE(InfoExtractor): } def _real_extract(self, url): - channel_id = self._match_id(url) - chan_data = self._download_json(self._API_CHANNEL.format(channel_id), channel_id) + channel_name = self._match_id(url) - if not chan_data.get('online'): - raise ExtractorError('{0} is offline'.format(channel_id), expected=True) + chan = self._download_json( + 'https://beam.pro/api/v1/channels/%s' % channel_name, channel_name) + + if chan.get('online') is False: + raise ExtractorError( + '{0} is offline'.format(channel_name), expected=True) + + channel_id = chan['id'] formats = self._extract_m3u8_formats( - self._API_MANIFEST.format(chan_data.get('id')), channel_id, ext='mp4') - + 'https://beam.pro/api/v1/channels/%s/manifest.m3u8' % channel_id, + channel_name, ext='mp4', m3u8_id='hls', fatal=False) self._sort_formats(formats) - info = {} - info['formats'] = formats - if chan_data: - info.update(self._extract_info(chan_data)) - if not info.get('title'): - info['title'] = self._live_title(channel_id) - if not info.get('id'): # barely possible but just in case - info['id'] = compat_str(abs(hash(channel_id)) % (10 ** 8)) - return info - - def _extract_info(self, info): - thumbnail = try_get(info, lambda x: x['thumbnail']['url'], compat_str) - username = try_get(info, lambda x: x['user']['url'], compat_str) - video_id = compat_str(info['id']) if info.get('id') else None - rating = info.get('audience') + user_id = chan.get('userId') or try_get(chan, lambda x: x['user']['id']) return { - 'id': video_id, - 'title': info.get('name'), - 'description': clean_html(info.get('description')), - 'age_limit': self._RATINGS[rating] if rating in self._RATINGS else None, - 'is_live': True if info.get('online') else False, - 'timestamp': parse_iso8601(info.get('updatedAt')), - 'uploader': info.get('token') or username, - 'uploader_id': int_or_none(info.get('userId')), - 'view_count': int_or_none(info.get('viewersTotal')), - 'thumbnail': thumbnail, + 'id': compat_str(chan.get('id') or channel_name), + 'title': self._live_title(chan.get('name') or channel_name), + 'description': clean_html(chan.get('description')), + 'thumbnail': try_get(chan, lambda x: x['thumbnail']['url'], compat_str), + 'timestamp': parse_iso8601(chan.get('updatedAt')), + 'uploader': chan.get('token') or try_get( + chan, lambda x: x['user']['username'], compat_str), + 'uploader_id': compat_str(user_id) if user_id else None, + 'age_limit': self._RATINGS.get(chan.get('audience')), + 'is_live': True, + 'view_count': int_or_none(chan.get('viewersTotal')), + 'formats': formats, } From 6f0be937473c5d5f60cd8e712287fcee844093d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 15 Jan 2017 06:09:32 +0700 Subject: [PATCH 42/86] [YoutubeDL] Improve protocol auto determining (closes #11720) --- youtube_dl/YoutubeDL.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 5d654f55f..41d9a63ee 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1363,7 +1363,7 @@ class YoutubeDL(object): format['ext'] = determine_ext(format['url']).lower() # Automatically determine protocol if missing (useful for format # selection purposes) - if 'protocol' not in format: + if format.get('protocol') is None: format['protocol'] = determine_protocol(format) # Add HTTP headers, so that external programs can use them from the # json output From a7acf868a55b3d734bef564e3392020f18c20422 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 15 Jan 2017 10:34:39 +0700 Subject: [PATCH 43/86] [yourupload] Fix extraction (closes #11601) --- youtube_dl/extractor/yourupload.py | 49 +++++++++++++----------------- 1 file changed, 21 insertions(+), 28 deletions(-) diff --git a/youtube_dl/extractor/yourupload.py b/youtube_dl/extractor/yourupload.py index 4ce327845..9fa772838 100644 --- a/youtube_dl/extractor/yourupload.py +++ b/youtube_dl/extractor/yourupload.py @@ -2,44 +2,37 @@ from __future__ import unicode_literals from .common import InfoExtractor +from ..utils import urljoin class YourUploadIE(InfoExtractor): - _VALID_URL = r'''(?x)https?://(?:www\.)? - (?:yourupload\.com/watch| - embed\.yourupload\.com| - embed\.yucache\.net - )/(?P[A-Za-z0-9]+) - ''' - _TESTS = [ - { - 'url': 'http://yourupload.com/watch/14i14h', - 'md5': '5e2c63385454c557f97c4c4131a393cd', - 'info_dict': { - 'id': '14i14h', - 'ext': 'mp4', - 'title': 'BigBuckBunny_320x180.mp4', - 'thumbnail': r're:^https?://.*\.jpe?g', - } - }, - { - 'url': 'http://embed.yourupload.com/14i14h', - 'only_matching': True, - }, - { - 'url': 'http://embed.yucache.net/14i14h?client_file_id=803349', - 'only_matching': True, - }, - ] + _VALID_URL = r'https?://(?:www\.)?(?:yourupload\.com/(?:watch|embed)|embed\.yourupload\.com)/(?P[A-Za-z0-9]+)' + _TESTS = [{ + 'url': 'http://yourupload.com/watch/14i14h', + 'md5': '5e2c63385454c557f97c4c4131a393cd', + 'info_dict': { + 'id': '14i14h', + 'ext': 'mp4', + 'title': 'BigBuckBunny_320x180.mp4', + 'thumbnail': r're:^https?://.*\.jpe?g', + } + }, { + 'url': 'http://www.yourupload.com/embed/14i14h', + 'only_matching': True, + }, { + 'url': 'http://embed.yourupload.com/14i14h', + 'only_matching': True, + }] def _real_extract(self, url): video_id = self._match_id(url) - embed_url = 'http://embed.yucache.net/{0:}'.format(video_id) + embed_url = 'http://www.yourupload.com/embed/%s' % video_id + webpage = self._download_webpage(embed_url, video_id) title = self._og_search_title(webpage) - video_url = self._og_search_video_url(webpage) + video_url = urljoin(embed_url, self._og_search_video_url(webpage)) thumbnail = self._og_search_thumbnail(webpage, default=None) return { From 8e4988f1a21184839dcd23d7133c250a43c5ea58 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sun, 15 Jan 2017 22:10:57 +0800 Subject: [PATCH 44/86] [niconico] Remove codes for downloading anonymously Apparently Niconico now blocks playing without an account Closes #11170 --- youtube_dl/extractor/niconico.py | 27 +++++++-------------------- 1 file changed, 7 insertions(+), 20 deletions(-) diff --git a/youtube_dl/extractor/niconico.py b/youtube_dl/extractor/niconico.py index a104e33f8..7e6c594c8 100644 --- a/youtube_dl/extractor/niconico.py +++ b/youtube_dl/extractor/niconico.py @@ -7,7 +7,6 @@ import datetime from .common import InfoExtractor from ..compat import ( - compat_urllib_parse_urlencode, compat_urlparse, ) from ..utils import ( @@ -40,6 +39,7 @@ class NiconicoIE(InfoExtractor): 'description': '(c) copyright 2008, Blender Foundation / www.bigbuckbunny.org', 'duration': 33, }, + 'skip': 'Requires an account', }, { # File downloaded with and without credentials are different, so omit # the md5 field @@ -55,6 +55,7 @@ class NiconicoIE(InfoExtractor): 'timestamp': 1304065916, 'duration': 209, }, + 'skip': 'Requires an account', }, { # 'video exists but is marked as "deleted" # md5 is unstable @@ -65,9 +66,10 @@ class NiconicoIE(InfoExtractor): 'description': 'deleted', 'title': 'ドラえもんエターナル第3話「決戦第3新東京市」<前編>', 'upload_date': '20071224', - 'timestamp': 1198527840, # timestamp field has different value if logged in + 'timestamp': int, # timestamp field has different value if logged in 'duration': 304, }, + 'skip': 'Requires an account', }, { 'url': 'http://www.nicovideo.jp/watch/so22543406', 'info_dict': { @@ -79,7 +81,8 @@ class NiconicoIE(InfoExtractor): 'upload_date': '20140104', 'uploader': 'アニメロチャンネル', 'uploader_id': '312', - } + }, + 'skip': 'The viewing period of the video you were searching for has expired.', }] _VALID_URL = r'https?://(?:www\.|secure\.)?nicovideo\.jp/watch/(?P(?:[a-z]{2})?[0-9]+)' @@ -134,23 +137,7 @@ class NiconicoIE(InfoExtractor): 'http://flapi.nicovideo.jp/api/getflv/' + video_id + '?as3=1', video_id, 'Downloading flv info') else: - # Get external player info - ext_player_info = self._download_webpage( - 'http://ext.nicovideo.jp/thumb_watch/' + video_id, video_id) - thumb_play_key = self._search_regex( - r'\'thumbPlayKey\'\s*:\s*\'(.*?)\'', ext_player_info, 'thumbPlayKey') - - # Get flv info - flv_info_data = compat_urllib_parse_urlencode({ - 'k': thumb_play_key, - 'v': video_id - }) - flv_info_request = sanitized_Request( - 'http://ext.nicovideo.jp/thumb_watch', flv_info_data, - {'Content-Type': 'application/x-www-form-urlencoded'}) - flv_info_webpage = self._download_webpage( - flv_info_request, video_id, - note='Downloading flv info', errnote='Unable to download flv info') + raise ExtractorError('Niconico videos now require logging in', expected=True) flv_info = compat_urlparse.parse_qs(flv_info_webpage) if 'url' not in flv_info: From dcae7b3fdc6e6812e78c8dba96d671ccf0ab068e Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sun, 15 Jan 2017 22:51:54 +0800 Subject: [PATCH 45/86] [niconico] Allow login via cookies Some codes are borrowed from #7968, which is by @jlhg Closes #7968 --- ChangeLog | 5 +++++ youtube_dl/extractor/niconico.py | 18 +++++++----------- 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/ChangeLog b/ChangeLog index dba18d39b..029d13426 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,8 @@ +version + +Extractors ++ [niconico] Support login via cookies (#7968) + version 2017.01.14 Core diff --git a/youtube_dl/extractor/niconico.py b/youtube_dl/extractor/niconico.py index 7e6c594c8..8baac23e4 100644 --- a/youtube_dl/extractor/niconico.py +++ b/youtube_dl/extractor/niconico.py @@ -87,8 +87,6 @@ class NiconicoIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.|secure\.)?nicovideo\.jp/watch/(?P(?:[a-z]{2})?[0-9]+)' _NETRC_MACHINE = 'niconico' - # Determine whether the downloader used authentication to download video - _AUTHENTICATED = False def _real_initialize(self): self._login() @@ -112,8 +110,6 @@ class NiconicoIE(InfoExtractor): if re.search(r'(?i)

    Log in error

    ', login_results) is not None: self._downloader.report_warning('unable to log in: bad username or password') return False - # Successful login - self._AUTHENTICATED = True return True def _real_extract(self, url): @@ -131,19 +127,19 @@ class NiconicoIE(InfoExtractor): 'http://ext.nicovideo.jp/api/getthumbinfo/' + video_id, video_id, note='Downloading video info page') - if self._AUTHENTICATED: - # Get flv info - flv_info_webpage = self._download_webpage( - 'http://flapi.nicovideo.jp/api/getflv/' + video_id + '?as3=1', - video_id, 'Downloading flv info') - else: - raise ExtractorError('Niconico videos now require logging in', expected=True) + # Get flv info + flv_info_webpage = self._download_webpage( + 'http://flapi.nicovideo.jp/api/getflv/' + video_id + '?as3=1', + video_id, 'Downloading flv info') flv_info = compat_urlparse.parse_qs(flv_info_webpage) if 'url' not in flv_info: if 'deleted' in flv_info: raise ExtractorError('The video has been deleted.', expected=True) + elif 'closed' in flv_info: + raise ExtractorError('Niconico videos now require logging in', + expected=True) else: raise ExtractorError('Unable to find video URL') From 16e2c8f7710bffb462921dbc93adfa6274bd9334 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Mon, 16 Jan 2017 00:06:52 +0800 Subject: [PATCH 46/86] [brightcove] Recognize another player ID Closes #11688 --- ChangeLog | 1 + youtube_dl/extractor/brightcove.py | 2 +- youtube_dl/extractor/generic.py | 20 ++++++++++++++++++++ 3 files changed, 22 insertions(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index 029d13426..2e0ddd4f6 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,7 @@ version Extractors ++ [brightcove] Recognize another player ID pattern (#11688) + [niconico] Support login via cookies (#7968) version 2017.01.14 diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py index 2e56d1df9..5c6e99da1 100644 --- a/youtube_dl/extractor/brightcove.py +++ b/youtube_dl/extractor/brightcove.py @@ -179,7 +179,7 @@ class BrightcoveLegacyIE(InfoExtractor): params = {} - playerID = find_param('playerID') + playerID = find_param('playerID') or find_param('playerId') if playerID is None: raise ExtractorError('Cannot find player ID') params['playerID'] = playerID diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index ac29ec600..a3ac7d26b 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -422,6 +422,26 @@ class GenericIE(InfoExtractor): 'skip_download': True, # m3u8 download }, }, + { + # Brightcove with alternative playerID key + 'url': 'http://www.nature.com/nmeth/journal/v9/n7/fig_tab/nmeth.2062_SV1.html', + 'info_dict': { + 'id': 'nmeth.2062_SV1', + 'title': 'Simultaneous multiview imaging of the Drosophila syncytial blastoderm : Quantitative high-speed imaging of entire developing embryos with simultaneous multiview light-sheet microscopy : Nature Methods : Nature Research', + }, + 'playlist': [{ + 'info_dict': { + 'id': '2228375078001', + 'ext': 'mp4', + 'title': 'nmeth.2062-sv1', + 'description': 'nmeth.2062-sv1', + 'timestamp': 1363357591, + 'upload_date': '20130315', + 'uploader': 'Nature Publishing Group', + 'uploader_id': '1964492299001', + }, + }], + }, # ooyala video { 'url': 'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219', From 906420cae37ee3c2f48d23c3a4fa0543a66947d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 16 Jan 2017 21:54:47 +0700 Subject: [PATCH 47/86] [limelight] Improve and make more robust (closes #11737) + Add support for direct http for videos hosted on video.llnw.net * Check handmade http URLs --- youtube_dl/extractor/limelight.py | 28 ++++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/limelight.py b/youtube_dl/extractor/limelight.py index 905a0e85f..e635f3c4d 100644 --- a/youtube_dl/extractor/limelight.py +++ b/youtube_dl/extractor/limelight.py @@ -59,14 +59,26 @@ class LimelightBaseIE(InfoExtractor): format_id = 'rtmp' if stream.get('videoBitRate'): format_id += '-%d' % int_or_none(stream['videoBitRate']) - http_url = 'http://cpl.delvenetworks.com/' + rtmp.group('playpath')[4:] - urls.append(http_url) - http_fmt = fmt.copy() - http_fmt.update({ - 'url': http_url, - 'format_id': format_id.replace('rtmp', 'http'), - }) - formats.append(http_fmt) + http_format_id = format_id.replace('rtmp', 'http') + + CDN_HOSTS = ( + ('delvenetworks.com', 'cpl.delvenetworks.com'), + ('video.llnw.net', 's2.content.video.llnw.net'), + ) + for cdn_host, http_host in CDN_HOSTS: + if cdn_host not in rtmp.group('host').lower(): + continue + http_url = 'http://%s/%s' % (http_host, rtmp.group('playpath')[4:]) + urls.append(http_url) + if self._is_valid_url(http_url, video_id, http_format_id): + http_fmt = fmt.copy() + http_fmt.update({ + 'url': http_url, + 'format_id': http_format_id, + }) + formats.append(http_fmt) + break + fmt.update({ 'url': rtmp.group('url'), 'play_path': rtmp.group('playpath'), From 0ce8c66fb05fefbe51ac1eca8d3ddbd561b38a54 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 16 Jan 2017 22:07:12 +0700 Subject: [PATCH 48/86] [options] Include custom conf in final argv (closes #11741) --- youtube_dl/options.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/options.py b/youtube_dl/options.py index 0eb4924b6..0b8c1671d 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -867,7 +867,7 @@ def parseOpts(overrideArguments=None): if '--ignore-config' not in system_conf: user_conf = _readUserConf() - argv = system_conf + user_conf + command_line_conf + argv = system_conf + user_conf + custom_conf + command_line_conf opts, args = parser.parse_args(argv) if opts.verbose: for conf_label, conf in ( From 79fc8496c6ab423d591f9ed1a41358d038242bbb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 16 Jan 2017 23:31:50 +0700 Subject: [PATCH 49/86] [xiami] Improve extraction (closes #11699) * Relax _VALID_URLs * Improve track metadata extraction --- youtube_dl/extractor/xiami.py | 53 +++++++++++++++++++++++++++-------- 1 file changed, 41 insertions(+), 12 deletions(-) diff --git a/youtube_dl/extractor/xiami.py b/youtube_dl/extractor/xiami.py index 86abef257..d017e03de 100644 --- a/youtube_dl/extractor/xiami.py +++ b/youtube_dl/extractor/xiami.py @@ -16,7 +16,9 @@ class XiamiBaseIE(InfoExtractor): return webpage def _extract_track(self, track, track_id=None): - title = track['title'] + track_name = track.get('songName') or track.get('name') or track['subName'] + artist = track.get('artist') or track.get('artist_name') or track.get('singers') + title = '%s - %s' % (artist, track_name) if artist else track_name track_url = self._decrypt(track['location']) subtitles = {} @@ -31,9 +33,10 @@ class XiamiBaseIE(InfoExtractor): 'thumbnail': track.get('pic') or track.get('album_pic'), 'duration': int_or_none(track.get('length')), 'creator': track.get('artist', '').split(';')[0], - 'track': title, - 'album': track.get('album_name'), - 'artist': track.get('artist'), + 'track': track_name, + 'track_number': int_or_none(track.get('track')), + 'album': track.get('album_name') or track.get('title'), + 'artist': artist, 'subtitles': subtitles, } @@ -68,14 +71,14 @@ class XiamiBaseIE(InfoExtractor): class XiamiSongIE(XiamiBaseIE): IE_NAME = 'xiami:song' IE_DESC = '虾米音乐' - _VALID_URL = r'https?://(?:www\.)?xiami\.com/song/(?P[0-9]+)' + _VALID_URL = r'https?://(?:www\.)?xiami\.com/song/(?P[^/?#&]+)' _TESTS = [{ 'url': 'http://www.xiami.com/song/1775610518', 'md5': '521dd6bea40fd5c9c69f913c232cb57e', 'info_dict': { 'id': '1775610518', 'ext': 'mp3', - 'title': 'Woman', + 'title': 'HONNE - Woman', 'thumbnail': r're:http://img\.xiami\.net/images/album/.*\.jpg', 'duration': 265, 'creator': 'HONNE', @@ -95,7 +98,7 @@ class XiamiSongIE(XiamiBaseIE): 'info_dict': { 'id': '1775256504', 'ext': 'mp3', - 'title': '悟空', + 'title': '戴荃 - 悟空', 'thumbnail': r're:http://img\.xiami\.net/images/album/.*\.jpg', 'duration': 200, 'creator': '戴荃', @@ -109,6 +112,26 @@ class XiamiSongIE(XiamiBaseIE): }, }, 'skip': 'Georestricted', + }, { + 'url': 'http://www.xiami.com/song/1775953850', + 'info_dict': { + 'id': '1775953850', + 'ext': 'mp3', + 'title': 'До Скону - Чума Пожирает Землю', + 'thumbnail': r're:http://img\.xiami\.net/images/album/.*\.jpg', + 'duration': 683, + 'creator': 'До Скону', + 'track': 'Чума Пожирает Землю', + 'track_number': 7, + 'album': 'Ад', + 'artist': 'До Скону', + }, + 'params': { + 'skip_download': True, + }, + }, { + 'url': 'http://www.xiami.com/song/xLHGwgd07a1', + 'only_matching': True, }] def _real_extract(self, url): @@ -124,7 +147,7 @@ class XiamiPlaylistBaseIE(XiamiBaseIE): class XiamiAlbumIE(XiamiPlaylistBaseIE): IE_NAME = 'xiami:album' IE_DESC = '虾米音乐 - 专辑' - _VALID_URL = r'https?://(?:www\.)?xiami\.com/album/(?P[0-9]+)' + _VALID_URL = r'https?://(?:www\.)?xiami\.com/album/(?P[^/?#&]+)' _TYPE = '1' _TESTS = [{ 'url': 'http://www.xiami.com/album/2100300444', @@ -136,28 +159,34 @@ class XiamiAlbumIE(XiamiPlaylistBaseIE): }, { 'url': 'http://www.xiami.com/album/512288?spm=a1z1s.6843761.1110925389.6.hhE9p9', 'only_matching': True, + }, { + 'url': 'http://www.xiami.com/album/URVDji2a506', + 'only_matching': True, }] class XiamiArtistIE(XiamiPlaylistBaseIE): IE_NAME = 'xiami:artist' IE_DESC = '虾米音乐 - 歌手' - _VALID_URL = r'https?://(?:www\.)?xiami\.com/artist/(?P[0-9]+)' + _VALID_URL = r'https?://(?:www\.)?xiami\.com/artist/(?P[^/?#&]+)' _TYPE = '2' - _TEST = { + _TESTS = [{ 'url': 'http://www.xiami.com/artist/2132?spm=0.0.0.0.dKaScp', 'info_dict': { 'id': '2132', }, 'playlist_count': 20, 'skip': 'Georestricted', - } + }, { + 'url': 'http://www.xiami.com/artist/bC5Tk2K6eb99', + 'only_matching': True, + }] class XiamiCollectionIE(XiamiPlaylistBaseIE): IE_NAME = 'xiami:collection' IE_DESC = '虾米音乐 - 精选集' - _VALID_URL = r'https?://(?:www\.)?xiami\.com/collect/(?P[0-9]+)' + _VALID_URL = r'https?://(?:www\.)?xiami\.com/collect/(?P[^/?#&]+)' _TYPE = '3' _TEST = { 'url': 'http://www.xiami.com/collect/156527391?spm=a1z1s.2943601.6856193.12.4jpBnr', From ddd53c392e0b3d3d2c62ba28117a9b07702c5bd8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 16 Jan 2017 23:42:04 +0700 Subject: [PATCH 50/86] [ChangeLog] Actualize --- ChangeLog | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/ChangeLog b/ChangeLog index 2e0ddd4f6..ee59e120c 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,8 +1,22 @@ version +Core +* [options] Apply custom config to final composite configuration (#11741) +* [YoutubeDL] Improve protocol auto determining (#11720) + Extractors +* [xiami] Relax URL regular expressions +* [xiami] Improve track metadata extraction (#11699) ++ [limelight] Check hand-make direct HTTP links ++ [limelight] Add support for direct HTTP links at video.llnw.net (#11737) + [brightcove] Recognize another player ID pattern (#11688) + [niconico] Support login via cookies (#7968) +* [yourupload] Fix extraction (#11601) ++ [beam:live] Add support for beam.pro live streams (#10702, #11596) +* [vevo] Improve geo restriction detection ++ [dramafever] Add support for URLs with language code (#11714) +* [cbc] Improve playlist support (#11704) + version 2017.01.14 From c1c2fe2045911c310fd5d2eda7bbb53ad581d250 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 16 Jan 2017 23:44:04 +0700 Subject: [PATCH 51/86] release 2017.01.16 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 1 + youtube_dl/version.py | 2 +- 4 files changed, 6 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index a7bf2b90c..c04f6246a 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.01.14*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.01.14** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.01.16*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.01.16** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2017.01.14 +[debug] youtube-dl version 2017.01.16 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index ee59e120c..f6d73f982 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2017.01.16 Core * [options] Apply custom config to final composite configuration (#11741) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 0f6c4ec0c..a3c76d5db 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -86,6 +86,7 @@ - **bbc.co.uk:article**: BBC articles - **bbc.co.uk:iplayer:playlist** - **bbc.co.uk:playlist** + - **Beam:live** - **Beatport** - **Beeg** - **BehindKink** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 17c6f9eb2..c20718dd6 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2017.01.14' +__version__ = '2017.01.16' From c0bd51c090d617811f5e405294dce06f5871d717 Mon Sep 17 00:00:00 2001 From: Kagami Hiiragi Date: Mon, 16 Jan 2017 22:19:52 +0300 Subject: [PATCH 52/86] [naver] Support tv.naver.com links --- youtube_dl/extractor/naver.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/naver.py b/youtube_dl/extractor/naver.py index 055070ff5..aba0a9a70 100644 --- a/youtube_dl/extractor/naver.py +++ b/youtube_dl/extractor/naver.py @@ -12,7 +12,7 @@ from ..utils import ( class NaverIE(InfoExtractor): - _VALID_URL = r'https?://(?:m\.)?tvcast\.naver\.com/v/(?P\d+)' + _VALID_URL = r'https?://(?:m\.)?tv(?:cast)?\.naver\.com/v/(?P\d+)' _TESTS = [{ 'url': 'http://tvcast.naver.com/v/81652', From 8a5f0a6357746d293f7330e40a3cf5823b1b626d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 17 Jan 2017 21:19:57 +0700 Subject: [PATCH 53/86] [naver] Update tests for #11743 --- youtube_dl/extractor/naver.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/naver.py b/youtube_dl/extractor/naver.py index aba0a9a70..e8131333f 100644 --- a/youtube_dl/extractor/naver.py +++ b/youtube_dl/extractor/naver.py @@ -15,7 +15,7 @@ class NaverIE(InfoExtractor): _VALID_URL = r'https?://(?:m\.)?tv(?:cast)?\.naver\.com/v/(?P\d+)' _TESTS = [{ - 'url': 'http://tvcast.naver.com/v/81652', + 'url': 'http://tv.naver.com/v/81652', 'info_dict': { 'id': '81652', 'ext': 'mp4', @@ -24,7 +24,7 @@ class NaverIE(InfoExtractor): 'upload_date': '20130903', }, }, { - 'url': 'http://tvcast.naver.com/v/395837', + 'url': 'http://tv.naver.com/v/395837', 'md5': '638ed4c12012c458fefcddfd01f173cd', 'info_dict': { 'id': '395837', @@ -34,6 +34,9 @@ class NaverIE(InfoExtractor): 'upload_date': '20150519', }, 'skip': 'Georestricted', + }, { + 'url': 'http://tvcast.naver.com/v/81652', + 'only_matching': True, }] def _real_extract(self, url): From 136078966b2047b21e9784060cebdc893c643ee9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 17 Jan 2017 23:14:07 +0700 Subject: [PATCH 54/86] [imdb] Extend _VALID_URL (closes #11744) --- youtube_dl/extractor/imdb.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/imdb.py b/youtube_dl/extractor/imdb.py index f0fc8d49a..f95c00c73 100644 --- a/youtube_dl/extractor/imdb.py +++ b/youtube_dl/extractor/imdb.py @@ -13,7 +13,7 @@ from ..utils import ( class ImdbIE(InfoExtractor): IE_NAME = 'imdb' IE_DESC = 'Internet Movie Database trailers' - _VALID_URL = r'https?://(?:www|m)\.imdb\.com/(?:video/[^/]+/|title/tt\d+.*?#lb-)vi(?P\d+)' + _VALID_URL = r'https?://(?:www|m)\.imdb\.com/(?:video/[^/]+/|title/tt\d+.*?#lb-|videoplayer/)vi(?P\d+)' _TESTS = [{ 'url': 'http://www.imdb.com/video/imdb/vi2524815897', @@ -32,6 +32,9 @@ class ImdbIE(InfoExtractor): }, { 'url': 'http://www.imdb.com/title/tt1667889/#lb-vi2524815897', 'only_matching': True, + }, { + 'url': 'http://www.imdb.com/videoplayer/vi1562949145', + 'only_matching': True, }] def _real_extract(self, url): From 4e44598547b02d42aa628506245c40c3d633814e Mon Sep 17 00:00:00 2001 From: Alex Seiler Date: Mon, 9 Jan 2017 21:19:55 +0100 Subject: [PATCH 55/86] [20min] Fix extraction --- youtube_dl/extractor/twentymin.py | 37 ++++++++++++++++++++++++++----- 1 file changed, 31 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/twentymin.py b/youtube_dl/extractor/twentymin.py index b721ecb0a..68d5a0cb5 100644 --- a/youtube_dl/extractor/twentymin.py +++ b/youtube_dl/extractor/twentymin.py @@ -13,10 +13,10 @@ class TwentyMinutenIE(InfoExtractor): _TESTS = [{ # regular video 'url': 'http://www.20min.ch/videotv/?vid=469148&cid=2', - 'md5': 'b52d6bc6ea6398e6a38f12cfd418149c', + 'md5': 'e7264320db31eed8c38364150c12496e', 'info_dict': { 'id': '469148', - 'ext': 'flv', + 'ext': 'mp4', 'title': '85 000 Franken für 15 perfekte Minuten', 'description': 'Was die Besucher vom Silvesterzauber erwarten können. (Video: Alice Grosjean/Murat Temel)', 'thumbnail': 'http://thumbnails.20min-tv.ch/server063/469148/frame-72-469148.jpg' @@ -34,17 +34,29 @@ class TwentyMinutenIE(InfoExtractor): 'thumbnail': 'http://www.20min.ch/images/content/2/2/0/22050469/10/teaserbreit.jpg' }, 'skip': '"This video is no longer available" is shown both on the web page and in the downloaded file.', + }, { + # news article with video + 'url': 'http://www.20min.ch/schweiz/news/story/So-kommen-Sie-bei-Eis-und-Schnee-sicher-an-27032552', + 'md5': '372917ba85ed969e176d287ae54b2f94', + 'info_dict': { + 'id': '523629', + 'display_id': 'So-kommen-Sie-bei-Eis-und-Schnee-sicher-an-27032552', + 'ext': 'mp4', + 'title': 'So kommen Sie bei Eis und Schnee sicher an', + 'description': 'Schneegestöber und Glatteis führten in den letzten Tagen zu zahlreichen Strassenunfällen. Ein Experte erklärt, worauf man nun beim Autofahren achten muss.', + 'thumbnail': 'http://www.20min.ch/images/content/2/7/0/27032552/83/teaserbreit.jpg', + } }, { # YouTube embed 'url': 'http://www.20min.ch/ro/sports/football/story/Il-marque-une-bicyclette-de-plus-de-30-metres--21115184', - 'md5': 'cec64d59aa01c0ed9dbba9cf639dd82f', + 'md5': 'e7e237fd98da2a3cc1422ce683df234d', 'info_dict': { 'id': 'ivM7A7SpDOs', 'ext': 'mp4', 'title': 'GOLAZO DE CHILENA DE JAVI GÓMEZ, FINALISTA AL BALÓN DE CLM 2016', 'description': 'md5:903c92fbf2b2f66c09de514bc25e9f5a', 'upload_date': '20160424', - 'uploader': 'RTVCM Castilla-La Mancha', + 'uploader': 'CMM Castilla-La Mancha Media', 'uploader_id': 'RTVCM', }, 'add_ie': ['Youtube'], @@ -77,18 +89,31 @@ class TwentyMinutenIE(InfoExtractor): r'^20 [Mm]inuten.*? -', '', self._og_search_title(webpage)), ' - News') if not video_id: + params = self._html_search_regex( + r']+src="(?:https?:)?//www\.20min\.ch/videoplayer/videoplayer\.html\?params=(.+?[^"])"', + webpage, '20min embed URL') video_id = self._search_regex( - r'"file\d?"\s*,\s*\"(\d+)', webpage, 'video id') + r'.*videoId@(\d+)', + params, 'Video Id') description = self._html_search_meta( 'description', webpage, 'description') thumbnail = self._og_search_thumbnail(webpage) + formats = [] + format_preferences = [('sd', ''), ('hd', 'h')] + for format_id, url_extension in format_preferences: + format_url = 'http://podcast.20min-tv.ch/podcast/20min/%s%s.mp4' % (video_id, url_extension) + formats.append({ + 'format_id': format_id, + 'url': format_url, + }) + return { 'id': video_id, 'display_id': display_id, - 'url': 'http://speed.20min-tv.ch/%sm.flv' % video_id, 'title': title, 'description': description, 'thumbnail': thumbnail, + 'formats': formats, } From 538b17a09c6546d58babc5eb4a3abc08dcff2d89 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 18 Jan 2017 22:05:11 +0700 Subject: [PATCH 56/86] [20min] Improve --- youtube_dl/extractor/twentymin.py | 122 ++++++++++++------------------ 1 file changed, 47 insertions(+), 75 deletions(-) diff --git a/youtube_dl/extractor/twentymin.py b/youtube_dl/extractor/twentymin.py index 68d5a0cb5..4fd1aa4bf 100644 --- a/youtube_dl/extractor/twentymin.py +++ b/youtube_dl/extractor/twentymin.py @@ -4,116 +4,88 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..utils import remove_end +from ..utils import ( + int_or_none, + try_get, +) class TwentyMinutenIE(InfoExtractor): IE_NAME = '20min' - _VALID_URL = r'https?://(?:www\.)?20min\.ch/(?:videotv/*\?.*\bvid=(?P\d+)|(?:[^/]+/)*(?P[^/#?]+))' + _VALID_URL = r'''(?x) + https?:// + (?:www\.)?20min\.ch/ + (?: + videotv/*\?.*?\bvid=| + videoplayer/videoplayer\.html\?.*?\bvideoId@ + ) + (?P\d+) + ''' _TESTS = [{ - # regular video 'url': 'http://www.20min.ch/videotv/?vid=469148&cid=2', 'md5': 'e7264320db31eed8c38364150c12496e', 'info_dict': { 'id': '469148', 'ext': 'mp4', 'title': '85 000 Franken für 15 perfekte Minuten', - 'description': 'Was die Besucher vom Silvesterzauber erwarten können. (Video: Alice Grosjean/Murat Temel)', - 'thumbnail': 'http://thumbnails.20min-tv.ch/server063/469148/frame-72-469148.jpg' - } - }, { - # news article with video - 'url': 'http://www.20min.ch/schweiz/news/story/-Wir-muessen-mutig-nach-vorne-schauen--22050469', - 'md5': 'cd4cbb99b94130cff423e967cd275e5e', - 'info_dict': { - 'id': '469408', - 'display_id': '-Wir-muessen-mutig-nach-vorne-schauen--22050469', - 'ext': 'flv', - 'title': '«Wir müssen mutig nach vorne schauen»', - 'description': 'Kein Land sei innovativer als die Schweiz, sagte Johann Schneider-Ammann in seiner Neujahrsansprache. Das Land müsse aber seine Hausaufgaben machen.', - 'thumbnail': 'http://www.20min.ch/images/content/2/2/0/22050469/10/teaserbreit.jpg' + 'thumbnail': r're:https?://.*\.jpg$', }, - 'skip': '"This video is no longer available" is shown both on the web page and in the downloaded file.', }, { - # news article with video - 'url': 'http://www.20min.ch/schweiz/news/story/So-kommen-Sie-bei-Eis-und-Schnee-sicher-an-27032552', - 'md5': '372917ba85ed969e176d287ae54b2f94', + 'url': 'http://www.20min.ch/videoplayer/videoplayer.html?params=client@twentyDE|videoId@523629', 'info_dict': { 'id': '523629', - 'display_id': 'So-kommen-Sie-bei-Eis-und-Schnee-sicher-an-27032552', 'ext': 'mp4', 'title': 'So kommen Sie bei Eis und Schnee sicher an', - 'description': 'Schneegestöber und Glatteis führten in den letzten Tagen zu zahlreichen Strassenunfällen. Ein Experte erklärt, worauf man nun beim Autofahren achten muss.', - 'thumbnail': 'http://www.20min.ch/images/content/2/7/0/27032552/83/teaserbreit.jpg', - } - }, { - # YouTube embed - 'url': 'http://www.20min.ch/ro/sports/football/story/Il-marque-une-bicyclette-de-plus-de-30-metres--21115184', - 'md5': 'e7e237fd98da2a3cc1422ce683df234d', - 'info_dict': { - 'id': 'ivM7A7SpDOs', - 'ext': 'mp4', - 'title': 'GOLAZO DE CHILENA DE JAVI GÓMEZ, FINALISTA AL BALÓN DE CLM 2016', - 'description': 'md5:903c92fbf2b2f66c09de514bc25e9f5a', - 'upload_date': '20160424', - 'uploader': 'CMM Castilla-La Mancha Media', - 'uploader_id': 'RTVCM', + 'description': 'md5:117c212f64b25e3d95747e5276863f7d', + 'thumbnail': r're:https?://.*\.jpg$', + }, + 'params': { + 'skip_download': True, }, - 'add_ie': ['Youtube'], }, { 'url': 'http://www.20min.ch/videotv/?cid=44&vid=468738', 'only_matching': True, - }, { - 'url': 'http://www.20min.ch/ro/sortir/cinema/story/Grandir-au-bahut--c-est-dur-18927411', - 'only_matching': True, }] + @staticmethod + def _extract_urls(webpage): + return [m.group('url') for m in re.finditer( + r']+src=(["\'])(?P(?:https?://)?(?:www\.)?20min\.ch/videoplayer/videoplayer.html\?.*?\bvideoId@\d+.*?)\1', + webpage)] + def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - display_id = mobj.group('display_id') or video_id + video_id = self._match_id(url) - webpage = self._download_webpage(url, display_id) + video = self._download_json( + 'http://api.20min.ch/video/%s/show' % video_id, + video_id)['content'] - youtube_url = self._html_search_regex( - r']+src="((?:https?:)?//www\.youtube\.com/embed/[^"]+)"', - webpage, 'YouTube embed URL', default=None) - if youtube_url is not None: - return self.url_result(youtube_url, 'Youtube') + title = video['title'] - title = self._html_search_regex( - r'

    .*?(.+?)

    ', - webpage, 'title', default=None) - if not title: - title = remove_end(re.sub( - r'^20 [Mm]inuten.*? -', '', self._og_search_title(webpage)), ' - News') + formats = [{ + 'format_id': format_id, + 'url': 'http://podcast.20min-tv.ch/podcast/20min/%s%s.mp4' % (video_id, p), + 'quality': quality, + } for quality, (format_id, p) in enumerate([('sd', ''), ('hd', 'h')])] + self._sort_formats(formats) - if not video_id: - params = self._html_search_regex( - r']+src="(?:https?:)?//www\.20min\.ch/videoplayer/videoplayer\.html\?params=(.+?[^"])"', - webpage, '20min embed URL') - video_id = self._search_regex( - r'.*videoId@(\d+)', - params, 'Video Id') + description = video.get('lead') + thumbnail = video.get('thumbnail') - description = self._html_search_meta( - 'description', webpage, 'description') - thumbnail = self._og_search_thumbnail(webpage) + def extract_count(kind): + return try_get( + video, + lambda x: int_or_none(x['communityobject']['thumbs_%s' % kind])) - formats = [] - format_preferences = [('sd', ''), ('hd', 'h')] - for format_id, url_extension in format_preferences: - format_url = 'http://podcast.20min-tv.ch/podcast/20min/%s%s.mp4' % (video_id, url_extension) - formats.append({ - 'format_id': format_id, - 'url': format_url, - }) + like_count = extract_count('up') + dislike_count = extract_count('down') return { 'id': video_id, - 'display_id': display_id, 'title': title, 'description': description, 'thumbnail': thumbnail, + 'like_count': like_count, + 'dislike_count': dislike_count, 'formats': formats, } From b687c85eab942553e925256ad10de693227ba553 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 18 Jan 2017 22:08:31 +0700 Subject: [PATCH 57/86] [extractor/generic] Add support for 20 minuten embeds (closes #11683, closes #11751) --- youtube_dl/extractor/generic.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index a3ac7d26b..154545df7 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -78,6 +78,7 @@ from .vbox7 import Vbox7IE from .dbtv import DBTVIE from .piksel import PikselIE from .videa import VideaIE +from .twentymin import TwentyMinutenIE class GenericIE(InfoExtractor): @@ -1468,6 +1469,20 @@ class GenericIE(InfoExtractor): }, 'playlist_mincount': 2, }, + { + # 20 minuten embed + 'url': 'http://www.20min.ch/schweiz/news/story/So-kommen-Sie-bei-Eis-und-Schnee-sicher-an-27032552', + 'info_dict': { + 'id': '523629', + 'ext': 'mp4', + 'title': 'So kommen Sie bei Eis und Schnee sicher an', + 'description': 'md5:117c212f64b25e3d95747e5276863f7d', + }, + 'params': { + 'skip_download': True, + }, + 'add_ie': [TwentyMinutenIE.ie_key()], + } # { # # TODO: find another test # # http://schema.org/VideoObject @@ -2421,6 +2436,12 @@ class GenericIE(InfoExtractor): if videa_urls: return _playlist_from_matches(videa_urls, ie=VideaIE.ie_key()) + # Look for 20 minuten embeds + twentymin_urls = TwentyMinutenIE._extract_urls(webpage) + if twentymin_urls: + return _playlist_from_matches( + twentymin_urls, ie=TwentyMinutenIE.ie_key()) + # Looking for http://schema.org/VideoObject json_ld = self._search_json_ld( webpage, video_id, default={}, expected_type='VideoObject') From aaf2b7c57a3d2dc9ba12f1aa401cba088e114916 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 18 Jan 2017 22:20:11 +0700 Subject: [PATCH 58/86] [canalplus] Add fallback for video id (closes #11764) --- youtube_dl/extractor/canalplus.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/canalplus.py b/youtube_dl/extractor/canalplus.py index 10cf165bc..b3f76a7b1 100644 --- a/youtube_dl/extractor/canalplus.py +++ b/youtube_dl/extractor/canalplus.py @@ -107,7 +107,7 @@ class CanalplusIE(InfoExtractor): [r']+?videoId=(["\'])(?P\d+)', r'id=["\']canal_video_player(?P\d+)', r'data-video=["\'](?P\d+)'], - webpage, 'video id', group='id') + webpage, 'video id', default=mobj.group('vid'), group='id') info_url = self._VIDEO_INFO_TEMPLATE % (site_id, video_id) video_data = self._download_json(info_url, video_id, 'Downloading video JSON') From baa3e1845b26d9756642325bbb0d58e22025b2ec Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Wed, 18 Jan 2017 17:00:15 +0100 Subject: [PATCH 59/86] [bilibili] fix extraction(closes #11077) --- youtube_dl/extractor/bilibili.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/bilibili.py b/youtube_dl/extractor/bilibili.py index 5051934ef..85ea5e6ee 100644 --- a/youtube_dl/extractor/bilibili.py +++ b/youtube_dl/extractor/bilibili.py @@ -34,8 +34,8 @@ class BiliBiliIE(InfoExtractor): }, } - _APP_KEY = '6f90a59ac58a4123' - _BILIBILI_KEY = '0bfd84cc3940035173f35e6777508326' + _APP_KEY = '84956560bc028eb7' + _BILIBILI_KEY = '94aba54af9065f71de72f5508f1cd42e' def _real_extract(self, url): video_id = self._match_id(url) From 460f61fac42592eb273b7d58efc314cc83687b8b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 18 Jan 2017 23:06:46 +0700 Subject: [PATCH 60/86] [ChangeLog] Actualize --- ChangeLog | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/ChangeLog b/ChangeLog index f6d73f982..994895edc 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,13 @@ +version + +Extractors +* [bilibili] Fix extraction (#11077) ++ [canalplus] Add fallback for video id (#11764) +* [20min] Fix extraction (#11683, #11751) +* [imdb] Extend URL regular expression (#11744) ++ [naver] Add support for tv.naver.com links (#11743) + + version 2017.01.16 Core From 1560baacc677c43c1007acfc89b8190f81a59684 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 18 Jan 2017 23:10:00 +0700 Subject: [PATCH 61/86] release 2017.01.18 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- youtube_dl/version.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index c04f6246a..38cb13a33 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.01.16*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.01.16** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.01.18*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.01.18** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2017.01.16 +[debug] youtube-dl version 2017.01.18 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 994895edc..5aa4e3c6b 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2017.01.18 Extractors * [bilibili] Fix extraction (#11077) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index c20718dd6..669f60f65 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2017.01.16' +__version__ = '2017.01.18' From f1e70fc2ff6f1536873ed73ffc9bff63653fd5ca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 18 Jan 2017 23:34:11 +0700 Subject: [PATCH 62/86] [mtv] Relax triforce feed regex (closes #11766) --- youtube_dl/extractor/mtv.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/mtv.py b/youtube_dl/extractor/mtv.py index 00a980c7d..e48ea2481 100644 --- a/youtube_dl/extractor/mtv.py +++ b/youtube_dl/extractor/mtv.py @@ -211,7 +211,7 @@ class MTVServicesInfoExtractor(InfoExtractor): def _extract_triforce_mgid(self, webpage, data_zone=None, video_id=None): triforce_feed = self._parse_json(self._search_regex( - r'triforceManifestFeed\s*=\s*(\{.+?\});\n', webpage, + r'triforceManifestFeed\s*=\s*({.+?})\s*;\s*\n', webpage, 'triforce feed', default='{}'), video_id, fatal=False) data_zone = self._search_regex( From eb3f008c9e686f38c50511004d5c9a51b2e8cdd2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 19 Jan 2017 04:49:31 +0700 Subject: [PATCH 63/86] [uol] Fix extraction (closes #11770) --- youtube_dl/extractor/uol.py | 27 +++++++++++++++++++++------ 1 file changed, 21 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/uol.py b/youtube_dl/extractor/uol.py index c27c64387..e67083004 100644 --- a/youtube_dl/extractor/uol.py +++ b/youtube_dl/extractor/uol.py @@ -84,12 +84,27 @@ class UOLIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) - if not video_id.isdigit(): - embed_page = self._download_webpage('https://jsuol.com.br/c/tv/uol/embed/?params=[embed,%s]' % video_id, video_id) - video_id = self._search_regex(r'mediaId=(\d+)', embed_page, 'media id') + media_id = None + + if video_id.isdigit(): + media_id = video_id + + if not media_id: + embed_page = self._download_webpage( + 'https://jsuol.com.br/c/tv/uol/embed/?params=[embed,%s]' % video_id, + video_id, 'Downloading embed page', fatal=False) + if embed_page: + media_id = self._search_regex( + (r'uol\.com\.br/(\d+)', r'mediaId=(\d+)'), + embed_page, 'media id', default=None) + + if not media_id: + webpage = self._download_webpage(url, video_id) + media_id = self._search_regex(r'mediaId=(\d+)', webpage, 'media id') + video_data = self._download_json( - 'http://mais.uol.com.br/apiuol/v3/player/getMedia/%s.json' % video_id, - video_id)['item'] + 'http://mais.uol.com.br/apiuol/v3/player/getMedia/%s.json' % media_id, + media_id)['item'] title = video_data['title'] query = { @@ -118,7 +133,7 @@ class UOLIE(InfoExtractor): tags.append(tag_description) return { - 'id': video_id, + 'id': media_id, 'title': title, 'description': clean_html(video_data.get('desMedia')), 'thumbnail': video_data.get('thumbnail'), From cccd70a2752ad079ed560e42ff085adcabebaac2 Mon Sep 17 00:00:00 2001 From: james mike dupont Date: Thu, 19 Jan 2017 04:18:13 -0500 Subject: [PATCH 64/86] untie --- youtube_dl/extractor/flipagram.py | 2 +- youtube_dl/extractor/vimeo.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/flipagram.py b/youtube_dl/extractor/flipagram.py index 1902a2393..b7be40f1b 100644 --- a/youtube_dl/extractor/flipagram.py +++ b/youtube_dl/extractor/flipagram.py @@ -81,7 +81,7 @@ class FlipagramIE(InfoExtractor): 'filesize': int_or_none(cover.get('size')), } for cover in flipagram.get('covers', []) if cover.get('url')] - # Note that this only retrieves comments that are initally loaded. + # Note that this only retrieves comments that are initially loaded. # For videos with large amounts of comments, most won't be retrieved. comments = [] for comment in video_data.get('comments', {}).get(video_id, {}).get('items', []): diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index 2e98b0e6f..add753635 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -338,7 +338,7 @@ class VimeoIE(VimeoBaseInfoExtractor): 'expected_warnings': ['Unable to download JSON metadata'], }, { - # redirects to ondemand extractor and should be passed throught it + # redirects to ondemand extractor and should be passed through it # for successful extraction 'url': 'https://vimeo.com/73445910', 'info_dict': { From 1fe84be0f3b36822af804db6cf7c06a1ac5ac688 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 20 Jan 2017 00:47:04 +0700 Subject: [PATCH 65/86] [1tv] Add support for hls (closes #11786) --- youtube_dl/extractor/firsttv.py | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/firsttv.py b/youtube_dl/extractor/firsttv.py index c6fb67057..081c71842 100644 --- a/youtube_dl/extractor/firsttv.py +++ b/youtube_dl/extractor/firsttv.py @@ -86,18 +86,43 @@ class FirstTVIE(InfoExtractor): title = item['title'] quality = qualities(QUALITIES) formats = [] + path = None for f in item.get('mbr', []): src = f.get('src') if not src or not isinstance(src, compat_str): continue tbr = int_or_none(self._search_regex( r'_(\d{3,})\.mp4', src, 'tbr', default=None)) + if not path: + path = self._search_regex( + r'//[^/]+/(.+?)_\d+\.mp4', src, + 'm3u8 path', default=None) formats.append({ 'url': src, 'format_id': f.get('name'), 'tbr': tbr, - 'quality': quality(f.get('name')), + 'source_preference': quality(f.get('name')), }) + # m3u8 URL format is reverse engineered from [1] (search for + # master.m3u8). dashEdges (that is currently balancer-vod.1tv.ru) + # is taken from [2]. + # 1. http://static.1tv.ru/player/eump1tv-current/eump-1tv.all.min.js?rnd=9097422834:formatted + # 2. http://static.1tv.ru/player/eump1tv-config/config-main.js?rnd=9097422834 + if not path and len(formats) == 1: + path = self._search_regex( + r'//[^/]+/(.+?$)', formats[0]['url'], + 'm3u8 path', default=None) + if path: + if len(formats) == 1: + m3u8_path = ',' + else: + tbrs = [compat_str(t) for t in sorted(f['tbr'] for f in formats)] + m3u8_path = '_,%s,%s' % (','.join(tbrs), '.mp4') + formats.extend(self._extract_m3u8_formats( + 'http://balancer-vod.1tv.ru/%s%s.urlset/master.m3u8' + % (path, m3u8_path), + display_id, 'mp4', + entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)) self._sort_formats(formats) thumbnail = item.get('poster') or self._og_search_thumbnail(webpage) From d77ac737900eede5e1508b9822e71c8595fe0879 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Fri, 20 Jan 2017 21:59:24 +0800 Subject: [PATCH 66/86] [ustream] Add UstreamIE._extract_url() Ref: #11547 --- youtube_dl/extractor/generic.py | 8 ++++---- youtube_dl/extractor/ustream.py | 7 +++++++ 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 154545df7..a7c104845 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -79,6 +79,7 @@ from .dbtv import DBTVIE from .piksel import PikselIE from .videa import VideaIE from .twentymin import TwentyMinutenIE +from .ustream import UstreamIE class GenericIE(InfoExtractor): @@ -2112,10 +2113,9 @@ class GenericIE(InfoExtractor): return self.url_result(mobj.group('url'), 'TED') # Look for embedded Ustream videos - mobj = re.search( - r']+?src=(["\'])(?Phttp://www\.ustream\.tv/embed/.+?)\1', webpage) - if mobj is not None: - return self.url_result(mobj.group('url'), 'Ustream') + ustream_url = UstreamIE._extract_url(webpage) + if ustream_url: + return self.url_result(ustream_url, UstreamIE.ie_key()) # Look for embedded arte.tv player mobj = re.search( diff --git a/youtube_dl/extractor/ustream.py b/youtube_dl/extractor/ustream.py index 0c06bf36b..5737d4d16 100644 --- a/youtube_dl/extractor/ustream.py +++ b/youtube_dl/extractor/ustream.py @@ -69,6 +69,13 @@ class UstreamIE(InfoExtractor): }, }] + @staticmethod + def _extract_url(webpage): + mobj = re.search( + r']+?src=(["\'])(?Phttp://www\.ustream\.tv/embed/.+?)\1', webpage) + if mobj is not None: + return mobj.group('url') + def _get_stream_info(self, url, video_id, app_id_ver, extra_note=None): def num_to_hex(n): return hex(n)[2:] From 4447fb23320b9214ab3188717794d00b18887617 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Fri, 20 Jan 2017 22:11:43 +0800 Subject: [PATCH 67/86] [cspan] Support Ustream embedded videos Closes #11547 --- ChangeLog | 6 ++++++ youtube_dl/extractor/cspan.py | 19 +++++++++++++++++++ 2 files changed, 25 insertions(+) diff --git a/ChangeLog b/ChangeLog index 5aa4e3c6b..217971ec6 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +version + +Extractors ++ [cspan] Support Ustream embedded videos (#11547) + + version 2017.01.18 Extractors diff --git a/youtube_dl/extractor/cspan.py b/youtube_dl/extractor/cspan.py index 7e5d4f227..92a827a4b 100644 --- a/youtube_dl/extractor/cspan.py +++ b/youtube_dl/extractor/cspan.py @@ -12,6 +12,7 @@ from ..utils import ( ExtractorError, ) from .senateisvp import SenateISVPIE +from .ustream import UstreamIE class CSpanIE(InfoExtractor): @@ -57,12 +58,30 @@ class CSpanIE(InfoExtractor): 'params': { 'skip_download': True, # m3u8 downloads } + }, { + # Ustream embedded video + 'url': 'https://www.c-span.org/video/?114917-1/armed-services', + 'info_dict': { + 'id': '58428542', + 'ext': 'flv', + 'title': 'USHR07 Armed Services Committee', + 'description': 'hsas00-2118-20150204-1000et-07\n\n\nUSHR07 Armed Services Committee', + 'timestamp': 1423060374, + 'upload_date': '20150204', + 'uploader': 'HouseCommittee', + 'uploader_id': '12987475', + }, }] def _real_extract(self, url): video_id = self._match_id(url) video_type = None webpage = self._download_webpage(url, video_id) + + ustream_url = UstreamIE._extract_url(webpage) + if ustream_url: + return self.url_result(ustream_url, UstreamIE.ie_key()) + # We first look for clipid, because clipprog always appears before patterns = [r'id=\'clip(%s)\'\s*value=\'([0-9]+)\'' % t for t in ('id', 'prog')] results = list(filter(None, (re.search(p, webpage) for p in patterns))) From 972efe60c3fdaff83f9b8e7a637ee81f4c27bb64 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Fri, 20 Jan 2017 22:13:54 +0800 Subject: [PATCH 68/86] [generic] Remove a dead test The web page does not contain a video anymore Ref: #2694, #2696 --- youtube_dl/extractor/generic.py | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index a7c104845..40201f311 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -589,17 +589,6 @@ class GenericIE(InfoExtractor): 'description': 'md5:8145d19d320ff3e52f28401f4c4283b9', } }, - # Embedded Ustream video - { - 'url': 'http://www.american.edu/spa/pti/nsa-privacy-janus-2014.cfm', - 'md5': '27b99cdb639c9b12a79bca876a073417', - 'info_dict': { - 'id': '45734260', - 'ext': 'flv', - 'uploader': 'AU SPA: The NSA and Privacy', - 'title': 'NSA and Privacy Forum Debate featuring General Hayden and Barton Gellman' - } - }, # nowvideo embed hidden behind percent encoding { 'url': 'http://www.waoanime.tv/the-super-dimension-fortress-macross-episode-1/', From f3c21cb7a7e2d8685f466368e3142739077498cf Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Fri, 20 Jan 2017 22:25:20 +0800 Subject: [PATCH 69/86] [cspan] Fix _TESTS --- youtube_dl/extractor/cspan.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/cspan.py b/youtube_dl/extractor/cspan.py index 92a827a4b..d4576160b 100644 --- a/youtube_dl/extractor/cspan.py +++ b/youtube_dl/extractor/cspan.py @@ -23,14 +23,13 @@ class CSpanIE(InfoExtractor): 'md5': '94b29a4f131ff03d23471dd6f60b6a1d', 'info_dict': { 'id': '315139', - 'ext': 'mp4', 'title': 'Attorney General Eric Holder on Voting Rights Act Decision', - 'description': 'Attorney General Eric Holder speaks to reporters following the Supreme Court decision in [Shelby County v. Holder], in which the court ruled that the preclearance provisions of the Voting Rights Act could not be enforced.', }, + 'playlist_mincount': 2, 'skip': 'Regularly fails on travis, for unknown reasons', }, { 'url': 'http://www.c-span.org/video/?c4486943/cspan-international-health-care-models', - 'md5': '8e5fbfabe6ad0f89f3012a7943c1287b', + # md5 is unstable 'info_dict': { 'id': 'c4486943', 'ext': 'mp4', @@ -39,14 +38,11 @@ class CSpanIE(InfoExtractor): } }, { 'url': 'http://www.c-span.org/video/?318608-1/gm-ignition-switch-recall', - 'md5': '2ae5051559169baadba13fc35345ae74', 'info_dict': { 'id': '342759', - 'ext': 'mp4', 'title': 'General Motors Ignition Switch Recall', - 'duration': 14848, - 'description': 'md5:118081aedd24bf1d3b68b3803344e7f3' }, + 'playlist_mincount': 6, }, { # Video from senate.gov 'url': 'http://www.c-span.org/video/?104517-1/immigration-reforms-needed-protect-skilled-american-workers', From f4ec8dce481564589419e4dffc45437211daa13f Mon Sep 17 00:00:00 2001 From: Iulian Onofrei Date: Fri, 20 Jan 2017 18:25:04 +0200 Subject: [PATCH 70/86] Update README.md (#11787) Add audio format argument dependency warning --- youtube_dl/options.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/options.py b/youtube_dl/options.py index 0b8c1671d..0d2ce8d15 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -751,7 +751,7 @@ def parseOpts(overrideArguments=None): help='Convert video files to audio-only files (requires ffmpeg or avconv and ffprobe or avprobe)') postproc.add_option( '--audio-format', metavar='FORMAT', dest='audioformat', default='best', - help='Specify audio format: "best", "aac", "vorbis", "mp3", "m4a", "opus", or "wav"; "%default" by default') + help='Specify audio format: "best", "aac", "vorbis", "mp3", "m4a", "opus", or "wav"; "%default" by default; No effect without -x') postproc.add_option( '--audio-quality', metavar='QUALITY', dest='audioquality', default='5', From 12afdc2ad617dedfd7d60654b8c57b99604332ed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 21 Jan 2017 18:10:32 +0700 Subject: [PATCH 71/86] [youtube] Extract episode metadata (closes #9695, closes #11774) --- youtube_dl/extractor/youtube.py | 37 +++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index e6b840735..63597dd16 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -864,6 +864,30 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'skip_download': True, }, }, + { + # YouTube Red video with episode data + 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4', + 'info_dict': { + 'id': 'iqKdEhx-dD4', + 'ext': 'mp4', + 'title': 'Isolation - Mind Field (Ep 1)', + 'description': 'md5:3a72f23c086a1496c9e2c54a25fa0822', + 'upload_date': '20170118', + 'uploader': 'Vsauce', + 'uploader_id': 'Vsauce', + 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce', + 'license': 'Standard YouTube License', + 'series': 'Mind Field', + 'season_number': 1, + 'episode_number': 1, + }, + 'params': { + 'skip_download': True, + }, + 'expected_warnings': [ + 'Skipping DASH manifest', + ], + }, { # itag 212 'url': '1t24XAntNCY', @@ -1454,6 +1478,16 @@ class YoutubeIE(YoutubeBaseInfoExtractor): else: video_alt_title = video_creator = None + m_episode = re.search( + r']+id="watch7-headline"[^>]*>\s*]*>.*?>(?P[^<]+)\s*S(?P\d+)\s*•\s*E(?P\d+)', + video_webpage) + if m_episode: + series = m_episode.group('series') + season_number = int(m_episode.group('season')) + episode_number = int(m_episode.group('episode')) + else: + series = season_number = episode_number = None + m_cat_container = self._search_regex( r'(?s)]*>\s*Category\s*\s*]*>(.*?)', video_webpage, 'categories', default=None) @@ -1743,6 +1777,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'is_live': is_live, 'start_time': start_time, 'end_time': end_time, + 'series': series, + 'season_number': season_number, + 'episode_number': episode_number, } From 04a3d4d23472ffa4a482d8ebf2d8fdbb3e974327 Mon Sep 17 00:00:00 2001 From: ha shao Date: Sat, 21 Jan 2017 15:47:39 +0800 Subject: [PATCH 72/86] [vimeo:channel] Extract videos' titles for playlist entries --- youtube_dl/extractor/vimeo.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index add753635..a6bbd4c05 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -730,12 +730,12 @@ class VimeoChannelIE(VimeoBaseInfoExtractor): # Try extracting href first since not all videos are available via # short https://vimeo.com/id URL (e.g. https://vimeo.com/channels/tributes/6213729) clips = re.findall( - r'id="clip_(\d+)"[^>]*>\s*]+href="(/(?:[^/]+/)*\1)', webpage) + r'id="clip_(\d+)"[^>]*>\s*]+href="(/(?:[^/]+/)*\1)(?:[^>]+\btitle="([^"]+)")?', webpage) if clips: - for video_id, video_url in clips: + for video_id, video_url, video_title in clips: yield self.url_result( compat_urlparse.urljoin(base_url, video_url), - VimeoIE.ie_key(), video_id=video_id) + VimeoIE.ie_key(), video_id=video_id, video_title=video_title) # More relaxed fallback else: for video_id in re.findall(r'id=["\']clip_(\d+)', webpage): From 7c20b7484cc91a4818a98ca8d5b7ef94d5c38fb8 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sun, 22 Jan 2017 02:06:34 +0800 Subject: [PATCH 73/86] [nextmedia] Support redirected URLs --- ChangeLog | 1 + youtube_dl/extractor/nextmedia.py | 13 ++++++++++++- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index 217971ec6..00c8a063f 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,7 @@ version Extractors ++ [nextmedia] Support redirected URLs + [cspan] Support Ustream embedded videos (#11547) diff --git a/youtube_dl/extractor/nextmedia.py b/youtube_dl/extractor/nextmedia.py index c900f232a..626ed8b49 100644 --- a/youtube_dl/extractor/nextmedia.py +++ b/youtube_dl/extractor/nextmedia.py @@ -2,6 +2,7 @@ from __future__ import unicode_literals from .common import InfoExtractor +from ..compat import compat_urlparse from ..utils import parse_iso8601 @@ -30,6 +31,12 @@ class NextMediaIE(InfoExtractor): return self._extract_from_nextmedia_page(news_id, url, page) def _extract_from_nextmedia_page(self, news_id, url, page): + redirection_url = self._search_regex( + r'window\.location\.href\s*=\s*([\'"])(?P(?!\1).+)\1', + page, 'redirection URL', default=None, group='url') + if redirection_url: + return self.url_result(compat_urlparse.urljoin(url, redirection_url)) + title = self._fetch_title(page) video_url = self._search_regex(self._URL_PATTERN, page, 'video url') @@ -93,7 +100,7 @@ class NextMediaActionNewsIE(NextMediaIE): class AppleDailyIE(NextMediaIE): IE_DESC = '臺灣蘋果日報' - _VALID_URL = r'https?://(www|ent)\.appledaily\.com\.tw/(?:animation|appledaily|enews|realtimenews|actionnews)/[^/]+/[^/]+/(?P\d+)/(?P\d+)(/.*)?' + _VALID_URL = r'https?://(www|ent)\.appledaily\.com\.tw/[^/]+/[^/]+/[^/]+/(?P\d+)/(?P\d+)(/.*)?' _TESTS = [{ 'url': 'http://ent.appledaily.com.tw/enews/article/entertainment/20150128/36354694', 'md5': 'a843ab23d150977cc55ef94f1e2c1e4d', @@ -157,6 +164,10 @@ class AppleDailyIE(NextMediaIE): }, { 'url': 'http://www.appledaily.com.tw/actionnews/appledaily/7/20161003/960588/', 'only_matching': True, + }, { + # Redirected from http://ent.appledaily.com.tw/enews/article/entertainment/20150128/36354694 + 'url': 'http://ent.appledaily.com.tw/section/article/headline/20150128/36354694', + 'only_matching': True, }] _URL_PATTERN = r'\{url: \'(.+)\'\}' From e84495cd8d7bdb89bbfe233263bd8ad0b448f8cc Mon Sep 17 00:00:00 2001 From: Alex Seiler Date: Sat, 21 Jan 2017 15:23:26 +0100 Subject: [PATCH 74/86] [azmedien] Add extractor (closes #11785) --- youtube_dl/extractor/azmedientv.py | 87 ++++++++++++++++++++++++++++++ youtube_dl/extractor/extractors.py | 4 ++ 2 files changed, 91 insertions(+) create mode 100644 youtube_dl/extractor/azmedientv.py diff --git a/youtube_dl/extractor/azmedientv.py b/youtube_dl/extractor/azmedientv.py new file mode 100644 index 000000000..51d46fb94 --- /dev/null +++ b/youtube_dl/extractor/azmedientv.py @@ -0,0 +1,87 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from .kaltura import KalturaIE +from ..utils import get_element_by_class + + +class AZMedienTVIE(InfoExtractor): + IE_DESC = 'telezueri.ch, telebaern.tv and telem1.ch videos' + _VALID_URL = r'http://(?:www\.)?(?:telezueri\.ch|telebaern\.tv|telem1\.ch)/[0-9]+-show-[^/#]+(?:/[0-9]+-episode-[^/#]+(?:/[0-9]+-segment-(?:[^/#]+#)?|#)|#)(?P[^#]+)' + + _TESTS = [{ + # URL with 'segment' + 'url': 'http://www.telezueri.ch/62-show-zuerinews/13772-episode-sonntag-18-dezember-2016/32419-segment-massenabweisungen-beim-hiltl-club-wegen-pelzboom', + 'md5': 'fda85ada1299cee517a622bfbc5f6b66', + 'info_dict': { + 'id': '1_2444peh4', + 'ext': 'mov', + 'title': 'Massenabweisungen beim Hiltl Club wegen Pelzboom', + 'description': 'md5:9ea9dd1b159ad65b36ddcf7f0d7c76a8', + 'uploader_id': 'TeleZ?ri', + 'upload_date': '20161218', + 'timestamp': 1482084490, + } + }, { + # URL with 'segment' and fragment: + 'url': 'http://www.telebaern.tv/118-show-news/14240-episode-dienstag-17-januar-2017/33666-segment-achtung-gefahr#zu-wenig-pflegerinnen-und-pfleger', + 'only_matching': True + }, { + # URL with 'episode' and fragment: + 'url': 'http://www.telem1.ch/47-show-sonntalk/13986-episode-soldaten-fuer-grenzschutz-energiestrategie-obama-bilanz#soldaten-fuer-grenzschutz-energiestrategie-obama-bilanz', + 'only_matching': True + }, { + # URL with 'show' and fragment: + 'url': 'http://www.telezueri.ch/66-show-sonntalk#burka-plakate-trump-putin-china-besuch', + 'only_matching': True + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + kaltura_partner_id = self._html_search_regex( + r']+src=["\']https?://www\.kaltura\.com/.*/partner_id/([0-9]+)', + webpage, 'Kaltura partner ID') + kaltura_entry_id = self._html_search_regex( + r']+data-id=["\'](.*?)["\'][^>]+data-slug=["\']%s' % video_id, + webpage, 'Kaltura entry ID') + + return self.url_result( + 'kaltura:%s:%s' % (kaltura_partner_id, kaltura_entry_id), + ie=KalturaIE.ie_key()) + + +class AZMedienTVShowIE(InfoExtractor): + IE_DESC = 'telezueri.ch, telebaern.tv and telem1.ch shows' + _VALID_URL = r'http://(?:www\.)?(?:telezueri\.ch|telebaern\.tv|telem1\.ch)/(?P[0-9]+-show-[^/#]+(?:/[0-9]+-episode-[^/#]+)?)$' + + _TESTS = [{ + # URL with 'episode': + 'url': 'http://www.telebaern.tv/118-show-news/13735-episode-donnerstag-15-dezember-2016', + 'info_dict': { + 'id': '118-show-news/13735-episode-donnerstag-15-dezember-2016', + 'title': 'News', + }, + 'playlist_count': 9, + }, { + # URL with 'show' only: + 'url': 'http://www.telezueri.ch/86-show-talktaeglich', + 'only_matching': True + }] + + def _real_extract(self, url): + show_id = self._match_id(url) + webpage = self._download_webpage(url, show_id) + + title = get_element_by_class('title-block-cell', webpage) + if title: + title = title.strip() + + entries = [self.url_result(m.group('url'), ie=AZMedienTVIE.ie_key()) for m in re.finditer( + r']+data-real=["\'](?P.+?)["\']', webpage)] + + return self.playlist_result( + entries, show_id, title) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 9d0610d21..4cfb3c70f 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -77,6 +77,10 @@ from .awaan import ( AWAANLiveIE, AWAANSeasonIE, ) +from .azmedientv import ( + AZMedienTVIE, + AZMedienTVShowIE, +) from .azubu import AzubuIE, AzubuLiveIE from .baidu import BaiduVideoIE from .bambuser import BambuserIE, BambuserChannelIE From 94629e537f2f6ed80b19e3863456f9ba8073af36 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 22 Jan 2017 02:15:20 +0700 Subject: [PATCH 75/86] [azmedien] Improve (closes #11784) --- youtube_dl/extractor/azmedien.py | 132 +++++++++++++++++++++++++++++ youtube_dl/extractor/azmedientv.py | 87 ------------------- youtube_dl/extractor/extractors.py | 6 +- 3 files changed, 135 insertions(+), 90 deletions(-) create mode 100644 youtube_dl/extractor/azmedien.py delete mode 100644 youtube_dl/extractor/azmedientv.py diff --git a/youtube_dl/extractor/azmedien.py b/youtube_dl/extractor/azmedien.py new file mode 100644 index 000000000..059dc6e4b --- /dev/null +++ b/youtube_dl/extractor/azmedien.py @@ -0,0 +1,132 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from .kaltura import KalturaIE +from ..utils import ( + get_element_by_class, + strip_or_none, +) + + +class AZMedienBaseIE(InfoExtractor): + def _kaltura_video(self, partner_id, entry_id): + return self.url_result( + 'kaltura:%s:%s' % (partner_id, entry_id), ie=KalturaIE.ie_key(), + video_id=entry_id) + + +class AZMedienIE(AZMedienBaseIE): + IE_DESC = 'AZ Medien videos' + _VALID_URL = r'''(?x) + https?:// + (?:www\.)? + (?: + telezueri\.ch| + telebaern\.tv| + telem1\.ch + )/ + [0-9]+-show-[^/\#]+ + (?: + /[0-9]+-episode-[^/\#]+ + (?: + /[0-9]+-segment-(?:[^/\#]+\#)?| + \# + )| + \# + ) + (?P[^\#]+) + ''' + + _TESTS = [{ + # URL with 'segment' + 'url': 'http://www.telezueri.ch/62-show-zuerinews/13772-episode-sonntag-18-dezember-2016/32419-segment-massenabweisungen-beim-hiltl-club-wegen-pelzboom', + 'info_dict': { + 'id': '1_2444peh4', + 'ext': 'mov', + 'title': 'Massenabweisungen beim Hiltl Club wegen Pelzboom', + 'description': 'md5:9ea9dd1b159ad65b36ddcf7f0d7c76a8', + 'uploader_id': 'TeleZ?ri', + 'upload_date': '20161218', + 'timestamp': 1482084490, + }, + 'params': { + 'skip_download': True, + }, + }, { + # URL with 'segment' and fragment: + 'url': 'http://www.telebaern.tv/118-show-news/14240-episode-dienstag-17-januar-2017/33666-segment-achtung-gefahr#zu-wenig-pflegerinnen-und-pfleger', + 'only_matching': True + }, { + # URL with 'episode' and fragment: + 'url': 'http://www.telem1.ch/47-show-sonntalk/13986-episode-soldaten-fuer-grenzschutz-energiestrategie-obama-bilanz#soldaten-fuer-grenzschutz-energiestrategie-obama-bilanz', + 'only_matching': True + }, { + # URL with 'show' and fragment: + 'url': 'http://www.telezueri.ch/66-show-sonntalk#burka-plakate-trump-putin-china-besuch', + 'only_matching': True + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + + partner_id = self._search_regex( + r']+src=["\'](?:https?:)?//(?:[^/]+\.)?kaltura\.com(?:/[^/]+)*/(?:p|partner_id)/([0-9]+)', + webpage, 'kaltura partner id') + entry_id = self._html_search_regex( + r']+data-id=(["\'])(?P(?:(?!\1).)+)\1[^>]+data-slug=["\']%s' + % re.escape(video_id), webpage, 'kaltura entry id', group='id') + + return self._kaltura_video(partner_id, entry_id) + + +class AZMedienShowIE(AZMedienBaseIE): + IE_DESC = 'AZ Medien shows' + _VALID_URL = r'https?://(?:www\.)?(?:telezueri\.ch|telebaern\.tv|telem1\.ch)/(?P[0-9]+-show-[^/#]+(?:/[0-9]+-episode-[^/#]+)?)$' + + _TESTS = [{ + # URL with 'episode' + 'url': 'http://www.telebaern.tv/118-show-news/13735-episode-donnerstag-15-dezember-2016', + 'info_dict': { + 'id': '118-show-news/13735-episode-donnerstag-15-dezember-2016', + 'title': 'News - Donnerstag, 15. Dezember 2016', + }, + 'playlist_count': 9, + }, { + # URL with 'show' only + 'url': 'http://www.telezueri.ch/86-show-talktaeglich', + 'only_matching': True + }] + + def _real_extract(self, url): + show_id = self._match_id(url) + webpage = self._download_webpage(url, show_id) + + entries = [] + + partner_id = self._search_regex( + r'src=["\'](?:https?:)?//(?:[^/]+\.)kaltura\.com/(?:[^/]+/)*(?:p|partner_id)/(\d+)', + webpage, 'kaltura partner id', default=None) + + if partner_id: + entries = [ + self._kaltura_video(partner_id, m.group('id')) + for m in re.finditer( + r'data-id=(["\'])(?P(?:(?!\1).)+)\1', webpage)] + + if not entries: + entries = [ + self.url_result(m.group('url'), ie=AZMedienIE.ie_key()) + for m in re.finditer( + r']+data-real=(["\'])(?Phttp.+?)\1', webpage)] + + title = self._search_regex( + r'episodeShareTitle\s*=\s*(["\'])(?P(?:(?!\1).)+)\1', + webpage, 'title', + default=strip_or_none(get_element_by_class( + 'title-block-cell', webpage)), group='title') + + return self.playlist_result(entries, show_id, title) diff --git a/youtube_dl/extractor/azmedientv.py b/youtube_dl/extractor/azmedientv.py deleted file mode 100644 index 51d46fb94..000000000 --- a/youtube_dl/extractor/azmedientv.py +++ /dev/null @@ -1,87 +0,0 @@ -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from .kaltura import KalturaIE -from ..utils import get_element_by_class - - -class AZMedienTVIE(InfoExtractor): - IE_DESC = 'telezueri.ch, telebaern.tv and telem1.ch videos' - _VALID_URL = r'http://(?:www\.)?(?:telezueri\.ch|telebaern\.tv|telem1\.ch)/[0-9]+-show-[^/#]+(?:/[0-9]+-episode-[^/#]+(?:/[0-9]+-segment-(?:[^/#]+#)?|#)|#)(?P<id>[^#]+)' - - _TESTS = [{ - # URL with 'segment' - 'url': 'http://www.telezueri.ch/62-show-zuerinews/13772-episode-sonntag-18-dezember-2016/32419-segment-massenabweisungen-beim-hiltl-club-wegen-pelzboom', - 'md5': 'fda85ada1299cee517a622bfbc5f6b66', - 'info_dict': { - 'id': '1_2444peh4', - 'ext': 'mov', - 'title': 'Massenabweisungen beim Hiltl Club wegen Pelzboom', - 'description': 'md5:9ea9dd1b159ad65b36ddcf7f0d7c76a8', - 'uploader_id': 'TeleZ?ri', - 'upload_date': '20161218', - 'timestamp': 1482084490, - } - }, { - # URL with 'segment' and fragment: - 'url': 'http://www.telebaern.tv/118-show-news/14240-episode-dienstag-17-januar-2017/33666-segment-achtung-gefahr#zu-wenig-pflegerinnen-und-pfleger', - 'only_matching': True - }, { - # URL with 'episode' and fragment: - 'url': 'http://www.telem1.ch/47-show-sonntalk/13986-episode-soldaten-fuer-grenzschutz-energiestrategie-obama-bilanz#soldaten-fuer-grenzschutz-energiestrategie-obama-bilanz', - 'only_matching': True - }, { - # URL with 'show' and fragment: - 'url': 'http://www.telezueri.ch/66-show-sonntalk#burka-plakate-trump-putin-china-besuch', - 'only_matching': True - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - - kaltura_partner_id = self._html_search_regex( - r'<script[^>]+src=["\']https?://www\.kaltura\.com/.*/partner_id/([0-9]+)', - webpage, 'Kaltura partner ID') - kaltura_entry_id = self._html_search_regex( - r'<a[^>]+data-id=["\'](.*?)["\'][^>]+data-slug=["\']%s' % video_id, - webpage, 'Kaltura entry ID') - - return self.url_result( - 'kaltura:%s:%s' % (kaltura_partner_id, kaltura_entry_id), - ie=KalturaIE.ie_key()) - - -class AZMedienTVShowIE(InfoExtractor): - IE_DESC = 'telezueri.ch, telebaern.tv and telem1.ch shows' - _VALID_URL = r'http://(?:www\.)?(?:telezueri\.ch|telebaern\.tv|telem1\.ch)/(?P<id>[0-9]+-show-[^/#]+(?:/[0-9]+-episode-[^/#]+)?)$' - - _TESTS = [{ - # URL with 'episode': - 'url': 'http://www.telebaern.tv/118-show-news/13735-episode-donnerstag-15-dezember-2016', - 'info_dict': { - 'id': '118-show-news/13735-episode-donnerstag-15-dezember-2016', - 'title': 'News', - }, - 'playlist_count': 9, - }, { - # URL with 'show' only: - 'url': 'http://www.telezueri.ch/86-show-talktaeglich', - 'only_matching': True - }] - - def _real_extract(self, url): - show_id = self._match_id(url) - webpage = self._download_webpage(url, show_id) - - title = get_element_by_class('title-block-cell', webpage) - if title: - title = title.strip() - - entries = [self.url_result(m.group('url'), ie=AZMedienTVIE.ie_key()) for m in re.finditer( - r'<a href=["\']#["\'][^>]+data-real=["\'](?P<url>.+?)["\']', webpage)] - - return self.playlist_result( - entries, show_id, title) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 4cfb3c70f..de5f94738 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -77,9 +77,9 @@ from .awaan import ( AWAANLiveIE, AWAANSeasonIE, ) -from .azmedientv import ( - AZMedienTVIE, - AZMedienTVShowIE, +from .azmedien import ( + AZMedienIE, + AZMedienShowIE, ) from .azubu import AzubuIE, AzubuLiveIE from .baidu import BaiduVideoIE From 42697bab3c4d65a232054d5d5482cc177da12c72 Mon Sep 17 00:00:00 2001 From: einstein95 <einstein95@users.noreply.github.com> Date: Sun, 22 Jan 2017 02:00:38 +1300 Subject: [PATCH 76/86] [chaturbate] Fix extraction --- youtube_dl/extractor/chaturbate.py | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/chaturbate.py b/youtube_dl/extractor/chaturbate.py index 29a8820d5..1c2f065df 100644 --- a/youtube_dl/extractor/chaturbate.py +++ b/youtube_dl/extractor/chaturbate.py @@ -1,5 +1,7 @@ from __future__ import unicode_literals +import re + from .common import InfoExtractor from ..utils import ExtractorError @@ -31,30 +33,32 @@ class ChaturbateIE(InfoExtractor): webpage = self._download_webpage(url, video_id) - m3u8_url = self._search_regex( - r'src=(["\'])(?P<url>http.+?\.m3u8.*?)\1', webpage, - 'playlist', default=None, group='url') + m3u8_urls = re.findall( + r'var hlsSource.+? = (["\'])(?P<url>http.+?\.m3u8)', webpage) - if not m3u8_url: + if not m3u8_urls: error = self._search_regex( [r'<span[^>]+class=(["\'])desc_span\1[^>]*>(?P<error>[^<]+)</span>', r'<div[^>]+id=(["\'])defchat\1[^>]*>\s*<p><strong>(?P<error>[^<]+)<'], webpage, 'error', group='error', default=None) if not error: - if any(p not in webpage for p in ( + if any(p in webpage for p in ( self._ROOM_OFFLINE, 'offline_tipping', 'tip_offline')): error = self._ROOM_OFFLINE if error: raise ExtractorError(error, expected=True) raise ExtractorError('Unable to find stream URL') - formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4') + formats = [] + for m3u8_url in m3u8_urls: + formats.append(self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4')[0]) + self._sort_formats(formats) return { 'id': video_id, 'title': self._live_title(video_id), - 'thumbnail': 'https://cdn-s.highwebmedia.com/uHK3McUtGCG3SMFcd4ZJsRv8/roomimage/%s.jpg' % video_id, + 'thumbnail': 'https://roomimg.stream.highwebmedia.com/ri/%s.jpg' % video_id, 'age_limit': self._rta_search(webpage), 'is_live': True, 'formats': formats, From a243abb80d5fdaacc502bc5a2b5cb20d0766e93a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 22 Jan 2017 03:00:10 +0700 Subject: [PATCH 77/86] [chaturbate] Improve (closes #11797) --- youtube_dl/extractor/chaturbate.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/chaturbate.py b/youtube_dl/extractor/chaturbate.py index 1c2f065df..8fbc91c1f 100644 --- a/youtube_dl/extractor/chaturbate.py +++ b/youtube_dl/extractor/chaturbate.py @@ -33,10 +33,10 @@ class ChaturbateIE(InfoExtractor): webpage = self._download_webpage(url, video_id) - m3u8_urls = re.findall( - r'var hlsSource.+? = (["\'])(?P<url>http.+?\.m3u8)', webpage) + m3u8_formats = [(m.group('id').lower(), m.group('url')) for m in re.finditer( + r'hlsSource(?P<id>.+?)\s*=\s*(?P<q>["\'])(?P<url>http.+?)(?P=q)', webpage)] - if not m3u8_urls: + if not m3u8_formats: error = self._search_regex( [r'<span[^>]+class=(["\'])desc_span\1[^>]*>(?P<error>[^<]+)</span>', r'<div[^>]+id=(["\'])defchat\1[^>]*>\s*<p><strong>(?P<error>[^<]+)<'], @@ -50,9 +50,12 @@ class ChaturbateIE(InfoExtractor): raise ExtractorError('Unable to find stream URL') formats = [] - for m3u8_url in m3u8_urls: - formats.append(self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4')[0]) - + for m3u8_id, m3u8_url in m3u8_formats: + formats.extend(self._extract_m3u8_formats( + m3u8_url, video_id, ext='mp4', + # ffmpeg skips segments for fast m3u8 + preference=-10 if m3u8_id == 'fast' else None, + m3u8_id=m3u8_id, fatal=False, live=True)) self._sort_formats(formats) return { From 8d1fbe0cb20fdfab8487bb478c2a002f12c1a5d9 Mon Sep 17 00:00:00 2001 From: einstein95 <einstein95@users.noreply.github.com> Date: Sat, 21 Jan 2017 20:02:55 +1300 Subject: [PATCH 78/86] [pornflip] Add extractor (closes #11556) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/pornflip.py | 59 ++++++++++++++++++++++++++++++ 2 files changed, 60 insertions(+) create mode 100644 youtube_dl/extractor/pornflip.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index de5f94738..cfddf5b92 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -724,6 +724,7 @@ from .polskieradio import ( ) from .porn91 import Porn91IE from .porncom import PornComIE +from .pornflip import PornFlipIE from .pornhd import PornHdIE from .pornhub import ( PornHubIE, diff --git a/youtube_dl/extractor/pornflip.py b/youtube_dl/extractor/pornflip.py new file mode 100644 index 000000000..b6077f7cb --- /dev/null +++ b/youtube_dl/extractor/pornflip.py @@ -0,0 +1,59 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..compat import ( + compat_parse_qs, +) +from ..utils import ( + int_or_none, + try_get, + RegexNotFoundError, +) + + +class PornFlipIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?pornflip\.com/v/(?P<id>[0-9A-Za-z]{11})' + _TEST = { + 'url': 'https://www.pornflip.com/v/wz7DfNhMmep', + 'md5': '98c46639849145ae1fd77af532a9278c', + 'info_dict': { + 'id': 'wz7DfNhMmep', + 'ext': 'mp4', + 'title': '2 Amateurs swallow make his dream cumshots true', + 'uploader': 'figifoto', + 'thumbnail': r're:^https?://.*\.jpg$', + 'age_limit': 18, + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + uploader = self._html_search_regex( + r'<span class="name">\s+<a class="ajax" href=".+>\s+<strong>([^<]+)<', webpage, 'uploader', fatal=False) + flashvars = compat_parse_qs(self._html_search_regex( + r'<embed.+?flashvars="([^"]+)"', + webpage, 'flashvars')) + title = flashvars['video_vars[title]'][0] + thumbnail = try_get(flashvars, lambda x: x['video_vars[big_thumb]'][0]) + formats = [] + for k, v in flashvars.items(): + height = self._search_regex(r'video_vars\[video_urls\]\[(\d+).+?\]', k, 'height', default=None) + if height: + url = v[0] + formats.append({ + 'height': int_or_none(height), + 'url': url + }) + + self._sort_formats(formats) + + return { + 'id': video_id, + 'formats': formats, + 'title': title, + 'uploader': uploader, + 'thumbnail': thumbnail, + 'age_limit': 18, + } From 271808b6b2bd75ec9bdf943a55dbc4737bfa6f81 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 22 Jan 2017 03:43:27 +0700 Subject: [PATCH 79/86] [pornflip] Improve and extract dash formats (closes #11795) --- youtube_dl/extractor/pornflip.py | 79 ++++++++++++++++++++++---------- 1 file changed, 56 insertions(+), 23 deletions(-) diff --git a/youtube_dl/extractor/pornflip.py b/youtube_dl/extractor/pornflip.py index b6077f7cb..a4a5d390e 100644 --- a/youtube_dl/extractor/pornflip.py +++ b/youtube_dl/extractor/pornflip.py @@ -4,56 +4,89 @@ from __future__ import unicode_literals from .common import InfoExtractor from ..compat import ( compat_parse_qs, + compat_str, ) from ..utils import ( int_or_none, try_get, - RegexNotFoundError, + unified_timestamp, ) class PornFlipIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?pornflip\.com/v/(?P<id>[0-9A-Za-z]{11})' - _TEST = { + _VALID_URL = r'https?://(?:www\.)?pornflip\.com/(?:v|embed)/(?P<id>[0-9A-Za-z]{11})' + _TESTS = [{ 'url': 'https://www.pornflip.com/v/wz7DfNhMmep', 'md5': '98c46639849145ae1fd77af532a9278c', 'info_dict': { 'id': 'wz7DfNhMmep', 'ext': 'mp4', 'title': '2 Amateurs swallow make his dream cumshots true', - 'uploader': 'figifoto', 'thumbnail': r're:^https?://.*\.jpg$', + 'duration': 112, + 'timestamp': 1481655502, + 'upload_date': '20161213', + 'uploader_id': '106786', + 'uploader': 'figifoto', + 'view_count': int, 'age_limit': 18, } - } + }, { + 'url': 'https://www.pornflip.com/embed/wz7DfNhMmep', + 'only_matching': True, + }] def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - uploader = self._html_search_regex( - r'<span class="name">\s+<a class="ajax" href=".+>\s+<strong>([^<]+)<', webpage, 'uploader', fatal=False) - flashvars = compat_parse_qs(self._html_search_regex( - r'<embed.+?flashvars="([^"]+)"', - webpage, 'flashvars')) - title = flashvars['video_vars[title]'][0] - thumbnail = try_get(flashvars, lambda x: x['video_vars[big_thumb]'][0]) - formats = [] - for k, v in flashvars.items(): - height = self._search_regex(r'video_vars\[video_urls\]\[(\d+).+?\]', k, 'height', default=None) - if height: - url = v[0] - formats.append({ - 'height': int_or_none(height), - 'url': url - }) + webpage = self._download_webpage( + 'https://www.pornflip.com/v/%s' % video_id, video_id) + + flashvars = compat_parse_qs(self._search_regex( + r'<embed[^>]+flashvars=(["\'])(?P<flashvars>(?:(?!\1).)+)\1', + webpage, 'flashvars', group='flashvars')) + + title = flashvars['video_vars[title]'][0] + + def flashvar(kind): + return try_get( + flashvars, lambda x: x['video_vars[%s]' % kind][0], compat_str) + + formats = [] + for key, value in flashvars.items(): + if not (value and isinstance(value, list)): + continue + format_url = value[0] + if key == 'video_vars[hds_manifest]': + formats.extend(self._extract_mpd_formats( + format_url, video_id, mpd_id='dash', fatal=False)) + continue + height = self._search_regex( + r'video_vars\[video_urls\]\[(\d+)', key, 'height', default=None) + if not height: + continue + formats.append({ + 'url': format_url, + 'format_id': 'http-%s' % height, + 'height': int_or_none(height), + }) self._sort_formats(formats) + uploader = self._html_search_regex( + (r'<span[^>]+class="name"[^>]*>\s*<a[^>]+>\s*<strong>(?P<uploader>[^<]+)', + r'<meta[^>]+content=(["\'])[^>]*\buploaded by (?P<uploader>.+?)\1'), + webpage, 'uploader', fatal=False, group='uploader') + return { 'id': video_id, 'formats': formats, 'title': title, + 'thumbnail': flashvar('big_thumb'), + 'duration': int_or_none(flashvar('duration')), + 'timestamp': unified_timestamp(self._html_search_meta( + 'uploadDate', webpage, 'timestamp')), + 'uploader_id': flashvar('author_id'), 'uploader': uploader, - 'thumbnail': thumbnail, + 'view_count': int_or_none(flashvar('views')), 'age_limit': 18, } From 6c031a35f31717cc1a535d5d808b94967b841a93 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 22 Jan 2017 18:57:15 +0700 Subject: [PATCH 80/86] [ChangeLog] Actualize --- ChangeLog | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/ChangeLog b/ChangeLog index 00c8a063f..a814b934c 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,8 +1,16 @@ version <unreleased> Extractors ++ [pornflip] Add support for pornflip.com (#11556, #11795) +* [chaturbate] Fix extraction (#11797, #11802) ++ [azmedien] Add support for AZ Medien sites (#11784, #11785) + [nextmedia] Support redirected URLs ++ [vimeo:channel] Extract videos' titles for playlist entries (#11796) ++ [youtube] Extract episode metadata (#9695, #11774) + [cspan] Support Ustream embedded videos (#11547) ++ [1tv] Add support for HLS videos (#11786) +* [uol] Fix extraction (#11770) +* [mtv] Relax triforce feed regular expression (#11766) version 2017.01.18 From 9d5b29c881f679b1d4270326af4ba6f657807011 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 22 Jan 2017 18:59:04 +0700 Subject: [PATCH 81/86] release 2017.01.22 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- README.md | 2 +- docs/supportedsites.md | 3 +++ youtube_dl/version.py | 2 +- 5 files changed, 9 insertions(+), 6 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 38cb13a33..30cc27c7b 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.01.18*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.01.18** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.01.22*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.01.22** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v <your command line> [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2017.01.18 +[debug] youtube-dl version 2017.01.22 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index a814b934c..beea17e54 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2017.01.22 Extractors + [pornflip] Add support for pornflip.com (#11556, #11795) diff --git a/README.md b/README.md index a606346b2..4f677d0cc 100644 --- a/README.md +++ b/README.md @@ -374,7 +374,7 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo avprobe) --audio-format FORMAT Specify audio format: "best", "aac", "vorbis", "mp3", "m4a", "opus", or "wav"; - "best" by default + "best" by default; No effect without -x --audio-quality QUALITY Specify ffmpeg/avconv audio quality, insert a value between 0 (better) and 9 (worse) for VBR or a specific bitrate like 128K diff --git a/docs/supportedsites.md b/docs/supportedsites.md index a3c76d5db..b906d443a 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -74,6 +74,8 @@ - **awaan:live** - **awaan:season** - **awaan:video** + - **AZMedien**: AZ Medien videos + - **AZMedienShow**: AZ Medien shows - **Azubu** - **AzubuLive** - **BaiduVideo**: 百度视频 @@ -572,6 +574,7 @@ - **PolskieRadio** - **PolskieRadioCategory** - **PornCom** + - **PornFlip** - **PornHd** - **PornHub**: PornHub and Thumbzilla - **PornHubPlaylist** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 669f60f65..9466c9637 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2017.01.18' +__version__ = '2017.01.22' From 30dda24de304dd53fc63dfb5bf4672c2ec747014 Mon Sep 17 00:00:00 2001 From: Gaetan Gilbert <gaetan.gilbert@ens-lyon.fr> Date: Sun, 22 Jan 2017 20:27:38 +0100 Subject: [PATCH 82/86] [chirbit] Extract uploader --- youtube_dl/extractor/chirbit.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/youtube_dl/extractor/chirbit.py b/youtube_dl/extractor/chirbit.py index f35df143a..4815b34be 100644 --- a/youtube_dl/extractor/chirbit.py +++ b/youtube_dl/extractor/chirbit.py @@ -19,6 +19,7 @@ class ChirbitIE(InfoExtractor): 'title': 'md5:f542ea253f5255240be4da375c6a5d7e', 'description': 'md5:f24a4e22a71763e32da5fed59e47c770', 'duration': 306, + 'uploader': 'Gerryaudio', }, 'params': { 'skip_download': True, @@ -54,6 +55,9 @@ class ChirbitIE(InfoExtractor): duration = parse_duration(self._search_regex( r'class=["\']c-length["\'][^>]*>([^<]+)', webpage, 'duration', fatal=False)) + uploader = self._search_regex( + r'id=["\']chirbit-username["\'][^>]*>([^<]+)', + webpage, 'uploader', fatal=False) return { 'id': audio_id, @@ -61,6 +65,7 @@ class ChirbitIE(InfoExtractor): 'title': title, 'description': description, 'duration': duration, + 'uploader': uploader, } From a089545e036619a798aa19f33085f2b0b87a1b0a Mon Sep 17 00:00:00 2001 From: Alex Seiler <seileralex@gmail.com> Date: Sun, 22 Jan 2017 20:30:29 +0100 Subject: [PATCH 83/86] [azmedien:show] Improve _VALID_URL --- youtube_dl/extractor/azmedien.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/azmedien.py b/youtube_dl/extractor/azmedien.py index 059dc6e4b..a89f71c20 100644 --- a/youtube_dl/extractor/azmedien.py +++ b/youtube_dl/extractor/azmedien.py @@ -85,7 +85,20 @@ class AZMedienIE(AZMedienBaseIE): class AZMedienShowIE(AZMedienBaseIE): IE_DESC = 'AZ Medien shows' - _VALID_URL = r'https?://(?:www\.)?(?:telezueri\.ch|telebaern\.tv|telem1\.ch)/(?P<id>[0-9]+-show-[^/#]+(?:/[0-9]+-episode-[^/#]+)?)$' + _VALID_URL = r'''(?x) + https?:// + (?:www\.)? + (?: + telezueri\.ch| + telebaern\.tv| + telem1\.ch + )/ + (?P<id>[0-9]+-show-[^/\#]+ + (?: + /[0-9]+-episode-[^/\#]+ + )? + )$ + ''' _TESTS = [{ # URL with 'episode' From 8bc0800d7cf24b17204f0fb3c6e76327ed8d527f Mon Sep 17 00:00:00 2001 From: Grzegorz P <Grzechooo@users.noreply.github.com> Date: Sun, 22 Jan 2017 20:35:38 +0100 Subject: [PATCH 84/86] [youtube:playlist] Fix nonexistent/private playlist detection (closes #11604) --- youtube_dl/extractor/youtube.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 63597dd16..644653357 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1998,7 +1998,8 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor): url = self._TEMPLATE_URL % playlist_id page = self._download_webpage(url, playlist_id) - for match in re.findall(r'<div class="yt-alert-message">([^<]+)</div>', page): + # the yt-alert-message now has tabindex attribute (see https://github.com/rg3/youtube-dl/issues/11604) + for match in re.findall(r'<div class="yt-alert-message"[^>]*>([^<]+)</div>', page): match = match.strip() # Check if the playlist exists or is private if re.match(r'[^<]*(The|This) playlist (does not exist|is private)[^<]*', match): From 4201ba13e674788c36ae69fbfbffc4b246717d6a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 23 Jan 2017 02:49:56 +0700 Subject: [PATCH 85/86] [youtube:playlist] Fix nonexistent/private playlist detection and skip private tests --- youtube_dl/extractor/youtube.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 644653357..5202beb3e 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1856,6 +1856,7 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor): 'title': 'YDL_Empty_List', }, 'playlist_count': 0, + 'skip': 'This playlist is private', }, { 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.', 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC', @@ -1887,6 +1888,7 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor): 'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl', }, 'playlist_count': 2, + 'skip': 'This playlist is private', }, { 'note': 'embedded', 'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu', @@ -2002,11 +2004,14 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor): for match in re.findall(r'<div class="yt-alert-message"[^>]*>([^<]+)</div>', page): match = match.strip() # Check if the playlist exists or is private - if re.match(r'[^<]*(The|This) playlist (does not exist|is private)[^<]*', match): - raise ExtractorError( - 'The playlist doesn\'t exist or is private, use --username or ' - '--netrc to access it.', - expected=True) + mobj = re.match(r'[^<]*(?:The|This) playlist (?P<reason>does not exist|is private)[^<]*', match) + if mobj: + reason = mobj.group('reason') + message = 'This playlist %s' % reason + if 'private' in reason: + message += ', use --username or --netrc to access it' + message += '.' + raise ExtractorError(message, expected=True) elif re.match(r'[^<]*Invalid parameters[^<]*', match): raise ExtractorError( 'Invalid parameters. Maybe URL is incorrect.', From 6d119c2a6bdd2a987ef2e7553b357bd4a3f18690 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 23 Jan 2017 03:50:39 +0700 Subject: [PATCH 86/86] [24video] Fix extraction (closes #11811) --- youtube_dl/extractor/twentyfourvideo.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/twentyfourvideo.py b/youtube_dl/extractor/twentyfourvideo.py index 1093a3829..a983ebf05 100644 --- a/youtube_dl/extractor/twentyfourvideo.py +++ b/youtube_dl/extractor/twentyfourvideo.py @@ -12,7 +12,7 @@ from ..utils import ( class TwentyFourVideoIE(InfoExtractor): IE_NAME = '24video' - _VALID_URL = r'https?://(?:www\.)?24video\.(?:net|me|xxx)/(?:video/(?:view|xml)/|player/new24_play\.swf\?id=)(?P<id>\d+)' + _VALID_URL = r'https?://(?:www\.)?24video\.(?:net|me|xxx|sex)/(?:video/(?:view|xml)/|player/new24_play\.swf\?id=)(?P<id>\d+)' _TESTS = [{ 'url': 'http://www.24video.net/video/view/1044982', @@ -43,7 +43,7 @@ class TwentyFourVideoIE(InfoExtractor): video_id = self._match_id(url) webpage = self._download_webpage( - 'http://www.24video.net/video/view/%s' % video_id, video_id) + 'http://www.24video.sex/video/view/%s' % video_id, video_id) title = self._og_search_title(webpage) description = self._html_search_regex( @@ -69,11 +69,11 @@ class TwentyFourVideoIE(InfoExtractor): # Sets some cookies self._download_xml( - r'http://www.24video.net/video/xml/%s?mode=init' % video_id, + r'http://www.24video.sex/video/xml/%s?mode=init' % video_id, video_id, 'Downloading init XML') video_xml = self._download_xml( - 'http://www.24video.net/video/xml/%s?mode=play' % video_id, + 'http://www.24video.sex/video/xml/%s?mode=play' % video_id, video_id, 'Downloading video XML') video = xpath_element(video_xml, './/video', 'video', fatal=True)