diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index bf9494646..caed64e38 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.04.06*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.04.06** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.04.13*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.04.13** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.04.06 +[debug] youtube-dl version 2016.04.13 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 0df6193fb..c83b8655a 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -140,14 +140,14 @@ After you have ensured this site is distributing it's content legally, you can f # TODO more properties (see youtube_dl/extractor/common.py) } ``` -5. Add an import in [`youtube_dl/extractor/__init__.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/__init__.py). +5. Add an import in [`youtube_dl/extractor/extractors.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/extractors.py). 6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. 7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/58525c94d547be1c8167d16c298bdd75506db328/youtube_dl/extractor/common.py#L68-L226). Add tests and code for as many as you want. 8. Keep in mind that the only mandatory fields in info dict for successful extraction process are `id`, `title` and either `url` or `formats`, i.e. these are the critical data the extraction does not make any sense without. This means that [any field](https://github.com/rg3/youtube-dl/blob/58525c94d547be1c8167d16c298bdd75506db328/youtube_dl/extractor/common.py#L138-L226) apart from aforementioned mandatory ones should be treated **as optional** and extraction should be **tolerate** to situations when sources for these fields can potentially be unavailable (even if they always available at the moment) and **future-proof** in order not to break the extraction of general purpose mandatory fields. For example, if you have some intermediate dict `meta` that is a source of metadata and it has a key `summary` that you want to extract and put into resulting info dict as `description`, you should be ready that this key may be missing from the `meta` dict, i.e. you should extract it as `meta.get('summary')` and not `meta['summary']`. Similarly, you should pass `fatal=False` when extracting data from a webpage with `_search_regex/_html_search_regex`. 9. Check the code with [flake8](https://pypi.python.org/pypi/flake8). 10. When the tests pass, [add](http://git-scm.com/docs/git-add) the new files and [commit](http://git-scm.com/docs/git-commit) them and [push](http://git-scm.com/docs/git-push) the result, like this: - $ git add youtube_dl/extractor/__init__.py + $ git add youtube_dl/extractor/extractors.py $ git add youtube_dl/extractor/yourextractor.py $ git commit -m '[yourextractor] Add new extractor' $ git push origin yourextractor diff --git a/docs/supportedsites.md b/docs/supportedsites.md index d6ee8476b..51a6b5609 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -115,6 +115,7 @@ - **Cinemassacre** - **Clipfish** - **cliphunter** + - **ClipRs** - **Clipsyndicate** - **cloudtime**: CloudTime - **Cloudy** @@ -286,7 +287,6 @@ - **ivi:compilation**: ivi.ru compilations - **ivideon**: Ivideon TV - **Izlesene** - - **JadoreCettePub** - **JeuxVideo** - **Jove** - **jpopsuki.tv** @@ -484,6 +484,7 @@ - **Pornotube** - **PornoVoisines** - **PornoXO** + - **PressTV** - **PrimeShareTV** - **PromptFile** - **prosiebensat1**: ProSiebenSat.1 Digital diff --git a/youtube_dl/downloader/rtsp.py b/youtube_dl/downloader/rtsp.py index 3eb29526c..939358b2a 100644 --- a/youtube_dl/downloader/rtsp.py +++ b/youtube_dl/downloader/rtsp.py @@ -27,6 +27,8 @@ class RtspFD(FileDownloader): self.report_error('MMS or RTSP download detected but neither "mplayer" nor "mpv" could be run. Please install any.') return False + self._debug_cmd(args) + retval = subprocess.call(args) if retval == 0: fsize = os.path.getsize(encodeFilename(tmpfilename)) diff --git a/youtube_dl/extractor/aol.py b/youtube_dl/extractor/aol.py index d4801a25b..24df8fe93 100644 --- a/youtube_dl/extractor/aol.py +++ b/youtube_dl/extractor/aol.py @@ -12,9 +12,10 @@ from ..utils import ( class AolIE(InfoExtractor): IE_NAME = 'on.aol.com' - _VALID_URL = r'(?:aol-video:|https?://on\.aol\.com/video/.*-)(?P[^/?-]+)' + _VALID_URL = r'(?:aol-video:|https?://on\.aol\.com/.*-)(?P[^/?-]+)' _TESTS = [{ + # video with 5min ID 'url': 'http://on.aol.com/video/u-s--official-warns-of-largest-ever-irs-phone-scam-518167793?icid=OnHomepageC2Wide_MustSee_Img', 'md5': '18ef68f48740e86ae94b98da815eec42', 'info_dict': { @@ -31,6 +32,7 @@ class AolIE(InfoExtractor): 'skip_download': True, } }, { + # video with vidible ID 'url': 'http://on.aol.com/video/netflix-is-raising-rates-5707d6b8e4b090497b04f706?context=PC:homepage:PL1944:1460189336183', 'info_dict': { 'id': '5707d6b8e4b090497b04f706', @@ -45,6 +47,12 @@ class AolIE(InfoExtractor): # m3u8 download 'skip_download': True, } + }, { + 'url': 'http://on.aol.com/partners/abc-551438d309eab105804dbfe8/sneak-peek-was-haley-really-framed-570eaebee4b0448640a5c944', + 'only_matching': True, + }, { + 'url': 'http://on.aol.com/shows/park-bench-shw518173474-559a1b9be4b0c3bfad3357a7?context=SH:SHW518173474:PL4327:1460619712763', + 'only_matching': True, }] def _real_extract(self, url): diff --git a/youtube_dl/extractor/arte.py b/youtube_dl/extractor/arte.py index f042d9163..a9e3266dc 100644 --- a/youtube_dl/extractor/arte.py +++ b/youtube_dl/extractor/arte.py @@ -210,7 +210,7 @@ class ArteTVPlus7IE(InfoExtractor): # It also uses the arte_vp_url url from the webpage to extract the information class ArteTVCreativeIE(ArteTVPlus7IE): IE_NAME = 'arte.tv:creative' - _VALID_URL = r'https?://creative\.arte\.tv/(?Pfr|de|en|es)/(?:magazine?/)?(?P[^/?#&]+)' + _VALID_URL = r'https?://creative\.arte\.tv/(?Pfr|de|en|es)/(?:[^/]+/)*(?P[^/?#&]+)' _TESTS = [{ 'url': 'http://creative.arte.tv/de/magazin/agentur-amateur-corporate-design', @@ -229,9 +229,27 @@ class ArteTVCreativeIE(ArteTVPlus7IE): 'description': 'Événement ! Quarante-cinq ans après leurs premiers succès, les légendaires Monty Python remontent sur scène.\n', 'upload_date': '20140805', } + }, { + 'url': 'http://creative.arte.tv/de/episode/agentur-amateur-4-der-erste-kunde', + 'only_matching': True, }] +class ArteTVInfoIE(ArteTVPlus7IE): + IE_NAME = 'arte.tv:info' + _VALID_URL = r'https?://info\.arte\.tv/(?Pfr|de|en|es)/(?:[^/]+/)*(?P[^/?#&]+)' + + _TEST = { + 'url': 'http://info.arte.tv/fr/service-civique-un-cache-misere', + 'info_dict': { + 'id': '067528-000-A', + 'ext': 'mp4', + 'title': 'Service civique, un cache misère ?', + 'upload_date': '20160403', + }, + } + + class ArteTVFutureIE(ArteTVPlus7IE): IE_NAME = 'arte.tv:future' _VALID_URL = r'https?://future\.arte\.tv/(?Pfr|de|en|es)/(?P[^/?#&]+)' diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py index c718cf385..f0781fc27 100644 --- a/youtube_dl/extractor/brightcove.py +++ b/youtube_dl/extractor/brightcove.py @@ -340,7 +340,7 @@ class BrightcoveLegacyIE(InfoExtractor): ext = 'flv' if ext is None: ext = determine_ext(url) - tbr = int_or_none(rend.get('encodingRate'), 1000), + tbr = int_or_none(rend.get('encodingRate'), 1000) a_format = { 'format_id': 'http%s' % ('-%s' % tbr if tbr else ''), 'url': url, diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 04c6508f1..d00445b3c 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -46,6 +46,7 @@ from .arte import ( ArteTVPlus7IE, ArteTVCreativeIE, ArteTVConcertIE, + ArteTVInfoIE, ArteTVFutureIE, ArteTVCinemaIE, ArteTVDDCIE, @@ -411,7 +412,12 @@ from .minoto import MinotoIE from .miomio import MioMioIE from .mit import TechTVMITIE, MITIE, OCWMITIE from .mitele import MiTeleIE -from .mixcloud import MixcloudIE +from .mixcloud import ( + MixcloudIE, + MixcloudUserIE, + MixcloudPlaylistIE, + MixcloudStreamIE, +) from .mlb import MLBIE from .mnet import MnetIE from .mpora import MporaIE @@ -729,6 +735,7 @@ from .sztvhu import SztvHuIE from .tagesschau import TagesschauIE from .tapely import TapelyIE from .tass import TassIE +from .tdslifeway import TDSLifewayIE from .teachertube import ( TeacherTubeIE, TeacherTubeUserIE, @@ -831,7 +838,6 @@ from .twitter import ( TwitterIE, TwitterAmplifyIE, ) -from .ubu import UbuIE from .udemy import ( UdemyIE, UdemyCourseIE @@ -916,7 +922,6 @@ from .vulture import VultureIE from .walla import WallaIE from .washingtonpost import WashingtonPostIE from .wat import WatIE -from .wayofthemaster import WayOfTheMasterIE from .wdr import ( WDRIE, WDRMobileIE, diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 2aadd6a12..5b22b6b5e 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -60,6 +60,7 @@ from .googledrive import GoogleDriveIE from .jwplatform import JWPlatformIE from .digiteka import DigitekaIE from .instagram import InstagramIE +from .liveleak import LiveLeakIE class GenericIE(InfoExtractor): @@ -1140,6 +1141,18 @@ class GenericIE(InfoExtractor): 'upload_date': '20160409', }, }, + # LiveLeak embed + { + 'url': 'http://www.wykop.pl/link/3088787/', + 'md5': 'ace83b9ed19b21f68e1b50e844fdf95d', + 'info_dict': { + 'id': '874_1459135191', + 'ext': 'mp4', + 'title': 'Man shows poor quality of new apartment building', + 'description': 'The wall is like a sand pile.', + 'uploader': 'Lake8737', + } + }, ] def report_following_redirect(self, new_url): @@ -1944,6 +1957,11 @@ class GenericIE(InfoExtractor): if instagram_embed_url is not None: return self.url_result(instagram_embed_url, InstagramIE.ie_key()) + # Look for LiveLeak embeds + liveleak_url = LiveLeakIE._extract_url(webpage) + if liveleak_url: + return self.url_result(liveleak_url, 'LiveLeak') + def check_video(vurl): if YoutubeIE.suitable(vurl): return True diff --git a/youtube_dl/extractor/huffpost.py b/youtube_dl/extractor/huffpost.py index a38eae421..1dc5701b2 100644 --- a/youtube_dl/extractor/huffpost.py +++ b/youtube_dl/extractor/huffpost.py @@ -4,6 +4,7 @@ import re from .common import InfoExtractor from ..utils import ( + determine_ext, parse_duration, unified_strdate, ) @@ -29,7 +30,12 @@ class HuffPostIE(InfoExtractor): 'description': 'This week on Legalese It, Mike talks to David Bosco about his new book on the ICC, "Rough Justice," he also discusses the Virginia AG\'s historic stance on gay marriage, the execution of Edgar Tamayo, the ICC\'s delay of Kenya\'s President and more. ', 'duration': 1549, 'upload_date': '20140124', - } + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + 'expected_warnings': ['HTTP Error 404: Not Found'], } def _real_extract(self, url): @@ -45,7 +51,7 @@ class HuffPostIE(InfoExtractor): description = data.get('description') thumbnails = [] - for url in data['images'].values(): + for url in filter(None, data['images'].values()): m = re.match('.*-([0-9]+x[0-9]+)\.', url) if not m: continue @@ -54,13 +60,25 @@ class HuffPostIE(InfoExtractor): 'resolution': m.group(1), }) - formats = [{ - 'format': key, - 'format_id': key.replace('/', '.'), - 'ext': 'mp4', - 'url': url, - 'vcodec': 'none' if key.startswith('audio/') else None, - } for key, url in data.get('sources', {}).get('live', {}).items()] + formats = [] + sources = data.get('sources', {}) + live_sources = list(sources.get('live', {}).items()) + list(sources.get('live_again', {}).items()) + for key, url in live_sources: + ext = determine_ext(url) + if ext == 'm3u8': + formats.extend(self._extract_m3u8_formats( + url, video_id, ext='mp4', m3u8_id='hls', fatal=False)) + elif ext == 'f4m': + formats.extend(self._extract_f4m_formatsa( + url + '?hdcore=2.9.5', video_id, f4m_id='hds', fatal=False)) + else: + formats.append({ + 'format': key, + 'format_id': key.replace('/', '.'), + 'ext': 'mp4', + 'url': url, + 'vcodec': 'none' if key.startswith('audio/') else None, + }) if not formats and data.get('fivemin_id'): return self.url_result('5min:%s' % data['fivemin_id']) diff --git a/youtube_dl/extractor/karrierevideos.py b/youtube_dl/extractor/karrierevideos.py index 2cb04e533..c05263e61 100644 --- a/youtube_dl/extractor/karrierevideos.py +++ b/youtube_dl/extractor/karrierevideos.py @@ -52,9 +52,12 @@ class KarriereVideosIE(InfoExtractor): video_id = self._search_regex( r'/config/video/(.+?)\.xml', webpage, 'video id') + # Server returns malformed headers + # Force Accept-Encoding: * to prevent gzipped results playlist = self._download_xml( 'http://www.karrierevideos.at/player-playlist.xml.php?p=%s' % video_id, - video_id, transform_source=fix_xml_ampersands) + video_id, transform_source=fix_xml_ampersands, + headers={'Accept-Encoding': '*'}) NS_MAP = { 'jwplayer': 'http://developer.longtailvideo.com/trac/wiki/FlashFormats' diff --git a/youtube_dl/extractor/laola1tv.py b/youtube_dl/extractor/laola1tv.py index d4fbafece..2fab38079 100644 --- a/youtube_dl/extractor/laola1tv.py +++ b/youtube_dl/extractor/laola1tv.py @@ -63,6 +63,7 @@ class Laola1TvIE(InfoExtractor): 'params': { 'skip_download': True, }, + 'skip': 'This live stream has already finished.', }] def _real_extract(self, url): @@ -74,6 +75,9 @@ class Laola1TvIE(InfoExtractor): webpage = self._download_webpage(url, display_id) + if 'Dieser Livestream ist bereits beendet.' in webpage: + raise ExtractorError('This live stream has already finished.', expected=True) + iframe_url = self._search_regex( r']*?id="videoplayer"[^>]*?src="([^"]+)"', webpage, 'iframe url') diff --git a/youtube_dl/extractor/lecture2go.py b/youtube_dl/extractor/lecture2go.py index 40a3d2346..81b5d41be 100644 --- a/youtube_dl/extractor/lecture2go.py +++ b/youtube_dl/extractor/lecture2go.py @@ -6,6 +6,7 @@ import re from .common import InfoExtractor from ..utils import ( determine_ext, + determine_protocol, parse_duration, int_or_none, ) @@ -18,10 +19,14 @@ class Lecture2GoIE(InfoExtractor): 'md5': 'ac02b570883020d208d405d5a3fd2f7f', 'info_dict': { 'id': '17473', - 'ext': 'flv', + 'ext': 'mp4', 'title': '2 - Endliche Automaten und reguläre Sprachen', 'creator': 'Frank Heitmann', 'duration': 5220, + }, + 'params': { + # m3u8 download + 'skip_download': True, } } @@ -32,14 +37,18 @@ class Lecture2GoIE(InfoExtractor): title = self._html_search_regex(r']+class="title">(.+)', webpage, 'title') formats = [] - for url in set(re.findall(r'"src","([^"]+)"', webpage)): + for url in set(re.findall(r'var\s+playerUri\d+\s*=\s*"([^"]+)"', webpage)): ext = determine_ext(url) + protocol = determine_protocol({'url': url}) if ext == 'f4m': - formats.extend(self._extract_f4m_formats(url, video_id)) + formats.extend(self._extract_f4m_formats(url, video_id, f4m_id='hds')) elif ext == 'm3u8': - formats.extend(self._extract_m3u8_formats(url, video_id)) + formats.extend(self._extract_m3u8_formats(url, video_id, ext='mp4', m3u8_id='hls')) else: + if protocol == 'rtmp': + continue # XXX: currently broken formats.append({ + 'format_id': protocol, 'url': url, }) diff --git a/youtube_dl/extractor/liveleak.py b/youtube_dl/extractor/liveleak.py index 4684994e1..29fba5f30 100644 --- a/youtube_dl/extractor/liveleak.py +++ b/youtube_dl/extractor/liveleak.py @@ -53,6 +53,14 @@ class LiveLeakIE(InfoExtractor): } }] + @staticmethod + def _extract_url(webpage): + mobj = re.search( + r']+src="https?://(?:\w+\.)?liveleak\.com/ll_embed\?(?:.*?)i=(?P[\w_]+)(?:.*)', + webpage) + if mobj: + return 'http://www.liveleak.com/view?i=%s' % mobj.group('id') + def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) diff --git a/youtube_dl/extractor/metacritic.py b/youtube_dl/extractor/metacritic.py index e30320569..444ec0310 100644 --- a/youtube_dl/extractor/metacritic.py +++ b/youtube_dl/extractor/metacritic.py @@ -11,7 +11,7 @@ from ..utils import ( class MetacriticIE(InfoExtractor): _VALID_URL = r'https?://www\.metacritic\.com/.+?/trailers/(?P\d+)' - _TEST = { + _TESTS = [{ 'url': 'http://www.metacritic.com/game/playstation-4/infamous-second-son/trailers/3698222', 'info_dict': { 'id': '3698222', @@ -20,7 +20,17 @@ class MetacriticIE(InfoExtractor): 'description': 'Take a peak behind-the-scenes to see how Sucker Punch brings smoke into the universe of inFAMOUS Second Son on the PS4.', 'duration': 221, }, - } + 'skip': 'Not providing trailers anymore', + }, { + 'url': 'http://www.metacritic.com/game/playstation-4/tales-from-the-borderlands-a-telltale-game-series/trailers/5740315', + 'info_dict': { + 'id': '5740315', + 'ext': 'mp4', + 'title': 'Tales from the Borderlands - Finale: The Vault of the Traveler', + 'description': 'In the final episode of the season, all hell breaks loose. Jack is now in control of Helios\' systems, and he\'s ready to reclaim his rightful place as king of Hyperion (with or without you).', + 'duration': 114, + }, + }] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) diff --git a/youtube_dl/extractor/ministrygrid.py b/youtube_dl/extractor/ministrygrid.py index 949ad11db..e48eba3fa 100644 --- a/youtube_dl/extractor/ministrygrid.py +++ b/youtube_dl/extractor/ministrygrid.py @@ -1,8 +1,5 @@ from __future__ import unicode_literals -import json -import re - from .common import InfoExtractor from ..utils import ( ExtractorError, @@ -20,21 +17,28 @@ class MinistryGridIE(InfoExtractor): 'id': '3453494717001', 'ext': 'mp4', 'title': 'The Gospel by Numbers', + 'thumbnail': 're:^https?://.*\.jpg', + 'upload_date': '20140410', 'description': 'Coming soon from T4G 2014!', - 'uploader': 'LifeWay Christian Resources (MG)', + 'uploader_id': '2034960640001', + 'timestamp': 1397145591, }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + 'add_ie': ['TDSLifeway'], } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') + video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - portlets_json = self._search_regex( - r'Liferay\.Portlet\.list=(\[.+?\])', webpage, 'portlet list') - portlets = json.loads(portlets_json) + portlets = self._parse_json(self._search_regex( + r'Liferay\.Portlet\.list=(\[.+?\])', webpage, 'portlet list'), + video_id) pl_id = self._search_regex( - r'