diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 76e09c42a..45f8db721 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.03.10*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.03.10** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.04.17*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.04.17** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2017.03.10 +[debug] youtube-dl version 2017.04.17 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/AUTHORS b/AUTHORS index 273a6a034..1bdb74285 100644 --- a/AUTHORS +++ b/AUTHORS @@ -209,3 +209,6 @@ Olivier Bilodeau Lars Vierbergen Juanjo Benages Xiao Di Guan +Thomas Winant +Daniel Twardowski +Jeremie Jarosh diff --git a/ChangeLog b/ChangeLog index b1425e630..03ef0363b 100644 --- a/ChangeLog +++ 
b/ChangeLog @@ -1,8 +1,210 @@ -version +version 2017.04.17 + +Extractors +* [limelight] Improve extraction of LimelightEmbeddedPlayerFlash media embeds and + add support for channel and channelList embeds +* [generic] Extract multiple Limelight embeds (#12761) ++ [itv] Extract series metadata +* [itv] Fix RTMP formats downloading (#12759) +* [itv] Use native HLS downloader by default ++ [go90] Extract subtitles (#12752) ++ [go90] Extract series metadata (#12752) + + +version 2017.04.16 + +Core +* [YoutubeDL] Apply expand_path after output template substitution ++ [YoutubeDL] Propagate overridden meta fields to extraction results of type + url (#11163) + +Extractors ++ [generic] Extract RSS entries as url_transparent (#11163) ++ [streamango] Add support for streamango.com (#12643) ++ [wsj:article] Add support for articles (#12558) +* [brightcove] Relax video tag embeds extraction and validate ambiguous embeds' + URLs (#9163, #12005, #12178, #12480) ++ [udemy] Add support for react rendition (#12744) + + +version 2017.04.15 + +Extractors +* [youku] Fix fileid extraction (#12741, #12743) + + +version 2017.04.14 + +Core ++ [downloader/hls] Add basic support for EXT-X-BYTERANGE tag (#10955) ++ [adobepass] Improve Comcast and Verizon login code (#10803) ++ [adobepass] Add support for Verizon (#10803) + +Extractors ++ [aenetworks] Add support for specials (#12723) ++ [hbo] Extract HLS formats ++ [go90] Add support for go90.com (#10127) ++ [tv2hu] Add support for tv2.hu (#10509) ++ [generic] Exclude URLs with xml ext from valid video URLs (#10768, #11654) +* [youtube] Improve HLS formats extraction +* [afreecatv] Fix extraction for videos with different key layout (#12718) +- [youtube] Remove explicit preference for audio-only and video-only formats in + order not to break sorting when new formats appear +* [canalplus] Bypass geo restriction + + +version 2017.04.11 + +Extractors +* [afreecatv] Fix extraction (#12706) ++ [generic] Add support for YouTube embeds (#12637) +* 
[bbccouk] Treat bitrate as audio+video bitrate in media selector ++ [bbccouk] Skip unrecognized formats in media selector (#12701) ++ [bbccouk] Add support for https protocol in media selector (#12701) +* [curiositystream] Fix extraction (#12638) +* [adn] Update subtitle decryption key +* [chaturbate] Fix extraction (#12665, #12688, #12690) + + +version 2017.04.09 + +Extractors ++ [medici] Add support for medici.tv (#3406) ++ [rbmaradio] Add support for redbullradio.com URLs (#12687) ++ [npo:live] Add support for default URL (#12555) +* [mixcloud:playlist] Fix title, description and view count extraction (#12582) ++ [thesun] Add support for thesun.co.uk (#11298, #12674) ++ [ceskatelevize:porady] Add support for porady (#7411, #12645) +* [ceskatelevize] Improve extraction and remove URL replacement hacks ++ [kaltura] Add support for iframe embeds (#12679) +* [airmozilla] Fix extraction (#12670) +* [wshh] Extract html5 entries and delegate to generic extractor (#12676) ++ [raiplay] Extract subtitles ++ [xfileshare] Add support for vidlo.us (#12660) ++ [xfileshare] Add support for vidbom.com (#12661) ++ [aenetworks] Add more video URL regular expressions (#12657) ++ [odnoklassniki] Fix format sorting for 1080p quality ++ [rtl2] Add support for you.rtl2.de (#10257) ++ [vshare] Add support for vshare.io (#12278) + + +version 2017.04.03 + +Core ++ [extractor/common] Add censorship check for TransTelekom ISP +* [extractor/common] Move censorship checks to a separate method + +Extractors ++ [discoveryvr] Add support for discoveryvr.com (#12578) ++ [tv5mondeplus] Add support for tv5mondeplus.com (#11386) ++ [periscope] Add support for pscp.tv URLs (#12618, #12625) + + +version 2017.04.02 + +Core +* [YoutubeDL] Return early when extraction of url_transparent fails + +Extractors +* [rai] Fix and improve extraction (#11790) ++ [vrv] Add support for series pages +* [limelight] Improve extraction for audio only formats +* [funimation] Fix extraction (#10696, #11773) ++ 
[xfileshare] Add support for vidabc.com (#12589) ++ [xfileshare] Improve extraction and extract hls formats ++ [crunchyroll] Pass geo verification proxy ++ [cwtv] Extract ISM formats ++ [tvplay] Bypass geo restriction ++ [vrv] Add support for vrv.co ++ [packtpub] Add support for packtpub.com (#12610) ++ [generic] Pass base_url to _parse_jwplayer_data ++ [adn] Add support for animedigitalnetwork.fr (#4866) ++ [allocine] Extract more metadata +* [allocine] Fix extraction (#12592) +* [openload] Fix extraction + + +version 2017.03.26 + +Core +* Don't raise an error if JWPlayer config data is not a Javascript object + literal. _find_jwplayer_data now returns a dict rather than an str. (#12307) +* Expand environment variables for options representing paths (#12556) ++ [utils] Introduce expand_path +* [downloader/hls] Delegate downloading to ffmpeg immediately for live streams + +Extractors +* [afreecatv] Fix extraction (#12179) ++ [atvat] Add support for atv.at (#5325) ++ [fox] Add metadata extraction (#12391) ++ [atresplayer] Extract DASH formats ++ [atresplayer] Extract HD manifest (#12548) +* [atresplayer] Fix login error detection (#12548) +* [franceculture] Fix extraction (#12547) +* [youtube] Improve URL regular expression (#12538) +* [generic] Do not follow redirects to the same URL + + +version 2017.03.24 + +Extractors +- [9c9media] Remove mp4 URL extraction request ++ [bellmedia] Add support for etalk.ca and space.ca (#12447) +* [channel9] Fix extraction (#11323) +* [cloudy] Fix extraction (#12525) ++ [hbo] Add support for free episode URLs and new formats extraction (#12519) +* [condenast] Fix extraction and style (#12526) +* [viu] Relax URL regular expression (#12529) + + +version 2017.03.22 + +Extractors +- [pluralsight] Omit module title from video title (#12506) +* [pornhub] Decode obfuscated video URL (#12470, #12515) +* [senateisvp] Allow https URL scheme for embeds (#12512) + + +version 2017.03.20 + +Core ++ [YoutubeDL] Allow multiple input URLs to be 
used with stdout (-) as + output template ++ [adobepass] Detect and output error on authz token extraction (#12472) + +Extractors ++ [bostonglobe] Add extractor for bostonglobe.com (#12099) ++ [toongoggles] Add support for toongoggles.com (#12171) ++ [medialaan] Add support for Medialaan sites (#9974, #11912) ++ [discoverynetworks] Add support for more domains and bypass geo restriction +* [openload] Fix extraction (#10408) + + +version 2017.03.16 + +Core ++ [postprocessor/ffmpeg] Add support for flac ++ [extractor/common] Extract SMIL formats from jwplayer + +Extractors ++ [generic] Add forgotten return for jwplayer formats +* [redbulltv] Improve extraction + + +version 2017.03.15 Core * Fix missing subtitles if --add-metadata is used (#12423) +Extractors +* [facebook] Make title optional (#12443) ++ [mitele] Add support for ooyala videos (#12430) +* [openload] Fix extraction (#12435, #12446) +* [streamable] Update API URL (#12433) ++ [crunchyroll] Extract season name (#12428) +* [discoverygo] Bypass geo restriction ++ [discoverygo:playlist] Add support for playlists (#12424) + version 2017.03.10 diff --git a/README.md b/README.md index 0fc5984dc..41f647aaa 100644 --- a/README.md +++ b/README.md @@ -181,10 +181,10 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo -R, --retries RETRIES Number of retries (default is 10), or "infinite". --fragment-retries RETRIES Number of retries for a fragment (default - is 10), or "infinite" (DASH and hlsnative - only) - --skip-unavailable-fragments Skip unavailable fragments (DASH and - hlsnative only) + is 10), or "infinite" (DASH, hlsnative and + ISM) + --skip-unavailable-fragments Skip unavailable fragments (DASH, hlsnative + and ISM) --abort-on-unavailable-fragment Abort downloading when some fragment is not available --buffer-size SIZE Size of download buffer (e.g. 
1024 or 16K) @@ -375,8 +375,9 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo (requires ffmpeg or avconv and ffprobe or avprobe) --audio-format FORMAT Specify audio format: "best", "aac", - "vorbis", "mp3", "m4a", "opus", or "wav"; - "best" by default; No effect without -x + "flac", "mp3", "m4a", "opus", "vorbis", or + "wav"; "best" by default; No effect without + -x --audio-quality QUALITY Specify ffmpeg/avconv audio quality, insert a value between 0 (better) and 9 (worse) for VBR or a specific bitrate like 128K diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 09dc830cb..afae82214 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -28,6 +28,7 @@ - **acast** - **acast:channel** - **AddAnime** + - **ADN**: Anime Digital Network - **AdobeTV** - **AdobeTVChannel** - **AdobeTVShow** @@ -67,6 +68,7 @@ - **arte.tv:playlist** - **AtresPlayer** - **ATTTechChannel** + - **ATVAt** - **AudiMedia** - **AudioBoom** - **audiomack** @@ -108,6 +110,7 @@ - **blinkx** - **Bloomberg** - **BokeCC** + - **BostonGlobe** - **Bpb**: Bundeszentrale für politische Bildung - **BR**: Bayerischer Rundfunk Mediathek - **BravoTV** @@ -124,7 +127,7 @@ - **CamWithHer** - **canalc2.tv** - **Canalplus**: canalplus.fr, piwiplus.fr and d8.tv - - **Canvas** + - **Canvas**: canvas.be and een.be - **CarambaTV** - **CarambaTVPage** - **CartoonNetwork** @@ -142,6 +145,7 @@ - **CCTV**: 央视网 - **CDA** - **CeskaTelevize** + - **CeskaTelevizePorady** - **channel9**: Channel 9 - **CharlieRose** - **Chaturbate** @@ -208,6 +212,9 @@ - **Digiteka** - **Discovery** - **DiscoveryGo** + - **DiscoveryGoPlaylist** + - **DiscoveryNetworksDe** + - **DiscoveryVR** - **Disney** - **Dotsub** - **DouyuTV**: 斗鱼 @@ -301,6 +308,7 @@ - **Globo** - **GloboArticle** - **Go** + - **Go90** - **GodTube** - **GodTV** - **Golem** @@ -309,8 +317,8 @@ - **GPUTechConf** - **Groupon** - **Hark** - - **HBO** - - **HBOEpisode** + - **hbo** + - **hbo:episode** - 
**HearThisAt** - **Heise** - **HellPorno** @@ -424,6 +432,8 @@ - **MatchTV** - **MDR**: MDR.DE and KiKA - **media.ccc.de** + - **Medialaan** + - **Medici** - **Meipai**: 美拍 - **MelonVOD** - **META** @@ -567,6 +577,8 @@ - **orf:iptv**: iptv.ORF.at - **orf:oe1**: Radio Österreich 1 - **orf:tvthek**: ORF TVthek + - **PacktPub** + - **PacktPubCourse** - **PandaTV**: 熊猫TV - **pandora.tv**: 판도라TV - **parliamentlive.tv**: UK parliament videos @@ -624,7 +636,7 @@ - **radiofrance** - **RadioJavan** - **Rai** - - **RaiTV** + - **RaiPlay** - **RBMARadio** - **RDS**: RDS.ca - **RedBullTV** @@ -649,7 +661,9 @@ - **rte**: Raidió Teilifís Éireann TV - **rte:radio**: Raidió Teilifís Éireann radio - **rtl.nl**: rtl.nl and rtlxl.nl - - **RTL2** + - **rtl2** + - **rtl2:you** + - **rtl2:you:series** - **RTP** - **RTS**: RTS.ch - **rtve.es:alacarta**: RTVE a la carta @@ -731,6 +745,7 @@ - **Steam** - **Stitcher** - **Streamable** + - **Streamango** - **streamcloud.eu** - **StreamCZ** - **StreetVoice** @@ -771,17 +786,18 @@ - **TheScene** - **TheSixtyOne** - **TheStar** + - **TheSun** - **TheWeatherChannel** - **ThisAmericanLife** - **ThisAV** - **ThisOldHouse** - **tinypic**: tinypic.com videos - - **tlc.de** - **TMZ** - **TMZArticle** - **TNAFlix** - **TNAFlixNetworkEmbed** - **toggle** + - **ToonGoggles** - **Tosh**: Tosh.0 - **tou.tv** - **Toypics**: Toypics user profile @@ -804,9 +820,11 @@ - **Tutv** - **tv.dfb.de** - **TV2** + - **tv2.hu** - **TV2Article** - **TV3** - **TV4**: tv4.se and tv4play.se + - **TV5MondePlus**: TV5MONDE+ - **TVA** - **TVANouvelles** - **TVANouvellesArticle** @@ -883,7 +901,7 @@ - **vidme:user** - **vidme:user:likes** - **Vidzi** - - **vier** + - **vier**: vier.be and vijf.be - **vier:videos** - **ViewLift** - **ViewLiftEmbed** @@ -920,7 +938,10 @@ - **Vporn** - **vpro**: npo.nl and ntr.nl - **Vrak** - - **VRT** + - **VRT**: deredactie.be, sporza.be, cobra.be and cobra.canvas.be + - **vrv** + - **vrv:series** + - **VShare** - **vube**: Vube.com - 
**VuClip** - **VVVVID** @@ -946,9 +967,10 @@ - **wrzuta.pl** - **wrzuta.pl:playlist** - **WSJ**: Wall Street Journal + - **WSJArticle** - **XBef** - **XboxClips** - - **XFileShare**: XFileShare based sites: DaClips, FileHoot, GorillaVid, MovPod, PowerWatch, Rapidvideo.ws, TheVideoBee, Vidto, Streamin.To, XVIDSTAGE + - **XFileShare**: XFileShare based sites: DaClips, FileHoot, GorillaVid, MovPod, PowerWatch, Rapidvideo.ws, TheVideoBee, Vidto, Streamin.To, XVIDSTAGE, Vid ABC, VidBom, vidlo - **XHamster** - **XHamsterEmbed** - **xiami:album**: 虾米音乐 - 专辑 diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py index 437c7270e..881197afb 100644 --- a/test/test_InfoExtractor.py +++ b/test/test_InfoExtractor.py @@ -8,7 +8,7 @@ import sys import unittest sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) -from test.helper import FakeYDL +from test.helper import FakeYDL, expect_dict from youtube_dl.extractor.common import InfoExtractor from youtube_dl.extractor import YoutubeIE, get_info_extractor from youtube_dl.utils import encode_data_uri, strip_jsonp, ExtractorError, RegexNotFoundError @@ -84,6 +84,97 @@ class TestInfoExtractor(unittest.TestCase): self.assertRaises(ExtractorError, self.ie._download_json, uri, None) self.assertEqual(self.ie._download_json(uri, None, fatal=False), None) + def test_extract_jwplayer_data_realworld(self): + # from http://www.suffolk.edu/sjc/ + expect_dict( + self, + self.ie._extract_jwplayer_data(r''' + + ''', None, require_title=False), + { + 'id': 'XEgvuql4', + 'formats': [{ + 'url': 'rtmp://192.138.214.154/live/sjclive', + 'ext': 'flv' + }] + }) + + # from https://www.pornoxo.com/videos/7564/striptease-from-sexy-secretary/ + expect_dict( + self, + self.ie._extract_jwplayer_data(r''' + + ''', 'dummy', require_title=False), + { + 'thumbnail': 'https://t03.vipstreamservice.com/thumbs/pxo-full/2009-12/14/a4b2157147afe5efa93ce1978e0265289c193874e02597.flv-full-13.jpg', + 'formats': [{ + 'url': 
'https://cdn.pornoxo.com/key=MF+oEbaxqTKb50P-w9G3nA,end=1489689259,ip=104.199.146.27/ip=104.199.146.27/speed=6573765/buffer=3.0/2009-12/4b2157147afe5efa93ce1978e0265289c193874e02597.flv', + 'ext': 'flv' + }] + }) + + # from http://www.indiedb.com/games/king-machine/videos + expect_dict( + self, + self.ie._extract_jwplayer_data(r''' + + ''', 'dummy'), + { + 'title': 'king machine trailer 1', + 'thumbnail': 'http://media.indiedb.com/cache/images/games/1/50/49678/thumb_620x2000/king-machine-trailer.mp4.jpg', + 'formats': [{ + 'url': 'http://cdn.dbolical.com/cache/videos/games/1/50/49678/encode_mp4/king-machine-trailer.mp4', + 'height': 360, + 'ext': 'mp4' + }, { + 'url': 'http://cdn.dbolical.com/cache/videos/games/1/50/49678/encode720p_mp4/king-machine-trailer.mp4', + 'height': 720, + 'ext': 'mp4' + }] + }) + if __name__ == '__main__': unittest.main() diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py index 8491a88bd..75945e38f 100644 --- a/test/test_YoutubeDL.py +++ b/test/test_YoutubeDL.py @@ -755,6 +755,7 @@ class TestYoutubeDL(unittest.TestCase): '_type': 'url_transparent', 'url': 'foo2:', 'ie_key': 'Foo2', + 'title': 'foo1 title' } class Foo2IE(InfoExtractor): @@ -771,7 +772,7 @@ class TestYoutubeDL(unittest.TestCase): _VALID_URL = r'foo3:' def _real_extract(self, url): - return _make_result([{'url': TEST_URL}]) + return _make_result([{'url': TEST_URL}], title='foo3 title') ydl.add_info_extractor(Foo1IE(ydl)) ydl.add_info_extractor(Foo2IE(ydl)) @@ -779,6 +780,7 @@ class TestYoutubeDL(unittest.TestCase): ydl.extract_info('foo1:') downloaded = ydl.downloaded_info_dicts[0] self.assertEqual(downloaded['url'], TEST_URL) + self.assertEqual(downloaded['title'], 'foo1 title') if __name__ == '__main__': diff --git a/test/test_compat.py b/test/test_compat.py index b57424948..d6c54e135 100644 --- a/test/test_compat.py +++ b/test/test_compat.py @@ -27,11 +27,11 @@ from youtube_dl.compat import ( class TestCompat(unittest.TestCase): def test_compat_getenv(self): 
test_str = 'тест' - compat_setenv('YOUTUBE-DL-TEST', test_str) - self.assertEqual(compat_getenv('YOUTUBE-DL-TEST'), test_str) + compat_setenv('YOUTUBE_DL_COMPAT_GETENV', test_str) + self.assertEqual(compat_getenv('YOUTUBE_DL_COMPAT_GETENV'), test_str) def test_compat_setenv(self): - test_var = 'YOUTUBE-DL-TEST' + test_var = 'YOUTUBE_DL_COMPAT_SETENV' test_str = 'тест' compat_setenv(test_var, test_str) compat_getenv(test_var) diff --git a/test/test_download.py b/test/test_download.py index 30034f978..0e9f293b5 100644 --- a/test/test_download.py +++ b/test/test_download.py @@ -71,6 +71,18 @@ class TestDownload(unittest.TestCase): maxDiff = None + def __str__(self): + """Identify each test with the `add_ie` attribute, if available.""" + + def strclass(cls): + """From 2.7's unittest; 2.6 had _strclass so we can't import it.""" + return '%s.%s' % (cls.__module__, cls.__name__) + + add_ie = getattr(self, self._testMethodName).add_ie + return '%s (%s)%s:' % (self._testMethodName, + strclass(self.__class__), + ' [%s]' % add_ie if add_ie else '') + def setUp(self): self.defs = defs @@ -139,7 +151,7 @@ def generator(test_case, tname): try_num = 1 while True: try: - # We're not using .download here sine that is just a shim + # We're not using .download here since that is just a shim # for outside error handling, and returns the exit code # instead of the result dict. 
res_dict = ydl.extract_info( @@ -187,7 +199,16 @@ def generator(test_case, tname): self.assertEqual( test_case['playlist_duration_sum'], got_duration) - for tc in test_cases: + # Generalize both playlists and single videos to unified format for + # simplicity + if 'entries' not in res_dict: + res_dict['entries'] = [res_dict] + + for tc_num, tc in enumerate(test_cases): + tc_res_dict = res_dict['entries'][tc_num] + # First, check test cases' data against extracted data alone + expect_info_dict(self, tc_res_dict, tc.get('info_dict', {})) + # Now, check downloaded file consistency tc_filename = get_tc_filename(tc) if not test_case.get('params', {}).get('skip_download', False): self.assertTrue(os.path.exists(tc_filename), msg='Missing file ' + tc_filename) @@ -205,13 +226,14 @@ def generator(test_case, tname): if 'md5' in tc: md5_for_file = _file_md5(tc_filename) self.assertEqual(md5_for_file, tc['md5']) + # Finally, check test cases' data again but this time against + # extracted data from info JSON file written during processing info_json_fn = os.path.splitext(tc_filename)[0] + '.info.json' self.assertTrue( os.path.exists(info_json_fn), 'Missing info file %s' % info_json_fn) with io.open(info_json_fn, encoding='utf-8') as infof: info_dict = json.load(infof) - expect_info_dict(self, info_dict, tc.get('info_dict', {})) finally: try_rm_tcs_files() @@ -233,6 +255,8 @@ for n, test_case in enumerate(defs): i += 1 test_method = generator(test_case, tname) test_method.__name__ = str(tname) + ie_list = test_case.get('add_ie') + test_method.add_ie = ie_list and ','.join(ie_list) setattr(TestDownload, test_method.__name__, test_method) del test_method diff --git a/test/test_subtitles.py b/test/test_subtitles.py index 27e763edd..1b8de822a 100644 --- a/test/test_subtitles.py +++ b/test/test_subtitles.py @@ -21,7 +21,7 @@ from youtube_dl.extractor import ( NPOIE, ComedyCentralIE, NRKTVIE, - RaiTVIE, + RaiPlayIE, VikiIE, ThePlatformIE, ThePlatformFeedIE, @@ -258,9 +258,9 @@ class 
TestNRKSubtitles(BaseTestSubtitles): self.assertEqual(md5(subtitles['no']), '544fa917d3197fcbee64634559221cc2') -class TestRaiSubtitles(BaseTestSubtitles): - url = 'http://www.rai.tv/dl/RaiTV/programmi/media/ContentItem-cb27157f-9dd0-4aee-b788-b1f67643a391.html' - IE = RaiTVIE +class TestRaiPlaySubtitles(BaseTestSubtitles): + url = 'http://www.raiplay.it/video/2014/04/Report-del-07042014-cb27157f-9dd0-4aee-b788-b1f67643a391.html' + IE = RaiPlayIE def test_allsubtitles(self): self.DL.params['writesubtitles'] = True diff --git a/test/test_utils.py b/test/test_utils.py index 173c49514..aa4569b81 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -56,6 +56,7 @@ from youtube_dl.utils import ( read_batch_urls, sanitize_filename, sanitize_path, + expand_path, prepend_extension, replace_extension, remove_start, @@ -95,6 +96,8 @@ from youtube_dl.utils import ( from youtube_dl.compat import ( compat_chr, compat_etree_fromstring, + compat_getenv, + compat_setenv, compat_urlparse, compat_parse_qs, ) @@ -214,6 +217,18 @@ class TestUtil(unittest.TestCase): self.assertEqual(sanitize_path('./abc'), 'abc') self.assertEqual(sanitize_path('./../abc'), '..\\abc') + def test_expand_path(self): + def env(var): + return '%{0}%'.format(var) if sys.platform == 'win32' else '${0}'.format(var) + + compat_setenv('YOUTUBE_DL_EXPATH_PATH', 'expanded') + self.assertEqual(expand_path(env('YOUTUBE_DL_EXPATH_PATH')), 'expanded') + self.assertEqual(expand_path(env('HOME')), compat_getenv('HOME')) + self.assertEqual(expand_path('~'), compat_getenv('HOME')) + self.assertEqual( + expand_path('~/%s' % env('YOUTUBE_DL_EXPATH_PATH')), + '%s/expanded' % compat_getenv('HOME')) + def test_prepend_extension(self): self.assertEqual(prepend_extension('abc.ext', 'temp'), 'abc.temp.ext') self.assertEqual(prepend_extension('abc.ext', 'temp', 'ext'), 'abc.temp.ext') diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 13a3a909e..819b374ef 100755 --- a/youtube_dl/YoutubeDL.py +++ 
b/youtube_dl/YoutubeDL.py @@ -29,7 +29,6 @@ import random from .compat import ( compat_basestring, compat_cookiejar, - compat_expanduser, compat_get_terminal_size, compat_http_client, compat_kwargs, @@ -54,6 +53,7 @@ from .utils import ( encode_compat_str, encodeFilename, error_to_compat_str, + expand_path, ExtractorError, format_bytes, formatSeconds, @@ -672,8 +672,7 @@ class YoutubeDL(object): FORMAT_RE.format(numeric_field), r'%({0})s'.format(numeric_field), outtmpl) - tmpl = compat_expanduser(outtmpl) - filename = tmpl % template_dict + filename = expand_path(outtmpl % template_dict) # Temporary fix for #4787 # 'Treat' all problem characters by passing filename through preferredencoding # to workaround encoding issues with subprocess on python2 @ Windows @@ -837,6 +836,12 @@ class YoutubeDL(object): ie_result['url'], ie_key=ie_result.get('ie_key'), extra_info=extra_info, download=False, process=False) + # extract_info may return None when ignoreerrors is enabled and + # extraction failed with an error, don't crash and return early + # in this case + if not info: + return info + force_properties = dict( (k, v) for k, v in ie_result.items() if v is not None) for f in ('_type', 'url', 'ie_key'): @@ -845,11 +850,18 @@ class YoutubeDL(object): new_result = info.copy() new_result.update(force_properties) - assert new_result.get('_type') != 'url_transparent' + # Extracted info may not be a video result (i.e. + # info.get('_type', 'video') != video) but rather an url or + # url_transparent. In such cases outer metadata (from ie_result) + # should be propagated to inner one (info). For this to happen + # _type of info should be overridden with url_transparent. This + # fixes issue from https://github.com/rg3/youtube-dl/pull/11163. 
+ if new_result.get('_type') == 'url': + new_result['_type'] = 'url_transparent' return self.process_ie_result( new_result, download=download, extra_info=extra_info) - elif result_type == 'playlist' or result_type == 'multi_video': + elif result_type in ('playlist', 'multi_video'): # We process each entry in the playlist playlist = ie_result.get('title') or ie_result.get('id') self.to_screen('[download] Downloading playlist: %s' % playlist) @@ -1872,6 +1884,7 @@ class YoutubeDL(object): """Download a given list of URLs.""" outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL) if (len(url_list) > 1 and + outtmpl != '-' and '%' not in outtmpl and self.params.get('max_downloads') != 1): raise SameFileError(outtmpl) @@ -2169,7 +2182,7 @@ class YoutubeDL(object): if opts_cookiefile is None: self.cookiejar = compat_cookiejar.CookieJar() else: - opts_cookiefile = compat_expanduser(opts_cookiefile) + opts_cookiefile = expand_path(opts_cookiefile) self.cookiejar = compat_cookiejar.MozillaCookieJar( opts_cookiefile) if os.access(opts_cookiefile, os.R_OK): diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index c482f9375..f15606568 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -16,7 +16,6 @@ from .options import ( parseOpts, ) from .compat import ( - compat_expanduser, compat_getpass, compat_shlex_split, workaround_optparse_bug9161, @@ -26,6 +25,7 @@ from .utils import ( decodeOption, DEFAULT_OUTTMPL, DownloadError, + expand_path, match_filter_func, MaxDownloadsReached, preferredencoding, @@ -88,7 +88,7 @@ def _real_main(argv=None): batchfd = sys.stdin else: batchfd = io.open( - compat_expanduser(opts.batchfile), + expand_path(opts.batchfile), 'r', encoding='utf-8', errors='ignore') batch_urls = read_batch_urls(batchfd) if opts.verbose: @@ -196,7 +196,7 @@ def _real_main(argv=None): if opts.playlistend not in (-1, None) and opts.playlistend < opts.playliststart: raise ValueError('Playlist end must be greater than playlist start') if 
opts.extractaudio: - if opts.audioformat not in ['best', 'aac', 'mp3', 'm4a', 'opus', 'vorbis', 'wav']: + if opts.audioformat not in ['best', 'aac', 'flac', 'mp3', 'm4a', 'opus', 'vorbis', 'wav']: parser.error('invalid audio format specified') if opts.audioquality: opts.audioquality = opts.audioquality.strip('k').strip('K') @@ -238,7 +238,7 @@ def _real_main(argv=None): any_getting = opts.geturl or opts.gettitle or opts.getid or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat or opts.getduration or opts.dumpjson or opts.dump_single_json any_printing = opts.print_json - download_archive_fn = compat_expanduser(opts.download_archive) if opts.download_archive is not None else opts.download_archive + download_archive_fn = expand_path(opts.download_archive) if opts.download_archive is not None else opts.download_archive # PostProcessors postprocessors = [] @@ -449,7 +449,7 @@ def _real_main(argv=None): try: if opts.load_info_filename is not None: - retcode = ydl.download_with_info_file(compat_expanduser(opts.load_info_filename)) + retcode = ydl.download_with_info_file(expand_path(opts.load_info_filename)) else: retcode = ydl.download(all_urls) except MaxDownloadsReached: diff --git a/youtube_dl/cache.py b/youtube_dl/cache.py index 5fe839eb1..7bdade1bd 100644 --- a/youtube_dl/cache.py +++ b/youtube_dl/cache.py @@ -8,8 +8,11 @@ import re import shutil import traceback -from .compat import compat_expanduser, compat_getenv -from .utils import write_json_file +from .compat import compat_getenv +from .utils import ( + expand_path, + write_json_file, +) class Cache(object): @@ -21,7 +24,7 @@ class Cache(object): if res is None: cache_root = compat_getenv('XDG_CACHE_HOME', '~/.cache') res = os.path.join(cache_root, 'youtube-dl') - return compat_expanduser(res) + return expand_path(res) def _get_cache_fn(self, section, key, dtype): assert re.match(r'^[a-zA-Z0-9_.-]+$', section), \ diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py index 
0c119e417..39527117f 100644 --- a/youtube_dl/compat.py +++ b/youtube_dl/compat.py @@ -2692,7 +2692,7 @@ else: userhome = pwent.pw_dir userhome = userhome.rstrip('/') return (userhome + path[i:]) or '/' - elif compat_os_name == 'nt' or compat_os_name == 'ce': + elif compat_os_name in ('nt', 'ce'): def compat_expanduser(path): """Expand ~ and ~user constructs. diff --git a/youtube_dl/downloader/__init__.py b/youtube_dl/downloader/__init__.py index 16952e359..2e485df9d 100644 --- a/youtube_dl/downloader/__init__.py +++ b/youtube_dl/downloader/__init__.py @@ -43,6 +43,9 @@ def get_suitable_downloader(info_dict, params={}): if ed.can_download(info_dict): return ed + if protocol.startswith('m3u8') and info_dict.get('is_live'): + return FFmpegFD + if protocol == 'm3u8' and params.get('hls_prefer_native') is True: return HlsFD diff --git a/youtube_dl/downloader/hls.py b/youtube_dl/downloader/hls.py index 4989abce1..d0a5f7ba4 100644 --- a/youtube_dl/downloader/hls.py +++ b/youtube_dl/downloader/hls.py @@ -34,7 +34,7 @@ class HlsFD(FragmentFD): def can_download(manifest, info_dict): UNSUPPORTED_FEATURES = ( r'#EXT-X-KEY:METHOD=(?!NONE|AES-128)', # encrypted streams [1] - r'#EXT-X-BYTERANGE', # playlists composed of byte ranges of media files [2] + # r'#EXT-X-BYTERANGE', # playlists composed of byte ranges of media files [2] # Live streams heuristic does not always work (e.g. geo restricted to Germany # http://hls-geo.daserste.de/i/videoportal/Film/c_620000/622873/format,716451,716457,716450,716458,716459,.mp4.csmil/index_4_av.m3u8?null=0) @@ -52,7 +52,9 @@ class HlsFD(FragmentFD): # 4. 
https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.5 ) check_results = [not re.search(feature, manifest) for feature in UNSUPPORTED_FEATURES] - check_results.append(can_decrypt_frag or '#EXT-X-KEY:METHOD=AES-128' not in manifest) + is_aes128_enc = '#EXT-X-KEY:METHOD=AES-128' in manifest + check_results.append(can_decrypt_frag or not is_aes128_enc) + check_results.append(not (is_aes128_enc and r'#EXT-X-BYTERANGE' in manifest)) check_results.append(not info_dict.get('is_live')) return all(check_results) @@ -100,6 +102,7 @@ class HlsFD(FragmentFD): i = 0 media_sequence = 0 decrypt_info = {'METHOD': 'NONE'} + byte_range = {} frags_filenames = [] for line in s.splitlines(): line = line.strip() @@ -114,11 +117,14 @@ class HlsFD(FragmentFD): if extra_query: frag_url = update_url_query(frag_url, extra_query) count = 0 + headers = info_dict.get('http_headers', {}) + if byte_range: + headers['Range'] = 'bytes=%d-%d' % (byte_range['start'], byte_range['end']) while count <= fragment_retries: try: success = ctx['dl'].download(frag_filename, { 'url': frag_url, - 'http_headers': info_dict.get('http_headers'), + 'http_headers': headers, }) if not success: return False @@ -167,6 +173,13 @@ class HlsFD(FragmentFD): decrypt_info['KEY'] = self.ydl.urlopen(decrypt_info['URI']).read() elif line.startswith('#EXT-X-MEDIA-SEQUENCE'): media_sequence = int(line[22:]) + elif line.startswith('#EXT-X-BYTERANGE'): + splitted_byte_range = line[17:].split('@') + sub_range_start = int(splitted_byte_range[1]) if len(splitted_byte_range) == 2 else byte_range['end'] + byte_range = { + 'start': sub_range_start, + 'end': sub_range_start + int(splitted_byte_range[0]), + } self._finish_frag_download(ctx) diff --git a/youtube_dl/downloader/rtmp.py b/youtube_dl/downloader/rtmp.py index 9de6e70bb..b823b5171 100644 --- a/youtube_dl/downloader/rtmp.py +++ b/youtube_dl/downloader/rtmp.py @@ -169,7 +169,7 @@ class RtmpFD(FileDownloader): self.report_error('[rtmpdump] Could not connect 
to RTMP server.') return False - while (retval == RD_INCOMPLETE or retval == RD_FAILED) and not test and not live: + while retval in (RD_INCOMPLETE, RD_FAILED) and not test and not live: prevsize = os.path.getsize(encodeFilename(tmpfilename)) self.to_screen('[rtmpdump] %s bytes' % prevsize) time.sleep(5.0) # This seems to be needed diff --git a/youtube_dl/extractor/adn.py b/youtube_dl/extractor/adn.py new file mode 100644 index 000000000..66caf6a81 --- /dev/null +++ b/youtube_dl/extractor/adn.py @@ -0,0 +1,136 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import base64 +import json +import os + +from .common import InfoExtractor +from ..aes import aes_cbc_decrypt +from ..compat import compat_ord +from ..utils import ( + bytes_to_intlist, + ExtractorError, + float_or_none, + intlist_to_bytes, + srt_subtitles_timecode, + strip_or_none, +) + + +class ADNIE(InfoExtractor): + IE_DESC = 'Anime Digital Network' + _VALID_URL = r'https?://(?:www\.)?animedigitalnetwork\.fr/video/[^/]+/(?P\d+)' + _TEST = { + 'url': 'http://animedigitalnetwork.fr/video/blue-exorcist-kyoto-saga/7778-episode-1-debut-des-hostilites', + 'md5': 'e497370d847fd79d9d4c74be55575c7a', + 'info_dict': { + 'id': '7778', + 'ext': 'mp4', + 'title': 'Blue Exorcist - Kyôto Saga - Épisode 1', + 'description': 'md5:2f7b5aa76edbc1a7a92cedcda8a528d5', + } + } + + def _get_subtitles(self, sub_path, video_id): + if not sub_path: + return None + + enc_subtitles = self._download_webpage( + 'http://animedigitalnetwork.fr/' + sub_path, + video_id, fatal=False) + if not enc_subtitles: + return None + + # http://animedigitalnetwork.fr/components/com_vodvideo/videojs/adn-vjs.min.js + dec_subtitles = intlist_to_bytes(aes_cbc_decrypt( + bytes_to_intlist(base64.b64decode(enc_subtitles[24:])), + bytes_to_intlist(b'\nd\xaf\xd2J\xd0\xfc\xe1\xfc\xdf\xb61\xe8\xe1\xf0\xcc'), + bytes_to_intlist(base64.b64decode(enc_subtitles[:24])) + )) + subtitles_json = self._parse_json( + 
dec_subtitles[:-compat_ord(dec_subtitles[-1])], + None, fatal=False) + if not subtitles_json: + return None + + subtitles = {} + for sub_lang, sub in subtitles_json.items(): + srt = '' + for num, current in enumerate(sub): + start, end, text = ( + float_or_none(current.get('startTime')), + float_or_none(current.get('endTime')), + current.get('text')) + if start is None or end is None or text is None: + continue + srt += os.linesep.join( + ( + '%d' % num, + '%s --> %s' % ( + srt_subtitles_timecode(start), + srt_subtitles_timecode(end)), + text, + os.linesep, + )) + + if sub_lang == 'vostf': + sub_lang = 'fr' + subtitles.setdefault(sub_lang, []).extend([{ + 'ext': 'json', + 'data': json.dumps(sub), + }, { + 'ext': 'srt', + 'data': srt, + }]) + return subtitles + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + player_config = self._parse_json(self._search_regex( + r'playerConfig\s*=\s*({.+});', webpage, 'player config'), video_id) + + video_info = {} + video_info_str = self._search_regex( + r'videoInfo\s*=\s*({.+});', webpage, + 'video info', fatal=False) + if video_info_str: + video_info = self._parse_json( + video_info_str, video_id, fatal=False) or {} + + options = player_config.get('options') or {} + metas = options.get('metas') or {} + title = metas.get('title') or video_info['title'] + links = player_config.get('links') or {} + + formats = [] + for format_id, qualities in links.items(): + for load_balancer_url in qualities.values(): + load_balancer_data = self._download_json( + load_balancer_url, video_id, fatal=False) or {} + m3u8_url = load_balancer_data.get('location') + if not m3u8_url: + continue + m3u8_formats = self._extract_m3u8_formats( + m3u8_url, video_id, 'mp4', 'm3u8_native', + m3u8_id=format_id, fatal=False) + if format_id == 'vf': + for f in m3u8_formats: + f['language'] = 'fr' + formats.extend(m3u8_formats) + error = options.get('error') + if not formats and error: + raise 
ExtractorError('%s said: %s' % (self.IE_NAME, error), expected=True) + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': title, + 'description': strip_or_none(metas.get('summary') or video_info.get('resume')), + 'thumbnail': video_info.get('image'), + 'formats': formats, + 'subtitles': self.extract_subtitles(player_config.get('subtitles'), video_id), + 'episode': metas.get('subtitle') or video_info.get('videoTitle'), + 'series': video_info.get('playlistTitle'), + } diff --git a/youtube_dl/extractor/adobepass.py b/youtube_dl/extractor/adobepass.py index d4816abf5..100cf997f 100644 --- a/youtube_dl/extractor/adobepass.py +++ b/youtube_dl/extractor/adobepass.py @@ -41,6 +41,11 @@ MSO_INFO = { 'username_field': 'IDToken1', 'password_field': 'IDToken2', }, + 'Verizon': { + 'name': 'Verizon FiOS', + 'username_field': 'IDToken1', + 'password_field': 'IDToken2', + }, 'thr030': { 'name': '3 Rivers Communications' }, @@ -1384,40 +1389,72 @@ class AdobePassIE(InfoExtractor): # Comcast page flow varies by video site and whether you # are on Comcast's network. provider_redirect_page, urlh = provider_redirect_page_res - # Check for Comcast auto login if 'automatically signing you in' in provider_redirect_page: oauth_redirect_url = self._html_search_regex( r'window\.location\s*=\s*[\'"]([^\'"]+)', provider_redirect_page, 'oauth redirect') - # Just need to process the request. No useful data comes back self._download_webpage( oauth_redirect_url, video_id, 'Confirming auto login') else: if '
Resume' in mvpd_confirm_page: post_form(mvpd_confirm_page_res, 'Confirming Login') - + elif mso_id == 'Verizon': + # In general, if you're connecting from a Verizon-assigned IP, + # you will not actually pass your credentials. + provider_redirect_page, urlh = provider_redirect_page_res + if 'Please wait ...' in provider_redirect_page: + saml_redirect_url = self._html_search_regex( + r'self\.parent\.location=(["\'])(?P.+?)\1', + provider_redirect_page, + 'SAML Redirect URL', group='url') + saml_login_page = self._download_webpage( + saml_redirect_url, video_id, + 'Downloading SAML Login Page') + else: + saml_login_page_res = post_form( + provider_redirect_page_res, 'Logging in', { + mso_info['username_field']: username, + mso_info['password_field']: password, + }) + saml_login_page, urlh = saml_login_page_res + if 'Please try again.' in saml_login_page: + raise ExtractorError( + 'We\'re sorry, but either the User ID or Password entered is not correct.') + saml_login_url = self._search_regex( + r'xmlHttp\.open\("POST"\s*,\s*(["\'])(?P.+?)\1', + saml_login_page, 'SAML Login URL', group='url') + saml_response_json = self._download_json( + saml_login_url, video_id, 'Downloading SAML Response', + headers={'Content-Type': 'text/xml'}) + self._download_webpage( + saml_response_json['targetValue'], video_id, + 'Confirming Login', data=urlencode_postdata({ + 'SAMLResponse': saml_response_json['SAMLResponse'], + 'RelayState': saml_response_json['RelayState'] + }), headers={ + 'Content-Type': 'application/x-www-form-urlencoded' + }) else: - # Normal, non-Comcast flow provider_login_page_res = post_form( provider_redirect_page_res, 'Downloading Provider Login Page') mvpd_confirm_page_res = post_form(provider_login_page_res, 'Logging in', { @@ -1458,6 +1495,8 @@ class AdobePassIE(InfoExtractor): self._downloader.cache.store(self._MVPD_CACHE, requestor_id, {}) count += 1 continue + if 
'(?:history|aetv|mylifetime|lifetimemovieclub)\.com|fyi\.tv)/(?:shows/(?P[^/]+(?:/[^/]+){0,2})|movies/(?P[^/]+)(?:/full-movie)?)' + _VALID_URL = r'''(?x) + https?:// + (?:www\.)? + (?P + (?:history|aetv|mylifetime|lifetimemovieclub)\.com| + fyi\.tv + )/ + (?: + shows/(?P[^/]+(?:/[^/]+){0,2})| + movies/(?P[^/]+)(?:/full-movie)?| + specials/(?P[^/]+)/full-special + ) + ''' _TESTS = [{ 'url': 'http://www.history.com/shows/mountain-men/season-1/episode-1', 'md5': 'a97a65f7e823ae10e9244bc5433d5fe6', @@ -65,6 +77,9 @@ class AENetworksIE(AENetworksBaseIE): }, { 'url': 'https://www.lifetimemovieclub.com/movies/a-killer-among-us', 'only_matching': True + }, { + 'url': 'http://www.history.com/specials/sniper-into-the-kill-zone/full-special', + 'only_matching': True }] _DOMAIN_TO_REQUESTOR_ID = { 'history.com': 'HISTORY', @@ -75,8 +90,8 @@ class AENetworksIE(AENetworksBaseIE): } def _real_extract(self, url): - domain, show_path, movie_display_id = re.match(self._VALID_URL, url).groups() - display_id = show_path or movie_display_id + domain, show_path, movie_display_id, special_display_id = re.match(self._VALID_URL, url).groups() + display_id = show_path or movie_display_id or special_display_id webpage = self._download_webpage(url, display_id) if show_path: url_parts = show_path.split('/') @@ -107,7 +122,10 @@ class AENetworksIE(AENetworksBaseIE): } video_id = self._html_search_meta('aetn:VideoID', webpage) media_url = self._search_regex( - r"media_url\s*=\s*'([^']+)'", webpage, 'video url') + [r"media_url\s*=\s*'(?P[^']+)'", + r'data-media-url=(?P(?:https?:)?//[^\s>]+)', + r'data-media-url=(["\'])(?P(?:(?!\1).)+?)\1'], + webpage, 'video url', group='url') theplatform_metadata = self._download_theplatform_metadata(self._search_regex( r'https?://link.theplatform.com/s/([^?]+)', media_url, 'theplatform_path'), video_id) info = self._parse_theplatform_metadata(theplatform_metadata) diff --git a/youtube_dl/extractor/afreecatv.py b/youtube_dl/extractor/afreecatv.py index 
e0a0f7c57..78d29c861 100644 --- a/youtube_dl/extractor/afreecatv.py +++ b/youtube_dl/extractor/afreecatv.py @@ -4,15 +4,11 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..compat import ( - compat_urllib_parse_urlparse, - compat_urlparse, -) +from ..compat import compat_xpath from ..utils import ( + determine_ext, ExtractorError, int_or_none, - update_url_query, - xpath_element, xpath_text, ) @@ -43,7 +39,8 @@ class AfreecaTVIE(InfoExtractor): 'uploader': 'dailyapril', 'uploader_id': 'dailyapril', 'upload_date': '20160503', - } + }, + 'skip': 'Video is gone', }, { 'url': 'http://afbbs.afreecatv.com:8080/app/read_ucc_bbs.cgi?nStationNo=16711924&nTitleNo=36153164&szBjId=dailyapril&nBbsNo=18605867', 'info_dict': { @@ -71,6 +68,76 @@ class AfreecaTVIE(InfoExtractor): 'upload_date': '20160502', }, }], + 'skip': 'Video is gone', + }, { + 'url': 'http://vod.afreecatv.com/PLAYER/STATION/18650793', + 'info_dict': { + 'id': '18650793', + 'ext': 'mp4', + 'title': '오늘은 다르다! 쏘님의 우월한 위아래~ 댄스리액션!', + 'thumbnail': r're:^https?://.*\.jpg$', + 'uploader': '윈아디', + 'uploader_id': 'badkids', + 'duration': 107, + }, + 'params': { + 'skip_download': True, + }, + }, { + 'url': 'http://vod.afreecatv.com/PLAYER/STATION/10481652', + 'info_dict': { + 'id': '10481652', + 'title': "BJ유트루와 함께하는 '팅커벨 메이크업!'", + 'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$', + 'uploader': 'dailyapril', + 'uploader_id': 'dailyapril', + 'duration': 6492, + }, + 'playlist_count': 2, + 'playlist': [{ + 'md5': 'd8b7c174568da61d774ef0203159bf97', + 'info_dict': { + 'id': '20160502_c4c62b9d_174361386_1', + 'ext': 'mp4', + 'title': "BJ유트루와 함께하는 '팅커벨 메이크업!' 
(part 1)", + 'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$', + 'uploader': 'dailyapril', + 'uploader_id': 'dailyapril', + 'upload_date': '20160502', + 'duration': 3601, + }, + }, { + 'md5': '58f2ce7f6044e34439ab2d50612ab02b', + 'info_dict': { + 'id': '20160502_39e739bb_174361386_2', + 'ext': 'mp4', + 'title': "BJ유트루와 함께하는 '팅커벨 메이크업!' (part 2)", + 'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$', + 'uploader': 'dailyapril', + 'uploader_id': 'dailyapril', + 'upload_date': '20160502', + 'duration': 2891, + }, + }], + 'params': { + 'skip_download': True, + }, + }, { + # non standard key + 'url': 'http://vod.afreecatv.com/PLAYER/STATION/20515605', + 'info_dict': { + 'id': '20170411_BE689A0E_190960999_1_2_h', + 'ext': 'mp4', + 'title': '혼자사는여자집', + 'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$', + 'uploader': '♥이슬이', + 'uploader_id': 'dasl8121', + 'upload_date': '20170411', + 'duration': 213, + }, + 'params': { + 'skip_download': True, + }, }, { 'url': 'http://www.afreecatv.com/player/Player.swf?szType=szBjId=djleegoon&nStationNo=11273158&nBbsNo=13161095&nTitleNo=36327652', 'only_matching': True, @@ -85,42 +152,77 @@ class AfreecaTVIE(InfoExtractor): m = re.match(r'^(?P\d{8})_\w+_(?P\d+)$', key) if m: video_key['upload_date'] = m.group('upload_date') - video_key['part'] = m.group('part') + video_key['part'] = int(m.group('part')) return video_key def _real_extract(self, url): video_id = self._match_id(url) - parsed_url = compat_urllib_parse_urlparse(url) - info_url = compat_urlparse.urlunparse(parsed_url._replace( - netloc='afbbs.afreecatv.com:8080', - path='/api/video/get_video_info.php')) video_xml = self._download_xml( - update_url_query(info_url, {'nTitleNo': video_id}), video_id) + 'http://afbbs.afreecatv.com:8080/api/video/get_video_info.php', + video_id, query={'nTitleNo': video_id}) - if xpath_element(video_xml, './track/video/file') is None: + video_element = video_xml.findall(compat_xpath('./track/video'))[1] + if 
video_element is None or video_element.text is None: raise ExtractorError('Specified AfreecaTV video does not exist', expected=True) - title = xpath_text(video_xml, './track/title', 'title') + video_url = video_element.text.strip() + + title = xpath_text(video_xml, './track/title', 'title', fatal=True) + uploader = xpath_text(video_xml, './track/nickname', 'uploader') uploader_id = xpath_text(video_xml, './track/bj_id', 'uploader id') - duration = int_or_none(xpath_text(video_xml, './track/duration', - 'duration')) + duration = int_or_none(xpath_text( + video_xml, './track/duration', 'duration')) thumbnail = xpath_text(video_xml, './track/titleImage', 'thumbnail') - entries = [] - for i, video_file in enumerate(video_xml.findall('./track/video/file')): - video_key = self.parse_video_key(video_file.get('key', '')) - if not video_key: - continue - entries.append({ - 'id': '%s_%s' % (video_id, video_key.get('part', i + 1)), - 'title': title, - 'upload_date': video_key.get('upload_date'), - 'duration': int_or_none(video_file.get('duration')), - 'url': video_file.text, + common_entry = { + 'uploader': uploader, + 'uploader_id': uploader_id, + 'thumbnail': thumbnail, + } + + info = common_entry.copy() + info.update({ + 'id': video_id, + 'title': title, + 'duration': duration, + }) + + if not video_url: + entries = [] + file_elements = video_element.findall(compat_xpath('./file')) + one = len(file_elements) == 1 + for file_num, file_element in enumerate(file_elements, start=1): + file_url = file_element.text + if not file_url: + continue + key = file_element.get('key', '') + upload_date = self._search_regex( + r'^(\d{8})_', key, 'upload date', default=None) + file_duration = int_or_none(file_element.get('duration')) + format_id = key if key else '%s_%s' % (video_id, file_num) + formats = self._extract_m3u8_formats( + file_url, video_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id='hls', + note='Downloading part %d m3u8 information' % file_num) + title = title if one 
else '%s (part %d)' % (title, file_num) + file_info = common_entry.copy() + file_info.update({ + 'id': format_id, + 'title': title, + 'upload_date': upload_date, + 'duration': file_duration, + 'formats': formats, + }) + entries.append(file_info) + entries_info = info.copy() + entries_info.update({ + '_type': 'multi_video', + 'entries': entries, }) + return entries_info info = { 'id': video_id, @@ -131,17 +233,18 @@ class AfreecaTVIE(InfoExtractor): 'thumbnail': thumbnail, } - if len(entries) > 1: - info['_type'] = 'multi_video' - info['entries'] = entries - elif len(entries) == 1: - info['url'] = entries[0]['url'] - info['upload_date'] = entries[0].get('upload_date') + if determine_ext(video_url) == 'm3u8': + info['formats'] = self._extract_m3u8_formats( + video_url, video_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id='hls') else: - raise ExtractorError( - 'No files found for the specified AfreecaTV video, either' - ' the URL is incorrect or the video has been made private.', - expected=True) + app, playpath = video_url.split('mp4:') + info.update({ + 'url': app, + 'ext': 'flv', + 'play_path': 'mp4:' + playpath, + 'rtmp_live': True, # downloading won't end without this + }) return info diff --git a/youtube_dl/extractor/airmozilla.py b/youtube_dl/extractor/airmozilla.py index 0e0691879..9e38136b4 100644 --- a/youtube_dl/extractor/airmozilla.py +++ b/youtube_dl/extractor/airmozilla.py @@ -15,12 +15,12 @@ class AirMozillaIE(InfoExtractor): _VALID_URL = r'https?://air\.mozilla\.org/(?P[0-9a-z-]+)/?' 
_TEST = { 'url': 'https://air.mozilla.org/privacy-lab-a-meetup-for-privacy-minded-people-in-san-francisco/', - 'md5': '2e3e7486ba5d180e829d453875b9b8bf', + 'md5': '8d02f53ee39cf006009180e21df1f3ba', 'info_dict': { 'id': '6x4q2w', 'ext': 'mp4', 'title': 'Privacy Lab - a meetup for privacy minded people in San Francisco', - 'thumbnail': r're:https?://vid\.ly/(?P[0-9a-z-]+)/poster', + 'thumbnail': r're:https?://.*/poster\.jpg', 'description': 'Brings together privacy professionals and others interested in privacy at for-profits, non-profits, and NGOs in an effort to contribute to the state of the ecosystem...', 'timestamp': 1422487800, 'upload_date': '20150128', @@ -34,21 +34,13 @@ class AirMozillaIE(InfoExtractor): def _real_extract(self, url): display_id = self._match_id(url) webpage = self._download_webpage(url, display_id) - video_id = self._html_search_regex(r'//vid.ly/(.*?)/embed', webpage, 'id') + video_id = self._html_search_regex(r'//vid\.ly/(.*?)/embed', webpage, 'id') embed_script = self._download_webpage('https://vid.ly/{0}/embed'.format(video_id), video_id) - jwconfig = self._search_regex(r'\svar jwconfig = (\{.*?\});\s', embed_script, 'metadata') - metadata = self._parse_json(jwconfig, video_id) - - formats = [{ - 'url': source['file'], - 'ext': source['type'], - 'format_id': self._search_regex(r'&format=(.*)$', source['file'], 'video format'), - 'format': source['label'], - 'height': int(source['label'].rstrip('p')), - } for source in metadata['playlist'][0]['sources']] - self._sort_formats(formats) + jwconfig = self._parse_json(self._search_regex( + r'initCallback\((.*)\);', embed_script, 'metadata'), video_id)['config'] + info_dict = self._parse_jwplayer_data(jwconfig, video_id) view_count = int_or_none(self._html_search_regex( r'Views since archived: ([0-9]+)', webpage, 'view count', fatal=False)) @@ -58,17 +50,17 @@ class AirMozillaIE(InfoExtractor): r'Duration:\s*(\d+\s*hours?\s*\d+\s*minutes?)', webpage, 'duration', fatal=False)) - return { + 
info_dict.update({ 'id': video_id, 'title': self._og_search_title(webpage), - 'formats': formats, 'url': self._og_search_url(webpage), 'display_id': display_id, - 'thumbnail': metadata['playlist'][0].get('image'), 'description': self._og_search_description(webpage), 'timestamp': timestamp, 'location': self._html_search_regex(r'Location: (.*)', webpage, 'location', default=None), 'duration': duration, 'view_count': view_count, 'categories': re.findall(r'(.*?)', webpage), - } + }) + + return info_dict diff --git a/youtube_dl/extractor/allocine.py b/youtube_dl/extractor/allocine.py index 90f11d39f..cd533acfc 100644 --- a/youtube_dl/extractor/allocine.py +++ b/youtube_dl/extractor/allocine.py @@ -2,9 +2,13 @@ from __future__ import unicode_literals from .common import InfoExtractor +from ..compat import compat_str from ..utils import ( - remove_end, + int_or_none, qualities, + remove_end, + try_get, + unified_timestamp, url_basename, ) @@ -22,6 +26,10 @@ class AllocineIE(InfoExtractor): 'title': 'Astérix - Le Domaine des Dieux Teaser VF', 'description': 'md5:4a754271d9c6f16c72629a8a993ee884', 'thumbnail': r're:http://.*\.jpg', + 'duration': 39, + 'timestamp': 1404273600, + 'upload_date': '20140702', + 'view_count': int, }, }, { 'url': 'http://www.allocine.fr/video/player_gen_cmedia=19540403&cfilm=222257.html', @@ -33,6 +41,10 @@ class AllocineIE(InfoExtractor): 'title': 'Planes 2 Bande-annonce VF', 'description': 'Regardez la bande annonce du film Planes 2 (Planes 2 Bande-annonce VF). 
Planes 2, un film de Roberts Gannaway', 'thumbnail': r're:http://.*\.jpg', + 'duration': 69, + 'timestamp': 1385659800, + 'upload_date': '20131128', + 'view_count': int, }, }, { 'url': 'http://www.allocine.fr/video/player_gen_cmedia=19544709&cfilm=181290.html', @@ -44,6 +56,10 @@ class AllocineIE(InfoExtractor): 'title': 'Dragons 2 - Bande annonce finale VF', 'description': 'md5:6cdd2d7c2687d4c6aafe80a35e17267a', 'thumbnail': r're:http://.*\.jpg', + 'duration': 144, + 'timestamp': 1397589900, + 'upload_date': '20140415', + 'view_count': int, }, }, { 'url': 'http://www.allocine.fr/video/video-19550147/', @@ -69,34 +85,37 @@ class AllocineIE(InfoExtractor): r'data-model="([^"]+)"', webpage, 'data model', default=None) if model: model_data = self._parse_json(model, display_id) - - for video_url in model_data['sources'].values(): + video = model_data['videos'][0] + title = video['title'] + for video_url in video['sources'].values(): video_id, format_id = url_basename(video_url).split('_')[:2] formats.append({ 'format_id': format_id, 'quality': quality(format_id), 'url': video_url, }) - - title = model_data['title'] + duration = int_or_none(video.get('duration')) + view_count = int_or_none(video.get('view_count')) + timestamp = unified_timestamp(try_get( + video, lambda x: x['added_at']['date'], compat_str)) else: video_id = display_id media_data = self._download_json( 'http://www.allocine.fr/ws/AcVisiondataV5.ashx?media=%s' % video_id, display_id) + title = remove_end( + self._html_search_regex( + r'(?s)(.+?)', webpage, 'title').strip(), + ' - AlloCiné') for key, value in media_data['video'].items(): if not key.endswith('Path'): continue - format_id = key[:-len('Path')] formats.append({ 'format_id': format_id, 'quality': quality(format_id), 'url': value, }) - - title = remove_end(self._html_search_regex( - r'(?s)(.+?)', webpage, 'title' - ).strip(), ' - AlloCiné') + duration, view_count, timestamp = [None] * 3 self._sort_formats(formats) @@ -104,7 +123,10 @@ class 
AllocineIE(InfoExtractor): 'id': video_id, 'display_id': display_id, 'title': title, - 'thumbnail': self._og_search_thumbnail(webpage), - 'formats': formats, 'description': self._og_search_description(webpage), + 'thumbnail': self._og_search_thumbnail(webpage), + 'duration': duration, + 'timestamp': timestamp, + 'view_count': view_count, + 'formats': formats, } diff --git a/youtube_dl/extractor/arkena.py b/youtube_dl/extractor/arkena.py index 50ffb442d..4495ddbb0 100644 --- a/youtube_dl/extractor/arkena.py +++ b/youtube_dl/extractor/arkena.py @@ -93,8 +93,7 @@ class ArkenaIE(InfoExtractor): exts = (mimetype2ext(f.get('Type')), determine_ext(f_url, None)) if kind == 'm3u8' or 'm3u8' in exts: formats.extend(self._extract_m3u8_formats( - f_url, video_id, 'mp4', - entry_protocol='m3u8' if is_live else 'm3u8_native', + f_url, video_id, 'mp4', 'm3u8_native', m3u8_id=kind, fatal=False, live=is_live)) elif kind == 'flash' or 'f4m' in exts: formats.extend(self._extract_f4m_formats( diff --git a/youtube_dl/extractor/atresplayer.py b/youtube_dl/extractor/atresplayer.py index e3c669830..99af6dc5a 100644 --- a/youtube_dl/extractor/atresplayer.py +++ b/youtube_dl/extractor/atresplayer.py @@ -90,7 +90,8 @@ class AtresPlayerIE(InfoExtractor): request, None, 'Logging in as %s' % username) error = self._html_search_regex( - r'(?s)
    (.+?)
', response, 'error', default=None) + r'(?s)]+class="[^"]*\blist_error\b[^"]*">(.+?)', + response, 'error', default=None) if error: raise ExtractorError( 'Unable to login: %s' % error, expected=True) @@ -155,13 +156,17 @@ class AtresPlayerIE(InfoExtractor): if format_id == 'token' or not video_url.startswith('http'): continue if 'geodeswowsmpra3player' in video_url: - f4m_path = video_url.split('smil:', 1)[-1].split('free_', 1)[0] - f4m_url = 'http://drg.antena3.com/{0}hds/es/sd.f4m'.format(f4m_path) + # f4m_path = video_url.split('smil:', 1)[-1].split('free_', 1)[0] + # f4m_url = 'http://drg.antena3.com/{0}hds/es/sd.f4m'.format(f4m_path) # this videos are protected by DRM, the f4m downloader doesn't support them continue - else: - f4m_url = video_url[:-9] + '/manifest.f4m' - formats.extend(self._extract_f4m_formats(f4m_url, video_id, f4m_id='hds', fatal=False)) + video_url_hd = video_url.replace('free_es', 'es') + formats.extend(self._extract_f4m_formats( + video_url_hd[:-9] + '/manifest.f4m', video_id, f4m_id='hds', + fatal=False)) + formats.extend(self._extract_mpd_formats( + video_url_hd[:-9] + '/manifest.mpd', video_id, mpd_id='dash', + fatal=False)) self._sort_formats(formats) path_data = player.get('pathData') diff --git a/youtube_dl/extractor/atvat.py b/youtube_dl/extractor/atvat.py new file mode 100644 index 000000000..1584d53fc --- /dev/null +++ b/youtube_dl/extractor/atvat.py @@ -0,0 +1,73 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + determine_ext, + int_or_none, + unescapeHTML, +) + + +class ATVAtIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?atv\.at/(?:[^/]+/){2}(?P[dv]\d+)' + _TESTS = [{ + 'url': 'http://atv.at/aktuell/di-210317-2005-uhr/v1698449/', + 'md5': 'c3b6b975fb3150fc628572939df205f2', + 'info_dict': { + 'id': '1698447', + 'ext': 'mp4', + 'title': 'DI, 21.03.17 | 20:05 Uhr 1/1', + } + }, { + 'url': 'http://atv.at/aktuell/meinrad-knapp/d8416/', + 
'only_matching': True, + }] + + def _real_extract(self, url): + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + video_data = self._parse_json(unescapeHTML(self._search_regex( + r'class="[^"]*jsb_video/FlashPlayer[^"]*"[^>]+data-jsb="([^"]+)"', + webpage, 'player data')), display_id)['config']['initial_video'] + + video_id = video_data['id'] + video_title = video_data['title'] + + parts = [] + for part in video_data.get('parts', []): + part_id = part['id'] + part_title = part['title'] + + formats = [] + for source in part.get('sources', []): + source_url = source.get('src') + if not source_url: + continue + ext = determine_ext(source_url) + if ext == 'm3u8': + formats.extend(self._extract_m3u8_formats( + source_url, part_id, 'mp4', 'm3u8_native', + m3u8_id='hls', fatal=False)) + else: + formats.append({ + 'format_id': source.get('delivery'), + 'url': source_url, + }) + self._sort_formats(formats) + + parts.append({ + 'id': part_id, + 'title': part_title, + 'thumbnail': part.get('preview_image_url'), + 'duration': int_or_none(part.get('duration')), + 'is_live': part.get('is_livestream'), + 'formats': formats, + }) + + return { + '_type': 'multi_video', + 'id': video_id, + 'title': video_title, + 'entries': parts, + } diff --git a/youtube_dl/extractor/bbc.py b/youtube_dl/extractor/bbc.py index 8a2ed0ab6..dd65b8d86 100644 --- a/youtube_dl/extractor/bbc.py +++ b/youtube_dl/extractor/bbc.py @@ -361,7 +361,7 @@ class BBCCoUkIE(InfoExtractor): fmt.update({ 'width': width, 'height': height, - 'vbr': bitrate, + 'tbr': bitrate, 'vcodec': encoding, }) else: @@ -370,7 +370,7 @@ class BBCCoUkIE(InfoExtractor): 'acodec': encoding, 'vcodec': 'none', }) - if protocol == 'http': + if protocol in ('http', 'https'): # Direct link fmt.update({ 'url': href, @@ -389,6 +389,8 @@ class BBCCoUkIE(InfoExtractor): 'rtmp_live': False, 'ext': 'flv', }) + else: + continue formats.append(fmt) elif kind == 'captions': subtitles = self.extract_subtitles(media, 
programme_id) @@ -407,7 +409,7 @@ class BBCCoUkIE(InfoExtractor): description = smp_config['summary'] for item in smp_config['items']: kind = item['kind'] - if kind != 'programme' and kind != 'radioProgramme': + if kind not in ('programme', 'radioProgramme'): continue programme_id = item.get('vpid') duration = int_or_none(item.get('duration')) @@ -448,7 +450,7 @@ class BBCCoUkIE(InfoExtractor): for item in self._extract_items(playlist): kind = item.get('kind') - if kind != 'programme' and kind != 'radioProgramme': + if kind not in ('programme', 'radioProgramme'): continue title = playlist.find('./{%s}title' % self._EMP_PLAYLIST_NS).text description_el = playlist.find('./{%s}summary' % self._EMP_PLAYLIST_NS) diff --git a/youtube_dl/extractor/bellmedia.py b/youtube_dl/extractor/bellmedia.py index 1f5b6ed92..8820a3914 100644 --- a/youtube_dl/extractor/bellmedia.py +++ b/youtube_dl/extractor/bellmedia.py @@ -21,10 +21,11 @@ class BellMediaIE(InfoExtractor): animalplanet| bravo| mtv| - space + space| + etalk )\.ca| much\.com - )/.*?(?:\bvid=|-vid|~|%7E|/(?:episode)?)(?P[0-9]{6,})''' + )/.*?(?:\bvid(?:eoid)?=|-vid|~|%7E|/(?:episode)?)(?P[0-9]{6,})''' _TESTS = [{ 'url': 'http://www.ctv.ca/video/player?vid=706966', 'md5': 'ff2ebbeae0aa2dcc32a830c3fd69b7b0', @@ -58,6 +59,9 @@ class BellMediaIE(InfoExtractor): }, { 'url': 'http://www.ctv.ca/DCs-Legends-of-Tomorrow/Video/S2E11-Turncoat-vid1051430', 'only_matching': True, + }, { + 'url': 'http://www.etalk.ca/video?videoid=663455', + 'only_matching': True, }] _DOMAINS = { 'thecomedynetwork': 'comedy', @@ -65,6 +69,7 @@ class BellMediaIE(InfoExtractor): 'sciencechannel': 'discsci', 'investigationdiscovery': 'invdisc', 'animalplanet': 'aniplan', + 'etalk': 'ctv', } def _real_extract(self, url): diff --git a/youtube_dl/extractor/bostonglobe.py b/youtube_dl/extractor/bostonglobe.py new file mode 100644 index 000000000..57882fbee --- /dev/null +++ b/youtube_dl/extractor/bostonglobe.py @@ -0,0 +1,72 @@ +# coding: utf-8 +from 
__future__ import unicode_literals + +import re + +from .common import InfoExtractor + +from ..utils import ( + extract_attributes, +) + + +class BostonGlobeIE(InfoExtractor): + _VALID_URL = r'(?i)https?://(?:www\.)?bostonglobe\.com/.*/(?P[^/]+)/\w+(?:\.html)?' + _TESTS = [ + { + 'url': 'http://www.bostonglobe.com/metro/2017/02/11/tree-finally-succumbs-disease-leaving-hole-neighborhood/h1b4lviqzMTIn9sVy8F3gP/story.html', + 'md5': '0a62181079c85c2d2b618c9a738aedaf', + 'info_dict': { + 'title': 'A tree finally succumbs to disease, leaving a hole in a neighborhood', + 'id': '5320421710001', + 'ext': 'mp4', + 'description': 'It arrived as a sapling when the Back Bay was in its infancy, a spindly American elm tamped down into a square of dirt cut into the brick sidewalk of 1880s Marlborough Street, no higher than the first bay window of the new brownstone behind it.', + 'timestamp': 1486877593, + 'upload_date': '20170212', + 'uploader_id': '245991542', + }, + }, + { + # Embedded youtube video; we hand it off to the Generic extractor. + 'url': 'https://www.bostonglobe.com/lifestyle/names/2017/02/17/does-ben-affleck-play-matt-damon-favorite-version-batman/ruqkc9VxKBYmh5txn1XhSI/story.html', + 'md5': '582b40327089d5c0c949b3c54b13c24b', + 'info_dict': { + 'title': "Who Is Matt Damon's Favorite Batman?", + 'id': 'ZW1QCnlA6Qc', + 'ext': 'mp4', + 'upload_date': '20170217', + 'description': 'md5:3b3dccb9375867e0b4d527ed87d307cb', + 'uploader': 'The Late Late Show with James Corden', + 'uploader_id': 'TheLateLateShow', + }, + 'expected_warnings': ['404'], + }, + ] + + def _real_extract(self, url): + page_id = self._match_id(url) + webpage = self._download_webpage(url, page_id) + + page_title = self._og_search_title(webpage, default=None) + + # .*? - ]+ - src=["\'](?:https?:)?//players\.brightcove\.net/ - (\d+)/([^/]+)_([^/]+)/index(?:\.min)?\.js + # Look for