Merge branch 'master' into fix.25.12.2018

# Conflicts:
#	youtube_dl/version.py
This commit is contained in:
Avi Peretz 2019-04-24 16:28:40 +03:00
commit 471056460f
51 changed files with 1779 additions and 666 deletions

View File

@ -6,8 +6,8 @@
--- ---
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2019.03.18*. If it's not, read [this FAQ entry](https://github.com/ytdl-org/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. ### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2019.04.24*. If it's not, read [this FAQ entry](https://github.com/ytdl-org/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2019.03.18** - [ ] I've **verified** and **I assure** that I'm running youtube-dl **2019.04.24**
### Before submitting an *issue* make sure you have: ### Before submitting an *issue* make sure you have:
- [ ] At least skimmed through the [README](https://github.com/ytdl-org/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/ytdl-org/youtube-dl#faq) and [BUGS](https://github.com/ytdl-org/youtube-dl#bugs) sections - [ ] At least skimmed through the [README](https://github.com/ytdl-org/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/ytdl-org/youtube-dl#faq) and [BUGS](https://github.com/ytdl-org/youtube-dl#bugs) sections
@ -36,7 +36,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl
[debug] User config: [] [debug] User config: []
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
[debug] youtube-dl version 2019.03.18 [debug] youtube-dl version 2019.04.24
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
[debug] Proxy map: {} [debug] Proxy map: {}

View File

@ -1,3 +1,96 @@
version 2019.04.24
Extractors
* [youtube] Fix extraction (#20758, #20759, #20761, #20762, #20764, #20766,
#20767, #20769, #20771, #20768, #20770)
* [toutv] Fix extraction and extract series info (#20757)
+ [vrv] Add support for movie listings (#19229)
+ [youtube] Print error when no data is available (#20737)
+ [soundcloud] Add support for new rendition and improve extraction (#20699)
+ [ooyala] Add support for geo verification proxy
+ [nrl] Add support for nrl.com (#15991)
+ [vimeo] Extract live archive source format (#19144)
+ [vimeo] Add support for live streams and improve info extraction (#19144)
+ [ntvcojp] Add support for cu.ntv.co.jp
+ [nhk] Extract RTMPT format
+ [nhk] Add support for audio URLs
+ [udemy] Add another course id extraction pattern (#20491)
+ [openload] Add support for oload.services (#20691)
+ [openload] Add support for openloed.co (#20691, #20693)
* [bravotv] Fix extraction (#19213)
version 2019.04.17
Extractors
* [openload] Randomize User-Agent (closes #20688)
+ [openload] Add support for oladblock domains (#20471)
* [adn] Fix subtitle extraction (#12724)
+ [aol] Add support for localized websites
+ [yahoo] Add support for GYAO episode URLs
+ [yahoo] Add support for streaming.yahoo.co.jp (#5811, #7098)
+ [yahoo] Add support for gyao.yahoo.co.jp
* [aenetworks] Fix history topic extraction and extract more formats
+ [cbs] Extract smpte and vtt subtitles
+ [streamango] Add support for streamcherry.com (#20592)
+ [yourporn] Add support for sxyprn.com (#20646)
* [mgtv] Fix extraction (#20650)
* [linkedin:learning] Use urljoin for form action URL (#20431)
+ [gdc] Add support for kaltura embeds (#20575)
* [dispeak] Improve mp4 bitrate extraction
* [kaltura] Sanitize embed URLs
* [jwplatform] Do not match manifest URLs (#20596)
* [aol] Restrict URL regular expression and improve format extraction
+ [tiktok] Add support for new URL schema (#20573)
+ [stv:player] Add support for player.stv.tv (#20586)
version 2019.04.07
Core
+ [downloader/external] Pass rtmp_conn to ffmpeg
Extractors
+ [ruutu] Add support for audio podcasts (#20473, #20545)
+ [xvideos] Extract all thumbnails (#20432)
+ [platzi] Add support for platzi.com (#20562)
* [dvtv] Fix extraction (#18514, #19174)
+ [vrv] Add basic support for individual movie links (#19229)
+ [bfi:player] Add support for player.bfi.org.uk (#19235)
* [hbo] Fix extraction and extract subtitles (#14629, #13709)
* [youtube] Extract srv[1-3] subtitle formats (#20566)
* [adultswim] Fix extraction (#18025)
* [teamcoco] Fix extraction and add support for subdomains (#17099, #20339)
* [adn] Fix subtitle compatibility with ffmpeg
* [adn] Fix extraction and add support for positioning styles (#20549)
* [vk] Use unique video id (#17848)
* [newstube] Fix extraction
* [rtl2] Actualize extraction
+ [adobeconnect] Add support for adobeconnect.com (#20283)
+ [gaia] Add support for authentication (#14605)
+ [mediasite] Add support for dashed ids and named catalogs (#20531)
version 2019.04.01
Core
* [utils] Improve int_or_none and float_or_none (#20403)
* Check for valid --min-sleep-interval when --max-sleep-interval is specified
(#20435)
Extractors
+ [weibo] Extend URL regular expression (#20496)
+ [xhamster] Add support for xhamster.one (#20508)
+ [mediasite] Add support for catalogs (#20507)
+ [teamtreehouse] Add support for teamtreehouse.com (#9836)
+ [ina] Add support for audio URLs
* [ina] Improve extraction
* [cwtv] Fix episode number extraction (#20461)
* [npo] Improve DRM detection
+ [pornhub] Add support for DASH formats (#20403)
* [svtplay] Update API endpoint (#20430)
version 2019.03.18 version 2019.03.18
Core Core

View File

@ -642,6 +642,7 @@ The simplest case is requesting a specific format, for example with `-f 22` you
You can also use a file extension (currently `3gp`, `aac`, `flv`, `m4a`, `mp3`, `mp4`, `ogg`, `wav`, `webm` are supported) to download the best quality format of a particular file extension served as a single file, e.g. `-f webm` will download the best quality format with the `webm` extension served as a single file. You can also use a file extension (currently `3gp`, `aac`, `flv`, `m4a`, `mp3`, `mp4`, `ogg`, `wav`, `webm` are supported) to download the best quality format of a particular file extension served as a single file, e.g. `-f webm` will download the best quality format with the `webm` extension served as a single file.
You can also use special names to select particular edge case formats: You can also use special names to select particular edge case formats:
- `best`: Select the best quality format represented by a single file with video and audio. - `best`: Select the best quality format represented by a single file with video and audio.
- `worst`: Select the worst quality format represented by a single file with video and audio. - `worst`: Select the worst quality format represented by a single file with video and audio.
- `bestvideo`: Select the best quality video-only format (e.g. DASH video). May not be available. - `bestvideo`: Select the best quality video-only format (e.g. DASH video). May not be available.
@ -658,6 +659,7 @@ If you want to download several formats of the same video use a comma as a separ
You can also filter the video formats by putting a condition in brackets, as in `-f "best[height=720]"` (or `-f "[filesize>10M]"`). You can also filter the video formats by putting a condition in brackets, as in `-f "best[height=720]"` (or `-f "[filesize>10M]"`).
The following numeric meta fields can be used with comparisons `<`, `<=`, `>`, `>=`, `=` (equals), `!=` (not equals): The following numeric meta fields can be used with comparisons `<`, `<=`, `>`, `>=`, `=` (equals), `!=` (not equals):
- `filesize`: The number of bytes, if known in advance - `filesize`: The number of bytes, if known in advance
- `width`: Width of the video, if known - `width`: Width of the video, if known
- `height`: Height of the video, if known - `height`: Height of the video, if known
@ -668,6 +670,7 @@ The following numeric meta fields can be used with comparisons `<`, `<=`, `>`, `
- `fps`: Frame rate - `fps`: Frame rate
Also filtering work for comparisons `=` (equals), `^=` (starts with), `$=` (ends with), `*=` (contains) and following string meta fields: Also filtering work for comparisons `=` (equals), `^=` (starts with), `$=` (ends with), `*=` (contains) and following string meta fields:
- `ext`: File extension - `ext`: File extension
- `acodec`: Name of the audio codec in use - `acodec`: Name of the audio codec in use
- `vcodec`: Name of the video codec in use - `vcodec`: Name of the video codec in use
@ -697,7 +700,7 @@ Note that on Windows you may need to use double quotes instead of single.
# Download best mp4 format available or any other best if no mp4 available # Download best mp4 format available or any other best if no mp4 available
$ youtube-dl -f 'bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best' $ youtube-dl -f 'bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best'
# Download best format available but not better that 480p # Download best format available but no better than 480p
$ youtube-dl -f 'bestvideo[height<=480]+bestaudio/best[height<=480]' $ youtube-dl -f 'bestvideo[height<=480]+bestaudio/best[height<=480]'
# Download best video only format but no bigger than 50 MB # Download best video only format but no bigger than 50 MB

View File

@ -28,6 +28,7 @@
- **acast:channel** - **acast:channel**
- **AddAnime** - **AddAnime**
- **ADN**: Anime Digital Network - **ADN**: Anime Digital Network
- **AdobeConnect**
- **AdobeTV** - **AdobeTV**
- **AdobeTVChannel** - **AdobeTVChannel**
- **AdobeTVShow** - **AdobeTVShow**
@ -45,6 +46,7 @@
- **anderetijden**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl - **anderetijden**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl
- **AnimeOnDemand** - **AnimeOnDemand**
- **Anvato** - **Anvato**
- **aol.com**
- **APA** - **APA**
- **Aparat** - **Aparat**
- **AppleConnect** - **AppleConnect**
@ -101,6 +103,7 @@
- **Bellator** - **Bellator**
- **BellMedia** - **BellMedia**
- **Bet** - **Bet**
- **bfi:player**
- **Bigflix** - **Bigflix**
- **Bild**: Bild.de - **Bild**: Bild.de
- **BiliBili** - **BiliBili**
@ -198,6 +201,7 @@
- **CSpan**: C-SPAN - **CSpan**: C-SPAN
- **CtsNews**: 華視新聞 - **CtsNews**: 華視新聞
- **CTVNews** - **CTVNews**
- **cu.ntv.co.jp**: Nippon Television Network
- **Culturebox** - **Culturebox**
- **CultureUnplugged** - **CultureUnplugged**
- **curiositystream** - **curiositystream**
@ -345,7 +349,6 @@
- **Groupon** - **Groupon**
- **Hark** - **Hark**
- **hbo** - **hbo**
- **hbo:episode**
- **HearThisAt** - **HearThisAt**
- **Heise** - **Heise**
- **HellPorno** - **HellPorno**
@ -488,6 +491,8 @@
- **Medialaan** - **Medialaan**
- **Mediaset** - **Mediaset**
- **Mediasite** - **Mediasite**
- **MediasiteCatalog**
- **MediasiteNamedCatalog**
- **Medici** - **Medici**
- **megaphone.fm**: megaphone.fm embedded players - **megaphone.fm**: megaphone.fm embedded players
- **Meipai**: 美拍 - **Meipai**: 美拍
@ -620,6 +625,7 @@
- **NRKTVEpisodes** - **NRKTVEpisodes**
- **NRKTVSeason** - **NRKTVSeason**
- **NRKTVSeries** - **NRKTVSeries**
- **NRLTV**
- **ntv.ru** - **ntv.ru**
- **Nuvid** - **Nuvid**
- **NYTimes** - **NYTimes**
@ -629,7 +635,6 @@
- **OdaTV** - **OdaTV**
- **Odnoklassniki** - **Odnoklassniki**
- **OktoberfestTV** - **OktoberfestTV**
- **on.aol.com**
- **OnDemandKorea** - **OnDemandKorea**
- **onet.pl** - **onet.pl**
- **onet.tv** - **onet.tv**
@ -670,6 +675,8 @@
- **Piksel** - **Piksel**
- **Pinkbike** - **Pinkbike**
- **Pladform** - **Pladform**
- **Platzi**
- **PlatziCourse**
- **play.fm** - **play.fm**
- **PlayPlusTV** - **PlayPlusTV**
- **PlaysTV** - **PlaysTV**
@ -848,6 +855,7 @@
- **StreamCZ** - **StreamCZ**
- **StreetVoice** - **StreetVoice**
- **StretchInternet** - **StretchInternet**
- **stv:player**
- **SunPorno** - **SunPorno**
- **SVT** - **SVT**
- **SVTPage** - **SVTPage**
@ -869,6 +877,7 @@
- **teachertube:user:collection**: teachertube.com user and collection videos - **teachertube:user:collection**: teachertube.com user and collection videos
- **TeachingChannel** - **TeachingChannel**
- **Teamcoco** - **Teamcoco**
- **TeamTreeHouse**
- **TechTalks** - **TechTalks**
- **techtv.mit.edu** - **techtv.mit.edu**
- **ted** - **ted**
@ -1118,6 +1127,8 @@
- **XVideos** - **XVideos**
- **XXXYMovies** - **XXXYMovies**
- **Yahoo**: Yahoo screen and movies - **Yahoo**: Yahoo screen and movies
- **yahoo:gyao**
- **yahoo:gyao:player**
- **YandexDisk** - **YandexDisk**
- **yandexmusic:album**: Яндекс.Музыка - Альбом - **yandexmusic:album**: Яндекс.Музыка - Альбом
- **yandexmusic:playlist**: Яндекс.Музыка - Плейлист - **yandexmusic:playlist**: Яндекс.Музыка - Плейлист

View File

@ -309,6 +309,8 @@ class YoutubeDL(object):
The following options are used by the post processors: The following options are used by the post processors:
prefer_ffmpeg: If False, use avconv instead of ffmpeg if both are available, prefer_ffmpeg: If False, use avconv instead of ffmpeg if both are available,
otherwise prefer ffmpeg. otherwise prefer ffmpeg.
ffmpeg_location: Location of the ffmpeg/avconv binary; either the path
to the binary or its containing directory.
postprocessor_args: A list of additional command-line arguments for the postprocessor_args: A list of additional command-line arguments for the
postprocessor. postprocessor.

View File

@ -289,6 +289,7 @@ class FFmpegFD(ExternalFD):
tc_url = info_dict.get('tc_url') tc_url = info_dict.get('tc_url')
flash_version = info_dict.get('flash_version') flash_version = info_dict.get('flash_version')
live = info_dict.get('rtmp_live', False) live = info_dict.get('rtmp_live', False)
conn = info_dict.get('rtmp_conn')
if player_url is not None: if player_url is not None:
args += ['-rtmp_swfverify', player_url] args += ['-rtmp_swfverify', player_url]
if page_url is not None: if page_url is not None:
@ -303,6 +304,11 @@ class FFmpegFD(ExternalFD):
args += ['-rtmp_flashver', flash_version] args += ['-rtmp_flashver', flash_version]
if live: if live:
args += ['-rtmp_live', 'live'] args += ['-rtmp_live', 'live']
if isinstance(conn, list):
for entry in conn:
args += ['-rtmp_conn', entry]
elif isinstance(conn, compat_str):
args += ['-rtmp_conn', conn]
args += ['-i', url, '-c', 'copy'] args += ['-i', url, '-c', 'copy']

View File

@ -21,7 +21,6 @@ from ..utils import (
intlist_to_bytes, intlist_to_bytes,
long_to_bytes, long_to_bytes,
pkcs1pad, pkcs1pad,
srt_subtitles_timecode,
strip_or_none, strip_or_none,
urljoin, urljoin,
) )
@ -42,6 +41,18 @@ class ADNIE(InfoExtractor):
} }
_BASE_URL = 'http://animedigitalnetwork.fr' _BASE_URL = 'http://animedigitalnetwork.fr'
_RSA_KEY = (0xc35ae1e4356b65a73b551493da94b8cb443491c0aa092a357a5aee57ffc14dda85326f42d716e539a34542a0d3f363adf16c5ec222d713d5997194030ee2e4f0d1fb328c01a81cf6868c090d50de8e169c6b13d1675b9eeed1cbc51e1fffca9b38af07f37abd790924cd3bee59d0257cfda4fe5f3f0534877e21ce5821447d1b, 65537) _RSA_KEY = (0xc35ae1e4356b65a73b551493da94b8cb443491c0aa092a357a5aee57ffc14dda85326f42d716e539a34542a0d3f363adf16c5ec222d713d5997194030ee2e4f0d1fb328c01a81cf6868c090d50de8e169c6b13d1675b9eeed1cbc51e1fffca9b38af07f37abd790924cd3bee59d0257cfda4fe5f3f0534877e21ce5821447d1b, 65537)
_POS_ALIGN_MAP = {
'start': 1,
'end': 3,
}
_LINE_ALIGN_MAP = {
'middle': 8,
'end': 4,
}
@staticmethod
def _ass_subtitles_timecode(seconds):
    """Format a time offset in seconds as an SSA/ASS timecode ``H:MM:SS.CC``.

    The offset is rounded once to whole centiseconds and then split into
    fields.  Formatting the fractional part separately (``(seconds % 1) * 100``
    followed by ``%d`` truncation) drops a centisecond whenever the product
    lands just below an integer due to binary floating point, e.g. ``0.29``
    would be rendered as ``.28``.

    :param seconds: non-negative offset in seconds (int or float).
    :return: the timecode string, e.g. ``'1:01:01.50'`` for ``3661.5``.
    """
    # int() keeps this working on Python 2, where round() returns a float.
    total_centiseconds = int(round(seconds * 100))
    total_seconds, centiseconds = divmod(total_centiseconds, 100)
    total_minutes, secs = divmod(total_seconds, 60)
    hours, minutes = divmod(total_minutes, 60)
    return '%01d:%02d:%02d.%02d' % (hours, minutes, secs, centiseconds)
def _get_subtitles(self, sub_path, video_id): def _get_subtitles(self, sub_path, video_id):
if not sub_path: if not sub_path:
@ -49,14 +60,19 @@ class ADNIE(InfoExtractor):
enc_subtitles = self._download_webpage( enc_subtitles = self._download_webpage(
urljoin(self._BASE_URL, sub_path), urljoin(self._BASE_URL, sub_path),
video_id, fatal=False) video_id, 'Downloading subtitles location', fatal=False) or '{}'
subtitle_location = (self._parse_json(enc_subtitles, video_id, fatal=False) or {}).get('location')
if subtitle_location:
enc_subtitles = self._download_webpage(
urljoin(self._BASE_URL, subtitle_location),
video_id, 'Downloading subtitles data', fatal=False)
if not enc_subtitles: if not enc_subtitles:
return None return None
# http://animedigitalnetwork.fr/components/com_vodvideo/videojs/adn-vjs.min.js # http://animedigitalnetwork.fr/components/com_vodvideo/videojs/adn-vjs.min.js
dec_subtitles = intlist_to_bytes(aes_cbc_decrypt( dec_subtitles = intlist_to_bytes(aes_cbc_decrypt(
bytes_to_intlist(compat_b64decode(enc_subtitles[24:])), bytes_to_intlist(compat_b64decode(enc_subtitles[24:])),
bytes_to_intlist(binascii.unhexlify(self._K + '9032ad7083106400')), bytes_to_intlist(binascii.unhexlify(self._K + '4421de0a5f0814ba')),
bytes_to_intlist(compat_b64decode(enc_subtitles[:24])) bytes_to_intlist(compat_b64decode(enc_subtitles[:24]))
)) ))
subtitles_json = self._parse_json( subtitles_json = self._parse_json(
@ -67,23 +83,27 @@ class ADNIE(InfoExtractor):
subtitles = {} subtitles = {}
for sub_lang, sub in subtitles_json.items(): for sub_lang, sub in subtitles_json.items():
srt = '' ssa = '''[Script Info]
for num, current in enumerate(sub): ScriptType:V4.00
start, end, text = ( [V4 Styles]
Format: Name,Fontname,Fontsize,PrimaryColour,SecondaryColour,TertiaryColour,BackColour,Bold,Italic,BorderStyle,Outline,Shadow,Alignment,MarginL,MarginR,MarginV,AlphaLevel,Encoding
Style: Default,Arial,18,16777215,16777215,16777215,0,-1,0,1,1,0,2,20,20,20,0,0
[Events]
Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
for current in sub:
start, end, text, line_align, position_align = (
float_or_none(current.get('startTime')), float_or_none(current.get('startTime')),
float_or_none(current.get('endTime')), float_or_none(current.get('endTime')),
current.get('text')) current.get('text'), current.get('lineAlign'),
current.get('positionAlign'))
if start is None or end is None or text is None: if start is None or end is None or text is None:
continue continue
srt += os.linesep.join( alignment = self._POS_ALIGN_MAP.get(position_align, 2) + self._LINE_ALIGN_MAP.get(line_align, 0)
( ssa += os.linesep + 'Dialogue: Marked=0,%s,%s,Default,,0,0,0,,%s%s' % (
'%d' % num, self._ass_subtitles_timecode(start),
'%s --> %s' % ( self._ass_subtitles_timecode(end),
srt_subtitles_timecode(start), '{\\a%d}' % alignment if alignment != 2 else '',
srt_subtitles_timecode(end)), text.replace('\n', '\\N').replace('<i>', '{\\i1}').replace('</i>', '{\\i0}'))
text,
os.linesep,
))
if sub_lang == 'vostf': if sub_lang == 'vostf':
sub_lang = 'fr' sub_lang = 'fr'
@ -91,8 +111,8 @@ class ADNIE(InfoExtractor):
'ext': 'json', 'ext': 'json',
'data': json.dumps(sub), 'data': json.dumps(sub),
}, { }, {
'ext': 'srt', 'ext': 'ssa',
'data': srt, 'data': ssa,
}]) }])
return subtitles return subtitles
@ -100,7 +120,15 @@ class ADNIE(InfoExtractor):
video_id = self._match_id(url) video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
player_config = self._parse_json(self._search_regex( player_config = self._parse_json(self._search_regex(
r'playerConfig\s*=\s*({.+});', webpage, 'player config'), video_id) r'playerConfig\s*=\s*({.+});', webpage,
'player config', default='{}'), video_id, fatal=False)
if not player_config:
config_url = urljoin(self._BASE_URL, self._search_regex(
r'(?:id="player"|class="[^"]*adn-player-container[^"]*")[^>]+data-url="([^"]+)"',
webpage, 'config url'))
player_config = self._download_json(
config_url, video_id,
'Downloading player config JSON metadata')['player']
video_info = {} video_info = {}
video_info_str = self._search_regex( video_info_str = self._search_regex(
@ -129,12 +157,15 @@ class ADNIE(InfoExtractor):
encrypted_message = long_to_bytes(pow(bytes_to_long(padded_message), e, n)) encrypted_message = long_to_bytes(pow(bytes_to_long(padded_message), e, n))
authorization = base64.b64encode(encrypted_message).decode() authorization = base64.b64encode(encrypted_message).decode()
links_data = self._download_json( links_data = self._download_json(
urljoin(self._BASE_URL, links_url), video_id, headers={ urljoin(self._BASE_URL, links_url), video_id,
'Downloading links JSON metadata', headers={
'Authorization': 'Bearer ' + authorization, 'Authorization': 'Bearer ' + authorization,
}) })
links = links_data.get('links') or {} links = links_data.get('links') or {}
metas = metas or links_data.get('meta') or {} metas = metas or links_data.get('meta') or {}
sub_path = (sub_path or links_data.get('subtitles')) + '&token=' + token sub_path = sub_path or links_data.get('subtitles') or \
'index.php?option=com_vodapi&task=subtitles.getJSON&format=json&id=' + video_id
sub_path += '&token=' + token
error = links_data.get('error') error = links_data.get('error')
title = metas.get('title') or video_info['title'] title = metas.get('title') or video_info['title']
@ -142,9 +173,11 @@ class ADNIE(InfoExtractor):
for format_id, qualities in links.items(): for format_id, qualities in links.items():
if not isinstance(qualities, dict): if not isinstance(qualities, dict):
continue continue
for load_balancer_url in qualities.values(): for quality, load_balancer_url in qualities.items():
load_balancer_data = self._download_json( load_balancer_data = self._download_json(
load_balancer_url, video_id, fatal=False) or {} load_balancer_url, video_id,
'Downloading %s %s JSON metadata' % (format_id, quality),
fatal=False) or {}
m3u8_url = load_balancer_data.get('location') m3u8_url = load_balancer_data.get('location')
if not m3u8_url: if not m3u8_url:
continue continue

View File

@ -0,0 +1,37 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..compat import (
compat_parse_qs,
compat_urlparse,
)
class AdobeConnectIE(InfoExtractor):
    """Extractor for meeting pages served from ``*.adobeconnect.com``."""

    _VALID_URL = r'https?://\w+\.adobeconnect\.com/(?P<id>[\w-]+)'

    def _real_extract(self, url):
        """Download the meeting page and build one format per connection string.

        The stream parameters are read from the query string of the ``swfUrl``
        value embedded in the page.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        title = self._html_search_regex(r'<title>(.+?)</title>', webpage, 'title')

        swf_url = self._search_regex(r"swfUrl\s*=\s*'([^']+)'", webpage, 'swf url')
        # Everything after the first '?' of the SWF URL carries the player settings.
        qs = compat_parse_qs(swf_url.split('?')[1])
        is_live = qs.get('isLive', ['false'])[0] == 'true'

        # 'conStrings' is a comma-separated list of connection URLs; the part
        # before '://' of each one is used as the format id.
        formats = [{
            'format_id': con_string.split('://')[0],
            'app': compat_urlparse.quote('?' + con_string.split('?')[1] + 'flvplayerapp/' + qs['appInstance'][0]),
            'ext': 'flv',
            'play_path': 'mp4:' + qs['streamName'][0],
            'rtmp_conn': 'S:' + qs['ticket'][0],
            'rtmp_live': is_live,
            'url': con_string,
        } for con_string in qs['conStrings'][0].split(',')]

        if is_live:
            title = self._live_title(title)

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'is_live': is_live,
        }

View File

@ -1,13 +1,19 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import json
import re import re
from .turner import TurnerBaseIE from .turner import TurnerBaseIE
from ..utils import ( from ..utils import (
determine_ext,
float_or_none,
int_or_none, int_or_none,
mimetype2ext,
parse_age_limit,
parse_iso8601,
strip_or_none, strip_or_none,
url_or_none, try_get,
) )
@ -21,8 +27,8 @@ class AdultSwimIE(TurnerBaseIE):
'ext': 'mp4', 'ext': 'mp4',
'title': 'Rick and Morty - Pilot', 'title': 'Rick and Morty - Pilot',
'description': 'Rick moves in with his daughter\'s family and establishes himself as a bad influence on his grandson, Morty.', 'description': 'Rick moves in with his daughter\'s family and establishes himself as a bad influence on his grandson, Morty.',
'timestamp': 1493267400, 'timestamp': 1543294800,
'upload_date': '20170427', 'upload_date': '20181127',
}, },
'params': { 'params': {
# m3u8 download # m3u8 download
@ -43,6 +49,7 @@ class AdultSwimIE(TurnerBaseIE):
# m3u8 download # m3u8 download
'skip_download': True, 'skip_download': True,
}, },
'skip': '404 Not Found',
}, { }, {
'url': 'http://www.adultswim.com/videos/decker/inside-decker-a-new-hero/', 'url': 'http://www.adultswim.com/videos/decker/inside-decker-a-new-hero/',
'info_dict': { 'info_dict': {
@ -61,9 +68,9 @@ class AdultSwimIE(TurnerBaseIE):
}, { }, {
'url': 'http://www.adultswim.com/videos/attack-on-titan', 'url': 'http://www.adultswim.com/videos/attack-on-titan',
'info_dict': { 'info_dict': {
'id': 'b7A69dzfRzuaXIECdxW8XQ', 'id': 'attack-on-titan',
'title': 'Attack on Titan', 'title': 'Attack on Titan',
'description': 'md5:6c8e003ea0777b47013e894767f5e114', 'description': 'md5:41caa9416906d90711e31dc00cb7db7e',
}, },
'playlist_mincount': 12, 'playlist_mincount': 12,
}, { }, {
@ -78,83 +85,118 @@ class AdultSwimIE(TurnerBaseIE):
# m3u8 download # m3u8 download
'skip_download': True, 'skip_download': True,
}, },
'skip': '404 Not Found',
}] }]
def _real_extract(self, url): def _real_extract(self, url):
show_path, episode_path = re.match(self._VALID_URL, url).groups() show_path, episode_path = re.match(self._VALID_URL, url).groups()
display_id = episode_path or show_path display_id = episode_path or show_path
webpage = self._download_webpage(url, display_id) query = '''query {
initial_data = self._parse_json(self._search_regex( getShowBySlug(slug:"%s") {
r'AS_INITIAL_DATA(?:__)?\s*=\s*({.+?});', %%s
webpage, 'initial data'), display_id) }
}''' % show_path
is_stream = show_path == 'streams' if episode_path:
if is_stream: query = query % '''title
if not episode_path: getVideoBySlug(slug:"%s") {
episode_path = 'live-stream' _id
auth
video_data = next(stream for stream_path, stream in initial_data['streams'].items() if stream_path == episode_path) description
video_id = video_data.get('stream') duration
episodeNumber
if not video_id: launchDate
entries = [] mediaID
for episode in video_data.get('archiveEpisodes', []): seasonNumber
episode_url = url_or_none(episode.get('url')) poster
if not episode_url: title
continue tvRating
entries.append(self.url_result( }''' % episode_path
episode_url, 'AdultSwim', episode.get('id'))) ['getVideoBySlug']
return self.playlist_result(
entries, video_data.get('id'), video_data.get('title'),
strip_or_none(video_data.get('description')))
else: else:
show_data = initial_data['show'] query = query % '''metaDescription
title
videos(first:1000,sort:["episode_number"]) {
edges {
node {
_id
slug
}
}
}'''
show_data = self._download_json(
'https://www.adultswim.com/api/search', display_id,
data=json.dumps({'query': query}).encode(),
headers={'Content-Type': 'application/json'})['data']['getShowBySlug']
if episode_path:
video_data = show_data['getVideoBySlug']
video_id = video_data['_id']
episode_title = title = video_data['title']
series = show_data.get('title')
if series:
title = '%s - %s' % (series, title)
info = {
'id': video_id,
'title': title,
'description': strip_or_none(video_data.get('description')),
'duration': float_or_none(video_data.get('duration')),
'formats': [],
'subtitles': {},
'age_limit': parse_age_limit(video_data.get('tvRating')),
'thumbnail': video_data.get('poster'),
'timestamp': parse_iso8601(video_data.get('launchDate')),
'series': series,
'season_number': int_or_none(video_data.get('seasonNumber')),
'episode': episode_title,
'episode_number': int_or_none(video_data.get('episodeNumber')),
}
if not episode_path: auth = video_data.get('auth')
entries = [] media_id = video_data.get('mediaID')
for video in show_data.get('videos', []): if media_id:
slug = video.get('slug') info.update(self._extract_ngtv_info(media_id, {
if not slug: # CDN_TOKEN_APP_ID from:
# https://d2gg02c3xr550i.cloudfront.net/assets/asvp.e9c8bef24322d060ef87.bundle.js
'appId': 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJhcHBJZCI6ImFzLXR2ZS1kZXNrdG9wLXB0enQ2bSIsInByb2R1Y3QiOiJ0dmUiLCJuZXR3b3JrIjoiYXMiLCJwbGF0Zm9ybSI6ImRlc2t0b3AiLCJpYXQiOjE1MzI3MDIyNzl9.BzSCk-WYOZ2GMCIaeVb8zWnzhlgnXuJTCu0jGp_VaZE',
}, {
'url': url,
'site_name': 'AdultSwim',
'auth_required': auth,
}))
if not auth:
extract_data = self._download_json(
'https://www.adultswim.com/api/shows/v1/videos/' + video_id,
video_id, query={'fields': 'stream'}, fatal=False) or {}
assets = try_get(extract_data, lambda x: x['data']['video']['stream']['assets'], list) or []
for asset in assets:
asset_url = asset.get('url')
if not asset_url:
continue continue
entries.append(self.url_result( ext = determine_ext(asset_url, mimetype2ext(asset.get('mime_type')))
'http://adultswim.com/videos/%s/%s' % (show_path, slug), if ext == 'm3u8':
'AdultSwim', video.get('id'))) info['formats'].extend(self._extract_m3u8_formats(
return self.playlist_result( asset_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
entries, show_data.get('id'), show_data.get('title'), elif ext == 'f4m':
strip_or_none(show_data.get('metadata', {}).get('description'))) continue
# info['formats'].extend(self._extract_f4m_formats(
# asset_url, video_id, f4m_id='hds', fatal=False))
elif ext in ('scc', 'ttml', 'vtt'):
info['subtitles'].setdefault('en', []).append({
'url': asset_url,
})
self._sort_formats(info['formats'])
video_data = show_data['sluggedVideo'] return info
video_id = video_data['id'] else:
entries = []
info = self._extract_cvp_info( for edge in show_data.get('videos', {}).get('edges', []):
'http://www.adultswim.com/videos/api/v0/assets?platform=desktop&id=' + video_id, video = edge.get('node') or {}
video_id, { slug = video.get('slug')
'secure': { if not slug:
'media_src': 'http://androidhls-secure.cdn.turner.com/adultswim/big', continue
'tokenizer_src': 'http://www.adultswim.com/astv/mvpd/processors/services/token_ipadAdobe.do', entries.append(self.url_result(
}, 'http://adultswim.com/videos/%s/%s' % (show_path, slug),
}, { 'AdultSwim', video.get('_id')))
'url': url, return self.playlist_result(
'site_name': 'AdultSwim', entries, show_path, show_data.get('title'),
'auth_required': video_data.get('auth'), strip_or_none(show_data.get('metaDescription')))
})
info.update({
'id': video_id,
'display_id': display_id,
'description': info.get('description') or strip_or_none(video_data.get('description')),
})
if not is_stream:
info.update({
'duration': info.get('duration') or int_or_none(video_data.get('duration')),
'timestamp': info.get('timestamp') or int_or_none(video_data.get('launch_date')),
'season_number': info.get('season_number') or int_or_none(video_data.get('season_number')),
'episode': info['title'],
'episode_number': info.get('episode_number') or int_or_none(video_data.get('episode_number')),
})
info['series'] = video_data.get('collection_title') or info.get('series')
if info['series'] and info['series'] != info['title']:
info['title'] = '%s - %s' % (info['series'], info['title'])
return info

View File

@ -1,14 +1,15 @@
# coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import re import re
from .theplatform import ThePlatformIE from .theplatform import ThePlatformIE
from ..utils import ( from ..utils import (
extract_attributes,
ExtractorError,
int_or_none,
smuggle_url, smuggle_url,
update_url_query, update_url_query,
unescapeHTML,
extract_attributes,
get_element_by_attribute,
) )
from ..compat import ( from ..compat import (
compat_urlparse, compat_urlparse,
@ -19,6 +20,43 @@ class AENetworksBaseIE(ThePlatformIE):
_THEPLATFORM_KEY = 'crazyjava' _THEPLATFORM_KEY = 'crazyjava'
_THEPLATFORM_SECRET = 's3cr3t' _THEPLATFORM_SECRET = 's3cr3t'
def _extract_aen_smil(self, smil_url, video_id, auth=None):
    """Gather formats and subtitles from several SMIL variants of one video.

    The SMIL URL is requested once per asset-type/switch combination listed
    below.  Every request URL is passed through self._sign_url together
    with the class-level _THEPLATFORM_KEY and _THEPLATFORM_SECRET.

    smil_url -- base SMIL URL the query parameters are added to
    video_id -- identifier handed to the download helper and returned as 'id'
    auth     -- optional value; when truthy it is sent as the 'auth' parameter

    Returns a dict with the keys 'id', 'formats' and 'subtitles'.

    A variant that raises ExtractorError is skipped.  The most recent such
    error is re-raised only when no variant yielded any format at all.
    """
    shared_params = {'mbr': 'true'}
    if auth:
        shared_params['auth'] = auth

    # Variants are tried in this order and all successful results are merged.
    variants = (
        {'assetTypes': 'high_video_ak', 'switch': 'hls_high_ak'},
        {'assetTypes': 'high_video_s3'},
        {'assetTypes': 'high_video_s3', 'switch': 'hls_ingest_fastly'},
    )

    formats, subtitles = [], {}
    failure = None
    for variant in variants:
        variant.update(shared_params)
        signed_url = self._sign_url(
            update_url_query(smil_url, variant),
            self._THEPLATFORM_KEY, self._THEPLATFORM_SECRET)
        # Name the variant in the progress note: the switch if present,
        # otherwise the asset type.
        note = 'Downloading %s SMIL data' % (variant.get('switch') or variant['assetTypes'])
        try:
            variant_formats, variant_subtitles = self._extract_theplatform_smil(
                signed_url, video_id, note)
        except ExtractorError as e:
            failure = e
            continue
        formats.extend(variant_formats)
        subtitles = self._merge_subtitles(subtitles, variant_subtitles)

    # Partial success is good enough; fail only when nothing was extracted.
    if failure and not formats:
        raise failure

    self._sort_formats(formats)
    return {
        'id': video_id,
        'formats': formats,
        'subtitles': subtitles,
    }
class AENetworksIE(AENetworksBaseIE): class AENetworksIE(AENetworksBaseIE):
IE_NAME = 'aenetworks' IE_NAME = 'aenetworks'
@ -33,22 +71,25 @@ class AENetworksIE(AENetworksBaseIE):
(?: (?:
shows/(?P<show_path>[^/]+(?:/[^/]+){0,2})| shows/(?P<show_path>[^/]+(?:/[^/]+){0,2})|
movies/(?P<movie_display_id>[^/]+)(?:/full-movie)?| movies/(?P<movie_display_id>[^/]+)(?:/full-movie)?|
specials/(?P<special_display_id>[^/]+)/full-special| specials/(?P<special_display_id>[^/]+)/(?:full-special|preview-)|
collections/[^/]+/(?P<collection_display_id>[^/]+) collections/[^/]+/(?P<collection_display_id>[^/]+)
) )
''' '''
_TESTS = [{ _TESTS = [{
'url': 'http://www.history.com/shows/mountain-men/season-1/episode-1', 'url': 'http://www.history.com/shows/mountain-men/season-1/episode-1',
'md5': 'a97a65f7e823ae10e9244bc5433d5fe6',
'info_dict': { 'info_dict': {
'id': '22253814', 'id': '22253814',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Winter Is Coming', 'title': 'Winter is Coming',
'description': 'md5:641f424b7a19d8e24f26dea22cf59d74', 'description': 'md5:641f424b7a19d8e24f26dea22cf59d74',
'timestamp': 1338306241, 'timestamp': 1338306241,
'upload_date': '20120529', 'upload_date': '20120529',
'uploader': 'AENE-NEW', 'uploader': 'AENE-NEW',
}, },
'params': {
# m3u8 download
'skip_download': True,
},
'add_ie': ['ThePlatform'], 'add_ie': ['ThePlatform'],
}, { }, {
'url': 'http://www.history.com/shows/ancient-aliens/season-1', 'url': 'http://www.history.com/shows/ancient-aliens/season-1',
@ -84,6 +125,9 @@ class AENetworksIE(AENetworksBaseIE):
}, { }, {
'url': 'https://www.historyvault.com/collections/america-the-story-of-us/westward', 'url': 'https://www.historyvault.com/collections/america-the-story-of-us/westward',
'only_matching': True 'only_matching': True
}, {
'url': 'https://www.aetv.com/specials/hunting-jonbenets-killer-the-untold-story/preview-hunting-jonbenets-killer-the-untold-story',
'only_matching': True
}] }]
_DOMAIN_TO_REQUESTOR_ID = { _DOMAIN_TO_REQUESTOR_ID = {
'history.com': 'HISTORY', 'history.com': 'HISTORY',
@ -124,11 +168,6 @@ class AENetworksIE(AENetworksBaseIE):
return self.playlist_result( return self.playlist_result(
entries, self._html_search_meta('aetn:SeasonId', webpage)) entries, self._html_search_meta('aetn:SeasonId', webpage))
query = {
'mbr': 'true',
'assetTypes': 'high_video_ak',
'switch': 'hls_high_ak',
}
video_id = self._html_search_meta('aetn:VideoID', webpage) video_id = self._html_search_meta('aetn:VideoID', webpage)
media_url = self._search_regex( media_url = self._search_regex(
[r"media_url\s*=\s*'(?P<url>[^']+)'", [r"media_url\s*=\s*'(?P<url>[^']+)'",
@ -138,64 +177,39 @@ class AENetworksIE(AENetworksBaseIE):
theplatform_metadata = self._download_theplatform_metadata(self._search_regex( theplatform_metadata = self._download_theplatform_metadata(self._search_regex(
r'https?://link\.theplatform\.com/s/([^?]+)', media_url, 'theplatform_path'), video_id) r'https?://link\.theplatform\.com/s/([^?]+)', media_url, 'theplatform_path'), video_id)
info = self._parse_theplatform_metadata(theplatform_metadata) info = self._parse_theplatform_metadata(theplatform_metadata)
auth = None
if theplatform_metadata.get('AETN$isBehindWall'): if theplatform_metadata.get('AETN$isBehindWall'):
requestor_id = self._DOMAIN_TO_REQUESTOR_ID[domain] requestor_id = self._DOMAIN_TO_REQUESTOR_ID[domain]
resource = self._get_mvpd_resource( resource = self._get_mvpd_resource(
requestor_id, theplatform_metadata['title'], requestor_id, theplatform_metadata['title'],
theplatform_metadata.get('AETN$PPL_pplProgramId') or theplatform_metadata.get('AETN$PPL_pplProgramId_OLD'), theplatform_metadata.get('AETN$PPL_pplProgramId') or theplatform_metadata.get('AETN$PPL_pplProgramId_OLD'),
theplatform_metadata['ratings'][0]['rating']) theplatform_metadata['ratings'][0]['rating'])
query['auth'] = self._extract_mvpd_auth( auth = self._extract_mvpd_auth(
url, video_id, requestor_id, resource) url, video_id, requestor_id, resource)
info.update(self._search_json_ld(webpage, video_id, fatal=False)) info.update(self._search_json_ld(webpage, video_id, fatal=False))
media_url = update_url_query(media_url, query) info.update(self._extract_aen_smil(media_url, video_id, auth))
media_url = self._sign_url(media_url, self._THEPLATFORM_KEY, self._THEPLATFORM_SECRET)
formats, subtitles = self._extract_theplatform_smil(media_url, video_id)
self._sort_formats(formats)
info.update({
'id': video_id,
'formats': formats,
'subtitles': subtitles,
})
return info return info
class HistoryTopicIE(AENetworksBaseIE): class HistoryTopicIE(AENetworksBaseIE):
IE_NAME = 'history:topic' IE_NAME = 'history:topic'
IE_DESC = 'History.com Topic' IE_DESC = 'History.com Topic'
_VALID_URL = r'https?://(?:www\.)?history\.com/topics/(?:[^/]+/)?(?P<topic_id>[^/]+)(?:/[^/]+(?:/(?P<video_display_id>[^/?#]+))?)?' _VALID_URL = r'https?://(?:www\.)?history\.com/topics/[^/]+/(?P<id>[\w+-]+?)-video'
_TESTS = [{ _TESTS = [{
'url': 'http://www.history.com/topics/valentines-day/history-of-valentines-day/videos/bet-you-didnt-know-valentines-day?m=528e394da93ae&s=undefined&f=1&free=false', 'url': 'https://www.history.com/topics/valentines-day/history-of-valentines-day-video',
'info_dict': { 'info_dict': {
'id': '40700995724', 'id': '40700995724',
'ext': 'mp4', 'ext': 'mp4',
'title': "Bet You Didn't Know: Valentine's Day", 'title': "History of Valentines Day",
'description': 'md5:7b57ea4829b391995b405fa60bd7b5f7', 'description': 'md5:7b57ea4829b391995b405fa60bd7b5f7',
'timestamp': 1375819729, 'timestamp': 1375819729,
'upload_date': '20130806', 'upload_date': '20130806',
'uploader': 'AENE-NEW',
}, },
'params': { 'params': {
# m3u8 download # m3u8 download
'skip_download': True, 'skip_download': True,
}, },
'add_ie': ['ThePlatform'], 'add_ie': ['ThePlatform'],
}, {
'url': 'http://www.history.com/topics/world-war-i/world-war-i-history/videos',
'info_dict':
{
'id': 'world-war-i-history',
'title': 'World War I History',
},
'playlist_mincount': 23,
}, {
'url': 'http://www.history.com/topics/world-war-i-history/videos',
'only_matching': True,
}, {
'url': 'http://www.history.com/topics/world-war-i/world-war-i-history',
'only_matching': True,
}, {
'url': 'http://www.history.com/topics/world-war-i/world-war-i-history/speeches',
'only_matching': True,
}] }]
def theplatform_url_result(self, theplatform_url, video_id, query): def theplatform_url_result(self, theplatform_url, video_id, query):
@ -215,27 +229,19 @@ class HistoryTopicIE(AENetworksBaseIE):
} }
def _real_extract(self, url): def _real_extract(self, url):
topic_id, video_display_id = re.match(self._VALID_URL, url).groups() display_id = self._match_id(url)
if video_display_id: webpage = self._download_webpage(url, display_id)
webpage = self._download_webpage(url, video_display_id) video_id = self._search_regex(
release_url, video_id = re.search(r"_videoPlayer.play\('([^']+)'\s*,\s*'[^']+'\s*,\s*'(\d+)'\)", webpage).groups() r'<phoenix-iframe[^>]+src="[^"]+\btpid=(\d+)', webpage, 'tpid')
release_url = unescapeHTML(release_url) result = self._download_json(
'https://feeds.video.aetnd.com/api/v2/history/videos',
return self.theplatform_url_result( video_id, query={'filter[id]': video_id})['results'][0]
release_url, video_id, { title = result['title']
'mbr': 'true', info = self._extract_aen_smil(result['publicUrl'], video_id)
'switch': 'hls', info.update({
'assetTypes': 'high_video_ak', 'title': title,
}) 'description': result.get('description'),
else: 'duration': int_or_none(result.get('duration')),
webpage = self._download_webpage(url, topic_id) 'timestamp': int_or_none(result.get('added'), 1000),
entries = [] })
for episode_item in re.findall(r'<a.+?data-release-url="[^"]+"[^>]*>', webpage): return info
video_attributes = extract_attributes(episode_item)
entries.append(self.theplatform_url_result(
video_attributes['data-release-url'], video_attributes['data-id'], {
'mbr': 'true',
'switch': 'hls',
'assetTypes': 'high_video_ak',
}))
return self.playlist_result(entries, topic_id, get_element_by_attribute('class', 'show-title', webpage))

View File

@ -4,6 +4,10 @@ from __future__ import unicode_literals
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import (
compat_parse_qs,
compat_urllib_parse_urlparse,
)
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
int_or_none, int_or_none,
@ -12,12 +16,12 @@ from ..utils import (
class AolIE(InfoExtractor): class AolIE(InfoExtractor):
IE_NAME = 'on.aol.com' IE_NAME = 'aol.com'
_VALID_URL = r'(?:aol-video:|https?://(?:(?:www|on)\.)?aol\.com/(?:[^/]+/)*(?:[^/?#&]+-)?)(?P<id>[^/?#&]+)' _VALID_URL = r'(?:aol-video:|https?://(?:www\.)?aol\.(?:com|ca|co\.uk|de|jp)/video/(?:[^/]+/)*)(?P<id>[0-9a-f]+)'
_TESTS = [{ _TESTS = [{
# video with 5min ID # video with 5min ID
'url': 'http://on.aol.com/video/u-s--official-warns-of-largest-ever-irs-phone-scam-518167793?icid=OnHomepageC2Wide_MustSee_Img', 'url': 'https://www.aol.com/video/view/u-s--official-warns-of-largest-ever-irs-phone-scam/518167793/',
'md5': '18ef68f48740e86ae94b98da815eec42', 'md5': '18ef68f48740e86ae94b98da815eec42',
'info_dict': { 'info_dict': {
'id': '518167793', 'id': '518167793',
@ -34,7 +38,7 @@ class AolIE(InfoExtractor):
} }
}, { }, {
# video with vidible ID # video with vidible ID
'url': 'http://www.aol.com/video/view/netflix-is-raising-rates/5707d6b8e4b090497b04f706/', 'url': 'https://www.aol.com/video/view/netflix-is-raising-rates/5707d6b8e4b090497b04f706/',
'info_dict': { 'info_dict': {
'id': '5707d6b8e4b090497b04f706', 'id': '5707d6b8e4b090497b04f706',
'ext': 'mp4', 'ext': 'mp4',
@ -49,17 +53,29 @@ class AolIE(InfoExtractor):
'skip_download': True, 'skip_download': True,
} }
}, { }, {
'url': 'http://on.aol.com/partners/abc-551438d309eab105804dbfe8/sneak-peek-was-haley-really-framed-570eaebee4b0448640a5c944', 'url': 'https://www.aol.com/video/view/park-bench-season-2-trailer/559a1b9be4b0c3bfad3357a7/',
'only_matching': True, 'only_matching': True,
}, { }, {
'url': 'http://on.aol.com/shows/park-bench-shw518173474-559a1b9be4b0c3bfad3357a7?context=SH:SHW518173474:PL4327:1460619712763', 'url': 'https://www.aol.com/video/view/donald-trump-spokeswoman-tones-down-megyn-kelly-attacks/519442220/',
'only_matching': True,
}, {
'url': 'http://on.aol.com/video/519442220',
'only_matching': True, 'only_matching': True,
}, { }, {
'url': 'aol-video:5707d6b8e4b090497b04f706', 'url': 'aol-video:5707d6b8e4b090497b04f706',
'only_matching': True, 'only_matching': True,
}, {
'url': 'https://www.aol.com/video/playlist/PL8245/5ca79d19d21f1a04035db606/',
'only_matching': True,
}, {
'url': 'https://www.aol.ca/video/view/u-s-woman-s-family-arrested-for-murder-first-pinned-on-panhandler-police/5c7ccf45bc03931fa04b2fe1/',
'only_matching': True,
}, {
'url': 'https://www.aol.co.uk/video/view/-one-dead-and-22-hurt-in-bus-crash-/5cb3a6f3d21f1a072b457347/',
'only_matching': True,
}, {
'url': 'https://www.aol.de/video/view/eva-braun-privataufnahmen-von-hitlers-geliebter-werden-digitalisiert/5cb2d49de98ab54c113d3d5d/',
'only_matching': True,
}, {
'url': 'https://www.aol.jp/video/playlist/5a28e936a1334d000137da0c/5a28f3151e642219fde19831/',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):
@ -73,7 +89,7 @@ class AolIE(InfoExtractor):
video_data = response['data'] video_data = response['data']
formats = [] formats = []
m3u8_url = video_data.get('videoMasterPlaylist') m3u8_url = url_or_none(video_data.get('videoMasterPlaylist'))
if m3u8_url: if m3u8_url:
formats.extend(self._extract_m3u8_formats( formats.extend(self._extract_m3u8_formats(
m3u8_url, video_id, 'mp4', m3u8_id='hls', fatal=False)) m3u8_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
@ -96,6 +112,12 @@ class AolIE(InfoExtractor):
'width': int(mobj.group(1)), 'width': int(mobj.group(1)),
'height': int(mobj.group(2)), 'height': int(mobj.group(2)),
}) })
else:
qs = compat_parse_qs(compat_urllib_parse_urlparse(video_url).query)
f.update({
'width': int_or_none(qs.get('w', [None])[0]),
'height': int_or_none(qs.get('h', [None])[0]),
})
formats.append(f) formats.append(f)
self._sort_formats(formats, ('width', 'height', 'tbr', 'format_id')) self._sort_formats(formats, ('width', 'height', 'tbr', 'format_id'))

View File

@ -0,0 +1,37 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import extract_attributes
class BFIPlayerIE(InfoExtractor):
    """Extractor for film pages on player.bfi.org.uk.

    Each element on the page carrying class="player" and a data-video-id
    attribute is delegated to the Ooyala extractor; the results are
    returned together as a playlist.
    """
    IE_NAME = 'bfi:player'
    _VALID_URL = r'https?://player\.bfi\.org\.uk/[^/]+/film/watch-(?P<id>[\w-]+)-online'
    _TEST = {
        'url': 'https://player.bfi.org.uk/free/film/watch-computer-doctor-1974-online',
        'md5': 'e8783ebd8e061ec4bc6e9501ed547de8',
        'info_dict': {
            'id': 'htNnhlZjE60C9VySkQEIBtU-cNV1Xx63',
            'ext': 'mp4',
            'title': 'Computer Doctor',
            'description': 'md5:fb6c240d40c4dbe40428bdd62f78203b',
        },
        'skip': 'BFI Player films cannot be played outside of the UK',
    }

    def _real_extract(self, url):
        """Download the film page and return a playlist of its Ooyala players."""
        film_id = self._match_id(url)
        page = self._download_webpage(url, film_id)

        playlist_entries = []
        for tag in re.findall(r'(?s)<[^>]+class="player"[^>]*>', page):
            attrs = extract_attributes(tag)
            embed_code = attrs.get('data-video-id')
            # Player elements without a video id cannot be resolved; skip them.
            if embed_code:
                playlist_entries.append(self.url_result(
                    'ooyala:' + embed_code, 'Ooyala',
                    embed_code, attrs.get('data-label')))

        return self.playlist_result(playlist_entries)

View File

@ -28,7 +28,7 @@ class BIQLEIE(InfoExtractor):
'url': 'http://biqle.org/watch/-44781847_168547604', 'url': 'http://biqle.org/watch/-44781847_168547604',
'md5': '7f24e72af1db0edf7c1aaba513174f97', 'md5': '7f24e72af1db0edf7c1aaba513174f97',
'info_dict': { 'info_dict': {
'id': '168547604', 'id': '-44781847_168547604',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Ребенок в шоке от автоматической мойки', 'title': 'Ребенок в шоке от автоматической мойки',
'timestamp': 1396633454, 'timestamp': 1396633454,

View File

@ -1,6 +1,8 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import re
from .adobepass import AdobePassIE from .adobepass import AdobePassIE
from ..utils import ( from ..utils import (
smuggle_url, smuggle_url,
@ -12,16 +14,16 @@ from ..utils import (
class BravoTVIE(AdobePassIE): class BravoTVIE(AdobePassIE):
_VALID_URL = r'https?://(?:www\.)?bravotv\.com/(?:[^/]+/)+(?P<id>[^/?#]+)' _VALID_URL = r'https?://(?:www\.)?bravotv\.com/(?:[^/]+/)+(?P<id>[^/?#]+)'
_TESTS = [{ _TESTS = [{
'url': 'http://www.bravotv.com/last-chance-kitchen/season-5/videos/lck-ep-12-fishy-finale', 'url': 'https://www.bravotv.com/top-chef/season-16/episode-15/videos/the-top-chef-season-16-winner-is',
'md5': '9086d0b7ef0ea2aabc4781d75f4e5863', 'md5': 'e34684cfea2a96cd2ee1ef3a60909de9',
'info_dict': { 'info_dict': {
'id': 'zHyk1_HU_mPy', 'id': 'epL0pmK1kQlT',
'ext': 'mp4', 'ext': 'mp4',
'title': 'LCK Ep 12: Fishy Finale', 'title': 'The Top Chef Season 16 Winner Is...',
'description': 'S13/E12: Two eliminated chefs have just 12 minutes to cook up a delicious fish dish.', 'description': 'Find out who takes the title of Top Chef!',
'uploader': 'NBCU-BRAV', 'uploader': 'NBCU-BRAV',
'upload_date': '20160302', 'upload_date': '20190314',
'timestamp': 1456945320, 'timestamp': 1552591860,
} }
}, { }, {
'url': 'http://www.bravotv.com/below-deck/season-3/ep-14-reunion-part-1', 'url': 'http://www.bravotv.com/below-deck/season-3/ep-14-reunion-part-1',
@ -32,30 +34,38 @@ class BravoTVIE(AdobePassIE):
display_id = self._match_id(url) display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id) webpage = self._download_webpage(url, display_id)
settings = self._parse_json(self._search_regex( settings = self._parse_json(self._search_regex(
r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);', webpage, 'drupal settings'), r'<script[^>]+data-drupal-selector="drupal-settings-json"[^>]*>({.+?})</script>', webpage, 'drupal settings'),
display_id) display_id)
info = {} info = {}
query = { query = {
'mbr': 'true', 'mbr': 'true',
} }
account_pid, release_pid = [None] * 2 account_pid, release_pid = [None] * 2
tve = settings.get('sharedTVE') tve = settings.get('ls_tve')
if tve: if tve:
query['manifest'] = 'm3u' query['manifest'] = 'm3u'
account_pid = 'HNK2IC' mobj = re.search(r'<[^>]+id="pdk-player"[^>]+data-url=["\']?(?:https?:)?//player\.theplatform\.com/p/([^/]+)/(?:[^/]+/)*select/([^?#&"\']+)', webpage)
release_pid = tve['release_pid'] if mobj:
account_pid, tp_path = mobj.groups()
release_pid = tp_path.strip('/').split('/')[-1]
else:
account_pid = 'HNK2IC'
tp_path = release_pid = tve['release_pid']
if tve.get('entitlement') == 'auth': if tve.get('entitlement') == 'auth':
adobe_pass = settings.get('adobePass', {}) adobe_pass = settings.get('tve_adobe_auth', {})
resource = self._get_mvpd_resource( resource = self._get_mvpd_resource(
adobe_pass.get('adobePassResourceId', 'bravo'), adobe_pass.get('adobePassResourceId', 'bravo'),
tve['title'], release_pid, tve.get('rating')) tve['title'], release_pid, tve.get('rating'))
query['auth'] = self._extract_mvpd_auth( query['auth'] = self._extract_mvpd_auth(
url, release_pid, adobe_pass.get('adobePassRequestorId', 'bravo'), resource) url, release_pid, adobe_pass.get('adobePassRequestorId', 'bravo'), resource)
else: else:
shared_playlist = settings['shared_playlist'] shared_playlist = settings['ls_playlist']
account_pid = shared_playlist['account_pid'] account_pid = shared_playlist['account_pid']
metadata = shared_playlist['video_metadata'][shared_playlist['default_clip']] metadata = shared_playlist['video_metadata'][shared_playlist['default_clip']]
release_pid = metadata['release_pid'] tp_path = release_pid = metadata.get('release_pid')
if not release_pid:
release_pid = metadata['guid']
tp_path = 'media/guid/2140479951/' + release_pid
info.update({ info.update({
'title': metadata['title'], 'title': metadata['title'],
'description': metadata.get('description'), 'description': metadata.get('description'),
@ -67,7 +77,7 @@ class BravoTVIE(AdobePassIE):
'_type': 'url_transparent', '_type': 'url_transparent',
'id': release_pid, 'id': release_pid,
'url': smuggle_url(update_url_query( 'url': smuggle_url(update_url_query(
'http://link.theplatform.com/s/%s/%s' % (account_pid, release_pid), 'http://link.theplatform.com/s/%s/%s' % (account_pid, tp_path),
query), {'force_smil_url': True}), query), {'force_smil_url': True}),
'ie_key': 'ThePlatform', 'ie_key': 'ThePlatform',
}) })

View File

@ -13,13 +13,17 @@ from ..utils import (
class CBSBaseIE(ThePlatformFeedIE): class CBSBaseIE(ThePlatformFeedIE):
def _parse_smil_subtitles(self, smil, namespace=None, subtitles_lang='en'): def _parse_smil_subtitles(self, smil, namespace=None, subtitles_lang='en'):
closed_caption_e = find_xpath_attr(smil, self._xpath_ns('.//param', namespace), 'name', 'ClosedCaptionURL') subtitles = {}
return { for k, ext in [('sMPTE-TTCCURL', 'tt'), ('ClosedCaptionURL', 'ttml'), ('webVTTCaptionURL', 'vtt')]:
'en': [{ cc_e = find_xpath_attr(smil, self._xpath_ns('.//param', namespace), 'name', k)
'ext': 'ttml', if cc_e is not None:
'url': closed_caption_e.attrib['value'], cc_url = cc_e.get('value')
}] if cc_url:
} if closed_caption_e is not None and closed_caption_e.attrib.get('value') else [] subtitles.setdefault(subtitles_lang, []).append({
'ext': ext,
'url': cc_url,
})
return subtitles
class CBSIE(CBSBaseIE): class CBSIE(CBSBaseIE):

View File

@ -2019,6 +2019,8 @@ class InfoExtractor(object):
if res is False: if res is False:
return [] return []
mpd_doc, urlh = res mpd_doc, urlh = res
if mpd_doc is None:
return []
mpd_base_url = base_url(urlh.geturl()) mpd_base_url = base_url(urlh.geturl())
return self._parse_mpd_formats( return self._parse_mpd_formats(

View File

@ -58,10 +58,17 @@ class DigitallySpeakingIE(InfoExtractor):
stream_name = xpath_text(a_format, 'streamName', fatal=True) stream_name = xpath_text(a_format, 'streamName', fatal=True)
video_path = re.match(r'mp4\:(?P<path>.*)', stream_name).group('path') video_path = re.match(r'mp4\:(?P<path>.*)', stream_name).group('path')
url = video_root + video_path url = video_root + video_path
vbr = xpath_text(a_format, 'bitrate') bitrate = xpath_text(a_format, 'bitrate')
tbr = int_or_none(bitrate)
vbr = int_or_none(self._search_regex(
r'-(\d+)\.mp4', video_path, 'vbr', default=None))
abr = tbr - vbr if tbr and vbr else None
video_formats.append({ video_formats.append({
'format_id': bitrate,
'url': url, 'url': url,
'vbr': int_or_none(vbr), 'tbr': tbr,
'vbr': vbr,
'abr': abr,
}) })
return video_formats return video_formats

View File

@ -10,16 +10,16 @@ from ..utils import (
int_or_none, int_or_none,
js_to_json, js_to_json,
mimetype2ext, mimetype2ext,
try_get,
unescapeHTML, unescapeHTML,
parse_iso8601,
) )
class DVTVIE(InfoExtractor): class DVTVIE(InfoExtractor):
IE_NAME = 'dvtv' IE_NAME = 'dvtv'
IE_DESC = 'http://video.aktualne.cz/' IE_DESC = 'http://video.aktualne.cz/'
_VALID_URL = r'https?://video\.aktualne\.cz/(?:[^/]+/)+r~(?P<id>[0-9a-f]{32})' _VALID_URL = r'https?://video\.aktualne\.cz/(?:[^/]+/)+r~(?P<id>[0-9a-f]{32})'
_TESTS = [{ _TESTS = [{
'url': 'http://video.aktualne.cz/dvtv/vondra-o-ceskem-stoleti-pri-pohledu-na-havla-mi-bylo-trapne/r~e5efe9ca855511e4833a0025900fea04/', 'url': 'http://video.aktualne.cz/dvtv/vondra-o-ceskem-stoleti-pri-pohledu-na-havla-mi-bylo-trapne/r~e5efe9ca855511e4833a0025900fea04/',
'md5': '67cb83e4a955d36e1b5d31993134a0c2', 'md5': '67cb83e4a955d36e1b5d31993134a0c2',
@ -28,11 +28,13 @@ class DVTVIE(InfoExtractor):
'ext': 'mp4', 'ext': 'mp4',
'title': 'Vondra o Českém století: Při pohledu na Havla mi bylo trapně', 'title': 'Vondra o Českém století: Při pohledu na Havla mi bylo trapně',
'duration': 1484, 'duration': 1484,
'upload_date': '20141217',
'timestamp': 1418792400,
} }
}, { }, {
'url': 'http://video.aktualne.cz/dvtv/dvtv-16-12-2014-utok-talibanu-boj-o-kliniku-uprchlici/r~973eb3bc854e11e498be002590604f2e/', 'url': 'http://video.aktualne.cz/dvtv/dvtv-16-12-2014-utok-talibanu-boj-o-kliniku-uprchlici/r~973eb3bc854e11e498be002590604f2e/',
'info_dict': { 'info_dict': {
'title': r're:^DVTV 16\. 12\. 2014: útok Talibanu, boj o kliniku, uprchlíci', 'title': r'DVTV 16. 12. 2014: útok Talibanu, boj o kliniku, uprchlíci',
'id': '973eb3bc854e11e498be002590604f2e', 'id': '973eb3bc854e11e498be002590604f2e',
}, },
'playlist': [{ 'playlist': [{
@ -84,6 +86,8 @@ class DVTVIE(InfoExtractor):
'ext': 'mp4', 'ext': 'mp4',
'title': 'Zeman si jen léčí mindráky, Sobotku nenávidí a Babiš se mu teď hodí, tvrdí Kmenta', 'title': 'Zeman si jen léčí mindráky, Sobotku nenávidí a Babiš se mu teď hodí, tvrdí Kmenta',
'duration': 1103, 'duration': 1103,
'upload_date': '20170511',
'timestamp': 1494514200,
}, },
'params': { 'params': {
'skip_download': True, 'skip_download': True,
@ -91,43 +95,59 @@ class DVTVIE(InfoExtractor):
}, { }, {
'url': 'http://video.aktualne.cz/v-cechach-poprve-zazni-zelenkova-zrestaurovana-mse/r~45b4b00483ec11e4883b002590604f2e/', 'url': 'http://video.aktualne.cz/v-cechach-poprve-zazni-zelenkova-zrestaurovana-mse/r~45b4b00483ec11e4883b002590604f2e/',
'only_matching': True, 'only_matching': True,
}, {
# Test live stream video (liveStarter) parsing
'url': 'https://video.aktualne.cz/dvtv/zive-mistryne-sveta-eva-samkova-po-navratu-ze-sampionatu/r~182654c2288811e990fd0cc47ab5f122/',
'md5': '2e552e483f2414851ca50467054f9d5d',
'info_dict': {
'id': '8d116360288011e98c840cc47ab5f122',
'ext': 'mp4',
'title': 'Živě: Mistryně světa Eva Samková po návratu ze šampionátu',
'upload_date': '20190204',
'timestamp': 1549289591,
},
'params': {
# Video content is no longer available
'skip_download': True,
},
}] }]
def _parse_video_metadata(self, js, video_id, live_js=None): def _parse_video_metadata(self, js, video_id, timestamp):
data = self._parse_json(js, video_id, transform_source=js_to_json) data = self._parse_json(js, video_id, transform_source=js_to_json)
if live_js:
data.update(self._parse_json(
live_js, video_id, transform_source=js_to_json))
title = unescapeHTML(data['title']) title = unescapeHTML(data['title'])
live_starter = try_get(data, lambda x: x['plugins']['liveStarter'], dict)
if live_starter:
data.update(live_starter)
formats = [] formats = []
for video in data['sources']: for tracks in data.get('tracks', {}).values():
video_url = video.get('file') for video in tracks:
if not video_url: video_url = video.get('src')
continue if not video_url:
video_type = video.get('type') continue
ext = determine_ext(video_url, mimetype2ext(video_type)) video_type = video.get('type')
if video_type == 'application/vnd.apple.mpegurl' or ext == 'm3u8': ext = determine_ext(video_url, mimetype2ext(video_type))
formats.extend(self._extract_m3u8_formats( if video_type == 'application/vnd.apple.mpegurl' or ext == 'm3u8':
video_url, video_id, 'mp4', entry_protocol='m3u8_native', formats.extend(self._extract_m3u8_formats(
m3u8_id='hls', fatal=False)) video_url, video_id, 'mp4', entry_protocol='m3u8_native',
elif video_type == 'application/dash+xml' or ext == 'mpd': m3u8_id='hls', fatal=False))
formats.extend(self._extract_mpd_formats( elif video_type == 'application/dash+xml' or ext == 'mpd':
video_url, video_id, mpd_id='dash', fatal=False)) formats.extend(self._extract_mpd_formats(
else: video_url, video_id, mpd_id='dash', fatal=False))
label = video.get('label') else:
height = self._search_regex( label = video.get('label')
r'^(\d+)[pP]', label or '', 'height', default=None) height = self._search_regex(
format_id = ['http'] r'^(\d+)[pP]', label or '', 'height', default=None)
for f in (ext, label): format_id = ['http']
if f: for f in (ext, label):
format_id.append(f) if f:
formats.append({ format_id.append(f)
'url': video_url, formats.append({
'format_id': '-'.join(format_id), 'url': video_url,
'height': int_or_none(height), 'format_id': '-'.join(format_id),
}) 'height': int_or_none(height),
})
self._sort_formats(formats) self._sort_formats(formats)
return { return {
@ -136,41 +156,29 @@ class DVTVIE(InfoExtractor):
'description': data.get('description'), 'description': data.get('description'),
'thumbnail': data.get('image'), 'thumbnail': data.get('image'),
'duration': int_or_none(data.get('duration')), 'duration': int_or_none(data.get('duration')),
'timestamp': int_or_none(data.get('pubtime')), 'timestamp': int_or_none(timestamp),
'formats': formats 'formats': formats
} }
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
timestamp = parse_iso8601(self._html_search_meta(
'article:published_time', webpage, 'published time', default=None))
# live content items = re.findall(r'(?s)playlist\.push\(({.+?})\);', webpage)
live_item = self._search_regex(
r'(?s)embedData[0-9a-f]{32}\.asset\.liveStarter\s*=\s*(\{.+?\});',
webpage, 'video', default=None)
# single video
item = self._search_regex(
r'(?s)embedData[0-9a-f]{32}\[["\']asset["\']\]\s*=\s*(\{.+?\});',
webpage, 'video', default=None)
if item:
return self._parse_video_metadata(item, video_id, live_item)
# playlist
items = re.findall(
r"(?s)BBX\.context\.assets\['[0-9a-f]{32}'\]\.push\(({.+?})\);",
webpage)
if not items:
items = re.findall(r'(?s)var\s+asset\s*=\s*({.+?});\n', webpage)
if items: if items:
return { return self.playlist_result(
'_type': 'playlist', [self._parse_video_metadata(i, video_id, timestamp) for i in items],
'id': video_id, video_id, self._html_search_meta('twitter:title', webpage))
'title': self._og_search_title(webpage),
'entries': [self._parse_video_metadata(i, video_id) for i in items] item = self._search_regex(
} r'(?s)BBXPlayer\.setup\((.+?)\);',
webpage, 'video', default=None)
if item:
# remove function calls (ex. htmldeentitize)
# TODO this should be fixed in a general way in the js_to_json
item = re.sub(r'\w+?\((.+)\)', r'\1', item)
return self._parse_video_metadata(item, video_id, timestamp)
raise ExtractorError('Could not find neither video nor playlist') raise ExtractorError('Could not find neither video nor playlist')

View File

@ -20,6 +20,7 @@ from .acast import (
) )
from .addanime import AddAnimeIE from .addanime import AddAnimeIE
from .adn import ADNIE from .adn import ADNIE
from .adobeconnect import AdobeConnectIE
from .adobetv import ( from .adobetv import (
AdobeTVIE, AdobeTVIE,
AdobeTVShowIE, AdobeTVShowIE,
@ -106,6 +107,7 @@ from .behindkink import BehindKinkIE
from .bellmedia import BellMediaIE from .bellmedia import BellMediaIE
from .beatport import BeatportIE from .beatport import BeatportIE
from .bet import BetIE from .bet import BetIE
from .bfi import BFIPlayerIE
from .bigflix import BigflixIE from .bigflix import BigflixIE
from .bild import BildIE from .bild import BildIE
from .bilibili import ( from .bilibili import (
@ -440,10 +442,7 @@ from .goshgay import GoshgayIE
from .gputechconf import GPUTechConfIE from .gputechconf import GPUTechConfIE
from .groupon import GrouponIE from .groupon import GrouponIE
from .hark import HarkIE from .hark import HarkIE
from .hbo import ( from .hbo import HBOIE
HBOIE,
HBOEpisodeIE,
)
from .hearthisat import HearThisAtIE from .hearthisat import HearThisAtIE
from .heise import HeiseIE from .heise import HeiseIE
from .hellporno import HellPornoIE from .hellporno import HellPornoIE
@ -635,6 +634,7 @@ from .mediaset import MediasetIE
from .mediasite import ( from .mediasite import (
MediasiteIE, MediasiteIE,
MediasiteCatalogIE, MediasiteCatalogIE,
MediasiteNamedCatalogIE,
) )
from .medici import MediciIE from .medici import MediciIE
from .megaphone import MegaphoneIE from .megaphone import MegaphoneIE
@ -808,6 +808,8 @@ from .nrk import (
NRKTVSeasonIE, NRKTVSeasonIE,
NRKTVSeriesIE, NRKTVSeriesIE,
) )
from .nrl import NRLTVIE
from .ntvcojp import NTVCoJpCUIE
from .ntvde import NTVDeIE from .ntvde import NTVDeIE
from .ntvru import NTVRuIE from .ntvru import NTVRuIE
from .nytimes import ( from .nytimes import (
@ -868,6 +870,10 @@ from .picarto import (
from .piksel import PikselIE from .piksel import PikselIE
from .pinkbike import PinkbikeIE from .pinkbike import PinkbikeIE
from .pladform import PladformIE from .pladform import PladformIE
from .platzi import (
PlatziIE,
PlatziCourseIE,
)
from .playfm import PlayFMIE from .playfm import PlayFMIE
from .playplustv import PlayPlusTVIE from .playplustv import PlayPlusTVIE
from .plays import PlaysTVIE from .plays import PlaysTVIE
@ -1089,6 +1095,7 @@ from .streamcloud import StreamcloudIE
from .streamcz import StreamCZIE from .streamcz import StreamCZIE
from .streetvoice import StreetVoiceIE from .streetvoice import StreetVoiceIE
from .stretchinternet import StretchInternetIE from .stretchinternet import StretchInternetIE
from .stv import STVPlayerIE
from .sunporno import SunPornoIE from .sunporno import SunPornoIE
from .svt import ( from .svt import (
SVTIE, SVTIE,
@ -1447,6 +1454,8 @@ from .xxxymovies import XXXYMoviesIE
from .yahoo import ( from .yahoo import (
YahooIE, YahooIE,
YahooSearchIE, YahooSearchIE,
YahooGyaOPlayerIE,
YahooGyaOIE,
) )
from .yandexdisk import YandexDiskIE from .yandexdisk import YandexDiskIE
from .yandexmusic import ( from .yandexmusic import (

View File

@ -4,12 +4,17 @@ from __future__ import unicode_literals
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_str from ..compat import (
compat_str,
compat_urllib_parse_unquote,
)
from ..utils import ( from ..utils import (
ExtractorError,
int_or_none, int_or_none,
str_or_none, str_or_none,
strip_or_none, strip_or_none,
try_get, try_get,
urlencode_postdata,
) )
@ -46,6 +51,29 @@ class GaiaIE(InfoExtractor):
'skip_download': True, 'skip_download': True,
}, },
}] }]
_NETRC_MACHINE = 'gaia'
_jwt = None
def _real_initialize(self):
    # Obtain a JWT for authenticated media requests and store it on
    # self._jwt.  An existing `auth` cookie for www.gaia.com is tried
    # first; only when it is missing or cannot be parsed do we fall back
    # to logging in with the configured credentials.
    session = None
    auth_cookie = self._get_cookies('https://www.gaia.com/').get('auth')
    if auth_cookie:
        # The cookie value is URL-quoted JSON; a parse failure is
        # non-fatal so that the credential login below can still run.
        session = self._parse_json(
            compat_urllib_parse_unquote(auth_cookie.value),
            None, fatal=False)
    if not session:
        username, password = self._get_login_info()
        if username is None:
            # No credentials supplied: continue without a token.
            return
        credentials = urlencode_postdata({
            'username': username,
            'password': password
        })
        session = self._download_json(
            'https://auth.gaia.com/v1/login', None, data=credentials)
        # Only an explicit `success: false` is treated as a login failure.
        if session.get('success') is False:
            raise ExtractorError(
                ', '.join(session['messages']), expected=True)
    if session:
        self._jwt = session.get('jwt')
def _real_extract(self, url): def _real_extract(self, url):
display_id, vtype = re.search(self._VALID_URL, url).groups() display_id, vtype = re.search(self._VALID_URL, url).groups()
@ -59,8 +87,12 @@ class GaiaIE(InfoExtractor):
media_id = compat_str(vdata['nid']) media_id = compat_str(vdata['nid'])
title = node['title'] title = node['title']
headers = None
if self._jwt:
headers = {'Authorization': 'Bearer ' + self._jwt}
media = self._download_json( media = self._download_json(
'https://brooklyn.gaia.com/media/' + media_id, media_id) 'https://brooklyn.gaia.com/media/' + media_id,
media_id, headers=headers)
formats = self._extract_m3u8_formats( formats = self._extract_m3u8_formats(
media['mediaUrls']['bcHLS'], media_id, 'mp4') media['mediaUrls']['bcHLS'], media_id, 'mp4')
self._sort_formats(formats) self._sort_formats(formats)

View File

@ -3,22 +3,24 @@ from __future__ import unicode_literals
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from .kaltura import KalturaIE
from ..utils import ( from ..utils import (
HEADRequest, HEADRequest,
sanitized_Request, sanitized_Request,
smuggle_url,
urlencode_postdata, urlencode_postdata,
) )
class GDCVaultIE(InfoExtractor): class GDCVaultIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?gdcvault\.com/play/(?P<id>\d+)/(?P<name>(\w|-)+)?' _VALID_URL = r'https?://(?:www\.)?gdcvault\.com/play/(?P<id>\d+)(?:/(?P<name>[\w-]+))?'
_NETRC_MACHINE = 'gdcvault' _NETRC_MACHINE = 'gdcvault'
_TESTS = [ _TESTS = [
{ {
'url': 'http://www.gdcvault.com/play/1019721/Doki-Doki-Universe-Sweet-Simple', 'url': 'http://www.gdcvault.com/play/1019721/Doki-Doki-Universe-Sweet-Simple',
'md5': '7ce8388f544c88b7ac11c7ab1b593704', 'md5': '7ce8388f544c88b7ac11c7ab1b593704',
'info_dict': { 'info_dict': {
'id': '1019721', 'id': '201311826596_AWNY',
'display_id': 'Doki-Doki-Universe-Sweet-Simple', 'display_id': 'Doki-Doki-Universe-Sweet-Simple',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Doki-Doki Universe: Sweet, Simple and Genuine (GDC Next 10)' 'title': 'Doki-Doki Universe: Sweet, Simple and Genuine (GDC Next 10)'
@ -27,7 +29,7 @@ class GDCVaultIE(InfoExtractor):
{ {
'url': 'http://www.gdcvault.com/play/1015683/Embracing-the-Dark-Art-of', 'url': 'http://www.gdcvault.com/play/1015683/Embracing-the-Dark-Art-of',
'info_dict': { 'info_dict': {
'id': '1015683', 'id': '201203272_1330951438328RSXR',
'display_id': 'Embracing-the-Dark-Art-of', 'display_id': 'Embracing-the-Dark-Art-of',
'ext': 'flv', 'ext': 'flv',
'title': 'Embracing the Dark Art of Mathematical Modeling in AI' 'title': 'Embracing the Dark Art of Mathematical Modeling in AI'
@ -56,7 +58,7 @@ class GDCVaultIE(InfoExtractor):
'url': 'http://gdcvault.com/play/1023460/Tenacious-Design-and-The-Interface', 'url': 'http://gdcvault.com/play/1023460/Tenacious-Design-and-The-Interface',
'md5': 'a8efb6c31ed06ca8739294960b2dbabd', 'md5': 'a8efb6c31ed06ca8739294960b2dbabd',
'info_dict': { 'info_dict': {
'id': '1023460', 'id': '840376_BQRC',
'ext': 'mp4', 'ext': 'mp4',
'display_id': 'Tenacious-Design-and-The-Interface', 'display_id': 'Tenacious-Design-and-The-Interface',
'title': 'Tenacious Design and The Interface of \'Destiny\'', 'title': 'Tenacious Design and The Interface of \'Destiny\'',
@ -66,26 +68,38 @@ class GDCVaultIE(InfoExtractor):
# Multiple audios # Multiple audios
'url': 'http://www.gdcvault.com/play/1014631/Classic-Game-Postmortem-PAC', 'url': 'http://www.gdcvault.com/play/1014631/Classic-Game-Postmortem-PAC',
'info_dict': { 'info_dict': {
'id': '1014631', 'id': '12396_1299111843500GMPX',
'ext': 'flv', 'ext': 'mp4',
'title': 'How to Create a Good Game - From My Experience of Designing Pac-Man', 'title': 'How to Create a Good Game - From My Experience of Designing Pac-Man',
}, },
'params': { # 'params': {
'skip_download': True, # Requires rtmpdump # 'skip_download': True, # Requires rtmpdump
'format': 'jp', # The japanese audio # 'format': 'jp', # The japanese audio
} # }
}, },
{ {
# gdc-player.html # gdc-player.html
'url': 'http://www.gdcvault.com/play/1435/An-American-engine-in-Tokyo', 'url': 'http://www.gdcvault.com/play/1435/An-American-engine-in-Tokyo',
'info_dict': { 'info_dict': {
'id': '1435', 'id': '9350_1238021887562UHXB',
'display_id': 'An-American-engine-in-Tokyo', 'display_id': 'An-American-engine-in-Tokyo',
'ext': 'flv', 'ext': 'mp4',
'title': 'An American Engine in Tokyo:/nThe collaboration of Epic Games and Square Enix/nFor THE LAST REMINANT', 'title': 'An American Engine in Tokyo:/nThe collaboration of Epic Games and Square Enix/nFor THE LAST REMINANT',
}, },
},
{
# Kaltura Embed
'url': 'https://www.gdcvault.com/play/1026180/Mastering-the-Apex-of-Scaling',
'info_dict': {
'id': '0_h1fg8j3p',
'ext': 'mp4',
'title': 'Mastering the Apex of Scaling Game Servers (Presented by Multiplay)',
'timestamp': 1554401811,
'upload_date': '20190404',
'uploader_id': 'joe@blazestreaming.com',
},
'params': { 'params': {
'skip_download': True, # Requires rtmpdump 'format': 'mp4-408',
}, },
}, },
] ]
@ -114,10 +128,8 @@ class GDCVaultIE(InfoExtractor):
return start_page return start_page
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) video_id, name = re.match(self._VALID_URL, url).groups()
display_id = name or video_id
video_id = mobj.group('id')
display_id = mobj.group('name') or video_id
webpage_url = 'http://www.gdcvault.com/play/' + video_id webpage_url = 'http://www.gdcvault.com/play/' + video_id
start_page = self._download_webpage(webpage_url, display_id) start_page = self._download_webpage(webpage_url, display_id)
@ -127,12 +139,12 @@ class GDCVaultIE(InfoExtractor):
start_page, 'url', default=None) start_page, 'url', default=None)
if direct_url: if direct_url:
title = self._html_search_regex( title = self._html_search_regex(
r'<td><strong>Session Name</strong></td>\s*<td>(.*?)</td>', r'<td><strong>Session Name:?</strong></td>\s*<td>(.*?)</td>',
start_page, 'title') start_page, 'title')
video_url = 'http://www.gdcvault.com' + direct_url video_url = 'http://www.gdcvault.com' + direct_url
# resolve the url so that we can detect the correct extension # resolve the url so that we can detect the correct extension
head = self._request_webpage(HEADRequest(video_url), video_id) video_url = self._request_webpage(
video_url = head.geturl() HEADRequest(video_url), video_id).geturl()
return { return {
'id': video_id, 'id': video_id,
@ -141,34 +153,36 @@ class GDCVaultIE(InfoExtractor):
'title': title, 'title': title,
} }
PLAYER_REGEX = r'<iframe src="(?P<xml_root>.+?)/(?:gdc-)?player.*?\.html.*?".*?</iframe>' embed_url = KalturaIE._extract_url(start_page)
if embed_url:
embed_url = smuggle_url(embed_url, {'source_url': url})
ie_key = 'Kaltura'
else:
PLAYER_REGEX = r'<iframe src="(?P<xml_root>.+?)/(?:gdc-)?player.*?\.html.*?".*?</iframe>'
xml_root = self._html_search_regex( xml_root = self._html_search_regex(
PLAYER_REGEX, start_page, 'xml root', default=None) PLAYER_REGEX, start_page, 'xml root', default=None)
if xml_root is None: if xml_root is None:
# Probably need to authenticate # Probably need to authenticate
login_res = self._login(webpage_url, display_id) login_res = self._login(webpage_url, display_id)
if login_res is None: if login_res is None:
self.report_warning('Could not login.') self.report_warning('Could not login.')
else: else:
start_page = login_res start_page = login_res
# Grab the url from the authenticated page # Grab the url from the authenticated page
xml_root = self._html_search_regex( xml_root = self._html_search_regex(
PLAYER_REGEX, start_page, 'xml root') PLAYER_REGEX, start_page, 'xml root')
xml_name = self._html_search_regex(
r'<iframe src=".*?\?xml=(.+?\.xml).*?".*?</iframe>',
start_page, 'xml filename', default=None)
if xml_name is None:
# Fallback to the older format
xml_name = self._html_search_regex( xml_name = self._html_search_regex(
r'<iframe src=".*?\?xmlURL=xml/(?P<xml_file>.+?\.xml).*?".*?</iframe>', r'<iframe src=".*?\?xml(?:=|URL=xml/)(.+?\.xml).*?".*?</iframe>',
start_page, 'xml filename') start_page, 'xml filename')
embed_url = '%s/xml/%s' % (xml_root, xml_name)
ie_key = 'DigitallySpeaking'
return { return {
'_type': 'url_transparent', '_type': 'url_transparent',
'id': video_id, 'id': video_id,
'display_id': display_id, 'display_id': display_id,
'url': '%s/xml/%s' % (xml_root, xml_name), 'url': embed_url,
'ie_key': 'DigitallySpeaking', 'ie_key': ie_key,
} }

View File

@ -4,16 +4,28 @@ from __future__ import unicode_literals
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_str
from ..utils import ( from ..utils import (
xpath_text, xpath_text,
xpath_element, xpath_element,
int_or_none, int_or_none,
parse_duration, parse_duration,
urljoin,
) )
class HBOBaseIE(InfoExtractor): class HBOIE(InfoExtractor):
IE_NAME = 'hbo'
_VALID_URL = r'https?://(?:www\.)?hbo\.com/(?:video|embed)(?:/[^/]+)*/(?P<id>[^/?#]+)'
_TEST = {
'url': 'https://www.hbo.com/video/game-of-thrones/seasons/season-8/videos/trailer',
'md5': '8126210656f433c452a21367f9ad85b3',
'info_dict': {
'id': '22113301',
'ext': 'mp4',
'title': 'Game of Thrones - Trailer',
},
'expected_warnings': ['Unknown MIME type application/mp4 in DASH manifest'],
}
_FORMATS_INFO = { _FORMATS_INFO = {
'pro7': { 'pro7': {
'width': 1280, 'width': 1280,
@ -53,10 +65,17 @@ class HBOBaseIE(InfoExtractor):
}, },
} }
def _extract_from_id(self, video_id): def _real_extract(self, url):
video_data = self._download_xml( display_id = self._match_id(url)
'http://render.lv3.hbo.com/data/content/global/videos/data/%s.xml' % video_id, video_id) webpage = self._download_webpage(url, display_id)
title = xpath_text(video_data, 'title', 'title', True) location_path = self._parse_json(self._html_search_regex(
r'data-state="({.+?})"', webpage, 'state'), display_id)['video']['locationUrl']
video_data = self._download_xml(urljoin(url, location_path), display_id)
video_id = xpath_text(video_data, 'id', fatal=True)
episode_title = title = xpath_text(video_data, 'title', fatal=True)
series = xpath_text(video_data, 'program')
if series:
title = '%s - %s' % (series, title)
formats = [] formats = []
for source in xpath_element(video_data, 'videos', 'sources', True): for source in xpath_element(video_data, 'videos', 'sources', True):
@ -128,68 +147,23 @@ class HBOBaseIE(InfoExtractor):
'width': width, 'width': width,
}) })
subtitles = None
caption_url = xpath_text(video_data, 'captionUrl')
if caption_url:
subtitles = {
'en': [{
'url': caption_url,
'ext': 'ttml'
}],
}
return { return {
'id': video_id, 'id': video_id,
'title': title, 'title': title,
'duration': parse_duration(xpath_text(video_data, 'duration/tv14')), 'duration': parse_duration(xpath_text(video_data, 'duration/tv14')),
'series': series,
'episode': episode_title,
'formats': formats, 'formats': formats,
'thumbnails': thumbnails, 'thumbnails': thumbnails,
'subtitles': subtitles,
} }
class HBOIE(HBOBaseIE):
IE_NAME = 'hbo'
_VALID_URL = r'https?://(?:www\.)?hbo\.com/video/video\.html\?.*vid=(?P<id>[0-9]+)'
_TEST = {
'url': 'http://www.hbo.com/video/video.html?autoplay=true&g=u&vid=1437839',
'md5': '2c6a6bc1222c7e91cb3334dad1746e5a',
'info_dict': {
'id': '1437839',
'ext': 'mp4',
'title': 'Ep. 64 Clip: Encryption',
'thumbnail': r're:https?://.*\.jpg$',
'duration': 1072,
}
}
def _real_extract(self, url):
video_id = self._match_id(url)
return self._extract_from_id(video_id)
class HBOEpisodeIE(HBOBaseIE):
IE_NAME = 'hbo:episode'
_VALID_URL = r'https?://(?:www\.)?hbo\.com/(?P<path>(?!video)(?:(?:[^/]+/)+video|watch-free-episodes)/(?P<id>[0-9a-z-]+))(?:\.html)?'
_TESTS = [{
'url': 'http://www.hbo.com/girls/episodes/5/52-i-love-you-baby/video/ep-52-inside-the-episode.html?autoplay=true',
'md5': '61ead79b9c0dfa8d3d4b07ef4ac556fb',
'info_dict': {
'id': '1439518',
'display_id': 'ep-52-inside-the-episode',
'ext': 'mp4',
'title': 'Ep. 52: Inside the Episode',
'thumbnail': r're:https?://.*\.jpg$',
'duration': 240,
},
}, {
'url': 'http://www.hbo.com/game-of-thrones/about/video/season-5-invitation-to-the-set.html?autoplay=true',
'only_matching': True,
}, {
'url': 'http://www.hbo.com/watch-free-episodes/last-week-tonight-with-john-oliver',
'only_matching': True,
}]
def _real_extract(self, url):
path, display_id = re.match(self._VALID_URL, url).groups()
content = self._download_json(
'http://www.hbo.com/api/content/' + path, display_id)['content']
video_id = compat_str((content.get('parsed', {}).get(
'common:FullBleedVideo', {}) or content['selectedEpisode'])['videoId'])
info_dict = self._extract_from_id(video_id)
info_dict['display_id'] = display_id
return info_dict

View File

@ -7,7 +7,7 @@ from .common import InfoExtractor
class JWPlatformIE(InfoExtractor): class JWPlatformIE(InfoExtractor):
_VALID_URL = r'(?:https?://(?:content\.jwplatform|cdn\.jwplayer)\.com/(?:(?:feed|player|thumb|preview|video|manifest)s|jw6|v2/media)/|jwplatform:)(?P<id>[a-zA-Z0-9]{8})' _VALID_URL = r'(?:https?://(?:content\.jwplatform|cdn\.jwplayer)\.com/(?:(?:feed|player|thumb|preview|video)s|jw6|v2/media)/|jwplatform:)(?P<id>[a-zA-Z0-9]{8})'
_TESTS = [{ _TESTS = [{
'url': 'http://content.jwplatform.com/players/nPripu9l-ALJ3XQCI.js', 'url': 'http://content.jwplatform.com/players/nPripu9l-ALJ3XQCI.js',
'md5': 'fa8899fa601eb7c83a64e9d568bdf325', 'md5': 'fa8899fa601eb7c83a64e9d568bdf325',

View File

@ -145,6 +145,8 @@ class KalturaIE(InfoExtractor):
) )
if mobj: if mobj:
embed_info = mobj.groupdict() embed_info = mobj.groupdict()
for k, v in embed_info.items():
embed_info[k] = v.strip()
url = 'kaltura:%(partner_id)s:%(id)s' % embed_info url = 'kaltura:%(partner_id)s:%(id)s' % embed_info
escaped_pid = re.escape(embed_info['partner_id']) escaped_pid = re.escape(embed_info['partner_id'])
service_url = re.search( service_url = re.search(

View File

@ -9,11 +9,13 @@ from ..utils import (
float_or_none, float_or_none,
int_or_none, int_or_none,
urlencode_postdata, urlencode_postdata,
urljoin,
) )
class LinkedInLearningBaseIE(InfoExtractor): class LinkedInLearningBaseIE(InfoExtractor):
_NETRC_MACHINE = 'linkedin' _NETRC_MACHINE = 'linkedin'
_LOGIN_URL = 'https://www.linkedin.com/uas/login?trk=learning'
def _call_api(self, course_slug, fields, video_slug=None, resolution=None): def _call_api(self, course_slug, fields, video_slug=None, resolution=None):
query = { query = {
@ -50,11 +52,10 @@ class LinkedInLearningBaseIE(InfoExtractor):
return return
login_page = self._download_webpage( login_page = self._download_webpage(
'https://www.linkedin.com/uas/login?trk=learning', self._LOGIN_URL, None, 'Downloading login page')
None, 'Downloading login page') action_url = urljoin(self._LOGIN_URL, self._search_regex(
action_url = self._search_regex(
r'<form[^>]+action=(["\'])(?P<url>.+?)\1', login_page, 'post url', r'<form[^>]+action=(["\'])(?P<url>.+?)\1', login_page, 'post url',
default='https://www.linkedin.com/uas/login-submit', group='url') default='https://www.linkedin.com/uas/login-submit', group='url'))
data = self._hidden_inputs(login_page) data = self._hidden_inputs(login_page)
data.update({ data.update({
'session_key': email, 'session_key': email,

View File

@ -22,7 +22,7 @@ from ..utils import (
) )
_ID_RE = r'[0-9a-f]{32,34}' _ID_RE = r'(?:[0-9a-f]{32,34}|[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12,14})'
class MediasiteIE(InfoExtractor): class MediasiteIE(InfoExtractor):
@ -98,6 +98,11 @@ class MediasiteIE(InfoExtractor):
'url': 'https://mediasite.ntnu.no/Mediasite/Showcase/default/Presentation/7d8b913259334b688986e970fae6fcb31d', 'url': 'https://mediasite.ntnu.no/Mediasite/Showcase/default/Presentation/7d8b913259334b688986e970fae6fcb31d',
'only_matching': True, 'only_matching': True,
}, },
{
# dashed id
'url': 'https://hitsmediaweb.h-its.org/mediasite/Play/2db6c271-681e-4f19-9af3-c60d1f82869b1d',
'only_matching': True,
}
] ]
# look in Mediasite.Core.js (Mediasite.ContentStreamType[*]) # look in Mediasite.Core.js (Mediasite.ContentStreamType[*])
@ -264,6 +269,10 @@ class MediasiteCatalogIE(InfoExtractor):
}, { }, {
'url': 'https://medaudio.medicine.iu.edu/Mediasite/Catalog/Full/9518c4a6c5cf4993b21cbd53e828a92521/97a9db45f7ab47428c77cd2ed74bb98f14/9518c4a6c5cf4993b21cbd53e828a92521', 'url': 'https://medaudio.medicine.iu.edu/Mediasite/Catalog/Full/9518c4a6c5cf4993b21cbd53e828a92521/97a9db45f7ab47428c77cd2ed74bb98f14/9518c4a6c5cf4993b21cbd53e828a92521',
'only_matching': True, 'only_matching': True,
}, {
# dashed id
'url': 'http://events7.mediasite.com/Mediasite/Catalog/Full/631f9e48-530d-4543-8154-9f955d08c75e',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):
@ -333,3 +342,25 @@ class MediasiteCatalogIE(InfoExtractor):
catalog, lambda x: x['CurrentFolder']['Name'], compat_str) catalog, lambda x: x['CurrentFolder']['Name'], compat_str)
return self.playlist_result(entries, catalog_id, title,) return self.playlist_result(entries, catalog_id, title,)
class MediasiteNamedCatalogIE(InfoExtractor):
    # Handles catalog pages addressed by name (.../Catalog/catalogs/<name>):
    # the page is fetched to discover the catalog id, and extraction is then
    # handed over to MediasiteCatalogIE via the .../Catalog/Full/<id> URL.
    _VALID_URL = r'(?xi)(?P<url>https?://[^/]+/Mediasite)/Catalog/catalogs/(?P<catalog_name>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://msite.misis.ru/Mediasite/Catalog/catalogs/2016-industrial-management-skriabin-o-o',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Pull both named groups out of the URL in a single call.
        base_url, name = re.match(self._VALID_URL, url).group(
            'url', 'catalog_name')

        page = self._download_webpage(url, name)

        # The catalog id is read from a `CatalogId: "<id>"` assignment in
        # the page source.
        catalog_id = self._search_regex(
            r'CatalogId\s*:\s*["\'](%s)' % _ID_RE, page, 'catalog id')

        full_catalog_url = '%s/Catalog/Full/%s' % (base_url, catalog_id)
        return self.url_result(
            full_catalog_url,
            ie=MediasiteCatalogIE.ie_key(), video_id=catalog_id)

View File

@ -1,22 +1,32 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import base64
import time
import uuid
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_str from ..compat import (
from ..utils import int_or_none compat_HTTPError,
compat_str,
)
from ..utils import (
ExtractorError,
int_or_none,
)
class MGTVIE(InfoExtractor): class MGTVIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?mgtv\.com/(v|b)/(?:[^/]+/)*(?P<id>\d+)\.html' _VALID_URL = r'https?://(?:www\.)?mgtv\.com/(v|b)/(?:[^/]+/)*(?P<id>\d+)\.html'
IE_DESC = '芒果TV' IE_DESC = '芒果TV'
_GEO_COUNTRIES = ['CN']
_TESTS = [{ _TESTS = [{
'url': 'http://www.mgtv.com/v/1/290525/f/3116640.html', 'url': 'http://www.mgtv.com/v/1/290525/f/3116640.html',
'md5': 'b1ffc0fc163152acf6beaa81832c9ee7',
'info_dict': { 'info_dict': {
'id': '3116640', 'id': '3116640',
'ext': 'mp4', 'ext': 'mp4',
'title': '我是歌手第四季双年巅峰会:韩红李玟“双王”领军对抗', 'title': '我是歌手 第四季',
'description': '我是歌手第四季双年巅峰会', 'description': '我是歌手第四季双年巅峰会',
'duration': 7461, 'duration': 7461,
'thumbnail': r're:^https?://.*\.jpg$', 'thumbnail': r're:^https?://.*\.jpg$',
@ -28,16 +38,30 @@ class MGTVIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
api_data = self._download_json( try:
'http://pcweb.api.mgtv.com/player/video', video_id, api_data = self._download_json(
query={'video_id': video_id}, 'https://pcweb.api.mgtv.com/player/video', video_id, query={
headers=self.geo_verification_headers())['data'] 'tk2': base64.urlsafe_b64encode(b'did=%s|pno=1030|ver=0.3.0301|clit=%d' % (compat_str(uuid.uuid4()).encode(), time.time()))[::-1],
'video_id': video_id,
}, headers=self.geo_verification_headers())['data']
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
error = self._parse_json(e.cause.read().decode(), None)
if error.get('code') == 40005:
self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
raise ExtractorError(error['msg'], expected=True)
raise
info = api_data['info'] info = api_data['info']
title = info['title'].strip() title = info['title'].strip()
stream_domain = api_data['stream_domain'][0] stream_data = self._download_json(
'https://pcweb.api.mgtv.com/player/getSource', video_id, query={
'pm2': api_data['atc']['pm2'],
'video_id': video_id,
}, headers=self.geo_verification_headers())['data']
stream_domain = stream_data['stream_domain'][0]
formats = [] formats = []
for idx, stream in enumerate(api_data['stream']): for idx, stream in enumerate(stream_data['stream']):
stream_path = stream.get('url') stream_path = stream.get('url')
if not stream_path: if not stream_path:
continue continue
@ -47,7 +71,7 @@ class MGTVIE(InfoExtractor):
format_url = format_data.get('info') format_url = format_data.get('info')
if not format_url: if not format_url:
continue continue
tbr = int_or_none(self._search_regex( tbr = int_or_none(stream.get('filebitrate') or self._search_regex(
r'_(\d+)_mp4/', format_url, 'tbr', default=None)) r'_(\d+)_mp4/', format_url, 'tbr', default=None))
formats.append({ formats.append({
'format_id': compat_str(tbr or idx), 'format_id': compat_str(tbr or idx),

View File

@ -1,12 +1,17 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import re import base64
import hashlib
from .common import InfoExtractor from .common import InfoExtractor
from ..aes import aes_cbc_decrypt
from ..utils import ( from ..utils import (
ExtractorError, bytes_to_intlist,
int_or_none, int_or_none,
intlist_to_bytes,
parse_codecs,
parse_duration,
) )
@ -14,7 +19,7 @@ class NewstubeIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?newstube\.ru/media/(?P<id>.+)' _VALID_URL = r'https?://(?:www\.)?newstube\.ru/media/(?P<id>.+)'
_TEST = { _TEST = {
'url': 'http://www.newstube.ru/media/telekanal-cnn-peremestil-gorod-slavyansk-v-krym', 'url': 'http://www.newstube.ru/media/telekanal-cnn-peremestil-gorod-slavyansk-v-krym',
'md5': '801eef0c2a9f4089fa04e4fe3533abdc', 'md5': '9d10320ad473444352f72f746ccb8b8c',
'info_dict': { 'info_dict': {
'id': '728e0ef2-e187-4012-bac0-5a081fdcb1f6', 'id': '728e0ef2-e187-4012-bac0-5a081fdcb1f6',
'ext': 'mp4', 'ext': 'mp4',
@ -25,84 +30,45 @@ class NewstubeIE(InfoExtractor):
} }
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) video_id = self._match_id(url)
video_id = mobj.group('id')
page = self._download_webpage(url, video_id, 'Downloading page') page = self._download_webpage(url, video_id)
title = self._html_search_meta(['og:title', 'twitter:title'], page, fatal=True)
video_guid = self._html_search_regex( video_guid = self._html_search_regex(
r'<meta property="og:video:url" content="https?://(?:www\.)?newstube\.ru/freshplayer\.swf\?guid=(?P<guid>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})', r'<meta\s+property="og:video(?::(?:(?:secure_)?url|iframe))?"\s+content="https?://(?:www\.)?newstube\.ru/embed/(?P<guid>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})',
page, 'video GUID') page, 'video GUID')
player = self._download_xml( enc_data = base64.b64decode(self._download_webpage(
'http://p.newstube.ru/v2/player.asmx/GetAutoPlayInfo6?state=&url=%s&sessionId=&id=%s&placement=profile&location=n2' % (url, video_guid), 'https://www.newstube.ru/embed/api/player/getsources2',
video_guid, 'Downloading player XML') video_guid, query={
'guid': video_guid,
def ns(s): 'ff': 3,
return s.replace('/', '/%(ns)s') % {'ns': '{http://app1.newstube.ru/N2SiteWS/player.asmx}'} }))
key = hashlib.pbkdf2_hmac(
error_message = player.find(ns('./ErrorMessage')) 'sha1', video_guid.replace('-', '').encode(), enc_data[:16], 1)[:16]
if error_message is not None: dec_data = aes_cbc_decrypt(
raise ExtractorError('%s returned error: %s' % (self.IE_NAME, error_message.text), expected=True) bytes_to_intlist(enc_data[32:]), bytes_to_intlist(key),
bytes_to_intlist(enc_data[16:32]))
session_id = player.find(ns('./SessionId')).text sources = self._parse_json(intlist_to_bytes(dec_data[:-dec_data[-1]]), video_guid)
media_info = player.find(ns('./Medias/MediaInfo'))
title = media_info.find(ns('./Name')).text
description = self._og_search_description(page)
thumbnail = media_info.find(ns('./KeyFrame')).text
duration = int(media_info.find(ns('./Duration')).text) / 1000.0
formats = [] formats = []
for source in sources:
for stream_info in media_info.findall(ns('./Streams/StreamInfo')): source_url = source.get('Src')
media_location = stream_info.find(ns('./MediaLocation')) if not source_url:
if media_location is None:
continue continue
height = int_or_none(source.get('Height'))
server = media_location.find(ns('./Server')).text f = {
app = media_location.find(ns('./App')).text 'format_id': 'http' + ('-%dp' % height if height else ''),
media_id = stream_info.find(ns('./Id')).text 'url': source_url,
name = stream_info.find(ns('./Name')).text 'width': int_or_none(source.get('Width')),
width = int(stream_info.find(ns('./Width')).text)
height = int(stream_info.find(ns('./Height')).text)
formats.append({
'url': 'rtmp://%s/%s' % (server, app),
'app': app,
'play_path': '01/%s' % video_guid.upper(),
'rtmp_conn': ['S:%s' % session_id, 'S:%s' % media_id, 'S:n2'],
'page_url': url,
'ext': 'flv',
'format_id': 'rtmp' + ('-%s' % name if name else ''),
'width': width,
'height': height, 'height': height,
}) }
source_type = source.get('Type')
sources_data = self._download_json( if source_type:
'http://www.newstube.ru/player2/getsources?guid=%s' % video_guid, f.update(parse_codecs(self._search_regex(
video_guid, fatal=False) r'codecs="([^"]+)"', source_type, 'codecs', fatal=False)))
if sources_data: formats.append(f)
for source in sources_data.get('Sources', []):
source_url = source.get('Src')
if not source_url:
continue
height = int_or_none(source.get('Height'))
f = {
'format_id': 'http' + ('-%dp' % height if height else ''),
'url': source_url,
'width': int_or_none(source.get('Width')),
'height': height,
}
source_type = source.get('Type')
if source_type:
mobj = re.search(r'codecs="([^,]+),\s*([^"]+)"', source_type)
if mobj:
vcodec, acodec = mobj.groups()
f.update({
'vcodec': vcodec,
'acodec': acodec,
})
formats.append(f)
self._check_formats(formats, video_guid) self._check_formats(formats, video_guid)
self._sort_formats(formats) self._sort_formats(formats)
@ -110,8 +76,8 @@ class NewstubeIE(InfoExtractor):
return { return {
'id': video_guid, 'id': video_guid,
'title': title, 'title': title,
'description': description, 'description': self._html_search_meta(['description', 'og:description'], page),
'thumbnail': thumbnail, 'thumbnail': self._html_search_meta(['og:image:secure_url', 'og:image', 'twitter:image'], page),
'duration': duration, 'duration': parse_duration(self._html_search_meta('duration', page)),
'formats': formats, 'formats': formats,
} }

View File

@ -1,54 +1,81 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ExtractorError
class NhkVodIE(InfoExtractor): class NhkVodIE(InfoExtractor):
_VALID_URL = r'https?://www3\.nhk\.or\.jp/nhkworld/en/(?:vod|ondemand)/(?P<id>[^/]+/[^/?#&]+)' _VALID_URL = r'https?://www3\.nhk\.or\.jp/nhkworld/(?P<lang>[a-z]{2})/ondemand/(?P<type>video|audio)/(?P<id>\d{7}|[a-z]+-\d{8}-\d+)'
# Content available only for a limited period of time. Visit
# https://www3.nhk.or.jp/nhkworld/en/ondemand/ for working samples.
_TESTS = [{ _TESTS = [{
# Videos available only for a limited period of time. Visit
# http://www3.nhk.or.jp/nhkworld/en/vod/ for working samples.
'url': 'http://www3.nhk.or.jp/nhkworld/en/vod/tokyofashion/20160815',
'info_dict': {
'id': 'A1bnNiNTE6nY3jLllS-BIISfcC_PpvF5',
'ext': 'flv',
'title': 'TOKYO FASHION EXPRESS - The Kimono as Global Fashion',
'description': 'md5:db338ee6ce8204f415b754782f819824',
'series': 'TOKYO FASHION EXPRESS',
'episode': 'The Kimono as Global Fashion',
},
'skip': 'Videos available only for a limited period of time',
}, {
'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/2015173/', 'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/2015173/',
'only_matching': True, 'only_matching': True,
}, {
'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/audio/plugin-20190404-1/',
'only_matching': True,
}, {
'url': 'https://www3.nhk.or.jp/nhkworld/fr/ondemand/audio/plugin-20190404-1/',
'only_matching': True,
}] }]
_API_URL = 'http://api.nhk.or.jp/nhkworld/vodesdlist/v1/all/all/all.json?apikey=EJfK8jdS57GqlupFgAfAAwr573q01y6k' _API_URL_TEMPLATE = 'https://api.nhk.or.jp/nhkworld/%sodesdlist/v7/episode/%s/%s/all%s.json'
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) lang, m_type, episode_id = re.match(self._VALID_URL, url).groups()
if episode_id.isdigit():
data = self._download_json(self._API_URL, video_id) episode_id = episode_id[:4] + '-' + episode_id[4:]
try:
episode = next(
e for e in data['data']['episodes']
if e.get('url') and video_id in e['url'])
except StopIteration:
raise ExtractorError('Unable to find episode')
embed_code = episode['vod_id']
is_video = m_type == 'video'
episode = self._download_json(
self._API_URL_TEMPLATE % ('v' if is_video else 'r', episode_id, lang, '/all' if is_video else ''),
episode_id, query={'apikey': 'EJfK8jdS57GqlupFgAfAAwr573q01y6k'})['data']['episodes'][0]
title = episode.get('sub_title_clean') or episode['sub_title'] title = episode.get('sub_title_clean') or episode['sub_title']
description = episode.get('description_clean') or episode.get('description')
series = episode.get('title_clean') or episode.get('title')
return { def get_clean_field(key):
'_type': 'url_transparent', return episode.get(key + '_clean') or episode.get(key)
'ie_key': 'Ooyala',
'url': 'ooyala:%s' % embed_code, series = get_clean_field('title')
thumbnails = []
for s, w, h in [('', 640, 360), ('_l', 1280, 720)]:
img_path = episode.get('image' + s)
if not img_path:
continue
thumbnails.append({
'id': '%dp' % h,
'height': h,
'width': w,
'url': 'https://www3.nhk.or.jp' + img_path,
})
info = {
'id': episode_id + '-' + lang,
'title': '%s - %s' % (series, title) if series and title else title, 'title': '%s - %s' % (series, title) if series and title else title,
'description': description, 'description': get_clean_field('description'),
'thumbnails': thumbnails,
'series': series, 'series': series,
'episode': title, 'episode': title,
} }
if is_video:
info.update({
'_type': 'url_transparent',
'ie_key': 'Ooyala',
'url': 'ooyala:' + episode['vod_id'],
})
else:
audio = episode['audio']
audio_path = audio['audio']
info['formats'] = self._extract_m3u8_formats(
'https://nhks-vh.akamaihd.net/i%s/master.m3u8' % audio_path,
episode_id, 'm4a', m3u8_id='hls', fatal=False)
for proto in ('rtmpt', 'rtmp'):
info['formats'].append({
'ext': 'flv',
'format_id': proto,
'url': '%s://flv.nhk.or.jp/ondemand/mp4:flv%s' % (proto, audio_path),
'vcodec': 'none',
})
for f in info['formats']:
f['language'] = lang
return info

View File

@ -0,0 +1,30 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
class NRLTVIE(InfoExtractor):
    """Extractor for nrl.com/tv pages that delegates playback to Ooyala."""

    _VALID_URL = r'https?://(?:www\.)?nrl\.com/tv(/[^/]+)*/(?P<id>[^/?&#]+)'
    _TEST = {
        'url': 'https://www.nrl.com/tv/news/match-highlights-titans-v-knights-862805/',
        'info_dict': {
            'id': 'YyNnFuaDE6kPJqlDhG4CGQ_w89mKTau4',
            'ext': 'mp4',
            'title': 'Match Highlights: Titans v Knights',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
            'format': 'bestvideo',
        },
    }

    def _real_extract(self, url):
        """Read the page's q-data JSON and return a URL result for its Ooyala id."""
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        # The player settings are a JSON object stored in a single-quoted
        # q-data attribute of the page markup.
        raw_player_data = self._search_regex(
            r"(?s)q-data='({.+?})'", webpage, 'player data')
        player_data = self._parse_json(raw_player_data, display_id)

        embed_code = player_data['videoId']
        return self.url_result(
            'ooyala:' + embed_code, 'Ooyala', embed_code,
            player_data.get('title'))

View File

@ -0,0 +1,49 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
js_to_json,
smuggle_url,
)
class NTVCoJpCUIE(InfoExtractor):
    """Extractor for cu.ntv.co.jp article pages backed by a Brightcove player."""

    IE_NAME = 'cu.ntv.co.jp'
    IE_DESC = 'Nippon Television Network'
    _VALID_URL = r'https?://cu\.ntv\.co\.jp/(?!program)(?P<id>[^/?&#]+)'
    _TEST = {
        'url': 'https://cu.ntv.co.jp/televiva-chill-gohan_181031/',
        'info_dict': {
            'id': '5978891207001',
            'ext': 'mp4',
            'title': '桜エビと炒り卵がポイント! 「中華風 エビチリおにぎり」──『美虎』五十嵐美幸',
            'upload_date': '20181213',
            'description': 'md5:211b52f4fd60f3e0e72b68b0c6ba52a9',
            'uploader_id': '3855502814001',
            'timestamp': 1544669941,
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }
    BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/default_default/index.html?videoId=%s'

    def _real_extract(self, url):
        """Build a url_transparent result pointing at the Brightcove player URL."""
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        # PLAYER_CONFIG is a JavaScript object literal, so it is converted
        # with js_to_json before being parsed.
        config_js = self._search_regex(
            r'(?s)PLAYER_CONFIG\s*=\s*({.+?})',
            webpage, 'player config')
        player_config = self._parse_json(config_js, display_id, js_to_json)

        video_id = player_config['videoId']
        # Fall back to a fixed account id when the config carries none.
        account_id = player_config.get('account') or '3855502814001'

        title = self._search_regex(
            r'<h1[^>]+class="title"[^>]*>([^<]+)', webpage, 'title').strip()
        description = self._html_search_meta(
            ['description', 'og:description'], webpage)
        brightcove_url = smuggle_url(
            self.BRIGHTCOVE_URL_TEMPLATE % (account_id, video_id),
            {'geo_countries': ['JP']})

        return {
            '_type': 'url_transparent',
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'description': description,
            'url': brightcove_url,
            'ie_key': 'BrightcoveNew',
        }

View File

@ -36,7 +36,7 @@ class OoyalaBaseIE(InfoExtractor):
'domain': domain, 'domain': domain,
'supportedFormats': supportedformats or 'mp4,rtmp,m3u8,hds,dash,smooth', 'supportedFormats': supportedformats or 'mp4,rtmp,m3u8,hds,dash,smooth',
'embedToken': embed_token, 'embedToken': embed_token,
}), video_id) }), video_id, headers=self.geo_verification_headers())
cur_auth_data = auth_data['authorization_data'][embed_code] cur_auth_data = auth_data['authorization_data'][embed_code]

View File

@ -3,6 +3,7 @@ from __future__ import unicode_literals
import json import json
import os import os
import random
import re import re
import subprocess import subprocess
import tempfile import tempfile
@ -243,7 +244,7 @@ class PhantomJSwrapper(object):
class OpenloadIE(InfoExtractor): class OpenloadIE(InfoExtractor):
_DOMAINS = r'(?:openload\.(?:co|io|link|pw)|oload\.(?:tv|stream|site|xyz|win|download|cloud|cc|icu|fun|club|info|pw|live|space))' _DOMAINS = r'(?:openload\.(?:co|io|link|pw)|oload\.(?:tv|stream|site|xyz|win|download|cloud|cc|icu|fun|club|info|pw|live|space|services)|oladblock\.(?:services|xyz|me)|openloed\.co)'
_VALID_URL = r'''(?x) _VALID_URL = r'''(?x)
https?:// https?://
(?P<host> (?P<host>
@ -350,9 +351,24 @@ class OpenloadIE(InfoExtractor):
}, { }, {
'url': 'https://oload.space/f/IY4eZSst3u8/', 'url': 'https://oload.space/f/IY4eZSst3u8/',
'only_matching': True, 'only_matching': True,
}, {
'url': 'https://oload.services/embed/bs1NWj1dCag/',
'only_matching': True,
}, {
'url': 'https://oladblock.services/f/b8NWEgkqNLI/',
'only_matching': True,
}, {
'url': 'https://oladblock.xyz/f/b8NWEgkqNLI/',
'only_matching': True,
}, {
'url': 'https://oladblock.me/f/b8NWEgkqNLI/',
'only_matching': True,
}, {
'url': 'https://openloed.co/f/b8NWEgkqNLI/',
'only_matching': True,
}] }]
_USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36' _USER_AGENT_TPL = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/{major}.0.{build}.{patch} Safari/537.36'
@staticmethod @staticmethod
def _extract_urls(webpage): def _extract_urls(webpage):
@ -367,7 +383,11 @@ class OpenloadIE(InfoExtractor):
url_pattern = 'https://%s/%%s/%s/' % (host, video_id) url_pattern = 'https://%s/%%s/%s/' % (host, video_id)
headers = { headers = {
'User-Agent': self._USER_AGENT, 'User-Agent': self._USER_AGENT_TPL % {
'major': random.randint(63, 73),
'build': random.randint(3239, 3683),
'patch': random.randint(0, 100),
},
} }
for path in ('embed', 'f'): for path in ('embed', 'f'):

View File

@ -0,0 +1,217 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..compat import (
compat_b64decode,
compat_str,
)
from ..utils import (
clean_html,
ExtractorError,
int_or_none,
str_or_none,
try_get,
url_or_none,
urlencode_postdata,
urljoin,
)
class PlatziIE(InfoExtractor):
    """Extractor for a single Platzi lecture (Spanish and English sites)."""

    _VALID_URL = r'''(?x)
                    https?://
                        (?:
                            platzi\.com/clases|           # es version
                            courses\.platzi\.com/classes  # en version
                        )/[^/]+/(?P<id>\d+)-[^/?\#&]+
                    '''
    _LOGIN_URL = 'https://platzi.com/login/'
    _NETRC_MACHINE = 'platzi'

    _TESTS = [{
        'url': 'https://platzi.com/clases/1311-next-js/12074-creando-nuestra-primera-pagina/',
        'md5': '8f56448241005b561c10f11a595b37e3',
        'info_dict': {
            'id': '12074',
            'ext': 'mp4',
            'title': 'Creando nuestra primera página',
            'description': 'md5:4c866e45034fc76412fbf6e60ae008bc',
            'duration': 420,
        },
        'skip': 'Requires platzi account credentials',
    }, {
        'url': 'https://courses.platzi.com/classes/1367-communication-codestream/13430-background/',
        'info_dict': {
            'id': '13430',
            'ext': 'mp4',
            'title': 'Background',
            'description': 'md5:49c83c09404b15e6e71defaf87f6b305',
            'duration': 360,
        },
        'skip': 'Requires platzi account credentials',
        'params': {
            'skip_download': True,
        },
    }]

    def _real_initialize(self):
        self._login()

    def _login(self):
        """Submit the login form if credentials are available.

        Raises ExtractorError when the response still points at the login
        page, using the site-provided error message when one is found.
        """
        username, password = self._get_login_info()
        if username is None:
            return

        login_page = self._download_webpage(
            self._LOGIN_URL, None, 'Downloading login page')

        # Start from the form's hidden inputs and add the credentials.
        form_data = self._hidden_inputs(login_page)
        form_data['email'] = username
        form_data['password'] = password

        urlh = self._request_webpage(
            self._LOGIN_URL, None, 'Logging in',
            data=urlencode_postdata(form_data),
            headers={'Referer': self._LOGIN_URL})

        # Ending up anywhere other than the login page is treated as success.
        if 'platzi.com/login' not in compat_str(urlh.geturl()):
            return

        error_page = self._webpage_read_content(
            urlh, self._LOGIN_URL, None, 'Downloading login error page')
        login_json = self._search_regex(
            r'login\s*=\s*({.+?})(?:\s*;|\s*</script)', error_page, 'login')
        login = self._parse_json(login_json, None)

        # Report the first non-empty error field, checked in this order.
        candidates = (
            str_or_none(login.get(kind + 'Error'))
            for kind in ('error', 'password', 'nonFields'))
        error = next((message for message in candidates if message), None)
        if error:
            raise ExtractorError(
                'Unable to login: %s' % error, expected=True)
        raise ExtractorError('Unable to log in')

    def _real_extract(self, url):
        """Collect HLS and DASH formats plus metadata from the page's client_data."""
        lecture_id = self._match_id(url)
        webpage = self._download_webpage(url, lecture_id)

        client_data = self._search_regex(
            r'client_data\s*=\s*({.+?})\s*;', webpage, 'client data')
        data = self._parse_json(client_data, lecture_id)

        material = data['initialState']['material']
        desc = material['description']
        title = desc['title']

        formats = []
        for server_id, server in material['videos'].items():
            if not isinstance(server, dict):
                continue
            # Each server entry may carry an HLS and/or a DASH manifest URL.
            hls_url = url_or_none(server.get('hls'))
            if hls_url:
                formats.extend(self._extract_m3u8_formats(
                    hls_url, lecture_id, 'mp4',
                    entry_protocol='m3u8_native', m3u8_id='hls',
                    note='Downloading %s m3u8 information' % server_id,
                    fatal=False))
            dash_url = url_or_none(server.get('dash'))
            if dash_url:
                formats.extend(self._extract_mpd_formats(
                    dash_url, lecture_id, mpd_id='dash',
                    note='Downloading %s MPD manifest' % server_id,
                    fatal=False))
        self._sort_formats(formats)

        # The description content is base64-encoded HTML.
        description = None
        content = str_or_none(desc.get('content'))
        if content:
            description = clean_html(
                compat_b64decode(content).decode('utf-8'))

        # invscale=60 multiplies the reported duration by 60.
        duration = int_or_none(material.get('duration'), invscale=60)

        return {
            'id': lecture_id,
            'title': title,
            'description': description,
            'duration': duration,
            'formats': formats,
        }
class PlatziCourseIE(InfoExtractor):
    """Playlist extractor for a Platzi course page.

    Every video material listed in the course's chapters becomes a
    url_transparent entry handled by PlatziIE.
    """

    _VALID_URL = r'''(?x)
                    https?://
                        (?:
                            platzi\.com/clases|           # es version
                            courses\.platzi\.com/classes  # en version
                        )/(?P<id>[^/?\#&]+)
                    '''
    _TESTS = [{
        'url': 'https://platzi.com/clases/next-js/',
        'info_dict': {
            'id': '1311',
            'title': 'Curso de Next.js',
        },
        'playlist_count': 22,
    }, {
        'url': 'https://courses.platzi.com/classes/communication-codestream/',
        'info_dict': {
            'id': '1367',
            'title': 'Codestream Course',
        },
        'playlist_count': 14,
    }]

    @classmethod
    def suitable(cls, url):
        """Return False for lecture URLs so that PlatziIE handles them."""
        # Lecture URLs also satisfy this class's _VALID_URL prefix match,
        # so PlatziIE gets the first say.
        if PlatziIE.suitable(url):
            return False
        return super(PlatziCourseIE, cls).suitable(url)

    def _real_extract(self, url):
        """Return a playlist of the course's video lectures."""
        course_name = self._match_id(url)

        webpage = self._download_webpage(url, course_name)

        props = self._parse_json(
            self._search_regex(r'data\s*=\s*({.+?})\s*;', webpage, 'data'),
            course_name)['initialProps']

        entries = []
        # chapter_num counts every item in 'concepts', including skipped ones.
        for chapter_num, chapter in enumerate(props['concepts'], 1):
            if not isinstance(chapter, dict):
                continue
            materials = chapter.get('materials')
            if not materials or not isinstance(materials, list):
                continue
            chapter_title = chapter.get('title')
            chapter_id = str_or_none(chapter.get('id'))
            for material in materials:
                if not isinstance(material, dict):
                    continue
                if material.get('material_type') != 'video':
                    continue
                # Material URLs are resolved against the course page URL.
                video_url = urljoin(url, material.get('url'))
                if not video_url:
                    continue
                entries.append({
                    '_type': 'url_transparent',
                    'url': video_url,
                    'title': str_or_none(material.get('name')),
                    'id': str_or_none(material.get('id')),
                    'ie_key': PlatziIE.ie_key(),
                    'chapter': chapter_title,
                    'chapter_number': chapter_num,
                    'chapter_id': chapter_id,
                })

        # str_or_none keeps a missing id as None; compat_str() would have
        # produced the literal string 'None' as the playlist id.
        course_id = str_or_none(try_get(props, lambda x: x['course']['id']))
        course_title = try_get(props, lambda x: x['course']['name'], compat_str)

        return self.playlist_result(entries, course_id, course_title)

View File

@ -21,7 +21,7 @@ from ..utils import (
class RTL2IE(InfoExtractor): class RTL2IE(InfoExtractor):
IE_NAME = 'rtl2' IE_NAME = 'rtl2'
_VALID_URL = r'http?://(?:www\.)?rtl2\.de/[^?#]*?/(?P<id>[^?#/]*?)(?:$|/(?:$|[?#]))' _VALID_URL = r'https?://(?:www\.)?rtl2\.de/sendung/[^/]+/(?:video/(?P<vico_id>\d+)[^/]+/(?P<vivi_id>\d+)-|folge/)(?P<id>[^/?#]+)'
_TESTS = [{ _TESTS = [{
'url': 'http://www.rtl2.de/sendung/grip-das-motormagazin/folge/folge-203-0', 'url': 'http://www.rtl2.de/sendung/grip-das-motormagazin/folge/folge-203-0',
'info_dict': { 'info_dict': {
@ -34,10 +34,11 @@ class RTL2IE(InfoExtractor):
# rtmp download # rtmp download
'skip_download': True, 'skip_download': True,
}, },
'expected_warnings': ['Unable to download f4m manifest', 'Failed to download m3u8 information'],
}, { }, {
'url': 'http://www.rtl2.de/sendung/koeln-50667/video/5512-anna/21040-anna-erwischt-alex/', 'url': 'http://www.rtl2.de/sendung/koeln-50667/video/5512-anna/21040-anna-erwischt-alex/',
'info_dict': { 'info_dict': {
'id': '21040-anna-erwischt-alex', 'id': 'anna-erwischt-alex',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Anna erwischt Alex!', 'title': 'Anna erwischt Alex!',
'description': 'Anna nimmt ihrem Vater nicht ab, dass er nicht spielt. Und tatsächlich erwischt sie ihn auf frischer Tat.' 'description': 'Anna nimmt ihrem Vater nicht ab, dass er nicht spielt. Und tatsächlich erwischt sie ihn auf frischer Tat.'
@ -46,31 +47,29 @@ class RTL2IE(InfoExtractor):
# rtmp download # rtmp download
'skip_download': True, 'skip_download': True,
}, },
'expected_warnings': ['Unable to download f4m manifest', 'Failed to download m3u8 information'],
}] }]
def _real_extract(self, url): def _real_extract(self, url):
# Some rtl2 urls have no slash at the end, so append it. vico_id, vivi_id, display_id = re.match(self._VALID_URL, url).groups()
if not url.endswith('/'): if not vico_id:
url += '/' webpage = self._download_webpage(url, display_id)
video_id = self._match_id(url) mobj = re.search(
webpage = self._download_webpage(url, video_id) r'data-collection="(?P<vico_id>\d+)"[^>]+data-video="(?P<vivi_id>\d+)"',
webpage)
mobj = re.search( if mobj:
r'<div[^>]+data-collection="(?P<vico_id>\d+)"[^>]+data-video="(?P<vivi_id>\d+)"', vico_id = mobj.group('vico_id')
webpage) vivi_id = mobj.group('vivi_id')
if mobj: else:
vico_id = mobj.group('vico_id') vico_id = self._html_search_regex(
vivi_id = mobj.group('vivi_id') r'vico_id\s*:\s*([0-9]+)', webpage, 'vico_id')
else: vivi_id = self._html_search_regex(
vico_id = self._html_search_regex( r'vivi_id\s*:\s*([0-9]+)', webpage, 'vivi_id')
r'vico_id\s*:\s*([0-9]+)', webpage, 'vico_id')
vivi_id = self._html_search_regex(
r'vivi_id\s*:\s*([0-9]+)', webpage, 'vivi_id')
info = self._download_json( info = self._download_json(
'http://www.rtl2.de/sites/default/modules/rtl2/mediathek/php/get_video_jw.php', 'https://service.rtl2.de/api-player-vipo/video.php',
video_id, query={ display_id, query={
'vico_id': vico_id, 'vico_id': vico_id,
'vivi_id': vivi_id, 'vivi_id': vivi_id,
}) })
@ -89,7 +88,7 @@ class RTL2IE(InfoExtractor):
'format_id': 'rtmp', 'format_id': 'rtmp',
'url': rtmp_url, 'url': rtmp_url,
'play_path': stream_url, 'play_path': stream_url,
'player_url': 'http://www.rtl2.de/flashplayer/vipo_player.swf', 'player_url': 'https://www.rtl2.de/sites/default/modules/rtl2/jwplayer/jwplayer-7.6.0/jwplayer.flash.swf',
'page_url': url, 'page_url': url,
'flash_version': 'LNX 11,2,202,429', 'flash_version': 'LNX 11,2,202,429',
'rtmp_conn': rtmp_conn, 'rtmp_conn': rtmp_conn,
@ -99,12 +98,12 @@ class RTL2IE(InfoExtractor):
m3u8_url = video_info.get('streamurl_hls') m3u8_url = video_info.get('streamurl_hls')
if m3u8_url: if m3u8_url:
formats.extend(self._extract_akamai_formats(m3u8_url, video_id)) formats.extend(self._extract_akamai_formats(m3u8_url, display_id))
self._sort_formats(formats) self._sort_formats(formats)
return { return {
'id': video_id, 'id': display_id,
'title': title, 'title': title,
'thumbnail': video_info.get('image'), 'thumbnail': video_info.get('image'),
'description': video_info.get('beschreibung'), 'description': video_info.get('beschreibung'),

View File

@ -59,6 +59,20 @@ class RuutuIE(InfoExtractor):
'url': 'http://www.ruutu.fi/video/3193728', 'url': 'http://www.ruutu.fi/video/3193728',
'only_matching': True, 'only_matching': True,
}, },
{
# audio podcast
'url': 'https://www.supla.fi/supla/3382410',
'md5': 'b9d7155fed37b2ebf6021d74c4b8e908',
'info_dict': {
'id': '3382410',
'ext': 'mp3',
'title': 'Mikä ihmeen poltergeist?',
'description': 'md5:bbb6963df17dfd0ecd9eb9a61bf14b52',
'thumbnail': r're:^https?://.*\.jpg$',
'age_limit': 0,
},
'expected_warnings': ['HTTP Error 502: Bad Gateway'],
}
] ]
def _real_extract(self, url): def _real_extract(self, url):
@ -94,6 +108,12 @@ class RuutuIE(InfoExtractor):
continue continue
formats.extend(self._extract_mpd_formats( formats.extend(self._extract_mpd_formats(
video_url, video_id, mpd_id='dash', fatal=False)) video_url, video_id, mpd_id='dash', fatal=False))
elif ext == 'mp3' or child.tag == 'AudioMediaFile':
formats.append({
'format_id': 'audio',
'url': video_url,
'vcodec': 'none',
})
else: else:
proto = compat_urllib_parse_urlparse(video_url).scheme proto = compat_urllib_parse_urlparse(video_url).scheme
if not child.tag.startswith('HTTP') and proto != 'rtmp': if not child.tag.startswith('HTTP') and proto != 'rtmp':

View File

@ -15,7 +15,12 @@ from ..compat import (
) )
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
float_or_none,
int_or_none, int_or_none,
KNOWN_EXTENSIONS,
merge_dicts,
mimetype2ext,
str_or_none,
try_get, try_get,
unified_timestamp, unified_timestamp,
update_url_query, update_url_query,
@ -57,7 +62,7 @@ class SoundcloudIE(InfoExtractor):
'uploader': 'E.T. ExTerrestrial Music', 'uploader': 'E.T. ExTerrestrial Music',
'timestamp': 1349920598, 'timestamp': 1349920598,
'upload_date': '20121011', 'upload_date': '20121011',
'duration': 143, 'duration': 143.216,
'license': 'all-rights-reserved', 'license': 'all-rights-reserved',
'view_count': int, 'view_count': int,
'like_count': int, 'like_count': int,
@ -100,7 +105,7 @@ class SoundcloudIE(InfoExtractor):
'uploader': 'jaimeMF', 'uploader': 'jaimeMF',
'timestamp': 1386604920, 'timestamp': 1386604920,
'upload_date': '20131209', 'upload_date': '20131209',
'duration': 9, 'duration': 9.927,
'license': 'all-rights-reserved', 'license': 'all-rights-reserved',
'view_count': int, 'view_count': int,
'like_count': int, 'like_count': int,
@ -120,7 +125,7 @@ class SoundcloudIE(InfoExtractor):
'uploader': 'jaimeMF', 'uploader': 'jaimeMF',
'timestamp': 1386604920, 'timestamp': 1386604920,
'upload_date': '20131209', 'upload_date': '20131209',
'duration': 9, 'duration': 9.927,
'license': 'all-rights-reserved', 'license': 'all-rights-reserved',
'view_count': int, 'view_count': int,
'like_count': int, 'like_count': int,
@ -140,7 +145,7 @@ class SoundcloudIE(InfoExtractor):
'uploader': 'oddsamples', 'uploader': 'oddsamples',
'timestamp': 1389232924, 'timestamp': 1389232924,
'upload_date': '20140109', 'upload_date': '20140109',
'duration': 17, 'duration': 17.346,
'license': 'cc-by-sa', 'license': 'cc-by-sa',
'view_count': int, 'view_count': int,
'like_count': int, 'like_count': int,
@ -160,7 +165,7 @@ class SoundcloudIE(InfoExtractor):
'uploader': 'Ori Uplift Music', 'uploader': 'Ori Uplift Music',
'timestamp': 1504206263, 'timestamp': 1504206263,
'upload_date': '20170831', 'upload_date': '20170831',
'duration': 7449, 'duration': 7449.096,
'license': 'all-rights-reserved', 'license': 'all-rights-reserved',
'view_count': int, 'view_count': int,
'like_count': int, 'like_count': int,
@ -180,7 +185,7 @@ class SoundcloudIE(InfoExtractor):
'uploader': 'garyvee', 'uploader': 'garyvee',
'timestamp': 1488152409, 'timestamp': 1488152409,
'upload_date': '20170226', 'upload_date': '20170226',
'duration': 207, 'duration': 207.012,
'thumbnail': r're:https?://.*\.jpg', 'thumbnail': r're:https?://.*\.jpg',
'license': 'all-rights-reserved', 'license': 'all-rights-reserved',
'view_count': int, 'view_count': int,
@ -192,9 +197,31 @@ class SoundcloudIE(InfoExtractor):
'skip_download': True, 'skip_download': True,
}, },
}, },
# not available via api.soundcloud.com/i1/tracks/id/streams
{
'url': 'https://soundcloud.com/giovannisarani/mezzo-valzer',
'md5': 'e22aecd2bc88e0e4e432d7dcc0a1abf7',
'info_dict': {
'id': '583011102',
'ext': 'mp3',
'title': 'Mezzo Valzer',
'description': 'md5:4138d582f81866a530317bae316e8b61',
'uploader': 'Giovanni Sarani',
'timestamp': 1551394171,
'upload_date': '20190228',
'duration': 180.157,
'thumbnail': r're:https?://.*\.jpg',
'license': 'all-rights-reserved',
'view_count': int,
'like_count': int,
'comment_count': int,
'repost_count': int,
},
'expected_warnings': ['Unable to download JSON metadata'],
}
] ]
_CLIENT_ID = 'NmW1FlPaiL94ueEu7oziOWjYEzZzQDcK' _CLIENT_ID = 'FweeGBOOEOYJWLJN3oEyToGLKhmSz0I7'
@staticmethod @staticmethod
def _extract_urls(webpage): def _extract_urls(webpage):
@ -202,10 +229,6 @@ class SoundcloudIE(InfoExtractor):
r'<iframe[^>]+src=(["\'])(?P<url>(?:https?://)?(?:w\.)?soundcloud\.com/player.+?)\1', r'<iframe[^>]+src=(["\'])(?P<url>(?:https?://)?(?:w\.)?soundcloud\.com/player.+?)\1',
webpage)] webpage)]
def report_resolve(self, video_id):
"""Report information extraction."""
self.to_screen('%s: Resolving id' % video_id)
@classmethod @classmethod
def _resolv_url(cls, url): def _resolv_url(cls, url):
return 'https://api.soundcloud.com/resolve.json?url=' + url + '&client_id=' + cls._CLIENT_ID return 'https://api.soundcloud.com/resolve.json?url=' + url + '&client_id=' + cls._CLIENT_ID
@ -224,6 +247,10 @@ class SoundcloudIE(InfoExtractor):
def extract_count(key): def extract_count(key):
return int_or_none(info.get('%s_count' % key)) return int_or_none(info.get('%s_count' % key))
like_count = extract_count('favoritings')
if like_count is None:
like_count = extract_count('likes')
result = { result = {
'id': track_id, 'id': track_id,
'uploader': username, 'uploader': username,
@ -231,15 +258,17 @@ class SoundcloudIE(InfoExtractor):
'title': title, 'title': title,
'description': info.get('description'), 'description': info.get('description'),
'thumbnail': thumbnail, 'thumbnail': thumbnail,
'duration': int_or_none(info.get('duration'), 1000), 'duration': float_or_none(info.get('duration'), 1000),
'webpage_url': info.get('permalink_url'), 'webpage_url': info.get('permalink_url'),
'license': info.get('license'), 'license': info.get('license'),
'view_count': extract_count('playback'), 'view_count': extract_count('playback'),
'like_count': extract_count('favoritings'), 'like_count': like_count,
'comment_count': extract_count('comment'), 'comment_count': extract_count('comment'),
'repost_count': extract_count('reposts'), 'repost_count': extract_count('reposts'),
'genre': info.get('genre'), 'genre': info.get('genre'),
} }
format_urls = set()
formats = [] formats = []
query = {'client_id': self._CLIENT_ID} query = {'client_id': self._CLIENT_ID}
if secret_token is not None: if secret_token is not None:
@ -248,6 +277,7 @@ class SoundcloudIE(InfoExtractor):
# We can build a direct link to the song # We can build a direct link to the song
format_url = update_url_query( format_url = update_url_query(
'https://api.soundcloud.com/tracks/%s/download' % track_id, query) 'https://api.soundcloud.com/tracks/%s/download' % track_id, query)
format_urls.add(format_url)
formats.append({ formats.append({
'format_id': 'download', 'format_id': 'download',
'ext': info.get('original_format', 'mp3'), 'ext': info.get('original_format', 'mp3'),
@ -256,44 +286,91 @@ class SoundcloudIE(InfoExtractor):
'preference': 10, 'preference': 10,
}) })
# We have to retrieve the url # Old API, does not work for some tracks (e.g.
# https://soundcloud.com/giovannisarani/mezzo-valzer)
format_dict = self._download_json( format_dict = self._download_json(
'https://api.soundcloud.com/i1/tracks/%s/streams' % track_id, 'https://api.soundcloud.com/i1/tracks/%s/streams' % track_id,
track_id, 'Downloading track url', query=query) track_id, 'Downloading track url', query=query, fatal=False)
for key, stream_url in format_dict.items(): if format_dict:
ext, abr = 'mp3', None for key, stream_url in format_dict.items():
mobj = re.search(r'_([^_]+)_(\d+)_url', key) if stream_url in format_urls:
if mobj: continue
ext, abr = mobj.groups() format_urls.add(stream_url)
abr = int(abr) ext, abr = 'mp3', None
if key.startswith('http'): mobj = re.search(r'_([^_]+)_(\d+)_url', key)
stream_formats = [{ if mobj:
'format_id': key, ext, abr = mobj.groups()
'ext': ext, abr = int(abr)
'url': stream_url, if key.startswith('http'):
}] stream_formats = [{
elif key.startswith('rtmp'): 'format_id': key,
# The url doesn't have an rtmp app, we have to extract the playpath 'ext': ext,
url, path = stream_url.split('mp3:', 1) 'url': stream_url,
stream_formats = [{ }]
'format_id': key, elif key.startswith('rtmp'):
'url': url, # The url doesn't have an rtmp app, we have to extract the playpath
'play_path': 'mp3:' + path, url, path = stream_url.split('mp3:', 1)
'ext': 'flv', stream_formats = [{
}] 'format_id': key,
elif key.startswith('hls'): 'url': url,
stream_formats = self._extract_m3u8_formats( 'play_path': 'mp3:' + path,
stream_url, track_id, ext, entry_protocol='m3u8_native', 'ext': 'flv',
m3u8_id=key, fatal=False) }]
else: elif key.startswith('hls'):
stream_formats = self._extract_m3u8_formats(
stream_url, track_id, ext, entry_protocol='m3u8_native',
m3u8_id=key, fatal=False)
else:
continue
if abr:
for f in stream_formats:
f['abr'] = abr
formats.extend(stream_formats)
# New API
transcodings = try_get(
info, lambda x: x['media']['transcodings'], list) or []
for t in transcodings:
if not isinstance(t, dict):
continue continue
format_url = url_or_none(t.get('url'))
if abr: if not format_url:
for f in stream_formats: continue
f['abr'] = abr stream = self._download_json(
update_url_query(format_url, query), track_id, fatal=False)
formats.extend(stream_formats) if not isinstance(stream, dict):
continue
stream_url = url_or_none(stream.get('url'))
if not stream_url:
continue
if stream_url in format_urls:
continue
format_urls.add(stream_url)
protocol = try_get(t, lambda x: x['format']['protocol'], compat_str)
if protocol != 'hls' and '/hls' in format_url:
protocol = 'hls'
ext = None
preset = str_or_none(t.get('preset'))
if preset:
ext = preset.split('_')[0]
if ext not in KNOWN_EXTENSIONS:
mimetype = try_get(
t, lambda x: x['format']['mime_type'], compat_str)
ext = mimetype2ext(mimetype) or 'mp3'
format_id_list = []
if protocol:
format_id_list.append(protocol)
format_id_list.append(ext)
format_id = '_'.join(format_id_list)
formats.append({
'url': stream_url,
'format_id': format_id,
'ext': ext,
'protocol': 'm3u8_native' if protocol == 'hls' else 'http',
})
if not formats: if not formats:
# We fallback to the stream_url in the original info, this # We fallback to the stream_url in the original info, this
@ -303,11 +380,11 @@ class SoundcloudIE(InfoExtractor):
'url': update_url_query(info['stream_url'], query), 'url': update_url_query(info['stream_url'], query),
'ext': 'mp3', 'ext': 'mp3',
}) })
self._check_formats(formats, track_id)
for f in formats: for f in formats:
f['vcodec'] = 'none' f['vcodec'] = 'none'
self._check_formats(formats, track_id)
self._sort_formats(formats) self._sort_formats(formats)
result['formats'] = formats result['formats'] = formats
@ -319,6 +396,7 @@ class SoundcloudIE(InfoExtractor):
raise ExtractorError('Invalid URL: %s' % url) raise ExtractorError('Invalid URL: %s' % url)
track_id = mobj.group('track_id') track_id = mobj.group('track_id')
new_info = {}
if track_id is not None: if track_id is not None:
info_json_url = 'https://api.soundcloud.com/tracks/' + track_id + '.json?client_id=' + self._CLIENT_ID info_json_url = 'https://api.soundcloud.com/tracks/' + track_id + '.json?client_id=' + self._CLIENT_ID
@ -344,13 +422,31 @@ class SoundcloudIE(InfoExtractor):
if token: if token:
resolve_title += '/%s' % token resolve_title += '/%s' % token
self.report_resolve(full_title) webpage = self._download_webpage(url, full_title, fatal=False)
if webpage:
entries = self._parse_json(
self._search_regex(
r'var\s+c\s*=\s*(\[.+?\])\s*,\s*o\s*=Date\b', webpage,
'data', default='[]'), full_title, fatal=False)
if entries:
for e in entries:
if not isinstance(e, dict):
continue
if e.get('id') != 67:
continue
data = try_get(e, lambda x: x['data'][0], dict)
if data:
new_info = data
break
info_json_url = self._resolv_url(
'https://soundcloud.com/%s' % resolve_title)
url = 'https://soundcloud.com/%s' % resolve_title # Contains some additional info missing from new_info
info_json_url = self._resolv_url(url) info = self._download_json(
info = self._download_json(info_json_url, full_title, 'Downloading info JSON') info_json_url, full_title, 'Downloading info JSON')
return self._extract_info_dict(info, full_title, secret_token=token) return self._extract_info_dict(
merge_dicts(info, new_info), full_title, secret_token=token)
class SoundcloudPlaylistBaseIE(SoundcloudIE): class SoundcloudPlaylistBaseIE(SoundcloudIE):
@ -396,8 +492,6 @@ class SoundcloudSetIE(SoundcloudPlaylistBaseIE):
full_title += '/' + token full_title += '/' + token
url += '/' + token url += '/' + token
self.report_resolve(full_title)
resolv_url = self._resolv_url(url) resolv_url = self._resolv_url(url)
info = self._download_json(resolv_url, full_title) info = self._download_json(resolv_url, full_title)

View File

@ -14,7 +14,7 @@ from ..utils import (
class StreamangoIE(InfoExtractor): class StreamangoIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?(?:streamango\.com|fruithosts\.net)/(?:f|embed)/(?P<id>[^/?#&]+)' _VALID_URL = r'https?://(?:www\.)?(?:streamango\.com|fruithosts\.net|streamcherry\.com)/(?:f|embed)/(?P<id>[^/?#&]+)'
_TESTS = [{ _TESTS = [{
'url': 'https://streamango.com/f/clapasobsptpkdfe/20170315_150006_mp4', 'url': 'https://streamango.com/f/clapasobsptpkdfe/20170315_150006_mp4',
'md5': 'e992787515a182f55e38fc97588d802a', 'md5': 'e992787515a182f55e38fc97588d802a',
@ -41,6 +41,9 @@ class StreamangoIE(InfoExtractor):
}, { }, {
'url': 'https://fruithosts.net/f/mreodparcdcmspsm/w1f1_r4lph_2018_brrs_720p_latino_mp4', 'url': 'https://fruithosts.net/f/mreodparcdcmspsm/w1f1_r4lph_2018_brrs_720p_latino_mp4',
'only_matching': True, 'only_matching': True,
}, {
'url': 'https://streamcherry.com/f/clapasobsptpkdfe/',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):

View File

@ -0,0 +1,94 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import (
compat_parse_qs,
compat_urllib_parse_urlparse
)
from ..utils import (
extract_attributes,
float_or_none,
int_or_none,
str_or_none,
)
class STVPlayerIE(InfoExtractor):
    """Extractor for player.stv.tv episode and video pages.

    Metadata comes from the page markup and, when a player API reference id
    is present, from the STV player API; playback is delegated to the
    BrightcoveNew extractor.
    """

    IE_NAME = 'stv:player'
    _VALID_URL = r'https?://player\.stv\.tv/(?P<type>episode|video)/(?P<id>[a-z0-9]{4})'
    _TEST = {
        'url': 'https://player.stv.tv/video/7srz/victoria/interview-with-the-cast-ahead-of-new-victoria/',
        'md5': '2ad867d4afd641fa14187596e0fbc91b',
        'info_dict': {
            'id': '6016487034001',
            'ext': 'mp4',
            'upload_date': '20190321',
            'title': 'Interview with the cast ahead of new Victoria',
            'description': 'Nell Hudson and Lily Travers tell us what to expect in the new season of Victoria.',
            'timestamp': 1553179628,
            'uploader_id': '1486976045',
        },
        'skip': 'this resource is unavailable outside of the UK',
    }
    # Used when the embed URL does not carry a publisherID query parameter.
    _PUBLISHER_ID = '1486976045'
    # Maps the URL path type to the player API collection name.
    _PTYPE_MAP = {
        'episode': 'episodes',
        'video': 'shortform',
    }

    def _real_extract(self, url):
        """Return a url_transparent result pointing at the Brightcove player."""
        ptype, video_id = re.match(self._VALID_URL, url).groups()
        webpage = self._download_webpage(url, video_id)

        # Both lookups are optional. An empty-string default (rather than
        # None) keeps urlparse() and extract_attributes() working when the
        # markup is absent, so the fallbacks below are actually reached.
        embed_url = self._search_regex(
            r'itemprop="embedURL"[^>]+href="([^"]+)',
            webpage, 'embed URL', default='')
        qs = compat_parse_qs(compat_urllib_parse_urlparse(embed_url).query)
        publisher_id = qs.get('publisherID', [None])[0] or self._PUBLISHER_ID

        player_tag = self._search_regex(
            r'(<[^>]+class="bcplayer"[^>]+>)', webpage, 'player', default='')
        player_attr = extract_attributes(player_tag) or {}

        info = {}
        # The id from the page URL was only needed for the download above;
        # from here on video_id holds the id used in the Brightcove URL.
        duration = ref_id = series = video_id = None
        api_ref_id = player_attr.get('data-player-api-refid')
        if api_ref_id:
            resp = self._download_json(
                'https://player.api.stv.tv/v1/%s/%s' % (self._PTYPE_MAP[ptype], api_ref_id),
                api_ref_id, fatal=False)
            if resp:
                result = resp.get('results') or {}
                video = result.get('video') or {}
                video_id = str_or_none(video.get('id'))
                ref_id = video.get('guid')
                duration = video.get('length')
                programme = result.get('programme') or {}
                series = programme.get('name') or programme.get('shortName')
                subtitles = {}
                _subtitles = result.get('_subtitles') or {}
                # All subtitle tracks are filed under 'en'.
                for ext, sub_url in _subtitles.items():
                    subtitles.setdefault('en', []).append({
                        'ext': 'vtt' if ext == 'webvtt' else ext,
                        'url': sub_url,
                    })
                info.update({
                    'description': result.get('summary'),
                    'subtitles': subtitles,
                    'view_count': int_or_none(result.get('views')),
                })
        if not video_id:
            # Fallback order: embed URL query, <link itemprop="url">, then a
            # 'ref:' id built from the API guid or the data-refid attribute.
            video_id = qs.get('videoId', [None])[0] or self._search_regex(
                r'<link\s+itemprop="url"\s+href="(\d+)"',
                webpage, 'video id', default=None) or 'ref:' + (ref_id or player_attr['data-refid'])

        info.update({
            '_type': 'url_transparent',
            'duration': float_or_none(duration or player_attr.get('data-duration'), 1000),
            'id': video_id,
            'ie_key': 'BrightcoveNew',
            'series': series or player_attr.get('data-programme-name'),
            'url': 'http://players.brightcove.net/%s/default_default/index.html?videoId=%s' % (publisher_id, video_id),
        })
        return info

View File

@ -16,7 +16,7 @@ from ..utils import (
class TeamcocoIE(TurnerBaseIE): class TeamcocoIE(TurnerBaseIE):
_VALID_URL = r'https?://teamcoco\.com/(?P<id>([^/]+/)*[^/?#]+)' _VALID_URL = r'https?://(?:\w+\.)?teamcoco\.com/(?P<id>([^/]+/)*[^/?#]+)'
_TESTS = [ _TESTS = [
{ {
'url': 'http://teamcoco.com/video/mary-kay-remote', 'url': 'http://teamcoco.com/video/mary-kay-remote',
@ -79,15 +79,20 @@ class TeamcocoIE(TurnerBaseIE):
}, { }, {
'url': 'http://teamcoco.com/israel/conan-hits-the-streets-beaches-of-tel-aviv', 'url': 'http://teamcoco.com/israel/conan-hits-the-streets-beaches-of-tel-aviv',
'only_matching': True, 'only_matching': True,
}, {
'url': 'https://conan25.teamcoco.com/video/ice-cube-kevin-hart-conan-share-lyft',
'only_matching': True,
} }
] ]
def _graphql_call(self, query_template, object_type, object_id): def _graphql_call(self, query_template, object_type, object_id):
find_object = 'find' + object_type find_object = 'find' + object_type
return self._download_json( return self._download_json(
'http://teamcoco.com/graphql/', object_id, data=json.dumps({ 'https://teamcoco.com/graphql', object_id, data=json.dumps({
'query': query_template % (find_object, object_id) 'query': query_template % (find_object, object_id)
}))['data'][find_object] }).encode(), headers={
'Content-Type': 'application/json',
})['data'][find_object]
def _real_extract(self, url): def _real_extract(self, url):
display_id = self._match_id(url) display_id = self._match_id(url)
@ -145,7 +150,12 @@ class TeamcocoIE(TurnerBaseIE):
'accessTokenType': 'jws', 'accessTokenType': 'jws',
})) }))
else: else:
video_sources = self._graphql_call('''{ d = self._download_json(
'https://teamcoco.com/_truman/d/' + video_id,
video_id, fatal=False) or {}
video_sources = d.get('meta') or {}
if not video_sources:
video_sources = self._graphql_call('''{
%s(id: "%s") { %s(id: "%s") {
src src
} }

View File

@ -65,8 +65,15 @@ class TikTokBaseIE(InfoExtractor):
class TikTokIE(TikTokBaseIE): class TikTokIE(TikTokBaseIE):
_VALID_URL = r'https?://(?:m\.)?tiktok\.com/v/(?P<id>\d+)' _VALID_URL = r'''(?x)
_TEST = { https?://
(?:
(?:m\.)?tiktok\.com/v|
(?:www\.)?tiktok\.com/share/video
)
/(?P<id>\d+)
'''
_TESTS = [{
'url': 'https://m.tiktok.com/v/6606727368545406213.html', 'url': 'https://m.tiktok.com/v/6606727368545406213.html',
'md5': 'd584b572e92fcd48888051f238022420', 'md5': 'd584b572e92fcd48888051f238022420',
'info_dict': { 'info_dict': {
@ -81,25 +88,39 @@ class TikTokIE(TikTokBaseIE):
'comment_count': int, 'comment_count': int,
'repost_count': int, 'repost_count': int,
} }
} }, {
'url': 'https://www.tiktok.com/share/video/6606727368545406213',
'only_matching': True,
}]
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(
'https://m.tiktok.com/v/%s.html' % video_id, video_id)
data = self._parse_json(self._search_regex( data = self._parse_json(self._search_regex(
r'\bdata\s*=\s*({.+?})\s*;', webpage, 'data'), video_id) r'\bdata\s*=\s*({.+?})\s*;', webpage, 'data'), video_id)
return self._extract_aweme(data) return self._extract_aweme(data)
class TikTokUserIE(TikTokBaseIE): class TikTokUserIE(TikTokBaseIE):
_VALID_URL = r'https?://(?:m\.)?tiktok\.com/h5/share/usr/(?P<id>\d+)' _VALID_URL = r'''(?x)
_TEST = { https?://
(?:
(?:m\.)?tiktok\.com/h5/share/usr|
(?:www\.)?tiktok\.com/share/user
)
/(?P<id>\d+)
'''
_TESTS = [{
'url': 'https://m.tiktok.com/h5/share/usr/188294915489964032.html', 'url': 'https://m.tiktok.com/h5/share/usr/188294915489964032.html',
'info_dict': { 'info_dict': {
'id': '188294915489964032', 'id': '188294915489964032',
}, },
'playlist_mincount': 24, 'playlist_mincount': 24,
} }, {
'url': 'https://www.tiktok.com/share/user/188294915489964032',
'only_matching': True,
}]
def _real_extract(self, url): def _real_extract(self, url):
user_id = self._match_id(url) user_id = self._match_id(url)

View File

@ -66,7 +66,12 @@ class TouTvIE(RadioCanadaIE):
def _real_extract(self, url): def _real_extract(self, url):
path = self._match_id(url) path = self._match_id(url)
metadata = self._download_json('http://ici.tou.tv/presentation/%s' % path, path) metadata = self._download_json(
'https://services.radio-canada.ca/toutv/presentation/%s' % path, path, query={
'client_key': self._CLIENT_KEY,
'device': 'web',
'version': 4,
})
# IsDrm does not necessarily mean the video is DRM protected (see # IsDrm does not necessarily mean the video is DRM protected (see
# https://github.com/ytdl-org/youtube-dl/issues/13994). # https://github.com/ytdl-org/youtube-dl/issues/13994).
if metadata.get('IsDrm'): if metadata.get('IsDrm'):
@ -77,6 +82,12 @@ class TouTvIE(RadioCanadaIE):
return merge_dicts({ return merge_dicts({
'id': video_id, 'id': video_id,
'title': details.get('OriginalTitle'), 'title': details.get('OriginalTitle'),
'description': details.get('Description'),
'thumbnail': details.get('ImageUrl'), 'thumbnail': details.get('ImageUrl'),
'duration': int_or_none(details.get('LengthInSeconds')), 'duration': int_or_none(details.get('LengthInSeconds')),
'series': metadata.get('ProgramTitle'),
'season_number': int_or_none(metadata.get('SeasonNumber')),
'season': metadata.get('SeasonTitle'),
'episode_number': int_or_none(metadata.get('EpisodeNumber')),
'episode': metadata.get('EpisodeTitle'),
}, self._extract_info(metadata.get('AppCode', 'toutv'), video_id)) }, self._extract_info(metadata.get('AppCode', 'toutv'), video_id))

View File

@ -76,7 +76,10 @@ class UdemyIE(InfoExtractor):
webpage, 'course', default='{}')), webpage, 'course', default='{}')),
video_id, fatal=False) or {} video_id, fatal=False) or {}
course_id = course.get('id') or self._search_regex( course_id = course.get('id') or self._search_regex(
r'data-course-id=["\'](\d+)', webpage, 'course id') [
r'data-course-id=["\'](\d+)',
r'&quot;courseId&quot;\s*:\s*(\d+)'
], webpage, 'course id')
return course_id, course.get('title') return course_id, course.get('title')
def _enroll_course(self, base_url, webpage, course_id): def _enroll_course(self, base_url, webpage, course_id):

View File

@ -109,23 +109,9 @@ class VimeoBaseInfoExtractor(InfoExtractor):
def _parse_config(self, config, video_id): def _parse_config(self, config, video_id):
video_data = config['video'] video_data = config['video']
# Extract title
video_title = video_data['title'] video_title = video_data['title']
live_event = video_data.get('live_event') or {}
# Extract uploader, uploader_url and uploader_id is_live = live_event.get('status') == 'started'
video_uploader = video_data.get('owner', {}).get('name')
video_uploader_url = video_data.get('owner', {}).get('url')
video_uploader_id = video_uploader_url.split('/')[-1] if video_uploader_url else None
# Extract video thumbnail
video_thumbnail = video_data.get('thumbnail')
if video_thumbnail is None:
video_thumbs = video_data.get('thumbs')
if video_thumbs and isinstance(video_thumbs, dict):
_, video_thumbnail = sorted((int(width if width.isdigit() else 0), t_url) for (width, t_url) in video_thumbs.items())[-1]
# Extract video duration
video_duration = int_or_none(video_data.get('duration'))
formats = [] formats = []
config_files = video_data.get('files') or config['request'].get('files', {}) config_files = video_data.get('files') or config['request'].get('files', {})
@ -142,6 +128,7 @@ class VimeoBaseInfoExtractor(InfoExtractor):
'tbr': int_or_none(f.get('bitrate')), 'tbr': int_or_none(f.get('bitrate')),
}) })
# TODO: fix handling of 308 status code returned for live archive manifest requests
for files_type in ('hls', 'dash'): for files_type in ('hls', 'dash'):
for cdn_name, cdn_data in config_files.get(files_type, {}).get('cdns', {}).items(): for cdn_name, cdn_data in config_files.get(files_type, {}).get('cdns', {}).items():
manifest_url = cdn_data.get('url') manifest_url = cdn_data.get('url')
@ -151,7 +138,7 @@ class VimeoBaseInfoExtractor(InfoExtractor):
if files_type == 'hls': if files_type == 'hls':
formats.extend(self._extract_m3u8_formats( formats.extend(self._extract_m3u8_formats(
manifest_url, video_id, 'mp4', manifest_url, video_id, 'mp4',
'm3u8_native', m3u8_id=format_id, 'm3u8' if is_live else 'm3u8_native', m3u8_id=format_id,
note='Downloading %s m3u8 information' % cdn_name, note='Downloading %s m3u8 information' % cdn_name,
fatal=False)) fatal=False))
elif files_type == 'dash': elif files_type == 'dash':
@ -164,6 +151,10 @@ class VimeoBaseInfoExtractor(InfoExtractor):
else: else:
mpd_manifest_urls = [(format_id, manifest_url)] mpd_manifest_urls = [(format_id, manifest_url)]
for f_id, m_url in mpd_manifest_urls: for f_id, m_url in mpd_manifest_urls:
if 'json=1' in m_url:
real_m_url = (self._download_json(m_url, video_id, fatal=False) or {}).get('url')
if real_m_url:
m_url = real_m_url
mpd_formats = self._extract_mpd_formats( mpd_formats = self._extract_mpd_formats(
m_url.replace('/master.json', '/master.mpd'), video_id, f_id, m_url.replace('/master.json', '/master.mpd'), video_id, f_id,
'Downloading %s MPD information' % cdn_name, 'Downloading %s MPD information' % cdn_name,
@ -175,6 +166,15 @@ class VimeoBaseInfoExtractor(InfoExtractor):
f['preference'] = -40 f['preference'] = -40
formats.extend(mpd_formats) formats.extend(mpd_formats)
live_archive = live_event.get('archive') or {}
live_archive_source_url = live_archive.get('source_url')
if live_archive_source_url and live_archive.get('status') == 'done':
formats.append({
'format_id': 'live-archive-source',
'url': live_archive_source_url,
'preference': 1,
})
subtitles = {} subtitles = {}
text_tracks = config['request'].get('text_tracks') text_tracks = config['request'].get('text_tracks')
if text_tracks: if text_tracks:
@ -184,15 +184,33 @@ class VimeoBaseInfoExtractor(InfoExtractor):
'url': 'https://vimeo.com' + tt['url'], 'url': 'https://vimeo.com' + tt['url'],
}] }]
thumbnails = []
if not is_live:
for key, thumb in video_data.get('thumbs', {}).items():
thumbnails.append({
'id': key,
'width': int_or_none(key),
'url': thumb,
})
thumbnail = video_data.get('thumbnail')
if thumbnail:
thumbnails.append({
'url': thumbnail,
})
owner = video_data.get('owner') or {}
video_uploader_url = owner.get('url')
return { return {
'title': video_title, 'title': self._live_title(video_title) if is_live else video_title,
'uploader': video_uploader, 'uploader': owner.get('name'),
'uploader_id': video_uploader_id, 'uploader_id': video_uploader_url.split('/')[-1] if video_uploader_url else None,
'uploader_url': video_uploader_url, 'uploader_url': video_uploader_url,
'thumbnail': video_thumbnail, 'thumbnails': thumbnails,
'duration': video_duration, 'duration': int_or_none(video_data.get('duration')),
'formats': formats, 'formats': formats,
'subtitles': subtitles, 'subtitles': subtitles,
'is_live': is_live,
} }
def _extract_original_format(self, url, video_id): def _extract_original_format(self, url, video_id):

View File

@ -6,10 +6,7 @@ import re
import sys import sys
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import ( from ..compat import compat_urlparse
compat_str,
compat_urlparse,
)
from ..utils import ( from ..utils import (
clean_html, clean_html,
ExtractorError, ExtractorError,
@ -103,7 +100,7 @@ class VKIE(VKBaseIE):
'url': 'http://vk.com/videos-77521?z=video-77521_162222515%2Fclub77521', 'url': 'http://vk.com/videos-77521?z=video-77521_162222515%2Fclub77521',
'md5': '7babad3b85ea2e91948005b1b8b0cb84', 'md5': '7babad3b85ea2e91948005b1b8b0cb84',
'info_dict': { 'info_dict': {
'id': '162222515', 'id': '-77521_162222515',
'ext': 'mp4', 'ext': 'mp4',
'title': 'ProtivoGunz - Хуёвая песня', 'title': 'ProtivoGunz - Хуёвая песня',
'uploader': 're:(?:Noize MC|Alexander Ilyashenko).*', 'uploader': 're:(?:Noize MC|Alexander Ilyashenko).*',
@ -117,7 +114,7 @@ class VKIE(VKBaseIE):
'url': 'http://vk.com/video205387401_165548505', 'url': 'http://vk.com/video205387401_165548505',
'md5': '6c0aeb2e90396ba97035b9cbde548700', 'md5': '6c0aeb2e90396ba97035b9cbde548700',
'info_dict': { 'info_dict': {
'id': '165548505', 'id': '205387401_165548505',
'ext': 'mp4', 'ext': 'mp4',
'title': 'No name', 'title': 'No name',
'uploader': 'Tom Cruise', 'uploader': 'Tom Cruise',
@ -132,7 +129,7 @@ class VKIE(VKBaseIE):
'url': 'http://vk.com/video_ext.php?oid=32194266&id=162925554&hash=7d8c2e0d5e05aeaa&hd=1', 'url': 'http://vk.com/video_ext.php?oid=32194266&id=162925554&hash=7d8c2e0d5e05aeaa&hd=1',
'md5': 'c7ce8f1f87bec05b3de07fdeafe21a0a', 'md5': 'c7ce8f1f87bec05b3de07fdeafe21a0a',
'info_dict': { 'info_dict': {
'id': '162925554', 'id': '32194266_162925554',
'ext': 'mp4', 'ext': 'mp4',
'uploader': 'Vladimir Gavrin', 'uploader': 'Vladimir Gavrin',
'title': 'Lin Dan', 'title': 'Lin Dan',
@ -149,7 +146,7 @@ class VKIE(VKBaseIE):
'md5': 'a590bcaf3d543576c9bd162812387666', 'md5': 'a590bcaf3d543576c9bd162812387666',
'note': 'Only available for registered users', 'note': 'Only available for registered users',
'info_dict': { 'info_dict': {
'id': '164049491', 'id': '-8871596_164049491',
'ext': 'mp4', 'ext': 'mp4',
'uploader': 'Триллеры', 'uploader': 'Триллеры',
'title': '► Бойцовский клуб / Fight Club 1999 [HD 720]', 'title': '► Бойцовский клуб / Fight Club 1999 [HD 720]',
@ -163,7 +160,7 @@ class VKIE(VKBaseIE):
'url': 'http://vk.com/hd_kino_mania?z=video-43215063_168067957%2F15c66b9b533119788d', 'url': 'http://vk.com/hd_kino_mania?z=video-43215063_168067957%2F15c66b9b533119788d',
'md5': '4d7a5ef8cf114dfa09577e57b2993202', 'md5': '4d7a5ef8cf114dfa09577e57b2993202',
'info_dict': { 'info_dict': {
'id': '168067957', 'id': '-43215063_168067957',
'ext': 'mp4', 'ext': 'mp4',
'uploader': 'Киномания - лучшее из мира кино', 'uploader': 'Киномания - лучшее из мира кино',
'title': ' ', 'title': ' ',
@ -177,7 +174,7 @@ class VKIE(VKBaseIE):
'md5': '0c45586baa71b7cb1d0784ee3f4e00a6', 'md5': '0c45586baa71b7cb1d0784ee3f4e00a6',
'note': 'ivi.ru embed', 'note': 'ivi.ru embed',
'info_dict': { 'info_dict': {
'id': '60690', 'id': '-43215063_169084319',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Книга Илая', 'title': 'Книга Илая',
'duration': 6771, 'duration': 6771,
@ -191,7 +188,7 @@ class VKIE(VKBaseIE):
'url': 'https://vk.com/video30481095_171201961?list=8764ae2d21f14088d4', 'url': 'https://vk.com/video30481095_171201961?list=8764ae2d21f14088d4',
'md5': '091287af5402239a1051c37ec7b92913', 'md5': '091287af5402239a1051c37ec7b92913',
'info_dict': { 'info_dict': {
'id': '171201961', 'id': '30481095_171201961',
'ext': 'mp4', 'ext': 'mp4',
'title': 'ТюменцевВВ_09.07.2015', 'title': 'ТюменцевВВ_09.07.2015',
'uploader': 'Anton Ivanov', 'uploader': 'Anton Ivanov',
@ -206,10 +203,10 @@ class VKIE(VKBaseIE):
'url': 'https://vk.com/video276849682_170681728', 'url': 'https://vk.com/video276849682_170681728',
'info_dict': { 'info_dict': {
'id': 'V3K4mi0SYkc', 'id': 'V3K4mi0SYkc',
'ext': 'webm', 'ext': 'mp4',
'title': "DSWD Awards 'Children's Joy Foundation, Inc.' Certificate of Registration and License to Operate", 'title': "DSWD Awards 'Children's Joy Foundation, Inc.' Certificate of Registration and License to Operate",
'description': 'md5:bf9c26cfa4acdfb146362682edd3827a', 'description': 'md5:bf9c26cfa4acdfb146362682edd3827a',
'duration': 179, 'duration': 178,
'upload_date': '20130116', 'upload_date': '20130116',
'uploader': "Children's Joy Foundation Inc.", 'uploader': "Children's Joy Foundation Inc.",
'uploader_id': 'thecjf', 'uploader_id': 'thecjf',
@ -239,7 +236,7 @@ class VKIE(VKBaseIE):
'url': 'http://vk.com/video-110305615_171782105', 'url': 'http://vk.com/video-110305615_171782105',
'md5': 'e13fcda136f99764872e739d13fac1d1', 'md5': 'e13fcda136f99764872e739d13fac1d1',
'info_dict': { 'info_dict': {
'id': '171782105', 'id': '-110305615_171782105',
'ext': 'mp4', 'ext': 'mp4',
'title': 'S-Dance, репетиции к The way show', 'title': 'S-Dance, репетиции к The way show',
'uploader': 'THE WAY SHOW | 17 апреля', 'uploader': 'THE WAY SHOW | 17 апреля',
@ -254,14 +251,17 @@ class VKIE(VKBaseIE):
{ {
# finished live stream, postlive_mp4 # finished live stream, postlive_mp4
'url': 'https://vk.com/videos-387766?z=video-387766_456242764%2Fpl_-387766_-2', 'url': 'https://vk.com/videos-387766?z=video-387766_456242764%2Fpl_-387766_-2',
'md5': '90d22d051fccbbe9becfccc615be6791',
'info_dict': { 'info_dict': {
'id': '456242764', 'id': '-387766_456242764',
'ext': 'mp4', 'ext': 'mp4',
'title': 'ИгроМир 2016 — день 1', 'title': 'ИгроМир 2016 День 1 — Игромания Утром',
'uploader': 'Игромания', 'uploader': 'Игромания',
'duration': 5239, 'duration': 5239,
'view_count': int, # TODO: use act=show to extract view_count
# 'view_count': int,
'upload_date': '20160929',
'uploader_id': '-387766',
'timestamp': 1475137527,
}, },
}, },
{ {
@ -474,7 +474,7 @@ class VKIE(VKBaseIE):
self._sort_formats(formats) self._sort_formats(formats)
return { return {
'id': compat_str(data.get('vid') or video_id), 'id': video_id,
'formats': formats, 'formats': formats,
'title': title, 'title': title,
'thumbnail': data.get('jpg'), 'thumbnail': data.get('jpg'),

View File

@ -102,6 +102,15 @@ class VRVIE(VRVBaseIE):
# m3u8 download # m3u8 download
'skip_download': True, 'skip_download': True,
}, },
}, {
# movie listing
'url': 'https://vrv.co/watch/G6NQXZ1J6/Lily-CAT',
'info_dict': {
'id': 'G6NQXZ1J6',
'title': 'Lily C.A.T',
'description': 'md5:988b031e7809a6aeb60968be4af7db07',
},
'playlist_count': 2,
}] }]
_NETRC_MACHINE = 'vrv' _NETRC_MACHINE = 'vrv'
@ -123,23 +132,23 @@ class VRVIE(VRVBaseIE):
def _extract_vrv_formats(self, url, video_id, stream_format, audio_lang, hardsub_lang): def _extract_vrv_formats(self, url, video_id, stream_format, audio_lang, hardsub_lang):
if not url or stream_format not in ('hls', 'dash'): if not url or stream_format not in ('hls', 'dash'):
return [] return []
assert audio_lang or hardsub_lang
stream_id_list = [] stream_id_list = []
if audio_lang: if audio_lang:
stream_id_list.append('audio-%s' % audio_lang) stream_id_list.append('audio-%s' % audio_lang)
if hardsub_lang: if hardsub_lang:
stream_id_list.append('hardsub-%s' % hardsub_lang) stream_id_list.append('hardsub-%s' % hardsub_lang)
stream_id = '-'.join(stream_id_list) format_id = stream_format
format_id = '%s-%s' % (stream_format, stream_id) if stream_id_list:
format_id += '-' + '-'.join(stream_id_list)
if stream_format == 'hls': if stream_format == 'hls':
adaptive_formats = self._extract_m3u8_formats( adaptive_formats = self._extract_m3u8_formats(
url, video_id, 'mp4', m3u8_id=format_id, url, video_id, 'mp4', m3u8_id=format_id,
note='Downloading %s m3u8 information' % stream_id, note='Downloading %s information' % format_id,
fatal=False) fatal=False)
elif stream_format == 'dash': elif stream_format == 'dash':
adaptive_formats = self._extract_mpd_formats( adaptive_formats = self._extract_mpd_formats(
url, video_id, mpd_id=format_id, url, video_id, mpd_id=format_id,
note='Downloading %s MPD information' % stream_id, note='Downloading %s information' % format_id,
fatal=False) fatal=False)
if audio_lang: if audio_lang:
for f in adaptive_formats: for f in adaptive_formats:
@ -150,10 +159,28 @@ class VRVIE(VRVBaseIE):
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
episode_path = self._get_cms_resource( object_data = self._call_cms(self._get_cms_resource(
'cms:/episodes/' + video_id, video_id) 'cms:/objects/' + video_id, video_id), video_id, 'object')['items'][0]
video_data = self._call_cms(episode_path, video_id, 'video') resource_path = object_data['__links__']['resource']['href']
video_data = self._call_cms(resource_path, video_id, 'video')
title = video_data['title'] title = video_data['title']
description = video_data.get('description')
if video_data.get('__class__') == 'movie_listing':
items = self._call_cms(
video_data['__links__']['movie_listing/movies']['href'],
video_id, 'movie listing').get('items') or []
if len(items) != 1:
entries = []
for item in items:
item_id = item.get('id')
if not item_id:
continue
entries.append(self.url_result(
'https://vrv.co/watch/' + item_id,
self.ie_key(), item_id, item.get('title')))
return self.playlist_result(entries, video_id, title, description)
video_data = items[0]
streams_path = video_data['__links__'].get('streams', {}).get('href') streams_path = video_data['__links__'].get('streams', {}).get('href')
if not streams_path: if not streams_path:
@ -197,7 +224,7 @@ class VRVIE(VRVBaseIE):
'formats': formats, 'formats': formats,
'subtitles': subtitles, 'subtitles': subtitles,
'thumbnails': thumbnails, 'thumbnails': thumbnails,
'description': video_data.get('description'), 'description': description,
'duration': float_or_none(video_data.get('duration_ms'), 1000), 'duration': float_or_none(video_data.get('duration_ms'), 1000),
'uploader_id': video_data.get('channel_id'), 'uploader_id': video_data.get('channel_id'),
'series': video_data.get('series_title'), 'series': video_data.get('series_title'),

View File

@ -57,10 +57,17 @@ class XVideosIE(InfoExtractor):
webpage, 'title', default=None, webpage, 'title', default=None,
group='title') or self._og_search_title(webpage) group='title') or self._og_search_title(webpage)
thumbnail = self._search_regex( thumbnails = []
(r'setThumbUrl\(\s*(["\'])(?P<thumbnail>(?:(?!\1).)+)\1', for preference, thumbnail in enumerate(('', '169')):
r'url_bigthumb=(?P<thumbnail>.+?)&amp'), thumbnail_url = self._search_regex(
webpage, 'thumbnail', fatal=False, group='thumbnail') r'setThumbUrl%s\(\s*(["\'])(?P<thumbnail>(?:(?!\1).)+)\1' % thumbnail,
webpage, 'thumbnail', default=None, group='thumbnail')
if thumbnail_url:
thumbnails.append({
'url': thumbnail_url,
'preference': preference,
})
duration = int_or_none(self._og_search_property( duration = int_or_none(self._og_search_property(
'duration', webpage, default=None)) or parse_duration( 'duration', webpage, default=None)) or parse_duration(
self._search_regex( self._search_regex(
@ -98,6 +105,6 @@ class XVideosIE(InfoExtractor):
'formats': formats, 'formats': formats,
'title': title, 'title': title,
'duration': duration, 'duration': duration,
'thumbnail': thumbnail, 'thumbnails': thumbnails,
'age_limit': 18, 'age_limit': 18,
} }

View File

@ -477,3 +477,77 @@ class YahooSearchIE(SearchInfoExtractor):
'id': query, 'id': query,
'entries': entries, 'entries': entries,
} }
# Extractor for single GYAO! / Yahoo! Japan streaming player and episode pages.
# The web page itself is never parsed: metadata is fetched from the GYAO
# "dam" JSON API and playback is delegated to the Brightcove extractor
# through a url_transparent result.
class YahooGyaOPlayerIE(InfoExtractor):
    IE_NAME = 'yahoo:gyao:player'
    # The captured id is either a "<digits>/v<digits>/v<digits>" path or a UUID.
    _VALID_URL = r'https?://(?:gyao\.yahoo\.co\.jp/(?:player|episode/[^/]+)|streaming\.yahoo\.co\.jp/c/y)/(?P<id>\d+/v\d+/v\d+|[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
    _TESTS = [{
        'url': 'https://gyao.yahoo.co.jp/player/00998/v00818/v0000000000000008564/',
        'info_dict': {
            'id': '5993125228001',
            'ext': 'mp4',
            'title': 'フューリー 【字幕版】',
            'description': 'md5:21e691c798a15330eda4db17a8fe45a5',
            'uploader_id': '4235717419001',
            'upload_date': '20190124',
            'timestamp': 1548294365,
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }, {
        'url': 'https://streaming.yahoo.co.jp/c/y/01034/v00133/v0000000000000000706/',
        'only_matching': True,
    }, {
        'url': 'https://gyao.yahoo.co.jp/episode/%E3%81%8D%E3%81%AE%E3%81%86%E4%BD%95%E9%A3%9F%E3%81%B9%E3%81%9F%EF%BC%9F%20%E7%AC%AC2%E8%A9%B1%202019%2F4%2F12%E6%94%BE%E9%80%81%E5%88%86/5cb02352-b725-409e-9f8d-88f947a9f682',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # The API path uses ':' where the page URL uses '/' between id parts.
        video_id = self._match_id(url).replace('/', ':')
        metadata = self._download_json(
            'https://gyao.yahoo.co.jp/dam/v1/videos/' + video_id,
            video_id, query={
                'fields': 'longDescription,title,videoId',
            })

        # 'title' and 'videoId' are mandatory (KeyError if absent);
        # 'longDescription' is optional.
        title = metadata['title']
        # NOTE(review): 'geo_countries' is presumably consumed by the
        # Brightcove extractor for geo handling - confirm there.
        brightcove_url = smuggle_url(
            'http://players.brightcove.net/4235717419001/default_default/index.html?videoId=' + metadata['videoId'],
            {'geo_countries': ['JP']})

        return {
            '_type': 'url_transparent',
            'id': video_id,
            'title': title,
            'url': brightcove_url,
            'description': metadata.get('longDescription'),
            'ie_key': BrightcoveNewIE.ie_key(),
        }
# Extractor for GYAO! / Yahoo! Japan streaming programme pages. It produces a
# playlist whose entries are player-page URLs routed to YahooGyaOPlayerIE.
class YahooGyaOIE(InfoExtractor):
    IE_NAME = 'yahoo:gyao'
    _VALID_URL = r'https?://(?:gyao\.yahoo\.co\.jp/p|streaming\.yahoo\.co\.jp/p/y)/(?P<id>\d+/v\d+)'
    _TESTS = [{
        'url': 'https://gyao.yahoo.co.jp/p/00449/v03102/',
        'info_dict': {
            'id': '00449:v03102',
        },
        'playlist_count': 2,
    }, {
        'url': 'https://streaming.yahoo.co.jp/p/y/01034/v00133/',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # The API path uses ':' where the page URL uses '/' between id parts.
        program_id = self._match_id(url).replace('/', ':')
        listing = self._download_json(
            'https://gyao.yahoo.co.jp/api/programs/%s/videos' % program_id, program_id)

        # Items without an 'id' are skipped; each remaining id is mapped back
        # to its '/'-separated player page URL.
        entry_ids = (item.get('id') for item in listing['videos'])
        entries = [
            self.url_result(
                'https://gyao.yahoo.co.jp/player/%s/' % entry_id.replace(':', '/'),
                YahooGyaOPlayerIE.ie_key(), entry_id)
            for entry_id in entry_ids if entry_id]

        return self.playlist_result(entries, program_id)

View File

@ -8,8 +8,8 @@ from ..utils import (
class YourPornIE(InfoExtractor): class YourPornIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?yourporn\.sexy/post/(?P<id>[^/?#&.]+)' _VALID_URL = r'https?://(?:www\.)?(?:yourporn\.sexy|sxyprn\.com)/post/(?P<id>[^/?#&.]+)'
_TEST = { _TESTS = [{
'url': 'https://yourporn.sexy/post/57ffcb2e1179b.html', 'url': 'https://yourporn.sexy/post/57ffcb2e1179b.html',
'md5': '6f8682b6464033d87acaa7a8ff0c092e', 'md5': '6f8682b6464033d87acaa7a8ff0c092e',
'info_dict': { 'info_dict': {
@ -23,7 +23,10 @@ class YourPornIE(InfoExtractor):
'params': { 'params': {
'skip_download': True, 'skip_download': True,
}, },
} }, {
'url': 'https://sxyprn.com/post/57ffcb2e1179b.html',
'only_matching': True,
}]
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)

View File

@ -484,7 +484,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
# RTMP (unnamed) # RTMP (unnamed)
'_rtmp': {'protocol': 'rtmp'}, '_rtmp': {'protocol': 'rtmp'},
} }
_SUBTITLE_FORMATS = ('ttml', 'vtt') _SUBTITLE_FORMATS = ('srv1', 'srv2', 'srv3', 'ttml', 'vtt')
_GEO_BYPASS = False _GEO_BYPASS = False
@ -1652,7 +1652,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
view_count = extract_view_count(get_video_info) view_count = extract_view_count(get_video_info)
if not video_info: if not video_info:
video_info = get_video_info video_info = get_video_info
if 'token' in get_video_info: get_token = get_video_info.get('token') or get_video_info.get('account_playback_token')
if get_token:
# Different get_video_info requests may report different results, e.g. # Different get_video_info requests may report different results, e.g.
# some may report video unavailability, but some may serve it without # some may report video unavailability, but some may serve it without
# any complaint (see https://github.com/ytdl-org/youtube-dl/issues/7362, # any complaint (see https://github.com/ytdl-org/youtube-dl/issues/7362,
@ -1662,7 +1663,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
# due to YouTube measures against IP ranges of hosting providers. # due to YouTube measures against IP ranges of hosting providers.
# Working around by preferring the first succeeded video_info containing # Working around by preferring the first succeeded video_info containing
# the token if no such video_info yet was found. # the token if no such video_info yet was found.
if 'token' not in video_info: token = video_info.get('token') or video_info.get('account_playback_token')
if not token:
video_info = get_video_info video_info = get_video_info
break break
@ -1671,7 +1673,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
r'(?s)<h1[^>]+id="unavailable-message"[^>]*>(.+?)</h1>', r'(?s)<h1[^>]+id="unavailable-message"[^>]*>(.+?)</h1>',
video_webpage, 'unavailable message', default=None) video_webpage, 'unavailable message', default=None)
if 'token' not in video_info: if not video_info:
unavailable_message = extract_unavailable_message()
if not unavailable_message:
unavailable_message = 'Unable to extract video data'
raise ExtractorError(
'YouTube said: %s' % unavailable_message, expected=True, video_id=video_id)
token = video_info.get('token') or video_info.get('account_playback_token')
if not token:
if 'reason' in video_info: if 'reason' in video_info:
if 'The uploader has not made this video available in your country.' in video_info['reason']: if 'The uploader has not made this video available in your country.' in video_info['reason']:
regions_allowed = self._html_search_meta( regions_allowed = self._html_search_meta(

View File

@ -1,3 +1,3 @@
from __future__ import unicode_literals from __future__ import unicode_literals
__version__ = 'vc.2019.03.28' __version__ = 'vc.2019.04.24'