Merge pull request #21 from rg3/master

update
This commit is contained in:
siddht1 2016-11-08 20:49:02 +05:30 committed by GitHub
commit 048fa3dc57
14 changed files with 263 additions and 113 deletions

View File

@ -6,8 +6,8 @@
--- ---
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.11.04*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with an outdated version will be rejected. ### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.11.08*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with an outdated version will be rejected.
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.11.04** - [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.11.08**
### Before submitting an *issue* make sure you have: ### Before submitting an *issue* make sure you have:
- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
@ -35,7 +35,7 @@ $ youtube-dl -v <your command line>
[debug] User config: [] [debug] User config: []
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
[debug] youtube-dl version 2016.11.04 [debug] youtube-dl version 2016.11.08
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
[debug] Proxy map: {} [debug] Proxy map: {}

View File

@ -1,3 +1,19 @@
version 2016.11.08
Extractors
* [tmz:article] Fix extraction (#11052)
* [espn] Fix extraction (#11041)
* [mitele] Fix extraction after website redesign (#10824)
- [ard] Remove age restriction check (#11129)
* [generic] Improve support for pornhub.com embeds (#11100)
+ [generic] Add support for redtube.com embeds (#11099)
+ [generic] Add support for drtuber.com embeds (#11098)
+ [redtube] Add support for embed URLs
+ [drtuber] Add support for embed URLs
+ [yahoo] Improve content id extraction (#11088)
* [toutv] Relax URL regular expression (#11121)
version 2016.11.04 version 2016.11.04
Core Core

View File

@ -758,7 +758,7 @@ Once the video is fully downloaded, use any video player, such as [mpv](https://
### I extracted a video URL with `-g`, but it does not play on another machine / in my webbrowser. ### I extracted a video URL with `-g`, but it does not play on another machine / in my webbrowser.
It depends a lot on the service. In many cases, requests for the video (to download/play it) must come from the same IP address and with the same cookies. Use the `--cookies` option to write the required cookies into a file, and advise your downloader to read cookies from that file. Some sites also require a common user agent to be used; use `--dump-user-agent` to see the one in use by youtube-dl. It depends a lot on the service. In many cases, requests for the video (to download/play it) must come from the same IP address and with the same cookies and/or HTTP headers. Use the `--cookies` option to write the required cookies into a file, and advise your downloader to read cookies from that file. Some sites also require a common user agent to be used; use `--dump-user-agent` to see the one in use by youtube-dl. You can also get the necessary cookies and HTTP headers from the JSON output obtained with `--dump-json`.
It may be beneficial to use IPv6; in some cases, the restrictions are only applied to IPv4. Some services (sometimes only for a subset of videos) do not restrict the video URL by IP address, cookie, or user-agent, but these are the exception rather than the rule. It may be beneficial to use IPv6; in some cases, the restrictions are only applied to IPv4. Some services (sometimes only for a subset of videos) do not restrict the video URL by IP address, cookie, or user-agent, but these are the exception rather than the rule.

View File

@ -178,8 +178,6 @@ class ARDMediathekIE(InfoExtractor):
('>Leider liegt eine Störung vor.', 'Video %s is unavailable'), ('>Leider liegt eine Störung vor.', 'Video %s is unavailable'),
('>Der gewünschte Beitrag ist nicht mehr verfügbar.<', ('>Der gewünschte Beitrag ist nicht mehr verfügbar.<',
'Video %s is no longer available'), 'Video %s is no longer available'),
('Diese Sendung ist für Jugendliche unter 12 Jahren nicht geeignet. Der Clip ist deshalb nur von 20 bis 6 Uhr verfügbar.',
'This program is only suitable for those aged 12 and older. Video %s is therefore only available between 8 pm and 6 am.'),
) )
for pattern, message in ERRORS: for pattern, message in ERRORS:

View File

@ -10,8 +10,8 @@ from ..utils import (
class DrTuberIE(InfoExtractor): class DrTuberIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?drtuber\.com/video/(?P<id>\d+)/(?P<display_id>[\w-]+)' _VALID_URL = r'https?://(?:www\.)?drtuber\.com/(?:video|embed)/(?P<id>\d+)(?:/(?P<display_id>[\w-]+))?'
_TEST = { _TESTS = [{
'url': 'http://www.drtuber.com/video/1740434/hot-perky-blonde-naked-golf', 'url': 'http://www.drtuber.com/video/1740434/hot-perky-blonde-naked-golf',
'md5': '93e680cf2536ad0dfb7e74d94a89facd', 'md5': '93e680cf2536ad0dfb7e74d94a89facd',
'info_dict': { 'info_dict': {
@ -25,20 +25,30 @@ class DrTuberIE(InfoExtractor):
'thumbnail': 're:https?://.*\.jpg$', 'thumbnail': 're:https?://.*\.jpg$',
'age_limit': 18, 'age_limit': 18,
} }
} }, {
'url': 'http://www.drtuber.com/embed/489939',
'only_matching': True,
}]
@staticmethod
def _extract_urls(webpage):
    """Return the DrTuber embed URLs referenced by iframes in *webpage*.

    Every ``<iframe>`` whose ``src`` points at ``drtuber.com/embed/<digits>``
    (with or without a scheme, with or without ``www.``) contributes one
    entry. The result is a list of the matched URL strings in document
    order; it is empty when nothing matches. Scheme-less (``//host/...``)
    URLs are returned as found, without a scheme being added.
    """
    # The pattern has a single (named) group, so findall yields plain strings.
    return re.findall(
        r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//(?:www\.)?drtuber\.com/embed/\d+)',
        webpage)
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id') video_id = mobj.group('id')
display_id = mobj.group('display_id') display_id = mobj.group('display_id') or video_id
webpage = self._download_webpage(url, display_id) webpage = self._download_webpage(
'http://www.drtuber.com/video/%s' % video_id, display_id)
video_url = self._html_search_regex( video_url = self._html_search_regex(
r'<source src="([^"]+)"', webpage, 'video URL') r'<source src="([^"]+)"', webpage, 'video URL')
title = self._html_search_regex( title = self._html_search_regex(
(r'class="title_watch"[^>]*><p>([^<]+)<', (r'class="title_watch"[^>]*><(?:p|h\d+)[^>]*>([^<]+)<',
r'<p[^>]+class="title_substrate">([^<]+)</p>', r'<p[^>]+class="title_substrate">([^<]+)</p>',
r'<title>([^<]+) - \d+'), r'<title>([^<]+) - \d+'),
webpage, 'title') webpage, 'title')

View File

@ -1,38 +1,117 @@
from __future__ import unicode_literals from __future__ import unicode_literals
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import remove_end from ..compat import compat_str
from ..utils import (
determine_ext,
int_or_none,
unified_timestamp,
)
class ESPNIE(InfoExtractor): class ESPNIE(InfoExtractor):
_VALID_URL = r'https?://(?:espn\.go|(?:www\.)?espn)\.com/(?:[^/]+/)*(?P<id>[^/]+)' _VALID_URL = r'https?://(?:espn\.go|(?:www\.)?espn)\.com/video/clip(?:\?.*?\bid=|/_/id/)(?P<id>\d+)'
_TESTS = [{ _TESTS = [{
'url': 'http://espn.go.com/video/clip?id=10365079', 'url': 'http://espn.go.com/video/clip?id=10365079',
'md5': '60e5d097a523e767d06479335d1bdc58',
'info_dict': { 'info_dict': {
'id': 'FkYWtmazr6Ed8xmvILvKLWjd4QvYZpzG', 'id': '10365079',
'ext': 'mp4', 'ext': 'mp4',
'title': '30 for 30 Shorts: Judging Jewell', 'title': '30 for 30 Shorts: Judging Jewell',
'description': None, 'description': 'md5:39370c2e016cb4ecf498ffe75bef7f0f',
'timestamp': 1390936111,
'upload_date': '20140128',
}, },
'params': { 'params': {
'skip_download': True, 'skip_download': True,
}, },
'add_ie': ['OoyalaExternal'],
}, { }, {
# intl video, from http://www.espnfc.us/video/mls-highlights/150/video/2743663/must-see-moments-best-of-the-mls-season # intl video, from http://www.espnfc.us/video/mls-highlights/150/video/2743663/must-see-moments-best-of-the-mls-season
'url': 'http://espn.go.com/video/clip?id=2743663', 'url': 'http://espn.go.com/video/clip?id=2743663',
'md5': 'f4ac89b59afc7e2d7dbb049523df6768',
'info_dict': { 'info_dict': {
'id': '50NDFkeTqRHB0nXBOK-RGdSG5YQPuxHg', 'id': '2743663',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Must-See Moments: Best of the MLS season', 'title': 'Must-See Moments: Best of the MLS season',
'description': 'md5:4c2d7232beaea572632bec41004f0aeb',
'timestamp': 1449446454,
'upload_date': '20151207',
}, },
'params': { 'params': {
'skip_download': True, 'skip_download': True,
}, },
'add_ie': ['OoyalaExternal'], 'expected_warnings': ['Unable to download f4m manifest'],
}, { }, {
'url': 'http://www.espn.com/video/clip?id=10365079',
'only_matching': True,
}, {
'url': 'http://www.espn.com/video/clip/_/id/17989860',
'only_matching': True,
}]
def _real_extract(self, url):
    """Extract one ESPN clip using the api-app.espn.com clips endpoint.

    The numeric clip id is taken from *url*, the clip JSON is downloaded,
    and every source URL found under ``links.source`` is turned into one or
    more formats.

    Returns an info dict with ``id``, ``title``, ``description``,
    ``thumbnail``, ``timestamp``, ``duration`` and ``formats``.

    ``videos[0]``, ``headline`` and ``links.source`` are read by direct
    indexing, so a response lacking any of them raises KeyError/IndexError.
    """
    video_id = self._match_id(url)

    clip = self._download_json(
        'http://api-app.espn.com/v1/video/clips/%s' % video_id,
        video_id)['videos'][0]

    title = clip['headline']

    # The same URL can appear under several keys; remember what was already
    # processed so each URL is extracted only once.
    format_urls = set()
    formats = []

    def traverse_source(source, base_source_id=None):
        # Walk the (possibly nested) mapping of sources. String values are
        # URLs and are extracted with the id accumulated so far; nested
        # mappings extend that id with their own key, joined by '-'.
        for source_id, value in source.items():
            if isinstance(value, compat_str):
                extract_source(value, base_source_id)
            elif isinstance(value, dict):
                traverse_source(
                    value,
                    '%s-%s' % (base_source_id, source_id)
                    if base_source_id else source_id)

    def extract_source(source_url, source_id=None):
        # Dispatch on the URL's extension; manifests are expanded into their
        # formats (non-fatally), anything else is taken as a direct URL.
        if source_url in format_urls:
            return
        format_urls.add(source_url)
        ext = determine_ext(source_url)
        if ext == 'smil':
            formats.extend(self._extract_smil_formats(
                source_url, video_id, fatal=False))
        elif ext == 'f4m':
            formats.extend(self._extract_f4m_formats(
                source_url, video_id, f4m_id=source_id, fatal=False))
        elif ext == 'm3u8':
            formats.extend(self._extract_m3u8_formats(
                source_url, video_id, 'mp4', entry_protocol='m3u8_native',
                m3u8_id=source_id, fatal=False))
        else:
            formats.append({
                'url': source_url,
                'format_id': source_id,
            })

    traverse_source(clip['links']['source'])
    self._sort_formats(formats)

    # Optional metadata: prefer the caption and fall back to the description.
    description = clip.get('caption') or clip.get('description')
    thumbnail = clip.get('thumbnail')
    duration = int_or_none(clip.get('duration'))
    timestamp = unified_timestamp(clip.get('originalPublishDate'))

    return {
        'id': video_id,
        'title': title,
        'description': description,
        'thumbnail': thumbnail,
        'timestamp': timestamp,
        'duration': duration,
        'formats': formats,
    }
class ESPNArticleIE(InfoExtractor):
_VALID_URL = r'https?://(?:espn\.go|(?:www\.)?espn)\.com/(?:[^/]+/)*(?P<id>[^/]+)'
_TESTS = [{
'url': 'https://espn.go.com/video/iframe/twitter/?cms=espn&id=10365079', 'url': 'https://espn.go.com/video/iframe/twitter/?cms=espn&id=10365079',
'only_matching': True, 'only_matching': True,
}, { }, {
@ -47,11 +126,12 @@ class ESPNIE(InfoExtractor):
}, { }, {
'url': 'http://espn.go.com/nba/playoffs/2015/story/_/id/12887571/john-wall-washington-wizards-no-swelling-left-hand-wrist-game-5-return', 'url': 'http://espn.go.com/nba/playoffs/2015/story/_/id/12887571/john-wall-washington-wizards-no-swelling-left-hand-wrist-game-5-return',
'only_matching': True, 'only_matching': True,
}, {
'url': 'http://www.espn.com/video/clip?id=10365079',
'only_matching': True,
}] }]
@classmethod
def suitable(cls, url):
    """Tell whether this extractor should handle *url*.

    Returns False for any URL that ESPNIE reports as suitable, so that
    extractor is left to handle it; every other URL gets the result of the
    parent class's ``suitable`` check.
    """
    if ESPNIE.suitable(url):
        return False
    return super(ESPNArticleIE, cls).suitable(url)
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
@ -61,23 +141,5 @@ class ESPNIE(InfoExtractor):
r'class=(["\']).*?video-play-button.*?\1[^>]+data-id=["\'](?P<id>\d+)', r'class=(["\']).*?video-play-button.*?\1[^>]+data-id=["\'](?P<id>\d+)',
webpage, 'video id', group='id') webpage, 'video id', group='id')
cms = 'espn' return self.url_result(
if 'data-source="intl"' in webpage: 'http://espn.go.com/video/clip?id=%s' % video_id, ESPNIE.ie_key())
cms = 'intl'
player_url = 'https://espn.go.com/video/iframe/twitter/?id=%s&cms=%s' % (video_id, cms)
player = self._download_webpage(
player_url, video_id)
pcode = self._search_regex(
r'["\']pcode=([^"\']+)["\']', player, 'pcode')
title = remove_end(
self._og_search_title(webpage),
'- ESPN Video').strip()
return {
'_type': 'url_transparent',
'url': 'ooyalaexternal:%s:%s:%s' % (cms, video_id, pcode),
'ie_key': 'OoyalaExternal',
'title': title,
}

View File

@ -47,6 +47,8 @@ from .svt import SVTIE
from .pornhub import PornHubIE from .pornhub import PornHubIE
from .xhamster import XHamsterEmbedIE from .xhamster import XHamsterEmbedIE
from .tnaflix import TNAFlixNetworkEmbedIE from .tnaflix import TNAFlixNetworkEmbedIE
from .drtuber import DrTuberIE
from .redtube import RedTubeIE
from .vimeo import VimeoIE from .vimeo import VimeoIE
from .dailymotion import ( from .dailymotion import (
DailymotionIE, DailymotionIE,
@ -1981,11 +1983,6 @@ class GenericIE(InfoExtractor):
if sportbox_urls: if sportbox_urls:
return _playlist_from_matches(sportbox_urls, ie='SportBoxEmbed') return _playlist_from_matches(sportbox_urls, ie='SportBoxEmbed')
# Look for embedded PornHub player
pornhub_url = PornHubIE._extract_url(webpage)
if pornhub_url:
return self.url_result(pornhub_url, 'PornHub')
# Look for embedded XHamster player # Look for embedded XHamster player
xhamster_urls = XHamsterEmbedIE._extract_urls(webpage) xhamster_urls = XHamsterEmbedIE._extract_urls(webpage)
if xhamster_urls: if xhamster_urls:
@ -1996,6 +1993,21 @@ class GenericIE(InfoExtractor):
if tnaflix_urls: if tnaflix_urls:
return _playlist_from_matches(tnaflix_urls, ie=TNAFlixNetworkEmbedIE.ie_key()) return _playlist_from_matches(tnaflix_urls, ie=TNAFlixNetworkEmbedIE.ie_key())
# Look for embedded PornHub player
pornhub_urls = PornHubIE._extract_urls(webpage)
if pornhub_urls:
return _playlist_from_matches(pornhub_urls, ie=PornHubIE.ie_key())
# Look for embedded DrTuber player
drtuber_urls = DrTuberIE._extract_urls(webpage)
if drtuber_urls:
return _playlist_from_matches(drtuber_urls, ie=DrTuberIE.ie_key())
# Look for embedded RedTube player
redtube_urls = RedTubeIE._extract_urls(webpage)
if redtube_urls:
return _playlist_from_matches(redtube_urls, ie=RedTubeIE.ie_key())
# Look for embedded Tvigle player # Look for embedded Tvigle player
mobj = re.search( mobj = re.search(
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//cloud\.tvigle\.ru/video/.+?)\1', webpage) r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//cloud\.tvigle\.ru/video/.+?)\1', webpage)

View File

@ -1,19 +1,20 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import re import uuid
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import ( from ..compat import (
compat_str,
compat_urllib_parse_urlencode, compat_urllib_parse_urlencode,
compat_urlparse, compat_urlparse,
) )
from ..utils import ( from ..utils import (
get_element_by_attribute,
int_or_none, int_or_none,
remove_start,
extract_attributes, extract_attributes,
determine_ext, determine_ext,
smuggle_url,
parse_duration,
) )
@ -72,16 +73,14 @@ class MiTeleBaseIE(InfoExtractor):
} }
class MiTeleIE(MiTeleBaseIE): class MiTeleIE(InfoExtractor):
IE_DESC = 'mitele.es' IE_DESC = 'mitele.es'
_VALID_URL = r'https?://(?:www\.)?mitele\.es/(?:[^/]+/){3}(?P<id>[^/]+)/' _VALID_URL = r'https?://(?:www\.)?mitele\.es/programas-tv/(?:[^/]+/)(?P<id>[^/]+)/player'
_TESTS = [{ _TESTS = [{
'url': 'http://www.mitele.es/programas-tv/diario-de/la-redaccion/programa-144/', 'url': 'http://www.mitele.es/programas-tv/diario-de/57b0dfb9c715da65618b4afa/player',
# MD5 is unstable
'info_dict': { 'info_dict': {
'id': '0NF1jJnxS1Wu3pHrmvFyw2', 'id': '57b0dfb9c715da65618b4afa',
'display_id': 'programa-144',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Tor, la web invisible', 'title': 'Tor, la web invisible',
'description': 'md5:3b6fce7eaa41b2d97358726378d9369f', 'description': 'md5:3b6fce7eaa41b2d97358726378d9369f',
@ -91,57 +90,71 @@ class MiTeleIE(MiTeleBaseIE):
'thumbnail': 're:(?i)^https?://.*\.jpg$', 'thumbnail': 're:(?i)^https?://.*\.jpg$',
'duration': 2913, 'duration': 2913,
}, },
'add_ie': ['Ooyala'],
}, { }, {
# no explicit title # no explicit title
'url': 'http://www.mitele.es/programas-tv/cuarto-milenio/temporada-6/programa-226/', 'url': 'http://www.mitele.es/programas-tv/cuarto-milenio/57b0de3dc915da14058b4876/player',
'info_dict': { 'info_dict': {
'id': 'eLZSwoEd1S3pVyUm8lc6F', 'id': '57b0de3dc915da14058b4876',
'display_id': 'programa-226',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Cuarto Milenio - Temporada 6 - Programa 226', 'title': 'Cuarto Milenio Temporada 6 Programa 226',
'description': 'md5:50daf9fadefa4e62d9fc866d0c015701', 'description': 'md5:5ff132013f0cd968ffbf1f5f3538a65f',
'series': 'Cuarto Milenio', 'series': 'Cuarto Milenio',
'season': 'Temporada 6', 'season': 'Temporada 6',
'episode': 'Programa 226', 'episode': 'Programa 226',
'thumbnail': 're:(?i)^https?://.*\.jpg$', 'thumbnail': 're:(?i)^https?://.*\.jpg$',
'duration': 7312, 'duration': 7313,
}, },
'params': { 'params': {
'skip_download': True, 'skip_download': True,
}, },
'add_ie': ['Ooyala'],
}] }]
def _real_extract(self, url): def _real_extract(self, url):
display_id = self._match_id(url) video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
webpage = self._download_webpage(url, display_id) gigya_url = self._search_regex(r'<gigya-api>[^>]*</gigya-api>[^>]*<script\s*src="([^"]*)">[^>]*</script>', webpage, 'gigya', default=None)
gigya_sc = self._download_webpage(compat_urlparse.urljoin(r'http://www.mitele.es/', gigya_url), video_id, 'Downloading gigya script')
# Get an appKey/uuid, which are needed to obtain the session key
appKey_var = self._search_regex(r'value\("appGridApplicationKey",([0-9a-f]+)\)', gigya_sc, 'appKey variable')
appKey = self._search_regex(r'var %s="([0-9a-f]+)"' % appKey_var, gigya_sc, 'appKey')
uid = compat_str(uuid.uuid4())
session_url = 'https://appgrid-api.cloud.accedo.tv/session?appKey=%s&uuid=%s' % (appKey, uid)
session_json = self._download_json(session_url, video_id, 'Downloading session keys')
sessionKey = compat_str(session_json['sessionKey'])
info = self._get_player_info(url, webpage) paths_url = 'https://appgrid-api.cloud.accedo.tv/metadata/general_configuration,%20web_configuration?sessionKey=' + sessionKey
paths = self._download_json(paths_url, video_id, 'Downloading paths JSON')
ooyala_s = paths['general_configuration']['api_configuration']['ooyala_search']
data_p = (
'http://' + ooyala_s['base_url'] + ooyala_s['full_path'] + ooyala_s['provider_id'] +
'/docs/' + video_id + '?include_titles=Series,Season&product_name=test&format=full')
data = self._download_json(data_p, video_id, 'Downloading data JSON')
source = data['hits']['hits'][0]['_source']
embedCode = source['offers'][0]['embed_codes'][0]
title = self._search_regex( titles = source['localizable_titles'][0]
r'class="Destacado-text"[^>]*>\s*<strong>([^<]+)</strong>', title = titles.get('title_medium') or titles['title_long']
webpage, 'title', default=None) episode = titles['title_sort_name']
description = titles['summary_long']
titles_series = source['localizable_titles_series'][0]
series = titles_series['title_long']
titles_season = source['localizable_titles_season'][0]
season = titles_season['title_medium']
duration = parse_duration(source['videos'][0]['duration'])
mobj = re.search(r'''(?sx) return {
class="Destacado-text"[^>]*>.*?<h1>\s* '_type': 'url_transparent',
<span>(?P<series>[^<]+)</span>\s* # for some reason only HLS is supported
<span>(?P<season>[^<]+)</span>\s* 'url': smuggle_url('ooyala:' + embedCode, {'supportedformats': 'm3u8'}),
<span>(?P<episode>[^<]+)</span>''', webpage) 'id': video_id,
series, season, episode = mobj.groups() if mobj else [None] * 3
if not title:
if mobj:
title = '%s - %s - %s' % (series, season, episode)
else:
title = remove_start(self._search_regex(
r'<title>([^<]+)</title>', webpage, 'title'), 'Ver online ')
info.update({
'display_id': display_id,
'title': title, 'title': title,
'description': get_element_by_attribute('class', 'text', webpage), 'description': description,
'series': series, 'series': series,
'season': season, 'season': season,
'episode': episode, 'episode': episode,
}) 'duration': duration,
return info 'thumbnail': source['images'][0]['url'],
}

View File

@ -18,7 +18,7 @@ class OoyalaBaseIE(InfoExtractor):
_CONTENT_TREE_BASE = _PLAYER_BASE + 'player_api/v1/content_tree/' _CONTENT_TREE_BASE = _PLAYER_BASE + 'player_api/v1/content_tree/'
_AUTHORIZATION_URL_TEMPLATE = _PLAYER_BASE + 'sas/player_api/v2/authorization/embed_code/%s/%s?' _AUTHORIZATION_URL_TEMPLATE = _PLAYER_BASE + 'sas/player_api/v2/authorization/embed_code/%s/%s?'
def _extract(self, content_tree_url, video_id, domain='example.org'): def _extract(self, content_tree_url, video_id, domain='example.org', supportedformats=None):
content_tree = self._download_json(content_tree_url, video_id)['content_tree'] content_tree = self._download_json(content_tree_url, video_id)['content_tree']
metadata = content_tree[list(content_tree)[0]] metadata = content_tree[list(content_tree)[0]]
embed_code = metadata['embed_code'] embed_code = metadata['embed_code']
@ -29,7 +29,7 @@ class OoyalaBaseIE(InfoExtractor):
self._AUTHORIZATION_URL_TEMPLATE % (pcode, embed_code) + self._AUTHORIZATION_URL_TEMPLATE % (pcode, embed_code) +
compat_urllib_parse_urlencode({ compat_urllib_parse_urlencode({
'domain': domain, 'domain': domain,
'supportedFormats': 'mp4,rtmp,m3u8,hds', 'supportedFormats': supportedformats or 'mp4,rtmp,m3u8,hds',
}), video_id) }), video_id)
cur_auth_data = auth_data['authorization_data'][embed_code] cur_auth_data = auth_data['authorization_data'][embed_code]
@ -145,8 +145,9 @@ class OoyalaIE(OoyalaBaseIE):
url, smuggled_data = unsmuggle_url(url, {}) url, smuggled_data = unsmuggle_url(url, {})
embed_code = self._match_id(url) embed_code = self._match_id(url)
domain = smuggled_data.get('domain') domain = smuggled_data.get('domain')
supportedformats = smuggled_data.get('supportedformats')
content_tree_url = self._CONTENT_TREE_BASE + 'embed_code/%s/%s' % (embed_code, embed_code) content_tree_url = self._CONTENT_TREE_BASE + 'embed_code/%s/%s' % (embed_code, embed_code)
return self._extract(content_tree_url, embed_code, domain) return self._extract(content_tree_url, embed_code, domain, supportedformats)
class OoyalaExternalIE(OoyalaBaseIE): class OoyalaExternalIE(OoyalaBaseIE):

View File

@ -33,7 +33,7 @@ class PornHubIE(InfoExtractor):
(?:[a-z]+\.)?pornhub\.com/(?:view_video\.php\?viewkey=|embed/)| (?:[a-z]+\.)?pornhub\.com/(?:view_video\.php\?viewkey=|embed/)|
(?:www\.)?thumbzilla\.com/video/ (?:www\.)?thumbzilla\.com/video/
) )
(?P<id>[0-9a-z]+) (?P<id>[\da-z]+)
''' '''
_TESTS = [{ _TESTS = [{
'url': 'http://www.pornhub.com/view_video.php?viewkey=648719015', 'url': 'http://www.pornhub.com/view_video.php?viewkey=648719015',
@ -96,12 +96,11 @@ class PornHubIE(InfoExtractor):
'only_matching': True, 'only_matching': True,
}] }]
@classmethod @staticmethod
def _extract_url(cls, webpage): def _extract_urls(webpage):
mobj = re.search( return re.findall(
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?pornhub\.com/embed/\d+)\1', webpage) r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//(?:www\.)?pornhub\.com/embed/[\da-z]+)',
if mobj: webpage)
return mobj.group('url')
def _extract_count(self, pattern, webpage, name): def _extract_count(self, pattern, webpage, name):
return str_to_int(self._search_regex( return str_to_int(self._search_regex(

View File

@ -1,5 +1,7 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
@ -10,8 +12,8 @@ from ..utils import (
class RedTubeIE(InfoExtractor): class RedTubeIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?redtube\.com/(?P<id>[0-9]+)' _VALID_URL = r'https?://(?:(?:www\.)?redtube\.com/|embed\.redtube\.com/\?.*?\bid=)(?P<id>[0-9]+)'
_TEST = { _TESTS = [{
'url': 'http://www.redtube.com/66418', 'url': 'http://www.redtube.com/66418',
'md5': '7b8c22b5e7098a3e1c09709df1126d2d', 'md5': '7b8c22b5e7098a3e1c09709df1126d2d',
'info_dict': { 'info_dict': {
@ -23,11 +25,21 @@ class RedTubeIE(InfoExtractor):
'view_count': int, 'view_count': int,
'age_limit': 18, 'age_limit': 18,
} }
} }, {
'url': 'http://embed.redtube.com/?bgcolor=000000&id=1443286',
'only_matching': True,
}]
@staticmethod
def _extract_urls(webpage):
    """Return the RedTube embed URLs referenced by iframes in *webpage*.

    Every ``<iframe>`` whose ``src`` points at ``embed.redtube.com/?...id=<digits>``
    (with or without a scheme) contributes one entry. The result is a list
    of the matched URL strings in document order, each ending at the numeric
    id; it is empty when nothing matches.
    """
    # The pattern has a single (named) group, so findall yields plain strings.
    return re.findall(
        r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//embed\.redtube\.com/\?.*?\bid=\d+)',
        webpage)
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(
'http://www.redtube.com/%s' % video_id, video_id)
if any(s in webpage for s in ['video-deleted-info', '>This video has been removed']): if any(s in webpage for s in ['video-deleted-info', '>This video has been removed']):
raise ExtractorError('Video %s has been removed' % video_id, expected=True) raise ExtractorError('Video %s has been removed' % video_id, expected=True)

View File

@ -32,12 +32,15 @@ class TMZArticleIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?tmz\.com/\d{4}/\d{2}/\d{2}/(?P<id>[^/]+)/?' _VALID_URL = r'https?://(?:www\.)?tmz\.com/\d{4}/\d{2}/\d{2}/(?P<id>[^/]+)/?'
_TEST = { _TEST = {
'url': 'http://www.tmz.com/2015/04/19/bobby-brown-bobbi-kristina-awake-video-concert', 'url': 'http://www.tmz.com/2015/04/19/bobby-brown-bobbi-kristina-awake-video-concert',
'md5': 'e482a414a38db73087450e3a6ce69d00', 'md5': '3316ff838ae5bb7f642537825e1e90d2',
'info_dict': { 'info_dict': {
'id': '0_6snoelag', 'id': '0_6snoelag',
'ext': 'mp4', 'ext': 'mov',
'title': 'Bobby Brown Tells Crowd ... Bobbi Kristina is Awake', 'title': 'Bobby Brown Tells Crowd ... Bobbi Kristina is Awake',
'description': 'Bobby Brown stunned his audience during a concert Saturday night, when he told the crowd, "Bobbi is awake. She\'s watching me."', 'description': 'Bobby Brown stunned his audience during a concert Saturday night, when he told the crowd, "Bobbi is awake. She\'s watching me."',
'timestamp': 1429467813,
'upload_date': '20150419',
'uploader_id': 'batchUser',
} }
} }
@ -45,12 +48,9 @@ class TMZArticleIE(InfoExtractor):
video_id = self._match_id(url) video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
embedded_video_info_str = self._html_search_regex( embedded_video_info = self._parse_json(self._html_search_regex(
r'tmzVideoEmbedV2\("([^)]+)"\);', webpage, 'embedded video info') r'tmzVideoEmbed\(({.+?})\);', webpage, 'embedded video info'),
video_id)
embedded_video_info = self._parse_json(
embedded_video_info_str, video_id,
transform_source=lambda s: s.replace('\\', ''))
return self.url_result( return self.url_result(
'http://www.tmz.com/videos/%s/' % embedded_video_info['id']) 'http://www.tmz.com/videos/%s/' % embedded_video_info['id'])

View File

@ -201,6 +201,32 @@ class YahooIE(InfoExtractor):
}, },
'skip': 'redirect to https://www.yahoo.com/music', 'skip': 'redirect to https://www.yahoo.com/music',
}, },
{
# yahoo://article/
'url': 'https://www.yahoo.com/movies/video/true-story-trailer-173000497.html',
'info_dict': {
'id': '071c4013-ce30-3a93-a5b2-e0413cd4a9d1',
'ext': 'mp4',
'title': "'True Story' Trailer",
'description': 'True Story',
},
'params': {
'skip_download': True,
},
},
{
# ytwnews://cavideo/
'url': 'https://tw.video.yahoo.com/movie-tw/單車天使-中文版預-092316541.html',
'info_dict': {
'id': 'ba133ff2-0793-3510-b636-59dfe9ff6cff',
'ext': 'mp4',
'title': '單車天使 - 中文版預',
'description': '中文版預',
},
'params': {
'skip_download': True,
},
},
] ]
def _real_extract(self, url): def _real_extract(self, url):
@ -269,7 +295,8 @@ class YahooIE(InfoExtractor):
r'"first_videoid"\s*:\s*"([^"]+)"', r'"first_videoid"\s*:\s*"([^"]+)"',
r'%s[^}]*"ccm_id"\s*:\s*"([^"]+)"' % re.escape(page_id), r'%s[^}]*"ccm_id"\s*:\s*"([^"]+)"' % re.escape(page_id),
r'<article[^>]data-uuid=["\']([^"\']+)', r'<article[^>]data-uuid=["\']([^"\']+)',
r'yahoo://article/view\?.*\buuid=([^&"\']+)', r'<meta[^<>]+yahoo://article/view\?.*\buuid=([^&"\']+)',
r'<meta[^<>]+["\']ytwnews://cavideo/(?:[^/]+/)+([\da-fA-F-]+)[&"\']',
] ]
video_id = self._search_regex( video_id = self._search_regex(
CONTENT_ID_REGEXES, webpage, 'content ID') CONTENT_ID_REGEXES, webpage, 'content ID')

View File

@ -1,3 +1,3 @@
from __future__ import unicode_literals from __future__ import unicode_literals
__version__ = '2016.11.04' __version__ = '2016.11.08'