Merge pull request #10 from rg3/master

update 24 may
This commit is contained in:
siddht1 2016-05-24 23:50:42 +05:30
commit d030e9ca0b
26 changed files with 600 additions and 263 deletions

View File

@ -172,3 +172,4 @@ blahgeek
Kevin Deldycke
inondle
Tomáš Čech
Déstin Reed

View File

@ -693,6 +693,10 @@ hash -r
Again, from then on you'll be able to update with `sudo youtube-dl -U`.
### youtube-dl is extremely slow to start on Windows
Add a file exclusion for `youtube-dl.exe` in Windows Defender settings.
### I'm getting an error `Unable to extract OpenGraph title` on YouTube playlists
YouTube changed their playlist format in March 2014 and later on, so you'll need at least youtube-dl 2014.07.25 to download all YouTube videos.

View File

@ -103,6 +103,12 @@ class TestCompat(unittest.TestCase):
self.assertTrue(isinstance(doc.find('chinese').text, compat_str))
self.assertTrue(isinstance(doc.find('foo/bar').text, compat_str))
def test_compat_etree_fromstring_doctype(self):
xml = '''<?xml version="1.0"?>
<!DOCTYPE smil PUBLIC "-//W3C//DTD SMIL 2.0//EN" "http://www.w3.org/2001/SMIL20/SMIL20.dtd">
<smil xmlns="http://www.w3.org/2001/SMIL20/Language"></smil>'''
compat_etree_fromstring(xml)
def test_struct_unpack(self):
self.assertEqual(compat_struct_unpack('!B', b'\x00'), (0,))

View File

@ -245,13 +245,20 @@ try:
except ImportError: # Python 2.6
from xml.parsers.expat import ExpatError as compat_xml_parse_error
etree = xml.etree.ElementTree
class _TreeBuilder(etree.TreeBuilder):
def doctype(self, name, pubid, system):
pass
if sys.version_info[0] >= 3:
compat_etree_fromstring = xml.etree.ElementTree.fromstring
def compat_etree_fromstring(text):
return etree.XML(text, parser=etree.XMLParser(target=_TreeBuilder()))
else:
# python 2.x tries to encode unicode strings with ascii (see the
# XMLParser._fixtext method)
etree = xml.etree.ElementTree
try:
_etree_iter = etree.Element.iter
except AttributeError: # Python <=2.6
@ -265,7 +272,7 @@ else:
# 2.7 source
def _XML(text, parser=None):
if not parser:
parser = etree.XMLParser(target=etree.TreeBuilder())
parser = etree.XMLParser(target=_TreeBuilder())
parser.feed(text)
return parser.close()
@ -277,7 +284,7 @@ else:
return el
def compat_etree_fromstring(text):
doc = _XML(text, parser=etree.XMLParser(target=etree.TreeBuilder(element_factory=_element_factory)))
doc = _XML(text, parser=etree.XMLParser(target=_TreeBuilder(element_factory=_element_factory)))
for el in _etree_iter(doc):
if el.text is not None and isinstance(el.text, bytes):
el.text = el.text.decode('utf-8')

View File

@ -29,7 +29,7 @@ class BandcampIE(InfoExtractor):
'_skip': 'There is a limit of 200 free downloads / month for the test song'
}, {
'url': 'http://benprunty.bandcamp.com/track/lanius-battle',
'md5': '2b68e5851514c20efdff2afc5603b8b4',
'md5': '73d0b3171568232574e45652f8720b5c',
'info_dict': {
'id': '2650410135',
'ext': 'mp3',
@ -48,6 +48,10 @@ class BandcampIE(InfoExtractor):
if m_trackinfo:
json_code = m_trackinfo.group(1)
data = json.loads(json_code)[0]
track_id = compat_str(data['id'])
if not data.get('file'):
raise ExtractorError('Not streamable', video_id=track_id, expected=True)
formats = []
for format_id, format_url in data['file'].items():
@ -64,7 +68,7 @@ class BandcampIE(InfoExtractor):
self._sort_formats(formats)
return {
'id': compat_str(data['id']),
'id': track_id,
'title': data['title'],
'formats': formats,
'duration': float_or_none(data.get('duration')),

View File

@ -11,6 +11,7 @@ class BYUtvIE(InfoExtractor):
_VALID_URL = r'^https?://(?:www\.)?byutv.org/watch/[0-9a-f-]+/(?P<video_id>[^/?#]+)'
_TEST = {
'url': 'http://www.byutv.org/watch/6587b9a3-89d2-42a6-a7f7-fd2f81840a7d/studio-c-season-5-episode-5',
'md5': '05850eb8c749e2ee05ad5a1c34668493',
'info_dict': {
'id': 'studio-c-season-5-episode-5',
'ext': 'mp4',
@ -21,7 +22,8 @@ class BYUtvIE(InfoExtractor):
},
'params': {
'skip_download': True,
}
},
'add_ie': ['Ooyala'],
}
def _real_extract(self, url):

View File

@ -1,5 +1,7 @@
from __future__ import unicode_literals
import re
from .theplatform import ThePlatformIE
from ..utils import (
xpath_text,
@ -21,7 +23,7 @@ class CBSBaseIE(ThePlatformIE):
class CBSIE(CBSBaseIE):
_VALID_URL = r'https?://(?:www\.)?(?:cbs\.com/shows/[^/]+/(?:video|artist)|colbertlateshow\.com/(?:video|podcasts))/[^/]+/(?P<id>[^/]+)'
_VALID_URL = r'(?:cbs:(?P<content_id>\w+)|https?://(?:www\.)?(?:cbs\.com/shows/[^/]+/(?:video|artist)|colbertlateshow\.com/(?:video|podcasts))/[^/]+/(?P<display_id>[^/]+))'
_TESTS = [{
'url': 'http://www.cbs.com/shows/garth-brooks/video/_u7W953k6la293J7EPTd9oHkSPs6Xn6_/connect-chat-feat-garth-brooks/',
@ -66,7 +68,8 @@ class CBSIE(CBSBaseIE):
TP_RELEASE_URL_TEMPLATE = 'http://link.theplatform.com/s/dJ5BDC/%s?mbr=true'
def _real_extract(self, url):
display_id = self._match_id(url)
content_id, display_id = re.match(self._VALID_URL, url).groups()
if not content_id:
webpage = self._download_webpage(url, display_id)
content_id = self._search_regex(
[r"video\.settings\.content_id\s*=\s*'([^']+)';", r"cbsplayer\.contentId\s*=\s*'([^']+)';"],

View File

@ -8,6 +8,7 @@ class ESPNIE(InfoExtractor):
_VALID_URL = r'https?://espn\.go\.com/(?:[^/]+/)*(?P<id>[^/]+)'
_TESTS = [{
'url': 'http://espn.go.com/video/clip?id=10365079',
'md5': '60e5d097a523e767d06479335d1bdc58',
'info_dict': {
'id': 'FkYWtmazr6Ed8xmvILvKLWjd4QvYZpzG',
'ext': 'mp4',
@ -15,21 +16,22 @@ class ESPNIE(InfoExtractor):
'description': None,
},
'params': {
# m3u8 download
'skip_download': True,
},
'add_ie': ['OoyalaExternal'],
}, {
# intl video, from http://www.espnfc.us/video/mls-highlights/150/video/2743663/must-see-moments-best-of-the-mls-season
'url': 'http://espn.go.com/video/clip?id=2743663',
'md5': 'f4ac89b59afc7e2d7dbb049523df6768',
'info_dict': {
'id': '50NDFkeTqRHB0nXBOK-RGdSG5YQPuxHg',
'ext': 'mp4',
'title': 'Must-See Moments: Best of the MLS season',
},
'params': {
# m3u8 download
'skip_download': True,
},
'add_ie': ['OoyalaExternal'],
}, {
'url': 'https://espn.go.com/video/iframe/twitter/?cms=espn&id=10365079',
'only_matching': True,

View File

@ -617,6 +617,10 @@ from .qqmusic import (
QQMusicPlaylistIE,
)
from .r7 import R7IE
from .radiocanada import (
RadioCanadaIE,
RadioCanadaAudioVideoIE,
)
from .radiode import RadioDeIE
from .radiojavan import RadioJavanIE
from .radiobremen import RadioBremenIE
@ -630,6 +634,7 @@ from .rds import RDSIE
from .redtube import RedTubeIE
from .regiotv import RegioTVIE
from .restudy import RestudyIE
from .reuters import ReutersIE
from .reverbnation import ReverbNationIE
from .revision3 import Revision3IE
from .rice import RICEIE
@ -941,7 +946,10 @@ from .vube import VubeIE
from .vuclip import VuClipIE
from .vulture import VultureIE
from .walla import WallaIE
from .washingtonpost import WashingtonPostIE
from .washingtonpost import (
WashingtonPostIE,
WashingtonPostArticleIE,
)
from .wat import WatIE
from .watchindianporn import WatchIndianPornIE
from .wdr import (

View File

@ -13,7 +13,8 @@ class Formula1IE(InfoExtractor):
'id': 'JvYXJpMzE6pArfHWm5ARp5AiUmD-gibV',
'ext': 'flv',
'title': 'Race highlights - Spain 2016',
}
},
'add_ie': ['Ooyala'],
}
def _real_extract(self, url):

View File

@ -784,6 +784,19 @@ class GenericIE(InfoExtractor):
'title': 'Rosetta #CometLanding webcast HL 10',
}
},
# Another Livestream embed, without 'new.' in URL
{
'url': 'https://www.freespeech.org/',
'info_dict': {
'id': '123537347',
'ext': 'mp4',
'title': 're:^FSTV [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
},
'params': {
# Live stream
'skip_download': True,
},
},
# LazyYT
{
'url': 'http://discourse.ubuntu.com/t/unity-8-desktop-mode-windows-on-mir/1986',
@ -1194,6 +1207,16 @@ class GenericIE(InfoExtractor):
'uploader': 'Lake8737',
}
},
# Duplicated embedded video URLs
{
'url': 'http://www.hudl.com/athlete/2538180/highlights/149298443',
'info_dict': {
'id': '149298443_480_16c25b74_2',
'ext': 'mp4',
'title': 'vs. Blue Orange Spring Game',
'uploader': 'www.hudl.com',
},
},
]
def report_following_redirect(self, new_url):
@ -1868,7 +1891,7 @@ class GenericIE(InfoExtractor):
return self.url_result(self._proto_relative_url(mobj.group('url'), scheme='http:'), 'CondeNast')
mobj = re.search(
r'<iframe[^>]+src="(?P<url>https?://new\.livestream\.com/[^"]+/player[^"]+)"',
r'<iframe[^>]+src="(?P<url>https?://(?:new\.)?livestream\.com/[^"]+/player[^"]+)"',
webpage)
if mobj is not None:
return self.url_result(mobj.group('url'), 'Livestream')
@ -2111,7 +2134,7 @@ class GenericIE(InfoExtractor):
raise UnsupportedError(url)
entries = []
for video_url in found:
for video_url in orderedSet(found):
video_url = unescapeHTML(video_url)
video_url = video_url.replace('\\/', '/')
video_url = compat_urlparse.urljoin(url, video_url)

View File

@ -14,6 +14,7 @@ class GrouponIE(InfoExtractor):
'description': 'Studio kept at 105 degrees and 40% humidity with anti-microbial and anti-slip Flotex flooring; certified instructors',
},
'playlist': [{
'md5': '42428ce8a00585f9bc36e49226eae7a1',
'info_dict': {
'id': 'fk6OhWpXgIQ',
'ext': 'mp4',
@ -24,10 +25,11 @@ class GrouponIE(InfoExtractor):
'uploader_id': 'groupon',
'uploader': 'Groupon',
},
'add_ie': ['Youtube'],
}],
'params': {
'skip_download': True,
}
},
}
_PROVIDERS = {

View File

@ -8,7 +8,7 @@ class HowcastIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?howcast\.com/videos/(?P<id>\d+)'
_TEST = {
'url': 'http://www.howcast.com/videos/390161-How-to-Tie-a-Square-Knot-Properly',
'md5': '8b743df908c42f60cf6496586c7f12c3',
'md5': '7d45932269a288149483144f01b99789',
'info_dict': {
'id': '390161',
'ext': 'mp4',
@ -19,9 +19,9 @@ class HowcastIE(InfoExtractor):
'duration': 56.823,
},
'params': {
# m3u8 download
'skip_download': True,
},
'add_ie': ['Ooyala'],
}
def _real_extract(self, url):

View File

@ -7,48 +7,53 @@ from .common import InfoExtractor
from ..compat import compat_urlparse
from ..utils import (
determine_ext,
int_or_none,
remove_end,
unified_strdate,
ExtractorError,
int_or_none,
parse_iso8601,
remove_end,
)
class LifeNewsIE(InfoExtractor):
IE_NAME = 'lifenews'
IE_DESC = 'LIFE | NEWS'
_VALID_URL = r'https?://lifenews\.ru/(?:mobile/)?(?P<section>news|video)/(?P<id>\d+)'
IE_NAME = 'life'
IE_DESC = 'Life.ru'
_VALID_URL = r'https?://life\.ru/t/[^/]+/(?P<id>\d+)'
_TESTS = [{
# single video embedded via video/source
'url': 'http://lifenews.ru/news/98736',
'url': 'https://life.ru/t/новости/98736',
'md5': '77c95eaefaca216e32a76a343ad89d23',
'info_dict': {
'id': '98736',
'ext': 'mp4',
'title': 'Мужчина нашел дома архив оборонного завода',
'description': 'md5:3b06b1b39b5e2bea548e403d99b8bf26',
'timestamp': 1344154740,
'upload_date': '20120805',
'view_count': int,
}
}, {
# single video embedded via iframe
'url': 'http://lifenews.ru/news/152125',
'url': 'https://life.ru/t/новости/152125',
'md5': '77d19a6f0886cd76bdbf44b4d971a273',
'info_dict': {
'id': '152125',
'ext': 'mp4',
'title': 'В Сети появилось видео захвата «Правым сектором» колхозных полей ',
'description': 'Жители двух поселков Днепропетровской области не простили радикалам угрозу лишения плодородных земель и пошли в лобовую. ',
'timestamp': 1427961840,
'upload_date': '20150402',
'view_count': int,
}
}, {
# two videos embedded via iframe
'url': 'http://lifenews.ru/news/153461',
'url': 'https://life.ru/t/новости/153461',
'info_dict': {
'id': '153461',
'title': 'В Москве спасли потерявшегося медвежонка, который спрятался на дереве',
'description': 'Маленький хищник не смог найти дорогу домой и обрел временное убежище на тополе недалеко от жилого массива, пока его не нашла соседская собака.',
'upload_date': '20150505',
'timestamp': 1430825520,
'view_count': int,
},
'playlist': [{
'md5': '9b6ef8bc0ffa25aebc8bdb40d89ab795',
@ -57,6 +62,7 @@ class LifeNewsIE(InfoExtractor):
'ext': 'mp4',
'title': 'В Москве спасли потерявшегося медвежонка, который спрятался на дереве (Видео 1)',
'description': 'Маленький хищник не смог найти дорогу домой и обрел временное убежище на тополе недалеко от жилого массива, пока его не нашла соседская собака.',
'timestamp': 1430825520,
'upload_date': '20150505',
},
}, {
@ -66,22 +72,25 @@ class LifeNewsIE(InfoExtractor):
'ext': 'mp4',
'title': 'В Москве спасли потерявшегося медвежонка, который спрятался на дереве (Видео 2)',
'description': 'Маленький хищник не смог найти дорогу домой и обрел временное убежище на тополе недалеко от жилого массива, пока его не нашла соседская собака.',
'timestamp': 1430825520,
'upload_date': '20150505',
},
}],
}, {
'url': 'http://lifenews.ru/video/13035',
'url': 'https://life.ru/t/новости/213035',
'only_matching': True,
}, {
'url': 'https://life.ru/t/%D0%BD%D0%BE%D0%B2%D0%BE%D1%81%D1%82%D0%B8/153461',
'only_matching': True,
}, {
'url': 'https://life.ru/t/новости/411489/manuel_vals_nazval_frantsiiu_tsieliu_nomier_odin_dlia_ighil',
'only_matching': True,
}]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
section = mobj.group('section')
video_id = self._match_id(url)
webpage = self._download_webpage(
'http://lifenews.ru/%s/%s' % (section, video_id),
video_id, 'Downloading page')
webpage = self._download_webpage(url, video_id)
video_urls = re.findall(
r'<video[^>]+><source[^>]+src=["\'](.+?)["\']', webpage)
@ -95,26 +104,22 @@ class LifeNewsIE(InfoExtractor):
title = remove_end(
self._og_search_title(webpage),
' - Первый по срочным новостям — LIFE | NEWS')
' - Life.ru')
description = self._og_search_description(webpage)
view_count = self._html_search_regex(
r'<div class=\'views\'>\s*(\d+)\s*</div>', webpage, 'view count', fatal=False)
comment_count = self._html_search_regex(
r'=\'commentCount\'[^>]*>\s*(\d+)\s*<',
webpage, 'comment count', fatal=False)
r'<div[^>]+class=(["\']).*?\bhits-count\b.*?\1[^>]*>\s*(?P<value>\d+)\s*</div>',
webpage, 'view count', fatal=False, group='value')
upload_date = self._html_search_regex(
r'<time[^>]*datetime=\'([^\']+)\'', webpage, 'upload date', fatal=False)
if upload_date is not None:
upload_date = unified_strdate(upload_date)
timestamp = parse_iso8601(self._search_regex(
r'<time[^>]+datetime=(["\'])(?P<value>.+?)\1',
webpage, 'upload date', fatal=False, group='value'))
common_info = {
'description': description,
'view_count': int_or_none(view_count),
'comment_count': int_or_none(comment_count),
'upload_date': upload_date,
'timestamp': timestamp,
}
def make_entry(video_id, video_url, index=None):
@ -183,7 +188,8 @@ class LifeEmbedIE(InfoExtractor):
ext = determine_ext(video_url)
if ext == 'm3u8':
formats.extend(self._extract_m3u8_formats(
video_url, video_id, 'mp4', m3u8_id='m3u8'))
video_url, video_id, 'mp4',
entry_protocol='m3u8_native', m3u8_id='m3u8'))
else:
formats.append({
'url': video_url,

View File

@ -150,7 +150,7 @@ class LivestreamIE(InfoExtractor):
}
def _extract_stream_info(self, stream_info):
broadcast_id = stream_info['broadcast_id']
broadcast_id = compat_str(stream_info['broadcast_id'])
is_live = stream_info.get('is_live')
formats = []

View File

@ -8,6 +8,7 @@ from ..utils import (
float_or_none,
ExtractorError,
unsmuggle_url,
determine_ext,
)
from ..compat import compat_urllib_parse_urlencode
@ -15,56 +16,49 @@ from ..compat import compat_urllib_parse_urlencode
class OoyalaBaseIE(InfoExtractor):
_PLAYER_BASE = 'http://player.ooyala.com/'
_CONTENT_TREE_BASE = _PLAYER_BASE + 'player_api/v1/content_tree/'
_AUTHORIZATION_URL_TEMPLATE = _PLAYER_BASE + 'sas/player_api/v1/authorization/embed_code/%s/%s?'
_AUTHORIZATION_URL_TEMPLATE = _PLAYER_BASE + 'sas/player_api/v2/authorization/embed_code/%s/%s?'
def _extract(self, content_tree_url, video_id, domain='example.org'):
content_tree = self._download_json(content_tree_url, video_id)['content_tree']
metadata = content_tree[list(content_tree)[0]]
embed_code = metadata['embed_code']
pcode = metadata.get('asset_pcode') or embed_code
video_info = {
'id': embed_code,
'title': metadata['title'],
'description': metadata.get('description'),
'thumbnail': metadata.get('thumbnail_image') or metadata.get('promo_image'),
'duration': float_or_none(metadata.get('duration'), 1000),
}
title = metadata['title']
urls = []
formats = []
for supported_format in ('mp4', 'm3u8', 'hds', 'rtmp'):
auth_data = self._download_json(
self._AUTHORIZATION_URL_TEMPLATE % (pcode, embed_code) +
compat_urllib_parse_urlencode({
'domain': domain,
'supportedFormats': supported_format
}),
video_id, 'Downloading %s JSON' % supported_format)
'supportedFormats': 'mp4,rtmp,m3u8,hds',
}), video_id)
cur_auth_data = auth_data['authorization_data'][embed_code]
urls = []
formats = []
if cur_auth_data['authorized']:
for stream in cur_auth_data['streams']:
url = base64.b64decode(
s_url = base64.b64decode(
stream['url']['data'].encode('ascii')).decode('utf-8')
if url in urls:
if s_url in urls:
continue
urls.append(url)
urls.append(s_url)
ext = determine_ext(s_url, None)
delivery_type = stream['delivery_type']
if delivery_type == 'hls' or '.m3u8' in url:
if delivery_type == 'hls' or ext == 'm3u8':
formats.extend(self._extract_m3u8_formats(
url, embed_code, 'mp4', 'm3u8_native',
s_url, embed_code, 'mp4', 'm3u8_native',
m3u8_id='hls', fatal=False))
elif delivery_type == 'hds' or '.f4m' in url:
elif delivery_type == 'hds' or ext == 'f4m':
formats.extend(self._extract_f4m_formats(
url + '?hdcore=3.7.0', embed_code, f4m_id='hds', fatal=False))
elif '.smil' in url:
s_url + '?hdcore=3.7.0', embed_code, f4m_id='hds', fatal=False))
elif ext == 'smil':
formats.extend(self._extract_smil_formats(
url, embed_code, fatal=False))
s_url, embed_code, fatal=False))
else:
formats.append({
'url': url,
'ext': stream.get('delivery_type'),
'url': s_url,
'ext': ext or stream.get('delivery_type'),
'vcodec': stream.get('video_codec'),
'format_id': delivery_type,
'width': int_or_none(stream.get('width')),
@ -78,8 +72,24 @@ class OoyalaBaseIE(InfoExtractor):
self.IE_NAME, cur_auth_data['message']), expected=True)
self._sort_formats(formats)
video_info['formats'] = formats
return video_info
subtitles = {}
for lang, sub in metadata.get('closed_captions_vtt', {}).get('captions', {}).items():
sub_url = sub.get('url')
if not sub_url:
continue
subtitles[lang] = [{
'url': sub_url,
}]
return {
'id': embed_code,
'title': title,
'description': metadata.get('description'),
'thumbnail': metadata.get('thumbnail_image') or metadata.get('promo_image'),
'duration': float_or_none(metadata.get('duration'), 1000),
'subtitles': subtitles,
'formats': formats,
}
class OoyalaIE(OoyalaBaseIE):

View File

@ -0,0 +1,130 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
xpath_text,
find_xpath_attr,
determine_ext,
int_or_none,
unified_strdate,
xpath_element,
ExtractorError,
)
class RadioCanadaIE(InfoExtractor):
    """Extractor for Radio-Canada media console items.

    Matches either the internal ``radiocanada:<app_code>:<id>`` form or
    ``ici.radio-canada.ca/widgets/mediaconsole/<app_code>/<id>`` URLs.
    """
    IE_NAME = 'radiocanada'
    _VALID_URL = r'(?:radiocanada:|https?://ici\.radio-canada\.ca/widgets/mediaconsole/)(?P<app_code>[^:/]+)[:/](?P<id>[0-9]+)'
    _TEST = {
        'url': 'http://ici.radio-canada.ca/widgets/mediaconsole/medianet/7184272',
        'info_dict': {
            'id': '7184272',
            'ext': 'flv',
            'title': 'Le parcours du tireur capté sur vidéo',
            'description': 'Images des caméras de surveillance fournies par la GRC montrant le parcours du tireur d\'Ottawa',
            'upload_date': '20141023',
        },
        'params': {
            # rtmp download
            'skip_download': True,
        },
    }

    def _real_extract(self, url):
        """Query the validation and metadata APIs and build the info dict.

        Raises ExtractorError (expected) when the validation API answers
        with the literal URL ``'null'``, relaying the API's own message.
        """
        app_code, video_id = re.match(self._VALID_URL, url).groups()

        formats = []
        # TODO: extract m3u8 and f4m formats
        # m3u8 formats can be extracted using the ipad device_type, but a 403
        # error code is returned when ffmpeg tries to download the segments
        # f4m formats can be extracted using the flashhd device_type, but they
        # produce an unplayable file
        for device_type in ('flash',):
            v_data = self._download_xml(
                'http://api.radio-canada.ca/validationMedia/v1/Validation.ashx',
                video_id, note='Downloading %s XML' % device_type, query={
                    'appCode': app_code,
                    'idMedia': video_id,
                    'connectionType': 'broadband',
                    'multibitrate': 'true',
                    'deviceType': device_type,
                    # paysJ391wsHjbOJwvCs26toz and bypasslock are used to bypass geo-restriction
                    'paysJ391wsHjbOJwvCs26toz': 'CA',
                    'bypasslock': 'NZt5K62gRqfc',
                })
            v_url = xpath_text(v_data, 'url')
            if not v_url:
                continue
            if v_url == 'null':
                raise ExtractorError('%s said: %s' % (
                    self.IE_NAME, xpath_text(v_data, 'message')), expected=True)
            ext = determine_ext(v_url)
            if ext == 'm3u8':
                formats.extend(self._extract_m3u8_formats(
                    v_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
            elif ext == 'f4m':
                formats.extend(self._extract_f4m_formats(v_url, video_id, f4m_id='hds', fatal=False))
            else:
                bitrates = xpath_element(v_data, 'bitrates')
                if bitrates is None:
                    # No per-bitrate listing in the response: nothing to add.
                    continue
                for url_e in bitrates.findall('url'):
                    tbr = int_or_none(url_e.get('bitrate'))
                    if not tbr:
                        continue
                    formats.append({
                        'format_id': 'rtmp-%d' % tbr,
                        # Swap the bitrate embedded in the file name for this variant's.
                        'url': re.sub(r'\d+\.%s' % ext, '%d.%s' % (tbr, ext), v_url),
                        'ext': 'flv',
                        'protocol': 'rtmp',
                        'width': int_or_none(url_e.get('width')),
                        'height': int_or_none(url_e.get('height')),
                        'tbr': tbr,
                    })
        self._sort_formats(formats)

        metadata = self._download_xml(
            'http://api.radio-canada.ca/metaMedia/v1/index.ashx',
            video_id, note='Downloading metadata XML', query={
                'appCode': app_code,
                'idMedia': video_id,
            })

        def get_meta(name):
            # Text of the <Meta name="..."> element, or None when absent.
            el = find_xpath_attr(metadata, './/Meta', 'name', name)
            return el.text if el is not None else None

        return {
            'id': video_id,
            'title': get_meta('Title'),
            'description': get_meta('Description') or get_meta('ShortDescription'),
            'thumbnail': get_meta('imageHR') or get_meta('imageMR') or get_meta('imageBR'),
            'duration': int_or_none(get_meta('length')),
            'series': get_meta('Emission'),
            # Look the values up in the metadata rather than passing the key names.
            'season_number': int_or_none(get_meta('SrcSaison')),
            'episode_number': int_or_none(get_meta('SrcEpisode')),
            'upload_date': unified_strdate(get_meta('Date')),
            'formats': formats,
        }
class RadioCanadaAudioVideoIE(InfoExtractor):
    """Extractor for ``ici.radio-canada.ca/audio-video/media-<id>`` pages.

    Only resolves the numeric media id and delegates to the
    ``radiocanada:medianet:<id>`` URL form.
    """
    # Was a bare string literal (effectively a docstring), so the name was never set.
    IE_NAME = 'radiocanada:audiovideo'
    _VALID_URL = r'https?://ici\.radio-canada\.ca/audio-video/media-(?P<id>[0-9]+)'
    _TEST = {
        'url': 'http://ici.radio-canada.ca/audio-video/media-7527184/barack-obama-au-vietnam',
        'info_dict': {
            'id': '7527184',
            'ext': 'flv',
            'title': 'Barack Obama au Vietnam',
            'description': 'Les États-Unis lèvent l\'embargo sur la vente d\'armes qui datait de la guerre du Vietnam',
            'upload_date': '20160523',
        },
        'params': {
            # rtmp download
            'skip_download': True,
        },
    }

    def _real_extract(self, url):
        """Return a url_result pointing at the medianet form of this media id."""
        return self.url_result('radiocanada:medianet:%s' % self._match_id(url))

View File

@ -0,0 +1,69 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
js_to_json,
int_or_none,
unescapeHTML,
)
class ReutersIE(InfoExtractor):
    """Extractor for reuters.com URLs carrying a ``videoId`` query parameter."""
    _VALID_URL = r'https?://(?:www\.)?reuters\.com/.*?\?.*?videoId=(?P<id>[0-9]+)'
    _TEST = {
        'url': 'http://www.reuters.com/video/2016/05/20/san-francisco-police-chief-resigns?videoId=368575562',
        'md5': '8015113643a0b12838f160b0b81cc2ee',
        'info_dict': {
            'id': '368575562',
            'ext': 'mp4',
            'title': 'San Francisco police chief resigns',
        }
    }

    def _real_extract(self, url):
        """Fetch the player iframe, read its drawPlayer() data and list formats."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(
            'http://www.reuters.com/assets/iframe/yovideo?videoId=%s' % video_id, video_id)
        # The player argument is a JS object literal; it is normalised with
        # js_to_json and then queried with regexes rather than parsed.
        video_data = js_to_json(self._search_regex(
            r'(?s)Reuters\.yovideo\.drawPlayer\(({.*?})\);',
            webpage, 'video data'))

        def get_json_value(key, fatal=False):
            # Raw string: '\s' is not a valid escape in a normal string literal.
            return self._search_regex(r'"%s"\s*:\s*"([^"]+)"' % key, video_data, key, fatal=fatal)

        title = unescapeHTML(get_json_value('title', fatal=True))
        # The 'flv' value embeds the two ids needed for the yospace request below.
        mmid, fid = re.search(r',/(\d+)\?f=(\d+)', get_json_value('flv', fatal=True)).groups()

        mas_data = self._download_json(
            'http://mas-e.cds1.yospace.com/mas/%s/%s?trans=json' % (mmid, fid),
            video_id, transform_source=js_to_json)
        formats = []
        for f in mas_data:
            f_url = f.get('url')
            if not f_url:
                continue
            method = f.get('method')
            if method == 'hls':
                formats.extend(self._extract_m3u8_formats(
                    f_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
            else:
                container = f.get('container')
                # 'mobile' entries are labelled 3gp instead of their container.
                ext = '3gp' if method == 'mobile' else container
                formats.append({
                    'format_id': ext,
                    'url': f_url,
                    'ext': ext,
                    'container': container if method != 'mobile' else None,
                })
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'thumbnail': get_json_value('thumb'),
            'duration': int_or_none(get_json_value('seconds')),
            'formats': formats,
        }

View File

@ -11,6 +11,7 @@ class TeachingChannelIE(InfoExtractor):
_TEST = {
'url': 'https://www.teachingchannel.org/videos/teacher-teaming-evolution',
'md5': '3d6361864d7cac20b57c8784da17166f',
'info_dict': {
'id': 'F3bnlzbToeI6pLEfRyrlfooIILUjz4nM',
'ext': 'mp4',
@ -19,9 +20,9 @@ class TeachingChannelIE(InfoExtractor):
'duration': 422.255,
},
'params': {
# m3u8 download
'skip_download': True,
},
'add_ie': ['Ooyala'],
}
def _real_extract(self, url):

View File

@ -6,7 +6,7 @@ from .common import InfoExtractor
class TF1IE(InfoExtractor):
"""TF1 uses the wat.tv player."""
_VALID_URL = r'https?://(?:(?:videos|www|lci)\.tf1|www\.tfou)\.fr/(?:[^/]+/)*(?P<id>.+?)\.html'
_VALID_URL = r'https?://(?:(?:videos|www|lci)\.tf1|(?:www\.)?(?:tfou|ushuaiatv|histoire|tvbreizh))\.fr/(?:[^/]+/)*(?P<id>[^/?#.]+)'
_TESTS = [{
'url': 'http://videos.tf1.fr/auto-moto/citroen-grand-c4-picasso-2013-presentation-officielle-8062060.html',
'info_dict': {
@ -48,6 +48,6 @@ class TF1IE(InfoExtractor):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
wat_id = self._html_search_regex(
r'(["\'])(?:https?:)?//www\.wat\.tv/embedframe/.*?(?P<id>\d{8})(?:#.*?)?\1',
r'(["\'])(?:https?:)?//www\.wat\.tv/embedframe/.*?(?P<id>\d{8})(?:.*?)?\1',
webpage, 'wat id', group='id')
return self.url_result('wat:%s' % wat_id, 'Wat')

View File

@ -37,6 +37,7 @@ class VeohIE(InfoExtractor):
'uploader': 'afp-news',
'duration': 123,
},
'skip': 'This video has been deleted.',
},
{
'url': 'http://www.veoh.com/watch/v69525809F6Nc4frX',

View File

@ -11,12 +11,14 @@ class ViceIE(InfoExtractor):
_TESTS = [{
'url': 'http://www.vice.com/video/cowboy-capitalists-part-1',
'md5': 'e9d77741f9e42ba583e683cd170660f7',
'info_dict': {
'id': '43cW1mYzpia9IlestBjVpd23Yu3afAfp',
'ext': 'flv',
'title': 'VICE_COWBOYCAPITALISTS_PART01_v1_VICE_WM_1080p.mov',
'duration': 725.983,
},
'add_ie': ['Ooyala'],
}, {
'url': 'http://www.vice.com/video/how-to-hack-a-car',
'md5': '6fb2989a3fed069fb8eab3401fc2d3c9',
@ -29,6 +31,7 @@ class ViceIE(InfoExtractor):
'uploader': 'Motherboard',
'upload_date': '20140529',
},
'add_ie': ['Youtube'],
}, {
'url': 'https://news.vice.com/video/experimenting-on-animals-inside-the-monkey-lab',
'only_matching': True,

View File

@ -15,7 +15,8 @@ class VoxMediaIE(InfoExtractor):
'ext': 'mp4',
'title': 'Google\'s new material design direction',
'description': 'md5:2f44f74c4d14a1f800ea73e1c6832ad2',
}
},
'add_ie': ['Ooyala'],
}, {
# data-ooyala-id
'url': 'http://www.theverge.com/2014/10/21/7025853/google-nexus-6-hands-on-photos-video-android-phablet',
@ -25,7 +26,8 @@ class VoxMediaIE(InfoExtractor):
'ext': 'mp4',
'title': 'The Nexus 6: hands-on with Google\'s phablet',
'description': 'md5:87a51fe95ff8cea8b5bdb9ac7ae6a6af',
}
},
'add_ie': ['Ooyala'],
}, {
# volume embed
'url': 'http://www.vox.com/2016/3/31/11336640/mississippi-lgbt-religious-freedom-bill',
@ -35,7 +37,8 @@ class VoxMediaIE(InfoExtractor):
'ext': 'mp4',
'title': 'The new frontier of LGBTQ civil rights, explained',
'description': 'md5:0dc58e94a465cbe91d02950f770eb93f',
}
},
'add_ie': ['Ooyala'],
}, {
# youtube embed
'url': 'http://www.vox.com/2016/3/24/11291692/robot-dance',
@ -48,7 +51,8 @@ class VoxMediaIE(InfoExtractor):
'upload_date': '20160324',
'uploader_id': 'voxdotcom',
'uploader': 'Vox',
}
},
'add_ie': ['Youtube'],
}, {
# SBN.VideoLinkset.entryGroup multiple ooyala embeds
'url': 'http://www.sbnation.com/college-football-recruiting/2015/2/3/7970291/national-signing-day-rationalizations-itll-be-ok-itll-be-ok',
@ -117,7 +121,7 @@ class VoxMediaIE(InfoExtractor):
volume_webpage = self._download_webpage(
'http://volume.vox-cdn.com/embed/%s' % volume_uuid, volume_uuid)
video_data = self._parse_json(self._search_regex(
r'Volume\.createVideo\(({.+})\s*,\s*{.*}\);', volume_webpage, 'video data'), volume_uuid)
r'Volume\.createVideo\(({.+})\s*,\s*{.*}\s*,\s*\[.*\]\s*,\s*{.*}\);', volume_webpage, 'video data'), volume_uuid)
for provider_video_type in ('ooyala', 'youtube'):
provider_video_id = video_data.get('%s_id' % provider_video_type)
if provider_video_id:

View File

@ -11,7 +11,96 @@ from ..utils import (
class WashingtonPostIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?washingtonpost\.com/.*?/(?P<id>[^/]+)/(?:$|[?#])'
IE_NAME = 'washingtonpost'
_VALID_URL = r'(?:washingtonpost:|https?://(?:www\.)?washingtonpost\.com/video/(?:[^/]+/)*)(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
_TEST = {
'url': 'https://www.washingtonpost.com/video/c/video/480ba4ee-1ec7-11e6-82c2-a7dcb313287d',
'md5': '6f537e1334b714eb15f9563bd4b9cdfa',
'info_dict': {
'id': '480ba4ee-1ec7-11e6-82c2-a7dcb313287d',
'ext': 'mp4',
'title': 'Egypt finds belongings, debris from plane crash',
'description': 'md5:a17ceee432f215a5371388c1f680bd86',
'upload_date': '20160520',
'uploader': 'Reuters',
'timestamp': 1463778452,
},
}
def _real_extract(self, url):
video_id = self._match_id(url)
video_data = self._download_json(
'http://www.washingtonpost.com/posttv/c/videojson/%s?resType=jsonp' % video_id,
video_id, transform_source=strip_jsonp)[0]['contentConfig']
title = video_data['title']
urls = []
formats = []
for s in video_data.get('streams', []):
s_url = s.get('url')
if not s_url or s_url in urls:
continue
urls.append(s_url)
video_type = s.get('type')
if video_type == 'smil':
continue
elif video_type in ('ts', 'hls') and ('_master.m3u8' in s_url or '_mobile.m3u8' in s_url):
m3u8_formats = self._extract_m3u8_formats(
s_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)
for m3u8_format in m3u8_formats:
width = m3u8_format.get('width')
if not width:
continue
vbr = self._search_regex(
r'%d_%d_(\d+)' % (width, m3u8_format['height']), m3u8_format['url'], 'vbr', default=None)
if vbr:
m3u8_format.update({
'vbr': int_or_none(vbr),
})
formats.extend(m3u8_formats)
else:
width = int_or_none(s.get('width'))
vbr = int_or_none(s.get('bitrate'))
has_width = width != 0
formats.append({
'format_id': (
'%s-%d-%d' % (video_type, width, vbr)
if width
else video_type),
'vbr': vbr if has_width else None,
'width': width,
'height': int_or_none(s.get('height')),
'acodec': s.get('audioCodec'),
'vcodec': s.get('videoCodec') if has_width else 'none',
'filesize': int_or_none(s.get('fileSize')),
'url': s_url,
'ext': 'mp4',
'protocol': 'm3u8_native' if video_type in ('ts', 'hls') else None,
})
source_media_url = video_data.get('sourceMediaURL')
if source_media_url:
formats.append({
'format_id': 'source_media',
'url': source_media_url,
})
self._sort_formats(
formats, ('width', 'height', 'vbr', 'filesize', 'tbr', 'format_id'))
return {
'id': video_id,
'title': title,
'description': video_data.get('blurb'),
'uploader': video_data.get('credits', {}).get('source'),
'formats': formats,
'duration': int_or_none(video_data.get('videoDuration'), 100),
'timestamp': int_or_none(
video_data.get('dateConfig', {}).get('dateFirstPublished'), 1000),
}
class WashingtonPostArticleIE(InfoExtractor):
IE_NAME = 'washingtonpost:article'
_VALID_URL = r'https?://(?:www\.)?washingtonpost\.com/(?:[^/]+/)*(?P<id>[^/?#]+)'
_TESTS = [{
'url': 'http://www.washingtonpost.com/sf/national/2014/03/22/sinkhole-of-bureaucracy/',
'info_dict': {
@ -63,6 +152,10 @@ class WashingtonPostIE(InfoExtractor):
}]
}]
@classmethod
def suitable(cls, url):
return False if WashingtonPostIE.suitable(url) else super(WashingtonPostArticleIE, cls).suitable(url)
def _real_extract(self, url):
page_id = self._match_id(url)
webpage = self._download_webpage(url, page_id)
@ -74,54 +167,7 @@ class WashingtonPostIE(InfoExtractor):
<div\s+class="posttv-video-embed[^>]*?data-uuid=|
data-video-uuid=
)"([^"]+)"''', webpage)
entries = []
for i, uuid in enumerate(uuids, start=1):
vinfo_all = self._download_json(
'http://www.washingtonpost.com/posttv/c/videojson/%s?resType=jsonp' % uuid,
page_id,
transform_source=strip_jsonp,
note='Downloading information of video %d/%d' % (i, len(uuids))
)
vinfo = vinfo_all[0]['contentConfig']
uploader = vinfo.get('credits', {}).get('source')
timestamp = int_or_none(
vinfo.get('dateConfig', {}).get('dateFirstPublished'), 1000)
formats = [{
'format_id': (
'%s-%s-%s' % (s.get('type'), s.get('width'), s.get('bitrate'))
if s.get('width')
else s.get('type')),
'vbr': s.get('bitrate') if s.get('width') != 0 else None,
'width': s.get('width'),
'height': s.get('height'),
'acodec': s.get('audioCodec'),
'vcodec': s.get('videoCodec') if s.get('width') != 0 else 'none',
'filesize': s.get('fileSize'),
'url': s.get('url'),
'ext': 'mp4',
'preference': -100 if s.get('type') == 'smil' else None,
'protocol': {
'MP4': 'http',
'F4F': 'f4m',
}.get(s.get('type')),
} for s in vinfo.get('streams', [])]
source_media_url = vinfo.get('sourceMediaURL')
if source_media_url:
formats.append({
'format_id': 'source_media',
'url': source_media_url,
})
self._sort_formats(formats)
entries.append({
'id': uuid,
'title': vinfo['title'],
'description': vinfo.get('blurb'),
'uploader': uploader,
'formats': formats,
'duration': int_or_none(vinfo.get('videoDuration'), 100),
'timestamp': timestamp,
})
entries = [self.url_result('washingtonpost:%s' % uuid, 'WashingtonPost', uuid) for uuid in uuids]
return {
'_type': 'playlist',

View File

@ -2,25 +2,26 @@
from __future__ import unicode_literals
import re
import hashlib
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
ExtractorError,
unified_strdate,
HEADRequest,
float_or_none,
)
class WatIE(InfoExtractor):
_VALID_URL = r'(?:wat:(?P<real_id>\d{8})|https?://www\.wat\.tv/video/(?P<display_id>.*)-(?P<short_id>.*?)_.*?\.html)'
_VALID_URL = r'(?:wat:|https?://(?:www\.)?wat\.tv/video/.*-)(?P<id>[0-9a-z]+)'
IE_NAME = 'wat.tv'
_TESTS = [
{
'url': 'http://www.wat.tv/video/soupe-figues-l-orange-aux-epices-6z1uz_2hvf7_.html',
'md5': 'ce70e9223945ed26a8056d413ca55dc9',
'md5': '83d882d9de5c9d97f0bb2c6273cde56a',
'info_dict': {
'id': '11713067',
'display_id': 'soupe-figues-l-orange-aux-epices',
'ext': 'mp4',
'title': 'Soupe de figues à l\'orange et aux épices',
'description': 'Retrouvez l\'émission "Petits plats en équilibre", diffusée le 18 août 2014.',
@ -33,7 +34,6 @@ class WatIE(InfoExtractor):
'md5': 'fbc84e4378165278e743956d9c1bf16b',
'info_dict': {
'id': '11713075',
'display_id': 'gregory-lemarchal-voix-ange',
'ext': 'mp4',
'title': 'Grégory Lemarchal, une voix d\'ange depuis 10 ans (1/3)',
'description': 'md5:b7a849cf16a2b733d9cd10c52906dee3',
@ -44,96 +44,85 @@ class WatIE(InfoExtractor):
},
]
def download_video_info(self, real_id):
    """Download the contentv3 JSON for *real_id* and return its 'media' object."""
    # 'contentv4' is used in the website, but it also returns the related
    # videos, we don't need them
    info = self._download_json(
        'http://www.wat.tv/interface/contentv3/' + real_id, real_id)
    return info['media']

def _real_extract(self, url):
    """Extract a wat.tv video, or a playlist when the video is multipart.

    Returns the result of ``playlist_result`` (one ``wat:<id>`` entry per
    chapter) when the first chapter's id differs from the requested id;
    otherwise returns an info dict with HLS, HDS and derived HTTP formats.

    Raises ExtractorError when the API response carries ``error_desc`` or
    when a ``/get/`` URL is not redirected (reported to the user as a
    country restriction).
    """
    video_id = self._match_id(url)
    # Ids that are not already long digit strings are decoded as base-36
    # numbers to obtain the numeric id used by the API.
    if not (video_id.isdigit() and len(video_id) > 6):
        video_id = compat_str(int(video_id, 36))

    video_info = self.download_video_info(video_id)

    error_desc = video_info.get('error_desc')
    if error_desc:
        raise ExtractorError(
            '%s returned error: %s' % (self.IE_NAME, error_desc), expected=True)

    chapters = video_info['chapters']
    first_chapter = chapters[0]

    def video_id_for_chapter(chapter):
        return chapter['tc_start'].split('-')[0]

    if video_id_for_chapter(first_chapter) != video_id:
        self.to_screen('Multipart video detected')
        entries = [
            self.url_result('wat:%s' % video_id_for_chapter(chapter))
            for chapter in chapters]
        return self.playlist_result(entries, video_id, video_info['title'])
    # Otherwise we can continue and extract just one part, we have to use
    # the video id for getting the video url

    date_diffusion = first_chapter.get('date_diffusion')
    upload_date = unified_strdate(date_diffusion) if date_diffusion else None

    def extract_url(path_template, url_type):
        # The final media URL is whatever the HEAD request gets redirected
        # to; an unchanged URL is treated as "not available".
        req_url = 'http://www.wat.tv/get/%s' % (path_template % video_id)
        head = self._request_webpage(
            HEADRequest(req_url), video_id, 'Extracting %s url' % url_type)
        red_url = head.geturl()
        if req_url == red_url:
            raise ExtractorError(
                '%s said: Sorry, this video is not available from your country.' % self.IE_NAME,
                expected=True)
        return red_url

    m3u8_url = extract_url('ipad/%s.m3u8', 'm3u8')
    http_url = extract_url('android5/%s.mp4', 'http')

    formats = []
    m3u8_formats = self._extract_m3u8_formats(
        m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls')
    formats.extend(m3u8_formats)
    # The f4m manifest URL is built from the m3u8 one by substituting
    # 'ios.' with 'web.' and '.m3u8' with '.f4m'; failure is non-fatal.
    formats.extend(self._extract_f4m_formats(
        m3u8_url.replace('ios.', 'web.').replace('.m3u8', '.f4m'),
        video_id, f4m_id='hds', fatal=False))
    for m3u8_format in m3u8_formats:
        # Audio (and optionally video) bitrates are read from the variant URL.
        mobj = re.search(
            r'audio.*?%3D(\d+)(?:-video.*?%3D(\d+))?', m3u8_format['url'])
        if not mobj:
            continue
        abr, vbr = mobj.groups()
        abr, vbr = float_or_none(abr, 1000), float_or_none(vbr, 1000)
        m3u8_format.update({
            'vbr': vbr,
            'abr': abr,
        })
        if not vbr or not abr:
            continue
        # Derive a direct HTTP variant of this format by rewriting the
        # bitrate part of the android5 URL.
        f = m3u8_format.copy()
        f.update({
            'url': re.sub(r'%s-\d+00-\d+' % video_id, '%s-%d00-%d' % (video_id, round(vbr / 100), round(abr)), http_url),
            'format_id': f['format_id'].replace('hls', 'http'),
            'protocol': 'http',
        })
        formats.append(f)
    self._sort_formats(formats)

    return {
        'id': video_id,
        'title': first_chapter['title'],
        'thumbnail': first_chapter['preview'],
        'description': first_chapter['description'],
        'view_count': video_info['views'],
        'upload_date': upload_date,
        'duration': video_info['files'][0]['duration'],
        'formats': formats,
    }

View File

@ -12,10 +12,10 @@ from ..utils import (
class XHamsterIE(InfoExtractor):
_VALID_URL = r'(?P<proto>https?)://(?:.+?\.)?xhamster\.com/movies/(?P<id>[0-9]+)/(?P<seo>.+?)\.html(?:\?.*)?'
_TESTS = [
{
_VALID_URL = r'(?P<proto>https?)://(?:.+?\.)?xhamster\.com/movies/(?P<id>[0-9]+)/(?P<seo>.*?)\.html(?:\?.*)?'
_TESTS = [{
'url': 'http://xhamster.com/movies/1509445/femaleagent_shy_beauty_takes_the_bait.html',
'md5': '8281348b8d3c53d39fffb377d24eac4e',
'info_dict': {
'id': '1509445',
'ext': 'mp4',
@ -24,9 +24,8 @@ class XHamsterIE(InfoExtractor):
'uploader': 'Ruseful2011',
'duration': 893.52,
'age_limit': 18,
}
},
{
}, {
'url': 'http://xhamster.com/movies/2221348/britney_spears_sexy_booty.html?hd',
'info_dict': {
'id': '2221348',
@ -36,13 +35,29 @@ class XHamsterIE(InfoExtractor):
'uploader': 'jojo747400',
'duration': 200.48,
'age_limit': 18,
}
},
{
'params': {
'skip_download': True,
},
}, {
# empty seo
'url': 'http://xhamster.com/movies/5667973/.html',
'info_dict': {
'id': '5667973',
'ext': 'mp4',
'title': '....',
'upload_date': '20160208',
'uploader': 'parejafree',
'duration': 72.0,
'age_limit': 18,
},
'params': {
'skip_download': True,
},
}, {
'url': 'https://xhamster.com/movies/2272726/amber_slayed_by_the_knight.html',
'only_matching': True,
},
]
}]
def _real_extract(self, url):
def extract_video_url(webpage, name):
@ -170,7 +185,7 @@ class XHamsterEmbedIE(InfoExtractor):
webpage = self._download_webpage(url, video_id)
video_url = self._search_regex(
r'href="(https?://xhamster\.com/movies/%s/[^"]+\.html[^"]*)"' % video_id,
r'href="(https?://xhamster\.com/movies/%s/[^"]*\.html[^"]*)"' % video_id,
webpage, 'xhamster url', default=None)
if not video_url: