Merge remote-tracking branch 'refs/remotes/rg3/master'

forDream 2016-01-10 11:14:09 +08:00
commit 42820b7192
22 changed files with 394 additions and 303 deletions

CONTRIBUTING.md

@@ -28,7 +28,7 @@ So please elaborate on what feature you are requesting, or what bug you want to
 - How it could be fixed
 - How your proposed solution would look like
 
-If your report is shorter than two lines, it is almost certainly missing some of these, which makes it hard for us to respond to it. We're often too polite to close the issue outright, but the missing info makes misinterpretation likely. As a commiter myself, I often get frustrated by these issues, since the only possible way for me to move forward on them is to ask for clarification over and over.
+If your report is shorter than two lines, it is almost certainly missing some of these, which makes it hard for us to respond to it. We're often too polite to close the issue outright, but the missing info makes misinterpretation likely. As a committer myself, I often get frustrated by these issues, since the only possible way for me to move forward on them is to ask for clarification over and over.
 
 For bug reports, this means that your report should contain the *complete* output of youtube-dl when called with the `-v` flag. The error message you get for (most) bugs even says so, but you would not believe how many of our bug reports do not contain this information.

README.md

@@ -830,7 +830,7 @@ So please elaborate on what feature you are requesting, or what bug you want to
 - How it could be fixed
 - How your proposed solution would look like
 
-If your report is shorter than two lines, it is almost certainly missing some of these, which makes it hard for us to respond to it. We're often too polite to close the issue outright, but the missing info makes misinterpretation likely. As a commiter myself, I often get frustrated by these issues, since the only possible way for me to move forward on them is to ask for clarification over and over.
+If your report is shorter than two lines, it is almost certainly missing some of these, which makes it hard for us to respond to it. We're often too polite to close the issue outright, but the missing info makes misinterpretation likely. As a committer myself, I often get frustrated by these issues, since the only possible way for me to move forward on them is to ask for clarification over and over.
 
 For bug reports, this means that your report should contain the *complete* output of youtube-dl when called with the `-v` flag. The error message you get for (most) bugs even says so, but you would not believe how many of our bug reports do not contain this information.

docs/supportedsites.md

@@ -23,7 +23,6 @@
 - **AdobeTVShow**
 - **AdobeTVVideo**
 - **AdultSwim**
-- **Aftenposten**
 - **Aftonbladet**
 - **AirMozilla**
 - **AlJazeera**
@@ -34,7 +33,8 @@
 - **Aparat**
 - **AppleConnect**
 - **AppleDaily**: 臺灣蘋果日報
-- **AppleTrailers**
+- **appletrailers**
+- **appletrailers:section**
 - **archive.org**: archive.org videos
 - **ARD**
 - **ARD:mediathek**
@@ -502,8 +502,6 @@
 - **SnagFilmsEmbed**
 - **Snotr**
 - **Sohu**
-- **soompi**
-- **soompi:show**
 - **soundcloud**
 - **soundcloud:playlist**
 - **soundcloud:search**: Soundcloud search
@@ -627,7 +625,7 @@
 - **Vessel**
 - **Vesti**: Вести.Ru
 - **Vevo**
-- **VGTV**: VGTV and BTTV
+- **VGTV**: VGTV, BTTV, FTV, Aftenposten and Aftonbladet
 - **vh1.com**
 - **Vice**
 - **Viddler**

youtube_dl/extractor/__init__.py

@@ -15,7 +15,6 @@ from .adobetv import (
     AdobeTVVideoIE,
 )
 from .adultswim import AdultSwimIE
-from .aftenposten import AftenpostenIE
 from .aftonbladet import AftonbladetIE
 from .airmozilla import AirMozillaIE
 from .aljazeera import AlJazeeraIE
@@ -26,7 +25,10 @@ from .aol import AolIE
 from .allocine import AllocineIE
 from .aparat import AparatIE
 from .appleconnect import AppleConnectIE
-from .appletrailers import AppleTrailersIE
+from .appletrailers import (
+    AppleTrailersIE,
+    AppleTrailersSectionIE,
+)
 from .archiveorg import ArchiveOrgIE
 from .ard import (
     ARDIE,
@@ -591,10 +593,6 @@ from .snagfilms import (
 )
 from .snotr import SnotrIE
 from .sohu import SohuIE
-from .soompi import (
-    SoompiIE,
-    SoompiShowIE,
-)
 from .soundcloud import (
     SoundcloudIE,
     SoundcloudSetIE,
@@ -663,6 +661,7 @@ from .tenplay import TenPlayIE
 from .testurl import TestURLIE
 from .testtube import TestTubeIE
 from .tf1 import TF1IE
+from .theintercept import TheInterceptIE
 from .theonion import TheOnionIE
 from .theplatform import (
     ThePlatformIE,

youtube_dl/extractor/aftenposten.py (deleted)

@@ -1,23 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-from .common import InfoExtractor
-
-
-class AftenpostenIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?aftenposten\.no/webtv/(?:#!/)?video/(?P<id>\d+)'
-    _TEST = {
-        'url': 'http://www.aftenposten.no/webtv/#!/video/21039/trailer-sweatshop-i-can-t-take-any-more',
-        'md5': 'fd828cd29774a729bf4d4425fe192972',
-        'info_dict': {
-            'id': '21039',
-            'ext': 'mov',
-            'title': 'TRAILER: "Sweatshop" - I can´t take any more',
-            'description': 'md5:21891f2b0dd7ec2f78d84a50e54f8238',
-            'timestamp': 1416927969,
-            'upload_date': '20141125',
-        },
-    }
-
-    def _real_extract(self, url):
-        return self.url_result('xstream:ap:%s' % self._match_id(url), 'Xstream')

youtube_dl/extractor/appletrailers.py

@@ -11,6 +11,7 @@ from ..utils import (
 
 
 class AppleTrailersIE(InfoExtractor):
+    IE_NAME = 'appletrailers'
     _VALID_URL = r'https?://(?:www\.)?trailers\.apple\.com/(?:trailers|ca)/(?P<company>[^/]+)/(?P<movie>[^/]+)'
     _TESTS = [{
         'url': 'http://trailers.apple.com/trailers/wb/manofsteel/',
@@ -63,6 +64,12 @@ class AppleTrailersIE(InfoExtractor):
                 },
             },
         ]
+    }, {
+        'url': 'http://trailers.apple.com/trailers/magnolia/blackthorn/',
+        'info_dict': {
+            'id': 'blackthorn',
+        },
+        'playlist_mincount': 2,
     }, {
         'url': 'http://trailers.apple.com/ca/metropole/autrui/',
         'only_matching': True,
@@ -79,7 +86,7 @@ class AppleTrailersIE(InfoExtractor):
         def fix_html(s):
             s = re.sub(r'(?s)<script[^<]*?>.*?</script>', '', s)
-            s = re.sub(r'<img ([^<]*?)>', r'<img \1/>', s)
+            s = re.sub(r'<img ([^<]*?)/?>', r'<img \1/>', s)
             # The ' in the onClick attributes are not escaped, it couldn't be parsed
             # like: http://trailers.apple.com/trailers/wb/gravity/
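The tightened pattern keeps the substitution idempotent when a tag is already self-closed; a standalone sanity check (sample tag made up):

```python
import re

tag = '<img src="poster.jpg"/>'
# old pattern: the existing slash lands inside the capture, yielding '//>'
print(re.sub(r'<img ([^<]*?)>', r'<img \1/>', tag))    # <img src="poster.jpg"//>
# new pattern: an existing slash is consumed before '/>' is re-added
print(re.sub(r'<img ([^<]*?)/?>', r'<img \1/>', tag))  # <img src="poster.jpg"/>
```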
@@ -96,6 +103,9 @@ class AppleTrailersIE(InfoExtractor):
             trailer_info_json = self._search_regex(self._JSON_RE,
                 on_click, 'trailer info')
             trailer_info = json.loads(trailer_info_json)
+            first_url = trailer_info.get('url')
+            if not first_url:
+                continue
             title = trailer_info['title']
             video_id = movie + '-' + re.sub(r'[^a-zA-Z0-9]', '', title).lower()
             thumbnail = li.find('.//img').attrib['src']
@@ -107,7 +117,6 @@ class AppleTrailersIE(InfoExtractor):
             if m:
                 duration = 60 * int(m.group('minutes')) + int(m.group('seconds'))
 
-            first_url = trailer_info['url']
             trailer_id = first_url.split('/')[-1].rpartition('_')[0].lower()
             settings_json_url = compat_urlparse.urljoin(url, 'includes/settings/%s.json' % trailer_id)
             settings = self._download_json(settings_json_url, trailer_id, 'Downloading settings json')
@@ -144,3 +153,76 @@ class AppleTrailersIE(InfoExtractor):
             'id': movie,
             'entries': playlist,
         }
+
+
+class AppleTrailersSectionIE(InfoExtractor):
+    IE_NAME = 'appletrailers:section'
+    _SECTIONS = {
+        'justadded': {
+            'feed_path': 'just_added',
+            'title': 'Just Added',
+        },
+        'exclusive': {
+            'feed_path': 'exclusive',
+            'title': 'Exclusive',
+        },
+        'justhd': {
+            'feed_path': 'just_hd',
+            'title': 'Just HD',
+        },
+        'mostpopular': {
+            'feed_path': 'most_pop',
+            'title': 'Most Popular',
+        },
+        'moviestudios': {
+            'feed_path': 'studios',
+            'title': 'Movie Studios',
+        },
+    }
+    _VALID_URL = r'https?://(?:www\.)?trailers\.apple\.com/#section=(?P<id>%s)' % '|'.join(_SECTIONS)
+    _TESTS = [{
+        'url': 'http://trailers.apple.com/#section=justadded',
+        'info_dict': {
+            'title': 'Just Added',
+            'id': 'justadded',
+        },
+        'playlist_mincount': 80,
+    }, {
+        'url': 'http://trailers.apple.com/#section=exclusive',
+        'info_dict': {
+            'title': 'Exclusive',
+            'id': 'exclusive',
+        },
+        'playlist_mincount': 80,
+    }, {
+        'url': 'http://trailers.apple.com/#section=justhd',
+        'info_dict': {
+            'title': 'Just HD',
+            'id': 'justhd',
+        },
+        'playlist_mincount': 80,
+    }, {
+        'url': 'http://trailers.apple.com/#section=mostpopular',
+        'info_dict': {
+            'title': 'Most Popular',
+            'id': 'mostpopular',
+        },
+        'playlist_mincount': 80,
+    }, {
+        'url': 'http://trailers.apple.com/#section=moviestudios',
+        'info_dict': {
+            'title': 'Movie Studios',
+            'id': 'moviestudios',
+        },
+        'playlist_mincount': 80,
+    }]
+
+    def _real_extract(self, url):
+        section = self._match_id(url)
+        section_data = self._download_json(
+            'http://trailers.apple.com/trailers/home/feeds/%s.json' % self._SECTIONS[section]['feed_path'],
+            section)
+        entries = [
+            self.url_result('http://trailers.apple.com' + e['location'])
+            for e in section_data]
+        return self.playlist_result(entries, section, self._SECTIONS[section]['title'])
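The section extractor boils down to one feed request per section; a minimal sketch of the same flow (dictionary trimmed to one section, feed entry made up):

```python
_SECTIONS = {'justadded': {'feed_path': 'just_added', 'title': 'Just Added'}}

section = 'justadded'
feed_url = ('http://trailers.apple.com/trailers/home/feeds/%s.json'
            % _SECTIONS[section]['feed_path'])
# the downloaded feed is a JSON list of entries carrying a 'location' field
sample_feed = [{'location': '/trailers/wb/manofsteel/'}]
entries = ['http://trailers.apple.com' + e['location'] for e in sample_feed]
print(feed_url)
print(entries)  # ['http://trailers.apple.com/trailers/wb/manofsteel/']
```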

youtube_dl/extractor/arte.py

@@ -68,9 +68,13 @@ class ArteTVPlus7IE(InfoExtractor):
     def _extract_url_info(cls, url):
         mobj = re.match(cls._VALID_URL, url)
         lang = mobj.group('lang')
-        # This is not a real id, it can be for example AJT for the news
-        # http://www.arte.tv/guide/fr/emissions/AJT/arte-journal
-        video_id = mobj.group('id')
+        query = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
+        if 'vid' in query:
+            video_id = query['vid'][0]
+        else:
+            # This is not a real id, it can be for example AJT for the news
+            # http://www.arte.tv/guide/fr/emissions/AJT/arte-journal
+            video_id = mobj.group('id')
         return video_id, lang
 
     def _real_extract(self, url):
@@ -79,9 +83,15 @@ class ArteTVPlus7IE(InfoExtractor):
         return self._extract_from_webpage(webpage, video_id, lang)
 
     def _extract_from_webpage(self, webpage, video_id, lang):
+        patterns_templates = (r'arte_vp_url=["\'](.*?%s.*?)["\']', r'data-url=["\']([^"]+%s[^"]+)["\']')
+        ids = (video_id, '')
+        # some pages contain multiple videos (like
+        # http://www.arte.tv/guide/de/sendungen/XEN/xenius/?vid=055918-015_PLUS7-D),
+        # so we first try to look for json URLs that contain the video id from
+        # the 'vid' parameter.
+        patterns = [t % re.escape(_id) for _id in ids for t in patterns_templates]
         json_url = self._html_search_regex(
-            [r'arte_vp_url=["\'](.*?)["\']', r'data-url=["\']([^"]+)["\']'],
-            webpage, 'json vp url', default=None)
+            patterns, webpage, 'json vp url', default=None)
         if not json_url:
             iframe_url = self._html_search_regex(
                 r'<iframe[^>]+src=(["\'])(?P<url>.+\bjson_url=.+?)\1',
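The new `vid` handling can be checked in isolation; here the stdlib `urllib.parse` stands in for youtube-dl's compat shims, with the example URL from the comment above:

```python
from urllib.parse import parse_qs, urlparse

url = 'http://www.arte.tv/guide/de/sendungen/XEN/xenius/?vid=055918-015_PLUS7-D'
query = parse_qs(urlparse(url).query)
# prefer the explicit 'vid' parameter over the id taken from the path
video_id = query['vid'][0] if 'vid' in query else None
print(video_id)  # 055918-015_PLUS7-D
```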

youtube_dl/extractor/bleacherreport.py

@@ -90,7 +90,7 @@ class BleacherReportCMSIE(AMPIE):
     _VALID_URL = r'https?://(?:www\.)?bleacherreport\.com/video_embed\?id=(?P<id>[0-9a-f-]{36})'
     _TESTS = [{
         'url': 'http://bleacherreport.com/video_embed?id=8fd44c2f-3dc5-4821-9118-2c825a98c0e1',
-        'md5': 'f0ca220af012d4df857b54f792c586bb',
+        'md5': '8c2c12e3af7805152675446c905d159b',
         'info_dict': {
             'id': '8fd44c2f-3dc5-4821-9118-2c825a98c0e1',
             'ext': 'flv',

youtube_dl/extractor/comcarcoff.py

@@ -1,10 +1,12 @@
 # encoding: utf-8
 from __future__ import unicode_literals
 
-import json
-
 from .common import InfoExtractor
-from ..utils import parse_iso8601
+from ..utils import (
+    int_or_none,
+    parse_duration,
+    parse_iso8601,
+)
 
 
 class ComCarCoffIE(InfoExtractor):
@@ -16,6 +18,7 @@ class ComCarCoffIE(InfoExtractor):
             'ext': 'mp4',
             'upload_date': '20141127',
             'timestamp': 1417107600,
+            'duration': 1232,
             'title': 'Happy Thanksgiving Miranda',
             'description': 'Jerry Seinfeld and his special guest Miranda Sings cruise around town in search of coffee, complaining and apologizing along the way.',
             'thumbnail': 'http://ccc.crackle.com/images/s5e4_thumb.jpg',
@@ -31,9 +34,10 @@ class ComCarCoffIE(InfoExtractor):
             display_id = 'comediansincarsgettingcoffee.com'
         webpage = self._download_webpage(url, display_id)
 
-        full_data = json.loads(self._search_regex(
-            r'<script type="application/json" id="videoData">(?P<json>.+?)</script>',
-            webpage, 'full data json'))
+        full_data = self._parse_json(
+            self._search_regex(
+                r'window\.app\s*=\s*({.+?});\n', webpage, 'full data json'),
+            display_id)['videoData']
 
         video_id = full_data['activeVideo']['video']
         video_data = full_data.get('videos', {}).get(video_id) or full_data['singleshots'][video_id]
@@ -45,12 +49,18 @@ class ComCarCoffIE(InfoExtractor):
         formats = self._extract_m3u8_formats(
             video_data['mediaUrl'], video_id, ext='mp4')
 
+        timestamp = int_or_none(video_data.get('pubDateTime')) or parse_iso8601(
+            video_data.get('pubDate'))
+        duration = int_or_none(video_data.get('durationSeconds')) or parse_duration(
+            video_data.get('duration'))
+
         return {
             'id': video_id,
             'display_id': display_id,
             'title': video_data['title'],
             'description': video_data.get('description'),
-            'timestamp': parse_iso8601(video_data.get('pubDate')),
+            'timestamp': timestamp,
+            'duration': duration,
             'thumbnails': thumbnails,
             'formats': formats,
             'webpage_url': 'http://comediansincarsgettingcoffee.com/%s' % (video_data.get('urlSlug', video_data.get('slug'))),
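Both fallback chains behave the same way; a quick check with made-up field values (the helpers are the ones imported above):

```python
from youtube_dl.utils import int_or_none, parse_duration

video_data = {'durationSeconds': None, 'duration': '00:20:32'}
# try the numeric field first, then fall back to parsing the formatted string
duration = int_or_none(video_data.get('durationSeconds')) or parse_duration(
    video_data.get('duration'))
print(int(duration))  # 1232, matching the 'duration' in the test above
```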

youtube_dl/extractor/daum.py

@@ -24,6 +24,18 @@ class DaumIE(InfoExtractor):
             'upload_date': '20130831',
             'duration': 3868,
         },
+    }, {
+        # Test for https://github.com/rg3/youtube-dl/issues/7949
+        'url': 'http://tvpot.daum.net/mypot/View.do?ownerid=M1O35s8HPOo0&clipid=73147290',
+        'md5': 'c92d78bcee4424451f1667f275c1dc97',
+        'info_dict': {
+            'id': '73147290',
+            'ext': 'mp4',
+            'title': '싸이 - 나팔바지 [유희열의 스케치북] 299회 20151218',
+            'description': '싸이 - 나팔바지',
+            'upload_date': '20151219',
+            'duration': 232,
+        },
     }, {
         'url': 'http://tvpot.daum.net/v/vab4dyeDBysyBssyukBUjBz',
         'only_matching': True,
@@ -37,9 +49,11 @@ class DaumIE(InfoExtractor):
         video_id = mobj.group('id')
         canonical_url = 'http://tvpot.daum.net/v/%s' % video_id
         webpage = self._download_webpage(canonical_url, video_id)
+        og_url = self._og_search_url(webpage, default=None) or self._search_regex(
+            r'<link[^>]+rel=(["\'])canonical\1[^>]+href=(["\'])(?P<url>.+?)\2',
+            webpage, 'canonical url', group='url')
         full_id = self._search_regex(
-            r'src=["\']http://videofarm\.daum\.net/controller/video/viewer/Video\.html\?.*?vid=(.+?)[&"\']',
-            webpage, 'full id')
+            r'tvpot\.daum\.net/v/([^/]+)', og_url, 'full id')
         query = compat_urllib_parse.urlencode({'vid': full_id})
         info = self._download_xml(
             'http://tvpot.daum.net/clip/ClipInfoXml.do?' + query, video_id,
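The new 'full id' lookup reads the page's canonical URL instead of scraping the player iframe; checked here against the canonical form used in the tests above:

```python
import re

og_url = 'http://tvpot.daum.net/v/vab4dyeDBysyBssyukBUjBz'
full_id = re.search(r'tvpot\.daum\.net/v/([^/]+)', og_url).group(1)
print(full_id)  # vab4dyeDBysyBssyukBUjBz
```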

youtube_dl/extractor/franceinter.py

@@ -1,8 +1,6 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
-import re
-
 from .common import InfoExtractor
 from ..utils import int_or_none
 
@@ -23,8 +21,7 @@ class FranceInterIE(InfoExtractor):
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = self._match_id(url)
 
         webpage = self._download_webpage(url, video_id)
 
@@ -33,7 +30,7 @@ class FranceInterIE(InfoExtractor):
         video_url = 'http://www.franceinter.fr/' + path
 
         title = self._html_search_regex(
-            r'<span class="title">(.+?)</span>', webpage, 'title')
+            r'<span class="title-diffusion">(.+?)</span>', webpage, 'title')
         description = self._html_search_regex(
             r'<span class="description">(.*?)</span>',
             webpage, 'description', fatal=False)

youtube_dl/extractor/imgur.py

@@ -13,7 +13,7 @@ from ..utils import (
 
 
 class ImgurIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:i\.)?imgur\.com/(?!gallery)(?P<id>[a-zA-Z0-9]+)'
+    _VALID_URL = r'https?://(?:i\.)?imgur\.com/(?:(?:gallery|topic/[^/]+)/)?(?P<id>[a-zA-Z0-9]{6,})(?:[/?#&]+|\.[a-z]+)?$'
 
     _TESTS = [{
         'url': 'https://i.imgur.com/A61SaA1.gifv',
@@ -21,7 +21,7 @@ class ImgurIE(InfoExtractor):
             'id': 'A61SaA1',
             'ext': 'mp4',
             'title': 're:Imgur GIF$|MRW gifv is up and running without any bugs$',
-            'description': 're:The origin of the Internet\'s most viral images$|The Internet\'s visual storytelling community\. Explore, share, and discuss the best visual stories the Internet has to offer\.$',
+            'description': 'Imgur: The most awesome images on the Internet.',
         },
     }, {
         'url': 'https://imgur.com/A61SaA1',
@@ -29,8 +29,20 @@ class ImgurIE(InfoExtractor):
             'id': 'A61SaA1',
             'ext': 'mp4',
             'title': 're:Imgur GIF$|MRW gifv is up and running without any bugs$',
-            'description': 're:The origin of the Internet\'s most viral images$|The Internet\'s visual storytelling community\. Explore, share, and discuss the best visual stories the Internet has to offer\.$',
+            'description': 'Imgur: The most awesome images on the Internet.',
         },
+    }, {
+        'url': 'https://imgur.com/gallery/YcAQlkx',
+        'info_dict': {
+            'id': 'YcAQlkx',
+            'ext': 'mp4',
+            'title': 'Classic Steve Carell gif...cracks me up everytime....damn the repost downvotes....',
+            'description': 'Imgur: The most awesome images on the Internet.'
+        }
+    }, {
+        'url': 'http://imgur.com/topic/Funny/N8rOudd',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
@@ -100,25 +112,38 @@ class ImgurIE(InfoExtractor):
 
 
 class ImgurAlbumIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:i\.)?imgur\.com/gallery/(?P<id>[a-zA-Z0-9]+)'
+    _VALID_URL = r'https?://(?:i\.)?imgur\.com/(?:(?:a|gallery|topic/[^/]+)/)?(?P<id>[a-zA-Z0-9]{5})(?:[/?#&]+)?$'
 
-    _TEST = {
+    _TESTS = [{
        'url': 'http://imgur.com/gallery/Q95ko',
        'info_dict': {
            'id': 'Q95ko',
        },
        'playlist_count': 25,
-    }
+    }, {
+        'url': 'http://imgur.com/a/j6Orj',
+        'only_matching': True,
+    }, {
+        'url': 'http://imgur.com/topic/Aww/ll5Vk',
+        'only_matching': True,
+    }]
 
     def _real_extract(self, url):
         album_id = self._match_id(url)
 
         album_images = self._download_json(
             'http://imgur.com/gallery/%s/album_images/hit.json?all=true' % album_id,
-            album_id)['data']['images']
-
-        entries = [
-            self.url_result('http://imgur.com/%s' % image['hash'])
-            for image in album_images if image.get('hash')]
-
-        return self.playlist_result(entries, album_id)
+            album_id, fatal=False)
+
+        if album_images:
+            data = album_images.get('data')
+            if data and isinstance(data, dict):
+                images = data.get('images')
+                if images and isinstance(images, list):
+                    entries = [
+                        self.url_result('http://imgur.com/%s' % image['hash'])
+                        for image in images if image.get('hash')]
+                    return self.playlist_result(entries, album_id)
+
+        # Fallback to single video
+        return self.url_result('http://imgur.com/%s' % album_id, ImgurIE.ie_key())
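A quick check of the widened `ImgurIE` pattern against the test URLs above:

```python
import re

IMGUR_RE = r'https?://(?:i\.)?imgur\.com/(?:(?:gallery|topic/[^/]+)/)?(?P<id>[a-zA-Z0-9]{6,})(?:[/?#&]+|\.[a-z]+)?$'
for u in ('https://i.imgur.com/A61SaA1.gifv',
          'https://imgur.com/gallery/YcAQlkx',
          'http://imgur.com/topic/Funny/N8rOudd'):
    print(re.match(IMGUR_RE, u).group('id'))  # A61SaA1, YcAQlkx, N8rOudd
```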

youtube_dl/extractor/instagram.py

@@ -47,7 +47,7 @@ class InstagramIE(InfoExtractor):
 
 
 class InstagramUserIE(InfoExtractor):
-    _VALID_URL = r'https://instagram\.com/(?P<username>[^/]{2,})/?(?:$|[?#])'
+    _VALID_URL = r'https?://(?:www\.)?instagram\.com/(?P<username>[^/]{2,})/?(?:$|[?#])'
     IE_DESC = 'Instagram user profile'
     IE_NAME = 'instagram:user'
     _TEST = {

youtube_dl/extractor/pbs.py

@@ -16,7 +16,7 @@ from ..utils import (
 
 class PBSIE(InfoExtractor):
     _STATIONS = (
-        (r'(?:video|www)\.pbs\.org', 'PBS: Public Broadcasting Service'),  # http://www.pbs.org/
+        (r'(?:video|www|player)\.pbs\.org', 'PBS: Public Broadcasting Service'),  # http://www.pbs.org/
         (r'video\.aptv\.org', 'APT - Alabama Public Television (WBIQ)'),  # http://aptv.org/
         (r'video\.gpb\.org', 'GPB/Georgia Public Broadcasting (WGTV)'),  # http://www.gpb.org/
         (r'video\.mpbonline\.org', 'Mississippi Public Broadcasting (WMPN)'),  # http://www.mpbonline.org

youtube_dl/extractor/periscope.py

@@ -31,9 +31,8 @@ class PeriscopeIE(InfoExtractor):
     }]
 
     def _call_api(self, method, value):
-        attribute = 'token' if len(value) > 13 else 'broadcast_id'
         return self._download_json(
-            'https://api.periscope.tv/api/v2/%s?%s=%s' % (method, attribute, value), value)
+            'https://api.periscope.tv/api/v2/%s?broadcast_id=%s' % (method, value), value)
 
     def _real_extract(self, url):
         token = self._match_id(url)

youtube_dl/extractor/soompi.py (deleted)

@@ -1,146 +0,0 @@
-# encoding: utf-8
-from __future__ import unicode_literals
-
-import re
-
-from .crunchyroll import CrunchyrollIE
-
-from .common import InfoExtractor
-from ..compat import compat_HTTPError
-from ..utils import (
-    ExtractorError,
-    int_or_none,
-    remove_start,
-    xpath_text,
-)
-
-
-class SoompiBaseIE(InfoExtractor):
-    def _get_episodes(self, webpage, episode_filter=None):
-        episodes = self._parse_json(
-            self._search_regex(
-                r'VIDEOS\s*=\s*(\[.+?\]);', webpage, 'episodes JSON'),
-            None)
-        return list(filter(episode_filter, episodes))
-
-
-class SoompiIE(SoompiBaseIE, CrunchyrollIE):
-    IE_NAME = 'soompi'
-    _VALID_URL = r'https?://tv\.soompi\.com/(?:en/)?watch/(?P<id>[0-9]+)'
-    _TESTS = [{
-        'url': 'http://tv.soompi.com/en/watch/29235',
-        'info_dict': {
-            'id': '29235',
-            'ext': 'mp4',
-            'title': 'Episode 1096',
-            'description': '2015-05-20'
-        },
-        'params': {
-            'skip_download': True,
-        },
-    }]
-
-    def _get_episode(self, webpage, video_id):
-        return self._get_episodes(webpage, lambda x: x['id'] == video_id)[0]
-
-    def _get_subtitles(self, config, video_id):
-        sub_langs = {}
-        for subtitle in config.findall('./{default}preload/subtitles/subtitle'):
-            sub_langs[subtitle.attrib['id']] = subtitle.attrib['title']
-
-        subtitles = {}
-        for s in config.findall('./{default}preload/subtitle'):
-            lang_code = sub_langs.get(s.attrib['id'])
-            if not lang_code:
-                continue
-            sub_id = s.get('id')
-            data = xpath_text(s, './data', 'data')
-            iv = xpath_text(s, './iv', 'iv')
-            if not id or not iv or not data:
-                continue
-            subtitle = self._decrypt_subtitles(data, iv, sub_id).decode('utf-8')
-            subtitles[lang_code] = self._extract_subtitles(subtitle)
-        return subtitles
-
-    def _real_extract(self, url):
-        video_id = self._match_id(url)
-
-        try:
-            webpage = self._download_webpage(
-                url, video_id, 'Downloading episode page')
-        except ExtractorError as ee:
-            if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 403:
-                webpage = ee.cause.read()
-                block_message = self._html_search_regex(
-                    r'(?s)<div class="block-message">(.+?)</div>', webpage,
-                    'block message', default=None)
-                if block_message:
-                    raise ExtractorError(block_message, expected=True)
-            raise
-
-        formats = []
-        config = None
-        for format_id in re.findall(r'\?quality=([0-9a-zA-Z]+)', webpage):
-            config = self._download_xml(
-                'http://tv.soompi.com/en/show/_/%s-config.xml?mode=hls&quality=%s' % (video_id, format_id),
-                video_id, 'Downloading %s XML' % format_id)
-            m3u8_url = xpath_text(
-                config, './{default}preload/stream_info/file',
-                '%s m3u8 URL' % format_id)
-            if not m3u8_url:
-                continue
-            formats.extend(self._extract_m3u8_formats(
-                m3u8_url, video_id, 'mp4', m3u8_id=format_id))
-        self._sort_formats(formats)
-
-        episode = self._get_episode(webpage, video_id)
-
-        title = episode['name']
-        description = episode.get('description')
-        duration = int_or_none(episode.get('duration'))
-
-        thumbnails = [{
-            'id': thumbnail_id,
-            'url': thumbnail_url,
-        } for thumbnail_id, thumbnail_url in episode.get('img_url', {}).items()]
-
-        subtitles = self.extract_subtitles(config, video_id)
-
-        return {
-            'id': video_id,
-            'title': title,
-            'description': description,
-            'thumbnails': thumbnails,
-            'duration': duration,
-            'formats': formats,
-            'subtitles': subtitles
-        }
-
-
-class SoompiShowIE(SoompiBaseIE):
-    IE_NAME = 'soompi:show'
-    _VALID_URL = r'https?://tv\.soompi\.com/en/shows/(?P<id>[0-9a-zA-Z\-_]+)'
-    _TESTS = [{
-        'url': 'http://tv.soompi.com/en/shows/liar-game',
-        'info_dict': {
-            'id': 'liar-game',
-            'title': 'Liar Game',
-            'description': 'md5:52c02bce0c1a622a95823591d0589b66',
-        },
-        'playlist_count': 14,
-    }]
-
-    def _real_extract(self, url):
-        show_id = self._match_id(url)
-
-        webpage = self._download_webpage(
-            url, show_id, 'Downloading show page')
-
-        title = remove_start(self._og_search_title(webpage), 'SoompiTV | ')
-        description = self._og_search_description(webpage)
-
-        entries = [
-            self.url_result('http://tv.soompi.com/en/watch/%s' % episode['id'], 'Soompi')
-            for episode in self._get_episodes(webpage)]
-
-        return self.playlist_result(entries, show_id, title, description)

youtube_dl/extractor/theintercept.py (new)

@@ -0,0 +1,49 @@
+# encoding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+    parse_iso8601,
+    int_or_none,
+    ExtractorError,
+)
+
+
+class TheInterceptIE(InfoExtractor):
+    _VALID_URL = r'https://theintercept.com/fieldofvision/(?P<id>[^/?#]+)'
+    _TESTS = [{
+        'url': 'https://theintercept.com/fieldofvision/thisisacoup-episode-four-surrender-or-die/',
+        'md5': '145f28b41d44aab2f87c0a4ac8ec95bd',
+        'info_dict': {
+            'id': '46214',
+            'ext': 'mp4',
+            'title': '#ThisIsACoup Episode Four: Surrender or Die',
+            'description': 'md5:74dd27f0e2fbd50817829f97eaa33140',
+            'timestamp': 1450429239,
+            'upload_date': '20151218',
+            'comment_count': int,
+        }
+    }]
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+        webpage = self._download_webpage(url, display_id)
+
+        json_data = self._parse_json(self._search_regex(
+            r'initialStoreTree\s*=\s*(?P<json_data>{.+})', webpage,
+            'initialStoreTree'), display_id)
+
+        for post in json_data['resources']['posts'].values():
+            if post['slug'] == display_id:
+                return {
+                    '_type': 'url_transparent',
+                    'url': 'jwplatform:%s' % post['fov_videoid'],
+                    'id': compat_str(post['ID']),
+                    'display_id': display_id,
+                    'title': post['title'],
+                    'description': post.get('excerpt'),
+                    'timestamp': parse_iso8601(post.get('date')),
+                    'comment_count': int_or_none(post.get('comments_number')),
+                }
+        raise ExtractorError('Unable to find the current post')

youtube_dl/extractor/twentyfourvideo.py

@@ -5,6 +5,8 @@ from .common import InfoExtractor
 from ..utils import (
     parse_iso8601,
     int_or_none,
+    xpath_attr,
+    xpath_element,
 )
 
 
@@ -15,7 +17,7 @@ class TwentyFourVideoIE(InfoExtractor):
     _TESTS = [
         {
             'url': 'http://www.24video.net/video/view/1044982',
-            'md5': 'd041af8b5b4246ea466226a0d6693345',
+            'md5': 'e09fc0901d9eaeedac872f154931deeb',
             'info_dict': {
                 'id': '1044982',
                 'ext': 'mp4',
@@ -64,33 +66,24 @@ class TwentyFourVideoIE(InfoExtractor):
             r'<div class="comments-title" id="comments-count">(\d+) комментари',
             webpage, 'comment count', fatal=False))
 
-        formats = []
+        # Sets some cookies
+        self._download_xml(
+            r'http://www.24video.net/video/xml/%s?mode=init' % video_id,
+            video_id, 'Downloading init XML')
 
-        pc_video = self._download_xml(
+        video_xml = self._download_xml(
             'http://www.24video.net/video/xml/%s?mode=play' % video_id,
-            video_id, 'Downloading PC video URL').find('.//video')
-        formats.append({
-            'url': pc_video.attrib['url'],
-            'format_id': 'pc',
-            'quality': 1,
-        })
+            video_id, 'Downloading video XML')
 
-        like_count = int_or_none(pc_video.get('ratingPlus'))
-        dislike_count = int_or_none(pc_video.get('ratingMinus'))
-        age_limit = 18 if pc_video.get('adult') == 'true' else 0
+        video = xpath_element(video_xml, './/video', 'video', fatal=True)
 
-        mobile_video = self._download_xml(
-            'http://www.24video.net/video/xml/%s' % video_id,
-            video_id, 'Downloading mobile video URL').find('.//video')
-        formats.append({
-            'url': mobile_video.attrib['url'],
-            'format_id': 'mobile',
-            'quality': 0,
-        })
+        formats = [{
+            'url': xpath_attr(video, '', 'url', 'video URL', fatal=True),
+        }]
 
-        self._sort_formats(formats)
+        like_count = int_or_none(video.get('ratingPlus'))
+        dislike_count = int_or_none(video.get('ratingMinus'))
+        age_limit = 18 if video.get('adult') == 'true' else 0
 
         return {
             'id': video_id,
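A rough stdlib equivalent of the `xpath_element`/`xpath_attr` lookups above, run against a fabricated response:

```python
import xml.etree.ElementTree as etree

video_xml = etree.fromstring(
    '<response><video url="http://example.com/v.mp4" ratingPlus="10"/></response>')
video = video_xml.find('.//video')  # what xpath_element(video_xml, './/video') returns
print(video.attrib['url'])          # what xpath_attr(video, '', 'url') returns
print(video.get('ratingPlus'))      # 10
```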

youtube_dl/extractor/vgtv.py

@@ -4,26 +4,48 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
+from .xstream import XstreamIE
 from ..utils import (
     ExtractorError,
     float_or_none,
 )
 
 
-class VGTVIE(InfoExtractor):
-    IE_DESC = 'VGTV and BTTV'
+class VGTVIE(XstreamIE):
+    IE_DESC = 'VGTV, BTTV, FTV, Aftenposten and Aftonbladet'
+
+    _HOST_TO_APPNAME = {
+        'vgtv.no': 'vgtv',
+        'bt.no/tv': 'bttv',
+        'aftenbladet.no/tv': 'satv',
+        'fvn.no/fvntv': 'fvntv',
+        'aftenposten.no/webtv': 'aptv',
+    }
+
+    _APP_NAME_TO_VENDOR = {
+        'vgtv': 'vgtv',
+        'bttv': 'bt',
+        'satv': 'sa',
+        'fvntv': 'fvn',
+        'aptv': 'ap',
+    }
+
     _VALID_URL = r'''(?x)
-                    (?:
-                        vgtv:|
-                        http://(?:www\.)?
-                    )
-                    (?P<host>vgtv|bt)
-                    (?:
-                        :|
-                        \.no/(?:tv/)?\#!/(?:video|live)/
-                    )
-                    (?P<id>[0-9]+)
-                    '''
+                    (?:https?://(?:www\.)?
+                    (?P<host>
+                        %s
+                    )
+                    /
+                    (?:
+                        \#!/(?:video|live)/|
+                        embed?.*id=
+                    )|
+                    (?P<appname>
+                        %s
+                    ):)
+                    (?P<id>\d+)
+                    ''' % ('|'.join(_HOST_TO_APPNAME.keys()), '|'.join(_APP_NAME_TO_VENDOR.keys()))
+
     _TESTS = [
         {
             # streamType: vod
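The assembled pattern can be exercised directly; this mirrors the string formatting above, against URLs taken from the tests in this file:

```python
import re

_HOST_TO_APPNAME = {
    'vgtv.no': 'vgtv',
    'bt.no/tv': 'bttv',
    'aftenbladet.no/tv': 'satv',
    'fvn.no/fvntv': 'fvntv',
    'aftenposten.no/webtv': 'aptv',
}
_APP_NAME_TO_VENDOR = {'vgtv': 'vgtv', 'bttv': 'bt', 'satv': 'sa', 'fvntv': 'fvn', 'aptv': 'ap'}

pattern = (r'(?:https?://(?:www\.)?(?P<host>%s)/(?:\#!/(?:video|live)/|embed?.*id=)'
           r'|(?P<appname>%s):)(?P<id>\d+)'
           % ('|'.join(_HOST_TO_APPNAME.keys()), '|'.join(_APP_NAME_TO_VENDOR.keys())))

for u in ('http://www.vgtv.no/#!/live/113063/direkte-v75-fra-solvalla', 'bttv:100250'):
    print(re.match(pattern, u).groupdict())
# {'host': 'vgtv.no', 'appname': None, 'id': '113063'}
# {'host': None, 'appname': 'bttv', 'id': '100250'}
```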
@@ -59,17 +81,18 @@
                 # m3u8 download
                 'skip_download': True,
             },
+            'skip': 'Video is no longer available',
         },
         {
-            # streamType: live
+            # streamType: wasLive
             'url': 'http://www.vgtv.no/#!/live/113063/direkte-v75-fra-solvalla',
             'info_dict': {
                 'id': '113063',
-                'ext': 'flv',
-                'title': 're:^DIREKTE: V75 fra Solvalla [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+                'ext': 'mp4',
+                'title': 'V75 fra Solvalla 30.05.15',
                 'description': 'md5:b3743425765355855f88e096acc93231',
                 'thumbnail': 're:^https?://.*\.jpg',
-                'duration': 0,
+                'duration': 25966,
                 'timestamp': 1432975582,
                 'upload_date': '20150530',
                 'view_count': int,
@@ -79,6 +102,20 @@
                 'skip_download': True,
             },
         },
+        {
+            'url': 'http://www.aftenposten.no/webtv/#!/video/21039/trailer-sweatshop-i-can-t-take-any-more',
+            'md5': 'fd828cd29774a729bf4d4425fe192972',
+            'info_dict': {
+                'id': '21039',
+                'ext': 'mov',
+                'title': 'TRAILER: «SWEATSHOP» - I can´t take any more',
+                'description': 'md5:21891f2b0dd7ec2f78d84a50e54f8238',
+                'duration': 66,
+                'timestamp': 1417002452,
+                'upload_date': '20141126',
+                'view_count': int,
+            }
+        },
         {
             'url': 'http://www.bt.no/tv/#!/video/100250/norling-dette-er-forskjellen-paa-1-divisjon-og-eliteserien',
             'only_matching': True,
@@ -89,21 +126,27 @@
         mobj = re.match(self._VALID_URL, url)
         video_id = mobj.group('id')
         host = mobj.group('host')
-
-        HOST_WEBSITES = {
-            'vgtv': 'vgtv',
-            'bt': 'bttv',
-        }
+        appname = self._HOST_TO_APPNAME[host] if host else mobj.group('appname')
+        vendor = self._APP_NAME_TO_VENDOR[appname]
 
         data = self._download_json(
             'http://svp.vg.no/svp/api/v1/%s/assets/%s?appName=%s-website'
-            % (host, video_id, HOST_WEBSITES[host]),
+            % (vendor, video_id, appname),
             video_id, 'Downloading media JSON')
 
         if data.get('status') == 'inactive':
             raise ExtractorError(
                 'Video %s is no longer available' % video_id, expected=True)
 
+        info = {
+            'formats': [],
+        }
+        if len(video_id) == 5:
+            if appname == 'bttv':
+                info = self._extract_video_info('btno', video_id)
+            elif appname == 'aptv':
+                info = self._extract_video_info('ap', video_id)
+
         streams = data['streamUrls']
         stream_type = data.get('streamType')
 
@@ -111,48 +154,53 @@
         hls_url = streams.get('hls')
         if hls_url:
-            formats.extend(self._extract_m3u8_formats(
-                hls_url, video_id, 'mp4', m3u8_id='hls'))
+            m3u8_formats = self._extract_m3u8_formats(
+                hls_url, video_id, 'mp4', m3u8_id='hls', fatal=False)
+            if m3u8_formats:
+                formats.extend(m3u8_formats)
 
         hds_url = streams.get('hds')
         # wasLive hds are always 404
         if hds_url and stream_type != 'wasLive':
-            formats.extend(self._extract_f4m_formats(
-                hds_url + '?hdcore=3.2.0&plugin=aasp-3.2.0.77.18',
-                video_id, f4m_id='hds'))
+            f4m_formats = self._extract_f4m_formats(
+                hds_url + '?hdcore=3.2.0&plugin=aasp-3.2.0.77.18', video_id, f4m_id='hds', fatal=False)
+            if f4m_formats:
+                formats.extend(f4m_formats)
 
+        mp4_urls = streams.get('pseudostreaming') or []
         mp4_url = streams.get('mp4')
         if mp4_url:
-            _url = hls_url or hds_url
-            MP4_URL_TEMPLATE = '%s/%%s.%s' % (mp4_url.rpartition('/')[0], mp4_url.rpartition('.')[-1])
-            for mp4_format in _url.split(','):
-                m = re.search('(?P<width>\d+)_(?P<height>\d+)_(?P<vbr>\d+)', mp4_format)
-                if not m:
-                    continue
-                width = int(m.group('width'))
-                height = int(m.group('height'))
-                vbr = int(m.group('vbr'))
-                formats.append({
-                    'url': MP4_URL_TEMPLATE % mp4_format,
-                    'format_id': 'mp4-%s' % vbr,
-                    'width': width,
-                    'height': height,
-                    'vbr': vbr,
-                    'preference': 1,
-                })
-        self._sort_formats(formats)
-
-        return {
+            mp4_urls.append(mp4_url)
+        for mp4_url in mp4_urls:
+            format_info = {
+                'url': mp4_url,
+            }
+            mobj = re.search('(\d+)_(\d+)_(\d+)', mp4_url)
+            if mobj:
+                tbr = int(mobj.group(3))
+                format_info.update({
+                    'width': int(mobj.group(1)),
+                    'height': int(mobj.group(2)),
+                    'tbr': tbr,
+                    'format_id': 'mp4-%s' % tbr,
+                })
+            formats.append(format_info)
+        info['formats'].extend(formats)
+        self._sort_formats(info['formats'])
+
+        info.update({
             'id': video_id,
-            'title': self._live_title(data['title']),
+            'title': self._live_title(data['title']) if stream_type == 'live' else data['title'],
             'description': data['description'],
             'thumbnail': data['images']['main'] + '?t[]=900x506q80',
             'timestamp': data['published'],
             'duration': float_or_none(data['duration'], 1000),
             'view_count': data['displays'],
-            'formats': formats,
             'is_live': True if stream_type == 'live' else False,
-        }
+        })
+        return info
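The dimensions and bitrate come straight out of URL segments shaped like `..._1280_720_2500.mp4`; sample URL made up to show the pattern:

```python
import re

mp4_url = 'http://example.com/video/solvalla_1280_720_2500.mp4'
mobj = re.search(r'(\d+)_(\d+)_(\d+)', mp4_url)
if mobj:
    width, height, tbr = (int(g) for g in mobj.groups())
    print(width, height, tbr)  # 1280 720 2500
```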
 
 
 class BTArticleIE(InfoExtractor):
@@ -161,7 +209,7 @@ class BTArticleIE(InfoExtractor):
     _VALID_URL = 'http://(?:www\.)?bt\.no/(?:[^/]+/)+(?P<id>[^/]+)-\d+\.html'
     _TEST = {
         'url': 'http://www.bt.no/nyheter/lokalt/Kjemper-for-internatet-1788214.html',
-        'md5': 'd055e8ee918ef2844745fcfd1a4175fb',
+        'md5': '2acbe8ad129b3469d5ae51b1158878df',
         'info_dict': {
             'id': '23199',
             'ext': 'mp4',
@@ -178,15 +226,15 @@ class BTArticleIE(InfoExtractor):
     def _real_extract(self, url):
         webpage = self._download_webpage(url, self._match_id(url))
         video_id = self._search_regex(
-            r'SVP\.Player\.load\(\s*(\d+)', webpage, 'video id')
-        return self.url_result('vgtv:bt:%s' % video_id, 'VGTV')
+            r'<video[^>]+data-id="(\d+)"', webpage, 'video id')
+        return self.url_result('bttv:%s' % video_id, 'VGTV')
 
 
 class BTVestlendingenIE(InfoExtractor):
     IE_NAME = 'bt:vestlendingen'
     IE_DESC = 'Bergens Tidende - Vestlendingen'
     _VALID_URL = 'http://(?:www\.)?bt\.no/spesial/vestlendingen/#!/(?P<id>\d+)'
-    _TEST = {
+    _TESTS = [{
         'url': 'http://www.bt.no/spesial/vestlendingen/#!/86588',
         'md5': 'd7d17e3337dc80de6d3a540aefbe441b',
         'info_dict': {
@@ -197,7 +245,19 @@ class BTVestlendingenIE(InfoExtractor):
             'timestamp': 1430473209,
             'upload_date': '20150501',
         },
-    }
+        'skip': '404 Error',
+    }, {
+        'url': 'http://www.bt.no/spesial/vestlendingen/#!/86255',
+        'md5': 'a2893f8632e96389f4bdf36aa9463ceb',
+        'info_dict': {
+            'id': '86255',
+            'ext': 'mov',
+            'title': 'Du må tåle å fryse og være sulten',
+            'description': 'md5:b8046f4d022d5830ddab04865791d063',
+            'upload_date': '20150321',
+            'timestamp': 1426942023,
+        },
+    }]
 
     def _real_extract(self, url):
-        return self.url_result('xstream:btno:%s' % self._match_id(url), 'Xstream')
+        return self.url_result('bttv:%s' % self._match_id(url), 'VGTV')

youtube_dl/extractor/viki.py

@@ -30,6 +30,12 @@ class VikiBaseIE(InfoExtractor):
 
     _token = None
 
+    _ERRORS = {
+        'geo': 'Sorry, this content is not available in your region.',
+        'upcoming': 'Sorry, this content is not yet available.',
+        # 'paywall': 'paywall',
+    }
+
     def _prepare_call(self, path, timestamp=None, post_data=None):
         path += '?' if '?' not in path else '&'
         if not timestamp:
@@ -67,6 +73,12 @@ class VikiBaseIE(InfoExtractor):
             '%s returned error: %s' % (self.IE_NAME, error),
             expected=True)
 
+    def _check_errors(self, data):
+        for reason, status in data.get('blocking', {}).items():
+            if status and reason in self._ERRORS:
+                raise ExtractorError('%s said: %s' % (
+                    self.IE_NAME, self._ERRORS[reason]), expected=True)
+
     def _real_initialize(self):
         self._login()
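How `_check_errors` reacts to the API's blocking flags, with a fabricated response:

```python
_ERRORS = {
    'geo': 'Sorry, this content is not available in your region.',
    'upcoming': 'Sorry, this content is not yet available.',
}

data = {'blocking': {'geo': True, 'paywall': False}}
for reason, status in data.get('blocking', {}).items():
    if status and reason in _ERRORS:
        print('viki said: %s' % _ERRORS[reason])  # the extractor raises ExtractorError here
```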
@@ -193,6 +205,7 @@ class VikiIE(VikiBaseIE):
             'timestamp': 1321985454,
             'description': 'md5:44b1e46619df3a072294645c770cef36',
             'title': 'Love In Magic',
+            'age_limit': 13,
         },
     }]
 
@@ -202,6 +215,8 @@ class VikiIE(VikiBaseIE):
         video = self._call_api(
             'videos/%s.json' % video_id, video_id, 'Downloading video JSON')
 
+        self._check_errors(video)
+
         title = self.dict_selection(video.get('titles', {}), 'en')
         if not title:
             title = 'Episode %d' % video.get('number') if video.get('type') == 'episode' else video.get('id') or video_id
@@ -262,8 +277,11 @@ class VikiIE(VikiBaseIE):
                 r'^(\d+)[pP]$', format_id, 'height', default=None))
             for protocol, format_dict in stream_dict.items():
                 if format_id == 'm3u8':
-                    formats = self._extract_m3u8_formats(
-                        format_dict['url'], video_id, 'mp4', m3u8_id='m3u8-%s' % protocol)
+                    m3u8_formats = self._extract_m3u8_formats(
+                        format_dict['url'], video_id, 'mp4', 'm3u8_native',
+                        m3u8_id='m3u8-%s' % protocol, fatal=None)
+                    if m3u8_formats:
+                        formats.extend(m3u8_formats)
                 else:
                     formats.append({
                         'url': format_dict['url'],
@@ -315,6 +333,8 @@ class VikiChannelIE(VikiBaseIE):
             'containers/%s.json' % channel_id, channel_id,
             'Downloading channel JSON')
 
+        self._check_errors(channel)
+
         title = self.dict_selection(channel['titles'], 'en')
         description = self.dict_selection(channel['descriptions'], 'en')
 

youtube_dl/extractor/xstream.py

@@ -42,11 +42,7 @@ class XstreamIE(InfoExtractor):
         'only_matching': True,
     }]
 
-    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        partner_id = mobj.group('partner_id')
-        video_id = mobj.group('id')
-
+    def _extract_video_info(self, partner_id, video_id):
         data = self._download_xml(
             'http://frontend.xstream.dk/%s/feed/video/?platform=web&id=%s'
             % (partner_id, video_id),
@@ -97,6 +93,7 @@ class XstreamIE(InfoExtractor):
             formats.append({
                 'url': link.get('href'),
                 'format_id': link.get('rel'),
+                'preference': 1,
             })
 
         thumbnails = [{
@@ -113,3 +110,10 @@ class XstreamIE(InfoExtractor):
             'formats': formats,
             'thumbnails': thumbnails,
         }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        partner_id = mobj.group('partner_id')
+        video_id = mobj.group('id')
+
+        return self._extract_video_info(partner_id, video_id)

youtube_dl/version.py

@@ -1,3 +1,3 @@
 from __future__ import unicode_literals
 
-__version__ = '2015.12.21'
+__version__ = '2015.12.23'