Merge remote-tracking branch 'refs/remotes/rg3/master'

forDream 2016-01-10 11:14:09 +08:00
commit 42820b7192
22 changed files with 394 additions and 303 deletions

CONTRIBUTING.md

@@ -28,7 +28,7 @@ So please elaborate on what feature you are requesting, or what bug you want to
 - How it could be fixed
 - How your proposed solution would look like
 
-If your report is shorter than two lines, it is almost certainly missing some of these, which makes it hard for us to respond to it. We're often too polite to close the issue outright, but the missing info makes misinterpretation likely. As a commiter myself, I often get frustrated by these issues, since the only possible way for me to move forward on them is to ask for clarification over and over.
+If your report is shorter than two lines, it is almost certainly missing some of these, which makes it hard for us to respond to it. We're often too polite to close the issue outright, but the missing info makes misinterpretation likely. As a committer myself, I often get frustrated by these issues, since the only possible way for me to move forward on them is to ask for clarification over and over.
 
 For bug reports, this means that your report should contain the *complete* output of youtube-dl when called with the `-v` flag. The error message you get for (most) bugs even says so, but you would not believe how many of our bug reports do not contain this information.

README.md

@@ -830,7 +830,7 @@ So please elaborate on what feature you are requesting, or what bug you want to
 - How it could be fixed
 - How your proposed solution would look like
 
-If your report is shorter than two lines, it is almost certainly missing some of these, which makes it hard for us to respond to it. We're often too polite to close the issue outright, but the missing info makes misinterpretation likely. As a commiter myself, I often get frustrated by these issues, since the only possible way for me to move forward on them is to ask for clarification over and over.
+If your report is shorter than two lines, it is almost certainly missing some of these, which makes it hard for us to respond to it. We're often too polite to close the issue outright, but the missing info makes misinterpretation likely. As a committer myself, I often get frustrated by these issues, since the only possible way for me to move forward on them is to ask for clarification over and over.
 
 For bug reports, this means that your report should contain the *complete* output of youtube-dl when called with the `-v` flag. The error message you get for (most) bugs even says so, but you would not believe how many of our bug reports do not contain this information.

docs/supportedsites.md

@@ -23,7 +23,6 @@
 - **AdobeTVShow**
 - **AdobeTVVideo**
 - **AdultSwim**
-- **Aftenposten**
 - **Aftonbladet**
 - **AirMozilla**
 - **AlJazeera**
@@ -34,7 +33,8 @@
 - **Aparat**
 - **AppleConnect**
 - **AppleDaily**: 臺灣蘋果日報
-- **AppleTrailers**
+- **appletrailers**
+- **appletrailers:section**
 - **archive.org**: archive.org videos
 - **ARD**
 - **ARD:mediathek**
@@ -502,8 +502,6 @@
 - **SnagFilmsEmbed**
 - **Snotr**
 - **Sohu**
-- **soompi**
-- **soompi:show**
 - **soundcloud**
 - **soundcloud:playlist**
 - **soundcloud:search**: Soundcloud search
@@ -627,7 +625,7 @@
 - **Vessel**
 - **Vesti**: Вести.Ru
 - **Vevo**
-- **VGTV**: VGTV and BTTV
+- **VGTV**: VGTV, BTTV, FTV, Aftenposten and Aftonbladet
 - **vh1.com**
 - **Vice**
 - **Viddler**

youtube_dl/extractor/__init__.py

@@ -15,7 +15,6 @@ from .adobetv import (
     AdobeTVVideoIE,
 )
 from .adultswim import AdultSwimIE
-from .aftenposten import AftenpostenIE
 from .aftonbladet import AftonbladetIE
 from .airmozilla import AirMozillaIE
 from .aljazeera import AlJazeeraIE
@@ -26,7 +25,10 @@ from .aol import AolIE
 from .allocine import AllocineIE
 from .aparat import AparatIE
 from .appleconnect import AppleConnectIE
-from .appletrailers import AppleTrailersIE
+from .appletrailers import (
+    AppleTrailersIE,
+    AppleTrailersSectionIE,
+)
 from .archiveorg import ArchiveOrgIE
 from .ard import (
     ARDIE,
@@ -591,10 +593,6 @@ from .snagfilms import (
 )
 from .snotr import SnotrIE
 from .sohu import SohuIE
-from .soompi import (
-    SoompiIE,
-    SoompiShowIE,
-)
 from .soundcloud import (
     SoundcloudIE,
     SoundcloudSetIE,
@@ -663,6 +661,7 @@ from .tenplay import TenPlayIE
 from .testurl import TestURLIE
 from .testtube import TestTubeIE
 from .tf1 import TF1IE
+from .theintercept import TheInterceptIE
 from .theonion import TheOnionIE
 from .theplatform import (
     ThePlatformIE,

youtube_dl/extractor/aftenposten.py (deleted)

@@ -1,23 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-from .common import InfoExtractor
-
-
-class AftenpostenIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?aftenposten\.no/webtv/(?:#!/)?video/(?P<id>\d+)'
-    _TEST = {
-        'url': 'http://www.aftenposten.no/webtv/#!/video/21039/trailer-sweatshop-i-can-t-take-any-more',
-        'md5': 'fd828cd29774a729bf4d4425fe192972',
-        'info_dict': {
-            'id': '21039',
-            'ext': 'mov',
-            'title': 'TRAILER: "Sweatshop" - I can´t take any more',
-            'description': 'md5:21891f2b0dd7ec2f78d84a50e54f8238',
-            'timestamp': 1416927969,
-            'upload_date': '20141125',
-        },
-    }
-
-    def _real_extract(self, url):
-        return self.url_result('xstream:ap:%s' % self._match_id(url), 'Xstream')

youtube_dl/extractor/appletrailers.py

@@ -11,6 +11,7 @@ from ..utils import (
 
 
 class AppleTrailersIE(InfoExtractor):
+    IE_NAME = 'appletrailers'
     _VALID_URL = r'https?://(?:www\.)?trailers\.apple\.com/(?:trailers|ca)/(?P<company>[^/]+)/(?P<movie>[^/]+)'
     _TESTS = [{
         'url': 'http://trailers.apple.com/trailers/wb/manofsteel/',
@@ -63,6 +64,12 @@ class AppleTrailersIE(InfoExtractor):
                 },
             },
         ]
+    }, {
+        'url': 'http://trailers.apple.com/trailers/magnolia/blackthorn/',
+        'info_dict': {
+            'id': 'blackthorn',
+        },
+        'playlist_mincount': 2,
     }, {
         'url': 'http://trailers.apple.com/ca/metropole/autrui/',
         'only_matching': True,
@@ -79,7 +86,7 @@ class AppleTrailersIE(InfoExtractor):
         def fix_html(s):
             s = re.sub(r'(?s)<script[^<]*?>.*?</script>', '', s)
-            s = re.sub(r'<img ([^<]*?)>', r'<img \1/>', s)
+            s = re.sub(r'<img ([^<]*?)/?>', r'<img \1/>', s)
             # The ' in the onClick attributes are not escaped, it couldn't be parsed
             # like: http://trailers.apple.com/trailers/wb/gravity/
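The tightened pattern keeps the substitution idempotent when a tag is already self-closed; a standalone sanity check (sample tag made up):

```python
import re

tag = '<img src="poster.jpg"/>'
# old pattern: the existing slash lands inside the capture, yielding '//>'
print(re.sub(r'<img ([^<]*?)>', r'<img \1/>', tag))    # <img src="poster.jpg"//>
# new pattern: an existing slash is consumed before '/>' is re-added
print(re.sub(r'<img ([^<]*?)/?>', r'<img \1/>', tag))  # <img src="poster.jpg"/>
```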
@@ -96,6 +103,9 @@ class AppleTrailersIE(InfoExtractor):
             trailer_info_json = self._search_regex(self._JSON_RE,
                 on_click, 'trailer info')
             trailer_info = json.loads(trailer_info_json)
+            first_url = trailer_info.get('url')
+            if not first_url:
+                continue
             title = trailer_info['title']
             video_id = movie + '-' + re.sub(r'[^a-zA-Z0-9]', '', title).lower()
             thumbnail = li.find('.//img').attrib['src']
@@ -107,7 +117,6 @@ class AppleTrailersIE(InfoExtractor):
             if m:
                 duration = 60 * int(m.group('minutes')) + int(m.group('seconds'))
 
-            first_url = trailer_info['url']
             trailer_id = first_url.split('/')[-1].rpartition('_')[0].lower()
             settings_json_url = compat_urlparse.urljoin(url, 'includes/settings/%s.json' % trailer_id)
             settings = self._download_json(settings_json_url, trailer_id, 'Downloading settings json')
@@ -144,3 +153,76 @@ class AppleTrailersIE(InfoExtractor):
             'id': movie,
             'entries': playlist,
         }
+
+
+class AppleTrailersSectionIE(InfoExtractor):
+    IE_NAME = 'appletrailers:section'
+    _SECTIONS = {
+        'justadded': {
+            'feed_path': 'just_added',
+            'title': 'Just Added',
+        },
+        'exclusive': {
+            'feed_path': 'exclusive',
+            'title': 'Exclusive',
+        },
+        'justhd': {
+            'feed_path': 'just_hd',
+            'title': 'Just HD',
+        },
+        'mostpopular': {
+            'feed_path': 'most_pop',
+            'title': 'Most Popular',
+        },
+        'moviestudios': {
+            'feed_path': 'studios',
+            'title': 'Movie Studios',
+        },
+    }
+    _VALID_URL = r'https?://(?:www\.)?trailers\.apple\.com/#section=(?P<id>%s)' % '|'.join(_SECTIONS)
+    _TESTS = [{
+        'url': 'http://trailers.apple.com/#section=justadded',
+        'info_dict': {
+            'title': 'Just Added',
+            'id': 'justadded',
+        },
+        'playlist_mincount': 80,
+    }, {
+        'url': 'http://trailers.apple.com/#section=exclusive',
+        'info_dict': {
+            'title': 'Exclusive',
+            'id': 'exclusive',
+        },
+        'playlist_mincount': 80,
+    }, {
+        'url': 'http://trailers.apple.com/#section=justhd',
+        'info_dict': {
+            'title': 'Just HD',
+            'id': 'justhd',
+        },
+        'playlist_mincount': 80,
+    }, {
+        'url': 'http://trailers.apple.com/#section=mostpopular',
+        'info_dict': {
+            'title': 'Most Popular',
+            'id': 'mostpopular',
+        },
+        'playlist_mincount': 80,
+    }, {
+        'url': 'http://trailers.apple.com/#section=moviestudios',
+        'info_dict': {
+            'title': 'Movie Studios',
+            'id': 'moviestudios',
+        },
+        'playlist_mincount': 80,
+    }]
+
+    def _real_extract(self, url):
+        section = self._match_id(url)
+        section_data = self._download_json(
+            'http://trailers.apple.com/trailers/home/feeds/%s.json' % self._SECTIONS[section]['feed_path'],
+            section)
+        entries = [
+            self.url_result('http://trailers.apple.com' + e['location'])
+            for e in section_data]
+        return self.playlist_result(entries, section, self._SECTIONS[section]['title'])
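The section extractor boils down to one feed request per section; a minimal sketch of the same flow (dictionary trimmed to one section, feed entry made up):

```python
_SECTIONS = {'justadded': {'feed_path': 'just_added', 'title': 'Just Added'}}

section = 'justadded'
feed_url = ('http://trailers.apple.com/trailers/home/feeds/%s.json'
            % _SECTIONS[section]['feed_path'])
# the downloaded feed is a JSON list of entries carrying a 'location' field
sample_feed = [{'location': '/trailers/wb/manofsteel/'}]
entries = ['http://trailers.apple.com' + e['location'] for e in sample_feed]
print(feed_url)
print(entries)  # ['http://trailers.apple.com/trailers/wb/manofsteel/']
```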

youtube_dl/extractor/arte.py

@@ -68,9 +68,13 @@ class ArteTVPlus7IE(InfoExtractor):
     def _extract_url_info(cls, url):
         mobj = re.match(cls._VALID_URL, url)
         lang = mobj.group('lang')
-        # This is not a real id, it can be for example AJT for the news
-        # http://www.arte.tv/guide/fr/emissions/AJT/arte-journal
-        video_id = mobj.group('id')
+        query = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
+        if 'vid' in query:
+            video_id = query['vid'][0]
+        else:
+            # This is not a real id, it can be for example AJT for the news
+            # http://www.arte.tv/guide/fr/emissions/AJT/arte-journal
+            video_id = mobj.group('id')
         return video_id, lang
 
     def _real_extract(self, url):
@@ -79,9 +83,15 @@ class ArteTVPlus7IE(InfoExtractor):
         return self._extract_from_webpage(webpage, video_id, lang)
 
     def _extract_from_webpage(self, webpage, video_id, lang):
+        patterns_templates = (r'arte_vp_url=["\'](.*?%s.*?)["\']', r'data-url=["\']([^"]+%s[^"]+)["\']')
+        ids = (video_id, '')
+        # some pages contain multiple videos (like
+        # http://www.arte.tv/guide/de/sendungen/XEN/xenius/?vid=055918-015_PLUS7-D),
+        # so we first try to look for json URLs that contain the video id from
+        # the 'vid' parameter.
+        patterns = [t % re.escape(_id) for _id in ids for t in patterns_templates]
         json_url = self._html_search_regex(
-            [r'arte_vp_url=["\'](.*?)["\']', r'data-url=["\']([^"]+)["\']'],
-            webpage, 'json vp url', default=None)
+            patterns, webpage, 'json vp url', default=None)
         if not json_url:
             iframe_url = self._html_search_regex(
                 r'<iframe[^>]+src=(["\'])(?P<url>.+\bjson_url=.+?)\1',
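The new `vid` handling can be checked in isolation; here the stdlib `urllib.parse` stands in for youtube-dl's compat shims, with the example URL from the comment above:

```python
from urllib.parse import parse_qs, urlparse

url = 'http://www.arte.tv/guide/de/sendungen/XEN/xenius/?vid=055918-015_PLUS7-D'
query = parse_qs(urlparse(url).query)
# prefer the explicit 'vid' parameter over the id taken from the path
video_id = query['vid'][0] if 'vid' in query else None
print(video_id)  # 055918-015_PLUS7-D
```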

youtube_dl/extractor/bleacherreport.py

@@ -90,7 +90,7 @@ class BleacherReportCMSIE(AMPIE):
     _VALID_URL = r'https?://(?:www\.)?bleacherreport\.com/video_embed\?id=(?P<id>[0-9a-f-]{36})'
     _TESTS = [{
         'url': 'http://bleacherreport.com/video_embed?id=8fd44c2f-3dc5-4821-9118-2c825a98c0e1',
-        'md5': 'f0ca220af012d4df857b54f792c586bb',
+        'md5': '8c2c12e3af7805152675446c905d159b',
         'info_dict': {
             'id': '8fd44c2f-3dc5-4821-9118-2c825a98c0e1',
             'ext': 'flv',

youtube_dl/extractor/comcarcoff.py

@@ -1,10 +1,12 @@
 # encoding: utf-8
 from __future__ import unicode_literals
 
-import json
-
 from .common import InfoExtractor
-from ..utils import parse_iso8601
+from ..utils import (
+    int_or_none,
+    parse_duration,
+    parse_iso8601,
+)
 
 
 class ComCarCoffIE(InfoExtractor):
@@ -16,6 +18,7 @@ class ComCarCoffIE(InfoExtractor):
             'ext': 'mp4',
             'upload_date': '20141127',
             'timestamp': 1417107600,
+            'duration': 1232,
             'title': 'Happy Thanksgiving Miranda',
             'description': 'Jerry Seinfeld and his special guest Miranda Sings cruise around town in search of coffee, complaining and apologizing along the way.',
             'thumbnail': 'http://ccc.crackle.com/images/s5e4_thumb.jpg',
@@ -31,9 +34,10 @@ class ComCarCoffIE(InfoExtractor):
             display_id = 'comediansincarsgettingcoffee.com'
         webpage = self._download_webpage(url, display_id)
 
-        full_data = json.loads(self._search_regex(
-            r'<script type="application/json" id="videoData">(?P<json>.+?)</script>',
-            webpage, 'full data json'))
+        full_data = self._parse_json(
+            self._search_regex(
+                r'window\.app\s*=\s*({.+?});\n', webpage, 'full data json'),
+            display_id)['videoData']
 
         video_id = full_data['activeVideo']['video']
         video_data = full_data.get('videos', {}).get(video_id) or full_data['singleshots'][video_id]
@@ -45,12 +49,18 @@ class ComCarCoffIE(InfoExtractor):
         formats = self._extract_m3u8_formats(
             video_data['mediaUrl'], video_id, ext='mp4')
 
+        timestamp = int_or_none(video_data.get('pubDateTime')) or parse_iso8601(
+            video_data.get('pubDate'))
+        duration = int_or_none(video_data.get('durationSeconds')) or parse_duration(
+            video_data.get('duration'))
+
         return {
             'id': video_id,
             'display_id': display_id,
             'title': video_data['title'],
             'description': video_data.get('description'),
-            'timestamp': parse_iso8601(video_data.get('pubDate')),
+            'timestamp': timestamp,
+            'duration': duration,
             'thumbnails': thumbnails,
             'formats': formats,
             'webpage_url': 'http://comediansincarsgettingcoffee.com/%s' % (video_data.get('urlSlug', video_data.get('slug'))),
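Both fallback chains behave the same way; a quick check with made-up field values (the helpers are the ones imported above):

```python
from youtube_dl.utils import int_or_none, parse_duration

video_data = {'durationSeconds': None, 'duration': '00:20:32'}
# try the numeric field first, then fall back to parsing the formatted string
duration = int_or_none(video_data.get('durationSeconds')) or parse_duration(
    video_data.get('duration'))
print(int(duration))  # 1232, matching the 'duration' in the test above
```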

youtube_dl/extractor/daum.py

@@ -24,6 +24,18 @@ class DaumIE(InfoExtractor):
             'upload_date': '20130831',
             'duration': 3868,
         },
+    }, {
+        # Test for https://github.com/rg3/youtube-dl/issues/7949
+        'url': 'http://tvpot.daum.net/mypot/View.do?ownerid=M1O35s8HPOo0&clipid=73147290',
+        'md5': 'c92d78bcee4424451f1667f275c1dc97',
+        'info_dict': {
+            'id': '73147290',
+            'ext': 'mp4',
+            'title': '싸이 - 나팔바지 [유희열의 스케치북] 299회 20151218',
+            'description': '싸이 - 나팔바지',
+            'upload_date': '20151219',
+            'duration': 232,
+        },
     }, {
         'url': 'http://tvpot.daum.net/v/vab4dyeDBysyBssyukBUjBz',
         'only_matching': True,
@@ -37,9 +49,11 @@ class DaumIE(InfoExtractor):
         video_id = mobj.group('id')
         canonical_url = 'http://tvpot.daum.net/v/%s' % video_id
         webpage = self._download_webpage(canonical_url, video_id)
+        og_url = self._og_search_url(webpage, default=None) or self._search_regex(
+            r'<link[^>]+rel=(["\'])canonical\1[^>]+href=(["\'])(?P<url>.+?)\2',
+            webpage, 'canonical url', group='url')
         full_id = self._search_regex(
-            r'src=["\']http://videofarm\.daum\.net/controller/video/viewer/Video\.html\?.*?vid=(.+?)[&"\']',
-            webpage, 'full id')
+            r'tvpot\.daum\.net/v/([^/]+)', og_url, 'full id')
         query = compat_urllib_parse.urlencode({'vid': full_id})
         info = self._download_xml(
             'http://tvpot.daum.net/clip/ClipInfoXml.do?' + query, video_id,
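The new 'full id' lookup reads the page's canonical URL instead of scraping the player iframe; checked here against the canonical form used in the tests above:

```python
import re

og_url = 'http://tvpot.daum.net/v/vab4dyeDBysyBssyukBUjBz'
full_id = re.search(r'tvpot\.daum\.net/v/([^/]+)', og_url).group(1)
print(full_id)  # vab4dyeDBysyBssyukBUjBz
```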

youtube_dl/extractor/franceinter.py

@@ -1,8 +1,6 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
-import re
-
 from .common import InfoExtractor
 from ..utils import int_or_none
 
@@ -23,8 +21,7 @@ class FranceInterIE(InfoExtractor):
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = self._match_id(url)
 
         webpage = self._download_webpage(url, video_id)
 
@@ -33,7 +30,7 @@ class FranceInterIE(InfoExtractor):
         video_url = 'http://www.franceinter.fr/' + path
 
         title = self._html_search_regex(
-            r'<span class="title">(.+?)</span>', webpage, 'title')
+            r'<span class="title-diffusion">(.+?)</span>', webpage, 'title')
         description = self._html_search_regex(
             r'<span class="description">(.*?)</span>',
             webpage, 'description', fatal=False)

youtube_dl/extractor/imgur.py

@@ -13,7 +13,7 @@ from ..utils import (
 
 
 class ImgurIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:i\.)?imgur\.com/(?!gallery)(?P<id>[a-zA-Z0-9]+)'
+    _VALID_URL = r'https?://(?:i\.)?imgur\.com/(?:(?:gallery|topic/[^/]+)/)?(?P<id>[a-zA-Z0-9]{6,})(?:[/?#&]+|\.[a-z]+)?$'
 
     _TESTS = [{
         'url': 'https://i.imgur.com/A61SaA1.gifv',
@@ -21,7 +21,7 @@ class ImgurIE(InfoExtractor):
             'id': 'A61SaA1',
             'ext': 'mp4',
             'title': 're:Imgur GIF$|MRW gifv is up and running without any bugs$',
-            'description': 're:The origin of the Internet\'s most viral images$|The Internet\'s visual storytelling community\. Explore, share, and discuss the best visual stories the Internet has to offer\.$',
+            'description': 'Imgur: The most awesome images on the Internet.',
         },
     }, {
         'url': 'https://imgur.com/A61SaA1',
@@ -29,8 +29,20 @@ class ImgurIE(InfoExtractor):
             'id': 'A61SaA1',
             'ext': 'mp4',
             'title': 're:Imgur GIF$|MRW gifv is up and running without any bugs$',
-            'description': 're:The origin of the Internet\'s most viral images$|The Internet\'s visual storytelling community\. Explore, share, and discuss the best visual stories the Internet has to offer\.$',
+            'description': 'Imgur: The most awesome images on the Internet.',
         },
+    }, {
+        'url': 'https://imgur.com/gallery/YcAQlkx',
+        'info_dict': {
+            'id': 'YcAQlkx',
+            'ext': 'mp4',
+            'title': 'Classic Steve Carell gif...cracks me up everytime....damn the repost downvotes....',
+            'description': 'Imgur: The most awesome images on the Internet.'
+        }
+    }, {
+        'url': 'http://imgur.com/topic/Funny/N8rOudd',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
@@ -100,25 +112,38 @@ class ImgurIE(InfoExtractor):
 
 
 class ImgurAlbumIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:i\.)?imgur\.com/gallery/(?P<id>[a-zA-Z0-9]+)'
+    _VALID_URL = r'https?://(?:i\.)?imgur\.com/(?:(?:a|gallery|topic/[^/]+)/)?(?P<id>[a-zA-Z0-9]{5})(?:[/?#&]+)?$'
 
-    _TEST = {
+    _TESTS = [{
        'url': 'http://imgur.com/gallery/Q95ko',
        'info_dict': {
            'id': 'Q95ko',
        },
        'playlist_count': 25,
-    }
+    }, {
+        'url': 'http://imgur.com/a/j6Orj',
+        'only_matching': True,
+    }, {
+        'url': 'http://imgur.com/topic/Aww/ll5Vk',
+        'only_matching': True,
+    }]
 
     def _real_extract(self, url):
         album_id = self._match_id(url)
 
         album_images = self._download_json(
             'http://imgur.com/gallery/%s/album_images/hit.json?all=true' % album_id,
-            album_id)['data']['images']
-
-        entries = [
-            self.url_result('http://imgur.com/%s' % image['hash'])
-            for image in album_images if image.get('hash')]
-
-        return self.playlist_result(entries, album_id)
+            album_id, fatal=False)
+
+        if album_images:
+            data = album_images.get('data')
+            if data and isinstance(data, dict):
+                images = data.get('images')
+                if images and isinstance(images, list):
+                    entries = [
+                        self.url_result('http://imgur.com/%s' % image['hash'])
+                        for image in images if image.get('hash')]
+                    return self.playlist_result(entries, album_id)
+
+        # Fallback to single video
+        return self.url_result('http://imgur.com/%s' % album_id, ImgurIE.ie_key())
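A quick check of the widened `ImgurIE` pattern against the test URLs above:

```python
import re

IMGUR_RE = r'https?://(?:i\.)?imgur\.com/(?:(?:gallery|topic/[^/]+)/)?(?P<id>[a-zA-Z0-9]{6,})(?:[/?#&]+|\.[a-z]+)?$'
for u in ('https://i.imgur.com/A61SaA1.gifv',
          'https://imgur.com/gallery/YcAQlkx',
          'http://imgur.com/topic/Funny/N8rOudd'):
    print(re.match(IMGUR_RE, u).group('id'))  # A61SaA1, YcAQlkx, N8rOudd
```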

youtube_dl/extractor/instagram.py

@@ -47,7 +47,7 @@ class InstagramIE(InfoExtractor):
 
 
 class InstagramUserIE(InfoExtractor):
-    _VALID_URL = r'https://instagram\.com/(?P<username>[^/]{2,})/?(?:$|[?#])'
+    _VALID_URL = r'https?://(?:www\.)?instagram\.com/(?P<username>[^/]{2,})/?(?:$|[?#])'
     IE_DESC = 'Instagram user profile'
     IE_NAME = 'instagram:user'
     _TEST = {

youtube_dl/extractor/pbs.py

@@ -16,7 +16,7 @@ from ..utils import (
 
 class PBSIE(InfoExtractor):
     _STATIONS = (
-        (r'(?:video|www)\.pbs\.org', 'PBS: Public Broadcasting Service'),  # http://www.pbs.org/
+        (r'(?:video|www|player)\.pbs\.org', 'PBS: Public Broadcasting Service'),  # http://www.pbs.org/
         (r'video\.aptv\.org', 'APT - Alabama Public Television (WBIQ)'),  # http://aptv.org/
         (r'video\.gpb\.org', 'GPB/Georgia Public Broadcasting (WGTV)'),  # http://www.gpb.org/
         (r'video\.mpbonline\.org', 'Mississippi Public Broadcasting (WMPN)'),  # http://www.mpbonline.org

youtube_dl/extractor/periscope.py

@@ -31,9 +31,8 @@ class PeriscopeIE(InfoExtractor):
     }]
 
     def _call_api(self, method, value):
-        attribute = 'token' if len(value) > 13 else 'broadcast_id'
         return self._download_json(
-            'https://api.periscope.tv/api/v2/%s?%s=%s' % (method, attribute, value), value)
+            'https://api.periscope.tv/api/v2/%s?broadcast_id=%s' % (method, value), value)
 
     def _real_extract(self, url):
         token = self._match_id(url)

youtube_dl/extractor/soompi.py (deleted)

@@ -1,146 +0,0 @@
-# encoding: utf-8
-from __future__ import unicode_literals
-
-import re
-
-from .crunchyroll import CrunchyrollIE
-
-from .common import InfoExtractor
-from ..compat import compat_HTTPError
-from ..utils import (
-    ExtractorError,
-    int_or_none,
-    remove_start,
-    xpath_text,
-)
-
-
-class SoompiBaseIE(InfoExtractor):
-    def _get_episodes(self, webpage, episode_filter=None):
-        episodes = self._parse_json(
-            self._search_regex(
-                r'VIDEOS\s*=\s*(\[.+?\]);', webpage, 'episodes JSON'),
-            None)
-        return list(filter(episode_filter, episodes))
-
-
-class SoompiIE(SoompiBaseIE, CrunchyrollIE):
-    IE_NAME = 'soompi'
-    _VALID_URL = r'https?://tv\.soompi\.com/(?:en/)?watch/(?P<id>[0-9]+)'
-    _TESTS = [{
-        'url': 'http://tv.soompi.com/en/watch/29235',
-        'info_dict': {
-            'id': '29235',
-            'ext': 'mp4',
-            'title': 'Episode 1096',
-            'description': '2015-05-20'
-        },
-        'params': {
-            'skip_download': True,
-        },
-    }]
-
-    def _get_episode(self, webpage, video_id):
-        return self._get_episodes(webpage, lambda x: x['id'] == video_id)[0]
-
-    def _get_subtitles(self, config, video_id):
-        sub_langs = {}
-        for subtitle in config.findall('./{default}preload/subtitles/subtitle'):
-            sub_langs[subtitle.attrib['id']] = subtitle.attrib['title']
-
-        subtitles = {}
-        for s in config.findall('./{default}preload/subtitle'):
-            lang_code = sub_langs.get(s.attrib['id'])
-            if not lang_code:
-                continue
-            sub_id = s.get('id')
-            data = xpath_text(s, './data', 'data')
-            iv = xpath_text(s, './iv', 'iv')
-            if not id or not iv or not data:
-                continue
-            subtitle = self._decrypt_subtitles(data, iv, sub_id).decode('utf-8')
-            subtitles[lang_code] = self._extract_subtitles(subtitle)
-        return subtitles
-
-    def _real_extract(self, url):
-        video_id = self._match_id(url)
-
-        try:
-            webpage = self._download_webpage(
-                url, video_id, 'Downloading episode page')
-        except ExtractorError as ee:
-            if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 403:
-                webpage = ee.cause.read()
-                block_message = self._html_search_regex(
-                    r'(?s)<div class="block-message">(.+?)</div>', webpage,
-                    'block message', default=None)
-                if block_message:
-                    raise ExtractorError(block_message, expected=True)
-            raise
-
-        formats = []
-        config = None
-        for format_id in re.findall(r'\?quality=([0-9a-zA-Z]+)', webpage):
-            config = self._download_xml(
-                'http://tv.soompi.com/en/show/_/%s-config.xml?mode=hls&quality=%s' % (video_id, format_id),
-                video_id, 'Downloading %s XML' % format_id)
-            m3u8_url = xpath_text(
-                config, './{default}preload/stream_info/file',
-                '%s m3u8 URL' % format_id)
-            if not m3u8_url:
-                continue
-            formats.extend(self._extract_m3u8_formats(
-                m3u8_url, video_id, 'mp4', m3u8_id=format_id))
-        self._sort_formats(formats)
-
-        episode = self._get_episode(webpage, video_id)
-
-        title = episode['name']
-        description = episode.get('description')
-        duration = int_or_none(episode.get('duration'))
-
-        thumbnails = [{
-            'id': thumbnail_id,
-            'url': thumbnail_url,
-        } for thumbnail_id, thumbnail_url in episode.get('img_url', {}).items()]
-
-        subtitles = self.extract_subtitles(config, video_id)
-
-        return {
-            'id': video_id,
-            'title': title,
-            'description': description,
-            'thumbnails': thumbnails,
-            'duration': duration,
-            'formats': formats,
-            'subtitles': subtitles
-        }
-
-
-class SoompiShowIE(SoompiBaseIE):
-    IE_NAME = 'soompi:show'
-    _VALID_URL = r'https?://tv\.soompi\.com/en/shows/(?P<id>[0-9a-zA-Z\-_]+)'
-    _TESTS = [{
-        'url': 'http://tv.soompi.com/en/shows/liar-game',
-        'info_dict': {
-            'id': 'liar-game',
-            'title': 'Liar Game',
-            'description': 'md5:52c02bce0c1a622a95823591d0589b66',
-        },
-        'playlist_count': 14,
-    }]
-
-    def _real_extract(self, url):
-        show_id = self._match_id(url)
-
-        webpage = self._download_webpage(
-            url, show_id, 'Downloading show page')
-
-        title = remove_start(self._og_search_title(webpage), 'SoompiTV | ')
-        description = self._og_search_description(webpage)
-
-        entries = [
-            self.url_result('http://tv.soompi.com/en/watch/%s' % episode['id'], 'Soompi')
-            for episode in self._get_episodes(webpage)]
-
-        return self.playlist_result(entries, show_id, title, description)

youtube_dl/extractor/theintercept.py (new)

@@ -0,0 +1,49 @@
+# encoding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+    parse_iso8601,
+    int_or_none,
+    ExtractorError,
+)
+
+
+class TheInterceptIE(InfoExtractor):
+    _VALID_URL = r'https://theintercept.com/fieldofvision/(?P<id>[^/?#]+)'
+    _TESTS = [{
+        'url': 'https://theintercept.com/fieldofvision/thisisacoup-episode-four-surrender-or-die/',
+        'md5': '145f28b41d44aab2f87c0a4ac8ec95bd',
+        'info_dict': {
+            'id': '46214',
+            'ext': 'mp4',
+            'title': '#ThisIsACoup Episode Four: Surrender or Die',
+            'description': 'md5:74dd27f0e2fbd50817829f97eaa33140',
+            'timestamp': 1450429239,
+            'upload_date': '20151218',
+            'comment_count': int,
+        }
+    }]
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+        webpage = self._download_webpage(url, display_id)
+
+        json_data = self._parse_json(self._search_regex(
+            r'initialStoreTree\s*=\s*(?P<json_data>{.+})', webpage,
+            'initialStoreTree'), display_id)
+
+        for post in json_data['resources']['posts'].values():
+            if post['slug'] == display_id:
+                return {
+                    '_type': 'url_transparent',
+                    'url': 'jwplatform:%s' % post['fov_videoid'],
+                    'id': compat_str(post['ID']),
+                    'display_id': display_id,
+                    'title': post['title'],
+                    'description': post.get('excerpt'),
+                    'timestamp': parse_iso8601(post.get('date')),
+                    'comment_count': int_or_none(post.get('comments_number')),
+                }
+        raise ExtractorError('Unable to find the current post')

youtube_dl/extractor/twentyfourvideo.py

@@ -5,6 +5,8 @@ from .common import InfoExtractor
 from ..utils import (
     parse_iso8601,
     int_or_none,
+    xpath_attr,
+    xpath_element,
 )
 
 
@@ -15,7 +17,7 @@ class TwentyFourVideoIE(InfoExtractor):
     _TESTS = [
         {
             'url': 'http://www.24video.net/video/view/1044982',
-            'md5': 'd041af8b5b4246ea466226a0d6693345',
+            'md5': 'e09fc0901d9eaeedac872f154931deeb',
             'info_dict': {
                 'id': '1044982',
                 'ext': 'mp4',
@@ -64,33 +66,24 @@ class TwentyFourVideoIE(InfoExtractor):
             r'<div class="comments-title" id="comments-count">(\d+) комментари',
             webpage, 'comment count', fatal=False))
 
-        formats = []
+        # Sets some cookies
+        self._download_xml(
+            r'http://www.24video.net/video/xml/%s?mode=init' % video_id,
+            video_id, 'Downloading init XML')
 
-        pc_video = self._download_xml(
+        video_xml = self._download_xml(
             'http://www.24video.net/video/xml/%s?mode=play' % video_id,
-            video_id, 'Downloading PC video URL').find('.//video')
-        formats.append({
-            'url': pc_video.attrib['url'],
-            'format_id': 'pc',
-            'quality': 1,
-        })
+            video_id, 'Downloading video XML')
 
-        like_count = int_or_none(pc_video.get('ratingPlus'))
-        dislike_count = int_or_none(pc_video.get('ratingMinus'))
-        age_limit = 18 if pc_video.get('adult') == 'true' else 0
+        video = xpath_element(video_xml, './/video', 'video', fatal=True)
 
-        mobile_video = self._download_xml(
-            'http://www.24video.net/video/xml/%s' % video_id,
-            video_id, 'Downloading mobile video URL').find('.//video')
-        formats.append({
-            'url': mobile_video.attrib['url'],
-            'format_id': 'mobile',
-            'quality': 0,
-        })
+        formats = [{
+            'url': xpath_attr(video, '', 'url', 'video URL', fatal=True),
+        }]
 
-        self._sort_formats(formats)
+        like_count = int_or_none(video.get('ratingPlus'))
+        dislike_count = int_or_none(video.get('ratingMinus'))
+        age_limit = 18 if video.get('adult') == 'true' else 0
 
         return {
             'id': video_id,
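A rough stdlib equivalent of the `xpath_element`/`xpath_attr` lookups above, run against a fabricated response:

```python
import xml.etree.ElementTree as etree

video_xml = etree.fromstring(
    '<response><video url="http://example.com/v.mp4" ratingPlus="10"/></response>')
video = video_xml.find('.//video')  # what xpath_element(video_xml, './/video') returns
print(video.attrib['url'])          # what xpath_attr(video, '', 'url') returns
print(video.get('ratingPlus'))      # 10
```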

youtube_dl/extractor/vgtv.py

@@ -4,26 +4,48 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
+from .xstream import XstreamIE
 from ..utils import (
     ExtractorError,
     float_or_none,
 )
 
 
-class VGTVIE(InfoExtractor):
-    IE_DESC = 'VGTV and BTTV'
+class VGTVIE(XstreamIE):
+    IE_DESC = 'VGTV, BTTV, FTV, Aftenposten and Aftonbladet'
+
+    _HOST_TO_APPNAME = {
+        'vgtv.no': 'vgtv',
+        'bt.no/tv': 'bttv',
+        'aftenbladet.no/tv': 'satv',
+        'fvn.no/fvntv': 'fvntv',
+        'aftenposten.no/webtv': 'aptv',
+    }
+
+    _APP_NAME_TO_VENDOR = {
+        'vgtv': 'vgtv',
+        'bttv': 'bt',
+        'satv': 'sa',
+        'fvntv': 'fvn',
+        'aptv': 'ap',
+    }
+
     _VALID_URL = r'''(?x)
-                    (?:
-                        vgtv:|
-                        http://(?:www\.)?
-                    )
-                    (?P<host>vgtv|bt)
-                    (?:
-                        :|
-                        \.no/(?:tv/)?\#!/(?:video|live)/
-                    )
-                    (?P<id>[0-9]+)
-                    '''
+                    (?:https?://(?:www\.)?
+                    (?P<host>
+                        %s
+                    )
+                    /
+                    (?:
+                        \#!/(?:video|live)/|
+                        embed?.*id=
+                    )|
+                    (?P<appname>
+                        %s
+                    ):)
+                    (?P<id>\d+)
+                    ''' % ('|'.join(_HOST_TO_APPNAME.keys()), '|'.join(_APP_NAME_TO_VENDOR.keys()))
+
     _TESTS = [
         {
             # streamType: vod
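The assembled pattern can be exercised directly; this mirrors the string formatting above, against URLs taken from the tests in this file:

```python
import re

_HOST_TO_APPNAME = {
    'vgtv.no': 'vgtv',
    'bt.no/tv': 'bttv',
    'aftenbladet.no/tv': 'satv',
    'fvn.no/fvntv': 'fvntv',
    'aftenposten.no/webtv': 'aptv',
}
_APP_NAME_TO_VENDOR = {'vgtv': 'vgtv', 'bttv': 'bt', 'satv': 'sa', 'fvntv': 'fvn', 'aptv': 'ap'}

pattern = (r'(?:https?://(?:www\.)?(?P<host>%s)/(?:\#!/(?:video|live)/|embed?.*id=)'
           r'|(?P<appname>%s):)(?P<id>\d+)'
           % ('|'.join(_HOST_TO_APPNAME.keys()), '|'.join(_APP_NAME_TO_VENDOR.keys())))

for u in ('http://www.vgtv.no/#!/live/113063/direkte-v75-fra-solvalla', 'bttv:100250'):
    print(re.match(pattern, u).groupdict())
# {'host': 'vgtv.no', 'appname': None, 'id': '113063'}
# {'host': None, 'appname': 'bttv', 'id': '100250'}
```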
@@ -59,17 +81,18 @@
                 # m3u8 download
                 'skip_download': True,
             },
+            'skip': 'Video is no longer available',
         },
         {
-            # streamType: live
+            # streamType: wasLive
             'url': 'http://www.vgtv.no/#!/live/113063/direkte-v75-fra-solvalla',
             'info_dict': {
                 'id': '113063',
-                'ext': 'flv',
-                'title': 're:^DIREKTE: V75 fra Solvalla [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+                'ext': 'mp4',
+                'title': 'V75 fra Solvalla 30.05.15',
                 'description': 'md5:b3743425765355855f88e096acc93231',
                 'thumbnail': 're:^https?://.*\.jpg',
-                'duration': 0,
+                'duration': 25966,
                 'timestamp': 1432975582,
                 'upload_date': '20150530',
                 'view_count': int,
@@ -79,6 +102,20 @@
                 'skip_download': True,
             },
         },
+        {
+            'url': 'http://www.aftenposten.no/webtv/#!/video/21039/trailer-sweatshop-i-can-t-take-any-more',
+            'md5': 'fd828cd29774a729bf4d4425fe192972',
+            'info_dict': {
+                'id': '21039',
+                'ext': 'mov',
+                'title': 'TRAILER: «SWEATSHOP» - I can´t take any more',
+                'description': 'md5:21891f2b0dd7ec2f78d84a50e54f8238',
+                'duration': 66,
+                'timestamp': 1417002452,
+                'upload_date': '20141126',
+                'view_count': int,
+            }
+        },
         {
             'url': 'http://www.bt.no/tv/#!/video/100250/norling-dette-er-forskjellen-paa-1-divisjon-og-eliteserien',
             'only_matching': True,
@@ -89,21 +126,27 @@
         mobj = re.match(self._VALID_URL, url)
         video_id = mobj.group('id')
         host = mobj.group('host')
-
-        HOST_WEBSITES = {
-            'vgtv': 'vgtv',
-            'bt': 'bttv',
-        }
+        appname = self._HOST_TO_APPNAME[host] if host else mobj.group('appname')
+        vendor = self._APP_NAME_TO_VENDOR[appname]
 
         data = self._download_json(
             'http://svp.vg.no/svp/api/v1/%s/assets/%s?appName=%s-website'
-            % (host, video_id, HOST_WEBSITES[host]),
+            % (vendor, video_id, appname),
             video_id, 'Downloading media JSON')
 
         if data.get('status') == 'inactive':
             raise ExtractorError(
                 'Video %s is no longer available' % video_id, expected=True)
 
+        info = {
+            'formats': [],
+        }
+        if len(video_id) == 5:
+            if appname == 'bttv':
+                info = self._extract_video_info('btno', video_id)
+            elif appname == 'aptv':
+                info = self._extract_video_info('ap', video_id)
+
         streams = data['streamUrls']
         stream_type = data.get('streamType')
 
@@ -111,48 +154,53 @@
         hls_url = streams.get('hls')
         if hls_url:
-            formats.extend(self._extract_m3u8_formats(
-                hls_url, video_id, 'mp4', m3u8_id='hls'))
+            m3u8_formats = self._extract_m3u8_formats(
+                hls_url, video_id, 'mp4', m3u8_id='hls', fatal=False)
+            if m3u8_formats:
+                formats.extend(m3u8_formats)
 
         hds_url = streams.get('hds')
         # wasLive hds are always 404
         if hds_url and stream_type != 'wasLive':
-            formats.extend(self._extract_f4m_formats(
-                hds_url + '?hdcore=3.2.0&plugin=aasp-3.2.0.77.18',
-                video_id, f4m_id='hds'))
+            f4m_formats = self._extract_f4m_formats(
+                hds_url + '?hdcore=3.2.0&plugin=aasp-3.2.0.77.18', video_id, f4m_id='hds', fatal=False)
+            if f4m_formats:
+                formats.extend(f4m_formats)
 
+        mp4_urls = streams.get('pseudostreaming') or []
         mp4_url = streams.get('mp4')
         if mp4_url:
-            _url = hls_url or hds_url
-            MP4_URL_TEMPLATE = '%s/%%s.%s' % (mp4_url.rpartition('/')[0], mp4_url.rpartition('.')[-1])
-            for mp4_format in _url.split(','):
-                m = re.search('(?P<width>\d+)_(?P<height>\d+)_(?P<vbr>\d+)', mp4_format)
-                if not m:
-                    continue
-                width = int(m.group('width'))
-                height = int(m.group('height'))
-                vbr = int(m.group('vbr'))
-                formats.append({
-                    'url': MP4_URL_TEMPLATE % mp4_format,
-                    'format_id': 'mp4-%s' % vbr,
-                    'width': width,
-                    'height': height,
-                    'vbr': vbr,
-                    'preference': 1,
-                })
-        self._sort_formats(formats)
-
-        return {
+            mp4_urls.append(mp4_url)
+        for mp4_url in mp4_urls:
+            format_info = {
+                'url': mp4_url,
+            }
+            mobj = re.search('(\d+)_(\d+)_(\d+)', mp4_url)
+            if mobj:
+                tbr = int(mobj.group(3))
+                format_info.update({
+                    'width': int(mobj.group(1)),
+                    'height': int(mobj.group(2)),
+                    'tbr': tbr,
+                    'format_id': 'mp4-%s' % tbr,
+                })
+            formats.append(format_info)
+        info['formats'].extend(formats)
+        self._sort_formats(info['formats'])
+
+        info.update({
             'id': video_id,
-            'title': self._live_title(data['title']),
+            'title': self._live_title(data['title']) if stream_type == 'live' else data['title'],
             'description': data['description'],
             'thumbnail': data['images']['main'] + '?t[]=900x506q80',
             'timestamp': data['published'],
             'duration': float_or_none(data['duration'], 1000),
             'view_count': data['displays'],
-            'formats': formats,
             'is_live': True if stream_type == 'live' else False,
-        }
+        })
+        return info
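The dimensions and bitrate come straight out of URL segments shaped like `..._1280_720_2500.mp4`; sample URL made up to show the pattern:

```python
import re

mp4_url = 'http://example.com/video/solvalla_1280_720_2500.mp4'
mobj = re.search(r'(\d+)_(\d+)_(\d+)', mp4_url)
if mobj:
    width, height, tbr = (int(g) for g in mobj.groups())
    print(width, height, tbr)  # 1280 720 2500
```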
 
 
 class BTArticleIE(InfoExtractor):
@@ -161,7 +209,7 @@ class BTArticleIE(InfoExtractor):
     _VALID_URL = 'http://(?:www\.)?bt\.no/(?:[^/]+/)+(?P<id>[^/]+)-\d+\.html'
     _TEST = {
         'url': 'http://www.bt.no/nyheter/lokalt/Kjemper-for-internatet-1788214.html',
-        'md5': 'd055e8ee918ef2844745fcfd1a4175fb',
+        'md5': '2acbe8ad129b3469d5ae51b1158878df',
         'info_dict': {
             'id': '23199',
             'ext': 'mp4',
@@ -178,15 +226,15 @@ class BTArticleIE(InfoExtractor):
     def _real_extract(self, url):
         webpage = self._download_webpage(url, self._match_id(url))
         video_id = self._search_regex(
-            r'SVP\.Player\.load\(\s*(\d+)', webpage, 'video id')
-        return self.url_result('vgtv:bt:%s' % video_id, 'VGTV')
+            r'<video[^>]+data-id="(\d+)"', webpage, 'video id')
+        return self.url_result('bttv:%s' % video_id, 'VGTV')
 
 
 class BTVestlendingenIE(InfoExtractor):
     IE_NAME = 'bt:vestlendingen'
     IE_DESC = 'Bergens Tidende - Vestlendingen'
     _VALID_URL = 'http://(?:www\.)?bt\.no/spesial/vestlendingen/#!/(?P<id>\d+)'
-    _TEST = {
+    _TESTS = [{
         'url': 'http://www.bt.no/spesial/vestlendingen/#!/86588',
         'md5': 'd7d17e3337dc80de6d3a540aefbe441b',
         'info_dict': {
@@ -197,7 +245,19 @@ class BTVestlendingenIE(InfoExtractor):
             'timestamp': 1430473209,
             'upload_date': '20150501',
         },
-    }
+        'skip': '404 Error',
+    }, {
+        'url': 'http://www.bt.no/spesial/vestlendingen/#!/86255',
+        'md5': 'a2893f8632e96389f4bdf36aa9463ceb',
+        'info_dict': {
+            'id': '86255',
+            'ext': 'mov',
+            'title': 'Du må tåle å fryse og være sulten',
+            'description': 'md5:b8046f4d022d5830ddab04865791d063',
+            'upload_date': '20150321',
+            'timestamp': 1426942023,
+        },
+    }]
 
     def _real_extract(self, url):
-        return self.url_result('xstream:btno:%s' % self._match_id(url), 'Xstream')
+        return self.url_result('bttv:%s' % self._match_id(url), 'VGTV')

youtube_dl/extractor/viki.py

@@ -30,6 +30,12 @@ class VikiBaseIE(InfoExtractor):
 
     _token = None
 
+    _ERRORS = {
+        'geo': 'Sorry, this content is not available in your region.',
+        'upcoming': 'Sorry, this content is not yet available.',
+        # 'paywall': 'paywall',
+    }
+
     def _prepare_call(self, path, timestamp=None, post_data=None):
         path += '?' if '?' not in path else '&'
         if not timestamp:
@@ -67,6 +73,12 @@ class VikiBaseIE(InfoExtractor):
             '%s returned error: %s' % (self.IE_NAME, error),
             expected=True)
 
+    def _check_errors(self, data):
+        for reason, status in data.get('blocking', {}).items():
+            if status and reason in self._ERRORS:
+                raise ExtractorError('%s said: %s' % (
+                    self.IE_NAME, self._ERRORS[reason]), expected=True)
+
     def _real_initialize(self):
         self._login()
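How `_check_errors` reacts to the API's blocking flags, with a fabricated response:

```python
_ERRORS = {
    'geo': 'Sorry, this content is not available in your region.',
    'upcoming': 'Sorry, this content is not yet available.',
}

data = {'blocking': {'geo': True, 'paywall': False}}
for reason, status in data.get('blocking', {}).items():
    if status and reason in _ERRORS:
        print('viki said: %s' % _ERRORS[reason])  # the extractor raises ExtractorError here
```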
@@ -193,6 +205,7 @@ class VikiIE(VikiBaseIE):
             'timestamp': 1321985454,
             'description': 'md5:44b1e46619df3a072294645c770cef36',
             'title': 'Love In Magic',
+            'age_limit': 13,
         },
     }]
 
@@ -202,6 +215,8 @@ class VikiIE(VikiBaseIE):
         video = self._call_api(
             'videos/%s.json' % video_id, video_id, 'Downloading video JSON')
 
+        self._check_errors(video)
+
         title = self.dict_selection(video.get('titles', {}), 'en')
         if not title:
             title = 'Episode %d' % video.get('number') if video.get('type') == 'episode' else video.get('id') or video_id
@@ -262,8 +277,11 @@ class VikiIE(VikiBaseIE):
                 r'^(\d+)[pP]$', format_id, 'height', default=None))
             for protocol, format_dict in stream_dict.items():
                 if format_id == 'm3u8':
-                    formats = self._extract_m3u8_formats(
-                        format_dict['url'], video_id, 'mp4', m3u8_id='m3u8-%s' % protocol)
+                    m3u8_formats = self._extract_m3u8_formats(
+                        format_dict['url'], video_id, 'mp4', 'm3u8_native',
+                        m3u8_id='m3u8-%s' % protocol, fatal=None)
+                    if m3u8_formats:
+                        formats.extend(m3u8_formats)
                 else:
                     formats.append({
                         'url': format_dict['url'],
@@ -315,6 +333,8 @@ class VikiChannelIE(VikiBaseIE):
             'containers/%s.json' % channel_id, channel_id,
             'Downloading channel JSON')
 
+        self._check_errors(channel)
+
         title = self.dict_selection(channel['titles'], 'en')
         description = self.dict_selection(channel['descriptions'], 'en')
 

youtube_dl/extractor/xstream.py

@@ -42,11 +42,7 @@ class XstreamIE(InfoExtractor):
         'only_matching': True,
     }]
 
-    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        partner_id = mobj.group('partner_id')
-        video_id = mobj.group('id')
-
+    def _extract_video_info(self, partner_id, video_id):
         data = self._download_xml(
             'http://frontend.xstream.dk/%s/feed/video/?platform=web&id=%s'
             % (partner_id, video_id),
@@ -97,6 +93,7 @@ class XstreamIE(InfoExtractor):
             formats.append({
                 'url': link.get('href'),
                 'format_id': link.get('rel'),
+                'preference': 1,
             })
 
         thumbnails = [{
@@ -113,3 +110,10 @@ class XstreamIE(InfoExtractor):
             'formats': formats,
             'thumbnails': thumbnails,
         }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        partner_id = mobj.group('partner_id')
+        video_id = mobj.group('id')
+
+        return self._extract_video_info(partner_id, video_id)

youtube_dl/version.py

@@ -1,3 +1,3 @@
 from __future__ import unicode_literals
 
-__version__ = '2015.12.21'
+__version__ = '2015.12.23'