From 0d66bd0eab436f7215f5da168b378127898ccd66 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com>
Date: Sun, 1 May 2016 13:56:51 +0600
Subject: [PATCH 01/11] [downloader/hls] Delegate extraction to ffmpeg when
 unsupported features detected

---
 youtube_dl/downloader/hls.py | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)
diff --git a/youtube_dl/downloader/hls.py b/youtube_dl/downloader/hls.py
index a01dac031..d7b34bde3 100644
--- a/youtube_dl/downloader/hls.py
+++ b/youtube_dl/downloader/hls.py
@@ -4,6 +4,7 @@ import os.path
 import re
 
 from .fragment import FragmentFD
+from .external import FFmpegFD
 
 from ..compat import compat_urlparse
 from ..utils import (
@@ -17,12 +18,34 @@ class HlsFD(FragmentFD):
 
     FD_NAME = 'hlsnative'
 
+    @staticmethod
+    def can_download(manifest):  # True only when no unsupported HLS tag occurs in the manifest text
+        UNSUPPORTED_FEATURES = (
+            r'#EXT-X-KEY:METHOD=(?!NONE)',  # encrypted streams [1]
+            r'#EXT-X-BYTERANGE',  # playlists composed of byte ranges of media files [2]
+            r'(?m)#EXT-X-MEDIA-SEQUENCE:(?!0\s*$)',  # live streams [3]; (?m) anchors $ at the end of the tag line
+            # 1. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.4
+            # 2. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.2
+            # 3. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.2
+        )
+        return not any(re.search(feature, manifest) for feature in UNSUPPORTED_FEATURES)
+
     def real_download(self, filename, info_dict):
         man_url = info_dict['url']
         self.to_screen('[%s] Downloading m3u8 manifest' % self.FD_NAME)
         manifest = self.ydl.urlopen(man_url).read()
 
         s = manifest.decode('utf-8', 'ignore')
+
+        if not self.can_download(s):
+            self.report_warning(
+                'hlsnative has detected features it does not support, '
+                'extraction will be delegated to ffmpeg')
+            fd = FFmpegFD(self.ydl, self.params)
+            for ph in self._progress_hooks:
+                fd.add_progress_hook(ph)
+            return fd.real_download(filename, info_dict)
+
         fragment_urls = []
         for line in s.splitlines():
             line = line.strip()

From a0904c5d8024c12b7f95b1126a6b8152a4e1021f Mon Sep 17 00:00:00 2001
From: Yen Chi Hsuan <yan12125@gmail.com>
Date: Sun, 8 May 2016 00:56:31 +0800
Subject: [PATCH 02/11] [telegraaf] Fix extractor (closes #9318)

---
 youtube_dl/extractor/telegraaf.py | 58 +++++++++++++++++++++++++++----
 1 file changed, 51 insertions(+), 7 deletions(-)

diff --git a/youtube_dl/extractor/telegraaf.py b/youtube_dl/extractor/telegraaf.py
index 6f8333cfc..9092e9b85 100644
--- a/youtube_dl/extractor/telegraaf.py
+++ b/youtube_dl/extractor/telegraaf.py
@@ -2,14 +2,16 @@
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
-from ..utils import remove_end
+from ..utils import (
+    determine_ext,
+    remove_end,
+)
 
 
 class TelegraafIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?telegraaf\.nl/tv/(?:[^/]+/)+(?P<id>\d+)/[^/]+\.html'
     _TEST = {
         'url': 'http://www.telegraaf.nl/tv/nieuws/binnenland/24353229/__Tikibad_ontruimd_wegens_brand__.html',
-        'md5': '83245a9779bcc4a24454bfd53c65b6dc',
         'info_dict': {
             'id': '24353229',
             'ext': 'mp4',
@@ -18,18 +20,60 @@ class TelegraafIE(InfoExtractor):
             'thumbnail': 're:^https?://.*\.jpg$',
             'duration': 33,
         },
+        'params': {
+            # m3u8 download
+            'skip_download': True,
+        },
     }
 
     def _real_extract(self, url):
-        playlist_id = self._match_id(url)
+        video_id = self._match_id(url)
 
-        webpage = self._download_webpage(url, playlist_id)
+        webpage = self._download_webpage(url, video_id)
 
+        player_url = self._html_search_regex(
+            r'<iframe[^>]+src="([^"]+)"', webpage, 'player URL')  # closing quote stays outside the capture group
+        player_page = self._download_webpage(
+            player_url, video_id, note='Download player webpage')
         playlist_url = self._search_regex(
-            r"iframe\.loadPlayer\('([^']+)'", webpage, 'player')
+            r'playlist\s*:\s*"([^"]+)"', player_page, 'playlist URL')
+        playlist_data = self._download_json(playlist_url, video_id)
+
+        item = playlist_data['items'][0]
+        formats = []
+        locations = item['locations']
+        for location in locations.get('adaptive', []):
+            manifest_url = location['src']
+            ext = determine_ext(manifest_url)
+            if ext == 'm3u8':
+                formats.extend(self._extract_m3u8_formats(
+                    manifest_url, video_id, ext='mp4', m3u8_id='hls'))
+            elif ext == 'mpd':
+                # TODO: Current DASH formats are broken - $Time$ pattern in
+                # <SegmentTemplate> not implemented yet
+                continue
+            else:
+                self.report_warning('Unknown adaptive format %s' % ext)
+        for location in locations.get('progressive', []):
+            formats.append({
+                'url': location['sources'][0]['src'],
+                'width': location.get('width'),
+                'height': location.get('height'),
+                'format_id': 'http-%s' % location['label'],
+            })
+
+        self._sort_formats(formats)
 
-        entries = self._extract_xspf_playlist(playlist_url, playlist_id)
         title = remove_end(self._og_search_title(webpage), ' - VIDEO')
         description = self._og_search_description(webpage)
+        duration = item.get('duration')
+        thumbnail = item.get('poster')
 
-        return self.playlist_result(entries, playlist_id, title, description)
+        return {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'formats': formats,
+            'duration': duration,
+            'thumbnail': thumbnail,
+        }

From e2eca6f65e9969c31b3374bd3688321f3e471cd7 Mon Sep 17 00:00:00 2001
From: Kevin Deldycke <kevin@deldycke.com>
Date: Sat, 7 May 2016 20:03:25 +0200
Subject: [PATCH 03/11] Expand user's home in batch file path.

---
 youtube_dl/__init__.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py
index 737f6545d..7a0466077 100644
--- a/youtube_dl/__init__.py
+++ b/youtube_dl/__init__.py
@@ -86,7 +86,9 @@ def _real_main(argv=None):
             if opts.batchfile == '-':
                 batchfd = sys.stdin
             else:
-                batchfd = io.open(opts.batchfile, 'r', encoding='utf-8', errors='ignore')
+                batchfd = io.open(
+                    compat_expanduser(opts.batchfile),
+                    'r', encoding='utf-8', errors='ignore')
             batch_urls = read_batch_urls(batchfd)
             if opts.verbose:
                 write_string('[debug] Batch file urls: ' + repr(batch_urls) + '\n')

From 00c21c225decf648199013f2fa3385a1332037bb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com>
Date: Sun, 8 May 2016 00:11:44 +0600
Subject: [PATCH 04/11] Credit @kdeldycke for #9430

---
 AUTHORS | 1 +
 1 file changed, 1 insertion(+)

diff --git a/AUTHORS b/AUTHORS
index 814fe9ec3..5f668338b 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -169,3 +169,4 @@ Viťas Strádal
 Kagami Hiiragi
 Philip Huppert
 blahgeek
+Kevin Deldycke

From 5c24873a9e6a47e58b10eb0c0825e165604796f2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com>
Date: Sun, 8 May 2016 02:04:34 +0600
Subject: [PATCH 05/11] Credit @inondle for #9400

---
 AUTHORS | 1 +
 1 file changed, 1 insertion(+)

diff --git a/AUTHORS b/AUTHORS
index 5f668338b..bf860b7f7 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -170,3 +170,4 @@ Kagami Hiiragi
 Philip Huppert
 blahgeek
 Kevin Deldycke
+inondle

From f5436c5d9e4e65790440ada40476712ff430651b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com>
Date: Sun, 8 May 2016 02:29:26 +0600
Subject: [PATCH 06/11] [downloader/external] Add temp fix ffmpeg m3u8
 downloads (Closes #9394)

---
 youtube_dl/downloader/external.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/youtube_dl/downloader/external.py b/youtube_dl/downloader/external.py
index 8d642fc3e..45f49c350 100644
--- a/youtube_dl/downloader/external.py
+++ b/youtube_dl/downloader/external.py
@@ -224,7 +224,7 @@ class FFmpegFD(ExternalFD):
                 args += ['-rtmp_live', 'live']
 
         args += ['-i', url, '-c', 'copy']
-        if protocol == 'm3u8':
+        if protocol in ('m3u8', 'm3u8_native'):
             if self.params.get('hls_use_mpegts', False) or tmpfilename == '-':
                 args += ['-f', 'mpegts']
             else:

From 3e169233daf76cd7585ebac12504f8e624b7693b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com>
Date: Sun, 8 May 2016 04:36:57 +0600
Subject: [PATCH 07/11] Expanduser for more options with input files

---
 youtube_dl/YoutubeDL.py | 1 +
 youtube_dl/__init__.py  | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index 2187dcc8f..a96482e68 100755
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -2018,6 +2018,7 @@ class YoutubeDL(object):
         if opts_cookiefile is None:
             self.cookiejar = compat_cookiejar.CookieJar()
         else:
+            opts_cookiefile = compat_expanduser(opts_cookiefile)
             self.cookiejar = compat_cookiejar.MozillaCookieJar(
                 opts_cookiefile)
             if os.access(opts_cookiefile, os.R_OK):
diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py
index 7a0466077..cbd84c3af 100644
--- a/youtube_dl/__init__.py
+++ b/youtube_dl/__init__.py
@@ -406,7 +406,7 @@ def _real_main(argv=None):
 
         try:
             if opts.load_info_filename is not None:
-                retcode = ydl.download_with_info_file(opts.load_info_filename)
+                retcode = ydl.download_with_info_file(compat_expanduser(opts.load_info_filename))
             else:
                 retcode = ydl.download(all_urls)
         except MaxDownloadsReached:

From 9c072d38c6b0361d91e92c50cd0c753dc8ce3101 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com>
Date: Sun, 8 May 2016 06:52:42 +0600
Subject: [PATCH 08/11] [arte] Improve language preference (Closes #9401,
 closes #9162)

---
 youtube_dl/extractor/arte.py | 58 ++++++++++++++++++++++++++----------
 1 file changed, 43 insertions(+), 15 deletions(-)

diff --git a/youtube_dl/extractor/arte.py b/youtube_dl/extractor/arte.py
index 881cacfab..e37fdae13 100644
--- a/youtube_dl/extractor/arte.py
+++ b/youtube_dl/extractor/arte.py
@@ -161,24 +161,53 @@ class ArteTVPlus7IE(InfoExtractor):
             'es': 'E[ESP]',
         }
 
+        langcode = LANGS.get(lang, lang)
+
         formats = []
         for format_id, format_dict in player_info['VSR'].items():
             f = dict(format_dict)
             versionCode = f.get('versionCode')
-            langcode = LANGS.get(lang, lang)
-            lang_rexs = [r'VO?%s-' % re.escape(langcode), r'VO?.-ST%s$' % re.escape(langcode)]
-            lang_pref = None
-            if versionCode:
-                matched_lang_rexs = [r for r in lang_rexs if re.match(r, versionCode)]
-                lang_pref = -10 if not matched_lang_rexs else 10 * len(matched_lang_rexs)
-            source_pref = 0
-            if versionCode is not None:
-                # The original version with subtitles has lower relevance
-                if re.match(r'VO-ST(F|A|E)', versionCode):
-                    source_pref -= 10
-                # The version with sourds/mal subtitles has also lower relevance
-                elif re.match(r'VO?(F|A|E)-STM\1', versionCode):
-                    source_pref -= 9
+            l = re.escape(langcode)
+
+            # Language preference from most to least priority
+            # Reference: section 5.6.3 of
+            # http://www.arte.tv/sites/en/corporate/files/complete-technical-guidelines-arte-geie-v1-05.pdf
+            PREFERENCES = (
+                # original version in requested language, without subtitles
+                r'VO{0}$'.format(l),
+                # original version in requested language, with partial subtitles in requested language
+                r'VO{0}-ST{0}$'.format(l),
+                # original version in requested language, with subtitles for the deaf and hard-of-hearing in requested language
+                r'VO{0}-STM{0}$'.format(l),
+                # non-original (dubbed) version in requested language, without subtitles
+                r'V{0}$'.format(l),
+                # non-original (dubbed) version in requested language, with partial subtitles in requested language
+                r'V{0}-ST{0}$'.format(l),
+                # non-original (dubbed) version in requested language, with subtitles for the deaf and hard-of-hearing in requested language
+                r'V{0}-STM{0}$'.format(l),
+                # original version in requested language, with partial subtitles in different language
+                r'VO{0}-ST(?!{0}).+?$'.format(l),
+                # original version in requested language, with subtitles for the deaf and hard-of-hearing in different language
+                r'VO{0}-STM(?!{0}).+?$'.format(l),
+                # original version in different language, with partial subtitles in requested language
+                r'VO(?:(?!{0}).+?)?-ST{0}$'.format(l),
+                # original version in different language, with subtitles for the deaf and hard-of-hearing in requested language
+                r'VO(?:(?!{0}).+?)?-STM{0}$'.format(l),
+                # original version in different language, without subtitles
+                r'VO(?:(?!{0}))?$'.format(l),
+                # original version in different language, with partial subtitles in different language
+                r'VO(?:(?!{0}).+?)?-ST(?!{0}).+?$'.format(l),
+                # original version in different language, with subtitles for the deaf and hard-of-hearing in different language
+                r'VO(?:(?!{0}).+?)?-STM(?!{0}).+?$'.format(l),
+            )
+
+            for pref, p in enumerate(PREFERENCES):
+                if versionCode and re.match(p, versionCode):  # versionCode comes from f.get() and may be None
+                    lang_pref = len(PREFERENCES) - pref
+                    break
+            else:
+                lang_pref = -1
+
             format = {
                 'format_id': format_id,
                 'preference': -10 if f.get('videoFormat') == 'M3U8' else None,
@@ -188,7 +217,6 @@ class ArteTVPlus7IE(InfoExtractor):
                 'height': int_or_none(f.get('height')),
                 'tbr': int_or_none(f.get('bitrate')),
                 'quality': qfunc(f.get('quality')),
-                'source_preference': source_pref,
             }
 
             if f.get('mediaType') == 'rtmp':

From 3b01a9fbb63e33325fa979db8a846d3e655e79e6 Mon Sep 17 00:00:00 2001
From: Yen Chi Hsuan <yan12125@gmail.com>
Date: Sun, 8 May 2016 14:34:38 +0800
Subject: [PATCH 09/11] [litv] Add new extractor

LiTV is a streaming platform providing free and paid legal contents in
Taiwan.
---
 youtube_dl/extractor/extractors.py |   1 +
 youtube_dl/extractor/litv.py       | 137 +++++++++++++++++++++++++++++
 2 files changed, 138 insertions(+)
 create mode 100644 youtube_dl/extractor/litv.py

diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index 14b4f245f..7bacef184 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -384,6 +384,7 @@ from .limelight import (
     LimelightChannelIE,
     LimelightChannelListIE,
 )
+from .litv import LiTVIE
 from .liveleak import LiveLeakIE
 from .livestream import (
     LivestreamIE,
diff --git a/youtube_dl/extractor/litv.py b/youtube_dl/extractor/litv.py
new file mode 100644
index 000000000..3356d015d
--- /dev/null
+++ b/youtube_dl/extractor/litv.py
@@ -0,0 +1,137 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import json
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    ExtractorError,
+    int_or_none,
+    smuggle_url,
+    unsmuggle_url,
+)
+
+
+class LiTVIE(InfoExtractor):
+    _VALID_URL = r'https?://www\.litv\.tv/vod/[^/]+/content\.do\?.*?\bid=(?P<id>[^&]+)'
+
+    _URL_TEMPLATE = 'https://www.litv.tv/vod/%s/content.do?id=%s'
+
+    _TESTS = [{
+        'url': 'https://www.litv.tv/vod/drama/content.do?brc_id=root&id=VOD00041610&isUHEnabled=true&autoPlay=1',
+        'info_dict': {
+            'id': 'VOD00041606',
+            'title': '花千骨',
+        },
+        'playlist_count': 50,
+    }, {
+        'url': 'https://www.litv.tv/vod/drama/content.do?brc_id=root&id=VOD00041610&isUHEnabled=true&autoPlay=1',
+        'info_dict': {
+            'id': 'VOD00041610',
+            'ext': 'mp4',
+            'title': '花千骨第1集',
+            'thumbnail': 're:https?://.*\.jpg$',
+            'description': 'md5:c7017aa144c87467c4fb2909c4b05d6f',
+            'episode_number': 1,
+        },
+        'params': {
+            'noplaylist': True,
+            'skip_download': True,  # m3u8 download
+        },
+        'skip': 'Georestricted to Taiwan',
+    }]
+
+    def _extract_playlist(self, season_list, video_id, vod_data, view_data, prompt=True):
+        episode_title = view_data['title']
+        content_id = season_list['contentId']
+
+        if prompt:
+            self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (content_id, video_id))
+
+        all_episodes = [
+            self.url_result(smuggle_url(
+                self._URL_TEMPLATE % (view_data['contentType'], episode['contentId']),
+                {'force_noplaylist': True}))  # To prevent infinite recursion
+            for episode in season_list['episode']]
+
+        return self.playlist_result(all_episodes, content_id, episode_title)
+
+    def _real_extract(self, url):
+        url, data = unsmuggle_url(url, {})
+
+        video_id = self._match_id(url)
+
+        noplaylist = self._downloader.params.get('noplaylist')
+        noplaylist_prompt = True
+        if 'force_noplaylist' in data:
+            noplaylist = data['force_noplaylist']
+            noplaylist_prompt = False
+
+        webpage = self._download_webpage(url, video_id)
+
+        view_data = dict(map(lambda t: (t[0], t[2]), re.findall(
+            r'viewData\.([a-zA-Z]+)\s*=\s*(["\'])([^"\']+)\2',
+            webpage)))
+
+        vod_data = self._parse_json(self._search_regex(
+            r'var\s+vod\s*=\s*([^;]+)', webpage, 'VOD data', default='{}'),
+            video_id)
+
+        season_list = list(vod_data.get('seasonList', {}).values())
+        if season_list:
+            if not noplaylist:
+                return self._extract_playlist(
+                    season_list[0], video_id, vod_data, view_data,
+                    prompt=noplaylist_prompt)
+
+            if noplaylist_prompt:
+                self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
+
+        # In browsers `getMainUrl` request is always issued. Usually this
+        # endpoint gives the same result as the data embedded in the webpage.
+        # If georestricted, there are no embedded data, so an extra request is
+        # necessary to get the error code
+        video_data = self._parse_json(self._search_regex(
+            r'uiHlsUrl\s*=\s*testBackendData\(([^;]+)\);',
+            webpage, 'video data', default='{}'), video_id)
+        if not video_data:
+            payload = {
+                'assetId': view_data['assetId'],
+                'watchDevices': vod_data['watchDevices'],
+                'contentType': view_data['contentType'],
+            }
+            video_data = self._download_json(
+                'https://www.litv.tv/vod/getMainUrl', video_id,
+                data=json.dumps(payload).encode('utf-8'),
+                headers={'Content-Type': 'application/json'})
+
+        if not video_data.get('fullpath'):
+            error_msg = video_data.get('errorMessage')
+            if error_msg == 'vod.error.outsideregionerror':
+                self.raise_geo_restricted('This video is available in Taiwan only')
+            if error_msg:
+                raise ExtractorError('%s said: %s' % (self.IE_NAME, error_msg), expected=True)
+            raise ExtractorError('Unexpected result from %s' % self.IE_NAME)
+
+        formats = self._extract_m3u8_formats(
+            video_data['fullpath'], video_id, ext='mp4', m3u8_id='hls')
+        for a_format in formats:
+            # LiTV HLS segments don't like compression
+            a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = True
+
+        title = view_data['title'] + view_data.get('secondaryMark', '')
+        description = view_data.get('description')
+        thumbnail = view_data.get('imageFile')
+        categories = [item['name'] for item in vod_data.get('category', [])]
+        episode = int_or_none(view_data.get('episode'))
+
+        return {
+            'id': video_id,
+            'formats': formats,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'categories': categories,
+            'episode_number': episode,
+        }

From f23a92a0cecac0d4db60e086e429793556347271 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com>
Date: Sun, 8 May 2016 20:02:54 +0600
Subject: [PATCH 10/11] [mva] Add extractor (Closes #6667)

---
 youtube_dl/extractor/extractors.py            |   4 +
 .../extractor/microsoftvirtualacademy.py      | 192 ++++++++++++++++++
 2 files changed, 196 insertions(+)
 create mode 100644 youtube_dl/extractor/microsoftvirtualacademy.py

diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index 7bacef184..a0bb3d4c2 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -409,6 +409,10 @@ from .metacafe import MetacafeIE
 from .metacritic import MetacriticIE
 from .mgoon import MgoonIE
 from .mgtv import MGTVIE
+from .microsoftvirtualacademy import (
+    MicrosoftVirtualAcademyIE,
+    MicrosoftVirtualAcademyCourseIE,
+)
 from .minhateca import MinhatecaIE
 from .ministrygrid import MinistryGridIE
 from .minoto import MinotoIE
diff --git a/youtube_dl/extractor/microsoftvirtualacademy.py b/youtube_dl/extractor/microsoftvirtualacademy.py
new file mode 100644
index 000000000..b7fea47ee
--- /dev/null
+++ b/youtube_dl/extractor/microsoftvirtualacademy.py
@@ -0,0 +1,192 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import (
+    compat_xpath,
+)
+from ..utils import (
+    int_or_none,
+    parse_duration,
+    smuggle_url,
+    unsmuggle_url,
+    xpath_text,
+)
+
+
+class MicrosoftVirtualAcademyBaseIE(InfoExtractor):
+    def _extract_base_url(self, course_id, display_id):
+        return self._download_json(
+            'https://api-mlxprod.microsoft.com/services/products/anonymous/%s' % course_id,
+            display_id, 'Downloading course base URL')
+
+    def _extract_chapter_and_title(self, title):
+        if not title:
+            return None, None
+        m = re.search(r'(?P<chapter>\d+)\s*\|\s*(?P<title>.+)', title)
+        return (int(m.group('chapter')), m.group('title')) if m else (None, title)
+
+
+class MicrosoftVirtualAcademyIE(MicrosoftVirtualAcademyBaseIE):
+    IE_NAME = 'mva'
+    IE_DESC = 'Microsoft Virtual Academy videos'
+    _VALID_URL = r'(?:%s:|https?://(?:mva\.microsoft|microsoftvirtualacademy)\.com/[^/]+/training-courses/[^/?#&]+-)(?P<course_id>\d+)(?::|\?l=)(?P<id>[\da-zA-Z]+_\d+)' % IE_NAME
+
+    _TESTS = [{
+        'url': 'https://mva.microsoft.com/en-US/training-courses/microsoft-azure-fundamentals-virtual-machines-11788?l=gfVXISmEB_6804984382',
+        'md5': '7826c44fc31678b12ad8db11f6b5abb9',
+        'info_dict': {
+            'id': 'gfVXISmEB_6804984382',
+            'ext': 'mp4',
+            'title': 'Course Introduction',
+            'formats': 'mincount:3',
+            'subtitles': {
+                'en': [{
+                    'ext': 'ttml',
+                }],
+            },
+        }
+    }, {
+        'url': 'mva:11788:gfVXISmEB_6804984382',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        url, smuggled_data = unsmuggle_url(url, {})
+
+        mobj = re.match(self._VALID_URL, url)
+        course_id = mobj.group('course_id')
+        video_id = mobj.group('id')
+
+        base_url = smuggled_data.get('base_url') or self._extract_base_url(course_id, video_id)
+
+        settings = self._download_xml(
+            '%s/content/content_%s/videosettings.xml?v=1' % (base_url, video_id),
+            video_id, 'Downloading video settings XML')
+
+        _, title = self._extract_chapter_and_title(xpath_text(
+            settings, './/Title', 'title', fatal=True))
+
+        formats = []
+
+        for sources in settings.findall(compat_xpath('.//MediaSources')):
+            if sources.get('videoType') == 'smoothstreaming':
+                continue
+            for source in sources.findall(compat_xpath('./MediaSource')):
+                video_url = source.text
+                if not video_url or not video_url.startswith('http'):
+                    continue
+                video_mode = source.get('videoMode')
+                height = int_or_none(self._search_regex(
+                    r'^(\d+)[pP]$', video_mode or '', 'height', default=None))
+                codec = source.get('codec')
+                acodec, vcodec = [None] * 2
+                if codec:
+                    codecs = codec.split(',')
+                    if len(codecs) == 2:
+                        acodec, vcodec = codecs
+                    elif len(codecs) == 1:
+                        vcodec = codecs[0]
+                formats.append({
+                    'url': video_url,
+                    'format_id': video_mode,
+                    'height': height,
+                    'acodec': acodec,
+                    'vcodec': vcodec,
+                })
+        self._sort_formats(formats)
+
+        subtitles = {}
+        for source in settings.findall(compat_xpath('.//MarkerResourceSource')):
+            subtitle_url = source.text
+            if not subtitle_url:
+                continue
+            subtitles.setdefault('en', []).append({
+                'url': '%s/%s' % (base_url, subtitle_url),
+                'ext': source.get('type'),
+            })
+
+        return {
+            'id': video_id,
+            'title': title,
+            'subtitles': subtitles,
+            'formats': formats
+        }
+
+
+class MicrosoftVirtualAcademyCourseIE(MicrosoftVirtualAcademyBaseIE):
+    IE_NAME = 'mva:course'
+    IE_DESC = 'Microsoft Virtual Academy courses'
+    _VALID_URL = r'(?:%s:|https?://(?:mva\.microsoft|microsoftvirtualacademy)\.com/[^/]+/training-courses/(?P<display_id>[^/?#&]+)-)(?P<id>\d+)' % IE_NAME
+
+    _TESTS = [{
+        'url': 'https://mva.microsoft.com/en-US/training-courses/microsoft-azure-fundamentals-virtual-machines-11788',
+        'info_dict': {
+            'id': '11788',
+            'title': 'Microsoft Azure Fundamentals: Virtual Machines',
+        },
+        'playlist_count': 36,
+    }, {
+        # with emphasized chapters
+        'url': 'https://mva.microsoft.com/en-US/training-courses/developing-windows-10-games-with-construct-2-16335',
+        'info_dict': {
+            'id': '16335',
+            'title': 'Developing Windows 10 Games with Construct 2',
+        },
+        'playlist_count': 10,
+    }, {
+        'url': 'https://www.microsoftvirtualacademy.com/en-US/training-courses/microsoft-azure-fundamentals-virtual-machines-11788',
+        'only_matching': True,
+    }, {
+        'url': 'mva:course:11788',
+        'only_matching': True,
+    }]
+
+    @classmethod
+    def suitable(cls, url):
+        return False if MicrosoftVirtualAcademyIE.suitable(url) else super(
+            MicrosoftVirtualAcademyCourseIE, cls).suitable(url)
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        course_id = mobj.group('id')
+        display_id = mobj.group('display_id')
+
+        base_url = self._extract_base_url(course_id, display_id)
+
+        manifest = self._download_json(
+            '%s/imsmanifestlite.json' % base_url,
+            display_id, 'Downloading course manifest JSON')['manifest']
+
+        organization = manifest['organizations']['organization'][0]
+
+        entries = []
+        for chapter in organization['item']:
+            chapter_number, chapter_title = self._extract_chapter_and_title(chapter.get('title'))
+            chapter_id = chapter.get('@identifier')
+            for item in chapter.get('item', []):
+                item_id = item.get('@identifier')
+                if not item_id:
+                    continue
+                metadata = item.get('resource', {}).get('metadata') or {}
+                if metadata.get('learningresourcetype') != 'Video':
+                    continue
+                _, title = self._extract_chapter_and_title(item.get('title'))
+                duration = parse_duration(metadata.get('duration'))
+                description = metadata.get('description')
+                entries.append({
+                    '_type': 'url_transparent',
+                    'url': smuggle_url(
+                        'mva:%s:%s' % (course_id, item_id), {'base_url': base_url}),
+                    'title': title,
+                    'description': description,
+                    'duration': duration,
+                    'chapter': chapter_title,
+                    'chapter_number': chapter_number,
+                    'chapter_id': chapter_id,
+                })
+
+        title = organization.get('title') or manifest.get('metadata', {}).get('title')
+
+        return self.playlist_result(entries, course_id, title)

From c52f4efaee2386a72c3f6b694fb4f4c3132ced55 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com>
Date: Sun, 8 May 2016 20:10:20 +0600
Subject: [PATCH 11/11] [mva] Improve _VALID_URLs

---
 youtube_dl/extractor/microsoftvirtualacademy.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/youtube_dl/extractor/microsoftvirtualacademy.py b/youtube_dl/extractor/microsoftvirtualacademy.py
index b7fea47ee..afd3e98ec 100644
--- a/youtube_dl/extractor/microsoftvirtualacademy.py
+++ b/youtube_dl/extractor/microsoftvirtualacademy.py
@@ -31,7 +31,7 @@ class MicrosoftVirtualAcademyBaseIE(InfoExtractor):
 class MicrosoftVirtualAcademyIE(MicrosoftVirtualAcademyBaseIE):
     IE_NAME = 'mva'
     IE_DESC = 'Microsoft Virtual Academy videos'
-    _VALID_URL = r'(?:%s:|https?://(?:mva\.microsoft|microsoftvirtualacademy)\.com/[^/]+/training-courses/[^/?#&]+-)(?P<course_id>\d+)(?::|\?l=)(?P<id>[\da-zA-Z]+_\d+)' % IE_NAME
+    _VALID_URL = r'(?:%s:|https?://(?:mva\.microsoft|(?:www\.)?microsoftvirtualacademy)\.com/[^/]+/training-courses/[^/?#&]+-)(?P<course_id>\d+)(?::|\?l=)(?P<id>[\da-zA-Z]+_\d+)' % IE_NAME
 
     _TESTS = [{
         'url': 'https://mva.microsoft.com/en-US/training-courses/microsoft-azure-fundamentals-virtual-machines-11788?l=gfVXISmEB_6804984382',
@@ -118,7 +118,7 @@ class MicrosoftVirtualAcademyIE(MicrosoftVirtualAcademyBaseIE):
 class MicrosoftVirtualAcademyCourseIE(MicrosoftVirtualAcademyBaseIE):
     IE_NAME = 'mva:course'
     IE_DESC = 'Microsoft Virtual Academy courses'
-    _VALID_URL = r'(?:%s:|https?://(?:mva\.microsoft|microsoftvirtualacademy)\.com/[^/]+/training-courses/(?P<display_id>[^/?#&]+)-)(?P<id>\d+)' % IE_NAME
+    _VALID_URL = r'(?:%s:|https?://(?:mva\.microsoft|(?:www\.)?microsoftvirtualacademy)\.com/[^/]+/training-courses/(?P<display_id>[^/?#&]+)-)(?P<id>\d+)' % IE_NAME
 
     _TESTS = [{
         'url': 'https://mva.microsoft.com/en-US/training-courses/microsoft-azure-fundamentals-virtual-machines-11788',