From 3e70b4f036e8e2eb782a9430855af012c388b35b Mon Sep 17 00:00:00 2001
From: Olivier Berger <olivier.berger@telecom-sudparis.eu>
Date: Fri, 1 May 2020 19:09:32 +0200
Subject: [PATCH 1/4] Initial tests

---
 youtube_dl/extractor/bbb.py | 72 +++++++++++++++++++++++++++++++++++++
 1 file changed, 72 insertions(+)
 create mode 100644 youtube_dl/extractor/bbb.py
diff --git a/youtube_dl/extractor/bbb.py b/youtube_dl/extractor/bbb.py
new file mode 100644
index 000000000..a3e58fb47
--- /dev/null
+++ b/youtube_dl/extractor/bbb.py
@@ -0,0 +1,72 @@
+# coding: utf-8
+
+# Extract material from recordings made inside BigBlueButton
+
+# BigBlueButton records multiple videos :
+#  - speaker speech & webcam
+#  - screesharing
+# for slides, annotations, etc. the playback app typically renders them on the fly upon playback
+# so it may not be easy to capture that with youtube-dl
+
+
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+from .openload import PhantomJSwrapper
+
+# TODO : thumbnails
+
+class BigBlueButtonIE(InfoExtractor):
+    _VALID_URL = r'(?P<website>https?://[^/]+)/playback/presentation/2.0/playback.html\?meetingId=(?P<id>[0-9a-f\-]+)'
+    _TEST = {
+        'url': 'https://mybbb.example.com/playback/presentation/2.0/playback.html?meetingId=12345679a50a715e8d6dc692df996dceb8d788f8-1234566973639',
+        'md5': 'TODO: md5 sum of the first 10241 bytes of the video file (use --test)',
+        'info_dict': {
+            'id': '42',
+            'ext': 'mp4',
+            'title': 'Video title goes here',
+            'thumbnail': r're:^https?://.*\.jpg$',
+            # TODO more properties, either as:
+            # * A value
+            # * MD5 checksum; start the string with md5:
+            # * A regular expression; start the string with re:
+            # * Any Python type (for example int or float)
+        }
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        m = self._VALID_URL_RE.match(url)
+        website = m.group('website')
+        #print(video_id)
+        print(website)
+
+        webpage = self._download_webpage(url, video_id)
+
+        # print(webpagejs)
+
+        # TODO more code goes here, for example ...
+        #title = self._html_search_regex(r'<h1>(.+?)</h1>', webpage, 'title')
+        title = video_id
+
+        formats = []
+
+        sources = { 'speaker': '/video/webcams.webm', 'slides': '/deskshare/deskshare.webm' }
+        for format_id, source in sources.items():
+            video_url = website + '/presentation/' + video_id + source
+            formats.append({
+                'url': video_url,
+                'format_id': format_id
+            })
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'formats': formats,
+ #           'description': self._og_search_description(webpage),
+#            'uploader': self._search_regex(r'<div[^>]+id="uploader"[^>]*>([^<]+)<', webpage, 'uploader', fatal=False),
+            # TODO more properties (see youtube_dl/extractor/common.py)
+        }
+    

From a7756ff5976533bdcc19dd5d61ad26c7532d7eb4 Mon Sep 17 00:00:00 2001
From: Olivier Berger <olivier.berger@telecom-sudparis.eu>
Date: Fri, 1 May 2020 22:07:23 +0200
Subject: [PATCH 2/4] Mostly ready for contribution

---
 youtube_dl/extractor/bbb.py        | 56 +++++++++++++++++++++++-------
 youtube_dl/extractor/extractors.py |  1 +
 2 files changed, 45 insertions(+), 12 deletions(-)

diff --git a/youtube_dl/extractor/bbb.py b/youtube_dl/extractor/bbb.py
index a3e58fb47..8186ee741 100644
--- a/youtube_dl/extractor/bbb.py
+++ b/youtube_dl/extractor/bbb.py
@@ -1,5 +1,7 @@
 # coding: utf-8
 
+# Contributed by Olivier Berger <olivier.berger@telecom-sudparis.eu>
+
 # Extract material from recordings made inside BigBlueButton
 
 # BigBlueButton records multiple videos :
@@ -8,14 +10,21 @@
 # for slides, annotations, etc. the playback app typically renders them on the fly upon playback
 # so it may not be easy to capture that with youtube-dl
 
+# Extract a merged video, without the slides with
+# youtube-dl --merge-output-format mkv -f slides+speaker "https://mybbb.example.com/playback/presentation/2.0/playback.html?meetingId=12345679a50a715e8d6dc692df996dceb8d788f8-1234566973639"
 
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
 
-from .openload import PhantomJSwrapper
+from ..utils import (
+    unified_timestamp,
+    xpath_text,
+    xpath_with_ns,
+)
 
-# TODO : thumbnails
+_s = lambda p: xpath_with_ns(p, {'svg': 'http://www.w3.org/2000/svg'})
+_x = lambda p: xpath_with_ns(p, {'xlink': 'http://www.w3.org/1999/xlink'})
 
 class BigBlueButtonIE(InfoExtractor):
     _VALID_URL = r'(?P<website>https?://[^/]+)/playback/presentation/2.0/playback.html\?meetingId=(?P<id>[0-9a-f\-]+)'
@@ -39,17 +48,42 @@ class BigBlueButtonIE(InfoExtractor):
         video_id = self._match_id(url)
         m = self._VALID_URL_RE.match(url)
         website = m.group('website')
-        #print(video_id)
-        print(website)
 
         webpage = self._download_webpage(url, video_id)
 
-        # print(webpagejs)
+        # Extract basic metadata (more available in metadata.xml)
+        metadata_url = website + '/presentation/' + video_id + '/metadata.xml'
+        metadata = self._download_xml(metadata_url, video_id)
 
-        # TODO more code goes here, for example ...
-        #title = self._html_search_regex(r'<h1>(.+?)</h1>', webpage, 'title')
-        title = video_id
+        id = xpath_text(metadata, 'id')
+        meta = metadata.find('./meta')
+        meeting_name = xpath_text(meta, 'meetingName')
+        start_time = xpath_text(metadata, 'start_time')
 
+        title = meeting_name
+
+        # Unused for now (thumbnails are not returned yet); NOTE: `+=` with a dict only adds its keys, use append()
+        thumbnails = []
+        images = metadata.find('./playback/extensions/preview/images')
+        for image in images:
+            thumbnails += {
+                'url': image.text.strip(),
+                'width': image.get('width'),
+                'height': image.get('height')
+                }
+
+        # This code mostly useless unless one know how to process slides
+        shapes_url = website + '/presentation/' + video_id + '/shapes.svg'
+        shapes = self._download_xml(shapes_url, video_id)
+        images = shapes.findall(_s("./svg:image[@class='slide']"))
+        slides = []
+        for image in images:
+            slides.append(image.get(_x('xlink:href')))
+
+        # We produce 2 formats :
+        # - the 'webcams.webm' one, for speaker (can be used for merging its audio)
+        # - the 'deskshare.webm' one, for screen sharing (can be used
+        #   for merging its video) - it lacks the slides unfortunately
         formats = []
 
         sources = { 'speaker': '/video/webcams.webm', 'slides': '/deskshare/deskshare.webm' }
@@ -65,8 +99,6 @@ class BigBlueButtonIE(InfoExtractor):
             'id': video_id,
             'title': title,
             'formats': formats,
- #           'description': self._og_search_description(webpage),
-#            'uploader': self._search_regex(r'<div[^>]+id="uploader"[^>]*>([^<]+)<', webpage, 'uploader', fatal=False),
-            # TODO more properties (see youtube_dl/extractor/common.py)
+            'timestamp': int(start_time),
+#            'thumbnails': thumbnails
         }
-    
diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index e407ab3d9..9b568bb61 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -81,6 +81,7 @@ from .awaan import (
 from .azmedien import AZMedienIE
 from .baidu import BaiduVideoIE
 from .bandcamp import BandcampIE, BandcampAlbumIE, BandcampWeeklyIE
+from .bbb import BigBlueButtonIE
 from .bbc import (
     BBCCoUkIE,
     BBCCoUkArticleIE,

From 7f84a8f13c7055702efd1250883f8839c758db11 Mon Sep 17 00:00:00 2001
From: Olivier Berger <olivier.berger@telecom-sudparis.eu>
Date: Fri, 1 May 2020 22:40:10 +0200
Subject: [PATCH 3/4] Fixed format names and added tests

---
 youtube_dl/extractor/bbb.py | 64 +++++++++++++++++++++++--------------
 1 file changed, 40 insertions(+), 24 deletions(-)

diff --git a/youtube_dl/extractor/bbb.py b/youtube_dl/extractor/bbb.py
index 8186ee741..c124add71 100644
--- a/youtube_dl/extractor/bbb.py
+++ b/youtube_dl/extractor/bbb.py
@@ -5,13 +5,15 @@
 # Extract material from recordings made inside BigBlueButton
 
 # BigBlueButton records multiple videos :
-#  - speaker speech & webcam
-#  - screesharing
-# for slides, annotations, etc. the playback app typically renders them on the fly upon playback
-# so it may not be easy to capture that with youtube-dl
+#  - webcams feed: audio and webcam views (useful for extracting the audio)
+#  - deskshare captures: screen sharing, but not the slides
 
-# Extract a merged video, without the slides with
-# youtube-dl --merge-output-format mkv -f slides+speaker "https://mybbb.example.com/playback/presentation/2.0/playback.html?meetingId=12345679a50a715e8d6dc692df996dceb8d788f8-1234566973639"
+# Slides, annotations, polls and other content shown to the audience
+# are rendered on the fly (as SVG) by the playback app, so they are
+# not easy to capture with youtube-dl
+
+# To extract a merged video (which will lack the slides and the webcam views), run:
+# youtube-dl --merge-output-format mkv -f deskshare+webcams "https://mybbb.example.com/playback/presentation/2.0/playback.html?meetingId=12345679a50a715e8d6dc692df996dceb8d788f8-1234566973639"
 
 from __future__ import unicode_literals
 
@@ -28,21 +30,35 @@ _x = lambda p: xpath_with_ns(p, {'xlink': 'http://www.w3.org/1999/xlink'})
 
 class BigBlueButtonIE(InfoExtractor):
     _VALID_URL = r'(?P<website>https?://[^/]+)/playback/presentation/2.0/playback.html\?meetingId=(?P<id>[0-9a-f\-]+)'
-    _TEST = {
-        'url': 'https://mybbb.example.com/playback/presentation/2.0/playback.html?meetingId=12345679a50a715e8d6dc692df996dceb8d788f8-1234566973639',
-        'md5': 'TODO: md5 sum of the first 10241 bytes of the video file (use --test)',
-        'info_dict': {
-            'id': '42',
-            'ext': 'mp4',
-            'title': 'Video title goes here',
-            'thumbnail': r're:^https?://.*\.jpg$',
-            # TODO more properties, either as:
-            # * A value
-            # * MD5 checksum; start the string with md5:
-            # * A regular expression; start the string with re:
-            # * Any Python type (for example int or float)
-        }
-    }
+    _TESTS = [
+        {
+            'url': 'https://webconf.imtbs-tsp.eu/playback/presentation/2.0/playback.html?meetingId=522d1d51bee82a57b535ced7091addeecb074d47-1588254659509',
+            'md5': 'dc98924b35c2234a8c7b3a61b30d968e',
+            'info_dict': {
+                'id': '522d1d51bee82a57b535ced7091addeecb074d47-1588254659509',
+                'ext': 'webm',
+                'title': 'PRO 3600',
+                'timestamp': 1588254659509,
+                'format': 'webcams - unknown'
+            },
+            'params': {
+                'format': 'webcams',
+            }
+        },
+        {
+            'url': 'https://webconf.imtbs-tsp.eu/playback/presentation/2.0/playback.html?meetingId=522d1d51bee82a57b535ced7091addeecb074d47-1588254659509',
+            'md5': '99c9191dbe03dd5eab34ba02352f1742',
+            'info_dict': {
+                'id': '522d1d51bee82a57b535ced7091addeecb074d47-1588254659509',
+                'ext': 'webm',
+                'title': 'PRO 3600',
+                'timestamp': 1588254659509,
+                'format': 'deskshare - unknown'
+            },
+            'params': {
+                'format': 'deskshare',
+            }
+        }]
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
@@ -81,12 +97,12 @@ class BigBlueButtonIE(InfoExtractor):
             slides.append(image.get(_x('xlink:href')))
 
         # We produce 2 formats :
-        # - the 'webcams.webm' one, for speaker (can be used for merging its audio)
+        # - the 'webcams.webm' one, for webcams (can be used for merging its audio)
         # - the 'deskshare.webm' one, for screen sharing (can be used
-        #   for merging its video) - it lacks the slides unfortunately
+        #   for merging its video) - it lacks the slides, unfortunately
         formats = []
 
-        sources = { 'speaker': '/video/webcams.webm', 'slides': '/deskshare/deskshare.webm' }
+        sources = { 'webcams': '/video/webcams.webm', 'deskshare': '/deskshare/deskshare.webm' }
         for format_id, source in sources.items():
             video_url = website + '/presentation/' + video_id + source
             formats.append({

From 3ceed8f6351b0b89aae511fdde828f7f56d9657e Mon Sep 17 00:00:00 2001
From: Olivier Berger <olivier.berger@telecom-sudparis.eu>
Date: Fri, 1 May 2020 22:57:30 +0200
Subject: [PATCH 4/4] Flake 8 happy

---
 youtube_dl/extractor/bbb.py | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/youtube_dl/extractor/bbb.py b/youtube_dl/extractor/bbb.py
index c124add71..3e918c9be 100644
--- a/youtube_dl/extractor/bbb.py
+++ b/youtube_dl/extractor/bbb.py
@@ -20,7 +20,6 @@ from __future__ import unicode_literals
 from .common import InfoExtractor
 
 from ..utils import (
-    unified_timestamp,
     xpath_text,
     xpath_with_ns,
 )
@@ -28,6 +27,7 @@ from ..utils import (
 _s = lambda p: xpath_with_ns(p, {'svg': 'http://www.w3.org/2000/svg'})
 _x = lambda p: xpath_with_ns(p, {'xlink': 'http://www.w3.org/1999/xlink'})
 
+
 class BigBlueButtonIE(InfoExtractor):
     _VALID_URL = r'(?P<website>https?://[^/]+)/playback/presentation/2.0/playback.html\?meetingId=(?P<id>[0-9a-f\-]+)'
     _TESTS = [
@@ -65,7 +65,8 @@ class BigBlueButtonIE(InfoExtractor):
         m = self._VALID_URL_RE.match(url)
         website = m.group('website')
 
-        webpage = self._download_webpage(url, video_id)
+        # The playback page is not parsed; it is fetched only to make sure the recording exists
+        self._download_webpage(url, video_id)
 
         # Extract basic metadata (more available in metadata.xml)
         metadata_url = website + '/presentation/' + video_id + '/metadata.xml'
@@ -86,7 +87,7 @@ class BigBlueButtonIE(InfoExtractor):
                 'url': image.text.strip(),
                 'width': image.get('width'),
                 'height': image.get('height')
-                }
+            }
 
         # This code mostly useless unless one know how to process slides
         shapes_url = website + '/presentation/' + video_id + '/shapes.svg'
@@ -102,7 +103,10 @@ class BigBlueButtonIE(InfoExtractor):
         #   for merging its video) - it lacks the slides, unfortunately
         formats = []
 
-        sources = { 'webcams': '/video/webcams.webm', 'deskshare': '/deskshare/deskshare.webm' }
+        sources = {
+            'webcams': '/video/webcams.webm',
+            'deskshare': '/deskshare/deskshare.webm'
+        }
         for format_id, source in sources.items():
             video_url = website + '/presentation/' + video_id + source
             formats.append({
@@ -112,9 +116,8 @@ class BigBlueButtonIE(InfoExtractor):
         self._sort_formats(formats)
 
         return {
-            'id': video_id,
+            'id': id,
             'title': title,
             'formats': formats,
             'timestamp': int(start_time),
-#            'thumbnails': thumbnails
         }