2014-01-07 10:04:48 +01:00
from __future__ import unicode_literals
2014-11-26 12:35:57 +01:00
2013-12-16 22:18:27 +01:00
import re
from . common import InfoExtractor
class AcademicEarthCourseIE(InfoExtractor):
    """Extractor for Academic Earth playlist pages.

    Handles URLs of the form ``http(s)://[www.]academicearth.org/playlists/<id>``
    and returns a playlist whose entries are the lecture links found on the
    page.
    """

    # The named group ``id`` captures the playlist slug: everything after
    # ``/playlists/`` up to the next ``?``, ``#`` or ``/``.
    _VALID_URL = r'^https?://(?:www\.)?academicearth\.org/playlists/(?P<id>[^?#/]+)'
    IE_NAME = 'AcademicEarth:Course'
    # Sample playlist URLs with the metadata and entry counts expected for
    # each of them.
    _TESTS = [{
        'url': 'http://academicearth.org/playlists/laws-of-nature/',
        'info_dict': {
            'id': 'laws-of-nature',
            'title': 'Laws of Nature',
            'description': 'Introduce yourself to the laws of nature with these free online college lectures from Yale, Harvard, and MIT.',
        },
        'playlist_count': 3,
    }, {
        'url': 'https://academicearth.org/playlists/first-day-of-freshman-year/',
        'info_dict': {
            'id': 'first-day-of-freshman-year',
            'title': 'FIRST DAY OF FRESHMAN YEAR',
            'description': 'Relive the first day of your freshman year with a series of first lectures from introductory college courses at MIT, Yale, and Stanford.',
        },
        'playlist_count': 3,
    }, {
        'url': 'https://academicearth.org/playlists/financial-crisis',
        'info_dict': {
            'id': 'financial-crisis',
            'title': 'UNDERSTANDING THE FINANCIAL CRISIS',
            'description': 'Expert perspectives on the Financial Crisis and how to manage it.',
        },
        'playlist_count': 7,
    }]

    def _real_extract(self, url):
        """Build a playlist result for the Academic Earth playlist at *url*.

        The page is downloaded once; the title, description and lecture
        links are all scraped from that HTML.

        Returns a dict with ``_type`` set to ``'playlist'`` plus the keys
        ``id``, ``title``, ``description`` and ``entries``.
        """
        playlist_id = self._match_id(url)

        webpage = self._download_webpage(url, playlist_id)
        title = self._html_search_regex(
            r'<h1 class="playlist-name"[^>]*?>(.*?)</h1>', webpage, 'title')
        # The description lookup is requested with fatal=False, unlike the
        # title lookup above -- presumably so that a page without an excerpt
        # does not abort extraction (confirm against the helper).
        description = self._html_search_regex(
            r'<p class="excerpt"[^>]*?>(.*?)</p>',
            webpage, 'description', fatal=False)
        # Every lecture preview item contributes the href of its link.
        urls = re.findall(
            r'<li class="lecture-preview">\s*?<a target="_blank" href="([^"]+)">',
            webpage)
        # Each lecture URL is wrapped with url_result rather than being
        # extracted here.
        entries = [self.url_result(u) for u in urls]

        return {
            '_type': 'playlist',
            'id': playlist_id,
            'title': title,
            'description': description,
            'entries': entries,
        }