2016-10-03 15:27:09 +01:00
# coding: utf-8
from __future__ import unicode_literals
2020-01-16 01:03:19 -05:00
import re
2016-10-03 15:27:09 +01:00
from . common import InfoExtractor
2017-07-02 20:04:08 +07:00
from . . compat import compat_str
2020-01-16 01:03:19 -05:00
from . . utils import (
try_get ,
int_or_none
)
2016-10-03 15:27:09 +01:00
class ThisOldHouseIE(InfoExtractor):
    """Extractor for video pages on thisoldhouse.com.

    Matches the ``watch``, ``how-to`` and ``tv-episode`` sections of the
    site.  The page itself is only used to locate the video id and some
    series metadata; the result is returned as ``url_transparent`` with
    ``ie_key`` set to ``JWPlatform`` so that extractor resolves the media.
    """

    _VALID_URL = r'https?://(?:www\.)?thisoldhouse\.com/(?:watch|how-to|tv-episode)/(?P<id>[^/?#]+)'
    _TESTS = [{
        'url': 'https://www.thisoldhouse.com/how-to/how-to-build-storage-bench',
        'md5': '568acf9ca25a639f0c4ff905826b662f',
        'info_dict': {
            'id': '2REGtUDQ',
            'ext': 'mp4',
            'title': 'How to Build a Storage Bench',
            'description': 'In the workshop, Tom Silva and Kevin O\'Connor build a storage bench for an entryway.',
            'timestamp': 1442548800,
            'upload_date': '20150918',
        }
    }, {
        'url': 'https://www.thisoldhouse.com/watch/taking-modern-back-to-future-brookline-mid-century-modern-house',
        'md5': '5bff4b17e959527066efba9371bb81ba',
        'info_dict': {
            'id': '8WrwQuEr',
            'ext': 'mp4',
            'title': 'Taking Modern Back to the Future | Brookline Mid-Century Modern House',
            'description': 'After months of hard work, the lackluster mid-century box is a modern marvel once again. Kevin, Tommy and Richard tour the home and review all the special features that went into this beautiful space Sunil and Neha can now call home.',
            'upload_date': '20190624',
            'timestamp': 1561397187,
            'season_number': 40,
            'episode_number': 26
        },
    }, {
        'url': 'https://www.thisoldhouse.com/watch/arlington-arts-crafts-arts-and-crafts-class-begins',
        'only_matching': True,
    }, {
        'url': 'https://www.thisoldhouse.com/tv-episode/ask-toh-shelf-rough-electric',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a ``url_transparent`` result for the page at *url*.

        The result carries the video id plus ``series``, ``season_number``
        and ``episode_number``; each of those three is ``None`` when the
        corresponding pattern is not found in the page.
        """
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        # First choice: the id is exposed as a quoted HTML attribute.  The
        # ``\1`` backreference makes the value end at the same quote
        # character that opened it.
        video_id = self._search_regex(
            (r'data-mid=(["\'])(?P<id>(?:(?!\1).)+)\1',
             r'id=(["\'])inline-video-player-(?P<id>(?:(?!\1).)+)\1'),
            webpage, 'video id', default=None, group='id')
        if not video_id:
            # Fallback: read the id from the inline Drupal settings JSON.
            # No default is given for this search, so it is a required
            # match on this code path.
            drupal_settings = self._parse_json(self._search_regex(
                r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);',
                webpage, 'drupal settings'), display_id)
            # Prefer the explicit jwplatform entry; otherwise take the first
            # element of ``comScore`` (presumably a mapping keyed by video
            # id -- confirm against a live page).
            video_id = try_get(
                drupal_settings, lambda x: x['jwplatform']['video_id'],
                compat_str) or list(drupal_settings['comScore'])[0]

        # Optional episode metadata; every lookup tolerates a miss.
        series = self._search_regex(
            r'(?s)episode-breadcrumb.*?>.*?>(.*?)</a>', webpage,
            'series name', default=None)
        season_number = int_or_none(self._search_regex(
            r'Season (\d+);', webpage, 'season number',
            default=None))
        episode_number = int_or_none(self._search_regex(
            r'Season \d+;[\s\S]*Ep\.(\d+)', webpage, 'episode number',
            default=None))
        if series:
            # Drop the " TV" marker from the breadcrumb text.
            series = series.replace(' TV', '')

        return {
            '_type': 'url_transparent',
            'id': video_id,
            'series': series,
            'season_number': season_number,
            'episode_number': episode_number,
            'url': 'jwplatform:' + video_id,
            'ie_key': 'JWPlatform',
        }