From 03999993af576a89aaed3f6d67d71206020669c7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?D=C3=A9stin=20Reed?= Date: Sat, 1 Oct 2016 13:54:35 +0200 Subject: [PATCH] [techtalks] Use get_element_by_class and rely on _match_id --- youtube_dl/extractor/techtalks.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/techtalks.py b/youtube_dl/extractor/techtalks.py index 16e945d8e..d9e0a5c71 100644 --- a/youtube_dl/extractor/techtalks.py +++ b/youtube_dl/extractor/techtalks.py @@ -4,14 +4,13 @@ import re from .common import InfoExtractor from ..utils import ( - get_element_by_attribute, + get_element_by_class, clean_html, ) class TechTalksIE(InfoExtractor): _VALID_URL = r'https?://techtalks\.tv/talks/[^/]*/(?P<id>\d+)/' - _TEST = { 'url': 'http://techtalks.tv/talks/learning-topic-models-going-beyond-svd/57758/', 'info_dict': { @@ -41,15 +40,14 @@ class TechTalksIE(InfoExtractor): } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - talk_id = mobj.group('id') + talk_id = self._match_id(url) webpage = self._download_webpage(url, talk_id) rtmp_url = self._search_regex( r'netConnectionUrl: \'(.*?)\'', webpage, 'rtmp url') play_path = self._search_regex( r'href=\'(.*?)\' [^>]*id="flowplayer_presenter"', webpage, 'presenter play path') - title = clean_html(get_element_by_attribute('class', 'title', webpage)) + title = clean_html(get_element_by_class('title', webpage)) video_info = { 'id': talk_id, 'title': title,