[camdemy] Add support for folders
This commit is contained in:
		
							parent
							
								
									8367d3f3cb
								
							
						
					
					
						commit
						c40feaba77
					
				| @ -49,7 +49,10 @@ from .brightcove import BrightcoveIE | |||||||
| from .buzzfeed import BuzzFeedIE | from .buzzfeed import BuzzFeedIE | ||||||
| from .byutv import BYUtvIE | from .byutv import BYUtvIE | ||||||
| from .c56 import C56IE | from .c56 import C56IE | ||||||
| from .camdemy import CamdemyIE | from .camdemy import ( | ||||||
|  |     CamdemyIE, | ||||||
|  |     CamdemyFolderIE | ||||||
|  | ) | ||||||
| from .canal13cl import Canal13clIE | from .canal13cl import Canal13clIE | ||||||
| from .canalplus import CanalplusIE | from .canalplus import CanalplusIE | ||||||
| from .canalc2 import Canalc2IE | from .canalc2 import Canalc2IE | ||||||
|  | |||||||
| @ -4,12 +4,12 @@ from __future__ import unicode_literals | |||||||
| import re | import re | ||||||
| 
 | 
 | ||||||
| from .common import InfoExtractor | from .common import InfoExtractor | ||||||
| from ..compat import compat_urlparse | from ..compat import compat_urllib_parse | ||||||
| from ..utils import parse_iso8601 | from ..utils import parse_iso8601 | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| class CamdemyIE(InfoExtractor): | class CamdemyIE(InfoExtractor): | ||||||
|     _VALID_URL = r'http://www.camdemy.com/media/(?P<id>\d+).*' |     _VALID_URL = r'http://www.camdemy.com/media/(?P<id>\d+)' | ||||||
|     _TESTS = [{ |     _TESTS = [{ | ||||||
|         # single file |         # single file | ||||||
|         'url': 'http://www.camdemy.com/media/5181/', |         'url': 'http://www.camdemy.com/media/5181/', | ||||||
| @ -69,25 +69,25 @@ class CamdemyIE(InfoExtractor): | |||||||
|             'http://www.camdemy.com/oembed/?format=json&url=' + url, video_id) |             'http://www.camdemy.com/oembed/?format=json&url=' + url, video_id) | ||||||
| 
 | 
 | ||||||
|         thumb_url = oembed_obj['thumbnail_url'] |         thumb_url = oembed_obj['thumbnail_url'] | ||||||
|         video_folder = compat_urlparse.urljoin(thumb_url, 'video/') |         video_folder = compat_urllib_parse.urljoin(thumb_url, 'video/') | ||||||
|         fileListXML = self._download_xml( |         fileListXML = self._download_xml( | ||||||
|             compat_urlparse.urljoin(video_folder, 'fileList.xml'), |             compat_urllib_parse.urljoin(video_folder, 'fileList.xml'), | ||||||
|             video_id, 'Filelist XML') |             video_id, 'Filelist XML') | ||||||
|         fileName = fileListXML.find('./video/item/fileName').text |         fileName = fileListXML.find('./video/item/fileName').text | ||||||
| 
 | 
 | ||||||
|         creation_time = self._html_search_regex( |         creation_time = self._html_search_regex( | ||||||
|             r"<div class='title'>Posted :</div>.*<div class='value'>([0-9:\- ]+)<", |             r"<div class='title'>Posted :</div>[\r\n ]*<div class='value'>([^<>]+)<", | ||||||
|             page, 'creation time', flags=re.MULTILINE | re.DOTALL) + '+08:00' |             page, 'creation time', flags=re.MULTILINE) + '+08:00' | ||||||
|         creation_timestamp = parse_iso8601(creation_time, delimiter=' ') |         creation_timestamp = parse_iso8601(creation_time, delimiter=' ') | ||||||
| 
 | 
 | ||||||
|         view_count_str = self._html_search_regex( |         view_count_str = self._html_search_regex( | ||||||
|             r"<div class='title'>Views :</div>.*<div class='value'>([0-9,]+)<", |             r"<div class='title'>Views :</div>[\r\n ]*<div class='value'>([^<>]+)<", | ||||||
|             page, 'view count', flags=re.MULTILINE | re.DOTALL) |             page, 'view count', flags=re.MULTILINE) | ||||||
|         views = int(view_count_str.replace(',', '')) |         views = int(view_count_str.replace(',', '')) | ||||||
| 
 | 
 | ||||||
|         return { |         return { | ||||||
|             'id': video_id, |             'id': video_id, | ||||||
|             'url': compat_urlparse.urljoin(video_folder, fileName), |             'url': compat_urllib_parse.urljoin(video_folder, fileName), | ||||||
|             'title': oembed_obj['title'], |             'title': oembed_obj['title'], | ||||||
|             'thumbnail': thumb_url, |             'thumbnail': thumb_url, | ||||||
|             'description': self._html_search_meta('description', page), |             'description': self._html_search_meta('description', page), | ||||||
| @ -96,3 +96,53 @@ class CamdemyIE(InfoExtractor): | |||||||
|             'timestamp': creation_timestamp, |             'timestamp': creation_timestamp, | ||||||
|             'view_count': views, |             'view_count': views, | ||||||
|         } |         } | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
class CamdemyFolderIE(InfoExtractor):
    """Extractor for camdemy.com folder pages, returned as a playlist.

    The folder page is requested with ``displayMode=list`` and every
    ``/media/<id>`` link found in it becomes one playlist entry, handed
    back through ``url_result`` for later resolution.
    """

    # Dots are escaped so that they only match a literal '.' in the host.
    _VALID_URL = r'http://www\.camdemy\.com/folder/(?P<id>\d+)'
    _TESTS = [{
        # media links on the folder page carry a trailing slash
        'url': 'http://www.camdemy.com/folder/450',
        'info_dict': {
            'id': '450',
            'title': '信號與系統 2012 & 2011 (Signals and Systems)',
        },
        'playlist_mincount': 145
    }, {
        # media links on the folder page have no trailing slash,
        # and the folder is multi-page
        'url': 'http://www.camdemy.com/folder/853',
        'info_dict': {
            'id': '853',
            'title': '科學計算 - 使用 Matlab'
        },
        'playlist_mincount': 20
    }, {
        # URL already carries a displayMode parameter; exercises the code
        # that rewrites the query string
        'url': 'http://www.camdemy.com/folder/853/?displayMode=defaultOrderByOrg',
        'info_dict': {
            'id': '853',
            'title': '科學計算 - 使用 Matlab'
        },
        'playlist_mincount': 20
    }]

    def _real_extract(self, url):
        """Build a playlist out of the media links of a folder page.

        ``url`` is a folder URL matching ``_VALID_URL``. Its query string is
        rewritten to contain ``displayMode=list`` (overriding any existing
        ``displayMode``), the resulting page is downloaded, and the value of
        ``self.playlist_result`` is returned with one ``url_result`` entry
        per ``href='/media/<digits>'`` link, the folder id, and the page's
        ``keywords`` meta content as the title.
        """
        # URL splitting/joining helpers live in compat_urlparse;
        # compat_urllib_parse maps to plain ``urllib`` on Python 2, which
        # only offers urlencode and friends (no urlparse/parse_qsl/urlunparse).
        from ..compat import compat_urlparse

        folder_id = self._match_id(url)

        # Add displayMode=list so that all links are displayed in a single page
        parsed_url = list(compat_urlparse.urlparse(url))
        # Index 4 of the urlparse 6-tuple is the query string.
        query = dict(compat_urlparse.parse_qsl(parsed_url[4]))
        query['displayMode'] = 'list'
        parsed_url[4] = compat_urllib_parse.urlencode(query)
        final_url = compat_urlparse.urlunparse(parsed_url)

        page = self._download_webpage(final_url, folder_id)
        # The optional '/' covers links both with and without a trailing slash.
        matches = re.findall(r"href='(/media/\d+/?)'", page)

        entries = [self.url_result('http://www.camdemy.com' + media_path)
                   for media_path in matches]

        folder_title = self._html_search_meta('keywords', page)

        return self.playlist_result(entries, folder_id, folder_title)
|  | |||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user