Merge branch 'pr-democracynow' of https://github.com/atomicdryad/youtube-dl into atomicdryad-pr-democracynow
This commit is contained in:
		
						commit
						33a513faf7
					
				| @ -124,6 +124,7 @@ from .dbtv import DBTVIE | |||||||
| from .dcn import DCNIE | from .dcn import DCNIE | ||||||
| from .dctp import DctpTvIE | from .dctp import DctpTvIE | ||||||
| from .deezer import DeezerPlaylistIE | from .deezer import DeezerPlaylistIE | ||||||
|  | from .democracynow import DemocracynowIE | ||||||
| from .dfb import DFBIE | from .dfb import DFBIE | ||||||
| from .dhm import DHMIE | from .dhm import DHMIE | ||||||
| from .dotsub import DotsubIE | from .dotsub import DotsubIE | ||||||
|  | |||||||
							
								
								
									
										85
									
								
								youtube_dl/extractor/democracynow.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										85
									
								
								youtube_dl/extractor/democracynow.py
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,85 @@ | |||||||
|  | # coding: utf-8 | ||||||
|  | from __future__ import unicode_literals | ||||||
|  | 
 | ||||||
|  | import re | ||||||
|  | from .common import InfoExtractor | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
class DemocracynowIE(InfoExtractor):
    """Information extractor for democracynow.org pages.

    The page embeds a JSON object (located via its "related_video_xml" key)
    holding the media URLs ('file', 'audio'), an optional caption file
    ('caption_file'), an optional 'start' offset and the 'title'.
    """

    # The dot in the host name is escaped so that it only matches a literal '.'.
    _VALID_URL = r'https?://(?:www\.)?democracynow\.org/?(?P<id>[^\?]*)'
    IE_NAME = 'democracynow'
    _TESTS = [{
        'url': 'http://www.democracynow.org/shows/2015/7/3',
        'info_dict': {
            'id': '2015-0703-001',
            'ext': 'mp4',
            'title': 'July 03, 2015 - Democracy Now!',
            'description': 'A daily independent global news hour with Amy Goodman & Juan Gonz\xe1lez "What to the Slave is 4th of July?": James Earl Jones Reads Frederick Douglass\u2019 Historic Speech : "This Flag Comes Down Today": Bree Newsome Scales SC Capitol Flagpole, Takes Down Confederate Flag : "We Shall Overcome": Remembering Folk Icon, Activist Pete Seeger in His Own Words & Songs',
            'uploader': 'Democracy Now',
            'upload_date': None,
        },
    }, {
        'url': 'http://www.democracynow.org/2015/7/3/this_flag_comes_down_today_bree',
        'info_dict': {
            'id': '2015-0703-001',
            'ext': 'mp4',
            'title': '"This Flag Comes Down Today": Bree Newsome Scales SC Capitol Flagpole, Takes Down Confederate Flag',
            'description': 'md5:4d2bc4f0d29f5553c2210a4bc7761a21',
            'uploader': 'Democracy Now',
            'upload_date': None,
        },
    }]

    @staticmethod
    def _absolute_url(base_host, link):
        """Return link unchanged if it is an http(s) URL, else prefix it with base_host."""
        if re.match(r'^https?://', link):
            return link
        return base_host + link

    def _real_extract(self, url):
        """Extract formats, subtitles and metadata for the page at url.

        Returns an info dict with 'id', 'title', 'description', 'uploader',
        'subtitles' and 'formats'. A page without the embedded JSON object
        makes _search_regex raise its own "unable to extract" error.
        """
        # An empty path (site root) gets the placeholder display id 'home'.
        display_id = self._match_id(url) or 'home'
        # scheme://host of the requested page, used to absolutize relative links.
        base_host = re.search(r'^(.+?://[^/]+)', url).group(1)

        webpage = self._download_webpage(url, display_id)
        re_desc = re.search(
            r'<meta property=.og:description. content=(["\'])(.+?)\1',
            webpage, re.DOTALL)
        description = re_desc.group(2) if re_desc else ''

        # No default here: if the blob is missing, _search_regex reports a
        # proper extraction error instead of handing None to _parse_json.
        jstr = self._search_regex(
            r'({.+?"related_video_xml".+?})', webpage, 'json')
        js = self._parse_json(jstr, display_id)

        subtitles = {}
        # Only 'caption_file' is handled; 'chapter_file' (vtt) is pending
        # support that doesn't clobber the srt entry.
        caption_url = js.get('caption_file')
        if caption_url:
            caption_url = self._absolute_url(base_host, caption_url)
            ext_match = re.search(r'\.([^\.]+)$', caption_url)
            # Skip the track rather than crash if no extension can be found.
            if ext_match:
                subtitles['eng'] = [{
                    'ext': ext_match.group(1),
                    'url': caption_url,
                }]

        video_id = None
        formats = []
        start = js.get('start')
        for key in ('file', 'audio'):
            media_url = js.get(key)
            if not media_url:
                continue
            media_url = self._absolute_url(base_host, media_url)
            # Expected layout: .../<dir>/[dn]<fn>.<ext>[?query]
            purl = re.search(
                r'/(?P<dir>[^/]+)/(?:dn)?(?P<fn>[^/]+?)\.(?P<ext>[^\.\?]+)(?P<hasparams>\?|$)',
                media_url)
            fmt = {}
            if purl is not None:
                if video_id is None:
                    video_id = purl.group('fn')
                fmt['format_id'] = purl.group('dir')
                fmt['ext'] = purl.group('ext')
            if start is not None:
                # Append as an additional query parameter when one is present.
                separator = '&' if '?' in media_url else '?'
                media_url = '%s%sstart=%s' % (media_url, separator, start)
            fmt['url'] = media_url
            formats.append(fmt)
        self._sort_formats(formats)

        return {
            # Fall back to the display id when no media URL had the expected
            # layout, so the result never carries a None id.
            'id': video_id or display_id,
            'title': js.get('title'),
            'description': description,
            'uploader': 'Democracy Now',
            'subtitles': subtitles,
            'formats': formats,
        }
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user