| 
									
										
										
										
											2013-09-04 22:06:50 +02:00
										 |  |  | # encoding: utf-8 | 
					
						
							| 
									
										
										
										
											2014-03-21 14:03:18 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  | from __future__ import unicode_literals | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-09-04 22:06:50 +02:00
										 |  |  | import re | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | from .common import InfoExtractor | 
					
						
							| 
									
										
										
										
											2014-12-13 12:24:42 +01:00
										 |  |  | from ..compat import ( | 
					
						
							| 
									
										
										
										
											2013-09-04 22:06:50 +02:00
										 |  |  |     compat_urllib_parse, | 
					
						
							|  |  |  | ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | class DaumIE(InfoExtractor): | 
					
						
							| 
									
										
										
										
											2014-09-15 15:25:35 +02:00
										 |  |  |     _VALID_URL = r'https?://(?:m\.)?tvpot\.daum\.net/(?:v/|.*?clipid=)(?P<id>[^?#&]+)' | 
					
						
							| 
									
										
										
										
											2014-03-21 14:03:18 +01:00
										 |  |  |     IE_NAME = 'daum.net' | 
					
						
							| 
									
										
										
										
											2013-09-04 22:06:50 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-09-15 15:25:35 +02:00
										 |  |  |     _TESTS = [{ | 
					
						
							| 
									
										
										
										
											2014-03-21 14:03:18 +01:00
										 |  |  |         'url': 'http://tvpot.daum.net/clip/ClipView.do?clipid=52554690', | 
					
						
							|  |  |  |         'info_dict': { | 
					
						
							|  |  |  |             'id': '52554690', | 
					
						
							|  |  |  |             'ext': 'mp4', | 
					
						
							|  |  |  |             'title': 'DOTA 2GETHER 시즌2 6회 - 2부', | 
					
						
							|  |  |  |             'description': 'DOTA 2GETHER 시즌2 6회 - 2부', | 
					
						
							|  |  |  |             'upload_date': '20130831', | 
					
						
							|  |  |  |             'duration': 3868, | 
					
						
							| 
									
										
										
										
											2013-09-04 22:06:50 +02:00
										 |  |  |         }, | 
					
						
							| 
									
										
										
										
											2014-09-15 15:25:35 +02:00
										 |  |  |     }, { | 
					
						
							|  |  |  |         'url': 'http://tvpot.daum.net/v/vab4dyeDBysyBssyukBUjBz', | 
					
						
							|  |  |  |         'only_matching': True, | 
					
						
							|  |  |  |     }, { | 
					
						
							|  |  |  |         'url': 'http://tvpot.daum.net/v/07dXWRka62Y%24', | 
					
						
							|  |  |  |         'only_matching': True, | 
					
						
							|  |  |  |     }] | 
					
						
							| 
									
										
										
										
											2013-09-04 22:06:50 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  |     def _real_extract(self, url): | 
					
						
							|  |  |  |         mobj = re.match(self._VALID_URL, url) | 
					
						
							| 
									
										
										
										
											2014-09-15 15:25:35 +02:00
										 |  |  |         video_id = mobj.group('id') | 
					
						
							| 
									
										
										
										
											2013-09-05 10:08:17 +02:00
										 |  |  |         canonical_url = 'http://tvpot.daum.net/v/%s' % video_id | 
					
						
							|  |  |  |         webpage = self._download_webpage(canonical_url, video_id) | 
					
						
							| 
									
										
										
										
											2013-12-03 14:16:58 +01:00
										 |  |  |         full_id = self._search_regex( | 
					
						
							| 
									
										
										
										
											2014-12-30 16:55:53 +06:00
										 |  |  |             r'src=["\']http://videofarm\.daum\.net/controller/video/viewer/Video\.html\?.*?vid=(.+?)[&"\']', | 
					
						
							| 
									
										
										
										
											2014-03-21 14:03:18 +01:00
										 |  |  |             webpage, 'full id') | 
					
						
							| 
									
										
										
										
											2013-09-04 22:06:50 +02:00
										 |  |  |         query = compat_urllib_parse.urlencode({'vid': full_id}) | 
					
						
							| 
									
										
										
										
											2013-11-26 18:48:52 +01:00
										 |  |  |         info = self._download_xml( | 
					
						
							| 
									
										
										
										
											2013-09-04 22:06:50 +02:00
										 |  |  |             'http://tvpot.daum.net/clip/ClipInfoXml.do?' + query, video_id, | 
					
						
							| 
									
										
										
										
											2014-03-21 14:03:18 +01:00
										 |  |  |             'Downloading video info') | 
					
						
							| 
									
										
										
										
											2013-11-26 18:48:52 +01:00
										 |  |  |         urls = self._download_xml( | 
					
						
							| 
									
										
										
										
											2013-09-04 22:06:50 +02:00
										 |  |  |             'http://videofarm.daum.net/controller/api/open/v1_2/MovieData.apixml?' + query, | 
					
						
							| 
									
										
										
										
											2014-03-21 14:03:18 +01:00
										 |  |  |             video_id, 'Downloading video formats info') | 
					
						
							| 
									
										
										
										
											2013-09-04 22:06:50 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  |         formats = [] | 
					
						
							|  |  |  |         for format_el in urls.findall('result/output_list/output_list'): | 
					
						
							|  |  |  |             profile = format_el.attrib['profile'] | 
					
						
							|  |  |  |             format_query = compat_urllib_parse.urlencode({ | 
					
						
							|  |  |  |                 'vid': full_id, | 
					
						
							|  |  |  |                 'profile': profile, | 
					
						
							|  |  |  |             }) | 
					
						
							| 
									
										
										
										
											2013-11-26 18:48:52 +01:00
										 |  |  |             url_doc = self._download_xml( | 
					
						
							| 
									
										
										
										
											2013-09-04 22:06:50 +02:00
										 |  |  |                 'http://videofarm.daum.net/controller/api/open/v1_2/MovieLocation.apixml?' + format_query, | 
					
						
							| 
									
										
										
										
											2014-09-15 15:25:35 +02:00
										 |  |  |                 video_id, note='Downloading video data for %s format' % profile) | 
					
						
							| 
									
										
										
										
											2013-09-04 22:06:50 +02:00
										 |  |  |             format_url = url_doc.find('result/url').text | 
					
						
							|  |  |  |             formats.append({ | 
					
						
							|  |  |  |                 'url': format_url, | 
					
						
							|  |  |  |                 'format_id': profile, | 
					
						
							|  |  |  |             }) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-12-03 14:21:06 +01:00
										 |  |  |         return { | 
					
						
							| 
									
										
										
										
											2013-09-04 22:06:50 +02:00
										 |  |  |             'id': video_id, | 
					
						
							|  |  |  |             'title': info.find('TITLE').text, | 
					
						
							|  |  |  |             'formats': formats, | 
					
						
							|  |  |  |             'thumbnail': self._og_search_thumbnail(webpage), | 
					
						
							| 
									
										
										
										
											2013-09-05 10:08:17 +02:00
										 |  |  |             'description': info.find('CONTENTS').text, | 
					
						
							| 
									
										
										
										
											2013-09-04 22:06:50 +02:00
										 |  |  |             'duration': int(info.find('DURATION').text), | 
					
						
							|  |  |  |             'upload_date': info.find('REGDTTM').text[:8], | 
					
						
							|  |  |  |         } |