2013-10-28 01:50:17 -04:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								#!/usr/bin/env python
							 | 
						
					
						
							
								
									
										
										
										
											2014-11-26 20:01:20 +01:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								from __future__ import unicode_literals
							 | 
						
					
						
							
								
									
										
										
										
											2013-10-28 01:50:17 -04:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								"""
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								This script employs a VERY basic heuristic ('porn' in webpage.lower()) to check
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								if we are not 'age_limit' tagging some porn site
							 | 
						
					
						
							
								
									
										
										
										
											2014-01-14 16:01:00 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								A second approach implemented relies on a list of porn domains, to activate it
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								pass the list filename as the only argument
							 | 
						
					
						
							
								
									
										
										
										
											2013-10-28 01:50:17 -04:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								"""
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								# Allow direct execution
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								import os
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								import sys
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2017-09-19 08:51:20 -07:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								from test.helper import gettestcases
							 | 
						
					
						
							
								
									
										
										
										
											2014-01-14 16:01:00 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								from youtube_dl.utils import compat_urllib_parse_urlparse
							 | 
						
					
						
							
								
									
										
										
										
											2013-10-28 01:50:17 -04:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								from youtube_dl.utils import compat_urllib_request
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2014-01-14 16:01:00 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								if len(sys.argv) > 1:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    METHOD = 'LIST'
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    LIST = open(sys.argv[1]).read().decode('utf8').strip()
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								else:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    METHOD = 'EURISTIC'
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2017-09-19 08:51:20 -07:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								for test in gettestcases():
							 | 
						
					
						
							
								
									
										
										
										
											2014-01-14 16:01:00 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    if METHOD == 'EURISTIC':
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        try:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            webpage = compat_urllib_request.urlopen(test['url'], timeout=10).read()
							 | 
						
					
						
							
								
									
										
										
										
											2015-03-27 13:02:20 +01:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        except Exception:
							 | 
						
					
						
							
								
									
										
										
										
											2014-01-14 16:01:00 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								            print('\nFail: {0}'.format(test['name']))
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            continue
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        webpage = webpage.decode('utf8', 'replace')
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        RESULT = 'porn' in webpage.lower()
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    elif METHOD == 'LIST':
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        domain = compat_urllib_parse_urlparse(test['url']).netloc
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        if not domain:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            print('\nFail: {0}'.format(test['name']))
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            continue
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        domain = '.'.join(domain.split('.')[-2:])
							 | 
						
					
						
							
								
									
										
										
										
											2013-10-28 01:50:17 -04:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2014-01-14 16:01:00 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        RESULT = ('.' + domain + '\n' in LIST or '\n' + domain + '\n' in LIST)
							 | 
						
					
						
							
								
									
										
										
										
											2013-10-28 01:50:17 -04:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2015-02-21 14:55:13 +01:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    if RESULT and ('info_dict' not in test or 'age_limit' not in test['info_dict'] or
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                   test['info_dict']['age_limit'] != 18):
							 | 
						
					
						
							
								
									
										
										
										
											2013-10-28 01:50:17 -04:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        print('\nPotential missing age_limit check: {0}'.format(test['name']))
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2015-02-21 14:55:13 +01:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    elif not RESULT and ('info_dict' in test and 'age_limit' in test['info_dict'] and
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                         test['info_dict']['age_limit'] == 18):
							 | 
						
					
						
							
								
									
										
										
										
											2013-10-28 01:50:17 -04:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        print('\nPotential false negative: {0}'.format(test['name']))
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    else:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        sys.stdout.write('.')
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    sys.stdout.flush()
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								print()
							 |