From 7d871e66fe24c4275ec158142bf70780ba032128 Mon Sep 17 00:00:00 2001 From: "M.Yasoob Khalid" Date: Wed, 5 Jun 2013 19:16:53 +0500 Subject: [PATCH 01/14] added Hypem IE but it did not work when i tested it --- youtube_dl/InfoExtractors.py | 62 ++++++++++++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py index 5811ef0da..e816fc459 100755 --- a/youtube_dl/InfoExtractors.py +++ b/youtube_dl/InfoExtractors.py @@ -4483,6 +4483,67 @@ class XHamsterIE(InfoExtractor): 'thumbnail': video_thumbnail }] +class HypemIE(InfoExtractor): + """Information Extractor for hypem""" + _VALID_URL = r'(?:http://)?(?:www\.)?hypem\.com/track/([^/]+)/([^/]+)' + + def removeDisallowedFilenameChars(filename): + validFilenameChars = "-_.() %s%s" % (string.ascii_letters, string.digits) + cleanedFilename = unicodedata.normalize('NFKD', filename).encode('ASCII', 'ignore') + return ''.join(c for c in cleanedFilename if c in validFilenameChars) + + def _real_extract(self,url): + mobj = re.match(self._VALID_URL, url) + if mobj is None: + raise ExtractorError(u'Invalid URL: %s' % url) + data = {'ax':1 , + 'ts': time() + } + data_encoded = urllib.urlencode(data) + complete_url = url + "?"+data_encoded + request = urllib2.Request(complete_url) + response = urllib2.urlopen(request) + #save our cookie + cookie = response.headers.get('Set-Cookie') + #grab the HTML + html = response.read() + response.close() + track_list = [] + list_data = re.search(r'',html) + html_tracks = list_data.group(1) + if html_tracks is None: + tracks = track_list + try: + track_list = json.loads(html_tracks) + tracks = track_list[u'tracks'] + except ValueError: + print "Hypemachine contained invalid JSON." + tracks = track_list + + for track in tracks: + key = track[u"key"] + id = track[u"id"] + artist = removeDisallowedFilenameChars(track[u"artist"]) + title = removeDisallowedFilenameChars(track[u"song"]) + type = track[u"type"] + if type is False: + continue + serve_url = "http://hypem.com/serve/source/{}/{}".format(id, key) + request = urllib2.Request(serve_url, "" , {'Content-Type': 'application/json'}) + request.add_header('cookie', cookie) + response = urllib2.urlopen(request) + song_data_json = response.read() + response.close() + song_data = json.loads(song_data_json) + final_url = song_data[u"url"] + return [{ + 'id': id, + 'url': final_url, + 'ext': "mp3", + 'title': title, + 'artist': artist, + }] + def gen_extractors(): """ Return a list of an instance of every supported extractor. The order does matter; the first extractor matched is the one handling the URL. @@ -4490,6 +4551,7 @@ def gen_extractors(): return [ YoutubePlaylistIE(), YoutubeChannelIE(), + HypemIE(), YoutubeUserIE(), YoutubeSearchIE(), YoutubeIE(), From 40e4fcf910ed283332d9c0c5e3e521e5f4f80fe8 Mon Sep 17 00:00:00 2001 From: "M.Yasoob Khalid" Date: Wed, 5 Jun 2013 19:40:29 +0500 Subject: [PATCH 02/14] moved HypemIE() to the end of the list --- youtube_dl/InfoExtractors.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py index e816fc459..33315e0d6 100755 --- a/youtube_dl/InfoExtractors.py +++ b/youtube_dl/InfoExtractors.py @@ -4551,7 +4551,6 @@ def gen_extractors(): return [ YoutubePlaylistIE(), YoutubeChannelIE(), - HypemIE(), YoutubeUserIE(), YoutubeSearchIE(), YoutubeIE(), @@ -4607,6 +4606,7 @@ def gen_extractors(): FlickrIE(), TeamcocoIE(), XHamsterIE(), + HypemIE(), GenericIE() ] From 90e48f573d5c6707b7bd5b5b762234da0c8f67bf Mon Sep 17 00:00:00 2001 From: "M.Yasoob Khalid" Date: Wed, 5 Jun 2013 22:06:25 +0500 Subject: [PATCH 03/14] corrected HypemIE and added tests for hypem.com --- test/tests.json | 9 +++++++++ youtube_dl/InfoExtractors.py | 28 +++++++++++++--------------- 2 files changed, 22 insertions(+), 15 deletions(-) diff --git a/test/tests.json b/test/tests.json index dc2671daa..c39d1d9c1 100644 --- a/test/tests.json +++ b/test/tests.json @@ -491,5 +491,14 @@ "info_dict":{ "title":"FemaleAgent Shy beauty takes the bait" } + }, + { + "name": "Hypem", + "url": "http://hypem.com/track/1v6ga/BODYWORK+-+TAME", + "file": "1v6ga.mp3", + "md5": "b9cc91b5af8995e9f0c1cee04c575828", + "info_dict":{ + "title":"TAME" + } } ] diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py index 33315e0d6..dfd3b0705 100755 --- a/youtube_dl/InfoExtractors.py +++ b/youtube_dl/InfoExtractors.py @@ -19,6 +19,7 @@ import operator import hashlib import binascii import urllib +import urllib2 from .utils import * @@ -4487,22 +4488,17 @@ class HypemIE(InfoExtractor): """Information Extractor for hypem""" _VALID_URL = r'(?:http://)?(?:www\.)?hypem\.com/track/([^/]+)/([^/]+)' - def removeDisallowedFilenameChars(filename): - validFilenameChars = "-_.() %s%s" % (string.ascii_letters, string.digits) - cleanedFilename = unicodedata.normalize('NFKD', filename).encode('ASCII', 'ignore') - return ''.join(c for c in cleanedFilename if c in validFilenameChars) - def _real_extract(self,url): mobj = re.match(self._VALID_URL, url) if mobj is None: raise ExtractorError(u'Invalid URL: %s' % url) data = {'ax':1 , - 'ts': time() + 'ts': time.time() } data_encoded = urllib.urlencode(data) complete_url = url + "?"+data_encoded - request = urllib2.Request(complete_url) - response = urllib2.urlopen(request) + request = compat_urllib_request.Request(complete_url) + response = compat_urllib_request.urlopen(request) #save our cookie cookie = response.headers.get('Set-Cookie') #grab the HTML @@ -4523,15 +4519,16 @@ class HypemIE(InfoExtractor): for track in tracks: key = track[u"key"] id = track[u"id"] - artist = removeDisallowedFilenameChars(track[u"artist"]) - title = removeDisallowedFilenameChars(track[u"song"]) + artist = track[u"artist"] + title = track[u"song"] type = track[u"type"] - if type is False: - continue - serve_url = "http://hypem.com/serve/source/{}/{}".format(id, key) - request = urllib2.Request(serve_url, "" , {'Content-Type': 'application/json'}) + if type is False: + continue + serve_url = "http://hypem.com/serve/source/%s/%s"%(str(id), str(key)) + self.report_extraction(id) + request = compat_urllib_request.Request(serve_url, "" , {'Content-Type': 'application/json'}) request.add_header('cookie', cookie) - response = urllib2.urlopen(request) + response = compat_urllib_request.urlopen(request) song_data_json = response.read() response.close() song_data = json.loads(song_data_json) @@ -4544,6 +4541,7 @@ class HypemIE(InfoExtractor): 'artist': artist, }] + def gen_extractors(): """ Return a list of an instance of every supported extractor. The order does matter; the first extractor matched is the one handling the URL. From 6479bb1f42c5b6af379111cee3cade46d039e788 Mon Sep 17 00:00:00 2001 From: "M.Yasoob Khalid" Date: Wed, 5 Jun 2013 23:40:13 +0500 Subject: [PATCH 04/14] removed urllib2 --- youtube_dl/InfoExtractors.py | 1 - 1 file changed, 1 deletion(-) diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py index dfd3b0705..859dcc3d8 100755 --- a/youtube_dl/InfoExtractors.py +++ b/youtube_dl/InfoExtractors.py @@ -19,7 +19,6 @@ import operator import hashlib import binascii import urllib -import urllib2 from .utils import * From 72ec1d3c2cd90b62eaaed10304e2e1a68e234421 Mon Sep 17 00:00:00 2001 From: "M.Yasoob Khalid" Date: Wed, 5 Jun 2013 23:52:37 +0500 Subject: [PATCH 05/14] changed it a little bit. Removed small useless pieces of code. --- youtube_dl/InfoExtractors.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py index 859dcc3d8..80a752a39 100755 --- a/youtube_dl/InfoExtractors.py +++ b/youtube_dl/InfoExtractors.py @@ -4512,7 +4512,7 @@ class HypemIE(InfoExtractor): track_list = json.loads(html_tracks) tracks = track_list[u'tracks'] except ValueError: - print "Hypemachine contained invalid JSON." + self.to_screen("Hypemachine contained invalid JSON.") tracks = track_list for track in tracks: @@ -4520,9 +4520,6 @@ class HypemIE(InfoExtractor): id = track[u"id"] artist = track[u"artist"] title = track[u"song"] - type = track[u"type"] - if type is False: - continue serve_url = "http://hypem.com/serve/source/%s/%s"%(str(id), str(key)) self.report_extraction(id) request = compat_urllib_request.Request(serve_url, "" , {'Content-Type': 'application/json'}) From dad6154b20a14991e2393adb102e6dfe562f98ef Mon Sep 17 00:00:00 2001 From: "M.Yasoob Khalid" Date: Thu, 6 Jun 2013 00:18:17 +0500 Subject: [PATCH 06/14] changed urllib.urlencode(data) to compat_urllib_parse(data) --- youtube_dl/InfoExtractors.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py index 80a752a39..25f7725b0 100755 --- a/youtube_dl/InfoExtractors.py +++ b/youtube_dl/InfoExtractors.py @@ -4494,7 +4494,7 @@ class HypemIE(InfoExtractor): data = {'ax':1 , 'ts': time.time() } - data_encoded = urllib.urlencode(data) + data_encoded = compat_urllib_parse(data) complete_url = url + "?"+data_encoded request = compat_urllib_request.Request(complete_url) response = compat_urllib_request.urlopen(request) From 6e354901f12eda3d70d4537f07d8cbd9a4e6a55d Mon Sep 17 00:00:00 2001 From: "M.Yasoob Khalid" Date: Thu, 6 Jun 2013 00:24:54 +0500 Subject: [PATCH 07/14] changed compat_urllib_parse(data) to compat_urllib_parse.urlencode(data) --- youtube_dl/InfoExtractors.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py index 25f7725b0..dc7c341f7 100755 --- a/youtube_dl/InfoExtractors.py +++ b/youtube_dl/InfoExtractors.py @@ -4494,7 +4494,7 @@ class HypemIE(InfoExtractor): data = {'ax':1 , 'ts': time.time() } - data_encoded = compat_urllib_parse(data) + data_encoded = compat_urllib_parse.urlencode(data) complete_url = url + "?"+data_encoded request = compat_urllib_request.Request(complete_url) response = compat_urllib_request.urlopen(request) From a89c45ef53a42ab09ecbe8679934922ecf4e9113 Mon Sep 17 00:00:00 2001 From: "M.Yasoob Khalid" Date: Thu, 6 Jun 2013 00:35:41 +0500 Subject: [PATCH 08/14] changed my regex a bit --- youtube_dl/InfoExtractors.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py index dc7c341f7..2e8d0df9f 100755 --- a/youtube_dl/InfoExtractors.py +++ b/youtube_dl/InfoExtractors.py @@ -4504,7 +4504,7 @@ class HypemIE(InfoExtractor): html = response.read() response.close() track_list = [] - list_data = re.search(r'',html) + list_data = re.search(b'',html) html_tracks = list_data.group(1) if html_tracks is None: tracks = track_list From b8e189f1decc620695bb8831ccc79382d8154c0a Mon Sep 17 00:00:00 2001 From: "M.Yasoob Khalid" Date: Thu, 6 Jun 2013 00:51:27 +0500 Subject: [PATCH 09/14] added the decoding of the response (for python 3) --- youtube_dl/InfoExtractors.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py index 2e8d0df9f..b6711a6e0 100755 --- a/youtube_dl/InfoExtractors.py +++ b/youtube_dl/InfoExtractors.py @@ -4500,11 +4500,13 @@ class HypemIE(InfoExtractor): response = compat_urllib_request.urlopen(request) #save our cookie cookie = response.headers.get('Set-Cookie') + encoding = response.headers.get('Content-Type') + encoding = (encoding.split(';')[1]).split('=')[1] #grab the HTML - html = response.read() + html = response.read().decode(encoding) response.close() track_list = [] - list_data = re.search(b'',html) + list_data = re.search(r'',html) html_tracks = list_data.group(1) if html_tracks is None: tracks = track_list From fd133bfffa28785db9f721a95bdf879de6e70082 Mon Sep 17 00:00:00 2001 From: "M.Yasoob Khalid" Date: Thu, 6 Jun 2013 01:00:24 +0500 Subject: [PATCH 10/14] used the fix for python 3 provided by @jaimeMF --- youtube_dl/InfoExtractors.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py index b6711a6e0..211ef0ba9 100755 --- a/youtube_dl/InfoExtractors.py +++ b/youtube_dl/InfoExtractors.py @@ -4509,13 +4509,13 @@ class HypemIE(InfoExtractor): list_data = re.search(r'',html) html_tracks = list_data.group(1) if html_tracks is None: - tracks = track_list + tracks = track_list try: - track_list = json.loads(html_tracks) - tracks = track_list[u'tracks'] + track_list = json.loads(html_tracks) + tracks = track_list[u'tracks'] except ValueError: - self.to_screen("Hypemachine contained invalid JSON.") - tracks = track_list + self.to_screen("Hypemachine contained invalid JSON.") + tracks = track_list for track in tracks: key = track[u"key"] @@ -4529,6 +4529,7 @@ class HypemIE(InfoExtractor): response = compat_urllib_request.urlopen(request) song_data_json = response.read() response.close() + (song_data_json, response) = self._download_webpage_handle(request, id, u'Downloading webpage with the url') song_data = json.loads(song_data_json) final_url = song_data[u"url"] return [{ From c56102dd16899f4a169e2ab47c896aa22438bcc0 Mon Sep 17 00:00:00 2001 From: "M.Yasoob Khalid" Date: Thu, 6 Jun 2013 12:28:18 +0500 Subject: [PATCH 11/14] rephrased my code a little. --- youtube_dl/InfoExtractors.py | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py index 211ef0ba9..1936ee241 100755 --- a/youtube_dl/InfoExtractors.py +++ b/youtube_dl/InfoExtractors.py @@ -4494,19 +4494,14 @@ class HypemIE(InfoExtractor): data = {'ax':1 , 'ts': time.time() } + id = mobj.group(1) data_encoded = compat_urllib_parse.urlencode(data) complete_url = url + "?"+data_encoded request = compat_urllib_request.Request(complete_url) - response = compat_urllib_request.urlopen(request) - #save our cookie - cookie = response.headers.get('Set-Cookie') - encoding = response.headers.get('Content-Type') - encoding = (encoding.split(';')[1]).split('=')[1] - #grab the HTML - html = response.read().decode(encoding) - response.close() + response,urlh = self._download_webpage_handle(request, id, u'Downloading webpage with the url') + cookie = urlh.headers.get('Set-Cookie', '') track_list = [] - list_data = re.search(r'',html) + list_data = re.search(r'',response) html_tracks = list_data.group(1) if html_tracks is None: tracks = track_list From fa71aa397aad28609adefdd61e2764f7c5537e1f Mon Sep 17 00:00:00 2001 From: "M.Yasoob Khalid" Date: Thu, 6 Jun 2013 12:34:50 +0500 Subject: [PATCH 12/14] Conflicts: youtube_dl/InfoExtractors.py --- youtube_dl/InfoExtractors.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py index 1936ee241..698cc9650 100755 --- a/youtube_dl/InfoExtractors.py +++ b/youtube_dl/InfoExtractors.py @@ -4494,12 +4494,26 @@ class HypemIE(InfoExtractor): data = {'ax':1 , 'ts': time.time() } +<<<<<<< HEAD id = mobj.group(1) data_encoded = compat_urllib_parse.urlencode(data) complete_url = url + "?"+data_encoded request = compat_urllib_request.Request(complete_url) response,urlh = self._download_webpage_handle(request, id, u'Downloading webpage with the url') cookie = urlh.headers.get('Set-Cookie', '') +======= + data_encoded = compat_urllib_parse.urlencode(data) + complete_url = url + "?"+data_encoded + request = compat_urllib_request.Request(complete_url) + response = compat_urllib_request.urlopen(request) + #save our cookie + cookie = response.headers.get('Set-Cookie') + encoding = response.headers.get('Content-Type') + encoding = (encoding.split(';')[1]).split('=')[1] + #grab the HTML + html = response.read().decode(encoding) + response.close() +>>>>>>> remotes/origin/HEAD track_list = [] list_data = re.search(r'',response) html_tracks = list_data.group(1) From e896e41f9c97e0ac589dd6fcd131595c59365ebd Mon Sep 17 00:00:00 2001 From: "M.Yasoob Khalid" Date: Thu, 6 Jun 2013 12:37:57 +0500 Subject: [PATCH 13/14] trying to rebase mt code --- youtube_dl/InfoExtractors.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py index 698cc9650..873f80fe0 100755 --- a/youtube_dl/InfoExtractors.py +++ b/youtube_dl/InfoExtractors.py @@ -4494,14 +4494,12 @@ class HypemIE(InfoExtractor): data = {'ax':1 , 'ts': time.time() } -<<<<<<< HEAD id = mobj.group(1) data_encoded = compat_urllib_parse.urlencode(data) complete_url = url + "?"+data_encoded request = compat_urllib_request.Request(complete_url) response,urlh = self._download_webpage_handle(request, id, u'Downloading webpage with the url') cookie = urlh.headers.get('Set-Cookie', '') -======= data_encoded = compat_urllib_parse.urlencode(data) complete_url = url + "?"+data_encoded request = compat_urllib_request.Request(complete_url) @@ -4513,7 +4511,6 @@ class HypemIE(InfoExtractor): #grab the HTML html = response.read().decode(encoding) response.close() ->>>>>>> remotes/origin/HEAD track_list = [] list_data = re.search(r'',response) html_tracks = list_data.group(1) From a9f9845b326955883b3f6bd6ca0b853979fe4db5 Mon Sep 17 00:00:00 2001 From: "M.Yasoob Khalid" Date: Thu, 6 Jun 2013 12:54:11 +0500 Subject: [PATCH 14/14] Again changed a little bit --- youtube_dl/InfoExtractors.py | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py index 873f80fe0..1936ee241 100755 --- a/youtube_dl/InfoExtractors.py +++ b/youtube_dl/InfoExtractors.py @@ -4500,17 +4500,6 @@ class HypemIE(InfoExtractor): request = compat_urllib_request.Request(complete_url) response,urlh = self._download_webpage_handle(request, id, u'Downloading webpage with the url') cookie = urlh.headers.get('Set-Cookie', '') - data_encoded = compat_urllib_parse.urlencode(data) - complete_url = url + "?"+data_encoded - request = compat_urllib_request.Request(complete_url) - response = compat_urllib_request.urlopen(request) - #save our cookie - cookie = response.headers.get('Set-Cookie') - encoding = response.headers.get('Content-Type') - encoding = (encoding.split(';')[1]).split('=')[1] - #grab the HTML - html = response.read().decode(encoding) - response.close() track_list = [] list_data = re.search(r'',response) html_tracks = list_data.group(1)