Merge branch 'master' into use-other-downloaders

2013-02-17 23:29:47 -03:00 · 2013-02-17 23:29:47 -03:00 · 3fee6f4265
commit 3fee6f4265
parent f5ce5321c4 906417c7c5
4 changed files with 82 additions and 11 deletions
--- a/test/tests.json
+++ b/test/tests.json
@ -276,5 +276,15 @@
        }
      }
    ]
+  },
+  {
+    "name": "Keek",
+    "url": "http://www.keek.com/ytdl/keeks/NODfbab",
+    "file": "NODfbab.mp4",
+    "md5": "9b0636f8c0f7614afa4ea5e4c6e57e83",
+    "info_dict": {
+      "title": "test chars: \"'/\\ä<>This is a test video for youtube-dl.For more information, contact phihag@phihag.de ."
+    }
+
  }
 ]
--- a/youtube_dl/FileDownloader.py
+++ b/youtube_dl/FileDownloader.py
@ -208,7 +208,7 @@ class FileDownloader(object):
            # already of type unicode()
            ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
        elif 'TERM' in os.environ:
-            sys.stderr.write('\033]0;%s\007' % message.encode(preferredencoding()))
+            self.to_screen('\033]0;%s\007' % message, skip_eol=True)

    def fixed_template(self):
        """Checks if the output template is fixed."""
--- a/youtube_dl/InfoExtractors.py
+++ b/youtube_dl/InfoExtractors.py
@ -151,7 +151,7 @@ class YoutubeIE(InfoExtractor):
                     (?(1).+)?                                                # if we found the ID, everything can follow
                     $"""
    _LANG_URL = r'http://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
-    _LOGIN_URL = 'https://www.youtube.com/signup?next=/&gl=US&hl=en'
+    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
    _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
    _NETRC_MACHINE = 'youtube'
@ -320,19 +320,54 @@ class YoutubeIE(InfoExtractor):
        if username is None:
            return

+        request = compat_urllib_request.Request(self._LOGIN_URL)
+        try:
+            login_page = compat_urllib_request.urlopen(request).read().decode('utf-8')
+        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
+            self._downloader.to_stderr(u'WARNING: unable to fetch login page: %s' % compat_str(err))
+            return
+
+        galx = None
+        dsh = None
+        match = re.search(re.compile(r'<input.+?name="GALX".+?value="(.+?)"', re.DOTALL), login_page)
+        if match:
+          galx = match.group(1)
+
+        match = re.search(re.compile(r'<input.+?name="dsh".+?value="(.+?)"', re.DOTALL), login_page)
+        if match:
+          dsh = match.group(1)
+
        # Log in
-        login_form = {
-                'current_form': 'loginForm',
-                'next':     '/',
-                'action_login': 'Log In',
-                'username': username,
-                'password': password,
-                }
-        request = compat_urllib_request.Request(self._LOGIN_URL, compat_urllib_parse.urlencode(login_form))
+        login_form_strs = {
+                u'continue': u'http://www.youtube.com/signin?action_handle_signin=true&feature=sign_in_button&hl=en_US&nomobiletemp=1',
+                u'Email': username,
+                u'GALX': galx,
+                u'Passwd': password,
+                u'PersistentCookie': u'yes',
+                u'_utf8': u'霱',
+                u'bgresponse': u'js_disabled',
+                u'checkConnection': u'',
+                u'checkedDomains': u'youtube',
+                u'dnConn': u'',
+                u'dsh': dsh,
+                u'pstMsg': u'0',
+                u'rmShown': u'1',
+                u'secTok': u'',
+                u'signIn': u'Sign in',
+                u'timeStmp': u'',
+                u'service': u'youtube',
+                u'uilel': u'3',
+                u'hl': u'en_US',
+        }
+        # Convert to UTF-8 *before* urlencode because Python 2.x's urlencode
+        # chokes on unicode
+        login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k,v in login_form_strs.items())
+        login_data = compat_urllib_parse.urlencode(login_form).encode('ascii')
+        request = compat_urllib_request.Request(self._LOGIN_URL, login_data)
        try:
            self.report_login()
            login_results = compat_urllib_request.urlopen(request).read().decode('utf-8')
-            if re.search(r'(?i)<form[^>]* name="loginForm"', login_results) is not None:
+            if re.search(r'(?i)<form[^>]* id="gaia_loginform"', login_results) is not None:
                self._downloader.to_stderr(u'WARNING: unable to log in: bad username or password')
                return
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
@ -3909,6 +3944,30 @@ class EightTracksIE(InfoExtractor):
            next_url = 'http://8tracks.com/sets/%s/next?player=sm&mix_id=%s&format=jsonh&track_id=%s' % (session, mix_id, track_data['id'])
        return res

+class KeekIE(InfoExtractor):
+    """Information extractor for keek.com video pages.
+
+    The media and thumbnail URLs are built directly from the video ID found
+    in the page URL; the title and the uploader are scraped from the page
+    HTML.
+    """
+    _VALID_URL = r'http://(?:www\.)?keek\.com/(?:!|\w+/keeks/)(?P<videoID>\w+)'
+    IE_NAME = u'keek'
+
+    def _real_extract(self, url):
+        """Return a one-element list holding the info dict for *url*.
+
+        The title is required: a page without a matching og:title tag is
+        not handled here and still fails at the ``.group()`` call. The
+        uploader is treated as non-fatal: when it cannot be found a warning
+        is printed and ``'uploader'`` is set to None.
+        """
+        m = re.match(self._VALID_URL, url)
+        video_id = m.group('videoID')
+        video_url = u'http://cdn.keek.com/keek/video/%s' % video_id
+        thumbnail = u'http://cdn.keek.com/keek/thumbnail/%s/w100/h75' % video_id
+        webpage = self._download_webpage(url, video_id)
+        m = re.search(r'<meta property="og:title" content="(?P<title>.+)"', webpage)
+        title = unescapeHTML(m.group('title'))
+        m = re.search(r'<div class="bio-names-and-report">[\s\n]+<h4>(?P<uploader>\w+)</h4>', webpage)
+        if m is None:
+            # The video URL does not depend on the uploader, so a page
+            # without the expected markup should not abort the extraction.
+            self._downloader.to_stderr(u'WARNING: unable to extract uploader')
+            uploader = None
+        else:
+            uploader = unescapeHTML(m.group('uploader'))
+        info = {
+                'id': video_id,
+                'url': video_url,
+                'ext': 'mp4',
+                'title': title,
+                'thumbnail': thumbnail,
+                'uploader': uploader,
+        }
+        return [info]
+
 def gen_extractors():
    """ Return a list of an instance of every supported extractor.
    The order does matter; the first extractor matched is the one handling the URL.
@ -3955,6 +4014,7 @@ def gen_extractors():
        UstreamIE(),
        RBMARadioIE(),
        EightTracksIE(),
+        KeekIE(),
        GenericIE()
    ]

--- a/youtube_dl/init.py
+++ b/youtube_dl/init.py
@ -23,6 +23,7 @@ __authors__  = (
    'Dave Vasilevsky',
    'Jaime Marquínez Ferrándiz',
    'Jeff Crouse',
+    'Osama Khalid',
    )

 __license__ = 'Public Domain'