# coding: utf-8
from __future__ import unicode_literals

import os.path
import re
from time import sleep

from .common import InfoExtractor
from ..utils import ExtractorError
from ..compat import (
    compat_html_parser,
    compat_urlparse
)

class MovieStormHTMLParser(compat_html_parser.HTMLParser):
    """Collect the pieces of a moviestorm page that the extractor needs.

    After a document has been fed, the results are available as attributes:

    watch_urls        -- stripped hrefs of <a> tags found inside a
                         <td class="link_td"> cell whose href contains
                         'moviestorm' (other hosts are skipped)
    direct_url        -- stripped href of the last <a class="real_link">
                         seen outside such a cell, or False if there is none
    series_home_page  -- True if some text node equals 'SHOW EPISODES'
    """

    def __init__(self):
        compat_html_parser.HTMLParser.__init__(self)
        # True while the parser is inside a <td class="link_td"> cell
        self.found_button = False
        self.watch_urls = []
        self.direct_url = False
        self.series_home_page = False

    def handle_starttag(self, tag, attrs):
        # Attribute values may be missing or None (e.g. <td> without a class,
        # <a> without an href, or a valueless attribute), so every lookup
        # below goes through .get() instead of indexing.
        attrs = dict(attrs)

        if tag == 'td':
            if attrs.get('class') == 'link_td':
                self.found_button = True
            return

        if tag != 'a':
            return

        href = (attrs.get('href') or '').strip()
        if self.found_button:
            # Suppress ishare and other direct links, can't handle now
            if 'moviestorm' in href:
                self.watch_urls.append(href)
        elif attrs.get('class') == 'real_link' and href:
            self.direct_url = href

    def handle_endtag(self, tag):
        # Any closing </td> ends the current button cell
        if tag == 'td':
            self.found_button = False

    def handle_data(self, data):
        if data.strip() == 'SHOW EPISODES':
            self.series_home_page = True

    @classmethod
    def custom_parse(cls, html, return_variable):
        """Parse html with a fresh parser and return one result attribute.

        html is the document text; return_variable is the name of the
        attribute to read back ('watch_urls', 'direct_url' or
        'series_home_page').
        """
        parser = cls()
        parser.feed(html)
        parser.close()
        return getattr(parser, return_variable)

class MovieStormIE(InfoExtractor):
    # HANDLER INFO:
    # There are no tests for this IE because the links on any given moviestorm
    # page can dynamically change, and because the actual download/extraction
    # is ultimately performed by another IE. Example urls to feed to this IE
    # are:
    #
    #   EPISODE: http://moviestorm.eu/view/5821-watch-portlandia/season-1/episode-1
    #   MOVIE:   http://moviestorm.eu/view/5269-watch-taken-3-online.html
    #
    # If the user provides a series url, like the one below, this IE detects
    # it and raises an error:
    #
    #   SERIES:  http://moviestorm.eu/view/5821-watch-portlandia.html
    #
    # Moviestorm's drupal db config is unstable at times, so the link farm page
    # is retried up to `max_retries` times, waiting `retry_wait` seconds
    # between attempts, before giving up.
    #
    # This IE catches all links with http://moviestorm.eu urls. If it's an
    # un-handleable url, an error is raised informing the user of appropriate
    # urls to provide. Not using a more complex regex is meant to prevent
    # unacceptable moviestorm urls from falling back into the generic IE, as
    # that will always fail on moviestorm links.

    IE_DESC = 'Movie Storm (link farm)'
    IE_NAME = 'MovieStorm'
    _VALID_URL = r'http://moviestorm\.eu'
    _LINK_FARM = True
    _TEST = False

    # Retry policy for the link farm page
    max_retries = 5
    retry_wait = 5
    # Upper bound on watch pages requested per extraction; moviestorm sends
    # 503 if too many are requested in rapid succession
    max_watch_pages = 50

    # Per-extraction state. Both are rebound on the instance at the start of
    # every _real_extract call, so these class-level defaults are never
    # mutated and nothing leaks between calls or instances.
    retry_count = 0
    direct_urls = []

    def _parse_target(self, target):
        """Split target into (parsed url, fragment path, token).

        The fragment path is the url fragment without its first character and
        without anything from the first '?' on; the token is the last path
        component of that. For a url without a fragment both are ''.
        """
        uri = compat_urlparse.urlparse(target)
        fragment_path = uri.fragment[1:].split('?')[0]
        token = os.path.basename(fragment_path.rstrip('/'))
        return (uri, fragment_path, token)

    def _download_farm_page(self, url):
        """Download the link farm page, retrying while the site is unstable.

        Makes one initial attempt plus up to `max_retries` retries, sleeping
        `retry_wait` seconds before each retry. Returns the page text.
        Raises ExtractorError once the retries are exhausted.
        """
        (_, _, token) = self._parse_target(url)

        for attempt in range(self.max_retries + 1):
            self.retry_count = attempt
            if attempt == 0:
                note = 'Downloading link farm page'
            else:
                sleep(self.retry_wait)
                note = ('Unstable db connection, retying again in %s seconds '
                        '[%s/%s]' % (self.retry_wait, self.retry_count,
                                     self.max_retries))

            farmpage = self._download_webpage(
                url, token,
                note=note,
                errnote='Unable to download link farm page',
                fatal=False
            )

            # With fatal=False a failed download does not raise; it is
            # expected to come back as a falsy value (confirm against
            # InfoExtractor). Treat that like an unstable response and retry
            # rather than calling .strip() on a non-string.
            if farmpage and farmpage.strip() != 'MySQL server has gone away':
                return farmpage

        msg = 'The moviestorm database is currently unstable.  Please try again later.'
        raise ExtractorError(msg, expected=True)

    def _real_extract(self, url):
        """Return the de-duplicated direct links farmed from a moviestorm page.

        url must be a movie page or a series episode page. The result is a
        list of unique urls in the order they were found.

        Raises ExtractorError if url is not a watch link, if it points at a
        series home page, or if the link farm page cannot be retrieved.
        """
        # Inform user to provide proper moviestorm link
        if 'watch' not in url:
            msg = ('The moviestorm handler requires either a movie page link or '
                   'a series episode page link.  Please try again with one of those.')
            raise ExtractorError(msg, expected=True)

        # Start every extraction from a clean slate
        self.retry_count = 0
        self.direct_urls = []

        farmpage = self._download_farm_page(url)

        # Fail if provided series home page
        if MovieStormHTMLParser.custom_parse(farmpage, 'series_home_page'):
            msg = ('It looks like you provided an show page url.  You must provide '
                   'an episode page url or movie page url')
            raise ExtractorError(msg, expected=True)

        # Scrape WATCH button links from moviestorm page
        self.to_screen(': Extracting watch page urls')
        watch_urls = MovieStormHTMLParser.custom_parse(farmpage, 'watch_urls')

        # Get direct urls from scraped watch pages
        self.to_screen(': Extracting direct links from watch pages')

        for watch_url in watch_urls[:self.max_watch_pages]:
            (_, _, token) = self._parse_target(watch_url)
            watchpage = self._download_webpage(
                watch_url, token,
                note=False,
                errnote='Unable to download link farm watch page',
                fatal=False
            )

            # Skip watch pages that could not be downloaded
            if not watchpage:
                continue

            direct_url = MovieStormHTMLParser.custom_parse(
                watchpage,
                'direct_url'
            )

            # Keep first-seen order while dropping duplicates
            if direct_url and direct_url not in self.direct_urls:
                self.direct_urls.append(direct_url)

        self.to_screen(': Passing off farmed links to InfoExtractors')
        return list(self.direct_urls)