#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Website
=======

This module implements classes representing (static) websites:
- HTMLWebFile
- CSSWebFile
- Website
"""
from collections import Counter
from pathlib import Path
from typing import (
    Tuple,
    List,
    Optional,
    Counter as CounterType,
    Iterator,
    Type,
    TypeVar,
    Iterable,
    Dict,
)

from bs4 import BeautifulSoup, Comment, Tag  # type: ignore

from websites_test_framework.css_parser import CssSimpleParser, Rule, CssError, looks_like_css_code
from websites_test_framework.custom_types import PathLike, HtmlValidationResult
from websites_test_framework.tools import (
    detect_encoding,
    validate_html,
    validate_css,
    cached,
    cached_property,
    is_valid_xml,
    as_path,
    is_relative_to,
    is_url_relative,
    warn,
    LastOrderedSet,
)


class WebFile:
    """Generic base class for classes HTMLWebFile and CSSWebFile.

    You should not use it directly: subclasses must define `valid_extensions`
    and override the `structure` property.

    Two instances compare equal (and hash identically) when they share
    the same `path` and the same `root`.
    """

    # This attribute must be set when subclassing this class.
    valid_extensions: Tuple[str, ...]

    def __init__(self, path: Path, root: Optional[Path] = None):
        """Store the file location after checking its extension.

        Args:
            path: path of the file.
            root: directory used as the reference for `relative_path`.
                When omitted, the directory containing `path` is used.

        Raises:
            RuntimeError: if the suffix of `path` is not listed in
                `valid_extensions`.
        """
        if (ext := path.suffix) not in (extensions := self.valid_extensions):
            raise RuntimeError(f"Invalid file extension: {ext!r} (expected: {extensions})")
        self.path = path
        if root is None:
            # Using `path` itself as root would make `relative_path` always
            # equal to "." (and no other file could ever be located under
            # the root), so fall back to the containing directory instead.
            root = path.parent
        self.root = root

    @property
    def structure(self):
        """Parsed content of the file. Must be overridden by subclasses."""
        raise NotImplementedError

    @cached_property
    def encoding(self):
        """Encoding of the file, as reported by `detect_encoding`.

        The rest of this class treats the special value "binary" as
        meaning that the file can't be decoded as text.
        """
        return detect_encoding(str(self.path))

    @cached_property
    def is_utf8(self) -> bool:
        """Tell whether the detected encoding is UTF-8.

        The comparison ignores case and dashes ("UTF-8", "utf8"...).
        """
        return self.encoding.lower().replace("-", "") == "utf8"

    @cached_property
    def text(self) -> str:
        """Content of the file, decoded using the detected encoding.

        For a binary file, a message is printed and a placeholder string
        is returned instead.
        """
        if self.encoding == "binary":
            print(f"Can't decode file {self.path} (binary file !)")
            return "BINARY FILE !"
        return self.path.read_text(encoding=self.encoding)

    @cached_property
    def number_of_characters(self) -> int:
        """Number of characters of the decoded text."""
        # Note that `self.path.stat().st_size` would return the binary size,
        # not the number of characters !
        return len(self.text)

    @cached_property
    def number_of_lines(self) -> int:
        """Number of lines of the file (0 for a binary file)."""
        if self.encoding == "binary":
            return 0
        with open(self.path, encoding=self.encoding) as file:
            return sum(1 for _ in file)

    @cached_property
    def relative_path(self) -> Path:
        """Path of the file, relative to `root`."""
        return self.path.relative_to(self.root)

    @cached_property
    def name(self):
        """Final component of the file path."""
        return self.path.name

    def __hash__(self):
        # Must stay consistent with __eq__: same (path, root) => same hash.
        return hash((self.path, self.root))

    def __eq__(self, other):
        return isinstance(other, WebFile) and (self.path, self.root) == (other.path, other.root)

    def __str__(self):
        return f"{self.__class__.__name__}('{self.relative_path}')"


class HTMLWebFile(WebFile):
    """Class representing an HTML file."""

    valid_extensions = (".html", ".htm")

    @cached
    def get_w3c_validation(self) -> HtmlValidationResult:
        """Return the result of `validate_html` for this file."""
        return validate_html(str(self.path))

    @cached_property
    def errors(self) -> list:
        """The "errors" entry of the validation result."""
        return self.get_w3c_validation()["errors"]

    @cached_property
    def warnings(self) -> list:
        """The "warnings" entry of the validation result."""
        return self.get_w3c_validation()["warnings"]

    @property
    def is_valid_html5(self) -> bool:
        """Not implemented yet."""
        raise NotImplementedError

    @property
    def is_valid_xml(self) -> bool:
        """First item of the result returned by `tools.is_valid_xml` for this file."""
        return is_valid_xml(self.path)[0]

    @cached_property
    def tags(self) -> List[Tag]:
        """All the tags found in the parsed document."""
        tags = self.structure.find_all()
        # Sanity check only: `find_all()` without filter is expected to return tags.
        assert all(isinstance(tag, Tag) for tag in tags)
        return tags

    @cached_property
    def tags_count(self) -> CounterType[str]:
        """Number of occurrences of each tag name in the document."""
        return Counter(tag.name for tag in self.tags)

    @cached_property
    def linked_css(self) -> List[str]:
        """Not implemented yet."""
        raise NotImplementedError

    @cached_property
    def comments(self):
        """All the HTML comments of the document."""
        # `string=` is the current name of the former `text=` argument of
        # BeautifulSoup's `find_all()`.
        return self.structure.find_all(string=lambda s: isinstance(s, Comment))

    @cached_property
    def structure(self) -> BeautifulSoup:
        """The document, parsed by BeautifulSoup using the "lxml" parser."""
        # Read in binary mode, and let BeautifulSoup detect encoding.
        with open(self.path, "rb") as f:
            html_code = f.read()
        # Parsing as HTML, since too many students would be eliminated
        # if we parsed it as XML !
        return BeautifulSoup(html_code, "lxml")

    @cached_property
    def directly_linked_local_stylesheets(self) -> List[Path]:
        """Resolved paths of stylesheets linked through relative URLs.

        Only `<link rel="stylesheet">` tags are considered. The returned
        paths are not checked for existence. Links without a usable
        `href` attribute are ignored.
        """
        paths = []
        for link in self.structure.find_all(name="link", rel="stylesheet"):
            href = link.get("href")
            if not href:
                # Invalid <link> tag (no href at all, or an empty one):
                # nothing to resolve, and this must not raise a KeyError.
                continue
            url: str = href.strip()
            if url and is_url_relative(url):
                paths.append((self.path.parent / url).resolve())
        return paths

    @cached_property
    def local_css_files(self) -> List["CSSWebFile"]:
        """Local CSS files collected by `_get_local_css_files` for this page only."""
        return _get_local_css_files([self], self.root)

    def get_css_property(self, selector: str) -> Dict[str, str]:
        """Merge the properties declared for `selector` in the local CSS files.

        Files are processed in the order of `local_css_files`, so a value
        found in a later file overrides the one found in an earlier file.
        """
        properties: Dict[str, str] = {}
        for css_file in self.local_css_files:
            properties.update(css_file.structure.get_properties(selector))
        return properties


class CSSWebFile(WebFile):
    """Class representing a CSS file."""

    valid_extensions = (".css",)

    @cached
    def get_w3c_validation(self) -> int:
        """Return the result of `validate_css` for this file."""
        return validate_css(str(self.path))

    @cached_property
    def errors(self) -> int:
        """Result of the CSS validation (0 is treated as "no error reported")."""
        return self.get_w3c_validation()

    @cached_property
    def comments(self) -> List[str]:
        """Comments of the file, excluding those which look like CSS code."""
        return [comment for comment in self.structure.comments if not looks_like_css_code(comment)]

    @cached_property
    def structure(self) -> CssSimpleParser:
        """Parser instance fed with the text of the file.

        A `CssError` raised while parsing is not propagated: the parser is
        returned anyway. A warning is emitted only when the validation
        reported no error, since both results are then inconsistent.
        """
        structure = CssSimpleParser()
        try:
            structure.parse(self.text)
        except CssError:
            if self.errors == 0:
                # TODO: fix following issue: files which are not detected as CSS
                #  are reported as error-free by W3C-Validator !
                # For example, a CSS file starting with "}" will be considered
                # an empty valid CSS file ! On the validator website, the mention
                # "Aucune feuille de style trouvée" appears, this may be useful
                # to detect bad CSS files when no error is reported.
                warn(
                    "CSS parsing error:",
                    # The trailing space matters: both literals are concatenated.
                    "CSS can not be parsed but no error was reported "
                    f"by W3C Validator for {self.path!r}",
                )
        return structure

    @cached_property
    def rules(self) -> List[Rule]:
        """The css rules, as exposed by the parser (`structure.rules`)."""
        return self.structure.rules

    @cached_property
    def selectors(self) -> List[str]:
        """The css selectors, as exposed by the parser (`structure.selectors`)."""
        return self.structure.selectors

    @cached_property
    def at_rules_names(self) -> List[str]:
        """The css @rules' names, as exposed by the parser (`structure.at_rules_names`)."""
        return self.structure.at_rules_names


# Type variable restricted to WebFile and its subclasses, used to annotate
# code which is generic over the kind of web file it handles.
T = TypeVar("T", bound=WebFile)


class Website:
    """Class representing a full website.

    Currently, provides simple access to HTML and CSS files,
    using its .html_files and .css_files attributes.
    """

    def __init__(self, root: PathLike, path_on_server: str = "/"):
        """Create a website from its root directory.

        Args:
            root: root directory of the website.
            path_on_server: prefix removed from URLs by `rewrite_absolute_url`.

        Raises:
            NotImplementedError: if `root` is a ".zip" file (extraction is
                not implemented yet).
            FileNotFoundError: if `root` is not a directory.
        """
        self.root = as_path(root)
        if self.root.is_file() and self.root.suffix == ".zip":
            self._extract_from_zip()
        if not self.root.is_dir():
            raise FileNotFoundError(f"No such directory: {root}")
        self.path_on_server = path_on_server

    def _files_of_given_type(self, class_: Type[T]) -> Tuple[T, ...]:
        """Wrap in `class_` every file under root having one of its valid extensions."""
        return tuple(
            class_(path, self.root)
            for path in self.root.rglob("*")
            if path.is_file() and path.suffix in class_.valid_extensions
        )

    def _existing_directory(self, relative_path: PathLike) -> Path:
        """Return `root / relative_path`, raising FileNotFoundError if not a directory."""
        directory = self.root / relative_path
        if not directory.is_dir():
            raise FileNotFoundError(f"{relative_path} is not a directory")
        return directory

    def rewrite_absolute_url(self, url: str) -> str:
        """Remove the `path_on_server` prefix from `url`, if present.

        The result of a rewrite always starts with "/". URLs which don't
        start with the prefix are returned unchanged. The prefix is only
        removed at a path boundary: with a "/site" prefix, "/site/a.css"
        becomes "/a.css", but "/sitemap.xml" is left untouched.
        """
        prefix = self.path_on_server
        if not url.startswith(prefix):
            return url
        remainder = url[len(prefix) :]
        at_boundary = (
            not prefix
            or prefix.endswith("/")
            or not remainder
            or remainder.startswith(("/", "?", "#"))
        )
        if not at_boundary:
            # The prefix only matches the beginning of a longer path segment.
            return url
        return remainder if remainder.startswith("/") else "/" + remainder

    @cached_property
    def html_files(self) -> Tuple[HTMLWebFile, ...]:
        """All the HTML files found under the root directory."""
        return self._files_of_given_type(HTMLWebFile)

    @cached_property
    def css_files(self) -> Tuple[CSSWebFile, ...]:
        """All the CSS files found under the root directory."""
        return self._files_of_given_type(CSSWebFile)

    @cached_property
    def used_css_files(self) -> List[CSSWebFile]:
        """Return only the CSS files which are effectively used by the website.

        A CSS file is considered used if it meets one of the following:
        - it is linked to an HTML file using a <link> tag
        - it is imported in a used CSS file using an @import rule.
        """
        return _get_local_css_files(self.html_files, self.root)

    @cached_property
    def web_files(self) -> Tuple[WebFile, ...]:
        """Return CSS and HTML files."""
        return self.html_files + self.css_files

    def iterate_over_files_paths(self, *extensions: str) -> Iterator[Path]:
        """Iterate over the paths of the files found under the root directory.

        If extensions are given (with their leading dot, like ".png"),
        only the files whose suffix matches one of them are kept;
        the comparison is case-insensitive.
        """
        if extensions:
            wanted = tuple(ext.lower() for ext in extensions)
            return (
                path
                for path in self.root.rglob("*")
                if path.is_file() and path.suffix.lower() in wanted
            )
        return (path for path in self.root.rglob("*") if path.is_file())

    def get_directory_html_files(self, relative_path: PathLike = "") -> List[HTMLWebFile]:
        """Return the HTML files located under `root / relative_path`.

        Raises:
            FileNotFoundError: if `root / relative_path` is not a directory.
        """
        directory = self._existing_directory(relative_path)
        return [file for file in self.html_files if is_relative_to(file.path, directory)]

    def get_directory_css_files(self, relative_path: PathLike = "") -> List[CSSWebFile]:
        """Return the CSS files located under `root / relative_path`.

        Raises:
            FileNotFoundError: if `root / relative_path` is not a directory.
        """
        directory = self._existing_directory(relative_path)
        return [file for file in self.css_files if is_relative_to(file.path, directory)]

    def __getitem__(self, relative_path: PathLike) -> WebFile:
        """Return the HTML or CSS file located at `root / relative_path`.

        Raises:
            NotImplementedError: if the extension is neither an HTML one
                nor a CSS one.
            FileNotFoundError: if no such file belongs to the website.
        """
        path = (self.root / Path(relative_path)).resolve()
        # Rely on the extensions declared by the classes themselves,
        # so that ".htm" files are supported too.
        if path.suffix in HTMLWebFile.valid_extensions:
            files = self.html_files
        elif path.suffix in CSSWebFile.valid_extensions:
            files = self.css_files
        else:
            raise NotImplementedError(f"HTML or CSS file expected (file: '{relative_path}').")
        for file in files:
            # The paths of the files come from `root.rglob()`: they are not
            # necessarily resolved, so resolve them before comparing.
            if file.path.resolve() == path:
                return file
        raise FileNotFoundError(f"File '{path}' not found.")

    @cached_property
    def tags_count(self) -> CounterType[str]:
        """Return a dictionary {tag.name: number of occurrences}."""
        tags_count: CounterType[str] = Counter()
        for file in self.html_files:
            tags_count += file.tags_count
        return tags_count

    @cached_property
    def tags_global_number(self) -> int:
        """Return global tags' number."""
        return sum(self.tags_count.values())

    @cached_property
    def all_selectors(self) -> List[str]:
        """Selectors of all the CSS files, gathered in a single list."""
        return [selector for css_file in self.css_files for selector in css_file.selectors]

    @cached_property
    def css_rules_global_number(self) -> int:
        """Return global css rules' number."""
        return sum(len(css_file.rules) for css_file in self.css_files)

    @cached_property
    def all_files_paths(self):
        """Paths of all the files found under the root directory, as a tuple."""
        return tuple(self.iterate_over_files_paths())

    def _extract_from_zip(self):
        """Not implemented yet."""
        raise NotImplementedError


def _get_local_css_files(html_files: Iterable[HTMLWebFile], root: Path) -> List[CSSWebFile]:
    """Collect the CSS files reachable from the given HTML files.

    The stylesheets directly linked by the HTML files are the starting
    points; the files they import are then followed, step by step.
    Only existing files located under `root` are kept.
    """
    # The order of css imports matters, hence the special ordered set:
    # adding an element always puts it last (an element which is already
    # present is moved to the last position).
    pending: LastOrderedSet = LastOrderedSet()
    for html_file in html_files:
        for stylesheet in html_file.directly_linked_local_stylesheets:
            if not stylesheet.is_file():
                continue
            if not is_relative_to(stylesheet, root):
                continue
            pending.add(CSSWebFile(stylesheet, root))

    done: LastOrderedSet = LastOrderedSet()
    while pending:
        current = pending.pop()
        done.add(current)
        for url in current.structure.list_imports():
            target = (current.path.parent / url).resolve()
            if not (target.is_file() and is_relative_to(target, root)):
                continue
            imported = CSSWebFile(target, root)
            # Files which were already handled are not queued again.
            if imported in done:
                continue
            pending.add(imported)
    return list(done)