#!/usr/bin/env python
# -*- coding: utf-8 -*-

import glob
import itertools
import re
import zipfile
from typing import List, Iterable, Set


class Plugin(object):
    """A named archive together with the entries that survive filtering.

    Entries are dropped at construction time when they appear verbatim in
    ``ignore_list`` or when one of the ``ignore_list_regex`` patterns
    matches them. Equality and hashing are based on ``name`` only.
    """

    name: str
    files: List[str]

    # Patterns are applied with ``match``, i.e. anchored at the start of
    # the entry name.
    ignore_list_regex = [
        re.compile(r'^.*/$'),
        re.compile(r'data/.*'),
        re.compile(r"META-INF/LICENSE.*"),
        re.compile(r"META-INF/.*module-info.class"),
        re.compile(r"LICENSE.*"),
        re.compile(r"images/.*"),
    ]
    # Entry names that are ignored on exact equality.
    ignore_list = [
        "README",
        "GPL-v2.0.txt",
        "GPL-v3.0.txt",
        "GPL-3.0.txt",
        "META-INF/MANIFEST.MF",
        "META-INF/INDEX.LIST",
    ]

    @staticmethod
    def _not_ignored(file: str) -> bool:
        """Return True when *file* is hit by neither ignore rule set."""
        if file in Plugin.ignore_list:
            return False
        return not any(rx.match(file) for rx in Plugin.ignore_list_regex)

    @staticmethod
    def _filter_files(files: List[str]) -> List[str]:
        """Return the entries of *files* that are not ignored, in order."""
        return list(filter(Plugin._not_ignored, files))

    def __init__(self, name: str, files: List[str]):
        """Store *name* and the non-ignored subset of *files*."""
        self.files = Plugin._filter_files(files)
        self.name = name

    def __repr__(self) -> str:
        return self.name

    def __hash__(self):
        return hash(self.name)

    def __eq__(self, other):
        # Only another Plugin carrying the same name compares equal.
        return isinstance(other, Plugin) and self.name == other.name


class PluginDuplicates(object):
    """An unordered pair of plugin names plus the file names they share.

    The two names are stored in sorted order, so the pair built from
    ``(a, b)`` equals and hashes the same as the pair built from
    ``(b, a)``. ``duplicate_files`` does not take part in equality or
    hashing.
    """

    # Smaller of the two names.
    plugin1: str
    # Larger of the two names.
    plugin2: str
    # File names present in both plugins.
    duplicate_files: Set[str]

    def __init__(self, plugin1: str, plugin2: str, duplicate_files: Set[str]):
        """Store the two names in sorted order along with *duplicate_files*."""
        # Normalising the order makes the pair symmetric for __eq__/__hash__.
        self.plugin1, self.plugin2 = sorted((plugin1, plugin2))
        self.duplicate_files = duplicate_files

    def __repr__(self) -> str:
        return f"{self.plugin1}, {self.plugin2}"

    def __hash__(self):
        return hash((self.plugin1, self.plugin2))

    def __eq__(self, other):
        """Compare by the normalised name pair.

        Returns ``NotImplemented`` for foreign types so the comparison
        evaluates to ``False`` instead of the implicit ``None`` that a
        missing return would produce.
        """
        if not isinstance(other, PluginDuplicates):
            return NotImplemented
        return (self.plugin1, self.plugin2) == (other.plugin1, other.plugin2)


def read_plugins() -> Iterable[Plugin]:
    """Yield a ``Plugin`` for each jar in the current working directory.

    Jars whose name ends in ``-sources.jar`` or ``-javadoc.jar`` are
    skipped. Each plugin is named after its jar file and receives the
    archive's entry names. Errors raised by ``zipfile.ZipFile`` while
    opening an archive are not caught here.
    """
    for jar in glob.glob("*.jar"):
        if jar.endswith(("-sources.jar", "-javadoc.jar")):
            continue
        # Close the archive as soon as its listing is read so that file
        # handles do not accumulate while the generator is being consumed.
        with zipfile.ZipFile(jar) as zfile:
            entry_names = zfile.namelist()
        yield Plugin(jar, entry_names)


def get_common_files(file_list_1: Iterable[str],
                     file_list_2: Iterable[str]) -> Set[str]:
    """Return the set of names that occur in both iterables."""
    return set(file_list_1).intersection(file_list_2)


def compare_plugins(plugins: Iterable[Plugin]) -> Iterable[PluginDuplicates]:
    """Yield a ``PluginDuplicates`` for each pair of plugins sharing files.

    The input is first collapsed into a set, so plugins that compare
    equal are considered once. Every unordered pair is then examined
    exactly once; a pair is reported only when the two plugins have at
    least one file name in common.
    """
    unique_plugins = set(plugins)
    # combinations() visits each unordered pair a single time, so a pair
    # is neither compared with itself nor reported in both orders.
    for p1, p2 in itertools.combinations(unique_plugins, 2):
        common_files = get_common_files(p1.files, p2.files)
        if common_files:
            yield PluginDuplicates(p1.name, p2.name, common_files)


if __name__ == "__main__":
    # Scan the jars in the working directory and print every pair that
    # shares files; the set collapses repeated reports of the same pair.
    for duplicate in set(compare_plugins(read_plugins())):
        print(duplicate.plugin1, duplicate.plugin2, duplicate.duplicate_files)
