#!/usr/bin/env python
#
# Copyright (C) 2013 EPITA Research and Development Laboratory (LRDE)
#
# This script is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, version 2 of the License.
#
# It is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
# License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Olena.  If not, see <http://www.gnu.org/licenses/>.


import sys
import os
import shutil
import urllib2
import hashlib
import zipfile
import subprocess
import shlex
import argparse
import logging


class Config:
    # Constants shared by every instance.
    input_baseurl = "http://www.lrde.epita.fr/dload/olena/datasets/dbd/"
    setup_version = "1.0"
    supported_impl_bin = ["sauvola", "sauvola_ms", "wolf", "otsu", "niblack", "kim"]
    root_dirs = ["download", "data", "bench", "build", "src", "bench/bin"]

    def __init__(self):
        self.base_dir = os.getcwd() + "/"

    def set_dataset_version(self, dataset_version):
        self.dataset_version = dataset_version
        prefix = self.base_dir + dataset_version
        self.dl_dir = prefix + "/download"
        self.src_dir = prefix + "/src"
        self.oln_src_dir = self.src_dir + "/olena"
        self.data_dir = prefix + "/data"
        self.bench_dir = prefix + "/bench"
        self.bin_dir = self.bench_dir + "/bin"
        self.build_dir = prefix + "/build"
        self.oln_build_dir = self.build_dir + "/olena"
        self.dataset_dir = prefix + "/"
        self.dataset_url = self.input_baseurl + dataset_version + "/"
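
# The directory layout created under <base_dir>/<version>/ is roughly:
#   download/   archives and checksum files fetched from the server
#   data/       uncompressed input images and ground truth
#   src/        benchmark tool sources and the Olena Git clone
#   build/      out-of-source builds (tools and build/olena)
#   bench/      benchmark working directory; bench/bin holds symlinks
#               to the built binaries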



#------------------------------------
def setup_logger():
    # Make a global logging object.
    log = logging.getLogger("logfun")
    log.setLevel(logging.DEBUG)

    # This handler writes everything to a file.
    h1 = logging.FileHandler("./setup.log", 'w')
    f = logging.Formatter("%(message)s")
    h1.setFormatter(f)
    h1.setLevel(logging.DEBUG)
    log.addHandler(h1)

    # Also output on stdout
    h2 = logging.StreamHandler(sys.stdout)
    log.addHandler(h2)

    return log

# The logger is global.
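# It logs everything to ./setup.log and mirrors all messages to stdout.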
log = setup_logger()



#------------------------------------
# Perform a download and display a progressbar.
def download(url, dl_dir, desc=""):
    from urllib2 import HTTPError, URLError, urlopen
    try:
        file_name = url.split('/')[-1]
        u = urlopen(url)
        f = open(dl_dir + "/" + file_name, 'wb')
        meta = u.info()
        file_size = int(meta.getheaders("Content-Length")[0])
        mb_ratio = 1048576
        if desc:
            print "Downloading: %s (%sMB)" % (desc, file_size / mb_ratio)
        else:
            print "Downloading: %s (%sMB)" % (file_name, file_size / mb_ratio)

        file_size_dl = 0
        block_sz = 8192
        while True:
            chunk = u.read(block_sz)
            if not chunk:
                break

            file_size_dl += len(chunk)
            f.write(chunk)
            status = r"%10dMB  [%3.2f%%]" % (file_size_dl / mb_ratio, file_size_dl * 100. / file_size)
            # Append backspaces (chr(8)) so the next status line
            # overwrites this one, giving an in-place progress bar.
            status = status + chr(8)*(len(status)+1)
            print status,

        f.close()
        print "\r"

    except HTTPError as e:
        log.exception("HTTP Error: %s %s", e.code, url)
        exit(1)
    except URLError as e:
        log.exception("URL Error: %s %s", e.reason, url)
        exit(1)
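
#------------------------------------
# Helper added for readability (an addition, not part of the original
# script): compute the SHA-1 digest of a file by reading it in chunks,
# so large archives need not be loaded into memory at once.
def sha1_of_file(path):
    sha1 = hashlib.sha1()
    with open(path, 'rb') as f:
        for chunk in iter(lambda: f.read(8192), ''):
            sha1.update(chunk)
    return sha1.hexdigest()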



#------------------------------------
# Download the files listed in a checksum file.
def download_files(conf, name, filelist, uncompressed_file, subdir=""):
    log.debug("* Downloading " + name + " ...")

    # Create download directory
    if not os.path.exists(conf.dl_dir):
        os.makedirs(conf.dl_dir)

    # Get the file list and checksums.
    files = []
    download(conf.dataset_url + filelist, conf.dl_dir,
             "file list and checksums")
    with open(conf.dl_dir + "/" + filelist) as f:
        for line in f:
            fields = line.split()
            if fields:
                files.append(fields)
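    # Each line of the checksum file is expected to be
    # "<sha1>  <filename>"; hypothetical example:
    #   da39a3ee5e6b4b0d3255bfef95601890afd80709  input_01.zip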

    # Download a file only if it is missing or its SHA-1 is not correct.
    for checksum, filename in files:
        path = conf.dl_dir + "/" + filename
        if not os.path.exists(path) or sha1_of_file(path) != checksum:
            url = conf.dataset_url + (subdir + "/" if subdir else "") + filename
            download(url, conf.dl_dir)

            # Handle an invalid checksum.
            if sha1_of_file(path) != checksum:
                log.critical("Checksum error: " + filename + " has not been correctly downloaded")
                exit(1)

            # Mark that archives must be cleaned up and uncompressed
            # again.
            try:
                os.remove(conf.dl_dir + "/" + uncompressed_file)
            except OSError:
                pass
        else:
            log.debug(filename + " is already up-to-date.")

    log.debug("Done. " + name + " downloaded")




#------------------------------------
# Check the latest available dataset version.
def check_latest_dataset_version(conf):
    from urllib2 import HTTPError, URLError, urlopen
    try:
        data = urlopen(conf.input_baseurl + "VERSION")
        version = data.read().rstrip('\r\n')
    except HTTPError as e:
        log.exception("HTTP Error: %s. Cannot check latest version! Check your connectivity!", e.code)
        exit(1)
    except URLError as e:
        log.exception("URL Error: %s. Cannot check latest version! Contact olena@lrde.epita.fr!", e.reason)
        exit(1)

    log.debug("Latest dataset version is " + version)
    conf.set_dataset_version(version)



#------------------------------------
# Check the current setup script version and optionally self-update.
def check_current_setup_version(conf, update=False):
    from urllib2 import HTTPError, URLError, urlopen
    try:
        data = urlopen(conf.input_baseurl + "SETUP_VERSION")
        version = data.read().rstrip('\r\n')
    except HTTPError as e:
        log.exception("HTTP Error: %s. Cannot check current version! Check your connectivity!", e.code)
        exit(1)
    except URLError as e:
        log.exception("URL Error: %s. Cannot check current version! Contact olena@lrde.epita.fr!", e.reason)
        exit(1)

    if version != conf.setup_version:
        if update:
            download(conf.input_baseurl + "/setup.py", "/tmp", "Script update")
            # Save current script and move new one to final destination.
            bkp_file = os.path.splitext(sys.argv[0])[0] + "-" + conf.setup_version + ".py"
            shutil.move(conf.base_dir + "/" + sys.argv[0], conf.base_dir + "/" + bkp_file)
            shutil.move("/tmp/setup.py", conf.base_dir)
            log.debug("This script has been successfully updated. The old one was saved as " + bkp_file)
            exit(0)
        else:
            log.debug("A new setup script is available! You have version " + conf.setup_version + " and current version is " + version)
            log.debug("Run this script again with --selfupdate option when you are ready to update.")
            exit(1)
    else:
        log.debug("Current setup version: " + conf.setup_version + ". This setup script is up-to-date.")



#------------------------------------
def uncompress_bench_tools(conf):
    log.debug("* Uncompressing benchmark tools...")
    # Check whether we need to uncompress for the first time or to
    # uncompress again after the archives were updated.
    if not os.path.exists(conf.dl_dir + "/uncompressed.tools.done"):

        # FIXME : Write Cleanup of possible extracted data

        for file in os.listdir(conf.dl_dir):
            if file.endswith(".zip") and file.startswith("lrde-dbd-tools"):
                log.debug("Uncompressing " + file)
                with zipfile.ZipFile(conf.dl_dir + "/" + file, 'r') as fzip:
                    fzip.extractall(conf.dataset_dir)

        # Remember that it has already been uncompressed.
        open(conf.dl_dir + "/uncompressed.tools.done", "w+").close()
        log.debug("Uncompress done.")
    else:
        log.debug("Data already uncompressed.")


#------------------------------------
def uncompress_input_data(conf):
    log.debug("* Uncompressing input data...")
    # Check whether we need to uncompress for the first time or to
    # uncompress again after the archives were updated.
    if not os.path.exists(conf.dl_dir + "/uncompressed.inputs.done"):
        # Cleanup possibly extracted data.  Note: os.removedirs only
        # removes empty directories, hence shutil.rmtree.
        shutil.rmtree(conf.data_dir, ignore_errors=True)

        for file in os.listdir(conf.dl_dir):
            if file.endswith(".zip") and file.startswith("nouvel_obs_2402_"):
                log.debug("Uncompressing " + file)
                with zipfile.ZipFile(conf.dl_dir + "/" + file, 'r') as fzip:
                    fzip.extractall(conf.data_dir)

        # Remember that it has already been uncompressed.
        open(conf.dl_dir + "/uncompressed.inputs.done", "w+").close()
        log.debug("Uncompress done.")
    else:
        log.debug("Data already uncompressed.")


#------------------------------------
def uncompress_output_data(conf):
    log.debug("* Uncompressing output data...")
    # Check whether we need to uncompress for the first time or to
    # uncompress again after the archives were updated.
    if not os.path.exists(conf.dl_dir + "/uncompressed.outputs.done"):
        # Cleanup possibly extracted data.  Note: os.removedirs only
        # removes empty directories, hence shutil.rmtree.
        shutil.rmtree(conf.bench_dir + "/output", ignore_errors=True)
        if not os.path.exists(conf.bench_dir + "/output"):
            os.makedirs(conf.bench_dir + "/output")

        for file in os.listdir(conf.dl_dir):
            if file.endswith(".zip") and file.startswith("bin_"):
                log.debug("Uncompressing " + file)
                with zipfile.ZipFile(conf.dl_dir + "/" + file, 'r') as fzip:
                    fzip.extractall(conf.bench_dir + "/output")

        # Remember that it has already been uncompressed.
        open(conf.dl_dir + "/uncompressed.outputs.done", "w+").close()
        log.debug("Uncompress output data done.")
    else:
        log.debug("Output data already uncompressed.")


#------------------------------------
# Prepare the bench directory w.r.t. the downloaded files.
def setup_bench_directory(conf):
    log.debug("* Setting up benchmark directory")

    # Create directory
    if not os.path.exists(conf.bench_dir):
        os.makedirs(conf.bench_dir)

    # Add links to data
    if os.path.lexists(conf.bench_dir + "/input"):
        os.remove(conf.bench_dir + "/input")
    os.symlink(conf.data_dir + "/input", conf.bench_dir + "/input")
    if os.path.lexists(conf.bench_dir + "/gt"):
        os.remove(conf.bench_dir + "/gt")
    os.symlink(conf.data_dir + "/gt", conf.bench_dir + "/gt")

    log.debug("Done. Benchmark directory is ready.")



#------------------------------------
def check_and_install_dependencies(conf):
    debian_system = os.path.exists("/usr/bin/apt-get")

    # Get the dependency list.
    deps = []
    download(conf.dataset_url + "DEPS", conf.dl_dir, "Dependency list")
    with open(conf.dl_dir + "/DEPS") as f:
        for line in f:
            fields = line.split()
            if fields:
                deps.append(fields)
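    # Each DEPS line is expected to be "<package> [<min_version>]";
    # hypothetical example:
    #   g++-4.6 4.6
    #   libgraphicsmagick++1-dev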

    # On non-Debian systems, only report what is needed and stop.
    if not debian_system:
        log.debug("You are not running a Debian-based system. We cannot install the required dependencies for you. Please install the following dependencies before continuing:")
        log.debug(deps)
        log.debug("If you think that everything is already installed, relaunch this script with option '--no-deps-check'.")
        exit(1)

    # On Debian-based systems, we use APT to install packages.
    else:
        error = False
        apt_cmd = "sudo apt-get install"
        for dep in deps:
            # This package requires a specific version.
            if len(dep) == 2:
                # Check the versions available through APT.
                try:
                    output = subprocess.check_output(["/usr/bin/apt-cache", "madison", dep[0]])
                except subprocess.CalledProcessError:
                    exit(1)

                found = False
                version = None
                for package in output.splitlines():
                    version = package.split('|')[1].strip()
                    # Compare the first two version components, numerically
                    # when possible: a plain string comparison would be
                    # wrong (e.g. "10" < "9" lexicographically).
                    try:
                        numbers = [int(n) for n in version.split('.')[:2]]
                        min_numbers = [int(n) for n in dep[1].split('.')[:2]]
                    except ValueError:
                        # Versions with epochs or revisions ("1:2.3-4"):
                        # fall back to a string comparison.
                        numbers = version.split('.')[:2]
                        min_numbers = dep[1].split('.')[:2]

                    # The minimum version of the required package is
                    # found.
                    if numbers >= min_numbers:
                        apt_cmd += " " + dep[0] + "=" + version
                        found = True
                        break

                if not found:
                    log.critical("ERROR: The required dependency " + dep[0] + " cannot be installed: version " + str(version) + " found but " + dep[1] + " required.")
                    error = True

            # No specific version required
            else:
                apt_cmd += " " + dep[0]

        log.debug("* Installing missing dependencies through APT. Administrator privileges are needed.")
        try:
            rval = subprocess.call(shlex.split(apt_cmd))
        except subprocess.CalledProcessError:
            exit(1)

        if error or rval != 0:
            log.critical("ERROR: all dependencies could not be installed. Please see the output. You must install these dependencies by hand and relaunch this script with --no-deps-check.")
            exit(1)
        else:
            log.debug("Done. Missing dependencies installed.")



#------------------------------------
def setup_olena(conf):
    log.debug("* Setting up Olena...")

    patch_file = conf.src_dir + "/OLENA.PATCH"

    # 1/ Download and configure source directory.  Apply a specific patch
    # to avoid unnecessary dependencies on build tools.

    # The directory does not exist, we need to clone it from scratch.
    if not os.path.exists(conf.oln_src_dir):
        try:
            log.debug("Cloning Git repository")
            os.chdir(conf.src_dir)
            subprocess.check_call(shlex.split("git clone git://git.lrde.epita.fr/olena --single-branch -b papers/lazzara.13.ijdar"))

            os.chdir(conf.oln_src_dir)

            # Get the patch and apply it if it is non-empty.
            download(conf.dataset_url + "OLENA.PATCH", conf.src_dir)
            if os.path.getsize(patch_file):
                subprocess.check_call(shlex.split("git apply " + patch_file))

            # Bootstrap the source directory.
            subprocess.check_call(["./bootstrap"])

        except subprocess.CalledProcessError:
            log.exception("Cannot clone or bootstrap Olena source directory.")
            exit(1)

    # The directory exists and we just need to check if the repository
    # has been updated.
    else:
        try:
            os.chdir(conf.oln_src_dir)
            # Revert possible local changes.
            subprocess.check_call(shlex.split("git reset --hard"))

            # Update the repository and the patch.
            subprocess.check_call(shlex.split("git pull --rebase origin papers/lazzara.13.ijdar"))
            download(conf.dataset_url + "OLENA.PATCH", conf.src_dir)
            if os.path.getsize(patch_file):
                subprocess.check_call(shlex.split("git apply " + patch_file))

            # Bootstrap the source directory.
            subprocess.check_call(["./bootstrap"])

        except subprocess.CalledProcessError:
            log.exception("Olena directory could not be upgraded correctly. Try to remove the " + conf.oln_src_dir + " directory and relaunch this script.")
            exit(1)

    # 2/ Configure and Build binarization implementations.
    try:
        # Create build directory if needed
        if not os.path.exists(conf.oln_build_dir):
            os.makedirs(conf.oln_build_dir)

        # Configure Olena
        log.debug("Configuring Olena...")
        os.chdir(conf.oln_build_dir)
        subprocess.check_call([conf.oln_src_dir + "/configure", "--enable-scribo", "CXX=g++-4.6"])

        # FIXME: check configuration

        # Build binarization implementations
        log.debug("Building binarization implementations")
        bin_build_dir = conf.oln_build_dir + "/scribo/src/binarization"
        os.chdir(bin_build_dir)
        for impl in conf.supported_impl_bin:
            subprocess.check_call(["make", impl])

        # Linking binaries to bin/ directory
        if not os.path.exists(conf.bin_dir):
            os.makedirs(conf.bin_dir)
        os.chdir(conf.bin_dir)
        for impl in conf.supported_impl_bin:
            if os.path.lexists(conf.bin_dir + "/" + impl):
                os.remove(conf.bin_dir + "/" + impl)

            os.symlink(bin_build_dir + "/" + impl, conf.bin_dir + "/" + impl)

    except subprocess.CalledProcessError:
        log.exception("There was an error while building Olena. Maybe a dependency issue. Please contact olena@lrde.epita.fr with the script output.")
        exit(1)

    # Return to base directory.
    os.chdir(conf.base_dir)


#------------------------------------
def setup_bench_tools(conf):
    log.debug("* Setting up benchmark tools...")
    for file in os.listdir(conf.src_dir):
        if file.endswith(".cc"):
            filename = file.split('.')[0]
            bin_file = conf.build_dir + "/" + filename
            try:
                # Build standalone .cc tools
                log.debug("Compiling " + filename)
                subprocess.check_call(["g++-4.6", "-DNDEBUG", "-O3", conf.src_dir + "/" + file, "-I" + conf.oln_build_dir + "/milena", "-I" + conf.oln_src_dir + "/milena", "-o", bin_file, "-lGraphicsMagick++"])
            except subprocess.CalledProcessError:
                log.exception("ERROR: Cannot compile " + file)
                exit(1)
            # Linking binaries to bin/ directory
            if os.path.lexists(conf.bin_dir + "/" + filename):
                os.remove(conf.bin_dir + "/" + filename)
            os.symlink(conf.build_dir + "/" + filename, conf.bin_dir + "/" + filename)
        # Build Qt-based applications
        elif os.path.isdir(conf.src_dir + "/" + file) and file != "olena":
            if not os.path.lexists(conf.build_dir + "/" + file):
                os.makedirs(conf.build_dir + "/" + file)
            os.chdir(conf.build_dir + "/" + file)
            try:
                log.debug("Compiling " + file)
                subprocess.check_call(["qmake-qt4", conf.src_dir + "/" + file + "/" + file + ".pro", "QMAKE_CXX=g++-4.6"])
                subprocess.check_call(["make"])
            except subprocess.CalledProcessError:
                log.exception("ERROR: Cannot compile " + file)
                exit(1)

            # Linking binaries to bin/ directory
            if os.path.lexists(conf.bin_dir + "/" + file):
                os.remove(conf.bin_dir + "/" + file)

            os.symlink(conf.build_dir + "/" + file + "/" + file, conf.bin_dir + "/" + file)
            os.chdir(conf.base_dir)



#------------------------------------
def parse_options(conf):
    parser = argparse.ArgumentParser(description='Installation script for LRDE Document Binarization Dataset.',
                                     epilog='Copyright (C) 2013 EPITA Research and Development Laboratory (LRDE) http://olena.lrde.epita.fr | Contact: olena@lrde.epita.fr',
                                     add_help=True)
    group = parser.add_mutually_exclusive_group()
    group.add_argument('--prefix', metavar='<prefix>',
                       help='Set the target directory where the dataset will be installed. By default, this is in the current directory.')
    group.add_argument('--use-version', metavar='<version>',
                       help='Set a version of the Dataset to be setup. By default, this is the latest.')
    parser.add_argument('--no-deps-check', action='store_true',
                        help='Disable dependency checks. This is useful if dependencies are installed by hand.')
    group.add_argument('--selfupdate', action='store_true',
                       help='Download and update this setup script if a new version is available.')
    group.add_argument('--check-dataset-update', action='store_true',
                       help='Check if a new version of the dataset is available.')
    parser.add_argument('--version', action='version', version='%(prog)s ' + conf.setup_version + " - Copyright LRDE 2013")

    parser.parse_args(namespace = conf)

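# Typical invocations (illustrative):
#   python setup.py                       # install in the current directory
#   python setup.py --prefix ~/datasets   # install under a custom prefix
#   python setup.py --selfupdate          # update this script itself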


#------------------------------------
def handle_options(conf):
    if conf.selfupdate:
        check_current_setup_version(conf, True)
        exit(0)
    if conf.check_dataset_update:
        check_latest_dataset_version(conf)
        exit(0)
    if conf.prefix:
        conf.base_dir = os.path.abspath(conf.prefix) + "/"



#------------------------------------
def prepare_dataset_directory(conf):
    if conf.use_version:
        log.debug("* Forcing setup of dataset version " + conf.use_version + " as required by the user.")
        conf.set_dataset_version(conf.use_version)
    else:
        log.debug("* Setting up dataset version " + conf.dataset_version)

    # Creating dataset directory
    if not os.path.exists(conf.dataset_dir):
        os.makedirs(conf.dataset_dir)

    # Creating root directories
    for dir in conf.root_dirs:
        if not os.path.exists(conf.dataset_dir + "/" + dir):
            os.makedirs(conf.dataset_dir + "/" + dir)


#------------------------------------
def setup_is_finished(conf):
    log.debug("------------------------------------------")
    log.debug("Data and benchmark tools are ready to use!")



#------------------------------------
def main():
    conf = Config()

    # Parse user options.
    parse_options(conf)
    handle_options(conf)

    # Check versions of tools and data.
    check_current_setup_version(conf)
    check_latest_dataset_version(conf)

    prepare_dataset_directory(conf)

    # Check and install required dependencies
    if conf.no_deps_check:
        log.debug("Skipping dependency check as required by the user.")
    else:
        check_and_install_dependencies(conf)

    # Download data.
    download_files(conf, "input data", "INPUTS.CHECKSUM", "uncompressed.inputs.done")
    download_files(conf, "output data", "OUTPUTS.CHECKSUM", "uncompressed.outputs.done", "outputs")
    download_files(conf, "benchmark tools", "TOOLS.CHECKSUM", "uncompressed.tools.done")

    # Uncompress data.
    uncompress_bench_tools(conf)
    uncompress_input_data(conf)
    uncompress_output_data(conf)

    # Setup Olena and build binarization algorithms
    setup_olena(conf)
    setup_bench_tools(conf)

    # Setup bench directory.
    setup_bench_directory(conf)

    setup_is_finished(conf)


# Start setup only when run as a script.
if __name__ == "__main__":
    main()

