#!/usr/bin/python2 -s

"""
This script crawls the local copy of the master mirrors (which in our case
is just an NFS mount of the master mirror content). According to what it
finds, it updates the mirrormanager2 database.
It will create new product/version if it finds them and drop directories if
they disappeared.

Basically, it checks the content of the NFS mount: if a directory
contains a lot of files, it takes the 3 most recent ones and stores them in
the DB, where they are used later on to check if the mirrors are up to date.
If the directory does not contain too many files, it will register them all
and thus will check them all on the mirrors.
The threshold is stored in: `short_filelist` and is currently at 10.

If the script finds a yum or atomic repository (finds a repo data or an atomic
summary file), it will create a repository object (cf `make_repository`) which
is basically a mapping between a yum repo name (ie: Fedora-20-updates) and a
directory (/pub/fedora/linux/updates/....)

"""

import glob
import logging
import re
import optparse
import os
import stat
import sys
import yum.repoMDObject
import datetime
import time
import hashlib


sys.path.insert(0, os.path.join(os.path.dirname(
    os.path.abspath(__file__)), '..'))
import mirrormanager2.lib
import mirrormanager2.lib.umdl as umdl


# Module-wide logger; handlers and level are attached by setup_logging().
logger = logging.getLogger('umdl')

# Regular expressions (applied with ``re.match``) for paths that are always
# skipped. Raw strings keep ``\.`` a regex escape rather than an (invalid)
# string escape sequence; the resulting values are unchanged.
stdexcludes = [r'.*\.snapshot', r'.*/\.~tmp~']

def cache_directories(category):
    """Index the directories of ``category`` by their relative name.

    Each entry of ``category.directories`` has its ``name`` passed, together
    with ``category.topdir.name``, through ``umdl.remove_category_topdir``;
    leading and trailing slashes are then stripped and the result is used as
    the dictionary key.

    :arg category: object exposing ``topdir.name`` and ``directories``.
    :return: dict mapping the stripped relative name to the directory object.
        When two directories yield the same key, the later one wins.
    """
    topdir_name = category.topdir.name
    # Snapshot the collection before iterating, as the original code did.
    entries = list(category.directories)
    return dict(
        (umdl.remove_category_topdir(topdir_name, entry.name).strip(u'/'),
         entry)
        for entry in entries)


def is_excluded(path, excludes):
    """Tell whether ``path`` matches at least one exclusion pattern.

    Patterns are applied with ``re.match``, i.e. they are anchored at the
    start of ``path`` but not at its end.

    :arg path: the path to test.
    :arg excludes: iterable of regular expressions.
    :return: True if any pattern matches, False otherwise (including when
        ``excludes`` is empty).
    """
    return any(re.match(pattern, path) for pattern in excludes)


def sync_directories_from_fullfilelist(
        session, config, diskpath, category, excludes=None):
    """Sync the directories referenced by ``<diskpath>/fullfilelist``.

    The file is read line by line. For every entry its parent directory is
    computed and, unless skipped, handed to ``umdl.sync_category_directory``
    after which the session is committed.

    A directory is skipped when:

    - its name has an extension according to ``os.path.splitext``
      (heuristic inherited from the original code),
    - it was already handled earlier in this run,
    - it matches one of ``stdexcludes`` or ``excludes``,
    - it cannot be ``stat``-ed (for instance it vanished in the meantime).

    :arg session: database session; ``commit()`` is called after every
        directory that gets synced.
    :arg config: dict-like configuration; only ``UMDL_PREFIX`` is read.
    :arg diskpath: directory holding the ``fullfilelist`` file.
    :arg category: category being synced. Its ``unreadable_dirs`` attribute
        is reset to an empty set and filled in while iterating.
    :kwarg excludes: optional iterable of extra regular expressions, applied
        in addition to ``stdexcludes``.

    Errors raised while opening the ``fullfilelist`` file are not caught.
    """
    logger.debug("sync_directories_from_fullfilelist %r" % diskpath)
    category.unreadable_dirs = set()
    # drop any trailing slashes from diskpath
    diskpath = diskpath.rstrip(u'/')

    # Loop invariants, computed once instead of for every row.
    prefix = config.get('UMDL_PREFIX', '')
    all_excludes = stdexcludes + list(excludes or [])

    seen = set()
    with open(diskpath + '/fullfilelist') as stream:
        for row in stream:
            # Parent directory of the listed entry, rooted at diskpath.
            dirname = os.path.dirname(os.path.join(diskpath, row.strip()))
            if os.path.splitext(dirname)[1]:
                # If there is an extension it's a file not a folder
                continue
            if dirname in seen:
                continue
            seen.add(dirname)

            # ``dirname`` already contains diskpath; joining it with diskpath
            # a second time duplicated the prefix for relative diskpaths.
            relativeDName = dirname
            # Strip UMDL_PREFIX only where it leads the path: str.replace()
            # would also remove any later occurrence and mangle the name.
            if prefix and relativeDName.startswith(prefix):
                relativeDName = relativeDName[len(prefix):]
            relativeDName = relativeDName.rstrip('/')
            logger.debug("  walking %r" % relativeDName)
            if is_excluded(relativeDName, all_excludes):
                logger.info("excluding %s" % (relativeDName))
                continue

            # avoid disappearing directories
            try:
                s = os.stat(os.path.join(prefix, relativeDName))
            except OSError:
                logger.debug("Avoiding %r, dissappeared." % relativeDName)
                continue
            # Index access (rather than ``s.st_ctime``) keeps the integer
            # timestamp the original code passed on.
            ctime = s[stat.ST_CTIME]

            # "readable" means the read or the execute bit is set for
            # "others".
            readable = bool(s.st_mode & (stat.S_IROTH | stat.S_IXOTH))
            # A directory below an unreadable one is recorded as unreadable
            # too, so that the information propagates down the tree.
            if not readable \
                    or umdl.parent_dir(relativeDName) \
                    in category.unreadable_dirs:
                category.unreadable_dirs.add(relativeDName)

            umdl.sync_category_directory(
                session, config, category, relativeDName, readable, ctime)
            session.commit()


def sync_directories_from_disk(
        session, config, diskpath, category, excludes=None):
    """Walk ``diskpath`` and sync every directory found into the database.

    Each directory yielded by ``os.walk`` is handed to
    ``umdl.sync_category_directory`` unless it matches one of
    ``stdexcludes``/``excludes`` or cannot be ``stat``-ed. Excluded
    directories are not pruned from the walk: their children are still
    visited and tested against the patterns individually.

    :arg session: database session; ``commit()`` is called once, after the
        whole tree has been walked.
    :arg config: dict-like configuration; only ``UMDL_PREFIX`` is read.
    :arg diskpath: root directory of the walk.
    :arg category: category being synced. Its ``unreadable_dirs`` attribute
        is reset to an empty set and filled in while walking.
    :kwarg excludes: optional iterable of extra regular expressions, applied
        in addition to ``stdexcludes``.
    """
    logger.debug("sync_directories_from_disk %r" % diskpath)
    category.unreadable_dirs = set()
    # drop any trailing slashes from diskpath
    diskpath = diskpath.rstrip(u'/')

    # Loop invariants, computed once instead of for every directory.
    prefix = config.get('UMDL_PREFIX', '')
    all_excludes = stdexcludes + list(excludes or [])

    for dirpath, dirnames, filenames in os.walk(diskpath, topdown=True):
        relativeDName = dirpath
        # Strip UMDL_PREFIX only where it leads the path: str.replace()
        # would also remove any later occurrence and mangle the name.
        if prefix and relativeDName.startswith(prefix):
            relativeDName = relativeDName[len(prefix):]
        logger.debug("  walking %r" % relativeDName)

        if is_excluded(relativeDName, all_excludes):
            logger.info("excluding %s" % (relativeDName))
            continue

        # avoid disappearing directories
        try:
            s = os.stat(os.path.join(prefix, relativeDName))
        except OSError:
            logger.debug("Avoiding %r, dissappeared." % relativeDName)
            continue
        # Index access (rather than ``s.st_ctime``) keeps the integer
        # timestamp the original code passed on.
        ctime = s[stat.ST_CTIME]

        # "readable" means the read or the execute bit is set for "others".
        readable = bool(s.st_mode & (stat.S_IROTH | stat.S_IXOTH))
        # os.walk is top-down, so a parent is always seen before its
        # children and its unreadable state propagates down the tree.
        if not readable \
                or umdl.parent_dir(relativeDName) in category.unreadable_dirs:
            category.unreadable_dirs.add(relativeDName)

        umdl.sync_category_directory(
            session, config, category, relativeDName, readable, ctime)
    session.commit()


def setup_logging(config, options):
    """Attach a file handler (and optionally a console one) to ``logger``.

    The log file is ``options.logfile``. When the ``MM_LOG_DIR``
    configuration key is set and names an existing directory, the file is
    placed in that directory (an absolute ``options.logfile`` is used as
    given); otherwise the path is used as is, i.e. relative to the current
    working directory.

    With ``options.debug`` or ``options.list_categories`` set, messages are
    also sent to the console and the level is DEBUG; otherwise it is INFO.

    :arg config: dict-like configuration; only ``MM_LOG_DIR`` is read.
    :arg options: parsed command line options; ``logfile``, ``debug`` and
        ``list_categories`` are read.
    """
    # ``import logging`` alone does not load the ``logging.handlers``
    # submodule; without this import WatchedFileHandler is only reachable if
    # some other module happened to import it first.
    import logging.handlers

    log_dir = config.get('MM_LOG_DIR', None)
    if log_dir is not None and not os.path.isdir(log_dir):
        # MM_LOG_DIR seems to be configured but does not exist
        # Logging into cwd.
        logger.warning("Directory " + log_dir + " does not exists."
                       " Logging into CWD.")
        log_dir = None

    if log_dir is not None:
        log_file = os.path.join(log_dir, options.logfile)
    else:
        log_file = options.logfile

    formatter = logging.Formatter(
        fmt='%(asctime)s %(message)s', datefmt='%m/%d/%Y %I:%M:%S %p')

    file_handler = logging.handlers.WatchedFileHandler(log_file, "a+b")
    file_handler.setFormatter(formatter)
    logger.addHandler(file_handler)

    # list_categories is a special case where the user wants to see something
    # on the console and not only in the log file
    if options.debug or options.list_categories:
        console_handler = logging.StreamHandler()
        console_handler.setFormatter(formatter)
        logger.addHandler(console_handler)
        logger.setLevel(logging.DEBUG)
    else:
        logger.setLevel(logging.INFO)


def main():
    """Command line entry point of the master directory list updater.

    Parses the command line, executes the configuration file, sets up the
    database session and logging, then either lists the categories
    (``--list``) or syncs each entry of the ``umdl_master_directories``
    configuration list, optionally restricted to one category with
    ``--category``.

    ``--start-at`` overrides the configured path only when exactly one
    category is being processed.

    :return: 0, which the caller uses as the process exit status.
    """
    parser = optparse.OptionParser(usage=sys.argv[0] + " [options]")
    parser.add_option(
        "-c", "--config",
        dest="config", default='/etc/mirrormanager/mirrormanager2.cfg',
        help="Configuration file to use")
    parser.add_option(
        "--logfile",
        dest="logfile", default='umdl.log',
        metavar="FILE", help="write logs to FILE")
    parser.add_option(
        "--list",
        dest="list_categories", default=False, action="store_true",
        help="list existing categories and exit")
    parser.add_option(
        "--category", metavar="CATEGORY",
        dest="categories", default=None,
        help="only scan category CATEGORY")
    parser.add_option(
        "--debug",
        dest="debug", default=False, action="store_true",
        help="enable debugging")
    # NOTE(review): --delete-directories is accepted but nothing in this
    # function reads options.delete_directories.
    parser.add_option(
        "--delete-directories",
        dest="delete_directories",
        default=False,
        action="store_true",
        help="delete directories from the database that are no longer "
             "on disk")
    parser.add_option(
        "--start-at", dest="path", default=None,
        help="Specify the path at which to start the run")

    (options, args) = parser.parse_args()

    # NOTE(review): the configuration file is executed as Python code, so it
    # must only ever come from a trusted location.
    config = dict()
    with open(options.config) as config_file:
        exec(compile(config_file.read(), options.config, 'exec'), config)

    session = mirrormanager2.lib.create_session(config['DB_URL'])

    setup_logging(config, options)

    logger.info("Starting umdl")

    if options.list_categories:
        for c in mirrormanager2.lib.get_categories(session):
            logger.info(c)
        session.close()
        logger.info("Ending umdl")
        return 0

    umdl.setup_arch_version_cache(session)

    # A missing key yields None; fall back to an empty list so that the
    # loops below do not fail with a TypeError.
    master_directories = config.get('umdl_master_directories') or []
    if options.categories is None:
        check_categories = master_directories
    else:
        check_categories = [
            entry for entry in master_directories
            if entry['category'] == options.categories]

    for i in check_categories:
        cname = i['category']

        category = mirrormanager2.lib.get_category_by_name(session, cname)
        if not category:
            logger.error(
                'umdl_master_directories Category %s does not exist in the '
                'database, skipping' % (cname))
            continue

        if category.product is None:
            logger.error(
                'umdl_master_directories Category %s has null Product, '
                'skipping' % (cname))
            continue

        category.excludes = i.get('excludes', [])
        category.extra_rsync_options = i.get('options')
        category.directory_cache = cache_directories(category)

        path = i['path']
        if len(check_categories) == 1:
            path = options.path or i['path']

        # The configured excludes are handed to the sync functions; before,
        # they were only stored on the category and never reached the
        # exclusion check performed while scanning.
        if i['type'] == 'directory':
            sync_directories_from_disk(
                session, config, path, category,
                excludes=category.excludes)
        elif i['type'] == 'fullfilelist':
            sync_directories_from_fullfilelist(
                session, config, path, category,
                excludes=category.excludes)
        else:
            logger.info('Un-supported type: %s' % i['type'])

    logger.info('Refresh the list of repomd.xml')
    # ``Directory`` was used here without ever being imported in this file,
    # which ended every run with a NameError.
    # NOTE(review): assumed to be the model class living in
    # mirrormanager2.lib.model -- confirm against the package layout.
    from mirrormanager2.lib.model import Directory
    Directory.age_file_details(session, config)

    session.close()
    logger.info("Ending umdl")

    return 0


if __name__ == "__main__":
    # Run the updater and hand main()'s return value back to the shell as
    # the process exit status.
    exit_status = main()
    sys.exit(exit_status)
