#!/usr/bin/env python
import re
import argparse
import configparser
import sys
import signal

import minio

# Object keys treated as cache contents: anything ending in ".narinfo", or
# keys of the form "nar/<name>.nar.<ext>". Keys containing whitespace never match.
re_filename_filter = re.compile(r'^(\S+\.narinfo|nar\/\S+\.nar\.\S+)$')
# One "Key: value" line of a narinfo file (the value may be empty).
re_narinfo_line = re.compile(r'^(\S+): (.*)$')
# Captures the hash prefix of a "<hash>-<name>" reference.
re_nix_path_hash = re.compile(r'^([0123456789abcdfghijklmnpqrsvwxyz]+)-.+$')

def parse_narinfo(data):
    """Parse the raw bytes of a narinfo file into a dict.

    Every "Key: value" line becomes an entry of the result; lines that do not
    have that shape are skipped. Some keys are post-processed:

    * ``References`` becomes a set of "<hash>.narinfo" names, one per
      referenced "<hash>-<name>" entry (an empty set when there are none).
    * ``FileSize`` and ``NarSize`` are converted to ``int``.

    The extra key ``_NarInfoSize`` holds the byte length of ``data`` itself.

    Args:
        data: UTF-8 encoded contents of the narinfo file, as ``bytes``.

    Returns:
        dict mapping narinfo keys to their (possibly converted) values.

    Raises:
        ValueError: if a reference is not of the form "<hash>-<name>", or a
            size field is not an integer.
        UnicodeDecodeError: if ``data`` is not valid UTF-8.
    """
    vals = {'_NarInfoSize': len(data)}
    for line in data.decode('utf-8').split('\n'):
        line_match = re_narinfo_line.match(line)
        if not line_match:
            continue

        key, val = line_match.groups()

        if key == 'References':
            refs = set()
            # split() without an argument ignores repeated or trailing
            # whitespace instead of producing empty tokens.
            for ref in val.split():
                ref_match = re_nix_path_hash.match(ref)
                if not ref_match:
                    # Explicit error rather than `assert`, which is stripped
                    # when Python runs with -O.
                    raise ValueError(f'malformed narinfo reference: {ref!r}')
                refs.add(f'{ref_match.group(1)}.narinfo')
            val = refs
        elif key in ('FileSize', 'NarSize'):
            val = int(val)

        vals[key] = val
    return vals

def log(message):
    """Write ``message`` as one line to standard error and flush it immediately."""
    print(message, file=sys.stderr, flush=True)

def main():
    """Command-line entry point: trim the S3 cache down by deleting old objects.

    Configuration is read from the file(s) given with ``-c``:

    * ``[gc] threshold``: cache size in MiB below which nothing is done.
    * ``[gc] stop``: size in MiB to shrink the cache to; must be smaller
      than ``threshold``.
    * ``[s3] endpoint`` / ``[s3] bucket``: where the cache lives. Every other
      key in ``[s3]`` is passed to ``minio.Minio`` as a keyword argument.

    Objects matching ``re_filename_filter`` are sorted by modification time
    and the oldest ones are deleted until the remaining size is at most the
    ``stop`` value. With ``--dry-run`` the selection is only logged.

    Exits with status 1 on invalid GC settings or if any deletion fails, and
    with status 0 when SIGTERM is received.
    """
    def sig_handler(signum, frame):
        sys.exit(0)
    signal.signal(signal.SIGTERM, sig_handler)

    parser = argparse.ArgumentParser(description='"Garbage collect" S3-based Nix cache')
    parser.add_argument('-c', '--config', required=True, action='append', help='config file')
    parser.add_argument('-d', '--dry-run', action='store_true', help="don't actually delete anything")
    parser.add_argument('-v', '--verbose', action='store_true', help="log extra info")

    args = parser.parse_args()

    def verbose(message):
        if args.verbose:
            log(message)

    config = configparser.ConfigParser()
    config.read(args.config)

    gc_thresh = config.getint('gc', 'threshold')*1024*1024
    gc_stop = config.getint('gc', 'stop')*1024*1024
    # Explicit check rather than `assert`, which is stripped under -O.
    if gc_stop >= gc_thresh:
        log('Invalid config: gc.stop must be smaller than gc.threshold')
        sys.exit(1)

    # NOTE(review): configparser returns every value as a string, so boolean
    # Minio options (e.g. `secure`) would arrive as truthy non-empty strings.
    # Confirm no config depends on passing such options here.
    s3_special = {'endpoint', 'bucket'}
    s3_ext = {key: value for key, value in config.items('s3') if key not in s3_special}
    mio = minio.Minio(config.get('s3', 'endpoint'), **s3_ext)

    bucket = config.get('s3', 'bucket')
    objs = [
        o for o in mio.list_objects(bucket, recursive=True)
        if re_filename_filter.match(o.object_name)
    ]

    total_size = sum(o.size for o in objs)
    if total_size < gc_thresh:
        log(f'Cache is only {total_size/1024/1024}MiB, not bothering')
        return
    log(f'Cache is {total_size/1024/1024}MiB, collecting garbage')

    # Pick objects oldest-first until enough bytes are selected. Iterating
    # the list directly cannot run past its end, unlike manual indexing.
    size_to_free = total_size - gc_stop
    free_size = 0
    to_delete = []
    for obj in sorted(objs, key=lambda o: o.last_modified):
        if free_size >= size_to_free:
            break
        free_size += obj.size
        to_delete.append(obj.object_name)
        verbose(f'Deleting {obj.object_name}')
        verbose(f'Up to {free_size/1024/1024}MiB')

    log(f'About to delete {len(to_delete)} NARs / narinfos, total size {free_size/1024/1024}MiB')
    if args.dry_run:
        return

    delete_objs = [minio.deleteobjects.DeleteObject(name) for name in to_delete]
    # remove_objects() yields errors lazily while it issues the delete
    # requests, so drain it completely: bailing out on the first error would
    # hide later errors and skip the remaining deletions.
    failed = 0
    for err in mio.remove_objects(bucket, delete_objs):
        log(f'Error while deleting: {err}')
        failed += 1
    if failed:
        sys.exit(1)

    # TODO: Make this smart?
    #narinfos = sorted(filter(lambda o: o.object_name.endswith('.narinfo'), objs), key=lambda o: o.last_modified)
    #narinfos_map = {}
    #def narinfo(name):
    #    if name not in narinfos_map:
    #        try:
    #            resp = mio.get_object(bucket, name)
    #            info = parse_narinfo(resp.read())
    #            narinfos_map[name] = info
    #        finally:
    #            resp.close()
    #            resp.release_conn()
    #    return narinfos_map[name]

    #free_size = 0
    #to_delete = set()
    #to_delete_nars = []
    #def traverse_narinfo(name):
    #    if name in to_delete:
    #        return 0

    #    info = narinfo(name)
    #    verbose(f"Going to delete {name} ({info['URL']}; {info['StorePath']})")
    #    to_delete_nars.append(info['URL'])
    #    size = info['_NarInfoSize'] + info['FileSize']
    #    for ref in info['References']:
    #        if ref == name:
    #            continue
    #        size += traverse_narinfo(ref)

    #    to_delete.add(name)
    #    return size

    #i = 0
    #while free_size < total_size - gc_stop:
    #    obj = narinfos[i]
    #    free_size += traverse_narinfo(obj.object_name)
    #    verbose(f'Up to {free_size/1024/1024}MiB')
    #    i += 1

    #assert len(to_delete_nars) == len(to_delete)
    #log(f'About to delete {len(to_delete)} NARs (and associated narinfos), total size {free_size/1024/1024}MiB')

# Run the collector only when executed as a script, not when imported.
if __name__ == '__main__':
    main()