// GeneticAlgo/inc/genetic.h

#pragma once

#include <algorithm>
#include <cfloat>
#include <cstdarg>
#include <cstdio>
#include <cstdlib>

#include "util.h"
#include "sync.h"
#include "rand.h"

using namespace sync;

namespace genetic {

// Forward declarations of types that are defined further down in this header.
template <class T> struct Stats;
template <class T> struct Strategy;
struct CellTracker;


// printf-style templates for the periodic statistics report: one line that
// aggregates all worker threads and one line per worker thread.
//
// These are declared `static` (internal linkage), like the other globals
// below, so that including this header from more than one translation unit
// does not produce duplicate symbol definitions at link time.
static const char *global_stat_format_str = "GLOBAL, Progress %.1f%%, Top: %.5e, Overhead Per: %.4f%%, Gen: %.4f, Overhead: %.4f, Cross: %.4f (s), Mutate: %.4f (s), Fitness: %.4f (s), Sorting: %.4f (s)\n";
static const char *thread_stat_format_str = "%d, Progress %d/%d, Top: %.5e, Overhead Per: %.4f%%, Gen: %.4f, Overhead: %.4f, Cross: %.4f (s), Mutate: %.4f (s), Fitness: %.4f (s), Sorting: %.4f (s)\n";

// Capacity of the formatting scratch buffer: twice the length of the longer
// of the two templates above.
static int stat_str_len = 2*max(strlen(thread_stat_format_str), strlen(global_stat_format_str));
// Scratch buffer that log() formats each message into.
static char *stat_str = (char*)malloc(stat_str_len);
// Scratch buffer (64 bytes) that log() builds the log file path into.
static char *filename = (char*)malloc(64);
// Used by log() as the numeric suffix of the log file name. It is not
// assigned anywhere in the visible code, so it stays 0 unless the including
// translation unit changes it.
static int n_threads = 0;

/**
 * Formats a printf-style message, prints it to stdout and appends it to
 * "logs/logs-<n_threads>.txt".
 *
 * The message is formatted into the shared scratch buffer `stat_str`; output
 * longer than that buffer is truncated rather than treated as a fatal error.
 * If the log file cannot be opened (for example because the "logs" directory
 * does not exist) the message is still printed to stdout and the file write
 * is skipped.
 *
 * Declared `inline` because it is defined in a header.
 *
 * NOTE(review): the function uses shared global buffers without any locking;
 * in the visible code it is only called from the thread that executes run().
 *
 * @param format_str printf-style format string.
 * @param ...        Arguments consumed by format_str.
 */
inline void log(const char *format_str, ...) {
    // Same capacity that the scratch buffer was allocated with.
    const size_t capacity = 2*max(strlen(thread_stat_format_str), strlen(global_stat_format_str));

    va_list list;
    va_start(list, format_str);
    const int written = vsnprintf(stat_str, capacity, format_str, list);
    va_end(list);
    if (written < 0) {
        // Encoding error: the buffer contents are unspecified, emit nothing.
        return;
    }

    printf("%s", stat_str);

    // 64 matches the size the filename buffer was allocated with.
    snprintf(filename, 64, "logs/logs-%d.txt", n_threads);

    FILE *f = NULL;
    if (fopen_s(&f, filename, "a") != 0 || f == NULL) {
        // Could not open the log file; the message already went to stdout.
        return;
    }
    fwrite(stat_str, sizeof(char), strlen(stat_str), f);
    fclose(f);
}

// Forward declaration of the algorithm entry point; the definition appears
// later in this header.
template <class T> T run(Strategy<T>);

// Configuration for one run of the genetic algorithm, together with the
// user-supplied callbacks that define the problem being solved.
template <class T>
struct Strategy {
    // Number of worker threads that will be evaluating cell fitness.
    int num_threads;

    // Interval between statistics printouts, in seconds.
    float stats_print_period_s;

    // Size of the population pool owned by each worker thread.
    int num_cells_per_thread;

    // Number of generations (epochs) each worker thread runs.
    int num_generations;

    // When true, threads exchange their best cell through a shared
    // "global best" slot.
    bool share_breakthroughs;

    // Generation period that controls how often a thread syncs with the
    // global best.
    int share_breakthrough_gen_period;

    // When true, every cell's fitness is evaluated every generation.
    bool test_all;

    // Chance to test any given cell's fitness in a generation. Relevant only
    // if test_all is false.
    float test_chance;

    // When true, cells that score well in the evaluation stage produce
    // children that replace low-scoring cells.
    bool enable_crossover;

    // Number of unique high-scoring parents passed to one crossover call.
    int crossover_parent_num;

    // Number of parents to skip over when moving to the next set of parents.
    // A stride of 1 produces maximum overlap, because the set of parents
    // only changes by one every crossover.
    int crossover_parent_stride;

    // Number of children the user is expected to produce in one crossover
    // call.
    int crossover_children_num;

    // Whether cells may be mutated before fitness evaluation.
    bool enable_mutation;

    // Chance for any given cell to be mutated during the mutation stage.
    float mutation_chance;

    // Seed handed to the random number helper.
    uint64_t rand_seed;

    // Whether higher fitness values are considered better. Set this to false
    // if fitness is an error function.
    bool higher_fitness_is_better;

    // ---- User defined functions ----

    // Creates one cell of the initial population.
    T (*make_default_cell)();

    // Mutates the given cell in place.
    void (*mutate)(T &cell_to_modify);

    // Combines the parent cells and writes the results into the cells that
    // out_children points to.
    void (*crossover)(const Array<T *> parents, const Array<T *> out_children);

    // Scores a cell.
    float (*fitness)(const T &cell);
};

// Per-worker-thread progress record. The worker appends to it every
// generation and the reporting loop in run() reads it; accesses are
// guarded by `m`.
template <class T>
struct Stats {
    // Top-ranked cell of each completed generation.
    DynArray<T> best_cells;

    // Score of the top-ranked cell of each completed generation.
    DynArray<float> best_cell_fitness;

    // Number of generations completed so far.
    int gen;

    // Set once the worker has left its generation loop.
    bool done;

    // Wall-clock duration of each generation as a whole.
    DynArray<TimeSpan> gen_time;

    // Duration of each generation's individual stages.
    DynArray<TimeSpan> crossover_time;
    DynArray<TimeSpan> mutate_time;
    DynArray<TimeSpan> fitness_time;
    DynArray<TimeSpan> sorting_time;

    // Lock protecting this record.
    Mutex m;
};

// Lightweight handle that pairs a fitness score with the index of the cell
// it belongs to. Trackers are what gets sorted each generation, so the cells
// themselves never have to be moved.
struct CellTracker {
    float score;  // Most recent fitness value recorded for the cell.
    int cellid;   // Index of the cell in the owning thread's cell array.
};

// Everything a worker thread needs, bundled so it can be passed through the
// single pointer argument of the thread entry point.
template <class T>
struct WorkerThreadArgs {
    // Algorithm configuration and user callbacks.
    Strategy<T> strat;

    // This thread's population and the trackers that rank it.
    Array<T> cells;
    Array<CellTracker> trackers;

    // Progress record this thread reports into.
    Stats<T> *stats;

    // Lock taken around accesses to the two "global best" pointers below.
    Mutex m;

    // Best score / cell found by any thread so far.
    float *best_global_score;
    T* best_global_cell;
};

// Returns the address of the cell that `tracker` refers to.
// NOTE(review): `cells` is taken by value, so this presumably relies on Array
// being a non-owning view over the underlying storage - confirm in util.h.
template <class T>
T* _cellp(Array<T> cells, CellTracker tracker) {
    T& tracked_cell = cells[tracker.cellid];
    return &tracked_cell;
}

/**
 * Thread entry point that evolves one population for
 * `strat.num_generations` generations.
 *
 * Each generation runs these stages and records how long each one took in
 * the thread's Stats record:
 *   0. optionally exchange the best cell with the shared "global best" slot,
 *   1. crossover: the best-ranked cells produce children that overwrite the
 *      worst-ranked cells,
 *   2. mutation,
 *   3. fitness evaluation,
 *   4. sorting the trackers so that index 0 is the best cell.
 *
 * @param args Pointer to a WorkerThreadArgs<T>; it must outlive the thread.
 * @return Always 0.
 */
template <class T> DWORD worker(LPVOID args) {
    // Unpack everything...
    WorkerThreadArgs<T>* worker_args = static_cast<WorkerThreadArgs<T>*>(args);
    Strategy<T> strat = worker_args->strat;
    Array<T> cells = worker_args->cells;
    Array<CellTracker> trackers = worker_args->trackers;
    Stats<T> &stats = *worker_args->stats;
    float* best_global_score = worker_args->best_global_score;
    T* best_global_cell = worker_args->best_global_cell;
    Mutex best_m = worker_args->m;

    // Prepare crossover operations as these will be the same every time except
    // for the exact cell pointers
    const int npar = strat.crossover_parent_num;
    const int nchild = strat.crossover_children_num;
    Array<T*> parents = make_array<T*>(npar);
    Array<T*> children = make_array<T*>(nchild);

    const bool gt = strat.higher_fitness_is_better;
    const int share_period = strat.share_breakthrough_gen_period;

    // Orders trackers best-first according to the configured direction.
    const auto comp = [gt](const CellTracker &a, const CellTracker &b) {
        return gt ? (a.score > b.score) : (a.score < b.score);
    };

    // Crossover is skipped for configurations that could never terminate
    // (no children consumed per step) or that would index before the start
    // of the tracker array (negative stride).
    const bool crossover_usable = strat.enable_crossover
        && npar > 0
        && nchild > 0
        && strat.crossover_parent_stride >= 0;

    TimeSpan start, gen_start;
    while (stats.gen < strat.num_generations) {
        gen_start = now();

        // 0. Share/Integrate global breakthrough.
        // Happens once every `share_period` generations; the affinity offset
        // staggers the threads. Generation 0 is excluded because no score has
        // been evaluated yet, so there is nothing meaningful to share.
        const bool share_now = strat.share_breakthroughs
            && share_period > 0
            && stats.gen > 0
            && trackers.len > 0
            && (stats.gen + get_affinity()) % share_period == 0;
        if (share_now) {
            lock(best_m);
            if (better(gt, front(trackers).score, *best_global_score) != *best_global_score) {
                // Share: our best beats the global best, publish it.
                *best_global_cell = *_cellp(cells, trackers[0]);
                *best_global_score = trackers[0].score;
            } else {
                // Integrate: adopt the global best as our top cell.
                *_cellp(cells, trackers[0]) = *best_global_cell;
                trackers[0].score = *best_global_score;
            }
            unlock(best_m);
        }

        // 1. crossover
        start = now();
        if (crossover_usable) {
            // Parents are taken from the front (best) of the ranking and
            // children overwrite cells from the back (worst); stop when the
            // two windows would overlap.
            int parent_end = npar;
            int child_begin = trackers.len - nchild;
            while (parent_end <= child_begin) {
                const int parent_begin = parent_end - npar;

                // Get pointers to all the parent cells
                for (int i = parent_begin; i < parent_end; i++) {
                    T* cell = _cellp(cells, trackers[i]);
                    assert(cell != NULL);
                    parents[i - parent_begin] = cell;
                }

                // Get pointers to all the child cells (these will be overwritten)
                for (int i = child_begin; i < child_begin + nchild; i++) {
                    T* cell = _cellp(cells, trackers[i]);
                    assert(cell != NULL);
                    children[i - child_begin] = cell;
                }

                strat.crossover(parents, children);
                parent_end += strat.crossover_parent_stride;
                child_begin -= nchild;
            }
        }
        lock(stats.m);
        append(stats.crossover_time, now() - start);
        unlock(stats.m);

        // 2. mutate (only when enabled; the stage time is recorded either way)
        start = now();
        if (strat.enable_mutation) {
            for (int i = 0; i < trackers.len; i++) {
                if (abs(norm_rand(strat.rand_seed)) < strat.mutation_chance) {
                    strat.mutate(cells[trackers[i].cellid]);
                }
            }
        }
        lock(stats.m);
        append(stats.mutate_time, now() - start);
        unlock(stats.m);

        // 3. evaluate: every cell when test_all is set, otherwise each cell
        // with probability test_chance. Short-circuiting keeps the random
        // generator untouched in the test_all case.
        start = now();
        for (int i = 0; i < trackers.len; i++) {
            if (strat.test_all || abs(norm_rand(strat.rand_seed)) < strat.test_chance) {
                trackers[i].score = strat.fitness(cells[trackers[i].cellid]);
            }
        }
        lock(stats.m);
        append(stats.fitness_time, now() - start);
        unlock(stats.m);

        // 4. sort the whole tracker range [first, first + len); std::sort
        // takes a half-open range, so the end pointer is one past the last
        // element.
        start = now();
        if (trackers.len > 0) {
            CellTracker *first = &trackers[0];
            std::sort(first, first + trackers.len, comp);
        }
        lock(stats.m);
        append(stats.sorting_time, now() - start);

        append(stats.best_cells, cells[trackers[0].cellid]);
        append(stats.best_cell_fitness, trackers[0].score);
        append(stats.gen_time, now() - gen_start);
        stats.gen++;
        unlock(stats.m);
    }

    // The reporting thread reads `done` while holding stats.m, so write it
    // under the same lock.
    lock(stats.m);
    stats.done = true;
    unlock(stats.m);
    return 0;
}

/**
 * Runs the genetic algorithm described by `strat` and returns the best cell
 * found.
 *
 * Spawns `strat.num_threads` worker threads, each with its own population of
 * `strat.num_cells_per_thread` cells. The calling thread then acts as the
 * statistics thread: every `strat.stats_print_period_s` seconds it logs one
 * line per worker plus one aggregate line, until every worker has finished.
 *
 * @param strat Algorithm configuration and user callbacks.
 * @return The best final cell over all workers. If no worker completed a
 *         generation, a value-initialized T is returned.
 */
template <class T> T run(Strategy<T> strat) {
    Array<Stats<T>> stats = make_array<Stats<T>>(strat.num_threads);
    Array<Thread> threads = make_array<Thread>(strat.num_threads);
    Array<WorkerThreadArgs<T>> args = make_array<WorkerThreadArgs<T>>(strat.num_threads);

    // Worst possible score for the configured direction. FLT_MIN is the
    // smallest *positive* float, so the floor for a maximisation run has to
    // be -FLT_MAX.
    const float worst_score = strat.higher_fitness_is_better ? -FLT_MAX : FLT_MAX;

    float best_global_score = worst_score;
    T best_global_cell;

    // A single lock shared by every worker, so that accesses to the global
    // best score/cell really are mutually exclusive.
    // NOTE(review): assumes copies of a Mutex refer to the same underlying
    // lock (worker() already copies it by value) - confirm in sync.h.
    auto best_global_m = make_mutex();

    allow_all_processors();
    set_affinity(0);

    for (int i = 0; i < strat.num_threads; i++) {
        stats[i] = {
            .best_cells=make_dynarray<T>(strat.num_generations),
            .best_cell_fitness=make_dynarray<float>(strat.num_generations),
            .gen_time=make_dynarray<TimeSpan>(strat.num_generations),
            .crossover_time=make_dynarray<TimeSpan>(strat.num_generations),
            .mutate_time=make_dynarray<TimeSpan>(strat.num_generations),
            .fitness_time=make_dynarray<TimeSpan>(strat.num_generations),
            .sorting_time=make_dynarray<TimeSpan>(strat.num_generations),
            .m=make_mutex()
        };

        // Each thread only ever touches its own num_cells_per_thread cells.
        Array<T> cells = make_array<T>(strat.num_cells_per_thread);
        Array<CellTracker> trackers = make_array<CellTracker>(strat.num_cells_per_thread);
        for (int c = 0; c < strat.num_cells_per_thread; c++) {
            cells[c] = strat.make_default_cell();
            trackers[c] = {0, c};
        }

        args[i].strat=strat;
        args[i].cells=cells;
        args[i].trackers=trackers;
        args[i].stats=&stats[i];
        args[i].best_global_score=&best_global_score;
        args[i].best_global_cell=&best_global_cell;
        args[i].m = best_global_m;

        threads[i] = make_thread(worker<T>, &args[i], i+1);
    }


    // We are the stats thread
    bool complete = false;
    while (!complete) {
        sleep(from_s(strat.stats_print_period_s));

        log("**********************\n");
        float g_avg_gen_time = 0;
        float g_avg_crossover_time = 0;
        float g_avg_mutate_time = 0;
        float g_avg_fitness_time = 0;
        float g_avg_sorting_time = 0;
        float g_avg_overhead_time = 0;
        float g_progress_per = 0;
        float g_best_fitness = worst_score;
        int reporting = 0; // workers that have completed at least one generation

        complete = true;

        for (int i = 0; i < stats.len; i++) {
            lock(stats[i].m);
            complete &= stats[i].done;

            // Index of the last completed generation; negative when this
            // worker has not finished a generation yet, in which case there
            // is nothing to report for it.
            int end = stats[i].gen_time.end-1;
            if (end < 0) {
                unlock(stats[i].m);
                continue;
            }

            int gen = stats[i].gen;
            float gen_time = to_s(stats[i].gen_time[end]);
            float crossover_time = to_s(stats[i].crossover_time[end]);
            float mutate_time = to_s(stats[i].mutate_time[end]);
            float fitness_time = to_s(stats[i].fitness_time[end]);
            float sorting_time = to_s(stats[i].sorting_time[end]);
            float best_score = back(stats[i].best_cell_fitness);

            // Everything needed has been copied; release the lock before
            // doing file I/O so the worker is not stalled by logging.
            unlock(stats[i].m);

            float progress_per = static_cast<float>(gen) / static_cast<float>(strat.num_generations) * 100;

            // Time not accounted for by the four measured stages.
            float overhead = gen_time - (crossover_time + mutate_time + fitness_time + sorting_time);
            if (overhead < 0) {
                overhead = 0;
            }
            float overhead_per = gen_time > 0 ? overhead / gen_time * 100 : 0;

            g_avg_gen_time += gen_time;
            g_avg_crossover_time += crossover_time;
            g_avg_mutate_time += mutate_time;
            g_avg_fitness_time += fitness_time;
            g_avg_sorting_time += sorting_time;
            g_avg_overhead_time += overhead;
            g_progress_per += progress_per;
            g_best_fitness = better(strat.higher_fitness_is_better, best_score, g_best_fitness);
            reporting++;

            log(thread_stat_format_str, i, gen, strat.num_generations, best_score, overhead_per, gen_time, overhead, crossover_time, mutate_time, fitness_time, sorting_time);
        }

        // Only print the aggregate line when there is something to average.
        if (reporting > 0) {
            g_avg_gen_time       /= reporting;
            g_avg_crossover_time /= reporting;
            g_avg_mutate_time    /= reporting;
            g_avg_fitness_time   /= reporting;
            g_avg_sorting_time   /= reporting;
            g_avg_overhead_time  /= reporting;
            g_progress_per       /= reporting;

            float g_avg_overhead_per = g_avg_gen_time > 0 ? g_avg_overhead_time / g_avg_gen_time * 100 : 0;

            log(global_stat_format_str, g_progress_per, g_best_fitness, g_avg_overhead_per, g_avg_gen_time, g_avg_overhead_time, g_avg_crossover_time, g_avg_mutate_time, g_avg_fitness_time, g_avg_sorting_time);
        }
    }

    for (int i = 0; i < threads.len; i++) {
        join(threads[i]);
    }

    // Pick the best final cell over all workers. The first worker that has a
    // result seeds the comparison, so the outcome never depends on a
    // sentinel score.
    T best_cell{};
    bool have_best = false;
    float best_score = worst_score;
    for (int i = 0; i < stats.len; i++) {
        if (stats[i].gen <= 0) {
            continue; // this worker produced no generations
        }
        float score = back(stats[i].best_cell_fitness);
        bool beats_best = strat.higher_fitness_is_better ? score > best_score : score < best_score;
        if (!have_best || beats_best) {
            best_cell = back(stats[i].best_cells);
            best_score = score;
            have_best = true;
        }
    }

    return best_cell;
}

} // namespace genetic