#pragma once #include #include #include #include "util.h" #include "sync.h" #include "rand.h" using namespace sync; using namespace std; namespace genetic { template struct Stats; template struct Strategy; struct CellTracker; template T run(Strategy); template struct Strategy { // Number of worker threads that will be evaluating cell fitness int num_threads; // Period of print statements (in seconds) float stats_print_period_s; // Size of the population pool per sim thread int num_cells_per_thread; // Number of times (epochs) to run the algorithm int num_generations; // Each thread will integrate the best globally performing cell bool share_breakthroughs; // How many generations to explore before resyncing with the global best int share_breakthrough_gen_period; bool test_all; // Sets whether or not every cell's fitness is evaluated every // generation float test_chance; // Chance to test any given cell's fitness. Relevant only // if test_all is false. bool enable_crossover; // Cells that score well in the evaluation stage // produce children that replace low-scoring cells int crossover_parent_num; // Number of unique high-scoring parents in a // crossover call. int crossover_parent_stride; // Number of parents to skip over when moving to // the next set of parents. A stride of 1 would // produce maximum overlap because the set of // parents would only change by one every // crossover. int crossover_children_num; // Number of children to expect the user to // produce in the crossover function. bool enable_mutation; // Cells may be mutated // before fitness evaluation float mutation_chance; // Chance for any given cell to be mutated cells during // the mutation uint64_t rand_seed; bool higher_fitness_is_better; // Sets whether or not to consider higher // fitness values better or worse. Set this to // false if fitness is an error function. // User defined functions T (*make_default_cell)(); void (*mutate)(T &cell_to_modify); void (*crossover)(const Array parents, const Array out_children); float (*fitness)(const T &cell); }; template struct Stats { DynArray best_cells; DynArray best_cell_fitness; int gen; bool done; DynArray gen_time; DynArray crossover_time; DynArray mutate_time; DynArray fitness_time; DynArray sorting_time; Mutex m; }; struct CellTracker { float score; int cellid; }; template struct WorkerThreadArgs { Strategy strat; Array cells; Array trackers; Stats *stats; Mutex m; float *best_global_score; T* best_global_cell; }; template T* _cellp(Array cells, CellTracker tracker) { return &cells[tracker.cellid]; } template DWORD worker(LPVOID args) { // Unpack everything... WorkerThreadArgs* worker_args = static_cast*>(args); Strategy strat = worker_args->strat; Array cells = worker_args->cells; Array trackers = worker_args->trackers; Stats &stats = *worker_args->stats; float* best_global_score = worker_args->best_global_score; T* best_global_cell = worker_args->best_global_cell; Mutex best_m = worker_args->m; // Prepare crossover operations as these will be the same every time except // for the exact cell pointers int npar = strat.crossover_parent_num; int nchild = strat.crossover_children_num; Array parents = make_array(npar); Array children = make_array(nchild); bool gt = strat.higher_fitness_is_better; // Writing strat.higher... is annoying // printf("Core: %d\n", get_affinity()); TimeSpan start, diff, gen_start; while(stats.gen < strat.num_generations) { gen_start = now(); // 0. Share/Integrate global breakthrough if (strat.share_breakthroughs && (stats.gen + get_affinity()) % strat.share_breakthrough_gen_period) { lock(best_m); if (better(gt, front(trackers).score, *best_global_score) != *best_global_score) { // Share *best_global_cell = *_cellp(cells, trackers[0]); *best_global_score = trackers[0].score; } else { // Integrate *_cellp(cells, trackers[0]) = *best_global_cell; trackers[0].score = *best_global_score; } unlock(best_m); } // 1. crossover start = now(); if (strat.enable_crossover) { int parent_end = npar; int child_begin = trackers.len-nchild; while (parent_end <= child_begin) { // Get pointers to all the parent cells for (int i = parent_end-npar; i < parent_end; i++) { T* cell = _cellp(cells, trackers[i]); assert(cell != NULL); parents[i - (parent_end-npar)] = cell; } // Get pointers to all the child cells (these will be overwritten) for (int i = child_begin; i < child_begin+nchild; i++) { T* cell = _cellp(cells, trackers[i]); assert(cell != NULL); children[i-child_begin] = cell; } CrossoverJob cj = {parents, children}; TaggedJob job; job.data.c=cj; job.type=JobType::CROSSOVER; q.jobs[q.write_i++] = job; parent_end += strat.crossover_parent_stride; child_begin -= nchild; } } lock(stats.m); append(stats.crossover_time, now() - start); unlock(stats.m); // 2. mutate start = now(); for (int i = 0; i < trackers.len; i++) { if (abs(norm_rand(strat.rand_seed)) < strat.mutation_chance) { strat.mutate(cells[trackers[i].cellid]); } } lock(stats.m); append(stats.mutate_time, now() - start); unlock(stats.m); // 3. evaluate start = now(); if (strat.test_all) { for (int i = 0; i < trackers.len; i++) { FitnessJob fj = {&cells[trackers[i].cellid], &trackers[i]}; TaggedJob job; job.data.f=fj; job.type=JobType::FITNESS; if (i == trackers.len-1) lock(q.m); q.jobs[q.write_i++] = job; if (i == trackers.len-1) { q.done_writing = true; unlock(q.m); } } } else { lock(q.m); for (int i = 0; i < trackers.len; i++) { if (abs(norm_rand(strat.rand_seed)) < strat.test_chance) { FitnessJob fj = {&cells[trackers[i].cellid], &trackers[i]}; TaggedJob job; job.data.f=fj; job.type=JobType::FITNESS; q.jobs[q.write_i++] = job; } } q.done_writing = true; unlock(q.m); } lock(stats.m); append(stats.fitness_time, now() - start); unlock(stats.m); // 4. sort start = now(); std::sort(&trackers[0], &trackers[trackers.len-1], [strat](CellTracker &a, CellTracker &b){ return better(strat.higher_fitness_is_better, a.score, b.score) == a.score; }); lock(stats.m); append(stats.sorting_time, now() - start); append(stats.best_cells, cells[trackers[0].cellid]); append(stats.best_cell_fitness, trackers[0].score); append(stats.gen_time, now() - gen_start); stats.gen++; unlock(stats.m); } stats.done = true; return 0; } template T run(Strategy strat) { Array> stats = make_array>(strat.num_threads); Array threads = make_array(strat.num_threads); Array> args = make_array>(strat.num_threads); float best_global_score = strat.higher_fitness_is_better ? FLT_MIN : FLT_MAX; T best_global_cell; allow_all_processors(); set_affinity(0); for (int i = 0; i < strat.num_threads; i++) { stats[i] = { .best_cells=make_dynarray(strat.num_generations), .best_cell_fitness=make_dynarray(strat.num_generations), .gen_time=make_dynarray(strat.num_generations), .crossover_time=make_dynarray(strat.num_generations), .mutate_time=make_dynarray(strat.num_generations), .fitness_time=make_dynarray(strat.num_generations), .sorting_time=make_dynarray(strat.num_generations), .m=make_mutex() }; Array cells = make_array(strat.num_threads*strat.num_cells_per_thread); Array trackers = make_array(strat.num_cells_per_thread); for (int i = 0; i < strat.num_cells_per_thread; i++) { cells[i] = strat.make_default_cell(); trackers[i] = {0, i}; } args[i].strat=strat; args[i].cells=cells; args[i].trackers=trackers; args[i].stats=&stats[i]; args[i].best_global_score=&best_global_score; args[i].best_global_cell=&best_global_cell; args[i].m = make_mutex(); threads[i] = make_thread(worker, &args[i], i+1); } // We are the stats thread bool complete = false; while (!complete) { sleep(from_s(strat.stats_print_period_s)); printf("**********************\n"); float g_avg_gen_time = 0; float g_avg_crossover_time = 0; float g_avg_mutate_time = 0; float g_avg_fitness_time = 0; float g_avg_sorting_time = 0; float g_avg_overhead_time = 0; float g_progress_per = 0; float g_best_fitness = strat.higher_fitness_is_better ? FLT_MIN : FLT_MAX; complete = true; for (int i = 0; i < stats.len; i++) { lock(stats[i].m); complete &= stats[i].done; int end = stats[i].gen_time.end-1; float gen_time = to_s(stats[i].gen_time[end]); float crossover_time = to_s(stats[i].crossover_time[end]); float mutate_time = to_s(stats[i].mutate_time[end]); float fitness_time = to_s(stats[i].fitness_time[end]); float sorting_time = to_s(stats[i].sorting_time[end]); float progress_per = static_cast(stats[i].gen) / static_cast(strat.num_generations) * 100; float best_score = back(stats[i].best_cell_fitness); float overhead = max(0, gen_time - (crossover_time + mutate_time + fitness_time + sorting_time)); float overhead_per = overhead / gen_time * 100; g_avg_gen_time += gen_time; g_avg_crossover_time += crossover_time; g_avg_mutate_time += mutate_time; g_avg_fitness_time += fitness_time; g_avg_sorting_time += sorting_time; g_progress_per += progress_per; g_best_fitness = better(strat.higher_fitness_is_better, best_score, g_best_fitness); g_avg_overhead_time += overhead; printf("%d, Progress %d/%d, Top: %.5e, Overhead Per: %.4f%%, Gen: %.4f, Overhead: %.4f, Cross: %.4f (s), Mutate: %.4f (s), Fitness: %.4f (s), Sorting: %.4f (s)\n", i, stats[i].gen, strat.num_generations, best_score, overhead_per, gen_time, overhead, crossover_time, mutate_time, fitness_time, sorting_time); unlock(stats[i].m); } g_avg_gen_time /= stats.len; g_avg_crossover_time /= stats.len; g_avg_mutate_time /= stats.len; g_avg_fitness_time /= stats.len; g_avg_sorting_time /= stats.len; g_progress_per /= stats.len; g_avg_overhead_time /= stats.len; float g_avg_overhead_per = g_avg_overhead_time / g_avg_gen_time * 100; printf("GLOBAL, Progress %.1f%%, Top: %.5e, Overhead Per: %.4f%%, Gen: %.4f, Overhead: %.4f, Cross: %.4f (s), Mutate: %.4f (s), Fitness: %.4f (s), Sorting: %.4f (s)\n", g_progress_per, g_best_fitness, g_avg_overhead_per, g_avg_gen_time, g_avg_overhead_time, g_avg_crossover_time, g_avg_mutate_time, g_avg_fitness_time, g_avg_sorting_time); if (complete) break; } for (int i = 0; i < threads.len; i++) { join(threads[i]); } T best_cell; // TODO: bad float best_score = strat.higher_fitness_is_better ? FLT_MIN : FLT_MAX; for (int i = 0; i < stats.len; i++) { float score = back(stats[i].best_cell_fitness); if (strat.higher_fitness_is_better ? score > best_score : score < best_score) { best_cell = back(stats[i].best_cells); best_score = score; } } return best_cell; } template WorkQueue make_work_queue(int len, int batch_size) { return { .jobs=make_array>(len), .read_i=0, .write_i=0, .batch_size=batch_size, .done_writing=false, .work_complete=false, .m=make_mutex(), .done=make_condition_var(), .jobs_ready=make_condition_var() }; } template bool tryget_job_batch(WorkQueue &q, Array>* out_batch, bool* out_batch_is_end) { lock(q.m); if (q.stop) { unlock(q.m); return false; } // Keep waiting till jobs are available while (q.read_i >= q.write_i) { wait(q.jobs_ready, q.m, infinite_ts); if (q.stop) { unlock(q.m); return false; } } // Yay! Let's grab some jobs to do // If the batch we're about to grab moves read_i to write_i and the producer // is done writing, we should let our callee know it's handling this gen's last // batch know that way it sets work_complete and signals done. *out_batch_is_end = q.done_writing && q.read_i + q.batch_size >= q.write_i; out_batch->data = &q.jobs[q.read_i]; out_batch->len = min(q.batch_size, q.write_i - q.read_i); q.read_i += q.batch_size; unlock(q.m); return true; } template void work_batch(Array> batch, Strategy &s) { for (int i = 0; i < batch.len; i++) { switch (batch[i].type) { case JobType::MUTATE: { MutateJob mj = batch[i].data.m; s.mutate(*mj.cell); } break; case JobType::CROSSOVER: { CrossoverJob cj = batch[i].data.c; s.crossover(cj.parents, cj.children); } break; case JobType::FITNESS: { FitnessJob fj = batch[i].data.f; fj.track->score = s.fitness(*fj.cell); } break; default: { assert(false); } } } } template DWORD worker(LPVOID args) { WorkerThreadArgs* wa = static_cast*>(args); WorkQueue &q = wa->q; Strategy &s = wa->s; // These are written by tryget_job_batch bool batch_is_end; Array> batch; while (tryget_job_batch(q, &batch, &batch_is_end)) { work_batch(batch, s); if (batch_is_end) { lock(q.m); q.work_complete = true; wake_one(q.done); unlock(q.m); } } return NULL; } } // namespace genetic