From bed933055e41f9b42722c85da3b2cf5fb1c1917d Mon Sep 17 00:00:00 2001 From: Seth Hamilton Date: Sun, 7 Sep 2025 16:42:06 -0500 Subject: [PATCH] Rewrite. Currently segfaults --- inc/genetic.h | 166 ++++++++++++++++++++-------- inc/rand.h | 2 + inc/sync.h | 1 + src/genetic.cpp | 279 ------------------------------------------------ src/main.cpp | 10 +- 5 files changed, 131 insertions(+), 327 deletions(-) delete mode 100644 src/genetic.cpp diff --git a/inc/genetic.h b/inc/genetic.h index 61bae51..78021c2 100644 --- a/inc/genetic.h +++ b/inc/genetic.h @@ -1,63 +1,145 @@ +#pragma once + +#include +#include #include +#include "sync.h" +#include "rand.h" + namespace genetic { template struct Array; template struct Stats; template struct Strategy; +struct CellTracker; template Stats run(Strategy); template struct Strategy { - int num_threads; // Number of worker threads that will be evaluating cell - // fitness. - int batch_size; // Number of cells a worker thread tries to work on in a row - // before accessing/locking the work queue again. - int num_cells; // Size of the population pool - int num_generations; // Number of times (epochs) to run the algorithm - bool test_all; // Sets whether or not every cell's fitness is evaluated every - // generation - float test_chance; // Chance to test any given cell's fitness. Relevant only - // if test_all is false. - bool enable_crossover; // Cells that score well in the evaluation stage - // produce children that replace low-scoring cells - bool enable_crossover_mutation; // Mutations can occur after crossover - float crossover_mutation_chance; // Chance to mutate a child cell - int crossover_parent_num; // Number of unique high-scoring parents in a - // crossover call. - int crossover_parent_stride; // Number of parents to skip over when moving to - // the next set of parents. A stride of 1 would - // produce maximum overlap because the set of - // parents would only change by one every - // crossover. - int crossover_children_num; // Number of children to expect the user to - // produce in the crossover function. - bool enable_mutation; // Cells may be mutated - // before fitness evaluation - float mutation_chance; // Chance for any given cell to be mutated cells during - // the mutation - uint64_t rand_seed; - bool higher_fitness_is_better; // Sets whether or not to consider higher - // fitness values better or worse. Set this to - // false if fitness is an error function. + // Number of worker threads that will be evaluating cell fitness + int num_threads; - // User defined functions - T (*make_default_cell)(); - void (*mutate)(T &cell_to_modify); - void (*crossover)(const Array parents, const Array out_children); - float (*fitness)(const T &cell); + int batch_size; // Number of cells a worker thread tries to work on in a row + // before accessing/locking the work queue again. + int num_cells; // Size of the population pool + int num_generations; // Number of times (epochs) to run the algorithm + bool test_all; // Sets whether or not every cell's fitness is evaluated every + // generation + float test_chance; // Chance to test any given cell's fitness. Relevant only + // if test_all is false. + bool enable_crossover; // Cells that score well in the evaluation stage + // produce children that replace low-scoring cells + int crossover_parent_num; // Number of unique high-scoring parents in a + // crossover call. + int crossover_parent_stride; // Number of parents to skip over when moving to + // the next set of parents. A stride of 1 would + // produce maximum overlap because the set of + // parents would only change by one every + // crossover. + int crossover_children_num; // Number of children to expect the user to + // produce in the crossover function. + bool enable_mutation; // Cells may be mutated + // before fitness evaluation + float mutation_chance; // Chance for any given cell to be mutated cells during + // the mutation + uint64_t rand_seed; + bool higher_fitness_is_better; // Sets whether or not to consider higher + // fitness values better or worse. Set this to + // false if fitness is an error function. + + // User defined functions + T (*make_default_cell)(); + void (*mutate)(T &cell_to_modify); + void (*crossover)(const Array parents, const Array out_children); + float (*fitness)(const T &cell); }; -template struct Stats { - std::vector best_cell; - std::vector best_cell_fitness; +template struct Stats { + std::vector best_cell; + std::vector best_cell_fitness; +}; + +struct CellTracker { + float score; + int cellid; }; template struct Array { - T *_data; - int len; + T *data; + int len; - T &operator[](int i); + T &operator[](int i) { return data[i]; } }; +template Array make_array(int len) { + return { + .data = (T*)malloc(sizeof(T)*len), + .len = len + }; +} + +template Stats run(Strategy strat) { + // Create cells + Array cells = make_array(strat.num_cells); + for (int i = 0; i < cells.len; i++) cells[i] = strat.make_default_cell(); + + // Create cell trackers + Array trackers = make_array(strat.num_cells); + for (int i = 0; i < trackers.len; i++) trackers[i] = { .score=0, .cellid=i }; + + // Init stat tracker + Stats stats; + + // Run the algorithm + for (int gen = 0; gen < strat.num_generations; gen++) { + // 1. mutate + for (int i = 0; i < trackers.len; i++) { + if (abs(norm_rand(strat.rand_seed)) < strat.mutation_chance) { + strat.mutate(cells[trackers[i].cellid]); + } + } + // 2. crossover + if (strat.enable_crossover) { + int parent_end = strat.crossover_parent_num; + int child_begin = trackers.len-strat.crossover_children_num; + while (parent_end <= child_begin) { + // Get pointers to all the parent cells + Array parents = make_array(strat.crossover_parent_num); + for (int i = parent_end-strat.crossover_parent_num; i < parent_end; i++) { + parents[i] = &cells[trackers[i].cellid]; + } + + // Get pointers to all the child cells (these will be overwritten) + Array children = make_array(strat.crossover_children_num); + for (int i = child_begin; i < child_begin+strat.crossover_children_num; i++) { + children[i] = &cells[trackers[i].cellid]; + } + strat.crossover(parents, children); + parent_end += strat.crossover_parent_stride; + child_begin -= strat.crossover_children_num; + } + } + // 3. evaluate + if (strat.test_all) { + for (int i = 0; i < trackers.len; i++) { + trackers[i].score = strat.fitness(cells[trackers[i].cellid]); + } + } else { + for (int i = 0; i < trackers.len; i++) { + if (abs(norm_rand(strat.rand_seed)) < strat.test_chance) { + trackers[i].score = strat.fitness(cells[trackers[i].cellid]); + } + } + } + // 4. sort + std::sort(&trackers[0], &trackers[trackers.len-1], [strat](CellTracker &a, CellTracker &b){ return strat.higher_fitness_is_better ? a.score < b.score : a.score > b.score; }); + + printf("Gen: %d, Best Score: %f\n", gen, trackers[0].score); + stats.best_cell.push_back(cells[trackers[0].cellid]); + stats.best_cell_fitness.push_back(trackers[0].score); + } + return stats; +} + } // namespace genetic diff --git a/inc/rand.h b/inc/rand.h index d098a56..f3276f3 100644 --- a/inc/rand.h +++ b/inc/rand.h @@ -1,3 +1,5 @@ +#pragma once + // TODO: This file needs a serious audit #include diff --git a/inc/sync.h b/inc/sync.h index aa4b9b1..b744a67 100644 --- a/inc/sync.h +++ b/inc/sync.h @@ -188,3 +188,4 @@ double to_hours(TimeSpan &sp) { #endif } // namespace sync +// diff --git a/src/genetic.cpp b/src/genetic.cpp deleted file mode 100644 index 63d7870..0000000 --- a/src/genetic.cpp +++ /dev/null @@ -1,279 +0,0 @@ -#include -#include -#include -#include -#include -#include - -#include "sync.h" -#include "genetic.h" -#include "rand.h" - -#define NUM_QUEUE_RETRIES 10 - -using namespace std; - -// std::visit/std::variant overload pattern -// See: -// https://www.modernescpp.com/index.php/visiting-a-std-variant-with-the-overload-pattern/ -// You don't have to understand this, just use it :) -template struct overload : Ts... { - using Ts::operator()...; -}; -template overload(Ts...) -> overload; - -namespace genetic { - -template struct cell_entry { - float score; - T *cell; - bool stale; -}; - -template struct crossover_job { - Array *> &parents; - Array *> &children_out; -}; - -template struct fitness_job { - cell_entry *cell_entry; -}; - -template struct mutate_job { - cell_entry *cell_entry; -}; - -template struct work_queue { - variant, fitness_job, mutate_job> *jobs; - int len; - int read_i; - int write_i; - bool done_writing; - - pthread_mutex_t data_mutex; - pthread_mutex_t gen_complete_mutex; - pthread_mutex_t jobs_available_mutex; - - pthread_cond_t gen_complete_cond; - pthread_cond_t jobs_available_cond; -}; - -template work_queue make_work_queue(int len) { - return {.jobs = (variant, crossover_job> *)malloc( - sizeof(variant, crossover_job>) * len), - .len = len, - .read_i = 0, - .write_i = 0, - .done_writing = false, - .data_mutex = PTHREAD_MUTEX_INITIALIZER, - .gen_complete_mutex = PTHREAD_MUTEX_INITIALIZER, - .jobs_available_mutex = PTHREAD_MUTEX_INITIALIZER, - .gen_complete_cond = PTHREAD_COND_INITIALIZER, - .jobs_available_cond = PTHREAD_COND_INITIALIZER}; -} - -template struct job_batch { - Array, fitness_job>> jobs; - bool gen_complete; -}; - -template -optional> get_job_batch(work_queue &queue, int batch_size, - bool *stop_flag) { - while (true) { - for (int i = 0; i < NUM_QUEUE_RETRIES; i++) { - if (queue.read_i < queue.write_i && - pthread_mutex_trylock(&queue.data_mutex)) { - job_batch res; - res.jobs._data = &queue._jobs[queue.read_i]; - int span_size = min(batch_size, queue.write_i - queue.read_i); - res.jobs.len = span_size; - - queue.read_i += span_size; - res.gen_complete = queue.done_writing && queue.read_i == queue.write_i; - - pthread_mutex_unlock(&queue.data_mutex); - return res; - } - } - pthread_mutex_lock(&queue.jobs_available_mutex); - pthread_cond_wait(queue.jobs_available_cond, &queue.jobs_available_mutex); - if (stop_flag) - return {}; - } -} - -template struct worker_thread_args { - Strategy &strat; - work_queue &queue; - bool *stop_flag; -}; - -template void *worker(void *args) { - worker_thread_args *work_args = (worker_thread_args *)args; - Strategy &strat = work_args->strat; - work_queue &queue = work_args->queue; - bool *stop_flag = work_args->stop_flag; - - auto job_dispatcher = overload{ - [strat](mutate_job mj) { - strat.mutate(*mj.cell_entry->cell); - mj.cell_entry->stale = true; - }, - [strat](fitness_job fj) { - fj.cell_entry->score = strat.fitness(*fj.cell_entry->cell); - fj.cell_entry->stale = false; - }, - [strat](crossover_job cj) { - Array parent_cells, child_cells; - parent_cells = {(T **)malloc(sizeof(T *) * cj.parents.len), - cj.parents.len}; - child_cells = {(T **)malloc(sizeof(T *) * cj.children_out.len), - cj.children_out.len}; - for (int i = 0; i < cj.parents.len; i++) { - parent_cells[i] = cj.parents[i].cell; - } - for (int i = 0; i < cj.children_out.len; i++) { - child_cells[i] = cj.children_out[i].cell; - cj.children_out[i].stale = true; - } - strat.crossover(parent_cells, child_cells); - }, - }; - - while (true) { - auto batch = get_job_batch(queue, strat.batch_size, stop_flag); - if (!batch || *stop_flag) - return NULL; - - // Do the actual work - for (int i = 0; i < batch->jobs.len; i++) { - visit(job_dispatcher, batch->jobs[i]); - } - - if (batch->gen_complete) { - pthread_cond_signal(&queue.gen_complete_cond, &queue.gen_complete_mutex); - } - } -} - -template Stats run(Strategy strat) { - Stats stats; - - // The work queue is what all the worker threads will checking - // for jobs - work_queue queue = make_work_queue(strat.num_cells); - - // The actual cells. Woo! - T cells[strat.num_cells]; - - // Using a vector so I can use the make_heap, push_heap, etc. - vector> cell_queue; - for (int i = 0; i < strat.num_cells; i++) { - cells[i] = strat.make_default_cell(); - cell_queue.push_back({0, &cells[i], true}); - } - - bool stop_flag = false; - worker_thread_args args = { - .strat = strat, .queue = queue, .stop_flag = &stop_flag}; - - // spawn worker threads - pthread_t threads[strat.num_threads]; - for (int i = 0; i < strat.num_threads; i++) { - pthread_create(&threads[i], NULL, worker, (void *)args); - } - - uint64_t rand_state = strat.rand_seed; - - for (int i = 0; i < strat.num_generations; i++) { - // Mutate some random cells in the population - for (int i = 0; i < cell_queue.size(); i++) { - if (abs(norm_rand(rand_state)) < strat.mutation_chance) { - queue.jobs[queue.write_i] = mutate_job{&cell_queue[i]}; - queue.write_i++; - } - } - pthread_cond_broadcast(&queue.jobs_available_cond); - - // Potential issue here where mutations aren't done computing and fitness - // jobs begin. maybe need to gate this. - - // Generate fitness jobs - for (int i = 0; i < cell_queue.size(); i++) { - if (cell_queue[i].stale && - (strat.test_all || abs(norm_rand(rand_state)) < strat.test_chance)) { - queue.jobs[queue.write_i] = fitness_job{&cell_queue[i]}; - queue.write_i++; - } - pthread_cond_broadcast(&queue.jobs_available_cond); - } - queue.done_writing = true; - - // wait for fitness jobs to complete - pthread_mutex_lock(&queue.gen_complete_mutex); - - // Before going to sleep, do a quick check to see if the fitness jobs are - // already complete. - pthread_mutex_lock(&queue.data_mutex); - bool already_complete = queue.read_i != queue.write_i; - pthread_mutex_unlock(&queue.data_mutex); - if (already_complete) { - pthread_mutex_unlock(&queue.gen_complete_mutex); - } else { - pthread_cond_wait(&queue.gen_complete_cond, &queue.gen_complete_mutex); - } - - // Sort cells on performance - std::sort(cell_queue.begin(), cell_queue.end(), - [strat](cell_entry a, cell_entry b) { - return strat.higher_fitness_is_better ? a > b : a < b; - }); - - printf("Top Score: %f\n", cell_queue[0].score); - - if (!strat.enable_crossover) - continue; - - // generate crossover jobs - // dear god. forgive me father - queue.write_i = 0; - queue.read_i = 0; - int count = 0; - int n_par = strat.crossover_parent_num; - int n_child = strat.crossover_children_num; - int child_i = cell_queue.size() - 1; - int par_i = 0; - while (child_i - par_i <= n_par + n_child) { - Array *> parents = { - (cell_entry **)malloc(sizeof(cell_entry *) * n_par), n_par}; - Array *> children = { - (cell_entry **)malloc(sizeof(cell_entry *) * n_child), n_child}; - - for (; par_i < par_i + n_par; par_i++) { - parents[i] = cell_queue[par_i]; - } - - for (; child_i > child_i - n_child; child_i--) { - children[i] = cell_queue[child_i]; - } - - queue.jobs[queue.write_i] = crossover_job{parents, children}; - par_i += strat.crossover_parent_stride; - child_i += strat.crossover_children_stride; - } - } - - // stop worker threads - stop_flag = true; - pthread_cond_broadcast(&queue.jobs_available_cond); - for (int i = 0; i < strat.num_threads; i++) { - pthread_join(threads[i], NULL); - } -} - -template T &Array::operator[](int i) { - return _data[i]; -} - -} // namespace genetic diff --git a/src/main.cpp b/src/main.cpp index 7b07dbc..b33f957 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -35,8 +35,8 @@ void mutate(Array &arr_to_mutate) { void crossover(const Array*> parents, const Array *> out_children) { for (int i = 0; i < len; i++) { - (*out_children._data[0])[i] = i < len/2 ? (*parents._data[0])[i] : (*parents._data[1])[i]; - (*out_children._data[1])[i] = i < len/2 ? (*parents._data[1])[i] : (*parents._data[0])[i]; + (*out_children.data[0])[i] = i < len/2 ? (*parents.data[0])[i] : (*parents.data[1])[i]; + (*out_children.data[1])[i] = i < len/2 ? (*parents.data[1])[i] : (*parents.data[0])[i]; } } @@ -47,8 +47,8 @@ float fitness(const Array &cell) { float sum = 0; float product = 1; for (int i = 0; i < cell.len; i++) { - sum += cell._data[i]; - product *= cell._data[i]; + sum += cell.data[i]; + product *= cell.data[i]; } return abs(sum - target_sum) + abs(product - target_product); } @@ -62,8 +62,6 @@ int main(int argc, char **argv) { .test_all = true, .test_chance = 0.0, // doesn't matter .enable_crossover = true, - .enable_crossover_mutation = true, - .crossover_mutation_chance = 0.6f, .crossover_parent_num = 2, .crossover_parent_stride = 1, .crossover_children_num = 2,