additions to the api. kinda drafting out what I think might be needed for the user to specify crossover and mutation behavior
@@ -1,4 +1,5 @@
#include <vector>
#include <span>

namespace genetic {

@@ -8,20 +9,27 @@ template <class T> struct Strategy {
int num_retries; // Number of times worker threads will try to grab work pool
// lock before sleeping
int batch_size; // Number of cells a worker thread tries to evaluate in a row
// before locking the pool again. 1 tends to be fine
// before locking the pool again.
int num_cells; // Size of the population pool
int num_generations; // Number of times (epochs) to run the algorithm
bool test_all; // Sets whether or not every cell is tested every generation
float test_chance; // Chance to test any given cell's fitness. Relevant only
// if test_all is false.
bool enable_crossover; // Cells that score well in the evaluation stage
// produce children that replace low-scoring cells
bool enable_crossover_mutation; // Mutations can occur after crossover
float crossover_mutation_chance; // Chance to mutate a child cell
int crossover_parent_num; // Number of unique high-scoring parents in a
// crossover call.
int crossover_children_num; // Number of children produced in a crossover
bool enable_mutation; // Cells may be mutated before fitness evaluation
float mutation_chance; // Chance to mutate cells before fitness evaluation

// User defined functions
T (*make_default_cell)();
void (*mutate)(T &cell);
void (*crossover)(const std::span<T> &parents, std::span<T> &out_children);
float (*fitness)(const T &cell);
void (*mutate)(const T &cell, T *out);
void (*crossover)(const T &a, const T &b, T *out);

float mutation_chance_per_gen;
};

template <class T> struct Stats {

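Since this commit is only drafting the API, here is a minimal sketch of how a caller might fill out the new Strategy fields, assuming the in-place mutate and span-based crossover signatures are the ones that survive (the older two-parent/out-pointer forms also appear above, mid-replacement). The Genome type, the OneMax-style fitness, the parameter values, and the entry point are all illustrative, not part of this commit.

#include "genetic.h"
#include <cstdlib>
#include <span>
#include <vector>

using Genome = std::vector<int>; // illustrative cell type: a 64-bit genome

static Genome make_default_cell() { return Genome(64, 0); }

// In-place mutation matching the new void (*mutate)(T &cell) signature:
// flip one random bit.
static void mutate(Genome &cell) { cell[std::rand() % cell.size()] ^= 1; }

// Span-based crossover: each child draws every bit from a random parent
// (uniform crossover). The library presumably sizes the spans with
// crossover_parent_num and crossover_children_num.
static void crossover(const std::span<Genome> &parents,
                      std::span<Genome> &out_children) {
  for (Genome &child : out_children) {
    child = parents[0];
    for (std::size_t i = 0; i < child.size(); ++i)
      child[i] = parents[std::rand() % parents.size()][i];
  }
}

// OneMax fitness: count the set bits.
static float fitness(const Genome &cell) {
  float score = 0;
  for (int bit : cell) score += bit;
  return score;
}

int main() {
  genetic::Strategy<Genome> s{};
  s.num_retries = 4;
  s.batch_size = 1;
  s.num_cells = 256;
  s.num_generations = 100;
  s.test_all = true;
  s.enable_crossover = true;
  s.crossover_parent_num = 2;
  s.crossover_children_num = 2;
  s.enable_crossover_mutation = true;
  s.crossover_mutation_chance = 0.05f;
  s.enable_mutation = true;
  s.mutation_chance = 0.01f;
  s.make_default_cell = make_default_cell;
  s.mutate = mutate;
  s.crossover = crossover;
  s.fitness = fitness;
  // Whatever entry point eventually consumes a Strategy isn't in this diff,
  // so nothing is run here.
  return 0;
}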
makefile
@@ -3,7 +3,7 @@ obj_files = $(src_files:src/%.cpp=obj/%.o)


ifeq ($(OS),Windows_NT)
CCFLAGS += -D WIN32 -Iext/PTHREADS-BUILT/include
CCFLAGS += -D WIN32 -Iext/PTHREADS-BUILT/include -std=c++20
PTHREADLIB = ext/PTHREADS-BUILT/lib/pthreadVCE3.lib
ifeq ($(PROCESSOR_ARCHITEW6432),AMD64)
CCFLAGS += -D AMD64

@@ -1,6 +1,5 @@
#include "genetic.h"
#include "pthread.h"
#include <algorithm>
#include <queue>
#include <vector>

@@ -17,6 +16,11 @@ template <class T> struct WorkEntry {
float &score;
};

template <class T> struct WorkQueue {
std::vector<WorkEntry<T>> jobs;
int i;
};

static pthread_mutex_t data_mutex = PTHREAD_MUTEX_INITIALIZER;

static pthread_mutex_t ready_mutex = PTHREAD_MUTEX_INITIALIZER;
@@ -28,7 +32,7 @@ static pthread_cond_t gen_complete_cond = PTHREAD_COND_INITIALIZER;
static pthread_mutex_t run_complete_mutex = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t run_complete_cond = PTHREAD_COND_INITIALIZER;

/* Thoughts on this approach
* The ideal implementation of a worker thread has them operating at maximum
* load with as little synchronization overhead as possible. i.e. The ideal
* worker thread
@@ -61,13 +65,13 @@ static pthread_cond_t run_complete_cond = PTHREAD_COND_INITIALIZER;
*
* I take a hybrid approach. Users can specify a "batch size". Worker threads
* will bite off jobs in chunks and complete them before locking
* the job pool again. The user to choose a batch size close to 1 if
* their fitness function compute time is highly variable, and closer to
* the job pool to grab another chunk. The user should choose a batch size close
* to 1 if their fitness function compute time is highly variable and closer to
* num_cells / num_threads if computation time is consistent. Users should
* experiment with a batch size that works well for their problem.
*
* Worth mentioning this optimization work is irrelevant once computation time
* >>> synchronization time.
* Worth mentioning that avoiding synchronization is irrelevant once computation
* time >>> synchronization time.
*
* There might be room for dynamic batch size modification, but I don't expect
* to pursue this feature until the library is more mature (and I've run out of
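To make the batching idea concrete, here is a rough sketch (not code from this commit) of the worker loop the comment describes: lock the pool, claim up to batch_size jobs by advancing the shared index, unlock, then evaluate the claim without holding the lock. The WorkerCtx bundle, the cell member on WorkEntry, and the exit condition are assumptions; the retry/sleep logic implied by num_retries and the condition-variable signaling are omitted.

// Sketch only: assumes the includes and the WorkQueue/WorkEntry/data_mutex
// definitions from genetic.cpp above.

// Hypothetical per-run context handed to each worker thread.
template <class T> struct WorkerCtx {
  WorkQueue<T> *queue;
  const genetic::Strategy<T> *strategy;
};

template <class T> static void *worker_main(void *arg) {
  auto *ctx = static_cast<WorkerCtx<T> *>(arg);

  for (;;) {
    // Lock once per batch: claim [begin, end) by bumping the shared index,
    // then release the lock before doing any fitness work.
    pthread_mutex_lock(&data_mutex);
    int begin = ctx->queue->i;
    int end = std::min(begin + ctx->strategy->batch_size,
                       (int)ctx->queue->jobs.size());
    ctx->queue->i = end;
    pthread_mutex_unlock(&data_mutex);

    if (begin == end)
      break; // pool drained; end-of-generation signaling is omitted here

    // Evaluate the claimed batch lock-free. Each WorkEntry owns its score
    // slot, so no two workers ever write to the same float.
    for (int j = begin; j < end; ++j) {
      WorkEntry<T> &job = ctx->queue->jobs[j];
      // 'cell' is assumed; only the score member is visible in this hunk.
      job.score = ctx->strategy->fitness(job.cell);
    }
  }
  return nullptr;
}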
Block a user