remove .sh in favor of .bat. Do some testing and verify that running w/ 15 threads does indeed use the whole cpu (via btop)

Debugged multithreaded version. Now investigating some performance issues (not every thread is being used). This is an interesting version.
Running. Only tested single thread version. Stats are looking nice. Needs more validation
2025-09-16 15:04:14 -05:00 · 2025-09-10 00:46:50 -05:00 · 2025-09-09 19:57:27 -05:00 · 2025-09-09 09:39:53 -05:00 · 2025-09-07 22:38:04 -05:00 · 2025-09-07 17:08:31 -05:00
14 changed files with 892 additions and 190 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -0,0 +1,6 @@
+*.idb
+*.pdb
+**obj**
+**bin**
+.cache**
+compile_commands.json
--- a/.gitmodules
+++ b/.gitmodules
@@ -1,3 +0,0 @@
-[submodule "ext/pthreads4w-code"]
-	path = ext/pthreads4w-code
-	url = https://git.code.sf.net/p/pthreads4w/code
--- a/activate.bat
+++ b/activate.bat
@@ -0,0 +1 @@
+"C:\Program Files\Microsoft Visual Studio\2022\Community\VC\Auxiliary\Build\vcvars64.bat"
--- a/build.bat
+++ b/build.bat
@@ -0,0 +1,10 @@
+@ECHO off
+SET flags=-Od -ZI -MTd
+SET config=Debug
+IF "%1" == "release" (
+    SET flags=-O2 -MT
+    SET config=Release
+)
+SET srcs=src\*
+mkdir bin obj
+cl %srcs% -I inc %flags% -std:c++20 -MP -Fo:obj\\ -Fe:bin\\
--- a/debug.rad
+++ b/debug.rad
@@ -0,0 +1,31 @@
+// raddbg 0.9.21 project file
+
+recent_file: path: "C:/Program Files (x86)/Windows Kits/10/include/10.0.26100.0/ucrt/stdio.h"
+recent_file: path: "d:/os/obj/amd64fre/minkernel/crts/ucrt/src/appcrt/stdio/xmt/objfre/amd64/minkernel/crts/ucrt/src/appcrt/stdio/output.cpp"
+recent_file: path: "C:/Program Files/Microsoft Visual Studio/2022/Community/VC/Tools/MSVC/14.44.35207/include/xutility"
+recent_file: path: "C:/Users/sethh/Documents/repos/Petri/inc/genetic.h"
+recent_file: path: "C:/Program Files/Microsoft Visual Studio/2022/Community/VC/Tools/MSVC/14.44.35207/include/algorithm"
+recent_file: path: "C:/Users/sethh/Documents/repos/Petri/inc/sync.h"
+recent_file: path: "inc/genetic.h"
+recent_file: path: "inc/sync.h"
+recent_file: path: "d:/os/obj/amd64fre/minkernel/crts/ucrt/src/appcrt/startup/mt/objfre/amd64/minkernel/crts/ucrt/src/appcrt/startup/abort.cpp"
+recent_file: path: "src/main.cpp"
+recent_file: path: "d:/os/obj/amd64fre/minkernel/crts/ucrt/src/appcrt/startup/mt/objfre/amd64/minkernel/crts/ucrt/src/appcrt/startup/assert.cpp"
+recent_file: path: "../../../../../Program Files/Microsoft Visual Studio/2022/Community/VC/Tools/MSVC/14.44.35207/include/vector"
+recent_file: path: "d:/os/obj/amd64fre/minkernel/crts/ucrt/src/appcrt/misc/mt/objfre/amd64/minkernel/crts/ucrt/src/appcrt/misc/invalid_parameter.cpp"
+recent_file: path: "../../../../../Program Files/Microsoft Visual Studio/2022/Community/VC/Tools/MSVC/14.44.35207/include/xmemory"
+recent_file: path: "../../../../../Program Files/Microsoft Visual Studio/2022/Community/VC/Tools/MSVC/14.42.34433/include/algorithm"
+recent_file: path: "../../../../../Program Files/Microsoft Visual Studio/2022/Community/VC/Tools/MSVC/14.42.34433/include/xutility"
+target:
+{
+  executable: "bin/main.exe"
+  working_directory: bin
+  label: main
+  enabled: 1
+  arguments: 1
+}
+breakpoint:
+{
+  source_location: "inc/genetic.h:292:1"
+  hit_count: 0
+}
--- a/export.bat
+++ b/export.bat
@@ -0,0 +1,22 @@
+@ECHO off
+SETLOCAL ENABLEDELAYEDEXPANSION
+
+ECHO [ > compile_commands.json
+
+FOR /r "src\" %%F IN (*.cpp) DO (
+
+SET "file=%%F"
+SET "file=!file:\=/!"
+SET "directory=%~dp0"
+SET "directory=!directory:\=/!"
+
+ECHO { >> compile_commands.json
+ECHO "directory": "!directory!", >> compile_commands.json
+ECHO "command": "cl !file! -I inc %flags% -std:c++20 -MP -Fo:obj\\ ", >> compile_commands.json
+
+ECHO "file": "!file!" >> compile_commands.json
+ECHO }, >> compile_commands.json
+
+)
+
+ECHO ] >> compile_commands.json
--- a/inc/genetic.h
+++ b/inc/genetic.h
@@ -1,30 +1,372 @@
-#include <vector>
+#pragma once 
+
+#include <algorithm>
+#include <cfloat>
+#include <cstdarg>
+#include <cstdio>
+#include <cstdlib>
+
+#include "util.h"
+#include "sync.h"
+#include "rand.h"
+
+using namespace sync;

 namespace genetic {

+template <class T> struct Stats;
+template <class T> struct Strategy;
+struct CellTracker;
+
+
+const char *global_stat_format_str = "GLOBAL, Progress %.1f%%, Top: %.5e, Overhead Per: %.4f%%, Gen: %.4f, Overhead: %.4f, Cross: %.4f (s), Mutate: %.4f (s), Fitness: %.4f (s), Sorting: %.4f (s)\n";
+const char *thread_stat_format_str = "%d, Progress %d/%d, Top: %.5e, Overhead Per: %.4f%%, Gen: %.4f, Overhead: %.4f, Cross: %.4f (s), Mutate: %.4f (s), Fitness: %.4f (s), Sorting: %.4f (s)\n";
+
+static int stat_str_len = 2*max(strlen(thread_stat_format_str), strlen(global_stat_format_str));
+static char *stat_str = (char*)malloc(stat_str_len);
+static char *filename = (char*)malloc(64);
+static int n_threads = 0;
+
+// Formats a printf-style message into the shared stat_str buffer, echoes it
+// to stdout and appends it to logs/logs-<n_threads>.txt.
+// Messages longer than stat_str_len - 1 characters are truncated. If the log
+// file cannot be opened (for example the logs/ directory does not exist) the
+// message is still printed and the file write is skipped.
+// Not thread-safe: stat_str and filename are shared buffers.
+void log(const char *format_str, ...) {
+    va_list list;
+    va_start(list, format_str);
+    // vsnprintf truncates and always terminates the buffer, where vsprintf_s
+    // would invoke the CRT invalid-parameter handler on an oversized message.
+    vsnprintf(stat_str, stat_str_len, format_str, list);
+    va_end(list);
+
+    printf("%s", stat_str);
+
+    // 64 is the size filename was allocated with.
+    snprintf(filename, 64, "logs/logs-%d.txt", n_threads);
+    FILE *f = NULL;
+    if (fopen_s(&f, filename, "a") != 0 || f == NULL) {
+        return;
+    }
+    fwrite(stat_str, sizeof(char), strlen(stat_str), f);
+    fclose(f);
+}
+
+template <class T> T run(Strategy<T>);
+
 template <class T> struct Strategy {
-  // The recommended number of threads is <= number of cores on your pc.
-  // Set this to -1 use the default value (number of cores - 1)
-  int num_threads;     // Number of worker threads that will be evaluating cell fitness
-  int num_cells;       // Size of the population pool
-  int num_generations; // Number of times (epochs) to run the algorithm
-  bool test_all;       // Sets whether or not every cell is tested every generation
-  float test_chance;   // Chance to test any given cell's fitness. Relevant only if test_all is false.
+    // Number of worker threads; each one evolves its own population of cells
+    int num_threads;

-  // User defined functions
-  T (*make_default_cell)();
-  float (*fitness)(const T &cell);
-  void (*mutate)(const T &cell, T *out);
-  void (*crossover)(const T &a, const T &b, T *out);
+    // Time between two stats printouts of the stats thread (in seconds)
+    float stats_print_period_s;

-  float mutation_chance_per_gen;
+    // Size of the population pool per worker thread
+    int num_cells_per_thread;
+
+    // Number of generations (epochs) each worker thread runs
+    int num_generations; 
+
+    // Workers publish their best cell to, and adopt the best cell from, a shared global best
+    bool share_breakthroughs;
+
+    // How many generations to explore before resyncing with the global best
+    int share_breakthrough_gen_period;
+
+    bool test_all; // Sets whether or not every cell's fitness is evaluated every
+                   // generation
+    float test_chance; // Chance to test any given cell's fitness. Relevant only
+                       // if test_all is false.
+    bool enable_crossover; // Cells that score well in the evaluation stage
+                           // produce children that replace low-scoring cells
+    int crossover_parent_num;        // Number of unique high-scoring parents in a
+                                     // crossover call.
+    int crossover_parent_stride; // Number of parents to skip over when moving to
+                                 // the next set of parents. A stride of 1 would
+                                 // produce maximum overlap because the set of
+                                 // parents would only change by one every
+                                 // crossover.
+    int crossover_children_num;  // Number of children to expect the user to
+                                 // produce in the crossover function.
+    bool enable_mutation;          // Cells may be mutated
+                                   // before fitness evaluation
+    float mutation_chance; // Chance for any given cell to be mutated during the
+                           // mutation stage of a generation
+    uint64_t rand_seed; // Initial xorshift state for the workers' norm_rand draws; a zero state never changes
+    bool higher_fitness_is_better; // Sets whether or not to consider higher
+                                   // fitness values better or worse. Set this to
+                                   // false if fitness is an error function.
+
+    // User defined functions
+    T (*make_default_cell)(); // Builds one cell of the initial population
+    void (*mutate)(T &cell_to_modify); // Mutates the given cell in place
+    void (*crossover)(const Array<T *> parents, const Array<T *> out_children); // Overwrites the cells in out_children using the cells in parents
+    float (*fitness)(const T &cell); // Scores one cell; see higher_fitness_is_better
 };

-template <class T> struct Stats {
-  std::vector<T> best_cell;
-  std::vector<float> average_fitness;
+template<class T> struct Stats {
+    DynArray<T> best_cells;
+    DynArray<float> best_cell_fitness;
+    int gen;
+    bool done;
+    DynArray<TimeSpan> gen_time;
+    DynArray<TimeSpan> crossover_time;
+    DynArray<TimeSpan> mutate_time;
+    DynArray<TimeSpan> fitness_time;
+    DynArray<TimeSpan> sorting_time;
+    Mutex m;
 };

-template <class T> Stats<T> run(Strategy<T>);
+struct CellTracker {
+    float score;
+    int cellid;
+};
+
+// Per-thread bundle handed to worker(). `cells`, `trackers` and `stats` are
+// used by that one worker (the stats thread also reads `stats` under
+// `stats->m`). `m`, `best_global_score` and `best_global_cell` point at state
+// owned by run() and shared by every worker: `*m` must be held while reading
+// or writing the shared best score/cell.
+template<class T>
+struct WorkerThreadArgs {
+    Strategy<T> strat;
+    Array<T> cells;
+    Array<CellTracker> trackers;
+    Stats<T> *stats;
+
+    Mutex *m;
+    float *best_global_score;
+    T* best_global_cell;
+};
+
+// Resolves a tracker to the cell it refers to.
+template<class T> T* _cellp(Array<T> cells, CellTracker tracker) { return &cells[tracker.cellid]; }
+
+// Sentinel score that any finite fitness value beats. FLT_MIN is the smallest
+// *positive* float, so the "lowest possible" score has to be -FLT_MAX.
+inline float _worst_score(bool higher_is_better) {
+    return higher_is_better ? -FLT_MAX : FLT_MAX;
+}
+
+// True when score `a` is strictly better than score `b`.
+inline bool _beats(bool higher_is_better, float a, float b) {
+    return higher_is_better ? (a > b) : (a < b);
+}
+
+// Thread entry point: evolves one population for strat.num_generations
+// generations. Each generation runs share/integrate -> crossover -> mutate ->
+// evaluate -> sort, and appends the stage timings and the generation's best
+// cell to this thread's Stats (under stats.m).
+//
+// args: a WorkerThreadArgs<T>* that must outlive the thread.
+// Returns 0.
+//
+// NOTE(review): cells are copied with plain assignment (`*a = *b`). A T that
+// holds pointers and copies shallowly (e.g. Array<float>) will share storage
+// between threads after an integrate step.
+// NOTE(review): every worker starts from the same strat.rand_seed, so all
+// workers draw the same random sequence.
+template <class T> DWORD worker(LPVOID args) {
+    // Unpack everything...
+    WorkerThreadArgs<T>* worker_args = static_cast<WorkerThreadArgs<T>*>(args);
+    Strategy<T> strat = worker_args->strat;
+    Array<T> cells = worker_args->cells;
+    Array<CellTracker> trackers = worker_args->trackers;
+    Stats<T> &stats = *worker_args->stats;
+    float* best_global_score = worker_args->best_global_score;
+    T* best_global_cell = worker_args->best_global_cell;
+    // Bind by reference: a copied mutex would not exclude the other workers.
+    Mutex &best_m = *worker_args->m;
+
+    // Prepare crossover operations as these will be the same every time except
+    // for the exact cell pointers
+    int npar = strat.crossover_parent_num;
+    int nchild = strat.crossover_children_num;
+    Array<T*> parents = make_array<T*>(npar);
+    Array<T*> children = make_array<T*>(nchild);
+
+    const bool gt = strat.higher_fitness_is_better; // Writing strat.higher... is annoying
+    const float worst = _worst_score(gt);
+
+    // The affinity does not change while the worker runs, so query it once
+    // instead of twice-per-generation system calls.
+    const int affinity = get_affinity();
+    const bool sharing = strat.share_breakthroughs && strat.share_breakthrough_gen_period > 0;
+
+    // Best score first.
+    auto comp = [gt](const CellTracker &a, const CellTracker &b) {
+        return _beats(gt, a.score, b.score);
+    };
+
+    TimeSpan start, gen_start;
+    while (stats.gen < strat.num_generations) {
+        gen_start = now();
+
+        // 0. Share/Integrate global breakthrough.
+        // Runs once every share_breakthrough_gen_period generations, staggered
+        // per thread by its affinity. Skipped in generation 0 because no cell
+        // has been scored or sorted yet.
+        if (sharing && stats.gen > 0 &&
+            (stats.gen + affinity) % strat.share_breakthrough_gen_period == 0) {
+            lock(best_m);
+            if (_beats(gt, trackers[0].score, *best_global_score)) {
+                // Share
+                *best_global_cell = *_cellp(cells, trackers[0]);
+                *best_global_score = trackers[0].score;
+            } else if (*best_global_score != worst) {
+                // Integrate (only once some thread has published a cell)
+                *_cellp(cells, trackers[0]) = *best_global_cell;
+                trackers[0].score = *best_global_score;
+            }
+            unlock(best_m);
+        }
+
+        // 1. crossover
+        start = now();
+        if (strat.enable_crossover) {
+            int parent_end = npar;
+            int child_begin = trackers.len-nchild;
+            // Parents come from the front (best), children overwrite the back
+            // (worst); stop before the two windows overlap.
+            while (parent_end <= child_begin) {
+
+                // Get pointers to all the parent cells
+                for (int i = parent_end-npar; i < parent_end; i++) {
+                    T* cell = _cellp(cells, trackers[i]);
+                    assert(cell != NULL);
+                    parents[i - (parent_end-npar)] = cell;
+                }
+
+                // Get pointers to all the child cells (these will be overwritten)
+                for (int i = child_begin; i < child_begin+nchild; i++) {
+                    T* cell = _cellp(cells, trackers[i]);
+                    assert(cell != NULL);
+                    children[i-child_begin] = cell;
+                }
+                strat.crossover(parents, children);
+                parent_end += strat.crossover_parent_stride;
+                child_begin -= nchild;
+            }
+        }
+        lock(stats.m);
+        append(stats.crossover_time, now() - start);
+        unlock(stats.m);
+
+        // 2. mutate
+        start = now();
+        if (strat.enable_mutation) {
+            for (int i = 0; i < trackers.len; i++) {
+                if (abs(norm_rand(strat.rand_seed)) < strat.mutation_chance) {
+                    strat.mutate(cells[trackers[i].cellid]);
+                }
+            }
+        }
+        lock(stats.m);
+        append(stats.mutate_time, now() - start);
+        unlock(stats.m);
+
+        // 3. evaluate
+        start = now();
+        for (int i = 0; i < trackers.len; i++) {
+            if (strat.test_all || abs(norm_rand(strat.rand_seed)) < strat.test_chance) {
+                trackers[i].score = strat.fitness(cells[trackers[i].cellid]);
+            }
+        }
+        lock(stats.m);
+        append(stats.fitness_time, now() - start);
+        unlock(stats.m);
+
+        // 4. sort (the end iterator is one past the last tracker)
+        start = now();
+        std::sort(trackers.data, trackers.data + trackers.len, comp);
+        lock(stats.m);
+        append(stats.sorting_time, now() - start);
+
+        append(stats.best_cells, cells[trackers[0].cellid]);
+        append(stats.best_cell_fitness, trackers[0].score);
+        append(stats.gen_time, now() - gen_start);
+        stats.gen++;
+        unlock(stats.m);
+    }
+
+    free(parents.data);
+    free(children.data);
+
+    // The stats thread reads `done` under the lock, so write it under the lock.
+    lock(stats.m);
+    stats.done = true;
+    unlock(stats.m);
+    return 0;
+}
+
+// Runs the genetic algorithm described by `strat`: starts strat.num_threads
+// workers (each with its own population), prints/logs progress every
+// strat.stats_print_period_s seconds from the calling thread, waits for the
+// workers, and returns the best cell of the last generation across threads.
+// If no generation was run, the returned cell is default-initialised.
+// Thread handles and user-allocated cell contents are not released here.
+template <class T> T run(Strategy<T> strat) {
+    Array<Stats<T>> stats = make_array<Stats<T>>(strat.num_threads);
+    Array<Thread> threads = make_array<Thread>(strat.num_threads);
+    Array<WorkerThreadArgs<T>> args = make_array<WorkerThreadArgs<T>>(strat.num_threads);
+
+    const bool gt = strat.higher_fitness_is_better;
+    const float worst = _worst_score(gt);
+
+    // One score/cell/mutex shared by all workers. These live on this stack
+    // frame, which outlives the workers because we join them below.
+    float best_global_score = worst;
+    T best_global_cell;
+    Mutex best_m = make_mutex();
+
+    allow_all_processors();
+    set_affinity(0);
+
+    for (int i = 0; i < strat.num_threads; i++) {
+        stats[i] = {
+            .best_cells=make_dynarray<T>(strat.num_generations),
+            .best_cell_fitness=make_dynarray<float>(strat.num_generations),
+            .gen_time=make_dynarray<TimeSpan>(strat.num_generations),
+            .crossover_time=make_dynarray<TimeSpan>(strat.num_generations),
+            .mutate_time=make_dynarray<TimeSpan>(strat.num_generations),
+            .fitness_time=make_dynarray<TimeSpan>(strat.num_generations),
+            .sorting_time=make_dynarray<TimeSpan>(strat.num_generations),
+            .m=make_mutex()
+        };
+        // Each worker only ever indexes its own num_cells_per_thread cells.
+        Array<T> cells = make_array<T>(strat.num_cells_per_thread);
+        Array<CellTracker> trackers = make_array<CellTracker>(strat.num_cells_per_thread);
+        for (int j = 0; j < strat.num_cells_per_thread; j++) {
+            cells[j] = strat.make_default_cell();
+            // Unevaluated cells start with the worst score so they can never
+            // be mistaken for a breakthrough.
+            trackers[j] = {worst, j};
+        }
+
+        args[i].strat=strat;
+        args[i].cells=cells;
+        args[i].trackers=trackers;
+        args[i].stats=&stats[i];
+        args[i].best_global_score=&best_global_score;
+        args[i].best_global_cell=&best_global_cell;
+        args[i].m=&best_m;
+
+        threads[i] = make_thread(worker<T>, &args[i], i+1);
+    }
+
+    // We are the stats thread
+    bool complete = false;
+    while (!complete) {
+        sleep(from_s(strat.stats_print_period_s));
+
+        log("**********************\n");
+        float g_avg_gen_time = 0;
+        float g_avg_crossover_time = 0;
+        float g_avg_mutate_time = 0;
+        float g_avg_fitness_time = 0;
+        float g_avg_sorting_time = 0;
+        float g_avg_overhead_time = 0;
+        float g_progress_per = 0;
+        float g_best_fitness = worst;
+        int reported = 0; // threads that have finished at least one generation
+
+        complete = true;
+
+        for (int i = 0; i < stats.len; i++) {
+            float gen_time = 0;
+            float crossover_time = 0;
+            float mutate_time = 0;
+            float fitness_time = 0;
+            float sorting_time = 0;
+            float best_score = worst;
+
+            // Copy what we need under the lock and do the formatting and file
+            // I/O afterwards, so a worker never waits on the logger.
+            lock(stats[i].m);
+            complete &= stats[i].done;
+            const int gen = stats[i].gen;
+            const int last = stats[i].gen_time.end-1;
+            if (last >= 0) {
+                gen_time = static_cast<float>(to_s(stats[i].gen_time[last]));
+                crossover_time = static_cast<float>(to_s(stats[i].crossover_time[last]));
+                mutate_time = static_cast<float>(to_s(stats[i].mutate_time[last]));
+                fitness_time = static_cast<float>(to_s(stats[i].fitness_time[last]));
+                sorting_time = static_cast<float>(to_s(stats[i].sorting_time[last]));
+                best_score = stats[i].best_cell_fitness[last];
+            }
+            unlock(stats[i].m);
+
+            // Nothing to report until the first generation has completed.
+            if (last < 0) continue;
+            reported++;
+
+            float progress_per = static_cast<float>(gen) / static_cast<float>(strat.num_generations) * 100;
+            float overhead = gen_time - (crossover_time + mutate_time + fitness_time + sorting_time);
+            if (overhead < 0) overhead = 0;
+            float overhead_per = gen_time > 0 ? overhead / gen_time * 100 : 0;
+
+            g_avg_gen_time += gen_time;
+            g_avg_crossover_time += crossover_time;
+            g_avg_mutate_time += mutate_time;
+            g_avg_fitness_time += fitness_time;
+            g_avg_sorting_time += sorting_time;
+            g_avg_overhead_time += overhead;
+            g_progress_per += progress_per;
+            if (_beats(gt, best_score, g_best_fitness)) g_best_fitness = best_score;
+
+            log(thread_stat_format_str, i, gen, strat.num_generations, best_score, overhead_per, gen_time, overhead, crossover_time, mutate_time, fitness_time, sorting_time);
+        }
+
+        if (reported > 0) {
+            g_avg_gen_time       /= reported;
+            g_avg_crossover_time /= reported;
+            g_avg_mutate_time    /= reported;
+            g_avg_fitness_time   /= reported;
+            g_avg_sorting_time   /= reported;
+            g_avg_overhead_time  /= reported;
+            // Threads that have not reported yet count as 0% progress.
+            g_progress_per       /= stats.len;
+
+            float g_avg_overhead_per = g_avg_gen_time > 0 ? g_avg_overhead_time / g_avg_gen_time * 100 : 0;
+
+            log(global_stat_format_str, g_progress_per, g_best_fitness, g_avg_overhead_per, g_avg_gen_time, g_avg_overhead_time, g_avg_crossover_time, g_avg_mutate_time, g_avg_fitness_time, g_avg_sorting_time);
+        }
+    }
+
+    for (int i = 0; i < threads.len; i++) {
+        join(threads[i]);
+    }
+
+    // Pick the best last-generation cell across all threads.
+    T best_cell;
+    float best_score = worst;
+    bool have_best = false;
+    for (int i = 0; i < stats.len; i++) {
+        if (stats[i].best_cell_fitness.end == 0) continue; // thread ran no generation
+        float score = back(stats[i].best_cell_fitness);
+        if (!have_best || _beats(gt, score, best_score)) {
+            best_cell = back(stats[i].best_cells);
+            best_score = score;
+            have_best = true;
+        }
+    }
+
+    // Release everything this function allocated. best_cell is a copy, so it
+    // does not point into any of these buffers.
+    for (int i = 0; i < stats.len; i++) {
+        dispose(stats[i].m);
+        free(stats[i].best_cells._data);
+        free(stats[i].best_cell_fitness._data);
+        free(stats[i].gen_time._data);
+        free(stats[i].crossover_time._data);
+        free(stats[i].mutate_time._data);
+        free(stats[i].fitness_time._data);
+        free(stats[i].sorting_time._data);
+        free(args[i].cells.data);
+        free(args[i].trackers.data);
+    }
+    dispose(best_m);
+    free(stats.data);
+    free(threads.data);
+    free(args.data);
+
+    return best_cell;
+}

 } // namespace genetic
--- a/inc/rand.h
+++ b/inc/rand.h
@@ -0,0 +1,20 @@
+#pragma once 
+
+// TODO: This file needs a serious audit
+
+#include <cstdint>
+
+constexpr uint64_t half_max = UINT64_MAX / 2;
+
+// One xorshift64 step (from https://en.wikipedia.org/wiki/Xorshift); scrambles state in place. A state of 0 stays 0.
+inline void xorshift64(uint64_t &state) {
+  state ^= state << 13;
+  state ^= state >> 7;
+  state ^= state << 17;
+}
+
+// Advances `state` by one xorshift64 step and maps the new state to a float
+// in [-1, 1]. `state` must be non-zero, otherwise it never changes.
+inline float norm_rand(uint64_t &state) {
+  xorshift64(state);
+  // Centre the value in floating point. Doing `state - half_max` in uint64_t
+  // wraps around for every state below half_max, which yields values in
+  // [1, 2) where [-1, 0) is intended.
+  const double half = static_cast<double>(half_max);
+  return static_cast<float>((static_cast<double>(state) - half) / half);
+}
--- a/inc/sync.h
+++ b/inc/sync.h
@@ -0,0 +1,310 @@
+#pragma once 
+
+#include <cassert>
+#include <cstdint>
+#include <cstdio>
+
+#ifdef _WIN32
+#include <windows.h>
+#endif
+
+namespace sync {
+
+#ifdef _WIN32
+typedef CRITICAL_SECTION Mutex;
+typedef CONDITION_VARIABLE ConditionVar; 
+typedef HANDLE Semaphore;
+typedef HANDLE Thread; 
+typedef LARGE_INTEGER TimeSpan;
+typedef DWORD (WINAPI *ThreadFunc)(_In_ LPVOID lpParameter);
+typedef LPVOID ThreadArg;
+
+const TimeSpan infinite_ts = { .QuadPart = LLONG_MAX };
+
+int get_num_cores() {
+    SYSTEM_INFO sysinfo;
+    GetSystemInfo(&sysinfo);
+    return sysinfo.dwNumberOfProcessors;
+}
+
+const int num_cores = get_num_cores();
+
+LARGE_INTEGER _init_freq() {
+    LARGE_INTEGER freq;
+    QueryPerformanceFrequency(&freq);
+    return freq;
+}
+
+static LARGE_INTEGER freq = _init_freq();
+#endif
+
+Thread make_thread(ThreadFunc t, ThreadArg a);
+Thread make_thread(ThreadFunc t, ThreadArg a, int core_affinity);
+void join(Thread t);
+void sleep(TimeSpan ts);
+void allow_all_processors();
+void set_affinity(Thread &t, int core);
+void set_affinity(int core);
+int get_affinity();
+
+Mutex make_mutex();
+void lock(Mutex &m);
+bool trylock(Mutex &m);
+void unlock(Mutex &m);
+void dispose(Mutex &m);
+
+ConditionVar make_condition_var();
+void wait(ConditionVar &c, Mutex &m, TimeSpan ts);
+void wake_one(ConditionVar &c);
+void wake_all(ConditionVar &c);
+void dispose(ConditionVar &c);
+
+Semaphore make_semaphore(int initial, int max);
+void wait(Semaphore &s);
+void post(Semaphore &s);
+void dispose(Semaphore &s);
+
+TimeSpan from_ms(double milliseconds);
+TimeSpan from_s(double seconds);
+TimeSpan from_min(double minutes);
+TimeSpan from_hours(double hours);
+TimeSpan now();
+TimeSpan operator-(const TimeSpan &a, const TimeSpan &b);
+TimeSpan operator+(const TimeSpan &a, const TimeSpan &b);
+TimeSpan operator*(const TimeSpan &a, const TimeSpan &b);
+TimeSpan operator/(const TimeSpan &a, const TimeSpan &b);
+
+double to_ms(TimeSpan &ts);
+double to_s(TimeSpan &ts);
+double to_min(TimeSpan &ts);
+double to_hours(TimeSpan &ts);
+
+#ifdef _WIN32
+
+uint64_t bitmask (unsigned short n) {
+  if (n == 64) return -((uint64_t)1);
+  return (((uint64_t) 1) << n) - 1;
+}
+
+const int tab64[64] = {
+    63,  0, 58,  1, 59, 47, 53,  2,
+    60, 39, 48, 27, 54, 33, 42,  3,
+    61, 51, 37, 40, 49, 18, 28, 20,
+    55, 30, 34, 11, 43, 14, 22,  4,
+    62, 57, 46, 52, 38, 26, 32, 41,
+    50, 36, 17, 19, 29, 10, 13, 21,
+    56, 45, 25, 31, 35, 16,  9, 12,
+    44, 24, 15,  8, 23,  7,  6,  5};
+
+int log2_64 (uint64_t value)
+{
+    value |= value >> 1;
+    value |= value >> 2;
+    value |= value >> 4;
+    value |= value >> 8;
+    value |= value >> 16;
+    value |= value >> 32;
+    return tab64[((uint64_t)((value - (value >> 1))*0x07EDD5E59A4E28C2)) >> 58];
+}
+
+Thread make_thread(ThreadFunc f, ThreadArg a) {
+    DWORD tid;
+    return CreateThread(NULL, 0, f, a, 0, &tid);
+}
+
+struct DummyThreadArgs {
+    int core_affinity;
+    ThreadFunc f;
+    ThreadArg a;
+};
+
+// Trampoline for the affinity-taking make_thread overload: pins the new
+// thread to the requested core, then runs the user's thread function.
+// `a` is the heap-allocated DummyThreadArgs created by make_thread; this
+// thread owns it and frees it before calling the user function.
+DWORD _dummy_thread(LPVOID a) {
+    DummyThreadArgs *wrap = static_cast<DummyThreadArgs*>(a);
+    const DummyThreadArgs call = *wrap;
+    free(wrap);
+    set_affinity(call.core_affinity);
+    return call.f(call.a);
+}
+
+Thread make_thread(ThreadFunc f, ThreadArg a, int core_affinity) {
+    DWORD tid;
+    DummyThreadArgs *args = (DummyThreadArgs*)malloc(sizeof(DummyThreadArgs));
+    *args = {
+	.core_affinity=core_affinity,
+	.f=f,
+	.a=a
+    };
+    return CreateThread(NULL, 0, _dummy_thread, args, 0, &tid);
+}
+
+void join(Thread t) {
+    WaitForSingleObject(t, INFINITE);
+}
+
+void sleep(TimeSpan ts) {
+    Sleep(static_cast<DWORD>(to_ms(ts)));
+}
+
+// Lets the current process run on the first num_cores logical processors.
+// Best effort: the return value of SetProcessAffinityMask is not checked.
+void allow_all_processors() {
+    // SetProcessAffinityMask needs a *process* handle and a pointer-sized
+    // mask; a thread pseudo-handle is rejected and a DWORD mask cannot
+    // describe more than 32 processors.
+    const DWORD_PTR affinity = static_cast<DWORD_PTR>(bitmask(static_cast<unsigned short>(num_cores)));
+    SetProcessAffinityMask(GetCurrentProcess(), affinity);
+}
+
+// Pins thread `t` to logical processor `core % num_cores`.
+// `core` must be non-negative. Failures are only caught by the debug assert.
+void set_affinity(Thread &t, int core) {
+    assert(core >= 0);
+    // Pointer-sized mask so cores 32..63 are representable.
+    const DWORD_PTR mask = static_cast<DWORD_PTR>(1) << (core % num_cores);
+    const DWORD_PTR old = SetThreadAffinityMask(t, mask);
+    // SetThreadAffinityMask returns the previous mask, so setting the same
+    // mask a second time reads back the value that was just applied.
+    const DWORD_PTR confirm = SetThreadAffinityMask(t, mask);
+    assert(old != 0 && confirm == mask);
+    (void)old;
+    (void)confirm;
+}
+
+void set_affinity(int core) {
+    Thread cur = GetCurrentThread();
+    set_affinity(cur, core);
+}
+
+// Returns the index of the highest logical processor in the calling thread's
+// affinity mask, or 0 if the mask could not be queried.
+// The mask is read by briefly setting the affinity to processor 0 (the call
+// returns the previous mask) and then restoring it.
+int get_affinity() {
+    Thread t = GetCurrentThread();
+    const DWORD_PTR probe = 1;
+    const DWORD_PTR affinity = SetThreadAffinityMask(t, probe);
+    if (affinity == 0) {
+        // The probe was rejected, so nothing was changed and there is
+        // nothing to restore.
+        return 0;
+    }
+    const DWORD_PTR check = SetThreadAffinityMask(t, affinity);
+    assert(check == probe);
+    (void)check;
+    return log2_64(affinity);
+}
+
+Mutex make_mutex() {
+    Mutex m;
+    InitializeCriticalSection(&m);
+    return m; // NOTE(review): returns an initialised CRITICAL_SECTION by value; Win32 documents these as non-copyable - confirm this is safe for callers
+}
+
+void lock(Mutex &m) {
+    EnterCriticalSection(&m);
+}
+
+bool trylock(Mutex &m) {
+    return TryEnterCriticalSection(&m);
+}
+
+void unlock(Mutex &m) {
+    LeaveCriticalSection(&m);
+}
+
+void dispose(Mutex &m) {
+    DeleteCriticalSection(&m);
+}
+
+ConditionVar make_condition_var() {
+    ConditionVar c;
+    InitializeConditionVariable(&c);
+    return c;
+}
+
+void wait(ConditionVar &c, Mutex &m, TimeSpan ts) {
+    if (ts.QuadPart == infinite_ts.QuadPart) {
+        SleepConditionVariableCS(&c, &m, INFINITE);
+    } else {
+        SleepConditionVariableCS(&c, &m, static_cast<DWORD>(to_ms(ts)));
+    }
+}
+
+void wake_one(ConditionVar &c) {
+    WakeConditionVariable(&c);
+}
+
+void wake_all(ConditionVar &c) {
+    WakeAllConditionVariable(&c);
+}
+
+void dispose(ConditionVar &c) {
+    return; // Windows doesn't have a delete condition variable func
+}
+
+Semaphore make_semaphore(int initial, int max) {
+    return CreateSemaphoreA(NULL, (long)initial, (long)max, NULL);
+}
+
+void wait(Semaphore &s) {
+    WaitForSingleObject(s, INFINITE);
+}
+
+void post(Semaphore &s) {
+    ReleaseSemaphore(s, 1, NULL);
+}
+
+void dispose(Semaphore &s) {
+    CloseHandle(s);
+}
+
+// Converts a duration in milliseconds to performance-counter ticks.
+TimeSpan from_ms(double milliseconds) {
+    TimeSpan ts;
+    // Scale in floating point and truncate once at the end. Casting the
+    // seconds value to an integer first turns every sub-second duration into 0.
+    ts.QuadPart = static_cast<LONGLONG>(milliseconds / 1000.0 * static_cast<double>(freq.QuadPart));
+    return ts;
+}
+
+// Converts a duration in seconds to performance-counter ticks.
+TimeSpan from_s(double seconds) {
+    TimeSpan ts;
+    // Multiply before truncating so fractional seconds (e.g. 0.5) survive.
+    ts.QuadPart = static_cast<LONGLONG>(seconds * static_cast<double>(freq.QuadPart));
+    return ts;
+}
+
+// Converts a duration in minutes to performance-counter ticks.
+TimeSpan from_min(double minutes) {
+    TimeSpan ts;
+    // Multiply before truncating so the result is not rounded down to a
+    // whole number of seconds.
+    ts.QuadPart = static_cast<LONGLONG>(minutes * 60.0 * static_cast<double>(freq.QuadPart));
+    return ts;
+}
+
+// Converts a duration in hours to performance-counter ticks.
+TimeSpan from_hours(double hours) {
+    TimeSpan ts;
+    // Multiply before truncating so the result is not rounded down to a
+    // whole number of seconds.
+    ts.QuadPart = static_cast<LONGLONG>(hours * 60.0 * 60.0 * static_cast<double>(freq.QuadPart));
+    return ts;
+}
+
+TimeSpan now() {
+    TimeSpan ts;
+    QueryPerformanceCounter(&ts);
+    return ts;
+}
+
+TimeSpan operator-(const TimeSpan &a, const TimeSpan &b) {
+    TimeSpan ts;
+    ts.QuadPart = a.QuadPart - b.QuadPart;
+    return ts;
+}
+
+TimeSpan operator+(const TimeSpan &a, const TimeSpan &b) {
+    TimeSpan ts;
+    ts.QuadPart = a.QuadPart + b.QuadPart;
+    return ts;
+}
+
+TimeSpan operator*(const TimeSpan &a, const TimeSpan &b) {
+    TimeSpan ts;
+    ts.QuadPart = a.QuadPart * b.QuadPart;
+    return ts;
+}
+
+TimeSpan operator/(const TimeSpan &a, const TimeSpan &b) {
+    TimeSpan ts;
+    ts.QuadPart = a.QuadPart / b.QuadPart;
+    return ts;
+}
+
+double to_ms(TimeSpan &ts) {
+    return static_cast<double>(ts.QuadPart*1000)/static_cast<double>(freq.QuadPart);
+}
+
+double to_s(TimeSpan &ts) {
+    return static_cast<double>(ts.QuadPart)/static_cast<double>(freq.QuadPart);
+}
+
+double to_min(TimeSpan &ts) {
+    return static_cast<double>(ts.QuadPart)/static_cast<double>(freq.QuadPart*60);
+}
+
+double to_hours(TimeSpan &ts) {
+    return static_cast<double>(ts.QuadPart)/static_cast<double>(freq.QuadPart*60*60);
+}
+ 
+#endif
+
+} // namespace sync
+//
--- a/inc/util.h
+++ b/inc/util.h
@@ -0,0 +1,56 @@
+#pragma once 
+
+#include <cstring>
+// Smaller / larger of two values. Every argument is parenthesised so that
+// operands containing lower-precedence operators (`a ? b : c`, `x & y`, ...)
+// expand correctly. Arguments are evaluated more than once, so do not pass
+// expressions with side effects.
+#define min(A, B) (((A) < (B)) ? (A) : (B))
+#define max(A, B) (((A) > (B)) ? (A) : (B))
+// The larger of A and B when GT is true, the smaller one otherwise.
+#define better(GT, A, B) ((GT) ? max((A), (B)) : min((A), (B)))
+
+template <class T> struct Array {
+    T *data;
+    int len;
+
+    T &operator[](int i) { return data[i]; }
+};
+
+
+template <class T> Array<T> make_array(int len) {
+    return {
+	.data=(T*)malloc(sizeof(T)*len),
+	.len=len
+     };
+}
+template <class T> T back(Array<T> &a) { return a.data[a.len-1]; }
+template <class T> T front(Array<T> &a) { return a.data[0]; }
+
+template <class T> struct DynArray {
+    T* _data;
+    int end;
+    int cap;
+
+    T &operator[](int i) { return _data[i]; }
+};
+
+template <class T> DynArray<T> make_dynarray(int cap) {
+    return {
+	._data=(T*)malloc(sizeof(T)*cap),
+	.end=0,
+	.cap=cap
+    };
+}
+
+template <class T> void resize(DynArray<T> &a, int new_cap) {
+    T* old = a._data;
+    a._data = (T*)malloc(sizeof(T)*new_cap);
+    memcpy(a._data, old, min(sizeof(T)*a.end, sizeof(T)*new_cap));
+    a.cap = new_cap;
+    free(old);
+}
+
+// Appends `el` to the end of `a`. When the array is full its capacity is
+// doubled first; an array with zero capacity grows to one slot.
+template <class T> void append(DynArray<T> &a, T el) {
+    // max, not min: the new capacity must be at least one and at least twice
+    // the old one, otherwise the write below lands outside the buffer.
+    if (a.end == a.cap) resize(a, max(1, a.cap*2));
+    a[a.end++] = el;
+}
+
+template <class T> T back(DynArray<T> &a) { return a._data[a.end-1]; }
+template <class T> T front(DynArray<T> &a) { return a._data[0]; }
+
--- a/13
+++ b/13
@@ -1,13 +0,0 @@
-src_files = $(find src -iname "*.cpp")
-obj_files = $(src_files:%.cpp=%.o)
-
-all: $(obj_files)
-	echo $(obj_files)
-	echo $(src_files)
-	g++ -o main $^
-
-%.o: %.cpp
-	g++ -o $@ $<
-
-clean:
-	rm -f *.o *.exe
--- a/src/genetic.cpp
+++ b/src/genetic.cpp
@@ -1,42 +0,0 @@
-#include "genetic.h"
-#include <queue>
-#include <vector>
-#include <pthread.h>
-
-namespace genetic {
-
-template <class T> struct CellEntry {
-  float score;
-  T cell;
-  bool stale;
-};
-
-template <class T> struct WorkEntry {
-  const std::vector<CellEntry<T>> &cur;
-  std::vector<CellEntry<T>> &next;
-  int cur_i;
-};
-
-// Definitions
-template <class T> Stats<T> run(Strategy<T> strat) {
-  Stats<T> stats;
-
-  std::queue<WorkEntry<T>> fitness_queue;
-  std::vector<CellEntry<T>> cells_a, cells_b;
-  for (int i = 0; i < strat.num_cells; i++) {
-    T cell = strat.make_default_cell();
-    cells_a.push_back({0, cell, true});
-    cells_b.push_back({0, cell, true});
-  }
-
-  std::vector<CellEntry<T>> &cur_cells = cells_a;
-  std::vector<CellEntry<T>> &next_cells = cells_b;
-
-  for (int i = 0; i < strat.num_generations; i++) {
-
-    cur_cells = cur_cells == cells_a ? cells_b : cells_a;
-    next_cells = cur_cells == cells_a ? cells_b : cells_a;
-  }
-}
-
-} // namespace genetic
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -1,133 +1,98 @@
-#include <algorithm>
 #include <cassert>
+#include <cstdint>
 #include <cstdlib>
-#include <iostream>
-#include <vector>
+#include "genetic.h"
+#include "rand.h"
+#include "sync.h"

-#define MUTATION_CHANCE 1.0
+using namespace genetic;

-float norm_rand() { return (float)rand() / RAND_MAX; }
+const int len = 12;
+const float max_float = 999.9f;
+static uint64_t seed = 12;
+static float num_mutate_chance = 0.5;
+static int num_parents = 2;
+static int num_children = 2;

-enum class ConstraintType {
-  PRODUCT = 0,
-  SUM = 1,
-  INDEX_EQ = 2,
-};

-struct Constraint {
-  ConstraintType type;
-  int optional_i;
-  float value;
-};
-static std::vector<Constraint> constraints;
+static int target_sum = 20000;
+static int target_product = 10*target_sum;

-struct Cell {
-  int n;
-  float *params;
-};
-
-Cell make_cell(int num_params) {
-  Cell res = {num_params, (float *)malloc(num_params * sizeof(float))};
-  for (int i = 0; i < num_params; i++) {
-    res.params[i] = 0;
-  }
-  return res;
+Array<float> make_new_arr() {
+    Array<float> arr = make_array<float>(len);
+    for (int i = 0; i < arr.len; i++) {
+        arr[i] = norm_rand(seed) * max_float;
+    }
+    return arr;
 }

-float get_cell_err(const Cell &a) {
-  float total_diff = 0;
-  for (auto c : constraints) {
-    switch (c.type) {
-    case ConstraintType::SUM: {
-      float sum = 0;
-      for (int i = 0; i < a.n; i++) {
-        sum += a.params[i];
-      }
-      total_diff += abs(c.value - sum);
-      break;
+void mutate(Array<float> &arr_to_mutate) {
+    for (int i = 0; i < len; i++) {
+        if (norm_rand(seed) < num_mutate_chance) {
+            arr_to_mutate[i] = norm_rand(seed) * max_float;
+        }
    }
-    case ConstraintType::PRODUCT: {
-      float prod = 1;
-      for (int i = 0; i < a.n; i++) {
-        prod *= a.params[i];
-      }
-      total_diff += abs(c.value - prod);
-      break;
-    }
-    case ConstraintType::INDEX_EQ: {
-      assert(c.optional_i < a.n);
-      total_diff += abs(c.value - a.params[c.optional_i]);
-      break;
-    }
-    }
-  }
-  return total_diff;
 }

-bool operator<(const Cell &a, const Cell &b) {
-  assert(a.n == b.n);
-  return get_cell_err(a) < get_cell_err(b);
+void crossover(const Array<Array<float>*> parents, const Array<Array<float> *> out_children) {
+    for (int i = 0; i < len; i++) {
+        (*out_children.data[0])[i] = i < len/2 ? (*parents.data[0])[i] : (*parents.data[1])[i];
+        (*out_children.data[1])[i] = i < len/2 ? (*parents.data[1])[i] : (*parents.data[0])[i];
+    }
 }

-void combine_cells(const Cell &a, const Cell &b, Cell *child) {
-  bool a_first = norm_rand() > 0.5f;
-  for (int i = 0; i < a.n; i++) {
-    float offset = norm_rand() * 10;
-    float roll = norm_rand();
-    if (a_first) {
-      child->params[i] = (i < a.n / 2 ? a.params[i] : b.params[i]) +
-                         (roll > 0.5 ? offset : -offset);
-    } else {
-      child->params[i] = (i < a.n / 2 ? b.params[i] : a.params[i]) +
-                         (roll > 0.5 ? offset : -offset);
+float fitness(const Array<float> &cell) {
+    float sum = 0;
+    float product = 1;
+    for (int i = 0; i < cell.len; i++) {
+        sum += cell.data[i];
+        product *= cell.data[i];
    }
-  }
-  float r = norm_rand();
-  child->params[(int)r * (a.n - 1)] = r * 100.0;
+    return abs(sum - target_sum)*abs(sum - target_sum) + abs(product - target_product);
 }

 int main(int argc, char **argv) {
-  int num_params, num_cells, num_generations, num_constraints = 0;
-  std::cin >> num_params >> num_cells >> num_generations >> num_constraints;
+    int num_gens = 10000;
+    Strategy<Array<float>> strat {
+        .num_threads = atoi(argv[1]),
+	.stats_print_period_s = 2,
+        .num_cells_per_thread = 100000,
+        .num_generations = num_gens,
+	.share_breakthroughs=true,
+	.share_breakthrough_gen_period=10,
+        .test_all = true,
+        .test_chance = 0.0, // doesn't matter
+        .enable_crossover = true,
+        .crossover_parent_num = 2,
+        .crossover_parent_stride = 1,
+        .crossover_children_num = 2,
+        .enable_mutation = true,
+        .mutation_chance = 0.7,
+        .rand_seed = seed,
+        .higher_fitness_is_better = false,
+        .make_default_cell=make_new_arr,
+        .mutate=mutate,
+        .crossover=crossover,
+        .fitness=fitness
+    };
+    n_threads = atoi(argv[1]);

-  std::cout << num_params << " " << num_cells << " " << num_generations << " "
-            << num_constraints << std::endl;
+    log("Running w/ %d threads\n", atoi(argv[1]));
+    TimeSpan start = now();
+    auto best_cell = run(strat);
+    TimeSpan runtime = now() - start;

-  for (int i = 0; i < num_constraints; i++) {
-    int type_in, optional_i = 0;
-    float value;
-    std::cin >> type_in >> value;
-    ConstraintType type = static_cast<ConstraintType>(type_in);
-    if (type == ConstraintType::INDEX_EQ) {
-      std::cin >> optional_i;
+    float sum = 0;
+    float product = 1;
+    log("Winning cell: ");
+    for (int i = 0; i < best_cell.len; i++) {
+	float val = best_cell[i];
+	sum += val;
+	product *= val;
+	log("%f ", val);
    }
-    constraints.push_back({type, optional_i, value});
-  }
-
-  std::vector<Cell> cells;
-  for (int i = 0; i < num_cells; i++) {
-    cells.push_back(make_cell(num_params));
-  }
-
-  for (int i = 0; i < num_generations; i++) {
-    std::sort(cells.begin(), cells.end());
-    for (int j = 0; j < num_cells / 2; j++) {
-      combine_cells(cells[j], cells[j + 1], &cells[num_cells / 2 + j]);
-    }
-    if (i % 1000 == 0) {
-      std::cout << i << "\t" << get_cell_err(cells[0]) << std::endl;
-    }
-  }
-  std::cout << "Final Answer: ";
-  float sum = 0;
-  float product = 1;
-  for (int i = 0; i < cells[0].n; i++) {
-    std::cout << cells[0].params[i] << " ";
-    sum += cells[0].params[i];
-    product *= cells[0].params[i];
-  }
-  std::cout << std::endl;
-
-  std::cout << "Sum: " << sum << std::endl;
-  std::cout << "Product: " << product << std::endl;
+    log("\n");
+    log("Final Sum: %f\n", sum);
+    log("Final Product: %f\n", product);
+    log("Execution Time %d (min) %f (s)\n", static_cast<int>(sync::to_min(runtime)), fmod(to_s(runtime), 60) );
 }
--- a/test.txt
+++ b/test.txt
@@ -1,3 +0,0 @@
-10 2000 150000 2
-1 200
-0 10000
Author	SHA1	Message	Date
Seth Hamilton	6365dffda9	remove .sh in favor of .bat. Do some testing and verify that running w/ 15 threads does indeed use the whole cpu (via btop)	2025-09-16 15:04:14 -05:00
Seth Hamilton	f7e804607f	Debugged multithreaded version. Now investigating some performance issues (not every thread is being used). This is an interesting version.	2025-09-10 00:46:50 -05:00
Seth Hamilton	5a048bf469	Running. Only tested single thread version. Stats are looking nice. Needs more validation	2025-09-09 19:57:27 -05:00
Seth Hamilton	1b8801519e	draft complete. debugging	2025-09-09 09:39:53 -05:00
Seth Hamilton	bd9820dd68	Working single-threaded version	2025-09-07 22:38:04 -05:00
Seth Hamilton	bfde57caac	Compile with debug info. Start debugging	2025-09-07 17:08:31 -05:00
Seth Hamilton	bed933055e	Rewrite. Currently segfaults	2025-09-07 16:42:06 -05:00
Seth Hamilton	905ca1e43a	remove pthread.h ref in genetic.cpp. copy over actual sync changes that were missing from live plotter	2025-09-07 14:56:33 -05:00
Seth Hamilton	17e6ac5f83	fix windows vs unix path insanity	2025-09-07 14:50:11 -05:00
Seth Hamilton	0e210b4fbb	copy over sync mods in live plotter. might make common code an ext reference in the future	2025-09-07 14:08:07 -05:00
Seth Hamilton	ab639e635a	working compdb export	2025-09-07 14:05:49 -05:00
Seth Hamilton	c28caef038	work on compile commands bash file	2025-09-07 13:24:11 -05:00
Seth Hamilton	d2c7059d3d	Remove pthread windows submodule. remove makefile. add in simple .bat and .sh to build project	2025-09-06 23:22:45 -05:00
Seth Hamilton	7be8d8bb75	timer work in sync header	2025-09-06 22:55:20 -05:00
Seth Hamilton	ff250af7e8	Beginning header wrapper of windows synchronization primitives	2025-08-31 18:56:53 -05:00
Seth Hamilton	3a901a0a40	working through compile bugs	2025-08-21 00:41:51 -05:00
Seth Hamilton	3265f045d1	add in xorshift rand function. begin reworking job queue to use cell entry pointers instead of just cells so results can be posted directly to the relevant entry	2025-08-16 00:41:28 -05:00
Seth Hamilton	edda3761d1	more progress on drafting the worker thread model, get job batch func, etc...	2025-08-15 16:09:33 -05:00
Seth Hamilton	65c7ea743b	additions to the api. kinda drafting out what I think might be needed to the user to specify crossover and mutation behavior	2025-08-11 00:55:17 -05:00
Seth Hamilton	db2272b768	some english notes on the purpose of batch sizes and the beginning of a worker thread implementation	2025-08-10 01:15:35 -05:00
Seth Hamilton	b4d4683f8d	makefile seems to be in a decent state. auto-generates compile_commands.json	2025-08-09 20:38:03 -05:00
Seth Hamilton	6157a80584	ignore build files	2025-08-09 17:39:57 -05:00
Seth Hamilton	05cc2c3f4f	apparently need to commit the embedded repo for the gitmodules thing to actually work. weird	2025-08-09 17:35:12 -05:00
				`@@ -0,0 +1 @@`
				`"C:\Program Files\Microsoft Visual Studio\2022\Community\VC\Auxiliary\Build\vcvars64.bat"`