Compare commits

...

13 Commits

Author SHA1 Message Date
6365dffda9 remove .sh in favor of .bat. Do some testing and verify that running w/ 15 threads does indeed use the whole cpu (via btop) 2025-09-16 15:04:14 -05:00
f7e804607f Debugged multithreaded version. Now investigating some performance issues (not every thread is being used). This is an interesting version. 2025-09-10 00:46:50 -05:00
5a048bf469 Running. Only tested single thread version. Stats are looking nice. Needs more validation 2025-09-09 19:57:27 -05:00
1b8801519e draft complete. debugging 2025-09-09 09:39:53 -05:00
bd9820dd68 Working single-threaded version 2025-09-07 22:38:04 -05:00
bfde57caac Compile with debug info. Start debugging 2025-09-07 17:08:31 -05:00
bed933055e Rewrite. Currently segfaults 2025-09-07 16:42:06 -05:00
905ca1e43a remove pthread.h ref in genetic.cpp. copy over actual sync changes that were missing from live plotter 2025-09-07 14:56:33 -05:00
17e6ac5f83 fix windows vs unix path insanity 2025-09-07 14:50:11 -05:00
0e210b4fbb copy over sync mods in live plotter. might make common code an ext reference in the future 2025-09-07 14:08:07 -05:00
ab639e635a working compdb export 2025-09-07 14:05:49 -05:00
c28caef038 work on compile commands bash file 2025-09-07 13:24:11 -05:00
d2c7059d3d Remove pthead windows submodule. remove makefile. add in simple .bat and .sh to build project 2025-09-06 23:22:45 -05:00
15 changed files with 703 additions and 445 deletions

4
.gitignore vendored
View File

@@ -1,4 +1,6 @@
**PTHREADS-BUILT**
*.idb
*.pdb
**obj**
**bin**
.cache**
compile_commands.json

3
.gitmodules vendored
View File

@@ -1,3 +0,0 @@
[submodule "ext/pthreads4w-code"]
path = ext/pthreads4w-code
url = https://git.code.sf.net/p/pthreads4w/code

1
activate.bat Normal file
View File

@@ -0,0 +1 @@
"C:\Program Files\Microsoft Visual Studio\2022\Community\VC\Auxiliary\Build\vcvars64.bat"

10
build.bat Normal file
View File

@@ -0,0 +1,10 @@
@ECHO off
REM Builds everything under src\ with MSVC (cl). Run from a VS dev prompt
REM (see activate.bat). Pass "release" as the first argument for -O2.
SET flags=-Od -ZI -MTd
SET config=Debug
IF "%1" == "release" (
SET flags=-O2 -MT
SET config=Release
)
SET srcs=src\*
REM BUG FIX: plain "mkdir bin obj" errors when the directories already
REM exist; guard each one so rebuilds stay quiet.
IF NOT EXIST bin mkdir bin
IF NOT EXIST obj mkdir obj
cl %srcs% -I inc %flags% -std:c++20 -MP -Fo:obj\\ -Fe:bin\\

31
debug.rad Normal file
View File

@@ -0,0 +1,31 @@
// raddbg 0.9.21 project file
recent_file: path: "C:/Program Files (x86)/Windows Kits/10/include/10.0.26100.0/ucrt/stdio.h"
recent_file: path: "d:/os/obj/amd64fre/minkernel/crts/ucrt/src/appcrt/stdio/xmt/objfre/amd64/minkernel/crts/ucrt/src/appcrt/stdio/output.cpp"
recent_file: path: "C:/Program Files/Microsoft Visual Studio/2022/Community/VC/Tools/MSVC/14.44.35207/include/xutility"
recent_file: path: "C:/Users/sethh/Documents/repos/Petri/inc/genetic.h"
recent_file: path: "C:/Program Files/Microsoft Visual Studio/2022/Community/VC/Tools/MSVC/14.44.35207/include/algorithm"
recent_file: path: "C:/Users/sethh/Documents/repos/Petri/inc/sync.h"
recent_file: path: "inc/genetic.h"
recent_file: path: "inc/sync.h"
recent_file: path: "d:/os/obj/amd64fre/minkernel/crts/ucrt/src/appcrt/startup/mt/objfre/amd64/minkernel/crts/ucrt/src/appcrt/startup/abort.cpp"
recent_file: path: "src/main.cpp"
recent_file: path: "d:/os/obj/amd64fre/minkernel/crts/ucrt/src/appcrt/startup/mt/objfre/amd64/minkernel/crts/ucrt/src/appcrt/startup/assert.cpp"
recent_file: path: "../../../../../Program Files/Microsoft Visual Studio/2022/Community/VC/Tools/MSVC/14.44.35207/include/vector"
recent_file: path: "d:/os/obj/amd64fre/minkernel/crts/ucrt/src/appcrt/misc/mt/objfre/amd64/minkernel/crts/ucrt/src/appcrt/misc/invalid_parameter.cpp"
recent_file: path: "../../../../../Program Files/Microsoft Visual Studio/2022/Community/VC/Tools/MSVC/14.44.35207/include/xmemory"
recent_file: path: "../../../../../Program Files/Microsoft Visual Studio/2022/Community/VC/Tools/MSVC/14.42.34433/include/algorithm"
recent_file: path: "../../../../../Program Files/Microsoft Visual Studio/2022/Community/VC/Tools/MSVC/14.42.34433/include/xutility"
target:
{
executable: "bin/main.exe"
working_directory: bin
label: main
enabled: 1
arguments: 1
}
breakpoint:
{
source_location: "inc/genetic.h:292:1"
hit_count: 0
}

22
export.bat Normal file
View File

@@ -0,0 +1,22 @@
@ECHO off
REM Emits a clang-style compile_commands.json entry for every .cpp under
REM src\ so clangd/IDE tooling can index the project.
SETLOCAL ENABLEDELAYEDEXPANSION
ECHO [ > compile_commands.json
FOR /r "src\" %%F IN (*.cpp) DO (
SET "file=%%F"
SET "file=!file:\=/!"
SET "directory=%~dp0"
SET "directory=!directory:\=/!"
ECHO { >> compile_commands.json
ECHO "directory": "!directory!", >> compile_commands.json
REM NOTE(review): %flags% is set in build.bat's environment, not here, so
REM it expands to nothing in the emitted command — confirm that is intended.
ECHO "command": "cl !file! -I inc %flags% -std:c++20 -MP -Fo:obj\\ ", >> compile_commands.json
ECHO "file": "!file!" >> compile_commands.json
ECHO }, >> compile_commands.json
)
REM NOTE(review): the trailing comma before ] is not strictly valid JSON,
REM though most tools tolerate it.
ECHO ] >> compile_commands.json

View File

@@ -1,28 +1,73 @@
#include <vector>
#pragma once
#include <algorithm>
#include <cfloat>
#include <cstdarg>
#include <cstdio>
#include <cstdlib>
#include "util.h"
#include "sync.h"
#include "rand.h"
using namespace sync;
namespace genetic {
template <class T> struct Array;
template <class T> struct Stats;
template <class T> struct Strategy;
struct CellTracker;
template <class T> Stats<T> run(Strategy<T>);
// printf-style templates for the periodic stats lines: one global summary
// line and one line per worker thread.
const char *global_stat_format_str = "GLOBAL, Progress %.1f%%, Top: %.5e, Overhead Per: %.4f%%, Gen: %.4f, Overhead: %.4f, Cross: %.4f (s), Mutate: %.4f (s), Fitness: %.4f (s), Sorting: %.4f (s)\n";
const char *thread_stat_format_str = "%d, Progress %d/%d, Top: %.5e, Overhead Per: %.4f%%, Gen: %.4f, Overhead: %.4f, Cross: %.4f (s), Mutate: %.4f (s), Fitness: %.4f (s), Sorting: %.4f (s)\n";
// Shared scratch buffer for formatted log lines, sized at 2x the longer
// format string. NOTE(review): expanded %e/%f fields can exceed the raw
// format-string length — 2x headroom is a heuristic, not a guarantee.
static int stat_str_len = 2*max(strlen(thread_stat_format_str), strlen(global_stat_format_str));
static char *stat_str = (char*)malloc(stat_str_len);
// Reused for "logs/logs-%d.txt".
static char *filename = (char*)malloc(64);
// NOTE(review): never assigned in this header — presumably set by the
// driver before logging starts; confirm against main.cpp.
static int n_threads = 0;
void log(const char *format_str, ...) {
va_list list;
va_start(list, format_str);
vsprintf_s(stat_str, 2*max(strlen(thread_stat_format_str), strlen(global_stat_format_str)), format_str, list);
printf("%s", stat_str);
FILE *f;
sprintf(filename, "logs/logs-%d.txt", n_threads);
fopen_s(&f, filename, "a");
fwrite(stat_str, sizeof(char), strlen(stat_str), f);
fclose(f);
}
template <class T> T run(Strategy<T>);
template <class T> struct Strategy {
int num_threads; // Number of worker threads that will be evaluating cell
// fitness.
int batch_size; // Number of cells a worker thread tries to work on in a row
// before accessing/locking the work queue again.
int num_cells; // Size of the population pool
int num_generations; // Number of times (epochs) to run the algorithm
// Number of worker threads that will be evaluating cell fitness
int num_threads;
// Period of print statements (in seconds)
float stats_print_period_s;
// Size of the population pool per sim thread
int num_cells_per_thread;
// Number of times (epochs) to run the algorithm
int num_generations;
// Each thread will integrate the best globally performing cell
bool share_breakthroughs;
// How many generations to explore before resyncing with the global best
int share_breakthrough_gen_period;
bool test_all; // Sets whether or not every cell's fitness is evaluated every
// generation
float test_chance; // Chance to test any given cell's fitness. Relevant only
// if test_all is false.
bool enable_crossover; // Cells that score well in the evaluation stage
// produce children that replace low-scoring cells
bool enable_crossover_mutation; // Mutations can occur after crossover
float crossover_mutation_chance; // Chance to mutate a child cell
int crossover_parent_num; // Number of unique high-scoring parents in a
// crossover call.
int crossover_parent_stride; // Number of parents to skip over when moving to
@@ -48,16 +93,280 @@ template <class T> struct Strategy {
float (*fitness)(const T &cell);
};
template <class T> struct Stats {
std::vector<T> best_cell;
std::vector<float> best_cell_fitness;
// Per-worker-thread statistics, appended to once per generation by the
// worker and read by the stats/printing thread. All fields are guarded by m.
template<class T> struct Stats {
// Per-generation history of this thread's best cell and its fitness.
DynArray<T> best_cells;
DynArray<float> best_cell_fitness;
// Number of generations completed so far.
int gen;
// Set by the worker once all generations have run.
bool done;
// Per-generation timing breakdowns (TimeSpan is QPC ticks — see sync.h).
DynArray<TimeSpan> gen_time;
DynArray<TimeSpan> crossover_time;
DynArray<TimeSpan> mutate_time;
DynArray<TimeSpan> fitness_time;
DynArray<TimeSpan> sorting_time;
// Guards every field above; shared between the worker and the stats thread.
Mutex m;
};
template <class T> struct Array {
T *_data;
int len;
T &operator[](int i);
// Pairs a cell's index in the population array with its most recent fitness
// score, so sorting shuffles these small records instead of the (potentially
// large) cells themselves.
struct CellTracker {
float score;
int cellid;
};
// Everything a worker thread needs: its private population slice plus
// pointers to the cross-thread "global best" shared state.
template<class T>
struct WorkerThreadArgs {
Strategy<T> strat;
// This worker's private cells and their score/index trackers.
Array<T> cells;
Array<CellTracker> trackers;
// This worker's stats record (shared with the stats thread via stats->m).
Stats<T> *stats;
// NOTE(review): m is stored per-args while best_global_score/cell are
// shared by all workers — as constructed in run(), each worker gets a
// distinct mutex, so this cannot mutually exclude across threads. Consider
// a pointer to one shared mutex.
Mutex m;
float *best_global_score;
T* best_global_cell;
};
template<class T> T* _cellp(Array<T> cells, CellTracker tracker) { return &cells[tracker.cellid]; }
// Worker thread body: evolves its private slice of the population through
// share -> crossover -> mutate -> evaluate -> sort each generation, and
// publishes per-generation results/timings under stats.m.
template <class T> DWORD worker(LPVOID args) {
    // Unpack everything...
    WorkerThreadArgs<T>* worker_args = static_cast<WorkerThreadArgs<T>*>(args);
    Strategy<T> strat = worker_args->strat;
    Array<T> cells = worker_args->cells;
    Array<CellTracker> trackers = worker_args->trackers;
    Stats<T> &stats = *worker_args->stats;
    float* best_global_score = worker_args->best_global_score;
    T* best_global_cell = worker_args->best_global_cell;
    // BUG FIX: was `Mutex best_m = worker_args->m;` — copying a
    // CRITICAL_SECTION produces an independent lock, so the global-best
    // mutex never synchronized anything. Bind by reference instead.
    Mutex &best_m = worker_args->m;
    // NOTE(review): strat is copied by value, so every worker starts from
    // the same strat.rand_seed and draws an identical random sequence —
    // confirm whether per-thread seeding is intended.
    // Prepare crossover scratch arrays once; only the pointers they hold
    // change from call to call.
    int npar = strat.crossover_parent_num;
    int nchild = strat.crossover_children_num;
    Array<T*> parents = make_array<T*>(npar);
    Array<T*> children = make_array<T*>(nchild);
    bool gt = strat.higher_fitness_is_better; // Writing strat.higher... is annoying
    TimeSpan start, gen_start;
    while (stats.gen < strat.num_generations) {
        gen_start = now();
        // 0. Share/Integrate global breakthrough, every
        // share_breakthrough_gen_period generations, staggered per thread by
        // core affinity so workers don't all contend on the lock at once.
        // BUG FIX: the condition was `% period` (non-zero), which synced on
        // every generation EXCEPT multiples of the period — the opposite of
        // the documented "period" semantics.
        if (strat.share_breakthroughs && (stats.gen + get_affinity()) % strat.share_breakthrough_gen_period == 0) {
            lock(best_m);
            // Compare directly instead of `better(...) != *best_global_score`,
            // which relied on fragile float equality.
            bool local_is_better = gt ? (trackers[0].score > *best_global_score)
                                      : (trackers[0].score < *best_global_score);
            if (local_is_better) {
                // Share our champion with the other workers
                *best_global_cell = *_cellp(cells, trackers[0]);
                *best_global_score = trackers[0].score;
            } else {
                // Integrate the global champion into our population
                *_cellp(cells, trackers[0]) = *best_global_cell;
                trackers[0].score = *best_global_score;
            }
            unlock(best_m);
        }
        // 1. crossover: top scorers produce children that overwrite the
        // worst scorers, walking inward from both ends of the sorted trackers.
        start = now();
        if (strat.enable_crossover) {
            int parent_end = npar;
            int child_begin = trackers.len - nchild;
            while (parent_end <= child_begin) {
                // Get pointers to all the parent cells
                for (int i = parent_end - npar; i < parent_end; i++) {
                    T* cell = _cellp(cells, trackers[i]);
                    assert(cell != NULL);
                    parents[i - (parent_end - npar)] = cell;
                }
                // Get pointers to all the child cells (these will be overwritten)
                for (int i = child_begin; i < child_begin + nchild; i++) {
                    T* cell = _cellp(cells, trackers[i]);
                    assert(cell != NULL);
                    children[i - child_begin] = cell;
                }
                strat.crossover(parents, children);
                parent_end += strat.crossover_parent_stride;
                child_begin -= nchild;
            }
        }
        lock(stats.m);
        append(stats.crossover_time, now() - start);
        unlock(stats.m);
        // 2. mutate each cell with probability mutation_chance
        start = now();
        for (int i = 0; i < trackers.len; i++) {
            if (abs(norm_rand(strat.rand_seed)) < strat.mutation_chance) {
                strat.mutate(cells[trackers[i].cellid]);
            }
        }
        lock(stats.m);
        append(stats.mutate_time, now() - start);
        unlock(stats.m);
        // 3. evaluate fitness (every cell, or a random subset)
        start = now();
        if (strat.test_all) {
            for (int i = 0; i < trackers.len; i++) {
                trackers[i].score = strat.fitness(cells[trackers[i].cellid]);
            }
        } else {
            for (int i = 0; i < trackers.len; i++) {
                if (abs(norm_rand(strat.rand_seed)) < strat.test_chance) {
                    trackers[i].score = strat.fitness(cells[trackers[i].cellid]);
                }
            }
        }
        lock(stats.m);
        append(stats.fitness_time, now() - start);
        unlock(stats.m);
        auto comp = [strat](CellTracker &a, CellTracker &b) {
            return strat.higher_fitness_is_better ? (a.score > b.score) : (a.score < b.score);
        };
        // 4. sort trackers so the best cell sits at index 0
        start = now();
        // BUG FIX: the end iterator was &trackers[trackers.len-1], which
        // excluded the final element from every sort (std::sort takes a
        // half-open [first, last) range).
        std::sort(&trackers[0], &trackers[0] + trackers.len, comp);
        lock(stats.m);
        append(stats.sorting_time, now() - start);
        append(stats.best_cells, cells[trackers[0].cellid]);
        append(stats.best_cell_fitness, trackers[0].score);
        append(stats.gen_time, now() - gen_start);
        stats.gen++;
        unlock(stats.m);
    }
    // Publish completion under the lock so the stats thread cannot miss it.
    lock(stats.m);
    stats.done = true;
    unlock(stats.m);
    // Scratch pointer arrays only — the cells they point at are owned by run().
    free(parents.data);
    free(children.data);
    return 0;
}
// Spawns strat.num_threads independent workers (pinned to cores 1..N; the
// main thread pins itself to core 0 and becomes the stats printer), waits
// for them all, and returns the best cell found overall.
template <class T> T run(Strategy<T> strat) {
    Array<Stats<T>> stats = make_array<Stats<T>>(strat.num_threads);
    Array<Thread> threads = make_array<Thread>(strat.num_threads);
    Array<WorkerThreadArgs<T>> args = make_array<WorkerThreadArgs<T>>(strat.num_threads);
    // BUG FIX: FLT_MIN is the smallest *positive* float, not the most
    // negative one — with higher-is-better and all-negative fitness scores,
    // nothing could ever beat the initial value. Use -FLT_MAX.
    float best_global_score = strat.higher_fitness_is_better ? -FLT_MAX : FLT_MAX;
    T best_global_cell;
    allow_all_processors();
    set_affinity(0);
    for (int i = 0; i < strat.num_threads; i++) {
        stats[i] = {
            .best_cells=make_dynarray<T>(strat.num_generations),
            .best_cell_fitness=make_dynarray<float>(strat.num_generations),
            .gen_time=make_dynarray<TimeSpan>(strat.num_generations),
            .crossover_time=make_dynarray<TimeSpan>(strat.num_generations),
            .mutate_time=make_dynarray<TimeSpan>(strat.num_generations),
            .fitness_time=make_dynarray<TimeSpan>(strat.num_generations),
            .sorting_time=make_dynarray<TimeSpan>(strat.num_generations),
            .m=make_mutex()
        };
        // BUG FIX: was sized num_threads*num_cells_per_thread — each worker
        // only ever touches num_cells_per_thread cells, so this allocated
        // num_threads times more memory than needed per worker.
        Array<T> cells = make_array<T>(strat.num_cells_per_thread);
        Array<CellTracker> trackers = make_array<CellTracker>(strat.num_cells_per_thread);
        for (int j = 0; j < strat.num_cells_per_thread; j++) { // j: don't shadow i
            cells[j] = strat.make_default_cell();
            trackers[j] = {0, j};
        }
        args[i].strat=strat;
        args[i].cells=cells;
        args[i].trackers=trackers;
        args[i].stats=&stats[i];
        args[i].best_global_score=&best_global_score;
        args[i].best_global_cell=&best_global_cell;
        // NOTE(review): each args[i] gets its OWN mutex while all workers
        // share best_global_score/cell, so this lock cannot provide mutual
        // exclusion across threads. WorkerThreadArgs should carry a pointer
        // to a single shared mutex instead.
        args[i].m = make_mutex();
        threads[i] = make_thread(worker<T>, &args[i], i+1);
    }
    // We are the stats thread
    bool complete = false;
    while (!complete) {
        sleep(from_s(strat.stats_print_period_s));
        log("**********************\n");
        float g_avg_gen_time = 0;
        float g_avg_crossover_time = 0;
        float g_avg_mutate_time = 0;
        float g_avg_fitness_time = 0;
        float g_avg_sorting_time = 0;
        float g_avg_overhead_time = 0;
        float g_progress_per = 0;
        float g_best_fitness = strat.higher_fitness_is_better ? -FLT_MAX : FLT_MAX;
        complete = true;
        for (int i = 0; i < stats.len; i++) {
            lock(stats[i].m);
            complete &= stats[i].done;
            int end = stats[i].gen_time.end-1;
            if (end < 0) {
                // BUG FIX: before this worker finishes its first generation
                // the arrays are empty and gen_time[-1] read out of bounds.
                unlock(stats[i].m);
                continue;
            }
            float gen_time = to_s(stats[i].gen_time[end]);
            float crossover_time = to_s(stats[i].crossover_time[end]);
            float mutate_time = to_s(stats[i].mutate_time[end]);
            float fitness_time = to_s(stats[i].fitness_time[end]);
            float sorting_time = to_s(stats[i].sorting_time[end]);
            float progress_per = static_cast<float>(stats[i].gen) / static_cast<float>(strat.num_generations) * 100;
            float best_score = back(stats[i].best_cell_fitness);
            float overhead = max(0, gen_time - (crossover_time + mutate_time + fitness_time + sorting_time));
            float overhead_per = overhead / gen_time * 100;
            g_avg_gen_time += gen_time;
            g_avg_crossover_time += crossover_time;
            g_avg_mutate_time += mutate_time;
            g_avg_fitness_time += fitness_time;
            g_avg_sorting_time += sorting_time;
            g_progress_per += progress_per;
            g_best_fitness = better(strat.higher_fitness_is_better, best_score, g_best_fitness);
            g_avg_overhead_time += overhead;
            log(thread_stat_format_str, i, stats[i].gen, strat.num_generations, best_score, overhead_per, gen_time, overhead, crossover_time, mutate_time, fitness_time, sorting_time);
            unlock(stats[i].m);
        }
        // Averages still divide by stats.len even if some workers had no data
        // this round — a small early-startup bias, never out-of-bounds.
        g_avg_gen_time /= stats.len;
        g_avg_crossover_time /= stats.len;
        g_avg_mutate_time /= stats.len;
        g_avg_fitness_time /= stats.len;
        g_avg_sorting_time /= stats.len;
        g_progress_per /= stats.len;
        g_avg_overhead_time /= stats.len;
        // Guard the division: all-skipped rounds would otherwise produce NaN.
        float g_avg_overhead_per = g_avg_gen_time > 0 ? g_avg_overhead_time / g_avg_gen_time * 100 : 0;
        log(global_stat_format_str, g_progress_per, g_best_fitness, g_avg_overhead_per, g_avg_gen_time, g_avg_overhead_time, g_avg_crossover_time, g_avg_mutate_time, g_avg_fitness_time, g_avg_sorting_time);
        if (complete) break;
    }
    for (int i = 0; i < threads.len; i++) {
        join(threads[i]);
    }
    // Pick the overall winner across workers.
    T best_cell;
    float best_score = strat.higher_fitness_is_better ? -FLT_MAX : FLT_MAX; // BUG FIX: was FLT_MIN
    for (int i = 0; i < stats.len; i++) {
        if (stats[i].best_cell_fitness.end == 0) continue; // worker ran zero generations
        float score = back(stats[i].best_cell_fitness);
        if (strat.higher_fitness_is_better ? score > best_score : score < best_score) {
            best_cell = back(stats[i].best_cells);
            best_score = score;
        }
    }
    // TODO: the per-worker allocations (cells/trackers/stats arrays) are
    // intentionally leaked here, matching the original behavior.
    return best_cell;
}
} // namespace genetic

View File

@@ -1,3 +1,5 @@
#pragma once
// TODO: This file needs a serious audit
#include <cstdint>
@@ -14,5 +16,5 @@ inline void xorshift64(uint64_t &state) {
// returns a random value between -1 and 1. modifies seed
inline float norm_rand(uint64_t &state) {
    xorshift64(state);
    // BUG FIX: `state - half_max` is an UNSIGNED subtraction, so the old
    // cast-to-float could never be negative — the range was [0, 2), not
    // [-1, 1], and callers relying on the sign (or the documented range)
    // were misled. Reinterpret the offset as signed before converting.
    return static_cast<float>(static_cast<int64_t>(state - half_max)) / static_cast<float>(half_max);
}

View File

@@ -1,7 +1,11 @@
#pragma once
#include <cassert>
#include <cstdint>
#include <cstdio>
#ifdef _WIN32
#include "windows.h"
#include <windows.h>
#endif
namespace sync {
@@ -11,14 +15,37 @@ typedef CRITICAL_SECTION Mutex;
typedef CONDITION_VARIABLE ConditionVar;
typedef HANDLE Semaphore;
typedef HANDLE Thread;
typedef DWORD TimeSpan;
typedef DWORD WINAPI (*ThreadFunc)(_In_ LPVOID lpParameter);
typedef LPVOID ThreadArg
const TimeSpan infinite_ts = INFINITE;
typedef LARGE_INTEGER TimeSpan;
typedef DWORD (WINAPI *ThreadFunc)(_In_ LPVOID lpParameter);
typedef LPVOID ThreadArg;
const TimeSpan infinite_ts = { .QuadPart = LLONG_MAX };
// Number of logical processors reported by Windows.
int get_num_cores() {
SYSTEM_INFO sysinfo;
GetSystemInfo(&sysinfo);
return sysinfo.dwNumberOfProcessors;
}
// Cached once at static-init time.
const int num_cores = get_num_cores();
// QueryPerformanceCounter tick frequency (ticks per second), cached once at
// static-init time and used by all the TimeSpan conversion helpers below.
LARGE_INTEGER _init_freq() {
LARGE_INTEGER freq;
QueryPerformanceFrequency(&freq);
return freq;
}
static LARGE_INTEGER freq = _init_freq();
#endif
Thread make_thread(ThreadFunc t);
Thread make_thread(ThreadFunc t, ThreadArg a);
Thread make_thread(ThreadFunc t, ThreadArg a, int core_affinity);
void join(Thread t);
void sleep(TimeSpan ts);
void allow_all_processors();
void set_affinity(Thread &t, int core);
void set_affinity(int core);
int get_affinity();
Mutex make_mutex();
void lock(Mutex &m);
@@ -41,21 +68,106 @@ TimeSpan from_ms(double milliseconds);
TimeSpan from_s(double seconds);
TimeSpan from_min(double minutes);
TimeSpan from_hours(double hours);
TimeSpan now();
TimeSpan operator-(const TimeSpan &a, const TimeSpan &b);
TimeSpan operator+(const TimeSpan &a, const TimeSpan &b);
TimeSpan operator*(const TimeSpan &a, const TimeSpan &b);
TimeSpan operator/(const TimeSpan &a, const TimeSpan &b);
double to_ms(TimeSpan &sp);
double to_s(TimeSpan &sp);
double to_min(TimeSpan &sp);
double to_hours(TimeSpan &sp);
double to_ms(TimeSpan &ts);
double to_s(TimeSpan &ts);
double to_min(TimeSpan &ts);
double to_hours(TimeSpan &ts);
#ifdef _WIN32
// Builds a mask with the low n bits set, for n in [0, 64].
// (The n == 64 case is special because a 64-bit shift by 64 is undefined.)
uint64_t bitmask (unsigned short n) {
    return (n == 64) ? ~UINT64_C(0) : (UINT64_C(1) << n) - UINT64_C(1);
}
// Lookup table for the de Bruijn multiply-and-shift trick below.
const int tab64[64] = {
    63,  0, 58,  1, 59, 47, 53,  2,
    60, 39, 48, 27, 54, 33, 42,  3,
    61, 51, 37, 40, 49, 18, 28, 20,
    55, 30, 34, 11, 43, 14, 22,  4,
    62, 57, 46, 52, 38, 26, 32, 41,
    50, 36, 17, 19, 29, 10, 13, 21,
    56, 45, 25, 31, 35, 16,  9, 12,
    44, 24, 15,  8, 23,  7,  6,  5};
// floor(log2(value)) for 64-bit integers, branch-free apart from the smear.
int log2_64 (uint64_t value)
{
    // Smear the highest set bit down into every lower position...
    for (int shift = 1; shift <= 32; shift <<= 1) {
        value |= value >> shift;
    }
    // ...isolate it, then hash it into the table with a de Bruijn multiply.
    uint64_t top_bit = value - (value >> 1);
    return tab64[(top_bit * UINT64_C(0x07EDD5E59A4E28C2)) >> 58];
}
Thread make_thread(ThreadFunc f, ThreadArg a) {
DWORD tid;
return CreateThread(NULL, 0, t, a, 0, &tid);
return CreateThread(NULL, 0, f, a, 0, &tid);
}
// Trampoline payload for affinity-pinned threads: the real entry point and
// argument, plus the core to pin to before calling it. Heap-allocated by
// make_thread(f, a, core_affinity) and consumed by _dummy_thread.
struct DummyThreadArgs {
int core_affinity;
ThreadFunc f;
ThreadArg a;
};
// Thread trampoline: pins the new thread to the requested core, then invokes
// the user's entry point.
DWORD _dummy_thread(LPVOID a) {
    DummyThreadArgs *wrap = static_cast<DummyThreadArgs*>(a);
    // BUG FIX: the DummyThreadArgs allocated in make_thread was never freed
    // — one leak per spawned thread. Copy the fields out, then release it.
    DummyThreadArgs local = *wrap;
    free(wrap);
    set_affinity(local.core_affinity);
    return local.f(local.a);
}
// Spawns a thread pinned to `core_affinity` by routing through the
// _dummy_thread trampoline. Ownership of the heap-allocated wrapper passes
// to the trampoline on success.
Thread make_thread(ThreadFunc f, ThreadArg a, int core_affinity) {
    DWORD tid;
    DummyThreadArgs *args = (DummyThreadArgs*)malloc(sizeof(DummyThreadArgs));
    *args = {
        .core_affinity=core_affinity,
        .f=f,
        .a=a
    };
    Thread t = CreateThread(NULL, 0, _dummy_thread, args, 0, &tid);
    // BUG FIX: if CreateThread fails the trampoline never runs, so the
    // wrapper would leak; release it here. Callers still receive NULL.
    if (t == NULL) free(args);
    return t;
}
void join(Thread t) {
WaitForSingleObject(t, infinite_ts);
WaitForSingleObject(t, INFINITE);
}
// Blocks the calling thread for (at least) the given span. Resolution is
// limited to milliseconds by Win32 Sleep.
void sleep(TimeSpan ts) {
Sleep(static_cast<DWORD>(to_ms(ts)));
}
void allow_all_processors() {
Thread t = GetCurrentThread();
DWORD affinity = bitmask(num_cores);
SetProcessAffinityMask(t, affinity);
}
// Pins thread t to a single core (wrapping around if core >= num_cores).
// The mask is applied twice: the first call returns the previous mask (to
// prove it succeeded), the second returns our own mask (to confirm it took).
void set_affinity(Thread &t, int core) {
    // BUG FIX: the mask and the SetThreadAffinityMask return values are
    // DWORD_PTR (64-bit on x64); using DWORD truncated both the shift and
    // the returned masks on machines with more than 32 logical cores.
    DWORD_PTR mask = (DWORD_PTR)1 << (core % num_cores);
    DWORD_PTR old = SetThreadAffinityMask(t, mask);
    DWORD_PTR confirm = SetThreadAffinityMask(t, mask);
    assert(old && GetLastError() != ERROR_INVALID_PARAMETER && mask == confirm);
}
// Pins the *calling* thread to a single core.
void set_affinity(int core) {
Thread cur = GetCurrentThread();
set_affinity(cur, core);
}
// Reads the calling thread's affinity by briefly setting it to core 0 and
// restoring it — SetThreadAffinityMask returns the *previous* mask, and
// Win32 offers no direct getter.
// NOTE(review): not atomic — the thread runs with the temporary mask between
// the two calls; also assumes the real mask has a single bit set, since only
// log2 of it is returned. Confirm both are acceptable for callers.
int get_affinity() {
Thread t = GetCurrentThread();
DWORD mask = 1;
DWORD affinity = SetThreadAffinityMask(t, (DWORD_PTR)mask);
DWORD check = SetThreadAffinityMask(t, (DWORD_PTR)affinity);
assert(check == mask);
return log2_64(affinity);
}
Mutex make_mutex() {
@@ -87,7 +199,11 @@ ConditionVar make_condition_var() {
}
void wait(ConditionVar &c, Mutex &m, TimeSpan ts) {
SleepConditionVariable(&c, &m, ts);
if (ts.QuadPart == infinite_ts.QuadPart) {
SleepConditionVariableCS(&c, &m, INFINITE);
} else {
SleepConditionVariableCS(&c, &m, static_cast<DWORD>(to_ms(ts)));
}
}
void wake_one(ConditionVar &c) {
@@ -107,11 +223,11 @@ Semaphore make_semaphore(int initial, int max) {
}
void wait(Semaphore &s) {
WaitForSingleObject(s, infinite_ts);
WaitForSingleObject(s, INFINITE);
}
void post(Semaphore &s) {
ReleaseSemaphore(s);
ReleaseSemaphore(s, 1, NULL);
}
void dispose(Semaphore &s) {
@@ -119,37 +235,76 @@ void dispose(Semaphore &s) {
}
// Converts milliseconds into QPC ticks.
TimeSpan from_ms(double milliseconds) {
    TimeSpan ts;
    // BUG FIX: the old code truncated milliseconds/1000.0 to a whole number
    // of seconds BEFORE scaling by the tick frequency, so any span under one
    // second (e.g. 500 ms) collapsed to zero ticks. Scale first, cast last.
    ts.QuadPart = static_cast<LONGLONG>(milliseconds / 1000.0 * static_cast<double>(freq.QuadPart));
    return ts;
}
// Converts seconds into QPC ticks.
TimeSpan from_s(double seconds) {
    TimeSpan ts;
    // BUG FIX: `seconds` was truncated to a whole number before multiplying
    // by the tick frequency, so fractional periods (e.g. a 0.5 s stats
    // interval) became zero. Multiply in double, then cast once.
    ts.QuadPart = static_cast<LONGLONG>(seconds * static_cast<double>(freq.QuadPart));
    return ts;
}
// Converts minutes into QPC ticks.
TimeSpan from_min(double minutes) {
    TimeSpan ts;
    // BUG FIX: same truncation as from_ms/from_s — the intermediate
    // minutes*60.0 was truncated before scaling by the tick frequency.
    ts.QuadPart = static_cast<LONGLONG>(minutes * 60.0 * static_cast<double>(freq.QuadPart));
    return ts;
}
// Converts hours into QPC ticks.
TimeSpan from_hours(double hours) {
    TimeSpan ts;
    // BUG FIX: same truncation as the other from_* helpers — scale by the
    // tick frequency in double precision, then cast once at the end.
    ts.QuadPart = static_cast<LONGLONG>(hours * 60.0 * 60.0 * static_cast<double>(freq.QuadPart));
    return ts;
}
double to_ms(TimeSpan &sp) {
return static_cast<double>(sp);
// Current timestamp in QPC ticks (monotonic; subtract two to get a span).
TimeSpan now() {
TimeSpan ts;
QueryPerformanceCounter(&ts);
return ts;
}
double to_s(TimeSpan &sp) {
return static_cast<double>(sp)/1000.0;
// Tick-wise difference of two timestamps/spans.
TimeSpan operator-(const TimeSpan &a, const TimeSpan &b) {
TimeSpan ts;
ts.QuadPart = a.QuadPart - b.QuadPart;
return ts;
}
double to_min(TimeSpan &sp) {
return static_cast<double>(sp)/(1000.0*60.0);
// Tick-wise sum of two spans.
TimeSpan operator+(const TimeSpan &a, const TimeSpan &b) {
TimeSpan ts;
ts.QuadPart = a.QuadPart + b.QuadPart;
return ts;
}
double to_hours(TimeSpan &sp) {
return static_cast<double>(sp)/(1000.0*60.0*60.0);
// Tick-wise product.
// NOTE(review): multiplying two tick counts squares the unit (ticks^2) —
// no caller is visible in this view; confirm this is intentional scaling.
TimeSpan operator*(const TimeSpan &a, const TimeSpan &b) {
TimeSpan ts;
ts.QuadPart = a.QuadPart * b.QuadPart;
return ts;
}
// Tick-wise quotient (integer division; result is a unitless ratio stored
// back in a TimeSpan). NOTE(review): divides by zero if b is a zero span.
TimeSpan operator/(const TimeSpan &a, const TimeSpan &b) {
TimeSpan ts;
ts.QuadPart = a.QuadPart / b.QuadPart;
return ts;
}
// QPC ticks -> milliseconds.
double to_ms(TimeSpan &ts) {
return static_cast<double>(ts.QuadPart*1000)/static_cast<double>(freq.QuadPart);
}
// QPC ticks -> seconds.
double to_s(TimeSpan &ts) {
return static_cast<double>(ts.QuadPart)/static_cast<double>(freq.QuadPart);
}
// QPC ticks -> minutes.
double to_min(TimeSpan &ts) {
return static_cast<double>(ts.QuadPart)/static_cast<double>(freq.QuadPart*60);
}
// QPC ticks -> hours.
double to_hours(TimeSpan &ts) {
return static_cast<double>(ts.QuadPart)/static_cast<double>(freq.QuadPart*60*60);
}
#endif
} // namespace sync
//

56
inc/util.h Normal file
View File

@@ -0,0 +1,56 @@
#pragma once
#include <cstring>
// Function-style min/max/better macros. NOTE: arguments are evaluated twice,
// so avoid side effects in A/B. `better(GT, A, B)` picks max when GT
// (higher-is-better) and min otherwise. Lowercase names shadow the Windows
// min/max macros on purpose.
#define min(A, B) ((A < B) ? (A) : (B))
#define max(A, B) ((A > B) ? (A) : (B))
#define better(GT, A, B) (GT ? max((A), (B)) : min((A), (B)))
// Minimal fixed-length heap array with unchecked indexing. The caller owns
// `data` and is responsible for freeing it.
template <class T> struct Array {
    T *data;
    int len;
    T &operator[](int i) { return data[i]; }
};
// Allocates an Array<T> of `len` uninitialized elements.
template <class T> Array<T> make_array(int len) {
    Array<T> a;
    a.data = (T*)malloc(sizeof(T) * len);
    a.len = len;
    return a;
}
// Copy of the last / first element (arrays are assumed non-empty).
template <class T> T back(Array<T> &a) { return a.data[a.len - 1]; }
template <class T> T front(Array<T> &a) { return a.data[0]; }

View File

@@ -1,61 +0,0 @@
src_files = $(shell find src -iname "*.cpp")
obj_files = $(src_files:src/%.cpp=obj/%.o)
ifeq ($(OS),Windows_NT)
CCFLAGS += -D WIN32 -Iext/PTHREADS-BUILT/include -std=c++20
PTHREADLIB = ext/PTHREADS-BUILT/lib/pthreadVCE3.lib
ifeq ($(PROCESSOR_ARCHITEW6432),AMD64)
CCFLAGS += -D AMD64
else
ifeq ($(PROCESSOR_ARCHITECTURE),AMD64)
CCFLAGS += -D AMD64
endif
ifeq ($(PROCESSOR_ARCHITECTURE),x86)
CCFLAGS += -D IA32
endif
endif
else
UNAME_S := $(shell uname -s)
ifeq ($(UNAME_S),Linux)
CCFLAGS += -D LINUX
endif
ifeq ($(UNAME_S),Darwin)
CCFLAGS += -D OSX
endif
UNAME_P := $(shell uname -p)
ifeq ($(UNAME_P),x86_64)
CCFLAGS += -D AMD64
endif
ifneq ($(filter %86,$(UNAME_P)),)
CCFLAGS += -D IA32
endif
ifneq ($(filter arm%,$(UNAME_P)),)
CCFLAGS += -D ARM
endif
endif
debug: OPTIMIZATION_FLAG = -g
release: OPTIMIZATION_FLAG = -O3
release: all export_comp_db
debug: all export_comp_db
all: $(obj_files)
@ mkdir -p bin
g++ -I inc/ $^ $(PTHREADLIB) -o bin/main $(OPTIMIZATION_FLAG) $(CCFLAGS)
obj/%.o: src/%.cpp
@ mkdir -p obj
g++ -I inc/ -c $< -o $@ $(OPTIMIZATION_FLAG) $(CCFLAGS)
export_comp_db:
echo [ > compile_commands.json
make debug -B --dry-run > temp
awk '/g\+\+.*\.cpp/ { f="compile_commands.json"; printf "\t\{\n\t\t\"directory\": \"%s\",\n\t\t\"command\": \"%s\",\n\t\t\"file\": \"%s\"\n\t\},\n", ENVIRON["PWD"], $$0, $$5 >> f }' temp
echo ] >> compile_commands.json
rm temp
clean:
rm -f obj/*.o bin/*.exe

View File

@@ -1,279 +0,0 @@
#include <algorithm>
#include <cstdint>
#include <cstdlib>
#include <optional>
#include <variant>
#include <vector>
#include "genetic.h"
#include "pthread.h"
#include "rand.h"
#define NUM_QUEUE_RETRIES 10
using namespace std;
// std::visit/std::variant overload pattern
// See:
// https://www.modernescpp.com/index.php/visiting-a-std-variant-with-the-overload-pattern/
// You don't have to understand this, just use it :)
template <typename... Ts> struct overload : Ts... {
using Ts::operator()...;
};
template <class... Ts> overload(Ts...) -> overload<Ts...>;
namespace genetic {
template <class T> struct cell_entry {
float score;
T *cell;
bool stale;
};
template <class T> struct crossover_job {
Array<cell_entry<T> *> &parents;
Array<cell_entry<T> *> &children_out;
};
template <class T> struct fitness_job {
cell_entry<T> *cell_entry;
};
template <class T> struct mutate_job {
cell_entry<T> *cell_entry;
};
template <class T> struct work_queue {
variant<crossover_job<T>, fitness_job<T>, mutate_job<T>> *jobs;
int len;
int read_i;
int write_i;
bool done_writing;
pthread_mutex_t data_mutex;
pthread_mutex_t gen_complete_mutex;
pthread_mutex_t jobs_available_mutex;
pthread_cond_t gen_complete_cond;
pthread_cond_t jobs_available_cond;
};
template <class T> work_queue<T> make_work_queue(int len) {
return {.jobs = (variant<fitness_job<T>, crossover_job<T>> *)malloc(
sizeof(variant<fitness_job<T>, crossover_job<T>>) * len),
.len = len,
.read_i = 0,
.write_i = 0,
.done_writing = false,
.data_mutex = PTHREAD_MUTEX_INITIALIZER,
.gen_complete_mutex = PTHREAD_MUTEX_INITIALIZER,
.jobs_available_mutex = PTHREAD_MUTEX_INITIALIZER,
.gen_complete_cond = PTHREAD_COND_INITIALIZER,
.jobs_available_cond = PTHREAD_COND_INITIALIZER};
}
template <class T> struct job_batch {
Array<variant<crossover_job<T>, fitness_job<T>>> jobs;
bool gen_complete;
};
template <class T>
optional<job_batch<T>> get_job_batch(work_queue<T> &queue, int batch_size,
bool *stop_flag) {
while (true) {
for (int i = 0; i < NUM_QUEUE_RETRIES; i++) {
if (queue.read_i < queue.write_i &&
pthread_mutex_trylock(&queue.data_mutex)) {
job_batch<T> res;
res.jobs._data = &queue._jobs[queue.read_i];
int span_size = min(batch_size, queue.write_i - queue.read_i);
res.jobs.len = span_size;
queue.read_i += span_size;
res.gen_complete = queue.done_writing && queue.read_i == queue.write_i;
pthread_mutex_unlock(&queue.data_mutex);
return res;
}
}
pthread_mutex_lock(&queue.jobs_available_mutex);
pthread_cond_wait(queue.jobs_available_cond, &queue.jobs_available_mutex);
if (stop_flag)
return {};
}
}
template <class T> struct worker_thread_args {
Strategy<T> &strat;
work_queue<T> &queue;
bool *stop_flag;
};
template <class T> void *worker(void *args) {
worker_thread_args<T> *work_args = (worker_thread_args<T> *)args;
Strategy<T> &strat = work_args->strat;
work_queue<T> &queue = work_args->queue;
bool *stop_flag = work_args->stop_flag;
auto job_dispatcher = overload{
[strat](mutate_job<T> mj) {
strat.mutate(*mj.cell_entry->cell);
mj.cell_entry->stale = true;
},
[strat](fitness_job<T> fj) {
fj.cell_entry->score = strat.fitness(*fj.cell_entry->cell);
fj.cell_entry->stale = false;
},
[strat](crossover_job<T> cj) {
Array<T *> parent_cells, child_cells;
parent_cells = {(T **)malloc(sizeof(T *) * cj.parents.len),
cj.parents.len};
child_cells = {(T **)malloc(sizeof(T *) * cj.children_out.len),
cj.children_out.len};
for (int i = 0; i < cj.parents.len; i++) {
parent_cells[i] = cj.parents[i].cell;
}
for (int i = 0; i < cj.children_out.len; i++) {
child_cells[i] = cj.children_out[i].cell;
cj.children_out[i].stale = true;
}
strat.crossover(parent_cells, child_cells);
},
};
while (true) {
auto batch = get_job_batch(queue, strat.batch_size, stop_flag);
if (!batch || *stop_flag)
return NULL;
// Do the actual work
for (int i = 0; i < batch->jobs.len; i++) {
visit(job_dispatcher, batch->jobs[i]);
}
if (batch->gen_complete) {
pthread_cond_signal(&queue.gen_complete_cond, &queue.gen_complete_mutex);
}
}
}
template <class T> Stats<T> run(Strategy<T> strat) {
Stats<T> stats;
// The work queue is what all the worker threads will checking
// for jobs
work_queue<T> queue = make_work_queue<T>(strat.num_cells);
// The actual cells. Woo!
T cells[strat.num_cells];
// Using a vector so I can use the make_heap, push_heap, etc.
vector<cell_entry<T>> cell_queue;
for (int i = 0; i < strat.num_cells; i++) {
cells[i] = strat.make_default_cell();
cell_queue.push_back({0, &cells[i], true});
}
bool stop_flag = false;
worker_thread_args<T> args = {
.strat = strat, .queue = queue, .stop_flag = &stop_flag};
// spawn worker threads
pthread_t threads[strat.num_threads];
for (int i = 0; i < strat.num_threads; i++) {
pthread_create(&threads[i], NULL, worker<T>, (void *)args);
}
uint64_t rand_state = strat.rand_seed;
for (int i = 0; i < strat.num_generations; i++) {
// Mutate some random cells in the population
for (int i = 0; i < cell_queue.size(); i++) {
if (abs(norm_rand(rand_state)) < strat.mutation_chance) {
queue.jobs[queue.write_i] = mutate_job<T>{&cell_queue[i]};
queue.write_i++;
}
}
pthread_cond_broadcast(&queue.jobs_available_cond);
// Potential issue here where mutations aren't done computing and fitness
// jobs begin. maybe need to gate this.
// Generate fitness jobs
for (int i = 0; i < cell_queue.size(); i++) {
if (cell_queue[i].stale &&
(strat.test_all || abs(norm_rand(rand_state)) < strat.test_chance)) {
queue.jobs[queue.write_i] = fitness_job<T>{&cell_queue[i]};
queue.write_i++;
}
pthread_cond_broadcast(&queue.jobs_available_cond);
}
queue.done_writing = true;
// wait for fitness jobs to complete
pthread_mutex_lock(&queue.gen_complete_mutex);
// Before going to sleep, do a quick check to see if the fitness jobs are
// already complete.
pthread_mutex_lock(&queue.data_mutex);
bool already_complete = queue.read_i != queue.write_i;
pthread_mutex_unlock(&queue.data_mutex);
if (already_complete) {
pthread_mutex_unlock(&queue.gen_complete_mutex);
} else {
pthread_cond_wait(&queue.gen_complete_cond, &queue.gen_complete_mutex);
}
// Sort cells on performance
std::sort(cell_queue.begin(), cell_queue.end(),
[strat](cell_entry<T> a, cell_entry<T> b) {
return strat.higher_fitness_is_better ? a > b : a < b;
});
printf("Top Score: %f\n", cell_queue[0].score);
if (!strat.enable_crossover)
continue;
// generate crossover jobs
// dear god. forgive me father
queue.write_i = 0;
queue.read_i = 0;
int count = 0;
int n_par = strat.crossover_parent_num;
int n_child = strat.crossover_children_num;
int child_i = cell_queue.size() - 1;
int par_i = 0;
while (child_i - par_i <= n_par + n_child) {
Array<cell_entry<T> *> parents = {
(cell_entry<T> **)malloc(sizeof(cell_entry<T> *) * n_par), n_par};
Array<cell_entry<T> *> children = {
(cell_entry<T> **)malloc(sizeof(cell_entry<T> *) * n_child), n_child};
for (; par_i < par_i + n_par; par_i++) {
parents[i] = cell_queue[par_i];
}
for (; child_i > child_i - n_child; child_i--) {
children[i] = cell_queue[child_i];
}
queue.jobs[queue.write_i] = crossover_job<T>{parents, children};
par_i += strat.crossover_parent_stride;
child_i += strat.crossover_children_stride;
}
}
// stop worker threads
stop_flag = true;
pthread_cond_broadcast(&queue.jobs_available_cond);
for (int i = 0; i < strat.num_threads; i++) {
pthread_join(threads[i], NULL);
}
}
// Unchecked mutable element access into the Array's backing buffer.
// Mirrors raw-pointer indexing: no bounds validation is performed,
// so callers must ensure 0 <= i < len.
template <class T> T &Array<T>::operator[](int i) {
  return *(_data + i);
}
} // namespace genetic

View File

@@ -3,22 +3,23 @@
#include <cstdlib>
#include "genetic.h"
#include "rand.h"
#include "sync.h"
using namespace genetic;
const int len = 10;
const float max_float = 9999.9f;
const int len = 12;
const float max_float = 999.9f;
static uint64_t seed = 12;
static float num_mutate_chance = 0.5;
static int num_parents = 2;
static int num_children = 2;
static int target_sum = 200;
static int target_product = 300;
static int target_sum = 20000;
static int target_product = 10*target_sum;
Array<float> make_new_arr() {
Array<float> arr = { (float*)malloc(sizeof(float)*len), len };
Array<float> arr = make_array<float>(len);
for (int i = 0; i < arr.len; i++) {
arr[i] = norm_rand(seed) * max_float;
}
@@ -35,40 +36,38 @@ void mutate(Array<float> &arr_to_mutate) {
void crossover(const Array<Array<float>*> parents, const Array<Array<float> *> out_children) {
for (int i = 0; i < len; i++) {
(*out_children._data[0])[i] = i < len/2 ? (*parents._data[0])[i] : (*parents._data[1])[i];
(*out_children._data[1])[i] = i < len/2 ? (*parents._data[1])[i] : (*parents._data[0])[i];
(*out_children.data[0])[i] = i < len/2 ? (*parents.data[0])[i] : (*parents.data[1])[i];
(*out_children.data[1])[i] = i < len/2 ? (*parents.data[1])[i] : (*parents.data[0])[i];
}
}
// norm_rand can go negative. fix in genetic.cpp
// child stride doesn't make sense. Should always skip over child num
float fitness(const Array<float> &cell) {
float sum = 0;
float product = 1;
for (int i = 0; i < cell.len; i++) {
sum += cell._data[i];
product *= cell._data[i];
sum += cell.data[i];
product *= cell.data[i];
}
return abs(sum - target_sum) + abs(product - target_product);
return abs(sum - target_sum)*abs(sum - target_sum) + abs(product - target_product);
}
int main(int argc, char **argv) {
int num_gens = 10000;
Strategy<Array<float>> strat {
.num_threads = 1,
.batch_size = 1,
.num_cells = 10,
.num_generations = 10,
.num_threads = atoi(argv[1]),
.stats_print_period_s = 2,
.num_cells_per_thread = 100000,
.num_generations = num_gens,
.share_breakthroughs=true,
.share_breakthrough_gen_period=10,
.test_all = true,
.test_chance = 0.0, // doesn't matter
.enable_crossover = true,
.enable_crossover_mutation = true,
.crossover_mutation_chance = 0.6f,
.crossover_parent_num = 2,
.crossover_parent_stride = 1,
.crossover_children_num = 2,
.enable_mutation = true,
.mutation_chance = 0.8,
.mutation_chance = 0.7,
.rand_seed = seed,
.higher_fitness_is_better = false,
.make_default_cell=make_new_arr,
@@ -76,6 +75,24 @@ int main(int argc, char **argv) {
.crossover=crossover,
.fitness=fitness
};
n_threads = atoi(argv[1]);
auto res = run(strat);
log("Running w/ %d threads\n", atoi(argv[1]));
TimeSpan start = now();
auto best_cell = run(strat);
TimeSpan runtime = now() - start;
float sum = 0;
float product = 1;
log("Winning cell: ");
for (int i = 0; i < best_cell.len; i++) {
float val = best_cell[i];
sum += val;
product *= val;
log("%f ", val);
}
log("\n");
log("Final Sum: %f\n", sum);
log("Final Product: %f\n", product);
log("Execution Time %d (min) %f (s)\n", static_cast<int>(sync::to_min(runtime)), fmod(to_s(runtime), 60) );
}

View File

@@ -1,3 +0,0 @@
10 2000 150000 2
1 200
0 10000