diff --git a/debug.rad b/debug.rad index 7e76a88..5c987bf 100644 --- a/debug.rad +++ b/debug.rad @@ -1,6 +1,7 @@ // raddbg 0.9.21 project file recent_file: path: "inc/genetic.h" +recent_file: path: "inc/sync.h" recent_file: path: "../../../../../Program Files/Microsoft Visual Studio/2022/Community/VC/Tools/MSVC/14.42.34433/include/algorithm" recent_file: path: "../../../../../Program Files/Microsoft Visual Studio/2022/Community/VC/Tools/MSVC/14.42.34433/include/xutility" recent_file: path: "src/main.cpp" diff --git a/inc/genetic.h b/inc/genetic.h index 90aa2cd..e476730 100644 --- a/inc/genetic.h +++ b/inc/genetic.h @@ -8,6 +8,7 @@ #include "rand.h" using namespace sync; +using namespace std; namespace genetic { @@ -83,6 +84,66 @@ template Array make_array(int len) { }; } +template +struct MutateJob { + T* cell; +}; + +template +struct CrossoverJob { + Array parents; + Array children; +}; + +template +struct FitnessJob { + T* cell; + CellTracker* track; +}; + +enum class JobType { + MUTATE, + CROSSOVER, + FITNESS +}; + +template +union Job { + MutateJob m; + CrossoverJob c; + FitnessJob f; +}; + +// Yes. I am aware of variant +// For some reason I like this better +template +struct TaggedJob { + Job data; + JobType type; +}; + +template +struct WorkQueue { + Array> jobs; + int read_i, write_i, batch_size; + bool done_writing, work_complete, stop; // These catch some edge conditions + Mutex m; + ConditionVar done; + ConditionVar jobs_ready; +}; + +template +struct WorkerThreadArgs { + WorkQueue &q; + Strategy &s; +}; + +template WorkQueue make_work_queue(int len, int batch_size); +template bool tryget_job_batch(WorkQueue &q, int len, Array>* out_batch, bool* out_batch_is_end); + +template +DWORD worker(LPVOID args); + template Stats run(Strategy strat) { Stats stats; @@ -97,17 +158,42 @@ template Stats run(Strategy strat) { Array trackers = make_array(strat.num_cells); for (int i = 0; i < trackers.len; i++) trackers[i] = { .score=0, .cellid=i }; + // Create work queue + // Worst case size is every cell mutated, crossed, and evaluated...? Not quite, but 3x should be upper bound + WorkQueue q = make_work_queue(3*strat.num_cells, strat.batch_size); + WorkerThreadArgs args = {q, strat}; + + // Create worker threads + Thread *threads = (Thread*)malloc(sizeof(Thread*)*strat.num_threads); + for (int i = 0; i < strat.num_threads; i++) { + threads[i] = make_thread(worker, &args); + } + stats.setup_time = now() - start_setup; // *********** ALGORITHM ************ TimeSpan start_algo = now(); for (int gen = 0; gen < strat.num_generations; gen++) { + // Reset work queue + lock(q.m); + q.read_i = 0; + q.write_i = 0; + q.work_complete = false; + q.done_writing = false; + unlock(q.m); + // 1. mutate for (int i = 0; i < trackers.len; i++) { if (abs(norm_rand(strat.rand_seed)) < strat.mutation_chance) { - strat.mutate(cells[trackers[i].cellid]); + MutateJob mj = {&cells[trackers[i].cellid]}; + TaggedJob job; + job.data.m = mj; + job.type=JobType::MUTATE; + q.jobs[q.write_i++] = job; } } + wake_all(q.jobs_ready); // There are available jobs for the worker threads! + // 2. crossover if (strat.enable_crossover) { int npar = strat.crossover_parent_num; @@ -115,9 +201,11 @@ template Stats run(Strategy strat) { int parent_end = npar; int child_begin = trackers.len-nchild; - Array parents = make_array(npar); - Array children = make_array(nchild); while (parent_end <= child_begin) { + + // TODO: Variable size arrays please. This is rediculous. + Array parents = make_array(npar); + Array children = make_array(nchild); // Get pointers to all the parent cells for (int i = parent_end-npar; i < parent_end; i++) { parents[i - (parent_end-npar)] = &cells[trackers[i].cellid]; @@ -127,25 +215,50 @@ template Stats run(Strategy strat) { for (int i = child_begin; i < child_begin+nchild; i++) { children[i-child_begin] = &cells[trackers[i].cellid]; } - strat.crossover(parents, children); + CrossoverJob cj = {parents, children}; + TaggedJob job; + job.data.c=cj; + job.type=JobType::CROSSOVER; + q.jobs[q.write_i++] = job; parent_end += strat.crossover_parent_stride; child_begin -= nchild; } - free(parents.data); - free(children.data); + wake_all(q.jobs_ready); // There are available jobs for the worker threads! } + // 3. evaluate if (strat.test_all) { for (int i = 0; i < trackers.len; i++) { - trackers[i].score = strat.fitness(cells[trackers[i].cellid]); + FitnessJob fj = {&cells[trackers[i].cellid], &trackers[i]}; + TaggedJob job; + job.data.f=fj; + job.type=JobType::FITNESS; + if (i == trackers.len-1) lock(q.m); + q.jobs[q.write_i++] = job; + if (i == trackers.len-1) { q.done_writing = true; unlock(q.m); } } } else { + lock(q.m); for (int i = 0; i < trackers.len; i++) { if (abs(norm_rand(strat.rand_seed)) < strat.test_chance) { - trackers[i].score = strat.fitness(cells[trackers[i].cellid]); + FitnessJob fj = {&cells[trackers[i].cellid], &trackers[i]}; + TaggedJob job; + job.data.f=fj; + job.type=JobType::FITNESS; + q.jobs[q.write_i++] = job; } } + q.done_writing = true; + unlock(q.m); } + wake_all(q.jobs_ready); + + // Wait until the work is finished + lock(q.m); + if (!q.work_complete) + wait(q.done, q.m, infinite_ts); + unlock(q.m); + // 4. sort std::sort(&trackers[0], &trackers[trackers.len-1], [strat](CellTracker &a, CellTracker &b){ return strat.higher_fitness_is_better ? a.score > b.score : a.score < b.score; }); @@ -153,8 +266,105 @@ template Stats run(Strategy strat) { stats.best_cell.push_back(cells[trackers[0].cellid]); stats.best_cell_fitness.push_back(trackers[0].score); } + + q.stop = true; + wake_all(q.jobs_ready); + // TODO: join all threads + + // TODO: There's some data freeing that should really be done here stats.run_time = now() - start_algo; return stats; } +template WorkQueue make_work_queue(int len, int batch_size) { + return { + .jobs=make_array>(len), + .read_i=0, + .write_i=0, + .batch_size=batch_size, + .done_writing=false, + .work_complete=false, + .m=make_mutex(), + .done=make_condition_var(), + .jobs_ready=make_condition_var() + }; +} + +template bool tryget_job_batch(WorkQueue &q, Array>* out_batch, bool* out_batch_is_end) { + lock(q.m); + if (q.stop) { + unlock(q.m); + return false; + } + + // Keep waiting till jobs are available + while (q.read_i >= q.write_i) { + wait(q.jobs_ready, q.m, infinite_ts); + + if (q.stop) { + unlock(q.m); + return false; + } + } + + // Yay! Let's grab some jobs to do + + // If the batch we're about to grab moves read_i to write_i and the producer + // is done writing, we should let our callee know it's handling this gen's last + // batch know that way it sets work_complete and signals done. + *out_batch_is_end = q.done_writing && q.read_i + q.batch_size >= q.write_i; + + out_batch->data = &q.jobs[q.read_i]; + out_batch->len = min(q.batch_size, q.write_i - q.read_i); + q.read_i += q.batch_size; + unlock(q.m); + return true; +} + +template +void work_batch(Array> batch, Strategy &s) { + for (int i = 0; i < batch.len; i++) { + switch (batch[i].type) { + case JobType::MUTATE: { + MutateJob mj = batch[i].data.m; + s.mutate(*mj.cell); + } break; + case JobType::CROSSOVER: { + CrossoverJob cj = batch[i].data.c; + s.crossover(cj.parents, cj.children); + } break; + case JobType::FITNESS: { + FitnessJob fj = batch[i].data.f; + fj.track->score = s.fitness(*fj.cell); + } break; + default: { + assert(false); + } + } + } +} + +template +DWORD worker(LPVOID args) { + WorkerThreadArgs* wa = static_cast*>(args); + WorkQueue &q = wa->q; + Strategy &s = wa->s; + + // These are written by tryget_job_batch + bool batch_is_end; + Array> batch; + + while (tryget_job_batch(q, &batch, &batch_is_end)) { + work_batch(batch, s); + if (batch_is_end) { + lock(q.m); + q.work_complete = true; + wake_one(q.done); + unlock(q.m); + } + } + + return NULL; +} + } // namespace genetic diff --git a/inc/sync.h b/inc/sync.h index b744a67..6719047 100644 --- a/inc/sync.h +++ b/inc/sync.h @@ -15,7 +15,7 @@ typedef LARGE_INTEGER TimeSpan; typedef DWORD (WINAPI *ThreadFunc)(_In_ LPVOID lpParameter); typedef LPVOID ThreadArg; -const TimeSpan infinite_ts = { .QuadPart = LLONG_MAX }; +const TimeSpan infinite_ts = { .QuadPart=LLONG_MAX }; LARGE_INTEGER _init_freq() { LARGE_INTEGER freq; diff --git a/src/main.cpp b/src/main.cpp index 04f5b0e..d01004a 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -56,8 +56,8 @@ float fitness(const Array &cell) { int main(int argc, char **argv) { int num_gens = 2000; Strategy> strat { - .num_threads = 1, - .batch_size = 1, + .num_threads = 15, + .batch_size = 1000, .num_cells = 100000, .num_generations = num_gens, .test_all = true,