more progress on drafting the worker thread model, get job batch func, etc...
@@ -1,15 +1,19 @@
 #include <vector>
-#include <span>
 
 namespace genetic {
 
+template <class T> struct ReadonlySpan;
+template <class T> struct Span;
+template <class T> struct Stats;
+template <class T> struct Strategy;
+
+template <class T> Stats<T> run(Strategy<T>);
+
 template <class T> struct Strategy {
   int num_threads;     // Number of worker threads that will be evaluating cell
                        // fitness.
-  int num_retries;     // Number of times worker threads will try to grab work pool
-                       // lock before sleeping
-  int batch_size;      // Number of cells a worker thread tries to evaluate in a row
-                       // before locking the pool again.
+  int batch_size;      // Number of cells a worker thread tries to work on in a row
+                       // before accessing/locking the work queue again.
   int num_cells;       // Size of the population pool
   int num_generations; // Number of times (epochs) to run the algorithm
   bool test_all;       // Sets whether or not every cell is tested every generation
@@ -21,14 +25,17 @@ template <class T> struct Strategy {
   float crossover_mutation_chance; // Chance to mutate a child cell
   int crossover_parent_num;        // Number of unique high-scoring parents in a
                                    // crossover call.
-  int crossover_children_num;      // Number of children produced in a crossover
-  bool enable_mutation;            // Cells may be mutated before fitness evaluation
+  int crossover_children_num;      // Number of children to expect the user to
+                                   // produce in the crossover function.
+  bool enable_mutation;            // Cells may be mutated
+                                   // before fitness evaluation
   float mutation_chance;           // Chance to mutate cells before fitness evaluation
 
   // User defined functions
   T (*make_default_cell)();
-  void (*mutate)(T &cell);
-  void (*crossover)(const std::span<T> &parents, std::span<T> &out_children);
+  void (*mutate)(T &cell_to_modify);
+  void (*crossover)(const ReadonlySpan<T> &parents,
+                    const Span<T> &out_children);
   float (*fitness)(const T &cell);
 };
@@ -37,6 +44,18 @@ template <class T> struct Stats {
   std::vector<float> average_fitness;
 };
 
-template <class T> Stats<T> run(Strategy<T>);
+template <class T> struct ReadonlySpan {
+  T *_data;
+  int len;
+
+  const T &operator[](int i);
+};
+
+template <class T> struct Span {
+  T *_data;
+  int len;
+
+  T &operator[](int i);
+};
 
 } // namespace genetic
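A side note on the new span types (not part of the commit): the header only declares ReadonlySpan and Span, so the sketch below fills in plausible operator[] bodies over the _data/len pair and shows the shape of a crossover callback under the new signature. The bodies, the by-value parameters, and average_crossover are illustrative assumptions (the header passes the spans by const reference; by value here keeps the sketch minimal since operator[] is declared non-const).

#include <cassert>

// Minimal stand-ins matching the declarations above; the operator[] bodies
// are assumptions, not code from the commit.
template <class T> struct ReadonlySpan {
  T *_data;
  int len;
  const T &operator[](int i) { return _data[i]; }
};

template <class T> struct Span {
  T *_data;
  int len;
  T &operator[](int i) { return _data[i]; }
};

// Shape of a user crossover callback under the new signature:
// read parents through ReadonlySpan, write children through Span.
void average_crossover(ReadonlySpan<float> parents, Span<float> out_children) {
  out_children[0] = (parents[0] + parents[1]) / 2.0f;
}

int main() {
  float parents[2] = {1.0f, 3.0f};
  float children[1] = {0.0f};
  average_crossover({parents, 2}, {children, 1});
  assert(children[0] == 2.0f); // the span writes through to the caller's array
  return 0;
}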
239 src/genetic.cpp
@@ -1,125 +1,139 @@
 #include "genetic.h"
 #include "pthread.h"
-#include <queue>
+#include <algorithm>
+#include <optional>
+#include <variant>
 #include <vector>
 
+#define NUM_QUEUE_RETRIES 10
+
+using namespace std;
+
+// std::visit/std::variant overload pattern
+// See:
+// https://www.modernescpp.com/index.php/visiting-a-std-variant-with-the-overload-pattern/
+// You don't have to understand this, just use it :)
+template <typename... Ts> struct overload : Ts... {
+  using Ts::operator()...;
+};
+template <class... Ts> overload(Ts...) -> overload<Ts...>;
+
 namespace genetic {
 
 template <class T> struct CellEntry {
   float score;
   T cell;
-  bool stale;
 };
 
-template <class T> struct WorkEntry {
-  const CellEntry<T> &cur;
-  float &score;
+template <class T> struct CrossoverJob {
+  const ReadonlySpan<T> &parents;
+  const Span<T> &children_out;
+};
+
+template <class T> struct FitnessJob {
+  const T &cell;
+  float &result_out;
 };
 
 template <class T> struct WorkQueue {
-  std::vector<WorkEntry<T>> jobs;
-  int i;
+  variant<CrossoverJob<T>, FitnessJob<T>> *jobs;
+  int len;
+  int read_i;
+  int write_i;
+  bool done_writing;
+
+  pthread_mutex_t data_mutex;
+  pthread_mutex_t gen_complete_mutex;
+  pthread_mutex_t jobs_available_mutex;
+
+  pthread_cond_t gen_complete_cond;
+  pthread_cond_t jobs_available_cond;
 };
 
-static pthread_mutex_t data_mutex = PTHREAD_MUTEX_INITIALIZER;
-static pthread_mutex_t ready_mutex = PTHREAD_MUTEX_INITIALIZER;
-static pthread_cond_t ready_cond = PTHREAD_COND_INITIALIZER;
-static pthread_mutex_t gen_complete_mutex = PTHREAD_MUTEX_INITIALIZER;
-static pthread_cond_t gen_complete_cond = PTHREAD_COND_INITIALIZER;
-static pthread_mutex_t run_complete_mutex = PTHREAD_MUTEX_INITIALIZER;
-static pthread_cond_t run_complete_cond = PTHREAD_COND_INITIALIZER;
-
-/* Thoughts on this approach
- * The ideal implementation of a worker thread has them operating at maximum
- * load with as little synchronization overhead as possible. i.e. The ideal
- * worker thread
- * 1. Never waits for new work
- * 2. Never spends time synchronizing with other worker threads
- *
- * Never is impossible, but we want to get as close as we can.
- *
- * There are two extreme situations to consider
- * 1. Fitness functions with highly variable computation times
- * 2. Fitness functions with identical computation times.
- *
- * Most applications that use this library will fall into the second
- * category.
- *
- * In the highly-variable computation time case, it's useful for worker threads
- * to operate on 1 work entry at a time. Imagine a scenario with 2 threads, each
- * of which claims half the work to do. If thread A completes all of its work
- * quickly, it goes to sleep while thread B slogs away on its harder-to-compute
- * fitness jobs. However, if both threads only claim 1 work entry at a time,
- * thread A can immediately claim new jobs after it completes its current one.
- * Thread B can toil away, but little time is lost since thread A remains
- * productive.
- *
- * In the highly consistent computation time case, it's ideal for each
- * thread to claim an equal share of the jobs (as this minimizes time spent
- * synchronizing access to the job pool). Give each thread its set of work once
- * and let them have at it instead of each thread constantly locking/waiting
- * on the job queue.
- *
- * I take a hybrid approach. Users can specify a "batch size". Worker threads
- * will bite off jobs in chunks and complete them before locking
- * the job pool to grab another chunk. The user should choose a batch size close
- * to 1 if their fitness function compute time is highly variable and closer to
- * num_cells / num_threads if computation time is consistent. Users should
- * experiment with a batch size that works well for their problem.
- *
- * Worth mentioning this avoiding synchronization is irrelevant once computation
- * time >>> synchronization time.
- *
- * There might be room for dynamic batch size modification, but I don't expect
- * to pursue this feature until the library is more mature (and I've run out of
- * cooler things to do).
- *
- */
-template <class T>
-void worker(std::queue<WorkEntry<T>> &fitness_queue, int batch_size,
-            int num_retries) {
-  int retries = 0;
-  std::vector<WorkEntry<T>> batch;
-  bool gen_is_finished;
-  while (true) {
-    gen_is_finished = false;
-    if (pthread_mutex_trylock(&data_mutex)) {
-      retries = 0;
-      for (int i = 0; i < batch_size; i++) {
-        if (fitness_queue.empty()) {
-          gen_is_finished = true;
-          break;
-        }
-        batch.push_back(fitness_queue.front());
-        fitness_queue.pop();
-      }
-      pthread_mutex_unlock(&data_mutex);
-    } else {
-      retries++;
-    }
-
-    if (gen_is_finished) {
-      pthread_cond_signal(&gen_complete_cond, &gen_complete_mutex);
-    }
-
-    if (retries > num_retries) {
-      pthread_mutex_lock(&ready_mutex);
-      pthread_cond_wait(&ready_cond, &ready_mutex);
-      retries = 0;
-    }
-  }
-  pthread_mutex_lock(&data_mutex);
+template <class T> WorkQueue<T> make_work_queue(int len) {
+  return {.jobs = (variant<CrossoverJob<T>, FitnessJob<T>> *)malloc(
+              sizeof(variant<CrossoverJob<T>, FitnessJob<T>>) * len),
+          .len = len,
+          .read_i = 0,
+          .write_i = 0,
+          .done_writing = false,
+          .data_mutex = PTHREAD_MUTEX_INITIALIZER,
+          .gen_complete_mutex = PTHREAD_MUTEX_INITIALIZER,
+          .jobs_available_mutex = PTHREAD_MUTEX_INITIALIZER,
+          .gen_complete_cond = PTHREAD_COND_INITIALIZER,
+          .jobs_available_cond = PTHREAD_COND_INITIALIZER};
+}
+
+template <class T> struct JobBatch {
+  ReadonlySpan<variant<CrossoverJob<T>, FitnessJob<T>>> jobs;
+  bool gen_complete;
+};
+
+template <class T>
+optional<JobBatch<T>> get_job_batch(WorkQueue<T> &queue, int batch_size,
+                                    bool *stop_flag) {
+  while (true) {
+    for (int i = 0; i < NUM_QUEUE_RETRIES; i++) {
+      if (queue.read_i < queue.write_i &&
+          pthread_mutex_trylock(&queue.data_mutex) == 0) {
+        JobBatch<T> res;
+        res.jobs._data = &queue.jobs[queue.read_i];
+        int span_size = min(batch_size, queue.write_i - queue.read_i);
+        res.jobs.len = span_size;
+
+        queue.read_i += span_size;
+        res.gen_complete = queue.done_writing && queue.read_i == queue.write_i;
+
+        pthread_mutex_unlock(&queue.data_mutex);
+        return res;
+      }
+    }
+    pthread_mutex_lock(&queue.jobs_available_mutex);
+    pthread_cond_wait(&queue.jobs_available_cond, &queue.jobs_available_mutex);
+    pthread_mutex_unlock(&queue.jobs_available_mutex);
+    if (*stop_flag)
+      return {};
+  }
+}
+
+template <class T> struct WorkerThreadArgs {
+  Strategy<T> &strat;
+  WorkQueue<T> &queue;
+  bool *stop_flag;
+};
+
+template <class T> void *worker(void *args) {
+  WorkerThreadArgs<T> *work_args = (WorkerThreadArgs<T> *)args;
+  Strategy<T> &strat = work_args->strat;
+  WorkQueue<T> &queue = work_args->queue;
+  bool *stop_flag = work_args->stop_flag;
+
+  auto JobDispatcher = overload{
+      [strat](FitnessJob<T> fj) { fj.result_out = strat.fitness(fj.cell); },
+      [strat](CrossoverJob<T> cj) {
+        strat.crossover(cj.parents, cj.children_out);
+      },
+  };
+
+  while (true) {
+    auto batch = get_job_batch(queue, strat.batch_size, stop_flag);
+    if (!batch || *stop_flag)
+      return NULL;
+
+    // Do the actual work
+    for (int i = 0; i < batch->jobs.len; i++) {
+      visit(JobDispatcher, batch->jobs[i]);
+    }
+
+    if (batch->gen_complete) {
+      pthread_cond_signal(&queue.gen_complete_cond);
+    }
+  }
 }
 
-// Definitions
 template <class T> Stats<T> run(Strategy<T> strat) {
   Stats<T> stats;
+  WorkQueue<T> queue = make_work_queue<T>(strat.num_cells);
 
-  std::queue<WorkEntry<T>> fitness_queue;
-  std::vector<CellEntry<T>> cells_a, cells_b;
+  vector<CellEntry<T>> cells_a, cells_b;
   for (int i = 0; i < strat.num_cells; i++) {
     T cell = strat.make_default_cell();
     cells_a.push_back({0, cell, true});
@@ -129,11 +143,32 @@ template <class T> Stats<T> run(Strategy<T> strat) {
   std::vector<CellEntry<T>> &cur_cells = cells_a;
   std::vector<CellEntry<T>> &next_cells = cells_b;
 
-  for (int i = 0; i < strat.num_generations; i++) {
+  bool stop_flag = false;
+  WorkerThreadArgs<T> args = {
+      .strat = strat, .queue = queue, .stop_flag = &stop_flag};
+
+  // spawn worker threads
+  pthread_t threads[strat.num_threads];
+  for (int i = 0; i < strat.num_threads; i++) {
+    pthread_create(&threads[i], NULL, worker<T>, (void *)&args);
+  }
+
+  for (int i = 0; i < strat.num_generations; i++) {
+    // generate fitness jobs
+    // wait for fitness jobs to complete
+    // sort cells on performance
+    // generate crossover jobs
     cur_cells = cur_cells == cells_a ? cells_b : cells_a;
     next_cells = cur_cells == cells_a ? cells_b : cells_a;
   }
 
+  // stop worker threads
+  stop_flag = true;
+  pthread_cond_broadcast(&queue.jobs_available_cond);
+  for (int i = 0; i < strat.num_threads; i++) {
+    pthread_join(threads[i], NULL);
+  }
+
 }
 
 } // namespace genetic
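For readers who want more than the "just use it" comment above: the overload helper inherits the call operator of every lambda passed to it, so std::visit dispatches each variant alternative to the matching lambda, which is how JobDispatcher routes FitnessJob vs CrossoverJob. A standalone toy version (same pattern, made-up job types):

#include <cstdio>
#include <variant>

// Same overload helper as in the diff: inherit the call operators of each
// lambda so std::visit can pick the matching one.
template <typename... Ts> struct overload : Ts... {
  using Ts::operator()...;
};
template <class... Ts> overload(Ts...) -> overload<Ts...>;

int main() {
  std::variant<int, const char *> v = 42;
  auto dispatcher = overload{
      [](int i) { std::printf("int job: %d\n", i); },
      [](const char *s) { std::printf("string job: %s\n", s); },
  };
  std::visit(dispatcher, v); // prints "int job: 42"
  v = "crossover";
  std::visit(dispatcher, v); // prints "string job: crossover"
  return 0;
}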
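The commit leaves the inside of the generation loop as TODO comments. As a rough standalone model of the coordination the WorkQueue fields suggest (my reading of where this is headed, not code from the commit): the main thread publishes a generation of jobs under data_mutex, sets done_writing, and then sleeps on gen_complete_cond until the worker that drains the queue signals completion. The toy below uses plain ints as jobs and spins instead of sleeping on jobs_available_cond, to keep it short:

#include <cstdio>
#include <pthread.h>

struct ToyQueue {
  int jobs[8];
  int len = 8;
  int read_i = 0;
  int write_i = 0;
  bool done_writing = false;
  bool gen_complete = false;

  pthread_mutex_t data_mutex = PTHREAD_MUTEX_INITIALIZER;
  pthread_mutex_t gen_complete_mutex = PTHREAD_MUTEX_INITIALIZER;
  pthread_cond_t gen_complete_cond = PTHREAD_COND_INITIALIZER;
};

void *toy_worker(void *arg) {
  ToyQueue *q = (ToyQueue *)arg;
  while (true) {
    pthread_mutex_lock(&q->data_mutex);
    if (q->read_i == q->write_i) { // nothing published yet / generation drained
      bool finished = q->done_writing;
      pthread_mutex_unlock(&q->data_mutex);
      if (finished)
        return NULL;
      continue; // the real worker sleeps on jobs_available_cond instead of spinning
    }
    int job = q->jobs[q->read_i++]; // claim one job (batch_size == 1 here)
    bool last = q->done_writing && q->read_i == q->write_i;
    pthread_mutex_unlock(&q->data_mutex);

    std::printf("evaluated job %d\n", job); // stand-in for the fitness call

    if (last) { // the thread that drains the queue reports the generation done
      pthread_mutex_lock(&q->gen_complete_mutex);
      q->gen_complete = true;
      pthread_cond_signal(&q->gen_complete_cond);
      pthread_mutex_unlock(&q->gen_complete_mutex);
    }
  }
}

int main() {
  ToyQueue q;
  pthread_t tid;
  pthread_create(&tid, NULL, toy_worker, &q);

  // Producer side: publish one generation of jobs and mark the queue done.
  pthread_mutex_lock(&q.data_mutex);
  for (int i = 0; i < q.len; i++)
    q.jobs[q.write_i++] = i;
  q.done_writing = true;
  pthread_mutex_unlock(&q.data_mutex);

  // Sleep until the generation has been fully consumed.
  pthread_mutex_lock(&q.gen_complete_mutex);
  while (!q.gen_complete)
    pthread_cond_wait(&q.gen_complete_cond, &q.gen_complete_mutex);
  pthread_mutex_unlock(&q.gen_complete_mutex);

  pthread_join(tid, NULL);
  std::printf("generation complete\n");
  return 0;
}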