Some English notes on the purpose of batch sizes, plus the beginning of a worker-thread implementation.
This commit is contained in:
@@ -3,13 +3,17 @@
|
|||||||
namespace genetic {
|
namespace genetic {
|
||||||
|
|
||||||
template <class T> struct Strategy {
|
template <class T> struct Strategy {
|
||||||
// The recommended number of threads is <= number of cores on your pc.
|
int num_threads; // Number of worker threads that will be evaluating cell
|
||||||
// Set this to -1 to use the default value (number of cores - 1)
|
// fitness.
|
||||||
int num_threads; // Number of worker threads that will be evaluating cell fitness
|
int num_retries; // Number of times worker threads will try to grab work pool
|
||||||
int num_cells; // Size of the population pool
|
// lock before sleeping
|
||||||
|
int batch_size; // Number of cells a worker thread tries to evaluate in a row
|
||||||
|
// before locking the pool again. 1 tends to be fine
|
||||||
|
int num_cells; // Size of the population pool
|
||||||
int num_generations; // Number of times (epochs) to run the algorithm
|
int num_generations; // Number of times (epochs) to run the algorithm
|
||||||
bool test_all; // Sets whether or not every cell is tested every generation
|
bool test_all; // Sets whether or not every cell is tested every generation
|
||||||
float test_chance; // Chance to test any given cell's fitness. Relevant only if test_all is false.
|
float test_chance; // Chance to test any given cell's fitness. Relevant only
|
||||||
|
// if test_all is false.
|
||||||
|
|
||||||
// User defined functions
|
// User defined functions
|
||||||
T (*make_default_cell)();
|
T (*make_default_cell)();
|
||||||
|
|||||||
@@ -1,5 +1,6 @@
|
|||||||
#include "genetic.h"
|
#include "genetic.h"
|
||||||
#include "pthread.h"
|
#include "pthread.h"
|
||||||
|
#include <algorithm>
|
||||||
#include <queue>
|
#include <queue>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
@@ -12,11 +13,103 @@ template <class T> struct CellEntry {
|
|||||||
};
|
};
|
||||||
|
|
||||||
template <class T> struct WorkEntry {
|
template <class T> struct WorkEntry {
|
||||||
const std::vector<CellEntry<T>> &cur;
|
const CellEntry<T> &cur;
|
||||||
std::vector<CellEntry<T>> &next;
|
float &score;
|
||||||
int cur_i;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
|
static pthread_mutex_t data_mutex = PTHREAD_MUTEX_INITIALIZER;
|
||||||
|
|
||||||
|
static pthread_mutex_t ready_mutex = PTHREAD_MUTEX_INITIALIZER;
|
||||||
|
static pthread_cond_t ready_cond = PTHREAD_COND_INITIALIZER;
|
||||||
|
|
||||||
|
static pthread_mutex_t gen_complete_mutex = PTHREAD_MUTEX_INITIALIZER;
|
||||||
|
static pthread_cond_t gen_complete_cond = PTHREAD_COND_INITIALIZER;
|
||||||
|
|
||||||
|
static pthread_mutex_t run_complete_mutex = PTHREAD_MUTEX_INITIALIZER;
|
||||||
|
static pthread_cond_t run_complete_cond = PTHREAD_COND_INITIALIZER;
|
||||||
|
|
||||||
|
/* Thoughts on this approach
|
||||||
|
* The ideal implementation of a worker thread has them operating at maximum
|
||||||
|
* load with as little synchronization overhead as possible. i.e. The ideal
|
||||||
|
* worker thread
|
||||||
|
* 1. Never waits for new work
|
||||||
|
* 2. Never spends time synchronizing with other worker threads
|
||||||
|
*
|
||||||
|
* Never is impossible, but we want to get as close as we can.
|
||||||
|
*
|
||||||
|
* There are two extreme situations to consider
|
||||||
|
* 1. Fitness functions with highly variable computation times
|
||||||
|
* 2. Fitness functions with identical computation times.
|
||||||
|
*
|
||||||
|
* Most applications that use this library will fall into the second
|
||||||
|
* category.
|
||||||
|
*
|
||||||
|
* In the highly-variable computation time case, it's useful for worker threads
|
||||||
|
* to operate on 1 work entry at a time. Imagine a scenario with 2 threads, each
|
||||||
|
* of which claims half the work to do. If thread A completes all of its work
|
||||||
|
* quickly, it goes to sleep while thread B slogs away on its harder-to-compute
|
||||||
|
* fitness jobs. However, if both threads only claim 1 work entry at a time,
|
||||||
|
* thread A can immediately claim new jobs after it completes its current one.
|
||||||
|
* Thread B can toil away, but little time is lost since thread A remains
|
||||||
|
* productive.
|
||||||
|
*
|
||||||
|
* In the highly consistent computation time case, it's ideal for each
|
||||||
|
* thread to claim an equal share of the jobs (as this minimizes time spent
|
||||||
|
* synchronizing access to the job pool). Give each thread its set of work once
|
||||||
|
* and let them have at it instead of each thread constantly locking/waiting
|
||||||
|
* on the job queue.
|
||||||
|
*
|
||||||
|
* I take a hybrid approach. Users can specify a "batch size". Worker threads
|
||||||
|
* will bite off jobs in chunks and complete them before locking
|
||||||
|
* the job pool again. The user can choose a batch size close to 1 if
|
||||||
|
* their fitness function compute time is highly variable, and closer to
|
||||||
|
* num_cells / num_threads if computation time is consistent. Users should
|
||||||
|
* experiment with a batch size that works well for their problem.
|
||||||
|
*
|
||||||
|
* Worth mentioning this optimization work is irrelevant once computation time
|
||||||
|
* >>> synchronization time.
|
||||||
|
*
|
||||||
|
* There might be room for dynamic batch size modification, but I don't expect
|
||||||
|
* to pursue this feature until the library is more mature (and I've run out of
|
||||||
|
* cooler things to do).
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
template <class T>
|
||||||
|
void worker(std::queue<WorkEntry<T>> &fitness_queue, int batch_size,
|
||||||
|
int num_retries) {
|
||||||
|
int retries = 0;
|
||||||
|
std::vector<WorkEntry<T>> batch;
|
||||||
|
bool gen_is_finished;
|
||||||
|
while (true) {
|
||||||
|
gen_is_finished = false;
|
||||||
|
if (pthread_mutex_trylock(&data_mutex)) {
|
||||||
|
retries = 0;
|
||||||
|
for (int i = 0; i < batch_size; i++) {
|
||||||
|
if (fitness_queue.empty()) {
|
||||||
|
gen_is_finished = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
batch.push_back(fitness_queue.front());
|
||||||
|
fitness_queue.pop();
|
||||||
|
}
|
||||||
|
pthread_mutex_unlock(&data_mutex);
|
||||||
|
} else {
|
||||||
|
retries++;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (gen_is_finished) {
|
||||||
|
pthread_cond_signal(&gen_complete_cond, &gen_complete_mutex);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (retries > num_retries) {
|
||||||
|
pthread_mutex_lock(&ready_mutex);
|
||||||
|
pthread_cond_wait(&ready_cond, &ready_mutex);
|
||||||
|
retries = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
pthread_mutex_lock(&data_mutex);
|
||||||
|
}
|
||||||
|
|
||||||
// Definitions
|
// Definitions
|
||||||
template <class T> Stats<T> run(Strategy<T> strat) {
|
template <class T> Stats<T> run(Strategy<T> strat) {
|
||||||
Stats<T> stats;
|
Stats<T> stats;
|
||||||
|
|||||||
Reference in New Issue
Block a user