Debugged multithreaded version. Now investigating some performance issues (not every thread is being used). This is an interesting version.
This commit is contained in:
@@ -2,7 +2,9 @@
|
|||||||
|
|
||||||
recent_file: path: "inc/genetic.h"
|
recent_file: path: "inc/genetic.h"
|
||||||
recent_file: path: "inc/sync.h"
|
recent_file: path: "inc/sync.h"
|
||||||
|
recent_file: path: "d:/os/obj/amd64fre/minkernel/crts/ucrt/src/appcrt/startup/mt/objfre/amd64/minkernel/crts/ucrt/src/appcrt/startup/abort.cpp"
|
||||||
recent_file: path: "src/main.cpp"
|
recent_file: path: "src/main.cpp"
|
||||||
|
recent_file: path: "d:/os/obj/amd64fre/minkernel/crts/ucrt/src/appcrt/startup/mt/objfre/amd64/minkernel/crts/ucrt/src/appcrt/startup/assert.cpp"
|
||||||
recent_file: path: "../../../../../Program Files/Microsoft Visual Studio/2022/Community/VC/Tools/MSVC/14.44.35207/include/vector"
|
recent_file: path: "../../../../../Program Files/Microsoft Visual Studio/2022/Community/VC/Tools/MSVC/14.44.35207/include/vector"
|
||||||
recent_file: path: "d:/os/obj/amd64fre/minkernel/crts/ucrt/src/appcrt/misc/mt/objfre/amd64/minkernel/crts/ucrt/src/appcrt/misc/invalid_parameter.cpp"
|
recent_file: path: "d:/os/obj/amd64fre/minkernel/crts/ucrt/src/appcrt/misc/mt/objfre/amd64/minkernel/crts/ucrt/src/appcrt/misc/invalid_parameter.cpp"
|
||||||
recent_file: path: "../../../../../Program Files/Microsoft Visual Studio/2022/Community/VC/Tools/MSVC/14.44.35207/include/xmemory"
|
recent_file: path: "../../../../../Program Files/Microsoft Visual Studio/2022/Community/VC/Tools/MSVC/14.44.35207/include/xmemory"
|
||||||
@@ -14,4 +16,10 @@ target:
|
|||||||
working_directory: bin
|
working_directory: bin
|
||||||
label: main
|
label: main
|
||||||
enabled: 1
|
enabled: 1
|
||||||
|
arguments: 1
|
||||||
|
}
|
||||||
|
breakpoint:
|
||||||
|
{
|
||||||
|
source_location: "inc/genetic.h:292:1"
|
||||||
|
hit_count: 1
|
||||||
}
|
}
|
||||||
|
|||||||
176
inc/genetic.h
176
inc/genetic.h
@@ -1,6 +1,7 @@
|
|||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
|
#include <cfloat>
|
||||||
#include <cstdlib>
|
#include <cstdlib>
|
||||||
|
|
||||||
#include "util.h"
|
#include "util.h"
|
||||||
@@ -30,6 +31,12 @@ template <class T> struct Strategy {
|
|||||||
// Number of times (epochs) to run the algorithm
|
// Number of times (epochs) to run the algorithm
|
||||||
int num_generations;
|
int num_generations;
|
||||||
|
|
||||||
|
// Each thread will integrate the best globally performing cell
|
||||||
|
bool share_breakthroughs;
|
||||||
|
|
||||||
|
// How many generations to explore before resyncing with the global best
|
||||||
|
int share_breakthrough_gen_period;
|
||||||
|
|
||||||
bool test_all; // Sets whether or not every cell's fitness is evaluated every
|
bool test_all; // Sets whether or not every cell's fitness is evaluated every
|
||||||
// generation
|
// generation
|
||||||
float test_chance; // Chance to test any given cell's fitness. Relevant only
|
float test_chance; // Chance to test any given cell's fitness. Relevant only
|
||||||
@@ -66,16 +73,11 @@ template<class T> struct Stats {
|
|||||||
DynArray<float> best_cell_fitness;
|
DynArray<float> best_cell_fitness;
|
||||||
int gen;
|
int gen;
|
||||||
bool done;
|
bool done;
|
||||||
TimeSpan start, end;
|
DynArray<TimeSpan> gen_time;
|
||||||
TimeSpan total_crossover_time;
|
DynArray<TimeSpan> crossover_time;
|
||||||
int total_crossovers;
|
DynArray<TimeSpan> mutate_time;
|
||||||
TimeSpan total_mutate_time;
|
DynArray<TimeSpan> fitness_time;
|
||||||
int total_mutates;
|
DynArray<TimeSpan> sorting_time;
|
||||||
TimeSpan total_fitness_time;
|
|
||||||
int total_evaluations;
|
|
||||||
TimeSpan total_sorting_time;
|
|
||||||
int total_sorts;
|
|
||||||
|
|
||||||
Mutex m;
|
Mutex m;
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -90,6 +92,10 @@ struct WorkerThreadArgs {
|
|||||||
Array<T> cells;
|
Array<T> cells;
|
||||||
Array<CellTracker> trackers;
|
Array<CellTracker> trackers;
|
||||||
Stats<T> *stats;
|
Stats<T> *stats;
|
||||||
|
|
||||||
|
Mutex m;
|
||||||
|
float *best_global_score;
|
||||||
|
T* best_global_cell;
|
||||||
};
|
};
|
||||||
|
|
||||||
template<class T> T* _cellp(Array<T> cells, CellTracker tracker) { return &cells[tracker.cellid]; }
|
template<class T> T* _cellp(Array<T> cells, CellTracker tracker) { return &cells[tracker.cellid]; }
|
||||||
@@ -101,6 +107,9 @@ template <class T> DWORD worker(LPVOID args) {
|
|||||||
Array<T> cells = worker_args->cells;
|
Array<T> cells = worker_args->cells;
|
||||||
Array<CellTracker> trackers = worker_args->trackers;
|
Array<CellTracker> trackers = worker_args->trackers;
|
||||||
Stats<T> &stats = *worker_args->stats;
|
Stats<T> &stats = *worker_args->stats;
|
||||||
|
float* best_global_score = worker_args->best_global_score;
|
||||||
|
T* best_global_cell = worker_args->best_global_cell;
|
||||||
|
Mutex best_m = worker_args->m;
|
||||||
|
|
||||||
// Prepare crossover operations as these will be the same every time except
|
// Prepare crossover operations as these will be the same every time except
|
||||||
// for the exact cell pointers
|
// for the exact cell pointers
|
||||||
@@ -109,9 +118,29 @@ template <class T> DWORD worker(LPVOID args) {
|
|||||||
Array<T*> parents = make_array<T*>(npar);
|
Array<T*> parents = make_array<T*>(npar);
|
||||||
Array<T*> children = make_array<T*>(nchild);
|
Array<T*> children = make_array<T*>(nchild);
|
||||||
|
|
||||||
TimeSpan start_algo = now();
|
bool gt = strat.higher_fitness_is_better; // Writing strat.higher... is annoying
|
||||||
TimeSpan start;
|
|
||||||
|
// printf("Core: %d\n", get_affinity());
|
||||||
|
|
||||||
|
TimeSpan start, diff, gen_start;
|
||||||
while(stats.gen < strat.num_generations) {
|
while(stats.gen < strat.num_generations) {
|
||||||
|
gen_start = now();
|
||||||
|
|
||||||
|
// 0. Share/Integrate global breakthrough
|
||||||
|
if (strat.share_breakthroughs && (stats.gen + get_affinity()) % strat.share_breakthrough_gen_period) {
|
||||||
|
lock(best_m);
|
||||||
|
if (better(gt, front(trackers).score, *best_global_score) != *best_global_score) {
|
||||||
|
// Share
|
||||||
|
*best_global_cell = *_cellp(cells, trackers[0]);
|
||||||
|
*best_global_score = trackers[0].score;
|
||||||
|
|
||||||
|
} else {
|
||||||
|
// Integrate
|
||||||
|
*_cellp(cells, trackers[0]) = *best_global_cell;
|
||||||
|
trackers[0].score = *best_global_score;
|
||||||
|
}
|
||||||
|
unlock(best_m);
|
||||||
|
}
|
||||||
|
|
||||||
// 1. crossover
|
// 1. crossover
|
||||||
start = now();
|
start = now();
|
||||||
@@ -119,14 +148,19 @@ template <class T> DWORD worker(LPVOID args) {
|
|||||||
int parent_end = npar;
|
int parent_end = npar;
|
||||||
int child_begin = trackers.len-nchild;
|
int child_begin = trackers.len-nchild;
|
||||||
while (parent_end <= child_begin) {
|
while (parent_end <= child_begin) {
|
||||||
|
|
||||||
// Get pointers to all the parent cells
|
// Get pointers to all the parent cells
|
||||||
for (int i = parent_end-npar; i < parent_end; i++) {
|
for (int i = parent_end-npar; i < parent_end; i++) {
|
||||||
parents[i - (parent_end-npar)] = _cellp(cells, trackers[i]);
|
T* cell = _cellp(cells, trackers[i]);
|
||||||
|
assert(cell != NULL);
|
||||||
|
parents[i - (parent_end-npar)] = cell;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Get pointers to all the child cells (these will be overwritten)
|
// Get pointers to all the child cells (these will be overwritten)
|
||||||
for (int i = child_begin; i < child_begin+nchild; i++) {
|
for (int i = child_begin; i < child_begin+nchild; i++) {
|
||||||
children[i-child_begin] = _cellp(cells, trackers[i]);
|
T* cell = _cellp(cells, trackers[i]);
|
||||||
|
assert(cell != NULL);
|
||||||
|
children[i-child_begin] = cell;
|
||||||
}
|
}
|
||||||
strat.crossover(parents, children);
|
strat.crossover(parents, children);
|
||||||
parent_end += strat.crossover_parent_stride;
|
parent_end += strat.crossover_parent_stride;
|
||||||
@@ -134,8 +168,7 @@ template <class T> DWORD worker(LPVOID args) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
lock(stats.m);
|
lock(stats.m);
|
||||||
stats.total_crossover_time = stats.total_crossover_time + (now() - start);
|
append(stats.crossover_time, now() - start);
|
||||||
stats.total_crossovers++;
|
|
||||||
unlock(stats.m);
|
unlock(stats.m);
|
||||||
|
|
||||||
|
|
||||||
@@ -147,8 +180,7 @@ template <class T> DWORD worker(LPVOID args) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
lock(stats.m);
|
lock(stats.m);
|
||||||
stats.total_mutate_time = stats.total_mutate_time + (now() - start);
|
append(stats.mutate_time, now() - start);
|
||||||
stats.total_mutates++;
|
|
||||||
unlock(stats.m);
|
unlock(stats.m);
|
||||||
|
|
||||||
// 3. evaluate
|
// 3. evaluate
|
||||||
@@ -165,67 +197,63 @@ template <class T> DWORD worker(LPVOID args) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
lock(stats.m);
|
lock(stats.m);
|
||||||
stats.total_fitness_time = stats.total_fitness_time + (now() - start);
|
append(stats.fitness_time, now() - start);
|
||||||
stats.total_evaluations++;
|
|
||||||
unlock(stats.m);
|
unlock(stats.m);
|
||||||
|
|
||||||
// 4. sort
|
// 4. sort
|
||||||
start = now();
|
start = now();
|
||||||
std::sort(&trackers[0], &trackers[trackers.len-1], [strat](CellTracker &a, CellTracker &b){ return strat.higher_fitness_is_better ? a.score > b.score : a.score < b.score; });
|
std::sort(&trackers[0], &trackers[trackers.len-1], [strat](CellTracker &a, CellTracker &b){ return better(strat.higher_fitness_is_better, a.score, b.score) == a.score; });
|
||||||
lock(stats.m);
|
lock(stats.m);
|
||||||
stats.total_sorting_time = stats.total_sorting_time + (now() - start);
|
append(stats.sorting_time, now() - start);
|
||||||
stats.total_sorts++;
|
|
||||||
|
|
||||||
append(stats.best_cells, cells[trackers[0].cellid]);
|
append(stats.best_cells, cells[trackers[0].cellid]);
|
||||||
append(stats.best_cell_fitness, trackers[0].score);
|
append(stats.best_cell_fitness, trackers[0].score);
|
||||||
|
append(stats.gen_time, now() - gen_start);
|
||||||
stats.gen++;
|
stats.gen++;
|
||||||
unlock(stats.m);
|
unlock(stats.m);
|
||||||
}
|
}
|
||||||
stats.done = true;
|
stats.done = true;
|
||||||
stats.end = now();
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class T> T run(Strategy<T> strat) {
|
template <class T> T run(Strategy<T> strat) {
|
||||||
Array<Stats<T>> stats = make_array<Stats<T>>(strat.num_threads);
|
Array<Stats<T>> stats = make_array<Stats<T>>(strat.num_threads);
|
||||||
Array<Thread> threads = make_array<Thread>(strat.num_threads);
|
Array<Thread> threads = make_array<Thread>(strat.num_threads);
|
||||||
Array<T> cells = make_array<T>(strat.num_threads*strat.num_cells_per_thread);
|
|
||||||
Array<CellTracker> trackers = make_array<CellTracker>(cells.len);
|
|
||||||
|
|
||||||
Array<WorkerThreadArgs<T>> args = make_array<WorkerThreadArgs<T>>(strat.num_threads);
|
Array<WorkerThreadArgs<T>> args = make_array<WorkerThreadArgs<T>>(strat.num_threads);
|
||||||
|
|
||||||
for (int i = 0; i < cells.len; i++) {
|
float best_global_score = strat.higher_fitness_is_better ? FLT_MIN : FLT_MAX;
|
||||||
cells[i] = strat.make_default_cell();
|
T best_global_cell;
|
||||||
trackers[i] = {0, i};
|
|
||||||
}
|
allow_all_processors();
|
||||||
|
set_affinity(0);
|
||||||
|
|
||||||
for (int i = 0; i < strat.num_threads; i++) {
|
for (int i = 0; i < strat.num_threads; i++) {
|
||||||
stats[i] = {
|
stats[i] = {
|
||||||
.best_cells=make_dynarray<T>(strat.num_generations),
|
.best_cells=make_dynarray<T>(strat.num_generations),
|
||||||
.best_cell_fitness=make_dynarray<float>(strat.num_generations),
|
.best_cell_fitness=make_dynarray<float>(strat.num_generations),
|
||||||
.gen=0,
|
.gen_time=make_dynarray<TimeSpan>(strat.num_generations),
|
||||||
.done=false,
|
.crossover_time=make_dynarray<TimeSpan>(strat.num_generations),
|
||||||
.start=from_s(0),
|
.mutate_time=make_dynarray<TimeSpan>(strat.num_generations),
|
||||||
.end=from_s(0),
|
.fitness_time=make_dynarray<TimeSpan>(strat.num_generations),
|
||||||
.total_crossover_time=from_s(0),
|
.sorting_time=make_dynarray<TimeSpan>(strat.num_generations),
|
||||||
.total_crossovers=0,
|
|
||||||
.total_mutate_time=from_s(0),
|
|
||||||
.total_mutates=0,
|
|
||||||
.total_fitness_time=from_s(0),
|
|
||||||
.total_evaluations=0,
|
|
||||||
.total_sorting_time=from_s(0),
|
|
||||||
.total_sorts=0,
|
|
||||||
.m=make_mutex()
|
.m=make_mutex()
|
||||||
};
|
};
|
||||||
Array<T> tcells = { &cells[i*strat.num_cells_per_thread], strat.num_cells_per_thread };
|
Array<T> cells = make_array<T>(strat.num_threads*strat.num_cells_per_thread);
|
||||||
Array<CellTracker> ttrackers = { &trackers[i*strat.num_cells_per_thread], strat.num_cells_per_thread };
|
Array<CellTracker> trackers = make_array<CellTracker>(strat.num_cells_per_thread);
|
||||||
|
for (int i = 0; i < strat.num_cells_per_thread; i++) {
|
||||||
|
cells[i] = strat.make_default_cell();
|
||||||
|
trackers[i] = {0, i};
|
||||||
|
}
|
||||||
|
|
||||||
args[i].strat=strat;
|
args[i].strat=strat;
|
||||||
args[i].cells=tcells;
|
args[i].cells=cells;
|
||||||
args[i].trackers=ttrackers;
|
args[i].trackers=trackers;
|
||||||
args[i].stats=&stats[i];
|
args[i].stats=&stats[i];
|
||||||
|
args[i].best_global_score=&best_global_score;
|
||||||
|
args[i].best_global_cell=&best_global_cell;
|
||||||
|
args[i].m = make_mutex();
|
||||||
|
|
||||||
threads[i] = make_thread(worker<T>, &args[i]);
|
threads[i] = make_thread(worker<T>, &args[i], i+1);
|
||||||
}
|
}
|
||||||
|
|
||||||
// We are the stats thread
|
// We are the stats thread
|
||||||
@@ -234,12 +262,14 @@ template <class T> T run(Strategy<T> strat) {
|
|||||||
sleep(from_s(strat.stats_print_period_s));
|
sleep(from_s(strat.stats_print_period_s));
|
||||||
|
|
||||||
printf("**********************\n");
|
printf("**********************\n");
|
||||||
|
float g_avg_gen_time = 0;
|
||||||
float g_avg_crossover_time = 0;
|
float g_avg_crossover_time = 0;
|
||||||
float g_avg_mutate_time = 0;
|
float g_avg_mutate_time = 0;
|
||||||
float g_avg_fitness_time = 0;
|
float g_avg_fitness_time = 0;
|
||||||
float g_avg_sorting_time = 0;
|
float g_avg_sorting_time = 0;
|
||||||
|
float g_avg_overhead_time = 0;
|
||||||
float g_progress_per = 0;
|
float g_progress_per = 0;
|
||||||
float g_best_fitness = strat.higher_fitness_is_better ? 0.0 : 999999999999999999.9;
|
float g_best_fitness = strat.higher_fitness_is_better ? FLT_MIN : FLT_MAX;
|
||||||
|
|
||||||
complete = true;
|
complete = true;
|
||||||
|
|
||||||
@@ -247,43 +277,57 @@ template <class T> T run(Strategy<T> strat) {
|
|||||||
lock(stats[i].m);
|
lock(stats[i].m);
|
||||||
complete &= stats[i].done;
|
complete &= stats[i].done;
|
||||||
|
|
||||||
float avg_crossover_time = to_s(stats[i].total_crossover_time) / static_cast<float>(stats[i].total_crossovers);
|
int end = stats[i].gen_time.end-1;
|
||||||
|
|
||||||
float avg_mutate_time = to_s(stats[i].total_mutate_time) / static_cast<float>(stats[i].total_mutates);
|
|
||||||
|
|
||||||
float avg_fitness_time = to_s(stats[i].total_fitness_time) / static_cast<float>(stats[i].total_evaluations);
|
|
||||||
|
|
||||||
float avg_sorting_time = to_s(stats[i].total_sorting_time) / static_cast<float>(stats[i].total_sorts);
|
|
||||||
|
|
||||||
|
float gen_time = to_s(stats[i].gen_time[end]);
|
||||||
|
float crossover_time = to_s(stats[i].crossover_time[end]);
|
||||||
|
float mutate_time = to_s(stats[i].mutate_time[end]);
|
||||||
|
float fitness_time = to_s(stats[i].fitness_time[end]);
|
||||||
|
float sorting_time = to_s(stats[i].sorting_time[end]);
|
||||||
float progress_per = static_cast<float>(stats[i].gen) / static_cast<float>(strat.num_generations) * 100;
|
float progress_per = static_cast<float>(stats[i].gen) / static_cast<float>(strat.num_generations) * 100;
|
||||||
|
|
||||||
float best_score = back(stats[i].best_cell_fitness);
|
float best_score = back(stats[i].best_cell_fitness);
|
||||||
|
|
||||||
g_avg_crossover_time += avg_crossover_time;
|
float overhead = max(0, gen_time - (crossover_time + mutate_time + fitness_time + sorting_time));
|
||||||
g_avg_mutate_time += avg_mutate_time;
|
|
||||||
g_avg_fitness_time += avg_fitness_time;
|
|
||||||
g_avg_sorting_time += avg_sorting_time;
|
|
||||||
g_progress_per += progress_per;
|
|
||||||
g_best_fitness = strat.higher_fitness_is_better ? max(best_score, g_best_fitness) : min(best_score, g_best_fitness);
|
|
||||||
|
|
||||||
printf("THREAD %d, Progress %.1f\%, Top Score %.5e, Cross %.5f (s), Mutate: %.5f (s), Fitness: %.5f (s), Sorting: %.5f (s)\n", i, progress_per, best_score, avg_crossover_time, avg_mutate_time, avg_fitness_time, avg_sorting_time);
|
float overhead_per = overhead / gen_time * 100;
|
||||||
|
|
||||||
|
g_avg_gen_time += gen_time;
|
||||||
|
g_avg_crossover_time += crossover_time;
|
||||||
|
g_avg_mutate_time += mutate_time;
|
||||||
|
g_avg_fitness_time += fitness_time;
|
||||||
|
g_avg_sorting_time += sorting_time;
|
||||||
|
g_progress_per += progress_per;
|
||||||
|
g_best_fitness = better(strat.higher_fitness_is_better, best_score, g_best_fitness);
|
||||||
|
|
||||||
|
g_avg_overhead_time += overhead;
|
||||||
|
|
||||||
|
printf("%d, Progress %d/%d, Top: %.5e, Overhead Per: %.4f%%, Gen: %.4f, Overhead: %.4f, Cross: %.4f (s), Mutate: %.4f (s), Fitness: %.4f (s), Sorting: %.4f (s)\n", i, stats[i].gen, strat.num_generations, best_score, overhead_per, gen_time, overhead, crossover_time, mutate_time, fitness_time, sorting_time);
|
||||||
unlock(stats[i].m);
|
unlock(stats[i].m);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
g_avg_gen_time /= stats.len;
|
||||||
g_avg_crossover_time /= stats.len;
|
g_avg_crossover_time /= stats.len;
|
||||||
g_avg_mutate_time /= stats.len;
|
g_avg_mutate_time /= stats.len;
|
||||||
g_avg_fitness_time /= stats.len;
|
g_avg_fitness_time /= stats.len;
|
||||||
g_avg_sorting_time /= stats.len;
|
g_avg_sorting_time /= stats.len;
|
||||||
g_progress_per /= stats.len;
|
g_progress_per /= stats.len;
|
||||||
|
|
||||||
printf("OVERALL, Progress %.1f\%, Top Score: %.5e, Cross %.5f (s), Mutate: %.5f (s), Fitness: %.5f (s), Sorting: %.5f (s)\n", g_progress_per, g_best_fitness, g_avg_crossover_time, g_avg_mutate_time, g_avg_fitness_time, g_avg_sorting_time);
|
g_avg_overhead_time /= stats.len;
|
||||||
|
|
||||||
|
float g_avg_overhead_per = g_avg_overhead_time / g_avg_gen_time * 100;
|
||||||
|
|
||||||
|
printf("GLOBAL, Progress %.1f%%, Top: %.5e, Overhead Per: %.4f%%, Gen: %.4f, Overhead: %.4f, Cross: %.4f (s), Mutate: %.4f (s), Fitness: %.4f (s), Sorting: %.4f (s)\n", g_progress_per, g_best_fitness, g_avg_overhead_per, g_avg_gen_time, g_avg_overhead_time, g_avg_crossover_time, g_avg_mutate_time, g_avg_fitness_time, g_avg_sorting_time);
|
||||||
|
|
||||||
if (complete) break;
|
if (complete) break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
for (int i = 0; i < threads.len; i++) {
|
||||||
|
join(threads[i]);
|
||||||
|
}
|
||||||
|
|
||||||
T best_cell;
|
T best_cell;
|
||||||
// TODO: bad
|
// TODO: bad
|
||||||
float best_score = strat.higher_fitness_is_better ? 0.0 : 999999999999999999.9;
|
float best_score = strat.higher_fitness_is_better ? FLT_MIN : FLT_MAX;
|
||||||
for (int i = 0; i < stats.len; i++) {
|
for (int i = 0; i < stats.len; i++) {
|
||||||
float score = back(stats[i].best_cell_fitness);
|
float score = back(stats[i].best_cell_fitness);
|
||||||
if (strat.higher_fitness_is_better ? score > best_score : score < best_score) {
|
if (strat.higher_fitness_is_better ? score > best_score : score < best_score) {
|
||||||
|
|||||||
93
inc/sync.h
93
inc/sync.h
@@ -1,5 +1,9 @@
|
|||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
|
#include <cassert>
|
||||||
|
#include <cstdint>
|
||||||
|
#include <cstdio>
|
||||||
|
|
||||||
#ifdef _WIN32
|
#ifdef _WIN32
|
||||||
#include <windows.h>
|
#include <windows.h>
|
||||||
#endif
|
#endif
|
||||||
@@ -17,6 +21,14 @@ typedef LPVOID ThreadArg;
|
|||||||
|
|
||||||
const TimeSpan infinite_ts = { .QuadPart = LLONG_MAX };
|
const TimeSpan infinite_ts = { .QuadPart = LLONG_MAX };
|
||||||
|
|
||||||
|
int get_num_cores() {
|
||||||
|
SYSTEM_INFO sysinfo;
|
||||||
|
GetSystemInfo(&sysinfo);
|
||||||
|
return sysinfo.dwNumberOfProcessors;
|
||||||
|
}
|
||||||
|
|
||||||
|
const int num_cores = get_num_cores();
|
||||||
|
|
||||||
LARGE_INTEGER _init_freq() {
|
LARGE_INTEGER _init_freq() {
|
||||||
LARGE_INTEGER freq;
|
LARGE_INTEGER freq;
|
||||||
QueryPerformanceFrequency(&freq);
|
QueryPerformanceFrequency(&freq);
|
||||||
@@ -27,8 +39,13 @@ static LARGE_INTEGER freq = _init_freq();
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
Thread make_thread(ThreadFunc t, ThreadArg a);
|
Thread make_thread(ThreadFunc t, ThreadArg a);
|
||||||
|
Thread make_thread(ThreadFunc t, ThreadArg a, int core_affinity);
|
||||||
void join(Thread t);
|
void join(Thread t);
|
||||||
void sleep(TimeSpan ts);
|
void sleep(TimeSpan ts);
|
||||||
|
void allow_all_processors();
|
||||||
|
void set_affinity(Thread &t, int core);
|
||||||
|
void set_affinity(int core);
|
||||||
|
int get_affinity();
|
||||||
|
|
||||||
Mutex make_mutex();
|
Mutex make_mutex();
|
||||||
void lock(Mutex &m);
|
void lock(Mutex &m);
|
||||||
@@ -64,11 +81,60 @@ double to_hours(TimeSpan &ts);
|
|||||||
|
|
||||||
#ifdef _WIN32
|
#ifdef _WIN32
|
||||||
|
|
||||||
|
// Returns a 64-bit mask with the lowest `n` bits set (bitmask(3) == 0b111,
// bitmask(0) == 0).
//
// Shifting a 64-bit value by 64 or more is undefined behaviour, so every
// n >= 64 saturates to "all bits set" instead of only n == 64.
uint64_t bitmask (unsigned short n) {
  if (n >= 64) return ~static_cast<uint64_t>(0);
  return (static_cast<uint64_t>(1) << n) - 1;
}
|
||||||
|
|
||||||
|
// Lookup table for log2_64: maps the top six bits of
// (isolated highest bit * 0x07EDD5E59A4E28C2) to that bit's index.
const int tab64[64] = {
  63,  0, 58,  1, 59, 47, 53,  2,
  60, 39, 48, 27, 54, 33, 42,  3,
  61, 51, 37, 40, 49, 18, 28, 20,
  55, 30, 34, 11, 43, 14, 22,  4,
  62, 57, 46, 52, 38, 26, 32, 41,
  50, 36, 17, 19, 29, 10, 13, 21,
  56, 45, 25, 31, 35, 16,  9, 12,
  44, 24, 15,  8, 23,  7,  6,  5};

// Returns the index (0..63) of the highest set bit of `value`, i.e.
// floor(log2(value)). Returns -1 for value == 0, which has no set bit;
// without that guard the lookup below lands on tab64[0] and reports 63.
int log2_64 (uint64_t value)
{
  if (value == 0) return -1;

  // Smear the highest set bit into every lower position...
  value |= value >> 1;
  value |= value >> 2;
  value |= value >> 4;
  value |= value >> 8;
  value |= value >> 16;
  value |= value >> 32;

  // ...so that subtracting the half leaves only the highest bit set.
  const uint64_t top_bit = value - (value >> 1);

  // The multiplication wraps modulo 2^64; the top six bits of the product
  // select the table entry.
  return tab64[(top_bit * 0x07EDD5E59A4E28C2ULL) >> 58];
}
|
||||||
|
|
||||||
Thread make_thread(ThreadFunc f, ThreadArg a) {
|
Thread make_thread(ThreadFunc f, ThreadArg a) {
|
||||||
DWORD tid;
|
DWORD tid;
|
||||||
return CreateThread(NULL, 0, f, a, 0, &tid);
|
return CreateThread(NULL, 0, f, a, 0, &tid);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
struct DummyThreadArgs {
|
||||||
|
int core_affinity;
|
||||||
|
ThreadFunc f;
|
||||||
|
ThreadArg a;
|
||||||
|
};
|
||||||
|
|
||||||
|
DWORD _dummy_thread(LPVOID a) {
|
||||||
|
DummyThreadArgs *wrap = static_cast<DummyThreadArgs*>(a);
|
||||||
|
set_affinity(wrap->core_affinity);
|
||||||
|
return wrap->f(wrap->a);
|
||||||
|
}
|
||||||
|
|
||||||
|
Thread make_thread(ThreadFunc f, ThreadArg a, int core_affinity) {
|
||||||
|
DWORD tid;
|
||||||
|
DummyThreadArgs *args = (DummyThreadArgs*)malloc(sizeof(DummyThreadArgs));
|
||||||
|
*args = {
|
||||||
|
.core_affinity=core_affinity,
|
||||||
|
.f=f,
|
||||||
|
.a=a
|
||||||
|
};
|
||||||
|
return CreateThread(NULL, 0, _dummy_thread, args, 0, &tid);
|
||||||
|
}
|
||||||
|
|
||||||
void join(Thread t) {
|
void join(Thread t) {
|
||||||
WaitForSingleObject(t, INFINITE);
|
WaitForSingleObject(t, INFINITE);
|
||||||
}
|
}
|
||||||
@@ -77,6 +143,33 @@ void sleep(TimeSpan ts) {
|
|||||||
Sleep(static_cast<DWORD>(to_ms(ts)));
|
Sleep(static_cast<DWORD>(to_ms(ts)));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void allow_all_processors() {
|
||||||
|
Thread t = GetCurrentThread();
|
||||||
|
DWORD affinity = bitmask(num_cores);
|
||||||
|
SetProcessAffinityMask(t, affinity);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Pins thread `t` to a single processor.
//
// `core` is wrapped into [0, num_cores) so callers may pass any index
// (including values >= num_cores or negative ones). The mask is built in
// DWORD_PTR width: `1 << n` on a plain int overflows from bit 31 upwards and a
// DWORD cannot hold the upper half of a 64-bit affinity mask.
//
// SetThreadAffinityMask returns the previous mask, or 0 on failure; that
// return value is the only reliable success indicator (GetLastError is only
// meaningful after a failure), so it is the only thing asserted.
void set_affinity(Thread &t, int core) {
  assert(num_cores > 0);

  int index = core % num_cores;
  if (index < 0) index += num_cores;

  const DWORD_PTR mask = static_cast<DWORD_PTR>(1) << index;
  const DWORD_PTR previous = SetThreadAffinityMask(t, mask);

  assert(previous != 0);
  (void)previous; // silence "unused" when asserts are compiled out
}
|
||||||
|
|
||||||
|
void set_affinity(int core) {
|
||||||
|
Thread cur = GetCurrentThread();
|
||||||
|
set_affinity(cur, core);
|
||||||
|
}
|
||||||
|
|
||||||
|
int get_affinity() {
|
||||||
|
Thread t = GetCurrentThread();
|
||||||
|
DWORD mask = 1;
|
||||||
|
DWORD affinity = SetThreadAffinityMask(t, (DWORD_PTR)mask);
|
||||||
|
DWORD check = SetThreadAffinityMask(t, (DWORD_PTR)affinity);
|
||||||
|
assert(check == mask);
|
||||||
|
return log2_64(affinity);
|
||||||
|
}
|
||||||
|
|
||||||
Mutex make_mutex() {
|
Mutex make_mutex() {
|
||||||
Mutex m;
|
Mutex m;
|
||||||
InitializeCriticalSection(&m);
|
InitializeCriticalSection(&m);
|
||||||
|
|||||||
@@ -3,6 +3,7 @@
|
|||||||
#include <cstring>
|
#include <cstring>
|
||||||
#define min(A, B) ((A < B) ? (A) : (B))
|
#define min(A, B) ((A < B) ? (A) : (B))
|
||||||
#define max(A, B) ((A > B) ? (A) : (B))
|
#define max(A, B) ((A > B) ? (A) : (B))
|
||||||
|
// better(GT, A, B): picks the "better" of two scores. When GT is true the
// larger value wins (max), otherwise the smaller one (min).
// GT is parenthesised so that compound conditions such as `a || b` select the
// branch as a whole instead of being split by ?: precedence.
// NOTE: like min/max, this is a macro, so A and B may be evaluated more than once.
#define better(GT, A, B) ((GT) ? max((A), (B)) : min((A), (B)))
|
||||||
|
|
||||||
template <class T> struct Array {
|
template <class T> struct Array {
|
||||||
T *data;
|
T *data;
|
||||||
@@ -18,6 +19,8 @@ template <class T> Array<T> make_array(int len) {
|
|||||||
.len=len
|
.len=len
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
// Returns a copy of the last element of `a` (index a.len - 1). No bounds check is performed.
template <class T> T back(Array<T> &a) {
  const auto last_index = a.len - 1;
  return a.data[last_index];
}
|
||||||
|
// Returns a copy of the first element of `a` (index 0). No bounds check is performed.
template <class T> T front(Array<T> &a) {
  T *first = a.data;
  return *first;
}
|
||||||
|
|
||||||
template <class T> struct DynArray {
|
template <class T> struct DynArray {
|
||||||
T* _data;
|
T* _data;
|
||||||
@@ -48,6 +51,6 @@ template <class T> void append(DynArray<T> &a, T el) {
|
|||||||
a[a.end++] = el;
|
a[a.end++] = el;
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class T> T& back(DynArray<T> &a) { return a._data[a.end-1]; }
|
template <class T> T back(DynArray<T> &a) { return a._data[a.end-1]; }
|
||||||
template <class T> T& front(DynArray<T> &a) { return a._data[0]; }
|
template <class T> T front(DynArray<T> &a) { return a._data[0]; }
|
||||||
|
|
||||||
|
|||||||
11
src/main.cpp
11
src/main.cpp
@@ -41,9 +41,6 @@ void crossover(const Array<Array<float>*> parents, const Array<Array<float> *> o
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// norm_rand can go negative. fix in genetic.cpp
|
|
||||||
// child stride doesn't make sense. Should always skip over child num
|
|
||||||
|
|
||||||
float fitness(const Array<float> &cell) {
|
float fitness(const Array<float> &cell) {
|
||||||
float sum = 0;
|
float sum = 0;
|
||||||
float product = 1;
|
float product = 1;
|
||||||
@@ -55,12 +52,14 @@ float fitness(const Array<float> &cell) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
int main(int argc, char **argv) {
|
int main(int argc, char **argv) {
|
||||||
int num_gens = 1000;
|
int num_gens = 10000;
|
||||||
Strategy<Array<float>> strat {
|
Strategy<Array<float>> strat {
|
||||||
.num_threads = 1,
|
.num_threads = atoi(argv[1]),
|
||||||
.stats_print_period_s = 2,
|
.stats_print_period_s = 2,
|
||||||
.num_cells_per_thread = 10000,
|
.num_cells_per_thread = 100000,
|
||||||
.num_generations = num_gens,
|
.num_generations = num_gens,
|
||||||
|
.share_breakthroughs=true,
|
||||||
|
.share_breakthrough_gen_period=10,
|
||||||
.test_all = true,
|
.test_all = true,
|
||||||
.test_chance = 0.0, // doesn't matter
|
.test_chance = 0.0, // doesn't matter
|
||||||
.enable_crossover = true,
|
.enable_crossover = true,
|
||||||
|
|||||||
Reference in New Issue
Block a user