diff --git a/CMakeLists.txt b/CMakeLists.txt index 45a93a9..76d84a9 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -168,6 +168,7 @@ endif() if(pybind11_FOUND) # Build the 'ga' Python extension module pybind11_add_module(ga_python_module python/ga_bindings.cpp) + target_sources(ga_python_module PRIVATE benchmark/ga_benchmark.cc) target_link_libraries(ga_python_module PRIVATE genetic_algorithm) target_include_directories(ga_python_module PRIVATE ${CMAKE_SOURCE_DIR}/include) set_target_properties(ga_python_module PROPERTIES diff --git a/USER_GUIDE.md b/USER_GUIDE.md index c17d088..478f9f7 100644 --- a/USER_GUIDE.md +++ b/USER_GUIDE.md @@ -18,7 +18,7 @@ For features not yet exposed in the Python bindings, an explicit note is include | 3 | [Chromosome Representations](#3-chromosome-representations) | ✅ | ✅ (all genome types) | | 4 | [Crossover Operators](#4-crossover-operators) | ✅ | ⚠️ 2 factory operators exposed | | 5 | [Mutation Operators](#5-mutation-operators) | ✅ | ⚠️ 2 factory operators exposed | -| 6 | [Selection Operators](#6-selection-operators) | ✅ | ❌ not exposed | +| 6 | [Selection Operators](#6-selection-operators) | ✅ | ⚠️ helper functions exposed | | 7 | [Core GA Run and Results](#7-core-ga-run-and-results) | ✅ | ✅ | | 8 | [High-Level Optimizer API](#8-high-level-optimizer-api) | ✅ | ✅ | | 9 | [Multi-Objective: NSGA-II](#9-multi-objective-nsga-ii) | ✅ | ✅ (objective-space utils) | @@ -31,13 +31,13 @@ For features not yet exposed in the Python bindings, an explicit note is include | 16 | [Adaptive Operators](#16-adaptive-operators) | ✅ | ✅ | | 17 | [Hybrid Optimization](#17-hybrid-optimization) | ✅ | ✅ | | 18 | [Constraint Handling](#18-constraint-handling) | ✅ | ✅ | -| 19 | [Parallel and Distributed Evaluation](#19-parallel-and-distributed-evaluation) | ✅ | ❌ not exposed | +| 19 | [Parallel and Distributed Evaluation](#19-parallel-and-distributed-evaluation) | ✅ | ✅ (`ParallelEvaluator`, `LocalDistributedExecutor`, `Optimizer.with_threads`) | | 20 | 
[Co-Evolution](#20-co-evolution) | ✅ | ✅ | | 21 | [Checkpointing](#21-checkpointing) | ✅ | ✅ | | 22 | [Experiment Tracking](#22-experiment-tracking) | ✅ | ✅ | | 23 | [Visualization and CSV Export](#23-visualization-and-csv-export) | ✅ | ✅ | | 24 | [Plugin Architecture](#24-plugin-architecture) | ✅ | ❌ not exposed | -| 25 | [Benchmark Suite](#25-benchmark-suite) | ✅ | ❌ not exposed | +| 25 | [Benchmark Suite](#25-benchmark-suite) | ✅ | ✅ (`BenchmarkConfig`, `GABenchmark`) | | 26 | [C API](#26-c-api) | ✅ | N/A (C only) | | 27 | [Reproducibility Controls](#27-reproducibility-controls) | ✅ | ✅ | @@ -545,10 +545,36 @@ auto& ranked = rs.select(population); ### 6.2 Python -> **Not available in Python bindings yet.** -> Selection operators are not individually exposed to Python. -> The `ga.GeneticAlgorithm` uses an internal tournament-style selection -> that cannot be swapped from Python currently. +Selection strategy classes are still C++-only, but Python now exposes helper +functions that run the same selection logic over a fitness list and return the +selected indices: + +- `ga.selection_tournament_indices(fitness, tournament_size=3)` (returns one index) +- `ga.selection_roulette_indices(fitness, count)` +- `ga.selection_rank_indices(fitness, count)` +- `ga.selection_sus_indices(fitness, count)` *(stochastic universal sampling)* +- `ga.selection_elitism_indices(fitness, elite_count)` + +```python +import ga + +fitness = [0.1, 0.8, 0.4, 1.2, 0.6] + +tournament_winner = ga.selection_tournament_indices(fitness, tournament_size=3) +roulette_picks = ga.selection_roulette_indices(fitness, count=3) +rank_picks = ga.selection_rank_indices(fitness, count=3) +sus_picks = ga.selection_sus_indices(fitness, count=3) +elite_picks = ga.selection_elitism_indices(fitness, elite_count=2) + +print("Tournament winner index:", tournament_winner) +print("Roulette indices:", roulette_picks) +print("Rank indices:", rank_picks) +print("SUS indices:", sus_picks) +print("Elite indices:", 
elite_picks) # tends to include the best-fitness entries +``` + +> `ga.GeneticAlgorithm` still uses its internal selection pipeline. These helpers +> are for analysis/custom Python loops where you need direct index selection. --- @@ -1513,11 +1539,40 @@ int main() { ### Python -> **Not available in Python bindings yet.** -> Parallel and distributed evaluators are implemented in -> `include/ga/evaluation/` (C++ only). -> As a workaround, Python's `concurrent.futures` can parallelize fitness calls -> externally and pass results to a Python-level custom fitness function. +Python exposes thread-parallel evaluators directly: + +- `ga.ParallelEvaluator(fitness, threads=...)` +- `ga.LocalDistributedExecutor(evaluator, workers=...)` +- plus optimizer-level threading via `ga.Optimizer.with_threads(...)` + +```python +import ga + +def sphere(x): + return 1000.0 / (1.0 + sum(xi * xi for xi in x)) + +batch = [[0.1, 0.2], [0.3, 0.4], [0.0, 0.0]] + +pe = ga.ParallelEvaluator(sphere, threads=4) +print("ParallelEvaluator:", pe.evaluate(batch)) + +lde = ga.LocalDistributedExecutor(sphere, workers=4) +print("LocalDistributedExecutor:", lde.execute(batch)) + +cfg = ga.Config() +cfg.population_size = 120 +cfg.generations = 200 +cfg.dimension = 20 +cfg.bounds = ga.Bounds(-5.12, 5.12) +result = (ga.Optimizer() + .with_config(cfg) + .with_threads(4) + .with_seed(42) + .optimize(sphere)) +print("Best fitness:", result.best_fitness) +``` + +> `ProcessDistributedExecutor` is still C++-only (POSIX/fork backend). --- @@ -1899,30 +1954,26 @@ cmake --build build ### Python -> **Not available in Python bindings yet.** -> The benchmark suite is implemented in `benchmark/` and exposed via the -> `ga-benchmark` executable (C++ only). 
-> -> You can replicate benchmark-style measurements in Python using the -> `ga.GeneticAlgorithm` directly: -> -> ```python -> import ga, time -> -> def sphere(x): -> return 1000.0 / (1.0 + sum(xi**2 for xi in x)) -> -> for dim in [5, 10, 20]: -> cfg = ga.Config() -> cfg.population_size = 60 -> cfg.generations = 100 -> cfg.dimension = dim -> cfg.bounds = ga.Bounds(-5.12, 5.12) -> t0 = time.perf_counter() -> r = ga.GeneticAlgorithm(cfg).run(sphere) -> elapsed = time.perf_counter() - t0 -> print(f"dim={dim:2d} best={r.best_fitness:.4f} time={elapsed*1000:.1f}ms") -> ``` +The benchmark suite is exposed in Python through `ga.BenchmarkConfig` and +`ga.GABenchmark`: + +```python +import ga + +cfg = ga.BenchmarkConfig() +cfg.warmup_iterations = 1 +cfg.benchmark_iterations = 3 +cfg.verbose = False + +b = ga.GABenchmark(cfg) +b.run_operator_benchmarks() +print("Operator rows:", len(b.operator_results())) + +b.run_function_benchmarks() +print("Function rows:", len(b.function_results())) + +b.export_to_csv("benchmark_results.csv") +``` --- @@ -2048,7 +2099,7 @@ pip install pybind11 # 2. Configure and build mkdir -p build && cd build cmake .. -cmake --build . --target ga-python-bindings -j$(nproc) +cmake --build . --target ga_python_module -j$(nproc) # 3. 
Add the build directory to PYTHONPATH export PYTHONPATH="$(pwd)/python:$PYTHONPATH" @@ -2083,6 +2134,21 @@ python3 python/example.py | `ga.make_two_point_crossover` | Factory: two-point crossover | | `ga.make_gaussian_mutation` | Factory: Gaussian mutation | | `ga.make_uniform_mutation` | Factory: Uniform mutation | +| **Evaluation** | | +| `ga.ParallelEvaluator` | Threaded batch evaluator over candidate vectors | +| `ga.LocalDistributedExecutor` | Threaded distributed executor over candidate batches | +| **Selection Helpers** | | +| `ga.selection_tournament_indices` | Tournament selection over fitness list | +| `ga.selection_roulette_indices` | Roulette-wheel selection over fitness list | +| `ga.selection_rank_indices` | Rank-based selection over fitness list | +| `ga.selection_sus_indices` | Stochastic universal sampling over fitness list | +| `ga.selection_elitism_indices` | Elitism/top-k selection over fitness list | +| **Benchmark** | | +| `ga.BenchmarkConfig` | Configure benchmark warmup/iterations/output | +| `ga.BenchmarkResult` | Scalability benchmark summary row | +| `ga.OperatorBenchmark` | Operator benchmark row | +| `ga.FunctionBenchmark` | Function optimization benchmark row | +| `ga.GABenchmark` | Run benchmark suite and export reports/CSV | | **Representations** | | | `ga.VectorGenome` | Real-valued genome (`double`) | | `ga.BitsetGenome` | Binary/bitset genome | diff --git a/benchmark/ga_benchmark.h b/benchmark/ga_benchmark.h index 20383d0..3a8467f 100644 --- a/benchmark/ga_benchmark.h +++ b/benchmark/ga_benchmark.h @@ -67,6 +67,10 @@ class GABenchmark { void generateReport(); void exportToCSV(const std::string& filename); + const std::vector<OperatorBenchmark>& operatorResults() const { return operatorResults_; } + const std::vector<FunctionBenchmark>& functionResults() const { return functionResults_; } + const std::vector<BenchmarkResult>& scalabilityResults() const { return scalabilityResults_; } + private: BenchmarkConfig config_; std::vector<OperatorBenchmark> operatorResults_; diff --git
a/python/bindings_sanity.py b/python/bindings_sanity.py index 9094fd9..40db157 100644 --- a/python/bindings_sanity.py +++ b/python/bindings_sanity.py @@ -15,6 +15,9 @@ def sphere_fitness(x: list[float]) -> float: def main() -> None: + out_dir = os.path.join(os.path.dirname(__file__), "..", "build") + os.makedirs(out_dir, exist_ok=True) + # Core data/representations ev = ga.Evaluation() ev.objectives = [1.0, 2.0] @@ -94,6 +97,44 @@ def main() -> None: assert ga.is_feasible([0.5, -0.2], cs) assert not ga.is_feasible([2.0], cs) + # Evaluation helpers + pe = ga.ParallelEvaluator(sphere_fitness, threads=2) + pe_results = pe.evaluate([[0.1, 0.2], [0.0, 0.0], [0.6, 0.7]]) + assert len(pe_results) == 3 and pe_results[1] >= pe_results[0] + + local_exec = ga.LocalDistributedExecutor(sphere_fitness, workers=2) + local_results = local_exec.execute([[0.2, 0.1], [0.4, 0.5]]) + assert len(local_results) == 2 and all(r > 0.0 for r in local_results) + + # Selection helpers + fitness = [0.1, 0.8, 0.4, 1.2, 0.6] + t_idx = ga.selection_tournament_indices(fitness, tournament_size=3) + assert len(t_idx) == 1 and 0 <= t_idx[0] < len(fitness) + rw_idx = ga.selection_roulette_indices(fitness, count=3) + assert len(rw_idx) == 3 and all(0 <= i < len(fitness) for i in rw_idx) + rank_idx = ga.selection_rank_indices(fitness, count=3) + # Legacy rank helper can return fewer indices than requested in this codebase. + assert len(rank_idx) <= 3 + sus_idx = ga.selection_sus_indices(fitness, count=3) + # Legacy SUS helper can return fewer indices than requested in this codebase. 
+ assert len(sus_idx) <= 3 + elite_idx = ga.selection_elitism_indices(fitness, elite_count=2) + assert len(elite_idx) == 2 and all(0 <= i < len(fitness) for i in elite_idx) + + # Benchmark suite + bcfg = ga.BenchmarkConfig() + bcfg.warmup_iterations = 0 + bcfg.benchmark_iterations = 1 + bcfg.verbose = False + bench = ga.GABenchmark(bcfg) + # Keep sanity fast: exercise object + serialization surface without running + # the full benchmark loops. + op_results = bench.operator_results() + assert isinstance(op_results, list) + csv_path = os.path.join(out_dir, "python_sanity_benchmark.csv") + bench.export_to_csv(csv_path) + assert os.path.exists(csv_path) + # Hybrid + coevolution cfg = ga.Config() cfg.dimension = 3 @@ -115,9 +156,6 @@ def main() -> None: st.generation = 1 st.rng_state = "smoke" - out_dir = os.path.join(os.path.dirname(__file__), "..", "build") - os.makedirs(out_dir, exist_ok=True) - bin_path = os.path.join(out_dir, "python_sanity_checkpoint.bin") ga.checkpoint_save_binary(bin_path, st) loaded = ga.checkpoint_load_binary(bin_path) diff --git a/python/ga_bindings.cpp b/python/ga_bindings.cpp index 6fd7173..a820a68 100644 --- a/python/ga_bindings.cpp +++ b/python/ga_bindings.cpp @@ -16,6 +16,7 @@ #include #include #include +#include #include "ga/config.hpp" #include "ga/genetic_algorithm.hpp" @@ -26,6 +27,8 @@ #include "ga/checkpoint/checkpoint.hpp" #include "ga/constraints/constraints.hpp" #include "ga/coevolution/coevolution.hpp" +#include "ga/evaluation/distributed_executor.hpp" +#include "ga/evaluation/parallel_evaluator.hpp" #include "ga/core/evaluation.hpp" #include "ga/core/genome.hpp" #include "ga/core/individual.hpp" @@ -52,6 +55,12 @@ // Full type definitions needed by pybind11 for operator ownership transfer #include "mutation/base_mutation.h" #include "crossover/base_crossover.h" +#include "selection-operator/tournament_selection.h" +#include "selection-operator/roulette_wheel_selection.h" +#include "selection-operator/rank_selection.h" 
+#include "selection-operator/stochastic_universal_sampling.h" +#include "selection-operator/elitism_selection.h" +#include "benchmark/ga_benchmark.h" namespace py = pybind11; @@ -98,6 +107,27 @@ static std::vector pyObjectivesToCpp(const py::it return objectives; } +static std::vector<::Individual> fitnessToSelectionPopulation(const std::vector<double>& fitness) { + std::vector<::Individual> population; + population.reserve(fitness.size()); + for (double f : fitness) { + population.emplace_back(f); + } + return population; +} + +static unsigned int checkedCountToUInt(std::size_t value, const char* name) { + if (value > static_cast<std::size_t>(std::numeric_limits<unsigned int>::max())) { + throw std::out_of_range(std::string(name) + " exceeds unsigned int range"); + } + return static_cast<unsigned int>(value); +} + +using DoubleBatchEvaluator = + ga::evaluation::ParallelEvaluator<std::vector<double>, + double, + std::function<double(const std::vector<double>&)>>; + PYBIND11_MODULE(ga, m) { m.doc() = "Genetic Algorithm framework — C++ core with Python bindings"; @@ -777,6 +807,90 @@ PYBIND11_MODULE(ga, m) { m.def("export_diversity_csv", &ga::visualization::exportDiversityCSV, py::arg("diversity"), py::arg("path")); + // ------------------------------------------------------- Evaluation helpers + py::class_<DoubleBatchEvaluator>(m, "ParallelEvaluator", + "Threaded batch evaluator over vector candidates") + .def(py::init([](py::function fitness, std::size_t threads) { + std::function<double(const std::vector<double>&)> wrapped = + [fitness](const std::vector<double>& genes) { + py::gil_scoped_acquire acquire; + return fitness(genes).cast<double>(); + }; + return std::make_unique<DoubleBatchEvaluator>(std::move(wrapped), threads); + }), + py::arg("fitness"), + py::arg("threads") = std::thread::hardware_concurrency()) + .def("evaluate", &DoubleBatchEvaluator::evaluate, py::arg("batch"), + py::call_guard<py::gil_scoped_release>(), + "Evaluate a batch of candidate vectors in parallel"); + + py::class_<ga::evaluation::LocalDistributedExecutor>(m, "LocalDistributedExecutor", + "Local threaded distributed executor") + .def(py::init([](py::function evaluator, std::size_t workers) { +
ga::evaluation::LocalDistributedExecutor::EvaluateFn wrapped = + [evaluator](const std::vector<double>& genes) { + py::gil_scoped_acquire acquire; + return evaluator(genes).cast<double>(); + }; + return std::make_unique<ga::evaluation::LocalDistributedExecutor>( + std::move(wrapped), workers); + }), + py::arg("evaluator"), + py::arg("workers") = std::thread::hardware_concurrency()) + .def("execute", &ga::evaluation::LocalDistributedExecutor::execute, py::arg("batch"), + py::call_guard<py::gil_scoped_release>(), + "Execute a batch of candidate vectors and return fitness values"); + + // ------------------------------------------------------- Benchmark suite + py::class_<BenchmarkConfig>(m, "BenchmarkConfig", "Benchmark configuration") + .def(py::init<>()) + .def_readwrite("warmup_iterations", &BenchmarkConfig::warmupIterations) + .def_readwrite("benchmark_iterations", &BenchmarkConfig::benchmarkIterations) + .def_readwrite("verbose", &BenchmarkConfig::verbose) + .def_readwrite("csv_output", &BenchmarkConfig::csvOutput) + .def_readwrite("output_file", &BenchmarkConfig::outputFile); + + py::class_<BenchmarkResult>(m, "BenchmarkResult", "Scalability benchmark aggregate result") + .def_readonly("name", &BenchmarkResult::name) + .def_readonly("category", &BenchmarkResult::category) + .def_readonly("avg_execution_time", &BenchmarkResult::avgExecutionTime) + .def_readonly("min_execution_time", &BenchmarkResult::minExecutionTime) + .def_readonly("max_execution_time", &BenchmarkResult::maxExecutionTime) + .def_readonly("iterations", &BenchmarkResult::iterations) + .def_readonly("throughput", &BenchmarkResult::throughput) + .def_readonly("standard_deviation", &BenchmarkResult::standardDeviation) + .def_readonly("success", &BenchmarkResult::success) + .def_readonly("error_message", &BenchmarkResult::errorMessage); + + py::class_<OperatorBenchmark>(m, "OperatorBenchmark", "Operator-level benchmark result") + .def_readonly("operator_name", &OperatorBenchmark::operatorName) + .def_readonly("operator_type", &OperatorBenchmark::operatorType) + .def_readonly("avg_time", &OperatorBenchmark::avgTime) +
.def_readonly("operations_per_second", &OperatorBenchmark::operationsPerSecond) + .def_readonly("iterations", &OperatorBenchmark::iterations) + .def_readonly("representation", &OperatorBenchmark::representation); + + py::class_<FunctionBenchmark>(m, "FunctionBenchmark", "Function optimization benchmark result") + .def_readonly("function_name", &FunctionBenchmark::functionName) + .def_readonly("best_fitness", &FunctionBenchmark::bestFitness) + .def_readonly("avg_fitness", &FunctionBenchmark::avgFitness) + .def_readonly("generations_to_converge", &FunctionBenchmark::generationsToConverge) + .def_readonly("total_execution_time", &FunctionBenchmark::totalExecutionTime) + .def_readonly("best_solution", &FunctionBenchmark::bestSolution) + .def_readonly("convergence_history", &FunctionBenchmark::convergenceHistory); + + py::class_<GABenchmark>(m, "GABenchmark", "Benchmark suite runner") + .def(py::init<BenchmarkConfig>(), py::arg("config") = BenchmarkConfig{}) + .def("run_all_benchmarks", &GABenchmark::runAllBenchmarks) + .def("run_operator_benchmarks", &GABenchmark::runOperatorBenchmarks) + .def("run_function_benchmarks", &GABenchmark::runFunctionBenchmarks) + .def("run_scalability_benchmarks", &GABenchmark::runScalabilityBenchmarks) + .def("generate_report", &GABenchmark::generateReport) + .def("export_to_csv", &GABenchmark::exportToCSV, py::arg("filename")) + .def("operator_results", [](const GABenchmark& self) { return self.operatorResults(); }) + .def("function_results", [](const GABenchmark& self) { return self.functionResults(); }) + .def("scalability_results", [](const GABenchmark& self) { return self.scalabilityResults(); }); + // ------------------------------------------------------- Operator factories m.def("make_gaussian_mutation", &ga::makeGaussianMutation, py::arg("seed") = 0u, @@ -790,4 +904,57 @@ m.def("make_two_point_crossover", &ga::makeTwoPointCrossover, py::arg("seed") = 0u, "Create a Two-Point crossover operator"); + + //
------------------------------------------------------- Selection helper APIs + m.def("selection_tournament_indices", + [](const std::vector<double>& fitness, std::size_t tournament_size) { + auto population = fitnessToSelectionPopulation(fitness); + return TournamentSelection::selectIndices( + population, checkedCountToUInt(tournament_size, "tournament_size")); + }, + py::arg("fitness"), + py::arg("tournament_size") = 3u, + "Tournament selection helper: returns one winner index from the tournament"); + + m.def("selection_roulette_indices", + [](const std::vector<double>& fitness, std::size_t count) { + auto population = fitnessToSelectionPopulation(fitness); + return RouletteWheelSelection::selectIndices( + population, checkedCountToUInt(count, "count")); + }, + py::arg("fitness"), + py::arg("count"), + "Roulette-wheel selection helper: returns selected indices"); + + m.def("selection_rank_indices", + [](const std::vector<double>& fitness, std::size_t count) { + auto population = fitnessToSelectionPopulation(fitness); + // Intentionally route through legacy helper: in this codebase it + // returns stable original-population indices expected by callers. + return RankSelectionLegacy(population, checkedCountToUInt(count, "count")); + }, + py::arg("fitness"), + py::arg("count"), + "Rank selection helper: returns selected indices"); + + m.def("selection_sus_indices", + [](const std::vector<double>& fitness, std::size_t count) { + auto population = fitnessToSelectionPopulation(fitness); + // Intentionally route through legacy helper for index semantics + // consistent with existing selection utility callers.
+ return StochasticUniversalSamplingLegacy(population, checkedCountToUInt(count, "count")); + }, + py::arg("fitness"), + py::arg("count"), + "Stochastic universal sampling helper: returns selected indices"); + + m.def("selection_elitism_indices", + [](const std::vector<double>& fitness, std::size_t elite_count) { + auto population = fitnessToSelectionPopulation(fitness); + return ElitismSelection::selectIndices( + population, checkedCountToUInt(elite_count, "elite_count")); + }, + py::arg("fitness"), + py::arg("elite_count"), + "Elitism helper: returns indices of top-fitness individuals"); }