diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml new file mode 100644 index 0000000..fdcedd9 --- /dev/null +++ b/.github/workflows/pytest.yml @@ -0,0 +1,37 @@ +name: Test Python-bindings on Linux + +on: + push: + branches: + - main + +jobs: + build: + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ["3.10", "3.12"] + + steps: + - uses: actions/checkout@v3 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install flake8 pytest + if [ -f requirements.txt ]; then pip install -r requirements.txt; fi + - name: Lint with flake8 + run: | + # stop the build if there are Python syntax errors or undefined names + flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics + # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide + flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics + - name: build and install + run: | + pip install --verbose . + - name: Test with pytest + run: | + pytest -s test diff --git a/.gitignore b/.gitignore index a62bd32..68cf528 100644 --- a/.gitignore +++ b/.gitignore @@ -50,4 +50,144 @@ instances/mps/ex1010-pi.mps TODO.md perf.data* bench.sh -coverage \ No newline at end of file +coverage + +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# skbuild_conan +_skbuild/ +.conan/ +**/CMakeUserPresets.json + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/latest/usage/project/#working-with-version-control +.pdm.toml +.pdm-python +.pdm-build/ + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + diff --git a/CMakeLists.txt b/CMakeLists.txt index 66ae31f..3b8cb93 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -28,6 +28,30 @@ add_executable(accft ${SOURCE}) set(LIBRARIES fmt::fmt pthread dl m) target_link_libraries(accft PUBLIC ${LIBRARIES}) +if (SKBUILD) +set(PYTHON_BINDINGS ON) +endif() + +if (PYTHON_BINDINGS) +message(STATUS "PYTHON_BINDINGS: ${PYTHON_BINDINGS}") + + # PyBind11 + FetchContent_Declare(pybind11 GIT_REPOSITORY https://github.com/pybind/pybind11.git GIT_TAG v2.13.6) + FetchContent_MakeAvailable(pybind11) # pybind11, essential + set(CMAKE_POSITION_INDEPENDENT_CODE ON) # The code needs to be compiled as PIC + # to build the shared lib for python. + set(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE) + # Ensure fmt is compiled with -fPIC + set_target_properties(fmt PROPERTIES POSITION_INDEPENDENT_CODE ON) + + pybind11_add_module(_bindings ./src/pyaccft/_bindings.cpp) + target_link_libraries(_bindings PUBLIC fmt::fmt ${LIBRARIES}) + # enable compilation warnings + target_compile_options( + _bindings PRIVATE "$<$:-Wall>") + target_compile_definitions(_bindings PRIVATE PYBIND11_DETAILED_ERROR_MESSAGES) + install(TARGETS _bindings DESTINATION ./src/pyaccft/) +endif() ######################################## ############## Unit tests ############## diff --git a/README.md b/README.md index 5c8dca1..03accfd 100644 --- a/README.md +++ b/README.md @@ -177,6 +177,13 @@ _Note: for rail2586, better results can be obtained emphasizing multipliers qual The results for the other datasets can be found in the [`benchmarks`](benchmarks/) directory. + +## Python Bindings + +The project also provides Python bindings with a simplified interface for quick usage. +While you can also access much of the C++-interface directly, the primary purpose is to provide a point and shoot interface for the algorithm. +You can find more details in the separate [README](README.py.md). + ## Coding Style Regarding the source code, here you can find the general set of rules that we try to enforce. Since this is a project we work on in our free time, we haven't been afraid to experiment with simple rules and convention that we adjusted along the way when we felt something wasn't working for us. diff --git a/README.py.md b/README.py.md new file mode 100644 index 0000000..e0ffc30 --- /dev/null +++ b/README.py.md @@ -0,0 +1,60 @@ + + +# Python Bindings for the AC-CFT Set Cover Heuristic + + + +## Install + +We will publish the package on PyPI soon. For now, you can install the package by cloning the repository and running the following command in the root directory: + +```bash +pip install --verbose . +``` + +## Usage + +To use the `SetCoverSolver`, first create an instance of the solver. You can then add sets with their respective costs and solve the set cover problem. The solver will find the optimal selection of sets that covers all elements at the minimum cost. + +Here is an example: + +```python +from pyaccft import SetCoverSolver + +solver = SetCoverSolver() +# Add sets with their respective costs +solver.add_set([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], cost=10) +solver.add_set([0, 1, 2, 3, 4, 5], cost=5) +solver.add_set([0, 1, 2, 3, 4], cost=4) +solver.add_set([6, 7, 8, 9], cost=4) + +# Solve the set cover problem +solver.solve() + +# Retrieve and print the solution +solution = solver.get_solution() +print("The following sets have been selected:", solution) +print("The cost of the solution is:", solver.get_cost()) +print("The lower bound of the solution is:", solver.get_lower_bound()) +``` + +Ensure that all elements are zero-indexed and that every element between 0 and the maximum element appears in at least one set for a feasible solution. + +## Configuration + +You can configure the solver by setting the following parameters in `solve`: + +- seed (int): Seed for the random number generator. Default is 0. +- time_limit (int): Time limit in seconds. Default is 0 (no limit). +- verbose (int): Verbosity level. Default is 2. +- epsilon (float): Epsilon value for objective comparisons. Default is 0.999. +- heur_iters (int): Number of iterations for the heuristic phase. Default is 250. +- alpha (float): Relative fixing fraction increment. Default is 1.1. +- beta (float): Relative cutoff value to terminate Refinement. Default is 1.0. +- abs_subgrad_exit (float): Minimum LBs delta to trigger subgradient termination. Default is 1.0. +- rel_subgrad_exit (float): Minimum LBs gap to trigger subgradient termination. Default is 0.001. +- use_unit_costs (bool): Solve the given instance setting columns cost to one. Default is False. + diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..35cee52 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,11 @@ +# SPDX-FileCopyrightText: 2025 Dominik Krupke +# SPDX-License-Identifier: MIT + +[build-system] +requires = [ + "setuptools", + "scikit-build>=0.17.3", + "cmake>=3.23", + "ninja", +] +build-backend = "setuptools.build_meta" \ No newline at end of file diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..c300e55 --- /dev/null +++ b/setup.py @@ -0,0 +1,33 @@ +# SPDX-FileCopyrightText: 2025 Dominik Krupke +# SPDX-License-Identifier: MIT + +from pathlib import Path + +from setuptools import find_packages +from skbuild import setup + + +def readme(): + """ + :return: Content of README.md + """ + + with Path("README.py.md").open() as file: + return file.read() + + +setup( # https://scikit-build.readthedocs.io/en/latest/usage.html#setup-options + name="pyaccft", + version="0.0.1", + author="TODO", + license="LICENSE", + description="Pybinding for accft", + long_description=readme(), + long_description_content_type="text/markdown", + packages=find_packages("src"), # Include all packages in `./src`. + package_dir={"": "src"}, # The root for our python package is in `./src`. + python_requires=">=3.10", # lowest python version supported. + install_requires=[ # Python Dependencies + ], + cmake_minimum_required_version="3.23", +) diff --git a/src/algorithms/Refinement.hpp b/src/algorithms/Refinement.hpp index f3750ca..7997b70 100644 --- a/src/algorithms/Refinement.hpp +++ b/src/algorithms/Refinement.hpp @@ -88,25 +88,24 @@ namespace local { namespace { } // namespace local // Complete CFT algorithm (Refinement + call to 3-phase) -inline Solution run(Environment const& env, // in - Instance const& orig_inst, // in - Solution const& warmstart_sol = {} // in +inline CftResult run(Environment const& env, // in + Instance const& orig_inst, // in + Solution const& warmstart_sol = {} // in ) { cidx_t const ncols = csize(orig_inst.cols); ridx_t const nrows = rsize(orig_inst.rows); - auto inst = orig_inst; - auto best_sol = Solution(); - best_sol.cost = limits::max(); + auto inst = orig_inst; + auto nofix_dual = DualState(); + auto best_sol = Solution(); + best_sol.cost = limits::max(); if (!warmstart_sol.idxs.empty()) best_sol = warmstart_sol; auto three_phase = ThreePhase(); auto select_cols_to_fix = local::RefinementFixManager(); - auto nofix_lagr_mult = std::vector(); - auto nofix_lb = limits::max(); auto old2new = IdxsMaps(); auto fixing = FixingData(); auto max_cost = limits::max(); @@ -120,16 +119,15 @@ inline Solution run(Environment const& env, // in } if (iter_counter == 0) { - nofix_lagr_mult = std::move(result_3p.nofix_lagr_mult); - nofix_lb = result_3p.nofix_lb; - max_cost = env.beta * nofix_lb + env.epsilon; + nofix_dual = std::move(result_3p.dual); + max_cost = env.beta * nofix_dual.lb + env.epsilon; } if (best_sol.cost <= max_cost || env.timer.elapsed() > env.time_limit) break; inst = orig_inst; - auto cols_to_fix = select_cols_to_fix(env, inst, nofix_lagr_mult, best_sol); + auto cols_to_fix = select_cols_to_fix(env, inst, nofix_dual.mults, best_sol); if (!cols_to_fix.empty()) { make_identity_fixing_data(ncols, nrows, fixing); fix_columns_and_compute_maps(cols_to_fix, inst, fixing, old2new); @@ -140,8 +138,8 @@ inline Solution run(Environment const& env, // in "REFN> {:2}: Best solution {:.2f}, lb {:.2f}, gap {:.2f}%\n", iter_counter, best_sol.cost, - nofix_lb, - 100.0_F * (best_sol.cost - nofix_lb) / best_sol.cost); + nofix_dual.lb, + 100.0_F * (best_sol.cost - nofix_dual.lb) / best_sol.cost); print<2>(env, "REFN> {:2}: Fixed cost {:.2f}, free rows {:.0f}%, time {:.2f}s\n\n", iter_counter, @@ -152,8 +150,7 @@ inline Solution run(Environment const& env, // in if (inst.rows.empty() || env.timer.elapsed() > env.time_limit) break; } - - return best_sol; + return {std::move(best_sol), std::move(nofix_dual)}; } } // namespace cft diff --git a/src/algorithms/ThreePhase.hpp b/src/algorithms/ThreePhase.hpp index 6d4afdb..6f31983 100644 --- a/src/algorithms/ThreePhase.hpp +++ b/src/algorithms/ThreePhase.hpp @@ -15,37 +15,30 @@ namespace cft { -struct ThreePhaseResult { - Solution sol; // Best feasible solution found - std::vector nofix_lagr_mult; // Lagrangian multipliers before the first fixing - real_t nofix_lb; // Lower bound before the first fixing -}; - class ThreePhase { static constexpr real_t init_step_size = 0.1_F; // Caches - Subgradient subgrad; // Subgradient functor - Greedy greedy; // Greedy functor - ColFixing col_fixing; // Column fixing functor - Pricer pricer; // Pricing functor - FixingData fixing; // Column fixing data - Solution sol; // Current solution - Solution best_sol; // Best solution - InstAndMap core; // Core instance - std::vector lagr_mult; // Lagrangian multipliers - std::vector unfixed_lagr_mult; // Best multipliers before the first fixing + Subgradient subgrad; // Subgradient functor + Greedy greedy; // Greedy functor + ColFixing col_fixing; // Column fixing functor + Pricer pricer; // Pricing functor + FixingData fixing; // Column fixing data + Solution sol; // Current solution + Solution best_sol; // Best solution + InstAndMap core; // Core instance + std::vector lagr_mult; // Lagrangian multipliers + DualState nofix_dual; // Best multipliers before the first fixing public: // 3-phase algorithm consisting in subgradient, greedy and column fixing. // NOTE: inst gets progressively fixed inplace, loosing its original state. - ThreePhaseResult operator()(Environment const& env, // in - Instance& inst // in/cache + CftResult operator()(Environment const& env, // in + Instance& inst // in/cache ) { ridx_t const orig_nrows = rsize(inst.rows); // Original number of rows for ColFixing - auto tot_timer = Chrono<>(); - auto unfixed_lb = limits::min(); + auto tot_timer = Chrono<>(); _three_phase_setup(inst, greedy, sol, best_sol, core, lagr_mult, fixing); CFT_IF_DEBUG(auto inst_copy = inst); @@ -57,10 +50,8 @@ class ThreePhase { auto cutoff = best_sol.cost - fixing.fixed_cost; auto real_lb = subgrad(env, inst, cutoff, pricer, core, step_size, lagr_mult); - if (iter_counter == 0) { - unfixed_lagr_mult = lagr_mult; - unfixed_lb = real_lb; - } + if (iter_counter == 0) + nofix_dual = {lagr_mult, real_lb}; if (real_lb + fixing.fixed_cost >= best_sol.cost - env.epsilon || env.timer.elapsed() > env.time_limit) @@ -96,7 +87,7 @@ class ThreePhase { "3PHS> Best solution: {:.2f}, time: {:.2f}s\n\n", best_sol.cost, tot_timer.elapsed()); - return {best_sol, unfixed_lagr_mult, unfixed_lb}; + return {best_sol, nofix_dual}; } private: diff --git a/src/core/cft.hpp b/src/core/cft.hpp index a83a2c8..234aad5 100644 --- a/src/core/cft.hpp +++ b/src/core/cft.hpp @@ -107,10 +107,21 @@ struct CidxAndCost { real_t cost; }; -// Solution represented by a vector of column indexes and the total cost +// Solution for the Set Covering problem struct Solution { - std::vector idxs; - real_t cost; + std::vector idxs; // List of selected column indexes ... + real_t cost; // ... their total cost +}; + +// Dual solution for the Set Covering problem +struct DualState { + std::vector mults; // Lagrangian multipliers + real_t lb; // Lower bound +}; + +struct CftResult { + Solution sol; + DualState dual; }; // Environment struct to hold all the parameters and working variables diff --git a/src/main.cpp b/src/main.cpp index d40807c..6fdf157 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -18,11 +18,11 @@ int main(int argc, char const** argv) { cft::print_arg_values(env); auto fdata = cft::parse_inst_and_initsol(env); - auto sol = cft::run(env, fdata.inst, fdata.init_sol); - cft::write_solution(env.sol_path, sol); + auto res = cft::run(env, fdata.inst, fdata.init_sol); + cft::write_solution(env.sol_path, res.sol); cft::print<1>(env, "CFT> Best solution {:.2f} time {:.2f}s\n", - sol.cost, + res.sol.cost, env.timer.elapsed()); } catch (std::exception const& e) { diff --git a/src/pyaccft/__init__.py b/src/pyaccft/__init__.py new file mode 100644 index 0000000..36bcc75 --- /dev/null +++ b/src/pyaccft/__init__.py @@ -0,0 +1,6 @@ +# SPDX-FileCopyrightText: 2025 Dominik Krupke +# SPDX-License-Identifier: MIT + +from .set_cover_solver import SetCoverSolver + +__all__ = ["SetCoverSolver"] \ No newline at end of file diff --git a/src/pyaccft/_bindings.cpp b/src/pyaccft/_bindings.cpp new file mode 100644 index 0000000..eb92987 --- /dev/null +++ b/src/pyaccft/_bindings.cpp @@ -0,0 +1,178 @@ +// SPDX-FileCopyrightText: 2025 Dominik Krupke +// SPDX-License-Identifier: MIT + +// pybind11 +#include // To define operator overloading +#include // Basic pybind11 functionality +#include // Automatic conversion of vectors + +#include + +#include "../algorithms/Refinement.hpp" +#include "../core/Instance.hpp" +#include "../core/cft.hpp" +#include "../core/parsing.hpp" + +namespace local { namespace { + void check_and_fill_instance(cft::Instance& instance) { + auto& cols = instance.cols; + auto& rows = instance.rows; + using namespace cft; + size_t n = 0; + + // Determine the maximum index in cols + for (cidx_t j = 0_C; j < csize(cols); ++j) + for (ridx_t i : cols[j]) + n = std::max(n, static_cast(i + 1)); + + // Guard against the user entering huge indices because they + // might have misunderstood the 0-based indexing. + if (n > cols.idxs.size()) { + throw std::runtime_error( + fmt::format("Item index out of bounds. Maximum index is {} but " + "size is {}. Please make sure that the n items are " + "indexed from 0 to n-1.", + n, + cols.idxs.size())); + } + + // Check if every element (row) is in at least one column + std::vector in_col(n, false); + // Mark every element that is in a column (set) + for (cidx_t j = 0_C; j < csize(cols); ++j) + for (ridx_t i : cols[j]) + in_col[i] = true; + // Check if every element is in at least one column + for (size_t i = 0; i < n; ++i) + if (!in_col[i]) + throw std::runtime_error(fmt::format("Item {} not contained in any set.", i)); + + // Fill rows from columns + fill_rows_from_cols(cols, n, rows); + } +} // namespace +} // namespace local + +// Pybind11 module definitions +PYBIND11_MODULE(_bindings, m) { + namespace py = pybind11; + using namespace cft; + + m.doc() = "Example of PyBind11 and CGAL."; // Optional module docstring + + py::class_(m, "Environment", "The Environment for the accft solver.") + .def(py::init<>()) + .def_readwrite("inst_path", &Environment::inst_path) + .def_readwrite("sol_path", &Environment::sol_path) + .def_readwrite("initsol_path", &Environment::initsol_path) + .def_readwrite("parser", &Environment::parser) + .def_readwrite("seed", &Environment::seed) + .def_readwrite("time_limit", &Environment::time_limit) + .def_readwrite("verbose", &Environment::verbose) + .def_readwrite("epsilon", &Environment::epsilon) + .def_readwrite("heur_iters", &Environment::heur_iters) + .def_readwrite("alpha", &Environment::alpha) + .def_readwrite("beta", &Environment::beta) + .def_readwrite("abs_subgrad_exit", &Environment::abs_subgrad_exit) + .def_readwrite("rel_subgrad_exit", &Environment::rel_subgrad_exit) + .def_readwrite("use_unit_costs", &Environment::use_unit_costs) + .def_readwrite("min_fixing", &Environment::min_fixing) + .def("__repr__", [](Environment const& a) { + return fmt::format("Environment(inst_path='{}', sol_path='{}', initsol_path='{}', " + "parser={}, " + "seed={}, time_limit={}, verbose={}, epsilon={}, heur_iters={}, " + "alpha={}, beta={}, abs_subgrad_exit={}, rel_subgrad_exit={}, " + "use_unit_costs={})", + a.inst_path, + a.sol_path, + a.initsol_path, + a.parser, + a.seed, + a.time_limit, + a.verbose, + a.epsilon, + a.heur_iters, + a.alpha, + a.beta, + a.abs_subgrad_exit, + a.rel_subgrad_exit, + a.use_unit_costs); + }); + py::class_>(m, "SparseBinMat") + .def(py::init<>()) + .def_readwrite("idxs", &SparseBinMat::idxs) + .def_readwrite("begs", &SparseBinMat::begs) + .def("__getitem__", [](SparseBinMat& self, std::size_t i) { return self[i]; }) + .def("__len__", &SparseBinMat::size) + .def("__repr__", + [](SparseBinMat const& a) { + return fmt::format("SparseBinMat(idxs={}, begs={})", a.idxs, a.begs); + }) + .def("clear", &SparseBinMat::clear) + .def("push_back", + static_cast::*)(std::vector const&)>( + &SparseBinMat::push_back)); + + + py::class_(m, "Instance") + .def(py::init<>()) + .def_readwrite("cols", &Instance::cols) + .def_readwrite("rows", &Instance::rows) + .def_readwrite("costs", &Instance::costs) + .def("add", + [](Instance& self, std::vector const& col, real_t cost) { + self.cols.push_back(col); + if (cost < 0.0) + throw std::runtime_error("Costs must be non-negative."); + self.costs.push_back(cost); + return self.costs.size() - 1; + }) + .def("copy", [](Instance const& a) { return a; }) + .def("prepare", [](Instance& self) { ::local::check_and_fill_instance(self); }); + + + py::class_(m, "CftResult") + .def(py::init<>()) + .def_readwrite("sol", &CftResult::sol) + .def_readwrite("dual", &CftResult::dual) + .def("copy", [](CftResult const& a) { return a; }) + .def("__repr__", [](CftResult const& a) { + return fmt::format("CftResult(sol=({},{}), dual=({},{}))", + a.sol.cost, + a.sol.cost, + a.dual.mults, + a.dual.lb); + }); + + py::class_(m, "Solution") + .def(py::init<>()) + .def_readwrite("idxs", &Solution::idxs) + .def_readwrite("cost", &Solution::cost) + .def("copy", [](Solution const& a) { return a; }) + .def("__repr__", [](Solution const& a) { + return fmt::format("Solution(idxs={}, cost={})", a.idxs, a.cost); + }); + + py::class_(m, "DualState") + .def(py::init<>()) + .def_readwrite("idxs", &DualState::mults) + .def_readwrite("cost", &DualState::lb) + .def("copy", [](DualState const& a) { return a; }) + .def("__repr__", [](DualState const& a) { + return fmt::format("DualState(mults={}, lb={})", a.mults, a.lb); + }); + + + py::class_(m, "FileData") + .def_readwrite("inst", &FileData::inst) + .def_readwrite("init_sol", &FileData::init_sol); + + m.def("parse_inst_and_initsol", + &parse_inst_and_initsol, + "Parse the instance and initial solution."); + + m.def("fill_rows_from_cols", &fill_rows_from_cols, "Fill rows from columns."); + + + m.def("run", &run, "Run the accft solver."); +} \ No newline at end of file diff --git a/src/pyaccft/set_cover_solver.py b/src/pyaccft/set_cover_solver.py new file mode 100644 index 0000000..3d1f7b4 --- /dev/null +++ b/src/pyaccft/set_cover_solver.py @@ -0,0 +1,169 @@ +# SPDX-FileCopyrightText: 2025 Dominik Krupke +# SPDX-License-Identifier: MIT + +from pathlib import Path +from ._bindings import ( + Environment, + parse_inst_and_initsol, + run, + Instance, + Solution, + CftResult, +) + + +class SetCoverSolver: + """ + Defines a Set Cover Solver object that will solve the set cover problem using the CFT heuristic. + The solver can be used incrementally by adding sets and then calling the solve method. + If no new elements are added, the solver will use the previous solution as a starting point. + + All elements must be zero-indexed. As we are working with indices, all elements between 0 and the maximum + element in the sets must appear in one of the sets for a feasible solution. If an element is not in any set, + it will be considered as not covered and the solution will be infeasible. + + The sets are also indexed starting from 0. The index of a set is returned when adding a set and can be used + to retrieve the solution. + + Example: + ```python + from pyaccft import SetCoverSolver + + solver = SetCoverSolver() + # Set 0 + solver.add_set([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], cost=10) + # Set 1 + solver.add_set([0, 1, 2, 3, 4, 5], cost=5) + # Set 2 + solver.add_set([0, 1, 2, 3, 4], cost=4) + # Set 3 + solver.add_set([6, 7, 8, 9], cost=4) + solver.solve() + solution = solver.get_solution() + print("The following sets have been selected:", solution) + print("The cost of the solution is:", solver.get_cost()) + print("The lower bound of the solution is:", solver.get_lower_bound()) + ``` + """ + + def __init__(self): + self._result = None + self._instance = Instance() + self._initialized = False + self._max_element = -1 + + def add_set(self, elements: list[int], cost: float) -> int: + """ + Add a set. Returns the index of the set which is also used in the solution. + """ + if cost < 0: + raise ValueError("Costs must be non-negative") + if not all(0 <= e for e in elements): + raise ValueError("Elements must be zero-indexed.") + max_element = max(elements) + if max_element > self._max_element: + self._max_element = max_element + self._result = ( + None # in case there is a new element, the solution is no longer + ) + # valid + self._instance.add(elements, cost) + self._initialized = False + return len(self._instance.costs) - 1 + + def from_file(self, filename: str | Path, parser: str = "RAIL") -> None: + """ + Load an instance from a file. + """ + env = Environment() + env.time_limit = 10 + env.verbose = 2 + env.inst_path = str(filename) + env.parser = parser + fdata = parse_inst_and_initsol(env) + self._instance = fdata.inst.copy() + self._result.solution = fdata.init_sol.copy() + print(self._result) + self.num_elements = len(self._instance.costs) + self._initialized = True # a file is always considered initialized + + def solve( + self, + seed: int = 0, + time_limit: float = float("inf"), + verbose: int = 2, + epsilon: float = 0.999, + heur_iters: int = 250, + alpha: float = 1.1, + beta: float = 1.0, + abs_subgrad_exit: float = 1.0, + rel_subgrad_exit: float = 0.001, + min_fixing=0.3, + ) -> None: + """ + Solves the set cover problem using the specified parameters. + + Parameters: + seed (int): Seed for the random number generator. Default is 0. + time_limit (int): Time limit in seconds. Default is 0 (no limit). + verbose (int): Verbosity level. Default is 2. + epsilon (float): Epsilon value for objective comparisons. Default is 0.999. + heur_iters (int): Number of iterations for the heuristic phase. Default is 250. + alpha (float): Relative fixing fraction increment. Default is 1.1. + beta (float): Relative cutoff value to terminate Refinement. Default is 1.0. + abs_subgrad_exit (float): Minimum LBs delta to trigger subgradient termination. Default is 1.0. + rel_subgrad_exit (float): Minimum LBs gap to trigger subgradient termination. Default is 0.001. + use_unit_costs (bool): Solve the given instance setting columns cost to one. Default is False. + + Returns: + None + """ + env = Environment() + env.seed = seed + env.time_limit = time_limit + env.verbose = verbose + env.epsilon = epsilon + env.heur_iters = heur_iters + env.alpha = alpha + env.beta = beta + env.abs_subgrad_exit = abs_subgrad_exit + env.rel_subgrad_exit = rel_subgrad_exit + env.min_fixing = min_fixing + if not self._initialized: + self._instance.rows.clear() + self._instance.prepare() + self._initialized = True + init_sol = Solution() if self._result is None else self._result.sol + self._result = run(env, self._instance, init_sol).copy() + + def get_solution(self) -> list[int] | None: + """ + Return the indices of the selected sets in the solution. + """ + if self._result is None: + return None + return list(self._result.sol.idxs) + + def get_cost(self) -> float | None: + """ + Return the cost of the solution. + """ + if self._result is None: + return None + return self._result.sol.cost + + def get_lower_bound(self) -> float: + """ + Return a lower bound on the optimal solution. + """ + if self._result is None: + return 0 + return self._result.dual.cost + + def get_dual_multipliers(self) -> list[float] | None: + """ + Return the dual multipliers of the solution. + """ + if self._result is None: + return None + return list(self._result.dual.mults) diff --git a/test/README_unittests.cpp b/test/README_unittests.cpp index 7c9a3e5..c94577c 100644 --- a/test/README_unittests.cpp +++ b/test/README_unittests.cpp @@ -48,8 +48,8 @@ TEST_CASE("Invoke whole algorithm") { env.time_limit = 10.0; // Time limit in seconds env.verbose = 2; // Log verbosity level (from 0 to 5) env.timer.restart(); // NOTE: the timer is used also for the time limit test - auto sol = cft::run(env, inst); - fmt::print("CFT solution cost: {}\n", sol.cost); + auto res = cft::run(env, inst); + fmt::print("CFT solution cost: {}\n", res.sol.cost); }()); } @@ -66,7 +66,7 @@ TEST_CASE("Invoke 3-phase algorithm") { REQUIRE_NOTHROW([&] { auto three_phase = cft::ThreePhase(); auto result = three_phase(env, inst); - fmt::print("3-phase solution cost: {}, LB: {}\n", result.sol.cost, result.nofix_lb); + fmt::print("3-phase solution cost: {}, LB: {}\n", result.sol.cost, result.dual.lb); }()); } diff --git a/test/Refinement_unittests.cpp b/test/Refinement_unittests.cpp index 2787271..7ef864a 100644 --- a/test/Refinement_unittests.cpp +++ b/test/Refinement_unittests.cpp @@ -23,14 +23,14 @@ TEST_CASE("Whole algorithm run test") { for (int n = 0; n < 100; ++n) { auto inst = Instance(); - auto sol = Solution(); + auto res = CftResult(); REQUIRE_NOTHROW(inst = make_easy_inst(n, 1000_C)); - REQUIRE_NOTHROW(sol = run(env, inst, init_sol)); - CHECK(sol.cost <= 1000.0_F); // Trivial bad solution has 1000 cost - CHECK(sol.cost >= as_real(sol.idxs.size())); // Min col cost is 1.0 - if (abs(sol.cost - 1000.0_F) < 1e-6_F) - CHECK(sol.idxs == init_sol.idxs); - CFT_IF_DEBUG(CHECK_NOTHROW(check_inst_solution(inst, sol))); + REQUIRE_NOTHROW(res = run(env, inst, init_sol)); + CHECK(res.sol.cost <= 1000.0_F); // Trivial bad solution has 1000 cost + CHECK(res.sol.cost >= as_real(res.sol.idxs.size())); // Min col cost is 1.0 + if (abs(res.sol.cost - 1000.0_F) < 1e-6_F) + CHECK(res.sol.idxs == init_sol.idxs); + CFT_IF_DEBUG(CHECK_NOTHROW(check_inst_solution(inst, res.sol))); } } diff --git a/test/cft_unittests.cpp b/test/cft_unittests.cpp index 3cbe843..e7bc77b 100644 --- a/test/cft_unittests.cpp +++ b/test/cft_unittests.cpp @@ -22,6 +22,20 @@ TEST_CASE("Test Solution struct") { CHECK(s.cost == 0.0_F); } +TEST_CASE("Test DualState struct") { + auto d = DualState{{}, 0.0_F}; + CHECK(d.mults.empty()); + CHECK(d.lb == 0.0_F); +} + +TEST_CASE("Test CftResult struct") { + auto r = CftResult{{{}, 1.2_F}, {{}, 2.3_F}}; + CHECK(r.sol.idxs.empty()); + CHECK(r.sol.cost == 1.2_F); + CHECK(r.dual.mults.empty()); + CHECK(r.dual.lb == 2.3_F); +} + TEST_CASE("Test as_cidx function") { int i = 10; cidx_t c = as_cidx(i); diff --git a/test/custom_types_unittests.cpp b/test/custom_types_unittests.cpp index 10d0a7f..0494ce8 100644 --- a/test/custom_types_unittests.cpp +++ b/test/custom_types_unittests.cpp @@ -68,12 +68,12 @@ TEST_CASE("Custom types, whole algorithm run test") { for (int n = 0; n < 20; ++n) { auto inst = make_easy_inst(n, 100_C); - auto sol = run(env, inst, init_sol); - CHECK((sol.cost <= 1000.0_F)); // Trivial bad solution has 1000 cost - CHECK((sol.cost >= as_real(sol.idxs.size()))); // Min col cost is 1.0 - if (abs(sol.cost - 1000.0_F) < 1e-6_F) - DOCTEST_CHECK_EQ(sol.idxs, init_sol.idxs); - CFT_IF_DEBUG(CHECK_NOTHROW(check_inst_solution(inst, sol))); + auto res = run(env, inst, init_sol); + CHECK((res.sol.cost <= 1000.0_F)); // Trivial bad solution has 1000 cost + CHECK((res.sol.cost >= as_real(res.sol.idxs.size()))); // Min col cost is 1.0 + if (abs(res.sol.cost - 1000.0_F) < 1e-6_F) + DOCTEST_CHECK_EQ(res.sol.idxs, init_sol.idxs); + CFT_IF_DEBUG(CHECK_NOTHROW(check_inst_solution(inst, res.sol))); } } diff --git a/test/large_types_unittests.cpp b/test/large_types_unittests.cpp index 4710540..e65a732 100644 --- a/test/large_types_unittests.cpp +++ b/test/large_types_unittests.cpp @@ -26,14 +26,14 @@ TEST_CASE("Custom types, whole algorithm run test") { for (int n = 0; n < 20; ++n) { auto inst = Instance(); - auto sol = Solution(); + auto res = CftResult(); REQUIRE_NOTHROW(inst = make_easy_inst(n, 1000_C)); - REQUIRE_NOTHROW(sol = run(env, inst, init_sol)); - CHECK((sol.cost <= 1000.0_F)); // Trivial bad solution has 1000 cost - CHECK((sol.cost >= as_real(sol.idxs.size()))); // Min col cost is 1.0 - if ((abs(sol.cost - 1000.0_F) < 1e-6_F)) - CHECK(sol.idxs == init_sol.idxs); - CFT_IF_DEBUG(CHECK_NOTHROW(check_inst_solution(inst, sol))); + REQUIRE_NOTHROW(res = run(env, inst, init_sol)); + CHECK((res.sol.cost <= 1000.0_F)); // Trivial bad solution has 1000 cost + CHECK((res.sol.cost >= as_real(res.sol.idxs.size()))); // Min col cost is 1.0 + if ((abs(res.sol.cost - 1000.0_F) < 1e-6_F)) + CHECK(res.sol.idxs == init_sol.idxs); + CFT_IF_DEBUG(CHECK_NOTHROW(check_inst_solution(inst, res.sol))); } } diff --git a/test/small_types_unittests.cpp b/test/small_types_unittests.cpp index 666a2a3..d868cd6 100644 --- a/test/small_types_unittests.cpp +++ b/test/small_types_unittests.cpp @@ -26,14 +26,14 @@ TEST_CASE("Custom types, whole algorithm run test") { for (int n = 0; n < 20; ++n) { auto inst = Instance(); - auto sol = Solution(); + auto res = CftResult(); REQUIRE_NOTHROW(inst = make_easy_inst(n, 1000_C)); - REQUIRE_NOTHROW(sol = run(env, inst, init_sol)); - CHECK((sol.cost <= 1000.0_F)); // Trivial bad solution has 1000 cost - CHECK((sol.cost >= as_real(sol.idxs.size()))); // Min col cost is 1.0 - if ((abs(sol.cost - 1000.0_F) < 1e-6_F)) - CHECK(sol.idxs == init_sol.idxs); - CFT_IF_DEBUG(CHECK_NOTHROW(check_inst_solution(inst, sol))); + REQUIRE_NOTHROW(res = run(env, inst, init_sol)); + CHECK((res.sol.cost <= 1000.0_F)); // Trivial bad solution has 1000 cost + CHECK((res.sol.cost >= as_real(res.sol.idxs.size()))); // Min col cost is 1.0 + if ((abs(res.sol.cost - 1000.0_F) < 1e-6_F)) + CHECK(res.sol.idxs == init_sol.idxs); + CFT_IF_DEBUG(CHECK_NOTHROW(check_inst_solution(inst, res.sol))); } } diff --git a/test/test_basics.py b/test/test_basics.py new file mode 100644 index 0000000..65ac226 --- /dev/null +++ b/test/test_basics.py @@ -0,0 +1,84 @@ +# SPDX-FileCopyrightText: 2025 Dominik Krupke +# SPDX-License-Identifier: MIT + +import pyaccft +import pytest + + +def test_simple(): + solver = pyaccft.SetCoverSolver() + # solver.from_file("instances/rail/rail507", "RAIL") + solver.add_set([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], cost=10) + solver.add_set([0, 1, 2, 3, 4, 5], cost=5) + solver.add_set([0, 1, 2, 3, 4], cost=4) + solver.add_set([6, 7, 8, 9], cost=4) + solver.solve() + solution = solver.get_solution() + assert solution is not None, "No solution found" + assert 1 in solution, "Element 1 not in solution" + assert 3 in solution, "Element 3 not in solution" + assert solver.get_cost() == 9, "Objective value is not 9" + assert solver.get_lower_bound() >= 8.9, "Lower bound is not 9.0" + + +def test_simple_infeasible(): + solver = pyaccft.SetCoverSolver() + # solver.from_file("instances/rail/rail507", "RAIL") + solver.add_set([0, 1, 2, 3, 4, 5, 6, 8, 9], cost=10) + solver.add_set([0, 1, 2, 3, 4, 5], cost=5) + solver.add_set([0, 1, 2, 3, 4], cost=4) + solver.add_set([6, 8, 9], cost=4) + with pytest.raises(RuntimeError): + solver.solve() + + +def test_error_on_negative_cost(): + solver = pyaccft.SetCoverSolver() + with pytest.raises(ValueError): + solver.add_set([0, 1, 2, 3, 4, 5, 6, 8, 9], cost=-10) + + +def test_error_on_negative_element(): + solver = pyaccft.SetCoverSolver() + with pytest.raises(ValueError): + solver.add_set([-1, 1, 2, 3, 4, 5, 6, 8, 9], cost=10) + + +def test_simple_incremental(): + solver = pyaccft.SetCoverSolver() + # solver.from_file("instances/rail/rail507", "RAIL") + solver.add_set([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], cost=10) + solver.add_set([0, 1, 2, 3, 4, 5], cost=5) + solver.add_set([0, 1, 2, 3, 4], cost=4) + solver.add_set([6, 7, 8, 9], cost=4) + solver.solve() + solution = solver.get_solution() + assert solution is not None, "No solution found" + assert 1 in solution, "Element 1 not in solution" + assert 3 in solution, "Element 3 not in solution" + assert solver.get_cost() == 9, "Objective value is not 9" + assert solver.get_lower_bound() >= 8.9, "Lower bound is not 9.0" + solver.add_set([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], cost=5) + solver.solve() + solution = solver.get_solution() + assert solver.get_cost() == 5, "Objective value is not 5" + + +def test_simple_incremental_new_element(): + solver = pyaccft.SetCoverSolver() + # solver.from_file("instances/rail/rail507", "RAIL") + solver.add_set([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], cost=10) + solver.add_set([0, 1, 2, 3, 4, 5], cost=5) + solver.add_set([0, 1, 2, 3, 4], cost=4) + solver.add_set([6, 7, 8, 9], cost=4) + solver.solve() + solution = solver.get_solution() + assert solution is not None, "No solution found" + assert 1 in solution, "Element 1 not in solution" + assert 3 in solution, "Element 3 not in solution" + assert solver.get_cost() == 9, "Objective value is not 9" + assert solver.get_lower_bound() >= 8.9, "Lower bound is not 9.0" + solver.add_set([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10], cost=15) + solver.solve() + solution = solver.get_solution() + assert solver.get_cost() == 15, "Objective value is not 15"