diff --git a/README.md b/README.md index 1eec94dd..70e5648e 100644 --- a/README.md +++ b/README.md @@ -74,6 +74,9 @@ The optional libraries are: - [FFTW](http://www.fftw.org/): Fast Fourier Transform library used for normal mode transformation in Path Integral MD. - [PLUMED](https://www.plumed.org/): A collection of very useful tools for free energy calculations (MetaDynamics, Umbrella Sampling etc). - [TCPB-CPP](https://github.com/mtzgroup/tcpb-cpp): [EXPERIMENTAL] TCPB interface to TeraChem + - [MACE](https://github.com/ACEsuit/mace): Machine Learning Atomic Cluster Expansion potential. + - Integrated via an MPI interface. Requires a Python environment with `mace-torch`, `torch`, `ase`, and `mpi4py`. + - Use `dev_scripts/install_mace.sh` for easy installation. ## Structure of the repository diff --git a/dev_scripts/install_mace.sh b/dev_scripts/install_mace.sh new file mode 100755 index 00000000..05a303a6 --- /dev/null +++ b/dev_scripts/install_mace.sh @@ -0,0 +1,28 @@ +#!/bin/bash +set -euo pipefail + +REPO_URL="https://github.com/ACEsuit/mace.git" +REPO_DIR="$HOME/mace" +if [[ "$#" -eq 1 && ! -z $1 ]];then + REPO_DIR=$1 +fi + +if [[ -e $REPO_DIR ]];then + echo "ERROR: $REPO_DIR already exists." + exit 1 +fi + +git clone "${REPO_URL}" "${REPO_DIR}" && cd "$REPO_DIR" + +pip install --upgrade pip +pip install . +pip install mpi4py + +echo " +Successfully installed MACE and mpi4py. + +To use with ABIN, set MACE_PYTHON in utils/run.mace_mpi_abin.sh +to point to the Python interpreter that has these packages, e.g.: + +export MACE_PYTHON=$(which python3) +" diff --git a/interfaces/MACE/MACE-OFF23_medium.model b/interfaces/MACE/MACE-OFF23_medium.model new file mode 100644 index 00000000..51573784 Binary files /dev/null and b/interfaces/MACE/MACE-OFF23_medium.model differ diff --git a/interfaces/MACE/README.md b/interfaces/MACE/README.md new file mode 100644 index 00000000..21462056 --- /dev/null +++ b/interfaces/MACE/README.md @@ -0,0 +1,73 @@ +# MACE MPI Interface for ABIN + +This directory contains the Python-based server for the MACE (Machine Learning Atomic Cluster Expansion) potential. + +## Requirements + +- Python >= 3.8 +- PyTorch >= 1.12 +- mace-torch +- ase +- mpi4py +- numpy + +You can install all dependencies using: +```console +pip install mace-torch ase mpi4py numpy +``` +Or use the provided script in the root directory: +```console +./dev_scripts/install_mace.sh +``` + +## Usage + +The MACE server is typically launched alongside ABIN using MPI. ABIN communicates with the server to obtain energies and forces. + +### Manual Launch (OpenMPI) + +When using OpenMPI, you need an `ompi-server` running for the connection handshake: + +```console +# 1. Start ompi-server +ompi-server --no-daemonize -r ompi_uri.txt & + +# 2. Start MACE server +mpirun --ompi-server file:ompi_uri.txt -n 1 python3 mace_server.py & + +# 3. Start ABIN (in another terminal or same script) +mpirun --ompi-server file:ompi_uri.txt -n 1 abin -i input.in -x geom.xyz +``` + +### Automatic Launch + +It is recommended to use the provided launch script in `utils/`: +```console +./utils/run.mace_mpi_abin.sh +``` + +## Configuration + +MACE settings are controlled via the `&mace` namelist in the ABIN input file (`input.in`). Below is the full list of available parameters: + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `mace_model` | String | `'MACE-OFF23_medium.model'` | Path to a local `.model` file (TorchScript or PyTorch checkpoint) or a foundation model name (e.g., `MACE-OFF23_medium.model`, `medium`). Foundation models are automatically downloaded. | +| `mace_device` | String | `'cpu'` | Execution device: `'cpu'` or `'cuda'`. | +| `mace_default_dtype` | String | `'float64'` | Default tensor precision: `'float32'` or `'float64'`. | +| `mace_batch_size` | Integer | `64` | Batch size for evaluations. | +| `mace_compute_stress` | Logical | `.false.` | Whether to compute the stress tensor. | +| `mace_return_contributions` | Logical | `.false.` | Whether to return energy contributions per body order. | +| `mace_info_prefix` | String | `'MACE_'` | Prefix for MACE-related metadata in the ASE output. | +| `mace_head` | String | `''` | Model head identifier (for multi-head models). | +| `mace_max_mpi_wait_time` | Real | `60.0` | Maximum time (seconds) to wait for the MACE port file to appear. | +| `mace_mpi_milisleep` | Integer | `50` | Sleep interval (milliseconds) when polling for the MACE port or results. | + +See `sample_inputs/input.in.mace` for a template. + +## MACE License +The provided MACE model (MACE-OFF23_medium.model) is downloaded from the [MACE-OFF GitHub repository](https://github.com/ACEsuit/mace-off) and is licensed under the [Academic Software License Agreement (ASLA)](https://github.com/ACEsuit/mace-off/blob/main/LICENSE.md). + +Note that this is not our model and we are not responsible for its performance or any issues related to its use. The purpose of its inclusion here is to provide a working example of how to use MACE with ABIN. + +For full terms and conditions, please refer to the [MACE-OFF GitHub repository](https://github.com/ACEsuit/mace-off). \ No newline at end of file diff --git a/interfaces/MACE/mace_server.py b/interfaces/MACE/mace_server.py new file mode 100644 index 00000000..c61383ed --- /dev/null +++ b/interfaces/MACE/mace_server.py @@ -0,0 +1,319 @@ +###################### +# MACE MPI SERVER +###################### +# Version: 6.0.0 +# Evaluation logic based on MACE/eval_config [mace.cli.eval_config] +# +# This server communicates with ABIN via MPI (using mpi4py). +# MACE configuration (model path, device, etc.) is received from ABIN's +# &mace namelist section at initialization time. +# +# Dependencies: +# pip install mpi4py mace-torch torch ase numpy +# +# Usage: +# mpirun -n 1 python mace_server.py +# +# The server writes its MPI port to 'mace_port.txt.1' for ABIN to read. + +import sys +import time +from io import StringIO + +import numpy as np +from mpi4py import MPI + +# These imports are deferred until after config is received +# so the server can start quickly and report port +import ase.data + +LOG_NAME = "MaceMPIServer" + +# MPI Tags (must match Fortran module mod_mace_mpi) +MACE_TAG_EXIT = 666 +MACE_TAG_DATA = 2 + + +def log(message, should_print=True): + msg_formatted = "[{}]: {}".format(LOG_NAME, str(message)) + if should_print: + print(msg_formatted, flush=True) + return msg_formatted + + +def parse_config_string(config_str): + """Parse key=value config string sent from ABIN's &mace namelist.""" + config = {} + for pair in config_str.split(';'): + pair = pair.strip() + if '=' in pair: + key, value = pair.split('=', 1) + config[key.strip()] = value.strip() + return config + + +class MaceModel: + """ + Manages the MACE ML model for evaluating atomic configurations. + Configuration is received from ABIN via MPI. + """ + + def __init__(self, config): + import torch + from mace.tools import torch_tools, utils + from mace import data as mace_data + + self.mace_data = mace_data + self.torch_tools = torch_tools + self.utils = utils + self.torch = torch + + log("initializing MaceModel...") + start_time = time.time() + + model_path = config.get('model', '') + if not model_path: + raise RuntimeError("MACE model path not specified") + + device = config.get('device', 'cpu') + default_dtype = config.get('default_dtype', 'float64') + self.batch_size = int(config.get('batch_size', '64')) + self.compute_stress = config.get('compute_stress', 'false').lower() == 'true' + self.return_contributions = config.get('return_contributions', 'false').lower() == 'true' + self.info_prefix = config.get('info_prefix', 'MACE_') + head_val = config.get('head', '') + self.head = head_val if head_val else None + + torch_tools.set_default_dtype(default_dtype) + self.device = str(torch_tools.init_device(device)) + + import os + if os.path.isfile(model_path): + # Load from local file + try: + self.model = torch.jit.load(f=model_path, map_location=device).to(device) + except Exception: + log("Failed to load as TorchScript, trying as regular PyTorch model...") + self.model = torch.load(f=model_path, map_location=device, weights_only=False).to(device) + else: + # Try loading as a MACE foundation model (auto-downloads) + self.model = self._load_foundation_model(model_path, device) + + for param in self.model.parameters(): + param.requires_grad = False + + log("model loaded in %.2f s" % (time.time() - start_time)) + + def _load_foundation_model(self, model_name, device): + """ + Load a MACE foundation model by name. + Supported names include: + - MACE-OFF23_medium.model, MACE-OFF23_small.model, MACE-OFF23_large.model + - medium, small, large (shorthand for MACE-OFF23) + - MACE-MP-0_medium.model, etc. + """ + from mace.calculators.foundations_models import mace_off, mace_mp + + name = model_name.replace('.model', '').strip() + log(f"Loading foundation model: {name}") + + # Parse model family and size + name_lower = name.lower() + if name_lower in ('small', 'medium', 'large'): + size = name_lower + calc = mace_off(model=size, device=device, return_raw_model=True) + elif 'mace-off23' in name_lower or 'mace_off23' in name_lower: + size = name.split('_')[-1].lower() + if size not in ('small', 'medium', 'large'): + size = 'medium' + calc = mace_off(model=size, device=device, return_raw_model=True) + elif 'mace-mp' in name_lower or 'mace_mp' in name_lower: + size = name.split('_')[-1].lower() + if size not in ('small', 'medium', 'large'): + size = 'medium' + calc = mace_mp(model=size, device=device, return_raw_model=True) + else: + raise RuntimeError( + f"Unknown model '{model_name}'. Provide a path to a local .model file, " + f"or use a foundation model name like 'MACE-OFF23_medium.model' or 'medium'." + ) + + return calc + + def evaluate(self, atom_types, coords_bohr): + """ + Evaluate energy and forces for a single configuration. + + Parameters: + atom_types: list of atomic symbols (e.g. ['H', 'O', 'H']) + coords_bohr: numpy array of shape (natom, 3) in Bohr + + Returns: + energy_hartree: energy in Hartree + forces_hartree_bohr: forces in Hartree/Bohr, shape (natom, 3) + """ + import ase + from mace.tools import torch_geometric + + # Unit conversions + bohr_to_ang = 0.529177249 + ev_to_hartree = 1.0 / 27.211399 + ev_per_ang_to_hartree_per_bohr = ev_to_hartree * bohr_to_ang + + # Convert coordinates from Bohr to Angstrom + coords_ang = coords_bohr * bohr_to_ang + + # Create ASE atoms object + atoms = ase.Atoms(symbols=atom_types, positions=coords_ang) + + if self.head is not None: + atoms.info["head"] = self.head + + configs = [self.mace_data.config_from_atoms(atoms)] + + # Prepare dataset + z_table = self.utils.AtomicNumberTable([ + int(z) for z in self.model.atomic_numbers + ]) + + try: + heads = self.model.heads + except AttributeError: + heads = None + + data_loader = torch_geometric.dataloader.DataLoader(dataset=[ + self.mace_data.AtomicData.from_config( + config, + z_table=z_table, + cutoff=float(self.model.r_max), + heads=heads + ) + for config in configs + ], batch_size=self.batch_size, shuffle=False, drop_last=False) + + # Evaluate + for batch in data_loader: + batch = batch.to(self.device) + output = self.model( + batch.to_dict(), + compute_stress=self.compute_stress + ) + + energy_ev = self.torch_tools.to_numpy(output["energy"])[0] + forces_ev_ang = np.split( + self.torch_tools.to_numpy(output["forces"]), + indices_or_sections=batch.ptr[1:], + axis=0, + )[0] + + # Convert to atomic units + energy_hartree = energy_ev * ev_to_hartree + forces_hartree_bohr = forces_ev_ang * ev_per_ang_to_hartree_per_bohr + + return energy_hartree, forces_hartree_bohr + + +def main(): + comm = MPI.COMM_WORLD + + # Open MPI port and write to file for ABIN to read + port_name = MPI.Open_port() + log(f"MPI port opened: {port_name}") + + port_file = "mace_port.txt.1" + with open(port_file, "w") as f: + f.write(port_name) + log(f"Port written to {port_file}") + + # Accept connection from ABIN + log("Waiting for ABIN to connect...") + abin_comm = comm.Accept(port_name) + log("Connection from ABIN accepted!") + + # Receive number of atoms + natom_buf = np.empty(1, dtype=np.intc) + abin_comm.Recv([natom_buf, MPI.INT], source=0, tag=MACE_TAG_DATA) + natom = int(natom_buf[0]) + log(f"Received number of atoms: {natom}") + + # Receive atom types + atom_type_buf = bytearray(natom * 2) + abin_comm.Recv([atom_type_buf, MPI.CHAR], source=0, tag=MACE_TAG_DATA) + atom_types_str = atom_type_buf.decode('ascii') + atom_types = [atom_types_str[i:i + 2].strip() for i in range(0, len(atom_types_str), 2)] + log(f"Received atom types: {atom_types}") + + # Receive configuration string + config_status = MPI.Status() + abin_comm.Probe(source=0, tag=MACE_TAG_DATA, status=config_status) + config_len = config_status.Get_count(MPI.CHAR) + config_buf = bytearray(config_len) + abin_comm.Recv([config_buf, MPI.CHAR], source=0, tag=MACE_TAG_DATA) + config_str = config_buf.decode('ascii').strip() + log(f"Received config: {config_str}") + + config = parse_config_string(config_str) + + # Load MACE model + mace_model = MaceModel(config) + log("MACE model ready. Entering main loop.") + + # Main loop: receive coordinates, compute, send results + eval_count = 0 + while True: + # Receive natom (sent each step for protocol consistency) + status = MPI.Status() + abin_comm.Probe(source=0, tag=MPI.ANY_TAG, status=status) + + if status.Get_tag() == MACE_TAG_EXIT: + log("Received exit signal from ABIN") + # Consume the message + abin_comm.Recv([natom_buf, MPI.INT], source=0, tag=MACE_TAG_EXIT) + break + + abin_comm.Recv([natom_buf, MPI.INT], source=0, tag=MACE_TAG_DATA) + natom_step = int(natom_buf[0]) + + if natom_step != natom: + log(f"ERROR: Received natom={natom_step}, expected {natom}") + break + + # Receive coordinates (3*natom doubles, in Bohr) + coords = np.empty((3, natom), dtype=np.float64) + abin_comm.Recv([coords, MPI.DOUBLE], source=0, tag=MACE_TAG_DATA) + # Transpose to (natom, 3) for ASE + coords_bohr = coords.T.copy() + + eval_count += 1 + log(f"Evaluation {eval_count}: evaluating...") + + try: + energy, forces = mace_model.evaluate(atom_types, coords_bohr) + + # Send energy (1 double, in Hartree) + energy_buf = np.array([energy], dtype=np.float64) + abin_comm.Send([energy_buf, MPI.DOUBLE], dest=0, tag=0) + + # Send forces (3*natom doubles, in Hartree/Bohr) + # Transpose back to (3, natom) to match Fortran column-major layout + forces_send = forces.T.copy() + abin_comm.Send([forces_send, MPI.DOUBLE], dest=0, tag=0) + + log(f"Evaluation {eval_count}: energy = {energy:.10f} Hartree") + + except Exception as e: + log(f"ERROR during evaluation: {e}") + # Send error tag + error_energy = np.array([0.0], dtype=np.float64) + abin_comm.Send([error_energy, MPI.DOUBLE], dest=0, tag=1) + break + + # Cleanup + log("Shutting down...") + abin_comm.Disconnect() + MPI.Close_port(port_name) + log("Server stopped.") + + +if __name__ == "__main__": + main() diff --git a/sample_inputs/input.in.mace b/sample_inputs/input.in.mace new file mode 100644 index 00000000..a7bc1f9a --- /dev/null +++ b/sample_inputs/input.in.mace @@ -0,0 +1,39 @@ +Sample input for ABIN with MACE (Machine Learning Atomic Cluster Expansion) interface. + +The MACE server is a Python process communicating with ABIN via MPI. +Required Python packages: pip install mpi4py mace-torch torch ase numpy + +Launch with: bash utils/run.mace_mpi_abin.sh + +&general +pot='_mace_' ! MACE ML potential via MPI +mdtype='md', ! classical simulation +nstep=5000, ! number of steps +dt=5., ! timestep [au] + +irest=0, ! should we restart from restart.xyz? + +nwrite=1, ! how often some output should be printed +nwritex=5, ! how often should we print coordinates? +nrest=1000, ! how often we print restart files? +nwritev=5, ! how often we print velocities? +/ + +&nhcopt +temp=0.0, ! temperature [K] +inose=0, ! no thermostat (NVE) +/ + + +MACE model configuration + +&mace +mace_model='MACE/MACE_compiled.model', ! path to compiled MACE model (required) +mace_device='cpu', ! 'cpu' or 'cuda' (defaults to 'cpu') +mace_default_dtype='float64', ! 'float32' or 'float64' (defaults to 'float64') +mace_batch_size=64, ! batch size for evaluation (defaults to 64) +mace_compute_stress=.false., ! compute stress tensor (defaults to .false.) +mace_return_contributions=.false., ! return energy contributions per body order (defaults to .false.) +mace_info_prefix='MACE_', ! prefix for output keys (defaults to 'MACE_') +mace_head='', ! model head (optional, for multi-head models) (defaults to '') +/ diff --git a/src/Makefile b/src/Makefile index e2baf631..88aa6f06 100755 --- a/src/Makefile +++ b/src/Makefile @@ -1,7 +1,7 @@ F_OBJS := constants.o fortran_interfaces.o error.o modules.o mpi_wrapper.o files.o utils.o io.o random.o arrays.o qmmm.o fftw_interface.o \ shake.o nosehoover.o gle.o transform.o potentials.o force_spline.o estimators.o ekin.o vinit.o plumed.o \ remd.o force_bound.o water.o h2o_schwenke.o h2o_cvrqd.o force_h2o.o force_cp2k.o sh_integ.o surfacehop.o landau_zener.o potentials_sh.o\ - force_mm.o tera_mpi_api.o force_abin.o force_tcpb.o force_tera.o force_terash.o en_restraint.o analyze_ext_template.o geom_analysis.o analysis.o \ + force_mm.o tera_mpi_api.o mace_mpi_api.o force_abin.o force_tcpb.o force_tera.o force_terash.o force_mace.o en_restraint.o analyze_ext_template.o geom_analysis.o analysis.o \ minimizer.o mdstep.o forces.o cmdline.o init.o C_OBJS := water_interface.o diff --git a/src/force_mace.F90 b/src/force_mace.F90 new file mode 100644 index 00000000..181d6782 --- /dev/null +++ b/src/force_mace.F90 @@ -0,0 +1,146 @@ +module mod_force_mace +! ---------------------------------------------------------------- +! Interface for MACE (Machine Learning Atomic Cluster Expansion) +! Perform MPI communications with the MACE Python server. +! +! Modeled after mod_force_tera for TeraChem. +! ---------------------------------------------------------------- + use mod_const, only: DP + use mod_error, only: fatal_error + use mod_files, only: stderr + use mod_mace_mpi +#ifdef USE_MPI + use mpi + use mod_mpi, only: handle_mpi_error, check_recv_count +#endif + implicit none + private + public :: force_mace + save + +contains + + subroutine force_mace(x, y, z, fx, fy, fz, eclas, walkmax) + real(DP), intent(in) :: x(:, :), y(:, :), z(:, :) + real(DP), intent(inout) :: fx(:, :), fy(:, :), fz(:, :) + real(DP), intent(inout) :: eclas + integer, intent(in) :: walkmax + integer :: iw + integer :: mace_comm + logical :: abort + + abort = .false. + +!$OMP PARALLEL DO PRIVATE(iw, mace_comm) + do iw = 1, walkmax + +!$OMP FLUSH(abort) + if (.not. abort) then + +#ifdef USE_MPI + mace_comm = get_mace_communicator() + + call send_mace(x, y, z, iw, mace_comm) + + call receive_mace(fx, fy, fz, eclas, iw, walkmax, mace_comm, abort) + if (abort) cycle +#endif + + end if + + end do +!$OMP END PARALLEL DO + + if (abort) then + call fatal_error(__FILE__, __LINE__, & + & 'MACE external forces error') + end if + + end subroutine force_mace + +#ifdef USE_MPI + + subroutine send_mace(x, y, z, iw, comm) + use mod_qmmm, only: natqm + real(DP), intent(in) :: x(:, :), y(:, :), z(:, :) + integer, intent(in) :: iw, comm + + ! Send number of atoms each step (for protocol consistency) + call send_mace_natom(natqm, comm) + + ! Send coordinates in Bohr + call send_mace_coordinates(x, y, z, natqm, iw, comm) + end subroutine send_mace + + subroutine receive_mace(fx, fy, fz, eclas, iw, walkmax, comm, abort) + use, intrinsic :: iso_fortran_env, only: OUTPUT_UNIT + use mod_general, only: idebug + use mod_qmmm, only: natqm + real(DP), intent(inout) :: fx(:, :), fy(:, :), fz(:, :) + real(DP), intent(inout) :: eclas + integer, intent(in) :: iw, walkmax + integer, intent(in) :: comm + logical, intent(inout) :: abort + real(DP) :: energy + real(DP) :: forces(3, size(fx, 1)) + integer :: status(MPI_STATUS_SIZE) + integer :: ierr, iat + + call wait_for_mace(comm) + + ! Receive energy (in Hartree, already converted by server) + if (idebug > 2) then + print '(a)', 'MACE: Waiting to receive energy...' + call flush (OUTPUT_UNIT) + end if + call MPI_Recv(energy, 1, MPI_DOUBLE_PRECISION, MPI_ANY_SOURCE, & + & MPI_ANY_TAG, comm, status, ierr) + call handle_mpi_error(ierr) + call check_recv_count(status, 1, MPI_DOUBLE_PRECISION) + + ! Check for error tag + if (status(MPI_TAG) == 1) then + write (stderr, *) 'Got error tag from MACE server. Evaluation failed.' + abort = .true. +!$OMP FLUSH(abort) + return + end if + + if (idebug > 1) then + print '(A,ES15.6)', 'MACE: Received energy [Hartree]:', energy + call flush (OUTPUT_UNIT) + end if + + ! Receive forces (in Hartree/Bohr, already converted by server) + if (idebug > 2) then + print '(a)', 'MACE: Waiting to receive forces...' + end if + call MPI_Recv(forces, 3 * natqm, MPI_DOUBLE_PRECISION, & + MPI_ANY_SOURCE, MPI_ANY_TAG, comm, status, ierr) + call handle_mpi_error(ierr) + call check_recv_count(status, 3 * natqm, MPI_DOUBLE_PRECISION) + + if (idebug > 1) then + print '(A)', 'MACE: Received forces [Hartree/Bohr]:' + do iat = 1, natqm + print*,'Atom ', iat, ': ', forces(:, iat) + end do + call flush (OUTPUT_UNIT) + end if + + ! Forces are received as forces (not gradients), so no sign flip needed + do iat = 1, natqm + fx(iat, iw) = forces(1, iat) + fy(iat, iw) = forces(2, iat) + fz(iat, iw) = forces(3, iat) + end do + +!$OMP ATOMIC + eclas = eclas + energy / walkmax + + end subroutine receive_mace + +! USE_MPI +#endif + +end module mod_force_mace diff --git a/src/forces.F90 b/src/forces.F90 index 21006d0f..67893504 100644 --- a/src/forces.F90 +++ b/src/forces.F90 @@ -140,6 +140,7 @@ subroutine force_wrapper(x, y, z, fx, fy, fz, e_pot, chpot, walkmax) use mod_force_tcpb, only: force_tcpb use mod_force_tera, only: force_tera use mod_terampi_sh, only: force_terash + use mod_force_mace, only: force_mace implicit none ! allow(external-procedure) ! fortitude linter external :: force_water @@ -183,6 +184,8 @@ subroutine force_wrapper(x, y, z, fx, fy, fz, e_pot, chpot, walkmax) else call force_tera(x, y, z, fx, fy, fz, eclas, walkmax) end if + case ("_mace_") + call force_mace(x, y, z, fx, fy, fz, eclas, walkmax) case DEFAULT call force_abin(x, y, z, fx, fy, fz, eclas, chpot, walkmax) end select diff --git a/src/init.F90 b/src/init.F90 index e4598fd3..c61f1bfb 100644 --- a/src/init.F90 +++ b/src/init.F90 @@ -66,6 +66,7 @@ subroutine init(dt) use mod_force_tcpb, only: initialize_tcpb use mod_terampi use mod_terampi_sh + use mod_mace_mpi use mod_mdstep, only: initialize_integrator, nabin, nstep_ref real(DP), intent(out) :: dt ! Input parameters for analytical potentials @@ -149,6 +150,10 @@ subroutine init(dt) namelist /qmmm/ natqm, natmm, q, LJ_rmin, LJ_eps, mm_types + namelist /mace/ mace_model, mace_device, mace_default_dtype, & + mace_batch_size, mace_compute_stress, mace_return_contributions, & + mace_info_prefix, mace_head, mace_max_mpi_wait_time, mace_mpi_milisleep + chcoords = 'mini.dat' xyz_units = 'angstrom' chinput = 'input.in' @@ -216,6 +221,11 @@ subroutine init(dt) call initialize_terachem_interface(trim(tc_server_name)) end if + ! Connect to MACE server as early as possible + if (pot == '_mace_') then + call initialize_mace_interface() + end if + if (mdtype /= '') then mdtype = tolower(mdtype) select case (mdtype) @@ -395,6 +405,17 @@ subroutine init(dt) rewind (uinput) end if + ! Read &mace namelist when using MACE potential + if (pot == '_mace_') then + read (uinput, mace, iostat=iost, iomsg=chiomsg) + rewind (uinput) + if (iost /= 0) then + write (stderr, *) chiomsg + call fatal_error(__FILE__, __LINE__, & + & 'Could not read namelist "mace". Required when pot="_mace_".') + end if + end if + close (uinput) ! END OF READING INPUT @@ -405,6 +426,10 @@ subroutine init(dt) end if end if + if (pot == '_mace_') then + call initialize_mace_server() + end if + if (pot == '_tcpb_' .or. restrain_pot == '_tcpb_' .or. pot_ref == '_tcpb_') then call initialize_tcpb(natqm, atnames, tcpb_port, tcpb_host, tcpb_input_file) end if @@ -823,6 +848,10 @@ subroutine print_simulation_parameters() write (stdout, nml=qmmm, delim='APOSTROPHE') write (stdout, *) end if + if (pot == '_mace_') then + write (stdout, nml=mace, delim='APOSTROPHE') + write (stdout, *) + end if end subroutine print_simulation_parameters subroutine print_logo() @@ -1253,6 +1282,7 @@ subroutine finish(error_code) use mod_plumed, only: iplumed, finalize_plumed use mod_terampi, only: finalize_terachem use mod_terampi_sh, only: finalize_terash + use mod_mace_mpi, only: finalize_mace use mod_splined_grid, only: finalize_spline use mod_force_mm, only: finalize_mm use mod_force_tcpb, only: finalize_tcpb @@ -1267,6 +1297,10 @@ subroutine finish(error_code) call finalize_terachem(error_code) end if + if (pot == '_mace_') then + call finalize_mace(error_code) + end if + if (pot == '_tcpb_') then call finalize_tcpb() end if diff --git a/src/mace_mpi_api.F90 b/src/mace_mpi_api.F90 new file mode 100644 index 00000000..d4f53368 --- /dev/null +++ b/src/mace_mpi_api.F90 @@ -0,0 +1,318 @@ +! MPI interface with MACE Python server +! Modeled after mod_terampi for TeraChem +module mod_mace_mpi + use, intrinsic :: iso_fortran_env, only: OUTPUT_UNIT + use mod_const, only: DP + use mod_error, only: fatal_error + use mod_files, only: stdout, stderr + use mod_utils, only: milisleep +#ifdef USE_MPI + use mpi + use mod_mpi, only: handle_mpi_error, get_mpi_error_string +#endif + implicit none + private + + ! MPI tags for MACE protocol + integer, parameter :: MACE_TAG_EXIT = 666 + integer, parameter :: MACE_TAG_DATA = 2 + + ! Port file for MACE MPI connection + character(len=*), parameter :: MACE_PORT_FILE_NAME = 'mace_port.txt.' + + ! How long do we wait for MACE port [seconds] + real(DP) :: mace_max_mpi_wait_time = 60 + ! Sleep interval in miliseconds while waiting for MACE calculation to finish + integer :: mace_mpi_milisleep = 50 + + ! MACE configuration parameters (read from &mace namelist in init.F90) + character(len=1024) :: mace_model = 'MACE/MACE-OFF23_medium.model' + character(len=16) :: mace_device = 'cpu' + character(len=16) :: mace_default_dtype = 'float64' + integer :: mace_batch_size = 64 + logical :: mace_compute_stress = .false. + logical :: mace_return_contributions = .false. + character(len=64) :: mace_info_prefix = 'MACE_' + character(len=256) :: mace_head = '' + +#ifdef USE_MPI + integer :: mace_comm = MPI_COMM_NULL + logical :: mace_communication_established = .false. +#endif + + public :: MACE_TAG_EXIT, MACE_TAG_DATA + public :: mace_model, mace_device, mace_default_dtype + public :: mace_batch_size, mace_compute_stress, mace_return_contributions + public :: mace_info_prefix, mace_head + public :: mace_max_mpi_wait_time, mace_mpi_milisleep +#ifdef USE_MPI + public :: get_mace_communicator + public :: wait_for_mace + public :: send_mace_natom, send_mace_atom_types, send_mace_config + public :: send_mace_coordinates +#endif + public :: initialize_mace_interface, initialize_mace_server, finalize_mace + save + +contains + +#ifdef USE_MPI + + subroutine initialize_mace_interface() + integer :: ierr + + mace_comm = MPI_COMM_NULL + mace_communication_established = .false. + + ! Set MPI error handler to allow retries + call MPI_Comm_set_errhandler(MPI_COMM_WORLD, MPI_ERRORS_RETURN, ierr) + call handle_mpi_error(ierr) + + call connect_mace_server() + end subroutine initialize_mace_interface + + subroutine connect_mace_server() + character(len=MPI_MAX_PORT_NAME) :: port_name + integer :: ierr, newcomm + character(len=1024) :: portfile + + write (portfile, '(A,I0)') MACE_PORT_FILE_NAME, 1 + call read_mace_port_from_file(trim(portfile), port_name) + + write (stdout, '(2A)') 'Found MACE port: ', trim(port_name) + write (stdout, '(A)') 'Establishing connection to MACE server...' + call flush (OUTPUT_UNIT) + + call MPI_Comm_connect(trim(port_name), MPI_INFO_NULL, 0, MPI_COMM_SELF, newcomm, ierr) + call handle_mpi_error(ierr) + write (stdout, '(A)') 'Connection to MACE server established!' + + mace_comm = newcomm + mace_communication_established = .true. + end subroutine connect_mace_server + + integer function get_mace_communicator() result(comm) + comm = mace_comm + end function get_mace_communicator + + subroutine read_mace_port_from_file(portfile, port_name) + character(len=*), intent(in) :: portfile + character(len=MPI_MAX_PORT_NAME), intent(out) :: port_name + integer :: iunit, iost + real(DP) :: timer + + write (stdout, '(A)') 'Reading MACE port name from file '//portfile + port_name = '' + timer = MPI_WTIME() + + do + open (newunit=iunit, file=portfile, action="read", status="old", iostat=iost) + if (iost == 0) then + exit + end if + + if ((MPI_WTIME() - timer) > mace_max_mpi_wait_time) then + call fatal_error(__FILE__, __LINE__, & + & 'Could not open file '//portfile) + end if + + write (stdout, '(A)') 'WARNING: Cannot open file '//portfile + call milisleep(500) + end do + + read (iunit, '(A)', iostat=iost) port_name + if (iost /= 0) then + call fatal_error(__FILE__, __LINE__, & + & 'Could not read file '//portfile) + end if + + close (iunit, status='delete') + end subroutine read_mace_port_from_file + + subroutine initialize_mace_server() + use mod_qmmm, only: natqm + use mod_system, only: names + + call send_mace_natom(natqm, mace_comm) + call send_mace_atom_types(names, natqm, mace_comm) + call send_mace_config(mace_comm) + end subroutine initialize_mace_server + + subroutine finalize_mace(abin_error_code) + integer, intent(in) :: abin_error_code + integer :: ierr, empty + integer :: i + + ! Suppress unused variable warning + i = abin_error_code + + ! Set error handler to return so we can handle errors gracefully + call MPI_Comm_set_errhandler(MPI_COMM_WORLD, MPI_ERRORS_RETURN, ierr) + + if (.not. mace_communication_established) return + + write (stdout, '(A)') 'Shutting down MACE server' + + call MPI_Send(empty, 0, MPI_INTEGER, 0, MACE_TAG_EXIT, mace_comm, ierr) + + if (ierr /= MPI_SUCCESS) then + write (stderr, '(A)') 'MPI ERROR during shutdown of MACE server' + write (stderr, '(A)') 'Verify manually that the MACE server was terminated.' + write (stderr, *) get_mpi_error_string(ierr) + end if + + call MPI_Comm_free(mace_comm, ierr) + if (ierr /= MPI_SUCCESS) then + write (stderr, *) get_mpi_error_string(ierr) + end if + end subroutine finalize_mace + + subroutine wait_for_mace(comm) + integer, intent(in) :: comm + integer :: status(MPI_STATUS_SIZE) + integer :: ierr + logical :: ready + + if (mace_mpi_milisleep <= 0) return + + ! Reduce CPU usage by probing + sleeping instead of blocking MPI_Recv + ready = .false. + do while (.not. ready) + call MPI_IProbe(MPI_ANY_SOURCE, MPI_ANY_TAG, comm, ready, status, ierr) + call handle_mpi_error(ierr) + call milisleep(mace_mpi_milisleep) + end do + end subroutine wait_for_mace + + subroutine send_mace_natom(num_atom, comm) + use mod_general, only: idebug + integer, intent(in) :: num_atom + integer, intent(in) :: comm + integer :: ierr + + if (idebug > 1) then + write (stdout, '(A, I0)') 'MACE: Sending number of atoms = ', num_atom + call flush (OUTPUT_UNIT) + end if + call MPI_Send(num_atom, 1, MPI_INTEGER, 0, MACE_TAG_DATA, comm, ierr) + call handle_mpi_error(ierr) + end subroutine send_mace_natom + + subroutine send_mace_atom_types(at_names, num_atom, comm) + use mod_general, only: idebug + character(len=2), intent(in) :: at_names(:) + integer, intent(in) :: num_atom + integer, intent(in) :: comm + character(len=2*num_atom) :: buffer + integer :: ierr, offset, iat + + buffer = '' + offset = 1 + do iat = 1, num_atom + write (buffer(offset:offset + 1), '(A2)') at_names(iat) + offset = offset + 2 + end do + + if (idebug > 1) then + write (stdout, '(A)') 'MACE: Sending atom types: ' + write (stdout, '(A)') trim(buffer) + call flush (OUTPUT_UNIT) + end if + + call MPI_Send(buffer, num_atom * 2, MPI_CHARACTER, 0, MACE_TAG_DATA, comm, ierr) + call handle_mpi_error(ierr) + end subroutine send_mace_atom_types + + subroutine send_mace_config(comm) + use mod_general, only: idebug + integer, intent(in) :: comm + character(len=4096) :: config_str + integer :: ierr + character(len=8) :: batch_str + character(len=8) :: stress_str, contrib_str + + ! Build a key=value config string for the Python server + write (batch_str, '(I0)') mace_batch_size + if (mace_compute_stress) then + stress_str = 'true' + else + stress_str = 'false' + end if + if (mace_return_contributions) then + contrib_str = 'true' + else + contrib_str = 'false' + end if + + config_str = 'model='//trim(mace_model)// & + & ';device='//trim(mace_device)// & + & ';default_dtype='//trim(mace_default_dtype)// & + & ';batch_size='//trim(adjustl(batch_str))// & + & ';compute_stress='//trim(stress_str)// & + & ';return_contributions='//trim(contrib_str)// & + & ';info_prefix='//trim(mace_info_prefix)// & + & ';head='//trim(mace_head) + + if (idebug > 1) then + write (stdout, '(A)') 'MACE: Sending config: ' + write (stdout, '(A)') trim(config_str) + call flush (OUTPUT_UNIT) + end if + + call MPI_Send(config_str, len_trim(config_str), MPI_CHARACTER, & + & 0, MACE_TAG_DATA, comm, ierr) + call handle_mpi_error(ierr) + end subroutine send_mace_config + + subroutine send_mace_coordinates(x, y, z, num_atom, iw, comm) + use mod_general, only: idebug + real(DP), intent(in) :: x(:, :), y(:, :), z(:, :) + integer, intent(in) :: num_atom + integer, intent(in) :: iw + integer, intent(in) :: comm + real(DP), allocatable :: coords(:, :) + integer :: ierr, iat + + allocate (coords(3, num_atom)) + + ! Send coordinates in Bohr (server converts to Angstrom) + do iat = 1, num_atom + coords(1, iat) = x(iat, iw) + coords(2, iat) = y(iat, iw) + coords(3, iat) = z(iat, iw) + end do + + if (idebug > 1) then + write (stdout, '(A)') 'MACE: Sending coordinates [Bohr]: ' + do iat = 1, num_atom + write (stdout, *) 'Atom ', iat, ': ', coords(:, iat) + call flush (OUTPUT_UNIT) + end do + end if + call MPI_Send(coords, num_atom * 3, MPI_DOUBLE_PRECISION, 0, MACE_TAG_DATA, comm, ierr) + call handle_mpi_error(ierr) + end subroutine send_mace_coordinates + +#else + + subroutine initialize_mace_interface() + use mod_error, only: not_compiled_with + call not_compiled_with('MPI') + end subroutine initialize_mace_interface + + subroutine initialize_mace_server() + use mod_error, only: not_compiled_with + call not_compiled_with('MPI') + end subroutine initialize_mace_server + + ! This must be a no-op, since it is called from finish() + subroutine finalize_mace(error_code) + integer, intent(in) :: error_code + integer :: i + i = error_code + end subroutine finalize_mace + +! USE_MPI +#endif + +end module mod_mace_mpi diff --git a/src/mpi_wrapper.F90 b/src/mpi_wrapper.F90 index 0c58aa53..b61b6130 100644 --- a/src/mpi_wrapper.F90 +++ b/src/mpi_wrapper.F90 @@ -173,6 +173,7 @@ subroutine initialize_mpi(pot, pot_ref, nteraservers) i = nteraservers if (iremd == 1 .or. & & pot == '_tera_' .or. pot_ref == '_tera_' .or. & + & pot == '_mace_' .or. & & pot == '_cp2k_' .or. pot_ref == '__cp2k__') then call not_compiled_with('MPI') end if diff --git a/tests/MACE/energies.dat.ref b/tests/MACE/energies.dat.ref new file mode 100644 index 00000000..bdba150c --- /dev/null +++ b/tests/MACE/energies.dat.ref @@ -0,0 +1,3 @@ + # Time[fs] E-potential E-kinetic E-Total E-Total-Avg + 0.48 0.1900249814E-01 0.4107749350E-04 0.1904357563E-01 0.1904357563E-01 + 0.97 0.1887955609E-01 0.1639526646E-03 0.1904350876E-01 0.1904354219E-01 diff --git a/tests/MACE/forces.xyz.ref b/tests/MACE/forces.xyz.ref new file mode 100644 index 00000000..700ba5d3 --- /dev/null +++ b/tests/MACE/forces.xyz.ref @@ -0,0 +1,10 @@ + 3 +net force: -0.51642E-02 -0.51642E-02 0.10328E-01 torque force: -0.82312E-02 -0.53959E-02 0.13627E-01 +O 0.7386054962E-03 0.7386054962E-03 -0.1477210992E-02 +H 0.1811684277E-02 -0.1247763221E-01 0.1066594794E-01 +H -0.7714526717E-02 0.6574789773E-02 0.1139736944E-02 + 3 +net force: -0.51451E-02 -0.51451E-02 0.10290E-01 torque force: -0.82038E-02 -0.53788E-02 0.13583E-01 +O 0.7384535391E-03 0.7384535391E-03 -0.1476907078E-02 +H 0.1805766333E-02 -0.1243687350E-01 0.1063110716E-01 +H -0.7689326886E-02 0.6553312942E-02 0.1136013944E-02 diff --git a/tests/MACE/input.in b/tests/MACE/input.in new file mode 100644 index 00000000..40f694e7 --- /dev/null +++ b/tests/MACE/input.in @@ -0,0 +1,25 @@ +&general +pot='_mace_' +mdtype='md', +ipimd=0, +nstep=2, +dt=20., +irandom=13131313, + +nwrite=1, +nwritef=1, +nwritev=1, +nwritex=1, +nrest=1, +/ + +&nhcopt +inose=0, +temp=0.0d0 +/ + +&mace +mace_model='mock.model', +mace_device='cpu', +mace_default_dtype='float64', +/ diff --git a/tests/MACE/mini.xyz b/tests/MACE/mini.xyz new file mode 100644 index 00000000..37dcf4f2 --- /dev/null +++ b/tests/MACE/mini.xyz @@ -0,0 +1,5 @@ +3 +Water molecule +O 0.00000000 0.00000000 0.11726400 +H 0.00000000 0.75698200 -0.46905800 +H 0.00000000 -0.75698200 -0.46905800 diff --git a/tests/MACE/mock_mace_server.py b/tests/MACE/mock_mace_server.py new file mode 100644 index 00000000..8cb39f2e --- /dev/null +++ b/tests/MACE/mock_mace_server.py @@ -0,0 +1,112 @@ +""" +Mock MACE MPI server for E2E testing. + +This server mimics the MPI protocol of the real mace_server.py +but returns hardcoded energy and forces instead of running the +actual MACE ML model. This avoids the need for mace-torch/PyTorch. + +The returned values use a simple harmonic potential for water: +energy = 0.5 * sum(forces^2) (not actually, just fixed values) +""" + +import sys +import numpy as np +from mpi4py import MPI + +MACE_TAG_EXIT = 666 +MACE_TAG_DATA = 2 + +# Atomic unit conversions +BOHR_TO_ANG = 0.529177249 +EV_TO_HARTREE = 1.0 / 27.211399 + + +def main(): + comm = MPI.COMM_WORLD + + # Open MPI port and write to file + port_name = MPI.Open_port() + print(f"[MockMACE]: Port opened: {port_name}", flush=True) + + port_file = "mace_port.txt.1" + with open(port_file, "w") as f: + f.write(port_name) + + # Accept connection from ABIN + print("[MockMACE]: Waiting for ABIN...", flush=True) + abin_comm = comm.Accept(port_name) + print("[MockMACE]: Connected!", flush=True) + + # Receive number of atoms + natom_buf = np.empty(1, dtype=np.intc) + abin_comm.Recv([natom_buf, MPI.INT], source=0, tag=MACE_TAG_DATA) + natom = int(natom_buf[0]) + print(f"[MockMACE]: natom = {natom}", flush=True) + + # Receive atom types + atom_type_buf = bytearray(natom * 2) + abin_comm.Recv([atom_type_buf, MPI.CHAR], source=0, tag=MACE_TAG_DATA) + atom_types_str = atom_type_buf.decode('ascii') + print(f"[MockMACE]: atom types = {atom_types_str}", flush=True) + + # Receive config string + config_status = MPI.Status() + abin_comm.Probe(source=0, tag=MACE_TAG_DATA, status=config_status) + config_len = config_status.Get_count(MPI.CHAR) + config_buf = bytearray(config_len) + abin_comm.Recv([config_buf, MPI.CHAR], source=0, tag=MACE_TAG_DATA) + config_str = config_buf.decode('ascii') + print(f"[MockMACE]: config = {config_str}", flush=True) + + # Main loop + step = 0 + max_steps = 100 + while step < max_steps: + status = MPI.Status() + abin_comm.Probe(source=0, tag=MPI.ANY_TAG, status=status) + + if status.Get_tag() == MACE_TAG_EXIT: + print("[MockMACE]: Exit signal received", flush=True) + abin_comm.Recv([natom_buf, MPI.INT], source=0, tag=MACE_TAG_EXIT) + break + + # Receive natom per step + abin_comm.Recv([natom_buf, MPI.INT], source=0, tag=MACE_TAG_DATA) + + # Receive coordinates (3*natom doubles in Bohr) + coords = np.empty((3, natom), dtype=np.float64) + abin_comm.Recv([coords, MPI.DOUBLE], source=0, tag=MACE_TAG_DATA) + + step += 1 + + # Compute mock energy and forces using simple harmonic potential + # E = 0.5 * k * sum(r^2) where r is displacement from origin + k = 0.01 # force constant in Hartree/Bohr^2 + coords_t = coords.T # (natom, 3) + + # Simple harmonic energy around the center of mass + com = np.mean(coords_t, axis=0) + displ = coords_t - com + energy = 0.5 * k * np.sum(displ ** 2) + + # Forces = -gradient = -k * displacement + forces = -k * displ # (natom, 3) + + # Send energy + energy_buf = np.array([energy], dtype=np.float64) + abin_comm.Send([energy_buf, MPI.DOUBLE], dest=0, tag=0) + + # Send forces (3, natom) for Fortran column-major + forces_send = forces.T.copy() + abin_comm.Send([forces_send, MPI.DOUBLE], dest=0, tag=0) + + print(f"[MockMACE]: Step {step}, energy = {energy:.10f} Hartree", flush=True) + + # Cleanup + abin_comm.Disconnect() + MPI.Close_port(port_name) + print("[MockMACE]: Server stopped.", flush=True) + + +if __name__ == "__main__": + main() diff --git a/tests/MACE/movie.xyz.ref b/tests/MACE/movie.xyz.ref new file mode 100644 index 00000000..79c63dcc --- /dev/null +++ b/tests/MACE/movie.xyz.ref @@ -0,0 +1,10 @@ + 3 +Units: angs.; Time step: 1 ; Sim. Time [au]: 20.00 +O 0.26805290E-05 0.26805290E-05 0.11725864E+00 +H 0.10446384E-03 0.75626253E+00 -0.46844299E+00 +H -0.44482865E-03 -0.75660289E+00 -0.46899228E+00 + 3 +Units: angs.; Time step: 2 ; Sim. Time [au]: 40.00 +O 0.10721748E-04 0.10721748E-04 0.11724256E+00 +H 0.41762794E-03 0.75410567E+00 -0.46659929E+00 +H -0.17783462E-02 -0.75546638E+00 -0.46879527E+00 diff --git a/tests/MACE/temper.dat.ref b/tests/MACE/temper.dat.ref new file mode 100644 index 00000000..e8d85f11 --- /dev/null +++ b/tests/MACE/temper.dat.ref @@ -0,0 +1,3 @@ + # Time[fs] Temperature T-Average Conserved_quantity_of_thermostat + 0.48 2.88 2.88 + 0.97 11.50 7.19 diff --git a/tests/MACE/test.sh b/tests/MACE/test.sh new file mode 100755 index 00000000..f6bea90b --- /dev/null +++ b/tests/MACE/test.sh @@ -0,0 +1,125 @@ +#!/bin/bash +set -euo pipefail + +ABINEXE=$1 + +export ABINOUT=abin.out +export ABININ=input.in +export ABINGEOM=mini.xyz + +MACE_SERVER=mock_mace_server.py +MACE_OUT=mace_server.out + +# If $1 = "clean"; exit early. +if [[ "${1-}" = "clean" ]]; then + rm -f $MACE_OUT $ABINOUT *.dat *.diff + rm -f restart.xyz velocities.xyz forces.xyz movie.xyz restart.xyz.old + rm -f mace_port.txt.* ERROR ompi_uri.txt + exit 0 +fi + +# Determine MPI paths +if [[ -z ${MPI_PATH-} ]]; then + export MPIRUN=mpirun +else + export MPIRUN=$MPI_PATH/bin/mpirun +fi + +# Detect OpenMPI vs MPICH +IS_OPENMPI=false +if $MPIRUN --version 2>&1 | grep -q "Open MPI"; then + IS_OPENMPI=true +fi + +ompi_server_pid="" +MPIRUN_EXTRA_ARGS="" + +if [[ "$IS_OPENMPI" = "true" ]]; then + # OpenMPI requires ompi-server for MPI_Comm_connect/accept + OMPI_SERVER=${MPI_PATH-}/bin/ompi-server + if [[ -z ${MPI_PATH-} ]]; then + OMPI_SERVER=ompi-server + fi + + if ! which $OMPI_SERVER > /dev/null 2>&1; then + echo "Skipping MACE test: ompi-server not found (required for OpenMPI)" + exit 0 + fi + + OMPI_URI_FILE="$PWD/ompi_uri.txt" + $OMPI_SERVER --no-daemonize -r "$OMPI_URI_FILE" & + ompi_server_pid=$! + sleep 0.5 + + if [[ ! -f "$OMPI_URI_FILE" ]]; then + echo "ERROR: ompi-server did not create URI file" >&2 + kill $ompi_server_pid 2>/dev/null || true + exit 1 + fi + + MPIRUN_EXTRA_ARGS="--ompi-server file:$OMPI_URI_FILE" +fi + +MPIRUN_CMD="$MPIRUN --oversubscribe -n 1 $MPIRUN_EXTRA_ARGS" + +ABIN_CMD="$ABINEXE -i $ABININ -x $ABINGEOM" +MACE_CMD="python3 $MACE_SERVER" + +function cleanup { + kill -9 ${macepid-} ${abinpid-} > /dev/null 2>&1 || true + if [[ -n "$ompi_server_pid" ]]; then + kill $ompi_server_pid > /dev/null 2>&1 || true + fi + rm -f ompi_uri.txt + exit 0 +} +trap cleanup INT ABRT TERM EXIT + +# Launch mock MACE server +$MPIRUN_CMD $MACE_CMD > $MACE_OUT 2>&1 & +macepid=$! + +# Wait for the server to write the port file +MAX_WAIT=15 +i=0 +while [[ ! -f mace_port.txt.1 ]] && [[ $i -lt $MAX_WAIT ]]; do + sleep 0.5 + let ++i +done + +if [[ ! -f mace_port.txt.1 ]]; then + echo "ERROR: MACE server did not write port file" + cat $MACE_OUT 2>/dev/null || true + exit 1 +fi + +# Launch ABIN +$MPIRUN_CMD $ABIN_CMD > $ABINOUT 2>&1 & +abinpid=$! + +# Monitor both processes +MAX_ITER=60 +iter=0 +while true; do + abin_alive=$(ps -p $abinpid -o pid= 2>/dev/null | wc -l) + mace_alive=$(ps -p $macepid -o pid= 2>/dev/null | wc -l) + + if [[ $abin_alive -eq 0 ]] && [[ $mace_alive -eq 0 ]]; then + break + elif [[ $abin_alive -eq 0 ]] && [[ $mace_alive -ne 0 ]]; then + sleep 1 + kill -9 $macepid > /dev/null 2>&1 || true + break + elif [[ $mace_alive -eq 0 ]] && [[ $abin_alive -ne 0 ]]; then + echo "MACE server died. Killing ABIN." >&2 + kill -9 $abinpid > /dev/null 2>&1 || true + break + fi + + sleep 0.5 + let ++iter + if [[ $iter -gt $MAX_ITER ]]; then + echo "Maximum wait time exceeded." + break + fi +done diff --git a/tests/MACE/velocities.xyz.ref b/tests/MACE/velocities.xyz.ref new file mode 100644 index 00000000..b904af36 --- /dev/null +++ b/tests/MACE/velocities.xyz.ref @@ -0,0 +1,10 @@ +3 +Units: a.u.; Time step: 1 +O 0.5065291961E-06 0.5065291961E-06 -0.1013058392E-05 +H 0.1973006086E-04 -0.1358870561E-03 0.1161569952E-03 +H -0.8401468378E-04 0.7160243317E-04 0.1241225060E-04 +3 +Units: a.u.; Time step: 2 +O 0.1012988920E-05 0.1012988920E-05 -0.2025977840E-05 +H 0.3941717126E-04 -0.2714782991E-03 0.2320611278E-03 +H -0.1678464756E-03 0.1430489947E-03 0.2479748093E-04 diff --git a/tests/test.sh b/tests/test.sh index 77a6530b..2e7d4695 100755 --- a/tests/test.sh +++ b/tests/test.sh @@ -149,6 +149,8 @@ if [[ $TESTS = "all" ]];then folders[index]=TERAPI-SH-S0 let index++ folders[index]=TERAPI-LZ + let index++ + folders[index]=MACE else let index=${#folders[@]}+1 folders[index]=WITHOUT_MPI diff --git a/unit_tests/Makefile b/unit_tests/Makefile index 0acdbdce..bc413b4c 100755 --- a/unit_tests/Makefile +++ b/unit_tests/Makefile @@ -58,6 +58,13 @@ terapi_EXTRA_USE := throw_with_pfunit_mod terapi_EXTRA_INITIALIZE := initialize_throw $(eval $(call make_pfunit_test,terapi)) +mace_TESTS := test_mace.pf +mace_OTHER_LIBRARIES := ${LDFLAGS} ${LDLIBS} +mace_OTHER_SRCS := throw_with_pfunit.F90 +mace_EXTRA_USE := throw_with_pfunit_mod +mace_EXTRA_INITIALIZE := initialize_throw +$(eval $(call make_pfunit_test,mace)) + fftw_TESTS := test_fftw.pf fftw_OTHER_LIBRARIES := ${LDFLAGS} ${LDLIBS} fftw_OTHER_SRCS := throw_with_pfunit.F90 @@ -117,7 +124,7 @@ $(eval $(call make_pfunit_test,interface)) # TODO: Re-enable spline tests #all : utils prng masses plumed fftw terapi gle nhc pot mm spline interface io -all : utils prng masses plumed fftw terapi gle nhc pot mm interface io +all : utils prng masses plumed fftw terapi mace gle nhc pot mm interface io %.o : %.F90 $(FC) -c $(FFLAGS) $< @@ -129,6 +136,7 @@ test : clean_plumed_output clean_terapi_output clean_gle_output ./plumed ./fftw ./terapi + ./mace ./gle ./nhc ./pot @@ -139,7 +147,7 @@ test : clean_plumed_output clean_terapi_output clean_gle_output clean : clean_plumed_output clean_terapi_output clean_gle_output /bin/rm -f *dat *.o *.out *.mod *.inc test_*.F90 ERROR - /bin/rm -f masses utils plumed fftw terapi gle pot mm spline interface + /bin/rm -f masses utils plumed fftw terapi mace gle pot mm spline interface clean_plumed_output : /bin/rm -f bck.* plumed_*.dat diff --git a/unit_tests/test_mace.pf b/unit_tests/test_mace.pf new file mode 100644 index 00000000..673581d5 --- /dev/null +++ b/unit_tests/test_mace.pf @@ -0,0 +1,48 @@ +! Testing MACE MPI interface +module test_mace + use funit + use mod_const, only: DP + use mod_mace_mpi + implicit none + integer, parameter :: NATOMS = 3, NBEADS = 1 + real(DP) :: x(NATOMS, NBEADS), y(NATOMS, NBEADS), z(NATOMS, NBEADS) + real(DP) :: fx(NATOMS, NBEADS), fy(NATOMS, NBEADS), fz(NATOMS, NBEADS) + save + +contains + + @test + subroutine test_mace_defaults() + ! Verify that MACE namelist variables have correct defaults + @assertEqual('', trim(mace_model)) + @assertEqual('cpu', trim(mace_device)) + @assertEqual('float64', trim(mace_default_dtype)) + @assertEqual(64, mace_batch_size) + @assertFalse(mace_compute_stress) + @assertFalse(mace_return_contributions) + @assertEqual('MACE_', trim(mace_info_prefix)) + @assertEqual('', trim(mace_head)) + end subroutine test_mace_defaults + + @test + subroutine test_mace_tag_values() + ! Verify MPI tag constants + @assertEqual(666, MACE_TAG_EXIT) + @assertEqual(2, MACE_TAG_DATA) + end subroutine test_mace_tag_values + +#ifndef USE_MPI + + @test(ifndef=USE_MPI) + subroutine test_mace_not_compiled_with_mpi() + + call initialize_mace_interface() + @assertExceptionRaised('ABIN was not compiled with MPI') + + call initialize_mace_server() + @assertExceptionRaised('ABIN was not compiled with MPI') + + end subroutine test_mace_not_compiled_with_mpi + +#endif +end module test_mace diff --git a/utils/run.mace_mpi_abin.sh b/utils/run.mace_mpi_abin.sh new file mode 100755 index 00000000..9cb51af4 --- /dev/null +++ b/utils/run.mace_mpi_abin.sh @@ -0,0 +1,117 @@ +#!/bin/bash +# Launch script for ABIN + MACE MPI interface. +# +# This script launches both the Python MACE server and ABIN, +# connecting them via MPI. +# +# Prerequisites: +# - ABIN compiled with MPI=TRUE +# - Python environment with: mpi4py, mace-torch, torch, ase, numpy +# Install with: pip install mpi4py mace-torch torch ase numpy +# - MPICH (not OpenMPI) +# +# Usage: +# bash run.mace_mpi_abin.sh + +set -euo pipefail + +# ABIN SETUP +ABIN_OUT=abin.out +ABIN_IN=input.in +GEOM_IN=mini.xyz +VELOC_IN= + +# Path to Python with MACE dependencies (mpi4py, mace-torch, torch, ase, numpy). +# Uncomment and set to your Python interpreter: +# export MACE_PYTHON=/path/to/my/conda/envs/mace/bin/python +MACE_PYTHON="${MACE_PYTHON:-python3}" + +# Path to MACE server script +MACE_SERVER=MACE/mace_server.py + +################ + +LAUNCH_DIR=$PWD + +function files_exist() { + local error="" + for file in $* ; + do + if [[ ! -f $file ]];then + echo "ERROR: Cannot find file $file" >&2 + error=1 + fi + done + if [[ -n ${error-} ]];then + exit 1 + fi +} + +function validate_inputs() { + files_exist $ABIN_IN $GEOM_IN $MACE_SERVER + + # Check pot='_mace_' in ABIN input + test=$(egrep -o -e "^[^!]*pot[[:space:]]*=[[:space:]]*['\"]_mace_[\"']" $ABIN_IN || true) + if [[ -z $test ]];then + echo "ERROR: You did not specify pot='_mace_' in $ABIN_IN." >&2 + exit 1 + fi +} + +# Validate input files exist +validate_inputs + +# Determine MPIRUN +if [[ -z ${MPI_PATH-} ]];then + MPIRUN=mpirun +else + MPIRUN=$MPI_PATH/bin/mpirun +fi + +MPIRUN_ABIN="$MPIRUN -n 1" +MPIRUN_MACE="$MPIRUN -n 1" + +# Determine ABIN executable location +if [[ -z ${ABINEXE-} ]];then + ABINEXE=bin/abin +fi + +echo "Starting MACE MPI simulation" +echo "==============================" + +declare -A job_pids + +# LAUNCH MACE SERVER +$MPIRUN_MACE $MACE_PYTHON $MACE_SERVER > mace_server.out 2>&1 & +job_pids[mace]=$! +echo "Launched MACE server (PID: ${job_pids[mace]})" + +# Wait a moment for the port file to be created +sleep 1 + +# LAUNCH ABIN +ABIN_CMD="$ABINEXE -i $ABIN_IN -x $GEOM_IN" +if [[ -n $VELOC_IN ]];then + ABIN_CMD=$ABIN_CMD" -v $VELOC_IN" +fi +$MPIRUN_ABIN $ABIN_CMD > $ABIN_OUT 2>&1 & +job_pids[abin]=$! +echo "Launched ABIN (PID: ${job_pids[abin]})" + +# PERIODICALLY CHECK WHETHER ABIN AND MACE ARE RUNNING +function join_by { local IFS="$1"; shift; echo "$*"; } +regex=`join_by \| ${job_pids[@]}` +while true;do + njobs=$(ps -eo pid | egrep "$regex" | wc -l) + if [[ $njobs -eq 0 ]];then + echo "Both ABIN and MACE server stopped" + break + elif [[ $njobs -lt ${#job_pids[@]} ]];then + echo "One of ABIN or MACE server died. Killing the rest." >&2 + kill -9 ${job_pids[@]} 2>/dev/null || true + break + fi + sleep 3 +done + +echo "Simulation finished."