From 50c2b0bd2f56540a45b5493c9b249800d16d0637 Mon Sep 17 00:00:00 2001 From: GeorgeJuniorGG Date: Wed, 16 Apr 2025 09:15:29 -0300 Subject: [PATCH 1/2] add miniwave module --- miniwave/.gitignore | 9 + miniwave/README.md | 25 +++ miniwave/c-frontend/CMakeLists.txt | 19 ++ miniwave/c-frontend/miniwave.c | 238 +++++++++++++++++++++++ miniwave/c-frontend/selected_kernel.h.in | 6 + miniwave/kernels/sequential.c | 49 +++++ miniwave/kernels/sequential.h | 16 ++ miniwave/miniwave.py | 105 ++++++++++ miniwave/selected_kernel.h | 6 + miniwave/utils/__init__.py | 5 + miniwave/utils/compiler.py | 158 +++++++++++++++ miniwave/utils/dataset_writer.py | 68 +++++++ miniwave/utils/kernel.py | 234 ++++++++++++++++++++++ miniwave/utils/model.py | 173 ++++++++++++++++ miniwave/utils/plot.py | 61 ++++++ miniwave/utils/properties.py | 36 ++++ 16 files changed, 1208 insertions(+) create mode 100644 miniwave/.gitignore create mode 100644 miniwave/README.md create mode 100644 miniwave/c-frontend/CMakeLists.txt create mode 100644 miniwave/c-frontend/miniwave.c create mode 100644 miniwave/c-frontend/selected_kernel.h.in create mode 100644 miniwave/kernels/sequential.c create mode 100644 miniwave/kernels/sequential.h create mode 100644 miniwave/miniwave.py create mode 100644 miniwave/selected_kernel.h create mode 100644 miniwave/utils/__init__.py create mode 100644 miniwave/utils/compiler.py create mode 100644 miniwave/utils/dataset_writer.py create mode 100644 miniwave/utils/kernel.py create mode 100644 miniwave/utils/model.py create mode 100644 miniwave/utils/plot.py create mode 100644 miniwave/utils/properties.py diff --git a/miniwave/.gitignore b/miniwave/.gitignore new file mode 100644 index 0000000..740a746 --- /dev/null +++ b/miniwave/.gitignore @@ -0,0 +1,9 @@ +plots/* +R-miniwave* +run.sh +*.nvvp +__pycache__ +tmp/ +*.sif +data/ +build/ \ No newline at end of file diff --git a/miniwave/README.md b/miniwave/README.md new file mode 100644 index 0000000..b4b1e66 --- /dev/null +++ b/miniwave/README.md @@ -0,0 +1,25 @@ +# Miniwave - Minimum Simwave + +Python wrapper for forward wave propagation. + +# Dependencies + +`pip install numpy matplotlib findiff h5py` + +``` +mkdir /hdf5 && \ + cd /hdf5 && \ + wget https://support.hdfgroup.org/ftp/HDF5/releases/hdf5-1.14/hdf5-1.14.3/src/CMake-hdf5-1.14.3.tar.gz && \ + tar xf CMake-hdf5-1.14.3.tar.gz && \ + cd CMake-hdf5-1.14.3 && \ + ./build-unix.sh && \ + yes | ./HDF5-1.14.3-Linux.sh && \ + cp -r HDF5-1.14.3-Linux/HDF_Group/HDF5/1.14.3/ ../build +``` + +# How to use + +`python3 miniwave.py --help` + +`python3 miniwave.py --file FILE --grid_size GRID_SIZE --num_timesteps NUM_TIMESTEPS --language {c,openmp,openacc,cuda,python} --space_order SPACE_ORDER` + diff --git a/miniwave/c-frontend/CMakeLists.txt b/miniwave/c-frontend/CMakeLists.txt new file mode 100644 index 0000000..c507c2f --- /dev/null +++ b/miniwave/c-frontend/CMakeLists.txt @@ -0,0 +1,19 @@ +cmake_minimum_required(VERSION 3.18) + +project(miniwave) + +# Set the kernel source file and header +configure_file(${CMAKE_SOURCE_DIR}/selected_kernel.h.in + ${CMAKE_BINARY_DIR}/selected_kernel.h) +include_directories(${CMAKE_BINARY_DIR}) + +add_executable(miniwave ../${KERNEL_SOURCE} miniwave.c) + +# Include miniwave header files (sequential.h) +target_include_directories(miniwave PUBLIC ${CMAKE_SOURCE_DIR}/../kernels) + +# HDF5 commands +set(HDF5DIR "$ENV{HOME}/hdf5/build") +target_include_directories(miniwave PRIVATE ${HDF5DIR}/include) +target_link_directories(miniwave PRIVATE ${HDF5DIR}/lib) +target_link_libraries(miniwave PRIVATE libhdf5.so libhdf5_cpp.so) diff --git a/miniwave/c-frontend/miniwave.c b/miniwave/c-frontend/miniwave.c new file mode 100644 index 0000000..7f153d3 --- /dev/null +++ b/miniwave/c-frontend/miniwave.c @@ -0,0 +1,238 @@ +#include +#include +#include + +#include "hdf5.h" +#ifdef CONTAINS_MPI + #include "mpi.h" +#endif +#include "selected_kernel.h" + +hid_t open_hdf5_file(char *file) { + return H5Fopen(file, H5F_ACC_RDONLY, H5P_DEFAULT); +} +hid_t open_hdf5_dataset(hid_t file_id, char *dataset) { + return H5Dopen2(file_id, dataset, H5P_DEFAULT); +} +float *read_float_dataset(hid_t dataset_id) { + hid_t dataspace; + hsize_t dims_out[10]; + int rank, total_size; + + dataspace = H5Dget_space(dataset_id); /* dataspace handle */ + rank = H5Sget_simple_extent_ndims(dataspace); + H5Sget_simple_extent_dims(dataspace, dims_out, NULL); + + total_size = 1; + for (size_t i = 0; i < rank; i++) { + total_size *= dims_out[i]; + } + + float *dset_data = malloc(sizeof(float) * total_size); + H5Dread(dataset_id, H5T_NATIVE_FLOAT, H5S_ALL, H5S_ALL, H5P_DEFAULT, + dset_data); + return dset_data; +} +float read_float_attribute(hid_t dataset_id, char *attribute_name) { + // Gets attribute value from a dataset in a file + // Ex: file example_data.h5 has a dataset named scalar_data + // which has an attribute named 'attribute_X'. To get + // the value of this attribute: + // + // float attribute_X = + // read_float_attribute(scalar_data_dataset_id, "attribute_X"); + + + const char *attribute_value; + // Get attribute value as string + hid_t attribute_id = H5Aopen(dataset_id, attribute_name, H5P_DEFAULT); + hid_t attribute_type = H5Aget_type(attribute_id); + H5Aread(attribute_id, attribute_type, &attribute_value); + // Convert attribute value to float + return atof(attribute_value); +} +void close_hdf5_dataset(hid_t dataset_id) { H5Dclose(dataset_id); } +void close_hdf5_file(hid_t file_id) { H5Fclose(file_id); } + +int write_hdf5_result(int n1, int n2, int n3, double execution_time, f_type* next_u) { + hid_t h5t_type = H5T_NATIVE_FLOAT; + #if defined(DOUBLE) + h5t_type = H5T_NATIVE_DOUBLE; + #endif + + // Create a new HDF5 file using the default properties + hid_t file_id = H5Fcreate("c-frontend/data/results.h5", H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT); + if (file_id < 0) { + printf("Error creating file.\n"); + return 1; + } + + // Define the dataspace for the vector dataset + hsize_t vector_dims[3] = {n1,n2,n3}; // 3D vector + hid_t vector_dataspace_id = H5Screate_simple(3, vector_dims, NULL); + if (vector_dataspace_id < 0) { + printf("Error creating vector dataspace.\n"); + H5Fclose(file_id); + return 1; + } + + // Create the vector dataset with default properties + hid_t vector_dataset_id = H5Dcreate(file_id, "vector", h5t_type, vector_dataspace_id, + H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + if (vector_dataset_id < 0) { + printf("Error creating vector dataset.\n"); + H5Sclose(vector_dataspace_id); + H5Fclose(file_id); + return 1; + } + + // Write the vector data to the dataset + if (H5Dwrite(vector_dataset_id, h5t_type, H5S_ALL, H5S_ALL, H5P_DEFAULT, next_u) < 0) { + printf("Error writing vector data.\n"); + H5Dclose(vector_dataset_id); + H5Sclose(vector_dataspace_id); + H5Fclose(file_id); + return 1; + } + + // Define the dataspace for the execution time dataset + hsize_t time_dims[1] = {1}; // Scalar + hid_t time_dataspace_id = H5Screate_simple(1, time_dims, NULL); + if (time_dataspace_id < 0) { + printf("Error creating time dataspace.\n"); + H5Dclose(vector_dataset_id); + H5Sclose(vector_dataspace_id); + H5Fclose(file_id); + return 1; + } + + // Create the execution time dataset with default properties + hid_t time_dataset_id = H5Dcreate(file_id, "execution_time", H5T_NATIVE_DOUBLE, time_dataspace_id, + H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + if (time_dataset_id < 0) { + printf("Error creating time dataset.\n"); + H5Dclose(vector_dataset_id); + H5Sclose(vector_dataspace_id); + H5Sclose(time_dataspace_id); + H5Fclose(file_id); + return 1; + } + + // Write the execution time to the dataset + if (H5Dwrite(time_dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, &execution_time) < 0) { + printf("Error writing time data.\n"); + H5Dclose(vector_dataset_id); + H5Sclose(vector_dataspace_id); + H5Dclose(time_dataset_id); + H5Sclose(time_dataspace_id); + H5Fclose(file_id); + return 1; + } + + // Close the datasets, dataspaces, and file + H5Dclose(vector_dataset_id); + H5Sclose(vector_dataspace_id); + H5Dclose(time_dataset_id); + H5Sclose(time_dataspace_id); + H5Fclose(file_id); + return 0; +} + +int main() { + #ifdef CONTAINS_MPI + MPI_Init(NULL, NULL); + #endif + + int rank = 0; + + #ifdef CONTAINS_MPI + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + #endif + + // Get the file id + hid_t file_id = open_hdf5_file("c-frontend/data/miniwave_data.h5"); + // Read arguments + hid_t vel_model_id = open_hdf5_dataset(file_id, "vel_model"); + hid_t next_u_id = open_hdf5_dataset(file_id, "next_u"); + hid_t prev_u_id = open_hdf5_dataset(file_id, "prev_u"); + hid_t coefficient_id = open_hdf5_dataset(file_id, "coefficient"); + hid_t scalar_data_id = open_hdf5_dataset(file_id, "scalar_data"); + float *vel_model = read_float_dataset(vel_model_id); + float *next_u = read_float_dataset(next_u_id); + float *prev_u = read_float_dataset(prev_u_id); + float *coefficient = read_float_dataset(coefficient_id); + float block_size_1 = read_float_attribute(scalar_data_id, "block_size_1"); + float block_size_2 = read_float_attribute(scalar_data_id, "block_size_2"); + float block_size_3 = read_float_attribute(scalar_data_id, "block_size_3"); + float d1 = read_float_attribute(scalar_data_id, "d1"); + float d2 = read_float_attribute(scalar_data_id, "d2"); + float d3 = read_float_attribute(scalar_data_id, "d3"); + float dt = read_float_attribute(scalar_data_id, "dt"); + float iterations = read_float_attribute(scalar_data_id, "iterations"); + float n1 = read_float_attribute(scalar_data_id, "n1"); + float n2 = read_float_attribute(scalar_data_id, "n2"); + float n3 = read_float_attribute(scalar_data_id, "n3"); + float stencil_radius = read_float_attribute(scalar_data_id, "stencil_radius"); + + if (rank == 0) { + printf("vel_model[0:50]:\n"); + for (size_t i = 0; i < 50; i++) { + printf("%f ", vel_model[i]); + } + printf("\n"); + printf("next_u[0:50]:\n"); + for (size_t i = 0; i < 50; i++) { + printf("%lf ", next_u[i]); + } + printf("\n"); + printf("prev_u[0:50]:\n"); + for (size_t i = 0; i < 50; i++) { + printf("%lf ", prev_u[i]); + } + printf("\n"); + printf("coefficient:\n"); + for (size_t i = 0; i < 2; i++) { + printf("%lf ", coefficient[i]); + } + printf("\n"); + printf("block_size_1: %f\n",block_size_1); + printf("block_size_2: %f\n",block_size_2); + printf("block_size_3: %f\n",block_size_3); + printf("d1: %f\n",d1); + printf("d2: %f\n",d2); + printf("d3: %f\n",d3); + printf("dt: %f\n",dt); + printf("iterations: %f\n",iterations); + printf("n1: %f\n",n1); + printf("n2: %f\n",n2); + printf("n3: %f\n",n3); + printf("stencil_radius: %f\n",stencil_radius); + } + + clock_t start_time = clock(); + + forward(prev_u, next_u, vel_model, coefficient, d1, d2, d3, dt, n1, n2, n3, iterations, stencil_radius, block_size_1, block_size_2, block_size_3); + + clock_t end_time = clock(); + double execution_time = ((double)(end_time - start_time)) / CLOCKS_PER_SEC; + + if (rank == 0) { + printf("\nprev_u[0:50]"); + for (size_t i = 0; i < 50; i++) { + printf("%lf ", prev_u[i]); + } + printf("\n"); + + write_hdf5_result(n1, n2, n3, execution_time, next_u); + } + + close_hdf5_dataset(vel_model_id); + close_hdf5_dataset(next_u_id); + close_hdf5_dataset(prev_u_id); + close_hdf5_dataset(coefficient_id); + close_hdf5_file(file_id); + + #ifdef CONTAINS_MPI + MPI_Finalize(); + #endif +} \ No newline at end of file diff --git a/miniwave/c-frontend/selected_kernel.h.in b/miniwave/c-frontend/selected_kernel.h.in new file mode 100644 index 0000000..f87a535 --- /dev/null +++ b/miniwave/c-frontend/selected_kernel.h.in @@ -0,0 +1,6 @@ +#ifndef SELECTED_KERNEL_H +#define SELECTED_KERNEL_H + +#include "../@KERNEL_HEADER@" + +#endif // SELECTED_KERNEL_H diff --git a/miniwave/kernels/sequential.c b/miniwave/kernels/sequential.c new file mode 100644 index 0000000..b14f787 --- /dev/null +++ b/miniwave/kernels/sequential.c @@ -0,0 +1,49 @@ +#include +#include + +#include "sequential.h" + +int forward(f_type *prev_u, f_type *next_u, f_type *vel_model, f_type *coefficient, + f_type d1, f_type d2, f_type d3, f_type dt, int n1, int n2, int n3, + int iterations, int stencil_radius, + int block_size_1, int block_size_2, int block_size_3 + ){ + + f_type d1Squared = d1 * d1; + f_type d2Squared = d2 * d2; + f_type d3Squared = d3 * d3; + f_type dtSquared = dt * dt; + + for(int t = 0; t < iterations; t++) { + + for(int i = stencil_radius; i < n1 - stencil_radius; i++){ + for(int j = stencil_radius; j < n2 - stencil_radius; j++){ + for(int k = stencil_radius; k < n3 - stencil_radius; k++){ + // index of the current point in the grid + int current = (i * n2 + j) * n3 + k; + + // stencil code to update grid + f_type value = coefficient[0] * (prev_u[current]/d1Squared + prev_u[current]/d2Squared + prev_u[current]/d3Squared); + + // radius of the stencil + for(int ir = 1; ir <= stencil_radius; ir++){ + value += coefficient[ir] * ( + ( (prev_u[current + ir] + prev_u[current - ir]) / d3Squared ) + //neighbors in the third axis + ( (prev_u[current + (ir * n3)] + prev_u[current - (ir * n3)]) / d2Squared ) + //neighbors in the second axis + ( (prev_u[current + (ir * n2 * n3)] + prev_u[current - (ir * n2 * n3)]) / d1Squared )); //neighbors in the first axis + } + + value *= dtSquared * vel_model[current] * vel_model[current]; + next_u[current] = 2.0 * prev_u[current] - next_u[current] + value; + } + } + } + + // swap arrays for next iteration + f_type *swap = next_u; + next_u = prev_u; + prev_u = swap; + } + + return 0; +} diff --git a/miniwave/kernels/sequential.h b/miniwave/kernels/sequential.h new file mode 100644 index 0000000..63a3f8c --- /dev/null +++ b/miniwave/kernels/sequential.h @@ -0,0 +1,16 @@ + +// use single (float) or double precision +// according to the value passed in the compilation cmd +#if defined(FLOAT) + typedef float f_type; +#elif defined(DOUBLE) + typedef double f_type; +#else + typedef float f_type; +#endif + +int forward(f_type *prev_u, f_type *next_u, f_type *vel_model, f_type *coefficient, + f_type d1, f_type d2, f_type d3, f_type dt, int n1, int n2, int n3, + int iterations, int stencil_radius, + int block_size_1, int block_size_2, int block_size_3 + ); \ No newline at end of file diff --git a/miniwave/miniwave.py b/miniwave/miniwave.py new file mode 100644 index 0000000..3c04f3a --- /dev/null +++ b/miniwave/miniwave.py @@ -0,0 +1,105 @@ +import numpy as np +import argparse +import sys +from utils import Model, Compiler, Kernel, plot +from utils.properties import Properties + +def get_args(args=sys.argv[1:]): + + parser = argparse.ArgumentParser(description='How to use this program') + + parser.add_argument("--file", type=str, default='kernels/sequential.c', + help="Path to the Kernel file") + + parser.add_argument("--grid_size", type=int, default=256, + help="Grid size") + + parser.add_argument("--num_timesteps", type=int, default=400, + help="Number of timesteps") + + parser.add_argument("--language", type=str, default="c", choices=['c', 'openmp', 'openmp_cpu', 'openacc', 'cuda', 'python', 'mpi', 'mpi_cuda', 'ompc'], + help="Language: c, openmp, openacc, cuda, python, ompc, mpi, mpi_cuda") + + parser.add_argument("--space_order", type=int, default=2, + help="Space order") + + parser.add_argument("--block_size_1", type=int, default=1, + help="GPU block size in the first axis") + + parser.add_argument("--block_size_2", type=int, default=1, + help="GPU block size in the second axis") + + parser.add_argument("--block_size_3", type=int, default=1, + help="GPU block size in the third axis") + + parser.add_argument("--sm", type=int, default=75, + help="Cuda capability") + + parser.add_argument("--fast_math", default=False, action="store_true" , help="Enable --fast-math flag") + + parser.add_argument("--plot", default=False, action="store_true" , help="Enable ploting") + + parser.add_argument("--dtype", type=str, default="float64", help="Float Precision. float32 or float64 (default)") + + + parsed_args = parser.parse_args(args) + + return parsed_args + + +if __name__ == "__main__": + + args = get_args() + + # enable/disable fast math + fast_math = args.fast_math + + # cuda capability + sm = args.sm + + # language + language = args.language + + # float precision + dtype = args.dtype + + # create a compiler object + compiler = Compiler(language=language, sm=sm, fast_math=fast_math) + + # define grid shape + grid_size = (args.grid_size, args.grid_size, args.grid_size) + + vel_model = np.ones(shape=grid_size) * 1500.0 + + model = Model( + velocity_model=vel_model, + grid_spacing=(10,10,10), + dt=0.002, + num_timesteps=args.num_timesteps, + space_order=args.space_order, + dtype=dtype + ) + + # GPU block sizes + properties = Properties( + block_size_1=args.block_size_1, + block_size_2=args.block_size_2, + block_size_3=args.block_size_3 + ) + + solver = Kernel( + file=args.file, + model=model, + compiler=compiler, + properties=properties + ) + + # run the kernel + exec_time, u = solver.run() + + # plot a slice + if args.plot: + slice = vel_model.shape[1] // 2 + plot(u[:,slice,:]) + + print(f"Execution time: {exec_time} seconds") diff --git a/miniwave/selected_kernel.h b/miniwave/selected_kernel.h new file mode 100644 index 0000000..883111b --- /dev/null +++ b/miniwave/selected_kernel.h @@ -0,0 +1,6 @@ +#ifndef SELECTED_KERNEL_H +#define SELECTED_KERNEL_H + +#include "{KERNEL_HEADER}" + +#endif // SELECTED_KERNEL_H diff --git a/miniwave/utils/__init__.py b/miniwave/utils/__init__.py new file mode 100644 index 0000000..648f716 --- /dev/null +++ b/miniwave/utils/__init__.py @@ -0,0 +1,5 @@ +from .model import Model +from .compiler import Compiler +from .kernel import Kernel +from .plot import plot +from .properties import Properties \ No newline at end of file diff --git a/miniwave/utils/compiler.py b/miniwave/utils/compiler.py new file mode 100644 index 0000000..6e3bd57 --- /dev/null +++ b/miniwave/utils/compiler.py @@ -0,0 +1,158 @@ +import os +from hashlib import sha1 +from typing import Optional +from utils.properties import Properties + +class Compiler: + """ + Base class to implement the runtime compiler. + + Parameters + ---------- + language : str + Kernel language. + sm : int + Cuda capability. + fast_math : bool + Enable fast_math. Default is False. + """ + + def __init__(self, language: str, sm: int, fast_math: Optional[bool] = False): + self._language = language + self._sm = sm + self._fast_math = fast_math + + self._define_default_flags() + + @property + def language(self) -> str: + return self._language + + @property + def sm(self) -> int: + return self._sm + + @property + def fast_math(self) -> bool: + return self._fast_math + + @property + def cc(self) -> str: + return self._cc + + @cc.setter + def cc(self, value: str) -> None: + self._cc = value + + @property + def flags(self) -> str: + return self._flags + + @flags.setter + def flags(self, value: str) -> None: + self._flags = value + + def _define_default_flags(self) -> None: + + # fast math for GNU and CLANG + if self.fast_math: + fast_math = "-ffast-math" + else: + fast_math = "" + + if self.language == 'c': + self.cc = 'gcc' + self.flags = f'-O3 -fPIC {fast_math} -shared' + + elif self.language == 'openmp': + self.cc = 'clang' + self.flags = f'-O3 -fPIC {fast_math} -fopenmp -fopenmp-version=51 -fopenmp-targets=nvptx64 -Xopenmp-target -march=sm_{self.sm} -shared -I/usr/local/cuda/include -L/usr/local/cuda/lib64 -lcudart -g' + + elif self.language == 'mpi': + self.cc = 'mpicc' + self.flags = f'-O3 -fPIC {fast_math} -shared' + + elif self.language == 'mpi_cuda': + self.cc = 'mpicc' + self.flags = f'-O3 -fPIC {fast_math} -L/usr/local/cuda/lib64 -lcudart -I/usr/local/cuda/include --offload-arch=sm_{self.sm} -shared' + + elif self.language == 'openmp_cpu': + self.cc = 'gcc' + self.flags = f'-O3 -fPIC {fast_math} -fopenmp -shared' + + elif self.language == 'openacc': + + if self.fast_math: + fast_math = ",fastmath" + + self.cc = 'pgcc' + self.flags = f'-O3 -fPIC -acc:gpu -gpu=pinned{fast_math} -shared' + + elif self.language == 'cuda': + if self.fast_math: + fast_math = "--use_fast_math" + + self.cc = 'nvcc' + self.flags = f'-O3 -gencode arch=compute_{self.sm},code=sm_{self.sm} --compiler-options -fPIC,-Wall {fast_math} -shared' + + elif self.language == 'python': + pass + + elif self.language == 'ompc': + self.cc = 'clang' + self.flags = f'-O3 -fPIC {fast_math} -fopenmp -fopenmp-targets=x86_64-pc-linux-gnu -shared' + + else: + raise Exception("Language not supported") + + def compile(self, file: str, properties: Optional[Properties] = None) -> str: + + object_dir = "/tmp/miniwave/" + + # create a dir to save the compiled shared object + os.makedirs(object_dir, exist_ok=True) + + # get c file content + with open(file, 'r', encoding='utf-8') as f: + file_content = f.read() + + # float precision + type = { + 'float32': '-DFLOAT', + 'float64': '-DDOUBLE' + } + + float_precision = type[properties.dtype] + + b1, b2, b3 = properties.block3 + stencil_radius = properties.stencil_radius + + c_def = f" -DBLOCK1={b1} -DBLOCK2={b2} -DBLOCK3={b3} -DSTENCIL_RADIUS={stencil_radius} {float_precision}" + + # add defined properties to flags + self.flags += c_def + + # compose the object string + object_str = "{} {} {}".format(self.cc, self.flags, file_content) + + # apply sha1 hash to name the object + hash = sha1() + hash.update(object_str.encode()) + object_name = hash.hexdigest() + ".so" + + # object complete path + object_path = object_dir + object_name + + # check if object_file already exists + if os.path.exists(object_path): + print("Shared object already compiled in:", object_path) + else: + cmd = self.cc + " " + file + " " + self.flags + " -o " + object_path + + print("Compilation command:", cmd) + + # execute the command + if os.system(cmd) != 0: + raise Exception("Compilation failed") + + return object_path diff --git a/miniwave/utils/dataset_writer.py b/miniwave/utils/dataset_writer.py new file mode 100644 index 0000000..e412c21 --- /dev/null +++ b/miniwave/utils/dataset_writer.py @@ -0,0 +1,68 @@ +import h5py +import numpy as np +import os + + +class DatasetWriter: + def __init__(self) -> None: + pass + + def write_dataset(data: dict, path: str): + """Writes HDF5 file from data. Non array data written to "scalar_data" dataset as attributes. + + :param data: dictionary containing data to be written. + Object format: + + data: { + [dataset_name]: { + dataset_data: np.ndarray | int | float, + dataset_attributes: {[attribute_name]: str} + } + } + + Example: + + ```python + data = { + "my_dataset_1": { + "dataset_data": np.array([1, 2, 3]), + "dataset_attributes": { + "description": "small numbers", + "location": "collected at lab X", + }, + }, + "my_dataset_2": { + "dataset_data": np.array([3, 2, 1]), + "dataset_attributes": { + "my_attribute_1": "small numbers", + "my_attribute_2": "collected at lab Y", + }, + }, + } + ``` + :type data: dict[str, dict[str, np.ndarray | dict]] + :param path: Where the file will be saved. + :type path: str + """ + + # Create file + dir = os.path.dirname(path) + if not os.path.exists(dir): + os.makedirs(dir) + file = h5py.File(path, "w") + # Create datasets + scalar_data_dataset = file.create_dataset("scalar_data", (1,), dtype="f") + for key, value in data.items(): + + dataset_name = key + dataset_properties = value + dataset_data = dataset_properties["dataset_data"] + dataset_attributes = dataset_properties["dataset_attributes"] + + if isinstance(dataset_data, np.ndarray): + dataset = file.create_dataset(name=dataset_name, data=dataset_data) + dataset.attrs.update(dataset_attributes) + else: + scalar_data_dataset.attrs[dataset_name] = str(dataset_data) + + file.close() diff --git a/miniwave/utils/kernel.py b/miniwave/utils/kernel.py new file mode 100644 index 0000000..b007886 --- /dev/null +++ b/miniwave/utils/kernel.py @@ -0,0 +1,234 @@ +import ctypes +import numpy as np +from numpy.ctypeslib import ndpointer +import importlib.util +import sys +from typing import Tuple, Optional, Callable +from utils.model import Model +from utils.compiler import Compiler +from utils.properties import Properties +from time import time +from utils.dataset_writer import DatasetWriter +import subprocess +import h5py + + +class Kernel: + + def __init__( + self, + file: str, + model: Model, + compiler: Optional[Compiler] = None, + properties: Optional[Properties] = Properties(1, 1, 1), + ): + + self._file = file + self._model = model + self._compiler = compiler + self._properties = properties + + if self.language != "python" and compiler is None: + raise Exception("Compiler can not be None") + + @property + def file(self) -> str: + return self._file + + @property + def language(self) -> str: + return self.compiler.language + + @property + def model(self) -> Model: + return self._model + + @property + def compiler(self) -> Compiler: + return self._compiler + + @property + def properties(self) -> Properties: + return self._properties + + def _import_python_lib(self) -> Callable: + + spec = importlib.util.spec_from_file_location("kernel.forward", self.file) + lib = importlib.util.module_from_spec(spec) + sys.modules["kernel.forward"] = lib + spec.loader.exec_module(lib) + + return lib.forward + + def _import_c_lib(self) -> Callable: + + dtype = "float32" if self.model.dtype == np.float32 else "float64" + + # add space order and dtype to properties + self.properties.space_order = self.model.space_order + self.properties.dtype = dtype + + shared_object = self.compiler.compile(self.file, self.properties) + + # load the library + lib = ctypes.cdll.LoadLibrary(shared_object) + + if self.properties.dtype == "float32": + + argtypes = [ + ndpointer(ctypes.c_float, flags="C_CONTIGUOUS"), # prev_u + ndpointer(ctypes.c_float, flags="C_CONTIGUOUS"), # next_u + ndpointer(ctypes.c_float, flags="C_CONTIGUOUS"), # vel_model + ndpointer(ctypes.c_float, flags="C_CONTIGUOUS"), # coefficients + ctypes.c_float, # d1 + ctypes.c_float, # d2 + ctypes.c_float, # d3 + ctypes.c_float, # dt + ctypes.c_int, # n1 + ctypes.c_int, # n2 + ctypes.c_int, # n3 + ctypes.c_int, # number of timesteps + ctypes.c_int, # radius of space order + ctypes.c_int, # block_size_1 + ctypes.c_int, # block_size_2 + ctypes.c_int, # block_size_3 + ] + + elif self.properties.dtype == "float64": + + argtypes = [ + ndpointer(ctypes.c_double, flags="C_CONTIGUOUS"), # prev_u + ndpointer(ctypes.c_double, flags="C_CONTIGUOUS"), # next_u + ndpointer(ctypes.c_double, flags="C_CONTIGUOUS"), # vel_model + ndpointer(ctypes.c_double, flags="C_CONTIGUOUS"), # coefficients + ctypes.c_double, # d1 + ctypes.c_double, # d2 + ctypes.c_double, # d3 + ctypes.c_double, # dt + ctypes.c_int, # n1 + ctypes.c_int, # n2 + ctypes.c_int, # n3 + ctypes.c_int, # number of timesteps + ctypes.c_int, # radius of space order + ctypes.c_int, # block_size_1 + ctypes.c_int, # block_size_2 + ctypes.c_int, # block_size_3 + ] + + else: + raise ValueError("Unkown float precision. Must be float32 or float64") + + forward = lib.forward + forward.restype = ctypes.c_int + forward.argtypes = argtypes + + return forward + + def _load_lib(self) -> Callable: + + if self.language == "python": + return self._import_python_lib() + else: + return self._import_c_lib() + + def read_data_from_hdf5(self, filename): + with h5py.File(filename, "r") as file: + exec_time = file["/execution_time"][()] + vector = file["/vector"][:] + return exec_time, vector + + def print_setup(self): + print("----------") + print("Language: ", self.language) + print("File: ", self.file) + print(f"Dtype: {self.model.dtype} ({self.properties.dtype})") + print("Grid size: ", self.model.grid_shape) + print("Grid spacing: ", self.model.grid_spacing) + print("Space order: ", self.model.space_order) + print("-DSTENCIL_RADIUS: ", self.properties.stencil_radius) + print("Iterations: ", self.model.num_timesteps) + print("Block sizes: ", self.properties.block3) + print("SM: ", self.compiler.sm) + print("Fast Math: ", self.compiler.fast_math) + print("----------") + + def run(self) -> Tuple[float, np.ndarray]: + # return execution time and last wavefield + + # load the lib + forward = self._load_lib() + + # get args + prev_u, next_u = self.model.u_arrays + n1, n2, n3 = self.model.grid_shape + d1, d2, d3 = self.model.grid_spacing + stencil_radius = self.model.space_order // 2 + + # print setup + self.print_setup() + + # Generate HDF5 file + + data = { + "prev_u": {"dataset_data": prev_u, "dataset_attributes": {}}, + "next_u": {"dataset_data": next_u, "dataset_attributes": {}}, + "vel_model": { + "dataset_data": self.model.velocity_model, + "dataset_attributes": {}, + }, + "coefficient": { + "dataset_data": self.model.stencil_coefficients, + "dataset_attributes": {}, + }, + "d1": {"dataset_data": d1, "dataset_attributes": {}}, + "d2": {"dataset_data": d2, "dataset_attributes": {}}, + "d3": {"dataset_data": d3, "dataset_attributes": {}}, + "dt": {"dataset_data": self.model.dt, "dataset_attributes": {}}, + "n1": {"dataset_data": n1, "dataset_attributes": {}}, + "n2": {"dataset_data": n2, "dataset_attributes": {}}, + "n3": {"dataset_data": n3, "dataset_attributes": {}}, + "iterations": { + "dataset_data": self.model.num_timesteps, + "dataset_attributes": {}, + }, + "stencil_radius": { + "dataset_data": stencil_radius, + "dataset_attributes": {}, + }, + "block_size_1": { + "dataset_data": self.properties.block_size_1, + "dataset_attributes": {}, + }, + "block_size_2": { + "dataset_data": self.properties.block_size_2, + "dataset_attributes": {}, + }, + "block_size_3": { + "dataset_data": self.properties.block_size_3, + "dataset_attributes": {}, + }, + } + + DatasetWriter.write_dataset(data, "c-frontend/data/miniwave_data.h5") + + KERNEL_SOURCE = self.file + KERNEL_HEADER = self.file.split('.')[0] + ".h" + CUDA_ARCH = self.compiler.sm + KERNEL_TYPE = self.language + + # Compile C code + subprocess.run("ls c-frontend", shell=True) + subprocess.run(f"cmake -S c-frontend -B c-frontend/build/ -DKERNEL_SOURCE={KERNEL_SOURCE} -DKERNEL_HEADER={KERNEL_HEADER} -DKERNEL_TYPE={KERNEL_TYPE} -DCUDA_ARCHITECTURE={CUDA_ARCH}", shell=True) + subprocess.run("cmake --build c-frontend/build/", shell=True) + + # run the forward function + if self.language == "ompc": + subprocess.run("mpirun -np 4 offload-mpi-worker : -np 1 ./c-frontend/build/miniwave", shell=True) + elif self.language == "mpi" or self.language == "mpi_cuda": + subprocess.run("mpirun -np 4 ./c-frontend/build/miniwave", shell=True) + else: + subprocess.run("./c-frontend/build/miniwave", shell=True) + + exec_time, next_u = self.read_data_from_hdf5("./c-frontend/data/results.h5") + + return exec_time[0], next_u diff --git a/miniwave/utils/model.py b/miniwave/utils/model.py new file mode 100644 index 0000000..8161528 --- /dev/null +++ b/miniwave/utils/model.py @@ -0,0 +1,173 @@ +from multiprocessing.sharedctypes import Value +import numpy as np +from typing import Tuple, Optional +import findiff + + +class Model: + """ + Encapsulates the both spatial and time model of the simulation. + + Parameters + ---------- + velocity_model : ndarray + Numpy 3-dimensional array with P wave velocity (m/s) profile. + grid_spacing : tuple of float + Grid spacing in meters in each axis (z, x, y). + dt : float. + Time step in seconds. + num_timesteps : int + Number of timesteps to run the simulation. + space_order : int, optional + Spatial order of the stencil. Accepts even orders. Default is 2. + dtype : str, optional + Float precision. Default is float64. + """ + def __init__(self, velocity_model: np.ndarray, grid_spacing: Tuple[float, float, float], + dt: float, num_timesteps: int, space_order: Optional[int] = 2, + dtype: Optional[str] = 'float64'): + + if dtype == 'float64': + self.dtype = np.float64 + elif dtype == 'float32': + self.dtype = np.float32 + else: + raise ValueError("Unknown dtype. It must be float32 or float64") + + self.velocity_model = velocity_model + self.grid_spacing = grid_spacing + self.num_timesteps = num_timesteps + self.space_order = space_order + self.dt = dt + + @property + def dtype(self) -> np.dtype: + return self._dtype + + @dtype.setter + def dtype(self, value: np.dtype) -> None: + self._dtype = value + + @property + def velocity_model(self) -> np.ndarray: + return self._velocity_model + + @velocity_model.setter + def velocity_model(self, value: np.ndarray): + + if value is None or value.ndim != 3: + raise ValueError("Velocity model must nd-array and 3-dimensional") + + self._velocity_model = self.dtype(value) + + @property + def grid_spacing(self) -> Tuple[float, float, float]: + return self._grid_spacing + + @grid_spacing.setter + def grid_spacing(self, value: Tuple[float, float, float]): + if len(value) != 3: + raise ValueError("Grid spacing must be 3-dimensional") + + self._grid_spacing = (self.dtype(value[0]), self.dtype(value[1]), self.dtype(value[2])) + + @property + def dt(self) -> float: + return self._dt + + @dt.setter + def dt(self, value: float): + if value < 0: + raise ValueError("Time step cannot be negative.") + elif value > self.critical_dt: + raise ValueError("Time step value violates CFL condition.") + else: + self._dt = self.dtype(value) + + @property + def num_timesteps(self) -> int: + return self._num_timesteps + + @num_timesteps.setter + def num_timesteps(self, value: int): + self._num_timesteps = value + + @property + def space_order(self) -> int: + return self._space_order + + @space_order.setter + def space_order(self, value: int): + self._space_order = value + + @property + def grid_shape(self) -> Tuple[int, int, int]: + return self.velocity_model.shape + + @property + def critical_dt(self) -> float: + """ + Calculate dt with CFL conditions + Based on https://library.seg.org/doi/pdf/10.1190/1.1444605 + for the acoustic case. + + Returns + ---------- + float + Critical dt in seconds. + """ + + # 2nd order in time + a1 = 4 + + # fixed 2nd time derivative + fd_coeffs = findiff.coefficients(deriv=2, acc=self.space_order)['center']['coefficients'] + + a2 = self.velocity_model.ndim * np.sum(np.abs(fd_coeffs)) + coeff = np.sqrt(a1 / a2) + dt = coeff * np.min(self.grid_spacing) / np.max(self.velocity_model) + + return self.dtype(dt) + + @property + def stencil_coefficients(self) -> np.ndarray: + # fixed second derivative + coeffs = findiff.coefficients(deriv=2, acc=self.space_order)['center']['coefficients'] + + # calculate the center point index + middle = len(coeffs) // 2 + + # coefficients starting from the center + coeffs = coeffs[middle:] + + return self.dtype(coeffs) + + @property + def grid(self) -> np.ndarray: + + base_grid = np.zeros(shape=self.grid_shape, dtype=self.dtype) + + return self._add_initial_source(base_grid) + + @property + def u_arrays(self) -> Tuple[np.ndarray, np.ndarray]: + # return prev_u and next_u arrays + + prev_u = self.grid + next_u = self.grid.copy() + + return prev_u, next_u + + + def _add_initial_source(self, grid: np.ndarray) -> np.ndarray: + + n1, n2, n3 = grid.shape + + val = 50.0 + + for s in range(4,-1,-1): + grid[n1//2-s:n1//2+s, n2//2-s:n2//2+s, n3//2-s:n3//2+s] = val + val *= 0.9 + + return grid + \ No newline at end of file diff --git a/miniwave/utils/plot.py b/miniwave/utils/plot.py new file mode 100644 index 0000000..8df78e1 --- /dev/null +++ b/miniwave/utils/plot.py @@ -0,0 +1,61 @@ +import numpy as np +import matplotlib.pyplot as plt +import os +from matplotlib import cm +from mpl_toolkits.axes_grid1 import make_axes_locatable + +def plot(wavefield, file_name="wavefield", colorbar=True, cmap="gray", extent=None, show=False, clim=[-5, 5]): + """ + Plot the wavefield. + + Parameters + ---------- + wavefield : ndarray + Wavefield data. + file_name : str, optional + Name of the image to be saved. + Default is wavefield. + colorbar : bool, optional + If True, show a colorbar. Default is True. + cmap : str, optional + The Colormap instance or registered colormap name + used to map scalar data to colors. + Default is gray. + extent : floats(left, right, bottom, top), optional + The bounding box in data coordinates that the image will fill. + show : bool, optional + If True, show the image on a pop up window. + Default is False. + clim : list + Set the color limits of the current image. + Default is (vmin=-5, vmax=5). + """ + + # create the destination dir + os.makedirs("plots", exist_ok=True) + + # process data and generate the plot + plot = plt.imshow(wavefield, cmap=cmap, extent=extent) + + m_unit = 'm' if extent is not None else 'points' + + # labels + plt.xlabel('Width ({})'.format(m_unit)) + plt.ylabel('Depth ({})'.format(m_unit)) + + # Create aligned colorbar on the right + if colorbar: + ax = plt.gca() + divider = make_axes_locatable(ax) + cax = divider.append_axes("right", size="5%", pad=0.05) + plt.colorbar(plot, cax=cax) + plt.clim(clim) + + plt.savefig("plots/{}.png".format(file_name), format="png") + + if show: + plt.show() + + plt.close() + + print("Wavefield saved in plots/{}.png".format(file_name)) \ No newline at end of file diff --git a/miniwave/utils/properties.py b/miniwave/utils/properties.py new file mode 100644 index 0000000..2927d9c --- /dev/null +++ b/miniwave/utils/properties.py @@ -0,0 +1,36 @@ +class Properties: + + def __init__(self, block_size_1: int, block_size_2: int, block_size_3: int): + + self.block_size_1 = block_size_1 + self.block_size_2 = block_size_2 + self.block_size_3 = block_size_3 + self._space_order = None + self._dtype = None + + @property + def block3(self): + return (self.block_size_1, self.block_size_2, self.block_size_3) + + @property + def dtype(self): + return self._dtype + + @dtype.setter + def dtype(self, value: str): + self._dtype = value + + @property + def space_order(self): + return self._space_order + + @space_order.setter + def space_order(self, value: str): + self._space_order = value + + @property + def stencil_radius(self): + if self.space_order is not None: + return self.space_order // 2 + + return None From decd4c623888da0603579952edd4c521832f833b Mon Sep 17 00:00:00 2001 From: GeorgeJuniorGG Date: Wed, 16 Apr 2025 11:53:44 -0300 Subject: [PATCH 2/2] chore: lint --- miniwave/miniwave.py | 124 +++++++++++++++++----------- miniwave/utils/__init__.py | 3 +- miniwave/utils/compiler.py | 134 ++++++++++++++++++------------- miniwave/utils/dataset_writer.py | 14 +++- miniwave/utils/kernel.py | 42 +++++++--- miniwave/utils/model.py | 133 ++++++++++++++++-------------- miniwave/utils/plot.py | 15 +++- miniwave/utils/properties.py | 31 ++++--- 8 files changed, 299 insertions(+), 197 deletions(-) diff --git a/miniwave/miniwave.py b/miniwave/miniwave.py index 3c04f3a..11d44af 100644 --- a/miniwave/miniwave.py +++ b/miniwave/miniwave.py @@ -4,102 +4,132 @@ from utils import Model, Compiler, Kernel, plot from utils.properties import Properties + def get_args(args=sys.argv[1:]): - + parser = argparse.ArgumentParser(description='How to use this program') - + parser.add_argument("--file", type=str, default='kernels/sequential.c', help="Path to the Kernel file") - + parser.add_argument("--grid_size", type=int, default=256, help="Grid size") - + parser.add_argument("--num_timesteps", type=int, default=400, help="Number of timesteps") - - parser.add_argument("--language", type=str, default="c", choices=['c', 'openmp', 'openmp_cpu', 'openacc', 'cuda', 'python', 'mpi', 'mpi_cuda', 'ompc'], - help="Language: c, openmp, openacc, cuda, python, ompc, mpi, mpi_cuda") - + + parser.add_argument( + "--language", + type=str, + default="c", + choices=[ + 'c', + 'openmp', + 'openmp_cpu', + 'openacc', + 'cuda', + 'python', + 'mpi', + 'mpi_cuda', + 'ompc' + ], + help="Language: c, openmp, openacc, cuda, python, ompc, mpi, mpi_cuda" + ) + parser.add_argument("--space_order", type=int, default=2, help="Space order") - + parser.add_argument("--block_size_1", type=int, default=1, help="GPU block size in the first axis") - + parser.add_argument("--block_size_2", type=int, default=1, help="GPU block size in the second axis") - + parser.add_argument("--block_size_3", type=int, default=1, - help="GPU block size in the third axis") - + help="GPU block size in the third axis") + parser.add_argument("--sm", type=int, default=75, - help="Cuda capability") - - parser.add_argument("--fast_math", default=False, action="store_true" , help="Enable --fast-math flag") - - parser.add_argument("--plot", default=False, action="store_true" , help="Enable ploting") - - parser.add_argument("--dtype", type=str, default="float64", help="Float Precision. float32 or float64 (default)") - - + help="Cuda capability") + + parser.add_argument( + "--fast_math", + default=False, + action="store_true", + help="Enable --fast-math flag" + ) + + parser.add_argument( + "--plot", + default=False, + action="store_true", + help="Enable ploting" + ) + + parser.add_argument( + "--dtype", + type=str, + default="float64", + help="Float Precision. float32 or float64 (default)" + ) + parsed_args = parser.parse_args(args) return parsed_args if __name__ == "__main__": - + args = get_args() - - # enable/disable fast math + + # enable/disable fast math fast_math = args.fast_math - + # cuda capability - sm = args.sm - + sm = args.sm + # language language = args.language - + # float precision dtype = args.dtype - + # create a compiler object compiler = Compiler(language=language, sm=sm, fast_math=fast_math) - + # define grid shape grid_size = (args.grid_size, args.grid_size, args.grid_size) - + vel_model = np.ones(shape=grid_size) * 1500.0 - + model = Model( velocity_model=vel_model, - grid_spacing=(10,10,10), + grid_spacing=(10, 10, 10), dt=0.002, num_timesteps=args.num_timesteps, space_order=args.space_order, dtype=dtype - ) - - # GPU block sizes + ) + + # GPU block sizes properties = Properties( block_size_1=args.block_size_1, - block_size_2=args.block_size_2, + block_size_2=args.block_size_2, block_size_3=args.block_size_3 - ) - + ) + solver = Kernel( - file=args.file, + file=args.file, model=model, compiler=compiler, properties=properties - ) - + ) + # run the kernel exec_time, u = solver.run() - + # plot a slice if args.plot: - slice = vel_model.shape[1] // 2 - plot(u[:,slice,:]) - + slice = vel_model.shape[1] // 2 + plot(u[:, slice, :]) + print(f"Execution time: {exec_time} seconds") diff --git a/miniwave/utils/__init__.py b/miniwave/utils/__init__.py index 648f716..809f6b8 100644 --- a/miniwave/utils/__init__.py +++ b/miniwave/utils/__init__.py @@ -1,5 +1,6 @@ +# flake8: noqa from .model import Model from .compiler import Compiler from .kernel import Kernel from .plot import plot -from .properties import Properties \ No newline at end of file +from .properties import Properties diff --git a/miniwave/utils/compiler.py b/miniwave/utils/compiler.py index 6e3bd57..6e103c9 100644 --- a/miniwave/utils/compiler.py +++ b/miniwave/utils/compiler.py @@ -3,70 +3,79 @@ from typing import Optional from utils.properties import Properties + class Compiler: """ Base class to implement the runtime compiler. - + Parameters ---------- language : str - Kernel language. + Kernel language. sm : int - Cuda capability. + Cuda capability. fast_math : bool - Enable fast_math. Default is False. + Enable fast_math. Default is False. """ - - def __init__(self, language: str, sm: int, fast_math: Optional[bool] = False): - self._language = language + + def __init__( + self, + language: str, + sm: int, + fast_math: Optional[bool] = False + ): + self._language = language self._sm = sm self._fast_math = fast_math - + self._define_default_flags() - + @property def language(self) -> str: return self._language - + @property def sm(self) -> int: return self._sm - + @property def fast_math(self) -> bool: - return self._fast_math + return self._fast_math @property def cc(self) -> str: return self._cc - + @cc.setter def cc(self, value: str) -> None: self._cc = value - + @property def flags(self) -> str: return self._flags - + @flags.setter def flags(self, value: str) -> None: - self._flags = value - + self._flags = value + def _define_default_flags(self) -> None: - + # fast math for GNU and CLANG if self.fast_math: fast_math = "-ffast-math" else: - fast_math = "" - + fast_math = "" + if self.language == 'c': self.cc = 'gcc' self.flags = f'-O3 -fPIC {fast_math} -shared' - + elif self.language == 'openmp': self.cc = 'clang' - self.flags = f'-O3 -fPIC {fast_math} -fopenmp -fopenmp-version=51 -fopenmp-targets=nvptx64 -Xopenmp-target -march=sm_{self.sm} -shared -I/usr/local/cuda/include -L/usr/local/cuda/lib64 -lcudart -g' + self.flags = f'-O3 -fPIC {fast_math} -fopenmp -fopenmp-version=51 \ + -fopenmp-targets=nvptx64 -Xopenmp-target -march=sm_{self.sm} \ + -shared -I/usr/local/cuda/include -L/usr/local/cuda/lib64 \ + -lcudart -g' elif self.language == 'mpi': self.cc = 'mpicc' @@ -74,85 +83,96 @@ def _define_default_flags(self) -> None: elif self.language == 'mpi_cuda': self.cc = 'mpicc' - self.flags = f'-O3 -fPIC {fast_math} -L/usr/local/cuda/lib64 -lcudart -I/usr/local/cuda/include --offload-arch=sm_{self.sm} -shared' - + self.flags = f'-O3 -fPIC {fast_math} -L/usr/local/cuda/lib64 \ + -lcudart -I/usr/local/cuda/include \ + --offload-arch=sm_{self.sm} -shared' + elif self.language == 'openmp_cpu': self.cc = 'gcc' self.flags = f'-O3 -fPIC {fast_math} -fopenmp -shared' - + elif self.language == 'openacc': - + if self.fast_math: - fast_math = ",fastmath" - + fast_math = ",fastmath" + self.cc = 'pgcc' - self.flags = f'-O3 -fPIC -acc:gpu -gpu=pinned{fast_math} -shared' - - elif self.language == 'cuda': + self.flags = f'-O3 -fPIC -acc:gpu -gpu=pinned{fast_math} -shared' + + elif self.language == 'cuda': if self.fast_math: fast_math = "--use_fast_math" - + self.cc = 'nvcc' - self.flags = f'-O3 -gencode arch=compute_{self.sm},code=sm_{self.sm} --compiler-options -fPIC,-Wall {fast_math} -shared' - + self.flags = f'-O3 -gencode \ + arch=compute_{self.sm},code=sm_{self.sm} \ + --compiler-options -fPIC,-Wall {fast_math} -shared' + elif self.language == 'python': pass elif self.language == 'ompc': self.cc = 'clang' - self.flags = f'-O3 -fPIC {fast_math} -fopenmp -fopenmp-targets=x86_64-pc-linux-gnu -shared' - + self.flags = f'-O3 -fPIC {fast_math} -fopenmp \ + -fopenmp-targets=x86_64-pc-linux-gnu -shared' + else: raise Exception("Language not supported") - - def compile(self, file: str, properties: Optional[Properties] = None) -> str: - + + def compile( + self, + file: str, + properties: Optional[Properties] = None + ) -> str: + object_dir = "/tmp/miniwave/" - + # create a dir to save the compiled shared object os.makedirs(object_dir, exist_ok=True) - + # get c file content with open(file, 'r', encoding='utf-8') as f: file_content = f.read() - - # float precision + + # float precision type = { 'float32': '-DFLOAT', 'float64': '-DDOUBLE' } - + float_precision = type[properties.dtype] - + b1, b2, b3 = properties.block3 stencil_radius = properties.stencil_radius - - c_def = f" -DBLOCK1={b1} -DBLOCK2={b2} -DBLOCK3={b3} -DSTENCIL_RADIUS={stencil_radius} {float_precision}" - - # add defined properties to flags + + c_def = f" -DBLOCK1={b1} -DBLOCK2={b2} -DBLOCK3={b3} \ + -DSTENCIL_RADIUS={stencil_radius} {float_precision}" + + # add defined properties to flags self.flags += c_def - + # compose the object string object_str = "{} {} {}".format(self.cc, self.flags, file_content) - + # apply sha1 hash to name the object hash = sha1() hash.update(object_str.encode()) object_name = hash.hexdigest() + ".so" - + # object complete path object_path = object_dir + object_name - + # check if object_file already exists if os.path.exists(object_path): print("Shared object already compiled in:", object_path) else: - cmd = self.cc + " " + file + " " + self.flags + " -o " + object_path - + cmd = self.cc + " " + file + " " + + self.flags + " -o " + object_path + print("Compilation command:", cmd) - + # execute the command if os.system(cmd) != 0: raise Exception("Compilation failed") - + return object_path diff --git a/miniwave/utils/dataset_writer.py b/miniwave/utils/dataset_writer.py index e412c21..681ffe3 100644 --- a/miniwave/utils/dataset_writer.py +++ b/miniwave/utils/dataset_writer.py @@ -8,7 +8,8 @@ def __init__(self) -> None: pass def write_dataset(data: dict, path: str): - """Writes HDF5 file from data. Non array data written to "scalar_data" dataset as attributes. + """Writes HDF5 file from data. + Non array data written to "scalar_data" dataset as attributes. :param data: dictionary containing data to be written. Object format: @@ -51,7 +52,11 @@ def write_dataset(data: dict, path: str): os.makedirs(dir) file = h5py.File(path, "w") # Create datasets - scalar_data_dataset = file.create_dataset("scalar_data", (1,), dtype="f") + scalar_data_dataset = file.create_dataset( + "scalar_data", + (1,), + dtype="f" + ) for key, value in data.items(): dataset_name = key @@ -60,7 +65,10 @@ def write_dataset(data: dict, path: str): dataset_attributes = dataset_properties["dataset_attributes"] if isinstance(dataset_data, np.ndarray): - dataset = file.create_dataset(name=dataset_name, data=dataset_data) + dataset = file.create_dataset( + name=dataset_name, + data=dataset_data + ) dataset.attrs.update(dataset_attributes) else: scalar_data_dataset.attrs[dataset_name] = str(dataset_data) diff --git a/miniwave/utils/kernel.py b/miniwave/utils/kernel.py index b007886..76a2bcb 100644 --- a/miniwave/utils/kernel.py +++ b/miniwave/utils/kernel.py @@ -7,7 +7,6 @@ from utils.model import Model from utils.compiler import Compiler from utils.properties import Properties -from time import time from utils.dataset_writer import DatasetWriter import subprocess import h5py @@ -53,7 +52,10 @@ def properties(self) -> Properties: def _import_python_lib(self) -> Callable: - spec = importlib.util.spec_from_file_location("kernel.forward", self.file) + spec = importlib.util.spec_from_file_location( + "kernel.forward", + self.file + ) lib = importlib.util.module_from_spec(spec) sys.modules["kernel.forward"] = lib spec.loader.exec_module(lib) @@ -79,7 +81,7 @@ def _import_c_lib(self) -> Callable: ndpointer(ctypes.c_float, flags="C_CONTIGUOUS"), # prev_u ndpointer(ctypes.c_float, flags="C_CONTIGUOUS"), # next_u ndpointer(ctypes.c_float, flags="C_CONTIGUOUS"), # vel_model - ndpointer(ctypes.c_float, flags="C_CONTIGUOUS"), # coefficients + ndpointer(ctypes.c_float, flags="C_CONTIGUOUS"), # coeffs ctypes.c_float, # d1 ctypes.c_float, # d2 ctypes.c_float, # d3 @@ -100,7 +102,7 @@ def _import_c_lib(self) -> Callable: ndpointer(ctypes.c_double, flags="C_CONTIGUOUS"), # prev_u ndpointer(ctypes.c_double, flags="C_CONTIGUOUS"), # next_u ndpointer(ctypes.c_double, flags="C_CONTIGUOUS"), # vel_model - ndpointer(ctypes.c_double, flags="C_CONTIGUOUS"), # coefficients + ndpointer(ctypes.c_double, flags="C_CONTIGUOUS"), # coeffs ctypes.c_double, # d1 ctypes.c_double, # d2 ctypes.c_double, # d3 @@ -116,7 +118,9 @@ def _import_c_lib(self) -> Callable: ] else: - raise ValueError("Unkown float precision. Must be float32 or float64") + raise ValueError( + "Unkown float precision. Must be float32 or float64" + ) forward = lib.forward forward.restype = ctypes.c_int @@ -156,7 +160,7 @@ def run(self) -> Tuple[float, np.ndarray]: # return execution time and last wavefield # load the lib - forward = self._load_lib() + self._load_lib() # get args prev_u, next_u = self.model.u_arrays @@ -212,23 +216,37 @@ def run(self) -> Tuple[float, np.ndarray]: DatasetWriter.write_dataset(data, "c-frontend/data/miniwave_data.h5") KERNEL_SOURCE = self.file - KERNEL_HEADER = self.file.split('.')[0] + ".h" + KERNEL_HEADER = self.file.split('.')[0] + ".h" CUDA_ARCH = self.compiler.sm KERNEL_TYPE = self.language - + # Compile C code subprocess.run("ls c-frontend", shell=True) - subprocess.run(f"cmake -S c-frontend -B c-frontend/build/ -DKERNEL_SOURCE={KERNEL_SOURCE} -DKERNEL_HEADER={KERNEL_HEADER} -DKERNEL_TYPE={KERNEL_TYPE} -DCUDA_ARCHITECTURE={CUDA_ARCH}", shell=True) + subprocess.run( + f"cmake -S c-frontend -B c-frontend/build/ \ + -DKERNEL_SOURCE={KERNEL_SOURCE} -DKERNEL_HEADER={KERNEL_HEADER} \ + -DKERNEL_TYPE={KERNEL_TYPE} -DCUDA_ARCHITECTURE={CUDA_ARCH}", + shell=True + ) subprocess.run("cmake --build c-frontend/build/", shell=True) # run the forward function if self.language == "ompc": - subprocess.run("mpirun -np 4 offload-mpi-worker : -np 1 ./c-frontend/build/miniwave", shell=True) + subprocess.run( + "mpirun -np 4 offload-mpi-worker : \ + -np 1 ./c-frontend/build/miniwave", + shell=True + ) elif self.language == "mpi" or self.language == "mpi_cuda": - subprocess.run("mpirun -np 4 ./c-frontend/build/miniwave", shell=True) + subprocess.run( + "mpirun -np 4 ./c-frontend/build/miniwave", + shell=True + ) else: subprocess.run("./c-frontend/build/miniwave", shell=True) - exec_time, next_u = self.read_data_from_hdf5("./c-frontend/data/results.h5") + exec_time, next_u = self.read_data_from_hdf5( + "./c-frontend/data/results.h5" + ) return exec_time[0], next_u diff --git a/miniwave/utils/model.py b/miniwave/utils/model.py index 8161528..a99e2ea 100644 --- a/miniwave/utils/model.py +++ b/miniwave/utils/model.py @@ -1,4 +1,3 @@ -from multiprocessing.sharedctypes import Value import numpy as np from typing import Tuple, Optional import findiff @@ -7,167 +6,181 @@ class Model: """ Encapsulates the both spatial and time model of the simulation. - + Parameters ---------- velocity_model : ndarray - Numpy 3-dimensional array with P wave velocity (m/s) profile. + Numpy 3-dimensional array with P wave velocity (m/s) profile. grid_spacing : tuple of float - Grid spacing in meters in each axis (z, x, y). + Grid spacing in meters in each axis (z, x, y). dt : float. Time step in seconds. num_timesteps : int Number of timesteps to run the simulation. space_order : int, optional - Spatial order of the stencil. Accepts even orders. Default is 2. + Spatial order of the stencil. Accepts even orders. Default is 2. dtype : str, optional - Float precision. Default is float64. + Float precision. Default is float64. """ - def __init__(self, velocity_model: np.ndarray, grid_spacing: Tuple[float, float, float], - dt: float, num_timesteps: int, space_order: Optional[int] = 2, - dtype: Optional[str] = 'float64'): - + def __init__( + self, + velocity_model: np.ndarray, + grid_spacing: Tuple[float, float, float], + dt: float, + num_timesteps: int, + space_order: Optional[int] = 2, + dtype: Optional[str] = 'float64' + ): + if dtype == 'float64': self.dtype = np.float64 elif dtype == 'float32': self.dtype = np.float32 else: - raise ValueError("Unknown dtype. It must be float32 or float64") - + raise ValueError("Unknown dtype. It must be float32 or float64") + self.velocity_model = velocity_model - self.grid_spacing = grid_spacing + self.grid_spacing = grid_spacing self.num_timesteps = num_timesteps self.space_order = space_order self.dt = dt - + @property def dtype(self) -> np.dtype: return self._dtype - + @dtype.setter def dtype(self, value: np.dtype) -> None: self._dtype = value - + @property def velocity_model(self) -> np.ndarray: return self._velocity_model - + @velocity_model.setter def velocity_model(self, value: np.ndarray): - + if value is None or value.ndim != 3: raise ValueError("Velocity model must nd-array and 3-dimensional") - + self._velocity_model = self.dtype(value) - + @property def grid_spacing(self) -> Tuple[float, float, float]: return self._grid_spacing - + @grid_spacing.setter def grid_spacing(self, value: Tuple[float, float, float]): if len(value) != 3: raise ValueError("Grid spacing must be 3-dimensional") - - self._grid_spacing = (self.dtype(value[0]), self.dtype(value[1]), self.dtype(value[2])) - + + self._grid_spacing = ( + self.dtype(value[0]), + self.dtype(value[1]), + self.dtype(value[2]) + ) + @property def dt(self) -> float: return self._dt - + @dt.setter - def dt(self, value: float): + def dt(self, value: float): if value < 0: raise ValueError("Time step cannot be negative.") elif value > self.critical_dt: raise ValueError("Time step value violates CFL condition.") else: - self._dt = self.dtype(value) + self._dt = self.dtype(value) @property def num_timesteps(self) -> int: return self._num_timesteps - + @num_timesteps.setter def num_timesteps(self, value: int): self._num_timesteps = value - + @property def space_order(self) -> int: return self._space_order - + @space_order.setter def space_order(self, value: int): self._space_order = value - + @property def grid_shape(self) -> Tuple[int, int, int]: return self.velocity_model.shape - + @property def critical_dt(self) -> float: """ Calculate dt with CFL conditions Based on https://library.seg.org/doi/pdf/10.1190/1.1444605 for the acoustic case. - + Returns ---------- float Critical dt in seconds. """ - + # 2nd order in time a1 = 4 - + # fixed 2nd time derivative - fd_coeffs = findiff.coefficients(deriv=2, acc=self.space_order)['center']['coefficients'] - + fd_coeffs = findiff.coefficients( + deriv=2, + acc=self.space_order + )['center']['coefficients'] + a2 = self.velocity_model.ndim * np.sum(np.abs(fd_coeffs)) - coeff = np.sqrt(a1 / a2) + coeff = np.sqrt(a1 / a2) dt = coeff * np.min(self.grid_spacing) / np.max(self.velocity_model) - + return self.dtype(dt) - + @property def stencil_coefficients(self) -> np.ndarray: # fixed second derivative - coeffs = findiff.coefficients(deriv=2, acc=self.space_order)['center']['coefficients'] - + coeffs = findiff.coefficients( + deriv=2, + acc=self.space_order + )['center']['coefficients'] + # calculate the center point index middle = len(coeffs) // 2 - - # coefficients starting from the center + + # coefficients starting from the center coeffs = coeffs[middle:] - + return self.dtype(coeffs) - + @property def grid(self) -> np.ndarray: - + base_grid = np.zeros(shape=self.grid_shape, dtype=self.dtype) - + return self._add_initial_source(base_grid) - + @property def u_arrays(self) -> Tuple[np.ndarray, np.ndarray]: # return prev_u and next_u arrays - - prev_u = self.grid + + prev_u = self.grid next_u = self.grid.copy() - + return prev_u, next_u - - + def _add_initial_source(self, grid: np.ndarray) -> np.ndarray: - + n1, n2, n3 = grid.shape - + val = 50.0 - - for s in range(4,-1,-1): + + for s in range(4, -1, -1): grid[n1//2-s:n1//2+s, n2//2-s:n2//2+s, n3//2-s:n3//2+s] = val val *= 0.9 - + return grid - \ No newline at end of file diff --git a/miniwave/utils/plot.py b/miniwave/utils/plot.py index 8df78e1..89bb5cf 100644 --- a/miniwave/utils/plot.py +++ b/miniwave/utils/plot.py @@ -1,10 +1,17 @@ -import numpy as np import matplotlib.pyplot as plt import os -from matplotlib import cm from mpl_toolkits.axes_grid1 import make_axes_locatable -def plot(wavefield, file_name="wavefield", colorbar=True, cmap="gray", extent=None, show=False, clim=[-5, 5]): + +def plot( + wavefield, + file_name="wavefield", + colorbar=True, + cmap="gray", + extent=None, + show=False, + clim=[-5, 5] +): """ Plot the wavefield. @@ -58,4 +65,4 @@ def plot(wavefield, file_name="wavefield", colorbar=True, cmap="gray", extent=No plt.close() - print("Wavefield saved in plots/{}.png".format(file_name)) \ No newline at end of file + print("Wavefield saved in plots/{}.png".format(file_name)) diff --git a/miniwave/utils/properties.py b/miniwave/utils/properties.py index 2927d9c..bbe5af7 100644 --- a/miniwave/utils/properties.py +++ b/miniwave/utils/properties.py @@ -1,36 +1,41 @@ class Properties: - - def __init__(self, block_size_1: int, block_size_2: int, block_size_3: int): - + + def __init__( + self, + block_size_1: int, + block_size_2: int, + block_size_3: int + ): + self.block_size_1 = block_size_1 self.block_size_2 = block_size_2 self.block_size_3 = block_size_3 - self._space_order = None + self._space_order = None self._dtype = None - + @property def block3(self): return (self.block_size_1, self.block_size_2, self.block_size_3) - + @property def dtype(self): - return self._dtype - + return self._dtype + @dtype.setter def dtype(self, value: str): self._dtype = value - + @property def space_order(self): - return self._space_order - + return self._space_order + @space_order.setter def space_order(self, value: str): self._space_order = value - + @property def stencil_radius(self): if self.space_order is not None: return self.space_order // 2 - + return None