diff --git a/miniwave/.gitignore b/miniwave/.gitignore new file mode 100644 index 0000000..740a746 --- /dev/null +++ b/miniwave/.gitignore @@ -0,0 +1,9 @@ +plots/* +R-miniwave* +run.sh +*.nvvp +__pycache__ +tmp/ +*.sif +data/ +build/ \ No newline at end of file diff --git a/miniwave/README.md b/miniwave/README.md new file mode 100644 index 0000000..b4b1e66 --- /dev/null +++ b/miniwave/README.md @@ -0,0 +1,25 @@ +# Miniwave - Minimum Simwave + +Python wrapper for forward wave propagation. + +# Dependencies + +`pip install numpy matplotlib findiff h5py` + +``` +mkdir /hdf5 && \ + cd /hdf5 && \ + wget https://support.hdfgroup.org/ftp/HDF5/releases/hdf5-1.14/hdf5-1.14.3/src/CMake-hdf5-1.14.3.tar.gz && \ + tar xf CMake-hdf5-1.14.3.tar.gz && \ + cd CMake-hdf5-1.14.3 && \ + ./build-unix.sh && \ + yes | ./HDF5-1.14.3-Linux.sh && \ + cp -r HDF5-1.14.3-Linux/HDF_Group/HDF5/1.14.3/ ../build +``` + +# How to use + +`python3 miniwave.py --help` + +`python3 miniwave.py --file FILE --grid_size GRID_SIZE --num_timesteps NUM_TIMESTEPS --language {c,openmp,openacc,cuda,python} --space_order SPACE_ORDER` + diff --git a/miniwave/c-frontend/CMakeLists.txt b/miniwave/c-frontend/CMakeLists.txt new file mode 100644 index 0000000..c507c2f --- /dev/null +++ b/miniwave/c-frontend/CMakeLists.txt @@ -0,0 +1,19 @@ +cmake_minimum_required(VERSION 3.18) + +project(miniwave) + +# Set the kernel source file and header +configure_file(${CMAKE_SOURCE_DIR}/selected_kernel.h.in + ${CMAKE_BINARY_DIR}/selected_kernel.h) +include_directories(${CMAKE_BINARY_DIR}) + +add_executable(miniwave ../${KERNEL_SOURCE} miniwave.c) + +# Include miniwave header files (sequential.h) +target_include_directories(miniwave PUBLIC ${CMAKE_SOURCE_DIR}/../kernels) + +# HDF5 commands +set(HDF5DIR "$ENV{HOME}/hdf5/build") +target_include_directories(miniwave PRIVATE ${HDF5DIR}/include) +target_link_directories(miniwave PRIVATE ${HDF5DIR}/lib) +target_link_libraries(miniwave PRIVATE libhdf5.so libhdf5_cpp.so) diff --git a/miniwave/c-frontend/miniwave.c b/miniwave/c-frontend/miniwave.c new file mode 100644 index 0000000..7f153d3 --- /dev/null +++ b/miniwave/c-frontend/miniwave.c @@ -0,0 +1,238 @@ +#include +#include +#include + +#include "hdf5.h" +#ifdef CONTAINS_MPI + #include "mpi.h" +#endif +#include "selected_kernel.h" + +hid_t open_hdf5_file(char *file) { + return H5Fopen(file, H5F_ACC_RDONLY, H5P_DEFAULT); +} +hid_t open_hdf5_dataset(hid_t file_id, char *dataset) { + return H5Dopen2(file_id, dataset, H5P_DEFAULT); +} +float *read_float_dataset(hid_t dataset_id) { + hid_t dataspace; + hsize_t dims_out[10]; + int rank, total_size; + + dataspace = H5Dget_space(dataset_id); /* dataspace handle */ + rank = H5Sget_simple_extent_ndims(dataspace); + H5Sget_simple_extent_dims(dataspace, dims_out, NULL); + + total_size = 1; + for (size_t i = 0; i < rank; i++) { + total_size *= dims_out[i]; + } + + float *dset_data = malloc(sizeof(float) * total_size); + H5Dread(dataset_id, H5T_NATIVE_FLOAT, H5S_ALL, H5S_ALL, H5P_DEFAULT, + dset_data); + return dset_data; +} +float read_float_attribute(hid_t dataset_id, char *attribute_name) { + // Gets attribute value from a dataset in a file + // Ex: file example_data.h5 has a dataset named scalar_data + // which has an attribute named 'attribute_X'. To get + // the value of this attribute: + // + // float attribute_X = + // read_float_attribute(scalar_data_dataset_id, "attribute_X"); + + + const char *attribute_value; + // Get attribute value as string + hid_t attribute_id = H5Aopen(dataset_id, attribute_name, H5P_DEFAULT); + hid_t attribute_type = H5Aget_type(attribute_id); + H5Aread(attribute_id, attribute_type, &attribute_value); + // Convert attribute value to float + return atof(attribute_value); +} +void close_hdf5_dataset(hid_t dataset_id) { H5Dclose(dataset_id); } +void close_hdf5_file(hid_t file_id) { H5Fclose(file_id); } + +int write_hdf5_result(int n1, int n2, int n3, double execution_time, f_type* next_u) { + hid_t h5t_type = H5T_NATIVE_FLOAT; + #if defined(DOUBLE) + h5t_type = H5T_NATIVE_DOUBLE; + #endif + + // Create a new HDF5 file using the default properties + hid_t file_id = H5Fcreate("c-frontend/data/results.h5", H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT); + if (file_id < 0) { + printf("Error creating file.\n"); + return 1; + } + + // Define the dataspace for the vector dataset + hsize_t vector_dims[3] = {n1,n2,n3}; // 3D vector + hid_t vector_dataspace_id = H5Screate_simple(3, vector_dims, NULL); + if (vector_dataspace_id < 0) { + printf("Error creating vector dataspace.\n"); + H5Fclose(file_id); + return 1; + } + + // Create the vector dataset with default properties + hid_t vector_dataset_id = H5Dcreate(file_id, "vector", h5t_type, vector_dataspace_id, + H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + if (vector_dataset_id < 0) { + printf("Error creating vector dataset.\n"); + H5Sclose(vector_dataspace_id); + H5Fclose(file_id); + return 1; + } + + // Write the vector data to the dataset + if (H5Dwrite(vector_dataset_id, h5t_type, H5S_ALL, H5S_ALL, H5P_DEFAULT, next_u) < 0) { + printf("Error writing vector data.\n"); + H5Dclose(vector_dataset_id); + H5Sclose(vector_dataspace_id); + H5Fclose(file_id); + return 1; + } + + // Define the dataspace for the execution time dataset + hsize_t time_dims[1] = {1}; // Scalar + hid_t time_dataspace_id = H5Screate_simple(1, time_dims, NULL); + if (time_dataspace_id < 0) { + printf("Error creating time dataspace.\n"); + H5Dclose(vector_dataset_id); + H5Sclose(vector_dataspace_id); + H5Fclose(file_id); + return 1; + } + + // Create the execution time dataset with default properties + hid_t time_dataset_id = H5Dcreate(file_id, "execution_time", H5T_NATIVE_DOUBLE, time_dataspace_id, + H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + if (time_dataset_id < 0) { + printf("Error creating time dataset.\n"); + H5Dclose(vector_dataset_id); + H5Sclose(vector_dataspace_id); + H5Sclose(time_dataspace_id); + H5Fclose(file_id); + return 1; + } + + // Write the execution time to the dataset + if (H5Dwrite(time_dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, &execution_time) < 0) { + printf("Error writing time data.\n"); + H5Dclose(vector_dataset_id); + H5Sclose(vector_dataspace_id); + H5Dclose(time_dataset_id); + H5Sclose(time_dataspace_id); + H5Fclose(file_id); + return 1; + } + + // Close the datasets, dataspaces, and file + H5Dclose(vector_dataset_id); + H5Sclose(vector_dataspace_id); + H5Dclose(time_dataset_id); + H5Sclose(time_dataspace_id); + H5Fclose(file_id); + return 0; +} + +int main() { + #ifdef CONTAINS_MPI + MPI_Init(NULL, NULL); + #endif + + int rank = 0; + + #ifdef CONTAINS_MPI + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + #endif + + // Get the file id + hid_t file_id = open_hdf5_file("c-frontend/data/miniwave_data.h5"); + // Read arguments + hid_t vel_model_id = open_hdf5_dataset(file_id, "vel_model"); + hid_t next_u_id = open_hdf5_dataset(file_id, "next_u"); + hid_t prev_u_id = open_hdf5_dataset(file_id, "prev_u"); + hid_t coefficient_id = open_hdf5_dataset(file_id, "coefficient"); + hid_t scalar_data_id = open_hdf5_dataset(file_id, "scalar_data"); + float *vel_model = read_float_dataset(vel_model_id); + float *next_u = read_float_dataset(next_u_id); + float *prev_u = read_float_dataset(prev_u_id); + float *coefficient = read_float_dataset(coefficient_id); + float block_size_1 = read_float_attribute(scalar_data_id, "block_size_1"); + float block_size_2 = read_float_attribute(scalar_data_id, "block_size_2"); + float block_size_3 = read_float_attribute(scalar_data_id, "block_size_3"); + float d1 = read_float_attribute(scalar_data_id, "d1"); + float d2 = read_float_attribute(scalar_data_id, "d2"); + float d3 = read_float_attribute(scalar_data_id, "d3"); + float dt = read_float_attribute(scalar_data_id, "dt"); + float iterations = read_float_attribute(scalar_data_id, "iterations"); + float n1 = read_float_attribute(scalar_data_id, "n1"); + float n2 = read_float_attribute(scalar_data_id, "n2"); + float n3 = read_float_attribute(scalar_data_id, "n3"); + float stencil_radius = read_float_attribute(scalar_data_id, "stencil_radius"); + + if (rank == 0) { + printf("vel_model[0:50]:\n"); + for (size_t i = 0; i < 50; i++) { + printf("%f ", vel_model[i]); + } + printf("\n"); + printf("next_u[0:50]:\n"); + for (size_t i = 0; i < 50; i++) { + printf("%lf ", next_u[i]); + } + printf("\n"); + printf("prev_u[0:50]:\n"); + for (size_t i = 0; i < 50; i++) { + printf("%lf ", prev_u[i]); + } + printf("\n"); + printf("coefficient:\n"); + for (size_t i = 0; i < 2; i++) { + printf("%lf ", coefficient[i]); + } + printf("\n"); + printf("block_size_1: %f\n",block_size_1); + printf("block_size_2: %f\n",block_size_2); + printf("block_size_3: %f\n",block_size_3); + printf("d1: %f\n",d1); + printf("d2: %f\n",d2); + printf("d3: %f\n",d3); + printf("dt: %f\n",dt); + printf("iterations: %f\n",iterations); + printf("n1: %f\n",n1); + printf("n2: %f\n",n2); + printf("n3: %f\n",n3); + printf("stencil_radius: %f\n",stencil_radius); + } + + clock_t start_time = clock(); + + forward(prev_u, next_u, vel_model, coefficient, d1, d2, d3, dt, n1, n2, n3, iterations, stencil_radius, block_size_1, block_size_2, block_size_3); + + clock_t end_time = clock(); + double execution_time = ((double)(end_time - start_time)) / CLOCKS_PER_SEC; + + if (rank == 0) { + printf("\nprev_u[0:50]"); + for (size_t i = 0; i < 50; i++) { + printf("%lf ", prev_u[i]); + } + printf("\n"); + + write_hdf5_result(n1, n2, n3, execution_time, next_u); + } + + close_hdf5_dataset(vel_model_id); + close_hdf5_dataset(next_u_id); + close_hdf5_dataset(prev_u_id); + close_hdf5_dataset(coefficient_id); + close_hdf5_file(file_id); + + #ifdef CONTAINS_MPI + MPI_Finalize(); + #endif +} \ No newline at end of file diff --git a/miniwave/c-frontend/selected_kernel.h.in b/miniwave/c-frontend/selected_kernel.h.in new file mode 100644 index 0000000..f87a535 --- /dev/null +++ b/miniwave/c-frontend/selected_kernel.h.in @@ -0,0 +1,6 @@ +#ifndef SELECTED_KERNEL_H +#define SELECTED_KERNEL_H + +#include "../@KERNEL_HEADER@" + +#endif // SELECTED_KERNEL_H diff --git a/miniwave/kernels/sequential.c b/miniwave/kernels/sequential.c new file mode 100644 index 0000000..b14f787 --- /dev/null +++ b/miniwave/kernels/sequential.c @@ -0,0 +1,49 @@ +#include +#include + +#include "sequential.h" + +int forward(f_type *prev_u, f_type *next_u, f_type *vel_model, f_type *coefficient, + f_type d1, f_type d2, f_type d3, f_type dt, int n1, int n2, int n3, + int iterations, int stencil_radius, + int block_size_1, int block_size_2, int block_size_3 + ){ + + f_type d1Squared = d1 * d1; + f_type d2Squared = d2 * d2; + f_type d3Squared = d3 * d3; + f_type dtSquared = dt * dt; + + for(int t = 0; t < iterations; t++) { + + for(int i = stencil_radius; i < n1 - stencil_radius; i++){ + for(int j = stencil_radius; j < n2 - stencil_radius; j++){ + for(int k = stencil_radius; k < n3 - stencil_radius; k++){ + // index of the current point in the grid + int current = (i * n2 + j) * n3 + k; + + // stencil code to update grid + f_type value = coefficient[0] * (prev_u[current]/d1Squared + prev_u[current]/d2Squared + prev_u[current]/d3Squared); + + // radius of the stencil + for(int ir = 1; ir <= stencil_radius; ir++){ + value += coefficient[ir] * ( + ( (prev_u[current + ir] + prev_u[current - ir]) / d3Squared ) + //neighbors in the third axis + ( (prev_u[current + (ir * n3)] + prev_u[current - (ir * n3)]) / d2Squared ) + //neighbors in the second axis + ( (prev_u[current + (ir * n2 * n3)] + prev_u[current - (ir * n2 * n3)]) / d1Squared )); //neighbors in the first axis + } + + value *= dtSquared * vel_model[current] * vel_model[current]; + next_u[current] = 2.0 * prev_u[current] - next_u[current] + value; + } + } + } + + // swap arrays for next iteration + f_type *swap = next_u; + next_u = prev_u; + prev_u = swap; + } + + return 0; +} diff --git a/miniwave/kernels/sequential.h b/miniwave/kernels/sequential.h new file mode 100644 index 0000000..63a3f8c --- /dev/null +++ b/miniwave/kernels/sequential.h @@ -0,0 +1,16 @@ + +// use single (float) or double precision +// according to the value passed in the compilation cmd +#if defined(FLOAT) + typedef float f_type; +#elif defined(DOUBLE) + typedef double f_type; +#else + typedef float f_type; +#endif + +int forward(f_type *prev_u, f_type *next_u, f_type *vel_model, f_type *coefficient, + f_type d1, f_type d2, f_type d3, f_type dt, int n1, int n2, int n3, + int iterations, int stencil_radius, + int block_size_1, int block_size_2, int block_size_3 + ); \ No newline at end of file diff --git a/miniwave/miniwave.py b/miniwave/miniwave.py new file mode 100644 index 0000000..11d44af --- /dev/null +++ b/miniwave/miniwave.py @@ -0,0 +1,135 @@ +import numpy as np +import argparse +import sys +from utils import Model, Compiler, Kernel, plot +from utils.properties import Properties + + +def get_args(args=sys.argv[1:]): + + parser = argparse.ArgumentParser(description='How to use this program') + + parser.add_argument("--file", type=str, default='kernels/sequential.c', + help="Path to the Kernel file") + + parser.add_argument("--grid_size", type=int, default=256, + help="Grid size") + + parser.add_argument("--num_timesteps", type=int, default=400, + help="Number of timesteps") + + parser.add_argument( + "--language", + type=str, + default="c", + choices=[ + 'c', + 'openmp', + 'openmp_cpu', + 'openacc', + 'cuda', + 'python', + 'mpi', + 'mpi_cuda', + 'ompc' + ], + help="Language: c, openmp, openacc, cuda, python, ompc, mpi, mpi_cuda" + ) + + parser.add_argument("--space_order", type=int, default=2, + help="Space order") + + parser.add_argument("--block_size_1", type=int, default=1, + help="GPU block size in the first axis") + + parser.add_argument("--block_size_2", type=int, default=1, + help="GPU block size in the second axis") + + parser.add_argument("--block_size_3", type=int, default=1, + help="GPU block size in the third axis") + + parser.add_argument("--sm", type=int, default=75, + help="Cuda capability") + + parser.add_argument( + "--fast_math", + default=False, + action="store_true", + help="Enable --fast-math flag" + ) + + parser.add_argument( + "--plot", + default=False, + action="store_true", + help="Enable ploting" + ) + + parser.add_argument( + "--dtype", + type=str, + default="float64", + help="Float Precision. float32 or float64 (default)" + ) + + parsed_args = parser.parse_args(args) + + return parsed_args + + +if __name__ == "__main__": + + args = get_args() + + # enable/disable fast math + fast_math = args.fast_math + + # cuda capability + sm = args.sm + + # language + language = args.language + + # float precision + dtype = args.dtype + + # create a compiler object + compiler = Compiler(language=language, sm=sm, fast_math=fast_math) + + # define grid shape + grid_size = (args.grid_size, args.grid_size, args.grid_size) + + vel_model = np.ones(shape=grid_size) * 1500.0 + + model = Model( + velocity_model=vel_model, + grid_spacing=(10, 10, 10), + dt=0.002, + num_timesteps=args.num_timesteps, + space_order=args.space_order, + dtype=dtype + ) + + # GPU block sizes + properties = Properties( + block_size_1=args.block_size_1, + block_size_2=args.block_size_2, + block_size_3=args.block_size_3 + ) + + solver = Kernel( + file=args.file, + model=model, + compiler=compiler, + properties=properties + ) + + # run the kernel + exec_time, u = solver.run() + + # plot a slice + if args.plot: + slice = vel_model.shape[1] // 2 + plot(u[:, slice, :]) + + print(f"Execution time: {exec_time} seconds") diff --git a/miniwave/selected_kernel.h b/miniwave/selected_kernel.h new file mode 100644 index 0000000..883111b --- /dev/null +++ b/miniwave/selected_kernel.h @@ -0,0 +1,6 @@ +#ifndef SELECTED_KERNEL_H +#define SELECTED_KERNEL_H + +#include "{KERNEL_HEADER}" + +#endif // SELECTED_KERNEL_H diff --git a/miniwave/utils/__init__.py b/miniwave/utils/__init__.py new file mode 100644 index 0000000..809f6b8 --- /dev/null +++ b/miniwave/utils/__init__.py @@ -0,0 +1,6 @@ +# flake8: noqa +from .model import Model +from .compiler import Compiler +from .kernel import Kernel +from .plot import plot +from .properties import Properties diff --git a/miniwave/utils/compiler.py b/miniwave/utils/compiler.py new file mode 100644 index 0000000..6e103c9 --- /dev/null +++ b/miniwave/utils/compiler.py @@ -0,0 +1,178 @@ +import os +from hashlib import sha1 +from typing import Optional +from utils.properties import Properties + + +class Compiler: + """ + Base class to implement the runtime compiler. + + Parameters + ---------- + language : str + Kernel language. + sm : int + Cuda capability. + fast_math : bool + Enable fast_math. Default is False. + """ + + def __init__( + self, + language: str, + sm: int, + fast_math: Optional[bool] = False + ): + self._language = language + self._sm = sm + self._fast_math = fast_math + + self._define_default_flags() + + @property + def language(self) -> str: + return self._language + + @property + def sm(self) -> int: + return self._sm + + @property + def fast_math(self) -> bool: + return self._fast_math + + @property + def cc(self) -> str: + return self._cc + + @cc.setter + def cc(self, value: str) -> None: + self._cc = value + + @property + def flags(self) -> str: + return self._flags + + @flags.setter + def flags(self, value: str) -> None: + self._flags = value + + def _define_default_flags(self) -> None: + + # fast math for GNU and CLANG + if self.fast_math: + fast_math = "-ffast-math" + else: + fast_math = "" + + if self.language == 'c': + self.cc = 'gcc' + self.flags = f'-O3 -fPIC {fast_math} -shared' + + elif self.language == 'openmp': + self.cc = 'clang' + self.flags = f'-O3 -fPIC {fast_math} -fopenmp -fopenmp-version=51 \ + -fopenmp-targets=nvptx64 -Xopenmp-target -march=sm_{self.sm} \ + -shared -I/usr/local/cuda/include -L/usr/local/cuda/lib64 \ + -lcudart -g' + + elif self.language == 'mpi': + self.cc = 'mpicc' + self.flags = f'-O3 -fPIC {fast_math} -shared' + + elif self.language == 'mpi_cuda': + self.cc = 'mpicc' + self.flags = f'-O3 -fPIC {fast_math} -L/usr/local/cuda/lib64 \ + -lcudart -I/usr/local/cuda/include \ + --offload-arch=sm_{self.sm} -shared' + + elif self.language == 'openmp_cpu': + self.cc = 'gcc' + self.flags = f'-O3 -fPIC {fast_math} -fopenmp -shared' + + elif self.language == 'openacc': + + if self.fast_math: + fast_math = ",fastmath" + + self.cc = 'pgcc' + self.flags = f'-O3 -fPIC -acc:gpu -gpu=pinned{fast_math} -shared' + + elif self.language == 'cuda': + if self.fast_math: + fast_math = "--use_fast_math" + + self.cc = 'nvcc' + self.flags = f'-O3 -gencode \ + arch=compute_{self.sm},code=sm_{self.sm} \ + --compiler-options -fPIC,-Wall {fast_math} -shared' + + elif self.language == 'python': + pass + + elif self.language == 'ompc': + self.cc = 'clang' + self.flags = f'-O3 -fPIC {fast_math} -fopenmp \ + -fopenmp-targets=x86_64-pc-linux-gnu -shared' + + else: + raise Exception("Language not supported") + + def compile( + self, + file: str, + properties: Optional[Properties] = None + ) -> str: + + object_dir = "/tmp/miniwave/" + + # create a dir to save the compiled shared object + os.makedirs(object_dir, exist_ok=True) + + # get c file content + with open(file, 'r', encoding='utf-8') as f: + file_content = f.read() + + # float precision + type = { + 'float32': '-DFLOAT', + 'float64': '-DDOUBLE' + } + + float_precision = type[properties.dtype] + + b1, b2, b3 = properties.block3 + stencil_radius = properties.stencil_radius + + c_def = f" -DBLOCK1={b1} -DBLOCK2={b2} -DBLOCK3={b3} \ + -DSTENCIL_RADIUS={stencil_radius} {float_precision}" + + # add defined properties to flags + self.flags += c_def + + # compose the object string + object_str = "{} {} {}".format(self.cc, self.flags, file_content) + + # apply sha1 hash to name the object + hash = sha1() + hash.update(object_str.encode()) + object_name = hash.hexdigest() + ".so" + + # object complete path + object_path = object_dir + object_name + + # check if object_file already exists + if os.path.exists(object_path): + print("Shared object already compiled in:", object_path) + else: + cmd = self.cc + " " + file + " " + + self.flags + " -o " + object_path + + print("Compilation command:", cmd) + + # execute the command + if os.system(cmd) != 0: + raise Exception("Compilation failed") + + return object_path diff --git a/miniwave/utils/dataset_writer.py b/miniwave/utils/dataset_writer.py new file mode 100644 index 0000000..681ffe3 --- /dev/null +++ b/miniwave/utils/dataset_writer.py @@ -0,0 +1,76 @@ +import h5py +import numpy as np +import os + + +class DatasetWriter: + def __init__(self) -> None: + pass + + def write_dataset(data: dict, path: str): + """Writes HDF5 file from data. + Non array data written to "scalar_data" dataset as attributes. + + :param data: dictionary containing data to be written. + Object format: + + data: { + [dataset_name]: { + dataset_data: np.ndarray | int | float, + dataset_attributes: {[attribute_name]: str} + } + } + + Example: + + ```python + data = { + "my_dataset_1": { + "dataset_data": np.array([1, 2, 3]), + "dataset_attributes": { + "description": "small numbers", + "location": "collected at lab X", + }, + }, + "my_dataset_2": { + "dataset_data": np.array([3, 2, 1]), + "dataset_attributes": { + "my_attribute_1": "small numbers", + "my_attribute_2": "collected at lab Y", + }, + }, + } + ``` + :type data: dict[str, dict[str, np.ndarray | dict]] + :param path: Where the file will be saved. + :type path: str + """ + + # Create file + dir = os.path.dirname(path) + if not os.path.exists(dir): + os.makedirs(dir) + file = h5py.File(path, "w") + # Create datasets + scalar_data_dataset = file.create_dataset( + "scalar_data", + (1,), + dtype="f" + ) + for key, value in data.items(): + + dataset_name = key + dataset_properties = value + dataset_data = dataset_properties["dataset_data"] + dataset_attributes = dataset_properties["dataset_attributes"] + + if isinstance(dataset_data, np.ndarray): + dataset = file.create_dataset( + name=dataset_name, + data=dataset_data + ) + dataset.attrs.update(dataset_attributes) + else: + scalar_data_dataset.attrs[dataset_name] = str(dataset_data) + + file.close() diff --git a/miniwave/utils/kernel.py b/miniwave/utils/kernel.py new file mode 100644 index 0000000..76a2bcb --- /dev/null +++ b/miniwave/utils/kernel.py @@ -0,0 +1,252 @@ +import ctypes +import numpy as np +from numpy.ctypeslib import ndpointer +import importlib.util +import sys +from typing import Tuple, Optional, Callable +from utils.model import Model +from utils.compiler import Compiler +from utils.properties import Properties +from utils.dataset_writer import DatasetWriter +import subprocess +import h5py + + +class Kernel: + + def __init__( + self, + file: str, + model: Model, + compiler: Optional[Compiler] = None, + properties: Optional[Properties] = Properties(1, 1, 1), + ): + + self._file = file + self._model = model + self._compiler = compiler + self._properties = properties + + if self.language != "python" and compiler is None: + raise Exception("Compiler can not be None") + + @property + def file(self) -> str: + return self._file + + @property + def language(self) -> str: + return self.compiler.language + + @property + def model(self) -> Model: + return self._model + + @property + def compiler(self) -> Compiler: + return self._compiler + + @property + def properties(self) -> Properties: + return self._properties + + def _import_python_lib(self) -> Callable: + + spec = importlib.util.spec_from_file_location( + "kernel.forward", + self.file + ) + lib = importlib.util.module_from_spec(spec) + sys.modules["kernel.forward"] = lib + spec.loader.exec_module(lib) + + return lib.forward + + def _import_c_lib(self) -> Callable: + + dtype = "float32" if self.model.dtype == np.float32 else "float64" + + # add space order and dtype to properties + self.properties.space_order = self.model.space_order + self.properties.dtype = dtype + + shared_object = self.compiler.compile(self.file, self.properties) + + # load the library + lib = ctypes.cdll.LoadLibrary(shared_object) + + if self.properties.dtype == "float32": + + argtypes = [ + ndpointer(ctypes.c_float, flags="C_CONTIGUOUS"), # prev_u + ndpointer(ctypes.c_float, flags="C_CONTIGUOUS"), # next_u + ndpointer(ctypes.c_float, flags="C_CONTIGUOUS"), # vel_model + ndpointer(ctypes.c_float, flags="C_CONTIGUOUS"), # coeffs + ctypes.c_float, # d1 + ctypes.c_float, # d2 + ctypes.c_float, # d3 + ctypes.c_float, # dt + ctypes.c_int, # n1 + ctypes.c_int, # n2 + ctypes.c_int, # n3 + ctypes.c_int, # number of timesteps + ctypes.c_int, # radius of space order + ctypes.c_int, # block_size_1 + ctypes.c_int, # block_size_2 + ctypes.c_int, # block_size_3 + ] + + elif self.properties.dtype == "float64": + + argtypes = [ + ndpointer(ctypes.c_double, flags="C_CONTIGUOUS"), # prev_u + ndpointer(ctypes.c_double, flags="C_CONTIGUOUS"), # next_u + ndpointer(ctypes.c_double, flags="C_CONTIGUOUS"), # vel_model + ndpointer(ctypes.c_double, flags="C_CONTIGUOUS"), # coeffs + ctypes.c_double, # d1 + ctypes.c_double, # d2 + ctypes.c_double, # d3 + ctypes.c_double, # dt + ctypes.c_int, # n1 + ctypes.c_int, # n2 + ctypes.c_int, # n3 + ctypes.c_int, # number of timesteps + ctypes.c_int, # radius of space order + ctypes.c_int, # block_size_1 + ctypes.c_int, # block_size_2 + ctypes.c_int, # block_size_3 + ] + + else: + raise ValueError( + "Unkown float precision. Must be float32 or float64" + ) + + forward = lib.forward + forward.restype = ctypes.c_int + forward.argtypes = argtypes + + return forward + + def _load_lib(self) -> Callable: + + if self.language == "python": + return self._import_python_lib() + else: + return self._import_c_lib() + + def read_data_from_hdf5(self, filename): + with h5py.File(filename, "r") as file: + exec_time = file["/execution_time"][()] + vector = file["/vector"][:] + return exec_time, vector + + def print_setup(self): + print("----------") + print("Language: ", self.language) + print("File: ", self.file) + print(f"Dtype: {self.model.dtype} ({self.properties.dtype})") + print("Grid size: ", self.model.grid_shape) + print("Grid spacing: ", self.model.grid_spacing) + print("Space order: ", self.model.space_order) + print("-DSTENCIL_RADIUS: ", self.properties.stencil_radius) + print("Iterations: ", self.model.num_timesteps) + print("Block sizes: ", self.properties.block3) + print("SM: ", self.compiler.sm) + print("Fast Math: ", self.compiler.fast_math) + print("----------") + + def run(self) -> Tuple[float, np.ndarray]: + # return execution time and last wavefield + + # load the lib + self._load_lib() + + # get args + prev_u, next_u = self.model.u_arrays + n1, n2, n3 = self.model.grid_shape + d1, d2, d3 = self.model.grid_spacing + stencil_radius = self.model.space_order // 2 + + # print setup + self.print_setup() + + # Generate HDF5 file + + data = { + "prev_u": {"dataset_data": prev_u, "dataset_attributes": {}}, + "next_u": {"dataset_data": next_u, "dataset_attributes": {}}, + "vel_model": { + "dataset_data": self.model.velocity_model, + "dataset_attributes": {}, + }, + "coefficient": { + "dataset_data": self.model.stencil_coefficients, + "dataset_attributes": {}, + }, + "d1": {"dataset_data": d1, "dataset_attributes": {}}, + "d2": {"dataset_data": d2, "dataset_attributes": {}}, + "d3": {"dataset_data": d3, "dataset_attributes": {}}, + "dt": {"dataset_data": self.model.dt, "dataset_attributes": {}}, + "n1": {"dataset_data": n1, "dataset_attributes": {}}, + "n2": {"dataset_data": n2, "dataset_attributes": {}}, + "n3": {"dataset_data": n3, "dataset_attributes": {}}, + "iterations": { + "dataset_data": self.model.num_timesteps, + "dataset_attributes": {}, + }, + "stencil_radius": { + "dataset_data": stencil_radius, + "dataset_attributes": {}, + }, + "block_size_1": { + "dataset_data": self.properties.block_size_1, + "dataset_attributes": {}, + }, + "block_size_2": { + "dataset_data": self.properties.block_size_2, + "dataset_attributes": {}, + }, + "block_size_3": { + "dataset_data": self.properties.block_size_3, + "dataset_attributes": {}, + }, + } + + DatasetWriter.write_dataset(data, "c-frontend/data/miniwave_data.h5") + + KERNEL_SOURCE = self.file + KERNEL_HEADER = self.file.split('.')[0] + ".h" + CUDA_ARCH = self.compiler.sm + KERNEL_TYPE = self.language + + # Compile C code + subprocess.run("ls c-frontend", shell=True) + subprocess.run( + f"cmake -S c-frontend -B c-frontend/build/ \ + -DKERNEL_SOURCE={KERNEL_SOURCE} -DKERNEL_HEADER={KERNEL_HEADER} \ + -DKERNEL_TYPE={KERNEL_TYPE} -DCUDA_ARCHITECTURE={CUDA_ARCH}", + shell=True + ) + subprocess.run("cmake --build c-frontend/build/", shell=True) + + # run the forward function + if self.language == "ompc": + subprocess.run( + "mpirun -np 4 offload-mpi-worker : \ + -np 1 ./c-frontend/build/miniwave", + shell=True + ) + elif self.language == "mpi" or self.language == "mpi_cuda": + subprocess.run( + "mpirun -np 4 ./c-frontend/build/miniwave", + shell=True + ) + else: + subprocess.run("./c-frontend/build/miniwave", shell=True) + + exec_time, next_u = self.read_data_from_hdf5( + "./c-frontend/data/results.h5" + ) + + return exec_time[0], next_u diff --git a/miniwave/utils/model.py b/miniwave/utils/model.py new file mode 100644 index 0000000..a99e2ea --- /dev/null +++ b/miniwave/utils/model.py @@ -0,0 +1,186 @@ +import numpy as np +from typing import Tuple, Optional +import findiff + + +class Model: + """ + Encapsulates the both spatial and time model of the simulation. + + Parameters + ---------- + velocity_model : ndarray + Numpy 3-dimensional array with P wave velocity (m/s) profile. + grid_spacing : tuple of float + Grid spacing in meters in each axis (z, x, y). + dt : float. + Time step in seconds. + num_timesteps : int + Number of timesteps to run the simulation. + space_order : int, optional + Spatial order of the stencil. Accepts even orders. Default is 2. + dtype : str, optional + Float precision. Default is float64. + """ + def __init__( + self, + velocity_model: np.ndarray, + grid_spacing: Tuple[float, float, float], + dt: float, + num_timesteps: int, + space_order: Optional[int] = 2, + dtype: Optional[str] = 'float64' + ): + + if dtype == 'float64': + self.dtype = np.float64 + elif dtype == 'float32': + self.dtype = np.float32 + else: + raise ValueError("Unknown dtype. It must be float32 or float64") + + self.velocity_model = velocity_model + self.grid_spacing = grid_spacing + self.num_timesteps = num_timesteps + self.space_order = space_order + self.dt = dt + + @property + def dtype(self) -> np.dtype: + return self._dtype + + @dtype.setter + def dtype(self, value: np.dtype) -> None: + self._dtype = value + + @property + def velocity_model(self) -> np.ndarray: + return self._velocity_model + + @velocity_model.setter + def velocity_model(self, value: np.ndarray): + + if value is None or value.ndim != 3: + raise ValueError("Velocity model must nd-array and 3-dimensional") + + self._velocity_model = self.dtype(value) + + @property + def grid_spacing(self) -> Tuple[float, float, float]: + return self._grid_spacing + + @grid_spacing.setter + def grid_spacing(self, value: Tuple[float, float, float]): + if len(value) != 3: + raise ValueError("Grid spacing must be 3-dimensional") + + self._grid_spacing = ( + self.dtype(value[0]), + self.dtype(value[1]), + self.dtype(value[2]) + ) + + @property + def dt(self) -> float: + return self._dt + + @dt.setter + def dt(self, value: float): + if value < 0: + raise ValueError("Time step cannot be negative.") + elif value > self.critical_dt: + raise ValueError("Time step value violates CFL condition.") + else: + self._dt = self.dtype(value) + + @property + def num_timesteps(self) -> int: + return self._num_timesteps + + @num_timesteps.setter + def num_timesteps(self, value: int): + self._num_timesteps = value + + @property + def space_order(self) -> int: + return self._space_order + + @space_order.setter + def space_order(self, value: int): + self._space_order = value + + @property + def grid_shape(self) -> Tuple[int, int, int]: + return self.velocity_model.shape + + @property + def critical_dt(self) -> float: + """ + Calculate dt with CFL conditions + Based on https://library.seg.org/doi/pdf/10.1190/1.1444605 + for the acoustic case. + + Returns + ---------- + float + Critical dt in seconds. + """ + + # 2nd order in time + a1 = 4 + + # fixed 2nd time derivative + fd_coeffs = findiff.coefficients( + deriv=2, + acc=self.space_order + )['center']['coefficients'] + + a2 = self.velocity_model.ndim * np.sum(np.abs(fd_coeffs)) + coeff = np.sqrt(a1 / a2) + dt = coeff * np.min(self.grid_spacing) / np.max(self.velocity_model) + + return self.dtype(dt) + + @property + def stencil_coefficients(self) -> np.ndarray: + # fixed second derivative + coeffs = findiff.coefficients( + deriv=2, + acc=self.space_order + )['center']['coefficients'] + + # calculate the center point index + middle = len(coeffs) // 2 + + # coefficients starting from the center + coeffs = coeffs[middle:] + + return self.dtype(coeffs) + + @property + def grid(self) -> np.ndarray: + + base_grid = np.zeros(shape=self.grid_shape, dtype=self.dtype) + + return self._add_initial_source(base_grid) + + @property + def u_arrays(self) -> Tuple[np.ndarray, np.ndarray]: + # return prev_u and next_u arrays + + prev_u = self.grid + next_u = self.grid.copy() + + return prev_u, next_u + + def _add_initial_source(self, grid: np.ndarray) -> np.ndarray: + + n1, n2, n3 = grid.shape + + val = 50.0 + + for s in range(4, -1, -1): + grid[n1//2-s:n1//2+s, n2//2-s:n2//2+s, n3//2-s:n3//2+s] = val + val *= 0.9 + + return grid diff --git a/miniwave/utils/plot.py b/miniwave/utils/plot.py new file mode 100644 index 0000000..89bb5cf --- /dev/null +++ b/miniwave/utils/plot.py @@ -0,0 +1,68 @@ +import matplotlib.pyplot as plt +import os +from mpl_toolkits.axes_grid1 import make_axes_locatable + + +def plot( + wavefield, + file_name="wavefield", + colorbar=True, + cmap="gray", + extent=None, + show=False, + clim=[-5, 5] +): + """ + Plot the wavefield. + + Parameters + ---------- + wavefield : ndarray + Wavefield data. + file_name : str, optional + Name of the image to be saved. + Default is wavefield. + colorbar : bool, optional + If True, show a colorbar. Default is True. + cmap : str, optional + The Colormap instance or registered colormap name + used to map scalar data to colors. + Default is gray. + extent : floats(left, right, bottom, top), optional + The bounding box in data coordinates that the image will fill. + show : bool, optional + If True, show the image on a pop up window. + Default is False. + clim : list + Set the color limits of the current image. + Default is (vmin=-5, vmax=5). + """ + + # create the destination dir + os.makedirs("plots", exist_ok=True) + + # process data and generate the plot + plot = plt.imshow(wavefield, cmap=cmap, extent=extent) + + m_unit = 'm' if extent is not None else 'points' + + # labels + plt.xlabel('Width ({})'.format(m_unit)) + plt.ylabel('Depth ({})'.format(m_unit)) + + # Create aligned colorbar on the right + if colorbar: + ax = plt.gca() + divider = make_axes_locatable(ax) + cax = divider.append_axes("right", size="5%", pad=0.05) + plt.colorbar(plot, cax=cax) + plt.clim(clim) + + plt.savefig("plots/{}.png".format(file_name), format="png") + + if show: + plt.show() + + plt.close() + + print("Wavefield saved in plots/{}.png".format(file_name)) diff --git a/miniwave/utils/properties.py b/miniwave/utils/properties.py new file mode 100644 index 0000000..bbe5af7 --- /dev/null +++ b/miniwave/utils/properties.py @@ -0,0 +1,41 @@ +class Properties: + + def __init__( + self, + block_size_1: int, + block_size_2: int, + block_size_3: int + ): + + self.block_size_1 = block_size_1 + self.block_size_2 = block_size_2 + self.block_size_3 = block_size_3 + self._space_order = None + self._dtype = None + + @property + def block3(self): + return (self.block_size_1, self.block_size_2, self.block_size_3) + + @property + def dtype(self): + return self._dtype + + @dtype.setter + def dtype(self, value: str): + self._dtype = value + + @property + def space_order(self): + return self._space_order + + @space_order.setter + def space_order(self, value: str): + self._space_order = value + + @property + def stencil_radius(self): + if self.space_order is not None: + return self.space_order // 2 + + return None