diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 3c26f4d7a9..eb938905b2 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -83,6 +83,8 @@ jobs: --install defcon \ --install gadopt \ --install asQ \ + --package-branch tsfc gpu --package-branch pyop2 gpu \ + || (cat firedrake-install.log && /bin/false) - name: Install test dependencies run: | diff --git a/firedrake/__init__.py b/firedrake/__init__.py index 0fc9aeeed6..f1e2e7bf80 100644 --- a/firedrake/__init__.py +++ b/firedrake/__init__.py @@ -118,6 +118,7 @@ from firedrake.ensemble import * from firedrake.randomfunctiongen import * from firedrake.external_operators import * +from firedrake.offload import * from firedrake.progress_bar import ProgressBar # noqa: F401 from firedrake.logging import * diff --git a/firedrake/assemble.py b/firedrake/assemble.py index b72f99ba2c..3de79eff74 100644 --- a/firedrake/assemble.py +++ b/firedrake/assemble.py @@ -1109,7 +1109,7 @@ def allocate(self): # Getting the comm attribute of a form isn't straightforward # form.ufl_domains()[0]._comm seems the most robust method # revisit in a refactor - return op2.Global( + return op2.compute_backend.Global( 1, [0.0], dtype=utils.ScalarType, @@ -1450,7 +1450,7 @@ def _apply_bc(self, tensor, bc): def _apply_bcs_mat_real_block(op2tensor, i, j, component, node_set): dat = op2tensor[i, j].handle.getPythonContext().dat if component is not None: - dat = op2.DatView(dat, component) + dat = op2.compute_backend.DatView(dat, component) dat.zero(subset=node_set) def _check_tensor(self, tensor): @@ -1535,7 +1535,9 @@ def _global_kernel_cache_key(form, local_knl, subdomain_id, all_integer_subdomai return ((sig, subdomain_id) + tuple(subdomain_key) + tuplify(all_integer_subdomain_ids) - + cachetools.keys.hashkey(local_knl, **kwargs)) + + cachetools.keys.hashkey(local_knl, **kwargs) + + op2.compute_backend.cache_key + ) @cachetools.cached(cache={}, key=_global_kernel_cache_key) @@ -1595,14 +1597,14 @@ def build(self): extruded_periodic = self._mesh.extruded_periodic constant_layers = extruded and not self._mesh.variable_layers - return op2.GlobalKernel(self._kinfo.kernel, - kernel_args, - iteration_region=iteration_region, - pass_layer_arg=self._kinfo.pass_layer_arg, - extruded=extruded, - extruded_periodic=extruded_periodic, - constant_layers=constant_layers, - subset=self._needs_subset) + return op2.compute_backend.GlobalKernel(self._kinfo.kernel, + kernel_args, + iteration_region=iteration_region, + pass_layer_arg=self._kinfo.pass_layer_arg, + extruded=extruded, + extruded_periodic=extruded_periodic, + constant_layers=constant_layers, + subset=self._needs_subset) @property def _integral_type(self): @@ -1831,7 +1833,7 @@ def build(self, tensor): unroll=self.needs_unrolling() ) try: - return op2.Parloop(_global_knl, self._iterset, parloop_args) + return op2.compute_backend.Parloop(_global_knl, self._iterset, parloop_args) except MapValueError: raise RuntimeError("Integral measure does not match measure of all " "coefficients/arguments") @@ -2060,7 +2062,7 @@ def _as_parloop_arg_cell_facet(_, self): @_as_parloop_arg.register(LayerCountKernelArg) def _as_parloop_arg_layer_count(_, self): - glob = op2.Global( + glob = op2.compute_backend.Global( (1,), self._iterset.layers-2, dtype=numpy.int32, diff --git a/firedrake/bcs.py b/firedrake/bcs.py index 9f340e4181..5bbedb9976 100644 --- a/firedrake/bcs.py +++ b/firedrake/bcs.py @@ -170,7 +170,7 @@ def node_set(self): '''The subset corresponding to the nodes at which this boundary condition applies.''' - return op2.Subset(self._function_space.node_set, self.nodes) + return op2.op2.compute_backend.Subset(self._function_space.node_set, self.nodes) @PETSc.Log.EventDecorator() def zero(self, r): diff --git a/firedrake/constant.py b/firedrake/constant.py index 9363e32f91..7e0b5c943b 100644 --- a/firedrake/constant.py +++ b/firedrake/constant.py @@ -75,7 +75,7 @@ def __new__(cls, value, domain=None, name=None, count=None): "create a Function in the Real space.", FutureWarning ) - dat, rank, shape = _create_dat(op2.Global, value, domain._comm) + dat, rank, shape = _create_dat(op2.compute_backend.Global, value, domain._comm) if not isinstance(domain, ufl.AbstractDomain): cell = ufl.as_cell(domain) @@ -102,7 +102,7 @@ def __init__(self, value, domain=None, name=None, count=None): # Init also called in mesh constructor, but constant can be built without mesh utils._init() - self.dat, rank, self._ufl_shape = _create_dat(op2.Constant, value, None) + self.dat, rank, self._ufl_shape = _create_dat(op2.compute_backend.Constant, value, None) super().__init__() Counted.__init__(self, count, Counted) diff --git a/firedrake/extrusion_utils.py b/firedrake/extrusion_utils.py index 0a69eecb41..744be9ab63 100644 --- a/firedrake/extrusion_utils.py +++ b/firedrake/extrusion_utils.py @@ -65,7 +65,7 @@ def make_extruded_coords(extruded_topology, base_coords, ext_coords, layer_height = numpy.cumsum(numpy.concatenate(([0], layer_height))) layer_heights = layer_height.size - layer_height = op2.Global(layer_heights, layer_height, dtype=RealType, comm=extruded_topology._comm) + layer_height = op2.compute_backend.Global(layer_heights, layer_height, dtype=RealType, comm=extruded_topology._comm) if kernel is not None: op2.ParLoop(kernel, diff --git a/firedrake/functionspacedata.py b/firedrake/functionspacedata.py index be00e3ffff..e9d6441256 100644 --- a/firedrake/functionspacedata.py +++ b/firedrake/functionspacedata.py @@ -104,7 +104,7 @@ def get_node_set(mesh, key): global_numbering, constrained_size = get_global_numbering(mesh, key) node_classes = mesh.node_classes(nodes_per_entity, real_tensorproduct=real_tensorproduct) halo = halo_mod.Halo(mesh.topology_dm, global_numbering, comm=mesh.comm) - node_set = op2.Set(node_classes, halo=halo, comm=mesh.comm, constrained_size=constrained_size) + node_set = op2.compute_backend.Set(node_classes, halo=halo, comm=mesh.comm, constrained_size=constrained_size) extruded = mesh.cell_set._extruded assert global_numbering.getStorageSize() == node_set.total_size @@ -509,12 +509,12 @@ def get_map(self, V, entity_set, map_arity, name, offset, offset_quotient): entity_node_list = self.entity_node_lists[entity_set] val = self.map_cache[entity_set] if val is None: - val = op2.Map(entity_set, self.node_set, - map_arity, - entity_node_list, - ("%s_"+name) % (V.name), - offset=offset, - offset_quotient=offset_quotient) + val = op2.compute_backend.Map(entity_set, self.node_set, + map_arity, + entity_node_list, + ("%s_"+name) % (V.name), + offset=offset, + offset_quotient=offset_quotient) self.map_cache[entity_set] = val return val diff --git a/firedrake/functionspaceimpl.py b/firedrake/functionspaceimpl.py index 99aa86d203..d775345106 100644 --- a/firedrake/functionspaceimpl.py +++ b/firedrake/functionspaceimpl.py @@ -695,7 +695,7 @@ def dim(self): def make_dat(self, val=None, valuetype=None, name=None): r"""Return a newly allocated :class:`pyop2.types.dat.Dat` defined on the :attr:`dof_dset` of this :class:`.Function`.""" - return op2.Dat(self.dof_dset, val, valuetype, name) + return op2.compute_backend.Dat(self.dof_dset, val, valuetype, name) def entity_node_map(self, source_mesh, source_integral_type, source_subdomain_id, source_all_integer_subdomain_ids): r"""Return entity node map rebased on ``source_mesh``. @@ -1073,7 +1073,7 @@ def node_set(self): :class:`FunctionSpace`\s this :class:`MixedFunctionSpace` is composed of one or (for VectorFunctionSpaces) more degrees of freedom are stored at each node.""" - return op2.MixedSet(s.node_set for s in self._spaces) + return op2.compute_backend.MixedSet(s.node_set for s in self._spaces) @utils.cached_property def dof_dset(self): @@ -1082,7 +1082,7 @@ def dof_dset(self): :attr:`FunctionSpace.dof_dset`\s of the underlying :class:`FunctionSpace`\s of which this :class:`MixedFunctionSpace` is composed.""" - return op2.MixedDataSet(s.dof_dset for s in self._spaces) + return op2.compute_backend.MixedDataSet(s.dof_dset for s in self._spaces) def entity_node_map(self, source_mesh, source_integral_type, source_subdomain_id, source_all_integer_subdomain_ids): r"""Return entity node map rebased on ``source_mesh``. @@ -1114,17 +1114,17 @@ def cell_node_map(self): :attr:`FunctionSpace.cell_node_map`\s of the underlying :class:`FunctionSpace`\s of which this :class:`MixedFunctionSpace` is composed.""" - return op2.MixedMap(s.cell_node_map() for s in self._spaces) + return op2.compute_backend.MixedMap(s.cell_node_map() for s in self._spaces) def interior_facet_node_map(self): r"""Return the :class:`pyop2.types.map.MixedMap` from interior facets to function space nodes.""" - return op2.MixedMap(s.interior_facet_node_map() for s in self) + return op2.compute_backend.MixedMap(s.interior_facet_node_map() for s in self) def exterior_facet_node_map(self): r"""Return the :class:`pyop2.types.map.Map` from exterior facets to function space nodes.""" - return op2.MixedMap(s.exterior_facet_node_map() for s in self) + return op2.compute_backend.MixedMap(s.exterior_facet_node_map() for s in self) def local_to_global_map(self, bcs): r"""Return a map from process local dof numbering to global dof numbering. @@ -1139,8 +1139,8 @@ def make_dat(self, val=None, valuetype=None, name=None): assert len(val) == len(self) else: val = [None for _ in self] - return op2.MixedDat(s.make_dat(v, valuetype, "%s[cmpt-%d]" % (name, i)) - for i, (s, v) in enumerate(zip(self._spaces, val))) + return op2.compute_backend.MixedDat(s.make_dat(v, valuetype, "%s[cmpt-%d]" % (name, i)) + for i, (s, v) in enumerate(zip(self._spaces, val))) @utils.cached_property def dm(self): @@ -1339,12 +1339,12 @@ def set_shared_data(self): pass def make_dof_dset(self): - return op2.GlobalDataSet(self.make_dat()) + return op2.compute_backend.GlobalDataSet(self.make_dat()) def make_dat(self, val=None, valuetype=None, name=None): r"""Return a newly allocated :class:`pyop2.types.glob.Global` representing the data for a :class:`.Function` on this space.""" - return op2.Global(self.value_size, val, valuetype, name, self._comm) + return op2.compute_backend.Global(self.value_size, val, valuetype, name, self._comm) def entity_node_map(self, source_mesh, source_integral_type, source_subdomain_id, source_all_integer_subdomain_ids): return None diff --git a/firedrake/interpolation.py b/firedrake/interpolation.py index a7442d7820..36e82000ff 100644 --- a/firedrake/interpolation.py +++ b/firedrake/interpolation.py @@ -1112,7 +1112,7 @@ def _interpolator(V, tensor, expr, subset, arguments, access, bcs=None): if tensor in set((c.dat for c in coefficients)): output = tensor - tensor = op2.Dat(tensor.dataset) + tensor = op2.compute_backend.Dat(tensor.dataset) if access is not op2.WRITE: copyin = (partial(output.copy, tensor), ) else: diff --git a/firedrake/matrix.py b/firedrake/matrix.py index 551e1ff1f8..602301bab2 100644 --- a/firedrake/matrix.py +++ b/firedrake/matrix.py @@ -116,7 +116,7 @@ def __init__(self, a, bcs, mat_type, *args, **kwargs): # sets self._a, self._bcs, and self._mat_type MatrixBase.__init__(self, a, bcs, mat_type) options_prefix = kwargs.pop("options_prefix") - self.M = op2.Mat(*args, mat_type=mat_type, **kwargs) + self.M = op2.compute_backend.Mat(*args, mat_type=mat_type, **kwargs) self.petscmat = self.M.handle self.petscmat.setOptionsPrefix(options_prefix) self.mat_type = mat_type diff --git a/firedrake/mesh.py b/firedrake/mesh.py index acc9031542..0905e31964 100644 --- a/firedrake/mesh.py +++ b/firedrake/mesh.py @@ -189,15 +189,15 @@ def __init__(self, mesh, facets, classes, kind, facet_cell, local_facet_number, self.facet_cell = facet_cell if isinstance(self.set, op2.ExtrudedSet): - dset = op2.DataSet(self.set.parent, self._rank) + dset = op2.compute_backend.DataSet(self.set.parent, self._rank) else: - dset = op2.DataSet(self.set, self._rank) + dset = op2.compute_backend.DataSet(self.set, self._rank) # Dat indicating which local facet of each adjacent cell corresponds # to the current facet. - self.local_facet_dat = op2.Dat(dset, local_facet_number, np.uintc, - "%s_%s_local_facet_number" % - (self.mesh.name, self.kind)) + self.local_facet_dat = op2.compute_backend.Dat(dset, local_facet_number, np.uintc, + "%s_%s_local_facet_number" % + (self.mesh.name, self.kind)) self.unique_markers = [] if unique_markers is None else unique_markers self._subsets = {} @@ -209,9 +209,9 @@ def set(self): label = "%s_facets" % self.kind layers = self.mesh.entity_layers(1, label) base = getattr(self.mesh._base_mesh, label).set - return op2.ExtrudedSet(base, layers=layers) - return op2.Set(size, "%sFacets" % self.kind.capitalize()[:3], - comm=self.mesh.comm) + return op2.compute_backend.ExtrudedSet(base, layers=layers) + return op2.compute_backend.Set(size, "%sFacets" % self.kind.capitalize()[:3], + comm=self.mesh.comm) @utils.cached_property def _null_subset(self): @@ -219,7 +219,7 @@ def _null_subset(self): a given marker value. This is required because not all markers need be represented on all processors.''' - return op2.Subset(self.set, []) + return op2.compute_backend.Subset(self.set, []) @PETSc.Log.EventDecorator() def measure_set(self, integral_type, subdomain_id, @@ -260,7 +260,7 @@ def measure_set(self, integral_type, subdomain_id, except KeyError: unmarked_points = self._collect_unmarked_points(all_integer_subdomain_ids) _, indices, _ = np.intersect1d(self.facets, unmarked_points, return_indices=True) - return self._subsets.setdefault(key, op2.Subset(self.set, indices)) + return self._subsets.setdefault(key, op2.compute_backend.Subset(self.set, indices)) else: return self.subset(subdomain_id) @@ -295,7 +295,7 @@ def subset(self, markers): marked_points_list.append(self.mesh.topology_dm.getStratumIS(dmcommon.FACE_SETS_LABEL, i).indices) if marked_points_list: _, indices, _ = np.intersect1d(self.facets, np.concatenate(marked_points_list), return_indices=True) - return self._subsets.setdefault(markers, op2.Subset(self.set, indices)) + return self._subsets.setdefault(markers, op2.compute_backend.Subset(self.set, indices)) else: return self._subsets.setdefault(markers, self._null_subset) @@ -314,8 +314,8 @@ def _collect_unmarked_points(self, markers): @utils.cached_property def facet_cell_map(self): """Map from facets to cells.""" - return op2.Map(self.set, self.mesh.cell_set, self._rank, self.facet_cell, - "facet_to_cell_map") + return op2.compute_backend.Map(self.set, self.mesh.cell_set, self._rank, self.facet_cell, + "facet_to_cell_map") @PETSc.Log.EventDecorator() @@ -889,7 +889,7 @@ def cell_subset(self, subdomain_id, all_integer_subdomain_ids=None): indices = dmcommon.get_cell_markers(self.topology_dm, self._cell_numbering, subdomain_id) - return self._subsets.setdefault(key, op2.Subset(self.cell_set, indices)) + return self._subsets.setdefault(key, op2.compute_backend.Subset(self.cell_set, indices)) @PETSc.Log.EventDecorator() def measure_set(self, integral_type, subdomain_id, @@ -1329,9 +1329,9 @@ def cell_to_facets(self): self._cell_numbering, self.cell_closure) if isinstance(self.cell_set, op2.ExtrudedSet): - dataset = op2.DataSet(self.cell_set.parent, dim=cell_facets.shape[1:]) + dataset = op2.compute_backend.DataSet(self.cell_set.parent, dim=cell_facets.shape[1:]) else: - dataset = op2.DataSet(self.cell_set, dim=cell_facets.shape[1:]) + dataset = op2.compute_backend.DataSet(self.cell_set, dim=cell_facets.shape[1:]) return op2.Dat(dataset, cell_facets, dtype=cell_facets.dtype, name="cell-to-local-facet-dat") @@ -1362,7 +1362,7 @@ def num_entities(self, d): @utils.cached_property def cell_set(self): size = list(self._entity_classes[self.cell_dimension(), :]) - return op2.Set(size, "Cells", comm=self._comm) + return op2.compute_backend.Set(size, "Cells", comm=self._comm) @PETSc.Log.EventDecorator() def _set_partitioner(self, plex, distribute, partitioner_type=None): @@ -1691,7 +1691,7 @@ def __init__(self, mesh, layers, periodic=False, name=None): """ else: self.variable_layers = False - self.cell_set = op2.ExtrudedSet(mesh.cell_set, layers=layers, extruded_periodic=periodic) + self.cell_set = op2.computed_backend.ExtrudedSet(mesh.cell_set, layers=layers, extruded_periodic=periodic) # submesh self.submesh_parent = None @@ -2016,7 +2016,7 @@ def num_entities(self, d): @utils.cached_property # TODO: Recalculate if mesh moves def cell_set(self): size = list(self._entity_classes[self.cell_dimension(), :]) - return op2.Set(size, "Cells", comm=self.comm) + return op2.compute_backend.Set(size, "Cells", comm=self.comm) @utils.cached_property # TODO: Recalculate if mesh moves def cell_parent_cell_list(self): @@ -4477,7 +4477,7 @@ def SubDomainData(geometric_expr): # Create cell subset indices, = np.nonzero(f.dat.data_ro_with_halos > 0.5) - return op2.Subset(m.cell_set, indices) + return op2.compute_backend.Subset(m.cell_set, indices) def Submesh(mesh, subdim, subdomain_id, label_name=None, name=None): diff --git a/firedrake/mg/utils.py b/firedrake/mg/utils.py index f3eb475b50..507de8ddc6 100644 --- a/firedrake/mg/utils.py +++ b/firedrake/mg/utils.py @@ -11,7 +11,7 @@ def fine_node_to_coarse_node_map(Vf, Vc): if len(Vf) > 1: assert len(Vf) == len(Vc) - return op2.MixedMap(fine_node_to_coarse_node_map(f, c) for f, c in zip(Vf, Vc)) + return op2.compute_backend.MixedMap(fine_node_to_coarse_node_map(f, c) for f, c in zip(Vf, Vc)) mesh = Vf.mesh() assert hasattr(mesh, "_shared_data_cache") hierarchyf, levelf = get_level(Vf.mesh()) @@ -41,15 +41,15 @@ def fine_node_to_coarse_node_map(Vf, Vc): fine_to_coarse = hierarchy.fine_to_coarse_cells[levelf] fine_to_coarse_nodes = impl.fine_to_coarse_nodes(Vf, Vc, fine_to_coarse) - return cache.setdefault(key, op2.Map(Vf.node_set, Vc.node_set, - fine_to_coarse_nodes.shape[1], - values=fine_to_coarse_nodes)) + return cache.setdefault(key, op2.compute_backend.Map(Vf.node_set, Vc.node_set, + fine_to_coarse_nodes.shape[1], + values=fine_to_coarse_nodes)) def coarse_node_to_fine_node_map(Vc, Vf): if len(Vf) > 1: assert len(Vf) == len(Vc) - return op2.MixedMap(coarse_node_to_fine_node_map(f, c) for f, c in zip(Vf, Vc)) + return op2.compute_backend.MixedMap(coarse_node_to_fine_node_map(f, c) for f, c in zip(Vf, Vc)) mesh = Vc.mesh() assert hasattr(mesh, "_shared_data_cache") hierarchyf, levelf = get_level(Vf.mesh()) @@ -79,15 +79,15 @@ def coarse_node_to_fine_node_map(Vc, Vf): coarse_to_fine = hierarchy.coarse_to_fine_cells[levelc] coarse_to_fine_nodes = impl.coarse_to_fine_nodes(Vc, Vf, coarse_to_fine) - return cache.setdefault(key, op2.Map(Vc.node_set, Vf.node_set, - coarse_to_fine_nodes.shape[1], - values=coarse_to_fine_nodes)) + return cache.setdefault(key, op2.compute_backend.Map(Vc.node_set, Vf.node_set, + coarse_to_fine_nodes.shape[1], + values=coarse_to_fine_nodes)) def coarse_cell_to_fine_node_map(Vc, Vf): if len(Vf) > 1: assert len(Vf) == len(Vc) - return op2.MixedMap(coarse_cell_to_fine_node_map(f, c) for f, c in zip(Vf, Vc)) + return op2.compute_backend.MixedMap(coarse_cell_to_fine_node_map(f, c) for f, c in zip(Vf, Vc)) mesh = Vc.mesh() assert hasattr(mesh, "_shared_data_cache") hierarchyf, levelf = get_level(Vf.mesh()) @@ -128,9 +128,9 @@ def coarse_cell_to_fine_node_map(Vc, Vf): offset = Vf.offset if offset is not None: offset = numpy.tile(offset*level_ratio, ncell*level_ratio) - return cache.setdefault(key, op2.Map(iterset, Vf.node_set, - arity=arity*level_ratio, values=coarse_to_fine_nodes, - offset=offset)) + return cache.setdefault(key, op2.compute_backend.Map(iterset, Vf.node_set, + arity=arity*level_ratio, values=coarse_to_fine_nodes, + offset=offset)) def physical_node_locations(V): diff --git a/firedrake/offload.py b/firedrake/offload.py new file mode 100644 index 0000000000..dc2772039b --- /dev/null +++ b/firedrake/offload.py @@ -0,0 +1,5 @@ +from pyop2 import offload_utils + + +set_offloading_backend = offload_utils.set_offloading_backend +offloading = offload_utils.offloading diff --git a/firedrake/petsc.py b/firedrake/petsc.py index 7bb84dd1bf..643c4ca3ae 100644 --- a/firedrake/petsc.py +++ b/firedrake/petsc.py @@ -13,6 +13,25 @@ from mpi4py import MPI from petsc4py import PETSc from pyop2 import mpi +import firedrake_configuration + + +if firedrake_configuration.get_config()["options"].get("cuda"): + if 0: + # FIXME: Enable this once + # https://gitlab.com/petsc/petsc/-/merge_requests/5400 + # is merged. + PETSc.Sys.initializeDevice(PETSc.Device.Type.CUDA) + else: + # Hacky way to initialize context on the device + x = PETSc.Vec().create(PETSc.COMM_WORLD) + x.setType("cuda") + x.setSizes(size=100) + x.set(2) + x.norm() + del x + + import pycuda.autoprimaryctx # noqa: F401 __all__ = ( diff --git a/firedrake/preconditioners/patch.py b/firedrake/preconditioners/patch.py index 8b1cdeeba0..3767a33473 100644 --- a/firedrake/preconditioners/patch.py +++ b/firedrake/preconditioners/patch.py @@ -169,23 +169,23 @@ def matrix_funptr(form, state): else: get_map = None - toset = op2.Set(1, comm=test.comm) - dofset = op2.DataSet(toset, 1) + toset = op2.compute_backend.Set(1, comm=test.comm) + dofset = op2.compute_backend.DataSet(toset, 1) arity = sum(m.arity*s.cdim for m, s in zip(get_map(test), test.dof_dset)) iterset = get_map(test).iterset - entity_node_map = op2.Map(iterset, - toset, arity, - values=numpy.zeros(iterset.total_size*arity, dtype=IntType)) + entity_node_map = op2.compute_backend.Map(iterset, + toset, arity, + values=numpy.zeros(iterset.total_size*arity, dtype=IntType)) mat = LocalMat(dofset) arg = mat(op2.INC, (entity_node_map, entity_node_map)) args.append(arg) statedat = LocalDat(dofset) - state_entity_node_map = op2.Map(iterset, - toset, arity, - values=numpy.zeros(iterset.total_size*arity, dtype=IntType)) + state_entity_node_map = op2.compute_backend.Map(iterset, + toset, arity, + values=numpy.zeros(iterset.total_size*arity, dtype=IntType)) statearg = statedat(op2.READ, state_entity_node_map) mesh = form.ufl_domains()[kinfo.domain_number] @@ -219,10 +219,10 @@ def matrix_funptr(form, state): if kinfo.integral_type == "interior_facet": arg = mesh.interior_facets.local_facet_dat(op2.READ) args.append(arg) - iterset = op2.Subset(iterset, []) + iterset = op2.compute_backend.Subset(iterset, []) wrapper_knl_args = tuple(a.global_kernel_arg for a in args) - mod = op2.GlobalKernel(kinfo.kernel, wrapper_knl_args, subset=True) + mod = op2.compute_backend.GlobalKernel(kinfo.kernel, wrapper_knl_args, subset=True) kernels.append(CompiledKernel(mod.compile(iterset.comm), kinfo)) return cell_kernels, int_facet_kernels @@ -261,21 +261,21 @@ def residual_funptr(form, state): else: get_map = None - toset = op2.Set(1, comm=test.comm) - dofset = op2.DataSet(toset, 1) + toset = op2.compute_backend.Set(1, comm=test.comm) + dofset = op2.compute_backend.DataSet(toset, 1) arity = sum(m.arity*s.cdim for m, s in zip(get_map(test), test.dof_dset)) iterset = get_map(test).iterset - entity_node_map = op2.Map(iterset, - toset, arity, - values=numpy.zeros(iterset.total_size*arity, dtype=IntType)) + entity_node_map = op2.compute_backend.Map(iterset, + toset, arity, + values=numpy.zeros(iterset.total_size*arity, dtype=IntType)) dat = LocalDat(dofset, needs_mask=True) statedat = LocalDat(dofset) - state_entity_node_map = op2.Map(iterset, - toset, arity, - values=numpy.zeros(iterset.total_size*arity, dtype=IntType)) + state_entity_node_map = op2.compute_backend.Map(iterset, + toset, arity, + values=numpy.zeros(iterset.total_size*arity, dtype=IntType)) statearg = statedat(op2.READ, state_entity_node_map) arg = dat(op2.INC, entity_node_map) @@ -313,10 +313,10 @@ def residual_funptr(form, state): if kinfo.integral_type == "interior_facet": arg = extract_unique_domain(test).interior_facets.local_facet_dat(op2.READ) args.append(arg) - iterset = op2.Subset(iterset, []) + iterset = op2.compute_backend.Subset(iterset, []) wrapper_knl_args = tuple(a.global_kernel_arg for a in args) - mod = op2.GlobalKernel(kinfo.kernel, wrapper_knl_args, subset=True) + mod = op2.compute_backend.GlobalKernel(kinfo.kernel, wrapper_knl_args, subset=True) kernels.append(CompiledKernel(mod.compile(iterset.comm), kinfo)) return cell_kernels, int_facet_kernels diff --git a/firedrake/preconditioners/pmg.py b/firedrake/preconditioners/pmg.py index 7673403017..c2cab11274 100644 --- a/firedrake/preconditioners/pmg.py +++ b/firedrake/preconditioners/pmg.py @@ -1569,7 +1569,7 @@ def prolongation_matrix_aij(P1, Pk, P1_bcs=[], Pk_bcs=[]): for i, rmap in enumerate(Pk.cell_node_map()) for j, cmap in enumerate(P1.cell_node_map()) if i == j}) - mat = op2.Mat(sp, PETSc.ScalarType) + mat = op2.compte_backend.Mat(sp, PETSc.ScalarType) mesh = Pk.mesh() fele = Pk.ufl_element() diff --git a/firedrake/solving_utils.py b/firedrake/solving_utils.py index a9dda71038..d8cca7e829 100644 --- a/firedrake/solving_utils.py +++ b/firedrake/solving_utils.py @@ -328,7 +328,7 @@ def split(self, fields): subu = function.Function(V, val=val) subsplit = (subu, ) else: - val = op2.MixedDat(pieces) + val = op2.compute_backend.MixedDat(pieces) subu = function.Function(V, val=val) # Split it apart to shove in the form. subsplit = split(subu) diff --git a/firedrake/variational_solver.py b/firedrake/variational_solver.py index 609d599800..d47b21d500 100644 --- a/firedrake/variational_solver.py +++ b/firedrake/variational_solver.py @@ -313,6 +313,7 @@ def solve(self, bounds=None): work = self._work with problem.u_restrict.dat.vec as u: + assert work.type == u.type u.copy(work) with ExitStack() as stack: # Ensure options database has full set of options (so monitors diff --git a/scripts/firedrake-install b/scripts/firedrake-install index c32b924371..e66f486fdb 100755 --- a/scripts/firedrake-install +++ b/scripts/firedrake-install @@ -80,7 +80,9 @@ class FiredrakeConfiguration(dict): "honour_petsc_dir", "with_parmetis", "slepc", "packages", "honour_pythonpath", "opencascade", "torch", "jax", - "petsc_int_type", "cache_dir", "complex", "remove_build_files", "with_blas", "netgen"] + "petsc_int_type", "cache_dir", "complex", "remove_build_files", "with_blas", "netgen", + "cuda", "cuda_dir", "cuda_arch", "cudac", "cuda_compile_flags", "opencl", + ] def deep_update(this, that): @@ -262,6 +264,18 @@ honoured.""", help="Install SLEPc along with PETSc.") parser.add_argument("--opencascade", action="store_true", help="Install OpenCASCADE for CAD integration.") + parser.add_argument("--cuda", action="store_true", + help="Use CUDA as backend.") + parser.add_argument('--cuda-dir', type=str, help="cuda toolkit directory path" + " to be passed to PETSc", action="store") + parser.add_argument('--cuda-arch', type=int, help="cuda compute capability" + " to be passed to PETSc", action="store") + parser.add_argument('--cuda-compile-flags', type=str, help="cuda directory location" + " to be passed to PETSc", default=None, action="store") + parser.add_argument('--cudac', type=str, help="path of 'nvcc' " + " to be passed to PETSc", default='nvcc', action="store") + parser.add_argument("--opencl", action="store_true", + help="Use OpenCL as backend.") parser.add_argument("--torch", const="cpu", default=False, nargs='?', choices=["cpu", "cuda"], help="Install PyTorch for a CPU or CUDA backend (default: CPU).") parser.add_argument("--jax", const="cpu", default=False, nargs='?', choices=["cpu", "cuda"], @@ -726,6 +740,11 @@ def get_petsc_options(minimal=False): "--download-bison"} for pkg in get_minimal_petsc_packages(): petsc_options.add("--download-" + pkg) + + if args.cuda: + # SUPERLU requires OpenMP for operating on GPUs + petsc_options.add("--with-openmp=1") + if osname == "Darwin": petsc_options.add("--with-x=0") # These three lines are used to inspect the MacOS Command Line Tools (CLT) version @@ -759,6 +778,25 @@ def get_petsc_options(minimal=False): if options["petsc_int_type"] != "int32": petsc_options.add("--with-64-bit-indices") + if args.cuda: + petsc_options.add('--with-cuda=1') + if args.cuda_dir: + petsc_options.add('--with-cuda-dir={}'.format(args.cuda_dir)) + petsc_options.add('--with-cudac={}'.format(args.cudac)) + if args.cuda_compile_flags: + petsc_options.add('--CUDAFLAGS={}'.format(args.cuda_compile_flags)) + if args.cuda_arch: + petsc_options.add('--with-cuda-arch={args.cuda_arch}') + else: + if args.cuda_dir or args.cuda_arch: + raise InstallError("--cuda-dir/--cuda-arch passed but CUDA installation " + " is disabled.") + + if args.opencl: + petsc_options.add('--with-viennacl=1') + petsc_options.add('--download-viennacl') + petsc_options.add('--with-opencl=1') + if options["complex"]: petsc_options.add('--with-scalar-type=complex') else: @@ -1903,6 +1941,12 @@ if mode == "install": pass packages.remove("petsc4py") + if args.cuda: + run_pip_install(["pycuda"]) + + if args.opencl: + run_pip_install(["pyopencl"]) + else: # Update mode os.chdir("src") diff --git a/tests/extrusion/test_subdomain_extruded.py b/tests/extrusion/test_subdomain_extruded.py index 532b9ed517..4c0bed54d6 100644 --- a/tests/extrusion/test_subdomain_extruded.py +++ b/tests/extrusion/test_subdomain_extruded.py @@ -21,7 +21,7 @@ def run_base_box_3d(): # TEMPORARY HACK: # Retarget base subdomain into columns of the extruded mesh. - sd = op2.Subset(mesh.cell_set, sd.indices) + sd = op2.compute_backend.Subset(mesh.cell_set, sd.indices) assert np.allclose(0.402, assemble(f*dx(subdomain_data=sd))) diff --git a/tests/regression/test_function.py b/tests/regression/test_function.py index 506d9bcb8e..13eca46741 100644 --- a/tests/regression/test_function.py +++ b/tests/regression/test_function.py @@ -108,7 +108,7 @@ def test_function_val(V): def test_function_dat(V): """Initialise a Function with an op2.Dat.""" - f = Function(V, op2.Dat(V.node_set**V.value_size)) + f = Function(V, op2.compute_backend.Dat(V.node_set**V.value_size)) f.interpolate(Constant(1)) assert (f.dat.data_ro == 1.0).all() diff --git a/tests/test_offloading.py b/tests/test_offloading.py new file mode 100644 index 0000000000..dc1cb26794 --- /dev/null +++ b/tests/test_offloading.py @@ -0,0 +1,115 @@ +import pytest +from firedrake import (set_offloading_backend, + offloading, solve, FunctionSpace, TestFunction, + TrialFunction, Function, UnitSquareMesh, + SpatialCoordinate, inner, grad, dx, norm, pi, cos, + assemble) +import firedrake_configuration +from pyop2.backends.cpu import cpu_backend + + +AVAILABLE_BACKENDS = [cpu_backend] + +if firedrake_configuration.get_config()["options"].get("cuda"): + from pyop2.backends.cuda import cuda_backend + AVAILABLE_BACKENDS.append(cuda_backend) + + +def allclose(a, b, rtol=1e-05, atol=1e-08): + """ + Prefer this routine over np.allclose(...) to allow pycuda/pyopencl arrays + """ + return bool(abs(a - b) < (atol + rtol * abs(b))) + + +@pytest.mark.parametrize("offloading_backend", AVAILABLE_BACKENDS) +def test_nonlinear_variational_solver(offloading_backend): + set_offloading_backend(offloading_backend) + mesh = UnitSquareMesh(32, 32) + V = FunctionSpace(mesh, "CG", 1) + u = TrialFunction(V) + v = TestFunction(V) + x, y = SpatialCoordinate(mesh) + + a = (inner(grad(u), grad(v)) + inner(u, v)) * dx + f = Function(V) + f.interpolate((1+8*pi*pi)*cos(x*pi*2)*cos(y*pi*2)) + L = inner(f, v) * dx + fem_soln = Function(V) + sp = {"mat_type": "matfree", + "ksp_monitor_true_residual": None, + "ksp_converged_reason": None} + with offloading(): + solve(a == L, fem_soln, solver_parameters=sp) + + f.interpolate(cos(x*pi*2)*cos(y*pi*2)) + + assert norm(fem_soln-f) < 1e-2 + + with offloading(): + assert norm(fem_soln-f) < 1e-2 + + +@pytest.mark.parametrize("offloading_backend", AVAILABLE_BACKENDS) +def test_linear_variational_solver(offloading_backend): + set_offloading_backend(offloading_backend) + mesh = UnitSquareMesh(32, 32) + V = FunctionSpace(mesh, "CG", 1) + u = TrialFunction(V) + v = TestFunction(V) + f = Function(V) + x, y = SpatialCoordinate(mesh) + f.interpolate((1+8*pi*pi)*cos(x*pi*2)*cos(y*pi*2)) + + L = assemble(inner(f, v) * dx) + fem_soln = Function(V) + + with offloading(): + + a = assemble((inner(grad(u), grad(v)) + inner(u, v)) * dx, + mat_type="matfree") + solve(a, fem_soln, L, + solver_parameters={"pc_type": "none", + "ksp_type": "cg", + "ksp_monitor": None}) + + f.interpolate(cos(x*pi*2)*cos(y*pi*2)) + + assert norm(fem_soln-f) < 1e-2 + + with offloading(): + assert norm(fem_soln-f) < 1e-2 + + +@pytest.mark.parametrize("offloading_backend", AVAILABLE_BACKENDS) +def test_data_manipulation_on_host(offloading_backend): + set_offloading_backend(offloading_backend) + + mesh = UnitSquareMesh(32, 32) + V = FunctionSpace(mesh, "CG", 1) + u = TrialFunction(V) + v = TestFunction(V) + f = Function(V) + x, y = SpatialCoordinate(mesh) + f.interpolate((1+8*pi*pi)*cos(x*pi*2)*cos(y*pi*2)) + + L = assemble(inner(f, v) * dx) + fem_soln = Function(V) + + with offloading(): + + a = assemble((inner(grad(u), grad(v)) + inner(u, v)) * dx, + mat_type="matfree") + solve(a, fem_soln, L, + solver_parameters={"pc_type": "none", + "ksp_type": "cg", + "ksp_monitor": None}) + + old_norm = norm(fem_soln) + kappa = 2.0 + fem_soln.dat.data[:] *= kappa # update data on host + + with offloading(): + new_norm = norm(fem_soln) + + allclose(kappa*old_norm, new_norm)