diff --git a/.github/workflows/docker-petsc.yml b/.github/workflows/docker-petsc.yml index 00d87874f5..a8da7a5fbc 100644 --- a/.github/workflows/docker-petsc.yml +++ b/.github/workflows/docker-petsc.yml @@ -6,7 +6,9 @@ permissions: on: push: branches: - - petsc # Push events on petsc branch + - petsc + # will be dropped after merge into petsc branch + - petscsection jobs: build-and-push: @@ -16,6 +18,9 @@ jobs: # Use buildkit DOCKER_BUILDKIT: "1" + PETSC_REPO: https://gitlab.com/ZoeLeibowitz/petsc.git + PETSC_BRANCH: zoe/feature-da-section-sf + steps: - name: Checkout devito uses: actions/checkout@v5 @@ -38,10 +43,13 @@ jobs: context: . file: docker/Dockerfile.petsc push: true + platforms: linux/amd64 tags: | devitocodes/devito-petsc:latest - build-args: base=devitocodes/devito:gcc-dev-amd64 - platforms: linux/amd64 + build-args: | + base=devitocodes/devito:gcc-dev-amd64 + PETSC_REPO=${{ env.PETSC_REPO }} + PETSC_BRANCH=${{ env.PETSC_BRANCH }} - name: Remove dangling layers run: docker system prune -f \ No newline at end of file diff --git a/.github/workflows/pytest-petsc.yml b/.github/workflows/pytest-petsc.yml index ab66de4cc0..0291624db0 100644 --- a/.github/workflows/pytest-petsc.yml +++ b/.github/workflows/pytest-petsc.yml @@ -5,8 +5,6 @@ concurrency: cancel-in-progress: true on: - # Trigger the workflow on push or pull request, - # but only for the master branch push: branches: - main @@ -50,7 +48,11 @@ jobs: - name: Build docker image run: | - docker build -f docker/Dockerfile.petsc --tag devito_petsc_image:test . + docker build \ + -f docker/Dockerfile.petsc \ + --build-arg PETSC_REPO=https://gitlab.com/ZoeLeibowitz/petsc.git \ + --build-arg PETSC_BRANCH=zoe/feature-da-section-sf \ + --tag devito_petsc_image:test . - name: Set run prefix run: | diff --git a/devito/data/decomposition.py b/devito/data/decomposition.py index 9a4ec7a486..97fefcf9da 100644 --- a/devito/data/decomposition.py +++ b/devito/data/decomposition.py @@ -450,6 +450,39 @@ def index_loc_to_glb(self, *args): else: raise TypeError("Expected 1 arguments, found %d" % len(args)) + def index_glb_to_loc_unsafe(self, glb_idx, rel=True): + """ + Convert a global index to a local index even if not owned. + WARNING: Must not be used to index data as there are no guard + rails against returning out of bound indices. + """ + if not self.loc_empty: + loc_abs_min = self.loc_abs_min - self.glb_min + loc_abs_max = self.loc_abs_max - self.glb_min + glb_max = self.glb_max - self.glb_min + else: + loc_abs_min = self.loc_abs_min + loc_abs_max = self.loc_abs_max + glb_max = self.glb_max + + glb_min = 0 + + base = loc_abs_min if rel else 0 + + # index_glb_to_loc(index) + # -> Base case, empty local subdomain + if self.loc_empty: + return None + # -> Handle negative index + if glb_idx < 0: + glb_idx = glb_max + glb_idx + 1 + # -> Do the actual conversion + if loc_abs_min <= glb_idx <= loc_abs_max or glb_min <= glb_idx <= glb_max: + return glb_idx - base + else: + # This should raise an exception when used to access a numpy.array + return glb_idx + def reshape(self, *args): """ Create a new Decomposition with extended or reduced boundary subdomains. diff --git a/devito/ir/cgen/printer.py b/devito/ir/cgen/printer.py index e4bff5de80..50fedb91cb 100644 --- a/devito/ir/cgen/printer.py +++ b/devito/ir/cgen/printer.py @@ -18,6 +18,7 @@ from devito.symbolics.inspection import has_integer_args, sympy_dtype from devito.symbolics.queries import q_leaf from devito.types.basic import AbstractFunction +from devito.types.misc import PostIncrementIndex from devito.tools import ctypes_to_cstr, dtype_to_ctype, ctypes_vector_mapper __all__ = ['BasePrinter', 'ccode'] @@ -148,8 +149,13 @@ def _print_Indexed(self, expr): -------- U[t,x,y,z] -> U[t][x][y][z] """ - inds = ''.join(['[' + self._print(x) + ']' for x in expr.indices]) - return f'{self._print(expr.base.label)}{inds}' + inds = [] + for i in expr.indices: + if isinstance(i, PostIncrementIndex): + inds.append(f"[{self._print(i)}++]") + else: + inds.append(f"[{self._print(i)}]") + return f"{self._print(expr.base.label)}{''.join(inds)}" def _print_FIndexed(self, expr): """ diff --git a/devito/ir/equations/equation.py b/devito/ir/equations/equation.py index f4fe81bcad..f5b3221c9f 100644 --- a/devito/ir/equations/equation.py +++ b/devito/ir/equations/equation.py @@ -107,10 +107,10 @@ def detect(cls, expr): ReduceMin: OpMin, PetscEq: OpPetsc } - try: - return reduction_mapper[type(expr)] - except KeyError: - pass + + for expr_type, op in reduction_mapper.items(): + if isinstance(expr, expr_type): + return op # NOTE: in the future we might want to track down other kinds # of operations here (e.g., memcpy). However, we don't care for diff --git a/devito/ir/iet/nodes.py b/devito/ir/iet/nodes.py index d4e1893638..84f89a18dc 100644 --- a/devito/ir/iet/nodes.py +++ b/devito/ir/iet/nodes.py @@ -1148,8 +1148,8 @@ def expr_symbols(self): ret.extend([self.pointer._C_symbol, self.pointee._C_symbol]) else: ret.extend([self.pointer, self.pointee.indexed]) - ret.extend(flatten(i.free_symbols - for i in self.pointee.symbolic_shape[1:])) + ret.extend(flatten(i.free_symbols + for i in self.pointee.symbolic_shape[1:])) else: assert False, f"Unexpected pointer type {type(self.pointer)}" diff --git a/devito/operator/operator.py b/devito/operator/operator.py index 03311ecaaf..8faa7b776e 100644 --- a/devito/operator/operator.py +++ b/devito/operator/operator.py @@ -42,6 +42,7 @@ from devito.types.dimension import Thickness from devito.petsc.iet.passes import lower_petsc from devito.petsc.clusters import petsc_preprocess +from devito.petsc.equations import lower_exprs_petsc __all__ = ['Operator'] @@ -368,6 +369,8 @@ def _lower_exprs(cls, expressions, **kwargs): # in particular uniqueness across expressions is ensured expressions = concretize_subdims(expressions, **kwargs) + expressions = lower_exprs_petsc(expressions, **kwargs) + processed = [LoweredEq(i) for i in expressions] return processed diff --git a/devito/petsc/config.py b/devito/petsc/config.py index e743b0bba4..e4b31f8c7d 100644 --- a/devito/petsc/config.py +++ b/devito/petsc/config.py @@ -44,7 +44,8 @@ def core_metadata(): petsc_lib = tuple([arch / 'lib' for arch in petsc_dir]) return { - 'includes': ('petscsnes.h', 'petscdmda.h'), + # TODO: Only add petscsection header when needed + 'includes': ('petscsnes.h', 'petscdmda.h', 'petscsection.h'), 'include_dirs': petsc_include, 'libs': ('petsc'), 'lib_dirs': petsc_lib, @@ -52,6 +53,11 @@ def core_metadata(): } +# Maximum number of bytes (including the null terminator) reserved for a +# KSPType string in the profiler struct. +KSPTYPE_MAX_LEN = 64 + + def get_petsc_type_mappings(): try: petsc_precision = petsc_variables['PETSC_PRECISION'] @@ -73,7 +79,11 @@ def get_petsc_type_mappings(): petsc_type_to_ctype = {v: k for k, v in printer_mapper.items()} # Add other PETSc types petsc_type_to_ctype.update({ - 'KSPType': ctypes.c_char_p, + # KSPType is `const char*` into KSP-owned memory. Using c_char_p + # would dereference that pointer when Python reads the struct, which + # segfaults after SNESDestroy frees the KSP. An inline char array + # (c_char * N) stores a copy, so it is safe to read at any time. + 'KSPType': ctypes.c_char * KSPTYPE_MAX_LEN, 'KSPConvergedReason': petsc_type_to_ctype['PetscInt'], 'KSPNormType': petsc_type_to_ctype['PetscInt'], }) diff --git a/devito/petsc/equations.py b/devito/petsc/equations.py new file mode 100644 index 0000000000..dc1f04a36e --- /dev/null +++ b/devito/petsc/equations.py @@ -0,0 +1,100 @@ +from devito.symbolics import retrieve_indexed, retrieve_dimensions, uxreplace +from devito.types.dimension import SpaceDimension, CustomDimension +from devito import Min, Max + +from devito.petsc.types.equation import ConstrainBC +from devito.petsc.types.dimension import ( + SubDimMax, SubDimMin, + SpaceDimMax, SpaceDimMin, +) + + +def lower_exprs_petsc(expressions, **kwargs): + + # Process `ConstrainBC` equations + expressions = constrain_essential_bcs(expressions, **kwargs) + + return expressions + + +def constrain_essential_bcs(expressions, **kwargs): + """ + """ + constrain_expressions = [e for e in expressions if isinstance(e, ConstrainBC)] + if not constrain_expressions: + return expressions + + sregistry = kwargs.get('sregistry') + new_exprs = [] + + # TODO: rethink + halo_size = {e.target.function._size_halo for e in constrain_expressions} + assert len(halo_size) == 1 + halo_size = halo_size.pop() + + all_dims = {d for e in constrain_expressions for d in extract_dims(e)} + subdims = [d for d in all_dims if d.is_Sub and not d.local] + space_dims = [d for d in all_dims if isinstance(d, SpaceDimension)] + + mapper = {} + + for d in subdims: + halo = halo_size[d] + + subdim_max = SubDimMax( + sregistry.make_name(prefix=f"{d.name}_max"), subdim=d + ) + subdim_min = SubDimMin( + sregistry.make_name(prefix=f"{d.name}_min"), subdim=d + ) + + mapper[d] = CustomDimension( + name=d.name, + symbolic_min=Max(subdim_min, d.parent.symbolic_min - halo.left), + symbolic_max=Min(subdim_max, d.parent.symbolic_max + halo.right), + ) + + for d in space_dims: + halo = halo_size[d] + space_dim_max = SpaceDimMax( + sregistry.make_name(prefix=f"{d.name}_max"), space_dim=d + ) + space_dim_min = SpaceDimMin( + sregistry.make_name(prefix=f"{d.name}_min"), space_dim=d + ) + + mapper[d] = CustomDimension( + name=sregistry.make_name(prefix=f"{d.name}_expanded"), + symbolic_min=Max(space_dim_min, d.symbolic_min - halo.left), + symbolic_max=Min(space_dim_max, d.symbolic_max + halo.right), + ) + + # Apply mapper to expressions + for e in expressions: + if not isinstance(e, ConstrainBC): + new_exprs.append(e) + continue + + dims = extract_dims(e) + if not dims: + new_exprs.append(e) + continue + + new_e = uxreplace(e, mapper) + + if e.implicit_dims: + new_e = new_e._rebuild( + implicit_dims=tuple(mapper.get(d, d) for d in e.implicit_dims) + ) + new_exprs.append(new_e) + return new_exprs + + +def extract_dims(expr): + indexeds = retrieve_indexed(expr) + dims = retrieve_dimensions( + [i for j in indexeds for i in j.indices], + mode="unique", + ) + dims.update(expr.implicit_dims) + return dims diff --git a/devito/petsc/iet/builder.py b/devito/petsc/iet/builder.py index e1c178c059..dc244f6371 100644 --- a/devito/petsc/iet/builder.py +++ b/devito/petsc/iet/builder.py @@ -1,8 +1,9 @@ import math +from functools import cached_property from devito.ir.iet import DummyExpr, BlankLine from devito.symbolics import (Byref, FieldFromPointer, VOID, - FieldFromComposite, Null) + FieldFromComposite, Null, String) from devito.petsc.iet.nodes import ( FormFunctionCallback, MatShellSetOp, PETScCall, petsc_call @@ -22,7 +23,10 @@ def __init__(self, **kwargs): self.callback_builder = kwargs.get('callback_builder') self.field_data = self.inject_solve.expr.rhs.field_data self.formatted_prefix = self.inject_solve.expr.rhs.formatted_prefix - self.calls = self._setup() + + @cached_property + def calls(self): + return self._setup() @property def snes_ctx(self): @@ -82,7 +86,7 @@ def _setup(self): snes_get_ksp = petsc_call('SNESGetKSP', [sobjs['snes'], Byref(sobjs['ksp'])]) - matvec = self.callback_builder.main_matvec_callback + matvec = self.callback_builder.main_matvec_efunc matvec_operation = petsc_call( 'MatShellSetOperation', [sobjs['Jac'], 'MATOP_MULT', MatShellSetOp(matvec.name, void, void)] @@ -100,17 +104,9 @@ def _setup(self): dmda_calls = self._create_dmda_calls(dmda) - mainctx = sobjs['userctx'] - - call_struct_callback = petsc_call( - self.callback_builder.user_struct_callback.name, [Byref(mainctx)] - ) - # TODO: maybe don't need to explictly set this mat_set_dm = petsc_call('MatSetDM', [sobjs['Jac'], dmda]) - calls_set_app_ctx = petsc_call('DMSetApplicationContext', [dmda, Byref(mainctx)]) - base_setup = dmda_calls + ( snes_create, snes_options_prefix, @@ -126,9 +122,7 @@ def _setup(self): matvec_operation, formfunc_operation, snes_set_options, - call_struct_callback, mat_set_dm, - calls_set_app_ctx, BlankLine ) extended_setup = self._extend_setup() @@ -142,9 +136,26 @@ def _extend_setup(self): def _create_dmda_calls(self, dmda): dmda_create = self._create_dmda(dmda) + # TODO: probs need to set the dm options prefix the same as snes? + dm_set_from_opts = petsc_call('DMSetFromOptions', [dmda]) dm_setup = petsc_call('DMSetUp', [dmda]) dm_mat_type = petsc_call('DMSetMatType', [dmda, 'MATSHELL']) - return dmda_create, dm_setup, dm_mat_type + mainctx = self.solver_objs['userctx'] + + call_struct_callback = petsc_call( + self.callback_builder.user_struct_efunc.name, [Byref(mainctx)] + ) + + calls_set_app_ctx = petsc_call('DMSetApplicationContext', [dmda, Byref(mainctx)]) + + return ( + dmda_create, + dm_set_from_opts, + dm_setup, + dm_mat_type, + call_struct_callback, + calls_set_app_ctx + ) def _create_dmda(self, dmda): sobjs = self.solver_objs @@ -223,7 +234,7 @@ def _setup(self): snes_get_ksp = petsc_call('SNESGetKSP', [sobjs['snes'], Byref(sobjs['ksp'])]) - matvec = self.callback_builder.main_matvec_callback + matvec = self.callback_builder.main_matvec_efunc matvec_operation = petsc_call( 'MatShellSetOperation', [sobjs['Jac'], 'MATOP_MULT', MatShellSetOp(matvec.name, void, void)] @@ -244,14 +255,12 @@ def _setup(self): mainctx = sobjs['userctx'] call_struct_callback = petsc_call( - self.callback_builder.user_struct_callback.name, [Byref(mainctx)] + self.callback_builder.user_struct_efunc.name, [Byref(mainctx)] ) # TODO: maybe don't need to explictly set this mat_set_dm = petsc_call('MatSetDM', [sobjs['Jac'], dmda]) - calls_set_app_ctx = petsc_call('DMSetApplicationContext', [dmda, Byref(mainctx)]) - create_field_decomp = petsc_call( 'DMCreateFieldDecomposition', [dmda, Byref(sobjs['nfields']), Null, Byref(sobjs['fields']), @@ -324,7 +333,6 @@ def _setup(self): snes_set_options, call_struct_callback, mat_set_dm, - calls_set_app_ctx, create_field_decomp, matop_create_submats_op, call_coupled_struct_callback, @@ -334,6 +342,89 @@ def _setup(self): return coupled_setup +class ConstrainedBCMixin: + """ + """ + def _create_dmda_calls(self, dmda): + sobjs = self.solver_objs + mainctx = sobjs['userctx'] + + dmda_create = self._create_dmda(dmda) + + # TODO: likely need to set the dm options prefix the same as snes? + # likely shouldn't hardcode this option like this.. (should be set in the options + # callback) + da_create_section = petsc_call( + 'PetscOptionsSetValue', [Null, String("-da_use_section"), Null] + ) + dm_set_from_opts = petsc_call('DMSetFromOptions', [dmda]) + dm_setup = petsc_call('DMSetUp', [dmda]) + dm_mat_type = petsc_call('DMSetMatType', [dmda, 'MATSHELL']) + + targets = self.field_data.targets + count_bcs = petsc_call( + self.callback_builder._count_bc_efunc.name, + [dmda] + [Byref(sobjs[f'numBC_{t.name}']) for t in targets] + ) + + set_point_bcs = petsc_call( + self.callback_builder._set_point_bc_efunc.name, + [dmda] + [sobjs[f'numBC_{t.name}'] for t in targets] + ) + + get_local_section = petsc_call( + 'DMGetLocalSection', [dmda, Byref(sobjs['lsection'])] + ) + + get_point_sf = petsc_call('DMGetPointSF', [dmda, Byref(sobjs['sf'])]) + + create_global_section = petsc_call( + 'PetscSectionCreateGlobalSection', + [sobjs['lsection'], sobjs['sf'], 'PETSC_TRUE', 'PETSC_FALSE', 'PETSC_FALSE', + Byref(sobjs['gsection'])] + ) + + dm_set_global_section = petsc_call( + 'DMSetGlobalSection', [dmda, sobjs['gsection']] + ) + + dm_create_section_sf = petsc_call( + 'DMCreateSectionSF', [dmda, sobjs['lsection'], sobjs['gsection']] + ) + + call_struct_callback = petsc_call( + self.callback_builder.user_struct_efunc.name, [Byref(mainctx)] + ) + + calls_set_app_ctx = petsc_call('DMSetApplicationContext', [dmda, Byref(mainctx)]) + + return ( + dmda_create, + da_create_section, + dm_set_from_opts, + dm_setup, + dm_mat_type, + call_struct_callback, + calls_set_app_ctx, + count_bcs, + set_point_bcs, + get_local_section, + get_point_sf, + create_global_section, + dm_set_global_section, + dm_create_section_sf + ) + + +class ConstrainedBCBuilder(ConstrainedBCMixin, BuilderBase): + pass + + +# TODO: Implement this class properly +class CoupledConstrainedBCBuilder(ConstrainedBCMixin, CoupledBuilder): + pass + + def petsc_call_mpi(specific_call, call_args): return PETScCall('PetscCallMPI', [PETScCall(specific_call, arguments=call_args)]) diff --git a/devito/petsc/iet/callbacks.py b/devito/petsc/iet/callbacks.py index 4d06063242..b57f1f66ed 100644 --- a/devito/petsc/iet/callbacks.py +++ b/devito/petsc/iet/callbacks.py @@ -9,11 +9,15 @@ ) from devito.symbolics.unevaluation import Mul from devito.types.basic import AbstractFunction +from devito.types.misc import PostIncrementIndex from devito.types import Dimension, Temp, TempArray from devito.tools import filter_ordered +from devito.passes.iet.linearization import Stride from devito.petsc.iet.nodes import PETScCallable, MatShellSetOp, petsc_call -from devito.petsc.types import DMCast, MainUserStruct, CallbackUserStruct +from devito.petsc.types import ( + DMCast, MainUserStruct, CallbackUserStruct, PetscObjectCast, PetscInt +) from devito.petsc.iet.type_builder import objs from devito.petsc.types.macros import petsc_func_begin_user from devito.petsc.types.modes import InsertMode @@ -27,25 +31,26 @@ def __init__(self, **kwargs): self.rcompile = kwargs.get('rcompile', None) self.sregistry = kwargs.get('sregistry', None) + self.options = kwargs.get('options', {}) self.concretize_mapper = kwargs.get('concretize_mapper', {}) self.time_dependence = kwargs.get('time_dependence') self.objs = kwargs.get('objs') self.solver_objs = kwargs.get('solver_objs') self.inject_solve = kwargs.get('inject_solve') self.solve_expr = self.inject_solve.expr.rhs - - self._efuncs = OrderedDict() self._struct_params = [] + self._efuncs = OrderedDict() self._set_options_efunc = None self._clear_options_efunc = None - self._main_matvec_callback = None - self._user_struct_callback = None + self._main_matvec_efunc = None + self._user_struct_efunc = None self._F_efunc = None self._b_efunc = None - + self._count_bc_efunc = None + self._set_point_bc_efunc = None self._J_efuncs = [] - self._initial_guesses = [] + self._initial_guess_efuncs = [] self._make_core() self._efuncs = self._uxreplace_efuncs() @@ -63,7 +68,7 @@ def filtered_struct_params(self): return filter_ordered(self.struct_params) @property - def main_matvec_callback(self): + def main_matvec_efunc(self): """ The matrix-vector callback for the full Jacobian. This is the function set in the main Kernel via: @@ -83,12 +88,8 @@ def J_efuncs(self): return self._J_efuncs @property - def initial_guesses(self): - return self._initial_guesses - - @property - def user_struct_callback(self): - return self._user_struct_callback + def user_struct_efunc(self): + return self._user_struct_efunc @property def solver_parameters(self): @@ -112,12 +113,19 @@ def target(self): def _make_core(self): self._make_options_callback() + # Make the mat-vec callback to form the matfree Jacobian self._make_matvec(self.field_data.jacobian) + # Make the residual callback self._make_formfunc() + # Make the RHS callback self._make_formrhs() + # Make the initial guess callback if self.field_data.initial_guess.exprs: self._make_initial_guess() - self._make_user_struct_callback() + # Make the callback used to constrain boundary nodes + if self.field_data.constrain_bc: + self._make_constrain_bc() + self._make_user_struct_efunc() def _make_petsc_callable(self, prefix, body, parameters=()): return PETScCallable( @@ -578,7 +586,7 @@ def _make_initial_guess(self): cb = self._make_petsc_callable( 'FormInitialGuess', body, parameters=(sobjs['callbackdm'], objs['xloc']) ) - self._initial_guesses.append(cb) + self._initial_guess_efuncs.append(cb) self._efuncs[cb.name] = cb def _create_initial_guess_body(self, body): @@ -637,7 +645,167 @@ def _create_initial_guess_body(self, body): return Uxreplace(subs).visit(body) - def _make_user_struct_callback(self): + def _make_constrain_bc(self): + """ + Constructs the `CountBCs` and `SetPointBCs` efuncs. Works for both + single- and multi-field: all fields' expressions are compiled together + (clustering may fuse loops) and a single callback is emitted for each. + """ + constrain_bc = self.field_data.constrain_bc + sobjs = self.solver_objs + + # Normalize to dict {target: ConstrainBC} + if isinstance(constrain_bc, dict): + constrain_bc_dict = constrain_bc + else: + constrain_bc_dict = {self.field_data.target: constrain_bc} + targets = list(constrain_bc_dict.keys()) + + all_increment_exprs = [ + e for t in targets for e in constrain_bc_dict[t].increment_exprs + ] + irs0, _ = self.rcompile( + all_increment_exprs, options={'mpi': False}, + sregistry=self.sregistry, concretize_mapper=self.concretize_mapper + ) + all_point_bc_exprs = [ + e for t in targets for e in constrain_bc_dict[t].point_bc_exprs + ] + irs1, _ = self.rcompile( + all_point_bc_exprs, options={'mpi': False}, + sregistry=self.sregistry, concretize_mapper=self.concretize_mapper + ) + + pairs = [ + (sobjs[f'numBCPtr_{t.name}'], constrain_bc_dict[t].counter) + for t in targets + ] + count_bc_body = self._create_count_bc_body(List(body=irs0.uiet.body), pairs) + set_point_bc_body = self._create_set_point_bc_body( + List(body=irs1.uiet.body), constrain_bc_dict + ) + + numBCPtr_params = tuple(sobjs[f'numBCPtr_{t.name}'] for t in targets) + numBC_params = tuple(sobjs[f'numBC_{t.name}'] for t in targets) + + cb0 = self._make_petsc_callable( + 'CountBCs', count_bc_body, + parameters=(sobjs['callbackdm'],) + numBCPtr_params + ) + cb1 = self._make_petsc_callable( + 'SetPointBCs', set_point_bc_body, + parameters=(sobjs['callbackdm'],) + numBC_params + ) + self._count_bc_efunc = cb0 + self._set_point_bc_efunc = cb1 + self._efuncs[cb0.name] = cb0 + self._efuncs[cb1.name] = cb1 + + def _create_count_bc_body(self, body, pairs): + """ + Generic CountBCs body. `pairs` is a list of (numBCPtr, counter) tuples, + one per field. All fields are handled in a single callback body. + """ + objs = self.objs + sobjs = self.solver_objs + + dmda = sobjs['callbackdm'] + ctx = objs['dummyctx'] + + body = self.time_dependence.uxreplace_time(body) + + fields = get_user_struct_fields(body) + self._struct_params.extend(fields) + + dm_get_app_context = petsc_call( + 'DMGetApplicationContext', [dmda, Byref(ctx._C_symbol)] + ) + + deref_ptrs = tuple( + DummyExpr(counter, Deref(numBCPtr)) for numBCPtr, counter in pairs + ) + move_ptrs = tuple( + DummyExpr(Deref(numBCPtr), counter) for numBCPtr, counter in pairs + ) + + struct_definition = [Definition(ctx), dm_get_app_context] + + body = body._rebuild(body.body + move_ptrs) + + body = self._make_callable_body( + body, standalones=struct_definition, stacks=deref_ptrs + ) + subs = {i._C_symbol: FieldFromPointer(i._C_symbol, ctx) for + i in fields if not isinstance(i.function, AbstractFunction)} + + return Uxreplace(subs).visit(body) + + def _create_set_point_bc_body(self, body, constrain_bc_dict): + """Single-field SetPointBCs body. `constrain_bc_dict` has one entry.""" + (target, constrain_bc), = constrain_bc_dict.items() + tname = target.name + linsolve_expr = self.inject_solve.expr.rhs + objs = self.objs + sobjs = self.solver_objs + + dmda = sobjs['callbackdm'] + ctx = objs['dummyctx'] + + dm_get_local_info = petsc_call( + 'DMDAGetLocalInfo', [dmda, Byref(linsolve_expr.localinfo)] + ) + + body = self.time_dependence.uxreplace_time(body) + + fields = get_user_struct_fields(body) + self._struct_params.extend(fields) + + dm_get_app_context = petsc_call( + 'DMGetApplicationContext', [dmda, Byref(ctx._C_symbol)] + ) + petsc_obj_comm = Call('PetscObjectComm', arguments=[PetscObjectCast(dmda)]) + is_create_general = petsc_call( + 'ISCreateGeneral', + [petsc_obj_comm, sobjs[f'numBC_{tname}'], sobjs[f'bcPointsArr_{tname}'], + 'PETSC_OWN_POINTER', Byref(sobjs['bcPointsIS'])] + ) + malloc_bc_points_arr = petsc_call( + 'PetscMalloc1', + [sobjs[f'numBC_{tname}'], Byref(sobjs[f'bcPointsArr_{tname}']._C_symbol)] + ) + malloc_bc_points = petsc_call( + 'PetscMalloc1', [1, Byref(sobjs['bcPoints']._C_symbol)] + ) + dummy_expr = DummyExpr(sobjs['bcPoints'].indexed[0], sobjs['bcPointsIS']) + set_point_bc = petsc_call( + 'DMDASetPointBC', [dmda, 1, sobjs['bcPoints'], Null] + ) + body = body._rebuild( + body=( + (malloc_bc_points_arr,) + + body.body + + (is_create_general, malloc_bc_points, dummy_expr, set_point_bc,) + ) + ) + + derefs = dereference_funcs(ctx, fields) + standalones = [ + Definition(ctx), + dm_get_app_context, + Definition(sobjs[f'k_iter_{tname}']) + ] + body = self._make_callable_body( + body, standalones=standalones, stacks=(dm_get_local_info,) + derefs + ) + + subs = {i._C_symbol: FieldFromPointer(i._C_symbol, ctx) for + i in fields if not isinstance(i.function, AbstractFunction)} + subs[constrain_bc.counter._C_symbol] = \ + sobjs[f'bcPointsArr_{tname}'].indexed[sobjs[f'k_iter_{tname}']] + + return Uxreplace(subs).visit(body) + + def _make_user_struct_efunc(self): """ This is the struct initialised inside the main kernel and attached to the DM via DMSetApplicationContext. @@ -660,7 +828,7 @@ def _make_user_struct_callback(self): parameters=[mainctx] ) self._efuncs[cb.name] = cb - self._user_struct_callback = cb + self._user_struct_efunc = cb def _uxreplace_efuncs(self): sobjs = self.solver_objs @@ -682,24 +850,124 @@ def _uxreplace_efuncs(self): class CoupledCallbackBuilder(BaseCallbackBuilder): def __init__(self, **kwargs): self._submatrices_callback = None + self._destroy_submat_callback = None super().__init__(**kwargs) @property def submatrices_callback(self): return self._submatrices_callback + def _create_set_point_bc_body(self, body, _constrain_bc_dict): + return self._create_set_point_bc_body_coupled(body) + + def _create_set_point_bc_body_coupled(self, body): + """ + Combined SetPointBCs body for all target fields. The body is compiled + from all fields' point_bc_exprs together (loops may be fused by + clustering). Per-field counter symbols are substituted with the + corresponding bcPointsArr[k_iter] after assembly. + """ + linsolve_expr = self.inject_solve.expr.rhs + objs = self.objs + sobjs = self.solver_objs + constrain_bc = self.field_data.constrain_bc + targets = self.field_data.targets + nfields = len(targets) + dmda = sobjs['callbackdm'] + ctx = objs['dummyctx'] + + dm_get_local_info = petsc_call( + 'DMDAGetLocalInfo', [dmda, Byref(linsolve_expr.localinfo)] + ) + dm_get_app_context = petsc_call( + 'DMGetApplicationContext', [dmda, Byref(ctx._C_symbol)] + ) + petsc_obj_comm = Call('PetscObjectComm', arguments=[PetscObjectCast(dmda)]) + + body = self.time_dependence.uxreplace_time(body) + fields = get_user_struct_fields(body) + self._struct_params.extend(fields) + + bcPointsIS = sobjs['bcPointsIS'] + bcCompsIS = sobjs['bcCompsIS'] + + # Zero-initialise IS arrays (PetscCalloc1 sets pointers to NULL so + # the automatic ISDestroy cleanup is safe even on early exit) + is_array_mallocs = ( + petsc_call('PetscCalloc1', [nfields, Byref(bcPointsIS._C_symbol)]), + petsc_call('PetscCalloc1', [nfields, Byref(bcCompsIS._C_symbol)]), + ) + bc_arr_mallocs = tuple( + petsc_call('PetscMalloc1', + [sobjs[f'numBC_{t.name}'], + Byref(sobjs[f'bcPointsArr_{t.name}']._C_symbol)]) + for t in targets + ) + + is_creates, comp_creates = [], [] + for i, t in enumerate(targets): + tname = t.name + is_creates.append(petsc_call( + 'ISCreateGeneral', + [petsc_obj_comm, sobjs[f'numBC_{tname}'], + sobjs[f'bcPointsArr_{tname}'], + 'PETSC_OWN_POINTER', Byref(bcPointsIS.indexed[i])] + )) + comp_arr = PetscInt(name=f'comp{i}', initvalue=i) + comp_creates.append(petsc_call( + 'ISCreateGeneral', + [petsc_obj_comm, 1, Byref(comp_arr), + 'PETSC_COPY_VALUES', Byref(bcCompsIS.indexed[i])] + )) + + set_point_bc = petsc_call( + 'DMDASetPointBC', [dmda, nfields, bcPointsIS, bcCompsIS] + ) + + body = body._rebuild(body=( + is_array_mallocs + + bc_arr_mallocs + + body.body + + tuple(is_creates) + + tuple(comp_creates) + + (set_point_bc,) + )) + + derefs = dereference_funcs(ctx, fields) + k_defs = [Definition(sobjs[f'k_iter_{t.name}']) for t in targets] + comp_defs = [ + Definition(PetscInt(name=f'comp{i}', initvalue=i)) + for i in range(nfields) + ] + standalones = [Definition(ctx), dm_get_app_context] + k_defs + comp_defs + + body = self._make_callable_body( + body, standalones=standalones, + stacks=(dm_get_local_info,) + derefs + ) + + # Struct substitutions + per-field counter → bcArr[k_iter] + subs = {i._C_symbol: FieldFromPointer(i._C_symbol, ctx) for + i in fields if not isinstance(i.function, AbstractFunction)} + for t in targets: + tname = t.name + subs[constrain_bc[t].counter._C_symbol] = \ + sobjs[f'bcPointsArr_{tname}'].indexed[sobjs[f'k_iter_{tname}']] + + return Uxreplace(subs).visit(body) + @property def jacobian(self): return self.inject_solve.expr.rhs.field_data.jacobian @property - def main_matvec_callback(self): + def main_matvec_efunc(self): """ This is the matrix-vector callback associated with the whole Jacobian i.e is set in the main kernel via `PetscCall(MatShellSetOperation(J,MATOP_MULT,(void (*)(void))MyMatShellMult));` """ - return self._main_matvec_callback + return self._main_matvec_efunc def _make_core(self): for sm in self.field_data.jacobian.nonzero_submatrices: @@ -708,7 +976,10 @@ def _make_core(self): self._make_options_callback() self._make_whole_matvec() self._make_whole_formfunc() - self._make_user_struct_callback() + if self.field_data.constrain_bc: + self._make_constrain_bc() + self._make_user_struct_efunc() + self._create_destroy_submatrix() self._create_submatrices() self._efuncs['PopulateMatContext'] = self.objs['dummyefunc'] @@ -720,7 +991,7 @@ def _make_whole_matvec(self): cb = self._make_petsc_callable( 'WholeMatMult', List(body=body), parameters=parameters ) - self._main_matvec_callback = cb + self._main_matvec_efunc = cb self._efuncs[cb.name] = cb def _whole_matvec_body(self): @@ -908,6 +1179,28 @@ def _whole_formfunc_body(self, body): return Uxreplace(subs).visit(formfunc_body) + def _create_destroy_submatrix(self): + # Need a special destroy because each submatrix has a manually + # PetscMalloc'ed context attached via MatShellSetContext + + objs = self.objs + + get_ctx = petsc_call( + 'MatShellGetContext', [objs['J'], Byref(objs['subctx'])] + ) + + free_ctx = petsc_call( + 'PetscFree', [objs['subctx']] + ) + + body = self._make_callable_body((get_ctx, free_ctx)) + + cb = self._make_petsc_callable( + 'DestroySubMatrixCtx', body, parameters=(objs['J'])) + + self._destroy_submat_callback = cb + self._efuncs[cb.name] = cb + def _create_submatrices(self): body = self._submat_callback_body() objs = self.objs @@ -947,22 +1240,53 @@ def _submat_callback_body(self): dm_get_info = petsc_call( 'DMDAGetInfo', [ - sobjs['callbackdm'], Null, Byref(sobjs['M']), Byref(sobjs['N']), + sobjs['callbackdm'], Null, Null, Null, Null, Null, Null, Null, Byref(objs['dof']), Null, Null, Null, Null, Null ] ) - subblock_rows = DummyExpr(objs['subblockrows'], Mul(sobjs['M'], sobjs['N'])) - subblock_cols = DummyExpr(objs['subblockcols'], Mul(sobjs['M'], sobjs['N'])) ptr = DummyExpr( objs['submat_arr']._C_symbol, Deref(objs['Submats']._C_symbol), init=True ) - mat_create = petsc_call('MatCreate', [sobjs['comm'], Byref(objs['block'])]) + i = Dimension(name='i') + tvec = sobjs['tmpvec'] + + row_idx = DummyExpr(objs['rowidx'], IntDiv(i, objs['dof'])) + col_idx = DummyExpr(objs['colidx'], Mod(i, objs['dof'])) + + # Query global and local sizes from each sub-DM via a temporary Vec. + get_row_vec = petsc_call( + 'DMGetGlobalVector', [objs['Subdms'].indexed[objs['rowidx']], Byref(tvec)] + ) + get_row_size = petsc_call( + 'VecGetSize', [tvec, Byref(objs['subblockrows'])] + ) + get_row_local_size = petsc_call( + 'VecGetLocalSize', [tvec, Byref(objs['sublocalrows'])] + ) + restore_row_vec = petsc_call( + 'DMRestoreGlobalVector', [objs['Subdms'].indexed[objs['rowidx']], Byref(tvec)] + ) + get_col_vec = petsc_call( + 'DMGetGlobalVector', [objs['Subdms'].indexed[objs['colidx']], Byref(tvec)] + ) + get_col_size = petsc_call( + 'VecGetSize', [tvec, Byref(objs['subblockcols'])] + ) + get_col_local_size = petsc_call( + 'VecGetLocalSize', [tvec, Byref(objs['sublocalcols'])] + ) + restore_col_vec = petsc_call( + 'DMRestoreGlobalVector', [objs['Subdms'].indexed[objs['colidx']], Byref(tvec)] + ) + + petsc_obj_comm = Call('PetscObjectComm', arguments=[PetscObjectCast(objs['J'])]) + mat_create = petsc_call('MatCreate', [petsc_obj_comm, Byref(objs['block'])]) mat_set_sizes = petsc_call( 'MatSetSizes', [ - objs['block'], 'PETSC_DECIDE', 'PETSC_DECIDE', + objs['block'], objs['sublocalrows'], objs['sublocalcols'], objs['subblockrows'], objs['subblockcols'] ] ) @@ -970,10 +1294,6 @@ def _submat_callback_body(self): mat_set_type = petsc_call('MatSetType', [objs['block'], 'MATSHELL']) malloc = petsc_call('PetscMalloc1', [1, Byref(objs['subctx'])]) - i = Dimension(name='i') - - row_idx = DummyExpr(objs['rowidx'], IntDiv(i, objs['dof'])) - col_idx = DummyExpr(objs['colidx'], Mod(i, objs['dof'])) deref_subdm = Dereference(objs['Subdms'], objs['ljacctx']) @@ -996,22 +1316,42 @@ def _submat_callback_body(self): set_ctx = petsc_call('MatShellSetContext', [objs['block'], objs['subctx']]) + destroy_cb = self._destroy_submat_callback.name + + set_destroy_mat_op = petsc_call( + 'MatShellSetOperation', + [ + objs['block'], + 'MATOP_DESTROY', + MatShellSetOp(destroy_cb, VOID._dtype, VOID._dtype), + ], + ) + mat_setup = petsc_call('MatSetUp', [objs['block']]) assign_block = DummyExpr(objs['submat_arr'].indexed[i], objs['block']) iter_body = ( + row_idx, + col_idx, + get_row_vec, + get_row_size, + get_row_local_size, + restore_row_vec, + get_col_vec, + get_col_size, + get_col_local_size, + restore_col_vec, mat_create, mat_set_sizes, mat_set_type, malloc, - row_idx, - col_idx, set_rows, set_cols, dm_set_ctx, matset_dm, set_ctx, + set_destroy_mat_op, mat_setup, assign_block ) @@ -1040,8 +1380,6 @@ def _submat_callback_body(self): mat_get_dm, dm_get_app, dm_get_info, - subblock_rows, - subblock_cols, ptr, BlankLine, iteration, @@ -1120,7 +1458,7 @@ def zero_vector(vec): def get_user_struct_fields(iet): fields = [f.function for f in FindSymbols('basics').visit(iet)] from devito.types.basic import LocalType - avoid = (Temp, TempArray, LocalType) + avoid = (Temp, TempArray, LocalType, PostIncrementIndex, Stride) fields = [f for f in fields if not isinstance(f.function, avoid)] fields = [ f for f in fields if not (f.is_Dimension and not (f.is_Time or f.is_Modulo)) diff --git a/devito/petsc/iet/logging.py b/devito/petsc/iet/logging.py index 65e2ec2be8..018efe121c 100644 --- a/devito/petsc/iet/logging.py +++ b/devito/petsc/iet/logging.py @@ -6,7 +6,8 @@ from devito.tools import frozendict from devito.petsc.iet.nodes import petsc_call -from devito.petsc.logging import petsc_return_variable_dict, PetscInfo +from devito.petsc.logging import petsc_return_variable_dict, PetscInfo, KSPTYPE_MAX_LEN +from devito.petsc.types import KSPType class PetscLogger: @@ -93,10 +94,22 @@ def calls(self): petsc_call(return_variable.name, [input] + by_ref_output) ) # TODO: Perform a PetscCIntCast here? - exprs = [ - DummyExpr(FieldFromPointer(i._C_symbol, struct), i._C_symbol) - for i in output_params - ] - calls.extend(exprs) + for i in output_params: + if isinstance(i, KSPType): + # KSPType is `const char*` into KSP-owned memory. After + # SNESDestroy that pointer is invalid, so we copy the string + # into the inline char buffer in the profiler struct instead + # of storing the raw pointer. + calls.append( + petsc_call('PetscStrncpy', [ + FieldFromPointer(i._C_symbol, struct), + i._C_symbol, + KSPTYPE_MAX_LEN + ]) + ) + else: + calls.append( + DummyExpr(FieldFromPointer(i._C_symbol, struct), i._C_symbol) + ) return tuple(calls) diff --git a/devito/petsc/iet/passes.py b/devito/petsc/iet/passes.py index 5154afe43d..695996f67b 100644 --- a/devito/petsc/iet/passes.py +++ b/devito/petsc/iet/passes.py @@ -8,8 +8,10 @@ FindSymbols, DummyExpr, Uxreplace, Dereference ) from devito.symbolics import Byref, Macro, Null, FieldFromPointer -from devito.types.basic import DataSymbol +from devito.types.basic import DataSymbol, LocalType +from devito.types.misc import FIndexed import devito.logger +from devito.passes.iet.linearization import linearize_accesses, Tracker from devito.petsc.types import ( MultipleFieldData, Initialize, Finalize, ArgvSymbol, MainUserStruct, @@ -22,8 +24,14 @@ BaseCallbackBuilder, CoupledCallbackBuilder, populate_matrix_context, get_user_struct_fields ) -from devito.petsc.iet.type_builder import BaseTypeBuilder, CoupledTypeBuilder, objs -from devito.petsc.iet.builder import BuilderBase, CoupledBuilder, make_core_petsc_calls +from devito.petsc.iet.type_builder import ( + BaseTypeBuilder, CoupledTypeBuilder, ConstrainedBCTypeBuilder, + CoupledConstrainedBCTypeBuilder, objs +) +from devito.petsc.iet.builder import ( + BuilderBase, CoupledBuilder, ConstrainedBCBuilder, CoupledConstrainedBCBuilder, + make_core_petsc_calls +) from devito.petsc.iet.solve import Solve, CoupledSolve from devito.petsc.iet.time_dependence import TimeDependent, TimeIndependent from devito.petsc.iet.logging import PetscLogger @@ -70,7 +78,7 @@ def lower_petsc(iet, **kwargs): subs = {} efuncs = {} - # Map each `PetscMetaData`` to its Section (for logging) + # Map each `PetscMetaData` to its Section (for logging) section_mapper = MapNodes(Section, PetscMetaData, 'groupby').visit(iet) # Prefixes within the same `Operator` should not be duplicated @@ -126,6 +134,48 @@ def lower_petsc_symbols(iet, **kwargs): # Rebuild `MainUserStruct` and update iet accordingly rebuild_parent_user_struct(iet, mapper=callback_struct_mapper) + iet = linear_indices(iet, **kwargs) + + +@iet_pass +def linear_indices(iet, **kwargs): + """ + """ + if not iet.name.startswith("SetPointBCs"): + return iet, {} + + if kwargs['options']['index-mode'] == 'int32': + dtype = np.int32 + else: + dtype = np.int64 + + tracker = Tracker('basic', dtype, kwargs['sregistry']) + + indexeds = [ + i for i in FindSymbols('indexeds').visit(iet) + if not isinstance(i.function, LocalType) + ] + candidates = {i.function.name for i in indexeds} + key = lambda f: f.name in candidates + + iet = linearize_accesses(iet, key0=key, tracker=tracker) + + indexeds = [ + i for i in FindSymbols('indexeds').visit(iet) + if i.function.name in candidates + ] + mapper_findexeds = {i: linear_index(i) for i in indexeds} + + return Uxreplace(mapper_findexeds).visit(iet), {} + + +def linear_index(i): + if isinstance(i, FIndexed): + return i.linear_index + # 1D case + assert len(i.indices) == 1 + return i.indices[0] + @iet_pass def rebuild_child_user_struct(iet, mapper, **kwargs): @@ -248,6 +298,7 @@ def __init__(self, inject_solve, iters, comm, section_mapper, **kwargs): self.get_info = inject_solve.expr.rhs.get_info self.kwargs = kwargs self.coupled = isinstance(inject_solve.expr.rhs.field_data, MultipleFieldData) + self.constrain_bc = inject_solve.expr.rhs.field_data.constrain_bc self.common_kwargs = { 'inject_solve': self.inject_solve, 'objs': self.objs, @@ -263,11 +314,14 @@ def __init__(self, inject_solve, iters, comm, section_mapper, **kwargs): @cached_property def type_builder(self): - return ( - CoupledTypeBuilder(**self.common_kwargs) - if self.coupled else - BaseTypeBuilder(**self.common_kwargs) - ) + if self.coupled and self.constrain_bc: + return CoupledConstrainedBCTypeBuilder(**self.common_kwargs) + elif self.coupled: + return CoupledTypeBuilder(**self.common_kwargs) + elif self.constrain_bc: + return ConstrainedBCTypeBuilder(**self.common_kwargs) + else: + return BaseTypeBuilder(**self.common_kwargs) @cached_property def time_dependence(self): @@ -282,8 +336,14 @@ def callback_builder(self): @cached_property def builder(self): - return CoupledBuilder(**self.common_kwargs) \ - if self.coupled else BuilderBase(**self.common_kwargs) + if self.coupled and self.constrain_bc: + return CoupledConstrainedBCBuilder(**self.common_kwargs) + elif self.coupled: + return CoupledBuilder(**self.common_kwargs) + elif self.constrain_bc: + return ConstrainedBCBuilder(**self.common_kwargs) + else: + return BuilderBase(**self.common_kwargs) @cached_property def solve(self): diff --git a/devito/petsc/iet/solve.py b/devito/petsc/iet/solve.py index f6c1fa22d5..d2389511b0 100644 --- a/devito/petsc/iet/solve.py +++ b/devito/petsc/iet/solve.py @@ -37,8 +37,8 @@ def _execute_solve(self): vec_place_array = self.time_dependence.place_array(target) - if self.callback_builder.initial_guesses: - initguess = self.callback_builder.initial_guesses[0] + if self.callback_builder._initial_guess_efuncs: + initguess = self.callback_builder._initial_guess_efuncs[0] initguess_call = petsc_call(initguess.name, [dmda, sobjs['xlocal']]) else: initguess_call = None diff --git a/devito/petsc/iet/type_builder.py b/devito/petsc/iet/type_builder.py index 8462ebb916..e6fe208810 100644 --- a/devito/petsc/iet/type_builder.py +++ b/devito/petsc/iet/type_builder.py @@ -2,13 +2,15 @@ from devito.symbolics import String from devito.types import Symbol +from devito.types.misc import PostIncrementIndex from devito.tools import frozendict from devito.petsc.types import ( - PetscBundle, DM, Mat, CallbackVec, Vec, KSP, PC, SNES, PetscInt, StartPtr, - PointerIS, PointerDM, VecScatter, JacobianStruct, SubMatrixStruct, CallbackDM, - PetscMPIInt, PetscErrorCode, PointerMat, MatReuse, CallbackPointerDM, - CallbackPointerIS, CallbackMat, DummyArg, NofSubMats + PetscBundle, DM, Mat, Vec, KSP, PC, SNES, PetscInt, StartPtr, + PointerIS, PointerDM, VecScatter, JacobianStruct, SubMatrixStruct, + PetscMPIInt, PetscErrorCode, PointerMat, MatReuse, + DummyArg, NofSubMats, PetscSectionGlobal, + PetscSectionLocal, PetscSF, CallbackPetscInt, PointerPetscInt, SingleIS ) @@ -44,7 +46,7 @@ def _build(self): - 'localsize' (PetscInt): The local length of the solution vector. - 'dmda' (DM): The DMDA object associated with this solve, linked to the SNES object via `SNESSetDM`. - - 'callbackdm' (CallbackDM): The DM object accessed within callback + - 'callbackdm' (DM): The DM object accessed within callback functions via `SNESGetDM`. """ sreg = self.sregistry @@ -58,13 +60,13 @@ def _build(self): 'xglobal': Vec(sreg.make_name(prefix='xglobal')), 'xlocal': Vec(sreg.make_name(prefix='xlocal')), 'bglobal': Vec(sreg.make_name(prefix='bglobal')), - 'blocal': CallbackVec(sreg.make_name(prefix='blocal')), + 'blocal': Vec(sreg.make_name(prefix='blocal'), destroy=False), 'ksp': KSP(sreg.make_name(prefix='ksp')), 'pc': PC(sreg.make_name(prefix='pc')), 'snes': SNES(snes_name), 'localsize': PetscInt(sreg.make_name(prefix='localsize')), 'dmda': DM(sreg.make_name(prefix='da'), dofs=len(targets)), - 'callbackdm': CallbackDM(sreg.make_name(prefix='dm')), + 'callbackdm': DM(sreg.make_name(prefix='dm'), destroy=False), 'snes_prefix': String(formatted_prefix), } @@ -106,18 +108,11 @@ def _extend_build(self, base_dict): ) base_dict['nfields'] = PetscInt(sreg.make_name(prefix='nfields')) - space_dims = len(self.field_data.grid.dimensions) - - dim_labels = ["M", "N", "P"] - base_dict.update({ - dim_labels[i]: PetscInt(dim_labels[i]) for i in range(space_dims) - }) - submatrices = self.field_data.jacobian.nonzero_submatrices base_dict['jacctx'] = JacobianStruct( name=sreg.make_name(prefix=objs['ljacctx'].name), - fields=objs['ljacctx'].fields, + fields=objs['ljacctx'].fields, no_of_submats=len(targets)*len(targets) ) for sm in submatrices: @@ -127,9 +122,13 @@ def _extend_build(self, base_dict): name=f'{name}ctx', fields=objs['subctx'].fields, ) - base_dict[f'{name}X'] = CallbackVec(f'{name}X') - base_dict[f'{name}Y'] = CallbackVec(f'{name}Y') - base_dict[f'{name}F'] = CallbackVec(f'{name}F') + base_dict[f'{name}X'] = Vec(f'{name}X', destroy=False) + base_dict[f'{name}Y'] = Vec(f'{name}Y', destroy=False) + base_dict[f'{name}F'] = Vec(f'{name}F', destroy=False) + + # Temporary Vec used in MatCreateSubMatrices to query constrained + # global sizes from sub-DMs (works for both constrained and unconstrained). + base_dict['tmpvec'] = Vec('tmpvec', destroy=False) # Bundle objects/metadata required by the coupled residual callback f_components, x_components = [], [] @@ -173,20 +172,20 @@ def _target_dependent(self, base_dict): base_dict[f'{name}_ptr'] = StartPtr( sreg.make_name(prefix=f'{name}_ptr'), t.dtype ) - base_dict[f'xlocal{name}'] = CallbackVec( + base_dict[f'xlocal{name}'] = Vec( sreg.make_name(prefix=f'xlocal{name}'), liveness='eager' ) - base_dict[f'Fglobal{name}'] = CallbackVec( - sreg.make_name(prefix=f'Fglobal{name}'), liveness='eager' + base_dict[f'Fglobal{name}'] = Vec( + sreg.make_name(prefix=f'Fglobal{name}'), liveness='eager', destroy=False ) - base_dict[f'Xglobal{name}'] = CallbackVec( - sreg.make_name(prefix=f'Xglobal{name}') + base_dict[f'Xglobal{name}'] = Vec( + sreg.make_name(prefix=f'Xglobal{name}'), destroy=False ) base_dict[f'xglobal{name}'] = Vec( sreg.make_name(prefix=f'xglobal{name}') ) - base_dict[f'blocal{name}'] = CallbackVec( - sreg.make_name(prefix=f'blocal{name}'), liveness='eager' + base_dict[f'blocal{name}'] = Vec( + sreg.make_name(prefix=f'blocal{name}'), liveness='eager', destroy=False ) base_dict[f'bglobal{name}'] = Vec( sreg.make_name(prefix=f'bglobal{name}') @@ -199,6 +198,80 @@ def _target_dependent(self, base_dict): ) +class ConstrainedBCTypeBuilder(BaseTypeBuilder): + def _extend_build(self, base_dict): + sreg = self.sregistry + base_dict['lsection'] = PetscSectionLocal( + name=sreg.make_name(prefix='lsection') + ) + base_dict['gsection'] = PetscSectionGlobal( + name=sreg.make_name(prefix='gsection') + ) + base_dict['sf'] = PetscSF( + name=sreg.make_name(prefix='sf') + ) + tname = self.field_data.target.name + base_dict[f'numBC_{tname}'] = PetscInt( + name=sreg.make_name(prefix='numBC'), initvalue=0 + ) + base_dict[f'numBCPtr_{tname}'] = CallbackPetscInt( + name=sreg.make_name(prefix='numBCPtr'), initvalue=0 + ) + base_dict[f'bcPointsArr_{tname}'] = PointerPetscInt( + name=sreg.make_name(prefix='bcPointsArr') + ) + base_dict[f'k_iter_{tname}'] = PostIncrementIndex( + name='k_iter', initvalue=0 + ) + base_dict['bcPointsIS'] = SingleIS(name='bcPointsIS') + base_dict['bcPoints'] = PointerIS(name='bcPoints') + return base_dict + + +class CoupledConstrainedBCTypeBuilder(CoupledTypeBuilder): + def _extend_build(self, base_dict): + base_dict = super()._extend_build(base_dict) + sreg = self.sregistry + targets = self.field_data.targets + nfields = len(targets) + + # Shared section/SF objects + base_dict['lsection'] = PetscSectionLocal( + name=sreg.make_name(prefix='lsection') + ) + base_dict['gsection'] = PetscSectionGlobal( + name=sreg.make_name(prefix='gsection') + ) + base_dict['sf'] = PetscSF( + name=sreg.make_name(prefix='sf') + ) + + # Per-field BC objects + for t in targets: + tname = t.name + base_dict[f'numBC_{tname}'] = PetscInt( + name=sreg.make_name(prefix=f'numBC{tname}'), initvalue=0 + ) + base_dict[f'numBCPtr_{tname}'] = CallbackPetscInt( + name=sreg.make_name(prefix=f'numBCPtr{tname}'), initvalue=0 + ) + base_dict[f'bcPointsArr_{tname}'] = PointerPetscInt( + name=sreg.make_name(prefix=f'bcPointsArr{tname}') + ) + base_dict[f'k_iter_{tname}'] = PostIncrementIndex( + name=sreg.make_name(prefix=f'k{tname}'), initvalue=0 + ) + + # IS arrays for all fields (one entry per field) + base_dict['bcPointsIS'] = PointerIS( + name='bcPointsIS', nindices=nfields + ) + base_dict['bcCompsIS'] = PointerIS( + name='bcCompsIS', nindices=nfields + ) + return base_dict + + subdms = PointerDM(name='subdms') fields = PointerIS(name='fields') submats = PointerMat(name='submats') @@ -213,19 +286,21 @@ def _target_dependent(self, base_dict): objs = frozendict({ 'size': PetscMPIInt(name='size'), 'err': PetscErrorCode(name='err'), - 'block': CallbackMat('block'), + 'block': Mat('block', destroy=False), 'submat_arr': PointerMat(name='submat_arr'), 'subblockrows': PetscInt('subblockrows'), 'subblockcols': PetscInt('subblockcols'), + 'sublocalrows': PetscInt('sublocalrows'), + 'sublocalcols': PetscInt('sublocalcols'), 'rowidx': PetscInt('rowidx'), 'colidx': PetscInt('colidx'), 'J': Mat('J'), 'X': Vec('X'), - 'xloc': CallbackVec('xloc'), + 'xloc': Vec('xloc', destroy=False), 'Y': Vec('Y'), - 'yloc': CallbackVec('yloc'), + 'yloc': Vec('yloc', destroy=False), 'F': Vec('F'), - 'floc': CallbackVec('floc'), + 'floc': Vec('floc', destroy=False), 'B': Vec('B'), 'nfields': PetscInt('nfields'), 'irow': PointerIS(name='irow'), @@ -237,12 +312,12 @@ def _target_dependent(self, base_dict): 'rows': rows, 'cols': cols, 'Subdms': subdms, - 'LocalSubdms': CallbackPointerDM(name='subdms'), + 'LocalSubdms': PointerDM(name='subdms', destroy=False), 'Fields': fields, - 'LocalFields': CallbackPointerIS(name='fields'), + 'LocalFields': PointerIS(name='fields', destroy=False), 'Submats': submats, 'ljacctx': JacobianStruct( - fields=[subdms, fields, submats], modifier=' *' + fields=[subdms, fields, submats], modifier=' *', destroy=False ), 'subctx': SubMatrixStruct(fields=[rows, cols]), 'dummyctx': Symbol('lctx'), diff --git a/devito/petsc/logging.py b/devito/petsc/logging.py index acbf4cc86f..b929e29dd5 100644 --- a/devito/petsc/logging.py +++ b/devito/petsc/logging.py @@ -1,13 +1,16 @@ import os from collections import namedtuple from dataclasses import dataclass +from functools import cached_property + +from cgen import Struct, Value from devito.types import CompositeObject from devito.petsc.types import ( PetscInt, PetscScalar, KSPType, KSPConvergedReason, KSPNormType ) -from devito.petsc.config import petsc_type_to_ctype +from devito.petsc.config import petsc_type_to_ctype, KSPTYPE_MAX_LEN class PetscEntry: @@ -161,6 +164,22 @@ def __init__(self, name, pname, petsc_option_mapper, sobjs, section_mapper, def fields(self): return self._fields + @cached_property + def _C_typedecl(self): + """ + Override generated C struct declaration so that KSPType fields + are emitted as ``char name[KSPTYPE_MAX_LEN]`` rather than ``KSPType name`` + (i.e. ``const char *``). This avoids a segfault when Python reads the + profiler struct after SNESDestroy has freed the KSP. + """ + entries = [] + for field in self._fields: + if str(field.dtype) == 'KSPType': + entries.append(Value('char', '%s[%d]' % (field.name, KSPTYPE_MAX_LEN))) + else: + entries.append(Value(str(field.dtype), field.name)) + return Struct(self.dtype._type_.__name__, entries) + @property def prefix(self): # If users provide an options prefix, use it in the summary; diff --git a/devito/petsc/solve.py b/devito/petsc/solve.py index 3856392436..5941eb16eb 100644 --- a/devito/petsc/solve.py +++ b/devito/petsc/solve.py @@ -6,7 +6,7 @@ from devito.petsc.types import ( LinearSolverMetaData, PETScArray, DMDALocalInfo, FieldData, MultipleFieldData, - Jacobian, Residual, MixedResidual, MixedJacobian, InitialGuess + Jacobian, Residual, MixedResidual, MixedJacobian, InitialGuess, ConstrainBC ) from devito.petsc.types.equation import EssentialBC from devito.petsc.solver_parameters import ( @@ -18,7 +18,7 @@ def petscsolve(target_exprs, target=None, solver_parameters=None, - options_prefix=None, get_info=[]): + options_prefix=None, get_info=[], constrain_bcs=False): """ Returns a symbolic expression representing a linear PETSc solver, enriched with all the necessary metadata for execution within an `Operator`. @@ -78,6 +78,12 @@ def petscsolve(target_exprs, target=None, solver_parameters=None, - ['kspgetiterationnumber', 'kspgettolerances', 'kspgetconvergedreason', 'kspgettype', 'kspgetnormtype', 'snesgetiterationnumber'] + constrain_bcs : bool, optional + If `True`, essential boundary conditions specifed by `EssentialBC` equations + are constrained through a `PetscSection`. As a result, the corresponding degrees + of freedom are excluded from the global solver and are not imposed using + trivial equations. + Returns ------- Eq: @@ -86,15 +92,15 @@ def petscsolve(target_exprs, target=None, solver_parameters=None, """ if target is not None: return InjectSolve(solver_parameters, {target: target_exprs}, - options_prefix, get_info).build_expr() + options_prefix, get_info, constrain_bcs).build_expr() else: return InjectMixedSolve(solver_parameters, target_exprs, - options_prefix, get_info).build_expr() + options_prefix, get_info, constrain_bcs).build_expr() class InjectSolve: def __init__(self, solver_parameters=None, target_exprs=None, options_prefix=None, - get_info=[]): + get_info=[], constrain_bcs=False): self.solver_parameters = linear_solver_parameters(solver_parameters) self.time_mapper = None self.target_exprs = target_exprs @@ -102,6 +108,7 @@ def __init__(self, solver_parameters=None, target_exprs=None, options_prefix=Non self.user_prefix = options_prefix self.formatted_prefix = format_options_prefix(options_prefix) self.get_info = [f.lower() for f in get_info] + self.constrain_bcs = constrain_bcs def build_expr(self): target, funcs, field_data = self.linear_solve_args() @@ -129,16 +136,25 @@ def linear_solve_args(self): exprs = sorted(exprs, key=lambda e: not isinstance(e, EssentialBC)) + # TODO: If constrain_bcs is enabled, essential BC handling may be redundant + # (or need adjusting) in the following classes jacobian = Jacobian(target, exprs, arrays, self.time_mapper) residual = Residual(target, exprs, arrays, self.time_mapper, jacobian.scdiag) initial_guess = InitialGuess(target, exprs, arrays, self.time_mapper) + constrain_bc = ( + ConstrainBC(target, exprs, arrays) + if self.constrain_bcs + else None + ) + field_data = FieldData( target=target, jacobian=jacobian, residual=residual, initial_guess=initial_guess, - arrays=arrays + arrays=arrays, + constrain_bc=constrain_bc ) return target, funcs, field_data @@ -180,11 +196,17 @@ def linear_solve_args(self): jacobian.target_scaler_mapper ) + constrain_bc = { + t: ConstrainBC(t, as_tuple(self.target_exprs[t]), arrays) + for t in targets + } if self.constrain_bcs else None + all_data = MultipleFieldData( targets=targets, arrays=arrays, jacobian=jacobian, - residual=residual + residual=residual, + constrain_bc=constrain_bc ) return targets[0], funcs, all_data diff --git a/devito/petsc/types/array.py b/devito/petsc/types/array.py index a56490f3af..36a2173cca 100644 --- a/devito/petsc/types/array.py +++ b/devito/petsc/types/array.py @@ -94,7 +94,7 @@ def staggered(self): @property def is_Staggered(self): - return self.target.staggered is not None + return bool(self.target.staggered) @property def localinfo(self): diff --git a/devito/petsc/types/dimension.py b/devito/petsc/types/dimension.py new file mode 100644 index 0000000000..b1ed7d6cc0 --- /dev/null +++ b/devito/petsc/types/dimension.py @@ -0,0 +1,91 @@ +from devito.types.dimension import Thickness + + +class SubDimMax(Thickness): + """ + """ + def __init_finalize__(self, *args, **kwargs): + self._subdim = kwargs.pop('subdim') + self._dtype = self._subdim.dtype + + super().__init_finalize__(*args, **kwargs) + + @property + def subdim(self): + return self._subdim + + def _arg_values(self, grid=None, **kwargs): + dist = grid.distributor + # global rtkn + grtkn = kwargs.get(self.subdim.rtkn.name, self.subdim.rtkn.value) + # decomposition info + decomp = dist.decomposition[self.subdim.parent] + glb_max = decomp.glb_max + val = decomp.index_glb_to_loc_unsafe(glb_max - grtkn) + return {self.name: int(val)} + + +class SubDimMin(Thickness): + """ + """ + def __init_finalize__(self, *args, **kwargs): + self._subdim = kwargs.pop('subdim') + self._dtype = self._subdim.dtype + + super().__init_finalize__(*args, **kwargs) + + @property + def subdim(self): + return self._subdim + + def _arg_values(self, grid=None, **kwargs): + dist = grid.distributor + # global ltkn + gltkn = kwargs.get(self.subdim.ltkn.name, self.subdim.ltkn.value) + # decomposition info + decomp = dist.decomposition[self.subdim.parent] + glb_min = decomp.glb_min + val = decomp.index_glb_to_loc_unsafe(glb_min + gltkn) + return {self.name: int(val)} + + +class SpaceDimMax(Thickness): + """ + """ + def __init_finalize__(self, *args, **kwargs): + self._space_dim = kwargs.pop('space_dim') + self._dtype = self._space_dim.dtype + + super().__init_finalize__(*args, **kwargs) + + @property + def space_dim(self): + return self._space_dim + + def _arg_values(self, grid=None, **kwargs): + dist = grid.distributor + decomp = dist.decomposition[self.space_dim] + glb_max = decomp.glb_max + val = decomp.index_glb_to_loc_unsafe(glb_max) + return {self.name: int(val)} + + +class SpaceDimMin(Thickness): + """ + """ + def __init_finalize__(self, *args, **kwargs): + self._space_dim = kwargs.pop('space_dim') + self._dtype = self._space_dim.dtype + + super().__init_finalize__(*args, **kwargs) + + @property + def space_dim(self): + return self._space_dim + + def _arg_values(self, grid=None, **kwargs): + dist = grid.distributor + decomp = dist.decomposition[self.space_dim] + glb_min = decomp.glb_min + val = decomp.index_glb_to_loc_unsafe(glb_min) + return {self.name: int(val)} diff --git a/devito/petsc/types/equation.py b/devito/petsc/types/equation.py index fe9611c1fb..abfb327d03 100644 --- a/devito/petsc/types/equation.py +++ b/devito/petsc/types/equation.py @@ -1,4 +1,4 @@ -from devito.types.equation import Eq +from devito.types.equation import Eq, Inc __all__ = ['EssentialBC'] @@ -8,10 +8,9 @@ class EssentialBC(Eq): """ Represents an essential boundary condition for use with `petscsolve`. - Due to ongoing work on PetscSection and DMDA integration (WIP), - these conditions are imposed as trivial equations. The compiler - will automatically zero the corresponding rows/columns in the Jacobian - and lift the boundary terms into the residual RHS. + The compiler will automatically zero the corresponding rows/columns in the Jacobian + and lift the boundary terms into the residual RHS, unless the user + specifies `constrain_bcs=True` to `petscsolve`. Note: - To define an essential boundary condition, use: @@ -19,7 +18,20 @@ class EssentialBC(Eq): where `target` is the Function-like object passed to `petscsolve`. - SubDomains used for multiple `EssentialBC`s must not overlap. """ - pass + __rkwargs__ = Eq.__rkwargs__ + ("target",) + + def __new__(cls, *args, target=None, **kwargs): + obj = super().__new__(cls, *args, **kwargs) + + if target is None: + target = obj.lhs.function + + obj._target = target + return obj + + @property + def target(self): + return self._target class ZeroRow(EssentialBC): @@ -43,3 +55,18 @@ class ZeroColumn(EssentialBC): Created and managed directly by Devito, not by users. """ pass + + +class ConstrainBC(EssentialBC): + pass + + +class NoOfEssentialBC(Inc, ConstrainBC): + """Equation used count essential boundary condition nodes. + This type of equation is generated inside + petscsolve if the user sets `constrain_bcs=True`.""" + pass + + +class PointEssentialBC(ConstrainBC): + pass diff --git a/devito/petsc/types/metadata.py b/devito/petsc/types/metadata.py index d36e088a36..ece8efd6ce 100644 --- a/devito/petsc/types/metadata.py +++ b/devito/petsc/types/metadata.py @@ -9,7 +9,10 @@ from devito.operations.solve import eval_time_derivatives from devito.petsc.config import petsc_variables -from devito.petsc.types.equation import EssentialBC, ZeroRow, ZeroColumn +from devito.petsc.types.object import PetscInt +from devito.petsc.types.equation import ( + EssentialBC, ZeroRow, ZeroColumn, NoOfEssentialBC, PointEssentialBC +) class MetaData(sympy.Function, Reconstructable): @@ -125,11 +128,14 @@ class FieldData: initial_guess : InitialGuess Defines the initial guess for the solution, which satisfies essential boundary conditions. + constrain_bc : ConstrainBC + Defines the metadata for constraining essential boundary conditions i.e + removing them from the global solve. arrays : dict A dictionary mapping `target` to its corresponding PETScArrays. """ def __init__(self, target=None, jacobian=None, residual=None, - initial_guess=None, arrays=None, **kwargs): + initial_guess=None, constrain_bc=None, arrays=None, **kwargs): self._target = target petsc_precision = dtype_mapper[petsc_variables['PETSC_PRECISION']] if self._target.dtype != petsc_precision: @@ -141,6 +147,7 @@ def __init__(self, target=None, jacobian=None, residual=None, self._jacobian = jacobian self._residual = residual self._initial_guess = initial_guess + self._constrain_bc = constrain_bc self._arrays = arrays @property @@ -159,6 +166,10 @@ def residual(self): def initial_guess(self): return self._initial_guess + @property + def constrain_bc(self): + return self._constrain_bc + @property def arrays(self): return self._arrays @@ -200,11 +211,12 @@ class MultipleFieldData(FieldData): arrays : dict A dictionary mapping the `targets` to their corresponding PETScArrays. """ - def __init__(self, targets, arrays, jacobian=None, residual=None): + def __init__(self, targets, arrays, jacobian=None, residual=None, constrain_bc=None): self._targets = as_tuple(targets) self._arrays = arrays self._jacobian = jacobian self._residual = residual + self._constrain_bc = constrain_bc @cached_property def space_dimensions(self): @@ -265,6 +277,7 @@ def _scale_non_bcs(self, matvecs, target=None): for m in matvecs ] + # TODO: rethink : can likely turn off if user requests to constrain bcs def _scale_bcs(self, matvecs, scdiag): """ Scale the EssentialBCs in `matvecs` by `scdiag`. @@ -569,6 +582,7 @@ def _make_F_target(self, eq, F_target, targets): arrays = self.arrays[self.target] volume = self.target.grid.symbolic_volume_cell + # TODO: rethink : can likely turn off if user requests to constrain bcs if isinstance(eq, EssentialBC): # The initial guess satisfies the essential BCs, so this term is zero. # Still included to support Jacobian testing via finite differences. @@ -592,6 +606,7 @@ def _make_F_target(self, eq, F_target, targets): def _make_b(self, expr, b): b_arr = self.arrays[self.target]['b'] + # TODO: rethink : can likely turn off if user requests to constrain bcs rhs = 0. if isinstance(expr, EssentialBC) else b.subs(self.time_mapper) rhs = rhs * self.target.grid.symbolic_volume_cell return (Eq(b_arr, rhs, subdomain=expr.subdomain),) @@ -650,6 +665,7 @@ def _build_residual(self, expr, target): target_funcs = set(generate_targets(Eq(eval_zeroed_eqn, 0), t)) mapper.update(targets_to_arrays(self.arrays[t]['x'], target_funcs)) + # TODO: rethink : can likely turn off if user requests to constrain bcs if isinstance(expr, EssentialBC): rhs = (self.arrays[target]['x'] - expr.rhs)*self.scdiag[target] zero_row = ZeroRow( @@ -708,6 +724,80 @@ def _make_initial_guess(self, expr): return None +class ConstrainBC: + """ + Metadata passed to `SolverExpr` to constrain essential + boundary conditions using a PetscSection. + """ + def __init__(self, target, exprs, arrays): + self.target = target + self.arrays = arrays + self.counter = PetscInt(name=f'count_{target.name}') + self._make_increment_exprs(as_tuple(exprs)) + self._make_point_bc_exprs(as_tuple(exprs)) + + @property + def increment_exprs(self): + return self._increment_exprs + + @property + def point_bc_exprs(self): + return self._point_bc_exprs + + def _make_increment_exprs(self, exprs): + """ + Return a list of symbolic expressions + used to count the number of essential boundary conditions. + """ + self._increment_exprs = tuple([ + eq for eq in + (self._make_increment_expr(e) for e in exprs) + if eq is not None + ]) + + def _make_increment_expr(self, expr): + """ + Make the Eq that is used to increment the number of essential + boundary nodes in the generated ccode. + """ + if isinstance(expr, EssentialBC): + assert expr.lhs == self.target + return NoOfEssentialBC( + self.counter, 1, + subdomain=expr.subdomain, + implicit_dims=expr.subdomain.dimensions, + target=self.target + ) + else: + return None + + def _make_point_bc_exprs(self, exprs): + """ + Return a list of symbolic expressions + used to count the number of essential boundary conditions. + """ + self._point_bc_exprs = tuple([ + eq for eq in + (self._make_point_bc_expr(e) for e in exprs) + if eq is not None + ]) + + def _make_point_bc_expr(self, expr): + """ + Make the Eq that is used to increment the number of essential + boundary nodes in the generated ccode. + """ + if isinstance(expr, EssentialBC): + assert expr.lhs == self.target + return PointEssentialBC( + self.counter, self.target, + subdomain=expr.subdomain, + target=self.target + ) + else: + return None + + def targets_to_arrays(array, targets): """ Map each target in `targets` to a corresponding array generated from `array`, diff --git a/devito/petsc/types/object.py b/devito/petsc/types/object.py index 8db82be365..f547192a21 100644 --- a/devito/petsc/types/object.py +++ b/devito/petsc/types/object.py @@ -5,40 +5,59 @@ LocalObject, LocalCompositeObject, ModuloDimension, TimeDimension, ArrayObject, CustomDimension, Scalar ) -from devito.symbolics import Byref, cast +from devito.symbolics import Byref, cast, FieldFromComposite from devito.types.basic import DataSymbol, LocalType from devito.petsc.iet.nodes import petsc_call +# TODO: unnecessary use of "CALLBACK" types - just create a simple +# way of destroying or not destroying a certain type + + class PetscMixin: + def __init__(self, *args, destroy=True, **kwargs): + super().__init__(*args, **kwargs) + self._destroy = destroy + + @property + def _C_free(self): + if not self._destroy: + return None + return self._C_free_impl() + + def _C_free_impl(self): + """ + Implement the PETSc destruction logic for this object. + + Subclasses should override this method to emit the appropriate + PETSc destroy call(s) (e.g., `DMDestroy`, `VecDestroy`, etc.). + + Objects obtained inside callback functions via PETSc "Get" + routines (e.g. `SNESGetDM`, `KSPGetPC`) are managed elsewhere + and must not be destroyed here. In such cases, this method + should return ``None``. + """ + return None + @property def _C_free_priority(self): - if type(self) in FREE_PRIORITY: - return FREE_PRIORITY[type(self)] - else: - return super()._C_free_priority + for cls, prio in FREE_PRIORITY.items(): + if isinstance(self, cls): + return prio + return super()._C_free_priority class PetscObject(PetscMixin, LocalObject): pass -class CallbackDM(PetscObject): +class DM(PetscObject): """ - PETSc Data Management object (DM). This is the DM instance - accessed within the callback functions via `SNESGetDM` and - is not destroyed during callback execution. + PETSc Data Management object (DM). """ dtype = CustomDtype('DM') - -class DM(CallbackDM): - """ - PETSc Data Management object (DM). This is the primary DM instance - created within the main kernel and linked to the SNES - solver using `SNESSetDM`. - """ def __init__(self, *args, dofs=1, **kwargs): super().__init__(*args, **kwargs) self._dofs = dofs @@ -47,39 +66,31 @@ def __init__(self, *args, dofs=1, **kwargs): def dofs(self): return self._dofs - @property - def _C_free(self): + def _C_free_impl(self): return petsc_call('DMDestroy', [Byref(self.function)]) DMCast = cast('DM') +PetscObjectCast = cast('PetscObject') -class CallbackMat(PetscObject): +class Mat(PetscObject): """ - PETSc Matrix object (Mat) used within callback functions. - These instances are not destroyed during callback execution; - instead, they are managed and destroyed in the main kernel. + PETSc Matrix object (Mat). """ dtype = CustomDtype('Mat') - -class Mat(CallbackMat): - @property - def _C_free(self): + def _C_free_impl(self): return petsc_call('MatDestroy', [Byref(self.function)]) -class CallbackVec(PetscObject): +class Vec(PetscObject): """ PETSc vector object (Vec). """ dtype = CustomDtype('Vec') - -class Vec(CallbackVec): - @property - def _C_free(self): + def _C_free_impl(self): return petsc_call('VecDestroy', [Byref(self.function)]) @@ -99,6 +110,20 @@ class PetscInt(PetscObject): dtype = CustomIntType('PetscInt') +class CallbackPetscInt(PetscObject): + """ + """ + dtype = CustomIntType('PetscInt', modifier=' *') + + +class PetscIntPtr(PetscObject): + """ + """ + dtype = CustomIntType('PetscInt') + + _C_modifier = ' *' + + class PetscScalar(PetscObject): dtype = CustomIntType('PetscScalar') @@ -123,16 +148,13 @@ class KSPNormType(PetscObject): dtype = CustomDtype('KSPNormType') -class CallbackSNES(PetscObject): +class SNES(PetscObject): """ PETSc SNES : Non-Linear Systems Solvers. """ dtype = CustomDtype('SNES') - -class SNES(CallbackSNES): - @property - def _C_free(self): + def _C_free_impl(self): return petsc_call('SNESDestroy', [Byref(self.function)]) @@ -182,6 +204,9 @@ class MatReuse(PetscObject): class VecScatter(PetscObject): dtype = CustomDtype('VecScatter') + def _C_free_impl(self): + return petsc_call('VecScatterDestroy', [Byref(self.function)]) + class StartPtr(PetscObject): def __init__(self, name, dtype): @@ -193,7 +218,22 @@ class SingleIS(PetscObject): dtype = CustomDtype('IS') -class PETScStruct(LocalCompositeObject): +class PetscSectionGlobal(PetscObject): + dtype = CustomDtype('PetscSection') + + def _C_free_impl(self): + return petsc_call('PetscSectionDestroy', [Byref(self.function)]) + + +class PetscSectionLocal(PetscObject): + dtype = CustomDtype('PetscSection') + + +class PetscSF(PetscObject): + dtype = CustomDtype('PetscSF') + + +class PETScStruct(PetscMixin, LocalCompositeObject): @property def time_dim_fields(self): @@ -233,10 +273,34 @@ def parent(self): class JacobianStruct(PETScStruct): def __init__(self, name='jctx', pname='JacobianCtx', fields=None, - modifier='', liveness='lazy'): - super().__init__(name, pname, fields, modifier, liveness) + modifier='', liveness='lazy', no_of_submats=0, **kwargs): + super().__init__(name, pname, fields, modifier, liveness, **kwargs) + self._no_of_submats = no_of_submats _C_modifier = None + def _C_free_impl(self): + pointer_mat = [i for i in self.fields if isinstance(i, PointerMat)] + assert len(pointer_mat) == 1 + pointer_mat = pointer_mat[0] + + # Destroy each sub-matrix + destroy_calls = [ + petsc_call( + 'MatDestroy', + [Byref(FieldFromComposite(pointer_mat.indexed[i], + self.function))] + ) + for i in range(self._no_of_submats) + ] + # Free the allocated matrix pointer array + destroy_calls.append( + petsc_call( + 'PetscFree', + [FieldFromComposite(pointer_mat.base, self.function)] + ) + ) + return destroy_calls + class SubMatrixStruct(PETScStruct): def __init__(self, name='subctx', pname='SubMatrixCtx', fields=None, @@ -279,7 +343,7 @@ def _mem_stack(self): return False -class CallbackPointerIS(PETScArrayObject): +class PointerIS(PETScArrayObject): """ Index set object used for efficient indexing into vectors and matrices. https://petsc.org/release/manualpages/IS/IS/ @@ -288,10 +352,7 @@ class CallbackPointerIS(PETScArrayObject): def dtype(self): return CustomDtype('IS', modifier=' *') - -class PointerIS(CallbackPointerIS): - @property - def _C_free(self): + def _C_free_impl(self): destroy_calls = [ petsc_call('ISDestroy', [Byref(self.indexify().subs({self.dim: i}))]) for i in range(self._nindices) @@ -300,15 +361,21 @@ def _C_free(self): return destroy_calls -class CallbackPointerDM(PETScArrayObject): +class PointerPetscInt(PETScArrayObject): + """ + TODO: figure out names for this class vs PetscIntPtr class + """ @property def dtype(self): - return CustomDtype('DM', modifier=' *') + return CustomDtype('PetscInt', modifier=' *') -class PointerDM(CallbackPointerDM): +class PointerDM(PETScArrayObject): @property - def _C_free(self): + def dtype(self): + return CustomDtype('DM', modifier=' *') + + def _C_free_impl(self): destroy_calls = [ petsc_call('DMDestroy', [Byref(self.indexify().subs({self.dim: i}))]) for i in range(self._nindices) @@ -336,9 +403,12 @@ class NofSubMats(Scalar, LocalType): FREE_PRIORITY = { - PETScArrayObject: 0, - Vec: 1, - Mat: 2, - SNES: 3, - DM: 4, + JacobianStruct: 0, + PETScArrayObject: 1, + VecScatter: 2, + Vec: 3, + Mat: 4, + SNES: 5, + PetscSectionGlobal: 6, + DM: 7, } diff --git a/devito/symbolics/extended_sympy.py b/devito/symbolics/extended_sympy.py index 10fd7063f1..484d7fc2ec 100644 --- a/devito/symbolics/extended_sympy.py +++ b/devito/symbolics/extended_sympy.py @@ -947,6 +947,3 @@ def rfunc(func, item, *args): min: Min, max: Max, } - - -Null = Macro('NULL') diff --git a/devito/types/misc.py b/devito/types/misc.py index 8cdad91b07..a0c15dd61f 100644 --- a/devito/types/misc.py +++ b/devito/types/misc.py @@ -150,12 +150,35 @@ def bind(self, pname): return ((define, expr), findexed) + @property + def linear_index(self): + """ + TODO: Add tests + """ + f = self.function + strides_map = self.strides_map + indices = self.indices + + items = [ + idx * strides_map[d] + for idx, d in zip(indices, f.dimensions[1:]) + ] + items.append(indices[-1]) + + return sympy.Add(*items, evaluate=False) + func = Pickable._rebuild # Pickling support __reduce_ex__ = Pickable.__reduce_ex__ +class PostIncrementIndex(LocalObject): + """ + """ + dtype = np.int32 + + class Global(Symbol): """ diff --git a/docker/Dockerfile.petsc b/docker/Dockerfile.petsc index ced663019c..96e5319404 100644 --- a/docker/Dockerfile.petsc +++ b/docker/Dockerfile.petsc @@ -3,15 +3,20 @@ ############################################################## ARG base=devitocodes/devito:gcc-dev-amd64 +ARG PETSC_REPO=https://gitlab.com/petsc/petsc.git +ARG PETSC_BRANCH=v3.24.0 FROM $base +ARG PETSC_REPO +ARG PETSC_BRANCH + USER root RUN python3 -m venv /venv && \ mkdir -p /opt/petsc && \ cd /opt/petsc && \ - git clone -b v3.24.0 https://gitlab.com/petsc/petsc.git petsc && \ + git clone -b ${PETSC_BRANCH} ${PETSC_REPO} petsc && \ cd petsc && \ ./configure --with-fortran-bindings=0 --with-mpi-dir=/opt/openmpi \ --with-openblas-include=$(pkg-config --variable=includedir openblas) \ diff --git a/tests/test_petsc.py b/tests/test_petsc.py index 9f7663c131..200d48619b 100644 --- a/tests/test_petsc.py +++ b/tests/test_petsc.py @@ -30,8 +30,8 @@ def command_line(): # Random prefixes to validate command line argument parsing prefix = ( 'd17weqroeg', 'riabfodkj5', 'fir8o3lsak', - 'zwejklqn25', 'qtr2vfvwiu') - + 'zwejklqn25', 'qtr2vfvwiu' + ) petsc_option = ( ('ksp_rtol',), ('ksp_rtol', 'ksp_atol'), @@ -52,6 +52,9 @@ def command_line(): for o, v in zip(opt, val, strict=True): argv.extend([f'-{p}_{o}', str(v)]) expected[p] = zip(opt, val) + + if os.environ.get('DEVITO_PYTEST_FLAG', '0') == '2': + argv.extend(['-malloc_dump']) return argv, expected @@ -226,9 +229,9 @@ def test_petsc_cast(): eqn2 = Eq(f2.laplace, 10) eqn3 = Eq(f3.laplace, 10) - petsc1 = petscsolve(eqn1, f1) - petsc2 = petscsolve(eqn2, f2) - petsc3 = petscsolve(eqn3, f3) + petsc1 = petscsolve(eqn1, f1, options_prefix='cast1d') + petsc2 = petscsolve(eqn2, f2, options_prefix='cast2d') + petsc3 = petscsolve(eqn3, f3, options_prefix='cast3d') with switchconfig(language='petsc'): op1 = Operator(petsc1) @@ -258,9 +261,9 @@ def test_dmda_create(): eqn2 = Eq(f2.laplace, 10) eqn3 = Eq(f3.laplace, 10) - petsc1 = petscsolve(eqn1, f1) - petsc2 = petscsolve(eqn2, f2) - petsc3 = petscsolve(eqn3, f3) + petsc1 = petscsolve(eqn1, f1, options_prefix='dmda1d') + petsc2 = petscsolve(eqn2, f2, options_prefix='dmda2d') + petsc3 = petscsolve(eqn3, f3, options_prefix='dmda3d') with switchconfig(language='petsc'): op1 = Operator(petsc1, opt='noop') @@ -917,7 +920,7 @@ def test_coupled_vs_non_coupled(self, eq1, eq2, so): # TODO: As noted in the other test, some efuncs are not reused # where reuse is possible, investigate. assert len(callbacks1) == 12 - assert len(callbacks2) == 8 + assert len(callbacks2) == 9 # Check field_data type field0 = petsc1.rhs.field_data @@ -991,16 +994,25 @@ def test_coupled_frees(self, n_fields): frees = op.body.frees + n_submats = n_fields*n_fields + # Jacobian struct submats + for i in range(n_submats): + assert str(frees[i]) == f'PetscCall(MatDestroy(&jctx0.submats[{i}]));' + assert str(frees[n_submats]) == 'PetscCall(PetscFree(jctx0.submats));' + + offset = n_submats + 1 + # IS Destroy calls for i in range(n_fields): - assert str(frees[i]) == f'PetscCall(ISDestroy(&fields0[{i}]));' - assert str(frees[n_fields]) == 'PetscCall(PetscFree(fields0));' + assert str(frees[offset + i]) == f'PetscCall(ISDestroy(&fields0[{i}]));' + assert str(frees[offset + n_fields]) == 'PetscCall(PetscFree(fields0));' + + offset += n_fields + 1 # DM Destroy calls for i in range(n_fields): - assert str(frees[n_fields + 1 + i]) == \ - f'PetscCall(DMDestroy(&subdms0[{i}]));' - assert str(frees[n_fields*2 + 1]) == 'PetscCall(PetscFree(subdms0));' + assert str(frees[offset + i]) == f'PetscCall(DMDestroy(&subdms0[{i}]));' + assert str(frees[offset + n_fields]) == 'PetscCall(PetscFree(subdms0));' @skipif('petsc') def test_dmda_dofs(self): @@ -1182,7 +1194,7 @@ def test_coupling(self, eq1, eq2, j01_matvec, j10_matvec): ('Eq(e.laplace + 5.*e, f)', 'Eq(g.laplace + 5.*g, h)', '4', 'h_x*(5.0 - 2.5/h_x**2)'), ('Eq(e.dx + e + e.laplace, f)', 'Eq(g.dx + g + g.laplace, h.dx)', '2', - 'h_x*(1 + 1/h_x - 2.0/h_x**2)'), + 'h_x*(1 - 1/h_x - 2.0/h_x**2)'), ('Eq(e.dx + e + e.laplace, f)', 'Eq(g.dx + g + g.laplace, h.dx)', '4', 'h_x*(1 - 2.5/h_x**2)'), ('Eq(2.*e.laplace + e, f)', 'Eq(2*g.laplace + g, h)', '2', @@ -1232,7 +1244,7 @@ def test_jacobian_scaling_1D(self, eq1, eq2, so, scale): ('Eq(e.laplace + 5.*e, f)', 'Eq(g.laplace + 5.*g, h)', '4', 'h_x*h_y*(5.0 - 2.5/h_y**2 - 2.5/h_x**2)'), ('Eq(e.dx + e.dy + e + e.laplace, f)', 'Eq(g.dx + g.dy + g + g.laplace, h)', - '2', 'h_x*h_y*(1 + 1/h_y - 2.0/h_y**2 + 1/h_x - 2.0/h_x**2)'), + '2', 'h_x*h_y*(1 - 1/h_y - 2.0/h_y**2 - 1/h_x - 2.0/h_x**2)'), ('Eq(e.dx + e.dy + e + e.laplace, f)', 'Eq(g.dx + g.dy + g + g.laplace, h)', '4', 'h_x*h_y*(1 - 2.5/h_y**2 - 2.5/h_x**2)'), ('Eq(2.*e.laplace + e, f)', 'Eq(2*g.laplace + g, h)', '2', @@ -1283,7 +1295,7 @@ def test_jacobian_scaling_2D(self, eq1, eq2, so, scale): 'h_x*h_y*h_z*(5.0 - 2.5/h_z**2 - 2.5/h_y**2 - 2.5/h_x**2)'), ('Eq(e.dx + e.dy + e.dz + e + e.laplace, f)', 'Eq(g.dx + g.dy + g.dz + g + g.laplace, h)', '2', - 'h_x*h_y*h_z*(1 + 1/h_z - 2.0/h_z**2 + 1/h_y - 2.0/h_y**2 + ' + + 'h_x*h_y*h_z*(1 - 1/h_z - 2.0/h_z**2 - 1/h_y - 2.0/h_y**2 - ' + '1/h_x - 2.0/h_x**2)'), ('Eq(e.dx + e.dy + e.dz + e + e.laplace, f)', 'Eq(g.dx + g.dy + g.dz + g + g.laplace, h)', '4', @@ -1443,7 +1455,7 @@ def define(self, dimensions): '+ r1*a0[ix + 2][iy + 3]))*o0->h_x*o0->h_y;' in str(J00) # J00 and J11 are semantically identical so check efunc reuse - assert len(op._func_table.values()) == 9 + assert len(op._func_table.values()) == 10 # J00_MatMult0 is reused (in replace of J11_MatMult0) create = op._func_table['MatCreateSubMatrices0'].root assert 'MatShellSetOperation(submat_arr[0],' \ @@ -1687,7 +1699,8 @@ def setup_class(self): self.eq2 = Eq(self.g.laplace, self.h) @skipif('petsc') - def test_different_solver_params(self): + @pytest.mark.parallel(mode=[1, 2, 4, 6, 8]) + def test_different_solver_params(self, mode): # Explicitly set the solver parameters solver1 = petscsolve( self.eq1, target=self.e, solver_parameters={'ksp_rtol': '1e-10'} @@ -1708,7 +1721,8 @@ def test_different_solver_params(self): in str(op._func_table['SetPetscOptions1'].root) @skipif('petsc') - def test_options_prefix(self): + @pytest.mark.parallel(mode=[1, 2, 4, 6, 8]) + def test_options_prefix(self, mode): solver1 = petscsolve(self.eq1, self.e, solver_parameters={'ksp_rtol': '1e-10'}, options_prefix='poisson1') @@ -1731,7 +1745,8 @@ def test_options_prefix(self): in str(op._func_table['SetPetscOptions1'].root) @skipif('petsc') - def test_options_no_value(self): + @pytest.mark.parallel(mode=[1, 2, 4, 6, 8]) + def test_options_no_value(self, mode): """ Test solver parameters that do not require a value, such as `snes_view` and `ksp_view`. @@ -2188,6 +2203,34 @@ def test_get_ksp_type(self): assert entry2['KSPGetType'] == 'cg' assert entry2['kspgettype'] == 'cg' + @skipif('petsc') + def test_get_ksp_type_large_grid(self): + """ + Test for a dangling-pointer segfault when reading KSPGetType + after op.apply(). KSPType is ``const char*`` into KSP-owned memory; + after SNESDestroy that pointer is invalid. The crash only appeared + reliably on large grids (n=257) because the freed KSP memory must be + reclaimed by the heap before Python reads the profiler struct. + """ + get_info = ['kspgettype'] + grid = Grid(shape=(257, 257), dtype=np.float64) + e = Function(name='e', grid=grid, space_order=2) + f = Function(name='f', grid=grid, space_order=2) + eq = Eq(e.laplace, f) + + solver = petscsolve( + eq, target=e, + solver_parameters={'ksp_type': 'cg'}, + options_prefix='test_ksp_type', + get_info=get_info + ) + with switchconfig(language='petsc'): + op = Operator(solver) + summary = op.apply() + + entry = summary.petsc.get_entry('section0', 'test_ksp_type') + assert entry.KSPGetType == 'cg' + class TestPrinter: @@ -2208,3 +2251,589 @@ def test_petsc_pi(self): assert 'PETSC_PI' in str(op.ccode) assert 'M_PI' not in str(op.ccode) + + +class TestPetscSection: + """ + These tests validate the use of `PetscSection` (from PETSc) to constrain essential + boundary nodes by removing them from the linear solver, rather than keeping them in + the system as trivial equations. + + Users specify essential boundary conditions via the `EssentialBC` equation, + with a specifed `SubDomain`. When `constrain_bcs=True` is passed to `petscsolve`, + the Devito compiler generates code that removes these degrees of freedom from + the linear system. A PETSc requirement is that each MPI rank identifies ALL + constrained nodes within its local data region, including non-owned (halo) nodes. + + To achieve this, the compiler creates new `EssentialBC`-like equations with + modified (sub)dimensions (to extend the loop bounds), which are used in two + callback functions to constrain the nodes. No non-owned (halo) data is indexed + into - the loops are only used to specify the constrained "local" indices + on each rank. + + Tests in this class use the following notation: + - `x` : a grid point + - `[]` : the `SubDomain` specified by the `EssentialBC` (the constrained region) + - `|` : an MPI rank boundary + """ + # first test that the loop generated is correct symbolically.. + + # TODO: loop bound modification only needs to happen for subdomain 'middle' type + # so ensure this happens - by construction left and right subdomains do not + # cross ranks instead of doing the manual loop bound check - grab the actual + # iteration from the generated code I think...? + + def _get_loop_bounds(self, shape, so, subdomain): + grid = Grid( + shape=shape, + subdomains=(subdomain,), + dtype=np.float64 + ) + + u = Function(name='u', grid=grid, space_order=so) + v = Function(name='u', grid=grid, space_order=so) + bc = Function(name='bc', grid=grid, space_order=so) + + eq = Eq(u, v, subdomain=grid.interior) + bc = EssentialBC(u, bc, subdomain=subdomain) + + solver = petscsolve([eq, bc], u, constrain_bcs=True) + + with switchconfig(language='petsc'): + op = Operator(solver) + + args = op.arguments() + rank = grid.distributor.myrank + + bounds = [] + for _, dim in enumerate(grid.dimensions): + lb = max(args[f'i{dim.name}_min0'], args[f'{dim.name}_m'] - so) + ub = min(args[f'i{dim.name}_max0'], args[f'{dim.name}_M'] + so) + bounds.append((lb, ub)) + + return rank, tuple(bounds) + + @skipif('petsc') + @pytest.mark.parallel(mode=[1, 2, 4, 6, 8]) + def test_constrain_indices_1d_left_halo2(self, mode): + """halo_size=2, n=24, constrain left side of grid""" + + # 1 rank: + # 0 + # [x x x x x x x x x x x x x x x x x x x x] x x x x + # Expected bounds per rank: + # {0: (0, 19)} + + # 2 ranks: + # 0 1 + # [x x x x x x x x x x x x|x x x x x x x x] x x x x + # Expected bounds per rank: + # {0: (0, 13), 1: (-2, 7)} + + # 4 ranks: + # 0 1 2 3 + # [x x x x x x|x x x x x x|x x x x x x|x x] x x x x + # Expected bounds per rank: + # {0: (0, 7), 1: (-2, 7), 2: (-2, 7), 3: (-2, 1)} + + # 6 ranks: + # 0 1 2 3 4 5 + # [x x x x|x x x x|x x x x|x x x x|x x x x]|x x x x + # Expected bounds per rank: + # {0: (0, 5), 1: (-2, 5), 2: (-2, 5), 3: (-2, 5), 4: (-2, 3), 5: (-2, -1)} + + # 8 ranks: + # 0 1 2 3 4 5 6 7 + # [x x x|x x x|x x x|x x x|x x x|x x x|x x] x|x x x + # Expected bounds per rank: + # {0: (0, 4), 1: (-2, 4), 2: (-2, 4), 3: (-2, 4), 4: (-2, 4), + # 5: (-2, 4), 6: (-2, 1), 7: (-2, -2)} + + class Middle(SubDomain): + name = 'submiddle' + + def define(self, dimensions): + x, = dimensions + return {x: ('middle', 0, 4)} + + sub = Middle() + + n = 24 + so = 2 + + rank, bounds = self._get_loop_bounds( + shape=(n,), + so=so, + subdomain=sub + ) + actual = bounds[0] + + expected = { + 1: { + 0: (0, 19), + }, + 2: { + 0: (0, 13), + 1: (-2, 7), + }, + 4: { + 0: (0, 7), + 1: (-2, 7), + 2: (-2, 7), + 3: (-2, 1), + }, + 6: { + 0: (0, 5), + 1: (-2, 5), + 2: (-2, 5), + 3: (-2, 5), + 4: (-2, 3), + 5: (-2, -1) + }, + 8: { + 0: (0, 4), + 1: (-2, 4), + 2: (-2, 4), + 3: (-2, 4), + 4: (-2, 4), + 5: (-2, 4), + 6: (-2, 1), + 7: (-2, -2) + } + }[mode] + + assert actual == expected[rank], \ + f"rank {rank}: expected {expected[rank]}, got {actual}" + + @skipif('petsc') + @pytest.mark.parallel(mode=[1, 2, 4, 6]) + def test_constrain_indices_1d_left_halo4(self, mode): + """halo_size=4, n=24, constrain left side of grid""" + + # 1 rank: + # 0 + # [x x x x x x x x x x x x x x x x x x x x] x x x x + # Expected bounds per rank: + # {0: (0, 19)} + + # 2 ranks: + # 0 1 + # [x x x x x x x x x x x x|x x x x x x x x] x x x x + # Expected bounds per rank: + # {0: (0, 15), 1: (-4, 7)} + + # 4 ranks: + # 0 1 2 3 + # [x x x x x x|x x x x x x|x x x x x x|x x] x x x x + # Expected bounds per rank: + # {0: (0, 9), 1: (-4, 9), 2: (-4, 7), 3: (-4, 1)} + + # 6 ranks: + # 0 1 2 3 4 5 + # [x x x x|x x x x|x x x x|x x x x|x x x x]|x x x x + # Expected bounds per rank: + # {0: (0, 7), 1: (-4, 7), 2: (-4, 7), 3: (-4, 7), 4: (-4, 3), 5: (-4, -1)} + + class Middle(SubDomain): + name = 'submiddle' + + def define(self, dimensions): + x, = dimensions + return {x: ('middle', 0, 4)} + + sub = Middle() + + n = 24 + so = 4 + + rank, bounds = self._get_loop_bounds( + shape=(n,), + so=so, + subdomain=sub + ) + actual = bounds[0] + + expected = { + 1: { + 0: (0, 19), + }, + 2: { + 0: (0, 15), + 1: (-4, 7), + }, + 4: { + 0: (0, 9), + 1: (-4, 9), + 2: (-4, 7), + 3: (-4, 1), + }, + 6: { + 0: (0, 7), + 1: (-4, 7), + 2: (-4, 7), + 3: (-4, 7), + 4: (-4, 3), + 5: (-4, -1) + } + }[mode] + + assert actual == expected[rank], \ + f"rank {rank}: expected {expected[rank]}, got {actual}" + + @skipif('petsc') + @pytest.mark.parallel(mode=[1, 2, 4, 6, 8]) + def test_constrain_indices_1d_right_halo2(self, mode): + """halo_size=2, n=24, constrain right side of grid""" + + # 1 rank: + # 0 + # x x x [x x x x x x x x x x x x x x x x x x x x x] + # Expected bounds per rank: + # {0: (3, 23)} + + # 2 ranks: + # 0 1 + # x x x [x x x x x x x x x|x x x x x x x x x x x x] + # Expected bounds per rank: + # {0: (3, 13), 1: (-2, 11)} + + # 4 ranks: + # 0 1 2 3 + # x x x [x x x|x x x x x x|x x x x x x|x x x x x x] + # Expected bounds per rank: + # {0: (3, 7), 1: (-2, 7), 2: (-2, 7), 3: (-2, 5)} + + # 6 ranks: + # 0 1 2 3 4 5 + # x x x [x|x x x x|x x x x|x x x x|x x x x|x x x x] + # Expected bounds per rank: + # {0: (3, 5), 1: (-1, 5), 2: (-2, 5), 3: (-2, 5), 4: (-2, 5), 5: (-2, 3)} + + # 8 ranks: + # 0 1 2 3 4 5 6 7 + # x x x[|x x x|x x x|x x x|x x x|x x x|x x x|x x x] + # Expected bounds per rank: + # {0: (3, 4), 1: (0, 4), 2: (-2, 4), 3: (-2, 4), 4: (-2, 4), + # 5: (-2, 4), 6: (-2, 4), 7: (-2, 2)} + + class Middle(SubDomain): + name = 'submiddle' + + def define(self, dimensions): + x, = dimensions + return {x: ('middle', 3, 0)} + + sub = Middle() + + n = 24 + so = 2 + + rank, bounds = self._get_loop_bounds( + shape=(n,), + so=so, + subdomain=sub + ) + actual = bounds[0] + + expected = { + 1: { + 0: (3, 23), + }, + 2: { + 0: (3, 13), + 1: (-2, 11), + }, + 4: { + 0: (3, 7), + 1: (-2, 7), + 2: (-2, 7), + 3: (-2, 5), + }, + 6: { + 0: (3, 5), + 1: (-1, 5), + 2: (-2, 5), + 3: (-2, 5), + 4: (-2, 5), + 5: (-2, 3) + }, + 8: { + 0: (3, 4), + 1: (0, 4), + 2: (-2, 4), + 3: (-2, 4), + 4: (-2, 4), + 5: (-2, 4), + 6: (-2, 4), + 7: (-2, 2) + } + }[mode] + + assert actual == expected[rank], \ + f"rank {rank}: expected {expected[rank]}, got {actual}" + + @skipif('petsc') + @pytest.mark.parallel(mode=[1, 2, 4, 6]) + def test_constrain_indices_1d_right_halo4(self, mode): + """halo_size=4, n=24, constrain right side of grid""" + + # 1 rank: + # 0 + # x x x [x x x x x x x x x x x x x x x x x x x x x] + # Expected bounds per rank: + # {0: (3, 23)} + + # 2 ranks: + # 0 1 + # x x x [x x x x x x x x x|x x x x x x x x x x x x] + # Expected bounds per rank: + # {0: (3, 15), 1: (-4, 11)} + + # 4 ranks: + # 0 1 2 3 + # x x x [x x x|x x x x x x|x x x x x x|x x x x x x] + # Expected bounds per rank: + # {0: (3, 9), 1: (-3, 9), 2: (-4, 9), 3: (-4, 5)} + + # 6 ranks: + # 0 1 2 3 4 5 + # x x x [x|x x x x|x x x x|x x x x|x x x x|x x x x] + # Expected bounds per rank: + # {0: (3, 7), 1: (-1, 7), 2: (-4, 7), 3: (-4, 7), 4: (-4, 7), 5: (-4, 3)} + + class Middle(SubDomain): + name = 'submiddle' + + def define(self, dimensions): + x, = dimensions + return {x: ('middle', 3, 0)} + + sub = Middle() + + n = 24 + so = 2 + + rank, bounds = self._get_loop_bounds( + shape=(n,), + so=so, + subdomain=sub + ) + actual = bounds[0] + + expected = { + 1: { + 0: (3, 23), + }, + 2: { + 0: (3, 13), + 1: (-2, 11), + }, + 4: { + 0: (3, 7), + 1: (-2, 7), + 2: (-2, 7), + 3: (-2, 5), + }, + 6: { + 0: (3, 5), + 1: (-1, 5), + 2: (-2, 5), + 3: (-2, 5), + 4: (-2, 5), + 5: (-2, 3) + } + }[mode] + + assert actual == expected[rank], \ + f"rank {rank}: expected {expected[rank]}, got {actual}" + + @skipif('petsc') + @pytest.mark.parallel(mode=[1, 2, 4, 6, 8]) + def test_constrain_indices_1d_middle_halo2(self, mode): + """halo_size=2, n=24, constrain middle of grid""" + + # 1 rank: + # 0 + # x x x x x x x x [x x x x x x x x x x x] x x x x x + # Expected bounds per rank: + # {0: (8, 18)} + + # 2 ranks: + # 0 1 + # x x x x x x x x [x x x x|x x x x x x x] x x x x x + # Expected bounds per rank: + # {0: (8, 13), 1: (-2, 6)} + + # 4 ranks: + # 0 1 2 3 + # x x x x x x|x x [x x x x|x x x x x x|x] x x x x x + # Expected bounds per rank: + # {0: (8, 7), 1: (2, 7), 2: (-2, 6), 3: (-2, 0)} + + # 6 ranks: + # 0 1 2 3 4 5 + # x x x x|x x x x[|x x x x|x x x x|x x x] x|x x x x + # Expected bounds per rank: + # {0: (8, 5), 1: (4, 5), 2: (0, 5), 3: (-2, 5), 4: (-2, 2), 5: (-2, -2)} + + # 8 ranks: + # 0 1 2 3 4 5 6 7 + # x x x|x x x|x x [x|x x x|x x x|x x x|x] x x|x x x + # Expected bounds per rank: + # {0: (8, 4), 1: (5, 4), 2: (2, 4), 3: (-1, 4), 4: (-2, 4), + # 5: (-2, 3), 6: (-2, 0), 7: (-2, -3)} + + class Middle(SubDomain): + name = 'submiddle' + + def define(self, dimensions): + x, = dimensions + return {x: ('middle', 8, 5)} + + sub = Middle() + + n = 24 + so = 2 + + rank, bounds = self._get_loop_bounds( + shape=(n,), + so=so, + subdomain=sub + ) + actual = bounds[0] + + expected = { + 1: { + 0: (8, 18), + }, + 2: { + 0: (8, 13), + 1: (-2, 6), + }, + 4: { + 0: (8, 7), + 1: (2, 7), + 2: (-2, 6), + 3: (-2, 0), + }, + 6: { + 0: (8, 5), + 1: (4, 5), + 2: (0, 5), + 3: (-2, 5), + 4: (-2, 2), + 5: (-2, -2) + }, + 8: { + 0: (8, 4), + 1: (5, 4), + 2: (2, 4), + 3: (-1, 4), + 4: (-2, 4), + 5: (-2, 3), + 6: (-2, 0), + 7: (-2, -3) + } + }[mode] + + assert actual == expected[rank], \ + f"rank {rank}: expected {expected[rank]}, got {actual}" + + @skipif('petsc') + @pytest.mark.parallel(mode=[1, 2, 4, 6]) + def test_constrain_indices_1d_middle_halo4(self, mode): + """halo_size=4, n=24, constrain middle of grid""" + + # 1 rank: + # 0 + # x x x x x x x x [x x x x x x x x x x x] x x x x x + # Expected bounds per rank: + # {0: (8, 18)} + + # 2 ranks: + # 0 1 + # x x x x x x x x [x x x x|x x x x x x x] x x x x x + # Expected bounds per rank: + # {0: (8, 15), 1: (-4, 6)} + + # 4 ranks: + # 0 1 2 3 + # x x x x x x|x x [x x x x|x x x x x x|x] x x x x x + # Expected bounds per rank: + # {0: (8, 9), 1: (2, 9), 2: (-4, 6), 3: (-4, 0)} + + # 6 ranks: + # 0 1 2 3 4 5 + # x x x x|x x x x[|x x x x|x x x x|x x x] x|x x x x + # Expected bounds per rank: + # {0: (8, 7), 1: (4, 7), 2: (0, 7), 3: (-4, 6), 4: (-4, 2), 5: (-4, -2)} + + class Middle(SubDomain): + name = 'submiddle' + + def define(self, dimensions): + x, = dimensions + return {x: ('middle', 8, 5)} + + sub = Middle() + + n = 24 + so = 4 + + rank, bounds = self._get_loop_bounds( + shape=(n,), + so=so, + subdomain=sub + ) + actual = bounds[0] + + expected = { + 1: { + 0: (8, 18), + }, + 2: { + 0: (8, 15), + 1: (-4, 6), + }, + 4: { + 0: (8, 9), + 1: (2, 9), + 2: (-4, 6), + 3: (-4, 0), + }, + 6: { + 0: (8, 7), + 1: (4, 7), + 2: (0, 7), + 3: (-4, 6), + 4: (-4, 2), + 5: (-4, -2) + } + }[mode] + + assert actual == expected[rank], \ + f"rank {rank}: expected {expected[rank]}, got {actual}" + + # TODO: add 2d and 3d tests + + +# @skipif('petsc') +# def test_apply_memory(): + +# nx = 81 +# ny = 81 + +# grid = Grid(shape=(nx, ny), extent=(2., 2.), dtype=np.float64) + +# u = Function(name='u', grid=grid, dtype=np.float64, space_order=2) +# v = Function(name='v', grid=grid, dtype=np.float64, space_order=2) + +# v.data[:] = 5.0 + +# eq = Eq(v, u.laplace, subdomain=grid.interior) + +# petsc = petscsolve([eq], u) + +# with switchconfig(language='petsc'): +# op = Operator(petsc) +# op.apply() diff --git a/working_coupled.c b/working_coupled.c new file mode 100644 index 0000000000..227b5595c0 --- /dev/null +++ b/working_coupled.c @@ -0,0 +1,861 @@ +/* Devito generated code for Operator `Kernel` */ + +#define _POSIX_C_SOURCE 200809L +#define START(S) struct timeval start_ ## S , end_ ## S ; gettimeofday(&start_ ## S , NULL); +#define STOP(S,T) gettimeofday(&end_ ## S, NULL); T->S += (double)(end_ ## S .tv_sec-start_ ## S.tv_sec)+(double)(end_ ## S .tv_usec-start_ ## S .tv_usec)/1000000; + +#include "stdlib.h" +#include "math.h" +#include "sys/time.h" +#include "petscsnes.h" +#include "petscdmda.h" +#include "petscsection.h" +#include "xmmintrin.h" +#include "pmmintrin.h" + +struct Field0 +{ + PetscScalar v; + PetscScalar u; +} ; + +struct JacobianCtx +{ + DM * subdms; + IS * fields; + Mat * submats; +} ; + +struct SubMatrixCtx +{ + IS * rows; + IS * cols; +} ; + +struct UserCtx0 +{ + PetscScalar h_x; + PetscScalar h_y; + PetscInt x_M; + PetscInt x_ltkn0; + PetscInt x_ltkn1; + PetscInt x_m; + PetscInt x_rtkn0; + PetscInt x_rtkn2; + PetscInt y_M; + PetscInt y_ltkn1; + PetscInt y_ltkn2; + PetscInt y_m; + PetscInt y_rtkn0; + PetscInt y_rtkn2; + struct dataobj * f_vec; +} ; + +struct dataobj +{ + void * data; + PetscInt * size; + unsigned long nbytes; + unsigned long * npsize; + unsigned long * dsize; + PetscInt * hsize; + PetscInt * hofs; + PetscInt * oofs; + void * dmap; +} ; + +struct petscprofiler0 +{ + KSPConvergedReason reason0; + PetscInt kspits0; + KSPNormType kspnormtype0; + PetscScalar rtol0; + PetscScalar atol0; + PetscScalar divtol0; + PetscInt max_it0; + char ksptype0[64]; + PetscInt snesits0; +} ; + +struct profiler +{ + PetscScalar section0; +} ; + +PetscErrorCode CountBCs0(DM dm0, PetscInt * numBCPtr0); +PetscErrorCode CountBCs1(DM dm0, PetscInt * numBCPtr1); +PetscErrorCode SetPointBCs(DM dm0, PetscInt numBC0, PetscInt numBC1); +PetscErrorCode SetPetscOptions0(); +PetscErrorCode WholeMatMult0(Mat J, Vec X, Vec Y); +PetscErrorCode WholeFormFunc0(SNES snes, Vec X, Vec F, void* dummy); +PetscErrorCode ClearPetscOptions0(); +PetscErrorCode DestroySubMatrixCtx0(Mat J); +PetscErrorCode J00_MatMult0(Mat J, Vec X, Vec Y); +PetscErrorCode J10_MatMult0(Mat J, Vec X, Vec Y); +PetscErrorCode MatCreateSubMatrices0(Mat J, PetscInt nfields, IS * irow, IS * icol, MatReuse scall, Mat * * submats); +PetscErrorCode PopulateUserContext0(struct UserCtx0 * ctx0, struct dataobj * f_vec, const PetscScalar h_x, const PetscScalar h_y, const PetscInt x_M, const PetscInt x_ltkn0, const PetscInt x_ltkn1, const PetscInt x_m, const PetscInt x_rtkn0, const PetscInt x_rtkn2, const PetscInt y_M, const PetscInt y_ltkn1, const PetscInt y_ltkn2, const PetscInt y_m, const PetscInt y_rtkn0, const PetscInt y_rtkn2); +PetscErrorCode PopulateMatContext(struct JacobianCtx * jctx, DM * subdms, IS * fields); + +int Kernel(struct dataobj * u_vec, struct dataobj * v_vec, struct petscprofiler0 * petscinfo0, struct dataobj * f_vec, const PetscScalar h_x, const PetscScalar h_y, const PetscInt x_M, const PetscInt x_ltkn0, const PetscInt x_ltkn1, const PetscInt x_m, const PetscInt x_rtkn0, const PetscInt x_rtkn2, const PetscInt y_M, const PetscInt y_ltkn1, const PetscInt y_ltkn2, const PetscInt y_m, const PetscInt y_rtkn0, const PetscInt y_rtkn2, struct profiler * timers) +{ + Mat J0; + PetscScalar atol0; + DM da0; + PetscScalar divtol0; + KSP ksp0; + PetscInt kspits0; + KSPNormType kspnormtype0; + KSPType ksptype0; + PetscInt localsize0; + PetscInt max_it0; + PetscInt nfields0; + KSPConvergedReason reason0; + PetscScalar rtol0; + VecScatter scatteru0; + VecScatter scatterv0; + PetscMPIInt size; + SNES snes0; + PetscInt snesits0; + Vec xglobal0; + Vec xglobalu0; + Vec xglobalv0; + Vec xlocal0; + Vec xlocalu0; + Vec xlocalv0; + + struct UserCtx0 ctx0; + IS * fields0; + struct JacobianCtx jctx0; + DM * subdms0; + PetscSection gsection0; + PetscSection lsection0; + PetscInt numBC0 = 0; + PetscInt numBC1 = 0; + PetscSF sf0; + + /* Flush denormal numbers to zero in hardware */ + _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON); + _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON); + + PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD,&size)); + + PetscCall(DMDACreate2d(PETSC_COMM_WORLD,DM_BOUNDARY_GHOSTED,DM_BOUNDARY_GHOSTED,DMDA_STENCIL_BOX,9,9,1,1,2,2,NULL,NULL,&da0)); + PetscCall(PetscOptionsSetValue(NULL,"-da_use_section",NULL)); + PetscCall(DMSetFromOptions(da0)); + PetscCall(DMSetUp(da0)); + PetscCall(DMSetMatType(da0,MATSHELL)); + PetscCall(PopulateUserContext0(&ctx0,f_vec,h_x,h_y,x_M,x_ltkn0,x_ltkn1,x_m,x_rtkn0,x_rtkn2,y_M,y_ltkn1,y_ltkn2,y_m,y_rtkn0,y_rtkn2)); + PetscCall(DMSetApplicationContext(da0,&ctx0)); + PetscCall(CountBCs0(da0,&numBC0)); + PetscCall(CountBCs1(da0,&numBC1)); + PetscCall(SetPointBCs(da0,numBC0,numBC1)); + PetscCall(DMGetLocalSection(da0,&lsection0)); + PetscCall(DMGetPointSF(da0,&sf0)); + PetscCall(PetscSectionCreateGlobalSection(lsection0,sf0,PETSC_TRUE,PETSC_FALSE,PETSC_FALSE,&gsection0)); + PetscCall(DMSetGlobalSection(da0,gsection0)); + PetscCall(DMCreateSectionSF(da0,lsection0,gsection0)); + PetscCall(SNESCreate(PETSC_COMM_WORLD,&snes0)); + PetscCall(SNESSetOptionsPrefix(snes0,"biharm_")); + PetscCall(SetPetscOptions0()); + PetscCall(SNESSetDM(snes0,da0)); + PetscCall(DMCreateMatrix(da0,&J0)); + PetscCall(SNESSetJacobian(snes0,J0,J0,MatMFFDComputeJacobian,NULL)); + PetscCall(DMCreateGlobalVector(da0,&xglobal0)); + PetscCall(DMCreateLocalVector(da0,&xlocal0)); + PetscCall(VecGetSize(xlocal0,&localsize0)); + PetscCall(SNESGetKSP(snes0,&ksp0)); + PetscCall(MatShellSetOperation(J0,MATOP_MULT,(void (*)(void))WholeMatMult0)); + PetscCall(SNESSetFunction(snes0,NULL,WholeFormFunc0,(void*)(da0))); + PetscCall(SNESSetFromOptions(snes0)); + PetscCall(PopulateUserContext0(&ctx0,f_vec,h_x,h_y,x_M,x_ltkn0,x_ltkn1,x_m,x_rtkn0,x_rtkn2,y_M,y_ltkn1,y_ltkn2,y_m,y_rtkn0,y_rtkn2)); + PetscCall(MatSetDM(J0,da0)); + PetscCall(DMCreateFieldDecomposition(da0,&nfields0,NULL,&fields0,&subdms0)); + PetscCall(MatShellSetOperation(J0,MATOP_CREATE_SUBMATRICES,(void (*)(void))MatCreateSubMatrices0)); + PetscCall(PopulateMatContext(&jctx0,subdms0,fields0)); + PetscCall(MatShellSetContext(J0,&jctx0)); + PetscCall(MatCreateSubMatrices(J0,nfields0,fields0,fields0,MAT_INITIAL_MATRIX,&jctx0.submats)); + DM dav0 = subdms0[0]; + DM dau0 = subdms0[1]; + PetscCall(DMCreateGlobalVector(dav0,&xglobalv0)); + PetscCall(DMCreateGlobalVector(dau0,&xglobalu0)); + PetscCall(VecCreateMPIWithArray(PETSC_COMM_WORLD,1,169,PETSC_DECIDE,v_vec->data,&xlocalv0)); + PetscCall(VecCreateMPIWithArray(PETSC_COMM_WORLD,1,169,PETSC_DECIDE,u_vec->data,&xlocalu0)); + + START(section0) + PetscCall(DMLocalToGlobal(dav0,xlocalv0,INSERT_VALUES,xglobalv0)); + PetscCall(VecScatterCreate(xglobal0,fields0[0],xglobalv0,NULL,&scatterv0)); + PetscCall(VecScatterBegin(scatterv0,xglobalv0,xglobal0,INSERT_VALUES,SCATTER_REVERSE)); + PetscCall(VecScatterEnd(scatterv0,xglobalv0,xglobal0,INSERT_VALUES,SCATTER_REVERSE)); + + PetscCall(DMLocalToGlobal(dau0,xlocalu0,INSERT_VALUES,xglobalu0)); + PetscCall(VecScatterCreate(xglobal0,fields0[1],xglobalu0,NULL,&scatteru0)); + PetscCall(VecScatterBegin(scatteru0,xglobalu0,xglobal0,INSERT_VALUES,SCATTER_REVERSE)); + PetscCall(VecScatterEnd(scatteru0,xglobalu0,xglobal0,INSERT_VALUES,SCATTER_REVERSE)); + + PetscCall(SNESSolve(snes0,NULL,xglobal0)); + PetscCall(VecScatterBegin(scatterv0,xglobal0,xglobalv0,INSERT_VALUES,SCATTER_FORWARD)); + PetscCall(VecScatterEnd(scatterv0,xglobal0,xglobalv0,INSERT_VALUES,SCATTER_FORWARD)); + PetscCall(DMGlobalToLocal(dav0,xglobalv0,INSERT_VALUES,xlocalv0)); + PetscCall(VecScatterBegin(scatteru0,xglobal0,xglobalu0,INSERT_VALUES,SCATTER_FORWARD)); + PetscCall(VecScatterEnd(scatteru0,xglobal0,xglobalu0,INSERT_VALUES,SCATTER_FORWARD)); + PetscCall(DMGlobalToLocal(dau0,xglobalu0,INSERT_VALUES,xlocalu0)); + + PetscCall(KSPGetConvergedReason(ksp0,&reason0)); + petscinfo0->reason0 = reason0; + PetscCall(KSPGetIterationNumber(ksp0,&kspits0)); + petscinfo0->kspits0 = kspits0; + PetscCall(KSPGetNormType(ksp0,&kspnormtype0)); + petscinfo0->kspnormtype0 = kspnormtype0; + PetscCall(KSPGetTolerances(ksp0,&rtol0,&atol0,&divtol0,&max_it0)); + petscinfo0->rtol0 = rtol0; + petscinfo0->atol0 = atol0; + petscinfo0->divtol0 = divtol0; + petscinfo0->max_it0 = max_it0; + PetscCall(KSPGetType(ksp0,&ksptype0)); + PetscCall(PetscStrncpy(petscinfo0->ksptype0,ksptype0,64)); + PetscCall(SNESGetIterationNumber(snes0,&snesits0)); + petscinfo0->snesits0 = snesits0; + STOP(section0,timers) + PetscCall(ClearPetscOptions0()); + + PetscCall(MatDestroy(&jctx0.submats[0])); + PetscCall(MatDestroy(&jctx0.submats[1])); + PetscCall(MatDestroy(&jctx0.submats[2])); + PetscCall(MatDestroy(&jctx0.submats[3])); + PetscCall(PetscFree(jctx0.submats)); + PetscCall(ISDestroy(&fields0[0])); + PetscCall(ISDestroy(&fields0[1])); + PetscCall(PetscFree(fields0)); + PetscCall(DMDestroy(&subdms0[0])); + PetscCall(DMDestroy(&subdms0[1])); + PetscCall(PetscFree(subdms0)); + PetscCall(VecScatterDestroy(&scatteru0)); + PetscCall(VecScatterDestroy(&scatterv0)); + PetscCall(VecDestroy(&xglobal0)); + PetscCall(VecDestroy(&xglobalu0)); + PetscCall(VecDestroy(&xglobalv0)); + PetscCall(VecDestroy(&xlocal0)); + PetscCall(VecDestroy(&xlocalu0)); + PetscCall(VecDestroy(&xlocalv0)); + PetscCall(MatDestroy(&J0)); + PetscCall(SNESDestroy(&snes0)); + PetscCall(PetscSectionDestroy(&gsection0)); + PetscCall(DMDestroy(&da0)); + + return 0; +} + +PetscErrorCode CountBCs0(DM dm0, PetscInt * numBCPtr0) +{ + PetscFunctionBeginUser; + + struct UserCtx0 * ctx0; + PetscCall(DMGetApplicationContext(dm0, &ctx0)); + PetscInt count = *numBCPtr0; + + /* Flush denormal numbers to zero in hardware */ + _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON); + _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON); + + /* SubTop (middle x, top y) and SubBottom (middle x, bottom y) */ + for (int ix = ctx0->x_m + ctx0->x_ltkn0; ix <= ctx0->x_M - ctx0->x_rtkn0; ix += 1) + { + for (int iy = ctx0->y_M - ctx0->y_rtkn0 + 1; iy <= ctx0->y_M; iy += 1) + count += 1; + for (int iy = ctx0->y_m; iy <= ctx0->y_m + ctx0->y_ltkn1 - 1; iy += 1) + count += 1; + } + /* SubLeft (left x, full y) */ + for (int ix = ctx0->x_m; ix <= ctx0->x_m + ctx0->x_ltkn1 - 1; ix += 1) + { + for (int y = ctx0->y_m; y <= ctx0->y_M; y += 1) + count += 1; + } + /* SubRight (right x, full y) */ + for (int ix = ctx0->x_M - ctx0->x_rtkn2 + 1; ix <= ctx0->x_M; ix += 1) + { + for (int y = ctx0->y_m; y <= ctx0->y_M; y += 1) + count += 1; + } + + *numBCPtr0 = count; + + PetscFunctionReturn(PETSC_SUCCESS); +} + +PetscErrorCode CountBCs1(DM dm0, PetscInt * numBCPtr1) +{ + PetscFunctionBeginUser; + + struct UserCtx0 * ctx0; + PetscCall(DMGetApplicationContext(dm0, &ctx0)); + PetscInt count = *numBCPtr1; + + /* Flush denormal numbers to zero in hardware */ + _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON); + _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON); + + /* SubTop (middle x, top y) and SubBottom (middle x, bottom y) */ + for (int ix = ctx0->x_m + ctx0->x_ltkn0; ix <= ctx0->x_M - ctx0->x_rtkn0; ix += 1) + { + for (int iy = ctx0->y_M - ctx0->y_rtkn0 + 1; iy <= ctx0->y_M; iy += 1) + count += 1; + for (int iy = ctx0->y_m; iy <= ctx0->y_m + ctx0->y_ltkn1 - 1; iy += 1) + count += 1; + } + /* SubLeft (left x, full y) */ + for (int ix = ctx0->x_m; ix <= ctx0->x_m + ctx0->x_ltkn1 - 1; ix += 1) + { + for (int y = ctx0->y_m; y <= ctx0->y_M; y += 1) + count += 1; + } + /* SubRight (right x, full y) */ + for (int ix = ctx0->x_M - ctx0->x_rtkn2 + 1; ix <= ctx0->x_M; ix += 1) + { + for (int y = ctx0->y_m; y <= ctx0->y_M; y += 1) + count += 1; + } + + *numBCPtr1 = count; + + PetscFunctionReturn(PETSC_SUCCESS); +} + +PetscErrorCode SetPointBCs(DM dm0, PetscInt numBC0, PetscInt numBC1) +{ + PetscFunctionBeginUser; + + struct UserCtx0 * ctx0; + PetscCall(DMGetApplicationContext(dm0, &ctx0)); + DMDALocalInfo info; + PetscCall(DMDAGetLocalInfo(dm0, &info)); + + PetscInt k0 = 0; + PetscInt k1 = 0; + PetscInt * bcPointsArr0; + PetscInt * bcPointsArr1; + + /* Flush denormal numbers to zero in hardware */ + _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON); + _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON); + + /* --- field 0 (v, DOF index 0) --- */ + PetscCall(PetscMalloc1(numBC0, &bcPointsArr0)); + for (int ix = ctx0->x_m + ctx0->x_ltkn0; ix <= ctx0->x_M - ctx0->x_rtkn0; ix += 1) + { + for (int iy = ctx0->y_M - ctx0->y_rtkn0 + 1; iy <= ctx0->y_M; iy += 1) + bcPointsArr0[k0++] = info.gxm * (ix + 2) + (iy + 2); + for (int iy = ctx0->y_m; iy <= ctx0->y_m + ctx0->y_ltkn1 - 1; iy += 1) + bcPointsArr0[k0++] = info.gxm * (ix + 2) + (iy + 2); + } + for (int ix = ctx0->x_m; ix <= ctx0->x_m + ctx0->x_ltkn1 - 1; ix += 1) + { + for (int y = ctx0->y_m; y <= ctx0->y_M; y += 1) + bcPointsArr0[k0++] = info.gxm * (ix + 2) + (y + 2); + } + for (int ix = ctx0->x_M - ctx0->x_rtkn2 + 1; ix <= ctx0->x_M; ix += 1) + { + for (int y = ctx0->y_m; y <= ctx0->y_M; y += 1) + bcPointsArr0[k0++] = info.gxm * (ix + 2) + (y + 2); + } + + /* --- field 1 (u, DOF index 1) --- */ + PetscCall(PetscMalloc1(numBC1, &bcPointsArr1)); + for (int ix = ctx0->x_m + ctx0->x_ltkn0; ix <= ctx0->x_M - ctx0->x_rtkn0; ix += 1) + { + for (int iy = ctx0->y_M - ctx0->y_rtkn0 + 1; iy <= ctx0->y_M; iy += 1) + bcPointsArr1[k1++] = info.gxm * (ix + 2) + (iy + 2); + for (int iy = ctx0->y_m; iy <= ctx0->y_m + ctx0->y_ltkn1 - 1; iy += 1) + bcPointsArr1[k1++] = info.gxm * (ix + 2) + (iy + 2); + } + for (int ix = ctx0->x_m; ix <= ctx0->x_m + ctx0->x_ltkn1 - 1; ix += 1) + { + for (int y = ctx0->y_m; y <= ctx0->y_M; y += 1) + bcPointsArr1[k1++] = info.gxm * (ix + 2) + (y + 2); + } + for (int ix = ctx0->x_M - ctx0->x_rtkn2 + 1; ix <= ctx0->x_M; ix += 1) + { + for (int y = ctx0->y_m; y <= ctx0->y_M; y += 1) + bcPointsArr1[k1++] = info.gxm * (ix + 2) + (y + 2); + } + + /* Build IS arrays and register with DM */ + IS bcPointsIS[2]; + IS bcCompsIS[2]; + PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)dm0), + numBC0, bcPointsArr0, PETSC_OWN_POINTER, &bcPointsIS[0])); + PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)dm0), + numBC1, bcPointsArr1, PETSC_OWN_POINTER, &bcPointsIS[1])); + + PetscInt comp0[] = {0}; + PetscInt comp1[] = {1}; + PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)dm0), + 1, comp0, PETSC_COPY_VALUES, &bcCompsIS[0])); + PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)dm0), + 1, comp1, PETSC_COPY_VALUES, &bcCompsIS[1])); + + /* DMDASetPointBC duplicates all ISes internally */ + PetscCall(DMDASetPointBC(dm0, 2, bcPointsIS, bcCompsIS)); + + PetscCall(ISDestroy(&bcPointsIS[0])); + PetscCall(ISDestroy(&bcPointsIS[1])); + PetscCall(ISDestroy(&bcCompsIS[0])); + PetscCall(ISDestroy(&bcCompsIS[1])); + + PetscFunctionReturn(PETSC_SUCCESS); +} + +PetscErrorCode SetPetscOptions0() +{ + PetscFunctionBeginUser; + + /* Flush denormal numbers to zero in hardware */ + _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON); + _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON); + + PetscCall(PetscOptionsSetValue(NULL,"-biharm_snes_type","ksponly")); + PetscCall(PetscOptionsSetValue(NULL,"-biharm_ksp_type","gmres")); + PetscCall(PetscOptionsSetValue(NULL,"-biharm_pc_type","none")); + PetscCall(PetscOptionsSetValue(NULL,"-biharm_ksp_rtol","1e-10")); + PetscCall(PetscOptionsSetValue(NULL,"-biharm_ksp_atol","1e-50")); + PetscCall(PetscOptionsSetValue(NULL,"-biharm_ksp_divtol","100000.0")); + PetscCall(PetscOptionsSetValue(NULL,"-biharm_ksp_max_it","10000")); + + PetscFunctionReturn(0); +} + +PetscErrorCode WholeMatMult0(Mat J, Vec X, Vec Y) +{ + Vec J00X; + Vec J00Y; + Vec J10X; + Vec J10Y; + Vec J11X; + Vec J11Y; + + struct SubMatrixCtx * J00ctx; + struct SubMatrixCtx * J10ctx; + struct SubMatrixCtx * J11ctx; + struct JacobianCtx * jctx; + + /* Flush denormal numbers to zero in hardware */ + _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON); + _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON); + + PetscFunctionBeginUser; + + PetscCall(MatShellGetContext(J,&jctx)); + PetscCall(VecSet(Y,0.0)); + + Mat J00 = jctx->submats[0]; + PetscCall(MatShellGetContext(J00,&J00ctx)); + PetscCall(VecGetSubVector(X,*J00ctx->cols,&J00X)); + PetscCall(VecGetSubVector(Y,*J00ctx->rows,&J00Y)); + PetscCall(MatMult(J00,J00X,J00Y)); + PetscCall(VecRestoreSubVector(X,*J00ctx->cols,&J00X)); + PetscCall(VecRestoreSubVector(Y,*J00ctx->rows,&J00Y)); + Mat J10 = jctx->submats[2]; + PetscCall(MatShellGetContext(J10,&J10ctx)); + PetscCall(VecGetSubVector(X,*J10ctx->cols,&J10X)); + PetscCall(VecGetSubVector(Y,*J10ctx->rows,&J10Y)); + PetscCall(MatMult(J10,J10X,J10Y)); + PetscCall(VecRestoreSubVector(X,*J10ctx->cols,&J10X)); + PetscCall(VecRestoreSubVector(Y,*J10ctx->rows,&J10Y)); + Mat J11 = jctx->submats[3]; + PetscCall(MatShellGetContext(J11,&J11ctx)); + PetscCall(VecGetSubVector(X,*J11ctx->cols,&J11X)); + PetscCall(VecGetSubVector(Y,*J11ctx->rows,&J11Y)); + PetscCall(MatMult(J11,J11X,J11Y)); + PetscCall(VecRestoreSubVector(X,*J11ctx->cols,&J11X)); + PetscCall(VecRestoreSubVector(Y,*J11ctx->rows,&J11Y)); + + PetscFunctionReturn(0); +} + +PetscErrorCode WholeFormFunc0(SNES snes, Vec X, Vec F, void* dummy) +{ + PetscFunctionBeginUser; + + struct UserCtx0 * ctx0; + DM dm0 = (DM)(dummy); + PetscCall(DMGetApplicationContext(dm0,&ctx0)); + Vec floc; + DMDALocalInfo info; + Vec xloc; + + PetscScalar * f_bundle_vec; + PetscScalar * x_bundle_vec; + + PetscCall(VecSet(F,0.0)); + PetscCall(DMGetLocalVector(dm0,&xloc)); + PetscCall(DMGlobalToLocalBegin(dm0,X,INSERT_VALUES,xloc)); + PetscCall(DMGlobalToLocalEnd(dm0,X,INSERT_VALUES,xloc)); + PetscCall(DMGetLocalVector(dm0,&floc)); + PetscCall(VecGetArray(floc,&f_bundle_vec)); + PetscCall(VecGetArray(xloc,&x_bundle_vec)); + PetscCall(DMDAGetLocalInfo(dm0,&info)); + struct dataobj * f_vec = ctx0->f_vec; + + PetscScalar (* f)[f_vec->size[1]] __attribute__ ((aligned (64))) = (PetscScalar (*)[f_vec->size[1]]) f_vec->data; + struct Field0 (* f_bundle)[info.gxm] = (struct Field0 (*)[info.gxm]) f_bundle_vec; + struct Field0 (* x_bundle)[info.gxm] = (struct Field0 (*)[info.gxm]) x_bundle_vec; + + /* Flush denormal numbers to zero in hardware */ + _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON); + _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON); + + for (int ix = ctx0->x_m + ctx0->x_ltkn0; ix <= ctx0->x_M - ctx0->x_rtkn0; ix += 1) + { + #pragma omp simd + for (int iy = ctx0->y_M - ctx0->y_rtkn0 + 1; iy <= ctx0->y_M; iy += 1) + { + f_bundle[ix + 2][iy + 2].v = (2.0/((ctx0->h_y*ctx0->h_y)) + 2.0/((ctx0->h_x*ctx0->h_x)))*ctx0->h_x*ctx0->h_y*x_bundle[ix + 2][iy + 2].v; + x_bundle[ix + 2][iy + 2].v = 0.0; + } + #pragma omp simd + for (int iy = ctx0->y_m; iy <= ctx0->y_m + ctx0->y_ltkn1 - 1; iy += 1) + { + f_bundle[ix + 2][iy + 2].v = (2.0/((ctx0->h_y*ctx0->h_y)) + 2.0/((ctx0->h_x*ctx0->h_x)))*ctx0->h_x*ctx0->h_y*x_bundle[ix + 2][iy + 2].v; + x_bundle[ix + 2][iy + 2].v = 0.0; + } + #pragma omp simd + for (int iy = ctx0->y_M - ctx0->y_rtkn0 + 1; iy <= ctx0->y_M; iy += 1) + { + f_bundle[ix + 2][iy + 2].u = (2.0/((ctx0->h_y*ctx0->h_y)) + 2.0/((ctx0->h_x*ctx0->h_x)))*ctx0->h_x*ctx0->h_y*x_bundle[ix + 2][iy + 2].u; + x_bundle[ix + 2][iy + 2].u = 0.0; + } + #pragma omp simd + for (int iy = ctx0->y_m; iy <= ctx0->y_m + ctx0->y_ltkn1 - 1; iy += 1) + { + f_bundle[ix + 2][iy + 2].u = (2.0/((ctx0->h_y*ctx0->h_y)) + 2.0/((ctx0->h_x*ctx0->h_x)))*ctx0->h_x*ctx0->h_y*x_bundle[ix + 2][iy + 2].u; + x_bundle[ix + 2][iy + 2].u = 0.0; + } + } + for (int ix = ctx0->x_m; ix <= ctx0->x_m + ctx0->x_ltkn1 - 1; ix += 1) + { + #pragma omp simd + for (int y = ctx0->y_m; y <= ctx0->y_M; y += 1) + { + PetscScalar r6 = 2.0/((ctx0->h_y*ctx0->h_y)) + 2.0/((ctx0->h_x*ctx0->h_x)); + f_bundle[ix + 2][y + 2].v = r6*ctx0->h_x*ctx0->h_y*x_bundle[ix + 2][y + 2].v; + x_bundle[ix + 2][y + 2].v = 0.0; + f_bundle[ix + 2][y + 2].u = r6*ctx0->h_x*ctx0->h_y*x_bundle[ix + 2][y + 2].u; + x_bundle[ix + 2][y + 2].u = 0.0; + } + } + for (int ix = ctx0->x_M - ctx0->x_rtkn2 + 1; ix <= ctx0->x_M; ix += 1) + { + #pragma omp simd + for (int y = ctx0->y_m; y <= ctx0->y_M; y += 1) + { + PetscScalar r7 = 2.0/((ctx0->h_y*ctx0->h_y)) + 2.0/((ctx0->h_x*ctx0->h_x)); + f_bundle[ix + 2][y + 2].v = r7*ctx0->h_x*ctx0->h_y*x_bundle[ix + 2][y + 2].v; + x_bundle[ix + 2][y + 2].v = 0.0; + f_bundle[ix + 2][y + 2].u = r7*ctx0->h_x*ctx0->h_y*x_bundle[ix + 2][y + 2].u; + x_bundle[ix + 2][y + 2].u = 0.0; + } + } + + PetscScalar r4 = 1.0/(ctx0->h_x*ctx0->h_x); + PetscScalar r5 = 1.0/(ctx0->h_y*ctx0->h_y); + + for (int ix = ctx0->x_m + ctx0->x_ltkn0; ix <= ctx0->x_M - ctx0->x_rtkn0; ix += 1) + { + #pragma omp simd aligned(f:32) + for (int iy = ctx0->y_m + ctx0->y_ltkn2; iy <= ctx0->y_M - ctx0->y_rtkn2; iy += 1) + { + f_bundle[ix + 2][iy + 2].v = (2.0*(r4*x_bundle[ix + 2][iy + 2].v + r5*x_bundle[ix + 2][iy + 2].v) - (r4*x_bundle[ix + 1][iy + 2].v + r4*x_bundle[ix + 3][iy + 2].v + r5*x_bundle[ix + 2][iy + 1].v + r5*x_bundle[ix + 2][iy + 3].v + f[ix + 2][iy + 2]))*ctx0->h_x*ctx0->h_y; + f_bundle[ix + 2][iy + 2].u = (2.0*(r4*x_bundle[ix + 2][iy + 2].u + r5*x_bundle[ix + 2][iy + 2].u) - (r4*x_bundle[ix + 1][iy + 2].u + r4*x_bundle[ix + 3][iy + 2].u + r5*x_bundle[ix + 2][iy + 1].u + r5*x_bundle[ix + 2][iy + 3].u + x_bundle[ix + 2][iy + 2].v))*ctx0->h_x*ctx0->h_y; + } + } + PetscCall(VecRestoreArray(floc,&f_bundle_vec)); + PetscCall(VecRestoreArray(xloc,&x_bundle_vec)); + PetscCall(DMLocalToGlobalBegin(dm0,floc,ADD_VALUES,F)); + PetscCall(DMLocalToGlobalEnd(dm0,floc,ADD_VALUES,F)); + PetscCall(DMRestoreLocalVector(dm0,&xloc)); + PetscCall(DMRestoreLocalVector(dm0,&floc)); + + PetscFunctionReturn(0); +} + +PetscErrorCode ClearPetscOptions0() +{ + PetscFunctionBeginUser; + + /* Flush denormal numbers to zero in hardware */ + _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON); + _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON); + + PetscCall(PetscOptionsClearValue(NULL,"-biharm_snes_type")); + PetscCall(PetscOptionsClearValue(NULL,"-biharm_ksp_type")); + PetscCall(PetscOptionsClearValue(NULL,"-biharm_pc_type")); + PetscCall(PetscOptionsClearValue(NULL,"-biharm_ksp_rtol")); + PetscCall(PetscOptionsClearValue(NULL,"-biharm_ksp_atol")); + PetscCall(PetscOptionsClearValue(NULL,"-biharm_ksp_divtol")); + PetscCall(PetscOptionsClearValue(NULL,"-biharm_ksp_max_it")); + + PetscFunctionReturn(0); +} + +PetscErrorCode DestroySubMatrixCtx0(Mat J) +{ + PetscFunctionBeginUser; + + struct SubMatrixCtx * subctx; + + /* Flush denormal numbers to zero in hardware */ + _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON); + _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON); + + PetscCall(MatShellGetContext(J,&subctx)); + PetscCall(PetscFree(subctx)); + + PetscFunctionReturn(0); +} + +PetscErrorCode J00_MatMult0(Mat J, Vec X, Vec Y) +{ + PetscFunctionBeginUser; + + struct UserCtx0 * o0; + DM dm0; + PetscCall(MatGetDM(J,&dm0)); + PetscCall(DMGetApplicationContext(dm0,&o0)); + DMDALocalInfo info; + Vec xloc; + Vec yloc; + + PetscScalar * a0_vec; + PetscScalar * a1_vec; + + PetscCall(DMGetLocalVector(dm0,&xloc)); + PetscCall(DMGlobalToLocalBegin(dm0,X,INSERT_VALUES,xloc)); + PetscCall(DMGlobalToLocalEnd(dm0,X,INSERT_VALUES,xloc)); + PetscCall(DMGetLocalVector(dm0,&yloc)); + PetscCall(VecSet(yloc,0.0)); + PetscCall(VecGetArray(yloc,&a1_vec)); + PetscCall(VecGetArray(xloc,&a0_vec)); + PetscCall(DMDAGetLocalInfo(dm0,&info)); + + PetscScalar (* a0)[info.gxm] = (PetscScalar (*)[info.gxm]) a0_vec; + PetscScalar (* a1)[info.gxm] = (PetscScalar (*)[info.gxm]) a1_vec; + + /* Flush denormal numbers to zero in hardware */ + _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON); + _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON); + + for (int ix = o0->x_m + o0->x_ltkn0; ix <= o0->x_M - o0->x_rtkn0; ix += 1) + { + #pragma omp simd + for (int iy = o0->y_M - o0->y_rtkn0 + 1; iy <= o0->y_M; iy += 1) + { + a1[ix + 2][iy + 2] = (2.0/((o0->h_y*o0->h_y)) + 2.0/((o0->h_x*o0->h_x)))*o0->h_x*o0->h_y*a0[ix + 2][iy + 2]; + a0[ix + 2][iy + 2] = 0.0; + } + #pragma omp simd + for (int iy = o0->y_m; iy <= o0->y_m + o0->y_ltkn1 - 1; iy += 1) + { + a1[ix + 2][iy + 2] = (2.0/((o0->h_y*o0->h_y)) + 2.0/((o0->h_x*o0->h_x)))*o0->h_x*o0->h_y*a0[ix + 2][iy + 2]; + a0[ix + 2][iy + 2] = 0.0; + } + } + for (int ix = o0->x_m; ix <= o0->x_m + o0->x_ltkn1 - 1; ix += 1) + { + #pragma omp simd + for (int y = o0->y_m; y <= o0->y_M; y += 1) + { + a1[ix + 2][y + 2] = (2.0/((o0->h_y*o0->h_y)) + 2.0/((o0->h_x*o0->h_x)))*o0->h_x*o0->h_y*a0[ix + 2][y + 2]; + a0[ix + 2][y + 2] = 0.0; + } + } + for (int ix = o0->x_M - o0->x_rtkn2 + 1; ix <= o0->x_M; ix += 1) + { + #pragma omp simd + for (int y = o0->y_m; y <= o0->y_M; y += 1) + { + a1[ix + 2][y + 2] = (2.0/((o0->h_y*o0->h_y)) + 2.0/((o0->h_x*o0->h_x)))*o0->h_x*o0->h_y*a0[ix + 2][y + 2]; + a0[ix + 2][y + 2] = 0.0; + } + } + + PetscScalar r0 = 1.0/(o0->h_x*o0->h_x); + PetscScalar r1 = 1.0/(o0->h_y*o0->h_y); + + for (int ix = o0->x_m + o0->x_ltkn0; ix <= o0->x_M - o0->x_rtkn0; ix += 1) + { + #pragma omp simd + for (int iy = o0->y_m + o0->y_ltkn2; iy <= o0->y_M - o0->y_rtkn2; iy += 1) + { + a1[ix + 2][iy + 2] = (2.0*(r0*a0[ix + 2][iy + 2] + r1*a0[ix + 2][iy + 2]) - (r0*a0[ix + 1][iy + 2] + r0*a0[ix + 3][iy + 2] + r1*a0[ix + 2][iy + 1] + r1*a0[ix + 2][iy + 3]))*o0->h_x*o0->h_y; + } + } + PetscCall(VecRestoreArray(yloc,&a1_vec)); + PetscCall(VecRestoreArray(xloc,&a0_vec)); + PetscCall(DMLocalToGlobalBegin(dm0,yloc,ADD_VALUES,Y)); + PetscCall(DMLocalToGlobalEnd(dm0,yloc,ADD_VALUES,Y)); + PetscCall(DMRestoreLocalVector(dm0,&xloc)); + PetscCall(DMRestoreLocalVector(dm0,&yloc)); + + PetscFunctionReturn(0); +} + +PetscErrorCode J10_MatMult0(Mat J, Vec X, Vec Y) +{ + PetscFunctionBeginUser; + + struct UserCtx0 * ctx0; + DM dm0; + PetscCall(MatGetDM(J,&dm0)); + PetscCall(DMGetApplicationContext(dm0,&ctx0)); + DMDALocalInfo info; + Vec xloc; + Vec yloc; + + PetscScalar * x_v_vec; + PetscScalar * y_u_vec; + + PetscCall(DMGetLocalVector(dm0,&xloc)); + PetscCall(DMGlobalToLocalBegin(dm0,X,INSERT_VALUES,xloc)); + PetscCall(DMGlobalToLocalEnd(dm0,X,INSERT_VALUES,xloc)); + PetscCall(DMGetLocalVector(dm0,&yloc)); + PetscCall(VecSet(yloc,0.0)); + PetscCall(VecGetArray(yloc,&y_u_vec)); + PetscCall(VecGetArray(xloc,&x_v_vec)); + PetscCall(DMDAGetLocalInfo(dm0,&info)); + + PetscScalar (* x_v)[info.gxm] = (PetscScalar (*)[info.gxm]) x_v_vec; + PetscScalar (* y_u)[info.gxm] = (PetscScalar (*)[info.gxm]) y_u_vec; + + /* Flush denormal numbers to zero in hardware */ + _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON); + _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON); + + for (int ix = ctx0->x_m + ctx0->x_ltkn0; ix <= ctx0->x_M - ctx0->x_rtkn0; ix += 1) + { + #pragma omp simd + for (int iy = ctx0->y_m + ctx0->y_ltkn2; iy <= ctx0->y_M - ctx0->y_rtkn2; iy += 1) + { + y_u[ix + 2][iy + 2] = -ctx0->h_x*ctx0->h_y*x_v[ix + 2][iy + 2]; + } + } + PetscCall(VecRestoreArray(yloc,&y_u_vec)); + PetscCall(VecRestoreArray(xloc,&x_v_vec)); + PetscCall(DMLocalToGlobalBegin(dm0,yloc,ADD_VALUES,Y)); + PetscCall(DMLocalToGlobalEnd(dm0,yloc,ADD_VALUES,Y)); + PetscCall(DMRestoreLocalVector(dm0,&xloc)); + PetscCall(DMRestoreLocalVector(dm0,&yloc)); + + PetscFunctionReturn(0); +} + +PetscErrorCode MatCreateSubMatrices0(Mat J, PetscInt nfields, IS * irow, IS * icol, MatReuse scall, Mat * * submats) +{ + PetscFunctionBeginUser; + + Mat block; + DM dm0; + PetscInt dof; + + struct UserCtx0 * ctx0; + struct JacobianCtx * jctx; + struct SubMatrixCtx * subctx; + + PetscCall(MatShellGetContext(J,&jctx)); + DM * subdms = jctx->subdms; + + /* Flush denormal numbers to zero in hardware */ + _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON); + _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON); + + PetscInt nsubmats = nfields*nfields; + PetscCall(PetscCalloc1(nsubmats,submats)); + PetscCall(MatGetDM(J,&dm0)); + PetscCall(DMGetApplicationContext(dm0,&ctx0)); + PetscCall(DMDAGetInfo(dm0,NULL,NULL,NULL,NULL,NULL,NULL,NULL,&dof,NULL,NULL,NULL,NULL,NULL)); + Mat * submat_arr = *submats; + + for (int i = 0; i <= nsubmats - 1; i += 1) + { + PetscInt rowidx = i / dof; + PetscInt colidx = (i)%(dof); + /* Query constrained global sizes from sub-DMs (accounts for BC-excluded DOFs) */ + Vec rowvec, colvec; + PetscInt subblockrows, subblockcols; + PetscCall(DMGetGlobalVector(subdms[rowidx], &rowvec)); + PetscCall(VecGetSize(rowvec, &subblockrows)); + PetscCall(DMRestoreGlobalVector(subdms[rowidx], &rowvec)); + PetscCall(DMGetGlobalVector(subdms[colidx], &colvec)); + PetscCall(VecGetSize(colvec, &subblockcols)); + PetscCall(DMRestoreGlobalVector(subdms[colidx], &colvec)); + PetscCall(MatCreate(PETSC_COMM_WORLD,&block)); + PetscCall(MatSetSizes(block,PETSC_DECIDE,PETSC_DECIDE,subblockrows,subblockcols)); + PetscCall(MatSetType(block,MATSHELL)); + PetscCall(PetscMalloc1(1,&subctx)); + subctx->rows = &irow[rowidx]; + subctx->cols = &icol[colidx]; + PetscCall(DMSetApplicationContext(subdms[rowidx],ctx0)); + PetscCall(MatSetDM(block,subdms[rowidx])); + PetscCall(MatShellSetContext(block,subctx)); + PetscCall(MatShellSetOperation(block,MATOP_DESTROY,(void (*)(void))DestroySubMatrixCtx0)); + PetscCall(MatSetUp(block)); + submat_arr[i] = block; + } + PetscCall(MatShellSetOperation(submat_arr[0],MATOP_MULT,(void (*)(void))J00_MatMult0)); + PetscCall(MatShellSetOperation(submat_arr[2],MATOP_MULT,(void (*)(void))J10_MatMult0)); + PetscCall(MatShellSetOperation(submat_arr[3],MATOP_MULT,(void (*)(void))J00_MatMult0)); + + PetscFunctionReturn(0); +} + +PetscErrorCode PopulateUserContext0(struct UserCtx0 * ctx0, struct dataobj * f_vec, const PetscScalar h_x, const PetscScalar h_y, const PetscInt x_M, const PetscInt x_ltkn0, const PetscInt x_ltkn1, const PetscInt x_m, const PetscInt x_rtkn0, const PetscInt x_rtkn2, const PetscInt y_M, const PetscInt y_ltkn1, const PetscInt y_ltkn2, const PetscInt y_m, const PetscInt y_rtkn0, const PetscInt y_rtkn2) +{ + PetscFunctionBeginUser; + + ctx0->h_x = h_x; + ctx0->h_y = h_y; + ctx0->x_M = x_M; + ctx0->x_ltkn0 = x_ltkn0; + ctx0->x_ltkn1 = x_ltkn1; + ctx0->x_m = x_m; + ctx0->x_rtkn0 = x_rtkn0; + ctx0->x_rtkn2 = x_rtkn2; + ctx0->y_M = y_M; + ctx0->y_ltkn1 = y_ltkn1; + ctx0->y_ltkn2 = y_ltkn2; + ctx0->y_m = y_m; + ctx0->y_rtkn0 = y_rtkn0; + ctx0->y_rtkn2 = y_rtkn2; + ctx0->f_vec = f_vec; + + PetscFunctionReturn(0); +} + +PetscErrorCode PopulateMatContext(struct JacobianCtx * jctx, DM * subdms, IS * fields) +{ + PetscFunctionBeginUser; + + /* Flush denormal numbers to zero in hardware */ + _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON); + _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON); + + jctx->subdms = subdms; + jctx->fields = fields; + + PetscFunctionReturn(0); +} +/* Backdoor edit at Mon Mar 2 19:26:39 2026*/ +/* Backdoor edit at Mon Mar 2 19:28:58 2026*/ +/* Backdoor edit at Tue Mar 3 10:15:06 2026*/ +/* Backdoor edit at Tue Mar 3 10:30:52 2026*/ +/* Backdoor edit at Tue Mar 3 10:43:27 2026*/ +/* Backdoor edit at Tue Mar 3 10:44:26 2026*/ +/* Backdoor edit at Tue Mar 3 10:44:57 2026*/ +/* Backdoor edit at Tue Mar 3 11:16:06 2026*/ +/* Backdoor edit at Tue Mar 3 11:16:35 2026*/ +/* Backdoor edit at Tue Mar 3 11:31:42 2026*/ +/* Backdoor edit at Tue Mar 3 11:32:14 2026*/ +/* Backdoor edit at Tue Mar 3 11:32:29 2026*/ +/* Backdoor edit at Tue Mar 3 11:38:02 2026*/ +/* Backdoor edit at Tue Mar 3 11:38:28 2026*/ +/* Backdoor edit at Tue Mar 3 11:38:51 2026*/ +/* Backdoor edit at Tue Mar 3 11:39:47 2026*/