Source code for psydac.linalg.stencil

# coding: utf-8
#
# Copyright 2018 Yaman Güçlü

import os
import warnings

import numpy as np

from types        import MappingProxyType
from scipy.sparse import coo_matrix, diags as sp_diags

from psydac.ddm.mpi import mpi as MPI
from psydac.linalg.basic  import VectorSpace, Vector, LinearOperator
from psydac.ddm.cart      import find_mpi_type, CartDecomposition, InterfaceCartDecomposition
from psydac.ddm.utilities import get_data_exchanger
from psydac.api.settings  import PSYDAC_BACKENDS

from psydac.linalg.kernels.axpy_kernels        import axpy_1d, axpy_2d, axpy_3d
from psydac.linalg.kernels.inner_kernels       import inner_1d, inner_2d, inner_3d
from psydac.linalg.kernels.matvec_kernels      import matvec_1d, matvec_2d, matvec_3d
from psydac.linalg.kernels.transpose_kernels   import transpose_1d, transpose_2d, transpose_3d
from psydac.linalg.kernels.transpose_kernels   import interface_transpose_1d, interface_transpose_2d, interface_transpose_3d
from psydac.linalg.kernels.stencil2coo_kernels import stencil2coo_1d_F, stencil2coo_2d_F, stencil2coo_3d_F
from psydac.linalg.kernels.stencil2coo_kernels import stencil2coo_1d_C, stencil2coo_2d_C, stencil2coo_3d_C


__all__ = (
    'StencilVectorSpace',
    'StencilVector',
    'StencilMatrix',
    'StencilInterfaceMatrix'
)

#===============================================================================
# Dictionary used to select correct kernel functions based on dimensionality
kernels = {
    'axpy'  : (None,   axpy_1d,   axpy_2d,   axpy_3d),
    'inner' : (None,  inner_1d,  inner_2d,  inner_3d),
    'matvec': (None, matvec_1d, matvec_2d, matvec_3d),
    'transpose': (None, transpose_1d, transpose_2d, transpose_3d),
    'interface_transpose': (None, interface_transpose_1d, interface_transpose_2d, interface_transpose_3d),
    'stencil2coo': {'F': (None, stencil2coo_1d_F, stencil2coo_2d_F, stencil2coo_3d_F),
                    'C': (None, stencil2coo_1d_C, stencil2coo_2d_C, stencil2coo_3d_C)}
}

#===============================================================================
def compute_diag_len(pads, shifts_domain, shifts_codomain, return_padding=False):
    """
    Compute the diagonal length and the padding of the stencil matrix for each direction,
    using the shifts of the domain and the codomain.

    Parameters
    ----------
    pads : tuple-like (int)
        Padding along each direction.

    shifts_domain : tuple_like (int)
        Shifts of the domain along each direction.

    shifts_codomain : tuple_like (int)
        Shifts of the codomain along each direction.

    return_padding : bool
        Return the new padding if True.

    Returns
    -------
    n : (int)
        Diagonal length of the stencil matrix.

    ep : (int)
        Padding that constitutes the starting index of the non zero elements.
    """
    n  = ((np.ceil((pads+1)/shifts_codomain)-1)*shifts_domain).astype('int')
    ep = -np.minimum(0, n-pads)
    n  = n + ep + pads + 1
    if return_padding:
        return n.astype('int'), ep.astype('int')
    else:
        return n.astype('int')

#===============================================================================
class StencilVectorSpace(VectorSpace):
    """
    Vector space for n-dimensional stencil format. Two different initializations
    are possible:

    - serial  : StencilVectorSpace(npts, pads, periods, shifts=None, starts=None, ends=None, dtype=float)
    - parallel: StencilVectorSpace(cart, dtype=float)

    Parameters
    ----------
    npts : tuple-like (int)
        Number of entries along each direction
        (= global dimensions of vector space).

    pads : tuple-like (int)
        Padding p along each direction needed for the ghost regions.

    periods : tuple-like (bool)
        Periodicity along each direction.

    shifts : tuple-like (int)
        shift m of the coefficients in each direction.

    starts : tuple-like (int)
        Index of the first coefficient local to the space in each direction.

    ends : tuple-like (int)
        Index of the last coefficient local to the space in each direction.

    dtype : type
        Type of scalar entries.

    cart : psydac.ddm.cart.CartDecomposition
        Tensor-product grid decomposition according to MPI Cartesian topology.

    """

    def __init__(self, cart, dtype=float):

        assert isinstance(cart, (CartDecomposition, InterfaceCartDecomposition))

        # Sequential attributes
        self._parallel = cart.is_parallel
        self._cart     = cart
        self._ndim     = cart._ndims
        self._npts     = cart.npts
        self._pads     = cart.pads
        self._periods  = cart.periods
        self._shifts   = cart.shifts
        self._dtype    = dtype
        self._starts   = cart.starts
        self._ends     = cart.ends

        # The shape of the allocated numpy array
        self._shape         = cart.shape
        self._parent_starts = cart.parent_starts
        self._parent_ends   = cart.parent_ends
        self._mpi_type      = find_mpi_type(dtype)

        # The dictionary follows the structure {(axis, ext): StencilVectorSpace()}
        # where axis and ext represent the boundary shared by two patches
        self._interfaces = {}
        self._interfaces_readonly = MappingProxyType(self._interfaces)

        # Parallel attributes
        if cart.is_parallel and not cart.is_comm_null:
            self._mpi_type = find_mpi_type(dtype)
            if isinstance(cart, InterfaceCartDecomposition):
                # TODO : Check if this line really change the ._shape
                self._shape = cart.get_interface_communication_infos(cart.axis)['gbuf_recv_shape'][0]
            else:
                self._synchronizer = get_data_exchanger(cart, dtype , assembly=True, blocking=False)

        # Select kernel for AXPY operation
        if self._ndim in [1, 2, 3]:
            self._axpy_func = kernels['axpy'][self._ndim]
        else:
            self._axpy_func = self._axpy_python
            self._axpy_work = self.zeros()  # work array

        # Select kernel for inner product
        if self._ndim in [1, 2, 3]:
            self._inner_func = kernels['inner'][self._ndim]
        else:
            self._inner_func = self._inner_python

        # Constant arguments for inner product: total number of ghost cells
        self._inner_consts = tuple(np.int64(p * s) for p, s in zip(self._pads, self._shifts))

        # TODO [YG, 06.09.2023]: print warning if pure Python functions are used

    #--------------------------------------
    # Pure Python methods for backup
    #--------------------------------------
    def _axpy_python(self, a, x, y):
        w = self._axpy_work
        x.copy(out=w)  # w <- x
        w *= a         # w <- a * x
        y += w         # y <- a * x + y

    @staticmethod
    def _inner_python(v1, v2, nghost):
        index = tuple(slice(ng, -ng) for ng in nghost)
        return np.vdot(v1[index].flat, v2[index].flat)

    #--------------------------------------
    # Abstract interface
    #--------------------------------------
    @property
    def dimension(self):
        """ The dimension of a vector space V is the cardinality
            (i.e. the number of vectors) of a basis of V over its base field.
        """
        return np.prod(self._npts)

    # ...
    @property
    def dtype(self):
        return self._dtype

    # ...
    def zeros(self):
        """
        Get a copy of the null element of the StencilVectorSpace V.

        Returns
        -------
        null : StencilVector
            A new vector object with all components equal to zero.

        """
        return StencilVector(self)

    #...
    def inner(self, x, y):
        """
        Evaluate the inner vector product between two vectors of this space V.

        If the field of V is real, compute the classical scalar product.
        If the field of V is complex, compute the classical sesquilinear
        product with linearity on the second vector.

        TODO [YG 01.05.2025]: Currently, the first vector is conjugated. We
        want to reverse this behavior in order to align with the convention
        of FEniCS.

        Parameters
        ----------
        x : Vector
            The first vector in the scalar product. In the case of a complex
            field, the inner product is antilinear w.r.t. this vector (hence
            this vector is conjugated).

        y : Vector
            The second vector in the scalar product. The inner product is
            linear w.r.t. this vector.

        Returns
        -------
        float | complex
            The scalar product of the two vectors. Note that inner(x, x) is
            a non-negative real number which is zero if and only if x = 0.

        """

        assert isinstance(x, StencilVector)
        assert isinstance(y, StencilVector)
        assert x.space is self
        assert y.space is self

        inner_func = self._inner_func
        inner_args = (x._data, y._data, *self._inner_consts)

        if self.parallel:
            # Sometimes in the parallel case, we can get an empty vector that breaks our kernel
            x._dot_send_data[0] = 0 if x._data.shape[0] == 0 else inner_func(*inner_args)
            self.cart.global_comm.Allreduce((x._dot_send_data, self.mpi_type),
                                            (x._dot_recv_data, self.mpi_type),
                                             op=MPI.SUM )
            return x._dot_recv_data[0]
        else:
            return inner_func(*inner_args)

    # ...
    def axpy(self, a, x, y):
        """
        Increment the vector y with the a-scaled vector x, i.e. y = a * x + y,
        provided that x and y belong to the same vector space V (self).
        The scalar value a may be real or complex, depending on the field of V.

        Parameters
        ----------
        a : scalar
            The scaling coefficient needed for the operation.

        x : StencilVector
            The vector which is not modified by this function.

        y : StencilVector
            The vector modified by this function (incremented by a * x).
        """
        assert isinstance(x, StencilVector)
        assert isinstance(y, StencilVector)
        assert x._space is self
        assert y._space is self

        if self.dtype == complex:
            a = complex(a)
        else:
            if isinstance(a, complex):
                raise TypeError('A complex scalar was given in a real case')
            else:
                a = float(a)

        self._axpy_func(a, x._data, y._data)

        for axis, ext in self.interfaces:
            self._axpy_func(a, x._interface_data[axis, ext], y._interface_data[axis, ext])

        x._sync = x._sync and y._sync

    #--------------------------------------
    # Other properties/methods
    #--------------------------------------
    @property
    def mpi_type(self):
        return self._mpi_type

    @property
    def shape(self):
        return self._shape

    @property
    def parallel(self):
        return self._parallel

    # ...
    @property
    def cart(self):
        return self._cart

    # ...
    @property
    def npts(self):
        return self._npts

    # ...
    @property
    def starts(self):
        return self._starts

    # ...
    @property
    def ends(self):
        return self._ends

    # ...
    @property
    def parent_starts(self):
        return self._parent_starts

    # ...
    @property
    def parent_ends(self):
        return self._parent_ends

    # ...
    @property
    def pads(self):
        return self._pads

    # ...
    @property
    def periods(self):
        return self._periods

    # ...
    @property
    def shifts(self):
        return self._shifts

    # ...
    @property
    def ndim(self):
        return self._ndim

    @property
    def interfaces(self):
        return self._interfaces_readonly

    def set_interface(self, axis, ext, cart):
        """
        Set the interface space along a given axis and extremity.

        Parameters
        ----------
        axis : int
            The axis of the new Interface space.

        ext: {-1, 1}
            The extremity of the new Interface space.

        cart: CartDecomposition
            The cart of the new space.
        """

        assert int(ext) in [-1, 1]
        assert isinstance(cart, (CartDecomposition, InterfaceCartDecomposition))

        if cart.is_comm_null:
            return

        # Create the interface space in the parallel case using the new cart
        if isinstance(cart, InterfaceCartDecomposition):
            # Case where the patches that share the interface are owned by different intra-communicators
            space = StencilVectorSpace(cart, dtype=self.dtype)
            self._interfaces[axis, ext] = space
        else:
            # Case where the patches that share the interface are owned by the same intra-communicator
            if self.parent_ends[axis] is not None:
                diff = min(1,self.parent_ends[axis]-self.ends[axis])
            else:
                diff = 0

            starts = list(cart._starts)
            ends   = list(cart._ends)
            parent_starts = list(cart._parent_starts)
            parent_ends   = list(cart._parent_ends)
            if ext == 1:
                starts[axis] = self.ends[axis]-self.pads[axis]+diff
                if parent_starts[axis] is not None:
                    parent_starts[axis] = parent_ends[axis]-self.pads[axis]
            else:
                ends[axis] = self.pads[axis]-diff
                if parent_ends[axis] is not None:
                    parent_ends[axis] = self.pads[axis]

            cart = cart.change_starts_ends(tuple(starts), tuple(ends), tuple(parent_starts), tuple(parent_ends))

            #TODO Check if we create object from it, otherwise its only purpose is to store some parameters which is innefficient
            space = StencilVectorSpace(cart, self.dtype)

            self._interfaces[axis, ext] = space

#===============================================================================
[docs] class StencilVector(Vector): """ Vector in n-dimensional stencil format. Parameters ---------- V : psydac.linalg.stencil.StencilVectorSpace Space to which the new vector belongs. """ def __init__(self, V): assert isinstance(V, StencilVectorSpace) self._space = V self._sizes = V.shape self._ndim = len(V.npts) self._data = np.zeros(V.shape, dtype=V.dtype) self._dot_send_data = np.zeros((1,), dtype=V.dtype) self._dot_recv_data = np.zeros((1,), dtype=V.dtype) self._interface_data = {} self._requests = None # allocate data for the boundary that shares an interface for axis, ext in V.interfaces: self._interface_data[axis, ext] = np.zeros(V.interfaces[axis, ext].shape, dtype=V.dtype) #prepare communications if V.cart.is_parallel and not V.cart.is_comm_null and isinstance(V.cart, CartDecomposition): self._requests = V._synchronizer.prepare_communications(self._data) # TODO: distinguish between different directions self._sync = False #... def __del__(self): # Release memory of persistent MPI communication channels if self._requests: for request in self._requests: request.Free() #-------------------------------------- # Abstract interface #-------------------------------------- @property def space(self): return self._space # ...
[docs] def toarray(self, *, order='C', with_pads=False): """ Return a numpy 1D array corresponding to the given StencilVector, with or without pads. Parameters ---------- with_pads : bool If True, include pads in output array (ignored in serial case). order: {'C','F'} Memory representation of the data ‘C’ for row-major ordering (C-style), ‘F’ column-major ordering (Fortran-style). Returns ------- array : numpy.ndarray A copy of the data array collapsed into one dimension. """ # In parallel case, call different functions based on 'with_pads' flag if self.space.parallel: if with_pads: return self._toarray_parallel_with_pads(order=order) else: return self._toarray_parallel_no_pads(order=order) # In serial case, ignore 'with_pads' flag return self.toarray_local(order=order)
#...
[docs] def copy(self, out=None): if self is out: return self w = out or StencilVector( self._space ) np.copyto(w._data, self._data, casting='no') for axis, ext in self._space.interfaces: np.copyto(w._interface_data[axis, ext], self._interface_data[axis, ext], casting='no') w._sync = self._sync return w
#...
[docs] def conjugate(self, out=None): if out is not None: assert isinstance(out, StencilVector) assert out.space is self.space else: out = StencilVector(self.space) np.conjugate(self._data, out=out._data, casting='no') for axis, ext in self._space.interfaces: np.conjugate(self._interface_data[axis, ext], out=out._interface_data[axis, ext], casting='no') out._sync = self._sync return out
#... def __neg__(self): w = StencilVector( self._space ) np.negative(self._data, out=w._data) for axis, ext in self._space.interfaces: np.negative(self._interface_data[axis, ext], out=w._interface_data[axis, ext]) w._sync = self._sync return w #... def __mul__(self, a): w = StencilVector( self._space ) np.multiply(self._data, a, out=w._data) for axis, ext in self._space.interfaces: np.multiply(self._interface_data[axis, ext], a, out=w._interface_data[axis, ext]) w._sync = self._sync return w #... def __add__(self, v): assert isinstance( v, StencilVector ) assert v._space is self._space w = StencilVector( self._space ) np.add(self._data, v._data, out=w._data) for axis, ext in self._space.interfaces: np.add(self._interface_data[axis, ext], v._interface_data[axis, ext], out=w._interface_data[axis, ext]) w._sync = self._sync and v._sync return w #... def __sub__(self, v): assert isinstance( v, StencilVector ) assert v._space is self._space w = StencilVector( self._space ) np.subtract(self._data, v._data, out=w._data) for axis, ext in self._space.interfaces: np.subtract(self._interface_data[axis, ext], v._interface_data[axis, ext], out=w._interface_data[axis, ext]) w._sync = self._sync and v._sync return w #... def __imul__(self, a): self._data *= a for axis, ext in self._space.interfaces: self._interface_data[axis, ext] *= a return self #... def __iadd__(self, v): assert isinstance( v, StencilVector ) assert v._space is self._space self._data += v._data for axis, ext in self._space.interfaces: self._interface_data[axis, ext] += v._interface_data[axis, ext] self._sync = v._sync and self._sync return self #... def __isub__(self, v): assert isinstance( v, StencilVector ) assert v._space is self._space self._data -= v._data for axis, ext in self._space.interfaces: self._interface_data[axis, ext] -= v._interface_data[axis, ext] self._sync = v._sync and self._sync return self #-------------------------------------- # Other properties/methods #-------------------------------------- @property def starts(self): return self._space.starts # ... @property def ends(self): return self._space.ends # ... @property def pads(self): return self._space.pads # ... def __str__(self): txt = '\n' txt += '> starts :: {starts}\n'.format( starts= self.starts ) txt += '> ends :: {ends}\n' .format( ends = self.ends ) txt += '> pads :: {pads}\n' .format( pads = self.pads ) txt += '> data :: {data}\n' .format( data = self._data ) txt += '> sync :: {sync}\n' .format( sync = self._sync ) return txt # ...
[docs] def toarray_local(self , *, order='C'): """ return the local array without the padding""" idx = tuple( slice(m*p,-m*p) if p != 0 else slice(0, None) for p,m in zip(self.pads, self.space.shifts) ) return self._data[idx].flatten( order=order)
# ... def _toarray_parallel_no_pads(self, order='C'): a = np.zeros( self.space.npts, self.dtype ) idx_from = tuple( slice(m*p,-m*p) if p != 0 else slice(0, None) for p,m in zip(self.pads, self.space.shifts) ) idx_to = tuple( slice(s,e+1) for s,e in zip(self.starts,self.ends) ) a[idx_to] = self._data[idx_from] return a.flatten( order=order) # ... def _toarray_parallel_with_pads(self, order='C'): pads = [m*p for m,p in zip(self.space.shifts, self.pads)] # Step 0: create extended n-dimensional array with zero values shape = tuple( n+2*p for n,p in zip( self.space.npts, pads ) ) a = np.zeros( shape, self.dtype ) # Step 1: write extended data chunk (local to process) onto array idx = tuple( slice(s,e+2*p+1) for s,e,p in zip( self.starts, self.ends, pads) ) a[idx] = self._data # Step 2: if necessary, apply periodic boundary conditions to array ndim = self.space.ndim for direction in range( ndim ): periodic = self.space.cart.periods[direction] coord = self.space.cart.coords [direction] nproc = self.space.cart.nprocs [direction] if periodic: p = pads[direction] if p == 0: continue # Left-most process: copy data from left to right if coord == 0: idx_from = tuple( (slice(None,p) if d == direction else slice(None)) for d in range( ndim ) ) idx_to = tuple( (slice(-2*p,-p) if d == direction else slice(None)) for d in range( ndim ) ) a[idx_to] = a[idx_from] # Right-most process: copy data from right to left if coord == nproc-1: idx_from = tuple( (slice(-p,None) if d == direction else slice(None)) for d in range( ndim ) ) idx_to = tuple( (slice(p,2*p) if d == direction else slice(None)) for d in range( ndim ) ) a[idx_to] = a[idx_from] # Step 3: remove ghost regions from global array idx = tuple( slice(p,-p) if p != 0 else slice(0, None) for p in pads ) out = a[idx] # Step 4: return flattened array return out.flatten( order=order) #...
[docs] def topetsc(self): """ Convert to petsc data structure. """ from psydac.linalg.topetsc import vec_topetsc vec = vec_topetsc( self ) return vec
# ... def __getitem__(self, key): index = self._getindex(key) return self._data[index] # ... def __setitem__(self, key, value): index = self._getindex(key) self._data[index] = value # ... @property def ghost_regions_in_sync(self): return self._sync # ... # NOTE: this property must be set collectively @ghost_regions_in_sync.setter def ghost_regions_in_sync(self, value): assert isinstance(value, bool) self._sync = value # ... # TODO: maybe change name to 'exchange'
[docs] def update_ghost_regions(self): """ Update ghost regions before performing non-local access to vector elements (e.g. in matrix-vector product). Parameters ---------- direction : int Single direction along which to operate (if not specified, all of them). """ # Update interior ghost regions if self.space.parallel: if not self.space.cart.is_comm_null: # PARALLEL CASE: fill in ghost regions with data from neighbors self.space._synchronizer.start_update_ghost_regions(self._data, self._requests) self.space._synchronizer. end_update_ghost_regions(self._data, self._requests) else: # SERIAL CASE: fill in ghost regions along periodic directions, otherwise set to zero self._update_ghost_regions_serial() # Update interface ghost regions if self.space.parallel: for axis, ext in self.space.interfaces: V = self.space.interfaces[axis, ext] if isinstance(V.cart, InterfaceCartDecomposition): continue slices = [slice(s, e+2*m*p+1) for s,e,m,p in zip(V.starts, V.ends, V.shifts, V.pads)] self._interface_data[axis, ext][...] = self._data[tuple(slices)] else: for axis, ext in self.space.interfaces: V = self.space.interfaces[axis, ext] slices = [slice(s, e+2*m*p+1) for s,e,m,p in zip(V.starts, V.ends, V.shifts, V.pads)] self._interface_data[axis, ext][...] = self._data[tuple(slices)] # Flag ghost regions as up-to-date self._sync = True
# ... def _update_ghost_regions_serial(self): ndim = self._space.ndim for direction in range(ndim): periodic = self._space.periods[direction] p = self._space.pads [direction] * self._space.shifts[direction] if p == 0: continue idx_front = [slice(None)] * direction idx_back = [slice(None)] * (ndim-direction-1) if periodic: # Copy data from left to right idx_from = tuple(idx_front + [slice( p, 2*p)] + idx_back) idx_to = tuple(idx_front + [slice(-p,None)] + idx_back) self._data[idx_to] = self._data[idx_from] # Copy data from right to left idx_from = tuple(idx_front + [slice(-2*p,-p)] + idx_back) idx_to = tuple(idx_front + [slice(None, p)] + idx_back) self._data[idx_to] = self._data[idx_from] else: # Set left ghost region to zero idx_ghost = tuple(idx_front + [slice(None, p)] + idx_back) self._data[idx_ghost] = 0 # Set right ghost region to zero idx_ghost = tuple(idx_front + [slice(-p,None)] + idx_back) self._data[idx_ghost] = 0 # ...
[docs] def exchange_assembly_data(self): """ Exchange assembly data. """ if self.space.parallel and not self.space.cart.is_comm_null: # PARALLEL CASE: fill in ghost regions with data from neighbors self.space._synchronizer.start_exchange_assembly_data(self._data) self.space._synchronizer. end_exchange_assembly_data(self._data) else: # SERIAL CASE: fill in ghost regions along periodic directions, otherwise set to zero self._exchange_assembly_data_serial() ndim = self._space.ndim for direction in range(ndim): idx_front = [slice(None)] * direction idx_back = [slice(None)] * (ndim-direction-1) p = self._space.pads [direction] m = self._space.shifts[direction] if p == 0: continue idx_from = tuple(idx_front + [slice(-m*p,None) if (-m*p+p)!=0 else slice(-m*p,None)] + idx_back) self._data[idx_from] = 0. idx_from = tuple(idx_front + [slice(0,m*p)] + idx_back) self._data[idx_from] = 0.
# ... def _exchange_assembly_data_serial(self): ndim = self._space.ndim for direction in range(ndim): periodic = self._space.periods[direction] p = self._space.pads [direction] m = self._space.shifts [direction] if p == 0: continue if periodic: idx_front = [slice(None)] * direction idx_back = [slice(None)] * (ndim-direction-1) # Copy data from left to right idx_to = tuple(idx_front + [slice( m*p, m*p+p)] + idx_back) idx_from = tuple(idx_front + [slice(-m*p,-m*p+p) if (-m*p+p)!=0 else slice(-m*p,None)] + idx_back) self._data[idx_to] += self._data[idx_from] #-------------------------------------- # Private methods #-------------------------------------- def _getindex(self, key): # TODO: check if we should ignore padding elements if not isinstance(key, tuple): key = (key,) index = [] for (i,s,p,m) in zip(key, self.starts, self.pads,self.space.shifts): if isinstance(i, slice): start = None if i.start is None else i.start - s + m*p stop = None if i.stop is None else i.stop - s + m*p l = slice(start, stop, i.step) else: l = i - s + m*p index.append(l) return tuple(index)
#=============================================================================== class StencilMatrix(LinearOperator): """ Matrix in n-dimensional stencil format. This is a linear operator that maps elements of stencil vector space V to elements of stencil vector space W. For now we only accept V==W. Parameters ---------- V : psydac.linalg.stencil.StencilVectorSpace Domain of the new linear operator. W : psydac.linalg.stencil.StencilVectorSpace Codomain of the new linear operator. pads: backend: precompiled : bool Whether to use precompiled kernels for .dot() and .transpose() """ def __init__( self, V, W, pads=None , backend=None, precompiled=True): assert isinstance(V, StencilVectorSpace) assert isinstance(W, StencilVectorSpace) assert W.pads == V.pads if not W.dtype==V.dtype: raise NotImplementedError("The domain and the codomain should have the same data type.") if pads is not None: for p,vp in zip(pads, V.pads): assert p<=vp self._pads = pads or tuple(V.pads) dims = list(W.shape) diags = [compute_diag_len(p, md, mc) for p,md,mc in zip(self._pads, V.shifts, W.shifts)] self._data = np.zeros(dims+diags, dtype=W.dtype) self._domain = V self._codomain = W self._ndim = len(dims) self._backend = backend self._precompiled = precompiled self._is_T = False self._diag_indices = None self._requests = None # Parallel attributes if W.parallel: if W.cart.is_comm_null:return # Create data exchanger for ghost regions self._synchronizer = get_data_exchanger( cart = W.cart, dtype = W.dtype, coeff_shape = diags, assembly = True ) # Flag ghost regions as not up-to-date (conservative choice) self._sync = False # Prepare the arguments for the dot product method nd = [(ej-sj+2*gp*mj-mj*p-gp)//mj*mi+1 for sj,ej,mj,mi,p,gp in zip(V.starts, V.ends, V.shifts, W.shifts, self._pads, V.pads)] nc = [ei-si+1 for si,ei,mj,p in zip(W.starts, W.ends, V.shifts, self._pads)] # Number of rows in matrix (along each dimension) nrows = [min(ni, nj) for ni,nj in zip(nc, nd)] nrows_extra = [max(0, ni-nj) for ni,nj in zip(nc, nd)] args = {} args['starts'] = tuple(V.starts) args['nrows'] = tuple(nrows) args['nrows_extra'] = tuple(nrows_extra) args['gpads'] = tuple(V.pads) args['pads'] = tuple(self._pads) args['dm'] = tuple(V.shifts) args['cm'] = tuple(W.shifts) ndiags, _ = list(zip(*[compute_diag_len(p,mj,mi, return_padding=True) for p,mi,mj in zip(self._pads, W.shifts, V.shifts)])) args['pad_imp'] = [gp*m+gp+1-n-s%m+p-gp for gp,m,n,s,p in zip(V.pads, V.shifts, ndiags, V.starts, self._pads)] args['ndiags'] = ndiags self._dotargs_null = args self._dot = kernels['matvec'][self._ndim] self._transpose_args = self._prepare_transpose_args() self._transpose_func = kernels['transpose'][self._ndim] if backend is None: backend = PSYDAC_BACKENDS.get(os.environ.get('PSYDAC_BACKEND')) or PSYDAC_BACKENDS['python'] self.set_backend(backend, precompiled) #-------------------------------------- # Abstract interface #-------------------------------------- @property def domain(self): return self._domain # ... @property def codomain(self): return self._codomain # ... @property def dtype(self): return self._domain.dtype # ... def dot(self, v, out=None): """ Return the matrix/vector product between self and v. This function optimized this product. Parameters ---------- v : StencilVector Vector of the domain of self needed for the Matrix/Vector product. out : StencilVector Vector of the codomain of self. Returns ------- out : StencilVector Vector of the codomain of self, contain the result of the product. """ assert isinstance(v, StencilVector) assert v.space is self.domain if out is not None: assert isinstance( out, StencilVector ) assert out.space is self.codomain else: out = StencilVector( self.codomain ) # Necessary if vector space is distributed across processes if not v.ghost_regions_in_sync: v.update_ghost_regions() self._func(self._data, v._data, out._data, **self._args) # IMPORTANT: flag that ghost regions are not up-to-date out.ghost_regions_in_sync = False return out # ... def vdot( self, v, out=None): """ Return the matrix/vector product between the conjugate of self and v. This function optimized this product. Parameters ---------- v : StencilVector Vector of the domain of self needed for the Matrix/Vector product out : StencilVector Vector of the codomain of self Returns ------- out : StencilVector Vector of the codomain of self, contain the result of the product """ assert isinstance(v, StencilVector) assert v.space is self.domain if out is not None: assert isinstance(out, StencilVector) assert out.space is self.codomain else: out = StencilVector( self.codomain ) # Necessary if vector space is distributed across processes if not v.ghost_regions_in_sync: v.update_ghost_regions() # Instead of computing A_*x, this function computes (A*x_)_ self._func(self._data, np.conjugate(v._data), out._data, **self._args) np.conjugate(out._data, out=out._data) # IMPORTANT: flag that ghost regions are not up-to-date out.ghost_regions_in_sync = False return out # ... def transpose(self, conjugate=False, out=None): """" Return the transposed StencilMatrix, or the Hermitian Transpose if conjugate==True Parameters ---------- conjugate : Bool(optional) True to get the Hermitian adjoint. out : StencilMatrix(optional) Optional out for the transpose to avoid temporaries """ # For clarity rename self M = self # If necessary, update ghost regions of original matrix M if not M.ghost_regions_in_sync: M.update_ghost_regions() # Create new matrix where domain and codomain are swapped if out is not None : assert isinstance(out, StencilMatrix) assert out.codomain == M.domain assert out.domain == M.codomain else : out = StencilMatrix(M.codomain, M.domain, pads=self._pads, backend=self._backend, precompiled=self._precompiled) # Call low-level '_transpose' function (works on Numpy arrays directly) if conjugate: self._transpose_func(np.conjugate(M._data), out._data, **self._transpose_args) else: self._transpose_func(M._data, out._data, **self._transpose_args) return out # ... def toarray(self, **kwargs): """ Convert to Numpy 2D array. """ order = kwargs.pop('order', 'C') with_pads = kwargs.pop('with_pads', False) if self.codomain.parallel and with_pads: coo = self._tocoo_parallel_with_pads(order=order) else: coo = self._tocoo_no_pads(order=order) return coo.toarray() # ... def tosparse(self, **kwargs): """ Convert to any Scipy sparse matrix format. """ order = kwargs.pop('order', 'C') with_pads = kwargs.pop('with_pads', False) if self.codomain.parallel and with_pads: coo = self._tocoo_parallel_with_pads(order=order) else: coo = self._tocoo_no_pads(order=order) return coo #-------------------------------------- # Overridden properties/methods #-------------------------------------- def __neg__(self): return self.__mul__(-1) # ... def __mul__(self, a): w = StencilMatrix(self._domain, self._codomain, self._pads, self._backend, precompiled=self._precompiled) w._data = self._data * a w._func = self._func w._args = self._args w._sync = self._sync return w #... def __add__(self, m): if isinstance(m, StencilMatrix): #assert isinstance(m, StencilMatrix) assert m._domain is self._domain assert m._codomain is self._codomain assert m._pads == self._pads if m._backend is not self._backend: msg = 'Adding two matrices with different backends is ambiguous - defaulting to backend of first addend' warnings.warn(msg, category=RuntimeWarning) w = StencilMatrix(self._domain, self._codomain, self._pads, self._backend, precompiled=self._precompiled) w._data = self._data + m._data w._func = self._func w._args = self._args w._sync = self._sync and m._sync return w else: return LinearOperator.__add__(self, m) #... def __sub__(self, m): if isinstance(m, StencilMatrix): #assert isinstance(m, StencilMatrix) assert m._domain is self._domain assert m._codomain is self._codomain assert m._pads == self._pads if m._backend is not self._backend: msg = 'Subtracting two matrices with different backends is ambiguous - defaulting to backend of the matrix we subtract from' warnings.warn(msg, category=RuntimeWarning) w = StencilMatrix(self._domain, self._codomain, self._pads, backend=self._backend, precompiled=self._precompiled) w._data = self._data - m._data w._func = self._func w._args = self._args w._sync = self._sync and m._sync return w else: return LinearOperator.__sub__(self, m) #-------------------------------------- # New properties/methods #-------------------------------------- # TODO: check if this method is really needed!! def conjugate(self, out=None): if out is not None: assert isinstance(out, StencilMatrix) assert out.domain is self.domain assert out.codomain is self.codomain else: out = StencilMatrix(self.domain, self.codomain, pads=self.pads, backend=self._backend, precompiled=self._precompiled) out._func = self._func out._args = self._args np.conjugate(self._data, out=out._data, casting='no') return out # ... # TODO: check if this method is really needed!! def conj(self, out=None): return self.conjugate(out=out) # ... @property def pads(self): return self._pads # ... @property def backend(self): return self._backend # ... def __getitem__(self, key): index = self._getindex( key ) return self._data[index] # ... def __setitem__(self, key, value): index = self._getindex( key ) self._data[index] = value #... def max(self): return self._data.max() #... def copy(self, out = None): """ Create a copy of self, that can potentially be stored in a given StencilMatrix. Parameters ---------- out : StencilMatrix(optional) The existing StencilMatrix in which we want to copy self. """ if out is not None : assert isinstance(out, StencilMatrix) assert out.domain == self.domain assert out.codomain == self.codomain else : out = StencilMatrix( self.domain, self.codomain, self._pads, backend=self._backend, precompiled=self._precompiled ) out._data[:] = self._data[:] out._func = self._func out._args = self._args return out #... def __imul__(self, a): self._data *= a return self #... def __iadd__(self, m): if isinstance(m, StencilMatrix): #assert isinstance(m, StencilMatrix) assert m._domain is self._domain assert m._codomain is self._codomain assert m._pads == self._pads self._data += m._data self._sync = m._sync and self._sync return self else: return LinearOperator.__add__(self, m) #... def __isub__(self, m): if isinstance(m, StencilMatrix): #assert isinstance(m, StencilMatrix) assert m._domain is self._domain assert m._codomain is self._codomain assert m._pads == self._pads self._data -= m._data self._sync = m._sync and self._sync return self else: return LinearOperator.__sub__(self, m) #... def __abs__(self): w = StencilMatrix( self._domain, self._codomain, self._pads, backend=self._backend, precompiled=self._precompiled ) w._data = abs(self._data) w._func = self._func w._args = self._args w._sync = self._sync return w #... def remove_spurious_entries(self): """ If any dimension is NOT periodic, make sure that the corresponding periodic corners are set to zero. """ # TODO: access 'self._data' directly for increased efficiency ndim = self._domain.ndim for direction in range(ndim): periodic = self._domain.periods[direction] if not periodic: nc = self._codomain.npts[direction] nd = self._domain.npts[direction] s = self._codomain.starts[direction] e = self._codomain.ends [direction] p = self.pads [direction] idx_front = [slice(None)]*direction idx_back = [slice(None)]*(ndim-direction-1) # Top-right corner for i in range( max(0,s), min(p,e+1) ): index = tuple( idx_front + [i] + idx_back + idx_front + [slice(-p,-i)] + idx_back ) self[index] = 0 # Bottom-left corner for i in range( max(nd-p,s), min(nc,e+1) ): index = tuple( idx_front + [i] + idx_back + idx_front + [slice(nd-i,p+1)] + idx_back ) self[index] = 0 # ... def update_ghost_regions(self): """ Update ghost regions before performing non-local access to matrix elements (e.g. in matrix transposition). """ ndim = self._codomain.ndim parallel = self._codomain.parallel if parallel: if not self._codomain.cart.is_comm_null: # PARALLEL CASE: fill in ghost regions with data from neighbors self._synchronizer.start_update_ghost_regions( self._data, self._requests ) self._synchronizer.end_update_ghost_regions( self._data , self._requests) else: # SERIAL CASE: fill in ghost regions along periodic directions, otherwise set to zero self._update_ghost_regions_serial() # Flag ghost regions as up-to-date self._sync = True # ... def exchange_assembly_data(self): """ Exchange assembly data. """ ndim = self._codomain.ndim parallel = self._codomain.parallel if self._codomain.parallel: # PARALLEL CASE: fill in ghost regions with data from neighbors self._synchronizer.start_exchange_assembly_data( self._data ) self._synchronizer.end_exchange_assembly_data( self._data ) else: # SERIAL CASE: fill in ghost regions along periodic directions, otherwise set to zero self._exchange_assembly_data_serial() ndim = self._codomain.ndim for direction in range(ndim): idx_front = [slice(None)]*direction idx_back = [slice(None)]*(ndim-direction-1) p = self._codomain.pads [direction] m = self._codomain.shifts[direction] if p == 0: continue idx_from = tuple( idx_front + [ slice(-m*p,None) if (-m*p+p)!=0 else slice(-m*p,None)] + idx_back ) self._data[idx_from] = 0. idx_from = tuple( idx_front + [ slice(0,m*p)] + idx_back ) self._data[idx_from] = 0. # ... def _exchange_assembly_data_serial(self): ndim = self._codomain.ndim for direction in range(ndim): periodic = self._codomain.periods[direction] p = self._codomain.pads [direction] m = self._codomain.shifts[direction] if p == 0: continue if periodic: idx_front = [slice(None)]*direction idx_back = [slice(None)]*(ndim-direction-1) # Copy data from left to right idx_to = tuple( idx_front + [slice( m*p, m*p+p)] + idx_back ) idx_from = tuple( idx_front + [slice(-m*p,-m*p+p) if (-m*p+p)!=0 else slice(-m*p,None)] + idx_back ) self._data[idx_to] += self._data[idx_from] # ... def diagonal(self, *, inverse = False, sqrt = False, out = None): """ Get the coefficients on the main diagonal as a StencilDiagonalMatrix object. Parameters ---------- inverse : bool If True, get the inverse of the diagonal. (Default: False). Can be combined with sqrt to get the inverse square root. sqrt : bool If True, get the square root of the diagonal. (Default: False). Can be combined with inverse to get the inverse square root. out : StencilDiagonalMatrix If provided, write the diagonal entries into this matrix. (Default: None). Returns ------- StencilDiagonalMatrix The matrix which contains the main diagonal of self (or its inverse). """ # Check `inverse` argument assert isinstance(inverse, bool) # Determine domain and codomain of the StencilDiagonalMatrix V, W = self.domain, self.codomain if inverse: V, W = W, V # Check `out` argument if out is not None: assert isinstance(out, StencilDiagonalMatrix) assert out.domain is V assert out.codomain is W # Extract diagonal data from self and identify output array diagonal_indices = self._get_diagonal_indices() diag = self._data[diagonal_indices] data = out._data if out else None # Calculate entries of StencilDiagonalMatrix if inverse: data = np.divide(1, diag, out=data) elif out: np.copyto(data, diag) else: data = diag.copy() if sqrt: np.sqrt(data, out=data) # If needed create a new StencilDiagonalMatrix object if out is None: out = StencilDiagonalMatrix(V, W, data) return out # ... def topetsc(self): """ Convert to PETSc data structure. """ from psydac.linalg.topetsc import mat_topetsc mat = mat_topetsc(self) return mat #-------------------------------------- # Private methods #-------------------------------------- def _getindex(self, key): nd = self._ndim ii = key[:nd] kk = key[nd:] index = [] for i,s,p,m in zip( ii, self._codomain.starts, self._codomain.pads, self._codomain.shifts ): x = self._shift_index( i, m*p-s ) index.append( x ) for k,p in zip( kk, self._pads ): l = self._shift_index( k, p ) index.append( l ) return tuple(index) # ... @staticmethod def _shift_index(index, shift): if isinstance( index, slice ): start = None if index.start is None else index.start + shift stop = None if index.stop is None else index.stop + shift return slice(start, stop, index.step) else: return index + shift def tocoo_local(self, order='C'): # Shortcuts sc = self._codomain.starts ec = self._codomain.ends pc = self._codomain.pads sd = self._domain.starts ed = self._domain.ends pd = self._domain.pads nd = self._ndim nr = [e-s+1 +2*p for s,e,p in zip(sc, ec, pc)] nc = [e-s+1 +2*p for s,e,p in zip(sd, ed, pd)] ravel_multi_index = np.ravel_multi_index # COO storage rows = [] cols = [] data = [] local = tuple( [slice(p,-p) for p in pc] + [slice(None)] * nd ) dd = [pdi-ppi for pdi,ppi in zip(pd, self._pads)] for (index, value) in np.ndenumerate( self._data[local] ): # index = [i1-s1, i2-s2, ..., p1+j1-i1, p2+j2-i2, ...] xx = index[:nd] # ii is local ll = index[nd:] # l=p+k ii = [x+p for x,p in zip(xx, pc)] jj = [(l+i+d)%n for (i,l,d,n) in zip(xx,ll,dd,nc)] I = ravel_multi_index( ii, dims=nr, order=order ) J = ravel_multi_index( jj, dims=nc, order=order ) rows.append( I ) cols.append( J ) data.append( value ) M = coo_matrix( (data,(rows,cols)), shape = [np.prod(nr),np.prod(nc)], dtype = self._domain.dtype ) M.eliminate_zeros() return M #... def _tocoo_no_pads(self , order='C'): # Shortcuts nr = self._codomain.npts nd = self._ndim nc = self._domain.npts ss = self._codomain.starts cpads = self._codomain.pads dm = self._domain.shifts cm = self._codomain.shifts pp = [np.int64(compute_diag_len(p,mj,mi)-(p+1)) for p,mi,mj in zip(self._pads, cm, dm)] # Range of data owned by local process (no ghost regions) local = tuple( [slice(mi*p,-mi*p) if p != 0 else slice(p, None) for p,mi in zip(cpads, cm)] + [slice(None)] * nd ) size = self._data[local].size # COO storage rows = np.zeros(size, dtype='int64') cols = np.zeros(size, dtype='int64') data = np.zeros(size, dtype=self.dtype) nrl = [np.int64(e-s+1) for s,e in zip(self.codomain.starts, self.codomain.ends)] ncl = [np.int64(i) for i in self._data.shape[nd:]] ss = [np.int64(i) for i in ss] nr = [np.int64(i) for i in nr] nc = [np.int64(i) for i in nc] dm = [np.int64(i) for i in dm] cm = [np.int64(i) for i in cm] cpads = [np.int64(i) for i in cpads] pp = [np.int64(i) for i in pp] stencil2coo = kernels['stencil2coo'][order][nd] ind = stencil2coo(self._data, data, rows, cols, *nrl, *ncl, *ss, *nr, *nc, *dm, *cm, *cpads, *pp) M = coo_matrix( (data[:ind],(rows[:ind],cols[:ind])), shape = [np.prod(nr),np.prod(nc)], dtype = self.dtype) return M #... def _tocoo_parallel_with_pads(self , order='C'): # If necessary, update ghost regions if not self.ghost_regions_in_sync: self.update_ghost_regions() # Shortcuts nr = self._codomain.npts nc = self._domain.npts nd = self._ndim ss = self._codomain.starts ee = self._codomain.ends pp = self._pads pc = self._codomain.pads pd = self._domain.pads cc = self._codomain.periods ravel_multi_index = np.ravel_multi_index # COO storage rows = [] cols = [] data = [] # List of rows (to avoid duplicate updates) I_list = [] # Shape of row and diagonal spaces xx_dims = self._data.shape[:nd] ll_dims = self._data.shape[nd:] # Cycle over rows (x = p + i - s) for xx in np.ndindex( *xx_dims ): # Compute row multi-index with simple shift ii = [s + x - p for (s, x, p) in zip(ss, xx, pc)] # Apply periodicity where appropriate ii = [i - n if (c and i >= n and i - n < s) else i + n if (c and i < 0 and i + n > e) else i for (i, s, e, n, c) in zip(ii, ss, ee, nr, cc)] # Compute row flat index # Exclude values outside global limits of matrix try: I = ravel_multi_index( ii, dims=nr, order=order ) except ValueError: continue # If I is a new row, append it to list of rows # DO NOT update same row twice! if I not in I_list: I_list.append( I ) else: continue # Cycle over diagonals (l = p + k) for ll in np.ndindex( *ll_dims ): # Compute column multi-index (k = j - i) jj = [(i+l-p) % n for (i,l,n,p) in zip(ii,ll,nc,pp)] # Compute column flat index J = ravel_multi_index( jj, dims=nc, order=order ) # Extract matrix value value = self._data[(*xx, *ll)] # Append information to COO arrays rows.append( I ) cols.append( J ) data.append( value ) # Create Scipy COO matrix M = coo_matrix( (data,(rows,cols)), shape = [np.prod(nr), np.prod(nc)], dtype = self._domain.dtype ) M.eliminate_zeros() return M # ... @property def ghost_regions_in_sync(self): return self._sync # ... # NOTE: this property must be set collectively @ghost_regions_in_sync.setter def ghost_regions_in_sync(self, value): assert isinstance(value, bool) self._sync = value # ... def _update_ghost_regions_serial(self): ndim = self._codomain.ndim for direction in range(self._codomain.ndim): periodic = self._codomain.periods[direction] p = self._codomain.pads [direction] if p == 0: continue idx_front = [slice(None)]*direction idx_back = [slice(None)]*(ndim-direction-1 + ndim) if periodic: # Copy data from left to right idx_from = tuple(idx_front + [slice( p, 2*p)] + idx_back) idx_to = tuple(idx_front + [slice(-p,None)] + idx_back) self._data[idx_to] = self._data[idx_from] # Copy data from right to left idx_from = tuple(idx_front + [slice(-2*p,-p)] + idx_back) idx_to = tuple(idx_front + [slice(None, p)] + idx_back) self._data[idx_to] = self._data[idx_from] else: # Set left ghost region to zero idx_ghost = tuple(idx_front + [slice(None, p)] + idx_back) self._data[idx_ghost] = 0 # Set right ghost region to zero idx_ghost = tuple(idx_front + [slice(-p,None)] + idx_back) self._data[idx_ghost] = 0 # ... def _prepare_transpose_args(self): #prepare the arguments for the transpose method V = self.domain W = self.codomain ssc = W.starts eec = W.ends ssd = V.starts eed = V.ends pads = self._pads gpads = V.pads dm = V.shifts cm = W.shifts # Number of rows in the transposed matrix (along each dimension) nrows = [e-s+1 for s, e in zip(ssd, eed)] ncols = [e-s+2*m*p+1 for s, e, m, p in zip(ssc, eec, cm, gpads)] pp = pads ndiags, starts = list(zip(*[compute_diag_len(p, mi, mj, return_padding=True) for p, mi, mj in zip(pp, cm, dm)])) ndiagsT, _ = list(zip(*[compute_diag_len(p, mj, mi, return_padding=True) for p, mi, mj in zip(pp, cm, dm)])) diff = [gp-p for gp, p in zip(gpads, pp)] sl = [(s if mi > mj else 0) + (s % mi + mi//mj if mi < mj else 0)+(s if mi == mj else 0)\ for s, p, mi, mj in zip(starts, pp, cm, dm)] si = [(mi * p - mi * (int(np.ceil((p + 1)/mj)) - 1) if mi > mj else 0) + \ (mi * p - mi * (p//mi) + d * (mi - 1) if mi < mj else 0) + \ (mj * p - mj * (p//mi) + d * (mi - 1) if mi == mj else 0)\ for mi, mj, p, d in zip(cm, dm, pp, diff)] sk = [n-1\ + (-(p % mj) if mi > mj else 0)\ + (-p + mj * (p//mi) if mi < mj else 0)\ + (-p + mj * (p//mi) if mi == mj else 0)\ for mi, mj, n, p in zip(cm, dm, ndiagsT, pp)] args={} args['n'] = np.int64(nrows) args['nc'] = np.int64(ncols) args['gp'] = np.int64(gpads) args['p'] = np.int64(pp) args['dm'] = np.int64(dm) args['cm'] = np.int64(cm) args['nd'] = np.int64(ndiags) args['ndT'] = np.int64(ndiagsT) args['si'] = np.int64(si) args['sk'] = np.int64(sk) args['sl'] = np.int64(sl) return args # ... def set_backend(self, backend, precompiled): ''' Define which kernels are called when using .dot() and .transpose() Parameters ---------- backend : str Psydac backend option. precompiled : bool Whether to use precompiled kernels. ''' self._backend = backend self._args = self._dotargs_null.copy() if self._backend is None: for key, arg in self._args.items(): self._args[key] = np.int64(arg) self._func = self._dot self._args.pop('pads') elif precompiled: # print('Using precompiled matvec and transpose kernels ...') from psydac.linalg import stencil_dot_kernels from psydac.linalg import stencil_transpose_kernels # matvec kernel dot_func_name = 'matvec_' + str(self._ndim) + 'd_kernel' self._func = getattr(stencil_dot_kernels, dot_func_name) # parameter for rectangular matrices add = [int(end_in >= end_out) for end_in, end_out in zip(self.domain.ends, self.codomain.ends)] self._args = {} if self._ndim == 1: self._args['s_in'] = int(self.domain.starts[0]) self._args['p_in'] = int(self.domain.pads[0]) self._args['add'] = int(add[0]) self._args['s_out'] = int(self.codomain.starts[0]) self._args['e_out'] = int(self.codomain.ends[0]) self._args['p_out'] = int(self.codomain.pads[0]) else: self._args['s_in'] = np.array(self.domain.starts) self._args['p_in'] = np.array(self.domain.pads) self._args['add'] = np.array(add) self._args['s_out'] = np.array(self.codomain.starts) self._args['e_out'] = np.array(self.codomain.ends) self._args['p_out'] = np.array(self.codomain.pads) # transpose kernel transp_func_name = 'transpose_' + str(self._ndim) + 'd_kernel' self._transpose_func = getattr(stencil_transpose_kernels, transp_func_name) # parameter for rectangular matrices add = [int(end_out >= end_in) for end_in, end_out in zip(self.domain.ends, self.codomain.ends)] self._transpose_args = {} if self._ndim == 1: self._transpose_args['s_in'] = int(self.codomain.starts[0]) self._transpose_args['p_in'] = int(self.codomain.pads[0]) self._transpose_args['add'] = int(add[0]) self._transpose_args['s_out'] = int(self.domain.starts[0]) self._transpose_args['e_out'] = int(self.domain.ends[0]) self._transpose_args['p_out'] = int(self.domain.pads[0]) else: self._transpose_args['s_in'] = np.array(self.codomain.starts) self._transpose_args['p_in'] = np.array(self.codomain.pads) self._transpose_args['add'] = np.array(add) self._transpose_args['s_out'] = np.array(self.domain.starts) self._transpose_args['e_out'] = np.array(self.domain.ends) self._transpose_args['p_out'] = np.array(self.domain.pads) else: raise AttributeError(f'This is the tiny-psydac version - must use precompiled kernels (but {precompiled = })!') from psydac.api.ast.linalg import LinearOperatorDot if self.domain.parallel: comm = self.codomain.cart.comm if self.domain == self.codomain: # In this case nrows_extra[i] == 0 for all i dot = LinearOperatorDot(self._ndim, block_shape = (1,1), keys = ((0,0),), comm = comm, backend=frozenset(backend.items()), nrows_extra = (self._args['nrows_extra'],), gpads=(self._args['gpads'],), pads=(self._args['pads'],), dm = (self._args['dm'],), cm = (self._args['cm'],), dtype=self.dtype) starts = self._args.pop('starts') nrows = self._args.pop('nrows') self._args.pop('nrows_extra') self._args.pop('gpads') self._args.pop('pads') self._args.pop('dm') self._args.pop('cm') for i in range(len(nrows)): self._args['s00_{i}'.format(i=i+1)] = np.int64(starts[i]) for i in range(len(nrows)): self._args['n00_{i}'.format(i=i+1)] = np.int64(nrows[i]) else: dot = LinearOperatorDot(self._ndim, block_shape = (1,1), keys = ((0,0),), comm = comm, backend=frozenset(backend.items()), gpads=(self._args['gpads'],), pads=(self._args['pads'],), dm = (self._args['dm'],), cm = (self._args['cm'],), dtype=self.dtype) starts = self._args.pop('starts') nrows = self._args.pop('nrows') nrows_extra = self._args.pop('nrows_extra') self._args.pop('gpads') self._args.pop('pads') self._args.pop('dm') self._args.pop('cm') for i in range(len(nrows)): self._args['s00_{i}'.format(i=i+1)] = np.int64(starts[i]) for i in range(len(nrows)): self._args['n00_{i}'.format(i=i+1)] = np.int64(nrows[i]) for i in range(len(nrows)): self._args['ne00_{i}'.format(i=i+1)] = np.int64(nrows_extra[i]) else: dot = LinearOperatorDot(self._ndim, block_shape = (1,1), keys = ((0,0),), comm = None, backend=frozenset(backend.items()), starts = (tuple(self._args['starts']),), nrows=(tuple(self._args['nrows']),), nrows_extra=(self._args['nrows_extra'],), gpads=(self._args['gpads'],), pads=(self._args['pads'],), dm = (self._args['dm'],), cm = (self._args['cm'],), dtype=self.dtype) self._args.pop('nrows') self._args.pop('nrows_extra') self._args.pop('gpads') self._args.pop('pads') self._args.pop('starts') self._args.pop('dm') self._args.pop('cm') self._args.pop('pad_imp') self._args.pop('ndiags') self._func = dot.func # ... def _get_diagonal_indices(self): """ Compute the indices which should be applied to self._data in order to get the matrix entries on the main diagonal. The result is also stored in self._diag_indices, and retrieved from there on successive calls. Returns ------- tuple[numpy.ndarray, ndim] The diagonal indices as a tuple of NumPy arrays of identical shape (n1, n2, n3, ...). """ if self._diag_indices is None: dp = self.domain.pads dm = self.domain.shifts cm = self.codomain.shifts ss = self.codomain.starts pp = [compute_diag_len(p, mj, mi) - p - 1 for p, mi, mj in zip(self._pads, cm, dm)] nrows = [e - s + 1 for s, e in zip(self.codomain.starts, self.codomain.ends)] ndim = self.domain.ndim indices = [np.zeros(np.prod(nrows), dtype=int) for _ in range(2 * ndim)] for l, xx in enumerate(np.ndindex(*nrows)): ii = [m * p + x for m, p, x in zip(dm, dp, xx)] jj = [p + x + s - ((x+s) // mi) * mj for x, mi, mj, p, s in zip(xx, cm, dm, pp, ss)] for k in range(ndim): indices[k][l] = ii[k] indices[k + ndim][l] = jj[k] self._diag_indices = tuple(idx.reshape(nrows) for idx in indices) return self._diag_indices #=============================================================================== class StencilDiagonalMatrix(LinearOperator): """ Linear operator which operates between stencil vector spaces, and which can be represented by a matrix with non-zero entries only on its main diagonal. As such this operator is completely local and requires no data communication. We assume that the vectors in the domain and the codomain have the same shape and are distributed in the same way. Parameters ---------- V : psydac.linalg.stencil.StencilVectorSpace Domain of the new linear operator. W : psydac.linalg.stencil.StencilVectorSpace Codomain of the new linear operator. """ def __init__(self, V, W, data): # Check domain and codomain assert isinstance(V, StencilVectorSpace) assert isinstance(W, StencilVectorSpace) assert V.starts == W.starts assert V.ends == W.ends data = np.asarray(data) # Check shape of provided data shape = tuple(e - s + 1 for s, e in zip(V.starts, V.ends)) assert data.shape == shape # Store info in object self._domain = V self._codomain = W self._data = data #-------------------------------------- # Abstract interface #-------------------------------------- @property def domain(self): return self._domain @property def codomain(self): return self._codomain @property def dtype(self): return self._data.dtype def tosparse(self): return sp_diags(self._data.ravel()) def toarray(self): return self._data.copy() def dot(self, v, out=None): assert isinstance(v, StencilVector) assert v.space is self.domain if out is not None: assert isinstance(out, StencilVector) assert out.space is self.codomain else: out = self.codomain.zeros() V = self.domain i = tuple(slice(s, e + 1) for s, e in zip(V.starts, V.ends)) np.multiply(self._data, v[i], out=out[i]) out.ghost_regions_in_sync = False return out # ... # TODO [YG 22.01.2024]: idot function will require a dedicated kernel # ... def transpose(self, *, conjugate=False, out=None): assert isinstance(conjugate, bool) if out is not None: assert isinstance(out, StencilDiagonalMatrix) assert out.domain is self.codomain assert out.codomain is self.domain if not (conjugate and self.dtype is complex): if out is None: data = self._data.copy() else: np.copyto(out._data, self._data, casting='no') else: if out is None: data = np.conjugate(self._data, casting='no') else: np.conjugate(self._data, out=out._data, casting='no') if out is None: out = StencilDiagonalMatrix(self.codomain, self.domain, data) return out #-------------------------------------- # Other properties/methods #-------------------------------------- def copy(self, *, out=None): if out is self: return self if out is None: data = self._data.copy() out = StencilDiagonalMatrix(self.domain, self.codomain, data) else: assert isinstance(out, StencilDiagonalMatrix) assert out.domain is self.domain assert out.codomain is self.codomain np.copyto(out._data, self._data, casting='no') return out def diagonal(self, *, inverse = False, out = None): """ Get the coefficients on the main diagonal as a StencilDiagonalMatrix object. In the default case (inverse=False, out=None) self is returned. Parameters ---------- inverse : bool If True, get the inverse of the diagonal. (Default: False). out : StencilDiagonalMatrix If provided, write the diagonal entries into this matrix. (Default: None). Returns ------- StencilDiagonalMatrix Either self, or another StencilDiagonalMatrix with the diagonal inverse. """ # Check `inverse` argument assert isinstance(inverse, bool) # Determine domain and codomain of the `out` matrix V, W = self.domain, self.codomain if inverse: V, W = W, V # Check `out` argument and identify `data` array of output vector if out is None: data = None else: assert isinstance(out, StencilDiagonalMatrix) assert out.domain is V assert out.codomain is W data = out._data # Calculate entries, or set `out=self` in default case if inverse: data = np.divide(1, diag, out=data) elif out: np.copyto(data, diag) else: out = self # If needed create a new StencilDiagonalMatrix object if out is None: out = StencilDiagonalMatrix(V, W, data) return out #=============================================================================== # TODO [YG, 28.01.2021]: # - Check if StencilMatrix should be subclassed # - Reimplement magic methods (some are simply copied from StencilMatrix) def flip_axis(index, n): s = n - index.start-1 e = n - index.stop-1 if n > index.stop else None return slice(s,e,-1) class StencilInterfaceMatrix(LinearOperator): """ Matrix in n-dimensional stencil format for an interface. This is a linear operator that maps elements of stencil vector space V to elements of stencil vector space W. Parameters ---------- V : psydac.linalg.stencil.StencilVectorSpace Domain of the new linear operator. W : psydac.linalg.stencil.StencilVectorSpace Codomain of the new linear operator. s_d : int The starting index of the domain. s_c : int The starting index of the codomain. d_axis : int The axis of the Interface of the domain. c_axis : int The axis of the Interface of the codomain. d_ext : int The extremity of the domain Interface space. the values must be 1 or -1. c_ext : int The extremity of the codomain Interface space. the values must be 1 or -1. dim : int The axis of the interface. pads: <list|tuple> Padding of the linear operator. """ def __init__(self, V, W, s_d, s_c, d_axis, c_axis, d_ext, c_ext, *, flip=None, pads=None, backend=None): assert isinstance(V, StencilVectorSpace) assert isinstance(W, StencilVectorSpace) assert W.pads == V.pads Vin = V.interfaces[d_axis, d_ext] if pads is not None: for p,vp in zip(pads, Vin.pads): assert p<=vp self._pads = pads or tuple(Vin.pads) dims = list(W.shape) if W.parent_ends[c_axis] is not None: diff = min(1, W.parent_ends[c_axis]-W.ends[c_axis]) else: diff = 0 dims[c_axis] = W.pads[c_axis] + 1-diff + 2*W.shifts[c_axis]*W.pads[c_axis] diags = [compute_diag_len(p, md, mc) for p,md,mc in zip(self._pads, Vin.shifts, W.shifts)] self._data = np.zeros(dims + diags, dtype=W.dtype) # Parallel attributes if W.parallel and not isinstance(W.cart, InterfaceCartDecomposition): if W.cart.is_comm_null:return # Create data exchanger for ghost regions self._synchronizer = get_data_exchanger( cart = W.cart, dtype = W.dtype, coeff_shape = diags, assembly = True, axis = c_axis, shape = self._data.shape ) self._flip = tuple([1]*len(dims) if flip is None else flip) self._permutation = list(range(len(dims))) self._permutation[d_axis], self._permutation[c_axis] = self._permutation[c_axis], self._permutation[d_axis] self._domain = V self._codomain = W self._domain_axis = d_axis self._codomain_axis = c_axis self._domain_ext = d_ext self._codomain_ext = c_ext self._domain_start = s_d self._codomain_start = s_c self._ndim = len(dims) self._backend = None # Prepare the arguments for the dot product method nd = [(ej-sj+2*gp*mj-mj*p-gp)//mj*mi+1 for sj,ej,mj,mi,p,gp in zip(Vin.starts, Vin.ends, Vin.shifts, W.shifts, self._pads, Vin.pads)] nc = [ei-si+1 for si,ei,mj,p in zip(W.starts, W.ends, Vin.shifts, self._pads)] # Number of rows in matrix (along each dimension) nrows = [min(ni,nj) for ni,nj in zip(nc, nd)] nrows_extra = [max(0,ni-nj) for ni,nj in zip(nc, nd)] nrows_extra[c_axis] = max(W.npts[c_axis]-Vin.npts[c_axis], 0) nrows[c_axis] = W.pads[c_axis] + 1-diff-nrows_extra[c_axis] args = {} args['starts'] = tuple(Vin.starts) args['nrows'] = tuple(nrows) args['nrows_extra'] = tuple(nrows_extra) args['gpads'] = tuple(Vin.pads) args['pads'] = tuple(self._pads) args['dm'] = tuple(Vin.shifts) args['cm'] = tuple(W.shifts) args['c_axis'] = c_axis args['d_start'] = self._domain_start args['c_start'] = self._codomain_start args['flip'] = self._flip args['permutation'] = self._permutation self._dotargs_null = args self._args = args.copy() self._func = self._dot self._transpose_args = self._prepare_transpose_args() self._transpose_func = kernels['interface_transpose'][self._ndim] if backend is None: backend = PSYDAC_BACKENDS.get(os.environ.get('PSYDAC_BACKEND')) if backend: self.set_backend(backend) # Flag ghost regions as not up-to-date (conservative choice) self._sync = False #-------------------------------------- # Abstract interface #-------------------------------------- @property def domain(self): return self._domain # ... @property def codomain(self): return self._codomain # ... @property def dtype(self): return self.domain.dtype # ... def dot(self, v, out=None): assert isinstance(v, StencilVector) assert v.space is self.domain # Necessary if vector space is distributed across processes if out is not None: assert isinstance(out, StencilVector) assert out.space is self.codomain out[(slice(None,None),)*v.space.ndim] = 0. else: out = StencilVector( self.codomain ) # Necessary if vector space is distributed across processes if not v.ghost_regions_in_sync and not v.space.parallel: v.update_ghost_regions() self._func(self._data, v._interface_data[self._domain_axis, self._domain_ext], out._data, **self._args) # IMPORTANT: flag that ghost regions are not up-to-date out.ghost_regions_in_sync = False return out # ... @staticmethod def _dot(mat, v, out, starts, nrows, nrows_extra, gpads, pads, dm, cm, c_axis, d_start, c_start, flip, permutation): # Index for k=i-j nrows = list(nrows) ndim = len(v.shape) kk = [slice(None)]*ndim diff = [xp-p for xp,p in zip(gpads, pads)] ndiags, _ = list(zip(*[compute_diag_len(p,mj,mi, return_padding=True) for p,mi,mj in zip(pads,cm,dm)])) bb = [p*m+p+1-n-s%m for p,m,n,s in zip(gpads, dm, ndiags, starts)] nn = v.shape for xx in np.ndindex( *nrows ): ii = [ mi*pi + x for mi,pi,x in zip(cm, gpads, xx) ] jj = tuple( slice(b-d+(x+s%mj)//mi*mj,b-d+(x+s%mj)//mi*mj+n) for x,mi,mj,b,s,n,d in zip(xx,cm,dm,bb,starts,ndiags,diff) ) jj = [flip_axis(i,n) if f==-1 else i for i,f,n in zip(jj,flip,nn)] jj = tuple(jj[i] for i in permutation) ii_kk = tuple( ii + kk ) ii[c_axis] += c_start out[tuple(ii)] = np.dot( mat[ii_kk].flat, v[jj].flat ) new_nrows = nrows.copy() for d,er in enumerate(nrows_extra): rows = new_nrows.copy() del rows[d] for n in range(er): for xx in np.ndindex(*rows): xx = list(xx) xx.insert(d, nrows[d]+n) ii = [mi*pi + x for mi,pi,x in zip(cm, gpads, xx)] ee = [max(x-l+1,0) for x,l in zip(xx, nrows)] jj = tuple( slice(b-d+(x+s%mj)//mi*mj, b-d+(x+s%mj)//mi*mj+n-e) for x,mi,mj,d,e,b,s,n in zip(xx, cm, dm, diff, ee, bb, starts, ndiags) ) jj = [flip_axis(i,n) if f==-1 else i for i,f,n in zip(jj, flip, nn)] jj = tuple(jj[i] for i in permutation) kk = [slice(None,n-e) for n,e in zip(ndiags, ee)] ii_kk = tuple( ii + kk ) ii[c_axis] += c_start out[tuple(ii)] = np.dot( mat[ii_kk].flat, v[jj].flat ) new_nrows[d] += er # ... def transpose( self, conjugate=False, out=None): """ Create new StencilInterfaceMatrix Mt, where domain and codomain are swapped with respect to original matrix M, and Mt_{ij} = M_{ji}. """ # For clarity rename self M = self if out is None: # Create new matrix where domain and codomain are swapped out = StencilInterfaceMatrix(M.codomain, M.domain, M.codomain_start, M.domain_start, M.codomain_axis, M.domain_axis, M.codomain_ext, M.domain_ext, flip=M.flip, pads=M.pads, backend=M.backend) # Call low-level '_transpose' function (works on Numpy arrays directly) if conjugate: M._transpose_func(np.conjugate(M._data), out._data, **M._transpose_args) else: M._transpose_func(M._data, out._data, **M._transpose_args) return out def _prepare_transpose_args(self): #prepare the arguments for the transpose method V = self.domain W = self.codomain ssc = W.starts eec = W.ends ssd = V.interfaces[self._domain_axis, self._domain_ext].starts eed = V.interfaces[self._domain_axis, self._domain_ext].ends pads = self._pads gpads = V.pads dm = V.shifts cm = W.shifts dim = self._codomain_axis # Number of rows in the transposed matrix (along each dimension) nrows = [e-s+1 for s,e in zip(ssd, eed)] ncols = [e-s+1+2*m*p for s, e, m, p in zip(ssc, eec, cm, gpads)] pp = pads ndiags, starts = list(zip(*[compute_diag_len(p,mi,mj, return_padding=True) for p, mi, mj in zip(pp, cm, dm)])) ndiagsT, _ = list(zip(*[compute_diag_len(p,mj,mi, return_padding=True) for p, mi, mj in zip(pp, cm, dm)])) diff = [gp-p for gp, p in zip(gpads, pp)] sl = [(s if mi > mj else 0) + (s % mi + mi//mj if mi < mj else 0)+(s if mi == mj else 0)\ for s, p, mi, mj in zip(starts, pp, cm, dm)] si = [(mi * p - mi * (int(np.ceil((p + 1)/mj)) - 1) if mi > mj else 0) + \ (mi * p - mi * (p//mi) + d * (mi - 1) if mi < mj else 0) + \ (mj * p - mj * (p//mi) + d * (mi - 1) if mi == mj else 0)\ for mi, mj, p, d in zip(cm, dm, pp, diff)] sk = [n - 1\ + (-(p % mj) if mi > mj else 0)\ + (-p + mj * (p//mi) if mi < mj else 0)\ + (-p + mj * (p//mi) if mi == mj else 0)\ for mi, mj, n, p in zip(cm, dm, ndiagsT, pp)] if V.parent_ends[dim] is not None: diff_r = min(1, V.parent_ends[dim] - V.ends[dim]) else: diff_r = 0 if W.parent_ends[dim] is not None: diff_c = min(1, W.parent_ends[dim] - W.ends[dim]) else: diff_c = 0 nrows[dim] = pads[dim] + 1 - diff_r ncols[dim] = pads[dim] + 1 - diff_c + 2*cm[dim]*pads[dim] args = {} args['n'] = np.int64(nrows) args['nc'] = np.int64(ncols) args['gp'] = np.int64(gpads) args['p'] = np.int64(pp) args['dm'] = np.int64(dm) args['cm'] = np.int64(cm) args['nd'] = np.int64(ndiags) args['ndT'] = np.int64(ndiagsT) args['si'] = np.int64(si) args['sk'] = np.int64(sk) args['sl'] = np.int64(sl) return args # ... def toarray(self, **kwargs): order = kwargs.pop('order', 'C') with_pads = kwargs.pop('with_pads', False) if self.codomain.parallel and with_pads: coo = self._tocoo_parallel_with_pads() else: coo = self._tocoo_no_pads() return coo.toarray() # ... def tosparse(self, **kwargs): order = kwargs.pop('order', 'C') with_pads = kwargs.pop('with_pads', False) if self.codomain.parallel and with_pads: coo = self._tocoo_parallel_with_pads() else: coo = self._tocoo_no_pads() return coo #... def copy(self): M = StencilInterfaceMatrix( self._domain, self._codomain, self._domain_start, self._codomain_start, self._domain_axis, self._codomain_axis, self._domain_ext, self._codomain_ext, flip=self._flip, pads=self._pads, backend=self._backend ) M._data[:] = self._data[:] return M # ... def __neg__(self): return self.__mul__(-1) #... def __mul__(self, a): w = self.copy() w._data *= a w._sync = self._sync return w #... def __add__(self, m): raise NotImplementedError('TODO: StencilInterfaceMatrix.__add__') #... def __sub__(self, m): raise NotImplementedError('TODO: StencilInterfaceMatrix.__sub__') #... def __imul__(self, a): self._data *= a #... def __iadd__(self, m): raise NotImplementedError('TODO: StencilInterfaceMatrix.__iadd__') #... def __isub__(self, m): raise NotImplementedError('TODO: StencilInterfaceMatrix.__isub__') #-------------------------------------- # Other properties/methods #-------------------------------------- # ... @property def domain_axis(self): return self._domain_axis # ... @property def codomain_axis(self): return self._codomain_axis # ... @property def domain_ext(self): return self._domain_ext # ... @property def codomain_ext(self): return self._codomain_ext # ... @property def domain_start(self): return self._domain_start # ... @property def codomain_start(self): return self._codomain_start # ... @property def dim(self): return self._ndim # ... @property def flip(self): return self._flip # ... @property def permutation(self): return self._permutation # ... @property def pads(self): return self._pads # ... def __getitem__(self, key): index = self._getindex( key ) return self._data[index] # ... def __setitem__(self, key, value): index = self._getindex( key ) self._data[index] = value #... def max(self): return self._data.max() # ... @property def backend(self): return self._backend #-------------------------------------- # Private methods #-------------------------------------- def _getindex(self, key): nd = self._ndim ii = key[:nd] kk = key[nd:] index = [] for i,s,p in zip(ii, self._codomain.starts, self._codomain.pads): x = self._shift_index(i, p-s) index.append(x) for k,p in zip(kk, self._pads): l = self._shift_index(k, p) index.append(l) return tuple(index) # ... @staticmethod def _shift_index(index, shift): if isinstance(index, slice): start = None if index.start is None else index.start + shift stop = None if index.stop is None else index.stop + shift return slice(start, stop, index.step) else: return index + shift #... def _tocoo_no_pads(self): # Shortcuts nr = self.codomain.npts nc = self.domain.npts ss = self.codomain.starts pp = self.codomain.pads nd = len(pp) dim = self._codomain_axis flip = self.flip permutation = self.permutation c_start = self.codomain_start d_start = self.domain_start dm = self.domain.shifts cm = self.codomain.shifts ravel_multi_index = np.ravel_multi_index # COO storage rows = [] cols = [] data = [] # Range of data owned by local process (no ghost regions) local = tuple( [slice(m*p,-m*p) if p != 0 else slice(0, None) for m,p in zip(cm, pp)] + [slice(None)] * nd ) pp = [compute_diag_len(p,mj,mi)-(p+1) for p,mi,mj in zip(self._pads, cm, dm)] for (index,value) in np.ndenumerate( self._data[local] ): if value: # index = [i1, i2, ..., p1+j1-i1, p2+j2-i2, ...] xx = index[:nd] # x=i-s ll = index[nd:] # l=p+k ii = [s+x for s,x in zip(ss,xx)] di = [i//m for i,m in zip(ii,cm)] jj = [(i*m+l-p)%n for (i,m,l,n,p) in zip(di,dm,ll,nc,pp)] ii[dim] += c_start jj[dim] += d_start jj = [n-j-1 if f==-1 else j for j,f,n in zip(jj,flip,nc)] jj = [jj[i] for i in permutation] I = ravel_multi_index(ii, dims=nr, order='C') J = ravel_multi_index(jj, dims=nc, order='C') rows.append(I) cols.append(J) data.append(value) M = coo_matrix( (data,(rows,cols)), shape = [np.prod(nr),np.prod(nc)], dtype = self.domain.dtype) return M # ... @property def ghost_regions_in_sync(self): return self._sync # ... # NOTE: this property must be set collectively @ghost_regions_in_sync.setter def ghost_regions_in_sync(self, value): assert isinstance(value, bool) self._sync = value # ... def _update_ghost_regions_serial(self, direction: int): if direction is None: for d in range(self._codomain.ndim): self._update_ghost_regions_serial(d) return ndim = self._codomain.ndim periodic = self._codomain.periods[direction] p = self._codomain.pads [direction] if p == 0: return idx_front = [slice(None)] * direction idx_back = [slice(None)] * (ndim-direction-1) if periodic: # Copy data from left to right idx_from = tuple(idx_front + [slice( p, 2*p)] + idx_back) idx_to = tuple(idx_front + [slice(-p,None)] + idx_back) self._data[idx_to] = self._data[idx_from] # Copy data from right to left idx_from = tuple(idx_front + [slice(-2*p,-p)] + idx_back) idx_to = tuple(idx_front + [slice(None, p)] + idx_back) self._data[idx_to] = self._data[idx_from] else: # Set left ghost region to zero idx_ghost = tuple(idx_front + [slice(None, p)] + idx_back) self._data[idx_ghost] = 0 # Set right ghost region to zero idx_ghost = tuple(idx_front + [slice(-p,None)] + idx_back) self._data[idx_ghost] = 0 # ... def exchange_assembly_data(self): """ Exchange assembly data. """ ndim = self._codomain.ndim parallel = self._codomain.parallel if self._codomain.parallel: # PARALLEL CASE: fill in ghost regions with data from neighbors self._synchronizer.start_exchange_assembly_data(self._data) self._synchronizer. end_exchange_assembly_data(self._data) else: # SERIAL CASE: fill in ghost regions along periodic directions, otherwise set to zero self._exchange_assembly_data_serial() # ... def _exchange_assembly_data_serial(self): ndim = self._codomain.ndim for direction in range(ndim): if direction == self._codomain_axis: continue periodic = self._codomain.periods[direction] p = self._codomain.pads [direction] m = self._codomain.shifts [direction] if periodic: idx_front = [slice(None)] * direction idx_back = [slice(None)] * (ndim-direction-1) # Copy data from left to right idx_to = tuple(idx_front + [slice( m*p, m*p+p)] + idx_back) idx_from = tuple(idx_front + [slice(-m*p,-m*p+p) if (-m*p+p)!=0 else slice(-m*p, None)] + idx_back) self._data[idx_to] += self._data[idx_from] # ... def set_backend(self, backend, precompiled=False): raise AttributeError(f'This is the tiny-psydac version - must use precompiled kernels (but {precompiled = })!') from psydac.api.ast.linalg import LinearOperatorDot self._backend = backend self._args = self._dotargs_null.copy() if self._backend is None: self._func = self._dot else: if self.domain.parallel: comm = self.domain.interfaces[self._domain_axis, self._domain_ext].cart.local_comm if self.domain == self.codomain: # In this case nrows_extra[i] == 0 for all i dot = LinearOperatorDot(self._ndim, block_shape = (1,1), keys = ((0,0),), comm = comm, backend=frozenset(backend.items()), nrows_extra=(self._args['nrows_extra'],), gpads=(self._args['gpads'],), pads=(self._args['pads'],), dm = (self._args['dm'],), cm = (self._args['cm'],), interface=True, flip_axis=self._flip, interface_axis=self._codomain_axis, d_start=(self._domain_start,), c_start=(self._codomain_start,), dtype= self.dtype) starts = self._args.pop('starts') nrows = self._args.pop('nrows') self._args = {} for i in range(len(nrows)): self._args['s00_{i}'.format(i=i+1)] = np.int64(starts[i]) for i in range(len(nrows)): self._args['n00_{i}'.format(i=i+1)] = np.int64(nrows[i]) else: dot = LinearOperatorDot(self._ndim, block_shape = (1,1), keys = ((0,0),), comm = comm, backend=frozenset(backend.items()), gpads=(self._args['gpads'],), pads=(self._args['pads'],), dm = (self._args['dm'],), cm = (self._args['cm'],), interface=True, flip_axis=self._flip, interface_axis=self._codomain_axis, d_start=(self._domain_start,), c_start=(self._codomain_start,), dtype= self.dtype) starts = self._args.pop('starts') nrows = self._args.pop('nrows') nrows_extra = self._args.pop('nrows_extra') self._args = {} for i in range(len(nrows)): self._args['s00_{i}'.format(i=i+1)] = np.int64(starts[i]) for i in range(len(nrows)): self._args['n00_{i}'.format(i=i+1)] = np.int64(nrows[i]) for i in range(len(nrows)): self._args['ne00_{i}'.format(i=i+1)] = np.int64(nrows_extra[i]) else: dot = LinearOperatorDot(self._ndim, block_shape = (1,1), keys = ((0,0),), comm = None, backend=frozenset(backend.items()), starts = (tuple(self._args['starts']),), nrows=(self._args['nrows'],), nrows_extra=(self._args['nrows_extra'],), gpads=(self._args['gpads'],), pads=(self._args['pads'],), dm = (self._args['dm'],), cm = (self._args['cm'],), interface=True, flip_axis=self._flip, interface_axis=self._codomain_axis, d_start=(self._domain_start,), c_start=(self._codomain_start,), dtype= self.dtype) self._args = {} self._func = dot.func #=============================================================================== del VectorSpace, Vector